;
78m_TaxidComponentTotal = 0;
79m_SpeciesLevelTaxonCheck =
false;
80m_GenBankCompLineCount=0;
94cerr <<
"FATAL: cannot connect to GenBank!\n";
102 if(!wgsInfo.IsCreated()) {
103cerr <<
"FATAL: cannot connect to VDB!\n";
122cout <<
" <LinesWithValidCompAcc>"<< m_GenBankCompLineCount <<
"</LinesWithValidCompAcc>\n";
123cout <<
" <errors>"<< e_count <<
"</errors>\n";
125cout <<
" <skipped>"<<
pAgpErr->m_msg_skipped <<
"</skipped>\n";
129 if(m_GenBankCompLineCount) {
130 out<< m_GenBankCompLineCount <<
" lines with valid component accessions";
133 out<<
"No valid component accessions found";
136 if(e_count==1)
out<<
".\n1 error";
137 else out<<
".\n"<< e_count <<
" errors";
138 if(
pAgpErr->m_msg_skipped)
out<<
", "<<
pAgpErr->m_msg_skipped <<
" not printed";
143 out<<
"; no invalid component accessions.\n";
152map_it = m_TaxidSpeciesMap.find(taxid);
155 if(map_it == m_TaxidSpeciesMap.end()) {
156species_id = x_GetTaxonSpecies(taxid);
157m_TaxidSpeciesMap.insert(
160species_id = map_it->second;
171 boolis_species =
true;
185 id, is_species, is_uncultured, blast_name
187 if(org_ref ==
null) {
192 if(
id==taxid) blast_name0=blast_name;
197species_id = prev_id;
202 if(blast_name0.size()) {
209 string(
" - ") + blast_name0 +
210 " is above species level");
230(res.first)->second.push_back(line_info);
231m_TaxidComponentTotal++;
236 if(m_TaxidMap.size() == 0)
return true;
239 floatagp_taxid_percent = 0;
241 floatmax_percent = 0;
242 booltaxid_found=
false;
246agp_taxid_percent = float(it->second.size())/float(m_TaxidComponentTotal);
247 if(agp_taxid_percent > max_percent) {
248max_percent = agp_taxid_percent;
249agp_taxid = it->first;
250 if(agp_taxid_percent>=.8) {
257 if(use_xml)
out<<
" <taxid>"<< agp_taxid <<
"</taxid>\n";
260 out<<
" <cannot_determine_taxid/>\n";
263cerr <<
"\nUnable to determine a Taxid for the AGP";
265cerr <<
":\nless than 80% of components have one common taxid="<<agp_taxid<<
"";
273 if(!use_xml)
out<<
"The AGP taxid is: "<< agp_taxid << endl;
274 if(m_TaxidMap.size() == 1)
return true;
276 if(!use_xml) cerr <<
"Components with incorrect taxids:\n";
280 if(map_it->first == agp_taxid)
continue;
282 TTaxIdtaxid = map_it->first;
285 out<<
" <CompBadTaxid taxid=\""<< taxid
286<<
"\" line_num=\""<< list_it->line_num
287<< (list_it->file_num ?
string(
290<<
"\">"<<
NStr::XmlEncode(list_it->component_id)<<
"</CompBadTaxid>\n";
294 if(list_it->file_num) {
295cerr <<
pAgpErr->GetFile(list_it->file_num) <<
":";
297cerr<< list_it->line_num <<
": "<< list_it->component_id
298<<
" - Taxid "<< taxid <<
"\n";
302 if(!use_xml) cerr <<
"\n";
308 const string& orig_line,
const string& comp_id,
316m_LineQueue.push_back(ld);
317m_Accessions.insert(comp_id);
325m_LineQueue.push_back(ld);
331m_InvalidAccessions.clear();
332vector<CSeq_id_Handle> idHandles;
334 for(
const auto& accession : m_Accessions) {
336seqId.
Set(accession);
340m_InvalidAccessions.insert(accession);
349 for(
autoit=begin(bioseqHandles);
350it!=end(bioseqHandles);
355 const auto& bioseqHandle = *it;
357 const auto& accVer = idHandle.GetSeqId()->IsGenbank() ?
358idHandle.GetSeqId()->GetGenbank() :
359idHandle.GetSeqId()->GetOther();
362 auto& compInfo = m_ComponentInfoMap[accVer.GetAccession()];
363compInfo.currentVersion = accVer.GetVersion();
364compInfo.len = bioseqHandle.GetInst_Length();
366compInfo.inDatabase=
true;
368m_Accessions.clear();
372 const string& accession,
376 boolversionSpecifiedInFile,
380 if(versionSpecifiedInFile) {
381ostr << line <<
'\n';
385 if(currentVersion==1) {
388 for(
size_t i=0;
i<line.
size(); ++
i) {
389 if(line[
i] ==
'\t') {
397ostr << line.
substr(0, pos);
398 if(pos !=
NPOS) ostr <<
".1"<< line.
substr(pos);
403 if(currentVersion) {
404ostr << line <<
"#current version "<< accession <<
"."<< currentVersion <<
'\n';
409ostr << line <<
"#component_id not in GenBank"<<
'\n';
418 for(
const auto& lineInfo : m_LineQueue)
421 const string& orig_line = lineInfo.orig_line;
424*m_pOut << orig_line <<
'\n';
426 pAgpErr->LineDone(orig_line, lineInfo.line_num);
432 const boolversionSpecified =
438acc.
substr(0, pos_ver) :
442 autoitAccData = m_ComponentInfoMap.find(acc_nover);
444 if(itAccData!=m_ComponentInfoMap.end()) {
445acc_data = itAccData->second;
448 " - cannot get version for "+acc);
453 " - cannot retrieve taxonomic id for "+acc);
456m_GenBankCompLineCount++;
462 if(m_check_len_taxid) {
469 if(m_SpeciesLevelTaxonCheck) {
470taxid = x_GetSpecies(taxid);
472x_AddToTaxidMap(taxid, acc, lineInfo.line_num);
477 autoit = m_InvalidAccessions.find(acc);
478 if(it != end(m_InvalidAccessions)) {
494 pAgpErr->LineDone(orig_line, lineInfo.line_num);
502 if(pos1==
NPOS)
returnlong_acc;
504 SIZE_TYPEpos2=long_acc.find(
'|', pos1+1);
505 if(pos2==
NPOS)
returnlong_acc.substr(pos1+1);
507 if( long_acc.substr(0,pos1) ==
"gi") {
519 strings = ids.front()->GetSeqIdString(
true);
521pos1 = s.rfind(
':');
522 if( pos1 !=
NPOS)
returns.substr(pos1+1);
536 if( temp_seq_id->
IsLocal() ) {
541 const boolbAccnIsProtOnly = (
544 if( bAccnIsProtOnly ) {
555cerr <<
"WARNING: '"<< acc <<
"' was found in component files but Genbank version overrides it."<< endl;
static CRef< CScope > m_Scope
void OverrideLenIfAccession(const string &acc, int &in_out_len)
string ExtractAccession(const string &long_acc)
static void s_WriteLine(const string &accession, const CTempString &line, bool inDatabase, int currentVersion, bool versionSpecifiedInFile, CNcbiOstream &ostr)
static CRef< CObjectManager > m_ObjectManager
User-defined methods of the data storage class.
CRef< CAgpErrEx > pAgpErr
@ CODE_First
The number of the first CAgpErr error enum.
@ CODE_Last
This is one past the last code allowed, after built-in and user errors.
static bool CheckComponentEnd(const string &comp_id, TAgpPos comp_end, TAgpLen comp_len, CAgpErr &agp_err)
TTaxId x_GetTaxonSpecies(TTaxId taxid)
bool CheckTaxids(CNcbiOstream &out, bool use_xml)
void QueueLine(const string &orig_line, const string &comp_id, int line_num, int comp_end)
TTaxId x_GetSpecies(TTaxId taxid)
void PrintTotals(CNcbiOstream &out, bool use_xml)
void x_AddToTaxidMap(TTaxId taxid, const string &comp_id, int line_num)
vector< SAgpLineInfo > TAgpInfoList
pair< TTaxidMap::iterator, bool > TTaxidMapRes
static TRegisterLoaderInfo RegisterInObjectManager(CObjectManager &om, CReader *reader=0, CObjectManager::EIsDefault is_default=CObjectManager::eDefault, CObjectManager::TPriority priority=CObjectManager::kPriority_NotSet)
CConstRef< COrg_ref > GetOrgRef(TTaxId tax_id, bool &is_species, bool &is_uncultured, string &blast_name, bool *is_specified=NULL)
TTaxId GetParent(TTaxId id_tax)
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
static TRegisterLoaderInfo RegisterInObjectManager(CObjectManager &om, const SLoaderParams &params, CObjectManager::EIsDefault is_default=CObjectManager::eNonDefault, CObjectManager::TPriority priority=CObjectManager::kPriority_NotSet)
container_type::iterator iterator
container_type::value_type value_type
std::ofstream out("events_result.xml")
main entry point for tests
static unsigned int line_num
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
SStrictId_Tax::TId TTaxId
Taxon id type.
static EAccessionInfo IdentifyAccession(const CTempString &accession, TParseFlags flags=fParse_AnyRaw)
Deduces information from a bare accession a la WHICH_db_accession; may report false negatives on prop...
EAccessionInfo
For IdentifyAccession (below)
static SIZE_TYPE ParseFastaIds(CBioseq::TId &ids, const CTempString &s, bool allow_partial_failure=false)
Parse an entire set of |-delimited FASTA-style IDs, appending the results to IDS.
CSeq_id & Set(const CTempString &the_id, TParseFlags flags=fParse_AnyRaw)
Reassign based on flat specifications; arguments interpreted as with constructors.
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
TTaxId GetTaxId(const CBioseq_Handle &handle)
return the tax-id associated with a given sequence.
@ eGetId_ForceAcc
return only an accession based seq-id
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
void AddDefaults(TPriority pri=kPriority_Default)
Add default data loaders from object manager.
TBioseqHandles GetBioseqHandles(const TIds &ids)
Get bioseq handles for all ids.
bool IsCreated(void) const
Return true if the loader was just created, false if already registered or if the operation failed.
TInst_Length GetInst_Length(void) const
void Reset(void)
Reset reference object.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
NCBI_NS_STD::string::size_type SIZE_TYPE
static int StringToNonNegativeInt(const CTempString str, TStringToNumFlags flags=0)
Convert string to non-negative integer value.
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
bool empty(void) const
Return true if the represented string is empty (i.e., the length is zero)
static string XmlEncode(const CTempString str, TXmlEncode flags=eXmlEnc_Contents)
Encode a string for XML.
CTempString substr(size_type pos) const
Obtain a substring from this string, beginning at a given offset.
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
size_type find(const CTempString match, size_type pos=0) const
Find the first instance of the entire matching string within the current string, beginning at an opti...
size_type size(void) const
Return the length of the represented array.
bool IsLocal(void) const
Check if variant Local is selected.
list< CRef< CSeq_id > > TId
Magic spell ;-) needed for some weird compilers... very empiric.
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4