sequence;
62: m_Feat(feat),
m_Scope(scope), m_Imp(imp), m_ProductIsFar(
false)
72 "The feature is missing a location");
77 boollowerSev =
false;
81 const CDbtag& dbtag = **it;
82 if( dbtag.
GetDb() ==
"dbSNP") {
90 "Location",
m_Feat, lowerSev);
131 "Inference or experiment qualifier missing but obsolete experimental evidence qualifier set");
157 if(loc.IsInt() || loc.IsWhole()) {
162 for(CSeq_loc_CI citer(loc); citer; ++citer) {
163 const CSeq_id& this_id = citer.GetSeq_id();
164 if(!
prev|| !
prev->Equals(this_id)) {
169 prev.Reset(&this_id);
183 switch(sid.
Which()) {
196 "Feature product should not put an accession in the Textseq-id 'name' slot");
199 "Feature product should not use " 200 "Textseq-id 'name' slot");
215 if(id->Which() == sid.
Which()) {
217 stringfrom_seq =
id->AsFastaString();
222 "Capitalization change from product location on feature to product sequence");
225 switch(id->Which()) {
238 "Protein bioseq has Textseq-id 'name' that " 239 "looks like it is derived from a nucleotide " 243 "Protein bioseq has Textseq-id 'name' and no accession");
260 boolis_seqloc_bond =
false;
264 for(CSeq_loc_CI it(feat.
GetLocation()); it; ++it) {
265 if(it.GetEmbeddingSeq_loc().IsBond()
266&& (!it.GetEmbeddingSeq_loc().GetBond().IsSetA()
267|| it.GetEmbeddingSeq_loc().GetBond().IsSetB())) {
268is_seqloc_bond =
true;
273 for(CSeq_loc_CI it(feat.
GetLocation()); it; ++it) {
274 if(it.GetEmbeddingSeq_loc().IsBond()) {
275is_seqloc_bond =
true;
281 for(CSeq_loc_CI it(feat.
GetLocation()); it; ++it) {
282 if(it.GetEmbeddingSeq_loc().IsBond()) {
283is_seqloc_bond =
true;
288 returnis_seqloc_bond;
299 if(both || both_rev) {
301 if(both && both_rev) {
302suffix =
"(forward and reverse)";
304suffix =
"(forward)";
305}
else if(both_rev) {
306suffix =
"(reverse)";
312 label+
" may not be on both "+ suffix +
" strands");
321 for(CSeq_loc_CI it(loc); it; ++it) {
322 if(it.IsSetStrand()) {
330 if(both && both_rev) {
339has_parent_gene_id =
false;
345has_parent_gene_id =
true;
346 if((*it)->IsSetTag() && (*it)->GetTag().Equals(
tag)) {
378 boolhas_parent_gene_id =
false;
379 if(!
HasGeneIdXref(parent, (*it)->GetTag(), has_parent_gene_id)) {
380 if(has_parent_gene_id ||
386parent = feat_tree->GetParent(parent);
403 if((*pi)->IsEquiv()) {
405 "Citation on feature has unexpected internal Pub-equiv");
415 "empty inference string",
416 "bad inference prefix",
417 "bad inference body",
418 "single inference field",
419 "spaces in inference",
420 "possible comment in inference",
421 "same species misused",
422 "the value in the accession field is not legal. The only allowed value is accession.version, eg AF123456.1. Problem =",
423 "bad inference accession version",
424 "accession.version not public",
425 "bad accession type",
426 "unrecognized database",
441 "Qualifier other than replace has just quotation marks");
449 "Inference qualifier problem - empty inference string ()");
457qual.
GetVal() +
" is not in proper EC_number format");
459 stringec_number = qual.
GetVal();
465 "EC_number "+ ec_number +
" was deleted");
470 "EC_number "+ ec_number +
" was replaced");
475 if(pos == string::npos || !
isdigit(ec_number.c_str()[pos + 1])) {
477ec_number +
" is not a legal value for qualifier EC_number");
480ec_number +
" is not a legal preliminary value for qualifier EC_number");
509 "/pseudogene value should not be '"+ qual.
GetVal() +
"'",
m_Feat);
512 boolhas_space =
false;
513 boolhas_char_after_space =
false;
515 if(
isspace((
unsigned char)(*it))) {
517}
else if(has_space) {
519has_char_after_space =
true;
523 if(has_char_after_space) {
525 "Number qualifiers should not contain spaces");
530 "feature qualifier "+ qual.
GetVal() +
" has SGML");
543 "Unable to find EC number file 'ecnum_ambiguous.txt' in data directory");
547 "Unable to find EC number file 'ecnum_deleted.txt' in data directory");
551 "Unable to find EC number file 'ecnum_replaced.txt' in data directory");
555 "Unable to find EC number file 'ecnum_specific.txt' in data directory");
564 for(
autoit : errors) {
566it.first, it.second);
593 "Feature comment may refer to reference by serial number - " 594 "attach reference specific comments to the reference " 595 "REMARK instead.",
m_Feat);
599 "feature comment "+ comment +
" has SGML",
627 "On partial Bioseq, SeqFeat.partial should be TRUE");
630 else if(is_partial &&
639 "When SeqFeat.product is a partial Bioseq, SeqFeat.location " 640 "should also be partial");
648 "Gene of 'order' with otherwise complete location should " 649 "have partial flag set");
655 boolis_far_fail =
false;
663 string str(
"Inconsistent: Product= complete, Location= ");
665 str+=
"Feature.partial= ";
666 str+= is_partial ?
"TRUE":
"FALSE";
669}
else if(is_far_fail) {
677 string str(
"Inconsistent: ");
682 str+=
"Location= ";
684 str+=
"Feature.partial= ";
685 str+= is_partial ?
"TRUE":
"FALSE";
698 "5' or 3' partial location should not have unclassified" 699 " partial in product molinfo descriptor");
714 "Bond location should only be on bond features");
719 stringprefix =
"Feature";
736 stringloc_id = os.str();
738 if((*it)->IsGi() || (*it)->IsGibbsq() || (*it)->IsGibbmt()) {
742(*it)->WriteAsFasta(os2);
743 stringbs_id = os2.str();
746 "Sequence identifier in feature location differs in capitalization with identifier on Bioseq");
754 "Feature on protein indicates negative strand");
761vector<TSeqPos> gap_starts;
767 "Feature contains more than 50% Ns");
769 for(
autogap_start : gap_starts) {
776 "Feature inside sequence gap");
781 "Internal interval begins or ends in gap");
785 "Feature crosses gap of unknown length");
790 string(
"Exception while checking for intervals in gaps. EXCEPTION: ") +
792}
catch(
conststd::exception&) {
837 while(map_iter && pos <= stop) {
840 for(; pos < map_end && pos <= stop; pos++) {
899 if( (*it)->IsLoc() ) {
920 intnum_unknown_gap = 0;
921 boolfirst_in_gap =
false, last_in_gap =
false;
922 boollocal_first_gap =
false, local_last_gap =
false;
923 boolstartsOrEndsInGap =
false;
926 for(CSeq_loc_CI loc_it(loc); loc_it; ++loc_it) {
929 if(!vec.
empty()) {
933 if(id_it->Equals(loc_it.GetSeq_id())) {
948local_first_gap =
false;
949local_last_gap =
false;
954string::iterator it = vec_data.begin();
955 while(it != vec_data.end() && pos <
len) {
956 boolis_gap =
false;
957 boolunknown_length =
false;
963unknown_length =
true;
970unknown_length =
true;
976local_first_gap =
true;
977}
else if(pos ==
len- 1) {
978local_last_gap =
true;
980 if(unknown_length) {
985}
else if(*it ==
'N') {
1002first_in_gap = local_first_gap;
1005last_in_gap = local_last_gap;
1006 if(local_first_gap || local_last_gap) {
1007startsOrEndsInGap =
true;
1011 if(num_real == 0 && num_n == 0) {
1021 if(num_gap == 0 && num_unknown_gap == 0 && num_n == 0) {
1023}
else if(first_in_gap || last_in_gap) {
1028gap_starts.push_back(gap_start);
1032}
else if(num_real == 0 && num_gap == 0 && num_unknown_gap == 0 && num_n >= 50) {
1034}
else if(startsOrEndsInGap) {
1036}
else if(num_unknown_gap > 0) {
1060 if((*it)->IsLiteral()) {
1061 len= (*it)->GetLiteral().GetLength();
1062}
else if((*it)->IsLoc()) {
1083 for(CSeq_loc_CI loc_it(loc); loc_it; ++loc_it) {
1086 if(!vec.
empty()) {
1088 bool match=
false;
1090 if(id_it->Equals(loc_it.GetSeq_id())) {
1106string::iterator it = vec_data.begin();
1107 while(it != vec_data.end()) {
1115 if((
unsigned)(*it + 1) <= 256 &&
isalpha(*it)) {
1123}
catch(
conststd::exception& ) {
1128 return(num_n > real_bases);
1139 const CSeq_id* protid =
nullptr;
1164 if(!prot_handle && look_far) {
1177 boollook_far =
false;
1198 "Exception text is present, but exception flag is not set");
1202 "Exception flag is set, but exception text is empty");
1212 if(
text.empty())
return;
1215 boolfound =
false;
1219 boolreasons_in_cit =
false;
1220 boolannotated_by_transcript_or_proteomic =
false;
1221 boolredundant_with_comment =
false;
1222 boolrefseq_except =
false;
1223vector<string> exceptions;
1225 ITERATE(vector<string>, it, exceptions) {
1235reasons_in_cit =
true;
1237annotated_by_transcript_or_proteomic =
true;
1242 boolcheck_refseq =
false;
1244check_refseq =
true;
1246check_refseq =
true;
1249 if((*id_it)->IsOther()) {
1250check_refseq =
true;
1259refseq_except =
true;
1272 str+
" is not a legal exception explanation");
1279redundant_with_comment =
true;
1281redundant_with_comment =
true;
1285 if(redundant_with_comment) {
1287 "Exception explanation text is also found in feature comment");
1289 if(refseq_except) {
1292 if(!found_just_the_exception) {
1294 "Genome processing exception should not be combined with other explanations");
1300 "Reasons given in citation exception does not have the required citation");
1302 if(annotated_by_transcript_or_proteomic) {
1303 boolhas_inference =
false;
1306has_inference =
true;
1310 if(!has_inference) {
1312 "Annotated by transcript or proteomic data exception does not have the required inference qualifier");
1336 boolis_imp =
false;
1360 const string& qual_str = gbq->GetQual();
1366 autogbqual = gbqual_and_value.first;
1371qual_str +
" is improperly capitalized");
1382 "Unknown qualifier "+ qual_str);
1397 if(
NStr::Equal(qual_str,
"orig_transcript_id")) {
1401 if(
NStr::Equal(qual_str,
"orig_transcript_id")) {
1411 "Wrong qualifier "+ qual_str +
" for feature "+
1417 "feat_class qualifier is only legal for RefSeq");
1422 const string&
val= gbq->GetVal();
1428 "Compound '"+
val+
"' must be split into separate instances of qualifier "+ qual_str);
1432 val+
" is not a legal value for qualifier "+ qual_str);
1462 val+
" is not a legal value for qualifier "+ qual_str);
1474 "Vector Contamination region should be trimmed from sequence");
1483 "A product qualifier is not used on a gene feature");
1491 "locus-tag values should be on genes");
1504 boolmultiple_rpt_unit =
false;
1508}
else if( *it ==
'('|| *it ==
')'||
1509*it ==
','|| *it ==
'.'||
1510 isdigit((
unsigned char)(*it)) ) {
1511multiple_rpt_unit =
true;
1521!multiple_rpt_unit ) {
1523 booljust_nuc_letters =
true;
1525 'A',
'C',
'G',
'T',
'N',
'a',
'c',
'g',
't',
'n' 1528 if( nuc_letters.find(*it) == nuc_letters.end() ) {
1529just_nuc_letters =
false;
1533 if( just_nuc_letters ) {
1535 if( !vec.
empty() ) {
1540 "repeat_region /rpt_unit and underlying " 1541 "sequence do not match");
1547 "Length of rpt_unit_seq is greater than feature length");
1559 const char*cp =
val.c_str();
1560 boolbadchars =
false;
1561 while(*cp != 0 && !badchars) {
1564}
else if(*cp !=
'('&& *cp !=
')' 1566&& *cp !=
','&& *cp !=
';') {
1573 "/rpt_unit_seq has illegal characters");
1581 if(
str.length() > 25) {
1585 if(pos == string::npos) {
1589 inttmp_from, tmp_to;
1597}
catch(
conststd::exception& ) {
1600 if(tmp_from < 0 || tmp_to < 0) {
1612 "/rpt_unit_range is not a base range");
1615 if(from - 1 < range.
GetFrom() || from - 1> range.
GetTo() || to - 1 < range.
GetFrom() || to - 1 > range.
GetTo()) {
1617 "/rpt_unit_range is not within sequence length");
1619 boolnulls_between =
false;
1622nulls_between =
true;
1625 if(nulls_between) {
1626 boolin_range =
false;
1628range = it.GetEmbeddingSeq_loc().GetTotalRange();
1629 if(from - 1 < range.
GetFrom() || from - 1> range.
GetTo() || to - 1 < range.
GetFrom() || to - 1 > range.
GetTo()) {
1636 "/rpt_unit_range is not within ordered intervals");
1646 boolonly_digits =
true,
1647has_spaces =
false;
1650 if(
isspace((
unsigned char)(*it)) ) {
1653 if( !
isdigit((
unsigned char)(*it)) ) {
1654only_digits =
false;
1657 if(only_digits || has_spaces) {
1669 val+
" accession missing version for qualifier compare");
1672 val+
" accession has bad version for qualifier compare");
1675 val+
" is not a legal accession for qualifier compare");
1678 "RefSeq accession "+
val+
" cannot be used for qualifier compare");
1686 const char*src =
str.c_str();
1687 const char*find = consist.c_str();
1690 while(*src != 0 && rval) {
1691 if(strchr (find, *src) ==
NULL) {
1707 val+
" is not a legal value for qualifier "+ qual_str
1708+
" - should only be composed of acgt unambiguous nucleotide bases");
1712 val+
" is not a legal value for qualifier "+ qual_str
1713+
" - should only be composed of acgtmrwsykvhdbn nucleotide bases");
1718 val+
" is not a legal value for qualifier "+ qual_str
1719+
" - should only be composed of acdefghiklmnpqrstuvwy* amino acids");
1724 boolhas_fuzz =
false;
1725 for( objects::CSeq_loc_CI it(
m_Feat.
GetLocation()); it && !has_fuzz; ++it) {
1726 if(it.IsPoint() && (it.GetFuzzFrom() || it.GetFuzzTo())) {
1737 "/replace already matches underlying sequence ("+
val+
")");
1740}
catch(
conststd::exception& ) {
1751field_name +
" contains undesired character");
1755field_name +
" ends with undesired character");
1760field_name +
" ends with hyphen");
1781 "feature has exception but passes splice site test");
1802 "Bad sequence at splice donor after exon ending at position " 1806 "Splice donor consensus (GT) not found after exon ending at position " 1817 "Bad sequence at splice acceptor before exon starting at position " 1821 "Splice acceptor consensus (AG) not found before exon starting at position " 1832 for(
autoit = donor_problems.begin(); it != donor_problems.end(); it++) {
1836 for(
autoit = acceptor_problems.begin(); it != acceptor_problems.end(); it++) {
1846 if((*it)->IsOther() && (*it)->GetOther().IsSetAccession()
1870 boolfound =
false;
1887 if((*it)->IsOther()) {
1919 " for feature "+
key);
1930 if(strand1 == strand2) {
1958 "Gene cross-reference is not on expected strand");
1966 boolequivalent =
false;
1984g2.
GetSyn().front())) {
2006 boolhas_gene_id_xref =
false;
2009 if((*xref)->IsSetId() && (*xref)->GetId().IsLocal()) {
2012 if(gene_feats.size() > 0) {
2013has_gene_id_xref =
true;
2021 if(has_gene_id_xref) {
2033 size_tnum_genes = 0;
2035 size_tnum_trans_spliced = 0;
2036 boolequivalent =
false;
2047 string label=
"?";
2048 size_tnum_match_by_locus = 0;
2049 size_tnum_match_by_locus_tag = 0;
2051 for( ; gene_it; ++gene_it) {
2052 if(gene_xref && gene_xref->
IsSetLocus() &&
2055num_match_by_locus++;
2061num_match_by_locus_tag++;
2067 "Feature has Gene Xref with locus_tag but no locus, gene with locus_tag and locus exists");
2074 if(
len<
max|| num_genes == 0) {
2077num_trans_spliced = 0;
2080num_trans_spliced++;
2082equivalent =
false;
2083prev_gene = gene_it;
2084}
else if(
len==
max) {
2089num_trans_spliced++;
2098 if(num_genes > 1 &&
2103}
else if(equivalent) {
2105 "Feature overlapped by " 2107+
" identical-length equivalent genes but has no cross-reference");
2110 "Feature overlapped by " 2112+
" identical-length genes but has no cross-reference");
2114}
else if(num_genes == 1
2120 const CGb_qual& qual = **qual_iter;
2126 "Redundant allele qualifier ("+ allele +
2127 ") on gene and feature");
2130 "Mismatched allele qualifier on gene ("+ allele +
2131 ") and feature ("+ qual.
GetVal() +
")");
2142 const string& allele = gene_xref->
GetAllele();
2145 const CGb_qual& qual = **qual_iter;
2151 "Redundant allele qualifier ("+ allele +
2152 ") on gene and feature");
2155 "Mismatched allele qualifier on gene ("+ allele +
2156 ") and feature ("+ qual.
GetVal() +
")");
2162 if(num_match_by_locus == 0 && num_match_by_locus_tag == 0) {
2174 const CSeq_id*
id= loc.GetId();
2192 "Feature has gene locus_tag cross-reference but no equivalent gene feature exists");
2197 "Feature has gene locus cross-reference but no equivalent gene feature exists");
2216 if(it->IsSetQual() &&
NStr::Equal(it->GetQual(),
"old_locus_tag")
2239 for(
autoit : feat.
GetQual()) {
2279 stringgene_old_locus_tag;
2282 if((*it)->IsSetQual() &&
NStr::Equal((*it)->GetQual(),
"old_locus_tag")
2283&& (*it)->IsSetVal() && !
NStr::IsBlank((*it)->GetVal())) {
2284gene_old_locus_tag = (*it)->GetVal();
2291 "Old locus tag on feature ("+ old_locus_tag
2292+
") does not match that on gene ("+ gene_old_locus_tag +
")");
2303 "old_locus_tag without inherited locus_tag");
2318 if( imp_loc.find(
"one-of") != string::npos ) {
2320 "ImpFeat loc "+ imp_loc +
2321 " has obsolete 'one-of' text for feature "+
key);
2326 if( imp_loc != temp_loc ) {
2328 "ImpFeat loc "+ imp_loc +
" does not equal feature location "+
2329temp_loc +
" for feature "+
key);
2346 boolfound =
false;
2362 if((*it)->IsOther()) {
2385 " for feature "+
key);
2429 if((*it)->IsOther() && (*it)->GetTextseq_Id()->IsSetAccession()
2448 boolhas_sfp_pseudo =
false;
2449 boolhas_gene_pseudo =
false;
2452 if(it->IsSetQual() &&
2455sfp_pseudo = it->GetVal();
2456has_sfp_pseudo =
true;
2461 for(
autoit : gene->
GetQual()) {
2462 if(it->IsSetQual() &&
2465gene_pseudo = it->GetVal();
2466has_gene_pseudo =
true;
2471 if(!has_sfp_pseudo && !has_gene_pseudo) {
2473}
else if(!has_sfp_pseudo) {
2475}
else if(has_sfp_pseudo && !has_gene_pseudo) {
2477 msg+=
" has pseudogene qualifier, gene does not";
2481 string msg=
"Different pseudogene values on ";
2483 msg+=
" ("+ sfp_pseudo +
") and gene ("+ gene_pseudo +
")";
2534 "Gene locus_tag does not match general ID of product");
2545 for(
charch : src) {
2546 unsigned charchu = ch;
2547 if(chu > 31 && chu < 128) {
2563 const string&
str= *it;
2565 const char& ch = *c_it;
2566 unsigned charchu = ch;
2567 if(ch > 127 || (ch < 32 && ch !=
'\t'&& ch !=
'\r'&& ch !=
'\n')) {
2584 for(
autoit :
prot.GetName()) {
2585 if(
prot.IsSetEc() && !
prot.IsSetProcessed()
2591 "Unknown or hypothetical protein should not have EC number");
2598 "protein description "+
prot.GetDesc() +
" has SGML");
2604 "Comment has same value as protein description");
2609 "Apparent EC number in protein comment");
2616 if(
prot.IsSetName() &&
prot.GetName().size() > 0) {
2619 "Apparent EC number in protein title");
2624 if(
prot.CanGetDb () ) {
2627 if( (!
prot.IsSetName() ||
prot.GetName().empty()) &&
2628(!
prot.IsSetProcessed()
2633 "Protein feature has description but no name");
2634}
else if(
prot.IsSetActivity() && !
prot.GetActivity().empty()) {
2636 "Protein feature has function but no name");
2637}
else if(
prot.IsSetEc() && !
prot.GetEc().empty()) {
2639 "Protein feature has EC number but no name");
2642 "Protein feature has no name");
2657 if(
prot.IsSetProcessed() ) {
2658processed =
prot.GetProcessed();
2664 if(
prot.IsSetName() &&
2665!
prot.GetName().empty() &&
2666!
prot.GetName().front().empty() ) {
2669 if(
prot.CanGetDesc() && !
prot.GetDesc().empty() ) {
2672 if(
prot.CanGetEc() && !
prot.GetEc().empty() ) {
2675 if(
prot.CanGetActivity() && !
prot.GetActivity().empty() ) {
2678 if(
prot.CanGetDb() && !
prot.GetDb().empty() ) {
2684 "There is a protein feature where all fields are empty");
2693 "'hypothetical protein",
2696 "alternatively spliced",
2697 "bacteriophage hypothetical protein",
2700 "cnserved hypothetical protein",
2701 "conesrved hypothetical protein",
2702 "conserevd hypothetical protein",
2703 "conserved archaeal protein",
2704 "conserved domain protein",
2705 "conserved hypohetical protein",
2706 "conserved hypotehtical protein",
2707 "conserved hypotheical protein",
2708 "conserved hypothertical protein",
2709 "conserved hypothetcial protein",
2710 "conserved hypothetical",
2711 "conserved hypothetical exported protein",
2712 "conserved hypothetical integral membrane protein",
2713 "conserved hypothetical membrane protein",
2714 "conserved hypothetical phage protein",
2715 "conserved hypothetical prophage protein",
2716 "conserved hypothetical protein",
2717 "conserved hypothetical protein - phage associated",
2718 "conserved hypothetical protein fragment 3",
2719 "conserved hypothetical protein, fragment",
2720 "conserved hypothetical protein, putative",
2721 "conserved hypothetical protein, truncated",
2722 "conserved hypothetical protein, truncation",
2723 "conserved hypothetical protein.",
2724 "conserved hypothetical protein; possible membrane protein",
2725 "conserved hypothetical protein; putative membrane protein",
2726 "conserved hypothetical proteins",
2727 "conserved hypothetical protien",
2728 "conserved hypothetical transmembrane protein",
2729 "conserved hypotheticcal protein",
2730 "conserved hypthetical protein",
2731 "conserved in bacteria",
2732 "conserved membrane protein",
2733 "conserved protein",
2734 "conserved protein of unknown function",
2735 "conserved protein of unknown function ; putative membrane protein",
2736 "conserved unknown protein",
2737 "conservedhypothetical protein",
2738 "conserverd hypothetical protein",
2739 "conservered hypothetical protein",
2740 "consrved hypothetical protein",
2741 "converved hypothetical protein",
2745 "duplicated hypothetical protein",
2750 "homeodomain protein",
2752 "hyopthetical protein",
2754 "hypotheical protein",
2755 "hypothertical protein",
2756 "hypothetcical protein",
2758 "hypothetical protein",
2759 "hypothetical conserved protein",
2760 "hypothetical exported protein",
2761 "hypothetical novel protein",
2762 "hypothetical orf",
2763 "hypothetical phage protein",
2764 "hypothetical prophage protein",
2765 "hypothetical protein (fragment)",
2766 "hypothetical protein (multi-domain)",
2767 "hypothetical protein (phage associated)",
2768 "hypothetical protein - phage associated",
2769 "hypothetical protein fragment",
2770 "hypothetical protein fragment 1",
2771 "hypothetical protein predicted by genemark",
2772 "hypothetical protein predicted by glimmer",
2773 "hypothetical protein predicted by glimmer/critica",
2774 "hypothetical protein, conserved",
2775 "hypothetical protein, phage associated",
2776 "hypothetical protein, truncated",
2777 "hypothetical protein-putative conserved hypothetical protein",
2778 "hypothetical protein.",
2779 "hypothetical proteins",
2780 "hypothetical protien",
2781 "hypothetical transmembrane protein",
2782 "hypothetoical protein",
2783 "hypothteical protein",
2784 "identified by sequence similarity; putative; orf located~using blastx/framed",
2785 "identified by sequence similarity; putative; orf located~using blastx/glimmer/genemark",
2787 "membrane protein, putative",
2789 "narrowly conserved hypothetical protein",
2792 "orf, conserved hypothetical protein",
2793 "orf, hypothetical",
2794 "orf, hypothetical protein",
2795 "orf, hypothetical, fragment",
2796 "orf, partial conserved hypothetical protein",
2797 "orf; hypothetical protein",
2798 "orf; unknown function",
2800 "partial cds, hypothetical",
2801 "partially conserved hypothetical protein",
2802 "phage hypothetical protein",
2803 "phage-related conserved hypothetical protein",
2804 "phage-related protein",
2806 "possible hypothetical protein",
2808 "predicted coding region",
2809 "predicted protein",
2810 "predicted protein (pseudogene)",
2811 "predicted protein family",
2812 "product uncharacterised protein family",
2814 "protein of unknown function",
2817 "putative conserved protein",
2818 "putative exported protein",
2819 "putative hypothetical protein",
2820 "putative membrane protein",
2821 "putative orf; unknown function",
2822 "putative phage protein",
2823 "putative protein",
2825 "repeats containing protein",
2827 "ribosomal protein",
2830 "small hypothetical protein",
2831 "transmembrane protein",
2834 "trp-repeat protein",
2835 "truncated conserved hypothetical protein",
2836 "truncated hypothetical protein",
2837 "uncharacterized conserved membrane protein",
2838 "uncharacterized conserved protein",
2839 "uncharacterized conserved secreted protein",
2840 "uncharacterized protein",
2841 "uncharacterized protein conserved in archaea",
2842 "uncharacterized protein conserved in bacteria",
2843 "unique hypothetical",
2844 "unique hypothetical protein",
2847 "unknown function",
2850 "unknown, conserved protein",
2851 "unknown, hypothetical",
2852 "unknown-related protein",
2853 "unknown; predicted coding region",
2855 "unnamed protein product",
2856 "very hypothetical protein" 2868 if(!
prot.IsSetName()) {
2869 if(!
prot.IsSetProcessed() ||
2873 "Protein name is not set");
2880 if(search.empty()) {
2882 "Protein name is empty");
2883}
else if(sc_BadProtName.find (search.c_str()) != sc_BadProtName.end()
2891 "Uninformative protein name '"+ it +
"'");
2907(it) +
" is not in proper EC_number format");
2909 const string& ec_number = it;
2915 "EC_number "+ ec_number +
" was deleted");
2920 "EC_number "+ ec_number +
" was transferred and is no longer valid");
2925 if(pos == string::npos || !
isdigit(ec_number.c_str()[pos + 1])) {
2927ec_number +
" is not a legal value for qualifier EC_number");
2930ec_number +
" is not a legal preliminary value for qualifier EC_number");
2946 boolreport_name =
true;
2948 if(pos == string::npos) {
2950}
else if(prot_name.length() - pos < 5) {
2953report_name =
false;
2958 "Protein name ends with bracket and may contain organism name");
2964 if(id_it->IsOther()
2965&& id_it->GetOther().IsSetAccession()
2967prot_name.substr(21))) {
2969 "Hypothetical protein reference does not match accession");
2978 "Comment has same value as protein name");
2983 "Protein name has internal PMID");
2989&&
NStr::FindCase(prot_name,
"methyltransferase") == string::npos
2991 if(
NStr::EqualNocase(prot_name,
"ribulose-1,5-bisphosphate carboxylase/oxygenase")) {
2993}
else if(!
NStr::EqualNocase(prot_name,
"ribulose-1,5-bisphosphate carboxylase/oxygenase large subunit")
2994&& !
NStr::EqualNocase(prot_name,
"ribulose-1,5-bisphosphate carboxylase/oxygenase small subunit")) {
2996 "Nonstandard ribulose bisphosphate protein name");
3006 "protein name "+ prot_name +
" has SGML");
3023 if(!
prot)
return;
3026 if(! mi_i)
return;
3031 constCSeq_loc& prot_loc =
prot->GetLocation();
3035 boolconflict =
false;
3050 "Molinfo completeness and protein feature partials conflict");
3061 if(
rna.IsSetType()) {
3062rna_type =
rna.GetType();
3066 if(
rna.CanGetExt() &&
rna.GetExt().IsName()) {
3067 const string& rna_name =
rna.GetExt().GetName();
3071 "rRNA name "+ rna_name +
" has SGML");
3080 boolpseudo = feat_pseudo;
3102rna_typename +
" has no name");
3110 "RNA type 0 (unknown) not supported");
3130 "A pseudo RNA should not have a product");
3131}
else if(pseudo) {
3133 "An RNA overlapped by a pseudogene should not have a product");
3180 "Type of RNA does not match MolInfo of product Bioseq");
3214 "tRNA data structure on non-tRNA feature");
3221 if( anticodon_len != 3 ) {
3223 "Anticodon is not 3 bases in length");
3231 "Anticodon location not in tRNA");
3252 "Unparsed anticodon qualifier in tRNA");
3258 "Unparsed product qualifier in tRNA");
3265 if(
rna.IsSetExt() &&
3268 "Unparsed product qualifier in tRNA");
3271 "Missing encoded amino acid qualifier in tRNA");
3276 boolisLessThan100 =
false;
3278CSeq_loc_CI li(loc);
3280 TSeqPoslast_start = li.GetRange().GetFrom();
3281 TSeqPoslast_stop = li.GetRange().GetTo();
3283last_id->
Assign(li.GetSeq_id());
3287 TSeqPosthis_start = li.GetRange().GetFrom();
3288 TSeqPosthis_stop = li.GetRange().GetTo();
3289 if(
abs((
int)this_start - (
int)last_stop) < 100 ||
abs((
int)this_stop - (
int)last_start) < 100) {
3290 if(li.GetSeq_id().Equals(*last_id)) {
3292isLessThan100 =
true;
3298 for(
autoid_it : last_bsh.
GetId()) {
3299 if(id_it.GetSeqId()->Equals(li.GetSeq_id())) {
3300isLessThan100 =
true;
3307last_start = this_start;
3308last_stop = this_stop;
3309last_id->
Assign(li.GetSeq_id());
3315 if( grp ==
NULL) {
3324 if( !pseudo && grp !=
NULL) {
3328 if(isLessThan100 && ! pseudo) {
3334 if(
source.IsSetLineage()) {
3335 stringlineage =
source.GetLineage();
3338 "tRNA intron in bacteria is less than 100 bp");
3349 boolordered =
true;
3350 booladjacent =
false;
3351 boolunmarked_strand =
false;
3352 boolmixed_strand =
false;
3355 for(CSeq_loc_CI curr(anticodon); curr; ++curr) {
3357 if(curr.GetEmbeddingSeq_loc().IsInt()) {
3359}
else if(curr.GetEmbeddingSeq_loc().IsPnt()) {
3367curr.GetEmbeddingSeq_loc().GetLabel(&lbl);
3369 "Anticodon location ["+ lbl +
"] out of range");
3372 if(
prev&& curr &&
3378 if(prev_range.
GetTo() < curr_range.
GetTo()) {
3381 if(curr_range.
GetTo() + 1 == prev_range.
GetFrom()) {
3385 if(prev_range.
GetTo() > curr_range.
GetTo()) {
3388 if(prev_range.
GetTo() + 1 == curr_range.
GetFrom()) {
3395 if( curr_range == prev_range && curr_strand == prev_strand ) {
3397 "Duplicate anticodon exons in location");
3399 if( curr_strand != prev_strand ) {
3401unmarked_strand =
true;
3403unmarked_strand =
true;
3405mixed_strand =
true;
3413 "Adjacent intervals in Anticodon");
3417 ENa_strandac_strand = anticodon.GetStrand();
3420 "Anticodon strand and tRNA strand do not match.");
3423 "Anticodon strand and tRNA strand do not match.");
3427 booltrans_splice =
false;
3430trans_splice =
true;
3433 if(!trans_splice) {
3435anticodon.GetLabel(&loc_lbl);
3438 "Mixed strands in Anticodon ["+ loc_lbl +
"]");
3440 if(unmarked_strand) {
3442 "Mixed plus and unknown strands in Anticodon ["+ loc_lbl +
"]");
3446 "Intervals out of order in Anticodon ["+ loc_lbl +
"]");
3452 int s_LegalNcbieaaValues[] = { 42, 65, 66, 67, 68, 69, 70, 71, 72, 73,
345374, 75, 76, 77, 78, 79, 80, 81, 82, 83,
345484, 85, 86, 87, 88, 89, 90 };
3457 "---",
"Ala",
"Asx",
"Cys",
"Asp",
"Glu",
"Phe",
"Gly",
"His",
"Ile",
3458 "Lys",
"Leu",
"Met",
"Asn",
"Pro",
"Gln",
"Arg",
"Ser",
"Thr",
3459 "Val",
"Trp",
"OTHER",
"Tyr",
"Glx",
"Sec",
"TERM",
"Pyl",
"Xle" 3473}
catch(
conststd::exception& ) {
3482 constlist<CRef<CGenetic_code> >& codes = code_table.
Get();
3484 for( list<
CRef<CGenetic_code>>::const_iterator code_it = codes.begin(), code_it_end = codes.end(); code_it != code_it_end; ++code_it ) {
3485 if((*code_it)->GetId() == gcode) {
3486 return(*code_it)->GetName();
3507 unsigned charaa = 0, orig_aa;
3508vector<char> seqData;
3537 boolfound =
false;
3551 boolmustbemethionine =
false;
3556mustbemethionine =
true;
3560 if(mustbemethionine) {
3564 "Initiation tRNA claims to be tRNA-"+ aanm +
3565 ", but should be tRNA-Met");
3581 if( ncbieaa.length() != 64 ) {
3589 stringaaname =
buf;
3595 boolmodified_codon_recognition =
false;
3596 boolrna_editing =
false;
3600modified_codon_recognition =
true;
3603rna_editing =
true;
3607vector<string> recognized_codon_values;
3608vector<unsigned char> recognized_taa_values;
3611 if(*iter == 255)
continue;
3616 " is greater than maximum 63");
3618}
else if(*iter < 0) {
3621 " is less than 0");
3625 if( !modified_codon_recognition && !rna_editing ) {
3626 unsigned char taa= ncbieaa[*iter];
3628recognized_codon_values.push_back (codon);
3629recognized_taa_values.push_back (
taa);
3632 if( (aa ==
'U') && (
taa==
'*') && (*iter == 14) ) {
3640 "Codon recognized by tRNA ("+ codon +
") does not match amino acid (" 3641+ aaname +
") specified by genetic code (" 3649 stringanticodon =
"?";
3650vector<string> codon_values;
3651vector<unsigned char> taa_values;
3659 if(codon.length() > 3) {
3660codon = codon.substr (0, 3);
3666 charch = anticodon.c_str()[0];
3684string::iterator str_it = wobble.begin();
3685 while(str_it != wobble.end()) {
3688 if(index < 64 && index > -1) {
3689 unsigned char taa= ncbieaa[index];
3690taa_values.push_back(
taa);
3691codon_values.push_back(codon);
3697 if(anticodon.length() > 3) {
3698anticodon = anticodon.substr(0, 3);
3701}
catch(
conststd::exception& ) {
3704 if(codon_values.size() > 0) {
3707 for(
size_t i= 0;
i< codon_values.size();
i++) {
3713 if(aa ==
'U'&&
NStr::Equal(anticodon,
"UCA")) {
3715}
else if(aa ==
'O'&&
NStr::Equal(anticodon,
"CUA")) {
3717}
else if(aa ==
'I'&&
NStr::Equal(anticodon,
"CAU")) {
3723 "Codons predicted from anticodon ("+ anticodon
3724+
") cannot produce amino acid ("+ aaname +
")");
3729 if(recognized_codon_values.size() > 0) {
3731 for(
size_t i= 0;
i< codon_values.size() && !
ok;
i++) {
3732 for(
size_tj = 0; j < recognized_codon_values.size() && !
ok; j++) {
3733 if(
NStr::Equal(codon_values[
i], recognized_codon_values[j])) {
3735}
else if(
NStr::Equal(codon_values[
i],
"ATG") && aa ==
'I') {
3745 "Codon recognized cannot be produced from anticodon (" 3746+ anticodon +
")");
3753 if(orig_aa == 0 || orig_aa == 255) {
3763 if(idx == 0 || idx >= 28) {
3783 boolfound_bad =
false;
3784 for(
autoit : scores) {
3798 "tRNA-rRNA overlap");
3805 "tRNA overlaps CDS");
3812 size_tmismatches = 0;
3826 "Unable to transcribe mRNA");
3832 "Unable to fetch mRNA transcript '"+
label+
"'");
3838 if((*it)->IsOther()) {
3866 "] less than "+ farstr +
"product length ["+
3873+
"] less than "+ farstr +
"product length [" 3879 "] less than "+ farstr +
"product length ["+
3885 "greater than "+ farstr +
"product length ["+
3892 " bases between the transcript and "+ farstr +
"product sequence");
3896 "mRNA has exception but passes transcription test");
3901 "mRNA has unclassified exception but only difference is "+
NStr::SizetToString(mismatches)
3906 "mRNA has transcribed product replaced exception");
3957 "protein_id should not be a gbqual on an mRNA feature");
3961 "transcript_id should not be a gbqual on an mRNA feature");
3967 if(
rna.IsSetExt() &&
rna.GetExt().IsName()) {
3968 const string& rna_name =
rna.GetExt().GetName();
3973 "mRNA feature product indicates it should be a tRNA feature");
3978 "mRNA name "+ rna_name +
" has SGML");
3996 "Product Bioseq of mRNA feature is not " 3997 "packaged in the record");
4006 "Identical transcript IDs found on multiple mRNAs");
4037 const CGene_ref* genomicgrp =
nullptr;
4049 boolfound_match =
false;
4050 boolfound_mismatch =
false;
4051 for(
int i= 1;
i<= 4;
i++) {
4054 if(
gen!=
""&&
rna!=
"") {
4056found_match =
true;
4058found_mismatch =
true;
4063 if(found_mismatch) {
4065 "Found match and mismatch between gene on mRNA bioseq and gene on genomic bioseq",
4068}
else if(found_mismatch) {
4070 "Gene on mRNA bioseq does not match gene on genomic bioseq",
4090 "Focus must be on BioSource descriptor, not BioSource feature.");
4111 "BioSource descriptor must have focus or transgenic " 4112 "when BioSource feature with different taxname is " 4130 "PolyA_site should be a single point");
4162 "sig/mat/transit_peptide feature cannot be associated with a " 4163 "protein product of a coding region feature");
4166 "Peptide processing feature should be converted to the " 4167 "appropriate protein feature subtype");
4191 "Start and stop of "+
key+
" are out of frame with CDS codons");
4196 "Start and stop of "+
key+
" are out of frame with CDS codons");
4200 "Start of "+
key+
" is out of frame with CDS codons");
4204 "Stop of "+
key+
" is out of frame with CDS codons");
4216 boolpseudo = feat_pseudo;
4233 boolpseudo = feat_pseudo;
4243 "Introns should be at least 10 nt long");
4255 if(partial5 && partial3) {
4266 if(scores.size() > 0) {
4277 if(scores.size() > 0) {
4302 booldonor_in_gap =
false;
4303 boolacceptor_in_gap =
false;
4307donor_in_gap =
true;
4312acceptor_in_gap =
true;
4315 if(!partial5 && !partial3) {
4316 if(donor_in_gap && acceptor_in_gap) {
4323 booldonor_good =
false;
4324 boolacceptor_good =
false;
4327 if(!partial5 && !donor_in_gap) {
4330donor[0] = vec[end5 - 1];
4331donor[1] = vec[end5];
4337donor[0] = vec[end5];
4338donor[1] = vec[end5 + 1];
4345 if(!partial3 && !acceptor_in_gap) {
4348acceptor[0] = vec[end3];
4349acceptor[1] = vec[end3 + 1];
4350acceptor_good =
true;
4355acceptor[0] = vec[end3 - 1];
4356acceptor[1] = vec[end3];
4357acceptor_good =
true;
4363 if(!partial5 && !partial3) {
4364 if(donor_good && acceptor_good) {
4373 if(!donor_in_gap) {
4387 "Splice donor consensus (GT) not found at start of terminal intron, position " 4392 "Splice donor consensus (GT) not found at start of intron, position " 4401 if(!acceptor_in_gap) {
4404 if(acceptor_good) {
4414 "Splice acceptor consensus (AG) not found at end of terminal intron, position " 4419 "Splice acceptor consensus (AG) not found at end of intron, position " 4439 boolis_short =
false;
4452}
else if(partial_right &&
4473 "A note or other qualifier is required for a misc_feature");
4479 stringcontent_label;
4481 if(
NStr::Equal(content_label,
"cold-shock protein")) {
4483 "cspA misc_feature overlapped by cold-shock protein CDS");
4496 boolis_far_delta =
false;
4500is_far_delta =
true;
4504 if( !(*sg) )
continue;
4506is_far_delta =
false;
4510 if(! is_far_delta) {
4512 "An assembly_gap feature should only be on a contig record");
4536 "Assembly_gap flanked by Ns on 5' and 3' sides");
4539 "Assembly_gap flanked by Ns on 5' side");
4542 "Assembly_gap flanked by Ns on 3' side");
4545 for(
size_t i= 0;
i< sequence.size();
i++) {
4546 if(sequence[
i] !=
'N') {
4563 if((*it)->IsSetQual() &&
NStr::EqualNocase((*it)->GetQual(),
"estimated_length")
4567 if(estimated_length != loc_len) {
4574}
catch(
conststd::exception& ) {
4581 if( !vec.
empty() ) {
4586 unsigned intnum_gap = 0;
4588string::iterator it = vec_data.begin();
4589 while(it != vec_data.end()) {
4597}
else if(*it !=
'-') {
4603 if(num_real > 0 && num_n > 0) {
4608}
else if(num_real > 0) {
4612}
else if(num_n > 0) {
4620+
" gap characters");
4625}
catch(
conststd::exception& ) {
4638 "NULL feature key");
4682 "Feature key Import is no longer legal");
4687 switch( subtype ) {
4693 "Unknown feature key "+
key);
4700 "Feature key "+
key+
" is no longer legal");
4709 "Pre/pro protein feature cannot be associated with a " 4710 "protein product of a coding region feature");
4713 "Peptide processing feature should be converted to the " 4714 "appropriate protein feature subtype");
4728 "RNA feature should be converted to the appropriate RNA feature " 4729 "subtype, location should be converted manually");
4738 "ImpFeat CDS should be pseudo");
4744 "ImpFeat CDS with /translation found");
4751 "Unknown feature key "+
key);
4758 "repeat_region has no qualifiers");
4766 const string&
val= (*gbqual)->GetVal();
4774 "repeat_region has no qualifiers except rpt_type other");
4784 const string&
val= (*gbqual)->GetVal();
4785 boolmissing =
true;
4795 "The regulatory_class 'other' is missing the required /note");
4805 if(
NStr::CompareNocase( (*gbqual)->GetQual(),
"recombination_class") != 0 )
continue;
4806 const string&
val= (*gbqual)->GetVal();
4807 if( recomb_values.
find(
val.c_str()) == recomb_values.
end() ) {
4811 "The recombination_class 'other' is missing the required /note");
static CRef< CScope > m_Scope
@ eExtreme_Positional
numerical value
@ eExtreme_Biological
5' and 3'
@ eErr_SEQ_FEAT_WrongQualOnImpFeat
@ eErr_SEQ_FEAT_NotSpliceConsensusAcceptor
@ eErr_SEQ_FEAT_rRNADoesNotHaveProduct
@ eErr_SEQ_FEAT_WholeLocation
@ eErr_SEQ_FEAT_MobileElementInvalidQualifier
@ eErr_SEQ_FEAT_DuplicateAnticodonInterval
@ eErr_SEQ_FEAT_ShortTRNAIntron
@ eErr_SEQ_FEAT_MinusStrandProtein
@ eErr_SEQ_FEAT_NotSpliceConsensusDonor
@ eErr_SEQ_FEAT_GeneXrefWithoutLocus
@ eErr_SEQ_FEAT_GenesInconsistent
@ eErr_SEQ_FEAT_PseudoRnaHasProduct
@ eErr_SEQ_FEAT_EcNumberDataMissing
@ eErr_SEQ_FEAT_InvalidProductOnGene
@ eErr_SEQ_FEAT_BadRRNAcomponentOverlapTRNA
@ eErr_SEQ_FEAT_mRNAUnnecessaryException
@ eErr_SEQ_FEAT_UnknownImpFeatQual
@ eErr_SEQ_FEAT_InvalidCompareBadAccession
@ eErr_SEQ_FEAT_InvalidCompareMissingVersion
@ eErr_SEQ_FEAT_InvalidRptUnitRange
@ eErr_GENERIC_SgmlPresentInText
@ eErr_SEQ_FEAT_BadAnticodonAA
@ eErr_SEQ_FEAT_RnaProductMismatch
@ eErr_SEQ_FEAT_FeatureBeginsOrEndsInGap
@ eErr_SEQ_FEAT_EcNumberInProteinName
@ eErr_SEQ_FEAT_InvalidTRNAdata
@ eErr_SEQ_FEAT_UnnecessaryException
@ eErr_SEQ_FEAT_AssemblyGapFeatureProblem
@ eErr_SEQ_FEAT_OldLocusTagWithoutLocusTag
@ eErr_SEQ_FEAT_NotSpliceConsensusAcceptorTerminalIntron
@ eErr_SEQ_FEAT_AnticodonMixedStrand
@ eErr_SEQ_FEAT_UnparsedtRNAProduct
@ eErr_SEQ_FEAT_InconsistentPseudogeneValue
@ eErr_SEQ_FEAT_GeneXrefWithoutGene
@ eErr_SEQ_FEAT_ReplacedEcNumber
@ eErr_SEQ_FEAT_PartialsInconsistent
@ eErr_SEQ_FEAT_InvalidQualifierValue
@ eErr_SEQ_FEAT_DuplicateGeneOntologyTerm
@ eErr_SEQ_FEAT_ProtRefHasNoData
@ eErr_SEQ_FEAT_NotSpliceConsensusDonorTerminalIntron
@ eErr_SEQ_FEAT_BadTrnaAA
@ eErr_SEQ_FEAT_WrongQualOnFeature
@ eErr_SEQ_FEAT_ProductFetchFailure
@ eErr_SEQ_FEAT_MismatchedAllele
@ eErr_SEQ_FEAT_RepeatSeqDoNotMatch
@ eErr_SEQ_FEAT_MissingQualOnImpFeat
@ eErr_SEQ_FEAT_InvalidRptUnitSeqCharacters
@ eErr_SEQ_FEAT_TranscriptLen
@ eErr_SEQ_FEAT_RubiscoProblem
@ eErr_SEQ_FEAT_InvalidAlleleDuplicates
@ eErr_SEQ_FEAT_ImpCDSnotPseudo
@ eErr_SEQ_FEAT_BadCDScomponentOverlapTRNA
@ eErr_SEQ_FEAT_BadEcNumberValue
@ eErr_SEQ_FEAT_EcNumberEmpty
@ eErr_SEQ_FEAT_ImpCDShasTranslation
@ eErr_SEQ_FEAT_PeptideFeatOutOfFrame
@ eErr_SEQ_FEAT_ProteinNameHasPMID
@ eErr_SEQ_FEAT_ImpFeatBadLoc
@ eErr_SEQ_FEAT_MissingQualOnFeature
@ eErr_SEQ_FEAT_PolyAsiteNotPoint
@ eErr_SEQ_FEAT_RepeatRegionNeedsNote
@ eErr_SEQ_FEAT_GeneXrefStrandProblem
@ eErr_SEQ_FEAT_PolyATail
@ eErr_SEQ_FEAT_MissingTrnaAA
@ eErr_GENERIC_NonAsciiAsn
@ eErr_SEQ_FEAT_UnparsedtRNAAnticodon
@ eErr_SEQ_FEAT_RefSeqInText
@ eErr_SEQ_FEAT_ErroneousException
@ eErr_SEQ_FEAT_ImproperBondLocation
@ eErr_SEQ_FEAT_InvalidPseudoQualifier
@ eErr_SEQ_FEAT_FeatureSeqIDCaseDifference
@ eErr_SEQ_FEAT_BadProductSeqId
@ eErr_SEQ_FEAT_PeptideFeatureLacksCDS
@ eErr_SEQ_FEAT_InvalidCompareRefSeqAccession
@ eErr_SEQ_FEAT_InvalidReplace
@ eErr_SEQ_FEAT_UnknownImpFeatKey
@ eErr_SEQ_FEAT_IdenticalMRNAtranscriptIDs
@ eErr_SEQ_FEAT_AssemblyGapCoversSequence
@ eErr_SEQ_FEAT_ShortIntron
@ eErr_SEQ_FEAT_SplitEcNumber
@ eErr_SEQ_FEAT_AssemblyGapAdjacentToNs
@ eErr_SEQ_FEAT_InvalidPunctuation
@ eErr_SEQ_FEAT_LocusTagProductMismatch
@ eErr_SEQ_FEAT_UnknownFeatureQual
@ eErr_SEQ_FEAT_TranscriptMismatches
@ eErr_SEQ_FEAT_IncorrectQualifierCapitalization
@ eErr_SEQ_FEAT_InvalidNumberQualifier
@ eErr_SEQ_FEAT_FeatureInsideGap
@ eErr_SEQ_FEAT_InvalidRNAFeature
@ eErr_SEQ_FEAT_tRNArange
@ eErr_SEQ_FEAT_GeneIdMismatch
@ eErr_SEQ_FEAT_MissingMRNAproduct
@ eErr_SEQ_FEAT_tRNAmRNAmixup
@ eErr_SEQ_FEAT_UndesiredProteinName
@ eErr_SEQ_FEAT_MrnaTransFail
@ eErr_SEQ_FEAT_InvalidInferenceValue
@ eErr_SEQ_FEAT_GeneXrefNeeded
@ eErr_SEQ_FEAT_InvalidType
@ eErr_SEQ_FEAT_SerialInComment
@ eErr_SEQ_FEAT_BadTrailingCharacter
@ eErr_SEQ_FEAT_IntervalBeginsOrEndsInGap
@ eErr_SEQ_FEAT_ProteinNameEndsInBracket
@ eErr_SEQ_FEAT_BadInternalCharacter
@ eErr_SEQ_FEAT_BadProteinName
@ eErr_SEQ_FEAT_MissingLocation
@ eErr_SEQ_FEAT_ExceptionMissingText
@ eErr_SEQ_FEAT_BadAnticodonCodon
@ eErr_SEQ_FEAT_BadTrailingHyphen
@ eErr_SEQ_FEAT_OldLocusTagMismtach
@ eErr_SEQ_FEAT_PseudoRnaViaGeneHasProduct
@ eErr_SEQ_FEAT_DeletedEcNumber
@ eErr_SEQ_FEAT_FeatureIsMostlyNs
@ eErr_SEQ_FEAT_InvalidMatchingReplace
@ eErr_INTERNAL_Exception
@ eErr_SEQ_FEAT_BadEcNumberFormat
@ eErr_SEQ_FEAT_BothStrands
@ eErr_SEQ_FEAT_ExceptionProblem
@ eErr_SEQ_FEAT_RedundantFields
@ eErr_SEQ_FEAT_ColdShockProteinProblem
@ eErr_SEQ_FEAT_TrnaCodonWrong
@ eErr_SEQ_FEAT_NoNameForProtein
@ eErr_SEQ_FEAT_RptUnitRangeProblem
@ eErr_SEQ_FEAT_InvalidVariationReplace
@ eErr_SEQ_FEAT_SeqLocOrder
@ eErr_SEQ_FEAT_AnticodonStrandConflict
@ eErr_SEQ_FEAT_InvalidRepeatUnitLength
@ eErr_SEQ_FEAT_VectorContamination
@ eErr_SEQ_FEAT_AbuttingIntervals
@ eErr_SEQ_FEAT_EcNumberInProteinComment
@ eErr_SEQ_FEAT_UnnecessaryCitPubEquiv
@ eErr_SEQ_FEAT_PartialProblem
@ eErr_SEQ_FEAT_RegulatoryClassOtherNeedsNote
@ eErr_SEQ_FEAT_MiscFeatureNeedsNote
@ eErr_SEQ_FEAT_FocusOnBioSourceFeature
@ eErr_SEQ_FEAT_PolyAsignalNotRange
@ eErr_SEQ_DESCR_BioSourceNeedsFocus
@ eErr_SEQ_FEAT_BadTrnaCodon
@ eErr_SEQ_FEAT_FeatureCrossesGap
@ eErr_SEQ_FEAT_SelfReferentialProduct
@ eErr_SEQ_FEAT_GapFeatureProblem
@ eErr_SEQ_FEAT_HypotheticalProteinMismatch
@ eErr_SEQ_FEAT_MissingGeneXref
@ eErr_SEQ_FEAT_RecombinationClassOtherNeedsNote
@ eErr_SEQ_FEAT_MissingExceptionFlag
bool IsOrganismEukaryote() const
int GetGenCode(int def=1) const
bool IsSkippable(void) const
bool IsKnownGap(size_t offset)
bool IsGap(size_t offset)
bool IsUnknownGap(size_t offset)
map< size_t, EGapType > TGapTypeMap
CGapCache(const CSeq_loc &loc, CBioseq_Handle bsh)
@Gb_qual.hpp User-defined methods of the data storage class.
static bool IsLegalMobileElementValue(const string &val)
static bool IsValidPseudogeneValue(const string &val)
static bool IsValidRptTypeValue(const string &val)
static const TLegalRecombinationClassSet & GetSetOfLegalRecombinationClassValues(void)
static const string & GetNcbieaa(int id)
static string IndexToCodon(int index)
static int CodonToIndex(char base1, char base2, char base3)
static const CGenetic_code_table & GetCodeTable(void)
CRef< feature::CFeatTree > GetFeatTreeFromCache(const CSeq_loc &loc, CScope &scope)
CConstRef< CSeq_feat > GetGeneFromCache(const CSeq_feat *feat, CScope &scope)
bool IsSuppressed(void) const
bool x_IsIntronShort(bool pseudo)
CMRNAValidator(const CSeq_feat &feat, CScope &scope, CValidError_imp &imp)
void x_ValidateMrnaGene()
CConstRef< CSeq_feat > m_Gene
void x_ValidateCommonMRNAProduct()
CPeptideValidator(const CSeq_feat &feat, CScope &scope, CValidError_imp &imp)
void x_ValidatePeptideOnCodonBoundary()
CConstRef< CSeq_feat > m_CDS
void x_ValidateSeqFeatLoc() override
void x_ValidateSeqFeatLoc() override
void x_ValidateECNumbers()
void x_ValidateProteinName(const string &prot_name)
void x_ReportUninformativeNames()
void x_ValidateMolinfoPartials()
static EECNumberFileStatus GetECNumAmbiguousStatus()
static EECNumberFileStatus GetECNumSpecificStatus()
@ eECFile_not_found
File was not found in expected directory.
static bool IsECNumberSplit(const string &old_ecno)
static bool IsValidECNumberFormat(const string &ecno)
Verify correct form of EC number.
static EECNumberFileStatus GetECNumDeletedStatus()
EECNumberStatus
Enzyme Commission number status.
@ eEC_replaced
Obsolete synonym for some other EC number.
@ eEC_unknown
Unrecognized; possibly malformed.
@ eEC_deleted
Withdrawn, with no (single?) replacement.
static EECNumberFileStatus GetECNumReplacedStatus()
static EECNumberStatus GetECNumberStatus(const string &ecno)
Determine an EC number's validity and specificity.
void x_ValidateTrnaOverlap()
void x_ValidateRnaTrans()
void x_ValidateRnaProductType()
void x_ValidateTrnaCodons()
void x_ReportRNATranslationProblems(size_t problems, size_t mismatches)
void x_ValidateAnticodon(const CSeq_loc &anticodon)
void x_ValidateTrnaData()
void x_ValidateTrnaType()
void x_ValidateRnaProduct(bool feat_pseudo, bool pseudo)
@RNA_ref.hpp User-defined methods of the data storage class.
static string GetRnaTypeName(const CRNA_ref::EType rna_type)
static SIZE_TYPE Convert(const CTempString &src, TCoding src_coding, TSeqPos pos, TSeqPos length, string &dst, TCoding dst_coding)
bool IsLegalQualifier(EQualifier qual) const
Test whether a certain qualifier is legal for the feature.
@ eQual_mobile_element_type
static bool AllowStrandBoth(ESubtype subtype)
ESubtype GetSubtype(void) const
string GetKey(EVocabulary vocab=eVocabulary_full) const
const TQualifiers & GetMandatoryQualifiers(void) const
Get the list of all mandatory qualifiers for the feature.
static std::pair< EQualifier, CTempString > GetQualifierTypeAndValue(CTempString qual)
@ eSubtype_transit_peptide
@ eSubtype_bad
These no longer need to match the FEATDEF values in the C toolkit's objfdef.h.
@ eSubtype_mobile_element
static EQualifier GetQualifierType(CTempString qual)
convert qual string to enumerated value
static CTempString GetQualifierAsString(EQualifier qual)
Convert a qualifier from an enumerated value to a string representation or empty if not found.
static CTempString SubtypeValueToName(ESubtype eSubtype)
Turns a ESubtype into its string value which is NOT necessarily related to the identifier of the enum...
static const vector< string > & GetRegulatoryClassList()
namespace ncbi::objects::
static bool IsExceptionTextInLegalList(const string &exception_text, bool allow_refseq)
Indicates whether this specific text occurs in the list of legal exceptions.
const CGene_ref * GetGeneXref(void) const
See related function in util/feature.hpp.
static bool IsExceptionTextRefSeqOnly(const string &exception_text)
Indicates whether this specific text is a RefSeq-only exception.
static TIndex GetMapToIndex(CSeq_data::E_Choice from_type, CSeq_data::E_Choice to_type, TIndex from_idx)
static bool x_IsMostlyNs(const CSeq_loc &loc, CBioseq_Handle bsh)
CBioseq_Handle x_GetFeatureProduct(bool look_far, bool &is_far)
EDiagSev x_SeverityForConsensusSplice()
void x_ValidateBothStrands()
void ValidateCharactersInField(string value, string field_name)
void PostErr(EDiagSev sv, EErrType et, const string &msg)
void x_ValidateLabelVal(const string &val)
CSingleFeatValidator(const CSeq_feat &feat, CScope &scope, CValidError_imp &imp)
void x_ReportAcceptorSpliceSiteReadErrors(const CSpliceProblems::TSpliceProblem &problem, const string &label)
void x_ValidateRptUnitVal(const string &val, const string &key)
void x_ValidateGeneXRef()
void x_ValidateReplaceQual(const string &key, const string &qual_str, const string &val)
void x_CheckForNonAsciiCharacters()
CBioseq_Handle x_GetBioseqByLocation(const CSeq_loc &loc)
void x_ValidateSeqFeatDataType()
CBioseq_Handle m_ProductBioseq
static bool s_IsPseudo(const CSeq_feat &feat)
virtual bool x_ReportOrigProteinId()
CBioseq_Handle m_LocationBioseq
void x_ValidateCompareVal(const string &val)
void x_ValidateRptUnitSeqVal(const string &val, const string &key)
void x_ValidateExtUserObject()
void x_ValidateSeqFeatProduct()
void x_ReportDonorSpliceSiteReadErrors(const CSpliceProblems::TSpliceProblem &problem, const string &label)
void x_ValidateImpFeatQuals()
static bool s_GeneRefsAreEquivalent(const CGene_ref &g1, const CGene_ref &g2, string &label)
void x_ReportECNumFileStatus()
void x_ValidateGbQual(const CGb_qual &qual)
void x_ValidateGeneFeaturePair(const CSeq_feat &gene)
void x_ValidateOldLocusTag(const string &old_locus_tag)
virtual void x_ValidateFeatComment()
void x_ValidateNonImpFeat()
bool x_HasNamedQual(const string &qual_name)
void x_ValidateLocusTagGeneralMatch(CConstRef< CSeq_feat > gene)
static bool x_HasSeqLocBond(const CSeq_feat &feat)
void x_ReportPseudogeneConflict(CConstRef< CSeq_feat > gene)
void x_ValidateRptUnitRangeVal(const string &val)
void x_ValidateFeatPartialness()
static bool x_BioseqHasNmAccession(CBioseq_Handle bsh)
void ValidateSplice(bool gene_pseudo, bool check_all)
void x_ReportSpliceProblems(const CSpliceProblems &problems, const string &label)
bool x_AllowFeatureToMatchGapExactly()
static TSeqPos x_FindStartOfGap(CBioseq_Handle bsh, TSeqPos pos, CScope *scope)
static size_t x_CalculateLocationGaps(CBioseq_Handle bsh, const CSeq_loc &loc, vector< TSeqPos > &gap_starts)
virtual void x_ValidateSeqFeatLoc()
virtual void x_ValidateExceptText(const string &text)
static bool s_BioseqHasRefSeqThatStartsWithPrefix(CBioseq_Handle bsh, string prefix)
@ eLocationGapInternalIntervalEndpointInGap
@ eLocationGapCrossesUnknownGap
@ eLocationGapContainedInGapOfNs
@ eLocationGapContainedInGap
@ eLocationGapFeatureMatchesGap
static void x_LocHasStrandBoth(const CSeq_loc &feat, bool &both, bool &both_rev)
void x_ValidateImpFeatLoc()
const TSpliceProblemList & GetDonorProblems() const
void CalculateSpliceProblems(const CSeq_feat &feat, bool check_all, bool pseudo, CBioseq_Handle loc_handle)
vector< TSpliceProblem > TSpliceProblemList
@ eSpliceSiteRead_WrongNT
bool IsExceptionUnnecessary() const
bool AreErrorsUnexpected() const
pair< size_t, TSeqPos > TSpliceProblem
const TSpliceProblemList & GetAcceptorProblems() const
const_iterator find(const key_type &key) const
Return a const_iterator pointing to the specified element, or to the end if the element is not found.
const_iterator end() const
Return the end of the controlled sequence.
vector< CSeq_feat_Handle > TSeq_feat_Handles
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Template class for iteration on objects of class C (non-modifiable version)
@ eInferenceValidCode_valid
static EInferenceValidCode ValidateInference(string inference, bool fetch_accession, CScope *scope=nullptr)
bool DoRubiscoTest() const
bool ReportSpliceAsError() const
void PostErr(EDiagSev sv, EErrType et, const string &msg, const CSerialObject &obj)
const CBioSourceKind & BioSourceKind() const
bool IsRemoteFetch() const
CConstRef< CSeq_feat > GetmRNAGivenProduct(const CBioseq &seq)
bool DoesAnyFeatLocHaveGI() const
bool IsLocusTagGeneralMatch() const
bool IgnoreInferences() const
void ValidateDbxref(const CDbtag &xref, const CSerialObject &obj, bool biosource=false, const CSeq_entry *ctx=nullptr)
bool IsSerialNumberInComment(const string &comment)
bool IsFarSequence(const CSeq_id &id)
const CTSE_Handle & GetTSE_Handle()
bool ValidateInferenceAccessions() const
bool IsHugeFileMode() const
void IncrementPseudogeneCount()
CConstRef< CSeq_feat > GetCDSGivenProduct(const CBioseq &seq)
CBioseq_Handle GetLocalBioseqHandle(const CSeq_id &id)
bool x_IsFarFetchFailure(const CSeq_loc &loc)
bool IsGenomeSubmission() const
void ValidateBioSource(const CBioSource &bsrc, const CSerialObject &obj, const CSeq_entry *ctx=nullptr)
void IncrementGeneXrefCount()
bool IsFarFetchCDSproducts() const
bool IsValidateExons() const
size_t GetCumulativeInferenceCount(void) const
bool IgnoreExceptions() const
void SetFarFetchFailure()
bool IsRefSeqConventions() const
SValidatorContext & SetContext()
bool IsIndexerVersion() const
CGeneCache & GetGeneCache()
bool IsSmallGenomeSet() const
bool HasRefSeq(void) const
void ValidateSeqLoc(const CSeq_loc &loc, const CBioseq_Handle &seq, bool report_abutting, const string &prefix, const CSerialObject &obj, bool lowerSev=false)
bool IsFarFetchMRNAproducts() const
bool IsTransgenic(const CBioSource &bsrc)
void ValidatePubdesc(const CPubdesc &pub, const CSerialObject &obj, const CSeq_entry *ctx=nullptr)
container_type::iterator iterator
const_iterator end() const
const_iterator find(const key_type &key) const
#define MAKE_CONST_SET(name, type,...)
Include a standard set of the NCBI C++ Toolkit most basic headers.
The NCBI C++ standard methods for dealing with std::string.
static void chk(int check, const char *fmt,...)
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
static DLIST_TYPE *DLIST_NAME() prev(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
static const char * expected[]
static const char * str(char *buf, int n)
Public API for finding the gene(s) on a given feature using the same criteria as the flatfile generat...
vector< TGoTermError > GetGoTermErrors(const CSeq_feat &feat)
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
const TSeqPos kInvalidSeqPos
Define special value for invalid sequence position.
EDiagSev
Severity level for the posted diagnostics.
@ eDiag_Info
Informational message.
@ eDiag_Error
Error message.
@ eDiag_Warning
Warning message.
@ eDiag_Fatal
Fatal error -- guarantees exit (or abort)
@ eDiag_Critical
Critical error message.
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
virtual const char * what(void) const noexcept
Standard report (includes full backlog).
const string AsFastaString(void) const
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
virtual void WriteAsFasta(ostream &out) const
Implement serializable interface.
string GetLabel(const CSeq_id &id)
const CTextseq_id * GetTextseq_Id(void) const
Return embedded CTextseq_id, if any.
bool IsPartialStart(ESeqLocExtremes ext) const
check start or stop of location for e_Lim fuzz
ENa_strand GetStrand(void) const
Get the location's strand.
TRange GetTotalRange(void) const
TSeqPos GetStart(ESeqLocExtremes ext) const
Return start and stop positions of the seq-loc.
bool IsSetStrand(EIsSetStrand flag=eIsSetStrand_Any) const
Check if strand is set for any/all part(s) of the seq-loc depending on the flag.
const CSeq_id * GetId(void) const
Get the id of the location return NULL if has multiple ids or no id at all.
CRef< CSeq_loc > Intersect(const CSeq_loc &other, TOpFlags flags, ISynonymMapper *syn_mapper) const
Find the intersection with the seq-loc, merge/sort resulting ranges depending on flags.
TSeqPos GetStop(ESeqLocExtremes ext) const
CConstBeginInfo ConstBegin(const C &obj)
Get starting point of non-modifiable object hierarchy.
ELocationInFrame IsLocationInFrame(const CSeq_feat_Handle &cds, const CSeq_loc &loc)
Determines whether location loc is in frame with coding region cds.
CMappedFeat GetBestOverlappingFeat(const CMappedFeat &feat, CSeqFeatData::ESubtype need_subtype, sequence::EOverlapType overlap_type, CFeatTree *feat_tree=0, const SAnnotSelector *base_sel=0)
@ eLocationInFrame_InFrame
@ eLocationInFrame_BadStart
@ eLocationInFrame_BadStop
@ eLocationInFrame_BadStartAndStop
@ fFGL_Content
Include its content if there is any.
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
bool IsValid(const CSeq_point &pt, CScope *scope)
Checks that point >= 0 and point < length of Bioseq.
Int8 TestForOverlapEx(const CSeq_loc &loc1, const CSeq_loc &loc2, EOverlapType type, CScope *scope=0, TOverlapFlags flags=fOverlap_Default)
Updated version of TestForOverlap64().
int SeqLocPartialCheck(const CSeq_loc &loc, CScope *scope)
sequence::ECompare Compare(const CSeq_loc &loc1, const CSeq_loc &loc2, CScope *scope)
Returns the sequence::ECompare containment relationship between CSeq_locs.
bool IsOneBioseq(const CSeq_loc &loc, CScope *scope)
Returns true if all embedded CSeq_ids represent the same CBioseq, else false.
bool IsSameBioseq(const CSeq_id &id1, const CSeq_id &id2, CScope *scope, CScope::EGetBioseqFlag get_flag=CScope::eGetBioseq_All)
Determines if two CSeq_ids represent the same CBioseq.
CSeq_loc * SeqLocRevCmpl(const CSeq_loc &loc, CScope *scope)
Get reverse complement of the seq-loc (?)
@ eSeqlocPartial_Internal
@ eSeqlocPartial_Complete
@ fCompareOverlapping
Check if seq-locs are overlapping.
@ eOverlap_Simple
any overlap of extremes
@ eOverlap_Interval
at least one pair of intervals must overlap
@ eOverlap_Contained
2nd contained within 1st extremes
@ eOverlap_Subset
2nd is a subset of 1st ranges
@ eContains
First CSeq_loc contains second.
@ eSame
CSeq_locs contain each other.
@ eContained
First CSeq_loc contained by second.
const CSeq_feat * GetCDSForProduct(const CBioseq &product, CScope *scope)
Get the encoding CDS feature of a given protein sequence.
bool IsPseudo(const CSeq_feat &feat, CScope &scope)
Determines whether given feature is pseudo, using gene associated with feature if necessary Checks to...
CConstRef< CSeq_feat > GetOverlappingGene(const CSeq_loc &loc, CScope &scope, ETransSplicing eTransSplicing=eTransSplicing_Auto)
CConstRef< CSeq_feat > GetOverlappingCDS(const CSeq_loc &loc, CScope &scope)
vector< TFeatScore > TFeatScores
void GetOverlappingFeatures(const CSeq_loc &loc, CSeqFeatData::E_Choice feat_type, CSeqFeatData::ESubtype feat_subtype, EOverlapType overlap_type, TFeatScores &feats, CScope &scope, const TBestFeatOpts opts=0, CGetOverlappingFeaturesPlugin *plugin=NULL)
Find all features overlapping the location.
CBioseq_Handle GetBioseqHandleFromTSE(const CSeq_id &id, const CTSE_Handle &tse)
Get bioseq handle for sequence within one TSE.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
CSeq_feat_Handle GetSeq_featHandle(const CSeq_feat &feat, EMissing action=eMissing_Default)
bool IsSetExcept(void) const
const CTSE_Handle & GetTSE_Handle(void) const
Get CTSE_Handle of containing TSE.
CConstRef< CBioseq > GetCompleteBioseq(void) const
Get the complete bioseq.
TClass GetClass(void) const
const TInst_Ext & GetInst_Ext(void) const
TBioseqCore GetBioseqCore(void) const
Get bioseq core structure.
bool IsSetInst_Ext(void) const
bool IsSetDbxref(void) const
const CSeqFeatData & GetData(void) const
TSeqPos GetBioseqLength(void) const
bool IsSetExcept_text(void) const
TInst_Length GetInst_Length(void) const
const string & GetExcept_text(void) const
bool IsSetInst_Repr(void) const
bool IsSetClass(void) const
TInst_Repr GetInst_Repr(void) const
CScope & GetScope(void) const
Get scope this handle belongs to.
const CSeq_feat::TDbxref & GetDbxref(void) const
CSeq_entry_Handle GetTopLevelEntry(void) const
Get top level Seq-entry handle.
const TId & GetId(void) const
CSeqVector GetSeqVector(EVectorCoding coding, ENa_strand strand=eNa_strand_plus) const
Get sequence: Iupacna or Iupacaa if use_iupac_coding is true.
const TInst & GetInst(void) const
@ eCoding_Iupac
Set coding to printable coding (Iupacna or Iupacaa)
const CSeq_loc & GetLocation(void) const
const CSeq_feat & GetOriginalFeature(void) const
Get original feature with unmapped location/product.
CSeqMap::ESegmentType GetType(void) const
bool IsUnknownLength(void) const
return true if current segment is a gap of unknown length
TSeqPos GetPosition(void) const
return position of current segment in sequence
TSeqPos GetLength(void) const
return length of current segment
CConstRef< CSeq_feat > GetSeq_feat(void) const
Get current seq-feat.
ENa_strand GetStrand(void) const
bool IsInGap(TSeqPos pos) const
true if sequence at 0-based position 'pos' has gap Note: this method is not MT-safe,...
void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const
Fill the buffer string with the sequence data for the interval [start, stop).
TObjectType * GetPointer(void) const THROWS_NONE
Get pointer.
void Reset(void)
Reset reference object.
char Char
Alias for char.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
static string SizetToString(size_t value, TNumToStringFlags flags=0, int base=10)
Convert size_t to string.
static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive compare of a substring with another string.
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)
Find the pattern in the specified range of a string using a case insensitive search.
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
static bool EqualCase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-sensitive equality of a substring with another string.
static SIZE_TYPE FindWord(const CTempString str, const CTempString word, ECase use_case=eCase, EDirection direction=eForwardSearch)
Find given word in the string.
static SIZE_TYPE FindCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)
Find the pattern in the specified range of a string using a case sensitive search.
static int Compare(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Compare of a substring with another string.
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
static bool Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Test for equality of a substring with another string.
static string & ReplaceInPlace(string &src, const string &search, const string &replace, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
static string TruncateSpaces(const string &str, ETrunc where=eTrunc_Both)
Truncate whitespace in a string.
static string & ToLower(string &str)
Convert string to lower case -- string& version.
@ eReverseSearch
Search in a backward direction.
@ eNocase
Case insensitive compare.
static const char label[]
const TOrg & GetOrg(void) const
Get the Org member data.
bool IsSetIs_focus(void) const
to distinguish biological focus Check if a value has been assigned to Is_focus data member.
TTo GetTo(void) const
Get the To member data.
TFrom GetFrom(void) const
Get the From member data.
bool IsSetSyn(void) const
synonyms for locus Check if a value has been assigned to Syn data member.
const TSyn & GetSyn(void) const
Get the Syn member data.
const TDesc & GetDesc(void) const
Get the Desc member data.
bool IsSetPseudo(void) const
Pseudogene. Check if a value has been assigned to Pseudo data member.
bool CanGetLocus(void) const
Check if it is safe to call GetLocus method.
bool IsSetLocus_tag(void) const
Systematic gene name (e.g., MI0001, ORF0069). Check if a value has been assigned to Locus_tag data member.
bool CanGetLocus_tag(void) const
Check if it is safe to call GetLocus_tag method.
bool IsSetLocus(void) const
Official gene symbol. Check if a value has been assigned to Locus data member.
bool IsSetAllele(void) const
Official allele designation. Check if a value has been assigned to Allele data member.
bool CanGetAllele(void) const
Check if it is safe to call GetAllele method.
bool CanGetDesc(void) const
Check if it is safe to call GetDesc method.
const TLocus_tag & GetLocus_tag(void) const
Get the Locus_tag member data.
const TLocus & GetLocus(void) const
Get the Locus member data.
TPseudo GetPseudo(void) const
Get the Pseudo member data.
const TAllele & GetAllele(void) const
Get the Allele member data.
bool IsStr(void) const
Check if variant Str is selected.
bool IsSetDb(void) const
Name of database or system. Check if a value has been assigned to Db data member.
const TTag & GetTag(void) const
Get the Tag member data.
bool IsSetTag(void) const
Appropriate tag. Check if a value has been assigned to Tag data member.
const TDb & GetDb(void) const
Get the Db member data.
const TStr & GetStr(void) const
Get the variant data.
const TTaxname & GetTaxname(void) const
Get the Taxname member data.
bool CanGetTaxname(void) const
Check if it is safe to call GetTaxname method.
EProcessed
processing status
const TName & GetName(void) const
Get the Name member data.
bool IsSetEc(void) const
E.C.
const TEc & GetEc(void) const
Get the Ec member data.
@ eProcessed_signal_peptide
@ eProcessed_transit_peptide
const TPub & GetPub(void) const
Get the variant data.
list< CRef< CPub > > TPub
bool IsPub(void) const
Check if variant Pub is selected.
const TAnticodon & GetAnticodon(void) const
Get the Anticodon member data.
TType GetType(void) const
Get the Type member data.
TNcbi8aa GetNcbi8aa(void) const
Get the variant data.
const TAa & GetAa(void) const
Get the Aa member data.
const TCodon & GetCodon(void) const
Get the Codon member data.
bool IsSetAa(void) const
Check if a value has been assigned to Aa data member.
bool IsTRNA(void) const
Check if variant TRNA is selected.
bool IsSetAnticodon(void) const
Location of anticodon. Check if a value has been assigned to Anticodon data member.
EType
type of RNA feature
bool IsSetExt(void) const
Generic fields for ncRNA, tmRNA, miscRNA. Check if a value has been assigned to Ext data member.
TNcbieaa GetNcbieaa(void) const
Get the variant data.
bool IsGen(void) const
Check if variant Gen is selected.
TIupacaa GetIupacaa(void) const
Get the variant data.
bool CanGetAnticodon(void) const
Check if it is safe to call GetAnticodon method.
const TGen & GetGen(void) const
Get the variant data.
bool IsSetType(void) const
Check if a value has been assigned to Type data member.
bool IsSetClass(void) const
For ncRNAs, the class of non-coding RNA (examples: antisense_RNA, guide_RNA, snRNA). Check if a value has been assigned to Class data member.
E_Choice Which(void) const
Which variant is currently selected.
const TExt & GetExt(void) const
Get the Ext member data.
const TTRNA & GetTRNA(void) const
Get the variant data.
const TClass & GetClass(void) const
Get the Class member data.
@ e_not_set
No variant selected.
@ e_Name
for naming "other" type
@ eType_scRNA
will become ncRNA, with RNA-gen.class = scRNA
@ eType_snoRNA
will become ncRNA, with RNA-gen.class = snoRNA
@ eType_snRNA
will become ncRNA, with RNA-gen.class = snRNA
bool CanGetDbxref(void) const
Check if it is safe to call GetDbxref method.
const TVal & GetVal(void) const
Get the Val member data.
const TKey & GetKey(void) const
Get the Key member data.
bool IsSetLoc(void) const
Original location string. Check if a value has been assigned to Loc data member.
bool IsSetComment(void) const
Check if a value has been assigned to Comment data member.
vector< CRef< CDbtag > > TDbxref
const TPub & GetPub(void) const
Get the variant data.
bool IsSetData(void) const
The specific data. Check if a value has been assigned to Data data member.
bool IsSetQual(void) const
Qualifiers. Check if a value has been assigned to Qual data member.
E_Choice Which(void) const
Which variant is currently selected.
bool IsBond(void) const
Check if variant Bond is selected.
bool IsProt(void) const
Check if variant Prot is selected.
bool IsCdregion(void) const
Check if variant Cdregion is selected.
bool IsImp(void) const
Check if variant Imp is selected.
const TCit & GetCit(void) const
Get the Cit member data.
const TQual & GetQual(void) const
Get the Qual member data.
bool IsSetPartial(void) const
incomplete in some way? Check if a value has been assigned to Partial data member.
bool IsSetKey(void) const
Check if a value has been assigned to Key data member.
bool IsSetXref(void) const
Cite other relevant features. Check if a value has been assigned to Xref data member.
const TLocation & GetLocation(void) const
Get the Location member data.
E_Choice
Choice variants.
bool IsGene(void) const
Check if variant Gene is selected.
const TData & GetData(void) const
Get the Data member data.
bool IsSetExcept(void) const
something funny about this? Check if a value has been assigned to Except data member.
const TExcept_text & GetExcept_text(void) const
Get the Except_text member data.
bool IsPub(void) const
Check if variant Pub is selected.
bool IsSetExcept_text(void) const
Explain if except=TRUE. Check if a value has been assigned to Except_text data member.
const TDbxref & GetDbxref(void) const
Get the Dbxref member data.
bool CanGetVal(void) const
Check if it is safe to call GetVal method.
bool IsHet(void) const
Check if variant Het is selected.
bool IsSetExp_ev(void) const
Check if a value has been assigned to Exp_ev data member.
const TBiosrc & GetBiosrc(void) const
Get the variant data.
bool CanGetExcept_text(void) const
Check if it is safe to call GetExcept_text method.
TPseudo GetPseudo(void) const
Get the Pseudo member data.
const TProduct & GetProduct(void) const
Get the Product member data.
bool IsSetQual(void) const
Check if a value has been assigned to Qual data member.
bool CanGetExcept(void) const
Check if it is safe to call GetExcept method.
bool IsSetPseudo(void) const
annotated on pseudogene? Check if a value has been assigned to Pseudo data member.
const TComment & GetComment(void) const
Get the Comment member data.
bool IsSetCit(void) const
Citations for this feature. Check if a value has been assigned to Cit data member.
bool IsBiosrc(void) const
Check if variant Biosrc is selected.
const TGene & GetGene(void) const
Get the variant data.
const Tdata & Get(void) const
Get the member data.
TPartial GetPartial(void) const
Get the Partial member data.
const TProt & GetProt(void) const
Get the variant data.
TExcept GetExcept(void) const
Get the Except member data.
const TXref & GetXref(void) const
Get the Xref member data.
vector< CRef< CSeqFeatXref > > TXref
vector< CRef< CGb_qual > > TQual
const TQual & GetQual(void) const
Get the Qual member data.
const TRna & GetRna(void) const
Get the variant data.
bool IsSetDbxref(void) const
Support for xref to other databases. Check if a value has been assigned to Dbxref data member.
bool IsSetVal(void) const
Check if a value has been assigned to Val data member.
bool IsSetProduct(void) const
Product of process. Check if a value has been assigned to Product data member.
bool CanGetQual(void) const
Check if it is safe to call GetQual method.
const TLoc & GetLoc(void) const
Get the Loc member data.
bool IsRna(void) const
Check if variant Rna is selected.
TExp_ev GetExp_ev(void) const
Get the Exp_ev member data.
const TImp & GetImp(void) const
Get the variant data.
bool IsSetLocation(void) const
Location the feature is made from. Check if a value has been assigned to Location data member.
@ e_Het
cofactor, prosthetic grp, etc, bound to seq
@ e_Region
named region (globin locus)
@ e_Seq
to annotate origin from another seq
@ e_Txinit
transcription initiation
@ e_Num
a numbering system
@ e_Pub
publication applies to this seq
@ e_User
user defined structure
@ e_Rsite
restriction site (for maps really)
@ e_Comment
just a comment
@ e_Non_std_residue
non-standard residue here in seq
bool IsSetAccession(void) const
Check if a value has been assigned to Accession data member.
const TName & GetName(void) const
Get the Name member data.
ENa_strand
strand of nucleic acid
TFrom GetFrom(void) const
Get the From member data.
bool IsGeneral(void) const
Check if variant General is selected.
bool CanGetName(void) const
Check if it is safe to call GetName method.
E_Choice Which(void) const
Which variant is currently selected.
bool CanGetAccession(void) const
Check if it is safe to call GetAccession method.
const TGeneral & GetGeneral(void) const
Get the variant data.
TTo GetTo(void) const
Get the To member data.
bool IsWhole(void) const
Check if variant Whole is selected.
bool IsInt(void) const
Check if variant Int is selected.
const TInt & GetInt(void) const
Get the variant data.
bool IsSetName(void) const
Check if a value has been assigned to Name data member.
@ eNa_strand_both_rev
in reverse orientation
@ eNa_strand_both
in forward orientation
@ e_Tpe
Third Party Annot/Seq EMBL.
@ e_Tpd
Third Party Annot/Seq DDBJ.
@ e_Tpg
Third Party Annot/Seq Genbank.
@ eClass_gen_prod_set
genomic products, chrom+mRNA+protein
TRepr GetRepr(void) const
Get the Repr member data.
bool IsSetCompleteness(void) const
Check if a value has been assigned to Completeness data member.
bool CanGetBiomol(void) const
Check if it is safe to call GetBiomol method.
ERepr
representation class
const TInst & GetInst(void) const
Get the Inst member data.
const TSource & GetSource(void) const
Get the variant data.
const TId & GetId(void) const
Get the Id member data.
bool IsSetExt(void) const
Extensions for special types. Check if a value has been assigned to Ext data member.
bool IsDelta(void) const
Check if variant Delta is selected.
const TExt & GetExt(void) const
Get the Ext member data.
TBiomol GetBiomol(void) const
Get the Biomol member data.
const TDelta & GetDelta(void) const
Get the variant data.
TCompleteness GetCompleteness(void) const
Get the Completeness member data.
const Tdata & Get(void) const
Get the member data.
list< CRef< CDelta_seq > > Tdata
const TMolinfo & GetMolinfo(void) const
Get the variant data.
@ eRepr_delta
sequence made by changes (delta) to others
@ eRepr_raw
continuous sequence
@ eCompleteness_no_left
missing 5' or NH3 end
@ eCompleteness_partial
partial but no details given
@ eCompleteness_no_right
missing 3' or COOH end
@ eCompleteness_no_ends
missing both ends
@ e_Ncbieaa
extended ASCII 1 letter aa codes
@ e_Ncbistdaa
consecutive codes for std aas
@ e_Molinfo
info on the molecule and techniques
@ e_Source
source of materials, includes Org-ref
@ e_Loc
point to a sequence
static void text(MDB_val *v)
constexpr bool empty(list< Ts... >) noexcept
const struct ncbi::grid::netcache::search::fields::KEY key
const GenericPointer< typename T::ValueType > T2 value
const CharType(& source)[N]
static int match(PCRE2_SPTR start_eptr, PCRE2_SPTR start_ecode, uint16_t top_bracket, PCRE2_SIZE frame_size, pcre2_match_data *match_data, match_block *mb)
#define FOR_EACH_SEQID_ON_BIOSEQ(Itr, Var)
FOR_EACH_SEQID_ON_BIOSEQ EDIT_EACH_SEQID_ON_BIOSEQ.
#define FOR_EACH_GBQUAL_ON_FEATURE
#define FOR_EACH_GBQUAL_ON_SEQFEAT(Itr, Var)
FOR_EACH_GBQUAL_ON_SEQFEAT EDIT_EACH_GBQUAL_ON_SEQFEAT.
#define FOR_EACH_STRING_IN_VECTOR(Itr, Var)
FOR_EACH_STRING_IN_VECTOR EDIT_EACH_STRING_IN_VECTOR.
#define FOR_EACH_CHAR_IN_STRING(Itr, Var)
FOR_EACH_CHAR_IN_STRING EDIT_EACH_CHAR_IN_STRING.
bool ContainsSgml(const string &str)
static string s_AsciiString(const string &src)
static bool s_LocationStrandsIncompatible(const CSeq_loc &loc1, const CSeq_loc &loc2, CScope *scope)
CSingleFeatValidator * FeatValidatorFactory(const CSeq_feat &feat, CScope &scope, CValidError_imp &imp)
bool s_HasNamedQual(const CSeq_feat &feat, const string &qual)
const string kInferenceMessage[]
static bool s_StringConsistsOf(string str, string consist)
static string GetGeneticCodeName(int gcode)
const string kOrigProteinId
int s_LegalNcbieaaValues[]
static const char *const sc_BadProtNameText[]
CStaticArraySet< const char *, PCase_CStr > TBadProtNameSet
static bool s_IsBioseqPartial(CBioseq_Handle bsh)
const char * GetAAName(unsigned char aa, bool is_ascii)
DEFINE_STATIC_ARRAY_MAP(TBadProtNameSet, sc_BadProtName, sc_BadProtNameText)
static const char * kAANames[]
static string s_GetGeneRefFields(const CGene_ref &gene, int field)
bool HasGeneIdXref(const CMappedFeat &sf, const CObject_id &tag, bool &has_parent_gene_id)
static bool s_RptUnitIsBaseRange(string str, TSeqPos &from, TSeqPos &to)
static bool xf_IsDeltaLitOnly(CBioseq_Handle bsh)
static SLJIT_INLINE sljit_ins msg(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
bool CheckIntronAcceptor(ENa_strand strand, TConstSpliceSite acceptor)
bool CheckIntronSpliceSites(ENa_strand strand, TConstSpliceSite donor, TConstSpliceSite acceptor)
bool CheckIntronDonor(ENa_strand strand, TConstSpliceSite donor)
Selector used in CSeqMap methods returning iterators.
atomic_bool CheckECNumFileStatus
size_t GetMRNATranslationProblems(const CSeq_feat &feat, size_t &mismatches, bool ignore_exceptions, CBioseq_Handle nuc, CBioseq_Handle rna, bool far_fetch, bool is_gpipe, bool is_genomic, CScope *scope)
@ eMRNAProblem_UnnecessaryException
@ eMRNAProblem_UnableToFetch
@ eMRNAProblem_TranscriptLenLess
@ eMRNAProblem_PolyATail95
@ eMRNAProblem_TranscriptLenMore
@ eMRNAProblem_ProductReplaced
@ eMRNAProblem_ErroneousException
@ eMRNAProblem_PolyATail100
const int InferenceAccessionCutoff
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4