Overlaps(
const CSeq_feat& cds)
const;
72 boolHasMatch()
const;
74 boolOkWithoutCds(
boolisGenbank =
false)
const;
151 for(
auto id: bsh.
GetId()) {
168 if((*it)->IsSetSubtype() && (*it)->IsSetName() && !
NStr::IsBlank((*it)->GetName())) {
176 if(
source.IsSetLineage()) {
177 stringlineage =
source.GetLineage();
188 if(
source.IsSetDivision()) {
189 stringdiv =
source.GetDivision();
195 if(
source.IsSetGenome()) {
230 if(appropriate_parent) {
249}
catch(
constexception&) {
273 if(appropriate_parent) {
278}
catch(
constexception& e) {
280 string(
"Exception while validating bioseq. EXCEPTION: ") +
298 const string& db = dbt.
GetDb();
311 const char& ch = *itr;
312 if(ch ==
'|'|| ch ==
',')
returnch;
320 for(
size_t i= 0;
i<
id.length();
i++) {
322 return id.c_str()[
i];
332 const char& ch = *itr;
333 if(ch ==
'|'|| ch ==
',')
returnch;
347 "BioseqFind ("+
id.AsFastaString() +
348 ") unable to find itself - possible internal error",
ctx);
360 "BioseqFind ("+
id.AsFastaString() +
361 ") unable to find itself - possible internal error",
ctx);
365 "SeqID "+
id.AsFastaString() +
366 " is present on multiple Bioseqs in record",
ctx);
370 "BioseqFind ("+
id.AsFastaString() +
371 ") unable to find itself - possible internal error",
ctx);
377 switch(
id.Which()) {
383 "TPA record "+
ctx.GetId().front()->AsFastaString() +
384 " should have Seq-hist.assembly for PRIMARY block",
395 if(badch !=
'\0') {
397 "Bad character '"+
string(1, badch) +
"' in accession '"+ acc +
"'",
ctx);
404 "Bad accession "+ acc,
ctx);
415 const string& name = tsid->
GetName();
417 if(
isspace((
unsigned char)(*s))) {
420 "Seq-id.name '"+ name +
"' should be a single " 421 "word without any spaces",
ctx);
430 if(badch !=
'\0') {
432 "Bad character '"+
string(1, badch) +
"' in accession '"+ acc +
"'",
ctx);
434 size_tnum_letters = 0;
435 size_tnum_digits = 0;
436 size_tnum_underscores = 0;
437 boolbad_id_chars =
false;
440 boolletter_after_digit =
false;
446 for(;
i< acc.length(); ++
i) {
447 if(
isupper((
unsigned char)acc[
i])) {
449}
else if(
isdigit((
unsigned char)acc[
i])) {
451}
else if(acc[
i] ==
'_') {
453 if(num_digits > 0 || num_underscores > 1) {
454letter_after_digit =
true;
457bad_id_chars =
true;
461 if(letter_after_digit || bad_id_chars) {
463 "Bad accession "+ acc,
ctx);
464}
else if(is_NZ && (num_letters == 4 || num_letters == 6) &&
465(num_digits >= 8 && num_digits <= 11) && num_underscores == 0) {
469}
else if(num_letters == 2 &&
470(num_digits == 6 || num_digits == 8 || num_digits == 9) &&
471num_underscores == 1) {
473}
else if(num_letters == 4 && num_digits == 10 &&
ctx.IsNa()) {
476 "Bad accession "+ acc,
ctx);
490 if(!
id.IsDdbj() ||
492 string msg=
"Missing accession for "+
id.AsFastaString();
501 "Seq-id type not handled",
ctx);
505 if(
id.GetGi() <=
ZERO_GI) {
507 "Invalid GI number",
ctx);
511 if(!
id.GetGeneral().IsSetDb() ||
NStr::IsBlank(
id.GetGeneral().GetDb())) {
514 if(
id.GetGeneral().IsSetDb()) {
515 const CDbtag& dbt =
id.GetGeneral();
516 size_tdblen = dbt.
GetDb().length();
528 if(dblen > max_dblen) {
535 if(longer_general) {
540 if(idlen > maxlen && !
m_Imp.
IsGI()) {
565 if(badch ==
'\0'&& dbt.
IsSetDb()) {
569 if(badch !=
'\0') {
571 "Bad character '"+
string(1, badch) +
"' in sequence ID '"+
id.AsFastaString() +
"'",
ctx);
586 if(
id.IsLocal() &&
id.GetLocal().IsStr()) {
587 const string& acc =
id.GetLocal().GetStr();
589 if(badch !=
'\0') {
591 "Bad character '"+
string(1, badch) +
"' in local ID '"+ acc +
"'",
ctx);
596 if(
id.
IsPdb()) {
601 if(chain_id.size() == 1 && chain_id[0] == chain) {
603}
else if(
islower(chain) && chain_id.size() == 2
604&& chain_id[0] == chain_id[1]
605&& chain_id[0] ==
toupper(chain)) {
607}
else if(chain ==
'|'&& chain_id ==
"VB") {
611 "PDB Seq-id contains mismatched \'chain\' and" 612 " \'chain-id\' slots",
ctx);
623 if(! IsNCBIFILESeqId(**
i)) {
625(*i)->GetLabel(&
label);
626 if(
label.length() > 40) {
628 "Sequence ID is unusually long ("+
640 constlist< string > *extra_acc =
nullptr;
642 switch(desc.
Which()) {
673 boolfound_good =
false;
681 "The only ids on this Bioseq will be stripped during ID load", seq);
691 "No ids on a Bioseq", seq);
700 boolhas_gi =
false;
701 boolis_lrg =
false;
702 boolhas_ng =
false;
703 boolwgs_tech_needs_wgs_accession =
false;
704 boolis_segset_accession =
false;
705 boolhas_wgs_general =
false;
706 boolis_eb_db =
false;
707 boollonger_general =
false;
710 if((*i)->IsOther() || (*i)->IsEmbl() || (*i)->IsTpe()) {
711longer_general =
true;
719 if((*i)->IsGeneral() && (*i)->GetGeneral().IsSetDb()) {
724has_wgs_general =
true;
726}
else if((*i)->IsOther() && (*i)->GetOther().IsSetAccession()) {
727 const string& acc = (*i)->GetOther().GetAccession();
730wgs_tech_needs_wgs_accession =
true;
734wgs_tech_needs_wgs_accession =
true;
736}
else if((*i)->IsEmbl() && (*i)->GetEmbl().IsSetAccession()) {
738}
else if((*i)->IsDdbj() && (*i)->GetDdbj().IsSetAccession()) {
743CBioseq::TId::const_iterator j;
744 for(j =
i, ++j; j != seq.
GetId().end(); ++j) {
747os <<
"Conflicting ids on a Bioseq: (";
748(**i).WriteAsFasta(os);
750(**j).WriteAsFasta(os);
757 if((*i)->IsGenbank() || (*i)->IsEmbl() || (*i)->IsDdbj()) {
758wgs_tech_needs_wgs_accession =
true;
766is_segset_accession =
true;
770 if(is_lrg && ! has_ng) {
772 "LRG sequence needs NG_ accession", seq);
777 boolis_wgs =
false;
778 unsigned intgi_count = 0;
779 unsigned intaccn_count = 0;
780 unsigned intlcl_count = 0;
783 switch((**k).Which()) {
794 if((*k)->IsGenbank() || (*k)->IsEmbl() || (*k)->IsDdbj()) {
802 "Accession "+ acc +
" has 0 version", seq);
815 "Missing accession for "+ tsid->
GetName(), seq);
829 string label= (*k)->AsFastaString();
831 "Missing identifier for "+
label, seq);
847 if(! mi || ! mi->IsSetTech() ||
852 "WGS accession should have Mol-info.tech of wgs", seq);
854}
else if(mi && mi->IsSetTech() &&
856wgs_tech_needs_wgs_accession &&
857! is_segset_accession &&
866 "Mol-info.tech of wgs should have WGS accession", seq);
871&& (! mi->IsSetBiomol()
875 "genomic RefSeq accession should use genomic or cRNA moltype",
880 if(mi && mi->IsSetBiomol()) {
881 switch(mi->GetBiomol()) {
894 "Molecule type (DNA) does not match biomol (RNA)", seq);
903 if(gi_count > 0 && accn_count == 0 && !
m_Imp.
IsPDB() &&
906 "No accession on sequence with gi number", seq);
908 if(gi_count > 0 && accn_count > 1) {
910 "Multiple accessions on sequence with gi number", seq);
953CEMBL_block::TKeywords::const_iterator keyword = embl_i->
GetEmbl().
GetKeywords().begin();
968 const string& primary_acc,
974 constlist<string>* extra_acc =
nullptr;
989primary_acc +
" used for both primary and" 990 " secondary accession", seq);
1015 boolhas_barcode_tech =
false;
1019has_barcode_tech =
true;
1022 boolhas_barcode_keyword =
false;
1026has_barcode_keyword =
true;
1030 if(has_barcode_keyword && ! has_barcode_tech) {
1032 "BARCODE keyword without Molinfo.tech barcode",
1036 if(has_barcode_tech && ! has_barcode_keyword && di) {
1038 "Molinfo.tech barcode without BARCODE keyword",
1043 "Sequence has both BARCODE and UNVERIFIED keywords",
1068 "Bioseq.mol is type nucleic acid", seq);
1076 "Non-linear topology set on protein", seq);
1082 "Protein not single stranded", seq);
1097 "Circular Bacteria or Archaea should be chromosome, or plasmid, or extrachromosomal", seq);
1110 "Bioseq.mol is type other", seq);
1180 boolis_wgs =
false;
1181 boolis_grc =
false;
1204sequence::CDeflineGenerator defline_generator;
1205 stringtitle = defline_generator.GenerateDefline(seq, *
m_Scope, sequence::CDeflineGenerator::fIgnoreExisting);
1213is_wgs =
IsWGS(bsh);
1215 boolis_gb =
false,
is_refseq =
false, is_ng =
false;
1218 const CSeq_id& sid = **sid_itr;
1219 switch(sid.
Which()) {
1231 if(acc ==
"NG_") {
1245 if(! is_wgs && ! is_grc)
1287 "No CdRegion in nuc-prot set points to this protein",
1293 boolis_complete =
false;
1298is_complete =
true;
1399 "BioProject entries not present on CON record", seq);
1402}
catch(
constexception& e) {
1403 if(
NStr::Find(e.what(),
"Error: Cannot resolve") == string::npos) {
1405 string(
"Exception while validating BioseqContext. EXCEPTION: ") +
1426 "Orphaned stand-alone protein", seq);
1436 if(prot_feats.size() > 1) {
1439 "Protein sequence has multiple unprocessed protein features",
1440feat->GetOriginalFeature());
1447 "Expected submission citation is missing for this Bioseq", seq);
1457 for(
CFeat_CIfeat_ci(bsh, sel); feat_ci; ++feat_ci) {
1459 const CSeq_feat& matpeptide = feat_ci->GetOriginalFeature();
1466 if(matlen != prdlen) {
1468 "Mat_peptide does not match length of instantiated product",
1476 if(
len> prdlen) {
1484 if(m_res != p_res) {
1486 "Mismatch in mat_peptide ("+
string(1, (
char)m_res) +
") and instantiated product ("+ \
1512 if(pub.
IsSub()) {
1524 boolhas_cit_sub =
false;
1526 while(p && !has_cit_sub) {
1537 template<
classIterator,
classPredicate>
1540 while(iter1 != iter1_stop && iter2 != iter2_stop) {
1541 if(! pred(*iter1, *iter2)) {
1547 if(iter1 != iter1_stop || iter2 != iter2_stop) {
1559 const COrgMod& omd1 = *(om1);
1560 const COrgMod& omd2 = *(om2);
1570 if(chs1 == chs2)
return true;
1580 returndbt1->
Compare(*dbt2) == 0;
1599org2.
GetDb().begin(), org2.
GetDb().end(),
1683printf (
"Orgname not set!\n");
1685printf (
"Lineage not set!\n");
1701 if(start1 == stop2 + 1 || start2 == stop1 + 1) {
1719 if(start1 == stop2 + 1 || start2 == stop1 + 1) {
1737 if(start1 == stop2 + 1 || start2 == stop1 + 1) {
1758CCacheImpl::TFeatValue::const_iterator feat = rnas.begin();
1759 if(feat != rnas.end()) {
1761CCacheImpl::TFeatValue::const_iterator feat_prev = feat;
1763 for(; feat != rnas.end(); ++feat_prev, ++feat) {
1766feat->GetLocation(),
m_Scope)) {
1770 const CRNA_ref& tm = feat_prev->GetData().GetRna();
1771 const CRNA_ref& tr = feat->GetData().GetRna();
1775feat->GetLocation(),
m_Scope)) {
1777 "tRNA contained within tmRNA",
1778feat->GetOriginalFeature());
1784}
catch(
constexception& e) {
1785 if(
NStr::Find(e.what(),
"Error: Cannot resolve") == string::npos) {
1787 string(
"Exception while validating RNA features. EXCEPTION: ") +
1804CCacheImpl::TFeatValue::const_iterator feat = biosrcs.begin();
1805 if(feat != biosrcs.end()) {
1811 "Source feature is full length, should be descriptor",
1812feat->GetOriginalFeature());
1817CCacheImpl::TFeatValue::const_iterator feat_prev = feat;
1819 for(; feat != biosrcs.end(); ++feat_prev, ++feat) {
1822 "Multiple full-length source features, should only be one if descriptor is transgenic",
1823feat->GetOriginalFeature());
1827feat->GetLocation(),
m_Scope)) {
1833 boolare_identical =
true;
1834 if(feat_prev->IsSetComment() && feat->IsSetComment()
1836are_identical =
false;
1838 const CBioSource& src_prev = feat_prev->GetData().GetBiosrc();
1839 const CBioSource& src = feat->GetData().GetBiosrc();
1842are_identical =
false;
1849are_identical =
false;
1854are_identical =
false;
1859 "Multiple equivalent source features should be combined into one multi-interval feature",
1860feat->GetOriginalFeature());
1864}
catch(
constexception& e) {
1865 if(
NStr::Find(e.what(),
"Error: Cannot resolve") == string::npos) {
1867 string(
"Exception while validating source features. EXCEPTION: ") +
1880 if((*it)->IsGen() && (*it)->GetGen().IsSetCit()
1881&& ! (*it)->GetGen().IsSetCit()
1882&& ! (*it)->GetGen().IsSetJournal()
1883&& ! (*it)->GetGen().IsSetDate()
1884&& (*it)->GetGen().IsSetSerial_number()) {
1905CCacheImpl::TFeatValue::const_iterator feat = pubs.begin();
1906 if(feat != pubs.end()) {
1909 "Publication feature is full length, should be descriptor",
1910feat->GetOriginalFeature());
1913CCacheImpl::TFeatValue::const_iterator feat_prev = feat;
1915 if(feat_prev != pubs.end()) {
1919 for(; feat != pubs.end(); ++feat, ++feat_prev) {
1922 "Publication feature is full length, should be descriptor",
1923feat->GetOriginalFeature());
1926 boolare_identical =
true;
1927 if(feat_prev->IsSetComment() && feat->IsSetComment()
1929are_identical =
false;
1935are_identical =
false;
1939prev_label.swap(
label);
1944 if(are_identical) {
1946 "Multiple equivalent publication features should be combined into one multi-interval feature",
1947feat->GetOriginalFeature());
1951}
catch(
constexception& e) {
1952 if(
NStr::Find(e.what(),
"Error: Cannot resolve") == string::npos) {
1954 string(
"Exception while validating pub features. EXCEPTION: ") +
1986 const CBioseq& seq,
constvector<CTempString>& labels)
1988 if(labels.size() <= 1) {
1996 static const charkWarningPrefix[] =
1997 "Multiple equivalent publications annotated on this sequence [";
1998 static const size_tkWarningPrefixLen =
sizeof(kWarningPrefix) - 1;
1999 static conststring::size_type kMaxSummaryLen = 100;
2004TLabelCount label_count;
2006 ITERATE(vector<CTempString>, label_it, labels) {
2007++label_count[*label_it];
2011vector<CTempString> sorted_dup_labels;
2012 ITERATE(TLabelCount, label_count_it, label_count) {
2013 intnum_appearances = label_count_it->second;
2014 _ASSERT(num_appearances > 0);
2015 if(num_appearances > 1) {
2016 const CTempString& dup_label = label_count_it->first;
2017sorted_dup_labels.push_back(dup_label);
2023 stringerr_msg = kWarningPrefix;
2024 ITERATE(vector<CTempString>, dup_label_it, sorted_dup_labels) {
2027err_msg.resize(kWarningPrefixLen);
2028 if(summary.
length() > kMaxSummaryLen) {
2029err_msg += summary.
substr(0, kMaxSummaryLen);
2048vector<int> serials;
2049vector<CTempString> published_labels;
2050vector<CTempString> unpublished_labels;
2065back_inserter(published_labels));
2067back_inserter(unpublished_labels));
2071 boolotherpub =
false;
2073 switch((*pub_it)->Which()) {
2075muid = (*pub_it)->GetMuid();
2078pmid = (*pub_it)->GetPmid();
2087 boolcollision =
false;
2089 if(muids_seen.
find(muid) != muids_seen.
end()) {
2092muids_seen.
insert(muid);
2096 if(pmids_seen.
find(pmid) != pmids_seen.
end()) {
2099pmids_seen.
insert(pmid);
2104 "Multiple publications with identical PubMed ID", *
ctx, *it);
2123 if((*id)->IsGi()) {
2124gi = (*id)->GetGi();
2136 if((*id)->IsGi()) {
2137 if(gi == (*id)->GetGi()) {
2139 "Replaced by gi ("+
2151 if((*id)->IsGi()) {
2152 if(gi == (*id)->GetGi()) {
2176 if(
id.Match(**it)) {
2191 switch(seqdata.
Which()) {
2235 if(
prot[
prot.size() - 1] ==
'*') {
2245 if(mi && mi->IsSetCompleteness()) {
2252}
catch(
conststd::exception&) {
2273mix.
Set().push_back(*it);
2345 if(
prev.IsSetExcept() &&
prev.GetExcept() &&
prev.IsSetExcept_text()) {
2356 for(
autoit : currP.
GetName()) {
2361 for(
autoit : prevP.
GetName()) {
2408 #define FOR_EACH_SEQID_ON_BIOSEQ_HANDLE(Itr, Var) \ 2409 ITERATE (CBioseq_Handle::TId, Itr, Var.GetId()) 2424 if(entry.
IsSeq()) {
2446 if((*it)->IsMolinfo() && (*it)->GetMolinfo().IsSetTech() && (*it)->GetMolinfo().GetTech() ==
CMolInfo::eTech_wgs) {
2467 if(entry.
IsSeq()) {
2559 if((*id)->IsPdb()) {
2610&& (*it)->GetSet().IsSetClass()
2619 if((*loc)->IsNull()) {
2623 if(locs.size() - nulls < parts.size()) {
2625 "Parts set contains too many Bioseqs", seq);
2627}
else if(locs.size() - nulls > parts.size()) {
2629 "Parts set does not contain enough Bioseqs", seq);
2635 size_t size= locs.size();
2636CSeg_ext::Tdata::const_iterator loc_it = locs.begin();
2637CBioseq_set::TSeq_set::const_iterator part_it = parts.begin();
2638 for(
size_t i= 0;
i<
size; ++
i) {
2640 if((*loc_it)->IsNull()) {
2644 if(! (*part_it)->IsSeq()) {
2646 "Parts set component is not Bioseq", seq);
2650 if(!
IsIdIn(loc_id, (*part_it)->GetSeq())) {
2652 "Segmented bioseq seq_ext does not correspond to parts " 2653 "packaging order", seq);
2661 ERR_POST_X(4,
"Seq-loc not for unique sequence");
2664 stringerr_msg =
"Unknown error:";
2665err_msg += x1.
what();
2668}
catch(std::exception& x2) {
2669 stringerr_msg =
"Unknown error:";
2670err_msg += x2.what();
2683 if(! inst.
IsSetExt())
return false;
2687 if(! (*iter)->IsLiteral())
continue;
2699 boolhas_gap =
false;
2702 if((*iter)->IsLiteral() &&
2703(! (*iter)->GetLiteral().IsSetSeq_data() || (*iter)->GetLiteral().GetSeq_data().IsGap())) {
2719 stringtitle = sequence::CDeflineGenerator().GenerateDefline(bsh);
2735 "Complete genome in title without complete flag set",
2744 "Circular topology without complete flag set",
ctx, *desc);
2752 "Title contains 'complete genome' but sequence has gaps", seq);
2766 if(! (*sg))
continue;
2783 if(! (*sg) )
continue;
2793 intgaptype = gap.
GetType();
2814 if(! (*sg))
continue;
2846 "WGS submission includes wrong gap type. Gaps for WGS genomes should be Assembly Gaps with linkage evidence.", seq);
2855 "TSA submission includes wrong gap type. Gaps for TSA should be Assembly Gaps with linkage evidence.", seq);
2864 "Genome submission includes wrong gap type. Gaps for genomes should be Assembly Gaps with linkage evidence.", seq);
2904 boolhas_biosample =
false;
2905 boolhas_bioproject =
false;
2913has_biosample =
true;
2917has_bioproject =
true;
2929 for(
autoit : uo.
GetData()) {
2930 if(! it->GetLabel().IsStr()) {
2933 const string&
label= it->GetLabel().GetStr();
2936 const string&
str= it->GetData().GetStr();
2937 autofst =
str.find_first_of(
"0123456789");
2941 const string&
str= it->GetData().GetStr();
2942 autolst =
str.find_first_of(
"0123456789");
2946 if((fr != 0) && (to != 0)) {
2947 int df= to - fr + 1;
2960 if(! has_biosample && ! has_bioproject) {
2962 "WGS master lacks both BioSample and BioProject",
2964}
else if(! has_biosample) {
2966 "WGS master lacks BioSample",
2968}
else if(! has_bioproject) {
2970 "WGS master lacks BioProject",
2973 if(! has_biosample || ! has_bioproject) {
2981 boolonly_local =
true;
2982 boolis_NCACNTNW =
false;
2983 boolis_patent =
false;
2985 if(! (*id_it)->IsLocal()) {
2986only_local =
false;
2987 if((*id_it)->IsPatent()) {
2990is_NCACNTNW =
true;
2995 if(is_NCACNTNW || is_patent) {
2997}
else if(is_circular) {
2999}
else if(only_local) {
3021 if(vec[
i] ==
'N') {
3024 if(max_stretch < this_stretch) {
3025max_stretch = this_stretch;
3030 if(this_stretch >= 10) {
3034 if(vec.
size() > 20 &&
i> vec.
size() - 10) {
3040 if(max_stretch < this_stretch) {
3041max_stretch = this_stretch;
3046 if(max_stretch < this_stretch) {
3047max_stretch = this_stretch;
3072 if(max_stretch >= 15) {
3079 "Sequence has a stretch of at least 10 Ns within the first 20 bases", seq);
3084 "Sequence has a stretch of at least 10 Ns within the last 20 bases", seq);
3096 boolat_least_one =
false;
3098 for(
CSeqVector_CIsv_iter(vec); (sv_iter) && rval; ++sv_iter) {
3099 if(*sv_iter !=
'N') {
3102at_least_one =
true;
3106 return(rval && at_least_one);
3113 switch(seq_data.
Which()) {
3115vector<char>::const_iterator it = seq_data.
GetNcbi4na().
Get().begin();
3116 unsigned char mask= 0xf0;
3117 unsigned charshift = 4;
3118 for(
size_t n= 0;
n<
len;
n++) {
3119 unsigned charc = ((*it) &
mask) >> shift;
3135 for(
size_t n= 0;
n<
len&&
n< s.length();
n++) {
3136 if(s[
n] ==
'N') {
3149 for(
size_t n= 0;
n<
len;
n++) {
3150 if(s[
n] ==
'N') {
3167 for(
CSeqMap_CIseq_iter(bsh, sel); seq_iter; ++seq_iter) {
3168 switch(seq_iter.GetType()) {
3170 count+=
CountNs(seq_iter.GetData(), seq_iter.GetLength());
3203 boolis_first =
true;
3207 if((*iter)->IsLoc()) {
3210 if((*iter)->IsLiteral()) {
3267 intmax_stretch = 0;
3268 autoIsN = [](
charc) {
returnc ==
'N'; };
3270 for(
autobegin_it = find_if_not(begin(vec), end(vec), IsN);
3271begin_it != end(vec);) {
3272 autodistanceToEnd = distance(begin_it, end(vec));
3274 autointerval = (distanceToEnd > threshold) ? threshold : distanceToEnd;
3275 autoend_it = find_if(begin_it,
next(begin_it, interval), IsN);
3276 const autocurrent_stretch = distance(begin_it, end_it);
3277 if(current_stretch >= threshold) {
3281 if(current_stretch > max_stretch) {
3282max_stretch = current_stretch;
3284begin_it = find_if_not(end_it, end(vec), IsN);
3329 "Maximum contig length is "+
NStr::IntToString(*oMaxLength) +
" bases", seq);
3336 boolbegin_ambig =
false, end_ambig =
false;
3344 boolis_circular =
false;
3346is_circular =
true;
3367 "Sequence has more than 5 Ns in the first 10 bases or more than 15 Ns in the first 50 bases",
3372 "Sequence has more than 5 Ns in the last 10 bases or more than 15 Ns in the last 50 bases",
3387 TSeqPosnum_ns = 0, max_stretch = 0;
3390 intpct_n = (num_ns * 100) / seq.
GetLength();
3396 if(max_stretch >= 15) {
3402 "Sequence has a stretch of at least 10 Ns within the first 20 bases", seq);
3406 "Sequence has a stretch of at least 10 Ns within the last 20 bases", seq);
3412 intpct_n =
PctNs(bsh);
3428}
catch(exception&) {
3465vector<TSeqPos> gapPositions;
3474 for(; gap_it; ++gap_it) {
3479gapPositions.push_back(gp_start);
3480gapPositions.push_back(gp_end);
3487vector<TSeqPos> featPositions;
3493 for(; feat_it; ++feat_it) {
3504featPositions.push_back(ft_start);
3505featPositions.push_back(ft_end);
3512 intremaininig_gaps = (
int) gapPositions.size() / 2;
3513 intremaining_feats = (
int) featPositions.size() / 2;
3515 if(remaininig_gaps < 1 || remaining_feats < 1) {
3522 TSeqPosgap_start = gapPositions[gap_idx];
3524 TSeqPosgap_end = gapPositions[gap_idx];
3528 TSeqPosfeat_start = featPositions[feat_idx];
3530 TSeqPosfeat_end = featPositions[feat_idx];
3536 while(remaininig_gaps >= 0 && remaining_feats >= 0) {
3537 if(gap_end < feat_start) {
3538 if(remaininig_gaps <= 0) {
3541gap_start = gapPositions[gap_idx];
3543gap_end = gapPositions[gap_idx];
3546}
else if(feat_end < gap_start) {
3547 if(remaining_feats <= 0) {
3550feat_start = featPositions[feat_idx];
3552feat_end = featPositions[feat_idx];
3557 if(feat_start != gap_start || feat_end != gap_end) {
3560 if(remaininig_gaps <= 0) {
3563gap_start = gapPositions[gap_idx];
3565gap_end = gapPositions[gap_idx];
3568 if(remaining_feats <= 0) {
3571feat_start = featPositions[feat_idx];
3573feat_end = featPositions[feat_idx];
3581}
catch(
constexception&) {
3596 "Fuzzy length on "+ rpr +
" Bioseq", seq);
3603 "Invalid Bioseq length ["+
len+
"]", seq);
3617 "HTGS 2 raw seq has no gaps and no graphs", seq);
3632 if(inst.
IsAa()) {
3634 "Using a nucleic acid alphabet on a protein sequence",
3644 if(inst.
IsNa()) {
3646 "Using a protein alphabet on a nucleic acid",
3655 "Sequence alphabet not set",
3660 boolcheck_alphabet =
false;
3661 unsigned intfactor = 1;
3667check_alphabet =
true;
3687 "Sequence alphabet not set",
3692 if(calc_len % factor) {
3701 if(calc_len > data_len) {
3703 "Bioseq.seq_data too short ["+ data_len_str +
3704 "] for given length ["+ s_len +
"]", seq);
3706}
else if(calc_len < data_len) {
3708 "Bioseq.seq_data is larger ["+ data_len_str +
3709 "] than given length ["+ s_len +
"]", seq);
3712 if(check_alphabet) {
3713 unsigned inttrailingX = 0;
3715 boolleading_x =
false, found_lower =
false, cds_5_prime =
false;
3722 for(
CSeqVector_CIsv_iter(*sv), sv_res_iter(sv_res); (sv_iter) && (sv_res_iter); ++sv_iter, ++sv_res_iter) {
3728}
else if(res ==
'*'&& bsh.
IsAa()) {
3730}
else if(res ==
'-'&& bsh.
IsAa()) {
3738 if(++bad_cnt > 10) {
3740 "More than 10 invalid residues. Checking stopped",
3750found_lower =
true;
3752 string msg=
"Invalid";
3753 if(seq.
IsNa() && strchr (
"EFIJLOPQXZ", res) !=
NULL) {
3754 msg+=
" nucleotide";
3755}
else if(seq.
IsNa() && res ==
'U') {
3756 msg+=
" nucleotide";
3758 msg+=
" residue ";
3772}
else if(res ==
'-'|| sv->IsInGap(pos - 1)) {
3774}
else if(res ==
'*') {
3776}
else if(res ==
'X') {
3782 string msg=
"Invalid residue [";
3797 if(seq.
IsAa() && (leading_x || trailingX > 0)) {
3807cds_seq = cds_seq.substr(1);
3809cds_seq = cds_seq.substr(2);
3816 if(cds_seq.length() >= 3) {
3817 stringlastcodon = cds_seq.substr(cds_seq.length() - 3);
3826cds_5_prime =
true;
3838 "Sequence starts with leading X", seq);
3843 string msg=
"Sequence ends in "+
3845 if(trailingX > 1) {
3853 "Sequence contains lower-case characters", seq);
3856 if(terminations > 0 || dashes > 0) {
3873 stringprotein_label;
3879 if(! prots.empty()) {
3881prots[0].GetData().GetProt();
3883protein_label = first_prot.
GetName().front();
3887}
catch(
conststd::exception&) {
3891gene_label =
"gene?";
3894protein_label =
"prot?";
3898 if(gap_at_start && dashes == 1) {
3900 "gap symbol at start of protein sequence ("+ gene_label +
" - "+ protein_label +
")",
3902}
else if(gap_at_start) {
3904 "gap symbol at start of protein sequence ("+ gene_label +
" - "+ protein_label +
")",
3907 "["+
NStr::SizetToString(dashes - 1) +
"] internal gap symbols in protein sequence ("+ gene_label +
" - "+ protein_label +
")",
3911 "["+
NStr::SizetToString(dashes) +
"] internal gap symbols in protein sequence ("+ gene_label +
" - "+ protein_label +
")",
3916 if(terminations > 0) {
3918 msg+=
" ("+ gene_label +
" - "+ protein_label +
")";
3929 boolis_wgs =
IsWGS(bsh);
3933 boolhas_gap_char =
false;
3938 const size_trun_len_cutoff = ( is_wgs ? 20 : 100 );
3939 for(
CSeqVector_CIsv_iter(sv); (sv_iter); ++sv_iter, ++pos) {
3949has_gap_char =
true;
3955 if(run_len >= run_len_cutoff && start_pos > 1) {
3967 "Raw nucleotide should not contain gap characters", seq);
3979 stringid_test_label;
3996 if(seqlen > loclen) {
4001}
else if(seqlen < loclen) {
4014 constlist<CRef<CSeq_loc>>& locs = inst.
GetExt().
GetSeg().
Get();
4020list<CRef<CSeq_loc>>::const_iterator i2 = i1;
4021 for(++i2; i2 != locs.end(); ++i2) {
4029 if((**i1).IsWhole() && (**i2).IsWhole()) {
4032 "Segmented sequence has multiple references to "+
4037 "Segmented sequence has multiple references to "+
4038sid +
" that are not SEQLOC_WHOLE", seq);
4049 boolgot_partial =
false;
4051 if(! (*sd)->IsMolinfo() || ! (*sd)->GetMolinfo().IsSetCompleteness()) {
4055 switch((*sd)->GetMolinfo().GetCompleteness()) {
4057got_partial =
true;
4060 "Complete segmented sequence with MolInfo partial", seq);
4066 "No-left inconsistent with segmented SeqLoc",
4069got_partial =
true;
4074 "No-right inconsistent with segmented SeqLoc",
4077got_partial =
true;
4082 "No-ends inconsistent with segmented SeqLoc",
4085got_partial =
true;
4091 if(! got_partial) {
4093 "Partial segmented sequence without MolInfo partial", seq);
4124 if((*it)->IsSwissprot()) {
4136}
else if(
cmp> 0) {
4142 if(start1 < start2) {
4144}
else if(start2 < start1) {
4151 if(stop1 < stop2) {
4171}
else if((*sg)->IsLoc()) {
4172 const CSeq_id*
id= (*sg)->GetLoc().GetId();
4190 if(! loc.
IsInt()) {
4199far_loc->
SetInt().SetFrom(start - 2);
4200far_loc->
SetInt().SetTo(start - 1);
4209far_loc->
SetInt().SetFrom(stop + 1);
4210far_loc->
SetInt().SetTo(stop + 2);
4228 "Delta seq component should not be of type whole", seq);
4235 "Delta component is gi|0", seq);
4250 if(seq_len <= stop) {
4251 stringid_label =
id->AsFastaString();
4254+
") greater than length of "+ id_label
4259 stringid_label =
id->AsFastaString();
4261 "Scaffold points to some but not all of "+
4262id_label +
", excluded portion contains features", seq);
4266 "Unable to find far delta sequence component", seq);
4269}
catch(
conststd::exception&) {
4280 "-1 length on seq-loc of delta seq_ext", seq);
4283 if(loc_str.empty()) {
4288 "Short length (-1) on seq-loc ("+ loc_str +
") of delta seq_ext", seq);
4293 if(loc_len <= 10) {
4296 if(loc_str.empty()) {
4302 ") on seq-loc ("+ loc_str +
") of delta seq_ext", seq);
4310 if(loc_str.empty()) {
4314 "No length for Seq-loc ("+ loc_str +
") of delta seq-ext",
4324}
else if(seg.
IsLoc()) {
4342 "proximity ligation",
4360 boolis_unspec =
false;
4365 intlinktype = evidence.
GetType();
4366 if(linktype == 8) {
4416 "No CDelta_ext data for delta Bioseq", seq);
4419 boolany_tech_ok =
false;
4420 boolhas_gi =
false;
4423any_tech_ok =
true;
4425}
else if((*id_it)->IsGi()) {
4430 if(! any_tech_ok && seq.
IsNa()
4444 boollast_is_gap =
false;
4445 intprev_gap_linkage = -1;
4447 intgap_linkage = -1;
4449 size_tnum_gaps = 0;
4450 size_tnum_adjacent_gaps = 0;
4451 boolnon_interspersed_gaps =
false;
4453 intnum_gap_known_or_spec = 0;
4454 intnum_gap_unknown_unspec = 0;
4456vector<CConstRef<CSeq_loc> > delta_locs;
4462 "NULL pointer in delta seq_ext valnode (segment "+
4466 switch((**sg).Which()) {
4468 const CSeq_loc& loc = (**sg).GetLoc();
4470delta_locs.push_back(
tmp);
4474 if(! last_is_gap && !
first) {
4475non_interspersed_gaps =
true;
4477last_is_gap =
false;
4478prev_gap_linkage = -1;
4493 "Seq-lit of length 0 in delta chain", seq);
4498 if(! last_is_gap && !
first) {
4499non_interspersed_gaps =
true;
4501last_is_gap =
false;
4502prev_gap_linkage = -1;
4505vector<TSeqPos> badIdx;
4507 const string* ss =
nullptr;
4508 switch(
data.Which()) {
4510ss = &
data.GetIupacaa().Get();
4513ss = &
data.GetIupacna().Get();
4516ss = &
data.GetNcbieaa().Get();
4519 constvector<char>& c =
data.GetNcbistdaa().Get();
4520 ITERATE(vector<TSeqPos>, ci, badIdx) {
4522 "Invalid residue ["+
4532 ITERATE(vector<TSeqPos>, it, badIdx) {
4534 "Invalid residue ["+
4535ss->substr(*it, 1) +
"] at position ["+
4544 if(max_ns >= 0 && adjacent_ns >
unsigned(max_ns)) {
4563num_gap_unknown_unspec++;
4565num_gap_known_or_spec++;
4579 "First delta seq component is a gap", seq);
4583(prev_gap_type == gap_type ||
4584prev_gap_linkage != gap_linkage ||
4587++num_adjacent_gaps;
4596 "Gap of length 0 in delta chain", seq);
4599 "Gap of length 0 with unknown fuzz in delta chain", seq);
4604 "Gap of unknown length should have length 100", seq);
4607last_is_gap =
true;
4608prev_gap_type = gap_type;
4609prev_gap_linkage = gap_linkage;
4616 "CDelta_seq::Which() is e_not_set", seq);
4620 if(num_gap_unknown_unspec > 0 && num_gap_known_or_spec == 0) {
4621 if(num_gap_unknown_unspec > 1) {
4624 " Seq-gaps have unknown type and unspecified linkage", seq);
4627 "Single Seq-gap has unknown type and unspecified linkage", seq);
4642 if(non_interspersed_gaps && ! has_gi && mi &&
4656 "HTGS delta seq should have gaps between all sequence runs", seq);
4658 if(num_adjacent_gaps >= 1) {
4659 string msg= (num_adjacent_gaps == 1) ?
4660 "There is 1 adjacent gap in delta seq":
4662 " adjacent gaps in delta seq";
4672 "Last delta seq component is a gap", seq);
4676 if(num_gaps == 0 && mi) {
4681 "HTGS 2 delta seq has no gaps and no graphs", seq);
4686 if(delta_locs.size() > 1) {
4688vector<CConstRef<CSeq_loc>>::iterator it1 = delta_locs.begin();
4689vector<CConstRef<CSeq_loc>>::iterator it2 = it1;
4691 while(it2 != delta_locs.end()) {
4694 stringseq_label = (*it1)->GetId()->AsFastaString();
4700+
" on a Bioseq "+ seq_label,
4710 "Self-referential delta sequence", seq);
4719 if(delta_i->Empty()) {
4727 if(res ==
'N'&& ! sv.
IsInGap(pos - 1)) {
4729 "Ambiguous residue N is adjacent to a gap around position "+
NStr::SizetToString(pos + 1),
4734 if(delta_len > 0 && pos + delta_len <
len) {
4735 if(sv.
IsInGap(pos + delta_len - 1)) {
4737 if(res ==
'N'&& ! sv.
IsInGap(pos + delta_len)) {
4739 "Ambiguous residue N is adjacent to a gap around position "+
NStr::SizetToString(pos + delta_len + 1),
4747}
catch(
conststd::exception&) {
4756 boolhas_gi =
false;
4758 if((*id_it)->IsGi()) {
4771 intlinkevarray[13];
4772 for(
int i= 0;
i< 13;
i++) {
4773linkevarray[
i] = 0;
4775 boolis_unspec =
false;
4780 intlinktype = evidence.
GetType();
4781 if(linktype == 8) {
4785 if(linktype == 255) {
4786(linkevarray[11])++;
4787}
else if(linktype < 0 || linktype > 10) {
4788(linkevarray[12])++;
4790(linkevarray[linktype])++;
4794 "Seq-gap type should not be within-clone for genome submission", seq);
4797 if(linkevarray[8] > 0 && linkcount > linkevarray[8]) {
4799 "Seq-gap type has unspecified and additional linkage evidence", seq);
4801 for(
int i= 0;
i< 13;
i++) {
4802 if(linkevarray[
i] > 1) {
4804 string(
"Linkage evidence '") +
linkEvStrings[
i] +
"' appears "+
4810 "Seq-gap with linkage evidence must have linkage field set to linked", seq);
4813 intgaptype = gap.
GetType();
4821 if(linkevarray[8] > 0 && linkcount == linkevarray[8]) {
4825 "Contamination gaps must have linkage evidence 'unspecified'", seq);
4830 " should not have linkage evidence", seq);
4836 intgaptype = gap.
GetType();
4839 "Seq-gap type == scaffold is missing required linkage evidence", seq);
4842 boolsuppress_SEQ_INST_SeqGapProblem =
false;
4846 if((**it).IsCreate_date())
4850suppress_SEQ_INST_SeqGapProblem =
true;
4855 if(! suppress_SEQ_INST_SeqGapProblem)
4857 "Seq-gap type == repeat and linkage == linked is missing required linkage evidence", seq);
4862 "Contamination gap-types must be linked and have linkage-evidence of type 'unspecified'", seq);
4879rpr =
"constructed";
4881 const stringerr0 =
"Bioseq-ext not allowed on "+ rpr +
" Bioseq";
4882 const stringerr1 =
"Missing or incorrect Bioseq-ext on "+ rpr +
" Bioseq";
4883 const stringerr2 =
"Missing Seq-data on "+ rpr +
" Bioseq";
4884 const stringerr3 =
"Seq-data not allowed on "+ rpr +
" Bioseq";
4953 "Invalid Bioseq->repr = "+
4976 if(! it->IsLoc())
continue;
4978 if(! hdl)
continue;
4980 if(! ci)
continue;
4986 if(parent_location == cgenome)
break;
5036 "Transgenic source descriptor requires presence of source feature",
5043 "Genome difference between parent and component",
5050 "Mitochondrial Metazoan sequences should be less than 65000 bp",
5060 "A genomic sequence should not have uncultured in its organism name",
5074 "No Mol-info applies to this Bioseq",
5092 if(all_feat_it->IsSetCit() || all_feat_it->GetData().IsPub()) {
5105 if(closest_molinfo) {
5108 "Suspicious use of complete",
ctx, *closest_molinfo);
5111 "Suspicious use of complete", seq);
5123 if(! seq.
IsNa()) {
5138sequence::CDeflineGenerator defline_generator;
5139title = defline_generator.GenerateDefline(seq, *
m_Scope, sequence::CDeflineGenerator::fIgnoreExisting);
5148 boolreported =
false;
5152 boolis_gb =
false;
5154 if((*it)->IsGenbank()) {
5165 "Circular topology has complete flag set, but title should say complete sequence or complete genome",
5240 if(it->IsSeq() && it->GetSeq().IsSetInst_Repr() &&
5242parent = it->GetSeq();
5275vector<CConstRef<CSeq_feat>> containing_genes;
5276vector<int> num_contained;
5279vector<CConstRef<CSeq_feat>>::iterator cit = containing_genes.begin();
5280vector<int>::iterator nit = num_contained.begin();
5281 while(cit != containing_genes.end() && nit != num_contained.end()) {
5287 if(n_right < left) {
5295cit = containing_genes.erase(cit);
5296nit = num_contained.erase(nit);
5303 const CSeq_feat& ft = fi->GetOriginalFeature();
5306containing_genes.push_back(ref);
5307num_contained.push_back(0);
5310vector<CConstRef<CSeq_feat>>::iterator cit = containing_genes.begin();
5311vector<int>::iterator nit = num_contained.begin();
5312 while(cit != containing_genes.end() && nit != num_contained.end()) {
5321}
catch(
constexception& e) {
5323 string(
"Exception while validating bioseq MultipleGeneOverlap. EXCEPTION: ") +
5331 string msg(
"gene [");
5336 msg+=
"] overlaps CDS but does not completely contain it";
5345 msg+=
"] overlaps mRNA but does not completely contain it";
5368 if(!
label.empty()) {
5376 if(!
label.empty()) {
5392 if(connected_gene) {
5400overlap_type,
m_Scope) < 0) {
5435 if((*db)->CanGetDb() &&
5447 if(vec.
IsInGap(pos) || vec[pos] ==
'N') {
5467 if(pos <
len- after && vec.
IsInGap(pos + after)) {
5487 if(pos >= before && vec.
IsInGap(pos - before)) {
5510 for(
CSeq_loc_CIsl_iter(loc); sl_iter; ++sl_iter) {
5517 if(
first.GetStrand() !=
last.GetStrand()) {
5540 if(start >=
len|| stop >=
len) {
5545 swap(acceptor, donor);
5546stop =
len- donor - 1;
5547start =
len- acceptor - 1;
5570}
catch(exception&) {
5580 if((res1 ==
'G'&& res2 ==
'T') ||
5581(res1 ==
'G'&& res2 ==
'C')) {
5587}
catch(exception&) {
5596 if((res1 ==
'A') && (res2 ==
'G')) {
5602}
catch(exception&) {
5615 switch(partial_type) {
5679vector<CMappedFeat> children = tr->GetChildren(feat);
5680 ITERATE(vector<CMappedFeat>, it, children) {
5690 boollook_for_gene =
true;
5699vector<CMappedFeat> cds_children = tr->GetChildren(feat);
5700 if(cds_children.size() > 0) {
5701look_for_gene =
false;
5702 for(
autoit = cds_children.begin(); it != cds_children.end(); it++) {
5719 if(! rval && look_for_gene) {
5739 boolbad_seq =
false;
5740 boolis_gap =
false;
5741 boolabuts_n =
false;
5749 #ifdef USE_FEAT_TREE_FOR_EXON 5772 const CSeq_loc& mrna_loc = s->second->GetLocation();
5801 "PartialLocation: Improper use of partial (greater than or less than)", feat);
5820 "PartialLocation: Internal partial intervals do not include first/last residue of sequence", feat);
5847 if(! partial_start && ! partial_stop) {
5869 if(intron_start == stop + 1 && partial_stop) {
5872 if(intron_start > stop + 1) {
5875 if(start > 0 && partial_start) {
5877 if(intron_stop == start - 1) {
5897 string msg= (partial_type == 0 ?
"Start":
"Stop");
5898 msg+=
" does not include first/last residue of ";
5901 boolorganelle =
false;
5902 boolnot_expected =
false;
5903 if(at_splice_or_gap) {
5909 msg+=
"organelle ";
5913not_expected =
true;
5923 msg+=
" (organelle does not use standard splice site convention)";
5926 msg+=
" (but is at consensus splice site)";
5935 if(partial_type == 0) {
5938}
else if(organelle) {
5946}
else if(organelle) {
5968 "Feature products should be entire sequences.", *(feat.
GetSeq_feat()));
5989 boolno_nonconsensus_except =
true;
5993 if(
NStr::Find(except_text,
"nonconsensus splice site") != string::npos ||
5994 NStr::Find(except_text,
"heterogeneous population sequenced") != string::npos ||
5995 NStr::Find(except_text,
"low-quality sequence region") != string::npos ||
5996 NStr::Find(except_text,
"artificial location") != string::npos) {
5997no_nonconsensus_except =
false;
6002 stringcomment_text;
6009 "Partial CDS on complete sequence",
6015 for(
intj = 0; j < 2; ++j) {
6016 if(partial_loc & errtype) {
6017 boolbad_seq =
false;
6018 boolis_gap =
false;
6019 boolabuts_n =
false;
6035}
else if(bad_seq) {
6038 "PartialLocation: Start does not include first/last residue of sequence (and is at bad sequence)":
6039 "PartialLocation: Stop does not include first/last residue of sequence (and is at bad sequence)"),
6043&&
NStr::Find(except_text,
"rearrangement required for product") != string::npos) {
6046 NStr::Find(comment_text,
"coding region disrupted by sequencing gap") != string::npos) {
6050}
else if(! no_nonconsensus_except) {
6061 "5' partial is not at beginning of sequence, gap, or consensus splice site",
6065 "3' partial is not at end of sequence, gap, or consensus splice site",
6074 "Start does not include first/last residue of sequence", *(feat.
GetSeq_feat()));
6075}
else if(j == 1) {
6077 "Stop does not include first/last residue of sequence", *(feat.
GetSeq_feat()));
6112 const CBioseq& seq,
boolis_complete)
6117 if((*it)->IsGenbank()) {
6118 if((*it)->GetGenbank().IsSetAccession()) {
6119accession = (*it)->GetGenbank().GetAccession();
6122}
else if((*it)->IsDdbj()) {
6123 if((*it)->GetDdbj().IsSetAccession()) {
6124accession = (*it)->GetDdbj().GetAccession();
6127}
else if((*it)->IsGi()) {
6133 unsigned intnummrna = 0, numcds = 0, numcrgn = 0, numvseg = 0, numdseg = 0, numjseg = 0;
6134 intnumgene = 0, num_pseudomrna = 0, num_pseudocds = 0, num_rearrangedcds = 0;
6135vector< CConstRef < CSeq_id > > cds_products, mrna_products;
6137 intnum_full_length_prot_ref = 0;
6144 boolis_emb =
false, non_pseudo_16S_rRNA =
false;
6147 if((*seq_it)->IsEmbl()) {
6149}
else if((*seq_it)->IsOther()) {
6154 intfirstcdsgencode = 0;
6155 boolmixedcdsgencodes =
false;
6159 const CSeq_feat& feat = fi->GetOriginalFeature();
6168 stringlocus = gene_ref.
GetLocus();
6171 const CSeq_feat& gene_feat = gene_it->GetOriginalFeature();
6178 "locus collides with locus_tag in another gene", feat);
6188cds_products.push_back(ref);
6197num_rearrangedcds++;
6206 if((*it)->IsId()) {
6207cdsgencode = (*it)->GetId();
6210 if(cdsgencode != 0) {
6211 if(firstcdsgencode == 0) {
6212firstcdsgencode = cdsgencode;
6213}
else if(firstcdsgencode != cdsgencode) {
6214mixedcdsgencodes =
true;
6224mrna_products.push_back(ref);
6240non_pseudo_16S_rRNA =
true;
6270num_full_length_prot_ref++;
6279 "Genes on protein sequences with PGAP annotation should not have locus tags.", feat);
6285 "Invalid feature for a protein Bioseq.", feat);
6300 boolslippage_except =
false;
6301 boolcircular_rna =
false;
6309 if((! excpet || ! slippage_except) && ! circular_rna) {
6312 "Multi-interval CDS feature is invalid on an mRNA " 6322 "mRNA feature is invalid on an mRNA (cDNA) Bioseq.",
6328 if(imp.
GetKey() ==
"intron") {
6330 "Invalid feature for an mRNA Bioseq.", feat);
6341 "Feature has 'far' location - accession not packaged in record",
6353 boolisEukaryote =
false;
6354 boolisMicrosporidia =
false;
6361isEukaryote =
true;
6363isMicrosporidia =
true;
6367 if(isEukaryote && (! isMicrosporidia) &&
6378 "Improper 16S ribosomal RNA",
6385 if(mixedcdsgencodes) {
6391 "Multiple CDS genetic codes on sequence", seq);
6396 if(is_aa && num_full_length_prot_ref == 0) {
6405 const CSeq_feat& prot_feat = it->GetOriginalFeature();
6409(range.
GetFrom() == 0 && range.
GetTo() == parent_len - 1)) &&
6413num_full_length_prot_ref++;
6415}
catch(
constexception&) {
6418(range.
GetFrom() == 0 && range.
GetTo() == parent_len - 1)) &&
6419(! it->GetData().GetProt().IsSetProcessed() ||
6422num_full_length_prot_ref++;
6429 if(is_aa && num_full_length_prot_ref == 0 && ! is_virtual && !
m_Imp.
IsPDB()) {
6433 if(is_aa && num_full_length_prot_ref > 1 && !
SeqIsPatent(seq)) {
6436+
" full-length protein features present on protein", seq);
6451 boolcds_products_unique =
true;
6452 if(cds_products.size() > 1) {
6453stable_sort(cds_products.begin(), cds_products.end(),
s_SeqIdCompare);
6457 boolmrna_products_unique =
true;
6458 if(mrna_products.size() > 1) {
6459stable_sort(mrna_products.begin(), mrna_products.end(),
s_SeqIdCompare);
6463 if(numcds > 0 && nummrna > 1) {
6464 if(cds_products.size() > 0 && cds_products.size() + num_pseudocds + num_rearrangedcds != numcds) {
6470 if(cds_products.size() > 0 && (! cds_products_unique)) {
6472 "CDS products are not unique", seq);
6474 if(mrna_products.size() > 0 && mrna_products.size() + num_pseudomrna != nummrna) {
6480 if(mrna_products.size() > 0 && (! mrna_products_unique)) {
6482 "mRNA products are not unique", seq);
6505}
catch(
constexception& e) {
6506 if(
NStr::Find(e.what(),
"Error: Cannot resolve") == string::npos) {
6508 string(
"Exception while validating Seqfeat Context. EXCEPTION: ") +
6526 if((*id_it)->IsGi()) {
6527 return(*id_it)->GetGi();
6581 boolmatch1 =
false, match2 =
false;
6582 boolhas1 =
false, has2 =
false;
6584 if((*itx)->IsSetId()) {
6593 if((*itx)->IsSetId()) {
6601 if((has1 || has2) && (! match1 || ! match2)) {
6620 if(
id.GetGi() == gi) {
6628}
catch(
conststd::exception&) {
6642 bool match=
false;
6674m_HasMatch(
false),
6721 const CSeq_loc& utr5_loc = s->second->GetLocation();
6724 if(utr5_start == mrna_start) {
6725 if(mrna_stop >= utr5_stop && mrna_stop - utr5_stop < 6) {
6727}
else if(utr5_stop >= mrna_stop && utr5_stop - mrna_stop < 6) {
6736 const CSeq_loc& utr3_loc = s->second->GetLocation();
6739 if(utr3_stop == mrna_stop) {
6740 if(mrna_start >= utr3_start && mrna_start - utr3_start < 6) {
6742}
else if(utr3_start >= mrna_start && utr3_start - mrna_start < 6) {
6758m_IsPseudo(
false),
6759m_NeedsMatch(
true),
6760m_ProductsUnique(
true)
6816 if(unmatched_mrnas.
empty()) {
6821 if(! (*xref_it)->IsSetId() ||
6822! (*xref_it)->GetId().IsLocal()) {
6826 ITERATE(vector<CSeq_feat_Handle>, h, handles) {
6831 if(mrna_it != unmatched_mrnas.
end()) {
6834unmatched_mrnas.
erase(mrna_it);
6846 if(unmatched_mrnas.
empty()) {
6854 if(feats.size() == 0) {
6856 while(mrna_it != unmatched_mrnas.
end()) {
6857 if(
Overlaps(mrna_it->second->GetSeqfeat())) {
6860unmatched_mrnas.
erase(mrna_it);
6868 if(mrna_it != unmatched_mrnas.
end()) {
6874unmatched_mrnas.
erase(mrna_it);
6887 stringproduct_string;
6890 returnproduct_string;
6895 returnproduct_string;
6901list<CConstRef<CSeq_feat>>::iterator it =
m_OtherMrnas.begin();
6902list<string> product_strings;
6905 if(mrna_it == unmatched_mrnas.
end()) {
6909product_strings.push_back(product_string);
6919 const autonum_products = product_strings.size();
6920 if(product_strings.size() > 1) {
6922product_strings.sort();
6923product_strings.unique();
6924 const autonum_unique_products = product_strings.size();
6925 if(num_unique_products == num_products) {
6968 if(num_mrnas < 2) {
6974+
" mRNAs, but product locations are unique",
6993 const auto& cds_feat = cds_match.
GetSeqfeat();
6998 if(xrefs_match == 2) {
7000 "MrnaProteinLink inconsistent with feature ID cross-references",
7008 size_tstart_pos =
NStr::Find(protein_id,
"gnl|");
7009 if(start_pos == string::npos) {
7012start_pos =
NStr::Find(protein_id,
"|", start_pos + 5);
7013 if(start_pos == string::npos) {
7016 size_tend_pos =
NStr::Find(protein_id,
"|", start_pos + 1);
7018 if(end_pos == string::npos) {
7019prot_tag = protein_id.substr(start_pos + 1);
7021prot_tag = protein_id.substr(start_pos + 1, end_pos - start_pos - 1);
7044 "CDS-mRNA pair has one missing protein_id ("+ protein_id +
")", cds);
7052 "CDS-mRNA pair has mismatching protein_ids ("+
7053product_id.
AsFastaString() +
", "+ protein_id +
")", cds);
7060 if((*id_it)->IsGeneral()) {
7063 "CDS-mRNA pair has mismatching protein_ids ("+
7064(*id_it)->AsFastaString() +
", "+ protein_id +
")", cds);
7078 if(
NStr::Equal(protein_id, (*id_it)->AsFastaString())) {
7087 "CDS-mRNA pair has one missing protein_id ("+ protein_id +
")", cds);
7097 const auto& cds_feat = cds_match.
GetSeqfeat();
7098 stringcds_transcript_id;
7099 stringmrna_transcript_id;
7100 stringmrna_protein_id;
7101 boolmust_reconcile =
false;
7102 if(mrna_feat.IsSetQual()) {
7104 if((*q)->IsSetQual() && (*q)->IsSetVal()) {
7106mrna_transcript_id = (*q)->GetVal();
7107must_reconcile =
true;
7109mrna_protein_id = (*q)->GetVal();
7110must_reconcile =
true;
7115 if(cds_feat.IsSetQual()) {
7117 if((*q)->IsSetQual() && (*q)->IsSetVal()) {
7119cds_transcript_id = (*q)->GetVal();
7120must_reconcile =
true;
7126 if(must_reconcile) {
7127 if(!
NStr::Equal(mrna_transcript_id, cds_transcript_id)) {
7129 "CDS-mRNA pair has mismatching transcript_ids (" 7130+ cds_transcript_id +
","+ mrna_transcript_id +
")",
7190 for(
autoit : feat.
GetQual()) {
7218 if(
data.IsGene()) {
7276 if(strand1 == strand2) {
7283 "No parent for (pseudo) CdRegion", ft1);
7286 "No parent for CdRegion", ft1);
7294 "No parent for (pseudo) CdRegion", ft2);
7297 "No parent for CdRegion", ft2);
7311 unsigned intlclcds = 0, lclcrgn = 0, lclvseg = 0, lcldseg = 0, lcljseg = 0, lclnone = 0, lclothr = 0;
7318 if(sbt == CSeqFeatData::ESubtype::eSubtype_cdregion) {
7325 if(ptyp == CSeqFeatData::ESubtype::eSubtype_C_region) {
7327}
else if(ptyp == CSeqFeatData::ESubtype::eSubtype_V_segment) {
7329}
else if(ptyp == CSeqFeatData::ESubtype::eSubtype_D_segment) {
7331}
else if(ptyp == CSeqFeatData::ESubtype::eSubtype_J_segment) {
7349locus = gene.GetLocus();
7351locus = gene.GetLocus_tag();
7353CConstRef<CSeq_loc> gloc = gne->GetMappedLocation();
7356gloc->GetLabel(&locus);
7361 if(locus.length() > 0) {
7362PostErr(eDiag_Warning, eErr_SEQ_FEAT_CDSdoesNotMatchVDJC,
7363 "No parent for CdRegion (gene is "+ locus +
")", *sf);
7365PostErr(eDiag_Warning, eErr_SEQ_FEAT_CDSdoesNotMatchVDJC,
7366 "No parent for CdRegion", *sf);
7389list<CRef<CCdsMatchInfo>> cds_list;
7394 for(
const auto& mapped_feat : *
m_AllFeatIt) {
7395 if(! mapped_feat.IsSetData()) {
7399 if(mapped_feat.GetData().IsCdregion()) {
7400 const auto& cds_feat = *mapped_feat.GetSeq_feat();
7405 if(cds_feat.IsSetPseudo() && cds_feat.GetPseudo()) {
7406cds_match->SetPseudo();
7414cds_list.push_back(cds_match);
7416 const auto& feat = *mapped_feat.GetSeq_feat();
7421 if(! mrna_map.
empty()) {
7425 const size_tnum_mrna = mrna_map.
size();
7428 for(
auto&& cds : cds_list) {
7429cds->AssignXrefMatch(mrna_map, tse);
7432 if(! mrna_map.
empty()) {
7433 for(
auto&& cds : cds_list) {
7434 if(! cds->HasMatch()) {
7435cds->AssignOverlapMatch(mrna_map, *
m_Scope);
7440 intnum_matched_cds = 0;
7441 intnum_unmatched_cds = 0;
7442 for(
auto&& cds : cds_list) {
7450 if(cds->IsPseudo() ||
7451(cds->GetSeqfeat().IsSetExcept() &&
7452cds->GetSeqfeat().IsSetExcept_text() &&
7453 NStr::Find(cds->GetSeqfeat().GetExcept_text(),
"rearrangement required for product") != string::npos)) {
7454cds->NeedsMatch(
false);
7458 if(cds->HasMatch()) {
7461++num_unmatched_cds;
7467 if(num_unmatched_cds > 0 &&
7469 if(num_unmatched_cds >= 10) {
7470 const autonmcds = num_matched_cds + num_unmatched_cds;
7474+
" CDSs unmatched",
7477 for(
const auto& cds : cds_list) {
7478 if(! cds->HasMatch() && cds->NeedsMatch()) {
7480 "Unmatched CDS", cds->GetSeqfeat());
7487 size_tnum_unmatched_mrna = 0;
7496num_unmatched_mrna++;
7501 if(num_unmatched_mrna > 10) {
7509 "No CDS location match for 1 mRNA", it->second->GetSeqfeat());
7523TFeatCount cds_count, mrna_count;
7536 const CSeq_feat& feat = it->GetOriginalFeature();
7541 if(cds_count.find(gene) == cds_count.end()) {
7542cds_count[gene] = mrna_count[gene] = 0;
7558 ITERATE(TFeatCount, it, cds_count) {
7559 size_tcds_num = it->second,
7560mrna_num = mrna_count[it->first];
7561 if(cds_num > 0 && mrna_num > 1 && cds_num != mrna_num) {
7565 ") count for gene", *it->first);
7592 const size_tnum_cds = cd_region_feats.size();
7602strand = cd_region_feats.back().GetLocation().GetStrand();
7605 boolis_mrna =
false;
7621 "CDS should not be on minus strand of mRNA molecule", cdregion_it->GetOriginalFeature());
7626 if(is_mrna || (num_cds == 1 && num_gene < 2)) {
7636 boolfirst_cds =
true;
7639vector<CCacheImpl::SFeatKey> featKeys;
7646featKeys.push_back(multi_feat_key_template);
7648featKeys.push_back(multi_feat_key_template);
7650featKeys.push_back(multi_feat_key_template);
7652featKeys.push_back(multi_feat_key_template);
7666 "3'UTR is not on minus strand", cug_it->GetOriginalFeature());
7667}
else if(utr5_right > 0 && utr5_right + 1 != this_left) {
7669 "Previous 5'UTR does not abut next 3'UTR", cug_it->GetOriginalFeature());
7671utr3_right = this_right;
7673 if(utr3_right > 0 && utr3_right + 1 != this_left) {
7675 "CDS does not abut 3'UTR", cug_it->GetOriginalFeature());
7678cds_right = this_right;
7682 "5'UTR is not on minus strand", cug_it->GetOriginalFeature());
7683}
else if(cds_right > 0 && cds_right + 1 != this_left) {
7685 "5'UTR does not abut CDS", cug_it->GetOriginalFeature());
7687utr5_right = this_right;
7699 "5'UTR is not on plus strand", cug_it->GetOriginalFeature());
7700}
else if(utr3_right > 0 && utr3_right + 1 != this_left) {
7702 "Previous 3'UTR does not abut next 5'UTR", cug_it->GetOriginalFeature());
7704utr5_right = this_right;
7706 if(utr5_right > 0 && utr5_right + 1 != this_left && first_cds ) {
7709 "5'UTR does not abut CDS", cug_it->GetOriginalFeature());
7712cds_right = this_right;
7716 "3'UTR is not on plus strand", cug_it->GetOriginalFeature());
7717}
else if(cds_right > 0 && cds_right + 1 != this_left && num_3utr == 1) {
7719 "CDS does not abut 3'UTR", cug_it->GetOriginalFeature());
7721 if(is_mrna && num_cds == 1 && num_3utr == 1 && this_right != (
int) seq.
GetBioseqLength() - 1) {
7723 "3'UTR does not extend to end of mRNA", cug_it->GetOriginalFeature());
7749 if(!
rna.IsSetType()) {
7751}
else if(!
rna.IsSetExt()) {
7754 const string& product =
rna.GetExt().GetName();
7778 if(
rna.GetExt().IsName()) {
7779product =
rna.GetExt().GetName();
7783&& (*it)->IsSetVal() && !
NStr::IsBlank((*it)->GetVal())) {
7784product = (*it)->GetVal();
7789}
else if(
rna.GetExt().IsGen()) {
7790 if(
rna.GetExt().GetGen().IsSetProduct()) {
7791product =
rna.GetExt().GetGen().GetProduct();
7821 if(start < 0 || (
unsigned int) stop >= seq.
GetInst_Length() || start > stop) {
7828 if((*it)->IsLiteral()) {
7829this_len = (*it)->GetLiteral().GetLength();
7830}
else if((*it)->IsLoc()) {
7833 if((*it)->IsLiteral() &&
7834(! (*it)->GetLiteral().IsSetSeq_data() || (*it)->GetLiteral().GetSeq_data().IsGap())) {
7915 "Inconsistent strands for rRNA components",
7921}
else if(right1 + 1 < left2) {
7928 "ITS does not abut adjacent rRNA component",
7934 "ITS does not abut adjacent rRNA component",
7938}
else if(right1 + 1 > left2) {
7944 "ITS overlaps adjacent rRNA component",
7954 "ITS overlaps adjacent rRNA component",
7962}
else if(! is_organelle) {
7973 "Problem with order of abutting rRNA components",
7985 "Problem with order of abutting rRNA components",
8010 if(! same_annot && ! same_label) {
8031 if(g1 && g2 && g1 != g2) {
8039&&
prev.IsSetPartial() &&
prev.GetPartial()) {
8048&&
prev.IsSetPseudo() &&
prev.GetPseudo()) {
8099 "Duplicate feature", feat2);
8109 "Features have identical intervals, but labels differ",
8117 "Duplicate feature (packaged in different feature table)",
8124 "Features have identical intervals, but labels " 8125 "differ (packaged in different feature table)",
8160 string msg=
"Signal, Transit, or Mature peptide features overlap";
8172cds_loc = cds_loc.substr(8);
8179cds_loc =
" (parent CDS is on "+ cds_loc +
")";
8183}
catch(
constexception&) {
8186 if(! reported_last_peptide) {
8196reported_last_peptide =
true;
8198reported_last_peptide =
false;
8212 boolfruit_fly =
false;
8213 boolviral =
false;
8239CCacheImpl::TFeatValue::const_iterator curr_it = prev_it;
8244 for(; curr_it !=
m_AllFeatIt->end(); ++curr_it) {
8247 if(curr_start > prev_end) {
8256CCacheImpl::TFeatValue::const_iterator prev_prot =
m_AllFeatIt->begin();
8258CCacheImpl::TFeatValue::const_iterator curr_prot = prev_prot;
8260 boolreported_last_peptide =
false;
8261 for(; curr_prot !=
m_AllFeatIt->end(); ++prev_prot, ++curr_prot) {
8265}
catch(
constexception& e) {
8266 if(
NStr::Find(e.what(),
"Error: Cannot resolve") == string::npos) {
8268 string(
"Exception while validating duplicate/overlapping features. EXCEPTION: ") +
8276vector<int> intervalpoints;
8281 if(part.
IsInt()) {
8283intervalpoints.push_back(ivl.
GetFrom());
8284intervalpoints.push_back(ivl.
GetTo());
8288intervalpoints.push_back(ivl.
GetFrom());
8289intervalpoints.push_back(ivl.
GetTo());
8293}
else if(part.
IsPnt()) {
8295intervalpoints.push_back(pnt.
GetPoint());
8296intervalpoints.push_back(pnt.
GetPoint());
8301 returnintervalpoints;
8309 while(feat_ci_dup) {
8316 const CSeq_loc& part = curr.GetEmbeddingSeq_loc();
8317 if(part.
IsInt()) {
8321}
else if(part.
IsPnt()) {
8328 if(start + 1 == fr && stop - 1 == to) {
8331 if(start + 1 == fr && to ==
max) {
8352 for(
CFeat_CIfeat_ci(bsh, sel); feat_ci; ++feat_ci) {
8354 const CSeq_feat& const_feat = feat_ci->GetOriginalFeature();
8359 unsigned len= (unsigned)intervalpoints.size();
8365 booltwintron =
true;
8367 for(
unsignedpos = 1; pos <
max; pos += 2) {
8368 Int4intL = intervalpoints[pos];
8369 Int4intR = intervalpoints[pos + 1];
8384 "Multi-interval intron contains possible twintron",
8388 "An intron should not have multiple intervals",
8393 if(
NStr::Find(e.
what(),
"Error: Cannot resolve") == string::npos) {
8405 if((*db)->CanGetDb()) {
8418 boolhas_local =
false, has_genbank =
false;
8419 boolhas_gi =
false, has_tpa =
false, has_bankit =
false, has_smart =
false;
8422 switch((*it)->Which()) {
8429has_genbank =
true;
8443 if((*it)->GetGeneral().IsSetDb()) {
8456 if(has_genbank)
return false;
8457 if(has_tpa)
return true;
8458 if(has_refseq)
return false;
8459 if(has_bankit)
return true;
8460 if(has_smart)
return true;
8461 if(has_gi)
return false;
8462 if(has_local)
return true;
8470 if(date.
IsStr()) {
8471out_date_str = date.
GetStr();
8472}
else if(date.
IsStd()) {
8474date.
GetDate(&out_date_str,
"%{%3N %{%D, %}%}%Y");
8494vector<string> sc_prefixes;
8502sc_prefixes.push_back(prefix);
8508 sort(sc_prefixes.begin(), sc_prefixes.end());
8511 ITERATE(vector<string>, it, sc_prefixes) {
8517 "Multiple structured comments with prefix "+ previous,
8526 "Multiple structured comments with prefix "+ previous,
8617 boolis_master =
false;
8628 boolis_WP =
false;
8632 switch(sid.
Which()) {
8638 const CTextseq_id& tsid = *
id->GetTextseq_Id();
8659 boolembl_or_ddbj =
false;
8661 if((*id)->IsEmbl() || (*id)->IsDdbj()) {
8662embl_or_ddbj =
true;
8667 returnembl_or_ddbj;
8674 if((*id)->IsGenbank()) {
8685 if((*id)->IsOther()) {
8702 "Undesired multiple comment descriptors, identical text",
8724 boolis_nc =
false;
8725 boolis_ac =
false;
8727 if((*id_it)->IsOther() && (*id_it)->GetOther().IsSetAccession()) {
8728 stringaccession = (*id_it)->GetOther().GetAccession();
8738 if(! is_nc && ! is_ac) {
8773 inttech = -1, completeness = -1;
8779 boolis_genome_assembly =
false;
8780 boolis_assembly =
false;
8781 boolis_finished_status =
false;
8790 switch(desc.
Which()) {
8794 if(pos != string::npos) {
8797 if(pos != string::npos) {
8800 if(pos != string::npos) {
8801 boolreport_fasta_brackets =
true;
8803 if((*id_it)->IsGeneral()) {
8804 const CDbtag& dbtag = (*id_it)->GetGeneral();
8808report_fasta_brackets =
false;
8814 if(report_fasta_brackets) {
8816 const CBioSource& bsrc = (*bs_ref).GetSource();
8822 if(pos2 != string::npos) {
8824 if(pos2 != string::npos) {
8825report_fasta_brackets =
false;
8832 if(report_fasta_brackets) {
8834 "Title may have unparsed [...=...] construct",
8845vector<string> keywords;
8848keywords.push_back(*
key);
8855 switch(desc.
Which()) {
8859org = &(desc.
GetOrg());
8892 if(! use_ctx || ! use_ctx->
IsSet()
8898 "Inconsistent create_dates ["+ current_str +
8899 "] and ["+ create_str +
"]", *use_ctx, desc);
8902create_desc = &desc;
8916update_desc = &desc;
8936 if(
source.IsSetOrg()) {
8958 "Non-TPA record "+ id_str +
" should not have TpaAssembly object", seq);
8964 "RefGeneTracking object should only be in RefSeq record",
8971 boolfound =
false;
8983 "Structured Comment is non-compliant, keyword should be removed",
ctx, desc);
8988 if((*field)->IsSetLabel() && (*field)->GetLabel().IsStr()) {
8989 if(
NStr::EqualNocase((*field)->GetLabel().GetStr(),
"StructuredCommentPrefix")) {
8990 const string& prefix = (*field)->GetData().GetStr();
8992is_genome_assembly =
true;
8994is_assembly =
true;
8996}
else if(
NStr::EqualNocase((*field)->GetLabel().GetStr(),
"Current Finishing Status")) {
8997 const string& prefix = (*field)->GetData().GetStr();
8999is_finished_status =
true;
9038 if((*id_it)->IsOther()) {
9065title = title.substr (11);
9070 "RefSeq nucleotide title does not start with organism name",
9073}
else if(seq.
IsAa()) {
9074taxname =
"["+ taxname +
"]";
9078 "RefSeq protein title does not end with organism name",
9096 "Undesired multiple name descriptors, identical text",
9100 "Undesired multiple name descriptors, different text",
9110 if(! seq.
IsAa()) {
9112 "Nucleic acid with protein sequence method",
9125 const string&
buf= seq.
GetId().front()->AsFastaString();
9140 "Multiple GenBank blocks",
ctx, *last_gb);
9145 "Multiple EMBL blocks",
ctx, *last_embl);
9150 "Multiple PIR blocks",
ctx, *last_pir);
9155 "Multiple PDB blocks",
ctx, *last_pdb);
9160 "Multiple PRF blocks",
ctx, *last_prf);
9165 "Multiple SWISS-PROT blocks",
ctx, *last_sp);
9183 boolhas_tpa_inf =
false, has_tpa_exp =
false;
9186has_tpa_exp =
true;
9188has_tpa_inf =
true;
9191 if(has_tpa_inf && has_tpa_exp) {
9193 "TPA:experimental and TPA:inferential should not both be in the same set of keywords",
9204 "TSA sequence should not be DNA", seq);
9215 int& last_completeness,
9221 boolis_synthetic_construct =
false;
9222 boolis_artificial =
false;
9227 if(! is_synthetic_construct) {
9230 if(! is_artificial) {
9240 if(seq_biomol < 0) {
9241seq_biomol = biomol;
9248 "Nucleic acid with Molinfo = peptide",
ctx, desc);
9253 if(! is_artificial) {
9255 "Molinfo-biomol = other genetic",
ctx, desc);
9264 "Molinfo-biomol other used",
ctx, desc);
9274 "] used on protein",
ctx, desc);
9276 if(biomol != seq_biomol) {
9278 "Inconsistent Molinfo-biomol ["+
9291 "mRNA should be single stranded not double stranded",
ctx, desc);
9294 if(is_synthetic_construct && ! seq.
IsAa()) {
9312 "Nucleic acid with protein sequence method",
ctx, desc);
9333 "Protein with nucleic acid sequence method",
ctx, desc);
9359 "HTGS/STS/GSS/WGS sequence should be genomic", seq);
9364 "HTGS/STS/GSS/WGS sequence should not be RNA", seq);
9372 "EST sequence should be mRNA", seq);
9384 boolhas_draft =
false;
9385 boolhas_prefin =
false;
9386 boolhas_activefin =
false;
9387 boolhas_fulltop =
false;
9397has_activefin =
true;
9399has_fulltop =
true;
9408 "HTGS 3 sequence should not have HTGS_DRAFT keyword", seq);
9412 "HTGS 3 sequence should not have HTGS_PREFIN keyword", seq);
9414 if(has_activefin) {
9416 "HTGS 3 sequence should not have HTGS_ACTIVEFIN keyword", seq);
9420 "HTGS 3 sequence should not have HTGS_FULLTOP keyword", seq);
9424 if(last_tech > 0) {
9425 if(last_tech != tech) {
9434 if(last_tech > -1) {
9435 if(last_tech != 0) {
9438+
"] and [0]",
ctx, desc);
9446 if(last_completeness > 0) {
9456 if(last_completeness > -1) {
9457 if(last_completeness != 0) {
9460+
"] and [0]",
ctx, desc);
9463last_completeness = 0;
9468 if(closest_molinfo) {
9522 const string& lineage,
9523 const string& stranded_mol,
9543 "dsRNA virus should be genomic RNA",
9562mssg =
"single-stranded RNA";
9564mssg =
"double-stranded RNA";
9566mssg =
"single-stranded DNA";
9568mssg =
"double-stranded DNA";
9570mssg =
"unknown-stranded RNA";
9572mssg =
"unknown-stranded DNA";
9576 "Taxonomy indicates "+ mssg +
9578 ") is conflicting.",
9585 const string& lineage,
9586 const string& stranded_mol,
9606 "Ambisense virus should be genomic RNA or cRNA",
9615 "Retrovirus should be genomic RNA or genomic DNA",
9621 boolnegative_strand_virus =
false;
9622 boolplus_strand_virus =
false;
9624negative_strand_virus =
true;
9627plus_strand_virus =
true;
9629 if(! negative_strand_virus && ! plus_strand_virus) {
9633 boolis_synthetic =
false;
9635is_synthetic =
true;
9636}
else if(
source.IsSetOrigin()) {
9641is_synthetic =
true;
9645 boolhas_cds =
false;
9646 boolhas_plus_cds =
false;
9647 boolhas_minus_cds =
false;
9653has_minus_cds =
true;
9655has_plus_cds =
true;
9657 if(has_minus_cds && has_plus_cds) {
9664 boolhas_minus_misc_feat =
false;
9665 boolhas_plus_misc_feat =
false;
9673has_minus_misc_feat =
true;
9675has_plus_misc_feat =
true;
9678 if(has_minus_misc_feat && has_plus_misc_feat) {
9685 if(negative_strand_virus) {
9687 if(has_minus_cds) {
9690 "Negative-sense single-stranded RNA virus with minus strand CDS should be genomic RNA",
9695 if(has_plus_cds && ! is_synthetic && ! is_ambisense) {
9698 "Negative-sense single-stranded RNA virus with plus strand CDS should be cRNA",
9703 if(has_minus_misc_feat) {
9706 "Negative-sense single-stranded RNA virus with nonfunctional minus strand misc_feature should be genomic RNA",
9711 if(has_plus_misc_feat && ! is_synthetic && ! is_ambisense) {
9714 "Negative-sense single-stranded RNA virus with nonfunctional plus strand misc_feature should be cRNA",
9720 if(plus_strand_virus) {
9722 if(has_minus_cds) {
9724 "CDS should not be on minus strand of a positive-sense single-stranded RNA virus",
9728 if(! is_synthetic && ! is_ambisense) {
9731 "Positive-sense single-stranded RNA virus should be genomic RNA",
9741{
"root",
"dsDNA"},
9742{
"Alphasatellitidae",
"ssDNA"},
9743{
"Anelloviridae",
"ssDNA(-)"},
9744{
"Bacilladnaviridae",
"ssDNA"},
9745{
"Bidnaviridae",
"ssDNA"},
9746{
"Circoviridae",
"ssDNA(+/-)"},
9747{
"Geminiviridae",
"ssDNA(+/-)"},
9748{
"Genomoviridae",
"ssDNA"},
9749{
"Hepadnaviridae",
"dsDNA-RT"},
9750{
"Inoviridae",
"ssDNA(+)"},
9751{
"Microviridae",
"ssDNA(+)"},
9752{
"Nanoviridae",
"ssDNA(+)"},
9753{
"Ortervirales",
"ssRNA-RT"},
9754{
"Caulimoviridae",
"dsDNA-RT"},
9755{
"Parvoviridae",
"ssDNA(+/-)"},
9756{
"Alphapleolipovirus",
"dsDNA; ssDNA"},
9757{
"Riboviria",
"RNA"},
9758{
"Albetovirus",
"ssRNA(+)"},
9759{
"Alphatetraviridae",
"ssRNA(+)"},
9760{
"Alvernaviridae",
"ssRNA(+)"},
9761{
"Amalgaviridae",
"dsRNA"},
9762{
"Astroviridae",
"ssRNA(+)"},
9763{
"Aumaivirus",
"ssRNA(+)"},
9764{
"Avsunviroidae",
"ssRNA"},
9765{
"Barnaviridae",
"ssRNA(+)"},
9766{
"Benyviridae",
"ssRNA(+)"},
9767{
"Birnaviridae",
"dsRNA"},
9768{
"Botourmiaviridae",
"ssRNA(+)"},
9769{
"Botybirnavirus",
"dsRNA"},
9770{
"Bromoviridae",
"ssRNA(+)"},
9771{
"Caliciviridae",
"ssRNA(+)"},
9772{
"Carmotetraviridae",
"ssRNA(+)"},
9773{
"Chrysoviridae",
"dsRNA"},
9774{
"Closteroviridae",
"ssRNA(+)"},
9775{
"Cystoviridae",
"dsRNA"},
9776{
"Deltavirus",
"ssRNA(-)"},
9777{
"dsRNA viruses",
"dsRNA"},
9778{
"Endornaviridae",
"dsRNA"},
9779{
"Flaviviridae",
"ssRNA(+)"},
9780{
"Hepeviridae",
"ssRNA(+)"},
9781{
"Hypoviridae",
"ssRNA(+)"},
9782{
"Idaeovirus",
"ssRNA(+)"},
9783{
"Kitaviridae",
"ssRNA(+)"},
9784{
"Leviviridae",
"ssRNA(+)"},
9785{
"Luteoviridae",
"ssRNA(+)"},
9786{
"Matonaviridae",
"ssRNA(+)"},
9787{
"Megabirnaviridae",
"dsRNA"},
9788{
"Narnaviridae",
"ssRNA(+)"},
9789{
"Haploviricotina",
"ssRNA(-)"},
9790{
"Arenaviridae",
"ssRNA(+/-)"},
9791{
"Coguvirus",
"ssRNA(-)"},
9792{
"Cruliviridae",
"ssRNA(-)"},
9793{
"Fimoviridae",
"ssRNA(-)"},
9794{
"Hantaviridae",
"ssRNA(-)"},
9795{
"Leishbuviridae",
"ssRNA(-)"},
9796{
"Mypoviridae",
"ssRNA(-)"},
9797{
"Nairoviridae",
"ssRNA(-)"},
9798{
"Peribunyaviridae",
"ssRNA(-)"},
9799{
"Phasmaviridae",
"ssRNA(-)"},
9800{
"Banyangvirus",
"ssRNA(+/-)"},
9801{
"Beidivirus",
"ssRNA(-)"},
9802{
"Goukovirus",
"ssRNA(-)"},
9803{
"Horwuvirus",
"ssRNA(-)"},
9804{
"Hudivirus",
"ssRNA(-)"},
9805{
"Hudovirus",
"ssRNA(-)"},
9806{
"Kabutovirus",
"ssRNA(-)"},
9807{
"Laulavirus",
"ssRNA(-)"},
9808{
"Mobuvirus",
"ssRNA(-)"},
9809{
"Phasivirus",
"ssRNA(-)"},
9810{
"Phlebovirus",
"ssRNA(+/-)"},
9811{
"Pidchovirus",
"ssRNA(-)"},
9812{
"Tenuivirus",
"ssRNA(-)"},
9813{
"Wenrivirus",
"ssRNA(-)"},
9814{
"Wubeivirus",
"ssRNA(-)"},
9815{
"Tospoviridae",
"ssRNA(+/-)"},
9816{
"Wupedeviridae",
"ssRNA(-)"},
9817{
"Insthoviricetes",
"ssRNA(-)"},
9818{
"Nidovirales",
"ssRNA(+)"},
9819{
"Nodaviridae",
"ssRNA(+)"},
9820{
"Papanivirus",
"ssRNA(+)"},
9821{
"Partitiviridae",
"dsRNA"},
9822{
"Permutotetraviridae",
"ssRNA(+)"},
9823{
"Picobirnaviridae",
"dsRNA"},
9824{
"Picornavirales",
"ssRNA(+)"},
9825{
"Pospiviroidae",
"ssRNA"},
9826{
"Potyviridae",
"ssRNA(+)"},
9827{
"Quadriviridae",
"dsRNA"},
9828{
"Reoviridae",
"dsRNA"},
9829{
"Sarthroviridae",
"ssRNA(+)"},
9830{
"Sinaivirus",
"ssRNA(+)"},
9831{
"Solemoviridae",
"ssRNA(+)"},
9832{
"Solinviviridae",
"ssRNA(+)"},
9833{
"Togaviridae",
"ssRNA(+)"},
9834{
"Tombusviridae",
"ssRNA(+)"},
9835{
"Totiviridae",
"dsRNA"},
9836{
"Tymovirales",
"ssRNA(+)"},
9837{
"Virgaviridae",
"ssRNA(+)"},
9838{
"Virtovirus",
"ssRNA(+)"},
9839{
"ssRNA viruses",
"ssRNA"},
9840{
"unclassified ssRNA viruses",
"ssRNA"},
9841{
"unclassified ssRNA negative-strand viruses",
"ssRNA(-)"},
9842{
"unclassified ssRNA positive-strand viruses",
"ssRNA(+)"},
9843{
"unclassified viroids",
"ssRNA"},
9844{
"DNA satellites",
"DNA"},
9845{
"RNA satellites",
"RNA"},
9846{
"Smacoviridae",
"ssDNA"},
9847{
"Spiraviridae",
"ssDNA(+)"},
9848{
"Tolecusatellitidae",
"ssDNA"},
9849{
"unclassified viruses",
"unknown"},
9850{
"unclassified DNA viruses",
"DNA"},
9851{
"unclassified archaeal dsDNA viruses",
"dsDNA"},
9852{
"unclassified dsDNA phages",
"dsDNA"},
9853{
"unclassified dsDNA viruses",
"dsDNA"},
9854{
"unclassified ssDNA bacterial viruses",
"ssDNA"},
9855{
"unclassified ssDNA viruses",
"ssDNA"},
9856{
"environmental samples",
"unknown"},
9872 for(
autoit : moltypes) {
9874 if(it->GetIval2() == 1) {
9875(*viral_map)[sName] = it->GetSval();
9897 return "ssRNA(+/-)";
9902 return "ssRNA(+/-)";
9907 return "ssRNA(+/-)";
9912 return "ssRNA(+/-)";
9925 if(s_ViralMap->empty()) {
9926 for(
const auto& x : kViralStrandMap) {
9932 for(
const auto& x : s_ViralMap.
Get()) {
9947 if(new_mod != old_mod) {
9962 intlast_na_mod = -1;
9963 intlast_organelle = -1;
9964 intlast_partialness = -1;
9965 intlast_left_right = -1;
9970CSeqdesc::TModif::const_iterator it = modif.begin();
9971 while(it != modif.end()) {
10009last_left_right = modval;
10025 intlast_na_mol = 0;
10031 if(! seq.
IsAa()) {
10033 "Nucleic acid with GIBB-mol = peptide",
10040 "GIBB-mol unknown or other used",
10044 if(seq.
IsAa()) {
10050 if(last_na_mol != modval) {
10057last_na_mol = modval;
10073 if(
source.CanGetOrigin() &&
10077 if(
source.CanGetOrg() &&
source.GetOrg().CanGetOrgname()) {
10110 const CDate& update,
10111 const CDate& create,
10122 stringerr_msg =
"Inconsistent create_date [";
10123err_msg += create_str;
10124err_msg +=
"] and update_date [";
10125err_msg += update_str;
10130err_msg, *
ctx, desc);
10143 boolis_wp =
false;
10145 const CSeq_id& sid = **sid_itr;
10150 if(acc ==
"WP_") {
10158 "Inconsistent organism names ["+ this_org.
GetTaxname() +
10189 const string&
type)
10194 bool first=
true;
10195 boolreported_first =
false;
10196 boollastIsSplit =
false;
10197 const string* strp =
nullptr;
10202strp = &(it->first);
10212message =
"Colliding "+
type+
" in gene features";
10214message =
"Colliding "+
type+
" (with different capitalization) in gene features";
10220 boolsuppress_message =
false;
10224it->second->IsSetExcept() && it->second->IsSetExcept_text()
10225&&
NStr::FindNoCase(it->second->GetExcept_text(),
"trans-splicing") != string::npos) {
10227suppress_message =
true;
10231 if(suppress_message) {
10234(*it->second).GetLocation(),
10238message +
", but feature locations are identical", *it->second);
10239}
else if(! is_gene_locus) {
10245 if(! suppress_message && ((! isSplit) || (! lastIsSplit))) {
10246 if(! reported_first) {
10249reported_first =
true;
10257strp = &(it->first);
10278 const CSeq_feat& feat = fi->GetOriginalFeature();
10304 if(gene_it != locus_map.
end()) {
10305 boolfound =
false;
10314 "gene synonym has same value ("+ syngene_it->first +
") as locus of another gene feature",
10315*syngene_it->second);
10322}
catch(
constexception& e) {
10323 if(
NStr::Find(e.what(),
"Error: Cannot resolve") == string::npos) {
10325 string(
"Exception while validating colliding genes. EXCEPTION: ") +
10334 if(! seq.
IsNa()) {
10339 boolembl_ddbj =
false;
10341 if((*id)->IsDdbj() || (*id)->IsEmbl()) {
10352 boolcomplete_genome =
false;
10358sequence::CDeflineGenerator defline_generator;
10359title = defline_generator.GenerateDefline(seq, *
m_Scope, sequence::CDeflineGenerator::fIgnoreExisting);
10363 if(! complete_genome) {
10370complete_genome =
true;
10377 if(! complete_genome) {
10383 if(!
si|| !
si->GetSource().IsSetDivision() ||
si->GetSource().GetDivision() !=
"BCT") {
10388 boolbioproject_accession_set =
false;
10391 if(ui->GetUser().IsSetData() && ui->GetUser().IsSetType() && ui->GetUser().GetType().IsStr() &&
NStr::EqualCase(ui->GetUser().GetType().GetStr(),
"DBLink")) {
10392bioproject_accession_set = ! ui->GetUser().GetData().empty();
10397 if(bioproject_accession_set)
10401 boolno_gaps =
true;
10405 if(
delta.IsSet()) {
10409 if((*part)->IsLiteral()) {
10416 if(
literal.IsSetSeq_data() &&
literal.GetSeq_data().IsGap()) {
10436 "No BioProject Accession exists for what appears to be a complete genome",
10444 const CSeq_id* gb_id =
nullptr;
10449 const CDbtag* general_id =
nullptr;
10452 switch((*id)->Which()) {
10454gb_id =
id->GetPointer();
10458gi = (*id)->GetGi();
10462general_id = &((*id)->GetGeneral());
10470 if(gi ==
ZERO_GI&& gb_id) {
10479 if(! id_set.empty()) {
10481 switch((*id).Which()) {
10484db_gb_id->
Assign(*(id->GetSeqId()));
10487db_gi = (*id).GetGi();
10491db_general_id->
Assign(*((*id).GetSeqId()));
10502 "New gi number ("+ gi_str +
")"+
10506 if(gb_id && db_gb_id) {
10507 if(! gb_id->
Match(*db_gb_id)) {
10510 ") does not match one in NCBI sequence repository ("+ db_gb_id->
AsFastaString() +
10511 ") on gi ("+ gi_str +
")", seq);
10513}
else if(gb_id) {
10515 "Gain of accession ("+ gb_id->
AsFastaString() +
") on gi ("+
10516gi_str +
") compared to the NCBI sequence repository", seq);
10517}
else if(db_gb_id) {
10520 ") on gi ("+ gi_str +
") compared to the NCBI sequence repository", seq);
10523 stringnew_gen_label, old_gen_label;
10524 if(general_id && db_general_id) {
10527general_id->
GetLabel(&new_gen_label);
10529 "New general ID ("+ new_gen_label +
10530 ") does not match one in NCBI sequence repository ("+ old_gen_label +
10531 ") on gi ("+ gi_str +
")", seq);
10533}
else if(general_id) {
10534general_id->
GetLabel(&new_gen_label);
10536 "Gain of general ID ("+ new_gen_label +
") on gi ("+
10537gi_str +
") compared to the NCBI sequence repository", seq);
10538}
else if(db_general_id) {
10541 "Loss of general ID ("+ old_gen_label +
") on gi ("+
10542gi_str +
") compared to the NCBI sequence repository", seq);
10588 ITERATE(
string, res,
data.GetIupacna().Get() ) {
10589 if(*res ==
'N') {
10606 ITERATE(
string, res,
data.GetIupacaa().Get() ) {
10607 if(*res ==
'N') {
10632 if((*iter)->IsLoc()) {
10698 ENa_strandstrand =
f->GetLocation().GetStrand();
10699 if(
f->GetData().IsCdregion()) {
10701cds_minus =
f->GetSeq_feat();
10703cds_plus =
f->GetSeq_feat();
10707utr3_minus =
f->GetSeq_feat();
10709utr3_plus =
f->GetSeq_feat();
10710 if(! cds_plus && utr5_plus &&
x_ReportUTRPair(*utr5_plus, *utr3_plus)) {
10712 "CDS not between 5'UTR and 3'UTR on plus strand", *utr3_plus);
10714utr5_plus.
Reset();
10715cds_plus.
Reset();
10716utr3_plus.
Reset();
10720utr5_minus =
f->GetSeq_feat();
10721 if(! cds_minus && utr3_minus &&
x_ReportUTRPair(*utr5_minus, *utr3_minus)) {
10723 "CDS not between 5'UTR and 3'UTR on minus strand", *utr5_minus);
10725utr5_minus.
Reset();
10726cds_minus.
Reset();
10727utr3_minus.
Reset();
10729utr5_plus =
f->GetSeq_feat();
10737CValidError_bioseq::CmRNACDSIndex::CmRNACDSIndex()
10742CValidError_bioseq::CmRNACDSIndex::~CmRNACDSIndex()
10754 bool match=
false;
10819&& mrna.
GetExt().
GetData().front()->GetData().IsStr()) {
10824 if(
id.GetGi() == gi) {
10831}
catch(
conststd::exception&) {
static CRef< CScope > m_Scope
@ eExtreme_Positional
numerical value
@ eExtreme_Biological
5' and 3'
@ eErr_SEQ_INST_HTGS_STS_GSS_WGSshouldNotBeRNA
@ eErr_SEQ_INST_BadDeltaSeq
@ eErr_SEQ_DESCR_InconsistentBioSources_ConLocation
@ eErr_SEQ_FEAT_mRNAgeneRange
@ eErr_SEQ_DESCR_FinishedStatusForWGS
@ eErr_SEQ_DESCR_InconsistentTaxName
@ eErr_GENERIC_MissingPubRequirement
@ eErr_SEQ_FEAT_TRNAinsideTMRNA
@ eErr_SEQ_INST_CompleteGenomeHasGaps
@ eErr_SEQ_INST_BadSeqIdCharacter
@ eErr_SEQ_INST_CompleteTitleProblem
@ eErr_SEQ_INST_HistoryGiCollision
@ eErr_SEQ_DESCR_UnwantedCompleteFlag
@ eErr_SEQ_INST_mRNAshouldBeSingleStranded
@ eErr_SEQ_FEAT_MultipleGenCodes
@ eErr_SEQ_DESCR_DBLinkBadAssembly
@ eErr_SEQ_DESCR_WGSmasterLacksBioProject
@ eErr_SEQ_INST_HighNContentStretch
@ eErr_SEQ_INST_HighNcontent3Prime
@ eErr_SEQ_INST_TerminalGap
@ eErr_SEQ_INST_MultipleAccessions
@ eErr_SEQ_DESCR_MultipleDBLinkObjects
@ eErr_SEQ_INST_BadProteinStart
@ eErr_SEQ_FEAT_PartialProblem3Prime
@ eErr_SEQ_FEAT_ProductShouldBeWhole
@ eErr_SEQ_INST_ProteinShouldNotHaveGaps
@ eErr_SEQ_INST_ESTshouldBemRNA
@ eErr_SEQ_DESCR_BadKeywordUnverified
@ eErr_SEQ_FEAT_ITSdoesNotAbutRRNA
@ eErr_SEQ_DESCR_InvalidMolInfo
@ eErr_SEQ_DESCR_InconsistentMolInfoTechnique
@ eErr_SEQ_DESCR_NoOrganismInTitle
@ eErr_SEQ_DESCR_InconsistentMolInfo
@ eErr_SEQ_INST_TSAMasterLacksStrucComm
@ eErr_SEQ_INST_WholeComponent
@ eErr_SEQ_FEAT_BadRRNAcomponentOrder
@ eErr_SEQ_INST_ReprInvalid
@ eErr_SEQ_INST_TSAseqGapProblem
@ eErr_SEQ_INST_HTGS_STS_GSS_WGSshouldBeGenomic
@ eErr_SEQ_INST_SeqLitDataLength0
@ eErr_SEQ_INST_CircBactGenomeProblem
@ eErr_SEQ_INST_WGSMasterLacksStrucComm
@ eErr_SEQ_INST_ContigsTooShort
@ eErr_SEQ_DESCR_NoMolInfoFound
@ eErr_SEQ_PKG_OrphanedProtein
@ eErr_SEQ_INST_SeqGapBadLinkage
@ eErr_SEQ_INST_SelfReferentialSequence
@ eErr_SEQ_DESCR_TransgenicProblem
@ eErr_SEQ_INST_DeltaComponentIsGi0
@ eErr_SEQ_FEAT_CDSmRNANotMatched
@ eErr_SEQ_FEAT_FeatContentDup
@ eErr_SEQ_INST_MolNotSet
@ eErr_SEQ_DESCR_WGSMasterLacksBothBioSampleBioProject
@ eErr_SEQ_INST_GiWithoutAccession
@ eErr_SEQ_INST_MissingGaps
@ eErr_SEQ_DESCR_InvalidForType
@ eErr_SEQ_FEAT_BadRRNAcomponentOverlapRRNA
@ eErr_SEQ_DESCR_FastaBracketTitle
@ eErr_SEQ_FEAT_MisMatchAA
@ eErr_SEQ_INST_StopInProtein
@ eErr_SEQ_INST_UnknownLengthGapNot100
@ eErr_SEQ_FEAT_MultipleProtRefs
@ eErr_SEQ_FEAT_MultipleEquivPublications
@ eErr_SEQ_DESCR_DBLinkProblem
@ eErr_SEQ_INST_InvalidLen
@ eErr_SEQ_DESCR_TPAassemblyWithoutTPAKeyword
@ eErr_SEQ_DESCR_InvalidForTypeGIBB
@ eErr_SEQ_FEAT_InvalidFeatureForProtein
@ eErr_SEQ_INST_HighNContentPercent
@ eErr_SEQ_DESCR_RefGeneTrackingOnNonRefSeq
@ eErr_SEQ_FEAT_IdenticalGeneSymbolAndSynonym
@ eErr_SEQ_FEAT_MultipleEquivBioSources
@ eErr_SEQ_INST_HighNcontent5Prime
@ eErr_SEQ_INST_TSAshouldBNotBeDNA
@ eErr_SEQ_DESCR_MissingChromosome
@ eErr_SEQ_INST_BadProteinMoltype
@ eErr_SEQ_DESCR_NucleotideTechniqueOnProtein
@ eErr_SEQ_INST_CompleteCircleProblem
@ eErr_SEQ_FEAT_CDSwithMultipleMRNAs
@ eErr_SEQ_FEAT_CDSmRNAMismatchProteinIDs
@ eErr_SEQ_FEAT_CDSmRNAMismatchTranscriptIDs
@ eErr_SEQ_FEAT_PartialProblemOrganelle3Prime
@ eErr_SEQ_INST_OverlappingDeltaRange
@ eErr_SEQ_FEAT_OverlappingPeptideFeat
@ eErr_SEQ_DESCR_BadKeywordNoTechnique
@ eErr_SEQ_FEAT_ExtraProteinFeature
@ eErr_SEQ_INST_SeqLocLength
@ eErr_SEQ_INST_FarLocationExcludesFeatures
@ eErr_SEQ_DESCR_InconsistentVirusMoltype
@ eErr_SEQ_INST_IdOnMultipleBioseqs
@ eErr_SEQ_DESCR_MoltypeOtherGenetic
@ eErr_SEQ_INST_HighNpercent3Prime
@ eErr_SEQ_INST_BadSecondaryAccn
@ eErr_SEQ_INST_InvalidAlphabet
@ eErr_SEQ_FEAT_CDSonMinusStrandMRNA
@ eErr_SEQ_INST_MolNuclAcid
@ eErr_SEQ_DESCR_MoltypeOther
@ eErr_SEQ_DESCR_Inconsistent
@ eErr_SEQ_INST_ExtNotAllowed
@ eErr_SEQ_DESCR_InconsistentRefSeqMoltype
@ eErr_SEQ_FEAT_PartialProblem5Prime
@ eErr_SEQ_FEAT_CDSmRNAMismatchLocation
@ eErr_SEQ_INST_TrailingX
@ eErr_SEQ_DESCR_InconsistentDates
@ eErr_SEQ_INST_CircularProtein
@ eErr_SEQ_INST_NoIdOnBioseq
@ eErr_SEQ_INST_PartsOutOfOrder
@ eErr_SEQ_FEAT_BadFullLengthFeature
@ eErr_SEQ_DESCR_InconsistentGenBankblocks
@ eErr_SEQ_FEAT_FarLocation
@ eErr_SEQ_INST_MolinfoOther
@ eErr_SEQ_INST_BadSeqIdLength
@ eErr_SEQ_INST_SeqDataNotAllowed
@ eErr_SEQ_INST_BadHTGSeq
@ eErr_SEQ_FEAT_PartialProblemOrganelle5Prime
@ eErr_SEQ_DESCR_NoKeywordHasTechnique
@ eErr_SEQ_INST_UnexpectedIdentifierChange
@ eErr_SEQ_INST_WGSseqGapProblem
@ eErr_SEQ_DESCR_MultipleStrucComms
@ eErr_SEQ_FEAT_InconsistentRRNAstrands
@ eErr_SEQ_FEAT_PartialProblemNotSpliceConsensus5Prime
@ eErr_SEQ_FEAT_BadRRNAcomponentOverlapAndOrder
@ eErr_SEQ_DESCR_DBLinkBadFormat
@ eErr_SEQ_FEAT_InvalidForType
@ eErr_SEQ_FEAT_GeneLocusCollidesWithLocusTag
@ eErr_SEQ_FEAT_CDSgeneRange
@ eErr_SEQ_INST_MitoMetazoanTooLong
@ eErr_SEQ_DESCR_CompleteGenomeLacksBioProject
@ eErr_SEQ_DESCR_CollidingPubMedID
@ eErr_SEQ_FEAT_DuplicateFeat
@ eErr_SEQ_INST_ExtBadOrMissing
@ eErr_SEQ_FEAT_FeatureProductInconsistency
@ eErr_SEQ_DESCR_SyntheticConstructWrongMolType
@ eErr_SEQ_FEAT_DuplicateGeneConflictingLocusTag
@ eErr_SEQ_DESCR_MolInfoConflictsWithBioSource
@ eErr_SEQ_INST_InstantiatedGapMismatch
@ eErr_SEQ_FEAT_UTRdoesNotAbutCDS
@ eErr_SEQ_INST_PartialInconsistent
@ eErr_SEQ_FEAT_CollidingLocusTags
@ eErr_SEQ_DESCR_MultipleNames
@ eErr_SEQ_FEAT_PartialProblemNotSpliceConsensus3Prime
@ eErr_SEQ_INST_BadSeqIdFormat
@ eErr_SEQ_FEAT_NoCDSbetweenUTRs
@ eErr_SEQ_INST_ZeroGiNumber
@ eErr_INTERNAL_Exception
@ eErr_SEQ_INST_ConflictingIdsOnBioseq
@ eErr_SEQ_DESCR_WrongOrganismFor16SrRNA
@ eErr_SEQ_INST_HistAssemblyMissing
@ eErr_SEQ_PKG_NoCdRegionPtr
@ eErr_SEQ_INST_InternalNsInSeqRaw
@ eErr_SEQ_INST_TerminalNs
@ eErr_SEQ_FEAT_SeqFeatXrefProblem
@ eErr_SEQ_DESCR_BadKeywordForStrucComm
@ eErr_SEQ_FEAT_CDSdoesNotMatchVDJC
@ eErr_SEQ_DESCR_InconsistentMolType
@ eErr_SEQ_FEAT_CDSmRNAMissingProteinIDs
@ eErr_SEQ_DESCR_WGSmasterLacksBioSample
@ eErr_SEQ_FEAT_MultiIntervalIntron
@ eErr_SEQ_DESCR_InconsistentTPA
@ eErr_SEQ_FEAT_LocusTagProblem
@ eErr_SEQ_INST_HighNpercent5Prime
@ eErr_SEQ_DESCR_ScaffoldLacksBioProject
@ eErr_SEQ_INST_InternalNsAdjacentToGap
@ eErr_SEQ_FEAT_PartialProblem
@ eErr_SEQ_DESCR_MultipleComments
@ eErr_SEQ_INST_SeqDataNotFound
@ eErr_SEQ_INST_InternalGapsInSeqRaw
@ eErr_SEQ_FEAT_MultipleGeneOverlap
@ eErr_SEQ_INST_DuplicateSegmentReferences
@ eErr_SEQ_DESCR_InconsistentWGSFlags
@ eErr_SEQ_FEAT_CDSmRNAmismatchCount
@ eErr_SEQ_FEAT_UTRdoesNotExtendToEnd
@ eErr_SEQ_INST_SeqLitGapLength0
@ eErr_SEQ_INST_SeqIdNameHasSpace
@ eErr_SEQ_DESCR_ProteinTechniqueOnNucleotide
@ eErr_SEQ_DESCR_CollidingPublications
@ eErr_SEQ_FEAT_PartialProblemmRNASequence3Prime
@ eErr_SEQ_INST_InternalNsInSeqLit
@ eErr_SEQ_INST_SeqDataLenWrong
@ eErr_SEQ_INST_GapInProtein
@ eErr_SEQ_INST_SeqGapProblem
@ eErr_SEQ_INST_InvalidResidue
@ eErr_SEQ_FEAT_PartialProblemmRNASequence5Prime
@ eErr_SEQ_FEAT_InvalidFeatureForMRNA
@ eErr_SEQ_FEAT_CDSwithNoMRNA
@ eErr_GENERIC_DeltaSeqError
@ eErr_SEQ_DESCR_UnculturedGenome
ncbi::TMaskedQueryRegions mask
const string & GetLineage(void) const
const string & GetTaxname(void) const
bool IsSetLineage(void) const
bool IsSetTaxname(void) const
size_t IterateFeatures(Fnc m)
CSeq_entry * GetParentEntry(void) const
CConstRef< CSeqdesc > GetClosestDescriptor(CSeqdesc::E_Choice choice, int *level=NULL) const
TSeqPos GetLength(void) const
void GetLabel(string *label, ELabelType type, bool worst=false) const
bool AssignMatch(TmRNAList &mrna_map, CFeatTree &feat_tree, CScope &scope)
bool Overlaps(const CSeq_feat &mrna) const
sequence::EOverlapType m_OverlapType
const CSeq_feat & GetSeqfeat() const
bool AssignXrefMatch(TmRNAList &unmatched_mrnas, const CTSE_Handle &tse)
CConstRef< CSeq_feat > m_Cds
bool AssignOverlapMatch(TmRNAList &unmatched_mrnas, CScope &scope)
CCdsMatchInfo(const CSeq_feat &cds, CScope *scope)
bool AreMrnaProductsUnique()
CRef< CMrnaMatchInfo > m_BestMatch
const CMrnaMatchInfo & GetMatch() const
void SetMatch(CRef< CMrnaMatchInfo > match)
list< CConstRef< CSeq_feat > > m_OtherMrnas
void UpdateOtherMrnas(const TmRNAList &unmatched_mrnas)
ECompare Compare(const CDate &date) const
void GetDate(string *label, bool year_only=false) const
Append a standardized string representation of the date to the label.
@ eCompare_before
*this comes first.
@ eCompare_same
They're equivalent.
void GetLabel(string *label) const
bool Match(const CDbtag &dbt2) const
int Compare(const CDbtag &dbt2) const
CSeqFeatData::ESubtype GetSubtype(void) const
CRef< CFeatureIndex > GetBestParent(void)
CRef< CFeatureIndex > GetBestGene(void)
CSeq_feat_Handle GetSeqFeatHandle(void) const
const CMappedFeat GetMappedFeat(void) const
CConstRef< CSeq_loc > GetMappedLocation(void) const
CRef< feature::CFeatTree > GetFeatTreeFromCache(const CSeq_loc &loc, CScope &scope)
static bool IsPseudo(const CSeq_feat &feat)
CConstRef< CSeq_feat > GetGeneFromCache(const CSeq_feat *feat, CScope &scope)
void GetLabel(string *label) const
bool IsSuppressed(void) const
@Imp_feat.hpp User-defined methods of the data storage class.
CConstRef< CSeq_feat > m_Mrna
bool Overlaps(const CSeq_feat &cds) const
CMrnaMatchInfo(const CSeq_feat &mrna, CScope *scope)
void SetPseudo(bool val=true)
const CSeq_feat & GetSeqfeat() const
bool OkWithoutCds(bool isGenbank=false) const
Exceptions for objmgr/util library.
@OrgMod.hpp User-defined methods of the data storage class.
const string & GetLineage(void) const
bool IsSetLineage(void) const
@Pubdesc.hpp User-defined methods of the data storage class.
@RNA_ref.hpp User-defined methods of the data storage class.
T & Get(void)
Create the variable if not created yet, return the reference.
CRef< CBioseqIndex > GetBioseqIndex(void)
ESubtype GetSubtype(void) const
@ eSubtype_transit_peptide_aa
@ eSubtype_sig_peptide_aa
@ eSubtype_mat_peptide_aa
CSeq_entry * GetParentEntry(void) const
namespace ncbi::objects::
const CGene_ref * GetGeneXref(void) const
See related function in util/feature.hpp.
static bool IsAa(EMol mol)
static string GetMoleculeClass(EMol mol)
static bool IsNa(EMol mol)
Seq-loc iterator class — iterates all intervals from a seq-loc in the correct order.
static TSeqPos Convert(const CSeq_data &in_seq, CSeq_data *out_seq, CSeq_data::E_Choice to_code, TSeqPos uBeginIdx=0, TSeqPos uLength=0, bool bAmbig=false, Uint4 seed=17734276)
static void Validate(const CSeq_data &in_seq, vector< TSeqPos > *badIdx, TSeqPos uBeginIdx=0, TSeqPos uLength=0)
Base class for all serializable objects.
static bool NeedsNoText(const TSubtype &subtype)
CBioseq_Handle GetBioseqHandle(const CSeq_id &id) const
Get Bioseq handle from this TSE.
TSeq_feat_Handles GetFeaturesWithId(CSeqFeatData::E_Choice type, TFeatureIdInt id) const
bool GetInheritedPropertyDefines(const string &prop_name, TInfoList &results_out, TTaxId subtree_root=TAX_ID_CONST(1))
bool GetScientificName(TTaxId tax_id, string &name_out)
list< CRef< CTaxon1_info > > TInfoList
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Template class for iteration on objects of class C (non-modifiable version)
bool IsRefGeneTracking() const
bool HasField(const string &str, const string &delim=".", NStr::ECase use_case=NStr::eCase) const
Verify that a named field exists.
const CUser_field & GetField(const string &str, const string &delim=".", NStr::ECase use_case=NStr::eCase) const
Access a named field in this user object.
bool IsStructuredComment() const
EObjectType GetObjectType() const
void ValidateSeqAnnot(const CSeq_annot_Handle &annot)
void ValidateSeqAnnotContext(const CSeq_annot &annot, const CBioseq &seq)
static CSeq_entry_Handle GetAppropriateXrefParent(CSeq_entry_Handle seh)
void PostErr(EDiagSev sv, EErrType et, const string &msg, const CSerialObject &obj)
static bool IsPdb(const CBioseq &seq)
void ValidateUpdateDateContext(const CDate &update, const CDate &create, const CBioseq &seq, const CSeqdesc &desc)
void ValidateOrgContext(const COrg_ref &this_org, const COrg_ref &org, const CBioseq &seq, const CSeqdesc &desc)
CBioseq_Handle m_CurrentHandle
void ValidateInst(const CBioseq &seq)
static bool IsRefSeq(const CBioseq &seq)
void x_ValidateMultiplePubs(const CBioseq_Handle &bsh)
bool IsHistAssemblyMissing(const CBioseq &seq)
void ReportBadAssemblyGap(const CBioseq &seq)
static bool IsSelfReferential(const CBioseq &seq)
EDiagSev x_DupFeatSeverity(const CSeq_feat &curr, const CSeq_feat &prev, bool viral, bool htgs, bool same_annot, bool same_label)
CRef< CSeq_loc > GetLocFromSeq(const CBioseq &seq)
void x_ValidateBarcode(const CBioseq &seq)
void x_CompareStrings(const TStrFeatMap &str_feat_map, const string &type)
void x_CheckGeneralIDs(const CBioseq &seq)
void x_TranscriptIDsMatch(const string &protein_id, const CSeq_feat &cds)
static bool IsTSAAccession(const CSeq_id &id)
static bool IsEmblOrDdbj(const CBioseq &seq)
void x_CheckMrnaProteinLink(const CCdsMatchInfo &cds_match)
bool x_IsRangeGap(const CBioseq_Handle &seq, int start, int stop)
void ValidateBioseq(const CBioseq &seq)
void ValidateWGSMaster(CBioseq_Handle bsh)
CValidError_descr m_DescrValidator
void ValidateDeltaLoc(const CSeq_loc &loc, const CBioseq &seq, TSeqPos &len)
bool x_IsSameAsCDS(const CMappedFeat &feat)
void x_ValidateMolInfoForBioSource(const CBioSource &src, const CMolInfo &minfo, const CSeqdesc &desc)
void x_CheckForMultiplemRNAs(CCdsMatchInfo &cds_match, const TmRNAList &unmatched_mrnas)
void ValidateHistory(const CBioseq &seq)
void x_ValidateCompletness(const CBioseq &seq, const CMolInfo &mi)
bool SuppressTrailingXMsg(const CBioseq &seq)
void ValidateMolInfoContext(const CMolInfo &minfo, int &seq_biomol, int &tech, int &completeness, const CBioseq &seq, const CSeqdesc &desc)
bool x_HasCitSub(CBioseq_Handle bsh) const
static bool x_HasGap(const CBioseq &seq)
void ValidateSeqParts(const CBioseq &seq)
void x_ReportOverlappingPeptidePair(CSeq_feat_Handle f1, CSeq_feat_Handle f2, const CBioseq &bioseq, bool &reported_last_peptide)
void ValidateSegRef(const CBioseq &seq)
void x_CheckSingleStrandedRNAViruses(const CBioSource &source, const string &lineage, const string &stranded_mol, const CMolInfo::TBiomol biomol, const CBioseq_Handle &bsh, const CSerialObject &obj, const CSeq_entry *ctx)
void ValidateSecondaryAccConflict(const string &primary_acc, const CBioseq &seq, int choice)
static bool IsWGSMaster(const CBioseq &seq, CScope &scope)
void x_ValidateTitle(const CBioseq &seq)
void ValidateMultipleGeneOverlap(const CBioseq_Handle &bsh)
void ValidateSeqFeatContext(const CBioseq &seq, bool is_complete)
void ValidateDelta(const CBioseq &seq)
static bool HasBadWGSGap(const CBioseq &seq)
static bool x_HasPGAPStructuredComment(CBioseq_Handle bsh)
bool m_report_missing_chromosome
CValidError_annot m_AnnotValidator
void ValidateTwintrons(const CBioseq &seq)
unsigned int x_IdXrefsNotReciprocal(const CSeq_feat &cds, const CSeq_feat &mrna)
void x_ValidateGeneCDSmRNACounts()
void x_ReportStartStopPartialProblem(int partial_type, bool at_splice_or_gap, bool abuts_n, const CSeq_feat &feat)
static size_t x_BadMetazoanMitochondrialLength(const CBioSource &src, const CSeq_inst &inst)
void ReportBadTSAGap(const CBioseq &seq)
void ValidateSeqGap(const CSeq_gap &gap, const CBioseq &seq)
void ValidateBadGeneOverlap(const CSeq_feat &feat)
bool x_IsPartialAtSpliceSiteOrGap(const CSeq_loc &loc, unsigned int tag, bool &bad_seq, bool &is_gap, bool &abuts_n)
void x_SetupCommonFlags(CBioseq_Handle bsh)
bool m_splicing_not_expected
bool x_IsDeltaLitOnly(const CSeq_inst &inst) const
void ValidateNsAndGaps(const CBioseq &seq)
void ValidateCompleteGenome(const CBioseq &seq)
bool x_IsMicroRNA() const
CValidError_bioseq(CValidError_imp &imp)
void ValidateRawConst(const CBioseq &seq)
void ValidateBioseqContext(const CBioseq &seq)
bool CdError(const CBioseq_Handle &bsh)
void x_ReportLineageConflictWithMol(const string &lineage, const string &stranded_mol, const CMolInfo::TBiomol biomol, CSeq_inst::EMol mol, const CSerialObject &obj, const CSeq_entry *ctx)
bool ValidateRepr(const CSeq_inst &inst, const CBioseq &seq)
void ValidateFeatPartialInContext(const CMappedFeat &feat, bool is_complete)
void ValidateGBBlock(const CGB_block &gbblock, const CBioseq &seq, const CSeqdesc &desc)
bool IsMrna(const CBioseq_Handle &bsh)
void ReportBadWGSGap(const CBioseq &seq)
bool x_SuppressDicistronic(const CSeq_feat_Handle &f1, const CSeq_feat_Handle &f2, bool fruit_fly)
static bool IsWGSAccession(const CSeq_id &id)
void ValidateSeqLen(const CBioseq &seq)
bool x_PartialAdjacentToIntron(const CSeq_loc &loc)
void x_CheckOrigProteinAndTranscriptIds(const CCdsMatchInfo &cds_match)
size_t GetDataLen(const CSeq_inst &inst)
void CheckForPubOnBioseq(const CBioseq &seq)
void x_CalculateNsStretchAndTotal(const CSeqVector &seqvec, TSeqPos &num_ns, TSeqPos &max_stretch, bool &n5, bool &n3)
void CheckForMolinfoOnBioseq(const CBioseq &seq)
static bool IsAllNs(const CSeqVector &vec)
static string s_GetStrandedMolStringFromLineage(const string &lineage)
bool GetTSAConflictingBiomolTechErrors(const CBioseq &seq)
bool GraphsOnBioseq() const
void CheckTpaHistory(const CBioseq &seq)
static bool IsPartial(const CBioseq &seq, CScope &scope)
const CCacheImpl::TFeatValue * m_AllFeatIt
void x_ValidateCDSmRNAmatch(const CBioseq_Handle &seq)
void ReportModifInconsistentError(int new_mod, int &old_mod, const CSeqdesc &desc, const CSeq_entry &ctx)
static bool x_IgnoreEndGap(CBioseq_Handle bsh, CSeq_gap::TType gap_type)
static bool x_ParentAndComponentLocationsDiffer(CBioseq_Handle bsh, CBioSource::TGenome parent_location)
void x_ValidateCDSVDJCmatch(const CBioseq_Handle &seq)
bool x_ShowBioProjectWarning(const CBioseq &seq)
void CheckForMultipleStructuredComments(const CBioseq &seq)
void ValidateCollidingGenes(const CBioseq &seq)
bool x_IdXrefsAreReciprocal(const CSeq_feat &cds, const CSeq_feat &mrna)
static bool IsGenbank(const CBioseq &seq)
void x_ReportDuplicatePubLabels(const CBioseq &seq, const vector< CTempString > &labels)
void ValidateSeqIds(const CBioseq &seq)
void x_ReportInternalPartial(const CSeq_feat &feat)
void ValidateModifDescriptors(const CBioseq &seq)
void x_ReportSuspiciousUseOfComplete(const CBioseq &seq, EDiagSev sev)
CValidError_feat m_FeatValidator
static int PctNs(CBioseq_Handle bsh)
void ReportBadGenomeGap(const CBioseq &seq)
static bool IsWp(CBioseq_Handle bsh)
void ValidateDupOrOverlapFeats(const CBioseq &seq)
bool x_MatchesOverlappingFeaturePartial(const CMappedFeat &feat, unsigned int partial_type)
void CheckForMissingChromosome(CBioseq_Handle bsh)
bool IsIdIn(const CSeq_id &id, const CBioseq &seq)
void ValidateMoltypeDescriptors(const CBioseq &seq)
size_t NumOfIntervals(const CSeq_loc &loc)
void x_ReportImproperPartial(const CSeq_feat &feat)
bool IsFlybaseDbxrefs(const TDbtags &dbxrefs)
void CheckSourceDescriptor(const CBioseq_Handle &bsh)
void x_ReportGeneOverlapError(const CSeq_feat &feat, const string &gene_label)
void x_CheckForMultipleComments(CBioseq_Handle bsh)
void ValidateIDSetAgainstDb(const CBioseq &seq)
static bool IsMaster(const CBioseq &seq)
bool x_IsActiveFin() const
bool x_ReportUTRPair(const CSeq_feat &utr5, const CSeq_feat &utr3)
void x_ValidateAbuttingRNA(const CBioseq_Handle &seq)
void x_ValidateSourceFeatures(const CBioseq_Handle &bsh)
void ValidateSeqId(const CSeq_id &id, const CBioseq &ctx, bool longer_general=false)
~CValidError_bioseq() override
void x_ValidateAbuttingUTR(const CBioseq_Handle &seq)
bool x_ReportDupOverlapFeaturePair(const CSeq_feat_Handle &f1, const CSeq_feat_Handle &f2, bool fruit_fly, bool viral, bool htgs)
void x_ValidateCDSagainstVDJC(const CBioseq_Handle &seq)
static bool IsWGS(const CBioseq &seq)
size_t x_CountAdjacentNs(const CSeq_literal &lit)
void ValidateSeqDescContext(const CBioseq &seq)
void x_ValidateOverlappingRNAFeatures(const CBioseq_Handle &bsh)
bool GetTSANStretchErrors(const CBioseq &seq)
const CCacheImpl::TFeatValue * m_GeneIt
void GapByGapInst(const CBioseq &seq)
void x_ValidatePubFeatures(const CBioseq_Handle &bsh)
void ValidateSeqDescr(const CSeq_descr &descr, const CSeq_entry &ctx)
bool ValidateStructuredComment(const CSeqdesc &desc, bool report)
void ValidateSeqFeatContext(const CSeq_feat &feat, const CBioseq &seq)
void SetScope(CScope &scope)
void SetTSE(CSeq_entry_Handle seh)
void ValidateGraphsOnBioseq(const CBioseq &seq)
const CSeq_entry_Handle & GetTSEH()
void PostErr(EDiagSev sv, EErrType et, const string &msg, const CSerialObject &obj)
bool IsSyntheticConstruct(const CBioSource &src)
bool HasGiOrAccnVer() const
const SValidatorContext & GetContext() const
void AddBioseqWithNoBiosource(const CBioseq &seq)
CConstRef< CSeq_feat > GetCachedGene(const CSeq_feat *f)
bool IsValidateIdSet() const
void PostObjErr(EDiagSev sv, EErrType et, const string &msg, const CSerialObject &obj, const CSeq_entry *ctx=nullptr)
static bool IsWGSIntermediate(const CBioseq &seq)
bool IsNoCitSubPubs() const
CConstRef< CSeq_feat > GetCDSGivenProduct(const CBioseq &seq)
CBioseq_Handle GetLocalBioseqHandle(const CSeq_id &id)
bool IsSeqSubmitParent() const
bool x_IsFarFetchFailure(const CSeq_loc &loc)
void AddBioseqWithNoPub(const CBioseq &seq)
bool IsGenomeSubmission() const
void AddProtWithoutFullRef(const CBioseq_Handle &seq)
bool IsArtificial(const CBioSource &src)
void ValidateBioSourceForSeq(const CBioSource &bsrc, const CSerialObject &obj, const CSeq_entry *ctx, const CBioseq_Handle &bsh)
void IncrementTpaWithHistoryCount()
bool IsNoBioSource() const
bool IsLocalGeneralOnly() const
void SetFarFetchFailure()
void IncrementTpaWithoutHistoryCount()
bool IsRefSeqConventions() const
bool IsIndexerVersion() const
CGeneCache & GetGeneCache()
bool IsSmallGenomeSet() const
void ValidateSeqLoc(const CSeq_loc &loc, const CBioseq_Handle &seq, bool report_abutting, const string &prefix, const CSerialObject &obj, bool lowerSev=false)
bool DoCompareVDJCtoCDS() const
bool ShouldSubdivide() const
bool IsTransgenic(const CBioSource &bsrc)
vector< string > m_unpublished_labels
vector< string > m_published_labels
const TFeatValue & GetFeatFromCache(const SFeatKey &featKey)
AutoPtr< TFeatValue > GetFeatFromCacheMulti(const vector< SFeatKey > &featKeys)
const CPubdescInfo & GetPubdescToInfo(CConstRef< CPubdesc > pub)
static const CSeqFeatData::ESubtype kAnyFeatSubtype
static const CSeqFeatData::E_Choice kAnyFeatType
std::vector< CMappedFeat > TFeatValue
@ fLabel_Unique
Append a unique tag [V1].
container_type::const_iterator const_iterator
container_type::iterator iterator
const_iterator begin() const
const_iterator end() const
const_iterator find(const key_type &key) const
const_iterator find(const key_type &key) const
const_iterator end() const
iterator insert(const value_type &val)
container_type::iterator iterator
container_type::value_type value_type
iterator_bool insert(const value_type &val)
const_iterator find(const key_type &key) const
const_iterator end() const
Include a standard set of the NCBI C++ Toolkit most basic headers.
The NCBI C++ standard methods for dealing with std::string.
static const char si[8][64]
bool AllowOrphanedProtein(const CBioseq &seq, bool force_refseq=false)
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
static DLIST_TYPE *DLIST_NAME() last(DLIST_LIST_TYPE *list)
static DLIST_TYPE *DLIST_NAME() prev(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
static DLIST_TYPE *DLIST_NAME() next(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
static const char * str(char *buf, int n)
#define FOR_EACH_USERFIELD_ON_USEROBJECT(Itr, Var)
FOR_EACH_USERFIELD_ON_USEROBJECT EDIT_EACH_USERFIELD_ON_USEROBJECT.
forward_list< Gene > TGeneList
SStrictId_Entrez::TId TEntrezId
TEntrezId type for entrez ids which require the same strictness as TGi.
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
#define TAX_ID_FROM(T, value)
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
#define ERR_POST_X(err_subcode, message)
Error posting with default error code and given error subcode.
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
EDiagSev
Severity level for the posted diagnostics.
#define LOG_POST_XX(error_name, err_subcode, message)
@ eDiag_Info
Informational message.
@ eDiag_Error
Error message.
@ eDiag_Warning
Warning message.
@ eDiag_Fatal
Fatal error -- guarantees exit (or abort)
@ eDiag_Critical
Critical error message.
void Critical(CExceptionArgs_Base &args)
void Error(CExceptionArgs_Base &args)
const string & GetMsg(void) const
Get message string.
virtual const char * what(void) const noexcept
Standard report (includes full backlog).
const string & FindName(TEnumValueType value, bool allowBadValue) const
Find name of the enum by its numeric value.
const TPrim & Get(void) const
#define ENUM_METHOD_NAME(EnumName)
const string AsFastaString(void) const
static EAccessionInfo IdentifyAccession(const CTempString &accession, TParseFlags flags=fParse_AnyRaw)
Deduces information from a bare accession a la WHICH_db_accession; may report false negatives on prop...
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
static const size_t kMaxLocalIDLength
ID length restrictions.
void GetLabel(string *label, ELabelType type=eDefault, TLabelFlags flags=fLabel_Default) const
Append a label for this Seq-id to the supplied string.
CConstRef< CSeq_id > GetSeqId(void) const
EAccessionInfo
For IdentifyAccession (below)
int CompareOrdered(const CSeq_id &sid2) const
bool Match(const CSeq_id &sid2) const
Match() - TRUE if SeqIds are equivalent.
static bool IsValidLocalID(const CTempString &s)
Perform rudimentary validation on potential local IDs, whose contents should be pure ASCII and limite...
static const size_t kMaxGeneralTagLength
CSeq_id::E_Choice Which(void) const
string GetLabel(const CSeq_id &id)
static const size_t kMaxGeneralDBLength
@ e_YES
SeqIds compared, but are different.
@ eContent
Untagged human-readable accession or the like.
bool IsPartialStart(ESeqLocExtremes ext) const
check start or stop of location for e_Lim fuzz
ENa_strand GetStrand(void) const
Get the location's strand.
TRange GetTotalRange(void) const
TSeqPos GetStart(ESeqLocExtremes ext) const
Return start and stop positions of the seq-loc.
CConstRef< CSeq_loc > GetRangeAsSeq_loc(void) const
Get seq-loc for the current iterator position.
void Add(const CSeq_loc &other)
Simple adding of seq-locs.
bool IsSetStrand(EIsSetStrand flag=eIsSetStrand_Any) const
Check if strand is set for any/all part(s) of the seq-loc depending on the flag.
const CSeq_loc & GetEmbeddingSeq_loc(void) const
Get the nearest seq-loc containing the current range.
const CSeq_id * GetId(void) const
Get the id of the location; returns NULL if it has multiple ids or no id at all.
TRange GetRange(void) const
Get the range.
ENa_strand GetStrand(void) const
void GetLabel(string *label) const
Appends a label suitable for display (e.g., error messages) label must point to an existing string ob...
bool IsPartialStop(ESeqLocExtremes ext) const
TSeqPos GetStop(ESeqLocExtremes ext) const
CConstBeginInfo ConstBegin(const C &obj)
Get starting point of non-modifiable object hierarchy.
CMappedFeat GetParent(const CMappedFeat &feat)
Return nearest parent of a feature.
CMappedFeat GetBestOverlappingFeat(const CMappedFeat &feat, CSeqFeatData::ESubtype need_subtype, sequence::EOverlapType overlap_type, CFeatTree *feat_tree=0, const SAnnotSelector *base_sel=0)
@ fFGL_Content
Include its content if there is any.
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
Int8 TestForOverlapEx(const CSeq_loc &loc1, const CSeq_loc &loc2, EOverlapType type, CScope *scope=0, TOverlapFlags flags=fOverlap_Default)
Updated version of TestForOverlap64().
int SeqLocPartialCheck(const CSeq_loc &loc, CScope *scope)
sequence::ECompare Compare(const CSeq_loc &loc1, const CSeq_loc &loc2, CScope *scope)
Returns the sequence::ECompare containment relationship between CSeq_locs.
bool IsOneBioseq(const CSeq_loc &loc, CScope *scope)
Returns true if all embedded CSeq_ids represent the same CBioseq, else false.
bool IsSameBioseq(const CSeq_id &id1, const CSeq_id &id2, CScope *scope, CScope::EGetBioseqFlag get_flag=CScope::eGetBioseq_All)
Determines if two CSeq_ids represent the same CBioseq.
@ eSeqlocPartial_Nointernal
@ eSeqlocPartial_Complete
@ eSeqlocPartial_Limwrong
@ fCompareOverlapping
Check if seq-locs are overlapping.
@ eOverlap_SubsetRev
1st is a subset of 2nd ranges
@ eOverlap_CheckIntervals
2nd is a subset of 1st with matching boundaries
@ eOverlap_Contains
2nd contains 1st extremes
@ eOverlap_CheckIntRev
1st is a subset of 2nd with matching boundaries
@ eOverlap_Simple
any overlap of extremes
@ eOverlap_Contained
2nd contained within 1st extremes
@ eOverlap_Subset
2nd is a subset of 1st ranges
@ eSame
CSeq_locs contain each other.
@ eContained
First CSeq_loc contained by second.
@ eNoOverlap
CSeq_locs do not overlap or abut.
const CSeq_feat * GetCDSForProduct(const CBioseq &product, CScope *scope)
Get the encoding CDS feature of a given protein sequence.
bool IsPseudo(const CSeq_feat &feat, CScope &scope)
Determines whether given feature is pseudo, using gene associated with feature if necessary Checks to...
CConstRef< CSeq_feat > GetOverlappingOperon(const CSeq_loc &loc, CScope &scope)
const CSeq_feat * GetPROTForProduct(const CBioseq &product, CScope *scope)
Get the mature peptide feature of a protein.
vector< TFeatScore > TFeatScores
void GetOverlappingFeatures(const CSeq_loc &loc, CSeqFeatData::E_Choice feat_type, CSeqFeatData::ESubtype feat_subtype, EOverlapType overlap_type, TFeatScores &feats, CScope &scope, const TBestFeatOpts opts=0, CGetOverlappingFeaturesPlugin *plugin=NULL)
Find all features overlapping the location.
static void Translate(const string &seq, string &prot, const CGenetic_code *code, bool include_stop=true, bool remove_trailing_X=false, bool *alt_start=NULL, bool is_5prime_complete=true, bool is_3prime_complete=true)
Translate a string using a specified genetic code.
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CSeq_entry_Handle AddTopLevelSeqEntry(CSeq_entry &top_entry, TPriority pri=kPriority_Default, EExist action=eExist_Default)
Add seq_entry; default priority is higher than for defaults or loaders. Add object to the scope with p...
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
void AddDefaults(TPriority pri=kPriority_Default)
Add default data loaders from object manager.
CSeq_feat_Handle GetSeq_featHandle(const CSeq_feat &feat, EMissing action=eMissing_Default)
vector< CSeq_id_Handle > TIds
@ eGetBioseq_All
Search bioseq, load if not loaded yet.
bool IsSetExcept(void) const
const CFeat_id & GetId(void) const
bool IsSetInst_Mol(void) const
bool IsSetComment(void) const
const CTSE_Handle & GetTSE_Handle(void) const
Get CTSE_Handle of containing TSE.
CConstRef< CBioseq > GetCompleteBioseq(void) const
Get the complete bioseq.
TClass GetClass(void) const
CSeq_entry_Handle GetParentEntry(void) const
Get parent Seq-entry handle.
TBioseqCore GetBioseqCore(void) const
Get bioseq core structure.
virtual CConstRef< CSeq_feat > GetSeq_feat(void) const
const CSeqFeatData & GetData(void) const
TSeqPos GetBioseqLength(void) const
CSeq_entry_Handle GetSeq_entry_Handle(void) const
Get parent Seq-entry handle.
bool IsSetExcept_text(void) const
TInst_Mol GetInst_Mol(void) const
bool IsSetProduct(void) const
bool IsSetInst_Length(void) const
TInst_Topology GetInst_Topology(void) const
const string & GetComment(void) const
TInst_Length GetInst_Length(void) const
const string & GetExcept_text(void) const
bool IsSetInst(void) const
void Reset(void)
Reset handle and make it not to point to any bioseq.
bool IsSetInst_Repr(void) const
bool IsSetClass(void) const
CConstRef< CSeq_entry > GetCompleteSeq_entry(void) const
Complete and get const reference to the seq-entry.
TInst_Repr GetInst_Repr(void) const
CScope & GetScope(void) const
Get scope this handle belongs to.
CConstRef< CSeq_feat > GetOriginalSeq_feat(void) const
CSeq_entry_Handle GetExactComplexityLevel(CBioseq_set::EClass cls) const
Return level with exact complexity, or empty handle if not found.
CSeqFeatData::ESubtype GetFeatSubtype(void) const
bool IsSetInst_Topology(void) const
CSeq_entry_Handle GetTopLevelEntry(void) const
Get top level Seq-entry handle.
CSeq_entry_Handle GetParentEntry(void) const
Get parent Seq-entry handle.
const TId & GetId(void) const
TMol GetBioseqMolType(void) const
Get some values from core:
CSeqVector GetSeqVector(EVectorCoding coding, ENa_strand strand=eNa_strand_plus) const
Get sequence: Iupacna or Iupacaa if use_iupac_coding is true.
bool CanGetInst_Mol(void) const
const TInst & GetInst(void) const
@ eCoding_Ncbi
Set coding to binary coding (Ncbi4na or Ncbistdaa)
@ eCoding_Iupac
Set coding to printable coding (Iupacna or Iupacaa)
TSeqPos GetEndPosition(void) const
return end position of current segment in sequence (exclusive)
SSeqMapSelector & SetResolveCount(size_t res_cnt)
Set max depth of resolving seq-map.
SAnnotSelector & IncludeFeatSubtype(TFeatSubtype subtype)
Include feature subtype in the search.
bool IsSetPartial(void) const
const CSeq_loc & GetLocation(void) const
bool GetPartial(void) const
SSeqMapSelector & SetFlags(TFlags flags)
Select segment type(s)
const CSeq_feat & GetOriginalFeature(void) const
Get original feature with unmapped location/product.
const CSeq_feat_Handle & GetSeq_feat_Handle(void) const
Get original feature handle.
const CSeq_loc & GetProduct(void) const
SAnnotSelector & SetFeatSubtype(TFeatSubtype subtype)
Set feature subtype (also set annotation and feat type)
TSeqPos GetPosition(void) const
return position of current segment in sequence
CConstRef< CSeq_feat > GetSeq_feat(void) const
Get current seq-feat.
TCoding GetCoding(void) const
Target sequence coding.
bool IsInGap(TSeqPos pos) const
True if the sequence at 0-based position 'pos' has a gap. Note: this method is not MT-safe,...
@ eSeqData
real sequence data
TObjectType * GetPointer(void) const THROWS_NONE
Get pointer.
bool IsNull(void) const THROWS_NONE
Check if pointer is null -- same effect as Empty().
CConstRef< C > ConstRef(const C *object)
Template function for conversion of const object pointer to CConstRef.
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
void Reset(void)
Reset reference object.
void Reset(void)
Reset reference object.
int32_t Int4
4-byte (32-bit) signed integer
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
static string SizetToString(size_t value, TNumToStringFlags flags=0, int base=10)
Convert size_t to string.
CTempString literal(const char(&str)[Size])
Templatized initialization from a string literal.
static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive compare of a substring with another string.
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)
Find the pattern in the specified range of a string using a case insensitive search.
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
static bool EqualCase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-sensitive equality of a substring with another string.
static int Compare(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Compare of a substring with another string.
static string UIntToString(unsigned int value, TNumToStringFlags flags=0, int base=10)
Convert UInt to string.
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
size_type length(void) const
Return the length of the represented array.
CTempString substr(size_type pos) const
Obtain a substring from this string, beginning at a given offset.
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
static bool Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Test for equality of a substring with another string.
@ eNocase
Case insensitive compare.
static const char label[]
const TKeywords & GetKeywords(void) const
Get the Keywords member data.
bool IsSetExtra_acc(void) const
Check if a value has been assigned to Extra_acc data member.
const TExtra_acc & GetExtra_acc(void) const
Get the Extra_acc member data.
bool IsSetKeywords(void) const
Check if a value has been assigned to Keywords data member.
bool IsSetExtra_accessions(void) const
Check if a value has been assigned to Extra_accessions data member.
const TExtra_accessions & GetExtra_accessions(void) const
Get the Extra_accessions member data.
const TKeywords & GetKeywords(void) const
Get the Keywords member data.
bool IsSetKeywords(void) const
Check if a value has been assigned to Keywords data member.
const TSubtype & GetSubtype(void) const
Get the Subtype member data.
TGenome GetGenome(void) const
Get the Genome member data.
TOrigin GetOrigin(void) const
Get the Origin member data.
bool IsSetOrg(void) const
Check if a value has been assigned to Org data member.
bool IsSetSubtype(void) const
Check if a value has been assigned to Subtype data member.
const TOrg & GetOrg(void) const
Get the Org member data.
bool IsSetOrigin(void) const
Check if a value has been assigned to Origin data member.
TSubtype GetSubtype(void) const
Get the Subtype member data.
bool IsSetGenome(void) const
Check if a value has been assigned to Genome data member.
const TName & GetName(void) const
Get the Name member data.
bool IsSetIs_focus(void) const
to distinguish biological focus Check if a value has been assigned to Is_focus data member.
bool IsSetName(void) const
Check if a value has been assigned to Name data member.
@ eOrigin_synthetic
purely synthetic
@ eOrigin_mut
artificially mutagenized
@ eOrigin_artificial
artificially engineered
TTo GetTo(void) const
Get the To member data.
TFrom GetFrom(void) const
Get the From member data.
const TDesc & GetDesc(void) const
Get the Desc member data.
bool IsSetPseudo(void) const
pseudogene Check if a value has been assigned to Pseudo data member.
bool IsSetLocus_tag(void) const
systematic gene name (e.g., MI0001, ORF0069) Check if a value has been assigned to Locus_tag data mem...
bool IsSetLocus(void) const
Official gene symbol Check if a value has been assigned to Locus data member.
bool IsSetDesc(void) const
descriptive name Check if a value has been assigned to Desc data member.
const TLocus_tag & GetLocus_tag(void) const
Get the Locus_tag member data.
const TLocus & GetLocus(void) const
Get the Locus member data.
TPseudo GetPseudo(void) const
Get the Pseudo member data.
const TStr & GetStr(void) const
Get the variant data.
bool IsSetData(void) const
the object itself Check if a value has been assigned to Data data member.
bool IsStr(void) const
Check if variant Str is selected.
bool IsSetDb(void) const
name of database or system Check if a value has been assigned to Db data member.
bool CanGetType(void) const
Check if it is safe to call GetType method.
bool IsSetType(void) const
type of object within class Check if a value has been assigned to Type data member.
bool IsLim(void) const
Check if variant Lim is selected.
bool IsSetYear(void) const
full year (including 1900) Check if a value has been assigned to Year data member.
bool IsStd(void) const
Check if variant Std is selected.
const TTag & GetTag(void) const
Get the Tag member data.
bool IsStrs(void) const
Check if variant Strs is selected.
const TStrs & GetStrs(void) const
Get the variant data.
bool IsId(void) const
Check if variant Id is selected.
const TData & GetData(void) const
Get the Data member data.
bool IsSetTag(void) const
appropriate tag Check if a value has been assigned to Tag data member.
const TDb & GetDb(void) const
Get the Db member data.
TLim GetLim(void) const
Get the variant data.
bool IsStr(void) const
Check if variant Str is selected.
bool IsSetLabel(void) const
field label Check if a value has been assigned to Label data member.
const TStr & GetStr(void) const
Get the variant data.
const TData & GetData(void) const
Get the Data member data.
const TStr & GetStr(void) const
Get the variant data.
const TLabel & GetLabel(void) const
Get the Label member data.
const TType & GetType(void) const
Get the Type member data.
bool IsSetData(void) const
Check if a value has been assigned to Data data member.
vector< CStringUTF8 > TStrs
const TStd & GetStd(void) const
Get the variant data.
vector< CRef< CUser_field > > TData
TId GetId(void) const
Get the variant data.
bool IsStr(void) const
Check if variant Str is selected.
const TMod & GetMod(void) const
Get the Mod member data.
bool IsSetDb(void) const
ids in taxonomic or culture dbases Check if a value has been assigned to Db data member.
const TLineage & GetLineage(void) const
Get the Lineage member data.
TSubtype GetSubtype(void) const
Get the Subtype member data.
bool CanGetDiv(void) const
Check if it is safe to call GetDiv method.
const TDiv & GetDiv(void) const
Get the Div member data.
const TSubname & GetSubname(void) const
Get the Subname member data.
bool IsSetLineage(void) const
lineage with semicolon separators Check if a value has been assigned to Lineage data member.
const TTaxname & GetTaxname(void) const
Get the Taxname member data.
const TDb & GetDb(void) const
Get the Db member data.
bool IsSetMod(void) const
Check if a value has been assigned to Mod data member.
bool IsSetOrgname(void) const
Check if a value has been assigned to Orgname data member.
bool IsSetTaxname(void) const
preferred formal name Check if a value has been assigned to Taxname data member.
const TOrgname & GetOrgname(void) const
Get the Orgname member data.
const TName & GetName(void) const
Get the Name member data.
TProcessed GetProcessed(void) const
Get the Processed member data.
bool IsSetProcessed(void) const
Check if a value has been assigned to Processed data member.
bool IsSetName(void) const
protein name Check if a value has been assigned to Name data member.
list< CRef< CPub > > Tdata
const Tdata & Get(void) const
Get the member data.
const TEquiv & GetEquiv(void) const
Get the variant data.
bool IsEquiv(void) const
Check if variant Equiv is selected.
bool IsSub(void) const
Check if variant Sub is selected.
TType GetType(void) const
Get the Type member data.
bool IsSetExt(void) const
generic fields for ncRNA, tmRNA, miscRNA Check if a value has been assigned to Ext data member.
bool CanGetExt(void) const
Check if it is safe to call GetExt method.
bool IsSetType(void) const
Check if a value has been assigned to Type data member.
const TName & GetName(void) const
Get the variant data.
const TExt & GetExt(void) const
Get the Ext member data.
bool IsName(void) const
Check if variant Name is selected.
const TKey & GetKey(void) const
Get the Key member data.
bool IsSetData(void) const
the specific data Check if a value has been assigned to Data data member.
bool IsSetQual(void) const
qualifiers Check if a value has been assigned to Qual data member.
E_Choice Which(void) const
Which variant is currently selected.
bool IsSetCode(void) const
genetic code used Check if a value has been assigned to Code data member.
bool IsSetExt(void) const
user defined structure extension Check if a value has been assigned to Ext data member.
bool IsCdregion(void) const
Check if variant Cdregion is selected.
bool IsImp(void) const
Check if variant Imp is selected.
const TQual & GetQual(void) const
Get the Qual member data.
bool IsSetPartial(void) const
incomplete in some way? Check if a value has been assigned to Partial data member.
const TId & GetId(void) const
Get the Id member data.
const TLocation & GetLocation(void) const
Get the Location member data.
E_Choice
Choice variants.
bool IsLocal(void) const
Check if variant Local is selected.
bool IsGene(void) const
Check if variant Gene is selected.
TFrame GetFrame(void) const
Get the Frame member data.
const TData & GetData(void) const
Get the Data member data.
bool IsSetExcept(void) const
something funny about this? Check if a value has been assigned to Except data member.
const TExcept_text & GetExcept_text(void) const
Get the Except_text member data.
bool IsSetExcept_text(void) const
explain if except=TRUE Check if a value has been assigned to Except_text data member.
const TCode & GetCode(void) const
Get the Code member data.
const TDbxref & GetDbxref(void) const
Get the Dbxref member data.
list< CRef< C_E > > Tdata
const TCdregion & GetCdregion(void) const
Get the variant data.
const TBiosrc & GetBiosrc(void) const
Get the variant data.
bool IsSetId(void) const
Check if a value has been assigned to Id data member.
TPseudo GetPseudo(void) const
Get the Pseudo member data.
const TProduct & GetProduct(void) const
Get the Product member data.
const Tdata & Get(void) const
Get the member data.
bool IsSetPseudo(void) const
annotated on pseudogene? Check if a value has been assigned to Pseudo data member.
bool IsBiosrc(void) const
Check if variant Biosrc is selected.
void SetPseudo(TPseudo value)
Assign a value to Pseudo data member.
const TGene & GetGene(void) const
Get the variant data.
TPartial GetPartial(void) const
Get the Partial member data.
const TProt & GetProt(void) const
Get the variant data.
TExcept GetExcept(void) const
Get the Except member data.
vector< CRef< CGb_qual > > TQual
const TRna & GetRna(void) const
Get the variant data.
bool IsSetDbxref(void) const
support for xref to other databases Check if a value has been assigned to Dbxref data member.
bool IsSetProduct(void) const
product of process Check if a value has been assigned to Product data member.
const TExt & GetExt(void) const
Get the Ext member data.
bool IsRna(void) const
Check if variant Rna is selected.
bool IsRegion(void) const
Check if variant Region is selected.
const TImp & GetImp(void) const
Get the variant data.
bool IsSetFrame(void) const
Check if a value has been assigned to Frame data member.
bool IsSetLocation(void) const
feature made from Check if a value has been assigned to Location data member.
@ e_Pub
publication applies to this seq
bool IsGenbank(void) const
Check if variant Genbank is selected.
TChain GetChain(void) const
Get the Chain member data.
bool IsSetChain_id(void) const
chain identifier; length-independent generalization of 'chain' Check if a value has been assigned to ...
bool IsSetChain(void) const
Deprecated: 'chain' can't support multiple character PDB chain identifiers (introduced in 2015).
bool IsSetAccession(void) const
Check if a value has been assigned to Accession data member.
bool IsTpg(void) const
Check if variant Tpg is selected.
const TName & GetName(void) const
Get the Name member data.
list< CRef< CSeq_interval > > Tdata
ENa_strand
strand of nucleic acid
const Tdata & Get(void) const
Get the member data.
const TId & GetId(void) const
Get the Id member data.
const TPnt & GetPnt(void) const
Get the variant data.
bool IsTpd(void) const
Check if variant Tpd is selected.
TPoint GetPoint(void) const
Get the Point member data.
bool IsOther(void) const
Check if variant Other is selected.
TFrom GetFrom(void) const
Get the From member data.
bool IsGeneral(void) const
Check if variant General is selected.
bool IsEmbl(void) const
Check if variant Embl is selected.
E_Choice Which(void) const
Which variant is currently selected.
TGi GetGi(void) const
Get the variant data.
TVersion GetVersion(void) const
Get the Version member data.
E_Choice
Choice variants.
const TOther & GetOther(void) const
Get the variant data.
bool IsPacked_int(void) const
Check if variant Packed_int is selected.
Tdata & Set(void)
Assign a value to data member.
const TChain_id & GetChain_id(void) const
Get the Chain_id member data.
const TGeneral & GetGeneral(void) const
Get the variant data.
bool IsGi(void) const
Check if variant Gi is selected.
TTo GetTo(void) const
Get the To member data.
bool IsWhole(void) const
Check if variant Whole is selected.
bool IsInt(void) const
Check if variant Int is selected.
const TInt & GetInt(void) const
Get the variant data.
bool IsSetVersion(void) const
Check if a value has been assigned to Version data member.
bool IsTpe(void) const
Check if variant Tpe is selected.
bool IsPnt(void) const
Check if variant Pnt is selected.
bool IsSetName(void) const
Check if a value has been assigned to Name data member.
const TPacked_int & GetPacked_int(void) const
Get the variant data.
const TAccession & GetAccession(void) const
Get the Accession member data.
bool IsDdbj(void) const
Check if variant Ddbj is selected.
@ e_Other
for historical reasons, 'other' = 'refseq'
@ e_Tpe
Third Party Annot/Seq EMBL.
@ e_Tpd
Third Party Annot/Seq DDBJ.
@ e_General
for other databases
@ e_Gi
GenInfo Integrated Database.
@ e_Tpg
Third Party Annot/Seq Genbank.
const TSeq & GetSeq(void) const
Get the variant data.
bool IsSetClass(void) const
Check if a value has been assigned to Class data member.
TClass GetClass(void) const
Get the Class member data.
const TSet & GetSet(void) const
Get the variant data.
bool IsSeq(void) const
Check if variant Seq is selected.
bool IsSetSeq_set(void) const
Check if a value has been assigned to Seq_set data member.
bool IsSet(void) const
Check if variant Set is selected.
const TSeq_set & GetSeq_set(void) const
Get the Seq_set member data.
list< CRef< CSeq_entry > > TSeq_set
@ eClass_parts
parts for 2 or 3
@ eClass_pop_set
population study
@ eClass_phy_set
phylogenetic study
@ eClass_mut_set
set of mutations
@ eClass_eco_set
ecological sample study
@ eClass_nuc_prot
nuc acid and coded proteins
@ eClass_gen_prod_set
genomic products, chrom+mRNA+protein
@ eClass_segset
segmented sequence + parts
const TIupacaa & GetIupacaa(void) const
Get the variant data.
bool IsSetLinkage(void) const
Check if a value has been assigned to Linkage data member.
TRepr GetRepr(void) const
Get the Repr member data.
bool IsMap(void) const
Check if variant Map is selected.
const TSeg & GetSeg(void) const
Get the variant data.
bool IsSetCompleteness(void) const
Check if a value has been assigned to Completeness data member.
list< CRef< CSeqdesc > > Tdata
bool IsRef(void) const
Check if variant Ref is selected.
bool IsSetReplaced_by(void) const
these seqs make this one obsolete Check if a value has been assigned to Replaced_by data member.
const TUser & GetUser(void) const
Get the variant data.
bool IsSetSeq_data(void) const
the sequence Check if a value has been assigned to Seq_data data member.
TLinkage GetLinkage(void) const
Get the Linkage member data.
TStrand GetStrand(void) const
Get the Strand member data.
ERepr
representation class
const TInst & GetInst(void) const
Get the Inst member data.
const TGap & GetGap(void) const
Get the variant data.
bool IsSetAssembly(void) const
how was this assembled? Check if a value has been assigned to Assembly data member.
TTopology GetTopology(void) const
Get the Topology member data.
const TIupacna & GetIupacna(void) const
Get the variant data.
const TUpdate_date & GetUpdate_date(void) const
Get the variant data.
const TNcbipna & GetNcbipna(void) const
Get the variant data.
bool IsSetRepr(void) const
Check if a value has been assigned to Repr data member.
const TNcbipaa & GetNcbipaa(void) const
Get the variant data.
TType GetType(void) const
Get the Type member data.
bool IsSetMol(void) const
Check if a value has been assigned to Mol data member.
const TTitle & GetTitle(void) const
Get the variant data.
const TSource & GetSource(void) const
Get the variant data.
const TPub & GetPub(void) const
Get the variant data.
bool IsSetStrand(void) const
Check if a value has been assigned to Strand data member.
const TNcbi8aa & GetNcbi8aa(void) const
Get the variant data.
const TLiteral & GetLiteral(void) const
Get the variant data.
bool IsSetBiomol(void) const
Check if a value has been assigned to Biomol data member.
bool IsLoc(void) const
Check if variant Loc is selected.
E_Choice
Choice variants.
const TId & GetId(void) const
Get the Id member data.
bool IsSetHist(void) const
sequence history Check if a value has been assigned to Hist data member.
bool IsNcbi4na(void) const
Check if variant Ncbi4na is selected.
TTech GetTech(void) const
Get the Tech member data.
bool IsSetExt(void) const
extensions for special types Check if a value has been assigned to Ext data member.
const Tdata & Get(void) const
Get the member data.
bool IsSetReplaces(void) const
seq makes these seqs obsolete Check if a value has been assigned to Replaces data member.
bool IsSetInst(void) const
the sequence data Check if a value has been assigned to Inst data member.
bool IsNcbi8na(void) const
Check if variant Ncbi8na is selected.
TLength GetLength(void) const
Get the Length member data.
const TOrg & GetOrg(void) const
Get the variant data.
TLength GetLength(void) const
Get the Length member data.
const TAssembly & GetAssembly(void) const
Get the Assembly member data.
list< CRef< CSeq_id > > TId
const TGenbank & GetGenbank(void) const
Get the variant data.
bool IsSeg(void) const
Check if variant Seg is selected.
list< CRef< CSeq_id > > TIds
bool CanGetLength(void) const
Check if it is safe to call GetLength method.
const TFuzz & GetFuzz(void) const
Get the Fuzz member data.
TMol GetMol(void) const
Get the Mol member data.
const TIds & GetIds(void) const
Get the Ids member data.
const TLinkage_evidence & GetLinkage_evidence(void) const
Get the Linkage_evidence member data.
bool IsName(void) const
Check if variant Name is selected.
const TNcbieaa & GetNcbieaa(void) const
Get the variant data.
bool IsSetFuzz(void) const
Could be unsure. Check if a value has been assigned to Fuzz data member.
TType GetType(void) const
Get the Type member data.
bool IsDelta(void) const
Check if variant Delta is selected.
const TNcbistdaa & GetNcbistdaa(void) const
Get the variant data.
bool IsSetLength(void) const
Length of sequence in residues. Check if a value has been assigned to Length data member.
bool CanGetHist(void) const
Check if it is safe to call GetHist method.
const THist & GetHist(void) const
Get the Hist member data.
bool IsSetType(void) const
Check if a value has been assigned to Type data member.
const TExt & GetExt(void) const
Get the Ext member data.
bool CanGetRepr(void) const
Check if it is safe to call GetRepr method.
bool IsSetDescr(void) const
Descriptors. Check if a value has been assigned to Descr data member.
E_Choice
Choice variants.
TMol_type GetMol_type(void) const
Get the variant data.
const TEmbl & GetEmbl(void) const
Get the variant data.
TBiomol GetBiomol(void) const
Get the Biomol member data.
bool CanGetType(void) const
Check if it is safe to call GetType method.
EMol
molecule class in living organism
bool IsSetLength(void) const
Must give a length in residues. Check if a value has been assigned to Length data member.
bool IsSetTech(void) const
Check if a value has been assigned to Tech data member.
const TDelta & GetDelta(void) const
Get the variant data.
bool IsSetPub(void) const
The citation(s). Check if a value has been assigned to Pub data member.
const TNcbi4na & GetNcbi4na(void) const
Get the variant data.
const TLoc & GetLoc(void) const
Get the variant data.
TCompleteness GetCompleteness(void) const
Get the Completeness member data.
const TModif & GetModif(void) const
Get the variant data.
bool IsSet(void) const
Check if a value has been assigned to data member.
bool CanGetSeq_data(void) const
Check if it is safe to call GetSeq_data method.
const TNcbi2na & GetNcbi2na(void) const
Get the variant data.
const Tdata & Get(void) const
Get the member data.
E_Choice Which(void) const
Which variant is currently selected.
bool IsSetDate(void) const
Check if a value has been assigned to Date data member.
bool CanGetExt(void) const
Check if it is safe to call GetExt method.
bool IsSetId(void) const
Equivalent identifiers. Check if a value has been assigned to Id data member.
const TCreate_date & GetCreate_date(void) const
Get the variant data.
bool IsLiteral(void) const
Check if variant Literal is selected.
bool IsSetSeq_data(void) const
May have the data. Check if a value has been assigned to Seq_data data member.
list< CRef< CDelta_seq > > Tdata
const TReplaces & GetReplaces(void) const
Get the Replaces member data.
const Tdata & Get(void) const
Get the member data.
bool IsGap(void) const
Check if variant Gap is selected.
const TPub & GetPub(void) const
Get the Pub member data.
const TSeq_data & GetSeq_data(void) const
Get the Seq_data member data.
bool IsNcbi2na(void) const
Check if variant Ncbi2na is selected.
const TReplaced_by & GetReplaced_by(void) const
Get the Replaced_by member data.
list< CRef< CSeq_loc > > Tdata
const TNcbi8na & GetNcbi8na(void) const
Get the variant data.
const TDescr & GetDescr(void) const
Get the Descr member data.
const TComment & GetComment(void) const
Get the variant data.
const TMolinfo & GetMolinfo(void) const
Get the variant data.
bool IsIupacna(void) const
Check if variant Iupacna is selected.
const TName & GetName(void) const
Get the variant data.
const TSeq_data & GetSeq_data(void) const
Get the Seq_data member data.
list< CRef< CLinkage_evidence > > TLinkage_evidence
const TRef & GetRef(void) const
Get the variant data.
bool CanGetInst(void) const
Check if it is safe to call GetInst method.
bool IsSetLinkage_evidence(void) const
Check if a value has been assigned to Linkage_evidence data member.
bool IsSetTopology(void) const
Check if a value has been assigned to Topology data member.
bool IsSetFuzz(void) const
Length uncertainty. Check if a value has been assigned to Fuzz data member.
E_Choice Which(void) const
Which variant is currently selected.
@ eRepr_const
constructed sequence
@ eRepr_ref
reference to another sequence
@ eRepr_seg
segmented sequence
@ eRepr_delta
sequence made by changes (delta) to others
@ eRepr_map
ordered map of any kind
@ eRepr_raw
continuous sequence
@ eRepr_virtual
no seq data
@ eCompleteness_complete
complete biological entity
@ eCompleteness_no_left
missing 5' or NH3 end
@ eCompleteness_partial
partial but no details given
@ eCompleteness_no_right
missing 3' or COOH end
@ eCompleteness_no_ends
missing both ends
@ eTech_htgs_2
ordered High Throughput sequence contig
@ eTech_physmap
from physical mapping techniques
@ eTech_htc
high throughput cDNA
@ eTech_both
concept transl. w/ partial pept. seq.
@ eTech_targeted
targeted locus sets/studies
@ eTech_seq_pept_homol
sequenced peptide, ordered by homology
@ eTech_composite_wgs_htgs
composite of WGS and HTGS
@ eTech_sts
Sequence Tagged Site.
@ eTech_htgs_3
finished High Throughput sequence
@ eTech_seq_pept_overlap
sequenced peptide, ordered by overlap
@ eTech_htgs_1
unordered High Throughput sequence contig
@ eTech_concept_trans
conceptual translation
@ eTech_tsa
transcriptome shotgun assembly
@ eTech_standard
standard sequencing
@ eTech_wgs
whole genome shotgun sequencing
@ eTech_seq_pept
peptide was sequenced
@ eTech_survey
one-pass genomic sequence
@ eTech_barcode
barcode of life project
@ eTech_htgs_0
single genomic reads for coordination
@ eTech_fli_cdna
full length insert cDNA
@ eTech_est
Expressed Sequence Tag.
@ eTech_concept_trans_a
conceptual transl. supplied by author
@ eTech_genemap
from genetic mapping techniques
@ e_not_set
No variant selected.
@ e_Ncbipna
nucleic acid probabilities
@ e_Ncbieaa
extended ASCII 1 letter aa codes
@ e_Ncbistdaa
consecutive codes for std aas
@ e_Ncbi2na
2 bit nucleic acid code
@ e_Iupacna
IUPAC 1 letter nuc acid code.
@ e_Ncbipaa
amino acid probabilities
@ e_Ncbi8na
8 bit extended nucleic acid code
@ e_Ncbi4na
4 bit nucleic acid code
@ e_Iupacaa
IUPAC 1 letter amino acid code.
@ e_Ncbi8aa
8 bit extended amino acid codes
@ eBiomol_pre_RNA
precursor RNA of any sort really
@ eBiomol_cRNA
viral RNA genome copy intermediate
@ eBiomol_snoRNA
small nucleolar RNA
@ eBiomol_transcribed_RNA
transcribed RNA other than existing classes
@ eBiomol_other_genetic
other genetic material
@ eGIBB_mod_no_right
missing right end (3' or COOH)
@ eGIBB_mod_mitochondrial
@ eGIBB_mod_no_left
missing left end (5' for na, NH2 for aa)
@ e_Embl
EMBL specific information.
@ e_Org
if all from one organism
@ e_User
user defined object
@ e_Update_date
date of last update
@ e_Pub
a reference to the publication
@ e_Pir
PIR specific info.
@ e_Genbank
GenBank specific info.
@ e_Prf
PRF specific information.
@ e_Mol_type
type of molecule
@ e_Sp
SWISSPROT specific info.
@ e_Comment
a more extensive comment
@ e_Method
sequencing method
@ e_Molinfo
info on the molecule and techniques
@ e_Create_date
date entry first created/released
@ e_Title
a title for this sequence
@ e_Pdb
PDB specific information.
@ e_Name
a name for this sequence
@ e_Source
source of materials, includes Org-ref
@ eType_clone
Deprecated. Used only for AGP 1.1.
@ eType_fragment
Deprecated. Used only for AGP 1.1.
@ eMol_not_set
> cdna = rna
@ eMol_na
just a nucleic acid
@ eStrand_ss
single strand
@ e_Literal
a piece of sequence
@ e_Loc
point to a sequence
unsigned int
A callback function used to compare two keys in a database.
where both are integers</td > n< td ></td > n</tr > n< tr > n< td > tse</td > n< td > optional</td > n< td > String</td > n< td class=\"description\"> TSE option controls what blob is orig
if(yy_accept[yy_current_state])
static void text(MDB_val *v)
constexpr auto sort(_Init &&init)
constexpr bool empty(list< Ts... >) noexcept
const struct ncbi::grid::netcache::search::fields::SIZE size
const struct ncbi::grid::netcache::search::fields::KEY key
const CharType(& source)[N]
Miscellaneous common-use basic types and functionality.
Defines: CTimeFormat - storage class for time format.
Int4 delta(size_t dimension_, const Int4 *score_)
void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)
double df(double x_, const double &y_)
static int match(PCRE2_SPTR start_eptr, PCRE2_SPTR start_ecode, uint16_t top_bracket, PCRE2_SIZE frame_size, pcre2_match_data *match_data, match_block *mb)
#define FOR_EACH_PUB_ON_PUBDESC(Itr, Var)
FOR_EACH_PUB_ON_PUBDESC EDIT_EACH_PUB_ON_PUBDESC.
#define FOR_EACH_DESCRIPTOR_ON_BIOSEQ
#define FOR_EACH_ANNOT_ON_BIOSEQ
#define FOR_EACH_SEQID_ON_BIOSEQ(Itr, Var)
FOR_EACH_SEQID_ON_BIOSEQ EDIT_EACH_SEQID_ON_BIOSEQ.
#define IF_EXISTS_CLOSEST_BIOSOURCE(Cref, Var, Lvl)
IF_EXISTS_CLOSEST_BIOSOURCE.
#define FOR_EACH_KEYWORD_ON_GENBANKBLOCK(Itr, Var)
FOR_EACH_KEYWORD_ON_GENBANKBLOCK EDIT_EACH_KEYWORD_ON_GENBANKBLOCK.
CSubSource::TSubtype TSUBSOURCE_SUBTYPE
#define NCBI_GENOME(Type)
@NAME Convenience macros for NCBI objects
#define FOR_EACH_SYNONYM_ON_GENEREF(Itr, Var)
FOR_EACH_SYNONYM_ON_GENEREF EDIT_EACH_SYNONYM_ON_GENEREF.
#define NCBI_ORGMOD(Type)
COrgMod definitions.
#define FOR_EACH_GBQUAL_ON_SEQFEAT(Itr, Var)
FOR_EACH_GBQUAL_ON_SEQFEAT EDIT_EACH_GBQUAL_ON_SEQFEAT.
#define FOR_EACH_SUBSOURCE_ON_BIOSOURCE(Itr, Var)
FOR_EACH_SUBSOURCE_ON_BIOSOURCE EDIT_EACH_SUBSOURCE_ON_BIOSOURCE.
COrgMod::TSubtype TORGMOD_SUBTYPE
#define FOR_EACH_SEQFEATXREF_ON_SEQFEAT(Itr, Var)
FOR_EACH_SEQFEATXREF_ON_SEQFEAT EDIT_EACH_SEQFEATXREF_ON_SEQFEAT.
#define FOR_EACH_DBXREF_ON_FEATURE
#define NCBI_SEQID(Type)
@NAME Convenience macros for NCBI objects
#define FOR_EACH_SEQENTRY_ON_SEQSET(Itr, Var)
FOR_EACH_SEQENTRY_ON_SEQSET EDIT_EACH_SEQENTRY_ON_SEQSET.
#define FIELD_IS_SET_AND_IS(Var, Fld, Chs)
FIELD_IS_SET_AND_IS base macro.
#define FOR_EACH_STRING_IN_LIST(Itr, Var)
FOR_EACH_STRING_IN_LIST EDIT_EACH_STRING_IN_LIST.
#define RAW_FIELD_IS_EMPTY_OR_UNSET(Var, Fld)
RAW_FIELD_IS_EMPTY_OR_UNSET macro.
#define GET_FIELD(Var, Fld)
GET_FIELD base macro.
#define FOR_EACH_CHAR_IN_STRING(Itr, Var)
FOR_EACH_CHAR_IN_STRING EDIT_EACH_CHAR_IN_STRING.
bool seq_mac_is_unique(Iterator iter1, Iterator iter2, Predicate pred)
#define BEGIN_COMMA_END(container)
static SLJIT_INLINE sljit_ins msg(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
CSeqFeatData::ESubtype feat_subtype
bool operator()(const CTempString &lhs, const CTempString &rhs) const
bool operator()(const CTempString &lhs, const CTempString &rhs) const
Selector used in CSeqMap methods returning iterators.
map< string, string > TViralMap
bool HasExcludedAnnotation(const CSeq_loc &loc, CBioseq_Handle far_bsh)
static bool s_NotPeptideException(const CSeq_feat &curr, const CSeq_feat &prev)
static char CheckForBadFileIDSeqIdChars(const string &id)
bool s_ContainedIn(const CSeq_loc &loc1, const CSeq_loc &loc2, CScope *scope)
bool s_FieldHasLabel(const CUser_field &field, const string &label)
bool s_AfterIsGapORN(TSeqPos pos, TSeqPos after, TSeqPos len, const CSeqVector &vec)
static TViralMap * s_InitializeViralMap()
bool s_CheckIntervals(const CSeq_loc &loc1, const CSeq_loc &loc2, CScope *scope)
MAKE_CONST_MAP(kViralStrandMap, string, string, { {"root", "dsDNA"}, {"Alphasatellitidae", "ssDNA"}, {"Anelloviridae", "ssDNA(-)"}, {"Bacilladnaviridae", "ssDNA"}, {"Bidnaviridae", "ssDNA"}, {"Circoviridae", "ssDNA(+/-)"}, {"Geminiviridae", "ssDNA(+/-)"}, {"Genomoviridae", "ssDNA"}, {"Hepadnaviridae", "dsDNA-RT"}, {"Inoviridae", "ssDNA(+)"}, {"Microviridae", "ssDNA(+)"}, {"Nanoviridae", "ssDNA(+)"}, {"Ortervirales", "ssRNA-RT"}, {"Caulimoviridae", "dsDNA-RT"}, {"Parvoviridae", "ssDNA(+/-)"}, {"Alphapleolipovirus", "dsDNA; ssDNA"}, {"Riboviria", "RNA"}, {"Albetovirus", "ssRNA(+)"}, {"Alphatetraviridae", "ssRNA(+)"}, {"Alvernaviridae", "ssRNA(+)"}, {"Amalgaviridae", "dsRNA"}, {"Astroviridae", "ssRNA(+)"}, {"Aumaivirus", "ssRNA(+)"}, {"Avsunviroidae", "ssRNA"}, {"Barnaviridae", "ssRNA(+)"}, {"Benyviridae", "ssRNA(+)"}, {"Birnaviridae", "dsRNA"}, {"Botourmiaviridae", "ssRNA(+)"}, {"Botybirnavirus", "dsRNA"}, {"Bromoviridae", "ssRNA(+)"}, {"Caliciviridae", "ssRNA(+)"}, {"Carmotetraviridae", "ssRNA(+)"}, {"Chrysoviridae", "dsRNA"}, {"Closteroviridae", "ssRNA(+)"}, {"Cystoviridae", "dsRNA"}, {"Deltavirus", "ssRNA(-)"}, {"dsRNA viruses", "dsRNA"}, {"Endornaviridae", "dsRNA"}, {"Flaviviridae", "ssRNA(+)"}, {"Hepeviridae", "ssRNA(+)"}, {"Hypoviridae", "ssRNA(+)"}, {"Idaeovirus", "ssRNA(+)"}, {"Kitaviridae", "ssRNA(+)"}, {"Leviviridae", "ssRNA(+)"}, {"Luteoviridae", "ssRNA(+)"}, {"Matonaviridae", "ssRNA(+)"}, {"Megabirnaviridae", "dsRNA"}, {"Narnaviridae", "ssRNA(+)"}, {"Haploviricotina", "ssRNA(-)"}, {"Arenaviridae", "ssRNA(+/-)"}, {"Coguvirus", "ssRNA(-)"}, {"Cruliviridae", "ssRNA(-)"}, {"Fimoviridae", "ssRNA(-)"}, {"Hantaviridae", "ssRNA(-)"}, {"Leishbuviridae", "ssRNA(-)"}, {"Mypoviridae", "ssRNA(-)"}, {"Nairoviridae", "ssRNA(-)"}, {"Peribunyaviridae", "ssRNA(-)"}, {"Phasmaviridae", "ssRNA(-)"}, {"Banyangvirus", "ssRNA(+/-)"}, {"Beidivirus", "ssRNA(-)"}, {"Goukovirus", "ssRNA(-)"}, {"Horwuvirus", "ssRNA(-)"}, {"Hudivirus", "ssRNA(-)"}, {"Hudovirus", "ssRNA(-)"}, 
{"Kabutovirus", "ssRNA(-)"}, {"Laulavirus", "ssRNA(-)"}, {"Mobuvirus", "ssRNA(-)"}, {"Phasivirus", "ssRNA(-)"}, {"Phlebovirus", "ssRNA(+/-)"}, {"Pidchovirus", "ssRNA(-)"}, {"Tenuivirus", "ssRNA(-)"}, {"Wenrivirus", "ssRNA(-)"}, {"Wubeivirus", "ssRNA(-)"}, {"Tospoviridae", "ssRNA(+/-)"}, {"Wupedeviridae", "ssRNA(-)"}, {"Insthoviricetes", "ssRNA(-)"}, {"Nidovirales", "ssRNA(+)"}, {"Nodaviridae", "ssRNA(+)"}, {"Papanivirus", "ssRNA(+)"}, {"Partitiviridae", "dsRNA"}, {"Permutotetraviridae", "ssRNA(+)"}, {"Picobirnaviridae", "dsRNA"}, {"Picornavirales", "ssRNA(+)"}, {"Pospiviroidae", "ssRNA"}, {"Potyviridae", "ssRNA(+)"}, {"Quadriviridae", "dsRNA"}, {"Reoviridae", "dsRNA"}, {"Sarthroviridae", "ssRNA(+)"}, {"Sinaivirus", "ssRNA(+)"}, {"Solemoviridae", "ssRNA(+)"}, {"Solinviviridae", "ssRNA(+)"}, {"Togaviridae", "ssRNA(+)"}, {"Tombusviridae", "ssRNA(+)"}, {"Totiviridae", "dsRNA"}, {"Tymovirales", "ssRNA(+)"}, {"Virgaviridae", "ssRNA(+)"}, {"Virtovirus", "ssRNA(+)"}, {"ssRNA viruses", "ssRNA"}, {"unclassified ssRNA viruses", "ssRNA"}, {"unclassified ssRNA negative-strand viruses", "ssRNA(-)"}, {"unclassified ssRNA positive-strand viruses", "ssRNA(+)"}, {"unclassified viroids", "ssRNA"}, {"DNA satellites", "DNA"}, {"RNA satellites", "RNA"}, {"Smacoviridae", "ssDNA"}, {"Spiraviridae", "ssDNA(+)"}, {"Tolecusatellitidae", "ssDNA"}, {"unclassified viruses", "unknown"}, {"unclassified DNA viruses", "DNA"}, {"unclassified archaeal dsDNA viruses", "dsDNA"}, {"unclassified dsDNA phages", "dsDNA"}, {"unclassified dsDNA viruses", "dsDNA"}, {"unclassified ssDNA bacterial viruses", "ssDNA"}, {"unclassified ssDNA viruses", "ssDNA"}, {"environmental samples", "unknown"}, })
static bool s_MatchPartialType(const CSeq_loc &loc1, const CSeq_loc &loc2, unsigned int partial_type)
@ e_RnaPosition_MIDDLE_RIBOSOMAL_SUBUNIT
@ e_RnaPosition_INTERNAL_SPACER_X
@ e_RnaPosition_LEFT_RIBOSOMAL_SUBUNIT
@ e_RnaPosition_INTERNAL_SPACER_2
@ e_RnaPosition_RIGHT_RIBOSOMAL_SUBUNIT
@ e_RnaPosition_INTERNAL_SPACER_1
static bool s_SubsequentIntron(CFeat_CI feat_ci_dup, Int4 start, Int4 stop, Int4 max)
#define FOR_EACH_SEQID_ON_BIOSEQ_HANDLE(Itr, Var)
static bool s_GetFlankingGapTypes(const CSeq_inst &inst, CSeq_gap::TType &fst, CSeq_gap::TType &lst)
static bool s_SeqIdMatch(const CConstRef< CSeq_id > &q1, const CConstRef< CSeq_id > &q2)
bool x_IsPseudo(const CGene_ref &ref)
static const char * linkEvStrings[]
static bool s_LocSortCompare(const CConstRef< CSeq_loc > &q1, const CConstRef< CSeq_loc > &q2)
static int CountNs(const CSeq_data &seq_data, TSeqPos len)
bool s_BeforeIsGapOrN(TSeqPos pos, TSeqPos before, const CSeqVector &vec)
static bool x_BadCDSinVDJC(const CSeq_loc &cdsloc, const CSeq_loc &vdjcloc, CScope *scope)
static bool x_FeatIsVDJC(const CSeq_feat &ft)
static int s_MaxNsInSeqLitForTech(CMolInfo::TTech tech)
unsigned int s_IdXrefsNotReciprocal(const CSeq_feat &cds, const CSeq_feat &mrna)
bool s_IsCDDFeat(const CMappedFeat &feat)
static EDiagSev GetBioseqEndWarning(const CBioseq &seq, bool is_circular, EBioseqEndIsType end_is_char)
bool s_CheckPosNOrGap(TSeqPos pos, const CSeqVector &vec)
bool s_DbtagEqual(const CRef< CDbtag > &dbt1, const CRef< CDbtag > &dbt2)
static bool x_FeatIsCDS(const CSeq_feat &ft)
bool s_HasGI(const CBioseq &seq)
bool s_AfterIsGap(TSeqPos pos, TSeqPos after, TSeqPos len, const CSeqVector &vec)
static optional< int > s_MaxSeqStretchIfLessThanThreshold(const CSeqVector &vec, int threshold)
bool HasUnverified(CBioseq_Handle bsh)
static bool s_OrgModEqual(const CRef< COrgMod > &om1, const CRef< COrgMod > &om2)
string s_GetMrnaProductString(const CSeq_feat &mrna)
static bool s_SubsourceEquivalent(const CRef< CSubSource > &st1, const CRef< CSubSource > &st2)
bool x_HasNamedQual(const CSeq_feat &feat, const string &qual)
static char CheckForBadSeqIdChars(const string &id)
static string s_GetKeywordForStructuredComment(const CUser_object &obj)
TGi GetGIForSeqId(const CSeq_id &id, CScope &scope)
bool StrandsMatch(ENa_strand s1, ENa_strand s2)
static CBioseq_Handle s_GetParent(const CBioseq_Handle &part)
static ERnaPosition s_RnaPosition(const CSeq_feat &feat)
bool s_AreAdjacent(ERnaPosition pos1, ERnaPosition pos2)
bool lists_match(Iterator iter1, Iterator iter1_stop, Iterator iter2, Iterator iter2_stop, Predicate pred)
static bool s_IsConWithGaps(const CBioseq &seq)
static bool s_BiosrcFullLengthIsOk(const CBioSource &src)
static bool s_StandaloneProt(const CBioseq_Handle &bsh)
static TSeqPos s_GetDeltaLen(const CDelta_seq &seg, CScope *scope)
bool s_IdXrefsAreReciprocal(const CSeq_feat &cds, const CSeq_feat &mrna)
static bool HasAssemblyOrNullGap(const CBioseq &seq)
static bool s_IsTPAAssemblyOkForBioseq(const CBioseq &seq, bool has_refseq)
bool s_HasTpaUserObject(CBioseq_Handle bsh)
static bool s_OrgrefEquivalent(const COrg_ref &org1, const COrg_ref &org2)
bool s_GeneralTagsMatch(const string &protein_id, const CDbtag &dbtag)
static bool s_WillReportTerminalGap(const CBioseq &seq, CBioseq_Handle bsh)
string s_GetMrnaProteinLink(const CUser_field &field)
static bool s_ReportableCollision(const CGene_ref &g1, const CGene_ref &g2)
static char CheckForBadLocalIdChars(const string &id)
bool s_BeforeIsGap(TSeqPos pos, TSeqPos before, const CSeqVector &vec)
static bool s_IsSkippableDbtag(const CDbtag &dbt)
static void s_MakePubLabelString(const CPubdesc &pd, string &label)
static void s_GetGeneTextLabel(const CSeq_feat &feat, string &label)
static vector< int > s_LocationToStartStopPairs(const CSeq_loc &loc)
static void GetDateString(string &out_date_str, const CDate &date)
static bool s_SeqIdCompare(const CConstRef< CSeq_id > &q1, const CConstRef< CSeq_id > &q2)
static bool s_IsSwissProt(const CBioseq &seq)
bool s_FieldHasNonBlankValue(const CUser_field &field)
static bool s_IsUnspecified(const CSeq_gap &gap)
static bool s_SuppressMultipleEquivBioSources(const CBioSource &src)
bool s_OverlapOrAbut(const CSeq_loc &loc1, const CSeq_loc &loc2, CScope *scope)
static bool x_IsWgsSecondary(const CBioseq &seq)
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4