(bioseq.
IsAa()) {
85 autobsh =
context.GetBioseqHandle(bioseq);
87sequence::CDeflineGenerator deflineGenerator;
88 autodefline = deflineGenerator.GenerateDefline(bsh, 0);
91 for(
const auto& desc :
context.GetSeqdesc()) {
93m_Objs[defline].Add(*
context.SeqdescObjRef(desc));
98m_Objs[defline].Add(*
context.BioseqObjRef());
106 if(m_Objs.empty()) {
109 boolall_unique =
true;
111 for(
auto& it : m_Objs.GetMap()) {
113 if(list.size() == 1) {
116 else if(list.size() > 1) {
125m_ReportItems =
tmp.Export(*this)->GetSubitems();
137m_Objs[
"[n] sequence[s] [has] terminal Ns"].Fatal().Add(*
context.BioseqObjRef());
151m_Objs[
"[n] protein sequences are shorter than 50 aa."].Add(*
context.BioseqObjRef(),
false);
161 for(
const auto& desc :
context.GetSeqdesc()) {
162 if(desc.IsComment()) {
163m_Objs[desc.GetComment()].Add(*
context.SeqdescObjRef(desc));
171 if(!m_Objs.empty()) {
173 string label= m_Objs.GetMap().size() == 1 ?
"[n] comment descriptor[s] were found (all same)":
"[n] comment descriptor[s] were found (some different)";
174 for(
autoit : m_Objs.GetMap()) {
175 for(
autoobj : it.second->GetObjects()) {
186 DISCREPANCY_CASE(MRNA_ON_WRONG_SEQUENCE_TYPE, SEQUENCE,
eDisc|
eOncaller,
"Eukaryotic sequences that are not genomic or macronuclear should not have mRNA features")
204m_Objs[
"[n] mRNA[s] [is] located on eukaryotic sequence[s] that [does] not have genomic or plasmid source[s]"].Add(*
context.SeqFeatObjRef(*feat));
218 boolhas_gaps = !!sum.
Gaps;
221 for(
autoit : bioseq.
GetAnnot()) {
222 if(it->IsFtable()) {
249 for(
const auto& desc :
context.GetAllSeqdesc()) {
253 for(
const auto& user_field : user.
GetData()) {
254 if(user_field->IsSetLabel() && user_field->GetLabel().IsStr() && user_field->GetLabel().GetStr() ==
"BioProject"&& user_field->IsSetData() && user_field->GetData().IsStrs()) {
256 if(!strs.empty() && !strs[0].empty()) {
257m_Objs[
"[n] sequence[s] contain[S] BioProject IDs"].Add(*
context.BioseqObjRef());
275 for(
const auto& desc :
context.GetAllSeqdesc()) {
279 for(
const auto& user_field : user.
GetData()) {
280 if(user_field->IsSetLabel() && user_field->GetLabel().IsStr() && user_field->GetLabel().GetStr() ==
"Sequence Read Archive"&& user_field->IsSetData() && user_field->GetData().IsStrs()) {
282 if(!strs.empty() && !strs[0].empty()) {
283m_Objs[
"[n] sequence[s] contain[S] Sequence Read Archive IDs"].Add(*
context.BioseqObjRef());
301m_Objs[
"[n] bioseq[s] [has] no definition line"].Add(*
context.BioseqObjRef());
314 if(sum.
MaxN> 14) {
315m_Objs[
"[n] sequence[s] [has] runs of 15 or more Ns"].Add(*
context.BioseqObjRef());
329m_Objs[
"[n] sequence[s] [has] external references"].Add(*
context.BioseqObjRef());
339 const doubleMIN_N_PERCENTAGE = 10.0;
344 if(!sum.
HasRef&& sum.
N* 100. / sum.
Len> MIN_N_PERCENTAGE) {
345m_Objs[
"[n] sequence[s] [has] > 10% Ns"].Add(*
context.BioseqObjRef());
356 for(
const auto& feat :
context.GetFeat()) {
361m_Objs[
key+
": [n] present"].Info().Incr();
370 for(
const auto& feat :
context.GetAllFeat()) {
375 key= to_string(feat.GetData().GetSubtype()) +
" "+
key;
378m_Objs[
kEmptyCStr][na ?
"N":
"A"].Add(*rep);
386 for(
auto& it : m_Objs[
kEmptyCStr].GetMap()) {
387 if(it.first ==
"N"|| it.first ==
"A") {
390 size_t n= it.first.find(
' ');
391 string key= it.first.substr(
n+ 1);
393 string label=
key+
": [n] present";
405 for(
auto& obj : m_Objs[
kEmptyStr][it.first].GetObjects()) {
408 for(
auto& pp : obj2num) {
409m_Objs[
label][
"[n] bioseq[s] [has] [(]"+ to_string(pp.second) +
"[)] "+
key+
" features"].Info().Add(*pp.first);
424 if(
context.FeatExons().size()) {
478 if(m_Objs.empty()) {
489 size_tnum_of_missing = 0,
492 for(
autoit : the_map) {
493num_of_bioseqs += it.second->GetObjects().
size();
494 if(it.first.empty()) {
495num_of_missing += it.second->GetObjects().size();
501 else if(tech != it.first) {
506 if(num_of_missing == num_of_bioseqs || (same && !num_of_missing)) {
509summary += num_of_missing ?
"some missing, ":
"all present, ";
510summary += same ?
"all same)":
"some different)";
511 if(num_of_missing) {
512 if(num_of_missing == num_of_bioseqs) {
513report[summary].
SetCount(num_of_missing);
528 return(ch ==
'A'|| ch ==
'T'|| ch ==
'G'|| ch ==
'C');
534 static const size_tMIN_TITLE_SEQ_LEN = 19;
537 for(string::const_reverse_iterator it = title.rbegin(); it != title.rend(); ++it) {
544 if(
count>= MIN_TITLE_SEQ_LEN) {
549 return count>= MIN_TITLE_SEQ_LEN;
555 for(
auto& desc :
context.GetSeqdesc()) {
557m_Objs[
"[n] defline[s] appear[S] to end with sequence characters"].Add(*
context.SeqdescObjRef(desc));
567 boolis_dna =
false;
568 boolis_genomic =
false;
573 automolinfo =
context.GetMolinfo();
577 if(!is_genomic || !is_dna) {
580 for(
auto& annot_it : bioseq.
GetAnnot()) {
581 if(annot_it->IsFtable()) {
589 if(feat->IsSetData()) {
608 if(descrs.
IsSet()) {
609 for(
autodescr : descrs.
Set()) {
610 if(descr->IsMolinfo()) {
611molinfo = &(descr->SetMolinfo());
616 if(molinfo ==
nullptr) {
619descrs.
Set().push_back(new_descr);
621 if(molinfo ==
nullptr) {
668 const string& object_name,
669 const string& field_prefix =
kEmptyStr)
674 for(
auto& z : obj.second->GetMap()) {
675collector[field_prefix + z.first][
" [n] "+ object_name +
"[s] [is] missing field "+ field_prefix + z.first]
686 if(
f->IsSetLabel() &&
f->GetLabel().IsStr() &&
f->IsSetData()) {
687 stringfield_name = field_prefix +
f->GetLabel().GetStr();
689 if(already_seen && !collector.
Exist(field_name)) {
691 stringmissing_label =
"[n] "+ object_name +
"[s] [is] missing field "+ field_name;
695collector[field_name][missing_label].
Add(*ro);
698collector[field_name][
"[n] "+ object_name +
"[s] [has] field "+ field_name +
" value '"+
GetFieldValueAsString(*
f) +
"'"].
Add(*
context.SeqdescObjRef(*desc),
false);
707collector[field_prefix + z.first][
" [n] "+ object_name +
"[s] [is] missing field "+ field_prefix + z.first].
Add(*
context.SeqdescObjRef(*desc));
725 autorep_seq =
context.BioseqObjRef();
726 for(
auto& desc :
context.GetAllSeqdesc()) {
747 size_tnum_values = 0;
750 for(
auto& s : node.
GetMap()) {
752all_present =
false;
757 value= s.first.substr(pos);
765 if(num_values > 1) {
780 for(
auto& s : node.
GetMap()) {
781 boolthis_present =
true;
782 boolthis_same =
true;
784all_present &= this_present;
785all_same &= this_same;
786 if(!all_present && !all_same) {
795 stringsummary =
"(";
797summary +=
"all present";
799summary +=
"some missing";
803summary +=
"all same";
805summary +=
"inconsistent";
814 for(
auto& s : original.
GetMap()){
815 for(
autoq : s.second->GetObjects()) {
816new_home[s.first].
Add(*q);
827 if(
NStr::Equal(orig_field_name,
"BioSample")) {
828 return " "+ orig_field_name;
829}
else if(
NStr::Equal(orig_field_name,
"ProbeDB")) {
830 return " "+ orig_field_name;
831}
else if(
NStr::Equal(orig_field_name,
"Sequence Read Archive")) {
832 return " "+ orig_field_name;
833}
else if(
NStr::Equal(orig_field_name,
"BioProject")) {
834 return " "+ orig_field_name;
835}
else if(
NStr::Equal(orig_field_name,
"Assembly")) {
836 return " "+ orig_field_name;
838 returnorig_field_name;
847 if(m_Objs.empty()) {
852 boolall_present =
true;
853 boolall_same =
true;
855 if(all_present && all_same) {
859 stringtop_label =
"DBLink Report "+
GetSummaryLabel(all_present, all_same);
862 while(it != m_Objs.GetMap().end()) {
865 CopyNode(m_Objs[top_label][
" "+ it->first], *it->second);
866it = m_Objs.GetMap().erase(it);
873 boolthis_present =
true;
874 boolthis_same =
true;
877 for(
auto& s : it2.second->GetMap()){
878 for(
auto& q : s.second->GetObjects()) {
879m_Objs[top_label][new_label][s.first].Add(*q);
901 autorep_seq =
context.BioseqObjRef();
902 for(
auto& desc :
context.GetAllSeqdesc()) {
918m_Objs[
"[n] Bioseq[s] [is] missing "+ it.first +
" structured comment"].Add(*rep_seq);
926m_Objs[
"[n] Bioseq[s] [is] missing "+ it.first +
" structured comment"].Add(*ro);
948 if(m_Objs.empty()) {
953 boolall_present =
true;
954 boolall_same =
true;
956 if(all_present && all_same) {
960 stringtop_label =
"Structured Comment Report "+
GetSummaryLabel(all_present, all_same);
963 while(it != m_Objs.GetMap().end()) {
966 CopyNode(m_Objs[top_label][
" "+ it->first], *it->second);
967it = m_Objs.GetMap().erase(it);
974 boolthis_present =
true;
975 boolthis_same =
true;
977 stringnew_label = it2.first +
" "+
GetSummaryLabel(this_present, this_same);
978 for(
auto& s : it2.second->GetMap()) {
979 stringsub_label = s.first;
980 if(this_present && this_same) {
983 for(
auto& q : s.second->GetObjects()) {
984m_Objs[top_label][new_label][sub_label].Add(*q);
1000 for(
auto& desc :
context.GetAllSeqdesc()) {
1001 if(!desc.IsUser()) {
1017m_Objs[
"[n] Assembly Name[s] in Genome Assembly Structured Comment"].Add(*
context.BioseqObjRef());
1030 for(
auto& desc :
context.GetAllSeqdesc()) {
1031 if(desc.IsUser()) {
1038m_Objs[
"[n] sequence[s] [does] not include structured comments."].Add(*
context.BioseqObjRef());
1049 for(
auto& desc :
context.GetAllSeqdesc()) {
1050 if(desc.IsUser()) {
1054 for(
auto& it : user.
GetData()) {
1055 if(it->IsSetLabel() && it->GetLabel().IsStr() &&
NStr::Equal(it->GetLabel().GetStr(),
"BioProject")) {
1066m_Objs[
"[n] sequence[s] [does] not include project."].Add(*
context.BioseqObjRef());
1077 for(
auto& desc :
context.GetAllSeqdesc()) {
1078 if(desc.IsUser()) {
1081m_Objs[
"[n] sequence[s] [is] unverified"].Add(*
context.BioseqObjRef(),
false);
1111m_Objs[
"[n] sequence[s] contain[S] nucleotides that are not ATCG or N"].Add(*
context.BioseqObjRef());
1127 if(
source&&
source->IsSource() &&
source->GetSource().IsSetOrg() &&
source->GetSource().GetOrg().IsSetTaxname() && title) {
1128 stringtaxname =
source->GetSource().GetOrg().GetTaxname();
1135 boolno_taxname_in_defline =
false;
1137 if(taxname_pos ==
NPOS) {
1138no_taxname_in_defline =
true;
1142no_taxname_in_defline =
NStr::CompareCase(title->
GetTitle().c_str() + taxname_pos, 1, taxname.size() - 1, taxname.c_str() + 1) != 0;
1144no_taxname_in_defline =
true;
1147 if(no_taxname_in_defline) {
1161 for(
autofield: user.
GetData()) {
1162 if(field->IsSetData() && field->GetData().IsInt() && field->IsSetLabel() && field->GetLabel().IsStr() && field->GetLabel().GetStr() ==
"ProjectID") {
1175 for(
auto& desc :
context.GetAllSeqdesc()) {
1176 if(desc.IsUser()) {
1180 if(!proj_id.empty()) {
1181m_Objs[proj_id][bioseq.
IsNa() ?
"N":
"A"].Add(*
context.BioseqObjRef());
1192 if(m_Objs.empty()) {
1196 string all=
"[n] sequence[s] [has] project IDs ";
1197 stringprots =
"[n] protein sequence[s] [has] project IDs ";
1198 stringnucs =
"[n] nucleotide sequence[s] [has] project IDs ";
1199 auto& projects = m_Objs.GetMap();
1200 all+= projects.size() > 1 ?
"(some different)":
"(all same)";
1201 size_tcount_prots = 0;
1202 size_tcount_nucs = 0;
1203 for(
autoit: projects) {
1204 auto&
M= it.second->GetMap();
1205 if(
M.find(
"A") !=
M.end()) {
1208 if(
M.find(
"N") !=
M.end()) {
1212prots += count_prots > 1 ?
"(some different)":
"(all same)";
1213nucs += count_nucs > 1 ?
"(some different)":
"(all same)";
1214 for(
autoit : projects) {
1215 auto&
M= it.second->GetMap();
1216 if(
M.find(
"A") !=
M.end()) {
1218res[
all][prots].
Add(*obj);
1221 if(
M.find(
"N") !=
M.end()) {
1223res[
all][nucs].
Add(*obj);
1238 auto& cds =
context.FeatCDS();
1239 if(cds.size() < 2) {
1242 size_tcount_pseudo = 0;
1243 size_tcount_disrupt = 0;
1244 for(
autofeat : cds) {
1245 if(feat->IsSetComment() &&
NStr::Find(feat->GetComment(),
"coding region disrupted by sequencing gap") !=
NPOS) {
1248 if(
context.IsPseudo(*feat)) {
1252 if(count_disrupt != cds.size() && count_pseudo != cds.size()) {
1253m_Objs[
"[n] mRNA bioseq[s] [has] multiple CDS features"].Add(*
context.BioseqObjRef());
1267 auto& cds =
context.FeatCDS();
1268 size_tcount_plus = 0;
1269 size_tcount_minus = 0;
1270 for(
auto& feat : cds) {
1295vector<CSeq_feat*> features;
1297 for(; feat_ci; ++feat_ci) {
1302new_inst->Assign(bioseq.
GetInst());
1306 for(
auto& feat : features) {
1319 const size_tMAX_N_IN_SEQ = 7;
1323 if(sum.
MinQ> MAX_N_IN_SEQ) {
1324m_Objs[
"[n] sequence[s] contain[S] low quality region"].Add(*
context.BioseqObjRef());
1335 if(
set.IsSetDescr()) {
1336 for(
const auto& descr :
set.GetDescr().Get()) {
1337 if(descr->IsTitle()) {
1338m_Objs[
"[n] title[s] on sets were found"].Add(*
context.SeqdescObjRef(*descr));
1347m_ReportItems = m_Objs.Export(*
this,
false)->GetSubitems();
1358 boolhas_D_loop =
false;
1359 boolhas_misc_feat_with_control_region =
false;
1360 for(
auto& feat :
all) {
1361 if(feat->IsSetData()) {
1367 if(feat->IsSetComment() &&
NStr::FindNoCase(feat->GetComment(),
"control region") !=
NPOS) {
1368has_misc_feat_with_control_region =
true;
1374 if(has_D_loop || has_misc_feat_with_control_region) {
1375m_Objs[
"[n] bioseq[s] [has] D-loop or control region misc_feature, but [is] do not have mitochondrial source"].Add(*
context.BioseqObjRef(
CDiscrepancyContext::eFixSet));
1383m_ReportItems = m_Objs.Export(*
this,
false)->GetSubitems();
1388 static boolFixGenome(
const CBioseq& bioseq,
CScope& scope)
1433m_Objs[
"[n] sequence[s] [is] shorter than 50 nt"].Add(*
context.BioseqObjRef());
1452 for(
auto& annot_it : bioseq.
GetAnnot()) {
1453 if(annot_it->IsFtable()) {
1458m_Objs[
"[n] contig[s] [is] shorter than 200 nt"].Add(*
context.BioseqObjRef(fix));
1465m_ReportItems = m_Objs.Export(*
this,
false)->GetSubitems();
1487m_Objs[
"[n] RNA bioseq[s] [is] proviral"].Add(*
context.BioseqObjRef());
1495m_ReportItems = m_Objs.Export(*
this,
false)->GetSubitems();
1507 if((bio_src.*is_set_fn)()) {
1508 if(
val.empty()) {
1509 val= (bio_src.*get_fn)();
1511 else if(
val!= (bio_src.*get_fn)()) {
1524 if(
mod->IsSetSubtype() &&
mod->GetSubtype() == subtype &&
mod->IsSetSubname()) {
1525 if(
val.empty()) {
1526 val=
mod->GetSubname();
1529 if(
mod->GetSubname() !=
val) {
1545 for(
const auto& subtype : bio_src.
GetSubtype()) {
1560 stringtaxname, isolate, strain;
1561 boolall_taxname_same =
true, all_isolate_same =
true, all_strain_same =
true;
1562 for(
auto& descr_bio_src :
context.GetSetBiosources()) {
1563 const CBioSource& bio_src = descr_bio_src->GetSource();
1564 if(
context.HasLineage(bio_src,
"",
"Viruses")) {
1566m_Objs[
"[n] biosource[s] should have segment qualifier but [does] not"].Add(*
context.SeqdescObjRef(*descr_bio_src));
1569 if(all_taxname_same) {
1572 if(all_isolate_same) {
1575 if(all_strain_same) {
1579 if(!all_taxname_same) {
1580m_Objs[
"Not all biosources have same taxname"];
1582 if(!all_isolate_same) {
1583m_Objs[
"Not all biosources have same isolate"];
1585 if(!all_strain_same) {
1586m_Objs[
"Not all biosources have same strain"];
1594m_ReportItems = m_Objs.Export(*
this,
false)->GetSubitems();
1604 for(
auto& qual : feat.
GetQual()) {
1626 for(
auto& feat :
context.GetFeat()) {
1636 if(
set.IsSetClass()) {
1641m_Objs[
"[n] unwanted set wrapper[s]"].Add(*
context.BioseqSetObjRef());
1658{
"Agricultutral",
"agricultural",
false},
1659{
"Bacilllus",
"Bacillus",
false},
1660{
"Enviromental",
"Environmental",
false},
1661{
"Insitiute",
"institute",
false},
1662{
"Instutite",
"institute",
false},
1663{
"Instutute",
"Institute",
false},
1664{
"P.R.Chian",
"P.R. China",
false},
1665{
"PRChian",
"PR China",
false},
1666{
"Scieces",
"Sciences",
false},
1667{
"agricultral",
"agricultural",
false},
1668{
"agriculturral",
"agricultural",
false},
1669{
"biotechnlogy",
"biotechnology",
false},
1670{
"Biotechnlogy",
"Biotechnology",
false},
1671{
"biotechnolgy",
"biotechnology",
false},
1672{
"biotechology",
"biotechnology",
false},
1673{
"caputre",
"capture",
true},
1674{
"casette",
"cassette",
true},
1675{
"catalize",
"catalyze",
false},
1676{
"charaterization",
"characterization",
false},
1677{
"clonging",
"cloning",
false},
1678{
"consevered",
"conserved",
false},
1679{
"cotaining",
"containing",
false},
1680{
"cytochome",
"cytochrome",
true},
1681{
"diveristy",
"diversity",
true},
1682{
"enivronment",
"environment",
false},
1683{
"enviroment",
"environment",
false},
1684{
"genone",
"genome",
true},
1685{
"homologue",
"homolog",
true},
1686{
"hypotethical",
"hypothetical",
false},
1687{
"hypotetical",
"hypothetical",
false},
1688{
"hypothetcial",
"hypothetical",
false},
1689{
"hypothteical",
"hypothetical",
false},
1690{
"indepedent",
"independent",
false},
1691{
"insititute",
"institute",
false},
1692{
"insitute",
"institute",
false},
1693{
"institue",
"institute",
false},
1694{
"instute",
"institute",
false},
1695{
"muesum",
"museum",
true},
1696{
"musuem",
"museum",
true},
1697{
"nuclear shutting",
"nuclear shuttling",
true},
1698{
"phylogentic",
"phylogenetic",
false},
1699{
"protien",
"protein",
false},
1700{
"puatative",
"putative",
false},
1701{
"putaitve",
"putative",
false},
1702{
"putaive",
"putative",
false},
1703{
"putataive",
"putative",
false},
1704{
"putatitve",
"putative",
false},
1705{
"putatuve",
"putative",
false},
1706{
"putatvie",
"putative",
false},
1707{
"pylogeny",
"phylogeny",
false},
1708{
"resaerch",
"research",
false},
1709{
"reseach",
"research",
false},
1710{
"reserach",
"research",
true},
1711{
"reserch",
"research",
false},
1712{
"ribosoml",
"ribosomal",
false},
1713{
"ribossomal",
"ribosomal",
false},
1714{
"scencies",
"sciences",
false},
1715{
"scinece",
"science",
false},
1716{
"simmilar",
"similar",
false},
1717{
"structual",
"structural",
false},
1718{
"subitilus",
"subtilis",
false},
1719{
"sulfer",
"sulfur",
false},
1720{
"technlogy",
"technology",
false},
1721{
"technolgy",
"technology",
false},
1722{
"Technlogy",
"Technology",
false},
1723{
"Veterinry",
"Veterinary",
false},
1724{
"Argricultural",
"Agricultural",
false},
1725{
"transcirbed",
"transcribed",
false},
1726{
"transcirption",
"transcription",
true},
1727{
"uiniversity",
"university",
false},
1728{
"uinversity",
"university",
false},
1729{
"univercity",
"university",
false},
1730{
"univerisity",
"university",
false},
1731{
"univeristy",
"university",
false},
1732{
"univesity",
"university",
false},
1733{
"unversity",
"university",
true},
1734{
"uviversity",
"university",
false},
1735{
"anaemia",
nullptr,
false},
1736{
"haem",
nullptr,
false},
1737{
"haemagglutination",
nullptr,
false},
1738{
"heam",
nullptr,
false},
1739{
"mithocon",
nullptr,
false},
1747 #include "FLATFILE_FIND.inc" 1748 staticconstexpr TLocalFSM s_FSM{s_compact, s_hits_init_1, s_hits_init_2, s_states,
nullptr};
1760 string error=
"String not found: ";
1773 "FLATFILE_FIND_ONCALLER",
1774 "FLATFILE_FIND_ONCALLER_UNFIXABLE",
1775 "FLATFILE_FIND_ONCALLER_FIXABLE" 1778 static const stringkFixable =
"Fixable";
1779 static const stringkNonFixable =
"Non-fixable";
1782 for(
auto& desc :
context.GetAllSeqdesc()) {
1789 stringsubitem =
string(
"[n] object[s] contain[S] ") +
kSpellFixes[
i].m_misspell;
1790 boolautofix =
kSpellFixes[
i].m_correct !=
nullptr;
1791 const string& fixable = (autofix ? kFixable : kNonFixable);
1792m_Objs[fixable][subitem].Add(*
context.SeqdescObjRef(desc, &desc));
1796 for(
auto& feat:
context.FeatAll()) {
1803 stringsubitem =
string(
"[n] object[s] contain[S] ") +
kSpellFixes[
i].m_misspell;
1804 boolautofix =
kSpellFixes[
i].m_correct !=
nullptr;
1805 const string& fixable = (autofix ? kFixable : kNonFixable);
1806m_Objs[fixable][subitem].Add(*
context.SeqFeatObjRef(*feat, feat));
1815m_ReportItems = m_Objs.Export(*
this,
false)->GetSubitems();
1884 if(m_Objs.GetMap().find(
kEmptyStr) == m_Objs.GetMap().end()) {
1886m_Objs[
"No sequences longer than 20,000 nt found"];
1891m_ReportItems = m_Objs.Export(*
this,
false)->GetSubitems();
1900 if(m_Objs[
"N"].GetCount()) {
1908m_Objs[
"C"].Incr();
1909 if(!m_Objs[
"F"].GetCount()) {
1911 for(
auto id: bioseq.
GetId()) {
1916m_Objs[
"F"].Incr();
1923 for(
const auto& descr : bioseq.
GetDescr().
Get()) {
1924 if(descr->IsMolinfo() && descr->GetMolinfo().CanGetTech()) {
1926m_Objs[
"F"].Incr();
1935m_Objs[
"N"].Incr();
1944 if(m_Objs[
"C"].GetCount() && !m_Objs[
"N"].GetCount()) {
1956 staticconstexpr
autosuspicious_id_re =
ctll::fixed_string{
"chromosome|plasmid|mito|chloroplast|apicoplast|plastid|^chr|^lg|\\bnw_|\\bnz_|\\bnm_|\\bnc_|\\bac_|cp\\d\\d\\d\\d\\d\\d|^x$|^y$|^z$|^w$|^mt$|^pltd$|^chl$"};
1957 returnctre::search<suspicious_id_re, ctre::case_insensitive>(s);
1964 boolreport =
false;
1965 for(
const auto&
id: bioseq.
GetId()) {
1966 if(id->IsLocal()) {
1967 if(id->GetLocal().IsStr() &&
SuspiciousId(id->GetLocal().GetStr())) {
1972 else if(id->IsGeneral()) {
1973 if(id->GetGeneral().IsSetDb() &&
SuspiciousId(id->GetGeneral().GetDb())) {
1977 if(id->GetGeneral().IsSetTag() && id->GetGeneral().GetTag().IsStr() &&
SuspiciousId(id->GetGeneral().GetTag().GetStr())) {
1984m_Objs[
"[n] sequence[s] [has] suspicious identifiers"].Add(*
context.BioseqSetObjRef());
1992m_ReportItems = m_Objs.Export(*
this,
false)->GetSubitems();
2031 if(
set.IsSetSeq_set()) {
2032 for(
const auto& se :
set.GetSeq_set()) {
2033 if(!se->IsSetDescr()) {
2037 for(
const auto& descr : se->GetDescr().Get()) {
2038 if(!descr->IsSource()) {
2041 const CBioSource& bio_src = descr->GetSource();
2053 for(
const auto& subtype : bio_src.
GetSubtype()) {
2055 if(subtype->IsSetSubtype()) {
2059m_Objs[
"one or more chromosomes are present"];
2064m_Objs[
"one or more chromosomes are present"];
2078 if(
set.IsSetSeq_set()) {
2079 for(
const auto& se :
set.GetSeq_set()) {
2080 if(!se->IsSetDescr()) {
2084 for(
const auto& descr : se->GetDescr().Get()) {
2085 if(!descr->IsSource()) {
2088 const CBioSource& bio_src = descr->GetSource();
2094 switch( Location ) {
2106m_Objs[
"one or more organelles are present"];
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
const string & GetTaxname(void) const
bool IsSetOrgMod(void) const
const COrgName & GetOrgname(void) const
bool IsSetTaxname(void) const
TSeqPos GetLength(void) const
bool IsSetLength(void) const
void Search(const char *input, VoidCall1 found_callback) const
virtual vector< CRef< CReportItem > > GetSubitems() const =0
static void Add(TReportObjectList &list, TReportObjectSet &hash, CReportObj &obj, bool unique=true)
TReportObjectList & GetObjects()
CReportNode & Severity(CReportItem::ESeverity s)
CRef< CReportItem > Export(CDiscrepancyCore &test, bool unique=true) const
static bool Exist(TReportObjectSet &hash, CReportObj &obj)
static EFeatureLocationAllowed AllowedFeatureLocation(ESubtype subtype)
@ eFeatureLocationAllowed_NucOnly
@ eFeatureLocationAllowed_ProtOnly
@ eFeatureLocationAllowed_Any
ESubtype GetSubtype(void) const
@Seq_descr.hpp User-defined methods of the data storage class.
CSeq_feat_EditHandle –
namespace ncbi::objects::
static bool IsAa(EMol mol)
static bool IsNa(EMol mol)
Base class for all serializable objects.
Template class for iteration on objects of class C (non-modifiable version)
Template class for iteration on objects of class C.
CConstRef< CUser_field > GetFieldRef(const string &str, const string &delim=".", NStr::ECase use_case=NStr::eCase) const
@ eObjectType_StructuredComment
EObjectType GetObjectType() const
container_type::iterator iterator
API (CDeflineGenerator) for computing sequences' titles ("definitions").
vector< CRef< CReportObj > > TReportObjectList
#define DISCREPANCY_AUTOFIX(name)
#define DISCREPANCY_CASE1(name, type, group, descr,...)
#define DISCREPANCY_CASE0(name, sname, type, group, descr)
#define DISCREPANCY_CASE(name, type, group, descr)
#define DISCREPANCY_SUMMARIZE(name)
vector< CConstRef< CObject > > GetObjects(CSeq_entry_Handle seh, const string &field, CFieldNamePanel::EFieldType field_type, int subtype, const string &ncRNA_class, CConstRef< objects::CSeq_submit > submit, CRef< CEditingActionConstraint > constraint, vector< CSeq_entry_Handle > *descr_context=nullptr)
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
static const char * str(char *buf, int n)
void ReverseComplement(const BidirectionalIterator &first, const BidirectionalIterator &last)
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
static EAccessionInfo IdentifyAccession(const CTempString &accession, TParseFlags flags=fParse_AnyRaw)
Deduces information from a bare accession a la WHICH_db_accession; may report false negatives on prop...
EAccessionInfo
For IdentifyAccession (below)
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
void SetDescr(TDescr &v) const
void SetInst_Mol(TInst_Mol v) const
const CSeqFeatData & GetData(void) const
void Remove(void) const
Remove the feature from Seq-annot.
void SetInst(TInst &v) const
void Remove(ERemoveMode mode=eRemoveSeq_entry) const
bool IsSetData(void) const
const TInst & GetInst(void) const
const CSeq_feat & GetMappedFeature(void) const
Feature mapped to the master sequence.
CConstRef< CSeq_feat > GetSeq_feat(void) const
Get current seq-feat.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
NCBI_NS_STD::string::size_type SIZE_TYPE
static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)
Find the pattern in the specified range of a string using a case insensitive search.
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
static bool Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Test for equality of a substring with another string.
static string & ReplaceInPlace(string &src, const string &search, const string &replace, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
const char *const kEmptyCStr
Empty "C" string (points to a '\0').
static int CompareCase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-sensitive compare of a substring with another string.
@ eNocase
Case insensitive compare.
static const char label[]
const TSubtype & GetSubtype(void) const
Get the Subtype member data.
TGenome GetGenome(void) const
Get the Genome member data.
bool IsSetOrg(void) const
Check if a value has been assigned to Org data member.
bool IsSetSubtype(void) const
Check if a value has been assigned to Subtype data member.
void SetGenome(TGenome value)
Assign a value to Genome data member.
bool IsSetGenome(void) const
Check if a value has been assigned to Genome data member.
EGenome
biological context
const TStr & GetStr(void) const
Get the variant data.
bool IsSetData(void) const
the object itself Check if a value has been assigned to Data data member.
bool IsStr(void) const
Check if variant Str is selected.
bool IsSetType(void) const
type of object within class Check if a value has been assigned to Type data member.
bool IsStrs(void) const
Check if variant Strs is selected.
const TStrs & GetStrs(void) const
Get the variant data.
const TData & GetData(void) const
Get the Data member data.
bool IsStr(void) const
Check if variant Str is selected.
const TStr & GetStr(void) const
Get the variant data.
const TData & GetData(void) const
Get the Data member data.
const TType & GetType(void) const
Get the Type member data.
vector< CStringUTF8 > TStrs
const TMod & GetMod(void) const
Get the Mod member data.
bool IsSetData(void) const
the specific data Check if a value has been assigned to Data data member.
bool IsSetQual(void) const
qualifiers Check if a value has been assigned to Qual data member.
const TQual & GetQual(void) const
Get the Qual member data.
const TData & GetData(void) const
Get the Data member data.
bool IsSetAccession(void) const
Check if a value has been assigned to Accession data member.
const TAccession & GetAccession(void) const
Get the Accession member data.
@ eClass_pop_set
population study
@ eClass_phy_set
phylogenetic study
@ eClass_mut_set
set of mutations
@ eClass_eco_set
ecological sample study
@ eClass_small_genome_set
viral segments or mitochondrial minicircles
TRepr GetRepr(void) const
Get the Repr member data.
bool IsSetCompleteness(void) const
Check if a value has been assigned to Completeness data member.
const TUser & GetUser(void) const
Get the variant data.
const TInst & GetInst(void) const
Get the Inst member data.
TTopology GetTopology(void) const
Get the Topology member data.
bool IsSetAnnot(void) const
Check if a value has been assigned to Annot data member.
bool IsSetRepr(void) const
Check if a value has been assigned to Repr data member.
bool IsSetMol(void) const
Check if a value has been assigned to Mol data member.
bool CanGetTopology(void) const
Check if it is safe to call GetTopology method.
const TTitle & GetTitle(void) const
Get the variant data.
const TSource & GetSource(void) const
Get the variant data.
bool IsSetBiomol(void) const
Check if a value has been assigned to Biomol data member.
const TAnnot & GetAnnot(void) const
Get the Annot member data.
const TId & GetId(void) const
Get the Id member data.
TTech GetTech(void) const
Get the Tech member data.
const Tdata & Get(void) const
Get the member data.
bool IsSetInst(void) const
the sequence data Check if a value has been assigned to Inst data member.
TLength GetLength(void) const
Get the Length member data.
TMol GetMol(void) const
Get the Mol member data.
bool IsSetLength(void) const
length of sequence in residues Check if a value has been assigned to Length data member.
TSource & SetSource(void)
Select the variant.
bool IsSetDescr(void) const
descriptors Check if a value has been assigned to Descr data member.
bool IsSet(void) const
Check if a value has been assigned to data member.
TBiomol GetBiomol(void) const
Get the Biomol member data.
void SetBiomol(TBiomol value)
Assign a value to Biomol data member.
bool CanGetId(void) const
Check if it is safe to call GetId method.
bool IsSetTech(void) const
Check if a value has been assigned to Tech data member.
const TFtable & GetFtable(void) const
Get the variant data.
TCompleteness GetCompleteness(void) const
Get the Completeness member data.
const TData & GetData(void) const
Get the Data member data.
bool IsSetId(void) const
equivalent identifiers Check if a value has been assigned to Id data member.
Tdata & Set(void)
Assign a value to data member.
const TDescr & GetDescr(void) const
Get the Descr member data.
const TMolinfo & GetMolinfo(void) const
Get the variant data.
TMolinfo & SetMolinfo(void)
Select the variant.
bool CanGetInst(void) const
Check if it is safe to call GetInst method.
@ eRepr_delta
sequence made by changes (delta) to others
@ eCompleteness_complete
complete biological entity
@ eTech_targeted
targeted locus sets/studies
@ eTech_tsa
transcriptome shotgun assembly
@ eTech_wgs
whole genome shotgun sequencing
@ eBiomol_pre_RNA
precursor RNA of any sort really
@ e_Source
source of materials, includes Org-ref
where boath are integers</td > n< td ></td > n</tr > n< tr > n< td > tse</td > n< td > optional</td > n< td > String</td > n< td class=\"description\"> TSE option controls what blob is smart and slim</td> n<td> orig</td> n</tr> n<tr> n<td> last_modified</td> n<td> optional</td> n<td> Integer</td> n<td class=\"description\"> The blob last modification If provided then the exact match will be requested with n the Cassandra storage corresponding field value</td> n<td> Positive integer Not provided means that the most recent match will be selected</td> n<td></td> n</tr> n<tr> n<td> use_cache</td> n<td> optional</td> n<td> String</td> n<td class=\"description\"> The option controls if the Cassandra LMDB cache and or database should be used It n affects the seq id resolution step and the blob properties lookup step The following n options are BIOSEQ_INFO and BLOB_PROP at all
void ReverseComplementFeature(CSeq_feat &feat, CScope &scope)
Simultaneous search of multiple RegEx patterns in the input string.
const struct ncbi::grid::netcache::search::fields::KEY key
const GenericPointer< typename T::ValueType > T2 value
const CharType(& source)[N]
const string kStructuredCommentReport
const string & kPreviouslySeenFields
static const string kMrnaSequenceMinusStrandFeatures
static bool IsSegmentSubtype(const CBioSource &bio_src)
static bool s_areCompatible(CBioSource::EGenome Location, CSubSource::ESubtype Qualifier)
void UnitTest_FLATFILE_FIND()
Checking that FLATFILE_FIND.inc is in sync with kSpellFixes. If the array is changed,...
string AdjustDBLinkFieldName(const string &orig_field_name)
static const CSubSource::ESubtype eSubtype_unknown
static constexpr size_t kSpellFixesSize
static constexpr auto kSpellFixes
const string kMissingDBLink
const string kStructuredCommentObservedPrefixes
const string kSomeIdenticalDeflines
const string &(CBioSource::* FnGet)() const
string GetFieldValueAsString(const CUser_field &field)
const string & kPreviouslySeenObjects
static bool SuspiciousId(const string &s)
static const string kInconsistentMolinfoTech
void AddUserObjectFieldItems(const CSeqdesc *desc, CReportObj &rep_seq, CReportNode &collector, CReportNode &previously_seen, CDiscrepancyContext &context, const string &object_name, const string &field_prefix=kEmptyStr)
const string kDBLinkObjectList
static const string kInconsistentMolinfoTechSummary
const string kStructuredCommentObservedPrefixesThis
static bool IsATGC(char ch)
string GetSummaryLabel(bool all_present, bool all_same)
static const size_t MIN_SEQUENCE_LEN
const string & kPreviouslySeenFieldsThis
static bool EndsWithSequence(const string &title)
static bool FixTextInObject(CSerialObject *obj, size_t misspell_idx)
void AnalyzeFieldReport(CReportNode &node, bool &all_present, bool &all_same)
const string kSequencesWithGaps
const string kIdenticalDeflines
const string kDeflineExists
static void FindFlatfileText(const char *str, bool *result)
static bool IsMolProd(int biomol)
void AnalyzeField(CReportNode &node, bool &all_present, bool &all_same)
static bool CompareOrGetString(const CBioSource &bio_src, FnIsSet is_set_fn, FnGet get_fn, string &val)
void CopyNode(CReportNode &new_home, CReportNode &original)
const string kStructuredCommentPrevious
const string kNoTaxnameInDefline
static bool CompareOrgModValue(const CBioSource &bio_src, COrgMod::TSubtype subtype, string &val)
const string kStructuredCommentFieldPrefix
const string kDBLinkFieldCountTop
static string GetProjectID(const CUser_object &user)
const string kUniqueDeflines
const string kAllUniqueDeflines
static bool IsMicroSatellite(const CSeq_feat &feat)
const string kDBLinkCollect
bool(CBioSource::* FnIsSet)() const
const string kStructuredCommentsSeqs
static CS_CONTEXT * context
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4