sequence;
82string::iterator it =
str.begin();
83 while(it !=
str.end()) {
86}
else if(*it ==
')') {
91}
else if(*it ==
'[') {
93}
else if(*it ==
']') {
101 if(par > 0 || bkt > 0) {
161 static bools_IsValidPrimerSequence (
string str,
char& bad_ch)
187string::iterator sit =
str.begin();
188 while(sit !=
str.end()) {
191 if(pos2 == string::npos) {
195 string match=
str.substr(pos + 1, pos2 - pos - 1);
196 if(find(list_begin, list_end,
match) == list_end) {
200sit += pos2 - pos + 1;
203 if(*sit !=
'('&& *sit !=
')'&& *sit !=
','&& *sit !=
':') {
209 if(strchr (
"ABCDGHKMNRSTVWY", ch) ==
NULL) {
265countryname =
"USA: District of Columbia";
267countryname =
"USA: Puerto Rico";
269countryname =
"USA: Puerto Rico";
272countryname = countryname.substr(5);
276countryname = countryname.substr(7);
284(
stringcountryname,
296PostObjErr(sev, errtype,
error, obj,
ctx);
412 boolall_local_or_gnl =
true;
413 for(
autopId : bioseq.
GetId()) {
414 switch(pId->Which()) {
424all_local_or_gnl =
false;
427 returnall_local_or_gnl;
448 if(pDesc->IsMolinfo()) {
449 const auto& molinfo = pDesc->GetMolinfo();
462 if(
ctx->IsSeq()) {
463 return&(
ctx->GetSeq());
466 if(
ctx->IsSet() &&
467 ctx->GetSet().IsSetClass() &&
469 const auto& bioseq_set =
ctx->GetSet();
470 if(bioseq_set.IsSetSeq_set()) {
471 for(
const auto& pEntry : bioseq_set.GetSeq_set()) {
472 if(pEntry->IsSeq()) {
473 const auto& bioseq = pEntry->GetSeq();
474 if(bioseq.IsSetInst() &&
475bioseq.GetInst().IsNa()) {
493 if(!
isdigit(*it) && *it !=
' ') {
503 if(entry.
IsSeq()) {
522 "No organism has been applied to this Bioseq. Other qualifiers may exist.", obj,
ctx);
529 boolisInfluenzaOrSars2 =
false;
530 boolisMetagenome =
false;
531 boolhasChromosome =
false;
532 boolhasPlasmidName =
false;
536 const string& taxname = orgref.
GetTaxname();
538 boolis_env_sample =
false;
542is_env_sample =
true;
546 if(!is_env_sample) {
548 "Uncultured should also have /environmental_sample",
553 "Blank sample should not be associated with any sequences",
561isInfluenzaOrSars2 =
true;
563}
else if(
NStr::EqualNocase(taxname,
"Severe acute respiratory syndrome coronavirus 2")) {
564isInfluenzaOrSars2 =
true;
566isMetagenome =
true;
570 if(m_genomeSubmission && isMetagenome) {
572 "Metagenome is not a legal organism name",
580 "Transposon and insertion sequence are no longer legal locations",
584 if(IsIndexerVersion()
588 "INDEXER_ONLY - BioSource location is chromosome",
592 boolisViral =
false, isAnimal =
false, isPlant =
false,
593isBacteria =
false, isArchaea =
false, isFungal =
false,
617 boolchrom_conflict =
false;
623 doublelat_value = 0.0, lon_value = 0.0;
624 boolis_single_cell_amplification =
false;
628ValidateSubSource(**ssit, obj,
ctx, isViral, isInfluenzaOrSars2);
629 if(!(*ssit)->IsSetSubtype()) {
633 if((*ssit)->IsSetName()) {
634 string str= (*ssit)->GetName();
637 "Subsource name should not be "+
str,
648countryname = (**ssit).GetName();
652 if((*ssit)->IsSetName()) {
653lat_lon = (*ssit)->GetName();
654 boolformat_correct =
false, lat_in_range =
false, lon_in_range =
false, precision_correct =
false;
656lat_in_range, lon_in_range,
657lat_value, lon_value);
664 if((*ssit)->IsSetName()) {
665 val= (*ssit)->GetName();
668 "'"+
val+
"' is an invalid altitude value, altitude should be provided in meters",
674hasChromosome =
true;
677chrom_conflict =
true;
680chromosome = ssit->GetPointer();
685linkage_group = ssit->GetPointer();
689 if((*ssit)->IsSetName()) {
690pcr_set_list.
AddFwdName((*ssit)->GetName());
695 if((*ssit)->IsSetName()) {
696pcr_set_list.
AddRevName((*ssit)->GetName());
701 if((*ssit)->IsSetName()) {
702pcr_set_list.
AddFwdSeq((*ssit)->GetName());
707 if((*ssit)->IsSetName()) {
708pcr_set_list.
AddRevSeq((*ssit)->GetName());
715 if(IsGpipe() && IsGenomic()) {
718 if(isAnimal || isPlant) {
720 const string str= (*ssit)->GetName();
723 "Invalid value ("+
str+
") for /sex qualifier", obj,
ctx);
725}
else if(isViral) {
727 "Virus has unexpected Sex qualifier", obj,
ctx);
728}
else if(isBacteria || isArchaea || isFungal) {
730 "Unexpected use of /sex qualifier", obj,
ctx);
732 const string str= (*ssit)->GetName();
736 "Invalid value ("+
str+
") for /sex qualifier", obj,
ctx);
743 if(isAnimal || isPlant || isViral) {
745 "Unexpected use of /mating_type qualifier", obj,
ctx);
749 "Unexpected use of /mating_type qualifier", obj,
ctx);
754hasPlasmidName =
true;
757 "Plasmid subsource but not plasmid location", obj,
ctx);
763 if((*ssit)->IsSetName()) {
769 const string&
subname= ((*ssit)->GetName());
779 if(genome_from_name != genome) {
782val_name = val_name.substr(8);
785 "Plastid name subsource "+ val_name +
" but not "+ val_name +
" location", obj,
ctx);
789 "Plastid name subsource contains unrecognized value", obj,
ctx);
796 if((*ssit)->IsSetName() && hasTaxname) {
807 "Tissue-type is inappropriate for bacteria", obj,
ctx);
808}
else if(isViroid) {
810 "Viroid has unexpected tissue-type qualifier", obj,
ctx);
815 if((*ssit)->IsSetName()) {
816 const string&
subname= ((*ssit)->GetName());
818is_single_cell_amplification =
true;
822 stringnum =
subname.substr(0, pos);
824is_single_cell_amplification =
true;
841 "Virus has unexpected "+
subname+
" qualifier", obj,
ctx);
845 if(hasChromosome && hasPlasmidName) {
847 "Source should not have both chromosome and plasmid name fields",
854 boolsuppress =
false;
857it->IsSetName() &&
NStr::Equal(it->GetName(),
"unlocalized")) {
865 if(entry.
IsSeq()) {
869 switch(sid.
Which()) {
881 if(acc.length() == 8) {
896 string msg=
"INDEXER_ONLY - source contains chromosome value '";
900 msg+=
"' but the BioSource location is not set to chromosome";
909 boolsuppress =
false;
912it->IsSetName() &&
NStr::Equal(it->GetName(),
"unlocalized")) {
920 if(entry.
IsSeq()) {
924 switch(sid.
Which()) {
936 if(acc.length() == 8) {
951 string msg=
"INDEXER_ONLY - source contains linkage_group value '";
955 msg+=
"' but the BioSource location is not set to chromosome";
963 if(it->second <= 1)
continue;
965 stringqual =
"***";
968qual = chrom_conflict ?
"conflicting chromosome":
"identical chromosome";
break;
970qual =
"germline";
break;
972qual =
"rearranged";
break;
974qual =
"plasmid_name";
break;
976qual =
"segment";
break;
980 if(use_geo_loc_name) {
981qual =
"geo_loc_name";
988qual =
"transgenic";
break;
990qual =
"environmental_sample";
break;
992qual =
"lat_lon";
break;
994qual =
"collection_date";
break;
996qual =
"collected_by";
break;
998qual =
"identified_by";
break;
1000qual =
"fwd_primer_seq";
break;
1002qual =
"rev_primer_seq";
break;
1004qual =
"fwd_primer_name";
break;
1006qual =
"rev_primer_name";
break;
1008qual =
"metagenomic";
break;
1010qual =
"altitude";
break;
1020 "Germline and rearranged should not both be present", obj,
ctx);
1024 "Transgenic and environmental sample should not both be present", obj,
ctx);
1028 "Metagenomic should also have environmental sample annotated", obj,
ctx);
1032 "Sex and mating type should not both be present", obj,
ctx);
1036 if(m_genomeSubmission) {
1040 "Plasmid location set but plasmid name missing. Add a plasmid source modifier with the plasmid name. Use unnamed if the name is not known.",
1050 "PCR primer does not have both sequences", obj,
ctx);
1053 boolhas_duplicate_primers =
false;
1055has_duplicate_primers =
true;
1058has_duplicate_primers =
true;
1061 if(has_duplicate_primers) {
1063 "PCR primer sequence has duplicates", obj,
ctx);
1067ValidateLatLonCountry(countryname, lat_lon, obj,
ctx);
1074 if(!IsSeqSubmitParent() && IsIndexerVersion()) {
1086 if(IsEmbl() || IsDdbj()) {
1091 "No lineage for this BioSource.", obj,
ctx);
1096 const string& lineage = orgname.
GetLineage();
1098 if(lineage.find(
"Kinetoplastida") == string::npos && lineage.find(
"Kinetoplastea") == string::npos) {
1100 "Only Kinetoplastida have kinetoplasts", obj,
ctx);
1103 if(lineage.find(
"Chlorarachniophyceae") == string::npos &&
1104lineage.find(
"Cryptophyceae") == string::npos) {
1107 "Only Chlorarachniophyceae and Cryptophyceae have nucleomorphs", obj,
ctx);
1110 if(lineage.find(
"Ciliophora") == string::npos) {
1112 "Only Ciliophora have macronuclear locations", obj,
ctx);
1117 const string& div = orgname.
GetDiv();
1129 "Bacterial or viral source should not have organelle location",
1134 "BioSource with ENV division is missing environmental sample subsource",
1141 "If metagenomes appears in lineage, BioSource should have metagenomic qualifier",
1148 boolspecific_host =
false;
1152 if(!it->IsSetSubtype()) {
1158specific_host =
true;
1178 "Virus has unexpected "+
subname+
" qualifier", obj,
ctx);
1184 "Environmental sample should also have isolation source or specific host annotated",
1188m_biosource_kind = bsrc;
1190 const CBioseq* pBioseq=
nullptr;
1191 const boolcheckForUndefinedSpecies = hasTaxname &&
1192(IsGenomeSubmission() ||
1197ValidateOrgRef(orgref, obj,
ctx, checkForUndefinedSpecies, is_single_cell_amplification);
1206(
const string& primer_kind,
1211 if(badch < ' ' || badch >
'~') {
1214 string msg=
"PCR "+ primer_kind +
" primer sequence format is incorrect, first bad character is '";
1224 const string& primer_kind,
1230x_ReportPCRSeqProblem(primer_kind, badch, obj,
ctx);
1236 "PCR "+ primer_kind +
" primer name appears to be a sequence",
1248 for(
autoit : pcrset.
Get())
1250 if(it->IsSetForward()) {
1251 for(
autopit : it->GetForward().Get())
1253x_CheckPCRPrimer(*pit,
"forward", obj,
ctx);
1256 if(it->IsSetReverse()) {
1257 for(
autopit : it->GetReverse().Get())
1259x_CheckPCRPrimer(*pit,
"reverse", obj,
ctx);
1272 const boolisInfluenzaOrSars2)
1276 "Unknown subsource subtype 0", obj,
ctx);
1290 const auto& fdata = feat->
GetData();
1291 if(fdata.IsBiosrc() && fdata.GetBiosrc().IsSetTaxname()) {
1306 stringcountryname = subsrc.
GetName();
1307 boolis_miscapitalized =
false;
1308 boolis_null_and_virus =
false;
1310 if(
CCountries::IsValid(countryname, is_miscapitalized, is_null_and_virus, isInfluenzaOrSars2)) {
1311 if(is_miscapitalized) {
1312 if(use_geo_loc_name) {
1314 "Bad geo_loc_name capitalization ["+ countryname +
"]",
1318 "Bad country capitalization ["+ countryname +
"]",
1322 if(is_null_and_virus) {
1323 if(use_geo_loc_name) {
1325 "Null geo_loc_name ["+ countryname +
"] for influenza or Sars virus",
1329 "Null country ["+ countryname +
"] for influenza or Sars virus",
1334 if(use_geo_loc_name) {
1336 "Colon at end of geo_loc_name ["+ countryname +
"]", obj,
ctx);
1339 "Colon at end of country name ["+ countryname +
"]", obj,
ctx);
1343 if(use_geo_loc_name) {
1345 "Replaced geo_loc_name ["+ countryname +
"]", obj,
ctx);
1348 "Replaced country name ["+ countryname +
"]", obj,
ctx);
1352 if(countryname.empty()) {
1355 if(use_geo_loc_name) {
1357 "Bad geo_loc_name ["+ countryname +
"]", obj,
ctx);
1360 "Bad country name ["+ countryname +
"]", obj,
ctx);
1368 boolformat_correct =
false, lat_in_range =
false, lon_in_range =
false, precision_correct =
false;
1369 doublelat_value = 0.0, lon_value = 0.0;
1370 stringlat_lon = subsrc.
GetName();
1372lat_in_range, lon_in_range,
1373lat_value, lon_value);
1374 if(!format_correct) {
1376 if(pos != string::npos) {
1378 if(format_correct) {
1380 "lat_lon format has extra text after correct dd.dd N|S ddd.dd E|W format",
1386 if(!format_correct) {
1388 "lat_lon format is incorrect - should be dd.dd N|S ddd.dd E|W",
1391 if(!lat_in_range) {
1393 "latitude value is out of range - should be between 90.00 N and 90.00 S",
1396 if(!lon_in_range) {
1398 "longitude value is out of range - should be between 180.00 E and 180.00 W",
1401 if(!precision_correct) {
1414 stringname = subsrc.
GetName();
1416 if(name.length() > 10
1419 "PCR primer name appears to be a sequence",
1427 stringname = subsrc.
GetName();
1429 if(name.length() > 10
1432 "PCR primer name appears to be a sequence",
1442x_ReportPCRSeqProblem(
"forward", bad_ch, obj,
ctx);
1451x_ReportPCRSeqProblem(
"reverse", bad_ch, obj,
ctx);
1459 "Transposon name and insertion sequence name are no " 1460 "longer legal qualifiers", obj,
ctx);
1465 "Unknown subsource subtype 0", obj,
ctx);
1469ValidateSourceQualTags(subsrc.
GetName(), obj,
ctx);
1499 "Problematic plasmid/chromosome/linkage group name '"+ sname +
"'",
1504 "Chromosome should not include contig or scaffold: '"+ sname +
"'",
1510 "Problematic plasmid/chromosome/linkage group name '"+ sname +
"'",
1517 "Problematic plasmid/chromosome/linkage group name '"+ sname +
"'",
1529 "Non-viral source feature should not have a segment qualifier",
1552 const string& frequency = subsrc.
GetName();
1557 "bad frequency qualifier value "+ frequency,
1560string::const_iterator sit = frequency.begin();
1561 boolbad_frequency =
false;
1565 if(sit != frequency.end() && *sit ==
'.') {
1567 if(sit == frequency.end()) {
1568bad_frequency =
true;
1570 while(sit != frequency.end() &&
isdigit(*sit)) {
1573 if(sit != frequency.end()) {
1574bad_frequency =
true;
1577bad_frequency =
true;
1579 if(bad_frequency) {
1581 "bad frequency qualifier value "+ frequency,
1590 "Collection_date format is not in DD-Mmm-YYYY format",
1593 boolis_null_and_virus =
false;
1598}
else if(isInfluenzaOrSars2) {
1600 "Null collection date ["+ problem +
"] for influenza or Sars virus",
1620 subname+
" qualifier should not have descriptive text",
1627 "Unbalanced parentheses in subsource '"+
subname+
"'",
1632 "subsource "+
subname+
" has SGML",
1646 size_tvalue_len =
value.length();
1647 while(pos != string::npos
1648&& (((pos != 0 &&
isalpha(taxname.c_str()[pos - 1]))
1649||
isalpha(taxname.c_str()[pos + value_len])))) {
1652 if(pos == string::npos) {
1678 if(pos == string::npos) {
1682}
else if(pos > 0 &&
NStr::EqualNocase(taxname.substr(0, pos),
"Salmonella")) {
1718 const boolcheckForUndefinedSpecies,
1719 const boolis_single_cell_amplification)
1725 "No organism name included in the source. Other qualifiers may exist.", obj,
ctx);
1747 "Organism '"+ taxname +
"' is undefined species and does not have a specific identifier.",
1753 "Unbalanced parentheses in taxname '"+ orgref.
GetTaxname() +
"'", obj,
ctx);
1757 "taxname "+ taxname +
" has SGML",
1765ValidateTaxNameOrgname(taxname, orgref.
GetOrgname(), obj,
ctx);
1771ValidateDbxref(orgref.
GetDb(), obj,
true,
ctx);
1774 boolhas_taxon =
false;
1782 if(! IsLocalGeneralOnly() || m_NotJustLocalOrGeneral) {
1785 if(IsRequireTaxonID() &&
!has_taxon) {
1787 "BioSource is missing taxon ID", obj,
ctx);
1794ValidateOrgName(orgname, has_taxon, obj,
ctx);
1797 stringtaxname_search = taxname;
1799 size_tpos =
NStr::Find(taxname_search,
" ");
1800 if(pos == string::npos) {
1801taxname_search.clear();
1803taxname_search = taxname_search.substr(pos + 1);
1806 if(pos == string::npos) {
1807taxname_search.clear();
1809taxname_search = taxname_search.substr(pos + 1);
1819 if(!(*it)->IsSetSubtype() || !(*it)->IsSetSubname()) {
1823 const string&
subname= (*it)->GetSubname();
1825 if(orgmod_name.length() > 0) {
1826orgmod_name[0] =
toupper(orgmod_name[0]);
1832 "Subspecies value specified is not found in taxname",
1838orgmod_name +
" value specified is not found in taxname",
1845orgmod_name +
" value specified is not found in taxname",
1851 "Specific host is identical to taxname",
1857 if(s_IsSalmonellaGenus(taxname)) {
1859 "Salmonella organisms should use serovar instead of serotype.",
1864 if(s_IsSalmonellaGenus(taxname) &&
NStr::Find(taxname,
subname) == string::npos) {
1866 "Salmonella organism name should contain the serovar value.",
1892 for(
autoit : hybrid) {
1893 if(it->IsSetName() &&
s_MatchOrgname(taxname, *it, mismatch)) {
1898 if(!rval && hybrid.size() > 1 &&
1899hybrid.front()->IsSetName()) {
1908 for(
autoit : partial) {
1909 if(it->IsSetName()) {
1910mismatch = it->GetName();
1917 if(!rval && partial.size() > 1 &&
1918partial.front()->IsSetName()) {
1920mismatch = partial.front()->GetName();
1932(
const string& taxname,
1940 "Taxname does not match orgname ('"+ taxname +
"', '"+ mismatch +
"')",
1949 const boolhas_taxon,
1953 boolis_viral =
false;
1983 boolhas_strain =
false;
1984 boolhas_isolate =
false;
1985vector<string> vouchers;
1988 const COrgMod& omd = **omd_itr;
1995 "Orgmod name should not be "+
str,
2012 "Orgmod.strain should not start with subsp.",
2016 "Orgmod.strain should not start with serovar",
2020 "Orgmod.strain should not be '"+
str+
"'",
2026 "Multiple strain qualifiers on the same BioSource", obj,
ctx);
2036 "Orgmod.isolate should not be '"+
str+
"'",
2040 if(has_isolate && check_multiple_isolates) {
2042 "Multiple isolate qualifiers on the same BioSource", obj,
ctx);
2044has_isolate =
true;
2052 "Orgmod.serovar should not start with subsp.",
2056 "Orgmod.serovar should not start with strain",
2067 "Orgmod.sub-species should not contain subsp.",
2081 "Orgmod variety should only be in plants, fungi, or cyanobacteria",
2090 if((*omd_itr)->IsSetSubname() && !
NStr::IsBlank((*omd_itr)->GetSubname())) {
2091 const string&
val= (*omd_itr)->GetSubname();
2096 if((*it2)->IsSetSubtype()
2098&& (*it2)->IsSetSubname()
2101 "OrgMod synonym is identical to OrgMod gb_synonym",
2110ValidateOrgModVoucher(omd, obj,
ctx);
2115 if(!(*omd_itr)->IsSetSubname() ||
2118 "Bad value for type_material", obj,
ctx);
2130 "Unbalanced parentheses in orgmod '"+
subname+
"'",
2135 "orgmod "+
subname+
" has SGML",
2140 if(m_genomeSubmission && has_strain && has_isolate) {
2142 "Organism has both strain: '"+ strain +
"' and isolate: '"+ isolate +
"'",
2153 if(strain.length() < 1) {
2158 "Orgmod.strain should not be species '"+ species +
"'",
2163 "Orgmod.strain should not be subspecies '"+ sub_species +
"'",
2168 "Orgmod.strain should not be serovar '"+ serovar +
"'",
2171 if(
NStr::FindNoCase(strain, genus +
" "+ species) != string::npos && genus.length() > 0 && species.length() > 0) {
2173 "Orgmod.strain should not contain '"+ genus +
" "+ species +
"'",
2199 if(!
source.IsSetGenome()
2202 boolis_viral =
false;
2203 if(
source.IsSetOrg()) {
2225 if(
source.IsSetLineage()) {
2226 stringlineage =
source.GetLineage();
2239 if(
source.IsSetLineage()) {
2240 stringlineage =
source.GetLineage();
2253 while(d && !rval) {
2254 const auto& user = d->
GetUser();
2255 if(user.IsSetType() && user.GetType().IsStr() &&
NStr::Equal(user.GetType().GetStr(),
"DBLink")) {
2256 for(
auto f: user.GetData()) {
2257 if(
f->IsSetLabel() &&
f->GetLabel().IsStr() &&
NStr::Equal(
f->GetLabel().GetStr(),
"BioSample")
2258&&
f->IsSetData() && (
f->GetData().IsStr() ||
f->GetData().IsStrs())) {
2276m_biosource_kind =
source;
2278 const auto& inst = bsh.
GetInst();
2280 if(
source.IsSetIs_focus()) {
2282 if(!bsh.
IsAa() &&
2288 "BioSource descriptor has focus, " 2289 "but no BioSource feature", obj,
ctx);
2293 if(
source.CanGetOrigin() &&
2295 if(!IsOtherDNA(bsh) && !bsh.
IsAa()) {
2297 "Molinfo-biomol other should be used if " 2298 "Biosource-location is synthetic", obj,
ctx);
2304&&
source.IsSetOrg() &&
source.GetOrg().IsSetTaxname()
2312 "HIV with moltype DNA should be proviral",
2320 "HIV with mRNA molecule type is rare",
2333sequence::CDeflineGenerator defline_generator;
2334title = defline_generator.GenerateDefline(bsh, sequence::CDeflineGenerator::fIgnoreExisting);
2337 boolisViral =
false;
2338 if(
source.IsSetLineage()) {
2339 stringlineage =
source.GetLineage();
2350&&
NStr::Find(title,
"complete genome") != string::npos
2353 "Non-viral complete genome not labeled as chromosome",
2360 boolis_synthetic_construct = IsSyntheticConstruct(
source);
2361 boolis_artificial = IsArtificial(
source);
2363 if(is_synthetic_construct) {
2367 "synthetic construct should have other-genetic",
2370 if(!is_artificial) {
2372 "synthetic construct should have artificial origin",
2375}
else if(is_artificial) {
2377 "artificial origin should have other-genetic and synthetic construct",
2380 if(is_artificial) {
2385 "artificial origin should have other-genetic",
2396 if(!(*it)->IsSetSubtype()) {
2404 if(mi && (*it)->IsSetName() &&
NStr::EqualNocase((*it)->GetName(),
"cRNA")) {
2409 "cRNA note conflicts with molecule type",
2413 "cRNA note redundant with molecule type",
2425 if(
source.IsSetOrg()) {
2434 if(it->IsSetSubtype()
2436&& it->IsSetSubname()
2441 "cRNA note conflicts with molecule type",
2446 "cRNA note redundant with molecule type",
2462 "Genomic DNA viral lineage indicates no DNA stage",
2470 if( (IsGpipe() || IsIndexerVersion() ) &&
s_IsBioSample(bsh) ) {
2473 if( is_bact || is_arch ) {
2474 boolhas_strain =
false;
2475 boolhas_isolate =
false;
2476 boolenv_sample =
false;
2477 if(
source.IsSetSubtype()) {
2486 if(!env_sample &&
source.IsSetOrg()
2487&&
source.GetOrg().IsSetOrgname()) {
2488 const auto& orgname =
source.GetOrg().GetOrgname();
2489 if(orgname.IsSetMod()) {
2490 for(
auto om: orgname.GetMod()) {
2491 if(
om->IsSetSubtype()) {
2493has_isolate =
true;
2506 if(!has_strain && !has_isolate && !env_sample) {
2509 "Bacteria should have strain or isolate or environmental sample",
2511}
else if(is_arch) {
2513 "Archaea should have strain or isolate or environmental sample",
2557 "collection_date:",
2561 "culture_collection:",
2565 "endogenous_virus_name:",
2566 "environmental_sample:",
2568 "forma_specialis:",
2570 "fwd_pcr_primer_name",
2571 "fwd_pcr_primer_seq",
2580 "insertion_seq_name:",
2582 "isolation_source:",
2589 "metagenome_source:",
2599 "rev_pcr_primer_name",
2600 "rev_pcr_primer_seq",
2609 "specimen_voucher:",
2622 "transposon_name:",
2631 staticstd::mutex m;
2633std::lock_guard
g(m);
2649(
const string&
str,
2655 size_tstr_len =
str.length();
2659 for(
size_t i= 0;
i< str_len; ++
i) {
2663 if(
match.empty()) {
2666 size_tmatch_len =
match.length();
2669 if((
int)(
i- match_len) >= 0) {
2670 charch =
str[
i- match_len];
2671 if(!
isspace((
unsigned char)ch) && ch !=
';') {
2679 if(pos != string::npos) {
2680 if(pos == 0 ||
isspace((
unsigned char)
str[pos]) ||
str[pos] ==
';') {
2692 "Source note has structured tag '"+
match+
"'", obj,
ctx);
2703user_object.
GetType().
GetStr() !=
"StructuredComment") {
2726user_object.
GetType().
GetStr() !=
"StructuredComment") {
2759usr_descs.push_back(desc);
2762desc_ctxs.push_back(r_se);
2768 for(
autoannot_it : se.
GetAnnot()) {
2769 if(annot_it->IsFtable()) {
2770 for(
autofeat_it : annot_it->GetData().GetFtable()) {
2771 if(feat_it->IsSetData() && feat_it->GetData().IsUser()
2774feat.
Reset(feat_it);
2775usr_feats.push_back(feat);
2786GatherTentativeName(**it, usr_descs, desc_ctxs, usr_feats);
2797 if(org_rq_list.size() > 0) {
2801 while(
i< org_rq_list.size()) {
2803vector< CRef<COrg_ref> > tmp_rq(org_rq_list.begin() +
i, org_rq_list.begin() +
i+
len);
2805 if(!reply || !reply->IsSetReply()) {
2826 if(org_rq_list.size() == 0) {
2832 while(
i< org_rq_list.size()) {
2834vector< CRef<COrg_ref> > tmp_rq(org_rq_list.begin() +
i, org_rq_list.begin() +
i+
len);
2837 if(tmp_spec_host_reply) {
2840err_msg =
"Connection to taxonomy failed";
2858 if(org_rq_list.size() == 0) {
2864 while(
i< org_rq_list.size()) {
2866vector< CRef<COrg_ref> > tmp_rq(org_rq_list.begin() +
i, org_rq_list.begin() +
i+
len);
2882 autopTval = x_CreateTaxValidator();
2884ValidateSpecificHost(*pTval);
2890 const stringerr_str =
error.IsSetMessage() ?
error.GetMessage() :
"?";
2892 if(
NStr::Equal(err_str,
"Organism not found")) {
2902vector<CConstRef<CSeqdesc> > src_descs;
2903vector<CConstRef<CSeq_entry> > desc_ctxs;
2904vector<CConstRef<CSeq_feat> > src_feats;
2906GatherTentativeName(se, src_descs, desc_ctxs, src_feats);
2909vector< CRef<COrg_ref> > org_rq_list;
2912vector<CConstRef<CSeqdesc> >::iterator desc_it = src_descs.begin();
2913vector<CConstRef<CSeq_entry> >::iterator ctx_it = desc_ctxs.begin();
2914 while(desc_it != src_descs.end() && ctx_it != desc_ctxs.end()) {
2918org_rq_list.push_back(rq);
2925vector<CConstRef<CSeq_feat> >::iterator feat_it = src_feats.begin();
2926 while(feat_it != src_feats.end()) {
2930org_rq_list.push_back(rq);
2935 if(org_rq_list.empty()) {
2940 if(!reply || !reply->IsSetReply()) {
2942 "Taxonomy service connection failure", se);
2945 const auto& rlist = reply->GetReply();
2946CTaxon3_reply::TReply::const_iterator reply_it = rlist.begin();
2949desc_it = src_descs.begin();
2950ctx_it = desc_ctxs.begin();
2953 while(reply_it != rlist.end()
2954&& desc_it != src_descs.end()
2955&& ctx_it != desc_ctxs.end()) {
2956 if((*reply_it)->IsError()) {
2959 "Taxonomy lookup failed for Tentative Name '"+ org_rq_list[pos]->GetTaxname() +
"'",
2960**desc_it, *ctx_it);
2962HandleTaxonomyError((*reply_it)->GetError(),
2973feat_it = src_feats.begin();
2974 while(reply_it != rlist.end()
2975&& feat_it != src_feats.end()) {
2976 if((*reply_it)->IsError()) {
2979 "Taxonomy lookup failed for Tentative Name '"+ org_rq_list[pos]->GetTaxname() +
"'",
2982HandleTaxonomyError((*reply_it)->GetError(),
2995 const stringerr_str =
error.IsSetMessage() ?
error.GetMessage() :
"?";
2997 if(
NStr::Equal(err_str,
"Organism not found")) {
2998 string msg=
"Organism not found in taxonomy database";
2999 if(
error.IsSetOrg()) {
3000 const auto& e_org =
error.GetOrg();
3002 if(e_org.IsSetTaxname() &&
3003!
NStr::Equal(e_org.GetTaxname(),
"Not valid") &&
3004(!d_org.IsSetTaxname() ||
3005!
NStr::Equal(d_org.GetTaxname(), e_org.GetTaxname()))) {
3006 msg+=
" (suggested:"+ e_org.GetTaxname() +
")";
3018 "Taxonomy lookup failed with message '"+ err_str +
"'",
3022 "Taxonomy lookup failed with message '"+ err_str +
"'",
3030 const stringerr_str =
error.IsSetMessage() ?
error.GetMessage() :
"?";
3038 "Taxonomy lookup failed with message '"+ err_str +
"'",
3042 "Taxonomy lookup failed with message '"+ err_str +
"'",
3048 const string& host,
const COrg_ref& org)
3050 const stringerr_str =
error.IsSetMessage() ?
error.GetMessage() :
"?";
3052 if(
NStr::Equal(err_str,
"Organism not found")) {
3054 "Organism not found in taxonomy database",
3058 "Specific host value is ambiguous: "+ host, org);
3065 "Invalid value for specific host: "+ host, org);
3092 if(use_new_strain_validation) {
3097 stringfromEnv =
env.Get(
"NCBI_NEW_STRAIN_VALIDATION");
3099 if(fromEnv ==
"true") {
3101}
else if(fromEnv ==
"false") {
3131 autopTval = x_CreateTaxValidator();
3135ValidateOrgRefs(*pTval);
3138ValidateSpecificHost(*pTval);
3145 if(request.size() < 1) {
3146return CRef<CTaxon3_reply>();
3150 if(m_NewStrainValidation) {
3151cerr <<
"CStrainRequest::ExploreStrainsForTaxonInfo TaxonReply:"<< endl <<
MSerial_AsnText<< reply << endl;
3158ValidateStrain(*pTval, pTval->m_descTaxID);
3161ValidateTentativeName(se);
3167 autopTval = x_CreateTaxValidator();
3168pTval->CheckOneOrg(org, genome, *
this);
3199 unsigned intpcr_num = 0;
3201name = name.substr(1, name.length() - 2);
3202vector<string> mult_names;
3204 unsigned intname_num = 0;
3205 while(name_num < mult_names.size()) {
3212 m_SetList[pcr_num]->SetFwdName(mult_names[name_num]);
3230 unsigned intpcr_num = 0;
3232name = name.substr(1, name.length() - 2);
3233vector<string> mult_names;
3235 unsigned intname_num = 0;
3236 while(name_num < mult_names.size()) {
3243 m_SetList[pcr_num]->SetRevName(mult_names[name_num]);
3261 unsigned intpcr_num = 0;
3263name = name.substr(1, name.length() - 2);
3264vector<string> mult_names;
3266 unsigned intname_num = 0;
3267 while(name_num < mult_names.size()) {
3274 m_SetList[pcr_num]->SetFwdSeq(mult_names[name_num]);
3292 unsigned intpcr_num = 0;
3294name = name.substr(1, name.length() - 2);
3295vector<string> mult_names;
3297 unsigned intname_num = 0;
3298 while(name_num < mult_names.size()) {
3305 m_SetList[pcr_num]->SetRevSeq(mult_names[name_num]);
3330}
else if(compare > 0) {
3334}
else if(compare > 0) {
3338}
else if(compare > 0) {
3390}
else if(compare > 0) {
3414}
else if(s1.
Get().size() < s2.
Get().size()) {
3416}
else if(s1.
Get().size() > s2.
Get().size()) {
3419 autoit1 = s1.
Get().begin();
3420 autoit2 = s2.
Get().begin();
3421 while(it1 != s1.
Get().end()) {
3472 template<
typenameT>
3486 if(!primers.
IsSet() || primers.
Get().size() < 2) {
3490 for(
autoit : primers.
Get()) {
3491 if(already_seen.
find(it) != already_seen.
end()) {
3494already_seen.
insert(it);
3529vector<string> error_list;
3531 ITERATE(vector<string>, err, error_list) {
3534}
else if(
NStr::FindNoCase(*err,
"should be structured") != string::npos) {
3536}
else if(
NStr::FindNoCase(*err,
"missing institution code") != string::npos) {
3538}
else if(
NStr::FindNoCase(*err,
"missing specific identifier") != string::npos) {
3549}
else if(
NStr::FindNoCase(*err,
"should not be qualified with a <COUNTRY> designation") != string::npos) {
3550 if(use_geo_loc_name) {
3555}
else if(
NStr::FindNoCase(*err,
"needs to be qualified with a <COUNTRY> designation") != string::npos) {
3557}
else if(
NStr::FindNoCase(*err,
" exists, but collection ") != string::npos) {
3571 returnm_taxon->SendOrgRefList(orgRefs);
3573 returnmake_unique<CTaxValidationAndCleanup>(taxFunc);
3577 returnmake_unique<CTaxValidationAndCleanup>(
m_pContext->m_taxon_update);
const char * sm_ValidModifiedPrimerBases[]
@ eErr_SEQ_DESCR_BadPlastidName
@ eErr_SEQ_DESCR_ObsoleteSourceQual
@ eErr_SEQ_DESCR_MissingEnvironmentalSample
@ eErr_SEQ_DESCR_ObsoleteSourceLocation
@ eErr_SEQ_DESCR_MissingPlasmidLocation
@ eErr_SEQ_DESCR_BadNullCountry
@ eErr_SEQ_DESCR_InvalidTissueType
@ eErr_SEQ_DESCR_TaxonomyServiceProblem
@ eErr_SEQ_DESCR_TaxonomyBlankSample
@ eErr_SEQ_DESCR_MissingPersonalCollectionName
@ eErr_SEQ_DESCR_LatLonRange
@ eErr_SEQ_DESCR_DuplicatePCRPrimerSequence
@ eErr_GENERIC_SgmlPresentInText
@ eErr_SEQ_DESCR_UnstructuredVoucher
@ eErr_SEQ_DESCR_TaxonomyIsMetagenome
@ eErr_SEQ_DESCR_BadVariety
@ eErr_SEQ_DESCR_BadInstitutionGeoLocName
@ eErr_SEQ_DESCR_BadTypeMaterial
@ eErr_SEQ_DESCR_OrgModMissingValue
@ eErr_SEQ_DESCR_NoOrgFound
@ eErr_SEQ_DESCR_BadPCRPrimerSequence
@ eErr_SEQ_DESCR_UnnecessaryBioSourceFocus
@ eErr_SEQ_DESCR_InvalidForType
@ eErr_SEQ_DESCR_LatLonValue
@ eErr_SEQ_DESCR_OrganismIsUndefinedSpecies
@ eErr_SEQ_DESCR_IdenticalInstitutionCode
@ eErr_SEQ_DESCR_BacteriaMissingSourceQualifier
@ eErr_SEQ_DESCR_BadCountryCapitalization
@ eErr_SEQ_DESCR_BadCollectionDate
@ eErr_SEQ_DESCR_BadContigOrScaffoldChromosome
@ eErr_SEQ_DESCR_BadInstitutionCode
@ eErr_SEQ_DESCR_BadAltitude
@ eErr_SEQ_DESCR_IncorrectlyFormattedVoucherID
@ eErr_SEQ_DESCR_StrainWithEnvironSample
@ eErr_SEQ_DESCR_BadNullCollectionDate
@ eErr_SEQ_DESCR_OrganismNotFound
@ eErr_SEQ_DESCR_InconsistentVirusMoltype
@ eErr_SEQ_DESCR_BadInstitutionCountry
@ eErr_SEQ_DESCR_MissingPlasmidName
@ eErr_SEQ_DESCR_UnculturedNeedsEnvSample
@ eErr_SEQ_DESCR_BadTentativeName
@ eErr_SEQ_DESCR_BadPlasmidChromosomeLinkageName
@ eErr_SEQ_DESCR_BadTextInSourceQualifier
@ eErr_SEQ_DESCR_SuspectedContaminatedCellLine
@ eErr_SEQ_DESCR_AmbiguousSpecificHost
@ eErr_SEQ_DESCR_BadGeoLocNameCapitalization
@ eErr_SEQ_DESCR_ChromosomeWithoutLocation
@ eErr_SEQ_DESCR_StructuredSourceNote
@ eErr_SEQ_DESCR_InvalidMatingType
@ eErr_SEQ_DESCR_BadSubSource
@ eErr_SEQ_DESCR_MultipleStrains
@ eErr_SEQ_DESCR_BadGeoLocNameCode
@ eErr_SEQ_DESCR_InvalidSexQualifier
@ eErr_SEQ_DESCR_TaxonomyAmbiguousName
@ eErr_SEQ_DESCR_MultipleSourceQualifiers
@ eErr_SEQ_DESCR_WrongVoucherType
@ eErr_SEQ_DESCR_HasStrainAndIsolate
@ eErr_SEQ_DESCR_BadCollectionCode
@ eErr_SEQ_DESCR_SyntheticConstructWrongMolType
@ eErr_SEQ_DESCR_TaxonomyLookupProblem
@ eErr_SEQ_DESCR_BadNullGeoLocName
@ eErr_SEQ_DESCR_NoTaxonID
@ eErr_SEQ_DESCR_LatLonFormat
@ eErr_SEQ_DESCR_MissingLineage
@ eErr_SEQ_DESCR_BadOrgMod
@ eErr_SEQ_DESCR_BadSpecificHost
@ eErr_SEQ_DESCR_BadPCRPrimerName
@ eErr_SEQ_DESCR_OrgModValueInvalid
@ eErr_SEQ_DESCR_BadOrganelleLocation
@ eErr_SEQ_DESCR_EnvironSampleMissingQualifier
@ eErr_SEQ_DESCR_MultipleIsolates
@ eErr_SEQ_DESCR_BadCountryCode
@ eErr_SEQ_DESCR_ChromosomeLocation
@ eErr_SEQ_DESCR_BioSourceNeedsChromosome
@ eErr_SEQ_DESCR_BioSourceInconsistency
@ eErr_SEQ_DESCR_HostIdenticalToOrganism
@ eErr_SEQ_DESCR_BadBioSourceFrequencyValue
@ eErr_SEQ_DESCR_ReplacedCountryCode
@ eErr_SEQ_DESCR_ReplacedGeoLocNameCode
@ eErr_SEQ_DESCR_UnbalancedParentheses
@ eErr_SEQ_DESCR_MissingMetagenomicQualifier
@ eErr_SEQ_DESCR_SyntheticConstructNeedsArtificial
@ eErr_SEQ_DESCR_NonViralSegment
bool IsOrganismArchaea() const
bool IsOrganismEukaryote() const
bool IsOrganismBacteria() const
CBioSourceKind & operator=(const CBioSource &bsrc)
const string & GetLineage(void) const
static string GetOrganelleByGenome(unsigned int genome)
bool IsSetLineage(void) const
static CBioSource::EGenome GetGenomeByOrganelle(const string &organelle, NStr::ECase use_case=NStr::eCase, bool starts_with=false)
bool IsSetTaxname(void) const
static bool WasValid(const string &country)
static bool IsValid(const string &country)
static CNcbiApplication * Instance(void)
Singleton method.
@OrgMod.hpp User-defined methods of the data storage class.
static bool NCBI_ValidateForMultipleIsolates(void)
static string IsCultureCollectionValid(const string &culture_collection)
static bool IsStrainValid(const string &strain)
static bool IsValidTypeMaterial(const string &type_material)
static string IsBiomaterialValid(const string &biomaterial)
static string GetSubtypeName(TSubtype stype, EVocabulary vocabulary=eVocabulary_raw)
static string CheckMultipleVouchers(const vector< string > &)
static bool IsIsolateValid(const string &isolate)
static string IsSpecimenVoucherValid(const string &specimen_voucher)
bool GetFlatName(string &name_out, string *lineage=0) const
const string & GetLineage(void) const
const string & GetDivision(void) const
bool IsSetDivision(void) const
bool IsVarietyValid(const string &variety) const
bool IsSetOrgMod(void) const
bool IsSubspeciesValid(const string &subspecies) const
bool IsSetLineage(void) const
static bool IsValid(const string &seq, char &bad_ch)
void AddFwdName(string name)
vector< CPCRSet * > m_SetList
void AddRevName(string name)
void AddFwdSeq(string name)
void AddRevSeq(string name)
size_t GetOrigPos() const
string GetRevName() const
string GetFwdName() const
const TAnnot & GetAnnot(void) const
const CSeq_descr & GetDescr(void) const
bool IsSetAnnot(void) const
bool IsSetDescr(void) const
namespace ncbi::objects::
Base class for all serializable objects.
static void ExploreStrainsForTaxonInfo(CTaxValidationAndCleanup &tval, CValidError_imp &imp, const CSeq_entry &se, std::function< CRef< CTaxon3_reply >(const vector< CRef< COrg_ref >> &)> taxoncallback)
static bool NCBI_UseGeoLocNameForCountry(void)
static string GetCollectionDateProblem(const string &date_string)
static bool IsPlasmidNameValid(const string &value, const string &taxname)
static bool IsValidSexQualifierValue(const string &value)
static bool IsMultipleValuesAllowed(TSubtype)
static bool IsAltitudeValid(const string &value)
static string ValidateLatLonCountry(const string &countryname, string &lat_lon, bool check_state, ELatLonCountryErr &errcode)
static string CheckCellLine(const string &cell_line, const string &organism)
static string GetSubtypeName(CSubSource::TSubtype stype, EVocabulary vocabulary=eVocabulary_raw)
static bool NeedsNoText(const TSubtype &subtype)
static bool IsEndogenousVirusNameValid(const string &value)
static bool IsChromosomeNameValid(const string &value, const string &taxname)
static void IsCorrectLatLonFormat(string lat_lon, bool &format_correct, bool &precision_correct, bool &lat_in_range, bool &lon_in_range, double &lat_value, double &lon_value)
static bool IsSegmentValid(const string &value)
static bool IsLinkageGroupNameValid(const string &value, const string &taxname)
vector< CRef< COrg_ref > > GetTaxonomyLookupRequest() const
void ReportSpecificHostErrors(const CTaxon3_reply &reply, CValidError_imp &imp)
void ReportIncrementalTaxLookupErrors(const CTaxon3_reply &reply, CValidError_imp &imp, bool is_insd_patent, size_t offset) const
vector< CRef< COrg_ref > > GetStrainLookupRequest()
string IncrementalSpecificHostMapUpdate(const vector< CRef< COrg_ref > > &input, const CTaxon3_reply &reply)
CConstRef< CSeq_entry > GetTopReportObject() const
string IncrementalStrainMapUpdate(const vector< CRef< COrg_ref > > &input, const CTaxon3_reply &reply, TTaxId descTaxID=ZERO_TAX_ID)
void ReportStrainErrors(CValidError_imp &imp)
vector< CRef< COrg_ref > > GetSpecificHostLookupRequest(bool for_fix)
static bool IsWGS(const CBioseq &seq)
void ValidateTaxNameOrgname(const string &taxname, const COrgName &orgname, const CSerialObject &obj, const CSeq_entry *ctx)
bool IsSyntheticConstruct(const CBioSource &src)
void HandleTaxonomyError(const CT3Error &error, const string &host, const COrg_ref &orf)
void ValidateSubSource(const CSubSource &subsrc, const CSerialObject &obj, const CSeq_entry *ctx=nullptr, const bool isViral=false, const bool isInfluenzaOrSars2=false)
void ValidateLatLonCountry(string countryname, string lat_lon, const CSerialObject &obj, const CSeq_entry *ctx)
void GatherTentativeName(const CSeq_entry &se, vector< CConstRef< CSeqdesc > > &usr_descs, vector< CConstRef< CSeq_entry > > &desc_ctxs, vector< CConstRef< CSeq_feat > > &usr_feats)
void ValidateOrgName(const COrgName &orgname, const bool has_taxon, const CSerialObject &obj, const CSeq_entry *ctx)
void PostObjErr(EDiagSev sv, EErrType et, const string &msg, const CSerialObject &obj, const CSeq_entry *ctx=nullptr)
void InitializeSourceQualTags()
void ValidateTaxonomy(const CSeq_entry &se)
void ValidateOrgRef(const COrg_ref &orgref, const CSerialObject &obj, const CSeq_entry *ctx, const bool checkForUndefinedSpecies=false, const bool is_single_cell_amplification=false)
static bool s_IsSalmonellaGenus(const string &taxname)
unique_ptr< CTaxValidationAndCleanup > x_CreateTaxValidator() const
void x_ReportPCRSeqProblem(const string &primer_kind, char badch, const CSerialObject &obj, const CSeq_entry *ctx)
void x_CheckPCRPrimer(const CPCRPrimer &primer, const string &primer_kind, const CSerialObject &obj, const CSeq_entry *ctx)
void ValidateSourceQualTags(const string &str, const CSerialObject &obj, const CSeq_entry *ctx=nullptr)
void ValidateBioSource(const CBioSource &bsrc, const CSerialObject &obj, const CSeq_entry *ctx=nullptr)
bool IsArtificial(const CBioSource &src)
void ValidatePCRReactionSet(const CPCRReactionSet &pcrset, const CSerialObject &obj, const CSeq_entry *ctx=nullptr)
void ValidateBioSourceForSeq(const CBioSource &bsrc, const CSerialObject &obj, const CSeq_entry *ctx, const CBioseq_Handle &bsh)
void ValidateOrgRefs(CTaxValidationAndCleanup &tval)
bool IsOtherDNA(const CBioseq_Handle &bsh) const
void ValidateSpecificHost(CTaxValidationAndCleanup &tval)
void ValidateStrain(CTaxValidationAndCleanup &tval, TTaxId descTaxID=ZERO_TAX_ID)
shared_ptr< SValidatorContext > m_pContext
void ValidateOrgModVoucher(const COrgMod &orgmod, const CSerialObject &obj, const CSeq_entry *ctx)
void ValidateTentativeName(const CSeq_entry &se)
bool IsTransgenic(const CBioSource &bsrc)
EDiagSev x_SalmonellaErrorLevel()
static EErrType ConvertCode(CSubSource::ELatLonCountryErr errcode)
iterator_bool insert(const value_type &val)
const_iterator find(const key_type &key) const
const_iterator end() const
static const int chunk_size
Include a standard set of the NCBI C++ Toolkit most basic headers.
The NCBI C++ standard methods for dealing with std::string.
static const char * str(char *buf, int n)
#define FOR_EACH_USERFIELD_ON_USEROBJECT(Itr, Var)
FOR_EACH_USERFIELD_ON_USEROBJECT EDIT_EACH_USERFIELD_ON_USEROBJECT.
const CNcbiEnvironment & GetEnvironment(void) const
Get the application's cached environment.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
SStrictId_Tax::TId TTaxId
Taxon id type.
EDiagSev
Severity level for the posted diagnostics.
@ eDiag_Info
Informational message.
@ eDiag_Error
Error message.
@ eDiag_Warning
Warning message.
@ eDiag_Fatal
Fatal error -- guarantees exit (or abort)
@ eDiag_Critical
Critical error message.
const TPrim & Get(void) const
virtual const CTypeInfo * GetThisTypeInfo(void) const =0
#define MSerial_AsnText
I/O stream manipulators.
const CTextseq_id * GetTextseq_Id(void) const
Return embedded CTextseq_id, if any.
CConstRef< CBioseq > GetCompleteBioseq(void) const
Get the complete bioseq.
bool IsSetInst(void) const
const TInst & GetInst(void) const
void Reset(void)
Reset reference object.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive compare of a substring with another string.
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)
Find the pattern in the specified range of a string using a case insensitive search.
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)
Truncate whitespace in a string (in-place)
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
static bool EqualCase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-sensitive equality of a substring with another string.
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
static bool Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Test for equality of a substring with another string.
static string & ReplaceInPlace(string &src, const string &search, const string &replace, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
static string & ToLower(string &str)
Convert string to lower case -- string& version.
@ eNocase
Case insensitive compare.
@ eCase
Case sensitive compare.
const Tdata & Get(void) const
Get the member data.
const TSubtype & GetSubtype(void) const
Get the Subtype member data.
const TPcr_primers & GetPcr_primers(void) const
Get the Pcr_primers member data.
TGenome GetGenome(void) const
Get the Genome member data.
TOrigin GetOrigin(void) const
Get the Origin member data.
bool IsSetSeq(void) const
Check if a value has been assigned to Seq data member.
const Tdata & Get(void) const
Get the member data.
bool CanGetSubtype(void) const
Check if it is safe to call GetSubtype method.
bool IsSetName(void) const
Check if a value has been assigned to Name data member.
bool IsSetOrg(void) const
Check if a value has been assigned to Org data member.
bool CanGetOrg(void) const
Check if it is safe to call GetOrg method.
list< CRef< CSubSource > > TSubtype
bool IsSetPcr_primers(void) const
Check if a value has been assigned to Pcr_primers data member.
const TForward & GetForward(void) const
Get the Forward member data.
const TOrg & GetOrg(void) const
Get the Org member data.
bool IsSetOrigin(void) const
Check if a value has been assigned to Origin data member.
TSubtype GetSubtype(void) const
Get the Subtype member data.
bool IsSetGenome(void) const
Check if a value has been assigned to Genome data member.
bool IsSetSubtype(void) const
Check if a value has been assigned to Subtype data member.
const TSeq & GetSeq(void) const
Get the Seq member data.
bool IsSetReverse(void) const
Check if a value has been assigned to Reverse data member.
const TName & GetName(void) const
Get the Name member data.
const TReverse & GetReverse(void) const
Get the Reverse member data.
const TName & GetName(void) const
Get the Name member data.
bool IsSetForward(void) const
Check if a value has been assigned to Forward data member.
EGenome
biological context
bool IsSet(void) const
Check if a value has been assigned to data member.
bool IsSet(void) const
Check if a value has been assigned to data member.
bool IsSetName(void) const
Check if a value has been assigned to Name data member.
@ eSubtype_collection_date
DD-MMM-YYYY format.
@ eSubtype_insertion_seq_name
@ eSubtype_transposon_name
@ eSubtype_fwd_primer_seq
sequence (possibly more than one; semicolon-separated)
@ eSubtype_lat_lon
+/- decimal degrees
@ eSubtype_rev_primer_name
@ eSubtype_collected_by
name of person who collected the sample
@ eSubtype_fwd_primer_name
@ eSubtype_rev_primer_seq
sequence (possibly more than one; semicolon-separated)
@ eSubtype_isolation_source
@ eSubtype_environmental_sample
@ eSubtype_endogenous_virus_name
@ eSubtype_identified_by
name of person who identified the sample
@ eOrigin_synthetic
purely synthetic
@ eOrigin_artificial
artificially engineered
const TData & GetData(void) const
Get the Data member data.
const TStr & GetStr(void) const
Get the variant data.
const TLabel & GetLabel(void) const
Get the Label member data.
const TType & GetType(void) const
Get the Type member data.
const TMod & GetMod(void) const
Get the Mod member data.
bool IsSetDb(void) const
IDs in taxonomic or culture databases. Check if a value has been assigned to Db data member.
const TLineage & GetLineage(void) const
Get the Lineage member data.
TSubtype GetSubtype(void) const
Get the Subtype member data.
const TDiv & GetDiv(void) const
Get the Div member data.
E_Choice Which(void) const
Which variant is currently selected.
bool IsSetSubtype(void) const
Check if a value has been assigned to Subtype data member.
const TSubname & GetSubname(void) const
Get the Subname member data.
const THybrid & GetHybrid(void) const
Get the variant data.
bool IsSetCommon(void) const
Common name. Check if a value has been assigned to Common data member.
bool IsSetLineage(void) const
Lineage with semicolon separators. Check if a value has been assigned to Lineage data member.
const TName & GetName(void) const
Get the Name member data.
const TTaxname & GetTaxname(void) const
Get the Taxname member data.
const TCommon & GetCommon(void) const
Get the Common member data.
const TBinomial & GetBinomial(void) const
Get the variant data.
const TDb & GetDb(void) const
Get the Db member data.
bool IsSetDiv(void) const
GenBank division code. Check if a value has been assigned to Div data member.
void SetTaxname(const TTaxname &value)
Assign a value to Taxname data member.
bool IsSetMod(void) const
Check if a value has been assigned to Mod data member.
const Tdata & Get(void) const
Get the member data.
bool IsSetGenus(void) const
Required. Check if a value has been assigned to Genus data member.
const TSpecies & GetSpecies(void) const
Get the Species member data.
bool IsSetOrgname(void) const
Check if a value has been assigned to Orgname data member.
bool IsSetSubname(void) const
Check if a value has been assigned to Subname data member.
bool IsSetTaxname(void) const
Preferred formal name. Check if a value has been assigned to Taxname data member.
const TGenus & GetGenus(void) const
Get the Genus member data.
const TPartial & GetPartial(void) const
Get the variant data.
bool IsSetSpecies(void) const
Species required if subspecies used. Check if a value has been assigned to Species data member.
bool IsSetName(void) const
Check if a value has been assigned to Name data member.
const Tdata & Get(void) const
Get the member data.
const TOrgname & GetOrgname(void) const
Get the Orgname member data.
@ eSubtype_gb_synonym
used by taxonomy database
@ eSubtype_other
ASN5: old-name (254) will be added to next spec.
@ eSubtype_nat_host
natural host of this specimen
@ eSubtype_metagenome_source
@ eSubtype_specimen_voucher
@ eSubtype_culture_collection
@ eSubtype_forma_specialis
@ e_Hybrid
hybrid between organisms
@ e_Binomial
genus/species type name
@ e_Partial
when genus not known
bool IsSetData(void) const
The specific data. Check if a value has been assigned to Data data member.
const TData & GetData(void) const
Get the Data member data.
const TBiosrc & GetBiosrc(void) const
Get the variant data.
bool IsSetAccession(void) const
Check if a value has been assigned to Accession data member.
E_Choice Which(void) const
Which variant is currently selected.
const TAccession & GetAccession(void) const
Get the Accession member data.
@ e_Tpe
Third Party Annot/Seq EMBL.
@ e_Tpd
Third Party Annot/Seq DDBJ.
@ e_General
for other databases
@ e_Tpg
Third Party Annot/Seq Genbank.
const TSeq & GetSeq(void) const
Get the variant data.
const TSet & GetSet(void) const
Get the variant data.
bool IsSeq(void) const
Check if variant Seq is selected.
bool IsSetSeq_set(void) const
Check if a value has been assigned to Seq_set data member.
bool IsSet(void) const
Check if variant Set is selected.
const TSeq_set & GetSeq_set(void) const
Get the Seq_set member data.
@ eClass_parts
parts for 2 or 3
@ eClass_nuc_prot
nuc acid and coded proteins
bool IsSetCompleteness(void) const
Check if a value has been assigned to Completeness data member.
bool CanGetBiomol(void) const
Check if it is safe to call GetBiomol method.
const TUser & GetUser(void) const
Get the variant data.
bool IsMolinfo(void) const
Check if variant Molinfo is selected.
bool IsSetMol(void) const
Check if a value has been assigned to Mol data member.
const TTitle & GetTitle(void) const
Get the variant data.
const TSource & GetSource(void) const
Get the variant data.
bool IsSource(void) const
Check if variant Source is selected.
bool IsSetBiomol(void) const
Check if a value has been assigned to Biomol data member.
const TId & GetId(void) const
Get the Id member data.
const Tdata & Get(void) const
Get the member data.
TMol GetMol(void) const
Get the Mol member data.
bool IsSetDescr(void) const
Descriptors. Check if a value has been assigned to Descr data member.
TBiomol GetBiomol(void) const
Get the Biomol member data.
TCompleteness GetCompleteness(void) const
Get the Completeness member data.
bool IsSetId(void) const
Equivalent identifiers. Check if a value has been assigned to Id data member.
const TDescr & GetDescr(void) const
Get the Descr member data.
const TMolinfo & GetMolinfo(void) const
Get the variant data.
@ eRepr_seg
segmented sequence
@ eCompleteness_complete
complete biological entity
@ eTech_wgs
whole genome shotgun sequencing
@ eBiomol_cRNA
viral RNA genome copy intermediate
@ eBiomol_other_genetic
other genetic material
@ e_User
user defined object
@ e_Molinfo
info on the molecule and techniques
@ e_Title
a title for this sequence
if(yy_accept[yy_current_state])
const GenericPointer< typename T::ValueType > T2 value
const CharType(& source)[N]
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
static int match(PCRE2_SPTR start_eptr, PCRE2_SPTR start_ecode, uint16_t top_bracket, PCRE2_SIZE frame_size, pcre2_match_data *match_data, match_block *mb)
#define FOR_EACH_SEQID_ON_BIOSEQ(Itr, Var)
FOR_EACH_SEQID_ON_BIOSEQ EDIT_EACH_SEQID_ON_BIOSEQ.
#define FOR_EACH_DBXREF_ON_ORGREF(Itr, Var)
FOR_EACH_DBXREF_ON_ORGREF EDIT_EACH_DBXREF_ON_ORGREF.
#define FOR_EACH_SUBSOURCE_ON_BIOSOURCE(Itr, Var)
FOR_EACH_SUBSOURCE_ON_BIOSOURCE EDIT_EACH_SUBSOURCE_ON_BIOSOURCE.
#define FOR_EACH_ORGMOD_ON_ORGNAME(Itr, Var)
FOR_EACH_ORGMOD_ON_ORGNAME EDIT_EACH_ORGMOD_ON_ORGNAME.
#define FOR_EACH_SEQENTRY_ON_SEQSET(Itr, Var)
FOR_EACH_SEQENTRY_ON_SEQSET EDIT_EACH_SEQENTRY_ON_SEQSET.
#define FIELD_IS_SET_AND_IS(Var, Fld, Chs)
FIELD_IS_SET_AND_IS base macro.
#define GET_FIELD(Var, Fld)
GET_FIELD base macro.
bool seq_mac_is_unique(Iterator iter1, Iterator iter2, Predicate pred)
bool ContainsSgml(const string &str)
static SLJIT_INLINE sljit_ins l(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
static SLJIT_INLINE sljit_ins msg(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
CRef< objects::CObjectManager > om
bool operator()(T l, T r) const
int g(Seg_Gsm *spe, Seq_Mtf *psm, Thd_Gsm *tdg)
static string x_RepairCountryName(string countryname)
static bool s_init_NewTaxVal(bool use_new_strain_validation)
static bool s_PCRReactionLess(CConstRef< CPCRReaction > pp1, CConstRef< CPCRReaction > pp2)
static const int sNumUnexpectedViralOrgModQualifiers
static const int sNumUnexpectedViralSubSourceQualifiers
static bool s_MatchOrgname(const string &taxname, const COrgName &orgname, string &mismatch)
std::string_view sm_SourceQualPrefixes[]
bool s_IsAllDigitsOrSpaces(string str)
static bool s_PCRPrimerSetLess(const CPCRPrimerSet &s1, const CPCRPrimerSet &s2)
static unique_ptr< CTextFsa > m_SourceQualTags
static bool IsUnexpectedViralOrgModQualifier(COrgMod::TSubtype subtype)
bool s_IsBioSample(const CBioseq_Handle &bsh)
static const CSubSource::ESubtype sUnexpectedViralSubSourceQualifiers[]
static bool s_HasMetagenomeSource(const COrg_ref &org)
static const COrgMod::TSubtype sUnexpectedViralOrgModQualifiers[]
static bool s_PCRSetEqual(const CPCRSet *p1, const CPCRSet *p2)
static bool NCBI_NewTaxVal(bool use_new_strain_validation)
static bool s_UnbalancedParentheses(string str)
static bool s_IsChromosome(const CBioSource &biosource)
static bool x_HasTentativeName(const CUser_object &user_object)
const size_t kDefaultChunkSize
static const string kInvalidReplyMsg
static string x_GetTentativeName(const CUser_object &user_object)
bool s_IsArchaea(const CBioSource &source)
static bool s_HasWGSTech(const CBioseq &bioseq)
static bool s_PCRSetCompare(const CPCRSet *p1, const CPCRSet *p2)
static bool s_FindWholeName(const string &taxname, const string &value)
static bool s_IsEukaryoteOrProkaryote(const CBioSourceKind &biosourceKind)
static bool s_ReportUndefinedSpeciesId(const CBioseq &bioseq)
static bool s_CompleteGenomeNeedsChromosome(const CBioSource &source)
static const CSeq_entry & s_GetJustNucSeqEntry(const CSeq_entry &entry)
static bool s_PCRPrimerLess(const CPCRPrimer &p1, const CPCRPrimer &p2)
bool IsOrgNotFound(const CT3Error &error)
static const CBioseq * s_GetNucSeqFromContext(const CSeq_entry *ctx)
bool s_IsBacteria(const CBioSource &source)
static bool s_IsUndefinedSpecies(const string &taxname)
static bool IsUnexpectedViralSubSourceQualifier(CSubSource::TSubtype subtype)
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4