A RetroSearch Logo

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Search Query:

Showing content from http://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/doxyhtml/valid__biosource_8cpp_source.html below:

NCBI C++ ToolKit: src/objtools/validator/valid_biosource.cpp Source File

64 #define NCBI_USE_ERRCODE_X Objtools_Validator 69 using namespace

sequence;

82

string::iterator it =

str

.begin();

83  while

(it !=

str

.end()) {

86

}

else if

(*it ==

')'

) {

91

}

else if

(*it ==

'['

) {

93

}

else if

(*it ==

']'

) {

101  if

(par > 0 || bkt > 0) {

161 static bool

s_IsValidPrimerSequence (

string str

,

char

& bad_ch)

187

string::iterator sit =

str

.begin();

188  while

(sit !=

str

.end()) {

191  if

(pos2 == string::npos) {

195  string match

=

str

.substr(pos + 1, pos2 - pos - 1);

196  if

(find(list_begin, list_end,

match

) == list_end) {

200

sit += pos2 - pos + 1;

203  if

(*sit !=

'('

&& *sit !=

')'

&& *sit !=

','

&& *sit !=

':'

) {

209  if

(strchr (

"ABCDGHKMNRSTVWY"

, ch) ==

NULL

) {

265

countryname =

"USA: District of Columbia"

;

267

countryname =

"USA: Puerto Rico"

;

269

countryname =

"USA: Puerto Rico"

;

272

countryname = countryname.substr(5);

276

countryname = countryname.substr(7);

284

(

string

countryname,

296

PostObjErr(sev, errtype,

error

, obj,

ctx

);

412  bool

all_local_or_gnl =

true

;

413  for

(

auto

pId : bioseq.

GetId

()) {

414  switch

(pId->Which()) {

424

all_local_or_gnl =

false

;

427  return

all_local_or_gnl;

448  if

(pDesc->IsMolinfo()) {

449  const auto

& molinfo = pDesc->GetMolinfo();

462  if

(

ctx

->IsSeq()) {

463  return

&(

ctx

->GetSeq());

466  if

(

ctx

->IsSet() &&

467  ctx

->GetSet().IsSetClass() &&

469  const auto

& bioseq_set =

ctx

->GetSet();

470  if

(bioseq_set.IsSetSeq_set()) {

471  for

(

const auto

& pEntry : bioseq_set.GetSeq_set()) {

472  if

(pEntry->IsSeq()) {

473  const auto

& bioseq = pEntry->GetSeq();

474  if

(bioseq.IsSetInst() &&

475

bioseq.GetInst().IsNa()) {

493  if

(!

isdigit

(*it) && *it !=

' '

) {

503  if

(entry.

IsSeq

()) {

522  "No organism has been applied to this Bioseq. Other qualifiers may exist."

, obj,

ctx

);

529  bool

isInfluenzaOrSars2 =

false

;

530  bool

isMetagenome =

false

;

531  bool

hasChromosome =

false

;

532  bool

hasPlasmidName =

false

;

536  const string

& taxname = orgref.

GetTaxname

();

538  bool

is_env_sample =

false

;

542

is_env_sample =

true

;

546  if

(!is_env_sample) {

548  "Uncultured should also have /environmental_sample"

,

553  "Blank sample should not be associated with any sequences"

,

561

isInfluenzaOrSars2 =

true

;

563

}

else if

(

NStr::EqualNocase

(taxname,

"Severe acute respiratory syndrome coronavirus 2"

)) {

564

isInfluenzaOrSars2 =

true

;

566

isMetagenome =

true

;

570  if

(m_genomeSubmission && isMetagenome) {

572  "Metagenome is not a legal organism name"

,

580  "Transposon and insertion sequence are no longer legal locations"

,

584  if

(IsIndexerVersion()

588  "INDEXER_ONLY - BioSource location is chromosome"

,

592  bool

isViral =

false

, isAnimal =

false

, isPlant =

false

,

593

isBacteria =

false

, isArchaea =

false

, isFungal =

false

,

617  bool

chrom_conflict =

false

;

623  double

lat_value = 0.0, lon_value = 0.0;

624  bool

is_single_cell_amplification =

false

;

628

ValidateSubSource(**ssit, obj,

ctx

, isViral, isInfluenzaOrSars2);

629  if

(!(*ssit)->IsSetSubtype()) {

633  if

((*ssit)->IsSetName()) {

634  string str

= (*ssit)->GetName();

637  "Subsource name should not be "

+

str

,

648

countryname = (**ssit).GetName();

652  if

((*ssit)->IsSetName()) {

653

lat_lon = (*ssit)->GetName();

654  bool

format_correct =

false

, lat_in_range =

false

, lon_in_range =

false

, precision_correct =

false

;

656

lat_in_range, lon_in_range,

657

lat_value, lon_value);

664  if

((*ssit)->IsSetName()) {

665  val

= (*ssit)->GetName();

668  "'"

+

val

+

"' is an invalid altitude value, altitude should be provided in meters"

,

674

hasChromosome =

true

;

677

chrom_conflict =

true

;

680

chromosome = ssit->GetPointer();

685

linkage_group = ssit->GetPointer();

689  if

((*ssit)->IsSetName()) {

690

pcr_set_list.

AddFwdName

((*ssit)->GetName());

695  if

((*ssit)->IsSetName()) {

696

pcr_set_list.

AddRevName

((*ssit)->GetName());

701  if

((*ssit)->IsSetName()) {

702

pcr_set_list.

AddFwdSeq

((*ssit)->GetName());

707  if

((*ssit)->IsSetName()) {

708

pcr_set_list.

AddRevSeq

((*ssit)->GetName());

715  if

(IsGpipe() && IsGenomic()) {

718  if

(isAnimal || isPlant) {

720  const string str

= (*ssit)->GetName();

723  "Invalid value ("

+

str

+

") for /sex qualifier"

, obj,

ctx

);

725

}

else if

(isViral) {

727  "Virus has unexpected Sex qualifier"

, obj,

ctx

);

728

}

else if

(isBacteria || isArchaea || isFungal) {

730  "Unexpected use of /sex qualifier"

, obj,

ctx

);

732  const string str

= (*ssit)->GetName();

736  "Invalid value ("

+

str

+

") for /sex qualifier"

, obj,

ctx

);

743  if

(isAnimal || isPlant || isViral) {

745  "Unexpected use of /mating_type qualifier"

, obj,

ctx

);

749  "Unexpected use of /mating_type qualifier"

, obj,

ctx

);

754

hasPlasmidName =

true

;

757  "Plasmid subsource but not plasmid location"

, obj,

ctx

);

763  if

((*ssit)->IsSetName()) {

769  const string

&

subname

= ((*ssit)->GetName());

779  if

(genome_from_name != genome) {

782

val_name = val_name.substr(8);

785  "Plastid name subsource "

+ val_name +

" but not "

+ val_name +

" location"

, obj,

ctx

);

789  "Plastid name subsource contains unrecognized value"

, obj,

ctx

);

796  if

((*ssit)->IsSetName() && hasTaxname) {

807  "Tissue-type is inappropriate for bacteria"

, obj,

ctx

);

808

}

else if

(isViroid) {

810  "Viroid has unexpected tissue-type qualifier"

, obj,

ctx

);

815  if

((*ssit)->IsSetName()) {

816  const string

&

subname

= ((*ssit)->GetName());

818

is_single_cell_amplification =

true

;

822  string

num =

subname

.substr(0, pos);

824

is_single_cell_amplification =

true

;

841  "Virus has unexpected "

+

subname

+

" qualifier"

, obj,

ctx

);

845  if

(hasChromosome && hasPlasmidName) {

847  "Source should not have both chromosome and plasmid name fields"

,

854  bool

suppress =

false

;

857

it->IsSetName() &&

NStr::Equal

(it->GetName(),

"unlocalized"

)) {

865  if

(entry.

IsSeq

()) {

869  switch

(sid.

Which

()) {

881  if

(acc.length() == 8) {

896  string msg

=

"INDEXER_ONLY - source contains chromosome value '"

;

900  msg

+=

"' but the BioSource location is not set to chromosome"

;

909  bool

suppress =

false

;

912

it->IsSetName() &&

NStr::Equal

(it->GetName(),

"unlocalized"

)) {

920  if

(entry.

IsSeq

()) {

924  switch

(sid.

Which

()) {

936  if

(acc.length() == 8) {

951  string msg

=

"INDEXER_ONLY - source contains linkage_group value '"

;

955  msg

+=

"' but the BioSource location is not set to chromosome"

;

963  if

(it->second <= 1)

continue

;

965  string

qual =

"***"

;

968

qual = chrom_conflict ?

"conflicting chromosome"

:

"identical chromosome"

;

break

;

970

qual =

"germline"

;

break

;

972

qual =

"rearranged"

;

break

;

974

qual =

"plasmid_name"

;

break

;

976

qual =

"segment"

;

break

;

980  if

(use_geo_loc_name) {

981

qual =

"geo_loc_name"

;

988

qual =

"transgenic"

;

break

;

990

qual =

"environmental_sample"

;

break

;

992

qual =

"lat_lon"

;

break

;

994

qual =

"collection_date"

;

break

;

996

qual =

"collected_by"

;

break

;

998

qual =

"identified_by"

;

break

;

1000

qual =

"fwd_primer_seq"

;

break

;

1002

qual =

"rev_primer_seq"

;

break

;

1004

qual =

"fwd_primer_name"

;

break

;

1006

qual =

"rev_primer_name"

;

break

;

1008

qual =

"metagenomic"

;

break

;

1010

qual =

"altitude"

;

break

;

1020  "Germline and rearranged should not both be present"

, obj,

ctx

);

1024  "Transgenic and environmental sample should not both be present"

, obj,

ctx

);

1028  "Metagenomic should also have environmental sample annotated"

, obj,

ctx

);

1032  "Sex and mating type should not both be present"

, obj,

ctx

);

1036  if

(m_genomeSubmission) {

1040  "Plasmid location set but plasmid name missing. Add a plasmid source modifier with the plasmid name. Use unnamed if the name is not known."

,

1050  "PCR primer does not have both sequences"

, obj,

ctx

);

1053  bool

has_duplicate_primers =

false

;

1055

has_duplicate_primers =

true

;

1058

has_duplicate_primers =

true

;

1061  if

(has_duplicate_primers) {

1063  "PCR primer sequence has duplicates"

, obj,

ctx

);

1067

ValidateLatLonCountry(countryname, lat_lon, obj,

ctx

);

1074  if

(!IsSeqSubmitParent() && IsIndexerVersion()) {

1086  if

(IsEmbl() || IsDdbj()) {

1091  "No lineage for this BioSource."

, obj,

ctx

);

1096  const string

& lineage = orgname.

GetLineage

();

1098  if

(lineage.find(

"Kinetoplastida"

) == string::npos && lineage.find(

"Kinetoplastea"

) == string::npos) {

1100  "Only Kinetoplastida have kinetoplasts"

, obj,

ctx

);

1103  if

(lineage.find(

"Chlorarachniophyceae"

) == string::npos &&

1104

lineage.find(

"Cryptophyceae"

) == string::npos) {

1107  "Only Chlorarachniophyceae and Cryptophyceae have nucleomorphs"

, obj,

ctx

);

1110  if

(lineage.find(

"Ciliophora"

) == string::npos) {

1112  "Only Ciliophora have macronuclear locations"

, obj,

ctx

);

1117  const string

& div = orgname.

GetDiv

();

1129  "Bacterial or viral source should not have organelle location"

,

1134  "BioSource with ENV division is missing environmental sample subsource"

,

1141  "If metagenomes appears in lineage, BioSource should have metagenomic qualifier"

,

1148  bool

specific_host =

false

;

1152  if

(!it->IsSetSubtype()) {

1158

specific_host =

true

;

1178  "Virus has unexpected "

+

subname

+

" qualifier"

, obj,

ctx

);

1184  "Environmental sample should also have isolation source or specific host annotated"

,

1188

m_biosource_kind = bsrc;

1190  const CBioseq

* pBioseq=

nullptr

;

1191  const bool

checkForUndefinedSpecies = hasTaxname &&

1192

(IsGenomeSubmission() ||

1197

ValidateOrgRef(orgref, obj,

ctx

, checkForUndefinedSpecies, is_single_cell_amplification);

1206

(

const string

& primer_kind,

1211  if

(badch < ' ' || badch >

'~'

) {

1214  string msg

=

"PCR "

+ primer_kind +

" primer sequence format is incorrect, first bad character is '"

;

1224 const string

& primer_kind,

1230

x_ReportPCRSeqProblem(primer_kind, badch, obj,

ctx

);

1236  "PCR "

+ primer_kind +

" primer name appears to be a sequence"

,

1248  for

(

auto

it : pcrset.

Get

())

1250  if

(it->IsSetForward()) {

1251  for

(

auto

pit : it->GetForward().Get())

1253

x_CheckPCRPrimer(*pit,

"forward"

, obj,

ctx

);

1256  if

(it->IsSetReverse()) {

1257  for

(

auto

pit : it->GetReverse().Get())

1259

x_CheckPCRPrimer(*pit,

"reverse"

, obj,

ctx

);

1272 const bool

isInfluenzaOrSars2)

1276  "Unknown subsource subtype 0"

, obj,

ctx

);

1290  const auto

& fdata = feat->

GetData

();

1291  if

(fdata.IsBiosrc() && fdata.GetBiosrc().IsSetTaxname()) {

1306  string

countryname = subsrc.

GetName

();

1307  bool

is_miscapitalized =

false

;

1308  bool

is_null_and_virus =

false

;

1310  if

(

CCountries::IsValid

(countryname, is_miscapitalized, is_null_and_virus, isInfluenzaOrSars2)) {

1311  if

(is_miscapitalized) {

1312  if

(use_geo_loc_name) {

1314  "Bad geo_loc_name capitalization ["

+ countryname +

"]"

,

1318  "Bad country capitalization ["

+ countryname +

"]"

,

1322  if

(is_null_and_virus) {

1323  if

(use_geo_loc_name) {

1325  "Null geo_loc_name ["

+ countryname +

"] for influenza or Sars virus"

,

1329  "Null country ["

+ countryname +

"] for influenza or Sars virus"

,

1334  if

(use_geo_loc_name) {

1336  "Colon at end of geo_loc_name ["

+ countryname +

"]"

, obj,

ctx

);

1339  "Colon at end of country name ["

+ countryname +

"]"

, obj,

ctx

);

1343  if

(use_geo_loc_name) {

1345  "Replaced geo_loc_name ["

+ countryname +

"]"

, obj,

ctx

);

1348  "Replaced country name ["

+ countryname +

"]"

, obj,

ctx

);

1352  if

(countryname.empty()) {

1355  if

(use_geo_loc_name) {

1357  "Bad geo_loc_name ["

+ countryname +

"]"

, obj,

ctx

);

1360  "Bad country name ["

+ countryname +

"]"

, obj,

ctx

);

1368  bool

format_correct =

false

, lat_in_range =

false

, lon_in_range =

false

, precision_correct =

false

;

1369  double

lat_value = 0.0, lon_value = 0.0;

1370  string

lat_lon = subsrc.

GetName

();

1372

lat_in_range, lon_in_range,

1373

lat_value, lon_value);

1374  if

(!format_correct) {

1376  if

(pos != string::npos) {

1378  if

(format_correct) {

1380  "lat_lon format has extra text after correct dd.dd N|S ddd.dd E|W format"

,

1386  if

(!format_correct) {

1388  "lat_lon format is incorrect - should be dd.dd N|S ddd.dd E|W"

,

1391  if

(!lat_in_range) {

1393  "latitude value is out of range - should be between 90.00 N and 90.00 S"

,

1396  if

(!lon_in_range) {

1398  "longitude value is out of range - should be between 180.00 E and 180.00 W"

,

1401  if

(!precision_correct) {

1414  string

name = subsrc.

GetName

();

1416  if

(name.length() > 10

1419  "PCR primer name appears to be a sequence"

,

1427  string

name = subsrc.

GetName

();

1429  if

(name.length() > 10

1432  "PCR primer name appears to be a sequence"

,

1442

x_ReportPCRSeqProblem(

"forward"

, bad_ch, obj,

ctx

);

1451

x_ReportPCRSeqProblem(

"reverse"

, bad_ch, obj,

ctx

);

1459  "Transposon name and insertion sequence name are no " 1460  "longer legal qualifiers"

, obj,

ctx

);

1465  "Unknown subsource subtype 0"

, obj,

ctx

);

1469

ValidateSourceQualTags(subsrc.

GetName

(), obj,

ctx

);

1499  "Problematic plasmid/chromosome/linkage group name '"

+ sname +

"'"

,

1504  "Chromosome should not include contig or scaffold: '"

+ sname +

"'"

,

1510  "Problematic plasmid/chromosome/linkage group name '"

+ sname +

"'"

,

1517  "Problematic plasmid/chromosome/linkage group name '"

+ sname +

"'"

,

1529  "Non-viral source feature should not have a segment qualifier"

,

1552  const string

& frequency = subsrc.

GetName

();

1557  "bad frequency qualifier value "

+ frequency,

1560

string::const_iterator sit = frequency.begin();

1561  bool

bad_frequency =

false

;

1565  if

(sit != frequency.end() && *sit ==

'.'

) {

1567  if

(sit == frequency.end()) {

1568

bad_frequency =

true

;

1570  while

(sit != frequency.end() &&

isdigit

(*sit)) {

1573  if

(sit != frequency.end()) {

1574

bad_frequency =

true

;

1577

bad_frequency =

true

;

1579  if

(bad_frequency) {

1581  "bad frequency qualifier value "

+ frequency,

1590  "Collection_date format is not in DD-Mmm-YYYY format"

,

1593  bool

is_null_and_virus =

false

;

1598

}

else if

(isInfluenzaOrSars2) {

1600  "Null collection date ["

+ problem +

"] for influenza or Sars virus"

,

1620  subname

+

" qualifier should not have descriptive text"

,

1627  "Unbalanced parentheses in subsource '"

+

subname

+

"'"

,

1632  "subsource "

+

subname

+

" has SGML"

,

1646  size_t

value_len =

value

.length();

1647  while

(pos != string::npos

1648

&& (((pos != 0 &&

isalpha

(taxname.c_str()[pos - 1]))

1649

||

isalpha

(taxname.c_str()[pos + value_len])))) {

1652  if

(pos == string::npos) {

1678  if

(pos == string::npos) {

1682

}

else if

(pos > 0 &&

NStr::EqualNocase

(taxname.substr(0, pos),

"Salmonella"

)) {

1718 const bool

checkForUndefinedSpecies,

1719 const bool

is_single_cell_amplification)

1725  "No organism name included in the source. Other qualifiers may exist."

, obj,

ctx

);

1747  "Organism '"

+ taxname +

"' is undefined species and does not have a specific identifier."

,

1753  "Unbalanced parentheses in taxname '"

+ orgref.

GetTaxname

() +

"'"

, obj,

ctx

);

1757  "taxname "

+ taxname +

" has SGML"

,

1765

ValidateTaxNameOrgname(taxname, orgref.

GetOrgname

(), obj,

ctx

);

1771

ValidateDbxref(orgref.

GetDb

(), obj,

true

,

ctx

);

1774  bool

has_taxon =

false

;

1782  if

(! IsLocalGeneralOnly() || m_NotJustLocalOrGeneral) {

1785  if

(IsRequireTaxonID() &&

!has_taxon) {

1787  "BioSource is missing taxon ID"

, obj,

ctx

);

1794

ValidateOrgName(orgname, has_taxon, obj,

ctx

);

1797  string

taxname_search = taxname;

1799  size_t

pos =

NStr::Find

(taxname_search,

" "

);

1800  if

(pos == string::npos) {

1801

taxname_search.clear();

1803

taxname_search = taxname_search.substr(pos + 1);

1806  if

(pos == string::npos) {

1807

taxname_search.clear();

1809

taxname_search = taxname_search.substr(pos + 1);

1819  if

(!(*it)->IsSetSubtype() || !(*it)->IsSetSubname()) {

1823  const string

&

subname

= (*it)->GetSubname();

1825  if

(orgmod_name.length() > 0) {

1826

orgmod_name[0] =

toupper

(orgmod_name[0]);

1832  "Subspecies value specified is not found in taxname"

,

1838

orgmod_name +

" value specified is not found in taxname"

,

1845

orgmod_name +

" value specified is not found in taxname"

,

1851  "Specific host is identical to taxname"

,

1857  if

(s_IsSalmonellaGenus(taxname)) {

1859  "Salmonella organisms should use serovar instead of serotype."

,

1864  if

(s_IsSalmonellaGenus(taxname) &&

NStr::Find

(taxname,

subname

) == string::npos) {

1866  "Salmonella organism name should contain the serovar value."

,

1892  for

(

auto

it : hybrid) {

1893  if

(it->IsSetName() &&

s_MatchOrgname

(taxname, *it, mismatch)) {

1898  if

(!rval && hybrid.size() > 1 &&

1899

hybrid.front()->IsSetName()) {

1908  for

(

auto

it : partial) {

1909  if

(it->IsSetName()) {

1910

mismatch = it->GetName();

1917  if

(!rval && partial.size() > 1 &&

1918

partial.front()->IsSetName()) {

1920

mismatch = partial.front()->GetName();

1932

(

const string

& taxname,

1940  "Taxname does not match orgname ('"

+ taxname +

"', '"

+ mismatch +

"')"

,

1949 const bool

has_taxon,

1953  bool

is_viral =

false

;

1983  bool

has_strain =

false

;

1984  bool

has_isolate =

false

;

1985

vector<string> vouchers;

1988  const COrgMod

& omd = **omd_itr;

1995  "Orgmod name should not be "

+

str

,

2012  "Orgmod.strain should not start with subsp."

,

2016  "Orgmod.strain should not start with serovar"

,

2020  "Orgmod.strain should not be '"

+

str

+

"'"

,

2026  "Multiple strain qualifiers on the same BioSource"

, obj,

ctx

);

2036  "Orgmod.isolate should not be '"

+

str

+

"'"

,

2040  if

(has_isolate && check_multiple_isolates) {

2042  "Multiple isolate qualifiers on the same BioSource"

, obj,

ctx

);

2044

has_isolate =

true

;

2052  "Orgmod.serovar should not start with subsp."

,

2056  "Orgmod.serovar should not start with strain"

,

2067  "Orgmod.sub-species should not contain subsp."

,

2081  "Orgmod variety should only be in plants, fungi, or cyanobacteria"

,

2090  if

((*omd_itr)->IsSetSubname() && !

NStr::IsBlank

((*omd_itr)->GetSubname())) {

2091  const string

&

val

= (*omd_itr)->GetSubname();

2096  if

((*it2)->IsSetSubtype()

2098

&& (*it2)->IsSetSubname()

2101  "OrgMod synonym is identical to OrgMod gb_synonym"

,

2110

ValidateOrgModVoucher(omd, obj,

ctx

);

2115  if

(!(*omd_itr)->IsSetSubname() ||

2118  "Bad value for type_material"

, obj,

ctx

);

2130  "Unbalanced parentheses in orgmod '"

+

subname

+

"'"

,

2135  "orgmod "

+

subname

+

" has SGML"

,

2140  if

(m_genomeSubmission && has_strain && has_isolate) {

2142  "Organism has both strain: '"

+ strain +

"' and isolate: '"

+ isolate +

"'"

,

2153  if

(strain.length() < 1) {

2158  "Orgmod.strain should not be species '"

+ species +

"'"

,

2163  "Orgmod.strain should not be subspecies '"

+ sub_species +

"'"

,

2168  "Orgmod.strain should not be serovar '"

+ serovar +

"'"

,

2171  if

(

NStr::FindNoCase

(strain, genus +

" "

+ species) != string::npos && genus.length() > 0 && species.length() > 0) {

2173  "Orgmod.strain should not contain '"

+ genus +

" "

+ species +

"'"

,

2199  if

(!

source

.IsSetGenome()

2202  bool

is_viral =

false

;

2203  if

(

source

.IsSetOrg()) {

2225  if

(

source

.IsSetLineage()) {

2226  string

lineage =

source

.GetLineage();

2239  if

(

source

.IsSetLineage()) {

2240  string

lineage =

source

.GetLineage();

2253  while

(d && !rval) {

2254  const auto

& user = d->

GetUser

();

2255  if

(user.IsSetType() && user.GetType().IsStr() &&

NStr::Equal

(user.GetType().GetStr(),

"DBLink"

)) {

2256  for

(

auto f

: user.GetData()) {

2257  if

(

f

->IsSetLabel() &&

f

->GetLabel().IsStr() &&

NStr::Equal

(

f

->GetLabel().GetStr(),

"BioSample"

)

2258

&&

f

->IsSetData() && (

f

->GetData().IsStr() ||

f

->GetData().IsStrs())) {

2276

m_biosource_kind =

source

;

2278  const auto

& inst = bsh.

GetInst

();

2280  if

(

source

.IsSetIs_focus()) {

2282  if

(!bsh.

IsAa

() &&

2288  "BioSource descriptor has focus, " 2289  "but no BioSource feature"

, obj,

ctx

);

2293  if

(

source

.CanGetOrigin() &&

2295  if

(!IsOtherDNA(bsh) && !bsh.

IsAa

()) {

2297  "Molinfo-biomol other should be used if " 2298  "Biosource-location is synthetic"

, obj,

ctx

);

2304

&&

source

.IsSetOrg() &&

source

.GetOrg().IsSetTaxname()

2312  "HIV with moltype DNA should be proviral"

,

2320  "HIV with mRNA molecule type is rare"

,

2333

sequence::CDeflineGenerator defline_generator;

2334

title = defline_generator.GenerateDefline(bsh, sequence::CDeflineGenerator::fIgnoreExisting);

2337  bool

isViral =

false

;

2338  if

(

source

.IsSetLineage()) {

2339  string

lineage =

source

.GetLineage();

2350

&&

NStr::Find

(title,

"complete genome"

) != string::npos

2353  "Non-viral complete genome not labeled as chromosome"

,

2360  bool

is_synthetic_construct = IsSyntheticConstruct(

source

);

2361  bool

is_artificial = IsArtificial(

source

);

2363  if

(is_synthetic_construct) {

2367  "synthetic construct should have other-genetic"

,

2370  if

(!is_artificial) {

2372  "synthetic construct should have artificial origin"

,

2375

}

else if

(is_artificial) {

2377  "artificial origin should have other-genetic and synthetic construct"

,

2380  if

(is_artificial) {

2385  "artificial origin should have other-genetic"

,

2396  if

(!(*it)->IsSetSubtype()) {

2404  if

(mi && (*it)->IsSetName() &&

NStr::EqualNocase

((*it)->GetName(),

"cRNA"

)) {

2409  "cRNA note conflicts with molecule type"

,

2413  "cRNA note redundant with molecule type"

,

2425  if

(

source

.IsSetOrg()) {

2434  if

(it->IsSetSubtype()

2436

&& it->IsSetSubname()

2441  "cRNA note conflicts with molecule type"

,

2446  "cRNA note redundant with molecule type"

,

2462  "Genomic DNA viral lineage indicates no DNA stage"

,

2470  if

( (IsGpipe() || IsIndexerVersion() ) &&

s_IsBioSample

(bsh) ) {

2473  if

( is_bact || is_arch ) {

2474  bool

has_strain =

false

;

2475  bool

has_isolate =

false

;

2476  bool

env_sample =

false

;

2477  if

(

source

.IsSetSubtype()) {

2486  if

(!env_sample &&

source

.IsSetOrg()

2487

&&

source

.GetOrg().IsSetOrgname()) {

2488  const auto

& orgname =

source

.GetOrg().GetOrgname();

2489  if

(orgname.IsSetMod()) {

2490  for

(

auto om

: orgname.GetMod()) {

2491  if

(

om

->IsSetSubtype()) {

2493

has_isolate =

true

;

2506  if

(!has_strain && !has_isolate && !env_sample) {

2509  "Bacteria should have strain or isolate or environmental sample"

,

2511

}

else if

(is_arch) {

2513  "Archaea should have strain or isolate or environmental sample"

,

2557  "collection_date:"

,

2561  "culture_collection:"

,

2565  "endogenous_virus_name:"

,

2566  "environmental_sample:"

,

2568  "forma_specialis:"

,

2570  "fwd_pcr_primer_name"

,

2571  "fwd_pcr_primer_seq"

,

2580  "insertion_seq_name:"

,

2582  "isolation_source:"

,

2589  "metagenome_source:"

,

2599  "rev_pcr_primer_name"

,

2600  "rev_pcr_primer_seq"

,

2609  "specimen_voucher:"

,

2622  "transposon_name:"

,

2631  static

std::mutex m;

2633

std::lock_guard

g

(m);

2649

(

const string

&

str

,

2655  size_t

str_len =

str

.length();

2659  for

(

size_t i

= 0;

i

< str_len; ++

i

) {

2663  if

(

match

.empty()) {

2666  size_t

match_len =

match

.length();

2669  if

((

int

)(

i

- match_len) >= 0) {

2670  char

ch =

str

[

i

- match_len];

2671  if

(!

isspace

((

unsigned char

)ch) && ch !=

';'

) {

2679  if

(pos != string::npos) {

2680  if

(pos == 0 ||

isspace

((

unsigned char

)

str

[pos]) ||

str

[pos] ==

';'

) {

2692  "Source note has structured tag '"

+

match

+

"'"

, obj,

ctx

);

2703

user_object.

GetType

().

GetStr

() !=

"StructuredComment"

) {

2726

user_object.

GetType

().

GetStr

() !=

"StructuredComment"

) {

2759

usr_descs.push_back(desc);

2762

desc_ctxs.push_back(r_se);

2768  for

(

auto

annot_it : se.

GetAnnot

()) {

2769  if

(annot_it->IsFtable()) {

2770  for

(

auto

feat_it : annot_it->GetData().GetFtable()) {

2771  if

(feat_it->IsSetData() && feat_it->GetData().IsUser()

2774

feat.

Reset

(feat_it);

2775

usr_feats.push_back(feat);

2786

GatherTentativeName(**it, usr_descs, desc_ctxs, usr_feats);

2797  if

(org_rq_list.size() > 0) {

2801  while

(

i

< org_rq_list.size()) {

2803

vector< CRef<COrg_ref> > tmp_rq(org_rq_list.begin() +

i

, org_rq_list.begin() +

i

+

len

);

2805  if

(!reply || !reply->IsSetReply()) {

2826  if

(org_rq_list.size() == 0) {

2832  while

(

i

< org_rq_list.size()) {

2834

vector< CRef<COrg_ref> > tmp_rq(org_rq_list.begin() +

i

, org_rq_list.begin() +

i

+

len

);

2837  if

(tmp_spec_host_reply) {

2840

err_msg =

"Connection to taxonomy failed"

;

2858  if

(org_rq_list.size() == 0) {

2864  while

(

i

< org_rq_list.size()) {

2866

vector< CRef<COrg_ref> > tmp_rq(org_rq_list.begin() +

i

, org_rq_list.begin() +

i

+

len

);

2882  auto

pTval = x_CreateTaxValidator();

2884

ValidateSpecificHost(*pTval);

2890  const string

err_str =

error

.IsSetMessage() ?

error

.GetMessage() :

"?"

;

2892  if

(

NStr::Equal

(err_str,

"Organism not found"

)) {

2902

vector<CConstRef<CSeqdesc> > src_descs;

2903

vector<CConstRef<CSeq_entry> > desc_ctxs;

2904

vector<CConstRef<CSeq_feat> > src_feats;

2906

GatherTentativeName(se, src_descs, desc_ctxs, src_feats);

2909

vector< CRef<COrg_ref> > org_rq_list;

2912

vector<CConstRef<CSeqdesc> >::iterator desc_it = src_descs.begin();

2913

vector<CConstRef<CSeq_entry> >::iterator ctx_it = desc_ctxs.begin();

2914  while

(desc_it != src_descs.end() && ctx_it != desc_ctxs.end()) {

2918

org_rq_list.push_back(rq);

2925

vector<CConstRef<CSeq_feat> >::iterator feat_it = src_feats.begin();

2926  while

(feat_it != src_feats.end()) {

2930

org_rq_list.push_back(rq);

2935  if

(org_rq_list.empty()) {

2940  if

(!reply || !reply->IsSetReply()) {

2942  "Taxonomy service connection failure"

, se);

2945  const auto

& rlist = reply->GetReply();

2946

CTaxon3_reply::TReply::const_iterator reply_it = rlist.begin();

2949

desc_it = src_descs.begin();

2950

ctx_it = desc_ctxs.begin();

2953  while

(reply_it != rlist.end()

2954

&& desc_it != src_descs.end()

2955

&& ctx_it != desc_ctxs.end()) {

2956  if

((*reply_it)->IsError()) {

2959  "Taxonomy lookup failed for Tentative Name '"

+ org_rq_list[pos]->GetTaxname() +

"'"

,

2960

**desc_it, *ctx_it);

2962

HandleTaxonomyError((*reply_it)->GetError(),

2973

feat_it = src_feats.begin();

2974  while

(reply_it != rlist.end()

2975

&& feat_it != src_feats.end()) {

2976  if

((*reply_it)->IsError()) {

2979  "Taxonomy lookup failed for Tentative Name '"

+ org_rq_list[pos]->GetTaxname() +

"'"

,

2982

HandleTaxonomyError((*reply_it)->GetError(),

2995  const string

err_str =

error

.IsSetMessage() ?

error

.GetMessage() :

"?"

;

2997  if

(

NStr::Equal

(err_str,

"Organism not found"

)) {

2998  string msg

=

"Organism not found in taxonomy database"

;

2999  if

(

error

.IsSetOrg()) {

3000  const auto

& e_org =

error

.GetOrg();

3002  if

(e_org.IsSetTaxname() &&

3003

!

NStr::Equal

(e_org.GetTaxname(),

"Not valid"

) &&

3004

(!d_org.IsSetTaxname() ||

3005

!

NStr::Equal

(d_org.GetTaxname(), e_org.GetTaxname()))) {

3006  msg

+=

" (suggested:"

+ e_org.GetTaxname() +

")"

;

3018  "Taxonomy lookup failed with message '"

+ err_str +

"'"

,

3022  "Taxonomy lookup failed with message '"

+ err_str +

"'"

,

3030  const string

err_str =

error

.IsSetMessage() ?

error

.GetMessage() :

"?"

;

3038  "Taxonomy lookup failed with message '"

+ err_str +

"'"

,

3042  "Taxonomy lookup failed with message '"

+ err_str +

"'"

,

3048  const string

& host,

const COrg_ref

& org)

3050  const string

err_str =

error

.IsSetMessage() ?

error

.GetMessage() :

"?"

;

3052  if

(

NStr::Equal

(err_str,

"Organism not found"

)) {

3054  "Organism not found in taxonomy database"

,

3058  "Specific host value is ambiguous: "

+ host, org);

3065  "Invalid value for specific host: "

+ host, org);

3092  if

(use_new_strain_validation) {

3097  string

fromEnv =

env

.Get(

"NCBI_NEW_STRAIN_VALIDATION"

);

3099  if

(fromEnv ==

"true"

) {

3101

}

else if

(fromEnv ==

"false"

) {

3131  auto

pTval = x_CreateTaxValidator();

3135

ValidateOrgRefs(*pTval);

3138

ValidateSpecificHost(*pTval);

3145  if

(request.size() < 1) {

3146

return CRef<CTaxon3_reply>();

3150  if

(m_NewStrainValidation) {

3151

cerr <<

"CStrainRequest::ExploreStrainsForTaxonInfo TaxonReply:"

<< endl <<

MSerial_AsnText

<< reply << endl;

3158

ValidateStrain(*pTval, pTval->m_descTaxID);

3161

ValidateTentativeName(se);

3167  auto

pTval = x_CreateTaxValidator();

3168

pTval->CheckOneOrg(org, genome, *

this

);

3199  unsigned int

pcr_num = 0;

3201

name = name.substr(1, name.length() - 2);

3202

vector<string> mult_names;

3204  unsigned int

name_num = 0;

3205  while

(name_num < mult_names.size()) {

3212  m_SetList

[pcr_num]->SetFwdName(mult_names[name_num]);

3230  unsigned int

pcr_num = 0;

3232

name = name.substr(1, name.length() - 2);

3233

vector<string> mult_names;

3235  unsigned int

name_num = 0;

3236  while

(name_num < mult_names.size()) {

3243  m_SetList

[pcr_num]->SetRevName(mult_names[name_num]);

3261  unsigned int

pcr_num = 0;

3263

name = name.substr(1, name.length() - 2);

3264

vector<string> mult_names;

3266  unsigned int

name_num = 0;

3267  while

(name_num < mult_names.size()) {

3274  m_SetList

[pcr_num]->SetFwdSeq(mult_names[name_num]);

3292  unsigned int

pcr_num = 0;

3294

name = name.substr(1, name.length() - 2);

3295

vector<string> mult_names;

3297  unsigned int

name_num = 0;

3298  while

(name_num < mult_names.size()) {

3305  m_SetList

[pcr_num]->SetRevSeq(mult_names[name_num]);

3330

}

else if

(compare > 0) {

3334

}

else if

(compare > 0) {

3338

}

else if

(compare > 0) {

3390

}

else if

(compare > 0) {

3414

}

else if

(s1.

Get

().size() < s2.

Get

().size()) {

3416

}

else if

(s1.

Get

().size() > s2.

Get

().size()) {

3419  auto

it1 = s1.

Get

().begin();

3420  auto

it2 = s2.

Get

().begin();

3421  while

(it1 != s1.

Get

().end()) {

3472  template

<

typename

T>

3486  if

(!primers.

IsSet

() || primers.

Get

().size() < 2) {

3490  for

(

auto

it : primers.

Get

()) {

3491  if

(already_seen.

find

(it) != already_seen.

end

()) {

3494

already_seen.

insert

(it);

3529

vector<string> error_list;

3531  ITERATE

(vector<string>, err, error_list) {

3534

}

else if

(

NStr::FindNoCase

(*err,

"should be structured"

) != string::npos) {

3536

}

else if

(

NStr::FindNoCase

(*err,

"missing institution code"

) != string::npos) {

3538

}

else if

(

NStr::FindNoCase

(*err,

"missing specific identifier"

) != string::npos) {

3549

}

else if

(

NStr::FindNoCase

(*err,

"should not be qualified with a <COUNTRY> designation"

) != string::npos) {

3550  if

(use_geo_loc_name) {

3555

}

else if

(

NStr::FindNoCase

(*err,

"needs to be qualified with a <COUNTRY> designation"

) != string::npos) {

3557

}

else if

(

NStr::FindNoCase

(*err,

" exists, but collection "

) != string::npos) {

3571  return

m_taxon->SendOrgRefList(orgRefs);

3573  return

make_unique<CTaxValidationAndCleanup>(taxFunc);

3577  return

make_unique<CTaxValidationAndCleanup>(

m_pContext

->m_taxon_update);

const char * sm_ValidModifiedPrimerBases[]

@ eErr_SEQ_DESCR_BadPlastidName

@ eErr_SEQ_DESCR_ObsoleteSourceQual

@ eErr_SEQ_DESCR_MissingEnvironmentalSample

@ eErr_SEQ_DESCR_ObsoleteSourceLocation

@ eErr_SEQ_DESCR_MissingPlasmidLocation

@ eErr_SEQ_DESCR_BadNullCountry

@ eErr_SEQ_DESCR_InvalidTissueType

@ eErr_SEQ_DESCR_TaxonomyServiceProblem

@ eErr_SEQ_DESCR_TaxonomyBlankSample

@ eErr_SEQ_DESCR_MissingPersonalCollectionName

@ eErr_SEQ_DESCR_LatLonRange

@ eErr_SEQ_DESCR_DuplicatePCRPrimerSequence

@ eErr_GENERIC_SgmlPresentInText

@ eErr_SEQ_DESCR_UnstructuredVoucher

@ eErr_SEQ_DESCR_TaxonomyIsMetagenome

@ eErr_SEQ_DESCR_BadVariety

@ eErr_SEQ_DESCR_BadInstitutionGeoLocName

@ eErr_SEQ_DESCR_BadTypeMaterial

@ eErr_SEQ_DESCR_OrgModMissingValue

@ eErr_SEQ_DESCR_NoOrgFound

@ eErr_SEQ_DESCR_BadPCRPrimerSequence

@ eErr_SEQ_DESCR_UnnecessaryBioSourceFocus

@ eErr_SEQ_DESCR_InvalidForType

@ eErr_SEQ_DESCR_LatLonValue

@ eErr_SEQ_DESCR_OrganismIsUndefinedSpecies

@ eErr_SEQ_DESCR_IdenticalInstitutionCode

@ eErr_SEQ_DESCR_BacteriaMissingSourceQualifier

@ eErr_SEQ_DESCR_BadCountryCapitalization

@ eErr_SEQ_DESCR_BadCollectionDate

@ eErr_SEQ_DESCR_BadContigOrScaffoldChromosome

@ eErr_SEQ_DESCR_BadInstitutionCode

@ eErr_SEQ_DESCR_BadAltitude

@ eErr_SEQ_DESCR_IncorrectlyFormattedVoucherID

@ eErr_SEQ_DESCR_StrainWithEnvironSample

@ eErr_SEQ_DESCR_BadNullCollectionDate

@ eErr_SEQ_DESCR_OrganismNotFound

@ eErr_SEQ_DESCR_InconsistentVirusMoltype

@ eErr_SEQ_DESCR_BadInstitutionCountry

@ eErr_SEQ_DESCR_MissingPlasmidName

@ eErr_SEQ_DESCR_UnculturedNeedsEnvSample

@ eErr_SEQ_DESCR_BadTentativeName

@ eErr_SEQ_DESCR_BadPlasmidChromosomeLinkageName

@ eErr_SEQ_DESCR_BadTextInSourceQualifier

@ eErr_SEQ_DESCR_SuspectedContaminatedCellLine

@ eErr_SEQ_DESCR_AmbiguousSpecificHost

@ eErr_SEQ_DESCR_BadGeoLocNameCapitalization

@ eErr_SEQ_DESCR_ChromosomeWithoutLocation

@ eErr_SEQ_DESCR_StructuredSourceNote

@ eErr_SEQ_DESCR_InvalidMatingType

@ eErr_SEQ_DESCR_BadSubSource

@ eErr_SEQ_DESCR_MultipleStrains

@ eErr_SEQ_DESCR_BadGeoLocNameCode

@ eErr_SEQ_DESCR_InvalidSexQualifier

@ eErr_SEQ_DESCR_TaxonomyAmbiguousName

@ eErr_SEQ_DESCR_MultipleSourceQualifiers

@ eErr_SEQ_DESCR_WrongVoucherType

@ eErr_SEQ_DESCR_HasStrainAndIsolate

@ eErr_SEQ_DESCR_BadCollectionCode

@ eErr_SEQ_DESCR_SyntheticConstructWrongMolType

@ eErr_SEQ_DESCR_TaxonomyLookupProblem

@ eErr_SEQ_DESCR_BadNullGeoLocName

@ eErr_SEQ_DESCR_NoTaxonID

@ eErr_SEQ_DESCR_LatLonFormat

@ eErr_SEQ_DESCR_MissingLineage

@ eErr_SEQ_DESCR_BadOrgMod

@ eErr_SEQ_DESCR_BadSpecificHost

@ eErr_SEQ_DESCR_BadPCRPrimerName

@ eErr_SEQ_DESCR_OrgModValueInvalid

@ eErr_SEQ_DESCR_BadOrganelleLocation

@ eErr_SEQ_DESCR_EnvironSampleMissingQualifier

@ eErr_SEQ_DESCR_MultipleIsolates

@ eErr_SEQ_DESCR_BadCountryCode

@ eErr_SEQ_DESCR_ChromosomeLocation

@ eErr_SEQ_DESCR_BioSourceNeedsChromosome

@ eErr_SEQ_DESCR_BioSourceInconsistency

@ eErr_SEQ_DESCR_HostIdenticalToOrganism

@ eErr_SEQ_DESCR_BadBioSourceFrequencyValue

@ eErr_SEQ_DESCR_ReplacedCountryCode

@ eErr_SEQ_DESCR_ReplacedGeoLocNameCode

@ eErr_SEQ_DESCR_UnbalancedParentheses

@ eErr_SEQ_DESCR_MissingMetagenomicQualifier

@ eErr_SEQ_DESCR_SyntheticConstructNeedsArtificial

@ eErr_SEQ_DESCR_NonViralSegment

bool IsOrganismArchaea() const

bool IsOrganismEukaryote() const

bool IsOrganismBacteria() const

CBioSourceKind & operator=(const CBioSource &bsrc)

const string & GetLineage(void) const

static string GetOrganelleByGenome(unsigned int genome)

bool IsSetLineage(void) const

static CBioSource::EGenome GetGenomeByOrganelle(const string &organelle, NStr::ECase use_case=NStr::eCase, bool starts_with=false)

bool IsSetTaxname(void) const

static bool WasValid(const string &country)

static bool IsValid(const string &country)

static CNcbiApplication * Instance(void)

Singleton method.

@OrgMod.hpp User-defined methods of the data storage class.

static bool NCBI_ValidateForMultipleIsolates(void)

static string IsCultureCollectionValid(const string &culture_collection)

static bool IsStrainValid(const string &strain)

static bool IsValidTypeMaterial(const string &type_material)

static string IsBiomaterialValid(const string &biomaterial)

static string GetSubtypeName(TSubtype stype, EVocabulary vocabulary=eVocabulary_raw)

static string CheckMultipleVouchers(const vector< string > &)

static bool IsIsolateValid(const string &isolate)

static string IsSpecimenVoucherValid(const string &specimen_voucher)

bool GetFlatName(string &name_out, string *lineage=0) const

const string & GetLineage(void) const

const string & GetDivision(void) const

bool IsSetDivision(void) const

bool IsVarietyValid(const string &variety) const

bool IsSetOrgMod(void) const

bool IsSubspeciesValid(const string &subspecies) const

bool IsSetLineage(void) const

static bool IsValid(const string &seq, char &bad_ch)

void AddFwdName(string name)

vector< CPCRSet * > m_SetList

void AddRevName(string name)

void AddFwdSeq(string name)

void AddRevSeq(string name)

size_t GetOrigPos() const

string GetRevName() const

string GetFwdName() const

const TAnnot & GetAnnot(void) const

const CSeq_descr & GetDescr(void) const

bool IsSetAnnot(void) const

bool IsSetDescr(void) const

namespace ncbi::objects::

Base class for all serializable objects.

static void ExploreStrainsForTaxonInfo(CTaxValidationAndCleanup &tval, CValidError_imp &imp, const CSeq_entry &se, std::function< CRef< CTaxon3_reply >(const vector< CRef< COrg_ref >> &)> taxoncallback)

static bool NCBI_UseGeoLocNameForCountry(void)

static string GetCollectionDateProblem(const string &date_string)

static bool IsPlasmidNameValid(const string &value, const string &taxname)

static bool IsValidSexQualifierValue(const string &value)

static bool IsMultipleValuesAllowed(TSubtype)

static bool IsAltitudeValid(const string &value)

static string ValidateLatLonCountry(const string &countryname, string &lat_lon, bool check_state, ELatLonCountryErr &errcode)

static string CheckCellLine(const string &cell_line, const string &organism)

static string GetSubtypeName(CSubSource::TSubtype stype, EVocabulary vocabulary=eVocabulary_raw)

static bool NeedsNoText(const TSubtype &subtype)

static bool IsEndogenousVirusNameValid(const string &value)

static bool IsChromosomeNameValid(const string &value, const string &taxname)

static void IsCorrectLatLonFormat(string lat_lon, bool &format_correct, bool &precision_correct, bool &lat_in_range, bool &lon_in_range, double &lat_value, double &lon_value)

static bool IsSegmentValid(const string &value)

static bool IsLinkageGroupNameValid(const string &value, const string &taxname)

vector< CRef< COrg_ref > > GetTaxonomyLookupRequest() const

void ReportSpecificHostErrors(const CTaxon3_reply &reply, CValidError_imp &imp)

void ReportIncrementalTaxLookupErrors(const CTaxon3_reply &reply, CValidError_imp &imp, bool is_insd_patent, size_t offset) const

vector< CRef< COrg_ref > > GetStrainLookupRequest()

string IncrementalSpecificHostMapUpdate(const vector< CRef< COrg_ref > > &input, const CTaxon3_reply &reply)

CConstRef< CSeq_entry > GetTopReportObject() const

string IncrementalStrainMapUpdate(const vector< CRef< COrg_ref > > &input, const CTaxon3_reply &reply, TTaxId descTaxID=ZERO_TAX_ID)

void ReportStrainErrors(CValidError_imp &imp)

vector< CRef< COrg_ref > > GetSpecificHostLookupRequest(bool for_fix)

static bool IsWGS(const CBioseq &seq)

void ValidateTaxNameOrgname(const string &taxname, const COrgName &orgname, const CSerialObject &obj, const CSeq_entry *ctx)

bool IsSyntheticConstruct(const CBioSource &src)

void HandleTaxonomyError(const CT3Error &error, const string &host, const COrg_ref &orf)

void ValidateSubSource(const CSubSource &subsrc, const CSerialObject &obj, const CSeq_entry *ctx=nullptr, const bool isViral=false, const bool isInfluenzaOrSars2=false)

void ValidateLatLonCountry(string countryname, string lat_lon, const CSerialObject &obj, const CSeq_entry *ctx)

void GatherTentativeName(const CSeq_entry &se, vector< CConstRef< CSeqdesc > > &usr_descs, vector< CConstRef< CSeq_entry > > &desc_ctxs, vector< CConstRef< CSeq_feat > > &usr_feats)

void ValidateOrgName(const COrgName &orgname, const bool has_taxon, const CSerialObject &obj, const CSeq_entry *ctx)

void PostObjErr(EDiagSev sv, EErrType et, const string &msg, const CSerialObject &obj, const CSeq_entry *ctx=nullptr)

void InitializeSourceQualTags()

void ValidateTaxonomy(const CSeq_entry &se)

void ValidateOrgRef(const COrg_ref &orgref, const CSerialObject &obj, const CSeq_entry *ctx, const bool checkForUndefinedSpecies=false, const bool is_single_cell_amplification=false)

static bool s_IsSalmonellaGenus(const string &taxname)

unique_ptr< CTaxValidationAndCleanup > x_CreateTaxValidator() const

void x_ReportPCRSeqProblem(const string &primer_kind, char badch, const CSerialObject &obj, const CSeq_entry *ctx)

void x_CheckPCRPrimer(const CPCRPrimer &primer, const string &primer_kind, const CSerialObject &obj, const CSeq_entry *ctx)

void ValidateSourceQualTags(const string &str, const CSerialObject &obj, const CSeq_entry *ctx=nullptr)

void ValidateBioSource(const CBioSource &bsrc, const CSerialObject &obj, const CSeq_entry *ctx=nullptr)

bool IsArtificial(const CBioSource &src)

void ValidatePCRReactionSet(const CPCRReactionSet &pcrset, const CSerialObject &obj, const CSeq_entry *ctx=nullptr)

void ValidateBioSourceForSeq(const CBioSource &bsrc, const CSerialObject &obj, const CSeq_entry *ctx, const CBioseq_Handle &bsh)

void ValidateOrgRefs(CTaxValidationAndCleanup &tval)

bool IsOtherDNA(const CBioseq_Handle &bsh) const

void ValidateSpecificHost(CTaxValidationAndCleanup &tval)

void ValidateStrain(CTaxValidationAndCleanup &tval, TTaxId descTaxID=ZERO_TAX_ID)

shared_ptr< SValidatorContext > m_pContext

void ValidateOrgModVoucher(const COrgMod &orgmod, const CSerialObject &obj, const CSeq_entry *ctx)

void ValidateTentativeName(const CSeq_entry &se)

bool IsTransgenic(const CBioSource &bsrc)

EDiagSev x_SalmonellaErrorLevel()

static EErrType ConvertCode(CSubSource::ELatLonCountryErr errcode)

iterator_bool insert(const value_type &val)

const_iterator find(const key_type &key) const

const_iterator end() const

static const int chunk_size

Include a standard set of the NCBI C++ Toolkit most basic headers.

The NCBI C++ standard methods for dealing with std::string.

static const char * str(char *buf, int n)

#define FOR_EACH_USERFIELD_ON_USEROBJECT(Itr, Var)

FOR_EACH_USERFIELD_ON_USEROBJECT EDIT_EACH_USERFIELD_ON_USEROBJECT.

const CNcbiEnvironment & GetEnvironment(void) const

Get the application's cached environment.

#define ITERATE(Type, Var, Cont)

ITERATE macro to sequence through container elements.

SStrictId_Tax::TId TTaxId

Taxon id type.

EDiagSev

Severity level for the posted diagnostics.

@ eDiag_Info

Informational message.

@ eDiag_Error

Error message.

@ eDiag_Warning

Warning message.

@ eDiag_Fatal

Fatal error – guarantees exit (or abort).

@ eDiag_Critical

Critical error message.

const TPrim & Get(void) const

virtual const CTypeInfo * GetThisTypeInfo(void) const =0

#define MSerial_AsnText

I/O stream manipulators.

const CTextseq_id * GetTextseq_Id(void) const

Return embedded CTextseq_id, if any.

CConstRef< CBioseq > GetCompleteBioseq(void) const

Get the complete bioseq.

bool IsSetInst(void) const

const TInst & GetInst(void) const

void Reset(void)

Reset reference object.

#define END_NCBI_SCOPE

End previously defined NCBI scope.

#define END_SCOPE(ns)

End the previously defined scope.

#define BEGIN_NCBI_SCOPE

Define ncbi namespace.

#define BEGIN_SCOPE(ns)

Define a new scope.

static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)

Case-insensitive compare of a substring with another string.

static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)

Split a string using specified delimiters.

static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)

Find the pattern in the specified range of a string using a case insensitive search.

static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)

Check if a string ends with a specified suffix value.

static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)

Check if a string is blank (has no text).

static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)

Truncate whitespace in a string (in-place)

static string IntToString(int value, TNumToStringFlags flags=0, int base=10)

Convert int to string.

static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)

Find the pattern in the string.

static bool EqualCase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)

Case-sensitive equality of a substring with another string.

static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)

Check if a string starts with a specified prefix value.

static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)

Case-insensitive equality of a substring with another string.

static bool Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)

Test for equality of a substring with another string.

static string & ReplaceInPlace(string &src, const string &search, const string &replace, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)

Replace occurrences of a substring within a string.

static string & ToLower(string &str)

Convert string to lower case – string& version.

@ eNocase

Case insensitive compare.

@ eCase

Case sensitive compare.

const Tdata & Get(void) const

Get the member data.

const TSubtype & GetSubtype(void) const

Get the Subtype member data.

const TPcr_primers & GetPcr_primers(void) const

Get the Pcr_primers member data.

TGenome GetGenome(void) const

Get the Genome member data.

TOrigin GetOrigin(void) const

Get the Origin member data.

bool IsSetSeq(void) const

Check if a value has been assigned to Seq data member.

const Tdata & Get(void) const

Get the member data.

bool CanGetSubtype(void) const

Check if it is safe to call GetSubtype method.

bool IsSetName(void) const

Check if a value has been assigned to Name data member.

bool IsSetOrg(void) const

Check if a value has been assigned to Org data member.

bool CanGetOrg(void) const

Check if it is safe to call GetOrg method.

list< CRef< CSubSource > > TSubtype

bool IsSetPcr_primers(void) const

Check if a value has been assigned to Pcr_primers data member.

const TForward & GetForward(void) const

Get the Forward member data.

const TOrg & GetOrg(void) const

Get the Org member data.

bool IsSetOrigin(void) const

Check if a value has been assigned to Origin data member.

TSubtype GetSubtype(void) const

Get the Subtype member data.

bool IsSetGenome(void) const

Check if a value has been assigned to Genome data member.

bool IsSetSubtype(void) const

Check if a value has been assigned to Subtype data member.

const TSeq & GetSeq(void) const

Get the Seq member data.

bool IsSetReverse(void) const

Check if a value has been assigned to Reverse data member.

const TName & GetName(void) const

Get the Name member data.

const TReverse & GetReverse(void) const

Get the Reverse member data.

const TName & GetName(void) const

Get the Name member data.

bool IsSetForward(void) const

Check if a value has been assigned to Forward data member.

EGenome

biological context

bool IsSet(void) const

Check if a value has been assigned to data member.

bool IsSet(void) const

Check if a value has been assigned to data member.

bool IsSetName(void) const

Check if a value has been assigned to Name data member.

@ eSubtype_collection_date

DD-MMM-YYYY format.

@ eSubtype_insertion_seq_name

@ eSubtype_transposon_name

@ eSubtype_fwd_primer_seq

sequence (possibly more than one; semicolon-separated)

@ eSubtype_lat_lon

+/- decimal degrees

@ eSubtype_rev_primer_name

@ eSubtype_collected_by

name of person who collected the sample

@ eSubtype_fwd_primer_name

@ eSubtype_rev_primer_seq

sequence (possibly more than one; semicolon-separated)

@ eSubtype_isolation_source

@ eSubtype_environmental_sample

@ eSubtype_endogenous_virus_name

@ eSubtype_identified_by

name of person who identified the sample

@ eOrigin_synthetic

purely synthetic

@ eOrigin_artificial

artificially engineered

const TData & GetData(void) const

Get the Data member data.

const TStr & GetStr(void) const

Get the variant data.

const TLabel & GetLabel(void) const

Get the Label member data.

const TType & GetType(void) const

Get the Type member data.

const TMod & GetMod(void) const

Get the Mod member data.

bool IsSetDb(void) const

IDs in taxonomic or culture databases. Check if a value has been assigned to Db data member.

const TLineage & GetLineage(void) const

Get the Lineage member data.

TSubtype GetSubtype(void) const

Get the Subtype member data.

const TDiv & GetDiv(void) const

Get the Div member data.

E_Choice Which(void) const

Which variant is currently selected.

bool IsSetSubtype(void) const

Check if a value has been assigned to Subtype data member.

const TSubname & GetSubname(void) const

Get the Subname member data.

const THybrid & GetHybrid(void) const

Get the variant data.

bool IsSetCommon(void) const

Common name. Check if a value has been assigned to Common data member.

bool IsSetLineage(void) const

Lineage with semicolon separators. Check if a value has been assigned to Lineage data member.

const TName & GetName(void) const

Get the Name member data.

const TTaxname & GetTaxname(void) const

Get the Taxname member data.

const TCommon & GetCommon(void) const

Get the Common member data.

const TBinomial & GetBinomial(void) const

Get the variant data.

const TDb & GetDb(void) const

Get the Db member data.

bool IsSetDiv(void) const

GenBank division code. Check if a value has been assigned to Div data member.

void SetTaxname(const TTaxname &value)

Assign a value to Taxname data member.

bool IsSetMod(void) const

Check if a value has been assigned to Mod data member.

const Tdata & Get(void) const

Get the member data.

bool IsSetGenus(void) const

Required. Check if a value has been assigned to Genus data member.

const TSpecies & GetSpecies(void) const

Get the Species member data.

bool IsSetOrgname(void) const

Check if a value has been assigned to Orgname data member.

bool IsSetSubname(void) const

Check if a value has been assigned to Subname data member.

bool IsSetTaxname(void) const

Preferred formal name. Check if a value has been assigned to Taxname data member.

const TGenus & GetGenus(void) const

Get the Genus member data.

const TPartial & GetPartial(void) const

Get the variant data.

bool IsSetSpecies(void) const

Species is required if subspecies is used. Check if a value has been assigned to Species data member.

bool IsSetName(void) const

Check if a value has been assigned to Name data member.

const Tdata & Get(void) const

Get the member data.

const TOrgname & GetOrgname(void) const

Get the Orgname member data.

@ eSubtype_gb_synonym

used by taxonomy database

@ eSubtype_other

ASN5: old-name (254) will be added to next spec.

@ eSubtype_nat_host

natural host of this specimen

@ eSubtype_metagenome_source

@ eSubtype_specimen_voucher

@ eSubtype_culture_collection

@ eSubtype_forma_specialis

@ e_Hybrid

hybrid between organisms

@ e_Binomial

genus/species type name

@ e_Partial

when genus not known

bool IsSetData(void) const

The specific data. Check if a value has been assigned to Data data member.

const TData & GetData(void) const

Get the Data member data.

const TBiosrc & GetBiosrc(void) const

Get the variant data.

bool IsSetAccession(void) const

Check if a value has been assigned to Accession data member.

E_Choice Which(void) const

Which variant is currently selected.

const TAccession & GetAccession(void) const

Get the Accession member data.

@ e_Tpe

Third Party Annot/Seq EMBL.

@ e_Tpd

Third Party Annot/Seq DDBJ.

@ e_General

for other databases

@ e_Tpg

Third Party Annot/Seq Genbank.

const TSeq & GetSeq(void) const

Get the variant data.

const TSet & GetSet(void) const

Get the variant data.

bool IsSeq(void) const

Check if variant Seq is selected.

bool IsSetSeq_set(void) const

Check if a value has been assigned to Seq_set data member.

bool IsSet(void) const

Check if variant Set is selected.

const TSeq_set & GetSeq_set(void) const

Get the Seq_set member data.

@ eClass_parts

parts for 2 or 3

@ eClass_nuc_prot

nuc acid and coded proteins

bool IsSetCompleteness(void) const

Check if a value has been assigned to Completeness data member.

bool CanGetBiomol(void) const

Check if it is safe to call GetBiomol method.

const TUser & GetUser(void) const

Get the variant data.

bool IsMolinfo(void) const

Check if variant Molinfo is selected.

bool IsSetMol(void) const

Check if a value has been assigned to Mol data member.

const TTitle & GetTitle(void) const

Get the variant data.

const TSource & GetSource(void) const

Get the variant data.

bool IsSource(void) const

Check if variant Source is selected.

bool IsSetBiomol(void) const

Check if a value has been assigned to Biomol data member.

const TId & GetId(void) const

Get the Id member data.

const Tdata & Get(void) const

Get the member data.

TMol GetMol(void) const

Get the Mol member data.

bool IsSetDescr(void) const

Descriptors. Check if a value has been assigned to Descr data member.

TBiomol GetBiomol(void) const

Get the Biomol member data.

TCompleteness GetCompleteness(void) const

Get the Completeness member data.

bool IsSetId(void) const

Equivalent identifiers. Check if a value has been assigned to Id data member.

const TDescr & GetDescr(void) const

Get the Descr member data.

const TMolinfo & GetMolinfo(void) const

Get the variant data.

@ eRepr_seg

segmented sequence

@ eCompleteness_complete

complete biological entity

@ eTech_wgs

whole genome shotgun sequencing

@ eBiomol_cRNA

viral RNA genome copy intermediate

@ eBiomol_other_genetic

other genetic material

@ e_User

user defined object

@ e_Molinfo

info on the molecule and techniques

@ e_Title

a title for this sequence

if(yy_accept[yy_current_state])

const GenericPointer< typename T::ValueType > T2 value

const CharType(& source)[N]

Defines the CNcbiApplication and CAppException classes for creating NCBI applications.

double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)

static int match(PCRE2_SPTR start_eptr, PCRE2_SPTR start_ecode, uint16_t top_bracket, PCRE2_SIZE frame_size, pcre2_match_data *match_data, match_block *mb)

#define FOR_EACH_SEQID_ON_BIOSEQ(Itr, Var)

FOR_EACH_SEQID_ON_BIOSEQ EDIT_EACH_SEQID_ON_BIOSEQ.

#define FOR_EACH_DBXREF_ON_ORGREF(Itr, Var)

FOR_EACH_DBXREF_ON_ORGREF EDIT_EACH_DBXREF_ON_ORGREF.

#define FOR_EACH_SUBSOURCE_ON_BIOSOURCE(Itr, Var)

FOR_EACH_SUBSOURCE_ON_BIOSOURCE EDIT_EACH_SUBSOURCE_ON_BIOSOURCE.

#define FOR_EACH_ORGMOD_ON_ORGNAME(Itr, Var)

FOR_EACH_ORGMOD_ON_ORGNAME EDIT_EACH_ORGMOD_ON_ORGNAME.

#define FOR_EACH_SEQENTRY_ON_SEQSET(Itr, Var)

FOR_EACH_SEQENTRY_ON_SEQSET EDIT_EACH_SEQENTRY_ON_SEQSET.

#define FIELD_IS_SET_AND_IS(Var, Fld, Chs)

FIELD_IS_SET_AND_IS base macro.

#define GET_FIELD(Var, Fld)

GET_FIELD base macro.

bool seq_mac_is_unique(Iterator iter1, Iterator iter2, Predicate pred)

bool ContainsSgml(const string &str)

static SLJIT_INLINE sljit_ins l(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)

static SLJIT_INLINE sljit_ins msg(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)

CRef< objects::CObjectManager > om

bool operator()(T l, T r) const

int g(Seg_Gsm *spe, Seq_Mtf *psm, Thd_Gsm *tdg)

static string x_RepairCountryName(string countryname)

static bool s_init_NewTaxVal(bool use_new_strain_validation)

static bool s_PCRReactionLess(CConstRef< CPCRReaction > pp1, CConstRef< CPCRReaction > pp2)

static const int sNumUnexpectedViralOrgModQualifiers

static const int sNumUnexpectedViralSubSourceQualifiers

static bool s_MatchOrgname(const string &taxname, const COrgName &orgname, string &mismatch)

std::string_view sm_SourceQualPrefixes[]

bool s_IsAllDigitsOrSpaces(string str)

static bool s_PCRPrimerSetLess(const CPCRPrimerSet &s1, const CPCRPrimerSet &s2)

static unique_ptr< CTextFsa > m_SourceQualTags

static bool IsUnexpectedViralOrgModQualifier(COrgMod::TSubtype subtype)

bool s_IsBioSample(const CBioseq_Handle &bsh)

static const CSubSource::ESubtype sUnexpectedViralSubSourceQualifiers[]

static bool s_HasMetagenomeSource(const COrg_ref &org)

static const COrgMod::TSubtype sUnexpectedViralOrgModQualifiers[]

static bool s_PCRSetEqual(const CPCRSet *p1, const CPCRSet *p2)

static bool NCBI_NewTaxVal(bool use_new_strain_validation)

static bool s_UnbalancedParentheses(string str)

static bool s_IsChromosome(const CBioSource &biosource)

static bool x_HasTentativeName(const CUser_object &user_object)

const size_t kDefaultChunkSize

static const string kInvalidReplyMsg

static string x_GetTentativeName(const CUser_object &user_object)

bool s_IsArchaea(const CBioSource &source)

static bool s_HasWGSTech(const CBioseq &bioseq)

static bool s_PCRSetCompare(const CPCRSet *p1, const CPCRSet *p2)

static bool s_FindWholeName(const string &taxname, const string &value)

static bool s_IsEukaryoteOrProkaryote(const CBioSourceKind &biosourceKind)

static bool s_ReportUndefinedSpeciesId(const CBioseq &bioseq)

static bool s_CompleteGenomeNeedsChromosome(const CBioSource &source)

static const CSeq_entry & s_GetJustNucSeqEntry(const CSeq_entry &entry)

static bool s_PCRPrimerLess(const CPCRPrimer &p1, const CPCRPrimer &p2)

bool IsOrgNotFound(const CT3Error &error)

static const CBioseq * s_GetNucSeqFromContext(const CSeq_entry *ctx)

bool s_IsBacteria(const CBioSource &source)

static bool s_IsUndefinedSpecies(const string &taxname)

static bool IsUnexpectedViralSubSourceQualifier(CSubSource::TSubtype subtype)


RetroSearch is an open source project built by @garambo | Open a GitHub Issue

Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo

HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4