A RetroSearch Logo

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Search Query:

Showing content from http://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/doxyhtml/validerror__bioseq_8cpp_source.html below:

NCBI C++ ToolKit: src/objtools/validator/validerror_bioseq.cpp Source File

55 #define NCBI_USE_ERRCODE_X Objtools_Validator 70  bool

Overlaps(

const CSeq_feat

& cds)

const

;

72  bool

HasMatch()

const

;

74  bool

OkWithoutCds(

bool

isGenbank =

false

)

const

;

151  for

(

auto id

: bsh.

GetId

()) {

168  if

((*it)->IsSetSubtype() && (*it)->IsSetName() && !

NStr::IsBlank

((*it)->GetName())) {

176  if

(

source

.IsSetLineage()) {

177  string

lineage =

source

.GetLineage();

188  if

(

source

.IsSetDivision()) {

189  string

div =

source

.GetDivision();

195  if

(

source

.IsSetGenome()) {

230  if

(appropriate_parent) {

249

}

catch

(

const

exception&) {

273  if

(appropriate_parent) {

278

}

catch

(

const

exception& e) {

280  string

(

"Exception while validating bioseq. EXCEPTION: "

) +

298  const string

& db = dbt.

GetDb

();

311  const char

& ch = *itr;

312  if

(ch ==

'|'

|| ch ==

','

)

return

ch;

320  for

(

size_t i

= 0;

i

<

id

.length();

i

++) {

322  return id

.c_str()[

i

];

332  const char

& ch = *itr;

333  if

(ch ==

'|'

|| ch ==

','

)

return

ch;

347  "BioseqFind ("

+

id

.AsFastaString() +

348  ") unable to find itself - possible internal error"

,

ctx

);

360  "BioseqFind ("

+

id

.AsFastaString() +

361  ") unable to find itself - possible internal error"

,

ctx

);

365  "SeqID "

+

id

.AsFastaString() +

366  " is present on multiple Bioseqs in record"

,

ctx

);

370  "BioseqFind ("

+

id

.AsFastaString() +

371  ") unable to find itself - possible internal error"

,

ctx

);

377  switch

(

id

.Which()) {

383  "TPA record "

+

ctx

.GetId().front()->AsFastaString() +

384  " should have Seq-hist.assembly for PRIMARY block"

,

395  if

(badch !=

'\0'

) {

397  "Bad character '"

+

string

(1, badch) +

"' in accession '"

+ acc +

"'"

,

ctx

);

404  "Bad accession "

+ acc,

ctx

);

415  const string

& name = tsid->

GetName

();

417  if

(

isspace

((

unsigned char

)(*s))) {

420  "Seq-id.name '"

+ name +

"' should be a single " 421  "word without any spaces"

,

ctx

);

430  if

(badch !=

'\0'

) {

432  "Bad character '"

+

string

(1, badch) +

"' in accession '"

+ acc +

"'"

,

ctx

);

434  size_t

num_letters = 0;

435  size_t

num_digits = 0;

436  size_t

num_underscores = 0;

437  bool

bad_id_chars =

false

;

440  bool

letter_after_digit =

false

;

446  for

(;

i

< acc.length(); ++

i

) {

447  if

(

isupper

((

unsigned char

)acc[

i

])) {

449

}

else if

(

isdigit

((

unsigned char

)acc[

i

])) {

451

}

else if

(acc[

i

] ==

'_'

) {

453  if

(num_digits > 0 || num_underscores > 1) {

454

letter_after_digit =

true

;

457

bad_id_chars =

true

;

461  if

(letter_after_digit || bad_id_chars) {

463  "Bad accession "

+ acc,

ctx

);

464

}

else if

(is_NZ && (num_letters == 4 || num_letters == 6) &&

465

(num_digits >= 8 && num_digits <= 11) && num_underscores == 0) {

469

}

else if

(num_letters == 2 &&

470

(num_digits == 6 || num_digits == 8 || num_digits == 9) &&

471

num_underscores == 1) {

473

}

else if

(num_letters == 4 && num_digits == 10 &&

ctx

.IsNa()) {

476  "Bad accession "

+ acc,

ctx

);

490  if

(!

id

.IsDdbj() ||

492  string msg

=

"Missing accession for "

+

id

.AsFastaString();

501  "Seq-id type not handled"

,

ctx

);

505  if

(

id

.GetGi() <=

ZERO_GI

) {

507  "Invalid GI number"

,

ctx

);

511  if

(!

id

.GetGeneral().IsSetDb() ||

NStr::IsBlank

(

id

.GetGeneral().GetDb())) {

514  if

(

id

.GetGeneral().IsSetDb()) {

515  const CDbtag

& dbt =

id

.GetGeneral();

516  size_t

dblen = dbt.

GetDb

().length();

528  if

(dblen > max_dblen) {

535  if

(longer_general) {

540  if

(idlen > maxlen && !

m_Imp

.

IsGI

()) {

565  if

(badch ==

'\0'

&& dbt.

IsSetDb

()) {

569  if

(badch !=

'\0'

) {

571  "Bad character '"

+

string

(1, badch) +

"' in sequence ID '"

+

id

.AsFastaString() +

"'"

,

ctx

);

586  if

(

id

.IsLocal() &&

id

.GetLocal().IsStr()) {

587  const string

& acc =

id

.GetLocal().GetStr();

589  if

(badch !=

'\0'

) {

591  "Bad character '"

+

string

(1, badch) +

"' in local ID '"

+ acc +

"'"

,

ctx

);

596  if

(

id

.

IsPdb

()) {

601  if

(chain_id.size() == 1 && chain_id[0] == chain) {

603

}

else if

(

islower

(chain) && chain_id.size() == 2

604

&& chain_id[0] == chain_id[1]

605

&& chain_id[0] ==

toupper

(chain)) {

607

}

else if

(chain ==

'|'

&& chain_id ==

"VB"

) {

611  "PDB Seq-id contains mismatched \'chain\' and" 612  " \'chain-id\' slots"

,

ctx

);

623  if

(! IsNCBIFILESeqId(**

i

)) {

625

(*i)->GetLabel(&

label

);

626  if

(

label

.length() > 40) {

628  "Sequence ID is unusually long ("

+

640  const

list< string > *extra_acc =

nullptr

;

642  switch

(desc.

Which

()) {

673  bool

found_good =

false

;

681  "The only ids on this Bioseq will be stripped during ID load"

, seq);

691  "No ids on a Bioseq"

, seq);

700  bool

has_gi =

false

;

701  bool

is_lrg =

false

;

702  bool

has_ng =

false

;

703  bool

wgs_tech_needs_wgs_accession =

false

;

704  bool

is_segset_accession =

false

;

705  bool

has_wgs_general =

false

;

706  bool

is_eb_db =

false

;

707  bool

longer_general =

false

;

710  if

((*i)->IsOther() || (*i)->IsEmbl() || (*i)->IsTpe()) {

711

longer_general =

true

;

719  if

((*i)->IsGeneral() && (*i)->GetGeneral().IsSetDb()) {

724

has_wgs_general =

true

;

726

}

else if

((*i)->IsOther() && (*i)->GetOther().IsSetAccession()) {

727  const string

& acc = (*i)->GetOther().GetAccession();

730

wgs_tech_needs_wgs_accession =

true

;

734

wgs_tech_needs_wgs_accession =

true

;

736

}

else if

((*i)->IsEmbl() && (*i)->GetEmbl().IsSetAccession()) {

738

}

else if

((*i)->IsDdbj() && (*i)->GetDdbj().IsSetAccession()) {

743

CBioseq::TId::const_iterator j;

744  for

(j =

i

, ++j; j != seq.

GetId

().end(); ++j) {

747

os <<

"Conflicting ids on a Bioseq: ("

;

748

(**i).WriteAsFasta(os);

750

(**j).WriteAsFasta(os);

757  if

((*i)->IsGenbank() || (*i)->IsEmbl() || (*i)->IsDdbj()) {

758

wgs_tech_needs_wgs_accession =

true

;

766

is_segset_accession =

true

;

770  if

(is_lrg && ! has_ng) {

772  "LRG sequence needs NG_ accession"

, seq);

777  bool

is_wgs =

false

;

778  unsigned int

gi_count = 0;

779  unsigned int

accn_count = 0;

780  unsigned int

lcl_count = 0;

783  switch

((**k).Which()) {

794  if

((*k)->IsGenbank() || (*k)->IsEmbl() || (*k)->IsDdbj()) {

802  "Accession "

+ acc +

" has 0 version"

, seq);

815  "Missing accession for "

+ tsid->

GetName

(), seq);

829  string label

= (*k)->AsFastaString();

831  "Missing identifier for "

+

label

, seq);

847  if

(! mi || ! mi->IsSetTech() ||

852  "WGS accession should have Mol-info.tech of wgs"

, seq);

854

}

else if

(mi && mi->IsSetTech() &&

856

wgs_tech_needs_wgs_accession &&

857

! is_segset_accession &&

866  "Mol-info.tech of wgs should have WGS accession"

, seq);

871

&& (! mi->IsSetBiomol()

875  "genomic RefSeq accession should use genomic or cRNA moltype"

,

880  if

(mi && mi->IsSetBiomol()) {

881  switch

(mi->GetBiomol()) {

894  "Molecule type (DNA) does not match biomol (RNA)"

, seq);

903  if

(gi_count > 0 && accn_count == 0 && !

m_Imp

.

IsPDB

() &&

906  "No accession on sequence with gi number"

, seq);

908  if

(gi_count > 0 && accn_count > 1) {

910  "Multiple accessions on sequence with gi number"

, seq);

953

CEMBL_block::TKeywords::const_iterator keyword = embl_i->

GetEmbl

().

GetKeywords

().begin();

968  const string

& primary_acc,

974  const

list<string>* extra_acc =

nullptr

;

989

primary_acc +

" used for both primary and" 990  " secondary accession"

, seq);

1015  bool

has_barcode_tech =

false

;

1019

has_barcode_tech =

true

;

1022  bool

has_barcode_keyword =

false

;

1026

has_barcode_keyword =

true

;

1030  if

(has_barcode_keyword && ! has_barcode_tech) {

1032  "BARCODE keyword without Molinfo.tech barcode"

,

1036  if

(has_barcode_tech && ! has_barcode_keyword && di) {

1038  "Molinfo.tech barcode without BARCODE keyword"

,

1043  "Sequence has both BARCODE and UNVERIFIED keywords"

,

1068  "Bioseq.mol is type nucleic acid"

, seq);

1076  "Non-linear topology set on protein"

, seq);

1082  "Protein not single stranded"

, seq);

1097  "Circular Bacteria or Archaea should be chromosome, or plasmid, or extrachromosomal"

, seq);

1110  "Bioseq.mol is type other"

, seq);

1180  bool

is_wgs =

false

;

1181  bool

is_grc =

false

;

1204

sequence::CDeflineGenerator defline_generator;

1205  string

title = defline_generator.GenerateDefline(seq, *

m_Scope

, sequence::CDeflineGenerator::fIgnoreExisting);

1213

is_wgs =

IsWGS

(bsh);

1215  bool

is_gb =

false

,

is_refseq =

false

, is_ng =

false

;

1218  const CSeq_id

& sid = **sid_itr;

1219  switch

(sid.

Which

()) {

1231  if

(acc ==

"NG_"

) {

1245  if

(! is_wgs && ! is_grc)

1287  "No CdRegion in nuc-prot set points to this protein"

,

1293  bool

is_complete =

false

;

1298

is_complete =

true

;

1399  "BioProject entries not present on CON record"

, seq);

1402

}

catch

(

const

exception& e) {

1403  if

(

NStr::Find

(e.what(),

"Error: Cannot resolve"

) == string::npos) {

1405  string

(

"Exception while validating BioseqContext. EXCEPTION: "

) +

1426  "Orphaned stand-alone protein"

, seq);

1436  if

(prot_feats.size() > 1) {

1439  "Protein sequence has multiple unprocessed protein features"

,

1440

feat->GetOriginalFeature());

1447  "Expected submission citation is missing for this Bioseq"

, seq);

1457  for

(

CFeat_CI

feat_ci(bsh, sel); feat_ci; ++feat_ci) {

1459  const CSeq_feat

& matpeptide = feat_ci->GetOriginalFeature();

1466  if

(matlen != prdlen) {

1468  "Mat_peptide does not match length of instantiated product"

,

1476  if

(

len

> prdlen) {

1484  if

(m_res != p_res) {

1486  "Mismatch in mat_peptide ("

+

string

(1, (

char

)m_res) +

") and instantiated product ("

+ \

1512  if

(pub.

IsSub

()) {

1524  bool

has_cit_sub =

false

;

1526  while

(p && !has_cit_sub) {

1537 template

<

class

Iterator,

class

Predicate>

1540  while

(iter1 != iter1_stop && iter2 != iter2_stop) {

1541  if

(! pred(*iter1, *iter2)) {

1547  if

(iter1 != iter1_stop || iter2 != iter2_stop) {

1559  const COrgMod

& omd1 = *(om1);

1560  const COrgMod

& omd2 = *(om2);

1570  if

(chs1 == chs2)

return true

;

1580  return

dbt1->

Compare

(*dbt2) == 0;

1599

org2.

GetDb

().begin(), org2.

GetDb

().end(),

1683

printf (

"Orgname not set!\n"

);

1685

printf (

"Lineage not set!\n"

);

1701  if

(start1 == stop2 + 1 || start2 == stop1 + 1) {

1719  if

(start1 == stop2 + 1 || start2 == stop1 + 1) {

1737  if

(start1 == stop2 + 1 || start2 == stop1 + 1) {

1758

CCacheImpl::TFeatValue::const_iterator feat = rnas.begin();

1759  if

(feat != rnas.end()) {

1761

CCacheImpl::TFeatValue::const_iterator feat_prev = feat;

1763  for

(; feat != rnas.end(); ++feat_prev, ++feat) {

1766

feat->GetLocation(),

m_Scope

)) {

1770  const CRNA_ref

& tm = feat_prev->GetData().GetRna();

1771  const CRNA_ref

& tr = feat->GetData().GetRna();

1775

feat->GetLocation(),

m_Scope

)) {

1777  "tRNA contained within tmRNA"

,

1778

feat->GetOriginalFeature());

1784

}

catch

(

const

exception& e) {

1785  if

(

NStr::Find

(e.what(),

"Error: Cannot resolve"

) == string::npos) {

1787  string

(

"Exception while validating RNA features. EXCEPTION: "

) +

1804

CCacheImpl::TFeatValue::const_iterator feat = biosrcs.begin();

1805  if

(feat != biosrcs.end()) {

1811  "Source feature is full length, should be descriptor"

,

1812

feat->GetOriginalFeature());

1817

CCacheImpl::TFeatValue::const_iterator feat_prev = feat;

1819  for

(; feat != biosrcs.end(); ++feat_prev, ++feat) {

1822  "Multiple full-length source features, should only be one if descriptor is transgenic"

,

1823

feat->GetOriginalFeature());

1827

feat->GetLocation(),

m_Scope

)) {

1833  bool

are_identical =

true

;

1834  if

(feat_prev->IsSetComment() && feat->IsSetComment()

1836

are_identical =

false

;

1838  const CBioSource

& src_prev = feat_prev->GetData().GetBiosrc();

1839  const CBioSource

& src = feat->GetData().GetBiosrc();

1842

are_identical =

false

;

1849

are_identical =

false

;

1854

are_identical =

false

;

1859  "Multiple equivalent source features should be combined into one multi-interval feature"

,

1860

feat->GetOriginalFeature());

1864

}

catch

(

const

exception& e) {

1865  if

(

NStr::Find

(e.what(),

"Error: Cannot resolve"

) == string::npos) {

1867  string

(

"Exception while validating source features. EXCEPTION: "

) +

1880  if

((*it)->IsGen() && (*it)->GetGen().IsSetCit()

1881

&& ! (*it)->GetGen().IsSetCit()

1882

&& ! (*it)->GetGen().IsSetJournal()

1883

&& ! (*it)->GetGen().IsSetDate()

1884

&& (*it)->GetGen().IsSetSerial_number()) {

1905

CCacheImpl::TFeatValue::const_iterator feat = pubs.begin();

1906  if

(feat != pubs.end()) {

1909  "Publication feature is full length, should be descriptor"

,

1910

feat->GetOriginalFeature());

1913

CCacheImpl::TFeatValue::const_iterator feat_prev = feat;

1915  if

(feat_prev != pubs.end()) {

1919  for

(; feat != pubs.end(); ++feat, ++feat_prev) {

1922  "Publication feature is full length, should be descriptor"

,

1923

feat->GetOriginalFeature());

1926  bool

are_identical =

true

;

1927  if

(feat_prev->IsSetComment() && feat->IsSetComment()

1929

are_identical =

false

;

1935

are_identical =

false

;

1939

prev_label.swap(

label

);

1944  if

(are_identical) {

1946  "Multiple equivalent publication features should be combined into one multi-interval feature"

,

1947

feat->GetOriginalFeature());

1951

}

catch

(

const

exception& e) {

1952  if

(

NStr::Find

(e.what(),

"Error: Cannot resolve"

) == string::npos) {

1954  string

(

"Exception while validating pub features. EXCEPTION: "

) +

1986  const CBioseq

& seq,

const

vector<CTempString>& labels)

1988  if

(labels.size() <= 1) {

1996  static const char

kWarningPrefix[] =

1997  "Multiple equivalent publications annotated on this sequence ["

;

1998  static const size_t

kWarningPrefixLen =

sizeof

(kWarningPrefix) - 1;

1999  static const

string::size_type kMaxSummaryLen = 100;

2004

TLabelCount label_count;

2006  ITERATE

(vector<CTempString>, label_it, labels) {

2007

++label_count[*label_it];

2011

vector<CTempString> sorted_dup_labels;

2012  ITERATE

(TLabelCount, label_count_it, label_count) {

2013  int

num_appearances = label_count_it->second;

2014  _ASSERT

(num_appearances > 0);

2015  if

(num_appearances > 1) {

2016  const CTempString

& dup_label = label_count_it->first;

2017

sorted_dup_labels.push_back(dup_label);

2023  string

err_msg = kWarningPrefix;

2024  ITERATE

(vector<CTempString>, dup_label_it, sorted_dup_labels) {

2027

err_msg.resize(kWarningPrefixLen);

2028  if

(summary.

length

() > kMaxSummaryLen) {

2029

err_msg += summary.

substr

(0, kMaxSummaryLen);

2048

vector<int> serials;

2049

vector<CTempString> published_labels;

2050

vector<CTempString> unpublished_labels;

2065

back_inserter(published_labels));

2067

back_inserter(unpublished_labels));

2071  bool

otherpub =

false

;

2073  switch

((*pub_it)->Which()) {

2075

muid = (*pub_it)->GetMuid();

2078

pmid = (*pub_it)->GetPmid();

2087  bool

collision =

false

;

2089  if

(muids_seen.

find

(muid) != muids_seen.

end

()) {

2092

muids_seen.

insert

(muid);

2096  if

(pmids_seen.

find

(pmid) != pmids_seen.

end

()) {

2099

pmids_seen.

insert

(pmid);

2104  "Multiple publications with identical PubMed ID"

, *

ctx

, *it);

2123  if

((*id)->IsGi()) {

2124

gi = (*id)->GetGi();

2136  if

((*id)->IsGi()) {

2137  if

(gi == (*id)->GetGi()) {

2139  "Replaced by gi ("

+

2151  if

((*id)->IsGi()) {

2152  if

(gi == (*id)->GetGi()) {

2176  if

(

id

.Match(**it)) {

2191  switch

(seqdata.

Which

()) {

2235  if

(

prot

[

prot

.size() - 1] ==

'*'

) {

2245  if

(mi && mi->IsSetCompleteness()) {

2252

}

catch

(

const

std::exception&) {

2273

mix.

Set

().push_back(*it);

2345  if

(

prev

.IsSetExcept() &&

prev

.GetExcept() &&

prev

.IsSetExcept_text()) {

2356  for

(

auto

it : currP.

GetName

()) {

2361  for

(

auto

it : prevP.

GetName

()) {

2408 #define FOR_EACH_SEQID_ON_BIOSEQ_HANDLE(Itr, Var) \ 2409 ITERATE (CBioseq_Handle::TId, Itr, Var.GetId()) 2424  if

(entry.

IsSeq

()) {

2446  if

((*it)->IsMolinfo() && (*it)->GetMolinfo().IsSetTech() && (*it)->GetMolinfo().GetTech() ==

CMolInfo::eTech_wgs

) {

2467  if

(entry.

IsSeq

()) {

2559  if

((*id)->IsPdb()) {

2610

&& (*it)->GetSet().IsSetClass()

2619  if

((*loc)->IsNull()) {

2623  if

(locs.size() - nulls < parts.size()) {

2625  "Parts set contains too many Bioseqs"

, seq);

2627

}

else if

(locs.size() - nulls > parts.size()) {

2629  "Parts set does not contain enough Bioseqs"

, seq);

2635  size_t size

= locs.size();

2636

CSeg_ext::Tdata::const_iterator loc_it = locs.begin();

2637

CBioseq_set::TSeq_set::const_iterator part_it = parts.begin();

2638  for

(

size_t i

= 0;

i

<

size

; ++

i

) {

2640  if

((*loc_it)->IsNull()) {

2644  if

(! (*part_it)->IsSeq()) {

2646  "Parts set component is not Bioseq"

, seq);

2650  if

(!

IsIdIn

(loc_id, (*part_it)->GetSeq())) {

2652  "Segmented bioseq seq_ext does not correspond to parts " 2653  "packaging order"

, seq);

2661  ERR_POST_X

(4,

"Seq-loc not for unique sequence"

);

2664  string

err_msg =

"Unknown error:"

;

2665

err_msg += x1.

what

();

2668

}

catch

(std::exception& x2) {

2669  string

err_msg =

"Unknown error:"

;

2670

err_msg += x2.what();

2683  if

(! inst.

IsSetExt

())

return false

;

2687  if

(! (*iter)->IsLiteral())

continue

;

2699  bool

has_gap =

false

;

2702  if

((*iter)->IsLiteral() &&

2703

(! (*iter)->GetLiteral().IsSetSeq_data() || (*iter)->GetLiteral().GetSeq_data().IsGap())) {

2719  string

title = sequence::CDeflineGenerator().GenerateDefline(bsh);

2735  "Complete genome in title without complete flag set"

,

2744  "Circular topology without complete flag set"

,

ctx

, *desc);

2752  "Title contains 'complete genome' but sequence has gaps"

, seq);

2766  if

(! (*sg))

continue

;

2783  if

(! (*sg) )

continue

;

2793  int

gaptype = gap.

GetType

();

2814  if

(! (*sg))

continue

;

2846  "WGS submission includes wrong gap type. Gaps for WGS genomes should be Assembly Gaps with linkage evidence."

, seq);

2855  "TSA submission includes wrong gap type. Gaps for TSA should be Assembly Gaps with linkage evidence."

, seq);

2864  "Genome submission includes wrong gap type. Gaps for genomes should be Assembly Gaps with linkage evidence."

, seq);

2904  bool

has_biosample =

false

;

2905  bool

has_bioproject =

false

;

2913

has_biosample =

true

;

2917

has_bioproject =

true

;

2929  for

(

auto

it : uo.

GetData

()) {

2930  if

(! it->GetLabel().IsStr()) {

2933  const string

&

label

= it->GetLabel().GetStr();

2936  const string

&

str

= it->GetData().GetStr();

2937  auto

fst =

str

.find_first_of(

"0123456789"

);

2941  const string

&

str

= it->GetData().GetStr();

2942  auto

lst =

str

.find_first_of(

"0123456789"

);

2946  if

((fr != 0) && (to != 0)) {

2947  int df

= to - fr + 1;

2960  if

(! has_biosample && ! has_bioproject) {

2962  "WGS master lacks both BioSample and BioProject"

,

2964

}

else if

(! has_biosample) {

2966  "WGS master lacks BioSample"

,

2968

}

else if

(! has_bioproject) {

2970  "WGS master lacks BioProject"

,

2973  if

(! has_biosample || ! has_bioproject) {

2981  bool

only_local =

true

;

2982  bool

is_NCACNTNW =

false

;

2983  bool

is_patent =

false

;

2985  if

(! (*id_it)->IsLocal()) {

2986

only_local =

false

;

2987  if

((*id_it)->IsPatent()) {

2990

is_NCACNTNW =

true

;

2995  if

(is_NCACNTNW || is_patent) {

2997

}

else if

(is_circular) {

2999

}

else if

(only_local) {

3021  if

(vec[

i

] ==

'N'

) {

3024  if

(max_stretch < this_stretch) {

3025

max_stretch = this_stretch;

3030  if

(this_stretch >= 10) {

3034  if

(vec.

size

() > 20 &&

i

> vec.

size

() - 10) {

3040  if

(max_stretch < this_stretch) {

3041

max_stretch = this_stretch;

3046  if

(max_stretch < this_stretch) {

3047

max_stretch = this_stretch;

3072  if

(max_stretch >= 15) {

3079  "Sequence has a stretch of at least 10 Ns within the first 20 bases"

, seq);

3084  "Sequence has a stretch of at least 10 Ns within the last 20 bases"

, seq);

3096  bool

at_least_one =

false

;

3098  for

(

CSeqVector_CI

sv_iter(vec); (sv_iter) && rval; ++sv_iter) {

3099  if

(*sv_iter !=

'N'

) {

3102

at_least_one =

true

;

3106  return

(rval && at_least_one);

3113  switch

(seq_data.

Which

()) {

3115

vector<char>::const_iterator it = seq_data.

GetNcbi4na

().

Get

().begin();

3116  unsigned char mask

= 0xf0;

3117  unsigned char

shift = 4;

3118  for

(

size_t n

= 0;

n

<

len

;

n

++) {

3119  unsigned char

c = ((*it) &

mask

) >> shift;

3135  for

(

size_t n

= 0;

n

<

len

&&

n

< s.length();

n

++) {

3136  if

(s[

n

] ==

'N'

) {

3149  for

(

size_t n

= 0;

n

<

len

;

n

++) {

3150  if

(s[

n

] ==

'N'

) {

3167  for

(

CSeqMap_CI

seq_iter(bsh, sel); seq_iter; ++seq_iter) {

3168  switch

(seq_iter.GetType()) {

3170  count

+=

CountNs

(seq_iter.GetData(), seq_iter.GetLength());

3203  bool

is_first =

true

;

3207  if

((*iter)->IsLoc()) {

3210  if

((*iter)->IsLiteral()) {

3267  int

max_stretch = 0;

3268  auto

IsN = [](

char

c) {

return

c ==

'N'

; };

3270  for

(

auto

begin_it = find_if_not(begin(vec), end(vec), IsN);

3271

begin_it != end(vec);) {

3272  auto

distanceToEnd = distance(begin_it, end(vec));

3274  auto

interval = (distanceToEnd > threshold) ? threshold : distanceToEnd;

3275  auto

end_it = find_if(begin_it,

next

(begin_it, interval), IsN);

3276  const auto

current_stretch = distance(begin_it, end_it);

3277  if

(current_stretch >= threshold) {

3281  if

(current_stretch > max_stretch) {

3282

max_stretch = current_stretch;

3284

begin_it = find_if_not(end_it, end(vec), IsN);

3329  "Maximum contig length is "

+

NStr::IntToString

(*oMaxLength) +

" bases"

, seq);

3336  bool

begin_ambig =

false

, end_ambig =

false

;

3344  bool

is_circular =

false

;

3346

is_circular =

true

;

3367  "Sequence has more than 5 Ns in the first 10 bases or more than 15 Ns in the first 50 bases"

,

3372  "Sequence has more than 5 Ns in the last 10 bases or more than 15 Ns in the last 50 bases"

,

3387  TSeqPos

num_ns = 0, max_stretch = 0;

3390  int

pct_n = (num_ns * 100) / seq.

GetLength

();

3396  if

(max_stretch >= 15) {

3402  "Sequence has a stretch of at least 10 Ns within the first 20 bases"

, seq);

3406  "Sequence has a stretch of at least 10 Ns within the last 20 bases"

, seq);

3412  int

pct_n =

PctNs

(bsh);

3428

}

catch

(exception&) {

3465

vector<TSeqPos> gapPositions;

3474  for

(; gap_it; ++gap_it) {

3479

gapPositions.push_back(gp_start);

3480

gapPositions.push_back(gp_end);

3487

vector<TSeqPos> featPositions;

3493  for

(; feat_it; ++feat_it) {

3504

featPositions.push_back(ft_start);

3505

featPositions.push_back(ft_end);

3512  int

remaininig_gaps = (

int

) gapPositions.size() / 2;

3513  int

remaining_feats = (

int

) featPositions.size() / 2;

3515  if

(remaininig_gaps < 1 || remaining_feats < 1) {

3522  TSeqPos

gap_start = gapPositions[gap_idx];

3524  TSeqPos

gap_end = gapPositions[gap_idx];

3528  TSeqPos

feat_start = featPositions[feat_idx];

3530  TSeqPos

feat_end = featPositions[feat_idx];

3536  while

(remaininig_gaps >= 0 && remaining_feats >= 0) {

3537  if

(gap_end < feat_start) {

3538  if

(remaininig_gaps <= 0) {

3541

gap_start = gapPositions[gap_idx];

3543

gap_end = gapPositions[gap_idx];

3546

}

else if

(feat_end < gap_start) {

3547  if

(remaining_feats <= 0) {

3550

feat_start = featPositions[feat_idx];

3552

feat_end = featPositions[feat_idx];

3557  if

(feat_start != gap_start || feat_end != gap_end) {

3560  if

(remaininig_gaps <= 0) {

3563

gap_start = gapPositions[gap_idx];

3565

gap_end = gapPositions[gap_idx];

3568  if

(remaining_feats <= 0) {

3571

feat_start = featPositions[feat_idx];

3573

feat_end = featPositions[feat_idx];

3581

}

catch

(

const

exception&) {

3596  "Fuzzy length on "

+ rpr +

" Bioseq"

, seq);

3603  "Invalid Bioseq length ["

+

len

+

"]"

, seq);

3617  "HTGS 2 raw seq has no gaps and no graphs"

, seq);

3632  if

(inst.

IsAa

()) {

3634  "Using a nucleic acid alphabet on a protein sequence"

,

3644  if

(inst.

IsNa

()) {

3646  "Using a protein alphabet on a nucleic acid"

,

3655  "Sequence alphabet not set"

,

3660  bool

check_alphabet =

false

;

3661  unsigned int

factor = 1;

3667

check_alphabet =

true

;

3687  "Sequence alphabet not set"

,

3692  if

(calc_len % factor) {

3701  if

(calc_len > data_len) {

3703  "Bioseq.seq_data too short ["

+ data_len_str +

3704  "] for given length ["

+ s_len +

"]"

, seq);

3706

}

else if

(calc_len < data_len) {

3708  "Bioseq.seq_data is larger ["

+ data_len_str +

3709  "] than given length ["

+ s_len +

"]"

, seq);

3712  if

(check_alphabet) {

3713  unsigned int

trailingX = 0;

3715  bool

leading_x =

false

, found_lower =

false

, cds_5_prime =

false

;

3722  for

(

CSeqVector_CI

sv_iter(*sv), sv_res_iter(sv_res); (sv_iter) && (sv_res_iter); ++sv_iter, ++sv_res_iter) {

3728

}

else if

(res ==

'*'

&& bsh.

IsAa

()) {

3730

}

else if

(res ==

'-'

&& bsh.

IsAa

()) {

3738  if

(++bad_cnt > 10) {

3740  "More than 10 invalid residues. Checking stopped"

,

3750

found_lower =

true

;

3752  string msg

=

"Invalid"

;

3753  if

(seq.

IsNa

() && strchr (

"EFIJLOPQXZ"

, res) !=

NULL

) {

3754  msg

+=

" nucleotide"

;

3755

}

else if

(seq.

IsNa

() && res ==

'U'

) {

3756  msg

+=

" nucleotide"

;

3758  msg

+=

" residue "

;

3772

}

else if

(res ==

'-'

|| sv->IsInGap(pos - 1)) {

3774

}

else if

(res ==

'*'

) {

3776

}

else if

(res ==

'X'

) {

3782  string msg

=

"Invalid residue ["

;

3797  if

(seq.

IsAa

() && (leading_x || trailingX > 0)) {

3807

cds_seq = cds_seq.substr(1);

3809

cds_seq = cds_seq.substr(2);

3816  if

(cds_seq.length() >= 3) {

3817  string

lastcodon = cds_seq.substr(cds_seq.length() - 3);

3826

cds_5_prime =

true

;

3838  "Sequence starts with leading X"

, seq);

3843  string msg

=

"Sequence ends in "

+

3845  if

(trailingX > 1) {

3853  "Sequence contains lower-case characters"

, seq);

3856  if

(terminations > 0 || dashes > 0) {

3873  string

protein_label;

3879  if

(! prots.empty()) {

3881

prots[0].GetData().GetProt();

3883

protein_label = first_prot.

GetName

().front();

3887

}

catch

(

const

std::exception&) {

3891

gene_label =

"gene?"

;

3894

protein_label =

"prot?"

;

3898  if

(gap_at_start && dashes == 1) {

3900  "gap symbol at start of protein sequence ("

+ gene_label +

" - "

+ protein_label +

")"

,

3902

}

else if

(gap_at_start) {

3904  "gap symbol at start of protein sequence ("

+ gene_label +

" - "

+ protein_label +

")"

,

3907  "["

+

NStr::SizetToString

(dashes - 1) +

"] internal gap symbols in protein sequence ("

+ gene_label +

" - "

+ protein_label +

")"

,

3911  "["

+

NStr::SizetToString

(dashes) +

"] internal gap symbols in protein sequence ("

+ gene_label +

" - "

+ protein_label +

")"

,

3916  if

(terminations > 0) {

3918  msg

+=

" ("

+ gene_label +

" - "

+ protein_label +

")"

;

3929  bool

is_wgs =

IsWGS

(bsh);

3933  bool

has_gap_char =

false

;

3938  const size_t

run_len_cutoff = ( is_wgs ? 20 : 100 );

3939  for

(

CSeqVector_CI

sv_iter(sv); (sv_iter); ++sv_iter, ++pos) {

3949

has_gap_char =

true

;

3955  if

(run_len >= run_len_cutoff && start_pos > 1) {

3967  "Raw nucleotide should not contain gap characters"

, seq);

3979  string

id_test_label;

3996  if

(seqlen > loclen) {

4001

}

else if

(seqlen < loclen) {

4014  const

list<CRef<CSeq_loc>>& locs = inst.

GetExt

().

GetSeg

().

Get

();

4020

list<CRef<CSeq_loc>>::const_iterator i2 = i1;

4021  for

(++i2; i2 != locs.end(); ++i2) {

4029  if

((**i1).IsWhole() && (**i2).IsWhole()) {

4032  "Segmented sequence has multiple references to "

+

4037  "Segmented sequence has multiple references to "

+

4038

sid +

" that are not SEQLOC_WHOLE"

, seq);

4049  bool

got_partial =

false

;

4051  if

(! (*sd)->IsMolinfo() || ! (*sd)->GetMolinfo().IsSetCompleteness()) {

4055  switch

((*sd)->GetMolinfo().GetCompleteness()) {

4057

got_partial =

true

;

4060  "Complete segmented sequence with MolInfo partial"

, seq);

4066  "No-left inconsistent with segmented SeqLoc"

,

4069

got_partial =

true

;

4074  "No-right inconsistent with segmented SeqLoc"

,

4077

got_partial =

true

;

4082  "No-ends inconsistent with segmented SeqLoc"

,

4085

got_partial =

true

;

4091  if

(! got_partial) {

4093  "Partial segmented sequence without MolInfo partial"

, seq);

4124  if

((*it)->IsSwissprot()) {

4136

}

else if

(

cmp

> 0) {

4142  if

(start1 < start2) {

4144

}

else if

(start2 < start1) {

4151  if

(stop1 < stop2) {

4171

}

else if

((*sg)->IsLoc()) {

4172  const CSeq_id

*

id

= (*sg)->GetLoc().GetId();

4190  if

(! loc.

IsInt

()) {

4199

far_loc->

SetInt

().SetFrom(start - 2);

4200

far_loc->

SetInt

().SetTo(start - 1);

4209

far_loc->

SetInt

().SetFrom(stop + 1);

4210

far_loc->

SetInt

().SetTo(stop + 2);

4228  "Delta seq component should not be of type whole"

, seq);

4235  "Delta component is gi|0"

, seq);

4250  if

(seq_len <= stop) {

4251  string

id_label =

id

->AsFastaString();

4254

+

") greater than length of "

+ id_label

4259  string

id_label =

id

->AsFastaString();

4261  "Scaffold points to some but not all of "

+

4262

id_label +

", excluded portion contains features"

, seq);

4266  "Unable to find far delta sequence component"

, seq);

4269

}

catch

(

const

std::exception&) {

4280  "-1 length on seq-loc of delta seq_ext"

, seq);

4283  if

(loc_str.empty()) {

4288  "Short length (-1) on seq-loc ("

+ loc_str +

") of delta seq_ext"

, seq);

4293  if

(loc_len <= 10) {

4296  if

(loc_str.empty()) {

4302  ") on seq-loc ("

+ loc_str +

") of delta seq_ext"

, seq);

4310  if

(loc_str.empty()) {

4314  "No length for Seq-loc ("

+ loc_str +

") of delta seq-ext"

,

4324

}

else if

(seg.

IsLoc

()) {

4342  "proximity ligation"

,

4360  bool

is_unspec =

false

;

4365  int

linktype = evidence.

GetType

();

4366  if

(linktype == 8) {

4416  "No CDelta_ext data for delta Bioseq"

, seq);

4419  bool

any_tech_ok =

false

;

4420  bool

has_gi =

false

;

4423

any_tech_ok =

true

;

4425

}

else if

((*id_it)->IsGi()) {

4430  if

(! any_tech_ok && seq.

IsNa

()

4444  bool

last_is_gap =

false

;

4445  int

prev_gap_linkage = -1;

4447  int

gap_linkage = -1;

4449  size_t

num_gaps = 0;

4450  size_t

num_adjacent_gaps = 0;

4451  bool

non_interspersed_gaps =

false

;

4453  int

num_gap_known_or_spec = 0;

4454  int

num_gap_unknown_unspec = 0;

4456

vector<CConstRef<CSeq_loc> > delta_locs;

4462  "NULL pointer in delta seq_ext valnode (segment "

+

4466  switch

((**sg).Which()) {

4468  const CSeq_loc

& loc = (**sg).GetLoc();

4470

delta_locs.push_back(

tmp

);

4474  if

(! last_is_gap && !

first

) {

4475

non_interspersed_gaps =

true

;

4477

last_is_gap =

false

;

4478

prev_gap_linkage = -1;

4493  "Seq-lit of length 0 in delta chain"

, seq);

4498  if

(! last_is_gap && !

first

) {

4499

non_interspersed_gaps =

true

;

4501

last_is_gap =

false

;

4502

prev_gap_linkage = -1;

4505

vector<TSeqPos> badIdx;

4507  const string

* ss =

nullptr

;

4508  switch

(

data

.Which()) {

4510

ss = &

data

.GetIupacaa().Get();

4513

ss = &

data

.GetIupacna().Get();

4516

ss = &

data

.GetNcbieaa().Get();

4519  const

vector<char>& c =

data

.GetNcbistdaa().Get();

4520  ITERATE

(vector<TSeqPos>, ci, badIdx) {

4522  "Invalid residue ["

+

4532  ITERATE

(vector<TSeqPos>, it, badIdx) {

4534  "Invalid residue ["

+

4535

ss->substr(*it, 1) +

"] at position ["

+

4544  if

(max_ns >= 0 && adjacent_ns >

unsigned

(max_ns)) {

4563

num_gap_unknown_unspec++;

4565

num_gap_known_or_spec++;

4579  "First delta seq component is a gap"

, seq);

4583

(prev_gap_type == gap_type ||

4584

prev_gap_linkage != gap_linkage ||

4587

++num_adjacent_gaps;

4596  "Gap of length 0 in delta chain"

, seq);

4599  "Gap of length 0 with unknown fuzz in delta chain"

, seq);

4604  "Gap of unknown length should have length 100"

, seq);

4607

last_is_gap =

true

;

4608

prev_gap_type = gap_type;

4609

prev_gap_linkage = gap_linkage;

4616  "CDelta_seq::Which() is e_not_set"

, seq);

4620  if

(num_gap_unknown_unspec > 0 && num_gap_known_or_spec == 0) {

4621  if

(num_gap_unknown_unspec > 1) {

4624  " Seq-gaps have unknown type and unspecified linkage"

, seq);

4627  "Single Seq-gap has unknown type and unspecified linkage"

, seq);

4642  if

(non_interspersed_gaps && ! has_gi && mi &&

4656  "HTGS delta seq should have gaps between all sequence runs"

, seq);

4658  if

(num_adjacent_gaps >= 1) {

4659  string msg

= (num_adjacent_gaps == 1) ?

4660  "There is 1 adjacent gap in delta seq"

:

4662  " adjacent gaps in delta seq"

;

4672  "Last delta seq component is a gap"

, seq);

4676  if

(num_gaps == 0 && mi) {

4681  "HTGS 2 delta seq has no gaps and no graphs"

, seq);

4686  if

(delta_locs.size() > 1) {

4688

vector<CConstRef<CSeq_loc>>::iterator it1 = delta_locs.begin();

4689

vector<CConstRef<CSeq_loc>>::iterator it2 = it1;

4691  while

(it2 != delta_locs.end()) {

4694  string

seq_label = (*it1)->GetId()->AsFastaString();

4700

+

" on a Bioseq "

+ seq_label,

4710  "Self-referential delta sequence"

, seq);

4719  if

(delta_i->Empty()) {

4727  if

(res ==

'N'

&& ! sv.

IsInGap

(pos - 1)) {

4729  "Ambiguous residue N is adjacent to a gap around position "

+

NStr::SizetToString

(pos + 1),

4734  if

(delta_len > 0 && pos + delta_len <

len

) {

4735  if

(sv.

IsInGap

(pos + delta_len - 1)) {

4737  if

(res ==

'N'

&& ! sv.

IsInGap

(pos + delta_len)) {

4739  "Ambiguous residue N is adjacent to a gap around position "

+

NStr::SizetToString

(pos + delta_len + 1),

4747

}

catch

(

const

std::exception&) {

4756  bool

has_gi =

false

;

4758  if

((*id_it)->IsGi()) {

4771  int

linkevarray[13];

4772  for

(

int i

= 0;

i

< 13;

i

++) {

4773

linkevarray[

i

] = 0;

4775  bool

is_unspec =

false

;

4780  int

linktype = evidence.

GetType

();

4781  if

(linktype == 8) {

4785  if

(linktype == 255) {

4786

(linkevarray[11])++;

4787

}

else if

(linktype < 0 || linktype > 10) {

4788

(linkevarray[12])++;

4790

(linkevarray[linktype])++;

4794  "Seq-gap type should not be within-clone for genome submission"

, seq);

4797  if

(linkevarray[8] > 0 && linkcount > linkevarray[8]) {

4799  "Seq-gap type has unspecified and additional linkage evidence"

, seq);

4801  for

(

int i

= 0;

i

< 13;

i

++) {

4802  if

(linkevarray[

i

] > 1) {

4804  string

(

"Linkage evidence '"

) +

linkEvStrings

[

i

] +

"' appears "

+

4810  "Seq-gap with linkage evidence must have linkage field set to linked"

, seq);

4813  int

gaptype = gap.

GetType

();

4821  if

(linkevarray[8] > 0 && linkcount == linkevarray[8]) {

4825  "Contamination gaps must have linkage evidence 'unspecified'"

, seq);

4830  " should not have linkage evidence"

, seq);

4836  int

gaptype = gap.

GetType

();

4839  "Seq-gap type == scaffold is missing required linkage evidence"

, seq);

4842  bool

suppress_SEQ_INST_SeqGapProblem =

false

;

4846  if

((**it).IsCreate_date())

4850

suppress_SEQ_INST_SeqGapProblem =

true

;

4855  if

(! suppress_SEQ_INST_SeqGapProblem)

4857  "Seq-gap type == repeat and linkage == linked is missing required linkage evidence"

, seq);

4862  "Contamination gap-types must be linked and have linkage-evidence of type 'unspecified'"

, seq);

4879

rpr =

"constructed"

;

4881  const string

err0 =

"Bioseq-ext not allowed on "

+ rpr +

" Bioseq"

;

4882  const string

err1 =

"Missing or incorrect Bioseq-ext on "

+ rpr +

" Bioseq"

;

4883  const string

err2 =

"Missing Seq-data on "

+ rpr +

" Bioseq"

;

4884  const string

err3 =

"Seq-data not allowed on "

+ rpr +

" Bioseq"

;

4953  "Invalid Bioseq->repr = "

+

4976  if

(! it->IsLoc())

continue

;

4978  if

(! hdl)

continue

;

4980  if

(! ci)

continue

;

4986  if

(parent_location == cgenome)

break

;

5036  "Transgenic source descriptor requires presence of source feature"

,

5043  "Genome difference between parent and component"

,

5050  "Mitochondrial Metazoan sequences should be less than 65000 bp"

,

5060  "A genomic sequence should not have uncultured in its organism name"

,

5074  "No Mol-info applies to this Bioseq"

,

5092  if

(all_feat_it->IsSetCit() || all_feat_it->GetData().IsPub()) {

5105  if

(closest_molinfo) {

5108  "Suspicious use of complete"

,

ctx

, *closest_molinfo);

5111  "Suspicious use of complete"

, seq);

5123  if

(! seq.

IsNa

()) {

5138

sequence::CDeflineGenerator defline_generator;

5139

title = defline_generator.GenerateDefline(seq, *

m_Scope

, sequence::CDeflineGenerator::fIgnoreExisting);

5148  bool

reported =

false

;

5152  bool

is_gb =

false

;

5154  if

((*it)->IsGenbank()) {

5165  "Circular topology has complete flag set, but title should say complete sequence or complete genome"

,

5240  if

(it->IsSeq() && it->GetSeq().IsSetInst_Repr() &&

5242

parent = it->GetSeq();

5275

vector<CConstRef<CSeq_feat>> containing_genes;

5276

vector<int> num_contained;

5279

vector<CConstRef<CSeq_feat>>::iterator cit = containing_genes.begin();

5280

vector<int>::iterator nit = num_contained.begin();

5281  while

(cit != containing_genes.end() && nit != num_contained.end()) {

5287  if

(n_right < left) {

5295

cit = containing_genes.erase(cit);

5296

nit = num_contained.erase(nit);

5303  const CSeq_feat

& ft = fi->GetOriginalFeature();

5306

containing_genes.push_back(ref);

5307

num_contained.push_back(0);

5310

vector<CConstRef<CSeq_feat>>::iterator cit = containing_genes.begin();

5311

vector<int>::iterator nit = num_contained.begin();

5312  while

(cit != containing_genes.end() && nit != num_contained.end()) {

5321

}

catch

(

const

exception& e) {

5323  string

(

"Exception while validating bioseq MultipleGeneOverlap. EXCEPTION: "

) +

5331  string msg

(

"gene ["

);

5336  msg

+=

"] overlaps CDS but does not completely contain it"

;

5345  msg

+=

"] overlaps mRNA but does not completely contain it"

;

5368  if

(!

label

.empty()) {

5376  if

(!

label

.empty()) {

5392  if

(connected_gene) {

5400

overlap_type,

m_Scope

) < 0) {

5435  if

((*db)->CanGetDb() &&

5447  if

(vec.

IsInGap

(pos) || vec[pos] ==

'N'

) {

5467  if

(pos <

len

- after && vec.

IsInGap

(pos + after)) {

5487  if

(pos >= before && vec.

IsInGap

(pos - before)) {

5510  for

(

CSeq_loc_CI

sl_iter(loc); sl_iter; ++sl_iter) {

5517  if

(

first

.GetStrand() !=

last

.GetStrand()) {

5540  if

(start >=

len

|| stop >=

len

) {

5545  swap

(acceptor, donor);

5546

stop =

len

- donor - 1;

5547

start =

len

- acceptor - 1;

5570

}

catch

(exception&) {

5580  if

((res1 ==

'G'

&& res2 ==

'T'

) ||

5581

(res1 ==

'G'

&& res2 ==

'C'

)) {

5587

}

catch

(exception&) {

5596  if

((res1 ==

'A'

) && (res2 ==

'G'

)) {

5602

}

catch

(exception&) {

5615  switch

(partial_type) {

5679

vector<CMappedFeat> children = tr->GetChildren(feat);

5680  ITERATE

(vector<CMappedFeat>, it, children) {

5690  bool

look_for_gene =

true

;

5699

vector<CMappedFeat> cds_children = tr->GetChildren(feat);

5700  if

(cds_children.size() > 0) {

5701

look_for_gene =

false

;

5702  for

(

auto

it = cds_children.begin(); it != cds_children.end(); it++) {

5719  if

(! rval && look_for_gene) {

5739  bool

bad_seq =

false

;

5740  bool

is_gap =

false

;

5741  bool

abuts_n =

false

;

5749 #ifdef USE_FEAT_TREE_FOR_EXON 5772  const CSeq_loc

& mrna_loc = s->second->GetLocation();

5801  "PartialLocation: Improper use of partial (greater than or less than)"

, feat);

5820  "PartialLocation: Internal partial intervals do not include first/last residue of sequence"

, feat);

5847  if

(! partial_start && ! partial_stop) {

5869  if

(intron_start == stop + 1 && partial_stop) {

5872  if

(intron_start > stop + 1) {

5875  if

(start > 0 && partial_start) {

5877  if

(intron_stop == start - 1) {

5897  string msg

= (partial_type == 0 ?

"Start"

:

"Stop"

);

5898  msg

+=

" does not include first/last residue of "

;

5901  bool

organelle =

false

;

5902  bool

not_expected =

false

;

5903  if

(at_splice_or_gap) {

5909  msg

+=

"organelle "

;

5913

not_expected =

true

;

5923  msg

+=

" (organelle does not use standard splice site convention)"

;

5926  msg

+=

" (but is at consensus splice site)"

;

5935  if

(partial_type == 0) {

5938

}

else if

(organelle) {

5946

}

else if

(organelle) {

5968  "Feature products should be entire sequences."

, *(feat.

GetSeq_feat

()));

5989  bool

no_nonconsensus_except =

true

;

5993  if

(

NStr::Find

(except_text,

"nonconsensus splice site"

) != string::npos ||

5994  NStr::Find

(except_text,

"heterogeneous population sequenced"

) != string::npos ||

5995  NStr::Find

(except_text,

"low-quality sequence region"

) != string::npos ||

5996  NStr::Find

(except_text,

"artificial location"

) != string::npos) {

5997

no_nonconsensus_except =

false

;

6002  string

comment_text;

6009  "Partial CDS on complete sequence"

,

6015  for

(

int

j = 0; j < 2; ++j) {

6016  if

(partial_loc & errtype) {

6017  bool

bad_seq =

false

;

6018  bool

is_gap =

false

;

6019  bool

abuts_n =

false

;

6035

}

else if

(bad_seq) {

6038  "PartialLocation: Start does not include first/last residue of sequence (and is at bad sequence)"

:

6039  "PartialLocation: Stop does not include first/last residue of sequence (and is at bad sequence)"

),

6043

&&

NStr::Find

(except_text,

"rearrangement required for product"

) != string::npos) {

6046  NStr::Find

(comment_text,

"coding region disrupted by sequencing gap"

) != string::npos) {

6050

}

else if

(! no_nonconsensus_except) {

6061  "5' partial is not at beginning of sequence, gap, or consensus splice site"

,

6065  "3' partial is not at end of sequence, gap, or consensus splice site"

,

6074  "Start does not include first/last residue of sequence"

, *(feat.

GetSeq_feat

()));

6075

}

else if

(j == 1) {

6077  "Stop does not include first/last residue of sequence"

, *(feat.

GetSeq_feat

()));

6112  const CBioseq

& seq,

bool

is_complete)

6117  if

((*it)->IsGenbank()) {

6118  if

((*it)->GetGenbank().IsSetAccession()) {

6119

accession = (*it)->GetGenbank().GetAccession();

6122

}

else if

((*it)->IsDdbj()) {

6123  if

((*it)->GetDdbj().IsSetAccession()) {

6124

accession = (*it)->GetDdbj().GetAccession();

6127

}

else if

((*it)->IsGi()) {

6133  unsigned int

nummrna = 0, numcds = 0, numcrgn = 0, numvseg = 0, numdseg = 0, numjseg = 0;

6134  int

numgene = 0, num_pseudomrna = 0, num_pseudocds = 0, num_rearrangedcds = 0;

6135

vector< CConstRef < CSeq_id > > cds_products, mrna_products;

6137  int

num_full_length_prot_ref = 0;

6144  bool

is_emb =

false

, non_pseudo_16S_rRNA =

false

;

6147  if

((*seq_it)->IsEmbl()) {

6149

}

else if

((*seq_it)->IsOther()) {

6154  int

firstcdsgencode = 0;

6155  bool

mixedcdsgencodes =

false

;

6159  const CSeq_feat

& feat = fi->GetOriginalFeature();

6168  string

locus = gene_ref.

GetLocus

();

6171  const CSeq_feat

& gene_feat = gene_it->GetOriginalFeature();

6178  "locus collides with locus_tag in another gene"

, feat);

6188

cds_products.push_back(ref);

6197

num_rearrangedcds++;

6206  if

((*it)->IsId()) {

6207

cdsgencode = (*it)->GetId();

6210  if

(cdsgencode != 0) {

6211  if

(firstcdsgencode == 0) {

6212

firstcdsgencode = cdsgencode;

6213

}

else if

(firstcdsgencode != cdsgencode) {

6214

mixedcdsgencodes =

true

;

6224

mrna_products.push_back(ref);

6240

non_pseudo_16S_rRNA =

true

;

6270

num_full_length_prot_ref++;

6279  "Genes on protein sequences with PGAP annotation should not have locus tags."

, feat);

6285  "Invalid feature for a protein Bioseq."

, feat);

6300  bool

slippage_except =

false

;

6301  bool

circular_rna =

false

;

6309  if

((! excpet || ! slippage_except) && ! circular_rna) {

6312  "Multi-interval CDS feature is invalid on an mRNA " 6322  "mRNA feature is invalid on an mRNA (cDNA) Bioseq."

,

6328  if

(imp.

GetKey

() ==

"intron"

) {

6330  "Invalid feature for an mRNA Bioseq."

, feat);

6341  "Feature has 'far' location - accession not packaged in record"

,

6353  bool

isEukaryote =

false

;

6354  bool

isMicrosporidia =

false

;

6361

isEukaryote =

true

;

6363

isMicrosporidia =

true

;

6367  if

(isEukaryote && (! isMicrosporidia) &&

6378  "Improper 16S ribosomal RNA"

,

6385  if

(mixedcdsgencodes) {

6391  "Multiple CDS genetic codes on sequence"

, seq);

6396  if

(is_aa && num_full_length_prot_ref == 0) {

6405  const CSeq_feat

& prot_feat = it->GetOriginalFeature();

6409

(range.

GetFrom

() == 0 && range.

GetTo

() == parent_len - 1)) &&

6413

num_full_length_prot_ref++;

6415

}

catch

(

const

exception&) {

6418

(range.

GetFrom

() == 0 && range.

GetTo

() == parent_len - 1)) &&

6419

(! it->GetData().GetProt().IsSetProcessed() ||

6422

num_full_length_prot_ref++;

6429  if

(is_aa && num_full_length_prot_ref == 0 && ! is_virtual && !

m_Imp

.

IsPDB

()) {

6433  if

(is_aa && num_full_length_prot_ref > 1 && !

SeqIsPatent

(seq)) {

6436

+

" full-length protein features present on protein"

, seq);

6451  bool

cds_products_unique =

true

;

6452  if

(cds_products.size() > 1) {

6453

stable_sort(cds_products.begin(), cds_products.end(),

s_SeqIdCompare

);

6457  bool

mrna_products_unique =

true

;

6458  if

(mrna_products.size() > 1) {

6459

stable_sort(mrna_products.begin(), mrna_products.end(),

s_SeqIdCompare

);

6463  if

(numcds > 0 && nummrna > 1) {

6464  if

(cds_products.size() > 0 && cds_products.size() + num_pseudocds + num_rearrangedcds != numcds) {

6470  if

(cds_products.size() > 0 && (! cds_products_unique)) {

6472  "CDS products are not unique"

, seq);

6474  if

(mrna_products.size() > 0 && mrna_products.size() + num_pseudomrna != nummrna) {

6480  if

(mrna_products.size() > 0 && (! mrna_products_unique)) {

6482  "mRNA products are not unique"

, seq);

6505

}

catch

(

const

exception& e) {

6506  if

(

NStr::Find

(e.what(),

"Error: Cannot resolve"

) == string::npos) {

6508  string

(

"Exception while validating Seqfeat Context. EXCEPTION: "

) +

6526  if

((*id_it)->IsGi()) {

6527  return

(*id_it)->GetGi();

6581  bool

match1 =

false

, match2 =

false

;

6582  bool

has1 =

false

, has2 =

false

;

6584  if

((*itx)->IsSetId()) {

6593  if

((*itx)->IsSetId()) {

6601  if

((has1 || has2) && (! match1 || ! match2)) {

6620  if

(

id

.GetGi() == gi) {

6628

}

catch

(

const

std::exception&) {

6642  bool match

=

false

;

6674

m_HasMatch(

false

),

6721  const CSeq_loc

& utr5_loc = s->second->GetLocation();

6724  if

(utr5_start == mrna_start) {

6725  if

(mrna_stop >= utr5_stop && mrna_stop - utr5_stop < 6) {

6727

}

else if

(utr5_stop >= mrna_stop && utr5_stop - mrna_stop < 6) {

6736  const CSeq_loc

& utr3_loc = s->second->GetLocation();

6739  if

(utr3_stop == mrna_stop) {

6740  if

(mrna_start >= utr3_start && mrna_start - utr3_start < 6) {

6742

}

else if

(utr3_start >= mrna_start && utr3_start - mrna_start < 6) {

6758

m_IsPseudo(

false

),

6759

m_NeedsMatch(

true

),

6760

m_ProductsUnique(

true

)

6816  if

(unmatched_mrnas.

empty

()) {

6821  if

(! (*xref_it)->IsSetId() ||

6822

! (*xref_it)->GetId().IsLocal()) {

6826  ITERATE

(vector<CSeq_feat_Handle>, h, handles) {

6831  if

(mrna_it != unmatched_mrnas.

end

()) {

6834

unmatched_mrnas.

erase

(mrna_it);

6846  if

(unmatched_mrnas.

empty

()) {

6854  if

(feats.size() == 0) {

6856  while

(mrna_it != unmatched_mrnas.

end

()) {

6857  if

(

Overlaps

(mrna_it->second->GetSeqfeat())) {

6860

unmatched_mrnas.

erase

(mrna_it);

6868  if

(mrna_it != unmatched_mrnas.

end

()) {

6874

unmatched_mrnas.

erase

(mrna_it);

6887  string

product_string;

6890  return

product_string;

6895  return

product_string;

6901

list<CConstRef<CSeq_feat>>::iterator it =

m_OtherMrnas

.begin();

6902

list<string> product_strings;

6905  if

(mrna_it == unmatched_mrnas.

end

()) {

6909

product_strings.push_back(product_string);

6919  const auto

num_products = product_strings.size();

6920  if

(product_strings.size() > 1) {

6922

product_strings.sort();

6923

product_strings.unique();

6924  const auto

num_unique_products = product_strings.size();

6925  if

(num_unique_products == num_products) {

6968  if

(num_mrnas < 2) {

6974

+

" mRNAs, but product locations are unique"

,

6993  const auto

& cds_feat = cds_match.

GetSeqfeat

();

6998  if

(xrefs_match == 2) {

7000  "MrnaProteinLink inconsistent with feature ID cross-references"

,

7008  size_t

start_pos =

NStr::Find

(protein_id,

"gnl|"

);

7009  if

(start_pos == string::npos) {

7012

start_pos =

NStr::Find

(protein_id,

"|"

, start_pos + 5);

7013  if

(start_pos == string::npos) {

7016  size_t

end_pos =

NStr::Find

(protein_id,

"|"

, start_pos + 1);

7018  if

(end_pos == string::npos) {

7019

prot_tag = protein_id.substr(start_pos + 1);

7021

prot_tag = protein_id.substr(start_pos + 1, end_pos - start_pos - 1);

7044  "CDS-mRNA pair has one missing protein_id ("

+ protein_id +

")"

, cds);

7052  "CDS-mRNA pair has mismatching protein_ids ("

+

7053

product_id.

AsFastaString

() +

", "

+ protein_id +

")"

, cds);

7060  if

((*id_it)->IsGeneral()) {

7063  "CDS-mRNA pair has mismatching protein_ids ("

+

7064

(*id_it)->AsFastaString() +

", "

+ protein_id +

")"

, cds);

7078  if

(

NStr::Equal

(protein_id, (*id_it)->AsFastaString())) {

7087  "CDS-mRNA pair has one missing protein_id ("

+ protein_id +

")"

, cds);

7097  const auto

& cds_feat = cds_match.

GetSeqfeat

();

7098  string

cds_transcript_id;

7099  string

mrna_transcript_id;

7100  string

mrna_protein_id;

7101  bool

must_reconcile =

false

;

7102  if

(mrna_feat.IsSetQual()) {

7104  if

((*q)->IsSetQual() && (*q)->IsSetVal()) {

7106

mrna_transcript_id = (*q)->GetVal();

7107

must_reconcile =

true

;

7109

mrna_protein_id = (*q)->GetVal();

7110

must_reconcile =

true

;

7115  if

(cds_feat.IsSetQual()) {

7117  if

((*q)->IsSetQual() && (*q)->IsSetVal()) {

7119

cds_transcript_id = (*q)->GetVal();

7120

must_reconcile =

true

;

7126  if

(must_reconcile) {

7127  if

(!

NStr::Equal

(mrna_transcript_id, cds_transcript_id)) {

7129  "CDS-mRNA pair has mismatching transcript_ids (" 7130

+ cds_transcript_id +

","

+ mrna_transcript_id +

")"

,

7190  for

(

auto

it : feat.

GetQual

()) {

7218  if

(

data

.IsGene()) {

7276  if

(strand1 == strand2) {

7283  "No parent for (pseudo) CdRegion"

, ft1);

7286  "No parent for CdRegion"

, ft1);

7294  "No parent for (pseudo) CdRegion"

, ft2);

7297  "No parent for CdRegion"

, ft2);

7311  unsigned int

lclcds = 0, lclcrgn = 0, lclvseg = 0, lcldseg = 0, lcljseg = 0, lclnone = 0, lclothr = 0;

7318  if

(sbt == CSeqFeatData::ESubtype::eSubtype_cdregion) {

7325  if

(ptyp == CSeqFeatData::ESubtype::eSubtype_C_region) {

7327

}

else if

(ptyp == CSeqFeatData::ESubtype::eSubtype_V_segment) {

7329

}

else if

(ptyp == CSeqFeatData::ESubtype::eSubtype_D_segment) {

7331

}

else if

(ptyp == CSeqFeatData::ESubtype::eSubtype_J_segment) {

7349

locus = gene.GetLocus();

7351

locus = gene.GetLocus_tag();

7353

CConstRef<CSeq_loc> gloc = gne->GetMappedLocation();

7356

gloc->GetLabel(&locus);

7361  if

(locus.length() > 0) {

7362

PostErr(eDiag_Warning, eErr_SEQ_FEAT_CDSdoesNotMatchVDJC,

7363  "No parent for CdRegion (gene is "

+ locus +

")"

, *sf);

7365

PostErr(eDiag_Warning, eErr_SEQ_FEAT_CDSdoesNotMatchVDJC,

7366  "No parent for CdRegion"

, *sf);

7389

list<CRef<CCdsMatchInfo>> cds_list;

7394  for

(

const auto

& mapped_feat : *

m_AllFeatIt

) {

7395  if

(! mapped_feat.IsSetData()) {

7399  if

(mapped_feat.GetData().IsCdregion()) {

7400  const auto

& cds_feat = *mapped_feat.GetSeq_feat();

7405  if

(cds_feat.IsSetPseudo() && cds_feat.GetPseudo()) {

7406

cds_match->SetPseudo();

7414

cds_list.push_back(cds_match);

7416  const auto

& feat = *mapped_feat.GetSeq_feat();

7421  if

(! mrna_map.

empty

()) {

7425  const size_t

num_mrna = mrna_map.

size

();

7428  for

(

auto

&& cds : cds_list) {

7429

cds->AssignXrefMatch(mrna_map, tse);

7432  if

(! mrna_map.

empty

()) {

7433  for

(

auto

&& cds : cds_list) {

7434  if

(! cds->HasMatch()) {

7435

cds->AssignOverlapMatch(mrna_map, *

m_Scope

);

7440  int

num_matched_cds = 0;

7441  int

num_unmatched_cds = 0;

7442  for

(

auto

&& cds : cds_list) {

7450  if

(cds->IsPseudo() ||

7451

(cds->GetSeqfeat().IsSetExcept() &&

7452

cds->GetSeqfeat().IsSetExcept_text() &&

7453  NStr::Find

(cds->GetSeqfeat().GetExcept_text(),

"rearrangement required for product"

) != string::npos)) {

7454

cds->NeedsMatch(

false

);

7458  if

(cds->HasMatch()) {

7461

++num_unmatched_cds;

7467  if

(num_unmatched_cds > 0 &&

7469  if

(num_unmatched_cds >= 10) {

7470  const auto

nmcds = num_matched_cds + num_unmatched_cds;

7474

+

" CDSs unmatched"

,

7477  for

(

const auto

& cds : cds_list) {

7478  if

(! cds->HasMatch() && cds->NeedsMatch()) {

7480  "Unmatched CDS"

, cds->GetSeqfeat());

7487  size_t

num_unmatched_mrna = 0;

7496

num_unmatched_mrna++;

7501  if

(num_unmatched_mrna > 10) {

7509  "No CDS location match for 1 mRNA"

, it->second->GetSeqfeat());

7523

TFeatCount cds_count, mrna_count;

7536  const CSeq_feat

& feat = it->GetOriginalFeature();

7541  if

(cds_count.find(gene) == cds_count.end()) {

7542

cds_count[gene] = mrna_count[gene] = 0;

7558  ITERATE

(TFeatCount, it, cds_count) {

7559  size_t

cds_num = it->second,

7560

mrna_num = mrna_count[it->first];

7561  if

(cds_num > 0 && mrna_num > 1 && cds_num != mrna_num) {

7565  ") count for gene"

, *it->first);

7592  const size_t

num_cds = cd_region_feats.size();

7602

strand = cd_region_feats.back().GetLocation().GetStrand();

7605  bool

is_mrna =

false

;

7621  "CDS should not be on minus strand of mRNA molecule"

, cdregion_it->GetOriginalFeature());

7626  if

(is_mrna || (num_cds == 1 && num_gene < 2)) {

7636  bool

first_cds =

true

;

7639

vector<CCacheImpl::SFeatKey> featKeys;

7646

featKeys.push_back(multi_feat_key_template);

7648

featKeys.push_back(multi_feat_key_template);

7650

featKeys.push_back(multi_feat_key_template);

7652

featKeys.push_back(multi_feat_key_template);

7666  "3'UTR is not on minus strand"

, cug_it->GetOriginalFeature());

7667

}

else if

(utr5_right > 0 && utr5_right + 1 != this_left) {

7669  "Previous 5'UTR does not abut next 3'UTR"

, cug_it->GetOriginalFeature());

7671

utr3_right = this_right;

7673  if

(utr3_right > 0 && utr3_right + 1 != this_left) {

7675  "CDS does not abut 3'UTR"

, cug_it->GetOriginalFeature());

7678

cds_right = this_right;

7682  "5'UTR is not on minus strand"

, cug_it->GetOriginalFeature());

7683

}

else if

(cds_right > 0 && cds_right + 1 != this_left) {

7685  "5'UTR does not abut CDS"

, cug_it->GetOriginalFeature());

7687

utr5_right = this_right;

7699  "5'UTR is not on plus strand"

, cug_it->GetOriginalFeature());

7700

}

else if

(utr3_right > 0 && utr3_right + 1 != this_left) {

7702  "Previous 3'UTR does not abut next 5'UTR"

, cug_it->GetOriginalFeature());

7704

utr5_right = this_right;

7706  if

(utr5_right > 0 && utr5_right + 1 != this_left && first_cds ) {

7709  "5'UTR does not abut CDS"

, cug_it->GetOriginalFeature());

7712

cds_right = this_right;

7716  "3'UTR is not on plus strand"

, cug_it->GetOriginalFeature());

7717

}

else if

(cds_right > 0 && cds_right + 1 != this_left && num_3utr == 1) {

7719  "CDS does not abut 3'UTR"

, cug_it->GetOriginalFeature());

7721  if

(is_mrna && num_cds == 1 && num_3utr == 1 && this_right != (

int

) seq.

GetBioseqLength

() - 1) {

7723  "3'UTR does not extend to end of mRNA"

, cug_it->GetOriginalFeature());

7749  if

(!

rna

.IsSetType()) {

7751

}

else if

(!

rna

.IsSetExt()) {

7754  const string

& product =

rna

.GetExt().GetName();

7778  if

(

rna

.GetExt().IsName()) {

7779

product =

rna

.GetExt().GetName();

7783

&& (*it)->IsSetVal() && !

NStr::IsBlank

((*it)->GetVal())) {

7784

product = (*it)->GetVal();

7789

}

else if

(

rna

.GetExt().IsGen()) {

7790  if

(

rna

.GetExt().GetGen().IsSetProduct()) {

7791

product =

rna

.GetExt().GetGen().GetProduct();

7821  if

(start < 0 || (

unsigned int

) stop >= seq.

GetInst_Length

() || start > stop) {

7828  if

((*it)->IsLiteral()) {

7829

this_len = (*it)->GetLiteral().GetLength();

7830

}

else if

((*it)->IsLoc()) {

7833  if

((*it)->IsLiteral() &&

7834

(! (*it)->GetLiteral().IsSetSeq_data() || (*it)->GetLiteral().GetSeq_data().IsGap())) {

7915  "Inconsistent strands for rRNA components"

,

7921

}

else if

(right1 + 1 < left2) {

7928  "ITS does not abut adjacent rRNA component"

,

7934  "ITS does not abut adjacent rRNA component"

,

7938

}

else if

(right1 + 1 > left2) {

7944  "ITS overlaps adjacent rRNA component"

,

7954  "ITS overlaps adjacent rRNA component"

,

7962

}

else if

(! is_organelle) {

7973  "Problem with order of abutting rRNA components"

,

7985  "Problem with order of abutting rRNA components"

,

8010  if

(! same_annot && ! same_label) {

8031  if

(g1 && g2 && g1 != g2) {

8039

&&

prev

.IsSetPartial() &&

prev

.GetPartial()) {

8048

&&

prev

.IsSetPseudo() &&

prev

.GetPseudo()) {

8099  "Duplicate feature"

, feat2);

8109  "Features have identical intervals, but labels differ"

,

8117  "Duplicate feature (packaged in different feature table)"

,

8124  "Features have identical intervals, but labels " 8125  "differ (packaged in different feature table)"

,

8160  string msg

=

"Signal, Transit, or Mature peptide features overlap"

;

8172

cds_loc = cds_loc.substr(8);

8179

cds_loc =

" (parent CDS is on "

+ cds_loc +

")"

;

8183

}

catch

(

const

exception&) {

8186  if

(! reported_last_peptide) {

8196

reported_last_peptide =

true

;

8198

reported_last_peptide =

false

;

8212  bool

fruit_fly =

false

;

8213  bool

viral =

false

;

8239

CCacheImpl::TFeatValue::const_iterator curr_it = prev_it;

8244  for

(; curr_it !=

m_AllFeatIt

->end(); ++curr_it) {

8247  if

(curr_start > prev_end) {

8256

CCacheImpl::TFeatValue::const_iterator prev_prot =

m_AllFeatIt

->begin();

8258

CCacheImpl::TFeatValue::const_iterator curr_prot = prev_prot;

8260  bool

reported_last_peptide =

false

;

8261  for

(; curr_prot !=

m_AllFeatIt

->end(); ++prev_prot, ++curr_prot) {

8265

}

catch

(

const

exception& e) {

8266  if

(

NStr::Find

(e.what(),

"Error: Cannot resolve"

) == string::npos) {

8268  string

(

"Exception while validating duplicate/overlapping features. EXCEPTION: "

) +

8276

vector<int> intervalpoints;

8281  if

(part.

IsInt

()) {

8283

intervalpoints.push_back(ivl.

GetFrom

());

8284

intervalpoints.push_back(ivl.

GetTo

());

8288

intervalpoints.push_back(ivl.

GetFrom

());

8289

intervalpoints.push_back(ivl.

GetTo

());

8293

}

else if

(part.

IsPnt

()) {

8295

intervalpoints.push_back(pnt.

GetPoint

());

8296

intervalpoints.push_back(pnt.

GetPoint

());

8301  return

intervalpoints;

8309  while

(feat_ci_dup) {

8316  const CSeq_loc

& part = curr.GetEmbeddingSeq_loc();

8317  if

(part.

IsInt

()) {

8321

}

else if

(part.

IsPnt

()) {

8328  if

(start + 1 == fr && stop - 1 == to) {

8331  if

(start + 1 == fr && to ==

max

) {

8352  for

(

CFeat_CI

feat_ci(bsh, sel); feat_ci; ++feat_ci) {

8354  const CSeq_feat

& const_feat = feat_ci->GetOriginalFeature();

8359  unsigned len

= (unsigned)intervalpoints.size();

8365  bool

twintron =

true

;

8367  for

(

unsigned

pos = 1; pos <

max

; pos += 2) {

8368  Int4

intL = intervalpoints[pos];

8369  Int4

intR = intervalpoints[pos + 1];

8384  "Multi-interval intron contains possible twintron"

,

8388  "An intron should not have multiple intervals"

,

8393  if

(

NStr::Find

(e.

what

(),

"Error: Cannot resolve"

) == string::npos) {

8405  if

((*db)->CanGetDb()) {

8418  bool

has_local =

false

, has_genbank =

false

;

8419  bool

has_gi =

false

, has_tpa =

false

, has_bankit =

false

, has_smart =

false

;

8422  switch

((*it)->Which()) {

8429

has_genbank =

true

;

8443  if

((*it)->GetGeneral().IsSetDb()) {

8456  if

(has_genbank)

return false

;

8457  if

(has_tpa)

return true

;

8458  if

(has_refseq)

return false

;

8459  if

(has_bankit)

return true

;

8460  if

(has_smart)

return true

;

8461  if

(has_gi)

return false

;

8462  if

(has_local)

return true

;

8470  if

(date.

IsStr

()) {

8471

out_date_str = date.

GetStr

();

8472

}

else if

(date.

IsStd

()) {

8474

date.

GetDate

(&out_date_str,

"%{%3N %{%D, %}%}%Y"

);

8494

vector<string> sc_prefixes;

8502

sc_prefixes.push_back(prefix);

8508  sort

(sc_prefixes.begin(), sc_prefixes.end());

8511  ITERATE

(vector<string>, it, sc_prefixes) {

8517  "Multiple structured comments with prefix "

+ previous,

8526  "Multiple structured comments with prefix "

+ previous,

8617  bool

is_master =

false

;

8628  bool

is_WP =

false

;

8632  switch

(sid.

Which

()) {

8638  const CTextseq_id

& tsid = *

id

->GetTextseq_Id();

8659  bool

embl_or_ddbj =

false

;

8661  if

((*id)->IsEmbl() || (*id)->IsDdbj()) {

8662

embl_or_ddbj =

true

;

8667  return

embl_or_ddbj;

8674  if

((*id)->IsGenbank()) {

8685  if

((*id)->IsOther()) {

8702  "Undesired multiple comment descriptors, identical text"

,

8724  bool

is_nc =

false

;

8725  bool

is_ac =

false

;

8727  if

((*id_it)->IsOther() && (*id_it)->GetOther().IsSetAccession()) {

8728  string

accession = (*id_it)->GetOther().GetAccession();

8738  if

(! is_nc && ! is_ac) {

8773  int

tech = -1, completeness = -1;

8779  bool

is_genome_assembly =

false

;

8780  bool

is_assembly =

false

;

8781  bool

is_finished_status =

false

;

8790  switch

(desc.

Which

()) {

8794  if

(pos != string::npos) {

8797  if

(pos != string::npos) {

8800  if

(pos != string::npos) {

8801  bool

report_fasta_brackets =

true

;

8803  if

((*id_it)->IsGeneral()) {

8804  const CDbtag

& dbtag = (*id_it)->GetGeneral();

8808

report_fasta_brackets =

false

;

8814  if

(report_fasta_brackets) {

8816  const CBioSource

& bsrc = (*bs_ref).GetSource();

8822  if

(pos2 != string::npos) {

8824  if

(pos2 != string::npos) {

8825

report_fasta_brackets =

false

;

8832  if

(report_fasta_brackets) {

8834  "Title may have unparsed [...=...] construct"

,

8845

vector<string> keywords;

8848

keywords.push_back(*

key

);

8855  switch

(desc.

Which

()) {

8859

org = &(desc.

GetOrg

());

8892  if

(! use_ctx || ! use_ctx->

IsSet

()

8898  "Inconsistent create_dates ["

+ current_str +

8899  "] and ["

+ create_str +

"]"

, *use_ctx, desc);

8902

create_desc = &desc;

8916

update_desc = &desc;

8936  if

(

source

.IsSetOrg()) {

8958  "Non-TPA record "

+ id_str +

" should not have TpaAssembly object"

, seq);

8964  "RefGeneTracking object should only be in RefSeq record"

,

8971  bool

found =

false

;

8983  "Structured Comment is non-compliant, keyword should be removed"

,

ctx

, desc);

8988  if

((*field)->IsSetLabel() && (*field)->GetLabel().IsStr()) {

8989  if

(

NStr::EqualNocase

((*field)->GetLabel().GetStr(),

"StructuredCommentPrefix"

)) {

8990  const string

& prefix = (*field)->GetData().GetStr();

8992

is_genome_assembly =

true

;

8994

is_assembly =

true

;

8996

}

else if

(

NStr::EqualNocase

((*field)->GetLabel().GetStr(),

"Current Finishing Status"

)) {

8997  const string

& prefix = (*field)->GetData().GetStr();

8999

is_finished_status =

true

;

9038  if

((*id_it)->IsOther()) {

9065

title = title.substr (11);

9070  "RefSeq nucleotide title does not start with organism name"

,

9073

}

else if

(seq.

IsAa

()) {

9074

taxname =

"["

+ taxname +

"]"

;

9078  "RefSeq protein title does not end with organism name"

,

9096  "Undesired multiple name descriptors, identical text"

,

9100  "Undesired multiple name descriptors, different text"

,

9110  if

(! seq.

IsAa

()) {

9112  "Nucleic acid with protein sequence method"

,

9125  const string

&

buf

= seq.

GetId

().front()->AsFastaString();

9140  "Multiple GenBank blocks"

,

ctx

, *last_gb);

9145  "Multiple EMBL blocks"

,

ctx

, *last_embl);

9150  "Multiple PIR blocks"

,

ctx

, *last_pir);

9155  "Multiple PDB blocks"

,

ctx

, *last_pdb);

9160  "Multiple PRF blocks"

,

ctx

, *last_prf);

9165  "Multiple SWISS-PROT blocks"

,

ctx

, *last_sp);

9183  bool

has_tpa_inf =

false

, has_tpa_exp =

false

;

9186

has_tpa_exp =

true

;

9188

has_tpa_inf =

true

;

9191  if

(has_tpa_inf && has_tpa_exp) {

9193  "TPA:experimental and TPA:inferential should not both be in the same set of keywords"

,

9204  "TSA sequence should not be DNA"

, seq);

9215  int

& last_completeness,

9221  bool

is_synthetic_construct =

false

;

9222  bool

is_artificial =

false

;

9227  if

(! is_synthetic_construct) {

9230  if

(! is_artificial) {

9240  if

(seq_biomol < 0) {

9241

seq_biomol = biomol;

9248  "Nucleic acid with Molinfo = peptide"

,

ctx

, desc);

9253  if

(! is_artificial) {

9255  "Molinfo-biomol = other genetic"

,

ctx

, desc);

9264  "Molinfo-biomol other used"

,

ctx

, desc);

9274  "] used on protein"

,

ctx

, desc);

9276  if

(biomol != seq_biomol) {

9278  "Inconsistent Molinfo-biomol ["

+

9291  "mRNA should be single stranded not double stranded"

,

ctx

, desc);

9294  if

(is_synthetic_construct && ! seq.

IsAa

()) {

9312  "Nucleic acid with protein sequence method"

,

ctx

, desc);

9333  "Protein with nucleic acid sequence method"

,

ctx

, desc);

9359  "HTGS/STS/GSS/WGS sequence should be genomic"

, seq);

9364  "HTGS/STS/GSS/WGS sequence should not be RNA"

, seq);

9372  "EST sequence should be mRNA"

, seq);

9384  bool

has_draft =

false

;

9385  bool

has_prefin =

false

;

9386  bool

has_activefin =

false

;

9387  bool

has_fulltop =

false

;

9397

has_activefin =

true

;

9399

has_fulltop =

true

;

9408  "HTGS 3 sequence should not have HTGS_DRAFT keyword"

, seq);

9412  "HTGS 3 sequence should not have HTGS_PREFIN keyword"

, seq);

9414  if

(has_activefin) {

9416  "HTGS 3 sequence should not have HTGS_ACTIVEFIN keyword"

, seq);

9420  "HTGS 3 sequence should not have HTGS_FULLTOP keyword"

, seq);

9424  if

(last_tech > 0) {

9425  if

(last_tech != tech) {

9434  if

(last_tech > -1) {

9435  if

(last_tech != 0) {

9438

+

"] and [0]"

,

ctx

, desc);

9446  if

(last_completeness > 0) {

9456  if

(last_completeness > -1) {

9457  if

(last_completeness != 0) {

9460

+

"] and [0]"

,

ctx

, desc);

9463

last_completeness = 0;

9468  if

(closest_molinfo) {

9522  const string

& lineage,

9523  const string

& stranded_mol,

9543  "dsRNA virus should be genomic RNA"

,

9562

mssg =

"single-stranded RNA"

;

9564

mssg =

"double-stranded RNA"

;

9566

mssg =

"single-stranded DNA"

;

9568

mssg =

"double-stranded DNA"

;

9570

mssg =

"unknown-stranded RNA"

;

9572

mssg =

"unknown-stranded DNA"

;

9576  "Taxonomy indicates "

+ mssg +

9578  ") is conflicting."

,

9585  const string

& lineage,

9586  const string

& stranded_mol,

9606  "Ambisense virus should be genomic RNA or cRNA"

,

9615  "Retrovirus should be genomic RNA or genomic DNA"

,

9621  bool

negative_strand_virus =

false

;

9622  bool

plus_strand_virus =

false

;

9624

negative_strand_virus =

true

;

9627

plus_strand_virus =

true

;

9629  if

(! negative_strand_virus && ! plus_strand_virus) {

9633  bool

is_synthetic =

false

;

9635

is_synthetic =

true

;

9636

}

else if

(

source

.IsSetOrigin()) {

9641

is_synthetic =

true

;

9645  bool

has_cds =

false

;

9646  bool

has_plus_cds =

false

;

9647  bool

has_minus_cds =

false

;

9653

has_minus_cds =

true

;

9655

has_plus_cds =

true

;

9657  if

(has_minus_cds && has_plus_cds) {

9664  bool

has_minus_misc_feat =

false

;

9665  bool

has_plus_misc_feat =

false

;

9673

has_minus_misc_feat =

true

;

9675

has_plus_misc_feat =

true

;

9678  if

(has_minus_misc_feat && has_plus_misc_feat) {

9685  if

(negative_strand_virus) {

9687  if

(has_minus_cds) {

9690  "Negative-sense single-stranded RNA virus with minus strand CDS should be genomic RNA"

,

9695  if

(has_plus_cds && ! is_synthetic && ! is_ambisense) {

9698  "Negative-sense single-stranded RNA virus with plus strand CDS should be cRNA"

,

9703  if

(has_minus_misc_feat) {

9706  "Negative-sense single-stranded RNA virus with nonfunctional minus strand misc_feature should be genomic RNA"

,

9711  if

(has_plus_misc_feat && ! is_synthetic && ! is_ambisense) {

9714  "Negative-sense single-stranded RNA virus with nonfunctional plus strand misc_feature should be cRNA"

,

9720  if

(plus_strand_virus) {

9722  if

(has_minus_cds) {

9724  "CDS should not be on minus strand of a positive-sense single-stranded RNA virus"

,

9728  if

(! is_synthetic && ! is_ambisense) {

9731  "Positive-sense single-stranded RNA virus should be genomic RNA"

,

9741

{

"root"

,

"dsDNA"

},

9742

{

"Alphasatellitidae"

,

"ssDNA"

},

9743

{

"Anelloviridae"

,

"ssDNA(-)"

},

9744

{

"Bacilladnaviridae"

,

"ssDNA"

},

9745

{

"Bidnaviridae"

,

"ssDNA"

},

9746

{

"Circoviridae"

,

"ssDNA(+/-)"

},

9747

{

"Geminiviridae"

,

"ssDNA(+/-)"

},

9748

{

"Genomoviridae"

,

"ssDNA"

},

9749

{

"Hepadnaviridae"

,

"dsDNA-RT"

},

9750

{

"Inoviridae"

,

"ssDNA(+)"

},

9751

{

"Microviridae"

,

"ssDNA(+)"

},

9752

{

"Nanoviridae"

,

"ssDNA(+)"

},

9753

{

"Ortervirales"

,

"ssRNA-RT"

},

9754

{

"Caulimoviridae"

,

"dsDNA-RT"

},

9755

{

"Parvoviridae"

,

"ssDNA(+/-)"

},

9756

{

"Alphapleolipovirus"

,

"dsDNA; ssDNA"

},

9757

{

"Riboviria"

,

"RNA"

},

9758

{

"Albetovirus"

,

"ssRNA(+)"

},

9759

{

"Alphatetraviridae"

,

"ssRNA(+)"

},

9760

{

"Alvernaviridae"

,

"ssRNA(+)"

},

9761

{

"Amalgaviridae"

,

"dsRNA"

},

9762

{

"Astroviridae"

,

"ssRNA(+)"

},

9763

{

"Aumaivirus"

,

"ssRNA(+)"

},

9764

{

"Avsunviroidae"

,

"ssRNA"

},

9765

{

"Barnaviridae"

,

"ssRNA(+)"

},

9766

{

"Benyviridae"

,

"ssRNA(+)"

},

9767

{

"Birnaviridae"

,

"dsRNA"

},

9768

{

"Botourmiaviridae"

,

"ssRNA(+)"

},

9769

{

"Botybirnavirus"

,

"dsRNA"

},

9770

{

"Bromoviridae"

,

"ssRNA(+)"

},

9771

{

"Caliciviridae"

,

"ssRNA(+)"

},

9772

{

"Carmotetraviridae"

,

"ssRNA(+)"

},

9773

{

"Chrysoviridae"

,

"dsRNA"

},

9774

{

"Closteroviridae"

,

"ssRNA(+)"

},

9775

{

"Cystoviridae"

,

"dsRNA"

},

9776

{

"Deltavirus"

,

"ssRNA(-)"

},

9777

{

"dsRNA viruses"

,

"dsRNA"

},

9778

{

"Endornaviridae"

,

"dsRNA"

},

9779

{

"Flaviviridae"

,

"ssRNA(+)"

},

9780

{

"Hepeviridae"

,

"ssRNA(+)"

},

9781

{

"Hypoviridae"

,

"ssRNA(+)"

},

9782

{

"Idaeovirus"

,

"ssRNA(+)"

},

9783

{

"Kitaviridae"

,

"ssRNA(+)"

},

9784

{

"Leviviridae"

,

"ssRNA(+)"

},

9785

{

"Luteoviridae"

,

"ssRNA(+)"

},

9786

{

"Matonaviridae"

,

"ssRNA(+)"

},

9787

{

"Megabirnaviridae"

,

"dsRNA"

},

9788

{

"Narnaviridae"

,

"ssRNA(+)"

},

9789

{

"Haploviricotina"

,

"ssRNA(-)"

},

9790

{

"Arenaviridae"

,

"ssRNA(+/-)"

},

9791

{

"Coguvirus"

,

"ssRNA(-)"

},

9792

{

"Cruliviridae"

,

"ssRNA(-)"

},

9793

{

"Fimoviridae"

,

"ssRNA(-)"

},

9794

{

"Hantaviridae"

,

"ssRNA(-)"

},

9795

{

"Leishbuviridae"

,

"ssRNA(-)"

},

9796

{

"Mypoviridae"

,

"ssRNA(-)"

},

9797

{

"Nairoviridae"

,

"ssRNA(-)"

},

9798

{

"Peribunyaviridae"

,

"ssRNA(-)"

},

9799

{

"Phasmaviridae"

,

"ssRNA(-)"

},

9800

{

"Banyangvirus"

,

"ssRNA(+/-)"

},

9801

{

"Beidivirus"

,

"ssRNA(-)"

},

9802

{

"Goukovirus"

,

"ssRNA(-)"

},

9803

{

"Horwuvirus"

,

"ssRNA(-)"

},

9804

{

"Hudivirus"

,

"ssRNA(-)"

},

9805

{

"Hudovirus"

,

"ssRNA(-)"

},

9806

{

"Kabutovirus"

,

"ssRNA(-)"

},

9807

{

"Laulavirus"

,

"ssRNA(-)"

},

9808

{

"Mobuvirus"

,

"ssRNA(-)"

},

9809

{

"Phasivirus"

,

"ssRNA(-)"

},

9810

{

"Phlebovirus"

,

"ssRNA(+/-)"

},

9811

{

"Pidchovirus"

,

"ssRNA(-)"

},

9812

{

"Tenuivirus"

,

"ssRNA(-)"

},

9813

{

"Wenrivirus"

,

"ssRNA(-)"

},

9814

{

"Wubeivirus"

,

"ssRNA(-)"

},

9815

{

"Tospoviridae"

,

"ssRNA(+/-)"

},

9816

{

"Wupedeviridae"

,

"ssRNA(-)"

},

9817

{

"Insthoviricetes"

,

"ssRNA(-)"

},

9818

{

"Nidovirales"

,

"ssRNA(+)"

},

9819

{

"Nodaviridae"

,

"ssRNA(+)"

},

9820

{

"Papanivirus"

,

"ssRNA(+)"

},

9821

{

"Partitiviridae"

,

"dsRNA"

},

9822

{

"Permutotetraviridae"

,

"ssRNA(+)"

},

9823

{

"Picobirnaviridae"

,

"dsRNA"

},

9824

{

"Picornavirales"

,

"ssRNA(+)"

},

9825

{

"Pospiviroidae"

,

"ssRNA"

},

9826

{

"Potyviridae"

,

"ssRNA(+)"

},

9827

{

"Quadriviridae"

,

"dsRNA"

},

9828

{

"Reoviridae"

,

"dsRNA"

},

9829

{

"Sarthroviridae"

,

"ssRNA(+)"

},

9830

{

"Sinaivirus"

,

"ssRNA(+)"

},

9831

{

"Solemoviridae"

,

"ssRNA(+)"

},

9832

{

"Solinviviridae"

,

"ssRNA(+)"

},

9833

{

"Togaviridae"

,

"ssRNA(+)"

},

9834

{

"Tombusviridae"

,

"ssRNA(+)"

},

9835

{

"Totiviridae"

,

"dsRNA"

},

9836

{

"Tymovirales"

,

"ssRNA(+)"

},

9837

{

"Virgaviridae"

,

"ssRNA(+)"

},

9838

{

"Virtovirus"

,

"ssRNA(+)"

},

9839

{

"ssRNA viruses"

,

"ssRNA"

},

9840

{

"unclassified ssRNA viruses"

,

"ssRNA"

},

9841

{

"unclassified ssRNA negative-strand viruses"

,

"ssRNA(-)"

},

9842

{

"unclassified ssRNA positive-strand viruses"

,

"ssRNA(+)"

},

9843

{

"unclassified viroids"

,

"ssRNA"

},

9844

{

"DNA satellites"

,

"DNA"

},

9845

{

"RNA satellites"

,

"RNA"

},

9846

{

"Smacoviridae"

,

"ssDNA"

},

9847

{

"Spiraviridae"

,

"ssDNA(+)"

},

9848

{

"Tolecusatellitidae"

,

"ssDNA"

},

9849

{

"unclassified viruses"

,

"unknown"

},

9850

{

"unclassified DNA viruses"

,

"DNA"

},

9851

{

"unclassified archaeal dsDNA viruses"

,

"dsDNA"

},

9852

{

"unclassified dsDNA phages"

,

"dsDNA"

},

9853

{

"unclassified dsDNA viruses"

,

"dsDNA"

},

9854

{

"unclassified ssDNA bacterial viruses"

,

"ssDNA"

},

9855

{

"unclassified ssDNA viruses"

,

"ssDNA"

},

9856

{

"environmental samples"

,

"unknown"

},

9872  for

(

auto

it : moltypes) {

9874  if

(it->GetIval2() == 1) {

9875

(*viral_map)[sName] = it->GetSval();

9897  return "ssRNA(+/-)"

;

9902  return "ssRNA(+/-)"

;

9907  return "ssRNA(+/-)"

;

9912  return "ssRNA(+/-)"

;

9925  if

(s_ViralMap->empty()) {

9926  for

(

const auto

& x : kViralStrandMap) {

9932  for

(

const auto

& x : s_ViralMap.

Get

()) {

9947  if

(new_mod != old_mod) {

9962  int

last_na_mod = -1;

9963  int

last_organelle = -1;

9964  int

last_partialness = -1;

9965  int

last_left_right = -1;

9970

CSeqdesc::TModif::const_iterator it = modif.begin();

9971  while

(it != modif.end()) {

10009

last_left_right = modval;

10025  int

last_na_mol = 0;

10031  if

(! seq.

IsAa

()) {

10033  "Nucleic acid with GIBB-mol = peptide"

,

10040  "GIBB-mol unknown or other used"

,

10044  if

(seq.

IsAa

()) {

10050  if

(last_na_mol != modval) {

10057

last_na_mol = modval;

10073  if

(

source

.CanGetOrigin() &&

10077  if

(

source

.CanGetOrg() &&

source

.GetOrg().CanGetOrgname()) {

10110  const CDate

& update,

10111  const CDate

& create,

10122  string

err_msg =

"Inconsistent create_date ["

;

10123

err_msg += create_str;

10124

err_msg +=

"] and update_date ["

;

10125

err_msg += update_str;

10130

err_msg, *

ctx

, desc);

10143  bool

is_wp =

false

;

10145  const CSeq_id

& sid = **sid_itr;

10150  if

(acc ==

"WP_"

) {

10158  "Inconsistent organism names ["

+ this_org.

GetTaxname

() +

10189  const string

&

type

)

10194  bool first

=

true

;

10195  bool

reported_first =

false

;

10196  bool

lastIsSplit =

false

;

10197  const string

* strp =

nullptr

;

10202

strp = &(it->first);

10212

message =

"Colliding "

+

type

+

" in gene features"

;

10214

message =

"Colliding "

+

type

+

" (with different capitalization) in gene features"

;

10220  bool

suppress_message =

false

;

10224

it->second->IsSetExcept() && it->second->IsSetExcept_text()

10225

&&

NStr::FindNoCase

(it->second->GetExcept_text(),

"trans-splicing"

) != string::npos) {

10227

suppress_message =

true

;

10231  if

(suppress_message) {

10234

(*it->second).GetLocation(),

10238

message +

", but feature locations are identical"

, *it->second);

10239

}

else if

(! is_gene_locus) {

10245  if

(! suppress_message && ((! isSplit) || (! lastIsSplit))) {

10246  if

(! reported_first) {

10249

reported_first =

true

;

10257

strp = &(it->first);

10278  const CSeq_feat

& feat = fi->GetOriginalFeature();

10304  if

(gene_it != locus_map.

end

()) {

10305  bool

found =

false

;

10314  "gene synonym has same value ("

+ syngene_it->first +

") as locus of another gene feature"

,

10315

*syngene_it->second);

10322

}

catch

(

const

exception& e) {

10323  if

(

NStr::Find

(e.what(),

"Error: Cannot resolve"

) == string::npos) {

10325  string

(

"Exception while validating colliding genes. EXCEPTION: "

) +

10334  if

(! seq.

IsNa

()) {

10339  bool

embl_ddbj =

false

;

10341  if

((*id)->IsDdbj() || (*id)->IsEmbl()) {

10352  bool

complete_genome =

false

;

10358

sequence::CDeflineGenerator defline_generator;

10359

title = defline_generator.GenerateDefline(seq, *

m_Scope

, sequence::CDeflineGenerator::fIgnoreExisting);

10363  if

(! complete_genome) {

10370

complete_genome =

true

;

10377  if

(! complete_genome) {

10383  if

(!

si

|| !

si

->GetSource().IsSetDivision() ||

si

->GetSource().GetDivision() !=

"BCT"

) {

10388  bool

bioproject_accession_set =

false

;

10391  if

(ui->GetUser().IsSetData() && ui->GetUser().IsSetType() && ui->GetUser().GetType().IsStr() &&

NStr::EqualCase

(ui->GetUser().GetType().GetStr(),

"DBLink"

)) {

10392

bioproject_accession_set = ! ui->GetUser().GetData().empty();

10397  if

(bioproject_accession_set)

10401  bool

no_gaps =

true

;

10405  if

(

delta

.IsSet()) {

10409  if

((*part)->IsLiteral()) {

10416  if

(

literal

.IsSetSeq_data() &&

literal

.GetSeq_data().IsGap()) {

10436  "No BioProject Accession exists for what appears to be a complete genome"

,

10444  const CSeq_id

* gb_id =

nullptr

;

10449  const CDbtag

* general_id =

nullptr

;

10452  switch

((*id)->Which()) {

10454

gb_id =

id

->GetPointer();

10458

gi = (*id)->GetGi();

10462

general_id = &((*id)->GetGeneral());

10470  if

(gi ==

ZERO_GI

&& gb_id) {

10479  if

(! id_set.empty()) {

10481  switch

((*id).Which()) {

10484

db_gb_id->

Assign

(*(id->GetSeqId()));

10487

db_gi = (*id).GetGi();

10491

db_general_id->

Assign

(*((*id).GetSeqId()));

10502  "New gi number ("

+ gi_str +

")"

+

10506  if

(gb_id && db_gb_id) {

10507  if

(! gb_id->

Match

(*db_gb_id)) {

10510  ") does not match one in NCBI sequence repository ("

+ db_gb_id->

AsFastaString

() +

10511  ") on gi ("

+ gi_str +

")"

, seq);

10513

}

else if

(gb_id) {

10515  "Gain of accession ("

+ gb_id->

AsFastaString

() +

") on gi ("

+

10516

gi_str +

") compared to the NCBI sequence repository"

, seq);

10517

}

else if

(db_gb_id) {

10520  ") on gi ("

+ gi_str +

") compared to the NCBI sequence repository"

, seq);

10523  string

new_gen_label, old_gen_label;

10524  if

(general_id && db_general_id) {

10527

general_id->

GetLabel

(&new_gen_label);

10529  "New general ID ("

+ new_gen_label +

10530  ") does not match one in NCBI sequence repository ("

+ old_gen_label +

10531  ") on gi ("

+ gi_str +

")"

, seq);

10533

}

else if

(general_id) {

10534

general_id->

GetLabel

(&new_gen_label);

10536  "Gain of general ID ("

+ new_gen_label +

") on gi ("

+

10537

gi_str +

") compared to the NCBI sequence repository"

, seq);

10538

}

else if

(db_general_id) {

10541  "Loss of general ID ("

+ old_gen_label +

") on gi ("

+

10542

gi_str +

") compared to the NCBI sequence repository"

, seq);

10588  ITERATE

(

string

, res,

data

.GetIupacna().Get() ) {

10589  if

(*res ==

'N'

) {

10606  ITERATE

(

string

, res,

data

.GetIupacaa().Get() ) {

10607  if

(*res ==

'N'

) {

10632  if

((*iter)->IsLoc()) {

10698  ENa_strand

strand =

f

->GetLocation().GetStrand();

10699  if

(

f

->GetData().IsCdregion()) {

10701

cds_minus =

f

->GetSeq_feat();

10703

cds_plus =

f

->GetSeq_feat();

10707

utr3_minus =

f

->GetSeq_feat();

10709

utr3_plus =

f

->GetSeq_feat();

10710  if

(! cds_plus && utr5_plus &&

x_ReportUTRPair

(*utr5_plus, *utr3_plus)) {

10712  "CDS not between 5'UTR and 3'UTR on plus strand"

, *utr3_plus);

10714

utr5_plus.

Reset

();

10715

cds_plus.

Reset

();

10716

utr3_plus.

Reset

();

10720

utr5_minus =

f

->GetSeq_feat();

10721  if

(! cds_minus && utr3_minus &&

x_ReportUTRPair

(*utr5_minus, *utr3_minus)) {

10723  "CDS not between 5'UTR and 3'UTR on minus strand"

, *utr5_minus);

10725

utr5_minus.

Reset

();

10726

cds_minus.

Reset

();

10727

utr3_minus.

Reset

();

10729

utr5_plus =

f

->GetSeq_feat();

10737

CValidError_bioseq::CmRNACDSIndex::CmRNACDSIndex()

10742

CValidError_bioseq::CmRNACDSIndex::~CmRNACDSIndex()

10754  bool match

=

false

;

10819

&& mrna.

GetExt

().

GetData

().front()->GetData().IsStr()) {

10824  if

(

id

.GetGi() == gi) {

10831

}

catch

(

const

std::exception&) {

static CRef< CScope > m_Scope

@ eExtreme_Positional

numerical value

@ eExtreme_Biological

5' and 3'

@ eErr_SEQ_INST_HTGS_STS_GSS_WGSshouldNotBeRNA

@ eErr_SEQ_INST_BadDeltaSeq

@ eErr_SEQ_DESCR_InconsistentBioSources_ConLocation

@ eErr_SEQ_FEAT_mRNAgeneRange

@ eErr_SEQ_DESCR_FinishedStatusForWGS

@ eErr_SEQ_DESCR_InconsistentTaxName

@ eErr_GENERIC_MissingPubRequirement

@ eErr_SEQ_FEAT_TRNAinsideTMRNA

@ eErr_SEQ_INST_CompleteGenomeHasGaps

@ eErr_SEQ_INST_BadSeqIdCharacter

@ eErr_SEQ_INST_CompleteTitleProblem

@ eErr_SEQ_INST_HistoryGiCollision

@ eErr_SEQ_DESCR_UnwantedCompleteFlag

@ eErr_SEQ_INST_mRNAshouldBeSingleStranded

@ eErr_SEQ_FEAT_MultipleGenCodes

@ eErr_SEQ_DESCR_DBLinkBadAssembly

@ eErr_SEQ_DESCR_WGSmasterLacksBioProject

@ eErr_SEQ_INST_HighNContentStretch

@ eErr_SEQ_INST_HighNcontent3Prime

@ eErr_SEQ_INST_TerminalGap

@ eErr_SEQ_INST_MultipleAccessions

@ eErr_SEQ_DESCR_MultipleDBLinkObjects

@ eErr_SEQ_INST_BadProteinStart

@ eErr_SEQ_FEAT_PartialProblem3Prime

@ eErr_SEQ_FEAT_ProductShouldBeWhole

@ eErr_SEQ_INST_ProteinShouldNotHaveGaps

@ eErr_SEQ_INST_ESTshouldBemRNA

@ eErr_SEQ_DESCR_BadKeywordUnverified

@ eErr_SEQ_FEAT_ITSdoesNotAbutRRNA

@ eErr_SEQ_DESCR_InvalidMolInfo

@ eErr_SEQ_DESCR_InconsistentMolInfoTechnique

@ eErr_SEQ_DESCR_NoOrganismInTitle

@ eErr_SEQ_DESCR_InconsistentMolInfo

@ eErr_SEQ_INST_TSAMasterLacksStrucComm

@ eErr_SEQ_INST_WholeComponent

@ eErr_SEQ_FEAT_BadRRNAcomponentOrder

@ eErr_SEQ_INST_ReprInvalid

@ eErr_SEQ_INST_TSAseqGapProblem

@ eErr_SEQ_INST_HTGS_STS_GSS_WGSshouldBeGenomic

@ eErr_SEQ_INST_SeqLitDataLength0

@ eErr_SEQ_INST_CircBactGenomeProblem

@ eErr_SEQ_INST_WGSMasterLacksStrucComm

@ eErr_SEQ_INST_ContigsTooShort

@ eErr_SEQ_DESCR_NoMolInfoFound

@ eErr_SEQ_PKG_OrphanedProtein

@ eErr_SEQ_INST_SeqGapBadLinkage

@ eErr_SEQ_INST_SelfReferentialSequence

@ eErr_SEQ_DESCR_TransgenicProblem

@ eErr_SEQ_INST_DeltaComponentIsGi0

@ eErr_SEQ_FEAT_CDSmRNANotMatched

@ eErr_SEQ_FEAT_FeatContentDup

@ eErr_SEQ_INST_MolNotSet

@ eErr_SEQ_DESCR_WGSMasterLacksBothBioSampleBioProject

@ eErr_SEQ_INST_GiWithoutAccession

@ eErr_SEQ_INST_MissingGaps

@ eErr_SEQ_DESCR_InvalidForType

@ eErr_SEQ_FEAT_BadRRNAcomponentOverlapRRNA

@ eErr_SEQ_DESCR_FastaBracketTitle

@ eErr_SEQ_FEAT_MisMatchAA

@ eErr_SEQ_INST_StopInProtein

@ eErr_SEQ_INST_UnknownLengthGapNot100

@ eErr_SEQ_FEAT_MultipleProtRefs

@ eErr_SEQ_FEAT_MultipleEquivPublications

@ eErr_SEQ_DESCR_DBLinkProblem

@ eErr_SEQ_INST_InvalidLen

@ eErr_SEQ_DESCR_TPAassemblyWithoutTPAKeyword

@ eErr_SEQ_DESCR_InvalidForTypeGIBB

@ eErr_SEQ_FEAT_InvalidFeatureForProtein

@ eErr_SEQ_INST_HighNContentPercent

@ eErr_SEQ_DESCR_RefGeneTrackingOnNonRefSeq

@ eErr_SEQ_FEAT_IdenticalGeneSymbolAndSynonym

@ eErr_SEQ_FEAT_MultipleEquivBioSources

@ eErr_SEQ_INST_HighNcontent5Prime

@ eErr_SEQ_INST_TSAshouldBNotBeDNA

@ eErr_SEQ_DESCR_MissingChromosome

@ eErr_SEQ_INST_BadProteinMoltype

@ eErr_SEQ_DESCR_NucleotideTechniqueOnProtein

@ eErr_SEQ_INST_CompleteCircleProblem

@ eErr_SEQ_FEAT_CDSwithMultipleMRNAs

@ eErr_SEQ_FEAT_CDSmRNAMismatchProteinIDs

@ eErr_SEQ_FEAT_CDSmRNAMismatchTranscriptIDs

@ eErr_SEQ_FEAT_PartialProblemOrganelle3Prime

@ eErr_SEQ_INST_OverlappingDeltaRange

@ eErr_SEQ_FEAT_OverlappingPeptideFeat

@ eErr_SEQ_DESCR_BadKeywordNoTechnique

@ eErr_SEQ_FEAT_ExtraProteinFeature

@ eErr_SEQ_INST_SeqLocLength

@ eErr_SEQ_INST_FarLocationExcludesFeatures

@ eErr_SEQ_DESCR_InconsistentVirusMoltype

@ eErr_SEQ_INST_IdOnMultipleBioseqs

@ eErr_SEQ_DESCR_MoltypeOtherGenetic

@ eErr_SEQ_INST_HighNpercent3Prime

@ eErr_SEQ_INST_BadSecondaryAccn

@ eErr_SEQ_INST_InvalidAlphabet

@ eErr_SEQ_FEAT_CDSonMinusStrandMRNA

@ eErr_SEQ_INST_MolNuclAcid

@ eErr_SEQ_DESCR_MoltypeOther

@ eErr_SEQ_DESCR_Inconsistent

@ eErr_SEQ_INST_ExtNotAllowed

@ eErr_SEQ_DESCR_InconsistentRefSeqMoltype

@ eErr_SEQ_FEAT_PartialProblem5Prime

@ eErr_SEQ_FEAT_CDSmRNAMismatchLocation

@ eErr_SEQ_INST_TrailingX

@ eErr_SEQ_DESCR_InconsistentDates

@ eErr_SEQ_INST_CircularProtein

@ eErr_SEQ_INST_NoIdOnBioseq

@ eErr_SEQ_INST_PartsOutOfOrder

@ eErr_SEQ_FEAT_BadFullLengthFeature

@ eErr_SEQ_DESCR_InconsistentGenBankblocks

@ eErr_SEQ_FEAT_FarLocation

@ eErr_SEQ_INST_MolinfoOther

@ eErr_SEQ_INST_BadSeqIdLength

@ eErr_SEQ_INST_SeqDataNotAllowed

@ eErr_SEQ_INST_BadHTGSeq

@ eErr_SEQ_FEAT_PartialProblemOrganelle5Prime

@ eErr_SEQ_DESCR_NoKeywordHasTechnique

@ eErr_SEQ_INST_UnexpectedIdentifierChange

@ eErr_SEQ_INST_WGSseqGapProblem

@ eErr_SEQ_DESCR_MultipleStrucComms

@ eErr_SEQ_FEAT_InconsistentRRNAstrands

@ eErr_SEQ_FEAT_PartialProblemNotSpliceConsensus5Prime

@ eErr_SEQ_FEAT_BadRRNAcomponentOverlapAndOrder

@ eErr_SEQ_DESCR_DBLinkBadFormat

@ eErr_SEQ_FEAT_InvalidForType

@ eErr_SEQ_FEAT_GeneLocusCollidesWithLocusTag

@ eErr_SEQ_FEAT_CDSgeneRange

@ eErr_SEQ_INST_MitoMetazoanTooLong

@ eErr_SEQ_DESCR_CompleteGenomeLacksBioProject

@ eErr_SEQ_DESCR_CollidingPubMedID

@ eErr_SEQ_FEAT_DuplicateFeat

@ eErr_SEQ_INST_ExtBadOrMissing

@ eErr_SEQ_FEAT_FeatureProductInconsistency

@ eErr_SEQ_DESCR_SyntheticConstructWrongMolType

@ eErr_SEQ_FEAT_DuplicateGeneConflictingLocusTag

@ eErr_SEQ_DESCR_MolInfoConflictsWithBioSource

@ eErr_SEQ_INST_InstantiatedGapMismatch

@ eErr_SEQ_FEAT_UTRdoesNotAbutCDS

@ eErr_SEQ_INST_PartialInconsistent

@ eErr_SEQ_FEAT_CollidingLocusTags

@ eErr_SEQ_DESCR_MultipleNames

@ eErr_SEQ_FEAT_PartialProblemNotSpliceConsensus3Prime

@ eErr_SEQ_INST_BadSeqIdFormat

@ eErr_SEQ_FEAT_NoCDSbetweenUTRs

@ eErr_SEQ_INST_ZeroGiNumber

@ eErr_INTERNAL_Exception

@ eErr_SEQ_INST_ConflictingIdsOnBioseq

@ eErr_SEQ_DESCR_WrongOrganismFor16SrRNA

@ eErr_SEQ_INST_HistAssemblyMissing

@ eErr_SEQ_PKG_NoCdRegionPtr

@ eErr_SEQ_INST_InternalNsInSeqRaw

@ eErr_SEQ_INST_TerminalNs

@ eErr_SEQ_FEAT_SeqFeatXrefProblem

@ eErr_SEQ_DESCR_BadKeywordForStrucComm

@ eErr_SEQ_FEAT_CDSdoesNotMatchVDJC

@ eErr_SEQ_DESCR_InconsistentMolType

@ eErr_SEQ_FEAT_CDSmRNAMissingProteinIDs

@ eErr_SEQ_DESCR_WGSmasterLacksBioSample

@ eErr_SEQ_FEAT_MultiIntervalIntron

@ eErr_SEQ_DESCR_InconsistentTPA

@ eErr_SEQ_FEAT_LocusTagProblem

@ eErr_SEQ_INST_HighNpercent5Prime

@ eErr_SEQ_DESCR_ScaffoldLacksBioProject

@ eErr_SEQ_INST_InternalNsAdjacentToGap

@ eErr_SEQ_FEAT_PartialProblem

@ eErr_SEQ_DESCR_MultipleComments

@ eErr_SEQ_INST_SeqDataNotFound

@ eErr_SEQ_INST_InternalGapsInSeqRaw

@ eErr_SEQ_FEAT_MultipleGeneOverlap

@ eErr_SEQ_INST_DuplicateSegmentReferences

@ eErr_SEQ_DESCR_InconsistentWGSFlags

@ eErr_SEQ_FEAT_CDSmRNAmismatchCount

@ eErr_SEQ_FEAT_UTRdoesNotExtendToEnd

@ eErr_SEQ_INST_SeqLitGapLength0

@ eErr_SEQ_INST_SeqIdNameHasSpace

@ eErr_SEQ_DESCR_ProteinTechniqueOnNucleotide

@ eErr_SEQ_DESCR_CollidingPublications

@ eErr_SEQ_FEAT_PartialProblemmRNASequence3Prime

@ eErr_SEQ_INST_InternalNsInSeqLit

@ eErr_SEQ_INST_SeqDataLenWrong

@ eErr_SEQ_INST_GapInProtein

@ eErr_SEQ_INST_SeqGapProblem

@ eErr_SEQ_INST_InvalidResidue

@ eErr_SEQ_FEAT_PartialProblemmRNASequence5Prime

@ eErr_SEQ_FEAT_InvalidFeatureForMRNA

@ eErr_SEQ_FEAT_CDSwithNoMRNA

@ eErr_GENERIC_DeltaSeqError

@ eErr_SEQ_DESCR_UnculturedGenome

ncbi::TMaskedQueryRegions mask

const string & GetLineage(void) const

const string & GetTaxname(void) const

bool IsSetLineage(void) const

bool IsSetTaxname(void) const

size_t IterateFeatures(Fnc m)

CSeq_entry * GetParentEntry(void) const

CConstRef< CSeqdesc > GetClosestDescriptor(CSeqdesc::E_Choice choice, int *level=NULL) const

TSeqPos GetLength(void) const

void GetLabel(string *label, ELabelType type, bool worst=false) const

bool AssignMatch(TmRNAList &mrna_map, CFeatTree &feat_tree, CScope &scope)

bool Overlaps(const CSeq_feat &mrna) const

sequence::EOverlapType m_OverlapType

const CSeq_feat & GetSeqfeat() const

bool AssignXrefMatch(TmRNAList &unmatched_mrnas, const CTSE_Handle &tse)

CConstRef< CSeq_feat > m_Cds

bool AssignOverlapMatch(TmRNAList &unmatched_mrnas, CScope &scope)

CCdsMatchInfo(const CSeq_feat &cds, CScope *scope)

bool AreMrnaProductsUnique()

CRef< CMrnaMatchInfo > m_BestMatch

const CMrnaMatchInfo & GetMatch() const

void SetMatch(CRef< CMrnaMatchInfo > match)

list< CConstRef< CSeq_feat > > m_OtherMrnas

void UpdateOtherMrnas(const TmRNAList &unmatched_mrnas)

ECompare Compare(const CDate &date) const

void GetDate(string *label, bool year_only=false) const

Append a standardized string representation of the date to the label.

@ eCompare_before

*this comes first.

@ eCompare_same

They're equivalent.

void GetLabel(string *label) const

bool Match(const CDbtag &dbt2) const

int Compare(const CDbtag &dbt2) const

CSeqFeatData::ESubtype GetSubtype(void) const

CRef< CFeatureIndex > GetBestParent(void)

CRef< CFeatureIndex > GetBestGene(void)

CSeq_feat_Handle GetSeqFeatHandle(void) const

const CMappedFeat GetMappedFeat(void) const

CConstRef< CSeq_loc > GetMappedLocation(void) const

CRef< feature::CFeatTree > GetFeatTreeFromCache(const CSeq_loc &loc, CScope &scope)

static bool IsPseudo(const CSeq_feat &feat)

CConstRef< CSeq_feat > GetGeneFromCache(const CSeq_feat *feat, CScope &scope)

void GetLabel(string *label) const

bool IsSuppressed(void) const

@Imp_feat.hpp User-defined methods of the data storage class.

CConstRef< CSeq_feat > m_Mrna

bool Overlaps(const CSeq_feat &cds) const

CMrnaMatchInfo(const CSeq_feat &mrna, CScope *scope)

void SetPseudo(bool val=true)

const CSeq_feat & GetSeqfeat() const

bool OkWithoutCds(bool isGenbank=false) const

Exceptions for objmgr/util library.

@OrgMod.hpp User-defined methods of the data storage class.

const string & GetLineage(void) const

bool IsSetLineage(void) const

@Pubdesc.hpp User-defined methods of the data storage class.

@RNA_ref.hpp User-defined methods of the data storage class.

T & Get(void)

Create the variable if not created yet, return the reference.

CRef< CBioseqIndex > GetBioseqIndex(void)

ESubtype GetSubtype(void) const

@ eSubtype_transit_peptide_aa

@ eSubtype_sig_peptide_aa

@ eSubtype_mat_peptide_aa

CSeq_entry * GetParentEntry(void) const

namespace ncbi::objects::

const CGene_ref * GetGeneXref(void) const

See related function in util/feature.hpp.

static bool IsAa(EMol mol)

static string GetMoleculeClass(EMol mol)

static bool IsNa(EMol mol)

Seq-loc iterator class – iterates all intervals from a seq-loc in the correct order.

static TSeqPos Convert(const CSeq_data &in_seq, CSeq_data *out_seq, CSeq_data::E_Choice to_code, TSeqPos uBeginIdx=0, TSeqPos uLength=0, bool bAmbig=false, Uint4 seed=17734276)

static void Validate(const CSeq_data &in_seq, vector< TSeqPos > *badIdx, TSeqPos uBeginIdx=0, TSeqPos uLength=0)

Base class for all serializable objects.

static bool NeedsNoText(const TSubtype &subtype)

CBioseq_Handle GetBioseqHandle(const CSeq_id &id) const

Get Bioseq handle from this TSE.

TSeq_feat_Handles GetFeaturesWithId(CSeqFeatData::E_Choice type, TFeatureIdInt id) const

bool GetInheritedPropertyDefines(const string &prop_name, TInfoList &results_out, TTaxId subtree_root=TAX_ID_CONST(1))

bool GetScientificName(TTaxId tax_id, string &name_out)

list< CRef< CTaxon1_info > > TInfoList

CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...

Template class for iteration on objects of class C (non-modifiable version)

bool IsRefGeneTracking() const

bool HasField(const string &str, const string &delim=".", NStr::ECase use_case=NStr::eCase) const

Verify that a named field exists.

const CUser_field & GetField(const string &str, const string &delim=".", NStr::ECase use_case=NStr::eCase) const

Access a named field in this user object.

bool IsStructuredComment() const

EObjectType GetObjectType() const

void ValidateSeqAnnot(const CSeq_annot_Handle &annot)

void ValidateSeqAnnotContext(const CSeq_annot &annot, const CBioseq &seq)

static CSeq_entry_Handle GetAppropriateXrefParent(CSeq_entry_Handle seh)

void PostErr(EDiagSev sv, EErrType et, const string &msg, const CSerialObject &obj)

static bool IsPdb(const CBioseq &seq)

void ValidateUpdateDateContext(const CDate &update, const CDate &create, const CBioseq &seq, const CSeqdesc &desc)

void ValidateOrgContext(const COrg_ref &this_org, const COrg_ref &org, const CBioseq &seq, const CSeqdesc &desc)

CBioseq_Handle m_CurrentHandle

void ValidateInst(const CBioseq &seq)

static bool IsRefSeq(const CBioseq &seq)

void x_ValidateMultiplePubs(const CBioseq_Handle &bsh)

bool IsHistAssemblyMissing(const CBioseq &seq)

void ReportBadAssemblyGap(const CBioseq &seq)

static bool IsSelfReferential(const CBioseq &seq)

EDiagSev x_DupFeatSeverity(const CSeq_feat &curr, const CSeq_feat &prev, bool viral, bool htgs, bool same_annot, bool same_label)

CRef< CSeq_loc > GetLocFromSeq(const CBioseq &seq)

void x_ValidateBarcode(const CBioseq &seq)

void x_CompareStrings(const TStrFeatMap &str_feat_map, const string &type)

void x_CheckGeneralIDs(const CBioseq &seq)

void x_TranscriptIDsMatch(const string &protein_id, const CSeq_feat &cds)

static bool IsTSAAccession(const CSeq_id &id)

static bool IsEmblOrDdbj(const CBioseq &seq)

void x_CheckMrnaProteinLink(const CCdsMatchInfo &cds_match)

bool x_IsRangeGap(const CBioseq_Handle &seq, int start, int stop)

void ValidateBioseq(const CBioseq &seq)

void ValidateWGSMaster(CBioseq_Handle bsh)

CValidError_descr m_DescrValidator

void ValidateDeltaLoc(const CSeq_loc &loc, const CBioseq &seq, TSeqPos &len)

bool x_IsSameAsCDS(const CMappedFeat &feat)

void x_ValidateMolInfoForBioSource(const CBioSource &src, const CMolInfo &minfo, const CSeqdesc &desc)

void x_CheckForMultiplemRNAs(CCdsMatchInfo &cds_match, const TmRNAList &unmatched_mrnas)

void ValidateHistory(const CBioseq &seq)

void x_ValidateCompletness(const CBioseq &seq, const CMolInfo &mi)

bool SuppressTrailingXMsg(const CBioseq &seq)

void ValidateMolInfoContext(const CMolInfo &minfo, int &seq_biomol, int &tech, int &completeness, const CBioseq &seq, const CSeqdesc &desc)

bool x_HasCitSub(CBioseq_Handle bsh) const

static bool x_HasGap(const CBioseq &seq)

void ValidateSeqParts(const CBioseq &seq)

void x_ReportOverlappingPeptidePair(CSeq_feat_Handle f1, CSeq_feat_Handle f2, const CBioseq &bioseq, bool &reported_last_peptide)

void ValidateSegRef(const CBioseq &seq)

void x_CheckSingleStrandedRNAViruses(const CBioSource &source, const string &lineage, const string &stranded_mol, const CMolInfo::TBiomol biomol, const CBioseq_Handle &bsh, const CSerialObject &obj, const CSeq_entry *ctx)

void ValidateSecondaryAccConflict(const string &primary_acc, const CBioseq &seq, int choice)

static bool IsWGSMaster(const CBioseq &seq, CScope &scope)

void x_ValidateTitle(const CBioseq &seq)

void ValidateMultipleGeneOverlap(const CBioseq_Handle &bsh)

void ValidateSeqFeatContext(const CBioseq &seq, bool is_complete)

void ValidateDelta(const CBioseq &seq)

static bool HasBadWGSGap(const CBioseq &seq)

static bool x_HasPGAPStructuredComment(CBioseq_Handle bsh)

bool m_report_missing_chromosome

CValidError_annot m_AnnotValidator

void ValidateTwintrons(const CBioseq &seq)

unsigned int x_IdXrefsNotReciprocal(const CSeq_feat &cds, const CSeq_feat &mrna)

void x_ValidateGeneCDSmRNACounts()

void x_ReportStartStopPartialProblem(int partial_type, bool at_splice_or_gap, bool abuts_n, const CSeq_feat &feat)

static size_t x_BadMetazoanMitochondrialLength(const CBioSource &src, const CSeq_inst &inst)

void ReportBadTSAGap(const CBioseq &seq)

void ValidateSeqGap(const CSeq_gap &gap, const CBioseq &seq)

void ValidateBadGeneOverlap(const CSeq_feat &feat)

bool x_IsPartialAtSpliceSiteOrGap(const CSeq_loc &loc, unsigned int tag, bool &bad_seq, bool &is_gap, bool &abuts_n)

void x_SetupCommonFlags(CBioseq_Handle bsh)

bool m_splicing_not_expected

bool x_IsDeltaLitOnly(const CSeq_inst &inst) const

void ValidateNsAndGaps(const CBioseq &seq)

void ValidateCompleteGenome(const CBioseq &seq)

bool x_IsMicroRNA() const

CValidError_bioseq(CValidError_imp &imp)

void ValidateRawConst(const CBioseq &seq)

void ValidateBioseqContext(const CBioseq &seq)

bool CdError(const CBioseq_Handle &bsh)

void x_ReportLineageConflictWithMol(const string &lineage, const string &stranded_mol, const CMolInfo::TBiomol biomol, CSeq_inst::EMol mol, const CSerialObject &obj, const CSeq_entry *ctx)

bool ValidateRepr(const CSeq_inst &inst, const CBioseq &seq)

void ValidateFeatPartialInContext(const CMappedFeat &feat, bool is_complete)

void ValidateGBBlock(const CGB_block &gbblock, const CBioseq &seq, const CSeqdesc &desc)

bool IsMrna(const CBioseq_Handle &bsh)

void ReportBadWGSGap(const CBioseq &seq)

bool x_SuppressDicistronic(const CSeq_feat_Handle &f1, const CSeq_feat_Handle &f2, bool fruit_fly)

static bool IsWGSAccession(const CSeq_id &id)

void ValidateSeqLen(const CBioseq &seq)

bool x_PartialAdjacentToIntron(const CSeq_loc &loc)

void x_CheckOrigProteinAndTranscriptIds(const CCdsMatchInfo &cds_match)

size_t GetDataLen(const CSeq_inst &inst)

void CheckForPubOnBioseq(const CBioseq &seq)

void x_CalculateNsStretchAndTotal(const CSeqVector &seqvec, TSeqPos &num_ns, TSeqPos &max_stretch, bool &n5, bool &n3)

void CheckForMolinfoOnBioseq(const CBioseq &seq)

static bool IsAllNs(const CSeqVector &vec)

static string s_GetStrandedMolStringFromLineage(const string &lineage)

bool GetTSAConflictingBiomolTechErrors(const CBioseq &seq)

bool GraphsOnBioseq() const

void CheckTpaHistory(const CBioseq &seq)

static bool IsPartial(const CBioseq &seq, CScope &scope)

const CCacheImpl::TFeatValue * m_AllFeatIt

void x_ValidateCDSmRNAmatch(const CBioseq_Handle &seq)

void ReportModifInconsistentError(int new_mod, int &old_mod, const CSeqdesc &desc, const CSeq_entry &ctx)

static bool x_IgnoreEndGap(CBioseq_Handle bsh, CSeq_gap::TType gap_type)

static bool x_ParentAndComponentLocationsDiffer(CBioseq_Handle bsh, CBioSource::TGenome parent_location)

void x_ValidateCDSVDJCmatch(const CBioseq_Handle &seq)

bool x_ShowBioProjectWarning(const CBioseq &seq)

void CheckForMultipleStructuredComments(const CBioseq &seq)

void ValidateCollidingGenes(const CBioseq &seq)

bool x_IdXrefsAreReciprocal(const CSeq_feat &cds, const CSeq_feat &mrna)

static bool IsGenbank(const CBioseq &seq)

void x_ReportDuplicatePubLabels(const CBioseq &seq, const vector< CTempString > &labels)

void ValidateSeqIds(const CBioseq &seq)

void x_ReportInternalPartial(const CSeq_feat &feat)

void ValidateModifDescriptors(const CBioseq &seq)

void x_ReportSuspiciousUseOfComplete(const CBioseq &seq, EDiagSev sev)

CValidError_feat m_FeatValidator

static int PctNs(CBioseq_Handle bsh)

void ReportBadGenomeGap(const CBioseq &seq)

static bool IsWp(CBioseq_Handle bsh)

void ValidateDupOrOverlapFeats(const CBioseq &seq)

bool x_MatchesOverlappingFeaturePartial(const CMappedFeat &feat, unsigned int partial_type)

void CheckForMissingChromosome(CBioseq_Handle bsh)

bool IsIdIn(const CSeq_id &id, const CBioseq &seq)

void ValidateMoltypeDescriptors(const CBioseq &seq)

size_t NumOfIntervals(const CSeq_loc &loc)

void x_ReportImproperPartial(const CSeq_feat &feat)

bool IsFlybaseDbxrefs(const TDbtags &dbxrefs)

void CheckSourceDescriptor(const CBioseq_Handle &bsh)

void x_ReportGeneOverlapError(const CSeq_feat &feat, const string &gene_label)

void x_CheckForMultipleComments(CBioseq_Handle bsh)

void ValidateIDSetAgainstDb(const CBioseq &seq)

static bool IsMaster(const CBioseq &seq)

bool x_IsActiveFin() const

bool x_ReportUTRPair(const CSeq_feat &utr5, const CSeq_feat &utr3)

void x_ValidateAbuttingRNA(const CBioseq_Handle &seq)

void x_ValidateSourceFeatures(const CBioseq_Handle &bsh)

void ValidateSeqId(const CSeq_id &id, const CBioseq &ctx, bool longer_general=false)

~CValidError_bioseq() override

void x_ValidateAbuttingUTR(const CBioseq_Handle &seq)

bool x_ReportDupOverlapFeaturePair(const CSeq_feat_Handle &f1, const CSeq_feat_Handle &f2, bool fruit_fly, bool viral, bool htgs)

void x_ValidateCDSagainstVDJC(const CBioseq_Handle &seq)

static bool IsWGS(const CBioseq &seq)

size_t x_CountAdjacentNs(const CSeq_literal &lit)

void ValidateSeqDescContext(const CBioseq &seq)

void x_ValidateOverlappingRNAFeatures(const CBioseq_Handle &bsh)

bool GetTSANStretchErrors(const CBioseq &seq)

const CCacheImpl::TFeatValue * m_GeneIt

void GapByGapInst(const CBioseq &seq)

void x_ValidatePubFeatures(const CBioseq_Handle &bsh)

void ValidateSeqDescr(const CSeq_descr &descr, const CSeq_entry &ctx)

bool ValidateStructuredComment(const CSeqdesc &desc, bool report)

void ValidateSeqFeatContext(const CSeq_feat &feat, const CBioseq &seq)

void SetScope(CScope &scope)

void SetTSE(CSeq_entry_Handle seh)

void ValidateGraphsOnBioseq(const CBioseq &seq)

const CSeq_entry_Handle & GetTSEH()

void PostErr(EDiagSev sv, EErrType et, const string &msg, const CSerialObject &obj)

bool IsSyntheticConstruct(const CBioSource &src)

bool HasGiOrAccnVer() const

const SValidatorContext & GetContext() const

void AddBioseqWithNoBiosource(const CBioseq &seq)

CConstRef< CSeq_feat > GetCachedGene(const CSeq_feat *f)

bool IsValidateIdSet() const

void PostObjErr(EDiagSev sv, EErrType et, const string &msg, const CSerialObject &obj, const CSeq_entry *ctx=nullptr)

static bool IsWGSIntermediate(const CBioseq &seq)

bool IsNoCitSubPubs() const

CConstRef< CSeq_feat > GetCDSGivenProduct(const CBioseq &seq)

CBioseq_Handle GetLocalBioseqHandle(const CSeq_id &id)

bool IsSeqSubmitParent() const

bool x_IsFarFetchFailure(const CSeq_loc &loc)

void AddBioseqWithNoPub(const CBioseq &seq)

bool IsGenomeSubmission() const

void AddProtWithoutFullRef(const CBioseq_Handle &seq)

bool IsArtificial(const CBioSource &src)

void ValidateBioSourceForSeq(const CBioSource &bsrc, const CSerialObject &obj, const CSeq_entry *ctx, const CBioseq_Handle &bsh)

void IncrementTpaWithHistoryCount()

bool IsNoBioSource() const

bool IsLocalGeneralOnly() const

void SetFarFetchFailure()

void IncrementTpaWithoutHistoryCount()

bool IsRefSeqConventions() const

bool IsIndexerVersion() const

CGeneCache & GetGeneCache()

bool IsSmallGenomeSet() const

void ValidateSeqLoc(const CSeq_loc &loc, const CBioseq_Handle &seq, bool report_abutting, const string &prefix, const CSerialObject &obj, bool lowerSev=false)

bool DoCompareVDJCtoCDS() const

bool ShouldSubdivide() const

bool IsTransgenic(const CBioSource &bsrc)

vector< string > m_unpublished_labels

vector< string > m_published_labels

const TFeatValue & GetFeatFromCache(const SFeatKey &featKey)

AutoPtr< TFeatValue > GetFeatFromCacheMulti(const vector< SFeatKey > &featKeys)

const CPubdescInfo & GetPubdescToInfo(CConstRef< CPubdesc > pub)

static const CSeqFeatData::ESubtype kAnyFeatSubtype

static const CSeqFeatData::E_Choice kAnyFeatType

std::vector< CMappedFeat > TFeatValue

@ fLabel_Unique

Append a unique tag [V1].

container_type::const_iterator const_iterator

container_type::iterator iterator

const_iterator begin() const

const_iterator end() const

const_iterator find(const key_type &key) const

const_iterator find(const key_type &key) const

const_iterator end() const

iterator insert(const value_type &val)

container_type::iterator iterator

container_type::value_type value_type

iterator_bool insert(const value_type &val)

const_iterator find(const key_type &key) const

const_iterator end() const

Include a standard set of the NCBI C++ Toolkit most basic headers.

The NCBI C++ standard methods for dealing with std::string.

static const char si[8][64]

bool AllowOrphanedProtein(const CBioseq &seq, bool force_refseq=false)

static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)

static DLIST_TYPE *DLIST_NAME() last(DLIST_LIST_TYPE *list)

static DLIST_TYPE *DLIST_NAME() prev(DLIST_LIST_TYPE *list, DLIST_TYPE *item)

static DLIST_TYPE *DLIST_NAME() next(DLIST_LIST_TYPE *list, DLIST_TYPE *item)

static const char * str(char *buf, int n)

#define FOR_EACH_USERFIELD_ON_USEROBJECT(Itr, Var)

FOR_EACH_USERFIELD_ON_USEROBJECT EDIT_EACH_USERFIELD_ON_USEROBJECT.

forward_list< Gene > TGeneList

SStrictId_Entrez::TId TEntrezId

TEntrezId type for entrez ids which require the same strictness as TGi.

unsigned int TSeqPos

Type for sequence locations and lengths.

#define ITERATE(Type, Var, Cont)

ITERATE macro to sequence through container elements.

#define NON_CONST_ITERATE(Type, Var, Cont)

Non constant version of ITERATE macro.

#define TAX_ID_FROM(T, value)

void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)

#define ERR_POST_X(err_subcode, message)

Error posting with default error code and given error subcode.

#define ERR_POST(message)

Error posting with file, line number information but without error codes.

EDiagSev

Severity level for the posted diagnostics.

#define LOG_POST_XX(error_name, err_subcode, message)

@ eDiag_Info

Informational message.

@ eDiag_Error

Error message.

@ eDiag_Warning

Warning message.

@ eDiag_Fatal

Fatal error – guarantees exit (or abort).

@ eDiag_Critical

Critical error message.

void Critical(CExceptionArgs_Base &args)

void Error(CExceptionArgs_Base &args)

const string & GetMsg(void) const

Get message string.

virtual const char * what(void) const noexcept

Standard report (includes full backlog).

const string & FindName(TEnumValueType value, bool allowBadValue) const

Find name of the enum by its numeric value.

const TPrim & Get(void) const

#define ENUM_METHOD_NAME(EnumName)

const string AsFastaString(void) const

static EAccessionInfo IdentifyAccession(const CTempString &accession, TParseFlags flags=fParse_AnyRaw)

Deduces information from a bare accession a la WHICH_db_accession; may report false negatives on prop...

virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)

Optimized implementation of CSerialObject::Assign, which is not so efficient.

static const size_t kMaxLocalIDLength

ID length restrictions.

void GetLabel(string *label, ELabelType type=eDefault, TLabelFlags flags=fLabel_Default) const

Append a label for this Seq-id to the supplied string.

CConstRef< CSeq_id > GetSeqId(void) const

EAccessionInfo

For IdentifyAccession (below)

int CompareOrdered(const CSeq_id &sid2) const

bool Match(const CSeq_id &sid2) const

Match() - TRUE if SeqIds are equivalent.

static bool IsValidLocalID(const CTempString &s)

Perform rudimentary validation on potential local IDs, whose contents should be pure ASCII and limite...

static const size_t kMaxGeneralTagLength

CSeq_id::E_Choice Which(void) const

string GetLabel(const CSeq_id &id)

static const size_t kMaxGeneralDBLength

@ e_YES

SeqIds compared, and are equivalent.

@ eContent

Untagged human-readable accession or the like.

bool IsPartialStart(ESeqLocExtremes ext) const

check start or stop of location for e_Lim fuzz

ENa_strand GetStrand(void) const

Get the location's strand.

TRange GetTotalRange(void) const

TSeqPos GetStart(ESeqLocExtremes ext) const

Return start and stop positions of the seq-loc.

CConstRef< CSeq_loc > GetRangeAsSeq_loc(void) const

Get seq-loc for the current iterator position.

void Add(const CSeq_loc &other)

Simple adding of seq-locs.

bool IsSetStrand(EIsSetStrand flag=eIsSetStrand_Any) const

Check if strand is set for any/all part(s) of the seq-loc depending on the flag.

const CSeq_loc & GetEmbeddingSeq_loc(void) const

Get the nearest seq-loc containing the current range.

const CSeq_id * GetId(void) const

Get the id of the location; returns NULL if it has multiple ids or no id at all.

TRange GetRange(void) const

Get the range.

ENa_strand GetStrand(void) const

void GetLabel(string *label) const

Appends a label suitable for display (e.g., error messages) label must point to an existing string ob...

bool IsPartialStop(ESeqLocExtremes ext) const

TSeqPos GetStop(ESeqLocExtremes ext) const

CConstBeginInfo ConstBegin(const C &obj)

Get starting point of non-modifiable object hierarchy.

CMappedFeat GetParent(const CMappedFeat &feat)

Return nearest parent of a feature.

CMappedFeat GetBestOverlappingFeat(const CMappedFeat &feat, CSeqFeatData::ESubtype need_subtype, sequence::EOverlapType overlap_type, CFeatTree *feat_tree=0, const SAnnotSelector *base_sel=0)

@ fFGL_Content

Include its content if there is any.

const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)

If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...

TSeqPos GetLength(const CSeq_id &id, CScope *scope)

Get sequence length if scope not null, else return max possible TSeqPos.

Int8 TestForOverlapEx(const CSeq_loc &loc1, const CSeq_loc &loc2, EOverlapType type, CScope *scope=0, TOverlapFlags flags=fOverlap_Default)

Updated version of TestForOverlap64().

int SeqLocPartialCheck(const CSeq_loc &loc, CScope *scope)

sequence::ECompare Compare(const CSeq_loc &loc1, const CSeq_loc &loc2, CScope *scope)

Returns the sequence::ECompare containment relationship between CSeq_locs.

bool IsOneBioseq(const CSeq_loc &loc, CScope *scope)

Returns true if all embedded CSeq_ids represent the same CBioseq, else false.

bool IsSameBioseq(const CSeq_id &id1, const CSeq_id &id2, CScope *scope, CScope::EGetBioseqFlag get_flag=CScope::eGetBioseq_All)

Determines if two CSeq_ids represent the same CBioseq.

@ eSeqlocPartial_Nointernal

@ eSeqlocPartial_Complete

@ eSeqlocPartial_Limwrong

@ fCompareOverlapping

Check if seq-locs are overlapping.

@ eOverlap_SubsetRev

1st is a subset of 2nd ranges

@ eOverlap_CheckIntervals

2nd is a subset of 1st with matching boundaries

@ eOverlap_Contains

2nd contains 1st extremes

@ eOverlap_CheckIntRev

1st is a subset of 2nd with matching boundaries

@ eOverlap_Simple

any overlap of extremes

@ eOverlap_Contained

2nd contained within 1st extremes

@ eOverlap_Subset

2nd is a subset of 1st ranges

@ eSame

CSeq_locs contain each other.

@ eContained

First CSeq_loc contained by second.

@ eNoOverlap

CSeq_locs do not overlap or abut.

const CSeq_feat * GetCDSForProduct(const CBioseq &product, CScope *scope)

Get the encoding CDS feature of a given protein sequence.

bool IsPseudo(const CSeq_feat &feat, CScope &scope)

Determines whether given feature is pseudo, using gene associated with feature if necessary. Checks to...

CConstRef< CSeq_feat > GetOverlappingOperon(const CSeq_loc &loc, CScope &scope)

const CSeq_feat * GetPROTForProduct(const CBioseq &product, CScope *scope)

Get the mature peptide feature of a protein.

vector< TFeatScore > TFeatScores

void GetOverlappingFeatures(const CSeq_loc &loc, CSeqFeatData::E_Choice feat_type, CSeqFeatData::ESubtype feat_subtype, EOverlapType overlap_type, TFeatScores &feats, CScope &scope, const TBestFeatOpts opts=0, CGetOverlappingFeaturesPlugin *plugin=NULL)

Find all features overlapping the location.

static void Translate(const string &seq, string &prot, const CGenetic_code *code, bool include_stop=true, bool remove_trailing_X=false, bool *alt_start=NULL, bool is_5prime_complete=true, bool is_3prime_complete=true)

Translate a string using a specified genetic code.

static CRef< CObjectManager > GetInstance(void)

Return the existing object manager or create one.

CSeq_entry_Handle AddTopLevelSeqEntry(CSeq_entry &top_entry, TPriority pri=kPriority_Default, EExist action=eExist_Default)

Add seq_entry; default priority is higher than for defaults or loaders. Add object to the scope with p...

CBioseq_Handle GetBioseqHandle(const CSeq_id &id)

Get bioseq handle by seq-id.

void AddDefaults(TPriority pri=kPriority_Default)

Add default data loaders from object manager.

CSeq_feat_Handle GetSeq_featHandle(const CSeq_feat &feat, EMissing action=eMissing_Default)

vector< CSeq_id_Handle > TIds

@ eGetBioseq_All

Search bioseq, load if not loaded yet.

bool IsSetExcept(void) const

const CFeat_id & GetId(void) const

bool IsSetInst_Mol(void) const

bool IsSetComment(void) const

const CTSE_Handle & GetTSE_Handle(void) const

Get CTSE_Handle of containing TSE.

CConstRef< CBioseq > GetCompleteBioseq(void) const

Get the complete bioseq.

TClass GetClass(void) const

CSeq_entry_Handle GetParentEntry(void) const

Get parent Seq-entry handle.

TBioseqCore GetBioseqCore(void) const

Get bioseq core structure.

virtual CConstRef< CSeq_feat > GetSeq_feat(void) const

const CSeqFeatData & GetData(void) const

TSeqPos GetBioseqLength(void) const

CSeq_entry_Handle GetSeq_entry_Handle(void) const

Get parent Seq-entry handle.

bool IsSetExcept_text(void) const

TInst_Mol GetInst_Mol(void) const

bool IsSetProduct(void) const

bool IsSetInst_Length(void) const

TInst_Topology GetInst_Topology(void) const

const string & GetComment(void) const

TInst_Length GetInst_Length(void) const

const string & GetExcept_text(void) const

bool IsSetInst(void) const

void Reset(void)

Reset handle and make it not to point to any bioseq.

bool IsSetInst_Repr(void) const

bool IsSetClass(void) const

CConstRef< CSeq_entry > GetCompleteSeq_entry(void) const

Complete and get const reference to the seq-entry.

TInst_Repr GetInst_Repr(void) const

CScope & GetScope(void) const

Get scope this handle belongs to.

CConstRef< CSeq_feat > GetOriginalSeq_feat(void) const

CSeq_entry_Handle GetExactComplexityLevel(CBioseq_set::EClass cls) const

Return level with exact complexity, or empty handle if not found.

CSeqFeatData::ESubtype GetFeatSubtype(void) const

bool IsSetInst_Topology(void) const

CSeq_entry_Handle GetTopLevelEntry(void) const

Get top level Seq-entry handle.

CSeq_entry_Handle GetParentEntry(void) const

Get parent Seq-entry handle.

const TId & GetId(void) const

TMol GetBioseqMolType(void) const

Get some values from core:

CSeqVector GetSeqVector(EVectorCoding coding, ENa_strand strand=eNa_strand_plus) const

Get sequence: Iupacna or Iupacaa if use_iupac_coding is true.

bool CanGetInst_Mol(void) const

const TInst & GetInst(void) const

@ eCoding_Ncbi

Set coding to binary coding (Ncbi4na or Ncbistdaa)

@ eCoding_Iupac

Set coding to printable coding (Iupacna or Iupacaa)

TSeqPos GetEndPosition(void) const

return end position of current segment in sequence (exclusive)

SSeqMapSelector & SetResolveCount(size_t res_cnt)

Set max depth of resolving seq-map.

SAnnotSelector & IncludeFeatSubtype(TFeatSubtype subtype)

Include feature subtype in the search.

bool IsSetPartial(void) const

const CSeq_loc & GetLocation(void) const

bool GetPartial(void) const

SSeqMapSelector & SetFlags(TFlags flags)

Select segment type(s)

const CSeq_feat & GetOriginalFeature(void) const

Get original feature with unmapped location/product.

const CSeq_feat_Handle & GetSeq_feat_Handle(void) const

Get original feature handle.

const CSeq_loc & GetProduct(void) const

SAnnotSelector & SetFeatSubtype(TFeatSubtype subtype)

Set feature subtype (also set annotation and feat type)

TSeqPos GetPosition(void) const

return position of current segment in sequence

CConstRef< CSeq_feat > GetSeq_feat(void) const

Get current seq-feat.

TCoding GetCoding(void) const

Target sequence coding.

bool IsInGap(TSeqPos pos) const

True if sequence at 0-based position 'pos' has a gap. Note: this method is not MT-safe,...

@ eSeqData

real sequence data

TObjectType * GetPointer(void) const THROWS_NONE

Get pointer.

bool IsNull(void) const THROWS_NONE

Check if pointer is null – same effect as Empty().

CConstRef< C > ConstRef(const C *object)

Template function for conversion of const object pointer to CConstRef.

CRef< C > Ref(C *object)

Helper functions to get CRef<> and CConstRef<> objects.

void Reset(void)

Reset reference object.

void Reset(void)

Reset reference object.

int32_t Int4

4-byte (32-bit) signed integer

#define END_NCBI_SCOPE

End previously defined NCBI scope.

#define END_SCOPE(ns)

End the previously defined scope.

#define BEGIN_NCBI_SCOPE

Define ncbi namespace.

#define BEGIN_SCOPE(ns)

Define a new scope.

static string SizetToString(size_t value, TNumToStringFlags flags=0, int base=10)

Convert size_t to string.

CTempString literal(const char(&str)[Size])

Templatized initialization from a string literal.

static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)

Case-insensitive compare of a substring with another string.

static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)

Convert string to int.

static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)

Find the pattern in the specified range of a string using a case insensitive search.

static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)

Check if a string ends with a specified suffix value.

static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)

Check if a string is blank (has no text).

static string IntToString(int value, TNumToStringFlags flags=0, int base=10)

Convert int to string.

static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)

Find the pattern in the string.

static bool EqualCase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)

Case-sensitive equality of a substring with another string.

static int Compare(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)

Compare of a substring with another string.

static string UIntToString(unsigned int value, TNumToStringFlags flags=0, int base=10)

Convert UInt to string.

static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)

Check if a string starts with a specified prefix value.

size_type length(void) const

Return the length of the represented array.

CTempString substr(size_type pos) const

Obtain a substring from this string, beginning at a given offset.

static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)

Case-insensitive equality of a substring with another string.

static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)

Convert numeric value to string.

static bool Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)

Test for equality of a substring with another string.

@ eNocase

Case insensitive compare.

static const char label[]

const TKeywords & GetKeywords(void) const

Get the Keywords member data.

bool IsSetExtra_acc(void) const

Check if a value has been assigned to Extra_acc data member.

const TExtra_acc & GetExtra_acc(void) const

Get the Extra_acc member data.

bool IsSetKeywords(void) const

Check if a value has been assigned to Keywords data member.

bool IsSetExtra_accessions(void) const

Check if a value has been assigned to Extra_accessions data member.

const TExtra_accessions & GetExtra_accessions(void) const

Get the Extra_accessions member data.

const TKeywords & GetKeywords(void) const

Get the Keywords member data.

bool IsSetKeywords(void) const

Check if a value has been assigned to Keywords data member.

const TSubtype & GetSubtype(void) const

Get the Subtype member data.

TGenome GetGenome(void) const

Get the Genome member data.

TOrigin GetOrigin(void) const

Get the Origin member data.

bool IsSetOrg(void) const

Check if a value has been assigned to Org data member.

bool IsSetSubtype(void) const

Check if a value has been assigned to Subtype data member.

const TOrg & GetOrg(void) const

Get the Org member data.

bool IsSetOrigin(void) const

Check if a value has been assigned to Origin data member.

TSubtype GetSubtype(void) const

Get the Subtype member data.

bool IsSetGenome(void) const

Check if a value has been assigned to Genome data member.

const TName & GetName(void) const

Get the Name member data.

bool IsSetIs_focus(void) const

to distinguish biological focus Check if a value has been assigned to Is_focus data member.

bool IsSetName(void) const

Check if a value has been assigned to Name data member.

@ eOrigin_synthetic

purely synthetic

@ eOrigin_mut

artificially mutagenized

@ eOrigin_artificial

artificially engineered

TTo GetTo(void) const

Get the To member data.

TFrom GetFrom(void) const

Get the From member data.

const TDesc & GetDesc(void) const

Get the Desc member data.

bool IsSetPseudo(void) const

pseudogene Check if a value has been assigned to Pseudo data member.

bool IsSetLocus_tag(void) const

systematic gene name (e.g., MI0001, ORF0069) Check if a value has been assigned to Locus_tag data mem...

bool IsSetLocus(void) const

Official gene symbol Check if a value has been assigned to Locus data member.

bool IsSetDesc(void) const

descriptive name Check if a value has been assigned to Desc data member.

const TLocus_tag & GetLocus_tag(void) const

Get the Locus_tag member data.

const TLocus & GetLocus(void) const

Get the Locus member data.

TPseudo GetPseudo(void) const

Get the Pseudo member data.

const TStr & GetStr(void) const

Get the variant data.

bool IsSetData(void) const

the object itself Check if a value has been assigned to Data data member.

bool IsStr(void) const

Check if variant Str is selected.

bool IsSetDb(void) const

name of database or system Check if a value has been assigned to Db data member.

bool CanGetType(void) const

Check if it is safe to call GetType method.

bool IsSetType(void) const

type of object within class Check if a value has been assigned to Type data member.

bool IsLim(void) const

Check if variant Lim is selected.

bool IsSetYear(void) const

full year (including 1900) Check if a value has been assigned to Year data member.

bool IsStd(void) const

Check if variant Std is selected.

const TTag & GetTag(void) const

Get the Tag member data.

bool IsStrs(void) const

Check if variant Strs is selected.

const TStrs & GetStrs(void) const

Get the variant data.

bool IsId(void) const

Check if variant Id is selected.

const TData & GetData(void) const

Get the Data member data.

bool IsSetTag(void) const

appropriate tag Check if a value has been assigned to Tag data member.

const TDb & GetDb(void) const

Get the Db member data.

TLim GetLim(void) const

Get the variant data.

bool IsStr(void) const

Check if variant Str is selected.

bool IsSetLabel(void) const

field label Check if a value has been assigned to Label data member.

const TStr & GetStr(void) const

Get the variant data.

const TData & GetData(void) const

Get the Data member data.

const TStr & GetStr(void) const

Get the variant data.

const TLabel & GetLabel(void) const

Get the Label member data.

const TType & GetType(void) const

Get the Type member data.

bool IsSetData(void) const

Check if a value has been assigned to Data data member.

vector< CStringUTF8 > TStrs

const TStd & GetStd(void) const

Get the variant data.

vector< CRef< CUser_field > > TData

TId GetId(void) const

Get the variant data.

bool IsStr(void) const

Check if variant Str is selected.

const TMod & GetMod(void) const

Get the Mod member data.

bool IsSetDb(void) const

ids in taxonomic or culture dbases Check if a value has been assigned to Db data member.

const TLineage & GetLineage(void) const

Get the Lineage member data.

TSubtype GetSubtype(void) const

Get the Subtype member data.

bool CanGetDiv(void) const

Check if it is safe to call GetDiv method.

const TDiv & GetDiv(void) const

Get the Div member data.

const TSubname & GetSubname(void) const

Get the Subname member data.

bool IsSetLineage(void) const

lineage with semicolon separators Check if a value has been assigned to Lineage data member.

const TTaxname & GetTaxname(void) const

Get the Taxname member data.

const TDb & GetDb(void) const

Get the Db member data.

bool IsSetMod(void) const

Check if a value has been assigned to Mod data member.

bool IsSetOrgname(void) const

Check if a value has been assigned to Orgname data member.

bool IsSetTaxname(void) const

preferred formal name Check if a value has been assigned to Taxname data member.

const TOrgname & GetOrgname(void) const

Get the Orgname member data.

const TName & GetName(void) const

Get the Name member data.

TProcessed GetProcessed(void) const

Get the Processed member data.

bool IsSetProcessed(void) const

Check if a value has been assigned to Processed data member.

bool IsSetName(void) const

protein name Check if a value has been assigned to Name data member.

list< CRef< CPub > > Tdata

const Tdata & Get(void) const

Get the member data.

const TEquiv & GetEquiv(void) const

Get the variant data.

bool IsEquiv(void) const

Check if variant Equiv is selected.

bool IsSub(void) const

Check if variant Sub is selected.

TType GetType(void) const

Get the Type member data.

bool IsSetExt(void) const

generic fields for ncRNA, tmRNA, miscRNA Check if a value has been assigned to Ext data member.

bool CanGetExt(void) const

Check if it is safe to call GetExt method.

bool IsSetType(void) const

Check if a value has been assigned to Type data member.

const TName & GetName(void) const

Get the variant data.

const TExt & GetExt(void) const

Get the Ext member data.

bool IsName(void) const

Check if variant Name is selected.

const TKey & GetKey(void) const

Get the Key member data.

bool IsSetData(void) const

the specific data Check if a value has been assigned to Data data member.

bool IsSetQual(void) const

qualifiers Check if a value has been assigned to Qual data member.

E_Choice Which(void) const

Which variant is currently selected.

bool IsSetCode(void) const

genetic code used Check if a value has been assigned to Code data member.

bool IsSetExt(void) const

user defined structure extension Check if a value has been assigned to Ext data member.

bool IsCdregion(void) const

Check if variant Cdregion is selected.

bool IsImp(void) const

Check if variant Imp is selected.

const TQual & GetQual(void) const

Get the Qual member data.

bool IsSetPartial(void) const

incomplete in some way? Check if a value has been assigned to Partial data member.

const TId & GetId(void) const

Get the Id member data.

const TLocation & GetLocation(void) const

Get the Location member data.

E_Choice

Choice variants.

bool IsLocal(void) const

Check if variant Local is selected.

bool IsGene(void) const

Check if variant Gene is selected.

TFrame GetFrame(void) const

Get the Frame member data.

const TData & GetData(void) const

Get the Data member data.

bool IsSetExcept(void) const

something funny about this? Check if a value has been assigned to Except data member.

const TExcept_text & GetExcept_text(void) const

Get the Except_text member data.

bool IsSetExcept_text(void) const

explain if except=TRUE Check if a value has been assigned to Except_text data member.

const TCode & GetCode(void) const

Get the Code member data.

const TDbxref & GetDbxref(void) const

Get the Dbxref member data.

list< CRef< C_E > > Tdata

const TCdregion & GetCdregion(void) const

Get the variant data.

const TBiosrc & GetBiosrc(void) const

Get the variant data.

bool IsSetId(void) const

Check if a value has been assigned to Id data member.

TPseudo GetPseudo(void) const

Get the Pseudo member data.

const TProduct & GetProduct(void) const

Get the Product member data.

const Tdata & Get(void) const

Get the member data.

bool IsSetPseudo(void) const

annotated on pseudogene? Check if a value has been assigned to Pseudo data member.

bool IsBiosrc(void) const

Check if variant Biosrc is selected.

void SetPseudo(TPseudo value)

Assign a value to Pseudo data member.

const TGene & GetGene(void) const

Get the variant data.

TPartial GetPartial(void) const

Get the Partial member data.

const TProt & GetProt(void) const

Get the variant data.

TExcept GetExcept(void) const

Get the Except member data.

vector< CRef< CGb_qual > > TQual

const TRna & GetRna(void) const

Get the variant data.

bool IsSetDbxref(void) const

support for xref to other databases Check if a value has been assigned to Dbxref data member.

bool IsSetProduct(void) const

product of process. Check if a value has been assigned to Product data member.

const TExt & GetExt(void) const

Get the Ext member data.

bool IsRna(void) const

Check if variant Rna is selected.

bool IsRegion(void) const

Check if variant Region is selected.

const TImp & GetImp(void) const

Get the variant data.

bool IsSetFrame(void) const

Check if a value has been assigned to Frame data member.

bool IsSetLocation(void) const

location the feature is made from. Check if a value has been assigned to Location data member.

@ e_Pub

publication applies to this seq

bool IsGenbank(void) const

Check if variant Genbank is selected.

TChain GetChain(void) const

Get the Chain member data.

bool IsSetChain_id(void) const

chain identifier; length-independent generalization of 'chain'. Check if a value has been assigned to Chain_id data member.

bool IsSetChain(void) const

Deprecated: 'chain' can't support multiple character PDB chain identifiers (introduced in 2015).

bool IsSetAccession(void) const

Check if a value has been assigned to Accession data member.

bool IsTpg(void) const

Check if variant Tpg is selected.

const TName & GetName(void) const

Get the Name member data.

list< CRef< CSeq_interval > > Tdata

ENa_strand

strand of nucleic acid

const Tdata & Get(void) const

Get the member data.

const TId & GetId(void) const

Get the Id member data.

const TPnt & GetPnt(void) const

Get the variant data.

bool IsTpd(void) const

Check if variant Tpd is selected.

TPoint GetPoint(void) const

Get the Point member data.

bool IsOther(void) const

Check if variant Other is selected.

TFrom GetFrom(void) const

Get the From member data.

bool IsGeneral(void) const

Check if variant General is selected.

bool IsEmbl(void) const

Check if variant Embl is selected.

E_Choice Which(void) const

Which variant is currently selected.

TGi GetGi(void) const

Get the variant data.

TVersion GetVersion(void) const

Get the Version member data.

E_Choice

Choice variants.

const TOther & GetOther(void) const

Get the variant data.

bool IsPacked_int(void) const

Check if variant Packed_int is selected.

Tdata & Set(void)

Assign a value to data member.

const TChain_id & GetChain_id(void) const

Get the Chain_id member data.

const TGeneral & GetGeneral(void) const

Get the variant data.

bool IsGi(void) const

Check if variant Gi is selected.

TTo GetTo(void) const

Get the To member data.

bool IsWhole(void) const

Check if variant Whole is selected.

bool IsInt(void) const

Check if variant Int is selected.

const TInt & GetInt(void) const

Get the variant data.

bool IsSetVersion(void) const

Check if a value has been assigned to Version data member.

bool IsTpe(void) const

Check if variant Tpe is selected.

bool IsPnt(void) const

Check if variant Pnt is selected.

bool IsSetName(void) const

Check if a value has been assigned to Name data member.

const TPacked_int & GetPacked_int(void) const

Get the variant data.

const TAccession & GetAccession(void) const

Get the Accession member data.

bool IsDdbj(void) const

Check if variant Ddbj is selected.

@ e_Other

for historical reasons, 'other' = 'refseq'

@ e_Tpe

Third Party Annot/Seq EMBL.

@ e_Tpd

Third Party Annot/Seq DDBJ.

@ e_General

for other databases

@ e_Gi

GenInfo Integrated Database.

@ e_Tpg

Third Party Annot/Seq Genbank.

const TSeq & GetSeq(void) const

Get the variant data.

bool IsSetClass(void) const

Check if a value has been assigned to Class data member.

TClass GetClass(void) const

Get the Class member data.

const TSet & GetSet(void) const

Get the variant data.

bool IsSeq(void) const

Check if variant Seq is selected.

bool IsSetSeq_set(void) const

Check if a value has been assigned to Seq_set data member.

bool IsSet(void) const

Check if variant Set is selected.

const TSeq_set & GetSeq_set(void) const

Get the Seq_set member data.

list< CRef< CSeq_entry > > TSeq_set

@ eClass_parts

parts for 2 or 3

@ eClass_pop_set

population study

@ eClass_phy_set

phylogenetic study

@ eClass_mut_set

set of mutations

@ eClass_eco_set

ecological sample study

@ eClass_nuc_prot

nuc acid and coded proteins

@ eClass_gen_prod_set

genomic products, chrom+mRNA+protein

@ eClass_segset

segmented sequence + parts

const TIupacaa & GetIupacaa(void) const

Get the variant data.

bool IsSetLinkage(void) const

Check if a value has been assigned to Linkage data member.

TRepr GetRepr(void) const

Get the Repr member data.

bool IsMap(void) const

Check if variant Map is selected.

const TSeg & GetSeg(void) const

Get the variant data.

bool IsSetCompleteness(void) const

Check if a value has been assigned to Completeness data member.

list< CRef< CSeqdesc > > Tdata

bool IsRef(void) const

Check if variant Ref is selected.

bool IsSetReplaced_by(void) const

these seqs make this one obsolete. Check if a value has been assigned to Replaced_by data member.

const TUser & GetUser(void) const

Get the variant data.

bool IsSetSeq_data(void) const

the sequence. Check if a value has been assigned to Seq_data data member.

TLinkage GetLinkage(void) const

Get the Linkage member data.

TStrand GetStrand(void) const

Get the Strand member data.

ERepr

representation class

const TInst & GetInst(void) const

Get the Inst member data.

const TGap & GetGap(void) const

Get the variant data.

bool IsSetAssembly(void) const

how was this assembled? Check if a value has been assigned to Assembly data member.

TTopology GetTopology(void) const

Get the Topology member data.

const TIupacna & GetIupacna(void) const

Get the variant data.

const TUpdate_date & GetUpdate_date(void) const

Get the variant data.

const TNcbipna & GetNcbipna(void) const

Get the variant data.

bool IsSetRepr(void) const

Check if a value has been assigned to Repr data member.

const TNcbipaa & GetNcbipaa(void) const

Get the variant data.

TType GetType(void) const

Get the Type member data.

bool IsSetMol(void) const

Check if a value has been assigned to Mol data member.

const TTitle & GetTitle(void) const

Get the variant data.

const TSource & GetSource(void) const

Get the variant data.

const TPub & GetPub(void) const

Get the variant data.

bool IsSetStrand(void) const

Check if a value has been assigned to Strand data member.

const TNcbi8aa & GetNcbi8aa(void) const

Get the variant data.

const TLiteral & GetLiteral(void) const

Get the variant data.

bool IsSetBiomol(void) const

Check if a value has been assigned to Biomol data member.

bool IsLoc(void) const

Check if variant Loc is selected.

E_Choice

Choice variants.

const TId & GetId(void) const

Get the Id member data.

bool IsSetHist(void) const

sequence history. Check if a value has been assigned to Hist data member.

bool IsNcbi4na(void) const

Check if variant Ncbi4na is selected.

TTech GetTech(void) const

Get the Tech member data.

bool IsSetExt(void) const

extensions for special types. Check if a value has been assigned to Ext data member.

const Tdata & Get(void) const

Get the member data.

bool IsSetReplaces(void) const

seq makes these seqs obsolete. Check if a value has been assigned to Replaces data member.

bool IsSetInst(void) const

the sequence data. Check if a value has been assigned to Inst data member.

bool IsNcbi8na(void) const

Check if variant Ncbi8na is selected.

TLength GetLength(void) const

Get the Length member data.

const TOrg & GetOrg(void) const

Get the variant data.

TLength GetLength(void) const

Get the Length member data.

const TAssembly & GetAssembly(void) const

Get the Assembly member data.

list< CRef< CSeq_id > > TId

const TGenbank & GetGenbank(void) const

Get the variant data.

bool IsSeg(void) const

Check if variant Seg is selected.

list< CRef< CSeq_id > > TIds

bool CanGetLength(void) const

Check if it is safe to call GetLength method.

const TFuzz & GetFuzz(void) const

Get the Fuzz member data.

TMol GetMol(void) const

Get the Mol member data.

const TIds & GetIds(void) const

Get the Ids member data.

const TLinkage_evidence & GetLinkage_evidence(void) const

Get the Linkage_evidence member data.

bool IsName(void) const

Check if variant Name is selected.

const TNcbieaa & GetNcbieaa(void) const

Get the variant data.

bool IsSetFuzz(void) const

could be unsure. Check if a value has been assigned to Fuzz data member.

TType GetType(void) const

Get the Type member data.

bool IsDelta(void) const

Check if variant Delta is selected.

const TNcbistdaa & GetNcbistdaa(void) const

Get the variant data.

bool IsSetLength(void) const

length of sequence in residues. Check if a value has been assigned to Length data member.

bool CanGetHist(void) const

Check if it is safe to call GetHist method.

const THist & GetHist(void) const

Get the Hist member data.

bool IsSetType(void) const

Check if a value has been assigned to Type data member.

const TExt & GetExt(void) const

Get the Ext member data.

bool CanGetRepr(void) const

Check if it is safe to call GetRepr method.

bool IsSetDescr(void) const

descriptors. Check if a value has been assigned to Descr data member.

E_Choice

Choice variants.

TMol_type GetMol_type(void) const

Get the variant data.

const TEmbl & GetEmbl(void) const

Get the variant data.

TBiomol GetBiomol(void) const

Get the Biomol member data.

bool CanGetType(void) const

Check if it is safe to call GetType method.

EMol

molecule class in living organism

bool IsSetLength(void) const

must give a length in residues. Check if a value has been assigned to Length data member.

bool IsSetTech(void) const

Check if a value has been assigned to Tech data member.

const TDelta & GetDelta(void) const

Get the variant data.

bool IsSetPub(void) const

the citation(s). Check if a value has been assigned to Pub data member.

const TNcbi4na & GetNcbi4na(void) const

Get the variant data.

const TLoc & GetLoc(void) const

Get the variant data.

TCompleteness GetCompleteness(void) const

Get the Completeness member data.

const TModif & GetModif(void) const

Get the variant data.

bool IsSet(void) const

Check if a value has been assigned to data member.

bool CanGetSeq_data(void) const

Check if it is safe to call GetSeq_data method.

const TNcbi2na & GetNcbi2na(void) const

Get the variant data.

const Tdata & Get(void) const

Get the member data.

E_Choice Which(void) const

Which variant is currently selected.

bool IsSetDate(void) const

Check if a value has been assigned to Date data member.

bool CanGetExt(void) const

Check if it is safe to call GetExt method.

bool IsSetId(void) const

equivalent identifiers. Check if a value has been assigned to Id data member.

const TCreate_date & GetCreate_date(void) const

Get the variant data.

bool IsLiteral(void) const

Check if variant Literal is selected.

bool IsSetSeq_data(void) const

may have the data. Check if a value has been assigned to Seq_data data member.

list< CRef< CDelta_seq > > Tdata

const TReplaces & GetReplaces(void) const

Get the Replaces member data.

const Tdata & Get(void) const

Get the member data.

bool IsGap(void) const

Check if variant Gap is selected.

const TPub & GetPub(void) const

Get the Pub member data.

const TSeq_data & GetSeq_data(void) const

Get the Seq_data member data.

bool IsNcbi2na(void) const

Check if variant Ncbi2na is selected.

const TReplaced_by & GetReplaced_by(void) const

Get the Replaced_by member data.

list< CRef< CSeq_loc > > Tdata

const TNcbi8na & GetNcbi8na(void) const

Get the variant data.

const TDescr & GetDescr(void) const

Get the Descr member data.

const TComment & GetComment(void) const

Get the variant data.

const TMolinfo & GetMolinfo(void) const

Get the variant data.

bool IsIupacna(void) const

Check if variant Iupacna is selected.

const TName & GetName(void) const

Get the variant data.

const TSeq_data & GetSeq_data(void) const

Get the Seq_data member data.

list< CRef< CLinkage_evidence > > TLinkage_evidence

const TRef & GetRef(void) const

Get the variant data.

bool CanGetInst(void) const

Check if it is safe to call GetInst method.

bool IsSetLinkage_evidence(void) const

Check if a value has been assigned to Linkage_evidence data member.

bool IsSetTopology(void) const

Check if a value has been assigned to Topology data member.

bool IsSetFuzz(void) const

length uncertainty. Check if a value has been assigned to Fuzz data member.

E_Choice Which(void) const

Which variant is currently selected.

@ eRepr_const

constructed sequence

@ eRepr_ref

reference to another sequence

@ eRepr_seg

segmented sequence

@ eRepr_delta

sequence made by changes (delta) to others

@ eRepr_map

ordered map of any kind

@ eRepr_raw

continuous sequence

@ eRepr_virtual

no seq data

@ eCompleteness_complete

complete biological entity

@ eCompleteness_no_left

missing 5' or NH3 end

@ eCompleteness_partial

partial but no details given

@ eCompleteness_no_right

missing 3' or COOH end

@ eCompleteness_no_ends

missing both ends

@ eTech_htgs_2

ordered High Throughput sequence contig

@ eTech_physmap

from physical mapping techniques

@ eTech_htc

high throughput cDNA

@ eTech_both

conceptual translation with partial peptide sequencing

@ eTech_targeted

targeted locus sets/studies

@ eTech_seq_pept_homol

sequenced peptide, ordered by homology

@ eTech_composite_wgs_htgs

composite of WGS and HTGS

@ eTech_sts

Sequence Tagged Site.

@ eTech_htgs_3

finished High Throughput sequence

@ eTech_seq_pept_overlap

sequenced peptide, ordered by overlap

@ eTech_htgs_1

unordered High Throughput sequence contig

@ eTech_concept_trans

conceptual translation

@ eTech_tsa

transcriptome shotgun assembly

@ eTech_standard

standard sequencing

@ eTech_wgs

whole genome shotgun sequencing

@ eTech_seq_pept

peptide was sequenced

@ eTech_survey

one-pass genomic sequence

@ eTech_barcode

barcode of life project

@ eTech_htgs_0

single genomic reads for coordination

@ eTech_fli_cdna

full length insert cDNA

@ eTech_est

Expressed Sequence Tag.

@ eTech_concept_trans_a

conceptual translation supplied by author

@ eTech_genemap

from genetic mapping techniques

@ e_not_set

No variant selected.

@ e_Ncbipna

nucleic acid probabilities

@ e_Ncbieaa

extended ASCII 1 letter aa codes

@ e_Ncbistdaa

consecutive codes for std aas

@ e_Ncbi2na

2 bit nucleic acid code

@ e_Iupacna

IUPAC 1 letter nuc acid code.

@ e_Ncbipaa

amino acid probabilities

@ e_Ncbi8na

8 bit extended nucleic acid code

@ e_Ncbi4na

4 bit nucleic acid code

@ e_Iupacaa

IUPAC 1 letter amino acid code.

@ e_Ncbi8aa

8 bit extended amino acid codes

@ eBiomol_pre_RNA

precursor RNA of any sort really

@ eBiomol_cRNA

viral RNA genome copy intermediate

@ eBiomol_snoRNA

small nucleolar RNA

@ eBiomol_transcribed_RNA

transcribed RNA other than existing classes

@ eBiomol_other_genetic

other genetic material

@ eGIBB_mod_no_right

missing right end (3' or COOH)

@ eGIBB_mod_mitochondrial

@ eGIBB_mod_no_left

missing left end (5' for na, NH2 for aa)

@ e_Embl

EMBL specific information.

@ e_Org

if all from one organism

@ e_User

user defined object

@ e_Update_date

date of last update

@ e_Pub

a reference to the publication

@ e_Pir

PIR specific info.

@ e_Genbank

GenBank specific info.

@ e_Prf

PRF specific information.

@ e_Mol_type

type of molecule

@ e_Sp

SWISSPROT specific info.

@ e_Comment

a more extensive comment

@ e_Method

sequencing method

@ e_Molinfo

info on the molecule and techniques

@ e_Create_date

date entry first created/released

@ e_Title

a title for this sequence

@ e_Pdb

PDB specific information.

@ e_Name

a name for this sequence

@ e_Source

source of materials, includes Org-ref

@ eType_clone

Deprecated. Used only for AGP 1.1.

@ eType_fragment

Deprecated. Used only for AGP 1.1.

@ eMol_not_set

cdna = rna

@ eMol_na

just a nucleic acid

@ eStrand_ss

single strand

@ e_Literal

a piece of sequence

@ e_Loc

point to a sequence

unsigned int

A callback function used to compare two keys in a database.

where both are integers</td></tr><tr><td>tse</td><td>optional</td><td>String</td><td class="description">TSE option controls what blob is orig…

if(yy_accept[yy_current_state])

static void text(MDB_val *v)

constexpr auto sort(_Init &&init)

constexpr bool empty(list< Ts... >) noexcept

const struct ncbi::grid::netcache::search::fields::SIZE size

const struct ncbi::grid::netcache::search::fields::KEY key

const CharType(& source)[N]

Miscellaneous common-use basic types and functionality.

Defines: CTimeFormat - storage class for time format.

Int4 delta(size_t dimension_, const Int4 *score_)

void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)

double df(double x_, const double &y_)

static int match(PCRE2_SPTR start_eptr, PCRE2_SPTR start_ecode, uint16_t top_bracket, PCRE2_SIZE frame_size, pcre2_match_data *match_data, match_block *mb)

#define FOR_EACH_PUB_ON_PUBDESC(Itr, Var)

FOR_EACH_PUB_ON_PUBDESC EDIT_EACH_PUB_ON_PUBDESC.

#define FOR_EACH_DESCRIPTOR_ON_BIOSEQ

#define FOR_EACH_ANNOT_ON_BIOSEQ

#define FOR_EACH_SEQID_ON_BIOSEQ(Itr, Var)

FOR_EACH_SEQID_ON_BIOSEQ EDIT_EACH_SEQID_ON_BIOSEQ.

#define IF_EXISTS_CLOSEST_BIOSOURCE(Cref, Var, Lvl)

IF_EXISTS_CLOSEST_BIOSOURCE.

#define FOR_EACH_KEYWORD_ON_GENBANKBLOCK(Itr, Var)

FOR_EACH_KEYWORD_ON_GENBANKBLOCK EDIT_EACH_KEYWORD_ON_GENBANKBLOCK.

CSubSource::TSubtype TSUBSOURCE_SUBTYPE

#define NCBI_GENOME(Type)

Convenience macros for NCBI objects

#define FOR_EACH_SYNONYM_ON_GENEREF(Itr, Var)

FOR_EACH_SYNONYM_ON_GENEREF EDIT_EACH_SYNONYM_ON_GENEREF.

#define NCBI_ORGMOD(Type)

COrgMod definitions.

#define FOR_EACH_GBQUAL_ON_SEQFEAT(Itr, Var)

FOR_EACH_GBQUAL_ON_SEQFEAT EDIT_EACH_GBQUAL_ON_SEQFEAT.

#define FOR_EACH_SUBSOURCE_ON_BIOSOURCE(Itr, Var)

FOR_EACH_SUBSOURCE_ON_BIOSOURCE EDIT_EACH_SUBSOURCE_ON_BIOSOURCE.

COrgMod::TSubtype TORGMOD_SUBTYPE

#define FOR_EACH_SEQFEATXREF_ON_SEQFEAT(Itr, Var)

FOR_EACH_SEQFEATXREF_ON_SEQFEAT EDIT_EACH_SEQFEATXREF_ON_SEQFEAT.

#define FOR_EACH_DBXREF_ON_FEATURE

#define NCBI_SEQID(Type)

Convenience macros for NCBI objects

#define FOR_EACH_SEQENTRY_ON_SEQSET(Itr, Var)

FOR_EACH_SEQENTRY_ON_SEQSET EDIT_EACH_SEQENTRY_ON_SEQSET.

#define FIELD_IS_SET_AND_IS(Var, Fld, Chs)

FIELD_IS_SET_AND_IS base macro.

#define FOR_EACH_STRING_IN_LIST(Itr, Var)

FOR_EACH_STRING_IN_LIST EDIT_EACH_STRING_IN_LIST.

#define RAW_FIELD_IS_EMPTY_OR_UNSET(Var, Fld)

RAW_FIELD_IS_EMPTY_OR_UNSET macro.

#define GET_FIELD(Var, Fld)

GET_FIELD base macro.

#define FOR_EACH_CHAR_IN_STRING(Itr, Var)

FOR_EACH_CHAR_IN_STRING EDIT_EACH_CHAR_IN_STRING.

bool seq_mac_is_unique(Iterator iter1, Iterator iter2, Predicate pred)

#define BEGIN_COMMA_END(container)

static SLJIT_INLINE sljit_ins msg(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)

CSeqFeatData::ESubtype feat_subtype

bool operator()(const CTempString &lhs, const CTempString &rhs) const

bool operator()(const CTempString &lhs, const CTempString &rhs) const

Selector used in CSeqMap methods returning iterators.

map< string, string > TViralMap

bool HasExcludedAnnotation(const CSeq_loc &loc, CBioseq_Handle far_bsh)

static bool s_NotPeptideException(const CSeq_feat &curr, const CSeq_feat &prev)

static char CheckForBadFileIDSeqIdChars(const string &id)

bool s_ContainedIn(const CSeq_loc &loc1, const CSeq_loc &loc2, CScope *scope)

bool s_FieldHasLabel(const CUser_field &field, const string &label)

bool s_AfterIsGapORN(TSeqPos pos, TSeqPos after, TSeqPos len, const CSeqVector &vec)

static TViralMap * s_InitializeViralMap()

bool s_CheckIntervals(const CSeq_loc &loc1, const CSeq_loc &loc2, CScope *scope)

MAKE_CONST_MAP(kViralStrandMap, string, string, { {"root", "dsDNA"}, {"Alphasatellitidae", "ssDNA"}, {"Anelloviridae", "ssDNA(-)"}, {"Bacilladnaviridae", "ssDNA"}, {"Bidnaviridae", "ssDNA"}, {"Circoviridae", "ssDNA(+/-)"}, {"Geminiviridae", "ssDNA(+/-)"}, {"Genomoviridae", "ssDNA"}, {"Hepadnaviridae", "dsDNA-RT"}, {"Inoviridae", "ssDNA(+)"}, {"Microviridae", "ssDNA(+)"}, {"Nanoviridae", "ssDNA(+)"}, {"Ortervirales", "ssRNA-RT"}, {"Caulimoviridae", "dsDNA-RT"}, {"Parvoviridae", "ssDNA(+/-)"}, {"Alphapleolipovirus", "dsDNA; ssDNA"}, {"Riboviria", "RNA"}, {"Albetovirus", "ssRNA(+)"}, {"Alphatetraviridae", "ssRNA(+)"}, {"Alvernaviridae", "ssRNA(+)"}, {"Amalgaviridae", "dsRNA"}, {"Astroviridae", "ssRNA(+)"}, {"Aumaivirus", "ssRNA(+)"}, {"Avsunviroidae", "ssRNA"}, {"Barnaviridae", "ssRNA(+)"}, {"Benyviridae", "ssRNA(+)"}, {"Birnaviridae", "dsRNA"}, {"Botourmiaviridae", "ssRNA(+)"}, {"Botybirnavirus", "dsRNA"}, {"Bromoviridae", "ssRNA(+)"}, {"Caliciviridae", "ssRNA(+)"}, {"Carmotetraviridae", "ssRNA(+)"}, {"Chrysoviridae", "dsRNA"}, {"Closteroviridae", "ssRNA(+)"}, {"Cystoviridae", "dsRNA"}, {"Deltavirus", "ssRNA(-)"}, {"dsRNA viruses", "dsRNA"}, {"Endornaviridae", "dsRNA"}, {"Flaviviridae", "ssRNA(+)"}, {"Hepeviridae", "ssRNA(+)"}, {"Hypoviridae", "ssRNA(+)"}, {"Idaeovirus", "ssRNA(+)"}, {"Kitaviridae", "ssRNA(+)"}, {"Leviviridae", "ssRNA(+)"}, {"Luteoviridae", "ssRNA(+)"}, {"Matonaviridae", "ssRNA(+)"}, {"Megabirnaviridae", "dsRNA"}, {"Narnaviridae", "ssRNA(+)"}, {"Haploviricotina", "ssRNA(-)"}, {"Arenaviridae", "ssRNA(+/-)"}, {"Coguvirus", "ssRNA(-)"}, {"Cruliviridae", "ssRNA(-)"}, {"Fimoviridae", "ssRNA(-)"}, {"Hantaviridae", "ssRNA(-)"}, {"Leishbuviridae", "ssRNA(-)"}, {"Mypoviridae", "ssRNA(-)"}, {"Nairoviridae", "ssRNA(-)"}, {"Peribunyaviridae", "ssRNA(-)"}, {"Phasmaviridae", "ssRNA(-)"}, {"Banyangvirus", "ssRNA(+/-)"}, {"Beidivirus", "ssRNA(-)"}, {"Goukovirus", "ssRNA(-)"}, {"Horwuvirus", "ssRNA(-)"}, {"Hudivirus", "ssRNA(-)"}, {"Hudovirus", "ssRNA(-)"}, 
{"Kabutovirus", "ssRNA(-)"}, {"Laulavirus", "ssRNA(-)"}, {"Mobuvirus", "ssRNA(-)"}, {"Phasivirus", "ssRNA(-)"}, {"Phlebovirus", "ssRNA(+/-)"}, {"Pidchovirus", "ssRNA(-)"}, {"Tenuivirus", "ssRNA(-)"}, {"Wenrivirus", "ssRNA(-)"}, {"Wubeivirus", "ssRNA(-)"}, {"Tospoviridae", "ssRNA(+/-)"}, {"Wupedeviridae", "ssRNA(-)"}, {"Insthoviricetes", "ssRNA(-)"}, {"Nidovirales", "ssRNA(+)"}, {"Nodaviridae", "ssRNA(+)"}, {"Papanivirus", "ssRNA(+)"}, {"Partitiviridae", "dsRNA"}, {"Permutotetraviridae", "ssRNA(+)"}, {"Picobirnaviridae", "dsRNA"}, {"Picornavirales", "ssRNA(+)"}, {"Pospiviroidae", "ssRNA"}, {"Potyviridae", "ssRNA(+)"}, {"Quadriviridae", "dsRNA"}, {"Reoviridae", "dsRNA"}, {"Sarthroviridae", "ssRNA(+)"}, {"Sinaivirus", "ssRNA(+)"}, {"Solemoviridae", "ssRNA(+)"}, {"Solinviviridae", "ssRNA(+)"}, {"Togaviridae", "ssRNA(+)"}, {"Tombusviridae", "ssRNA(+)"}, {"Totiviridae", "dsRNA"}, {"Tymovirales", "ssRNA(+)"}, {"Virgaviridae", "ssRNA(+)"}, {"Virtovirus", "ssRNA(+)"}, {"ssRNA viruses", "ssRNA"}, {"unclassified ssRNA viruses", "ssRNA"}, {"unclassified ssRNA negative-strand viruses", "ssRNA(-)"}, {"unclassified ssRNA positive-strand viruses", "ssRNA(+)"}, {"unclassified viroids", "ssRNA"}, {"DNA satellites", "DNA"}, {"RNA satellites", "RNA"}, {"Smacoviridae", "ssDNA"}, {"Spiraviridae", "ssDNA(+)"}, {"Tolecusatellitidae", "ssDNA"}, {"unclassified viruses", "unknown"}, {"unclassified DNA viruses", "DNA"}, {"unclassified archaeal dsDNA viruses", "dsDNA"}, {"unclassified dsDNA phages", "dsDNA"}, {"unclassified dsDNA viruses", "dsDNA"}, {"unclassified ssDNA bacterial viruses", "ssDNA"}, {"unclassified ssDNA viruses", "ssDNA"}, {"environmental samples", "unknown"}, })

static bool s_MatchPartialType(const CSeq_loc &loc1, const CSeq_loc &loc2, unsigned int partial_type)

@ e_RnaPosition_MIDDLE_RIBOSOMAL_SUBUNIT

@ e_RnaPosition_INTERNAL_SPACER_X

@ e_RnaPosition_LEFT_RIBOSOMAL_SUBUNIT

@ e_RnaPosition_INTERNAL_SPACER_2

@ e_RnaPosition_RIGHT_RIBOSOMAL_SUBUNIT

@ e_RnaPosition_INTERNAL_SPACER_1

static bool s_SubsequentIntron(CFeat_CI feat_ci_dup, Int4 start, Int4 stop, Int4 max)

#define FOR_EACH_SEQID_ON_BIOSEQ_HANDLE(Itr, Var)

static bool s_GetFlankingGapTypes(const CSeq_inst &inst, CSeq_gap::TType &fst, CSeq_gap::TType &lst)

static bool s_SeqIdMatch(const CConstRef< CSeq_id > &q1, const CConstRef< CSeq_id > &q2)

bool x_IsPseudo(const CGene_ref &ref)

static const char * linkEvStrings[]

static bool s_LocSortCompare(const CConstRef< CSeq_loc > &q1, const CConstRef< CSeq_loc > &q2)

static int CountNs(const CSeq_data &seq_data, TSeqPos len)

bool s_BeforeIsGapOrN(TSeqPos pos, TSeqPos before, const CSeqVector &vec)

static bool x_BadCDSinVDJC(const CSeq_loc &cdsloc, const CSeq_loc &vdjcloc, CScope *scope)

static bool x_FeatIsVDJC(const CSeq_feat &ft)

static int s_MaxNsInSeqLitForTech(CMolInfo::TTech tech)

unsigned int s_IdXrefsNotReciprocal(const CSeq_feat &cds, const CSeq_feat &mrna)

bool s_IsCDDFeat(const CMappedFeat &feat)

static EDiagSev GetBioseqEndWarning(const CBioseq &seq, bool is_circular, EBioseqEndIsType end_is_char)

bool s_CheckPosNOrGap(TSeqPos pos, const CSeqVector &vec)

bool s_DbtagEqual(const CRef< CDbtag > &dbt1, const CRef< CDbtag > &dbt2)

static bool x_FeatIsCDS(const CSeq_feat &ft)

bool s_HasGI(const CBioseq &seq)

bool s_AfterIsGap(TSeqPos pos, TSeqPos after, TSeqPos len, const CSeqVector &vec)

static optional< int > s_MaxSeqStretchIfLessThanThreshold(const CSeqVector &vec, int threshold)

bool HasUnverified(CBioseq_Handle bsh)

static bool s_OrgModEqual(const CRef< COrgMod > &om1, const CRef< COrgMod > &om2)

string s_GetMrnaProductString(const CSeq_feat &mrna)

static bool s_SubsourceEquivalent(const CRef< CSubSource > &st1, const CRef< CSubSource > &st2)

bool x_HasNamedQual(const CSeq_feat &feat, const string &qual)

static char CheckForBadSeqIdChars(const string &id)

static string s_GetKeywordForStructuredComment(const CUser_object &obj)

TGi GetGIForSeqId(const CSeq_id &id, CScope &scope)

bool StrandsMatch(ENa_strand s1, ENa_strand s2)

static CBioseq_Handle s_GetParent(const CBioseq_Handle &part)

static ERnaPosition s_RnaPosition(const CSeq_feat &feat)

bool s_AreAdjacent(ERnaPosition pos1, ERnaPosition pos2)

bool lists_match(Iterator iter1, Iterator iter1_stop, Iterator iter2, Iterator iter2_stop, Predicate pred)

static bool s_IsConWithGaps(const CBioseq &seq)

static bool s_BiosrcFullLengthIsOk(const CBioSource &src)

static bool s_StandaloneProt(const CBioseq_Handle &bsh)

static TSeqPos s_GetDeltaLen(const CDelta_seq &seg, CScope *scope)

bool s_IdXrefsAreReciprocal(const CSeq_feat &cds, const CSeq_feat &mrna)

static bool HasAssemblyOrNullGap(const CBioseq &seq)

static bool s_IsTPAAssemblyOkForBioseq(const CBioseq &seq, bool has_refseq)

bool s_HasTpaUserObject(CBioseq_Handle bsh)

static bool s_OrgrefEquivalent(const COrg_ref &org1, const COrg_ref &org2)

bool s_GeneralTagsMatch(const string &protein_id, const CDbtag &dbtag)

static bool s_WillReportTerminalGap(const CBioseq &seq, CBioseq_Handle bsh)

string s_GetMrnaProteinLink(const CUser_field &field)

static bool s_ReportableCollision(const CGene_ref &g1, const CGene_ref &g2)

static char CheckForBadLocalIdChars(const string &id)

bool s_BeforeIsGap(TSeqPos pos, TSeqPos before, const CSeqVector &vec)

static bool s_IsSkippableDbtag(const CDbtag &dbt)

static void s_MakePubLabelString(const CPubdesc &pd, string &label)

static void s_GetGeneTextLabel(const CSeq_feat &feat, string &label)

static vector< int > s_LocationToStartStopPairs(const CSeq_loc &loc)

static void GetDateString(string &out_date_str, const CDate &date)

static bool s_SeqIdCompare(const CConstRef< CSeq_id > &q1, const CConstRef< CSeq_id > &q2)

static bool s_IsSwissProt(const CBioseq &seq)

bool s_FieldHasNonBlankValue(const CUser_field &field)

static bool s_IsUnspecified(const CSeq_gap &gap)

static bool s_SuppressMultipleEquivBioSources(const CBioSource &src)

bool s_OverlapOrAbut(const CSeq_loc &loc1, const CSeq_loc &loc2, CScope *scope)

static bool x_IsWgsSecondary(const CBioseq &seq)


RetroSearch is an open source project built by @garambo | Open a GitHub Issue

Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo

HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4