A RetroSearch Logo

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Search Query:

Showing content from http://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/doxyhtml/single__feat__validator_8cpp_source.html below:

NCBI C++ ToolKit: src/objtools/validator/single_feat_validator.cpp Source File

57 using namespace

sequence;

62

: m_Feat(feat),

m_Scope

(scope), m_Imp(imp), m_ProductIsFar(

false

)

72  "The feature is missing a location"

);

77  bool

lowerSev =

false

;

81  const CDbtag

& dbtag = **it;

82  if

( dbtag.

GetDb

() ==

"dbSNP"

) {

90  "Location"

,

m_Feat

, lowerSev);

131  "Inference or experiment qualifier missing but obsolete experimental evidence qualifier set"

);

157  if

(loc.IsInt() || loc.IsWhole()) {

162  for

(CSeq_loc_CI citer(loc); citer; ++citer) {

163  const CSeq_id

& this_id = citer.GetSeq_id();

164  if

(!

prev

|| !

prev

->Equals(this_id)) {

169  prev

.Reset(&this_id);

183  switch

(sid.

Which

()) {

196  "Feature product should not put an accession in the Textseq-id 'name' slot"

);

199  "Feature product should not use " 200  "Textseq-id 'name' slot"

);

215  if

(id->Which() == sid.

Which

()) {

217  string

from_seq =

id

->AsFastaString();

222  "Capitalization change from product location on feature to product sequence"

);

225  switch

(id->Which()) {

238  "Protein bioseq has Textseq-id 'name' that " 239  "looks like it is derived from a nucleotide " 243  "Protein bioseq has Textseq-id 'name' and no accession"

);

260  bool

is_seqloc_bond =

false

;

264  for

(CSeq_loc_CI it(feat.

GetLocation

()); it; ++it) {

265  if

(it.GetEmbeddingSeq_loc().IsBond()

266

&& (!it.GetEmbeddingSeq_loc().GetBond().IsSetA()

267

|| it.GetEmbeddingSeq_loc().GetBond().IsSetB())) {

268

is_seqloc_bond =

true

;

273  for

(CSeq_loc_CI it(feat.

GetLocation

()); it; ++it) {

274  if

(it.GetEmbeddingSeq_loc().IsBond()) {

275

is_seqloc_bond =

true

;

281  for

(CSeq_loc_CI it(feat.

GetLocation

()); it; ++it) {

282  if

(it.GetEmbeddingSeq_loc().IsBond()) {

283

is_seqloc_bond =

true

;

288  return

is_seqloc_bond;

299  if

(both || both_rev) {

301  if

(both && both_rev) {

302

suffix =

"(forward and reverse)"

;

304

suffix =

"(forward)"

;

305

}

else if

(both_rev) {

306

suffix =

"(reverse)"

;

312  label

+

" may not be on both "

+ suffix +

" strands"

);

321  for

(CSeq_loc_CI it(loc); it; ++it) {

322  if

(it.IsSetStrand()) {

330  if

(both && both_rev) {

339

has_parent_gene_id =

false

;

345

has_parent_gene_id =

true

;

346  if

((*it)->IsSetTag() && (*it)->GetTag().Equals(

tag

)) {

378  bool

has_parent_gene_id =

false

;

379  if

(!

HasGeneIdXref

(parent, (*it)->GetTag(), has_parent_gene_id)) {

380  if

(has_parent_gene_id ||

386

parent = feat_tree->GetParent(parent);

403  if

((*pi)->IsEquiv()) {

405  "Citation on feature has unexpected internal Pub-equiv"

);

415  "empty inference string"

,

416  "bad inference prefix"

,

417  "bad inference body"

,

418  "single inference field"

,

419  "spaces in inference"

,

420  "possible comment in inference"

,

421  "same species misused"

,

422  "the value in the accession field is not legal. The only allowed value is accession.version, eg AF123456.1. Problem ="

,

423  "bad inference accession version"

,

424  "accession.version not public"

,

425  "bad accession type"

,

426  "unrecognized database"

,

441  "Qualifier other than replace has just quotation marks"

);

449  "Inference qualifier problem - empty inference string ()"

);

457

qual.

GetVal

() +

" is not in proper EC_number format"

);

459  string

ec_number = qual.

GetVal

();

465  "EC_number "

+ ec_number +

" was deleted"

);

470  "EC_number "

+ ec_number +

" was replaced"

);

475  if

(pos == string::npos || !

isdigit

(ec_number.c_str()[pos + 1])) {

477

ec_number +

" is not a legal value for qualifier EC_number"

);

480

ec_number +

" is not a legal preliminary value for qualifier EC_number"

);

509  "/pseudogene value should not be '"

+ qual.

GetVal

() +

"'"

,

m_Feat

);

512  bool

has_space =

false

;

513  bool

has_char_after_space =

false

;

515  if

(

isspace

((

unsigned char

)(*it))) {

517

}

else if

(has_space) {

519

has_char_after_space =

true

;

523  if

(has_char_after_space) {

525  "Number qualifiers should not contain spaces"

);

530  "feature qualifier "

+ qual.

GetVal

() +

" has SGML"

);

543  "Unable to find EC number file 'ecnum_ambiguous.txt' in data directory"

);

547  "Unable to find EC number file 'ecnum_deleted.txt' in data directory"

);

551  "Unable to find EC number file 'ecnum_replaced.txt' in data directory"

);

555  "Unable to find EC number file 'ecnum_specific.txt' in data directory"

);

564  for

(

auto

it : errors) {

566

it.first, it.second);

593  "Feature comment may refer to reference by serial number - " 594  "attach reference specific comments to the reference " 595  "REMARK instead."

,

m_Feat

);

599  "feature comment "

+ comment +

" has SGML"

,

627  "On partial Bioseq, SeqFeat.partial should be TRUE"

);

630  else if

(is_partial &&

639  "When SeqFeat.product is a partial Bioseq, SeqFeat.location " 640  "should also be partial"

);

648  "Gene of 'order' with otherwise complete location should " 649  "have partial flag set"

);

655  bool

is_far_fail =

false

;

663  string str

(

"Inconsistent: Product= complete, Location= "

);

665  str

+=

"Feature.partial= "

;

666  str

+= is_partial ?

"TRUE"

:

"FALSE"

;

669

}

else if

(is_far_fail) {

677  string str

(

"Inconsistent: "

);

682  str

+=

"Location= "

;

684  str

+=

"Feature.partial= "

;

685  str

+= is_partial ?

"TRUE"

:

"FALSE"

;

698  "5' or 3' partial location should not have unclassified" 699  " partial in product molinfo descriptor"

);

714  "Bond location should only be on bond features"

);

719  string

prefix =

"Feature"

;

736  string

loc_id = os.str();

738  if

((*it)->IsGi() || (*it)->IsGibbsq() || (*it)->IsGibbmt()) {

742

(*it)->WriteAsFasta(os2);

743  string

bs_id = os2.str();

746  "Sequence identifier in feature location differs in capitalization with identifier on Bioseq"

);

754  "Feature on protein indicates negative strand"

);

761

vector<TSeqPos> gap_starts;

767  "Feature contains more than 50% Ns"

);

769  for

(

auto

gap_start : gap_starts) {

776  "Feature inside sequence gap"

);

781  "Internal interval begins or ends in gap"

);

785  "Feature crosses gap of unknown length"

);

790  string

(

"Exception while checking for intervals in gaps. EXCEPTION: "

) +

792

}

catch

(

const

std::exception&) {

837  while

(map_iter && pos <= stop) {

840  for

(; pos < map_end && pos <= stop; pos++) {

899  if

( (*it)->IsLoc() ) {

920  int

num_unknown_gap = 0;

921  bool

first_in_gap =

false

, last_in_gap =

false

;

922  bool

local_first_gap =

false

, local_last_gap =

false

;

923  bool

startsOrEndsInGap =

false

;

926  for

(CSeq_loc_CI loc_it(loc); loc_it; ++loc_it) {

929  if

(!vec.

empty

()) {

933  if

(id_it->Equals(loc_it.GetSeq_id())) {

948

local_first_gap =

false

;

949

local_last_gap =

false

;

954

string::iterator it = vec_data.begin();

955  while

(it != vec_data.end() && pos <

len

) {

956  bool

is_gap =

false

;

957  bool

unknown_length =

false

;

963

unknown_length =

true

;

970

unknown_length =

true

;

976

local_first_gap =

true

;

977

}

else if

(pos ==

len

- 1) {

978

local_last_gap =

true

;

980  if

(unknown_length) {

985

}

else if

(*it ==

'N'

) {

1002

first_in_gap = local_first_gap;

1005

last_in_gap = local_last_gap;

1006  if

(local_first_gap || local_last_gap) {

1007

startsOrEndsInGap =

true

;

1011  if

(num_real == 0 && num_n == 0) {

1021  if

(num_gap == 0 && num_unknown_gap == 0 && num_n == 0) {

1023

}

else if

(first_in_gap || last_in_gap) {

1028

gap_starts.push_back(gap_start);

1032

}

else if

(num_real == 0 && num_gap == 0 && num_unknown_gap == 0 && num_n >= 50) {

1034

}

else if

(startsOrEndsInGap) {

1036

}

else if

(num_unknown_gap > 0) {

1060  if

((*it)->IsLiteral()) {

1061  len

= (*it)->GetLiteral().GetLength();

1062

}

else if

((*it)->IsLoc()) {

1083  for

(CSeq_loc_CI loc_it(loc); loc_it; ++loc_it) {

1086  if

(!vec.

empty

()) {

1088  bool match

=

false

;

1090  if

(id_it->Equals(loc_it.GetSeq_id())) {

1106

string::iterator it = vec_data.begin();

1107  while

(it != vec_data.end()) {

1115  if

((

unsigned

)(*it + 1) <= 256 &&

isalpha

(*it)) {

1123

}

catch

(

const

std::exception& ) {

1128  return

(num_n > real_bases);

1139  const CSeq_id

* protid =

nullptr

;

1164  if

(!prot_handle && look_far) {

1177  bool

look_far =

false

;

1198  "Exception text is present, but exception flag is not set"

);

1202  "Exception flag is set, but exception text is empty"

);

1212  if

(

text

.empty())

return

;

1215  bool

found =

false

;

1219  bool

reasons_in_cit =

false

;

1220  bool

annotated_by_transcript_or_proteomic =

false

;

1221  bool

redundant_with_comment =

false

;

1222  bool

refseq_except =

false

;

1223

vector<string> exceptions;

1225  ITERATE

(vector<string>, it, exceptions) {

1235

reasons_in_cit =

true

;

1237

annotated_by_transcript_or_proteomic =

true

;

1242  bool

check_refseq =

false

;

1244

check_refseq =

true

;

1246

check_refseq =

true

;

1249  if

((*id_it)->IsOther()) {

1250

check_refseq =

true

;

1259

refseq_except =

true

;

1272  str

+

" is not a legal exception explanation"

);

1279

redundant_with_comment =

true

;

1281

redundant_with_comment =

true

;

1285  if

(redundant_with_comment) {

1287  "Exception explanation text is also found in feature comment"

);

1289  if

(refseq_except) {

1292  if

(!found_just_the_exception) {

1294  "Genome processing exception should not be combined with other explanations"

);

1300  "Reasons given in citation exception does not have the required citation"

);

1302  if

(annotated_by_transcript_or_proteomic) {

1303  bool

has_inference =

false

;

1306

has_inference =

true

;

1310  if

(!has_inference) {

1312  "Annotated by transcript or proteomic data exception does not have the required inference qualifier"

);

1336  bool

is_imp =

false

;

1360  const string

& qual_str = gbq->GetQual();

1366  auto

gbqual = gbqual_and_value.first;

1371

qual_str +

" is improperly capitalized"

);

1382  "Unknown qualifier "

+ qual_str);

1397  if

(

NStr::Equal

(qual_str,

"orig_transcript_id"

)) {

1401  if

(

NStr::Equal

(qual_str,

"orig_transcript_id"

)) {

1411  "Wrong qualifier "

+ qual_str +

" for feature "

+

1417  "feat_class qualifier is only legal for RefSeq"

);

1422  const string

&

val

= gbq->GetVal();

1428  "Compound '"

+

val

+

"' must be split into separate instances of qualifier "

+ qual_str);

1432  val

+

" is not a legal value for qualifier "

+ qual_str);

1462  val

+

" is not a legal value for qualifier "

+ qual_str);

1474  "Vector Contamination region should be trimmed from sequence"

);

1483  "A product qualifier is not used on a gene feature"

);

1491  "locus-tag values should be on genes"

);

1504  bool

multiple_rpt_unit =

false

;

1508

}

else if

( *it ==

'('

|| *it ==

')'

||

1509

*it ==

','

|| *it ==

'.'

||

1510  isdigit

((

unsigned char

)(*it)) ) {

1511

multiple_rpt_unit =

true

;

1521

!multiple_rpt_unit ) {

1523  bool

just_nuc_letters =

true

;

1525  'A'

,

'C'

,

'G'

,

'T'

,

'N'

,

'a'

,

'c'

,

'g'

,

't'

,

'n' 1528  if

( nuc_letters.find(*it) == nuc_letters.end() ) {

1529

just_nuc_letters =

false

;

1533  if

( just_nuc_letters ) {

1535  if

( !vec.

empty

() ) {

1540  "repeat_region /rpt_unit and underlying " 1541  "sequence do not match"

);

1547  "Length of rpt_unit_seq is greater than feature length"

);

1559  const char

*cp =

val

.c_str();

1560  bool

badchars =

false

;

1561  while

(*cp != 0 && !badchars) {

1564

}

else if

(*cp !=

'('

&& *cp !=

')' 1566

&& *cp !=

','

&& *cp !=

';'

) {

1573  "/rpt_unit_seq has illegal characters"

);

1581  if

(

str

.length() > 25) {

1585  if

(pos == string::npos) {

1589  int

tmp_from, tmp_to;

1597

}

catch

(

const

std::exception& ) {

1600  if

(tmp_from < 0 || tmp_to < 0) {

1612  "/rpt_unit_range is not a base range"

);

1615  if

(from - 1 < range.

GetFrom

() || from - 1> range.

GetTo

() || to - 1 < range.

GetFrom

() || to - 1 > range.

GetTo

()) {

1617  "/rpt_unit_range is not within sequence length"

);

1619  bool

nulls_between =

false

;

1622

nulls_between =

true

;

1625  if

(nulls_between) {

1626  bool

in_range =

false

;

1628

range = it.GetEmbeddingSeq_loc().GetTotalRange();

1629  if

(from - 1 < range.

GetFrom

() || from - 1> range.

GetTo

() || to - 1 < range.

GetFrom

() || to - 1 > range.

GetTo

()) {

1636  "/rpt_unit_range is not within ordered intervals"

);

1646  bool

only_digits =

true

,

1647

has_spaces =

false

;

1650  if

(

isspace

((

unsigned char

)(*it)) ) {

1653  if

( !

isdigit

((

unsigned char

)(*it)) ) {

1654

only_digits =

false

;

1657  if

(only_digits || has_spaces) {

1669  val

+

" accession missing version for qualifier compare"

);

1672  val

+

" accession has bad version for qualifier compare"

);

1675  val

+

" is not a legal accession for qualifier compare"

);

1678  "RefSeq accession "

+

val

+

" cannot be used for qualifier compare"

);

1686  const char

*src =

str

.c_str();

1687  const char

*find = consist.c_str();

1690  while

(*src != 0 && rval) {

1691  if

(strchr (find, *src) ==

NULL

) {

1707  val

+

" is not a legal value for qualifier "

+ qual_str

1708

+

" - should only be composed of acgt unambiguous nucleotide bases"

);

1712  val

+

" is not a legal value for qualifier "

+ qual_str

1713

+

" - should only be composed of acgtmrwsykvhdbn nucleotide bases"

);

1718  val

+

" is not a legal value for qualifier "

+ qual_str

1719

+

" - should only be composed of acdefghiklmnpqrstuvwy* amino acids"

);

1724  bool

has_fuzz =

false

;

1725  for

( objects::CSeq_loc_CI it(

m_Feat

.

GetLocation

()); it && !has_fuzz; ++it) {

1726  if

(it.IsPoint() && (it.GetFuzzFrom() || it.GetFuzzTo())) {

1737  "/replace already matches underlying sequence ("

+

val

+

")"

);

1740

}

catch

(

const

std::exception& ) {

1751

field_name +

" contains undesired character"

);

1755

field_name +

" ends with undesired character"

);

1760

field_name +

" ends with hyphen"

);

1781  "feature has exception but passes splice site test"

);

1802  "Bad sequence at splice donor after exon ending at position " 1806  "Splice donor consensus (GT) not found after exon ending at position " 1817  "Bad sequence at splice acceptor before exon starting at position " 1821  "Splice acceptor consensus (AG) not found before exon starting at position " 1832  for

(

auto

it = donor_problems.begin(); it != donor_problems.end(); it++) {

1836  for

(

auto

it = acceptor_problems.begin(); it != acceptor_problems.end(); it++) {

1846  if

((*it)->IsOther() && (*it)->GetOther().IsSetAccession()

1870  bool

found =

false

;

1887  if

((*it)->IsOther()) {

1919  " for feature "

+

key

);

1930  if

(strand1 == strand2) {

1958  "Gene cross-reference is not on expected strand"

);

1966  bool

equivalent =

false

;

1984

g2.

GetSyn

().front())) {

2006  bool

has_gene_id_xref =

false

;

2009  if

((*xref)->IsSetId() && (*xref)->GetId().IsLocal()) {

2012  if

(gene_feats.size() > 0) {

2013

has_gene_id_xref =

true

;

2021  if

(has_gene_id_xref) {

2033  size_t

num_genes = 0;

2035  size_t

num_trans_spliced = 0;

2036  bool

equivalent =

false

;

2047  string label

=

"?"

;

2048  size_t

num_match_by_locus = 0;

2049  size_t

num_match_by_locus_tag = 0;

2051  for

( ; gene_it; ++gene_it) {

2052  if

(gene_xref && gene_xref->

IsSetLocus

() &&

2055

num_match_by_locus++;

2061

num_match_by_locus_tag++;

2067  "Feature has Gene Xref with locus_tag but no locus, gene with locus_tag and locus exists"

);

2074  if

(

len

<

max

|| num_genes == 0) {

2077

num_trans_spliced = 0;

2080

num_trans_spliced++;

2082

equivalent =

false

;

2083

prev_gene = gene_it;

2084

}

else if

(

len

==

max

) {

2089

num_trans_spliced++;

2098  if

(num_genes > 1 &&

2103

}

else if

(equivalent) {

2105  "Feature overlapped by " 2107

+

" identical-length equivalent genes but has no cross-reference"

);

2110  "Feature overlapped by " 2112

+

" identical-length genes but has no cross-reference"

);

2114

}

else if

(num_genes == 1

2120  const CGb_qual

& qual = **qual_iter;

2126  "Redundant allele qualifier ("

+ allele +

2127  ") on gene and feature"

);

2130  "Mismatched allele qualifier on gene ("

+ allele +

2131  ") and feature ("

+ qual.

GetVal

() +

")"

);

2142  const string

& allele = gene_xref->

GetAllele

();

2145  const CGb_qual

& qual = **qual_iter;

2151  "Redundant allele qualifier ("

+ allele +

2152  ") on gene and feature"

);

2155  "Mismatched allele qualifier on gene ("

+ allele +

2156  ") and feature ("

+ qual.

GetVal

() +

")"

);

2162  if

(num_match_by_locus == 0 && num_match_by_locus_tag == 0) {

2174  const CSeq_id

*

id

= loc.GetId();

2192  "Feature has gene locus_tag cross-reference but no equivalent gene feature exists"

);

2197  "Feature has gene locus cross-reference but no equivalent gene feature exists"

);

2216  if

(it->IsSetQual() &&

NStr::Equal

(it->GetQual(),

"old_locus_tag"

)

2239  for

(

auto

it : feat.

GetQual

()) {

2279  string

gene_old_locus_tag;

2282  if

((*it)->IsSetQual() &&

NStr::Equal

((*it)->GetQual(),

"old_locus_tag"

)

2283

&& (*it)->IsSetVal() && !

NStr::IsBlank

((*it)->GetVal())) {

2284

gene_old_locus_tag = (*it)->GetVal();

2291  "Old locus tag on feature ("

+ old_locus_tag

2292

+

") does not match that on gene ("

+ gene_old_locus_tag +

")"

);

2303  "old_locus_tag without inherited locus_tag"

);

2318  if

( imp_loc.find(

"one-of"

) != string::npos ) {

2320  "ImpFeat loc "

+ imp_loc +

2321  " has obsolete 'one-of' text for feature "

+

key

);

2326  if

( imp_loc != temp_loc ) {

2328  "ImpFeat loc "

+ imp_loc +

" does not equal feature location "

+

2329

temp_loc +

" for feature "

+

key

);

2346  bool

found =

false

;

2362  if

((*it)->IsOther()) {

2385  " for feature "

+

key

);

2429  if

((*it)->IsOther() && (*it)->GetTextseq_Id()->IsSetAccession()

2448  bool

has_sfp_pseudo =

false

;

2449  bool

has_gene_pseudo =

false

;

2452  if

(it->IsSetQual() &&

2455

sfp_pseudo = it->GetVal();

2456

has_sfp_pseudo =

true

;

2461  for

(

auto

it : gene->

GetQual

()) {

2462  if

(it->IsSetQual() &&

2465

gene_pseudo = it->GetVal();

2466

has_gene_pseudo =

true

;

2471  if

(!has_sfp_pseudo && !has_gene_pseudo) {

2473

}

else if

(!has_sfp_pseudo) {

2475

}

else if

(has_sfp_pseudo && !has_gene_pseudo) {

2477  msg

+=

" has pseudogene qualifier, gene does not"

;

2481  string msg

=

"Different pseudogene values on "

;

2483  msg

+=

" ("

+ sfp_pseudo +

") and gene ("

+ gene_pseudo +

")"

;

2534  "Gene locus_tag does not match general ID of product"

);

2545  for

(

char

ch : src) {

2546  unsigned char

chu = ch;

2547  if

(chu > 31 && chu < 128) {

2563  const string

&

str

= *it;

2565  const char

& ch = *c_it;

2566  unsigned char

chu = ch;

2567  if

(ch > 127 || (ch < 32 && ch !=

'\t'

&& ch !=

'\r'

&& ch !=

'\n'

)) {

2584  for

(

auto

it :

prot

.GetName()) {

2585  if

(

prot

.IsSetEc() && !

prot

.IsSetProcessed()

2591  "Unknown or hypothetical protein should not have EC number"

);

2598  "protein description "

+

prot

.GetDesc() +

" has SGML"

);

2604  "Comment has same value as protein description"

);

2609  "Apparent EC number in protein comment"

);

2616  if

(

prot

.IsSetName() &&

prot

.GetName().size() > 0) {

2619  "Apparent EC number in protein title"

);

2624  if

(

prot

.CanGetDb () ) {

2627  if

( (!

prot

.IsSetName() ||

prot

.GetName().empty()) &&

2628

(!

prot

.IsSetProcessed()

2633  "Protein feature has description but no name"

);

2634

}

else if

(

prot

.IsSetActivity() && !

prot

.GetActivity().empty()) {

2636  "Protein feature has function but no name"

);

2637

}

else if

(

prot

.IsSetEc() && !

prot

.GetEc().empty()) {

2639  "Protein feature has EC number but no name"

);

2642  "Protein feature has no name"

);

2657  if

(

prot

.IsSetProcessed() ) {

2658

processed =

prot

.GetProcessed();

2664  if

(

prot

.IsSetName() &&

2665

!

prot

.GetName().empty() &&

2666

!

prot

.GetName().front().empty() ) {

2669  if

(

prot

.CanGetDesc() && !

prot

.GetDesc().empty() ) {

2672  if

(

prot

.CanGetEc() && !

prot

.GetEc().empty() ) {

2675  if

(

prot

.CanGetActivity() && !

prot

.GetActivity().empty() ) {

2678  if

(

prot

.CanGetDb() && !

prot

.GetDb().empty() ) {

2684  "There is a protein feature where all fields are empty"

);

2693  "'hypothetical protein"

,

2696  "alternatively spliced"

,

2697  "bacteriophage hypothetical protein"

,

2700  "cnserved hypothetical protein"

,

2701  "conesrved hypothetical protein"

,

2702  "conserevd hypothetical protein"

,

2703  "conserved archaeal protein"

,

2704  "conserved domain protein"

,

2705  "conserved hypohetical protein"

,

2706  "conserved hypotehtical protein"

,

2707  "conserved hypotheical protein"

,

2708  "conserved hypothertical protein"

,

2709  "conserved hypothetcial protein"

,

2710  "conserved hypothetical"

,

2711  "conserved hypothetical exported protein"

,

2712  "conserved hypothetical integral membrane protein"

,

2713  "conserved hypothetical membrane protein"

,

2714  "conserved hypothetical phage protein"

,

2715  "conserved hypothetical prophage protein"

,

2716  "conserved hypothetical protein"

,

2717  "conserved hypothetical protein - phage associated"

,

2718  "conserved hypothetical protein fragment 3"

,

2719  "conserved hypothetical protein, fragment"

,

2720  "conserved hypothetical protein, putative"

,

2721  "conserved hypothetical protein, truncated"

,

2722  "conserved hypothetical protein, truncation"

,

2723  "conserved hypothetical protein."

,

2724  "conserved hypothetical protein; possible membrane protein"

,

2725  "conserved hypothetical protein; putative membrane protein"

,

2726  "conserved hypothetical proteins"

,

2727  "conserved hypothetical protien"

,

2728  "conserved hypothetical transmembrane protein"

,

2729  "conserved hypotheticcal protein"

,

2730  "conserved hypthetical protein"

,

2731  "conserved in bacteria"

,

2732  "conserved membrane protein"

,

2733  "conserved protein"

,

2734  "conserved protein of unknown function"

,

2735  "conserved protein of unknown function ; putative membrane protein"

,

2736  "conserved unknown protein"

,

2737  "conservedhypothetical protein"

,

2738  "conserverd hypothetical protein"

,

2739  "conservered hypothetical protein"

,

2740  "consrved hypothetical protein"

,

2741  "converved hypothetical protein"

,

2745  "duplicated hypothetical protein"

,

2750  "homeodomain protein"

,

2752  "hyopthetical protein"

,

2754  "hypotheical protein"

,

2755  "hypothertical protein"

,

2756  "hypothetcical protein"

,

2758  "hypothetical protein"

,

2759  "hypothetical conserved protein"

,

2760  "hypothetical exported protein"

,

2761  "hypothetical novel protein"

,

2762  "hypothetical orf"

,

2763  "hypothetical phage protein"

,

2764  "hypothetical prophage protein"

,

2765  "hypothetical protein (fragment)"

,

2766  "hypothetical protein (multi-domain)"

,

2767  "hypothetical protein (phage associated)"

,

2768  "hypothetical protein - phage associated"

,

2769  "hypothetical protein fragment"

,

2770  "hypothetical protein fragment 1"

,

2771  "hypothetical protein predicted by genemark"

,

2772  "hypothetical protein predicted by glimmer"

,

2773  "hypothetical protein predicted by glimmer/critica"

,

2774  "hypothetical protein, conserved"

,

2775  "hypothetical protein, phage associated"

,

2776  "hypothetical protein, truncated"

,

2777  "hypothetical protein-putative conserved hypothetical protein"

,

2778  "hypothetical protein."

,

2779  "hypothetical proteins"

,

2780  "hypothetical protien"

,

2781  "hypothetical transmembrane protein"

,

2782  "hypothetoical protein"

,

2783  "hypothteical protein"

,

2784  "identified by sequence similarity; putative; orf located~using blastx/framed"

,

2785  "identified by sequence similarity; putative; orf located~using blastx/glimmer/genemark"

,

2787  "membrane protein, putative"

,

2789  "narrowly conserved hypothetical protein"

,

2792  "orf, conserved hypothetical protein"

,

2793  "orf, hypothetical"

,

2794  "orf, hypothetical protein"

,

2795  "orf, hypothetical, fragment"

,

2796  "orf, partial conserved hypothetical protein"

,

2797  "orf; hypothetical protein"

,

2798  "orf; unknown function"

,

2800  "partial cds, hypothetical"

,

2801  "partially conserved hypothetical protein"

,

2802  "phage hypothetical protein"

,

2803  "phage-related conserved hypothetical protein"

,

2804  "phage-related protein"

,

2806  "possible hypothetical protein"

,

2808  "predicted coding region"

,

2809  "predicted protein"

,

2810  "predicted protein (pseudogene)"

,

2811  "predicted protein family"

,

2812  "product uncharacterised protein family"

,

2814  "protein of unknown function"

,

2817  "putative conserved protein"

,

2818  "putative exported protein"

,

2819  "putative hypothetical protein"

,

2820  "putative membrane protein"

,

2821  "putative orf; unknown function"

,

2822  "putative phage protein"

,

2823  "putative protein"

,

2825  "repeats containing protein"

,

2827  "ribosomal protein"

,

2830  "small hypothetical protein"

,

2831  "transmembrane protein"

,

2834  "trp-repeat protein"

,

2835  "truncated conserved hypothetical protein"

,

2836  "truncated hypothetical protein"

,

2837  "uncharacterized conserved membrane protein"

,

2838  "uncharacterized conserved protein"

,

2839  "uncharacterized conserved secreted protein"

,

2840  "uncharacterized protein"

,

2841  "uncharacterized protein conserved in archaea"

,

2842  "uncharacterized protein conserved in bacteria"

,

2843  "unique hypothetical"

,

2844  "unique hypothetical protein"

,

2847  "unknown function"

,

2850  "unknown, conserved protein"

,

2851  "unknown, hypothetical"

,

2852  "unknown-related protein"

,

2853  "unknown; predicted coding region"

,

2855  "unnamed protein product"

,

2856  "very hypothetical protein" 2868  if

(!

prot

.IsSetName()) {

2869  if

(!

prot

.IsSetProcessed() ||

2873  "Protein name is not set"

);

2880  if

(search.empty()) {

2882  "Protein name is empty"

);

2883

}

else if

(sc_BadProtName.find (search.c_str()) != sc_BadProtName.end()

2891  "Uninformative protein name '"

+ it +

"'"

);

2907

(it) +

" is not in proper EC_number format"

);

2909  const string

& ec_number = it;

2915  "EC_number "

+ ec_number +

" was deleted"

);

2920  "EC_number "

+ ec_number +

" was transferred and is no longer valid"

);

2925  if

(pos == string::npos || !

isdigit

(ec_number.c_str()[pos + 1])) {

2927

ec_number +

" is not a legal value for qualifier EC_number"

);

2930

ec_number +

" is not a legal preliminary value for qualifier EC_number"

);

2946  bool

report_name =

true

;

2948  if

(pos == string::npos) {

2950

}

else if

(prot_name.length() - pos < 5) {

2953

report_name =

false

;

2958  "Protein name ends with bracket and may contain organism name"

);

2964  if

(id_it->IsOther()

2965

&& id_it->GetOther().IsSetAccession()

2967

prot_name.substr(21))) {

2969  "Hypothetical protein reference does not match accession"

);

2978  "Comment has same value as protein name"

);

2983  "Protein name has internal PMID"

);

2989

&&

NStr::FindCase

(prot_name,

"methyltransferase"

) == string::npos

2991  if

(

NStr::EqualNocase

(prot_name,

"ribulose-1,5-bisphosphate carboxylase/oxygenase"

)) {

2993

}

else if

(!

NStr::EqualNocase

(prot_name,

"ribulose-1,5-bisphosphate carboxylase/oxygenase large subunit"

)

2994

&& !

NStr::EqualNocase

(prot_name,

"ribulose-1,5-bisphosphate carboxylase/oxygenase small subunit"

)) {

2996  "Nonstandard ribulose bisphosphate protein name"

);

3006  "protein name "

+ prot_name +

" has SGML"

);

3023  if

(!

prot

)

return

;

3026  if

(! mi_i)

return

;

3031  const

CSeq_loc& prot_loc =

prot

->GetLocation();

3035  bool

conflict =

false

;

3050  "Molinfo completeness and protein feature partials conflict"

);

3061  if

(

rna

.IsSetType()) {

3062

rna_type =

rna

.GetType();

3066  if

(

rna

.CanGetExt() &&

rna

.GetExt().IsName()) {

3067  const string

& rna_name =

rna

.GetExt().GetName();

3071  "rRNA name "

+ rna_name +

" has SGML"

);

3080  bool

pseudo = feat_pseudo;

3102

rna_typename +

" has no name"

);

3110  "RNA type 0 (unknown) not supported"

);

3130  "A pseudo RNA should not have a product"

);

3131

}

else if

(pseudo) {

3133  "An RNA overlapped by a pseudogene should not have a product"

);

3180  "Type of RNA does not match MolInfo of product Bioseq"

);

3214  "tRNA data structure on non-tRNA feature"

);

3221  if

( anticodon_len != 3 ) {

3223  "Anticodon is not 3 bases in length"

);

3231  "Anticodon location not in tRNA"

);

3252  "Unparsed anticodon qualifier in tRNA"

);

3258  "Unparsed product qualifier in tRNA"

);

3265  if

(

rna

.IsSetExt() &&

3268  "Unparsed product qualifier in tRNA"

);

3271  "Missing encoded amino acid qualifier in tRNA"

);

3276  bool

isLessThan100 =

false

;

3278

CSeq_loc_CI li(loc);

3280  TSeqPos

last_start = li.GetRange().GetFrom();

3281  TSeqPos

last_stop = li.GetRange().GetTo();

3283

last_id->

Assign

(li.GetSeq_id());

3287  TSeqPos

this_start = li.GetRange().GetFrom();

3288  TSeqPos

this_stop = li.GetRange().GetTo();

3289  if

(

abs

((

int

)this_start - (

int

)last_stop) < 100 ||

abs

((

int

)this_stop - (

int

)last_start) < 100) {

3290  if

(li.GetSeq_id().Equals(*last_id)) {

3292

isLessThan100 =

true

;

3298  for

(

auto

id_it : last_bsh.

GetId

()) {

3299  if

(id_it.GetSeqId()->Equals(li.GetSeq_id())) {

3300

isLessThan100 =

true

;

3307

last_start = this_start;

3308

last_stop = this_stop;

3309

last_id->

Assign

(li.GetSeq_id());

3315  if

( grp ==

NULL

) {

3324  if

( !pseudo && grp !=

NULL

) {

3328  if

(isLessThan100 && ! pseudo) {

3334  if

(

source

.IsSetLineage()) {

3335  string

lineage =

source

.GetLineage();

3338  "tRNA intron in bacteria is less than 100 bp"

);

3349  bool

ordered =

true

;

3350  bool

adjacent =

false

;

3351  bool

unmarked_strand =

false

;

3352  bool

mixed_strand =

false

;

3355  for

(CSeq_loc_CI curr(anticodon); curr; ++curr) {

3357  if

(curr.GetEmbeddingSeq_loc().IsInt()) {

3359

}

else if

(curr.GetEmbeddingSeq_loc().IsPnt()) {

3367

curr.GetEmbeddingSeq_loc().GetLabel(&lbl);

3369  "Anticodon location ["

+ lbl +

"] out of range"

);

3372  if

(

prev

&& curr &&

3378  if

(prev_range.

GetTo

() < curr_range.

GetTo

()) {

3381  if

(curr_range.

GetTo

() + 1 == prev_range.

GetFrom

()) {

3385  if

(prev_range.

GetTo

() > curr_range.

GetTo

()) {

3388  if

(prev_range.

GetTo

() + 1 == curr_range.

GetFrom

()) {

3395  if

( curr_range == prev_range && curr_strand == prev_strand ) {

3397  "Duplicate anticodon exons in location"

);

3399  if

( curr_strand != prev_strand ) {

3401

unmarked_strand =

true

;

3403

unmarked_strand =

true

;

3405

mixed_strand =

true

;

3413  "Adjacent intervals in Anticodon"

);

3417  ENa_strand

ac_strand = anticodon.GetStrand();

3420  "Anticodon strand and tRNA strand do not match."

);

3423  "Anticodon strand and tRNA strand do not match."

);

3427  bool

trans_splice =

false

;

3430

trans_splice =

true

;

3433  if

(!trans_splice) {

3435

anticodon.GetLabel(&loc_lbl);

3438  "Mixed strands in Anticodon ["

+ loc_lbl +

"]"

);

3440  if

(unmarked_strand) {

3442  "Mixed plus and unknown strands in Anticodon ["

+ loc_lbl +

"]"

);

3446  "Intervals out of order in Anticodon ["

+ loc_lbl +

"]"

);

3452 int s_LegalNcbieaaValues

[] = { 42, 65, 66, 67, 68, 69, 70, 71, 72, 73,

3453

74, 75, 76, 77, 78, 79, 80, 81, 82, 83,

3454

84, 85, 86, 87, 88, 89, 90 };

3457  "---"

,

"Ala"

,

"Asx"

,

"Cys"

,

"Asp"

,

"Glu"

,

"Phe"

,

"Gly"

,

"His"

,

"Ile"

,

3458  "Lys"

,

"Leu"

,

"Met"

,

"Asn"

,

"Pro"

,

"Gln"

,

"Arg"

,

"Ser"

,

"Thr"

,

3459  "Val"

,

"Trp"

,

"OTHER"

,

"Tyr"

,

"Glx"

,

"Sec"

,

"TERM"

,

"Pyl"

,

"Xle" 3473

}

catch

(

const

std::exception& ) {

3482  const

list<CRef<CGenetic_code> >& codes = code_table.

Get

();

3484  for

( list<

CRef<CGenetic_code>

>::const_iterator code_it = codes.begin(), code_it_end = codes.end(); code_it != code_it_end; ++code_it ) {

3485  if

((*code_it)->GetId() == gcode) {

3486  return

(*code_it)->GetName();

3507  unsigned char

aa = 0, orig_aa;

3508

vector<char> seqData;

3537  bool

found =

false

;

3551  bool

mustbemethionine =

false

;

3556

mustbemethionine =

true

;

3560  if

(mustbemethionine) {

3564  "Initiation tRNA claims to be tRNA-"

+ aanm +

3565  ", but should be tRNA-Met"

);

3581  if

( ncbieaa.length() != 64 ) {

3589  string

aaname =

buf

;

3595  bool

modified_codon_recognition =

false

;

3596  bool

rna_editing =

false

;

3600

modified_codon_recognition =

true

;

3603

rna_editing =

true

;

3607

vector<string> recognized_codon_values;

3608

vector<unsigned char> recognized_taa_values;

3611  if

(*iter == 255)

continue

;

3616  " is greater than maximum 63"

);

3618

}

else if

(*iter < 0) {

3621  " is less than 0"

);

3625  if

( !modified_codon_recognition && !rna_editing ) {

3626  unsigned char taa

= ncbieaa[*iter];

3628

recognized_codon_values.push_back (codon);

3629

recognized_taa_values.push_back (

taa

);

3632  if

( (aa ==

'U'

) && (

taa

==

'*'

) && (*iter == 14) ) {

3640  "Codon recognized by tRNA ("

+ codon +

") does not match amino acid (" 3641

+ aaname +

") specified by genetic code (" 3649  string

anticodon =

"?"

;

3650

vector<string> codon_values;

3651

vector<unsigned char> taa_values;

3659  if

(codon.length() > 3) {

3660

codon = codon.substr (0, 3);

3666  char

ch = anticodon.c_str()[0];

3684

string::iterator str_it = wobble.begin();

3685  while

(str_it != wobble.end()) {

3688  if

(index < 64 && index > -1) {

3689  unsigned char taa

= ncbieaa[index];

3690

taa_values.push_back(

taa

);

3691

codon_values.push_back(codon);

3697  if

(anticodon.length() > 3) {

3698

anticodon = anticodon.substr(0, 3);

3701

}

catch

(

const

std::exception& ) {

3704  if

(codon_values.size() > 0) {

3707  for

(

size_t i

= 0;

i

< codon_values.size();

i

++) {

3713  if

(aa ==

'U'

&&

NStr::Equal

(anticodon,

"UCA"

)) {

3715

}

else if

(aa ==

'O'

&&

NStr::Equal

(anticodon,

"CUA"

)) {

3717

}

else if

(aa ==

'I'

&&

NStr::Equal

(anticodon,

"CAU"

)) {

3723  "Codons predicted from anticodon ("

+ anticodon

3724

+

") cannot produce amino acid ("

+ aaname +

")"

);

3729  if

(recognized_codon_values.size() > 0) {

3731  for

(

size_t i

= 0;

i

< codon_values.size() && !

ok

;

i

++) {

3732  for

(

size_t

j = 0; j < recognized_codon_values.size() && !

ok

; j++) {

3733  if

(

NStr::Equal

(codon_values[

i

], recognized_codon_values[j])) {

3735

}

else if

(

NStr::Equal

(codon_values[

i

],

"ATG"

) && aa ==

'I'

) {

3745  "Codon recognized cannot be produced from anticodon (" 3746

+ anticodon +

")"

);

3753  if

(orig_aa == 0 || orig_aa == 255) {

3763  if

(idx == 0 || idx >= 28) {

3783  bool

found_bad =

false

;

3784  for

(

auto

it : scores) {

3798  "tRNA-rRNA overlap"

);

3805  "tRNA overlaps CDS"

);

3812  size_t

mismatches = 0;

3826  "Unable to transcribe mRNA"

);

3832  "Unable to fetch mRNA transcript '"

+

label

+

"'"

);

3838  if

((*it)->IsOther()) {

3866  "] less than "

+ farstr +

"product length ["

+

3873

+

"] less than "

+ farstr +

"product length [" 3879  "] less than "

+ farstr +

"product length ["

+

3885  "greater than "

+ farstr +

"product length ["

+

3892  " bases between the transcript and "

+ farstr +

"product sequence"

);

3896  "mRNA has exception but passes transcription test"

);

3901  "mRNA has unclassified exception but only difference is "

+

NStr::SizetToString

(mismatches)

3906  "mRNA has transcribed product replaced exception"

);

3957  "protein_id should not be a gbqual on an mRNA feature"

);

3961  "transcript_id should not be a gbqual on an mRNA feature"

);

3967  if

(

rna

.IsSetExt() &&

rna

.GetExt().IsName()) {

3968  const string

& rna_name =

rna

.GetExt().GetName();

3973  "mRNA feature product indicates it should be a tRNA feature"

);

3978  "mRNA name "

+ rna_name +

" has SGML"

);

3996  "Product Bioseq of mRNA feature is not " 3997  "packaged in the record"

);

4006  "Identical transcript IDs found on multiple mRNAs"

);

4037  const CGene_ref

* genomicgrp =

nullptr

;

4049  bool

found_match =

false

;

4050  bool

found_mismatch =

false

;

4051  for

(

int i

= 1;

i

<= 4;

i

++) {

4054  if

(

gen

!=

""

&&

rna

!=

""

) {

4056

found_match =

true

;

4058

found_mismatch =

true

;

4063  if

(found_mismatch) {

4065  "Found match and mismatch between gene on mRNA bioseq and gene on genomic bioseq"

,

4068

}

else if

(found_mismatch) {

4070  "Gene on mRNA bioseq does not match gene on genomic bioseq"

,

4090  "Focus must be on BioSource descriptor, not BioSource feature."

);

4111  "BioSource descriptor must have focus or transgenic " 4112  "when BioSource feature with different taxname is " 4130  "PolyA_site should be a single point"

);

4162  "sig/mat/transit_peptide feature cannot be associated with a " 4163  "protein product of a coding region feature"

);

4166  "Peptide processing feature should be converted to the " 4167  "appropriate protein feature subtype"

);

4191  "Start and stop of "

+

key

+

" are out of frame with CDS codons"

);

4196  "Start and stop of "

+

key

+

" are out of frame with CDS codons"

);

4200  "Start of "

+

key

+

" is out of frame with CDS codons"

);

4204  "Stop of "

+

key

+

" is out of frame with CDS codons"

);

4216  bool

pseudo = feat_pseudo;

4233  bool

pseudo = feat_pseudo;

4243  "Introns should be at least 10 nt long"

);

4255  if

(partial5 && partial3) {

4266  if

(scores.size() > 0) {

4277  if

(scores.size() > 0) {

4302  bool

donor_in_gap =

false

;

4303  bool

acceptor_in_gap =

false

;

4307

donor_in_gap =

true

;

4312

acceptor_in_gap =

true

;

4315  if

(!partial5 && !partial3) {

4316  if

(donor_in_gap && acceptor_in_gap) {

4323  bool

donor_good =

false

;

4324  bool

acceptor_good =

false

;

4327  if

(!partial5 && !donor_in_gap) {

4330

donor[0] = vec[end5 - 1];

4331

donor[1] = vec[end5];

4337

donor[0] = vec[end5];

4338

donor[1] = vec[end5 + 1];

4345  if

(!partial3 && !acceptor_in_gap) {

4348

acceptor[0] = vec[end3];

4349

acceptor[1] = vec[end3 + 1];

4350

acceptor_good =

true

;

4355

acceptor[0] = vec[end3 - 1];

4356

acceptor[1] = vec[end3];

4357

acceptor_good =

true

;

4363  if

(!partial5 && !partial3) {

4364  if

(donor_good && acceptor_good) {

4373  if

(!donor_in_gap) {

4387  "Splice donor consensus (GT) not found at start of terminal intron, position " 4392  "Splice donor consensus (GT) not found at start of intron, position " 4401  if

(!acceptor_in_gap) {

4404  if

(acceptor_good) {

4414  "Splice acceptor consensus (AG) not found at end of terminal intron, position " 4419  "Splice acceptor consensus (AG) not found at end of intron, position " 4439  bool

is_short =

false

;

4452

}

else if

(partial_right &&

4473  "A note or other qualifier is required for a misc_feature"

);

4479  string

content_label;

4481  if

(

NStr::Equal

(content_label,

"cold-shock protein"

)) {

4483  "cspA misc_feature overlapped by cold-shock protein CDS"

);

4496  bool

is_far_delta =

false

;

4500

is_far_delta =

true

;

4504  if

( !(*sg) )

continue

;

4506

is_far_delta =

false

;

4510  if

(! is_far_delta) {

4512  "An assembly_gap feature should only be on a contig record"

);

4536  "Assembly_gap flanked by Ns on 5' and 3' sides"

);

4539  "Assembly_gap flanked by Ns on 5' side"

);

4542  "Assembly_gap flanked by Ns on 3' side"

);

4545  for

(

size_t i

= 0;

i

< sequence.size();

i

++) {

4546  if

(sequence[

i

] !=

'N'

) {

4563  if

((*it)->IsSetQual() &&

NStr::EqualNocase

((*it)->GetQual(),

"estimated_length"

)

4567  if

(estimated_length != loc_len) {

4574

}

catch

(

const

std::exception& ) {

4581  if

( !vec.

empty

() ) {

4586  unsigned int

num_gap = 0;

4588

string::iterator it = vec_data.begin();

4589  while

(it != vec_data.end()) {

4597

}

else if

(*it !=

'-'

) {

4603  if

(num_real > 0 && num_n > 0) {

4608

}

else if

(num_real > 0) {

4612

}

else if

(num_n > 0) {

4620

+

" gap characters"

);

4625

}

catch

(

const

std::exception& ) {

4638  "NULL feature key"

);

4682  "Feature key Import is no longer legal"

);

4687  switch

( subtype ) {

4693  "Unknown feature key "

+

key

);

4700  "Feature key "

+

key

+

" is no longer legal"

);

4709  "Pre/pro protein feature cannot be associated with a " 4710  "protein product of a coding region feature"

);

4713  "Peptide processing feature should be converted to the " 4714  "appropriate protein feature subtype"

);

4728  "RNA feature should be converted to the appropriate RNA feature " 4729  "subtype, location should be converted manually"

);

4738  "ImpFeat CDS should be pseudo"

);

4744  "ImpFeat CDS with /translation found"

);

4751  "Unknown feature key "

+

key

);

4758  "repeat_region has no qualifiers"

);

4766  const string

&

val

= (*gbqual)->GetVal();

4774  "repeat_region has no qualifiers except rpt_type other"

);

4784  const string

&

val

= (*gbqual)->GetVal();

4785  bool

missing =

true

;

4795  "The regulatory_class 'other' is missing the required /note"

);

4805  if

(

NStr::CompareNocase

( (*gbqual)->GetQual(),

"recombination_class"

) != 0 )

continue

;

4806  const string

&

val

= (*gbqual)->GetVal();

4807  if

( recomb_values.

find

(

val

.c_str()) == recomb_values.

end

() ) {

4811  "The recombination_class 'other' is missing the required /note"

);

static CRef< CScope > m_Scope

@ eExtreme_Positional

numerical value

@ eExtreme_Biological

5' and 3'

@ eErr_SEQ_FEAT_WrongQualOnImpFeat

@ eErr_SEQ_FEAT_NotSpliceConsensusAcceptor

@ eErr_SEQ_FEAT_rRNADoesNotHaveProduct

@ eErr_SEQ_FEAT_WholeLocation

@ eErr_SEQ_FEAT_MobileElementInvalidQualifier

@ eErr_SEQ_FEAT_DuplicateAnticodonInterval

@ eErr_SEQ_FEAT_ShortTRNAIntron

@ eErr_SEQ_FEAT_MinusStrandProtein

@ eErr_SEQ_FEAT_NotSpliceConsensusDonor

@ eErr_SEQ_FEAT_GeneXrefWithoutLocus

@ eErr_SEQ_FEAT_GenesInconsistent

@ eErr_SEQ_FEAT_PseudoRnaHasProduct

@ eErr_SEQ_FEAT_EcNumberDataMissing

@ eErr_SEQ_FEAT_InvalidProductOnGene

@ eErr_SEQ_FEAT_BadRRNAcomponentOverlapTRNA

@ eErr_SEQ_FEAT_mRNAUnnecessaryException

@ eErr_SEQ_FEAT_UnknownImpFeatQual

@ eErr_SEQ_FEAT_InvalidCompareBadAccession

@ eErr_SEQ_FEAT_InvalidCompareMissingVersion

@ eErr_SEQ_FEAT_InvalidRptUnitRange

@ eErr_GENERIC_SgmlPresentInText

@ eErr_SEQ_FEAT_BadAnticodonAA

@ eErr_SEQ_FEAT_RnaProductMismatch

@ eErr_SEQ_FEAT_FeatureBeginsOrEndsInGap

@ eErr_SEQ_FEAT_EcNumberInProteinName

@ eErr_SEQ_FEAT_InvalidTRNAdata

@ eErr_SEQ_FEAT_UnnecessaryException

@ eErr_SEQ_FEAT_AssemblyGapFeatureProblem

@ eErr_SEQ_FEAT_OldLocusTagWithoutLocusTag

@ eErr_SEQ_FEAT_NotSpliceConsensusAcceptorTerminalIntron

@ eErr_SEQ_FEAT_AnticodonMixedStrand

@ eErr_SEQ_FEAT_UnparsedtRNAProduct

@ eErr_SEQ_FEAT_InconsistentPseudogeneValue

@ eErr_SEQ_FEAT_GeneXrefWithoutGene

@ eErr_SEQ_FEAT_ReplacedEcNumber

@ eErr_SEQ_FEAT_PartialsInconsistent

@ eErr_SEQ_FEAT_InvalidQualifierValue

@ eErr_SEQ_FEAT_DuplicateGeneOntologyTerm

@ eErr_SEQ_FEAT_ProtRefHasNoData

@ eErr_SEQ_FEAT_NotSpliceConsensusDonorTerminalIntron

@ eErr_SEQ_FEAT_BadTrnaAA

@ eErr_SEQ_FEAT_WrongQualOnFeature

@ eErr_SEQ_FEAT_ProductFetchFailure

@ eErr_SEQ_FEAT_MismatchedAllele

@ eErr_SEQ_FEAT_RepeatSeqDoNotMatch

@ eErr_SEQ_FEAT_MissingQualOnImpFeat

@ eErr_SEQ_FEAT_InvalidRptUnitSeqCharacters

@ eErr_SEQ_FEAT_TranscriptLen

@ eErr_SEQ_FEAT_RubiscoProblem

@ eErr_SEQ_FEAT_InvalidAlleleDuplicates

@ eErr_SEQ_FEAT_ImpCDSnotPseudo

@ eErr_SEQ_FEAT_BadCDScomponentOverlapTRNA

@ eErr_SEQ_FEAT_BadEcNumberValue

@ eErr_SEQ_FEAT_EcNumberEmpty

@ eErr_SEQ_FEAT_ImpCDShasTranslation

@ eErr_SEQ_FEAT_PeptideFeatOutOfFrame

@ eErr_SEQ_FEAT_ProteinNameHasPMID

@ eErr_SEQ_FEAT_ImpFeatBadLoc

@ eErr_SEQ_FEAT_MissingQualOnFeature

@ eErr_SEQ_FEAT_PolyAsiteNotPoint

@ eErr_SEQ_FEAT_RepeatRegionNeedsNote

@ eErr_SEQ_FEAT_GeneXrefStrandProblem

@ eErr_SEQ_FEAT_PolyATail

@ eErr_SEQ_FEAT_MissingTrnaAA

@ eErr_GENERIC_NonAsciiAsn

@ eErr_SEQ_FEAT_UnparsedtRNAAnticodon

@ eErr_SEQ_FEAT_RefSeqInText

@ eErr_SEQ_FEAT_ErroneousException

@ eErr_SEQ_FEAT_ImproperBondLocation

@ eErr_SEQ_FEAT_InvalidPseudoQualifier

@ eErr_SEQ_FEAT_FeatureSeqIDCaseDifference

@ eErr_SEQ_FEAT_BadProductSeqId

@ eErr_SEQ_FEAT_PeptideFeatureLacksCDS

@ eErr_SEQ_FEAT_InvalidCompareRefSeqAccession

@ eErr_SEQ_FEAT_InvalidReplace

@ eErr_SEQ_FEAT_UnknownImpFeatKey

@ eErr_SEQ_FEAT_IdenticalMRNAtranscriptIDs

@ eErr_SEQ_FEAT_AssemblyGapCoversSequence

@ eErr_SEQ_FEAT_ShortIntron

@ eErr_SEQ_FEAT_SplitEcNumber

@ eErr_SEQ_FEAT_AssemblyGapAdjacentToNs

@ eErr_SEQ_FEAT_InvalidPunctuation

@ eErr_SEQ_FEAT_LocusTagProductMismatch

@ eErr_SEQ_FEAT_UnknownFeatureQual

@ eErr_SEQ_FEAT_TranscriptMismatches

@ eErr_SEQ_FEAT_IncorrectQualifierCapitalization

@ eErr_SEQ_FEAT_InvalidNumberQualifier

@ eErr_SEQ_FEAT_FeatureInsideGap

@ eErr_SEQ_FEAT_InvalidRNAFeature

@ eErr_SEQ_FEAT_tRNArange

@ eErr_SEQ_FEAT_GeneIdMismatch

@ eErr_SEQ_FEAT_MissingMRNAproduct

@ eErr_SEQ_FEAT_tRNAmRNAmixup

@ eErr_SEQ_FEAT_UndesiredProteinName

@ eErr_SEQ_FEAT_MrnaTransFail

@ eErr_SEQ_FEAT_InvalidInferenceValue

@ eErr_SEQ_FEAT_GeneXrefNeeded

@ eErr_SEQ_FEAT_InvalidType

@ eErr_SEQ_FEAT_SerialInComment

@ eErr_SEQ_FEAT_BadTrailingCharacter

@ eErr_SEQ_FEAT_IntervalBeginsOrEndsInGap

@ eErr_SEQ_FEAT_ProteinNameEndsInBracket

@ eErr_SEQ_FEAT_BadInternalCharacter

@ eErr_SEQ_FEAT_BadProteinName

@ eErr_SEQ_FEAT_MissingLocation

@ eErr_SEQ_FEAT_ExceptionMissingText

@ eErr_SEQ_FEAT_BadAnticodonCodon

@ eErr_SEQ_FEAT_BadTrailingHyphen

@ eErr_SEQ_FEAT_OldLocusTagMismtach

@ eErr_SEQ_FEAT_PseudoRnaViaGeneHasProduct

@ eErr_SEQ_FEAT_DeletedEcNumber

@ eErr_SEQ_FEAT_FeatureIsMostlyNs

@ eErr_SEQ_FEAT_InvalidMatchingReplace

@ eErr_INTERNAL_Exception

@ eErr_SEQ_FEAT_BadEcNumberFormat

@ eErr_SEQ_FEAT_BothStrands

@ eErr_SEQ_FEAT_ExceptionProblem

@ eErr_SEQ_FEAT_RedundantFields

@ eErr_SEQ_FEAT_ColdShockProteinProblem

@ eErr_SEQ_FEAT_TrnaCodonWrong

@ eErr_SEQ_FEAT_NoNameForProtein

@ eErr_SEQ_FEAT_RptUnitRangeProblem

@ eErr_SEQ_FEAT_InvalidVariationReplace

@ eErr_SEQ_FEAT_SeqLocOrder

@ eErr_SEQ_FEAT_AnticodonStrandConflict

@ eErr_SEQ_FEAT_InvalidRepeatUnitLength

@ eErr_SEQ_FEAT_VectorContamination

@ eErr_SEQ_FEAT_AbuttingIntervals

@ eErr_SEQ_FEAT_EcNumberInProteinComment

@ eErr_SEQ_FEAT_UnnecessaryCitPubEquiv

@ eErr_SEQ_FEAT_PartialProblem

@ eErr_SEQ_FEAT_RegulatoryClassOtherNeedsNote

@ eErr_SEQ_FEAT_MiscFeatureNeedsNote

@ eErr_SEQ_FEAT_FocusOnBioSourceFeature

@ eErr_SEQ_FEAT_PolyAsignalNotRange

@ eErr_SEQ_DESCR_BioSourceNeedsFocus

@ eErr_SEQ_FEAT_BadTrnaCodon

@ eErr_SEQ_FEAT_FeatureCrossesGap

@ eErr_SEQ_FEAT_SelfReferentialProduct

@ eErr_SEQ_FEAT_GapFeatureProblem

@ eErr_SEQ_FEAT_HypotheticalProteinMismatch

@ eErr_SEQ_FEAT_MissingGeneXref

@ eErr_SEQ_FEAT_RecombinationClassOtherNeedsNote

@ eErr_SEQ_FEAT_MissingExceptionFlag

bool IsOrganismEukaryote() const

int GetGenCode(int def=1) const

bool IsSkippable(void) const

bool IsKnownGap(size_t offset)

bool IsGap(size_t offset)

bool IsUnknownGap(size_t offset)

map< size_t, EGapType > TGapTypeMap

CGapCache(const CSeq_loc &loc, CBioseq_Handle bsh)

@Gb_qual.hpp User-defined methods of the data storage class.

static bool IsLegalMobileElementValue(const string &val)

static bool IsValidPseudogeneValue(const string &val)

static bool IsValidRptTypeValue(const string &val)

static const TLegalRecombinationClassSet & GetSetOfLegalRecombinationClassValues(void)

static const string & GetNcbieaa(int id)

static string IndexToCodon(int index)

static int CodonToIndex(char base1, char base2, char base3)

static const CGenetic_code_table & GetCodeTable(void)

CRef< feature::CFeatTree > GetFeatTreeFromCache(const CSeq_loc &loc, CScope &scope)

CConstRef< CSeq_feat > GetGeneFromCache(const CSeq_feat *feat, CScope &scope)

bool IsSuppressed(void) const

bool x_IsIntronShort(bool pseudo)

CMRNAValidator(const CSeq_feat &feat, CScope &scope, CValidError_imp &imp)

void x_ValidateMrnaGene()

CConstRef< CSeq_feat > m_Gene

void x_ValidateCommonMRNAProduct()

CPeptideValidator(const CSeq_feat &feat, CScope &scope, CValidError_imp &imp)

void x_ValidatePeptideOnCodonBoundary()

CConstRef< CSeq_feat > m_CDS

void x_ValidateSeqFeatLoc() override

void x_ValidateSeqFeatLoc() override

void x_ValidateECNumbers()

void x_ValidateProteinName(const string &prot_name)

void x_ReportUninformativeNames()

void x_ValidateMolinfoPartials()

static EECNumberFileStatus GetECNumAmbiguousStatus()

static EECNumberFileStatus GetECNumSpecificStatus()

@ eECFile_not_found

File was not found in expected directory.

static bool IsECNumberSplit(const string &old_ecno)

static bool IsValidECNumberFormat(const string &ecno)

Verify correct form of EC number.

static EECNumberFileStatus GetECNumDeletedStatus()

EECNumberStatus

Enzyme Commission number status.

@ eEC_replaced

Obsolete synonym for some other EC number.

@ eEC_unknown

Unrecognized; possibly malformed.

@ eEC_deleted

Withdrawn, with no (single?) replacement.

static EECNumberFileStatus GetECNumReplacedStatus()

static EECNumberStatus GetECNumberStatus(const string &ecno)

Determine an EC number's validity and specificity.

void x_ValidateTrnaOverlap()

void x_ValidateRnaTrans()

void x_ValidateRnaProductType()

void x_ValidateTrnaCodons()

void x_ReportRNATranslationProblems(size_t problems, size_t mismatches)

void x_ValidateAnticodon(const CSeq_loc &anticodon)

void x_ValidateTrnaData()

void x_ValidateTrnaType()

void x_ValidateRnaProduct(bool feat_pseudo, bool pseudo)

@RNA_ref.hpp User-defined methods of the data storage class.

static string GetRnaTypeName(const CRNA_ref::EType rna_type)

static SIZE_TYPE Convert(const CTempString &src, TCoding src_coding, TSeqPos pos, TSeqPos length, string &dst, TCoding dst_coding)

bool IsLegalQualifier(EQualifier qual) const

Test wheather a certain qualifier is legal for the feature.

@ eQual_mobile_element_type

static bool AllowStrandBoth(ESubtype subtype)

ESubtype GetSubtype(void) const

string GetKey(EVocabulary vocab=eVocabulary_full) const

const TQualifiers & GetMandatoryQualifiers(void) const

Get the list of all mandatory qualifiers for the feature.

static std::pair< EQualifier, CTempString > GetQualifierTypeAndValue(CTempString qual)

@ eSubtype_transit_peptide

@ eSubtype_bad

These no longer need to match the FEATDEF values in the C toolkit's objfdef.h.

@ eSubtype_mobile_element

static EQualifier GetQualifierType(CTempString qual)

convert qual string to enumerated value

static CTempString GetQualifierAsString(EQualifier qual)

Convert a qualifier from an enumerated value to a string representation or empty if not found.

static CTempString SubtypeValueToName(ESubtype eSubtype)

Turns a ESubtype into its string value which is NOT necessarily related to the identifier of the enum...

static const vector< string > & GetRegulatoryClassList()

namespace ncbi::objects::

static bool IsExceptionTextInLegalList(const string &exception_text, bool allow_refseq)

Indicates whether this specific text occurs in the list of legal exceptions.

const CGene_ref * GetGeneXref(void) const

See related function in util/feature.hpp.

static bool IsExceptionTextRefSeqOnly(const string &exception_text)

Indicates whether this specific text is a RefSeq-only exception.

static TIndex GetMapToIndex(CSeq_data::E_Choice from_type, CSeq_data::E_Choice to_type, TIndex from_idx)

static bool x_IsMostlyNs(const CSeq_loc &loc, CBioseq_Handle bsh)

CBioseq_Handle x_GetFeatureProduct(bool look_far, bool &is_far)

EDiagSev x_SeverityForConsensusSplice()

void x_ValidateBothStrands()

void ValidateCharactersInField(string value, string field_name)

void PostErr(EDiagSev sv, EErrType et, const string &msg)

void x_ValidateLabelVal(const string &val)

CSingleFeatValidator(const CSeq_feat &feat, CScope &scope, CValidError_imp &imp)

void x_ReportAcceptorSpliceSiteReadErrors(const CSpliceProblems::TSpliceProblem &problem, const string &label)

void x_ValidateRptUnitVal(const string &val, const string &key)

void x_ValidateGeneXRef()

void x_ValidateReplaceQual(const string &key, const string &qual_str, const string &val)

void x_CheckForNonAsciiCharacters()

CBioseq_Handle x_GetBioseqByLocation(const CSeq_loc &loc)

void x_ValidateSeqFeatDataType()

CBioseq_Handle m_ProductBioseq

static bool s_IsPseudo(const CSeq_feat &feat)

virtual bool x_ReportOrigProteinId()

CBioseq_Handle m_LocationBioseq

void x_ValidateCompareVal(const string &val)

void x_ValidateRptUnitSeqVal(const string &val, const string &key)

void x_ValidateExtUserObject()

void x_ValidateSeqFeatProduct()

void x_ReportDonorSpliceSiteReadErrors(const CSpliceProblems::TSpliceProblem &problem, const string &label)

void x_ValidateImpFeatQuals()

static bool s_GeneRefsAreEquivalent(const CGene_ref &g1, const CGene_ref &g2, string &label)

void x_ReportECNumFileStatus()

void x_ValidateGbQual(const CGb_qual &qual)

void x_ValidateGeneFeaturePair(const CSeq_feat &gene)

void x_ValidateOldLocusTag(const string &old_locus_tag)

virtual void x_ValidateFeatComment()

void x_ValidateNonImpFeat()

bool x_HasNamedQual(const string &qual_name)

void x_ValidateLocusTagGeneralMatch(CConstRef< CSeq_feat > gene)

static bool x_HasSeqLocBond(const CSeq_feat &feat)

void x_ReportPseudogeneConflict(CConstRef< CSeq_feat > gene)

void x_ValidateRptUnitRangeVal(const string &val)

void x_ValidateFeatPartialness()

static bool x_BioseqHasNmAccession(CBioseq_Handle bsh)

void ValidateSplice(bool gene_pseudo, bool check_all)

void x_ReportSpliceProblems(const CSpliceProblems &problems, const string &label)

bool x_AllowFeatureToMatchGapExactly()

static TSeqPos x_FindStartOfGap(CBioseq_Handle bsh, TSeqPos pos, CScope *scope)

static size_t x_CalculateLocationGaps(CBioseq_Handle bsh, const CSeq_loc &loc, vector< TSeqPos > &gap_starts)

virtual void x_ValidateSeqFeatLoc()

virtual void x_ValidateExceptText(const string &text)

static bool s_BioseqHasRefSeqThatStartsWithPrefix(CBioseq_Handle bsh, string prefix)

@ eLocationGapInternalIntervalEndpointInGap

@ eLocationGapCrossesUnknownGap

@ eLocationGapContainedInGapOfNs

@ eLocationGapContainedInGap

@ eLocationGapFeatureMatchesGap

static void x_LocHasStrandBoth(const CSeq_loc &feat, bool &both, bool &both_rev)

void x_ValidateImpFeatLoc()

const TSpliceProblemList & GetDonorProblems() const

void CalculateSpliceProblems(const CSeq_feat &feat, bool check_all, bool pseudo, CBioseq_Handle loc_handle)

vector< TSpliceProblem > TSpliceProblemList

@ eSpliceSiteRead_WrongNT

bool IsExceptionUnnecessary() const

bool AreErrorsUnexpected() const

pair< size_t, TSeqPos > TSpliceProblem

const TSpliceProblemList & GetAcceptorProblems() const

const_iterator find(const key_type &key) const

Return a const_iterator pointing to the specified element, or to the end if the element is not found.

const_iterator end() const

Return the end of the controlled sequence.

vector< CSeq_feat_Handle > TSeq_feat_Handles

CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...

Template class for iteration on objects of class C (non-medifiable version)

@ eInferenceValidCode_valid

static EInferenceValidCode ValidateInference(string inference, bool fetch_accession, CScope *scope=nullptr)

bool DoRubiscoTest() const

bool ReportSpliceAsError() const

void PostErr(EDiagSev sv, EErrType et, const string &msg, const CSerialObject &obj)

const CBioSourceKind & BioSourceKind() const

bool IsRemoteFetch() const

CConstRef< CSeq_feat > GetmRNAGivenProduct(const CBioseq &seq)

bool DoesAnyFeatLocHaveGI() const

bool IsLocusTagGeneralMatch() const

bool IgnoreInferences() const

void ValidateDbxref(const CDbtag &xref, const CSerialObject &obj, bool biosource=false, const CSeq_entry *ctx=nullptr)

bool IsSerialNumberInComment(const string &comment)

bool IsFarSequence(const CSeq_id &id)

const CTSE_Handle & GetTSE_Handle()

bool ValidateInferenceAccessions() const

bool IsHugeFileMode() const

void IncrementPseudogeneCount()

CConstRef< CSeq_feat > GetCDSGivenProduct(const CBioseq &seq)

CBioseq_Handle GetLocalBioseqHandle(const CSeq_id &id)

bool x_IsFarFetchFailure(const CSeq_loc &loc)

bool IsGenomeSubmission() const

void ValidateBioSource(const CBioSource &bsrc, const CSerialObject &obj, const CSeq_entry *ctx=nullptr)

void IncrementGeneXrefCount()

bool IsFarFetchCDSproducts() const

bool IsValidateExons() const

size_t GetCumulativeInferenceCount(void) const

bool IgnoreExceptions() const

void SetFarFetchFailure()

bool IsRefSeqConventions() const

SValidatorContext & SetContext()

bool IsIndexerVersion() const

CGeneCache & GetGeneCache()

bool IsSmallGenomeSet() const

bool HasRefSeq(void) const

void ValidateSeqLoc(const CSeq_loc &loc, const CBioseq_Handle &seq, bool report_abutting, const string &prefix, const CSerialObject &obj, bool lowerSev=false)

bool IsFarFetchMRNAproducts() const

bool IsTransgenic(const CBioSource &bsrc)

void ValidatePubdesc(const CPubdesc &pub, const CSerialObject &obj, const CSeq_entry *ctx=nullptr)

container_type::iterator iterator

const_iterator end() const

const_iterator find(const key_type &key) const

#define MAKE_CONST_SET(name, type,...)

Include a standard set of the NCBI C++ Toolkit most basic headers.

The NCBI C++ standard methods for dealing with std::string.

static void chk(int check, const char *fmt,...)

static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)

static DLIST_TYPE *DLIST_NAME() prev(DLIST_LIST_TYPE *list, DLIST_TYPE *item)

static const char * expected[]

static const char * str(char *buf, int n)

Public API for finding the gene(s) on a given feature using the same criteria as the flatfile generat...

vector< TGoTermError > GetGoTermErrors(const CSeq_feat &feat)

unsigned int TSeqPos

Type for sequence locations and lengths.

#define ITERATE(Type, Var, Cont)

ITERATE macro to sequence through container elements.

const TSeqPos kInvalidSeqPos

Define special value for invalid sequence position.

EDiagSev

Severity level for the posted diagnostics.

@ eDiag_Info

Informational message.

@ eDiag_Error

Error message.

@ eDiag_Warning

Warning message.

@ eDiag_Fatal

Fatal error – guarantees exit(or abort)

@ eDiag_Critical

Critical error message.

#define NCBI_THROW(exception_class, err_code, message)

Generic macro to throw an exception, given the exception class, error code and message string.

virtual const char * what(void) const noexcept

Standard report (includes full backlog).

const string AsFastaString(void) const

virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)

Optimized implementation of CSerialObject::Assign, which is not so efficient.

virtual void WriteAsFasta(ostream &out) const

Implement serializable interface.

string GetLabel(const CSeq_id &id)

const CTextseq_id * GetTextseq_Id(void) const

Return embedded CTextseq_id, if any.

bool IsPartialStart(ESeqLocExtremes ext) const

check start or stop of location for e_Lim fuzz

ENa_strand GetStrand(void) const

Get the location's strand.

TRange GetTotalRange(void) const

TSeqPos GetStart(ESeqLocExtremes ext) const

Return start and stop positions of the seq-loc.

bool IsSetStrand(EIsSetStrand flag=eIsSetStrand_Any) const

Check if strand is set for any/all part(s) of the seq-loc depending on the flag.

const CSeq_id * GetId(void) const

Get the id of the location return NULL if has multiple ids or no id at all.

CRef< CSeq_loc > Intersect(const CSeq_loc &other, TOpFlags flags, ISynonymMapper *syn_mapper) const

Find the intersection with the seq-loc, merge/sort resulting ranges depending on flags.

TSeqPos GetStop(ESeqLocExtremes ext) const

CConstBeginInfo ConstBegin(const C &obj)

Get starting point of non-modifiable object hierarchy.

ELocationInFrame IsLocationInFrame(const CSeq_feat_Handle &cds, const CSeq_loc &loc)

Determines whether location loc is in frame with coding region cds.

CMappedFeat GetBestOverlappingFeat(const CMappedFeat &feat, CSeqFeatData::ESubtype need_subtype, sequence::EOverlapType overlap_type, CFeatTree *feat_tree=0, const SAnnotSelector *base_sel=0)

@ eLocationInFrame_InFrame

@ eLocationInFrame_BadStart

@ eLocationInFrame_BadStop

@ eLocationInFrame_BadStartAndStop

@ fFGL_Content

Include its content if there is any.

const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)

If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...

TSeqPos GetLength(const CSeq_id &id, CScope *scope)

Get sequence length if scope not null, else return max possible TSeqPos.

bool IsValid(const CSeq_point &pt, CScope *scope)

Checks that point >= 0 and point < length of Bioseq.

Int8 TestForOverlapEx(const CSeq_loc &loc1, const CSeq_loc &loc2, EOverlapType type, CScope *scope=0, TOverlapFlags flags=fOverlap_Default)

Updated version of TestForOverlap64().

int SeqLocPartialCheck(const CSeq_loc &loc, CScope *scope)

sequence::ECompare Compare(const CSeq_loc &loc1, const CSeq_loc &loc2, CScope *scope)

Returns the sequence::ECompare containment relationship between CSeq_locs.

bool IsOneBioseq(const CSeq_loc &loc, CScope *scope)

Returns true if all embedded CSeq_ids represent the same CBioseq, else false.

bool IsSameBioseq(const CSeq_id &id1, const CSeq_id &id2, CScope *scope, CScope::EGetBioseqFlag get_flag=CScope::eGetBioseq_All)

Determines if two CSeq_ids represent the same CBioseq.

CSeq_loc * SeqLocRevCmpl(const CSeq_loc &loc, CScope *scope)

Get reverse complement of the seq-loc (?)

@ eSeqlocPartial_Internal

@ eSeqlocPartial_Complete

@ fCompareOverlapping

Check if seq-locs are overlapping.

@ eOverlap_Simple

any overlap of extremes

@ eOverlap_Interval

at least one pair of intervals must overlap

@ eOverlap_Contained

2nd contained within 1st extremes

@ eOverlap_Subset

2nd is a subset of 1st ranges

@ eContains

First CSeq_loc contains second.

@ eSame

CSeq_locs contain each other.

@ eContained

First CSeq_loc contained by second.

const CSeq_feat * GetCDSForProduct(const CBioseq &product, CScope *scope)

Get the encoding CDS feature of a given protein sequence.

bool IsPseudo(const CSeq_feat &feat, CScope &scope)

Determines whether given feature is pseudo, using gene associated with feature if necessary. Checks to...

CConstRef< CSeq_feat > GetOverlappingGene(const CSeq_loc &loc, CScope &scope, ETransSplicing eTransSplicing=eTransSplicing_Auto)

CConstRef< CSeq_feat > GetOverlappingCDS(const CSeq_loc &loc, CScope &scope)

vector< TFeatScore > TFeatScores

void GetOverlappingFeatures(const CSeq_loc &loc, CSeqFeatData::E_Choice feat_type, CSeqFeatData::ESubtype feat_subtype, EOverlapType overlap_type, TFeatScores &feats, CScope &scope, const TBestFeatOpts opts=0, CGetOverlappingFeaturesPlugin *plugin=NULL)

Find all features overlapping the location.

CBioseq_Handle GetBioseqHandleFromTSE(const CSeq_id &id, const CTSE_Handle &tse)

Get bioseq handle for sequence within one TSE.

CBioseq_Handle GetBioseqHandle(const CSeq_id &id)

Get bioseq handle by seq-id.

CSeq_feat_Handle GetSeq_featHandle(const CSeq_feat &feat, EMissing action=eMissing_Default)

bool IsSetExcept(void) const

const CTSE_Handle & GetTSE_Handle(void) const

Get CTSE_Handle of containing TSE.

CConstRef< CBioseq > GetCompleteBioseq(void) const

Get the complete bioseq.

TClass GetClass(void) const

const TInst_Ext & GetInst_Ext(void) const

TBioseqCore GetBioseqCore(void) const

Get bioseq core structure.

bool IsSetInst_Ext(void) const

bool IsSetDbxref(void) const

const CSeqFeatData & GetData(void) const

TSeqPos GetBioseqLength(void) const

bool IsSetExcept_text(void) const

TInst_Length GetInst_Length(void) const

const string & GetExcept_text(void) const

bool IsSetInst_Repr(void) const

bool IsSetClass(void) const

TInst_Repr GetInst_Repr(void) const

CScope & GetScope(void) const

Get scope this handle belongs to.

const CSeq_feat::TDbxref & GetDbxref(void) const

CSeq_entry_Handle GetTopLevelEntry(void) const

Get top level Seq-entry handle.

const TId & GetId(void) const

CSeqVector GetSeqVector(EVectorCoding coding, ENa_strand strand=eNa_strand_plus) const

Get sequence: Iupacna or Iupacaa if use_iupac_coding is true.

const TInst & GetInst(void) const

@ eCoding_Iupac

Set coding to printable coding (Iupacna or Iupacaa)

const CSeq_loc & GetLocation(void) const

const CSeq_feat & GetOriginalFeature(void) const

Get original feature with unmapped location/product.

CSeqMap::ESegmentType GetType(void) const

bool IsUnknownLength(void) const

return true if current segment is a gap of unknown length

TSeqPos GetPosition(void) const

return position of current segment in sequence

TSeqPos GetLength(void) const

return length of current segment

CConstRef< CSeq_feat > GetSeq_feat(void) const

Get current seq-feat.

ENa_strand GetStrand(void) const

bool IsInGap(TSeqPos pos) const

True if sequence at 0-based position 'pos' has a gap. Note: this method is not MT-safe,...

void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const

Fill the buffer string with the sequence data for the interval [start, stop).

TObjectType * GetPointer(void) const THROWS_NONE

Get pointer.

void Reset(void)

Reset reference object.

char Char

Alias for char.

#define END_NCBI_SCOPE

End previously defined NCBI scope.

#define END_SCOPE(ns)

End the previously defined scope.

#define BEGIN_NCBI_SCOPE

Define ncbi namespace.

#define BEGIN_SCOPE(ns)

Define a new scope.

static string SizetToString(size_t value, TNumToStringFlags flags=0, int base=10)

Convert size_t to string.

static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)

Case-insensitive compare of a substring with another string.

static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)

Convert string to int.

static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)

Split a string using specified delimiters.

static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)

Find the pattern in the specified range of a string using a case insensitive search.

static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)

Check if a string ends with a specified suffix value.

static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)

Check if a string is blank (has no text).

static string IntToString(int value, TNumToStringFlags flags=0, int base=10)

Convert int to string.

static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)

Find the pattern in the string.

static bool EqualCase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)

Case-sensitive equality of a substring with another string.

static SIZE_TYPE FindWord(const CTempString str, const CTempString word, ECase use_case=eCase, EDirection direction=eForwardSearch)

Find given word in the string.

static SIZE_TYPE FindCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)

Find the pattern in the specified range of a string using a case sensitive search.

static int Compare(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)

Compare of a substring with another string.

static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)

Check if a string starts with a specified prefix value.

static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)

Case-insensitive equality of a substring with another string.

static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)

Convert numeric value to string.

static bool Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)

Test for equality of a substring with another string.

static string & ReplaceInPlace(string &src, const string &search, const string &replace, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)

Replace occurrences of a substring within a string.

static string TruncateSpaces(const string &str, ETrunc where=eTrunc_Both)

Truncate whitespace in a string.

static string & ToLower(string &str)

Convert string to lower case – string& version.

@ eReverseSearch

Search in a backward direction.

@ eNocase

Case insensitive compare.

static const char label[]

const TOrg & GetOrg(void) const

Get the Org member data.

bool IsSetIs_focus(void) const

To distinguish biological focus. Check if a value has been assigned to Is_focus data member.

TTo GetTo(void) const

Get the To member data.

TFrom GetFrom(void) const

Get the From member data.

bool IsSetSyn(void) const

synonyms for locus Check if a value has been assigned to Syn data member.

const TSyn & GetSyn(void) const

Get the Syn member data.

const TDesc & GetDesc(void) const

Get the Desc member data.

bool IsSetPseudo(void) const

pseudogene Check if a value has been assigned to Pseudo data member.

bool CanGetLocus(void) const

Check if it is safe to call GetLocus method.

bool IsSetLocus_tag(void) const

Systematic gene name (e.g., MI0001, ORF0069). Check if a value has been assigned to Locus_tag data mem...

bool CanGetLocus_tag(void) const

Check if it is safe to call GetLocus_tag method.

bool IsSetLocus(void) const

Official gene symbol Check if a value has been assigned to Locus data member.

bool IsSetAllele(void) const

Official allele designation Check if a value has been assigned to Allele data member.

bool CanGetAllele(void) const

Check if it is safe to call GetAllele method.

bool CanGetDesc(void) const

Check if it is safe to call GetDesc method.

const TLocus_tag & GetLocus_tag(void) const

Get the Locus_tag member data.

const TLocus & GetLocus(void) const

Get the Locus member data.

TPseudo GetPseudo(void) const

Get the Pseudo member data.

const TAllele & GetAllele(void) const

Get the Allele member data.

bool IsStr(void) const

Check if variant Str is selected.

bool IsSetDb(void) const

name of database or system Check if a value has been assigned to Db data member.

const TTag & GetTag(void) const

Get the Tag member data.

bool IsSetTag(void) const

appropriate tag Check if a value has been assigned to Tag data member.

const TDb & GetDb(void) const

Get the Db member data.

const TStr & GetStr(void) const

Get the variant data.

const TTaxname & GetTaxname(void) const

Get the Taxname member data.

bool CanGetTaxname(void) const

Check if it is safe to call GetTaxname method.

EProcessed

processing status

const TName & GetName(void) const

Get the Name member data.

bool IsSetEc(void) const

E.C.

const TEc & GetEc(void) const

Get the Ec member data.

@ eProcessed_signal_peptide

@ eProcessed_transit_peptide

const TPub & GetPub(void) const

Get the variant data.

list< CRef< CPub > > TPub

bool IsPub(void) const

Check if variant Pub is selected.

const TAnticodon & GetAnticodon(void) const

Get the Anticodon member data.

TType GetType(void) const

Get the Type member data.

TNcbi8aa GetNcbi8aa(void) const

Get the variant data.

const TAa & GetAa(void) const

Get the Aa member data.

const TCodon & GetCodon(void) const

Get the Codon member data.

bool IsSetAa(void) const

Check if a value has been assigned to Aa data member.

bool IsTRNA(void) const

Check if variant TRNA is selected.

bool IsSetAnticodon(void) const

location of anticodon Check if a value has been assigned to Anticodon data member.

EType

type of RNA feature

bool IsSetExt(void) const

generic fields for ncRNA, tmRNA, miscRNA Check if a value has been assigned to Ext data member.

TNcbieaa GetNcbieaa(void) const

Get the variant data.

bool IsGen(void) const

Check if variant Gen is selected.

TIupacaa GetIupacaa(void) const

Get the variant data.

bool CanGetAnticodon(void) const

Check if it is safe to call GetAnticodon method.

const TGen & GetGen(void) const

Get the variant data.

bool IsSetType(void) const

Check if a value has been assigned to Type data member.

bool IsSetClass(void) const

For ncRNAs, the class of non-coding RNA (examples: antisense_RNA, guide_RNA, snRNA). Check if a value h...

E_Choice Which(void) const

Which variant is currently selected.

const TExt & GetExt(void) const

Get the Ext member data.

const TTRNA & GetTRNA(void) const

Get the variant data.

const TClass & GetClass(void) const

Get the Class member data.

@ e_not_set

No variant selected.

@ e_Name

for naming "other" type

@ eType_scRNA

will become ncRNA, with RNA-gen.class = scRNA

@ eType_snoRNA

will become ncRNA, with RNA-gen.class = snoRNA

@ eType_snRNA

will become ncRNA, with RNA-gen.class = snRNA

bool CanGetDbxref(void) const

Check if it is safe to call GetDbxref method.

const TVal & GetVal(void) const

Get the Val member data.

const TKey & GetKey(void) const

Get the Key member data.

bool IsSetLoc(void) const

original location string Check if a value has been assigned to Loc data member.

bool IsSetComment(void) const

Check if a value has been assigned to Comment data member.

vector< CRef< CDbtag > > TDbxref

const TPub & GetPub(void) const

Get the variant data.

bool IsSetData(void) const

the specific data Check if a value has been assigned to Data data member.

bool IsSetQual(void) const

qualifiers Check if a value has been assigned to Qual data member.

E_Choice Which(void) const

Which variant is currently selected.

bool IsBond(void) const

Check if variant Bond is selected.

bool IsProt(void) const

Check if variant Prot is selected.

bool IsCdregion(void) const

Check if variant Cdregion is selected.

bool IsImp(void) const

Check if variant Imp is selected.

const TCit & GetCit(void) const

Get the Cit member data.

const TQual & GetQual(void) const

Get the Qual member data.

bool IsSetPartial(void) const

incomplete in some way? Check if a value has been assigned to Partial data member.

bool IsSetKey(void) const

Check if a value has been assigned to Key data member.

bool IsSetXref(void) const

cite other relevant features Check if a value has been assigned to Xref data member.

const TLocation & GetLocation(void) const

Get the Location member data.

E_Choice

Choice variants.

bool IsGene(void) const

Check if variant Gene is selected.

const TData & GetData(void) const

Get the Data member data.

bool IsSetExcept(void) const

something funny about this? Check if a value has been assigned to Except data member.

const TExcept_text & GetExcept_text(void) const

Get the Except_text member data.

bool IsPub(void) const

Check if variant Pub is selected.

bool IsSetExcept_text(void) const

explain if except=TRUE Check if a value has been assigned to Except_text data member.

const TDbxref & GetDbxref(void) const

Get the Dbxref member data.

bool CanGetVal(void) const

Check if it is safe to call GetVal method.

bool IsHet(void) const

Check if variant Het is selected.

bool IsSetExp_ev(void) const

Check if a value has been assigned to Exp_ev data member.

const TBiosrc & GetBiosrc(void) const

Get the variant data.

bool CanGetExcept_text(void) const

Check if it is safe to call GetExcept_text method.

TPseudo GetPseudo(void) const

Get the Pseudo member data.

const TProduct & GetProduct(void) const

Get the Product member data.

bool IsSetQual(void) const

Check if a value has been assigned to Qual data member.

bool CanGetExcept(void) const

Check if it is safe to call GetExcept method.

bool IsSetPseudo(void) const

annotated on pseudogene? Check if a value has been assigned to Pseudo data member.

const TComment & GetComment(void) const

Get the Comment member data.

bool IsSetCit(void) const

citations for this feature Check if a value has been assigned to Cit data member.

bool IsBiosrc(void) const

Check if variant Biosrc is selected.

const TGene & GetGene(void) const

Get the variant data.

const Tdata & Get(void) const

Get the member data.

TPartial GetPartial(void) const

Get the Partial member data.

const TProt & GetProt(void) const

Get the variant data.

TExcept GetExcept(void) const

Get the Except member data.

const TXref & GetXref(void) const

Get the Xref member data.

vector< CRef< CSeqFeatXref > > TXref

vector< CRef< CGb_qual > > TQual

const TQual & GetQual(void) const

Get the Qual member data.

const TRna & GetRna(void) const

Get the variant data.

bool IsSetDbxref(void) const

support for xref to other databases Check if a value has been assigned to Dbxref data member.

bool IsSetVal(void) const

Check if a value has been assigned to Val data member.

bool IsSetProduct(void) const

product of process Check if a value has been assigned to Product data member.

bool CanGetQual(void) const

Check if it is safe to call GetQual method.

const TLoc & GetLoc(void) const

Get the Loc member data.

bool IsRna(void) const

Check if variant Rna is selected.

TExp_ev GetExp_ev(void) const

Get the Exp_ev member data.

const TImp & GetImp(void) const

Get the variant data.

bool IsSetLocation(void) const

feature made from Check if a value has been assigned to Location data member.

@ e_Het

cofactor, prosthetic grp, etc, bound to seq

@ e_Region

named region (globin locus)

@ e_Seq

to annotate origin from another seq

@ e_Txinit

transcription initiation

@ e_Num

a numbering system

@ e_Pub

publication applies to this seq

@ e_User

user defined structure

@ e_Rsite

restriction site (for maps really)

@ e_Comment

just a comment

@ e_Non_std_residue

non-standard residue here in seq

bool IsSetAccession(void) const

Check if a value has been assigned to Accession data member.

const TName & GetName(void) const

Get the Name member data.

ENa_strand

strand of nucleic acid

TFrom GetFrom(void) const

Get the From member data.

bool IsGeneral(void) const

Check if variant General is selected.

bool CanGetName(void) const

Check if it is safe to call GetName method.

E_Choice Which(void) const

Which variant is currently selected.

bool CanGetAccession(void) const

Check if it is safe to call GetAccession method.

const TGeneral & GetGeneral(void) const

Get the variant data.

TTo GetTo(void) const

Get the To member data.

bool IsWhole(void) const

Check if variant Whole is selected.

bool IsInt(void) const

Check if variant Int is selected.

const TInt & GetInt(void) const

Get the variant data.

bool IsSetName(void) const

Check if a value has been assigned to Name data member.

@ eNa_strand_both_rev

in reverse orientation

@ eNa_strand_both

in forward orientation

@ e_Tpe

Third Party Annot/Seq EMBL.

@ e_Tpd

Third Party Annot/Seq DDBJ.

@ e_Tpg

Third Party Annot/Seq Genbank.

@ eClass_gen_prod_set

genomic products, chrom+mRNA+protein

TRepr GetRepr(void) const

Get the Repr member data.

bool IsSetCompleteness(void) const

Check if a value has been assigned to Completeness data member.

bool CanGetBiomol(void) const

Check if it is safe to call GetBiomol method.

ERepr

representation class

const TInst & GetInst(void) const

Get the Inst member data.

const TSource & GetSource(void) const

Get the variant data.

const TId & GetId(void) const

Get the Id member data.

bool IsSetExt(void) const

extensions for special types Check if a value has been assigned to Ext data member.

bool IsDelta(void) const

Check if variant Delta is selected.

const TExt & GetExt(void) const

Get the Ext member data.

TBiomol GetBiomol(void) const

Get the Biomol member data.

const TDelta & GetDelta(void) const

Get the variant data.

TCompleteness GetCompleteness(void) const

Get the Completeness member data.

const Tdata & Get(void) const

Get the member data.

list< CRef< CDelta_seq > > Tdata

const TMolinfo & GetMolinfo(void) const

Get the variant data.

@ eRepr_delta

sequence made by changes (delta) to others

@ eRepr_raw

continuous sequence

@ eCompleteness_no_left

missing 5' or NH3 end

@ eCompleteness_partial

partial but no details given

@ eCompleteness_no_right

missing 3' or COOH end

@ eCompleteness_no_ends

missing both ends

@ e_Ncbieaa

extended ASCII 1 letter aa codes

@ e_Ncbistdaa

consecutive codes for std aas

@ e_Molinfo

info on the molecule and techniques

@ e_Source

source of materials, includes Org-ref

@ e_Loc

point to a sequence

static void text(MDB_val *v)

constexpr bool empty(list< Ts... >) noexcept

const struct ncbi::grid::netcache::search::fields::KEY key

const GenericPointer< typename T::ValueType > T2 value

const CharType(& source)[N]

static int match(PCRE2_SPTR start_eptr, PCRE2_SPTR start_ecode, uint16_t top_bracket, PCRE2_SIZE frame_size, pcre2_match_data *match_data, match_block *mb)

#define FOR_EACH_SEQID_ON_BIOSEQ(Itr, Var)

FOR_EACH_SEQID_ON_BIOSEQ EDIT_EACH_SEQID_ON_BIOSEQ.

#define FOR_EACH_GBQUAL_ON_FEATURE

#define FOR_EACH_GBQUAL_ON_SEQFEAT(Itr, Var)

FOR_EACH_GBQUAL_ON_SEQFEAT EDIT_EACH_GBQUAL_ON_SEQFEAT.

#define FOR_EACH_STRING_IN_VECTOR(Itr, Var)

FOR_EACH_STRING_IN_VECTOR EDIT_EACH_STRING_IN_VECTOR.

#define FOR_EACH_CHAR_IN_STRING(Itr, Var)

FOR_EACH_CHAR_IN_STRING EDIT_EACH_CHAR_IN_STRING.

bool ContainsSgml(const string &str)

static string s_AsciiString(const string &src)

static bool s_LocationStrandsIncompatible(const CSeq_loc &loc1, const CSeq_loc &loc2, CScope *scope)

CSingleFeatValidator * FeatValidatorFactory(const CSeq_feat &feat, CScope &scope, CValidError_imp &imp)

bool s_HasNamedQual(const CSeq_feat &feat, const string &qual)

const string kInferenceMessage[]

static bool s_StringConsistsOf(string str, string consist)

static string GetGeneticCodeName(int gcode)

const string kOrigProteinId

int s_LegalNcbieaaValues[]

static const char *const sc_BadProtNameText[]

CStaticArraySet< const char *, PCase_CStr > TBadProtNameSet

static bool s_IsBioseqPartial(CBioseq_Handle bsh)

const char * GetAAName(unsigned char aa, bool is_ascii)

DEFINE_STATIC_ARRAY_MAP(TBadProtNameSet, sc_BadProtName, sc_BadProtNameText)

static const char * kAANames[]

static string s_GetGeneRefFields(const CGene_ref &gene, int field)

bool HasGeneIdXref(const CMappedFeat &sf, const CObject_id &tag, bool &has_parent_gene_id)

static bool s_RptUnitIsBaseRange(string str, TSeqPos &from, TSeqPos &to)

static bool xf_IsDeltaLitOnly(CBioseq_Handle bsh)

static SLJIT_INLINE sljit_ins msg(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)

bool CheckIntronAcceptor(ENa_strand strand, TConstSpliceSite acceptor)

bool CheckIntronSpliceSites(ENa_strand strand, TConstSpliceSite donor, TConstSpliceSite acceptor)

bool CheckIntronDonor(ENa_strand strand, TConstSpliceSite donor)

Selector used in CSeqMap methods returning iterators.

atomic_bool CheckECNumFileStatus

size_t GetMRNATranslationProblems(const CSeq_feat &feat, size_t &mismatches, bool ignore_exceptions, CBioseq_Handle nuc, CBioseq_Handle rna, bool far_fetch, bool is_gpipe, bool is_genomic, CScope *scope)

@ eMRNAProblem_UnnecessaryException

@ eMRNAProblem_UnableToFetch

@ eMRNAProblem_TranscriptLenLess

@ eMRNAProblem_PolyATail95

@ eMRNAProblem_TranscriptLenMore

@ eMRNAProblem_ProductReplaced

@ eMRNAProblem_ErroneousException

@ eMRNAProblem_PolyATail100

const int InferenceAccessionCutoff


RetroSearch is an open source project built by @garambo | Open a GitHub Issue

Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo

HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4