A RetroSearch Logo

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Search Query:

Showing content from http://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/doxyhtml/sequence__tests_8cpp_source.html below:

NCBI C++ ToolKit: src/misc/discrepancy/sequence_tests.cpp Source File

81  if

(bioseq.

IsAa

()) {

85  auto

bsh =

context

.GetBioseqHandle(bioseq);

87

sequence::CDeflineGenerator deflineGenerator;

88  auto

defline = deflineGenerator.GenerateDefline(bsh, 0);

91  for

(

const auto

& desc :

context

.GetSeqdesc()) {

93

m_Objs[defline].Add(*

context

.SeqdescObjRef(desc));

98

m_Objs[defline].Add(*

context

.BioseqObjRef());

106  if

(m_Objs.empty()) {

109  bool

all_unique =

true

;

111  for

(

auto

& it : m_Objs.GetMap()) {

113  if

(list.size() == 1) {

116  else if

(list.size() > 1) {

125

m_ReportItems =

tmp

.Export(*this)->GetSubitems();

137

m_Objs[

"[n] sequence[s] [has] terminal Ns"

].Fatal().Add(*

context

.BioseqObjRef());

151

m_Objs[

"[n] protein sequences are shorter than 50 aa."

].Add(*

context

.BioseqObjRef(),

false

);

161  for

(

const auto

& desc :

context

.GetSeqdesc()) {

162  if

(desc.IsComment()) {

163

m_Objs[desc.GetComment()].Add(*

context

.SeqdescObjRef(desc));

171  if

(!m_Objs.empty()) {

173  string label

= m_Objs.GetMap().size() == 1 ?

"[n] comment descriptor[s] were found (all same)"

:

"[n] comment descriptor[s] were found (some different)"

;

174  for

(

auto

it : m_Objs.GetMap()) {

175  for

(

auto

obj : it.second->GetObjects()) {

186 DISCREPANCY_CASE

(MRNA_ON_WRONG_SEQUENCE_TYPE, SEQUENCE,

eDisc

|

eOncaller

,

"Eukaryotic sequences that are not genomic or macronuclear should not have mRNA features"

)

204

m_Objs[

"[n] mRNA[s] [is] located on eukaryotic sequence[s] that [does] not have genomic or plasmid source[s]"

].Add(*

context

.SeqFeatObjRef(*feat));

218  bool

has_gaps = !!sum.

Gaps

;

221  for

(

auto

it : bioseq.

GetAnnot

()) {

222  if

(it->IsFtable()) {

249  for

(

const auto

& desc :

context

.GetAllSeqdesc()) {

253  for

(

const auto

& user_field : user.

GetData

()) {

254  if

(user_field->IsSetLabel() && user_field->GetLabel().IsStr() && user_field->GetLabel().GetStr() ==

"BioProject"

&& user_field->IsSetData() && user_field->GetData().IsStrs()) {

256  if

(!strs.empty() && !strs[0].empty()) {

257

m_Objs[

"[n] sequence[s] contain[S] BioProject IDs"

].Add(*

context

.BioseqObjRef());

275  for

(

const auto

& desc :

context

.GetAllSeqdesc()) {

279  for

(

const auto

& user_field : user.

GetData

()) {

280  if

(user_field->IsSetLabel() && user_field->GetLabel().IsStr() && user_field->GetLabel().GetStr() ==

"Sequence Read Archive"

&& user_field->IsSetData() && user_field->GetData().IsStrs()) {

282  if

(!strs.empty() && !strs[0].empty()) {

283

m_Objs[

"[n] sequence[s] contain[S] Sequence Read Archive IDs"

].Add(*

context

.BioseqObjRef());

301

m_Objs[

"[n] bioseq[s] [has] no definition line"

].Add(*

context

.BioseqObjRef());

314  if

(sum.

MaxN

> 14) {

315

m_Objs[

"[n] sequence[s] [has] runs of 15 or more Ns"

].Add(*

context

.BioseqObjRef());

329

m_Objs[

"[n] sequence[s] [has] external references"

].Add(*

context

.BioseqObjRef());

339  const double

MIN_N_PERCENTAGE = 10.0;

344  if

(!sum.

HasRef

&& sum.

N

* 100. / sum.

Len

> MIN_N_PERCENTAGE) {

345

m_Objs[

"[n] sequence[s] [has] > 10% Ns"

].Add(*

context

.BioseqObjRef());

356  for

(

const auto

& feat :

context

.GetFeat()) {

361

m_Objs[

key

+

": [n] present"

].Info().Incr();

370  for

(

const auto

& feat :

context

.GetAllFeat()) {

375  key

= to_string(feat.GetData().GetSubtype()) +

" "

+

key

;

378

m_Objs[

kEmptyCStr

][na ?

"N"

:

"A"

].Add(*rep);

386  for

(

auto

& it : m_Objs[

kEmptyCStr

].GetMap()) {

387  if

(it.first ==

"N"

|| it.first ==

"A"

) {

390  size_t n

= it.first.find(

' '

);

391  string key

= it.first.substr(

n

+ 1);

393  string label

=

key

+

": [n] present"

;

405  for

(

auto

& obj : m_Objs[

kEmptyStr

][it.first].GetObjects()) {

408  for

(

auto

& pp : obj2num) {

409

m_Objs[

label

][

"[n] bioseq[s] [has] [(]"

+ to_string(pp.second) +

"[)] "

+

key

+

" features"

].Info().Add(*pp.first);

424  if

(

context

.FeatExons().size()) {

478  if

(m_Objs.empty()) {

489  size_t

num_of_missing = 0,

492  for

(

auto

it : the_map) {

493

num_of_bioseqs += it.second->GetObjects().

size

();

494  if

(it.first.empty()) {

495

num_of_missing += it.second->GetObjects().size();

501  else if

(tech != it.first) {

506  if

(num_of_missing == num_of_bioseqs || (same && !num_of_missing)) {

509

summary += num_of_missing ?

"some missing, "

:

"all present, "

;

510

summary += same ?

"all same)"

:

"some different)"

;

511  if

(num_of_missing) {

512  if

(num_of_missing == num_of_bioseqs) {

513

report[summary].

SetCount

(num_of_missing);

528  return

(ch ==

'A'

|| ch ==

'T'

|| ch ==

'G'

|| ch ==

'C'

);

534  static const size_t

MIN_TITLE_SEQ_LEN = 19;

537  for

(string::const_reverse_iterator it = title.rbegin(); it != title.rend(); ++it) {

544  if

(

count

>= MIN_TITLE_SEQ_LEN) {

549  return count

>= MIN_TITLE_SEQ_LEN;

555  for

(

auto

& desc :

context

.GetSeqdesc()) {

557

m_Objs[

"[n] defline[s] appear[S] to end with sequence characters"

].Add(*

context

.SeqdescObjRef(desc));

567  bool

is_dna =

false

;

568  bool

is_genomic =

false

;

573  auto

molinfo =

context

.GetMolinfo();

577  if

(!is_genomic || !is_dna) {

580  for

(

auto

& annot_it : bioseq.

GetAnnot

()) {

581  if

(annot_it->IsFtable()) {

589  if

(feat->IsSetData()) {

608  if

(descrs.

IsSet

()) {

609  for

(

auto

descr : descrs.

Set

()) {

610  if

(descr->IsMolinfo()) {

611

molinfo = &(descr->SetMolinfo());

616  if

(molinfo ==

nullptr

) {

619

descrs.

Set

().push_back(new_descr);

621  if

(molinfo ==

nullptr

) {

668  const string

& object_name,

669  const string

& field_prefix =

kEmptyStr

)

674  for

(

auto

& z : obj.second->GetMap()) {

675

collector[field_prefix + z.first][

" [n] "

+ object_name +

"[s] [is] missing field "

+ field_prefix + z.first]

686  if

(

f

->IsSetLabel() &&

f

->GetLabel().IsStr() &&

f

->IsSetData()) {

687  string

field_name = field_prefix +

f

->GetLabel().GetStr();

689  if

(already_seen && !collector.

Exist

(field_name)) {

691  string

missing_label =

"[n] "

+ object_name +

"[s] [is] missing field "

+ field_name;

695

collector[field_name][missing_label].

Add

(*ro);

698

collector[field_name][

"[n] "

+ object_name +

"[s] [has] field "

+ field_name +

" value '"

+

GetFieldValueAsString

(*

f

) +

"'"

].

Add

(*

context

.SeqdescObjRef(*desc),

false

);

707

collector[field_prefix + z.first][

" [n] "

+ object_name +

"[s] [is] missing field "

+ field_prefix + z.first].

Add

(*

context

.SeqdescObjRef(*desc));

725  auto

rep_seq =

context

.BioseqObjRef();

726  for

(

auto

& desc :

context

.GetAllSeqdesc()) {

747  size_t

num_values = 0;

750  for

(

auto

& s : node.

GetMap

()) {

752

all_present =

false

;

757  value

= s.first.substr(pos);

765  if

(num_values > 1) {

780  for

(

auto

& s : node.

GetMap

()) {

781  bool

this_present =

true

;

782  bool

this_same =

true

;

784

all_present &= this_present;

785

all_same &= this_same;

786  if

(!all_present && !all_same) {

795  string

summary =

"("

;

797

summary +=

"all present"

;

799

summary +=

"some missing"

;

803

summary +=

"all same"

;

805

summary +=

"inconsistent"

;

814  for

(

auto

& s : original.

GetMap

()){

815  for

(

auto

q : s.second->GetObjects()) {

816

new_home[s.first].

Add

(*q);

827  if

(

NStr::Equal

(orig_field_name,

"BioSample"

)) {

828  return " "

+ orig_field_name;

829

}

else if

(

NStr::Equal

(orig_field_name,

"ProbeDB"

)) {

830  return " "

+ orig_field_name;

831

}

else if

(

NStr::Equal

(orig_field_name,

"Sequence Read Archive"

)) {

832  return " "

+ orig_field_name;

833

}

else if

(

NStr::Equal

(orig_field_name,

"BioProject"

)) {

834  return " "

+ orig_field_name;

835

}

else if

(

NStr::Equal

(orig_field_name,

"Assembly"

)) {

836  return " "

+ orig_field_name;

838  return

orig_field_name;

847  if

(m_Objs.empty()) {

852  bool

all_present =

true

;

853  bool

all_same =

true

;

855  if

(all_present && all_same) {

859  string

top_label =

"DBLink Report "

+

GetSummaryLabel

(all_present, all_same);

862  while

(it != m_Objs.GetMap().end()) {

865  CopyNode

(m_Objs[top_label][

" "

+ it->first], *it->second);

866

it = m_Objs.GetMap().erase(it);

873  bool

this_present =

true

;

874  bool

this_same =

true

;

877  for

(

auto

& s : it2.second->GetMap()){

878  for

(

auto

& q : s.second->GetObjects()) {

879

m_Objs[top_label][new_label][s.first].Add(*q);

901  auto

rep_seq =

context

.BioseqObjRef();

902  for

(

auto

& desc :

context

.GetAllSeqdesc()) {

918

m_Objs[

"[n] Bioseq[s] [is] missing "

+ it.first +

" structured comment"

].Add(*rep_seq);

926

m_Objs[

"[n] Bioseq[s] [is] missing "

+ it.first +

" structured comment"

].Add(*ro);

948  if

(m_Objs.empty()) {

953  bool

all_present =

true

;

954  bool

all_same =

true

;

956  if

(all_present && all_same) {

960  string

top_label =

"Structured Comment Report "

+

GetSummaryLabel

(all_present, all_same);

963  while

(it != m_Objs.GetMap().end()) {

966  CopyNode

(m_Objs[top_label][

" "

+ it->first], *it->second);

967

it = m_Objs.GetMap().erase(it);

974  bool

this_present =

true

;

975  bool

this_same =

true

;

977  string

new_label = it2.first +

" "

+

GetSummaryLabel

(this_present, this_same);

978  for

(

auto

& s : it2.second->GetMap()) {

979  string

sub_label = s.first;

980  if

(this_present && this_same) {

983  for

(

auto

& q : s.second->GetObjects()) {

984

m_Objs[top_label][new_label][sub_label].Add(*q);

1000  for

(

auto

& desc :

context

.GetAllSeqdesc()) {

1001  if

(!desc.IsUser()) {

1017

m_Objs[

"[n] Assembly Name[s] in Genome Assembly Structured Comment"

].Add(*

context

.BioseqObjRef());

1030  for

(

auto

& desc :

context

.GetAllSeqdesc()) {

1031  if

(desc.IsUser()) {

1038

m_Objs[

"[n] sequence[s] [does] not include structured comments."

].Add(*

context

.BioseqObjRef());

1049  for

(

auto

& desc :

context

.GetAllSeqdesc()) {

1050  if

(desc.IsUser()) {

1054  for

(

auto

& it : user.

GetData

()) {

1055  if

(it->IsSetLabel() && it->GetLabel().IsStr() &&

NStr::Equal

(it->GetLabel().GetStr(),

"BioProject"

)) {

1066

m_Objs[

"[n] sequence[s] [does] not include project."

].Add(*

context

.BioseqObjRef());

1077  for

(

auto

& desc :

context

.GetAllSeqdesc()) {

1078  if

(desc.IsUser()) {

1081

m_Objs[

"[n] sequence[s] [is] unverified"

].Add(*

context

.BioseqObjRef(),

false

);

1111

m_Objs[

"[n] sequence[s] contain[S] nucleotides that are not ATCG or N"

].Add(*

context

.BioseqObjRef());

1127  if

(

source

&&

source

->IsSource() &&

source

->GetSource().IsSetOrg() &&

source

->GetSource().GetOrg().IsSetTaxname() && title) {

1128  string

taxname =

source

->GetSource().GetOrg().GetTaxname();

1135  bool

no_taxname_in_defline =

false

;

1137  if

(taxname_pos ==

NPOS

) {

1138

no_taxname_in_defline =

true

;

1142

no_taxname_in_defline =

NStr::CompareCase

(title->

GetTitle

().c_str() + taxname_pos, 1, taxname.size() - 1, taxname.c_str() + 1) != 0;

1144

no_taxname_in_defline =

true

;

1147  if

(no_taxname_in_defline) {

1161  for

(

auto

field: user.

GetData

()) {

1162  if

(field->IsSetData() && field->GetData().IsInt() && field->IsSetLabel() && field->GetLabel().IsStr() && field->GetLabel().GetStr() ==

"ProjectID"

) {

1175  for

(

auto

& desc :

context

.GetAllSeqdesc()) {

1176  if

(desc.IsUser()) {

1180  if

(!proj_id.empty()) {

1181

m_Objs[proj_id][bioseq.

IsNa

() ?

"N"

:

"A"

].Add(*

context

.BioseqObjRef());

1192  if

(m_Objs.empty()) {

1196  string all

=

"[n] sequence[s] [has] project IDs "

;

1197  string

prots =

"[n] protein sequence[s] [has] project IDs "

;

1198  string

nucs =

"[n] nucleotide sequence[s] [has] project IDs "

;

1199  auto

& projects = m_Objs.GetMap();

1200  all

+= projects.size() > 1 ?

"(some different)"

:

"(all same)"

;

1201  size_t

count_prots = 0;

1202  size_t

count_nucs = 0;

1203  for

(

auto

it: projects) {

1204  auto

&

M

= it.second->GetMap();

1205  if

(

M

.find(

"A"

) !=

M

.end()) {

1208  if

(

M

.find(

"N"

) !=

M

.end()) {

1212

prots += count_prots > 1 ?

"(some different)"

:

"(all same)"

;

1213

nucs += count_nucs > 1 ?

"(some different)"

:

"(all same)"

;

1214  for

(

auto

it : projects) {

1215  auto

&

M

= it.second->GetMap();

1216  if

(

M

.find(

"A"

) !=

M

.end()) {

1218

res[

all

][prots].

Add

(*obj);

1221  if

(

M

.find(

"N"

) !=

M

.end()) {

1223

res[

all

][nucs].

Add

(*obj);

1238  auto

& cds =

context

.FeatCDS();

1239  if

(cds.size() < 2) {

1242  size_t

count_pseudo = 0;

1243  size_t

count_disrupt = 0;

1244  for

(

auto

feat : cds) {

1245  if

(feat->IsSetComment() &&

NStr::Find

(feat->GetComment(),

"coding region disrupted by sequencing gap"

) !=

NPOS

) {

1248  if

(

context

.IsPseudo(*feat)) {

1252  if

(count_disrupt != cds.size() && count_pseudo != cds.size()) {

1253

m_Objs[

"[n] mRNA bioseq[s] [has] multiple CDS features"

].Add(*

context

.BioseqObjRef());

1267  auto

& cds =

context

.FeatCDS();

1268  size_t

count_plus = 0;

1269  size_t

count_minus = 0;

1270  for

(

auto

& feat : cds) {

1295

vector<CSeq_feat*> features;

1297  for

(; feat_ci; ++feat_ci) {

1302

new_inst->Assign(bioseq.

GetInst

());

1306  for

(

auto

& feat : features) {

1319  const size_t

MAX_N_IN_SEQ = 7;

1323  if

(sum.

MinQ

> MAX_N_IN_SEQ) {

1324

m_Objs[

"[n] sequence[s] contain[S] low quality region"

].Add(*

context

.BioseqObjRef());

1335  if

(

set

.IsSetDescr()) {

1336  for

(

const auto

& descr :

set

.GetDescr().Get()) {

1337  if

(descr->IsTitle()) {

1338

m_Objs[

"[n] title[s] on sets were found"

].Add(*

context

.SeqdescObjRef(*descr));

1347

m_ReportItems = m_Objs.Export(*

this

,

false

)->GetSubitems();

1358  bool

has_D_loop =

false

;

1359  bool

has_misc_feat_with_control_region =

false

;

1360  for

(

auto

& feat :

all

) {

1361  if

(feat->IsSetData()) {

1367  if

(feat->IsSetComment() &&

NStr::FindNoCase

(feat->GetComment(),

"control region"

) !=

NPOS

) {

1368

has_misc_feat_with_control_region =

true

;

1374  if

(has_D_loop || has_misc_feat_with_control_region) {

1375

m_Objs[

"[n] bioseq[s] [has] D-loop or control region misc_feature, but [is] do not have mitochondrial source"

].Add(*

context

.BioseqObjRef(

CDiscrepancyContext::eFixSet

));

1383

m_ReportItems = m_Objs.Export(*

this

,

false

)->GetSubitems();

1388 static bool

FixGenome(

const CBioseq

& bioseq,

CScope

& scope)

1433

m_Objs[

"[n] sequence[s] [is] shorter than 50 nt"

].Add(*

context

.BioseqObjRef());

1452  for

(

auto

& annot_it : bioseq.

GetAnnot

()) {

1453  if

(annot_it->IsFtable()) {

1458

m_Objs[

"[n] contig[s] [is] shorter than 200 nt"

].Add(*

context

.BioseqObjRef(fix));

1465

m_ReportItems = m_Objs.Export(*

this

,

false

)->GetSubitems();

1487

m_Objs[

"[n] RNA bioseq[s] [is] proviral"

].Add(*

context

.BioseqObjRef());

1495

m_ReportItems = m_Objs.Export(*

this

,

false

)->GetSubitems();

1507  if

((bio_src.*is_set_fn)()) {

1508  if

(

val

.empty()) {

1509  val

= (bio_src.*get_fn)();

1511  else if

(

val

!= (bio_src.*get_fn)()) {

1524  if

(

mod

->IsSetSubtype() &&

mod

->GetSubtype() == subtype &&

mod

->IsSetSubname()) {

1525  if

(

val

.empty()) {

1526  val

=

mod

->GetSubname();

1529  if

(

mod

->GetSubname() !=

val

) {

1545  for

(

const auto

& subtype : bio_src.

GetSubtype

()) {

1560  string

taxname, isolate, strain;

1561  bool

all_taxname_same =

true

, all_isolate_same =

true

, all_strain_same =

true

;

1562  for

(

auto

& descr_bio_src :

context

.GetSetBiosources()) {

1563  const CBioSource

& bio_src = descr_bio_src->GetSource();

1564  if

(

context

.HasLineage(bio_src,

""

,

"Viruses"

)) {

1566

m_Objs[

"[n] biosource[s] should have segment qualifier but [does] not"

].Add(*

context

.SeqdescObjRef(*descr_bio_src));

1569  if

(all_taxname_same) {

1572  if

(all_isolate_same) {

1575  if

(all_strain_same) {

1579  if

(!all_taxname_same) {

1580

m_Objs[

"Not all biosources have same taxname"

];

1582  if

(!all_isolate_same) {

1583

m_Objs[

"Not all biosources have same isolate"

];

1585  if

(!all_strain_same) {

1586

m_Objs[

"Not all biosources have same strain"

];

1594

m_ReportItems = m_Objs.Export(*

this

,

false

)->GetSubitems();

1604  for

(

auto

& qual : feat.

GetQual

()) {

1626  for

(

auto

& feat :

context

.GetFeat()) {

1636  if

(

set

.IsSetClass()) {

1641

m_Objs[

"[n] unwanted set wrapper[s]"

].Add(*

context

.BioseqSetObjRef());

1658

{

"Agricultutral"

,

"agricultural"

,

false

},

1659

{

"Bacilllus"

,

"Bacillus"

,

false

},

1660

{

"Enviromental"

,

"Environmental"

,

false

},

1661

{

"Insitiute"

,

"institute"

,

false

},

1662

{

"Instutite"

,

"institute"

,

false

},

1663

{

"Instutute"

,

"Institute"

,

false

},

1664

{

"P.R.Chian"

,

"P.R. China"

,

false

},

1665

{

"PRChian"

,

"PR China"

,

false

},

1666

{

"Scieces"

,

"Sciences"

,

false

},

1667

{

"agricultral"

,

"agricultural"

,

false

},

1668

{

"agriculturral"

,

"agricultural"

,

false

},

1669

{

"biotechnlogy"

,

"biotechnology"

,

false

},

1670

{

"Biotechnlogy"

,

"Biotechnology"

,

false

},

1671

{

"biotechnolgy"

,

"biotechnology"

,

false

},

1672

{

"biotechology"

,

"biotechnology"

,

false

},

1673

{

"caputre"

,

"capture"

,

true

},

1674

{

"casette"

,

"cassette"

,

true

},

1675

{

"catalize"

,

"catalyze"

,

false

},

1676

{

"charaterization"

,

"characterization"

,

false

},

1677

{

"clonging"

,

"cloning"

,

false

},

1678

{

"consevered"

,

"conserved"

,

false

},

1679

{

"cotaining"

,

"containing"

,

false

},

1680

{

"cytochome"

,

"cytochrome"

,

true

},

1681

{

"diveristy"

,

"diversity"

,

true

},

1682

{

"enivronment"

,

"environment"

,

false

},

1683

{

"enviroment"

,

"environment"

,

false

},

1684

{

"genone"

,

"genome"

,

true

},

1685

{

"homologue"

,

"homolog"

,

true

},

1686

{

"hypotethical"

,

"hypothetical"

,

false

},

1687

{

"hypotetical"

,

"hypothetical"

,

false

},

1688

{

"hypothetcial"

,

"hypothetical"

,

false

},

1689

{

"hypothteical"

,

"hypothetical"

,

false

},

1690

{

"indepedent"

,

"independent"

,

false

},

1691

{

"insititute"

,

"institute"

,

false

},

1692

{

"insitute"

,

"institute"

,

false

},

1693

{

"institue"

,

"institute"

,

false

},

1694

{

"instute"

,

"institute"

,

false

},

1695

{

"muesum"

,

"museum"

,

true

},

1696

{

"musuem"

,

"museum"

,

true

},

1697

{

"nuclear shutting"

,

"nuclear shuttling"

,

true

},

1698

{

"phylogentic"

,

"phylogenetic"

,

false

},

1699

{

"protien"

,

"protein"

,

false

},

1700

{

"puatative"

,

"putative"

,

false

},

1701

{

"putaitve"

,

"putative"

,

false

},

1702

{

"putaive"

,

"putative"

,

false

},

1703

{

"putataive"

,

"putative"

,

false

},

1704

{

"putatitve"

,

"putative"

,

false

},

1705

{

"putatuve"

,

"putative"

,

false

},

1706

{

"putatvie"

,

"putative"

,

false

},

1707

{

"pylogeny"

,

"phylogeny"

,

false

},

1708

{

"resaerch"

,

"research"

,

false

},

1709

{

"reseach"

,

"research"

,

false

},

1710

{

"reserach"

,

"research"

,

true

},

1711

{

"reserch"

,

"research"

,

false

},

1712

{

"ribosoml"

,

"ribosomal"

,

false

},

1713

{

"ribossomal"

,

"ribosomal"

,

false

},

1714

{

"scencies"

,

"sciences"

,

false

},

1715

{

"scinece"

,

"science"

,

false

},

1716

{

"simmilar"

,

"similar"

,

false

},

1717

{

"structual"

,

"structural"

,

false

},

1718

{

"subitilus"

,

"subtilis"

,

false

},

1719

{

"sulfer"

,

"sulfur"

,

false

},

1720

{

"technlogy"

,

"technology"

,

false

},

1721

{

"technolgy"

,

"technology"

,

false

},

1722

{

"Technlogy"

,

"Technology"

,

false

},

1723

{

"Veterinry"

,

"Veterinary"

,

false

},

1724

{

"Argricultural"

,

"Agricultural"

,

false

},

1725

{

"transcirbed"

,

"transcribed"

,

false

},

1726

{

"transcirption"

,

"transcription"

,

true

},

1727

{

"uiniversity"

,

"university"

,

false

},

1728

{

"uinversity"

,

"university"

,

false

},

1729

{

"univercity"

,

"university"

,

false

},

1730

{

"univerisity"

,

"university"

,

false

},

1731

{

"univeristy"

,

"university"

,

false

},

1732

{

"univesity"

,

"university"

,

false

},

1733

{

"unversity"

,

"university"

,

true

},

1734

{

"uviversity"

,

"university"

,

false

},

1735

{

"anaemia"

,

nullptr

,

false

},

1736

{

"haem"

,

nullptr

,

false

},

1737

{

"haemagglutination"

,

nullptr

,

false

},

1738

{

"heam"

,

nullptr

,

false

},

1739

{

"mithocon"

,

nullptr

,

false

},

1747 #include "FLATFILE_FIND.inc" 1748  static

constexpr TLocalFSM s_FSM{s_compact, s_hits_init_1, s_hits_init_2, s_states,

nullptr

};

1760  string error

=

"String not found: "

;

1773  "FLATFILE_FIND_ONCALLER"

,

1774  "FLATFILE_FIND_ONCALLER_UNFIXABLE"

,

1775  "FLATFILE_FIND_ONCALLER_FIXABLE" 1778  static const string

kFixable =

"Fixable"

;

1779  static const string

kNonFixable =

"Non-fixable"

;

1782  for

(

auto

& desc :

context

.GetAllSeqdesc()) {

1789  string

subitem =

string

(

"[n] object[s] contain[S] "

) +

kSpellFixes

[

i

].m_misspell;

1790  bool

autofix =

kSpellFixes

[

i

].m_correct !=

nullptr

;

1791  const string

& fixable = (autofix ? kFixable : kNonFixable);

1792

m_Objs[fixable][subitem].Add(*

context

.SeqdescObjRef(desc, &desc));

1796  for

(

auto

& feat:

context

.FeatAll()) {

1803  string

subitem =

string

(

"[n] object[s] contain[S] "

) +

kSpellFixes

[

i

].m_misspell;

1804  bool

autofix =

kSpellFixes

[

i

].m_correct !=

nullptr

;

1805  const string

& fixable = (autofix ? kFixable : kNonFixable);

1806

m_Objs[fixable][subitem].Add(*

context

.SeqFeatObjRef(*feat, feat));

1815

m_ReportItems = m_Objs.Export(*

this

,

false

)->GetSubitems();

1884  if

(m_Objs.GetMap().find(

kEmptyStr

) == m_Objs.GetMap().end()) {

1886

m_Objs[

"No sequences longer than 20,000 nt found"

];

1891

m_ReportItems = m_Objs.Export(*

this

,

false

)->GetSubitems();

1900  if

(m_Objs[

"N"

].GetCount()) {

1908

m_Objs[

"C"

].Incr();

1909  if

(!m_Objs[

"F"

].GetCount()) {

1911  for

(

auto id

: bioseq.

GetId

()) {

1916

m_Objs[

"F"

].Incr();

1923  for

(

const auto

& descr : bioseq.

GetDescr

().

Get

()) {

1924  if

(descr->IsMolinfo() && descr->GetMolinfo().CanGetTech()) {

1926

m_Objs[

"F"

].Incr();

1935

m_Objs[

"N"

].Incr();

1944  if

(m_Objs[

"C"

].GetCount() && !m_Objs[

"N"

].GetCount()) {

1956  static

constexpr

auto

suspicious_id_re =

ctll::fixed_string

{

"chromosome|plasmid|mito|chloroplast|apicoplast|plastid|^chr|^lg|\\bnw_|\\bnz_|\\bnm_|\\bnc_|\\bac_|cp\\d\\d\\d\\d\\d\\d|^x$|^y$|^z$|^w$|^mt$|^pltd$|^chl$"

};

1957  return

ctre::search<suspicious_id_re, ctre::case_insensitive>(s);

1964  bool

report =

false

;

1965  for

(

const auto

&

id

: bioseq.

GetId

()) {

1966  if

(id->IsLocal()) {

1967  if

(id->GetLocal().IsStr() &&

SuspiciousId

(id->GetLocal().GetStr())) {

1972  else if

(id->IsGeneral()) {

1973  if

(id->GetGeneral().IsSetDb() &&

SuspiciousId

(id->GetGeneral().GetDb())) {

1977  if

(id->GetGeneral().IsSetTag() && id->GetGeneral().GetTag().IsStr() &&

SuspiciousId

(id->GetGeneral().GetTag().GetStr())) {

1984

m_Objs[

"[n] sequence[s] [has] suspicious identifiers"

].Add(*

context

.BioseqSetObjRef());

1992

m_ReportItems = m_Objs.Export(*

this

,

false

)->GetSubitems();

2031  if

(

set

.IsSetSeq_set()) {

2032  for

(

const auto

& se :

set

.GetSeq_set()) {

2033  if

(!se->IsSetDescr()) {

2037  for

(

const auto

& descr : se->GetDescr().Get()) {

2038  if

(!descr->IsSource()) {

2041  const CBioSource

& bio_src = descr->GetSource();

2053  for

(

const auto

& subtype : bio_src.

GetSubtype

()) {

2055  if

(subtype->IsSetSubtype()) {

2059

m_Objs[

"one or more chromosomes are present"

];

2064

m_Objs[

"one or more chromosomes are present"

];

2078  if

(

set

.IsSetSeq_set()) {

2079  for

(

const auto

& se :

set

.GetSeq_set()) {

2080  if

(!se->IsSetDescr()) {

2084  for

(

const auto

& descr : se->GetDescr().Get()) {

2085  if

(!descr->IsSource()) {

2088  const CBioSource

& bio_src = descr->GetSource();

2094  switch

( Location ) {

2106

m_Objs[

"one or more organelles are present"

];

User-defined methods of the data storage class.

User-defined methods of the data storage class.

User-defined methods of the data storage class.

User-defined methods of the data storage class.

const string & GetTaxname(void) const

bool IsSetOrgMod(void) const

const COrgName & GetOrgname(void) const

bool IsSetTaxname(void) const

TSeqPos GetLength(void) const

bool IsSetLength(void) const

void Search(const char *input, VoidCall1 found_callback) const

virtual vector< CRef< CReportItem > > GetSubitems() const =0

static void Add(TReportObjectList &list, TReportObjectSet &hash, CReportObj &obj, bool unique=true)

TReportObjectList & GetObjects()

CReportNode & Severity(CReportItem::ESeverity s)

CRef< CReportItem > Export(CDiscrepancyCore &test, bool unique=true) const

static bool Exist(TReportObjectSet &hash, CReportObj &obj)

static EFeatureLocationAllowed AllowedFeatureLocation(ESubtype subtype)

@ eFeatureLocationAllowed_NucOnly

@ eFeatureLocationAllowed_ProtOnly

@ eFeatureLocationAllowed_Any

ESubtype GetSubtype(void) const

@Seq_descr.hpp User-defined methods of the data storage class.

CSeq_feat_EditHandle –.

namespace ncbi::objects::

static bool IsAa(EMol mol)

static bool IsNa(EMol mol)

Base class for all serializable objects.

Template class for iteration on objects of class C (non-modifiable version)

Template class for iteration on objects of class C.

CConstRef< CUser_field > GetFieldRef(const string &str, const string &delim=".", NStr::ECase use_case=NStr::eCase) const

@ eObjectType_StructuredComment

EObjectType GetObjectType() const

container_type::iterator iterator

API (CDeflineGenerator) for computing sequences' titles ("definitions").

vector< CRef< CReportObj > > TReportObjectList

#define DISCREPANCY_AUTOFIX(name)

#define DISCREPANCY_CASE1(name, type, group, descr,...)

#define DISCREPANCY_CASE0(name, sname, type, group, descr)

#define DISCREPANCY_CASE(name, type, group, descr)

#define DISCREPANCY_SUMMARIZE(name)

vector< CConstRef< CObject > > GetObjects(CSeq_entry_Handle seh, const string &field, CFieldNamePanel::EFieldType field_type, int subtype, const string &ncRNA_class, CConstRef< objects::CSeq_submit > submit, CRef< CEditingActionConstraint > constraint, vector< CSeq_entry_Handle > *descr_context=nullptr)

static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)

static const char * str(char *buf, int n)

void ReverseComplement(const BidirectionalIterator &first, const BidirectionalIterator &last)

#define NCBI_THROW(exception_class, err_code, message)

Generic macro to throw an exception, given the exception class, error code and message string.

static EAccessionInfo IdentifyAccession(const CTempString &accession, TParseFlags flags=fParse_AnyRaw)

Deduces information from a bare accession a la WHICH_db_accession; may report false negatives on prop...

EAccessionInfo

For IdentifyAccession (below)

CBioseq_Handle GetBioseqHandle(const CSeq_id &id)

Get bioseq handle by seq-id.

void SetDescr(TDescr &v) const

void SetInst_Mol(TInst_Mol v) const

const CSeqFeatData & GetData(void) const

void Remove(void) const

Remove the feature from Seq-annot.

void SetInst(TInst &v) const

void Remove(ERemoveMode mode=eRemoveSeq_entry) const

bool IsSetData(void) const

const TInst & GetInst(void) const

const CSeq_feat & GetMappedFeature(void) const

Feature mapped to the master sequence.

CConstRef< CSeq_feat > GetSeq_feat(void) const

Get current seq-feat.

#define END_NCBI_SCOPE

End previously defined NCBI scope.

#define END_SCOPE(ns)

End the previously defined scope.

#define BEGIN_NCBI_SCOPE

Define ncbi namespace.

#define BEGIN_SCOPE(ns)

Define a new scope.

NCBI_NS_STD::string::size_type SIZE_TYPE

static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)

Find the pattern in the specified range of a string using a case insensitive search.

static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)

Check if a string is blank (has no text).

static string IntToString(int value, TNumToStringFlags flags=0, int base=10)

Convert int to string.

static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)

Find the pattern in the string.

static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)

Check if a string starts with a specified prefix value.

static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)

Case-insensitive equality of a substring with another string.

static bool Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)

Test for equality of a substring with another string.

static string & ReplaceInPlace(string &src, const string &search, const string &replace, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)

Replace occurrences of a substring within a string.

const char *const kEmptyCStr

Empty "C" string (points to a '\0').

static int CompareCase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)

Case-sensitive compare of a substring with another string.

@ eNocase

Case insensitive compare.

static const char label[]

const TSubtype & GetSubtype(void) const

Get the Subtype member data.

TGenome GetGenome(void) const

Get the Genome member data.

bool IsSetOrg(void) const

Check if a value has been assigned to Org data member.

bool IsSetSubtype(void) const

Check if a value has been assigned to Subtype data member.

void SetGenome(TGenome value)

Assign a value to Genome data member.

bool IsSetGenome(void) const

Check if a value has been assigned to Genome data member.

EGenome

biological context

const TStr & GetStr(void) const

Get the variant data.

bool IsSetData(void) const

The object itself. Check if a value has been assigned to Data data member.

bool IsStr(void) const

Check if variant Str is selected.

bool IsSetType(void) const

Type of object within class. Check if a value has been assigned to Type data member.

bool IsStrs(void) const

Check if variant Strs is selected.

const TStrs & GetStrs(void) const

Get the variant data.

const TData & GetData(void) const

Get the Data member data.

bool IsStr(void) const

Check if variant Str is selected.

const TStr & GetStr(void) const

Get the variant data.

const TData & GetData(void) const

Get the Data member data.

const TType & GetType(void) const

Get the Type member data.

vector< CStringUTF8 > TStrs

const TMod & GetMod(void) const

Get the Mod member data.

bool IsSetData(void) const

The specific data. Check if a value has been assigned to Data data member.

bool IsSetQual(void) const

Qualifiers. Check if a value has been assigned to Qual data member.

const TQual & GetQual(void) const

Get the Qual member data.

const TData & GetData(void) const

Get the Data member data.

bool IsSetAccession(void) const

Check if a value has been assigned to Accession data member.

const TAccession & GetAccession(void) const

Get the Accession member data.

@ eClass_pop_set

population study

@ eClass_phy_set

phylogenetic study

@ eClass_mut_set

set of mutations

@ eClass_eco_set

ecological sample study

@ eClass_small_genome_set

viral segments or mitochondrial minicircles

TRepr GetRepr(void) const

Get the Repr member data.

bool IsSetCompleteness(void) const

Check if a value has been assigned to Completeness data member.

const TUser & GetUser(void) const

Get the variant data.

const TInst & GetInst(void) const

Get the Inst member data.

TTopology GetTopology(void) const

Get the Topology member data.

bool IsSetAnnot(void) const

Check if a value has been assigned to Annot data member.

bool IsSetRepr(void) const

Check if a value has been assigned to Repr data member.

bool IsSetMol(void) const

Check if a value has been assigned to Mol data member.

bool CanGetTopology(void) const

Check if it is safe to call GetTopology method.

const TTitle & GetTitle(void) const

Get the variant data.

const TSource & GetSource(void) const

Get the variant data.

bool IsSetBiomol(void) const

Check if a value has been assigned to Biomol data member.

const TAnnot & GetAnnot(void) const

Get the Annot member data.

const TId & GetId(void) const

Get the Id member data.

TTech GetTech(void) const

Get the Tech member data.

const Tdata & Get(void) const

Get the member data.

bool IsSetInst(void) const

The sequence data. Check if a value has been assigned to Inst data member.

TLength GetLength(void) const

Get the Length member data.

TMol GetMol(void) const

Get the Mol member data.

bool IsSetLength(void) const

Length of sequence in residues. Check if a value has been assigned to Length data member.

TSource & SetSource(void)

Select the variant.

bool IsSetDescr(void) const

Descriptors. Check if a value has been assigned to Descr data member.

bool IsSet(void) const

Check if a value has been assigned to data member.

TBiomol GetBiomol(void) const

Get the Biomol member data.

void SetBiomol(TBiomol value)

Assign a value to Biomol data member.

bool CanGetId(void) const

Check if it is safe to call GetId method.

bool IsSetTech(void) const

Check if a value has been assigned to Tech data member.

const TFtable & GetFtable(void) const

Get the variant data.

TCompleteness GetCompleteness(void) const

Get the Completeness member data.

const TData & GetData(void) const

Get the Data member data.

bool IsSetId(void) const

Equivalent identifiers. Check if a value has been assigned to Id data member.

Tdata & Set(void)

Assign a value to data member.

const TDescr & GetDescr(void) const

Get the Descr member data.

const TMolinfo & GetMolinfo(void) const

Get the variant data.

TMolinfo & SetMolinfo(void)

Select the variant.

bool CanGetInst(void) const

Check if it is safe to call GetInst method.

@ eRepr_delta

sequence made by changes (delta) to others

@ eCompleteness_complete

complete biological entity

@ eTech_targeted

targeted locus sets/studies

@ eTech_tsa

transcriptome shotgun assembly

@ eTech_wgs

whole genome shotgun sequencing

@ eBiomol_pre_RNA

precursor RNA of any sort really

@ e_Source

source of materials, includes Org-ref

where both are integers</td > n< td ></td > n</tr > n< tr > n< td > tse</td > n< td > optional</td > n< td > String</td > n< td class=\"description\"> TSE option controls what blob is smart and slim</td> n<td> orig</td> n</tr> n<tr> n<td> last_modified</td> n<td> optional</td> n<td> Integer</td> n<td class=\"description\"> The blob last modification If provided then the exact match will be requested with n the Cassandra storage corresponding field value</td> n<td> Positive integer Not provided means that the most recent match will be selected</td> n<td></td> n</tr> n<tr> n<td> use_cache</td> n<td> optional</td> n<td> String</td> n<td class=\"description\"> The option controls if the Cassandra LMDB cache and or database should be used It n affects the seq id resolution step and the blob properties lookup step The following n options are BIOSEQ_INFO and BLOB_PROP at all

void ReverseComplementFeature(CSeq_feat &feat, CScope &scope)

Simultaneous search of multiple RegEx patterns in the input string.

const struct ncbi::grid::netcache::search::fields::KEY key

const GenericPointer< typename T::ValueType > T2 value

const CharType(& source)[N]

const string kStructuredCommentReport

const string & kPreviouslySeenFields

static const string kMrnaSequenceMinusStrandFeatures

static bool IsSegmentSubtype(const CBioSource &bio_src)

static bool s_areCompatible(CBioSource::EGenome Location, CSubSource::ESubtype Qualifier)

void UnitTest_FLATFILE_FIND()

Checking that FLATFILE_FIND.inc is in sync with kSpellFixes. If the array is changed,...

string AdjustDBLinkFieldName(const string &orig_field_name)

static const CSubSource::ESubtype eSubtype_unknown

static constexpr size_t kSpellFixesSize

static constexpr auto kSpellFixes

const string kMissingDBLink

const string kStructuredCommentObservedPrefixes

const string kSomeIdenticalDeflines

const string &(CBioSource::* FnGet)() const

string GetFieldValueAsString(const CUser_field &field)

const string & kPreviouslySeenObjects

static bool SuspiciousId(const string &s)

static const string kInconsistentMolinfoTech

void AddUserObjectFieldItems(const CSeqdesc *desc, CReportObj &rep_seq, CReportNode &collector, CReportNode &previously_seen, CDiscrepancyContext &context, const string &object_name, const string &field_prefix=kEmptyStr)

const string kDBLinkObjectList

static const string kInconsistentMolinfoTechSummary

const string kStructuredCommentObservedPrefixesThis

static bool IsATGC(char ch)

string GetSummaryLabel(bool all_present, bool all_same)

static const size_t MIN_SEQUENCE_LEN

const string & kPreviouslySeenFieldsThis

static bool EndsWithSequence(const string &title)

static bool FixTextInObject(CSerialObject *obj, size_t misspell_idx)

void AnalyzeFieldReport(CReportNode &node, bool &all_present, bool &all_same)

const string kSequencesWithGaps

const string kIdenticalDeflines

const string kDeflineExists

static void FindFlatfileText(const char *str, bool *result)

static bool IsMolProd(int biomol)

void AnalyzeField(CReportNode &node, bool &all_present, bool &all_same)

static bool CompareOrGetString(const CBioSource &bio_src, FnIsSet is_set_fn, FnGet get_fn, string &val)

void CopyNode(CReportNode &new_home, CReportNode &original)

const string kStructuredCommentPrevious

const string kNoTaxnameInDefline

static bool CompareOrgModValue(const CBioSource &bio_src, COrgMod::TSubtype subtype, string &val)

const string kStructuredCommentFieldPrefix

const string kDBLinkFieldCountTop

static string GetProjectID(const CUser_object &user)

const string kUniqueDeflines

const string kAllUniqueDeflines

static bool IsMicroSatellite(const CSeq_feat &feat)

const string kDBLinkCollect

bool(CBioSource::* FnIsSet)() const

const string kStructuredCommentsSeqs

static CS_CONTEXT * context


RetroSearch is an open source project built by @garambo | Open a GitHub Issue

Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo

HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4