(feat.IsSetPseudo() && feat.GetPseudo() && (feat.GetData().IsCdregion() || feat.GetData().IsRna())) {
73 if(gene && !
context.IsPseudo(*gene)) {
88 context.ReplaceSeq_feat(*obj, *sf, *new_feat);
115 const auto& rrna_name =
f.GetData().GetRna().GetRnaProductName();
117 for(
const auto& [rrna_type, min_length] : kRrnaMinLengths) {
119 if(pos !=
NPOS&&
len< min_length) {
130 if(feat.IsSetData() && ((! feat.IsSetPartial()) || (! feat.GetPartial())) &&
IsShortrRNA(feat, &(
context.GetScope()))) {
131m_Objs[
"[n] rRNA feature[s] [is] too short"].Add(*
context.SeqFeatObjRef(feat)).Fatal();
147 if(!
f.IsSetQual()) {
150 for(
const auto& it :
f.GetQual()) {
151 if(it->IsSetQual() &&
NStr::Equal(it->GetQual(),
"regulatory_class") &&
162 boolhas_genes =
false;
171 if(
IsRBS(feat) && !
context.GetGeneForFeature(feat)) {
172m_Objs[
"[n] RBS feature[s] [does] not have overlapping gene[s]"].Add(*
context.SeqFeatObjRef(feat)).Fatal();
188 f.GetData().IsCdregion() ||
189 f.GetData().IsRna() ||
204m_Objs[
"[n] feature[s] [has] no genes"].Add(*
context.SeqFeatObjRef(feat)).Fatal();
213 const string kExtraGene=
"[n] gene feature[s] [is] not associated with a CDS or RNA feature.";
214 const string kExtraPseudo=
"[n] pseudo gene feature[s] [is] not associated with a CDS or RNA feature.";
215 const string kExtraGeneNonPseudoNonFrameshift=
"[n] non-pseudo gene feature[s] are not associated with a CDS or RNA feature and [does] not have frameshift in the comment.";
219 for(
const auto& it : feat.
GetXref()) {
226 if(it->IsSetData() && it->GetData().IsGene()) {
227have_gene_ref =
true;
228 const CGene_ref& gene_ref = it->GetData().GetGene();
235&& (gene_ref.
IsSetLocus() || locus.empty())) {
247 const auto& genes =
context.FeatGenes();
250 if((gene->IsSetComment() && !gene->GetComment().empty()) || (gene->GetData().GetGene().IsSetDesc() && !gene->GetData().GetGene().GetDesc().empty())) {
253 const CSeq_loc& loc = gene->GetLocation();
256 if(feat->GetData().IsCdregion() || feat->GetData().IsRna()) {
257 const CSeq_loc& loc_f = feat->GetLocation();
260 boolhave_gene_ref =
false;
265 else if(!have_gene_ref) {
267 if(best_gene.
NotEmpty() && &*best_gene == &*gene) {
286 const auto& genes =
context.FeatGenes();
287 const auto& feats =
context.FeatAll();
288 for(
size_t i= 0;
i< genes.size();
i++) {
289 if(genes[
i]->IsSetPseudo() && genes[
i]->GetPseudo()) {
292 const CSeq_loc& loc_i = genes[
i]->GetLocation();
294 for(
size_tj = 0; j < feats.size(); j++) {
295 if(feats[j]->GetData().IsGene()) {
298 const CSeq_loc& loc_j = feats[j]->GetLocation();
303 if(genes[
i] ==
context.GetGeneForFeature(*feats[j])) {
309m_Objs[
"[n] gene feature[s] [is] not associated with any feature and [is] not pseudo."].Add(*
context.SeqFeatObjRef(*genes[
i]));
337 if(it->IsLiteral()) {
338 offset+= it->GetLiteral().GetLength();
339 if(!it->GetLiteral().IsSetSeq_data()) {
343 else if(it->GetLiteral().GetSeq_data().IsGap()) {
348 else if(it->IsLoc()) {
355 if(left >= last_gap_stop && left - last_gap_stop <= 3) {
356extend_len = left - last_gap_stop;
361 CSeqVectorsvec(seq, scope, CBioseq_Handle::CBioseq_Handle::eCoding_Iupac);
365 for(
unsigned i= 0;
i<
count;
i++) {
366 if(codon[
i] ==
'N') {
376svec.
GetSeqData(left - extend_len, left - extend_len + 3, codon);
378 if(codon ==
"CTA"|| codon ==
"TTA"|| codon ==
"TCA") {
383 if(codon ==
"TAG"|| codon ==
"TAA"|| codon ==
"TGA") {
397extend_len = seq.
GetLength() - right - 1;
405 if(it->IsLiteral()) {
406 if(!it->GetLiteral().IsSetSeq_data()) {
410 else if(it->GetLiteral().GetSeq_data().IsGap()) {
414 offset+= it->GetLiteral().GetLength();
416 else if(it->IsLoc()) {
419 if(
offset> right + 3) {
423 if(next_gap_start > right && next_gap_start - right - 1 <= 3) {
424extend_len = next_gap_start - right - 1;
429 CSeqVectorsvec(seq, scope, CBioseq_Handle::CBioseq_Handle::eCoding_Iupac);
433 for(
unsigned i= 0;
i<
count;
i++) {
434 if(codon[
i] ==
'N') {
444svec.
GetSeqData(right + extend_len - 3, right + extend_len, codon);
446 if(codon ==
"CTA"|| codon ==
"TTA"|| codon ==
"TCA") {
451 if(codon ==
"TAG"|| codon ==
"TAA"|| codon ==
"TGA") {
494 if(feat.IsSetData() && feat.GetData().IsCdregion() &&
IsNonExtendable(feat.GetLocation(), bioseq, &(
context.GetScope()))) {
495m_Objs[
"[n] feature[s] [has] partial ends that do not abut the end of the sequence or a gap, and cannot be extended by 3 or fewer nucleotides to do so"].Add(*
context.SeqFeatObjRef(feat, &feat)).Fatal();
516 context.ReplaceSeq_feat(*obj, *sf, *new_feat);
526 DISCREPANCY_CASE(BACTERIAL_PARTIAL_NONEXTENDABLE_EXCEPTION, SEQUENCE,
eDisc|
eSubmitter|
eSmart,
"Find partial feature ends on bacterial sequences that cannot be extended but have exceptions: on when non-eukaryote")
535m_Objs[
"[n] feature[s] [has] partial ends that do not abut the end of the sequence or a gap, and cannot be extended by 3 or fewer nucleotides to do so, but [has] the correct exception"].Add(*
context.SeqFeatObjRef(feat));
552 if(feat.IsSetData() && feat.GetData().IsCdregion()) {
553 if(feat.IsSetPseudo() && feat.GetPseudo() ==
true&& !
context.IsRefseq())
continue;
554 booladd_this =
false;
561add_this = extend_len > 0 && extend_len <= 3;
571add_this = extend_len > 0 && extend_len <= 3;
576m_Objs[
"[n] feature[s] [has] partial ends that do not abut the end of the sequence or a gap, but could be extended by 3 or fewer nucleotides to do so"].Add(*
context.SeqFeatObjRef(feat,
CDiscrepancyContext::eFixSet)).Fatal();
597gene.
Reset(&gene_it->GetMappedFeature());
608new_gene->
Assign(*gene);
626 unsignedfr = (unsigned)frame - 1;
627fr = (fr + extend_len) % 3;
629new_feat->
SetData().SetCdregion().SetFrame() = frame;
648 unsignedfr = (unsigned)frame - 1;
649fr = (fr + extend_len) % 3;
651new_feat->
SetData().SetCdregion().SetFrame() = frame;
698 if(feat.IsSetData() && feat.GetData().IsCdregion() && !
context.IsPseudo(feat)) {
713 if(m_Objs.empty()) {
729 if(feat.IsSetQual() && (!feat.IsSetData() || !feat.GetData().IsGene())) {
730 for(
const auto& it : feat.GetQual()) {
732m_Objs[
"[n] non-gene feature[s] [has] locus tag[s]."].Add(*
context.SeqFeatObjRef(feat));
748 if(!feat.IsSetPartial() &&
len< 50) {
749m_Objs[
"[n] tRNA[s] [is] too short"].Add(*
context.SeqFeatObjRef(feat));
751 else if(
len>= 150) {
752m_Objs[
"[n] tRNA[s] [is] too long - over 150 nucleotides"].Add(*
context.SeqFeatObjRef(feat));
767 const stringaa =
context.GetAminoacidName(feat);
768 if(aa !=
"Ser"&& aa !=
"Sec"&& aa !=
"Leu") {
769m_Objs[
"[n] tRNA[s] [is] too long"].Add(*
context.SeqFeatObjRef(feat));
782 if(partial_feat != partial_gene) {
795 if(partial_feat != partial_gene) {
804 const string kGenePartialConflictOther=
"[n/2] feature[s] that [is] not coding region[s] or misc_feature[s] conflict with partialness of overlapping gene";
807 const string kConflictBoth=
" feature partialness conflicts with gene on both ends";
808 const string kConflictStart=
" feature partialness conflicts with gene on 5' end";
809 const string kConflictStop=
" feature partialness conflicts with gene on 3' end";
817 boolis_eukaryotic =
context.IsEukaryotic(biosrc ? &biosrc->
GetSource() :
nullptr);
821 if(!feat->IsSetData()) {
828 boolconflict_start =
false;
829 boolconflict_stop =
false;
832 if(feat->GetData().IsCdregion()) {
833 if(!is_eukaryotic || is_mrna) {
841 boolfound_start =
false;
842 boolfound_utr5 =
false;
848conflict_start =
false;
853 if(found_utr5 && !found_start) {
854conflict_start =
true;
859 boolfound_stop =
false;
860 boolfound_utr3 =
false;
866conflict_stop =
false;
871 if(found_utr3 && !found_stop) {
872conflict_stop =
true;
884 if(conflict_start || conflict_stop) {
895m_ReportItems = m_Objs.Export(*
this,
false)->GetSubitems();
931 const string kBadGeneStrand=
"[n/2] feature location[s] conflict with gene location strand[s]";
937 const auto& genes =
context.FeatGenes();
938 const auto& feats =
context.FeatAll();
940 for(
size_tj = 0; j < feats.size(); j++) {
945 const CSeq_loc& loc_j = feats[j]->GetLocation();
948 for(
size_t i= 0;
i< genes.size();
i++) {
949 if(!genes[
i]->IsSetLocation()) {
952 const CSeq_loc& loc_i = genes[
i]->GetLocation();
956 if(feat_start == gene_start || feat_stop == gene_stop) {
962 boolfound_bad =
false;
963 while(f_loc && !found_bad) {
966 while(g_loc && !found_bad) {
996m_ReportItems = m_Objs.Export(*
this,
false)->GetSubitems();
1006 boolis_microsatellite =
false;
1007 boolis_tandem =
false;
1009 for(
autoit = quals.cbegin(); it != quals.cend() && (!is_microsatellite || !is_tandem); ++it) {
1014is_microsatellite =
true;
1021 if(is_microsatellite && !is_tandem) {
1022m_Objs[
"[n] microsatellite[s] do not have a repeat type of tandem"].Add(*
context.SeqFeatObjRef(feat, &feat)).Fatal();
1035new_feat->
SetQual().push_back(new_qual);
1036 context.ReplaceSeq_feat(*obj, *sf, *new_feat);
1047 "characterisation",
1077 if(feat.IsSetData()) {
1078 switch(feat.GetData().GetSubtype()) {
1081 if(feat.IsSetComment()) {
1084 if(feat.GetData().GetGene().IsSetDesc()) {
1089 if(feat.GetData().GetProt().IsSetDesc()) {
1095 if(feat.IsSetComment()) {
1109m_ReportItems = m_Objs.Export(*
this,
false)->GetSubitems();
1117 "annotated by transcript or proteomic data",
1118 "heterogeneous population sequenced",
1119 "low-quality sequence region",
1120 "unextendable partial coding region",
1128 if(feat.IsSetData() && feat.GetData().IsCdregion() && feat.IsSetExcept_text()) {
1129 for(
size_t i= 0;
i<
max;
i++) {
1131m_Objs[
"[n] coding region[s] [has] new exception[s]"].Add(*
context.SeqFeatObjRef(feat));
1142m_ReportItems = m_Objs.Export(*
this,
false)->GetSubitems();
1152&& feat.GetData().GetRna().IsSetExt() && feat.GetData().GetRna().GetExt().IsGen() && feat.GetData().GetRna().GetExt().GetGen().IsSetClass()
1153&&
NStr::EqualNocase(feat.GetData().GetRna().GetExt().GetGen().GetClass(),
"lncrna")
1156m_Objs[
"[n] lncRNA feature[s] [is] suspiciously short"].Add(*
context.SeqFeatObjRef(feat));
1164m_ReportItems = m_Objs.Export(*
this,
false)->GetSubitems();
1180 if(feat.IsSetLocation()) {
1181 if(feat.GetLocation().IsMix() || feat.GetLocation().IsPacked_int()) {
1182 if(feat.IsSetExcept_text()) {
1190 else if(feat.IsSetExcept() && feat.GetExcept()) {
1205m_ReportItems = m_Objs.Export(*
this,
false)->GetSubitems();
1218 if(feat.IsSetLocation() && feat.CanGetData() && feat.GetData().IsCdregion() && !
context.IsPseudo(feat)) {
1219 if(feat.GetLocation().IsMix() || feat.GetLocation().IsPacked_int()) {
1220 if((feat.IsSetExcept_text() && !feat.GetExcept_text().empty()) || (feat.IsSetExcept() && feat.GetExcept())) {
1224 if(
context.CurrentBioseq().CanGetInst()) {
1260m_ReportItems = m_Objs.Export(*
this,
false)->GetSubitems();
1271 if(feat.IsSetLocation() && feat.CanGetData() && feat.GetData().IsCdregion() && feat.IsSetExcept_text() && (feat.GetLocation().IsMix() || feat.GetLocation().IsPacked_int())) {
1272 if(feat.GetExcept_text().find(
"ribosomal slippage") != string::npos) {
1274 stringproduct =
context.GetProdForFeature(feat);
1278m_Objs[
"[n] coding region[s] [has] unexpected ribosomal slippage"].Fatal().Add(*
context.SeqFeatObjRef(feat));
1288m_ReportItems = m_Objs.Export(*
this,
false)->GetSubitems();
1300 if(feat.IsSetData() && feat.GetData().IsCdregion() && feat.IsSetLocation() && !feat.IsSetExcept() && !
context.IsPseudo(feat)) {
1303 boolfound_short =
false;
1307 while(li && !found_short) {
1310 if(start >= last_stop && start - last_stop < 11) {
1311found_short =
true;
1313 else if(last_stop >= start && last_stop - start < 11) {
1314found_short =
true;
1316 else if(stop >= last_start && stop - last_start < 11) {
1317found_short =
true;
1319 else if(last_start >= stop && last_start - stop < 11) {
1320found_short =
true;
1340m_ReportItems = m_Objs.Export(*
this,
false)->GetSubitems();
1376sf.
SetComment(
"contains short intron that may represent a frameshift");
1394sf.
SetComment(
"contains short intron that may represent a frameshift");
1426sf.
SetData().SetImp().SetKey(
"misc_feature");
1478 AddException(sf, scope,
"low-quality sequence region");
1489std::list<CConstRef<CSeq_loc>> to_remove;
1493 for(
auto& loc : to_remove) {
1505 DISCREPANCY_CASE(UNNECESSARY_VIRUS_GENE, FEAT,
eOncaller,
"Unnecessary gene features on virus: on when lineage is not Picornaviridae,Potyviridae,Flaviviridae and Togaviridae")
1510 if(
context.HasLineage(src,
"Picornaviridae") ||
context.HasLineage(src,
"Potyviridae") ||
context.HasLineage(src,
"Flaviviridae") ||
context.HasLineage(src,
"Togaviridae")) {
1512 if(feat.IsSetData() && feat.GetData().IsGene()) {
1513m_Objs[
"[n] virus gene[s] need to be removed"].Add(*
context.SeqFeatObjRef(feat));
1523m_ReportItems = m_Objs.Export(*
this,
false)->GetSubitems();
1532 if(feat.IsSetData() && feat.GetData().IsCdregion() && feat.IsSetDbxref()) {
1533 for(
auto& x : feat.GetDbxref()) {
1535m_Objs[
"[n] feature[s] [has] CDD Xrefs"].Add(*
context.SeqFeatObjRef(feat));
1546m_ReportItems = m_Objs.Export(*
this,
false)->GetSubitems();
1555 if(feat.IsSetData() && feat.GetData().IsCdregion() && feat.GetData().GetCdregion().IsSetCode_break()) {
1556m_Objs[
"[n] coding region[s] [has] a translation exception"].Add(*
context.SeqFeatObjRef(feat));
1564m_ReportItems = m_Objs.Export(*
this,
false)->GetSubitems();
1570 static const string kNoProductStr=
"[n] product[s] [has] \"no product string in file\"";
1575 if(feat.IsSetData() && feat.GetData().IsProt()) {
1577 if(
prot.IsSetName()) {
1578 const string* no_prot_str =
NStr::FindNoCase(
prot.GetName(),
"no product string in file");
1593m_ReportItems = m_Objs.Export(*
this,
false)->GetSubitems();
1600 "trnL-trnF intergenic spacer",
1601 "trnH-psbA intergenic spacer",
1602 "trnS-trnG intergenic spacer",
1603 "trnF-trnL intergenic spacer",
1604 "psbA-trnH intergenic spacer",
1605 "trnG-trnS intergenic spacer"};
1623m_Objs[
"[n] suspect intergenic spacer note[s] not organelle"].Add(*
context.SeqFeatObjRef(feat));
1637 if(feat.IsSetData() && feat.GetData().IsRna()) {
1640 stringproduct =
rna.GetRnaProductName();
1642 if(feat.IsSetComment()) {
1643comment = feat.GetComment();
1646m_Objs[
"[n] RNA product_name or comment[s] contain[S] 'suspect phrase'"].Add(*
context.SeqFeatObjRef(feat));
1656m_ReportItems = m_Objs.Export(*
this,
false)->GetSubitems();
1682 if(this_strand != first_strand) {
1696 while(feat_i && gene_i) {
1702 boolfound_stop =
false;
1714 if((feat_i && !gene_i) || (!feat_i && gene_i)) {
1726 if(stop < 1 || stop > bsh.GetBioseqLength() - 2) {
1730search->
SetInt().SetId().Assign(*(loc.
GetId()));
1732search->
SetInt().SetFrom(stop - 1);
1733search->
SetInt().SetTo(stop - 1);
1736search->
SetInt().SetFrom(stop + 1);
1737search->
SetInt().SetTo(stop + 1);
1752 if(start < 1 || start > bsh.GetBioseqLength() - 2) {
1756search->
SetInt().SetId().Assign(*(loc.
GetId()));
1758search->
SetInt().SetFrom(start + 1);
1759search->
SetInt().SetTo(start + 1);
1762search->
SetInt().SetFrom(start - 1);
1763search->
SetInt().SetTo(start - 1);
1783}
else if(!
StrandsMatch(feat_strand, gene_strand)) {
1795 if(gene_start == feat_start) {
1804rbs_search->
SetInt().SetId().Assign(*
id);
1806 if(gene_start < feat_start) {
1809rbs_search->
SetInt().SetFrom(feat_start + 1);
1810rbs_search->
SetInt().SetTo(gene_start);
1813 if(gene_start > feat_start) {
1816rbs_search->
SetInt().SetFrom(gene_start);
1817rbs_search->
SetInt().SetTo(feat_start - 1);
1820 for(
const CSeq_feat* feat : features) {
1856 if(
context.InGenProdSet()) {
1860 booleukaryotic =
context.IsEukaryotic(biosrc ? &biosrc->
GetSource() :
nullptr);
1863 if(feat->IsSetData() && feat->IsSetLocation() && (feat->GetData().IsRna() || (!eukaryotic && feat->GetData().IsCdregion()))) {
1864 ENa_strandfeat_strand = feat->GetLocation().GetStrand();
1865 const CGene_ref* gx = feat->GetGeneXref();
1868 if(feat->GetGeneXref()) {
1870 if(feat->GetData().IsCdregion()) {
1883 if(feat->GetData().IsCdregion()) {
1899m_ReportItems = m_Objs.Export(*
this,
false)->GetSubitems();
1920 if(feat.IsSetData()) {
1922 if(feat.GetData().IsCdregion() && feat.IsSetComment()) {
1923 check= feat.GetComment();
1925 else if(feat.GetData().IsProt() && feat.GetData().GetProt().IsSetDesc()) {
1926 check= feat.GetData().GetProt().GetDesc();
1928 if(!
check.empty()) {
1931m_Objs[
"[n] cds comment[s] or protein description[s] contain[S] suspect_phrase[s]"][
"[n] cds comment[s] or protein description[s] contain[S] '"+
suspect_phrases[
i] +
"'"].Summ().Add(*
context.SeqFeatObjRef(feat));
1943m_ReportItems = m_Objs.Export(*
this,
false)->GetSubitems();
1954 stringproduct =
rna.GetRnaProductName();
1956m_Objs[
"[n] unexpected misc_RNA feature[s] found. misc_RNAs are unusual in a genome, consider using ncRNA, misc_binding, or misc_feature as appropriate"].Add(*
context.SeqFeatObjRef(feat));
1967 if(rna_product.empty() || cds_product.empty()) {
1970 if(rna_product == cds_product) {
1973 const stringkmRNAVariant =
", transcript variant ";
1974 const stringkCDSVariant =
", isoform ";
1975 size_tpos_in_rna = rna_product.find(kmRNAVariant);
1976 size_tpos_in_cds = cds_product.find(kCDSVariant);
1977 if(pos_in_rna == string::npos || pos_in_cds == string::npos || pos_in_rna != pos_in_cds ||
1981 stringrna_rest = rna_product.substr(pos_in_rna + kmRNAVariant.size()), cds_rest = cds_product.substr(pos_in_cds + kCDSVariant.size());
1982 returnrna_rest == cds_rest;
1995vector<const CSeq_feat*> cds =
context.FeatCDS();
1996vector<const CSeq_feat*> mrnas =
context.FeatMRNAs();
1997 autocds_it = cds.begin();
1998 while(cds_it != cds.end()) {
1999 if(
context.IsPseudo(**cds_it)) {
2000cds_it = cds.erase(cds_it);
2004 if((*cds_it)->IsSetXref()) {
2005 autorna_it = mrnas.cbegin();
2006 while(rna_it != mrnas.end()) {
2007 if((*rna_it)->IsSetId()) {
2008 auto& rnaid = (*rna_it)->
GetId();
2009 if(rnaid.IsLocal()) {
2010 for(
autoxref : (*cds_it)->GetXref()) {
2011 if(xref->IsSetId()) {
2012 auto&
id= xref->GetId();
2013 if(
id.IsLocal()) {
2014 if(!
id.GetLocal().
Compare(rnaid.GetLocal())) {
2023mrnas.erase(rna_it);
2031 stringprod =
context.GetProdForFeature(**cds_it);
2033m_Objs[
"[n] coding region[s] [has] mismatching mRNA"].Add(*
context.SeqFeatObjRef(**cds_it));
2035cds_it = cds.erase(cds_it);
2041 for(
size_t i= 0;
i< cds.size();
i++) {
2042 if(
context.IsPseudo(*cds[
i])) {
2045 boolfound =
false;
2046 stringprod =
context.GetProdForFeature(*cds[
i]);
2047 const CSeq_loc& loc_i = cds[
i]->GetLocation();
2048 for(
size_tj = 0; j < mrnas.size(); j++) {
2049 const CSeq_loc& loc_j = mrnas[j]->GetLocation();
2052 if(
IsProductMatch(prod, mrnas[j]->GetData().GetRna().GetRnaProductName())) {
2071 if(old_mRNA.
Empty()) {
2074annot_handle.
AddFeat(*new_mRNA);
2078old_mRNA_edit.Replace(*new_mRNA);
2089 for(; annot_ci; ++annot_ci) {
2111 if(old_mRNA.
Empty()) {
2114annot_handle.
AddFeat(*new_mRNA);
2118old_mRNA_edit.
Replace(*new_mRNA);
2130 if(feat.IsSetData() && feat.GetData().IsProt()) {
2132 if(
prot.IsSetName() && !
prot.GetName().empty()) {
2133m_Objs[feat.GetData().GetProt().GetName().front()].Incr();
2142 static const size_tMIN_REPORTABLE_AMOUNT = 100;
2143 auto&
M= m_Objs.GetMap();
2144 if(
M.size() == 1 &&
M.begin()->second->GetCount() >= MIN_REPORTABLE_AMOUNT) {
2146rep[
"All proteins have same name [(]\""+
M.begin()->first +
"\""];
2156 boolprotein_id =
false,
2157transcript_id =
false;
2159 for(
const auto& qual : quals) {
2160 if(qual->IsSetQual()) {
2162 if(qual->GetQual() ==
"orig_protein_id") {
2166 if(qual->GetQual() ==
"orig_transcript_id") {
2167transcript_id =
true;
2170 if(protein_id && transcript_id) {
2176 returnprotein_id && transcript_id;
2186 if(feat.IsSetData() && feat.GetData().IsCdregion() && !
context.IsPseudo(feat)) {
2189m_Objs[
"no protein_id and transcript_id present"].Fatal().Add(*
context.SeqFeatObjRef(feat));
2205 stringsubitem =
"[n] "+ feat.GetData().GetKey();
2206subitem +=
" feature[s]";
2218 if(feat.IsSetQual()) {
2219 size_tnum_of_number_quals = 0;
2220 for(
const auto& qual : feat.GetQual()) {
2221 if(qual->IsSetQual() && qual->GetQual() ==
"number") {
2222++num_of_number_quals;
2223 if(num_of_number_quals > 1) {
2224m_Objs[
"[n] feature[s] contain[S] multiple /number qualifiers"].Add(*
context.SeqFeatObjRef(feat));
2240 for(
const auto& qual : feat.GetQual()) {
2241 if(qual->IsSetQual() && qual->GetQual() ==
"product") {
2242m_Objs[
"[n] feature[s] [has] a product qualifier"].Add(*
context.SeqFeatObjRef(feat));
2252 const string kCDShasNoTRNA=
"[n] coding region[s] [does] not have adjacent tRNA";
2257 switch(aa.
Which()) {
2271 static const intSTOP_CODON = 25;
2272 returnaa_idx == STOP_CODON;
2282 const auto& cds =
context.FeatCDS();
2283 const auto& trnas =
context.FeatTRNAs();
2284 for(
size_t i= 0;
i< cds.size();
i++) {
2285 if(!cds[
i]->GetData().GetCdregion().IsSetCode_break()) {
2294 const CSeq_feat* nearest_trna =
nullptr;
2297 if(trna->IsSetLocation()) {
2301 if(start <= stop) {
2302cur_diff = stop - start;
2306 if(start >= stop) {
2307cur_diff = start - stop;
2310 if(cur_diff < diff) {
2312nearest_trna = trna;
2318 if(trna_strand == strand && diff > 1) {
2333 const auto& rnas =
context.Feat_RNAs();
2334 for(
size_t i= 0;
i< rnas.size();
i++) {
2336 const string& name = rnas[
i]->GetData().GetRna().GetExt().GetName();
2337 if(name.find(
"16S") != string::npos || name.find(
"12S") != string::npos) {
2338m_Objs[
"[n] non mitochondrial rRNA name[s] contain[S] 12S/16S"].Add(*
context.SeqFeatObjRef(*rnas[
i]));
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
@ eExtreme_Positional
numerical value
@ eExtreme_Biological
5' and 3'
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
CRef< CSeq_feat > MakemRNAforCDS(const CSeq_feat &cds, CScope &scope)
MakemRNAforCDS A function to create a CSeq_feat that represents the appropriate mRNA for a given CDS.
TSeqPos GetLength(void) const
static bool SeqLocExtend(CSeq_loc &loc, size_t pos, CScope &scope)
Extends a location to the specified position.
static bool HasLineage(const CBioSource &biosrc, const string &def_lineage, const string &type)
static bool IsUnculturedNonOrganelleName(const string &taxname)
@Gb_qual.hpp User-defined methods of the data storage class.
bool IsSuppressed(void) const
@RNA_ref.hpp User-defined methods of the data storage class.
string GetRnaProductName(void) const
virtual vector< CRef< CReportItem > > GetSubitems() const =0
static void Add(TReportObjectList &list, TReportObjectSet &hash, CReportObj &obj, bool unique=true)
CReportNode & Ext(bool b=true)
CRef< CReportItem > Export(CDiscrepancyCore &test, bool unique=true) const
static const string & GetRegulatoryClass(ESubtype subtype)
static bool IsLegalProductNameForRibosomalSlippage(const string &product_name)
static CTempString SubtypeValueToName(ESubtype eSubtype)
Turns a ESubtype into its string value which is NOT necessarily related to the identifier of the enum...
CSeq_feat_EditHandle –.
namespace ncbi::objects::
Seq-loc iterator class – iterates all intervals from a seq-loc in the correct order.
static TIndex GetMapToIndex(CSeq_data::E_Choice from_type, CSeq_data::E_Choice to_type, TIndex from_idx)
#define DISCREPANCY_AUTOFIX(name)
#define DISCREPANCY_CASE(name, type, group, descr)
#define DISCREPANCY_SUMMARIZE(name)
int GetSubtype(CFieldNamePanel *field_name_panel, string &ncRNA_class)
vector< CConstRef< CObject > > GetObjects(CSeq_entry_Handle seh, const string &field, CFieldNamePanel::EFieldType field_type, int subtype, const string &ncRNA_class, CConstRef< objects::CSeq_submit > submit, CRef< CEditingActionConstraint > constraint, vector< CSeq_entry_Handle > *descr_context=nullptr)
static const string kNoProductStr
static bool IsRBS(const CSeq_feat &f)
static bool AddExceptionsToShortIntron(const CSeq_feat &sf, CScope &scope, std::list< CConstRef< CSeq_loc >> &to_remove)
const string & kJoinedFeaturesException
static void AddException(const CSeq_feat &sf, CScope &scope, const string &exception_text)
EExtensibe IsExtendableRight(TSeqPos right, const CBioseq &seq, CScope *scope, TSeqPos &extend_len, ENa_strand strand)
bool IsPartialStopConflict(const CSeq_feat &feat, const CSeq_feat &gene, bool is_mrna=false)
const string kPseudoMismatch
const string kExtraGeneNonPseudoNonFrameshift
bool StartAbutsGap(const CSeq_loc &loc, ENa_strand strand, CScope &scope)
const string kConflictStart
const string kBadGeneStrand
const string kConflictBoth
static bool IsStopCodon(const CCode_break::C_Aa &aa)
static const size_t kIntergenicSpacerNames_len
static void AdjustBacterialGeneForCodingRegionWithShortIntron(CSeq_feat &sf, CSeq_feat &gene, bool is_bacterial)
const string kFeatureLocationCodingRegion
const string kGenePartialConflictTop
bool StopAbutsGap(const CSeq_loc &loc, ENa_strand strand, CScope &scope)
const string kGenePartialConflictMiscFeat
EExtensibe IsExtendableLeft(TSeqPos left, const CBioseq &seq, CScope *scope, TSeqPos &extend_len, ENa_strand strand)
static bool IsmRnaQualsPresent(const CSeq_feat::TQual &quals)
const string kGenePartialConflictCodingRegion
const string kFeatureLocationConflictTop
bool IsMixedStrandGeneLocationOk(const CSeq_loc &feat_loc, const CSeq_loc &gene_loc)
static const string kSuspiciousNotePhrases[]
const string kFeatureLocationRNA
static bool ExtendToGapsOrEnds(const CSeq_feat &cds, CScope &scope)
const string & kJoinedFeaturesNoException
bool IsGeneInXref(const CSeq_feat &gene, const CSeq_feat &feat, bool &have_gene_ref)
static CSeq_annot_EditHandle GetAnnotHandle(CBioseq_Handle bsh)
const string suspect_phrases[]
const string kShortIntronExcept
bool HasMixedStrands(const CSeq_loc &loc)
const string kExtraPseudo
bool IsShortrRNA(const CSeq_feat &f, CScope *scope)
bool StrandsMatch(ENa_strand s1, ENa_strand s2)
bool ReportGeneMissing(const CSeq_feat &f)
static const string kNewExceptions[]
const string kShortIntronTop
bool IsNonExtendable(const CSeq_loc &loc, const CBioseq &seq, CScope *scope)
static bool IsProductMatch(const string &rna_product, const string &cds_product)
bool IsPartialStartConflict(const CSeq_feat &feat, const CSeq_feat &gene, bool is_mrna=false)
const string kEukaryoteShouldHavemRNA
const string kGenePartialConflictOther
const size_t kNumSuspiciousNotePhrases
const string kEukaryoticCDSHasMrna
static const string kIntergenicSpacerNames[]
bool GeneRefMatch(const CGene_ref &g1, const CGene_ref &g2)
bool IsGeneLocationOk(const CSeq_loc &feat_loc, const CSeq_loc &gene_loc, ENa_strand feat_strand, ENa_strand gene_strand, bool is_coding_region, CScope &scope, const vector< const CSeq_feat * > &features)
static void FindSuspiciousNotePhrases(const string &s, CDiscrepancyContext &context, CReportNode &rep, const CSeq_feat &feat)
const string & kJoinedFeatures
const string kCDShasNoTRNA
const string kNonExtendableException
static const string kFeatureList
static string GetNextSubitemId(size_t num)
const string kConflictStop
static const string kPutativeFrameShift
bool IsMixedStrand(const CSeq_loc &loc)
static void ConvertToMiscFeature(CSeq_feat &sf, CScope &scope)
const string & kJoinedFeaturesBlankException
unsigned int TSeqPos
Type for sequence locations and lengths.
constexpr size_t ArraySize(const Element(&)[Size])
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
virtual bool Equals(const CSerialObject &object, ESerialRecursionMode how=eRecursive) const
Check if both objects contain the same values.
bool IsPartialStart(ESeqLocExtremes ext) const
check start or stop of location for e_Lim fuzz
ENa_strand GetStrand(void) const
Get the location's strand.
TSeqPos GetStart(ESeqLocExtremes ext) const
Return start and stop positions of the seq-loc.
CConstRef< CSeq_loc > GetRangeAsSeq_loc(void) const
Get seq-loc for the current iterator position.
bool IsSetStrand(EIsSetStrand flag=eIsSetStrand_Any) const
Check if strand is set for any/all part(s) of the seq-loc depending on the flag.
const CSeq_id * GetId(void) const
Get the id of the location return NULL if has multiple ids or no id at all.
TRange GetRange(void) const
Get the range.
ENa_strand GetStrand(void) const
void SetStrand(ENa_strand strand)
Set the strand for all of the location's ranges.
bool IsPartialStop(ESeqLocExtremes ext) const
TSeqPos GetStop(ESeqLocExtremes ext) const
CMappedFeat GetBestOverlappingFeat(const CMappedFeat &feat, CSeqFeatData::ESubtype need_subtype, sequence::EOverlapType overlap_type, CFeatTree *feat_tree=0, const SAnnotSelector *base_sel=0)
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
sequence::ECompare Compare(const CSeq_loc &loc1, const CSeq_loc &loc2, CScope *scope)
Returns the sequence::ECompare containment relationship between CSeq_locs.
@ eOverlap_Contained
2nd contained within 1st extremes
@ eContains
First CSeq_loc contains second.
@ eSame
CSeq_locs contain each other.
@ eContained
First CSeq_loc contained by second.
@ eNoOverlap
CSeq_locs do not overlap or abut.
const CSeq_feat * GetCDSForProduct(const CBioseq &product, CScope *scope)
Get the encoding CDS feature of a given protein sequence.
CConstRef< CSeq_feat > GetGeneForFeature(const CSeq_feat &feat, CScope &scope)
Finds gene for feature, but obeys SeqFeatXref directives.
CConstRef< CSeq_feat > GetmRNAforCDS(const CSeq_feat &cds, CScope &scope)
GetmRNAforCDS A function to find a CSeq_feat representing the appropriate mRNA for a given CDS.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
CSeq_feat_Handle GetSeq_featHandle(const CSeq_feat &feat, EMissing action=eMissing_Default)
CConstRef< CBioseq > GetCompleteBioseq(void) const
Get the complete bioseq.
bool IsFtable(void) const
CSeq_entry_Handle GetParentEntry(void) const
Get parent Seq-entry handle.
void Remove(void) const
Remove the feature from Seq-annot.
CSeq_feat_EditHandle AddFeat(const CSeq_feat &new_obj) const
void Remove(ERemoveMode mode=eRemoveSeq_entry) const
CSeq_annot_EditHandle AttachAnnot(CSeq_annot &annot) const
Attach an annotation.
CBioseq_EditHandle GetEditHandle(void) const
Get 'edit' version of handle.
void Replace(const CSeq_feat &new_feat) const
Replace the feature with new Seq-feat object.
bool IsInGap(TSeqPos pos) const
true if sequence at 0-based position 'pos' has gap Note: this method is not MT-safe,...
void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const
Fill the buffer string with the sequence data for the interval [start, stop).
bool Empty(void) const THROWS_NONE
Check if CConstRef is empty – not pointing to any object which means having a null value.
TObjectType * GetPointer(void) const THROWS_NONE
Get pointer,.
void Reset(void)
Reset reference object.
void Reset(void)
Reset reference object.
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
bool NotEmpty(void) const THROWS_NONE
Check if CConstRef is not empty – pointing to an object and has a non-null value.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
NCBI_NS_STD::string::size_type SIZE_TYPE
static string SizetToString(size_t value, TNumToStringFlags flags=0, int base=10)
Convert size_t to string.
static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)
Find the pattern in the specified range of a string using a case insensitive search.
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
static bool EqualCase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-sensitive equality of a substring with another string.
static SIZE_TYPE FindCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)
Find the pattern in the specified range of a string using a case sensitive search.
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
static bool Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Test for equality of a substring with another string.
@ eNocase
Case insensitive compare.
static const char label[]
TGenome GetGenome(void) const
Get the Genome member data.
bool IsSetOrg(void) const
Check if a value has been assigned to Org data member.
const TOrg & GetOrg(void) const
Get the Org member data.
bool IsSetGenome(void) const
Check if a value has been assigned to Genome data member.
TTo GetTo(void) const
Get the To member data.
TFrom GetFrom(void) const
Get the From member data.
const TDesc & GetDesc(void) const
Get the Desc member data.
bool IsSetPseudo(void) const
pseudogene Check if a value has been assigned to Pseudo data member.
bool IsSetLocus_tag(void) const
systematic gene name (e.g., MI0001, ORF0069) Check if a value has been assigned to Locus_tag data mem...
bool IsSetLocus(void) const
Official gene symbol Check if a value has been assigned to Locus data member.
bool IsSetDesc(void) const
descriptive name Check if a value has been assigned to Desc data member.
bool IsSetAllele(void) const
Official allele designation Check if a value has been assigned to Allele data member.
bool IsSetMaploc(void) const
descriptive map location Check if a value has been assigned to Maploc data member.
const TLocus_tag & GetLocus_tag(void) const
Get the Locus_tag member data.
const TLocus & GetLocus(void) const
Get the Locus member data.
const TAllele & GetAllele(void) const
Get the Allele member data.
const TMaploc & GetMaploc(void) const
Get the Maploc member data.
const TTaxname & GetTaxname(void) const
Get the Taxname member data.
bool IsSetTaxname(void) const
preferred formal name Check if a value has been assigned to Taxname data member.
const TVal & GetVal(void) const
Get the Val member data.
bool IsSetComment(void) const
Check if a value has been assigned to Comment data member.
bool IsSetData(void) const
the specific data Check if a value has been assigned to Data data member.
bool IsSetQual(void) const
qualifiers Check if a value has been assigned to Qual data member.
TNcbi8aa GetNcbi8aa(void) const
Get the variant data.
E_Choice Which(void) const
Which variant is currently selected.
void SetLocation(TLocation &value)
Assign a value to Location data member.
bool IsCdregion(void) const
Check if variant Cdregion is selected.
void SetComment(const TComment &value)
Assign a value to Comment data member.
const TQual & GetQual(void) const
Get the Qual member data.
const TId & GetId(void) const
Get the Id member data.
const TLocation & GetLocation(void) const
Get the Location member data.
void SetExcept(TExcept value)
Assign a value to Except data member.
TFrame GetFrame(void) const
Get the Frame member data.
const TData & GetData(void) const
Get the Data member data.
const TExcept_text & GetExcept_text(void) const
Get the Except_text member data.
void ResetData(void)
Reset Data data member.
bool IsSetExcept_text(void) const
explain if except=TRUE Check if a value has been assigned to Except_text data member.
bool CanGetId(void) const
Check if it is safe to call GetId method.
void SetData(TData &value)
Assign a value to Data data member.
void ResetComment(void)
Reset Comment data member.
const TCdregion & GetCdregion(void) const
Get the variant data.
bool IsSetAa(void) const
Check if a value has been assigned to Aa data member.
const TAa & GetAa(void) const
Get the Aa member data.
const TProduct & GetProduct(void) const
Get the Product member data.
TNcbieaa GetNcbieaa(void) const
Get the variant data.
const TComment & GetComment(void) const
Get the Comment member data.
bool CanGetFrame(void) const
Check if it is safe to call GetFrame method.
void SetPseudo(TPseudo value)
Assign a value to Pseudo data member.
const TGene & GetGene(void) const
Get the variant data.
void SetExcept_text(const TExcept_text &value)
Assign a value to Except_text data member.
const TXref & GetXref(void) const
Get the Xref member data.
void ResetProduct(void)
Reset Product data member.
vector< CRef< CGb_qual > > TQual
const TQual & GetQual(void) const
Get the Qual member data.
const TRna & GetRna(void) const
Get the variant data.
TNcbistdaa GetNcbistdaa(void) const
Get the variant data.
TQual & SetQual(void)
Assign a value to Qual data member.
const TCode_break & GetCode_break(void) const
Get the Code_break member data.
bool IsSetProduct(void) const
product of process Check if a value has been assigned to Product data member.
bool IsRna(void) const
Check if variant Rna is selected.
bool IsSetLocation(void) const
feature made from Check if a value has been assigned to Location data member.
@ eFrame_not_set
not set, code uses one
@ e_Ncbi8aa
NCBI8aa code.
@ e_Ncbieaa
ASCII value of NCBIeaa code.
ENa_strand
strand of nucleic acid
TRepr GetRepr(void) const
Get the Repr member data.
const TInst & GetInst(void) const
Get the Inst member data.
TTopology GetTopology(void) const
Get the Topology member data.
bool IsSetRepr(void) const
Check if a value has been assigned to Repr data member.
bool IsSetMol(void) const
Check if a value has been assigned to Mol data member.
const TSource & GetSource(void) const
Get the variant data.
bool IsSetBiomol(void) const
Check if a value has been assigned to Biomol data member.
bool IsSetExt(void) const
extensions for special types Check if a value has been assigned to Ext data member.
bool IsSetInst(void) const
the sequence data Check if a value has been assigned to Inst data member.
TLength GetLength(void) const
Get the Length member data.
TMol GetMol(void) const
Get the Mol member data.
bool IsDelta(void) const
Check if variant Delta is selected.
const TExt & GetExt(void) const
Get the Ext member data.
TBiomol GetBiomol(void) const
Get the Biomol member data.
const TDelta & GetDelta(void) const
Get the variant data.
const Tdata & Get(void) const
Get the member data.
const TMolinfo & GetMolinfo(void) const
Get the variant data.
@ eRepr_delta
sequence made by changes (delta) to others
@ e_Ncbieaa
extended ASCII 1 letter aa codes
@ e_Ncbistdaa
consecutive codes for std aas
@ e_Source
source of materials, includes Org-ref
where both are integers</td > n< td ></td > n</tr > n< tr > n< td > tse</td > n< td > optional</td > n< td > String</td > n< td class=\"description\"> TSE option controls what blob is smart and slim</td> n<td> orig</td> n</tr> n<tr> n<td> last_modified</td> n<td> optional</td> n<td> Integer</td> n<td class=\"description\"> The blob last modification If provided then the exact match will be requested with n the Cassandra storage corresponding field value</td> n<td> Positive integer Not provided means that the most recent match will be selected</td> n<td></td> n</tr> n<tr> n<td> use_cache</td> n<td> optional</td> n<td> String</td> n<td class=\"description\"> The option controls if the Cassandra LMDB cache and or database should be used It n affects the seq id resolution step and the blob properties lookup step The following n options are BIOSEQ_INFO and BLOB_PROP at all
static string GetProductName(const CProt_ref &prot)
constexpr bool empty(list< Ts... >) noexcept
const struct ncbi::grid::netcache::search::fields::SIZE size
const CharType(& source)[N]
static CS_CONTEXT * context
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4