(
str.empty() || subfield > 2)
return "";
59 if(!subfield)
return str;
61 size_tpos =
str.find(
':');
62 if(pos == string::npos) {
63 if(subfield == 1)
return str;
67 if(subfield == 1)
return str.substr(0, pos);
81 for(
autoit : quals) {
85 if(
str.empty() || (str_cons && !str_cons->Empty() && !(str_cons->Match(
str))) ) {
99 if(!
rna.CanGetExt()) {
104 switch(ext.
Which()) {
107 if(seq_feat.
CanGetQual() && (rna_str.empty() || rna_str==
"ncRNA"|| rna_str==
"tmRNA"|| rna_str==
"misc_RNA")) {
113rna_str =
"tRNA-"+ rna_str;
128 static const char* rule_type[] = {
133 "Organelles not appropriate in prokaryote",
134 "Suspicious phrase; should this be nonfunctional?",
135 "May contain database identifier more appropriate in note; remove from product name",
136 "Remove organism from product name",
137 "Possible parsing error or incorrect formatting; remove inappropriate symbols",
138 "Implies evolutionary relationship; change to -like protein",
139 "Consider adding 'protein' to the end of the product name",
140 "Correct the name or use 'hypothetical protein'",
141 "Use American spelling",
142 "Use short product name instead of descriptive phrase",
143 "use protein instead of gene as appropriate" 154 return "[n] feature[s] "+ desc;
158 switch(find.
Which()) {
160{
strings =
"[n] feature[s] ";
163s +=
"start[S] with";
181 return "[n] feature[s] May contain plural";
183 return "[n] feature[s] violate[S] e_N_or_more_brackets_or_parentheses !!!";
186 return "[n] feature[s] Three or more numbers together but not contain[S] \'methyltransferas\'";
188 return "[n] feature[s] contain[S] underscore";
190 return "[n] feature[s] violate[S] e_Prefix_and_numbers !!!";
192 return "[n] feature[s] [is] all capital letters";
194 return "[n] feature[s] contain[S] unbalanced brackets or parentheses";
196 return "[n] feature[s] violate[S] e_Too_long !!!";
198 return "[n] feature[s] violate[S] e_Has_term !!!";
203 return "[n] feature[s] violate[S] some other mysterious rule!";
208 static const string kSuspectProductNames=
"[n] product_name[s] contain[S] suspect phrase[s] or character[s]";
212 for(
auto& symbol : prod_name) {
223 for(
auto& feat :
context.GetFeat()) {
224 if(! feat.IsSetData()) {
227 if(
context.IsPseudo(feat)) {
234 stringprot_name = *
data.GetProt().GetName().begin();
235vector<char> Hits(rules->
Get().size());
236std::fill(Hits.begin(), Hits.end(), 0);
237rules->
Screen(prot_name, Hits.data());
241node.
Add(*
context.SeqFeatObjRef(cds ? *cds : feat)).Fatal();
245 for(
autorule : rules->
Get()) {
246 if(Hits[rule_num] && rule->StringMatchesSuspectProductRule(prot_name)) {
248 size_trule_type = rule->GetRule_type();
249 stringrule_name =
"[*";
250 if(rule_type < 10) {
254 stringrule_text = leading_space +
GetRuleMatch(*rule);
257 if(rule->CanGetReplace()) {
268 if(feat.IsSetXref()) {
274 if( !it->empty() ) {
275 stringprot_name = *it;
277vector<char> Hits(rules->
Get().size());
278std::fill(Hits.begin(), Hits.end(), 0);
279rules->
Screen(prot_name, Hits.data());
282node.
Add(*
context.SeqFeatObjRef(feat)).Fatal();
286 for(
autorule : rules->
Get()) {
287 if(Hits[rule_num] && rule->StringMatchesSuspectProductRule(prot_name)) {
289 size_trule_type = rule->GetRule_type();
290 stringrule_name =
"[*";
291 if(rule_type < 10) {
295 stringrule_text = leading_space +
GetRuleMatch(*rule);
297 if(rule->CanGetReplace()) {
314 if(
rna.IsSetExt()) {
317 stringmrna_name = ext.
GetName();
319vector<char> Hits(rules->
Get().size());
320std::fill(Hits.begin(), Hits.end(), 0);
321rules->
Screen(mrna_name, Hits.data());
324node.
Add(*
context.SeqFeatObjRef(feat)).Fatal();
328 for(
autorule : rules->
Get()) {
329 if(Hits[rule_num] && rule->StringMatchesSuspectProductRule(mrna_name)) {
331 size_trule_type = rule->GetRule_type();
332 stringrule_name =
"[*";
333 if(rule_type < 10) {
337 stringrule_text = leading_space +
GetRuleMatch(*rule);
339 if(rule->CanGetReplace()) {
358 stringfind = search;
360 if(!find.length()) {
365 stringtail =
input.substr(p + find.length());
383 stringname = *
prot->GetData().GetProt().GetName().begin();
385 if(
rna&&
rna->GetData().GetRna().CanGetExt() &&
rna->GetData().GetRna().GetExt().GetName() == name) {
396 stringorig_prot_name;
414 if(!newtext.empty() && newtext != prot_name) {
415orig_prot_name = std::move(prot_name);
416prot_name = std::move(newtext);
417 automrna = get_mrna();
419mrna->SetData().SetRna().SetExt().SetName() = prot_name;
422 autocds = get_cds();
427 returnorig_prot_name;
450 stringold_prot_name;
468 if(!newtext.empty() && newtext != prot_name) {
469old_prot_name = std::move(prot_name);
470prot_name = std::move(newtext);
473 if(prot_name != old_prot_name && !prot_name.empty()) {
474 strings =
"Changed \'"+ old_prot_name +
"\' to \'"+ prot_name +
"\' at "+ obj->GetLocation();
478vector<CRef<CAutofixReport>> reports;
479reports.push_back(report);
480ret->AddSubitems(reports);
488 if(
rna.IsSetExt()) {
491 stringmrna_name = ext.
GetName();
497 stringold_mrna_name;
515 if(!newtext.empty() && newtext != mrna_name) {
516old_mrna_name = std::move(mrna_name);
517mrna_name = std::move(newtext);
520mrna->
SetData().SetRna().SetExt().SetName() = mrna_name;
523 if(mrna_name != old_mrna_name && !mrna_name.empty()) {
524 strings =
"Changed \'"+ old_mrna_name +
"\' to \'"+ mrna_name +
"\' at "+ obj->GetLocation();
528vector<CRef<CAutofixReport>> reports;
529reports.push_back(report);
530ret->AddSubitems(reports);
541 string& prot_name =
prot->SetData().SetProt().SetName().front();
547[&mrna] { return CRef<CSeq_feat>(mrna); },
548[&sf] { return CRef<CSeq_feat>((CSeq_feat*)sf); });
549 if(prot_name != old_prot_name && !prot_name.empty()) {
550 strings =
"Changed \'"+ old_prot_name +
"\' to \'"+ prot_name +
"\' at "+ obj->GetLocation();
554vector<CRef<CAutofixReport>> reports;
555reports.push_back(report);
556ret->AddSubitems(reports);
565 DISCREPANCY_CASE(ORGANELLE_PRODUCTS, FEAT,
eOncaller,
"Organelle products on non-organelle sequence: on when neither bacteria nor virus")
578 for(
auto& feat :
context.GetFeat()) {
579 if(feat.IsSetData() && feat.GetData().GetSubtype() ==
CSeqFeatData::eSubtype_prot&& feat.GetData().GetProt().IsSetName() && !feat.GetData().GetProt().GetName().empty() && !
context.IsPseudo(feat)) {
580 stringprot_name = *feat.GetData().GetProt().GetName().begin();
582vector<char> Hits(rules->
Get().size());
583std::fill(Hits.begin(), Hits.end(), 0);
584rules->
Screen(prot_name, Hits.data());
586 for(
autorule : rules->
Get()) {
587 if(Hits[rule_num] && rule->StringMatchesSuspectProductRule(prot_name)) {
588 if(rule->CanGetReplace()) {
611 string& prot_name =
prot->SetData().SetProt().SetName().front();
617[&mrna] { return CRef<CSeq_feat>(mrna); },
618[&sf] { return CRef<CSeq_feat>((CSeq_feat*)sf); });
619 if(prot_name != old_prot_name && !prot_name.empty()) {
620 strings =
"Changed \'"+ old_prot_name +
"\' to \'"+ prot_name +
"\' at "+ obj->GetLocation();
624vector<CRef<CAutofixReport>> reports;
625reports.push_back(report);
626ret->AddSubitems(reports);
641 return ConstRef(&*rrna_products_suspect_rule_set);
644 CTempStringrrna_products_suspect_rule_set_asn_text =
645 "Suspect-rule-set ::= {\n" 646 " { find string-constraint { match-text \"domain\", whole-word FALSE } },\n" 647 " { find string-constraint { match-text \"partial\", whole-word FALSE } },\n" 648 " { find string-constraint { match-text \"5s_rRNA\", whole-word FALSE } },\n" 649 " { find string-constraint { match-text \"16s_rRNA\", whole-word FALSE } },\n" 650 " { find string-constraint { match-text \"23s_rRNA\", whole-word FALSE } },\n" 652 " find string-constraint { match-text \"8S\", whole-word TRUE },\n" 653 " except string-constraint { match-text \"5.8S\", whole-word TRUE } } }";
656asn_istrm.
Read(&*rrna_products_suspect_rule_set, rrna_products_suspect_rule_set->GetThisTypeInfo());
658 return ConstRef(&*rrna_products_suspect_rule_set);
683 "s_SummarizeStringConstraint input too complex. " 684 "Please expand the function or find/create a better one.");
687out_strm <<
"contains '"<< string_constraint.
GetMatch_text() <<
"'";
689out_strm <<
" (whole word)";
702 "s_SummarizeSearchFunc input too complex. " 703 "Please expand the function or find/create a better one.");
731 "s_SummarizeSuspectRule input too complex. " 732 "Please expand the function or find/create a better one.");
738out_strm <<
" but not ";
746 static const stringkMsg =
"[n] rRNA product name[s] contain[S] suspect phrase";
747 for(
auto& feat :
context.GetFeat()) {
751vector<char> Hits(rules->
Get().size());
752std::fill(Hits.begin(), Hits.end(), 0);
753rules->
Screen(product, Hits.data());
755 for(
autorule : rules->
Get()) {
756 if(Hits[rule_num] && rule->StringMatchesSuspectProductRule(product)) {
757ostringstream detailed_msg;
758detailed_msg <<
"[n] rRNA product name[s] ";
760m_Objs[kMsg][detailed_msg.str()].Ext().Add(*
context.SeqFeatObjRef(feat));
774vector<char> Hits(rules->
Get().size());
775std::fill(Hits.begin(), Hits.end(), 0);
781node.
Add(*
context.StringObjRef()).Fatal();
784 for(
autorule : rules->
Get()) {
785 if(Hits[rule_num] && rule->StringMatchesSuspectProductRule(
str)) {
787 size_trule_type = rule->GetRule_type();
788 stringrule_name =
"[*";
789 if(rule_type < 10) {
793 stringrule_text = leading_space +
GetRuleMatch(*rule);
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
bool IsInitialized(void) const
static bool IsUnculturedNonOrganelleName(const string &taxname)
@RNA_ref.hpp User-defined methods of the data storage class.
static void Add(TReportObjectList &list, TReportObjectSet &hash, CReportObj &obj, bool unique=true)
CReportNode & Summ(bool b=true)
namespace ncbi::objects::
const string & GetNamedQual(const CTempString &qual_name) const
Return a named qualifier.
void Screen(const char *input, char *output) const
bool StringMatchesSuspectProductRule(const CMatchString &str) const
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Include a standard set of the NCBI C++ Toolkit most basic headers.
std::function< CRef< objects::CSeq_feat >() > GetFeatureFunc
#define DISCREPANCY_AUTOFIX(name)
#define DISCREPANCY_CASE(name, type, group, descr)
static const char * str(char *buf, int n)
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define NCBI_USER_THROW(message)
Throw a quick-and-dirty runtime exception of type 'CException' with the given error message and error...
string GetLabel(const CSeq_id &id)
void Read(const CObjectInfo &object)
Read object of known type.
CMappedFeat GetBestMrnaForCds(const CMappedFeat &cds_feat, CFeatTree *feat_tree=0, const SAnnotSelector *base_sel=0)
@ fFGL_Content
Include its content if there is any.
const CSeq_feat * GetCDSForProduct(const CBioseq &product, CScope *scope)
Get the encoding CDS feature of a given protein sequence.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
const CSeq_feat & GetMappedFeature(void) const
Feature mapped to the master sequence.
CConstRef< C > ConstRef(const C *object)
Template function for conversion of const object pointer to CConstRef.
void Reset(void)
Reset reference object.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)
Find the pattern in the specified range of a string using a case insensitive search.
static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)
Truncate whitespace in a string (in-place)
const char * data(void) const
Return a pointer to the array represented.
bool empty(void) const
Return true if the represented string is empty (i.e., the length is zero)
size_type length(void) const
Return the length of the represented array.
CTempString substr(size_type pos) const
Obtain a substring from this string, beginning at a given offset.
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
static string & ReplaceInPlace(string &src, const string &search, const string &replace, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
#define DEFINE_STATIC_FAST_MUTEX(id)
Define static fast mutex and initialize it.
TGenome GetGenome(void) const
Get the Genome member data.
bool IsSetOrg(void) const
Check if a value has been assigned to Org data member.
const TOrg & GetOrg(void) const
Get the Org member data.
bool IsSetIgnore_weasel(void) const
Check if a value has been assigned to Ignore_weasel data member.
bool IsSetIs_all_lower(void) const
Check if a value has been assigned to Is_all_lower data member.
bool IsSetIs_all_punct(void) const
Check if a value has been assigned to Is_all_punct data member.
bool IsSetMatch_location(void) const
Check if a value has been assigned to Match_location data member.
const Tdata & Get(void) const
Get the member data.
bool IsString_constraint(void) const
Check if variant String_constraint is selected.
bool IsSetRule_type(void) const
Check if a value has been assigned to Rule_type data member.
bool IsSetFeat_constraint(void) const
Check if a value has been assigned to Feat_constraint data member.
const TSimple_replace & GetSimple_replace(void) const
Get the variant data.
TMatch_location GetMatch_location(void) const
Get the Match_location member data.
const TReplace & GetReplace(void) const
Get the Replace member data.
bool IsSetIs_first_cap(void) const
Check if a value has been assigned to Is_first_cap data member.
bool IsSetCase_sensitive(void) const
Check if a value has been assigned to Case_sensitive data member.
bool CanGetReplace(void) const
Check if it is safe to call GetReplace method.
bool IsSetIgnore_punct(void) const
Check if a value has been assigned to Ignore_punct data member.
bool IsSetDescription(void) const
Check if a value has been assigned to Description data member.
TWhole_string GetWhole_string(void) const
Get the Whole_string member data.
bool IsSetNot_present(void) const
Check if a value has been assigned to Not_present data member.
bool IsHaem_replace(void) const
Check if variant Haem_replace is selected.
bool IsSetIs_first_each_cap(void) const
Check if a value has been assigned to Is_first_each_cap data member.
TRule_type GetRule_type(void) const
Get the Rule_type member data.
const TDescription & GetDescription(void) const
Get the Description member data.
bool CanGetFind(void) const
Check if it is safe to call GetFind method.
TMove_to_note GetMove_to_note(void) const
Get the Move_to_note member data.
const TExcept & GetExcept(void) const
Get the Except member data.
const TFind & GetFind(void) const
Get the Find member data.
const TMatch_text & GetMatch_text(void) const
Get the Match_text member data.
bool IsSimple_replace(void) const
Check if variant Simple_replace is selected.
bool IsSetFind(void) const
Check if a value has been assigned to Find data member.
bool IsSetFatal(void) const
Check if a value has been assigned to Fatal data member.
const TReplace & GetReplace(void) const
Get the Replace member data.
const TReplace_func & GetReplace_func(void) const
Get the Replace_func member data.
bool IsSetIgnore_space(void) const
Check if a value has been assigned to Ignore_space data member.
bool IsSetIs_all_caps(void) const
Check if a value has been assigned to Is_all_caps data member.
bool IsSetReplace(void) const
Check if a value has been assigned to Replace data member.
bool CanGetReplace(void) const
Check if it is safe to call GetReplace method.
bool CanGetRule_type(void) const
Check if it is safe to call GetRule_type method.
const TString_constraint & GetString_constraint(void) const
Get the variant data.
E_Choice Which(void) const
Which variant is currently selected.
bool IsSetExcept(void) const
Check if a value has been assigned to Except data member.
bool IsSetIgnore_words(void) const
Check if a value has been assigned to Ignore_words data member.
@ e_N_or_more_brackets_or_parentheses
@ eString_location_equals
@ eString_location_starts
const TTaxname & GetTaxname(void) const
Get the Taxname member data.
bool IsSetTaxname(void) const
Preferred formal name. Check if a value has been assigned to Taxname data member.
const TName & GetName(void) const
Get the Name member data.
E_Choice Which(void) const
Which variant is currently selected.
const TGen & GetGen(void) const
Get the variant data.
const TName & GetName(void) const
Get the variant data.
bool CanGetProduct(void) const
Check if it is safe to call GetProduct method.
const TProduct & GetProduct(void) const
Get the Product member data.
@ e_Name
for naming "other" type
const TData & GetData(void) const
Get the Data member data.
bool CanGetQual(void) const
Check if it is safe to call GetQual method.
bool IsProt(void) const
Check if variant Prot is selected.
const TQual & GetQual(void) const
Get the Qual member data.
bool IsSetXref(void) const
Cite other relevant features. Check if a value has been assigned to Xref data member.
const TData & GetData(void) const
Get the Data member data.
bool IsSetData(void) const
The specific data. Check if a value has been assigned to Data data member.
void SetData(TData &value)
Assign a value to Data data member.
const TProduct & GetProduct(void) const
Get the Product member data.
const TProt & GetProt(void) const
Get the variant data.
const TXref & GetXref(void) const
Get the Xref member data.
vector< CRef< CSeqFeatXref > > TXref
const TRna & GetRna(void) const
Get the variant data.
bool IsSetProduct(void) const
Product of process. Check if a value has been assigned to Product data member.
const TSource & GetSource(void) const
Get the variant data.
void AddComment(CSeq_feat &feat, const string &comment)
Generic utility macros and templates for exploring NCBI objects.
#define GET_FIELD_OR_DEFAULT(Var, Fld, Dflt)
GET_FIELD_OR_DEFAULT base macro.
static const string kSuspectProductNames
static string GetRuleText(const CSuspect_rule &rule)
static void s_SummarizeSuspectRule(ostream &out_strm, const CSuspect_rule &rule)
std::function< CRef< CSeq_feat >() > GetFeatureFunc
static string ReplaceNoCase(const string &input, const string &search, const string &replace)
static string GetRuleMatch(const CSuspect_rule &rule)
static string GetRNAProductString(const CSeq_feat &seq_feat)
string GetTwoFieldSubfield(const string &str, unsigned subfield)
static void GetProtAndRnaForCDS(const CSeq_feat &cds, CScope &scope, CSeq_feat *&prot, CSeq_feat *&mrna)
static CConstRef< CSuspect_rule_set > s_GetrRNAProductsSuspectRuleSet()
static string GetFirstGBQualMatch(const vector< CRef< CGb_qual > > &quals, const string &qual_name, unsigned subfield=0, const CString_constraint *str_cons=nullptr)
string FixProductName(const CSuspect_rule *rule, CScope &, string &prot_name, GetFeatureFunc get_mrna, GetFeatureFunc get_cds)
static bool ContainsLetters(const string &prod_name)
static void s_SummarizeSearchFunc(ostream &out_strm, const CSearch_func &search_func)
static void s_SummarizeStringConstraint(ostream &out_strm, const CString_constraint &string_constraint)
static CS_CONTEXT * context
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4