( !(*gen_feature)->GetData().IsFtable() )
continue;
51 if( !(*f)->GetData().IsGene() )
continue;
52 stringqname; (*f)->GetData().GetGene().GetLabel(&qname);
53 if( diag.
find(qname) == diag.
end() )
continue;
69 if( !(problem->type &
type) )
continue;
71 if(!problem->message.size())
continue;
82 for(list<problemStr>::iterator problem = diag[qname].problems.
begin(); problem!=diag[qname].problems.
end();)
85 if( !(problem->type &
type) )
91problem=diag[qname].problems.
erase(problem);
115qnames[0]=qname1; qnames[1]=qname2;
116 for(
int i=0;
i<2;
i++)
118 string& qname = qnames[
i];
119 if( diag.
find(qname) != diag.
end() )
131 stringqname = problem->first;
132 if(
done.find(qname)!=
done.end())
continue;
155 ITERATE(list<problemStr>, problem, diag[qname].problems)
161 if(problem->type &
type)
180strres <<
"unknown_problem_type="<<
type<<
'\0';
181 string result=strres.str();
183 result=
"Potential overlap found";
185 result=
"Potential RNA overlap found";
187 result=
"Complete overlap found";
189 result=
"overlap marked for removal";
191 result=
"Something relevant to frame shift found";
193 result=
"Potential frame shift evidence found";
195 result=
"Evidence absolving from the frame shift accusation found";
197 result=
"Potential partial protein annotation found";
199 result=
"Short annotation found";
201 result=
"tRNA is missing in the list of independently annotated tRNAs";
203 result=
"RNA is missing in the list of annotated RNAs in the input";
205 result=
"RNA is present at the wrong strand";
207 result=
"RNA is present with undefined strand";
209 result=
"tRNA is a complete mismatch";
211 result=
"tRNA has mismatched ends";
243 out<< name.c_str() ;
261 if(!feat->GetData().IsRna() && !feat->GetData().IsGene())
continue;
262 const CSeq_loc& loc = feat->GetLocation();
269 if(problem_locs.
find(range)==problem_locs.
end())
continue;
271problem_locs[range].count++;
272 if(feat->GetData().IsRna()) problem_locs[range].rnacount++;
273 if(feat->GetData().IsGene()) problem_locs[range].genecount++;
278 if(!feat->GetData().IsRna() && !feat->GetData().IsGene())
continue;
280 CSeq_loc& loc = feat->SetLocation();
285 if(problem_locs.
find(range)==problem_locs.
end())
continue;
287 if( problem_locs[range].
count!=2
288|| problem_locs[range].rnacount!=1
289|| problem_locs[range].genecount!=1
294 NcbiCerr<<
"CReadBlastApp::FixStrands: " 296<<
"location found, but the number of features with that location is confusing, " 298<<
"["<< problem_locs[range].name <<
"]" 299<<
"("<< range <<
")" 307inter->SetStrand(problem_locs[range].strand);
308 NcbiCerr<<
"CReadBlastApp::FixStrands: " 309<<
"["<< problem_locs[range].name <<
"] " 328 for(CSeq_submit::C_Data::TEntrys::iterator entry =
m_Submit.
SetData().SetEntrys().begin();
335<<
"RemoveProblems(void): doing entry: removeme = " 340 NcbiCerr<<
"RemoveProblems(): WARNING: " 341<<
"CSeq_entry deleted, loss of annotation might occur" 357<<
"RemoveProblems(void): case is single entry " 376<<
"RemoveProblems(CSeq_entry)(seq case): removeme = " 390<<
"RemoveProblems(CSeq_entry)(set case): removeme = " 392<<
", entries.size = " 405 if(!entry.
IsSet())
return;
408 if(
entries.size()!=1)
return;
414 for(CSeq_descr::Tdata::iterator desc = descs.begin(); desc!=descs.end(); )
416seq.
SetDescr().Set().push_back(*desc);
417desc=descs.erase(desc);
423 NcbiCerr<<
"NormalizeSeqentry(CSeq_entry...): " 425<<
"converted sequence set to sequence" 433 boolnoannot=
false;
439 if(all_entries_removed > 0) {
; noseqs=
true;}
444 if(all_annot_removed > 0) {setseq.
ResetAnnot(); noannot=
true;}
446 if(noseqs ) removeme = 1;
449<<
"RemoveProblems(CBioseq_set): noseqs = " 453<<
", removeme (return) = " 477 stringorigName = thisName;
478string::size_type ipos = thisName.rfind(
'|');
if(ipos!=string::npos) thisName.erase(0, ipos+1);
479ipos = thisName.rfind(
'_');
if(ipos!=string::npos) ipos= thisName.rfind(
'_', ipos-1);
482<<
"RemoveProblems(CBioseq): remove? sequence " 483<<
"["<< origName <<
"]" 485<<
"["<< thisName <<
"]" 488 if(problem_seqs.
find(thisName) != problem_seqs.
end())
491<<
"RemoveProblems(CBioseq): sequence " 492<<
"["<< origName <<
"]" 493<<
" is marked for removal, because of a match to " 494<<
"["<< thisName <<
"]" 501<<
"RemoveProblems(CBioseq): remove = " 514 for(CBioseq_set::TSeq_set::iterator entries_end =
entries.end(), entry=
entries.begin(); entry != entries_end; )
519<<
"RemoveProblems(CBioseq_set::TSeq_set): removeseq = " 523 if(removeseq) entry=
entries.erase(entry);
529<<
"RemoveProblems(CBioseq_set::TSeq_set): nentries = " 541 for(CBioseq::TAnnot::iterator annot=annots.begin(); annot!=annots.end(); )
544 if( (*annot)->GetData().IsFtable()) removeme=
RemoveProblems((*annot)->SetData().SetFtable(), problem_seqs, loc_map);
547 NcbiCerr<<
"RemoveProblems(annots, problem_seqs): " 549<<
"annotation has empty feature table and it will be removed" 551annot=annots.erase(annot);
555 if(annots.size()==0)
remove=1;
567 for(CSeq_annot::C_Data::TFtable::iterator feat_end =
table.end(), feat =
table.begin(); feat != feat_end;)
571gene = (*feat)->GetData().IsGene();
572cdregion = (*feat)->GetData().IsCdregion();
573 booldel_feature=
false;
579 if(
PrintDetails())
NcbiCerr<<
"RemoveProblems(CSeq_annot::C_Data::TFtable): feat: ("<< real_loc_string <<
")("<< loc_string <<
")"<<
NcbiEndl;
583 if(problem_seqs.
find(loc_string) != problem_seqs.
end())
585 if((*feat)->GetData().IsImp() &&
586(*feat)->GetData().GetImp().CanGetKey())
588 NcbiCerr<<
"RemoveProblems: INFO: feature "<< loc_string <<
": imp, key = "<< (*feat)->GetData().GetImp().GetKey() <<
NcbiEndl;
590 if((*feat)->GetData().IsImp() &&
591(*feat)->CanGetComment() )
593 NcbiCerr<<
"RemoveProblems: INFO: feature "<< loc_string <<
": imp, comment = "<< (*feat)->GetComment() <<
NcbiEndl;
603 if((*feat)->GetData().IsImp() &&
604(*feat)->GetData().GetImp().CanGetKey() &&
605(*feat)->GetData().GetImp().GetKey() ==
"misc_feature" 606) del_feature =
false;
607 elsedel_feature=
true;
612 NcbiCerr<<
"RemoveProblems: feature "<< loc_string <<
": ";
613 if(del_feature)
NcbiCerr<<
"WILL BE REMOVED";
614 else NcbiCerr<<
"stays until further analysis for it";
619 NcbiCerr<<
"RemoveProblems: WARNING: feature " 620<<
"{"<< (*feat)->GetData().SelectionName((*feat)->GetData().Which()) <<
"} " 621<< loc_string <<
": ";
622 NcbiCerr<<
"will be removed because of a problem: ";
626 if(!del_feature && gene && (*feat)->GetData().GetGene().CanGetLocus_tag() )
631 stringlocus_tag = (*feat)->GetData().GetGene().GetLocus_tag();
632 if(problem_seqs.
find(locus_tag) != problem_seqs.
end()) del_feature=
true;
635 NcbiCerr<<
"RemoveProblems: gene "<< locus_tag <<
": ";
644 NcbiCerr<<
"RemoveProblems: WARNING: gene "<< locus_tag <<
": ";
645 NcbiCerr<<
"will be removed because of a problem: ";
650 if(!del_feature && cdregion && (*feat)->CanGetProduct() )
656 if( (*feat)->CanGetProduct() &&
657(*feat)->GetProduct().IsWhole() &&
658(*feat)->GetProduct().GetWhole().IsGeneral() &&
659(*feat)->GetProduct().GetWhole().GetGeneral().CanGetTag() &&
660(*feat)->GetProduct().GetWhole().GetGeneral().GetTag().IsStr() )
662productName = (*feat)->GetProduct().GetWhole().GetGeneral().GetTag().GetStr();
665(*feat)->CanGetProduct() &&
666(*feat)->GetProduct().IsWhole())
668productName = (*feat)->GetProduct().GetWhole().AsFastaString();
671string::size_type ipos=productName.rfind(
'_', productName.size());
672 if(ipos != string::npos)
674string::size_type ipos2;
675ipos2=productName.rfind(
'_', ipos-1);
676 if(ipos2 != string::npos) productName.erase(0, ipos2+1);
680ipos2=productName.rfind(
'|', ipos-1);
681 if(ipos2 != string::npos) productName.erase(0, ipos2+1);
685 if(productName.length() && problem_seqs.
find(productName) != problem_seqs.
end()) del_feature=
true;
688 NcbiCerr<<
"RemoveProblems: cdregion "<< productName <<
": ";
698 if(problem_seqs.
find(real_loc_string) == problem_seqs.
end())
700problem_seqs[real_loc_string]=problem_seqs[loc_string];
703 if(del_feature) feat=
table.erase(feat);
706 if(
table.size()==0) removeme=1;
722 if(seq->IsSetAnnot() && seq->IsAa()) nremoved+=
RemoveInterim(seq->SetAnnot());
723 if(seq->IsSetAnnot() && seq->IsNa()) nremoved+=
RemoveInterim2(seq->SetAnnot());
736 for(CBioseq::TAnnot::iterator annot=annots.begin(), annot_end = annots.end(); annot != annot_end; )
738 boolerased =
false;
739 if((*annot)->GetData().IsAlign())
741nremoved++; erased =
true;
743 if( (*annot)->GetData().IsFtable())
747 for(CSeq_annot::C_Data::TFtable::iterator feat=
table.begin(), feat_end=
table.end(); feat != feat_end; )
749 string test=
"Genomic Location:";
750 if((*feat)->IsSetData() && (*feat)->GetData().IsProt() &&
751(*feat)->IsSetComment() && (*feat)->GetComment().substr(0,
test.size()) ==
test)
753 table.erase(feat++); dremoved++;
775<<
", left="<< (*annot)->GetData().GetFtable().size()
777 if((*annot)->SetData().SetFtable().size() == 0)
783 if(erased) annot=annots.erase(annot);
800 if( !(*gen_feature)->GetData().IsFtable() )
continue;
804 for(CSeq_annot::C_Data::TFtable::iterator feat_end =
table.end(), feat =
table.begin(); feat != feat_end;)
813 if(feat_defined.
find(buff.str()) != feat_defined.
end())
816feat=
table.erase(feat);
821feat_defined[buff.str()]=
true;
844 int addProblems(list<problemStr>& dest,
constlist<problemStr>& src)
847 ITERATE(list<problemStr>, src_p, src)
849dest.push_back(*src_p);
859 ITERATE(list<problemStr>, problem, feat->second.problems)
862 stringname = feat->first;
863string::size_type ipos = name.rfind(
'|');
if(ipos!=string::npos) name.erase(0, ipos+1);
864ipos = name.rfind(
'_');
if(ipos!=string::npos) ipos= name.rfind(
'_', ipos-1);
865 if(ipos!=string::npos) name.erase(0, ipos+1);
869&& !problem->misc_feat_message.empty()
872problem_locs[range].strand = problem->strand;
873problem_locs[range].name = name;
874problem_locs[range].count =
875problem_locs[range].rnacount =
876problem_locs[range].genecount = 0;
880 NcbiCerr<<
"CReadBlastApp::CollectRNAFeatures: "<< feat->first
881<<
"["<< range <<
"]: " 882<<
"("<< name <<
")" 883<< (added ?
"added":
"skipped") <<
NcbiEndl;
886 return static_cast<int>(problem_locs.
size());
893 boolkeep_frameshifted = args[
"kfs"].HasValue();
896 ITERATE(list<problemStr>, problem, feat->second.problems)
899 stringname = feat->first;
900string::size_type ipos = name.rfind(
'|');
if(ipos!=string::npos) name.erase(0, ipos+1);
901ipos = name.rfind(
'_');
if(ipos!=string::npos) ipos= name.rfind(
'_', ipos-1);
902 if(ipos!=string::npos) name.erase(0, ipos+1);
904(problem->type ==
eFrameShift&& !keep_frameshifted)
910{ problem_names[name]=
ProblemType(problem->type); added=
true; }
912 NcbiCerr<<
"CollectFrameshiftedSeqs: "<< feat->first
914<<
"("<< name <<
")" 915<< (added ?
"added":
"skipped") <<
NcbiEndl;
918 returnproblem_names.
size();
926 NcbiCerr<<
"append_misc_feature: FATAL: do not have problems for "<< name <<
NcbiEndl;
934list<CRef<CSeq_id> >& na_id = (*na)->SetSeq().SetId();
937 if( !(*gen_feature)->GetData().IsFtable() )
continue;
940Tproblem_misced problem_misced;
943 if( !(problem->type & problem_type) )
continue;
949 if(from<0)
continue;
956strand = problem->strand;
957message = problem->misc_feat_message;
958 if(message.size()==0)
continue;
959 if(problem_misced.find(problem->type) != problem_misced.end() &&
960problem_misced[problem->type].find(message) != problem_misced[problem->type].end()
962 elseproblem_misced[problem->type][message] =
true;
964 while((pos=message.find_first_of(
"\n\r"))!=string::npos)
971feat->
SetData().SetImp().SetKey(
"misc_feature");
974feat->
SetLocation().SetInt().SetId(**na_id.begin());
976(*gen_feature)->SetData().SetFtable().push_back(feat);
int CollectFrameshiftedSeqs(map< string, string > &problem_names)
int CollectRNAFeatures(TProblem_locs &problem_locs)
static bool hasProblems(const CBioseq &seq, diagMap &diag, const EProblem type)
static bool PrintDetails(int current_verbosity=m_current_verbosity)
CConstBeginInfo ConstBegin(void)
void erase_problems(const string &qname, diagMap &diag, const EProblem type)
void GetLocMap(LocMap &loc_map, const CSeq_annot::C_Data::TFtable &feats)
int RemoveInterim2(CBioseq::TAnnot &annots)
static void IncreaseVerbosity(void)
void reportProblemMessage(const string &message, ostream &out=NcbiCout)
void reportProblemType(const EProblem type, ostream &out=NcbiCout)
static string ProblemType(const EProblem type)
void reportProblemSequenceName(const string &name, ostream &out=NcbiCout)
int RemoveProblems(map< string, string > &problem_seqs, LocMap &loc_map)
static bool is_prot_entry(const CBioseq &seq)
static void DecreaseVerbosity(void)
static void PopVerbosity(void)
static void getFromTo(const CSeq_loc &loc, TSeqPos &from, TSeqPos &to, ENa_strand &strand)
void reportProblems(const bool report_and_forget, diagMap &diag, ostream &out, const CBioseq::TAnnot &annots, const EProblem type)
void NormalizeSeqentry(CSeq_entry &entry)
void append_misc_feature(CBioseq_set::TSeq_set &seqs, const string &name, EProblem problem_type)
static void PushVerbosity(void)
namespace ncbi::objects::
Template class for iteration on objects of class C (non-medifiable version)
Template class for iteration on objects of class C.
const_iterator begin() const
const_iterator end() const
const_iterator find(const key_type &key) const
std::ofstream out("events_result.xml")
main entry point for tests
#define test(a, b, c, d, e)
static void DLIST_NAME() remove(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
unsigned int TSeqPos
Type for sequence locations and lengths.
virtual const CArgs & GetArgs(void) const
Get parsed command line arguments.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
#define MSerial_AsnText
I/O stream manipulators â.
static string GetStringDescr(const CBioseq &bioseq, EStringFormat fmt)
@ eOverlap
CSeq_locs overlap.
NCBI_NS_STD::string::size_type SIZE_TYPE
void SetLocation(TLocation &value)
Assign a value to Location data member.
void SetComment(const TComment &value)
Assign a value to Comment data member.
void SetData(TData &value)
Assign a value to Data data member.
ENa_strand
strand of nucleic acid
TSet & SetSet(void)
Select the variant.
TAnnot & SetAnnot(void)
Assign a value to Annot data member.
bool IsSeq(void) const
Check if variant Seq is selected.
bool IsSetAnnot(void) const
Check if a value has been assigned to Annot data member.
void ResetAnnot(void)
Reset Annot data member.
bool IsSetSeq_set(void) const
Check if a value has been assigned to Seq_set data member.
bool IsSetDescr(void) const
Check if a value has been assigned to Descr data member.
bool IsSet(void) const
Check if variant Set is selected.
void SetDescr(TDescr &value)
Assign a value to Descr data member.
list< CRef< CSeq_entry > > TSeq_set
TSeq & SetSeq(void)
Select the variant.
TSeq_set & SetSeq_set(void)
Assign a value to Seq_set data member.
list< CRef< CSeqdesc > > Tdata
bool IsSetAnnot(void) const
Check if a value has been assigned to Annot data member.
TAnnot & SetAnnot(void)
Assign a value to Annot data member.
void ResetAnnot(void)
Reset Annot data member.
void SetDescr(TDescr &value)
Assign a value to Descr data member.
list< CRef< CSeq_feat > > TFtable
list< CRef< CSeq_annot > > TAnnot
const TData & GetData(void) const
Get the Data member data.
void SetData(TData &value)
Assign a value to Data data member.
bool IsEntrys(void) const
Check if variant Entrys is selected.
<!DOCTYPE HTML >< html > n< header > n< title > PubSeq Gateway Help Page</title > n< style > n table
string GetLocationString(const CSeq_feat &f)
const struct ncbi::grid::netcache::search::fields::SIZE size
const GenericPointer< typename T::ValueType > T2 value
int addProblems(list< problemStr > &dest, const list< problemStr > &src)
string diagName(const string &type, const string &value)
string GetStringDescr(const CBioseq &bioseq)
string GetLocusTag(const CSeq_feat &f, const LocMap &loc_map)
string printed_range(const TSeqPos from2, const TSeqPos to2)
static wxAcceleratorEntry entries[3]
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4