from = ext_rna->exons[0].from;
44 intto = ext_rna->exons[ext_rna->exons.size()-1].to;
46 for(;seq!=seqs.end(); seq++, nseq++)
48 intfrom2 = seq->exons[0].from;
49 intto2 = seq->exons[seq->exons.size()-1].to;
50 boolover_origin = seq->exons.size()>1 && to2-from2 >
m_length/2;
53<<
"["<< ext_rna_range <<
"]" 54<<
"["<< ext_rna_range2 <<
"]"<<
", trying..."<<
NcbiEndl;
56 if(to2>=from || over_origin)
60 if(from2<=to || over_origin)
64TSimpleSeqs::iterator seq2 = seq;
65 for(;seq2!=seqs.end(); seq2++)
67 intfrom2 = seq2->exons[0].from;
71<<
"["<< ext_rna_range <<
"]" 72<<
"["<< ext_rna_range2 <<
"]"<<
", trying 2..."<<
NcbiEndl;
75 overlaps(ext_rna, seq2, this_overlap);
77<<
"["<< ext_rna_range <<
"]" 78<<
"["<< ext_rna_range2 <<
"]"<<
", overlap = "<< this_overlap <<
NcbiEndl;
82best_seq.push_back(*seq2);
96 intfrom = ext_rna->exons[0].from;
97 intto = ext_rna->exons[ext_rna->exons.size()-1].to;
98 CNcbiStrstreamext_rna_range_stream; ext_rna_range_stream << from <<
"..."<< to <<
'\0';
99 stringext_rna_range = ext_rna_range_stream.str();
100TSimpleSeqs::iterator& best_seq = seq;
101 for(;seq!=seqs.end(); seq++, nseq++)
103 intfrom2 = seq->exons[0].from;
104 intto2 = seq->exons[seq->exons.size()-1].to;
105 CNcbiStrstreamext_rna_range_stream2; ext_rna_range_stream2 << from2 <<
"..."<< to2 <<
'\0';
106 stringext_rna_range2 = ext_rna_range_stream2.str();
108<<
"["<< ext_rna_range <<
"]" 109<<
"["<< ext_rna_range2 <<
"]"<<
", trying..."<<
NcbiEndl;
117TSimpleSeqs::iterator seq2 = seq;
118 for(;seq2!=seqs.end(); seq2++)
120 intfrom2 = seq2->exons[0].from;
121 intto2 = seq2->exons[seq->exons.size()-1].to;
122 CNcbiStrstreamext_rna_range_stream2; ext_rna_range_stream2 << from2 <<
"..."<< to2 <<
'\0';
123 stringext_rna_range2 = ext_rna_range_stream2.str();
125<<
"["<< ext_rna_range <<
"]" 126<<
"["<< ext_rna_range2 <<
"]"<<
", trying 2..."<<
NcbiEndl;
127 if(from2>to)
break;
129 overlaps(ext_rna, seq2, this_overlap);
131<<
"["<< ext_rna_range <<
"]" 132<<
"["<< ext_rna_range2 <<
"]"<<
", overlap = "<< this_overlap <<
NcbiEndl;
133 if(this_overlap>overlap)
135overlap=this_overlap;
149 for(TSimplePairs::const_iterator e1=seq1->exons.begin(); e1!=seq1->exons.end(); e1++)
151 for(TSimplePairs::const_iterator e2=seq2->exons.begin(); e2!=seq2->exons.end(); e2++)
153 into =
min(e2->to, e1->to)-
max(e1->from, e2->from)+1;
171 if( !(*gen_feature)->GetData().IsFtable() )
continue;
191 if( !(*gen_feature)->GetData().IsFtable() )
continue;
194lres =
overlaps_na((*gen_feature)->GetData().GetFtable());
225 getFromTo(seq_interval, from2, to2, strand2);
230 if( !(*f1)->GetData().IsRna() )
continue;
232 boollres=
overlaps(seq_interval, (*f1)->GetLocation(), overlap);
239 if(trna_type.size()>0) name1 = trna_type;
246 getFromTo((*f1)->GetLocation(), from1, to1, strand1);
247 intmin1, min2, max1, max2;
248min1 =
min(from1, to1);
249min2 =
min(from2, to2);
250max1 =
max(from1, to1);
251max2 =
max(from2, to2);
257 intleft_frame = (from1-1)%3+1;
258 intright_frame = (from2-1)%3+1;
270report->
space= overlap;
273report->
loc1= &((*f1)->GetLocation());
274report->
loc2= &seq_interval;
276 charbufferchar[20480]; memset(bufferchar, 0, 20480);
277strstream
buffer(bufferchar, 20480);
282<<
"potential RNA location (" 283<< name1 <<
") that overlaps protein ("<<
get_title(seq) <<
")"<<
'\0';
286buff_misc_feat_protein
287<<
"potential protein location (" 288<<
get_title(seq) <<
") that overlaps RNA ("<< name1 <<
")"<<
'\0';
292misc_feat_rna << buff_misc_feat_rna.str() <<
'\0';
294misc_feat_protein << buff_misc_feat_protein.str() <<
'\0';
296 if(
PrintDetails())
NcbiCerr<<
"overlaps_prot_na[seq,feats]: created RNA buffer: "<< buff_misc_feat_rna.str() <<
"\n";
297 if(
PrintDetails())
NcbiCerr<<
"overlaps_prot_na[seq,feats]: created protein buffer: "<< buff_misc_feat_protein.str() <<
"\n";
299 m_diag[n1].problems.push_back(problem);
300 boolremoveit=
false;
301 stringremoven =
"";
305 NcbiCerr<<
"overlaps_prot_na[seq,feats]: WARNING: RNA location " 306<< n1 <<
" marked for deletion (pseudo)"<<
"\n";
312 NcbiCerr<<
"overlaps_prot_na[seq,feats]: WARNING: RNA location " 313<< n1 <<
" marked for deletion (atypical)"<<
"\n";
328 NcbiCerr<<
"overlaps_prot_na[seq,feats]: WARNING: CDS and gene " 329<< n2 <<
" marked for deletion (hypothetical)"<<
"\n";
343 m_diag[removen].problems.push_back(problemCOH);
348 m_diag[removen].problems.push_back(problemCOH);
351<<
"["<< removen <<
"]" 352<<
" is marked for removal" 361 NcbiCerr<<
"overlaps_prot_na[seq,feats]: WARNING: get_parent_seqset threw when trying to append misc_feature for "<< removen <<
NcbiEndl;
387 if( !(*f1)->GetData().IsRna() )
continue;
393 if( !(*f1)->GetData().GetRna().CanGetExt() )
continue;
394 try{ type1 =
Get3type((*f1)->GetData().GetRna());}
397 NcbiCerr<<
"overlaps_na[feats]: FATAL: cannot get aminoacid type for one trna feats"<<
NcbiEndl;
405 if(type1.size()==0)
continue;
425 stringn2=
"not gene";
458 if(n1==n2)
return result;
466 template<
typenamet1,
typenamet2>
bool 482 TSeqPosfrom1, to1, from2, to2;
484 intmin1, min2, max1, max2;
486min1 =
min(from1, to1);
487max1 =
max(from1, to1);
491min2 =
min(from2, to2);
492max2 =
max(from2, to2);
493 intoverlap_start, overlap_end;
494overlap_end =
min(max1, max2);
495overlap_start =
max(min1, min2);
497 boolresult2 = overlap_end >= overlap_start;
498 if(!result2)
continue;
499overlap+=overlap_end - overlap_start + 1;
516 intmin1, min2, max1, max2;
517min2 =
min(from2, to2);
518max2 =
max(from2, to2);
523min1 =
min(from1, to1);
524max1 =
max(from1, to1);
525 intoverlap_start, overlap_end;
526overlap_end =
min(max1, max2);
527overlap_start =
max(min1, min2);
529 boolresult2 = overlap_end >= overlap_start;
530 if(result2)
result=
true;
531overlap+=overlap_end - overlap_start + 1;
545 TSeqPosfrom1, to1, from2, to2;
547 intmin1, min2, max1, max2;
548getFromTo( *i1, from1, to1, strand1);
549min1 =
min(from1, to1);
550max1 =
max(from1, to1);
554getFromTo( *i2, from2, to2, strand2);
556min2 =
min(from2, to2);
557max2 =
max(from2, to2);
558 if(min2<=min1 && max2>=max1)
560 if(PrintDetails())
NcbiCerr<<
"complete_overlap: " 561<< from1 <<
" ... "<< to1 <<
" " 562<< from2 <<
" ... "<< to2 <<
" " 599 if(!hasGenomicLocation(left))
return result;
600 if(!hasGenomicLocation(right))
return result;
601 const CSeq_loc& left_genomic_int = getGenomicLocation(left);
602 const CSeq_loc& right_genomic_int = getGenomicLocation(right);
605 TSeqPosfrom1, to1, from2, to2;
608getFromTo(left_genomic_int, from1, to1, left_strand);
609getFromTo(right_genomic_int, from2, to2, right_strand);
612 intleft_frame=-0xFF, right_frame=-0xFF;
613 if(left_genomic_int.
IsInt())
615left_frame = (from1-1)%3+1;
617 if(right_genomic_int.
IsInt())
619right_frame = (from2-1)%3+1;
631(
min((
int)to1, (
int)to2)-
632 max((
int)from2, (
int)from1)
636 boolcomplete_overlaps =
false;
638 result= overlaps(left_genomic_int, right_genomic_int, scratch_overlap);
639 boolleft_covered_by_right=
false;
640 boolright_covered_by_left=
false;
641 if(
result) complete_overlaps = (left_covered_by_right=complete_overlap(left_genomic_int, right_genomic_int))
642|| (right_covered_by_left=complete_overlap(right_genomic_int, left_genomic_int));
643 if(PrintDetails())
NcbiCerr<<
"space = "<< space
644<<
", complete_overlap = "<< complete_overlaps
645<<
", result = "<<
result 647 if(
result&& scratch_overlap >= m_cds_overlapThreshold)
660report->
space= space;
664 charbufferchar[20480]; memset(bufferchar, 0, 20480);
665strstream
buffer(bufferchar, 20480);
666printOverlapReport(report,
buffer);
677<<
"potential protein location ("<< GetProtName(left)
678<<
") that overlaps protein ("<< GetProtName(right) <<
")"<<
NcbiEndl<<
'\0';
682<<
"potential protein location ("<< GetProtName(right)
683<<
") that overlaps protein ("<< GetProtName(left) <<
")"<<
NcbiEndl<<
'\0';
691 if(complete_overlaps)
694 if(report->
q_name_left.find(
"hypothetical")!=string::npos && left_covered_by_right && !right_covered_by_left)
696 NcbiCerr<<
"CReadBlastApp::overlaps: WARNING: sequence of a hypothetical protein " 697<<
"["<< qname <<
"]" 698<<
" is marked for removal because of a complete overlap" 702m_diag[qname].problems.push_back(problemCOH);
703m_diag[qname].problems.push_back(problemCO);
711 NcbiCerr<<
"overlaps_prot_na[seq,feats]: WARNING: get_parent_seqset threw when trying to append misc_feature for " 715 if(report->
q_name_right.find(
"hypothetical")!=string::npos && right_covered_by_left)
717 NcbiCerr<<
"CReadBlastApp::overlaps: WARNING: sequence of a hypothetical protein " 718<<
"["<< qrname <<
"]" 719<<
" is marked for removal because of a complete overlap" 723m_diag[qrname].problems.push_back(problemCOH);
724m_diag[qrname].problems.push_back(problemCO);
732 NcbiCerr<<
"overlaps_prot_na[seq,feats]: WARNING: get_parent_seqset threw when trying to append misc_feature for " 739m_diag[qname].problems.push_back(problemO_l);
740m_diag[qrname].problems.push_back(problemO_r);
747m_diag[qname].problems.push_back(problemO_l);
748m_diag[qrname].problems.push_back(problemO_r);
void GetLabel(string *label) const
static bool PrintDetails(int current_verbosity=m_current_verbosity)
static string GetProtName(const CBioseq &seq)
CConstBeginInfo ConstBegin(void)
void GetLocMap(LocMap &loc_map, const CSeq_annot::C_Data::TFtable &feats)
static void IncreaseVerbosity(void)
bool overlaps_na(const CBioseq::TAnnot &annots)
static void printOverlapReport(distanceReportStr *report, ostream &out=NcbiCout)
int find_overlap(TSimpleSeqs::iterator &seq, const TSimpleSeqs::iterator &ext_rna, TSimpleSeqs &seqs, int &overlap)
bool complete_overlap(const CSeq_loc &l1, const CSeq_loc &l2)
static int m_rna_overlapThreshold
static void DecreaseVerbosity(void)
int overlaps(const TSimpleSeqs::iterator &seq1, const TSimpleSeqs::iterator &seq2, int &overlap)
static const CSeq_loc & getGenomicLocation(const CBioseq &seq)
bool overlaps_prot_na(CBioseq &seq, const CBioseq::TAnnot &annots)
static void getFromTo(const CSeq_loc &loc, TSeqPos &from, TSeqPos &to, ENa_strand &strand)
bool match_na(const CSeq_feat &f1, const string &type1)
void append_misc_feature(CBioseq_set::TSeq_set &seqs, const string &name, EProblem problem_type)
namespace ncbi::objects::
Template class for iteration on objects of class C (non-medifiable version)
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
CConstBeginInfo ConstBegin(const C &obj)
Get starting point of non-modifiable object hierarchy.
@ eOverlap
CSeq_locs overlap.
EType
type of RNA feature
const TLocation & GetLocation(void) const
Get the Location member data.
bool IsGene(void) const
Check if variant Gene is selected.
const TData & GetData(void) const
Get the Data member data.
const TGene & GetGene(void) const
Get the variant data.
ENa_strand
strand of nucleic acid
bool IsInt(void) const
Check if variant Int is selected.
list< CRef< CSeq_entry > > TSeq_set
list< CRef< CSeq_feat > > TFtable
list< CRef< CSeq_annot > > TAnnot
unsigned int
A callback function used to compare two keys in a database.
@ eMyFeatureType_normal_tRNA
@ eMyFeatureType_atypical_tRNA
@ eMyFeatureType_pseudo_tRNA
@ eMyFeatureType_hypo_CDS
string get_trna_string(const CSeq_feat &feat)
string GetStringDescr(const CBioseq &bioseq)
string Get3type(const CRNA_ref &rna)
EMyFeatureType get_my_feat_type(const CSeq_feat &feat, const LocMap &loc_map)
string get_title(const CBioseq &seq)
EMyFeatureType get_my_seq_type(const CBioseq &seq)
string GetLocusTag(const CSeq_feat &f, const LocMap &loc_map)
CBioseq_set::TSeq_set * get_parent_seqset(const CBioseq &seq)
string GetRNAname(const CSeq_feat &feat)
list< TSimpleSeq > TSimpleSeqs
string GetRRNAtype(const CRNA_ref &rna)
string printed_range(const TSeqPos from2, const TSeqPos to2)
CRef< const CSeq_loc > loc1
CRef< const CSeq_loc > loc2
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4