*
const k_aa_table=
"KNKNXTTTTTRSRSXIIMIXXXXXXQHQHXPPPPPRRRRRLLLLLXXXXXEDEDXAAAAAGGGGGVVVVVXXXXX*Y*YXSSSSS*CWCXLFLFXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX";
45 size_t len= src.size();
48 for(
size_t i= 0;
i<
len; ++
i)
60 size_t len= src.size();
63 for(
size_t i= 0;
i<
len; ++
i)
64dst.push_back(
toACGT(src[
i]) );
89 template<
typenameRes>
107 template<
classRes>
110 constRes * start_codon;
111 if(strand ==
ePlus)
117 returnequal(start_codon,start_codon+3,seq);
123 template<
classRes>
126 if(strand ==
ePlus) {
130 for(
int i= 1;
i<= 3; ++
i)
138 for(
int i= 1;
i<= 3; ++
i)
150 for(CEResidueVec::const_iterator pos = mrna.begin()+search_region.
GetFrom(); (pos = search(pos,mrna.end(),codon,codon+3)) < mrna.begin()+search_region.
GetTo(); ++pos) {
151 int l= (
int)(pos-mrna.begin());
153 if(fixed_frame==-1 || fixed_frame==frame)
154positions[frame].push_back(
l);
165 for(
int i=1;
i<=3; ++
i)
167 for(
int f= 0;
f< 3; ++
f)
168 sort(stops[
f].begin(), stops[
f].end());
176 intcodon_start = start+frame-3;
177 if(codon_start >= 0 &&
IsStopCodon(&seq_strand[codon_start]))
185 intleft_cds_limit = -1;
186 intreading_frame_start = (
int)mrna.size();
187 intreading_frame_stop = (
int)mrna.size();
188 intright_cds_limit = (
int)mrna.size();
204reading_frame_start = rf.
GetFrom();
205 _ASSERT(reading_frame_start >= 0);
206reading_frame_stop = rf.
GetTo();
207 _ASSERT(reading_frame_stop >= 0);
209 if(reading_frame_start == 0 &&
IsStartCodon(&mrna[reading_frame_start]) && reading_frame_start+3 < reading_frame_stop)
210reading_frame_start += 3;
212 _ASSERT( -1 <= left_cds_limit && left_cds_limit <= reading_frame_start );
213 _ASSERT( 0 <= reading_frame_start && reading_frame_start <= reading_frame_stop && reading_frame_stop <
int(mrna.size()) );
214 _ASSERT( reading_frame_stop <= right_cds_limit && right_cds_limit <=
int(mrna.size()) );
216frame = reading_frame_start%3;
218 if(left_cds_limit<0) {
219 if(reading_frame_start >= 3) {
223 if(stops[frame].
size()>0)
224left_cds_limit = stops[frame].back()+3;
229reading_frame_start = reading_frame_stop-5;
232reading_frame_start =
min(protrf.
GetFrom(),reading_frame_start);
236 if(left_cds_limit<0) {
240starts[0].push_back(-3);
243model_start = (
TSignedSeqPos)contig_seq.size()-1-model_start;
244 for(
int i= 0;
i<3; ++
i) {
245 if(frame == -1 || frame ==
i) {
248stops[
i].push_back(
i-3);
250starts[
i].push_back(
i-3);
266 if(obeystart && model.
HasStart()) {
268starts[frame].push_back(start.
GetFrom());
269}
else if(reading_frame_start-left_cds_limit >= 3) {
275}
else if(right_cds_limit - reading_frame_stop >= 3) {
279 if(
int(mrna.size()) <= right_cds_limit) {
280stops[mrna.size()%3].push_back((
int)mrna.size());
281stops[(mrna.size()-1)%3].push_back((
int)mrna.size()-1);
282stops[(mrna.size()-2)%3].push_back((
int)mrna.size()-2);
289vector<int>::const_iterator it_stop = lower_bound(stops.begin(),stops.end(),start);
291 if(it_stop != stops.begin()) {
299 autoit_start = lower_bound(starts.begin(), starts.end(), stop);
300 if(it_start != starts.end()) {
327 if(!fs_only ||
len%3 != 0)
407 EEdgeTypeleft_type,
EEdgeTyperight_type,
const string& left_edit_extra_seq,
const string& right_edit_extra_seq)
417 _ASSERT((
int)left_edit_extra_seq.length() == 0 || (
int)left_edit_extra_seq.length() == left_edite);
419 _ASSERT((
int)right_edit_extra_seq.length() == 0 || (
int)right_edit_extra_seq.length() == right_edite);
424TInDels::const_iterator fsi_end,
EEdgeTypetype_a,
EEdgeTypetype_b,
const string& gseq_a,
const string& gseq_b)
426TInDels::const_iterator fsi = fsi_begin;
427 for( ;fsi != fsi_end && fsi->Loc() < orig_a; ++fsi ) {
428 _ASSERT( !fsi->IntersectingWith(orig_a,orig_b) );
433 stringleft_edit_extra_seq = gseq_a;
436 for( ;fsi != fsi_end && fsi->Loc() == orig_a && !fsi->IsMismatch(); ++fsi ) {
437 if(fsi->IsInsertion()) {
438orig_a += fsi->Len();
439left_orige += fsi->Len();
441edit_a += fsi->Len();
442left_edite += fsi->Len();
443left_edit_extra_seq += fsi->GetInDelV();
446 for( ; fsi != fsi_end && fsi->IsMismatch() && fsi->Loc() == orig_a+(
int)mism.size(); ++fsi)
447mism += fsi->GetInDelV();
449 while(fsi != fsi_end && fsi->InDelEnd() <= orig_b+1) {
456 stringright_edit_extra_seq;
457 for( ;fsi != fsi_end && fsi->Loc() ==
bb&& !fsi->IsMismatch(); ++fsi ) {
458 if(fsi->IsInsertion()) {
459right_orige += fsi->Len();
462right_edite += fsi->Len();
463right_edit_extra_seq += fsi->GetInDelV();
469 if(next_orig_a > orig_b) {
470right_edit_extra_seq += gseq_b;
471right_edite += gseq_b.length();
474 InsertOneToOneRange(orig_a, edit_a,
len, mism, left_orige, left_edite, right_orige, right_edite, type_a, tb, left_edit_extra_seq, right_edit_extra_seq);
476orig_a = next_orig_a;
477edit_a +=
len+right_edite;
479left_orige = right_orige;
480left_edite = right_edite;
481left_edit_extra_seq = right_edit_extra_seq;
483 for( ; fsi != fsi_end && fsi->IsMismatch() && fsi->Loc() == orig_a+(
int)mism.size(); ++fsi)
484mism += fsi->GetInDelV();
489 stringright_edit_extra_seq;
491 if(orig_a+
len> orig_b) {
492right_edit_extra_seq = gseq_b;
495 InsertOneToOneRange(orig_a, edit_a,
len, mism, left_orige, left_edite, 0, (
TSignedSeqPos)gseq_b.length(), type_a, tb, left_edit_extra_seq, right_edit_extra_seq);
501left_edit_extra_seq.clear();
505 if(orig_a <= orig_b) {
506 int len= orig_b-orig_a+1;
508 InsertOneToOneRange(orig_a, edit_a,
len, mism, left_orige, left_edite, 0, (
TSignedSeqPos)gseq_b.length(), type_a, type_b, left_edit_extra_seq, gseq_b);
518 _ASSERT(transcript_exons.size() == exons.size());
519 _ASSERT(transcript_exons.size() == 1 || (orientation ==
ePlus&& transcript_exons.front().GetFrom() < transcript_exons.back().GetFrom()) ||
520(orientation ==
eMinus&& transcript_exons.front().GetFrom() > transcript_exons.back().GetFrom()));
522 for(
unsigned int i= 0;
i< exons.size(); ++
i) {
524diff += exonlen-(transcript_exons[
i].GetTo()-transcript_exons[
i].GetFrom()+1);
527 if(!
f->IsMismatch())
528diff += (
f->IsDeletion()) ?
f->Len() : -
f->Len();
537 for(
unsigned int i= 0;
i< exons.size(); ++
i) {
538 if(exons[
i].Limits().Empty()) {
539 _ASSERT(
i== 0 || exons[
i-1].Limits().NotEmpty());
540 _ASSERT(
i== exons.size()-1 || exons[
i+1].Limits().NotEmpty());
546 if(
i> 0 && exons[
i-1].Limits().
Empty()) {
548gseq_a = exons[
i-1].m_seq;
549estart += gseq_a.length();
551 if(
i< exons.size()-1 && exons[
i+1].Limits().Empty()) {
553gseq_b = exons[
i+1].m_seq;
559estart =
InsertIndelRangesForInterval(exons[
i].GetFrom(), exons[
i].GetTo(), estart, indels.begin(), indels.end(), type_a, type_b, gseq_a, gseq_b);
562 if(
i!= exons.size()-1) {
564estart += transcript_exons[
i+1].GetFrom()-transcript_exons[
i].GetTo()-1;
566estart += transcript_exons[
i].GetFrom()-transcript_exons[
i+1].GetTo()-1;
574TInDels::const_iterator fsi_begin = indels.begin();
575TInDels::const_iterator fsi_end = indels.end();
581 for(
unsigned int i= 0;
i< exons.size(); ++
i) {
582 if(exons[
i].Limits().
Empty()) {
583 _ASSERT(
i== 0 || exons[
i-1].Limits().NotEmpty());
584 _ASSERT(
i== exons.size()-1 || exons[
i+1].Limits().NotEmpty());
592 if(
i> 0 && exons[
i-1].Limits().
Empty()) {
594gseq_a = exons[
i-1].m_seq;
595estart += gseq_a.length();
597 if(
i< exons.size()-1 && exons[
i+1].Limits().Empty()) {
599gseq_b = exons[
i+1].m_seq;
606 if(stop < lim.
GetFrom())
continue;
607 if(lim.
GetTo() < start)
break;
609 if(lim.
GetFrom() >= start) {
613 if(lim.
GetTo() <= stop) {
614stop = lim.
GetTo();
619 if(
i!= exons.size()-1 && (!exons[
i+1].m_fsplice || !exons[
i].m_ssplice))
630 template<
classIn,
classOut>
633edited_sequence.clear();
638s.insert(s.end(),
l,
'N');
651 for(
int i=
a;
i<
b; ++
i)
652edited_sequence.push_back(original_sequence[
i]);
660seq.insert(seq.end(),
l,
'N');
670seq.insert(seq.end(),
l,
'N');
689 voidCAlignMap::EditedSequence<CResidueVec,CResidueVec>(
const CResidueVec& original_sequence,
CResidueVec& edited_sequence,
boolincludeholes)
const;
691 voidCAlignMap::EditedSequence<CEResidueVec,CEResidueVec>(
const CEResidueVec& original_sequence,
CEResidueVec& edited_sequence,
boolincludeholes)
const;
693 voidCAlignMap::EditedSequence<string,string>(
const string& original_sequence,
string& edited_sequence,
boolincludeholes)
const;
695 voidCAlignMap::EditedSequence<CAlignCollapser::CPartialString,string>(
const CAlignCollapser::CPartialString& original_sequence,
string& edited_sequence,
boolincludeholes)
const;
776 if(snap_to_codons) {
777 boolsnapped =
false;
804 if(snap_to_codons) {
805 boolsnapped =
false;
842 if(p <
a.front().GetExtendedFrom() || p >
a.back().GetExtendedTo())
return-1;
844 if(p <
a.front().GetFrom()) {
845 if(move_mode ==
eLeftEnd&&
b.front().GetTypeFrom() !=
eGgap) {
846 return b.front().GetExtendedFrom();
852 if(p >
a.back().GetTo()) {
854 return b.back().GetExtendedTo();
862 if(p >
a[num].GetTo()) {
863 if(
a[num].GetTypeTo() ==
eGgap)
868 return b[num+1].GetExtendedFrom();
870 return b[num].GetExtendedTo();
874}
else if(p ==
a[num].GetTo()) {
876 return b[num].GetExtendedTo();
877}
else if(p ==
a[num].GetFrom() && move_mode ==
eLeftEnd&&
b[num].GetTypeFrom() !=
eGgap) {
878 return b[num].GetExtendedFrom();
880 return b[num].GetTo();
882}
else if(p ==
a[num].GetFrom()) {
884 return b[num].GetExtendedFrom();
886 return b[num].GetFrom();
889 return b[num].GetFrom()+p-
a[num].GetFrom();
TSignedSeqPos FShiftedMove(TSignedSeqPos orig_pos, int len) const
static int FindLowerRange(const vector< CAlignMap::SMapRange > &a, TSignedSeqPos p)
TSignedSeqRange ShrinkToRealPointsOnEdited(TSignedSeqRange edited_range) const
void InsertOneToOneRange(TSignedSeqPos orig_start, TSignedSeqPos edited_start, TSignedSeqPos len, const string &mism, TSignedSeqPos left_orige, TSignedSeqPos left_edite, TSignedSeqPos right_orige, TSignedSeqPos right_edite, EEdgeType left_type, EEdgeType right_type, const string &left_edit_extra_seq, const string &right_edit_extra_seq)
static TSignedSeqRange MapRangeAtoB(const vector< CAlignMap::SMapRange > &a, const vector< CAlignMap::SMapRange > &b, TSignedSeqRange r, ERangeEnd lend, ERangeEnd rend)
vector< SMapRange > m_edited_ranges
TSignedSeqPos InsertIndelRangesForInterval(TSignedSeqPos orig_a, TSignedSeqPos orig_b, TSignedSeqPos edit_a, TInDels::const_iterator fsi_begin, TInDels::const_iterator fsi_end, EEdgeType type_a, EEdgeType type_b, const string &gseq_a, const string &gseq_b)
TSignedSeqRange MapRangeEditedToOrig(TSignedSeqRange edited_range, bool withextras=true) const
TSignedSeqPos MapOrigToEdited(TSignedSeqPos orig_pos) const
void EditedSequence(const In &original_sequence, Out &edited_sequence, bool includeholes=false) const
TSignedSeqPos MapEditedToOrig(TSignedSeqPos edited_pos) const
TSignedSeqRange ShrinkToRealPoints(TSignedSeqRange orig_range, bool snap_to_codons=false) const
int FShiftedLen(TSignedSeqRange ab, ERangeEnd lend, ERangeEnd rend) const
static TSignedSeqPos MapAtoB(const vector< CAlignMap::SMapRange > &a, const vector< CAlignMap::SMapRange > &b, TSignedSeqPos p, ERangeEnd move_mode)
TSignedSeqRange MapRangeOrigToEdited(TSignedSeqRange orig_range, ERangeEnd lend, ERangeEnd rend) const
vector< SMapRange > m_orig_ranges
TSignedSeqRange Start() const
TSignedSeqRange ProtReadingFrame() const
TSignedSeqRange ReadingFrame() const
const CCDSInfo & GetCdsInfo() const
vector< CModelExon > TExons
static EResidue _fromACGT(TResidue x)
static const EResidue * _rev_codons(int i)
static const EResidue * _codons(int i)
static const Res * _rev_codons(int i)
static const Res * _codons(int i)
static Res _fromACGT(TResidue x)
bool Empty(const CNcbiOstrstream &src)
vector< TResidue > CResidueVec
bool Include(TSignedSeqRange big, TSignedSeqRange small)
vector< CInDelInfo > TInDels
objects::CSeqVectorTypes::TResidue TResidue
bool IsStopCodon(const Res *seq, int strand)
bool FindFirstStart(const vector< int > &starts, int stop, int &start)
bool FindUpstreamStop(const vector< int > &stops, int start, int &stop)
static const EResidue s_ecodons2[3]
const TResidue rev_codons[4][4]
const TResidue codons[4][4]
void FindAllStops(TIVec stops[], const CEResidueVec &mrna, TSignedSeqRange search_region, int fixed_frame)
const char *const k_aa_table
void FindStartsStops(const CGeneModel &model, const CEResidueVec &contig_seq, const CEResidueVec &mrna, const CAlignMap &mrnamap, TIVec starts[3], TIVec stops[3], int &frame, bool obeystart)
const EResidue k_toMinus[5]
void FindAllCodonInstances(TIVec positions[], const EResidue codon[], const CEResidueVec &mrna, TSignedSeqRange search_region, int fixed_frame)
const EResidue * ecodons[4]
static const EResidue s_ecodons0r[3]
static const EResidue s_ecodons1[3]
static const EResidue s_ecodons2r[3]
template bool IsStartCodon< TResidue >(const TResidue *seq, int strand)
const EResidue * rev_ecodons[4]
template bool IsStartCodon< EResidue >(const EResidue *seq, int strand)
void ReverseComplement(const CEResidueVec &src, CEResidueVec &dst)
bool IsStartCodon(const Res *seq, int strand)
template bool IsStopCodon< EResidue >(const EResidue *seq, int strand)
void Convert(const CResidueVec &src, CEResidueVec &dst)
template bool IsStopCodon< TResidue >(const TResidue *seq, int strand)
static const EResidue s_ecodons3r[3]
bool Partial5pCodonIsStop(const CEResidueVec &seq_strand, int start, int frame)
static const EResidue s_ecodons3[3]
void PushInDel(TInDels &indels, bool fs_only, TSignedSeqPos p, int len, CInDelInfo::EType type, const string &seq="")
void FindAllStarts(TIVec starts[], const CEResidueVec &mrna, TSignedSeqRange search_region, int fixed_frame)
static const EResidue s_ecodons0[3]
static const EResidue s_ecodons1r[3]
TResidue toACGT(EResidue c)
EResidue fromACGT(TResidue c)
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
int TSignedSeqPos
Type for signed sequence position.
position_type GetLength(void) const
bool NotEmpty(void) const
static TThisType GetEmpty(void)
static position_type GetWholeFrom(void)
CRange< TSignedSeqPos > TSignedSeqRange
static position_type GetWholeTo(void)
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
TTo GetTo(void) const
Get the To member data.
TFrom GetFrom(void) const
Get the From member data.
unsigned int
A callback function used to compare two keys in a database.
constexpr auto sort(_Init &&init)
const struct ncbi::grid::netcache::search::fields::SIZE size
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
void Out(T t, int w, CNcbiOstream &to=cout)
static SLJIT_INLINE sljit_ins l(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4