seq_align.
GetId().back()->GetIdType(
id);
84 boolis_protein =
false;
90 boolis_product_reversed =
false;
93is_product_reversed =
true;
108 boolprev_3_prime_splice =
false;
110 inttarget_len = product_len;
112vector<TSignedSeqRange> transcript_exons;
126 if((*i)->IsSetType() && (*i)->GetType().IsStr()) {
127 string type= (*i)->GetType().GetStr();
128 if(
type==
"RNASeq-Counts") {
130 if(*j && (*j)->CanGetLabel() && (*j)->GetLabel().IsStr()) {
131 string label= (*j)->GetLabel().GetStr();
133 count+= (*j)->GetData().GetInt();
136}
else if(
type==
"MismatchedBases") {
137mismatches = (*i)->GetData().front()->GetData().GetStr();
138}
else if(
type==
"MismatchedBasesStatus") {
139mismstatus = (*i)->GetData().front()->GetData().GetStr();
145 if(Strand() ==
eMinus) {
146reverse(mismatches.begin(),mismatches.end());
147reverse(mismstatus.begin(),mismstatus.end());
152 boolggap_model=
false;
159 if(is_product_reversed) {
160 int tmp= prod_cur_start;
161prod_cur_start = product_len - prod_cur_end -1;
162prod_cur_end = product_len -
tmp-1;
170 if(prod_prev+1 != prod_cur_start || !prev_3_prime_splice || !cur_5_prime_splice) {
172 if(!mismatches.empty())
173mismatches = mismatches.substr(prod_cur_start - prod_prev -1);
186 if(Strand() ==
eMinus) {
194 if((*it)->CanGetId() && (*it)->GetId().IsStr()) {
195 if((*it)->GetId().GetStr() ==
"idty") {
196eident = (*it)->GetValue().GetReal();
213 stringfill_seq = transcript.substr(nuc_cur_start, nuc_cur_end-nuc_cur_start+1);
225AddGgapExon(eident, fill_seq, fill_src, Strand() ==
eMinus);
232 _ASSERT(transcript_exons.back().NotEmpty());
240 if(mismstatus[0] ==
'n')
242 else if(mismstatus[0] ==
'c')
244mismstatus = mismstatus.substr(1);
249 if(!mismatches.empty()) {
250v = mismatches.substr(0,product_ins);
251mismatches = mismatches.substr(product_ins);
254reverse(v.begin(),v.end());
256 if(Strand() ==
ePlus)
257indels.push_back(fs);
259indels.insert(indels.begin(), fs);
262 if(Strand()==
ePlus)
267}
else if(chunk.
IsMatch()) {
271 stringv(mismatch_len,
'N');
272 if(!mismatches.empty()) {
273 _ASSERT(mismatch_len <= (
int)mismatches.length());
274v = mismatches.substr(0,mismatch_len);
275mismatches = mismatches.substr(mismatch_len);
277 if(Strand() ==
ePlus) {
280reverse(v.begin(),v.end());
288 if(Strand() ==
ePlus)
289indels.back().SetStatus(indelstatus);
291indels.front().SetStatus(indelstatus);
295prod_prev = prod_cur_end;
298 _ASSERT(mismatches.empty() || (product_len - prod_prev - 1 == (
int)mismatches.length()));
300 sort(transcript_exons.begin(),transcript_exons.end());
301 boolminusstrand = Strand() ==
eMinus;
303 if(orientation ==
eMinus)
304reverse(transcript_exons.begin(),transcript_exons.end());
307 _ASSERT(orientation == Strand());
310 if((*m)->IsStop_codon_found()) {
312 if(Strand() ==
ePlus) {
314 _ASSERT((transcript_exons.back().GetTo()+1)%3 == 0);
315transcript_exons.back().SetTo(transcript_exons.back().GetTo()+3);
318 _ASSERT((transcript_exons.front().GetTo()+1)%3 == 0);
319transcript_exons.front().SetTo(transcript_exons.front().GetTo()+3);
327 boolkeepdoing =
true;
331TInDels::iterator indl_next = indl;
332 if(++indl_next == indels.end())
335 if(indl->InDelEnd() == indl_next->Loc()) {
336 stringnew_seq = indl->GetInDelV()+indl_next->GetInDelV();
338 if(indl->GetType() == indl_next->GetType()) {
339*indl =
CInDelInfo(indl->Loc(), indl->Len()+indl_next->Len(), indl->GetType(), new_seq);
340indels.erase(indl_next);
342}
else if(!indl->IsMismatch() && !indl_next->IsMismatch()) {
343 if(indl->Len() == indl_next->Len()) {
345indels.erase(indl_next);
346}
else if(indl->Len() < indl_next->Len()) {
348*indl_next =
CInDelInfo(indl->InDelEnd(), indl_next->Len()-indl->Len(), indl_next->GetType(), new_seq.substr(indl->Len()));
350*indl =
CInDelInfo(indl->Loc(), indl->Len()-indl_next->Len(), indl->GetType(), new_seq.substr(0,new_seq_len-indl_next->Len()));
360 if(!indels.empty()) {
361 auto& indl = indels.front();
362 auto& e = Exons().front();
363 if(!indl.IsMismatch() && indl.Loc() == e.GetFrom()) {
364 if(indl.IsInsertion()) {
365e.AddFrom(indl.Len());
368 auto&
t= transcript_exons.front();
369 if(orientation ==
ePlus)
370 t.SetFrom(
t.GetFrom()+indl.Len());
372 t.SetTo(
t.GetTo()-indl.Len());
374indels.erase(indels.begin());
377 if(!indels.empty()) {
378 auto& indl = indels.back();
379 auto& e = Exons().back();
380 if(!indl.IsMismatch() && indl.InDelEnd() == e.GetTo()+1) {
381 if(indl.IsInsertion()) {
382e.AddTo(-indl.Len());
385 auto&
t= transcript_exons.back();
386 if(orientation ==
ePlus)
387 t.SetTo(
t.GetTo()-indl.Len());
389 t.SetFrom(
t.GetFrom()+indl.Len());
394Exons().front().m_fsplice_sig.clear();
395Exons().back().m_ssplice_sig.clear();
397m_alignmap =
CAlignMap(Exons(), transcript_exons, indels, orientation, target_len );
398FrameShifts() = indels;
407 for(CGeneModel::TExons::const_iterator piece_begin = Exons().begin(); piece_begin != Exons().end(); ++piece_begin) {
408 _ASSERT( !piece_begin->m_fsplice );
410 if(piece_begin->Limits().Empty()) {
411 _ASSERT(piece_begin->m_ssplice);
413 _ASSERT(piece_begin->Limits().NotEmpty());
416CGeneModel::TExons::const_iterator piece_end;
417 for(piece_end = piece_begin; piece_end != Exons().end() && piece_end->m_ssplice; ++piece_end) ;
418 _ASSERT( piece_end != Exons().end() );
420CGeneModel::TExons::const_iterator piece_end_g = piece_end;
421 if(piece_end_g->Limits().Empty()) {
422 _ASSERT(piece_end_g->m_fsplice);
424 _ASSERT(piece_end_g->Limits().NotEmpty());
427 TSignedSeqRangepiece_range(piece_begin->GetFrom(),piece_end_g->GetTo());
429piece_range = m_alignmap.ShrinkToRealPoints(piece_range, is_protein);
446 _ASSERT(piece_range.NotEmpty());
447 _ASSERT(piece_range.IntersectingWith(piece_begin->Limits()) && piece_range.IntersectingWith(piece_end_g->Limits()));
449 if(piece_range.GetFrom() != piece_begin->GetFrom() || piece_range.GetTo() != piece_end_g->GetTo()) {
454piece_begin = piece_end;
458 TSignedSeqRangereading_frame = m_alignmap.MapRangeOrigToEdited(Limits(),
true);
462 if((*m)->IsStart_codon_found()) {
465}
else if((*m)->IsStop_codon_found()) {
473cds_info_t.SetReadingFrame(reading_frame,
true);
475cds_info_t.SetStart(start,
false);
478cds_info_t.SetStop(stop,
false);
482SetCdsInfo(cds_info_g);
484SetCdsInfo(cds_info_t);
494 if((*it)->CanGetId() && (*it)->GetId().IsStr()) {
495 stringscr = (*it)->GetId().GetStr();
496 if((scr ==
"N of matches") || (scr ==
"num_ident") || (scr ==
"matches")) {
497 doubleident = (*it)->GetValue().GetInt();
500}
else if(scr ==
"rank"&& (*it)->GetValue().GetInt() == 1) {
502}
else if(scr ==
"ambiguous_orientation") {
504}
else if(scr ==
"count") {
505 _ASSERT(Weight() == 1 || Weight() == (*it)->GetValue().GetInt());
506SetWeight((*it)->GetValue().GetInt());
530 stringcds_seq(cds_len,
'A');
531 copy(mrna.begin()+cds_start, mrna.begin()+cds_start+cds_len, cds_seq.begin());
554prot_seq[(stp->GetFrom()- cds_info.
Cds().
GetFrom())/3] =
'U';
567 if(prot_seq[0] ==
'-') {
568 stringfirst_triplet = cds_seq.substr(0, 3);
571prot_seq = first_aa+prot_seq.substr(1);
582prot_seq[(stp->GetFrom()- cds_info.
Cds().
GetFrom())/3] =
'U';
594 CRef<CSeq_loc>s_ExonDataToLoc(
constvector<TSignedSeqRange>& vec,
600 ITERATE(vector<TSignedSeqRange>, iter, vec) {
602ival->SetFrom(iter->GetFrom());
603ival->SetTo(iter->GetTo());
604ival->SetStrand(strand);
605ival->SetId().Assign(
id);
607 data.push_back(ival);
610 if(
data.size() == 1) {
611loc->SetInt(*
data.front());
613loc->SetPacked_int().Set().swap(
data);
627annot->SetNameDesc(
"Gnomon gene scan output");
631 unsigned intcounter = 0;
632 stringlocus_tag_base(
"GNOMON_");
635 intstrand = igene.
Strand();
638vector<TSignedSeqRange> mrna_vec;
639 copy(igene.
Exons().begin(), igene.
Exons().end(), back_inserter(mrna_vec));
640vector<TSignedSeqRange> cds_vec;
642 for(
size_tj = 0; j < mrna_vec.size(); ++j) {
644 if(!intersect.
Empty()) {
645cds_vec.push_back(intersect);
651 if(strand ==
ePlus) {
652 _ASSERT(cds_vec.back().GetLength()>=3);
653cds_vec.back().SetTo(cds_vec.back().GetTo() - 3);
655 _ASSERT(cds_vec.front().GetLength()>=3);
656cds_vec.front().SetFrom(cds_vec.front().GetFrom() + 3);
663 if(mrna_vec.size()) {
667(*s_ExonDataToLoc(mrna_vec,
676 if(!cds_vec.empty()) {
681(*s_ExonDataToLoc(cds_vec,
690snprintf(
buf, 32,
"%04u", ++counter);
691 stringname(locus_tag_base);
693feat_gene->
SetData().SetGene().SetLocus_tag(name);
704 ftable.push_back(feat_gene);
705 ftable.push_back(feat_mrna);
707 ftable.push_back(feat_cds);
731 unsigned intgc_count = 0;
733 for( ; xcript_iter; ++xcript_iter) {
734 if(*xcript_iter ==
'G'|| *xcript_iter ==
'C') {
738*gccontent =
static_cast<unsigned int>(100.0 * gc_count / xcript_vec.
size() + 0.5);
745vec.SetIupacCoding();
747seq.reserve(vec.size());
749 for( ; iter; ++iter) {
756 for(
unsigned int i= 5;
i< seq.size() - 3; ++
i)
757score += cdr.
Score(seq,
i,
i% 3) - ncdr.
Score(seq,
i);
767 inttotallen = xcript_vec.
size();
770 intextrabases = start.
Left()+2;
773 if(startposition < extrabases) {
775extraNs5p = extrabases-startposition;
777left = startposition-extrabases;
779right =
min(startposition+2+start.
Right(),totallen-1);
782 if(startposition+extrabases >= totallen) {
784extraNs5p = startposition+extrabases-totallen+1;
786right = startposition+extrabases;
788left =
max(0,startposition-2-start.
Right());
796 id->Assign(*seq_id);
804sttseq.resize(extraNs5p,
enN);
805 for(
unsigned int i= 0;
i< sttvec.
size(); ++
i) {
806sttseq.push_back(
fromACGT(sttvec[
i]));
810*startscore = start.
Score(sttseq, extrabases+2);
812 for(
unsigned int i= 5;
i< sttseq.size(); ++
i) {
813*startscore -= ncdr.
Score(sttseq,
i);
User-defined methods of the data storage class.
void EditedSequence(const In &original_sequence, Out &edited_sequence, bool includeholes=false) const
CCDSInfo MapFromEditedToOrig(const CAlignMap &amap) const
CCDSInfo MapFromOrigToEdited(const CAlignMap &amap) const
bool IsMappedToGenome() const
TSignedSeqRange Cds() const
TSignedSeqRange ReadingFrame() const
const TPStops & PStops() const
static double GetScore(CConstRef< CHMMParameters > hmm_params, const objects::CSeq_loc &cds, objects::CScope &scope, int *const gccontent, double *const startscore=0)
string GetProtein(const CResidueVec &contig_sequence) const
TSignedSeqRange TranscriptLimits() const
const TExons & Exons() const
TSignedSeqRange ReadingFrame() const
virtual CAlignMap GetAlignMap() const
TSignedSeqRange RealCdsLimits() const
string GetCdsDnaSequence(const CResidueVec &contig_sequence) const
const CCDSInfo & GetCdsInfo() const
bool PStop(bool includeall=true) const
list< CGeneModel > GetGenes() const
CRef< objects::CSeq_annot > GetAnnot(const objects::CSeq_id &id)
const CInputModel & GetParameter(const string &type, int cgcontent) const
static string ToString(const CSeq_id &id)
double Score(const CEResidueVec &seq, int i, int codonshift) const
double Score(const CEResidueVec &seq, int i) const
TSeqPos GetAlignLength(bool include_gaps=true) const
Get the length of this alignment.
namespace ncbi::objects::
double Score(const CEResidueVec &seq, int i) const
bool Empty(const CNcbiOstrstream &src)
vector< TResidue > CResidueVec
void ReverseComplement(const BidirectionalIterator &first, const BidirectionalIterator &last)
list< CGeneModel > TGeneModelList
vector< CInDelInfo > TInDels
USING_SCOPE(ncbi::objects)
Int8 GetModelId(const CSeq_align &seq_align)
EResidue fromACGT(TResidue c)
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
int TSignedSeqPos
Type for signed sequence position.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non-constant version of the ITERATE macro.
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
ENa_strand GetStrand(void) const
Get the location's strand.
TRange GetTotalRange(void) const
const CSeq_id * GetId(void) const
Get the id of the location; returns NULL if the location has multiple ids or no id at all.
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
CSeqVector GetSeqVector(EVectorCoding coding, ENa_strand strand=eNa_strand_plus) const
Get sequence: Iupacna or Iupacaa if use_iupac_coding is true.
void SetIupacCoding(void)
Set coding to either Iupacaa or Iupacna depending on molecule type.
int64_t Int8
8-byte (64-bit) signed integer
position_type GetLength(void) const
bool NotEmpty(void) const
CRange< TSignedSeqPos > TSignedSeqRange
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
static const char label[]
void SetFrom(TFrom value)
Assign a value to From data member.
TTo GetTo(void) const
Get the To member data.
TFrom GetFrom(void) const
Get the From member data.
void SetTo(TTo value)
Assign a value to To data member.
vector< CRef< CUser_field > > TData
const TDonor_after_exon & GetDonor_after_exon(void) const
Get the Donor_after_exon member data.
const TId & GetId(void) const
Get the Id member data.
bool CanGetProduct_length(void) const
Check if it is safe to call GetProduct_length method.
const TGenomic_id & GetGenomic_id(void) const
Get the Genomic_id member data.
bool CanGetAcceptor_before_exon(void) const
Check if it is safe to call GetAcceptor_before_exon method.
bool CanGetBases(void) const
Check if it is safe to call GetBases method.
bool CanGetGenomic_id(void) const
Check if it is safe to call GetGenomic_id method.
vector< CRef< CScore > > TScore
TMatch GetMatch(void) const
Get the variant data.
list< CRef< CScore > > Tdata
const TProduct_id & GetProduct_id(void) const
Get the Product_id member data.
TGenomic_start GetGenomic_start(void) const
Get the Genomic_start member data.
const TAcceptor_before_exon & GetAcceptor_before_exon(void) const
Get the Acceptor_before_exon member data.
bool CanGetGenomic_id(void) const
Check if it is safe to call GetGenomic_id method.
bool IsMismatch(void) const
Check if variant Mismatch is selected.
TProduct_length GetProduct_length(void) const
Get the Product_length member data.
bool CanGetProduct_type(void) const
Check if it is safe to call GetProduct_type method.
bool IsSetPoly_a(void) const
Start of the poly(A) tail on the transcript. For sense transcripts: aligned product positions < poly-a <= ...
bool CanGetModifiers(void) const
Check if it is safe to call GetModifiers method.
TDiag GetDiag(void) const
Get the variant data.
bool CanGetScore(void) const
Check if it is safe to call GetScore method.
TProduct_type GetProduct_type(void) const
Get the Product_type member data.
TMismatch GetMismatch(void) const
Get the variant data.
list< CRef< CUser_object > > TExt
bool CanGetProduct_strand(void) const
Check if it is safe to call GetProduct_strand method.
const TParts & GetParts(void) const
Get the Parts member data.
const TProduct_start & GetProduct_start(void) const
Get the Product_start member data.
const TProduct_end & GetProduct_end(void) const
Get the Product_end member data.
const TSpliced & GetSpliced(void) const
Get the variant data.
list< CRef< CSpliced_seg_modifier > > TModifiers
bool IsGenomic_ins(void) const
Check if variant Genomic_ins is selected.
bool IsMatch(void) const
Check if variant Match is selected.
bool CanGetGenomic_strand(void) const
Check if it is safe to call GetGenomic_strand method.
TGenomic_ins GetGenomic_ins(void) const
Get the variant data.
const TScores & GetScores(void) const
Get the Scores member data.
bool CanGetExt(void) const
Check if it is safe to call GetExt method.
list< CRef< CSpliced_exon > > TExons
const TExons & GetExons(void) const
Get the Exons member data.
TGenomic_strand GetGenomic_strand(void) const
Get the Genomic_strand member data.
bool CanGetScores(void) const
Check if it is safe to call GetScores method.
const TExt & GetExt(void) const
Get the Ext member data.
const TBases & GetBases(void) const
Get the Bases member data.
list< CRef< CSpliced_exon_chunk > > TParts
bool CanGetDonor_after_exon(void) const
Check if it is safe to call GetDonor_after_exon method.
TGenomic_end GetGenomic_end(void) const
Get the Genomic_end member data.
const Tdata & Get(void) const
Get the member data.
TProduct_strand GetProduct_strand(void) const
Get the Product_strand member data.
bool CanGet(void) const
Check if it is safe to call Get method.
bool IsProduct_ins(void) const
Check if variant Product_ins is selected.
const TScore & GetScore(void) const
Get the Score member data.
const TModifiers & GetModifiers(void) const
Get the Modifiers member data.
TProduct_ins GetProduct_ins(void) const
Get the variant data.
const TSegs & GetSegs(void) const
Get the Segs member data.
const TGenomic_id & GetGenomic_id(void) const
Get the Genomic_id member data.
bool CanGetId(void) const
Check if it is safe to call GetId method.
void SetLocation(TLocation &value)
Assign a value to Location data member.
const TLocation & GetLocation(void) const
Get the Location member data.
void SetData(TData &value)
Assign a value to Data data member.
void SetTo(TTo value)
Assign a value to To data member.
list< CRef< CSeq_interval > > Tdata
ENa_strand
strand of nucleic acid
void SetId(TId &value)
Assign a value to Id data member.
void SetFrom(TFrom value)
Assign a value to From data member.
void SetStrand(TStrand value)
Assign a value to Strand data member.
list< CRef< CSeq_feat > > TFtable
string GetDNASequence(CConstRef< objects::CSeq_id > id, CScope &scope)
constexpr auto sort(_Init &&init)
Magic spell ;-) needed for some weird compilers... very empirical.
void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4