pair <ENa_strand, ENa_strand> GetSplicedStrands(
const CSpliced_seg& spliced_seg)
59(spliced_seg.
GetExons().front()->IsSetProduct_strand() ?
60spliced_seg.
GetExons().front()->GetProduct_strand() :
65(spliced_seg.
GetExons().front()->IsSetGenomic_strand()?
66spliced_seg.
GetExons().front()->GetGenomic_strand():
69 returnmake_pair(product_strand, genomic_strand);
83pair <ENa_strand, ENa_strand> strands = GetSplicedStrands(spliced_seg);
87exons.resize(spliced_seg.
GetExons().size());
93 SExon& exon_struct = exons[
i++];
109 boolcross_the_origin =
i> 1 && (
114 if(cross_the_origin && scope) {
145GetExonStructure(spliced_seg, exons, m_scope);
149pair <ENa_strand, ENa_strand> strands = GetSplicedStrands(spliced_seg);
158product_max_pos = spliced_seg.
GetPoly_a()-1;
162product_max_pos = product_max_pos*3+2;
164product_max_pos = exons.back().prod_to;
170product_min_pos = product_min_pos*3-2;
172product_min_pos = exons[0].prod_from;
181CSpliced_seg::TExons::iterator it = spliced_seg.
SetExons().begin();
196 for(++it; it != spliced_seg.
SetExons().end(); ++
i, prev_exon = *it++) {
199 booldonor_set = prev_exon->IsSetDonor_after_exon() || (genomic_strand ==
eNa_strand_minus&& prev_exon->GetGenomic_start()==0);
202 if(donor_set && acceptor_set && exons[
i-1].prod_to + 1 == exons[
i].prod_from) {
206 _ASSERT( exons[
i].prod_from > exons[
i-1].prod_to );
207 intprod_hole_len = exons[
i].prod_from - exons[
i-1].prod_to -1;
208 _ASSERT( exons[
i].genomic_from > exons[
i-1].genomic_to );
209 intgenomic_hole_len = exons[
i].genomic_from - exons[
i-1].genomic_to -1;
211 if(((m_intron_stitch_threshold_flags &
fProduct) &&
212prod_hole_len >= (
int)m_min_intron) ||
213((m_intron_stitch_threshold_flags &
fGenomic) &&
214genomic_hole_len >= (
int)m_min_intron))
217 if(!prev_exon->IsSetParts() || prev_exon->GetParts().empty()) {
219part->SetMatch(exons[
i-1].prod_to-exons[
i-1].prod_from+1);
220prev_exon->SetParts().push_back(part);
224part->SetMatch(exons[
i].prod_to-exons[
i].prod_from+1);
228 intmax_hole_len =
max(prod_hole_len, genomic_hole_len);
229 intmin_hole_len =
min(prod_hole_len, genomic_hole_len);
230 intleft_mismatch_len = 0;
231 intright_mismatch_len = min_hole_len;
232 if(prod_hole_len != genomic_hole_len && mapper_to_cds) {
233 CSeq_locend_pos(*transcript_id, exons[
i-1].prod_to);
234 TSeqPosend_pos_on_cds = mapper_to_cds->
Map(end_pos)
236 intbases_needed_to_complete_codon = 2 - (end_pos_on_cds % 3);
238 if(right_mismatch_len >= bases_needed_to_complete_codon) {
239left_mismatch_len = bases_needed_to_complete_codon + ((right_mismatch_len-bases_needed_to_complete_codon)/2/3)*3;
240right_mismatch_len -= left_mismatch_len;
244 boolno_acceptor_before =
i> 1 && !prev_exon->IsSetAcceptor_before_exon();
248 boolcross_the_origin =
253 if(cross_the_origin) {
254 intgenomic_size = m_scope->GetSequenceLength(spliced_seg.
GetGenomic_id());
256prev_exon->SetPartial(product_min_pos < exons[
i-1].prod_from &&
259exon.
SetPartial(exons[
i].prod_to < product_max_pos &&
263prev_exon->SetGenomic_end(genomic_size-1);
266prev_exon->SetGenomic_start(0);
271 intto_origin =
origin- exons[
i-1].genomic_to -1;
272 if(prod_hole_len == genomic_hole_len) {
273left_mismatch_len = to_origin;
274right_mismatch_len -= left_mismatch_len;
277 if(left_mismatch_len > 0 && to_origin > 0) {
278 intmismatch_len =
min(left_mismatch_len, to_origin);
280part->SetMismatch(mismatch_len);
281prev_exon->SetParts().push_back(part);
282prod_hole_len -= mismatch_len;
283genomic_hole_len -= mismatch_len;
284to_origin -= mismatch_len;
285exons[
i-1].genomic_to += mismatch_len;
286exons[
i-1].prod_to += mismatch_len;
287left_mismatch_len -= mismatch_len;
291 _ASSERT(left_mismatch_len == 0);
292 _ASSERT(prod_hole_len != genomic_hole_len);
294 if(prod_hole_len < genomic_hole_len) {
295 intgenomic_ins =
min(genomic_hole_len-prod_hole_len, to_origin);
296part->SetGenomic_ins(genomic_ins);
297genomic_hole_len -= genomic_ins;
298to_origin -= genomic_ins;
299exons[
i-1].genomic_to += genomic_ins;
301part->SetProduct_ins(prod_hole_len-genomic_hole_len);
302exons[
i-1].prod_to += prod_hole_len-genomic_hole_len;
303prod_hole_len = genomic_hole_len;
305prev_exon->SetParts().push_back(part);
308 _ASSERT(prod_hole_len == genomic_hole_len);
309 _ASSERT(right_mismatch_len >= to_origin);
310 intmismatch_len = to_origin;
312part->SetMismatch(mismatch_len);
313prev_exon->SetParts().push_back(part);
314prod_hole_len -= mismatch_len;
315genomic_hole_len -= mismatch_len;
317exons[
i-1].genomic_to += mismatch_len;
318exons[
i-1].prod_to += mismatch_len;
319right_mismatch_len -= mismatch_len;
325exons[
i].prod_from = exons[
i-1].prod_to+1;
326exons[
i].genomic_from = exons[
i-1].genomic_to+1;
329prev_exon->SetProduct_end().SetProtpos().SetAmin() = exons[
i-1].prod_to/3;
330prev_exon->SetProduct_end().SetProtpos().SetFrame() = (exons[
i-1].prod_to %3) +1;
332exon.
SetProduct_start().SetProtpos().SetFrame() = (exons[
i].prod_from %3) +1;
334prev_exon->SetProduct_end().SetNucpos( exons[
i-1].prod_to );
337prev_exon->SetProduct_start().SetNucpos( -exons[
i-1].prod_to );
341list <CRef< CSpliced_exon_chunk > >::iterator insertion_point = exon.
SetParts().begin();
343 if(left_mismatch_len > 0) {
345part->SetMismatch(left_mismatch_len);
346insertion_point = exon.
SetParts().insert(insertion_point, part);
349 if(prod_hole_len != genomic_hole_len) {
351 if(prod_hole_len < genomic_hole_len) {
352part->SetGenomic_ins(genomic_hole_len - prod_hole_len);
354part->SetProduct_ins(prod_hole_len - genomic_hole_len);
356insertion_point = exon.
SetParts().insert(insertion_point, part);
359 if(right_mismatch_len > 0) {
361part->SetMismatch(right_mismatch_len);
362exon.
SetParts().insert(insertion_point, part);
380 if(left_mismatch_len > 0) {
382part->SetMismatch(left_mismatch_len);
383prev_exon->SetParts().push_back(part);
385 if(prod_hole_len != genomic_hole_len) {
387 if(prod_hole_len < genomic_hole_len) {
388part->SetGenomic_ins(max_hole_len - min_hole_len);
390part->SetProduct_ins(max_hole_len - min_hole_len);
392prev_exon->SetParts().push_back(part);
394 if(right_mismatch_len > 0) {
396part->SetMismatch(right_mismatch_len);
397prev_exon->SetParts().push_back(part);
400prev_exon->SetParts().splice(prev_exon->SetParts().end(), exon.
SetParts());
405prev_exon->ResetDonor_after_exon();
408exons[
i].prod_from = exons[
i-1].prod_from;
409exons[
i].genomic_from = exons[
i-1].genomic_from;
411prev_exon->SetPartial(
412(product_min_pos < exons[
i-1].prod_from && no_acceptor_before) ||
413(exons[
i].prod_to < product_max_pos && no_donor_after));
416prev_exon->SetExt().splice(prev_exon->SetExt().end(), exon.
SetExt());
419CSpliced_seg::TExons::iterator save_it = it;
450align.
SetSegs().SetSpliced().SetExons())
452(*exon_it)->ResetScores();
474align.
SetSegs().SetSpliced().SetExons())
476RecalculateExonIdty(**exon_it);
487score_builder.
AddScore(*m_scope, align, *score);
510 switch((*part_it)->Which()) {
512matches += (*part_it)->GetMatch();
513total += (*part_it)->GetMatch();
517total += (*part_it)->GetMismatch();
521total += (*part_it)->GetProduct_ins();
525total += (*part_it)->GetGenomic_ins();
544 if(idty >= 0 && (*score_it)->IsSetId() && (*score_it)->GetId().IsStr() &&
545(*score_it)->GetId().GetStr() ==
"idty") {
546(*score_it)->SetValue().SetReal(idty / 10000000000.);
548exon_scores.erase(score_it);
562pair <ENa_strand, ENa_strand> strands = GetSplicedStrands(spliced_seg);
577 "TrimHolesToCodons(): " 578 "Reversed mRNA with CDS");
583GetExonStructure(spliced_seg, exons, m_scope);
585 intframe_offset = (exons.back().prod_to/3+1)*3+cds.
GetFrom();
587vector<SExon>::iterator right_exon_it = exons.begin();
588CSpliced_seg::TExons::iterator right_spl_exon_it = spliced_seg.
SetExons().begin();
590 for(;;++right_exon_it, ++right_spl_exon_it) {
592vector<SExon>::reverse_iterator left_exon_it(right_exon_it);
593CSpliced_seg::TExons::reverse_iterator left_spl_exon_it(right_spl_exon_it);
595 if(right_exon_it != exons.begin() && right_exon_it != exons.end()) {
596 booldonor_set = left_spl_exon_it != spliced_seg.
SetExons().rend() && (*left_spl_exon_it)->IsSetDonor_after_exon();
597 boolacceptor_set = right_spl_exon_it != spliced_seg.
SetExons().end() && (*right_spl_exon_it)->IsSetAcceptor_before_exon();
599 if(((donor_set && acceptor_set) || left_exon_it->genomic_to + 1 == right_exon_it->genomic_from) && left_exon_it->prod_to + 1 == right_exon_it->prod_from) {
604 if(right_exon_it != exons.begin() && (right_exon_it != exons.end() || (m_flags &
fTrimEnds))) {
605 while(exons.rend() != left_exon_it &&
606cds.
GetFrom() < left_exon_it->prod_to && left_exon_it->prod_to < cds.
GetTo() &&
607(left_exon_it->prod_to - cds.
GetFrom() + 1) % 3 > 0
609TrimLeftExon(
min(left_exon_it->prod_to - left_exon_it->prod_from + 1,
610(left_exon_it->prod_to - cds.
GetFrom() + 1) % 3),
612exons.rend(), left_exon_it, left_spl_exon_it,
613product_strand, genomic_strand);
617 if(right_exon_it != exons.end() && (right_exon_it != exons.begin() || (m_flags &
fTrimEnds))) {
618 while(right_exon_it != exons.end() &&
619cds.
GetFrom() < right_exon_it->prod_from && right_exon_it->prod_from < cds.
GetTo() &&
620(frame_offset-right_exon_it->prod_from) % 3 > 0
622TrimRightExon(
min(right_exon_it->prod_to - right_exon_it->prod_from + 1,
623(frame_offset-right_exon_it->prod_from) % 3),
625right_exon_it, exons.end(), right_spl_exon_it,
626product_strand, genomic_strand);
630 if(left_exon_it.base() != right_exon_it) {
631right_exon_it = exons.erase(left_exon_it.base(), right_exon_it);
632right_spl_exon_it = spliced_seg.
SetExons().erase(left_spl_exon_it.base(), right_spl_exon_it);
635 if(right_exon_it == exons.end())
638 _ASSERT(right_exon_it == exons.end() && right_spl_exon_it == spliced_seg.
SetExons().end());
644 boolis_protein_align =
647CSpliced_seg::TExons::iterator prev_exon_it = spliced_seg.
SetExons().end();
649 boolhas_parts =
false;
669 switch(chunk.
Which()) {
675 if(part_index == 0 && prev_exon_it != spliced_seg.
SetExons().end() &&
676(*prev_exon_it)->IsSetParts()) {
682 if(prev_len +
len>= 3) {
684prev_chunk.
SetDiag(prev_len);
686 if(is_protein_align) {
687 TSeqPosproduct_end = (*prev_exon_it)->GetProduct_end().AsSeqPos();
688product_end += prev_len;
689(*prev_exon_it)->SetProduct_end().SetProtpos().SetAmin (product_end / 3);
690(*prev_exon_it)->SetProduct_end().SetProtpos().SetFrame((product_end % 3) + 1);
693product_start += prev_len;
697(*prev_exon_it)->SetProduct_end().SetNucpos() += prev_len;
701 if(
len> 3-prev_len) {
703new_chunk->SetDiag(3-prev_len);
704exon.
SetParts().insert(part_it, new_chunk);
716new_chunk->SetDiag(
len- (
len% 3));
717exon.
SetParts().insert(part_it, new_chunk);
739prev_exon_it = exon_it;
756 if(is_protein_align) {
759->GetProduct_start().GetProtpos().GetFrame() - 1;
764->GetProduct_start().GetNucpos() % 3;
766 for(
auto& exon_it : spliced_seg.
SetExons()) {
768 if(is_protein_align) {
769exon_it->SetProduct_start().SetProtpos().SetAmin(product_pos / 3);
770exon_it->SetProduct_start().SetProtpos().SetFrame(product_pos % 3 + 1);
773exon_it->SetProduct_start().SetNucpos(product_pos);
777 for(
const auto& part : exon_it->GetParts()) {
778 switch(part->Which()) {
780product_pos += part->GetMatch();
783product_pos += part->GetMismatch();
786product_pos += part->GetDiag();
791product_pos += part->GetProduct_ins();
796 "unhandled part type in exon length computation");
801 if(is_protein_align) {
802exon_it->SetProduct_end().SetProtpos().SetAmin ((product_pos - 1) / 3);
803exon_it->SetProduct_end().SetProtpos().SetFrame((product_pos - 1) % 3 + 1);
806exon_it->SetProduct_end().SetNucpos(product_pos - 1);
813? (*prev_exon_it)->GetProduct_end().GetProtpos().GetAmin()+1
814: (*prev_exon_it)->GetProduct_end().GetNucpos()+1;
819 return m_impl->AdjustAlignment(align_in, range,
mode);
828align->
Assign(align_in);
830vector<SExon> orig_exons = GetExons(*align);
834pair <ENa_strand, ENa_strand> strands = GetSplicedStrands(spliced_seg);
840 "AdjustAlignment(): " 841 "product minus strand not supported");
850spliced_seg.
GetExons().back()->GetGenomic_end());
853spliced_seg.
GetExons().front()->GetGenomic_end());
856 boolcross_the_origin = range.
GetFrom() > range.
GetTo();
857 if( !cross_the_origin ) {
858 autoit = spliced_seg.
GetExons().begin();
862 if((*it)->GetGenomic_end() > (*next)->GetGenomic_start()) {
863cross_the_origin =
true;
868 if((*it)->GetGenomic_start() < (*next)->GetGenomic_end()) {
869cross_the_origin =
true;
877 if(cross_the_origin) {
878genomic_size = m_scope->GetSequenceLength(spliced_seg.
GetGenomic_id());
882range.
SetTo(range.
GetTo() + genomic_size);
887range.
SetTo(range.
GetTo() + genomic_size);
893 if(spliced_seg.
GetExons().size() == 1) {
896 for(
auto& it : spliced_seg.
SetExons()) {
897it->SetGenomic_start(it->GetGenomic_start() + genomic_size);
898it->SetGenomic_end(it->GetGenomic_end() + genomic_size);
901align_range.
SetTo(align_range.
GetTo() + genomic_size);
920 autoit = spliced_seg.
SetExons().begin();
925 if(s_CrossesOrigin(**it, **
next, plus_strand)) {
926 autoadj_start =
next;
927 autoadj_end = spliced_seg.
SetExons().end();
929 if( !plus_strand ) {
930adj_start = spliced_seg.
SetExons().begin();
936 for( ; adj_start != adj_end; ++adj_start) {
937(*adj_start)->SetGenomic_start
938((*adj_start)->GetGenomic_start() + genomic_size);
939(*adj_start)->SetGenomic_end
940((*adj_start)->GetGenomic_end() + genomic_size);
944(*adj_start)->GetGenomic_end()));
958cerr <<
"range = "<< range << endl;
959cerr <<
"align_range = "<< align_range << endl;
961 NCBI_USER_THROW(
"alignmentrange and requested range don't overlap");
965GetExonStructure(spliced_seg, exons, m_scope);
967 boolis_protein_align =
970vector<SExon>::iterator right_exon_it = exons.begin();
971CSpliced_seg::TExons::iterator right_spl_exon_it = spliced_seg.
SetExons().begin();
976 for(;;++right_exon_it, ++right_spl_exon_it) {
978vector<SExon>::reverse_iterator left_exon_it(right_exon_it);
979CSpliced_seg::TExons::reverse_iterator left_spl_exon_it(right_spl_exon_it);
981 if(right_exon_it == exons.end() &&
982left_exon_it->genomic_to > range_right
985exons.rend(), left_exon_it, left_spl_exon_it,
986product_strand, genomic_strand);
988 if(right_exon_it == exons.begin() &&
989right_exon_it->genomic_from < range_left
992right_exon_it, exons.end(), right_spl_exon_it,
993product_strand, genomic_strand);
994 booldelete_me =
false;
995 if(left_exon_it.base() != right_exon_it) {
999right_exon_it = exons.erase(left_exon_it.base(), right_exon_it);
1000right_spl_exon_it = spliced_seg.
SetExons().erase(left_spl_exon_it.base(), right_spl_exon_it);
1003 if(right_exon_it == exons.end())
1010 intfirst_exon_extension = 0;
1011 intlast_exon_extension = 0;
1015first_exon_extension =
1021 if(first_exon_extension > 0) {
1025chunk->SetDiag(first_exon_extension);
1030last_exon_extension =
1036 if(last_exon_extension > 0) {
1040chunk->SetDiag(last_exon_extension);
1041last_exon.
SetParts().push_back(chunk);
1045last_exon_extension =
1051 if(last_exon_extension > 0) {
1055chunk->SetDiag(last_exon_extension);
1056last_exon.
SetParts().push_back(chunk);
1060first_exon_extension =
1065 if(first_exon_extension > 0) {
1069chunk->SetDiag(first_exon_extension);
1075exons.front().prod_from -= first_exon_extension;
1076exons.front().genomic_from -= first_exon_extension;
1077exons.back().prod_to += last_exon_extension;
1078exons.back().genomic_to += last_exon_extension;
1084 if(first_exon_extension > 0) {
1086exon->SetGenomic_start() = range.
GetFrom();
1087exon->SetGenomic_end() = genomic_size-1;
1088spliced_seg.
SetExons().push_front(exon);
1091exon_struct.
prod_from= exons.front().prod_from - first_exon_extension;
1092exon_struct.
prod_to= exons.front().prod_from - 1;
1093exon_struct.
genomic_from= exons.front().genomic_from - first_exon_extension;
1094exon_struct.
genomic_to= exons.front().genomic_from - 1;
1096exons.insert(exons.begin(), exon_struct);
1101 if(last_exon_extension > 0) {
1103exon->SetGenomic_start() = 0;
1104exon->SetGenomic_end() = last_exon_extension - 1;
1105spliced_seg.
SetExons().push_back(exon);
1108exon_struct.
prod_from= exons.back().prod_to + 1;
1109exon_struct.
prod_to= exons.back().prod_to + last_exon_extension;
1110exon_struct.
genomic_from= exons.back().genomic_to +1;
1111exon_struct.
genomic_to= exons.back().genomic_to + last_exon_extension;
1113exons.push_back(exon_struct);
1118 if(last_exon_extension > 0) {
1120exon->SetGenomic_start() = range.
GetFrom();
1121exon->SetGenomic_end() = genomic_size-1;
1122spliced_seg.
SetExons().push_back(exon);
1125exon_struct.
prod_from= exons.back().prod_to + 1;
1126exon_struct.
prod_to= exons.back().prod_to + last_exon_extension;
1127exon_struct.
genomic_from= exons.back().genomic_to +1;
1128exon_struct.
genomic_to= exons.back().genomic_to + last_exon_extension;
1130exons.push_back(exon_struct);
1135 if(first_exon_extension > 0) {
1137exon->SetGenomic_start() = 0;
1138exon->SetGenomic_end() = first_exon_extension - 1;
1139spliced_seg.
SetExons().push_front(exon);
1142exon_struct.
prod_from= exons.front().prod_from - first_exon_extension;
1143exon_struct.
prod_to= exons.front().prod_from - 1;
1144exon_struct.
genomic_from= exons.front().genomic_from - first_exon_extension;
1145exon_struct.
genomic_to= exons.front().genomic_from - 1;
1147exons.insert(exons.begin(), exon_struct);
1151 if(range_left != exons.front().genomic_from || range_right != exons.back().genomic_to) {
1153 "AdjustAlignment(): " 1154 "result's ends do not match the range. This is a bug in AdjustAlignment implementation");
1157 int offset= is_protein_align ?
int(exons.front().prod_from/3)*3 : exons.front().prod_from;
1158 if(
offset> exons.front().prod_from)
1165vector<SExon>::iterator exon_struct_it = exons.begin();
1167 intputative_prod_length = 0;
1168 if(is_protein_align) {
1175putative_prod_length = (exons.back().prod_to -
offset+ 3)/3;
1183putative_prod_length = exons.back().prod_to -
offset+ 1;
1189 if(cross_the_origin) {
1199 auto& spliced_exons = spliced_seg.
SetExons();
1200 for(
autoexon_it = spliced_exons.begin(); exon_it != spliced_exons.end();) {
1201 booldelete_me =
false;
1202 if( (*exon_it)->IsSetParts() ) {
1207 for(
autopart_it: (*exon_it)->GetParts()) {
1208 switch( part_it->Which()) {
1219exon_it = spliced_exons.erase(exon_it);
1226 if(GetExons(*align) != orig_exons) {
1227ClearScores(*align);
1239feat_iter; ++feat_iter)
1241 if(!feat_iter.GetSize() ||
1242(feat_iter->IsSetPseudo() && feat_iter->GetPseudo()))
1246cdregion_feat = *feat_iter;
1254 returncdregion_feat;
1270vector<SExon>::reverse_iterator left_edge,
1271vector<SExon>::reverse_iterator& exon_it,
1272CSpliced_seg::TExons::reverse_iterator& spl_exon_it,
1276 _ASSERT( trim_amount < 3 || side!=eTrimProduct );
1277 boolis_protein = (*spl_exon_it)->GetProduct_start().IsProtpos();
1279 while(trim_amount > 0) {
1280 intexon_len = side==eTrimProduct
1281? (exon_it->prod_to - exon_it->prod_from + 1)
1282: (exon_it->genomic_to - exon_it->genomic_from + 1);
1283 if(exon_len <= trim_amount) {
1284 intnext_from = exon_it->genomic_from;
1287trim_amount -= exon_len;
1288 _ASSERT( trim_amount==0 || side!=eTrimProduct );
1289 if(exon_it == left_edge)
1291 if(trim_amount > 0) {
1292trim_amount -= next_from - exon_it->genomic_to -1;
1295(*spl_exon_it)->SetPartial(
true);
1296(*spl_exon_it)->ResetDonor_after_exon();
1298 intgenomic_trim_amount = 0;
1299 intproduct_trim_amount = 0;
1301 if((*spl_exon_it)->CanGetParts() && !(*spl_exon_it)->GetParts().empty()) {
1303CSpliced_exon_Base::TParts::iterator chunk = parts.end();
1304 while(--chunk, (trim_amount>0 ||
1306? (*chunk)->IsGenomic_ins()
1307: (*chunk)->IsProduct_ins()))) {
1308 intproduct_chunk_len = 0;
1309 intgenomic_chunk_len = 0;
1310 switch((*chunk)->Which()) {
1312product_chunk_len = (*chunk)->GetMatch();
1313genomic_chunk_len = product_chunk_len;
1314 if(product_chunk_len > trim_amount) {
1315(*chunk)->SetMatch(product_chunk_len - trim_amount);
1319product_chunk_len = (*chunk)->GetMismatch();
1320genomic_chunk_len = product_chunk_len;
1321 if(product_chunk_len > trim_amount) {
1322(*chunk)->SetMismatch(product_chunk_len - trim_amount);
1326product_chunk_len = (*chunk)->GetDiag();
1327genomic_chunk_len = product_chunk_len;
1328 if(product_chunk_len > trim_amount) {
1329(*chunk)->SetDiag(product_chunk_len - trim_amount);
1334product_chunk_len = (*chunk)->GetProduct_ins();
1335 if(side==eTrimProduct && product_chunk_len > trim_amount) {
1336(*chunk)->SetProduct_ins(product_chunk_len - trim_amount);
1340genomic_chunk_len = (*chunk)->GetGenomic_ins();
1341 if(side==eTrimGenomic && genomic_chunk_len > trim_amount) {
1342(*chunk)->SetGenomic_ins(genomic_chunk_len - trim_amount);
1350 if(side==eTrimProduct && product_chunk_len <= trim_amount) {
1351genomic_trim_amount += genomic_chunk_len;
1352product_trim_amount += product_chunk_len;
1353trim_amount -= product_chunk_len;
1354}
else if(side==eTrimGenomic && genomic_chunk_len <= trim_amount) {
1355genomic_trim_amount += genomic_chunk_len;
1356product_trim_amount += product_chunk_len;
1357trim_amount -= genomic_chunk_len;
1359genomic_trim_amount +=
min(trim_amount, genomic_chunk_len);
1360product_trim_amount +=
min(trim_amount, product_chunk_len);
1364chunk = parts.erase(chunk);
1368genomic_trim_amount += trim_amount;
1369product_trim_amount += trim_amount;
1373exon_it->prod_to -= product_trim_amount;
1374exon_it->genomic_to -= genomic_trim_amount;
1377 CProduct_pos& prot_pos = (*spl_exon_it)->SetProduct_end();
1378SetProtpos(prot_pos, exon_it->prod_to);
1381(*spl_exon_it)->SetProduct_end().SetNucpos() -= product_trim_amount;
1383(*spl_exon_it)->SetProduct_start().SetNucpos() += product_trim_amount;
1388(*spl_exon_it)->SetGenomic_end() -= genomic_trim_amount;
1390(*spl_exon_it)->SetGenomic_start() += genomic_trim_amount;
1396vector<SExon>::iterator& exon_it,
1397vector<SExon>::iterator right_edge,
1398CSpliced_seg::TExons::iterator& spl_exon_it,
1402 _ASSERT( trim_amount < 3 || side!=eTrimProduct );
1403 boolis_protein = (*spl_exon_it)->GetProduct_start().IsProtpos();
1405 while(trim_amount > 0) {
1406 intexon_len = side==eTrimProduct
1407? (exon_it->prod_to - exon_it->prod_from + 1)
1408: (exon_it->genomic_to - exon_it->genomic_from + 1);
1409 if(exon_len <= trim_amount) {
1410 intprev_to = exon_it->genomic_to;
1413trim_amount -= exon_len;
1414 _ASSERT( trim_amount==0 || side!=eTrimProduct );
1415 if(exon_it == right_edge)
1417 if(trim_amount > 0) {
1418trim_amount -= exon_it->genomic_from - prev_to -1;
1421(*spl_exon_it)->SetPartial(
true);
1422(*spl_exon_it)->ResetAcceptor_before_exon();
1424 intgenomic_trim_amount = 0;
1425 intproduct_trim_amount = 0;
1427 if((*spl_exon_it)->CanGetParts() && !(*spl_exon_it)->GetParts().empty()) {
1429CSpliced_exon_Base::TParts::iterator chunk = parts.begin();
1430 for(; trim_amount>0 ||
1432? (*chunk)->IsGenomic_ins()
1433: (*chunk)->IsProduct_ins());
1435 intproduct_chunk_len = 0;
1436 intgenomic_chunk_len = 0;
1437 switch((*chunk)->Which()) {
1439product_chunk_len = (*chunk)->GetMatch();
1440genomic_chunk_len = product_chunk_len;
1441 if(product_chunk_len > trim_amount) {
1442(*chunk)->SetMatch(product_chunk_len - trim_amount);
1446product_chunk_len = (*chunk)->GetMismatch();
1447genomic_chunk_len = product_chunk_len;
1448 if(product_chunk_len > trim_amount) {
1449(*chunk)->SetMismatch(product_chunk_len - trim_amount);
1453product_chunk_len = (*chunk)->GetDiag();
1454genomic_chunk_len = product_chunk_len;
1455 if(product_chunk_len > trim_amount) {
1456(*chunk)->SetDiag(product_chunk_len - trim_amount);
1461product_chunk_len = (*chunk)->GetProduct_ins();
1462 if(side==eTrimProduct && product_chunk_len > trim_amount) {
1463(*chunk)->SetProduct_ins(product_chunk_len - trim_amount);
1467genomic_chunk_len = (*chunk)->GetGenomic_ins();
1468 if(side==eTrimGenomic && genomic_chunk_len > trim_amount) {
1469(*chunk)->SetGenomic_ins(genomic_chunk_len - trim_amount);
1477 if(side==eTrimProduct && product_chunk_len <= trim_amount) {
1478genomic_trim_amount += genomic_chunk_len;
1479product_trim_amount += product_chunk_len;
1480trim_amount -= product_chunk_len;
1481}
else if(side==eTrimGenomic && genomic_chunk_len <= trim_amount) {
1482genomic_trim_amount += genomic_chunk_len;
1483product_trim_amount += product_chunk_len;
1484trim_amount -= genomic_chunk_len;
1486genomic_trim_amount +=
min(trim_amount, genomic_chunk_len);
1487product_trim_amount +=
min(trim_amount, product_chunk_len);
1491chunk = parts.erase(chunk);
1495genomic_trim_amount += trim_amount;
1496product_trim_amount += trim_amount;
1500exon_it->prod_from += product_trim_amount;
1501exon_it->genomic_from += genomic_trim_amount;
1504 CProduct_pos& prot_pos = (*spl_exon_it)->SetProduct_start();
1505SetProtpos(prot_pos, exon_it->prod_from);
1508(*spl_exon_it)->SetProduct_start().SetNucpos() += product_trim_amount;
1510(*spl_exon_it)->SetProduct_end().SetNucpos() -= product_trim_amount;
1515(*spl_exon_it)->SetGenomic_start() += genomic_trim_amount;
1517(*spl_exon_it)->SetGenomic_end() -= genomic_trim_amount;
1529 if(
source!=
nullptr) {
1530gcode =
source->GetGenCode(gcode);
@ eExtreme_Positional
numerical value
unique_ptr< SImplementation > m_impl
CConstRef< objects::CSeq_align > AdjustAlignment(const objects::CSeq_align &align, TSeqRange range, EProductPositionsMode mode=eForceProductFrom0)
EProductPositionsMode
Adjust alignment to the specified range (cross-the-origin range on circular chromosome is indicated b...
@ eTryToPreserveProductPositions
void AddSplignScores(const CSeq_align &align, CSeq_align::TScore &scores)
Compute the six splign scores.
void AddScore(CScope &scope, CSeq_align &align, CSeq_align::EScoreType score)
EScoreType
enum controlling known named scores
@ eScore_PercentIdentity_Gapped
@ eScore_PercentIdentity_Ungapped
@ eScore_HighQualityPercentCoverage
const CSeq_id & GetSeq_id(TDim row) const
Get seq-id (the first one if segments have different ids).
bool GetNamedScore(const string &id, int &score) const
Get score.
void ResetNamedScore(const string &name)
static DLIST_TYPE *DLIST_NAME() next(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define ERASE_ITERATE(Type, Var, Cont)
Non-constant version with ability to erase current element, if container permits.
int TSignedSeqPos
Type for signed sequence position.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
#define NCBI_USER_THROW(message)
Throw a quick-and-dirty runtime exception of type 'CException' with the given error message and error...
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
void Warning(CExceptionArgs_Base &args)
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
#define MSerial_AsnText
I/O stream manipulators.
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
TRange GetTotalRange(void) const
TSeqPos GetStart(ESeqLocExtremes ext) const
Return start and stop positions of the seq-loc.
const CSeq_id * GetId(void) const
Get the id of the location; return NULL if it has multiple ids or no id at all.
bool IsSameBioseq(const CSeq_id &id1, const CSeq_id &id2, CScope *scope, CScope::EGetBioseqFlag get_flag=CScope::eGetBioseq_All)
Determines if two CSeq_ids represent the same CBioseq.
const CBioSource * GetBioSource(const CBioseq &bioseq)
Retrieve the BioSource object for a given bioseq handle.
CRef< CSeq_loc > Map(const CSeq_loc &src_loc)
Map seq-loc.
TSeqPos GetSequenceLength(const CSeq_id &id, TGetFlags flags=0)
Get sequence length. Return kInvalidSeqPos if sequence is not found.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
@ eLocationToProduct
Map from the feature's location to product.
bool IsSetProduct(void) const
const CSeq_loc & GetLocation(void) const
CConstRef< CSeq_feat > GetSeq_feat(void) const
Get current seq-feat.
void Reset(void)
Reset reference object.
int64_t Int8
8-byte (64-bit) signed integer
bool IntersectingWith(const TThisType &r) const
CRange< TSeqPos > TSeqRange
typedefs for sequence ranges
CRange< TSignedSeqPos > TSignedSeqRange
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
void SetFrom(TFrom value)
Assign a value to From data member.
TTo GetTo(void) const
Get the To member data.
TFrom GetFrom(void) const
Get the From member data.
void SetTo(TTo value)
Assign a value to To data member.
const TDonor_after_exon & GetDonor_after_exon(void) const
Get the Donor_after_exon member data.
void SetScores(TScores &value)
Assign a value to Scores data member.
TScore & SetScore(void)
Assign a value to Score data member.
const TGenomic_id & GetGenomic_id(void) const
Get the Genomic_id member data.
void SetProduct_start(TProduct_start &value)
Assign a value to Product_start data member.
void SetAmin(TAmin value)
Assign a value to Amin data member.
bool IsSetParts(void) const
Basic segments are always in biologic order. Check if a value has been assigned to Parts data member.
list< CRef< CScore > > Tdata
TProduct_ins & SetProduct_ins(void)
Select the variant.
void SetProduct_end(TProduct_end &value)
Assign a value to Product_end data member.
bool IsSetProduct_strand(void) const
Should be 'plus' or 'minus'. Check if a value has been assigned to Product_strand data member.
const TProduct_id & GetProduct_id(void) const
Get the Product_id member data.
TGenomic_start GetGenomic_start(void) const
Get the Genomic_start member data.
bool CanGetExons(void) const
Check if it is safe to call GetExons method.
void SetSegs(TSegs &value)
Assign a value to Segs data member.
TDiag & SetDiag(void)
Select the variant.
bool IsSetAcceptor_before_exon(void) const
Splice sites. Check if a value has been assigned to Acceptor_before_exon data member.
TExons & SetExons(void)
Assign a value to Exons data member.
TProduct_length GetProduct_length(void) const
Get the Product_length member data.
bool IsSetPoly_a(void) const
start of poly(A) tail on the transcript For sense transcripts: aligned product positions < poly-a <= ...
void ResetScore(void)
Reset Score data member.
TExt & SetExt(void)
Assign a value to Ext data member.
void SetProduct_length(TProduct_length value)
Assign a value to Product_length data member.
TProduct_type GetProduct_type(void) const
Get the Product_type member data.
TGenomic_strand GetGenomic_strand(void) const
Get the Genomic_strand member data.
void SetGenomic_start(TGenomic_start value)
Assign a value to Genomic_start data member.
const TParts & GetParts(void) const
Get the Parts member data.
const TProduct_start & GetProduct_start(void) const
Get the Product_start member data.
const TProduct_end & GetProduct_end(void) const
Get the Product_end member data.
const TSpliced & GetSpliced(void) const
Get the variant data.
bool CanGetSegs(void) const
Check if it is safe to call GetSegs method.
bool IsSetExt(void) const
Extra info. Check if a value has been assigned to Ext data member.
TGenomic_ins GetGenomic_ins(void) const
Get the variant data.
bool IsSetGenomic_strand(void) const
Check if a value has been assigned to Genomic_strand data member.
void SetPartial(TPartial value)
Assign a value to Partial data member.
list< CRef< CSpliced_exon > > TExons
const TExons & GetExons(void) const
Get the Exons member data.
TProtpos & SetProtpos(void)
Select the variant.
TGenomic_ins & SetGenomic_ins(void)
Select the variant.
bool CanGetProduct_id(void) const
Check if it is safe to call GetProduct_id method.
bool IsSetExons(void) const
set of segments involved each segment corresponds to one exon exons are always in biological order Ch...
TParts & SetParts(void)
Assign a value to Parts data member.
void SetGenomic_end(TGenomic_end value)
Assign a value to Genomic_end data member.
list< CRef< CSpliced_exon_chunk > > TParts
bool IsSetProduct_length(void) const
length of the product, in bases/residues from this (or from poly-a if present), a 3' unaligned length...
bool IsSetScore(void) const
For whole alignment. Check if a value has been assigned to Score data member.
TPoly_a GetPoly_a(void) const
Get the Poly_a member data.
TGenomic_end GetGenomic_end(void) const
Get the Genomic_end member data.
bool IsSpliced(void) const
Check if variant Spliced is selected.
TProduct_strand GetProduct_strand(void) const
Get the Product_strand member data.
void SetFrame(TFrame value)
Assign a value to Frame data member.
TProduct_ins GetProduct_ins(void) const
Get the variant data.
const TSegs & GetSegs(void) const
Get the Segs member data.
bool IsSetDonor_after_exon(void) const
Check if a value has been assigned to Donor_after_exon data member.
bool IsSetScores(void) const
Scores for this exon. Check if a value has been assigned to Scores data member.
E_Choice Which(void) const
Which variant is currently selected.
@ e_Product_ins
insertion in product sequence (i.e. gap in the genomic sequence)
@ e_Diag
both sequences are represented, there is sufficient similarity between product and genomic sequences....
@ e_Genomic_ins
insertion in genomic sequence (i.e. gap in the product sequence)
@ e_Match
both sequences represented, product and genomic sequences match
@ e_Mismatch
both sequences represented, product and genomic sequences do not match
@ eProduct_type_transcript
ENa_strand
strand of nucleic acid
unsigned int
A callback function used to compare two keys in a database.
int GetGeneticCode(const CBioseq_Handle &bsh)
const GenericPointer< typename T::ValueType > T2 value
const CharType(& source)[N]
#define NCBI_CONST_INT8(v)
64-bit integers
static const GLdouble origin[]
TSignedSeqPos genomic_from
static void TrimLeftExon(int trim_amount, ETrimSide side, vector< SExon >::reverse_iterator left_edge, vector< SExon >::reverse_iterator &exon_it, objects::CSpliced_seg::TExons::reverse_iterator &spl_exon_it, objects::ENa_strand product_strand, objects::ENa_strand genomic_strand)
void MaximizeTranslation(objects::CSeq_align &align)
vector< SExon > GetExons(const CSeq_align &align)
void RecalculateScores(CSeq_align &align)
CConstRef< objects::CSeq_align > AdjustAlignment(const objects::CSeq_align &align, TSeqRange range, EProductPositionsMode mode)
void RecalculateExonIdty(CSpliced_exon &exon)
TSignedSeqRange GetCds(const objects::CSeq_id &seqid)
void GetExonStructure(const CSpliced_seg &spliced_seg, vector< SExon > &exons, CScope *scope)
void ClearScores(CSeq_align &align)
static void TrimRightExon(int trim_amount, ETrimSide side, vector< SExon >::iterator &exon_it, vector< SExon >::iterator right_edge, objects::CSpliced_seg::TExons::iterator &spl_exon_it, objects::ENa_strand product_strand, objects::ENa_strand genomic_strand)
void TrimHolesToCodons(objects::CSeq_align &align)
void StitchSmallHoles(objects::CSeq_align &align)
CSeq_align::EScoreType s_ScoresToRecalculate[]
CMappedFeat GetCdsOnMrna(const objects::CSeq_id &rna_id, CScope &scope)
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4