RestoreFivePrime(
size_tbeg)
const;
78 size_tRestoreThreePrime(
size_tend)
const;
110 const string&
nuc= alignment_text.
GetDNA();
111 const string& outp = alignment_text.
GetProtein();
112 const string& orig_match = alignment_text.
GetMatch();
113list<CNPiece> m_AliPiece;
116string::size_type n1 = outp.find_first_not_of(
GAP_CHAR);
117string::size_type n2 = outp.find_last_not_of(
GAP_CHAR);
119m_AliPiece.push_back(
CNPiece(n1, n2+1, 0, 0));
123 string match= orig_match;
124 for(
size_t i= 1;
i<
match.size()-1; ++
i) {
135 for(list<CNPiece>::iterator it = m_AliPiece.begin(); it != m_AliPiece.end(); ) {
137m_AliPiece.splice(it,
tmp);
138it = m_AliPiece.erase(it);
140 for(list<CNPiece>::iterator it = m_AliPiece.begin(); it != m_AliPiece.end(); ) {
142m_AliPiece.splice(it,
tmp);
143it = m_AliPiece.erase(it);
148 if( !m_AliPiece.empty() ) {
149m_AliPiece.front().beg = trim.
CutFromLeft(m_AliPiece.front(), m_options);
150m_AliPiece.back().end = trim.
CutFromRight(m_AliPiece.back(), m_options);
154 if( !m_AliPiece.empty() && m_options.
GetFillHoles() ) {
155string::size_type beg = m_AliPiece.front().beg;
156string::size_type end = m_AliPiece.back().end;
158m_AliPiece.push_back(
CNPiece(beg, end, 0, 0));
162 if( !m_AliPiece.empty() && m_options.
GetMinHoleLen() > 0 ) {
163 for(list<CNPiece>::iterator it = m_AliPiece.begin(); it != m_AliPiece.end(); ) {
164list<CNPiece>::iterator sit = it;
166 if(sit == m_AliPiece.end())
break;
170 intnuc_cnt = 0, prot_cnt = 0;
171 for(
intpos = hbeg; pos < hend; ++pos) {
177it = m_AliPiece.erase(it);
185 if( !m_AliPiece.empty() ) {
186 boolkeep_trimming =
true;
187 while( keep_trimming ) {
188 CNPiece& pc = *m_AliPiece.rbegin();
192 for(;
n>= pc.
beg; --
n) {
197 if(pc.
beg>= pc.
end) {
198m_AliPiece.pop_back();
204 if( !m_AliPiece.empty() && m_options.
GetCutNs() ) {
205 for(list<CNPiece>::iterator it = m_AliPiece.begin(); it != m_AliPiece.end(); ) {
206 intpos = it->end - 1;
207 for(; pos >= it->beg &&
nuc[pos] ==
'N'; --pos);
209it = m_AliPiece.erase(it);
219 for(list<CNPiece>::iterator it = m_AliPiece.begin(); it != m_AliPiece.end(); ) {
220 intpos = it->end - 1;
228it = m_AliPiece.erase(it);
237 for(list<CNPiece>::iterator it = m_AliPiece.begin(); it != m_AliPiece.end(); ) {
239 intpos = it->end - 1;
240 for(; pos >= it->beg && (
islower(outp[pos]) || outp[pos] ==
INTRON_CHAR) ; --pos);
242it = m_AliPiece.erase(it);
248 for(list<CNPiece>::iterator it = m_AliPiece.begin(); it != m_AliPiece.end(); ) {
252 if(pos == it->end) {
253it = m_AliPiece.erase(it);
264 if( !m_AliPiece.empty() ) {
275list<CNPiece> m_AliPiece;
276 const string&
match= match_all_pos;
279 if(
n== string::npos ||
n>= (
unsigned)pc.
end)
returnm_AliPiece;
281 boolisintr =
false;
282string::size_type beg =
n;
286 for(;
n<(unsigned)pc.
end; ++
n) {
290m_AliPiece.push_back(
CNPiece(beg,
n, 0, efflen));
297m_AliPiece.push_back(
CNPiece(beg,
n, efflen, efflen));
314m_AliPiece.push_back(
CNPiece(beg,
n, efflen, efflen));
317list<CNPiece>::iterator itb, ite, itc;
318list<CNPiece>::size_type pnum = m_AliPiece.size() + 1;
319 while(pnum > m_AliPiece.size()) {
320pnum = m_AliPiece.size();
322 for(itb = m_AliPiece.begin(); ; ) {
324 intslen = 0, spos = 0;
327 while(itc != m_AliPiece.end()) {
328 if(m_options.
Bad(itc))
break;
331 if(m_options.
Dropof(slen, spos, itb))
break;
333 if(m_options.
Perc(itc, slen, spos, itb)) {
334 if(m_options.
BackCheck(itb, itc)) ite = itc;
341m_options.
Join(itb, ite);
342m_AliPiece.erase(itb, ite);
346 if(itb == m_AliPiece.end())
break;
350itb = m_AliPiece.end();
352 while(itb != m_AliPiece.begin()) {
354 intslen = 0, spos = 0;
356 while(itc != m_AliPiece.begin()) {
358 if(m_options.
Bad(itc))
break;
361 if(m_options.
Dropof(slen, spos, itb))
break;
363 if(m_options.
Perc(itc, slen, spos, itb)) {
364 if(m_options.
ForwCheck(itc, itb)) ite = itc;
370m_options.
Join(ite, itb);
371m_AliPiece.erase(ite, itb);
373 if(itb == m_AliPiece.begin())
break;
379 for(list<CNPiece>::iterator it = m_AliPiece.begin(); it != m_AliPiece.end(); ) {
380 if(it->posit == 0) it = m_AliPiece.erase(it);
381 else if(it->efflen < m_options.
GetMinGoodLen()) it = m_AliPiece.erase(it);
390 const string&
match= match_all_pos;
392vector<pair<int, int> > exons;
394 boolin_exon =
false;
395 for(
int n=pc.
beg;
n<pc.
end; ) {
398exons.push_back(make_pair(
n, 0));
403exons.back().second =
n;
407 intcur_beg = pc.
beg;
408 for(vector<pair<int, int> >::iterator eit = exons.begin(); eit != exons.end(); ++eit) {
411 int len= eit->second - eit->first;
412 for(
int i= eit->first; i < eit->second; ++
i) {
422 for(
n= eit->first - 1;
n> cur_beg; --
n) {
426 if(
n> cur_beg) alip.push_back(
CNPiece(cur_beg,
n, 0, 0));
428 for(
n= eit->second;
n< pc.
end; ++
n) {
434 if(cur_beg < pc.
end) alip.push_back(
CNPiece(cur_beg, pc.
end, 0, 0));
442 const string&
nuc= alignment_text.
GetDNA();
445 intcnuc = 0, cprot = 0, cmax = 18;
447 for(
n= pc.
end- 1;
n>= pc.
beg; --
n) {
451score -= scoring.
sm_Ig;
463score -= scoring.
sm_Ine;
467score -= scoring.
sm_Ine;
487 if(cnuc >= cmax && cprot >= cmax)
break;
490score -= scoring.
sm_Ig;
525 if(
Dropof(efflen, posit, add))
return false;
526 if(
GetTotalPositives()*(efflen+cur->efflen+add->efflen) > 100*(posit+cur->posit+add->posit))
return false;
532 intposit =
last->posit;
533 intefflen =
last->efflen;
534 for(list<prosplign::CNPiece>::iterator it1 = it; it1 !=
last; ++it1) {
536efflen += it1->efflen;
538 last->posit = posit;
539 last->efflen = efflen;
540 last->beg = it->beg;
545 intefflen = it1->efflen;
546 intpos = it1->posit;
549 if(
Dropof(efflen, pos, it1))
return false;
550efflen += it1->efflen;
553efflen += it1->efflen;
561 intefflen = it2->efflen;
562 intpos = it2->posit;
565 if(
Dropof(efflen, pos, it2))
return false;
566efflen += it2->efflen;
569efflen += it2->efflen;
585 if((*field)->CanGetLabel() && (*field)->GetLabel().IsStr() && (*field)->GetLabel().GetStr()==
"CompartmentId") {
586 return(*field)->GetData().GetInt();
598 intcompartment_id = GetCompNum(seqalign);
602 intnuc_from =
bounds.GetFrom()+1;
603 intnuc_to =
bounds.GetTo()+1;
606 out<<endl<<
"************************************************************************"<<endl;
607 out<<
"************************************************************************"<<endl;
608 out<<
"************************************************************************"<<endl;
609 out<<compartment_id<<
"\t"<<contig_name<<
"\t"<<prot_id<<
"\t"<<nuc_from<<
"\t"<<nuc_to<<
"\t";
610 out<<(is_plus_strand?
'+':
'-')<<endl;
613 const string& dna = align_text.GetDNA();
614 const string& translation = align_text.GetTranslation();
615 string match= align_text.GetMatch();
616 const string& protein = align_text.GetProtein();
618good_parts.append(
match.size(),
'*');
619 for(
size_t i= 0;
i<
match.size(); ++
i) {
621 match[
i] = good_parts[
i] =
' ';
624 intnpos1 = is_plus_strand?nuc_from:nuc_to;
626 intprot_beg_pos =
static_cast<int>(protein.find_first_not_of(
GAP_CHAR));
627 intprot_end_pos =
static_cast<int>(protein.find_last_not_of(
GAP_CHAR));
629 for(
int i=0;
i<prot_end_pos;
i+=width) {
630 intapos =
i+width-1;
631 if(apos >= (
int)dna.length()) {
632apos = (
int)dna.length() - 1;
636 #ifdef NCBI_COMPILER_WORKSHOP 639 intreal_bases = width-gaps;
641 intreal_bases =
static_cast<int>(width-
count(dna.begin()+
i, dna.begin()+(
i+width),
GAP_CHAR));
644 intnpos2 = is_plus_strand?npos1+real_bases-1:npos1-(real_bases-1);
647 if(apos > prot_beg_pos) {
648 out.setf(IOS_BASE::left, IOS_BASE::adjustfield);
650 out<<setw(12)<<npos1<<dna.substr(
i, width)<<
" "<<npos2<<endl;
652 out<<setw(12)<<
"-"<<dna.substr(
i, width)<<
" "<<
"-"<<endl;
654 out<<setw(12)<<
" "<<translation.substr(
i, width)<<endl;
655 out<<setw(12)<<
" "<<
match.substr(
i, width)<<endl;
656 out<<setw(12)<<
" "<<protein.substr(
i, width)<<endl;
657 out<<setw(12)<<
" "<<good_parts.substr(
i, width)<<endl;
660npos1 = is_plus_strand?npos2+1:npos2-1;
669CAliChunk(
TSeqPosali_pos,
TSeqPosnuc_pos,
TSeqPosprot_pos, CSpliced_seg::TExons::iterator exon_iter, CSpliced_exon::TParts::iterator chunk_iter) :
670m_nuc_pos(nuc_pos), m_prot_pos(prot_pos), m_exon_iter(exon_iter), m_chunk_iter(chunk_iter), m_bad(
false)
680}
else if(chunk.
IsMatch()) {
694m_ali_range =
TSeqRange(ali_pos, ali_pos +
max(m_nuc_len,m_prot_len)-1);
702CSpliced_seg::TExons::iterator m_exon_iter;
703CSpliced_exon::TParts::iterator m_chunk_iter;
707 typedeflist<CAliChunk> TAliChunkCollection;
708 typedefTAliChunkCollection::iterator TAliChunkIterator;
716 intnuc_from =
bounds.GetFrom();
717 intnuc_to =
bounds.GetTo();
720 intalignment_pos = 0;
722TAliChunkCollection chunks;
731alignment_pos +=
max(prot_cur_start-prot_from, nuc_cur_start-nuc_from);
732nuc_from = nuc_cur_start;
734alignment_pos +=
max(prot_cur_start-prot_from, nuc_to-nuc_cur_end);
735nuc_to = nuc_cur_end;
737prot_from = prot_cur_start;
740CAliChunk chunk(alignment_pos, strand ==
eNa_strand_plus?nuc_from:nuc_to, prot_from, e_it, p_it);
741alignment_pos = chunk.m_ali_range.GetTo()+1;
742prot_from += chunk.m_prot_len;
744nuc_from += chunk.m_nuc_len;
746nuc_to -= chunk.m_nuc_len;
749chunks.push_back(chunk);
758list<TSeqRange> InvertPartList(
constlist<CNPiece>& good_parts,
TSeqRangetotal_range)
760list<TSeqRange> bad_parts;
762 inttail_beg = total_range.
GetFrom();
763 inttail_end = total_range.
GetTo();
764 ITERATE(list<CNPiece>,
i, good_parts) {
765 if(tail_beg < i->beg)
766bad_parts.push_back(
TSeqRange(tail_beg,
i->beg-1));
769 if(tail_beg <= tail_end)
770bad_parts.push_back(
TSeqRange(tail_beg,tail_end));
787}
else if(chunk.
IsMatch()) {
806 _ASSERT( nuc_cur_end-nuc_cur_start+1 == nuc_len );
807 _ASSERT( prot_cur_end-prot_cur_start+1 == prot_len );
812 voidSplitChunk(TAliChunkCollection& chunks, TAliChunkIterator iter,
TSeqPosstart_of_second_chunk,
boolgenomic_plus)
814 _DEBUG_CODE( TestExonLength(**iter->m_exon_iter); );
815 _ASSERT( iter->m_ali_range.GetFrom() < start_of_second_chunk );
816 _ASSERT( start_of_second_chunk <= iter->m_ali_range.GetTo());
817 _ASSERT( iter->m_nuc_len == iter->m_prot_len );
820new_chunk->Assign(**iter->m_chunk_iter);
821 intfirst_len = start_of_second_chunk - iter->m_ali_range.GetFrom();
822 intsecond_len = iter->m_ali_range.GetTo() - start_of_second_chunk+1;
824TAliChunkIterator first_iter = chunks.insert(iter, *iter);
827iter->m_nuc_pos += first_len;
829iter->m_nuc_pos -= first_len;
831iter->m_prot_pos += first_len;
833 if(new_chunk->IsDiag()) {
834new_chunk->SetDiag(first_len);
835(*iter->m_chunk_iter)->SetDiag(second_len);
836}
else if(new_chunk->IsMatch()) {
837new_chunk->SetMatch(first_len);
838(*iter->m_chunk_iter)->SetMatch(second_len);
839}
else if(new_chunk->IsMismatch()) {
840new_chunk->SetMismatch(first_len);
841(*iter->m_chunk_iter)->SetMismatch(second_len);
844first_iter->m_ali_range.SetTo(start_of_second_chunk-1);
845iter->m_ali_range.SetFrom(start_of_second_chunk);
847first_iter->m_nuc_len = first_iter->m_prot_len = first_len;
848iter->m_nuc_len = iter->m_prot_len = second_len;
850first_iter->m_chunk_iter = (*iter->m_exon_iter)->SetParts().insert(iter->m_chunk_iter, new_chunk);
852 _DEBUG_CODE( TestExonLength(**iter->m_exon_iter); );
857exons.erase(exon_iter);
858exon_iter = exons.end();
861 voidDropExonHead(TAliChunkIterator chunk_iter,
boolgenomic_plus)
867 size_tchunks_count = cur_exon->
GetParts().size();
870cur_exon->
SetParts().erase(cur_exon->
SetParts().begin(), chunk_iter->m_chunk_iter);
894 size_tchunks_count = cur_exon->
GetParts().size();
898new_exon->Assign(*cur_exon);
900CSpliced_exon::TParts::iterator new_exon_chunk = new_exon->SetParts().begin();
902 if(old_exon_chunk==chunk_iter->m_chunk_iter)
907new_exon->SetParts().erase(new_exon_chunk, new_exon->SetParts().end());
910new_exon->SetGenomic_end(chunk_iter->m_nuc_pos-1);
912new_exon->SetGenomic_start(chunk_iter->m_nuc_pos+1);
915new_exon->SetPartial(
true);
916 if(new_exon->IsSetDonor_after_exon())
917new_exon->ResetDonor_after_exon();
919 _ASSERT( new_exon->GetGenomic_start() <= new_exon->GetGenomic_end() );
920 _ASSERT( new_exon->GetProduct_start().AsSeqPos() <= new_exon->GetProduct_end().AsSeqPos() );
922exons.insert(chunk_iter->m_exon_iter, new_exon);
924DropExonHead(chunk_iter, genomic_plus);
926 _ASSERT( 0 < new_exon->GetParts().size() && 0 < cur_exon->
GetParts().size() );
927 _ASSERT( new_exon->GetParts().size()+cur_exon->
GetParts().size() == chunks_count );
935 void prosplign::SetScores(objects::CSeq_align& seq_align, objects::CScope& scope,
const string& matrix_name) {
937 const string&
prot= pro_text.GetProtein();
938 const string& dna = pro_text.GetDNA();
939 const string&
match= pro_text.GetMatch();
941 intpos = 0, ident = 0,
len= 0, neg = 0, pgap = 0, ngap = 0;
942 for(string::size_type
i=0;
i<
match.size(); ++
i) {
943 if( (prot[
i] !=
'.') && (
match[
i] !=
'X') ) {
945 if(prot[
i] ==
'-') {
947}
else if(dna[
i] ==
'-') {
953 if(triple) ident +=3;
967seq_align.SetNamedScore(
"num_ident", ident);
968seq_align.SetNamedScore(
"num_positives", pos);
969seq_align.SetNamedScore(
"num_negatives", neg);
970seq_align.SetNamedScore(
"product_gap_length", pgap);
971seq_align.SetNamedScore(
"genomic_gap_length", ngap);
972seq_align.SetNamedScore(
"align_length",
len);
976 for(ibeg = 0; ibeg<(
int)(
prot.size()) && ( (
prot[ibeg] ==
'.') || (
match[ibeg] ==
'X') || (
prot[ibeg] ==
'-') ); ++ibeg) {}
977 for(iend =
prot.size() - 1; iend >=0 && ( (
prot[iend] ==
'.') || (
match[iend] ==
'X') || (
prot[iend] ==
'-') ); --iend) {}
978 for(
int i=ibeg;
i<=iend; ++
i) {
979 if( (prot[
i] !=
'.') && (
match[
i] !=
'X') ) {
980 if(prot[
i] ==
'-') {
985seq_align.SetNamedScore(
"product_internal_gap_length", ipgap);
991TAliChunkCollection chunks = ExtractChunks(scope, seq_align);
998list<TSeqRange> bad_parts = InvertPartList(good_parts,
TSeqRange(chunks.front().m_ali_range.GetFrom(),chunks.back().m_ali_range.GetTo()));
1000TAliChunkIterator chunk_iter = chunks.begin();
1002 ITERATE(list<TSeqRange>, bad_part, bad_parts) {
1003 while(chunk_iter != chunks.end() && chunk_iter->m_ali_range.GetTo() < bad_part->GetFrom()) {
1007 if(chunk_iter == chunks.end())
1009 if(bad_part->GetTo() < chunk_iter->m_ali_range.GetFrom())
1012 if(chunk_iter->m_ali_range.GetFrom() < bad_part->GetFrom())
1013SplitChunk(chunks, chunk_iter, bad_part->GetFrom(), genomic_plus);
1015 while(chunk_iter != chunks.end() && chunk_iter->m_ali_range.GetTo() <= bad_part->GetTo())
1016chunk_iter++->m_bad =
true;
1018 if(chunk_iter != chunks.end() && chunk_iter->m_ali_range.GetFrom() <= bad_part->GetTo()) {
1019chunk_iter->m_bad =
true;
1020SplitChunk(chunks, chunk_iter, bad_part->GetTo()+1, genomic_plus);
1021chunk_iter->m_bad =
false;
1025CSpliced_seg::TExons::iterator prev_exon_iter = sps.
SetExons().end();
1028 while(chunk_it != chunks.end() && !chunk_it->m_bad) {
1029prev_exon_iter = chunk_it->m_exon_iter;
1032 if(chunk_it == chunks.end())
1034 if(prev_exon_iter != chunk_it->m_exon_iter) {
1035 if((*chunk_it->m_exon_iter)->IsSetAcceptor_before_exon())
1036(*chunk_it->m_exon_iter)->ResetAcceptor_before_exon();
1038SplitExon(sps.
SetExons(),chunk_it, genomic_plus);
1041prev_exon_iter = chunk_it->m_exon_iter;
1042TAliChunkIterator next_chunk_iter = chunk_it;
1044 while(next_chunk_iter != chunks.end() && next_chunk_iter->m_bad && next_chunk_iter->m_exon_iter==prev_exon_iter) {
1045chunk_it = next_chunk_iter++;
1048 if(next_chunk_iter == chunks.end() || next_chunk_iter->m_exon_iter!=prev_exon_iter) {
1049DropExon(sps.
SetExons(), prev_exon_iter);
1051DropExonHead(next_chunk_iter, genomic_plus);
1057TestExonLength(**e_it);
1067: m_alignment_text(alignment_text) {
1068 const string& outp = alignment_text.
GetProtein();
1071 for(
size_t i= 1;
i<
match.size()-1; ++
i) {
1092 if(pbeg == string::npos)
returnbeg;
1093 if( pbeg >= beg )
returnbeg;
1094 intali_len = (
int)(beg - pbeg);
1096 if( ali_len > 36 )
returnbeg;
1099 intmismatch_cnt = 0;
1101 for(
size_t i= pbeg;
i< beg; ++
i) {
1105 if( in_gap != -1 ) {
1109}
else if( dna_row[
i] ==
GAP_CHAR) {
1110 if( in_gap != 1 ) {
1123 if( gap_cnt == 0 && mismatch_cnt < 10)
returnpbeg;
1124 if( gap_cnt < 3 && 100 * posit_cnt >= 60 * ali_len )
returnpbeg;
1125 if( gap_cnt < 2 && 100 * posit_cnt >= 50 * ali_len )
returnpbeg;
1137 if(pend == string::npos)
returnend;
1140 if( end >= pend )
returnend;
1141 intali_len = (
int)(pend-end);
1142 if( ali_len > 36 )
returnend;
1145 intmismatch_cnt = 0;
1147 for(
size_t i= end;
i<pend; ++
i) {
1150 if( tran_row[
i] ==
'*')
returnend;
1152 if( in_gap != -1 ) {
1156}
else if( dna_row[
i] ==
GAP_CHAR) {
1157 if( in_gap != 1 ) {
1170 if( gap_cnt == 0 && mismatch_cnt < 10)
returnpend;
1171 if( gap_cnt < 3 && 100 * posit_cnt >= 60 * ali_len )
returnpend;
1172 if( gap_cnt < 2 && 100 * posit_cnt >= 50 * ali_len )
returnpend;
1192 boolkeep_trimming =
true;
1196 while( keep_trimming ) {
1198 intbegpos = pc.
beg;
1199 intendpos = pc.
end;
1201 doublecur_max_drop = 0;
1202 intcur_cut = begpos;
1204 intcur_pos = begpos;
1215 intps_dna_gap_len = 0;
1216 intps_prot_gap_len = 0;
1222 if( cur_end >= endpos )
returnpc.
beg;
1224 for(
intpos = cur_pos; pos < cur_end; ++pos ) {
1240 if( max_cut_len < cur_pos - begpos + 1 ) {
1257ps_len += ps_len_increment;
1258ps_pos += ps_len_increment;
1259ps_prot_gap_len = 0;
1261}
else if( dna[cur_pos] ==
GAP_CHAR) {
1263 if( ps_dna_gap_len < 3 ) {
1264ps_len += ps_len_increment;
1269ps_prot_gap_len = 0;
1271 if( ps_prot_gap_len < 3 ) {
1272ps_len += ps_len_increment;
1279ps_len += ps_len_increment;
1280ps_prot_gap_len = 0;
1287 doubleposit_drop = rposit/(double)
window_size- ps_pos/(
double)ps_len;
1288 if( posit_drop >= dropoff && ( posit_drop > cur_max_drop || cur_cut == begpos ) ) {
1289cur_max_drop = posit_drop;
1292}
while( cur_end < endpos );
1294 if( cur_cut == begpos ) {
1295keep_trimming =
false;
1302 for( ; cur_cut < endpos; ++cur_cut ) {
1307 if( cur_cut >= endpos )
returnpc.
beg;
1314 for( ; cur_cut >= begpos; --cur_cut) {
1320 if( cur_cut <= begpos )
returnpc.
beg;
1344 boolkeep_trimming =
true;
1346 while( keep_trimming ) {
1348 intbegpos = pc.
beg;
1349 intendpos = pc.
end;
1351 doublecur_max_drop = 0;
1352 intcur_cut = endpos;
1354 intwin_end = endpos;
1366 intps_dna_gap_len = 0;
1367 intps_prot_gap_len = 0;
1373 for(
intpos = win_beg; pos < win_end; ++pos ) {
1383 while( win_beg > begpos ) {
1392 if( max_cut_len < endpos - win_end ) {
1409 intcur_pos = win_end;
1413ps_len += ps_len_increment;
1414ps_pos += ps_len_increment;
1415ps_prot_gap_len = 0;
1417}
else if( dna[cur_pos] ==
GAP_CHAR) {
1419 if( ps_dna_gap_len < 3 ) {
1420ps_len += ps_len_increment;
1425ps_prot_gap_len = 0;
1427 if( ps_prot_gap_len < 3 ) {
1428ps_len += ps_len_increment;
1435ps_len += ps_len_increment;
1436ps_prot_gap_len = 0;
1441 doubleposit_drop = wposit/(double)
window_size- ps_pos/(
double)ps_len;
1442 if( posit_drop >= dropoff && ( posit_drop > cur_max_drop || cur_cut == endpos ) ) {
1443cur_max_drop = posit_drop;
1448 if( cur_cut == endpos ) {
1449keep_trimming =
false;
1455 for( --cur_cut; cur_cut >= begpos; --cur_cut ) {
1461 if( cur_cut <= begpos )
returnpc.
end;
1468 for( ; cur_cut < endpos; ++cur_cut ) {
1473 if(cur_cut >= endpos)
returnpc.
end;
CRef< CProduct_pos > NultriposToProduct_pos(int nultripos)
Convert linear coordinate into (amin,frame)
list< CNPiece > ExcludeBadExons(const CNPiece pc, const string &match_all_pos, const string &protein, CProSplignOutputOptionsExt m_options)
const char BAD_OR_MISMATCH[]
list< CNPiece > FindGoodParts(const CProteinAlignText &alignment_text, CProSplignOutputOptionsExt m_options, const CProSplignScaledScoring &scoring, const CSubstMatrix &matrix)
const char BAD_PIECE_CHAR
USING_SCOPE(ncbi::objects)
bool TrimNegativeTail(CNPiece &pc, const CProteinAlignText &alignment_text, const CProSplignScaledScoring &scoring, const CSubstMatrix &matrix)
const char INTRON_OR_GAP[]
void RefineAlignment(objects::CScope &scope, objects::CSeq_align &seq_align, const list< CNPiece > &good_parts)
void SetScores(objects::CSeq_align &seq_align, objects::CScope &scope, const string &matrix_name="BLOSUM62")
CNPiece(string::size_type obeg, string::size_type oend, int oposit, int oefflen)
Extended output filtering parameters deprecated, used in older programs.
CProSplignOutputOptionsExt(const CProSplignOutputOptions &options)
bool Perc(list< prosplign::CNPiece >::iterator it, int efflen, int posit, list< prosplign::CNPiece >::iterator last)
bool ForwCheck(list< prosplign::CNPiece >::iterator it1, list< prosplign::CNPiece >::iterator it2)
void Join(list< prosplign::CNPiece >::iterator it, list< prosplign::CNPiece >::iterator last)
bool Bad(list< prosplign::CNPiece >::iterator it)
bool BackCheck(list< prosplign::CNPiece >::iterator it1, list< prosplign::CNPiece >::iterator it2)
bool Dropof(int efflen, int posit, list< prosplign::CNPiece >::iterator it)
Output filtering parameters.
bool GetCutFlankPartialCodons() const
int GetTotalPositives() const
int GetCutFlanksWithPositGapRatio() const
int GetCutFlanksWithPositWindow() const
int GetCutFlanksWithPositMaxLen() const
bool GetFillHoles() const
bool GetCutFlanksWithPositDrop() const
int GetMinHoleLen() const
int GetMinFlankingExonLen() const
int GetMinGoodLen() const
int GetFlankPositives() const
int GetCutFlanksWithPositDropoff() const
int GetMinExonPos() const
int GetStartBonus() const
bool IsPassThrough() const
static void Output(const objects::CSeq_align &seqalign, objects::CScope &scope, ostream &out, int width, const string &matrix_name="BLOSUM62")
Outputs formatted text.
size_t RestoreThreePrime(size_t end) const
CProSplignTrimmer(const CProteinAlignText &alignment_text)
CProSplignTrimmer implementation.
size_t RestoreFivePrime(size_t beg) const
checks if alignment ends should be restored beyond 'beg' or 'end' returns new flanking coord or 'beg'...
int CutFromRight(CNPiece pc, const CProSplignOutputOptionsExt &options) const
trim right flank with positives dropoff over a cutoff, iterative 'pc' should not be dropped completel...
int CutFromLeft(CNPiece pc, const CProSplignOutputOptionsExt &options) const
trim flanks with positives dropoff over a cutoff, iterative flank 'good pieces' should not be dropped...
const CProteinAlignText & m_alignment_text
Text representation of ProSplign alignment.
const string & GetDNA() const
const string & GetMatch() const
const string & GetProtein() const
static CRef< objects::CSeq_loc > GetGenomicBounds(objects::CScope &scope, const objects::CSeq_align &seqalign)
const string & GetTranslation() const
Substitution Matrix for Scoring Amino-Acid Alignments.
int ScaledScore(char amin1, char amin2) const
static const char * bounds[]
Include a standard set of the NCBI C++ Toolkit most basic headers.
std::ofstream out("events_result.xml")
main entry point for tests
static DLIST_TYPE *DLIST_NAME() last(DLIST_LIST_TYPE *list)
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
#define _DEBUG_CODE(code)
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
string GetSeqIdString(bool with_version=false) const
Return seqid string with optional version for text seqid type.
CRange< TSeqPos > TSeqRange
typedefs for sequence ranges
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
TTo GetTo(void) const
Get the To member data.
TFrom GetFrom(void) const
Get the From member data.
bool CanGetData(void) const
Check if it is safe to call GetData method.
const TData & GetData(void) const
Get the Data member data.
vector< CRef< CUser_field > > TData
const TGenomic_id & GetGenomic_id(void) const
Get the Genomic_id member data.
void SetProduct_start(TProduct_start &value)
Assign a value to Product_start data member.
TMatch GetMatch(void) const
Get the variant data.
const TProduct_id & GetProduct_id(void) const
Get the Product_id member data.
TGenomic_start GetGenomic_start(void) const
Get the Genomic_start member data.
bool IsMismatch(void) const
Check if variant Mismatch is selected.
void SetSegs(TSegs &value)
Assign a value to Segs data member.
bool IsSetAcceptor_before_exon(void) const
splice sites Check if a value has been assigned to Acceptor_before_exon data member.
TExons & SetExons(void)
Assign a value to Exons data member.
TDiag GetDiag(void) const
Get the variant data.
TMismatch GetMismatch(void) const
Get the variant data.
TGenomic_strand GetGenomic_strand(void) const
Get the Genomic_strand member data.
list< CRef< CUser_object > > TExt
void SetGenomic_start(TGenomic_start value)
Assign a value to Genomic_start data member.
const TParts & GetParts(void) const
Get the Parts member data.
const TProduct_start & GetProduct_start(void) const
Get the Product_start member data.
const TProduct_end & GetProduct_end(void) const
Get the Product_end member data.
const TSpliced & GetSpliced(void) const
Get the variant data.
bool IsGenomic_ins(void) const
Check if variant Genomic_ins is selected.
bool IsMatch(void) const
Check if variant Match is selected.
TGenomic_ins GetGenomic_ins(void) const
Get the variant data.
void SetPartial(TPartial value)
Assign a value to Partial data member.
bool CanGetExt(void) const
Check if it is safe to call GetExt method.
list< CRef< CSpliced_exon > > TExons
const TExons & GetExons(void) const
Get the Exons member data.
void ResetAcceptor_before_exon(void)
Reset Acceptor_before_exon data member.
TParts & SetParts(void)
Assign a value to Parts data member.
bool IsDiag(void) const
Check if variant Diag is selected.
void SetGenomic_end(TGenomic_end value)
Assign a value to Genomic_end data member.
const TExt & GetExt(void) const
Get the Ext member data.
list< CRef< CSpliced_exon_chunk > > TParts
TGenomic_end GetGenomic_end(void) const
Get the Genomic_end member data.
bool IsProduct_ins(void) const
Check if variant Product_ins is selected.
TProduct_ins GetProduct_ins(void) const
Get the variant data.
const TSegs & GetSegs(void) const
Get the Segs member data.
ENa_strand
strand of nucleic acid
unsigned int
A callback function used to compare two keys in a database.
static int match(PCRE2_SPTR start_eptr, PCRE2_SPTR start_ecode, uint16_t top_bracket, PCRE2_SIZE frame_size, pcre2_match_data *match_data, match_block *mb)
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4