kNonCoveredEndThreshold (55);
88 const doublekPower (2.5);
93 const size_tkMinTermExonSize (28);
94 const doublekMinTermExonIdty (0.9);
97 const intkFlankExonProx (20);
100 const intkMaxCutToSplice (6);
108 const intkEstMatchScore (1000);
109 const intkEstMismatchScore (-1011);
110 const intkEstGapOpeningScore(-1460);
111 const intkEstGapExtensionScore(-464);
113 const intkEstGtAgSpliceScore(-4988);
114 const intkEstGcAgSpliceScore(-5999);
115 const intkEstAtAcSpliceScore(-7010);
116 const intkEstNonConsensusSpliceScore(-13060);
160m_CanResetHistory (
false),
162m_ScoringType(s_GetDefaultScoringType()),
163m_MatchScore(s_GetDefaultMatchScore()),
164m_MismatchScore(s_GetDefaultMismatchScore()),
165m_GapOpeningScore(s_GetDefaultGapOpeningScore()),
166m_GapExtensionScore(s_GetDefaultGapExtensionScore()),
167m_GtAgSpliceScore(s_GetDefaultGtAgSpliceScore()),
168m_GcAgSpliceScore(s_GetDefaultGcAgSpliceScore()),
169m_AtAcSpliceScore(s_GetDefaultAtAcSpliceScore()),
170m_NonConsensusSpliceScore(s_GetDefaultNonConsensusSpliceScore()),
172m_MinExonIdty(s_GetDefaultMinExonIdty()),
173m_MinPolyaExtIdty(s_GetDefaultPolyaExtIdty()),
174m_MinPolyaLen(s_GetDefaultMinPolyaLen()),
175m_MinHoleLen(s_GetDefaultMinHoleLen()),
176m_TrimToCodons(s_GetDefaultTrimToCodons()),
177m_CompartmentPenalty(s_GetDefaultCompartmentPenalty()),
178m_MinCompartmentIdty(s_GetDefaultMinCompartmentIdty()),
179m_MinSingletonIdty(s_GetDefaultMinCompartmentIdty()),
187m_max_genomic_ext (s_GetDefaultMaxGenomicExtent()),
189m_MaxPartExonIdentDrop (s_GetDefaultMaxPartExonIdentDrop()),
191m_MaxCompsPerQuery (0),
192m_MinPatternHitLength (13)
228aligner->SetScoreMatrix(
NULL);
246 if(low_query_quality) {
247aligner->
SetWm(kEstMatchScore);
248aligner->
SetWms(kEstMismatchScore);
249aligner->
SetWg(kEstGapOpeningScore);
250aligner->
SetWs(kEstGapExtensionScore);
252aligner->
SetWi(0, kEstGtAgSpliceScore);
253aligner->
SetWi(1, kEstGcAgSpliceScore);
254aligner->
SetWi(2, kEstAtAcSpliceScore);
255aligner->
SetWi(3, kEstNonConsensusSpliceScore);
392 if(!(0 <= idty && idty <= 1)) {
402 if(!(0 <= idty && idty <= 1)) {
424 if(!(0 <= idty && idty <= 1)) {
434 if(!(0 <= idty && idty <= 1)) {
590 if(penalty < 0 || penalty > 1) {
608 if( pos+1 == 0 || pos >=
m_genomic.size() )
return true;
622 boolretain,
boolis_genomic,
boolgenomic_strand)
647 string(
"Sequence is empty: ")
654 if(start > finish) {
656ostr <<
"Invalid sequence interval requested for " 658<< start <<
'\t'<< finish;
670 CSeq_loctmp_loc(*tmp_id, start, finish, strand);
673seq->resize(1 + finish - start);
674 copy(s.begin(), s.end(), seq->begin());
682 m_Scope->RemoveFromHistory(bh);
712 for(
TSeqPosloop = start; loop <= finish; loop++) {
743hr->SetQueryStart(q0);
744hr->SetSubjStart(s0);
745hr->SetQueryStop(q - 1);
746hr->SetSubjStop(s - 1);
747hr->SetLength(q - q0);
748hr->SetMismatches(0);
751hr->SetScore(2*(q - q0));
759 const char* Seq1 (&
m_mrna.front());
760 const char* Seq2 (&
m_genomic.front());
765 const doubleidty (h->GetIdentity());
766 const booldiag (h->GetGaps() == 0 && h->GetQuerySpan() == h->GetSubjSpan());
767 if(idty == 1 || idty < .95 || h->
GetLength() < 100 || !diag) {
772 intq0 (-1), s0 (-1), q1 (h->GetQueryMax());
773 intq (h->GetQueryMin()), s (h->GetSubjMin());
776 if(Seq1[q++] != Seq2[s++]) {
779hr->SetQueryId(h->GetQueryId());
780hr->SetSubjId(h->GetSubjId());
796hr->SetQueryId(h->GetQueryId());
797hr->SetSubjId(h->GetSubjId());
819THitComparator sorter (THitComparator::eQueryMin);
820stable_sort(phitrefs->begin(), phitrefs->end(), sorter);
838 const boolnon_intersect = ( prevSmax < h->GetSubjMin() ) || ( prevSmin > h->GetSubjMax() );
839 if(!non_intersect) {
844 const boolconsistent (h->GetSubjStrand()?
850+
string(
" (extra long introns)"));
855 prev= h->GetSubjStop();
856prevSmin = h->GetSubjMin();
857prevSmax = h->GetSubjMax();
860phitrefs->erase(
remove_if(phitrefs->begin(), phitrefs->end(),
865vector<size_t> pattern0;
866vector<pair<bool,double> > imperfect;
867 doublemax_idty (0.0);
868 for(
size_t i(0),
n(phitrefs->size());
i<
n; ++
i) {
870 const THitRef& h ((*phitrefs)[
i]);
871 const boolvalid (
true);
874pattern0.push_back(h->GetQueryMin());
875pattern0.push_back(h->GetQueryMax());
876pattern0.push_back(h->GetSubjMin());
877pattern0.push_back(h->GetSubjMax());
878 const doubleidty (h->GetIdentity());
879 const boolimprf (idty < 1.00
880|| h->GetQuerySpan() != h->GetSubjSpan()
881|| h->GetMismatches() > 0
882|| h->GetGaps() > 0);
883imperfect.push_back(pair<bool,double>(imprf, idty));
884 if(idty > max_idty) {
890 if(max_idty < .85 && pattern0.size() >= 4) {
897 const size_tdim (pattern0.size());
899 const char* Seq1 (&
m_mrna.front());
901 const char* Seq2 (&
m_genomic.front());
902 const size_tSeqLen2 (
m_genomic.size());
906 boolsome_error (
false), bad_input (
false);
909 for(
size_t i(0);
i< dim;
i+= 4) {
911 if(pattern0[
i] > pattern0[
i+1] || pattern0[
i+2] > pattern0[
i+3]) {
912ostr_err <<
"Pattern hits must be specified in plus strand";
913some_error = bad_input =
true;
918 if(pattern0[
i] <= pattern0[
i-3] || pattern0[
i+2] <= pattern0[
i-1]) {
920<<
string(
" (hits not sorted)");
926 const boolbr1 (pattern0[
i+1] >= SeqLen1);
927 const boolbr2 (pattern0[
i+3] >= SeqLen2);
930ostr_err <<
"Pattern hits out of range (" 932<< phitrefs->front()->GetQueryId()->GetSeqIdString(
true)
934<< phitrefs->front()->GetSubjId()->GetSeqIdString(
true)
938ostr_err <<
"\tquery_pattern_max = "<< pattern0[
i+1]
939<<
"; query_len = "<< SeqLen1 << endl;
943ostr_err <<
"\tsubj_pattern_max = "<< pattern0[
i+3]
944<<
"; subj_len = "<< SeqLen2 << endl;
954ostr_err <<
"Pattern dimension must be a multiple of four";
955some_error = bad_input =
true;
959ostr_err <<
" (query = " 960<< phitrefs->front()->GetQueryId()->AsFastaString()
962<< phitrefs->front()->GetSubjId()->AsFastaString() <<
')' 967 if(err.size() > 0) {
977map_elem.
m_box[0] = map_elem.
m_box[2] = 0;
982 for(
size_t i= 0;
i< dim;
i+= 4) {
984 size_tL1, R1, L2, R2;
985 size_tmax_seg_size (0);
987 const boolimprf (imperfect[
i/4].
first);
994 const size_tlen1 (pattern0[
i+1] - pattern0[
i] + 1);
995 const size_tlen2 (pattern0[
i+3] - pattern0[
i+2] + 1);
996 const size_tmaxlen (
max(len1, len2));
997 const size_tlendif (len1 < len2? len2 - len1: len1 - len2);
998 size_tband (
size_t((1 - imperfect[
i/4].second) * maxlen) + 2);
999 if(band < lendif) band += lendif;
1002Seq2 + pattern0[
i+2], len2,
1010R1 = pattern0[
i+1] - pattern0[
i] - 1;
1012R2 = pattern0[
i+3] - pattern0[
i+2] - 1;
1013max_seg_size = R1 - L1 + 1;
1020 size_tcut ((1 + R1 - L1) / 5);
1021 if(cut > 20) cut = 20;
1023 const size_tl1 (L1 + cut), l2 (L2 + cut);
1024 const size_t r1(R1 - cut),
r2(R2 - cut);
1025 if(l1 <
r1&& l2 <
r2) {
1031 size_tq0 (pattern0[
i] + L1);
1032 size_ts0 (pattern0[
i+2] + L2);
1033 size_tq1 (pattern0[
i] + R1);
1034 size_ts1 (pattern0[
i+2] + R2);
1038 const size_thitlen_q (pattern0[
i+ 1] - pattern0[
i] + 1);
1039 const size_tsh (
size_t(hitlen_q / 4));
1041 size_t delta(sh > L1? sh - L1: 0);
1045 const size_th2s_right (hitlen_q - R1 - 1);
1046 delta= sh > h2s_right? sh - h2s_right: 0;
1050 if(q0 > q1 || s0 > s1) {
1053q0 = pattern0[
i] + L1;
1054s0 = pattern0[
i+2] + L2;
1055q1 = pattern0[
i] + R1;
1056s1 = pattern0[
i+2] + R2;
1063 const size_tpattern_dim =
m_pattern.size();
1070map_elem.
m_box[1] = pattern0[
i+1];
1071map_elem.
m_box[3] = pattern0[
i+3];
1074map_elem.
m_box[1] = SeqLen1 - 1;
1075map_elem.
m_box[3] = SeqLen2 - 1;
1085 const stringstrid (id->AsFastaString());
1095 if(seq_data == 0) {
1102vector<CRef<CSeq_loc> > orfs;
1103vector<string> start_codon;
1104start_codon.push_back(
"ATG");
1107 TSeqPosmax_len_plus (0), max_len_minus (0);
1108 TSeqPosmax_from_plus (0), max_from_minus (0);
1109 TSeqPosmax_to_plus (0), max_to_minus (0);
1113 const ENa_strandorf_strand ((*orf)->GetInt().GetStrand());
1117 if(
len> max_len_minus) {
1118max_len_minus =
len;
1119max_from_minus = (*orf)->GetInt().GetTo();
1120max_to_minus = (*orf)->GetInt().GetFrom();
1124 if(
len> max_len_plus) {
1125max_len_plus =
len;
1126max_from_plus = (*orf)->GetInt().GetFrom();
1127max_to_plus = (*orf)->GetInt().GetTo();
1132 if(max_len_plus > 0) {
1133rv.first =
TOrf(max_from_plus, max_to_plus);
1136 if(max_len_minus > 0) {
1137rv.second =
TOrf(max_from_minus, max_to_minus);
1174 if(h.
NotNull() && h->GetQueryStrand() ==
false) {
1183 if(hitrefs.size() == 0) {
1189 THit::TIdid_query (hitrefs.front()->GetQueryId());
1192 if(mrna_size == kMaxCoord) {
1194 string(
"Sequence not found: ") + id_query->AsFastaString());
1204min_singleton_idty_final,
1208comps.
Run(hitrefs.begin(), hitrefs.end(),
GetScope());
1210comps.
Run(hitrefs.begin(), hitrefs.end());
1213pair<size_t,size_t> dim (comps.
GetCounts());
1214 if(dim.second > 0) {
1242 boolsame_strand (
false);
1249 for(
size_t i(0);
i< dim.first; ++
i, box += 4) {
1251 if(
i+ 1 == dim.first) {
1253same_strand =
false;
1258same_strand = strand_this == strand_next;
1259smax = same_strand? (box + 4)[2]:
kMax_UInt;
1264 if(smax < box[3]) {
1267 "Unexpected order of compartments");
1272comps.
Get(
i, comp_hits);
1274 if(smax < box[3]) smax = box[3];
1275 if(smin > box[2]) smin = box[2];
1299smin = same_strand? box[3]: 0;
1327 THit::TIdid_query (phitrefs->front()->GetQueryId());
1381 const doublekMinPercAInPolya (0.80);
1384 for(
size_t i= polya_start;
i<dim; ++
i) {
1385 if(seq[
i] ==
'A') ++
cnt;
1387 if(
cnt>= (dim - polya_start)*kMinPercAInPolya)
return true;
1394 const size_tkMaxNonA (3), kMinAstreak (5);
1395 Int8 i(dim - 1), i0 (dim);
1396 for(
size_tcount_non_a (0), astreak (0);
i>= 0 && count_non_a < kMaxNonA; --
i) {
1398 if(seq[
i] !=
'A') {
1403 if(++astreak >= kMinAstreak) {
1409 const size_t len(dim - i0);
1411 if(
len>= kMinAstreak) {
1413 if(0 < cds_stop && cds_stop < dim && rv <= cds_stop) {
1439 if(range_left > range_right) {
1443 if(phitrefs->size() == 0) {
1452 for(
size_t i(0),
n(phitrefs->size());
i<
n; ++
i) {
1457 const boolnew_strand (!(h->GetSubjStrand()));
1458h->SetQueryStart(a1);
1459h->SetQueryStop(a0);
1460h->SetSubjStrand(new_strand);
1474THitRefs::iterator ii (phitrefs->begin()), jj (phitrefs->end() - 1);
1477 boolb0 (
true), b1 (
true);
1478 while(b0 && b1 && ii < jj) {
1480 while(ii->IsNull() && ii < jj) ++ii;
1481 while(jj->IsNull() && ii < jj) --jj;
1485 const doublehit_idty ((*ii)->GetIdentity());
1486 const size_tmin_termhitlen (
1487hit_idty < .9999? min_termhitlen2: min_termhitlen1);
1489 if((*ii)->GetQuerySpan() < min_termhitlen) {
1499 const doublehit_idty ((*jj)->GetIdentity());
1500 const size_tmin_termhitlen (
1501hit_idty < .9999? min_termhitlen2: min_termhitlen1);
1503 if((*jj)->GetQuerySpan() < min_termhitlen) {
1512phitrefs->erase(
remove_if(phitrefs->begin(), phitrefs->end(),
1516 if(phitrefs->size() == 0) {
1525 THit::TCoordqmin (span[0]), qmax (span[1]), smin (span[2]), smax (span[3]);
1527 const boolctg_strand (phitrefs->front()->GetSubjStrand());
1536 THit::TCoordfixed_left (kMaxCoord / 2), fixed_right(fixed_left);
1538 const size_tkTermLenCutOff_m2 (10);
1539 const boolfix_left (qmin <= kTermLenCutOff_m2);
1540 const boolfix_right (rspace <= kTermLenCutOff_m2);
1541 if(fix_left || fix_right) {
1543 if(phitrefs->size() > 1) {
1547 THit::TCoordprev_start (phitrefs->front()->GetSubjStart());
1553cur_start - prev_start:
1554prev_start - cur_start);
1555 if(intron > max_intron) {
1556max_intron = intron;
1558prev_start = cur_start;
1561 const doublefactor (2.5);
1562 if(fix_left) { fixed_left =
THit::TCoord(max_intron * factor); }
1563 if(fix_right) { fixed_right =
THit::TCoord(max_intron * factor); }
1568 if(fix_left) { fixed_left = single_hit_extent; }
1569 if(fix_right) { fixed_right = single_hit_extent; }
1576 const THit::TCoordextent_left (
min(extent_left_m1, extent_left_m2));
1581 if(extent_right < poly_length) extent_right = poly_length;
1584smin =
max(0,
int(smin - extent_left));
1585smax += extent_right;
1588smin =
max(0,
int(smin - extent_right));
1589smax += extent_left;
1597 if(smin < range_left) {
1600 if(smax > range_right) {
1605 if(phitrefs->size() > 1) {
1606 THit::TIdid_query (phitrefs->front()->GetSubjId());
1608tmp_id->
Assign(*id_query);
1613 if(hitmin > smin) {
1617 TSeqPostmplen = hitmin - smin;
1619 for(;smit; ++smit) {
1624 _ASSERT( smin + pos <= hitmin );
1632 if(smax > hitmax) {
1636 TSeqPostmplen = smax - hitmax;
1638 for(;smit; ++smit) {
1643 _ASSERT( hitmax + pos < smax );
1644smax = hitmax + pos;
1656smin, smax,
true,
true, ctg_strand);
1661 if(smax >= ctg_end) {
1662smax = ctg_end > 0? ctg_end - 1: 0;
1665 if(ctg_strand ==
false) {
1680 if(!(smin <= hsmin && hsmax <= smax)) {
1682ostr <<
"\nOne of compartment hits:\n"<< *h
1683<<
"\n goes outside the genome range = ("<< smin+1 <<
", "<< smax+1 <<
')' 1684<<
"\n allowed for the compartment";
1689 if(ctg_strand ==
false) {
1693h->SetSubjStart(a2);
1703(*ii)->Shift(-(
Int4)qmin, -(
Int4)smin);
1722 Int8last_exon = -1;
1731 if(last_exon == -1) {
1739 const char* p0 = &
m_mrna.front() + s.
m_box[1] + 1;
1741 const char* p = p0;
1742 const char* q = q0;
1743 const char* pe = &
m_mrna.front() + mrna_size;
1747 size_tsh = 0,
ct=0;
1748 for(; p < pe && q < qe; ++p, ++q, ++
ct) {
1749 if(
toupper(*p) !=
'N'&& *p == *q) {
1764 for(;p>=p0;--p,--q,++
ct) {
1765 if(
toupper(*p) !=
'N'&& *p == *q) {
1768 if( match_num <
ct*kMinExonFlankIdty) {
1781 for(
ct= 0,p = p0, q = q0;
ct< sh; ++p, ++q, ++
ct) {
1782 if(
toupper(*p) !=
'N'&& *p == *q) {
1791 const size_tann_dim = s.
m_annot.size();
1792 if(ann_dim > 2 && s.
m_annot[ann_dim - 3] ==
'>') {
1793s.
m_annot[ann_dim - 2] = q < qe? *q:
' ';
1794s.
m_annot[ann_dim - 1] = q < (qe-1)? *(q+1):
' ';
1804 if(coord < mrna_size ) {
1809 if( ( (
int)mrna_size - (
int)s.
m_box[1] - 1 ) >= kFlankExonProx &&
1811 intseq1_pos = (
int)s.
m_box[1];
1812 intseq2_pos = (
int)s.
m_box[3];
1813 size_tdet_pos = s.
m_details.size() - 1;
1814 size_tmin_det_pos = det_pos - kMaxCutToSplice;
1815 intmin_pos = (
int)s.
m_box[0] + 8;
1816 while(seq1_pos >= min_pos && det_pos >= min_det_pos) {
1817 if( (
size_t)(seq2_pos + 2) <
m_genomic.size() && s.
m_details[det_pos] ==
'M'&&
1819 if( det_pos + 1 < s.
m_details.size() ) {
1820s.
m_box[1] = seq1_pos;
1821s.
m_box[3] = seq2_pos;
1825 size_tadim = s.
m_annot.size();
1826 if(adim > 0 && s.
m_annot[adim-1] ==
'>') {
1828}
else if(adim > 2 && s.
m_annot[adim-3] ==
'>') {
1858ss.
m_box[0] = coord;
1859ss.
m_box[1] = mrna_size - 1;
1871mcount += jj->m_idty * jj->m_len;
1875 const size_tmin_singleton_idty_final (
1878 if(mcount < min_singleton_idty_final) {
1886jj->m_box[0] += qmin;
1887jj->m_box[1] += qmin;
1890jj->m_box[0] = mrna_size - jj->m_box[0] - 1;
1891jj->m_box[1] = mrna_size - jj->m_box[1] - 1;
1896jj->m_box[2] += smin;
1897jj->m_box[3] += smin;
1900jj->m_box[2] = smax - jj->m_box[2];
1901jj->m_box[3] = smax - jj->m_box[3];
1914 boolsevere (
true);
1938TSegmentVector segments;
1941 #ifdef DBG_DUMP_PATTERN 1942cerr <<
"Pattern:"<< endl;
1945 const size_tmap_dim (
m_alnmap.size());
1951 size_tcds_start (0), cds_stop (0);
1952 for(
size_t i(0);
i< map_dim; ++
i) {
1957 const size_tlen1 (zone.
m_box[1] - zone.
m_box[0] + 1);
1958 const size_tlen2 (zone.
m_box[3] - zone.
m_box[2] + 1);
1971Seq2 + zone.
m_box[2], len2,
1975vector<size_t> pattern;
1980 "CSplign::x_Run(): Invalid alignment pattern");
1985back_inserter(pattern));
1988 for(
size_tj (0), pt_dim (pattern.size()); j < pt_dim; j += 4) {
1990 #ifdef DBG_DUMP_PATTERN 1991cerr << (1 + pattern[j]) <<
'\t'<< (1 + pattern[j+1]) <<
'\t' 1992<<
"(len = "<< (pattern[j+1] - pattern[j] + 1) <<
")\t" 1993<< (1 + pattern[j+2]) <<
'\t'<< (1 + pattern[j+3])
1994<<
"(len = "<< (pattern[j+3] - pattern[j+2] + 1) <<
")\t" 1996 #undef DBG_DUMP_PATTERN 1999pattern[j] -= zone.
m_box[0];
2000pattern[j+1] -= zone.
m_box[0];
2001pattern[j+2] -= zone.
m_box[2];
2002pattern[j+3] -= zone.
m_box[2];
2007 m_aligner->SetEndSpaceFree(
true,
true,
true,
true);
2008 m_aligner->SetCDS(cds_start, cds_stop);
2014 #ifdef DBG_DUMP_TYPE2 2021 #undef DBG_DUMP_TYPE2 2028 if(
i+ 1 < map_dim) {
2031 g.m_box[0] = zone.
m_box[1] + 1;
2033 g.m_box[2] = zone.
m_box[3] + 1;
2041 #ifdef DUMP_ORIG_SEGS 2042cerr <<
"Orig segments:"<< endl;
2043 ITERATE(TSegmentVector, ii, segments) {
2044cerr << ii->m_exon <<
'\t'<< ii->m_idty <<
'\t'<< ii->m_len <<
'\t' 2045<< ii->m_box[0] <<
'\t'<< ii->m_box[1] <<
'\t' 2046<< ii->m_box[2] <<
'\t'<< ii->m_box[3] <<
'\t' 2047<< ii->m_annot <<
'\t'<< ii->m_score << endl;
2051 if(segments.size() == 0) {
2057 const size_tSeqLen2 (
m_genomic.size());
2079 boolis_test =
false;
2080 boolis_test_plus =
false;
2085is_test_plus =
true;
2094TSegmentVector::iterator
prev;
2096 if(ii->m_exon ==
false)
continue;
2100 if(
prev->m_exon) {
2130 boolabuts_gap =
x_IsInGap( ii->m_box[2] - 1 );
2135 if( ii->m_box[0] >
prev->m_box[1] + 1) {
2139sgap.
m_box[1] = ii->m_box[0] - 1;
2140sgap.
m_box[3] = ii->m_box[2] - 1;
2142ii = segments.insert(ii, sgap);
2157 boolcontinue_iterations =
false;
2165 if(segments.size() == 0) {
2169 size_texon_count0 (0);
2170 ITERATE(TSegmentVector, ii, segments) {
2171 if(ii->m_exon) ++exon_count0;
2176 boolfirst_exon =
true;
2178 for(
size_tk0 = 0; k0 < segments.size(); ++k0) {
2184first_exon =
false;
2187}
else if( !segments[k0-1].m_exon ) {
2201 if(last_exon ==
NULL) {
2211TSegmentVector tmp_segments;
2214 intprev_exon_index = -1;
2215 for(
size_tk0 = 0; k0 < segments.size(); ++k0) {
2216 if(segments[k0].m_exon) {
2217 if(prev_exon_index == -1) {
2218 if(segments[k0].m_box[0] > 0) {
2220 g.m_box[1] = segments[k0].m_box[0] - 1;
2222 g.m_box[3] = segments[k0].m_box[2] - 1;
2223 g.m_len =
g.m_box[1] -
g.m_box[0] + 1;
2224tmp_segments.push_back(
g);
2228 if( segments[prev_exon_index].m_box[1] + 1 < segments[k0].m_box[0] ) {
2229 g.m_box[0] = segments[prev_exon_index].m_box[1] + 1;
2230 g.m_box[1] = segments[k0].m_box[0] - 1;
2231 g.m_box[2] = segments[prev_exon_index].m_box[3] + 1;
2232 g.m_box[3] = segments[k0].m_box[2] - 1;
2233 g.m_len =
g.m_box[1] -
g.m_box[0] + 1;
2234tmp_segments.push_back(
g);
2237prev_exon_index = (
int)k0;
2238tmp_segments.push_back(segments[k0]);
2243 if(prev_exon_index >= 0) {
2244 if(segments[prev_exon_index].m_box[1] + 1 < SeqLen1) {
2245 g.m_box[0] = segments[prev_exon_index].m_box[1] + 1;
2246 g.m_box[1] = SeqLen1 - 1;
2247 g.m_box[2] = segments[prev_exon_index].m_box[3] + 1;
2248 g.m_box[3] = SeqLen2 - 1;
2249 g.m_len =
g.m_box[1] -
g.m_box[0] + 1;
2250tmp_segments.push_back(
g);
2255segments.swap(tmp_segments);
2266 while(k0 < segments.size()) {
2272 const doublemin_idty (
len>= kMinTermExonSize?
2276 if(s.
m_idty>= min_idty) {
2283 long intk1 (segments.size() - 1);
2284 while(k1 >= (
int)k0) {
2290 const doublemin_idty (
len>= kMinTermExonSize?
2294 if(s.
m_idty>= min_idty) {
2308ii->ImproveFromLeft1(Seq1, Seq2,
m_aligner);
2319ii->ImproveFromRight1(Seq1, Seq2,
m_aligner);
2327 for(
unsigned intk0 = 0; k0 < segments.size(); ++k0) {
2328 if(!segments[k0].m_exon) {
2329 if( k0 > 0 && segments[k0-1].m_exon) {
2332 if(
x_IsInGap(segments[k0-1].m_box[3] + 1) ||
2336 if( ( (
int)SeqLen1 - (
int)segments[k0-1].m_box[1] - 1 ) >= kFlankExonProx ) {
2343segments[k0-1].ImproveFromRight1(Seq1, Seq2,
m_aligner);
2346segments[k0-1].ImproveFromRight(Seq1, Seq2,
m_aligner);
2349 if( k0 + 1 < segments.size() && segments[k0+1].m_exon) {
2352 if(
x_IsInGap(segments[k0+1].m_box[2] - 1) ||
2356 if( (
int)segments[k0+1].m_box[0] >= kFlankExonProx ) {
2363segments[k0+1].ImproveFromLeft1(Seq1, Seq2,
m_aligner);
2366segments[k0+1].ImproveFromLeft(Seq1, Seq2,
m_aligner);
2375 if( segments.size() == 0 ) {
2380 if(segments[0].m_box[0] > 0) {
2384 g.m_box[1] = segments[0].m_box[0] - 1;
2386 g.m_box[3] = segments[0].m_box[2] - 1;
2388segments.insert(segments.begin(),
g);
2392 TSegment& seg_last (segments.back());
2393 if(seg_last.
m_box[1] + 1 < SeqLen1) {
2396 g.m_box[0] = seg_last.
m_box[1] + 1;
2397 g.m_box[1] = SeqLen1 - 1;
2398 g.m_box[2] = seg_last.
m_box[3] + 1;
2399 g.m_box[3] = SeqLen2 - 1;
2401segments.push_back(
g);
2408 boolfirst_exon =
true;
2414 if(ii->IsLowComplexityExon(Seq1) ) {
2417first_exon =
false;
2424 if( last_exon != 0 ) {
2434 if(ii->m_exon ==
false)
continue;
2442sl.ImproveFromLeft1(Seq1, Seq2,
m_aligner);
2445 if( sl.m_details == ii->m_details && sr.
m_details== ii->m_details ) {
2451 if( sr.
m_details!= ii->m_details && ii != segments.begin() && (ii-1)->m_exon && ( (ii+1) == segments.end() || !(ii+1)->m_exon ) ) {
2454}
else if( sl.m_details != ii->m_details && (ii+1) != segments.end() && (ii+1)->m_exon && ( ii == segments.begin() || !(ii-1)->m_exon) ) {
2456}
else if(sl.m_details == ii->m_details ||
2463 if(sl.m_details != ii->m_details || sr.
m_details!= ii->m_details){
2464 if(sl.m_details == ii->m_details ||
2473 if(ii != segments.begin() && (ii)->m_box[0] > (ii - 1)->m_box[1] + 1) {
2475sgap.
m_box[0] = (ii - 1)->m_box[1] + 1;
2476sgap.
m_box[2] = (ii - 1)->m_box[3] + 1;
2477sgap.
m_box[1] = ii->m_box[0] - 1;
2478sgap.
m_box[3] = ii->m_box[2] - 1;
2480ii = segments.insert(ii, sgap);
2481continue_iterations =
true;
2484 if( (ii+1) != segments.end() && (ii+1)->m_box[0] > ii->m_box[1] + 1 ) {
2487sgap.
m_box[0] = (ii - 1)->m_box[1] + 1;
2488sgap.
m_box[2] = (ii - 1)->m_box[3] + 1;
2489sgap.
m_box[1] = ii->m_box[0] - 1;
2490sgap.
m_box[3] = ii->m_box[2] - 1;
2492ii = segments.insert(ii, sgap);
2493continue_iterations =
true;
2500}
else if(ii->m_idty < .9 && ii->m_len < 20) {
2502 boolnc_prev (
false), nc_next (
false);
2503 if(ii != segments.begin() && (ii - 1)->m_exon) {
2508 if( (ii+1) != segments.end() && (ii + 1)->m_exon) {
2511(ii + 1)->GetAcceptor());
2513 if( nc_prev || nc_next ) {
2524 for(
size_tk (0); k < segments.size(); ++k) {
2526 if(s.
m_exon==
false)
continue;
2528 if( ( k == 0 ) || ( ! segments[k-1].m_exon ) ) {
2530 if( ( k + 1 == segments.size() ) || ( ! segments[k+1].m_exon ) ) {
2546 for(
size_tk (0); k < segments.size(); ++k) {
2548 if(s.
m_exon==
false)
continue;
2550 booldrop (
false);
2560 if( (
int)s.
m_box[0] >= kFlankExonProx ) {
2563 if(adj ==
eNo) adj = eSoft;
2567 if( k + 1 == segments.size() ) {
2568 if( ( (
int)SeqLen1 - (
int)s.
m_box[1] - 1 ) >= kFlankExonProx ) {
2571 if(adj ==
eNo) adj = eSoft;
2575 if(k > 0 && ( ! segments[k-1].m_exon ) ) {
2576 if( (
int)s.
m_box[0] >= kFlankExonProx ) {
2579 if(adj ==
eNo) adj = eSoft;
2583 if(k + 1 < segments.size() && (! segments[k+1].m_exon ) ) {
2584 if( ( (
int)SeqLen1 - (
int)s.
m_box[1] - 1 ) >= kFlankExonProx ) {
2587 if(adj ==
eNo) adj = eSoft;
2595}
else if(adj == eHard) {
2596 if( s.
m_len< 20 ) {
2599 if( s.
m_idty< kMinTermExonIdty && s.
m_len< kMinTermExonSize ) {
2613 size_texon_count (0);
2615 for(
size_t i= 0;
i< segments.size(); ++
i) {
2618term_segs[exon_count] = &s;
2619 if(++exon_count == 2) {
2625 if(exon_count == 2) {
2632 size_texon_count (0);
2634 for(
Int8 i= segments.size() - 1;
i>= 0; --
i) {
2637term_segs[exon_count] = &s;
2638 if(++exon_count == 2) {
2644 if(exon_count == 2) {
2650 boolgap_prev (
false);
2651 for(
size_tk (0); k < segments.size(); ++k) {
2654 if(s.
m_exon==
false) {
2658 size_tlength (s.
m_box[1] - s.
m_box[0] + 1);
2659 boolgap_next (
false);
2660 if(k + 1 < segments.size()) {
2661gap_next = !segments[k+1].m_exon;
2663 if(length <= 10 && (gap_prev || gap_next)) {
2672 intgap_start_idx (-1);
2673 if(segments.size() && segments[0].m_exon ==
false) {
2677 for(
size_tk (0); k < segments.size(); ++k) {
2680 if(gap_start_idx == -1) {
2681gap_start_idx =
int(k);
2683s.
m_box[0] = segments[k-1].m_box[1] + 1;
2684s.
m_box[2] = segments[k-1].m_box[3] + 1;
2689 if(gap_start_idx >= 0) {
2691 g.m_box[1] = s.
m_box[0] - 1;
2692 g.m_box[3] = s.
m_box[2] - 1;
2693 g.m_len =
g.m_box[1] -
g.m_box[0] + 1;
2694 g.m_details.resize(0);
2702 if(gap_start_idx >= 0) {
2704 g.m_box[1] = segments[segments.size()-1].m_box[1];
2705 g.m_box[3] = segments[segments.size()-1].m_box[3];
2706 g.m_len =
g.m_box[1] -
g.m_box[0] + 1;
2707 g.m_details.resize(0);
2711 size_texon_count1 (0);
2713 if(ii->m_exon) ++exon_count1;
2716 if(exon_count1 == 0 ) {
2720 if(exon_count0 == exon_count1 && continue_iterations ==
false)
break;
2729 boolfirst_exon =
true;
2731 intlast_exon_index = -1;
2732 for(
size_tk0 = 0; k0 < sdim; ++k0) {
2734last_exon_index = (
int)k0;
2737 for(
unsigned intk0 = 0; k0 < sdim; ++k0) {
2739 boolcut_from_left =
false;
2740 boolcut_from_right =
false;
2746first_exon =
false;
2749 if( (
int)s.
m_box[0] >= kFlankExonProx ) {
2750cut_from_left =
true;
2752first_exon =
false;
2754cut_from_left =
true;
2762 if( last_exon_index == (
int)k0 ) {
2763 if( ( (
int)SeqLen1 - (
int)s.
m_box[1] - 1 ) >= kFlankExonProx ) {
2764cut_from_right =
true;
2766}
else if(k0 + 1 < sdim && (!
m_segments[k0+1].m_exon ) ) {
2767cut_from_right =
true;
2771 if(cut_from_left) {
2772 intseq1_pos = (
int)s.
m_box[0];
2773 intseq2_pos = (
int)s.
m_box[2];
2775 intmax_pos = (
int)s.
m_box[1] - 8;
2776 while(seq1_pos <= max_pos && det_pos <= kMaxCutToSplice) {
2777 if( seq2_pos > 1 && s.
m_details[det_pos] ==
'M'&&
2778 toupper(Seq2[seq2_pos-2]) ==
'A'&&
toupper(Seq2[seq2_pos-1]) ==
'G') {
2780s.
m_box[0] = seq1_pos;
2781s.
m_box[2] = seq2_pos;
2791 if( k0>0 && ( !
m_segments[k0-1].m_exon ) ) {
2793 g.m_box[1] = s.
m_box[0] - 1;
2794 g.m_box[3] = s.
m_box[2] - 1;
2795 g.m_len =
g.m_box[1] -
g.m_box[0] + 1;
2821 if(cut_from_right) {
2822 intseq1_pos = (
int)s.
m_box[1];
2823 intseq2_pos = (
int)s.
m_box[3];
2824 size_tdet_pos = s.
m_details.size() - 1;
2825 size_tmin_det_pos = det_pos - kMaxCutToSplice;
2826 intmin_pos = (
int)s.
m_box[0] + 8;
2827 while(seq1_pos >= min_pos && det_pos >= min_det_pos) {
2828 if( (
size_t)(seq2_pos + 2) <
m_genomic.size() && s.
m_details[det_pos] ==
'M'&&
2829 toupper(Seq2[seq2_pos+1]) ==
'G'&&
toupper(Seq2[seq2_pos+2]) ==
'T') {
2830 if( det_pos + 1 < s.
m_details.size() ) {
2831s.
m_box[1] = seq1_pos;
2832s.
m_box[3] = seq2_pos;
2836 size_tadim = s.
m_annot.size();
2837 if(adim > 0 && s.
m_annot[adim-1] ==
'>') {
2839}
else if(adim > 2 && s.
m_annot[adim-3] ==
'>') {
2843 if( k0 + 1 < sdim && ( !
m_segments[k0+1].m_exon ) ) {
2845 g.m_box[0] = s.
m_box[1] + 1;
2846 g.m_box[2] = s.
m_box[3] + 1;
2847 g.m_len =
g.m_box[1] -
g.m_box[0] + 1;
2879 booladjust =
false;
2880 boolprev_exon =
false;
2882 for(
size_tpp = 0; pp <ssize ; ++pp) {
2884 if( !prev_exon && ( pp == ssize - 1 || !
m_segments[pp+1].m_exon ) ) {
2903 if( min_hole_len > 0) {
2906 for(; pos2 <
m_segments.size(); ++pos1, ++pos2) {
2919 boolcut_to_codons =
true;
2920 if( cut_to_codons ) {
2932 #ifdef DUMP_PROCESSED_SEGS 2933cerr <<
"Processed segments:"<< endl;
2935cerr << ii->m_box[0] <<
'\t'<< ii->m_box[1] <<
'\t' 2936<< ii->m_box[2] <<
'\t'<< ii->m_box[3] <<
'\t' 2937<< ii->m_annot <<
'\t'<< ii->m_score << endl;
2949 for(
size_t i(0), dim (
m_Segments.size());
i< dim; ++
i) {
2955trans.append(s.
m_len,
'D');
2959 ITERATE(
string, ii, trans) {
2964 returndouble(matches) / trans.size();
2972box[1] = box[3] = 0;
2987box[0] =
static_cast<unsigned int>(
a);
2990box[1] =
static_cast<unsigned int>(
b);
3002box[2] =
static_cast<unsigned int>(
a);
3005box[3] =
static_cast<unsigned int>(
b);
3014 boolturn2gap (
false);
3016 const size_texon_size (1 + term_segs[0]->m_box[1] -
3017term_segs[0]->m_box[0]);
3019 const doubleidty (term_segs[0]->m_idty);
3023 if(exon_size < kMinTermExonSize && idty < kMinTermExonIdty ) {
3028 if(exon_size < kMinTermExonSize) {
3033 const char*dnr, *acc;
3035 a= term_segs[0]->
m_box[3];
3036 b= term_segs[1]->
m_box[2];
3041 a= term_segs[1]->
m_box[3];
3042 b= term_segs[0]->
m_box[2];
3047 const size_tintron_len (
b-
a);
3051 size_tmax_ext ((idty < .96 || !consensus || exon_size < 16)?
3055 if(exon_size < 8) {
3056max_ext = 10 * exon_size;
3059 else if(exon_size < 16) {
3064 if(intron_len > max_intron_len) {
3074s.
m_len= exon_size;
3088 if(query_len >= kNonCoveredEndThreshold) {
3092 const doublek (pow(kNonCoveredEndThreshold, - 1. / kPower) * max_ext);
3093 const doubledrv (k * pow(query_len, 1. / kPower));
3105 template<
typenameT>
3108*(
reinterpret_cast<T*
>(p)) =
n;
3115 copy(s.begin(), s.end(), p);
3120 template<
typenameT>
3123 n= *(
reinterpret_cast<const T*
>(p));
3144 const size_t total_size=
sizeofm_exon +
sizeofm_idty +
3145 sizeofm_len +
sizeofm_box + m_annot.size() + 1 +
3146m_details.size() + 1 +
sizeofm_score;
3150 char* p = &target->front();
3154 for(
size_t i= 0;
i< 4; ++
i) {
3167 const size_tmin_size =
sizeofm_exon +
sizeofm_idty +
sizeofm_len +
3168+
sizeofm_box + 1 + 1 +
sizeofm_score;
3170 if(
source.size() < min_size) {
3174 const char* p = &
source.front();
3179 for(
size_t i= 0;
i< 4; ++
i) {
3197 const size_tcore_size (
3198 sizeofm_Id +
sizeofm_Status + m_Msg.size() + 1
3199+
sizeofm_QueryStrand +
sizeofm_SubjStrand
3200+
sizeofm_Cds_start +
sizeofm_Cds_stop
3205vector<char> core (core_size);
3207 char* p = &core.front();
3219 typedefvector<TNetCacheBuffer> TBuffers;
3220TBuffers vb (m_Segments.size());
3223ii->ToBuffer(&vb[ibuf++]);
3226 size_t total_size(core_size +
sizeof(
size_t) * m_Segments.size());
3232TNetCacheBuffer::iterator it = target->begin();
3233 copy(core.begin(), core.end(), it);
3238 const size_tseg_buf_size = ii->size();
3239*((
size_t*)p) = seg_buf_size;
3240it +=
sizeof(size_t);
3241 copy(ii->begin(), ii->end(), it);
3251 const size_tmin_size (
3255+
sizeofm_QueryStrand +
sizeofm_SubjStrand
3256+
sizeofm_Cds_start +
sizeofm_Cds_stop
3261 if(
source.size() < min_size) {
3265 const char* p (&
source.front());
3277 const char* pe (&
source.back());
3279 size_tseg_buf_size (0);
3281m_Segments.push_back(
TSegment());
3282 TSegment& seg (m_Segments.back());
3295 boolvalid_input (sas.
GetPointer() && sas->CanGet() && sas->Get().size()
3296&& sas->Get().front()->CanGetSegs()
3297&& sas->Get().front()->GetSegs().IsSpliced()
3298&& sas->Get().front()->GetSegs().GetSpliced().GetProduct_type()
3304 "CSplign::s_ComputeStats(): Invalid input");
3307output_stats->resize(0);
3311output_stats->push_back(ss);
3314 returnoutput_stats->size();
3319 const intkFrame_not_set (-10);
3320 const intkFrame_end (-5);
3321 const intkFrame_lost (-20);
3326 boolembed_scoreset,
3332 "CSplign::s_ComputeStats(): mode not yet supported.");
3335 const boolcds_stats ((
flags&
eSF_BasicCds) && (cds.first + cds.second > 0));
3344scores.assign(score_vec.begin(), score_vec.end());
3350 constTSpliced & spliced (sa->GetSegs().GetSpliced());
3353 "CSplign::s_ComputeStats(): Unsupported product type");
3358 const boolcds_strand (cds.first < cds.second);
3359 if(qstrand ^ cds_strand) {
3361 "CSplign::s_ComputeStats(): Transcript orientation not " 3362 "matching specified CDS orientation.");
3366 typedefTSpliced::TExons TExons;
3367 constTExons & exons (spliced.GetExons());
3369 const TSeqPosqlen (spliced.GetProduct_length());
3370 const TSeqPospolya (spliced.CanGetPoly_a()?
3371spliced.GetPoly_a(): (qstrand? qlen:
TSeqPos(-1)));
3376 ITERATE(TExons, ii2, exons) {
3378 constTExon & exon (**ii2);
3379 const TSeqPosqmin (exon.GetProduct_start().GetNucpos()),
3380qmax (exon.GetProduct_end().GetNucpos());
3382 const TSeqPosqgap (qstrand? qmin - qprev - 1: qprev - qmax - 1);
3385 if(cds_stats) xcript.append(qgap,
'X');
3388 typedefTExon::TParts TParts;
3389 constTParts & parts (exon.GetParts());
3391 ITERATE(TParts, ii3, parts) {
3398 if(cds_stats) xcript.append(
len,
'M');
3402 if(cds_stats) xcript.append(
len,
'R');
3406 if(cds_stats) xcript.append(
len,
'D');
3410 if(cds_stats) xcript.append(
len,
'I');
3413errmsg =
"Unexpected spliced exon chunk part: " 3419qprev = qstrand? qmax: qmin;
3422 const TSeqPosqgap (qstrand? polya - qprev - 1: qprev - polya - 1);
3423 if(cds_stats) xcript.append(qgap,
'X');
3425 if(!qstrand && qlen <= 0) {
3427 "CSplign::s_ComputeStats(): Cannot compute " 3428 "inframe stats - transcript length not set.");
3431 intqpos (qstrand? -1:
int(qlen));
3432 intqinc (qstrand? +1: -1);
3433 intframe (kFrame_not_set);
3434 size_taln_length_cds (0);
3435 intmatches_frame[] = {0, 0, 0, 0, 0};
3436 const Int8cds_start (cds.first), cds_stop (cds.second);
3437 for(string::const_iterator ie (xcript.end()), ii(xcript.begin());
3438ii != ie && frame != kFrame_end; ++ii)
3445 if(frame == kFrame_not_set && qpos == cds_start) frame = 0;
3446 if(qpos == cds_stop) frame = kFrame_end;
3449++matches_frame[frame + 2];
3455 if(frame == kFrame_not_set && qpos == cds_start) frame = 0;
3456 if(qpos == cds_stop) frame = kFrame_end;
3457 if(frame >= -2) ++aln_length_cds;
3462 if(frame == kFrame_not_set && qpos == cds_start) frame = 0;
3463 if(qpos == cds_stop) frame = kFrame_end;
3466frame = (frame + 1) % 3;
3473frame = (frame - 1) % 3;
3479 if( (qstrand && cds_start <= qpos && qpos < cds_stop) ||
3480(!qstrand && cds_start >= qpos && qpos > cds_stop) )
3482frame = kFrame_lost;
3492score_matches_inframe->SetValue().SetInt(matches_frame[2]);
3493scores.push_back(score_matches_inframe);
3499score_inframe_identity->SetValue().
3500SetReal(
double(matches_frame[2]) / aln_length_cds);
3501scores.push_back(score_inframe_identity);
3506 if(embed_scoreset) {
3508sa_score.resize(scores.size());
3509 copy(scores.begin(), scores.end(), sa_score.begin());
@ eExtreme_Positional
numerical value
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
void remove_if(Container &c, Predicate *__pred)
void transform(Container &c, UnaryFunction *op)
void Run(typename THitRefs::iterator start, typename THitRefs::iterator finish, CScope *scope=NULL, const vector< pair< TCoord, TCoord > > *gaps=NULL)
Execute: identify compartments.
bool GetStrand(size_t i) const
void SetMaxIntron(TCoord mi)
Assign the maximum intron length, in base pairs.
void Get(size_t idx, THitRefs &compartment) const
Retrieve a compartment by index.
const TCoord * GetBox(size_t i) const
bool GetStatus(size_t i) const
pair< size_t, size_t > GetCounts(void) const
Retrieve the compartment counts.
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string Sample usage:
static void FindOrfs(const string &seq, TLocVec &results, unsigned int min_length_bp=3, int genetic_code=1, const vector< string > &allowable_starts=vector< string >(), bool longest_orfs=true, size_t max_seq_gap=k_default_max_seq_gap)
Find ORFs in both orientations.
bool IntersectingWith(const TRange &r) const
void AddSplignScores(const CSeq_align &align, CSeq_align::TScore &scores)
Compute the six splign scores.
void ImproveFromLeft(TSeg &s)
void Cut50FromLeft(TSeg &s)
static bool HasAbuttingExonOnLeft(TSegs segments, TSeqPos p)
void Cut50FromRight(TSeg &s)
void TrimHolesToCodons(TSegs &segments, objects::CBioseq_Handle &mrna_bio_handle, bool mrna_strand, TSeqPos mrna_len)
void ImproveFromRight(TSeg &s)
bool ThrowAway20_28_90(TSeg &s)
void JoinExons(TSegs &segments, TSeqPos p1, TSeqPos p2)
void AdjustGaps(TSegs &segments)
static bool HasAbuttingExonOnRight(TSegs segments, TSeqPos p)
void SetMismatchScore(int score)
void SetMinHoleLen(size_t len)
void SetPolyaDetection(bool on)
static int s_GetDefaultNonConsensusSpliceScore(void)
int GetGcAgSpliceScore(void) const
EScoringType GetScoringType(void) const
void Run(THitRefs *hitrefs)
void SetCompartmentPenalty(double penalty)
int m_NonConsensusSpliceScore
int GetGapExtensionScore(void) const
list< CRef< objects::CScore_set > > TScoreSets
void SetMinSingletonIdentity(double idty)
size_t x_GetGenomicExtent(const size_t query_extent, size_t max_ext=0) const
CRef< objects::CScope > & SetScope(void)
void SetMatchScore(int score)
void SetMinPolyaLen(size_t len)
SAlignedCompartment x_RunOnCompartment(THitRefs *hitrefs, size_t range_left, size_t range_right)
void SetMaxIntron(size_t max_intron)
bool GetEndGapDetection(void) const
double GetPolyaExtIdentity(void) const
static bool s_GetDefaultTrimToCodons(void)
size_t m_MinSingletonIdtyBps
bool x_ProcessTermSegm(TSegment **term_segs, Uint1 side) const
void SetMinSingletonIdentityBps(size_t idty)
void SetMinExonIdentity(double idty)
pair< size_t, size_t > m_BoundingRange
static double s_GetDefaultMinCompartmentIdty(void)
size_t GetMinPolyaLen(void) const
int GetMismatchScore(void) const
static int s_GetDefaultGapOpeningScore(void)
void SetGapOpeningScore(int score)
vector< size_t > m_pattern
pair< size_t, size_t > TOrf
static int s_GetDefaultGtAgSpliceScore(void)
CConstRef< objects::CSeqMap > m_GenomicSeqMap
void SetTestType(const string &test_type)
static size_t s_GetDefaultMinPolyaLen(void)
bool GetPolyaDetection(void) const
void SetStrand(bool strand)
double GetCompartmentPenalty(void) const
EScoringType m_ScoringType
size_t GetMinHoleLen(void) const
int GetAtAcSpliceScore(void) const
bool GetTrimToCodons(void) const
void PreserveScope(bool preserve=true)
Controls whether to clean the scope object's cache on a new sequence.
void SetMaxPartExonIdentDrop(double ident)
void SetGcAgSpliceScore(int score)
static size_t s_GetDefaultMaxGenomicExtent(void)
vector< char > m_mrna_polya
size_t m_MaxCompsPerQuery
bool AlignSingleCompartment(THitRefs *hitrefs, THit::TCoord range_left, THit::TCoord range_right, SAlignedCompartment *result)
void SetMaxCompsPerQuery(size_t m)
void x_LoadSequence(vector< char > *seq, const objects::CSeq_id &seqid, THit::TCoord start, THit::TCoord finish, bool retain, bool is_genomic=false, bool genomic_strand=true)
size_t GetMaxIntron(void) const
static CVersionAPI & s_GetVersion(void)
Retrieve the library's version object.
bool GetStrand(void) const
string GetTestType(void) const
CRef< objects::CScope > GetScope(void) const
Access the scope object that the library will use to retrieve the sequences.
static int s_GetDefaultGcAgSpliceScore(void)
void SetScoringType(EScoringType type)
double m_MinSingletonIdty
CRef< objects::CScope > m_Scope
void SetNonConsensusSpliceScore(int score)
static double s_GetDefaultCompartmentPenalty(void)
vector< THitRef > THitRefs
void SetAlignerScores(void)
static int s_GetDefaultMatchScore(void)
static double s_GetDefaultMinExonIdty(void)
void SetMaxGenomicExtent(size_t mge)
static int s_GetDefaultMismatchScore(void)
size_t GetMaxGenomicExtent(void) const
static double s_GetDefaultMaxPartExonIdentDrop(void)
static size_t s_ComputeStats(CRef< objects::CSeq_align_set > sas, TScoreSets *output_stats, TOrf cds=TOrf(0, 0), EStatFlags flags=eSF_BasicNonCds)
Generate statistics based on splign-generated seq-align-set, with each seq-align corresponding to an ...
double m_MinCompartmentIdty
CConstRef< TAligner > GetAligner(void) const
void x_FinalizeAlignedCompartment(SAlignedCompartment &ac)
double GetMinCompartmentIdentity(void) const
static EScoringType s_GetDefaultScoringType(void)
static size_t s_TestPolyA(const char *seq, size_t dim, size_t cds_stop=0)
void x_SplitQualifyingHits(THitRefs *phitrefs)
size_t GetMaxCompsPerQuery(void) const
double m_MaxPartExonIdentDrop
void SetTrimToCodons(bool)
int GetMatchScore(void) const
void SetAtAcSpliceScore(int score)
bool IsPolyA(const char *seq, size_t polya_start, size_t dim)
void SetPolyaExtIdentity(double idty)
size_t m_MinPatternHitLength
void SetGapExtensionScore(int score)
double GetMinSingletonIdentity(void) const
CRef< TAligner > & SetAligner(void)
Access the spliced aligner core object.
float x_Run(const char *seq1, const char *seq2)
size_t GetMinSingletonIdentityBps(void) const
bool x_IsInGap(size_t pos)
TSIHToMaskRanges m_MaskMap
static double s_GetDefaultPolyaExtIdty(void)
void SetMinCompartmentIdentity(double idty)
vector< TSegment > TSegments
static CRef< CSplicedAligner > s_CreateDefaultAligner(void)
int GetNonConsensusSpliceScore(void) const
TOrfPair GetCds(const THit::TId &id, const vector< char > *seq_data=0)
double m_CompartmentPenalty
pair< TOrf, TOrf > TOrfPair
void SetEndGapDetection(bool on)
int GetGtAgSpliceScore(void) const
objects::CBioseq_Handle m_mrna_bio_handle
double GetMaxPartExonIdentDrop(void) const
static size_t s_GetDefaultMinHoleLen(void)
void SetGtAgSpliceScore(int score)
int GetGapOpeningScore(void) const
void x_SetPattern(THitRefs *hitrefs)
double GetMinExonIdentity(void) const
CRef< TAligner > m_aligner
static THitRef sx_NewHit(THit::TCoord q0, THit::TCoord q, THit::TCoord s0, THit::TCoord s)
CNWFormatter::SSegment TSegment
static int s_GetDefaultAtAcSpliceScore(void)
void x_MaskSequence(vector< char > *seq, const TSeqRangeColl &mask_ranges, THit::TCoord start, THit::TCoord finish)
static int s_GetDefaultGapExtensionScore(void)
vector< SAlnMapElem > m_alnmap
container_type::const_iterator const_iterator
const_iterator end() const
const_iterator find(const key_type &key) const
static const char s_Version[]
static void test_type(TDSSOCKET *tds, TDSCOLUMN *col)
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
static DLIST_TYPE *DLIST_NAME() prev(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
void ImproveFromLeft(const char *seq1, const char *seq2, CConstRef< CSplicedAligner > aligner)
void ExtendRight(const vector< char > &mrna, const vector< char > &genomic, Int8 ext_len, const CNWAligner *aligner)
void ImproveFromRight(const char *seq1, const char *seq2, CConstRef< CSplicedAligner > aligner)
void AsText(string *output, ETextFormatType type, size_t line_width=100) const
void MakeSegments(vector< SSegment > *psegments) const
int CanExtendRight(const vector< char > &mrna, const vector< char > &genomic) const
const char * GetDonor(void) const
void ImproveFromRight1(const char *seq1, const char *seq2, CConstRef< CSplicedAligner > aligner)
void FromBuffer(const TNetCacheBuffer &buf)
static bool s_IsConsensusSplice(const char *donor, const char *acceptor, bool semi_as_cons=false)
void ToBuffer(TNetCacheBuffer *buf) const
vector< char > TNetCacheBuffer
const char * GetAcceptor(void) const
int CanExtendLeft(const vector< char > &mrna, const vector< char > &genomic) const
void Update(const CNWAligner *aligner)
void ExtendLeft(const vector< char > &mrna, const vector< char > &genomic, Int8 ext_len, const CNWAligner *aligner)
bool IsLowComplexityExon(const char *rna_seq)
void SetBand(size_t band)
virtual void SetSequences(const char *seq1, size_t len1, const char *seq2, size_t len2, bool verify=true)
size_t GetLongestSeg(size_t *q0, size_t *q1, size_t *s0, size_t *s1) const
static void s_GetSpan(const THitRefs &hitrefs, TCoord span[4])
Get sequence span for a set of alignments (hits).
void SetScoreMatrix(const SNCBIPackedScoreMatrix *scoremat)
void SetWms(TScore value)
void SetWi(unsigned char splice_type, TScore value)
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
#define NON_CONST_REVERSE_ITERATE(Type, Var, Cont)
Non constant version of REVERSE_ITERATE macro.
@ eDiag_Fatal
Fatal error â guarantees exit(or abort)
TErrCode GetErrCode(void) const
Get error code.
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
const string & GetMsg(void) const
Get message string.
#define NCBI_RETHROW_SAME(prev_exception, message)
Generic macro to re-throw the same exception.
EDiagSev GetSeverity(void) const
Get exception severity.
CException & SetSeverity(EDiagSev severity)
Set exception severity.
const string AsFastaString(void) const
string GetSeqIdString(bool with_version=false) const
Return seqid string with optional version for text seqid type.
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
TSeqPos GetStart(ESeqLocExtremes ext) const
Return start and stop positions of the seq-loc.
TSeqPos GetStop(ESeqLocExtremes ext) const
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
CSeqVector GetSeqVector(EVectorCoding coding, ENa_strand strand=eNa_strand_plus) const
Get sequence: Iupacna or Iupacaa if use_iupac_coding is true.
@ eCoding_Iupac
Set coding to printable coding (Iupacna or Iupacaa)
TSeqPos GetEndPosition(void) const
return end position of current segment in sequence (exclusive)
CSeqMap::ESegmentType GetType(void) const
TSeqPos GetPosition(void) const
return position of current segment in sequence
CConstRef< CSeq_literal > GetRefGapLiteral(void) const
return CSeq_literal with gap data, or null if either the segment is not a gap, or an unspecified gap
void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const
Fill the buffer string with the sequence data for the interval [start, stop).
static CConstRef< CSeqMap > GetSeqMapForSeq_loc(const CSeq_loc &loc, CScope *scope)
CSeqMap_CI ResolvedRangeIterator(CScope *scope, TSeqPos from, TSeqPos length, ENa_strand strand=eNa_strand_plus, size_t maxResolve=size_t(-1), TFlags flags=fDefaultFlags) const
Iterate segments in the range with specified strand coordinates.
bool NotNull(void) const THROWS_NONE
Check if pointer is not null â same effect as NotEmpty().
TObjectType * GetPointer(void) THROWS_NONE
Get pointer,.
void Reset(void)
Reset reference object.
bool IsNull(void) const THROWS_NONE
Check if pointer is null â same effect as Empty().
uint8_t Uint1
1-byte (8-bit) unsigned integer
int32_t Int4
4-byte (32-bit) signed integer
#define numeric_limits
Pre-declaration of the "numeric_limits<>" template Forcibly overrides (using preprocessor) the origin...
uint32_t Uint4
4-byte (32-bit) unsigned integer
int64_t Int8
8-byte (64-bit) signed integer
TThisType & SetLength(position_type length)
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
void SetFrom(TFrom value)
Assign a value to From data member.
E_Choice
Choice variants.
vector< CRef< CScore > > TScore
TMatch GetMatch(void) const
Get the variant data.
list< CRef< CScore > > Tdata
Tdata & Set(void)
Assign a value to data member.
static string SelectionName(E_Choice index)
Retrieve selection name (for diagnostic purposes).
TMismatch GetMismatch(void) const
Get the variant data.
TGenomic_ins GetGenomic_ins(void) const
Get the variant data.
list< CRef< CSeq_align > > Tdata
TProduct_ins GetProduct_ins(void) const
Get the variant data.
E_Choice Which(void) const
Which variant is currently selected.
@ e_Product_ins
insertion in product sequence (i.e. gap in the genomic sequence)
@ e_Genomic_ins
insertion in genomic sequence (i.e. gap in the product sequence)
@ e_Match
both sequences represented, product and genomic sequences match
@ e_Mismatch
both sequences represented, product and genomic sequences do not match
@ eProduct_type_transcript
ENa_strand
strand of nucleic acid
unsigned int
A callback function used to compare two keys in a database.
const CharType(& source)[N]
void ElemToBuffer(const string &s, char *&p)
void ElemFromBuffer(string &s, const char *&p)
Int4 delta(size_t dimension_, const Int4 *score_)
void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)
string GetDonor(const objects::CSpliced_exon &exon)
static sljit_uw total_size
static const sljit_gpr r1
static const sljit_gpr r2
static CVersionAPI * s_CreateVersion(void)
const string kTestType_20_28
const string kTestType_20_28_plus
const string kTestType_production_default
void CleaveOffByTail(CSplign::THitRefs *phitrefs, TSeqPos polya_start)
const char g_msg_NoAlignment[]
const char g_msg_CompartmentInconsistent[]
const char g_msg_AlignedNotSpecified[]
const char g_msg_BadIdentityThreshold[]
const char g_msg_EmptyHitVectorPassed[]
const char g_msg_NetCacheBufferIncomplete[]
const char g_msg_QueryCoverageOutOfRange[]
const char g_msg_NullPointerPassed[]
const char g_msg_NoExonsAboveIdtyLimit[]
const char g_msg_NoHitsAfterFiltering[]
const char g_msg_InvalidRange[]
ECompartmentStatus m_Status
vector< char > TNetCacheBuffer
void GetBox(Uint4 *box) const
void ToBuffer(TNetCacheBuffer *buf) const
void FromBuffer(const TNetCacheBuffer &buf)
double GetIdentity(void) const
int g(Seg_Gsm *spe, Seq_Mtf *psm, Thd_Gsm *tdg)
const value_slice::CValueConvert< value_slice::SRunTimeCP, FROM > Convert(const FROM &value)
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4