A RetroSearch Logo

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Search Query:

Showing content from http://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/doxyhtml/splign_8cpp_source.html below:

NCBI C++ ToolKit: src/algo/align/splign/splign.cpp Source File

87  const Uint4

kNonCoveredEndThreshold (55);

88  const double

kPower (2.5);

93  const size_t

kMinTermExonSize (28);

94  const double

kMinTermExonIdty (0.9);

97  const int

kFlankExonProx (20);

100  const int

kMaxCutToSplice (6);

108  const int

kEstMatchScore (1000);

109  const int

kEstMismatchScore (-1011);

110  const int

kEstGapOpeningScore(-1460);

111  const int

kEstGapExtensionScore(-464);

113  const int

kEstGtAgSpliceScore(-4988);

114  const int

kEstGcAgSpliceScore(-5999);

115  const int

kEstAtAcSpliceScore(-7010);

116  const int

kEstNonConsensusSpliceScore(-13060);

160

m_CanResetHistory (

false

),

162

m_ScoringType(s_GetDefaultScoringType()),

163

m_MatchScore(s_GetDefaultMatchScore()),

164

m_MismatchScore(s_GetDefaultMismatchScore()),

165

m_GapOpeningScore(s_GetDefaultGapOpeningScore()),

166

m_GapExtensionScore(s_GetDefaultGapExtensionScore()),

167

m_GtAgSpliceScore(s_GetDefaultGtAgSpliceScore()),

168

m_GcAgSpliceScore(s_GetDefaultGcAgSpliceScore()),

169

m_AtAcSpliceScore(s_GetDefaultAtAcSpliceScore()),

170

m_NonConsensusSpliceScore(s_GetDefaultNonConsensusSpliceScore()),

172

m_MinExonIdty(s_GetDefaultMinExonIdty()),

173

m_MinPolyaExtIdty(s_GetDefaultPolyaExtIdty()),

174

m_MinPolyaLen(s_GetDefaultMinPolyaLen()),

175

m_MinHoleLen(s_GetDefaultMinHoleLen()),

176

m_TrimToCodons(s_GetDefaultTrimToCodons()),

177

m_CompartmentPenalty(s_GetDefaultCompartmentPenalty()),

178

m_MinCompartmentIdty(s_GetDefaultMinCompartmentIdty()),

179

m_MinSingletonIdty(s_GetDefaultMinCompartmentIdty()),

187

m_max_genomic_ext (s_GetDefaultMaxGenomicExtent()),

189

m_MaxPartExonIdentDrop (s_GetDefaultMaxPartExonIdentDrop()),

191

m_MaxCompsPerQuery (0),

192

m_MinPatternHitLength (13)

228

aligner->SetScoreMatrix(

NULL

);

246  if

(low_query_quality) {

247

aligner->

SetWm

(kEstMatchScore);

248

aligner->

SetWms

(kEstMismatchScore);

249

aligner->

SetWg

(kEstGapOpeningScore);

250

aligner->

SetWs

(kEstGapExtensionScore);

252

aligner->

SetWi

(0, kEstGtAgSpliceScore);

253

aligner->

SetWi

(1, kEstGcAgSpliceScore);

254

aligner->

SetWi

(2, kEstAtAcSpliceScore);

255

aligner->

SetWi

(3, kEstNonConsensusSpliceScore);

392  if

(!(0 <= idty && idty <= 1)) {

402  if

(!(0 <= idty && idty <= 1)) {

424  if

(!(0 <= idty && idty <= 1)) {

434  if

(!(0 <= idty && idty <= 1)) {

590  if

(penalty < 0 || penalty > 1) {

608  if

( pos+1 == 0 || pos >=

m_genomic

.size() )

return true

;

622  bool

retain,

bool

is_genomic,

bool

genomic_strand)

647  string

(

"Sequence is empty: "

)

654  if

(start > finish) {

656

ostr <<

"Invalid sequence interval requested for " 658

<< start <<

'\t'

<< finish;

670  CSeq_loc

tmp_loc(*tmp_id, start, finish, strand);

673

seq->resize(1 + finish - start);

674  copy

(s.begin(), s.end(), seq->begin());

682  m_Scope

->RemoveFromHistory(bh);

712  for

(

TSeqPos

loop = start; loop <= finish; loop++) {

743

hr->SetQueryStart(q0);

744

hr->SetSubjStart(s0);

745

hr->SetQueryStop(q - 1);

746

hr->SetSubjStop(s - 1);

747

hr->SetLength(q - q0);

748

hr->SetMismatches(0);

751

hr->SetScore(2*(q - q0));

759  const char

* Seq1 (&

m_mrna

.front());

760  const char

* Seq2 (&

m_genomic

.front());

765  const double

idty (h->GetIdentity());

766  const bool

diag (h->GetGaps() == 0 && h->GetQuerySpan() == h->GetSubjSpan());

767  if

(idty == 1 || idty < .95 || h->

GetLength

() < 100 || !diag) {

772  int

q0 (-1), s0 (-1), q1 (h->GetQueryMax());

773  int

q (h->GetQueryMin()), s (h->GetSubjMin());

776  if

(Seq1[q++] != Seq2[s++]) {

779

hr->SetQueryId(h->GetQueryId());

780

hr->SetSubjId(h->GetSubjId());

796

hr->SetQueryId(h->GetQueryId());

797

hr->SetSubjId(h->GetSubjId());

819

THitComparator sorter (THitComparator::eQueryMin);

820

stable_sort(phitrefs->begin(), phitrefs->end(), sorter);

838  const bool

non_intersect = ( prevSmax < h->GetSubjMin() ) || ( prevSmin > h->GetSubjMax() );

839  if

(!non_intersect) {

844  const bool

consistent (h->GetSubjStrand()?

850

+

string

(

" (extra long introns)"

));

855  prev

= h->GetSubjStop();

856

prevSmin = h->GetSubjMin();

857

prevSmax = h->GetSubjMax();

860

phitrefs->erase(

remove_if

(phitrefs->begin(), phitrefs->end(),

865

vector<size_t> pattern0;

866

vector<pair<bool,double> > imperfect;

867  double

max_idty (0.0);

868  for

(

size_t i

(0),

n

(phitrefs->size());

i

<

n

; ++

i

) {

870  const THitRef

& h ((*phitrefs)[

i

]);

871  const bool

valid (

true

);

874

pattern0.push_back(h->GetQueryMin());

875

pattern0.push_back(h->GetQueryMax());

876

pattern0.push_back(h->GetSubjMin());

877

pattern0.push_back(h->GetSubjMax());

878  const double

idty (h->GetIdentity());

879  const bool

imprf (idty < 1.00

880

|| h->GetQuerySpan() != h->GetSubjSpan()

881

|| h->GetMismatches() > 0

882

|| h->GetGaps() > 0);

883

imperfect.push_back(pair<bool,double>(imprf, idty));

884  if

(idty > max_idty) {

890  if

(max_idty < .85 && pattern0.size() >= 4) {

897  const size_t

dim (pattern0.size());

899  const char

* Seq1 (&

m_mrna

.front());

901  const char

* Seq2 (&

m_genomic

.front());

902  const size_t

SeqLen2 (

m_genomic

.size());

906  bool

some_error (

false

), bad_input (

false

);

909  for

(

size_t i

(0);

i

< dim;

i

+= 4) {

911  if

(pattern0[

i

] > pattern0[

i

+1] || pattern0[

i

+2] > pattern0[

i

+3]) {

912

ostr_err <<

"Pattern hits must be specified in plus strand"

;

913

some_error = bad_input =

true

;

918  if

(pattern0[

i

] <= pattern0[

i

-3] || pattern0[

i

+2] <= pattern0[

i

-1]) {

920

<<

string

(

" (hits not sorted)"

);

926  const bool

br1 (pattern0[

i

+1] >= SeqLen1);

927  const bool

br2 (pattern0[

i

+3] >= SeqLen2);

930

ostr_err <<

"Pattern hits out of range (" 932

<< phitrefs->front()->GetQueryId()->GetSeqIdString(

true

)

934

<< phitrefs->front()->GetSubjId()->GetSeqIdString(

true

)

938

ostr_err <<

"\tquery_pattern_max = "

<< pattern0[

i

+1]

939

<<

"; query_len = "

<< SeqLen1 << endl;

943

ostr_err <<

"\tsubj_pattern_max = "

<< pattern0[

i

+3]

944

<<

"; subj_len = "

<< SeqLen2 << endl;

954

ostr_err <<

"Pattern dimension must be a multiple of four"

;

955

some_error = bad_input =

true

;

959

ostr_err <<

" (query = " 960

<< phitrefs->front()->GetQueryId()->AsFastaString()

962

<< phitrefs->front()->GetSubjId()->AsFastaString() <<

')' 967  if

(err.size() > 0) {

977

map_elem.

m_box

[0] = map_elem.

m_box

[2] = 0;

982  for

(

size_t i

= 0;

i

< dim;

i

+= 4) {

984  size_t

L1, R1, L2, R2;

985  size_t

max_seg_size (0);

987  const bool

imprf (imperfect[

i

/4].

first

);

994  const size_t

len1 (pattern0[

i

+1] - pattern0[

i

] + 1);

995  const size_t

len2 (pattern0[

i

+3] - pattern0[

i

+2] + 1);

996  const size_t

maxlen (

max

(len1, len2));

997  const size_t

lendif (len1 < len2? len2 - len1: len1 - len2);

998  size_t

band (

size_t

((1 - imperfect[

i

/4].second) * maxlen) + 2);

999  if

(band < lendif) band += lendif;

1002

Seq2 + pattern0[

i

+2], len2,

1010

R1 = pattern0[

i

+1] - pattern0[

i

] - 1;

1012

R2 = pattern0[

i

+3] - pattern0[

i

+2] - 1;

1013

max_seg_size = R1 - L1 + 1;

1020  size_t

cut ((1 + R1 - L1) / 5);

1021  if

(cut > 20) cut = 20;

1023  const size_t

l1 (L1 + cut), l2 (L2 + cut);

1024  const size_t r1

(R1 - cut),

r2

(R2 - cut);

1025  if

(l1 <

r1

&& l2 <

r2

) {

1031  size_t

q0 (pattern0[

i

] + L1);

1032  size_t

s0 (pattern0[

i

+2] + L2);

1033  size_t

q1 (pattern0[

i

] + R1);

1034  size_t

s1 (pattern0[

i

+2] + R2);

1038  const size_t

hitlen_q (pattern0[

i

+ 1] - pattern0[

i

] + 1);

1039  const size_t

sh (

size_t

(hitlen_q / 4));

1041  size_t delta

(sh > L1? sh - L1: 0);

1045  const size_t

h2s_right (hitlen_q - R1 - 1);

1046  delta

= sh > h2s_right? sh - h2s_right: 0;

1050  if

(q0 > q1 || s0 > s1) {

1053

q0 = pattern0[

i

] + L1;

1054

s0 = pattern0[

i

+2] + L2;

1055

q1 = pattern0[

i

] + R1;

1056

s1 = pattern0[

i

+2] + R2;

1063  const size_t

pattern_dim =

m_pattern

.size();

1070

map_elem.

m_box

[1] = pattern0[

i

+1];

1071

map_elem.

m_box

[3] = pattern0[

i

+3];

1074

map_elem.

m_box

[1] = SeqLen1 - 1;

1075

map_elem.

m_box

[3] = SeqLen2 - 1;

1085  const string

strid (id->AsFastaString());

1095  if

(seq_data == 0) {

1102

vector<CRef<CSeq_loc> > orfs;

1103

vector<string> start_codon;

1104

start_codon.push_back(

"ATG"

);

1107  TSeqPos

max_len_plus (0), max_len_minus (0);

1108  TSeqPos

max_from_plus (0), max_from_minus (0);

1109  TSeqPos

max_to_plus (0), max_to_minus (0);

1113  const ENa_strand

orf_strand ((*orf)->GetInt().GetStrand());

1117  if

(

len

> max_len_minus) {

1118

max_len_minus =

len

;

1119

max_from_minus = (*orf)->GetInt().GetTo();

1120

max_to_minus = (*orf)->GetInt().GetFrom();

1124  if

(

len

> max_len_plus) {

1125

max_len_plus =

len

;

1126

max_from_plus = (*orf)->GetInt().GetFrom();

1127

max_to_plus = (*orf)->GetInt().GetTo();

1132  if

(max_len_plus > 0) {

1133

rv.first =

TOrf

(max_from_plus, max_to_plus);

1136  if

(max_len_minus > 0) {

1137

rv.second =

TOrf

(max_from_minus, max_to_minus);

1174  if

(h.

NotNull

() && h->GetQueryStrand() ==

false

) {

1183  if

(hitrefs.size() == 0) {

1189  THit::TId

id_query (hitrefs.front()->GetQueryId());

1192  if

(mrna_size == kMaxCoord) {

1194  string

(

"Sequence not found: "

) + id_query->AsFastaString());

1204

min_singleton_idty_final,

1208

comps.

Run

(hitrefs.begin(), hitrefs.end(),

GetScope

());

1210

comps.

Run

(hitrefs.begin(), hitrefs.end());

1213

pair<size_t,size_t> dim (comps.

GetCounts

());

1214  if

(dim.second > 0) {

1242  bool

same_strand (

false

);

1249  for

(

size_t i

(0);

i

< dim.first; ++

i

, box += 4) {

1251  if

(

i

+ 1 == dim.first) {

1253

same_strand =

false

;

1258

same_strand = strand_this == strand_next;

1259

smax = same_strand? (box + 4)[2]:

kMax_UInt

;

1264  if

(smax < box[3]) {

1267  "Unexpected order of compartments"

);

1272

comps.

Get

(

i

, comp_hits);

1274  if

(smax < box[3]) smax = box[3];

1275  if

(smin > box[2]) smin = box[2];

1299

smin = same_strand? box[3]: 0;

1327  THit::TId

id_query (phitrefs->front()->GetQueryId());

1381  const double

kMinPercAInPolya (0.80);

1384  for

(

size_t i

= polya_start;

i

<dim; ++

i

) {

1385  if

(seq[

i

] ==

'A'

) ++

cnt

;

1387  if

(

cnt

>= (dim - polya_start)*kMinPercAInPolya)

return true

;

1394  const size_t

kMaxNonA (3), kMinAstreak (5);

1395  Int8 i

(dim - 1), i0 (dim);

1396  for

(

size_t

count_non_a (0), astreak (0);

i

>= 0 && count_non_a < kMaxNonA; --

i

) {

1398  if

(seq[

i

] !=

'A'

) {

1403  if

(++astreak >= kMinAstreak) {

1409  const size_t len

(dim - i0);

1411  if

(

len

>= kMinAstreak) {

1413  if

(0 < cds_stop && cds_stop < dim && rv <= cds_stop) {

1439  if

(range_left > range_right) {

1443  if

(phitrefs->size() == 0) {

1452  for

(

size_t i

(0),

n

(phitrefs->size());

i

<

n

; ++

i

) {

1457  const bool

new_strand (!(h->GetSubjStrand()));

1458

h->SetQueryStart(a1);

1459

h->SetQueryStop(a0);

1460

h->SetSubjStrand(new_strand);

1474

THitRefs::iterator ii (phitrefs->begin()), jj (phitrefs->end() - 1);

1477  bool

b0 (

true

), b1 (

true

);

1478  while

(b0 && b1 && ii < jj) {

1480  while

(ii->IsNull() && ii < jj) ++ii;

1481  while

(jj->IsNull() && ii < jj) --jj;

1485  const double

hit_idty ((*ii)->GetIdentity());

1486  const size_t

min_termhitlen (

1487

hit_idty < .9999? min_termhitlen2: min_termhitlen1);

1489  if

((*ii)->GetQuerySpan() < min_termhitlen) {

1499  const double

hit_idty ((*jj)->GetIdentity());

1500  const size_t

min_termhitlen (

1501

hit_idty < .9999? min_termhitlen2: min_termhitlen1);

1503  if

((*jj)->GetQuerySpan() < min_termhitlen) {

1512

phitrefs->erase(

remove_if

(phitrefs->begin(), phitrefs->end(),

1516  if

(phitrefs->size() == 0) {

1525  THit::TCoord

qmin (span[0]), qmax (span[1]), smin (span[2]), smax (span[3]);

1527  const bool

ctg_strand (phitrefs->front()->GetSubjStrand());

1536  THit::TCoord

fixed_left (kMaxCoord / 2), fixed_right(fixed_left);

1538  const size_t

kTermLenCutOff_m2 (10);

1539  const bool

fix_left (qmin <= kTermLenCutOff_m2);

1540  const bool

fix_right (rspace <= kTermLenCutOff_m2);

1541  if

(fix_left || fix_right) {

1543  if

(phitrefs->size() > 1) {

1547  THit::TCoord

prev_start (phitrefs->front()->GetSubjStart());

1553

cur_start - prev_start:

1554

prev_start - cur_start);

1555  if

(intron > max_intron) {

1556

max_intron = intron;

1558

prev_start = cur_start;

1561  const double

factor (2.5);

1562  if

(fix_left) { fixed_left =

THit::TCoord

(max_intron * factor); }

1563  if

(fix_right) { fixed_right =

THit::TCoord

(max_intron * factor); }

1568  if

(fix_left) { fixed_left = single_hit_extent; }

1569  if

(fix_right) { fixed_right = single_hit_extent; }

1576  const THit::TCoord

extent_left (

min

(extent_left_m1, extent_left_m2));

1581  if

(extent_right < poly_length) extent_right = poly_length;

1584

smin =

max

(0,

int

(smin - extent_left));

1585

smax += extent_right;

1588

smin =

max

(0,

int

(smin - extent_right));

1589

smax += extent_left;

1597  if

(smin < range_left) {

1600  if

(smax > range_right) {

1605  if

(phitrefs->size() > 1) {

1606  THit::TId

id_query (phitrefs->front()->GetSubjId());

1608

tmp_id->

Assign

(*id_query);

1613  if

(hitmin > smin) {

1617  TSeqPos

tmplen = hitmin - smin;

1619  for

(;smit; ++smit) {

1624  _ASSERT

( smin + pos <= hitmin );

1632  if

(smax > hitmax) {

1636  TSeqPos

tmplen = smax - hitmax;

1638  for

(;smit; ++smit) {

1643  _ASSERT

( hitmax + pos < smax );

1644

smax = hitmax + pos;

1656

smin, smax,

true

,

true

, ctg_strand);

1661  if

(smax >= ctg_end) {

1662

smax = ctg_end > 0? ctg_end - 1: 0;

1665  if

(ctg_strand ==

false

) {

1680  if

(!(smin <= hsmin && hsmax <= smax)) {

1682

ostr <<

"\nOne of compartment hits:\n"

<< *h

1683

<<

"\n goes outside the genome range = ("

<< smin+1 <<

", "

<< smax+1 <<

')' 1684

<<

"\n allowed for the compartment"

;

1689  if

(ctg_strand ==

false

) {

1693

h->SetSubjStart(a2);

1703

(*ii)->Shift(-(

Int4

)qmin, -(

Int4

)smin);

1722  Int8

last_exon = -1;

1731  if

(last_exon == -1) {

1739  const char

* p0 = &

m_mrna

.front() + s.

m_box

[1] + 1;

1741  const char

* p = p0;

1742  const char

* q = q0;

1743  const char

* pe = &

m_mrna

.front() + mrna_size;

1747  size_t

sh = 0,

ct

=0;

1748  for

(; p < pe && q < qe; ++p, ++q, ++

ct

) {

1749  if

(

toupper

(*p) !=

'N'

&& *p == *q) {

1764  for

(;p>=p0;--p,--q,++

ct

) {

1765  if

(

toupper

(*p) !=

'N'

&& *p == *q) {

1768  if

( match_num <

ct

*kMinExonFlankIdty) {

1781  for

(

ct

= 0,p = p0, q = q0;

ct

< sh; ++p, ++q, ++

ct

) {

1782  if

(

toupper

(*p) !=

'N'

&& *p == *q) {

1791  const size_t

ann_dim = s.

m_annot

.size();

1792  if

(ann_dim > 2 && s.

m_annot

[ann_dim - 3] ==

'>'

) {

1793

s.

m_annot

[ann_dim - 2] = q < qe? *q:

' '

;

1794

s.

m_annot

[ann_dim - 1] = q < (qe-1)? *(q+1):

' '

;

1804  if

(coord < mrna_size ) {

1809  if

( ( (

int

)mrna_size - (

int

)s.

m_box

[1] - 1 ) >= kFlankExonProx &&

1811  int

seq1_pos = (

int

)s.

m_box

[1];

1812  int

seq2_pos = (

int

)s.

m_box

[3];

1813  size_t

det_pos = s.

m_details

.size() - 1;

1814  size_t

min_det_pos = det_pos - kMaxCutToSplice;

1815  int

min_pos = (

int

)s.

m_box

[0] + 8;

1816  while

(seq1_pos >= min_pos && det_pos >= min_det_pos) {

1817  if

( (

size_t

)(seq2_pos + 2) <

m_genomic

.size() && s.

m_details

[det_pos] ==

'M'

&&

1819  if

( det_pos + 1 < s.

m_details

.size() ) {

1820

s.

m_box

[1] = seq1_pos;

1821

s.

m_box

[3] = seq2_pos;

1825  size_t

adim = s.

m_annot

.size();

1826  if

(adim > 0 && s.

m_annot

[adim-1] ==

'>'

) {

1828

}

else if

(adim > 2 && s.

m_annot

[adim-3] ==

'>'

) {

1858

ss.

m_box

[0] = coord;

1859

ss.

m_box

[1] = mrna_size - 1;

1871

mcount += jj->m_idty * jj->m_len;

1875  const size_t

min_singleton_idty_final (

1878  if

(mcount < min_singleton_idty_final) {

1886

jj->m_box[0] += qmin;

1887

jj->m_box[1] += qmin;

1890

jj->m_box[0] = mrna_size - jj->m_box[0] - 1;

1891

jj->m_box[1] = mrna_size - jj->m_box[1] - 1;

1896

jj->m_box[2] += smin;

1897

jj->m_box[3] += smin;

1900

jj->m_box[2] = smax - jj->m_box[2];

1901

jj->m_box[3] = smax - jj->m_box[3];

1914  bool

severe (

true

);

1938

TSegmentVector segments;

1941 #ifdef DBG_DUMP_PATTERN 1942

cerr <<

"Pattern:"

<< endl;

1945  const size_t

map_dim (

m_alnmap

.size());

1951  size_t

cds_start (0), cds_stop (0);

1952  for

(

size_t i

(0);

i

< map_dim; ++

i

) {

1957  const size_t

len1 (zone.

m_box

[1] - zone.

m_box

[0] + 1);

1958  const size_t

len2 (zone.

m_box

[3] - zone.

m_box

[2] + 1);

1971

Seq2 + zone.

m_box

[2], len2,

1975

vector<size_t> pattern;

1980  "CSplign::x_Run(): Invalid alignment pattern"

);

1985

back_inserter(pattern));

1988  for

(

size_t

j (0), pt_dim (pattern.size()); j < pt_dim; j += 4) {

1990 #ifdef DBG_DUMP_PATTERN 1991

cerr << (1 + pattern[j]) <<

'\t'

<< (1 + pattern[j+1]) <<

'\t' 1992

<<

"(len = "

<< (pattern[j+1] - pattern[j] + 1) <<

")\t" 1993

<< (1 + pattern[j+2]) <<

'\t'

<< (1 + pattern[j+3])

1994

<<

"(len = "

<< (pattern[j+3] - pattern[j+2] + 1) <<

")\t" 1996 #undef DBG_DUMP_PATTERN 1999

pattern[j] -= zone.

m_box

[0];

2000

pattern[j+1] -= zone.

m_box

[0];

2001

pattern[j+2] -= zone.

m_box

[2];

2002

pattern[j+3] -= zone.

m_box

[2];

2007  m_aligner

->SetEndSpaceFree(

true

,

true

,

true

,

true

);

2008  m_aligner

->SetCDS(cds_start, cds_stop);

2014 #ifdef DBG_DUMP_TYPE2 2021 #undef DBG_DUMP_TYPE2 2028  if

(

i

+ 1 < map_dim) {

2031  g

.m_box[0] = zone.

m_box

[1] + 1;

2033  g

.m_box[2] = zone.

m_box

[3] + 1;

2041 #ifdef DUMP_ORIG_SEGS 2042

cerr <<

"Orig segments:"

<< endl;

2043  ITERATE

(TSegmentVector, ii, segments) {

2044

cerr << ii->m_exon <<

'\t'

<< ii->m_idty <<

'\t'

<< ii->m_len <<

'\t' 2045

<< ii->m_box[0] <<

'\t'

<< ii->m_box[1] <<

'\t' 2046

<< ii->m_box[2] <<

'\t'

<< ii->m_box[3] <<

'\t' 2047

<< ii->m_annot <<

'\t'

<< ii->m_score << endl;

2051  if

(segments.size() == 0) {

2057  const size_t

SeqLen2 (

m_genomic

.size());

2079  bool

is_test =

false

;

2080  bool

is_test_plus =

false

;

2085

is_test_plus =

true

;

2094

TSegmentVector::iterator

prev

;

2096  if

(ii->m_exon ==

false

)

continue

;

2100  if

(

prev

->m_exon) {

2130  bool

abuts_gap =

x_IsInGap

( ii->m_box[2] - 1 );

2135  if

( ii->m_box[0] >

prev

->m_box[1] + 1) {

2139

sgap.

m_box

[1] = ii->m_box[0] - 1;

2140

sgap.

m_box

[3] = ii->m_box[2] - 1;

2142

ii = segments.insert(ii, sgap);

2157  bool

continue_iterations =

false

;

2165  if

(segments.size() == 0) {

2169  size_t

exon_count0 (0);

2170  ITERATE

(TSegmentVector, ii, segments) {

2171  if

(ii->m_exon) ++exon_count0;

2176  bool

first_exon =

true

;

2178  for

(

size_t

k0 = 0; k0 < segments.size(); ++k0) {

2184

first_exon =

false

;

2187

}

else if

( !segments[k0-1].m_exon ) {

2201  if

(last_exon ==

NULL

) {

2211

TSegmentVector tmp_segments;

2214  int

prev_exon_index = -1;

2215  for

(

size_t

k0 = 0; k0 < segments.size(); ++k0) {

2216  if

(segments[k0].m_exon) {

2217  if

(prev_exon_index == -1) {

2218  if

(segments[k0].m_box[0] > 0) {

2220  g

.m_box[1] = segments[k0].m_box[0] - 1;

2222  g

.m_box[3] = segments[k0].m_box[2] - 1;

2223  g

.m_len =

g

.m_box[1] -

g

.m_box[0] + 1;

2224

tmp_segments.push_back(

g

);

2228  if

( segments[prev_exon_index].m_box[1] + 1 < segments[k0].m_box[0] ) {

2229  g

.m_box[0] = segments[prev_exon_index].m_box[1] + 1;

2230  g

.m_box[1] = segments[k0].m_box[0] - 1;

2231  g

.m_box[2] = segments[prev_exon_index].m_box[3] + 1;

2232  g

.m_box[3] = segments[k0].m_box[2] - 1;

2233  g

.m_len =

g

.m_box[1] -

g

.m_box[0] + 1;

2234

tmp_segments.push_back(

g

);

2237

prev_exon_index = (

int

)k0;

2238

tmp_segments.push_back(segments[k0]);

2243  if

(prev_exon_index >= 0) {

2244  if

(segments[prev_exon_index].m_box[1] + 1 < SeqLen1) {

2245  g

.m_box[0] = segments[prev_exon_index].m_box[1] + 1;

2246  g

.m_box[1] = SeqLen1 - 1;

2247  g

.m_box[2] = segments[prev_exon_index].m_box[3] + 1;

2248  g

.m_box[3] = SeqLen2 - 1;

2249  g

.m_len =

g

.m_box[1] -

g

.m_box[0] + 1;

2250

tmp_segments.push_back(

g

);

2255

segments.swap(tmp_segments);

2266  while

(k0 < segments.size()) {

2272  const double

min_idty (

len

>= kMinTermExonSize?

2276  if

(s.

m_idty

>= min_idty) {

2283  long int

k1 (segments.size() - 1);

2284  while

(k1 >= (

int

)k0) {

2290  const double

min_idty (

len

>= kMinTermExonSize?

2294  if

(s.

m_idty

>= min_idty) {

2308

ii->ImproveFromLeft1(Seq1, Seq2,

m_aligner

);

2319

ii->ImproveFromRight1(Seq1, Seq2,

m_aligner

);

2327  for

(

unsigned int

k0 = 0; k0 < segments.size(); ++k0) {

2328  if

(!segments[k0].m_exon) {

2329  if

( k0 > 0 && segments[k0-1].m_exon) {

2332  if

(

x_IsInGap

(segments[k0-1].m_box[3] + 1) ||

2336  if

( ( (

int

)SeqLen1 - (

int

)segments[k0-1].m_box[1] - 1 ) >= kFlankExonProx ) {

2343

segments[k0-1].ImproveFromRight1(Seq1, Seq2,

m_aligner

);

2346

segments[k0-1].ImproveFromRight(Seq1, Seq2,

m_aligner

);

2349  if

( k0 + 1 < segments.size() && segments[k0+1].m_exon) {

2352  if

(

x_IsInGap

(segments[k0+1].m_box[2] - 1) ||

2356  if

( (

int

)segments[k0+1].m_box[0] >= kFlankExonProx ) {

2363

segments[k0+1].ImproveFromLeft1(Seq1, Seq2,

m_aligner

);

2366

segments[k0+1].ImproveFromLeft(Seq1, Seq2,

m_aligner

);

2375  if

( segments.size() == 0 ) {

2380  if

(segments[0].m_box[0] > 0) {

2384  g

.m_box[1] = segments[0].m_box[0] - 1;

2386  g

.m_box[3] = segments[0].m_box[2] - 1;

2388

segments.insert(segments.begin(),

g

);

2392  TSegment

& seg_last (segments.back());

2393  if

(seg_last.

m_box

[1] + 1 < SeqLen1) {

2396  g

.m_box[0] = seg_last.

m_box

[1] + 1;

2397  g

.m_box[1] = SeqLen1 - 1;

2398  g

.m_box[2] = seg_last.

m_box

[3] + 1;

2399  g

.m_box[3] = SeqLen2 - 1;

2401

segments.push_back(

g

);

2408  bool

first_exon =

true

;

2414  if

(ii->IsLowComplexityExon(Seq1) ) {

2417

first_exon =

false

;

2424  if

( last_exon != 0 ) {

2434  if

(ii->m_exon ==

false

)

continue

;

2442

sl.ImproveFromLeft1(Seq1, Seq2,

m_aligner

);

2445  if

( sl.m_details == ii->m_details && sr.

m_details

== ii->m_details ) {

2451  if

( sr.

m_details

!= ii->m_details && ii != segments.begin() && (ii-1)->m_exon && ( (ii+1) == segments.end() || !(ii+1)->m_exon ) ) {

2454

}

else if

( sl.m_details != ii->m_details && (ii+1) != segments.end() && (ii+1)->m_exon && ( ii == segments.begin() || !(ii-1)->m_exon) ) {

2456

}

else if

(sl.m_details == ii->m_details ||

2463  if

(sl.m_details != ii->m_details || sr.

m_details

!= ii->m_details){

2464  if

(sl.m_details == ii->m_details ||

2473  if

(ii != segments.begin() && (ii)->m_box[0] > (ii - 1)->m_box[1] + 1) {

2475

sgap.

m_box

[0] = (ii - 1)->m_box[1] + 1;

2476

sgap.

m_box

[2] = (ii - 1)->m_box[3] + 1;

2477

sgap.

m_box

[1] = ii->m_box[0] - 1;

2478

sgap.

m_box

[3] = ii->m_box[2] - 1;

2480

ii = segments.insert(ii, sgap);

2481

continue_iterations =

true

;

2484  if

( (ii+1) != segments.end() && (ii+1)->m_box[0] > ii->m_box[1] + 1 ) {

2487

sgap.

m_box

[0] = (ii - 1)->m_box[1] + 1;

2488

sgap.

m_box

[2] = (ii - 1)->m_box[3] + 1;

2489

sgap.

m_box

[1] = ii->m_box[0] - 1;

2490

sgap.

m_box

[3] = ii->m_box[2] - 1;

2492

ii = segments.insert(ii, sgap);

2493

continue_iterations =

true

;

2500

}

else if

(ii->m_idty < .9 && ii->m_len < 20) {

2502  bool

nc_prev (

false

), nc_next (

false

);

2503  if

(ii != segments.begin() && (ii - 1)->m_exon) {

2508  if

( (ii+1) != segments.end() && (ii + 1)->m_exon) {

2511

(ii + 1)->GetAcceptor());

2513  if

( nc_prev || nc_next ) {

2524  for

(

size_t

k (0); k < segments.size(); ++k) {

2526  if

(s.

m_exon

==

false

)

continue

;

2528  if

( ( k == 0 ) || ( ! segments[k-1].m_exon ) ) {

2530  if

( ( k + 1 == segments.size() ) || ( ! segments[k+1].m_exon ) ) {

2546  for

(

size_t

k (0); k < segments.size(); ++k) {

2548  if

(s.

m_exon

==

false

)

continue

;

2550  bool

drop (

false

);

2560  if

( (

int

)s.

m_box

[0] >= kFlankExonProx ) {

2563  if

(adj ==

eNo

) adj = eSoft;

2567  if

( k + 1 == segments.size() ) {

2568  if

( ( (

int

)SeqLen1 - (

int

)s.

m_box

[1] - 1 ) >= kFlankExonProx ) {

2571  if

(adj ==

eNo

) adj = eSoft;

2575  if

(k > 0 && ( ! segments[k-1].m_exon ) ) {

2576  if

( (

int

)s.

m_box

[0] >= kFlankExonProx ) {

2579  if

(adj ==

eNo

) adj = eSoft;

2583  if

(k + 1 < segments.size() && (! segments[k+1].m_exon ) ) {

2584  if

( ( (

int

)SeqLen1 - (

int

)s.

m_box

[1] - 1 ) >= kFlankExonProx ) {

2587  if

(adj ==

eNo

) adj = eSoft;

2595

}

else if

(adj == eHard) {

2596  if

( s.

m_len

< 20 ) {

2599  if

( s.

m_idty

< kMinTermExonIdty && s.

m_len

< kMinTermExonSize ) {

2613  size_t

exon_count (0);

2615  for

(

size_t i

= 0;

i

< segments.size(); ++

i

) {

2618

term_segs[exon_count] = &s;

2619  if

(++exon_count == 2) {

2625  if

(exon_count == 2) {

2632  size_t

exon_count (0);

2634  for

(

Int8 i

= segments.size() - 1;

i

>= 0; --

i

) {

2637

term_segs[exon_count] = &s;

2638  if

(++exon_count == 2) {

2644  if

(exon_count == 2) {

2650  bool

gap_prev (

false

);

2651  for

(

size_t

k (0); k < segments.size(); ++k) {

2654  if

(s.

m_exon

==

false

) {

2658  size_t

length (s.

m_box

[1] - s.

m_box

[0] + 1);

2659  bool

gap_next (

false

);

2660  if

(k + 1 < segments.size()) {

2661

gap_next = !segments[k+1].m_exon;

2663  if

(length <= 10 && (gap_prev || gap_next)) {

2672  int

gap_start_idx (-1);

2673  if

(segments.size() && segments[0].m_exon ==

false

) {

2677  for

(

size_t

k (0); k < segments.size(); ++k) {

2680  if

(gap_start_idx == -1) {

2681

gap_start_idx =

int

(k);

2683

s.

m_box

[0] = segments[k-1].m_box[1] + 1;

2684

s.

m_box

[2] = segments[k-1].m_box[3] + 1;

2689  if

(gap_start_idx >= 0) {

2691  g

.m_box[1] = s.

m_box

[0] - 1;

2692  g

.m_box[3] = s.

m_box

[2] - 1;

2693  g

.m_len =

g

.m_box[1] -

g

.m_box[0] + 1;

2694  g

.m_details.resize(0);

2702  if

(gap_start_idx >= 0) {

2704  g

.m_box[1] = segments[segments.size()-1].m_box[1];

2705  g

.m_box[3] = segments[segments.size()-1].m_box[3];

2706  g

.m_len =

g

.m_box[1] -

g

.m_box[0] + 1;

2707  g

.m_details.resize(0);

2711  size_t

exon_count1 (0);

2713  if

(ii->m_exon) ++exon_count1;

2716  if

(exon_count1 == 0 ) {

2720  if

(exon_count0 == exon_count1 && continue_iterations ==

false

)

break

;

2729  bool

first_exon =

true

;

2731  int

last_exon_index = -1;

2732  for

(

size_t

k0 = 0; k0 < sdim; ++k0) {

2734

last_exon_index = (

int

)k0;

2737  for

(

unsigned int

k0 = 0; k0 < sdim; ++k0) {

2739  bool

cut_from_left =

false

;

2740  bool

cut_from_right =

false

;

2746

first_exon =

false

;

2749  if

( (

int

)s.

m_box

[0] >= kFlankExonProx ) {

2750

cut_from_left =

true

;

2752

first_exon =

false

;

2754

cut_from_left =

true

;

2762  if

( last_exon_index == (

int

)k0 ) {

2763  if

( ( (

int

)SeqLen1 - (

int

)s.

m_box

[1] - 1 ) >= kFlankExonProx ) {

2764

cut_from_right =

true

;

2766

}

else if

(k0 + 1 < sdim && (!

m_segments

[k0+1].m_exon ) ) {

2767

cut_from_right =

true

;

2771  if

(cut_from_left) {

2772  int

seq1_pos = (

int

)s.

m_box

[0];

2773  int

seq2_pos = (

int

)s.

m_box

[2];

2775  int

max_pos = (

int

)s.

m_box

[1] - 8;

2776  while

(seq1_pos <= max_pos && det_pos <= kMaxCutToSplice) {

2777  if

( seq2_pos > 1 && s.

m_details

[det_pos] ==

'M'

&&

2778  toupper

(Seq2[seq2_pos-2]) ==

'A'

&&

toupper

(Seq2[seq2_pos-1]) ==

'G'

) {

2780

s.

m_box

[0] = seq1_pos;

2781

s.

m_box

[2] = seq2_pos;

2791  if

( k0>0 && ( !

m_segments

[k0-1].m_exon ) ) {

2793  g

.m_box[1] = s.

m_box

[0] - 1;

2794  g

.m_box[3] = s.

m_box

[2] - 1;

2795  g

.m_len =

g

.m_box[1] -

g

.m_box[0] + 1;

2821  if

(cut_from_right) {

2822  int

seq1_pos = (

int

)s.

m_box

[1];

2823  int

seq2_pos = (

int

)s.

m_box

[3];

2824  size_t

det_pos = s.

m_details

.size() - 1;

2825  size_t

min_det_pos = det_pos - kMaxCutToSplice;

2826  int

min_pos = (

int

)s.

m_box

[0] + 8;

2827  while

(seq1_pos >= min_pos && det_pos >= min_det_pos) {

2828  if

( (

size_t

)(seq2_pos + 2) <

m_genomic

.size() && s.

m_details

[det_pos] ==

'M'

&&

2829  toupper

(Seq2[seq2_pos+1]) ==

'G'

&&

toupper

(Seq2[seq2_pos+2]) ==

'T'

) {

2830  if

( det_pos + 1 < s.

m_details

.size() ) {

2831

s.

m_box

[1] = seq1_pos;

2832

s.

m_box

[3] = seq2_pos;

2836  size_t

adim = s.

m_annot

.size();

2837  if

(adim > 0 && s.

m_annot

[adim-1] ==

'>'

) {

2839

}

else if

(adim > 2 && s.

m_annot

[adim-3] ==

'>'

) {

2843  if

( k0 + 1 < sdim && ( !

m_segments

[k0+1].m_exon ) ) {

2845  g

.m_box[0] = s.

m_box

[1] + 1;

2846  g

.m_box[2] = s.

m_box

[3] + 1;

2847  g

.m_len =

g

.m_box[1] -

g

.m_box[0] + 1;

2879  bool

adjust =

false

;

2880  bool

prev_exon =

false

;

2882  for

(

size_t

pp = 0; pp <ssize ; ++pp) {

2884  if

( !prev_exon && ( pp == ssize - 1 || !

m_segments

[pp+1].m_exon ) ) {

2903  if

( min_hole_len > 0) {

2906  for

(; pos2 <

m_segments

.size(); ++pos1, ++pos2) {

2919  bool

cut_to_codons =

true

;

2920  if

( cut_to_codons ) {

2932 #ifdef DUMP_PROCESSED_SEGS 2933

cerr <<

"Processed segments:"

<< endl;

2935

cerr << ii->m_box[0] <<

'\t'

<< ii->m_box[1] <<

'\t' 2936

<< ii->m_box[2] <<

'\t'

<< ii->m_box[3] <<

'\t' 2937

<< ii->m_annot <<

'\t'

<< ii->m_score << endl;

2949  for

(

size_t i

(0), dim (

m_Segments

.size());

i

< dim; ++

i

) {

2955

trans.append(s.

m_len

,

'D'

);

2959  ITERATE

(

string

, ii, trans) {

2964  return

double(matches) / trans.size();

2972

box[1] = box[3] = 0;

2987

box[0] =

static_cast<unsigned int>

(

a

);

2990

box[1] =

static_cast<unsigned int>

(

b

);

3002

box[2] =

static_cast<unsigned int>

(

a

);

3005

box[3] =

static_cast<unsigned int>

(

b

);

3014  bool

turn2gap (

false

);

3016  const size_t

exon_size (1 + term_segs[0]->m_box[1] -

3017

term_segs[0]->m_box[0]);

3019  const double

idty (term_segs[0]->m_idty);

3023  if

(exon_size < kMinTermExonSize && idty < kMinTermExonIdty ) {

3028  if

(exon_size < kMinTermExonSize) {

3033  const char

*dnr, *acc;

3035  a

= term_segs[0]->

m_box

[3];

3036  b

= term_segs[1]->

m_box

[2];

3041  a

= term_segs[1]->

m_box

[3];

3042  b

= term_segs[0]->

m_box

[2];

3047  const size_t

intron_len (

b

-

a

);

3051  size_t

max_ext ((idty < .96 || !consensus || exon_size < 16)?

3055  if

(exon_size < 8) {

3056

max_ext = 10 * exon_size;

3059  else if

(exon_size < 16) {

3064  if

(intron_len > max_intron_len) {

3074

s.

m_len

= exon_size;

3088  if

(query_len >= kNonCoveredEndThreshold) {

3092  const double

k (pow(kNonCoveredEndThreshold, - 1. / kPower) * max_ext);

3093  const double

drv (k * pow(query_len, 1. / kPower));

3105  template

<

typename

T>

3108

*(

reinterpret_cast<T

*

>

(p)) =

n

;

3115  copy

(s.begin(), s.end(), p);

3120  template

<

typename

T>

3123  n

= *(

reinterpret_cast<const T

*

>

(p));

3144  const size_t total_size

=

sizeof

m_exon +

sizeof

m_idty +

3145  sizeof

m_len +

sizeof

m_box + m_annot.size() + 1 +

3146

m_details.size() + 1 +

sizeof

m_score;

3150  char

* p = &target->front();

3154  for

(

size_t i

= 0;

i

< 4; ++

i

) {

3167  const size_t

min_size =

sizeof

m_exon +

sizeof

m_idty +

sizeof

m_len +

3168

+

sizeof

m_box + 1 + 1 +

sizeof

m_score;

3170  if

(

source

.size() < min_size) {

3174  const char

* p = &

source

.front();

3179  for

(

size_t i

= 0;

i

< 4; ++

i

) {

3197  const size_t

core_size (

3198  sizeof

m_Id +

sizeof

m_Status + m_Msg.size() + 1

3199

+

sizeof

m_QueryStrand +

sizeof

m_SubjStrand

3200

+

sizeof

m_Cds_start +

sizeof

m_Cds_stop

3205

vector<char> core (core_size);

3207  char

* p = &core.front();

3219  typedef

vector<TNetCacheBuffer> TBuffers;

3220

TBuffers vb (m_Segments.size());

3223

ii->ToBuffer(&vb[ibuf++]);

3226  size_t total_size

(core_size +

sizeof

(

size_t

) * m_Segments.size());

3232

TNetCacheBuffer::iterator it = target->begin();

3233  copy

(core.begin(), core.end(), it);

3238  const size_t

seg_buf_size = ii->size();

3239

*((

size_t

*)p) = seg_buf_size;

3240

it +=

sizeof

(size_t);

3241  copy

(ii->begin(), ii->end(), it);

3251  const size_t

min_size (

3255

+

sizeof

m_QueryStrand +

sizeof

m_SubjStrand

3256

+

sizeof

m_Cds_start +

sizeof

m_Cds_stop

3261  if

(

source

.size() < min_size) {

3265  const char

* p (&

source

.front());

3277  const char

* pe (&

source

.back());

3279  size_t

seg_buf_size (0);

3281

m_Segments.push_back(

TSegment

());

3282  TSegment

& seg (m_Segments.back());

3295  bool

valid_input (sas.

GetPointer

() && sas->CanGet() && sas->Get().size()

3296

&& sas->Get().front()->CanGetSegs()

3297

&& sas->Get().front()->GetSegs().IsSpliced()

3298

&& sas->Get().front()->GetSegs().GetSpliced().GetProduct_type()

3304  "CSplign::s_ComputeStats(): Invalid input"

);

3307

output_stats->resize(0);

3311

output_stats->push_back(ss);

3314  return

output_stats->size();

3319  const int

kFrame_not_set (-10);

3320  const int

kFrame_end (-5);

3321  const int

kFrame_lost (-20);

3326  bool

embed_scoreset,

3332  "CSplign::s_ComputeStats(): mode not yet supported."

);

3335  const bool

cds_stats ((

flags

&

eSF_BasicCds

) && (cds.first + cds.second > 0));

3344

scores.assign(score_vec.begin(), score_vec.end());

3350  const

TSpliced & spliced (sa->GetSegs().GetSpliced());

3353  "CSplign::s_ComputeStats(): Unsupported product type"

);

3358  const bool

cds_strand (cds.first < cds.second);

3359  if

(qstrand ^ cds_strand) {

3361  "CSplign::s_ComputeStats(): Transcript orientation not " 3362  "matching specified CDS orientation."

);

3366  typedef

TSpliced::TExons TExons;

3367  const

TExons & exons (spliced.GetExons());

3369  const TSeqPos

qlen (spliced.GetProduct_length());

3370  const TSeqPos

polya (spliced.CanGetPoly_a()?

3371

spliced.GetPoly_a(): (qstrand? qlen:

TSeqPos

(-1)));

3376  ITERATE

(TExons, ii2, exons) {

3378  const

TExon & exon (**ii2);

3379  const TSeqPos

qmin (exon.GetProduct_start().GetNucpos()),

3380

qmax (exon.GetProduct_end().GetNucpos());

3382  const TSeqPos

qgap (qstrand? qmin - qprev - 1: qprev - qmax - 1);

3385  if

(cds_stats) xcript.append(qgap,

'X'

);

3388  typedef

TExon::TParts TParts;

3389  const

TParts & parts (exon.GetParts());

3391  ITERATE

(TParts, ii3, parts) {

3398  if

(cds_stats) xcript.append(

len

,

'M'

);

3402  if

(cds_stats) xcript.append(

len

,

'R'

);

3406  if

(cds_stats) xcript.append(

len

,

'D'

);

3410  if

(cds_stats) xcript.append(

len

,

'I'

);

3413

errmsg =

"Unexpected spliced exon chunk part: " 3419

qprev = qstrand? qmax: qmin;

3422  const TSeqPos

qgap (qstrand? polya - qprev - 1: qprev - polya - 1);

3423  if

(cds_stats) xcript.append(qgap,

'X'

);

3425  if

(!qstrand && qlen <= 0) {

3427  "CSplign::s_ComputeStats(): Cannot compute " 3428  "inframe stats - transcript length not set."

);

3431  int

qpos (qstrand? -1:

int

(qlen));

3432  int

qinc (qstrand? +1: -1);

3433  int

frame (kFrame_not_set);

3434  size_t

aln_length_cds (0);

3435  int

matches_frame[] = {0, 0, 0, 0, 0};

3436  const Int8

cds_start (cds.first), cds_stop (cds.second);

3437  for

(string::const_iterator ie (xcript.end()), ii(xcript.begin());

3438

ii != ie && frame != kFrame_end; ++ii)

3445  if

(frame == kFrame_not_set && qpos == cds_start) frame = 0;

3446  if

(qpos == cds_stop) frame = kFrame_end;

3449

++matches_frame[frame + 2];

3455  if

(frame == kFrame_not_set && qpos == cds_start) frame = 0;

3456  if

(qpos == cds_stop) frame = kFrame_end;

3457  if

(frame >= -2) ++aln_length_cds;

3462  if

(frame == kFrame_not_set && qpos == cds_start) frame = 0;

3463  if

(qpos == cds_stop) frame = kFrame_end;

3466

frame = (frame + 1) % 3;

3473

frame = (frame - 1) % 3;

3479  if

( (qstrand && cds_start <= qpos && qpos < cds_stop) ||

3480

(!qstrand && cds_start >= qpos && qpos > cds_stop) )

3482

frame = kFrame_lost;

3492

score_matches_inframe->SetValue().SetInt(matches_frame[2]);

3493

scores.push_back(score_matches_inframe);

3499

score_inframe_identity->SetValue().

3500

SetReal(

double

(matches_frame[2]) / aln_length_cds);

3501

scores.push_back(score_inframe_identity);

3506  if

(embed_scoreset) {

3508

sa_score.resize(scores.size());

3509  copy

(scores.begin(), scores.end(), sa_score.begin());

@ eExtreme_Positional

numerical value

User-defined methods of the data storage class.

User-defined methods of the data storage class.

User-defined methods of the data storage class.

User-defined methods of the data storage class.

User-defined methods of the data storage class.

User-defined methods of the data storage class.

User-defined methods of the data storage class.

void remove_if(Container &c, Predicate *__pred)

void transform(Container &c, UnaryFunction *op)

void Run(typename THitRefs::iterator start, typename THitRefs::iterator finish, CScope *scope=NULL, const vector< pair< TCoord, TCoord > > *gaps=NULL)

Execute: identify compartments.

bool GetStrand(size_t i) const

void SetMaxIntron(TCoord mi)

Assign the maximum intron length, in base pairs.

void Get(size_t idx, THitRefs &compartment) const

Retrieve a compartment by index.

const TCoord * GetBox(size_t i) const

bool GetStatus(size_t i) const

pair< size_t, size_t > GetCounts(void) const

Retrieve the compartment counts.

CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string. Sample usage:

static void FindOrfs(const string &seq, TLocVec &results, unsigned int min_length_bp=3, int genetic_code=1, const vector< string > &allowable_starts=vector< string >(), bool longest_orfs=true, size_t max_seq_gap=k_default_max_seq_gap)

Find ORFs in both orientations.

bool IntersectingWith(const TRange &r) const

void AddSplignScores(const CSeq_align &align, CSeq_align::TScore &scores)

Compute the six splign scores.

void ImproveFromLeft(TSeg &s)

void Cut50FromLeft(TSeg &s)

static bool HasAbuttingExonOnLeft(TSegs segments, TSeqPos p)

void Cut50FromRight(TSeg &s)

void TrimHolesToCodons(TSegs &segments, objects::CBioseq_Handle &mrna_bio_handle, bool mrna_strand, TSeqPos mrna_len)

void ImproveFromRight(TSeg &s)

bool ThrowAway20_28_90(TSeg &s)

void JoinExons(TSegs &segments, TSeqPos p1, TSeqPos p2)

void AdjustGaps(TSegs &segments)

static bool HasAbuttingExonOnRight(TSegs segments, TSeqPos p)

void SetMismatchScore(int score)

void SetMinHoleLen(size_t len)

void SetPolyaDetection(bool on)

static int s_GetDefaultNonConsensusSpliceScore(void)

int GetGcAgSpliceScore(void) const

EScoringType GetScoringType(void) const

void Run(THitRefs *hitrefs)

void SetCompartmentPenalty(double penalty)

int m_NonConsensusSpliceScore

int GetGapExtensionScore(void) const

list< CRef< objects::CScore_set > > TScoreSets

void SetMinSingletonIdentity(double idty)

size_t x_GetGenomicExtent(const size_t query_extent, size_t max_ext=0) const

CRef< objects::CScope > & SetScope(void)

void SetMatchScore(int score)

void SetMinPolyaLen(size_t len)

SAlignedCompartment x_RunOnCompartment(THitRefs *hitrefs, size_t range_left, size_t range_right)

void SetMaxIntron(size_t max_intron)

bool GetEndGapDetection(void) const

double GetPolyaExtIdentity(void) const

static bool s_GetDefaultTrimToCodons(void)

size_t m_MinSingletonIdtyBps

bool x_ProcessTermSegm(TSegment **term_segs, Uint1 side) const

void SetMinSingletonIdentityBps(size_t idty)

void SetMinExonIdentity(double idty)

pair< size_t, size_t > m_BoundingRange

static double s_GetDefaultMinCompartmentIdty(void)

size_t GetMinPolyaLen(void) const

int GetMismatchScore(void) const

static int s_GetDefaultGapOpeningScore(void)

void SetGapOpeningScore(int score)

vector< size_t > m_pattern

pair< size_t, size_t > TOrf

static int s_GetDefaultGtAgSpliceScore(void)

CConstRef< objects::CSeqMap > m_GenomicSeqMap

void SetTestType(const string &test_type)

static size_t s_GetDefaultMinPolyaLen(void)

bool GetPolyaDetection(void) const

void SetStrand(bool strand)

double GetCompartmentPenalty(void) const

EScoringType m_ScoringType

size_t GetMinHoleLen(void) const

int GetAtAcSpliceScore(void) const

bool GetTrimToCodons(void) const

void PreserveScope(bool preserve=true)

Controls whether to clean the scope object's cache on a new sequence.

void SetMaxPartExonIdentDrop(double ident)

void SetGcAgSpliceScore(int score)

static size_t s_GetDefaultMaxGenomicExtent(void)

vector< char > m_mrna_polya

size_t m_MaxCompsPerQuery

bool AlignSingleCompartment(THitRefs *hitrefs, THit::TCoord range_left, THit::TCoord range_right, SAlignedCompartment *result)

void SetMaxCompsPerQuery(size_t m)

void x_LoadSequence(vector< char > *seq, const objects::CSeq_id &seqid, THit::TCoord start, THit::TCoord finish, bool retain, bool is_genomic=false, bool genomic_strand=true)

size_t GetMaxIntron(void) const

static CVersionAPI & s_GetVersion(void)

Retrieve the library's version object.

bool GetStrand(void) const

string GetTestType(void) const

CRef< objects::CScope > GetScope(void) const

Access the scope object that the library will use to retrieve the sequences.

static int s_GetDefaultGcAgSpliceScore(void)

void SetScoringType(EScoringType type)

double m_MinSingletonIdty

CRef< objects::CScope > m_Scope

void SetNonConsensusSpliceScore(int score)

static double s_GetDefaultCompartmentPenalty(void)

vector< THitRef > THitRefs

void SetAlignerScores(void)

static int s_GetDefaultMatchScore(void)

static double s_GetDefaultMinExonIdty(void)

void SetMaxGenomicExtent(size_t mge)

static int s_GetDefaultMismatchScore(void)

size_t GetMaxGenomicExtent(void) const

static double s_GetDefaultMaxPartExonIdentDrop(void)

static size_t s_ComputeStats(CRef< objects::CSeq_align_set > sas, TScoreSets *output_stats, TOrf cds=TOrf(0, 0), EStatFlags flags=eSF_BasicNonCds)

Generate statistics based on splign-generated seq-align-set, with each seq-align corresponding to an ...

double m_MinCompartmentIdty

CConstRef< TAligner > GetAligner(void) const

void x_FinalizeAlignedCompartment(SAlignedCompartment &ac)

double GetMinCompartmentIdentity(void) const

static EScoringType s_GetDefaultScoringType(void)

static size_t s_TestPolyA(const char *seq, size_t dim, size_t cds_stop=0)

void x_SplitQualifyingHits(THitRefs *phitrefs)

size_t GetMaxCompsPerQuery(void) const

double m_MaxPartExonIdentDrop

void SetTrimToCodons(bool)

int GetMatchScore(void) const

void SetAtAcSpliceScore(int score)

bool IsPolyA(const char *seq, size_t polya_start, size_t dim)

void SetPolyaExtIdentity(double idty)

size_t m_MinPatternHitLength

void SetGapExtensionScore(int score)

double GetMinSingletonIdentity(void) const

CRef< TAligner > & SetAligner(void)

Access the spliced aligner core object.

float x_Run(const char *seq1, const char *seq2)

size_t GetMinSingletonIdentityBps(void) const

bool x_IsInGap(size_t pos)

TSIHToMaskRanges m_MaskMap

static double s_GetDefaultPolyaExtIdty(void)

void SetMinCompartmentIdentity(double idty)

vector< TSegment > TSegments

static CRef< CSplicedAligner > s_CreateDefaultAligner(void)

int GetNonConsensusSpliceScore(void) const

TOrfPair GetCds(const THit::TId &id, const vector< char > *seq_data=0)

double m_CompartmentPenalty

pair< TOrf, TOrf > TOrfPair

void SetEndGapDetection(bool on)

int GetGtAgSpliceScore(void) const

objects::CBioseq_Handle m_mrna_bio_handle

double GetMaxPartExonIdentDrop(void) const

static size_t s_GetDefaultMinHoleLen(void)

void SetGtAgSpliceScore(int score)

int GetGapOpeningScore(void) const

void x_SetPattern(THitRefs *hitrefs)

double GetMinExonIdentity(void) const

CRef< TAligner > m_aligner

static THitRef sx_NewHit(THit::TCoord q0, THit::TCoord q, THit::TCoord s0, THit::TCoord s)

CNWFormatter::SSegment TSegment

static int s_GetDefaultAtAcSpliceScore(void)

void x_MaskSequence(vector< char > *seq, const TSeqRangeColl &mask_ranges, THit::TCoord start, THit::TCoord finish)

static int s_GetDefaultGapExtensionScore(void)

vector< SAlnMapElem > m_alnmap

container_type::const_iterator const_iterator

const_iterator end() const

const_iterator find(const key_type &key) const

static const char s_Version[]

static void test_type(TDSSOCKET *tds, TDSCOLUMN *col)

static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)

static DLIST_TYPE *DLIST_NAME() prev(DLIST_LIST_TYPE *list, DLIST_TYPE *item)

void ImproveFromLeft(const char *seq1, const char *seq2, CConstRef< CSplicedAligner > aligner)

void ExtendRight(const vector< char > &mrna, const vector< char > &genomic, Int8 ext_len, const CNWAligner *aligner)

void ImproveFromRight(const char *seq1, const char *seq2, CConstRef< CSplicedAligner > aligner)

void AsText(string *output, ETextFormatType type, size_t line_width=100) const

void MakeSegments(vector< SSegment > *psegments) const

int CanExtendRight(const vector< char > &mrna, const vector< char > &genomic) const

const char * GetDonor(void) const

void ImproveFromRight1(const char *seq1, const char *seq2, CConstRef< CSplicedAligner > aligner)

void FromBuffer(const TNetCacheBuffer &buf)

static bool s_IsConsensusSplice(const char *donor, const char *acceptor, bool semi_as_cons=false)

void ToBuffer(TNetCacheBuffer *buf) const

vector< char > TNetCacheBuffer

const char * GetAcceptor(void) const

int CanExtendLeft(const vector< char > &mrna, const vector< char > &genomic) const

void Update(const CNWAligner *aligner)

void ExtendLeft(const vector< char > &mrna, const vector< char > &genomic, Int8 ext_len, const CNWAligner *aligner)

bool IsLowComplexityExon(const char *rna_seq)

void SetBand(size_t band)

virtual void SetSequences(const char *seq1, size_t len1, const char *seq2, size_t len2, bool verify=true)

size_t GetLongestSeg(size_t *q0, size_t *q1, size_t *s0, size_t *s1) const

static void s_GetSpan(const THitRefs &hitrefs, TCoord span[4])

Get sequence span for a set of alignments (hits).

void SetScoreMatrix(const SNCBIPackedScoreMatrix *scoremat)

void SetWms(TScore value)

void SetWi(unsigned char splice_type, TScore value)

unsigned int TSeqPos

Type for sequence locations and lengths.

#define ITERATE(Type, Var, Cont)

ITERATE macro to sequence through container elements.

#define NON_CONST_ITERATE(Type, Var, Cont)

Non constant version of ITERATE macro.

#define NON_CONST_REVERSE_ITERATE(Type, Var, Cont)

Non constant version of REVERSE_ITERATE macro.

@ eDiag_Fatal

Fatal error – guarantees exit (or abort)

TErrCode GetErrCode(void) const

Get error code.

#define NCBI_THROW(exception_class, err_code, message)

Generic macro to throw an exception, given the exception class, error code and message string.

const string & GetMsg(void) const

Get message string.

#define NCBI_RETHROW_SAME(prev_exception, message)

Generic macro to re-throw the same exception.

EDiagSev GetSeverity(void) const

Get exception severity.

CException & SetSeverity(EDiagSev severity)

Set exception severity.

const string AsFastaString(void) const

string GetSeqIdString(bool with_version=false) const

Return seqid string with optional version for text seqid type.

virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)

Optimized implementation of CSerialObject::Assign, which is not so efficient.

static CSeq_id_Handle GetHandle(const CSeq_id &id)

Normal way of getting a handle, works for any seq-id.

TSeqPos GetStart(ESeqLocExtremes ext) const

Return start and stop positions of the seq-loc.

TSeqPos GetStop(ESeqLocExtremes ext) const

TSeqPos GetLength(const CSeq_id &id, CScope *scope)

Get sequence length if scope not null, else return max possible TSeqPos.

CSeqVector GetSeqVector(EVectorCoding coding, ENa_strand strand=eNa_strand_plus) const

Get sequence: Iupacna or Iupacaa if use_iupac_coding is true.

@ eCoding_Iupac

Set coding to printable coding (Iupacna or Iupacaa)

TSeqPos GetEndPosition(void) const

return end position of current segment in sequence (exclusive)

CSeqMap::ESegmentType GetType(void) const

TSeqPos GetPosition(void) const

return position of current segment in sequence

CConstRef< CSeq_literal > GetRefGapLiteral(void) const

return CSeq_literal with gap data, or null if either the segment is not a gap, or an unspecified gap

void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const

Fill the buffer string with the sequence data for the interval [start, stop).

static CConstRef< CSeqMap > GetSeqMapForSeq_loc(const CSeq_loc &loc, CScope *scope)

CSeqMap_CI ResolvedRangeIterator(CScope *scope, TSeqPos from, TSeqPos length, ENa_strand strand=eNa_strand_plus, size_t maxResolve=size_t(-1), TFlags flags=fDefaultFlags) const

Iterate segments in the range with specified strand coordinates.

bool NotNull(void) const THROWS_NONE

Check if pointer is not null – same effect as NotEmpty().

TObjectType * GetPointer(void) THROWS_NONE

Get pointer.

void Reset(void)

Reset reference object.

bool IsNull(void) const THROWS_NONE

Check if pointer is null – same effect as Empty().

uint8_t Uint1

1-byte (8-bit) unsigned integer

int32_t Int4

4-byte (32-bit) signed integer

#define numeric_limits

Pre-declaration of the "numeric_limits<>" template. Forcibly overrides (using preprocessor) the origin...

uint32_t Uint4

4-byte (32-bit) unsigned integer

int64_t Int8

8-byte (64-bit) signed integer

TThisType & SetLength(position_type length)

#define END_NCBI_SCOPE

End previously defined NCBI scope.

#define BEGIN_NCBI_SCOPE

Define ncbi namespace.

void SetFrom(TFrom value)

Assign a value to From data member.

E_Choice

Choice variants.

vector< CRef< CScore > > TScore

TMatch GetMatch(void) const

Get the variant data.

list< CRef< CScore > > Tdata

Tdata & Set(void)

Assign a value to data member.

static string SelectionName(E_Choice index)

Retrieve selection name (for diagnostic purposes).

TMismatch GetMismatch(void) const

Get the variant data.

TGenomic_ins GetGenomic_ins(void) const

Get the variant data.

list< CRef< CSeq_align > > Tdata

TProduct_ins GetProduct_ins(void) const

Get the variant data.

E_Choice Which(void) const

Which variant is currently selected.

@ e_Product_ins

insertion in product sequence (i.e. gap in the genomic sequence)

@ e_Genomic_ins

insertion in genomic sequence (i.e. gap in the product sequence)

@ e_Match

both sequences represented, product and genomic sequences match

@ e_Mismatch

both sequences represented, product and genomic sequences do not match

@ eProduct_type_transcript

ENa_strand

strand of nucleic acid

unsigned int

A callback function used to compare two keys in a database.

const CharType(& source)[N]

void ElemToBuffer(const string &s, char *&p)

void ElemFromBuffer(string &s, const char *&p)

Int4 delta(size_t dimension_, const Int4 *score_)

void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)

string GetDonor(const objects::CSpliced_exon &exon)

static sljit_uw total_size

static const sljit_gpr r1

static const sljit_gpr r2

static CVersionAPI * s_CreateVersion(void)

const string kTestType_20_28

const string kTestType_20_28_plus

const string kTestType_production_default

void CleaveOffByTail(CSplign::THitRefs *phitrefs, TSeqPos polya_start)

const char g_msg_NoAlignment[]

const char g_msg_CompartmentInconsistent[]

const char g_msg_AlignedNotSpecified[]

const char g_msg_BadIdentityThreshold[]

const char g_msg_EmptyHitVectorPassed[]

const char g_msg_NetCacheBufferIncomplete[]

const char g_msg_QueryCoverageOutOfRange[]

const char g_msg_NullPointerPassed[]

const char g_msg_NoExonsAboveIdtyLimit[]

const char g_msg_NoHitsAfterFiltering[]

const char g_msg_InvalidRange[]

ECompartmentStatus m_Status

vector< char > TNetCacheBuffer

void GetBox(Uint4 *box) const

void ToBuffer(TNetCacheBuffer *buf) const

void FromBuffer(const TNetCacheBuffer &buf)

double GetIdentity(void) const

int g(Seg_Gsm *spe, Seq_Mtf *psm, Thd_Gsm *tdg)

const value_slice::CValueConvert< value_slice::SRunTimeCP, FROM > Convert(const FROM &value)


RetroSearch is an open source project built by @garambo | Open a GitHub Issue

Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo

HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4