A RetroSearch Logo

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Search Query:

Showing content from http://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/doxyhtml/chainer_8cpp_source.html below:

NCBI C++ ToolKit: src/algo/gnomon/chainer.cpp Source File

51 #include <unordered_set> 52 #include <unordered_map> 171  typedef

set<pair<Int8,string>>

TIDs

;

236

:m_hmm_params(hmm_params), m_gnomon(gnomon), m_edited_contig_map(edited_contig_map), m_limits(

limits

), m_contig_acc(contig_acc), m_idnext(1), m_idinc(1)

242  return m_data

->MakeChains(models);

256

m_align(0), m_cds_info(0), m_align_map(0), m_left_member(0), m_right_member(0), m_sink_for_contained(0),

257

m_copy(0), m_contained(0), m_identical_count(0),

258

m_left_num(0), m_right_num(0), m_num(0),

259

m_splice_weight(0), m_left_splice_num(0), m_right_splice_num(0), m_splice_num(0),

260

m_type(

eCDS

), m_left_cds(0), m_right_cds(0), m_cds(0), m_included(

false

), m_postponed(

false

), m_internal(

false

),

261

m_marked_for_deletion(

false

), m_marked_for_retention(

false

), m_restricted_to_start(

false

),

262

m_gapped_connection(

false

), m_fully_connected_to_part(-1), m_not_for_chaining(

false

),

264

m_trusted_group(0), m_left_trusted_group(0), m_right_trusted_group(0), m_cds_from_trusted(

false

), m_excluded_readthrough(

false

) {}

269  void

MarkIncludedForChain();

270  void

MarkPostponed();

271  void

MarkPostponedForChain();

274  TContained

CollectCodingContainedForMemeber();

291  int m_type

, m_left_cds, m_right_cds, m_cds;

317

tuple<TIDMap, TSignedSeqRange> PeaksAndLimits(

EStatus

determinant,

int

min_blob_weight,

int

max_empty_dist,

int

min_splice_dist);

318

tuple<TIVec, TSignedSeqRange> MainPeaks(

TIDMap

& peak_weights,

double

secondary_peak,

double

tertiary_peak,

double

tertiary_peak_coverage,

bool

right_end);

324  void

RestoreTrimmedEnds(

int

trim);

325  void

RemoveFshiftsFromUTRs();

327  void

SetOpenForPartialyAlignedProteins(map<

string

, pair<bool,bool> >& prot_complet);

328

pair<bool,bool> ValidPolyA(

int

pos,

const CResidueVec

& contig);

329  void

ClipToCap(

int

min_cap_blob,

int

max_dist,

int

min_flank_exon,

double

secondary_peak,

bool

recalulate_support =

true

);

330  void

ClipToPolyA(

const CResidueVec

& contig,

int

min_polya_blob,

int

max_dist,

int

min_flank_exon,

double

secondary_peak,

double

tertiary_peak,

double

tertiary_peak_coverage,

bool

recalulate_support =

true

);

331  void

CheckSecondaryCapPolyAEnds();

332  void

ClipLowCoverageUTR(

double

utr_clip_threshold,

bool

recalulate_support =

true

);

333  void

CalculateDropLimits();

334  void

CalculateSupportAndWeightFromMembers(

bool

keep_all_evidence =

false

);

338  void

SetConfirmedStartStopForCompleteProteins(map<

string

, pair<bool,bool> >& prot_complet,

const SMinScor

& minscor);

341  void

SetConsistentCoverage();

343  bool

HarborsNested(

const CChain

& other_chain,

bool

check_in_holes)

const

;

344  bool

HarborsNested(

const CGene

& other_gene,

bool

check_in_holes)

const

;

346  bool

HasTrustedEvidence()

const

;

370  typedef

list<CGeneModel>::iterator

TIt

;

371  typedef

list<CGeneModel>::const_iterator

TConstIt

;

374  bool

IsAlternative(

const CChain

&

a

)

const

;

375  bool

IsAllowedAlternative(

const

ncbi::gnomon::CGeneModel&,

int

maxcomposite)

const

;

378  bool Nested

()

const

{

return

!m_nested_in_genes.empty(); }

379  bool

LargeCdsOverlap(

const CGeneModel

&

a

)

const

;

380  bool

HarborsNested(

const CChain

& other_chain,

bool

check_in_holes)

const

;

381  bool

HarborsNested(

const CGene

& other_gene,

bool

check_in_holes)

const

;

385

set<CGene*> RemoveGeneFromOtherGenesSets();

401

(*i)->RemoveFromHarbored(

this

);

403

(*i)->RemoveFromNestedIn(

this

);

405  return

m_harbors_genes;

413  if

(RealCdsLimits().NotEmpty())

414

gene_lim_for_nested =

front

()->OpenCds() ?

front

()->MaxCdsLimits() : RealCdsLimits();

415  if

(!

Include

(gene_lim_for_nested,range))

420  if

(RealCdsLimits().NotEmpty() && (*it)->ReadingFrame().Empty())

423  if

((*it)->ReadingFrame().NotEmpty())

424

model_lim_for_nested = (*it)->OpenCds() ? (*it)->MaxCdsLimits() : (*it)->RealCdsLimits();

443  return

HarborsRange(other_lim_for_nested, check_in_holes);

455  return

HarborsRange(other_lim_for_nested, check_in_holes);

466

common_cds += (ib->Limits()&

b

.RealCdsLimits()&ia->Limits()&

a

.RealCdsLimits()).

GetLength

();

480

m_real_cds_limits +=

a

.RealCdsLimits();

481

m_maxscore =

max

(m_maxscore,

a

.Score());

490  if

(

a

.Support().empty()) {

496  if

(s->IsCore() && ++composite > maxcomposite)

return false

;

499  if

(

a

.PStop(

false

) || !

a

.FrameShifts().empty())

506

vector<TSignedSeqRange> gene_gapfill_exons;

509

gene_gapfill_exons.push_back(e->

Limits

());

511

vector<TSignedSeqRange> a_gapfill_exons;

514

a_gapfill_exons.push_back(e->

Limits

());

516  if

(gene_gapfill_exons != a_gapfill_exons)

519  bool

a_share_intron =

false

;

522

set<TSignedSeqRange> b_introns;

523  for

(

int i

= 1;

i

< (

int

)

b

.Exons().size(); ++

i

) {

524  if

(

b

.Exons()[

i

-1].m_ssplice &&

b

.Exons()[

i

].m_fsplice) {

526

b_introns.insert(intron);

530  bool

a_has_new_intron =

false

;

531  for

(

int i

= 1;

i

< (

int

)

a

.Exons().size(); ++

i

) {

532  if

(

a

.Exons()[

i

-1].m_ssplice &&

a

.Exons()[

i

].m_fsplice &&

a

.Exons()[

i

-1].m_ssplice_sig !=

"XX"

&&

a

.Exons()[

i

].m_fsplice_sig !=

"XX"

) {

534  if

(b_introns.insert(intron).second)

535

a_has_new_intron =

true

;

537

a_share_intron =

true

;

541  if

(a_has_new_intron) {

543

}

else if

(!gene_gapfill_exons.empty()) {

545

}

else if

(

a

.RealCdsLimits().NotEmpty() &&

b

.RealCdsLimits().NotEmpty() && !

a

.RealCdsLimits().IntersectingWith(

b

.RealCdsLimits()) && (!

a

.TrustedmRNA().empty() || !

a

.TrustedProt().empty())) {

550

}

else if

(

a

.RealCdsLen() <=

b

.RealCdsLen()){

555  return

(a_share_intron || gene_gapfill_exons.empty());

562  if

(

a

.Strand() !=

front

()->Strand())

565  bool

gene_has_trusted =

false

;

567  if

((*it)->HasTrustedEvidence()) {

568

gene_has_trusted =

true

;

573  bool

has_common_splice =

false

;

577

has_common_splice =

true

;

582  if

(has_common_splice && (!gene_has_trusted || !

a

.HasTrustedEvidence()))

585  if

(

a

.ReadingFrame().NotEmpty() && RealCdsLimits().NotEmpty()) {

586  CAlignMap

amap(

a

.Exons(),

a

.FrameShifts(),

a

.Strand(),

a

.GetCdsInfo().Cds());

588  for

(

unsigned int

j = 0; j <

a

.Exons().

size

(); ++j) {

589  for

(

TSignedSeqPos

k =

max

(

a

.Exons()[j].GetFrom(),

a

.GetCdsInfo().Cds().GetFrom()); k <=

min

(

a

.Exons()[j].GetTo(),

a

.GetCdsInfo().Cds().GetTo()); ++k) {

591  _ASSERT

(p < (

int

)acds_map.size());

598  bool

has_common_cds =

false

;

601  if

(!

a

.GetCdsInfo().Cds().IntersectingWith((*it)->GetCdsInfo().Cds()))

604  CAlignMap

gmap((*it)->Exons(), (*it)->FrameShifts(), (*it)->Strand(), (*it)->GetCdsInfo().Cds());

606  for

(

unsigned int

j = 0; j < (*it)->Exons().

size

(); ++j) {

607  for

(

TSignedSeqPos

k =

max

((*it)->Exons()[j].GetFrom(),(*it)->GetCdsInfo().Cds().GetFrom()); k <=

min

((*it)->Exons()[j].GetTo(),(*it)->GetCdsInfo().Cds().GetTo()); ++k) {

609  _ASSERT

(p < (

int

)cds_map.size());

615  for

(

unsigned int i

= 0;

i

< acds_map.size(); ) {

617  for

( ; j < cds_map.size() && (acds_map[

i

] != cds_map[j] ||

i

%3 != j%3); ++j);

618  if

(j == cds_map.size()) {

624  for

( ; j < cds_map.size() &&

i

< acds_map.size() && acds_map[

i

] == cds_map[j]; ++j, ++

i

, ++

count

);

627

has_common_cds =

true

;

636  return

has_common_cds;

639  return

has_common_splice;

644  if

(!

a

.Support().empty() &&

b

.Support().empty())

646  else if

(

a

.Support().empty() && !

b

.Support().empty())

650  bool

atrusted = !

a

.TrustedmRNA().empty() || !

a

.TrustedProt().empty();

651  bool

btrusted = !

b

.TrustedmRNA().empty() || !

b

.TrustedProt().empty();

652  if

(atrusted && !btrusted) {

654

}

else if

(btrusted && !atrusted) {

656

}

else if

(

a

.ReadingFrame().NotEmpty() &&

b

.ReadingFrame().Empty()) {

658

}

else if

(

b

.ReadingFrame().NotEmpty() &&

a

.ReadingFrame().Empty()) {

660

}

else if

(

a

.ReadingFrame().NotEmpty()) {

662  double

ds = 0.05*

fabs

(

a

.Score());

663  double

as =

a

.Score();

673

ds = 0.05*

fabs

(

b

.Score());

674  double

bs =

b

.Score();

688  else if

(

a

.m_splice_weight >

b

.m_splice_weight)

690  else if

(

a

.m_splice_weight <

b

.m_splice_weight)

692  else if

(

a

.Weight() >

b

.Weight())

694  else if

(

a

.Weight() <

b

.Weight())

696  else if

(

a

.Limits().GetLength() !=

b

.Limits().GetLength())

697  return

(

a

.Limits().GetLength() <

b

.Limits().GetLength());

699  return a

.ID() <

b

.ID();

701  double

asize =

a

.m_splice_weight;

702  double

bsize =

b

.m_splice_weight;

703  double

ds = 0.025*(asize+bsize);

721  else if

(bsize > asize)

723  else if

(

a

.Limits().GetLength() !=

b

.Limits().GetLength())

724  return

(

a

.Limits().GetLength() <

b

.Limits().GetLength());

726  return a

.ID() <

b

.ID();

745  bool

gene_good_enough_to_be_annotation = allow_partialalts || gene.front()->GoodEnoughToBeAnnotation();

748  TSignedSeqRange

gene_cds = (gene.size() > 1 || gene.front()->CompleteCds() || algn_good_enough_to_be_annotation) ? gene.

RealCdsLimits

() : gene.front()->MaxCdsLimits();

751  if

(!gene_good_enough_to_be_annotation && !algn_good_enough_to_be_annotation) {

753  for

(

int i

= 1;

i

< (

int

)

b

.Exons().size(); ++

i

) {

754  if

(

b

.Exons()[

i

].m_ssplice_sig ==

"XX"

&&

b

.Exons()[

i

].m_fsplice_sig ==

"XX"

&&

b

.Exons()[

i

].Limits().IntersectingWith(gene_cds)) {

759  for

(

int i

= 1;

i

< (

int

)algn.

Exons

().size(); ++

i

) {

760  if

(algn.

Exons

()[

i

].m_ssplice_sig ==

"XX"

&& algn.

Exons

()[

i

].m_fsplice_sig ==

"XX"

&& algn.

Exons

()[

i

].Limits().IntersectingWith(algn_cds)) {

774

}

else if

(algn.

ReadingFrame

().

Empty

() || gene.front()->ReadingFrame().Empty()) {

778  return

eNotCompatible;

779

}

else if

(algn.

RealCdsLen

() > altfrac/100*gene.front()->RealCdsLen() || algn.

Score

() > altfrac/100*gene.front()->Score()) {

784  return

eNotCompatible;

788

set<TSignedSeqRange> gene_gapfill_introns;

789

set<TSignedSeqRange> align_gapfill_introns;

792  for

(

int i

= 1;

i

< (

int

)

b

.Exons().size(); ++

i

) {

793  if

(

b

.Exons()[

i

-1].m_ssplice_sig ==

"XX"

||

b

.Exons()[

i

].m_fsplice_sig ==

"XX"

) {

795

gene_gapfill_introns.insert(intron);

799  for

(

int i

= 1;

i

< (

int

)algn.

Exons

().size(); ++

i

) {

800  if

(algn.

Exons

()[

i

-1].m_ssplice_sig ==

"XX"

|| algn.

Exons

()[

i

].m_fsplice_sig ==

"XX"

) {

802

align_gapfill_introns.insert(intron);

805  ITERATE

(set<TSignedSeqRange>, ig, gene_gapfill_introns) {

806  ITERATE

(set<TSignedSeqRange>, ia, align_gapfill_introns) {

807  if

(ig->IntersectingWith(*ia))

808  return

eNotCompatible;

812  if

(algn.

HarborsNested

(gene, gene_good_enough_to_be_annotation))

815  if

(gene.

HarborsNested

(algn, algn_good_enough_to_be_annotation))

818  if

(!algn_cds.

Empty

() && !gene_cds.

Empty

()) {

826  return

eNotCompatible;

830  if

(gene_good_enough_to_be_annotation && algn_good_enough_to_be_annotation) {

831  if

(gene.front()->Strand() != algn.

Strand

() && allow_opposite_strand &&

842  return

eNotCompatible;

849  for

(TChainPointerList::iterator itloop = not_placed_yet.begin(); itloop != not_placed_yet.end(); ) {

850

TChainPointerList::iterator it = itloop++;

858

list<CGene*> possibly_nested;

860  bool

good_model =

true

;

861  for

(list<CGene>::iterator itl = alts.begin(); good_model && itl != alts.end(); ++itl) {

866

possibly_nested.push_back(&(*itl));

876

alts.push_back(

CGene

());

880

alts.back().Insert(algn);

881

not_placed_yet.erase(it);

884  ITERATE

(list<CGene*>, itl, possibly_nested) {

885

(*itl)->AddToNestedIn(&alts.back());

886

alts.back().AddToHarbored(*itl);

895  for

(TChainPointerList::iterator itloop = not_placed_yet.begin(); itloop != not_placed_yet.end(); ) {

896

TChainPointerList::iterator it = itloop++;

899

list<list<CGene>::iterator> included_in;

900

list<CGene*> possibly_nested;

901

list<CGene*> nested_in;

903  bool

good_model =

true

;

904  for

(list<CGene>::iterator itl = alts.begin(); good_model && itl != alts.end(); ++itl) {

909

nested_in.push_back(&(*itl));

912

possibly_nested.push_back(&(*itl));

917

included_in.push_back(itl);

920  case

eNotCompatibleNested:

921  if

(itl->IsAlternative(algn))

922

included_in.push_back(itl);

935  CGene

& gene = *included_in.front();

936  CChain

& model = *gene.front();

943  if

(algn_cds_len < 0.8*model_cds_len)

949

not_placed_yet.push_back(gene.front());

953  ITERATE

(list<CGene*>, itl, nested_in) {

955

(*itl)->AddToHarbored(&gene);

957  ITERATE

(list<CGene*>, itl, possibly_nested) {

958

(*itl)->AddToNestedIn(&gene);

962

not_placed_yet.erase(it);

970  for

(TChainPointerList::iterator itloop = not_placed_yet.begin(); itloop != not_placed_yet.end(); ) {

971

TChainPointerList::iterator it = itloop++;

974

list<list<CGene>::iterator> included_in;

975

list<CGene*> possibly_nested;

977  bool

good_model =

true

;

978  for

(list<CGene>::iterator itl = alts.begin(); good_model && itl != alts.end(); ++itl) {

983

possibly_nested.push_back(&(*itl));

987

included_in.push_back(itl);

995  if

(good_model && !included_in.empty() && (allow_partialalts || included_in.front()->front()->GoodEnoughToBeAnnotation())) {

996  if

(included_in.size() == 1) {

1001  CGene

& gene = *included_in.front();

1003

not_placed_yet.erase(it);

1005  ITERATE

(list<CGene*>, itl, possibly_nested) {

1007

(*itl)->AddToNestedIn(&gene);

1013  bool

allow_connection =

false

;

1016  bool

cds_overlap =

true

;

1018

cds_overlap =

false

;

1022  ITERATE

(list<list<CGene>::iterator>, k, included_in) {

1023  if

(!(*k)->IsAlternative(

a

)) {

1024

cds_overlap =

false

;

1032

algn.

AddComment

(

"Gene overlap override"

);

1034

allow_connection =

true

;

1038  if

(allow_connection) {

1039  CGene

& gene = *included_in.front();

1042  ITERATE

(list<list<CGene>::iterator>, k, included_in) {

1043  if

(k != included_in.begin()) {

1046  if

(CheckCompatibility(*included_in.front(), **

l

) == eAlternative) {

1048

(*l)->AddComment(

"Pass2b"

);

1050

included_in.front()->Insert(**

l

);

1052

not_placed_yet.push_back(*

l

);

1055

TChainPointerList::iterator idest = itloop;

1057

not_placed_yet.insert(idest, *

l

);

1060

set<CGene*> nested_genes = (*k)->RemoveGeneFromOtherGenesSets();

1061  ITERATE

(set<CGene*>,

i

, nested_genes)

1062

possibly_nested.push_back(*

i

);

1066

not_placed_yet.erase(it);

1068  ITERATE

(list<CGene*>, itl, possibly_nested) {

1070

(*itl)->AddToNestedIn(&gene);

1086

list<CGene>::iterator included_in(alts.end());

1087

list<CGene*> possibly_nested;

1088

list<CGene*> nested_in;

1090  bool

good_model =

true

;

1091  for

(list<CGene>::iterator itl = alts.begin(); good_model && itl != alts.end(); ++itl) {

1092  ECompat cmp

= CheckCompatibility(*itl, algn);

1095  case

eNotCompatibleNested:

1096  case

eNotCompatible:

1097

rejected.push_back(&algn);

1099

ost <<

"Trumped by another model "

<< itl->front()->ID();

1101  if

(

cmp

== eNotCompatibleNested)

1103

good_model =

false

;

1106  if

(!allow_partialalts && !itl->front()->GoodEnoughToBeAnnotation()) {

1107

rejected.push_back(&algn);

1109

ost <<

"Trumped by another model "

<< itl->front()->ID();

1111

good_model =

false

;

1112

}

else if

(included_in == alts.end()) {

1115

good_model =

false

;

1116

rejected.push_back(&algn);

1118

ost <<

"Connects two genes "

<< itl->front()->ID() <<

" "

<< included_in->front()->ID();

1123

nested_in.push_back(&(*itl));

1126

possibly_nested.push_back(&(*itl));

1134  if

(included_in != alts.end()) {

1138

included_in->Insert(algn);

1139

genep = &(*included_in);

1141

alts.push_back(

CGene

());

1142

genep = &alts.back();

1146

alts.back().Insert(algn);

1148  ITERATE

(list<CGene*>, itl, nested_in) {

1149  if

((*itl)->HarborsNested(*genep,

true

)) {

1151

(*itl)->AddToHarbored(genep);

1154  ITERATE

(list<CGene*>, itl, possibly_nested) {

1156

(*itl)->AddToNestedIn(genep);

1170

TChainPointerList::iterator jt_loop = it;

1171  for

(++jt_loop; jt_loop != not_placed_yet.end();) {

1172

TChainPointerList::iterator jt = jt_loop++;

1176

ost <<

"Trumped by similar chain "

<< ai.

ID

();

1178

rejected.push_back(&aj);

1179

not_placed_yet.erase(jt);

1187  for

(TChainPointerList::iterator it_loop = not_placed_yet.begin(); it_loop != not_placed_yet.end();) {

1188

TChainPointerList::iterator it = it_loop++;

1195

vector<const CChain*> candidates;

1200

candidates.push_back(&aj);

1204  for

(

size_t i

= 0; alive &&

i

< candidates.size(); ++

i

) {

1205  for

(

size_t

j =

i

+1; alive && j < candidates.size(); ++j) {

1206  if

(!candidates[

i

]->Limits().IntersectingWith(candidates[j]->Limits())) {

1208

ost <<

"Overlapping tandem "

<< candidates[

i

]->ID() - ai.

ID

() <<

" "

<< candidates[j]->ID() - ai.

ID

();

1210

rejected.push_back(*it);

1211

not_placed_yet.erase(it);

1226

it->SetGeneID(it->ID());

1227

it->SetRankInGene(0);

1228

not_placed_yet.push_back(&(*it));

1235

FilterOutSimilarsWithLowerScore(not_placed_yet, bad_aligns);

1236

FilterOutTandemOverlap(not_placed_yet, bad_aligns, 80);

1238

FindGeneSeeds(alts, not_placed_yet);

1239

ReplacePseudoGeneSeeds(alts, not_placed_yet);

1240

FindAltsForGeneSeeds(alts, not_placed_yet);

1241

PlaceAllYouCan(alts, not_placed_yet, bad_aligns);

1246

(*l)->SetGeneID(k->front()->ID());

1247

(*l)->SetRankInGene(++rank);

1255

(*l)->AddComment(

"Not placed"

);

1278  if

(alimits == blimits)

1281  return

(alimits.

GetTo

() > blimits.

GetTo

());

1290 typedef

tuple<Int8, TSignedSeqRange>

TIdLim

;

1316

gmembers.insert(&m);

1325  if

(members_genes.empty())

1330  typedef

map<CGene*,list<SChainMember*> > TGeneToMembers;

1331  typedef

map<TIdLim, TGeneToMembers> TMembersInDiffGenes;

1332

TMembersInDiffGenes members_in_different_genes;

1336  CGene

* genep = members_genes.front().second;

1337

members_in_different_genes[idlim][genep].push_back(mp);

1339  for

(

int i

= 1;

i

< (

int

)members_genes.size(); ++

i

) {

1343  CGene

* genep = members_genes[

i

].second;

1344  if

(idlim_prev != idlim) {

1345

TMembersInDiffGenes::iterator it = members_in_different_genes.find(idlim_prev);

1346  if

(it->second.size() < 2)

1347

members_in_different_genes.erase(it);

1349

members_in_different_genes[idlim][genep].push_back(mp);

1354

TMembersInDiffGenes::iterator it = members_in_different_genes.find(idlim);

1355  if

(it->second.size() < 2)

1356

members_in_different_genes.erase(it);

1359  ITERATE

(TMembersInDiffGenes, imdg, members_in_different_genes) {

1360  ITERATE

(TGeneToMembers, ig1, imdg->second) {

1361  CGene

& gene1 = *ig1->first;

1369  typedef

map<CChain*,TMemberPtrSet> TConflictMemebersInChains;

1370

TConflictMemebersInChains conflict_members_in_chains;

1372  ITERATE

(TMembersInDiffGenes, imdg, members_in_different_genes) {

1373  ITERATE

(TGeneToMembers, ig1, imdg->second) {

1374  CGene

& gene1 = *ig1->first;

1376  CChain

* chain1p_orig = *ic1;

1378  for

(list<SChainMember*>::const_iterator im = ig1->second.begin(); im != ig1->second.end() && mbr1p_orig == 0; ++im) {

1379  if

(binary_search(chain1p_orig->

m_members

.begin(),chain1p_orig->

m_members

.end(),*im, std::less<SChainMember*>()))

1382  for

(TGeneToMembers::const_iterator ig2 = imdg->second.begin(); mbr1p_orig != 0 && ig2 != ig1; ++ig2) {

1383  CGene

& gene2 = *ig2->first;

1385  CChain

* chain1p = chain1p_orig;

1389  for

(list<SChainMember*>::const_iterator im = ig2->second.begin(); im != ig2->second.end() && mbr2p == 0; ++im) {

1390  if

(binary_search(chain2p->

m_members

.begin(),chain2p->

m_members

.end(),*im, std::less<SChainMember*>()))

1397  if

(chain1p->

Exons

().size() > 1)

1400  if

(chain2p->

Exons

().size() > 1)

1405  swap

(chain1p,chain2p);

1410  CChain

& chain1 = *chain1p;

1411  CChain

& chain2 = *chain2p;

1415

conflict_members_in_chains[&chain2].insert(mbr2p);

1417

conflict_members_in_chains[&chain1].insert(mbr1p);

1418

}

else if

(

Precede

(core1,core2)) {

1419  if

(

Precede

(align_lim,core1))

1420

conflict_members_in_chains[&chain2].insert(mbr2p);

1421  else if

(

Precede

(core2,align_lim))

1422

conflict_members_in_chains[&chain1].insert(mbr1p);

1426

conflict_members_in_chains[&chain1].insert(mbr1p);

1428

conflict_members_in_chains[&chain2].insert(mbr2p);

1431

conflict_members_in_chains[&chain1].insert(mbr1p);

1433

conflict_members_in_chains[&chain2].insert(mbr2p);

1436

conflict_members_in_chains[&chain2].insert(mbr2p);

1438

conflict_members_in_chains[&chain1].insert(mbr1p);

1440

conflict_members_in_chains[&chain1].insert(mbr1p);

1441

conflict_members_in_chains[&chain2].insert(mbr2p);

1445

conflict_members_in_chains[&chain1].insert(mbr1p);

1446

conflict_members_in_chains[&chain2].insert(mbr2p);

1455  for

(

CGene

& gene : genes) {

1456  for

(

CChain

* chainp : gene)

1472  ITERATE

(TConflictMemebersInChains, it, conflict_members_in_chains) {

1473  CChain

& chain = *it->first;

1479

hard_limits = (hard_limits & chain.

Limits

());

1557

}

else if

(alim.

GetTo

() > noclip_limits.

GetTo

()) {

1566  int

left_splice = -1;

1567  int

right_splice = -1;

1568  for

(

int

e = 1; e < (

int

)chain.

Exons

().size(); ++e) {

1569  if

(left_splice < 0 && chain.

Exons

()[e-1].m_ssplice &&

Include

(new_limits,chain.

Exons

()[e-1].GetTo()))

1570

left_splice = chain.

Exons

()[e-1].GetTo();

1571  if

(chain.

Exons

()[e].m_fsplice &&

Include

(new_limits,chain.

Exons

()[e].GetFrom()))

1572

right_splice = chain.

Exons

()[e].GetFrom();

1575  double

left_weights_total = 0.;

1577  double

right_weights_total = 0.;

1583  for

(

int

e = 1; e < (

int

)

a

.Exons().size(); ++e) {

1584  if

(

a

.Exons()[e-1].m_ssplice &&

a

.Exons()[e-1].GetTo() == left_splice) {

1585

left_weights[alim.

GetFrom

()] +=

a

.Weight();

1586

left_weights_total +=

a

.Weight();

1588  if

(

a

.Exons()[e].m_fsplice &&

a

.Exons()[e].GetFrom() == right_splice) {

1589

right_weights[alim.

GetTo

()] +=

a

.Weight();

1590

right_weights_total +=

a

.Weight();

1594  if

(left_weights_total > 0.) {

1597  for

(map<int,double>::reverse_iterator it = left_weights.rbegin(); it != left_weights.rend(); ++it) {

1598  if

(

t

< 0.9*left_weights_total)

1602  if

(left < new_limits.

GetFrom

())

1605  if

(right_weights_total > 0.) {

1609  if

(

t

< 0.9*right_weights_total)

1613  if

(right > new_limits.

GetTo

())

1614

new_limits.

SetTo

(right);

1629  if

(new_limits != chain.

Limits

()) {

1635

note +=

" overlap UTR clip"

;

1639  bool

wasopen = chain.

OpenCds

();

1645  m_gnomon

->GetScore(chain, !no5pextension);

1647  if

(wasopen != chain.

OpenCds

() && (wasopen ==

false

|| cds.

HasStart

())) {

1658  if

(m_type !=

eCDS

)

1661

deque<const SChainMember*> not_visited(1,

this

);

1662  while

(!not_visited.empty()) {

1664

not_visited.pop_back();

1669  if

(c < mbr->m_identical_count) {

1670  if

(included_in_list.insert(mi).second) {

1671

contained.push_back(mi);

1673

included_in_list.insert(mi->

m_copy

->begin(),mi->

m_copy

->end());

1675

}

else if

(included_in_list.find(mi) == included_in_list.end()) {

1676

not_visited.push_back(mi);

1686

AddCodingToContained(contained, included_in_list);

1696

AddCodingToContained(contained, included_in_list);

1699

left->AddCodingToContained(contained, included_in_list);

1703

right->AddCodingToContained(contained, included_in_list);

1710

deque<const SChainMember*> not_visited(1,

this

);

1711  while

(!not_visited.empty()) {

1713

not_visited.pop_back();

1716  if

(c < mbr->m_identical_count) {

1717  if

(included_in_list.insert(mi).second) {

1718

contained.push_back(mi);

1720

included_in_list.insert(mi->

m_copy

->begin(),mi->

m_copy

->end());

1722

}

else if

(included_in_list.find(mi) == included_in_list.end()) {

1723

not_visited.push_back(mi);

1733

AddToContained(contained, included_in_list);

1743

AddToContained(contained, included_in_list);

1746

left->AddToContained(contained, included_in_list);

1750

right->AddToContained(contained, included_in_list);

1756 #define START_BONUS 600 1774  TContained

contained = CollectContainedForChain();

1783

m_postponed =

true

;

1798  TContained

contained = CollectContainedForChain();

1807  TContained

contained = CollectContainedForChain();

1854  return

(alimits.

GetTo

() < blimits.

GetTo

());

1875  if

(alimits == blimits)

1877  else if

(alimits.

GetTo

() == blimits.

GetTo

())

1880  return

(alimits.

GetTo

() < blimits.

GetTo

());

1903  return

(alimits.

GetTo

() < blimits.

GetTo

());

1926  if

(alimits == blimits)

1929  return

(alimits.

GetTo

() < blimits.

GetTo

());

1965  sort

(container.begin(),container.end());

1966

container.erase( unique(container.begin(),container.end()), container.end() );

1976  void

InsertMemberCopyWithoutCds(

SChainMember

* copy_ofp);

1991

m_members.splice(m_members.end(),other.

m_members

);

1992

m_copylist.splice(m_copylist.end(),other.

m_copylist

);

1993

m_align_maps.splice(m_align_maps.end(),other.

m_align_maps

);

1994

m_containedlist.splice(m_containedlist.end(),other.

m_containedlist

);

1995

m_extra_cds.splice(m_extra_cds.end(),other.

m_extra_cds

);

1996

insert(end(),other.begin(),other.end());

2004

InsertMember(mbr, copy_ofp);

2009

m_extra_cds.push_back(cds);

2010

InsertMemberCopyWithCds(m_extra_cds.back(), copy_ofp);

2018

InsertMember(mbr, copy_ofp);

2034

InsertMember(mbr, copy_ofp);

2040

m_members.push_back(m);

2041

push_back(&m_members.back());

2044

m_members.back().m_contained = &m_containedlist.back();

2050

m_members.back().m_align_map = &m_align_maps.back();

2052

m_members.back().m_align_map = copy_ofp->

m_align_map

;

2055  if

(copy_ofp != 0) {

2056  if

(copy_ofp->

m_copy

== 0) {

2057

m_copylist.push_back(

TContained

(1,copy_ofp));

2058

copy_ofp->

m_copy

= &m_copylist.back();

2060

m_members.back().m_copy = copy_ofp->

m_copy

;

2061

copy_ofp->

m_copy

->push_back(&m_members.back());

2070

InsertMember(new_mbr, copy_ofp);

2076

m_extra_cds.push_back(

CCDSInfo

());

2078

InsertMember(*itcl);

2079

m_members.back().m_orig_align = orig_aligns[itcl->ID()];

2080  if

(unmodified_aligns.count(itcl->ID()))

2081

m_members.back().m_unmd_align = &unmodified_aligns[itcl->ID()];

2103  bool

small_flex =

false

;

2113  if

(big_limits == small_limits) {

2131  m_data

->CutParts(models);

2137  if

(!parts.empty()) {

2138

models.splice(models.begin(),parts);

2146  size_t

initial_size = pointers.size();

2147  for

(

size_t i

= 0;

i

< initial_size; ++

i

) {

2154

clust.push_back(new_algn);

2162  size_t

initial_size = pointers.size();

2163  for

(

size_t i

= 0;

i

< initial_size; ++

i

) {

2176

map<int, set<int> > oriented_splices;

2177  ITERATE

(set<TSignedSeqRange>,

i

, oriented_introns_plus) {

2178

oriented_splices[

ePlus

].insert(

i

->GetFrom());

2179

oriented_splices[

ePlus

].insert(

i

->GetTo());

2181  ITERATE

(set<TSignedSeqRange>,

i

, oriented_introns_minus) {

2182

oriented_splices[

eMinus

].insert(

i

->GetFrom());

2183

oriented_splices[

eMinus

].insert(

i

->GetTo());

2204  typedef

vector<pair<CCDSInfo::SPStop,TSignedSeqRange> > TPstopIntron;

2205

TPstopIntron pstops_with_intron_plus;

2206

TPstopIntron pstops_with_intron_minus;

2210

TPstopIntron& pstops_with_intron = (algn.

Strand

() ==

ePlus

) ? pstops_with_intron_plus : pstops_with_intron_minus;

2213

left =

min

(left,s->GetFrom());

2214

right =

max

(right,s->GetTo());

2215  if

(s->GetLength() == 3) {

2218  for

(

int i

= 1;

i

< (

int

)algn.

Exons

().size(); ++

i

) {

2220

pstops_with_intron.push_back(make_pair(*s,intron));

2226  uniq

(pstops_with_intron_plus);

2227  uniq

(pstops_with_intron_minus);

2239

TPstopIntron& pstops_with_intron = (algn.

Strand

() ==

ePlus

) ? pstops_with_intron_plus : pstops_with_intron_minus;

2240  if

(pstops_with_intron.empty())

2247  ITERATE

(TPstopIntron,

si

, pstops_with_intron) {

2248  if

(

si

->second.GetLength() == 1) {

2249  if

(

si

->first == *s)

2252  for

(

int i

= 1;

i

< (

int

)algn.

Exons

().size(); ++

i

) {

2254  if

(

si

->second == intron &&

si

->first == *s)

2267  ITERATE

(TPstopIntron,

si

, pstops_with_intron) {

2272  for

(

int i

= 0;

i

< (

int

)exons.size(); ++

i

) {

2273  if

(

Include

(exons[

i

].Limits(),

si

->first.GetFrom())) {

2274  if

(

si

->second.GetLength() == 1) {

2275  if

(

si

->first.GetTo() <= exons[

i

].GetTo())

2278  if

(

i

< (

int

)exons.size()-1) {

2280  if

(intron ==

si

->second &&

si

->first.GetTo() <= exons[

i

+1].GetTo())

2305  double ms

= GoodCDNAScore(algn);

2306

RemovePoorCds(algn,

ms

);

2326  int

strand = align.

Strand

();

2327  if

(!tgr_info.count(strand))

2333  auto

second = upper_bound(tinfo.begin(), tinfo.end(), align.

Limits

().

GetTo

(),

2335  if

(

first

== second)

2345  for

(

auto

& e : align.

Exons

()) {

2346  auto

overlap = (e.

Limits

()&acds);

2347  for

(

TSignedSeqPos

k = overlap.GetFrom(); k <= overlap.GetTo(); ++k) {

2349  if

(p >= 0 && p%3 == 0) {

2350  _ASSERT

(p/3 < (

int

)acds_map.size());

2358  bool

paralogs =

false

;

2360  for

(

auto

it =

first

; it != second; ++it) {

2365  int

tgr = it-tinfo.begin()+1;

2370  for

(

const TIVec

& tcds_map : it->m_cds_maps) {

2371  int

long_enough = 10;

2373  if

(mci >= long_enough) {

2379  auto

num = prev_accessions.

size

();

2380  for

(

auto

&

id

: it->m_ids)

2381

prev_accessions.

insert

(

id

.second);

2382  if

(num+it->m_ids.size() != prev_accessions.

size

())

2400

cerr <<

"Chimeric chain member "

<< align.

ID

() <<

" "

<< (paralogs ?

"paralogs"

:

"unrelated"

) <<

" "

<< flag << endl;

2402  if

(mbr.

m_copy

!=

nullptr

) {

2403  for

(

auto

p : *mbr.

m_copy

)

2404

p->m_marked_for_deletion =

true

;

2413  size_t

initial_size = pointers.size();

2414  for

(

size_t i

= 0;

i

< initial_size; ++

i

) {

2439

initial_size = pointers.size();

2440  for

(

unsigned int i

= 0;

i

< initial_size; ++

i

) {

2472  if

(mbr.

m_copy

->front()->m_align->Strand() == algn.

Strand

()) {

2475

}

else if

((*mbr.

m_copy

)[1]->m_cds_info->ReadingFrame() == cdsinfo.

ReadingFrame

()) {

2490  if

(indl->InDelEnd() > lim.

GetFrom

() && indl->Loc() <= lim.

GetTo

())

2491

fs.push_back(*indl);

2503  bool

jflex =

false

;

2529  if

(!jflex && jlimits.

GetTo

()-ai_max_cds.

GetFrom

() >= 5)

2537  if

(!jflex && ai_max_cds.

GetTo

()-jlimits.

GetFrom

() >= 5)

2560  if

(

abs

(j_from-i_from)%3 != 0)

2565  int

iex = (

int

)ai.

Exons

().size();

2566  int

jex = (

int

)aj.

Exons

().size();

2577  set<int>

left_exon_ends, right_exon_ends;

2580  for

(

int i

= 1;

i

< (

int

)algn.

Exons

().size(); ++

i

) {

2581  if

(algn.

Exons

()[

i

-1].m_ssplice && algn.

Exons

()[

i

].m_fsplice) {

2582

left_exon_ends.

insert

(algn.

Exons

()[

i

].GetFrom());

2583

right_exon_ends.

insert

(algn.

Exons

()[

i

-1].GetTo());

2593  if

(ri != right_exon_ends.

end

())

2597  if

(li != left_exon_ends.

end

())

2606  for

(

int i

= 0;

i

< (

int

)pointers.size(); ++

i

) {

2618  string

ssplice = ai.

Exons

()[

i

-1].m_ssplice_sig;

2619  string

fsplice = ai.

Exons

()[

i

].m_fsplice_sig;

2620  if

(ssplice ==

"XX"

|| fsplice ==

"XX"

)

2622  else if

(ai.

Strand

() ==

ePlus

&& ((ssplice !=

"GT"

&& ssplice !=

"GC"

) || fsplice !=

"AG"

))

2624  else if

(ai.

Strand

() ==

eMinus

&& (ssplice !=

"AG"

|| (fsplice !=

"GT"

&& fsplice !=

"GC"

)))

2648  if

(indl->IntersectingWith(e->

GetFrom

(), e->

GetTo

()))

2654  if

(pointers[jfirst]->m_align->Limits() != ai.

Limits

())

2656  for

(

int

j = jfirst; j < (

int

)pointers.size() && pointers[j]->m_align->Limits().GetFrom() <= ai.

Limits

().

GetTo

(); ++j) {

2658

IncludeInContained(mi, mi);

2666  if

(CanIncludeJinI(mi, mj))

2667

IncludeInContained(mi, mj);

2697 #define NON_CDNA_INTRON_PENALTY 20 2722  else if

(mj.

m_type

==

eLeftUTR

&& (!ai_left_complete || (!j_rflexible && (aj.

Limits

()&ai_rf).GetLength() > 5)))

2734  else if

(mj.

m_type

==

eCDS

&& (!aj_right_complete || (!i_lflexible && (ai.

Limits

()&aj_rf).GetLength() > 5)))

2748  if

(j_rflexible || i_lflexible)

2750  if

((ai.

Limits

() & aj.

Limits

()).GetLength() < intersect_limit)

2761  int

cds_overlap = 0;

2765  if

(genome_overlap < 0)

2777  if

(cds_overlap%3 != 0)

2784  for

(

int i

= 1;

i

< (

int

)ai.

Exons

().size(); ++

i

) {

2785  if

(ai.

Exons

()[

i

-1].m_ssplice && ai.

Exons

()[

i

].m_fsplice) {

2787  if

(

Include

(ai_rf,intron) &&

Include

(aj_rf,intron) && mrna_count[intron]+est_count[intron]+rnaseq_count[intron] == 0) {

2795

delta_cds = mi.

m_cds

-cds_overlap;

2798

delta_splice_num = 0;

2799  if

(delta_cds >= 0) {

2803  if

(!j_rflexible && !i_lflexible)

2804  first

= upper_bound(contained.begin(), contained.end(), &mj,

LeftOrder

())-contained.begin();

2806

not_sorted =

false

;

2807

contained.back()->m_accumulated_num = contained.back()->m_align->Weight();

2808

contained.back()->m_accumulated_splice_num = contained.back()->m_splice_weight;

2809  for

(

int i

= (

int

)contained.size()-2;

i

>=

first

; --

i

) {

2810

contained[

i

]->m_accumulated_num = contained[

i

]->m_align->Weight()+contained[

i

+1]->m_accumulated_num;

2811

contained[

i

]->m_accumulated_splice_num = contained[

i

]->m_splice_weight+contained[

i

+1]->m_accumulated_splice_num;

2815

delta_num = contained[

first

]->m_accumulated_num;

2816

delta_splice_num = contained[

first

]->m_accumulated_splice_num;

2830  for

(

auto

p : micontained) {

2844  for

(

int i

= 1;

i

< (

int

)ai.

Exons

().size(); ++

i

) {

2845  if

(ai.

Exons

()[

i

-1].m_ssplice && ai.

Exons

()[

i

].m_fsplice) {

2847  if

(

Include

(ai_rf,intron) && mrna_count[intron]+est_count[intron]+rnaseq_count[intron] == 0) {

2869

}

else if

(align.TrustedGroup() != 0) {

2870

tgr = align.TrustedGroup();

2874  _ASSERT

(tgr == 0 || (tgr > 0 && align.Strand() ==

ePlus

) || (tgr < 0 && align.Strand() ==

eMinus

));

2880  TIVec

right_ends(pointers.size());

2881  for

(

int

k = 0; k < (

int

)pointers.size(); ++k) {

2882  auto

& kalign = *pointers[k]->m_align;

2883  int

rend = kalign.Limits().GetTo();

2885

rend = kalign.Limits().GetFrom();

2886

right_ends[k] = rend;

2892

LRIinit(mi, micontained);

2893  bool

not_sorted =

true

;

2896

TIVec::iterator

lb

= lower_bound(right_ends.begin(),right_ends.end(),ai.

Limits

().

GetFrom

()-2*flex_len);

2897

TContained::iterator jfirst = pointers.begin();

2898  if

(

lb

!= right_ends.end())

2899

jfirst = pointers.begin()+(

lb

-right_ends.begin());

2900  for

(TContained::iterator j = jfirst; j <

i

; ++j) {

2904  if

(aj.

Exons

().back().m_fsplice_sig ==

"XX"

|| ai.

Exons

().front().m_ssplice_sig ==

"XX"

)

2912  double

delta_splice_num;

2913  if

(LRCanChainItoJ(delta_cds, delta_num, delta_splice_num, mi, mj, micontained, not_sorted)) {

2918  bool

trust_compatible =

true

;

2922

trust_compatible =

false

;

2924

trust_compatible =

false

;

2926  bool

better_connection =

false

;

2928

better_connection = (newcds > mi.

m_left_cds

);

2932

better_connection =

true

;

2935  if

(better_connection) {

2936  if

(trust_compatible) {

2956  int

strand = ai.

Strand

();

2957  _ASSERT

(tgr == 0 || (tgr > 0 && strand ==

ePlus

) || (tgr < 0 && strand ==

eMinus

));

2965  TIVec

left_ends(pointers.size());

2966  for

(

int

k = 0; k < (

int

)pointers.size(); ++k) {

2967  auto

& kalign = *pointers[k]->m_align;

2968  int

lend = kalign.Limits().GetFrom();

2970

lend = kalign.Limits().GetTo();

2971

left_ends[k] = lend;

2996  bool

not_sorted =

true

;

2999

TIVec::iterator

lb

= lower_bound(left_ends.begin(),left_ends.end(),ai.

Limits

().

GetTo

()+2*flex_len,greater<int>());

3000

TContained::iterator jfirst = pointers.begin();

3001  if

(

lb

!= left_ends.end())

3002

jfirst = pointers.begin()+(

lb

-left_ends.begin());

3003  for

(TContained::iterator j = jfirst; j <

i

; ++j) {

3007  if

(aj.

Exons

().front().m_ssplice_sig ==

"XX"

|| ai.

Exons

().back().m_fsplice_sig ==

"XX"

)

3026  if

(mj.

m_type

==

eRightUTR

&& (!ai_right_complete || (!j_lflexible && (aj.

Limits

()&ai_rf).GetLength() > 5)))

3038  if

(mj.

m_type

==

eCDS

&& (!aj_left_complete || (!i_rflexible && (ai.

Limits

()&aj_rf).GetLength() > 5)))

3054  if

(j_lflexible || i_rflexible)

3058  if

(intersect < intersect_limit)

continue

;

3069  int

cds_overlap = 0;

3073  if

(genome_overlap < 0)

3085  if

(cds_overlap%3 != 0)

3092  for

(

int i

= 1;

i

< (

int

)ai.

Exons

().size(); ++

i

) {

3093  if

(ai.

Exons

()[

i

-1].m_ssplice && ai.

Exons

()[

i

].m_fsplice) {

3095  if

(

Include

(ai_rf,intron) &&

Include

(aj_rf,intron) && mrna_count[intron]+est_count[intron]+rnaseq_count[intron] == 0) {

3104  int

delta_cds = mi.

m_cds

-cds_overlap;

3114  if

(!j_lflexible && !i_rflexible)

3115  first

= upper_bound(micontained.begin(),micontained.end(),&mj,

RightOrder

())-micontained.begin();

3117

not_sorted =

false

;

3118

micontained.back()->m_accumulated_num = micontained.back()->m_align->Weight();

3119

micontained.back()->m_accumulated_splice_num = micontained.back()->m_splice_weight;

3120  for

(

int i

= (

int

)micontained.size()-2;

i

>=

first

; --

i

) {

3121

micontained[

i

]->m_accumulated_num = micontained[

i

]->m_align->Weight()+micontained[

i

+1]->m_accumulated_num;

3122

micontained[

i

]->m_accumulated_splice_num = micontained[

i

]->m_splice_weight+micontained[

i

+1]->m_accumulated_splice_num;

3126  double

delta_num = micontained[

first

]->m_accumulated_num;

3127  double

delta_splice_num = micontained[

first

]->m_accumulated_splice_num;

3132  bool

trust_compatible =

true

;

3136

trust_compatible =

false

;

3138

trust_compatible =

false

;

3140  bool

better_connection =

false

;

3146

better_connection =

true

;

3149  if

(better_connection) {

3150  if

(trust_compatible) {

3169  int

strand = ai.

Strand

();

3170  _ASSERT

(tgr == 0 || (tgr > 0 && strand ==

ePlus

) || (tgr < 0 && strand ==

eMinus

));

3181

vector<const SChainMember*> mal;

3184

mal.push_back(left);

3187

mal.push_back(right);

3190  string

note = to_string(mi.

m_align

->

ID

());

3191  ITERATE

(vector<const SChainMember*>, imal, mal) {

3193  if

((*imal)->m_excluded_readthrough)

3195

note = note+

" "

+star+to_string((*imal)->m_align->ID());

3201

map<TSignedSeqRange,int>& mrna_count, map<TSignedSeqRange,int>& est_count, map<TSignedSeqRange,int>& rnaseq_count) {

3203  for

(

int i

= 1;

i

< (

int

)chain.

Exons

().size() && good; ++

i

) {

3204  if

(chain.

Exons

()[

i

-1].m_ssplice && chain.

Exons

()[

i

].m_fsplice) {

3215

map<TSignedSeqRange,int>& mrna_count, map<TSignedSeqRange,int>& est_count, map<TSignedSeqRange,int>& rnaseq_count) {

3218

(*i)->m_marked_for_deletion = !

GoodSupportForIntrons

(*(*i)->m_align, minscor, mrna_count, est_count, rnaseq_count);

3229  if

(

a

.Limits() !=

b

.Limits())

3230  return a

.Limits() <

b

.Limits();

3235  return

aflex < bflex;

3237  return

*orig_aligns[

a

.ID()]->GetTargetId() < *orig_aligns[

b

.ID()]->GetTargetId();

3243

map<tuple<int, int>, TGeneModelList::iterator> special_aligns;

3245  for

(TGeneModelList::iterator it = clust.begin(); it != clust.end(); ++it) {

3248

special_aligns.emplace(make_tuple(status, it->Limits().GetTo()), it);

3252

special_aligns.emplace(make_tuple(status, it->Limits().GetFrom()), it);

3258  for

(TGeneModelList::iterator it = clust.begin(); it != clust.end(); ++it) {

3269  if

(it->Strand() ==

ePlus

) {

3270

pos = it->Limits().GetFrom();

3274

pos = it->Limits().GetTo();

3279

galign.

Status

() |= status;

3280

clust.push_front(galign);

3281  auto

rslt = special_aligns.emplace(make_tuple(status, pos), clust.begin());

3283  auto

ialign = rslt.first->second;

3284

ialign->SetWeight(ialign->Weight()+galign.

Weight

());

3296  if

(it->Strand() ==

eMinus

) {

3297

pos = it->Limits().GetFrom();

3301

pos = it->Limits().GetTo();

3306

galign.

Status

() |= status;

3307

clust.push_front(galign);

3308  auto

rslt = special_aligns.emplace(make_tuple(status, pos), clust.begin());

3310  auto

ialign = rslt.first->second;

3311

ialign->SetWeight(ialign->Weight()+galign.

Weight

());

3319  for

(

auto

& sa : special_aligns) {

3320  auto

ialign = sa.second;

3321  double

min_pos_weight = ((ialign->Status()&

CGeneModel::eCap

) ? min_cap_weight : min_polya_weight);

3322  if

(ialign->Limits().GetFrom() < 0 || ialign->Limits().GetTo() >= contig_len || ialign->Weight() < min_pos_weight)

3323

clust.erase(ialign);

3329

confirmed_ends.clear();

3330

all_frameshifts.clear();

3333  if

(use_confirmed_ends) {

3335  auto

rslt = confirmed_ends.emplace(align.

Exons

().front().GetTo(), align.

Exons

().front().GetFrom());

3337

rslt.first->second =

min

(rslt.first->second, align.

Exons

().front().GetFrom());

3340  auto

rslt = confirmed_ends.emplace(align.

Exons

().back().GetFrom(), align.

Exons

().back().GetTo());

3342

rslt.first->second =

max

(rslt.first->second, align.

Exons

().back().GetTo());

3345

all_frameshifts.insert(all_frameshifts.end(), align.

FrameShifts

().begin(), align.

FrameShifts

().end());

3346  for

(

int i

= 1;

i

< (

int

)align.

Exons

().size(); ++

i

) {

3347  if

(align.

Exons

()[

i

-1].m_ssplice && align.

Exons

()[

i

].m_fsplice) {

3352

oriented_introns_plus.insert(intron);

3354

oriented_introns_minus.insert(intron);

3358

mrna_count[intron] += align.

Weight

();

3360

est_count[intron] += align.

Weight

();

3362

rnaseq_count[intron] += align.

Weight

();

3367

has_rnaseq = !rnaseq_count.empty();

3368  sort

(all_frameshifts.begin(),all_frameshifts.end());

3369  if

(!all_frameshifts.empty())

3370  uniq

(all_frameshifts);

3381  for

(

int i

= 1;

i

< (

int

)align.

Exons

().size(); ++

i

) {

3382  if

(align.

Exons

()[

i

-1].m_ssplice && align.

Exons

()[

i

].m_fsplice) {

3384  if

(oriented_introns_plus.find(intron) != oriented_introns_plus.end())

3386  if

(oriented_introns_minus.find(intron) != oriented_introns_minus.end())

3390  if

(pluses > 0 && minuses == 0) {

3394

}

else if

(minuses > 0 && pluses == 0) {

3411

CreateFlexibleAligns(clust);

3416

align.SetTrustedCds(align.TrustedCds()&align.Limits());

3418  CChainMembers

allpointers(clust, orig_aligns, unmodified_aligns);

3420

DuplicateNotOriented(allpointers, clust);

3421

ReplicatePStops(allpointers);

3422

ScoreCdnas(allpointers);

3423

Duplicate5pendsAndShortCDSes(allpointers);

3424

DuplicateUTRs(allpointers);

3425

CalculateSpliceWeights(allpointers);

3429

FindOverlappingWithTrusted(allpointers);

3433

FindContainedAlignments(allpointers);

3439  _ASSERT

((*ip)->m_orig_align);

3440  if

(!(*ip)->m_not_for_chaining)

3441

pointers.push_back(*

ip

);

3447

coding_pointers.push_back(*

i

);

3450

LeftRight(coding_pointers);

3451

RightLeft(coding_pointers);

3455

array<map<TSignedSeqPos,TSignedSeqRange>, 2> coding_right_splices;

3456

array<map<TSignedSeqPos,TSignedSeqRange>, 2> coding_left_splices;

3457

array<set<TSignedSeqRange>, 2> coding_introns;

3475  CChain

chain(mi,

false

);

3478  m_gnomon

->GetScore(chain,

false

,

false

,

true

);

3485

(cdslen < minscor.m_minlen || (chain.

Score

() < 2*minscor.m_min && cdslen < 2*minscor.m_cds_len)))

3489  for

(

int i

= 1;

i

< (

int

)chain.

Exons

().size(); ++

i

) {

3491  bool

coding_donor =

Include

(real_cds, donor);

3493  bool

coding_acceptor =

Include

(real_cds, acceptor);

3495

coding_right_splices[chain.

Strand

()][donor].CombineWith(real_cds);

3496  if

(coding_acceptor)

3497

coding_left_splices[chain.

Strand

()][acceptor].CombineWith(real_cds);

3498  if

(coding_donor && coding_acceptor)

3499

coding_introns[chain.

Strand

()].emplace(donor, acceptor);

3519  for

(

auto ip

: pointers) {

3524  int

strand =

ip

->m_align->Strand();

3525  auto

& crs = coding_right_splices[strand];

3526  auto

& cls = coding_left_splices[strand];

3527  for

(

int i

= 1;

i

< (

int

)

ip

->m_align->Exons().size(); ++

i

) {

3529  auto

rslt = crs.find(rsplice);

3531  ip

->m_marked_for_deletion =

true

;

3535

rslt = cls.find(lsplice);

3537  ip

->m_marked_for_deletion =

true

;

3542  if

(!

ip

->m_marked_for_deletion) {

3543  auto

& cdi = coding_introns[strand];

3544  for

(

auto

& exon :

ip

->m_align->Exons()) {

3545  if

(

Include

(cds, exon.Limits()))

3547  for

(

auto

intronp = cdi.upper_bound(exon.Limits()); intronp != cdi.end() && intronp->GetFrom() < exon.GetTo(); ++intronp) {

3548  if

(

Include

(exon.Limits(), *intronp) && !

Include

(cds, *intronp)) {

3549  ip

->m_marked_for_deletion =

true

;

3553  if

(

ip

->m_marked_for_deletion)

3563

set<TSignedSeqRange> introns;

3564

set<TSignedSeqRange> est_introns;

3565  for

(

auto

p : pointers) {

3568  for

(

unsigned i

= 1;

i

< exons.size(); ++

i

) {

3569  if

(!exons[

i

-1].m_ssplice || !exons[

i

].m_fsplice)

3571

introns.emplace(exons[

i

-1].GetTo(), exons[

i

].GetFrom());

3573

est_introns.emplace(exons[

i

-1].GetTo(), exons[

i

].GetFrom());

3576  bool

enough_est = !introns.empty() && est_introns.size() > longreadsthreshold/100*introns.size();

3578

cerr <<

"Introns: "

<< introns.size() <<

" "

<< est_introns.size() <<

" "

<< enough_est << endl;

3580  int

old_oep = intersect_limit;

3582

intersect_limit = 10000;

3584

if(p->m_align->Status()&(CGeneModel::eLeftFlexible|CGeneModel::eRightFlexible))

3586

if(p->m_align->Exons().size() != 1 || p->m_type == eCDS)

3588

if(p->m_copy != nullptr) {

3589

for(SChainMember* cp : *p->m_copy) {

3590

if(cp->m_cds_info->MaxCdsLimits() == TSignedSeqRange::GetWhole())

3595  return true

; }), pointers.end());

3598

LeftRight(pointers);

3599

RightLeft(pointers);

3621  CChain

chain(mi,

false

);

3628

RemovePoorCds(chain, GoodCDNAScore(chain,

true

));

3641

chain.

ClipToCap

(min_cap_blob, max_dist, min_flank_exon, secondary_peak,

false

);

3642

chain.

ClipToPolyA

(contig, min_polya_blob, max_dist, min_flank_exon, secondary_peak, tertiary_peak, tertiary_peak_coverage,

false

);

3647  m_gnomon

->GetScore(chain, !no5pextension);

3650

RemovePoorCds(chain, GoodCDNAScore(chain));

3660

tmp_chains.push_back(chain);

3669

CreateChainsForPartialProteins(tmp_chains, pointers, unma_aligns, unma_members);

3679  for

(

auto i

: allpointers) {

3684  if

(mi.

m_copy

!=

nullptr

) {

3685  for

(

auto

j : *mi.

m_copy

) {

3687  for

(

auto

jc : *j->m_contained) {

3696

pointers.erase(

std::remove_if

(pointers.begin(),pointers.end(),[](

SChainMember

* p){ return p->m_type == eRightUTR; }), pointers.end());

3701

return p->m_align->Exons().size() == 1 && !(p->m_align->Status()&(CGeneModel::eLeftFlexible|CGeneModel::eRightFlexible)); }), pointers.end());

3704

array<set<TSignedSeqRange>, 2> non_coding_introns;

3705  for

(

auto

p : pointers) {

3707  for

(

unsigned i

= 1;

i

< exons.size(); ++

i

) {

3709

non_coding_introns[p->

m_align

->

Strand

()].insert(intron);

3712  for

(

auto

p : pointers) {

3714  auto

& ncdi = non_coding_introns[strand];

3716  for

(

auto

intronp = ncdi.upper_bound(exon.Limits()); intronp != ncdi.end() && intronp->GetFrom() < exon.GetTo(); ++intronp) {

3717  if

(

Include

(exon.Limits(), *intronp)) {

3729

LeftRight(pointers);

3730

RightLeft(pointers);

3751  CChain

chain(mi,

false

);

3759

chain.

ClipToCap

(min_cap_blob, max_dist, min_flank_exon, secondary_peak,

false

);

3760

chain.

ClipToPolyA

(contig, min_polya_blob, max_dist, min_flank_exon, secondary_peak, tertiary_peak, tertiary_peak_coverage,

false

);

3770

tmp_chains.push_back(chain);

3778

chain.

SetID

(m_idnext);

3780

m_idnext += m_idinc;

3783

CombineCompatibleChains(tmp_chains);

3784

SetFlagsForChains(tmp_chains);

3786

intersect_limit = old_oep;

3788

list<CGene> genes = FindGenes(tmp_chains);

3790  if

(genes.size() > 1) {

3791

TrimAlignmentsIncludedInDifferentGenes(genes);

3792

CombineCompatibleChains(tmp_chains);

3793

SetFlagsForChains(tmp_chains);

3796  if

(genes.size() > 1)

3797

FindGenes(tmp_chains);

3801

it->RestoreTrimmedEnds(trim);

3802

chains.push_back(*it);

3807  enum

{ eFirstPeak = 1, eSecondPeak = 2, eThirdPeak = 4, eAs = 8};

3808

map<tuple<int, int, int>,

int

> cap_polya_info;

3810  for

(

auto

& chain : tmp_chains) {

3831  for

(

auto

&

info

: cap_polya_info) {

3833  char

strand = get<1>(

info

.first) ==

ePlus

?

'+'

:

'-'

;

3835

cerr <<

m_contig_acc

<<

' '

<< determinant <<

' '

<< strand <<

' '

<< pos <<

' '

;

3836  if

(

info

.second&eFirstPeak)

3837

cerr <<

":FirstPeak"

;

3838  if

(

info

.second&eSecondPeak)

3839

cerr <<

":SecondPeak"

;

3840  if

(

info

.second&eThirdPeak)

3841

cerr <<

":ThirdPeak"

;

3842  if

(

info

.second&eAs)

3858  return

ap->

ID

() < bp->

ID

();

3865  TIVec

right_ends(pointers.size());

3866

vector<SChainMember> no_gap_members(pointers.size());

3867  for

(

int

k = 0; k < (

int

)pointers.size(); ++k) {

3870

no_gap_members[k] = mi;

3875  int

first_member = (

int

)pointers.size()-1;

3877  for

(

int i

= (

int

)pointers.size()-1;

i

>= 0; --

i

) {

3879  if

(limi.

GetTo

() >= leftpos) {

3887  int

last_member = 0;

3889  for

(

int i

= 0;

i

< (

int

)pointers.size(); ++

i

) {

3891  if

(

Include

(limi,rightpos)) {

3893

rightpos =

max

(rightpos,limi.

GetTo

());

3897  int

fully_connected_right = 0;

3899  for

(

int i

= first_member;

i

<= last_member; ++

i

) {

3904

LRIinit(mi, micontained);

3910  int

part_to_connect = (

int

)parts.size()-1;

3911  while

(part_to_connect >= 0 && ai.

Limits

().

GetFrom

() <= parts[part_to_connect]->Limits().GetFrom())

3914  if

(fully_connected_right > 0 && ai.

Limits

().

GetFrom

() > fully_connected_right)

3917  bool

not_sorted =

true

;

3919  bool

compatible_with_included_parts =

true

;

3920  int

last_included_part = -1;

3921  bool

includes_first_part =

false

;

3922  for

(

int

p = part_to_connect+1; p < (

int

)parts.size(); ++p) {

3929  bool

samestop = (parts[p]->GetCdsInfo().HasStop() == mi.

m_cds_info

->

HasStop

() && (!parts[p]->GetCdsInfo().HasStop() || parts[p]->GetCdsInfo().Stop() == mi.

m_cds_info

->

Stop

()));

3931  if

(compatible && samestop && samefshifts) {

3932

last_included_part = p;

3934

includes_first_part =

true

;

3936

compatible_with_included_parts =

false

;

3942

compatible_with_included_parts =

false

;

3950  if

(!compatible_with_included_parts)

3953  _ASSERT

(part_to_connect < 0 || part_to_connect == (

int

)parts.size()-1 || mi.

m_type

==

eCDS

);

3955  if

(includes_first_part) {

3960

TIVec::iterator

lb

= lower_bound(right_ends.begin(),right_ends.end(),(part_to_connect >= 0 ? parts[part_to_connect]->Limits().GetTo() : ai.

Limits

().

GetFrom

()));

3962  if

(

lb

!= right_ends.end())

3963

jfirst = (

int

)(

lb

-right_ends.begin());

3965  for

(

int

j = jfirst; j <

i

; ++j) {

3981 #define PGAP_PENALTY 120 3989  if

(better_connection) {

3990  if

(trust_compatible) {

4003  double

delta_splice_num;

4004  if

(LRCanChainItoJ(delta_cds, delta_num, delta_splice_num, mi, mj, micontained, not_sorted)) {

4009  bool

trust_compatible =

true

;

4013

trust_compatible =

false

;

4015

trust_compatible =

false

;

4017  bool

better_connection =

false

;

4019

better_connection =

true

;

4021

better_connection = (newcds > mi.

m_left_cds

);

4025

better_connection =

true

;

4028  if

(better_connection) {

4029  if

(trust_compatible) {

4047

better_connection =

false

;

4049

better_connection =

true

;

4050

}

else if

(newcds != mi_no_gap.

m_left_cds

) {

4051

better_connection = (newcds > mi_no_gap.

m_left_cds

);

4054

}

else if

(newnum > mi_no_gap.

m_left_num

) {

4055

better_connection =

true

;

4058  if

(better_connection) {

4059  if

(trust_compatible) {

4099  if

(best_right ==

nullptr

)

4102  _ASSERT

(std::less<SChainMember*>()(best_right, &no_gap_members.front()) || std::less<SChainMember*>()(&no_gap_members.back(), best_right));

4104  if

(!std::less<SChainMember*>()(mp->m_left_member, &no_gap_members.front()) && !std::less<SChainMember*>()(&no_gap_members.back(), mp->m_left_member)) {

4105  SChainMember

* p = pointers[mp->m_left_member-&no_gap_members.front()];

4119  bool operator()

(

const

vector<CGeneModel*>* ap,

const

vector<CGeneModel*>* bp)

4121  const

vector<CGeneModel*>& partsa = *ap;

4122  const

vector<CGeneModel*>& partsb = *bp;

4125  ITERATE

(vector<CGeneModel*>, k, partsa)

4126

align_lena += (*k)->AlignLen();

4129  ITERATE

(vector<CGeneModel*>, k, partsb)

4130

align_lenb += (*k)->AlignLen();

4132  if

(align_lena != align_lenb) {

4133  return

align_lena > align_lenb;

4135  return

*orig_aligns[partsa.front()->ID()]->GetTargetId() < *orig_aligns[partsb.front()->ID()]->GetTargetId();

4144  typedef

map<Int8, vector<CGeneModel*> > TIdChainMembermap;

4145

TIdChainMembermap protein_parts;

4146  for

(

int

k = 0; k < (

int

)pointers_all.size(); ++k) {

4154

vector<vector<CGeneModel*>*> gapped_sorted_protein_parts;

4156

vector<CGeneModel*>& parts =

ip

->second;

4157  if

(parts.size() > 1) {

4159

gapped_sorted_protein_parts.push_back(&parts);

4162  sort

(gapped_sorted_protein_parts.begin(),gapped_sorted_protein_parts.end(),

AlignLenOrder

(orig_aligns));

4165

vector<CGeneModel*>& parts = **

ip

;

4166  Int8 id

= parts.front()->ID();

4168  ITERATE

(vector<CGeneModel*>, k, parts) {

4177  bool

connected =

false

;

4179  if

(k->Continuous() && palign.

Strand

() == k->Strand() && palign.

IsSubAlignOf

(*k)) {

4182

k->AddComment(

"Was connected "

+orig_aligns[palign.

ID

()]->TargetAccession());

4191  for

(

int

k = 0; k < (

int

)pointers_all.size(); ++k) {

4208  bool

compatible =

true

;

4210  if

((mip->

m_align

->

ID

() !=

id

&&

Include

(partp->Limits(),

limits

)) || (partp->Limits().IntersectingWith(

limits

) && !partp->HasCompatibleOverlap(*mip->

m_align

))) {

4211

compatible =

false

;

4219

pointers.push_back(mip);

4222  SChainMember

* best_right = FindOptimalChainForProtein(pointers, parts, palign);

4224  if

(best_right ==

nullptr

)

4229  CChain

chain(*best_right,

false

);

4232  if

(unmodified_aligns.count(

id

)) {

4234

vector<TSignedSeqRange> new_holes;

4235

vector<TSignedSeqRange> remaining_holes;

4236  for

(

int

k = 1; k < (

int

)chain.

Exons

().size(); ++k) {

4241

remaining_holes.push_back(h);

4242  for

(

int

piece_begin = 0; piece_begin < (

int

)unma.

Exons

().size(); ++piece_begin) {

4243  int

piece_end = piece_begin;

4244  for

( ; piece_end < (

int

)unma.

Exons

().size() && unma.

Exons

()[piece_end].m_ssplice; ++piece_end);

4245  if

(unma.

Exons

()[piece_begin].GetFrom() < h.

GetFrom

() && unma.

Exons

()[piece_end].GetTo() > h.

GetTo

()) {

4246

new_holes.push_back(h);

4249

piece_begin = piece_end;

4254  if

(!new_holes.empty()) {

4261

vector<TSignedSeqRange> existed_holes;

4262  for

(

int

k = 1; k < (

int

)unma.

Exons

().size(); ++k) {

4269  for

(

int

k = 1; k < (

int

)palign.

Exons

().size(); ++k) {

4274  bool

connected =

true

;

4275  ITERATE

(vector<TSignedSeqRange>, h, remaining_holes) {

4283  bool

existed =

false

;

4284  ITERATE

(vector<TSignedSeqRange>, h, existed_holes) {

4291  if

(connected || existed) {

4302

unmacl.push_back(unma);

4305

vector<CGeneModel*> unmaparts;

4308

unmaparts.push_back(&(*im));

4311  CChainMembers

unmapointers(unmacl, orig_aligns, unmodified_aligns);

4312

Duplicate5pendsAndShortCDSes(unmapointers);

4316

IncludeInContained(mi, mi);

4319  if

(CanIncludeJinI(mi, mj))

4320

IncludeInContained(mi, mj);

4325  _ASSERT

((*ip)->m_orig_align);

4326

(*ip)->m_mem_id = -(*ip)->m_mem_id;

4327

pointers.push_back(*

ip

);

4331

best_right = FindOptimalChainForProtein(pointers, unmaparts, unma);

4334  bool

present =

false

;

4336

present =

ip

== &mj;

4339  if

(CanIncludeJinI(mi, mj)) {

4340

IncludeInContained(mi, mj);

4346

chain =

CChain

(*best_right,

false

);

4348

unma_aligns.splice(unma_aligns.end(), unmacl);

4370

chain.

ClipToCap

(min_cap_blob, max_dist, min_flank_exon, secondary_peak,

false

);

4371

chain.

ClipToPolyA

(contig, min_polya_blob, max_dist, min_flank_exon, secondary_peak, tertiary_peak, tertiary_peak_coverage,

false

);

4375  m_gnomon

->GetScore(chain, !no5pextension);

4382

chain.

AddComment

(

"Connected "

+orig_aligns[palign.

ID

()]->TargetAccession());

4385

chains.push_back(chain);

4399  int len

= right-left+1;

4401

vector<int> prot_cov[2][3];

4402

prot_cov[0][0].resize(

len

,0);

4403

prot_cov[0][1].resize(

len

,0);

4404

prot_cov[0][2].resize(

len

,0);

4405

prot_cov[1][0].resize(

len

,0);

4406

prot_cov[1][1].resize(

len

,0);

4407

prot_cov[1][2].resize(

len

,0);

4413  for

(

int i

= 0;

i

< (

int

)align.

Exons

().size(); ++

i

) {

4416  for

(

int

j = rf.

GetFrom

(); j <= rf.

GetTo

(); ++j) {

4419

++prot_cov[align.

Strand

()][

abs

(cdstr-jtr)%3][j-left];

4442  bool

allcdnaintrons =

true

;

4444  for

(

int i

= 1;

i

< (

int

)chain.

Exons

().size() && allcdnaintrons; ++

i

) {

4445  if

(chain.

Exons

()[

i

-1].m_ssplice_sig !=

"XX"

&& chain.

Exons

()[

i

].m_fsplice_sig !=

"XX"

) {

4447

allcdnaintrons = (mrna_count[intron]+est_count[intron]+rnaseq_count[intron] > 0);

4451  if

(allcdnaintrons && num >0)

4461  int

rrf_from_proteins = 0;

4464  for

(

int i

= 0;

i

< (

int

)chain.

Exons

().size(); ++

i

) {

4467  for

(

int

j = rf.

GetFrom

(); j <= rf.

GetTo

(); ++j) {

4468  if

(j < left || j > right)

4472  int

frame =

abs

(cdstr-jtr)%3;

4473  if

(jtr >= 0 && prot_cov[chain.

Strand

()][frame][j-left] > 0) {

4475

lrf_from_proteins =

min

(lrf_from_proteins,j);

4477

rrf_from_proteins =

max

(rrf_from_proteins,j);

4501  for

(TChainList::iterator itt = chains.begin(); itt != chains.end(); ++itt) {

4505  for

(TChainList::iterator jt = chains.begin(); jt != chains.end();) {

4506

TChainList::iterator jtt = jt++;

4510  if

(itt != jtt && itt->Strand() == jtt->Strand() && jtt->IsSubAlignOf(*itt) && itt->ReadingFrame().Empty() == jtt->ReadingFrame().Empty()) {

4511  if

(itt->ReadingFrame().NotEmpty()) {

4516  bool

same_frame = (itt->FShiftedLen(

a

,

b

,

false

)-1)%3 == 0;

4524  if

(!

Include

(jtt->MaxCdsLimits(), itt->MaxCdsLimits()))

4534  bool

same_stops =

true

;

4536  if

(

Include

(jtt->Limits(),*istp) && find(jstops.begin(), jstops.end(), *istp) == jstops.end()) {

4537

same_stops =

false

;

4557

support.insert(*

i

);

4558  if

((*i)->m_copy != 0)

4559

support.insert((*i)->m_copy->begin(),(*i)->m_copy->end());

4571  if

(support.insert(*i).second && (

Include

(jlimits, il) || itt->HasCompatibleOverlap(*(*i)->m_align, 1))) {

4572

itt->m_was_combined =

true

;

4573

itt->m_members.push_back(*

i

);

4574  if

((*i)->m_copy != 0)

4575

support.insert((*i)->m_copy->begin(),(*i)->m_copy->end());

4578  if

(itt->m_was_combined) {

4580

itt->CalculateSupportAndWeightFromMembers();

4584

jtt->AddComment(

"Combined with "

+to_string(itt->ID()));

4598  return

minscor.m_min;

4632  if

(algn.

Score

() < minscor)

4636 #define SCAN_WINDOW 49 4650

vector<double> coverage_raw(mrna_len+

SCAN_WINDOW

);

4665

m_coverage.resize(mrna_len);

4669  for

(

int i

= 0;

i

< mrna_len; ++

i

) {

4670

m_coverage[

i

] = cov;

4676 CChain::CChain

(

SChainMember

& mbr,

bool

full_support) : m_coverage_drop_left(-1), m_coverage_drop_right(-1), m_coverage_bump_left(-1), m_coverage_bump_right(-1), m_core_coverage(0), m_splice_weight(0), m_cap_peaks(3, -1), m_polya_peaks(3, -1), m_was_combined(

false

) {

4697  int

num = other_exons.size();

4702  first

= other_exons.front().GetTo() >= exons.back().GetTo() ? 0 : 1;

4704  first

= std::lower_bound(other_exons.begin(), other_exons.end(), exons.back().GetTo(), [](

const CModelExon

& e,

TSignedSeqPos a

) { return e.GetTo() < a; })-other_exons.begin();

4705

exons.back().Extend(other_exons[

first

]);

4706

exons.insert(exons.end(), other_exons.begin()+

first

+1, other_exons.end());

4720  int

num = other_exons.size();

4722  if

(other_exons.back().GetTo() < exons.front().GetFrom()) {

4723

exons.insert(exons.begin(), other_exons.begin(), other_exons.end());

4725

exons[num].m_fsplice =

false

;

4726

exons[num].m_fsplice_sig.clear();

4727

exons[num-1].m_ssplice =

false

;

4728

exons[num-1].m_ssplice_sig.clear();

4732  if

(cds.

GetFrom

() > exons[num].GetFrom())

4733

exons[num].Limits().SetFrom(cds.

GetFrom

());

4736  if

(other_cds.

GetTo

() < exons[num-1].GetTo())

4737

exons[num-1].Limits().SetTo(other_cds.

GetTo

());

4743  first

= other_exons.back().GetFrom() <= exons.front().GetFrom() ? 1 : 0;

4745  first

= std::lower_bound(other_exons.begin(), other_exons.end(), exons.front().GetFrom(), [](

const CModelExon

& e,

TSignedSeqPos a

) { return e.GetTo() < a; })-other_exons.begin();

4746

exons.front().Limits().SetFrom(other_exons[

first

].GetFrom());

4747  if

(other_exons[

first

].m_fsplice) {

4748

exons.front().m_fsplice =

true

;

4749

exons.front().m_fsplice_sig = other_exons[

first

].m_fsplice_sig;

4751

exons.insert(exons.begin(), other_exons.begin(), other_exons.begin()+

first

);

4764  m_exons

.assign(exons.begin(), exons.end());

4767  m_exons

.front().m_fsplice =

false

;

4768  m_exons

.front().m_fsplice_sig.clear();

4769  m_exons

.back().m_ssplice =

false

;

4770  m_exons

.back().m_ssplice_sig.clear();

4783 CChain::CChain

(

SChainMember

& mbr,

CGeneModel

* gapped_helper,

bool

keep_all_evidence,

bool

addallsupport) : m_coverage_drop_left(-1), m_coverage_drop_right(-1), m_coverage_bump_left(-1), m_coverage_bump_right(-1), m_core_coverage(0), m_splice_weight(0), m_cap_peaks(3, -1), m_polya_peaks(3, -1), m_was_combined(

false

)

4789

list<CGeneModel> extened_parts;

4790

vector<CGeneModel*> extened_parts_and_gapped;

4791  if

(gapped_helper != 0) {

4792

extened_parts_and_gapped.push_back(gapped_helper);

4810

extened_parts.push_back(align);

4811  _ASSERT

(extened_parts.back().Continuous());

4812

extened_parts_and_gapped.push_back(&extened_parts.back());

4814

extened_parts.back().Extend(align,

false

);

4815  _ASSERT

(extened_parts.back().Continuous());

4818  if

(left < extened_parts.front().Limits().GetFrom())

4819

extened_parts.front().ExtendLeft(extened_parts.front().Limits().GetFrom()-left);

4820  if

(right > extened_parts.back().Limits().GetTo())

4821

extened_parts.back().ExtendRight(right-extened_parts.back().Limits().GetTo());

4824  EStrand

strand = extened_parts_and_gapped.front()->Strand();

4827  sort

(extened_parts_and_gapped.begin(),extened_parts_and_gapped.end(),

AlignSeqOrder

());

4828  ITERATE

(vector<CGeneModel*>, it, extened_parts_and_gapped) {

4851

vector<double> coverage_raw(mrna_len+

SCAN_WINDOW

);

4870  for

(

int i

= 0;

i

< mrna_len; ++

i

) {

4880  if

(!align->

TrustedProt

().empty() || (!align->

TrustedmRNA

().empty() && (*i)->m_cds_info->ProtReadingFrame().NotEmpty())) {

4882  if

(align->

AlignLen

() > 0.5*(*i)->m_orig_align->TargetLen())

4892

map<Int8,int> exonnum;

4897

exonnum[align.

ID

()] += align.

Exons

().size();

4901  if

(it->second >= (

int

)orig_aligns[it->first]->Exons().size()) {

4921  bool NewIsBetter

(

int

new_count,

int

new_not_wanted_count,

int

new_matches_count,

int

new_mem_id)

const

{

4929  return

new_mem_id < mem_id;

4939  int

amem_id =

a

.m_member !=

nullptr

?

a

.m_member->m_mem_id : 0;

4940  return

amem_id < mem_id;

4975  if

(ai->

Ident

() > 0.)

4976

matches = ai->

Ident

()*matches+0.5;

4977  bool

not_wanted =

false

;

4983

alimits.

SetTo

(

min

(alimits.

GetTo

(), exonp->Limits().GetTo()));

5007

linkers.push_back(sl);

5012

set<TSignedSeqRange> chain_introns;

5013  for

(

int i

= 1;

i

< (

int

)

Exons

().size(); ++

i

) {

5014  if

(

Exons

()[

i

-1].m_ssplice &&

Exons

()[

i

].m_fsplice)

5018  for

(

SLinker

& sl : linkers) {

5023  bool

all_introns_included =

true

;

5024  for

(

int i

= 1; all_introns_included &&

i

< (

int

)unma.

Exons

().size(); ++

i

) {

5025  if

(unma.

Exons

()[

i

-1].m_ssplice && unma.

Exons

()[

i

].m_fsplice)

5028  if

(!all_introns_included) {

5029

sl.m_not_wanted =

true

;

5034

sl.m_not_wanted =

true

;

5037  bool

all_introns_included =

true

;

5038  for

(

int i

= 1; all_introns_included &&

i

< (

int

)orig_align.

Exons

().size(); ++

i

) {

5039  if

(orig_align.

Exons

()[

i

-1].m_ssplice && orig_align.

Exons

()[

i

].m_fsplice)

5040

all_introns_included = chain_introns.count(

TSignedSeqRange

(orig_align.

Exons

()[

i

-1].GetTo(),orig_align.

Exons

()[

i

].GetFrom()));

5042  if

(!all_introns_included) {

5043

sl.m_not_wanted =

true

;

5058

linkers.push_back(sl);

5065  for

(

int i

= 1;

i

< (

int

)

Exons

().size(); ++

i

) {

5066  if

(!

Exons

()[

i

-1].m_ssplice || !

Exons

()[

i

].m_fsplice) {

5070

linkers.push_back(sl);

5077

multimap<TSignedSeqPos, SLinker*> right_ends;

5078  for

(

auto

& linker : linkers)

5079

right_ends.emplace(linker.m_range.GetTo(), &linker);

5081  for

(

auto

it = right_ends.begin(); it != right_ends.end(); ++it) {

5089

}

else if

(it != right_ends.begin()) {

5093  bool

divided_pstop =

false

;

5105

new_not_wanted_count += sli.

m_value

;

5108  if

(!sli.

m_connected

|| sli.

NewIsBetter

(new_count, new_not_wanted_count, new_matches_count, new_mem_id)) {

5116  if

(jt == right_ends.begin())

5123  for

(

int i

= 0;

i

< (

int

)linkers.size(); ++

i

) {

5134  for

(

SLinker

*

l

= best_right;

l

!= 0;

l

=

l

->m_left) {

5136

sp_core.

insert

(

l

->m_member->m_align->ID());

5140  if

(!keep_all_evidence) {

5141  for

(

int i

= 0;

i

< (

int

)linkers.size(); ++

i

) {

5162  if

(!sp_not_wanted.count(

id

)) {

5166  if

(sp.

insert

(

id

).second) {

5175

protreadingframe &= readingframe;

5187  for

(

int

ia = 0; ia < (

int

)

m_members

.size(); ++ia) {

5190  a

.Exons().size() > 1 &&

Exons

().front().Limits().GetFrom() ==

a

.Limits().GetFrom()) {

5198  for

(

int

ia = 0; ia < (

int

)

m_members

.size(); ++ia) {

5201  a

.Exons().size() > 1 &&

Exons

().back().Limits().GetTo() ==

a

.Limits().GetTo()) {

5213  bool

found_length_match =

false

;

5221

map<string, pair<bool,bool> >::iterator iter = prot_complet.find(accession);

5222  _ASSERT

(iter != prot_complet.end());

5223  if

(iter == prot_complet.end() || !iter->second.first || !iter->second.second)

5227

found_length_match =

true

;

5232  if

(!found_length_match) {

5249  bool

trusted =

false

;

5261  if

(

a

< 0 ||

b

< 0 ||

abs

(

a

-

b

) != 2)

5271

list<TSignedSeqRange> align_introns;

5272  for

(

int i

= 1;

i

< (

int

)align.

Exons

().size(); ++

i

) {

5275

align_introns.push_back(intron);

5278

list<TSignedSeqRange> introns;

5281  for

(

int i

= 1;

i

< (

int

)

Exons

().size(); ++

i

) {

5283  if

(

Include

(start,intron)) {

5284

introns.push_back(intron);

5286  if

(!

Exons

()[

i

-1].m_ssplice || !

Exons

()[

i

].m_fsplice)

5291  if

(

len

!=3 || hole || align_introns != introns)

5296  bool

found =

false

;

5297  for

(

int i

= 0;

i

< (

int

)

Exons

().size() && !found; ++

i

) {

5299  if

(

Exons

()[

i

].Limits().GetTo() > start.

GetTo

()) {

5300

rf = start.

GetTo

()+1;

5302

}

else if

(

i

!= (

int

)

Exons

().size()-1) {

5303

rf =

Exons

()[

i

+1].Limits().GetFrom();

5316  bool

found =

false

;

5317  for

(

int i

= 0;

i

< (

int

)

Exons

().size() && !found; ++

i

) {

5319  if

(

Exons

()[

i

].Limits().GetFrom() < start.

GetFrom

()) {

5322

}

else if

(

i

!= 0) {

5323

rf =

Exons

()[

i

-1].Limits().GetTo();

5338  if

(conf_start.

Empty

()) {

5382

reading_frame.

SetTo

(rf);

5394

cds.

SetStop

(stop,confirmed_stop);

5396  if

(

Include

(reading_frame, *s))

5402  for

(

int i

= 1;

i

< (

int

)

Exons

().size(); ++

i

) {

5403  if

(!

Exons

()[

i

-1].m_ssplice || !

Exons

()[

i

].m_fsplice) {

5405  if

(

Precede

(hole,reading_frame)) {

5407

}

else if

(

Precede

(reading_frame,hole)) {

5413  if

(new_lim !=

Limits

())

5417

gnomon.

GetScore

(*

this

,

false

, trusted);

5455  auto

ai = (*it)->m_align;

5457  if

(

limits

.IntersectingWith(alimits))

5458

new_members.push_back(*it);

5474  if

(limits_on_mrna.

GetFrom

() > 0)

5481  bool

changed =

false

;

5487  if

(cds.

PStop

()) {

5489  for

(

auto

& pstop : cds.

PStops

()) {

5491

pstops.push_back(pstop);

5493  if

(pstops.size() != cds.

PStops

().size()) {

5495  for

(

auto

& pstop : pstops)

5510  if

(recalulate_support)

5518  auto

old_limits =

Limits

();

5519  auto

new_limits = old_limits;

5520  bool

left_confirmed =

false

;

5521  bool

right_confirmed =

false

;

5524  if

(rslt != confirmed_ends.

end

() && rslt->second <

Exons

().

front

().GetTo()) {

5525

left_confirmed =

true

;

5526

new_limits.SetFrom(rslt->second);

5528

rslt = confirmed_ends.

find

(

Exons

().back().GetFrom());

5529  if

(rslt != confirmed_ends.

end

() && rslt->second >

Exons

().back().GetFrom()) {

5530

right_confirmed =

true

;

5531

new_limits.SetTo(rslt->second);

5534  if

(!left_confirmed && !right_confirmed)

5546  if

(new_limits.GetFrom() < old_limits.GetFrom()) {

5547  int delta

= old_limits.GetFrom()-new_limits.GetFrom();

5552  if

(new_limits.GetTo() > old_limits.GetTo()) {

5553  int delta

= new_limits.GetTo()-old_limits.GetTo();

5564  if

(

i

->Loc() > new_limits.GetFrom() &&

i

->InDelEnd() < new_limits.GetTo())

5578  if

(

Limits

() != new_limits)

5586  if

(left_confirmed) {

5588  if

(new_limits.GetFrom() < old_limits.GetFrom())

5590  else if

(new_limits.GetFrom() > old_limits.GetFrom())

5593  if

(right_confirmed) {

5595  if

(new_limits.GetTo() > old_limits.GetTo())

5596  AddComment

(

"Extended to confirmed right"

);

5597  else if

(new_limits.GetTo() < old_limits.GetTo())

5604  if

(!

Include

(new_limits, cds_info.

Cds

()) || (left_confirmed && !left_complete) || (right_confirmed && !right_complete)) {

5611  for

(

int i

= cds_limits_t.GetFrom();

i

<= cds_limits_t.GetTo(); ++

i

) {

5616  for

(

int i

= cds_limits_t.GetTo();

i

>= cds_limits_t.GetFrom(); --

i

) {

5617

cds_limits_t.SetTo(

i

);

5621  if

(cds_limits_t.Empty())

5623

cds_info_t.Clip(cds_limits_t);

5626  bool

fivep_confirmed = (

Strand

() ==

ePlus

) ? left_confirmed : right_confirmed;

5627  bool

threep_confirmed = (

Strand

() ==

ePlus

) ? right_confirmed : left_confirmed;

5628  bool

has_start = cds_info_t.HasStart();

5629  bool

has_stop = cds_info_t.HasStop();

5630  auto

prot_rf = cds_info_t.ProtReadingFrame();

5631  if

(prot_rf.NotEmpty() && ((fivep_confirmed && !has_start) || (threep_confirmed && !has_stop))) {

5636  auto

IndelInCodon = [

this

](

int i

,

CAlignMap

& map) {

5637  int a

= map.MapEditedToOrig(

i

);

5638  int b

= map.MapEditedToOrig(

i

+2);

5641  return

(

a

< 0 ||

b

< 0 || map.MapEditedToOrig(

i

+1) < 0 || !

GetInDels

(

a

,

b

,

false

).empty());

5644  if

(fivep_confirmed && !has_start) {

5645  for

(

int i

= prot_rf.GetFrom(); !has_start &&

i

>= cds_limits_t.GetFrom() && !

IsStopCodon

(&mrna[

i

]);

i

-= 3) {

5646

has_start =

IsStartCodon

(&mrna[

i

]) && !IndelInCodon(

i

, amap);

5648  for

(

int i

= prot_rf.GetFrom(); !has_start &&

i

< cds_limits_t.GetTo();

i

+= 3) {

5651

has_start =

IsStartCodon

(&mrna[

i

]) && !IndelInCodon(

i

, amap);

5652

cds_limits_t.SetFrom(

i

);

5657  if

(threep_confirmed && !has_stop) {

5658  for

(

int i

= prot_rf.GetTo()+1; !has_stop &&

i

< cds_limits_t.GetTo();

i

+= 3)

5659

has_stop =

IsStopCodon

(&mrna[

i

]) && !IndelInCodon(

i

, amap);

5660  if

(!has_stop && cds_info_t.PStop(

false

)) {

5662  sort

(pstops.begin(), pstops.end());

5663  for

(

auto

& stp : pstops) {

5666

cds_limits_t.SetTo(stp.GetFrom()-1);

5674  if

(cds_limits_t.Empty())

5677

cds_info_t.Clip(cds_limits_t);

5684  auto

cds = cds_info.

Cds

();

5707  string

motif1 =

"AATAAA"

;

5708  string

motif2 =

"ATTAAA"

;

5709  string

motif3 =

"AGTAAA"

;

5710  int

block_of_As_len = 6;

5713

block_of_As.assign(block_of_As_len,

'A'

);

5715

block_of_As.assign(block_of_As_len,

'T'

);

5717  int a

=

max

(0, pos-block_of_As_len);

5718  int b

=

min

((

int

)contig.size()-1, pos+block_of_As_len);

5719  if

(

b

-

a

+1 < block_of_As_len)

5720  return

make_pair(

false

,

false

);

5721  if

(search(contig.begin()+

a

, contig.begin()+

b

+1, block_of_As.begin(), block_of_As.end()) != contig.begin()+

b

+1) {

5731  if

(left < 0 || right >= (

int

)contig.size())

5732  return

make_pair(

false

,

false

);

5734  string

segment(contig.begin()+left, contig.begin()+right+1);

5738  if

(segment.find(motif1) != string::npos || segment.find(motif2) != string::npos || segment.find(motif3) != string::npos)

5739  return

make_pair(

true

,

true

);

5741  return

make_pair(

false

,

true

);

5743  return

make_pair(

true

,

false

);

5747 #define MIN_UTR_EXON 15 5760  if

(align.

Status

()&determinant) {

5764  bool

belong_to_exon =

false

;

5766  for

(

auto

& exon :

Exons

()) {

5767  if

(pos >= exon.Limits().GetFrom()+min_splice_dist && pos <= exon.Limits().GetTo()) {

5768

belong_to_exon =

true

;

5772  if

(rlimit < pos && belong_to_exon)

5777  bool

belong_to_exon =

false

;

5779  for

(

auto

& exon :

Exons

()) {

5780  if

(pos >= exon.Limits().GetFrom() && pos <= exon.Limits().GetTo()-min_splice_dist) {

5781

belong_to_exon =

true

;

5785  if

(llimit > pos && belong_to_exon)

5794  if

(raw_weights.

empty

())

5795  return

make_tuple(peak_weights,real_limits);

5797  int

last_allowed = right_end ? real_limits.

GetTo

()+flex_len : -(real_limits.

GetFrom

()-flex_len);

5798  auto

ipeak = raw_weights.

begin

();

5799  double

w = ipeak->second;

5800  for

(

auto

it =

next

(raw_weights.

begin

()); it != raw_weights.

end

(); ++it) {

5801  if

(it->first >

prev

(it)->first+1+max_empty_dist) {

5802  if

(ipeak->first > last_allowed)

5804  if

(w >= min_blob_weight) {

5805  auto

still_good = ipeak;

5806  for

(

auto i

= ipeak;

i

!= it &&

i

->first <= last_allowed; ++

i

) {

5807  if

(

i

->second >= 0.5*ipeak->second)

5810

peak_weights.emplace(still_good->first, w);

5816  if

(it->second > ipeak->second)

5820  if

(ipeak->first <= last_allowed && w >= min_blob_weight) {

5821  auto

still_good = ipeak;

5822  for

(

auto i

= ipeak;

i

!= raw_weights.

end

() &&

i

->first <= last_allowed; ++

i

) {

5823  if

(

i

->second >= 0.5*ipeak->second)

5826

peak_weights.emplace(still_good->first, w);

5829  return

make_tuple(peak_weights,real_limits);

5832

tuple<TIVec, TSignedSeqRange>

CChain::MainPeaks

(

TIDMap

& peak_weights,

double

secondary_peak,

double

tertiary_peak,

double

tertiary_peak_coverage,

bool

right_end) {

5836

peaks[0] =

abs

(ifirst_peak->first);

5838  int

first_peak = ifirst_peak->first;

5839  limits

.SetTo(first_peak);

5842  int

first_peak = -ifirst_peak->first;

5843  limits

.SetFrom(first_peak);

5847  auto

isecond_peak =

prev

(peak_weights.

end

());

5848  for

( ; isecond_peak != ifirst_peak && isecond_peak->second < secondary_peak*ifirst_peak->second; --isecond_peak);

5849  if

(isecond_peak != ifirst_peak)

5850

peaks[1] =

abs

(isecond_peak->first);

5852  if

(tertiary_peak > 0) {

5855  if

(genome_core_lim.

Empty

()) {

5856

genome_core_lim =

Limits

();

5866  double

core_coverage = 0;

5870

core_coverage /= core_lim.

GetLength

();

5873  for

(

auto

& exon :

Exons

()) {

5874  if

(

Include

(exon.Limits(),

abs

(ifirst_peak->first))) {

5875

fpeak_exon = exon.Limits();

5880  auto

ithird_peak =

prev

(peak_weights.

end

());

5881  for

( ; ithird_peak != isecond_peak; --ithird_peak) {

5882  if

(

Include

(fpeak_exon,

abs

(ithird_peak->first))) {

5886  if

(ithird_peak->second >= tertiary_peak*ifirst_peak->second &&

m_coverage

[p] > tertiary_peak_coverage*core_coverage)

5890  if

(ithird_peak != isecond_peak)

5891

peaks[2] =

abs

(ithird_peak->first);

5892

isecond_peak = ithird_peak;

5895  if

(isecond_peak != ifirst_peak) {

5897  int

second_peak = isecond_peak->first;

5898  limits

.SetTo(second_peak);

5900  int

second_peak = -isecond_peak->first;

5901  limits

.SetFrom(second_peak);

5905  return

make_tuple(peaks,

limits

);

5908 void CChain::ClipToCap

(

int

min_cap_blob,

int

max_dist,

int

min_flank_exon,

double

secondary_peak,

bool

recalulate_support) {

5920  TIDMap

& peak_weights(get<0>(rslt));

5922  if

(peak_weights.

empty

()) {

5925  if

(right_end && real_limits.

GetTo

() <

Limits

().GetTo())

5927  else if

(!right_end && real_limits.

GetFrom

() >

Limits

().GetFrom())

5948  auto

rslt1 =

MainPeaks

(peak_weights, secondary_peak, 0., 0., right_end);

5956 void CChain::ClipToPolyA

(

const CResidueVec

& contig,

int

min_polya_blob,

int

max_dist,

int

min_flank_exon,

double

secondary_peak,

double

tertiary_peak,

double

tertiary_peak_coverage,

bool

recalulate_support) {

5968  TIDMap

& peak_weights(get<0>(rslt));

5971  for

(

auto

ip_loop = peak_weights.

begin

(); ip_loop != peak_weights.

end

(); ) {

5972  auto ip

= ip_loop++;

5977  if

(peak_weights.

empty

()) {

5980  if

(right_end && real_limits.

GetTo

() <

Limits

().GetTo())

5982  else if

(!right_end && real_limits.

GetFrom

() >

Limits

().GetFrom())

6003  auto

rslt1 =

MainPeaks

(peak_weights, secondary_peak, tertiary_peak, tertiary_peak_coverage, right_end);

6021 #define COVERAGE_DROP 0.1 6022 #define COVERAGE_BUMP 3 6023 #define SMALL_GAP_UTR 100 6047

genome_core_lim =

Limits

();

6059  _ASSERT

((

int

)coverage.size() == mrna_len && core_lim.

GetFrom

() >= 0 && core_lim.

GetTo

() < mrna_len);

6061  double

core_coverage = 0;

6063

core_coverage += coverage[

i

];

6065

core_coverage /= core_lim.

GetLength

();

6068  if

(core_lim.

GetFrom

() <= 0 && core_lim.

GetTo

() >= mrna_len-1)

6075

vector<double> longseq_coverage(mrna_len);

6081  if

(overlap.

Empty

())

6084  for

(

int i

= 1;

i

< (

int

)align.

Exons

().size(); ++

i

) {

6085  if

(align.

Exons

()[

i

-1].m_ssplice && align.

Exons

()[

i

].m_fsplice && align.

Exons

()[

i

-1].m_ssplice_sig !=

"XX"

&& align.

Exons

()[

i

].m_fsplice_sig !=

"XX"

) {

6087  bool

valid_intron =

false

;

6088  for

(

int

j = 1; j < (

int

)

Exons

().size() && !valid_intron; ++j) {

6089  if

(

Exons

()[j-1].m_ssplice &&

Exons

()[j].m_fsplice) {

6091

valid_intron = (intr == jntr);

6110  for

(

int i

= overlap_on_mrna.

GetFrom

();

i

<= overlap_on_mrna.

GetTo

(); ++

i

)

6111

longseq_coverage[

i

] += align.

Weight

();

6121

longseq_coverage[

i

] = 0;

6128

longseq_coverage[

i

] = 0;

6132  double

core_inron_coverage = 0;

6133  int

core_introns = 0;

6134  for

(

int i

= 1;

i

< (

int

)

Exons

().size(); ++

i

) {

6135  if

(

Exons

()[

i

-1].m_ssplice &&

Exons

()[

i

].m_fsplice) {

6141  if

(

Include

(core_lim, intron)) {

6143

core_inron_coverage += intron_coverage[intron];

6147  if

(core_introns > 0)

6148

core_inron_coverage /= core_introns;

6150

core_inron_coverage = 0.5*core_coverage;

6155  int

left_limit = core_lim.

GetFrom

();

6156  int

right_limit = core_lim.

GetTo

();

6157  int len

= right_limit-left_limit+1;

6159  for

(

int i

= left_limit;

i

<= right_limit; ++

i

)

6160

wlen += coverage[

i

];

6162  while

(left_limit > 0 && (longseq_coverage[left_limit] > 0 ||

6163

(coverage[left_limit] >

max

(core_coverage,wlen/

len

)*utr_clip_threshold &&

6164

(intron_coverage.

find

(left_limit-1) == intron_coverage.

end

() || intron_coverage[left_limit-1] > core_inron_coverage*utr_clip_threshold)))) {

6168

wlen += coverage[left_limit];

6171  if

(left_limit > 0) {

6185  int

right_limit = core_lim.

GetTo

();

6186  int

left_limit = core_lim.

GetFrom

();

6187  int len

= right_limit-left_limit+1;

6189  for

(

int i

= left_limit;

i

<= right_limit; ++

i

)

6190

wlen += coverage[

i

];

6192  while

(right_limit < mrna_len-1 && (longseq_coverage[right_limit] > 0 ||

6193

(coverage[right_limit] > wlen/

len

*utr_clip_threshold &&

6194

(intron_coverage.

find

(right_limit) == intron_coverage.

end

() || intron_coverage[right_limit] > core_inron_coverage*utr_clip_threshold)))) {

6198

wlen += coverage[right_limit];

6201  if

(right_limit < mrna_len-1) {

6223  if

(fivep_confirmed && threep_confirmed)

6230

vector<double> longseq_coverage(mrna_len);

6236  if

(overlap.

Empty

())

6242  for

(

int i

= overlap_on_mrna.

GetFrom

();

i

<= overlap_on_mrna.

GetTo

(); ++

i

)

6243

longseq_coverage[

i

] += align.

Weight

();

6269  if

(!fivep_confirmed) {

6270  int

left_limit = soft_limit.

GetFrom

();

6271  int

first_bump = -1;

6276

first_bump = left_limit;

6281  if

(first_bump > 0) {

6288  int

first_drop = left_limit;

6290  for

( ; first_drop-

SCAN_WINDOW

/2 > 0; --first_drop) {

6305  if

(!threep_confirmed) {

6306  int

right_limit = soft_limit.

GetTo

();

6307  int

first_bump = -1;

6312

first_bump = right_limit;

6316  if

(first_bump > 0) {

6323  int

first_drop = right_limit;

6325  for

( ; first_drop < mrna_len-

SCAN_WINDOW

/2; ++first_drop) {

6347

map<TSignedSeqRange,double> intron_coverage;

6348

vector<double> coverage(mrna_len);

6354  if

(overlap.

Empty

())

6359  for

(

int i

= overlap_on_mrna.

GetFrom

();

i

<= overlap_on_mrna.

GetTo

(); ++

i

)

6360

coverage[

i

] += align.

Weight

();

6363  for

(

int i

= 1;

i

< (

int

)align.

Exons

().size(); ++

i

) {

6364  if

(align.

Exons

()[

i

-1].m_ssplice_sig !=

"XX"

&& align.

Exons

()[

i

].m_fsplice_sig !=

"XX"

) {

6373  double

maxintroncount = 0;

6375

minintroncount =

min

(minintroncount,it->second);

6376

maxintroncount =

max

(maxintroncount,it->second);

6378  if

(minintroncount < 0.1*maxintroncount)

6381

vector<int> dips(mrna_len,0);

6382  double

maxsofar = 0;

6383  for

(

int i

= 0;

i

< mrna_len; ++

i

) {

6384  if

(coverage[

i

] < 0.1*maxsofar)

6386

maxsofar =

max

(maxsofar,coverage[

i

]);

6388  for

(

int i

= 0;

i

< (

int

)

Exons

().size(); ++

i

) {

6389  if

(

Exons

()[

i

].m_fsplice_sig ==

"XX"

||

Exons

()[

i

].m_ssplice_sig ==

"XX"

) {

6397  for

(

int i

= mrna_len-1;

i

>= 0; --

i

) {

6398  if

(coverage[

i

] < 0.1*maxsofar && dips[

i

] > 0)

6400

maxsofar =

max

(maxsofar,coverage[

i

]);

6403  if

(intron_coverage.size() > 1)

6412  bool

setconfstart =

false

;

6413  bool

setconfstop =

false

;

6418  if

((*i)->m_align->GetCdsInfo().ProtReadingFrame().Empty())

6421  if

((*i)->m_align->Type() &

emRNA

) {

6423

setconfstart =

true

;

6425

setconfstop =

true

;

6432

map<string, pair<bool,bool> >::iterator iter = prot_complet.find(accession);

6433  _ASSERT

(iter != prot_complet.end());

6434  if

(iter == prot_complet.end())

6439  if

((*i)->m_align->Strand() ==

eMinus

)

6440  swap

(fivep_exon,threep_exon);

6443

iter->second.first &&

Include

(

Limits

(),(*i)->m_align->Limits())) {

6451

setconfstart =

true

;

6458

iter->second.second &&

Include

(

Limits

(),(*i)->m_align->Limits())) {

6466

setconfstop =

true

;

6475  double

score = cds_info.

Score

();

6477

score +=

max

(1.,0.3*score);

6478

cds_info.

SetScore

(score,

false

);

6482

cds_info.

SetScore

(score,

false

);

6499  typedef

map<Int8, set<TSignedSeqRange> > Tint8range;

6504  if

(!(*i)->m_align->TrustedProt().empty()) {

6506  if

((*i)->m_mem_id > 0)

6507

aexons[(*i)->m_align->ID()].insert(e->

Limits

());

6509

uexons[(*i)->m_align->ID()].insert(e->

Limits

());

6512  else if

(!(*i)->m_align->TrustedmRNA().empty() && (*i)->m_align->ConfirmedStart() && (*i)->m_align->ConfirmedStop())

6516  typedef

map<Int8, int> Tint8int;

6517

Tint8int palignedlen;

6520  ITERATE

(set<TSignedSeqRange>, e,

i

->second)

6521  len

+= e->GetLength();

6522

palignedlen[

i

->first] =

len

;

6526  ITERATE

(set<TSignedSeqRange>, e,

i

->second)

6527  len

+= e->GetLength();

6528

palignedlen[

i

->first] =

max

(

len

,palignedlen[

i

->first]);

6532  ITERATE

(Tint8int,

i

, palignedlen) {

6543  if

(ie->m_fsplice_sig ==

"XX"

|| ie->m_ssplice_sig ==

"XX"

)

6544

gap_cds += (cds&ie->Limits()).

GetLength

();

6549  ITERATE

(Tint8int,

i

, palignedlen) {

6551  if

(

i

->second+gap_cds > 0.8*orig_align->

TargetLen

()) {

6553  string

tprotein(protein_seqvec.

begin

(),protein_seqvec.

end

());

6554  CCigar

cigar =

LclAlign

(mprotein.c_str(), (

int

)mprotein.size(), tprotein.c_str(), (

int

)tprotein.size(), 10, 1,

delta

.matrix);

6603  return

make_pair(

a

.TargetAccession(), 0);

6615  catch

(sequence::CSeqIdFromHandleException&) {

6617  return

make_pair(

a

.TargetAccession(), 0);

6632  if

(a_acc.second != b_acc.second)

6633  return

a_acc.second > b_acc.second;

6635  int

a_stt =

a

->HasStart()+

a

->HasStop();

6636  int

b_stt =

b

->HasStart()+

b

->HasStop();

6638  return

a_stt > b_stt;

6643  return

a_len > b_len;

6648  return a

->ID() <

b

->ID();

6667  m_data

->FilterOutChimeras(clust);

6672  typedef

map<int,TGeneModelClusterSet> TClustersByStrand;

6673

TClustersByStrand trusted_aligns;

6676  if

(it->Continuous() && (!it->TrustedmRNA().empty() || !it->TrustedProt().empty())

6677

&& orig_align !=

nullptr

&& it->

AlignLen

() > minscor.m_minprotfrac*orig_align->

TargetLen

()) {

6678

trusted_aligns[it->Strand()].Insert(*it);

6685  for

(

auto

& strand_vec : trusted_aligns) {

6686  int

strand = strand_vec.first;

6689

tgr_info[strand].emplace_back();

6690  auto

&

last

= tgr_info[strand].back();

6691  last

.m_limits = cls.Limits();

6694  for

(

auto

& e : talign.Exons()) {

6701  for

(

auto

& cref : talign.TrustedProt())

6703  for

(

auto

& cref : talign.TrustedmRNA())

6708  last

.m_cds_limits += tcds;

6709  CAlignMap

tmap(talign.Exons(), talign.FrameShifts(), talign.Strand(), tcds);

6711  for

(

auto

& e : talign.Exons()) {

6712  auto

overlap = (e.

Limits

()&tcds);

6713  for

(

TSignedSeqPos

k = overlap.GetFrom(); k <= overlap.GetTo(); ++k) {

6715  if

(p >= 0 && p%3 == 0) {

6716  _ASSERT

(p/3 < (

int

)

last

.m_cds_maps.back().size());

6717  last

.m_cds_maps.back()[p/3] = k;

6726  for

(

auto

& ts : tgr_info) {

6727  char

strand = ts.first ==

ePlus

?

'+'

:

'-'

;

6729  for

(

int i

= 0;

i

< (

int

)spl.size(); ++

i

) {

6733

trusted_group_cds[tgr] = spl[

i

].m_cds_limits;

6734  for

(

auto

&

id

: spl[

i

].m_ids)

6735

cerr <<

"TGR: "

<< strand <<

" "

<< tgr <<

" "

<<

id

.first <<

" "

<<

id

.second << endl;

6739  for

(TGeneModelList::iterator it_loop = clust.begin(); it_loop != clust.end(); ) {

6740

TGeneModelList::iterator ita = it_loop++;

6746  int

strand = align.

Strand

();

6747  if

(!tgr_info.count(strand))

6753  auto

second = upper_bound(tinfo.begin(), tinfo.end(), align.

Limits

().

GetTo

(),

6755  if

(

first

== second)

6765  for

(

auto

& e : align.

Exons

()) {

6766  auto

overlap = (e.

Limits

()&acds);

6767  for

(

TSignedSeqPos

k = overlap.GetFrom(); k <= overlap.GetTo(); ++k) {

6769  if

(p >= 0 && p%3 == 0) {

6770  _ASSERT

(p/3 < (

int

)acds_map.size());

6778  bool

paralogs =

false

;

6780  for

(

auto

it =

first

; it != second; ++it) {

6781  int

tgr = it-tinfo.

begin

()+1;

6786  for

(

unsigned int i

= 0;

i

< align.

Exons

().

size

() && atgr != tgr; ++

i

) {

6793  for

(

const TIVec

& tcds_map : it->m_cds_maps) {

6794  int

long_enough = 10;

6796  if

(mci >= long_enough) {

6803  auto

num = prev_accessions.

size

();

6804  for

(

auto

&

id

: it->m_ids)

6805

prev_accessions.

insert

(

id

.second);

6806  if

(num+it->m_ids.size() != prev_accessions.

size

())

6824

cerr <<

"Chimeric alignment "

<< align.

ID

() <<

" "

<< (paralogs ?

"paralogs"

:

"unrelated"

) <<

" "

<< flag << endl;

6825

SkipReason(orig_aligns[align.

ID

()],

"Chimera"

);

6833  virtual bool

align_predicate(

CAlignModel

& align);

6834  virtual string GetComment

() {

return "Overlaps the same alignment"

;}

6842

vector<CAlignModel*> alignment_ptrs;

6845

alignment_ptrs.push_back(&*

a

);

6848  if

(alignment_ptrs.empty())

6853

vector<CAlignModel*>::iterator

first

= alignment_ptrs.begin();

6855

vector<CAlignModel*> ::iterator current =

first

; ++current;

6856  for

(; current != alignment_ptrs.end(); ++current) {

6857

pair<string,int> current_accver =

GetAccVer

(**current, scope);

6858  if

(first_accver.first == current_accver.first) {

6859  if

((*current)->Strand() == (*first)->Strand() && (*current)->Limits().IntersectingWith((*first)->Limits())) {

6864

first_accver = current_accver;

6876  return new

gnomon::OverlapsSameAccessionAlignment(alignments);

6886  const CGeneModel

* prev_alignp = &dummy_align;

6888  bool

prev_is_compatible =

false

;

6898

prev_alignp = &algnj;

6900  if

((same_as_prev && prev_is_compatible) || (!same_as_prev && algn.

Strand

()==algnj.

Strand

() && algn.

isCompatible

(algnj))) {

6901

prev_is_compatible =

true

;

6906

prev_is_compatible =

false

;

6915

: alignments(_alignments)

6923  return

!paralog.empty();

6925  virtual string GetComment

() {

return "Connects two "

+paralog+

" alignments"

; }

6930  return new

gnomon::ConnectsParalogs(alignments);

6936  if

(

m_data

->orig_aligns.find(itcl->ID()) ==

m_data

->orig_aligns.end()) {

6945  double ms

=

m_data

->GoodCDNAScore(algn);

6951

ost <<

"Short alignment "

<< algn.

AlignLen

();

6953

ost <<

"Low score "

<< algn.

Score

();

6961 #define PROT_CLIP 120 6962 #define PROT_CLIP_FRAC 0.20 6988  if

(align.

PStop

()) {

6999  if

(ostop.

GetLength

() == 3 && protein_seqvec[ostop.

GetFrom

()/3] ==

'U'

) {

7005  if

(conf !=

m_confirmed_bases_len

.

begin

() && (--conf)->first <= stp->GetFrom() && conf->first+conf->second > stp->GetTo())

7018  m_data

->unmodified_aligns[itcl->ID()] = *itcl;

7027  if

(protein_seqvec[0] ==

'M'

)

7028

fivepclip = maxclip-tlim.

GetFrom

();

7029  int

threepclip = maxclip-(

orig

->TargetLen()-tlim.

GetTo

()-1);

7034  int

threepshift = 0;

7036  for

(TInDels::iterator indl = align.

FrameShifts

().begin(); !skip && indl != align.

FrameShifts

().end(); ++indl) {

7048  int

tpb = lim.

GetTo

()-1;

7051  if

(tpb < fivepclip) {

7052  if

(indl->IsInsertion())

7053

fivepshift += indl->Len();

7054  else if

(indl->IsDeletion())

7055

fivepshift -= indl->Len();

7056

}

else if

(tpa >= tlen-threepclip) {

7057  if

(indl->IsInsertion())

7058

threepshift += indl->Len();

7059  else if

(indl->IsDeletion())

7060

threepshift -= indl->Len();

7068  if

(fivepshift >= 0)

7071

fivepshift = 3-(-fivepshift)%3;

7072  if

(threepshift >= 0)

7075

threepshift = 3-(-threepshift)%3;

7083

edited_tlim.

SetTo

(edited_tlim.

GetTo

()-threepshift);

7091  string

protseq = editedm.

GetProtein

(contig);

7092

tlen = 3*(

int

)protseq.size();

7093  int

fivep_problem = -1;

7094  int

first_stop = tlen;

7096  for

(

int

p = 0; !skip && p < (

int

)protseq.size(); ++p) {

7097  if

(protseq[p] ==

'*'

) {

7100  if

(tpb < fivepclip)

7101

fivep_problem =

max

(fivep_problem, tpb);

7102  else if

(tpa >= tlen-threepclip || p == (

int

)protseq.size()-1)

7103

first_stop =

min

(first_stop, tpa);

7111  int

fivep_limit = 0;

7112  auto

m = protseq.find(

"M"

, (fivep_problem+1)/3);

7114  if

(m != string::npos && (

int

)m*3 <= fivepclip) {

7115

fivep_limit = 3*(

int

)m;

7121  int

threep_limit = tlen-1;

7123  if

(first_stop+2 < threep_limit) {

7124

threep_limit = first_stop+2;

7146

edited_cds.

SetStart

(start,

true

);

7147

edited_cds.

SetStop

(stop,

true

);

7154  _ASSERT

(tlen == 3*(

int

)protseq.size());

7156

m = protseq.find(

"*"

);

7157  _ASSERT

(m == protseq.size()-1);

7175  typedef

vector<SFShiftsCluster> TFShiftsClusterVec;

7178

TFShiftsClusterVec algn_fclusters;

7179

algn_fclusters.reserve(algn.

Exons

().size());

7183

TInDels::const_iterator fi = fs.begin();

7186  while

(fi != fs.end() && fi->IntersectingWith(e->

GetFrom

(),e->

GetTo

())) {

7187

algn_fclusters.back().m_fshifts.push_back(*fi++);

7192  ITERATE

(TFShiftsClusterVec, exon_cluster, algn_fclusters) {

7193

pair<TIt,TIt> eq_rng = fshift_clusters.equal_range(*exon_cluster);

7194  for

(TIt glob_cluster = eq_rng.first; glob_cluster != eq_rng.second; ++glob_cluster) {

7196  if

(find(exon_cluster->m_fshifts.begin(),exon_cluster->m_fshifts.end(),*fi) == exon_cluster->m_fshifts.end())

7197  if

(fi->IntersectingWith(exon_cluster->m_limits.GetFrom(),exon_cluster->m_limits.GetTo()))

7200  if

(find(glob_cluster->m_fshifts.begin(),glob_cluster->m_fshifts.end(),*fi) == glob_cluster->m_fshifts.end())

7201  if

(fi->IntersectingWith(glob_cluster->m_limits.GetFrom(),glob_cluster->m_limits.GetTo()))

7206

pair<TIt,TIt> eq_rng = fshift_clusters.equal_range(*exon_cluster);

7207  for

(TIt glob_cluster = eq_rng.first; glob_cluster != eq_rng.second;) {

7208

exon_cluster->m_limits += glob_cluster->m_limits;

7209

exon_cluster->m_fshifts.insert(exon_cluster->m_fshifts.end(),glob_cluster->m_fshifts.begin(),glob_cluster->m_fshifts.end());

7210

fshift_clusters.erase(glob_cluster++);

7212  uniq

(exon_cluster->m_fshifts);

7213

fshift_clusters.insert(eq_rng.second, *exon_cluster);

7235

clust_plus.push_back(algn);

7237

clust_minus.push_back(algn);

7243  if

(

a

.FrameShifts().empty())

7247  int

inframelength = 0;

7248  int

outframelength = 0;

7251  TInDels

indels =

a

.GetInDels(left, right,

true

);

7255

inframelength +=

len

;

7257

outframelength +=

len

;

7260  if

(fs->IsDeletion()) {

7261

frame = (frame+fs->Len())%3;

7263

frame = (3+frame-fs->Len()%3)%3;

7269

inframelength +=

len

;

7271

outframelength +=

len

;

7273  return

double(inframelength)/(inframelength + outframelength);

7278

: mininframefrac(_mininframefrac), seq(_seq), scope(_scope), mrnaCDS(_mrnaCDS) {}

7284  virtual void

transform_align(

CAlignModel

& align);

7294  if

(scope !=

NULL

) {

7300

mrna.SetWhole(*target_id);

7301  CFeat_CI

feat_ci(*scope, mrna, sel);

7304  const CSeq_id

* cds_loc_seq_id = cds_loc.GetId();

7306  TSeqRange

feat_range = cds_loc.GetTotalRange();

7313  if

(pos != mrnaCDS.end()) {

7314

cds_on_mrna = pos->second;

7318  if

(cds_on_mrna.

Empty

())

7330  if

(left < 0 || right < 0)

7333  CAlignMap

alignmap_clipped(

a

.GetAlignMap());

7339  if

(!

a

.Continuous())

7349  a

.FrameShifts().clear();

7353  int

length = (

int

)cds.size();

7355  if

(length%3 != 0 || length < 6)

7361  for

(

int i

= 0;

i

< length-3;

i

+= 3) {

7373

cdsinfo.

SetStop

(stop,

true

);

7377  b

.FrameShifts().clear();

7384  for

(TGeneModelList::iterator it = chains.begin(); it != chains.end();) {

7385

TGeneModelList::iterator itt = it++;

7386  for

(TGeneModelList::iterator jt = chains.begin(); jt != itt;) {

7387

TGeneModelList::iterator jtt = jt++;

7388  if

(itt->Strand() != jtt->Strand() || (itt->Score() !=

BadScore

() && jtt->Score() !=

BadScore

()))

continue

;

7392  if

(itt->isCompatible(*jtt) > 1) chains.erase(jtt);

7393

}

else if

(jtt->Score() !=

BadScore

()) {

7394  if

(itt->isCompatible(*jtt) > 1) {

7399

}

else if

(itt->AlignLen() > jtt->AlignLen()) {

7400  if

(itt->isCompatible(*jtt) > 0) chains.erase(jtt);

7402  if

(itt->isCompatible(*jtt) > 0) {

7421  _ASSERT

( new_from-old_from >= trim && new_from <= e.

GetTo

() );

7442

TrimProtein(align, alignmap);

7444

TrimTranscript(align, alignmap);

7454  for

(CAlignModel::TExons::const_iterator piece_begin = align.

Exons

().begin(); piece_begin != align.

Exons

().end(); ++piece_begin) {

7455  _ASSERT

( !piece_begin->m_fsplice );

7457

CAlignModel::TExons::const_iterator piece_end;

7458  for

(piece_end = piece_begin; piece_end != align.

Exons

().end() && piece_end->m_ssplice; ++piece_end) ;

7465  a

= piece_begin->GetFrom()+trim;

7471  b

= piece_end->GetTo()-trim;

7473  if

((

a

!= piece_begin->GetFrom() ||

b

!= piece_end->GetTo()) &&

b

>

a

) {

7476  if

(newlimits.

NotEmpty

() && piece_begin->GetTo() >= newlimits.

GetFrom

() && piece_end->GetFrom() <= newlimits.

GetTo

())

7480

piece_begin = piece_end;

7506  if

(align.

Exons

().front().m_ssplice_sig ==

"XX"

)

7508  if

(align.

Exons

().back().m_fsplice_sig ==

"XX"

)

7513  if

(cds_on_genome.

GetFrom

() <

a

) {

7516  if

(

b

< cds_on_genome.

GetTo

()) {

7524  if

(newlimits != align.

Limits

()) {

7532  return new

gnomon::TrimAlignment(

m_data

->trim);

7551  return new

gnomon::DoNotBelieveShortPolyATail(

m_data

->minpolya);

7557  m_data

->m_idnext = idnext;

7558  m_data

->m_idinc = idinc;

7563  m_data

->SetGenomicRange(alignments);

7574

range +=

i

->Limits();

7577  string

accession =

i

->TargetAccession();

7578  if

(!prot_complet.count(accession)) {

7581

prot_complet[accession] = make_pair(*protein_ci ==

'M'

,

true

);

7589

confirmed_ends.clear();

7590

all_frameshifts.clear();

7591

orig_aligns.clear();

7592

unmodified_aligns.clear();

7593

trusted_group_cds.clear();

7597

rnaseq_count.clear();

7598

oriented_introns_plus.clear();

7599

oriented_introns_minus.clear();

7604  return new

gnomon::ProjectCDS(

m_data

->mininframefrac,

m_gnomon

->GetSeq(),

7605  m_data

->mrnaCDS.find(

"use_objmgr"

)!=

m_data

->mrnaCDS.end() ? &scope :

NULL

,

7619  return new

gnomon::DoNotBelieveFrameShiftsWithoutCdsEvidence();

7623  if

(

a

.Limits() ==

b

.Limits()) {

7624  if

(

a

.Type() ==

b

.Type())

7625  return a

.ID() <

b

.ID();

7627  return a

.Type() >

b

.Type();

7629  else if

(

a

.Limits().GetFrom() ==

b

.Limits().GetFrom())

7630  return a

.Limits().GetTo() >

b

.Limits().GetTo();

7632  return a

.Limits().GetFrom() <

b

.Limits().GetFrom();

7637  m_data

->SetConfirmedStartStopForProteinAlignments(alignments);

7647

map<string, pair<bool,bool> >::iterator iter = prot_complet.find(algn.

TargetAccession

());

7648  _ASSERT

(iter != prot_complet.end());

7649  if

(iter == prot_complet.end())

7652  if

(cds.

HasStart

() && iter->second.first && alignedlim.

GetFrom

() == 0)

7667  m_data

->orig_aligns[

i

->ID()]=&(*i);

7671  if

(!

i

->TrustedmRNA().empty() &&

i

->Exons().size() > 1) {

7672  auto

tlim =

i

->TranscriptLimits();

7673  if

(

i

->Exons().front().Limits().NotEmpty() && tlim.GetFrom() == 0)

7675  if

(

i

->Exons().back().Limits().NotEmpty() && (tlim.GetTo() ==

i

->TargetLen()-1 || (

i

->Status()&

CGeneModel::ePolyA

)))

7680  TInDels

alignfshifts =

i

->GetInDels(

true

);

7686  if

(fs->IntersectingWith(e->

GetFrom

(),e->

GetTo

())) {

7687

efshifts.push_back(*fs);

7688  len

+= (fs->IsInsertion() ? fs->Len() : -fs->Len());

7691  if

(efshifts.empty())

7694  int a

= efshifts.front().Loc()-1;

7695  int b

= efshifts.back().InDelEnd();

7699  if

(

len

%3 != 0 || !confirmed_region) {

7701  int l

= fs->Len()%3;

7702  if

(fs->IsInsertion()) {

7717

models.push_back(aa);

7721  for

(

auto

it_loop = models.begin(); it_loop != models.end(); ) {

7722  auto

it = it_loop++;

7723  if

(!

m_data

->orig_aligns.count(it->ID())) {

7733

arg_desc->

AddKey

(

"param"

,

"param"

,

7734  "Organism specific parameters"

,

7741  "If aligned sequence is partial and includes a small portion of an exon the alignment program " 7742  "usually misses this exon and might erroneously place a few bases from this exon near the previous exon, " 7743  "and this will mess up the chaining. To prevent this we trim small portions of the alignment before chaining. " 7744  "If it is possible, the trimming will be reversed for the 5'/3' ends of the final chain. Must be < minex and " 7748

arg_desc->

SetCurrentGroup

(

"Additional information about sequences"

);

7750  "CDSes annotated on mRNAs. If CDS could be projected on genome with intact " 7751  "Start/Stop and frame the Stop will be accepted as is. The Start could/will " 7752  "be moved further to make the longest possible complete CDS within the chain"

,

7754

arg_desc->

AddDefaultKey

(

"mininframefrac"

,

"mininframefrac"

,

7755  "Some mRNA alignments have paired indels which throw a portion of CDS out of frame." 7756  "This parameter regulates how much of the CDS could suffer from this before CDS is considered inaceptable"

,

7759  "Information about protein 5' and 3' completeness"

,

7764  "Minimal coding propensity score for valid CDS. This threshold could be ignored depending on " 7765  "-longenoughcds or -protcdslen and -minprotfrac"

,

7767

arg_desc->

AddDefaultKey

(

"longenoughcds"

,

"longenoughcds"

,

7768  "Minimal CDS not supported by protein or annotated mRNA to ignore the score (bp)"

,

7771  "Minimal CDS supported by protein or annotated mRNA to ignore the score (bp)"

,

7774  "Minimal fraction of protein aligned to ignore " 7775  "the score and consider for confirmed start"

,

7778  "Some proteins aligned with better than -minprotfrac coverage are missing Start/Stop. " 7779  "If such an alignment was extended by EST(s) which provided a Start/Stop and we are not missing " 7780  "more than (1-endprotfrac)*proteinlength on either side this chain will be considered to have a confirmed Start/Stop"

,

7783  "Minimal overlap length for chaining alignments which don't have introns in the ovrlapping regions"

,

7786  "Minimal number of mRNA/EST for valid noncoding models"

,

7788

arg_desc->

AddDefaultKey

(

"minsupport_mrna"

,

"minsupport_mrna"

,

7789  "Minimal number of mRNA for valid noncoding models"

,

7791

arg_desc->

AddDefaultKey

(

"minsupport_rnaseq"

,

"minsupport_rnaseq"

,

7792  "Minimal number of RNA-Seq for valid noncoding models"

,

7795  "Chains with thorter CDS should be supported by trusted protein"

,

7797

arg_desc->

AddDefaultKey

(

"altfrac"

,

"altfrac"

,

"The CDS length of the principal model in the gene is multiplied by this fraction. Alt variants with the CDS length above " 7799

arg_desc->

AddDefaultKey

(

"longreadsthreshold"

,

"longreadsthreshold"

,

"If long reads support that many introns in alignment cluster " 7802

arg_desc->

AddFlag

(

"opposite"

,

"Allow overlap of complete multiexon genes with opposite strands"

);

7803

arg_desc->

AddFlag

(

"partialalts"

,

"Allows partial alternative variants. In combination with -nognomon will allow partial genes"

);

7805

arg_desc->

AddFlag

(

"no5pextension"

,

"Don't extend chain CDS to the leftmost start"

);

7807

arg_desc->

SetCurrentGroup

(

"Heuristic parameters for score evaluation"

);

7809  "5p intron penalty"

,

7812  "3p intron penalty"

,

7815  "Bonus for CDS length"

,

7818  "Penalty for total length"

,

7820

arg_desc->

AddDefaultKey

(

"utrclipthreshold"

,

"utrclipthreshold"

,

7821  "Relative coverage for clipping low support UTRs"

,

7826

arg_desc->

AddDefaultKey

(

"min-cap-weight"

,

"MinCapWeight"

,

7827  "Minimal accepted weight for a capped alignment"

,

7830  "Minimal cap blob weight for accepted peak"

,

7833

arg_desc->

AddDefaultKey

(

"min-polya-weight"

,

"MinPolyaWeight"

,

7834  "Minimal accepted weight for polya alignment"

,

7836

arg_desc->

AddDefaultKey

(

"min-polya-blob"

,

"MinPolyaBlob"

,

7837  "Minimal polya blob weight for accepted peak"

,

7841  "Maximal distance between individual cap/polya positions in a blob"

,

7843

arg_desc->

AddDefaultKey

(

"secondary-peak"

,

"SecondaryPeak"

,

7844  "Minimal weight fraction for a secondary cap/polya peak"

,

7846

arg_desc->

AddDefaultKey

(

"tertiary-peak"

,

"TertiaryPeak"

,

7847  "Last 5' exon is extended to low weight polya peak if there is sufficient rnaseq coverage"

,

7849

arg_desc->

AddDefaultKey

(

"tertiary-peak-coverage"

,

"TertiaryPeakCoverage"

,

7850  "Minimal relative rnaseq coverage for tertiary peak"

,

7853

arg_desc->

AddDefaultKey

(

"min-flank-exon"

,

"MinFlankExon"

,

7854  "The minimal distance of cap/polya to a splice"

,

7859  "Minimal accepted polyA tale length in transcript alignments"

,

7861

arg_desc->

AddFlag

(

"use_confirmed_ends"

,

"Use end exons of trusted transcripts for clippig/extension"

);

7881  m_data

->minpolya = minpolya;

7889  m_data

->mininframefrac = mininframefrac;

7893  return m_data

->prot_complet;

7902  CNcbiIfstream

param_file(args[

"param"

].AsString().c_str());

7906

chainer->

SetTrim

(args[

"trim"

].AsInteger());

7909

minscor.

m_min

= args[

"minscor"

].AsDouble();

7913

minscor.

m_cds_bonus

= args[

"cdsbonus"

].AsDouble();

7919

minscor.

m_cds_len

= args[

"longenoughcds"

].AsInteger();

7921

minscor.

m_minsupport

= args[

"minsupport"

].AsInteger();

7924

minscor.

m_minlen

= args[

"minlen"

].AsInteger();

7928

chainer->

m_data

->altfrac = args[

"altfrac"

].AsDouble();

7929

chainer->

m_data

->longreadsthreshold = args[

"longreadsthreshold"

].AsDouble();

7930

chainer->

m_data

->composite = args[

"composite"

].AsInteger();

7931

chainer->

m_data

->allow_opposite_strand = args[

"opposite"

];

7932

chainer->

m_data

->allow_partialalts = args[

"partialalts"

];

7933

chainer->

m_data

->tolerance = args[

"tolerance"

].AsInteger();

7934

chainer->

m_data

->no5pextension = args[

"no5pextension"

];

7936

chainer->

m_data

->min_cap_weight = args[

"min-cap-weight"

].AsInteger();

7937

chainer->

m_data

->min_cap_blob = args[

"min-cap-blob"

].AsInteger();

7938

chainer->

m_data

->min_polya_weight = args[

"min-polya-weight"

].AsInteger();

7939

chainer->

m_data

->min_polya_blob = args[

"min-polya-blob"

].AsInteger();

7940

chainer->

m_data

->max_dist = args[

"max-dist"

].AsInteger();

7941

chainer->

m_data

->secondary_peak = args[

"secondary-peak"

].AsDouble();

7942

chainer->

m_data

->tertiary_peak = args[

"tertiary-peak"

].AsDouble();

7943

chainer->

m_data

->tertiary_peak_coverage = args[

"tertiary-peak-coverage"

].AsDouble();

7944

chainer->

m_data

->min_flank_exon = args[

"min-flank-exon"

].AsInteger();

7945

chainer->

SetMinPolyA

(args[

"minpolya"

].AsInteger());

7946

chainer->

m_data

->use_confirmed_ends = args[

"use_confirmed_ends"

];

7951

map<string,TSignedSeqRange>& mrnaCDS = chainer->

SetMrnaCDS

();

7952  if

(args[

"mrnaCDS"

]) {

7953  if

(args[

"mrnaCDS"

].AsString()==

"use_objmgr"

) {

7956  CNcbiIfstream

cdsfile(args[

"mrnaCDS"

].AsString().c_str());

7959  string

accession,

tmp

;

7961  while

(cdsfile >> accession >>

a

>>

b

) {

7963

getline(cdsfile,

tmp

);

7970

map<string, pair<bool,bool> >& prot_complet = chainer->

SetProtComplet

();

7971  if

(args[

"pinfo"

]) {

7978  while

(protfile >> seqid_str >> fivep >> threep) {

7980

prot_complet[seqid_str] = make_pair(fivep, threep);

7997

TInDels::const_iterator ic = upper_bound(editing_indels_frombtoa.begin(), editing_indels_frombtoa.end(), exonb.

GetFrom

(),

OverlappingIndel

);

7999  if

((ic == editing_indels_frombtoa.end() || ic->Loc() > exonb.

GetTo

()+1) && exona_indels.empty())

8000  return

combined_indels;

8002  typedef

list<char> TCharList;

8010  for

( ;

pb

<= exonb.

GetTo

(); ++

pb

) {

8011  if

(ic != editing_indels_frombtoa.end() && ic->Loc() <=

pb

) {

8012  if

(ic->IsInsertion()) {

8015  pb

= ic->InDelEnd()-1;

8017  string

s = ic->GetInDelV();

8019

s = s.substr(ic->Len()-extra_left);

8020  edit

.insert(

edit

.end(),s.begin(),s.end());

8021  edit

.push_back(

'M'

);

8025  edit

.push_back(

'M'

);

8030  string

s = ic->GetInDelV().substr(0,extra_right);

8031  edit

.insert(

edit

.end(),s.begin(),s.end());

8037  if

(!exona_indels.empty()) {

8038

TInDels::const_iterator jleft = exona_indels.begin();

8047  if

(jleft != exona_indels.end() && jleft->Loc() == pa) {

8048  if

(jleft->IsInsertion()) {

8050

skipsome = jleft->Len();

8052  for

(TCharList::iterator ipp =

ip

; skipsome > 0 && ipp !=

edit

.begin() && *(--ipp) !=

'-'

&& *ipp !=

'M'

; ) {

8054

ipp =

edit

.erase(ipp);

8058  int

insertsome = jleft->Len();

8059  for

(reverse_iterator<TCharList::iterator> ir(

ip

); insertsome > 0 && ir !=

edit

.rend() && *ir ==

'-'

; ++ir) {

8064  edit

.insert(

ip

,insertsome,

'N'

);

8073  else if

(*

ip

!=

'-'

)

8077  if

(jleft != exona_indels.end()) {

8078  _ASSERT

(jleft->IsDeletion() && jleft->Loc() == pa+1);

8079  int

insertsome = jleft->Len();

8080  for

(TCharList::reverse_iterator ir =

edit

.rbegin(); insertsome > 0 && ir !=

edit

.rend() && *ir ==

'-'

; ++ir) {

8085  edit

.insert(

edit

.end(),insertsome,

'N'

);

8092  for

(TCharList::iterator

ip

=

edit

.begin();

ip

!=

edit

.end(); ) {

8096

}

else if

(*

ip

==

'-'

) {

8098  for

( ;

ip

!=

edit

.end() && *

ip

==

'-'

; ++

ip

, ++

len

);

8106  for

( ;

ip

!=

edit

.end() && *

ip

!=

'M'

&& *

ip

!=

'-'

; ++

ip

)

8113  return

combined_indels;

8122  for

(

int

ie = 0; ie < (

int

)srcmodel.

Exons

().size(); ++ie) {

8130  int

ggapa =

i

->first;

8131  int

ggapb =

i

->first+(

int

)

i

->second->GetInDelV().length()-1;

8134

src =

i

->second->GetSource();

8139

}

else if

(ggapb == e.

GetTo

()) {

8140  string

s =

i

->second->GetInDelV();

8142

src =

i

->second->GetSource();

8147

}

else if

(ggapb >= e.

GetFrom

()) {

8154  if

((

int

)srcmodel.

Exons

().size() == 1){

8165  if

(exon.

Empty

()) {

8169  int

extra_right = e.

GetTo

()-exon.

GetTo

();

8177

exon_indels.push_back(*indl);

8184  int

loc = ir->first;

8185  char

c = ir->second;

8186

TInDels::const_iterator ic = upper_bound(efs.begin(), efs.end(), loc,

OverlappingIndel

);

8187  if

(ic != efs.end() && ic->IsInsertion() && ic->Loc() <= loc && ic->InDelEnd() > loc)

8189  else if

(ic != efs.end() && ic->IsDeletion() && ic->Loc() == loc)

8191  else if

(erepl.empty() || erepl.back().InDelEnd() != loc)

8194

loc = erepl.back().Loc();

8195  string

s = erepl.back().GetInDelV()+

string

(1,c);

8199

efs.insert(efs.end(), erepl.begin(), erepl.end());

8200  sort

(efs.begin(), efs.end());

8201  for

(

auto

& indl : efs) {

8203

editedframeshifts.push_back(indl);

8211  if

(ie < (

int

)srcmodel.

Exons

().size()-1 && (!e.

m_ssplice

|| !srcmodel.

Exons

()[ie+1].m_fsplice))

8272  if

(

i

->IsMismatch()) {

8279  if

(ic !=

m_editing_indels

.end() &&

i

->GetType() == ic->GetType() &&

i

->Loc() >= ic->Loc() &&

i

->InDelEnd() <= ic->InDelEnd()) {

8282

}

else if

(included && (ic ==

m_editing_indels

.end() || ic->Loc() >

i

->InDelEnd())) {

8304  for

(

auto

& indel : aindels)

8308  for

(

auto

& e : aexons) {

8318

vector<TSignedSeqRange> transcript_exons;

8322  for

(

int i

= 0;

i

< (

int

)aexons.size(); ++

i

) {

8326

list<CInDelInfo> exon_indels;

8328  if

(indl->IntersectingWith(e.

GetFrom

(), e.

GetTo

()))

8329

exon_indels.push_back(*indl);

8333  int

left_shrink = 0;

8334  int

right = e.

GetTo

();

8335  int

right_shrink = 0;

8336  int

left_extend = 0;

8337  int

right_extend = 0;

8346  if

(

i

== (

int

)aexons.size()-1)

8356  if

(ileft !=

m_editing_indels

.end() && ileft->IsDeletion() && ileft->Loc() == left) {

8357  if

(!exon_indels.empty() && exon_indels.front().IsDeletion() && exon_indels.front().Loc() == left) {

8359

left_extend =

min

(ileft->Len(),exon_indels.front().Len());

8367

ll = left_codon.

GetTo

();

8371

left = ileft->Loc()+ileft->Len();

8374

left_shrink = left-e.

GetFrom

();

8381

lim.

SetFrom

(ileft->InDelEnd());

8391  while

(!exon_indels.empty() && exon_indels.front().InDelEnd() <= left)

8392

exon_indels.pop_front();

8397

TInDels::const_iterator first_outside = ileft;

8398  for

( ; first_outside !=

m_editing_indels

.end() && first_outside->Loc() <= (first_outside->IsInsertion() ? right : right+1); ++first_outside);

8399

reverse_iterator<TInDels::const_iterator> iright(first_outside);

8402  if

(iright !=

m_editing_indels

.rend() && iright->IsDeletion() && iright->Loc() == right+1) {

8403  if

(!exon_indels.empty() && exon_indels.back().IsDeletion() && exon_indels.back().Loc() == right+1) {

8405

right_extend =

min

(iright->Len(),exon_indels.back().Len());

8417

right = iright->Loc()-1;

8420

right_shrink = e.

GetTo

()-right;

8427

lim.

SetTo

(iright->Loc()-1);

8436

right = lim.

GetTo

();

8437  while

(!exon_indels.empty() && exon_indels.back().Loc() > right)

8438

exon_indels.pop_back();

8445

transcript_exons.push_back(texon);

8450

corrected_exon.

SetFrom

(corrected_exon.

GetFrom

()-left_extend);

8451

corrected_exon.

SetTo

(corrected_exon.

GetTo

()+right_extend);

8453  if

(

i

< (

int

)aexons.size()-1 && (!aexons[

i

].m_ssplice || !aexons[

i

+1].m_fsplice))

8458

editedindels.insert(editedindels.end(), efs.begin(), efs.end());

8461  string

gap_seq = e.

m_seq

;

8467  if

(ig->GetSource().m_range.NotEmpty()) {

8468  if

(

i

> 0 && ig->Loc() < aexons[

i

-1].GetTo())

8470  if

(

i

== 0 && ig->Loc() > aexons[

i

+1].GetFrom())

8472  if

(ig->GetInDelV() == gap_seq) {

8483  if

(left_end >= 0) {

8484

left_end -= gap->Len();

8485  for

(TInDels::const_iterator ig = gap+1; ig !=

m_editing_indels

.end() && ig->Loc() == gap->Loc(); ++ig)

8486

left_end -= ig->Len();

8491  for

(TInDels::const_iterator ig = gap; ig !=

m_editing_indels

.begin() && (ig-1)->

Loc

() == gap->Loc(); --ig) {

8492

left_end += (ig-1)->

Len

();

8508  double

score = acds.

Score

();

8522  if

(

a

.Limits().NotEmpty()) {

8523  a

.SetTargetId(*ia->GetTargetId());

8526

alignments.erase(ia);

8535  _ASSERT

(!ia->Exons().empty());

8561  int

length (sv.size());

8563

sv.GetSeqData(0, length, seq_txt);

8565  TIVec

exons(length,0);

8577  for

(

int

p =

a

+1; p <=

b

; ++p) {

8584  TIVec

model_ranges(length,0);

8587  for

(

int

p =

max

(0,

i

->Limits().GetFrom()-2); p <=

min

(length-1,

i

->Limits().GetTo()+2); ++p)

8588

model_ranges[p] = 1;

8592  if

(indl->IsMismatch()) {

8593  string

s = indl->GetInDelV();

8594  for

(

int l

= 0;

l

< indl->Len(); ++

l

)

8605  for

(

int

ie = 0; ie < (

int

)

i

->Exons().size(); ++ie) {

8610  _ASSERT

(

i

->Exons()[ie-1].Limits().NotEmpty());

8611  for

(pos =

i

->Exons()[ie-1].GetTo()+1; pos < length && exons[pos] > 0; ++pos);

8613  _ASSERT

((

int

)

i

->Exons().size() > 1 &&

i

->Exons()[1].Limits().NotEmpty());

8615  for

(pos =

i

->Exons()[1].GetFrom(); pos > 0 && exons[pos] > 0; --pos);

8617  string

seq = e.

m_seq

;

8619  if

(

i

->Strand() ==

eMinus

) {

8630

TInDels::iterator

next

= indl;

8632  if

(indl->GetSource().m_range.Empty() &&

next

->GetSource().m_range.Empty()) {

8641  for

(

int i

= 0;

i

< length; ++

i

) {

8642  if

(model_ranges[

i

])

8650

++current_gap->second;

8675  int

GC_RANGE = 200000;

8678

length =

limits

.GetLength();

8680

seq.reserve(length);

8682

seq.push_back(sv[

i

]);

8697  for

(

CFeat_CI

it(bh, sel); it; ++it) {

8698  TSeqRange

range = it->GetLocation().GetTotalRange();

8699  for

(

unsigned int i

= range.

GetFrom

();

i

<= range.

GetTo

(); ++

i

) {

8717

seq[ir->first-

limits

.GetFrom()] = ir->second;

8722 #define BLOCK_OF_Ns 35 8724  if

(cor.GetSource().m_range.Empty() &&

Include

(

limits

, cor.Loc())) {

8725

cor.SetLoc(cor.Loc()-

limits

.GetFrom());

8727

}

else if

(cor.Loc() >=

limits

.GetFrom() && cor.Loc() <=

limits

.GetTo()+1) {

8728  int l

= cor.Loc()-

limits

.GetFrom();

8729  CInDelInfo g

(

l

, cor.Len(), cor.GetType(), cor.GetInDelV(), cor.GetSource());

8742  swap

(seq, editedseq);

8756

TInDels::const_iterator nexti =

next

(ig);

8757  if

(nexti !=

m_editing_indels

.end() && nexti->GetSource().m_range.NotEmpty() && nexti->Loc() == ig->Loc())

8760  if

(ig->GetSource().m_range.NotEmpty()) {

8762  if

(left_end >= 0) {

8763

left_end -= ig->Len();

8764  for

(TInDels::const_iterator igg = ig+1; igg !=

m_editing_indels

.end() && igg->Loc() == ig->Loc(); ++igg)

8765

left_end -= igg->Len();

8770  for

(TInDels::const_iterator

i

= ig;

i

!=

m_editing_indels

.begin() && (

i

-1)->

Loc

() == ig->Loc(); --

i

) {

8771

left_end += (

i

-1)->

Len

();

8779  if

(ig->IsInsertion()) {

8780  string

s(seq.begin()+ig->Loc(), seq.begin()+ig->Len());

8793  for

(

int

p = lim.

GetFrom

(); p <= lim.

GetTo

(); ++p)

8794

confirmed_bases.

insert

(p);

8801

++cbase_len->second;

8811  TIntMap

notbridgeable_gaps_len;

8815

notbridgeable_gaps_len[pos] = ig->second;

8859

: hthresh(_hthresh), hmaxlen(_hmaxlen), gnomon(_gnomon) {}

8864  int

total_hole_len = 0;

8865  for

(

unsigned int i

= 1;

i

< m.

Exons

().

size

(); ++

i

) {

8866  if

(!m.

Exons

()[

i

-1].m_ssplice || !m.

Exons

()[

i

].m_fsplice)

8867

total_hole_len += m.

Exons

()[

i

].GetFrom()-m.

Exons

()[

i

-1].GetTo()-1;

8872  for

(

unsigned int i

= 1;

i

< m.

Exons

().

size

(); ++

i

) {

8873  bool

hole = !m.

Exons

()[

i

-1].m_ssplice || !m.

Exons

()[

i

].m_fsplice;

8874  int

intron = m.

Exons

()[

i

].GetFrom()-m.

Exons

()[

i

-1].GetTo()-1;

8894  for

(

unsigned int i

= 1;

i

< m.

Exons

().

size

(); ++

i

) {

8895  bool

hole = !m.

Exons

()[

i

-1].m_ssplice || !m.

Exons

()[

i

].m_fsplice;

8896  int

intron = m.

Exons

()[

i

].GetFrom()-m.

Exons

()[

i

-1].GetTo()-1;

8909  for

(

unsigned int i

= 1;

i

< m.

Exons

().

size

(); ++

i

) {

8910  bool

hole = !m.

Exons

()[

i

-1].m_ssplice || !m.

Exons

()[

i

].m_fsplice;

8911  int

intron = m.

Exons

()[

i

].GetFrom()-m.

Exons

()[

i

-1].GetTo()-1;

8924  int

exonlen = alignmap.

FShiftedLen

(shrinkedexon,

false

);

8930  if

(

a

.Exons().empty())

8936  a

.CutExons(

a

.Limits());

8942  if

((

a

.Exons().size() > 1 && !

a

.Exons().front().m_ssplice) || (

a

.Type() &

CAlignModel::eProt

)==0 || !

a

.LeftComplete()) {

8943  for

(

unsigned int i

= 0;

i

<

a

.Exons().size()-1; ++

i

) {

8947

left =

a

.Exons()[

i

+1].GetFrom();

8957  if

((

a

.Exons().size() > 1 && !

a

.Exons().back().m_fsplice) || (

a

.Type() &

CAlignModel::eProt

)==0 || !

a

.RightComplete()) {

8958  for

(

auto i

=

a

.Exons().size()-1;

i

> 0; --

i

) {

8962

right =

a

.Exons()[

i

-1].GetTo();

8974  if

(newlimits !=

a

.Limits()) {

8976  a

.CutExons(

a

.Limits());

8982  a

.CutExons(

a

.Limits());

8987  for

(

size_t i

= 1;

i

<

a

.Exons().

size

()-1; ++

i

) {

8994

e = &

a

.Exons()[0];

9001  if

(remainingpoint <

a

.Exons()[

i

-1].GetTo())

9002

left = remainingpoint+1;

9005

e = &

a

.Exons()[

i

];

9010  if

(

i

==

a

.Exons().size()-1) {

9018  if

(remainingpoint >

a

.Exons()[

i

+1].GetFrom())

9019

right = remainingpoint-1;

9022

e = &

a

.Exons()[

i

];

9030  return

m.

Exons

().empty();

9044

: minsupport(_minsupport)

#define SPECIAL_ALIGN_LEN

list< CChain > TChainList

static int s_ExonLen(const CGeneModel &a)

static bool DescendingModelOrder(const CChain &a, const CChain &b)

bool GoodSupportForIntrons(const CGeneModel &chain, const SMinScor &minscor, map< TSignedSeqRange, int > &mrna_count, map< TSignedSeqRange, int > &est_count, map< TSignedSeqRange, int > &rnaseq_count)

void MarkUnwantedLowSupportIntrons(TContained &pointers, const SMinScor &minscor, map< TSignedSeqRange, int > &mrna_count, map< TSignedSeqRange, int > &est_count, map< TSignedSeqRange, int > &rnaseq_count)

TInDels CombineCorrectionsAndIndels(const TSignedSeqRange exona, int extra_left, int extra_right, const TSignedSeqRange exonb, const TInDels &editing_indels_frombtoa, const TInDels &exona_indels)

vector< SChainMember * > TContained

static bool DescendingModelOrderPConsistentCoverage(const TChainPtr &a, const TChainPtr &b)

bool MemberIsMarkedForDeletion(const SChainMember *mp)

map< Int8, CGeneModel > TUnmodAligns

#define NON_CDNA_INTRON_PENALTY

TInDels StrictlyContainedInDels(const TInDels &indels, const TSignedSeqRange &lim)

TIdLim AlignIdLimits(SChainMember *mp)

bool OverlappingIndel(int pos, const CInDelInfo &indl)

pair< string, int > GetAccVer(const CAlignModel &a, CScope &scope)

string FindMultiplyIncluded(CAlignModel &algn, TAlignModelList &clust)

list< CChain * > TChainPointerList

double InframeFraction(const CGeneModel &a, TSignedSeqPos left, TSignedSeqPos right)

tuple< Int8, TSignedSeqRange > TIdLim

set< TSignedSeqRange, RangeOrder > TRangePrecedeSet

int EffectiveExonLength(const CModelExon &e, const CAlignMap &alignmap, bool snap_to_codons)

map< Int8, CAlignModel * > TOrigAligns

static bool DescendingModelOrderP(const TChainPtr &a, const TChainPtr &b)

bool MemberIsCoding(const SChainMember *mp)

vector< pair< SChainMember *, CGene * > > TMemeberGeneVec

bool LeftAndLongFirst(const CGeneModel &a, const CGeneModel &b)

string GetLinkedIdsForMember(const SChainMember &mi)

TSignedSeqRange ExtendedMaxCdsLimits(const CGeneModel &a, const CCDSInfo &cds)

bool BelongToExon(const CGeneModel::TExons &exons, int pos)

vector< SLinker > TLinkers

set< SChainMember * > TMemberPtrSet

void remove_if(Container &c, Predicate *__pred)

TSignedSeqRange ShrinkToRealPointsOnEdited(TSignedSeqRange edited_range) const

void MoveOrigin(TSignedSeqPos shift)

TSignedSeqRange MapRangeEditedToOrig(TSignedSeqRange edited_range, bool withextras=true) const

TSignedSeqPos MapOrigToEdited(TSignedSeqPos orig_pos) const

void EditedSequence(const In &original_sequence, Out &edited_sequence, bool includeholes=false) const

TSignedSeqPos MapEditedToOrig(TSignedSeqPos edited_pos) const

TSignedSeqRange ShrinkToRealPoints(TSignedSeqRange orig_range, bool snap_to_codons=false) const

int FShiftedLen(TSignedSeqRange ab, ERangeEnd lend, ERangeEnd rend) const

TSignedSeqRange MapRangeOrigToEdited(TSignedSeqRange orig_range, ERangeEnd lend, ERangeEnd rend) const

string TargetAccession() const

virtual void Clip(TSignedSeqRange limits, EClipMode mode, bool ensure_cds_invariant=true)

CConstRef< objects::CSeq_id > GetTargetId() const

virtual CAlignMap GetAlignMap() const

CCDSInfo MapFromEditedToOrig(const CAlignMap &amap) const

bool PStop(bool includeall=true) const

void SetStart(TSignedSeqRange r, bool confirmed=false)

CCDSInfo MapFromOrigToEdited(const CAlignMap &amap) const

TSignedSeqRange MaxCdsLimits() const

void Set5PrimeCdsLimit(TSignedSeqPos p)

bool IsMappedToGenome() const

void SetScore(double score, bool open=false)

TSignedSeqRange Start() const

bool ConfirmedStart() const

void Clip(TSignedSeqRange limits)

void AddPStop(SPStop stp)

TSignedSeqRange Cds() const

void CombineWith(const CCDSInfo &another_cds_info)

TSignedSeqRange ReadingFrame() const

TSignedSeqRange ProtReadingFrame() const

void SetStop(TSignedSeqRange r, bool confirmed=false)

const TPStops & PStops() const

bool ConfirmedStop() const

void Clear5PrimeCdsLimit()

void SetReadingFrame(TSignedSeqRange r, bool protein=false)

TSignedSeqRange Stop() const

void SpliceFromOther(CChainMembers &other)

CChainMembers & operator=(const CChainMembers &object)=delete

list< CAlignMap > m_align_maps

void InsertMemberCopyWithCds(const CCDSInfo &cds, SChainMember *copy_ofp)

list< CCDSInfo > m_extra_cds

void InsertMemberCopyAndStoreCds(const CCDSInfo &cds, SChainMember *copy_ofp)

list< TContained > m_copylist

list< TContained > m_containedlist

list< SChainMember > m_members

CChainMembers(const CChainMembers &object)=delete

void InsertMember(CGeneModel &algn, SChainMember *copy_ofp=0)

void DuplicateUTR(SChainMember *copy_ofp)

void InsertMemberCopyWithoutCds(SChainMember *copy_ofp)

void SetBestPlacement(TOrigAligns &orig_aligns)

void AddAllMembersAndCoverage(SChainMember &mbr)

void ClipChain(TSignedSeqRange limits, bool recalulate_support=true)

void SetOpenForPartialyAlignedProteins(map< string, pair< bool, bool > > &prot_complet)

TSignedSeqRange m_supported_range

void RemoveFshiftsFromUTRs()

void RestoreTrimmedEnds(int trim)

void CalculateDropLimits()

void ClipLowCoverageUTR(double utr_clip_threshold, bool recalulate_support=true)

bool HarborsNested(const CChain &other_chain, bool check_in_holes) const

int m_polya_cap_right_soft_limit

tuple< TIDMap, TSignedSeqRange > PeaksAndLimits(EStatus determinant, int min_blob_weight, int max_empty_dist, int min_splice_dist)

void CalculateSupportAndWeightFromMembers(bool keep_all_evidence=false)

int m_coverage_drop_right

map< int, double > TIDMap

vector< double > m_coverage

void SetConfirmedStartStopForCompleteProteins(map< string, pair< bool, bool > > &prot_complet, const SMinScor &minscor)

void SetConsistentCoverage()

void ClipToCap(int min_cap_blob, int max_dist, int min_flank_exon, double secondary_peak, bool recalulate_support=true)

void CollectTrustedmRNAsProts(TOrigAligns &orig_aligns, const SMinScor &minscor, CScope &scope, SMatrix &matrix, const CResidueVec &contig)

void ClipToPolyA(const CResidueVec &contig, int min_polya_blob, int max_dist, int min_flank_exon, double secondary_peak, double tertiary_peak, double tertiary_peak_coverage, bool recalulate_support=true)

pair< bool, bool > ValidPolyA(int pos, const CResidueVec &contig)

bool SetConfirmedEnds(const CGnomonEngine &gnomon, CGnomonAnnotator_Base::TIntMap &confirmed_ends)

CGeneModel m_gapped_helper_align

bool RestoreReasonableConfirmedStart(const CGnomonEngine &gnomon, TOrigAligns &orig_aligns)

int m_polya_cap_left_soft_limit

bool HasTrustedEvidence() const

tuple< TIVec, TSignedSeqRange > MainPeaks(TIDMap &peak_weights, double secondary_peak, double tertiary_peak, double tertiary_peak_coverage, bool right_end)

void CheckSecondaryCapPolyAEnds()

int m_coverage_bump_right

static void ArgsToChainer(CChainer *chainer, const CArgs &args, objects::CScope &scope)

static void SetupArgDescriptions(CArgDescriptions *arg_desc)

map< TSignedSeqRange, int > rnaseq_count

void LRIinit(SChainMember &mi, const TContained &micontained)

void CreateChainsForPartialProteins(TChainList &chains, TContained &pointers, TGeneModelList &unma_aligns, CChainMembers &unma_members)

const CAlignMap & m_edited_contig_map

void ReplicatePStops(CChainMembers &pointers)

bool CanIncludeJinI(const SChainMember &mi, const SChainMember &mj)

map< string, pair< bool, bool > > prot_complet

ECompat CheckCompatibility(const CGene &gene, const CChain &algn)

void IncludeInContained(SChainMember &big, SChainMember &small)

void DuplicateNotOriented(CChainMembers &pointers, TGeneModelList &clust)

void FindContainedAlignments(TContained &pointers)

void SetGenomicRange(const TAlignModelList &alignments)

set< TSignedSeqPos > TSplices

void CreateFlexibleAligns(TGeneModelList &clust)

const string & m_contig_acc

void SplitAlignmentsByStrand(const TGeneModelList &clust, TGeneModelList &clust_plus, TGeneModelList &clust_minus)

void CDNACounts(TGeneModelList &clust)

map< TSignedSeqRange, int > mrna_count

void FilterOutSimilarsWithLowerScore(TChainPointerList &not_placed_yet, TChainPointerList &rejected)

void FindOverlappingWithTrusted(CChainMembers &pointers)

void CombineCompatibleChains(TChainList &chains)

bool AddIfCompatible(set< SFShiftsCluster > &fshift_clusters, const CGeneModel &algn)

void RemovePoorCds(CGeneModel &algn, double minscor)

map< int, TSignedSeqRange > trusted_group_cds

bool allow_opposite_strand

void RightLeft(TContained &pointers)

map< string, TSignedSeqRange > mrnaCDS

unique_ptr< CGnomonEngine > & m_gnomon

set< pair< Int8, string > > TIDs

map< int, TGRInfoVec > TGRInfoByStrand

void FilterOutBadScoreChainsHavingBetterCompatibles(TGeneModelList &chains)

void LeftRight(TContained &pointers)

list< CGene > FindGenes(TChainList &cls)

SChainMember * FindOptimalChainForProtein(TContained &pointers_all, vector< CGeneModel * > &parts, CGeneModel &palign)

void ReplacePseudoGeneSeeds(list< CGene > &alts, TChainPointerList &not_placed_yet)

void CalculateSpliceWeights(CChainMembers &pointers)

const TSignedSeqRange & m_limits

vector< STGRInfo > TGRInfoVec

double tertiary_peak_coverage

double longreadsthreshold

CChainerImpl(CRef< CHMMParameters > &hmm_params, unique_ptr< CGnomonEngine > &gnomon, const CAlignMap &edited_contig_map, const TSignedSeqRange &limits, const string &m_contig_acc)

set< TSignedSeqRange > oriented_introns_plus

void SetConfirmedStartStopForProteinAlignments(TAlignModelList &alignments)

void CutParts(TGeneModelList &clust)

TGeneModelList MakeChains(TGeneModelList &models)

void FilterOutChimeras(TGeneModelList &clust)

void DuplicateUTRs(CChainMembers &pointers)

void SetFlagsForChains(TChainList &chains)

void FilterOutTandemOverlap(TChainPointerList &not_placed_yet, TChainPointerList &rejected, double fraction)

void PlaceAllYouCan(list< CGene > &alts, TChainPointerList &not_placed_yet, TChainPointerList &rejected)

void Duplicate5pendsAndShortCDSes(CChainMembers &pointers)

void FindAltsForGeneSeeds(list< CGene > &alts, TChainPointerList &not_placed_yet)

TUnmodAligns unmodified_aligns

double GoodCDNAScore(const CGeneModel &algn, bool simple=false)

map< TSignedSeqRange, int > est_count

void FindGeneSeeds(list< CGene > &alts, TChainPointerList &not_placed_yet)

void ScoreCdnas(CChainMembers &pointers)

bool FsTouch(const TSignedSeqRange &lim, const CInDelInfo &fs)

void SkipReason(CGeneModel *orig_align, const string &comment)

void TrimAlignmentsIncludedInDifferentGenes(list< CGene > &genes)

CRef< CHMMParameters > & m_hmm_params

set< TSignedSeqRange > oriented_introns_minus

bool LRCanChainItoJ(int &delta_cds, double &delta_num, double &delta_splice_num, SChainMember &mi, SChainMember &mj, TContained &contained, bool &not_sorted)

void CutParts(TGeneModelList &models)

map< string, pair< bool, bool > > & SetProtComplet()

TransformFunction * TrimAlignment()

void SetMinInframeFrac(double mininframefrac)

void SetNumbering(int idnext, int idinc)

void SetIntersectLimit(int value)

Predicate * OverlapsSameAccessionAlignment(TAlignModelList &alignments)

void ScoreCDSes_FilterOutPoorAlignments(TGeneModelList &clust)

void SetConfirmedStartStopForProteinAlignments(TAlignModelList &alignments)

void FindSelenoproteinsClipProteinsToStartStop(TGeneModelList &clust)

TGeneModelList MakeChains(TGeneModelList &models)

void SetMinPolyA(int minpolya)

TransformFunction * ProjectCDS(objects::CScope &scope)

void DropAlignmentInfo(TAlignModelList &alignments, TGeneModelList &models)

TransformFunction * DoNotBelieveShortPolyATail()

void FilterOutChimeras(TGeneModelList &clust)

map< string, TSignedSeqRange > & SetMrnaCDS()

void SetGenomicRange(const TAlignModelList &alignments)

Predicate * ConnectsParalogs(TAlignModelList &alignments)

TransformFunction * DoNotBelieveFrameShiftsWithoutCdsEvidence()

unique_ptr< CChainerImpl > m_data

TSignedSeqRange SubjectRange() const

void AddExon(TSignedSeqRange exon, const string &fs="", const string &ss="", double ident=0, const string &seq="", const CInDelInfo::SSource &src=CInDelInfo::SSource())

TSignedSeqPos FShiftedMove(TSignedSeqPos pos, int len) const

void ExtendRight(int amount)

const list< CRef< CSeq_id > > & TrustedProt() const

bool GoodEnoughToBeAnnotation() const

void SetTrustedCds(const TSignedSeqRange &r)

string GetProtein(const CResidueVec &contig_sequence) const

void AddNormalExon(TSignedSeqRange exon, const string &fs, const string &ss, double ident, bool infront)

bool Open5primeEnd() const

virtual void CutExons(TSignedSeqRange hole)

int MutualExtension(const CGeneModel &a) const

bool IntersectingWith(const CGeneModel &a) const

TSignedSeqRange TranscriptLimits() const

void Extend(const CGeneModel &a, bool ensure_cds_invariant=true)

EStrand Orientation() const

void InsertTrustedProt(CRef< CSeq_id > g)

int FShiftedLen(TSignedSeqRange ab, bool withextras=true) const

TSignedSeqRange TrustedCds() const

bool IdenticalAlign(const CGeneModel &a) const

const CSupportInfoSet & Support() const

bool OpenRightEnd() const

const TExons & Exons() const

bool LeftComplete() const

TSignedSeqRange ReadingFrame() const

bool RightComplete() const

virtual CAlignMap GetAlignMap() const

TSignedSeqRange RealCdsLimits() const

TSignedSeqRange TranscriptExon(int i) const

void ReverseComplementModel()

void SetStrand(EStrand s)

void ReplaceSupport(const CSupportInfoSet &support_set)

virtual void Clip(TSignedSeqRange limits, EClipMode mode, bool ensure_cds_invariant=true)

void SetCdsInfo(const CCDSInfo &cds_info)

bool ConfirmedStop() const

void InsertTrustedmRNA(CRef< CSeq_id > g)

void AddGgapExon(double ident, const string &seq, const CInDelInfo::SSource &src, bool infront)

bool AddSupport(const CSupportInfo &support)

TSignedSeqRange Limits() const

const list< CRef< CSeq_id > > & TrustedmRNA() const

void AddComment(const string &comment)

bool IsSubAlignOf(const CGeneModel &a) const

int HasCompatibleOverlap(const CGeneModel &a, int min_overlap=2) const

const CCDSInfo & GetCdsInfo() const

vector< CModelExon > TExons

TSignedSeqRange MaxCdsLimits() const

bool ConfirmedStart() const

void ExtendLeft(int amount)

const vector< CCDSInfo > * GetEdgeReadingFrames() const

void SetTrustedGroup(int tgr)

TInDels GetInDels(bool fs_only) const

bool PStop(bool includeall=true) const

int isCompatible(const CGeneModel &a) const

bool HarborsNested(const CChain &other_chain, bool check_in_holes) const

bool HarborsRange(TSignedSeqRange range, bool check_in_holes) const

set< CGene * > m_nested_in_genes

set< CGene * > m_harbors_genes

bool IsAllowedAlternative(const ncbi::gnomon::CGeneModel &, int maxcomposite) const

TSignedSeqRange RealCdsLimits() const

list< CGeneModel >::const_iterator TConstIt

void RemoveFromHarbored(CGene *p)

set< CGene * > RemoveGeneFromOtherGenesSets()

TSignedSeqRange m_real_cds_limits

list< CGeneModel >::iterator TIt

void AddToHarbored(CGene *p)

bool IsAlternative(const CChain &a) const

void RemoveFromNestedIn(CGene *p)

void AddToNestedIn(CGene *p)

TSignedSeqRange Limits() const

bool LargeCdsOverlap(const CGeneModel &a) const

CAlignModel MapOneModelToEditedContig(const CGeneModel &align) const

CGeneModel MapOneModelToOrigContig(const CGeneModel &srcmodel) const

void SetHMMParameters(CHMMParameters *params)

unique_ptr< CGnomonEngine > m_gnomon

virtual ~CGnomonAnnotator_Base()

map< int, char > m_replaced_bases

CGnomonEngine & GetGnomon()

void MapModelsToOrigContig(TGeneModelList &models) const

void SetGenomic(const CResidueVec &seq)

TGgapInfo m_inserted_seqs

TInDels m_reversed_corrections

void MapModelsToEditedContig(TGeneModelList &models) const

map< int, char > m_replacements

unique_ptr< SPhyloCSFSlice > m_pcsf_slice

const CPhyloCSFData * m_pcsf_data

void MapAlignmentsToEditedContig(TAlignModelList &alignments) const

TIntMap m_confirmed_bases_len

TIntMap m_confirmed_bases_orig_len

CAlignMap m_edited_contig_map

CRef< CHMMParameters > m_hmm_params

TIntMap m_notbridgeable_gaps_len

double GetChanceOfIntronLongerThan(int l) const

const CResidueVec & GetSeq() const

int GetMaxIntronLen() const

void GetScore(CGeneModel &model, bool extend5p=false, bool obeystart=false, bool extend_max_cds=false) const

int GetMinIntronLen() const

HMM model parameters just create it and pass to a Gnomon engine.

static CRef< CSeq_id > ToSeq_id(const string &str)

static string ToString(const CSeq_id &id)

CConstRef< CSeq_id > ToCanonical(const CSeq_id &id) const

TSignedSeqPos Loc() const

static bool HaveCommonExonOrIntron(const CGeneModel &a, const CGeneModel &b)

static bool RangeNestedInIntron(TSignedSeqRange r, const CGeneModel &algn, bool check_in_holes=true)

static size_t CountCommonSplices(const CGeneModel &a, const CGeneModel &b)

static bool AreSimilar(const CGeneModel &a, const CGeneModel &b, int tolerance)

CInDelInfo::SSource m_source

TSignedSeqPos GetFrom() const

const TSignedSeqRange & Limits() const

TSignedSeqPos GetTo() const

CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string Sample usage:

SPhyloCSFSlice * CreateSliceForContig(const string &contig_acc) const

container_type::const_iterator const_iterator

container_type::iterator iterator

const_iterator begin() const

const_iterator end() const

const_iterator lower_bound(const key_type &key) const

iterator_bool insert(const value_type &val)

const_iterator upper_bound(const key_type &key) const

container_type::value_type value_type

const_iterator find(const key_type &key) const

iterator_bool insert(const value_type &val)

const_iterator begin() const

parent_type::iterator iterator

const_iterator upper_bound(const key_type &key) const

const_iterator find(const key_type &key) const

const_iterator end() const

const_iterator lower_bound(const key_type &key) const

static const char si[8][64]

static vector< string > arr

bool Empty(const CNcbiOstrstream &src)

struct parameters_t * pb[]

static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)

static DLIST_TYPE *DLIST_NAME() last(DLIST_LIST_TYPE *list)

static DLIST_TYPE *DLIST_NAME() prev(DLIST_LIST_TYPE *list, DLIST_TYPE *item)

static DLIST_TYPE *DLIST_NAME() next(DLIST_LIST_TYPE *list, DLIST_TYPE *item)

static const TDS_WORD limits[]

CCigar LclAlign(const char *query, int querylen, const char *subject, int subjectlen, int gopen, int gapextend, const char delta[256][256])

int MaxCommonInterval(const T &a, const T &b, int long_enough=numeric_limits< int >::max())

vector< TResidue > CResidueVec

bool Precede(TSignedSeqRange l, TSignedSeqRange r)

set< CSupportInfo > CSupportInfoSet

list< CAlignModel > TAlignModelList

bool Include(TSignedSeqRange big, TSignedSeqRange small)

void ReverseComplement(const BidirectionalIterator &first, const BidirectionalIterator &last)

list< CGeneModel > TGeneModelList

bool IsStopCodon(const Res *seq, int strand=ePlus)

EStrand OtherStrand(EStrand s)

vector< CInDelInfo > TInDels

list< Model > GetAlignParts(const Model &algn, bool settrimflags)

bool IsStartCodon(const Res *seq, int strand=ePlus)

#define ITERATE(Type, Var, Cont)

ITERATE macro to sequence through container elements.

#define ERASE_ITERATE(Type, Var, Cont)

Non-constant version with ability to erase current element, if container permits.

int TSignedSeqPos

Type for signed sequence position.

#define VECTOR_ERASE(Var, Cont)

Use this macro inside body of ERASE_ITERATE cycle to erase from vector-like container.

#define NON_CONST_ITERATE(Type, Var, Cont)

Non constant version of ITERATE macro.

void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)

void AddFlag(const string &name, const string &comment, CBoolEnum< EFlagValue > set_value=eFlagHasValueIfSet, TFlags flags=0)

Add description for flag argument.

void AddKey(const string &name, const string &synopsis, const string &comment, EType type, TFlags flags=0)

Add description for mandatory key.

void AddOptionalKey(const string &name, const string &synopsis, const string &comment, EType type, TFlags flags=0)

Add description for optional key without default value.

void SetCurrentGroup(const string &group)

Set current arguments group name.

void AddDefaultKey(const string &name, const string &synopsis, const string &comment, EType type, const string &default_value, TFlags flags=0, const string &env_var=kEmptyStr, const char *display_value=nullptr)

Add description for optional key with default value.

@ eInputFile

Name of file (must exist and be readable)

@ eDouble

Convertible into a floating point number (double)

@ eInteger

Convertible into an integer number (int or Int8)

#define LOG_POST(message)

This macro is deprecated and it's strongly recomended to move in all projects (except tests) to macro...

#define NCBI_THROW(exception_class, err_code, message)

Generic macro to throw an exception, given the exception class, error code and message string.

virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)

Optimized implementation of CSerialObject::Assign, which is not so efficient.

CConstRef< CSeq_id > GetSeqId(void) const

string AsString(void) const

const CTextseq_id * GetTextseq_Id(void) const

Return embedded CTextseq_id, if any.

const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)

If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...

TSeqPos GetLength(const CSeq_id &id, CScope *scope)

Get sequence length if scope not null, else return max possible TSeqPos.

bool IsSameBioseq(const CSeq_id &id1, const CSeq_id &id2, CScope *scope, CScope::EGetBioseqFlag get_flag=CScope::eGetBioseq_All)

Determines if two CSeq_ids represent the same CBioseq.

@ eGetId_ForceAcc

return only an accession based seq-id

static CRef< CObjectManager > GetInstance(void)

Return the existing object manager or create one.

CBioseq_Handle GetBioseqHandle(const CSeq_id &id)

Get bioseq handle by seq-id.

void AddDefaults(TPriority pri=kPriority_Default)

Add default data loaders from object manager.

@ eCoding_Iupac

Set coding to printable coding (Iupacna or Iupacaa)

SAnnotSelector & IncludeFeatSubtype(TFeatSubtype subtype)

Include feature subtype in the search.

SAnnotSelector & SetResolveAll(void)

SetResolveAll() is equivalent to SetResolveMethod(eResolve_All).

bool IsSetPartial(void) const

SAnnotSelector & SetAdaptiveDepth(bool value=true)

SetAdaptiveDepth() requests to restrict subsegment resolution depending on annotations found on lower...

const CSeq_feat & GetMappedFeature(void) const

Feature mapped to the master sequence.

SAnnotSelector & AddNamedAnnots(const CAnnotName &name)

Add named annot to set of annots names to look for.

SAnnotSelector & SetFeatSubtype(TFeatSubtype subtype)

Set feature subtype (also set annotation and feat type)

const_iterator begin(void) const

const_iterator end(void) const

#define numeric_limits

Pre-declaration of the "numeric_limits<>" template Forcibly overrides (using preprocessor) the origin...

int64_t Int8

8-byte (64-bit) signed integer

position_type GetLength(void) const

bool NotEmpty(void) const

bool IntersectingWith(const TThisType &r) const

static TThisType GetEmpty(void)

static position_type GetWholeFrom(void)

CRange< TSignedSeqPos > TSignedSeqRange

static TThisType GetWhole(void)

static position_type GetWholeTo(void)

#define END_SCOPE(ns)

End the previously defined scope.

#define BEGIN_SCOPE(ns)

Define a new scope.

IO_PREFIX::ifstream CNcbiIfstream

Portable alias for ifstream.

static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)

Split a string using specified delimiters.

static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)

Convert numeric value to string.

static string & ToUpper(string &str)

Convert string to upper case – string& version.

static int CompareCase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)

Case-sensitive compare of a substring with another string.

@ fSplit_MergeDelimiters

Merge adjacent delimiters.

double Restart(void)

Return time elapsed since first Start() or last Restart() call (in seconds).

double Elapsed(void) const

Return time elapsed since first Start() or last Restart() call (in seconds).

void SetFrom(TFrom value)

Assign a value to From data member.

TTo GetTo(void) const

Get the To member data.

TFrom GetFrom(void) const

Get the From member data.

void SetTo(TTo value)

Assign a value to To data member.

const TLocation & GetLocation(void) const

Get the Location member data.

bool IsSetAccession(void) const

Check if a value has been assigned to Accession data member.

TVersion GetVersion(void) const

Get the Version member data.

bool IsSetVersion(void) const

Check if a value has been assigned to Version data member.

const TAccession & GetAccession(void) const

Get the Accession member data.

unsigned int

A callback function used to compare two keys in a database.

where boath are integers</td > n< td ></td > n</tr > n< tr > n< td > tse</td > n< td > optional</td > n< td > String</td > n< td class=\"description\"> TSE option controls what blob is orig

void AddComment(CSeq_feat &feat, const string &comment)

constexpr auto sort(_Init &&init)

constexpr auto front(list< Head, As... >, T=T()) noexcept -> Head

constexpr bool empty(list< Ts... >) noexcept

double value_type

The numeric datatype used by the parser.

const struct ncbi::grid::netcache::search::fields::SIZE size

Magic spell ;-) needed for some weird compilers... very empiric.

const GenericPointer< typename T::ValueType > T2 value

const CharType(& source)[N]

Defines the CNcbiApplication and CAppException classes for creating NCBI applications.

Defines command line argument related classes.

Defines unified interface to application:

Int4 delta(size_t dimension_, const Int4 *score_)

double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)

static SLJIT_INLINE sljit_ins ms(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)

static SLJIT_INLINE sljit_ins l(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)

static SLJIT_INLINE sljit_ins lb(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)

bool operator()(const TMemeberGeneVec::value_type &a, const TMemeberGeneVec::value_type &b)

bool operator()(const vector< CGeneModel * > *ap, const vector< CGeneModel * > *bp)

TOrigAligns & orig_aligns

AlignLenOrder(TOrigAligns &oa)

bool operator()(const CGeneModel *ap, const CGeneModel *bp)

TSignedSeqRange m_cds_limits

virtual bool model_predicate(CGeneModel &align)

bool operator()(const SChainMember *ap, const SChainMember *bp)

TAlignModelList & alignments

virtual bool align_predicate(CAlignModel &align)

virtual string GetComment()

ConnectsParalogs(TAlignModelList &_alignments)

CutShortPartialExons(int minex)

virtual void transform_align(CAlignModel &align)

virtual void transform_align(CAlignModel &align)

virtual void transform_align(CAlignModel &align)

DoNotBelieveShortPolyATail(int _minpolya)

bool operator()(const CGeneModel &a, const CGeneModel &b)

GModelOrder(TOrigAligns &oa)

TOrigAligns & orig_aligns

bool operator()(const SChainMember *ap, const SChainMember *bp)

virtual bool model_predicate(CGeneModel &align)

HasLongIntron(CGnomonEngine &gnomon)

virtual bool model_predicate(CGeneModel &align)

virtual bool model_predicate(CGeneModel &align)

HasShortIntron(CGnomonEngine &gnomon)

bool operator()(const SChainMember *ap, const SChainMember *bp)

bool operator()(const SChainMember *ap, const SChainMember *bp)

virtual bool model_predicate(CGeneModel &align)

LowSupport_Noncoding(int _minsupport)

MarkupCappedEst(const set< string > &_caps, int _capgap)

virtual void transform_align(CAlignModel &align)

const set< string > & caps

MarkupTrustedGenes(const set< string > &_trusted_genes)

virtual void transform_align(CAlignModel &align)

const set< string > & trusted_genes

virtual bool align_predicate(CAlignModel &align)

OverlapsSameAccessionAlignment(TAlignModelList &alignments)

virtual string GetComment()

virtual void transform_align(CAlignModel &align)

const map< string, TSignedSeqRange > & mrnaCDS

ProjectCDS(double _mininframefrac, const CResidueVec &_seq, CScope *_scope, const map< string, TSignedSeqRange > &_mrnaCDS)

ProteinWithBigHole(double hthresh, double hmaxlen, CGnomonEngine &gnomon)

virtual bool model_predicate(CGeneModel &align)

bool operator()(const TSignedSeqRange &a, const TSignedSeqRange &b) const

bool operator()(const SChainMember *ap, const SChainMember *bp)

bool operator()(const SChainMember *ap, const SChainMember *bp)

void MarkPostponedForChain()

TContained CollectContainedForMemeber()

const CCDSInfo * m_cds_info

bool m_marked_for_deletion

void MarkIncludedForChain()

SChainMember * m_left_member

bool m_restricted_to_start

CGeneModel * m_unmd_align

bool m_marked_for_retention

int m_fully_connected_to_part

SChainMember * m_right_member

TContained CollectCodingContainedForChain()

double m_right_splice_num

bool m_excluded_readthrough

void AddToContained(TContained &contained, TMemberPtrSet &included_in_list)

TContained CollectCodingContainedForMemeber()

void AddCodingToContained(TContained &contained, TMemberPtrSet &included_in_list)

SChainMember * m_sink_for_contained

TContained CollectContainedForChain()

int m_right_trusted_group

void MarkUnwantedCopiesForChain(const TSignedSeqRange &cds)

double m_accumulated_splice_num

CAlignModel * m_orig_align

list< TSignedSeqRange > m_confirmed_intervals

map< int, char > m_replacements

TInDels m_correction_indels

SFShiftsCluster(TSignedSeqRange limits=TSignedSeqRange::GetEmpty())

bool operator<(const SFShiftsCluster &c) const

bool NewIsBetter(int new_count, int new_not_wanted_count, int new_matches_count, int new_mem_id) const

bool operator<(const SLinker &sl) const

TSignedSeqRange m_reading_frame

bool OtherIsBetter(const SLinker &a) const

double m_utr_clip_threshold

bool operator()(const SChainMember *ap, const SChainMember *bp)

virtual bool model_predicate(CGeneModel &align)

virtual bool model_predicate(CGeneModel &align)

virtual void transform_align(CAlignModel &align)

void TrimProtein(CAlignModel &align, CAlignMap &alignmap)

TSignedSeqPos TrimCodingExonRight(const CAlignModel &align, const CModelExon &e, int trim)

void TrimTranscript(CAlignModel &align, CAlignMap &alignmap)

TSignedSeqPos TrimCodingExonLeft(const CAlignModel &align, const CModelExon &e, int trim)

TrimAlignment(int a_trim)

bool operator()(const CAlignModel *a, const CAlignModel *b)

s_ByAccVerLen(CScope &scope_)

int g(Seg_Gsm *spe, Seq_Mtf *psm, Thd_Gsm *tdg)


RetroSearch is an open source project built by @garambo | Open a GitHub Issue

Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo

HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4