set<pair<Int8,string>>
TIDs;
236:m_hmm_params(hmm_params), m_gnomon(gnomon), m_edited_contig_map(edited_contig_map), m_limits(
limits), m_contig_acc(contig_acc), m_idnext(1), m_idinc(1)
242 return m_data->MakeChains(models);
256m_align(0), m_cds_info(0), m_align_map(0), m_left_member(0), m_right_member(0), m_sink_for_contained(0),
257m_copy(0), m_contained(0), m_identical_count(0),
258m_left_num(0), m_right_num(0), m_num(0),
259m_splice_weight(0), m_left_splice_num(0), m_right_splice_num(0), m_splice_num(0),
260m_type(
eCDS), m_left_cds(0), m_right_cds(0), m_cds(0), m_included(
false), m_postponed(
false), m_internal(
false),
261m_marked_for_deletion(
false), m_marked_for_retention(
false), m_restricted_to_start(
false),
262m_gapped_connection(
false), m_fully_connected_to_part(-1), m_not_for_chaining(
false),
264m_trusted_group(0), m_left_trusted_group(0), m_right_trusted_group(0), m_cds_from_trusted(
false), m_excluded_readthrough(
false) {}
269 voidMarkIncludedForChain();
270 voidMarkPostponed();
271 voidMarkPostponedForChain();
274 TContainedCollectCodingContainedForMemeber();
291 int m_type, m_left_cds, m_right_cds, m_cds;
317tuple<TIDMap, TSignedSeqRange> PeaksAndLimits(
EStatusdeterminant,
intmin_blob_weight,
intmax_empty_dist,
intmin_splice_dist);
318tuple<TIVec, TSignedSeqRange> MainPeaks(
TIDMap& peak_weights,
doublesecondary_peak,
doubletertiary_peak,
doubletertiary_peak_coverage,
boolright_end);
324 voidRestoreTrimmedEnds(
inttrim);
325 voidRemoveFshiftsFromUTRs();
327 voidSetOpenForPartialyAlignedProteins(map<
string, pair<bool,bool> >& prot_complet);
328pair<bool,bool> ValidPolyA(
intpos,
const CResidueVec& contig);
329 voidClipToCap(
intmin_cap_blob,
intmax_dist,
intmin_flank_exon,
doublesecondary_peak,
boolrecalulate_support =
true);
330 voidClipToPolyA(
const CResidueVec& contig,
intmin_polya_blob,
intmax_dist,
intmin_flank_exon,
doublesecondary_peak,
doubletertiary_peak,
doubletertiary_peak_coverage,
boolrecalulate_support =
true);
331 voidCheckSecondaryCapPolyAEnds();
332 voidClipLowCoverageUTR(
doubleutr_clip_threshold,
boolrecalulate_support =
true);
333 voidCalculateDropLimits();
334 voidCalculateSupportAndWeightFromMembers(
boolkeep_all_evidence =
false);
338 voidSetConfirmedStartStopForCompleteProteins(map<
string, pair<bool,bool> >& prot_complet,
const SMinScor& minscor);
341 voidSetConsistentCoverage();
343 boolHarborsNested(
const CChain& other_chain,
boolcheck_in_holes)
const;
344 boolHarborsNested(
const CGene& other_gene,
boolcheck_in_holes)
const;
346 boolHasTrustedEvidence()
const;
370 typedeflist<CGeneModel>::iterator
TIt;
371 typedeflist<CGeneModel>::const_iterator
TConstIt;
374 boolIsAlternative(
const CChain&
a)
const;
375 boolIsAllowedAlternative(
constncbi::gnomon::CGeneModel&,
intmaxcomposite)
const;
378 bool Nested()
const{
return!m_nested_in_genes.empty(); }
379 boolLargeCdsOverlap(
const CGeneModel&
a)
const;
380 boolHarborsNested(
const CChain& other_chain,
boolcheck_in_holes)
const;
381 boolHarborsNested(
const CGene& other_gene,
boolcheck_in_holes)
const;
385set<CGene*> RemoveGeneFromOtherGenesSets();
401(*i)->RemoveFromHarbored(
this);
403(*i)->RemoveFromNestedIn(
this);
405 returnm_harbors_genes;
413 if(RealCdsLimits().NotEmpty())
414gene_lim_for_nested =
front()->OpenCds() ?
front()->MaxCdsLimits() : RealCdsLimits();
415 if(!
Include(gene_lim_for_nested,range))
420 if(RealCdsLimits().NotEmpty() && (*it)->ReadingFrame().Empty())
423 if((*it)->ReadingFrame().NotEmpty())
424model_lim_for_nested = (*it)->OpenCds() ? (*it)->MaxCdsLimits() : (*it)->RealCdsLimits();
443 returnHarborsRange(other_lim_for_nested, check_in_holes);
455 returnHarborsRange(other_lim_for_nested, check_in_holes);
466common_cds += (ib->Limits()&
b.RealCdsLimits()&ia->Limits()&
a.RealCdsLimits()).
GetLength();
480m_real_cds_limits +=
a.RealCdsLimits();
481m_maxscore =
max(m_maxscore,
a.Score());
490 if(
a.Support().empty()) {
496 if(s->IsCore() && ++composite > maxcomposite)
return false;
499 if(
a.PStop(
false) || !
a.FrameShifts().empty())
506vector<TSignedSeqRange> gene_gapfill_exons;
509gene_gapfill_exons.push_back(e->
Limits());
511vector<TSignedSeqRange> a_gapfill_exons;
514a_gapfill_exons.push_back(e->
Limits());
516 if(gene_gapfill_exons != a_gapfill_exons)
519 boola_share_intron =
false;
522set<TSignedSeqRange> b_introns;
523 for(
int i= 1;
i< (
int)
b.Exons().size(); ++
i) {
524 if(
b.Exons()[
i-1].m_ssplice &&
b.Exons()[
i].m_fsplice) {
526b_introns.insert(intron);
530 boola_has_new_intron =
false;
531 for(
int i= 1;
i< (
int)
a.Exons().size(); ++
i) {
532 if(
a.Exons()[
i-1].m_ssplice &&
a.Exons()[
i].m_fsplice &&
a.Exons()[
i-1].m_ssplice_sig !=
"XX"&&
a.Exons()[
i].m_fsplice_sig !=
"XX") {
534 if(b_introns.insert(intron).second)
535a_has_new_intron =
true;
537a_share_intron =
true;
541 if(a_has_new_intron) {
543}
else if(!gene_gapfill_exons.empty()) {
545}
else if(
a.RealCdsLimits().NotEmpty() &&
b.RealCdsLimits().NotEmpty() && !
a.RealCdsLimits().IntersectingWith(
b.RealCdsLimits()) && (!
a.TrustedmRNA().empty() || !
a.TrustedProt().empty())) {
550}
else if(
a.RealCdsLen() <=
b.RealCdsLen()){
555 return(a_share_intron || gene_gapfill_exons.empty());
562 if(
a.Strand() !=
front()->Strand())
565 boolgene_has_trusted =
false;
567 if((*it)->HasTrustedEvidence()) {
568gene_has_trusted =
true;
573 boolhas_common_splice =
false;
577has_common_splice =
true;
582 if(has_common_splice && (!gene_has_trusted || !
a.HasTrustedEvidence()))
585 if(
a.ReadingFrame().NotEmpty() && RealCdsLimits().NotEmpty()) {
586 CAlignMapamap(
a.Exons(),
a.FrameShifts(),
a.Strand(),
a.GetCdsInfo().Cds());
588 for(
unsigned intj = 0; j <
a.Exons().
size(); ++j) {
589 for(
TSignedSeqPosk =
max(
a.Exons()[j].GetFrom(),
a.GetCdsInfo().Cds().GetFrom()); k <=
min(
a.Exons()[j].GetTo(),
a.GetCdsInfo().Cds().GetTo()); ++k) {
591 _ASSERT(p < (
int)acds_map.size());
598 boolhas_common_cds =
false;
601 if(!
a.GetCdsInfo().Cds().IntersectingWith((*it)->GetCdsInfo().Cds()))
604 CAlignMapgmap((*it)->Exons(), (*it)->FrameShifts(), (*it)->Strand(), (*it)->GetCdsInfo().Cds());
606 for(
unsigned intj = 0; j < (*it)->Exons().
size(); ++j) {
607 for(
TSignedSeqPosk =
max((*it)->Exons()[j].GetFrom(),(*it)->GetCdsInfo().Cds().GetFrom()); k <=
min((*it)->Exons()[j].GetTo(),(*it)->GetCdsInfo().Cds().GetTo()); ++k) {
609 _ASSERT(p < (
int)cds_map.size());
615 for(
unsigned int i= 0;
i< acds_map.size(); ) {
617 for( ; j < cds_map.size() && (acds_map[
i] != cds_map[j] ||
i%3 != j%3); ++j);
618 if(j == cds_map.size()) {
624 for( ; j < cds_map.size() &&
i< acds_map.size() && acds_map[
i] == cds_map[j]; ++j, ++
i, ++
count);
627has_common_cds =
true;
636 returnhas_common_cds;
639 returnhas_common_splice;
644 if(!
a.Support().empty() &&
b.Support().empty())
646 else if(
a.Support().empty() && !
b.Support().empty())
650 boolatrusted = !
a.TrustedmRNA().empty() || !
a.TrustedProt().empty();
651 boolbtrusted = !
b.TrustedmRNA().empty() || !
b.TrustedProt().empty();
652 if(atrusted && !btrusted) {
654}
else if(btrusted && !atrusted) {
656}
else if(
a.ReadingFrame().NotEmpty() &&
b.ReadingFrame().Empty()) {
658}
else if(
b.ReadingFrame().NotEmpty() &&
a.ReadingFrame().Empty()) {
660}
else if(
a.ReadingFrame().NotEmpty()) {
662 doubleds = 0.05*
fabs(
a.Score());
663 doubleas =
a.Score();
673ds = 0.05*
fabs(
b.Score());
674 doublebs =
b.Score();
688 else if(
a.m_splice_weight >
b.m_splice_weight)
690 else if(
a.m_splice_weight <
b.m_splice_weight)
692 else if(
a.Weight() >
b.Weight())
694 else if(
a.Weight() <
b.Weight())
696 else if(
a.Limits().GetLength() !=
b.Limits().GetLength())
697 return(
a.Limits().GetLength() <
b.Limits().GetLength());
699 return a.ID() <
b.ID();
701 doubleasize =
a.m_splice_weight;
702 doublebsize =
b.m_splice_weight;
703 doubleds = 0.025*(asize+bsize);
721 else if(bsize > asize)
723 else if(
a.Limits().GetLength() !=
b.Limits().GetLength())
724 return(
a.Limits().GetLength() <
b.Limits().GetLength());
726 return a.ID() <
b.ID();
745 boolgene_good_enough_to_be_annotation = allow_partialalts || gene.front()->GoodEnoughToBeAnnotation();
748 TSignedSeqRangegene_cds = (gene.size() > 1 || gene.front()->CompleteCds() || algn_good_enough_to_be_annotation) ? gene.
RealCdsLimits() : gene.front()->MaxCdsLimits();
751 if(!gene_good_enough_to_be_annotation && !algn_good_enough_to_be_annotation) {
753 for(
int i= 1;
i< (
int)
b.Exons().size(); ++
i) {
754 if(
b.Exons()[
i].m_ssplice_sig ==
"XX"&&
b.Exons()[
i].m_fsplice_sig ==
"XX"&&
b.Exons()[
i].Limits().IntersectingWith(gene_cds)) {
759 for(
int i= 1;
i< (
int)algn.
Exons().size(); ++
i) {
760 if(algn.
Exons()[
i].m_ssplice_sig ==
"XX"&& algn.
Exons()[
i].m_fsplice_sig ==
"XX"&& algn.
Exons()[
i].Limits().IntersectingWith(algn_cds)) {
774}
else if(algn.
ReadingFrame().
Empty() || gene.front()->ReadingFrame().Empty()) {
778 returneNotCompatible;
779}
else if(algn.
RealCdsLen() > altfrac/100*gene.front()->RealCdsLen() || algn.
Score() > altfrac/100*gene.front()->Score()) {
784 returneNotCompatible;
788set<TSignedSeqRange> gene_gapfill_introns;
789set<TSignedSeqRange> align_gapfill_introns;
792 for(
int i= 1;
i< (
int)
b.Exons().size(); ++
i) {
793 if(
b.Exons()[
i-1].m_ssplice_sig ==
"XX"||
b.Exons()[
i].m_fsplice_sig ==
"XX") {
795gene_gapfill_introns.insert(intron);
799 for(
int i= 1;
i< (
int)algn.
Exons().size(); ++
i) {
800 if(algn.
Exons()[
i-1].m_ssplice_sig ==
"XX"|| algn.
Exons()[
i].m_fsplice_sig ==
"XX") {
802align_gapfill_introns.insert(intron);
805 ITERATE(set<TSignedSeqRange>, ig, gene_gapfill_introns) {
806 ITERATE(set<TSignedSeqRange>, ia, align_gapfill_introns) {
807 if(ig->IntersectingWith(*ia))
808 returneNotCompatible;
812 if(algn.
HarborsNested(gene, gene_good_enough_to_be_annotation))
815 if(gene.
HarborsNested(algn, algn_good_enough_to_be_annotation))
818 if(!algn_cds.
Empty() && !gene_cds.
Empty()) {
826 returneNotCompatible;
830 if(gene_good_enough_to_be_annotation && algn_good_enough_to_be_annotation) {
831 if(gene.front()->Strand() != algn.
Strand() && allow_opposite_strand &&
842 returneNotCompatible;
849 for(TChainPointerList::iterator itloop = not_placed_yet.begin(); itloop != not_placed_yet.end(); ) {
850TChainPointerList::iterator it = itloop++;
858list<CGene*> possibly_nested;
860 boolgood_model =
true;
861 for(list<CGene>::iterator itl = alts.begin(); good_model && itl != alts.end(); ++itl) {
866possibly_nested.push_back(&(*itl));
876alts.push_back(
CGene());
880alts.back().Insert(algn);
881not_placed_yet.erase(it);
884 ITERATE(list<CGene*>, itl, possibly_nested) {
885(*itl)->AddToNestedIn(&alts.back());
886alts.back().AddToHarbored(*itl);
895 for(TChainPointerList::iterator itloop = not_placed_yet.begin(); itloop != not_placed_yet.end(); ) {
896TChainPointerList::iterator it = itloop++;
899list<list<CGene>::iterator> included_in;
900list<CGene*> possibly_nested;
901list<CGene*> nested_in;
903 boolgood_model =
true;
904 for(list<CGene>::iterator itl = alts.begin(); good_model && itl != alts.end(); ++itl) {
909nested_in.push_back(&(*itl));
912possibly_nested.push_back(&(*itl));
917included_in.push_back(itl);
920 caseeNotCompatibleNested:
921 if(itl->IsAlternative(algn))
922included_in.push_back(itl);
935 CGene& gene = *included_in.front();
936 CChain& model = *gene.front();
943 if(algn_cds_len < 0.8*model_cds_len)
949not_placed_yet.push_back(gene.front());
953 ITERATE(list<CGene*>, itl, nested_in) {
955(*itl)->AddToHarbored(&gene);
957 ITERATE(list<CGene*>, itl, possibly_nested) {
958(*itl)->AddToNestedIn(&gene);
962not_placed_yet.erase(it);
970 for(TChainPointerList::iterator itloop = not_placed_yet.begin(); itloop != not_placed_yet.end(); ) {
971TChainPointerList::iterator it = itloop++;
974list<list<CGene>::iterator> included_in;
975list<CGene*> possibly_nested;
977 boolgood_model =
true;
978 for(list<CGene>::iterator itl = alts.begin(); good_model && itl != alts.end(); ++itl) {
983possibly_nested.push_back(&(*itl));
987included_in.push_back(itl);
995 if(good_model && !included_in.empty() && (allow_partialalts || included_in.front()->front()->GoodEnoughToBeAnnotation())) {
996 if(included_in.size() == 1) {
1001 CGene& gene = *included_in.front();
1003not_placed_yet.erase(it);
1005 ITERATE(list<CGene*>, itl, possibly_nested) {
1007(*itl)->AddToNestedIn(&gene);
1013 boolallow_connection =
false;
1016 boolcds_overlap =
true;
1018cds_overlap =
false;
1022 ITERATE(list<list<CGene>::iterator>, k, included_in) {
1023 if(!(*k)->IsAlternative(
a)) {
1024cds_overlap =
false;
1032algn.
AddComment(
"Gene overlap override");
1034allow_connection =
true;
1038 if(allow_connection) {
1039 CGene& gene = *included_in.front();
1042 ITERATE(list<list<CGene>::iterator>, k, included_in) {
1043 if(k != included_in.begin()) {
1046 if(CheckCompatibility(*included_in.front(), **
l) == eAlternative) {
1048(*l)->AddComment(
"Pass2b");
1050included_in.front()->Insert(**
l);
1052not_placed_yet.push_back(*
l);
1055TChainPointerList::iterator idest = itloop;
1057not_placed_yet.insert(idest, *
l);
1060set<CGene*> nested_genes = (*k)->RemoveGeneFromOtherGenesSets();
1061 ITERATE(set<CGene*>,
i, nested_genes)
1062possibly_nested.push_back(*
i);
1066not_placed_yet.erase(it);
1068 ITERATE(list<CGene*>, itl, possibly_nested) {
1070(*itl)->AddToNestedIn(&gene);
1086list<CGene>::iterator included_in(alts.end());
1087list<CGene*> possibly_nested;
1088list<CGene*> nested_in;
1090 boolgood_model =
true;
1091 for(list<CGene>::iterator itl = alts.begin(); good_model && itl != alts.end(); ++itl) {
1092 ECompat cmp= CheckCompatibility(*itl, algn);
1095 caseeNotCompatibleNested:
1096 caseeNotCompatible:
1097rejected.push_back(&algn);
1099ost <<
"Trumped by another model "<< itl->front()->ID();
1101 if(
cmp== eNotCompatibleNested)
1103good_model =
false;
1106 if(!allow_partialalts && !itl->front()->GoodEnoughToBeAnnotation()) {
1107rejected.push_back(&algn);
1109ost <<
"Trumped by another model "<< itl->front()->ID();
1111good_model =
false;
1112}
else if(included_in == alts.end()) {
1115good_model =
false;
1116rejected.push_back(&algn);
1118ost <<
"Connects two genes "<< itl->front()->ID() <<
" "<< included_in->front()->ID();
1123nested_in.push_back(&(*itl));
1126possibly_nested.push_back(&(*itl));
1134 if(included_in != alts.end()) {
1138included_in->Insert(algn);
1139genep = &(*included_in);
1141alts.push_back(
CGene());
1142genep = &alts.back();
1146alts.back().Insert(algn);
1148 ITERATE(list<CGene*>, itl, nested_in) {
1149 if((*itl)->HarborsNested(*genep,
true)) {
1151(*itl)->AddToHarbored(genep);
1154 ITERATE(list<CGene*>, itl, possibly_nested) {
1156(*itl)->AddToNestedIn(genep);
1170TChainPointerList::iterator jt_loop = it;
1171 for(++jt_loop; jt_loop != not_placed_yet.end();) {
1172TChainPointerList::iterator jt = jt_loop++;
1176ost <<
"Trumped by similar chain "<< ai.
ID();
1178rejected.push_back(&aj);
1179not_placed_yet.erase(jt);
1187 for(TChainPointerList::iterator it_loop = not_placed_yet.begin(); it_loop != not_placed_yet.end();) {
1188TChainPointerList::iterator it = it_loop++;
1195vector<const CChain*> candidates;
1200candidates.push_back(&aj);
1204 for(
size_t i= 0; alive &&
i< candidates.size(); ++
i) {
1205 for(
size_tj =
i+1; alive && j < candidates.size(); ++j) {
1206 if(!candidates[
i]->Limits().IntersectingWith(candidates[j]->Limits())) {
1208ost <<
"Overlapping tandem "<< candidates[
i]->ID() - ai.
ID() <<
" "<< candidates[j]->ID() - ai.
ID();
1210rejected.push_back(*it);
1211not_placed_yet.erase(it);
1226it->SetGeneID(it->ID());
1227it->SetRankInGene(0);
1228not_placed_yet.push_back(&(*it));
1235FilterOutSimilarsWithLowerScore(not_placed_yet, bad_aligns);
1236FilterOutTandemOverlap(not_placed_yet, bad_aligns, 80);
1238FindGeneSeeds(alts, not_placed_yet);
1239ReplacePseudoGeneSeeds(alts, not_placed_yet);
1240FindAltsForGeneSeeds(alts, not_placed_yet);
1241PlaceAllYouCan(alts, not_placed_yet, bad_aligns);
1246(*l)->SetGeneID(k->front()->ID());
1247(*l)->SetRankInGene(++rank);
1255(*l)->AddComment(
"Not placed");
1278 if(alimits == blimits)
1281 return(alimits.
GetTo() > blimits.
GetTo());
1290 typedeftuple<Int8, TSignedSeqRange>
TIdLim;
1316gmembers.insert(&m);
1325 if(members_genes.empty())
1330 typedefmap<CGene*,list<SChainMember*> > TGeneToMembers;
1331 typedefmap<TIdLim, TGeneToMembers> TMembersInDiffGenes;
1332TMembersInDiffGenes members_in_different_genes;
1336 CGene* genep = members_genes.front().second;
1337members_in_different_genes[idlim][genep].push_back(mp);
1339 for(
int i= 1;
i< (
int)members_genes.size(); ++
i) {
1343 CGene* genep = members_genes[
i].second;
1344 if(idlim_prev != idlim) {
1345TMembersInDiffGenes::iterator it = members_in_different_genes.find(idlim_prev);
1346 if(it->second.size() < 2)
1347members_in_different_genes.erase(it);
1349members_in_different_genes[idlim][genep].push_back(mp);
1354TMembersInDiffGenes::iterator it = members_in_different_genes.find(idlim);
1355 if(it->second.size() < 2)
1356members_in_different_genes.erase(it);
1359 ITERATE(TMembersInDiffGenes, imdg, members_in_different_genes) {
1360 ITERATE(TGeneToMembers, ig1, imdg->second) {
1361 CGene& gene1 = *ig1->first;
1369 typedefmap<CChain*,TMemberPtrSet> TConflictMemebersInChains;
1370TConflictMemebersInChains conflict_members_in_chains;
1372 ITERATE(TMembersInDiffGenes, imdg, members_in_different_genes) {
1373 ITERATE(TGeneToMembers, ig1, imdg->second) {
1374 CGene& gene1 = *ig1->first;
1376 CChain* chain1p_orig = *ic1;
1378 for(list<SChainMember*>::const_iterator im = ig1->second.begin(); im != ig1->second.end() && mbr1p_orig == 0; ++im) {
1379 if(binary_search(chain1p_orig->
m_members.begin(),chain1p_orig->
m_members.end(),*im, std::less<SChainMember*>()))
1382 for(TGeneToMembers::const_iterator ig2 = imdg->second.begin(); mbr1p_orig != 0 && ig2 != ig1; ++ig2) {
1383 CGene& gene2 = *ig2->first;
1385 CChain* chain1p = chain1p_orig;
1389 for(list<SChainMember*>::const_iterator im = ig2->second.begin(); im != ig2->second.end() && mbr2p == 0; ++im) {
1390 if(binary_search(chain2p->
m_members.begin(),chain2p->
m_members.end(),*im, std::less<SChainMember*>()))
1397 if(chain1p->
Exons().size() > 1)
1400 if(chain2p->
Exons().size() > 1)
1405 swap(chain1p,chain2p);
1410 CChain& chain1 = *chain1p;
1411 CChain& chain2 = *chain2p;
1415conflict_members_in_chains[&chain2].insert(mbr2p);
1417conflict_members_in_chains[&chain1].insert(mbr1p);
1418}
else if(
Precede(core1,core2)) {
1419 if(
Precede(align_lim,core1))
1420conflict_members_in_chains[&chain2].insert(mbr2p);
1421 else if(
Precede(core2,align_lim))
1422conflict_members_in_chains[&chain1].insert(mbr1p);
1426conflict_members_in_chains[&chain1].insert(mbr1p);
1428conflict_members_in_chains[&chain2].insert(mbr2p);
1431conflict_members_in_chains[&chain1].insert(mbr1p);
1433conflict_members_in_chains[&chain2].insert(mbr2p);
1436conflict_members_in_chains[&chain2].insert(mbr2p);
1438conflict_members_in_chains[&chain1].insert(mbr1p);
1440conflict_members_in_chains[&chain1].insert(mbr1p);
1441conflict_members_in_chains[&chain2].insert(mbr2p);
1445conflict_members_in_chains[&chain1].insert(mbr1p);
1446conflict_members_in_chains[&chain2].insert(mbr2p);
1455 for(
CGene& gene : genes) {
1456 for(
CChain* chainp : gene)
1472 ITERATE(TConflictMemebersInChains, it, conflict_members_in_chains) {
1473 CChain& chain = *it->first;
1479hard_limits = (hard_limits & chain.
Limits());
1557}
else if(alim.
GetTo() > noclip_limits.
GetTo()) {
1566 intleft_splice = -1;
1567 intright_splice = -1;
1568 for(
inte = 1; e < (
int)chain.
Exons().size(); ++e) {
1569 if(left_splice < 0 && chain.
Exons()[e-1].m_ssplice &&
Include(new_limits,chain.
Exons()[e-1].GetTo()))
1570left_splice = chain.
Exons()[e-1].GetTo();
1571 if(chain.
Exons()[e].m_fsplice &&
Include(new_limits,chain.
Exons()[e].GetFrom()))
1572right_splice = chain.
Exons()[e].GetFrom();
1575 doubleleft_weights_total = 0.;
1577 doubleright_weights_total = 0.;
1583 for(
inte = 1; e < (
int)
a.Exons().size(); ++e) {
1584 if(
a.Exons()[e-1].m_ssplice &&
a.Exons()[e-1].GetTo() == left_splice) {
1585left_weights[alim.
GetFrom()] +=
a.Weight();
1586left_weights_total +=
a.Weight();
1588 if(
a.Exons()[e].m_fsplice &&
a.Exons()[e].GetFrom() == right_splice) {
1589right_weights[alim.
GetTo()] +=
a.Weight();
1590right_weights_total +=
a.Weight();
1594 if(left_weights_total > 0.) {
1597 for(map<int,double>::reverse_iterator it = left_weights.rbegin(); it != left_weights.rend(); ++it) {
1598 if(
t< 0.9*left_weights_total)
1602 if(left < new_limits.
GetFrom())
1605 if(right_weights_total > 0.) {
1609 if(
t< 0.9*right_weights_total)
1613 if(right > new_limits.
GetTo())
1614new_limits.
SetTo(right);
1629 if(new_limits != chain.
Limits()) {
1635note +=
" overlap UTR clip";
1639 boolwasopen = chain.
OpenCds();
1645 m_gnomon->GetScore(chain, !no5pextension);
1647 if(wasopen != chain.
OpenCds() && (wasopen ==
false|| cds.
HasStart())) {
1658 if(m_type !=
eCDS)
1661deque<const SChainMember*> not_visited(1,
this);
1662 while(!not_visited.empty()) {
1664not_visited.pop_back();
1669 if(c < mbr->m_identical_count) {
1670 if(included_in_list.insert(mi).second) {
1671contained.push_back(mi);
1673included_in_list.insert(mi->
m_copy->begin(),mi->
m_copy->end());
1675}
else if(included_in_list.find(mi) == included_in_list.end()) {
1676not_visited.push_back(mi);
1686AddCodingToContained(contained, included_in_list);
1696AddCodingToContained(contained, included_in_list);
1699left->AddCodingToContained(contained, included_in_list);
1703right->AddCodingToContained(contained, included_in_list);
1710deque<const SChainMember*> not_visited(1,
this);
1711 while(!not_visited.empty()) {
1713not_visited.pop_back();
1716 if(c < mbr->m_identical_count) {
1717 if(included_in_list.insert(mi).second) {
1718contained.push_back(mi);
1720included_in_list.insert(mi->
m_copy->begin(),mi->
m_copy->end());
1722}
else if(included_in_list.find(mi) == included_in_list.end()) {
1723not_visited.push_back(mi);
1733AddToContained(contained, included_in_list);
1743AddToContained(contained, included_in_list);
1746left->AddToContained(contained, included_in_list);
1750right->AddToContained(contained, included_in_list);
1756 #define START_BONUS 600 1774 TContainedcontained = CollectContainedForChain();
1783m_postponed =
true;
1798 TContainedcontained = CollectContainedForChain();
1807 TContainedcontained = CollectContainedForChain();
1854 return(alimits.
GetTo() < blimits.
GetTo());
1875 if(alimits == blimits)
1877 else if(alimits.
GetTo() == blimits.
GetTo())
1880 return(alimits.
GetTo() < blimits.
GetTo());
1903 return(alimits.
GetTo() < blimits.
GetTo());
1926 if(alimits == blimits)
1929 return(alimits.
GetTo() < blimits.
GetTo());
1965 sort(container.begin(),container.end());
1966container.erase( unique(container.begin(),container.end()), container.end() );
1976 voidInsertMemberCopyWithoutCds(
SChainMember* copy_ofp);
1991m_members.splice(m_members.end(),other.
m_members);
1992m_copylist.splice(m_copylist.end(),other.
m_copylist);
1993m_align_maps.splice(m_align_maps.end(),other.
m_align_maps);
1994m_containedlist.splice(m_containedlist.end(),other.
m_containedlist);
1995m_extra_cds.splice(m_extra_cds.end(),other.
m_extra_cds);
1996insert(end(),other.begin(),other.end());
2004InsertMember(mbr, copy_ofp);
2009m_extra_cds.push_back(cds);
2010InsertMemberCopyWithCds(m_extra_cds.back(), copy_ofp);
2018InsertMember(mbr, copy_ofp);
2034InsertMember(mbr, copy_ofp);
2040m_members.push_back(m);
2041push_back(&m_members.back());
2044m_members.back().m_contained = &m_containedlist.back();
2050m_members.back().m_align_map = &m_align_maps.back();
2052m_members.back().m_align_map = copy_ofp->
m_align_map;
2055 if(copy_ofp != 0) {
2056 if(copy_ofp->
m_copy== 0) {
2057m_copylist.push_back(
TContained(1,copy_ofp));
2058copy_ofp->
m_copy= &m_copylist.back();
2060m_members.back().m_copy = copy_ofp->
m_copy;
2061copy_ofp->
m_copy->push_back(&m_members.back());
2070InsertMember(new_mbr, copy_ofp);
2076m_extra_cds.push_back(
CCDSInfo());
2078InsertMember(*itcl);
2079m_members.back().m_orig_align = orig_aligns[itcl->ID()];
2080 if(unmodified_aligns.count(itcl->ID()))
2081m_members.back().m_unmd_align = &unmodified_aligns[itcl->ID()];
2103 boolsmall_flex =
false;
2113 if(big_limits == small_limits) {
2131 m_data->CutParts(models);
2137 if(!parts.empty()) {
2138models.splice(models.begin(),parts);
2146 size_tinitial_size = pointers.size();
2147 for(
size_t i= 0;
i< initial_size; ++
i) {
2154clust.push_back(new_algn);
2162 size_tinitial_size = pointers.size();
2163 for(
size_t i= 0;
i< initial_size; ++
i) {
2176map<int, set<int> > oriented_splices;
2177 ITERATE(set<TSignedSeqRange>,
i, oriented_introns_plus) {
2178oriented_splices[
ePlus].insert(
i->GetFrom());
2179oriented_splices[
ePlus].insert(
i->GetTo());
2181 ITERATE(set<TSignedSeqRange>,
i, oriented_introns_minus) {
2182oriented_splices[
eMinus].insert(
i->GetFrom());
2183oriented_splices[
eMinus].insert(
i->GetTo());
2204 typedefvector<pair<CCDSInfo::SPStop,TSignedSeqRange> > TPstopIntron;
2205TPstopIntron pstops_with_intron_plus;
2206TPstopIntron pstops_with_intron_minus;
2210TPstopIntron& pstops_with_intron = (algn.
Strand() ==
ePlus) ? pstops_with_intron_plus : pstops_with_intron_minus;
2213left =
min(left,s->GetFrom());
2214right =
max(right,s->GetTo());
2215 if(s->GetLength() == 3) {
2218 for(
int i= 1;
i< (
int)algn.
Exons().size(); ++
i) {
2220pstops_with_intron.push_back(make_pair(*s,intron));
2226 uniq(pstops_with_intron_plus);
2227 uniq(pstops_with_intron_minus);
2239TPstopIntron& pstops_with_intron = (algn.
Strand() ==
ePlus) ? pstops_with_intron_plus : pstops_with_intron_minus;
2240 if(pstops_with_intron.empty())
2247 ITERATE(TPstopIntron,
si, pstops_with_intron) {
2248 if(
si->second.GetLength() == 1) {
2249 if(
si->first == *s)
2252 for(
int i= 1;
i< (
int)algn.
Exons().size(); ++
i) {
2254 if(
si->second == intron &&
si->first == *s)
2267 ITERATE(TPstopIntron,
si, pstops_with_intron) {
2272 for(
int i= 0;
i< (
int)exons.size(); ++
i) {
2273 if(
Include(exons[
i].Limits(),
si->first.GetFrom())) {
2274 if(
si->second.GetLength() == 1) {
2275 if(
si->first.GetTo() <= exons[
i].GetTo())
2278 if(
i< (
int)exons.size()-1) {
2280 if(intron ==
si->second &&
si->first.GetTo() <= exons[
i+1].GetTo())
2305 double ms= GoodCDNAScore(algn);
2306RemovePoorCds(algn,
ms);
2326 intstrand = align.
Strand();
2327 if(!tgr_info.count(strand))
2333 autosecond = upper_bound(tinfo.begin(), tinfo.end(), align.
Limits().
GetTo(),
2335 if(
first== second)
2345 for(
auto& e : align.
Exons()) {
2346 autooverlap = (e.
Limits()&acds);
2347 for(
TSignedSeqPosk = overlap.GetFrom(); k <= overlap.GetTo(); ++k) {
2349 if(p >= 0 && p%3 == 0) {
2350 _ASSERT(p/3 < (
int)acds_map.size());
2358 boolparalogs =
false;
2360 for(
autoit =
first; it != second; ++it) {
2365 inttgr = it-tinfo.begin()+1;
2370 for(
const TIVec& tcds_map : it->m_cds_maps) {
2371 intlong_enough = 10;
2373 if(mci >= long_enough) {
2379 autonum = prev_accessions.
size();
2380 for(
auto&
id: it->m_ids)
2381prev_accessions.
insert(
id.second);
2382 if(num+it->m_ids.size() != prev_accessions.
size())
2400cerr <<
"Chimeric chain member "<< align.
ID() <<
" "<< (paralogs ?
"paralogs":
"unrelated") <<
" "<< flag << endl;
2402 if(mbr.
m_copy!=
nullptr) {
2403 for(
autop : *mbr.
m_copy)
2404p->m_marked_for_deletion =
true;
2413 size_tinitial_size = pointers.size();
2414 for(
size_t i= 0;
i< initial_size; ++
i) {
2439initial_size = pointers.size();
2440 for(
unsigned int i= 0;
i< initial_size; ++
i) {
2472 if(mbr.
m_copy->front()->m_align->Strand() == algn.
Strand()) {
2475}
else if((*mbr.
m_copy)[1]->m_cds_info->ReadingFrame() == cdsinfo.
ReadingFrame()) {
2490 if(indl->InDelEnd() > lim.
GetFrom() && indl->Loc() <= lim.
GetTo())
2491fs.push_back(*indl);
2503 booljflex =
false;
2529 if(!jflex && jlimits.
GetTo()-ai_max_cds.
GetFrom() >= 5)
2537 if(!jflex && ai_max_cds.
GetTo()-jlimits.
GetFrom() >= 5)
2560 if(
abs(j_from-i_from)%3 != 0)
2565 intiex = (
int)ai.
Exons().size();
2566 intjex = (
int)aj.
Exons().size();
2577 set<int>left_exon_ends, right_exon_ends;
2580 for(
int i= 1;
i< (
int)algn.
Exons().size(); ++
i) {
2581 if(algn.
Exons()[
i-1].m_ssplice && algn.
Exons()[
i].m_fsplice) {
2582left_exon_ends.
insert(algn.
Exons()[
i].GetFrom());
2583right_exon_ends.
insert(algn.
Exons()[
i-1].GetTo());
2593 if(ri != right_exon_ends.
end())
2597 if(li != left_exon_ends.
end())
2606 for(
int i= 0;
i< (
int)pointers.size(); ++
i) {
2618 stringssplice = ai.
Exons()[
i-1].m_ssplice_sig;
2619 stringfsplice = ai.
Exons()[
i].m_fsplice_sig;
2620 if(ssplice ==
"XX"|| fsplice ==
"XX")
2622 else if(ai.
Strand() ==
ePlus&& ((ssplice !=
"GT"&& ssplice !=
"GC") || fsplice !=
"AG"))
2624 else if(ai.
Strand() ==
eMinus&& (ssplice !=
"AG"|| (fsplice !=
"GT"&& fsplice !=
"GC")))
2648 if(indl->IntersectingWith(e->
GetFrom(), e->
GetTo()))
2654 if(pointers[jfirst]->m_align->Limits() != ai.
Limits())
2656 for(
intj = jfirst; j < (
int)pointers.size() && pointers[j]->m_align->Limits().GetFrom() <= ai.
Limits().
GetTo(); ++j) {
2658IncludeInContained(mi, mi);
2666 if(CanIncludeJinI(mi, mj))
2667IncludeInContained(mi, mj);
2697 #define NON_CDNA_INTRON_PENALTY 20 2722 else if(mj.
m_type==
eLeftUTR&& (!ai_left_complete || (!j_rflexible && (aj.
Limits()&ai_rf).GetLength() > 5)))
2734 else if(mj.
m_type==
eCDS&& (!aj_right_complete || (!i_lflexible && (ai.
Limits()&aj_rf).GetLength() > 5)))
2748 if(j_rflexible || i_lflexible)
2750 if((ai.
Limits() & aj.
Limits()).GetLength() < intersect_limit)
2761 intcds_overlap = 0;
2765 if(genome_overlap < 0)
2777 if(cds_overlap%3 != 0)
2784 for(
int i= 1;
i< (
int)ai.
Exons().size(); ++
i) {
2785 if(ai.
Exons()[
i-1].m_ssplice && ai.
Exons()[
i].m_fsplice) {
2787 if(
Include(ai_rf,intron) &&
Include(aj_rf,intron) && mrna_count[intron]+est_count[intron]+rnaseq_count[intron] == 0) {
2795delta_cds = mi.
m_cds-cds_overlap;
2798delta_splice_num = 0;
2799 if(delta_cds >= 0) {
2803 if(!j_rflexible && !i_lflexible)
2804 first= upper_bound(contained.begin(), contained.end(), &mj,
LeftOrder())-contained.begin();
2806not_sorted =
false;
2807contained.back()->m_accumulated_num = contained.back()->m_align->Weight();
2808contained.back()->m_accumulated_splice_num = contained.back()->m_splice_weight;
2809 for(
int i= (
int)contained.size()-2;
i>=
first; --
i) {
2810contained[
i]->m_accumulated_num = contained[
i]->m_align->Weight()+contained[
i+1]->m_accumulated_num;
2811contained[
i]->m_accumulated_splice_num = contained[
i]->m_splice_weight+contained[
i+1]->m_accumulated_splice_num;
2815delta_num = contained[
first]->m_accumulated_num;
2816delta_splice_num = contained[
first]->m_accumulated_splice_num;
2830 for(
autop : micontained) {
2844 for(
int i= 1;
i< (
int)ai.
Exons().size(); ++
i) {
2845 if(ai.
Exons()[
i-1].m_ssplice && ai.
Exons()[
i].m_fsplice) {
2847 if(
Include(ai_rf,intron) && mrna_count[intron]+est_count[intron]+rnaseq_count[intron] == 0) {
2869}
else if(align.TrustedGroup() != 0) {
2870tgr = align.TrustedGroup();
2874 _ASSERT(tgr == 0 || (tgr > 0 && align.Strand() ==
ePlus) || (tgr < 0 && align.Strand() ==
eMinus));
2880 TIVecright_ends(pointers.size());
2881 for(
intk = 0; k < (
int)pointers.size(); ++k) {
2882 auto& kalign = *pointers[k]->m_align;
2883 intrend = kalign.Limits().GetTo();
2885rend = kalign.Limits().GetFrom();
2886right_ends[k] = rend;
2892LRIinit(mi, micontained);
2893 boolnot_sorted =
true;
2896TIVec::iterator
lb= lower_bound(right_ends.begin(),right_ends.end(),ai.
Limits().
GetFrom()-2*flex_len);
2897TContained::iterator jfirst = pointers.begin();
2898 if(
lb!= right_ends.end())
2899jfirst = pointers.begin()+(
lb-right_ends.begin());
2900 for(TContained::iterator j = jfirst; j <
i; ++j) {
2904 if(aj.
Exons().back().m_fsplice_sig ==
"XX"|| ai.
Exons().front().m_ssplice_sig ==
"XX")
2912 doubledelta_splice_num;
2913 if(LRCanChainItoJ(delta_cds, delta_num, delta_splice_num, mi, mj, micontained, not_sorted)) {
2918 booltrust_compatible =
true;
2922trust_compatible =
false;
2924trust_compatible =
false;
2926 boolbetter_connection =
false;
2928better_connection = (newcds > mi.
m_left_cds);
2932better_connection =
true;
2935 if(better_connection) {
2936 if(trust_compatible) {
2956 intstrand = ai.
Strand();
2957 _ASSERT(tgr == 0 || (tgr > 0 && strand ==
ePlus) || (tgr < 0 && strand ==
eMinus));
2965 TIVecleft_ends(pointers.size());
2966 for(
intk = 0; k < (
int)pointers.size(); ++k) {
2967 auto& kalign = *pointers[k]->m_align;
2968 intlend = kalign.Limits().GetFrom();
2970lend = kalign.Limits().GetTo();
2971left_ends[k] = lend;
2996 boolnot_sorted =
true;
2999TIVec::iterator
lb= lower_bound(left_ends.begin(),left_ends.end(),ai.
Limits().
GetTo()+2*flex_len,greater<int>());
3000TContained::iterator jfirst = pointers.begin();
3001 if(
lb!= left_ends.end())
3002jfirst = pointers.begin()+(
lb-left_ends.begin());
3003 for(TContained::iterator j = jfirst; j <
i; ++j) {
3007 if(aj.
Exons().front().m_ssplice_sig ==
"XX"|| ai.
Exons().back().m_fsplice_sig ==
"XX")
3026 if(mj.
m_type==
eRightUTR&& (!ai_right_complete || (!j_lflexible && (aj.
Limits()&ai_rf).GetLength() > 5)))
3038 if(mj.
m_type==
eCDS&& (!aj_left_complete || (!i_rflexible && (ai.
Limits()&aj_rf).GetLength() > 5)))
3054 if(j_lflexible || i_rflexible)
3058 if(intersect < intersect_limit)
continue;
3069 intcds_overlap = 0;
3073 if(genome_overlap < 0)
3085 if(cds_overlap%3 != 0)
3092 for(
int i= 1;
i< (
int)ai.
Exons().size(); ++
i) {
3093 if(ai.
Exons()[
i-1].m_ssplice && ai.
Exons()[
i].m_fsplice) {
3095 if(
Include(ai_rf,intron) &&
Include(aj_rf,intron) && mrna_count[intron]+est_count[intron]+rnaseq_count[intron] == 0) {
3104 intdelta_cds = mi.
m_cds-cds_overlap;
3114 if(!j_lflexible && !i_rflexible)
3115 first= upper_bound(micontained.begin(),micontained.end(),&mj,
RightOrder())-micontained.begin();
3117not_sorted =
false;
3118micontained.back()->m_accumulated_num = micontained.back()->m_align->Weight();
3119micontained.back()->m_accumulated_splice_num = micontained.back()->m_splice_weight;
3120 for(
int i= (
int)micontained.size()-2;
i>=
first; --
i) {
3121micontained[
i]->m_accumulated_num = micontained[
i]->m_align->Weight()+micontained[
i+1]->m_accumulated_num;
3122micontained[
i]->m_accumulated_splice_num = micontained[
i]->m_splice_weight+micontained[
i+1]->m_accumulated_splice_num;
3126 doubledelta_num = micontained[
first]->m_accumulated_num;
3127 doubledelta_splice_num = micontained[
first]->m_accumulated_splice_num;
3132 booltrust_compatible =
true;
3136trust_compatible =
false;
3138trust_compatible =
false;
3140 boolbetter_connection =
false;
3146better_connection =
true;
3149 if(better_connection) {
3150 if(trust_compatible) {
3169 intstrand = ai.
Strand();
3170 _ASSERT(tgr == 0 || (tgr > 0 && strand ==
ePlus) || (tgr < 0 && strand ==
eMinus));
3181vector<const SChainMember*> mal;
3184mal.push_back(left);
3187mal.push_back(right);
3190 stringnote = to_string(mi.
m_align->
ID());
3191 ITERATE(vector<const SChainMember*>, imal, mal) {
3193 if((*imal)->m_excluded_readthrough)
3195note = note+
" "+star+to_string((*imal)->m_align->ID());
3201map<TSignedSeqRange,int>& mrna_count, map<TSignedSeqRange,int>& est_count, map<TSignedSeqRange,int>& rnaseq_count) {
3203 for(
int i= 1;
i< (
int)chain.
Exons().size() && good; ++
i) {
3204 if(chain.
Exons()[
i-1].m_ssplice && chain.
Exons()[
i].m_fsplice) {
3215map<TSignedSeqRange,int>& mrna_count, map<TSignedSeqRange,int>& est_count, map<TSignedSeqRange,int>& rnaseq_count) {
3218(*i)->m_marked_for_deletion = !
GoodSupportForIntrons(*(*i)->m_align, minscor, mrna_count, est_count, rnaseq_count);
3229 if(
a.Limits() !=
b.Limits())
3230 return a.Limits() <
b.Limits();
3235 returnaflex < bflex;
3237 return*orig_aligns[
a.ID()]->GetTargetId() < *orig_aligns[
b.ID()]->GetTargetId();
3243map<tuple<int, int>, TGeneModelList::iterator> special_aligns;
3245 for(TGeneModelList::iterator it = clust.begin(); it != clust.end(); ++it) {
3248special_aligns.emplace(make_tuple(status, it->Limits().GetTo()), it);
3252special_aligns.emplace(make_tuple(status, it->Limits().GetFrom()), it);
3258 for(TGeneModelList::iterator it = clust.begin(); it != clust.end(); ++it) {
3269 if(it->Strand() ==
ePlus) {
3270pos = it->Limits().GetFrom();
3274pos = it->Limits().GetTo();
3279galign.
Status() |= status;
3280clust.push_front(galign);
3281 autorslt = special_aligns.emplace(make_tuple(status, pos), clust.begin());
3283 autoialign = rslt.first->second;
3284ialign->SetWeight(ialign->Weight()+galign.
Weight());
3296 if(it->Strand() ==
eMinus) {
3297pos = it->Limits().GetFrom();
3301pos = it->Limits().GetTo();
3306galign.
Status() |= status;
3307clust.push_front(galign);
3308 autorslt = special_aligns.emplace(make_tuple(status, pos), clust.begin());
3310 autoialign = rslt.first->second;
3311ialign->SetWeight(ialign->Weight()+galign.
Weight());
3319 for(
auto& sa : special_aligns) {
3320 autoialign = sa.second;
3321 doublemin_pos_weight = ((ialign->Status()&
CGeneModel::eCap) ? min_cap_weight : min_polya_weight);
3322 if(ialign->Limits().GetFrom() < 0 || ialign->Limits().GetTo() >= contig_len || ialign->Weight() < min_pos_weight)
3323clust.erase(ialign);
3329confirmed_ends.clear();
3330all_frameshifts.clear();
3333 if(use_confirmed_ends) {
3335 autorslt = confirmed_ends.emplace(align.
Exons().front().GetTo(), align.
Exons().front().GetFrom());
3337rslt.first->second =
min(rslt.first->second, align.
Exons().front().GetFrom());
3340 autorslt = confirmed_ends.emplace(align.
Exons().back().GetFrom(), align.
Exons().back().GetTo());
3342rslt.first->second =
max(rslt.first->second, align.
Exons().back().GetTo());
3345all_frameshifts.insert(all_frameshifts.end(), align.
FrameShifts().begin(), align.
FrameShifts().end());
3346 for(
int i= 1;
i< (
int)align.
Exons().size(); ++
i) {
3347 if(align.
Exons()[
i-1].m_ssplice && align.
Exons()[
i].m_fsplice) {
3352oriented_introns_plus.insert(intron);
3354oriented_introns_minus.insert(intron);
3358mrna_count[intron] += align.
Weight();
3360est_count[intron] += align.
Weight();
3362rnaseq_count[intron] += align.
Weight();
3367has_rnaseq = !rnaseq_count.empty();
3368 sort(all_frameshifts.begin(),all_frameshifts.end());
3369 if(!all_frameshifts.empty())
3370 uniq(all_frameshifts);
3381 for(
int i= 1;
i< (
int)align.
Exons().size(); ++
i) {
3382 if(align.
Exons()[
i-1].m_ssplice && align.
Exons()[
i].m_fsplice) {
3384 if(oriented_introns_plus.find(intron) != oriented_introns_plus.end())
3386 if(oriented_introns_minus.find(intron) != oriented_introns_minus.end())
3390 if(pluses > 0 && minuses == 0) {
3394}
else if(minuses > 0 && pluses == 0) {
3411CreateFlexibleAligns(clust);
3416align.SetTrustedCds(align.TrustedCds()&align.Limits());
3418 CChainMembersallpointers(clust, orig_aligns, unmodified_aligns);
3420DuplicateNotOriented(allpointers, clust);
3421ReplicatePStops(allpointers);
3422ScoreCdnas(allpointers);
3423Duplicate5pendsAndShortCDSes(allpointers);
3424DuplicateUTRs(allpointers);
3425CalculateSpliceWeights(allpointers);
3429FindOverlappingWithTrusted(allpointers);
3433FindContainedAlignments(allpointers);
3439 _ASSERT((*ip)->m_orig_align);
3440 if(!(*ip)->m_not_for_chaining)
3441pointers.push_back(*
ip);
3447coding_pointers.push_back(*
i);
3450LeftRight(coding_pointers);
3451RightLeft(coding_pointers);
3455array<map<TSignedSeqPos,TSignedSeqRange>, 2> coding_right_splices;
3456array<map<TSignedSeqPos,TSignedSeqRange>, 2> coding_left_splices;
3457array<set<TSignedSeqRange>, 2> coding_introns;
3475 CChainchain(mi,
false);
3478 m_gnomon->GetScore(chain,
false,
false,
true);
3485(cdslen < minscor.m_minlen || (chain.
Score() < 2*minscor.m_min && cdslen < 2*minscor.m_cds_len)))
3489 for(
int i= 1;
i< (
int)chain.
Exons().size(); ++
i) {
3491 boolcoding_donor =
Include(real_cds, donor);
3493 boolcoding_acceptor =
Include(real_cds, acceptor);
3495coding_right_splices[chain.
Strand()][donor].CombineWith(real_cds);
3496 if(coding_acceptor)
3497coding_left_splices[chain.
Strand()][acceptor].CombineWith(real_cds);
3498 if(coding_donor && coding_acceptor)
3499coding_introns[chain.
Strand()].emplace(donor, acceptor);
3519 for(
auto ip: pointers) {
3524 intstrand =
ip->m_align->Strand();
3525 auto& crs = coding_right_splices[strand];
3526 auto& cls = coding_left_splices[strand];
3527 for(
int i= 1;
i< (
int)
ip->m_align->Exons().size(); ++
i) {
3529 autorslt = crs.find(rsplice);
3531 ip->m_marked_for_deletion =
true;
3535rslt = cls.find(lsplice);
3537 ip->m_marked_for_deletion =
true;
3542 if(!
ip->m_marked_for_deletion) {
3543 auto& cdi = coding_introns[strand];
3544 for(
auto& exon :
ip->m_align->Exons()) {
3545 if(
Include(cds, exon.Limits()))
3547 for(
autointronp = cdi.upper_bound(exon.Limits()); intronp != cdi.end() && intronp->GetFrom() < exon.GetTo(); ++intronp) {
3548 if(
Include(exon.Limits(), *intronp) && !
Include(cds, *intronp)) {
3549 ip->m_marked_for_deletion =
true;
3553 if(
ip->m_marked_for_deletion)
3563set<TSignedSeqRange> introns;
3564set<TSignedSeqRange> est_introns;
3565 for(
autop : pointers) {
3568 for(
unsigned i= 1;
i< exons.size(); ++
i) {
3569 if(!exons[
i-1].m_ssplice || !exons[
i].m_fsplice)
3571introns.emplace(exons[
i-1].GetTo(), exons[
i].GetFrom());
3573est_introns.emplace(exons[
i-1].GetTo(), exons[
i].GetFrom());
3576 boolenough_est = !introns.empty() && est_introns.size() > longreadsthreshold/100*introns.size();
3578cerr <<
"Introns: "<< introns.size() <<
" "<< est_introns.size() <<
" "<< enough_est << endl;
3580 intold_oep = intersect_limit;
3582intersect_limit = 10000;
3584if(p->m_align->Status()&(CGeneModel::eLeftFlexible|CGeneModel::eRightFlexible))
3586if(p->m_align->Exons().size() != 1 || p->m_type == eCDS)
3588if(p->m_copy != nullptr) {
3589for(SChainMember* cp : *p->m_copy) {
3590if(cp->m_cds_info->MaxCdsLimits() == TSignedSeqRange::GetWhole())
3595 return true; }), pointers.end());
3598LeftRight(pointers);
3599RightLeft(pointers);
3621 CChainchain(mi,
false);
3628RemovePoorCds(chain, GoodCDNAScore(chain,
true));
3641chain.
ClipToCap(min_cap_blob, max_dist, min_flank_exon, secondary_peak,
false);
3642chain.
ClipToPolyA(contig, min_polya_blob, max_dist, min_flank_exon, secondary_peak, tertiary_peak, tertiary_peak_coverage,
false);
3647 m_gnomon->GetScore(chain, !no5pextension);
3650RemovePoorCds(chain, GoodCDNAScore(chain));
3660tmp_chains.push_back(chain);
3669CreateChainsForPartialProteins(tmp_chains, pointers, unma_aligns, unma_members);
3679 for(
auto i: allpointers) {
3684 if(mi.
m_copy!=
nullptr) {
3685 for(
autoj : *mi.
m_copy) {
3687 for(
autojc : *j->m_contained) {
3696pointers.erase(
std::remove_if(pointers.begin(),pointers.end(),[](
SChainMember* p){ return p->m_type == eRightUTR; }), pointers.end());
3701return p->m_align->Exons().size() == 1 && !(p->m_align->Status()&(CGeneModel::eLeftFlexible|CGeneModel::eRightFlexible)); }), pointers.end());
3704array<set<TSignedSeqRange>, 2> non_coding_introns;
3705 for(
autop : pointers) {
3707 for(
unsigned i= 1;
i< exons.size(); ++
i) {
3709non_coding_introns[p->
m_align->
Strand()].insert(intron);
3712 for(
autop : pointers) {
3714 auto& ncdi = non_coding_introns[strand];
3716 for(
autointronp = ncdi.upper_bound(exon.Limits()); intronp != ncdi.end() && intronp->GetFrom() < exon.GetTo(); ++intronp) {
3717 if(
Include(exon.Limits(), *intronp)) {
3729LeftRight(pointers);
3730RightLeft(pointers);
3751 CChainchain(mi,
false);
3759chain.
ClipToCap(min_cap_blob, max_dist, min_flank_exon, secondary_peak,
false);
3760chain.
ClipToPolyA(contig, min_polya_blob, max_dist, min_flank_exon, secondary_peak, tertiary_peak, tertiary_peak_coverage,
false);
3770tmp_chains.push_back(chain);
3778chain.
SetID(m_idnext);
3780m_idnext += m_idinc;
3783CombineCompatibleChains(tmp_chains);
3784SetFlagsForChains(tmp_chains);
3786intersect_limit = old_oep;
3788list<CGene> genes = FindGenes(tmp_chains);
3790 if(genes.size() > 1) {
3791TrimAlignmentsIncludedInDifferentGenes(genes);
3792CombineCompatibleChains(tmp_chains);
3793SetFlagsForChains(tmp_chains);
3796 if(genes.size() > 1)
3797FindGenes(tmp_chains);
3801it->RestoreTrimmedEnds(trim);
3802chains.push_back(*it);
3807 enum{ eFirstPeak = 1, eSecondPeak = 2, eThirdPeak = 4, eAs = 8};
3808map<tuple<int, int, int>,
int> cap_polya_info;
3810 for(
auto& chain : tmp_chains) {
3831 for(
auto&
info: cap_polya_info) {
3833 charstrand = get<1>(
info.first) ==
ePlus?
'+':
'-';
3835cerr <<
m_contig_acc<<
' '<< determinant <<
' '<< strand <<
' '<< pos <<
' ';
3836 if(
info.second&eFirstPeak)
3837cerr <<
":FirstPeak";
3838 if(
info.second&eSecondPeak)
3839cerr <<
":SecondPeak";
3840 if(
info.second&eThirdPeak)
3841cerr <<
":ThirdPeak";
3842 if(
info.second&eAs)
3858 returnap->
ID() < bp->
ID();
3865 TIVecright_ends(pointers.size());
3866vector<SChainMember> no_gap_members(pointers.size());
3867 for(
intk = 0; k < (
int)pointers.size(); ++k) {
3870no_gap_members[k] = mi;
3875 intfirst_member = (
int)pointers.size()-1;
3877 for(
int i= (
int)pointers.size()-1;
i>= 0; --
i) {
3879 if(limi.
GetTo() >= leftpos) {
3887 intlast_member = 0;
3889 for(
int i= 0;
i< (
int)pointers.size(); ++
i) {
3891 if(
Include(limi,rightpos)) {
3893rightpos =
max(rightpos,limi.
GetTo());
3897 intfully_connected_right = 0;
3899 for(
int i= first_member;
i<= last_member; ++
i) {
3904LRIinit(mi, micontained);
3910 intpart_to_connect = (
int)parts.size()-1;
3911 while(part_to_connect >= 0 && ai.
Limits().
GetFrom() <= parts[part_to_connect]->Limits().GetFrom())
3914 if(fully_connected_right > 0 && ai.
Limits().
GetFrom() > fully_connected_right)
3917 boolnot_sorted =
true;
3919 boolcompatible_with_included_parts =
true;
3920 intlast_included_part = -1;
3921 boolincludes_first_part =
false;
3922 for(
intp = part_to_connect+1; p < (
int)parts.size(); ++p) {
3929 boolsamestop = (parts[p]->GetCdsInfo().HasStop() == mi.
m_cds_info->
HasStop() && (!parts[p]->GetCdsInfo().HasStop() || parts[p]->GetCdsInfo().Stop() == mi.
m_cds_info->
Stop()));
3931 if(compatible && samestop && samefshifts) {
3932last_included_part = p;
3934includes_first_part =
true;
3936compatible_with_included_parts =
false;
3942compatible_with_included_parts =
false;
3950 if(!compatible_with_included_parts)
3953 _ASSERT(part_to_connect < 0 || part_to_connect == (
int)parts.size()-1 || mi.
m_type==
eCDS);
3955 if(includes_first_part) {
3960TIVec::iterator
lb= lower_bound(right_ends.begin(),right_ends.end(),(part_to_connect >= 0 ? parts[part_to_connect]->Limits().GetTo() : ai.
Limits().
GetFrom()));
3962 if(
lb!= right_ends.end())
3963jfirst = (
int)(
lb-right_ends.begin());
3965 for(
intj = jfirst; j <
i; ++j) {
3981 #define PGAP_PENALTY 120 3989 if(better_connection) {
3990 if(trust_compatible) {
4003 doubledelta_splice_num;
4004 if(LRCanChainItoJ(delta_cds, delta_num, delta_splice_num, mi, mj, micontained, not_sorted)) {
4009 booltrust_compatible =
true;
4013trust_compatible =
false;
4015trust_compatible =
false;
4017 boolbetter_connection =
false;
4019better_connection =
true;
4021better_connection = (newcds > mi.
m_left_cds);
4025better_connection =
true;
4028 if(better_connection) {
4029 if(trust_compatible) {
4047better_connection =
false;
4049better_connection =
true;
4050}
else if(newcds != mi_no_gap.
m_left_cds) {
4051better_connection = (newcds > mi_no_gap.
m_left_cds);
4054}
else if(newnum > mi_no_gap.
m_left_num) {
4055better_connection =
true;
4058 if(better_connection) {
4059 if(trust_compatible) {
4099 if(best_right ==
nullptr)
4102 _ASSERT(std::less<SChainMember*>()(best_right, &no_gap_members.front()) || std::less<SChainMember*>()(&no_gap_members.back(), best_right));
4104 if(!std::less<SChainMember*>()(mp->m_left_member, &no_gap_members.front()) && !std::less<SChainMember*>()(&no_gap_members.back(), mp->m_left_member)) {
4105 SChainMember* p = pointers[mp->m_left_member-&no_gap_members.front()];
4119 bool operator()(
constvector<CGeneModel*>* ap,
constvector<CGeneModel*>* bp)
4121 constvector<CGeneModel*>& partsa = *ap;
4122 constvector<CGeneModel*>& partsb = *bp;
4125 ITERATE(vector<CGeneModel*>, k, partsa)
4126align_lena += (*k)->AlignLen();
4129 ITERATE(vector<CGeneModel*>, k, partsb)
4130align_lenb += (*k)->AlignLen();
4132 if(align_lena != align_lenb) {
4133 returnalign_lena > align_lenb;
4135 return*orig_aligns[partsa.front()->ID()]->GetTargetId() < *orig_aligns[partsb.front()->ID()]->GetTargetId();
4144 typedefmap<Int8, vector<CGeneModel*> > TIdChainMembermap;
4145TIdChainMembermap protein_parts;
4146 for(
intk = 0; k < (
int)pointers_all.size(); ++k) {
4154vector<vector<CGeneModel*>*> gapped_sorted_protein_parts;
4156vector<CGeneModel*>& parts =
ip->second;
4157 if(parts.size() > 1) {
4159gapped_sorted_protein_parts.push_back(&parts);
4162 sort(gapped_sorted_protein_parts.begin(),gapped_sorted_protein_parts.end(),
AlignLenOrder(orig_aligns));
4165vector<CGeneModel*>& parts = **
ip;
4166 Int8 id= parts.front()->ID();
4168 ITERATE(vector<CGeneModel*>, k, parts) {
4177 boolconnected =
false;
4179 if(k->Continuous() && palign.
Strand() == k->Strand() && palign.
IsSubAlignOf(*k)) {
4182k->AddComment(
"Was connected "+orig_aligns[palign.
ID()]->TargetAccession());
4191 for(
intk = 0; k < (
int)pointers_all.size(); ++k) {
4208 boolcompatible =
true;
4210 if((mip->
m_align->
ID() !=
id&&
Include(partp->Limits(),
limits)) || (partp->Limits().IntersectingWith(
limits) && !partp->HasCompatibleOverlap(*mip->
m_align))) {
4211compatible =
false;
4219pointers.push_back(mip);
4222 SChainMember* best_right = FindOptimalChainForProtein(pointers, parts, palign);
4224 if(best_right ==
nullptr)
4229 CChainchain(*best_right,
false);
4232 if(unmodified_aligns.count(
id)) {
4234vector<TSignedSeqRange> new_holes;
4235vector<TSignedSeqRange> remaining_holes;
4236 for(
intk = 1; k < (
int)chain.
Exons().size(); ++k) {
4241remaining_holes.push_back(h);
4242 for(
intpiece_begin = 0; piece_begin < (
int)unma.
Exons().size(); ++piece_begin) {
4243 intpiece_end = piece_begin;
4244 for( ; piece_end < (
int)unma.
Exons().size() && unma.
Exons()[piece_end].m_ssplice; ++piece_end);
4245 if(unma.
Exons()[piece_begin].GetFrom() < h.
GetFrom() && unma.
Exons()[piece_end].GetTo() > h.
GetTo()) {
4246new_holes.push_back(h);
4249piece_begin = piece_end;
4254 if(!new_holes.empty()) {
4261vector<TSignedSeqRange> existed_holes;
4262 for(
intk = 1; k < (
int)unma.
Exons().size(); ++k) {
4269 for(
intk = 1; k < (
int)palign.
Exons().size(); ++k) {
4274 boolconnected =
true;
4275 ITERATE(vector<TSignedSeqRange>, h, remaining_holes) {
4283 boolexisted =
false;
4284 ITERATE(vector<TSignedSeqRange>, h, existed_holes) {
4291 if(connected || existed) {
4302unmacl.push_back(unma);
4305vector<CGeneModel*> unmaparts;
4308unmaparts.push_back(&(*im));
4311 CChainMembersunmapointers(unmacl, orig_aligns, unmodified_aligns);
4312Duplicate5pendsAndShortCDSes(unmapointers);
4316IncludeInContained(mi, mi);
4319 if(CanIncludeJinI(mi, mj))
4320IncludeInContained(mi, mj);
4325 _ASSERT((*ip)->m_orig_align);
4326(*ip)->m_mem_id = -(*ip)->m_mem_id;
4327pointers.push_back(*
ip);
4331best_right = FindOptimalChainForProtein(pointers, unmaparts, unma);
4334 boolpresent =
false;
4336present =
ip== &mj;
4339 if(CanIncludeJinI(mi, mj)) {
4340IncludeInContained(mi, mj);
4346chain =
CChain(*best_right,
false);
4348unma_aligns.splice(unma_aligns.end(), unmacl);
4370chain.
ClipToCap(min_cap_blob, max_dist, min_flank_exon, secondary_peak,
false);
4371chain.
ClipToPolyA(contig, min_polya_blob, max_dist, min_flank_exon, secondary_peak, tertiary_peak, tertiary_peak_coverage,
false);
4375 m_gnomon->GetScore(chain, !no5pextension);
4382chain.
AddComment(
"Connected "+orig_aligns[palign.
ID()]->TargetAccession());
4385chains.push_back(chain);
4399 int len= right-left+1;
4401vector<int> prot_cov[2][3];
4402prot_cov[0][0].resize(
len,0);
4403prot_cov[0][1].resize(
len,0);
4404prot_cov[0][2].resize(
len,0);
4405prot_cov[1][0].resize(
len,0);
4406prot_cov[1][1].resize(
len,0);
4407prot_cov[1][2].resize(
len,0);
4413 for(
int i= 0;
i< (
int)align.
Exons().size(); ++
i) {
4416 for(
intj = rf.
GetFrom(); j <= rf.
GetTo(); ++j) {
4419++prot_cov[align.
Strand()][
abs(cdstr-jtr)%3][j-left];
4442 boolallcdnaintrons =
true;
4444 for(
int i= 1;
i< (
int)chain.
Exons().size() && allcdnaintrons; ++
i) {
4445 if(chain.
Exons()[
i-1].m_ssplice_sig !=
"XX"&& chain.
Exons()[
i].m_fsplice_sig !=
"XX") {
4447allcdnaintrons = (mrna_count[intron]+est_count[intron]+rnaseq_count[intron] > 0);
4451 if(allcdnaintrons && num >0)
4461 intrrf_from_proteins = 0;
4464 for(
int i= 0;
i< (
int)chain.
Exons().size(); ++
i) {
4467 for(
intj = rf.
GetFrom(); j <= rf.
GetTo(); ++j) {
4468 if(j < left || j > right)
4472 intframe =
abs(cdstr-jtr)%3;
4473 if(jtr >= 0 && prot_cov[chain.
Strand()][frame][j-left] > 0) {
4475lrf_from_proteins =
min(lrf_from_proteins,j);
4477rrf_from_proteins =
max(rrf_from_proteins,j);
4501 for(TChainList::iterator itt = chains.begin(); itt != chains.end(); ++itt) {
4505 for(TChainList::iterator jt = chains.begin(); jt != chains.end();) {
4506TChainList::iterator jtt = jt++;
4510 if(itt != jtt && itt->Strand() == jtt->Strand() && jtt->IsSubAlignOf(*itt) && itt->ReadingFrame().Empty() == jtt->ReadingFrame().Empty()) {
4511 if(itt->ReadingFrame().NotEmpty()) {
4516 boolsame_frame = (itt->FShiftedLen(
a,
b,
false)-1)%3 == 0;
4524 if(!
Include(jtt->MaxCdsLimits(), itt->MaxCdsLimits()))
4534 boolsame_stops =
true;
4536 if(
Include(jtt->Limits(),*istp) && find(jstops.begin(), jstops.end(), *istp) == jstops.end()) {
4537same_stops =
false;
4557support.insert(*
i);
4558 if((*i)->m_copy != 0)
4559support.insert((*i)->m_copy->begin(),(*i)->m_copy->end());
4571 if(support.insert(*i).second && (
Include(jlimits, il) || itt->HasCompatibleOverlap(*(*i)->m_align, 1))) {
4572itt->m_was_combined =
true;
4573itt->m_members.push_back(*
i);
4574 if((*i)->m_copy != 0)
4575support.insert((*i)->m_copy->begin(),(*i)->m_copy->end());
4578 if(itt->m_was_combined) {
4580itt->CalculateSupportAndWeightFromMembers();
4584jtt->AddComment(
"Combined with "+to_string(itt->ID()));
4598 returnminscor.m_min;
4632 if(algn.
Score() < minscor)
4636 #define SCAN_WINDOW 49 4650vector<double> coverage_raw(mrna_len+
SCAN_WINDOW);
4665m_coverage.resize(mrna_len);
4669 for(
int i= 0;
i< mrna_len; ++
i) {
4670m_coverage[
i] = cov;
4676 CChain::CChain(
SChainMember& mbr,
boolfull_support) : m_coverage_drop_left(-1), m_coverage_drop_right(-1), m_coverage_bump_left(-1), m_coverage_bump_right(-1), m_core_coverage(0), m_splice_weight(0), m_cap_peaks(3, -1), m_polya_peaks(3, -1), m_was_combined(
false) {
4697 intnum = other_exons.size();
4702 first= other_exons.front().GetTo() >= exons.back().GetTo() ? 0 : 1;
4704 first= std::lower_bound(other_exons.begin(), other_exons.end(), exons.back().GetTo(), [](
const CModelExon& e,
TSignedSeqPos a) { return e.GetTo() < a; })-other_exons.begin();
4705exons.back().Extend(other_exons[
first]);
4706exons.insert(exons.end(), other_exons.begin()+
first+1, other_exons.end());
4720 intnum = other_exons.size();
4722 if(other_exons.back().GetTo() < exons.front().GetFrom()) {
4723exons.insert(exons.begin(), other_exons.begin(), other_exons.end());
4725exons[num].m_fsplice =
false;
4726exons[num].m_fsplice_sig.clear();
4727exons[num-1].m_ssplice =
false;
4728exons[num-1].m_ssplice_sig.clear();
4732 if(cds.
GetFrom() > exons[num].GetFrom())
4733exons[num].Limits().SetFrom(cds.
GetFrom());
4736 if(other_cds.
GetTo() < exons[num-1].GetTo())
4737exons[num-1].Limits().SetTo(other_cds.
GetTo());
4743 first= other_exons.back().GetFrom() <= exons.front().GetFrom() ? 1 : 0;
4745 first= std::lower_bound(other_exons.begin(), other_exons.end(), exons.front().GetFrom(), [](
const CModelExon& e,
TSignedSeqPos a) { return e.GetTo() < a; })-other_exons.begin();
4746exons.front().Limits().SetFrom(other_exons[
first].GetFrom());
4747 if(other_exons[
first].m_fsplice) {
4748exons.front().m_fsplice =
true;
4749exons.front().m_fsplice_sig = other_exons[
first].m_fsplice_sig;
4751exons.insert(exons.begin(), other_exons.begin(), other_exons.begin()+
first);
4764 m_exons.assign(exons.begin(), exons.end());
4767 m_exons.front().m_fsplice =
false;
4768 m_exons.front().m_fsplice_sig.clear();
4769 m_exons.back().m_ssplice =
false;
4770 m_exons.back().m_ssplice_sig.clear();
4783 CChain::CChain(
SChainMember& mbr,
CGeneModel* gapped_helper,
boolkeep_all_evidence,
booladdallsupport) : m_coverage_drop_left(-1), m_coverage_drop_right(-1), m_coverage_bump_left(-1), m_coverage_bump_right(-1), m_core_coverage(0), m_splice_weight(0), m_cap_peaks(3, -1), m_polya_peaks(3, -1), m_was_combined(
false)
4789list<CGeneModel> extened_parts;
4790vector<CGeneModel*> extened_parts_and_gapped;
4791 if(gapped_helper != 0) {
4792extened_parts_and_gapped.push_back(gapped_helper);
4810extened_parts.push_back(align);
4811 _ASSERT(extened_parts.back().Continuous());
4812extened_parts_and_gapped.push_back(&extened_parts.back());
4814extened_parts.back().Extend(align,
false);
4815 _ASSERT(extened_parts.back().Continuous());
4818 if(left < extened_parts.front().Limits().GetFrom())
4819extened_parts.front().ExtendLeft(extened_parts.front().Limits().GetFrom()-left);
4820 if(right > extened_parts.back().Limits().GetTo())
4821extened_parts.back().ExtendRight(right-extened_parts.back().Limits().GetTo());
4824 EStrandstrand = extened_parts_and_gapped.front()->Strand();
4827 sort(extened_parts_and_gapped.begin(),extened_parts_and_gapped.end(),
AlignSeqOrder());
4828 ITERATE(vector<CGeneModel*>, it, extened_parts_and_gapped) {
4851vector<double> coverage_raw(mrna_len+
SCAN_WINDOW);
4870 for(
int i= 0;
i< mrna_len; ++
i) {
4880 if(!align->
TrustedProt().empty() || (!align->
TrustedmRNA().empty() && (*i)->m_cds_info->ProtReadingFrame().NotEmpty())) {
4882 if(align->
AlignLen() > 0.5*(*i)->m_orig_align->TargetLen())
4892map<Int8,int> exonnum;
4897exonnum[align.
ID()] += align.
Exons().size();
4901 if(it->second >= (
int)orig_aligns[it->first]->Exons().size()) {
4921 bool NewIsBetter(
intnew_count,
intnew_not_wanted_count,
intnew_matches_count,
intnew_mem_id)
const{
4929 returnnew_mem_id < mem_id;
4939 intamem_id =
a.m_member !=
nullptr?
a.m_member->m_mem_id : 0;
4940 returnamem_id < mem_id;
4975 if(ai->
Ident() > 0.)
4976matches = ai->
Ident()*matches+0.5;
4977 boolnot_wanted =
false;
4983alimits.
SetTo(
min(alimits.
GetTo(), exonp->Limits().GetTo()));
5007linkers.push_back(sl);
5012set<TSignedSeqRange> chain_introns;
5013 for(
int i= 1;
i< (
int)
Exons().size(); ++
i) {
5014 if(
Exons()[
i-1].m_ssplice &&
Exons()[
i].m_fsplice)
5018 for(
SLinker& sl : linkers) {
5023 boolall_introns_included =
true;
5024 for(
int i= 1; all_introns_included &&
i< (
int)unma.
Exons().size(); ++
i) {
5025 if(unma.
Exons()[
i-1].m_ssplice && unma.
Exons()[
i].m_fsplice)
5028 if(!all_introns_included) {
5029sl.m_not_wanted =
true;
5034sl.m_not_wanted =
true;
5037 boolall_introns_included =
true;
5038 for(
int i= 1; all_introns_included &&
i< (
int)orig_align.
Exons().size(); ++
i) {
5039 if(orig_align.
Exons()[
i-1].m_ssplice && orig_align.
Exons()[
i].m_fsplice)
5040all_introns_included = chain_introns.count(
TSignedSeqRange(orig_align.
Exons()[
i-1].GetTo(),orig_align.
Exons()[
i].GetFrom()));
5042 if(!all_introns_included) {
5043sl.m_not_wanted =
true;
5058linkers.push_back(sl);
5065 for(
int i= 1;
i< (
int)
Exons().size(); ++
i) {
5066 if(!
Exons()[
i-1].m_ssplice || !
Exons()[
i].m_fsplice) {
5070linkers.push_back(sl);
5077multimap<TSignedSeqPos, SLinker*> right_ends;
5078 for(
auto& linker : linkers)
5079right_ends.emplace(linker.m_range.GetTo(), &linker);
5081 for(
autoit = right_ends.begin(); it != right_ends.end(); ++it) {
5089}
else if(it != right_ends.begin()) {
5093 booldivided_pstop =
false;
5105new_not_wanted_count += sli.
m_value;
5108 if(!sli.
m_connected|| sli.
NewIsBetter(new_count, new_not_wanted_count, new_matches_count, new_mem_id)) {
5116 if(jt == right_ends.begin())
5123 for(
int i= 0;
i< (
int)linkers.size(); ++
i) {
5134 for(
SLinker*
l= best_right;
l!= 0;
l=
l->m_left) {
5136sp_core.
insert(
l->m_member->m_align->ID());
5140 if(!keep_all_evidence) {
5141 for(
int i= 0;
i< (
int)linkers.size(); ++
i) {
5162 if(!sp_not_wanted.count(
id)) {
5166 if(sp.
insert(
id).second) {
5175protreadingframe &= readingframe;
5187 for(
intia = 0; ia < (
int)
m_members.size(); ++ia) {
5190 a.Exons().size() > 1 &&
Exons().front().Limits().GetFrom() ==
a.Limits().GetFrom()) {
5198 for(
intia = 0; ia < (
int)
m_members.size(); ++ia) {
5201 a.Exons().size() > 1 &&
Exons().back().Limits().GetTo() ==
a.Limits().GetTo()) {
5213 boolfound_length_match =
false;
5221map<string, pair<bool,bool> >::iterator iter = prot_complet.find(accession);
5222 _ASSERT(iter != prot_complet.end());
5223 if(iter == prot_complet.end() || !iter->second.first || !iter->second.second)
5227found_length_match =
true;
5232 if(!found_length_match) {
5249 booltrusted =
false;
5261 if(
a< 0 ||
b< 0 ||
abs(
a-
b) != 2)
5271list<TSignedSeqRange> align_introns;
5272 for(
int i= 1;
i< (
int)align.
Exons().size(); ++
i) {
5275align_introns.push_back(intron);
5278list<TSignedSeqRange> introns;
5281 for(
int i= 1;
i< (
int)
Exons().size(); ++
i) {
5283 if(
Include(start,intron)) {
5284introns.push_back(intron);
5286 if(!
Exons()[
i-1].m_ssplice || !
Exons()[
i].m_fsplice)
5291 if(
len!=3 || hole || align_introns != introns)
5296 boolfound =
false;
5297 for(
int i= 0;
i< (
int)
Exons().size() && !found; ++
i) {
5299 if(
Exons()[
i].Limits().GetTo() > start.
GetTo()) {
5300rf = start.
GetTo()+1;
5302}
else if(
i!= (
int)
Exons().size()-1) {
5303rf =
Exons()[
i+1].Limits().GetFrom();
5316 boolfound =
false;
5317 for(
int i= 0;
i< (
int)
Exons().size() && !found; ++
i) {
5319 if(
Exons()[
i].Limits().GetFrom() < start.
GetFrom()) {
5322}
else if(
i!= 0) {
5323rf =
Exons()[
i-1].Limits().GetTo();
5338 if(conf_start.
Empty()) {
5382reading_frame.
SetTo(rf);
5394cds.
SetStop(stop,confirmed_stop);
5396 if(
Include(reading_frame, *s))
5402 for(
int i= 1;
i< (
int)
Exons().size(); ++
i) {
5403 if(!
Exons()[
i-1].m_ssplice || !
Exons()[
i].m_fsplice) {
5405 if(
Precede(hole,reading_frame)) {
5407}
else if(
Precede(reading_frame,hole)) {
5413 if(new_lim !=
Limits())
5417gnomon.
GetScore(*
this,
false, trusted);
5455 autoai = (*it)->m_align;
5457 if(
limits.IntersectingWith(alimits))
5458new_members.push_back(*it);
5474 if(limits_on_mrna.
GetFrom() > 0)
5481 boolchanged =
false;
5487 if(cds.
PStop()) {
5489 for(
auto& pstop : cds.
PStops()) {
5491pstops.push_back(pstop);
5493 if(pstops.size() != cds.
PStops().size()) {
5495 for(
auto& pstop : pstops)
5510 if(recalulate_support)
5518 autoold_limits =
Limits();
5519 autonew_limits = old_limits;
5520 boolleft_confirmed =
false;
5521 boolright_confirmed =
false;
5524 if(rslt != confirmed_ends.
end() && rslt->second <
Exons().
front().GetTo()) {
5525left_confirmed =
true;
5526new_limits.SetFrom(rslt->second);
5528rslt = confirmed_ends.
find(
Exons().back().GetFrom());
5529 if(rslt != confirmed_ends.
end() && rslt->second >
Exons().back().GetFrom()) {
5530right_confirmed =
true;
5531new_limits.SetTo(rslt->second);
5534 if(!left_confirmed && !right_confirmed)
5546 if(new_limits.GetFrom() < old_limits.GetFrom()) {
5547 int delta= old_limits.GetFrom()-new_limits.GetFrom();
5552 if(new_limits.GetTo() > old_limits.GetTo()) {
5553 int delta= new_limits.GetTo()-old_limits.GetTo();
5564 if(
i->Loc() > new_limits.GetFrom() &&
i->InDelEnd() < new_limits.GetTo())
5578 if(
Limits() != new_limits)
5586 if(left_confirmed) {
5588 if(new_limits.GetFrom() < old_limits.GetFrom())
5590 else if(new_limits.GetFrom() > old_limits.GetFrom())
5593 if(right_confirmed) {
5595 if(new_limits.GetTo() > old_limits.GetTo())
5596 AddComment(
"Extended to confirmed right");
5597 else if(new_limits.GetTo() < old_limits.GetTo())
5604 if(!
Include(new_limits, cds_info.
Cds()) || (left_confirmed && !left_complete) || (right_confirmed && !right_complete)) {
5611 for(
int i= cds_limits_t.GetFrom();
i<= cds_limits_t.GetTo(); ++
i) {
5616 for(
int i= cds_limits_t.GetTo();
i>= cds_limits_t.GetFrom(); --
i) {
5617cds_limits_t.SetTo(
i);
5621 if(cds_limits_t.Empty())
5623cds_info_t.Clip(cds_limits_t);
5626 boolfivep_confirmed = (
Strand() ==
ePlus) ? left_confirmed : right_confirmed;
5627 boolthreep_confirmed = (
Strand() ==
ePlus) ? right_confirmed : left_confirmed;
5628 boolhas_start = cds_info_t.HasStart();
5629 boolhas_stop = cds_info_t.HasStop();
5630 autoprot_rf = cds_info_t.ProtReadingFrame();
5631 if(prot_rf.NotEmpty() && ((fivep_confirmed && !has_start) || (threep_confirmed && !has_stop))) {
5636 autoIndelInCodon = [
this](
int i,
CAlignMap& map) {
5637 int a= map.MapEditedToOrig(
i);
5638 int b= map.MapEditedToOrig(
i+2);
5641 return(
a< 0 ||
b< 0 || map.MapEditedToOrig(
i+1) < 0 || !
GetInDels(
a,
b,
false).empty());
5644 if(fivep_confirmed && !has_start) {
5645 for(
int i= prot_rf.GetFrom(); !has_start &&
i>= cds_limits_t.GetFrom() && !
IsStopCodon(&mrna[
i]);
i-= 3) {
5646has_start =
IsStartCodon(&mrna[
i]) && !IndelInCodon(
i, amap);
5648 for(
int i= prot_rf.GetFrom(); !has_start &&
i< cds_limits_t.GetTo();
i+= 3) {
5651has_start =
IsStartCodon(&mrna[
i]) && !IndelInCodon(
i, amap);
5652cds_limits_t.SetFrom(
i);
5657 if(threep_confirmed && !has_stop) {
5658 for(
int i= prot_rf.GetTo()+1; !has_stop &&
i< cds_limits_t.GetTo();
i+= 3)
5659has_stop =
IsStopCodon(&mrna[
i]) && !IndelInCodon(
i, amap);
5660 if(!has_stop && cds_info_t.PStop(
false)) {
5662 sort(pstops.begin(), pstops.end());
5663 for(
auto& stp : pstops) {
5666cds_limits_t.SetTo(stp.GetFrom()-1);
5674 if(cds_limits_t.Empty())
5677cds_info_t.Clip(cds_limits_t);
5684 autocds = cds_info.
Cds();
5707 stringmotif1 =
"AATAAA";
5708 stringmotif2 =
"ATTAAA";
5709 stringmotif3 =
"AGTAAA";
5710 intblock_of_As_len = 6;
5713block_of_As.assign(block_of_As_len,
'A');
5715block_of_As.assign(block_of_As_len,
'T');
5717 int a=
max(0, pos-block_of_As_len);
5718 int b=
min((
int)contig.size()-1, pos+block_of_As_len);
5719 if(
b-
a+1 < block_of_As_len)
5720 returnmake_pair(
false,
false);
5721 if(search(contig.begin()+
a, contig.begin()+
b+1, block_of_As.begin(), block_of_As.end()) != contig.begin()+
b+1) {
5731 if(left < 0 || right >= (
int)contig.size())
5732 returnmake_pair(
false,
false);
5734 stringsegment(contig.begin()+left, contig.begin()+right+1);
5738 if(segment.find(motif1) != string::npos || segment.find(motif2) != string::npos || segment.find(motif3) != string::npos)
5739 returnmake_pair(
true,
true);
5741 returnmake_pair(
false,
true);
5743 returnmake_pair(
true,
false);
5747 #define MIN_UTR_EXON 15 5760 if(align.
Status()&determinant) {
5764 boolbelong_to_exon =
false;
5766 for(
auto& exon :
Exons()) {
5767 if(pos >= exon.Limits().GetFrom()+min_splice_dist && pos <= exon.Limits().GetTo()) {
5768belong_to_exon =
true;
5772 if(rlimit < pos && belong_to_exon)
5777 boolbelong_to_exon =
false;
5779 for(
auto& exon :
Exons()) {
5780 if(pos >= exon.Limits().GetFrom() && pos <= exon.Limits().GetTo()-min_splice_dist) {
5781belong_to_exon =
true;
5785 if(llimit > pos && belong_to_exon)
5794 if(raw_weights.
empty())
5795 returnmake_tuple(peak_weights,real_limits);
5797 intlast_allowed = right_end ? real_limits.
GetTo()+flex_len : -(real_limits.
GetFrom()-flex_len);
5798 autoipeak = raw_weights.
begin();
5799 doublew = ipeak->second;
5800 for(
autoit =
next(raw_weights.
begin()); it != raw_weights.
end(); ++it) {
5801 if(it->first >
prev(it)->first+1+max_empty_dist) {
5802 if(ipeak->first > last_allowed)
5804 if(w >= min_blob_weight) {
5805 autostill_good = ipeak;
5806 for(
auto i= ipeak;
i!= it &&
i->first <= last_allowed; ++
i) {
5807 if(
i->second >= 0.5*ipeak->second)
5810peak_weights.emplace(still_good->first, w);
5816 if(it->second > ipeak->second)
5820 if(ipeak->first <= last_allowed && w >= min_blob_weight) {
5821 autostill_good = ipeak;
5822 for(
auto i= ipeak;
i!= raw_weights.
end() &&
i->first <= last_allowed; ++
i) {
5823 if(
i->second >= 0.5*ipeak->second)
5826peak_weights.emplace(still_good->first, w);
5829 returnmake_tuple(peak_weights,real_limits);
5832tuple<TIVec, TSignedSeqRange>
CChain::MainPeaks(
TIDMap& peak_weights,
doublesecondary_peak,
doubletertiary_peak,
doubletertiary_peak_coverage,
boolright_end) {
5836peaks[0] =
abs(ifirst_peak->first);
5838 intfirst_peak = ifirst_peak->first;
5839 limits.SetTo(first_peak);
5842 intfirst_peak = -ifirst_peak->first;
5843 limits.SetFrom(first_peak);
5847 autoisecond_peak =
prev(peak_weights.
end());
5848 for( ; isecond_peak != ifirst_peak && isecond_peak->second < secondary_peak*ifirst_peak->second; --isecond_peak);
5849 if(isecond_peak != ifirst_peak)
5850peaks[1] =
abs(isecond_peak->first);
5852 if(tertiary_peak > 0) {
5855 if(genome_core_lim.
Empty()) {
5856genome_core_lim =
Limits();
5866 doublecore_coverage = 0;
5870core_coverage /= core_lim.
GetLength();
5873 for(
auto& exon :
Exons()) {
5874 if(
Include(exon.Limits(),
abs(ifirst_peak->first))) {
5875fpeak_exon = exon.Limits();
5880 autoithird_peak =
prev(peak_weights.
end());
5881 for( ; ithird_peak != isecond_peak; --ithird_peak) {
5882 if(
Include(fpeak_exon,
abs(ithird_peak->first))) {
5886 if(ithird_peak->second >= tertiary_peak*ifirst_peak->second &&
m_coverage[p] > tertiary_peak_coverage*core_coverage)
5890 if(ithird_peak != isecond_peak)
5891peaks[2] =
abs(ithird_peak->first);
5892isecond_peak = ithird_peak;
5895 if(isecond_peak != ifirst_peak) {
5897 intsecond_peak = isecond_peak->first;
5898 limits.SetTo(second_peak);
5900 intsecond_peak = -isecond_peak->first;
5901 limits.SetFrom(second_peak);
5905 returnmake_tuple(peaks,
limits);
5908 void CChain::ClipToCap(
intmin_cap_blob,
intmax_dist,
intmin_flank_exon,
doublesecondary_peak,
boolrecalulate_support) {
5920 TIDMap& peak_weights(get<0>(rslt));
5922 if(peak_weights.
empty()) {
5925 if(right_end && real_limits.
GetTo() <
Limits().GetTo())
5927 else if(!right_end && real_limits.
GetFrom() >
Limits().GetFrom())
5948 autorslt1 =
MainPeaks(peak_weights, secondary_peak, 0., 0., right_end);
5956 void CChain::ClipToPolyA(
const CResidueVec& contig,
intmin_polya_blob,
intmax_dist,
intmin_flank_exon,
doublesecondary_peak,
doubletertiary_peak,
doubletertiary_peak_coverage,
boolrecalulate_support) {
5968 TIDMap& peak_weights(get<0>(rslt));
5971 for(
autoip_loop = peak_weights.
begin(); ip_loop != peak_weights.
end(); ) {
5972 auto ip= ip_loop++;
5977 if(peak_weights.
empty()) {
5980 if(right_end && real_limits.
GetTo() <
Limits().GetTo())
5982 else if(!right_end && real_limits.
GetFrom() >
Limits().GetFrom())
6003 autorslt1 =
MainPeaks(peak_weights, secondary_peak, tertiary_peak, tertiary_peak_coverage, right_end);
6021 #define COVERAGE_DROP 0.1 6022 #define COVERAGE_BUMP 3 6023 #define SMALL_GAP_UTR 100 6047genome_core_lim =
Limits();
6059 _ASSERT((
int)coverage.size() == mrna_len && core_lim.
GetFrom() >= 0 && core_lim.
GetTo() < mrna_len);
6061 doublecore_coverage = 0;
6063core_coverage += coverage[
i];
6065core_coverage /= core_lim.
GetLength();
6068 if(core_lim.
GetFrom() <= 0 && core_lim.
GetTo() >= mrna_len-1)
6075vector<double> longseq_coverage(mrna_len);
6081 if(overlap.
Empty())
6084 for(
int i= 1;
i< (
int)align.
Exons().size(); ++
i) {
6085 if(align.
Exons()[
i-1].m_ssplice && align.
Exons()[
i].m_fsplice && align.
Exons()[
i-1].m_ssplice_sig !=
"XX"&& align.
Exons()[
i].m_fsplice_sig !=
"XX") {
6087 boolvalid_intron =
false;
6088 for(
intj = 1; j < (
int)
Exons().size() && !valid_intron; ++j) {
6089 if(
Exons()[j-1].m_ssplice &&
Exons()[j].m_fsplice) {
6091valid_intron = (intr == jntr);
6110 for(
int i= overlap_on_mrna.
GetFrom();
i<= overlap_on_mrna.
GetTo(); ++
i)
6111longseq_coverage[
i] += align.
Weight();
6121longseq_coverage[
i] = 0;
6128longseq_coverage[
i] = 0;
6132 doublecore_inron_coverage = 0;
6133 intcore_introns = 0;
6134 for(
int i= 1;
i< (
int)
Exons().size(); ++
i) {
6135 if(
Exons()[
i-1].m_ssplice &&
Exons()[
i].m_fsplice) {
6141 if(
Include(core_lim, intron)) {
6143core_inron_coverage += intron_coverage[intron];
6147 if(core_introns > 0)
6148core_inron_coverage /= core_introns;
6150core_inron_coverage = 0.5*core_coverage;
6155 intleft_limit = core_lim.
GetFrom();
6156 intright_limit = core_lim.
GetTo();
6157 int len= right_limit-left_limit+1;
6159 for(
int i= left_limit;
i<= right_limit; ++
i)
6160wlen += coverage[
i];
6162 while(left_limit > 0 && (longseq_coverage[left_limit] > 0 ||
6163(coverage[left_limit] >
max(core_coverage,wlen/
len)*utr_clip_threshold &&
6164(intron_coverage.
find(left_limit-1) == intron_coverage.
end() || intron_coverage[left_limit-1] > core_inron_coverage*utr_clip_threshold)))) {
6168wlen += coverage[left_limit];
6171 if(left_limit > 0) {
6185 intright_limit = core_lim.
GetTo();
6186 intleft_limit = core_lim.
GetFrom();
6187 int len= right_limit-left_limit+1;
6189 for(
int i= left_limit;
i<= right_limit; ++
i)
6190wlen += coverage[
i];
6192 while(right_limit < mrna_len-1 && (longseq_coverage[right_limit] > 0 ||
6193(coverage[right_limit] > wlen/
len*utr_clip_threshold &&
6194(intron_coverage.
find(right_limit) == intron_coverage.
end() || intron_coverage[right_limit] > core_inron_coverage*utr_clip_threshold)))) {
6198wlen += coverage[right_limit];
6201 if(right_limit < mrna_len-1) {
6223 if(fivep_confirmed && threep_confirmed)
6230vector<double> longseq_coverage(mrna_len);
6236 if(overlap.
Empty())
6242 for(
int i= overlap_on_mrna.
GetFrom();
i<= overlap_on_mrna.
GetTo(); ++
i)
6243longseq_coverage[
i] += align.
Weight();
6269 if(!fivep_confirmed) {
6270 intleft_limit = soft_limit.
GetFrom();
6271 intfirst_bump = -1;
6276first_bump = left_limit;
6281 if(first_bump > 0) {
6288 intfirst_drop = left_limit;
6290 for( ; first_drop-
SCAN_WINDOW/2 > 0; --first_drop) {
6305 if(!threep_confirmed) {
6306 intright_limit = soft_limit.
GetTo();
6307 intfirst_bump = -1;
6312first_bump = right_limit;
6316 if(first_bump > 0) {
6323 intfirst_drop = right_limit;
6325 for( ; first_drop < mrna_len-
SCAN_WINDOW/2; ++first_drop) {
6347map<TSignedSeqRange,double> intron_coverage;
6348vector<double> coverage(mrna_len);
6354 if(overlap.
Empty())
6359 for(
int i= overlap_on_mrna.
GetFrom();
i<= overlap_on_mrna.
GetTo(); ++
i)
6360coverage[
i] += align.
Weight();
6363 for(
int i= 1;
i< (
int)align.
Exons().size(); ++
i) {
6364 if(align.
Exons()[
i-1].m_ssplice_sig !=
"XX"&& align.
Exons()[
i].m_fsplice_sig !=
"XX") {
6373 doublemaxintroncount = 0;
6375minintroncount =
min(minintroncount,it->second);
6376maxintroncount =
max(maxintroncount,it->second);
6378 if(minintroncount < 0.1*maxintroncount)
6381vector<int> dips(mrna_len,0);
6382 doublemaxsofar = 0;
6383 for(
int i= 0;
i< mrna_len; ++
i) {
6384 if(coverage[
i] < 0.1*maxsofar)
6386maxsofar =
max(maxsofar,coverage[
i]);
6388 for(
int i= 0;
i< (
int)
Exons().size(); ++
i) {
6389 if(
Exons()[
i].m_fsplice_sig ==
"XX"||
Exons()[
i].m_ssplice_sig ==
"XX") {
6397 for(
int i= mrna_len-1;
i>= 0; --
i) {
6398 if(coverage[
i] < 0.1*maxsofar && dips[
i] > 0)
6400maxsofar =
max(maxsofar,coverage[
i]);
6403 if(intron_coverage.size() > 1)
6412 boolsetconfstart =
false;
6413 boolsetconfstop =
false;
6418 if((*i)->m_align->GetCdsInfo().ProtReadingFrame().Empty())
6421 if((*i)->m_align->Type() &
emRNA) {
6423setconfstart =
true;
6425setconfstop =
true;
6432map<string, pair<bool,bool> >::iterator iter = prot_complet.find(accession);
6433 _ASSERT(iter != prot_complet.end());
6434 if(iter == prot_complet.end())
6439 if((*i)->m_align->Strand() ==
eMinus)
6440 swap(fivep_exon,threep_exon);
6443iter->second.first &&
Include(
Limits(),(*i)->m_align->Limits())) {
6451setconfstart =
true;
6458iter->second.second &&
Include(
Limits(),(*i)->m_align->Limits())) {
6466setconfstop =
true;
6475 doublescore = cds_info.
Score();
6477score +=
max(1.,0.3*score);
6478cds_info.
SetScore(score,
false);
6482cds_info.
SetScore(score,
false);
6499 typedefmap<Int8, set<TSignedSeqRange> > Tint8range;
6504 if(!(*i)->m_align->TrustedProt().empty()) {
6506 if((*i)->m_mem_id > 0)
6507aexons[(*i)->m_align->ID()].insert(e->
Limits());
6509uexons[(*i)->m_align->ID()].insert(e->
Limits());
6512 else if(!(*i)->m_align->TrustedmRNA().empty() && (*i)->m_align->ConfirmedStart() && (*i)->m_align->ConfirmedStop())
6516 typedefmap<Int8, int> Tint8int;
6517Tint8int palignedlen;
6520 ITERATE(set<TSignedSeqRange>, e,
i->second)
6521 len+= e->GetLength();
6522palignedlen[
i->first] =
len;
6526 ITERATE(set<TSignedSeqRange>, e,
i->second)
6527 len+= e->GetLength();
6528palignedlen[
i->first] =
max(
len,palignedlen[
i->first]);
6532 ITERATE(Tint8int,
i, palignedlen) {
6543 if(ie->m_fsplice_sig ==
"XX"|| ie->m_ssplice_sig ==
"XX")
6544gap_cds += (cds&ie->Limits()).
GetLength();
6549 ITERATE(Tint8int,
i, palignedlen) {
6551 if(
i->second+gap_cds > 0.8*orig_align->
TargetLen()) {
6553 stringtprotein(protein_seqvec.
begin(),protein_seqvec.
end());
6554 CCigarcigar =
LclAlign(mprotein.c_str(), (
int)mprotein.size(), tprotein.c_str(), (
int)tprotein.size(), 10, 1,
delta.matrix);
6603 returnmake_pair(
a.TargetAccession(), 0);
6615 catch(sequence::CSeqIdFromHandleException&) {
6617 returnmake_pair(
a.TargetAccession(), 0);
6632 if(a_acc.second != b_acc.second)
6633 returna_acc.second > b_acc.second;
6635 inta_stt =
a->HasStart()+
a->HasStop();
6636 intb_stt =
b->HasStart()+
b->HasStop();
6638 returna_stt > b_stt;
6643 returna_len > b_len;
6648 return a->ID() <
b->ID();
6667 m_data->FilterOutChimeras(clust);
6672 typedefmap<int,TGeneModelClusterSet> TClustersByStrand;
6673TClustersByStrand trusted_aligns;
6676 if(it->Continuous() && (!it->TrustedmRNA().empty() || !it->TrustedProt().empty())
6677&& orig_align !=
nullptr&& it->
AlignLen() > minscor.m_minprotfrac*orig_align->
TargetLen()) {
6678trusted_aligns[it->Strand()].Insert(*it);
6685 for(
auto& strand_vec : trusted_aligns) {
6686 intstrand = strand_vec.first;
6689tgr_info[strand].emplace_back();
6690 auto&
last= tgr_info[strand].back();
6691 last.m_limits = cls.Limits();
6694 for(
auto& e : talign.Exons()) {
6701 for(
auto& cref : talign.TrustedProt())
6703 for(
auto& cref : talign.TrustedmRNA())
6708 last.m_cds_limits += tcds;
6709 CAlignMaptmap(talign.Exons(), talign.FrameShifts(), talign.Strand(), tcds);
6711 for(
auto& e : talign.Exons()) {
6712 autooverlap = (e.
Limits()&tcds);
6713 for(
TSignedSeqPosk = overlap.GetFrom(); k <= overlap.GetTo(); ++k) {
6715 if(p >= 0 && p%3 == 0) {
6716 _ASSERT(p/3 < (
int)
last.m_cds_maps.back().size());
6717 last.m_cds_maps.back()[p/3] = k;
6726 for(
auto& ts : tgr_info) {
6727 charstrand = ts.first ==
ePlus?
'+':
'-';
6729 for(
int i= 0;
i< (
int)spl.size(); ++
i) {
6733trusted_group_cds[tgr] = spl[
i].m_cds_limits;
6734 for(
auto&
id: spl[
i].m_ids)
6735cerr <<
"TGR: "<< strand <<
" "<< tgr <<
" "<<
id.first <<
" "<<
id.second << endl;
6739 for(TGeneModelList::iterator it_loop = clust.begin(); it_loop != clust.end(); ) {
6740TGeneModelList::iterator ita = it_loop++;
6746 intstrand = align.
Strand();
6747 if(!tgr_info.count(strand))
6753 autosecond = upper_bound(tinfo.begin(), tinfo.end(), align.
Limits().
GetTo(),
6755 if(
first== second)
6765 for(
auto& e : align.
Exons()) {
6766 autooverlap = (e.
Limits()&acds);
6767 for(
TSignedSeqPosk = overlap.GetFrom(); k <= overlap.GetTo(); ++k) {
6769 if(p >= 0 && p%3 == 0) {
6770 _ASSERT(p/3 < (
int)acds_map.size());
6778 boolparalogs =
false;
6780 for(
autoit =
first; it != second; ++it) {
6781 inttgr = it-tinfo.
begin()+1;
6786 for(
unsigned int i= 0;
i< align.
Exons().
size() && atgr != tgr; ++
i) {
6793 for(
const TIVec& tcds_map : it->m_cds_maps) {
6794 intlong_enough = 10;
6796 if(mci >= long_enough) {
6803 autonum = prev_accessions.
size();
6804 for(
auto&
id: it->m_ids)
6805prev_accessions.
insert(
id.second);
6806 if(num+it->m_ids.size() != prev_accessions.
size())
6824cerr <<
"Chimeric alignment "<< align.
ID() <<
" "<< (paralogs ?
"paralogs":
"unrelated") <<
" "<< flag << endl;
6825SkipReason(orig_aligns[align.
ID()],
"Chimera");
6833 virtual boolalign_predicate(
CAlignModel& align);
6834 virtual string GetComment() {
return "Overlaps the same alignment";}
6842vector<CAlignModel*> alignment_ptrs;
6845alignment_ptrs.push_back(&*
a);
6848 if(alignment_ptrs.empty())
6853vector<CAlignModel*>::iterator
first= alignment_ptrs.begin();
6855vector<CAlignModel*> ::iterator current =
first; ++current;
6856 for(; current != alignment_ptrs.end(); ++current) {
6857pair<string,int> current_accver =
GetAccVer(**current, scope);
6858 if(first_accver.first == current_accver.first) {
6859 if((*current)->Strand() == (*first)->Strand() && (*current)->Limits().IntersectingWith((*first)->Limits())) {
6864first_accver = current_accver;
6876 return newgnomon::OverlapsSameAccessionAlignment(alignments);
6886 const CGeneModel* prev_alignp = &dummy_align;
6888 boolprev_is_compatible =
false;
6898prev_alignp = &algnj;
6900 if((same_as_prev && prev_is_compatible) || (!same_as_prev && algn.
Strand()==algnj.
Strand() && algn.
isCompatible(algnj))) {
6901prev_is_compatible =
true;
6906prev_is_compatible =
false;
6915: alignments(_alignments)
6923 return!paralog.empty();
6925 virtual string GetComment() {
return "Connects two "+paralog+
" alignments"; }
6930 return newgnomon::ConnectsParalogs(alignments);
6936 if(
m_data->orig_aligns.find(itcl->ID()) ==
m_data->orig_aligns.end()) {
6945 double ms=
m_data->GoodCDNAScore(algn);
6951ost <<
"Short alignment "<< algn.
AlignLen();
6953ost <<
"Low score "<< algn.
Score();
6961 #define PROT_CLIP 120 6962 #define PROT_CLIP_FRAC 0.20 6988 if(align.
PStop()) {
6999 if(ostop.
GetLength() == 3 && protein_seqvec[ostop.
GetFrom()/3] ==
'U') {
7005 if(conf !=
m_confirmed_bases_len.
begin() && (--conf)->first <= stp->GetFrom() && conf->first+conf->second > stp->GetTo())
7018 m_data->unmodified_aligns[itcl->ID()] = *itcl;
7027 if(protein_seqvec[0] ==
'M')
7028fivepclip = maxclip-tlim.
GetFrom();
7029 intthreepclip = maxclip-(
orig->TargetLen()-tlim.
GetTo()-1);
7034 intthreepshift = 0;
7036 for(TInDels::iterator indl = align.
FrameShifts().begin(); !skip && indl != align.
FrameShifts().end(); ++indl) {
7048 inttpb = lim.
GetTo()-1;
7051 if(tpb < fivepclip) {
7052 if(indl->IsInsertion())
7053fivepshift += indl->Len();
7054 else if(indl->IsDeletion())
7055fivepshift -= indl->Len();
7056}
else if(tpa >= tlen-threepclip) {
7057 if(indl->IsInsertion())
7058threepshift += indl->Len();
7059 else if(indl->IsDeletion())
7060threepshift -= indl->Len();
7068 if(fivepshift >= 0)
7071fivepshift = 3-(-fivepshift)%3;
7072 if(threepshift >= 0)
7075threepshift = 3-(-threepshift)%3;
7083edited_tlim.
SetTo(edited_tlim.
GetTo()-threepshift);
7091 stringprotseq = editedm.
GetProtein(contig);
7092tlen = 3*(
int)protseq.size();
7093 intfivep_problem = -1;
7094 intfirst_stop = tlen;
7096 for(
intp = 0; !skip && p < (
int)protseq.size(); ++p) {
7097 if(protseq[p] ==
'*') {
7100 if(tpb < fivepclip)
7101fivep_problem =
max(fivep_problem, tpb);
7102 else if(tpa >= tlen-threepclip || p == (
int)protseq.size()-1)
7103first_stop =
min(first_stop, tpa);
7111 intfivep_limit = 0;
7112 autom = protseq.find(
"M", (fivep_problem+1)/3);
7114 if(m != string::npos && (
int)m*3 <= fivepclip) {
7115fivep_limit = 3*(
int)m;
7121 intthreep_limit = tlen-1;
7123 if(first_stop+2 < threep_limit) {
7124threep_limit = first_stop+2;
7146edited_cds.
SetStart(start,
true);
7147edited_cds.
SetStop(stop,
true);
7154 _ASSERT(tlen == 3*(
int)protseq.size());
7156m = protseq.find(
"*");
7157 _ASSERT(m == protseq.size()-1);
7175 typedefvector<SFShiftsCluster> TFShiftsClusterVec;
7178TFShiftsClusterVec algn_fclusters;
7179algn_fclusters.reserve(algn.
Exons().size());
7183TInDels::const_iterator fi = fs.begin();
7186 while(fi != fs.end() && fi->IntersectingWith(e->
GetFrom(),e->
GetTo())) {
7187algn_fclusters.back().m_fshifts.push_back(*fi++);
7192 ITERATE(TFShiftsClusterVec, exon_cluster, algn_fclusters) {
7193pair<TIt,TIt> eq_rng = fshift_clusters.equal_range(*exon_cluster);
7194 for(TIt glob_cluster = eq_rng.first; glob_cluster != eq_rng.second; ++glob_cluster) {
7196 if(find(exon_cluster->m_fshifts.begin(),exon_cluster->m_fshifts.end(),*fi) == exon_cluster->m_fshifts.end())
7197 if(fi->IntersectingWith(exon_cluster->m_limits.GetFrom(),exon_cluster->m_limits.GetTo()))
7200 if(find(glob_cluster->m_fshifts.begin(),glob_cluster->m_fshifts.end(),*fi) == glob_cluster->m_fshifts.end())
7201 if(fi->IntersectingWith(glob_cluster->m_limits.GetFrom(),glob_cluster->m_limits.GetTo()))
7206pair<TIt,TIt> eq_rng = fshift_clusters.equal_range(*exon_cluster);
7207 for(TIt glob_cluster = eq_rng.first; glob_cluster != eq_rng.second;) {
7208exon_cluster->m_limits += glob_cluster->m_limits;
7209exon_cluster->m_fshifts.insert(exon_cluster->m_fshifts.end(),glob_cluster->m_fshifts.begin(),glob_cluster->m_fshifts.end());
7210fshift_clusters.erase(glob_cluster++);
7212 uniq(exon_cluster->m_fshifts);
7213fshift_clusters.insert(eq_rng.second, *exon_cluster);
7235clust_plus.push_back(algn);
7237clust_minus.push_back(algn);
7243 if(
a.FrameShifts().empty())
7247 intinframelength = 0;
7248 intoutframelength = 0;
7251 TInDelsindels =
a.GetInDels(left, right,
true);
7255inframelength +=
len;
7257outframelength +=
len;
7260 if(fs->IsDeletion()) {
7261frame = (frame+fs->Len())%3;
7263frame = (3+frame-fs->Len()%3)%3;
7269inframelength +=
len;
7271outframelength +=
len;
7273 returndouble(inframelength)/(inframelength + outframelength);
7278: mininframefrac(_mininframefrac), seq(_seq), scope(_scope), mrnaCDS(_mrnaCDS) {}
7284 virtual voidtransform_align(
CAlignModel& align);
7294 if(scope !=
NULL) {
7300mrna.SetWhole(*target_id);
7301 CFeat_CIfeat_ci(*scope, mrna, sel);
7304 const CSeq_id* cds_loc_seq_id = cds_loc.GetId();
7306 TSeqRangefeat_range = cds_loc.GetTotalRange();
7313 if(pos != mrnaCDS.end()) {
7314cds_on_mrna = pos->second;
7318 if(cds_on_mrna.
Empty())
7330 if(left < 0 || right < 0)
7333 CAlignMapalignmap_clipped(
a.GetAlignMap());
7339 if(!
a.Continuous())
7349 a.FrameShifts().clear();
7353 intlength = (
int)cds.size();
7355 if(length%3 != 0 || length < 6)
7361 for(
int i= 0;
i< length-3;
i+= 3) {
7373cdsinfo.
SetStop(stop,
true);
7377 b.FrameShifts().clear();
7384 for(TGeneModelList::iterator it = chains.begin(); it != chains.end();) {
7385TGeneModelList::iterator itt = it++;
7386 for(TGeneModelList::iterator jt = chains.begin(); jt != itt;) {
7387TGeneModelList::iterator jtt = jt++;
7388 if(itt->Strand() != jtt->Strand() || (itt->Score() !=
BadScore() && jtt->Score() !=
BadScore()))
continue;
7392 if(itt->isCompatible(*jtt) > 1) chains.erase(jtt);
7393}
else if(jtt->Score() !=
BadScore()) {
7394 if(itt->isCompatible(*jtt) > 1) {
7399}
else if(itt->AlignLen() > jtt->AlignLen()) {
7400 if(itt->isCompatible(*jtt) > 0) chains.erase(jtt);
7402 if(itt->isCompatible(*jtt) > 0) {
7421 _ASSERT( new_from-old_from >= trim && new_from <= e.
GetTo() );
7442TrimProtein(align, alignmap);
7444TrimTranscript(align, alignmap);
7454 for(CAlignModel::TExons::const_iterator piece_begin = align.
Exons().begin(); piece_begin != align.
Exons().end(); ++piece_begin) {
7455 _ASSERT( !piece_begin->m_fsplice );
7457CAlignModel::TExons::const_iterator piece_end;
7458 for(piece_end = piece_begin; piece_end != align.
Exons().end() && piece_end->m_ssplice; ++piece_end) ;
7465 a= piece_begin->GetFrom()+trim;
7471 b= piece_end->GetTo()-trim;
7473 if((
a!= piece_begin->GetFrom() ||
b!= piece_end->GetTo()) &&
b>
a) {
7476 if(newlimits.
NotEmpty() && piece_begin->GetTo() >= newlimits.
GetFrom() && piece_end->GetFrom() <= newlimits.
GetTo())
7480piece_begin = piece_end;
7506 if(align.
Exons().front().m_ssplice_sig ==
"XX")
7508 if(align.
Exons().back().m_fsplice_sig ==
"XX")
7513 if(cds_on_genome.
GetFrom() <
a) {
7516 if(
b< cds_on_genome.
GetTo()) {
7524 if(newlimits != align.
Limits()) {
7532 return newgnomon::TrimAlignment(
m_data->trim);
7551 return newgnomon::DoNotBelieveShortPolyATail(
m_data->minpolya);
7557 m_data->m_idnext = idnext;
7558 m_data->m_idinc = idinc;
7563 m_data->SetGenomicRange(alignments);
7574range +=
i->Limits();
7577 stringaccession =
i->TargetAccession();
7578 if(!prot_complet.count(accession)) {
7581prot_complet[accession] = make_pair(*protein_ci ==
'M',
true);
7589confirmed_ends.clear();
7590all_frameshifts.clear();
7591orig_aligns.clear();
7592unmodified_aligns.clear();
7593trusted_group_cds.clear();
7597rnaseq_count.clear();
7598oriented_introns_plus.clear();
7599oriented_introns_minus.clear();
7604 return newgnomon::ProjectCDS(
m_data->mininframefrac,
m_gnomon->GetSeq(),
7605 m_data->mrnaCDS.find(
"use_objmgr")!=
m_data->mrnaCDS.end() ? &scope :
NULL,
7619 return newgnomon::DoNotBelieveFrameShiftsWithoutCdsEvidence();
7623 if(
a.Limits() ==
b.Limits()) {
7624 if(
a.Type() ==
b.Type())
7625 return a.ID() <
b.ID();
7627 return a.Type() >
b.Type();
7629 else if(
a.Limits().GetFrom() ==
b.Limits().GetFrom())
7630 return a.Limits().GetTo() >
b.Limits().GetTo();
7632 return a.Limits().GetFrom() <
b.Limits().GetFrom();
7637 m_data->SetConfirmedStartStopForProteinAlignments(alignments);
7647map<string, pair<bool,bool> >::iterator iter = prot_complet.find(algn.
TargetAccession());
7648 _ASSERT(iter != prot_complet.end());
7649 if(iter == prot_complet.end())
7652 if(cds.
HasStart() && iter->second.first && alignedlim.
GetFrom() == 0)
7667 m_data->orig_aligns[
i->ID()]=&(*i);
7671 if(!
i->TrustedmRNA().empty() &&
i->Exons().size() > 1) {
7672 autotlim =
i->TranscriptLimits();
7673 if(
i->Exons().front().Limits().NotEmpty() && tlim.GetFrom() == 0)
7675 if(
i->Exons().back().Limits().NotEmpty() && (tlim.GetTo() ==
i->TargetLen()-1 || (
i->Status()&
CGeneModel::ePolyA)))
7680 TInDelsalignfshifts =
i->GetInDels(
true);
7686 if(fs->IntersectingWith(e->
GetFrom(),e->
GetTo())) {
7687efshifts.push_back(*fs);
7688 len+= (fs->IsInsertion() ? fs->Len() : -fs->Len());
7691 if(efshifts.empty())
7694 int a= efshifts.front().Loc()-1;
7695 int b= efshifts.back().InDelEnd();
7699 if(
len%3 != 0 || !confirmed_region) {
7701 int l= fs->Len()%3;
7702 if(fs->IsInsertion()) {
7717models.push_back(aa);
7721 for(
autoit_loop = models.begin(); it_loop != models.end(); ) {
7722 autoit = it_loop++;
7723 if(!
m_data->orig_aligns.count(it->ID())) {
7733arg_desc->
AddKey(
"param",
"param",
7734 "Organism specific parameters",
7741 "If aligned sequence is partial and includes a small portion of an exon the alignment program " 7742 "usually misses this exon and might erroneously place a few bases from this exon near the previous exon, " 7743 "and this will mess up the chaining. To prevent this we trim small portions of the alignment before chaining. " 7744 "If it is possible, the trimming will be reversed for the 5'/3' ends of the final chain. Must be < minex and " 7748arg_desc->
SetCurrentGroup(
"Additional information about sequences");
7750 "CDSes annotated on mRNAs. If CDS could be projected on genome with intact " 7751 "Start/Stop and frame the Stop will be accepted as is. The Start could/will " 7752 "be moved further to make the longest possible complete CDS within the chain",
7754arg_desc->
AddDefaultKey(
"mininframefrac",
"mininframefrac",
7755 "Some mRNA alignments have paired indels which throw a portion of CDS out of frame." 7756 "This parameter regulates how much of the CDS could suffer from this before CDS is considered inaceptable",
7759 "Information about protein 5' and 3' completeness",
7764 "Minimal coding propensity score for valid CDS. This threshold could be ignored depending on " 7765 "-longenoughcds or -protcdslen and -minprotfrac",
7767arg_desc->
AddDefaultKey(
"longenoughcds",
"longenoughcds",
7768 "Minimal CDS not supported by protein or annotated mRNA to ignore the score (bp)",
7771 "Minimal CDS supported by protein or annotated mRNA to ignore the score (bp)",
7774 "Minimal fraction of protein aligned to ignore " 7775 "the score and consider for confirmed start",
7778 "Some proteins aligned with better than -minprotfrac coverage are missing Start/Stop. " 7779 "If such an alignment was extended by EST(s) which provided a Start/Stop and we are not missing " 7780 "more than (1-endprotfrac)*proteinlength on either side this chain will be considered to have a confirmed Start/Stop",
7783 "Minimal overlap length for chaining alignments which don't have introns in the ovrlapping regions",
7786 "Minimal number of mRNA/EST for valid noncoding models",
7788arg_desc->
AddDefaultKey(
"minsupport_mrna",
"minsupport_mrna",
7789 "Minimal number of mRNA for valid noncoding models",
7791arg_desc->
AddDefaultKey(
"minsupport_rnaseq",
"minsupport_rnaseq",
7792 "Minimal number of RNA-Seq for valid noncoding models",
7795 "Chains with thorter CDS should be supported by trusted protein",
7797arg_desc->
AddDefaultKey(
"altfrac",
"altfrac",
"The CDS length of the principal model in the gene is multiplied by this fraction. Alt variants with the CDS length above " 7799arg_desc->
AddDefaultKey(
"longreadsthreshold",
"longreadsthreshold",
"If long reads support that many introns in alignment cluster " 7802arg_desc->
AddFlag(
"opposite",
"Allow overlap of complete multiexon genes with opposite strands");
7803arg_desc->
AddFlag(
"partialalts",
"Allows partial alternative variants. In combination with -nognomon will allow partial genes");
7805arg_desc->
AddFlag(
"no5pextension",
"Don't extend chain CDS to the leftmost start");
7807arg_desc->
SetCurrentGroup(
"Heuristic parameters for score evaluation");
7809 "5p intron penalty",
7812 "3p intron penalty",
7815 "Bonus for CDS length",
7818 "Penalty for total length",
7820arg_desc->
AddDefaultKey(
"utrclipthreshold",
"utrclipthreshold",
7821 "Relative coverage for clipping low support UTRs",
7826arg_desc->
AddDefaultKey(
"min-cap-weight",
"MinCapWeight",
7827 "Minimal accepted weight for a capped alignment",
7830 "Minimal cap blob weight for accepted peak",
7833arg_desc->
AddDefaultKey(
"min-polya-weight",
"MinPolyaWeight",
7834 "Minimal accepted weight for polya alignment",
7836arg_desc->
AddDefaultKey(
"min-polya-blob",
"MinPolyaBlob",
7837 "Minimal polya blob weight for accepted peak",
7841 "Maximal distance between individual cap/polya positions in a blob",
7843arg_desc->
AddDefaultKey(
"secondary-peak",
"SecondaryPeak",
7844 "Minimal weight fraction for a secondary cap/polya peak",
7846arg_desc->
AddDefaultKey(
"tertiary-peak",
"TertiaryPeak",
7847 "Last 5' exon is extended to low weight polya peak if there is sufficient rnaseq coverage",
7849arg_desc->
AddDefaultKey(
"tertiary-peak-coverage",
"TertiaryPeakCoverage",
7850 "Minimal relative rnaseq coverage for tertiary peak",
7853arg_desc->
AddDefaultKey(
"min-flank-exon",
"MinFlankExon",
7854 "The minimal distance of cap/polya to a splice",
7859 "Minimal accepted polyA tale length in transcript alignments",
7861arg_desc->
AddFlag(
"use_confirmed_ends",
"Use end exons of trusted transcripts for clippig/extension");
7881 m_data->minpolya = minpolya;
7889 m_data->mininframefrac = mininframefrac;
7893 return m_data->prot_complet;
7902 CNcbiIfstreamparam_file(args[
"param"].AsString().c_str());
7906chainer->
SetTrim(args[
"trim"].AsInteger());
7909minscor.
m_min= args[
"minscor"].AsDouble();
7913minscor.
m_cds_bonus= args[
"cdsbonus"].AsDouble();
7919minscor.
m_cds_len= args[
"longenoughcds"].AsInteger();
7921minscor.
m_minsupport= args[
"minsupport"].AsInteger();
7924minscor.
m_minlen= args[
"minlen"].AsInteger();
7928chainer->
m_data->altfrac = args[
"altfrac"].AsDouble();
7929chainer->
m_data->longreadsthreshold = args[
"longreadsthreshold"].AsDouble();
7930chainer->
m_data->composite = args[
"composite"].AsInteger();
7931chainer->
m_data->allow_opposite_strand = args[
"opposite"];
7932chainer->
m_data->allow_partialalts = args[
"partialalts"];
7933chainer->
m_data->tolerance = args[
"tolerance"].AsInteger();
7934chainer->
m_data->no5pextension = args[
"no5pextension"];
7936chainer->
m_data->min_cap_weight = args[
"min-cap-weight"].AsInteger();
7937chainer->
m_data->min_cap_blob = args[
"min-cap-blob"].AsInteger();
7938chainer->
m_data->min_polya_weight = args[
"min-polya-weight"].AsInteger();
7939chainer->
m_data->min_polya_blob = args[
"min-polya-blob"].AsInteger();
7940chainer->
m_data->max_dist = args[
"max-dist"].AsInteger();
7941chainer->
m_data->secondary_peak = args[
"secondary-peak"].AsDouble();
7942chainer->
m_data->tertiary_peak = args[
"tertiary-peak"].AsDouble();
7943chainer->
m_data->tertiary_peak_coverage = args[
"tertiary-peak-coverage"].AsDouble();
7944chainer->
m_data->min_flank_exon = args[
"min-flank-exon"].AsInteger();
7945chainer->
SetMinPolyA(args[
"minpolya"].AsInteger());
7946chainer->
m_data->use_confirmed_ends = args[
"use_confirmed_ends"];
7951map<string,TSignedSeqRange>& mrnaCDS = chainer->
SetMrnaCDS();
7952 if(args[
"mrnaCDS"]) {
7953 if(args[
"mrnaCDS"].AsString()==
"use_objmgr") {
7956 CNcbiIfstreamcdsfile(args[
"mrnaCDS"].AsString().c_str());
7959 stringaccession,
tmp;
7961 while(cdsfile >> accession >>
a>>
b) {
7963getline(cdsfile,
tmp);
7970map<string, pair<bool,bool> >& prot_complet = chainer->
SetProtComplet();
7971 if(args[
"pinfo"]) {
7978 while(protfile >> seqid_str >> fivep >> threep) {
7980prot_complet[seqid_str] = make_pair(fivep, threep);
7997TInDels::const_iterator ic = upper_bound(editing_indels_frombtoa.begin(), editing_indels_frombtoa.end(), exonb.
GetFrom(),
OverlappingIndel);
7999 if((ic == editing_indels_frombtoa.end() || ic->Loc() > exonb.
GetTo()+1) && exona_indels.empty())
8000 returncombined_indels;
8002 typedeflist<char> TCharList;
8010 for( ;
pb<= exonb.
GetTo(); ++
pb) {
8011 if(ic != editing_indels_frombtoa.end() && ic->Loc() <=
pb) {
8012 if(ic->IsInsertion()) {
8015 pb= ic->InDelEnd()-1;
8017 strings = ic->GetInDelV();
8019s = s.substr(ic->Len()-extra_left);
8020 edit.insert(
edit.end(),s.begin(),s.end());
8021 edit.push_back(
'M');
8025 edit.push_back(
'M');
8030 strings = ic->GetInDelV().substr(0,extra_right);
8031 edit.insert(
edit.end(),s.begin(),s.end());
8037 if(!exona_indels.empty()) {
8038TInDels::const_iterator jleft = exona_indels.begin();
8047 if(jleft != exona_indels.end() && jleft->Loc() == pa) {
8048 if(jleft->IsInsertion()) {
8050skipsome = jleft->Len();
8052 for(TCharList::iterator ipp =
ip; skipsome > 0 && ipp !=
edit.begin() && *(--ipp) !=
'-'&& *ipp !=
'M'; ) {
8054ipp =
edit.erase(ipp);
8058 intinsertsome = jleft->Len();
8059 for(reverse_iterator<TCharList::iterator> ir(
ip); insertsome > 0 && ir !=
edit.rend() && *ir ==
'-'; ++ir) {
8064 edit.insert(
ip,insertsome,
'N');
8073 else if(*
ip!=
'-')
8077 if(jleft != exona_indels.end()) {
8078 _ASSERT(jleft->IsDeletion() && jleft->Loc() == pa+1);
8079 intinsertsome = jleft->Len();
8080 for(TCharList::reverse_iterator ir =
edit.rbegin(); insertsome > 0 && ir !=
edit.rend() && *ir ==
'-'; ++ir) {
8085 edit.insert(
edit.end(),insertsome,
'N');
8092 for(TCharList::iterator
ip=
edit.begin();
ip!=
edit.end(); ) {
8096}
else if(*
ip==
'-') {
8098 for( ;
ip!=
edit.end() && *
ip==
'-'; ++
ip, ++
len);
8106 for( ;
ip!=
edit.end() && *
ip!=
'M'&& *
ip!=
'-'; ++
ip)
8113 returncombined_indels;
8122 for(
intie = 0; ie < (
int)srcmodel.
Exons().size(); ++ie) {
8130 intggapa =
i->first;
8131 intggapb =
i->first+(
int)
i->second->GetInDelV().length()-1;
8134src =
i->second->GetSource();
8139}
else if(ggapb == e.
GetTo()) {
8140 strings =
i->second->GetInDelV();
8142src =
i->second->GetSource();
8147}
else if(ggapb >= e.
GetFrom()) {
8154 if((
int)srcmodel.
Exons().size() == 1){
8165 if(exon.
Empty()) {
8169 intextra_right = e.
GetTo()-exon.
GetTo();
8177exon_indels.push_back(*indl);
8184 intloc = ir->first;
8185 charc = ir->second;
8186TInDels::const_iterator ic = upper_bound(efs.begin(), efs.end(), loc,
OverlappingIndel);
8187 if(ic != efs.end() && ic->IsInsertion() && ic->Loc() <= loc && ic->InDelEnd() > loc)
8189 else if(ic != efs.end() && ic->IsDeletion() && ic->Loc() == loc)
8191 else if(erepl.empty() || erepl.back().InDelEnd() != loc)
8194loc = erepl.back().Loc();
8195 strings = erepl.back().GetInDelV()+
string(1,c);
8199efs.insert(efs.end(), erepl.begin(), erepl.end());
8200 sort(efs.begin(), efs.end());
8201 for(
auto& indl : efs) {
8203editedframeshifts.push_back(indl);
8211 if(ie < (
int)srcmodel.
Exons().size()-1 && (!e.
m_ssplice|| !srcmodel.
Exons()[ie+1].m_fsplice))
8272 if(
i->IsMismatch()) {
8279 if(ic !=
m_editing_indels.end() &&
i->GetType() == ic->GetType() &&
i->Loc() >= ic->Loc() &&
i->InDelEnd() <= ic->InDelEnd()) {
8282}
else if(included && (ic ==
m_editing_indels.end() || ic->Loc() >
i->InDelEnd())) {
8304 for(
auto& indel : aindels)
8308 for(
auto& e : aexons) {
8318vector<TSignedSeqRange> transcript_exons;
8322 for(
int i= 0;
i< (
int)aexons.size(); ++
i) {
8326list<CInDelInfo> exon_indels;
8328 if(indl->IntersectingWith(e.
GetFrom(), e.
GetTo()))
8329exon_indels.push_back(*indl);
8333 intleft_shrink = 0;
8334 intright = e.
GetTo();
8335 intright_shrink = 0;
8336 intleft_extend = 0;
8337 intright_extend = 0;
8346 if(
i== (
int)aexons.size()-1)
8356 if(ileft !=
m_editing_indels.end() && ileft->IsDeletion() && ileft->Loc() == left) {
8357 if(!exon_indels.empty() && exon_indels.front().IsDeletion() && exon_indels.front().Loc() == left) {
8359left_extend =
min(ileft->Len(),exon_indels.front().Len());
8367ll = left_codon.
GetTo();
8371left = ileft->Loc()+ileft->Len();
8374left_shrink = left-e.
GetFrom();
8381lim.
SetFrom(ileft->InDelEnd());
8391 while(!exon_indels.empty() && exon_indels.front().InDelEnd() <= left)
8392exon_indels.pop_front();
8397TInDels::const_iterator first_outside = ileft;
8398 for( ; first_outside !=
m_editing_indels.end() && first_outside->Loc() <= (first_outside->IsInsertion() ? right : right+1); ++first_outside);
8399reverse_iterator<TInDels::const_iterator> iright(first_outside);
8402 if(iright !=
m_editing_indels.rend() && iright->IsDeletion() && iright->Loc() == right+1) {
8403 if(!exon_indels.empty() && exon_indels.back().IsDeletion() && exon_indels.back().Loc() == right+1) {
8405right_extend =
min(iright->Len(),exon_indels.back().Len());
8417right = iright->Loc()-1;
8420right_shrink = e.
GetTo()-right;
8427lim.
SetTo(iright->Loc()-1);
8436right = lim.
GetTo();
8437 while(!exon_indels.empty() && exon_indels.back().Loc() > right)
8438exon_indels.pop_back();
8445transcript_exons.push_back(texon);
8450corrected_exon.
SetFrom(corrected_exon.
GetFrom()-left_extend);
8451corrected_exon.
SetTo(corrected_exon.
GetTo()+right_extend);
8453 if(
i< (
int)aexons.size()-1 && (!aexons[
i].m_ssplice || !aexons[
i+1].m_fsplice))
8458editedindels.insert(editedindels.end(), efs.begin(), efs.end());
8461 stringgap_seq = e.
m_seq;
8467 if(ig->GetSource().m_range.NotEmpty()) {
8468 if(
i> 0 && ig->Loc() < aexons[
i-1].GetTo())
8470 if(
i== 0 && ig->Loc() > aexons[
i+1].GetFrom())
8472 if(ig->GetInDelV() == gap_seq) {
8483 if(left_end >= 0) {
8484left_end -= gap->Len();
8485 for(TInDels::const_iterator ig = gap+1; ig !=
m_editing_indels.end() && ig->Loc() == gap->Loc(); ++ig)
8486left_end -= ig->Len();
8491 for(TInDels::const_iterator ig = gap; ig !=
m_editing_indels.begin() && (ig-1)->
Loc() == gap->Loc(); --ig) {
8492left_end += (ig-1)->
Len();
8508 doublescore = acds.
Score();
8522 if(
a.Limits().NotEmpty()) {
8523 a.SetTargetId(*ia->GetTargetId());
8526alignments.erase(ia);
8535 _ASSERT(!ia->Exons().empty());
8561 intlength (sv.size());
8563sv.GetSeqData(0, length, seq_txt);
8565 TIVecexons(length,0);
8577 for(
intp =
a+1; p <=
b; ++p) {
8584 TIVecmodel_ranges(length,0);
8587 for(
intp =
max(0,
i->Limits().GetFrom()-2); p <=
min(length-1,
i->Limits().GetTo()+2); ++p)
8588model_ranges[p] = 1;
8592 if(indl->IsMismatch()) {
8593 strings = indl->GetInDelV();
8594 for(
int l= 0;
l< indl->Len(); ++
l)
8605 for(
intie = 0; ie < (
int)
i->Exons().size(); ++ie) {
8610 _ASSERT(
i->Exons()[ie-1].Limits().NotEmpty());
8611 for(pos =
i->Exons()[ie-1].GetTo()+1; pos < length && exons[pos] > 0; ++pos);
8613 _ASSERT((
int)
i->Exons().size() > 1 &&
i->Exons()[1].Limits().NotEmpty());
8615 for(pos =
i->Exons()[1].GetFrom(); pos > 0 && exons[pos] > 0; --pos);
8617 stringseq = e.
m_seq;
8619 if(
i->Strand() ==
eMinus) {
8630TInDels::iterator
next= indl;
8632 if(indl->GetSource().m_range.Empty() &&
next->GetSource().m_range.Empty()) {
8641 for(
int i= 0;
i< length; ++
i) {
8642 if(model_ranges[
i])
8650++current_gap->second;
8675 intGC_RANGE = 200000;
8678length =
limits.GetLength();
8680seq.reserve(length);
8682seq.push_back(sv[
i]);
8697 for(
CFeat_CIit(bh, sel); it; ++it) {
8698 TSeqRangerange = it->GetLocation().GetTotalRange();
8699 for(
unsigned int i= range.
GetFrom();
i<= range.
GetTo(); ++
i) {
8717seq[ir->first-
limits.GetFrom()] = ir->second;
8722 #define BLOCK_OF_Ns 35 8724 if(cor.GetSource().m_range.Empty() &&
Include(
limits, cor.Loc())) {
8725cor.SetLoc(cor.Loc()-
limits.GetFrom());
8727}
else if(cor.Loc() >=
limits.GetFrom() && cor.Loc() <=
limits.GetTo()+1) {
8728 int l= cor.Loc()-
limits.GetFrom();
8729 CInDelInfo g(
l, cor.Len(), cor.GetType(), cor.GetInDelV(), cor.GetSource());
8742 swap(seq, editedseq);
8756TInDels::const_iterator nexti =
next(ig);
8757 if(nexti !=
m_editing_indels.end() && nexti->GetSource().m_range.NotEmpty() && nexti->Loc() == ig->Loc())
8760 if(ig->GetSource().m_range.NotEmpty()) {
8762 if(left_end >= 0) {
8763left_end -= ig->Len();
8764 for(TInDels::const_iterator igg = ig+1; igg !=
m_editing_indels.end() && igg->Loc() == ig->Loc(); ++igg)
8765left_end -= igg->Len();
8770 for(TInDels::const_iterator
i= ig;
i!=
m_editing_indels.begin() && (
i-1)->
Loc() == ig->Loc(); --
i) {
8771left_end += (
i-1)->
Len();
8779 if(ig->IsInsertion()) {
8780 strings(seq.begin()+ig->Loc(), seq.begin()+ig->Len());
8793 for(
intp = lim.
GetFrom(); p <= lim.
GetTo(); ++p)
8794confirmed_bases.
insert(p);
8801++cbase_len->second;
8811 TIntMapnotbridgeable_gaps_len;
8815notbridgeable_gaps_len[pos] = ig->second;
8859: hthresh(_hthresh), hmaxlen(_hmaxlen), gnomon(_gnomon) {}
8864 inttotal_hole_len = 0;
8865 for(
unsigned int i= 1;
i< m.
Exons().
size(); ++
i) {
8866 if(!m.
Exons()[
i-1].m_ssplice || !m.
Exons()[
i].m_fsplice)
8867total_hole_len += m.
Exons()[
i].GetFrom()-m.
Exons()[
i-1].GetTo()-1;
8872 for(
unsigned int i= 1;
i< m.
Exons().
size(); ++
i) {
8873 boolhole = !m.
Exons()[
i-1].m_ssplice || !m.
Exons()[
i].m_fsplice;
8874 intintron = m.
Exons()[
i].GetFrom()-m.
Exons()[
i-1].GetTo()-1;
8894 for(
unsigned int i= 1;
i< m.
Exons().
size(); ++
i) {
8895 boolhole = !m.
Exons()[
i-1].m_ssplice || !m.
Exons()[
i].m_fsplice;
8896 intintron = m.
Exons()[
i].GetFrom()-m.
Exons()[
i-1].GetTo()-1;
8909 for(
unsigned int i= 1;
i< m.
Exons().
size(); ++
i) {
8910 boolhole = !m.
Exons()[
i-1].m_ssplice || !m.
Exons()[
i].m_fsplice;
8911 intintron = m.
Exons()[
i].GetFrom()-m.
Exons()[
i-1].GetTo()-1;
8924 intexonlen = alignmap.
FShiftedLen(shrinkedexon,
false);
8930 if(
a.Exons().empty())
8936 a.CutExons(
a.Limits());
8942 if((
a.Exons().size() > 1 && !
a.Exons().front().m_ssplice) || (
a.Type() &
CAlignModel::eProt)==0 || !
a.LeftComplete()) {
8943 for(
unsigned int i= 0;
i<
a.Exons().size()-1; ++
i) {
8947left =
a.Exons()[
i+1].GetFrom();
8957 if((
a.Exons().size() > 1 && !
a.Exons().back().m_fsplice) || (
a.Type() &
CAlignModel::eProt)==0 || !
a.RightComplete()) {
8958 for(
auto i=
a.Exons().size()-1;
i> 0; --
i) {
8962right =
a.Exons()[
i-1].GetTo();
8974 if(newlimits !=
a.Limits()) {
8976 a.CutExons(
a.Limits());
8982 a.CutExons(
a.Limits());
8987 for(
size_t i= 1;
i<
a.Exons().
size()-1; ++
i) {
8994e = &
a.Exons()[0];
9001 if(remainingpoint <
a.Exons()[
i-1].GetTo())
9002left = remainingpoint+1;
9005e = &
a.Exons()[
i];
9010 if(
i==
a.Exons().size()-1) {
9018 if(remainingpoint >
a.Exons()[
i+1].GetFrom())
9019right = remainingpoint-1;
9022e = &
a.Exons()[
i];
9030 returnm.
Exons().empty();
9044: minsupport(_minsupport)
#define SPECIAL_ALIGN_LEN
list< CChain > TChainList
static int s_ExonLen(const CGeneModel &a)
static bool DescendingModelOrder(const CChain &a, const CChain &b)
bool GoodSupportForIntrons(const CGeneModel &chain, const SMinScor &minscor, map< TSignedSeqRange, int > &mrna_count, map< TSignedSeqRange, int > &est_count, map< TSignedSeqRange, int > &rnaseq_count)
void MarkUnwantedLowSupportIntrons(TContained &pointers, const SMinScor &minscor, map< TSignedSeqRange, int > &mrna_count, map< TSignedSeqRange, int > &est_count, map< TSignedSeqRange, int > &rnaseq_count)
TInDels CombineCorrectionsAndIndels(const TSignedSeqRange exona, int extra_left, int extra_right, const TSignedSeqRange exonb, const TInDels &editing_indels_frombtoa, const TInDels &exona_indels)
vector< SChainMember * > TContained
static bool DescendingModelOrderPConsistentCoverage(const TChainPtr &a, const TChainPtr &b)
bool MemberIsMarkedForDeletion(const SChainMember *mp)
map< Int8, CGeneModel > TUnmodAligns
#define NON_CDNA_INTRON_PENALTY
TInDels StrictlyContainedInDels(const TInDels &indels, const TSignedSeqRange &lim)
TIdLim AlignIdLimits(SChainMember *mp)
bool OverlappingIndel(int pos, const CInDelInfo &indl)
pair< string, int > GetAccVer(const CAlignModel &a, CScope &scope)
string FindMultiplyIncluded(CAlignModel &algn, TAlignModelList &clust)
list< CChain * > TChainPointerList
double InframeFraction(const CGeneModel &a, TSignedSeqPos left, TSignedSeqPos right)
tuple< Int8, TSignedSeqRange > TIdLim
set< TSignedSeqRange, RangeOrder > TRangePrecedeSet
int EffectiveExonLength(const CModelExon &e, const CAlignMap &alignmap, bool snap_to_codons)
map< Int8, CAlignModel * > TOrigAligns
static bool DescendingModelOrderP(const TChainPtr &a, const TChainPtr &b)
bool MemberIsCoding(const SChainMember *mp)
vector< pair< SChainMember *, CGene * > > TMemeberGeneVec
bool LeftAndLongFirst(const CGeneModel &a, const CGeneModel &b)
string GetLinkedIdsForMember(const SChainMember &mi)
TSignedSeqRange ExtendedMaxCdsLimits(const CGeneModel &a, const CCDSInfo &cds)
bool BelongToExon(const CGeneModel::TExons &exons, int pos)
vector< SLinker > TLinkers
set< SChainMember * > TMemberPtrSet
void remove_if(Container &c, Predicate *__pred)
TSignedSeqRange ShrinkToRealPointsOnEdited(TSignedSeqRange edited_range) const
void MoveOrigin(TSignedSeqPos shift)
TSignedSeqRange MapRangeEditedToOrig(TSignedSeqRange edited_range, bool withextras=true) const
TSignedSeqPos MapOrigToEdited(TSignedSeqPos orig_pos) const
void EditedSequence(const In &original_sequence, Out &edited_sequence, bool includeholes=false) const
TSignedSeqPos MapEditedToOrig(TSignedSeqPos edited_pos) const
TSignedSeqRange ShrinkToRealPoints(TSignedSeqRange orig_range, bool snap_to_codons=false) const
int FShiftedLen(TSignedSeqRange ab, ERangeEnd lend, ERangeEnd rend) const
TSignedSeqRange MapRangeOrigToEdited(TSignedSeqRange orig_range, ERangeEnd lend, ERangeEnd rend) const
string TargetAccession() const
virtual void Clip(TSignedSeqRange limits, EClipMode mode, bool ensure_cds_invariant=true)
CConstRef< objects::CSeq_id > GetTargetId() const
virtual CAlignMap GetAlignMap() const
CCDSInfo MapFromEditedToOrig(const CAlignMap &amap) const
bool PStop(bool includeall=true) const
void SetStart(TSignedSeqRange r, bool confirmed=false)
CCDSInfo MapFromOrigToEdited(const CAlignMap &amap) const
TSignedSeqRange MaxCdsLimits() const
void Set5PrimeCdsLimit(TSignedSeqPos p)
bool IsMappedToGenome() const
void SetScore(double score, bool open=false)
TSignedSeqRange Start() const
bool ConfirmedStart() const
void Clip(TSignedSeqRange limits)
void AddPStop(SPStop stp)
TSignedSeqRange Cds() const
void CombineWith(const CCDSInfo &another_cds_info)
TSignedSeqRange ReadingFrame() const
TSignedSeqRange ProtReadingFrame() const
void SetStop(TSignedSeqRange r, bool confirmed=false)
const TPStops & PStops() const
bool ConfirmedStop() const
void Clear5PrimeCdsLimit()
void SetReadingFrame(TSignedSeqRange r, bool protein=false)
TSignedSeqRange Stop() const
void SpliceFromOther(CChainMembers &other)
CChainMembers & operator=(const CChainMembers &object)=delete
list< CAlignMap > m_align_maps
void InsertMemberCopyWithCds(const CCDSInfo &cds, SChainMember *copy_ofp)
list< CCDSInfo > m_extra_cds
void InsertMemberCopyAndStoreCds(const CCDSInfo &cds, SChainMember *copy_ofp)
list< TContained > m_copylist
list< TContained > m_containedlist
list< SChainMember > m_members
CChainMembers(const CChainMembers &object)=delete
void InsertMember(CGeneModel &algn, SChainMember *copy_ofp=0)
void DuplicateUTR(SChainMember *copy_ofp)
void InsertMemberCopyWithoutCds(SChainMember *copy_ofp)
void SetBestPlacement(TOrigAligns &orig_aligns)
void AddAllMembersAndCoverage(SChainMember &mbr)
void ClipChain(TSignedSeqRange limits, bool recalulate_support=true)
void SetOpenForPartialyAlignedProteins(map< string, pair< bool, bool > > &prot_complet)
TSignedSeqRange m_supported_range
void RemoveFshiftsFromUTRs()
void RestoreTrimmedEnds(int trim)
void CalculateDropLimits()
void ClipLowCoverageUTR(double utr_clip_threshold, bool recalulate_support=true)
bool HarborsNested(const CChain &other_chain, bool check_in_holes) const
int m_polya_cap_right_soft_limit
tuple< TIDMap, TSignedSeqRange > PeaksAndLimits(EStatus determinant, int min_blob_weight, int max_empty_dist, int min_splice_dist)
void CalculateSupportAndWeightFromMembers(bool keep_all_evidence=false)
int m_coverage_drop_right
map< int, double > TIDMap
vector< double > m_coverage
void SetConfirmedStartStopForCompleteProteins(map< string, pair< bool, bool > > &prot_complet, const SMinScor &minscor)
void SetConsistentCoverage()
void ClipToCap(int min_cap_blob, int max_dist, int min_flank_exon, double secondary_peak, bool recalulate_support=true)
void CollectTrustedmRNAsProts(TOrigAligns &orig_aligns, const SMinScor &minscor, CScope &scope, SMatrix &matrix, const CResidueVec &contig)
void ClipToPolyA(const CResidueVec &contig, int min_polya_blob, int max_dist, int min_flank_exon, double secondary_peak, double tertiary_peak, double tertiary_peak_coverage, bool recalulate_support=true)
pair< bool, bool > ValidPolyA(int pos, const CResidueVec &contig)
bool SetConfirmedEnds(const CGnomonEngine &gnomon, CGnomonAnnotator_Base::TIntMap &confirmed_ends)
CGeneModel m_gapped_helper_align
bool RestoreReasonableConfirmedStart(const CGnomonEngine &gnomon, TOrigAligns &orig_aligns)
int m_polya_cap_left_soft_limit
bool HasTrustedEvidence() const
tuple< TIVec, TSignedSeqRange > MainPeaks(TIDMap &peak_weights, double secondary_peak, double tertiary_peak, double tertiary_peak_coverage, bool right_end)
void CheckSecondaryCapPolyAEnds()
int m_coverage_bump_right
static void ArgsToChainer(CChainer *chainer, const CArgs &args, objects::CScope &scope)
static void SetupArgDescriptions(CArgDescriptions *arg_desc)
map< TSignedSeqRange, int > rnaseq_count
void LRIinit(SChainMember &mi, const TContained &micontained)
void CreateChainsForPartialProteins(TChainList &chains, TContained &pointers, TGeneModelList &unma_aligns, CChainMembers &unma_members)
const CAlignMap & m_edited_contig_map
void ReplicatePStops(CChainMembers &pointers)
bool CanIncludeJinI(const SChainMember &mi, const SChainMember &mj)
map< string, pair< bool, bool > > prot_complet
ECompat CheckCompatibility(const CGene &gene, const CChain &algn)
void IncludeInContained(SChainMember &big, SChainMember &small)
void DuplicateNotOriented(CChainMembers &pointers, TGeneModelList &clust)
void FindContainedAlignments(TContained &pointers)
void SetGenomicRange(const TAlignModelList &alignments)
set< TSignedSeqPos > TSplices
void CreateFlexibleAligns(TGeneModelList &clust)
const string & m_contig_acc
void SplitAlignmentsByStrand(const TGeneModelList &clust, TGeneModelList &clust_plus, TGeneModelList &clust_minus)
void CDNACounts(TGeneModelList &clust)
map< TSignedSeqRange, int > mrna_count
void FilterOutSimilarsWithLowerScore(TChainPointerList ¬_placed_yet, TChainPointerList &rejected)
void FindOverlappingWithTrusted(CChainMembers &pointers)
void CombineCompatibleChains(TChainList &chains)
bool AddIfCompatible(set< SFShiftsCluster > &fshift_clusters, const CGeneModel &algn)
void RemovePoorCds(CGeneModel &algn, double minscor)
map< int, TSignedSeqRange > trusted_group_cds
bool allow_opposite_strand
void RightLeft(TContained &pointers)
map< string, TSignedSeqRange > mrnaCDS
unique_ptr< CGnomonEngine > & m_gnomon
set< pair< Int8, string > > TIDs
map< int, TGRInfoVec > TGRInfoByStrand
void FilterOutBadScoreChainsHavingBetterCompatibles(TGeneModelList &chains)
void LeftRight(TContained &pointers)
list< CGene > FindGenes(TChainList &cls)
SChainMember * FindOptimalChainForProtein(TContained &pointers_all, vector< CGeneModel * > &parts, CGeneModel &palign)
void ReplacePseudoGeneSeeds(list< CGene > &alts, TChainPointerList ¬_placed_yet)
void CalculateSpliceWeights(CChainMembers &pointers)
const TSignedSeqRange & m_limits
vector< STGRInfo > TGRInfoVec
double tertiary_peak_coverage
double longreadsthreshold
CChainerImpl(CRef< CHMMParameters > &hmm_params, unique_ptr< CGnomonEngine > &gnomon, const CAlignMap &edited_contig_map, const TSignedSeqRange &limits, const string &m_contig_acc)
set< TSignedSeqRange > oriented_introns_plus
void SetConfirmedStartStopForProteinAlignments(TAlignModelList &alignments)
void CutParts(TGeneModelList &clust)
TGeneModelList MakeChains(TGeneModelList &models)
void FilterOutChimeras(TGeneModelList &clust)
void DuplicateUTRs(CChainMembers &pointers)
void SetFlagsForChains(TChainList &chains)
void FilterOutTandemOverlap(TChainPointerList ¬_placed_yet, TChainPointerList &rejected, double fraction)
void PlaceAllYouCan(list< CGene > &alts, TChainPointerList ¬_placed_yet, TChainPointerList &rejected)
void Duplicate5pendsAndShortCDSes(CChainMembers &pointers)
void FindAltsForGeneSeeds(list< CGene > &alts, TChainPointerList ¬_placed_yet)
TUnmodAligns unmodified_aligns
double GoodCDNAScore(const CGeneModel &algn, bool simple=false)
map< TSignedSeqRange, int > est_count
void FindGeneSeeds(list< CGene > &alts, TChainPointerList ¬_placed_yet)
void ScoreCdnas(CChainMembers &pointers)
bool FsTouch(const TSignedSeqRange &lim, const CInDelInfo &fs)
void SkipReason(CGeneModel *orig_align, const string &comment)
void TrimAlignmentsIncludedInDifferentGenes(list< CGene > &genes)
CRef< CHMMParameters > & m_hmm_params
set< TSignedSeqRange > oriented_introns_minus
bool LRCanChainItoJ(int &delta_cds, double &delta_num, double &delta_splice_num, SChainMember &mi, SChainMember &mj, TContained &contained, bool ¬_sorted)
void CutParts(TGeneModelList &models)
map< string, pair< bool, bool > > & SetProtComplet()
TransformFunction * TrimAlignment()
void SetMinInframeFrac(double mininframefrac)
void SetNumbering(int idnext, int idinc)
void SetIntersectLimit(int value)
Predicate * OverlapsSameAccessionAlignment(TAlignModelList &alignments)
void ScoreCDSes_FilterOutPoorAlignments(TGeneModelList &clust)
void SetConfirmedStartStopForProteinAlignments(TAlignModelList &alignments)
void FindSelenoproteinsClipProteinsToStartStop(TGeneModelList &clust)
TGeneModelList MakeChains(TGeneModelList &models)
void SetMinPolyA(int minpolya)
TransformFunction * ProjectCDS(objects::CScope &scope)
void DropAlignmentInfo(TAlignModelList &alignments, TGeneModelList &models)
TransformFunction * DoNotBelieveShortPolyATail()
void FilterOutChimeras(TGeneModelList &clust)
map< string, TSignedSeqRange > & SetMrnaCDS()
void SetGenomicRange(const TAlignModelList &alignments)
Predicate * ConnectsParalogs(TAlignModelList &alignments)
TransformFunction * DoNotBelieveFrameShiftsWithoutCdsEvidence()
unique_ptr< CChainerImpl > m_data
TSignedSeqRange SubjectRange() const
void AddExon(TSignedSeqRange exon, const string &fs="", const string &ss="", double ident=0, const string &seq="", const CInDelInfo::SSource &src=CInDelInfo::SSource())
TSignedSeqPos FShiftedMove(TSignedSeqPos pos, int len) const
void ExtendRight(int amount)
const list< CRef< CSeq_id > > & TrustedProt() const
bool GoodEnoughToBeAnnotation() const
void SetTrustedCds(const TSignedSeqRange &r)
string GetProtein(const CResidueVec &contig_sequence) const
void AddNormalExon(TSignedSeqRange exon, const string &fs, const string &ss, double ident, bool infront)
bool Open5primeEnd() const
virtual void CutExons(TSignedSeqRange hole)
int MutualExtension(const CGeneModel &a) const
bool IntersectingWith(const CGeneModel &a) const
TSignedSeqRange TranscriptLimits() const
void Extend(const CGeneModel &a, bool ensure_cds_invariant=true)
EStrand Orientation() const
void InsertTrustedProt(CRef< CSeq_id > g)
int FShiftedLen(TSignedSeqRange ab, bool withextras=true) const
TSignedSeqRange TrustedCds() const
bool IdenticalAlign(const CGeneModel &a) const
const CSupportInfoSet & Support() const
bool OpenRightEnd() const
const TExons & Exons() const
bool LeftComplete() const
TSignedSeqRange ReadingFrame() const
bool RightComplete() const
virtual CAlignMap GetAlignMap() const
TSignedSeqRange RealCdsLimits() const
TSignedSeqRange TranscriptExon(int i) const
void ReverseComplementModel()
void SetStrand(EStrand s)
void ReplaceSupport(const CSupportInfoSet &support_set)
virtual void Clip(TSignedSeqRange limits, EClipMode mode, bool ensure_cds_invariant=true)
void SetCdsInfo(const CCDSInfo &cds_info)
bool ConfirmedStop() const
void InsertTrustedmRNA(CRef< CSeq_id > g)
void AddGgapExon(double ident, const string &seq, const CInDelInfo::SSource &src, bool infront)
bool AddSupport(const CSupportInfo &support)
TSignedSeqRange Limits() const
const list< CRef< CSeq_id > > & TrustedmRNA() const
void AddComment(const string &comment)
bool IsSubAlignOf(const CGeneModel &a) const
int HasCompatibleOverlap(const CGeneModel &a, int min_overlap=2) const
const CCDSInfo & GetCdsInfo() const
vector< CModelExon > TExons
TSignedSeqRange MaxCdsLimits() const
bool ConfirmedStart() const
void ExtendLeft(int amount)
const vector< CCDSInfo > * GetEdgeReadingFrames() const
void SetTrustedGroup(int tgr)
TInDels GetInDels(bool fs_only) const
bool PStop(bool includeall=true) const
int isCompatible(const CGeneModel &a) const
bool HarborsNested(const CChain &other_chain, bool check_in_holes) const
bool HarborsRange(TSignedSeqRange range, bool check_in_holes) const
set< CGene * > m_nested_in_genes
set< CGene * > m_harbors_genes
bool IsAllowedAlternative(const ncbi::gnomon::CGeneModel &, int maxcomposite) const
TSignedSeqRange RealCdsLimits() const
list< CGeneModel >::const_iterator TConstIt
void RemoveFromHarbored(CGene *p)
set< CGene * > RemoveGeneFromOtherGenesSets()
TSignedSeqRange m_real_cds_limits
list< CGeneModel >::iterator TIt
void AddToHarbored(CGene *p)
bool IsAlternative(const CChain &a) const
void RemoveFromNestedIn(CGene *p)
void AddToNestedIn(CGene *p)
TSignedSeqRange Limits() const
bool LargeCdsOverlap(const CGeneModel &a) const
CAlignModel MapOneModelToEditedContig(const CGeneModel &align) const
CGeneModel MapOneModelToOrigContig(const CGeneModel &srcmodel) const
void SetHMMParameters(CHMMParameters *params)
unique_ptr< CGnomonEngine > m_gnomon
virtual ~CGnomonAnnotator_Base()
map< int, char > m_replaced_bases
CGnomonEngine & GetGnomon()
void MapModelsToOrigContig(TGeneModelList &models) const
void SetGenomic(const CResidueVec &seq)
TGgapInfo m_inserted_seqs
TInDels m_reversed_corrections
void MapModelsToEditedContig(TGeneModelList &models) const
map< int, char > m_replacements
unique_ptr< SPhyloCSFSlice > m_pcsf_slice
const CPhyloCSFData * m_pcsf_data
void MapAlignmentsToEditedContig(TAlignModelList &alignments) const
TIntMap m_confirmed_bases_len
TIntMap m_confirmed_bases_orig_len
CAlignMap m_edited_contig_map
CRef< CHMMParameters > m_hmm_params
TIntMap m_notbridgeable_gaps_len
double GetChanceOfIntronLongerThan(int l) const
const CResidueVec & GetSeq() const
int GetMaxIntronLen() const
void GetScore(CGeneModel &model, bool extend5p=false, bool obeystart=false, bool extend_max_cds=false) const
int GetMinIntronLen() const
HMM model parameters just create it and pass to a Gnomon engine.
static CRef< CSeq_id > ToSeq_id(const string &str)
static string ToString(const CSeq_id &id)
CConstRef< CSeq_id > ToCanonical(const CSeq_id &id) const
TSignedSeqPos Loc() const
static bool HaveCommonExonOrIntron(const CGeneModel &a, const CGeneModel &b)
static bool RangeNestedInIntron(TSignedSeqRange r, const CGeneModel &algn, bool check_in_holes=true)
static size_t CountCommonSplices(const CGeneModel &a, const CGeneModel &b)
static bool AreSimilar(const CGeneModel &a, const CGeneModel &b, int tolerance)
CInDelInfo::SSource m_source
TSignedSeqPos GetFrom() const
const TSignedSeqRange & Limits() const
TSignedSeqPos GetTo() const
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string Sample usage:
SPhyloCSFSlice * CreateSliceForContig(const string &contig_acc) const
container_type::const_iterator const_iterator
container_type::iterator iterator
const_iterator begin() const
const_iterator end() const
const_iterator lower_bound(const key_type &key) const
iterator_bool insert(const value_type &val)
const_iterator upper_bound(const key_type &key) const
container_type::value_type value_type
const_iterator find(const key_type &key) const
iterator_bool insert(const value_type &val)
const_iterator begin() const
parent_type::iterator iterator
const_iterator upper_bound(const key_type &key) const
const_iterator find(const key_type &key) const
const_iterator end() const
const_iterator lower_bound(const key_type &key) const
static const char si[8][64]
static vector< string > arr
bool Empty(const CNcbiOstrstream &src)
struct parameters_t * pb[]
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
static DLIST_TYPE *DLIST_NAME() last(DLIST_LIST_TYPE *list)
static DLIST_TYPE *DLIST_NAME() prev(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
static DLIST_TYPE *DLIST_NAME() next(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
static const TDS_WORD limits[]
CCigar LclAlign(const char *query, int querylen, const char *subject, int subjectlen, int gopen, int gapextend, const char delta[256][256])
int MaxCommonInterval(const T &a, const T &b, int long_enough=numeric_limits< int >::max())
vector< TResidue > CResidueVec
bool Precede(TSignedSeqRange l, TSignedSeqRange r)
set< CSupportInfo > CSupportInfoSet
list< CAlignModel > TAlignModelList
bool Include(TSignedSeqRange big, TSignedSeqRange small)
void ReverseComplement(const BidirectionalIterator &first, const BidirectionalIterator &last)
list< CGeneModel > TGeneModelList
bool IsStopCodon(const Res *seq, int strand=ePlus)
EStrand OtherStrand(EStrand s)
vector< CInDelInfo > TInDels
list< Model > GetAlignParts(const Model &algn, bool settrimflags)
bool IsStartCodon(const Res *seq, int strand=ePlus)
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define ERASE_ITERATE(Type, Var, Cont)
Non-constant version with ability to erase current element, if container permits.
int TSignedSeqPos
Type for signed sequence position.
#define VECTOR_ERASE(Var, Cont)
Use this macro inside body of ERASE_ITERATE cycle to erase from vector-like container.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
void AddFlag(const string &name, const string &comment, CBoolEnum< EFlagValue > set_value=eFlagHasValueIfSet, TFlags flags=0)
Add description for flag argument.
void AddKey(const string &name, const string &synopsis, const string &comment, EType type, TFlags flags=0)
Add description for mandatory key.
void AddOptionalKey(const string &name, const string &synopsis, const string &comment, EType type, TFlags flags=0)
Add description for optional key without default value.
void SetCurrentGroup(const string &group)
Set current arguments group name.
void AddDefaultKey(const string &name, const string &synopsis, const string &comment, EType type, const string &default_value, TFlags flags=0, const string &env_var=kEmptyStr, const char *display_value=nullptr)
Add description for optional key with default value.
@ eInputFile
Name of file (must exist and be readable)
@ eDouble
Convertible into a floating point number (double)
@ eInteger
Convertible into an integer number (int or Int8)
#define LOG_POST(message)
This macro is deprecated and it's strongly recomended to move in all projects (except tests) to macro...
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
CConstRef< CSeq_id > GetSeqId(void) const
string AsString(void) const
const CTextseq_id * GetTextseq_Id(void) const
Return embedded CTextseq_id, if any.
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
bool IsSameBioseq(const CSeq_id &id1, const CSeq_id &id2, CScope *scope, CScope::EGetBioseqFlag get_flag=CScope::eGetBioseq_All)
Determines if two CSeq_ids represent the same CBioseq.
@ eGetId_ForceAcc
return only an accession based seq-id
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
void AddDefaults(TPriority pri=kPriority_Default)
Add default data loaders from object manager.
@ eCoding_Iupac
Set coding to printable coding (Iupacna or Iupacaa)
SAnnotSelector & IncludeFeatSubtype(TFeatSubtype subtype)
Include feature subtype in the search.
SAnnotSelector & SetResolveAll(void)
SetResolveAll() is equivalent to SetResolveMethod(eResolve_All).
bool IsSetPartial(void) const
SAnnotSelector & SetAdaptiveDepth(bool value=true)
SetAdaptiveDepth() requests to restrict subsegment resolution depending on annotations found on lower...
const CSeq_feat & GetMappedFeature(void) const
Feature mapped to the master sequence.
SAnnotSelector & AddNamedAnnots(const CAnnotName &name)
Add named annot to set of annots names to look for.
SAnnotSelector & SetFeatSubtype(TFeatSubtype subtype)
Set feature subtype (also set annotation and feat type)
const_iterator begin(void) const
const_iterator end(void) const
#define numeric_limits
Pre-declaration of the "numeric_limits<>" template Forcibly overrides (using preprocessor) the origin...
int64_t Int8
8-byte (64-bit) signed integer
position_type GetLength(void) const
bool NotEmpty(void) const
bool IntersectingWith(const TThisType &r) const
static TThisType GetEmpty(void)
static position_type GetWholeFrom(void)
CRange< TSignedSeqPos > TSignedSeqRange
static TThisType GetWhole(void)
static position_type GetWholeTo(void)
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_SCOPE(ns)
Define a new scope.
IO_PREFIX::ifstream CNcbiIfstream
Portable alias for ifstream.
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
static string & ToUpper(string &str)
Convert string to upper case â string& version.
static int CompareCase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-sensitive compare of a substring with another string.
@ fSplit_MergeDelimiters
Merge adjacent delimiters.
double Restart(void)
Return time elapsed since first Start() or last Restart() call (in seconds).
double Elapsed(void) const
Return time elapsed since first Start() or last Restart() call (in seconds).
void SetFrom(TFrom value)
Assign a value to From data member.
TTo GetTo(void) const
Get the To member data.
TFrom GetFrom(void) const
Get the From member data.
void SetTo(TTo value)
Assign a value to To data member.
const TLocation & GetLocation(void) const
Get the Location member data.
bool IsSetAccession(void) const
Check if a value has been assigned to Accession data member.
TVersion GetVersion(void) const
Get the Version member data.
bool IsSetVersion(void) const
Check if a value has been assigned to Version data member.
const TAccession & GetAccession(void) const
Get the Accession member data.
unsigned int
A callback function used to compare two keys in a database.
where boath are integers</td > n< td ></td > n</tr > n< tr > n< td > tse</td > n< td > optional</td > n< td > String</td > n< td class=\"description\"> TSE option controls what blob is orig
void AddComment(CSeq_feat &feat, const string &comment)
constexpr auto sort(_Init &&init)
constexpr auto front(list< Head, As... >, T=T()) noexcept -> Head
constexpr bool empty(list< Ts... >) noexcept
double value_type
The numeric datatype used by the parser.
const struct ncbi::grid::netcache::search::fields::SIZE size
Magic spell ;-) needed for some weird compilers... very empiric.
const GenericPointer< typename T::ValueType > T2 value
const CharType(& source)[N]
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
Defines command line argument related classes.
Defines unified interface to application:
Int4 delta(size_t dimension_, const Int4 *score_)
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
static SLJIT_INLINE sljit_ins ms(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
static SLJIT_INLINE sljit_ins l(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
static SLJIT_INLINE sljit_ins lb(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
bool operator()(const TMemeberGeneVec::value_type &a, const TMemeberGeneVec::value_type &b)
bool operator()(const vector< CGeneModel * > *ap, const vector< CGeneModel * > *bp)
TOrigAligns & orig_aligns
AlignLenOrder(TOrigAligns &oa)
bool operator()(const CGeneModel *ap, const CGeneModel *bp)
TSignedSeqRange m_cds_limits
virtual bool model_predicate(CGeneModel &align)
bool operator()(const SChainMember *ap, const SChainMember *bp)
TAlignModelList & alignments
virtual bool align_predicate(CAlignModel &align)
virtual string GetComment()
ConnectsParalogs(TAlignModelList &_alignments)
CutShortPartialExons(int minex)
virtual void transform_align(CAlignModel &align)
virtual void transform_align(CAlignModel &align)
virtual void transform_align(CAlignModel &align)
DoNotBelieveShortPolyATail(int _minpolya)
bool operator()(const CGeneModel &a, const CGeneModel &b)
GModelOrder(TOrigAligns &oa)
TOrigAligns & orig_aligns
bool operator()(const SChainMember *ap, const SChainMember *bp)
virtual bool model_predicate(CGeneModel &align)
HasLongIntron(CGnomonEngine &gnomon)
virtual bool model_predicate(CGeneModel &align)
virtual bool model_predicate(CGeneModel &align)
HasShortIntron(CGnomonEngine &gnomon)
bool operator()(const SChainMember *ap, const SChainMember *bp)
bool operator()(const SChainMember *ap, const SChainMember *bp)
virtual bool model_predicate(CGeneModel &align)
LowSupport_Noncoding(int _minsupport)
MarkupCappedEst(const set< string > &_caps, int _capgap)
virtual void transform_align(CAlignModel &align)
const set< string > & caps
MarkupTrustedGenes(const set< string > &_trusted_genes)
virtual void transform_align(CAlignModel &align)
const set< string > & trusted_genes
virtual bool align_predicate(CAlignModel &align)
OverlapsSameAccessionAlignment(TAlignModelList &alignments)
virtual string GetComment()
virtual void transform_align(CAlignModel &align)
const map< string, TSignedSeqRange > & mrnaCDS
ProjectCDS(double _mininframefrac, const CResidueVec &_seq, CScope *_scope, const map< string, TSignedSeqRange > &_mrnaCDS)
ProteinWithBigHole(double hthresh, double hmaxlen, CGnomonEngine &gnomon)
virtual bool model_predicate(CGeneModel &align)
bool operator()(const TSignedSeqRange &a, const TSignedSeqRange &b) const
bool operator()(const SChainMember *ap, const SChainMember *bp)
bool operator()(const SChainMember *ap, const SChainMember *bp)
void MarkPostponedForChain()
TContained CollectContainedForMemeber()
const CCDSInfo * m_cds_info
bool m_marked_for_deletion
void MarkIncludedForChain()
SChainMember * m_left_member
bool m_restricted_to_start
CGeneModel * m_unmd_align
bool m_marked_for_retention
int m_fully_connected_to_part
SChainMember * m_right_member
TContained CollectCodingContainedForChain()
double m_right_splice_num
bool m_excluded_readthrough
void AddToContained(TContained &contained, TMemberPtrSet &included_in_list)
TContained CollectCodingContainedForMemeber()
void AddCodingToContained(TContained &contained, TMemberPtrSet &included_in_list)
SChainMember * m_sink_for_contained
TContained CollectContainedForChain()
int m_right_trusted_group
void MarkUnwantedCopiesForChain(const TSignedSeqRange &cds)
double m_accumulated_splice_num
CAlignModel * m_orig_align
list< TSignedSeqRange > m_confirmed_intervals
map< int, char > m_replacements
TInDels m_correction_indels
SFShiftsCluster(TSignedSeqRange limits=TSignedSeqRange::GetEmpty())
bool operator<(const SFShiftsCluster &c) const
bool NewIsBetter(int new_count, int new_not_wanted_count, int new_matches_count, int new_mem_id) const
bool operator<(const SLinker &sl) const
TSignedSeqRange m_reading_frame
bool OtherIsBetter(const SLinker &a) const
double m_utr_clip_threshold
bool operator()(const SChainMember *ap, const SChainMember *bp)
virtual bool model_predicate(CGeneModel &align)
virtual bool model_predicate(CGeneModel &align)
virtual void transform_align(CAlignModel &align)
void TrimProtein(CAlignModel &align, CAlignMap &alignmap)
TSignedSeqPos TrimCodingExonRight(const CAlignModel &align, const CModelExon &e, int trim)
void TrimTranscript(CAlignModel &align, CAlignMap &alignmap)
TSignedSeqPos TrimCodingExonLeft(const CAlignModel &align, const CModelExon &e, int trim)
TrimAlignment(int a_trim)
bool operator()(const CAlignModel *a, const CAlignModel *b)
s_ByAccVerLen(CScope &scope_)
int g(Seg_Gsm *spe, Seq_Mtf *psm, Thd_Gsm *tdg)
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4