Dims = Denseg.
GetDim();
83 for(
size_tCurrSeg = 0; CurrSeg < SegCount; ++CurrSeg) {
84 intIndex = (Dims*CurrSeg)+Row;
85 intCurrStart = Denseg.
GetStarts()[Index];
86 if( CurrStart != -1) {
88CurrLoc->
SetInt().SetId().Assign( *Denseg.
GetIds()[Row] );
89CurrLoc->
SetInt().SetFrom() = CurrStart;
90CurrLoc->
SetInt().SetTo() = CurrStart + Denseg.
GetLens()[CurrSeg];
93Accum->
SetMix().Set().push_back(CurrLoc);
114 TSeqPosPosCoveredBases = 0, NegCoveredBases = 0;
120PosCoveredBases += (*LocIter)->GetInt().GetLength();
122NegCoveredBases += (*LocIter)->GetInt().GetLength();
128 return max(PosCoveredBases, NegCoveredBases);
144AccumResults->Get()) {
145 intBestRank = QueryIter->second->GetBestRank();
148 _TRACE(
"Determined ID: " 149<< QueryIter->second->GetQueryId()->AsFastaString()
150<<
" needs Instanced MM Aligner.");
153<< QueryIter->second->GetQueryId()->AsFastaString()
154<<
" fails the minimum percent coverage cutoff. Skipping.");
172vector<CRef<CInstance> > Instances;
176 if(Instances.empty()) {
180 ERR_POST(
Info<<
" Instance Count: "<< Instances.size());
191 ERR_POST(
Info<<
" Aligning "<< Inst.
Query.GetId().AsFastaString() <<
" to " 192<< Inst.
Subject.GetId().AsFastaString());
194<< Inst.
Query.GetTo() <<
":" 196<<
" and s: "<< Inst.
Subject.GetFrom() <<
":" 197<< Inst.
Subject.GetTo() <<
":" 205Inst.
Query.GetStrand(),
216.
Print(
"instance_query", Inst.
Query.GetId().AsFastaString())
217.
Print(
"instance_subject", Inst.
Subject.GetId().AsFastaString())
218.
Print(
"instance_align", (GlobalDs.
IsNull() ?
"false":
"true"));
223Result->
SetSegs().SetDenseg().Assign(*GlobalDs);
226ResultSet->
Set().push_back(Result);
255 if(!ResultSet->
Get().empty()) {
291 doublePercentRemaining = 1.0-PercentComplete;
293 doubleFactor = PercentRemaining/PercentComplete;
297 ERR_POST(
Error<<
" Instanced Aligner took over 5 minutes. Timed out.");
302 doubleTimeEstimated = Span.
GetAsDouble() * Factor;
305 ERR_POST(
Error<<
" Instanced Aligner expected to take "<< TimeEstimated
306<<
" seconds. More than "<< (CallbackData->
TimeOutSeconds/60.0)
307<<
" minutes. Terminating Early.");
353 TSeqPosQueryStrandedStart, QueryStrandedStop;
355QueryStrandedStart = QueryStart;
356QueryStrandedStop = QueryStop;
358QueryStrandedStart = ( (QueryVec.
size()-1) - QueryStop);
359QueryStrandedStop = ( (QueryVec.
size()-1) - QueryStart);
362 stringQuerySeq, SubjectSeq;
363QueryVec.
GetSeqData(QueryStrandedStart, QueryStrandedStop+1, QuerySeq);
364SubjectVec.
GetSeqData(SubjectStart, SubjectStop+1, SubjectSeq);
389Score = Aligner.
Run();
393ResultDenseg = Aligner.
GetDense_seg(ExtractQueryStart, Strand, QueryId,
404ResultDenseg->
OffsetRow(0, QueryStart);
405ResultDenseg->
OffsetRow(1, SubjectStart);
425objects::CScope& Scope)
430list<CConstRef<CSeq_align> > In;
446 CDense_seg& Denseg = (*AlignIter)->SetSegs().SetDenseg();
479Pluses->Set().push_back(*AlignIter);
481Minuses->
Set().push_back(*AlignIter);
484 if(!Pluses->Set().empty()) {
486 if(!
Out.IsNull()) {
489Instances.push_back(Inst);
493 if(!Minuses->
Set().empty()) {
495 if(!
Out.IsNull()) {
498Instances.push_back(Inst);
529 Query.SetId().Assign(AlignSet.
Get().front()->GetSeq_id(0));
530 Subject.SetId().Assign(AlignSet.
Get().front()->GetSeq_id(1));
532 Query.SetStrand() = AlignSet.
Get().front()->GetSeqStrand(0);
533 Subject.SetStrand() = AlignSet.
Get().front()->GetSeqStrand(1);
542 Query.SetFrom(
min(
Query.GetFrom(), (*AlignIter)->GetSeqStart(0)));
545 Query.SetTo(
max(
Query.GetTo(), (*AlignIter)->GetSeqStop(0)));
583 return(
Subject.GetLength() /
double(
Query.GetLength()));
589 return Query.GetLength();
613TSubjectCoverage BestCoverage;
614 doubleMaxCoverage = 0;
621 stringIdStr = Set->
Get().front()->GetSeq_id(1).AsFastaString();
622 doubleSubjCoverage = 0;
625(*AlignIter)->GetNamedScore(
"pct_coverage", PctCov);
626SubjCoverage =
max(SubjCoverage, PctCov);
628BestCoverage[IdStr] = SubjCoverage;
629MaxCoverage =
max(SubjCoverage, MaxCoverage);
633 typedefvector<CRef<CInstance> > TInstVector;
638TInstVector SubjInstances;
641 stringSubjIdStr = Set->
Get().front()->GetSeq_id(1).AsFastaString();
642 if(BestCoverage[SubjIdStr] < (MaxCoverage*0.10)) {
654 boolInserted =
false;
655 boolContained =
false;
656 ITERATE(TInstVector, InstIter, SubjInstances) {
657 boolCurrContained = (*InstIter)->IsAlignmentContained(**AlignIter);
658Contained |= CurrContained;
663 intGapDist = (*InstIter)->GapDistance(**AlignIter);
664 if(GapDist < 20000) {
665(*InstIter)->MergeIn(*AlignIter);
672SubjInstances.push_back(Inst);
678TInstVector CleanedInstances;
680 ITERATE(TInstVector, InstIter, SubjInstances) {
683 if((*InstIter)->Alignments.Get().size() <= 1)
687Cleaned =
x_RunCleanup((*InstIter)->Alignments, Scope);
688 if(!Cleaned.
IsNull()) {
692 boolDupeFound =
false;
694 if( (*AlignIter)->GetSeqStart(0) == (*SourceIter)->GetSeqStart(0) &&
695(*AlignIter)->GetSeqStart(1) == (*SourceIter)->GetSeqStart(1) &&
696(*AlignIter)->GetSeqStop(0) == (*SourceIter)->GetSeqStop(0) &&
697(*AlignIter)->GetSeqStop(1) == (*SourceIter)->GetSeqStop(1)) {
707 boolContained =
false;
708 ITERATE(TInstVector, CleanIter, CleanedInstances) {
709 boolCurr = (*CleanIter)->IsAlignmentContained(**AlignIter);
719Dupe |= ((*OldInstIter)->Query.Equals(Inst->Query)
720&& (*OldInstIter)->Subject.Equals(Inst->Subject));
723CleanedInstances.push_back(Inst);
728 copy(CleanedInstances.begin(), CleanedInstances.end(),
729insert_iterator<TInstVector>(Instances, Instances.end()));
739vector<CRef<CInstance> >::iterator Curr;
740Curr = Instances.begin();
741 for(Curr = Instances.begin(); Curr != Instances.end(); ) {
742 if( (*Curr)->SubjToQueryRatio() > MaxRatio ||
743(*Curr)->SubjToQueryRatio() < 0.10 )
744Curr = Instances.erase(Curr);
751 for(Curr = Instances.begin(); Curr != Instances.end(); ++Curr) {
752 TSeqPosCurrLength = (*Curr)->QueryLength();
753LongestInstance =
max(CurrLength, LongestInstance);
757 for(Curr = Instances.begin(); Curr != Instances.end(); ) {
758 if( (*Curr)->QueryLength() <= (LongestInstance*0.05))
759Curr = Instances.erase(Curr);
764vector<CRef<CInstance> >::iterator Outer, Inner;
765 for(Outer = Instances.begin(); Outer != Instances.end(); ++Outer) {
766 for(Inner = Outer+1; Inner != Instances.end(); ) {
767 if( (*Outer)->Query.Equals((*Inner)->Query) &&
768(*Outer)->Subject.Equals((*Inner)->Subject) ) {
769Inner = Instances.erase(Inner);
781 doubleBestPctCoverage = -1.0;
790 doublePctCoverage = -1.0;
791(*AlignIter)->GetNamedScore(
"pct_coverage", PctCoverage);
792BestPctCoverage =
max(BestPctCoverage, PctCoverage);
TSeqPos s_CalcCoverageCount(TAlignSetRef Alignments, int Row, CScope &Scope)
bool s_ProgressCallback(CNWAligner::SProgressInfo *ProgressInfo)
CRef< CSeq_loc > s_CoverageSeqLoc(TAlignSetRef Alignments, int Row, CScope &Scope)
Declares the CBl2Seq (BLAST 2 Sequences) class.
Declares the CBlastNucleotideOptionsHandle class.
Declares the CBlastOptionsHandle and CBlastOptionsFactory classes.
Definitions of special type used in BLAST.
Main argument class for BLASTN application.
class CAlignCleanup implements an alignment cleanup utility based on the C++ alignment manager.
void Cleanup(const TAligns &aligns_in, TAligns &aligns_out, EMode mode=eDefault, CSeq_align::TSegs::E_Choice output_seg_type=CSeq_align::TSegs::e_Denseg)
void TrimEndGaps()
Trim leading/trailing gaps if possible.
ENa_strand GetSeqStrand(TDim row) const
void Reverse(void)
Reverse the segments' orientation.
void OffsetRow(TDim row, TSignedSeqPos offset)
Offset row's coords.
double SubjToQueryRatio() const
int GapDistance(const objects::CSeq_align &Align) const
objects::CSeq_interval Subject
void MergeIn(CRef< objects::CSeq_align > Align)
objects::CSeq_align_set Alignments
TSeqPos QueryLength() const
bool IsAlignmentContained(const objects::CSeq_align &Align) const
CInstance(const CRef< objects::CSeq_align > Align)
objects::CSeq_interval Query
TAlignResultsRef GenerateAlignments(objects::CScope &Scope, ISequenceSet *QuerySet, ISequenceSet *SubjectSet, TAlignResultsRef AccumResults)
void x_FilterInstances(vector< CRef< CInstance > > &Instances, double MaxRatio)
CRef< objects::CSeq_align_set > x_RunCleanup(const objects::CSeq_align_set &AlignSet, objects::CScope &Scope)
void x_RunAligner(objects::CScope &Scope, CQuerySet &QueryAligns, TAlignResultsRef Results)
void x_GetDistanceInstances(CQuerySet &QueryAligns, objects::CScope &Scope, vector< CRef< CInstance > > &Instances)
void x_GetCleanupInstances(CQuerySet &QueryAligns, objects::CScope &Scope, vector< CRef< CInstance > > &Instances)
CRef< objects::CDense_seg > x_RunMMGlobal(const objects::CSeq_id &QueryId, const objects::CSeq_id &SubjectId, objects::ENa_strand Strand, TSeqPos QueryStart, TSeqPos QueryStop, TSeqPos SubjectStart, TSeqPos SubjectStop, objects::CScope &Scope)
bool x_MinCoverageCheck(const CQuerySet &QueryAligns)
TAssemblyToSubjectSet & Get()
CConstRef< objects::CSeq_id > GetQueryId() const
TSeqPos GetSeqStop(TDim row) const
void SetNamedScore(const string &id, int score)
const CSeq_id & GetSeq_id(TDim row) const
Get seq-id (the first one if segments have different ids).
TSeqPos GetSeqStart(TDim row) const
ENa_strand GetSeqStrand(TDim row) const
Get strand (the first one if segments have different strands).
Declares the CDiscNucleotideOptionsHandle class.
void SetProgressCallback(FProgressCallback prg_callback, void *data)
void SetScoreMatrix(const SNCBIPackedScoreMatrix *scoremat)
void SetWms(TScore value)
CRef< objects::CDense_seg > GetDense_seg(TSeqPos query_start, objects::ENa_strand query_strand, TSeqPos subj_start, objects::ENa_strand subj_strand, bool trim_end_gaps=false) const
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
CDiagContext_Extra & Print(const string &name, const string &value)
The method does not print the argument, but adds it to the string.
CDiagContext & GetDiagContext(void)
Get diag context instance.
CDiagContext_Extra Extra(void) const
Create a temporary CDiagContext_Extra object.
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
void Error(CExceptionArgs_Base &args)
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
string ReportAll(TDiagPostFlags flags=eDPF_Exception) const
Report all exceptions.
void Info(CExceptionArgs_Base &args)
CRef< CSeq_loc > Seq_loc_Merge(const CSeq_loc &loc, CSeq_loc::TOpFlags flags, CScope *scope)
Merge ranges in the seq-loc.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
CSeqVector GetSeqVector(EVectorCoding coding, ENa_strand strand=eNa_strand_plus) const
Get sequence: Iupacna or Iupacaa if use_iupac_coding is true.
@ eStrand_Plus
Plus strand.
@ eStrand_Minus
Minus strand.
@ eCoding_Iupac
Set coding to printable coding (Iupacna or Iupacaa)
void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const
Fill the buffer string with the sequence data for the interval [start, stop).
bool IsNull(void) const THROWS_NONE
Check if pointer is null -- same effect as Empty().
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_SCOPE(ns)
Define a new scope.
double GetAsDouble(void) const
Return time span as number of seconds.
long GetCompleteSeconds(void) const
Get number of complete seconds.
@ eCurrent
Use current time. See also CCurrentTime.
static bool IsSignaled(TSignalMask signals=eSignal_Any)
Check that any of specified signals is received.
const TDenseg & GetDenseg(void) const
Get the variant data.
Tdata & Set(void)
Assign a value to data member.
const TStarts & GetStarts(void) const
Get the Starts member data.
void SetSegs(TSegs &value)
Assign a value to Segs data member.
const TLens & GetLens(void) const
Get the Lens member data.
void SetType(TType value)
Assign a value to Type data member.
TDim GetDim(void) const
Get the Dim member data.
TStrands & SetStrands(void)
Assign a value to Strands data member.
const TIds & GetIds(void) const
Get the Ids member data.
bool CanGetStrands(void) const
Check if it is safe to call GetStrands method.
TNumseg GetNumseg(void) const
Get the Numseg member data.
list< CRef< CSeq_align > > Tdata
const TStrands & GetStrands(void) const
Get the Strands member data.
const Tdata & Get(void) const
Get the member data.
const TSegs & GetSegs(void) const
Get the Segs member data.
@ eType_partial
mapping pieces together
ENa_strand
strand of nucleic acid
list< CRef< CSeq_loc > > Tdata
const Tdata & Get(void) const
Get the member data.
const TMix & GetMix(void) const
Get the variant data.
Main class to perform a BLAST search on the local machine.
Magic spell ;-) needed for some weird compilers... very empiric.
Setup interrupt signal handling.
Defines NCBI C++ exception handling.
void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)
void Out(T t, int w, CNcbiOstream &to=cout)
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4