seqloc->
SetInt().SetFrom(from);
78seqloc->
SetInt().SetTo(to);
81loc_list.push_back(seqloc);
83seg_list.push_back(
SSegmentLoc(query_index, from, to));
97vector<SSegmentLoc>& filler_segs)
100vector<CRangeCollection<TOffset> > sorted_segs(num_queries);
109 CHit*subhit = *subitr;
120 ITERATE(vector<int>, it, blastp_indices) {
132itr->GetFrom() - 1, filler_segs,
i);
134seg_start = itr->GetToOpen();
144seq_length - 1, filler_segs,
i);
149printf(
"Filler Segments:\n");
150 for(
int i= 0;
i< (
int)filler_segs.size();
i++) {
151printf(
"query %d %4d - %4d\n",
152filler_segs[
i].seq_index,
153filler_segs[
i].GetFrom(),
154filler_segs[
i].GetTo());
170 constvector<int>& indices,
172vector<SSegmentLoc>& filler_segs)
174 const intkBlastBatchSize = 10000;
175 size_tnum_full_queries = indices.size();
177 if(filler_locs.empty())
185blastp_opts->SetEvalueThreshold(
max(blastp_evalue, 10.0));
187blastp_opts->SetSegFiltering(
false);
192 while(batch_start < (
int)filler_locs.size()) {
197 for(
int i= batch_start;
i< (
int)filler_locs.size();
i++) {
198 const CSeq_loc& curr_loc = *filler_locs[
i];
199 intfragment_size = curr_loc.
GetInt().
GetTo() -
201 if(batch_size + fragment_size >= kBlastBatchSize && batch_size > 0)
205batch_size += fragment_size;
208 CBl2Seqblaster(curr_batch, queries, *blastp_opts);
214 "Alignment interrupted");
221 for(
int i= 0;
i< (
int)curr_batch.size();
i++) {
223 intlist1_oid = filler_segs[batch_start +
i].seq_index;
225 for(
size_tj = 0; j < num_full_queries; j++) {
229 if(list1_oid == indices[j])
235v[
i* num_full_queries + j]->
Get()) {
249 const CScore& curr_score = **score_itr;
252 else if(curr_score.
GetId().
GetStr() ==
"e_value")
257 if(evalue > blastp_evalue)
261align_score, denseg));
277 const CScore& curr_score = **score_itr;
289 if(evalue > blastp_evalue)
293indices[j], align_score, dendiag));
301batch_start += curr_batch.size();
307 constvector<int>& indices)
329vector< CRef<objects::CSeq_loc> > filler_locs;
330vector<SSegmentLoc> filler_segs;
336printf(
"blastp hits:\n");
339printf(
"query %d %4d - %4d query %d %4d - %4d score %d\n",
364unique_ptr< vector<int> >
result(
newvector<int>());
383 intleft = -1, right = -1;
385 for(
size_t i=0;
i< left_inds->size();
i++) {
386 for(
size_tj=0;j < right_inds->size();j++) {
387 if(dist > dmat((*left_inds)[
i], (*right_inds)[j]) || left < 0) {
388left = (*left_inds)[
i];
389right = (*right_inds)[j];
390dist = dmat(left, right);
400blastp_opts->SetEvalueThreshold(
max(blastp_evalue, 10.0));
401blastp_opts->SetSegFiltering(
false);
406 CBl2Seqblaster(left_query, right_query, *blastp_opts);
424 const CScore& curr_score = **score_itr;
427 else if(curr_score.
GetId().
GetStr() ==
"e_value")
432 if(evalue > blastp_evalue)
450 const CScore& curr_score = **score_itr;
462 if(evalue > blastp_evalue)
466align_score, dendiag));
473 ITERATE(vector<int>, it, *right_inds) {
474left_inds->push_back(*it);
482 constvector<TPhyTreeNode*>& cluster_trees)
488 ITERATE(vector<TPhyTreeNode*>, it, cluster_trees) {
497printf(
"in-cluster blastp hits:\n");
500printf(
"query %d %4d - %4d query %d %4d - %4d score %d\n",
static CRef< CScope > m_Scope
User-defined methods of the data storage class.
int TOffset
Basic data type for offsets into a sequence.
Declares the CBl2Seq (BLAST 2 Sequences) class.
Declares the CBlastProteinOptionsHandle class.
vector< CRef< objects::CSeq_align_set > > TSeqAlignVector
Vector of Seq-align-sets.
Runs the BLAST algorithm between 2 sequences.
Handle to the protein-protein options to the BLAST algorithm.
const TDistMatrix & GetDistMatrix(void) const
Get distance matrix.
void Append(CHitList &hitlist)
Append one hitlist to another.
int Size() const
Retrieve number of hits in list.
void PurgeAllHits()
Delete all hits unconditionally.
bool Empty()
Determine whether a list contains no hits.
CHit * GetHit(int index)
Retrieve a hit from the hitlist.
void AddToHitList(CHit *hit)
Append a hit to the hitlist.
A generalized representation of a pairwise alignment.
TSubHit & GetSubHit()
Retrieve a list of subhits.
int m_Score
Score of alignment.
int m_SeqIndex1
Numerical identifier for first sequence in alignment.
int m_SeqIndex2
Numerical identifier for second sequence in alignment.
TRange m_SeqRange1
The range of offsets on the first sequence.
static const int kMinHitSize
Not always used, but useful to avoid extremely small hits.
TRange m_SeqRange2
The range of offsets on the second sequence.
bool HasSubHits()
Query if a CHit has a hierarchy of subhits available.
vector< CHit * > TSubHit
Hits can be grouped hierarchically.
double GetBlastpEvalue(void) const
Get e-value for accepting Blastp hits.
bool GetVerbose(void) const
Get verbose mode.
Simultaneously align multiple protein sequences.
SProgress m_ProgressMonitor
CRef< objects::CScope > m_Scope
void x_FindLocalInClusterHits(const vector< TPhyTreeNode * > &cluster_trees)
Run blast on sequences from each cluster subtree.
vector< CRef< objects::CSeq_loc > > m_tQueries
CHitList m_LocalInClusterHits
void x_AlignFillerBlocks(const blast::TSeqLocVector &queries, const vector< int > &indices, vector< CRef< objects::CSeq_loc > > &filler_locs, vector< SSegmentLoc > &filler_segs)
Run blastp, aligning the collection of filler fragments against the entire input dataset.
void x_FindLocalHits(const blast::TSeqLocVector &queries, const vector< int > &indices)
Run blast on selected input sequences and postprocess the results.
@ eInterrupt
Alignment interrupted through callback function.
vector< CSequence > m_QueryData
void x_AssignDefaultResFreqs()
CConstRef< CMultiAlignerOptions > m_Options
void x_AddNewSegment(vector< CRef< objects::CSeq_loc > > &loc_list, const CRef< objects::CSeq_loc > &query, TOffset from, TOffset to, vector< SSegmentLoc > &seg_list, int query_index)
Create a new query sequence that is a subset of a previous query sequence.
void x_MakeFillerBlocks(const vector< int > &indices, vector< CRef< objects::CSeq_loc > > &filler_locs, vector< SSegmentLoc > &filler_segs)
Turn all fragments of selected query sequence not already covered by a domain hit into a separate que...
unique_ptr< vector< int > > x_AlignClusterQueries(const TPhyTreeNode *node)
definition of a Culling tree
Interface for CMultiAligner.
CConstRef< objects::CSeq_align_set > GetSeqAlign() const
Accessor for the Seq-align results.
virtual TSeqAlignVector Run()
Perform BLAST search. Assuming N queries and M subjects, the structure of the returned vector is as fo...
CRef< CSearchResultSet > RunEx()
Performs the same functionality as Run(), but it returns a different data type.
CSearchResults & GetResults(size_type qi, size_type si)
Retrieve results for a query-subject pair contained by this object.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
TNodeList_CI SubNodeBegin(void) const
Return first const iterator on subnode list.
TNodeList::const_iterator TNodeList_CI
bool IsLeaf() const
Report whether this is a leaf node.
TNodeList_CI SubNodeEnd(void) const
Return last const iterator on subnode list.
const TValue & GetValue(void) const
Return node's value.
TTo GetTo(void) const
Get the To member data.
TFrom GetFrom(void) const
Get the From member data.
const TStr & GetStr(void) const
Get the variant data.
const TDenseg & GetDenseg(void) const
Get the variant data.
const TScores & GetScores(void) const
Get the Scores member data.
E_Choice Which(void) const
Which variant is currently selected.
vector< CRef< CScore > > TScore
TInt GetInt(void) const
Get the variant data.
const TDendiag & GetDendiag(void) const
Get the variant data.
const TValue & GetValue(void) const
Get the Value member data.
vector< CRef< CScore > > TScores
list< CRef< CSeq_align > > Tdata
const TScore & GetScore(void) const
Get the Score member data.
TReal GetReal(void) const
Get the variant data.
list< CRef< CDense_diag > > TDendiag
const TId & GetId(void) const
Get the Id member data.
const TSegs & GetSegs(void) const
Get the Segs member data.
TFrom GetFrom(void) const
Get the From member data.
TTo GetTo(void) const
Get the To member data.
const TInt & GetInt(void) const
Get the variant data.
unsigned int
A callback function used to compare two keys in a database.
const TYPE & Get(const CNamedParameterList *param)
vector< SSeqLoc > TSeqLocVector
Vector of sequence locations.
Structure to represent a single sequence to be fed to BLAST.
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4