CSeq_align_set::Tdata::const_iterator input_it = input_list.begin();
63CSeq_align_set::Tdata::iterator final_it = final_list.begin();
65 while(input_it != input_list.end())
70 if(hit_count >= list_size)
72final_list.erase(final_it, final_list.end());
76 if(final_it == final_list.end())
87 if(input_evalue == final_evalue)
95 if(input_evalue < final_evalue)
97CSeq_align_set::Tdata::const_iterator start_input_it = input_it;
100 const CSeq_id& id_prev = (*input_it)->GetSeq_id(1);
102 if(input_it == input_list.end())
107 if(! id_prev.
Match((*input_it)->GetSeq_id(1)))
113final_list.insert(final_it, start_input_it, input_it);
119 const CSeq_id& id_prev = (*final_it)->GetSeq_id(1);
122 if(final_it == final_list.end())
127 if(! id_prev.
Match((*final_it)->GetSeq_id(1)))
142aggregate_search_result_set->
clear();
144 for(
unsigned int i=0;
i<
t[0]->GetNumQueries();
i++)
146vector< CRef<CSearchResults> > thread_results;
148 const CSeq_id&
id= *(thread_results[0]->GetSeqId());
150 for(
unsigned intd=1; d < num_of_threads; d++)
152thread_results.push_back ((*(
t[d]))[id]);
157 for(
unsigned intd=0; d< num_of_threads; d++)
159 if(thread_results[d]->HasAlignments())
164align_set->
Set().insert(align_set->
Set().begin(),
165thread_align_set->
Get().begin(),
166thread_align_set->
Get().end());
173aggregate_messages.
Combine(thread_results[d]->GetErrors());
177thread_results[0]->GetMaskedQueryRegions(query_mask);
181thread_results[0]->GetAncillaryData(),
183aggregate_search_result_set->
push_back(aggregate_search_results);
187 returnaggregate_search_result_set;
194 boolinclude_filtered_reads)
196 boolisCSRA =
false;
203 CVDBBlastUtilvdbUtil(isCSRA?csras:dbs,
true, isCSRA, include_filtered_reads);
208 CLocalBlastlcl_blast(query_factory, opt_handle, seqSrc, seqInfoSrc);
227 boolinclude_filtered_reads)
229 boolisCSRA =
false;
244 CPsiBlastpsi_blast(pssm, db_adapter, psi_opts);
269vector<string> & chunks,
273vector<string> & chunks,
276 void*
Main(
void);
293vector<string> & chunks,
295 boolinclude_filtered_reads):
296m_chunks(chunks), m_include_filtered_reads(include_filtered_reads),
297m_num_extensions(0), m_pssm(pssm)
305vector<string> & chunks,
307 boolinclude_filtered_reads):
308m_query_factory(query_factory), m_chunks(chunks),
309m_include_filtered_reads(include_filtered_reads), m_num_extensions(0)
342 unsigned intnum_of_chunks =
m_chunks.size();
343vector<CRef<CSearchResultSet> >
results;
345 for(
unsigned int i=0;
i< num_of_chunks;
i++) {
368 boolinclude_filtered_reads):
369m_query_vector(query_vector),
370m_opt_handle(options),
371m_total_num_seqs(local_vdb.total_num_seqs),
372m_total_length(local_vdb.total_length),
373m_chunks_for_thread(local_vdb.chunks_for_thread),
374m_num_threads(local_vdb.chunks_for_thread.
size()),
376m_include_filtered_reads(include_filtered_reads)
384 boolinclude_filtered_reads):
385m_opt_handle(options),
386m_total_num_seqs(local_vdb.total_num_seqs),
387m_total_length(local_vdb.total_length),
388m_chunks_for_thread(local_vdb.chunks_for_thread),
389m_num_threads(local_vdb.chunks_for_thread.
size()),
391m_include_filtered_reads(include_filtered_reads),
406 return(
a.length >
b.length);
411vector<vector<SSortStruct> > & out_list, vector<Uint8> & acc_size)
415 for(
unsigned int i=0;
i<
in_list.size();
i++)
417 unsigned intmin_index = 0;
418 for(
unsigned intj=1; j<num_threads; j++) {
419 if(acc_size[j] < acc_size[min_index])
422acc_size[min_index] +=
in_list[
i].length;
423out_list[min_index].push_back(
in_list[
i]);
430vector<SSortStruct> filtered_list;
431 for(
unsigned int i= 0;
i<
in_list.size();
i++) {
435filtered_list.push_back(
in_list[
i]);
442 const unsigned intdbs_per_chunk,
const string tag)
447 Uint8num_seqs_count = 0;
449 unsigned intdb_count = 0;
450 for(
unsigned int i=0;
i<
in_list.size();
i++) {
452num_seqs_count +=
in_list[
i].num_seqs;
453 if(num_seqs_count > (
Uint8)
kMax_I4|| db_count >= dbs_per_chunk) {
454chunks.push_back(dbs);
455 _TRACE(
"Chunk: "<< dbs <<
" Num Seqs: "<< num_seqs_count -
in_list[
i].num_seqs);
457num_seqs_count =
in_list[
i].num_seqs;
468chunks.push_back(dbs);
469 _TRACE(
"Chunk: "<< dbs <<
" Num Seqs: "<< num_seqs_count);
481 stringmax_dbs_env =
env.Get(
"VDB_MAX_DBS_PER_CHUNK");
487 if(max_dbs_per_chunk && max_dbs_per_chunk < dbs_per_chunk)
488dbs_per_chunk = max_dbs_per_chunk;
490 returndbs_per_chunk;
500vector<string>::iterator uq = std::unique(dbs.begin(), dbs.end());
501dbs.erase(uq, dbs.end());
503 unsigned intnum_dbs = dbs.size();
504 unsigned intnum_threads = (num_dbs < threads) ? num_dbs : threads;
506vector <SSortStruct> p,
r;
507 Uint8total_length = 0;
508 Uint8total_num_seqs = 0;
514 #pragma omp parallel for num_threads(num_threads) schedule(static) if (num_threads > 1) \ 515 shared(num_dbs, p, dbs) reduction(+ : total_length, total_num_seqs) 516 for(
unsigned int i=0;
i< num_dbs;
i++) {
518p[
i].db_name = dbs[
i];
523total_length += p[
i].length;
524total_num_seqs += p[
i].num_seqs;
527openmp_exception += e.
what();
531 else if(search_mode ==
eBoth) {
534 #pragma omp parallel for num_threads(num_threads) schedule(static) if (num_threads > 1) \ 535 shared(num_dbs, p,r, dbs) reduction(+ : total_length, total_num_seqs) 536 for(
unsigned int i=0;
i< num_dbs;
i++) {
538p[
i].db_name = dbs[
i];
540 r[
i].db_name = dbs[
i];
550total_length += (p[
i].length +
r[
i].length);
551total_num_seqs += (p[
i].num_seqs +
r[
i].num_seqs);
554openmp_exception += e.
what();
558 else if(search_mode ==
eAligned) {
560 #pragma omp parallel for num_threads(num_threads) schedule(static) if (num_threads > 1) \ 561 shared(num_dbs, r, dbs) reduction(+ : total_length, total_num_seqs) 562 for(
unsigned int i=0;
i< num_dbs;
i++) {
565 r[
i].db_name = dbs[
i];
570total_length +=
r[
i].length;
571total_num_seqs +=
r[
i].num_seqs;
578openmp_exception += e.
what();
593num_dbs =
r.size() + p.size();
594 if(
r.size() == 0) {
599 if(max_csra_thread != 0 && threads > max_csra_thread) {
600threads = max_csra_thread;
602num_threads = (num_dbs < threads) ? num_dbs : threads;
611 else if(total_num_seqs == 0){
613 stringzero_seq_err =
"DB list contains no searchable seqs in sra_mode "+
NStr::IntToString(search_mode) +
".";
621vector<Uint8> acc_size(num_threads, 0);
626vector<vector<SSortStruct> > list_thread(num_threads);
633 for(
unsigned int t=0;
t< num_threads;
t++) {
638 if((search_mode !=
eUnaligned) && (
r.size() > 0)){
639vector<vector<SSortStruct> > list_thread(num_threads);
646 for(
unsigned int t=0;
t< num_threads;
t++) {
676 for(
unsigned int i=0;
i< rs.
size();
i++)
678rs[
i].TrimSeqAlign(hit_list_size);
684 if(orig_size <= 200)
686 return(orig_size + 100);
688 else if(orig_size < 500)
690 return(orig_size + 75);
693 return(orig_size + 50);
712 if(num_chunks == 1) {
728vector<CRef<CSearchResultSet> >
results;
729 for(
unsigned int i=0;
i< num_chunks;
i++)
770 for(
unsigned int i=0;
i<
orig.Size();
i++)
773q->SetMaskedRegions(
orig[
i]->GetMaskedRegions());
774q->SetGeneticCodeId(
orig[
i]->GetGeneticCodeId());
804vector<CRef<CSearchResultSet> >
results;
805vector<CRef<IQueryFactory> > query_factory;
806vector<CRef<CPssmWithParameters> > pssm;
832thread[
t]->Join(
reinterpret_cast<void**
>(&thread_results[
t]));
837 if(thread_results[
t] ==
NULL) {
841 results.push_back(thread_results[
t]->thread_result_set);
842 delete(thread_results[
t]);
Declares the CBlastNucleotideOptionsHandle class.
Defines BLAST error codes (user errors included)
Handle to the nucleotide-nucleotide options to the BLAST algorithm.
void push_back(const value_type &element)
Add a value to the back of this container.
Class to perform a BLAST search on local BLAST databases Note that PHI-BLAST can be run using this cl...
Interface to create a BlastSeqSrc suitable for use in CORE BLAST from a variety of BLAST database/s...
bool m_include_filtered_reads
CRef< CBlastQueryVector > m_query_vector
CRef< CBlastOptionsHandle > m_opt_handle
void x_AdjustDbSize(void)
CRef< objects::CPssmWithParameters > m_pssm
static string PreprocessDBs(CLocalVDBBlast::SLocalVDBStruct &local_vdb, const string db_names, unsigned int num_threads=kDisableThreadedSearch, ESRASearchMode seach_mode=eAligned)
void x_PrepareQuery(vector< CRef< IQueryFactory > > &qf_v)
void x_PreparePssm(vector< CRef< CPssmWithParameters > > &pssm)
CLocalVDBBlast(const CLocalVDBBlast &)
static const unsigned int kDisableThreadedSearch
CRef< CSearchResultSet > RunThreadedSearch()
CRef< CSearchResultSet > Run()
unsigned int m_num_threads
vector< vector< string > > & m_chunks_for_thread
NCBI C++ Object Manager dependent implementation of IQueryFactory.
Handle to the protein-protein options to the BLAST algorithm.
Runs a single iteration of the PSI-BLAST algorithm on a BLAST database.
Search Results for All Queries.
Search Results for One Query.
Handle to the protein-translated nucleotide options to the BLAST algorithm.
BlastSeqSrc * GetSRASeqSrc()
Return the stored SRA BlastSeqSrc object.
static Uint4 GetMaxNumCSRAThread(void)
static void GetVDBStats(const string &strAllRuns, Uint8 &num_seqs, Uint8 &length, bool getRefStats=false)
Function to get around the OID (blastseqsrc) limit so num of seqs > int4 can be returned.
static bool IsCSRA(const string &db_name)
CRef< blast::IBlastSeqInfoSrc > GetSRASeqInfoSrc()
Return the SRA BlastSeqInfoSrc object (create if none exists).
static void GetAllStats(const string &strAllRuns, Uint8 &num_seqs, Uint8 &length, Uint8 &ref_num_seqs, Uint8 &ref_length)
CVDBThread(const CVDBThread &)
void * Main(void)
Derived (user-created) class must provide a real thread function.
CRef< CBlastOptionsHandle > m_opt_handle
CRef< CSearchResultSet > RunTandemSearches(void)
CRef< CPssmWithParameters > m_pssm
CVDBThread & operator=(const CVDBThread &)
CRef< IQueryFactory > m_query_factory
vector< string > m_chunks
bool m_include_filtered_reads
CVDBThread(CRef< IQueryFactory > query_factory, vector< string > &chunks, CRef< CBlastOptions > options, bool include_filtered_reads)
Collection of masked regions for a single query sequence.
Class for the messages for an individual query sequence.
static bool DLIST_NAME() in_list(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
CRef< CSearchResultSet > Run()
Run the PSI-BLAST engine for one iteration.
virtual void SetNumberOfThreads(size_t nthreads)
Mutator for the number of threads.
void SetHitlistSize(int s)
CRef< CSearchResultSet > Run()
Executes the search.
virtual BLAST_SequenceBlk * GetSequenceBlk()=0
Accessor for the BLAST_SequenceBlk structure.
CRef< ILocalQueryData > MakeLocalQueryData(const CBlastOptions *opts)
Creates and caches an ILocalQueryData.
int GetHitlistSize() const
CBlastOptions & SetOptions()
Returns a reference to the internal options class which this object is a handle for.
void SetDbSeqNum(unsigned int n)
EBlastProgramType GetProgramType() const
Returns the CORE BLAST notion of program type.
size_type size() const
Identical to GetNumResults, provided to facilitate STL-style iteration.
const CBlastOptions & GetOptions() const
Return the object which this object is a handle for.
CRef< CBlastOptions > Clone() const
Explicit deep copy of the Blast options object.
void push_back(value_type &element)
Add a value to the back of this container.
void Combine(const TQueryMessages &other)
Combine other messages with these.
void clear()
Clears the contents of this object.
Int4 GetNumExtensions()
Retrieve the number of extensions performed during the search.
Int8 GetEffectiveSearchSpace() const
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
void Warning(CExceptionArgs_Base &args)
virtual const char * what(void) const noexcept
Standard report (includes full backlog).
#define MSerial_AsnBinary
bool Match(const CSeq_id &sid2) const
Match() - TRUE if SeqIds are equivalent.
void Reset(void)
Reset reference object.
void Reset(void)
Reset reference object.
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty -- pointing to an object and has a non-null value.
bool Empty(void) const THROWS_NONE
Check if CRef is empty -- not pointing to any object, which means having a null value.
int32_t Int4
4-byte (32-bit) signed integer
uint32_t Uint4
4-byte (32-bit) unsigned integer
uint64_t Uint8
8-byte (64-bit) unsigned integer
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
static string Join(const TContainer &arr, const CTempString &delim)
Join strings using the specified delimiter.
static unsigned int StringToUInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to unsigned int.
Tdata & Set(void)
Assign a value to data member.
list< CRef< CSeq_align > > Tdata
const Tdata & Get(void) const
Get the member data.
where boath are integers</td > n< td ></td > n</tr > n< tr > n< td > tse</td > n< td > optional</td > n< td > String</td > n< td class=\"description\"> TSE option controls what blob is orig
Main class to perform a BLAST search on the local machine.
constexpr auto sort(_Init &&init)
const struct ncbi::grid::netcache::search::fields::SIZE size
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
Defines NCBI C++ exception handling.
Defines classes: CDirEntry, CFile, CDir, CSymLink, CMemoryFile, CFileUtil, CFileLock,...
Multi-threading -- mutexes; rw-locks; semaphore.
Multi-threading -- classes, functions, and features.
Defines: CTimeFormat - storage class for time format.
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
NOTE: This file contains work in progress and the APIs are likely to change, please do not rely on th...
Declares CPsiBlast, the C++ API for the PSI-BLAST engine.
Declares the CPSIBlastOptionsHandle class.
static bool GetSeqId(const T &d, set< string > &labels, const string name="", bool detect=false, bool found=false)
Complete type definition of Blast Sequence Source ADT.
vector< vector< string > > chunks_for_thread
CRef< CSearchResultSet > thread_result_set
Declares the CTBlastnOptionsHandle class.
const string k_CSRA_CHUNK("CSRA_CHUNK: ")
CRef< CSearchResultSet > s_RunLocalVDBSearch(const string &dbs, CRef< IQueryFactory > query_factory, CRef< CBlastOptionsHandle > opt_handle, Int4 &num_extensions, bool include_filtered_reads)
void s_TrimResults(CSearchResultSet &rs, int hit_list_size)
static CRef< CSearchResultSet > s_CombineSearchSets(vector< CRef< CSearchResultSet > > &t, unsigned int num_of_threads, const int list_size)
static void s_DivideDBsForThread(unsigned int num_threads, vector< SSortStruct > &in_list, vector< vector< SSortStruct > > &out_list, vector< Uint8 > &acc_size)
static void s_GetChunksForThread(vector< SSortStruct > &in_list, vector< string > &chunks, const unsigned int dbs_per_chunk, const string tag)
static const unsigned int DEFAULT_MAX_DBS_PER_CHUNK
static const unsigned int DEFAULT_MAX_DBS_OPEN
static void s_RemoveNonCSRAEntry(vector< SSortStruct > &in_list)
static int s_GetModifiedHitlistSize(const int orig_size)
CRef< CBlastQueryVector > s_CloneBlastQueryVector(const CBlastQueryVector &orig)
static bool s_SortDbSize(const SSortStruct &a, const SSortStruct &b)
const string k_NOT_CSRA_DB("NOT_CSRA")
static unsigned int s_GetNumDbsPerChunk(unsigned int num_threads, unsigned int num_dbs)
CRef< CSearchResultSet > s_RunPsiVDBSearch(const string &dbs, CRef< CPssmWithParameters > pssm, CRef< CBlastOptionsHandle > opt_handle, bool include_filtered_reads)
static void s_MergeAlignSet(CSeq_align_set &final_set, const CSeq_align_set &input_set, const int list_size)
Declares the CLocalVDBBlast class.
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4