* overlap_sz_str = getenv(
"OVERLAP_CHUNK_SIZE");
58 _TRACE(
"Using overlap chunk size from environment "<< retval);
68 _TRACE(
"Using overlap chunk size "<< retval);
75 size_tconcatenated_query_length,
102 size_tconcatenated_query_length,
106concatenated_query_length, num_queries)) {
107 _TRACE(
"Not splitting queries");
112 Uint4num_chunks = 0;
118 size_tchunk_size_delta = ((*chunk_size) %
CODON_LENGTH);
125num_chunks = concatenated_query_length / ((*chunk_size) - overlap_size);
129 if(num_chunks <= 1) {
136*
chunk_size= (concatenated_query_length + (num_chunks - 1) * overlap_size) / num_chunks;
141 _TRACE(
"Number of chunks: "<< num_chunks <<
"; " 142 "Target chunk size: "<< target_chunk_size <<
"; " 165 if(total_length <= 0)
172num_seqs, total_length,
177vector<Int8> eff_searchsp;
178 for(
size_tindex = 0; index <= (size_t)qinfo->
last_context; index++) {
196seqsrc, num_threads);
201 _ASSERT(setup_data->m_QuerySplitter->IsQuerySplit() ==
false);
204setup_data->m_InternalData->m_FnInterrupt = full_data->
m_FnInterrupt;
209 returnsetup_data->m_InternalData;
218 for(
size_t i= 0;
i< kNumChunks;
i++) {
222 if(query_chunk_factories ==
NULL|| options ==
NULL) {
229 for(
size_t i= 0;
i< kNumChunks;
i++) {
234ctx <= chunk_qinfo->last_context;
ctx++) {
243 Int4context_in_chunk)
const 252 intabsolute_context)
const 256vector<int>::const_iterator itr = find(context_indices.begin(),
257context_indices.end(),
259 if(itr == context_indices.end()) {
262 return static_cast<int>(itr - context_indices.begin());
267 Int4context_in_chunk)
const 274 size_tretval = curr_chunk;
276 for(--curr_chunk;
static_cast<int>(curr_chunk) >= 0; --curr_chunk) {
283 return static_cast<int>(retval);
294 out<< endl <<
"NumChunks = "<< kNumChunks << endl;
296 for(
size_t i= 0;
i< kNumChunks;
i++) {
297 out<<
"Chunk"<<
i<<
"StartingChunks = " 301 for(
size_t i= 0;
i< kNumChunks;
i++) {
302 out<<
"Chunk"<<
i<<
"AbsoluteContexts = " 321 for(
size_t i= 0;
i< kNumChunks;
i++) {
324 ITERATE(vector<size_t>, itr, query_indices) {
325global_query_indices.
insert(*itr);
358 return static_cast<size_t>(retval);
381vector<size_t>::const_iterator itr =
384(
size_t)global_query_index);
393retval =
static_cast<int>(
i);
User-defined methods of the data storage class.
Auxiliary functions for BLAST.
SBlastProgress * SBlastProgressNew(void *user_data)
Allocates and initializes a new SBlastProgress structure.
#define CODON_LENGTH
Codons are always of length 3.
Boolean Blast_ProgramIsPhiBlast(EBlastProgramType p)
Returns true if program is PHI-BLAST (i.e.
Boolean Blast_QueryIsTranslated(EBlastProgramType p)
Returns true if the query is translated.
EBlastProgramType
Defines the engine's notion of the different applications of the BLAST algorithm.
Boolean Blast_SubjectIsPssm(EBlastProgramType p)
Returns true if the subject is PSSM.
Int4 Blast_GetQueryIndexFromContext(Int4 context, EBlastProgramType program)
Given a context from BLAST engine core, return the query index.
Int8 BlastSeqSrcGetTotLenStats(const BlastSeqSrc *seq_src)
Get the total length of all sequences for calculation of expect value etc.
BlastSeqSrc * BlastSeqSrcCopy(const BlastSeqSrc *seq_src)
Copy function: needed to guarantee thread safety.
Int4 BlastSeqSrcGetNumSeqs(const BlastSeqSrc *seq_src)
Get the number of sequences contained in the sequence source.
Int8 BlastSeqSrcGetTotLen(const BlastSeqSrc *seq_src)
Get the total length of all sequences in the sequence source.
BlastSeqSrc * BlastSeqSrcFree(BlastSeqSrc *seq_src)
Frees the BlastSeqSrc structure by invoking the destructor function set by the user-defined construct...
Int4 BlastSeqSrcGetNumSeqsStats(const BlastSeqSrc *seq_src)
Get the number of sequences used for calculation of expect values etc.
void BlastSeqSrcResetChunkIterator(BlastSeqSrc *seq_src)
Reset the internal "bookmark" of the last chunk for iteration provided by this object.
Internal auxiliary setup classes/functions for C++ BLAST APIs.
Encapsulates ALL the BLAST algorithm's options.
Auxiliary class to provide convenient and efficient access to conversions between contexts local to q...
Auxiliary class to compute the effective search space.
Wrapper class for SBlastProgress .
Wrapper class around SSplitQueryBlk structure.
iterator_bool insert(const value_type &val)
static const int chunk_size
Declares auxiliary class to calculate the effective search space.
std::ofstream out("events_result.xml")
main entry point for tests
vector< vector< int > > m_StartingChunks
CQueryDataPerChunk(const CSplitQueryBlk &sqb, EBlastProgramType program, CRef< ILocalQueryData > local_query_data)
Constructor.
void SplitQuery_SetEffectiveSearchSpace(CRef< CBlastOptions > options, CRef< IQueryFactory > full_query_fact, CRef< SInternalData > full_data)
this might supercede the function below...
size_t SplitQuery_GetOverlapChunkSize(EBlastProgramType program)
Size of the region that overlaps in between each query chunk.
CRef< SBlastSetupData > BlastSetupPreliminarySearchEx(CRef< IQueryFactory > qf, CRef< CBlastOptions > options, CConstRef< CPssmWithParameters > pssm, BlastSeqSrc *seqsrc, size_t num_threads)
Extended interface to set up internal data structures used by the BLAST CORE engine.
vector< vector< int > > m_ContextsPerChunk
Each element in this vector represents a chunk, and it contains the contexts numbers that correspond ...
int GetLastChunk(int global_query_index)
get the last chunk where query identified with global_query_index is found
CRef< TBlastSeqSrc > m_SeqSrc
The source of subject sequence data.
size_t GetNumChunks() const
Retrieve the number of chunks.
int GetAbsoluteContext(size_t chunk_num, Int4 context_in_chunk) const
Get the context number in the absolute (i.e.
Int8 GetEffSearchSpaceForContext(size_t ctx_index) const
Retrieve the effective search space calculated for a given query context.
void SetEffectiveSearchSpace(Int8 eff)
CRef< ILocalQueryData > MakeLocalQueryData(const CBlastOptions *opts)
Creates and caches an ILocalQueryData.
CRef< SInternalData > SplitQuery_CreateChunkData(CRef< IQueryFactory > qf, CRef< CBlastOptions > options, CRef< SInternalData > full_data, size_t num_threads)
Function used by search class to retrieve a query factory for a given chunk.
ostream & operator<<(ostream &out, const CContextTranslator &rhs)
SBlastProgress * Get() const
size_t GetQueryLength(size_t chunk_num, int context_in_chunk) const
Get the length of the query.
vector< size_t > m_QueryLengths
Lengths of the queries.
vector< int > m_LastChunkForQueryCache
Lists the last chunk where the query can be found.
CStructWrapper< BlastSeqSrc > TBlastSeqSrc
CRef< TBlastScoreBlk > m_ScoreBlk
BLAST score block structure.
CContextTranslator(const CSplitQueryBlk &sqb, vector< CRef< IQueryFactory > > *query_chunk_factories=NULL, const CBlastOptions *options=NULL)
Constructor.
TInterruptFnPtr m_FnInterrupt
The interrupt callback.
virtual BlastQueryInfo * GetQueryInfo()=0
Accessor for the BlastQueryInfo structure.
CRef< CSBlastProgress > m_ProgressMonitor
The user data structure to aid in progress monitoring.
BlastQueryInfo * m_QueryInfo
The query information structure.
size_t x_ContextInChunkToQueryIndex(int context_in_chunk) const
Convert a context in a chunk to a query index (within the chunk)
int GetContextInChunk(size_t chunk_num, int absolute_context) const
Get the context number in the split query chunk.
vector< size_t > GetQueryIndices(size_t chunk_num) const
Get the indices of the queries contained in a given chunk.
EBlastProgramType m_Program
BLAST program type.
vector< vector< int > > m_AbsoluteContexts
int GetStartingChunk(size_t curr_chunk, Int4 context_in_chunk) const
Get the chunk number where context_in_chunk starts (i.e.
virtual size_t GetSeqLength(size_t index)=0
Get the length of the sequence indicated by index.
bool SplitQuery_ShouldSplit(EBlastProgramType program, size_t chunk_size, size_t concatenated_query_length, size_t num_queries)
Determines if the input query sequence(s) should be split because it.
vector< int > GetQueryContexts(size_t chunk_num) const
Get the contexts of the queries contained in a given chunk.
Uint4 SplitQuery_CalculateNumChunks(EBlastProgramType program, size_t *chunk_size, size_t concatenated_query_length, size_t num_queries)
Calculate the number of chunks that a query will be split into based upon query length,...
string s_PrintVector(const vector< T > &data2print)
Auxiliary function to print a vector.
Int8 GetEffectiveSearchSpace() const
vector< vector< size_t > > m_QueryIndicesPerChunk
Each element in this vector represents a chunk, and it contains the query indices that correspond in ...
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
void Reset(void)
Reset reference object.
int32_t Int4
4-byte (32-bit) signed integer
uint32_t Uint4
4-byte (32-bit) unsigned integer
int64_t Int8
8-byte (64-bit) signed integer
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define USING_SCOPE(ns)
Use the specified namespace.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
const struct ncbi::grid::netcache::search::fields::SIZE size
const Int4 kInvalidContext
Value to represent an invalid context.
Auxiliary functions and classes to assist in query splitting.
The query related information.
Int4 first_context
Index of the first element of the context array.
Int4 last_context
Index of the last element of the context array.
Complete type definition of Blast Sequence Source ADT.
Progress monitoring structure.
void * user_data
Pointer to user-provided data.
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4