: m_QueryFactory(query_factory), m_Options(options), m_NumChunks(0),
53m_LocalQueryData(0), m_TotalQueryLength(0), m_ChunkSize(0)
56m_LocalQueryData = m_QueryFactory->MakeLocalQueryData(m_Options);
57m_TotalQueryLength = m_LocalQueryData->GetSumOfSequenceLengths();
59&m_ChunkSize, m_TotalQueryLength, m_LocalQueryData->GetNumQueries());
61 if(!options->GetGappedMode()) m_NumChunks = 1;
62x_ExtractCScopesAndMasks();
72 out<< endl <<
"; This is read by x_ReadQueryBoundsPerChunk" 73<< endl <<
"; Format: query start, query end, strand"<< endl;
78 for(
size_tquery_index = 0; query_index < kNumQueries; query_index++) {
80(query_data->
GetSeq_loc(query_index)->GetId());
83 for(
size_tchunk_index = 0; chunk_index < kNumChunks; chunk_index++) {
88 for(
size_tqidx = 0; qidx < queries_in_chunk->
Size(); qidx++) {
93(query_loc_in_chunk->
GetId());
96 if(query_id->
Match(*query_id_in_chunk)) {
99 out<<
"Chunk"<< chunk_index <<
"Query"<< query_index
100<<
" = "<< range.
GetFrom() <<
", " 145 size_tchunk_start = 0;
146 const size_tkOverlapSize =
148 for(
size_tchunk_num = 0; chunk_num <
m_NumChunks; chunk_num++) {
162 TChunkRange(
static_cast<unsigned int>(chunk_start),
static_cast<unsigned int>(chunk_end)));
163 _TRACE(
"Chunk "<< chunk_num <<
": ranges from "<< chunk_start
164<<
" to "<< chunk_end);
175 const size_tkOverlap =
200interval.
SetFrom(
max(0, qstart) + query_offset);
210interval.
SetTo() -= 1;
221vector<TChunkRange> query_ranges;
222query_ranges.reserve(kNumQueries);
224 _TRACE(
"Query 0: "<< query_ranges.back().GetFrom() <<
"-"<<
225query_ranges.back().GetToOpen());
226 for(
int i= 1;
i< kNumQueries;
i++) {
227 TSeqPosquery_start = query_ranges[
i-1].GetTo() + 1;
229query_ranges.push_back(
TChunkRange(query_start, query_end));
230 _TRACE(
"Query "<<
i<<
": "<< query_ranges.back().GetFrom()
231<<
"-"<< query_ranges.back().GetToOpen());
238 for(
size_tchunk_num = 0; chunk_num <
m_NumChunks; chunk_num++) {
242 for(
size_tqindex = 0; qindex < query_ranges.size(); qindex++) {
243 const TChunkRange& query_range = query_ranges[qindex];
267split_query_loc->
SetId(*query_seqloc->
GetId());
271 _TRACE(
"Chunk "<< chunk_num <<
": query "<< qindex <<
" (" 275<<
" strand "<< (
int)split_query_loc->
GetStrand());
300 static inline unsigned int 304 _ASSERT(shift == 0 || shift == 1 || shift == -1);
309}
else if(shift == 1) {
311}
else if(shift == -1) {
331 case1: retval = -1;
break;
332 case2: retval = 1;
break;
333 case0:
default: retval = 0;
break;
344unique_ptr<CQueryDataPerChunk> qdpc;
351 for(
size_tchunk_num = 0; chunk_num <
m_NumChunks; chunk_num++) {
354 for(
size_t i= 0;
i< queries.size();
i++) {
360 size_tqlength = qdpc->GetQueryLength(
static_cast<int>(queries[
i]));
361 intlast_query_chunk = qdpc->GetLastChunk(
static_cast<int>(queries[
i]));
362 _ASSERT(last_query_chunk != -1);
365 for(
unsigned int ctx= 0;
ctx< kNumContexts;
ctx++) {
373 static_cast<Int4>(kNumContexts*queries[
i]+
ctx));
380 if(chunk_num == (
size_t)last_query_chunk) {
383 static_cast<Int4>(kNumContexts*queries[
i]+
ctx));
386 static_cast<Int4>(kNumContexts*queries[
i]+
394 for(
unsigned int ctx= 0;
ctx< kNumContexts;
ctx++) {
402 static_cast<Int4>(kNumContexts*queries[
i]+
ctx));
410 static_cast<Int4>(kNumContexts*queries[
i]+
ctx));
417 static_cast<Int4>(kNumContexts*queries[
i]));
455 intabsolute_context)
464 returnchunk_qinfo[chunk_num]->contexts[pos].query_length;
471 #ifdef DEBUG_COMPARE_SEQUENCES 477 static strings_GetPrintableSequence(
const Uint1* seq,
size_t len,
boolis_prot)
480 for(
size_t i= 0;
i<
len;
i++) {
481retval.append(1, (is_prot
495 static boolcmp_sequence(
const Uint1* global,
const Uint1* chunk,
size_t len,
500 for(
size_t i= 0;
i<
len;
i++) {
501 if(global[
i] != chunk[
i]) {
507 if(retval ==
false) {
508 _TRACE(
"Comparing global: '" 509<< s_GetPrintableSequence(global,
len, is_prot) <<
"'");
511<< s_GetPrintableSequence(chunk,
len, is_prot) <<
"'");
587 #ifdef DEBUG_COMPARE_SEQUENCES 593vector<const BlastQueryInfo*> chunk_qinfo(
m_NumChunks, 0);
595 for(
size_tchunk_num = 0; chunk_num <
m_NumChunks; chunk_num++) {
598 #ifdef DEBUG_COMPARE_SEQUENCES 604 _ASSERT(chunk_qinfo[chunk_num]);
608 for(
Int4 ctx= 0;
ctx< chunk_qinfo[chunk_num]->first_context;
ctx++) {
612 for(
Int4 ctx= chunk_qinfo[chunk_num]->first_context;
613 ctx<= chunk_qinfo[chunk_num]->last_context;
616 size_tcorrection = 0;
617 const intstarting_chunk =
619 const intabsolute_context =
632 for(
intc =
static_cast<int>(chunk_num); c != starting_chunk; c--) {
640 size_toverlap =
min(kOverlap, curr_len);
641correction += prev_len -
min(overlap, prev_len);
646 size_tsubtrahend = 0;
648 for(
intc =
static_cast<int>(chunk_num); c >= starting_chunk && c >= 0; c--) {
657 size_toverlap =
min(kOverlap, curr_len);
658subtrahend += (curr_len -
min(overlap, prev_len));
667 #ifdef DEBUG_COMPARE_SEQUENCES 671 intchunk_offset = chunk_qinfo[chunk_num]->contexts[
ctx].query_offset;
672 if(!cmp_sequence(&global_seq->
sequence[global_offset],
673&chunk_seq->
sequence[chunk_offset], 10,
675cerr <<
"Failed to compare sequence data!"<< endl;
682 _TRACE(
"CContextTranslator contents: "<< ctx_translator);
693 #ifdef DEBUG_COMPARE_SEQUENCES 696 const size_tkOverlap =
701vector<const BlastQueryInfo*> chunk_qinfo(
m_NumChunks, 0);
703 for(
size_tchunk_num = 0; chunk_num <
m_NumChunks; chunk_num++) {
706 #ifdef DEBUG_COMPARE_SEQUENCES 712 _ASSERT(chunk_qinfo[chunk_num]);
716 for(
Int4 ctx= 0;
ctx< chunk_qinfo[chunk_num]->first_context;
ctx++) {
720 for(
Int4 ctx= chunk_qinfo[chunk_num]->first_context;
721 ctx<= chunk_qinfo[chunk_num]->last_context;
724 size_tcorrection = 0;
725 const intstarting_chunk =
727 const intabsolute_context =
742(chunk_num == (
size_t)last_query_chunk) &&
801 for(
intc =
static_cast<int>(chunk_num); c != starting_chunk; c--) {
809 size_toverlap =
min(kOverlap, curr_len);
810correction += prev_len -
min(overlap, prev_len);
815 size_tsubtrahend = 0;
817 for(
intc =
static_cast<int>(chunk_num); c >= starting_chunk && c >= 0; c--) {
826 size_toverlap =
min(kOverlap, curr_len);
827subtrahend += (curr_len -
min(overlap, prev_len));
837 #ifdef DEBUG_COMPARE_SEQUENCES 841 intchunk_offset = chunk_qinfo[chunk_num]->contexts[
ctx].query_offset;
842 intnum_bases2compare =
843 min(10, chunk_qinfo[chunk_num]->contexts[
ctx].query_length);
844 if(!cmp_sequence(&global_seq->
sequence[global_offset],
845&chunk_seq->
sequence[chunk_offset],
847cerr <<
"Failed to compare sequence data for chunk "<< chunk_num
848<<
", context "<<
ctx<< endl;
854 _TRACE(
"CContextTranslator contents: "<< ctx_translator);
876 for(
size_tchunk_num = 0; chunk_num <
m_NumChunks; chunk_num++) {
886 _TRACE(
"CQuerySplitter contents: "<< *
this);
895 string msg(
"Invalid query chunk number: ");
898 throwout_of_range(
msg);
#define CODON_LENGTH
Codons are always of length 3.
#define NUM_STRANDS
Number of frames in a nucleotide sequence.
#define NUM_FRAMES
Number of frames to which we translate in translating searches.
Declares class to encapsulate all BLAST options.
Boolean Blast_QueryIsTranslated(EBlastProgramType p)
Returns true if the query is translated.
Boolean Blast_QueryIsNucleotide(EBlastProgramType p)
Returns true if the query is nucleotide.
Boolean Blast_QueryIsProtein(EBlastProgramType p)
Returns true if the query is protein.
EBlastProgramType
Defines the engine's notion of the different applications of the BLAST algorithm.
Internal auxiliary setup classes/functions for C++ BLAST APIs.
Encapsulates ALL the BLAST algorithm's options.
size_type Size() const
Returns the number of queries found in this query vector.
CConstRef< objects::CSeq_loc > GetQuerySeqLoc(size_type i) const
Get the query Seq-loc for a query by index.
Auxiliary class to provide convenient and efficient access to conversions between contexts local to q...
NCBI C++ Object Manager dependant implementation of IQueryFactory.
Auxiliary class to determine information about the query that was split into chunks.
Class responsible for splitting query sequences and providing data to the BLAST search class to searc...
Wrapper class around SSplitQueryBlk structure.
Provides access (not ownership) to the C structures used to configure local BLAST search class implem...
Source of query sequence data for BLAST Provides an interface for search classes to retrieve sequence...
Collection of masked regions for a single query sequence.
static bool is_valid(const char *num, int type, CONV_RESULT *cr)
std::ofstream out("events_result.xml")
main entry point for tests
bool Empty(const CNcbiOstrstream &src)
virtual CConstRef< objects::CSeq_loc > GetSeq_loc(size_t index)=0
Get the Seq_loc for the sequence indicated by index.
vector< CRef< IQueryFactory > > m_QueryChunkFactories
Vector of query factories, each element corresponds to a chunk.
size_t SplitQuery_GetOverlapChunkSize(EBlastProgramType program)
Size of the region that overlaps in between each query chunk.
size_t m_TotalQueryLength
Length of the concatenated query.
void AddQueryToChunk(size_t chunk_num, Int4 query_index)
Adds a query index to a given chunk.
TChunkRange GetChunkBounds(size_t chunk_num) const
Get the boundaries of a chunk in the concatenated query.
TScopeVector m_Scopes
Vector of CScope objects.
COpenRange< TSeqPos > TChunkRange
Range describing a query chunk.
int GetLastChunk(int global_query_index)
Get the last chunk in which the query identified by global_query_index is found.
void x_ComputeContextOffsetsForChunks()
Compute the context offsets which are used to adjust the results.
int GetAbsoluteContext(size_t chunk_num, Int4 context_in_chunk) const
Get the context number in the absolute (i.e.
void SetChunkOverlapSize(size_t size)
Sets the size (# of bases/residues) of overlap between query chunks.
objects::ENa_strand GetStrandOption() const
const char BLASTNA_TO_IUPACNA[]
Translates between blastna and iupacna.
virtual BLAST_SequenceBlk * GetSequenceBlk()=0
Accessor for the BLAST_SequenceBlk structure.
void SetChunkBounds(size_t chunk_num, const TChunkRange &chunk_range)
Set the boundaries of a chunk in the concatenated query.
CRef< ILocalQueryData > MakeLocalQueryData(const CBlastOptions *opts)
Creates and caches an ILocalQueryData.
Uint4 m_NumChunks
Number of chunks, if this is 1, no splitting occurs.
void x_ComputeContextOffsets_NonTranslatedQueries()
Compute the context offsets which are used to adjust the results for non-translated queries.
void AddContextToChunk(size_t chunk_num, Int4 context_index)
Adds a query context to a given chunk.
size_t GetQueryLength(size_t chunk_num, int context_in_chunk) const
Get the length of the query.
size_t SplitQuery_GetChunkSize(EProgram program)
Returns the optimal chunk size for a given task.
EBlastProgramType GetProgramType() const
Returns the CORE BLAST notion of program type.
void AddContextOffsetToChunk(size_t chunk_num, Int4 context_offset)
Adds a context offset (correction) to a given chunk.
virtual BlastQueryInfo * GetQueryInfo()=0
Accessor for the BlastQueryInfo structure.
objects::ENa_strand BlastSetup_GetStrand(const objects::CSeq_loc &query_seqloc, EBlastProgramType program, objects::ENa_strand strand_option)
Choose between a Seq-loc specified query strand and the strand obtained from the CBlastOptions.
void x_ComputeQueryIndicesForChunks()
Compute query indices that correspond to each chunk.
CRef< ILocalQueryData > m_LocalQueryData
Source of local query data.
CRef< IQueryFactory > GetQueryFactoryForChunk(Uint4 chunk_num)
Returns a IQueryFactory suitable to be executed by a BLAST search class.
void x_ComputeContextOffsets_TranslatedQueries()
Compute the context offsets which are used to adjust the results for translated queries.
size_t m_ChunkSize
Size of the query chunks.
unsigned int GetNumberOfContexts(EBlastProgramType p)
Returns the number of contexts for a given BLAST program.
virtual size_t GetNumQueries()=0
Get the number of queries.
TSeqLocInfoVector ExtractUserSpecifiedMasks()
Retrieve any user specified masking locations.
TSeqLocInfoVector m_UserSpecifiedMasks
Vector of masking locations.
const CBlastOptions * m_Options
BLAST options.
int GetContextInChunk(size_t chunk_num, int absolute_context) const
Get the context number in the split query chunk.
void x_ComputeQueryContextsForChunks()
Compute query contexts that correspond to each chunk.
vector< size_t > GetQueryIndices(size_t chunk_num) const
Get the indices of the queries contained in a given chunk.
CRef< CSplitQueryBlk > Split()
Split the query sequence(s)
int GetStartingChunk(size_t curr_chunk, Int4 context_in_chunk) const
Get the chunk number where context_in_chunk starts (i.e.
virtual size_t GetSeqLength(size_t index)=0
Get the length of the sequence indicated by index.
CRef< IQueryFactory > m_QueryFactory
The original, unsplit query factory.
vector< CRef< objects::CScope > > ExtractScopes()
Retrieve the CScope objects associated with the query sequences associated with this object.
Uint4 SplitQuery_CalculateNumChunks(EBlastProgramType program, size_t *chunk_size, size_t concatenated_query_length, size_t num_queries)
Calculate the number of chunks that a query will be split into based upon query length,...
void x_ExtractCScopesAndMasks()
Auxiliary method to extract the CScope objects from the query factory.
Uint4 GetNumberOfChunks() const
Returns the number of chunks the query/queries will be split into.
CRef< CSplitQueryBlk > m_SplitBlk
Split query block structure.
vector< size_t > GetContextOffsets(size_t chunk_num) const
Get the context offsets (corrections) of the queries contained in a given chunk.
TSplitQueryVector m_SplitQueriesInChunk
Vector of split queries.
const char NCBISTDAA_TO_AMINOACID[]
Translates between ncbieaa and ncbistdaa.
bool GetGappedMode() const
Returns true if gapped BLAST is set, false otherwise.
void x_ComputeChunkRanges()
Compute all chunk ranges.
unsigned int TSeqPos
Type for sequence locations and lengths.
const string AsFastaString(void) const
bool Match(const CSeq_id &sid2) const
Match() - TRUE if SeqIds are equivalent.
ENa_strand GetStrand(void) const
Get the location's strand.
void SetId(CSeq_id &id)
set the 'id' field in all parts of this location
TRange GetTotalRange(void) const
const CSeq_id * GetId(void) const
Get the id of the location return NULL if has multiple ids or no id at all.
void SetStrand(ENa_strand strand)
Set the strand for all of the location's ranges.
void Reset(void)
Reset reference object.
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty — pointing to an object and has a non-null value.
bool Empty(void) const THROWS_NONE
Check if CRef is empty — not pointing to any object, which means having a null value.
uint8_t Uint1
1-byte (8-bit) unsigned integer
int32_t Int4
4-byte (32-bit) signed integer
uint32_t Uint4
4-byte (32-bit) unsigned integer
bool IntersectingWith(const TThisType &r) const
position_type GetToOpen(void) const
position_type GetFrom(void) const
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
TFrom GetFrom(void) const
Get the From member data.
void SetTo(TTo value)
Assign a value to To data member.
ENa_strand
strand of nucleic acid
TFrom GetFrom(void) const
Get the From member data.
void SetFrom(TFrom value)
Assign a value to From data member.
TTo GetTo(void) const
Get the To member data.
bool IsInt(void) const
Check if variant Int is selected.
const TInt & GetInt(void) const
Get the variant data.
unsigned int
A callback function used to compare two keys in a database.
Main class to perform a BLAST search on the local machine.
#define INT4_MAX
largest number represented by signed int
#define ASSERT
macro for assert.
NOTE: This file contains work in progress and the APIs are likely to change, please do not rely on th...
static SLJIT_INLINE sljit_ins msg(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
const Int4 kInvalidContext
Value to represent an invalid context.
Declares CQuerySplitter, a class to split the query sequence(s)
Auxiliary functions and classes to assist in query splitting.
ostream & operator<<(ostream &out, const CQuerySplitter &rhs)
static int s_GetShiftForTranslatedNegStrand(size_t query_length)
Retrieve the shift for the negative strand.
static unsigned int s_AddShift(unsigned int context, int shift)
Adds the necessary shift to the context to record the query contexts for the query chunks.
static void s_SetSplitQuerySeqInterval(const TChunkRange &chunk, const TChunkRange &query_range, int query_offset, CRef< CSeq_loc > split_query_loc)
Auxiliary function to assign the split query's Seq-interval so that it's constrained within the chunk...
static size_t s_GetAbsoluteContextLength(const vector< const BlastQueryInfo * > &chunk_qinfo, int chunk_num, const CContextTranslator &ctx_translator, int absolute_context)
Get the length of a context in absolute terms (i.e.
static bool s_IsPlusStrand(const BlastQueryInfo *qinfo, Int4 context_number)
Determine whether a given context corresponds to the plus or minus strand.
Definition of SSeqLoc structure.
Structure to hold a sequence.
Uint1 * sequence
Sequence used for search (could be translation).
Int4 query_length
Length of this query, strand or frame.
Int4 query_offset
Offset of this query, strand or frame in the concatenated super-query.
Int1 frame
Frame number (-1, -2, -3, 0, 1, 2, or 3)
The query related information.
BlastContextInfo * contexts
Information per context.
static CS_CONTEXT * context
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4