A RetroSearch Logo

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Search Query:

Showing content from http://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/doxyhtml/split__query__cxx_8cpp_source.html below:

NCBI C++ ToolKit: src/algo/blast/api/split_query_cxx.cpp Source File

52

: m_QueryFactory(query_factory), m_Options(options), m_NumChunks(0),

53

m_LocalQueryData(0), m_TotalQueryLength(0), m_ChunkSize(0)

56

m_LocalQueryData = m_QueryFactory->MakeLocalQueryData(m_Options);

57

m_TotalQueryLength = m_LocalQueryData->GetSumOfSequenceLengths();

59

&m_ChunkSize, m_TotalQueryLength, m_LocalQueryData->GetNumQueries());

61  if

(!options->GetGappedMode()) m_NumChunks = 1;

62

x_ExtractCScopesAndMasks();

72  out

<< endl <<

"; This is read by x_ReadQueryBoundsPerChunk" 73

<< endl <<

"; Format: query start, query end, strand"

<< endl;

78  for

(

size_t

query_index = 0; query_index < kNumQueries; query_index++) {

80

(query_data->

GetSeq_loc

(query_index)->GetId());

83  for

(

size_t

chunk_index = 0; chunk_index < kNumChunks; chunk_index++) {

88  for

(

size_t

qidx = 0; qidx < queries_in_chunk->

Size

(); qidx++) {

93

(query_loc_in_chunk->

GetId

());

96  if

(query_id->

Match

(*query_id_in_chunk)) {

99  out

<<

"Chunk"

<< chunk_index <<

"Query"

<< query_index

100

<<

" = "

<< range.

GetFrom

() <<

", " 145  size_t

chunk_start = 0;

146  const size_t

kOverlapSize =

148  for

(

size_t

chunk_num = 0; chunk_num <

m_NumChunks

; chunk_num++) {

162  TChunkRange

(

static_cast<unsigned int>

(chunk_start),

static_cast<unsigned int>

(chunk_end)));

163  _TRACE

(

"Chunk "

<< chunk_num <<

": ranges from "

<< chunk_start

164

<<

" to "

<< chunk_end);

175  const size_t

kOverlap =

200

interval.

SetFrom

(

max

(0, qstart) + query_offset);

210

interval.

SetTo

() -= 1;

221

vector<TChunkRange> query_ranges;

222

query_ranges.reserve(kNumQueries);

224  _TRACE

(

"Query 0: "

<< query_ranges.back().GetFrom() <<

"-"

<<

225

query_ranges.back().GetToOpen());

226  for

(

int i

= 1;

i

< kNumQueries;

i

++) {

227  TSeqPos

query_start = query_ranges[

i

-1].GetTo() + 1;

229

query_ranges.push_back(

TChunkRange

(query_start, query_end));

230  _TRACE

(

"Query "

<<

i

<<

": "

<< query_ranges.back().GetFrom()

231

<<

"-"

<< query_ranges.back().GetToOpen());

238  for

(

size_t

chunk_num = 0; chunk_num <

m_NumChunks

; chunk_num++) {

242  for

(

size_t

qindex = 0; qindex < query_ranges.size(); qindex++) {

243  const TChunkRange

& query_range = query_ranges[qindex];

267

split_query_loc->

SetId

(*query_seqloc->

GetId

());

271  _TRACE

(

"Chunk "

<< chunk_num <<

": query "

<< qindex <<

" (" 275

<<

" strand "

<< (

int

)split_query_loc->

GetStrand

());

300 static inline unsigned int 304  _ASSERT

(shift == 0 || shift == 1 || shift == -1);

309

}

else if

(shift == 1) {

311

}

else if

(shift == -1) {

331  case

1: retval = -1;

break

;

332  case

2: retval = 1;

break

;

333  case

0:

default

: retval = 0;

break

;

344

unique_ptr<CQueryDataPerChunk> qdpc;

351  for

(

size_t

chunk_num = 0; chunk_num <

m_NumChunks

; chunk_num++) {

354  for

(

size_t i

= 0;

i

< queries.size();

i

++) {

360  size_t

qlength = qdpc->GetQueryLength(

static_cast<int>

(queries[

i

]));

361  int

last_query_chunk = qdpc->GetLastChunk(

static_cast<int>

(queries[

i

]));

362  _ASSERT

(last_query_chunk != -1);

365  for

(

unsigned int ctx

= 0;

ctx

< kNumContexts;

ctx

++) {

373  static_cast<Int4>

(kNumContexts*queries[

i

]+

ctx

));

380  if

(chunk_num == (

size_t

)last_query_chunk) {

383  static_cast<Int4>

(kNumContexts*queries[

i

]+

ctx

));

386  static_cast<Int4>

(kNumContexts*queries[

i

]+

394  for

(

unsigned int ctx

= 0;

ctx

< kNumContexts;

ctx

++) {

402  static_cast<Int4>

(kNumContexts*queries[

i

]+

ctx

));

410  static_cast<Int4>

(kNumContexts*queries[

i

]+

ctx

));

417  static_cast<Int4>

(kNumContexts*queries[

i

]));

455  int

absolute_context)

464  return

chunk_qinfo[chunk_num]->contexts[pos].query_length;

471 #ifdef DEBUG_COMPARE_SEQUENCES 477 static string

s_GetPrintableSequence(

const Uint1

* seq,

size_t len

,

bool

is_prot)

480  for

(

size_t i

= 0;

i

<

len

;

i

++) {

481

retval.append(1, (is_prot

495 static bool

cmp_sequence(

const Uint1

* global,

const Uint1

* chunk,

size_t len

,

500  for

(

size_t i

= 0;

i

<

len

;

i

++) {

501  if

(global[

i

] != chunk[

i

]) {

507  if

(retval ==

false

) {

508  _TRACE

(

"Comparing global: '" 509

<< s_GetPrintableSequence(global,

len

, is_prot) <<

"'"

);

511

<< s_GetPrintableSequence(chunk,

len

, is_prot) <<

"'"

);

587 #ifdef DEBUG_COMPARE_SEQUENCES 593

vector<const BlastQueryInfo*> chunk_qinfo(

m_NumChunks

, 0);

595  for

(

size_t

chunk_num = 0; chunk_num <

m_NumChunks

; chunk_num++) {

598 #ifdef DEBUG_COMPARE_SEQUENCES 604  _ASSERT

(chunk_qinfo[chunk_num]);

608  for

(

Int4 ctx

= 0;

ctx

< chunk_qinfo[chunk_num]->first_context;

ctx

++) {

612  for

(

Int4 ctx

= chunk_qinfo[chunk_num]->first_context;

613  ctx

<= chunk_qinfo[chunk_num]->last_context;

616  size_t

correction = 0;

617  const int

starting_chunk =

619  const int

absolute_context =

632  for

(

int

c =

static_cast<int>

(chunk_num); c != starting_chunk; c--) {

640  size_t

overlap =

min

(kOverlap, curr_len);

641

correction += prev_len -

min

(overlap, prev_len);

646  size_t

subtrahend = 0;

648  for

(

int

c =

static_cast<int>

(chunk_num); c >= starting_chunk && c >= 0; c--) {

657  size_t

overlap =

min

(kOverlap, curr_len);

658

subtrahend += (curr_len -

min

(overlap, prev_len));

667 #ifdef DEBUG_COMPARE_SEQUENCES 671  int

chunk_offset = chunk_qinfo[chunk_num]->contexts[

ctx

].query_offset;

672  if

(!cmp_sequence(&global_seq->

sequence

[global_offset],

673

&chunk_seq->

sequence

[chunk_offset], 10,

675

cerr <<

"Failed to compare sequence data!"

<< endl;

682  _TRACE

(

"CContextTranslator contents: "

<< ctx_translator);

693 #ifdef DEBUG_COMPARE_SEQUENCES 696  const size_t

kOverlap =

701

vector<const BlastQueryInfo*> chunk_qinfo(

m_NumChunks

, 0);

703  for

(

size_t

chunk_num = 0; chunk_num <

m_NumChunks

; chunk_num++) {

706 #ifdef DEBUG_COMPARE_SEQUENCES 712  _ASSERT

(chunk_qinfo[chunk_num]);

716  for

(

Int4 ctx

= 0;

ctx

< chunk_qinfo[chunk_num]->first_context;

ctx

++) {

720  for

(

Int4 ctx

= chunk_qinfo[chunk_num]->first_context;

721  ctx

<= chunk_qinfo[chunk_num]->last_context;

724  size_t

correction = 0;

725  const int

starting_chunk =

727  const int

absolute_context =

742

(chunk_num == (

size_t

)last_query_chunk) &&

801  for

(

int

c =

static_cast<int>

(chunk_num); c != starting_chunk; c--) {

809  size_t

overlap =

min

(kOverlap, curr_len);

810

correction += prev_len -

min

(overlap, prev_len);

815  size_t

subtrahend = 0;

817  for

(

int

c =

static_cast<int>

(chunk_num); c >= starting_chunk && c >= 0; c--) {

826  size_t

overlap =

min

(kOverlap, curr_len);

827

subtrahend += (curr_len -

min

(overlap, prev_len));

837 #ifdef DEBUG_COMPARE_SEQUENCES 841  int

chunk_offset = chunk_qinfo[chunk_num]->contexts[

ctx

].query_offset;

842  int

num_bases2compare =

843  min

(10, chunk_qinfo[chunk_num]->contexts[

ctx

].query_length);

844  if

(!cmp_sequence(&global_seq->

sequence

[global_offset],

845

&chunk_seq->

sequence

[chunk_offset],

847

cerr <<

"Failed to compare sequence data for chunk "

<< chunk_num

848

<<

", context "

<<

ctx

<< endl;

854  _TRACE

(

"CContextTranslator contents: "

<< ctx_translator);

876  for

(

size_t

chunk_num = 0; chunk_num <

m_NumChunks

; chunk_num++) {

886  _TRACE

(

"CQuerySplitter contents: "

<< *

this

);

895  string msg

(

"Invalid query chunk number: "

);

898  throw

out_of_range(

msg

);

#define CODON_LENGTH

Codons are always of length 3.

#define NUM_STRANDS

Number of frames in a nucleotide sequence.

#define NUM_FRAMES

Number of frames to which we translate in translating searches.

Declares class to encapsulate all BLAST options.

Boolean Blast_QueryIsTranslated(EBlastProgramType p)

Returns true if the query is translated.

Boolean Blast_QueryIsNucleotide(EBlastProgramType p)

Returns true if the query is nucleotide.

Boolean Blast_QueryIsProtein(EBlastProgramType p)

Returns true if the query is protein.

EBlastProgramType

Defines the engine's notion of the different applications of the BLAST algorithm.

Internal auxiliary setup classes/functions for C++ BLAST APIs.

Encapsulates ALL the BLAST algorithm's options.

size_type Size() const

Returns the number of queries found in this query vector.

CConstRef< objects::CSeq_loc > GetQuerySeqLoc(size_type i) const

Get the query Seq-loc for a query by index.

Auxiliary class to provide convenient and efficient access to conversions between contexts local to q...

NCBI C++ Object Manager dependent implementation of IQueryFactory.

Auxiliary class to determine information about the query that was split into chunks.

Class responsible for splitting query sequences and providing data to the BLAST search class to searc...

Wrapper class around SSplitQueryBlk structure.

Provides access (not ownership) to the C structures used to configure local BLAST search class implem...

Source of query sequence data for BLAST Provides an interface for search classes to retrieve sequence...

Collection of masked regions for a single query sequence.

static bool is_valid(const char *num, int type, CONV_RESULT *cr)

std::ofstream out("events_result.xml")

main entry point for tests

bool Empty(const CNcbiOstrstream &src)

virtual CConstRef< objects::CSeq_loc > GetSeq_loc(size_t index)=0

Get the Seq_loc for the sequence indicated by index.

vector< CRef< IQueryFactory > > m_QueryChunkFactories

Vector of query factories, each element corresponds to a chunk.

size_t SplitQuery_GetOverlapChunkSize(EBlastProgramType program)

Size of the region that overlaps in between each query chunk.

size_t m_TotalQueryLength

Length of the concatenated query.

void AddQueryToChunk(size_t chunk_num, Int4 query_index)

Adds a query index to a given chunk.

TChunkRange GetChunkBounds(size_t chunk_num) const

Get the boundaries of a chunk in the concatenated query.

TScopeVector m_Scopes

Vector of CScope objects.

COpenRange< TSeqPos > TChunkRange

Range describing a query chunk.

int GetLastChunk(int global_query_index)

Get the last chunk in which the query identified by global_query_index is found.

void x_ComputeContextOffsetsForChunks()

Compute the context offsets which are used to adjust the results.

int GetAbsoluteContext(size_t chunk_num, Int4 context_in_chunk) const

Get the context number in the absolute (i.e.

void SetChunkOverlapSize(size_t size)

Sets the size (# of bases/residues) of overlap between query chunks.

objects::ENa_strand GetStrandOption() const

const char BLASTNA_TO_IUPACNA[]

Translates between blastna and iupacna.

virtual BLAST_SequenceBlk * GetSequenceBlk()=0

Accessor for the BLAST_SequenceBlk structure.

void SetChunkBounds(size_t chunk_num, const TChunkRange &chunk_range)

Set the boundaries of a chunk in the concatenated query.

CRef< ILocalQueryData > MakeLocalQueryData(const CBlastOptions *opts)

Creates and caches an ILocalQueryData.

Uint4 m_NumChunks

Number of chunks, if this is 1, no splitting occurs.

void x_ComputeContextOffsets_NonTranslatedQueries()

Compute the context offsets which are used to adjust the results for non-translated queries.

void AddContextToChunk(size_t chunk_num, Int4 context_index)

Adds a query context to a given chunk.

size_t GetQueryLength(size_t chunk_num, int context_in_chunk) const

Get the length of the query.

size_t SplitQuery_GetChunkSize(EProgram program)

Returns the optimal chunk size for a given task.

EBlastProgramType GetProgramType() const

Returns the CORE BLAST notion of program type.

void AddContextOffsetToChunk(size_t chunk_num, Int4 context_offset)

Adds a context offset (correction) to a given chunk.

virtual BlastQueryInfo * GetQueryInfo()=0

Accessor for the BlastQueryInfo structure.

objects::ENa_strand BlastSetup_GetStrand(const objects::CSeq_loc &query_seqloc, EBlastProgramType program, objects::ENa_strand strand_option)

Choose between a Seq-loc specified query strand and the strand obtained from the CBlastOptions.

void x_ComputeQueryIndicesForChunks()

Compute query indices that correspond to each chunk.

CRef< ILocalQueryData > m_LocalQueryData

Source of local query data.

CRef< IQueryFactory > GetQueryFactoryForChunk(Uint4 chunk_num)

Returns a IQueryFactory suitable to be executed by a BLAST search class.

void x_ComputeContextOffsets_TranslatedQueries()

Compute the context offsets which are used to adjust the results for translated queries.

size_t m_ChunkSize

Size of the query chunks.

unsigned int GetNumberOfContexts(EBlastProgramType p)

Returns the number of contexts for a given BLAST program.

virtual size_t GetNumQueries()=0

Get the number of queries.

TSeqLocInfoVector ExtractUserSpecifiedMasks()

Retrieve any user specified masking locations.

TSeqLocInfoVector m_UserSpecifiedMasks

Vector of masking locations.

const CBlastOptions * m_Options

BLAST options.

int GetContextInChunk(size_t chunk_num, int absolute_context) const

Get the context number in the split query chunk.

void x_ComputeQueryContextsForChunks()

Compute query contexts that correspond to each chunk.

vector< size_t > GetQueryIndices(size_t chunk_num) const

Get the indices of the queries contained in a given chunk.

CRef< CSplitQueryBlk > Split()

Split the query sequence(s)

int GetStartingChunk(size_t curr_chunk, Int4 context_in_chunk) const

Get the chunk number where context_in_chunk starts (i.e.

virtual size_t GetSeqLength(size_t index)=0

Get the length of the sequence indicated by index.

CRef< IQueryFactory > m_QueryFactory

The original, unsplit query factory.

vector< CRef< objects::CScope > > ExtractScopes()

Retrieve the CScope objects associated with the query sequences associated with this object.

Uint4 SplitQuery_CalculateNumChunks(EBlastProgramType program, size_t *chunk_size, size_t concatenated_query_length, size_t num_queries)

Calculate the number of chunks that a query will be split into based upon query length,...

void x_ExtractCScopesAndMasks()

Auxiliary method to extract the CScope objects from the query factory.

Uint4 GetNumberOfChunks() const

Returns the number of chunks the query/queries will be split into.

CRef< CSplitQueryBlk > m_SplitBlk

Split query block structure.

vector< size_t > GetContextOffsets(size_t chunk_num) const

Get the context offsets (corrections) of the queries contained in a given chunk.

TSplitQueryVector m_SplitQueriesInChunk

Vector of split queries.

const char NCBISTDAA_TO_AMINOACID[]

Translates between ncbieaa and ncbistdaa.

bool GetGappedMode() const

Returns true if gapped BLAST is set, false otherwise.

void x_ComputeChunkRanges()

Compute all chunk ranges.

unsigned int TSeqPos

Type for sequence locations and lengths.

const string AsFastaString(void) const

bool Match(const CSeq_id &sid2) const

Match() - TRUE if SeqIds are equivalent.

ENa_strand GetStrand(void) const

Get the location's strand.

void SetId(CSeq_id &id)

set the 'id' field in all parts of this location

TRange GetTotalRange(void) const

const CSeq_id * GetId(void) const

Get the id of the location return NULL if has multiple ids or no id at all.

void SetStrand(ENa_strand strand)

Set the strand for all of the location's ranges.

void Reset(void)

Reset reference object.

bool NotEmpty(void) const THROWS_NONE

Check if CRef is not empty – pointing to an object and has a non-null value.

bool Empty(void) const THROWS_NONE

Check if CRef is empty – not pointing to any object, which means having a null value.

uint8_t Uint1

1-byte (8-bit) unsigned integer

int32_t Int4

4-byte (32-bit) signed integer

uint32_t Uint4

4-byte (32-bit) unsigned integer

bool IntersectingWith(const TThisType &r) const

position_type GetToOpen(void) const

position_type GetFrom(void) const

#define END_NCBI_SCOPE

End previously defined NCBI scope.

#define END_SCOPE(ns)

End the previously defined scope.

#define BEGIN_NCBI_SCOPE

Define ncbi namespace.

#define BEGIN_SCOPE(ns)

Define a new scope.

static string IntToString(int value, TNumToStringFlags flags=0, int base=10)

Convert int to string.

TFrom GetFrom(void) const

Get the From member data.

void SetTo(TTo value)

Assign a value to To data member.

ENa_strand

strand of nucleic acid

TFrom GetFrom(void) const

Get the From member data.

void SetFrom(TFrom value)

Assign a value to From data member.

TTo GetTo(void) const

Get the To member data.

bool IsInt(void) const

Check if variant Int is selected.

const TInt & GetInt(void) const

Get the variant data.

unsigned int

A callback function used to compare two keys in a database.

Main class to perform a BLAST search on the local machine.

#define INT4_MAX

largest number represented by signed int

#define ASSERT

macro for assert.

NOTE: This file contains work in progress and the APIs are likely to change, please do not rely on th...

static SLJIT_INLINE sljit_ins msg(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)

const Int4 kInvalidContext

Value to represent an invalid context.

Declares CQuerySplitter, a class to split the query sequence(s)

Auxiliary functions and classes to assist in query splitting.

ostream & operator<<(ostream &out, const CQuerySplitter &rhs)

static int s_GetShiftForTranslatedNegStrand(size_t query_length)

Retrieve the shift for the negative strand.

static unsigned int s_AddShift(unsigned int context, int shift)

Adds the necessary shift to the context to record the query contexts for the query chunks.

static void s_SetSplitQuerySeqInterval(const TChunkRange &chunk, const TChunkRange &query_range, int query_offset, CRef< CSeq_loc > split_query_loc)

Auxiliary function to assign the split query's Seq-interval so that it's constrained within the chunk...

static size_t s_GetAbsoluteContextLength(const vector< const BlastQueryInfo * > &chunk_qinfo, int chunk_num, const CContextTranslator &ctx_translator, int absolute_context)

Get the length of a context in absolute terms (i.e.

static bool s_IsPlusStrand(const BlastQueryInfo *qinfo, Int4 context_number)

Determine whether a given context corresponds to the plus or minus strand.

Definition of SSeqLoc structure.

Structure to hold a sequence.

Uint1 * sequence

Sequence used for search (could be translation).

Int4 query_length

Length of this query, strand or frame.

Int4 query_offset

Offset of this query, strand or frame in the concatenated super-query.

Int1 frame

Frame number (-1, -2, -3, 0, 1, 2, or 3)

The query related information.

BlastContextInfo * contexts

Information per context.

static CS_CONTEXT * context


RetroSearch is an open source project built by @garambo | Open a GitHub Issue

Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo

HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4