word_length =
lookup->word_length;
76s_first=
subject->sequence + s_range[1];
77s_last=
subject->sequence + s_range[2];
81 lookup->charsize, s_first);
83 for(s = s_first; s <= s_last; s++) {
96 if(numhits <= (array_size - totalhits))
111 for(
i= 0;
i< numhits;
i++) {
112offset_pairs[
i+ totalhits].qs_offsets.q_off = src[
i];
113offset_pairs[
i+ totalhits].qs_offsets.s_off = s_off;
117totalhits += numhits;
159word_length =
lookup->word_length;
162s_first=
subject->sequence + s_range[1];
163s_last=
subject->sequence + s_range[2];
167 lookup->charsize, s_first);
169 for(s = s_first; s <= s_last; s++) {
182 if(numhits <= (array_size - totalhits))
197 for(
i= 0;
i< numhits;
i++) {
198offset_pairs[
i+ totalhits].qs_offsets.q_off = src[
i];
199offset_pairs[
i+ totalhits].qs_offsets.s_off = s_off;
203totalhits += numhits;
254 Int4* scaled_compress_table;
257 Int4compressed_char;
258 Int4compressed_alphabet_size;
262word_length =
lookup->word_length;
265s_first=
subject->sequence + s_range[1];
266s_last=
subject->sequence + s_range[2];
268compressed_alphabet_size =
lookup->compressed_alphabet_size;
269scaled_compress_table =
lookup->scaled_compress_table;
270recip =
lookup->reciprocal_alphabet_size;
272pv_array_bts =
lookup->pv_array_bts;
275 for(s = s_first; s <= s_last; s++){
277compressed_alphabet_size,
283next_char = ((s <= s_last)? s[word_length-1] : 0);
284preshift = (
Int4)((((
Int8)index) * recip) >> 32);
287 for(; s <= s_last; s++) {
290compressed_char = scaled_compress_table[next_char];
291next_char = s[word_length];
293 if(compressed_char < 0){
296 for(skip = word_length-1; skip && (s <= s_last) ; s++){
297compressed_char = scaled_compress_table[next_char];
298next_char = s[word_length];
300 if(compressed_char < 0){
301skip = word_length-1;
306index = preshift + compressed_char;
307preshift = (
Int4)((((
Int8)( index )) * recip) >> 32);
327index = preshift + compressed_char;
328preshift = (
Int4)((((
Int8)( index )) * recip) >> 32);
331 if(
PV_TEST(pv, index, pv_array_bts)) {
335 lookup->backbone + index;
337numhits = backbone_cell->
num_used;
341 if(numhits <= (array_size - totalhits)) {
354 for(
i= 0;
i< numhits-1;
i++) {
379 for(
i= 0;
i< first_cell_entries;
i++) {
385 if(first_cell_entries) {
386curr_cell = curr_cell->
next;
388 while(curr_cell !=
NULL) {
394curr_cell = curr_cell->
next;
398totalhits += numhits;
426 if(
i==
b->num_alloc) {
428offset_pairs =
b->offset_pairs =
455 Int4table_correction;
472 const Int4max_hits = 4000000;
476bucket_array =
lookup->bucket_array;
480 for(index = 0; index <
lookup->num_buckets; index++)
481bucket_array[index].num_filled = 0;
483s_first = abs_start + *
offset;
484s_last = abs_start + sequence->
length-
lookup->wordsize;
492table_correction =
lookup->wordsize - 1;
496 lookup->charsize, s_first);
498 for(s = s_first; s <= s_last; s++) {
506cell = &
lookup->rps_backbone[index];
511 if(numhits <= (max_hits - totalhits)) {
517 for(
i= 0;
i< numhits;
i++) {
518q_off = cell->
entries[
i] - table_correction;
527q_off = cell->
entries[0] - table_correction;
530 for(
i= 0;
i< (numhits - 1);
i++) {
531q_off = src[
i] - table_correction;
538totalhits += numhits;
Routines for creating protein BLAST lookup tables.
#define COMPRESSED_HITS_PER_OVERFLOW_CELL
number of query offsets to store in an overflow cell
#define COMPRESSED_HITS_PER_BACKBONE_CELL
number of query offsets to store in a backbone cell
#define COMPRESSED_HITS_CELL_MASK
#define RPS_BUCKET_SIZE
The number of regions into which the concatenated RPS blast database is split via bucket sorting.
#define AA_HITS_PER_CELL
maximum number of hits in one lookup table cell
#define RPS_HITS_PER_CELL
maximum number of hits in an RPS backbone cell; this may be redundant (have the same value as AA_HITS...
static NCBI_INLINE Int4 s_ComputeCompressedIndex(Int4 wordsize, const Uint1 *word, Int4 compressed_alphabet_size, Int4 *skip, BlastCompressedAaLookupTable *lookup)
Convert a word to use a compressed alphabet.
void BlastChooseProteinScanSubject(LookupTableWrap *lookup_wrap)
Choose the most appropriate function to scan through protein subject sequences.
Int4 BlastRPSScanSubject(const LookupTableWrap *lookup_wrap, const BLAST_SequenceBlk *sequence, Int4 *offset)
Scans the RPS query sequence from "offset" to the end of the sequence.
static Int4 s_BlastSmallAaScanSubject(const LookupTableWrap *lookup_wrap, const BLAST_SequenceBlk *subject, BlastOffsetPair *offset_pairs, Int4 array_size, Int4 *s_range)
Same as s_BlastAaScanSubject, but for the small lookup table.
static Int4 s_BlastCompressedAaScanSubject(const LookupTableWrap *lookup_wrap, const BLAST_SequenceBlk *subject, BlastOffsetPair *offset_pairs, Int4 array_size, Int4 *s_range)
Scans the subject sequence from "offset" to the end of the sequence, assuming a compressed protein al...
static void s_AddToRPSBucket(RPSBucket *b, Uint4 q_off, Uint4 s_off)
Add one query-subject pair to the list of such pairs retrieved from the RPS blast lookup table.
static Int4 s_BlastAaScanSubject(const LookupTableWrap *lookup_wrap, const BLAST_SequenceBlk *subject, BlastOffsetPair *offset_pairs, Int4 array_size, Int4 *s_range)
Scans the subject sequence from "offset" to the end of the sequence.
Routines for creating protein BLAST lookup tables.
#define PV_ARRAY_BTS
bits-to-shift from lookup_index to pv_array index.
#define PV_TEST(lookup, index, shift)
Test the bit at position 'index' in the PV array bitfield within 'lookup'.
static NCBI_INLINE Int4 ComputeTableIndexIncremental(Int4 wordsize, Int4 charsize, Int4 mask, const Uint1 *word, Int4 index)
Given a word, compute its index value, reusing a previously computed index value.
static NCBI_INLINE Int4 ComputeTableIndex(Int4 wordsize, Int4 charsize, const Uint1 *word)
Given a word, compute its index value from scratch.
#define PV_ARRAY_TYPE
The pv_array 'native' type.
@ eAaLookupTable
standard protein (blastp) lookup table
@ eCompressedAaLookupTable
compressed alphabet (blastp) lookup table
@ eRPSLookupTable
RPS lookup table (rpsblast and rpstblastn)
static int lookup(const char *name, const struct lookup_int *table)
uint8_t Uint1
1-byte (8-bit) unsigned integer
int32_t Int4
4-byte (32-bit) signed integer
uint32_t Uint4
4-byte (32-bit) unsigned integer
uint16_t Uint2
2-byte (16-bit) unsigned integer
int64_t Int8
8-byte (64-bit) signed integer
while(yy_chk[yy_base[yy_current_state]+yy_c] !=yy_current_state)
for(len=0;yy_str[len];++len)
static NCBI_INLINE Boolean s_DetermineScanningOffsets(const BLAST_SequenceBlk *subject, Int4 word_length, Int4 lut_word_length, Int4 *range)
Determines the scanner's offsets taking the database masking restrictions into account (if any).
#define ASSERT
macro for assert.
structure defining one cell of the compacted lookup table
union AaLookupBackboneCell::@3 payload
union that specifies either entries stored right on the backbone if fewer than AA_HITS_PER_CELL are p...
Int4 entries[3]
if the number of hits for this cell is AA_HITS_PER_CELL or less, the hits are all stored directly in ...
Int4 overflow_cursor
integer offset into the overflow array where the list of hits for this cell begins
Int4 num_used
number of hits stored for this cell
structure defining one cell of the small (i.e., using short integers) lookup table
union AaLookupSmallboneCell::@4 payload
union that specifies either entries stored right on the backbone if fewer than AA_HITS_PER_CELL are p...
Uint2 num_used
number of hits stored for this cell
Int4 overflow_cursor
integer offset into the overflow array where the list of hits for this cell begins
Uint2 entries[3]
if the number of hits for this cell is AA_HITS_PER_CELL or less, the hits are all stored directly in ...
Structure to hold a sequence.
Int4 length
Length of sequence.
Uint1 * sequence
Sequence used for search (could be translation).
The basic lookup table structure for blastp searches.
void * scansub_callback
function for scanning subject sequences
EBoneType bone_type
type of bone used: 0 = normal backbone (using Int4), 1 = small backbone (using Uint2); will be determined...
The lookup table structure for protein searches using a compressed alphabet.
void * scansub_callback
function for scanning subject sequences
The basic lookup table structure for RPS blast searches.
structure for hashtable of indexed query offsets
Int4 query_offsets[4]
storage for query offsets local to the backbone cell
CompressedMixedOffsets overflow_list
storage for remote query offsets
union CompressedLookupBackboneCell::@5 payload
structure for holding the list of query offsets
Int4 query_offsets[4 -2]
the query offsets stored locally
CompressedOverflowCell * head
head of linked list of cells of query offsets stored off the backbone
cell in list for holding query offsets
struct CompressedOverflowCell * next
pointer to next cell
Int4 query_offsets[4]
the query offsets stored in the cell
Wrapper structure for different types of BLAST lookup tables.
void * lut
Pointer to the actual lookup table structure.
ELookupTableType lut_type
What kind of lookup table this is.
structure defining one cell of the RPS lookup table
Int4 num_used
number of hits in this cell
Int4 entries[3]
if the number of hits in this cell is RPS_HITS_PER_CELL or less, all hits go into this array.
structure used for bucket sorting offsets retrieved from the RPS blast lookup table.
This symbol enables the verbose option in makeblastdb and other BLAST+ search command line applicatio...
Uint4 q_off
Query offset.
Uint4 s_off
Subject offset.
struct BlastOffsetPair::@6 qs_offsets
Query/subject offset pair.
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4