A RetroSearch Logo

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Search Query:

Showing content from http://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/doxyhtml/blast__nalookup_8c_source.html below:

NCBI C++ ToolKit: src/algo/blast/core/blast_nalookup.c Source File

40 #define BLAST2NA_MASK 0xfc 43 #define BITS_PER_NUC 2 47  Int4

approx_table_entries,

Int4

max_q_off,

87  if

(approx_table_entries < 250)

95  if

(approx_table_entries < 8500)

102  if

(approx_table_entries < 1250) {

105

}

else if

(approx_table_entries < 21000) {

115  if

(approx_table_entries < 1250) {

118

}

else if

(approx_table_entries < 8500) {

121

}

else if

(approx_table_entries < 18000) {

131  if

(approx_table_entries < 12000) {

134

}

else if

(approx_table_entries < 180000) {

144  if

(approx_table_entries < 8500) {

147

}

else if

(approx_table_entries < 18000) {

150

}

else if

(approx_table_entries < 60000) {

153

}

else if

(approx_table_entries < 900000) {

163  if

(approx_table_entries < 8500) {

166

}

else if

(approx_table_entries < 300000) {

182

(approx_table_entries >= 32767 || max_q_off >= 32768)) {

202  Int4

overflow_cells_needed = 2;

203  Int4

overflow_cursor = 2;

204  Int4

longest_chain = 0;

205 #ifdef LOOKUP_VERBOSE 206  Int4

backbone_occupancy = 0;

207  Int4

thick_backbone_occupancy = 0;

208  Int4

num_overflows = 0;

216  for

(

i

= 0;

i

<

lookup

->backbone_size;

i

++) {

217  if

(thin_backbone[

i

] !=

NULL

) {

218  Int4

num_hits = thin_backbone[

i

][1];

220

overflow_cells_needed += num_hits + 1;

221

longest_chain =

MAX

(longest_chain, num_hits);

231  if

(overflow_cells_needed >= 32768) {

232  for

(

i

= 0;

i

<

lookup

->backbone_size;

i

++)

233  sfree

(thin_backbone[

i

]);

247  lookup

->longest_chain = longest_chain;

250  if

(overflow_cells_needed > 0) {

257  for

(

i

= 0;

i

<

lookup

->backbone_size;

i

++) {

262  if

(thin_backbone[

i

] ==

NULL

) {

263  lookup

->final_backbone[

i

] = -1;

267 #ifdef LOOKUP_VERBOSE 268

backbone_occupancy++;

270

num_hits = thin_backbone[

i

][1];

276 #ifdef LOOKUP_VERBOSE 277

thick_backbone_occupancy++;

279  lookup

->final_backbone[

i

] = thin_backbone[

i

][2];

282 #ifdef LOOKUP_VERBOSE 289  lookup

->final_backbone[

i

] = -overflow_cursor;

290  for

(j = 0; j < num_hits; j++) {

291  lookup

->overflow[overflow_cursor++] =

292

thin_backbone[

i

][j + 2];

298  lookup

->overflow[overflow_cursor++] = -1;

302  sfree

(thin_backbone[

i

]);

305  lookup

->overflow_size = overflow_cursor;

307 #ifdef LOOKUP_VERBOSE 308

printf(

"SmallNa\n"

);

309

printf(

"backbone size: %d\n"

,

lookup

->backbone_size);

310

printf(

"backbone occupancy: %d (%f%%)\n"

, backbone_occupancy,

311

100.0 * backbone_occupancy /

lookup

->backbone_size);

312

printf(

"thick_backbone occupancy: %d (%f%%)\n"

,

313

thick_backbone_occupancy,

314

100.0 * thick_backbone_occupancy /

lookup

->backbone_size);

315

printf(

"num_overflows: %d\n"

, num_overflows);

316

printf(

"overflow size: %d\n"

, overflow_cells_needed);

317

printf(

"longest chain: %d\n"

, longest_chain);

341  if

(stop - start > 2)

346

start = locations->

ssr

->

right

+1;

347

locations = locations->

next

;

350

stop = locations->

ssr

->

left

-1;

365  if

( !query_options ) {

386  Int4

**thin_backbone;

393  lookup

->lut_word_length = lut_width;

418  sfree

(thin_backbone);

428  if

(

lookup

->masked_locations)

446  Int4

overflow_cells_needed = 0;

447  Int4

overflow_cursor = 0;

448  Int4

longest_chain = 0;

450 #ifdef LOOKUP_VERBOSE 451  Int4

backbone_occupancy = 0;

452  Int4

thick_backbone_occupancy = 0;

453  Int4

num_overflows = 0;

469  for

(

i

= 0;

i

<

lookup

->backbone_size;

i

++) {

470  if

(thin_backbone[

i

] !=

NULL

) {

471  Int4

num_hits = thin_backbone[

i

][1];

473

overflow_cells_needed += num_hits;

474

longest_chain =

MAX

(longest_chain, num_hits);

478  lookup

->longest_chain = longest_chain;

481  if

(overflow_cells_needed > 0) {

487  for

(

i

= 0;

i

<

lookup

->backbone_size;

i

++) {

492  if

(thin_backbone[

i

] ==

NULL

)

495 #ifdef LOOKUP_VERBOSE 496

backbone_occupancy++;

498

num_hits = thin_backbone[

i

][1];

499  lookup

->thick_backbone[

i

].num_used = num_hits;

508 #ifdef LOOKUP_VERBOSE 509

thick_backbone_occupancy++;

511  for

(j = 0; j < num_hits; j++) {

512  lookup

->thick_backbone[

i

].payload.entries[j] =

513

thin_backbone[

i

][j + 2];

517 #ifdef LOOKUP_VERBOSE 520  lookup

->thick_backbone[

i

].payload.overflow_cursor =

522  for

(j = 0; j < num_hits; j++) {

523  lookup

->overflow[overflow_cursor] =

524

thin_backbone[

i

][j + 2];

530  sfree

(thin_backbone[

i

]);

533  lookup

->overflow_size = overflow_cursor;

535 #ifdef LOOKUP_VERBOSE 536

printf(

"backbone size: %d\n"

,

lookup

->backbone_size);

537

printf(

"backbone occupancy: %d (%f%%)\n"

, backbone_occupancy,

538

100.0 * backbone_occupancy /

lookup

->backbone_size);

539

printf(

"thick_backbone occupancy: %d (%f%%)\n"

,

540

thick_backbone_occupancy,

541

100.0 * thick_backbone_occupancy /

lookup

->backbone_size);

542

printf(

"num_overflows: %d\n"

, num_overflows);

543

printf(

"overflow size: %d\n"

, overflow_cells_needed);

544

printf(

"longest chain: %d\n"

, longest_chain);

555  Int4

**thin_backbone;

562  lookup

->lut_word_length = lut_width;

582  sfree

(thin_backbone);

590  if

(

lookup

->masked_locations)

613

}

else if

(length == 18) {

618

}

else if

(length == 21) {

624

}

else if

(

weight

== 12) {

630

}

else if

(length == 18) {

635

}

else if

(length == 21) {

670  Int4

template_length;

680  const Int4

kCompressionFactor=2048;

713  int

temp_int = template_type + 1;

714

second_template_type =

723

helper_array2 ==

NULL

)

749

from = loc->

ssr

->

left

- (template_length - 2);

750

to = loc->

ssr

->

right

- (template_length - 2);

752

pos = seq + template_length;

754  for

(index = from; index <= to; index++) {

760

pos = seq + template_length;

769 #ifdef LOOKUP_VERBOSE 777 #ifdef LOOKUP_VERBOSE 780  PV_SET

(pv_array, ecode1, pv_array_bts);

783

helper_array[ecode1/kCompressionFactor]++;

795 #ifdef LOOKUP_VERBOSE 798  PV_SET

(pv_array, ecode2, pv_array_bts);

801

helper_array2[ecode2/kCompressionFactor]++;

809  for

(index = 0; index < mb_lt->

hashsize

/ kCompressionFactor; index++)

810

longest_chain =

MAX

(longest_chain, helper_array[index]);

814  sfree

(helper_array);

818  for

(index = 0; index < mb_lt->

hashsize

/ kCompressionFactor; index++)

819

longest_chain =

MAX

(longest_chain, helper_array2[index]);

823  sfree

(helper_array2);

870

seq =

query

->sequence_start + from;

871

pos = seq + kLutWordLength;

875

from -= kLutWordLength - 2;

876

last_offset = to + 2;

878  for

(index = from; index <= last_offset; index++) {

884

pos = seq + kLutWordLength;

893  PV_SET

(pv_array, ecode, pv_array_bts);

911

mb_lt->

hashtable

[(

Int8

)((1 << (2 * word_size)) - 1)] = 0;

913  if

(word_size < 16) {

920  for

(

i

= 1;

i

< 4;

i

++) {

922  for

(k = 0;k < word_size;k++) {

928  for

(

i

= 0;

i

< 3;

i

++) {

929  for

(k = 0;k < word_size;k++) {

930

word = ((0xffffffff ^ (3 << k*2)) | (

i

<< k*2)) & 0xffffffff;

972  const Int4

kCompressionFactor=2048;

974  Uint4

* helper_array;

988  if

(helper_array ==

NULL

)

1013  if

(lookup_options->

stride

> 0) {

1014

shift = lookup_options->

stride

- 1;

1015

pos_shift = kLutWordLength + 1;

1024

seq =

query

->sequence_start + from;

1025

pos = seq + kLutWordLength;

1029

from -= kLutWordLength - 2;

1030

last_offset = to + 2;

1032  for

(index = from; index <= last_offset; index++) {

1038

pos = seq + kLutWordLength;

1051  if

((counts[ecode / 2] >> 4) >= max_word_count) {

1056  if

((counts[ecode / 2] & 0xf) >= max_word_count) {

1077 #ifdef LOOKUP_VERBOSE 1082 #ifdef LOOKUP_VERBOSE 1085  PV_SET

(pv_array, ecode, pv_array_bts);

1088

helper_array[ecode/kCompressionFactor]++;

1096

pos = seq + pos_shift;

1105  for

(index = 0; index < mb_lt->

hashsize

/ kCompressionFactor; index++)

1106

longest_chain =

MAX

(longest_chain, helper_array[index]);

1109  sfree

(helper_array);

1125  Uint1

max_word_count)

1130  Int8

word, index, w;

1131  const Int4

kNumWords

1138  if

(!sequence || !counts || !mb_lt || !pv) {

1147

w = (

Int8

)s[0] << 24 | (

Int8

)s[1] << 16 | (

Int8

)s[2] << 8 | s[3];

1148  for

(

i

= 0;

i

< kNumWords;

i

++) {

1159

word = (w >> shift) &

mask

;

1162  if

(!

PV_TEST

(pv, word, pv_array_bts)) {

1169  if

((counts[index] & 0xf) < max_word_count) {

1174  if

((counts[index] >> 4) < max_word_count) {

1175

counts[index] += 1 << 4;

1194  Uint1

max_word_count)

1200  if

(!seq_src || !pv || !counts) {

1204

memset(&seq_arg, 0,

sizeof

(seq_arg));

1231  Int4

approx_table_entries,

1238  const Int4

kTargetPVSize = 131072;

1239  const Int4

kSmallQueryCutoff = 15000;

1240  const Int4

kLargeQueryCutoff = 800000;

1251  if

(mb_lt ==

NULL

) {

1287  if

(mb_lt->

hashsize

<= 8 * kTargetPVSize)

1299

(approx_table_entries <= kSmallQueryCutoff ||

1300

approx_table_entries >= kLargeQueryCutoff)) {

1301

pv_size = pv_size / 2;

1315  if

(counts ==

NULL

) {

1344  if

(lookup_options->

db_filter

&& counts) {

1356 #ifdef LOOKUP_VERBOSE 1357

printf(

"lookup table size: %ld (%d letters)\n"

, mb_lt->

hashsize

,

1362

printf(

"PV array size: %d bytes (%ld table entries/bit)\n"

,

1392  const Uint4

fnv_prime = 16777619u;

1393  const Uint4

fnv_offset_basis = 2166136261u;

1397  hash

= fnv_offset_basis;

1398  for

(

i

= 0;

i

< 4;

i

++) {

1414  Int4

lut_word_length;

1416  const Int4

pv_array_bts =

lookup

->pv_array_bts;

1420

word_length =

lookup

->word_length;

1421

lut_word_length =

lookup

->lut_word_length;

1425  for

(loc = locations; loc; loc = loc->

next

) {

1443

seq =

query

->sequence + from;

1444

pos = seq + lut_word_length - 1;

1445

end =

query

->sequence + to + 1;

1447  for

(; seq < end; seq++) {

1454

pos = seq + lut_word_length;

1479

v = v - ((v >> 1) & 0x55555555);

1480

v = (v & 0x33333333) + ((v >> 2) & 0x33333333);

1481

v = ((v + (v >> 4)) & 0xF0F0F0F);

1507  if

(

array

->values) {

1511  if

(

array

->counts) {

1526  if

(!retval || !bitfield) {

1539  for

(

i

= 1;

i

< retval->

length

;

i

++) {

1573

bit_count = (idx > 0) ?

array

->counts[idx - 1] : 0;

1574  ASSERT

(

array

->bitfield[idx] & (1 << bit_number));

1577

bit_count +=

s_Popcount

(

array

->bitfield[idx] & ((1 << bit_number) - 1));

1582  return

bit_count - 1;

1596  ASSERT

(sparse_index < array->num_elements);

1597  if

(sparse_index < 0 || sparse_index >

array

->num_elements) {

1601  return array

->values + sparse_index;

1616  Uint1

max_word_count)

1622  const Int4

kNumWords

1630  if

(!sequence || !counts || !

lookup

|| !pv) {

1642

w = (

Int8

)s[0] << 24 | (

Int8

)s[1] << 16 | (

Int8

)s[2] << 8 | s[3];

1643  for

(

i

= 0;

i

< kNumWords;

i

++) {

1654

word = (w >> shift) &

mask

;

1657  if

(!

PV_TEST

(pv, word, pv_array_bts)) {

1663  if

(*pelem < max_word_count) {

1691  if

(

th

->seq_arg) {

1693  for

(

i

= 0;

i

<

th

->num_threads;

i

++) {

1701  for

(

i

= 0;

i

<

th

->num_threads;

i

++) {

1707  if

(

th

->seq_src) {

1709  for

(

i

= 0;

i

<

th

->num_threads;

i

++) {

1715  if

(

th

->word_counts) {

1717  for

(

i

= 1;

i

<

th

->num_threads;

i

++) {

1718  if

(

th

->word_counts[

i

]) {

1719  if

(

th

->word_counts[

i

]->values) {

1720  free

(

th

->word_counts[

i

]->values);

1722  free

(

th

->word_counts[

i

]);

1728  free

(

th

->word_counts);

1743  if

(num_threads < 1 || !

lookup

|| !seq_src) {

1759  if

(!retval->

itr

) {

1776  for

(

i

= 0;

i

< num_threads;

i

++) {

1789  if

(!retval->

itr

[

i

]) {

1796

1LL << (2 *

lookup

->lut_word_length));

1841  Uint4

in_num_threads,

1842  Uint1

max_word_count)

1846  Int4

num_db_seqs, th_batch;

1860

num_threads =

MIN

(in_num_threads, num_db_seqs);

1872 #pragma omp parallel for if (num_threads > 1) num_threads(num_threads) \ 1873  default(none) shared(num_threads, th_data, lookup, \ 1874  th_batch, max_word_count) private(i) \ 1875  schedule(dynamic, 1) 1877  for

(

i

= 0;

i

< num_threads;

i

++) {

1879  for

(j = 0;j < th_batch;j++) {

1881 #pragma omp critical (get_sequence_for_word_counts) 1885

th_data->

itr

[

i

]);

1919  for

(k = 1;k < num_threads;k++) {

1932  while

(i < th_data->word_counts[0]->length) {

1942  ASSERT

(k < th_data->word_counts[0]->num_elements);

1983

pv_array_bts =

lookup

->pv_array_bts;

1984

word_size =

lookup

->lut_word_length;

1988

pv[0xffffffff >> pv_array_bts] &=

1992  for

(

i

= 1;

i

< 4;

i

++) {

1994  for

(k = 0;k < word_size;k++) {

1995

pv[word >> pv_array_bts] &=

2001  for

(

i

= 0;

i

< 3;

i

++) {

2002  for

(k = 0;k < word_size;k++) {

2003

word = ((0xffffffff ^ (3 << k*2)) | (

i

<< k*2)) & 0xffffffff;

2005

pv[word >> pv_array_bts] &=

2024  Int4

overflow_cells_needed = 0;

2025  Int4

overflow_cursor = 0;

2026  Int4

longest_chain = 0;

2028  const Int4

pv_array_bts =

lookup

->pv_array_bts;

2029  const Int8

kNumWords = 1LL << (2 *

lookup

->lut_word_length);

2030 #ifdef LOOKUP_VERBOSE 2031  Int4

backbone_occupancy = 0;

2032  Int4

thick_backbone_occupancy = 0;

2033  Int4

num_overflows = 0;

2034  Int4

words_per_hash[5] = {0,};

2048

memset(

lookup

->pv, 0, (kNumWords >>

lookup

->pv_array_bts) *

2061  for

(

i

= 0;

i

<

lookup

->backbone_size;

i

++) {

2065  if

(

b

->num_offsets > 0) {

2066  for

(;

b

;

b

=

b

->next) {

2067

num_hits +=

b

->num_offsets;

2075

overflow_cells_needed += num_hits + (num_words * 2);

2077

longest_chain =

MAX

(longest_chain, num_hits);

2080  lookup

->longest_chain = longest_chain;

2083  if

(overflow_cells_needed > 0) {

2089  for

(

i

= 0;

i

<

lookup

->backbone_size;

i

++) {

2092  Int4

num_offsets = 0;

2098  if

(

head

->num_offsets == 0) {

2102 #ifdef LOOKUP_VERBOSE 2103

thick_backbone_occupancy++;

2108  for

(

b

=

head

;

b

;

b

=

b

->next) {

2110

num_offsets +=

b

->num_offsets;

2112 #ifdef LOOKUP_VERBOSE 2113

backbone_occupancy++;

2118 #ifdef LOOKUP_VERBOSE 2119

words_per_hash[((num_words < 6) ? num_words : 5) - 1]++;

2129  for

(

b

=

head

;

b

;

b

=

b

->next, k++) {

2131

cell->

words

[k] =

b

->word;

2148  for

(

b

=

head

;

b

;

b

=

b

->next, k++) {

2149

cell->

words

[k] =

b

->word;

2151

is_overflow =

TRUE

;

2154

is_overflow =

TRUE

;

2160 #ifdef LOOKUP_VERBOSE 2163

cell->

offsets

[0] = overflow_cursor;

2164  for

(

b

=

head

;

b

;

b

=

b

->next) {

2166  lookup

->overflow[overflow_cursor++] = *(

Int4

*)(&

b

->word);

2167  lookup

->overflow[overflow_cursor++] =

b

->num_offsets;

2172  lookup

->overflow[overflow_cursor++] = j - 1;

2176  ASSERT

(overflow_cursor <= overflow_cells_needed);

2184  lookup

->offsets_size = overflow_cursor;

2186 #ifdef LOOKUP_VERBOSE 2187

printf(

"backbone size: %d\n"

,

lookup

->backbone_size);

2188

printf(

"backbone occupancy: %d (%f%%)\n"

, backbone_occupancy,

2189

100.0 * backbone_occupancy /

lookup

->backbone_size);

2190

printf(

"thick_backbone occupancy: %d (%f%%)\n"

,

2191

thick_backbone_occupancy,

2192

100.0 * thick_backbone_occupancy /

lookup

->backbone_size);

2193

printf(

"num_overflows: %d\n"

, num_overflows);

2194

printf(

"\tnumber of words per hash\tcount\n"

);

2197  for

(ii = 0;ii < 5;ii++) {

2198

printf(

"\t%d\t%d\n"

, ii + 1, words_per_hash[ii]);

2201

printf(

"overflow size: %d\n"

, overflow_cells_needed);

2202

printf(

"longest chain: %d\n"

, longest_chain);

2212  if

(

lookup

->masked_locations)

2235  const Int8

kNumWords = (1ULL << 32);

2236  Int4

num_hash_bits = 8;

2237  Int4 i

, num_unique_words = 0;

2246  lookup

->lut_word_length = 16;

2278  for

(

i

= 0;i < kNumWords >>

lookup

->pv_array_bts;

i

++) {

2283  while

(num_hash_bits < 32 &&

2284

(1LL << num_hash_bits) < num_unique_words) {

2288  lookup

->backbone_size = 1 << num_hash_bits;

2292  if

(!thin_backbone) {

2306  lookup

->lut_word_length,

2318  sfree

(thin_backbone);

#define COMPRESSION_RATIO

Compression ratio of nucleotide bases (4 bases in 1 byte)

#define sfree(x)

Safe free a pointer: belongs to a higher level header.

Declarations of static arrays used to define some NCBI encodings to be used in a toolkit independent ...

BLAST filtering functions.

BlastSeqLoc * BlastSeqLocFree(BlastSeqLoc *loc)

Deallocate all BlastSeqLoc objects in a chain.

BlastSeqLoc * BlastSeqLocNew(BlastSeqLoc **head, Int4 from, Int4 to)

Create and initialize a new sequence interval.

#define PV_ARRAY_BTS

bits-to-shift from lookup_index to pv_array index.

BackboneCell * BackboneCellFree(BackboneCell *cell)

#define PV_TEST(lookup, index, shift)

Test the bit at position 'index' in the PV array bitfield within 'lookup'.

#define PV_ARRAY_MASK

amount to mask off.

void BlastLookupIndexQueryExactMatches(Int4 **backbone, Int4 word_length, Int4 charsize, Int4 lut_word_length, BLAST_SequenceBlk *query, BlastSeqLoc *locations)

Add all applicable query offsets to a generic lookup table.

void BlastHashLookupIndexQueryExactMatches(BackboneCell *backbone, Int4 *offsets, Int4 word_length, Int4 charsize, Int4 lut_word_length, BLAST_SequenceBlk *query, BlastSeqLoc *locations, TNaLookupHashFunction hash_func, Uint4 mask, Uint4 *pv_array)

Add all applicable query offsets to a hashed lookup table.

#define PV_ARRAY_BYTES

number of BYTES in 'native' type.

#define PV_SET(lookup, index, shift)

Set the bit at position 'index' in the PV array bitfield within 'lookup'.

#define PV_ARRAY_TYPE

The pv_array 'native' type.

#define BLASTERR_MEMORY

System error: out of memory condition.

static Int2 s_NaHashLookupCountWordsInSubject_16_1(const BLAST_SequenceBlk *sequence, BlastNaHashLookupTable *lookup, BlastSparseUint1Array *counts, Uint1 max_word_count)

Scan a subject sequence and update word counters, for 16-base words with a scan step of 1.

BlastSmallNaLookupTable * BlastSmallNaLookupTableDestruct(BlastSmallNaLookupTable *lookup)

Free a small nucleotide lookup table.

Int4 BlastNaLookupTableNew(BLAST_SequenceBlk *query, BlastSeqLoc *locations, BlastNaLookupTable **lut, const LookupTableOptions *opt, const QuerySetUpOptions *query_options, Int4 lut_width)

Create a new nucleotide lookup table.

static Int4 s_BlastSmallNaLookupFinalize(Int4 **thin_backbone, BlastSmallNaLookupTable *lookup, BLAST_SequenceBlk *query)

Pack the data structures comprising a small nucleotide lookup table into their final form.

static Int2 s_FillContigMBTable(BLAST_SequenceBlk *query, BlastSeqLoc *location, BlastMBLookupTable *mb_lt, const LookupTableOptions *lookup_options, Uint1 *counts)

Fills in the hashtable and next_pos fields of BlastMBLookupTable* for the contiguous case.

static Boolean s_HasMaskAtHashEnabled(const QuerySetUpOptions *query_options)

Determine whether mask at hash is enabled from the QuerySetUpOptions.

static NaHashLookupThreadData * NaHashLookupThreadDataFree(NaHashLookupThreadData *th)

BlastMBLookupTable * BlastMBLookupTableDestruct(BlastMBLookupTable *mb_lt)

Deallocate memory used by the Mega BLAST lookup table.

static BlastSeqLoc * s_SeqLocListInvert(const BlastSeqLoc *locations, Int4 length)

Changes the list of locations into a list of the intervals between locations (the inverse).

static Int2 s_ScanSubjectForWordCounts(BlastSeqSrc *seq_src, BlastMBLookupTable *mb_lt, Uint1 *counts, Uint1 max_word_count)

Scan database sequences and count query words that appear in the database.

BlastNaHashLookupTable * BlastNaHashLookupTableDestruct(BlastNaHashLookupTable *lookup)

Free a nucleotide lookup table.

static BlastSparseUint1Array * BlastSparseUint1ArrayFree(BlastSparseUint1Array *array)

static EDiscTemplateType s_GetDiscTemplateType(Int4 weight, Uint1 length, EDiscWordType type)

Convert weight, template length and template type from input options into an MBTemplateType enum.

ELookupTableType BlastChooseNaLookupTable(const LookupTableOptions *lookup_options, Int4 approx_table_entries, Int4 max_q_off, Int4 *lut_width)

choose the type of nucleotide lookup table to be used for a blast search

Int2 BlastMBLookupTableNew(BLAST_SequenceBlk *query, BlastSeqLoc *location, BlastMBLookupTable **mb_lt_ptr, const LookupTableOptions *lookup_options, const QuerySetUpOptions *query_options, Int4 approx_table_entries, Int4 lut_width, BlastSeqSrc *seqsrc)

Create the lookup table for Mega BLAST.

Int4 BlastNaHashLookupTableNew(BLAST_SequenceBlk *query, BlastSeqLoc *locations, BlastNaHashLookupTable **lut, const LookupTableOptions *opt, const QuerySetUpOptions *query_options, BlastSeqSrc *seqsrc, Uint4 num_threads)

struct NaHashLookupThreadData NaHashLookupThreadData

static BlastSparseUint1Array * BlastSparseUint1ArrayNew(Uint4 *bitfield, Int8 len)

static Int2 s_NaHashLookupScanSubjectForWordCounts(BlastSeqSrc *seq_src, BlastNaHashLookupTable *lookup, Uint4 in_num_threads, Uint1 max_word_count)

Scan database sequences and count query words that appear in the database.

static void s_BlastNaHashLookupFinalize(BackboneCell *thin_backbone, Int4 *offsets, BlastNaHashLookupTable *lookup)

Pack the data structures comprising a nucleotide lookup table into their final form.

static Int2 s_NaHashLookupFillPV(BLAST_SequenceBlk *query, BlastSeqLoc *locations, BlastNaHashLookupTable *lookup)

static Uint4 s_Popcount(Uint4 v)

static Uint1 * BlastSparseUint1ArrayGetElement(BlastSparseUint1Array *array, Int8 index)

static Int4 BlastSparseUint1ArrayGetIndex(BlastSparseUint1Array *array, Int8 index)

#define BLAST2NA_MASK

bitfield used to detect ambiguities in uncompressed nucleotide letters

static Int2 s_FillPV(BLAST_SequenceBlk *query, BlastSeqLoc *location, BlastMBLookupTable *mb_lt, const LookupTableOptions *lookup_options)

static void s_BlastNaLookupFinalize(Int4 **thin_backbone, BlastNaLookupTable *lookup)

Pack the data structures comprising a nucleotide lookup table into their final form.

static Int2 s_NaHashLookupRemovePolyAWords(BlastNaHashLookupTable *lookup)

static Int2 s_MBCountWordsInSubject_16_1(const BLAST_SequenceBlk *sequence, BlastMBLookupTable *mb_lt, Uint1 *counts, Uint1 max_word_count)

Scan a subject sequence and update word counters, for 16-base words with a scan step of 1.

#define BITS_PER_NUC

number of bits in a compressed nucleotide letter

static Int2 s_FillDiscMBTable(BLAST_SequenceBlk *query, BlastSeqLoc *location, BlastMBLookupTable *mb_lt, const LookupTableOptions *lookup_options)

Fills in the hashtable and next_pos fields of BlastMBLookupTable* for the discontiguous case.

static NaHashLookupThreadData * NaHashLookupThreadDataNew(Int4 num_threads, BlastNaHashLookupTable *lookup, BlastSeqSrc *seq_src)

struct BlastSparseUint1Array BlastSparseUint1Array

Sparse array of Uint1 implemented with a bitfield.

static Int2 s_RemovePolyAWords(BlastMBLookupTable *mb_lt)

Int4 BlastSmallNaLookupTableNew(BLAST_SequenceBlk *query, BlastSeqLoc *locations, BlastSmallNaLookupTable **lut, const LookupTableOptions *opt, const QuerySetUpOptions *query_options, Int4 lut_width)

Create a new small nucleotide lookup table.

static Uint4 FNV_hash(Uint1 *seq, Uint4 mask)

BlastNaLookupTable * BlastNaLookupTableDestruct(BlastNaLookupTable *lookup)

Free a nucleotide lookup table.

Routines for creating nucleotide BLAST lookup tables.

#define NA_OFFSETS_PER_HASH

EDiscWordType

General types of discontiguous word templates.

#define NA_WORDS_PER_HASH

static NCBI_INLINE Int4 ComputeDiscontiguousIndex(Uint8 accum, EDiscTemplateType template_type)

Given an accumulator containing packed bases, compute the discontiguous word index specified by templ...

EDiscTemplateType

Enumeration of all discontiguous word templates; the enumerated values encode the weight,...

@ eDiscTemplate_12_18_Optimal

@ eDiscTemplate_11_18_Optimal

@ eDiscTemplateContiguous

@ eDiscTemplate_12_16_Optimal

@ eDiscTemplate_12_16_Coding

@ eDiscTemplate_11_21_Coding

@ eDiscTemplate_11_18_Coding

@ eDiscTemplate_12_21_Coding

@ eDiscTemplate_11_16_Optimal

@ eDiscTemplate_11_21_Optimal

@ eDiscTemplate_12_21_Optimal

@ eDiscTemplate_12_18_Coding

@ eDiscTemplate_11_16_Coding

#define NA_HITS_PER_CELL

maximum number of hits in one lookup table cell

Boolean SBlastFilterOptionsMaskAtHash(const SBlastFilterOptions *filter_options)

Queries whether masking should be done only for the lookup table or for the entire search.

ELookupTableType

Types of the lookup table.

@ eSmallNaLookupTable

lookup table for blastn with small query

@ eNaLookupTable

blastn lookup table

@ eMBLookupTable

megablast lookup table (includes both contiguous and discontiguous megablast)

@ eNaHashLookupTable

used for 16-base words

Boolean Blast_ProgramIsMapping(EBlastProgramType p)

Int4 BlastSeqSrcIteratorNext(const BlastSeqSrc *seq_src, BlastSeqSrcIterator *itr)

Increments the BlastSeqSrcIterator.

BlastSeqSrcIterator * BlastSeqSrcIteratorFree(BlastSeqSrcIterator *itr)

Frees the BlastSeqSrcIterator structure.

BlastSeqSrcIterator * BlastSeqSrcIteratorNewEx(unsigned int chunk_sz)

Allocate and initialize an iterator over a BlastSeqSrc.

void BlastSeqSrcReleaseSequence(const BlastSeqSrc *seq_src, BlastSeqSrcGetSeqArg *getseq_arg)

Deallocate individual sequence.

BlastSeqSrc * BlastSeqSrcCopy(const BlastSeqSrc *seq_src)

Copy function: needed to guarantee thread safety.

Int4 BlastSeqSrcGetNumSeqs(const BlastSeqSrc *seq_src)

Get the number of sequences contained in the sequence source.

BlastSeqSrc * BlastSeqSrcFree(BlastSeqSrc *seq_src)

Frees the BlastSeqSrc structure by invoking the destructor function set by the user-defined construct...

Int2 BlastSeqSrcGetSequence(const BlastSeqSrc *seq_src, BlastSeqSrcGetSeqArg *getseq_arg)

Retrieve an individual sequence.

#define BLAST_SEQSRC_EOF

No more sequences available.

void BlastSeqSrcResetChunkIterator(BlastSeqSrc *seq_src)

Reset the internal "bookmark" of the last chunk for iteration provided by this object.

Various auxiliary BLAST utility functions.

BLAST_SequenceBlk * BlastSequenceBlkFree(BLAST_SequenceBlk *seq_blk)

Deallocate memory for a sequence block.

Int2 BlastCompressBlastnaSequence(BLAST_SequenceBlk *seq_blk)

Adds a specialized representation of sequence data to a sequence block.

ncbi::TMaskedQueryRegions mask

static DLIST_TYPE *DLIST_NAME() next(DLIST_LIST_TYPE *list, DLIST_TYPE *item)

static int lookup(const char *name, const struct lookup_int *table)

static const char location[]

@ eBlastEncodingProtein

NCBIstdaa.

uint8_t Uint1

1-byte (8-bit) unsigned integer

int16_t Int2

2-byte (16-bit) signed integer

int32_t Int4

4-byte (32-bit) signed integer

uint32_t Uint4

4-byte (32-bit) unsigned integer

int64_t Int8

8-byte (64-bit) signed integer

uint64_t Uint8

8-byte (64-bit) unsigned integer

<!DOCTYPE HTML >< html > n< header > n< title > PubSeq Gateway Help Page</title > n< style > n th

Utility functions for lookup table generation.

Int4 ilog2(Int8 x)

Integer base two logarithm.

#define MIN(a, b)

returns smaller of a and b.

Uint1 Boolean

bool replacement for C

#define TRUE

bool replacement for C indicating true.

#define FALSE

bool replacement for C indicating false.

#define ASSERT

macro for assert.

#define MAX(a, b)

returns larger of a and b.

static PCRE2_SIZE * offsets

Structure to hold a sequence.

Int4 length

Length of sequence.

Uint1 * sequence

Sequence used for search (could be translation).

Thin backbone cell for nucleotide lookup table with hashed words.

The lookup table structure used for Mega BLAST.

Int4 num_words_added

Number of words added to the l.t.

Int4 lut_word_length

number of letters in a lookup table word

Int4 pv_array_bts

The exponent of 2 by which pv_array is smaller than the backbone.

BlastSeqLoc * masked_locations

masked locations, only non-NULL for soft-masking.

Int4 * next_pos2

Extra positions for the second template.

Int4 * hashtable2

Array of positions for second template.

Int4 * hashtable

Array of positions.

Int4 num_unique_pos_added

Number of positions added to the l.t.

PV_ARRAY_TYPE * pv_array

Presence vector, used for quick presence check.

Boolean stride

is lookup table created with a stride

Int8 hashsize

= 4^(lut_word_length)

EDiscTemplateType template_type

Type of the discontiguous word template.

Int4 scan_step

Step size for scanning the database.

Int4 longest_chain

Largest number of query positions for a given word.

Int4 word_length

number of exact letter matches that will trigger an ungapped extension

Boolean discontiguous

Are discontiguous words used?

Int4 * next_pos

Extra positions stored here.

Boolean two_templates

Use two templates simultaneously.

EDiscTemplateType second_template_type

Type of the second discontiguous word template.

Int4 template_length

Length of the discontiguous word template.

The basic lookup table structure for blastn searches.

Used to hold a set of positions, mostly used for filtering.

SSeqRange * ssr

location data on the sequence.

struct BlastSeqLoc * next

next in linked list

Structure used as the second argument to functions satisfying the GetSeqBlkFnPtr signature,...

Int4 oid

Oid in BLAST database, index in an array of sequences, etc [in].

EBlastEncoding encoding

Encoding of sequence, i.e.

BLAST_SequenceBlk * seq

Sequence to return, if NULL, it should allocated by GetSeqBlkFnPtr (using BlastSeqBlkNew or BlastSetU...

Complete type definition of Blast Sequence Source Iterator.

Complete type definition of Blast Sequence Source ADT.

Lookup table structure for blastn searches with small queries.

Sparse array of Uint1 implemented with a bitfield.

Uint1 * values

array of values for present indices

Uint4 num_elements

number of values present in the array

Int4 * counts

cumulative number of bits set

Uint4 length

length of the bitfield

Uint4 * bitfield

bitfield with bits set for present indices

Options needed to construct a lookup table. Also needed: query sequence and query length.

Int4 word_size

Determines the size of the lookup table.

Uint1 max_db_word_count

words with larger frequency in the database will be masked in the lookup table, if the db_filter opto...

Boolean db_filter

scan the database and include only words that appear in the database between 1 and 9 times (currently...

EBlastProgramType program_number

indicates blastn, blastp, etc.

Int4 mb_template_type

Type of a discontiguous word template.

Uint4 stride

number of words to skip after collecting each word

Int4 mb_template_length

Length of the discontiguous words.

Structure defining one cell of the compacted lookup table.

Int1 num_offsets[3]

number of offsets for each word if there are fewer than 3

Uint4 words[3]

words stored under this hash value

Int4 offsets[9]

offset locations for each word

Int1 num_words

number of words stored under the same hash value

BlastSeqSrcIterator ** itr

BlastSparseUint1Array ** word_counts

BlastSeqSrcGetSeqArg * seq_arg

structure defining one cell of the compacted lookup table

Options required for setting up the query sequence.

char * filter_string

DEPRECATED, filtering options above.

SBlastFilterOptions * filtering_options

structured options for all filtering offered from algo/blast/core for BLAST.

Int4 left

left endpoint of range (zero based)

Int4 right

right endpoint of range (zero based)

voidp calloc(uInt items, uInt size)


RetroSearch is an open source project built by @garambo | Open a GitHub Issue

Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo

HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4