(q_off == q_pos)
return TRUE;
90 if(index == q_pos)
return TRUE;
91 if(index == -1 || index >= 0)
return FALSE;
94index = overflow[src_off++];
96 if(index == q_pos)
return TRUE;
97index = overflow[src_off++];
119index &= (
lookup->mask);
125num_hits =
lookup->thick_backbone[index].num_used;
128 lookup->thick_backbone[index].payload.entries :
129 lookup->overflow +
lookup->thick_backbone[index].payload.overflow_cursor;
131 for(
i=0;
i<num_hits; ++
i) {
132 if(lookup_pos[
i] == q_pos)
return TRUE;
157 Uint1*subject0, *sf, *q_beg, *q_end, *s, *start;
158 Int2remainder, base;
159 Int4q_avail, s_avail;
161base = 3 - (s_off % 4);
164q_avail =
query->length - q_off;
165s_avail =
subject->length - s_off;
167q = q_beg = q_end =
query->sequence + q_off;
189 while((s > start) || (s == start && base < remainder)) {
201}
else if(sum < X) {
207ungapped_data->
s_start= s_off - (q_off - ungapped_data->
q_start);
209 if(q_avail < s_avail) {
218q =
query->sequence + q_off;
225 while(s < sf || (s == sf && base > remainder)) {
230X_current = (-score > X) ? -score : X;
232}
else if(sum < X_current)
241ungapped_data->
length= (
Int4)(q_end - q_beg);
242ungapped_data->
score= score;
266 const Int4* score_table,
Int4reduced_cutoff)
282q_ext = q_off +
len;
283s_ext = s_off +
len;
292 for(
i= 0;
i<
len; s--, q -= 4,
i++) {
293 Uint1s_byte = s[-1];
294 Uint1q_byte = (q[-4] << 6) | (q[-3] << 4) | (q[-2] << 2) | q[-1];
296sum += score_table[q_byte ^ s_byte];
309ungapped_data->
q_start= (
Int4)(new_q - q_start);
310ungapped_data->
s_start= s_ext - (q_ext - ungapped_data->
q_start);
322 for(
i= 0;
i<
len; s++, q += 4,
i++) {
324 Uint1q_byte = (q[0] << 6) | (q[1] << 4) | (q[2] << 2) | q[3];
326sum += score_table[q_byte ^ s_byte];
337 if(score >= reduced_cutoff) {
341s_off, X, ungapped_data);
345ungapped_data->
score= score;
348ungapped_data->
q_start+ 1) );
372 if(
table->chain[index].diag == diag) {
373*level =
table->chain[index].level;
374*hit_len =
table->chain[index].hit_len;
375*hit_saved =
table->chain[index].hit_saved;
379index =
table->chain[index].next;
409 if(
table->chain[index].diag == diag) {
410 table->chain[index].level = level;
411 table->chain[index].hit_len =
len;
412 table->chain[index].hit_saved = hit_saved;
419 table->chain[index].diag = diag;
420 table->chain[index].level = level;
421 table->chain[index].hit_len =
len;
422 table->chain[index].hit_saved = hit_saved;
426index =
table->chain[index].next;
433 table->capacity *= 2;
442cell->
level= level;
462 Int4lut_word_length,
473index = (s[0] << 24 | s[1] << 16 | s[2] << 8) >> shift;
479index = (s[0] << 24 | s[1] << 16 ) >> shift;
485index = (s[0] << 24 | s[1] << 16 | s[2] << 8 | s[3]) >> shift;
489(lookup_wrap, index, q_pos));
515 Uint4lut_word_length,
521 Int4ext_to, ext_max;
522 Int4q_end = *q_off + word_length;
523 Int4s_end = *s_off + word_length;
531 if(word_length == lut_word_length)
return1;
542s_end - lut_word_length,
544q_end - lut_word_length))
return0;
547 for(;
TRUE; ++(*s_off), ++(*q_off)) {
549*s_off, lut_word_length, *q_off))
break;
553ext_to = word_length - (q_end - (*q_off));
554ext_max =
MIN(q_range - q_end, s_range - s_end);
557 if(ext_to || locations) {
559 if(ext_to > ext_max)
return0;
563 for(s_pos = s_end - lut_word_length,
564q_pos = q_end - lut_word_length;
566s_pos -= lut_word_length,
567q_pos -= lut_word_length) {
569s_pos, lut_word_length, q_pos))
return0;
572(*extended) = ext_to;
576 if(!check_double)
return1;
581ext_to += word_length;
582ext_max =
MIN(ext_max, ext_to);
585 for(s_pos = s_end, q_pos = q_end;
586*extended + lut_word_length <= ext_max;
587s_pos += lut_word_length,
588q_pos += lut_word_length,
589(*extended) += lut_word_length) {
591lut_word_length, q_pos))
break;
595s_pos -= (lut_word_length - 1);
596q_pos -= (lut_word_length - 1);
597 while(*extended < ext_max) {
599lut_word_length, q_pos))
return1;
605 return((ext_max == ext_to) ? 2 : 1);
637 Int4word_length,
Int4lut_word_length,
645 Int4diag, real_diag;
646 Int4s_end, s_off_pos, s_end_pos;
653 Int4last_hit, hit_saved;
664real_diag = diag & diag_table->
diag_mask;
665last_hit = hit_level_array[real_diag].
last_hit;
666hit_saved = hit_level_array[real_diag].
flag;
667s_end = s_off + word_length;
668s_off_pos = s_off + diag_table->
offset;
669s_end_pos = s_end + diag_table->
offset;
672 if(s_off_pos < last_hit)
return0;
674 if(two_hits && (hit_saved || s_end_pos > last_hit +
window_size)) {
678query_mask, query_info, s_range,
679word_length, lut_word_length, lut,
TRUE, &extended);
680 if(!word_type)
return0;
683s_end_pos += extended;
686 if(word_type == 1) {
690 Int4s_b = s_end_pos - 2 * word_length;
692 if(Delta < 0) Delta = 0;
698&& off_s_end -
delta>= s_a
699&& off_s_end - off_s_l <= s_b) {
704off_s_end = hit_level_array[off_diag].
last_hit;
708&& off_s_end - off_s_l +
delta<= s_b) {
718}
else if(check_masks) {
721query_mask, query_info, s_range,
722word_length, lut_word_length, lut,
FALSE, &extended))
return0;
725s_end_pos += extended;
732ungapped_data = &dummy_ungapped_data;
744s_off, -(cutoffs->
x_dropoff), ungapped_data);
755*final_data = *ungapped_data;
763ungapped_data =
NULL;
768hit_level_array[real_diag].
last_hit= s_end_pos;
769hit_level_array[real_diag].
flag= hit_ready;
771diag_table->
hit_len_array[real_diag] = (hit_ready) ? 0 : s_end_pos - s_off_pos;
805 Int4word_length,
Int4lut_word_length,
814 Int4s_end, s_off_pos, s_end_pos, s_l;
821 Int4last_hit, hit_saved = 0;
828diag = s_off - q_off;
829s_end = s_off + word_length;
830s_off_pos = s_off + hash_table->
offset;
831s_end_pos = s_end + hash_table->
offset;
836 if(!rc) last_hit = 0;
839 if(s_off_pos < last_hit)
return0;
841 if(two_hits && (hit_saved || s_end_pos > last_hit +
window_size)) {
845query_mask, query_info, s_range,
846word_length, lut_word_length, lut,
TRUE, &extended);
847 if(!word_type)
return0;
850s_end_pos += extended;
853 if(word_type == 1) {
856 Int4s_b = s_end_pos - 2 * word_length;
858 if(Delta < 0) Delta = 0;
862 Int4off_hit_saved = 0;
864&off_s_end, &off_s_l, &off_hit_saved);
867&& off_s_end -
delta>= s_a
868&& off_s_end - off_s_l <= s_b) {
873&off_s_end, &off_s_l, &off_hit_saved);
877&& off_s_end - off_s_l +
delta<= s_b) {
887}
else if(check_masks) {
890query_mask, query_info, s_range,
891word_length, lut_word_length, lut,
FALSE, &extended))
return0;
894s_end_pos += extended;
902ungapped_data = &dummy_ungapped_data;
914s_off, -(cutoffs->
x_dropoff), ungapped_data);
926*final_data = *ungapped_data;
934ungapped_data =
NULL;
940(hit_ready) ? 0 : s_end_pos - s_off_pos,
975 Int4hits_extended = 0;
983check_masks = !lut->
stride;
996 for(; index < num_hits; ++index) {
1003query_info, s_range,
1004word_length, word_length,
1006word_params, matrix,
1013 for(; index < num_hits; ++index) {
1020query_info, s_range,
1021word_length, word_length,
1023word_params, matrix,
1029 returnhits_extended;
1063 Int4hits_extended = 0;
1064 Int4word_length, lut_word_length, ext_to;
1073check_masks = !lut->
stride;
1081ext_to = word_length - lut_word_length;
1093 for(; index < num_hits; ++index) {
1102 Int4s_off = s_offset;
1106 for(; ext_left <
MIN(ext_to, s_offset); ++ext_left) {
1120 if(ext_left < ext_to) {
1122s_off = s_offset + lut_word_length;
1123 if(s_off + ext_to - ext_left > s_range)
1125q =
query->sequence + q_offset + lut_word_length;
1128 for(; ext_right < ext_to - ext_left; ++ext_right) {
1139 if(ext_left + ext_right < ext_to)
1143q_offset -= ext_left;
1144s_offset -= ext_left;
1153query_info, s_range,
1154word_length, lut_word_length,
1156word_params, matrix,
1164query_info, s_range,
1165word_length, lut_word_length,
1167word_params, matrix,
1173 returnhits_extended;
1206 Int4hits_extended = 0;
1207 Int4word_length, lut_word_length, ext_to;
1216check_masks = !lut->
stride;
1224ext_to = word_length - lut_word_length;
1236 for(; index < num_hits; ++index) {
1246 Int4ext_max =
MIN(ext_to, s_offset);
1250 for(; ext_left < ext_max; s--, q -= 4, ++ext_left) {
1253 if((
byte& 3) != q[-1] || ++ext_left == ext_max)
1255 if(((
byte>> 2) & 3) != q[-2] || ++ext_left == ext_max)
1257 if(((
byte>> 4) & 3) != q[-3] || ++ext_left == ext_max)
1259 if((
byte>> 6) != q[-4])
1267 if(ext_left < ext_to) {
1269ext_max = ext_to -ext_left;
1270 if(s_offset + lut_word_length + ext_max > s_range)
1272q =
query->sequence + q_offset + lut_word_length;
1275 for(; ext_right < ext_max; s++, q += 4, ++ext_right) {
1278 if((
byte>> 6) != q[0] || ++ext_right == ext_max)
1280 if(((
byte>> 4) & 3) != q[1] || ++ext_right == ext_max)
1282 if(((
byte>> 2) & 3) != q[2] || ++ext_right == ext_max)
1284 if((
byte& 3) != q[3])
1289 if(ext_left + ext_right < ext_to)
1293q_offset -= ext_left;
1294s_offset -= ext_left;
1303query_info, s_range,
1304word_length, lut_word_length,
1306word_params, matrix,
1314query_info, s_range,
1315word_length, lut_word_length,
1317word_params, matrix,
1323 returnhits_extended;
13314, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
13322, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
13332, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
13342, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
13353, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
13362, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
13372, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
13382, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
13393, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
13402, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
13412, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
13422, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
13433, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
13442, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
13452, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
13462, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
13544, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
13551, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
13561, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
13571, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1392 Int4hits_extended = 0;
1396 Int4ext_to = word_length - lut_word_length;
1400 for(; index < num_hits; ++index) {
1415 if( (s_offset > 0) && (q_offset > 0) ) {
1416 Uint1q_byte = q[q_offset - 4];
1419ext_left =
MIN(
MIN(ext_left, ext_to), q_offset - q_start);
1424 if((ext_left < ext_to) && ((q_offset + lut_word_length) <
query->length)) {
1425 Uint1q_byte = q[q_offset + lut_word_length];
1428ext_right =
MIN(
MIN(ext_right, s_range - (s_offset + lut_word_length)),
1429q_range - (q_offset + lut_word_length));
1430 if(ext_left + ext_right < ext_to)
1434q_offset -= ext_left;
1435s_offset -= ext_left;
1441query_info, s_range,
1442word_length, lut_word_length,
1444word_params, matrix,
1453query_info, s_range,
1454word_length, lut_word_length,
1456word_params, matrix,
1462 returnhits_extended;
1497 Int4hits_extended = 0;
1504 for(; index < num_hits; ++index) {
1514 Int4ext_max =
MIN(
MIN(word_length - lut_word_length, s_offset), q_offset - q_start);
1532 while(ext_left < ext_max) {
1533 Uint1q_byte = q[q_off - 4];
1542ext_left =
MIN(ext_left, ext_max);
1549ext_max =
MIN(
MIN(word_length - ext_left, s_range - s_off), q_range - q_off);
1550 while(ext_right < ext_max) {
1551 Uint1q_byte = q[q_off];
1560ext_right =
MIN(ext_right, ext_max);
1562 if(ext_left + ext_right < word_length)
1565q_offset -= ext_left;
1566s_offset -= ext_left;
1572query_info, s_range,
1573word_length, lut_word_length,
1575word_params, matrix,
1583query_info, s_range,
1584word_length, lut_word_length,
1586word_params, matrix,
1592 returnhits_extended;
1609 Int4hitsfound, total_hits = 0;
1610 Int4hits_extended = 0;
1615 Int4lut_word_length;
1620word_length =
lookup->word_length;
1621lut_word_length =
lookup->lut_word_length;
1629word_length =
lookup->template_length;
1630lut_word_length =
lookup->template_length;
1632word_length =
lookup->word_length;
1633lut_word_length =
lookup->lut_word_length;
1641word_length =
lookup->word_length;
1642lut_word_length =
lookup->lut_word_length;
1649scan_range[2] =
subject->length - lut_word_length;
1666scan_range[1] =
subject->seq_ranges[0].left + word_length - lut_word_length;
1667scan_range[2] =
subject->seq_ranges[0].right - lut_word_length;
1675hitsfound = scansub(lookup_wrap,
subject, offset_pairs, max_hits, &scan_range[1]);
1680total_hits += hitsfound;
1681hits_extended += extend(offset_pairs, hitsfound, word_params,
1683query_info, ewp, init_hitlist, scan_range[2] + lut_word_length);
1689init_hitlist->
total);
1731 if( check_oid( oid, &last_vol_idx ) ==
eNotIndexed) {
1733 subject,
query, query_info, lookup_wrap, matrix,word_params,
1734ewp, offset_pairs, max_hits, init_hitlist, ungapped_stats );
1743hsp_end = hsp + init_hitlist->
total;
1745 for( ; hsp < hsp_end; ++hsp ) {
1748diag =
IR_DIAG( q_off, s_off );
1757q_off, s_off + word_size, s_off,
1758-(cutoffs->
x_dropoff), &dummy_ungapped_data,
1765*ungapped_data = dummy_ungapped_data;
1766 if( new_hsp != hsp ) *new_hsp = *hsp;
1776 if( new_hsp != hsp ) *new_hsp = *hsp;
1896 for(
i= 1;
i< num_arrays;
i++) {
1946 Int4hitsfound, total_hits = 0;
1947 Int4hits_extended = 0;
1951 Int4lut_word_length;
1957 if(*hsp_list_ptr ==
NULL) {
1962hsp_list = *hsp_list_ptr;
1972word_length =
lookup->word_length;
1973lut_word_length =
lookup->lut_word_length;
1980word_length =
lookup->template_length;
1981lut_word_length =
lookup->template_length;
1983word_length =
lookup->word_length;
1984lut_word_length =
lookup->lut_word_length;
1992word_length =
lookup->word_length;
1993lut_word_length =
lookup->lut_word_length;
1999word_length =
lookup->word_length;
2000lut_word_length =
lookup->lut_word_length;
2006scan_range[2] =
subject->length - lut_word_length;
2018scan_range[1] =
subject->seq_ranges[0].left + word_length - lut_word_length;
2019scan_range[2] =
subject->seq_ranges[0].right - lut_word_length;
2029 if(getenv(
"MAPPER_USE_SMALL_WORDS")) {
2035hitsfound = scansub(lookup_wrap,
subject, offset_pairs, max_hits, &scan_range[1]);
2037 if(hitsfound >= 0) {
2045 for(
i= 0;
i< hitsfound;
i++) {
2051 Int4diag = s_off - q_off;
2066 if(last_p != 0 && last_d == diag &&
2067s_off - last_p < lut_word_length + 1) {
2071 ASSERT(index < word_hits->num_arrays);
2079word_hits->
num[index],
2080word_params, score_params,
2087scan_range[2] + lut_word_length,
2090word_hits->
num[index] = 0;
2102total_hits += hitsfound;
2104word_params, score_params,
2111scan_range[2] + lut_word_length,
2117 if(!read_is_query) {
2126 if(word_hits->
num[
i] > 0) {
2128word_hits->
num[
i],
2129word_params, score_params,
2136scan_range[2] + lut_word_length,
2140word_hits->
num[
i] = 0;
2196 if( check_oid( oid, &last_vol_idx ) ==
eNotIndexed) {
2198 subject,
query, query_info, lookup_wrap, word_params,
2199score_params, hit_params, offset_pairs, word_hits, max_hits,
2200gap_align, init_hitlist, hsp_list, ungapped_stats,
2207 if(*hsp_list ==
NULL) {
2212 if( word_size > 0) {
2216hsp_end = hsp + init_hitlist->
total;
2218 for( ; hsp < hsp_end; ++hsp ) {
2223diag =
IR_DIAG( q_off, s_off );
2233 Uint1* query_seq =
query->sequence + query_start;
2237 Int4num_identical = 0;
2238 Int4right_ungapped_ext_len = 0;
2245q_off - query_start,
2250&right_ungapped_ext_len);
2268q_off - query_start, s_off,
#define COMPRESSION_RATIO
Compression ratio of nucleotide bases (4 bases in 1 byte)
#define sfree(x)
Safe free a pointer: belongs to a higher level header.
void Blast_UngappedStatsUpdate(BlastUngappedStats *ungapped_stats, Int4 total_hits, Int4 extended_hits, Int4 saved_hits)
Fill data in the ungapped hits diagnostics structure.
Boolean BLAST_SaveInitialHit(BlastInitHitList *init_hitlist, Int4 q_off, Int4 s_off, BlastUngappedData *ungapped_data)
Save the initial hit data into the initial hit list structure.
void Blast_InitHitListSortByScore(BlastInitHitList *init_hitlist)
Sort array of initial HSPs by score.
Int2 Blast_ExtendWordExit(Blast_ExtendWord *ewp, Int4 subject_length)
Update the word extension structure after scanning of each subject sequence.
#define DIAGHASH_NUM_BUCKETS
Number of hash buckets in BLAST_DiagHash.
Int2 Blast_HSPInit(Int4 query_start, Int4 query_end, Int4 subject_start, Int4 subject_end, Int4 query_gapped_start, Int4 subject_gapped_start, Int4 query_context, Int2 query_frame, Int2 subject_frame, Int4 score, GapEditScript **gap_edit, BlastHSP **ret_hsp)
Allocates BlastHSP and inits with information from input.
Int4 BlastHspNumMax(Boolean gapped_calculation, const BlastHitSavingOptions *options)
Calculates the number of HSPs that should be saved.
BlastHSPList * Blast_HSPListNew(Int4 hsp_max)
Creates HSP list structure with a default size HSP array.
BlastHSPMappingInfo * BlastHSPMappingInfoNew(void)
Allocate memory for an HSP's additional data structure.
Int2 Blast_HSPListSaveHSP(BlastHSPList *hsp_list, BlastHSP *hsp)
Saves HSP information into a BlastHSPList structure.
#define PV_ARRAY_BTS
bits-to-shift from lookup_index to pv_array index.
#define PV_TEST(lookup, index, shift)
Test the bit at position 'index' in the PV array bitfield within 'lookup'.
#define PV_ARRAY_TYPE
The pv_array 'native' type.
Routines for creating nucleotide BLAST lookup tables.
#define NA_HITS_PER_CELL
maximum number of hits in one lookup table cell
Routines for scanning nucleotide BLAST lookup tables.
void * BlastChooseNucleotideScanSubjectAny(LookupTableWrap *lookup_wrap)
Return the most generic function to scan through nucleotide subject sequences.
Int4(* TNaScanSubjectFunction)(const LookupTableWrap *lookup_wrap, const BLAST_SequenceBlk *subject, BlastOffsetPair *offset_pairs, Int4 max_hits, Int4 *scan_range)
Generic prototype for nucleotide subject scanning routines.
@ eSmallNaLookupTable
lookup table for blastn with small query
@ eMBLookupTable
megablast lookup table (includes both contiguous and discontiguous megablast)
@ eNaHashLookupTable
used for 16-base words
@ eDiagHash
use hash table (blastn only)
Int4 BSearchContextInfo(Int4 n, const BlastQueryInfo *A)
Search BlastContextInfo structures for the specified offset.
Various auxiliary BLAST utility functions.
#define NCBI2NA_UNPACK_BASE(x, N)
Macro to extract base N from a byte x (N >= 0, N < 4)
static void get_results(DBPROCESS *dbproc, int start)
static int lookup(const char *name, const struct lookup_int *table)
uint8_t Uint1
1-byte (8-bit) unsigned integer
int16_t Int2
2-byte (16-bit) signed integer
int32_t Int4
4-byte (32-bit) signed integer
uint32_t Uint4
4-byte (32-bit) unsigned integer
ir_diag_hash * ir_hash_create(void)
Hash table constructor.
ir_diag_hash * ir_hash_destroy(ir_diag_hash *hash)
Hash table destructor.
Declarations of structures needed to implement diagonal hash to support ungapped extensions for index...
#define IR_LOCATE(hash, diag, key)
Find a hash table entry for the given diagonal.
#define IR_KEY(diag)
Compute the hash key from a diagonal identifier.
#define IR_DIAG(qoff, soff)
Compute diagonal identifier from subject and query offsets.
<!DOCTYPE HTML >< html > n< header > n< title > PubSeq Gateway Help Page</title > n< style > n table
Int4 BlastNaExtendJumper(BlastOffsetPair *offset_pairs, Int4 num_hits, const BlastInitialWordParameters *word_params, const BlastScoringParameters *score_params, const BlastHitSavingParameters *hit_params, LookupTableWrap *lookup_wrap, BLAST_SequenceBlk *query, BLAST_SequenceBlk *subject, BlastQueryInfo *query_info, BlastGapAlignStruct *gap_align, BlastHSPList *hsp_list, Uint4 s_range, SubjectIndex *s_index)
Extend a list of word hits.
int JumperGappedAlignmentCompressedWithTraceback(const Uint1 *query, const Uint1 *subject, Int4 query_length, Int4 subject_length, Int4 query_start, Int4 subject_start, BlastGapAlignStruct *gap_align, const BlastScoringParameters *score_params, Int4 *num_identical, Int4 *right_ungapped_ext_len)
Jumper gapped alignment with traceback; 1 base per byte in query, 4 bases per byte in subject.
int JumperFindSpliceSignals(BlastHSP *hsp, Int4 query_len, const Uint1 *subject, Int4 subject_len)
Find splice signals at the edges of an HSP and save them in the HSP.
Boolean JumperGoodAlign(const BlastGapAlignStruct *gap_align, const BlastHitSavingParameters *hit_params, Int4 num_identical, BlastContextInfo *context_info)
Test whether an HSP should be saved.
SubjectIndex * SubjectIndexNew(BLAST_SequenceBlk *subject, Int4 width, Int4 word_size)
Index a sequence, used for indexing compressed nucleotide subject sequence.
SubjectIndex * SubjectIndexFree(SubjectIndex *sindex)
Free subject index structure.
JumperEditsBlock * JumperFindEdits(const Uint1 *query, const Uint1 *subject, BlastGapAlignStruct *gap_align)
GapEditScript * JumperPrelimEditBlockToGapEditScript(JumperPrelimEditBlock *rev_prelim_block, JumperPrelimEditBlock *fwd_prelim_block)
Convert Jumper's preliminary edit script to GapEditScript.
#define SUBJECT_INDEX_WORD_LENGTH
for(len=0;yy_str[len];++len)
if(yy_accept[yy_current_state])
Boolean(* T_Lookup_Callback)(const LookupTableWrap *, Int4, Int4)
Function pointer type to check the presence of index->q_off pair.
static NCBI_INLINE Boolean s_DetermineScanningOffsets(const BLAST_SequenceBlk *subject, Int4 word_length, Int4 lut_word_length, Int4 *range)
Determines the scanner's offsets taking the database masking restrictions into account (if any).
Declarations for functions that extract hits from indexed blast databases (specialized for megablast)
#define LAST_VOL_IDX_NULL
int(* T_MB_IdbCheckOid)(Int4 oid, Int4 *last_vol_id)
Function pointer type to check index seeds availability for oid.
unsigned long(* T_MB_IdbGetResults)(Int4 oid, Int4 chunk, BlastInitHitList *init_hitlist)
Function pointer type to retrieve hits from an indexed database.
static NCBI_INLINE Int4 s_BlastDiagHashRetrieve(BLAST_DiagHash *table, Int4 diag, Int4 *level, Int4 *hit_len, Int4 *hit_saved)
Attempt to retrieve information associated with diagonal diag.
static Boolean s_MBLookup(const LookupTableWrap *lookup_wrap, Int4 index, Int4 q_pos)
Check to see if an index->q_pos pair exists in MB lookup table.
static NCBI_INLINE Int4 s_BlastDiagHashInsert(BLAST_DiagHash *table, Int4 diag, Int4 level, Int4 len, Int4 hit_saved, Int4 s_off, Int4 window_size)
Attempt to store information associated with diagonal diag.
static Int4 s_BlastSmallNaExtendAlignedOneByte(const BlastOffsetPair *offset_pairs, Int4 num_hits, const BlastInitialWordParameters *word_params, LookupTableWrap *lookup_wrap, BLAST_SequenceBlk *query, BLAST_SequenceBlk *subject, Int4 **matrix, BlastQueryInfo *query_info, Blast_ExtendWord *ewp, BlastInitHitList *init_hitlist, Uint4 s_range)
Perform exact match extensions on the hits retrieved from small-query lookup tables.
static Int4 s_BlastnDiagTableExtendInitialHit(BLAST_SequenceBlk *query, BLAST_SequenceBlk *subject, Int4 q_off, Int4 s_off, BlastSeqLoc *query_mask, BlastQueryInfo *query_info, Int4 s_range, Int4 word_length, Int4 lut_word_length, const LookupTableWrap *lut, const BlastInitialWordParameters *word_params, Int4 **matrix, BLAST_DiagTable *diag_table, BlastInitHitList *init_hitlist, Boolean check_masks)
Perform ungapped extension given an offset pair, and save the initial hit information if the hit qual...
void BlastChooseNaExtend(LookupTableWrap *lookup_wrap)
Choose the best routine to use for creating ungapped alignments.
static NCBI_INLINE Boolean s_IsSeedMasked(const LookupTableWrap *lookup_wrap, const BLAST_SequenceBlk *subject, Int4 s_off, Int4 lut_word_length, Int4 q_pos)
Test to see if seed->q_off exists in lookup table.
static Int4 s_BlastNaExtendAligned(const BlastOffsetPair *offset_pairs, Int4 num_hits, const BlastInitialWordParameters *word_params, LookupTableWrap *lookup_wrap, BLAST_SequenceBlk *query, BLAST_SequenceBlk *subject, Int4 **matrix, BlastQueryInfo *query_info, Blast_ExtendWord *ewp, BlastInitHitList *init_hitlist, Uint4 s_range)
Perform exact match extensions on the hits retrieved from blastn/megablast lookup tables,...
static Int4 s_BlastNaExtend(const BlastOffsetPair *offset_pairs, Int4 num_hits, const BlastInitialWordParameters *word_params, LookupTableWrap *lookup_wrap, BLAST_SequenceBlk *query, BLAST_SequenceBlk *subject, Int4 **matrix, BlastQueryInfo *query_info, Blast_ExtendWord *ewp, BlastInitHitList *init_hitlist, Uint4 s_range)
Perform exact match extensions on the hits retrieved from blastn/megablast lookup tables,...
static Int4 s_BlastSmallNaExtend(const BlastOffsetPair *offset_pairs, Int4 num_hits, const BlastInitialWordParameters *word_params, LookupTableWrap *lookup_wrap, BLAST_SequenceBlk *query, BLAST_SequenceBlk *subject, Int4 **matrix, BlastQueryInfo *query_info, Blast_ExtendWord *ewp, BlastInitHitList *init_hitlist, Uint4 s_range)
Perform exact match extensions on the hits retrieved from small-query blastn lookup tables,...
static void s_NuclUngappedExtend(BLAST_SequenceBlk *query, BLAST_SequenceBlk *subject, Int4 **matrix, Int4 q_off, Int4 s_match_end, Int4 s_off, Int4 X, BlastUngappedData *ungapped_data, const Int4 *score_table, Int4 reduced_cutoff)
Perform ungapped extension of a word hit.
Int2 BlastNaWordFinder(BLAST_SequenceBlk *subject, BLAST_SequenceBlk *query, BlastQueryInfo *query_info, LookupTableWrap *lookup_wrap, Int4 **matrix, const BlastInitialWordParameters *word_params, Blast_ExtendWord *ewp, BlastOffsetPair *offset_pairs, Int4 max_hits, BlastInitHitList *init_hitlist, BlastUngappedStats *ungapped_stats)
Find all words for a given subject sequence and perform ungapped extensions, assuming ordinary blastn...
static Boolean s_NaLookup(const LookupTableWrap *lookup_wrap, Int4 index, Int4 q_pos)
Check to see if an index->q_pos pair exists in Na lookup table.
static void s_NuclUngappedExtendExact(BLAST_SequenceBlk *query, BLAST_SequenceBlk *subject, Int4 **matrix, Int4 q_off, Int4 s_off, Int4 X, BlastUngappedData *ungapped_data)
Perform ungapped extension of a word hit, using a score matrix and extending one base at a time.
Int2 JumperNaWordFinder(BLAST_SequenceBlk *subject, BLAST_SequenceBlk *query, BlastQueryInfo *query_info, LookupTableWrap *lookup_wrap, const BlastInitialWordParameters *word_params, const BlastScoringParameters *score_params, const BlastHitSavingParameters *hit_params, BlastOffsetPair *offset_pairs, MapperWordHits *word_hits, Int4 max_hits, BlastGapAlignStruct *gap_align, BlastInitHitList *init_hitlist, BlastHSPList **hsp_list_ptr, BlastUngappedStats *ungapped_stats, BlastGappedStats *gapped_stats)
MapperWordHits * MapperWordHitsFree(MapperWordHits *wh)
static const Uint1 s_ExactMatchExtendRight[256]
Entry i of this list gives the number of pairs of bits that are zero in the bit pattern of i,...
static Int4 s_BlastNaExtendDirect(const BlastOffsetPair *offset_pairs, Int4 num_hits, const BlastInitialWordParameters *word_params, LookupTableWrap *lookup_wrap, BLAST_SequenceBlk *query, BLAST_SequenceBlk *subject, Int4 **matrix, BlastQueryInfo *query_info, Blast_ExtendWord *ewp, BlastInitHitList *init_hitlist, Uint4 s_range)
Perform ungapped extensions on the hits retrieved from blastn/megablast lookup tables,...
static Int4 s_TypeOfWord(BLAST_SequenceBlk *query, BLAST_SequenceBlk *subject, Int4 *q_off, Int4 *s_off, BlastSeqLoc *locations, BlastQueryInfo *query_info, Uint4 s_range, Uint4 word_length, Uint4 lut_word_length, const LookupTableWrap *lookup_wrap, Boolean check_double, Int4 *extended)
Check the mini-extended word against masked query regions, and do right extension if necessary.
MapperWordHits * MapperWordHitsNew(const BLAST_SequenceBlk *query, const BlastQueryInfo *query_info)
Int2 ShortRead_IndexedWordFinder(BLAST_SequenceBlk *subject, BLAST_SequenceBlk *query, BlastQueryInfo *query_info, LookupTableWrap *lookup_wrap, const BlastInitialWordParameters *word_params, const BlastScoringParameters *score_params, const BlastHitSavingParameters *hit_params, BlastOffsetPair *offset_pairs, MapperWordHits *word_hits, Int4 max_hits, BlastGapAlignStruct *gap_align, BlastInitHitList *init_hitlist, BlastHSPList **hsp_list, BlastUngappedStats *ungapped_stats, BlastGappedStats *gapped_stats)
static const Uint1 s_ExactMatchExtendLeft[256]
Entry i of this list gives the number of pairs of bits that are zero in the bit pattern of i,...
static Boolean s_SmallNaLookup(const LookupTableWrap *lookup_wrap, Int4 index, Int4 q_pos)
Check to see if an index->q_pos pair exists in SmallNa lookup table.
Int2 MB_IndexedWordFinder(BLAST_SequenceBlk *subject, BLAST_SequenceBlk *query, BlastQueryInfo *query_info, LookupTableWrap *lookup_wrap, Int4 **matrix, const BlastInitialWordParameters *word_params, Blast_ExtendWord *ewp, BlastOffsetPair *offset_pairs, Int4 max_hits, BlastInitHitList *init_hitlist, BlastUngappedStats *ungapped_stats)
Finds all runs of a specified number of exact matches between two nucleotide sequences.
static Int4 s_BlastnDiagHashExtendInitialHit(BLAST_SequenceBlk *query, BLAST_SequenceBlk *subject, Int4 q_off, Int4 s_off, BlastSeqLoc *query_mask, BlastQueryInfo *query_info, Int4 s_range, Int4 word_length, Int4 lut_word_length, const LookupTableWrap *lut, const BlastInitialWordParameters *word_params, Int4 **matrix, BLAST_DiagHash *hash_table, BlastInitHitList *init_hitlist, Boolean check_masks)
Perform ungapped extension given an offset pair, and save the initial hit information if the hit qual...
Nucleotide ungapped extension code.
Int4(* TNaExtendFunction)(const BlastOffsetPair *offset_pairs, Int4 num_hits, const BlastInitialWordParameters *word_params, LookupTableWrap *lookup_wrap, BLAST_SequenceBlk *query, BLAST_SequenceBlk *subject, Int4 **matrix, BlastQueryInfo *query_info, Blast_ExtendWord *ewp, BlastInitHitList *init_hitlist, Int4 range)
Signature of function used to compute ungapped alignments.
const struct ncbi::grid::netcache::search::fields::KEY key
#define MIN(a, b)
returns smaller of a and b.
#define NCBI_INLINE
"inline" seems to work on our remaining in-house compilers (WorkShop, Compaq, ICC,...
Uint1 Boolean
bool replacement for C
#define TRUE
bool replacement for C indicating true.
#define FALSE
bool replacement for C indicating false.
#define ASSERT
macro for assert.
#define INT4_MIN
Smallest (most negative) number represented by signed int.
#define MAX(a, b)
returns larger of a and b.
Int4 delta(size_t dimension_, const Int4 *score_)
Track initial word matches using hashing with chaining.
Int4 offset
"offset" added to query and subject position so that "last_hit" doesn't have to be zeroed out every t...
Structure containing parameters needed for initial word extension.
DiagStruct * hit_level_array
Array to hold latest hits and their lengths for all diagonals.
Int4 diag_array_length
Smallest power of 2 longer than query length.
Int4 diag_mask
Used to mask off everything above min_diag_length (mask = min_diag_length-1).
Uint1 * hit_len_array
Array to hold the length of the latest hit.
Int4 offset
"offset" added to query and subject position so that "last_hit" doesn't have to be zeroed out every t...
Structure to hold a sequence.
Int4 query_length
Length of this query, strand or frame.
Int4 query_offset
Offset of this query, strand or frame in the concatenated super-query.
Int1 frame
Frame number (-1, -2, -3, 0, 1, 2, or 3)
Structure supporting the gapped alignment.
Int4 query_stop
query end offset of current alignment
Int4 subject_start
subject start offset of current alignment
Int4 query_start
query start offset of current alignment
Int4 subject_stop
subject end offset of current alignment
JumperGapAlign * jumper
data for jumper alignment
Int4 score
Return value: alignment score.
GapEditScript * edit_script
The traceback (gap) information.
Structure containing hit counts from the gapped stage of a BLAST search.
Int4 extensions
Total number of gapped extensions performed.
The structure to hold all HSPs for a given sequence after the gapped alignment.
JumperEditsBlock * edits
Information about mismatches and gaps, used for mapping short reads.
Structure holding all information about an HSP.
double evalue
This HSP's e-value.
Int4 num_ident
Number of identical base pairs in this HSP.
BlastHSPMappingInfo * map_info
Parameter block that contains a pointer to BlastHitSavingOptions and the values derived from it.
BlastHitSavingOptions * options
The original (unparsed) options.
Structure to hold the initial HSP information.
BlastUngappedData * ungapped_data
Pointer to a structure holding ungapped alignment information.
BlastOffsetPair offsets
Offsets in query and subject, or, in PHI BLAST, start and end of pattern in subject.
Structure to hold all initial HSPs for a given subject sequence.
Int4 total
Total number of hits currently saved.
BlastInitHSP * init_hsp_array
Array of offset pairs, possibly with scores.
EBlastProgramType program_number
indicates blastn, blastp, etc.
Int4 window_size
Maximal allowed distance between 2 hits in case 2 hits are required to trigger the extension.
Int4 scan_range
Maximal number of gaps allowed between 2 hits.
Parameter block that contains a pointer to BlastInitialWordOptions and the values derived from it.
BlastUngappedCutoffs * cutoffs
cutoff values (one per context)
Boolean ungapped_extension
Should an ungapped extension be performed?
BlastInitialWordOptions * options
The original (unparsed) options.
Boolean matrix_only_scoring
Use the scoring matrix (not table) to score ungapped and gapped alignments -RMH-.
ESeedContainerType container_type
How to store offset pairs for initial seeds?
Int4 nucl_score_table[256]
the combined score of all match/mismatch combinations for aligning four bases
The lookup table structure used for Mega BLAST.
Int4 lut_word_length
number of letters in a lookup table word
Int4 pv_array_bts
The exponent of 2 by which pv_array is smaller than the backbone.
BlastSeqLoc * masked_locations
masked locations, only non-NULL for soft-masking.
Int4 * hashtable
Array of positions.
PV_ARRAY_TYPE * pv_array
Presence vector, used for quick presence check.
Boolean stride
is lookup table created with a stride
Int8 hashsize
= 4^(lut_word_length)
Int4 scan_step
Step size for scanning the database.
Int4 word_length
number of exact letter matches that will trigger an ungapped extension
Boolean discontiguous
Are discontiguous words used?
Int4 * next_pos
Extra positions stored here.
void * extend_callback
function for extending hits
Int4 template_length
Length of the discontiguous word template.
The basic lookup table structure for blastn searches.
Int4 scan_step
number of bases between successive words
BlastSeqLoc * masked_locations
masked locations, only non-NULL for soft-masking.
Int4 lut_word_length
Length in bases of a word indexed by the lookup table.
Int4 word_length
Length in bases of the full word match required to trigger extension.
void * extend_callback
function for extending hits
The query related information.
BlastContextInfo * contexts
Information per context.
int num_queries
Number of query sequences.
Int4 last_context
Index of the last element of the context array.
Uint4 max_length
Length of the longest among the concatenated queries.
Scoring parameters block. Contains scoring-related information that is actually used for the blast sea...
Used to hold a set of positions, mostly used for filtering.
Lookup table structure for blastn searches with small queries.
Int4 scan_step
number of bases between successive words
Int4 word_length
Length in bases of the full word match required to trigger extension.
void * extend_callback
function for extending hits
BlastSeqLoc * masked_locations
masked locations, only non-NULL for soft-masking.
Int4 lut_word_length
Length in bases of a word indexed by the lookup table.
All the ungapped cutoff values that can change from context to context.
Int4 reduced_nucl_cutoff_score
for blastn, a reduced cutoff score for use with approximate ungapped alignments
Int4 cutoff_score
Cutoff score for saving ungapped hits.
Int4 x_dropoff
Raw X-dropoff value used in the ungapped extension.
Structure to hold ungapped alignment information.
Int4 score
Score of the ungapped alignment.
Int4 length
Length of the ungapped alignment.
Int4 q_start
Start of the ungapped alignment in query.
Int4 s_start
Start of the ungapped alignment in subject.
Structure containing hit counts from the ungapped stage of a BLAST search.
Int4 good_init_extends
Number of successful initial extensions, i.e.
Structure for keeping initial word extension information.
BLAST_DiagHash * hash_table
Hash table and related parameters.
BLAST_DiagTable * diag_table
Diagonal array and related parameters.
Structure for keeping last hit information for a diagonal in a hash table, when eRight or eRightAndLe...
Int4 hit_len
The length of last hit.
signed int level
This hit's offset in the subject sequence.
unsigned int hit_saved
Whether or not this hit has been saved.
Uint4 next
Offset of next element in the chain.
Int4 diag
This hit's diagonal.
Structure for keeping last hit information for a diagonal.
signed int last_hit
Offset of the last hit.
unsigned int flag
Reset the next extension?
JumperPrelimEditBlock * left_prelim_block
JumperPrelimEditBlock * right_prelim_block
Wrapper structure for different types of BLAST lookup tables.
void * lookup_callback
function used to look up an index->q_off pair
void * lut
Pointer to the actual lookup table structure.
void * check_index_oid
function used to check if seeds for a given oid are present
ELookupTableType lut_type
What kind of a lookup table it is?
void * read_indexed_db
function used to retrieve hits from an indexed database
Int4 num_arrays
number of pair_arrays
BlastOffsetPair ** pair_arrays
lists of word hits
Int4 * last_diag
diagonal for the last word hit for each query context
Int4 array_size
size of each array
Int4 * last_pos
subject position for the last word hit for each query context
Int4 * num
number of hits in the list
Int4 divisor
divisor used to find pair_arrays index based on query offset
Index for a chunk of a subject sequence.
Uint4 qend
Right end (in the query) of the last seen seed on the diagonal.
Uint4 diag
Diagonal identifier.
This symbol enables the verbose option in makeblastdb and other BLAST+ search command line applicatio...
Uint4 q_off
Query offset.
Uint4 s_off
Subject offset.
struct BlastOffsetPair::@6 qs_offsets
Query/subject offset pair.
static CS_CONTEXT * context
voidp calloc(uInt items, uInt size)
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4