prelim_hitlist_size = hitlist_size;
47 char* ADAPTIVE_CBS_ENV = getenv(
"ADAPTIVE_CBS");
48 if(compositionBasedStats) {
49 if(ADAPTIVE_CBS_ENV !=
NULL) {
50 if(hitlist_size < 1000) {
51prelim_hitlist_size =
MAX(prelim_hitlist_size + 1000, 1500);
54prelim_hitlist_size = prelim_hitlist_size*2 + 50;
58 if(hitlist_size <= 500) {
59prelim_hitlist_size = 1050;
62prelim_hitlist_size = prelim_hitlist_size*2 + 50;
67 else if(gapped_calculation) {
68prelim_hitlist_size =
MIN(
MAX(2 * prelim_hitlist_size, 10), prelim_hitlist_size + 50);
70 returnprelim_hitlist_size;
83 if(hit_options ==
NULL||
84ext_options ==
NULL||
85scoring_options ==
NULL)
152 Int4subject_end,
Int4query_gapped_start,
153 Int4subject_gapped_start,
Int4query_context,
166 if(new_hsp ==
NULL)
176new_hsp->
context= query_context;
179new_hsp->
score= score;
180 if(gap_edit && *gap_edit)
199 if(
info->subject_overhangs) {
247new_hsp->
num= hsp->
num;
281retval->
num= hsp->
num;
333 if(!
copy->right) {
363 Int4lastEffectiveOccurrence;
365 Int4min_pattern_length;
380 for(index = 1; index < pat_info->
num_patterns; ++index) {
382> min_pattern_length) {
406 ASSERT(query_info && hsp && sbp && pattern_blk);
415exp(-Lambda*hsp->
score);
442 Int4score,
const Uint1* query_start,
const Uint1* subject_start,
443 const Uint1* best_q_start,
const Uint1* best_q_end,
444 const Uint1* best_s_start,
const Uint1* best_s_end,
445 intbest_start_esp_index,
446 intbest_end_esp_index,
447 intbest_end_esp_num)
453 if(hsp->
score>= cutoff_score) {
462 if(best_end_esp_index != last_num|| best_start_esp_index > 0)
470hsp->
gap_info->
num[last_num] = best_end_esp_num;
471 ASSERT(best_end_esp_num >= 0);
486 Int4sum, score, gap_open, gap_extend;
490 intbest_start_esp_index = 0;
491 intbest_end_esp_index = 0;
492 intcurrent_start_esp_index = 0;
493 intbest_end_esp_num = 0;
496 const Uint1* best_q_start;
497 const Uint1* best_s_start;
498 const Uint1* best_q_end;
499 const Uint1* best_s_end;
502 const Uint1* current_q_start;
504 const Uint1* current_s_start;
510 const Uint1kResidueMask = 0x0f;
518 if(score_params->
reward% 2 == 1)
522(score_params->
reward- 2*score_params->
penalty) * factor / 2;
524gap_open = score_params->
gap_open;
534best_q_start = best_q_end = current_q_start =
query;
535best_s_start = best_s_end = current_s_start =
subject;
538best_end_esp_num = -1;
540 if(!esp)
return TRUE;
541 for(index=0; index<esp->
size; index++)
544 for(op_index=0; op_index<esp->
num[index]; )
548sum += factor*matrix[*
query& kResidueMask][*
subject];
553sum -= gap_open + gap_extend * esp->
num[index];
555op_index += esp->
num[index];
557sum -= gap_open + gap_extend * esp->
num[index];
559op_index += esp->
num[index];
567 if(op_index < esp->num[index]) {
568esp->
num[index] -= op_index;
569current_start_esp_index = index;
572current_start_esp_index = index + 1;
577current_q_start =
query;
584 if(score < cutoff_score) {
586best_q_start =
query;
591best_start_esp_index = current_start_esp_index;
592best_end_esp_index = current_start_esp_index;
595}
else if(sum > score) {
600best_q_start = current_q_start;
601best_s_start = current_s_start;
605best_start_esp_index = current_start_esp_index;
606best_end_esp_index = index;
607best_end_esp_num = op_index;
614 if(best_start_esp_index < esp->
size&& best_end_esp_index < esp->
size) {
619qp = (
Int4)(best_q_start - q);
620sp = (
Int4)(best_s_start - s);
622 while(qp > 0 && sp > 0 && (q[--qp] == s[--sp]) && q[qp]<4) ext++;
625esp->
num[best_start_esp_index] += ext;
626 if(best_end_esp_index == best_start_esp_index) best_end_esp_num += ext;
627score += ext * score_params->
reward;
629qp = (
Int4)(best_q_end - q);
630sp = (
Int4)(best_s_end - s);
632 while(qp < qlen && sp < slen && q[qp]<4 && (q[qp++] == s[sp++])) ext++;
635esp->
num[best_end_esp_index] += ext;
636best_end_esp_num += ext;
637score += ext * score_params->
reward;
643score, q, s, best_q_start,
644best_q_end, best_s_start, best_s_end,
645best_start_esp_index, best_end_esp_index,
665 const Uint1* query_start,
const Uint1* subject_start,
666 const Uint1* best_q_start,
const Uint1* best_q_end,
667 const Uint1* best_s_start,
const Uint1* best_s_end)
671subject_start, best_q_start, best_q_end,
672best_s_start, best_s_end, 0, 0, 0);
683 const Uint1* best_q_start,* best_s_start,* best_q_end,* best_s_end;
684 const Uint1* current_q_start, * current_s_start;
686 const Uint1kResidueMask = (translated ? 0xff : 0x0f);
696best_q_start = best_q_end = current_q_start =
query;
697best_s_start = best_s_end = current_s_start =
subject;
699 for(index = 0; index < hsp_length; ++index) {
706current_q_start =
query;
711 if(score < cutoff_score) {
712best_q_start = best_q_end =
query;
713best_s_start = best_s_end =
subject;
716}
else if(sum > score) {
723best_q_start = current_q_start;
724best_s_start = current_s_start;
731query_start, subject_start, best_q_start,
732best_q_end, best_s_start, best_s_end);
751 Int4 i, num_ident, align_length, q_off, s_off;
779 if(q_length != s_length)
781align_length = q_length;
782 for(
i=0;
i<align_length;
i++) {
785 else if(
NULL!= matrix) {
786 if(matrix[*q][*s] > 0)
796 for(index=0; index<esp->
size; index++)
798align_length += esp->
num[index];
799 switch(esp->
op_type[index]) {
801 for(
i=0;
i<esp->
num[index];
i++) {
805 else if(
NULL!= matrix) {
806 if(matrix[*q][*s] > 0)
814s += esp->
num[index];
817q += esp->
num[index];
820s += esp->
num[index];
821q += esp->
num[index];
827 if(align_length_ptr) {
828*align_length_ptr = align_length;
830*num_ident_ptr = num_ident;
833*num_pos_ptr = num_pos + num_ident;
853 Int4* num_ident_ptr,
Int4* align_length_ptr,
856 Int4num_ident, align_length;
884 for(index=0; index<esp->
size; index++)
887 switch(esp->
op_type[index]) {
889align_length += esp->
num[index];
890 for(
i=0;
i<esp->
num[index];
i++) {
893 else if(
NULL!= matrix) {
894 if(matrix[*q][*s] > 0)
902align_length += esp->
num[index];
906align_length += esp->
num[index];
907q += esp->
num[index];
923q += esp->
num[index];
928 if(align_length_ptr) {
929*align_length_ptr = align_length;
931*num_ident_ptr = num_ident;
934*num_pos_ptr = num_pos + num_ident;
944 Int4* align_length_ptr)
970 Int4* align_length_ptr,
999align_length < hit_options->min_hit_length) ;
1009 Int4align_length = 0;
1022delete_hsp =
s_HSPTest(hsp, hit_options, align_length);
1031 return s_HSPTest(hsp, hit_options, align_length);
1037 if(query_length > 0) {
1038pct = 100.0 * (double) (hsp->
query.
end- hsp->
query.
offset)/ (double) query_length;
1046 doublemin_query_coverage_pct,
1050 return(hsp_coverage < min_query_coverage_pct);
1056 Int4* gaps_out,
Int4* gap_opens_out)
1060 Int4gap_opens = 0, gaps = 0;
1065 for(index=0; index<esp->
size; index++) {
1067length += esp->
num[index];
1068gaps += esp->
num[index];
1072gaps += esp->
num[index];
1075}
else if(s_length > length) {
1079*length_out = length;
1080*gap_opens_out = gap_opens;
1096 if(segment->
frame< 0) {
1099}
else if(segment->
frame> 0) {
1103*start = segment->
offset+ 1;
1104*end = segment->
end;
1110 Int4query_length,
Int4subject_length,
1163 if(target_t->
partial&& (start ||
1164(stop < target_t->subject_blk->length /
CODON_LENGTH-3)))
1166 const intkMaxTranslation = 99;
1167 Int4nucl_length = 0;
1168 Int4translation_length = 0;
1169 Int4nucl_start = 0;
1171 Int4nucl_shift = 0;
1172 Int4start_shift = 0;
1187nucl_length = nucl_end - nucl_start;
1195nucl_shift = nucl_start;
1197 if(start_shift < start || start_shift+translation_length > stop) {
1205 if(translation_length > stop-start) {
1220target_t->
range[2*
context+1] = start_shift + length;
1222 sfree(nucl_seq_rev);
1231 if(translated_length)
1242 const Uint1* gen_code_string,
1243 Uint1** translation_buffer_ptr,
1244 Uint1** subject_ptr,
1245 Int4* subject_length_ptr,
1246 Int4* start_shift_ptr)
1248 Int4translation_length;
1249 Uint1* translation_buffer;
1255 ASSERT(subject_blk && hsp && gen_code_string && translation_buffer_ptr &&
1256subject_ptr && subject_length_ptr && start_shift_ptr);
1258translation_buffer = *translation_buffer_ptr;
1259 sfree(translation_buffer);
1264translation_length =
1266subject_blk->
length) - start_shift;
1268nucl_shift = start_shift;
1270nucl_shift = subject_blk->
length- start_shift - translation_length;
1275gen_code_string, &translation_buffer,
1276subject_length_ptr,
NULL);
1282oof_end = subject_blk->
length;
1286translation_length =
1289nucl_shift = start_shift;
1291nucl_shift = oof_end - start_shift - translation_length;
1296gen_code_string,
NULL,
1297subject_length_ptr, &translation_buffer);
1302*translation_buffer_ptr = translation_buffer;
1303*start_shift_ptr = start_shift;
1306 subject= translation_buffer + 1;
1320 if(start_shift > 0) {
1362 if(!hsp_list || hsp_list->
hspcnt<= 1)
1365 for(index = 0; index < hsp_list->
hspcnt- 1; ++index) {
1376 if(!hsp_list || hsp_list->
hspcnt<= 1)
1392 const double epsilon= 1.0e-180;
1397 if(evalue1 < evalue2) {
1399}
else if(evalue1 > evalue2) {
1442 if(hsp_list->
hspcnt> 1) {
1446 for(index = 0; index < hsp_list->
hspcnt- 1; ++index) {
1452 if(index < hsp_list->hspcnt - 1) {
1513 doublescore_density = (hsp1->
score+ hsp2->
score) *(1.0) /
1537 #define OVERLAP_DIAG_CLOSE 10 1549 for(index = 0; index < hsp_list->
hspcnt; ++index) {
1561 const Int4kDefaultAllocated=100;
1589 intnum = hsp_list->
hspcnt;
1597 for(index = 0; index < hsp_list->
hspcnt; ++index) {
1627 s_Heapify(
char* base0,
char* base,
char* lim,
char*
last,
size_twidth,
int(*compar )(
const void*,
const void* ))
1631 char* left_son,* large_son;
1633left_son = base0 + 2*(base-base0) + width;
1634 while(base <= lim) {
1635 if(left_son ==
last)
1636large_son = left_son;
1638large_son = (*compar)(left_son, left_son+width) >= 0 ?
1639left_son : left_son+width;
1640 if((*compar)(base, large_son) < 0) {
1641 for(
i=0;
i<width; ++
i) {
1643base[
i] = large_son[
i];
1644large_son[
i] = ch;
1647left_son = base0 + 2*(base-base0) + width;
1661 int(*compar )(
const void*,
const void* ))
1663 char* base = (
char*)
b;
1665 char* base0 = (
char*)base,* lim,* basef;
1670lim = &base[((nel-2)/2)*width];
1671basef = &base[(nel-1)*width];
1673 for(base = &base0[(
i- 1)*width];
i> 0; base = base - width) {
1674 s_Heapify(base0, base, lim, basef, width, compar);
1699hsp_array[0] = *hsp;
1700 if(hsp_list->
hspcnt>= 2) {
1701 s_Heapify((
char*)hsp_array, (
char*)hsp_array,
1702(
char*)&hsp_array[hsp_list->
hspcnt/2 - 1],
1703(
char*)&hsp_array[hsp_list->
hspcnt-1],
1717 doublebest_evalue = (double)
INT4_MAX;
1718 const doublekDelta = 1.0e-200;
1721 if(hsp_list->
hspcnt== 0)
1724 for(index=0; index<hsp_list->
hspcnt; index++)
1728 if(
ABS(best_evalue-hsp_list->
best_evalue)/(best_evalue+kDelta) > 0.01)
1743 doublebest_evalue = (double)
INT4_MAX;
1745 for(index=0; index<hsp_list->
hspcnt; index++)
1761hspcnt = hsp_list->
hspcnt;
1770 if(new_allocated > hsp_list->
allocated) {
1772realloc(hsp_array, new_allocated*
sizeof(
BlastHSP*));
1773 if(hsp_array ==
NULL)
1783hsp_allocated = new_allocated;
1798 if(hspcnt < hsp_allocated)
1800hsp_array[hsp_list->
hspcnt] = new_hsp;
1813 Int4subject_length,
1818 doublescaling_factor)
1827 doublegap_decay_divisor = 1.;
1830 if(hsp_list ==
NULL|| hsp_list->
hspcnt== 0)
1833kbp = (gapped_calculation ? sbp->
kbp_gap: sbp->
kbp);
1834hsp_cnt = hsp_list->
hspcnt;
1837 if(gap_decay_rate != 0.)
1840 for(index=0; index<hsp_cnt; index++) {
1841hsp = hsp_array[index];
1844 ASSERT(scaling_factor != 0.0);
1857 if(kbp[
i])
break;
1861 ASSERT(kbp[kbp_context]);
1862kbp[kbp_context]->
Lambda/= scaling_factor;
1866score = hsp->
score;
1867 if(hsp_list && hsp_list->
hspcnt!= 0
1868&& gapped_calculation && sbp->
round_down) {
1893hsp->
evalue/= gap_decay_divisor;
1895kbp[kbp_context]->
Lambda*= scaling_factor;
1915 if(hsp_list ==
NULL)
1918kbp = (gapped_calculation ? sbp->
kbp_gap: sbp->
kbp);
1920 for(index=0; index<hsp_list->
hspcnt; index++) {
1945 for(index=0; index<hsp_list->
hspcnt; index++) {
1962 if(!hsp_list || hsp_list->
hspcnt== 0)
1965 for(index = 0; index < hsp_list->
hspcnt; ++index) {
1985 if(hsp_list ==
NULL)
1991 for(index = 0; index < hsp_list->
hspcnt; index++) {
1992hsp = hsp_array[index];
1996 if(hsp->
evalue> cutoff) {
1999 if(index > hsp_cnt)
2000hsp_array[hsp_cnt] = hsp_array[index];
2005hsp_list->
hspcnt= hsp_cnt;
2026 for(index = 0; index < hsp_list->
hspcnt; index++) {
2027hsp = hsp_array[index];
2034 if(index > hsp_cnt)
2035hsp_array[hsp_cnt] = hsp_array[index];
2040hsp_list->
hspcnt= hsp_cnt;
2056 if((hsp_list ==
NULL) ||
2063 for(index = hsp_max; index < hsp_list->
hspcnt; index++) {
2067hsp_list->
hspcnt= hsp_max;
2084 if(hsp_list ==
NULL)
2088 for(index = 0; index < hsp_list->
hspcnt; index++) {
2089hsp = hsp_array[index];
2096 if(index > hsp_cnt)
2097hsp_array[hsp_cnt] = hsp_array[index];
2102hsp_list->
hspcnt= hsp_cnt;
2116 return(*xx)->
oid- (*yy)->oid;
2121 Int4contexts_per_query,
Int4*split_offsets,
2132 if(hitlist1 ==
NULL)
2134 if(hitlist2 ==
NULL) {
2135*combined_hit_list_ptr = hitlist1;
2136*old_hit_list_ptr =
NULL;
2145 if(num_hsplists1 > 1) {
2149 if(num_hsplists2 > 1) {
2157query_is_split =
FALSE;
2158 for(
i= 0;
i< contexts_per_query;
i++) {
2159 if(split_offsets[
i] > 0) {
2160query_is_split =
TRUE;
2164 ASSERT(chunk_overlap_size != 0);
2169 while(
i< num_hsplists1 && j < num_hsplists2) {
2173 if(hsplist1->
oid< hsplist2->
oid) {
2177 else if(hsplist1->
oid> hsplist2->
oid) {
2185 if(query_is_split) {
2188hsplist2->
hsp_max, split_offsets,
2203 for(;
i< num_hsplists1;
i++) {
2207 for(; j < num_hsplists2; j++) {
2216*old_hit_list_ptr =
NULL;
2217*combined_hit_list_ptr = new_hitlist;
2232 if(hsp_list ==
NULL|| hsp_list->
hspcnt== 0)
2236hspcnt = hsp_list->
hspcnt;
2239 for(index=0; index<hspcnt; index++)
2241 if(hsp_array[index] !=
NULL)
2243hsp_array[index1] = hsp_array[index];
2248 for(index=index1; index<hspcnt; index++)
2250hsp_array[index] =
NULL;
2253hsp_list->
hspcnt= index1;
2394 intindex, opid, qid, sid;
2402 for(index=0; index < esp->
size; index++) {
2403 for(opid=0; opid < esp->
num[index];){
2409sid+=esp->
num[index];
2410opid+=esp->
num[index];
2412qid+=esp->
num[index];
2413opid+=esp->
num[index];
2415 if(qid >= q_cut && sid >= s_cut) found =
TRUE;
2428 if(opid < esp->num[index]) {
2431esp->
num[0] = esp->
num[index] - opid;
2435 for(; index < esp->
size; index++, new_index++) {
2437esp->
num[new_index] = esp->
num[index];
2439esp->
size= new_index;
2443 if(opid < esp->num[index]) {
2445esp->
num[index] = opid;
2447esp->
size= index+1;
2467 if(hsp_list ==
NULL|| hsp_list->
hspcnt== 0)
2473 returnhsp_list->
hspcnt;
2476hsp_count = hsp_list->
hspcnt;
2480 while(
i< hsp_count) {
2482 while(
i+j < hsp_count &&
2483hsp_array[
i] && hsp_array[
i+j] &&
2489hsp = hsp_array[
i+j];
2496 for(k=
i+j; k<hsp_count; k++) {
2497hsp_array[k] = hsp_array[k+1];
2499hsp_array[hsp_count] = hsp;
2506 while(
i< hsp_count) {
2508 while(
i+j < hsp_count &&
2509hsp_array[
i] && hsp_array[
i+j] &&
2515hsp = hsp_array[
i+j];
2522 for(k=
i+j; k<hsp_count; k++) {
2523hsp_array[k] = hsp_array[k+1];
2525hsp_array[hsp_count] = hsp;
2548 intcurr_context, target_context;
2552 if(hsp_list ==
NULL|| hsp_list->
hspcnt== 0)
2556 returnhsp_list->
hspcnt;
2561 for(
i=0; i < hsp_list->hspcnt -1;
i++) {
2562 if(hsp_array[
i] ==
NULL){
2567e = hsp_array[
i]->
query.
end+ range_diff;
2569 if(e < 0) e = hsp_array[
i]->
query.
end;
2570 while(
i+j < hsp_list->hspcnt) {
2571 if(hsp_array[
i+j] && hsp_array[
i]->
context== hsp_array[
i+j]->
context&&
2572((hsp_array[
i+j]->
query.offset >= o) &&
2573(hsp_array[
i+j]->
query.
end<= e))){
2583 for(
i=0;
i< hsp_list->
hspcnt-1;
i++) {
2584 if(hsp_array[
i] ==
NULL){
2589curr_context = hsp_array[
i]->
context;
2591target_context = (hsp_array[
i]->
query.
frame> 0) ? curr_context +1 : curr_context -1;
2592e = qlen - (hsp_array[
i]->
query.
offset- range_diff);
2593o = qlen - (hsp_array[
i]->
query.
end+ range_diff);
2594 while(
i+j < hsp_list->hspcnt) {
2595 if(hsp_array[
i+j] && (hsp_array[
i+j]->
context== target_context) &&
2597(hsp_array[
i+j]->
query.end <= e))){
2605 returnhsp_list->
hspcnt;
2619 Uint1* query_start;
2632hspcnt = hsp_list->
hspcnt;
2635 if(hsp_list->
hspcnt== 0)
2647 if(kNucleotideSubject)
2650memset((
void*) &seq_arg, 0,
sizeof(seq_arg));
2651seq_arg.
oid= subject_blk->
oid;
2655seq_arg.
seq= subject_blk;
2660 for(index = 0; index < hspcnt; ++index) {
2666 else if(status < 0){
2672 if(kTranslateSubject) {
2673 if(!gen_code_string)
2683subject_start = subject_blk->
sequence;
2687 for(index = 0; index < hspcnt; ++index) {
2689 if(hsp_array[index] ==
NULL)
2692hsp = hsp_array[index];
2696query_start = query_blk->
sequence+
2699 if(kTranslateSubject)
2702 if(kNucleotideSubject) {
2705subject_start, word_params, sbp, kTranslateSubject);
2711 Int4align_length = 0;
2753 Int4index, index1, index2;
2756 ASSERT(new_hspcnt <= combined_hsp_list->allocated);
2758 if(new_hspcnt >= hsp_list->
hspcnt+ combined_hsp_list->
hspcnt) {
2760 for(index=combined_hsp_list->
hspcnt, index1=0;
2761index1<hsp_list->hspcnt; index1++) {
2765combined_hsp_list->
hspcnt= new_hspcnt;
2777index1 = index2 = 0;
2778 for(index = 0; index < new_hspcnt; ++index) {
2779 if(index1 < combined_hsp_list->hspcnt &&
2780(index2 >= hsp_list->
hspcnt||
2782&hsp_list->
hsp_array[index2]) <= 0)) {
2783new_hsp_array[index] = combined_hsp_list->
hsp_array[index1];
2786new_hsp_array[index] = hsp_list->
hsp_array[index2];
2791 for( ; index1 < combined_hsp_list->
hspcnt; ++index1) {
2792combined_hsp_list->
hsp_array[index1] =
2795 for( ; index2 < hsp_list->
hspcnt; ++index2) {
2801combined_hsp_list->
hsp_array= new_hsp_array;
2802combined_hsp_list->
hspcnt= new_hspcnt;
2812 BlastHSPList* combined_hsp_list = *combined_hsp_list_ptr;
2816 if(!hsp_list || hsp_list->
hspcnt== 0)
2820 if(!combined_hsp_list) {
2821*combined_hsp_list_ptr = hsp_list;
2822*old_hsp_list_ptr =
NULL;
2830 if(new_hspcnt > combined_hsp_list->
allocated&&
2832 Int4new_allocated =
MIN(2*new_hspcnt, hsp_num_max);
2836new_allocated*
sizeof(
BlastHSP*));
2838 if(new_hsp_array) {
2839combined_hsp_list->
allocated= new_allocated;
2840combined_hsp_list->
hsp_array= new_hsp_array;
2843new_hspcnt = combined_hsp_list->
allocated;
2846 if(combined_hsp_list->
allocated== hsp_num_max)
2852*old_hsp_list_ptr =
NULL;
2859 Int4hsp_num_max,
Int4*split_offsets,
2860 Int4contexts_per_query,
Int4chunk_overlap_size,
2863 BlastHSPList* combined_hsp_list = *combined_hsp_list_ptr;
2867 Int4index1, index2;
2868 Int4hspcnt1, hspcnt2, new_hspcnt = 0;
2869 Int4start_diag, end_diag;
2873 if(!hsp_list || hsp_list->
hspcnt== 0)
2877 if(!combined_hsp_list) {
2878*combined_hsp_list_ptr = hsp_list;
2879*hsp_list_ptr =
NULL;
2888hspcnt1 = hspcnt2 = 0;
2890 if(contexts_per_query < 0) {
2891 for(index1 = 0; index1 < combined_hsp_list->
hspcnt; index1++) {
2892hsp1 = combined_hsp_list->
hsp_array[index1];
2893 if(hsp1->
subject.
end> split_offsets[0]) {
2895hsp_var = combined_hsp_list->
hsp_array[hspcnt1];
2896combined_hsp_list->
hsp_array[hspcnt1] = hsp1;
2897combined_hsp_list->
hsp_array[index1] = hsp_var;
2901 for(index2 = 0; index2 < hsp_list->
hspcnt; index2++) {
2903 if(hsp2->
subject.
offset< split_offsets[0] + chunk_overlap_size) {
2905hsp_var = hsp_list->
hsp_array[hspcnt2];
2907hsp_list->
hsp_array[index2] = hsp_var;
2921 for(index1 = 0; index1 < combined_hsp_list->
hspcnt; index1++) {
2922hsp1 = combined_hsp_list->
hsp_array[index1];
2923offset_idx = hsp1->
context% contexts_per_query;
2924 if(split_offsets[offset_idx] < 0)
continue;
2926split_offsets[offset_idx]) ||
2928split_offsets[offset_idx] + chunk_overlap_size)) {
2930hsp_var = combined_hsp_list->
hsp_array[hspcnt1];
2931combined_hsp_list->
hsp_array[hspcnt1] = hsp1;
2932combined_hsp_list->
hsp_array[index1] = hsp_var;
2936 for(index2 = 0; index2 < hsp_list->
hspcnt; index2++) {
2938offset_idx = hsp2->
context% contexts_per_query;
2939 if(split_offsets[offset_idx] < 0)
continue;
2941split_offsets[offset_idx]) ||
2943split_offsets[offset_idx] + chunk_overlap_size)) {
2945hsp_var = hsp_list->
hsp_array[hspcnt2];
2947hsp_list->
hsp_array[index2] = hsp_var;
2956 if(hspcnt1 > 0 && hspcnt2 > 0) {
2960 for(index1 = 0; index1 < hspcnt1; index1++) {
2962hsp1 = hspp1[index1];
2964 for(index2 = 0; index2 < hspcnt2; index2++) {
2966hsp2 = hspp2[index2];
2982 if(contexts_per_query < 0 || hsp1->
query.frame >= 0) {
3010 if(new_hspcnt >= combined_hsp_list->
allocated-1 &&
3012 Int4new_allocated =
MIN(2*new_hspcnt, hsp_num_max);
3013 if(new_allocated > combined_hsp_list->
allocated) {
3016new_allocated*
sizeof(
BlastHSP*));
3017 if(new_hsp_array ==
NULL) {
3020combined_hsp_list->
hsp_array= new_hsp_array;
3021combined_hsp_list->
allocated= new_allocated;
3026new_hspcnt =
MIN(new_hspcnt, combined_hsp_list->
allocated);
3032*hsp_list_ptr =
NULL;
3045 for(index=0; index<hsp_list->
hspcnt; index++) {
3059 if(!hsp_list || hsp_list->
hspcnt== 0 ||
3060gapped_calculation ==
FALSE||
3064 for(index = 0; index < hsp_list->
hspcnt; ++index)
3090 else if(h1->
hspcnt== 0)
3092 else if(h2->
hspcnt== 0)
3174 Int4index, hsplist_count;
3180 for(index = 0; index < hsplist_count &&
3185 for( ; index < hsplist_count; ++index) {
3222 const intkStartValue = 100;
3268 intevalue_order = 0;
3272 for(index =0; index < hit_list->
hsplist_count; index++) {
3287 if(evalue_order < 0) {
3315 for(index=0; index<hsplist_count; index++) {
3316 if(hsplist_array[index]) {
3317hsplist_array[index1] = hsplist_array[index];
3322 for(index=index1; index<hsplist_count; index++) {
3323hsplist_array[index] =
NULL;
3375 for(index = 0; index <
results->num_queries; ++index)
3391 for(index = 0; index <
results->num_queries; ++index) {
3392hit_list =
results->hitlist_array[index];
3393 if(hit_list !=
NULL 3409 for(index = 0; index <
results->num_queries; ++index) {
3410hit_list =
results->hitlist_array[index];
3425 for(index = 0; index <
results->num_queries; ++index) {
3426hit_list =
results->hitlist_array[index];
3432 for(index1 = 0; index1 < hit_list->
hsplist_count/2; ++index1) {
3480 for(
i= 0;
i<
results->num_queries;
i++) {
3482 if(hitlist ==
NULL)
3485 for(j = hsp_count = 0; j < hitlist->
hsplist_count; j++) {
3487hsp_count += hsplist->
hspcnt;
3497 for(m = 0; m < hsplist->
hspcnt; k++, m++) {
3499hsp_array[k].
hsplist= hsplist;
3500hsp_array[k].
hsp= hsp;
3514 for(j = 0; j < hsp_count; j++) {
3519hsp, query_info, 0, masklevel)) {
3529 if(hsplist->
hspcnt== 1)
3539 if(hsplist->
hspcnt== 0) {
3557 if(!hsp_list || hsp_list->
hspcnt== 0)
3576 intpattern_index, hit_index;
3593hit_list =
results->hitlist_array[0];
3595 for(hit_index = 0; hit_index < hit_list->
hsplist_count; ++hit_index) {
3600 for(hsp_index = 0; hsp_index < hsp_list->
hspcnt; ++hsp_index) {
3603 if(!hsplist_array[pattern_index])
3605hsplist_array[pattern_index]->
oid= hsp_list->
oid;
3611 for(pattern_index = 0; pattern_index < num_patterns;
3613 if(hsplist_array[pattern_index]) {
3614 if(!phi_results[pattern_index])
3617hsplist_array[pattern_index],
3619hsplist_array[pattern_index] =
NULL;
3624 sfree(hsplist_array);
3627 for(pattern_index = 0; pattern_index < num_patterns; ++pattern_index) {
3663 if(
r1->hspcnt <
r2->hspcnt)
3665 else if(
r1->hspcnt >
r2->hspcnt)
3690 if(total_hsp_limit == 0) {
3691 returnhsp_limit_exceeded;
3694 for(query_index = 0; query_index <
results->num_queries; ++query_index) {
3697 Int4hsplist_count = 0;
3700 if( !(hit_list =
results->hitlist_array[query_index]) )
3708 for(subj_index = 0; subj_index < hsplist_count; ++subj_index) {
3709hsplist_array[subj_index] = hit_list->
hsplist_array[subj_index];
3712qsort((
void*)hsplist_array, hsplist_count,
3717 Uint4hsp_per_seq =
MAX(1, total_hsp_limit/hsplist_count);
3718 for(subj_index = 0; subj_index < hsplist_count; ++subj_index) {
3719 Int4allowed_hsp_num = ((subj_index+1)*hsp_per_seq) - tot_hsps;
3721 if(hsp_list->
hspcnt> allowed_hsp_num) {
3724 for(hsp_index = allowed_hsp_num;
3725hsp_index < hsp_list->
hspcnt; ++hsp_index) {
3728hsp_list->
hspcnt= allowed_hsp_num;
3729hsp_limit_exceeded =
TRUE;
3731tot_hsps += hsp_list->
hspcnt;
3734 sfree(hsplist_array);
3737 returnhsp_limit_exceeded;
3758 return(
r1->oid >
r2->oid);
3768 Uint4total_hsp_limit,
3774 if(total_hsp_limit == 0) {
3775 returnany_hsp_limit_exceeded;
3778 for(query_index = 0; query_index <
results->num_queries; ++query_index) {
3780 Int4hsplist_count = 0;
3782 Int4total_hsps = 0;
3784 if( hsp_limit_exceeded) hsp_limit_exceeded[query_index] =
FALSE;
3786 if( !(hit_list =
results->hitlist_array[query_index]) )
3791 for(subj_index = 0; subj_index < hsplist_count; ++subj_index) {
3795 if(total_hsps > total_hsp_limit)
3799 inthsp_counter = 0;
3801 if( hsp_limit_exceeded) {
3802hsp_limit_exceeded[query_index] =
TRUE;
3803any_hsp_limit_exceeded =
TRUE;
3805 for(subj_index = 0; subj_index < hsplist_count; ++subj_index) {
3810 for(subj_hsp=0; subj_hsp < subj_list->
hspcnt; ++subj_hsp) {
3811everything_list[hsp_counter].
hsp= hsps_per_subj[subj_hsp];
3812everything_list[hsp_counter].
oid= subj_list->
oid;
3813hsps_per_subj[subj_hsp] =
NULL;
3821 for(hsp_counter = total_hsp_limit; hsp_counter < total_hsps ; ++hsp_counter) {
3822everything_list[hsp_counter].
hsp=
Blast_HSPFree(everything_list[hsp_counter].hsp);
3823everything_list[hsp_counter].
oid= 0x7fffff;
3828 for(hsp_counter = 0; hsp_counter < total_hsp_limit; ++ hsp_counter)
3830 inthsp_counter_start = hsp_counter;
3834 while((everything_list[hsp_counter].oid == everything_list[hsp_counter+1].oid) &&
3835(hsp_counter + 1 < total_hsp_limit)) {
3838num_hsp = hsp_counter -hsp_counter_start + 1;
3840subj_list->
oid= everything_list[hsp_counter].
oid;
3843 for(hspcnt = 0; hspcnt < num_hsp; ++hspcnt) {
3849 free(everything_list);
3853 returnany_hsp_limit_exceeded;
3870*removed_hsps = rm_hsps;
3888*removed_hsps = rm_hsps;
Definitions used throughout BLAST.
#define sfree(x)
Safe free a pointer: belongs to a higher level header.
#define CODON_LENGTH
Codons are always of length 3.
#define BLAST_CMP(a, b)
A macro expression that returns 1, 0, -1 if a is greater than, equal to or less than b,...
#define NCBI_XBLAST_EXPORT
NULL operations for other cases.
BlastHSPResults * Blast_HSPResultsFromHSPStreamWithLimit(BlastHSPStream *hsp_stream, Uint4 num_queries, SBlastHitsParameters *hit_param, Uint4 max_num_hsps, Boolean *removed_hsps)
As Blast_HSPResultsFromHSPStream, except the total number of HSPs kept for each query does not exceed...
Int2 Blast_HSPListReapByRawScore(BlastHSPList *hsp_list, const BlastHitSavingOptions *hit_options)
Same as Blast_HSPListReapByEvalue() except that it uses the raw score of the hit and the HitSavingOpt...
Int2 Blast_HSPResultsReverseSort(BlastHSPResults *results)
Sort each hit list in the BLAST results by best e-value, in reverse order.
static void s_BlastHitListInsertHSPListInHeap(BlastHitList *hit_list, BlastHSPList *hsp_list)
Given a BlastHitList* with a heapified HSP list array, remove the worst scoring HSP list and insert t...
Int2 Blast_HSPListAppend(BlastHSPList **old_hsp_list_ptr, BlastHSPList **combined_hsp_list_ptr, Int4 hsp_num_max)
Append one HSP list to the other.
static int s_CompareHsplistHspcnt(const void *v1, const void *v2)
Comparison function for sorting HSP lists in increasing order of the number of HSPs in a hit.
void Blast_HSPListSortByEvalue(BlastHSPList *hsp_list)
Sort the HSPs in an HSP list by e-value, with scores and other criteria used to resolve ties.
static void s_BlastHitListPurge(BlastHitList *hit_list)
Purge a BlastHitList of empty HSP lists.
Int2 Blast_HitListHSPListsFree(BlastHitList *hitlist)
Deallocate memory for every HSP list on BlastHitList, as well as all their components.
static int s_CompareOidHSPwOid(const void *v1, const void *v2)
BlastHSP * Blast_HSPNew(void)
Allocate and zeros out memory for an HSP structure.
BlastHSPResults * Blast_HSPResultsFree(BlastHSPResults *results)
Deallocate memory for BLAST results.
static Int2 s_Blast_HSPGetOOFNumIdentitiesAndPositives(const Uint1 *query, const Uint1 *subject, const BlastHSP *hsp, EBlastProgramType program, Int4 *num_ident_ptr, Int4 *align_length_ptr, const BlastScoreBlk *sbp, Int4 *num_pos_ptr)
Calculate number of identities in an HSP for an out-of-frame alignment.
Int2 Blast_HSPInit(Int4 query_start, Int4 query_end, Int4 subject_start, Int4 subject_end, Int4 query_gapped_start, Int4 subject_gapped_start, Int4 query_context, Int2 query_frame, Int2 subject_frame, Int4 score, GapEditScript **gap_edit, BlastHSP **ret_hsp)
Allocates BlastHSP and inits with information from input.
void Blast_HSPListPHIGetEvalues(BlastHSPList *hsp_list, BlastScoreBlk *sbp, const BlastQueryInfo *query_info, const SPHIPatternSearchBlk *pattern_blk)
Calculate e-values for a PHI BLAST HSP list.
Int2 Blast_HSPGetNumIdentitiesAndPositives(const Uint1 *query, const Uint1 *subject, BlastHSP *hsp, const BlastScoringOptions *score_options, Int4 *align_length_ptr, const BlastScoreBlk *sbp)
Calculate number of identities and positives in an HSP and set the BlastHSP::num_ident and BlastHSP::...
static void s_CreateHeap(void *b, size_t nel, size_t width, int(*compar)(const void *, const void *))
Creates a heap of elements based on a comparison function.
static Boolean s_TrimResultsByTotalHSPLimitEx(BlastHSPResults *results, Uint4 total_hsp_limit, Boolean *hsp_limit_exceeded)
BlastHitList * Blast_HitListFree(BlastHitList *hitlist)
Deallocate memory for the hit list.
Int4 BlastHspNumMax(Boolean gapped_calculation, const BlastHitSavingOptions *options)
Calculated the number of HSPs that should be saved.
Int2 Blast_HitListMerge(BlastHitList **old_hit_list_ptr, BlastHitList **combined_hit_list_ptr, Int4 contexts_per_query, Int4 *split_offsets, Int4 chunk_overlap_size, Boolean allow_gap)
Combine two hitlists; both HitLists must contain HSPs that represent alignments to the same query seq...
static int s_EvalueCompareHSPs(const void *v1, const void *v2)
Comparison callback function for sorting HSPs by e-value and score, before saving BlastHSPList in a B...
Int2 Blast_HSPResultsSortByEvalue(BlastHSPResults *results)
Sort each hit list in the BLAST results by best e-value.
static Boolean s_UpdateReevaluatedHSP(BlastHSP *hsp, Boolean gapped, Int4 cutoff_score, Int4 score, const Uint1 *query_start, const Uint1 *subject_start, const Uint1 *best_q_start, const Uint1 *best_q_end, const Uint1 *best_s_start, const Uint1 *best_s_end, int best_start_esp_index, int best_end_esp_index, int best_end_esp_num)
Update HSP data after reevaluation with ambiguities.
Boolean Blast_HSPTestIdentityAndLength(EBlastProgramType program_number, BlastHSP *hsp, const Uint1 *query, const Uint1 *subject, const BlastScoringOptions *score_options, const BlastHitSavingOptions *hit_options)
Calculates number of identities and alignment lengths of an HSP via Blast_HSPGetNumIdentities and det...
Int4 Blast_HSPListSubjectBestHit(EBlastProgramType program, const BlastHSPSubjectBestHitOptions *subject_besthit_opts, const BlastQueryInfo *query_info, BlastHSPList *hsp_list)
static int s_SortHSPListByOid(const void *x, const void *y)
callback used to sort HSP lists in order of increasing OID
Int2 Blast_HSPResultsReverseOrder(BlastHSPResults *results)
Reverse order of HSP lists in each hit list in the BLAST results.
BlastHSPResults * Blast_HSPResultsFromHSPStream(BlastHSPStream *hsp_stream, size_t num_queries, SBlastHitsParameters *bhp)
Move all of the hits within an HSPStream into a BlastHSPResults structure.
static Int4 s_HSPEndDiag(const BlastHSP *hsp)
Retrieve the ending diagonal of an HSP.
Boolean Blast_HSPList_IsEmpty(const BlastHSPList *hsp_list)
Returns true if the BlastHSPList contains no HSPs.
BlastHitList * Blast_HitListNew(Int4 hitlist_size)
Allocate memory for a hit list of a given size.
Int2 Blast_HSPGetPartialSubjectTranslation(BLAST_SequenceBlk *subject_blk, BlastHSP *hsp, Boolean is_ooframe, const Uint1 *gen_code_string, Uint1 **translation_buffer_ptr, Uint1 **subject_ptr, Int4 *subject_length_ptr, Int4 *start_shift_ptr)
Performs the translation and coordinates adjustment, if only part of the subject sequence is translat...
Int2 Blast_HitListSortByEvalue(BlastHitList *hit_list)
Sort BlastHitLIst bon evalue.
Int4 Blast_HSPListPurgeHSPsWithCommonEndpoints(EBlastProgramType program, BlastHSPList *hsp_list, Boolean purge)
Check for an overlap of two different alignments and remove redundant HSPs.
Int2 Blast_HSPResultsInsertHSPList(BlastHSPResults *results, BlastHSPList *hsp_list, Int4 hitlist_size)
Blast_HSPResultsInsertHSPList Insert an HSP list to the appropriate place in the results structure.
void Blast_HSPGetAdjustedOffsets(EBlastProgramType program, BlastHSP *hsp, Int4 query_length, Int4 subject_length, Int4 *q_start, Int4 *q_end, Int4 *s_start, Int4 *s_end)
Adjust HSP endpoint offsets according to strand/frame; return values in 1-offset coordinates instead ...
void Blast_HSPCalcLengthAndGaps(const BlastHSP *hsp, Int4 *length_out, Int4 *gaps_out, Int4 *gap_opens_out)
Calculate length of an HSP as length in query plus length of gaps in query.
BlastHSPList * Blast_HSPListNew(Int4 hsp_max)
Creates HSP list structure with a default size HSP array.
static int s_EvalueComp(double evalue1, double evalue2)
Compares 2 evalues, consider them equal if both are close enough to zero.
static void s_BlastHSPListInsertHSPInHeap(BlastHSPList *hsp_list, BlastHSP **hsp)
Given a BlastHSPList* with a heapified HSP array, check whether the new HSP is better than the worst ...
static void s_CutOffGapEditScript(BlastHSP *hsp, Int4 q_cut, Int4 s_cut, Boolean cut_begin)
Int2 Blast_HSPGetNumIdentities(const Uint1 *query, const Uint1 *subject, BlastHSP *hsp, const BlastScoringOptions *score_options, Int4 *align_length_ptr)
Calculate number of identities in an HSP and set the BlastHSP::num_ident field (unconditionally)
Int2 Blast_HSPListPurgeNullHSPs(BlastHSPList *hsp_list)
Cleans out the NULLed out HSP's from the HSP array that is part of the BlastHSPList.
BlastHSPMappingInfo * BlastHSPMappingInfoNew(void)
Allocate memory for an HSP's additional data structure.
BlastHSPList * BlastHSPListDup(const BlastHSPList *hsp_list)
Returns a duplicate (deep copy) of the given hsp list.
Boolean Blast_HSPTest(BlastHSP *hsp, const BlastHitSavingOptions *hit_options, Int4 align_length)
Determines whether this HSP should be kept or deleted.
struct SHspWrap SHspWrap
Auxiliary structure for sorting HSPs.
BlastHSPResults * Blast_HSPResultsNew(Int4 num_queries)
Initialize the results structure.
BlastHSPMappingInfo * BlastHSPMappingInfoFree(BlastHSPMappingInfo *info)
Deallocate memory for an HSP's additional data structure.
Boolean Blast_HSPReevaluateWithAmbiguitiesGapped(BlastHSP *hsp, const Uint1 *q, const Int4 qlen, const Uint1 *s, const Int4 slen, const BlastHitSavingParameters *hit_params, const BlastScoringParameters *score_params, const BlastScoreBlk *sbp)
Reevaluate the HSP's score and percent identity after taking into account the ambiguity information.
BlastHSP * Blast_HSPClone(const BlastHSP *hsp)
Make a deep copy of an HSP.
Int2 Blast_HitListPurgeNullHSPLists(BlastHitList *hit_list)
Purges a BlastHitList of NULL HSP lists.
Int2 Blast_HSPListGetEvalues(EBlastProgramType program_number, const BlastQueryInfo *query_info, Int4 subject_length, BlastHSPList *hsp_list, Boolean gapped_calculation, Boolean RPS_prelim, const BlastScoreBlk *sbp, double gap_decay_rate, double scaling_factor)
Calculate the expected values for all HSPs in a hit list, without using the sum statistics.
static Boolean s_HSPTest(const BlastHSP *hsp, const BlastHitSavingOptions *hit_options, Int4 align_length)
Int2 Blast_HSPListReevaluateUngapped(EBlastProgramType program, BlastHSPList *hsp_list, BLAST_SequenceBlk *query_blk, BLAST_SequenceBlk *subject_blk, const BlastInitialWordParameters *word_params, const BlastHitSavingParameters *hit_params, const BlastQueryInfo *query_info, BlastScoreBlk *sbp, const BlastScoringParameters *score_params, const BlastSeqSrc *seq_src, const Uint1 *gen_code_string)
Reevaluate all ungapped HSPs in an HSP list.
Boolean Blast_HSPQueryCoverageTest(BlastHSP *hsp, double min_query_coverage_pct, Int4 query_length)
Calculate query coverage percentage of an hsp.
int ScoreCompareHSPs(const void *h1, const void *h2)
Comparison callback function for sorting HSPs, first by score in descending order,...
Int2 Blast_TrimHSPListByMaxHsps(BlastHSPList *hsp_list, const BlastHitSavingOptions *hit_options)
Int2 Blast_HSPListSaveHSP(BlastHSPList *hsp_list, BlastHSP *new_hsp)
Saves HSP information into a BlastHSPList structure.
SBlastHitsParameters * SBlastHitsParametersFree(SBlastHitsParameters *param)
Deallocated SBlastHitsParameters.
static Boolean s_BlastMergeTwoHSPs(BlastHSP *hsp1, BlastHSP *hsp2, Boolean allow_gap)
Given two hits, check if the hits can be merged and do the merge if so.
static int s_SortHspWrapRawScore(const void *x, const void *y)
callback used to sort a list of encapsulated HSP structures in order of decreasing raw score -RMH-
void Blast_HSPListAdjustOddBlastnScores(BlastHSPList *hsp_list, Boolean gapped_calculation, const BlastScoreBlk *sbp)
For nucleotide BLAST, if the match reward score is equal to 2, random alignments are dominated by run...
static void s_Heapify(char *base0, char *base, char *lim, char *last, size_t width, int(*compar)(const void *, const void *))
This is a copy of a static function from ncbimisc.c.
static int s_QueryEndCompareHSPs(const void *v1, const void *v2)
Callback for sorting HSPs by ending offset in query.
Int2 Blast_HSPListReapByQueryCoverage(BlastHSPList *hsp_list, const BlastHitSavingOptions *hit_options, const BlastQueryInfo *query_info, EBlastProgramType program_number)
Discard the HSPs below the min query coverage pct from the HSP list.
BlastHSP * Blast_HSPFree(BlastHSP *hsp)
Deallocate memory for an HSP structure.
Int2 Blast_HSPResultsApplyMasklevel(BlastHSPResults *results, const BlastQueryInfo *query_info, Int4 masklevel, Int4 query_length)
Apply Cross_match like masklevel to HSP list.
Boolean Blast_HSPListIsSortedByScore(const BlastHSPList *hsp_list)
Check if HSP list is sorted by score.
static void s_BlastHSPListsCombineByScore(BlastHSPList *hsp_list, BlastHSPList *combined_hsp_list, Int4 new_hspcnt)
Combine two HSP lists, without altering the individual HSPs, and without reallocating the HSP array.
BlastHSPResults * Blast_HSPResultsFromHSPStreamWithLimitEx(BlastHSPStream *hsp_stream, Uint4 num_queries, SBlastHitsParameters *hit_param, Uint4 max_num_hsps, Boolean *removed_hsps)
As Blast_HSPResultsFromHSPStreamWithLimit, except accept and return array of Boolen flags specifying ...
const Uint1 * Blast_HSPGetTargetTranslation(SBlastTargetTranslation *target_t, const BlastHSP *hsp, Int4 *translated_length)
Returns a buffer with a protein translated from nucleotide.
static int s_QueryOffsetCompareHSPs(const void *v1, const void *v2)
Callback for sorting HSPs by starting offset in query.
static Boolean s_UpdateReevaluatedHSPUngapped(BlastHSP *hsp, Int4 cutoff_score, Int4 score, const Uint1 *query_start, const Uint1 *subject_start, const Uint1 *best_q_start, const Uint1 *best_q_end, const Uint1 *best_s_start, const Uint1 *best_s_end)
Update HSP data after reevaluation with ambiguities for an ungapped search.
static double s_BlastGetBestEvalue(const BlastHSPList *hsp_list)
Gets the best (lowest) evalue from the BlastHSPList.
struct BlastHSPwOid BlastHSPwOid
Int4 GetPrelimHitlistSize(Int4 hitlist_size, Int4 compositionBasedStats, Boolean gapped_calculation)
static int s_EvalueCompareHSPLists(const void *v1, const void *v2)
Callback for sorting hsp lists by their best evalue/score; Evalues are compared with the condition th...
#define OVERLAP_DIAG_CLOSE
Maximal diagonal distance between HSP starting offsets, within which HSPs from search of different ch...
static Int2 s_Blast_HitListGrowHSPListArray(BlastHitList *hit_list)
Given a BlastHitList pointer this function makes the hsplist_array larger, up to a maximum size.
BlastHSPResults ** PHIBlast_HSPResultsSplit(const BlastHSPResults *results, const SPHIQueryInfo *pattern_info)
Splits the BlastHSPResults structure for a PHI BLAST search into an array of BlastHSPResults structur...
void Blast_HSPAdjustSubjectOffset(BlastHSP *hsp, Int4 start_shift)
Adjusts offsets if partial sequence was used for extension.
BlastHSPList * Blast_HSPListFree(BlastHSPList *hsp_list)
Deallocate memory for an HSP list structure as well as all it's components.
double Blast_HSPGetQueryCoverage(const BlastHSP *hsp, Int4 query_length)
Calculate query coverage percentage of an hsp.
Boolean Blast_HSPReevaluateWithAmbiguitiesUngapped(BlastHSP *hsp, const Uint1 *query_start, const Uint1 *subject_start, const BlastInitialWordParameters *word_params, BlastScoreBlk *sbp, Boolean translated)
Reevaluate the HSP's score and percent identity after taking into account the ambiguity information.
static int s_EvalueCompareHSPListsRev(const void *v1, const void *v2)
Callback for sorting hsp lists by their best e-value/score, in reverse order - from higher e-value to...
static void s_HSPPHIGetEvalue(BlastHSP *hsp, BlastScoreBlk *sbp, const BlastQueryInfo *query_info, const SPHIPatternSearchBlk *pattern_blk)
Calculate e-value for an HSP found by PHI BLAST.
static BlastHSP * s_BlastHSPCopy(const BlastHSP *hsp)
Copies all contents of a BlastHSP structure.
Int2 SBlastHitsParametersNew(const BlastHitSavingOptions *hit_options, const BlastExtensionOptions *ext_options, const BlastScoringOptions *scoring_options, SBlastHitsParameters **retval)
Sets up small structures used by blast_hit.c for saving HSPs.
static Int4 s_HSPStartDiag(const BlastHSP *hsp)
Retrieve the starting diagonal of an HSP.
Int4 PhiBlastGetEffectiveNumberOfPatterns(const BlastQueryInfo *query_info)
Count the number of occurrences of pattern in sequence, which do not overlap by more than half the pa...
static Boolean s_BlastCheckBestEvalue(const BlastHSPList *hsp_list)
Verifies that the best_evalue field on the BlastHSPList is correct.
static Boolean s_TrimResultsByTotalHSPLimit(BlastHSPResults *results, Uint4 total_hsp_limit)
Removes extra results if a limit is imposed on the total number of HSPs returned.
static void s_BlastSegGetTranslatedOffsets(const BlastSeg *segment, Int4 seq_length, Int4 *start, Int4 *end)
Adjust start and end of an HSP in a translated sequence segment.
void Blast_HSPListPHIGetBitScores(BlastHSPList *hsp_list, BlastScoreBlk *sbp)
Calculate bit scores from raw scores in an HSP list for a PHI BLAST search.
void Blast_HSPListSwap(BlastHSPList *list1, BlastHSPList *list2)
Swaps the two HSP lists via structure assignment.
void Blast_HSPListSortByScore(BlastHSPList *hsp_list)
Sort the HSPs in an HSP list by score.
Int2 Blast_HSPListReapByEvalue(BlastHSPList *hsp_list, const BlastHitSavingOptions *hit_options)
Discard the HSPs above the e-value threshold from the HSP list.
Int2 Blast_HSPListsMerge(BlastHSPList **hsp_list_ptr, BlastHSPList **combined_hsp_list_ptr, Int4 hsp_num_max, Int4 *split_offsets, Int4 contexts_per_query, Int4 chunk_overlap_size, Boolean allow_gap, Boolean short_reads)
Merge an HSP list from a chunk of the subject sequence into a previously computed HSP list.
SBlastHitsParameters * SBlastHitsParametersDup(const SBlastHitsParameters *hit_params)
Make a deep copy of the SBlastHitsParameters structure passed in.
Int2 Blast_HSPListGetBitScores(BlastHSPList *hsp_list, Boolean gapped_calculation, const BlastScoreBlk *sbp)
Calculate bit scores from raw scores in an HSP list.
static int s_CompareScoreHSPwOid(const void *v1, const void *v2)
Int2 Blast_HitListUpdate(BlastHitList *hit_list, BlastHSPList *hsp_list)
Insert a new HSP list into the hit list.
static Int2 s_Blast_HSPGetNumIdentitiesAndPositives(const Uint1 *query, const Uint1 *subject, const BlastHSP *hsp, Int4 *num_ident_ptr, Int4 *align_length_ptr, const BlastScoreBlk *sbp, Int4 *num_pos_ptr)
Calculate number of identities in a regular HSP.
void Blast_HSPListAdjustOffsets(BlastHSPList *hsp_list, Int4 offset)
Adjust subject offsets in an HSP list if only part of the subject sequence was searched.
Structures and API used for saving BLAST hits.
Utilities for dealing with BLAST HSPs in the core of BLAST.
#define CONTAINED_IN_HSP(a, b, c, d, e, f)
TRUE if c is between a and b; f between d and e.
Declaration of ADT to save and retrieve lists of HSPs in the BLAST engine.
const int kBlastHSPStream_Eof
Return value when the end of the stream is reached (applicable to read method only)
int BlastHSPStreamRead(BlastHSPStream *hsp_stream, BlastHSPList **hsp_list)
Invokes the user-specified read function for this BlastHSPStream implementation.
Int4 BlastIntervalTreeMasksHSP(const BlastIntervalTree *tree, const BlastHSP *hsp, const BlastQueryInfo *query_info, Int4 subtree_index, Int4 masklevel)
void Blast_IntervalTreeReset(BlastIntervalTree *tree)
Empty an interval tree structure but do not free it.
BlastIntervalTree * Blast_IntervalTreeInit(Int4 q_start, Int4 q_end, Int4 s_start, Int4 s_end)
Initialize an interval tree structure.
Int2 BlastIntervalTreeAddHSP(BlastHSP *hsp, BlastIntervalTree *tree, const BlastQueryInfo *query_info, EITreeIndexMethod index_method)
Add an HSP to an existing interval tree.
BlastIntervalTree * Blast_IntervalTreeFree(BlastIntervalTree *tree)
Deallocate an interval tree structure.
Interface for an interval tree, used for fast HSP containment tests.
@ eQueryOnlyStrandIndifferent
Index by query offset only.
#define BLASTERR_MEMORY
System error: out of memory condition.
Boolean Blast_ProgramIsPhiBlast(EBlastProgramType p)
Returns true if program is PHI-BLAST (i.e.
Boolean Blast_QueryIsTranslated(EBlastProgramType p)
Returns true if the query is translated.
Boolean Blast_SubjectIsNucleotide(EBlastProgramType p)
Returns true if the subject is nucleotide.
Boolean Blast_ProgramIsRpsBlast(EBlastProgramType p)
Returns true if program is RPS-BLAST (i.e.
EBlastProgramType
Defines the engine's notion of the different applications of the BLAST algorithm.
Boolean Blast_SubjectIsTranslated(EBlastProgramType p)
Returns true if the subject is translated.
void BlastSeqSrcReleaseSequence(const BlastSeqSrc *seq_src, BlastSeqSrcGetSeqArg *getseq_arg)
Deallocate individual sequence.
#define BLAST_SEQSRC_EXCLUDED
Sequence excluded due to filtering.
Int2 BlastSeqSrcGetSequence(const BlastSeqSrc *seq_src, BlastSeqSrcGetSeqArg *getseq_arg)
Retrieve an individual sequence.
double BLAST_GapDecayDivisor(double decayrate, unsigned nsegs)
Compute a divisor used to weight the evalue of a collection of "nsegs" distinct alignments.
double BLAST_SpougeStoE(Int4 S, Blast_KarlinBlk *kbp, Blast_GumbelBlk *gbp, Int4 qlen, Int4 slen)
Calculates the Expect value based upon the Spouge's FSC method.
double BLAST_KarlinStoE_simple(Int4 S, Blast_KarlinBlk *kbp, Int8 searchsp)
Calculates the Expect value based upon the search space and some Karlin-Altschul parameters.
Various auxiliary BLAST utility functions.
Int4 BLAST_FrameToContext(Int2 frame, EBlastProgramType program)
Convert translation frame or strand into a context number suitable for indexing into the BlastQueryIn...
#define FENCE_SENTRY
This sentry value is used as a 'fence' around the valid portions of partially decoded sequences.
int Blast_GetPartialTranslation(const Uint1 *nucl_seq, Int4 nucl_length, Int2 frame, const Uint1 *genetic_code, Uint1 **translation_buffer_ptr, Int4 *protein_length, Uint1 **mixed_seq_ptr)
Get one frame translation - needed when only parts of subject sequences are translated.
Int2 BlastTargetTranslationNew(BLAST_SequenceBlk *subject_blk, const Uint1 *gen_code_string, EBlastProgramType program_number, Boolean is_ooframe, SBlastTargetTranslation **target)
Sets up structure for target translation.
SBlastTargetTranslation * BlastTargetTranslationFree(SBlastTargetTranslation *target_t)
Free SBlastTargetTranslation.
Int4 BLAST_GetTranslation(const Uint1 *query_seq, const Uint1 *query_seq_rev, Int4 nt_length, Int2 frame, Uint1 *buffer, const Uint1 *genetic_code)
GetTranslation to get the translation of the nucl.
#define MAX_FULL_TRANSLATION
Maximal unpacked subject sequence length for which full translation is performed up front.
Int2 GetReverseNuclSequence(const Uint1 *sequence, Int4 length, Uint1 **rev_sequence_ptr)
Reverse a nucleotide sequence in the blastna encoding, adding sentinel bytes on both ends.
static DLIST_TYPE *DLIST_NAME() last(DLIST_LIST_TYPE *list)
GapEditScript * GapEditScriptDup(const GapEditScript *old)
Duplicates the edit script structure.
@ eGapAlignDel2
Frame shift deletion of two nucleotides.
@ eGapAlignIns2
Frame shift insertion of two nucleotides.
@ eGapAlignIns1
Frame shift insertion of one nucleotide.
@ eGapAlignIns
Insertion: a gap in subject.
@ eGapAlignDel1
Frame shift deletion of one nucleotide.
@ eGapAlignSub
Substitution.
@ eGapAlignDel
Deletion: a gap in query.
GapEditScript * GapEditScriptNew(Int4 size)
Initialize the edit script structure.
Int2 GapEditScriptPartialCopy(GapEditScript *new_esp, int offset, const GapEditScript *old_esp, int start, int stop)
Copies the portion of the GapEditScript specified by start and stop to a new one the new one should a...
GapEditScript * GapEditScriptDelete(GapEditScript *esp)
Free edit script structure.
@ eBlastEncodingNcbi4na
NCBI4na.
@ eBlastEncodingNucleotide
Special encoding for preliminary stage of BLAST: permutation of NCBI4na.
uint8_t Uint1
1-byte (8-bit) unsigned integer
int16_t Int2
2-byte (16-bit) signed integer
int32_t Int4
4-byte (32-bit) signed integer
uint32_t Uint4
4-byte (32-bit) unsigned integer
JumperEditsBlock * JumperEditsBlockFree(JumperEditsBlock *block)
SequenceOverhangs * SequenceOverhangsFree(SequenceOverhangs *overhangs)
JumperEditsBlock * JumperEditsBlockDup(const JumperEditsBlock *block)
const struct ncbi::grid::netcache::search::fields::SIZE size
Prototypes for portable math library (ported from C Toolkit)
#define NCBIMATH_LN2
Natural log(2)
#define MIN(a, b)
returns smaller of a and b.
#define INT4_MAX
largest nubmer represented by signed int
void * BlastMemDup(const void *orig, size_t size)
Copies memory using memcpy and malloc.
Uint1 Boolean
bool replacment for C
#define TRUE
bool replacment for C indicating true.
#define FALSE
bool replacment for C indicating false.
#define ABS(a)
returns absolute value of a (|a|)
#define ASSERT
macro for assert.
#define MAX(a, b)
returns larger of a and b.
double lambda(size_t dimMatrix_, const Int4 *const *scoreMatrix_, const double *q_)
void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)
static int pattern_info(int what, void *where, BOOL unsetok)
static const sljit_gpr r1
static const sljit_gpr r2
Structure to hold a sequence.
Uint1 * sequence_start
Start of sequence, usually one byte before sequence as that byte is a NULL sentinel byte.
Int4 oid
The ordinal id of the current sequence.
Int4 length
Length of sequence.
Uint1 * sequence_nomask
Start of query sequence without masking.
Uint1 * sequence
Sequence used for search (could be translation).
Int4 query_length
Length of this query, strand or frame.
Int4 query_offset
Offset of this query, strand or frame in the concatenated super-query.
Int4 length_adjustment
Length adjustment for boundary conditions.
Int8 eff_searchsp
Effective search space for this context.
Options used for gapped extension These include: a.
Int4 compositionBasedStats
mode of compositional adjustment to use; if zero then compositional adjustment is not used
Int4 cutoff_score
Raw cutoff score corresponding to the e-value provided by the user if no sum stats,...
The structure to hold all HSPs for a given sequence after the gapped alignment.
Boolean do_not_reallocate
Is reallocation of the hsp_array allowed?
Int4 oid
The ordinal id of the subject sequence this HSP list is for.
Int4 hspcnt
Number of HSPs saved.
BlastHSP ** hsp_array
Array of pointers to individual HSPs.
Int4 hsp_max
The maximal number of HSPs allowed to be saved.
double best_evalue
Smallest e-value for HSPs in this list.
Int4 allocated
The allocated size of the hsp_array.
Int4 query_index
Index of the query which this HSPList corresponds to.
Mapping information for an HSP.
Uint1 left_edge
Two subject bases before the alignment in the four least significant bits and flags in most significa...
JumperEditsBlock * edits
Information about mismatches and gaps, used for mapping short reads.
SequenceOverhangs * subject_overhangs
Unaligned subject subsequence.
The structure to contain all BLAST results, for multiple queries.
BlastHitList ** hitlist_array
Array of results for individual query sequences.
Int4 num_queries
Number of query sequences.
Default implementation of BlastHSPStream.
unsigned int max_range_diff
Structure holding all information about an HSP.
SPHIHspInfo * pat_info
In PHI BLAST, information about this pattern match.
double evalue
This HSP's e-value.
Int4 num_ident
Number of identical base pairs in this HSP.
BlastSeg query
Query sequence info.
Int4 context
Context number of query.
double bit_score
Bit score, calculated from score.
Int4 num
How many HSP's are linked together for sum statistics evaluation? If unset (0), this HSP is not part ...
BlastSeg subject
Subject sequence info.
GapEditScript * gap_info
ALL gapped alignment is here.
Int2 comp_adjustment_method
which mode of composition adjustment was used; relevant only for blastp and tblastn
Int4 score
This HSP's raw score.
BlastHSPMappingInfo * map_info
The structure to contain all BLAST results for one query sequence.
double worst_evalue
Highest of the best e-values among the HSP lists.
Int4 hsplist_max
Maximal allowed size of the HSP lists array.
BlastHSPList ** hsplist_array
Array of HSP lists for individual database hits.
Int4 hsplist_count
Filled size of the HSP lists array.
Int4 low_score
The lowest of the best scores among the HSP lists.
Int4 hsplist_current
Number of allocated HSP list arrays.
Boolean heapified
Is this hit list already heapified?
Options used when evaluating and saving hits These include: a.
Int4 max_hsps_per_subject
Queries are paired reads, for mapping.
double expect_value
The expect value cut-off threshold for an HSP, or a combined hit if sum statistics is used.
Int4 cutoff_score
The (raw) score cut-off threshold.
Int4 hsp_num_max
Maximal number of HSPs to save for one database sequence.
Int4 hitlist_size
Maximal number of database sequences to return results for.
double query_cov_hsp_perc
Min query coverage hsp percentage.
double percent_identity
The percent identity cut-off threshold.
Parameter block that contains a pointer to BlastHitSavingOptions and the values derived from it.
BlastGappedCutoffs * cutoffs
per-context gapped cutoff information
BlastHitSavingOptions * options
The original (unparsed) options.
Parameter block that contains a pointer to BlastInitialWordOptions and the values derived from it.
BlastUngappedCutoffs * cutoffs
cutoff values (one per context)
Main structure describing an interval tree.
The query related information.
BlastContextInfo * contexts
Information per context.
struct SPHIQueryInfo * pattern_info
Counts of PHI BLAST pattern occurrences, used in PHI BLAST only.
Structure used for scoring calculations.
Boolean protein_alphabet
TRUE if alphabet_code is for a protein alphabet (e.g., ncbistdaa etc.), FALSE for nt.
Blast_KarlinBlk ** kbp
Karlin-Altschul parameters.
Blast_KarlinBlk ** kbp_gap
K-A parameters for gapped alignments.
Boolean round_down
Score must be rounded down to nearest even score if odd.
Int4 number_of_contexts
Used by sfp and kbp, how large are these.
SBlastScoreMatrix * matrix
scoring matrix data
Blast_GumbelBlk * gbp
Gumbel parameters for FSC.
Scoring options block Used to produce the BlastScoreBlk structure This structure may be needed for lo...
EBlastProgramType program_number
indicates blastn, blastp, etc.
Boolean gapped_calculation
gap-free search if FALSE
Boolean is_ooframe
Should out-of-frame gapping be used in a translated search?
Scoring parameters block Contains scoring-related information that is actually used for the blast sea...
Int4 gap_extend
Penalty for each gap residue (scaled version)
Int2 penalty
Penalty for a mismatch.
Int4 gap_open
Extra penalty for starting a gap (scaled version)
BlastScoringOptions * options
User-provided values for these params.
Int2 reward
Reward for a match.
One sequence segment within an HSP.
Int4 gapped_start
Where the gapped extension started.
Int2 frame
Translation frame.
Int4 offset
Start of hsp.
Structure used as the second argument to functions satisfying the GetSeqBlkFnPtr signature,...
Int4 oid
Oid in BLAST database, index in an array of sequences, etc [in].
EBlastEncoding encoding
Encoding of sequence, i.e.
Boolean check_oid_exclusion
Check whether an OID is excluded due to overlapping filtering.
BLAST_SequenceBlk * seq
Sequence to return, if NULL, it should allocated by GetSeqBlkFnPtr (using BlastSeqBlkNew or BlastSetU...
Complete type definition of Blast Sequence Source ADT.
Int4 cutoff_score
Cutoff score for saving ungapped hits.
Structure to hold the Karlin-Altschul parameters.
double paramC
for use in seed.
double Lambda
Lambda value used in statistics.
double logK
natural log of K value used in statistics
Edit script: linked list of correspondencies between two sequences.
Int4 * num
Array of number of operations.
Int4 size
Size of above arrays.
EGapAlignOpType * op_type
Array of type of operation.
Keeps prelim_hitlist_size and HitSavingOptions together, mostly for use by hspstream.
Int4 prelim_hitlist_size
number of hits saved during preliminary part of search.
int ** data
actual scoring matrix data, stored in row-major form
Information about target translations.
EBlastProgramType program_number
Program being run.
Int4 * range
start and stop of translated sequences.
const Uint1 * gen_code_string
Genetic code string for translation.
BLAST_SequenceBlk * subject_blk
target sequence being translated.
Uint1 ** translations
two dimensional array for translations.
Boolean partial
specifies that nucleotide sequence is too long to translated.
Auxiliary structure for sorting HSPs.
BlastHSP * hsp
HSP described by this structure.
BlastHSPList * hsplist
The HSPList to which this HSP belongs.
In PHI BLAST: information about pattern match in a given HSP.
Int4 index
Index of query pattern occurrence for this HSP.
Int4 offset
Starting offset of this pattern occurrence.
Structure containing all auxiliary information needed in a pattern search.
Int4 num_patterns_db
Number of patterns actually found during the database search.
In PHI BLAST, structure containing information about all pattern occurrences in query.
Int4 num_patterns
Number of pattern occurrences in query.
SPHIPatternInfo * occurrences
Array of pattern occurrence information structures.
Structure to save short unaligned subsequences outside an HSP.
Uint1 * left
Left subsequence.
Uint1 * right
Rught subsequence.
Int4 right_len
Length of the right subsequence.
Int4 left_len
Length of the left subsequence.
static CS_CONTEXT * context
voidp calloc(uInt items, uInt size)
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4