chain->
score;
69 if(!list || !chain) {
88 if(check_for_duplicates &&
103 if(check_for_duplicates &&
125 Int4cutoff_edit_dist)
131 if(chain->
score>= cutoff_score) {
134 Int4num_identical = 0;
137 if(cutoff_edit_dist < 0) {
141 for(; h; h = h->
next) {
152 if(align_len - num_identical <= cutoff_edit_dist) {
168 Int4list_score, chain_score;
170 if(!list || !chain) {
183 if(list_score > chain_score) {
187 else if(list_score < chain_score) {
198 if(check_for_duplicates &&
213 if(check_for_duplicates &&
239 Int4cutoff_score,
Int4cutoff_edit_dist,
245 if(!list || !chain) {
255 if(cutoff_score <= 0) {
258 else if(ch->
score>= cutoff_score) {
286best_score = list->
score;
288 while(ch->
next&& best_score - ch->
next->
score<= margin) {
331chain = chain->
next;
359chain = chain->
next;
373chain = chain->
next;
374 for(; chain; chain = chain->
next,
prev=
prev->next) {
461 returnlength * seq_error;
464 returnopen_score +
MIN(length, 4) * extend_score;
469 Int4gap_open_score,
Int4gap_extend_score)
474 const Int4kGap = 15;
475 Int4num_identical = 0;
477 Int4subject_gap = 0;
482 ASSERT(num_matches >= 0);
484score += num_matches;
485num_identical += num_matches;
491 if(subject_gap > 0) {
506score += mismatch_score;
509 if(subject_gap > 0) {
520 if(subject_gap > 0) {
529score += hsp->
query.
end- last_pos;
530num_identical += hsp->
query.
end- last_pos;
542 Int4overlap_f, overlap_s;
548 if((
a->query.offset <=
b->query.offset &&
a->query.end >=
b->query.end) ||
549(
a->query.offset >=
b->query.offset &&
a->query.end <=
b->query.end)) {
551 return MIN(
a->score,
b->score);
555 if((
a->query.end <
b->query.offset &&
a->query.offset <
b->query.end) ||
556(
b->query.end <
a->query.offset &&
b->query.offset <
a->query.end)) {
565 if(
a->query.offset <=
b->query.offset) {
575overlap_f = overlap_s =
f->query.end - s->
query.
offset;
576 ASSERT(overlap_f >= 0 && overlap_s >= 0);
578 for(
i= 0;
i<
f->map_info->edits->num_edits;
i++) {
579 if(
f->map_info->edits->edits[
i].query_pos >= s->
query.
offset) {
580overlap_f -= edit_penalty;
585overlap_s -= edit_penalty;
589 return MIN(overlap_f, overlap_s);
603 if(!chain || !score_options) {
608 if(comp_hsp_score) {
622 if(comp_hsp_score) {
657 const Int4kGap = 15;
719num_matches = hsp->
query.
end- last_pos - 1;
743 Int4gap_open_score,
Int4gap_extend_score,
744 const Uint1* query_seq)
748 Int4d = is_start ? 1 : -1;
750 Int4delta_query = 0, delta_subject = 0;
751 Booleanis_subject = !is_query;
754 const Uint1kGap = 15;
771 while(
i!= end && num_left > 0) {
776delta_query += num_left;
777delta_subject += num_left;
794delta_subject += num_left;
813delta_query += num_left;
835 if(is_start &&
i> 0) {
865overhangs->
left= realloc(overhangs->
left,
866(overhangs->
left_len+ delta_subject) *
868 if(!overhangs->
left) {
871subject_bases = overhangs->
left+ overhangs->
left_len;
872overhangs->
left_len+= delta_subject;
874memcpy(subject_bases, query_seq + hsp->
query.
offset, delta_subject);
906subject_bases = overhangs->
right;
908memcpy(subject_bases, query_seq + hsp->
query.
end- delta_subject,
913 while(k < hsp->map_info->edits->num_edits &&
979 const Uint1kGap = 15;
985 while(k < hsp->map_info->edits->num_edits &&
1025 #define NUM_ADAPTERS 4 1026 #define MAX_ADAPTER_LEN 20 1037{0, 2, 0, 3, 1, 2, 2, 0, 0, 2, 0, 2},
1039{0, 3, 2, 2, 0, 0, 3, 3, 1, 3, 1, 2},
1041{1, 3, 2, 3, 1, 3, 1, 3, 3, 0, 3, 0},
1043{2, 0, 3, 1, 2, 2, 0, 0, 2, 0, 2, 1, 0, 1, 0, 1, 2, 3, 1, 3}};
1047 Int4adapter_start = -1;
1049 Int4from = hsp_from, to = hsp_to;
1050 const Int4kMaxErrors = 1;
1058 for(adptr_idx = 0;!found && adptr_idx <
NUM_ADAPTERS;adptr_idx++) {
1059 Uint1* adapter = adapters_tab[adptr_idx];
1061 Int4q =
MAX(to - lengths[adptr_idx], from);
1063 while(q < query_len - 4 && q +
i< query_len &&
i< lengths[adptr_idx]) {
1064 while(q < query_len - 4 && *(
Uint4*)(
query+ q) != word) {
1067 if(q < query_len - 4) {
1068 Int4errors = kMaxErrors + 1;
1070 while(q +
i< query_len &&
i< lengths[adptr_idx] &&
1073 if(
query[q +
i] != adapter[
i]) {
1078 if(q +
i== query_len ||
i== lengths[adptr_idx]) {
1089 ASSERT(adapter_start <= query_len);
1090 returnadapter_start;
1102 const Int4kMinAdapterLen = 3;
1104 if(!chains_ptr || !*chains_ptr || adapter_pos < 0) {
1109chain = *chains_ptr;
1123 if(query_len - h->
hsp->
query.
end< kMinAdapterLen) {
1124chain = chain->
next;
1132chain = chain->
next;
1138chain->
adapter= adapter_pos;
1150 while(
prev&&
prev->next != chain) {
1168 while(h && h->
hsp->
query.
end<= adapter_pos) {
1194 while(hh && hh->
next!= h) {
1216 Int4pos_minus = query_len - adapter_pos - 1;
1221 while(h && h->
hsp->
query.
end<= pos_minus + 5) {
1230 while(
prev&&
prev->next != chain) {
1249 if(h != chain->
hsps) {
1251 while(hh && hh->
next!= h) {
1281chain = chain->
next;
1285*chains_ptr =
head;
1298 for(query_idx = 0;query_idx < query_info->
num_queries;query_idx++) {
1303 Int4from = -1, to = -1;
1306 Int4adapter_pos = -1;
1308 if(!saved[query_idx]) {
1319chain = saved[query_idx];
1321 for(; ch; ch = ch->
next) {
1345 ASSERT(from >= 0 && to >= 0);
1349 if(from < 20 && to > query_len - 3) {
1354chain = saved[query_idx];
1356 for(; ch; ch = ch->
next) {
1362 if(query_len - h->
hsp->
query.
end> overhang) {
1392from = query_len - hsp->
query.
end;
1397 if(to >= query_len - 3) {
1406 if(adapter_pos >= 0) {
1420 const Uint1kBaseA = 0;
1423 const Int4kMaxErrors = 3;
1431 while(
i>= 0 && err < kMaxErrors) {
1432 if(sequence[
i] != kBaseA) {
1441 while(
i< length - 1 &&
1442(sequence[
i] != kBaseA || sequence[
i+ 1] != kBaseA)) {
1444 if(sequence[
i] != kBaseA) {
1450num_a = length -
i- err;
1453 if(num_a < 3 || (num_a < 5 && err > 0)) {
1463 Int4negative_start,
Int4query_len)
1471 for(ch = chains; ch; ch = ch->
next) {
1481 if((h->
hsp->
query.
frame< 0 && negative_start >= 0) ||
1507 for(query_idx = 0;query_idx < query_info->
num_queries;query_idx++) {
1512 Int4from = -1, to = -1;
1513 Int4positive_start, negative_start;
1516 if(!saved[query_idx] || saved[query_idx]->adapter >= 0) {
1523chain = saved[query_idx];
1525 for(; ch; ch = ch->
next) {
1549 ASSERT(from >= 0 && to >= 0);
1553 if(from < 4 && to > query_len - 3) {
1568 if(positive_start >= 0 || negative_start >= 0) {
1569 s_SetPolyATail(saved[query_idx], positive_start, negative_start,
1588 Int4mismatches = 0;
1593 const Uint1kGap = 15;
1595 if(!
first|| !second || !
query|| !score_opts) {
1607 if(query_gap < 0 || subject_gap < 0) {
1612 if(
MAX(query_gap, subject_gap) < 4) {
1613mismatches =
MIN(query_gap, subject_gap);
1614query_gap -= mismatches;
1615subject_gap -= mismatches;
1620edits_size =
first->map_info->edits->num_edits +
1622mismatches + query_gap + subject_gap;
1647 if(mismatches > 0) {
1665 if(query_gap > 0) {
1683 if(subject_gap > 0) {
1732 if(mismatches > 0) {
1733 for(k = 0;k < mismatches;k++) {
1750 edit->subject_base =
1754 edit->subject_base =
edit->query_base;
1763 if(query_gap > 0) {
1764 for(k = 0;k < query_gap;k++) {
1769 edit->query_pos = merged_hsp->
query.
end+ mismatches;
1772 edit->query_base = kGap;
1780 edit->subject_base =
1784 edit->subject_base = 0;
1793 if(subject_gap > 0) {
1794 for(k = 0;k < subject_gap;k++) {
1798 edit->query_pos = merged_hsp->
query.
end+ mismatches + k;
1802 edit->subject_base = kGap;
1853new_right_len *
sizeof(
Uint1));
1857new_right_len *
sizeof(
Uint1));
1871 for(query_idx = 0;query_idx + 1 < query_info->
num_queries; query_idx++) {
1872 HSPChain* chain = saved[query_idx];
1873 HSPChain* thepair = saved[query_idx + 1];
1876 if(!chain || !thepair) {
1895 if(chain->
oid== thepair->
oid) {
1906chain->
pair= thepair;
1907thepair->
pair= chain;
1923 if(
a->score <
b->score) {
1926 else if(
a->score >
b->score) {
1941 Int4array_size = 10;
1955 for(query_idx = 0; query_idx < num_queries; query_idx++) {
1956 HSPChain* chain = saved[query_idx];
1958 Int4best_score = 0;
1960 if(!chain || !chain->
next) {
1965 for(chain = saved[query_idx]; chain; chain = chain->
next) {
1966 if(chain->
score> best_score) {
1967best_score = chain->
score;
1972chain = saved[query_idx];
1976 if(score < best_score) {
1987saved[query_idx] =
next;
1992chain = chain->
next;
2000 for(query_idx = 0; query_idx < num_queries; query_idx++) {
2001 HSPChain* chain = saved[query_idx];
2003 Int4best_score = 0;
2004 Int4best_pair_score = 0;
2007 Int4num_chains = 0;
2013 if(!chain->
next) {
2019 for(chain = saved[query_idx]; chain; chain = chain->
next) {
2021 if(score >= best_score) {
2032 if(num_chains >= array_size) {
2039 array[num_chains++] = chain;
2050 for(
i= 0;
i< num_chains;
i++) {
2056 while(k < num_chains &&
array[k]->score ==
array[
i]->score) {
2062 for(;
i< k;
i++) {
2071chain = saved[query_idx];
2081 if((!chain->
pair&& score < best_score) ||
2095saved[query_idx] =
next;
2100chain = chain->
next;
2119 if(
a->oid >
b->oid) {
2122 else if(
a->oid <
b->oid) {
2125 else if(
a->hsps->hsp->subject.offset >
b->hsps->hsp->subject.offset) {
2128 else if(
a->hsps->hsp->subject.offset <
b->hsps->hsp->subject.offset) {
2138 int(*comp)(
const void*,
const void*))
2141 Int4array_size = 50;
2144 if(!saved || num_queries < 0) {
2154 for(
i= 0;
i< num_queries;
i++) {
2156 Int4num_chains = 0;
2164 for(; chain; chain = chain->
next) {
2165 if(num_chains >= array_size) {
2167chain_array = realloc(chain_array, array_size *
2175chain_array[num_chains++] = chain;
2178 if(num_chains > 1) {
2180qsort(chain_array, num_chains,
sizeof(
HSPChain*), comp);
2183 for(k = 0;k < num_chains - 1;k++) {
2184chain_array[k]->
next= chain_array[k + 1];
2186chain_array[num_chains - 1]->
next=
NULL;
2188saved[
i] = chain_array[0];
2220 for(; h; h = h->
next) {
2226memcpy(new_chain, chain,
sizeof(
HSPChain));
2231new_chain->
hsps= h;
2237 head->next = new_chain;
2247 for(; chain; chain = chain->
next) {
2262 Int4cutoff_edit_distance)
2266 while(chain && !
s_TestCutoffs(chain, cutoff_score, cutoff_edit_distance)
2273*chains_ptr = chain;
2276 while(chain && chain->
next) {
2285chain = chain->
next;
2301 Int4cutoff_edit_dist)
2304 const Int4kPairBonus = 21;
2307 if(!getenv(
"MAPPER_NO_PRUNNING")) {
2327 if(getenv(
"MAPPER_NO_OVERLAPPED_HSP_MERGE")) {
2328 for(query_idx = 0; query_idx < query_info->
num_queries; query_idx++) {
2329 HSPChain* chain = saved[query_idx];
2337 for(; chain; chain = chain->
next) {
2346 for(query_idx = 0; query_idx < query_info->
num_queries; query_idx++) {
2347 s_FilterChains(&saved[query_idx], cutoff_score, cutoff_edit_dist);
2354 for(query_idx = 0; query_idx < query_info->
num_queries; query_idx++) {
2355 HSPChain* chain = saved[query_idx];
2357 Int4num_unique = 1;
2363chain = chain->
next;
2364 for(; chain; chain = chain->
next,
prev=
prev->next) {
2365 if(
prev->oid != chain->
oid||
2373 for(chain = saved[query_idx]; chain; chain = chain->
next) {
2374chain->
count= num_unique;
2379 for(query_idx = 0; query_idx < query_info->
num_queries; query_idx++) {
2380 HSPChain* chain = saved[query_idx];
2381 for(; chain; chain = chain->
next) {
2383 for(; h; h = h->
next) {
2391 results->chain_array = saved;
2435 if(!dest || !
source) {
2439 for(
i= 0;
i< num;
i++) {
2452 #define MAX_NUM_HSP_PATHS 40 2465 if(path->
start) {
2482 if(!retval->
start) {
2491 #define NUM_SIGNALS 23 2492 #define NUM_SIGNALS_CONSENSUS 2 2538 const Uint1kGap = 15;
2550 if((
first->map_info->edits->num_edits > 0 &&
2551 first->map_info->edits->edits[
2552 first->map_info->edits->num_edits - 1].query_pos >=
2560 Int4num_second = 0;
2563 for(
i=
first->map_info->edits->num_edits - 1;
i>= 0;
i--) {
2565 if(edits[
i].query_pos < second->
query.offset) {
2573 if(edits[
i].query_pos >=
first->query.end) {
2579 if(num_first > num_second) {
2580 while(
first->map_info->edits->num_edits > 0 &&
2581 first->map_info->edits->edits[
2582 first->map_info->edits->num_edits - 1].query_pos >=
2587 Int4num_edits =
first->map_info->edits->num_edits;
2590 if(edits[num_edits - 1].query_pos >=
first->query.end - 1) {
2591 if(edits[num_edits - 1].subject_base != kGap) {
2596 else if(edits[num_edits - 1].query_pos == query_len - 2 &&
2597edits[num_edits - 1].subject_base == kGap) {
2599edge = (edge << 2) |
query[query_len - 1];
2602 if(edits[num_edits - 1].subject_base != kGap &&
2603edits[num_edits - 1].query_base != kGap) {
2606 query[edits[num_edits - 1].query_pos + 1];
2608 else if(edits[num_edits - 1].subject_base == kGap) {
2610 query[edits[num_edits - 1].query_pos + 2];
2614 query[edits[num_edits - 1].query_pos];
2618trim_by =
first->query.end - edits[
2622 if(edits[num_edits - 1].query_base == kGap) {
2630 first->map_info->right_edge = edge;
2633 else if(num_second > num_first) {
2636 first->query.end) {
2642 if(edits[0].query_pos == 0) {
2643 if(edits[0].subject_base != kGap) {
2648 else if(edits[0].query_pos == 1 &&
2649edits[0].subject_base == kGap) {
2651edge = (edge << 2) |
query[0];
2656edits[0].subject_base;
2658 if(edits[0].subject_base == kGap) {
2660 query[edits[0].query_pos - 1];
2667 if(edits[0].query_base == kGap) {
2694 if((
first->map_info->edits->num_edits > 0 &&
2695 first->map_info->edits->edits[
2696 first->map_info->edits->num_edits - 1].query_pos >=
2717overlap_len *
sizeof(
Uint1));
2719 subject[2 + overlap_len] = (
first->map_info->right_edge & 0xf) >> 2;
2720 subject[2 + overlap_len + 1] =
first->map_info->right_edge & 3;
2723 for(k = 0;k < num_signals;k++) {
2725 for(
i= 0;
i<= overlap_len && !found;
i++) {
2731 if(seq == signals[k]) {
2733 first->query.end -= d;
2734 first->subject.end -= d;
2735 first->gap_info->num[
first->gap_info->size - 1] -= d;
2737 first->num_ident -= d;
2738 first->map_info->right_edge = (seq & 0xf0) >> 4;
2745second->
score-= d;
2752 first->subject.end -
first->subject.offset);
2797 Int4subject_from,
Int4subject_to,
2809 Int4query_gap = query_to - query_from + 1;
2811 Int4subject_gap = subject_to - subject_from + 1;
2813 Int4query_ext_len, subject_ext_len;
2814 Int4ungapped_ext_len;
2818 JUMPjumper_mismatch[] = {{1, 1, 0, 0}};
2820 JUMPjumper_insertion[] = {{1, 1, 2, 0},
2824 JUMPjumper_deletion[] = {{1, 1, 2, 0},
2850 for(
i= 0;
i< o_len;
i++, k-=2) {
2866 if(!gap_align->
jumper) {
2873 switch(
SIGN(query_gap - subject_gap)) {
2874 case0: jumper_table = jumper_mismatch;
2877 case-1: jumper_table = jumper_deletion;
2880 case1: jumper_table = jumper_insertion;
2895query_gap, subject_gap,
2898&query_ext_len, &subject_ext_len,
2905 ASSERT(query_ext_len <= query_gap);
2906 ASSERT(subject_ext_len <= subject_gap);
2911 while(query_ext_len < query_gap) {
2918 while(subject_ext_len < subject_gap) {
2924 ASSERT(query_ext_len == query_gap);
2925 ASSERT(subject_ext_len == subject_gap);
2951gap_align->
query_stop= query_from + query_ext_len;
2953gap_align->
subject_stop= subject_from + subject_ext_len;
2982hsp->
query.
end+= query_ext_len;
3015 Int4first_len, second_len;
3032 if(!
first|| !second) {
3036 if(!
first->map_info->subject_overhangs ||
3037!
first->map_info->subject_overhangs->right ||
3048 if(query_gap >
first->map_info->subject_overhangs->right_len - 2 ||
3053first_len =
first->map_info->subject_overhangs->right_len;
3058 for(q = 0; !found && q < 4;q++) {
3060 if(
first->query.end - q <= first->
query.offset) {
3071seq = (
first->map_info->subject_overhangs->right[0] << 6) |
3072(
first->map_info->subject_overhangs->right[1] << 4);
3075seq = (
query[
first->query.end - 1] << 6) |
3076(
first->map_info->subject_overhangs->right[0] << 4);
3079seq = (
query[
first->query.end - q] << 6) |
3086 if(seq != (signals[k] & 0xf0)) {
3097 for(
i=
MAX(start - 1, 0);
i<=
MIN(start + 1, second_len - 2);
i++) {
3103 if(seq == signals[k]) {
3104 Int4subject_gap = second_len - (
i+ 2) + q;
3105 if(query_gap - subject_gap < -1 ||
3106query_gap - subject_gap > 1) {
3142 for(q = 0; !found && q < 4;q++) {
3177 if(seq != (signals[k] & 0xf)) {
3183end = query_gap + q;
3186 for(
i=
MAX(end - 1, 0);
i<=
MIN(end + 1, first_len - 2);
i++) {
3188seq |= (
first->map_info->subject_overhangs->right[
i] << 6) |
3189(
first->map_info->subject_overhangs->right[
i+ 1] << 4);
3191 if(seq == signals[k]) {
3193 Int4subject_gap =
i- q;
3194 if(query_gap - subject_gap < -1 ||
3195query_gap - subject_gap > 1) {
3228 first->map_info->right_edge = (signal & 0xf0) >> 4;
3235 first->subject.end -
first->subject.offset);
3250 #define NUM_SINGLE_SIGNALS 8 3493h->
next= following;
3525 if(!chains || !query_blk || !query_info || !scoring_opts) {
3530 for(ch = chains; ch; ch = ch->
next) {
3573scoring_opts,
FALSE);
3597h->
next= following;
3616consensus_only =
FALSE;
3620query_len, consensus_only);
3651query_len, scoring_opts);
3709 Int4longest_intron,
3716 Int4best_score = 0;
3717 const Int4kMaxIntronLength = longest_intron;
3721 if(!path || !nodes || !num) {
3727 for(
i= num - 1;
i>= 0;
i--) {
3732 for(k =
i+ 1;k < num && is_spliced;k++) {
3736 Int4new_score = nodes[k].
best_score+ self_score - overlap_cost;
3739 const Int4hsp_len = hsp->query.end - hsp->query.offset;
3744 const Int4subj_overlap_len =
3751 if(newhsp->
query.
offset> hsp->query.offset &&
3752newhsp->
query.
end> hsp->query.end &&
3756newhsp->
subject.
offset- hsp->subject.end < kMaxIntronLength &&
3757(
double)overlap_len / hsp_len < 0.75 &&
3758(
double)overlap_len / newhsp_len < 0.75 &&
3759(
double)subj_overlap_len / hsp_len < 0.75 &&
3760(
double)subj_overlap_len / newhsp_len < 0.75) {
3779new_score += chain->
score+ overlap_cost -
3780(newhsp->
score+ self_score);
3784 else if(newhsp->
query.
offset- hsp->query.end == 1) {
3795 if(new_score > nodes[
i].best_score) {
3804 if(nodes[
i].best_score == best_score) {
3809 else if(nodes[
i].best_score > best_score) {
3811path->
start[0] = &(nodes[
i]);
3822 if(!path->
start[
i]) {
3827 if(!path->
start[k]) {
3834 if(node_i->
hsp[0] == node_k->
hsp[0]) {
3876node = path->
start[
i];
3881 if(path->
score< cutoff_score) {
4042 if(
a->score >
b->score) {
4045 if(
a->score <
b->score) {
4049 if(
a->conf <
b->conf) {
4052 if(
a->conf >
b->conf) {
4056 if(
a->distance <
b->distance) {
4059 if(
a->distance >
b->distance) {
4069 Int4mismatch_score,
4070 Int4gap_open_score,
4071 Int4gap_extend_score,
4084 if(!chain || subj_pos < 0) {
4117old_score = hsp->
score;
4119gap_extend_score,
query);
4145 Int4mismatch_score,
4146 Int4gap_open_score,
Int4gap_extend_score,
4158 if(!chain || subj_pos <= 0 || !query_blk || !query_info) {
4196old_score = hsp->
score;
4198gap_extend_score,
query);
4209 if(chain->
hsps!= h) {
4211 while(hc && hc->
next!= h) {
4231 Int4* max_num_pairs,
4239 Pairinfo* pair_info = *pair_info_ptr;
4240 Int4conv_bonus = 0;
4243 const Int4kMaxInsertSize = is_spliced ?
4250 for(second = *second_list; second; second = second->
next) {
4251 Int2first_frame =
first->hsps->hsp->query.frame;
4255 if(num_pairs >= *max_num_pairs) {
4256*max_num_pairs *= 2;
4258realloc(pair_info, *max_num_pairs *
sizeof(
Pairinfo));
4259 if(!new_pair_info) {
4262pair_info = new_pair_info;
4263*pair_info_ptr = new_pair_info;
4268pair_info[num_pairs].
second= second;
4273pair_info[num_pairs].
distance= 0;
4276 ASSERT(first_frame != 0 && second_frame != 0);
4277 if(
SIGN(first_frame) !=
SIGN(second_frame)) {
4279 Int4plus_start, minus_start;
4285 if(first_frame > 0) {
4291 if(second_frame > 0) {
4303plus_start =
plus->hsps->hsp->subject.offset;
4305 while(hsp->
next) {
4312distance = minus_start - plus_start;
4313pair_info[num_pairs].
distance= distance;
4316 if(distance > 0 && distance < kMaxInsertSize) {
4317 Int4plus_end, minus_end;
4319 while(hsp->
next) {
4324minus_end =
minus->hsps->hsp->subject.offset;
4330 if(plus_end > minus_start || minus_end < plus_start) {
4334 if(plus_end > minus_start) {
4335 ASSERT(plus_end - minus_start > 0);
4342pair_info[num_pairs].
score-=
4343plus_end - minus_start;
4349 if(
plus== pair_info[num_pairs].
first) {
4350pair_info[num_pairs].
trim_first= minus_start;
4358 if(minus_end < plus_start) {
4359 ASSERT(plus_start - minus_end > 0);
4361pair_info[num_pairs].
score-=
4362plus_start - minus_end;
4364 if(
minus== pair_info[num_pairs].
first) {
4365pair_info[num_pairs].
trim_first= plus_start;
4378pair_info[num_pairs].
score+= conv_bonus;
4386 if(pair_info[num_pairs].score < min_score) {
4395pair_info[num_pairs].
score-= 1;
4401 if(num_pairs > 0) {
4412 for(
i=0;
i< num_pairs;
i++) {
4425convergent_found =
TRUE;
4430 if(best_score - pair_info[
i].score > margin) {
4444 for(ch = *first_list; ch; ch = ch->
next) {
4452 while(
i< num_pairs &&
4453(pair_info[
i].
first!= ch || !pair_info[
i].second->
pair||
4455best_score - pair_info[
i].
score> margin)) {
4458 if(
i>= num_pairs) {
4466pair_info[
i].
second= pair;
4474 for(ch = *second_list; ch; ch = ch->
next) {
4482 while(
i< num_pairs &&
4483(pair_info[
i].second != ch || !pair_info[
i].
first->pair ||
4485best_score - pair_info[
i].
score> margin)) {
4488 if(
i>= num_pairs) {
4496pair_info[
i].
first= pair;
4505 for(
i= 0;
i< num_pairs;
i++) {
4506 if(!pair_info[
i].valid_pair ||
4513 if(pair_info[
i].trim_first > 0) {
4514 if(pair_info[
i].
first->hsps->hsp->query.frame > 0) {
4516pair_info[
i].trim_first,
4529pair_info[
i].trim_first,
4540 if(pair_info[
i].trim_second > 0) {
4543pair_info[
i].trim_second,
4552pair_info[
i].trim_second,
4580 for(chain = saved[
i]; chain; chain = chain->
next) {
4583 if(chain->
oid!= oid) {
4587 if(chain->
score< 30) {
4647 const Int4kDefaultMaxHsps = 1000;
4648 Int4max_hsps = kDefaultMaxHsps;
4658 Int4workspace_size = 40;
4662 const Int4kPairBonus = is_spliced ? 21 : 5;
4669 if(!params || !nodes) {
4690chain_array[0] =
NULL;
4691chain_array[1] =
NULL;
4692 while(i < hsp_list->hspcnt) {
4700 Int4context_next_fragment = has_pair ? (query_idx + 2) *
NUM_STRANDS:
4706 while(i < hsp_list->hspcnt &&
4707hsp_array[
i]->
context< context_next_fragment) {
4709memset(nodes, 0, num_hsps *
sizeof(
HSPNode));
4714 while(i < hsp_list->hspcnt && hsp_array[
i]->
context==
context) {
4726 if(num_hsps >= max_hsps) {
4749nodes[num_hsps].
hsp= &(hsp_array[
i]);
4758 if(cutoff_score_fun[1] != 0) {
4760cutoff_score = (cutoff_score_fun[0] +
4761cutoff_score_fun[1] * query_len) / 100;
4771hsp_list->
oid, is_spliced,
4772kLongestIntron, cutoff_score,
4773query_blk, query_info, scoring_opts);
4778&new_chains, cutoff_score, cutoff_edit_dist,
4783 while(i < hsp_list->hspcnt && hsp_array[
i]->
context==
context) {
4793 if(is_spliced && chain_array[0]) {
4798 if(is_spliced && chain_array[1]) {
4803 first= chain_array[0];
4804second = chain_array[1];
4809 if(
first&& second) {
4812&workspace_size, is_spliced, scoring_opts,
4813query_blk, query_info);
4823cutoff_edit_dist,
TRUE);
4828 ASSERT(!saved_chains[query_idx] ||
4834cutoff_score, cutoff_edit_dist,
TRUE);
4839 ASSERT(!saved_chains[query_idx + 1] ||
4845chain_array[0] = chain_array[1] =
NULL;
4898 data->params = params;
4899 data->query_info = query_info;
4915 if(hit_options ==
NULL)
4952writer_info->
params= params;
#define sfree(x)
Safe free a pointer: belongs to a higher level header.
#define NUM_STRANDS
Number of frames in a nucleotide sequence.
BlastGapAlignStruct * BLAST_GapAlignStructFree(BlastGapAlignStruct *gap_align)
Deallocates memory in the BlastGapAlignStruct structure.
Structures and API used for saving BLAST hits.
BlastHSP * Blast_HSPClone(const BlastHSP *hsp)
Make a deep copy of an HSP.
BlastHSP * Blast_HSPFree(BlastHSP *hsp)
Deallocate memory for an HSP structure.
BlastHSPList * Blast_HSPListFree(BlastHSPList *hsp_list)
Deallocate memory for an HSP list structure as well as all it's components.
#define MAGICBLAST_MAX_INSERT_SIZE_NONSPLICED
#define MAGICBLAST_MAX_INSERT_SIZE_SPLICED
Default maximum insert size: distance on the subject between reads that belong to a pair,...
@ eFirstSegment
The first sequence of a pair with both sequences read and accepted.
Various auxiliary BLAST utility functions.
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
static DLIST_TYPE *DLIST_NAME() last(DLIST_LIST_TYPE *list)
static DLIST_TYPE *DLIST_NAME() prev(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
static DLIST_TYPE *DLIST_NAME() next(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
EGapAlignOpType
Operation types within the edit script.
@ eGapAlignIns
Insertion: a gap in subject.
@ eGapAlignSub
Substitution.
@ eGapAlignDel
Deletion: a gap in query.
GapEditScript * GapEditScriptDelete(GapEditScript *esp)
Free edit script structure.
uint8_t Uint1
1-byte (8-bit) unsigned integer
int16_t Int2
2-byte (16-bit) signed integer
int32_t Int4
4-byte (32-bit) signed integer
uint32_t Uint4
4-byte (32-bit) unsigned integer
static BlastHSP * s_MergeHSPs(const BlastHSP *first, const BlastHSP *second, const Uint1 *query, const ScoringOptions *score_opts)
static Int4 HSPChainListTrim(HSPChain *list, Int4 margin)
static Int4 s_HSPChainListInsertOne(HSPChain **list, HSPChain *chain, Boolean check_for_duplicates)
static Int4 s_HSPChainListInsertOne_OLD(HSPChain **list, HSPChain *chain, Boolean check_for_duplicates)
static Int4 s_TrimOverlap(BlastHSP *first, BlastHSP *second, const Uint1 *query)
static Boolean s_TestCutoffs(HSPChain *chain, Int4 cutoff_score, Int4 cutoff_edit_dist)
static Int4 s_ComputeGapScore(Int4 length, Int4 open_score, Int4 extend_score, Int4 seq_error)
static Int4 s_FindPolyATails(HSPChain **saved, const BLAST_SequenceBlk *query_blk, const BlastQueryInfo *query_info)
static Int4 s_FindSpliceJunctionsForOverlaps(BlastHSP *first, BlastHSP *second, Uint1 *query, Int4 query_len, Boolean consensus_only)
static Boolean s_TestHSPRanges(const BlastHSP *hsp)
#define MAX_NUM_HSP_PATHS
static void s_ExtendAlignmentCleanup(Uint1 *subject, BlastGapAlignStruct *gap_align, GapEditScript *edit_script, JumperEditsBlock *edits)
static int s_CompareHSPsByContextScore(const void *v1, const void *v2)
static Int2 s_SetAdapter(HSPChain **chains_ptr, Int4 adapter_pos, const Uint1 *query, Int4 query_len, const ScoringOptions *scores)
static Int4 s_SetPolyATail(HSPChain *chains, Int4 positive_start, Int4 negative_start, Int4 query_len)
static int s_FindRearrangedPairs(HSPChain **saved, const BlastQueryInfo *query_info)
static Int4 s_GetChainScore(const HSPChain *chain)
HSPChain * FindPartialyCoveredQueries(void *data, Int4 oid, Int4 word_size)
Find HSP chains that do not cover full extend of queries for a given subject.
BlastHSPWriterInfo * BlastHSPMapperInfoNew(BlastHSPMapperParams *params)
WriterInfo to create a default writer: the collecter.
static Int4 s_ExtendAlignment(BlastHSP *hsp, const Uint1 *query, Int4 query_from, Int4 query_to, Int4 subject_from, Int4 subject_to, const ScoringOptions *score_options, Boolean is_left)
#define NUM_SIGNALS_CONSENSUS
static HSPPath * HSPPathFree(HSPPath *path)
static BlastHSPWriter * s_BlastHSPMapperFree(BlastHSPWriter *writer)
Free the writer.
static Int4 s_TrimHSP(BlastHSP *hsp, Int4 num, Boolean is_query, Boolean is_start, Int4 mismatch_score, Int4 gap_open_score, Int4 gap_extend_score, const Uint1 *query_seq)
static BlastHSPWriter * s_BlastHSPMapperPairedNew(void *params, BlastQueryInfo *query_info, BLAST_SequenceBlk *query)
create the writer
static Int4 s_GetOverlapCost(const BlastHSP *a, const BlastHSP *b, Int4 edit_penalty)
static Boolean s_TestChains(HSPChain *chain)
static int s_BlastHSPMapperFinal(void *data, void *mapping_results)
Perform post-run clean-ups.
static int s_FilterChains(HSPChain **chains_ptr, Int4 cutoff_score, Int4 cutoff_edit_distance)
static Int4 s_ComputeAlignmentScore(BlastHSP *hsp, Int4 mismatch_score, Int4 gap_open_score, Int4 gap_extend_score)
static Int4 s_FindFragmentStart(HSPChain *chain)
BlastHSPMapperParams * BlastHSPMapperParamsNew(const BlastHitSavingOptions *hit_options, const BlastScoringOptions *scoring_options)
The following are exported functions to be used by APP.
static Int4 s_FindSpliceJunctions(HSPChain *chains, const BLAST_SequenceBlk *query_blk, const BlastQueryInfo *query_info, const ScoringOptions *scoring_opts)
static Int4 s_IntronToGap(HSPContainer *h, HSPContainer *next, const Uint1 *query, const ScoringOptions *scoring_opts)
static Int4 s_ComputeChainScore(HSPChain *chain, const ScoringOptions *score_options, Int4 query_len, Boolean comp_hsp_score)
static int s_CompareChainsByOid(const void *cha, const void *chb)
struct BlastHSPMapperData BlastHSPMapperData
Data structure used by the writer.
static int s_CompareHSPsByContextSubjectOffset(const void *v1, const void *v2)
static int s_HSPNodeArrayCopy(HSPNode *dest, HSPNode *source, Int4 num)
static Int4 s_FindSpliceJunctionsForGap(BlastHSP *first, BlastHSP *second, Uint1 *query, Int4 query_len, const ScoringOptions *score_options)
static HSPChain * s_FindBestPath(HSPNode *nodes, Int4 num, HSPPath *path, Int4 oid, Boolean is_spliced, Int4 longest_intron, Int4 cutoff_score, const BLAST_SequenceBlk *query_blk, const BlastQueryInfo *query_info, const ScoringOptions *scoring_opts)
static Int4 s_FindPolyAInSequence(Uint1 *sequence, Int4 length)
static int s_Finalize(HSPChain **saved, BlastMappingResults *results, const BlastQueryInfo *query_info, const BLAST_SequenceBlk *query_blk, const ScoringOptions *score_opts, Boolean is_paired, Int4 cutoff_score, Int4 cutoff_edit_dist)
static Boolean s_FindBestPairs(HSPChain **first_list, HSPChain **second_list, Int4 min_score, Pairinfo **pair_info_ptr, Int4 *max_num_pairs, Boolean is_spliced, const ScoringOptions *scoring_options, const BLAST_SequenceBlk *query_blk, const BlastQueryInfo *query_info)
static Int4 s_SortChains(HSPChain **saved, Int4 num_queries, int(*comp)(const void *, const void *))
static int s_RemoveOverlaps(HSPChain *chain, const ScoringOptions *score_opts, Int4 query_len)
static HSPPath * HSPPathNew(void)
static int s_BlastHSPMapperPairedInit(void *data, void *results)
The following are implementations for BlastHSPWriter ADT.
static Int4 HSPChainListInsert(HSPChain **list, HSPChain **chain, Int4 cutoff_score, Int4 cutoff_edit_dist, Boolean check_for_duplicates)
static int s_CompareChainsByScore(const void *cha, const void *chb)
static Boolean s_TestChainsSorted(HSPChain *chain)
static void s_BlastHSPMapperSplicedRunCleanUp(HSPPath *path, HSPNode *nodes, Pairinfo *pair_data)
static Int4 s_FindAdapterInSequence(Int4 hsp_from, Int4 hsp_to, Uint1 *query, Int4 query_len)
BlastHSPMapperParams * BlastHSPMapperParamsFree(BlastHSPMapperParams *opts)
Deallocates the BlastHSPMapperParams structure passed in.
static Int4 s_TrimChainStartToSubjPos(HSPChain *chain, Int4 subj_pos, Int4 mismatch_score, Int4 gap_open_score, Int4 gap_extend_score, const BLAST_SequenceBlk *query_blk, const BlastQueryInfo *query_info)
static Int4 s_FindAdapters(HSPChain **saved, const BLAST_SequenceBlk *query_blk, const BlastQueryInfo *query_info, const ScoringOptions *score_opts)
static int s_ComparePairs(const void *v1, const void *v2)
static int s_PruneChains(HSPChain **saved, Int4 num_queries, Int4 pair_bonus)
static int s_BlastHSPMapperSplicedPairedRun(void *data, BlastHSPList *hsp_list)
Perform writing task for paired reads ownership of the HSP list and sets the dereferenced pointer to ...
static Int4 s_TrimChainEndToSubjPos(HSPChain *chain, Int4 subj_pos, Int4 mismatch_score, Int4 gap_open_score, Int4 gap_extend_score, const BLAST_SequenceBlk *query_blk, const BlastQueryInfo *query_info)
Implementation of a number of BlastHSPWriters to save the best chain of RNA-Seq hits to a genome.
JumperEditsBlock * JumperEditsBlockFree(JumperEditsBlock *block)
JumperGapAlign * JumperGapAlignNew(Int4 size)
JumperEditsBlock * JumperEditsBlockCombine(JumperEditsBlock **block_ptr, JumperEditsBlock **append_ptr)
Int4 JumperPrelimEditBlockAdd(JumperPrelimEditBlock *block, JumperOpType op)
GapEditScript * GapEditScriptCombine(GapEditScript **edit_script_ptr, GapEditScript **append_ptr)
JumperEditsBlock * JumperFindEdits(const Uint1 *query, const Uint1 *subject, BlastGapAlignStruct *gap_align)
GapEditScript * JumperPrelimEditBlockToGapEditScript(JumperPrelimEditBlock *rev_prelim_block, JumperPrelimEditBlock *fwd_prelim_block)
Convert Jumper's preliminary edit script to GapEditScript.
Int4 GetCutoffScore(Int4 query_length)
Get alignment cutoff score for a given query length.
Int4 JumperExtendRightWithTraceback(const Uint1 *query, const Uint1 *subject, int query_length, int subject_length, Int4 match_score, Int4 mismatch_score, Int4 gap_open_score, Int4 gap_extend_score, int max_mismatches, int window, Int4 *query_ext_len, Int4 *subject_ext_len, JumperPrelimEditBlock *edit_script, Int4 *num_identical, Boolean left_extension, Int4 *ungapped_ext_len, JUMP *jumper)
Right extension with traceback.
#define MAPPER_SPLICE_SIGNAL
if(yy_accept[yy_current_state])
const CharType(& source)[N]
#define MIN(a, b)
returns smaller of a and b.
#define SIGN(a)
return +1 for a > 0, -1 for a < 0
Uint1 Boolean
bool replacment for C
#define TRUE
bool replacment for C indicating true.
#define FALSE
bool replacment for C indicating false.
#define ASSERT
macro for assert.
#define MAX(a, b)
returns larger of a and b.
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
HSPChain * HSPChainFree(HSPChain *chain_list)
Deallocate a chain or list of chains.
HSPContainer * HSPContainerNew(BlastHSP **hsp)
Create HSPContainer and take ownership of the HSP.
HSPChain * HSPChainNew(Int4 context)
Allocate a chain.
HSPContainer * HSPContainerFree(HSPContainer *hc)
Free the list of HSPs, along with the stored HSPs.
HSPChain * CloneChain(const HSPChain *chain)
Clone a single HSP chain.
Structure to hold a sequence.
Uint1 * sequence
Sequence used for search (could be translation).
Int4 query_length
Length of this query, strand or frame.
Int4 segment_flags
Flags describing segments for paired reads.
Int4 query_offset
Offset of this query, strand or frame in the concatenated super-query.
Structure supporting the gapped alignment.
Int4 query_stop
query end offseet of current alignment
Int4 subject_start
subject start offset current alignment
Int4 query_start
query start offset of current alignment
Int4 subject_stop
subject end offset of current alignment
JumperGapAlign * jumper
data for jumper alignment
The structure to hold all HSPs for a given sequence after the gapped alignment.
Int4 oid
The ordinal id of the subject sequence this HSP list is for.
Int4 hspcnt
Number of HSPs saved.
BlastHSP ** hsp_array
Array of pointers to individual HSPs.
Data structure used by the writer.
BlastQueryInfo * query_info
information about queries
HSPChain ** saved_chains
HSP chains are stored here.
BLAST_SequenceBlk * query
query sequence
BlastHSPMapperParams * params
how many hits to save
Keeps prelim_hitlist_size and HitSavingOptions together.
Int4 cutoff_edit_dist
max edit distance to accept a chain alignment
Int4 hitlist_size
number of hits saved during preliminary part of search.
Boolean paired
mapping with paired reads
EBlastProgramType program
program type
Int4 cutoff_score
min score to accept a chain alignment
ScoringOptions scoring_options
scores for match, mismatch, and gap
Int4 cutoff_score_fun[2]
coefficients for cutoff score as a function of query length: x[0] + x[1] * length
Boolean splice
mapping spliced reads (RNA-seq to a genome)
Int4 longest_intron
max intron length
Uint1 left_edge
Two subject bases before the alignment in the four least significant bits and flags in most significa...
JumperEditsBlock * edits
Information about mismatches and gaps, used for mapping short reads.
SequenceOverhangs * subject_overhangs
Unaligned subject subsequence.
The structure to contain all BLAST results, for multiple queries.
A wrap of data structure used to create a writer.
BlastHSPWriterNewFn NewFnPtr
ADT definition of BlastHSPWriter.
void * data
data structure
BlastHSPWriterFinalFn FinalFnPtr
BlastHSPWriterFreeFn FreeFnPtr
BlastHSPWriterRunFn RunFnPtr
BlastHSPWriterInitFn InitFnPtr
Structure holding all information about an HSP.
Int4 num_ident
Number of identical base pairs in this HSP.
BlastSeg query
Query sequence info.
Int4 context
Context number of query.
BlastSeg subject
Subject sequence info.
GapEditScript * gap_info
ALL gapped alignment is here.
Int4 score
This HSP's raw score.
BlastHSPMappingInfo * map_info
Options used when evaluating and saving hits These include: a.
EBlastProgramType program_number
indicates blastn, blastp, etc.
Int4 longest_intron
The longest distance between HSPs allowed for combining via sum statistics with uneven gaps.
Int4 cutoff_score
The (raw) score cut-off threshold.
Boolean paired
Splice HSPs for each query (for mapping RNA-Seq to a genome)
Int4 hitlist_size
Maximal number of database sequences to return results for.
Int4 max_edit_distance
Maximum number of mismatches and gaps.
Int4 cutoff_score_fun[2]
Coefficients x100 for the raw score cut-off threshold as a function of query length: x[0] + x[1] * qu...
Structure that contains BLAST mapping results.
The query related information.
BlastContextInfo * contexts
Information per context.
int num_queries
Number of query sequences.
Scoring options block Used to produce the BlastScoreBlk structure This structure may be needed for lo...
Int2 penalty
Penalty for a mismatch.
Int4 gap_open
Extra penalty for starting a gap.
Int4 gap_extend
Penalty for each gap residue.
Int2 reward
Reward for a match.
Int2 frame
Translation frame.
Int4 offset
Start of hsp.
Edit script: linked list of correspondencies between two sequences.
Int4 * num
Array of number of operations.
Int4 size
Size of above arrays.
EGapAlignOpType * op_type
Array of type of operation.
A chain of HSPs: spliced alignment.
Int4 score
Alignment score for the chain.
Int4 count
Number of placements for the read.
HSPContainer * hsps
A list of HSPs that belong to this chain.
Int4 polyA
Position of detected PolyA sequence in the query.
Uint1 pair_conf
Pair configuration.
Int4 context
Contex number of query sequence.
struct HSPChain * next
Pointer to the next chain in a list.
struct HSPChain * pair
Pointer to mapped mate alignmemt (for paired reads)
Int4 adapter
Position of detected adapter sequence in the query.
struct HSPContainer * next
struct HSPNode * path_next
Uint1 query_base
Query base at this position.
Uint1 subject_base
Subject base at this position.
Int4 query_pos
Query position.
Alignment edit script for gapped alignment.
JumperPrelimEditBlock * left_prelim_block
JumperPrelimEditBlock * right_prelim_block
Structure to save short unaligned subsequences outside an HSP.
Uint1 * left
Left subsequence.
Uint1 * right
Rught subsequence.
Int4 right_len
Length of the right subsequence.
Int4 left_len
Length of the left subsequence.
static CS_CONTEXT * context
voidp calloc(uInt items, uInt size)
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4