: m_TemplateHandle(template_handle),
79m_WordSize(word_size),
80m_AllowedTotalMismatch(allowed_total_mismatch),
81m_Allowed3EndMismatch(allowed_3end_mismatch),
82m_MaxMismatch(max_mismatch),
84m_MismatchRegionLength3End(10),
86m_NumNonSpecificTarget(20),
87m_MaxTargetPerSequence(100)
90 if(!input_seqalign.
Get().empty()){
128m_FeatureScope(
NULL)
154 charmaster_gap_char,
155 charslave_gap_char) {
157 for(
int i= 0;
i< (
int)xcript.size();
i++){
158 if(xcript[
i] == master_gap_char) {
165 for(
int i= (
int)xcript.size() - 1;
i>= 0;
i--){
166 if(xcript[
i] == master_gap_char) {
173 if(master_start_gap == 0) {
175 if(xcript[
i] == slave_gap_char) {
182 if(master_end_gap == 0){
183 for(
int i= (
int)xcript.size() - 1;
i>= 0;
i--){
184 if(xcript[
i] == slave_gap_char) {
200 int& max_num_continuous_match,
204 intnum_continuous_match = 0;
205num_total_mismatch = 0;
206num_3end_mismatch = 0;
209max_num_continuous_match = 0;
217 stringmaster_string;
223 intmaster_letter_len = 0;
224 for(
int i=0;
i< (
int)master_string.size();
i++){
225 if(master_string[
i] != gap_char) {
226master_letter_len ++;
229 for(
int i=0;
i< (
int)master_string.size();
i++){
231 if(master_string[
i] == gap_char) {
232 if(is_left_primer) {
242}
else if(slave_string[
i] == gap_char) {
243 if(is_left_primer) {
254}
else if(master_string[
i]!=slave_string[
i]){
255 if(is_left_primer) {
257num_3end_mismatch ++;
261num_3end_mismatch ++;
264num_total_mismatch ++;
269 if(master_string[
i]== slave_string[
i]){
270num_continuous_match ++;
271 if(max_num_continuous_match < num_continuous_match) {
272max_num_continuous_match = num_continuous_match;
276num_continuous_match = 0;
281 if(master_string[0] == gap_char ||
282master_string[(
int)master_string.size() - 1] == gap_char ||
283slave_string[0] == gap_char ||
284slave_string[(
int)slave_string.size() - 1] == gap_char ||
298 bool& nw_align_modified) {
299nw_align_modified =
false;
309hit_full_start, hit_seq);
314 GetSeqData(hit_full_start, hit_full_stop + 1, hit_seq);
323aligner->SetScoreMatrix(
NULL);
325xcript = aligner->GetTranscriptString();
327den_ref = aligner->GetDense_seg(desired_align_range.
GetFrom(),
344 s_CountGaps(xcript, master_start_gap, master_end_gap, slave_start_gap, slave_end_gap,
'I',
'D');
345 if(slave_start_gap > 0 || slave_end_gap > 0) {
351new_hit_full_start =
max((
int)(hit_full_start - slave_start_gap), 0);
353GetBioseqLength() - 1);
355new_hit_full_start =
max((
int)(hit_full_start - slave_end_gap), 0);
357GetBioseqLength() - 1);
365 if(!(new_hit_full_start == hit_full_start &&
366new_hit_full_stop == hit_full_stop)) {
368 for(
int i= hit_full_start - new_hit_full_start - 1;
i>= 0 ;
i--){
371 for(
int i= (
int)xcript.size() - 1 - (new_hit_full_stop - hit_full_stop);
i< (
int)xcript.size();
i++) {
375 for(
int i= new_hit_full_stop - hit_full_stop - 1;
i>= 0 ;
i--){
378 for(
int i= (
int)xcript.size() - 1 - (hit_full_start - new_hit_full_start);
i< (
int)xcript.size();
i++) {
383hit_full_start = new_hit_full_start;
384hit_full_stop = new_hit_full_stop;
397den_ref->
SetIds().push_back(master_id);
398den_ref->
SetIds().push_back(slave_id);
399nw_align_modified =
true;
404 if(master_start_gap > 0 || master_end_gap > 0) {
406xcript = xcript.substr(master_start_gap);
407xcript = xcript.substr(0, xcript.size() - master_end_gap);
412new_hit_full_start = hit_full_start + master_start_gap;
413new_hit_full_stop = hit_full_stop - master_end_gap;
415new_hit_full_start = hit_full_start + master_end_gap;
416new_hit_full_stop = hit_full_stop - master_start_gap;
435den_ref->
SetIds().push_back(master_id);
436den_ref->
SetIds().push_back(slave_id);
437nw_align_modified =
true;
462 int& max_num_continuous_match,
467 bool& nw_align_modified) {
476desired_align_range.
GetTo() + 1, master_seq);
488 TSeqPosfull_master_stop =
min(desired_align_range.
GetTo(), master_local_stop);
492 CRange<int>full_master_range(full_master_start, full_master_stop);
500 intlongest_chunk_index = 0;
501 intlongest_chunk_size = 0;
504 for(
intchunk_index = 0; chunk_index < master_chunk->size(); chunk_index ++) {
506chunk_ref = (*master_chunk)[chunk_index];
507 if(!chunk_ref->IsGap()) {
508 if(chunk_ref->GetAlnRange().GetLength() > longest_chunk_size) {
509longest_chunk_size = chunk_ref->GetAlnRange().GetLength();
510longest_chunk_index = chunk_index;
519 CRange<int>longest_chunk_range = (*master_chunk)[longest_chunk_index]->GetRange();
521 inthit_start_adjust = longest_chunk_range.
GetFrom() -
522desired_align_range.
GetFrom();
523 inthit_stop_adjust = desired_align_range.
GetTo() -
524longest_chunk_range.
GetTo();
530hit_start_adjust), 0);
536GetBioseqLength() - 1);
541hit_stop_adjust), 0);
560nw_align_modified =
false;
562master_seq, *av, hit_full_start,
564hit_strand, xcript, nw_align_modified);
578 intnum_continuous_match = 0;
579max_num_continuous_match = 0;
581 if(!nw_align_modified) {
584align_length = (
int)xcript.size();
586 ITERATE(
string, iter, xcript) {
596 TSeqPosmaster_letter_len = (
TSeqPos)xcript.size() - num_master_gap;
598 ITERATE(
string, the_iter, xcript) {
602 if(is_left_primer) {
613num_continuous_match = 0;
617 if(is_left_primer) {
627num_continuous_match ++;
628 if(max_num_continuous_match < num_continuous_match) {
629max_num_continuous_match = num_continuous_match;
634 if(is_left_primer) {
645num_continuous_match = 0;
649 if(is_left_primer) {
659num_continuous_match = 0;
670num_total_mismatch = total_mismatch;
671num_total_gap = total_insertion + total_deletion;
672num_3end_mismatch = mismatch_3end;
673num_3end_gap = insertion_3end + deletion_3end;
696num_total_mismatch = 0;
697num_3end_mismatch = 0;
700 intmax_num_continuous_match = 0;
707IntersectionWith(master_range);
709 if(primer_master_overlap.
GetLength() >=
724 if(is_left_primer) {
735global_align =
tmp.aln;
736num_total_mismatch =
tmp.num_total_mismatch;
737num_3end_mismatch =
tmp.num_3end_mismatch;
738num_total_gap =
tmp.num_total_gap;
739num_3end_gap =
tmp.num_3end_gap;
743 booldo_global_alignment =
true;
744 if(desired_align_range.
GetFrom() >= master_local_start &&
745desired_align_range.
GetTo() <= master_local_stop) {
746do_global_alignment =
false;
750desired_align_range.
GetTo());
753num_3end_mismatch, num_total_gap,num_3end_gap,
755max_num_continuous_match, aln_range);
757 if(!do_global_alignment) {
758 doublepercent_ident = 1 - ((double)(num_total_mismatch + num_total_gap))/aln_range.
GetLength();
761num_total_mismatch + num_total_gap < m_Hits->m_MaxMismatch &&
762(num_total_mismatch + num_total_gap <= m_Hits->m_AllowedTotalMismatch ||
763num_3end_mismatch + num_3end_gap <= m_Hits->m_Allowed3EndMismatch)) {
767aln_ref->
SetSegs().SetDenseg(*primer_denseg);
769global_align = aln_ref;
774 if(do_global_alignment) {
775 intalign_length = 1;
776 boolnw_align_modified =
false;
777num_total_mismatch = 0;
778num_3end_mismatch = 0;
781max_num_continuous_match = 0;
782 doublepercent_ident;
784num_total_mismatch, num_3end_mismatch,
785num_total_gap, num_3end_gap,
786is_left_primer, max_num_continuous_match,
789master_local_stop, hit_strand,
791 if(nw_align_modified) {
793num_total_mismatch = 0;
794num_3end_mismatch = 0;
797max_num_continuous_match = 0;
801num_3end_mismatch, num_total_gap,num_3end_gap,
803max_num_continuous_match, aln_range);
805percent_ident = 1 - ((double)(num_total_mismatch + num_total_gap))/aln_range.
GetLength();
808percent_ident = 1 - ((double)(num_total_mismatch + num_total_gap))/align_length;
812num_total_mismatch + num_total_gap < m_Hits->m_MaxMismatch &&
813(num_total_mismatch + num_total_gap <= m_Hits->m_AllowedTotalMismatch ||
814num_3end_mismatch + num_3end_gap <= m_Hits->m_Allowed3EndMismatch)) {
819aln_ref->
SetSegs().SetDenseg(*den_ref);
824global_align = aln_ref;
833temp_match.
aln= global_align;
835 m_Cache[cache_id] = temp_match;
851 boolsame_target =
false;
858cerr <<
"mapping triggered"<< endl;
912 if(backbone_loc && component_loc) {
919 if(backbone_component) {
940 boolallowed =
false;
942 if(
IsSameBioseq(*((*iter)->GetId()), hit_id, &scope) &&
943(*iter)->GetTotalRange().IntersectionWith(hit_range).GetLength() >= hit_range.
GetLength()*0.95){
964 boolis_self_forward_primer,
965 boolis_self_reverse_primer)
972 info.product_len = product_len;
973 info.left_total_mismatch = left_total_mismatch;
974 info.left_total_gap = left_total_gap;
975 info.left_3end_mismatch = left_3end_mismatch;
976 info.left_3end_gap = left_3end_gap;
978 info.right_total_mismatch = right_total_mismatch;
979 info.right_total_gap = right_total_gap;
980 info.right_3end_mismatch = right_3end_mismatch;
981 info.right_3end_gap = right_3end_gap;
982 info.aln.first = &left_align;
983 info.aln.second = &right_align;
985 info.self_forward_primer = is_self_forward_primer;
986 info.self_reverse_primer = is_self_reverse_primer;
995GetBioseqCore()->
GetId(),
1004 if(template_hit_same_id && left_template_aln_overlap && right_template_aln_overlap) {
1023!template_hit_same_id &&
1027cerr <<
"self hit by mapping"<< endl;
1030 boolhit_assigned =
false;
1033 if(index == *iter) {
1035hit_assigned =
true;
1047hit_assigned =
true;
1055 if(!hit_assigned) {
1068 intstart1 = 0, start2 = 0;
1073 returnstart1 <= start2;
1091 int& left_window_index_list_size,
1093 int& right_window_index_list_size,
1098 constvector<SHspInfo*>& hsp_list) {
1099left_window_index_list_size = 0;
1100right_window_index_list_size = 0;
1105left_window_desired_range_int.
SetFrom(left_window_desired_range.
GetFrom());
1106left_window_desired_range_int.
SetTo(left_window_desired_range.
GetTo());
1109right_window_desired_range_int.
SetFrom(right_window_desired_range.
GetFrom());
1110right_window_desired_range_int.
SetTo(right_window_desired_range.
GetTo());
1122 for(; left_window_tree_it; ++ left_window_tree_it) {
1124 if(hsp_list[temp->index]->master_range.IntersectionWith(left_window_desired_range).GetLength() >=
1126left_window_index_list[left_window_index_list_size].
index= temp->index;
1127left_window_index_list[left_window_index_list_size].
bit_score= hsp_list[temp->index]->bit_score;
1128left_window_index_list_size ++;
1131 for(; right_window_tree_it; ++ right_window_tree_it) {
1133 if(hsp_list[temp->index]->master_range.IntersectionWith(right_window_desired_range).GetLength() >=
1135right_window_index_list[right_window_index_list_size].
index= temp->index;
1136right_window_index_list[right_window_index_list_size].
bit_score= hsp_list[temp->index]->bit_score;
1137right_window_index_list_size ++;
1143 for(
int i= 0;
i<(
int) hsp_list.size();
i++ ) {
1146 if(hsp_list[
i]->master_range.GetFrom() >= right_window_desired_range.
GetTo()) {
1149 if(hsp_list[
i]->master_range.IntersectionWith(left_window_desired_range).
1151left_window_index_list[left_window_index_list_size].
index=
i;
1152left_window_index_list[left_window_index_list_size].
bit_score= hsp_list[
i]->bit_score;
1153left_window_index_list_size ++;
1156 if(hsp_list[
i]->master_range.IntersectionWith(right_window_desired_range).
1158right_window_index_list[right_window_index_list_size].
index=
i;
1159right_window_index_list[right_window_index_list_size].
bit_score= hsp_list[
i]->bit_score;
1160right_window_index_list_size ++;
1168 if(left_window_index_list_size > 0) {
1172 if(right_window_index_list_size > 0) {
1187 intHspOverlappingWithLeftPrimer_size;
1188 intHspOverlappingWithRightPrimer_size;
1189 intHspOverlappingWithLeftPrimerMinusStrand_size;
1190 intHspOverlappingWithRightPrimerMinusStrand_size;
1194HspOverlappingWithLeftPrimer_size,
1196HspOverlappingWithRightPrimer_size,
1203HspOverlappingWithLeftPrimerMinusStrand_size,
1205HspOverlappingWithRightPrimerMinusStrand_size,
1212 boolanalyze_plus_strand_first =
true;
1218 if(sorted_hsp.first.size() > 0) {
1219 if(sorted_hsp.second.size() > 0 &&
1220sorted_hsp.first[0]->bit_score < sorted_hsp.second[0]->bit_score - 1) {
1221analyze_plus_strand_first =
false;
1224analyze_plus_strand_first =
false;
1228 if(analyze_plus_strand_first) {
1232HspOverlappingWithLeftPrimer_size,
1233HspOverlappingWithRightPrimer_size,
1239HspOverlappingWithLeftPrimerMinusStrand_size,
1240HspOverlappingWithRightPrimerMinusStrand_size,
1248HspOverlappingWithLeftPrimerMinusStrand_size,
1249HspOverlappingWithRightPrimerMinusStrand_size,
1254HspOverlappingWithLeftPrimer_size,
1255HspOverlappingWithRightPrimer_size,
1261HspOverlappingWithLeftPrimer_size,
1262HspOverlappingWithRightPrimer_size,
1263HspOverlappingWithLeftPrimerMinusStrand_size,
1264HspOverlappingWithRightPrimerMinusStrand_size,
1284slave_range = (*ij).second;
1291ExtractSlice(0, master_range.
GetFrom(), master_range.
GetTo());
1296cerr <<
"ExtractSlice error = "<< e.what() << endl;
1310 intHspOverlappingWithLeftPrimer_size,
1311 intHspOverlappingWithRightPrimer_size,
1316vector<CRange<TSeqPos> > right_slave_range_array(HspOverlappingWithRightPrimer_size);
1317 for(
intj = 0; HspOverlappingWithLeftPrimer_size > 0 && j < HspOverlappingWithRightPrimer_size; j ++) {
1325right_slave_range_array[j] =
1327right_primer_master_overlap,
1331 intleft_primer_hsp_index = 0;
1332 for(
int i= 0;
i< HspOverlappingWithLeftPrimer_size;
i++) {
1346 boolleft_slave_range_filled =
false;
1347 boolleft_global_align_filled =
false;
1351 TSeqPosleft_total_mismatch = 0;
1353 TSeqPosleft_3end_mismatch = 0;
1356 for(
intj = 0; j < HspOverlappingWithRightPrimer_size; j ++) {
1363 if(!left_slave_range_filled) {
1364left_primer_hit_range =
1366left_primer_master_overlap,
1368left_slave_range_filled =
true;
1369 if(left_primer_hit_range.
Empty()) {
1374 TSeqPosleft_primer_hit_stop = left_primer_hit_range.
GetTo();
1375 TSeqPosleft_primer_hit_start = left_primer_hit_range.
GetFrom();
1379 if(right_primer_hit_range.
Empty()) {
1382 TSeqPosright_primer_hit_stop = right_primer_hit_range.
GetTo();
1383 TSeqPosright_primer_hit_start = right_primer_hit_range.
GetFrom();
1389(left_primer_hit_start - right_primer_hit_stop + 1) +
1394(right_primer_hit_start - left_primer_hit_stop + 1) +
1399 if(product_len > 0 &&
1403 if(!left_global_align_filled) {
1405left_primer_hit_global_align
1407hsp_list[left_primer_hsp_index],
1408left_total_mismatch,
1412 true, hit_index, hit_strand);
1413left_global_align_filled =
true;
1414 if(!left_primer_hit_global_align) {
1419 TSeqPosright_total_mismatch = 0;
1421 TSeqPosright_3end_mismatch = 0;
1425hsp_list[right_hsp_index],
1426right_total_mismatch, right_3end_mismatch,
1427right_total_gap, right_3end_gap,
false,
1428hit_index, hit_strand);
1429 if(right_primer_hit_global_align) {
1431 intpcr_product_len = 0;
1433*right_primer_hit_global_align,
1439*right_primer_hit_global_align,
1440left_total_mismatch,
1441left_3end_mismatch, left_total_gap, left_3end_gap,
1442right_total_mismatch, right_3end_mismatch,
1444right_3end_gap, pcr_product_len,
1445hit_index,
false,
false);
1458 boolprimers_on_different_strand,
1470 if(primers_on_different_strand) {
1472product_len = right_primer_hit_stop - left_primer_hit_start + 1;
1476product_len = (left_primer_hit_stop - right_primer_hit_start + 1);
1478product_len = (right_primer_hit_start - left_primer_hit_stop + 1) +
1496 constvector<SHspInfo*>& minus_strand_hsp_list,
1497 intHspOverlappingWithLeftPrimer_size,
1498 intHspOverlappingWithRightPrimer_size,
1499 intHspOverlappingWithLeftPrimerMinusStrand_size,
1500 intHspOverlappingWithRightPrimerMinusStrand_size,
1503vector<CRange<TSeqPos> > right_primer_hit_range_array(HspOverlappingWithLeftPrimerMinusStrand_size);
1504 for(
intj = 0; HspOverlappingWithLeftPrimer_size > 0 && j < HspOverlappingWithLeftPrimerMinusStrand_size; j ++) {
1506 CRange<TSeqPos>right_master_range = minus_strand_hsp_list[right_hsp_index]->master_range;
1511right_primer_hit_range_array[j] =
1513left_primer_window_right_align_overlap,
1517 for(
int i= 0;
i< HspOverlappingWithLeftPrimer_size;
i++) {
1530 boolleft_slave_range_filled =
false;
1531 boolleft_global_align_filled =
false;
1532 TSeqPosleft_total_mismatch = 0;
1533 TSeqPosleft_3end_mismatch = 0;
1538 for(
intj = 0; j < HspOverlappingWithLeftPrimerMinusStrand_size; j ++) {
1541 if(!left_slave_range_filled) {
1542left_primer_hit_range =
1544left_primer_master_overlap,
1546left_slave_range_filled =
true;
1547 if(left_primer_hit_range.
Empty()) {
1552 TSeqPosleft_primer_hit_stop = left_primer_hit_range.
GetTo();
1555 CRange<TSeqPos>right_primer_hit_range = right_primer_hit_range_array[j];
1556 if(right_primer_hit_range.
Empty()) {
1559 TSeqPosright_primer_hit_start = right_primer_hit_range.
GetFrom();
1563 intproduct_len = right_primer_hit_start - left_primer_hit_stop + 1 +
1567 if(!(product_len > 0 &&
1573 TSeqPosright_total_mismatch = 0;
1575 TSeqPosright_3end_mismatch = 0;
1578 if(!left_global_align_filled) {
1579left_primer_hit_global_align =
1581plus_strand_hsp_list[left_hsp_index],
1582left_total_mismatch,
1584left_total_gap, left_3end_gap,
1587left_global_align_filled =
true;
1588 if(!left_primer_hit_global_align) {
1594minus_strand_hsp_list[right_hsp_index],
1595right_total_mismatch,
1596right_3end_mismatch,
1597right_total_gap, right_3end_gap,
1600 if(right_primer_hit_global_align) {
1603 intpcr_product_len = 0;
1604 boolvalid_pcr_length;
1608*right_primer_hit_global_align,
1613 if(valid_pcr_length) {
1617new_align_left = left_primer_hit_global_align;
1618new_align_right = right_primer_hit_global_align;
1622left_total_mismatch,
1623left_3end_mismatch, left_total_gap, left_3end_gap,
1624right_total_mismatch, right_3end_mismatch,
1626right_3end_gap, pcr_product_len, hit_index,
true,
false);
1636vector<CRange<TSeqPos> > right_primer_hit_range_array2(HspOverlappingWithRightPrimerMinusStrand_size);
1637 for(
intj = 0; HspOverlappingWithRightPrimer_size > 0 && j < HspOverlappingWithRightPrimerMinusStrand_size; j ++) {
1640 CRange<TSeqPos>right_master_range = minus_strand_hsp_list[right_hsp_index]->master_range;
1645right_primer_hit_range_array2[j] =
1647right_primer_as_3_master_overlap,
1651 for(
int i= 0;
i< HspOverlappingWithRightPrimer_size;
i++) {
1667 boolright_primer_as_5_slave_range_filled =
false;
1668 boolleft_global_align_filled =
false;
1671 TSeqPosleft_total_mismatch = 0;
1672 TSeqPosleft_3end_mismatch = 0;
1679 for(
intj = 0; j < HspOverlappingWithRightPrimerMinusStrand_size; j ++) {
1682 if(!right_primer_as_5_slave_range_filled ) {
1683right_primer_as_5_hit_range =
1685right_primer_as_5_master_overlap,
1687right_primer_as_5_slave_range_filled =
true;
1688 if(right_primer_as_5_hit_range.
Empty()) {
1695 TSeqPosright_primer_as_5_hit_start = right_primer_as_5_hit_range.
GetFrom();
1697 CRange<TSeqPos>right_primer_as_3_hit_range = right_primer_hit_range_array2[j];
1699 if(right_primer_as_3_hit_range.
Empty()) {
1702 TSeqPosright_primer_as_3_hit_stop = right_primer_as_3_hit_range.
GetTo();
1707 intproduct_len = right_primer_as_5_hit_start
1708- right_primer_as_3_hit_stop + 1 +
1712 if(!(product_len > 0 &&
1718 TSeqPosright_total_mismatch = 0;
1720 TSeqPosright_3end_mismatch = 0;
1723 if(!left_global_align_filled) {
1724left_primer_hit_global_align =
1726plus_strand_hsp_list[left_hsp_index],
1727left_total_mismatch,
1729left_total_gap, left_3end_gap,
1731left_global_align_filled =
true;
1732 if(!left_primer_hit_global_align) {
1738right_total_mismatch,
1739right_3end_mismatch,
1740right_total_gap, right_3end_gap,
1743 if(right_primer_hit_global_align) {
1746 intpcr_product_len = 0;
1747 boolvalid_pcr_length;
1751*left_primer_hit_global_align,
1756 if(valid_pcr_length) {
1760new_align_left->
Assign(*left_primer_hit_global_align);
1764new_align_right->
Assign(*right_primer_hit_global_align);
1770left_total_mismatch,
1771left_3end_mismatch, left_total_gap, left_3end_gap,
1772right_total_mismatch, right_3end_mismatch,
1774right_3end_gap, pcr_product_len, hit_index,
false,
true);
1804SPrimerHitInfo* info1,
1806SPrimerHitInfo* info2) {
1810mismatch1 =
min(mismatch1,
1811(
int)info1->right_total_mismatch +
1812(
int)info1->right_total_gap +
1813(
int)info1->left_total_mismatch +
1814(
int)info1->left_total_gap);
1816mismatch2 =
min(mismatch2,
1817(
int)info2->right_total_mismatch +
1818(
int)info2->right_total_gap +
1819(
int)info2->left_total_mismatch +
1820(
int)info2->left_total_gap);
1822 returnmismatch1 < mismatch2;
1826SPrimerHitInfo*>* info1,
1828SPrimerHitInfo*>* info2) {
1831 ITERATE(vector<COligoSpecificityCheck::SPrimerHitInfo*>, iter, *info1) {
1833mismatch1 =
min(mismatch1,
1834(
int)(*iter)->right_total_mismatch +
1835(
int)(*iter)->right_total_gap +
1836(
int)(*iter)->left_total_mismatch +
1837(
int)(*iter)->left_total_gap);
1841 ITERATE(vector<COligoSpecificityCheck::SPrimerHitInfo*>, iter, *info2) {
1843mismatch2 =
min(mismatch2,
1844(
int)(*iter)->right_total_mismatch +
1845(
int)(*iter)->right_total_gap +
1846(
int)(*iter)->left_total_mismatch +
1847(
int)(*iter)->left_total_gap);
1849 returnmismatch1 < mismatch2;
1853 for(
int i= 0;
i< (
int) primer_hit_list_list.size();
i++){
1854vector<COligoSpecificityCheck::SPrimerHitInfo>* primer_hit_list = &primer_hit_list_list[
i];
1856vector<vector<SPrimerHitInfo*>* >
result;
1858vector<SPrimerHitInfo*>* temp;
1860 NON_CONST_ITERATE(vector<COligoSpecificityCheck::SPrimerHitInfo>, iter, *primer_hit_list) {
1861 const CSeq_id& cur_id = iter->aln.first->GetSeq_id(1);
1863 if(previous_id.
Empty()) {
1864temp =
newvector<SPrimerHitInfo*>;
1865temp->push_back(&(*iter));
1867}
else if(cur_id.
Match(*previous_id)){
1868temp->push_back(&(*iter));
1871temp =
newvector<SPrimerHitInfo*>;
1872temp->push_back(&(*iter));
1875previous_id = &cur_id;
1881vector<SPrimerHitInfo> temp2;
1886 ITERATE(vector<SPrimerHitInfo*>, iter2, **iter){
1887temp2.push_back(**iter2);
1892primer_hit_list->clear();
1893*primer_hit_list = temp2;
1900 intend = primer_info_list.size();
1901 if(from >= end)
return;
1902 if(to > end || to < 0) to = end;
1903 for(
int i=from;
i<to; ++
i) {
1905vector<SPrimerHitInfo> temp;
1919vector<double> score1;
1920 static const intnum_hsp = 2;
1921 if(info1.first.size() > 0) {
1923 for(
int i= 0;
i< (
int)info1.first.size() &&
i< num_hsp;
i++) {
1924 if(
i== 0 || (
i> 0 && !(info1.first[
i]->master_range.IntersectingWith(previous_range)))) {
1925score1.push_back(info1.first[
i]->bit_score);
1928previous_range = info1.first[
i]->master_range;
1932 if(info1.second.size() > 0 && (score1.empty() || !(score1[0] > info1.second[0]->bit_score))) {
1934 for(
int i= 0;
i< (
int)info1.second.size() &&
i< num_hsp;
i++) {
1935 if(
i== 0 || (
i> 0 && !(info1.second[
i]->master_range.IntersectingWith(previous_range)))) {
1936score1.push_back(info1.second[
i]->bit_score);
1939previous_range = info1.second[
i]->master_range;
1944vector<double> score2;
1946 if(info2.first.size() > 0) {
1948 for(
int i= 0;
i< (
int)info2.first.size() &&
i< num_hsp;
i++) {
1949 if(
i== 0 || (
i> 0 && !(info2.first[
i]->master_range.IntersectingWith(previous_range)))) {
1950score2.push_back(info2.first[
i]->bit_score);
1953previous_range = info2.first[
i]->master_range;
1957 if(info2.second.size() > 0 && (score2.empty() || !(score2[0] > info2.second[0]->bit_score))) {
1959 for(
int i= 0;
i< (
int)info2.second.size() &&
i< num_hsp;
i++) {
1960 if(
i== 0 || (
i> 0 && !(info2.second[
i]->master_range.IntersectingWith(previous_range)))) {
1961score2.push_back(info2.second[
i]->bit_score);
1964previous_range = info2.second[
i]->master_range;
1970stable_sort(score1.begin(), score1.end(), greater<double>());
1972stable_sort(score2.begin(), score2.end(), greater<double>());
1974 if(score1[0] > score2[0]) {
1976}
else if(score1[0] < score2[0]) {
1978}
else if(score1.size() > 1 && score2.size() > 1) {
1980 return(score1[1] > score2[1]);
1981}
else if(score1.size() > 1) {
1983}
else if(score2.size() > 1) {
1996 if(input_hits.
Get().empty()) {
2001 boolis_first_aln =
true;
2002 doublehighest_hit_score = 0;
2003 boollast_highest_hit_score_index_found =
false;
2008subid = &((*iter)->GetSeq_id(1));
2009 doublecur_bit_score = 0;
2011 if(cur_bit_score == 0) {
2014 if(
id.IsStr() &&
id.GetStr() ==
"bit_score") {
2015cur_bit_score = (*iter_score)->GetValue().GetReal();
2021 if(!is_first_aln && !subid->
Match(*previous_id)) {
2023 if(!last_highest_hit_score_index_found) {
2025 if(cur_bit_score < highest_hit_score) {
2026last_highest_hit_score_index_found =
true;
2029 if(!(each_hit.first.empty())) {
2030highest_hit_score =
max(highest_hit_score, each_hit.first[0]->bit_score);
2032 if(!(each_hit.second.empty())) {
2033highest_hit_score =
max(highest_hit_score, each_hit.second[0]->bit_score);
2045each_hit.first.clear();
2046each_hit.second.clear();
2052temp ->
hsp= *iter;
2053each_hit.second.push_back(temp);
2056temp ->
hsp= *iter;
2057each_hit.first.push_back(temp);
2062is_first_aln =
false;
2063previous_id = subid;
2068 if(!(each_hit.first.empty() && each_hit.second.empty())) {
2076 intnum_hsp = (
int)input_hits.
Get().size();
2077 inthsp_hit_ratio = 0;
2080hsp_hit_ratio = num_hsp/num_hits;
2083cerr <<
"hit = "<< num_hits <<
" hsp = "<< num_hsp
2084<<
" hsp/hit ratio = "<< hsp_hit_ratio << endl;
2087 if(hsp_hit_ratio > 100) {
2098index_holder->index = j;
2100 m_SortHit[
i].first[j]->master_range.GetTo());
2101RangeTreeForEachHitPlusStrand->
Insert(temp_master_range,
2112 for(
intj = 0; j < (
int)
m_SortHit[
i].second.size(); j ++) {
2114index_holder->index = j;
2116 m_SortHit[
i].second[j]->master_range.GetTo());
2117RangeTreeForEachHitMinusStrand->
Insert(temp_master_range,
static CRef< CScope > m_Scope
User-defined methods of the data storage class.
bool GetSeqData(ParserPtr pp, const DataBlk &entry, CBioseq &bioseq, Int4 nodetype, unsigned char *seqconv, Uint1 seq_data_type)
CRef< CAlnChunkVec > GetSeqChunks(TNumrow row, const TSignedRange &range, TGetChunkFlags flags=fAlnSegsOnly) const
const CSeq_id & GetSeqId(TNumrow row) const
bool IsPositiveStrand(TNumrow row) const
TSeqPos GetAlnStop(TNumseg seg) const
TSignedSeqPos GetSeqPosFromSeqPos(TNumrow for_row, TNumrow row, TSeqPos seq_pos, ESearchDirection dir=eNone, bool try_reverse_dir=true) const
const CBioseq_Handle & GetBioseqHandle(TNumrow row) const
TResidue GetGapChar(TNumrow row) const
void SetEndChar(TResidue gap_char)
void SetGapChar(TResidue gap_char)
string & GetAlnSeqString(string &buffer, TNumrow row, const CAlnMap::TSignedRange &aln_rng) const
void FromTranscript(TSeqPos query_start, ENa_strand query_strand, TSeqPos subj_start, ENa_strand subj_strand, const string &transcript)
Initialize from pairwise alignment transcript (a string representation produced by CNWAligner)
CRef< CDense_seg > ExtractSlice(TDim row, TSeqPos from, TSeqPos to) const
Extract a slice of the alignment that includes the specified range.
CRange< TSeqPos > GetSeqRange(TDim row) const
static string GetLoaderNameFromArgs(CReader *reader=0)
static TRegisterLoaderInfo RegisterInObjectManager(CObjectManager &om, CReader *reader=0, CObjectManager::EIsDefault is_default=CObjectManager::eDefault, CObjectManager::TPriority priority=CObjectManager::kPriority_NotSet)
const SPrimerInfo * m_PrimerInfo
bool x_IsPcrLengthInRange(const CSeq_align &left_primer_hit_align, const CSeq_align &right_primer_hit_align, bool primers_on_different_strand, ENa_strand hit_strand, int &product_len)
Test if the primer pair generates the pcr product in specified length range and fill the actual lengt...
const COligoSpecificityTemplate * m_Hits
the information about the blast results
SHspIndexInfo * m_HspOverlappingWithLeftPrimer
CRef< CScope > m_Scope
scope to fetch sequence
void CheckSpecificity(const vector< SPrimerInfo > &primer_info_list, int from=0, int to=-1)
check the specificity of the primer pairs.
void x_FindOverlappingHSP(SHspIndexInfo *left_window_index_list, int &left_window_index_list_size, SHspIndexInfo *right_window_index_list, int &right_window_index_list_size, const CRange< TSeqPos > &left_window_desired_range, const CRange< TSeqPos > &right_window_desired_range, ENa_strand hit_strand, TSeqPos hit_index, const vector< SHspInfo * > &hsp_list)
SHspIndexInfo * m_HspOverlappingWithLeftPrimerMinusStrand
void x_AnalyzeLeftAndRightPrimer(const vector< SHspInfo * > &hsp_list, ENa_strand hit_strand, int HspOverlappingWithLeftPrimer_size, int HspOverlappingWithRightPrimer_size, TSeqPos hit_index)
void x_AnalyzeTwoPrimers(const TSortedHsp &sorted_hsp, TSeqPos index)
Analyze the the primer pair specificity usign both left and right primer at ends.
COligoSpecificityCheck(const COligoSpecificityTemplate *temp, CScope &scope)
vector< vector< SPrimerHitInfo > > m_SelfHit
the hit represent the input template
vector< vector< SPrimerHitInfo > > m_VariantHit
the hits represent the transcript variants from the same gene as the input template
CRef< CScope > m_FeatureScope
bool x_SequencesMappedToSameTarget(CSeq_id::EAccessionInfo hit_type, const CSeq_align &left_align, const CSeq_align &right_align)
SHspIndexInfo * m_HspOverlappingWithRightPrimerMinusStrand
TSeqPos m_SpecifiedProductLen
the requested pcr length for non-specific template
vector< map< SSlaveRange, CRange< TSeqPos >, slave_range_sort_order > > m_SlaveRangeCache
vector< vector< SPrimerHitInfo > > m_PrimerHit
the non-specific hit for the primer pair
vector< int > m_NumTargetFromSameSequence
max number of targets allowed from a single subject sequence for a primer.
void x_FindMatchInfoForAlignment(CDense_seg &primer_denseg, bool &end_gap, TSeqPos &num_total_mismatch, TSeqPos &num_3end_mismatch, TSeqPos &num_total_gap, TSeqPos &num_3end_gap, bool is_left_primer, int &max_num_continuous_match, CRange< TSignedSeqPos > &aln_range)
void x_SavePrimerInfo(CSeq_align &left_align, CSeq_align &right_align, TSeqPos left_total_mismatch, TSeqPos left_3end_mismatch, TSeqPos left_total_gap, TSeqPos left_3end_gap, TSeqPos right_total_mismatch, TSeqPos right_3end_mismatch, TSeqPos right_total_gap, TSeqPos right_3end_gap, int product_len, TSeqPos index, bool is_self_forward_primer, bool is_self_reverse_primer)
save the primer informaton
void x_AnalyzePrimerSpecificity()
Analyze the primer pair specificity.
vector< const SPrimerInfo * > m_PrimerInfoList
the information about primer to be checked
void x_AnalyzeOnePrimer(const vector< SHspInfo * > &plus_strand_hsp_list, const vector< SHspInfo * > &minus_strand_hsp_list, int HspOverlappingWithLeftPrimer_size, int HspOverlappingWithRightPrimer_size, int HspOverlappingWithLeftPrimerMinusStrand_size, int HspOverlappingWithRightPrimerMinusStrand_size, TSeqPos hit_index)
analyze the case where the left primer itself can serve as both left and right primer
CRef< CObjectManager > m_FeatureOM
void x_SortPrimerHit(vector< vector< SPrimerHitInfo > > &primer_hit_list_list)
~COligoSpecificityCheck()
CRange< TSeqPos > x_GetSlaveRangeGivenMasterRange(const CSeq_align &input_align, CRange< TSeqPos > &master_range, int index)
map< SAlnCache, SPrimerMatch, sort_order > m_Cache
cache coordinate-alignment mapping
CRef< CSeq_align > x_FillGlobalAlignInfo(const CRange< TSeqPos > &desired_align_range, SHspInfo *input_hsp_info, TSeqPos &num_total_mismatch, TSeqPos &num_3end_mismatch, TSeqPos &num_total_gap, TSeqPos &num_3end_gap, bool is_left_primer, TSeqPos index, ENa_strand hit_strand)
return alignment for the full primer window.
SHspIndexInfo * m_HspOverlappingWithRightPrimer
CRef< CDense_seg > x_NW_alignment(const CRange< TSeqPos > &desired_align_range, const CSeq_align &input_hit, TSeqPos &num_total_mismatch, TSeqPos &num_3end_mismatch, TSeqPos &num_total_gap, TSeqPos &num_3end_gap, bool is_left_primer, int &max_num_continuous_match, int &align_length, TSeqPos master_local_start, TSeqPos master_local_stop, ENa_strand hit_strand, bool &nw_align_modified)
vector< vector< SPrimerHitInfo > > m_AllowedHit
the hit that user choose to ingnore for specificity
int m_NumNonSpecificTarget
the number non-specific targets to return
void x_SortHit(CSeq_align_set &input)
sort the hit
CRange< TSeqPos > m_TemplateRange
range on the input template
CConstRef< CSeq_id > m_Id
seqid
COligoSpecificityTemplate(const CBioseq_Handle &template_handle, CSeq_align_set &input_seqalign, CScope &scope, int word_size, TSeqPos allowed_total_mismatch=1, TSeqPos allowed_3end_mismatch=1, TSeqPos max_mismatch=7)
constructor @template_handle: bioseq represents the pcr template
TSeqPos m_MismatchRegionLength3End
the length or region at the 3' end for checking mismatches
vector< TSeqPos > m_AllowedSeqidIndex
user specified hits that can be disregarded for specificity checking
CSeq_id::EAccessionInfo m_TemplateType
const CBioseq_Handle & m_TemplateHandle
bioseq handle for input bioseq
vector< TSortedHsp > m_SortHit
the processed sorted hit list corresponding to the input seqalign
const list< CRef< CSeq_id > > * m_Allowed_Splice_Variants
TSeqPos m_TargetSizeMax
the requested target max length
int m_WordSize
minimal continuous match required
int m_MaxTargetPerSequence
~COligoSpecificityTemplate()
const list< CRef< CSeq_loc > > * m_AllowedSeqloc
vector< CIntervalTree * > m_RangeTreeListMinusStrand
vector< CIntervalTree * > m_RangeTreeListPlusStrand
void Reverse(void)
Reverse the segments' orientation NOTE: currently *only* works for dense-seg.
CRange< TSeqPos > GetSeqRange(TDim row) const
GetSeqRange NB: On a Spliced-seg, in case the product-type is protein, these only return the amin par...
TSeqPos GetSeqStop(TDim row) const
const CSeq_id & GetSeq_id(TDim row) const
Get seq-id (the first one if segments have different ids).
TSeqPos GetSeqStart(TDim row) const
@ eBackwards
Towards lower seq coord (to the left if plus strand, right if minus)
container_type::iterator iterator
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
iterator Insert(const interval_type &interval, const mapped_type &value)
reference GetValue(void) const
static EAccessionInfo IdentifyAccession(const CTempString &accession, TParseFlags flags=fParse_AnyRaw)
Deduces information from a bare accession a la WHICH_db_accession; may report false negatives on prop...
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
EAccessionInfo
For IdentifyAccession (below)
bool Match(const CSeq_id &sid2) const
Match() - TRUE if SeqIds are equivalent.
static int WorstRank(const CRef< CSeq_id > &id)
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
sequence::ECompare Compare(const CSeq_loc &loc1, const CSeq_loc &loc2, CScope *scope)
Returns the sequence::ECompare containment relationship between CSeq_locs.
CSeq_id_Handle GetIdHandle(const CSeq_loc &loc, CScope *scope)
bool IsSameBioseq(const CSeq_id &id1, const CSeq_id &id2, CScope *scope, CScope::EGetBioseqFlag get_flag=CScope::eGetBioseq_All)
Determines if two CSeq_ids represent the same CBioseq.
@ fCompareOverlapping
Check if seq-locs are overlapping.
@ eContains
First CSeq_loc contains second.
@ eSame
CSeq_locs contain each other.
@ eContained
First CSeq_loc contained by second.
CRef< CSeq_loc > Map(const CSeq_loc &src_loc)
Map seq-loc.
void AddDataLoader(const string &loader_name, TPriority pri=kPriority_Default)
Add data loader by name.
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
CSeq_loc_Mapper_Base & KeepNonmappingRanges(void)
Keep ranges which can not be mapped.
@ eSeqMap_Down
map from a segmented bioseq to segments
TBioseqCore GetBioseqCore(void) const
Get bioseq core structure.
TSeqPos GetBioseqLength(void) const
CConstRef< CSeq_id > GetSeqId(void) const
Get id which can be used to access this bioseq handle Throws an exception if none is available.
CSeqVector GetSeqVector(EVectorCoding coding, ENa_strand strand=eNa_strand_plus) const
Get sequence: Iupacna or Iupacaa if use_iupac_coding is true.
@ eCoding_Iupac
Set coding to printable coding (Iupacna or Iupacaa)
bool Empty(void) const THROWS_NONE
Check if CConstRef is empty â not pointing to any object which means having a null value.
position_type GetLength(void) const
bool IntersectingWith(const TThisType &r) const
TThisType IntersectionWith(const TThisType &r) const
static TThisType GetEmpty(void)
TThisType & Set(position_type from, position_type to)
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
NCBI_NS_STD::string::size_type SIZE_TYPE
C::value_type FindBestChoice(const C &container, F score_func)
Find the best choice (lowest score) for values in a container.
void SetFrom(TFrom value)
Assign a value to From data member.
TTo GetTo(void) const
Get the To member data.
TFrom GetFrom(void) const
Get the From member data.
void SetTo(TTo value)
Assign a value to To data member.
TId GetId(void) const
Get the variant data.
const TDenseg & GetDenseg(void) const
Get the variant data.
Tdata & Set(void)
Assign a value to data member.
void SetSegs(TSegs &value)
Assign a value to Segs data member.
void SetType(TType value)
Assign a value to Type data member.
vector< CRef< CScore > > TScores
list< CRef< CSeq_align > > Tdata
TIds & SetIds(void)
Assign a value to Ids data member.
const Tdata & Get(void) const
Get the member data.
const TSegs & GetSegs(void) const
Get the Segs member data.
@ eType_partial
mapping pieces together
ENa_strand
strand of nucleic acid
E_Choice Which(void) const
Which variant is currently selected.
const TId & GetId(void) const
Get the Id member data.
unsigned int
A callback function used to compare two keys in a database.
const struct ncbi::grid::netcache::search::fields::SIZE size
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
Defines command line argument related classes.
Defines unified interface to application:
NCBI C++ stream class wrappers for triggering between "new" and "old" C++ stream libraries.
static int match(PCRE2_SPTR start_eptr, PCRE2_SPTR start_ecode, uint16_t top_bracket, PCRE2_SIZE frame_size, pcre2_match_data *match_data, match_block *mb)
static bool SortIndexListByScoreDescending(const COligoSpecificityCheck::SHspIndexInfo &info1, const COligoSpecificityCheck::SHspIndexInfo &info2)
static bool SortPrimerHitByMismatchAscending(const vector< COligoSpecificityCheck::SPrimerHitInfo * > *info1, const vector< COligoSpecificityCheck::SPrimerHitInfo * > *info2)
static bool SortHitByTopHspScores(TSortedHsp const &info1, TSortedHsp const &info2)
static bool SeqLocAllowed(const list< CRef< CSeq_loc > > &allowed_seq, const CSeq_id &hit_id, const CRange< TSeqPos > &hit_range, CScope &scope)
static bool SortPrimerHitInGroupByMismatchAscending(const COligoSpecificityCheck::SPrimerHitInfo *info1, const COligoSpecificityCheck::SPrimerHitInfo *info2)
static const double k_Min_Percent_Identity
static const int k_MaxReliableGapNum
CRef< CDense_seg > s_DoNWalign(const CRange< TSeqPos > &desired_align_range, string &master_seq, const CAlnVec &av, TSeqPos hit_full_start, TSeqPos hit_full_stop, ENa_strand hit_strand, string &xcript, bool &nw_align_modified)
static void s_CountGaps(const string &xcript, TSeqPos &master_start_gap, TSeqPos &master_end_gap, TSeqPos &slave_start_gap, TSeqPos &slave_end_gap, char master_gap_char, char slave_gap_char)
static bool SortHspByMasterStartAscending(const SHspInfo *info1, const SHspInfo *info2)
static const double k_MinOverlapLenFactor
primer specificity checking tool
pair< vector< SHspInfo * >, vector< SHspInfo * > > TSortedHsp
static bool GetSeqId(const T &d, set< string > &labels, const string name="", bool detect=false, bool found=false)
key for coordinate-alignment cache
primer hit to the blast dababase
value for coordinate-alignment cache
TSeqPos num_3end_mismatch
total mismatchs
CRef< CSeq_align > aln
3' end gaps
TSeqPos num_3end_gap
total gaps
TSeqPos num_total_mismatch
TSeqPos num_total_gap
3' end mismatches
const CSeq_align * align_index
CRange< TSeqPos > master_range
CRange< TSeqPos > slave_range
CConstRef< CSeq_align > hsp
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4