(!constraint.empty()) {
65 int last= constraint.size();
66 if(constraint[
last-4] >= (
size_t)seq1_start ||
67constraint[
last-2] >= (
size_t)seq2_start) {
72constraint.push_back(seq1_start);
73constraint.push_back(seq1_start);
74constraint.push_back(seq2_start);
75constraint.push_back(seq2_start);
80 if(seq1_start >= seq1_stop ||
81seq2_start >= seq2_stop)
84constraint.push_back(seq1_stop);
85constraint.push_back(seq1_stop);
86constraint.push_back(seq2_stop);
87constraint.push_back(seq2_stop);
122vector<CSequence>& query_data,
123vector<CTree::STreeLeaf>& node_list,
126 _ASSERT(range.GetFrom() < 0 || range.GetTo() >= range.GetFrom());
133 for(
size_t i= 0;
i< node_list.size();
i++) {
134sum += node_list[
i].distance;
138 for(
size_t i= 0;
i< node_list.size();
i++) {
143 intindex = node_list[
i].query_idx;
144 double weight= node_list[
i].distance / sum;
151 if(node_list[
i].distance == 0 && sum == 0)
154 weight= node_list[
i].distance / sum;
156 intstart = range.GetFrom() >= 0 ? range.GetFrom() : 0;
157 int size= range.GetFrom() >= 0 ? range.GetTo() - range.GetFrom() + 1
158: query_data[index].GetLength();
165 for(
intj = 0; j <
size; j++) {
167freq_data[j][0] +=
weight;
171freq_data[j][k] +=
weight* matrix(start + j, k);
187 for(
int i= 0;
i< freq_size;
i++) {
193sum += freq_data[
i][j];
200freq_data[
i][j] *= sum;
211 for(;
i< seq.
GetLength() && s < seq_pos;
i++) {
249{
return first< p.first && second < p.second;}
261vector<CMultiAligner::TRangePair>& prof_ranges)
277 _ASSERT(p.first >= 0 && p.second >= 0);
278 if(p.first < 0 || p.second < 0) {
283 r.first.SetFrom(p.first);
284 r.second.SetFrom(p.second);
288 while(p <
len&& s < end_seq) {
292 while(p <
len&& s < end_seq
298 if((p.first >=
len.first && s.first < end_seq.first)
299|| (p.second >=
len.second && s.second < end_seq.second)) {
304 r.first.SetTo(p.first - 1);
305 r.second.SetTo(p.second - 1);
306 _ASSERT(
r.first.GetLength() ==
r.second.GetLength());
307prof_ranges.push_back(
r);
310 while(p.first <
len.first
316 while(p.second <
len.second
323 r.first.SetFrom(p.first);
324 r.second.SetFrom(p.second);
343 for(
i= 0;
i<
len;
i++) {
358 for(
i++;
i<
len;
i++) {
383vector<CSequence>& alignment,
384vector<CTree::STreeLeaf>& node_list1,
385vector<CTree::STreeLeaf>& node_list2,
389 const intkMinAlignLength = 11;
395 for(
int i= 0;
i< (
int)node_list1.size();
i++) {
397 for(
intj = 0; j < (
int)node_list2.size(); j++) {
399 intseq1 = node_list1[
i].query_idx;
400 intseq2 = node_list2[j].query_idx;
405 for(
intk = 0; k < pair_info(seq1, seq2).Size(); k++) {
406 CHit*hit = pair_info(seq1, seq2).GetHit(k);
407 CHit*new_hit =
new CHit(seq1, seq2);
427 CHit*subhit = *subitr;
428 CHit*new_subhit =
new CHit(seq1, seq2);
445 "Alignment interrupted");
450 if(profile_hitlist.
Empty())
458 for(
int i= 0;
i< profile_hitlist.
Size();
i++) {
464 CHit*subhit = *subitr;
472 "Alignment interrupted");
482printf(
"possible constraints (offsets wrt input profiles):\n");
483 for(
int i= 0;
i< profile_hitlist.
Size();
i++) {
486printf(
"query %3d %4d - %4d query %3d %4d - %4d score %d %c\n",
500vector<SGraphNode> graph;
501 for(
intj = 0; j < profile_hitlist.
Size(); j++) {
510 if(iteration == 0 && hit->
m_Score< 1000 &&
519 for(
i= 0;
i< (
int)graph.size();
i++) {
520 CHit*ghit = graph[
i].hit;
551graph[
i].hit = hit;
560 if(
i== (
int)graph.size())
566 "Alignment interrupted");
577 if(iteration == 0) {
584 intnum_hits = profile_hitlist.
Size();
585vector<int> list1, list2;
586list1.reserve(num_hits);
587list2.reserve(num_hits);
591 for(
int i= 0;
i< (
int)graph.size();
i++) {
592 CHit*ghit = graph[
i].hit;
603 for(
intj = 0; j < num_hits; j++) {
625 for(k = 0; k < (
int)list1.size(); k++) {
629 if(k == (
int)list1.size())
632 for(k = 0; k < (
int)list2.size(); k++) {
636 if(k == (
int)list2.size())
645graph[
i].best_score = graph[
i].hit->
m_Score*
646list1.size() * list2.size();
650 "Alignment interrupted");
661itr->best_score = itr->hit->m_Score;
668 while(best_path != 0) {
673printf(
"pick query %3d %4d - %4d query %3d %4d - %4d\n",
701vector<CTree::STreeLeaf>& node_list1,
702vector<CTree::STreeLeaf>& node_list2,
704vector<TRangePair>& match_ranges)
const 709 intseq1 = -1, seq2 = -1;
711 for(
size_t i=0;
i< node_list1.size();
i++) {
712 for(
size_tj=0;j < node_list2.size();j++) {
713 if(dist > dmat(node_list1[
i].query_idx, node_list2[j].query_idx)
716seq1 = node_list1[
i].query_idx;
717seq2 = node_list2[j].query_idx;
718dist = dmat(seq1, seq2);
725 if(pair_info(seq1, seq2).Size() == 0) {
729 CHit* hit = pair_info(seq1, seq2).GetHit(0);
730 for(
intk=1;k < pair_info(seq1, seq2).Size();k++) {
731 if(pair_info(seq1, seq2).GetHit(k)->m_Score > hit->
m_Score) {
732hit = pair_info(seq1, seq2).GetHit(k);
738vector<TOffsetPair> seq_match_regions(
747 _ASSERT(seq_match_regions.size() >= 2);
748 for(
size_t i=0;
i< seq_match_regions.size();
i+=2) {
749 _ASSERT(
i< seq_match_regions.size() - 1);
751== seq_match_regions[
i+ 1].second - seq_match_regions[
i].second);
755seq_match_regions[
i+ 1].
first);
756 TRangeseq_range2(seq_match_regions[
i].second,
757seq_match_regions[
i+ 1].second);
768printf(
"possible constraints (offsets wrt input profiles):\n");
769printf(
"query %3d %4d - %4d query %3d %4d - %4d score %d\n",
771match_ranges.front().first.GetFrom(),
772match_ranges.back().first.GetTo(),
774match_ranges.front().second.GetFrom(),
775match_ranges.back().second.GetTo(),
794vector<CTree::STreeLeaf>& node_list1,
795vector<CTree::STreeLeaf>& node_list2,
796vector<CSequence>& alignment,
804 intfreq1_size = alignment[node_list1[0].query_idx].GetLength();
805 intfreq2_size = alignment[node_list2[0].query_idx].GetLength();
809printf(
"\nalign profile (size %d) with profile (size %d)\n",
810freq1_size, freq2_size);
817freq1_data =
new double* [freq1_size];
820 for(
int i= 1;
i< freq1_size;
i++)
823memset(freq1_data[0], 0,
kAlphabetSize* freq1_size *
sizeof(
double));
830freq2_data =
new double* [freq2_size];
833 for(
int i= 1;
i< freq2_size;
i++)
836memset(freq2_data[0], 0,
kAlphabetSize* freq2_size *
sizeof(
double));
843(
const double**)freq2_data, freq2_size,
kScale);
846vector<size_t> constraint;
851node_list2, pair_info, iteration);
859printf(
"constraints: ");
860 for(
int i= 0;
i< (
int)constraint.size();
i+=4) {
861printf(
"(seq1 %d seq2 %d)->", (
int)constraint[
i],
862(
int)constraint[
i+2]);
880 if(freq1_size > 1.2 * freq2_size ||
881freq2_size > 1.2 * freq1_size) {
885 if((freq1_size > 1.5 * freq2_size ||
886freq2_size > 1.5 * freq1_size) &&
887!constraint.empty()) {
901 delete[] freq1_data[0];
902 delete[] freq1_data;
903 delete[] freq2_data[0];
904 delete[] freq2_data;
910 for(
int i= 0;
i< (
int)node_list1.size();
i++) {
911alignment[node_list1[
i].query_idx].PropagateGaps(
t,
914 for(
int i= 0;
i< (
int)node_list2.size();
i++) {
915alignment[node_list2[
i].query_idx].PropagateGaps(
t,
922 for(
int i= 0;
i< (
int)
t.size() / 10;
i++)
923printf(
"%10d",
i+ 1);
925 for(
int i= 0;
i< (
int)
t.size();
i++)
926printf(
"%d",
i% 10);
929 for(
int i= 0;
i< (
int)node_list1.size();
i++) {
930 intindex = node_list1[
i].query_idx;
932printf(
"%3d: ", index);
933 for(
intj = 0; j <
query.GetLength(); j++) {
934printf(
"%c",
query.GetPrintableLetter(j));
939 for(
int i= 0;
i< (
int)node_list2.size();
i++) {
940 intindex = node_list2[
i].query_idx;
942printf(
"%3d: ", index);
943 for(
intj = 0; j <
query.GetLength(); j++) {
944printf(
"%c",
query.GetPrintableLetter(j));
965vector<CTree::STreeLeaf>& node_list1,
966vector<CTree::STreeLeaf>& node_list2,
967vector<CSequence>& alignment,
968vector<size_t>& constraints,
970 intfull_prof_len1,
intfull_prof_len2,
978 intfreq1_size = range1.
GetTo() - range1.
GetFrom() + 1;
979 intfreq2_size = range2.
GetTo() - range2.
GetFrom() + 1;
983freq1_data =
new double* [freq1_size];
986 for(
int i= 1;
i< freq1_size;
i++)
989memset(freq1_data[0], 0,
kAlphabetSize* freq1_size *
sizeof(
double));
995freq2_data =
new double* [freq2_size];
998 for(
int i= 1;
i< freq2_size;
i++) {
1002memset(freq2_data[0], 0,
kAlphabetSize* freq2_size *
sizeof(
double));
1008(
const double**)freq2_data, freq2_size,
kScale);
1025 if(full_prof_len1 > 1.2 * full_prof_len2 ||
1026full_prof_len2 > 1.2 * full_prof_len1) {
1048 delete[] freq1_data[0];
1049 delete[] freq1_data;
1050 delete[] freq2_data[0];
1051 delete[] freq2_data;
1057vector<CTree::STreeLeaf>& node_list1,
1058vector<CTree::STreeLeaf>& node_list2,
1059vector<CSequence>& alignment,
1070printf(
"\nalign profile (size %d) with profile (size %d)\n",
1071alignment[node_list1[0].query_idx].
GetLength(),
1072alignment[node_list2[0].query_idx].
GetLength());
1078vector<TRangePair> match_ranges;
1080pair_info, match_ranges);
1083 if(match_ranges.empty()) {
1086 stringmessage =
"No significant alignments were found for cluster" 1087 " containing sequences: ";
1088 ITERATE(vector<CTree::STreeLeaf>, it, node_list1) {
1091message +=
" and ";
1092 ITERATE(vector<CTree::STreeLeaf>, it, node_list2) {
1095message +=
". Decreasing maximum in-cluster distance or turing off" 1096 " clustering option may improve results.";
1104 ITERATE(vector<TRangePair>, rit, match_ranges) {
1105printf(
"in-cluster constraints: " 1106 "(seq1 %d seq2 %d)->(seq1 %d seq2 %d)\n",
1107rit->first.GetFrom(), rit->second.GetFrom(),
1108rit->first.GetTo(), rit->second.GetTo());
1120 intprof1_length = alignment[node_list1.front().query_idx].GetLength();
1121 intprof2_length = alignment[node_list2.front().query_idx].GetLength();
1126 if(match_ranges.front().first.GetFrom() > 0
1127&& match_ranges.front().second.GetFrom() > 0) {
1129 TRangerange1(0, match_ranges.front().first.GetFrom() - 1);
1130 TRangerange2(0, match_ranges.front().second.GetFrom() - 1);
1144vector<size_t> constr;
1147constr, range1, range2,
1154 intlen1 = match_ranges.front().first.GetFrom();
1155 intlen2 = match_ranges.front().second.GetFrom();
1157 for(
int i=0;
i< len1;
i++) {
1160 for(
int i=0;
i< len2;
i++) {
1167vector<TRangePair>::const_iterator it(match_ranges.begin());
1170 for(
int i=0;
i< it->first.GetLength();
i++) {
1174vector<TRangePair>::const_iterator prev_it(it);
1178 for(; it != match_ranges.end(); ++it, ++prev_it) {
1179 _ASSERT(it->first.GetLength() == it->second.GetLength());
1182 TRangespace1(prev_it->first.GetToOpen(), it->first.GetFrom() - 1);
1183 TRangespace2(prev_it->second.GetToOpen(), it->second.GetFrom() - 1);
1187 if(space1.
Empty()) {
1194 else if(space2.
Empty()) {
1213vector<size_t> constr;
1223transcr.push_back(*
t);
1229 for(
int i=0;
i< it->first.GetLength();
i++) {
1236 if(prof1_length - match_ranges.back().first.GetTo() - 1 > 0
1237&& prof2_length - match_ranges.back().second.GetTo() - 1 > 0) {
1239 TRangeseq1_range(match_ranges.back().first.GetTo() + 1,
1241 TRangeseq2_range(match_ranges.back().second.GetTo() + 1,
1255vector<size_t> constr;
1258constr, seq1_range, seq2_range,
1259prof1_length, prof2_length,
1263transcr.push_back(*it);
1270 intlen1 = prof1_length - match_ranges.back().first.GetTo() - 1;
1271 intlen2 = prof2_length - match_ranges.back().second.GetTo() - 1;
1273 for(
int i=0;
i< len1;
i++) {
1276 for(
int i=0;
i< len2;
i++) {
1282 for(
int i=0;
i< (
int)node_list1.size();
i++) {
1283alignment[node_list1[
i].query_idx].PropagateGaps(transcr,
1286 for(
int i=0;
i< (
int)node_list2.size();
i++) {
1287alignment[node_list2[
i].query_idx].PropagateGaps(transcr,
1295 for(
int i= 0;
i< (
int)
t.size() / 10;
i++)
1296printf(
"%10d",
i+ 1);
1298 for(
int i= 0;
i< (
int)
t.size();
i++)
1299printf(
"%d",
i% 10);
1302 for(
int i= 0;
i< (
int)node_list1.size();
i++) {
1303 intindex = node_list1[
i].query_idx;
1305printf(
"%3d: ", index);
1306 for(
intj = 0; j <
query.GetLength(); j++) {
1307printf(
"%c",
query.GetPrintableLetter(j));
1312 for(
int i= 0;
i< (
int)node_list2.size();
i++) {
1313 intindex = node_list2[
i].query_idx;
1315printf(
"%3d: ", index);
1316 for(
intj = 0; j <
query.GetLength(); j++) {
1317printf(
"%c",
query.GetPrintableLetter(j));
13350.000000, 0.019250, 0.073770, 0.052030, 0.228450,
13360.084020, 0.021990, 0.207660, 0.108730, 0.204100
13410, 7, 9, 1, 9, 9, 5, 2, 6, 4, 8, 4, 4,
13429, 3, 9, 8, 7, 7, 4, 5, 0, 5, 9, 0, 0
1355 intnum_queries = align.size();
1357 double r= 1.0 / num_queries;
1362 for(
intj = 0; j < num_queries; j++)
1366freq[j] =
count[j] *
r;
1374H1 += freq[j] *
log((num_queries - 1) / pseudocount +
1382 returnH1 - H2 *
log((num_queries - 1) *
1383(pseudocount + H3) / pseudocount);
1394 intalign_len = align[0].GetLength();
1397 for(
int i= 0;
i< align_len;
i++)
1417vector<CSequence>& alignment,
1426vector<CTree::STreeLeaf> full_seq_list;
1431vector<CTree::STreeLeaf> cluster_seq_list;
1436printf(
"cluster: ");
1437 for(
int i= 0;
i< (
int)cluster_seq_list.size();
i++) {
1438printf(
"%d ", cluster_seq_list[
i].query_idx);
1444vector<int> cluster_idx;
1445vector<int> other_idx;
1446vector<CTree::STreeLeaf> other_seq_list;
1447 intcluster_size = cluster_seq_list.size();
1452 for(
int i= 0;
i< num_queries;
i++) {
1454 for(j = 0; j < cluster_size; j++) {
1455 if(full_seq_list[
i].query_idx ==
1456cluster_seq_list[j].query_idx) {
1460 if(j == cluster_size) {
1461other_seq_list.push_back(full_seq_list[
i]);
1462other_idx.push_back(full_seq_list[
i].query_idx);
1465 for(
int i= 0;
i< cluster_size;
i++) {
1466cluster_idx.push_back(cluster_seq_list[
i].query_idx);
1469vector<CSequence> tmp_align = alignment;
1477tmp_align, pair_info, iteration);
1484printf(
"realigned score = %f\n\n", new_score);
1486 if(new_score > score) {
1488alignment.swap(tmp_align);
1500 constvector<CSequence>& query_data,
1501vector<TRange>& gaps)
1517gaps.push_back(range);
1536vector<CSequence>& query_data,
1538 intiteration,
boolis_cluster)
1551 if(!left_child->
IsLeaf())
1553pair_info, iteration, is_cluster);
1556 if(!right_child->
IsLeaf())
1558pair_info, iteration, is_cluster);
1562vector<CTree::STreeLeaf> node_list1, node_list2;
1564left_child->
GetValue().GetDist());
1566right_child->
GetValue().GetDist());
1569 if(is_cluster && iteration == 0) {
1571query_data, pair_info, iteration);
1575query_data, pair_info, iteration);
1581 "Alignment interrupted");
1592vector<TRange> gaps;
1601vector<CSequence>& query_data,
1602 constvector<TRange>& gaps)
1615vector<TRange>::const_iterator gap_it(gaps.begin());
1623 for(
int i=it->GetFrom();i < it->GetTo();
i++) {
1627 while(gap_it != gaps.end() && gap_it->GetFrom() <
i+
offset) {
1628 offset+= gap_it->GetTo() - gap_it->GetFrom() + 1;
1643 for(
intj=0;j < (
int)matrix.
GetCols();j++) {
1646 _ASSERT(rps_freqs(
i, j) >= 0.0);
1648matrix(
i+
offset, j) = rps_freqs(
i, j);
1657-= domain_res_freq_boost;
1662+= domain_res_freq_boost;
1678vector<CSequence>& new_alignment,
1681 intnum_queries = new_alignment.size();
1682 intalign_length = new_alignment[0].GetLength();
1683vector<double> hvec(align_length);
1684vector<TRange> chosen_cols;
1689 for(
i= 0;
i< align_length;
i++) {
1696 for(
i= 0;
i< align_length;
i++) {
1698 if(!chosen_cols.empty() &&
1699chosen_cols.back().GetTo() ==
i-1) {
1700chosen_cols.back().SetTo(
i);
1703chosen_cols.push_back(
TRange(
i,
i));
1710 for(
i= j = 0; j < (
int)chosen_cols.size(); j++) {
1712chosen_cols[
i++] = chosen_cols[j];
1715chosen_cols.resize(
i);
1719 for(
i= 0;
i< (
int)chosen_cols.size();
i++) {
1720printf(
"constraint at position %3d - %3d\n",
1721chosen_cols[
i].GetFrom(), chosen_cols[
i].GetTo());
1726vector<TRange> range_nogap(num_queries);
1728 for(
i= 0;
i< (
int)chosen_cols.size();
i++) {
1730 TRange& curr_range = chosen_cols[
i];
1731 intstart = curr_range.
GetFrom();
1737 for(j = 0; j < num_queries; j++) {
1742 for(k = 0; k < length; k++) {
1747range_nogap[j].SetEmpty();
1754 for(m = 0; m <= curr_range.
GetFrom(); m++) {
1759 intoffset2 = offset1;
1760 for(; m <= curr_range.
GetTo(); m++) {
1764range_nogap[j].Set(offset1, offset2);
1770 for(k = 0; k < num_queries - 1; k++) {
1771 for(m = k + 1; m < num_queries; m++) {
1772 if(!range_nogap[k].
Empty() && !range_nogap[m].Empty()) {
1774range_nogap[m], 1000,
1783printf(
"Per-column score\n");
1784 for(
i= 0;
i< align_length;
i++) {
1785printf(
"%6.2f ", hvec[
i]);
1786 if((
i+1)%10 == 0)
1799 for(
int i= 0;
i< num_queries;
i++) {
1800 for(
intj = 0; j < num_queries; j++) {
1801pair_info(
i, j).ResetList();
1813vector<CTree::STreeEdge>& edges,
1814 doublecluster_cutoff)
1819 intnew_conserved_cols;
1820 doublenew_score = 0.0;
1853pair_info, iteration,
false);
1859 doublerealign_score =
x_GetScore(tmp_aligned);
1861printf(
"start score: %f\n", realign_score);
1868 for(
int i= 0;
i< num_tries;
i++) {
1869new_score = realign_score;
1876 for(
intj = 0; j < (
int)edges.size() &&
1877edges[j].distance >= cluster_cutoff; j++) {
1889 if(new_score - realign_score <= 0.02 *
fabs(realign_score)) {
1892realign_score = new_score;
1895realign_score =
max(realign_score, new_score);
1900 for(
int i= 0;
i< num_queries;
i++) {
1901 for(
intj = 0; j < (
int)tmp_aligned[
i].
GetLength(); j++) {
1902printf(
"%c", tmp_aligned[
i].GetPrintableLetter(j));
1906printf(
"score = %f\n", realign_score);
1910 if(realign_score > best_score) {
1915printf(
"REPLACE ALIGNMENT\n\n");
1917best_score = realign_score;
1927&& iteration >= 1) {
1935 for(
int i= 0;
i< num_queries;
i++) {
1936 for(
intj = 0; j < num_queries; j++) {
1937pair_info(
i, j).ResetList();
1948new_conserved_cols = 0;
1961 for(
int i= 0;
i< conserved_regions.
Size();
i++) {
1971 if(new_conserved_cols <= conserved_cols)
1975conserved_cols = new_conserved_cols;
1981pair_info, iteration,
false);
1986 catch(std::bad_alloc ex) {
2004 return a.distance >
b.distance;
2021vector<CTree::STreeEdge> edges;
2024 intnum_edges = edges.size();
2030 intnum_internal_edges =
min(11, (
int)(0.3 * num_edges + 0.5));
2039 for(
i= 0;
i< num_internal_edges - 1;
i++) {
2040 if(edges[
i].distance > 2 * edges[
i+1].distance) {
2041cluster_cutoff = edges[
i].distance;
2045 if(
i== num_internal_edges - 1) {
2046cluster_cutoff = edges[
i].distance;
2051 for(
i= 0;
i< num_edges;
i++)
2052printf(
"%f ", edges[
i].distance);
2053printf(
"cutoff = %f\n", cluster_cutoff);
static const string kScale
static const int kAlphabetSize
The aligner internally works only with the ncbistdaa alphabet.
CLocalRange< TOffset > TRange
define for the fundamental building block of sequence ranges
pair< TOffset, TOffset > TOffsetPair
Basic type specifying a range on a sequence.
int GetPrototype(void) const
Get cluster prototype.
const TDistMatrix & GetDistMatrix(void) const
Get distance matrix.
const TClusters & GetClusters(void) const
Get clusters.
Interface for the traceback from blast hits.
vector< TOffsetPair > ListMatchRegions(TOffsetPair start_offsets)
Compile a list of regions in the current edit script that contain substitutions.
An ordered collection of CHit objects.
int Size() const
Retrieve number of hits in list.
void PurgeAllHits()
Delete all hits unconditionally.
bool Empty()
Determine whether a list contains no hits.
CHit * GetHit(int index)
Retrieve a hit from the hitlist.
void SortByStartOffset()
Sort the hits in the hitlist by increasing sequence1 index, then by increasing sequence1 start offset...
void AddToHitList(CHit *hit)
Append a hit to the hitlist.
A generalized representation of a pairwise alignment.
TSubHit & GetSubHit()
Retrieve a list of subhits.
void InsertSubHit(CHit *hit)
Add a to a CHit's list of subhits.
int m_Score
Score of alignment.
CEditScript & GetEditScript()
Retrieve the traceback associated with a CHit.
int m_SeqIndex1
Numerical identifier for first sequence in alignment.
int m_SeqIndex2
Numerical identifier for second sequence in alignment.
TRange m_SeqRange1
The range of offsets on the first sequence.
TRange m_SeqRange2
The range of offsets on the second sequence.
bool HasSubHits()
Query if a CHit has a hierarchy of subhits available.
vector< CHit * > TSubHit
Hits can be grouped hierarchically.
A pair of integers, defined to privide increment and less-then operators.
bool operator<(const CIntPair &p) const
True if each element of this is smaller than the corresponding element of p.
CIntPair & operator++(void)
Increment both elements.
CIntPair operator++(int)
Increment both elements.
bool StrictlyBelow(const TThisType &r)
Test whether 'this' represents a sequence range strictly less than a given sequence range.
bool GetFastAlign(void) const
Check if fast alignment is to be used.
TScore GetEndGapExtendPenalty(void) const
Get gap extension penalty for end gaps in pairwise global alignment of profiles.
double GetConservedCutoffScore(void) const
Get cutoff score for conserved aligned columns.
bool GetIterate(void) const
Check if iterative alignmnet option is used.
double GetPseudocount(void) const
Get pseudocount for calculating column entropy.
TScore GetGapExtendPenalty(void) const
Get gap extension penlaty for middle gaps in pairwise global alignment of profiles.
TScore GetGapOpenPenalty(void) const
Get gap opening penalty for middle gaps in pairwise global alignment of profiles.
TScore GetEndGapOpenPenalty(void) const
Get gap opening penalty for end gaps in pairwise global alignment of profiles.
bool GetRealign(void) const
Check if MSA is to be realigned for different rooting of progressive alignment tree.
@ eMulti
Alignment guide tree for each cluster is attached to the main alignment guide tree.
bool GetVerbose(void) const
Get verbose mode.
double GetDomainResFreqBoost(void) const
Get boost for residue frequencies in conserved domains from RPS data base.
EEndGapCostStrategy
Strategy for reducing end gap penalties for profile-profile alignment.
@ fReduceLeft
Reduce penalty only for left end gaps.
@ fReduceBoth
Reduce penalty for both end gaps.
@ fReduceRight
Reduce penalty only for right end gaps.
CMultiAlignerOptions::EInClustAlnMethod m_ClustAlnMethod
SProgress m_ProgressMonitor
SGraphNode * x_FindBestPath(vector< SGraphNode > &nodes)
Find a maximum weight path in a directed acyclic graph.
void x_AlignProfileProfileUsingHit(vector< CTree::STreeLeaf > &node_list1, vector< CTree::STreeLeaf > &node_list2, vector< CSequence > &alignment, CNcbiMatrix< CHitList > &pair_info, int iteration)
Align two profiles with all sequences that belong to the same cluster.
void x_AddRpsFreqsToCluster(const CClusterer::CSingleCluster &cluster, vector< CSequence > &query_data, const vector< TRange > &gaps)
void x_BuildAlignmentIterative(vector< CTree::STreeEdge > &edges, double cluster_cutoff)
Main driver for the progressive alignment process.
CHitList m_LocalInClusterHits
void x_FindConservedColumns(vector< CSequence > &new_alignment, CHitList &conserved)
Create a list of constraints that reflect conserved columns in a multiple alignment.
const TPhyTreeNode * GetTree(void) const
Get ree used guide in progressive alignment.
@ eOutOfMemory
Out of memory error.
@ eInterrupt
Alignment interruped through callback function.
vector< CSequence > m_QueryData
void x_FindInClusterConstraints(vector< CSequence > &alignment, vector< CTree::STreeLeaf > &node_list1, vector< CTree::STreeLeaf > &node_list2, CNcbiMatrix< CHitList > &pair_info, vector< TRangePair > &match_ranges) const
Find constraint to use for profile to profile alignment in clusters.
vector< vector< TRange > > m_RPSLocs
void x_ComputeProfileRangeAlignment(vector< CTree::STreeLeaf > &node_list1, vector< CTree::STreeLeaf > &node_list2, vector< CSequence > &alignment, vector< size_t > &constraints, const TRange &range1, const TRange &range2, int full_prof_len1, int full_prof_len2, EEndGapCostStrategy strat, CNWAligner::TTranscript &t)
Compute profile profile alignmnet for a ranges of given profiles.
vector< CSequence > m_Results
CConstRef< CMultiAlignerOptions > m_Options
void x_AlignProfileProfile(vector< CTree::STreeLeaf > &node_list1, vector< CTree::STreeLeaf > &node_list2, vector< CSequence > &alignment, CNcbiMatrix< CHitList > &pair_info, int iteration)
Align two collections of sequences.
double x_GetScoreOneCol(vector< CSequence > &align, int col)
Calculate the entropy score of one column of a multiple alignment (see the COBALT papaer for details)
vector< string > m_Messages
void x_AlignProgressive(const TPhyTreeNode *tree, vector< CSequence > &query_data, CNcbiMatrix< CHitList > &pair_info, int iteration, bool is_cluster)
Main driver for progressive alignment.
int m_Score
Alignment score.
void x_BuildAlignment()
Given the current domain, local, pattern and user hits, along with the current tree,...
void x_FindConstraints(vector< size_t > &constraint, vector< CSequence > &alignment, vector< CTree::STreeLeaf > &node_list1, vector< CTree::STreeLeaf > &node_list2, CNcbiMatrix< CHitList > &pair_info, int iteration)
Find the set of constraints to use for a profile-profile alignment.
pair< TRange, TRange > TRangePair
double x_GetScore(vector< CSequence > &align)
Compute the entropy score of a multiple alignment.
static const int kRpsScaleFactor
static const int kClusterNodeId
double x_RealignSequences(const TPhyTreeNode *input_cluster, vector< CSequence > &alignment, CNcbiMatrix< CHitList > &pair_info, double score, int iteration)
Perform a single bipartition on a multiple alignment.
size_t GetRows() const
get the number of rows in this matrix
size_t GetCols() const
get the number of columns in this matrix
Class for representing protein sequences.
int GetLength() const
Get the length of the current sequence.
unsigned char GetLetter(int pos) const
Access the sequence letter at a specified position.
TFreqMatrix & GetFreqs()
Access the list of position frequencies associated with a sequence.
static void CompressSequences(vector< CSequence > &seq, vector< int > index_list)
Given a collection of sequences, remove all sequence positions where a subset of the sequences all co...
static const unsigned char kGapChar
The ncbistdaa code for a gap.
definition of a Culling tree
static void ListTreeEdges(const TPhyTreeNode *node, vector< STreeEdge > &edge_list, int max_id=-1)
Traverse a tree below a given starting point, listing all edges encountered along the way.
static void ListTreeLeaves(const TPhyTreeNode *node, vector< STreeLeaf > &node_list, double curr_dist=0)
Traverse a tree below a given starting point, listing all leaves encountered along the way.
Callback for sorting tree edges in order of decreasing edge weight.
bool operator()(const CTree::STreeEdge &a, const CTree::STreeEdge &b) const
Interface for CMultiAligner.
bool Empty(const CNcbiOstrstream &src)
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
static DLIST_TYPE *DLIST_NAME() last(DLIST_LIST_TYPE *list)
void SetStartWg(TScore value)
TTranscript GetTranscript(bool reversed=true) const
void SetEndWs(TScore value)
virtual CNWAligner::TScore Run(void)
void SetEndWg(TScore value)
vector< ETranscriptSymbol > TTranscript
void SetSequences(const char *seq1, size_t len1, const char *seq2, size_t len2, bool verify=true)
void SetPattern(const vector< size_t > &pattern)
void SetEndSpaceFree(bool Left1, bool Right1, bool Left2, bool Right2)
void SetStartWs(TScore value)
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
position_type GetLength(void) const
bool NotEmpty(void) const
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
TNodeList::const_iterator TNodeList_CI
bool IsLeaf() const
Report whether this is a leaf node.
const TValue & GetValue(void) const
Return node's value.
void SetFrom(TFrom value)
Assign a value to From data member.
TTo GetTo(void) const
Get the To member data.
TFrom GetFrom(void) const
Get the From member data.
void SetTo(TTo value)
Assign a value to To data member.
unsigned int
A callback function used to compare two keys in a database.
constexpr auto sort(_Init &&init)
const struct ncbi::grid::netcache::search::fields::SIZE size
#define INT4_MAX
largest nubmer represented by signed int
#define INT4_MIN
Smallest (most negative) number represented by signed int.
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
static const int kNumClasses
Number of 'letters' in the reduced alphabet.
static void x_NormalizeResidueFrequencies(double **freq_data, int freq_size)
Normalize the residue frequencies so that each column sums to 1.0.
static void x_HitToConstraints(vector< size_t > &constraint, CHit *hit)
Convert a constraint into a form usable by CPSSMAligner.
static void x_AddConstraint(vector< size_t > &constraint, CHit *hit)
Convert one hit into a constraint usable by CPSSMAligner.
static void x_ExpandRange(TRange &range, CSequence &seq)
Convert a sequence range to reflect gaps added to the underlying sequence.
static void s_CleanUpConstraints(CNcbiMatrix< CHitList > &pair_info, int num_queries)
static const int kRes2Class[kAlphabetSize]
Mapping from protein letters to reduced alphabet letters.
static void x_GetProfileMatchRanges(const TRange &seq_range1, const TRange seq_range2, CSequence &seq1, CSequence &seq2, vector< CMultiAligner::TRangePair > &prof_ranges)
Translate ungapped alignment range for pair-wise sequence alignment in input sequence coordinates int...
static void x_FillResidueFrequencies(double **freq_data, vector< CSequence > &query_data, vector< CTree::STreeLeaf > &node_list, TRange range=TRange(-1, -1))
Compute the residue frequencies for a specified range of a collection of sequences.
static const double kDerivedFreqs[kNumClasses]
Background frequencies of the letters in the reduced alphabet (each is the sum of the Robinson-Robins...
static int s_SeqToProfilePosition(int seq_pos, CSequence &seq)
Find a profile position for a input sequence position.
static void x_GetClusterGapLocations(const CClusterer::CSingleCluster &cluster, const vector< CSequence > &query_data, vector< TRange > &gaps)
Find locations of gaps in cluster prototype.
struct SGraphNode * path_next
the optimal path node belongs to
CHit * hit
the alignment represented by this node
Structure for listing tree edges.
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4