(
CFile(fn).Exists() && ! fs.fail()) {
73fs.getline(line, 256);
75 if(line[0] ==
'#')
continue;
88 const stringsuffix = (ig_opt->
m_IsProtein) ?
".pdm.":
".ndm.";
98 "Domain annotation data file could not be found in [internal_data] directory");
102 ITERATE(vector<string>,
l, lines) {
103vector<string> tokens;
105 if(!tokens.empty()) {
107 for(
int i=1;
i<11; ++
i) {
124 if(lines.size() == 0) {
127 ITERATE(vector<string>,
l, lines) {
128vector<string> tokens;
130 if(!tokens.empty()) {
135 if(tokens.size() == 3) {
137}
else if(tokens.size() == 4) {
140}
else if(tokens.size() == 5) {
155 if(lines.size() == 0) {
156 ERR_POST(
Warning<<
"D gene frame definition file could not be found");
158 ITERATE(vector<string>,
l, lines) {
159vector<string> tokens;
161 if(!tokens.empty()) {
175 if((*result)->HasAlignments()) {
177CSeq_align_set::Tdata::iterator it = align_list.begin();
178 while(it != align_list.end()) {
179 if((
int)((*it)->GetAlignLength()) - (
int)((*it)->GetTotalGapCount(0)) < length){
180it = align_list.erase(it);
194 if((*result)->HasAlignments()) {
198 inttop_hit_actual_len = 0;
201 inthighest_score = 0;
213 if(score >= highest_score) {
214highest_score = score;
215extend_strand = (*align)->GetSeqStrand(0);
217(*align)->GetSegs().GetDenseg().GetStarts()[1]);
221 intallowed_len =
min((*align)->GetSegs().GetDenseg().GetStarts()[1],
222query_len - ((*align)->GetSegs().GetDenseg().GetStarts()[0] +
223(
int)(*align)->GetSegs().GetDenseg().GetLens()[0]));
224top_hit_actual_len =
min(desired_len, allowed_len);
229top_hit_actual_len =
min(desired_len,
230 min((*align)->GetSegs().GetDenseg().GetStarts()[0],
231(*align)->GetSegs().GetDenseg().GetStarts()[1]));
238 intallowed_len =
min((*align)->GetSegs().GetDenseg().GetStarts()[1],
239query_len - ((*align)->GetSegs().GetDenseg().GetStarts()[0] +
240(
int)(*align)->GetSegs().GetDenseg().GetLens()[0]));
241actual_len =
min(top_hit_actual_len,
min(desired_len, allowed_len));
246actual_len =
min(top_hit_actual_len,
min(desired_len,
247 min((*align)->GetSegs().GetDenseg().GetStarts()[0],
248(*align)->GetSegs().GetDenseg().GetStarts()[1])));
254 if(actual_len > 0 && (*align)->GetSeqStrand(0) == extend_strand) {
257(*align)->SetSegs().SetDenseg().SetStarts()[1] -= actual_len;
258(*align)->SetSegs().SetDenseg().SetLens()[0] += actual_len;
262(*align)->SetSegs().SetDenseg().SetStarts()[0] -= actual_len;
263(*align)->SetSegs().SetDenseg().SetStarts()[1] -= actual_len;
264(*align)->SetSegs().SetDenseg().SetLens()[0] += actual_len;
279 if((*result)->HasAlignments()) {
283 inttop_hit_actual_len = 0;
285 inthighest_score = 0;
297 if(score >= highest_score) {
298highest_score = score;
299extend_strand = (*align)->GetSeqStrand(0);
302 intj_align_stop = (*align)->GetSegs().GetDenseg().GetSeqStop(1);
304j_stop - j_align_stop);
308 intquery_align_start = (*align)->GetSegs().GetDenseg().GetSeqStart(0);
309 intallowed_query_length = query_align_start;
311top_hit_actual_len =
min(desired_len, allowed_query_length);
314 intallowed_query_length = query_stop - (*align)->GetSegs().GetDenseg().GetSeqStop(0);
315top_hit_actual_len =
min(desired_len, allowed_query_length);
322 intquery_align_start = (*align)->GetSegs().GetDenseg().GetSeqStart(0);
323 intallowed_query_length = query_align_start;
324actual_len =
min(allowed_query_length, top_hit_actual_len);
328 intallowed_query_length = query_stop - (*align)->GetSegs().GetDenseg().GetSeqStop(0);
329actual_len =
min(top_hit_actual_len, allowed_query_length);
334 if(actual_len > 0 && (*align)->GetSeqStrand(0) == extend_strand) {
337 intnum_seg = (*align)->GetSegs().GetDenseg().GetNumseg();
338 intnum_dim = (*align)->GetSegs().GetDenseg().GetDim();
339(*align)->SetSegs().SetDenseg().SetStarts()[num_seg*num_dim - 2] -= actual_len;
340(*align)->SetSegs().SetDenseg().SetLens()[num_seg-1] += actual_len;
343 intnum_seg = (*align)->GetSegs().GetDenseg().GetNumseg();
344(*align)->SetSegs().SetDenseg().SetLens()[num_seg-1] += actual_len;
358vector<CRef <CIgAnnotation> > annots;
414 for(
intgene = 1; gene < num_genes; ++gene) {
447cerr <<
"blast failed"<< endl;
455 for(
intgene = 0; gene < num_genes; ++gene) {
469cerr <<
"blast failed"<< endl;
484 boolskipped =
false;
525 returnfinal_results;
535 if(sx != sy)
return(sx > sy);
547 return(x_id < y_id);
596 if((*annot)->m_GeneInfo[0] == -1 || (*annot)->m_GeneInfo[4] == -1 || (*annot)->m_GeneInfo[5] == -1) {
599mask_list.push_back(
mask);
602 bool ms= (*annot)->m_MinusStrand;
606mask_list.push_back(
mask);
610mask_list.push_back(
mask);
652 if((*annot)->m_GeneInfo[0] == -1) {
657mask_list.push_back(
mask);
668 bool ms= (*annot)->m_MinusStrand;
673 if(begin > 0 && begin <=
len-1) {
678 if(end < len -1 && end >= 0) {
718 if((*previous_d_results)[iq].HasAlignments()){
719align_d = (*previous_d_results)[iq].SetSeqAlign();
722 if((*annot)->m_GeneInfo[0] == -1 || !align_d || align_d.
Empty() || align_d->
IsEmpty()) {
727mask_list.push_back(
mask);
731 bool ms= (*annot)->m_MinusStrand;
732 intv_end_or_j_begin = (
ms)?
734 intj_begin_or_v_end = (
ms)?
736 if(v_end_or_j_begin > 0) {
741 if(j_begin_or_v_end < len-1 && j_begin_or_v_end > 0) {
764 if((*annot)->m_GeneInfo[0] ==-1) {
768mask_list.push_back(
mask);
771 intbegin = (*annot)->m_GeneInfo[0];
772 intend = (*annot)->m_GeneInfo[1];
776mask_list.push_back(
mask);
781mask_list.push_back(
mask);
797 if(sx < 0.999999 * sy || sy < 0.999999 * sx)
return false;
801 if(ix > iy)
return false;
811 if(sid.substr(0, 4) ==
"lcl|")
return(sid.substr(4, sid.length()));
823 if(ids.find(this_id) == string::npos) {
847 if((*result)->HasAlignments()) {
861 if((*it)->GetSeq_id(1).Match(align->
GetSeq_id(1)) &&
862(*it)->GetSeqStart(1) == align->
GetSeqStart(1) &&
863(*it)->GetSeqStop(1) == align->
GetSeqStop(1))
return true;
875 if(sx < 0.999999 * sy)
return true;
876 if(sy < 0.999999 * sx)
return false;
880 if(ix != iy)
return(ix > iy);
892 return(x_id < y_id);
901 if(sx != sy)
return(sx > sy);
921 if(ds < js || de < je + margin)
return true;
923 if(ds > js - margin || de > je)
return true;
979 boolva_or_vd_as_heavy_chain) {
983 if(align_D && !align_D->
Get().empty()) {
985CSeq_align_set::Tdata::iterator it = align_list.begin();
987 if(q_ct!=
"VH"&& q_ct!=
"VD"&& q_ct!=
"VA"&& q_ct!=
"VB") {
988 while(it != align_list.end()) {
989it = align_list.erase(it);
992}
else if(q_ct ==
"VA"|| q_ct ==
"VD") {
993 if(va_or_vd_as_heavy_chain) {
999 while(it != align_list.end()) {
1000it = align_list.erase(it);
1006it = align_list.begin();
1007 while(it != align_list.end()) {
1010 if(q_ct!=
"N/A") {
1011 chars_ct = q_ct[1];
1015 if(d_chain_type !=
"N/A"){
1016 if(d_chain_type[1] != q_ct[1]) keep =
false;
1018 stringsid = (*it)->GetSeq_id(1).AsFastaString();
1020 if(sid.substr(0, 4) ==
"LCL|") sid = sid.substr(4, sid.length());
1021 if((sid.substr(0, 2) ==
"IG"|| sid.substr(0, 2) ==
"TR")
1022&& sid[3] ==
'D') {
1025 if(s_ct!=
'B'&& s_ct!=
'D') s_ct = q_ct[1];
1026 if(s_ct != q_ct[1]) keep =
false;
1031 if(!keep) it = align_list.erase(it);
1037 boolstrand_found =
false;
1039 if((*it)->GetSeqStrand(0) == q_st) {
1040strand_found =
true;
1045it = align_list.begin();
1046 while(it != align_list.end()) {
1047 if((*it)->GetSeqStrand(0) != q_st) {
1048it = align_list.erase(it);
1053it = align_list.begin();
1054 while(it != align_list.end()) {
1056 intq_ds = (*it)->GetSeqStart(0);
1057 intq_de = (*it)->GetSeqStop(0);
1060 if(!keep) it = align_list.erase(it);
1068 if(align_J && !align_J->
Get().empty()) {
1070CSeq_align_set::Tdata::iterator it = align_list.begin();
1071 while(it != align_list.end()) {
1074 if(q_ct!=
"N/A") {
1075 chars_ct = q_ct[1];
1079 if(j_chain_type !=
"N/A"){
1080 if(j_chain_type[1] != q_ct[1]) keep =
false;
1082 stringsid = (*it)->GetSeq_id(1).AsFastaString();
1084 if(sid.substr(0, 4) ==
"LCL|") sid = sid.substr(4, sid.length());
1085 if((sid.substr(0, 2) ==
"IG"|| sid.substr(0, 2) ==
"TR")
1086&& sid[3] ==
'J') {
1088}
else if(sid[0] ==
'J') {
1091 if(s_ct!=
'H'&& s_ct!=
'L'&& s_ct!=
'K'&&
1092s_ct!=
'A'&& s_ct!=
'B'&& s_ct!=
'D'&& s_ct!=
'G') s_ct = q_ct[1];
1093 if(s_ct != q_ct[1]) keep =
false;
1099 if((*it)->GetSeqStrand(0) != q_st) keep =
false;
1103 intq_js = (*it)->GetSeqStart(0);
1104 intq_je = (*it)->GetSeqStop(0);
1106 if(q_je < q_ve - allowed_VJ_distance || q_js > q_ve -
j_wordsize) keep =
false;
1108 if(q_js > q_ve + allowed_VJ_distance || q_je < q_ve +
j_wordsize) keep =
false;
1111 if(!keep) it = align_list.erase(it);
1123CSeq_align_set::Tdata::iterator it;
1127 while(it != al_D.end()) {
1129it = al_D.erase(it);
1138 while(it != al_J.end()) {
1140it = al_J.erase(it);
1148 while(it != al_J.end()) {
1150it = al_J.erase(it);
1155 while(it != al_D.end()) {
1157it = al_D.erase(it);
1189original_align_D->
Assign(*align_D);
1198original_align_J->
Assign(*align_J);
1202 x_FindDJAln(align_D, align_J, q_ct, q_ms, q_st, q_ve, iq,
false);
1203 if((original_align_D.
NotEmpty() && !original_align_D->
Get().empty()) && (q_ct ==
"VA"|| q_ct ==
"VD")) {
1207 x_FindDJAln(original_align_D, original_align_J, q_ct, q_ms, q_st, q_ve, iq,
true);
1208 intas_heavy_chain_score = 0;
1209 intas_light_chain_score = 0;
1211 if(original_align_J.
NotEmpty() && !original_align_J->
Get().empty()){
1215 if(original_align_D.
NotEmpty() && !original_align_D->
Get().empty()){
1218 if(align_J.
NotEmpty() && !align_J->
Get().empty()){
1223 if(as_heavy_chain_score + d_score> as_light_chain_score){
1225align_D->
Assign(*original_align_D);
1228align_J->
Assign(*original_align_J);
1244 if(j_cdr3end > 0 && subject_start - j_cdr3end <= 1) {
1250 max(subject_start,
min(j_cdr3end + 1,
1259 if(subject_end > j_cdr3end) {
1263}
else if(j_cdr3end > 0 && subject_start - j_cdr3end <= 2) {
1275 if(subject_end > j_cdr3end) {
1278}
else if(j_cdr3end > 0 && subject_start - j_cdr3end <= 4) {
1290 if(subject_end > j_cdr3end) {
1298annot->
m_JDomain[4] = j_fwr4end_offset;
1299 if(j_fwr4end_offset >= 0) {
1326 stringq_ct = (*annot)->m_ChainType[0];
1327 boolq_ms = (*annot)->m_MinusStrand;
1329 intq_ve = (q_ms) ? (*annot)->m_GeneInfo[0] : (*annot)->m_GeneInfo[1] - 1;
1338 if(align_D && !align_D.Empty() && !align_D->IsEmpty()) {
1340CSeq_align_set::Tdata::iterator it = align_list.begin();
1343it = align_list.begin();
1344 while(it != align_list.end()) {
1347 if(q_ct!=
"N/A") {
1348 chars_ct = q_ct[1];
1352 if(d_chain_type !=
"N/A"){
1353 if(d_chain_type[1] != q_ct[1]) keep =
false;
1355 stringsid = (*it)->GetSeq_id(1).AsFastaString();
1357 if(sid.substr(0, 4) ==
"LCL|") sid = sid.substr(4, sid.length());
1358 if((sid.substr(0, 2) ==
"IG"|| sid.substr(0, 2) ==
"TR")
1359&& sid[3] ==
'D') {
1362 if(s_ct!=
'B'&& s_ct!=
'D') s_ct = q_ct[1];
1363 if(s_ct != q_ct[1]) keep =
false;
1368 if(!keep) it = align_list.erase(it);
1374 boolstrand_found =
false;
1376 if((*it)->GetSeqStrand(0) == q_st) {
1377strand_found =
true;
1382it = align_list.begin();
1383 while(it != align_list.end()) {
1384 if((*it)->GetSeqStrand(0) != q_st) {
1385it = align_list.erase(it);
1390it = align_list.begin();
1391 while(it != align_list.end()) {
1393 intq_ds = (*it)->GetSeqStart(0);
1394 intq_de = (*it)->GetSeqStop(0);
1397 if(!keep) it = align_list.erase(it);
1409 if(align_J && align_J.
NotEmpty() && !align_J->
IsEmpty() && !align_list.empty()) {
1412CSeq_align_set::Tdata::iterator it = al_J.begin();
1413 while(it != al_J.end()) {
1415it = al_J.erase(it);
1430 boolq_ms = (*annot)->m_MinusStrand;
1435 if(align_C && !align_C->
Get().empty()) {
1437CSeq_align_set::Tdata::iterator it = align_list.begin();
1438 while(it != align_list.end()) {
1442 if((*it)->GetSeqStrand(0) != q_st) keep =
false;
1445 if(!keep) it = align_list.erase(it);
1461 stringq_ct = (*annot)->m_ChainType[0];
1462 boolq_ms = (*annot)->m_MinusStrand;
1464 intq_ve = (q_ms) ? (*annot)->m_GeneInfo[0] : (*annot)->m_GeneInfo[1] - 1;
1468 x_FindDJ( results_D, results_J, *annot, align_D, align_J, q_ct, q_ms, q_st, q_ve, iq);
1480 stringq_ct = (*annot)->m_ChainType[0];
1483 if(align_D && !align_D.
Empty() && !align_D->
IsEmpty()) {
1486(*annot)->m_GeneInfo[2] = align->
GetSeqStart(0);
1487(*annot)->m_GeneInfo[3] = align->
GetSeqStop(0)+1;
1517(*annot)->m_GeneInfo[6] = align->
GetSeqStart(0);
1518(*annot)->m_GeneInfo[7] = align->
GetSeqStop(0)+1;
1519 if((*annot)->m_JDomain[3] > 0 && (*annot)->m_JDomain[1] > 0) {
1529(*annot)->m_CDomain[1] - 1;
1537(*annot)->m_CDomain[0] = query_start;
1541 intdiff =
max(0, (*annot)->m_CDomain[0] - (*annot)->m_JDomain[3] - 1);
1544 if((*annot)->m_JDomain[4] > 0) {
1545j_end -= (*annot)->m_JDomain[4];
1547 intj_stop = align_j->
Get().front()->GetSeqStop(1);
1548 intj_extend_max =
max(0, j_end - j_stop);
1549 intextend_len =
min(diff, j_extend_max);
1550 if(extend_len > 0) {
1551(*annot)->m_JDomain[3] += extend_len;
1567 boolq_ms = (*annot)->m_MinusStrand;
1577(*annot)->m_GeneInfo[4] = align->
GetSeqStart(0);
1578(*annot)->m_GeneInfo[5] = align->
GetSeqStop(0)+1;
1581 if(frame_offset >= 0) {
1582 intframe_adj = (align->
GetSeqStart(1) + 3 - frame_offset) % 3;
1583(*annot)->m_FrameInfo[2] = (q_ms) ?
1602 stringq_ct = (*annot)->m_ChainType[0];
1603 boolq_ms = (*annot)->m_MinusStrand;
1613(*annot)->m_GeneInfo[2] = align->
GetSeqStart(0);
1614(*annot)->m_GeneInfo[3] = align->
GetSeqStop(0)+1;
1626(*annot)->m_GeneInfo[4] = align->
GetSeqStart(0);
1627(*annot)->m_GeneInfo[5] = align->
GetSeqStop(0)+1;
1630 if(frame_offset >= 0) {
1631 intframe_adj = (align->
GetSeqStart(1) + 3 - frame_offset) % 3;
1632(*annot)->m_FrameInfo[2] = (q_ms) ?
1650 CScopescope_q(*mgr), scope_s(*mgr);
1652 boolannotate_subject =
false;
1659 if(db_name_V == db_name_domain) {
1660db_domain.
Reset(&(*db_V));
1662db_domain.
Reset(
new CSeqDB(db_name_domain, db_type));
1664annotate_subject =
true;
1673 if((*result)->HasAlignments() && (*gl_results)[iq].HasAlignments()) {
1677(*gl_results)[iq].GetSeqAlign()->Get().front();
1684 intq_ends[2], q_dir;
1704 intdomain_info[10];
1709 CAlnMaps_map((*it)->GetSegs().GetDenseg());
1710 ints_start = (*it)->GetSeqStart(1);
1711 ints_stop = (*it)->GetSeqStop(1);
1727 query.SetId((*it)->GetSeq_id(1));
1734 if(
result.HasAlignments()) {
1740scope_q.RemoveBioseq(hdl_q);
1744 for(
int i=0;
i<10;
i+=2) {
1746start = domain_info[
i] - 1;
1747stop = domain_info[
i+1] - 1;
1754 if(start <= d_stop && stop >= d_start) {
1755 intstart_copy = start;
1756 intstop_copy = stop;
1757 if(start_copy < d_start) start_copy = d_start;
1758 if(stop_copy > d_stop) stop_copy = d_stop;
1759 if(start_copy <= stop_copy) {
1772 if(start > s_stop || stop < s_start)
continue;
1774 if(start < s_start) start = s_start;
1776 if(stop > s_stop) stop = s_stop;
1778 if(start > stop)
continue;
1783 if((start - q_ends[1])*q_dir > 0 || (stop - q_ends[0])*q_dir < 0)
continue;
1785 if((start - q_ends[0])*q_dir < 0) start = q_ends[0];
1787 if((stop - q_ends[1])*q_dir > 0) stop = q_ends[1];
1789 if((start - stop)*q_dir > 0)
continue;
1793 intpos = q_map.
GetStart(1, seg);
1800seg = q_map.
GetSeg(aln_stop);
1807 if((start - stop)*q_dir > 0)
continue;
1818 while(i<10 && annot->m_DomainInfo[
i] < 0)
i+=2;
1819 if(
i< 10 && domain_info[
i] > 0) {
1820extension = (domain_info[
i] - 1 -
1831 while(i<10 && annot->m_DomainInfo[
i] >=0) {
1848 if(start >= 0 && (start - q_ends[1])*q_dir < 0) {
1852 if((start - q_ends[1])*q_dir <= 0) {
1860 if(frame_offset >= 0) {
1861 intq_start = (*it)->GetSeqStart(0);
1862 intq_stop = (*it)->GetSeqStop(0);
1863 intq_mid = q_start + q_stop;
1864 intq_dif = q_stop - q_start;
1865 intframe_adj = (3 - ((*it)->GetSeqStart(1) + 3 - frame_offset) % 3) %3;
1866annot->
m_FrameInfo[0] = (q_mid - q_dir *q_dif)/2 + q_dir * frame_adj;
1875q_start =
max(q_start, fwr3_stop);
1876q_mid = q_start + q_stop;
1877q_dif = q_stop - q_start;
1880q_stop =
min(q_stop, fwr3_stop);
1881q_mid = q_start + q_stop;
1882q_dif = q_stop - q_start;
1886frame_adj = ((*it)->GetSeqStop(1) + 3 - frame_offset) % 3;
1889annot->
m_FrameInfo[1] = (q_mid + q_dir *q_dif)/2 - q_dir * frame_adj;
1912 if((*result)->HasAlignments()) {
1913 intnum_aligns = (*result)->GetSeqAlign()->Size();
1928 for(
int i=0;
i<num_aligns; ++
i) {
1938 if((*result)->HasAlignments()) {
1940(&*((*result)->GetSeqAlign())));
1955 intnum_results =
result->GetNumResults();
1959 for(
intiq = 0; iq< num_queries && ir< num_results; ++iq) {
1970 while(!qid->
Match(*rid)) {
1977 while(ir < num_results && (*
result)[ir].
GetSeqId()->Match(*qid)) {
1983align_list.insert(align_list.end(), add_list.begin(), add_list.end());
1997 boolnew_result = (final_results.
Empty());
2006 intactual_align = 0;
2008 if((*result)->HasAlignments()) {
2010(&*((*result)->GetSeqAlign())));
2013 if(num_aligns >= 0) {
2015 if(align_list.size() > (CSeq_align_set::Tdata::size_type)num_aligns) {
2016CSeq_align_set::Tdata::iterator it = align_list.begin();
2017 for(
int i=0;
i<num_aligns; ++
i) ++it;
2018align_list.erase(it, align_list.end());
2019actual_align = num_aligns;
2021actual_align = align_list.size();
2037 while( !(*final_results)[iq].
GetSeqId()->Match(*
query)) ++iq;
2039 if(!align.
Empty()) {
2045CSeq_align_set::Tdata::iterator it = align_list.begin();
2046 while(it != align_list.end()) {
2052 if(!align_list.empty()) {
2053ig_list.insert(ig_list.end(), align_list.begin(), align_list.end());
2060 case0: ig_result->
m_NumActualV= actual_align;
break;
2061 case1: ig_result->
m_NumActualD= actual_align;
break;
2062 case2: ig_result->
m_NumActualJ= actual_align;
break;
2063 case3: ig_result->
m_NumActualC= actual_align;
break;
2079 if((*result)->HasAlignments()){
2080(*result)->SetSeqAlign()->Set().clear();
Declares the CBl2Seq (BLAST 2 Sequences) class.
@ eSequenceComparison
Seq-aligns in the BLAST 2 Sequence style (one alignment per query-subject pair)
@ eBlastn
Nucl-Nucl (traditional blastn)
@ eBlastp
Protein-Protein.
ncbi::TMaskedQueryRegions mask
TSignedSeqPos GetStart(TNumrow row, TNumseg seg, int offset=0) const
TSignedSeqPos GetAlnPosFromSeqPos(TNumrow row, TSeqPos seq_pos, ESearchDirection dir=eNone, bool try_reverse_dir=true) const
TNumseg GetSeg(TSeqPos aln_pos) const
CDense_seg::TNumseg TNumseg
TSignedSeqPos GetSeqPosFromSeqPos(TNumrow for_row, TNumrow row, TSeqPos seq_pos, ESearchDirection dir=eNone, bool try_reverse_dir=true) const
Runs the BLAST algorithm between 2 sequences.
Defines BLAST error codes (user errors included)
Creates BlastOptionsHandle objects with default values for the programs/tasks requested.
Encapsulates ALL the BLAST algorithm's options.
size_type Size() const
Returns the number of queries found in this query vector.
void SetMaskedRegions(size_type i, TMaskedQueryRegions mqr)
Assign a list of masked regions to one query.
void AddMask(size_type i, CRef< CSeqLocInfo > sli)
Add a masked region to the set for a query.
CRef< CBlastSearchQuery > GetBlastSearchQuery(size_type i) const
Get the CBlastSearchQuery object at index i.
Class to perform a BLAST search on local BLAST databases. Note that PHI-BLAST can be run using this cl...
NCBI C++ Object Manager dependent implementation of IQueryFactory.
API for Remote Blast Requests.
Search Results for All Queries.
Search Results for One Query.
ESeqType
Sequence types (eUnknown tries protein, then nucleotide).
CRef< CBioseq > SeqidToBioseq(const CSeq_id &seqid) const
Get a CBioseq for a given Seq-id.
structure for seqloc info
TSeqPos GetSeqStop(TDim row) const
const CSeq_id & GetSeq_id(TDim row) const
Get seq-id (the first one if segments have different ids).
bool GetNamedScore(const string &id, int &score) const
Get score.
TSeqPos GetSeqStart(TDim row) const
ENa_strand GetSeqStrand(TDim row) const
Get strand (the first one if segments have different strands).
TSeqPos GetAlignLength(bool include_gaps=true) const
Get the length of this alignment.
@ eRight
Towards higher aln coord (always to the right)
@ eBackwards
Towards lower seq coord (to the left if plus strand, right if minus)
@ eForward
Towards higher seq coord (to the right if plus strand, left if minus)
Collection of masked regions for a single query sequence.
Class for the messages for an individual query sequence.
Constants used in compositional score matrix adjustment.
@ eNoCompositionBasedStats
Don't use composition based statistics.
void x_AnnotateDomain(CRef< CSearchResultSet > &gl_results, CRef< CSearchResultSet > &dm_results, vector< CRef< CIgAnnotation > > &annot)
Annotate the query chaintype and domains based on blast results.
static bool s_DJNotCompatible(const CSeq_align &d, const CSeq_align &j, bool ms, int margin)
string GetDatabaseName() const
Returns the database name if appropriate, else kEmptyStr for subject sequences.
void SetCompositionBasedStats(ECompoAdjustModes mode)
void SetEvalueThreshold(double eval)
Sets EvalueThreshold.
CRef< IQueryFactory > m_Subject
virtual void SetNumberOfThreads(size_t nthreads)
Mutator for the number of threads.
double GetEvalueThreshold() const
CRef< CSearchResultSet > Run()
Run the Ig-BLAST engine.
static bool s_CompareSeqAlignByScore(const CRef< CSeq_align > &x, const CRef< CSeq_align > &y)
CRef< CSeq_align_set > & SetSeqAlign()
CIgAnnotationInfo(CConstRef< CIgBlastOptions > &ig_options)
int GetJDomain(const string &sid)
void x_AnnotateC(CRef< CSearchResultSet > &results_c, CRef< CSearchResultSet > &results_j, vector< CRef< CIgAnnotation > > &annot)
static int max_allowed_VD_distance
CRef< CLocalDbAdapter > m_LocalDb
bool GetDomainInfo(const string sid, int *domain_info)
void SetGapOpeningCost(int g)
static void s_ReadLinesFromFile(const string &fn, vector< string > &lines)
CConstRef< objects::CSeq_align_set > GetSeqAlign() const
Accessor for the Seq-align results.
vector< string > m_ChainType
static void s_SortResultsByEvalue(CRef< CSearchResultSet > &results)
Sort blast results according to evalue.
const string & GetRID(void)
Gets the request id (RID) associated with the search.
static int extend_length3end
void x_AnnotateDJ(CRef< CSearchResultSet > &results_D, CRef< CSearchResultSet > &results_J, vector< CRef< CIgAnnotation > > &annot)
Annotate the D and J genes based on blast results.
static bool s_SeqAlignInSet(CSeq_align_set::Tdata &align_list, CRef< CSeq_align > &align)
CRef< CLocalDbAdapter > m_Db[5]
static int max_allowed_VJ_distance_with_D
CRef< CSearchResultSet > Run()
Executes the search.
void x_FindDJAln(CRef< CSeq_align_set > &align_D, CRef< CSeq_align_set > &align_J, string q_ct, bool q_ms, ENa_strand q_st, int q_ve, int iq, bool va_or_vd_as_heavy_chain)
void x_ProcessCResult(CRef< CSearchResultSet > &results_C, vector< CRef< CIgAnnotation > > &annots)
void x_SetAnnotation(vector< CRef< CIgAnnotation > > &annot, CRef< CSearchResultSet > &final_results)
Append annotation info to the final results.
void x_SetupNoOverlapDSearch(const vector< CRef< CIgAnnotation > > &annots, CRef< CSearchResultSet > &results, CRef< IQueryFactory > &qf, CRef< CBlastOptionsHandle > &opts_hndl, int db_type)
CBlastOptions & SetOptions()
Returns a reference to the internal options class which this object is a handle for.
map< string, int > m_Fwr4EndOffset
void SetGapExtensionCost(int e)
map< string, int > m_DomainIndex
bool IsBlastDb() const
Returns true if this object represents a BLAST database.
static int max_allowed_V_end_to_J_end
CRef< CIgAnnotation > & SetIgAnnotation()
void x_ScreenByAlignLength(CRef< CSearchResultSet > &results, int length)
int GetFrameOffset(const string sid)
void x_SetupDJSearch(const vector< CRef< CIgAnnotation > > &annots, CRef< IQueryFactory > &qf, CRef< CBlastOptionsHandle > &opts_hndl, int db_type)
Prepare blast option handle and query for D, J germline database search.
void SetMismatchPenalty(int p)
vector< string > m_TopGeneIds
CRef< CBlastAncillaryData > GetAncillaryData() const
Accessor for the query's search ancillary.
static string s_RemoveLocalPrefix(const string &sid)
void SetHitlistSize(int s)
Sets HitlistSize.
CRef< objects::CSeq_align_set > SetSeqAlign()
Only intended to be used if you need to edit the Seq-align.
CConstRef< CIgBlastOptions > m_IgOptions
void x_SetupCRegionSearch(const vector< CRef< CIgAnnotation > > &annots, CRef< IQueryFactory > &qf, CRef< CBlastOptionsHandle > &opts_hndl)
CRef< CBlastOptionsHandle > m_Options
vector< int > m_DomainData
static void s_AppendResults(CRef< CSearchResultSet > &results, int num_aligns, int gene, CRef< CSearchResultSet > &final_results)
Append blast results to the final results.
const string GetDomainChainType(const string sid)
const CBlastOptions & GetOptions() const
Return the object which this object is a handle for.
bool Submit(void)
This submits the search (if necessary) and returns immediately.
void x_SetupDbSearch(vector< CRef< CIgAnnotation > > &annot, CRef< IQueryFactory > &qf)
Prepare blast option handle and query for specified database search.
CRef< CSearchResultSet > RunEx()
Performs the same functionality as Run(), but it returns a different data type.
map< string, int > m_FrameOffset
map< string, string > m_DJChainType
void x_AnnotateD(CRef< CSearchResultSet > &results_D, vector< CRef< CIgAnnotation > > &annot)
void x_FillJDomain(CRef< CSeq_align > &align, CRef< CIgAnnotation > &annot)
static bool s_CompareSeqAlignByScoreAndName(const CRef< CSeq_align > &x, const CRef< CSeq_align > &y)
static int max_v_j_overlap
void x_ProcessDJResult(CRef< CSearchResultSet > &results_V, CRef< CSearchResultSet > &results_D, CRef< CSearchResultSet > &results_J, vector< CRef< CIgAnnotation > > &annots)
const string GetDJChainType(const string sid)
void x_ExtendAlign3end(CRef< CSearchResultSet > &results)
CIgAnnotationInfo m_AnnotationInfo
static int extend_length5end
int GetFwr4EndOffset(const string &sid)
static int max_allowed_j_deletion
void push_back(value_type &element)
Add a value to the back of this container.
CRef< CSearchResultSet > GetResultSet()
Submit the search (if necessary) and return the results.
void SetFilterString(const char *f, bool clear=true)
Sets FilterString.
map< string, int > m_JDomainInfo
TQueryMessages GetErrors(int min_severity=eBlastSevError) const
Accessor for the error/warning messages for this query.
void x_FindDJ(CRef< CSearchResultSet > &results_D, CRef< CSearchResultSet > &results_J, CRef< CIgAnnotation > &annot, CRef< CSeq_align_set > &align_D, CRef< CSeq_align_set > &align_J, string q_ct, bool q_ms, ENa_strand q_st, int q_ve, int iq)
CConstRef< objects::CSeq_id > GetSeqId() const
Accessor for the query's sequence identifier.
void Combine(const TQueryMessages &other)
Combine other messages with these.
void x_AnnotateJ(CRef< CSearchResultSet > &results_J, vector< CRef< CIgAnnotation > > &annot)
CRef< CBlastQueryVector > m_Query
static int max_allowed_VJ_distance_without_D
void x_ExtendAlign5end(CRef< CSearchResultSet > &results)
void x_AnnotateV(CRef< CSearchResultSet > &results, vector< CRef< CIgAnnotation > > &annot)
Annotate the V gene based on blast results.
void x_ProcessDGeneResult(CRef< CSearchResultSet > &results_V, CRef< CSearchResultSet > &results_D, CRef< CSearchResultSet > &results_J, vector< CRef< CIgAnnotation > > &annots)
static bool s_CompareSeqAlignByEvalue(const CRef< CSeq_align > &x, const CRef< CSeq_align > &y)
void x_SetChainType(CRef< CSearchResultSet > &results, vector< CRef< CIgAnnotation > > &annot)
Set the subject chain type and frame info.
static string s_MakeTopHitsId(const CSeq_align_set::Tdata &align_list, int num_align)
string m_CustomInternalData
map< string, string > m_DomainChainType
CRef< CSearchDatabase > m_RemoteDb
void x_SetupVSearch(CRef< IQueryFactory > &qf, CRef< CBlastOptionsHandle > &opts_hndl)
Prepare blast option handle and query for V germline database search.
static bool s_IsSeqAlignAsGood(const CRef< CSeq_align > &x, const CRef< CSeq_align > &y)
void x_ConvertResultType(CRef< CSearchResultSet > &results)
Convert bl2seq result to database search mode.
void SetEntrezQuery(const char *x)
Restrict search to sequences matching this Entrez query.
void SetMatchReward(int r)
bool HasAlignments() const
Return true if there are any alignments for this query.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
CDiagContext_Extra & Print(const string &name, const string &value)
The method does not print the argument, but adds it to the string.
CDiagContext & GetDiagContext(void)
Get diag context instance.
CDiagContext_Extra Extra(void) const
Create a temporary CDiagContext_Extra object.
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
void Warning(CExceptionArgs_Base &args)
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
const string AsFastaString(void) const
void GetLabel(string *label, ELabelType type=eDefault, TLabelFlags flags=fLabel_Default) const
Append a label for this Seq-id to the supplied string.
bool Match(const CSeq_id &sid2) const
Match() - TRUE if SeqIds are equivalent.
@ eContent
Untagged human-readable accession or the like.
CBioseq_Handle AddBioseq(CBioseq &bioseq, TPriority pri=kPriority_Default, EExist action=eExist_Throw)
Add bioseq, return bioseq handle.
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
void RemoveBioseq(const CBioseq_Handle &seq)
Revoke Bioseq previously added using AddBioseq().
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
TSeqPos GetBioseqLength(void) const
bool Empty(void) const THROWS_NONE
Check if CConstRef is empty -- not pointing to any object, which means having a null value.
void Reset(void)
Reset reference object.
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty -- pointing to an object and has a non-null value.
bool NotEmpty(void) const THROWS_NONE
Check if CConstRef is not empty -- pointing to an object and has a non-null value.
bool Empty(void) const THROWS_NONE
Check if CRef is empty -- not pointing to any object, which means having a null value.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define USING_SCOPE(ns)
Use the specified namespace.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
IO_PREFIX::ifstream CNcbiIfstream
Portable alias for ifstream.
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
static string & ToUpper(string &str)
Convert string to upper case -- string& version.
@ fSplit_Tokenize
All delimiters are merged and trimmed, to get non-empty tokens only.
const TDenseg & GetDenseg(void) const
Get the variant data.
Tdata & Set(void)
Assign a value to data member.
list< CRef< CSeq_align > > Tdata
const Tdata & Get(void) const
Get the member data.
const TSegs & GetSegs(void) const
Get the Segs member data.
ENa_strand
strand of nucleic acid
Declares CIgBlast, the C++ API for the IG-BLAST engine.
Main class to perform a BLAST search on the local machine.
constexpr bool empty(list< Ts... >) noexcept
std::istream & in(std::istream &in_, double &x_)
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
NOTE: This file contains work in progress and the APIs are likely to change, please do not rely on th...
Declares the CRemoteBlast class.
static bool GetSeqId(const T &d, set< string > &labels, const string name="", bool detect=false, bool found=false)
string SeqDB_ResolveDbPath(const string &filename)
Resolve a file path using SeqDB's path algorithms.
static SLJIT_INLINE sljit_ins ms(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
static SLJIT_INLINE sljit_ins l(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
Structure to represent a single sequence to be fed to BLAST.
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4