;
95 typedefvector< CRef<CSeq_id> > TSeqIds;
96TSeqIds seqid_v(num_seqs);
97generate(seqid_v.begin(), seqid_v.end(),
100 ITERATE(TSeqIds, seqid, seqid_v) {
108 const TSeqPoskOffsetLength(30);
109 for(
intindex = 0; index <
mask->total_size; ++index) {
111index+kOffsetLength);
115BOOST_REQUIRE_EQUAL(num_seqs, mask_v.size());
117 unsigned intqindex(0);
121BOOST_REQUIRE_MESSAGE( kNumMasks == query_masks_list->size(),
122 "Failed on "+ kProgName);
126ss <<
"Error in query number "<< qindex <<
", context " 127<<
context<<
" ('"<< kProgName <<
"')";
132BOOST_REQUIRE_MESSAGE(frame == (*itr)->GetFrame(),
137 mask->seqloc_array[kNumContexts*qindex+
context];
138BOOST_REQUIRE(loc !=
NULL);
140BOOST_REQUIRE_MESSAGE
141(
offsets.GetFrom() == (*itr)->GetInterval().GetFrom(),
143BOOST_REQUIRE_MESSAGE
144(
offsets.GetTo() == (*itr)->GetInterval().GetTo(),
148BOOST_REQUIRE_EQUAL(kNumMasks,
context);
156BOOST_CHECK(start <= stop);
157 for(
int i= start;
i< stop;
i++) {
168 boolignore_strand_in_mask)
170 const intkNumLcaseLocs = 11;
171 const intkLcaseStarts[kNumLcaseLocs] =
172{ 0, 78, 217, 380, 694, 1018, 1128, 2817, 3084, 3428, 3782 };
173 const intkLcaseEnds[kNumLcaseLocs] =
174{ 75, 208, 316, 685, 1004, 1122, 1298, 2952, 3409, 3733, 3916 };
177 const intkQuerySize = 9180;
178vector<int> kLcaseStartsNegStrand, kLcaseEndsNegStrand;
179kLcaseStartsNegStrand.reserve(kNumLcaseLocs);
180kLcaseEndsNegStrand.reserve(kNumLcaseLocs);
181 for(
i= 0;
i< kNumLcaseLocs;
i++) {
182 intstart = kQuerySize - 1 - kLcaseEnds[
i];
183 intstop = kQuerySize - 1 - kLcaseStarts[
i];
184kLcaseStartsNegStrand.push_back(start);
185kLcaseEndsNegStrand.push_back(stop);
189unique_ptr<SSeqLoc> qsl(
192CSeq_loc* seqloc =
newCSeq_loc();
193 for(
intindex = 0; index < kNumLcaseLocs; ++index) {
194seqloc->SetPacked_int().AddInterval(
id, kLcaseStarts[index],
196BOOST_CHECK(!seqloc->GetPacked_int().Get().back()->CanGetStrand());
197seqloc->SetPacked_int().Set().back()->SetStrand(strand);
199qsl->mask.Reset(seqloc);
200qsl->ignore_strand_in_mask = ignore_strand_in_mask;
203query_v.push_back(*qsl);
205nucl_handle->SetDustFiltering(
false);
206nucl_handle->SetMaskAtHash(
false);
210blast::CBl2Seq blaster(*qsl.get(), *qsl.get(), *nucl_handle);
211(void) blaster.Run();
215BOOST_CHECK_EQUAL(
false, nucl_handle->GetMaskAtHash());
216 for(
i= 0;
i< kNumLcaseLocs;
i++) {
217 constpair<int, int> range_plus(kLcaseStarts[
i], kLcaseEnds[
i]);
218 constpair<int, int> range_minus(kLcaseStartsNegStrand[
i],
219kLcaseEndsNegStrand[
i]);
220 intstarting_offset = 0;
224blaster.m_Blast->m_InternalData->m_QueryInfo->contexts[0].query_offset;
226(blaster.m_Blast->m_InternalData->m_Queries->sequence,
227starting_offset + range_plus.first,
228starting_offset + range_plus.second));
231blaster.m_Blast->m_InternalData->m_QueryInfo->contexts[1].query_offset;
233(blaster.m_Blast->m_InternalData->m_Queries->sequence,
234starting_offset + range_minus.first,
235starting_offset + range_minus.second));
240blaster.m_Blast->m_InternalData->m_QueryInfo->contexts[0].query_offset;
242(blaster.m_Blast->m_InternalData->m_Queries->sequence,
243starting_offset + range_plus.first,
244starting_offset + range_plus.second));
247blaster.m_Blast->m_InternalData->m_QueryInfo->contexts[1].query_offset;
249(blaster.m_Blast->m_InternalData->m_Queries->sequence,
250starting_offset + range_minus.first,
251starting_offset + range_minus.second));
254blaster.m_Blast->m_InternalData->m_QueryInfo->contexts[0].query_offset;
256(blaster.m_Blast->m_InternalData->m_Queries->sequence,
257starting_offset + range_plus.first,
258starting_offset + range_plus.second));
260blaster.m_Blast->m_InternalData->m_QueryInfo->contexts[1].query_offset;
262(blaster.m_Blast->m_InternalData->m_Queries->sequence,
263starting_offset + range_minus.first,
264starting_offset + range_minus.second));
275blaster.GetFilteredQueryRegions();
276BOOST_CHECK(masked_regions_vector.size() == 1);
277BOOST_CHECK_EQUAL(masked_regions_vector.front().size(),
278(
size_t)kNumLcaseLocs);
280BOOST_CHECK(query_v[0].
mask->IsPacked_int());
281BOOST_CHECK_EQUAL(query_v[0].
mask->GetPacked_int().Get().size(),
282masked_regions_vector.front().size());
285query_v[0].
mask->GetPacked_int().Get()) {
286BOOST_CHECK_EQUAL(kLcaseStarts[loc_index], (
int)(*itr)->GetFrom());
287BOOST_CHECK_EQUAL(kLcaseEnds[loc_index], (
int)(*itr)->GetTo());
290BOOST_CHECK_EQUAL(kNumLcaseLocs, loc_index);
295BOOST_CHECK_EQUAL(kLcaseStarts[loc_index], (
int)intv.
GetFrom());
296BOOST_CHECK_EQUAL(kLcaseEnds[loc_index], (
int)intv.
GetTo());
298BOOST_CHECK_EQUAL((*itr)->GetFrame(),
303BOOST_CHECK_EQUAL(kNumLcaseLocs, loc_index);
310 typedefvector< pair<TSeqPos, TSeqPos> > TSegments;
311TSegments masked_offsets;
312masked_offsets.push_back(make_pair(298U, 305U));
313masked_offsets.push_back(make_pair(875U, 882U));
314masked_offsets.push_back(make_pair(1018U, 1115U));
315masked_offsets.push_back(make_pair(1449U, 1479U));
316masked_offsets.push_back(make_pair(3113U, 3133U));
317masked_offsets.push_back(make_pair(3282U, 3298U));
318masked_offsets.push_back(make_pair(3428U, 3441U));
319masked_offsets.push_back(make_pair(3598U, 3606U));
320masked_offsets.push_back(make_pair(4704U, 4710U));
321masked_offsets.push_back(make_pair(6364U, 6373U));
322masked_offsets.push_back(make_pair(6512U, 6573U));
323masked_offsets.push_back(make_pair(7600U, 7672U));
324masked_offsets.push_back(make_pair(7766U, 7772U));
325masked_offsets.push_back(make_pair(8873U, 8880U));
326masked_offsets.push_back(make_pair(9109U, 9179U));
328 const size_tkNumQueries(1);
329 const size_tkNumLocs(masked_offsets.size());
333unique_ptr<SSeqLoc> qsl(
341BOOST_CHECK(query_reference[0].
mask->IsPacked_int());
343query_reference[0].mask->GetPacked_int().Get();
344BOOST_CHECK_EQUAL(kNumLocs, seqinterval_list.size());
346 boolreverse =
IsReverse(query_reference[0].
mask->GetStrand());
347index = reverse ? masked_offsets.size() - 1 : 0;
349BOOST_CHECK_EQUAL(masked_offsets[index].
first,
351BOOST_CHECK_EQUAL(masked_offsets[index].second,
353reverse ? index-- : index++;
359(void) blaster.Run();
361blaster.GetFilteredQueryRegions();
363BOOST_CHECK_EQUAL(kNumQueries, query_reference.size());
364BOOST_CHECK_EQUAL(kNumQueries,
query_test.size());
365BOOST_CHECK_EQUAL(kNumQueries, masked_regions_vector.size());
368BOOST_CHECK_EQUAL(kNumLocs, masked_regions.size());
371BOOST_CHECK_EQUAL(masked_offsets[index].
first,
372(*itr)->GetInterval().GetFrom());
373BOOST_CHECK_EQUAL(masked_offsets[index].second,
374(*itr)->GetInterval().GetTo());
381vector< CRef<CSeq_id> > gis;
386vector<TSeqRange> ranges;
391BOOST_REQUIRE(gis.size() == ranges.size());
394 for(
i= 0;
i< gis.size();
i++) {
396ranges[
i].GetFrom(),
397ranges[
i].GetTo()));
404BOOST_REQUIRE(gis[
i]->Match((*query_interval)->GetId()));
405BOOST_REQUIRE_EQUAL(ranges[
i].GetFrom(),
406(*query_interval)->GetFrom());
407BOOST_REQUIRE_EQUAL(ranges[
i].GetTo(),
408(*query_interval)->GetTo());
414 typedefpair<TGi, TSeqPos> TGiLength;
415vector<TGiLength> gis;
416gis.push_back(make_pair(
GI_CONST(6), 342U));
417gis.push_back(make_pair(
GI_CONST(129295), 232U));
418gis.push_back(make_pair(
GI_CONST(15606659), 443U));
421 input.reserve(gis.size());
422 ITERATE(vector<TGiLength>, gi, gis) {
424seqloc->
SetWhole().SetGi(gi->first);
430 const TSeqPoskStartingPosition(0);
432 constTGiLength& kGiLength = gis[
i++];
434BOOST_REQUIRE(kTargetId.
Match((*query_interval)->GetId()));
435BOOST_REQUIRE_EQUAL(kStartingPosition,
436(*query_interval)->GetFrom());
437BOOST_REQUIRE_EQUAL(kGiLength.second,
438(*query_interval)->GetTo());
445BOOST_REQUIRE(retval.
Empty());
461 prog, strand_opt, blast_msg);
464BOOST_REQUIRE(m->empty());
469 const intkNumLocs = 3;
470 const intkSegStarts[kNumLocs] = { 15, 55, 495 };
471 const intkSegEnds[kNumLocs] = { 27, 68, 513 };
475query_v.push_back(*qsl);
481&query_blk, &query_info);
491& filter_slp,
NULL);
493BOOST_REQUIRE(filtering_options ==
NULL);
494BOOST_REQUIRE(status == 0);
498 for(loc_index=0, loc = filter_slp; loc; loc = loc->
next, ++loc_index) {
500BOOST_REQUIRE_EQUAL(kSegStarts[loc_index], di->
left);
501BOOST_REQUIRE_EQUAL(kSegEnds[loc_index], di->
right);
505BOOST_REQUIRE_EQUAL(kNumLocs, loc_index);
509 const size_tkNumLocs = 4;
510 const TSeqPoskRepeatStarts[kNumLocs] = { 0, 380, 2851, 3113 };
511 const TSeqPoskRepeatEnds[kNumLocs] = { 212, 1297, 2953, 3764 };
513unique_ptr<SSeqLoc> qsl(
516query_v.push_back(*qsl);
522BOOST_REQUIRE(query_v[0].
mask.NotEmpty());
523BOOST_REQUIRE(query_v[0].
mask->IsPacked_int());
525query_v[0].mask->GetPacked_int().Get();
527 size_tloc_index = 0;
528BOOST_REQUIRE_EQUAL(kNumLocs, seqinterval_list.size());
531BOOST_REQUIRE_EQUAL(kRepeatStarts[loc_index], (*itr)->GetFrom());
532BOOST_REQUIRE_EQUAL(kRepeatEnds[loc_index], (*itr)->GetTo());
533BOOST_REQUIRE(!(*itr)->CanGetStrand());
537BOOST_REQUIRE_EQUAL(kNumLocs, loc_index);
542 intpair_size =
sizeof(
TSeqPos) * 2;
571 size_tnum_locs =
sizeof(intervals) / pair_size;
572BOOST_REQUIRE(0 == (
sizeof(intervals) % pair_size));
579query_v.push_back(*qsl);
585BOOST_REQUIRE(query_v[0].
mask.NotEmpty());
586BOOST_REQUIRE(query_v[0].
mask->IsPacked_int());
588query_v[0].mask->GetPacked_int().Get();
590 size_tloc_index = 0;
591BOOST_REQUIRE_EQUAL(num_locs, seqinterval_list.size());
595BOOST_REQUIRE_EQUAL(intervals[loc_index], (*itr)->GetFrom());
596BOOST_REQUIRE_EQUAL(intervals[loc_index+1], (*itr)->GetTo());
597BOOST_REQUIRE(! (*itr)->CanGetStrand());
601BOOST_REQUIRE_EQUAL(num_locs*2, loc_index);
605vector<TSeqRange> masked_regions;
606masked_regions.push_back(
TSeqRange(85028, 85528));
607masked_regions.push_back(
TSeqRange(85539, 85736));
608masked_regions.push_back(
TSeqRange(86334, 86461));
609masked_regions.push_back(
TSeqRange(86487, 86585));
610masked_regions.push_back(
TSeqRange(86730, 87050));
611masked_regions.push_back(
TSeqRange(87313, 87370));
612masked_regions.push_back(
TSeqRange(88134, 88140));
613masked_regions.push_back(
TSeqRange(88171, 88483));
614masked_regions.push_back(
TSeqRange(89032, 89152));
615masked_regions.push_back(
TSeqRange(91548, 91704));
616masked_regions.push_back(
TSeqRange(92355, 92539));
617masked_regions.push_back(
TSeqRange(92550, 92973));
618masked_regions.push_back(
TSeqRange(92983, 93283));
619masked_regions.push_back(
TSeqRange(93296, 93384));
620masked_regions.push_back(
TSeqRange(93472, 93642));
621masked_regions.push_back(
TSeqRange(93685, 94026));
622masked_regions.push_back(
TSeqRange(94435, 94545));
625unique_ptr<SSeqLoc> qsl(
627make_pair<TSeqPos, TSeqPos>(84999, 94637),
630query_v.push_back(*qsl);
638BOOST_REQUIRE(query_v[0].
mask->IsPacked_int());
640query_v[0].mask->GetPacked_int().Get();
642 size_tloc_index = 0;
643BOOST_REQUIRE_EQUAL(masked_regions.size(), seqinterval_list.size());
646BOOST_REQUIRE_EQUAL(masked_regions[loc_index].GetFrom(),
648BOOST_REQUIRE_EQUAL(masked_regions[loc_index].GetTo(),
650BOOST_REQUIRE(!(*itr)->CanGetStrand());
654BOOST_REQUIRE_EQUAL(masked_regions.size(), loc_index);
663BOOST_REQUIRE(
a==
b);
666BOOST_REQUIRE(
a!=
b);
676 query->AddMask(lower_case_mask);
688BOOST_REQUIRE( !mqr.empty() );
692BOOST_REQUIRE(e.
GetMsg().find(
"lossy direction") !=
NPOS);
697BOOST_REQUIRE(*sli == *lower_case_mask);
698BOOST_REQUIRE_EQUAL((
int)2, (
int)mqr.size());
699BOOST_REQUIRE(mqr.front()->GetFrame() == 1);
700BOOST_REQUIRE(mqr.back()->GetFrame() == -1);
709query_v1.push_back(*qsl1);
714query_v2.push_back(*qsl2);
736query_v1.push_back(*qsl1);
741query_v2.push_back(*qsl2);
761vector<TSeqRange> masked_regions;
762masked_regions.push_back(
TSeqRange(85019, 85172));
763masked_regions.push_back(
TSeqRange(85190, 85345));
764masked_regions.push_back(
TSeqRange(85385, 85452));
765masked_regions.push_back(
TSeqRange(85483, 85505));
766masked_regions.push_back(
TSeqRange(85511, 85533));
767masked_regions.push_back(
TSeqRange(85575, 85596));
768masked_regions.push_back(
TSeqRange(85673, 85694));
769masked_regions.push_back(
TSeqRange(85725, 85745));
777query_v.push_back(*qsl);
786BOOST_REQUIRE(query_v[0].
mask->IsPacked_int());
788query_v[0].mask->GetPacked_int().Get();
790 size_tloc_index = 0;
791BOOST_REQUIRE_EQUAL(masked_regions.size(), seqinterval_list.size());
794BOOST_REQUIRE_EQUAL(masked_regions[loc_index].GetFrom(),
796BOOST_REQUIRE_EQUAL(masked_regions[loc_index].GetTo(),
798BOOST_REQUIRE(!(*itr)->CanGetStrand());
802BOOST_REQUIRE_EQUAL(masked_regions.size(), loc_index);
807unique_ptr<SSeqLoc> qsl(
810query_v.push_back(*qsl);
817BOOST_REQUIRE(query_v[0].
mask.Empty());
822unique_ptr<SSeqLoc> qsl
826query_v.push_back(*qsl);
833BOOST_REQUIRE(query_v[0].
mask.Empty());
840query_v.push_back(*qsl);
853query_v.push_back(*qsl);
875vector<TSeqRange> masks;
881masks.push_back(
TSeqRange(1018, 1122));
882masks.push_back(
TSeqRange(1128, 1298));
883masks.push_back(
TSeqRange(2817, 2952));
884masks.push_back(
TSeqRange(2084, 3409));
885masks.push_back(
TSeqRange(3428, 3733));
886masks.push_back(
TSeqRange(3782, 3916));
890 ITERATE(vector<TSeqRange>, range, masks) {
908BOOST_REQUIRE_EQUAL((
size_t)4, restricted_mask.size());
909BOOST_REQUIRE_EQUAL((
TSeqPos)624,
910restricted_mask.back()->GetInterval().GetTo());
912(restricted_mask.front()->GetInterval().GetId()));
913BOOST_REQUIRE(!(restricted_mask.front()->GetInterval().CanGetStrand()));
916restriction.
SetTo(2000);
919BOOST_REQUIRE_EQUAL((
size_t)3, restricted_mask.size());
920TMaskedQueryRegions::iterator itr = restricted_mask.begin();
922BOOST_REQUIRE_EQUAL((
TSeqPos)1000, (*itr)->GetInterval().GetFrom());
923BOOST_REQUIRE_EQUAL((
TSeqPos)1004, (*itr)->GetInterval().GetTo()-1);
924BOOST_REQUIRE(id->
Match((*itr)->GetInterval().GetId()));
925BOOST_REQUIRE(!(*itr)->GetInterval().CanGetStrand());
928BOOST_REQUIRE_EQUAL((
TSeqPos)1018, (*itr)->GetInterval().GetFrom());
929BOOST_REQUIRE_EQUAL((
TSeqPos)1122, (*itr)->GetInterval().GetTo()-1);
930BOOST_REQUIRE(id->
Match((*itr)->GetInterval().GetId()));
931BOOST_REQUIRE(!(*itr)->GetInterval().CanGetStrand());
934BOOST_REQUIRE_EQUAL((
TSeqPos)1128, (*itr)->GetInterval().GetFrom());
935BOOST_REQUIRE_EQUAL((
TSeqPos)1298, (*itr)->GetInterval().GetTo()-1);
936BOOST_REQUIRE(id->
Match((*itr)->GetInterval().GetId()));
937BOOST_REQUIRE(!(*itr)->GetInterval().CanGetStrand());
940BOOST_REQUIRE(itr == restricted_mask.end());
943restriction.
SetTo(20000);
945BOOST_REQUIRE(restricted_mask.empty());
950vector<TSeqRange> masks;
956masks.push_back(
TSeqRange(1018, 1122));
957masks.push_back(
TSeqRange(1128, 1298));
958masks.push_back(
TSeqRange(2817, 2952));
959masks.push_back(
TSeqRange(2084, 3409));
960masks.push_back(
TSeqRange(3428, 3733));
961masks.push_back(
TSeqRange(3782, 3916));
965 ITERATE(vector<TSeqRange>, range, masks) {
974BOOST_REQUIRE(!bqff.
Empty());
976 constset<CSeqLocInfo::ETranslationFrame>& frames = bqff.
ListFrames();
977 ITERATE(set<CSeqLocInfo::ETranslationFrame>, fr, frames) {
978BOOST_REQUIRE(bqff[*fr] !=
NULL);
986vector<TSeqRange> masks;
991 ITERATE(vector<TSeqRange>, range, masks) {
997mqr.push_back(sli_plus);
1000mqr.push_back(sli_minus);
1006BOOST_REQUIRE_EQUAL(bsl->
ssr->
left, 0);
1007BOOST_REQUIRE_EQUAL(bsl->
ssr->
right, 25);
1010BOOST_REQUIRE_EQUAL(bsl->
ssr->
left, 0);
1011BOOST_REQUIRE_EQUAL(bsl->
ssr->
right, 24);
1014BOOST_REQUIRE_EQUAL(bsl->
ssr->
left, 0);
1015BOOST_REQUIRE_EQUAL(bsl->
ssr->
right, 24);
1018BOOST_REQUIRE_EQUAL(bsl->
ssr->
left, 3034);
1019BOOST_REQUIRE_EQUAL(bsl->
ssr->
right, 3059);
1022BOOST_REQUIRE_EQUAL(bsl->
ssr->
left, 3034);
1023BOOST_REQUIRE_EQUAL(bsl->
ssr->
right, 3058);
1026BOOST_REQUIRE_EQUAL(bsl->
ssr->
left, 3034);
1027BOOST_REQUIRE_EQUAL(bsl->
ssr->
right, 3058);
1044BOOST_REQUIRE(!bqff.
Empty());
1046 constset<CSeqLocInfo::ETranslationFrame>& frames = bqff.
ListFrames();
1047 const intkExpectedNumFrames = 2;
1049 ITERATE(set<CSeqLocInfo::ETranslationFrame>, fr, frames) {
1050BOOST_REQUIRE(bqff[*fr] !=
NULL);
1053BOOST_REQUIRE_EQUAL(kExpectedNumFrames, bqff.
GetNumFrames());
1054BOOST_REQUIRE_EQUAL(1, frame_ctr);
1055BOOST_REQUIRE_EQUAL(1, frames.size());
1074BOOST_REQUIRE(!bqff.
Empty());
1076 constset<CSeqLocInfo::ETranslationFrame>& frames = bqff.
ListFrames();
1077 const intkExpectedNumFrames = 2;
1079 ITERATE(set<CSeqLocInfo::ETranslationFrame>, fr, frames) {
1080BOOST_REQUIRE(bqff[*fr] !=
NULL);
1083BOOST_REQUIRE_EQUAL(kExpectedNumFrames, bqff.
GetNumFrames());
1084BOOST_REQUIRE_EQUAL(kExpectedNumFrames, frame_ctr);
1085BOOST_REQUIRE_EQUAL(kExpectedNumFrames, frames.size());
1089 const boolignore_strand_in_mask =
true;
1091ignore_strand_in_mask);
1095 const boolignore_strand_in_mask =
true;
1097ignore_strand_in_mask);
1101 const boolignore_strand_in_mask =
true;
1103ignore_strand_in_mask);
1107 const boolignore_strand_in_mask =
false;
1109ignore_strand_in_mask);
1113 const boolignore_strand_in_mask =
false;
1115ignore_strand_in_mask);
1119 const boolignore_strand_in_mask =
false;
1121ignore_strand_in_mask);
1125 const intkNumLcaseLocs = 11;
1126 const intkLcaseStarts[kNumLcaseLocs] =
1127{ 0, 78, 217, 380, 694, 1018, 1128, 2817, 3084, 3428, 3782 };
1128 const intkLcaseEnds[kNumLcaseLocs] =
1129{ 75, 208, 316, 685, 1004, 1122, 1298, 2952, 3409, 3733, 3916 };
1131 const intkNumLocs = 6;
1132 const intkStarts[kNumLocs] = { 0, 217, 380, 2817, 3084, 3782 };
1133 const intkEnds[kNumLocs] = { 212, 316, 1298, 2953, 3764, 3916 };
1135unique_ptr<SSeqLoc> qsl(
1139CSeq_loc* seqloc =
newCSeq_loc();
1140 for(
intindex = 0; index < kNumLcaseLocs; ++index) {
1141seqloc->SetPacked_int().AddInterval(
id, kLcaseStarts[index],
1143BOOST_REQUIRE(!seqloc->GetPacked_int().Get().back()->CanGetStrand());
1145qsl->mask.Reset(seqloc);
1148query_v.push_back(*qsl);
1153BOOST_REQUIRE(query_v[0].
mask->IsPacked_int());
1157BOOST_REQUIRE(query_v[0].
mask.NotEmpty());
1159query_v[0].
mask->GetPacked_int().Get()) {
1161BOOST_REQUIRE_EQUAL(kStarts[loc_index], (
int)(*itr)->GetFrom());
1162BOOST_REQUIRE_EQUAL(kEnds[loc_index], (
int)(*itr)->GetTo());
1166BOOST_REQUIRE_EQUAL(kNumLocs, loc_index);
1170 const intkNumLocs = 13;
1171 const intkStarts[kNumLocs] =
1172{ 0, 298, 380, 1449, 2851, 3113, 4704, 6364, 6512, 7600,
1174 const intkEnds[kNumLocs] =
1175{ 212, 305, 1297, 1479, 2953, 3764, 4710, 6373, 6573, 7672,
1178unique_ptr<SSeqLoc> qsl(
1181query_v.push_back(*qsl);
1191BOOST_REQUIRE(query_v[0].
mask.NotEmpty());
1193query_v[0].
mask->GetPacked_int().Get()) {
1195BOOST_REQUIRE_EQUAL(kStarts[loc_index], (
int)(*itr)->GetFrom());
1196BOOST_REQUIRE_EQUAL(kEnds[loc_index], (
int)(*itr)->GetTo());
1199BOOST_REQUIRE_EQUAL(kNumLocs, loc_index);
1203 const intkNumLocs = 15;
1204 const intkDustStarts[kNumLocs] =
1205{ 298, 875, 1018, 1449, 3113, 3282, 3428, 3598, 4704, 6364,
12066512, 7600, 7766, 8873, 9109};
1207 const intkDustEnds[kNumLocs] =
1208{ 305, 882, 1115, 1479, 3133, 3298, 3441, 3606, 4710, 6373,
12096573, 7672, 7772, 8880 , 9179};
1212unique_ptr<SSeqLoc> qsl(
1215query_v.push_back(*qsl);
1223query_v[0].
mask->GetPacked_int().Get()) {
1224BOOST_REQUIRE_EQUAL(kDustStarts[loc_index], (
int)(*itr)->GetFrom());
1225BOOST_REQUIRE_EQUAL(kDustEnds[loc_index], (
int)(*itr)->GetTo());
1229BOOST_REQUIRE_EQUAL(loc_index, kNumLocs);
1233 const intkNumLocs = 15;
1234 const intkDustStarts[kNumLocs] =
1235{ 298, 875, 1018, 1449, 3113, 3282, 3428, 3598, 4704, 6364,
12366512, 7600, 7766, 8873, 9109};
1237 const intkDustEnds[kNumLocs] =
1238{ 305, 882, 1115, 1479, 3133, 3298, 3441, 3606, 4710, 6373,
12396573, 7672, 7772, 8880 , 9179};
1242unique_ptr<SSeqLoc> qsl(
1245query_v.push_back(*qsl);
1253query_v[0].
mask->GetPacked_int().Get()) {
1254BOOST_REQUIRE_EQUAL(kDustStarts[loc_index], (
int)(*itr)->GetFrom());
1255BOOST_REQUIRE_EQUAL(kDustEnds[loc_index], (
int)(*itr)->GetTo());
1259BOOST_REQUIRE_EQUAL(loc_index, kNumLocs);
1263 const intkNumLocs = 15;
1264 const intkDustStarts[kNumLocs] =
1265{ 298, 875, 1018, 1449, 3113, 3282, 3428, 3598, 4704, 6364,
12666512, 7600, 7766, 8873, 9109};
1267 const intkDustEnds[kNumLocs] =
1268{ 305, 882, 1115, 1479, 3133, 3298, 3441, 3606, 4710, 6373,
12696573, 7672, 7772, 8880 , 9179};
1272unique_ptr<SSeqLoc> qsl(
1275query_v.push_back(*qsl);
1282 intloc_index = reverse ? kNumLocs - 1 : 0;
1284query_v[0].
mask->GetPacked_int().Get()) {
1285BOOST_REQUIRE_EQUAL(kDustStarts[loc_index], (
int)(*itr)->GetFrom());
1286BOOST_REQUIRE_EQUAL(kDustEnds[loc_index], (
int)(*itr)->GetTo());
1287reverse ? --loc_index : ++loc_index;
1292BOOST_REQUIRE_EQUAL(loc_index, kNumLocs);
1295BOOST_REQUIRE_EQUAL(loc_index, -1);
1301 const intkNumLocs = 3;
1302 const intkSegStarts[kNumLocs] = { 15, 55, 495 };
1303 const intkSegEnds[kNumLocs] = { 27, 68, 513 };
1307query_v.push_back(*qsl);
1313&query_blk, &query_info);
1323&filter_out, &blast_message);
1325BOOST_REQUIRE(filter_options ==
NULL);
1326BOOST_REQUIRE(status == 0);
1332 for(loc_index=0, loc = filter_slp; loc; loc = loc->
next, ++loc_index) {
1334BOOST_REQUIRE_EQUAL(kSegStarts[loc_index], di->
left);
1335BOOST_REQUIRE_EQUAL(kSegEnds[loc_index], di->
right);
1338BOOST_REQUIRE_EQUAL(kNumLocs, loc_index);
1341BOOST_REQUIRE(filter_out ==
NULL);
1345 const intkNumLocs = 3;
1346 const intkSegStarts[kNumLocs] = { 15, 55, 495 };
1347 const intkSegEnds[kNumLocs] = { 27, 68, 513 };
1351query_v.push_back(*qsl);
1357&query_blk, &query_info);
1361 for(
Int4loc_index=0; loc_index<kNumLocs; ++loc_index) {
1364kSegEnds[loc_index]);
1367kSegEnds[loc_index]);
1376BOOST_REQUIRE(filter_maskloc ==
NULL);
1381 for(
intindex=0; index<query_length; index++)
1386BOOST_REQUIRE_EQUAL(-241853716, (
int)
hash);
1390 const intkNumLocs = 15;
1391 const intkDustStarts[kNumLocs] =
1392{ 298, 875, 1018, 1064, 1448, 3113, 3282, 3428, 3598, 4704, 6364,
13936511, 7766, 8873, 9108 };
1394 const intkDustEnds[kNumLocs] =
1395{ 305, 882, 1045, 1115, 1479, 3133, 3298, 3441, 3606, 4710, 6373,
13966573, 7772, 8880, 9179 };
1402query_v.push_back(*qsl);
1408&query_blk, &query_info);
1412 for(
Int4loc_index=0; loc_index<kNumLocs; ++loc_index) {
1415kDustEnds[loc_index]);
1418kDustEnds[loc_index]);
1428BOOST_REQUIRE(filter_maskloc ==
NULL);
1433 for(
intindex=0; index<query_length; index++)
1438BOOST_REQUIRE_EQUAL(-1261879517, (
int)
hash);
1442 const intkNumLocs0 = 15;
1443 const intkNumLocs1 = 80;
1444 const intkNumLocs2 = 1;
1446 intdust_starts0[kNumLocs0] =
1447{ 298, 875, 1018, 1449, 3113, 3282, 3428, 3598, 4704, 6364,
14486512, 7600, 7766, 8873, 9109};
1449 intdust_ends0[kNumLocs0] =
1450{ 305, 882, 1115, 1479, 3133, 3298, 3441, 3606, 4710, 6373,
14516573, 7672, 7772, 8880 , 9179};
1452 intdust_starts1[kNumLocs1] =
1453{ 189, 862, 1717, 1880, 2301, 2850, 3074, 3301, 4865, 5231, 5397,
14545825, 5887, 6560, 6806, 7178, 7709, 8000, 8275, 8441, 9449, 9779,
145510297, 10457, 11033, 11242, 12271, 12410, 12727, 13803, 14743, 15052,
145615153, 15262, 16201, 16968, 17318, 18470, 20179, 21513, 21569,
145722034, 22207, 22657, 22890, 23326, 27984, 28305, 28581, 28960, 29678,
145830553, 31195, 32347, 33641, 33785, 34138, 34861, 34872, 35028,
145935676, 35727, 36105, 36312, 36841, 38459, 38610, 38997, 39217, 39428,
146039629, 42243, 42584, 43157, 43346, 43619, 44040, 44617, 46791, 47213};
1461 intdust_ends1[kNumLocs1] =
1462{ 230, 876, 1741, 1898, 2315, 2868, 3117, 3308, 4886, 5255, 5433, 5860,
14635943, 6566, 6857, 7245, 7737, 8014, 8286, 8479, 9496, 9830, 10306,
146410581, 11082, 11255, 12277, 12432, 12748, 13809, 14750, 15121, 15171,
146515345, 16237, 16992, 17332, 18482, 20185, 21524, 21688, 22072, 22220,
146622672, 22898, 23348, 27996, 28311, 28626, 28998, 29690, 30596, 31220,
146732359, 33683, 33815, 34203, 34870, 34894, 35039, 35725, 35797, 36114,
146836318, 36869, 38497, 38632, 39035, 39223, 39477, 39635, 42249, 42591,
146943175, 43410, 43648, 44049, 44630, 46811, 47219};
1470 intdust_starts2[kNumLocs2] = {156};
1471 intdust_ends2[kNumLocs2] = {172};
1473 typedefpair<int*, int*> TStartEndPair;
1474TStartEndPair pair0(dust_starts0, dust_ends0);
1475TStartEndPair pair1(dust_starts1, dust_ends1);
1476TStartEndPair pair2(dust_starts2, dust_ends2);
1478vector< TStartEndPair > start_end_v;
1479start_end_v.push_back(pair0);
1480start_end_v.push_back(pair1);
1481start_end_v.push_back(pair2);
1484unique_ptr<SSeqLoc> qsl1(
1487unique_ptr<SSeqLoc> qsl2(
1490unique_ptr<SSeqLoc> qsl3(
1495query_v.push_back(*qsl1);
1496query_v.push_back(*qsl2);
1497query_v.push_back(*qsl3);
1506 ITERATE(vector< TStartEndPair >, vec_iter, start_end_v)
1508TStartEndPair local_pair = *vec_iter;
1509 int* start = local_pair.first;
1510 int* stop = local_pair.second;
1513query_v[query_number].
mask->GetPacked_int().Get()) {
1514BOOST_REQUIRE_EQUAL(start[loc_index], (
int)(*itr)->GetFrom());
1515BOOST_REQUIRE_EQUAL(stop[loc_index], (
int)(*itr)->GetTo());
1524 const intkNumLocs = 4;
1525 const intkMaskStarts[kNumLocs] = { 10, 20, 30, 40 };
1526 const intkMaskEnds[kNumLocs] = { 15, 25, 35, 45 };
1527 const intkRange[2] = { 12, 22 };
1531 for(index = 0; index < kNumLocs; ++index) {
1532 BlastSeqLocNew(&mask_loc, kMaskStarts[index], kMaskEnds[index]);
1539 for(index = 0, loc_var = mask_loc; loc_var;
1540++index, loc_var = loc_var->
next) {
1541BOOST_REQUIRE_EQUAL(kMaskStarts[index], (
int)loc_var->ssr->left);
1542BOOST_REQUIRE_EQUAL(kMaskEnds[index], (
int)loc_var->ssr->right);
1544BOOST_REQUIRE_EQUAL(kNumLocs, index);
1547 for(index = 0, loc_var = mask_loc; loc_var;
1548++index, loc_var = loc_var->
next);
1549BOOST_REQUIRE_EQUAL(2, index);
1550BOOST_REQUIRE_EQUAL(kMaskEnds[0]-kRange[0], (
int)mask_loc->
ssr->
right);
1551BOOST_REQUIRE_EQUAL(kMaskStarts[1]-kRange[0],
1553BOOST_REQUIRE_EQUAL(kRange[1]-kRange[0],
1558BOOST_REQUIRE(mask_loc ==
NULL);
1563 const intkNumQueries = 3;
1565 const intkQueryLengths[kNumQueries] = { 1639, 1151, 1164 };
1569 for(
intindex = 0; index < kNumQueries; ++index) {
1571loc->
SetWhole().SetGi(kQueryGis[index]);
1574query_v.push_back(
SSeqLoc(loc, scope));
1584 for(
int i= 0;
i< kNumQueries;
i++) {
1587BOOST_REQUIRE_EQUAL(kQueryLengths[
i],
len);
1593 const intkNumQueries = 3;
1595 const intkNumContexts = kNumQueries*
NUM_FRAMES;
1598BOOST_REQUIRE_EQUAL(kNumContexts, query_info->
last_context+ 1);
1601{ { 660, 686 }, { 92, 119 }, { 1156, 1163 } };
1604BOOST_REQUIRE_EQUAL(kNumContexts, mask_loc->
total_size);
1606 for(
intindex = 0; index < kNumQueries; index++) {
1616BOOST_REQUIRE_EQUAL(kNumContexts, mask_loc->
total_size);
1618 const intkProtStarts[kNumContexts] =
1619{ 220, 219, 219, 317, 317, 316, 30, 30, 30, 343, 343, 343, 385, 385,
1621 const intkProtEnds[kNumContexts] =
1622{ 228, 228, 228, 326, 325, 325, 39, 39, 39, 352, 352, 352, 387, 386,
1625 for(
intindex = 0; index < kNumContexts; ++index) {
1628os <<
"Context "<< index <<
" has no mask!";
1634os <<
"Context "<< index;
1635BOOST_REQUIRE_MESSAGE(kProtStarts[index] == range->
left,
1637BOOST_REQUIRE_MESSAGE(kProtEnds[index] == range->
right,
1643BOOST_REQUIRE_EQUAL(kNumContexts, mask_loc->
total_size);
1644 const intkNuclStarts[kNumContexts] =
1645{ 660, 658, 659, 661, 663, 662, 90, 91, 92, 95, 94, 93, 1155, 1156,
16461154, 1158, 1157, 1159 };
1647 const intkNuclEnds[kNumContexts] =
1648{ 684, 685, 686, 687, 686, 688, 117, 118, 119, 121, 120, 119, 1161,
16491159, 1160, 1163, 1162, 1161 };
1651 for(
intindex = 0; index < kNumContexts; ++index) {
1654os <<
"Context "<< index <<
" has no mask!";
1660os <<
"Context "<< index;
1661BOOST_REQUIRE_MESSAGE(kNuclStarts[index] == range->
left,
1663BOOST_REQUIRE_MESSAGE(kNuclEnds[index] == range->
right,
1672BOOST_REQUIRE(
strcmp(retval.
get(),
"F") == 0);
1680BOOST_REQUIRE(
strcmp(retval.
get(),
"m;") == 0);
1689 string(4096,
'X').c_str());
1696 "D 20 128 1;R -d XXXXXXXXXXXXXXXXXXXX"));
1704&filtering_options,
NULL);
1705BOOST_REQUIRE(status == 0);
1706BOOST_REQUIRE(filtering_options !=
NULL);
1707BOOST_REQUIRE_EQUAL(
false, !!filtering_options->
mask_at_hash);
1711BOOST_REQUIRE(filtering_options ==
NULL);
1719&filtering_options,
NULL);
1720BOOST_REQUIRE(status == 0);
1721BOOST_REQUIRE_EQUAL(
true, !!filtering_options->
mask_at_hash);
1726BOOST_REQUIRE_EQUAL(
string(
"L;m;"),
string(retval.
get()));
1729BOOST_REQUIRE(filtering_options ==
NULL);
1737&filtering_options,
NULL);
1738BOOST_REQUIRE(status == 0);
1739BOOST_REQUIRE_EQUAL(
false, !!filtering_options->
mask_at_hash);
1744BOOST_REQUIRE(
strcmp(retval.
get(),
"L;") == 0);
1747BOOST_REQUIRE(filtering_options ==
NULL);
1755BOOST_REQUIRE(status == 0);
1756BOOST_REQUIRE_EQUAL(
false, !!filtering_options->
mask_at_hash);
1758BOOST_REQUIRE(filtering_options->
segOptions);
1760BOOST_REQUIRE_CLOSE(1.0, filtering_options->
segOptions->
locut, 0.01);
1761BOOST_REQUIRE_CLOSE(1.5, filtering_options->
segOptions->
hicut, 0.01);
1764BOOST_REQUIRE(
strcmp(retval.
get(),
"S 10 1.0 1.5;") == 0);
1767BOOST_REQUIRE(filtering_options ==
NULL);
1776BOOST_REQUIRE_EQUAL(1, (
int) status);
1777BOOST_REQUIRE(filtering_options ==
NULL);
1785BOOST_REQUIRE(status == 0);
1786BOOST_REQUIRE_EQUAL(
false, !!filtering_options->
mask_at_hash);
1791BOOST_REQUIRE(
strcmp(retval.
get(),
"L;") == 0);
1794BOOST_REQUIRE(filtering_options ==
NULL);
1801BOOST_REQUIRE(status == 0);
1802BOOST_REQUIRE_EQUAL(
false, !!filtering_options->
mask_at_hash);
1804BOOST_REQUIRE(filtering_options->
segOptions);
1807BOOST_REQUIRE(
strcmp(retval.
get(),
"L;") == 0);
1810BOOST_REQUIRE(filtering_options ==
NULL);
1817BOOST_REQUIRE(status == 0);
1819BOOST_REQUIRE(! filtering_options->
dustOptions);
1820BOOST_REQUIRE(! filtering_options->
segOptions);
1825BOOST_REQUIRE(
strcmp(retval.
get(),
"W -t 9606;") == 0);
1828BOOST_REQUIRE(filtering_options ==
NULL);
1833 const intkNewLevel = 21;
1834 const intkNewWindow = 68;
1848BOOST_REQUIRE_EQUAL(0, (
int) status);
1850BOOST_REQUIRE_EQUAL(
true, !!
result->mask_at_hash);
1851BOOST_REQUIRE_EQUAL(kNewLevel,
result->dustOptions->level);
1852BOOST_REQUIRE_EQUAL(kNewWindow,
result->dustOptions->window);
1853BOOST_REQUIRE(
result->repeatFilterOptions);
1858BOOST_REQUIRE_EQUAL(0, (
int) status);
1860BOOST_REQUIRE_EQUAL(kNewLevel,
result->dustOptions->level);
1861BOOST_REQUIRE_EQUAL(kNewWindow,
result->dustOptions->window);
1866BOOST_REQUIRE_EQUAL(0, (
int) status);
1868BOOST_REQUIRE_EQUAL(
true, !!
result->mask_at_hash);
1869BOOST_REQUIRE(
result->repeatFilterOptions);
1881BOOST_REQUIRE_EQUAL(
false, nucl_handle.
GetMaskAtHash());
1917BOOST_REQUIRE_THROW(
1926 const size_tkNumSeqs = 10;
1940BOOST_REQUIRE_EQUAL((
size_t)kNumSeqs, (
size_t)mask_v.size());
1942BOOST_REQUIRE_EQUAL((
size_t)0
U, query_masks_list->size());
1947 const intkNumberLocIn = 7;
1948 const intkLocStartIn[kNumberLocIn] =
1949{ 281312, 281356, 281416, 281454, 281895, 282435, 282999};
1950 const intkLocEndIn[kNumberLocIn] =
1951{ 281736, 281406, 281446, 281878, 282423, 282968, 283191};
1953 const intkNumberLocOut = 4;
1954 const intkLocStartOut[kNumberLocOut] =
1955{ 281312, 281895, 282435, 282999};
1956 const intkLocEndOut[kNumberLocOut] =
1957{ 281878, 282423, 282968, 283191};
1960 for(
intindex=0; index<kNumberLocIn; index++)
1977BOOST_REQUIRE_EQUAL(
count, kNumberLocOut);
1984BOOST_REQUIRE_EQUAL(ssr->
left, kLocStartOut[
count]);
1985BOOST_REQUIRE_EQUAL(ssr->
right, kLocEndOut[
count]);
1995vector<EBlastProgramType> programs =
2000vector<int> num_seqs_array;
2001num_seqs_array.reserve(3);
2002num_seqs_array.push_back(random_gen.
GetRand(1,10));
2003num_seqs_array.push_back(random_gen.
GetRand(1,10));
2004num_seqs_array.push_back(random_gen.
GetRand(1,10));
2006 ITERATE(vector<EBlastProgramType>, program, programs) {
2007 ITERATE(vector<int>, num_seqs, num_seqs_array) {
2014 #if SEQLOC_MIX_QUERY_OK 2017 const intkNumInts = 20;
2018 const intkStarts[kNumInts] =
2019{ 838, 1838, 6542, 7459, 9246, 10431, 14807, 16336, 19563,
202020606, 21232, 22615, 23822, 27941, 29597, 30136, 31287,
202131786, 33315, 35402 };
2022 const intkEnds[kNumInts] =
2023{ 961, 2010, 6740, 7573, 9408, 10609, 15043, 16511, 19783,
202420748, 21365, 22817, 24049, 28171, 29839, 30348, 31362,
202531911, 33485, 37952 };
2027 const intkNumMaskLocs = 7;
2028 const intkMaskStarts[kNumMaskLocs] =
2029{ 2607, 3000, 3739, 4238, 5211, 5602, 5716 };
2030 const intkMaskStops[kNumMaskLocs] =
2031{ 2769, 3006, 3809, 4244, 5218, 5608, 5722 };
2033 const intkNumMaskLocs = 8;
2034 const intkMaskStarts[kNumMaskLocs] =
2035{ 29678, 30136, 31305, 35786, 36285, 37258, 37649, 37763 };
2036 const intkMaskStops[kNumMaskLocs] =
2037{ 29839, 30136, 31311, 35856, 36291, 37265, 37655, 37769 };
2044 for(index = 0; index < kNumInts; ++index) {
2046next_loc->SetInt().SetFrom(kStarts[index]);
2047next_loc->SetInt().SetTo(kEnds[index]);
2048next_loc->SetInt().SetId(qid);
2049qloc->SetMix().Set().push_back(next_loc);
2053scope->AddDefaults();
2055unique_ptr<SSeqLoc>
query(
new SSeqLoc(qloc, scope));
2057query_v.push_back(*
query);
2065query_v[0].
mask->GetPacked_int().Get()) {
2066BOOST_REQUIRE_EQUAL(kMaskStarts[loc_index],
2067(
int) (*itr)->GetFrom());
2068BOOST_REQUIRE_EQUAL(kMaskStops[loc_index],
2069(
int) (*itr)->GetTo());
2072BOOST_REQUIRE_EQUAL(kNumMaskLocs, loc_index);
2092BOOST_REQUIRE(mask_itr !=
NULL);
2093BOOST_REQUIRE_EQUAL((
int)itr->GetFrom(), (
int)mask_itr->
ssr->
left);
2094BOOST_REQUIRE_EQUAL((
int)itr->GetTo(), (
int)mask_itr->
ssr->
right);
2095mask_itr = mask_itr->
next;
2097BOOST_REQUIRE(mask_itr ==
NULL);
2123BOOST_REQUIRE(mask_itr !=
NULL);
2124BOOST_REQUIRE_EQUAL((
int)itr->GetFrom(), (
int)mask_itr->
ssr->
left);
2125BOOST_REQUIRE_EQUAL((
int)itr->GetTo(), (
int)mask_itr->
ssr->
right);
2126mask_itr = mask_itr->
next;
2128BOOST_REQUIRE(mask_itr ==
NULL);
2150BOOST_REQUIRE(mask_itr !=
NULL);
2151BOOST_REQUIRE_EQUAL((
int)itr->GetFrom(), (
int)mask_itr->
ssr->
left);
2152BOOST_REQUIRE_EQUAL((
int)itr->GetTo(), (
int)mask_itr->
ssr->
right);
2153mask_itr = mask_itr->
next;
2155BOOST_REQUIRE(mask_itr ==
NULL);
2172reverse(rv.begin(), rv.end());
2176BOOST_REQUIRE(mask_itr !=
NULL);
2177BOOST_REQUIRE_EQUAL((
int)itr->GetFrom(), (
int)mask_itr->
ssr->
left);
2178BOOST_REQUIRE_EQUAL((
int)itr->GetTo(), (
int)mask_itr->
ssr->
right);
2179mask_itr = mask_itr->
next;
2181BOOST_REQUIRE(mask_itr ==
NULL);
2191BOOST_REQUIRE(taxids.
empty() ==
false);
2192BOOST_REQUIRE(taxids.
find(9606) != taxids.
end());
bool IsReverse(ENa_strand s)
Declares the CBl2Seq (BLAST 2 Sequences) class.
Contains C++ wrapper classes to structures in algo/blast/core as well as some auxiliary functions to ...
#define NUM_FRAMES
Number of frames to which we translate in translating searches.
void BlastSetUp_MaskQuery(BLAST_SequenceBlk *query_blk, const BlastQueryInfo *query_info, const BlastMaskLoc *filter_maskloc, EBlastProgramType program_number)
Masks the sequence given a BlastMaskLoc.
BlastMaskLoc * BlastMaskLocFree(BlastMaskLoc *mask_loc)
Deallocate memory for a BlastMaskLoc structure as well as the BlastSeqLoc's pointed to.
Int2 BlastSetUp_GetFilteringLocations(BLAST_SequenceBlk *query_blk, const BlastQueryInfo *query_info, EBlastProgramType program_number, const SBlastFilterOptions *filter_options, BlastMaskLoc **filter_out, Blast_Message **blast_message)
Does preparation for filtering and then calls BlastSetUp_Filter.
Int2 BlastFilteringOptionsFromString(EBlastProgramType program_number, const char *instructions, SBlastFilterOptions **filtering_options, Blast_Message **blast_message)
Produces SBlastFilterOptions from a string that has been traditionally supported in blast.
const Uint1 kNuclMask
BLASTNA element used to mask bases in BLAST.
Int2 BlastSetUp_Filter(EBlastProgramType program_number, Uint1 *sequence, Int4 length, Int4 offset, const SBlastFilterOptions *filter_options, BlastSeqLoc **seqloc_retval, Blast_Message **blast_message)
Runs seg filtering functions, according to the filtering options, returns BlastSeqLoc*.
Int2 BlastMaskLocDNAToProtein(BlastMaskLoc *mask_loc, const BlastQueryInfo *query_info)
Given a BlastMaskLoc with an array of lists of DNA mask locations, substitutes that array by a new ar...
void BlastSeqLocCombine(BlastSeqLoc **mask_loc, Int4 link_value)
Go through all mask locations in one sequence and combine any that overlap, deallocating the unneeded...
BlastMaskLoc * BlastMaskLocNew(Int4 total)
Allocate memory for a BlastMaskLoc.
BlastSeqLoc * BlastSeqLocFree(BlastSeqLoc *loc)
Deallocate all BlastSeqLoc objects in a chain.
BlastSeqLoc * BlastSeqLocNew(BlastSeqLoc **head, Int4 from, Int4 to)
Create and initialize a new sequence interval.
Int2 BlastMaskLocProteinToDNA(BlastMaskLoc *mask_loc, const BlastQueryInfo *query_info)
Given a BlastMaskLoc with an array of lists of mask locations per protein frame, recalculates all mas...
char * BlastFilteringOptionsToString(const SBlastFilterOptions *filtering_options)
Convert the filtering options structure to a string.
Declares the CBlastNucleotideOptionsHandle class.
Definitions which are dependent on the NCBI C++ Object Manager.
SDustOptions * SDustOptionsFree(SDustOptions *dust_options)
Frees SDustOptions.
Int2 SRepeatFilterOptionsResetDB(SRepeatFilterOptions **repeat_options, const char *dbname)
Resets name of db for repeat filtering.
SRepeatFilterOptions * SRepeatFilterOptionsFree(SRepeatFilterOptions *repeat_options)
Frees SRepeatFilterOptions.
SBlastFilterOptions * SBlastFilterOptionsFree(SBlastFilterOptions *filter_options)
Frees SBlastFilterOptions and all subservient structures.
Int2 SBlastFilterOptionsMerge(SBlastFilterOptions **combined, const SBlastFilterOptions *opt1, const SBlastFilterOptions *opt2)
Merges two sets of options together, taking the non-default one as preferred.
Int2 SDustOptionsNew(SDustOptions **dust_options)
Allocates memory for SDustOptions, fills in defaults.
@ eRepeats
Repeat filtering for nucleotides.
@ eDust
low-complexity for nucleotides.
@ eSeg
low-complexity for proteins.
Int2 SBlastFilterOptionsNew(SBlastFilterOptions **filter_options, EFilterOptions type)
Allocates memory for SBlastFilterOptions.
Declares the CBlastOptionsHandle and CBlastOptionsFactory classes.
EBlastProgramType
Defines the engine's notion of the different applications of the BLAST algorithm.
Int4 BlastQueryInfoGetQueryLength(const BlastQueryInfo *qinfo, EBlastProgramType program, Int4 query_index)
Obtains the sequence length for a given query in the query, without taking into consideration any app...
Utilities initialize/setup BLAST.
void BlastSeqLoc_RestrictToInterval(BlastSeqLoc **mask, Int4 from, Int4 to)
Adjusts the mask locations coordinates to a sequence interval.
@ eBlastn
Nucl-Nucl (traditional blastn)
@ eBlastp
Protein-Protein.
@ eBlastx
Translated nucl-Protein.
Int1 BLAST_ContextToFrame(EBlastProgramType prog_number, Uint4 context_number)
This function translates the context number of a context into the frame of the sequence.
BOOST_AUTO_TEST_CASE(TSeqLocVector2Packed_seqint_TestIntervals)
vector< TSeqRange > TRangeVector
void setupQueryStructures(TSeqLocVector &query_vector, const CBlastOptions &kOpts, BLAST_SequenceBlk **query_blk, BlastQueryInfo **qinfo)
void setupQueryInfoForOffsetTranslation(CBlastQueryInfo &query_info)
static BlastSeqLoc * s_RangeVector2BlastSeqLoc(const TRangeVector &rv)
static void x_TestGetFilteredQueryRegions(ENa_strand strand)
static bool x_AreAllBasesMasked(const Uint1 *sequence, int start, int stop)
void BlastSeqLocListReverse(BlastSeqLoc **head)
Reverse elements in the list.
static void x_TestGetSeqLocInfoVector(EBlastProgramType program, size_t num_seqs)
BOOST_AUTO_TEST_SUITE_END() static int s_GetSegmentFlags(const CBioseq &bioseq)
ncbi::TMaskedQueryRegions mask
Wrapper class for BLAST_SequenceBlk .
Defines BLAST error codes (user errors included)
static void x_TestLowerCaseMaskWith(ENa_strand strand, bool ignore_strand_in_mask)
Wrapper class for BlastMaskLoc .
Handle to the nucleotide-nucleotide options to the BLAST algorithm.
Encapsulates ALL the BLAST algorithm's options.
Collection of BlastSeqLoc lists for filtering processing.
Wrapper class for BlastQueryInfo .
void AddQuery(CRef< CBlastSearchQuery > q)
Add a query to the set.
TMaskedQueryRegions GetMaskedRegions(size_type i) const
Get the masked regions for a query by number.
CRef< objects::CSeq_loc > GetMasks(size_type i) const
Convenience method to get a CSeq_loc representing the masking locations.
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string. Sample usage:
vector< CRange< TSeqPos > > TRanges
void AddInterval(const CSeq_interval &ival)
for convenience
structure for seqloc info
static CRef< CScope > NewScope(bool with_defaults=true)
Return a new scope, possibly (by default) with default loaders, which will include the Genbank loader...
CRef< blast::CBlastSearchQuery > CreateBlastSearchQuery(objects::CSeq_id &id, objects::ENa_strand s=objects::eNa_strand_unknown)
static CTestObjMgr & Instance()
Collection of masked regions for a single query sequence.
TMaskedQueryRegions RestrictToSeqInt(const objects::CSeq_interval &location) const
Return a new instance of this object that is restricted to the location specified.
typedef for the messages for an entire BLAST search, which could be comprised of multiple query seque...
const_iterator find(const key_type &key) const
const_iterator end() const
Calls sym dust lib in algo/dustmask and returns CSeq_locs for use by BLAST.
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
static DLIST_TYPE *DLIST_NAME() last(DLIST_LIST_TYPE *list)
static void query_test(int prepare, SQLRETURN expected, const char *expected_status)
CRef< objects::CPacked_seqint > TSeqLocVector2Packed_seqint(const TSeqLocVector &sequences)
Converts a TSeqLocVector into a CPacked_seqint.
int GetDustFilteringLinker() const
Get linker parameter for dust.
void Blast_GetSeqLocInfoVector(EBlastProgramType program, const objects::CPacked_seqint &queries, const BlastMaskLoc *mask, TSeqLocInfoVector &mask_v)
Converts a BlastMaskLoc internal structure into an object returned by the C++ API.
BlastQueryInfo * Release()
bool GetDustFiltering() const
Is dust filtering enabled?
void SetWindowMaskerTaxId(int taxid)
Enable window masker and select a taxid (or 0 to disable).
void SetupQueries(TSeqLocVector &queries, BlastQueryInfo *qinfo, BLAST_SequenceBlk **seqblk, EBlastProgramType prog, objects::ENa_strand strand_opt, TSearchMessages &messages)
Populates BLAST_SequenceBlk with sequence data for use in CORE BLAST.
objects::ENa_strand GetStrandOption() const
bool Empty()
Returns true if this object contains any masking information.
static CBlastOptionsHandle * Create(EProgram program, EAPILocality locality=CBlastOptions::eLocal)
Creates an options handle object configured with default options for the requested program,...
int GetDustFilteringLevel() const
Get level parameter for dust.
const char * GetWindowMaskerDatabase() const
Get the window masker database name (or NULL if not set).
EBlastProgramType GetProgramType() const
Returns the CORE BLAST notion of program type.
void GetTaxIdWithWindowMaskerSupport(set< int > &supported_taxids)
This function returns a list of NCBI taxonomy IDs for which there exists windowmasker masking data to...
void SetRepeatFilteringDB(const char *db)
Enable repeat filtering.
bool QueryHasMultipleFrames() const
Check whether the query is multiframe for this type of search.
void Blast_FindWindowMaskerLoc(CBlastQueryVector &query, const CBlastOptions *opts)
Find Window Masker filtered locations using a BlastOptions.
size_t GetNumFrames() const
const CBlastOptions & GetOptions() const
Return the object which this object is a handle for.
bool GetMaskAtHash() const
Returns whether masking should only be done for lookup table creation.
void SetRepeatFiltering(bool val)
Enable repeat filtering.
bool GetRepeatFiltering() const
Is repeat filtering enabled?
void Blast_FindRepeatFilterLoc(TSeqLocVector &query_loc, const CBlastOptionsHandle *opts_handle)
Finds repeats locations for a given set of sequences.
unsigned int GetNumberOfContexts(EBlastProgramType p)
Returns the number of contexts for a given BLAST program.
const set< ETranslationFrame > & ListFrames()
Returns the list of frame values for which this object contains masking information.
int GetDustFilteringWindow() const
Get window parameter for dust.
void SetupQueryInfo(TSeqLocVector &queries, EBlastProgramType prog, objects::ENa_strand strand_opt, BlastQueryInfo **qinfo)
Allocates the query information structure and fills the context offsets, in case of multiple queries,...
void UseProteinCoords(TSeqPos dna_length)
Adjusts all stored masks from nucleotide to protein offsets.
int GetWindowMaskerTaxId() const
Get the window masker taxid (or 0 if not set).
void SetMaskAtHash(bool m=true)
Sets MaskAtHash.
void SetFilterString(const char *f, bool clear=true)
Sets FilterString.
void Blast_FindDustFilterLoc(TSeqLocVector &queries, const CBlastNucleotideOptionsHandle *nucl_handle)
Finds dust locations for a given set of sequences by calling the symmetric dust lib.
string Blast_ProgramNameFromType(EBlastProgramType program)
Returns a string program name, given a blast::EBlastProgramType enumeration.
void SetDustFiltering(bool val)
Enable dust filtering.
@ eNotSupported
Feature not supported.
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
element_type * get(void) const
Get pointer.
TErrCode GetErrCode(void) const
Get error code.
const string & GetMsg(void) const
Get message string.
bool Match(const CSeq_id &sid2) const
Match() - TRUE if SeqIds are equivalent.
E_SIC Compare(const CSeq_id &sid2) const
Compare() - more general.
@ e_YES
SeqIds compared, but are different.
sequence::ECompare Compare(const CSeq_loc &loc1, const CSeq_loc &loc2, CScope *scope)
Returns the sequence::ECompare containment relationship between CSeq_locs.
@ fCompareOverlapping
Check if seq-locs are overlapping.
@ eSame
CSeq_locs contain each other.
void AddDefaults(TPriority pri=kPriority_Default)
Add default data loaders from object manager.
void Reset(void)
Reset reference object.
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty -- pointing to an object and has a non-null value.
bool Empty(void) const THROWS_NONE
Check if CRef is empty -- not pointing to any object, which means having a null value.
uint8_t Uint1
1-byte (8-bit) unsigned integer
int16_t Int2
2-byte (16-bit) signed integer
int32_t Int4
4-byte (32-bit) signed integer
uint32_t Uint4
4-byte (32-bit) unsigned integer
Uint4 TValue
Type of the generated integer value and/or the seed value.
TValue GetRand(void)
Get the next random number in the interval [0..GetMax()] (inclusive)
CRange< TSeqPos > TSeqRange
typedefs for sequence ranges
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
void SetTo(TTo value)
Assign a value to To data member.
list< CRef< CSeq_interval > > Tdata
ENa_strand
strand of nucleic acid
bool CanGetStrand(void) const
Check if it is safe to call GetStrand method.
TFrom GetFrom(void) const
Get the From member data.
void SetFrom(TFrom value)
Assign a value to From data member.
TTo GetTo(void) const
Get the To member data.
void SetStrand(TStrand value)
Assign a value to Strand data member.
@ eNa_strand_both
in forward orientation
@ e_Gi
GenInfo Integrated Database.
objects::CSeq_id * GenerateRandomSeqid_Gi()
vector< EBlastProgramType > GetAllBlastProgramTypes()
constexpr bool empty(list< Ts... >) noexcept
double value_type
The numeric datatype used by the parser.
Magic spell ;-) needed for some weird compilers... very empiric.
int strcmp(const char *str1, const char *str2)
Defines: CTimeFormat - storage class for time format.
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
static PCRE2_SIZE * offsets
BOOST_AUTO_TEST_SUITE(psiblast_iteration)
C++ implementation of repeats filtering for C++ BLAST.
vector< TMaskedQueryRegions > TSeqLocInfoVector
Collection of masked regions for all queries in a BLAST search.
vector< SSeqLoc > TSeqLocVector
Vector of sequence locations.
Structure to hold a sequence.
Uint1 * sequence
Sequence used for search (could be translation).
Int4 query_length
Length of this query, strand or frame.
Structure for keeping the query masking information.
Int4 total_size
Total size of the BlastSeqLoc array below.
BlastSeqLoc ** seqloc_array
Array of masked locations.
The query related information.
BlastContextInfo * contexts
Information per context.
Int4 last_context
Index of the last element of the context array.
Used to hold a set of positions, mostly used for filtering.
SSeqRange * ssr
location data on the sequence.
struct BlastSeqLoc * next
next in linked list
Structure to hold a message from the core of the BLAST engine.
SRepeatFilterOptions * repeatFilterOptions
for organism specific repeat filtering.
SSegOptions * segOptions
low-complexity filtering for proteins sequences (includes translated nucleotides).
Boolean mask_at_hash
mask query only for lookup table creation
SWindowMaskerOptions * windowMaskerOptions
organism specific filtering with window masker.
SDustOptions * dustOptions
low-complexity filtering for nucleotides.
int window
initial window to trigger further work.
Structure to represent a single sequence to be fed to BLAST.
A structure containing two integers, used e.g.
Int4 left
left endpoint of range (zero based)
Int4 right
right endpoint of range (zero based)
Utility stuff for more convenient using of Boost.Test library.
static CS_CONTEXT * context
Interface to retrieve list of available windowmasker filtering.
Blast wrappers for WindowMasker filtering.
voidp calloc(uInt items, uInt size)
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4