<
typenameSV>
69 unsignedsv_planes = svect.slices();
71 BM_ASSERT(sv_planes > high_bit && high_bit > 0);
73 typenameSV::bvector_type bv_acc;
77 for(
i= high_bit+1;
i< sv_planes; ++
i)
79 if(
typenameSV::bvector_type* bv = svect.slice(
i))
82svect.free_slice(
i);
87 for(
i= high_bit;
true; --
i)
89 typenameSV::bvector_type* bv = svect.get_create_slice(
i);
106 template<
typenameSV>
109 if(low_bit == 0)
return;
112 unsignedsv_planes = svect.slices();
113 typenameSV::bvector_type bv_acc1;
117 for(
i= low_bit+1;
i< sv_planes; ++
i)
119 if(
typenameSV::bvector_type* bv = svect.slice(
i))
124 typenameSV::bvector_type bv_acc2;
125 typenameSV::bvector_type* bv_low_plane = svect.get_create_slice(low_bit);
127 for(
i= low_bit-1;
true; --
i)
129 if(
typenameSV::bvector_type* bv = svect.slice(
i))
132svect.free_slice(
i);
144bv_acc1.bit_xor(bv_acc2);
145bv_low_plane->bit_or(bv_acc1);
169 template<
typenameSV>
172 typenameSV::size_type& midx,
175 typenameSV::size_type mismatch =
bm::id_max;
180 unsignedplanes1 = (unsigned)sv1.get_bmatrix().rows();
183 typenameSV::bvector_type_const_ptr bv_null1 = sv1.get_null_bvector();
184 typenameSV::bvector_type_const_ptr bv_null2 = sv2.get_null_bvector();
196 if(bv_null1 && bv_null2)
198 bool f= bv_null1->find_first_mismatch(*bv_null2, midx, mismatch);
199 if(
f&& (midx < mismatch))
201found =
f; mismatch = midx;
208 typenameSV::bvector_type bv_tmp;
209bv_tmp.resize(sv2.size());
213 bool f= bv_null1->find_first_mismatch(bv_tmp, midx, mismatch);
214 if(
f&& (midx < mismatch))
216found =
f; mismatch = midx;
221 typenameSV::bvector_type bv_tmp;
222bv_tmp.resize(sv1.size());
225 bool f= bv_null2->find_first_mismatch(bv_tmp, midx, mismatch);
226 if(
f&& (midx < mismatch))
228found =
f; mismatch = midx;
235 for(
unsigned i= 0; mismatch && (
i< planes1); ++
i)
237 typenameSV::bvector_type_const_ptr bv1 = sv1.get_slice(
i);
240 typenameSV::bvector_type_const_ptr bv2 = sv2.get_slice(
i);
248 bool f= bv2->find(midx);
249 if(
f&& (midx < mismatch))
251found =
f; sv_idx = 2; mismatch = midx;
258 bool f= bv1->find(midx);
259 if(
f&& (midx < mismatch))
261found =
f; sv_idx = 1; mismatch = midx;
267 bool f= bv1->find_first_mismatch(*bv2, midx, mismatch);
268 if(
f&& (midx < mismatch))
270found =
f; mismatch = midx;
273sv_idx = (bv1->test(mismatch)) ? 1 : 2;
290found = sv1.find_rank(midx + 1, mismatch);
293found = sv2.find_rank(midx + 1, mismatch);
304 bool f= bv_null1->find_first_mismatch(*bv_null2, midx, mismatch);
305 if(
f&& (midx < mismatch))
307found =
f; mismatch = midx;
347 template<
typenameSV1,
typenameSV2>
353 typedef typenameSV1::bvector_type bvector_type;
354 typedef typenamebvector_type::allocator_type allocator_type;
355 typedef typenameallocator_type::allocator_pool_type allocator_pool_type;
357allocator_pool_type pool;
358 typenamebvector_type::mem_pool_guard mp_guard_bv;
359mp_guard_bv.assign_if_not_set(pool, bv);
373 unsignedplanes = sv1.effective_slices();;
374 if(planes < sv2.slices())
375planes = sv2.slices();
377 for(
unsigned i= 0;
i< planes; ++
i)
379 typenameSV1::bvector_type_const_ptr bv1 = sv1.get_slice(
i);
380 typenameSV2::bvector_type_const_ptr bv2 = sv2.get_slice(
i);
399 typenamebvector_type::mem_pool_guard mp_guard;
400mp_guard.assign_if_not_set(pool, bv_xor);
402bv_xor.bit_xor(*bv1, *bv2, SV1::bvector_type::opt_none);
409 typenameSV1::size_type sz1 = sv1.size();
410 typenameSV2::size_type sz2 = sv2.size();
420bv.set_range(sz1, sz2-1);
426 typenameSV1::bvector_type_const_ptr bv_null1 = sv1.get_null_bvector();
427 typenameSV2::bvector_type_const_ptr bv_null2 = sv2.get_null_bvector();
434 if(bv_null1 && bv_null2)
437 typenamebvector_type::mem_pool_guard mp_guard;
438mp_guard.assign_if_not_set(pool, bv_or);
440bv_or.bit_or(*bv_null1, *bv_null2, bvector_type::opt_none);
452 if(bv_null1 && bv_null2)
455 typenamebvector_type::mem_pool_guard mp_guard;
456mp_guard.assign_if_not_set(pool, bv_xor);
458bv_xor.bit_xor(*bv_null1, *bv_null2, bvector_type::opt_none);
463bvector_type bv_null;
464 typenamebvector_type::mem_pool_guard mp_guard;
465mp_guard.assign_if_not_set(pool, bv_null);
469bv_null.resize(sv1.size());
474bv_null.resize(sv2.size());
490 template<
typenameSV>
580 template<
typenameSV,
unsignedS_FACTOR>
618 template<
classOpt = bm::agg_run_options<> >
653{
agg_pipe_.set_search_count_limit(limit); }
686{
return agg_pipe_.get_bv_res_vector(); }
690{
return agg_pipe_.get_bv_count_vector(); }
722 void bind(
constSV& sv,
boolsorted);
754template<typename BII>
893template<class TPipe>
989template<typename It>
995 typenamebvector_type::mem_pool_guard mp_guard;
996mp_guard.assign_if_not_set(
pool_, bv_out);
999 typenamebvector_type::mem_pool_guard mp_guard1(
pool_, bv1);
1000 boolany_zero =
false;
1001 for(; start < end; ++start)
1004any_zero |= (v == 0);
1030 boolnull_correct =
true);
1046 template<
boolBOUND>
1082 unsignedprefix_len);
1096 unsignedoctet_start,
1103template <
boolBOUND>
1112 unsignedfrom,
unsignedtotal_planes);
1119 unsignedfrom,
unsignedtotal_planes);
1160 template<
typenameAGG>
1167 unsigned charbits[64];
1169 for(
unsigned i= 0;
i< bit_count_v; ++
i)
1171 unsignedbit_idx = bits[
i];
1173 unsignedplane_idx = (unsigned(octet_idx) * 8) + bit_idx;
1175 BM_ASSERT(bv != sv.get_null_bvector());
1178 unsignedvinv = unsigned(
value);
1187vinv &= unsigned(planes_mask);
1188 for(
unsignedoctet_plane = (
unsigned(octet_idx) * 8); vinv; vinv &= vinv-1)
1192 unsignedplane_idx = octet_plane + bit_idx;
1195 BM_ASSERT(bv != sv.get_null_bvector());
1245 template<
typenameSV>
1308 remap(bv_in, sv_brel, bv_out);
1321 void attach_sv(
constSV* sv_brel,
boolcompute_stats =
false);
1330 template<
unsignedBSIZE>
1365 template<
typenameSV>
1367: sv_ptr_(0), gb_(0), have_stats_(
false)
1371 "BM: unsigned sparse vector is required for transform");
1375SV::throw_bad_alloc();
1381 template<
typenameSV>
1391 template<
typenameSV>
1397have_stats_ =
false;
1401 if(sv_brel->empty() || !compute_stats)
1403 const bvector_type* bv_non_null = sv_brel->get_null_bvector();
1408 typenamebvector_type::mem_pool_guard mp_g_z;
1409mp_g_z.assign_if_not_set(pool_, bv_zero_);
1412scanner.
find_zero(*sv_brel, bv_zero_);
1413have_stats_ =
true;
1419 template<
typenameSV>
1424 if(sv_brel.empty())
1427 const bvector_type* bv_non_null = sv_brel.get_null_bvector();
1430 if(!bv_non_null->test(id_from))
1433 size_typeidx = sv_brel.translate_address(id_from);
1434id_to = sv_brel.get(idx);
1440 template<
typenameSV>
1445 typenamebvector_type::mem_pool_guard mp_g_out, mp_g_p, mp_g_z;
1446mp_g_out.assign_if_not_set(pool_, bv_out);
1447mp_g_p.assign_if_not_set(pool_, bv_product_);
1448mp_g_z.assign_if_not_set(pool_, bv_zero_);
1450attach_sv(&sv_brel);
1452remap(bv_in, bv_out);
1457 template<
typenameSV>
1465 if(sv_ptr_ == 0 || sv_ptr_->empty())
1470 typenamebvector_type::mem_pool_guard mp_g_out, mp_g_p, mp_g_z;
1471mp_g_out.assign_if_not_set(pool_, bv_out);
1472mp_g_p.assign_if_not_set(pool_, bv_product_);
1473mp_g_z.assign_if_not_set(pool_, bv_zero_);
1478 const bvector_type* bv_non_null = sv_ptr_->get_null_bvector();
1482bv_product_ = bv_in;
1483bv_product_.bit_and(*bv_non_null);
1484enum_bv = &bv_product_;
1495bv_product_ = bv_in;
1497bv_product_.bit_sub(bv_zero_);
1503bv_out.set_bit_no_check(0);
1505enum_bv = &bv_product_;
1512buf_cnt = nb_old = 0;
1514 typenamebvector_type::enumerator en(enum_bv->first());
1515 for(; en.valid(); ++en)
1517 typenameSV::size_type idx = *en;
1518idx = sv_ptr_->translate_address(idx);
1525sv_ptr_->gather(&gb_->buffer_[0], &gb_->gather_idx_[0], buf_cnt,
BM_SORTED_UNIFORM);
1526bv_out.set(&gb_->buffer_[0], buf_cnt,
BM_SORTED);
1530gb_->gather_idx_[buf_cnt++] = idx;
1534gb_->gather_idx_[buf_cnt++] = idx;
1537 if(buf_cnt == sv_g_size)
1539sv_ptr_->gather(&gb_->buffer_[0], &gb_->gather_idx_[0], buf_cnt,
BM_SORTED_UNIFORM);
1546sv_ptr_->gather(&gb_->buffer_[0], &gb_->gather_idx_[0], buf_cnt,
BM_SORTED_UNIFORM);
1554 template<
typenameSV>
1559 if(sv_brel.empty())
1564 typenameSV::const_iterator it = sv_brel.begin();
1565 for(; it.valid(); ++it)
1569 if(bv_in.test(idx))
1571bv_out.set_bit_no_check(t_id);
1581 template<
typenameSV,
unsignedS_FACTOR>
1588effective_str_max_ = 0;
1593 template<
typenameSV,
unsignedS_FACTOR>
1596static_assert(S_FACTOR == 4 || S_FACTOR == 8 || S_FACTOR == 16
1597|| S_FACTOR == 32 || S_FACTOR == 64,
1598 "BM: sparse_vector_scanner<> incorrect sampling factor template parameter");
1603 ifconstexpr (SV::is_str())
1605effective_str_max_ = sv.effective_vector_max();
1610range_idx_.construct(sv, S_FACTOR);
1614vector_plane_masks_.resize(effective_str_max_);
1615 for(
unsigned i= 0;
i< effective_str_max_; ++
i)
1617vector_plane_masks_[
i] = sv.get_slice_mask(
i);
1624 template<
typenameSV,
unsignedS_FACTOR>
1628effective_str_max_ = 0;
1633 template<
typenameSV,
unsignedS_FACTOR>
1643find_nonzero(sv, bv_out);
1644 ifconstexpr (SV::is_compressed())
1647bv_out.set_range(sv.effective_size(),
bm::id_max- 1,
false);
1648decompress(sv, bv_out);
1655correct_nulls(sv, bv_out);
1660 template<
typenameSV,
unsignedS_FACTOR>
1669 autoold_sz = bv_out.size();
1670bv_out.resize(sv.size());
1672bv_out.resize(old_sz);
1673correct_nulls(sv, bv_out);
1678 template<
typenameSV,
unsignedS_FACTOR>
1684bv_out.bit_and(*bv_null);
1689 template<
typenameSV,
unsignedS_FACTOR>
1700find_zero(sv, bv_out);
1701 returnbv_out.any();
1705 boolfound = prepare_and_sub_aggregator(sv,
value);
1712 boolany = (search_limit == 1);
1713found = agg_.combine_and_sub(bv_out, any);
1720 template<
typenameSV,
unsignedS_FACTOR>
1733 boolfound = prepare_and_sub_aggregator(sv,
value);
1736found = agg_.find_first_and_sub(idx);
1744 template<
typenameSV,
unsignedS_FACTOR>
1751 unsignedprefix_len)
1759 unsignedcommon_prefix_len = 0;
1761 value_type* pref = remap_prefix_vect_.data();
1765 if(
agg_.set_range_hint(mask_from_, mask_to_))
1767 if(prefix_len == ~0u)
1770sv.template common_prefix_length<true>(mask_from_, mask_to_, pref);
1771 if(common_prefix_len)
1774 str= remap_value_vect_.data();
1776 for(
unsigned i= 0;
i< common_prefix_len; ++
i)
1777 if(
str[
i] != pref[
i])
1784 unsignedpl; (void)pl;
1786(pl=sv.template common_prefix_length<true>(
1787mask_from_, mask_to_, pref)));
1789common_prefix_len = prefix_len;
1794 if(prefix_len != ~0u)
1797 unsignedpl; (void)pl;
1799(pl=sv.template common_prefix_length<true>(
1800mask_from_, mask_to_, pref)));
1802common_prefix_len = prefix_len;
1808 if(common_prefix_len && (in_len <= common_prefix_len))
1810 if(in_len == common_prefix_len)
1811--common_prefix_len;
1818 str= remap_value_vect_.data();
1821 if(sv.is_remap() && (
str!= remap_value_vect_.data()))
1823 bool r= sv.remap_tosv(
1824remap_value_vect_.data(), sv.effective_max_str(),
str);
1827 str= remap_value_vect_.data();
1831 boolfound = prepare_and_sub_aggregator(sv,
str, common_prefix_len,
true);
1834found = agg_.find_first_and_sub(idx);
1835 if(found && idx > mask_to_)
1837 int cmp= sv.compare(idx, search_str);
1838found = (
cmp== 0);
1848 template<
typenameSV,
unsignedS_FACTOR>
1852 unsignedoctet_start,
1862 for(
intoctet_idx =
len-1; octet_idx >=
int(octet_start); --octet_idx)
1864 unsigned value= unsigned(
str[octet_idx]) & 0xFFu;
1867 if(&sv == bound_sv_)
1868planes_mask = vector_plane_masks_[unsigned(octet_idx)];
1870planes_mask = sv.get_slice_mask(
unsigned(octet_idx));
1875add_agg_char(agg_, sv, octet_idx, planes_mask,
value);
1882 typenameSV::size_type planes = sv.get_bmatrix().rows_not_null();
1883 for(
unsignedplane_idx =
unsigned(
len* 8);
1884plane_idx < planes; ++plane_idx)
1895 template<
typenameSV,
unsignedS_FACTOR>
1900 usingunsigned_value_type =
typenameSV::unsigned_value_type;
1904 unsigned charbits[
sizeof(
value) * 8];
1905unsigned_value_type uv = sv.s2u(
value);
1907 unsigned shortbit_count_v =
bm::bitscan(uv, bits);
1909 constunsigned_value_type mask1 = 1;
1914 for(
unsigned i= bit_count_v;
i> 0; --
i)
1916 unsignedbit_idx = bits[
i-1];
1923 unsignedsv_planes = sv.effective_slices();
1924 for(
unsigned i= 0;
i< sv_planes; ++
i)
1926unsigned_value_type
mask= mask1 <<
i;
1936 template<
typenameSV,
unsignedS_FACTOR>
1948find_zero(sv, bv_out);
1952 unsigned charbits[
sizeof(
value) * 8];
1953 typenameSV::unsigned_value_type uv = sv.s2u(
value);
1954 unsigned shortbit_count_v =
bm::bitscan(uv, bits);
1958 if(
const bvector_type* bv_plane = sv.get_slice(bits[--bit_count_v]))
1965 for(
unsigned i= 0;
i< bit_count_v; ++
i)
1969bv_out &= *bv_plane;
1978 unsignedsv_planes = sv.effective_slices();
1979 for(
unsigned i= 0; (
i< sv_planes) && uv; ++
i)
1981 if(
const bvector_type* bv = sv.get_slice(
i); bv && !(uv & (1u <<
i)))
1988 template<
typenameSV,
unsignedS_FACTOR>
1995find_gt_horizontal(sv,
val, bv_out,
true);
2000 template<
typenameSV,
unsignedS_FACTOR>
2011find_eq(sv,
val, bv_min);
2012find_gt_horizontal(sv,
val, bv_out,
true);
2013bv_out.merge(bv_min);
2018find_gt_horizontal(sv,
val, bv_out,
true);
2026find_gt_horizontal(sv,
val, bv_out,
true);
2031bv_out.set_range(0, sv.size()-1);
2032correct_nulls(sv, bv_out);
2039 template<
typenameSV,
unsignedS_FACTOR>
2045find_ge(sv,
val, bv_out);
2051 template<
typenameSV,
unsignedS_FACTOR>
2056find_gt(sv,
val, bv_out);
2062 template<
typenameSV,
unsignedS_FACTOR>
2070find_ge(sv, from, bv_out);
2073find_gt(sv, to, bv_gt);
2074bv_out.bit_sub(bv_gt);
2080 template<
typenameSV,
unsignedS_FACTOR>
2086 unsigned charblist[64];
2087 unsignedbit_count_v;
2098find_zero(sv, bv_zero);
2099 autosz = sv.size();
2100bv_out.set_range(0, sz-1);
2101bv_out.bit_sub(bv_zero);
2106bv_out.bit_sub(*bv_sign);
2109correct_nulls(sv, bv_out);
2118find_positive(sv, gtz_bv);
2121bv_out.swap(gtz_bv);
2123correct_nulls(sv, bv_out);
2128find_eq(sv, -1, bv_out);
2129bv_out.bit_or(gtz_bv);
2131correct_nulls(sv, bv_out);
2135 autouvalue = SV::s2u(
value+
bool(
value< 0));
2150 unsignedtotal_planes = sv.effective_slices();
2151 const bvector_type* bv_sign = sv.get_slice(0); (void)bv_sign;
2160bv_out.swap(gtz_bv);
2162correct_nulls(sv, bv_out);
2165aggregate_AND_OR_slices(top_or_bv, *bv_sign, sv, blist[bit_count_v-1]+1, total_planes);
2169aggregate_OR_slices(top_or_bv, sv, blist[bit_count_v-1]+1, total_planes);
2174 if(total_planes <
unsigned(blist[bit_count_v-1]+1))
2176aggregate_OR_slices(top_or_bv, sv, blist[bit_count_v-1]+1, total_planes);
2180bv_out.merge(top_or_bv);
2188 for(
int i=
int(bit_count_v)-1;
i>= 0; --
i)
2190 intslice_idx = blist[
i];
2191 if(slice_idx == gt_slice_limit())
2193 const bvector_type* bv_base_plane = sv.get_slice(
unsigned(slice_idx));
2202and_eq_bv.bit_and(*bv_base_plane, *bv_sign);
2204and_eq_bv.bit_and(*bv_base_plane, gtz_bv);
2207and_eq_bv = *bv_base_plane;
2210and_eq_bv.bit_and(*bv_base_plane);
2212 intnext_slice_idx = 0;
2215next_slice_idx = blist[
i-1];
2216 if(slice_idx-1 == next_slice_idx)
2223 for(
intj = slice_idx-1; j >= next_slice_idx; --j)
2228 if(
const bvector_type* bv_sub_plane = sv.get_slice(
unsigned(j)))
2229bv_out.bit_or_and(and_eq_bv, *bv_sub_plane);
2238top_or_bv.set_range(0, sv.size()-1);
2239top_or_bv.bit_sub(bv_out);
2240bv_out.swap(top_or_bv);
2244gtz_bv.resize(sv.size());
2246bv_out.bit_sub(gtz_bv);
2250decompress(sv, bv_out);
2259correct_nulls(sv, bv_out);
2266 template<
typenameSV,
unsignedS_FACTOR>
2270 unsignedfrom,
unsignedtotal_planes)
2272 for(
unsigned i= from;
i< total_planes; ++
i)
2276 BM_ASSERT(bv_slice != sv.get_null_bvector());
2280agg_.combine_or(bv_target);
2285 template<
typenameSV,
unsignedS_FACTOR>
2289 unsignedfrom,
unsignedtotal_planes)
2291 for(
unsigned i= from;
i< total_planes; ++
i)
2295 BM_ASSERT(bv_slice != sv.get_null_bvector());
2296bv_target.bit_or_and(bv_mask, *bv_slice);
2303 template<
typenameSV,
unsignedS_FACTOR>
2306 typenameSV::bvector_type& bv_out)
2308 returnfind_eq_str_impl(sv,
str, bv_out,
false);
2314 template<
typenameSV,
unsignedS_FACTOR>
2317 typenameSV::size_type& pos)
2320 returnthis->find_eq_str(*bound_sv_,
str, pos);
2325 template<
typenameSV,
unsignedS_FACTOR>
2329 typenameSV::size_type& pos)
2331 boolfound =
false;
2336 boolremaped =
false;
2339 if(sv.is_remap() &&
str!= remap_value_vect_.data())
2341 autostr_len = sv.effective_vector_max();
2342remap_value_vect_.resize(str_len);
2343remaped = sv.remap_tosv(
2344remap_value_vect_.data(), str_len,
str);
2347 str= remap_value_vect_.data();
2351 size_tin_len = ::strlen(
str);
2352size_type found_pos;
2353found = find_first_eq(sv,
str, in_len, found_pos, remaped, ~0u);
2359 ifconstexpr (sv.is_compressed())
2360found = sv.find_rank(found_pos + 1, pos);
2367 typenameSV::bvector_type bv_zero;
2368find_zero(sv, bv_zero);
2369found = bv_zero.find(pos);
2376 template<
typenameSV,
unsignedS_FACTOR>
2379 typenameSV::bvector_type& bv_out)
2382 returnfind_eq_str(*bound_sv_,
str, bv_out);
2387 template<
typenameSV,
unsignedS_FACTOR>
2391 typenameSV::bvector_type& bv_out)
2393 returnfind_eq_str_impl(sv,
str, bv_out,
true);
2398 template<
typenameSV,
unsignedS_FACTOR>
2404 autostr_len = sv.effective_vector_max();
2405remap_vect_target.
resize(str_len);
2406 boolremaped = sv.remap_tosv(remap_vect_target.
data(), str_len,
str);
2412 template<
typenameSV,
unsignedS_FACTOR>
2416 typenameSV::bvector_type& bv_out,
2419 boolfound =
false;
2420bv_out.clear(
true);
2433 if(sv.is_remap() && (
str!= remap_value_vect_.data()))
2435 boolremaped = remap_tosv(remap_value_vect_,
str, sv);
2438 str= remap_value_vect_.data();
2444 const unsignedcommon_prefix_len = 0;
2445found = prepare_and_sub_aggregator(sv,
str, common_prefix_len,
2449found = agg_.combine_and_sub(bv_out);
2455find_zero(sv, bv_out);
2456found = bv_out.any();
2464 template<
typenameSV,
unsignedS_FACTOR>
template<
classTPipe>
2467 if(pipe.bv_and_mask_)
2470 boolany = pipe.bv_and_mask_->find_range(
first,
last);
2475agg_.combine_and_sub(pipe.agg_pipe_);
2476agg_.reset_range_hint();
2481 template<
typenameSV,
unsignedS_FACTOR>
2482 template<
boolBOUND>
2488 typenameSV::size_type& pos)
2490 boolfound =
false;
2494 unsignedprefix_len = ~0u;
2498reset_search_range();
2503 ifconstexpr (BOUND)
2505found = range_idx_.bfind_range(
str, in_len,
l,
r);
2510(unsigned) range_idx_.common_prefix_length(
str, in_len,
l,
r);
2512 if((
l==
r) && (in_len == prefix_len))
2514range_idx_.recalc_range(
str,
l,
r);
2519range_idx_.recalc_range(
str,
l,
r);
2520set_search_range(
l,
r);
2532 l= 0;
r= sv.size()-1;
2538 if(dist < min_distance_cutoff)
2551 int cmp= this->compare_str<BOUND>(sv, mid,
str);
2569 for(
unsigned i= 0;
i< (sub_bfind_block_cnt-1);
2570++
i, mid += sub_block_l1_size)
2572 int cmp= this->compare_str<BOUND>(sv, mid,
str);
2582set_search_range(
l,
r);
2587 typenameSV::size_type mid = dist/2+
l;
2597mid = dist / 2 +
l;
2598 cmp= this->compare_str<false>(sv, mid,
str);
2603 cmp= this->compare_str<BOUND>(sv, mid,
str);
2609set_search_range(
l, mid);
2620found = find_first_eq(sv,
str, in_len, found_pos, remaped, prefix_len);
2624 ifconstexpr (SV::is_compressed())
2625found = sv.find_rank(found_pos + 1, pos);
2627reset_search_range();
2632 typenameSV::bvector_type bv_zero;
2633find_zero(sv, bv_zero);
2634found = bv_zero.find(pos);
2641 template<
typenameSV,
unsignedS_FACTOR>
2645 size_t len= ::strlen(
str);
2646effective_str_max_ = sv.effective_max_str();
2647 if(
len> effective_str_max_)
2652 boolremaped =
false;
2657remap_value_vect_.resize_no_copy(
len);
2658remaped = sv.remap_tosv(remap_value_vect_.data(),
2659effective_str_max_,
str);
2664 returnbfind_eq_str_impl<false>(sv,
str,
len, remaped, pos);
2669 template<
typenameSV,
unsignedS_FACTOR>
2672 typenameSV::size_type& pos)
2675 size_t len= ::strlen(
str);
2676 if(
len> effective_str_max_)
2678 boolremaped =
false;
2681 if(bound_sv_->is_remap())
2683remaped = bound_sv_->remap_tosv(remap_value_vect_.data(),
2684effective_str_max_,
str);
2689 returnbfind_eq_str_impl<true>(*bound_sv_,
str,
len, remaped, pos);
2694 template<
typenameSV,
unsignedS_FACTOR>
2701 if(in_len > effective_str_max_)
2706 boolremaped =
false;
2710 if(bound_sv_->is_remap())
2712remaped = bound_sv_->remap_n_tosv_2way(
2713remap_value_vect_.data(),
2724 for(
size_t i= 0;
i< in_len && *
str; ++
i)
2728 returnbfind_eq_str_impl<true>(*bound_sv_, s, in_len, remaped, pos);
2733 template<
typenameSV,
unsignedS_FACTOR>
2737 typenameSV::size_type& pos)
2749 cmp= this->compare(sv,
l,
val);
2760 cmp= this->compare(sv,
r,
val);
2766 for(;
r>= 0; --
r)
2768 cmp= this->compare(sv,
r,
val);
2782 if(dist < linear_cutoff1)
2784 for(;
l<=
r; ++
l)
2786 cmp= this->compare(sv,
l,
val);
2803 cmp= this->compare(sv, mid,
val);
2810 cmp= this->compare(sv,
i,
val);
2824 if(dist < linear_cutoff2)
2826 typenameSV::const_iterator it(&sv,
l);
2827 for(; it.valid(); ++it, ++
l)
2830 if(sv_value ==
val)
2835 if(sv_value >
val)
2854 template<
typenameSV,
unsignedS_FACTOR>
2858 typenameSV::size_type& pos)
2871 cmp= this->compare_str<false>(sv,
l,
str);
2882 cmp= this->compare_str<false>(sv,
r,
str);
2888 for(;
r>= 0; --
r)
2890 cmp= this->compare_str<false>(sv,
r,
str);
2904 if(dist < linear_cutoff1)
2906 for(;
l<=
r; ++
l)
2908 cmp= this->compare_str<false>(sv,
l,
str);
2924 cmp= this->compare_str<false>(sv, mid,
str);
2931 cmp= this->compare_str<false>(sv,
i,
str);
2945 if(dist < linear_cutoff2)
2947 if(!hmatr_.is_init())
2949 unsignedmax_str = (unsigned)sv.effective_max_str();
2950hmatr_.resize(linear_cutoff2, max_str+1,
false);
2953dist = sv.decode(hmatr_,
l, dist+1);
2954 for(
unsigned i= 0;
i< dist; ++
i, ++
l)
2969 cmp= this->compare_str<false>(sv,
l,
str);
2988 template<
typenameSV,
unsignedS_FACTOR>
2989 template<
boolBOUND>
2997 ifconstexpr (BOUND)
3042 for(
unsigned i= 0;
true; ++
i)
3044 charoctet =
str[
i];
char value= s0[
i];
3056 returnsv.compare(idx,
str);
3062 template<
typenameSV,
unsignedS_FACTOR>
3069 returnsv.compare(idx,
val);
3074 template<
typenameSV,
unsignedS_FACTOR>
3078 typenameSV::bvector_type& bv_out)
3087find_zero(sv, bv_out);
3091find_eq_with_nulls(sv,
value, bv_out, 0);
3093decompress(sv, bv_out);
3094correct_nulls(sv, bv_out);
3099 template<
typenameSV,
unsignedS_FACTOR>
template<
typenameBII>
3103static_assert(!SV::is_compressed(),
"BM:find_eq on RSC vector not implemented");
3110 typenameSV::bvector_type bv_out;
3111find_zero(sv, bv_out);
3112 typenameSV::bvector_type::enumerator en = bv_out.get_enumerator(0);
3113 for(; en.valid(); ++en)
3122 boolfound = prepare_and_sub_aggregator(sv,
value);
3126found = agg_.combine_and_sub_bi(bi);
3133 template<
typenameSV,
unsignedS_FACTOR>
3137 typenameSV::size_type& pos)
3142find_eq(sv,
value, bv_zero);
3143 boolfound = bv_zero.find(pos);
3148 boolfound = find_first_eq(sv,
value, found_pos);
3154 ifconstexpr (SV::is_compressed())
3155found = sv.find_rank(found_pos + 1, pos);
3163 template<
typenameSV,
unsignedS_FACTOR>
3166 typenameSV::bvector_type& bv_out)
3169 autosz = sv.effective_slices();
3170 for(
unsigned i= 0;
i< sz; ++
i)
3171agg_.add(sv.get_slice(
i));
3172agg_.combine_or(bv_out);
3178 template<
typenameSV,
unsignedS_FACTOR>
3181 typenameSV::bvector_type& bv_out)
3184bv_out.set_range(0, sv.size()-1);
3188bv_out.bit_sub(*bv_sign);
3194 template<
typenameSV,
unsignedS_FACTOR>
3197 typenameSV::bvector_type& bv_out)
3199 ifconstexpr (SV::is_compressed())
3201 const bvector_type* bv_non_null = sv.get_null_bvector();
3205rank_compr_.decompress(bv_tmp_, *bv_non_null, bv_out);
3206bv_out.swap(bv_tmp_);
3210(void)sv; (void)bv_out;
3216 template<
typenameSV,
unsignedS_FACTOR>
3221mask_from_ = from; mask_to_ = to; mask_set_ =
true;
3226 template<
typenameSV,
unsignedS_FACTOR>
3237 template<
typenameSV,
unsignedS_FACTOR>
template<
classOpt>
3242static_assert(Opt::is_masks(),
3243 "BM: Search masking needs to be enabled in template parameter options before function call. see bm::agg_run_options<> ");
3244bv_and_mask_ = bv_mask;
3249 template<
typenameSV,
unsignedS_FACTOR>
template<
classOpt>
3256 typenameaggregator_type::arg_groups* arg = agg_pipe_.add();
3261 if(sv_.is_remap() && (
str!= remap_value_vect_.data()))
3263 boolremaped = remap_tosv(remap_value_vect_,
str, sv_);
3266 str= remap_value_vect_.data();
3274 ifconstexpr(Opt::is_masks())
3275arg->add(bv_and_mask_, 0);
3276arg->add(sv_.get_null_bvector(), 0);
3280 for(
intoctet_idx =
len-1; octet_idx >= 0; --octet_idx)
3285 unsigned value= unsigned(
str[octet_idx]) & 0xFF;
3290 if(&sv == bound_sv_)
3291planes_mask = vector_plane_masks_[unsigned(octet_idx)];
3294planes_mask = sv_.get_slice_mask(
unsigned(octet_idx));
3303*arg, sv_, octet_idx, planes_mask,
value);
3311 unsignedplane_idx = unsigned(
len* 8);
3314 for(; plane_idx < planes; ++plane_idx)
3327 template<
typenameSV>
3331s_factor_ = s_factor;
3332sv_size_ = sv.size();
3338 autoeffective_str_max = sv.effective_vector_max() + 1;
3340 size_typeidx_size = total_nb * s_factor + 1;
3341s_cache_.init_resize(idx_size, effective_str_max);
3342s_cache_.set_zero();
3350 value_type* s_str = s_cache_.row(idx_size_);
3352sv.get(
i, s_str, cols);
3354 if(
i== sv_size_-1)
3362s_str = s_cache_.row(idx_size_);
3364sv.get(
i, s_str, cols);
3373min_len = ::strlen(s);
3378idx_unique_ =
true;
3379 const value_type* str_prev = s_cache_.row(0);
3383 size_tcurr_len = ::strlen(str_curr);
3384 if(curr_len < min_len)
3387 int cmp= SV::compare_str(str_prev, str_curr);
3391idx_unique_ =
false;
3394str_prev = str_curr;
3397min_key_len_ = min_len;
3403 template<
typenameSV>
3412 l= 0;
r= idx_size_ - 1;
3415 size_tmin_len = this->min_key_len_;
3416 if(in_len < min_len)
3422 cmp= SV::compare_str(search_str,
str, min_len);
3426 str= s_cache_.row(
r);
3427 cmp= SV::compare_str(search_str,
str, min_len);
3435 if(dist < linear_cutoff)
3440 cmp= SV::compare_str(search_str, str_i, min_len);
3462 const value_type* str_m = s_cache_.row(mid);
3463 cmp= SV::compare_str(str_m, search_str, min_len);
3475 template<
typenameSV>
3485 size_tmin_len = (in_len < min_key_len_) ? in_len : min_key_len_;
3489 for(;
i< min_len-3;
i+=4)
3492::memcpy(&i2, &str_l[
i],
sizeof(i2));
3493::memcpy(&i1, &str_r[
i],
sizeof(i1));
3498::memcpy(&i2, &str_s[
i],
sizeof(i2));
3506 for(;
true; ++
i)
3508 auto ch1= str_l[
i];
auto ch2= str_r[
i];
3511 autochs = str_s[
i];
3521 template<
typenameSV>
3534 if(
r== idx_size_-1)
3561 int cmp= SV::compare_str(search_str,
str);
3564 r-= (
r&& idx_unique_);
ncbi::TMaskedQueryRegions mask
Algorithms for fast aggregation of N bvectors.
Algorithms for bvector<> (main include)
#define BM_VECT_ALIGN_ATTR
Sparse constainer sparse_vector<> for integer types using bit-transposition transform.
value_type * data() const noexcept
void resize(size_type new_size)
vector resize
value_type * resize_no_copy(size_type new_size)
resize without content preservation
Integer set to set transformation (functional image in groups theory) https://en.wikipedia....
bvector_type bv_zero_
bit-vector for zero elements
const bvector_type & get_bv_zero() const
Get read access to zero-elements vector Zero vector gets populated after attach_sv() is called or as ...
bvector_type bv_product_
temp vector
void attach_sv(const SV *sv_brel, bool compute_stats=false)
Attach a translation table vector for remapping (Image function)
allocator_pool_type pool_
const SV * sv_ptr_
current translation table vector
void remap(const bvector_type &bv_in, bvector_type &bv_out)
Perform remapping (Image function) (based on attached translation table)
SV::value_type value_type
bool have_stats_
flag of statistics presense
gather_buffer_type * gb_
intermediate buffers
bvector_type::allocator_type::allocator_pool_type allocator_pool_type
void operator=(const set2set_11_transform &)=delete
void one_pass_run(const bvector_type &bv_in, const SV &sv_brel, bvector_type &bv_out)
gather_buffer< sv_g_size > gather_buffer_type
void run(const bvector_type &bv_in, const SV &sv_brel, bvector_type &bv_out)
Run remap transformation.
set2set_11_transform(const set2set_11_transform &)=delete
SV::bvector_type bvector_type
Pipeline to run multiple searches against a particular SV faster using cache optimizations.
bv_count_vector_type & get_bv_count_vector() noexcept
Return results vector count used for pipeline execution.
void set_search_mask(const bvector_type *bv_mask) noexcept
Set pipeline mask bit-vector to restrict the search space.
void complete()
Prepare pipeline for the execution (resize and init internal structures) Once complete,...
bool is_complete() const noexcept
bvect_vector_type & get_bv_res_vector() noexcept
Return results vector used for pipeline execution.
size_type size() const noexcept
void set_search_count_limit(size_type limit) noexcept
Set search limit for results.
pipeline(const pipeline &)=delete
void add(const typename SV::value_type *str)
Add new search string.
const bvector_type * bv_and_mask_
aggregator_pipeline_type agg_pipe_
traget aggregator pipeline
const run_options & get_options() const noexcept
Get pipeline run options.
remap_vector_type remap_value_vect_
remap buffer
pipeline & operator=(const pipeline &)=delete
aggregator_type::template pipeline< Opt > aggregator_pipeline_type
size_t eff_slices_
number of effectice NOT NULL value slices
void set_or_target(bvector_type *bv_or) noexcept
Attach OR (aggregator vector).
const SV & sv_
target sparse vector ref
aggregator_pipeline_type & get_aggregator_pipe() noexcept
get aggregator pipeline (access to compute internals)
run_options & options() noexcept
Set pipeline run options.
algorithms for sparse_vector scan/search
bm::dynamic_heap_matrix< value_type, allocator_type > matrix_search_buf_type
bm::dynamic_heap_matrix< value_type, allocator_type > heap_matrix_type
bm::rank_compressor< bvector_type > rank_compr_
mask_vector_type vector_plane_masks_
masks of allocated bit-planes (1 - means there is a bit-plane)
void reset_binding() noexcept
reset sparse vector binding
size_type effective_str_max_
void reset_search_range() noexcept
reset (disable) search range
SV::bvector_type bvector_type
void operator=(const sparse_vector_scanner &)=delete
bool prepare_and_sub_aggregator(const SV &sv, value_type value)
Prepare aggregator for AND-SUB (EQ) search.
void find_eq(const SV &sv, value_type value, bvector_type &bv_out)
find all sparse vector elements EQ to search value
allocator_pool_type pool_
bool bfind(const SV &sv, const value_type val, size_type &pos)
binary search for position in the sorted sparse vector
void bind(const SV &sv, bool sorted)
bind sparse vector for all searches
void correct_nulls(const SV &sv, bvector_type &bv_out)
Exclude possible NULL values from the result vector.
void find_range(const SV &sv, value_type from, value_type to, bvector_type &bv_out)
find all elements sparse vector element in closed range [left..right] interval
bm::heap_vector< bm::id64_t, typename bvector_type::allocator_type, true > mask_vector_type
const bvector_type * bvector_type_const_ptr
void aggregate_OR_slices(bvector_type &bv_target, const SV &sv, unsigned from, unsigned total_planes)
aggregator_type::bv_count_vector_type bv_count_vector_type
bm::heap_vector< bvector_type *, allocator_type, true > bvect_vector_type
void decompress(const SV &sv, bvector_type &bv_out)
Rank-Select decompression for RSC vectors.
void set_search_range(size_type from, size_type to) noexcept
set search boundaries (hint for the aggregator)
sparse_vector_scanner(const sparse_vector_scanner &)=delete
remap_vector_type remap_value_vect_
remap buffer
bvector_type * bvector_type_ptr
allocator_pool_type & get_bvector_alloc_pool() noexcept
Return allocator pool for blocks (Can be used to improve performance of repeated searches with the sa...
void find_gt(const SV &sv, value_type val, bvector_type &bv_out)
find all elements sparse vector element greater (>) than value
bool find_eq_str_impl(const SV &sv, const value_type *str, bvector_type &bv_out, bool prefix_sub)
find EQ str / prefix impl
void find_zero(const SV &sv, bvector_type &bv_out, bool null_correct=true)
find all sparse vector elements EQ to 0
void find_eq_with_nulls_horizontal(const SV &sv, value_type value, bvector_type &bv_out)
For testing purposes only.
bm::heap_vector< value_type, typename bvector_type::allocator_type, true > remap_vector_type
remap_vector_type value_vect_
value buffer
void find_lt(const SV &sv, value_type val, bvector_type &bv_out)
find all elements sparse vector element less (<) than value
bool find_eq_with_nulls(const SV &sv, value_type value, bvector_type &bv_out, size_type search_limit=0)
find value (may include NULL indexes)
bool bfind_eq_str(const SV &sv, const value_type *str, size_type &pos)
binary find first sparse vector element (string).
int compare(const SV &sv, size_type idx, const value_type val) noexcept
compare sv[idx] with input value
bool bfind_eq_str_impl(const SV &sv, const value_type *str, size_t in_len, bool remaped, size_type &pos)
void find_gt_horizontal(const SV &sv, value_type value, bvector_type &bv_out, bool null_correct=true)
For testing purposes only.
static constexpr int gt_slice_limit() noexcept
Return the slice limit for signed/unsigned vector value types.
static bool remap_tosv(remap_vector_type &remap_vect_target, const value_type *str, const SV &sv)
Remap input value into SV char encodings.
static void add_agg_char(AGG &agg, const SV &sv, int octet_idx, bm::id64_t planes_mask, unsigned value)
Addd char to aggregator (AND-SUB)
bm::aggregator< bvector_type > aggregator_type
aggregator_type::run_options run_options
Scanner options to control execution.
bool lower_bound_str(const SV &sv, const value_type *str, size_type &pos)
lower bound search for an array position
void find_positive(const SV &sv, bvector_type &bv_out)
Find positive (greter than zero elements) Output vector is computed as a logical OR (join) of all pla...
bvector_type::allocator_type allocator_type
matrix_search_buf_type hmatr_
heap matrix for string search linear stage
bm::sv_sample_index< SV > range_idx_
range index
bool find_eq_str(const SV &sv, const value_type *str, bvector_type &bv_out)
find sparse vector elements (string)
void find_nonzero(const SV &sv, bvector_type &bv_out)
Find non-zero elements Output vector is computed as a logical OR (join) of all planes.
bool find_first_eq(const SV &sv, value_type value, size_type &idx)
find first value (may include NULL indexes)
void invert(const SV &sv, bvector_type &bv_out)
invert search result ("EQ" to "not EQ")
allocator_type::allocator_pool_type allocator_pool_type
SV::value_type value_type
bool find_eq_str_prefix(const SV &sv, const value_type *str, bvector_type &bv_out)
find sparse vector elements with a given prefix (string)
void find_le(const SV &sv, value_type val, bvector_type &bv_out)
find all elements sparse vector element less or equal (<=) than value
void find_ge(const SV &sv, value_type val, bvector_type &bv_out)
find all elements sparse vector element greater or equal (>=) than value
int compare_str(const SV &sv, size_type idx, const value_type *str) const noexcept
compare sv[idx] with input str
static void aggregate_AND_OR_slices(bvector_type &bv_target, const bvector_type &bv_mask, const SV &sv, unsigned from, unsigned total_planes)
remap_vector_type remap_prefix_vect_
common prefix buffer
Index for SV sorted vectors for approximate range queries.
heap_matrix_type s_cache_
cache for SV sampled elements
SV::bvector_type bvector_type
SV::value_type value_type
size_type sv_size() const noexcept
Original SV size.
size_t min_key_len_
minimal key size in index
size_type common_prefix_length(const value_type *search_str, size_t in_len, size_type l, size_type r) const noexcept
find common prefix between index elements and search string
bool idx_unique_
inx value unique or there are dups?
size_type size() const noexcept
Index size (number of sampled elements)
size_t get_min_len() const noexcept
Return length of minimal indexed string.
sv_sample_index(const SV &sv, unsigned s_factor)
void recalc_range(const value_type *search_str, size_type &l, size_type &r) const noexcept
recalculate range into SV coordinates range [from..to)
size_type idx_size_
index size
size_type sv_size_
original sv size
bool bfind_range(const value_type *search_str, size_t in_len, size_type &l, size_type &r) const noexcept
find range (binary)
bm::dynamic_heap_matrix< value_type, allocator_type > heap_matrix_type
bool is_unique() const noexcept
returns true if all index values are unique
void construct(const SV &sv, unsigned s_factor)
Build sampling index for the sorted sprase vector.
bvector_type::allocator_type allocator_type
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
static DLIST_TYPE *DLIST_NAME() last(DLIST_LIST_TYPE *list)
static const char * str(char *buf, int n)
bm::id_t word_bitcount(bm::id_t w) noexcept
unsigned short bitscan(V w, B *bits) noexcept
Templated Bitscan with dynamic dispatch for best type.
null_support
NULL-able value support.
@ BM_SORTED
input set is sorted (ascending order)
@ BM_SORTED_UNIFORM
sorted and in one block (internal!)
@ use_null
support "non-assigned" or "NULL" logic
@ no_null
do not support NULL values
unsigned int
A callback function used to compare two keys in a database.
bool sparse_vector_find_first_mismatch(const SV &sv1, const SV &sv2, typename SV::size_type &midx, bm::null_support null_proc=bm::use_null)
Find first mismatch (element which is different) between two sparse vectors (uses linear scan in bit-...
void dynamic_range_clip_low(SV &svect, unsigned low_bit)
Clip dynamic range for signal lower than specified (boost)
void dynamic_range_clip_high(SV &svect, unsigned high_bit)
Clip dynamic range for signal higher than specified.
void sparse_vector_find_mismatch(typename SV1::bvector_type &bv, const SV1 &sv1, const SV2 &sv2, bm::null_support null_proc)
Find mismatch vector, indicating positions of mismatch between two sparse vectors (uses linear scan i...
void xor_swap(W &x, W &y) noexcept
XOR swap two variables.
const unsigned set_block_mask
unsigned long long int id64_t
const unsigned gap_max_bits
const unsigned set_block_shift
bool has_zero_byte_u64(bm::id64_t v) noexcept
Returns true if INT64 contains 0 octet.
double value_type
The numeric datatype used by the parser.
const GenericPointer< typename T::ValueType > T2 value
int strcmp(const char *str1, const char *str2)
static const BitmapCharRec ch1
static const BitmapCharRec ch2
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
static SLJIT_INLINE sljit_ins l(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
ad-hoc conditional expressions
value_type buffer_[BSIZE]
size_type gather_idx_[BSIZE]
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4