bvector_type::size_type
size_type;
177 unsigned char*
buf,
size_tbuf_size);
567 template<
typenameDEC,
typenameBLOCK_IDX>
646template<class BV, class DEC>
671 const unsigned char*
buf,
691is_range_set_ = 1; idx_from_ = from; idx_to_ = to;
770 unsignedxor_chain_size_;
775 unsignedis_range_set_;
787template<class BV, class SerialIterator>
806 boolexit_on_one =
false);
832{
return "BM::de-serialization format error"; }
834 boolis_range_set_ =
false;
846 template<
classDEC,
typenameBLOCK_IDX>
862 bool is_eof()
const{
returnend_of_stream_; }
871 unsignedget_bit_block(
bm::
word_t* dst_block,
880 unsigneddec_size()
const{
returndecoder_.size(); }
947 returnget_bit_block_COUNT(dst_block, tmp_block);
968 block_idx_typenew_nb = parent_type::try_skip(decoder_, nb, expect_nb);
971block_idx_ = new_nb; state_ = e_blocks;
1005 template<
typenameBV>
1029 const unsigned char*
buf,
1031 boolexit_on_one =
false);
1041 const unsigned char*
buf,
1063 const unsigned char*
buf,
1066 boolexit_on_one =
false)
1077 const unsigned char*
buf,
1094 const unsigned char*
buf,
1103 const unsigned char*
buf,
1272 template<
classBV>
1276compression_stat_(0),
1277gap_serial_(
false),
1278byte_order_serial_(
true),
1279sb_bookmarks_(
false),
1282enc_header_pos_(0), header_flag_(0),
1290 if(temp_block == 0)
1307 template<
classBV>
1310compression_stat_(0),
1311gap_serial_(
false),
1312byte_order_serial_(
true),
1313sb_bookmarks_(
false),
1316enc_header_pos_(0), header_flag_(0),
1324 if(temp_block == 0)
1341 template<
classBV>
1349tb_wflags_ = alloc_.alloc_bit_block(1);
1350idx_arr_ = alloc_.alloc_bit_block(1);
1354 template<
classBV>
1357 if(own_temp_block_)
1358alloc_.free_bit_block(temp_block_);
1359 if(compression_stat_)
1360alloc_.free_bit_block((
bm::word_t*)compression_stat_);
1362alloc_.free_bit_block(xor_tmp_block_, 3);
1363 if(gap_recalc_tmp_block0_)
1365 if(gap_recalc_tmp_block1_)
1368 if(gap_ex0_tmp_block_)
1370 if(gap_ex1_tmp_block_)
1373alloc_.free_bit_block(tb_wflags_, 1);
1375alloc_.free_bit_block(idx_arr_, 1);
1382 template<
classBV>
1385 for(
unsigned i= 0;
i< 256; ++
i)
1386compression_stat_[
i] = 0;
1389 template<
classBV>
1393compression_level_ = clevel;
1394 if(compression_level_ == 5)
1396 else if(compression_level_ == 6)
1400 template<
classBV>
1405sparse_cutoff_ = cutoff;
1410 template<
classBV>
1413gap_serial_ =
value;
1416 template<
classBV>
1419byte_order_serial_ =
value;
1422 template<
classBV>
1425sb_bookmarks_ = enable;
1428 if(bm_interval > 512)
1431 if(bm_interval < 4)
1434sb_range_ = bm_interval;
1437 template<
classBV>
1442xor_scan_.set_ref_vector(
ref_vect);
1445xor_tmp_block_ = alloc_.alloc_bit_block(3);
1451 template<
classBV>
1456 returnxor_scan_.compute_sim_model(sim_model,
ref_vect, params);
1459 template<
classBV>
1462sim_model_ = sim_model;
1465 template<
classBV>
1471 template<
classBV>
1482 if(!byte_order_serial_)
1498enc_header_pos_ = enc.get_pos();
1499enc.put_8(header_flag_);
1501 if(byte_order_serial_)
1504enc.put_8((
unsigned char)bo);
1516enc.put_64(bv.size());
1518enc.put_32(bv.size());
1556 template<
classBOUT>
1564: bout.delta16(min0);
1571: bout.delta16(min1);
1582 template<
classBOUT>
1587bout.put_bits(min_v, 8)
1588: bout.put_16_no(min_v);
1589(tail_delta < 256) ?
1590bout.put_bits(tail_delta, 8)
1591:bout.put_16_no(tail_delta);
1603 if( tail_delta <= ((0xFF << 3) + 0b111) )
1607head_v3 |= (tail_delta & 0b111);
1618 template<
classBV>
1622 const unsigneddrange_gain_cutoff = 200;
1627 const unsignedh_limit = 3;
1628 const unsignedex_limit = (
len/ 4);
1629 unsignedhist0[h_limit] = {0,};
1630 unsignedhist1[h_limit] = {0,};
1645 unsignedex0_cnt, ex1_cnt;
1648gap_recalc_block, ex0_arr, ex1_arr, ex0_cnt, ex1_cnt);
1650 if(ex0_cnt + ex1_cnt < 5)
1654 if(gl1 < (gl2 + ex0_cnt + ex1_cnt))
1677 unsigned charhead_v3 = 0;
1685min_v = gap_recalc_block[1];
1686max_v = gap_recalc_block[
len-1]-1;
1712bout.
delta16s(gap_recalc_block[1]);
1713 for(
unsignedk = 2; k <
len; ++k)
1715 BM_ASSERT(gap_recalc_block[k] < 65535);
1717gap_recalc_block[k] - gap_recalc_block[k-1];
1733&gap_recalc_block[2],
len-3, min_v+1, max_v);
1739 boolend_fl = (ex1_cnt == 0);
1742ex0_cnt,
false, end_fl);
1746ex1_cnt,
true,
true);
1751 boolend_fl = (ex0_cnt == 0);
1754ex1_cnt,
true, end_fl);
1758ex0_cnt,
false,
true);
1761 #ifdef BM_DBG_SERIAL 1763enc.
put_8((
unsigned char)0xFF);
1778 if( (min0 > 1 || min1 > 1))
1785 if(delta_acc > drange_gain_cutoff)
1815 unsignedenc_size = (unsigned)(enc_pos1 - enc_pos0);
1820gamma_gap_block(gap_block, enc);
1824 #ifdef BM_DBG_SERIAL 1826enc.
put_8((
unsigned char)0xFF);
1838 template<
classBV>
1856 if(tail_delta < 256)
1869 if(tail_delta < 256)
1870bout.
gamma8(tail_delta);
1881 unsignedenc_size = (unsigned)(enc_pos1 - enc_pos0);
1883 if(enc_size > plain_size)
1898gamma_gap_block(gap_block, enc);
1902 template<
classBV>
1914 bm::encoderenc_try((
unsigned char*)try_buf_, 65536);
1919interpolated_encode_gap_block_v3s(gap_block, enc_try,
len);
1922s_size = size_t(enc_try_pos1 - enc_try_pos0);
1928 boolv3_ok = interpolated_encode_gap_block_v3(gap_block, enc,
len);
1932 size_tdr_size = size_t(enc_pos1 - enc_pos0);
1933 if(s_size && (s_size < dr_size))
1952gamma_gap_block(gap_block, enc);
1959 template<
classBV>
1963 unsigned len= (*gap_block >> 3);
1967 booluse_gamma =
false;
1968 if(compression_level_ > 3)
1971use_gamma = (gamma_size_bits < size_16_bits);
1984bout.
put_bits(*gap_block & 1, 1);
1991 if(*pcurr == 65535)
1993 unsigned delta= unsigned(*pcurr -
prev);
2001 unsigned gamma_size= (unsigned)(enc_pos1 - enc_pos0);
2004enc.set_pos(enc_pos0);
2020bout.
put_bits(*gap_block & 1, 1);
2022 for(
unsigned i= 1;
i<
len; ++
i)
2030 unsignedenc_size = (unsigned)(enc_pos1 - enc_pos0); (void) enc_size;
2033 unsignedenc_size = (unsigned)(enc_pos1 - enc_pos0); (void) enc_size;
2037 template<
classBV>
2045 if(compression_level_ > 3 && arr_len > 1)
2051bout.
gamma(arr_len);
2055 for(
unsigned i= 1;
i< arr_len; ++
i)
2063 unsigned gamma_size= (unsigned)(enc_pos1 - enc_pos0);
2066enc.set_pos(enc_pos0);
2069compression_stat_[scode]++;
2075enc.put_prefixed_array_16(scode, gap_array, arr_len,
true);
2076compression_stat_[scode]++;
2080 template<
classBV>
2104bout.
gamma(arr_len-4);
2109 unsignedenc_size = (unsigned)(enc_pos1 - enc_pos0);
2111 if(enc_size >= raw_size)
2112enc.set_pos(enc_pos0);
2115compression_stat_[scode]++;
2121enc.put_prefixed_array_16(scode, gap_block, arr_len,
true);
2122compression_stat_[scode]++;
2128 template<
classBV>
2143 if(min_v >= 256 && tail >= 256)
2145interpolated_gap_array_v0(gap_block, arr_len, enc, inverted);
2166arr_len |= (1 << 1);
2170enc.put_8((
unsigned char)min_v);
2175enc.put_8((
unsigned char)tail);
2185 unsignedenc_size = (unsigned)(enc_pos1 - enc_pos0);
2187 if(enc_size >= raw_size)
2189enc.set_pos(enc_pos0);
2193compression_stat_[scode]++;
2198 unsigned charscode =
2200enc.put_prefixed_array_16(scode, gap_block, arr_len,
true);
2201compression_stat_[scode]++;
2206 template<
classBV>
2210scores_[mod_size_] = score; models_[mod_size_] =
mod;
2218 template<
classBV>
2225 const floatbie_bits_per_int =
2226compression_level_ < 6 ? 3.75f : this->bie_bits_per_int_;
2227 const unsignedbie_limit = unsigned(
float(
bm::gap_max_bits) / bie_bits_per_int);
2228 unsignedbc, ibc, gc;
2232 BM_ASSERT(sub_stat.top_level_idx == i0);
2236bit_stat_.bit_model_0run_size_ =
2245bit_stat_.bit_model_d0_size_ = unsigned(8 + (32 * d0_bc *
sizeof(
bm::word_t)));
2251 if(sub_stat.top_level_idx == i0 && (block == sub_stat.blocks[j0]))
2253gc = sub_stat.gc_arr[j0]; bc = sub_stat.bc_arr[j0];
2257bit_stat_.gc = gc; bit_stat_.bc = bc;
2270 unsignedarr_size_inv =
2277 floatgcf=float(gc);
228032 +
unsigned((gcf-1) * bie_bits_per_int));
2281 if(
floatbcf=
float(bc), ibcf=
float(ibc); bc < bie_limit)
228316 * 3 +
unsigned(bcf * bie_bits_per_int));
2285 if(ibc < bie_limit)
228716 * 3 +
unsigned(ibcf * bie_bits_per_int));
2289gc -= gc > 2 ? 2 : 0;
229316 * 4 +
unsigned(gcf * bie_bits_per_int));
229716 * 4 +
unsigned(gcf * bie_bits_per_int));
2303 for(
unsigned i= 0;
i< mod_size_; ++
i)
2305 if(scores_[
i] < min_score)
2307min_score = scores_[
i]; model = models_[
i];
2313std::cout <<
" 0runs="<< (bit_model_0run_size_ * 8) << std::endl;
2314std::cout <<
" GAP BIC="<< (16 * 4 + unsigned(gcf * bie_bits_per_int)) << std::endl;
2315std::cout <<
" ARR BIC="<< (16 * 3 + unsigned(bcf * bie_bits_per_int)) << std::endl;
2316std::cout <<
"BC,GC=["<< bc <<
", "<< gc <<
"]"<< std::endl;
2317std::cout <<
"bie_limit="<< bie_limit << std::endl;
2322 case set_block_bit: std::cout <<
"BIT="<<
"["<< bc <<
", "<< gc <<
"]";
break;
2334 case set_block_bit_0runs: std::cout <<
"0runs=["<< bc <<
", "<< gc <<
" lmt="<< bie_limit <<
"]";
break;
2337 default: std::cout <<
"UNK="<<
int(model);
break;
2347 if((compression_level_ >= 6) && bic_drange_)
2353(bit_stat_.gc <= bit_stat_.bc || bit_stat_.gc <= bit_stat_.ibc))
2359model = (bit_stat_.bc <= bit_stat_.ibc) ?
2369 template<
classBV>
2379 if(compression_level_ >= 5)
2380 returnfind_bit_best_encoding_l5(block, sub_stat, nb);
2387 if(compression_level_ <= 1)
2392 if(compression_level_ <= 5)
2395 if(compression_level_ >= 2)
2404bit_stat_.bit_model_d0_size_ = unsigned(8 + (32 * d0_bc *
sizeof(
bm::word_t)));
2408 if(compression_level_ >= 4)
2415bit_stat_.gc = 65535;
2419 if(bit_stat_.bc == 1)
2431 if(compression_level_ >= 3)
2435 unsignedarr_size_inv =
2441 if(compression_level_ >= 4)
2443 const unsignedgamma_bits_per_int = 6;
2444 if(compression_level_ == 4)
244816 + (bit_stat_.gc-1) * gamma_bits_per_int);
2449 if(bit_stat_.bc < bit_stat_.gc &&
245216 + bit_stat_.bc * gamma_bits_per_int);
2453 if(bit_stat_.ibc > 3 &&
2454bit_stat_.ibc < bit_stat_.gc &&
245716 + bit_stat_.ibc * gamma_bits_per_int);
2467 for(
unsigned i= 0;
i< mod_size_; ++
i)
2469 if(scores_[
i] < min_score)
2471min_score = scores_[
i]; model = models_[
i];
2477 template<
classBV>
2483 if(compression_level_ <= 2)
2491 if(compression_level_ == 4)
2498 template<
classBV>
2507 unsigned charenc_choice = find_gap_best_encoding(gap_block);
2511gamma_gap_block(gap_block, enc);
2520enc.
put_16(gap_temp_block[0]);
2537gamma_gap_array(gap_temp_block, arr_len, enc,
invert);
2540interpolated_encode_gap_block(gap_block, enc, 0);
2543gamma_gap_block(gap_block, enc);
2547 template<
classBV>
2554enc.put_8((blk[0]==0) ? 0 : 1);
2590enc.put_32(blk +
i, j -
i);
2599 template<
classBV>
2608 if(bit_stat_.bit_model_0run_size_ < bit_stat_.bit_model_d0_size_)
2610encode_bit_interval(block, enc, 0);
2627enc.put_32(block[off+j+0]);
2628enc.put_32(block[off+j+1]);
2629enc.put_32(block[off+j+2]);
2630enc.put_32(block[off+j+3]);
2643encode_bit_interval(block, enc, 0);
2652 template<
classBV>
2662enc.put_8(vbr_flag);
2666ridx = ref_vect_->get_row_idx(ridx);
2670 case1: enc.put_8((
unsigned char)ridx);
break;
2671 case2: enc.put_16((
unsigned short)ridx);
break;
2672 case0: enc.put_32((
unsigned)ridx);
break;
2676enc.put_8((
unsigned char) (chain_size-1));
2678 for(
unsignedci = 1; ci < chain_size; ++ci)
2680ridx = mchain.ref_idx[ci];
2681d64 = mchain.xor_d64[ci];
2682ridx = ref_vect_->get_row_idx(ridx);
2685 case1: enc.put_8((
unsigned char)ridx);
break;
2686 case2: enc.put_16((
unsigned short)ridx);
break;
2687 case0: enc.put_32((
unsigned)ridx);
break;
2697 template<
classBV>
2706s_block = xor_tmp1_;
2709 const bm::word_t* ref_block = xor_scan_.get_ref_block(ridx,
i, j);
2713ref_block = xor_tmp2_;
2717 for(
unsignedk = 1; k < mchain.chain_size; ++k)
2719ridx = mchain.ref_idx[k];
2720ref_block = xor_scan_.get_ref_block(ridx,
i, j);
2724ref_block = xor_tmp2_;
2726d64 = mchain.xor_d64[k];
2732 template<
classBV>
2740bv.calc_stat(&stat);
2744 buf.resize(bv_stat->max_serialize_mem,
false);
2745optimize_ = free_ =
false;
2747 unsigned char* data_buf =
buf.data();
2748 size_tbuf_size =
buf.size();
2756 template<
classBV>
2765 typenamebvector_type::mem_pool_guard mp_g_z;
2766mp_g_z.assign_if_not_set(pool_, bv);
2768bv.optimize(temp_block_, BV::opt_compress, &
st);
2771optimize_ = free_ =
false;
2774 template<
classBV>
2784 unsigned charscode =
2786enc.put_prefixed_array_16(scode, bit_idx_arr_.data(), arr_len,
true);
2787compression_stat_[scode]++;
2790encode_bit_digest(block, enc, bit_stat_.d0);
2793 template<
classBV>
2799gamma_gap_block(bit_idx_arr_.data(), enc);
2802 template<
classBV>
2811gamma_gap_array(bit_idx_arr_.data(), arr_len, enc, inverted);
2818 template<
classBV>
2827interpolated_gap_array(bit_idx_arr_.data(), arr_len, enc, inverted);
2830encode_bit_digest(block, enc, bit_stat_.d0);
2833 template<
classBV>
2841 bool r= bienc_gap_bit_block_enc(
len, enc);
2844encode_bit_digest(block, enc, bit_stat_.d0);
2856interpolated_encode_gap_block(bit_idx_arr_.data(), enc,
len);
2859 unsignedenc_size = (unsigned)(enc_pos1 - enc_pos0);
2862enc.set_pos(enc_pos0);
2869 template<
classBV>
2876 bool r= bienc_gap_bit_block_enc(
len, enc);
2879encode_bit_digest(block, enc, bit_stat_.digest0_);
2882 template<
classBV>
2901enc.put_8((
unsigned char)
head);
2908 unsignedenc_size = (unsigned)(enc_pos1 - enc_pos0);
2910 if(enc_size >= raw_size)
2912enc.set_pos(enc_pos0);
2916compression_stat_[scode]++;
2943 template<
classBV>
2947 unsignedsb_flag = 0;
2953 unsigned len= (unsigned)sb_bit_idx_arr_.size();
2956 unsignedmin0 = 0;
unsigneddelta_acc = 0; (void) delta_acc;
2958 unsigned* sb_arr = sb_bit_idx_arr_.data();
2960min0 = (min0 > 1) ? (min0-1) : 0;
2964 unsignedprojected_delta_acc = (
len- 1) * min0;
2965 if(min0 && projected_delta_acc > 250)
2970 BM_ASSERT((max_v1 + delta_acc) == max_v0);
2987 else if(sb > 65535)
2996 if(min_v <= 0xFFFFFF)
3003 else if(min_v > 255)
3006 if(max_v_delta > 65535)
3007 if(max_v_delta <= 0xFFFFFF)
3013 else if(max_v_delta > 255)
3031 if(min_v <= 0xFFFFFF)
3044 else if(min_v > 255)
3046bout.
put_16_no((
unsigned short)min_v);
3049bout.
put_bits((
unsigned char)min_v, 8);
3051 if(max_v_delta > 65535)
3053 if(max_v_delta <= 0xFFFFFF)
3058 else if(max_v_delta > 255)
3059bout.
put_16_no((
unsigned short)max_v_delta);
3078bout.
gamma(min0_code);
3107(
unsigned)sb_bit_idx_arr_.size()-2,
3111 unsignedenc_size = (unsigned)(pos1 - pos0); (void) enc_size;
3113compression_stat_[scode]++;
3117 template<
classBV>
3128 unsignedsub_gap_len = 0;
3131 if(sub_stat.blocks[
i])
3141bit_out_type bout(enc);
3144bout.gamma8(sub_gap_len);
3145bout.bic_encode_u16_cm(sub_idx, sub_gap_len, 0, 255);
3147 for(
unsigned i= 0;
i< sub_gap_len; ++
i)
3149 autoblock_idx = sub_idx[
i];
3156bout.put_bits(
head, 1);
3159 for(
unsignedj = 1; j <
len; ++j)
3161bout.gamma8(gap_block[j]);
3171bout.gamma8(tail_delta);
3172bout.bic_encode_u16(&gap_block[2],
len-3, min_v, max_v);
3180 unsignedenc_size = (unsigned)(enc_pos1 - enc_pos0); (void) enc_size;
3182compression_stat_[scode]++;
3187 template<
classBV>
3200 unsigneds_cnt, r_cnt;
3204 autosplit_cnt = s_cnt + (r_cnt * 2);
3205 if(split_cnt > bit_stat_.bc)
3213 unsigned charscode =
3221 boolno_gap_EOC = (r_cnt == 0);
3226 unsigneds_g_size = unsigned(enc_pos1_s - enc_pos0);
3237 unsignedenc_size2 = (unsigned)(p2 - p1);
3238 unsignedg_size = (r_cnt == 1) ? 0
3240 booluse_gamma = (g_size <= enc_size2);
3245 autop3_rl0 = enc.
get_pos();
3248!inverted,
true, use_gamma,
false);
3249 autop3_rl1 = enc.
get_pos();
3250 unsignedrl_g_size = unsigned(p3_rl1 - p3_rl0);
3256 for(
unsigned i= 0;
i< r_cnt; ++
i)
3259 BM_ASSERT(
unsigned(arr_rl[
i]) + arr_r[
i] <= 65535);
3261arr_rl[
i] += arr_r[
i];
3267!inverted,
true, use_gamma,
false);
3269 unsignedenc_size3 = (unsigned)(p3 - p2);
3270 if(enc_size3 > g_size)
3279 unsignedenc_size = (unsigned)(enc_pos1 - enc_pos0);
3281 if((enc_size >= raw_size) || (enc_size >= bit_stat_.bit_model_d0_size_))
3286 #ifdef BM_DBG_SERIAL 3287enc.
put_8((
unsigned char)0xFF);
3289compression_stat_[scode]++;
3293 template<
classBV>
3300bit_idx_arr_.data(), block, inverted);
3303 unsigned charscode =
3315 boolneed_min_max = min_v && (dr_reduct > (3 * 256) || arr_len < 4);
3329arr_len -= 2; min_v++; max_v--;
3330 arr= &bit_idx_arr_[1];
3334 arr= &bit_idx_arr_[0];
3335min_v = 0; max_v = 65535;
3343 unsignedenc_size = (unsigned)(enc_pos1 - enc_pos0);
3345 if(enc_size >= raw_size)
3352 if(bit_stat_.d0 != ~0ull && enc_size > bit_stat_.bit_model_d0_size_)
3359compression_stat_[scode]++;
3367 template<
classBV>
3373 bm::encoderenc_try((
unsigned char*)try_buf_, 65536);
3378 bool r= interpolated_arr_bit_block_v3s(enc_try, block, inverted);
3382s_size = size_t(enc_try_pos1 - enc_try_pos0);
3389 bool r= interpolated_arr_bit_block_v3(enc, block, inverted);
3393 size_tdr_size = size_t(enc_pos1 - enc_pos0);
3394 if(s_size && (s_size < dr_size))
3413encode_bit_digest(block, enc, bit_stat_.d0);
3420bit_idx_arr_.data(), block, inverted);
3423 unsigned charscode =
3435 if(!inverted && min_v <= 0xFF && max_delta <= 0xFF)
3438enc.
put_8((
unsigned char)min_v);
3439enc.
put_8((
unsigned char)max_delta);
3449bout.
bic_encode_u16(&bit_idx_arr_[1], arr_len-2, min_v, max_v);
3453 unsignedenc_size = (unsigned)(enc_pos1 - enc_pos0);
3455 if(enc_size >= raw_size)
3459 if(bit_stat_.d0 != ~0ull && enc_size > bit_stat_.bit_model_d0_size_)
3463compression_stat_[scode]++;
3470encode_bit_digest(block, enc, bit_stat_.d0);
3476 #define BM_SER_NEXT_GRP(enc, nb, B_1ZERO, B_8ZERO, B_16ZERO, B_32ZERO, B_64ZERO) \ 3478 enc.put_8(B_1ZERO); \ 3479 else if (nb < 256u) \ 3481 enc.put_8(B_8ZERO); \ 3482 enc.put_8((unsigned char)nb); \ 3484 else if (nb < 65536u) \ 3486 enc.put_8(B_16ZERO); \ 3487 enc.put_16((unsigned short)nb); \ 3489 else if (nb < bm::id_max32) \ 3491 enc.put_8(B_32ZERO); \ 3492 enc.put_32(unsigned(nb)); \ 3496 enc.put_8(B_64ZERO); \ 3501 template<
classBV>
3509 if(bookm.ptr_ && nb_delta >= bookm.nb_range_)
3511 unsigned char* curr = enc.get_pos();
3512 size_tbytes_delta = size_t(curr - bookm.ptr_);
3513 if(bytes_delta > bookm.min_bytes_range_)
3515enc.set_pos(bookm.ptr_);
3516 switch(bookm.bm_type_)
3519bytes_delta -=
sizeof(unsigned);
3520 if(bytes_delta < 0xFFFFFFFF)
3521enc.put_32(
unsigned(bytes_delta));
3525bytes_delta -= (
sizeof(unsigned)-1);
3526 if(bytes_delta < 0xFFFFFF)
3527enc.put_24(
unsigned(bytes_delta));
3530bytes_delta -=
sizeof(
unsignedshort);
3531 if(bytes_delta < 0xFFFF)
3532enc.put_16((
unsigned short)bytes_delta);
3541 if(nb_delta < 0xFF)
3544enc.put_8((
unsigned char) nb_delta);
3547 if(nb_delta < 0xFFFF)
3550enc.put_16((
unsigned short) nb_delta);
3553 if(nb_delta < 0xFFFFFF)
3556enc.put_24(
unsigned(nb_delta));
3559 if(nb_delta < ~0
U)
3562enc.put_32(
unsigned(nb_delta));
3567 if(nb_delta < 0xFFFFFFFFFFFFUL)
3570enc.put_48(nb_delta);
3575enc.put_64(nb_delta);
3587bookm.ptr_ = enc.get_pos() + 1;
3588 switch(bookm.bm_type_)
3610 template<
classBV>
3613 unsigned char*
buf,
size_tbuf_size)
3617 if(allow_stat_reset_)
3618reset_compression_stats();
3622enc_header_pos_ = 0;
3623encode_header(bv, enc);
3627 unsignedi_last = ~0u;
3637 if(i0 < bman.top_block_size())
3644process_bookmark(
i, sb_bookmark, enc);
3654sub_stat.
init((
void*)&bv, i0);
3656 boolis_sparse_sub =
3657bman.is_sparse_sblock(i0, sparse_cutoff_, sub_stat);
3660 if((compression_level_ >= 5) && (ref_vect_ == 0))
3662 if(is_sparse_sub && sub_stat.
bv_count< 65536)
3665bienc_arr_sblock(bv, i0, enc);
3672 unsignedibc = unsigned(sub_max_bc - sub_stat.
bv_count);
3681bienc_gaps_sblock(bv, i0, sub_stat, enc);
3703 const bm::word_t* blk = bman.get_block(i0, j0);
3723enc.
set_pos(enc_header_pos_);
3724enc.
put_8(header_flag_);
3729 if(nb > 1 && nb < 128)
3732 unsigned charc = (
unsignedchar)((1u << 7) | nb);
3759 bm::word_t*** blk_root = bman.top_blocks_root();
3766 const bm::word_t* blk_next = bman.get_block(i0, j0);
3798 boolnb_indexed = sim_model_->bv_blocks.test(
i);
3803 size_typerank = sim_model_->bv_blocks.count_range(0,
i);
3807sim_model_->matr.get(ref_idx_, rank);
3809 switch(mchain.
match)
3817 size_typeplain_idx = ref_vect_->get_row_idx(ridx);
3819enc.
put_32(
unsigned(plain_idx));
3829xor_tmp_product(blk, mchain, i0, j0);
3835 size_typeplain_idx = ref_vect_->get_row_idx(ridx);
3855encode_xor_match_chain(enc, mchain);
3857blk = xor_tmp_block_;
3871encode_gap_block(
BMGAP_PTR(blk), enc);
3881 unsigned charmodel = find_bit_best_encoding(blk, sub_stat,
i);
3891 unsignedbit_idx = 0;
3904encode_bit_array(blk, enc,
false);
3907encode_bit_array(blk, enc,
true);
3910gamma_gap_bit_block(blk, enc);
3913encode_bit_interval(blk, enc, 0);
3916gamma_arr_bit_block(blk, enc,
false);
3919gamma_arr_bit_block(blk, enc,
true);
3922bienc_arr_bit_block(blk, enc,
false);
3925bienc_arr_bit_block(blk, enc,
true);
3928interpolated_arr_bit_block(blk, enc,
false);
3931interpolated_arr_bit_block(blk, enc,
true);
3934interpolated_gap_bit_block(blk, enc);
3937interpolated_gap_bit_block(blk, enc);
3940encode_bit_digest(blk, enc, bit_stat_.d0);
3962enc.
set_pos(enc_header_pos_);
3963enc.
put_8(header_flag_);
4019 template<
classBV>
4021 unsigned char*
buf,
4053 template<
classBV>
4055 unsigned char*
buf,
4085 template<
classBV>
4087 const unsigned char*
buf,
4094 unsigned charheader_flag = dec.
get_8();
4101 if(bo_current == bo)
4148 template<
classBV>
4150 const unsigned char*
buf,
4151 typenameBV::size_type from,
4152 typenameBV::size_type to,
4158 unsigned charheader_flag = dec.
get_8();
4165 if(bo_current == bo)
4196bv.keep_range(from, to);
4200 template<
typenameDEC,
typenameBLOCK_IDX>
4203 unsignedblock_type,
4229 if(k == 0) --bit_idx;
4232dst_arr[k] = bit_idx;
4245dst_arr[
len-1] = max_v;
4253min_v = (
len& 1) ?
4256max_v = (
len& (1<<1)) ?
4265dst_arr[
len-1] = max_v;
4272 throwstd::logic_error(err_msg());
4274BM_THROW(BM_ERR_SERIALFORMAT);
4280 template<
typenameDEC,
typenameBLOCK_IDX>
4284 unsignedblock_type)
4293min_v = dec.get_16();
4294max_v = dec.get_16();
4297min_v = dec.get_8();
4298max_delta = dec.get_8();
4305 unsigneds_cnt, r_cnt;
4326bin.
decode_array(arr_rl, this->tb_wflags_, &s_cnt, r_cnt);
4337 for(
unsigned i= 0;
i< r_cnt; ++
i)
4340arr_rl[
i] -= arr_r[
i];
4347 #ifdef BM_DBG_SERIAL 4348 unsigned charcontrol = dec.get_8();
4349 if(control != 0xFF)
4351std::cerr <<
"SERIALIZATION INTEGRITY 2!"<< std::endl;
4353 throwstd::logic_error(err_msg());
4355BM_THROW(BM_ERR_SERIALFORMAT);
4371 boolneed_min_max = bin.
get_bits(1);
4387min_v = 0; max_v = 65535;
4406arr_len = dec.get_16();
4420 template<
typenameDEC,
typenameBLOCK_IDX>
4423 unsignedblock_type,
4427 unsigned len(0), sb_flag(0);
4435sb_flag = dec.get_8();
4438*sb_idx = dec.get_32();
4440*sb_idx = dec.get_16();
4442*sb_idx = dec.get_8();
4452 throwstd::logic_error(err_msg());
4454BM_THROW(BM_ERR_SERIALFORMAT);
4460min_v = dec.get_32();
4462min_v = dec.get_24();
4464min_v = dec.get_16();
4466min_v = dec.get_8();
4471max_v = dec.get_32();
4473max_v = dec.get_24();
4475max_v = dec.get_16();
4477max_v = dec.get_8();
4483 booluse_gamma = bin.
get_bit();
4485min0 = bin.
gamma();
4490dst_arr[
len-1] = max_v;
4517 unsignedj = bin.
gamma();
4519min_v = (j * 65536) + nbit;
4546 automin0_code = bin.
gamma();
4550min0 = bin.
gamma();
4564*sb_idx = bin.
gamma()-1;
4573dst_arr[
len-1] = max_v;
4587 throwstd::logic_error(err_msg());
4589BM_THROW(BM_ERR_SERIALFORMAT);
4596 template<
typenameDEC,
typenameBLOCK_IDX>
4604this->read_bic_arr(
decoder, blk, block_type);
4608 template<
typenameDEC,
typenameBLOCK_IDX>
4615 unsignedarr_len = dec.get_16();
4619id_array_[0] =
head;
4620id_array_[1] = min_v;
4621id_array_[arr_len] = 65535;
4631 template<
typenameDEC,
typenameBLOCK_IDX>
4659block[off+j+0] |= dec.get_32();
4660block[off+j+1] |= dec.get_32();
4661block[off+j+2] |= dec.get_32();
4662block[off+j+3] |= dec.get_32();
4671 template<
typenameDEC,
typenameBLOCK_IDX>
4679 unsigned charrun_type = dec.get_8();
4682 unsignedrun_length = dec.get_16();
4685 unsignedrun_end = j + run_length;
4687 for(;j < run_end; ++j)
4689 unsignedw = dec.get_32();
4702 template<
typenameBIN>
4722 template<
typenameBIN>
4734max_v <<= 3; max_v |= (head_v3 & 0b111);
4744 template<
typenameDEC,
typenameBLOCK_IDX>
4747 unsignedblock_type,
4757*dst_block = gap_head;
4788 unsignedarr_len = read_id_list(
decoder, block_type, id_array_);
4798 unsigned len= (gap_head >> 3);
4801*dst_block = gap_head;
4807 for(
unsigned i= 1;
i<
len; ++
i)
4811*(++gap_data_ptr) = gap_sum;
4818 unsigned len= (gap_head >> 3);
4819*dst_block = gap_head;
4821dst_block[1] = min_v;
4830 unsigned len= (gap_head >> 3);
4844dst_block[0] = gap_head;
4845dst_block[1] = min_v;
4848dst_block[
len-1] = max_v;
4861 unsigned len= (gap_head >> 3);
4870dst_block[0] = gap_head;
4871dst_block[1] = min_v;
4873dst_block[
len-1] = max_v;
4886 unsigned charhead_v3 = (
unsignedchar) bin.
get_bits(8);
4888 unsigned len= (gap_head >> 3);
4891 unsignedex0_cnt{0};
4899 for(
unsignedk = 2; k <
len; ++k)
4903 BM_ASSERT(dst_block[k] > dst_block[k-1]);
4912dst_block[1] = min_v;
4915dst_block[
len-1] = max_v+1;
4921bin.
decode_array(&ex0_arr[0], this->tb_wflags_, &ex0_cnt);
4924 for(
unsignedk = 0; k < ex0_cnt; ++k)
4931bin.
decode_array(&ex0_arr[0], this->tb_wflags_, &ex0_cnt);
4934 for(
unsignedk = 0; k < ex0_cnt; ++k)
4944dst_block[1] = min_v;
4947dst_block[
len-1] = max_v+1;
4954 #ifdef BM_DBG_SERIAL 4956 if(control != 0xFF)
4958std::cerr <<
"SERIALIZATION INTEGRITY!"<< std::endl;
4960 throwstd::logic_error(err_msg());
4962BM_THROW(BM_ERR_SERIALFORMAT);
4970 if(
cnt!= cnt_saved)
4972std::cerr <<
"SERIALIZATION INTEGRITY!"<< std::endl;
4974 throwstd::logic_error(err_msg());
4976BM_THROW(BM_ERR_SERIALFORMAT);
4990 unsignedstart_flag = bin.
get_bit();
4991 unsigneduse_gamma = bin.
get_bit();
4999 for(
unsigned i= 2;
i<
len; ++
i)
5004 prev= dst_block[
i];
5009 for(
unsigned i= 1;
i<
len; ++
i)
5012 BM_ASSERT(
i==1 || (dst_block[
i-1] < dst_block[
i]));
5022 throwstd::logic_error(err_msg());
5024BM_THROW(BM_ERR_SERIALFORMAT);
5036 template<
typenameDEC,
typenameBLOCK_IDX>
5047 if(save_pos > skip_pos_)
5096 if(nb_sync <= expect_nb)
5110 template<
classBV,
classDEC>
5133 template<
classBV,
classDEC>
5136alloc_.free_bit_block(temp_block_);
5138alloc_.free_bit_block(xor_block_, 2);
5146 template<
classBV,
classDEC>
5150 if(ref_vect_ && !xor_block_)
5151xor_block_ = alloc_.alloc_bit_block(2);
5154 template<
classBV,
classDEC>
5166 boolinv_flag =
false;
5169bman.reserve_top_blocks(i0+1);
5170bman.check_alloc_top_subblock(i0);
5179(
sizeof(
gap_word_t) == 2 ? dec.get_16() : dec.get_32());
5186*gap_temp_block = gap_head;
5187dec.get_16(gap_temp_block+1,
len- 1);
5192blk = bman.get_allocator().alloc_bit_block();
5193bman.set_block(nb, blk);
5200bv.combine_operation_block_or(i0, j0, blk, temp_block_);
5211bman.get_allocator().alloc_gap_block(
unsigned(level), bman.glen());
5213*gap_blk_ptr = gap_head;
5219dec.get_16(gap_blk + 1,
len- 1);
5225*gap_temp_block = gap_head;
5226dec.get_16(gap_temp_block + 1,
len- 1);
5243 unsignedarr_len = this->read_id_list(dec, btype, this->id_array_);
5244gap_temp_block[0] = 0;
5255bv.combine_operation_block_or(i0, j0, blk, temp_block_);
5261gap_head = dec.get_16();
5268this->read_gap_block(dec, btype, gap_temp_block, gap_head);
5273gap_head = dec.get_16();
5274this->read_gap_block(dec, btype, gap_temp_block, gap_head);
5278this->read_gap_block(dec, btype, gap_temp_block, gap_head);
5282this->read_gap_block(dec, btype, gap_temp_block, gap_head);
5286this->read_gap_block(dec, btype, gap_temp_block, gap_head);
5291 throwstd::logic_error(this->err_msg());
5293BM_THROW(BM_ERR_SERIALFORMAT);
5301blk = bman.get_allocator().alloc_bit_block();
5303bman.set_block_ptr(i0, j0, blk);
5308bv.combine_operation_block_or(i0, j0, blk, temp_block_);
5315bv.combine_operation_block_or(i0, j0, blk, tmp_blk);
5319 template<
classBV,
classDEC>
5328blk = bman.get_allocator().alloc_bit_block();
5329bman.set_block(nb, blk);
5334blk = bman.deoptimize_block(nb);
5342blk = bman.deoptimize_block(nb);
5346 for(
unsignedk = 0; k <
len; ++k)
5358this->read_bic_arr(dec, blk, btype);
5365blk = bman.deoptimize_block(nb);
5368this->read_bic_arr(dec, temp_block_, btype);
5373this->read_bic_gap(dec, blk);
5376this->read_digest0_block(dec, blk);
5381 throwstd::logic_error(this->err_msg());
5383BM_THROW(BM_ERR_SERIALFORMAT);
5388 template<
classBV,
classDEC>
5394 unsigned*
arr= this->sb_id_array_;
5396 unsigned len= this->read_bic_sb_arr(dec, btype,
arr, &sb);
5399 typenameBV::size_type from = sb * sb_max_bc;
5402 for(
typenameBV::size_type
i= 0;
i<
len; ++
i)
5404 typenameBV::size_type idx = from +
arr[
i];
5407 if(idx < idx_from_)
5409bv.set_bit_no_check(idx);
5414 for(
typenameBV::size_type
i= 0;
i<
len; ++
i)
5416 typenameBV::size_type idx = from +
arr[
i];
5417bv.set_bit_no_check(idx);
5423 template<
classBV,
classDEC>
5432blk = bman.get_allocator().alloc_bit_block();
5433bman.set_block(nb, blk);
5439bv.combine_operation_with_block(nb, temp_block_, 0,
BM_OR);
5443 template<
classBV,
classDEC>
5449 unsignedhead_idx = dec.get_16();
5450 unsignedtail_idx = dec.get_16();
5454blk = bman.get_allocator().alloc_bit_block();
5455bman.set_block(nb, blk);
5456 for(
unsignedk = 0; k < head_idx; ++k)
5458dec.get_32(blk + head_idx, tail_idx - head_idx + 1);
5465dec.get_32(temp_block_ + head_idx, tail_idx - head_idx + 1);
5466bv.combine_operation_with_block(nb, temp_block_, 0,
BM_OR);
5470 template<
classBV,
classDEC>
5480blk = bman.deoptimize_block(nb);
5486blk = bman.get_allocator().alloc_bit_block();
5488bman.set_block(nb, blk);
5493 for(
unsignedk = 0; k <
len; ++k)
5499 for(
unsignedk = 0; k <
len; ++k)
5508 template<
classBV,
classDEC>
5510 const unsigned char*
buf,
5514 if(!bman.is_init())
5516 autobc = bman.compute_top_block_size(
bm::id_max-1);
5522 typenamebvector_type::mem_pool_guard mp_guard_bv;
5523mp_guard_bv.assign_if_not_set(pool_, bv);
5530 unsigned charheader_flag = dec.get_8();
5540 throwstd::logic_error(this->err_msg());
5542BM_THROW(BM_ERR_SERIALFORMAT);
5559bv_size = dec.get_32();
5560 if(bv_size > bv.size())
5563 for(
unsigned cnt= dec.get_32();
cnt; --
cnt)
5569 returndec.size()-1;
5589 throwstd::logic_error(this->err_msg());
5591BM_THROW(BM_ERR_SERIALFORMAT);
5596bv_size = dec.get_32();
5597 if(bv_size > bv.size())
5604xor_block_ = alloc_.alloc_bit_block();
5615 unsignedrow_idx(0);
5619 unsigned charbtype;
5623 size_tdec_last_size = dec.size();
5624std::cout <<
"size="<< dec_last_size;
5636btype = dec.get_8();
5637 if(btype & (1 << 7))
5639nb = btype & ~(1 << 7);
5645 bm::word_t* blk = bman.get_block_ptr(i0, j0);
5647 autodec_size = dec.size();
5648std::cout <<
"_sz="<< (dec_size - dec_last_size);
5649std::cout <<
" ["<< unsigned(btype) <<
", "<< nb_i <<
"]"<< std::flush;
5650dec_last_size = dec_size;
5684 throwstd::logic_error(this->err_msg());
5686BM_THROW(BM_ERR_SERIALFORMAT);
5695bman.set_block_all_set(nb_i);
5698full_blocks = dec.get_8();
5699 gotoprocess_full_blocks;
5702full_blocks = dec.get_16();
5703 gotoprocess_full_blocks;
5706full_blocks = dec.get_32();
5707 gotoprocess_full_blocks;
5711full_blocks = dec.get_64();
5712 gotoprocess_full_blocks;
5717 throwstd::logic_error(this->err_msg());
5719BM_THROW(BM_ERR_SERIALFORMAT);
5722process_full_blocks:
5727bv.set_range(from, to-1);
5728nb_i += full_blocks-1;
5732decode_block_bit(dec, bv, nb_i, blk);
5738bv.set_bit_no_check(bit_idx);
5744this->read_0runs_block(dec, temp_block);
5745bv.combine_operation_with_block(nb_i, temp_block, 0,
BM_OR);
5749decode_block_bit_interval(dec, bv, nb_i, blk);
5767deserialize_gap(btype, dec, bv, bman, nb_i, blk);
5770decode_arrbit(dec, bv, nb_i, blk);
5781decode_bit_block(btype, dec, bman, nb_i, blk);
5784decode_bit_block(btype, dec, bman, nb_i, blk);
5790decode_arr_sblock(btype, dec, bv);
5801this->bookmark_idx_ = nb_i;
5802this->skip_offset_ = dec.get_32();
5803 gotoprocess_bookmark;
5805this->bookmark_idx_ = nb_i;
5806this->skip_offset_ = dec.get_24();
5807 gotoprocess_bookmark;
5809this->bookmark_idx_ = nb_i;
5810this->skip_offset_ = dec.get_16();
5814this->skip_pos_ = dec.get_pos() + this->skip_offset_;
5816nb_from = this->try_skip(dec, nb_i, nb_from);
5823nb_sync = dec.get_8();
5824 gotoprocess_nb_sync;
5826nb_sync = dec.get_16();
5827 gotoprocess_nb_sync;
5829nb_sync = dec.get_24();
5830 gotoprocess_nb_sync;
5832nb_sync = dec.get_32();
5833 gotoprocess_nb_sync;
5836 gotoprocess_nb_sync;
5840 BM_ASSERT(nb_i == this->bookmark_idx_ + nb_sync);
5841 if(nb_i != this->bookmark_idx_ + nb_sync)
5844 throwstd::logic_error(this->err_msg());
5846BM_THROW(BM_ERR_SERIALFORMAT);
5859row_idx = dec.get_32();
5860 size_typeidx = ref_vect_->find(row_idx);
5861 if(idx == ref_vect_->not_found())
5870 const bm::word_t* ref_blk = ref_bman.get_block_ptr(i0, j0);
5872bv.combine_operation_with_block(nb_i, ref_blk,
5880row_idx = dec.get_8();
5886row_idx = dec.get_16();
5893row_idx = dec.get_32();
5896x_ref_d64_ = dec.get_64();
5901x_ref_idx_ = ref_vect_->find(row_idx);
5903 if(x_ref_idx_ == ref_vect_->not_found())
5911or_block_ = bman.deoptimize_block(nb_i);
5912bman.set_block_ptr(nb_i, 0);
5913or_block_idx_ = nb_i;
5920row_idx = dec.get_8();
5922 gotoprocess_xor_ref;
5926row_idx = dec.get_16();
5928 gotoprocess_xor_ref;
5932row_idx = dec.get_32();
5934 gotoprocess_xor_ref;
5939 unsigned charvbr_flag = dec.get_8();
5942 case1: row_idx = dec.get_8();
break;
5943 case2: row_idx = dec.get_16();
break;
5944 case0: row_idx = dec.get_32();
break;
5947 bm::id64_tacc64 = x_ref_d64_ = dec.get_h64(); (void) acc64;
5949xor_chain_size_ = dec.get_8();
5951 for(
unsignedci = 0; ci < xor_chain_size_; ++ci)
5955 case1: xor_chain_[ci].ref_idx = dec.get_8();
break;
5956 case2: xor_chain_[ci].ref_idx = dec.get_16();
break;
5957 case0: xor_chain_[ci].ref_idx = dec.get_32();
break;
5960xor_chain_[ci].xor_d64 = dec.get_h64();
5962 BM_ASSERT((xor_chain_[ci].xor_d64 & acc64) == 0);
5963acc64 |= xor_chain_[ci].xor_d64;
5966 gotoprocess_xor_ref;
5972 throwstd::logic_error(this->err_msg());
5974BM_THROW(BM_ERR_SERIALFORMAT);
5986bv.set_new_blocks_strat(strat);
5988bman.shrink_top_blocks();
5995 template<
classBV,
classDEC>
6002 for(
unsignedci = 0; ci < xor_chain_size_; ++ci)
6004 unsignedref_idx = (unsigned)ref_vect_->find(xor_chain_[ci].ref_idx);
6014ref_blk = xor_block_;
6026 template<
classBV,
classDEC>
6035 const bvector_type* ref_bv = ref_vect_->get_bv(x_ref_idx_);
6040ref_blk = ref_bman.get_block_ptr(i0, j0);
6042 BM_ASSERT(!or_block_ || or_block_idx_ == x_nb_);
6048 bm::word_t* blk = bman.deoptimize_block(i0, j0,
true);
6052alloc_.free_bit_block(or_block_);
6056bman.set_block_ptr(x_nb_, or_block_);
6058or_block_ = 0; or_block_idx_ = 0;
6061 if(xor_chain_size_)
6063 bm::word_t* blk = bman.deoptimize_block(i0, j0,
true);
6064xor_decode_chain(blk);
6075 bm::word_t* blk = bman.get_block_ptr(i0, j0);
6077 if(
BM_IS_GAP(blk) && (x_ref_d64_==~0ULL) && !or_block_)
6088bman.assign_gap_check(i0, j0, res, ++res_len, blk, tmp_buf);
6095ref_blk = xor_block_;
6103 bm::word_t* blk = bman.deoptimize_block(i0, j0,
true);
6108 if(xor_chain_size_)
6109xor_decode_chain(blk);
6121alloc_.free_bit_block(or_block_);
6134 if(nb_from == x_nb_ || nb_to == x_nb_)
6137bman.optimize_bit_block_nocheck(i0, j0);
6142 template<
classBV,
classDEC>
6145x_ref_idx_ = 0; x_ref_d64_ = 0; xor_chain_size_ = 0;
6151 template<
typenameDEC,
typenameBLOCK_IDX>
6155end_of_stream_(
false),
6244 throwstd::bad_alloc();
6246BM_THROW(BM_ERR_BADALLOC);
6253 if(!this->ex0_arr_ || !this->ex1_arr_)
6256 throwstd::bad_alloc();
6258BM_THROW(BM_ERR_BADALLOC);
6265 template<
typenameDEC,
typenameBLOCK_IDX>
6277 template<
typenameDEC,
typenameBLOCK_IDX>
6293end_of_stream_ =
true;
6297last_id_ = decoder_.get_32();
6304end_of_stream_ =
true;
6309block_type_ = decoder_.get_8();
6313 if(block_type_ & (1u << 7u))
6315mono_block_cnt_ = (block_type_ & ~(1u << 7u)) - 1;
6316state_ = e_zero_blocks;
6320 switch(block_type_)
6324end_of_stream_ =
true; state_ = e_unknown;
6327state_ = e_zero_blocks;
6328mono_block_cnt_ = 0;
6331state_ = e_zero_blocks;
6332mono_block_cnt_ = decoder_.get_8()-1;
6335state_ = e_zero_blocks;
6336mono_block_cnt_ = decoder_.get_16()-1;
6339state_ = e_zero_blocks;
6340mono_block_cnt_ = decoder_.get_32()-1;
6343state_ = e_one_blocks;
6347state_ = e_one_blocks;
6348mono_block_cnt_ = 0;
6351state_ = e_one_blocks;
6352mono_block_cnt_ = decoder_.get_8()-1;
6355state_ = e_one_blocks;
6356mono_block_cnt_ = decoder_.get_16()-1;
6359state_ = e_one_blocks;
6360mono_block_cnt_ = decoder_.get_32()-1;
6363state_ = e_one_blocks;
6381state_ = e_bit_block;
6390gap_head_ = decoder_.get_16();
6409state_ = e_gap_block;
6418state_ = e_gap_block;
6421state_ = e_gap_block;
6427this->bookmark_idx_ = block_idx_;
6428this->skip_offset_ = decoder_.get_32();
6429this->skip_pos_ = decoder_.get_pos() + this->skip_offset_;
6432this->bookmark_idx_ = block_idx_;
6433this->skip_offset_ = decoder_.get_24();
6434this->skip_pos_ = decoder_.get_pos() + this->skip_offset_;
6437this->bookmark_idx_ = block_idx_;
6438this->skip_offset_ = decoder_.get_16();
6439this->skip_pos_ = decoder_.get_pos() + this->skip_offset_;
6443nb_sync = decoder_.get_8();
6444 gotoprocess_nb_sync;
6446nb_sync = decoder_.get_16();
6447 gotoprocess_nb_sync;
6449nb_sync = decoder_.get_24();
6450 gotoprocess_nb_sync;
6452nb_sync = decoder_.get_32();
6453 gotoprocess_nb_sync;
6456 gotoprocess_nb_sync;
6460 BM_ASSERT(block_idx_ == this->bookmark_idx_ + nb_sync);
6461 if(block_idx_ != this->bookmark_idx_ + nb_sync)
6464 throwstd::logic_error(this->err_msg());
6466BM_THROW(BM_ERR_SERIALFORMAT);
6494 throwstd::logic_error(this->err_msg());
6496BM_THROW(BM_ERR_SERIALFORMAT);
6505 if(!mono_block_cnt_)
6517 throwstd::logic_error(this->err_msg());
6519BM_THROW(BM_ERR_SERIALFORMAT);
6524 template<
typenameDEC,
typenameBLOCK_IDX>
6528 BM_ASSERT(state_ == e_zero_blocks || state_ == e_one_blocks);
6529 if(!mono_block_cnt_)
6533block_idx_ += mono_block_cnt_+1;
6534mono_block_cnt_ = 0;
6540 template<
typenameDEC,
typenameBLOCK_IDX>
6548 for(
unsignedk = 0; k <
len; ++k)
6556 for(
unsignedk = 0; k <
len; ++k)
6562 template<
typenameDEC,
typenameBLOCK_IDX>
6570 BM_ASSERT(this->state_ == e_bit_block);
6573 switch(this->block_type_)
6582 unsigned charrun_type = decoder_.get_8();
6585 unsignedrun_length = decoder_.get_16();
6588decoder_.get_32(dst_block ? dst_block + j : dst_block, run_length);
6596 unsignedhead_idx = decoder_.get_16();
6597 unsignedtail_idx = decoder_.get_16();
6600 for(
unsigned i= 0;
i< head_idx; ++
i)
6602decoder_.get_32(dst_block + head_idx,
6603tail_idx - head_idx + 1);
6609 intpos =
int(tail_idx - head_idx) + 1;
6617get_arr_bit(dst_block,
true);
6622 throwstd::logic_error(this->err_msg());
6624BM_THROW(BM_ERR_SERIALFORMAT);
6628get_inv_arr(dst_block);
6636this->read_bic_arr(decoder_, dst_block, this->block_type_);
6641this->read_bic_arr_inv(decoder_, tmp_block, block_type_);
6648this->read_bic_gap(decoder_, dst_block);
6653this->read_digest0_block(decoder_, dst_block);
6658 throwstd::logic_error(this->err_msg());
6660BM_THROW(BM_ERR_SERIALFORMAT);
6666 template<
typenameDEC,
typenameBLOCK_IDX>
6672 BM_ASSERT(this->state_ == e_bit_block);
6674 switch(block_type_)
6681 unsignedhead_idx = decoder_.get_16();
6682 unsignedtail_idx = decoder_.get_16();
6683 for(
unsigned i= head_idx;
i<= tail_idx; ++
i)
6684dst_block[
i] |= decoder_.get_32();
6689 unsigned charrun_type = decoder_.get_8();
6692 unsignedrun_length = decoder_.get_16();
6695 unsignedrun_end = j + run_length;
6696 for(;j < run_end; ++j)
6699dst_block[j] |= decoder_.get_32();
6711get_arr_bit(dst_block,
false);
6714get_inv_arr(tmp_block);
6721this->read_bic_arr(decoder_, dst_block, this->block_type_);
6726this->read_bic_arr_inv(decoder_, tmp_block, this->block_type_);
6730this->read_bic_gap(decoder_, dst_block);
6733this->read_digest0_block(decoder_, dst_block);
6738 throwstd::logic_error(this->err_msg());
6740BM_THROW(BM_ERR_SERIALFORMAT);
6746 template<
typenameDEC,
typenameBLOCK_IDX>
6752 BM_ASSERT(this->state_ == e_bit_block);
6755 switch(block_type_)
6762 unsigned charrun_type = decoder_.get_8();
6765 unsignedrun_length = decoder_.get_16();
6767 unsignedrun_end = j + run_length;
6770 for(;j < run_end; ++j)
6773dst_block[j] &= decoder_.get_32();
6778 for(;j < run_end; ++j)
6789 unsignedhead_idx = decoder_.get_16();
6790 unsignedtail_idx = decoder_.get_16();
6792 for(
i= 0;
i< head_idx; ++
i)
6794 for(
i= head_idx;
i<= tail_idx; ++
i)
6795dst_block[
i] &= decoder_.get_32();
6802get_arr_bit(tmp_block,
true);
6807get_inv_arr(tmp_block);
6818this->read_bic_arr(decoder_, tmp_block, block_type_);
6822this->read_bic_arr(decoder_, 0, block_type_);
6827this->read_bic_arr_inv(decoder_, tmp_block, block_type_);
6836this->read_bic_gap(decoder_, tmp_block);
6840this->read_bic_gap(decoder_, 0);
6847this->read_digest0_block(decoder_, tmp_block);
6851this->read_digest0_block(decoder_, 0);
6856 throwstd::logic_error(this->err_msg());
6858BM_THROW(BM_ERR_SERIALFORMAT);
6864 template<
typenameDEC,
typenameBLOCK_IDX>
6870 BM_ASSERT(this->state_ == e_bit_block);
6874 switch(block_type_)
6878dst_block[
i] ^= decoder_.get_32();
6882 unsigned charrun_type = decoder_.get_8();
6885 unsignedrun_length = decoder_.get_16();
6888 unsignedrun_end = j + run_length;
6889 for(;j < run_end; ++j)
6892dst_block[j] ^= decoder_.get_32();
6904 unsignedhead_idx = decoder_.get_16();
6905 unsignedtail_idx = decoder_.get_16();
6906 for(
unsigned i= head_idx;
i<= tail_idx; ++
i)
6907dst_block[
i] ^= decoder_.get_32();
6913get_arr_bit(tmp_block,
true);
6918get_inv_arr(tmp_block);
6927this->read_bic_arr(decoder_, tmp_block, block_type_);
6934this->read_bic_arr_inv(decoder_, tmp_block, block_type_);
6946this->read_bic_gap(decoder_, tmp_block);
6950this->read_bic_gap(decoder_, 0);
6957this->read_digest0_block(decoder_, tmp_block);
6961this->read_digest0_block(decoder_, 0);
6966 throwstd::logic_error(this->err_msg());
6968BM_THROW(BM_ERR_SERIALFORMAT);
6974 template<
typenameDEC,
typenameBLOCK_IDX>
6980 BM_ASSERT(this->state_ == e_bit_block);
6984 switch(block_type_)
6988dst_block[
i] &= ~decoder_.get_32();
6992 unsigned charrun_type = decoder_.get_8();
6995 unsignedrun_length = decoder_.get_16();
6998 unsignedrun_end = j + run_length;
6999 for(;j < run_end; ++j)
7002dst_block[j] &= ~decoder_.get_32();
7014 unsignedhead_idx = decoder_.get_16();
7015 unsignedtail_idx = decoder_.get_16();
7016 for(
unsigned i= head_idx;
i<= tail_idx; ++
i)
7017dst_block[
i] &= ~decoder_.get_32();
7023get_arr_bit(tmp_block,
true);
7028get_inv_arr(tmp_block);
7037this->read_bic_arr(decoder_, tmp_block, block_type_);
7044this->read_bic_arr_inv(decoder_, tmp_block, block_type_);
7053this->read_bic_gap(decoder_, tmp_block);
7057this->read_bic_gap(decoder_, 0);
7064this->read_digest0_block(decoder_, tmp_block);
7068this->read_digest0_block(decoder_, 0);
7073 throwstd::logic_error(this->err_msg());
7075BM_THROW(BM_ERR_SERIALFORMAT);
7082 template<
typenameDEC,
typenameBLOCK_IDX>
7088 BM_ASSERT(this->state_ == e_bit_block);
7091 switch(block_type_)
7100 unsigned charrun_type = decoder_.get_8();
7103 unsignedrun_length = decoder_.get_16();
7106 unsignedrun_end = j + run_length;
7107 for(;j < run_end; ++j)
7121 unsignedhead_idx = decoder_.get_16();
7122 unsignedtail_idx = decoder_.get_16();
7123 for(
unsigned i= head_idx;
i<= tail_idx; ++
i)
7128 count+= get_arr_bit(0);
7135get_inv_arr(tmp_block);
7142this->read_bic_arr(decoder_, tmp_block, block_type_);
7147this->read_bic_arr_inv(decoder_, tmp_block, block_type_);
7151this->read_digest0_block(decoder_, tmp_block);
7155this->read_bic_gap(decoder_, tmp_block);
7162 throwstd::logic_error(this->err_msg());
7164BM_THROW(BM_ERR_SERIALFORMAT);
7171 template<
typenameDEC,
typenameBLOCK_IDX>
7177 BM_ASSERT(this->state_ == e_bit_block);
7185 switch(block_type_)
7192 unsigned charrun_type = decoder_.get_8();
7195 unsignedrun_length = decoder_.get_16();
7198 unsignedrun_end = j + run_length;
7199 for(;j < run_end; ++j)
7214 unsignedhead_idx = decoder_.get_16();
7215 unsignedtail_idx = decoder_.get_16();
7216 for(
unsigned i= head_idx;
i<= tail_idx; ++
i)
7227get_inv_arr(tmp_block);
7233this->read_bic_arr(decoder_, tmp_block, block_type_);
7238this->read_bic_arr_inv(decoder_, tmp_block, block_type_);
7241this->read_bic_gap(decoder_, tmp_block);
7244this->read_digest0_block(decoder_, 0);
7249 throwstd::logic_error(this->err_msg());
7251BM_THROW(BM_ERR_SERIALFORMAT);
7259 template<
typenameDEC,
typenameBLOCK_IDX>
7265 BM_ASSERT(this->state_ == e_bit_block);
7269 switch(block_type_)
7278 unsigned charrun_type = decoder_.get_8();
7281 unsignedrun_length = decoder_.get_16();
7284 unsignedrun_end = j + run_length;
7285 for(;j < run_end; ++j)
7299 unsignedhead_idx = decoder_.get_16();
7300 unsignedtail_idx = decoder_.get_16();
7301 for(
unsigned i= head_idx;
i<= tail_idx; ++
i)
7308get_arr_bit(tmp_block,
true);
7312get_inv_arr(tmp_block);
7320this->read_bic_arr(decoder_, tmp_block, block_type_);
7325this->read_bic_arr_inv(decoder_, tmp_block, block_type_);
7329this->read_digest0_block(decoder_, tmp_block);
7333this->read_bic_gap(decoder_, tmp_block);
7340 throwstd::logic_error(this->err_msg());
7342BM_THROW(BM_ERR_SERIALFORMAT);
7349 template<
typenameDEC,
typenameBLOCK_IDX>
7355 BM_ASSERT(this->state_ == e_bit_block);
7359 switch(block_type_)
7376 unsigned charrun_type = decoder_.get_8();
7379 unsignedrun_length = decoder_.get_16();
7380 unsignedrun_end = j + run_length;
7383 for(;j < run_end; ++j)
7391 for(;j < run_end; ++j)
7402 unsignedhead_idx = decoder_.get_16();
7403 unsignedtail_idx = decoder_.get_16();
7406 for(
i= 0;
i< head_idx; ++
i)
7408 for(
i= head_idx;
i<= tail_idx; ++
i)
7417get_arr_bit(tmp_block,
true);
7420get_inv_arr(tmp_block);
7427this->read_bic_arr(decoder_, tmp_block, block_type_);
7432this->read_bic_arr_inv(decoder_, tmp_block, block_type_);
7436this->read_digest0_block(decoder_, tmp_block);
7440this->read_bic_gap(decoder_, tmp_block);
7446 throwstd::logic_error(this->err_msg());
7448BM_THROW(BM_ERR_SERIALFORMAT);
7452 returncount_adapter.
sum();
7455 template<
typenameDEC,
typenameBLOCK_IDX>
7461 BM_ASSERT(this->state_ == e_bit_block);
7465 switch(block_type_)
7482 unsigned charrun_type = decoder_.get_8();
7485 unsignedrun_length = decoder_.get_16();
7486 unsignedrun_end = j + run_length;
7489 for(;j < run_end; ++j)
7497 for(;j < run_end; ++j)
7508 unsignedhead_idx = decoder_.get_16();
7509 unsignedtail_idx = decoder_.get_16();
7512 for(
i= 0;
i< head_idx; ++
i)
7514 for(
i= head_idx;
i<= tail_idx; ++
i)
7523get_arr_bit(tmp_block,
true);
7526get_inv_arr(tmp_block);
7533this->read_bic_arr(decoder_, tmp_block, block_type_);
7538this->read_bic_arr_inv(decoder_, tmp_block, block_type_);
7543this->read_digest0_block(decoder_, tmp_block);
7547this->read_bic_gap(decoder_, tmp_block);
7553 throwstd::logic_error(this->err_msg());
7555BM_THROW(BM_ERR_SERIALFORMAT);
7559 returncount_adapter.
sum();
7562 template<
typenameDEC,
typenameBLOCK_IDX>
7568 BM_ASSERT(this->state_ == e_bit_block);
7572 switch(block_type_)
7589 unsigned charrun_type = decoder_.get_8();
7592 unsignedrun_length = decoder_.get_16();
7593 unsignedrun_end = j + run_length;
7596 for(;j < run_end; ++j)
7604 for(;j < run_end; ++j)
7615 unsignedhead_idx = decoder_.get_16();
7616 unsignedtail_idx = decoder_.get_16();
7619 for(
i= 0;
i< head_idx; ++
i)
7621 for(
i= head_idx;
i<= tail_idx; ++
i)
7631get_arr_bit(tmp_block,
true);
7634get_inv_arr(tmp_block);
7641this->read_bic_arr(decoder_, tmp_block, block_type_);
7646this->read_bic_arr_inv(decoder_, tmp_block, block_type_);
7650this->read_digest0_block(decoder_, tmp_block);
7654this->read_bic_gap(decoder_, tmp_block);
7660 throwstd::logic_error(this->err_msg());
7662BM_THROW(BM_ERR_SERIALFORMAT);
7666 returncount_adapter.
sum();
7669 template<
typenameDEC,
typenameBLOCK_IDX>
7675 BM_ASSERT(this->state_ == e_bit_block);
7679 switch(block_type_)
7696 unsigned charrun_type = decoder_.get_8();
7699 unsignedrun_length = decoder_.get_16();
7700 unsignedrun_end = j + run_length;
7703 for(;j < run_end; ++j)
7718 unsignedhead_idx = decoder_.get_16();
7719 unsignedtail_idx = decoder_.get_16();
7722 for(
i= head_idx;
i<= tail_idx; ++
i)
7730get_arr_bit(tmp_block,
true);
7733get_inv_arr(tmp_block);
7740this->read_bic_arr(decoder_, tmp_block, block_type_);
7745this->read_bic_arr_inv(decoder_, tmp_block, block_type_);
7749this->read_digest0_block(decoder_, tmp_block);
7753this->read_bic_gap(decoder_, tmp_block);
7759 throwstd::logic_error(this->err_msg());
7761BM_THROW(BM_ERR_SERIALFORMAT);
7764 returncount_adapter.
sum();
7769 template<
typenameDEC,
typenameBLOCK_IDX>
7790 for(
unsignedk = 0; k <
len; ++k)
7802decoder_.seek(
len* 2);
7807 template<
typenameDEC,
typenameBLOCK_IDX>
7811++(this->block_idx_);
7812this->state_ = e_blocks;
7814 returndecoder_.get_16();
7817 template<
typenameDEC,
typenameBLOCK_IDX>
7821 BM_ASSERT(this->state_ == e_gap_block ||
7825this->read_gap_block(decoder_, block_type_, dst_block, gap_head_);
7827++(this->block_idx_);
7828this->state_ = e_blocks;
7832 template<
typenameDEC,
typenameBLOCK_IDX>
7839 BM_ASSERT(this->state_ == e_bit_block);
7841get_bit_func_type bit_func = bit_func_table_[op];
7843 unsigned cnt= ((*this).*(bit_func))(dst_block, tmp_block);
7844this->state_ = e_blocks;
7845++(this->block_idx_);
7854 template<
classBV>
7856: temp_block_(0), ref_vect_(0)
7861 template<
classBV>
7865alloc_.free_bit_block(temp_block_);
7872 template<
classBV>
7877 typenameBV::size_type
count= 0;
7881bv.bit_and(bv_tmp, BV::opt_compress);
7896 count= bv_tmp.count();
7922BM_THROW(BM_ERR_SERIALFORMAT);
7930 template<
classBV>
7933 const unsigned char*
buf,
7944 returndeserialize_xor(bv, bv_tmp, op);
7947 template<
classBV>
7950 const unsigned char*
buf,
7959 unsigned charheader_flag = dec.
get_8();
7966 if(bo_current == bo)
7968de_.set_ref_vectors(ref_vect_);
7969de_.set_range(idx_from, idx_to);
7970de_.deserialize(bv,
buf);
7997bv.keep_range_no_check(idx_from, idx_to);
8002 template<
classBV>
8012bv.bit_and(bv_tmp, bvector_type::opt_compress);
8024 count= bv_tmp.count();
8047 throwstd::logic_error(
"BM: serialization error");
8049BM_THROW(BM_ERR_SERIALFORMAT);
8058 template<
classBV>
8061 const unsigned char*
buf,
8067 unsigned charheader_flag = dec.
get_8();
8072 returndeserialize_xor(bv,
buf, op, exit_on_one);
8088 if(bo_current == bo)
8093de_.deserialize(bv,
buf);
8097bv_tmp_.clear(
true);
8100de_.deserialize(bv_tmp_,
buf);
8112 if(bo_current == bo)
8115 returnit_d_.deserialize(bv, ss, temp_block_, op, exit_on_one);
8122 returnit_d_be_.deserialize(bv, ss, temp_block_, op, exit_on_one);
8127 returnit_d_le_.deserialize(bv, ss, temp_block_, op, exit_on_one);
8132 throwstd::logic_error(
"BM::platform error: unknown endianness");
8134BM_THROW(BM_ERR_SERIALFORMAT);
8139 template<
classBV>
8142 const unsigned char*
buf,
8148 unsigned charheader_flag = dec.
get_8();
8156 if(!bman.is_init())
8157bv.set_range(idx_from, idx_to);
8166deserialize_xor_range(bv_tmp,
buf, idx_from, idx_to);
8169bv.bit_and(bv_tmp, bvector_type::opt_compress);
8180 if(bo_current == bo)
8182bv_tmp_.clear(
true);
8185de_.set_range(idx_from, idx_to);
8186de_.deserialize(bv_tmp_,
buf);
8191bv.bit_and(bv_tmp_, bvector_type::opt_compress);
8209 if(bo_current == bo)
8212it_d_.set_range(idx_from, idx_to);
8213it_d_.deserialize(bv, ss, temp_block_, op,
false);
8214it_d_.unset_range();
8222it_d_be_.set_range(idx_from, idx_to);
8223it_d_be_.deserialize(bv, ss, temp_block_, op,
false);
8224it_d_be_.unset_range();
8230it_d_le_.set_range(idx_from, idx_to);
8231it_d_le_.deserialize(bv, ss, temp_block_, op,
false);
8232it_d_le_.unset_range();
8238 throwstd::logic_error(
"BM::platform error: unknown endianness");
8240BM_THROW(BM_ERR_SERIALFORMAT);
8250 template<
classBV,
classSerialIterator>
8254is_range_set_ =
true;
8260 template<
classBV,
classSerialIterator>
8267 const unsignedwin_size = 64;
8272 for(
unsigned i= 0;
i<= id_count;)
8275 for(j = 0; j < win_size &&
i<= id_count; ++j, ++
i)
8277id_buffer[j] = sit.get_id();
8285 for(
unsigned i= 0;
i<= id_count;)
8288 for(j = 0; j < win_size &&
i<= id_count; ++j, ++
i)
8290id_buffer[j] = sit.get_id();
8298 template<
classBV,
classSerialIterator>
8316 if(bv_block_idx <= nblock_last)
8317bman.set_all_zero(bv_block_idx, nblock_last);
8326 bm::word_t*** blk_root = bman.top_blocks_root();
8327 unsignedtop_size = bman.top_block_size();
8328 for(;
i< top_size; ++
i)
8341 count+= bman.block_bitcount(blk_blk[j]);
8351 throwstd::logic_error(err_msg());
8353BM_THROW(BM_ERR_SERIALFORMAT);
8359 template<
classBV,
classSerialIterator>
8367 unsignedid_count = sit.get_id_count();
8368 boolset_clear =
true;
8375load_id_list(bv_tmp, sit, id_count,
true);
8388load_id_list(bv, sit, id_count, set_clear);
8391 for(
unsigned i= 0;
i< id_count; ++
i)
8399 for(
unsigned i= 0;
i< id_count; ++
i)
8412 count+= bv.get_bit(
id);
8420load_id_list(bv_tmp, sit, id_count,
true);
8428load_id_list(bv_tmp, sit, id_count,
true);
8436load_id_list(bv_tmp, sit, id_count,
false);
8437 count+= bv_tmp.count();
8443load_id_list(bv_tmp, sit, id_count,
true);
8451 throwstd::logic_error(err_msg());
8453BM_THROW(BM_ERR_SERIALFORMAT);
8461 template<
classBV,
classSerialIterator>
8474gap_temp_block[0] = 0;
8477 if(!bman.is_init())
8480 if(sit.bv_size() && (sit.bv_size() > bv.size()))
8481bv.resize(sit.bv_size());
8483 typenameserial_iterator_type::iterator_state
state;
8484 state= sit.get_state();
8485 if(
state== serial_iterator_type::e_list_ids)
8487 count= process_id_list(bv, sit, op);
8499 count+= finalize_target_vector(bman, op, bv_block_idx);
8506 caseserial_iterator_type::e_blocks:
8511 if(is_range_set_ && (bv_block_idx < nb_range_from_))
8514 boolskip_flag = sit.try_skip(bv_block_idx, nb_range_from_);
8517bv_block_idx = sit.block_idx();
8518 BM_ASSERT(bv_block_idx <= nb_range_from_);
8519 BM_ASSERT(sit.state() == serial_iterator_type::e_blocks);
8523 caseserial_iterator_type::e_bit_block:
8525 BM_ASSERT(sit.block_idx() == bv_block_idx);
8528 bm::word_t* blk = bman.get_block_ptr(i0, j0);
8541blk = bman.make_bit_block(bv_block_idx);
8554 throwstd::logic_error(err_msg());
8556BM_THROW(BM_ERR_SERIALFORMAT);
8573blk = bman.deoptimize_block(bv_block_idx);
8579 unsignedc = sit.get_bit_block(blk, temp_block, sop);
8581 if(exit_on_one &&
count)
8586bman.optimize_bit_block(i0, j0, bvector_type::opt_compress);
8594 caseserial_iterator_type::e_zero_blocks:
8596 BM_ASSERT(bv_block_idx == sit.block_idx());
8603bv_block_idx = sit.skip_mono_blocks();
8609bv_block_idx = sit.skip_mono_blocks();
8610bman.set_all_zero(nb_start, bv_block_idx-1);
8621 bm::word_t* blk = bman.get_block_ptr(i0, j0);
8632bman.zero_block(bv_block_idx);
8646 count+= blk ? bman.block_bitcount(blk) : 0;
8647 if(exit_on_one &&
count)
8659 caseserial_iterator_type::e_one_blocks:
8661 BM_ASSERT(bv_block_idx == sit.block_idx());
8664 bm::word_t* blk = bman.get_block_ptr(i0, j0);
8671bman.set_block_all_set(bv_block_idx);
8679bman.zero_block(bv_block_idx);
8683 if(++empty_op_cnt > 64)
8686 bool b= bv.find_reverse(last_id);
8690 if(last_nb < bv_block_idx)
8696 count+= blk ? bman.block_bitcount(blk) : 0;
8704blk = bman.deoptimize_block(bv_block_idx);
8735bman.set_block_all_set(bv_block_idx);
8749 if(exit_on_one &&
count)
8754 caseserial_iterator_type::e_gap_block:
8756 BM_ASSERT(bv_block_idx == sit.block_idx());
8760 const bm::word_t* blk = bman.get_block(i0, j0);
8762sit.get_gap_block(gap_temp_block);
8780 if(exit_on_one &&
count)
8788bman.zero_block(bv_block_idx);
8799bv_block_idx, gap_temp_block, level);
8812bv.combine_operation_with_block(bv_block_idx,
8820bv.combine_operation_with_block(
8830blk = bman.get_block_ptr(i0, j0);
8846 throwstd::logic_error(err_msg());
8848BM_THROW(BM_ERR_SERIALFORMAT);
8855 if(is_range_set_ && (bv_block_idx > nb_range_to_))
8869 #pragma warning( pop )#define BM_DECLARE_TEMP_BLOCK(x)
Algorithms for bvector<> (main include)
#define IS_FULL_BLOCK(addr)
#define IS_VALID_ADDR(addr)
#define BMPTR_SETBIT0(ptr)
#define BMSET_PTRGAP(ptr)
#define FULL_BLOCK_FAKE_ADDR
#define FULL_BLOCK_REAL_ADDR
Bit manipulation primitives (internal)
#define BM_SER_NEXT_GRP(enc, nb, B_1ZERO, B_8ZERO, B_16ZERO, B_32ZERO, B_64ZERO)
Utilities for bit transposition (internal) (experimental!)
Bit manipulation primitives (internal)
Functions and utilities for XOR filters (internal)
Byte based reader for un-aligned bit streaming.
unsigned gamma() noexcept
decode unsigned value using Elias Gamma coding
unsigned get_24_no() noexcept
get 24 bits neutral order from the bit-stream
void bic_decode_u32_cm(bm::word_t *arr, unsigned sz, bm::word_t lo, bm::word_t hi) noexcept
Binary Interpolative array decode (32-bit)
unsigned delta16() noexcept
Custom variant of delta decoding.
unsigned delta16s() noexcept
Custom variant of delta decoding.
void bic_decode_u16(bm::gap_word_t *arr, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) noexcept
Binary Interpolative array decode.
unsigned gamma8() noexcept
decode unsigned value using Elias Gamma coding
void bic_decode_u16_dry(unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) noexcept
unsigned decode_array(bm::gap_word_t *arr, bm::word_t *tb_wflags, unsigned *sz, unsigned default_sz=0) noexcept
Selective array decode.
unsigned get_bit() noexcept
read 1 bit
unsigned get_16_no() noexcept
get 16 bits neutral order from the bit-stream
void bic_decode_u16_bitset(bm::word_t *block, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) noexcept
unsigned get_32_no() noexcept
get 32 bits neutral order from the bit-stream
unsigned get_bits(unsigned count) noexcept
read number of bits out of the stream
Byte based writer for un-aligned bit streaming.
void put_16_no(unsigned value) noexcept
put 16 bits neutral byte order
void put_32_no(unsigned value) noexcept
put 32 bits neutral byte order
void flush_if_full() noexcept
void delta16s(unsigned value) noexcept
Custom variant of delta encoding (safe for 8-bit values)
void encode_array(const bm::gap_word_t *arr, bm::gap_word_t *recalc_arr, bm::word_t *tb_wflags, bm::gap_word_t *tmp_arr, unsigned sz, bool one_flag, bool EOC_flag, unsigned force_code=0, bool save_size=true) noexcept
Selective array serialization (BIC encoding is used)
void put_24_no(unsigned value) noexcept
put 24 bits neutral byte order
void gamma(unsigned value) noexcept
Elias Gamma encode the specified value.
void bic_encode_u16(const bm::gap_word_t *arr, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) noexcept
Binary Interpolative array encode.
void bic_encode_u32_cm(const bm::word_t *arr, unsigned sz, bm::word_t lo, bm::word_t hi) noexcept
Binary Interpolative encoding (array of 32-bit ints) cm - "center-minimal".
void flush() noexcept
Flush the incomplete 32-bit accumulator word.
void delta16(unsigned value) noexcept
Custom variant of delta encoding.
void put_bits(unsigned value, unsigned count) noexcept
issue count bits out of value
void gamma8(unsigned value) noexcept
Elias Gamma encode the specified value.
Bit-block get adapter, takes bitblock and represents it as a get_32() accessor function.
Bit-block sum adapter, takes values and sums it /internal.
bm::word_t sum() const noexcept
Get accumulated sum.
List of reference bit-vectors with their true index associations.
const unsigned char * get_pos() const noexcept
Return current buffer pointer.
unsigned char get_8() noexcept
Reads character from the decoding buffer.
void set_pos(const unsigned char *pos) noexcept
Set current buffer pointer.
Class for decoding data from memory buffer.
bm::id64_t get_48() noexcept
Reads 48-bit value from the decoding buffer (returned as a 64-bit word).
bm::id64_t get_64() noexcept
Reads 64-bit word from the decoding buffer.
bm::word_t get_24() noexcept
Reads 24-bit value from the decoding buffer (returned as a 32-bit word).
bm::short_t get_16() noexcept
Reads 16-bit word from the decoding buffer.
bm::word_t get_32() noexcept
Reads 32-bit word from the decoding buffer.
Base deserialization class.
static void read_0runs_block(decoder_type &decoder, bm::word_t *blk) noexcept
read bit-block encoded as runs
const unsigned char * skip_pos_
decoder skip position
block_idx_type try_skip(decoder_type &decoder, block_idx_type nb, block_idx_type expect_nb) noexcept
Try to skip if skip bookmark is available within reach.
void read_bic_arr(decoder_type &decoder, bm::word_t *blk, unsigned block_type)
Read binary interpolated list into a bit-set.
bm::word_t * ex1_arr_
array for exceptions
bm::bit_block_t tb_wflags_
temp flags for sub-block DR compression
void read_bic_gap(decoder_type &decoder, bm::word_t *blk) noexcept
Read binary interpolated gap blocks into a bitset.
static const char * err_msg() noexcept
block_idx_type bookmark_idx_
last bookmark block index
unsigned skip_offset_
bookmark to skip 256 encoded blocks
void read_digest0_block(decoder_type &decoder, bm::word_t *blk) noexcept
Read digest0-type bit-block.
void read_bic_arr_inv(decoder_type &decoder, bm::word_t *blk, unsigned block_type) noexcept
Read inverted binary interpolated list into a bit-set.
bm::bit_in< DEC > bit_in_type
unsigned * sb_id_array_
ptr to super-block idx array (temp)
unsigned read_bic_sb_arr(decoder_type &decoder, unsigned block_type, unsigned *dst_arr, unsigned *sb_idx)
Read list of bit ids for super-blocks.
void read_gap_block(decoder_type &decoder, unsigned block_type, bm::gap_word_t *dst_block, bm::gap_word_t &gap_head)
Read GAP block from the stream.
bm::word_t * ex0_arr_
array for exceptions
unsigned read_id_list(decoder_type &decoder, unsigned block_type, bm::gap_word_t *dst_arr)
Read list of bit ids.
bm::gap_word_t * id_array_
ptr to idx array for temp decode use
Deserializer for bit-vector.
void xor_reset() noexcept
void set_ref_vectors(const bv_ref_vector_type *ref_vect)
Attach collection of reference vectors for XOR de-serialization (no transfer of ownership for the poi...
block_arridx_type bit_idx_arr_
allocator_type::allocator_pool_type allocator_pool_type
void xor_decode_chain(bm::word_t *blk) noexcept
void xor_decode(blocks_manager_type &bman)
deseriaizer_base< DEC, block_idx_type > parent_type
bm::bv_ref_vector< BV > bv_ref_vector_type
void decode_arr_sblock(unsigned char btype, decoder_type &dec, bvector_type &bv)
void reset() noexcept
reset range deserialization and reference vectors
void deserialize_gap(unsigned char btype, decoder_type &dec, bvector_type &bv, blocks_manager_type &bman, block_idx_type nb, bm::word_t *blk)
bvector_type::allocator_type allocator_type
parent_type::decoder_type decoder_type
void unset_range() noexcept
Disable range deserialization.
allocator_pool_type pool_
sblock_arridx_type sb_bit_idx_arr_
BV::blocks_manager_type blocks_manager_type
void set_range(size_type from, size_type to) noexcept
set deserialization range [from, to] This is NOT exact, approximate range, content outside range is n...
block_arridx_type gap_temp_block_
size_t deserialize(bvector_type &bv, const unsigned char *buf, bm::word_t *temp_block=0)
bvector_type::block_idx_type block_idx_type
void decode_arrbit(decoder_type &dec, bvector_type &bv, block_idx_type nb, bm::word_t *blk)
void decode_bit_block(unsigned char btype, decoder_type &dec, blocks_manager_type &bman, block_idx_type nb, bm::word_t *blk)
void decode_block_bit(decoder_type &dec, bvector_type &bv, block_idx_type nb, bm::word_t *blk)
void decode_block_bit_interval(decoder_type &dec, bvector_type &bv, block_idx_type nb, bm::word_t *blk)
void move_from(encoder &enc) noexcept
Move content from another encoder.
unsigned char * position_type
void put_prefixed_array_32(unsigned char c, const bm::word_t *w, unsigned count) noexcept
Encode 8-bit prefix + an array.
void put_16(bm::short_t s) noexcept
Puts short word (16 bits) into the encoding buffer.
void put_32(bm::word_t w) noexcept
Puts 32 bits word into encoding buffer.
void put_64(bm::id64_t w) noexcept
Puts 64 bits word into encoding buffer.
size_t size() const noexcept
Returns size of the current encoding stream.
void put_8(unsigned char c) noexcept
Puts one character into the encoding buffer.
void set_pos(unsigned char *buf_pos) noexcept
Set current memory stream position.
unsigned char * get_pos() const noexcept
Get current memory stream position.
void put_8_16_32(unsigned w, unsigned char c8, unsigned char c16, unsigned char c32) noexcept
put tag plus value based on its VBR evaluation
Functor for Elias Gamma encoding.
value_type * data() const noexcept
void resize(size_type new_size)
vector resize
Iterator to walk forward the serialized stream.
void set_range(size_type from, size_type to)
set deserialization range [from, to]
void unset_range() noexcept
disable range filtration
SerialIterator serial_iterator_type
BV::blocks_manager_type blocks_manager_type
bvector_type::size_type size_type
size_type deserialize(bvector_type &bv, serial_iterator_type &sit, bm::word_t *temp_block, set_operation op=bm::set_OR, bool exit_on_one=false)
static void load_id_list(bvector_type &bv, serial_iterator_type &sit, unsigned id_count, bool set_clear)
load data from the iterator of type "id list"
bvector_type::block_idx_type block_idx_type
static size_type process_id_list(bvector_type &bv, serial_iterator_type &sit, set_operation op)
Process (obsolete) id-list serialization format.
static const char * err_msg() noexcept
static size_type finalize_target_vector(blocks_manager_type &bman, set_operation op, size_type bv_block_idx)
Finalize the deserialization (zero target vector tail or bit-count tail)
Deserializer, performs logical operations between bit-vector and serialized bit-vector.
bm::iterator_deserializer< BV, serial_stream_le > it_d_le_
little-endian stream iterator
bm::bv_ref_vector< BV > bv_ref_vector_type
void set_ref_vectors(const bv_ref_vector_type *ref_vect)
Attach collection of reference vectors for XOR serialization (no transfer of ownership for the pointe...
void deserialize_range(bvector_type &bv, const unsigned char *buf, size_type idx_from, size_type idx_to)
BV::blocks_manager_type blocks_manager_type
BV::allocator_type allocator_type
bm::deserializer< BV, bm::decoder_little_endian > deserializer_le
~operation_deserializer()
bvector_type::block_idx_type block_idx_type
serial_stream_iterator< bm::decoder_little_endian, block_idx_type > serial_stream_le
void deserialize_xor_range(bvector_type &bv, const unsigned char *buf, size_type idx_from, size_type idx_to)
size_type deserialize(bvector_type &bv, const unsigned char *buf, set_operation op, bool exit_on_one=false)
Deserialize bvector using buffer as set operation argument.
void deserialize_range(bvector_type &bv, const unsigned char *buf, bm::word_t *, size_type idx_from, size_type idx_to)
bvector_type::size_type size_type
const bv_ref_vector_type * ref_vect_
xor ref.vector
size_type deserialize(bvector_type &bv, const unsigned char *buf, bm::word_t *, set_operation op=bm::set_OR, bool exit_on_one=false)
Obsolete! Deserialize bvector using buffer as set operation argument.
bm::deserializer< BV, bm::decoder_big_endian > deserializer_be
deserializer< BV, bm::decoder > de_
serial_stream_iterator< bm::decoder_big_endian, block_idx_type > serial_stream_be
bm::iterator_deserializer< BV, serial_stream_be > it_d_be_
big-endian stream iterator
size_type deserialize_xor(bvector_type &bv, const unsigned char *buf, set_operation op, bool exit_on_one)
bm::iterator_deserializer< BV, serial_stream_current > it_d_
default stream iterator (same endian)
serial_stream_iterator< bm::decoder, block_idx_type > serial_stream_current
static size_type deserialize_xor(bvector_type &bv, bvector_type &bv_tmp, set_operation op)
Serialization stream iterator.
bm::id_t last_id_
Last id from the id list.
unsigned get_bit_block_COUNT_B(bm::word_t *dst_block, bm::word_t *tmp_block)
deseriaizer_base< DEC, block_idx_type > parent_type
deseriaizer_base< DEC, BLOCK_IDX >::decoder_type decoder_type
unsigned char header_flag_
decoder_type & decoder()
Get low level access to the decoder (use carefully)
unsigned get_id_count() const noexcept
Number of ids in the inverted list (valid for e_list_ids)
void get_inv_arr(bm::word_t *block) noexcept
void next()
get next block
serial_stream_iterator(const unsigned char *buf)
block_idx_type mono_block_cnt_
number of 0 or 1 blocks
unsigned block_type_
current block type
block_idx_type skip_mono_blocks() noexcept
skip all zero or all-one blocks
unsigned get_bit_block_SUB(bm::word_t *dst_block, bm::word_t *tmp_block)
unsigned get_bit_block_COUNT_SUB_AB(bm::word_t *dst_block, bm::word_t *tmp_block)
unsigned get_bit_block_AND(bm::word_t *dst_block, bm::word_t *tmp_block)
unsigned get_bit_block_COUNT_SUB_BA(bm::word_t *dst_block, bm::word_t *tmp_block)
unsigned get_bit_block_COUNT_OR(bm::word_t *dst_block, bm::word_t *tmp_block)
unsigned get_bit_block_COUNT(bm::word_t *dst_block, bm::word_t *tmp_block)
bool is_eof() const
Returns true if end of bit-stream reached.
unsigned get_bit() noexcept
block_idx_type block_idx_
current block index
unsigned get_bit_block_XOR(bm::word_t *dst_block, bm::word_t *tmp_block)
iterator_state get_state() const noexcept
unsigned get_bit_block_OR(bm::word_t *dst_block, bm::word_t *tmp_block)
unsigned get_bit_block_ASSIGN(bm::word_t *dst_block, bm::word_t *tmp_block)
block_idx_type block_idx() const noexcept
Get current block index.
gap_word_t * block_idx_arr_
unsigned get_arr_bit(bm::word_t *dst_block, bool clear_target=true) noexcept
Get array of bits out of the decoder into bit block (Converts inverted list into bits) Returns number...
iterator_state state() const noexcept
Returns iterator internal state.
unsigned get_bit_block_COUNT_A(bm::word_t *dst_block, bm::word_t *tmp_block)
~serial_stream_iterator()
gap_word_t glevels_[bm::gap_levels]
GAP levels.
iterator_state
iterator is a state machine, this enum encodes its key value
@ e_bit_block
one bit block
@ e_list_ids
plain int array
@ e_zero_blocks
one or more zero bit blocks
@ e_one_blocks
one or more all-1 bit blocks
@ e_blocks
stream of blocks
void get_gap_block(bm::gap_word_t *dst_block)
Read gap block data (with head)
get_bit_func_type bit_func_table_[bm::set_END]
unsigned id_cnt_
Id counter for id list.
unsigned get_bit_block_COUNT_XOR(bm::word_t *dst_block, bm::word_t *tmp_block)
unsigned get_bit_block_COUNT_AND(bm::word_t *dst_block, bm::word_t *tmp_block)
bm::id_t get_id() const noexcept
Get last id from the id list.
block_idx_type bv_size() const
serialized bitvector size
unsigned get_bit_block(bm::word_t *dst_block, bm::word_t *tmp_block, set_operation op)
read bit block, using logical operation
Bit-vector serialization class.
unsigned compression_level_
void xor_tmp_product(const bm::word_t *s_block, const block_match_chain_type &mchain, unsigned i, unsigned j) noexcept
Compute digest based XOR product, place into tmp XOR block.
unsigned char find_gap_best_encoding(const bm::gap_word_t *gap_block) noexcept
Determine best representation for GAP block based on current set compression level.
void serialize(const BV &bv, typename serializer< BV >::buffer &buf, const statistics_type *bv_stat=0)
Bitvector serialization into buffer object (resized automatically)
void interpolated_gap_array(const bm::gap_word_t *gap_block, unsigned arr_len, bm::encoder &enc, bool inverted) noexcept
Encode GAP block as an array with binary interpolated coder.
size_type ref_idx_
current reference index
void set_sim_model(const xor_sim_model_type *sim_model) noexcept
Attach XOR similarity model (must be computed by the same ref vector)
bm::gamma_encoder< bm::gap_word_t, bit_out_type > gamma_encoder_func
serializer(bm::word_t *temp_block)
bm::bit_out< bm::encoder > bit_out_type
bm::word_t * gap_recalc_tmp_block1_
tmp area for GAP reduction
bool interpolated_arr_bit_block_v3s(bm::encoder &enc, const bm::word_t *block, bool inverted)
void set_sparse_cutoff(unsigned cutoff) noexcept
Fine tuning for Binary Interpolative Compression (levels 5+) The parameter sets average population co...
bm::heap_vector< bm::gap_word_t, allocator_type, true > block_arridx_type
unsigned bit_model_d0_size_
memory (bytes) by d0 method
allocator_type::allocator_pool_type allocator_pool_type
bm::word_t * idx_arr_
temp space for bit-block decode into idxs
void reset_compression_stats() noexcept
Reset all accumulated compression statistics.
void gamma_gap_block(const bm::gap_word_t *gap_block, bm::encoder &enc) noexcept
unsigned char * enc_header_pos_
pos of top level header to roll back
bool bienc_gap_bit_block_enc(unsigned len, bm::encoder &enc) noexcept
encode bit-block as interpolated bit block of gaps
void reset_models() noexcept
void encode_bit_array(const bm::word_t *block, bm::encoder &enc, bool inverted) noexcept
Encode bit-block as an array of bits.
bvector_type::block_idx_type block_idx_type
bvector_type::size_type size_type
void gamma_gap_bit_block(const bm::word_t *block, bm::encoder &enc) noexcept
unsigned char find_bit_best_encoding_l5(const bm::word_t *block, const bm::bv_sub_survey &sub_stat, block_idx_type nb) noexcept
Determine best representation for a bit-block (level 5)
size_type * compression_stat_
void interpolated_encode_gap_block_v3s(const bm::gap_word_t *gap_block, bm::encoder &enc, unsigned len)
allocator_pool_type pool_
bm::bv_ref_vector< BV > bv_ref_vector_type
unsigned char header_flag_
set of masks used to save
unsigned char models_[bm::block_waves]
void encode_bit_interval(const bm::word_t *blk, bm::encoder &enc, unsigned size_control) noexcept
Encode BIT block with repeatable runs of zeroes.
bm::xor_scanner< BV > xor_scan_
scanner for XOR similarity
void gamma_gap_array(const bm::gap_word_t *gap_block, unsigned arr_len, bm::encoder &enc, bool inverted=false) noexcept
Encode GAP block as delta-array with Elias Gamma coder.
void set_bookmarks(bool enable, unsigned bm_interval=256) noexcept
Add skip-markers to serialization BLOB for faster range decode at the expense of some BLOB size incre...
void encode_xor_match_chain(bm::encoder &enc, const block_match_chain_type &mchain) noexcept
Encode XOR match chain.
void byte_order_serialization(bool value) noexcept
Set byte-order serialization (for cross platform compatibility)
void interpolated_gap_array_v0(const bm::gap_word_t *gap_block, unsigned arr_len, bm::encoder &enc, bool inverted) noexcept
void optimize_serialize_destroy(BV &bv, typename serializer< BV >::buffer &buf)
Bitvector serialization into buffer object (resized automatically) Input bit-vector gets optimized an...
bm::word_t * gap_ex1_tmp_block_
tmp area for GAP reduction
bvector_type::allocator_type allocator_type
byte_buffer< allocator_type > buffer
block_arridx_type bit_idx_arr_
unsigned ibc
inverse bit-count
bool bic_drange_
bic dynamic range compression flag
void set_compression_level(unsigned clevel) noexcept
Set compression level.
bm::heap_vector< unsigned, allocator_type, true > sblock_arridx_type
bool compute_sim_model(xor_sim_model_type &sim_model, const bv_ref_vector_type &ref_vect, const bm::xor_sim_params &params)
Calculate XOR similarity model for ref_vector; the reference vector must be associated before.
void set_bic_dynamic_range_reduce(bool flag) noexcept
enable/disable dynamic range reduction for BIC compression where possible.
bm::word_t * xor_tmp_block_
tmp area for xor product
float bie_bits_per_int_
default coefficient for choosing BIC coding (2.2)
static void process_bookmark(block_idx_type nb, bookmark_state &bookm, bm::encoder &enc) noexcept
Check if bookmark needs to be placed and if so, encode it into serialization BLOB.
bool allow_stat_reset_
controls zeroing of telemetry
bvector_type::blocks_manager_type blocks_manager_type
unsigned bit_model_0run_size_
memory (bytes) by run-0 method
void encode_gap_block(const bm::gap_word_t *gap_block, bm::encoder &enc)
void bienc_gap_bit_block(const bm::word_t *block, bm::encoder &enc) noexcept
encode bit-block as interpolated bit block of gaps
serializer(const serializer &)
bool free_
flag to free the input vector
void interpolated_gap_bit_block(const bm::word_t *block, bm::encoder &enc) noexcept
encode bit-block as interpolated gap block
struct bm::serializer::@939 bit_stat_
saved bit-block statistics (mostly for debugging)
bm::word_t * gap_ex0_tmp_block_
tmp area for GAP reduction
xor_sim_model_type::block_match_chain_type block_match_chain_type
void bienc_arr_bit_block(const bm::word_t *block, bm::encoder &enc, bool inverted) noexcept
bool optimize_
flag to optimize the input vector
void encode_bit_digest(const bm::word_t *blk, bm::encoder &enc, bm::id64_t d0) noexcept
Encode bit-block using digest (hierarchical compression)
void allow_stat_reset(bool allow) noexcept
Enable/disable statistics reset on each serialization.
serializer(const allocator_type &alloc=allocator_type(), bm::word_t *temp_block=0)
Constructor.
void add_model(unsigned char mod, unsigned score) noexcept
bm::word_t * tb_wflags_
temp flags for sub-block DR compression
unsigned char find_bit_best_encoding(const bm::word_t *block, const bm::bv_sub_survey &sub_stat, block_idx_type nb) noexcept
Determine best representation for a bit-block.
sblock_arridx_type sb_bit_idx_arr_
bool interpolated_arr_bit_block_v3(bm::encoder &enc, const bm::word_t *block, bool inverted)
const bv_ref_vector_type * ref_vect_
ref.vector for XOR compression
const xor_sim_model_type * sim_model_
similarity model matrix
void gap_length_serialization(bool value) noexcept
Set GAP length serialization (serializes GAP levels of the original vector)
void bienc_gaps_sblock(const BV &bv, unsigned sb, const bm::bv_sub_survey &sub_stat, bm::encoder &enc) noexcept
bm::xor_sim_model< BV > xor_sim_model_type
bool sb_bookmarks_
Bookmarks flag.
void interpolated_arr_bit_block(const bm::word_t *block, bm::encoder &enc, bool inverted)
void set_bic_coeff(float bie_bits_per_int) noexcept
Coefficient to help choose if bit-block should be encoded using binary interpolative coder.
void bienc_arr_sblock(const BV &bv, unsigned sb, bm::encoder &enc) noexcept
unsigned sparse_cutoff_
number of bits per blocks to consider sparse
void gamma_arr_bit_block(const bm::word_t *block, bm::encoder &enc, bool inverted) noexcept
bm::word_t * try_buf_
temp space for trying alternative compressors
const size_type * get_compression_stat() const noexcept
Return serialization counter vector.
bm::word_t * gap_recalc_tmp_block0_
tmp area for GAP reduction
unsigned sb_range_
Desired bookmarks interval.
bvector_type::statistics statistics_type
bool interpolated_encode_gap_block_v3(const bm::gap_word_t *gap_block, bm::encoder &enc, unsigned len)
serializer & operator=(const serializer &)
void set_ref_vectors(const bv_ref_vector_type *ref_vect)
Attach collection of reference vectors for XOR serialization (no transfer of ownership for the pointe...
void encode_header(const BV &bv, bm::encoder &enc) noexcept
Encode serialization header information.
void set_curr_ref_idx(size_type ref_idx) noexcept
Set current index in ref.vector collection (not a row idx or plain idx)
unsigned scores_[bm::block_waves]
size_type serialize(const BV &bv, unsigned char *buf, size_t buf_size)
Bitvector serialization into memory block.
void alloc_temp_buffers()
Allocate serialization temp buffers.
unsigned get_compression_level() const noexcept
Get current compression level.
void interpolated_encode_gap_block(const bm::gap_word_t *gap_block, bm::encoder &enc, unsigned len)
XOR scanner to search for complement-similarities in collections of bit-vectors.
static vector< string > arr
Encoding utilities for serialization (internal)
static DLIST_TYPE *DLIST_NAME() prev(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
static DLIST_TYPE *DLIST_NAME() next(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
bm::id_t bit_block_count(const bm::word_t *block) noexcept
Bitcount for bit block.
unsigned bit_count_nonzero_size(const T *blk, unsigned data_size) noexcept
Inspects block for full zero words.
unsigned word_bitcount64(bm::id64_t x) noexcept
bm::id_t bit_operation_sub_count(const bm::word_t *src1, const bm::word_t *src2) noexcept
Performs bitblock SUB operation and calculates bitcount of the result.
bm::id64_t bit_block_xor(bm::word_t *dst, const bm::word_t *src) noexcept
Plain bitblock XOR operation. Function does not analyse availability of source and destination blocks...
bm::id_t word_bitcount(bm::id_t w) noexcept
void bit_invert(T *start) noexcept
bool bit_block_or(bm::word_t *dst, const bm::word_t *src) noexcept
Plain bitblock OR operation. Function does not analyse availability of source and destination blocks.
bm::id64_t bit_block_and(bm::word_t *dst, const bm::word_t *src) noexcept
Plain bitblock AND operation. Function does not analyse availability of source and destination blocks...
unsigned bit_block_convert_to_arr(T *dest, const unsigned *src, bool inverted) noexcept
Convert bit block into an array of ints corresponding to 1 bits.
void bit_block_rle_split(T *dst_s, T *dst_r, T *rlen, unsigned &s_cnt, unsigned &r_cnt, const unsigned *src, bool inverted) noexcept
Convert bit block into two arrays of ints( corresponding to 1 bits)
unsigned bit_block_find(const bm::word_t *block, unsigned nbit, unsigned *pos) noexcept
Searches for the next 1 bit in the BIT block.
void bit_block_set(bm::word_t *dst, bm::word_t value) noexcept
Bitblock memset operation.
void bit_block_copy(bm::word_t *dst, const bm::word_t *src) noexcept
Bitblock copy operation.
void clear_bit(unsigned *dest, unsigned bitpos) noexcept
Clear 1 bit in a block.
void set_bit(unsigned *dest, unsigned bitpos) noexcept
Set 1 bit in a block.
bm::id_t bit_operation_xor_count(const bm::word_t *src1, const bm::word_t *src2) noexcept
Performs bitblock XOR operation and calculates bitcount of the result.
void bit_block_rle_set(unsigned *blk, const T *s, const T *r, const T *rlen, unsigned s_cnt, unsigned r_cnt) noexcept
Set bits using rle split scheme (reverse of bit_block_rle_split)
bm::id_t bit_operation_or_count(const bm::word_t *src1, const bm::word_t *src2) noexcept
Performs bitblock OR operation and calculates bitcount of the result.
bm::id64_t calc_block_digest0(const bm::word_t *const block) noexcept
Compute digest for 64 non-zero areas.
bm::id_t bit_operation_and_count(const bm::word_t *src1, const bm::word_t *src2) noexcept
Performs bitblock AND operation and calculates bitcount of the result.
bm::id64_t bit_block_sub(bm::word_t *dst, const bm::word_t *src) noexcept
Plain bitblock SUB (AND NOT) operation. Function does not analyse availability of source and destinat...
operation
Bit operations.
set_operation
Codes of set operations.
strategy
Block allocation strategies.
@ BM_GAP
GAP compression is ON.
size_t serialize(const BV &bv, unsigned char *buf, bm::word_t *temp_block=0, unsigned serialization_flags=0)
Saves bitvector into memory.
serialization_flags
Bit mask flags for serialization algorithm.
size_t deserialize(BV &bv, const unsigned char *buf, bm::word_t *temp_block=0, const bm::bv_ref_vector< BV > *ref_vect=0)
Bitvector deserialization from a memory BLOB.
void deserialize_range(BV &bv, const unsigned char *buf, typename BV::size_type from, typename BV::size_type to, const bm::bv_ref_vector< BV > *ref_vect=0)
Bitvector range deserialization from a memory BLOB.
@ BM_NO_GAP_LENGTH
save no GAP info (save some space)
@ BM_NO_BYTE_ORDER
save no byte-order info (save some space)
distance_metric
Distance metrics codes defined for vectors A and B.
distance_metric operation2metric(set_operation op) noexcept
Convert set operation into compatible distance metric.
@ COUNT_XOR
(A ^ B).count()
@ COUNT_SUB_BA
(B - A).count()
gap_word_t * gap_operation_xor(const gap_word_t *vect1, const gap_word_t *vect2, gap_word_t *tmp_buf, unsigned &dsize) noexcept
GAP XOR operation.
void gap_invert(T *buf) noexcept
Inverts all bits in the GAP buffer.
void gap_add_to_bitset(unsigned *dest, const T *pcurr, unsigned len) noexcept
Adds(OR) GAP block to bitblock.
void set_gap_level(T *buf, int level) noexcept
Sets GAP block capacity level.
unsigned gap_set_array(T *buf, const T *arr, unsigned len) noexcept
Convert array to GAP buffer.
unsigned gap_bit_count(const T *buf, unsigned dsize=0) noexcept
Calculates number of bits ON in GAP buffer.
void gap_convert_to_bitset(unsigned *dest, const T *buf, unsigned len=0) noexcept
GAP block to bitblock conversion.
unsigned gap_bit_count_unr(const T *buf) noexcept
Calculates number of bits ON in GAP buffer. Loop unrolled version.
unsigned gap_set_value(unsigned val, T *buf, unsigned pos) noexcept
Sets or clears bit in the GAP buffer.
bool gap_split(const T *buf, unsigned len, unsigned h_limit, const unsigned *hist0, const unsigned *hist1, T *tbuf, T *ex0_arr, T *ex1_arr, unsigned &ex0_cnt, unsigned &ex1_cnt) noexcept
split GAP block into pure GAPs and exceptions
int gap_calc_level(unsigned len, const T *glevel_len) noexcept
Calculates GAP block capacity level.
bool arr_calc_delta_min(const T *arr, unsigned arr_len, T &min0) noexcept
calculate minimal delta between monotonically growing numbers
void gap_calc_mins(const T *buf, T &min0, T &min1) noexcept
minimal delta sizes
void gap_calc_hist(const T *buf, unsigned len, unsigned *hist0, unsigned *hist1, unsigned hist_len) noexcept
compute histogram of exceptions on GAP block
void gap_restore_mins(T *buf, T min0, T min1) noexcept
Restore GAP block using two minimal GAP lens.
T gap_recalc_mins(T *tbuf, const T *buf, T min0, T min1) noexcept
Recalculate GAP block using two minimal GAP lens.
bm::gap_word_t gap_length(const bm::gap_word_t *buf) noexcept
Returns GAP block length.
void gap_set_all(T *buf, unsigned set_max, unsigned value) noexcept
Sets all bits to 0 or 1 (GAP)
D gap_convert_to_arr(D *dest, const T *buf, unsigned dest_len, bool invert=false) noexcept
Convert gap block into array of ints corresponding to 1 bits.
unsigned gap_add_value(T *buf, unsigned pos) noexcept
Add new value to the end of GAP buffer.
unsigned int
A callback function used to compare two keys in a database.
BV::size_type count_and(const BV &bv1, const BV &bv2) noexcept
Computes bitcount of AND operation of two bitsets.
void combine_sub(BV &bv, It first, It last)
SUB Combine bitvector and the iterable sequence.
bm::distance_metric_descriptor::size_type count_xor(const BV &bv1, const BV &bv2) noexcept
Computes bitcount of XOR operation of two bitsets.
BV::size_type count_sub(const BV &bv1, const BV &bv2) noexcept
Computes bitcount of SUB operation of two bitsets.
void combine_or(BV &bv, It first, It last)
OR Combine bitvector and the iterable sequence.
BV::size_type count_or(const BV &bv1, const BV &bv2) noexcept
Computes bitcount of OR operation of two bitsets.
const unsigned char set_block_ref_eq
block is a copy of a reference block
const unsigned char set_block_xor_ref32_um
..... 32-bit (should never happen)
const unsigned set_block_digest_wave_size
const unsigned char set_block_gap
Plain GAP block.
BV::size_type process_operation(BV &bv, BV &bv_tmp, bm::set_operation op)
Utility function to process operation using temp vector.
const unsigned char set_block_arrbit_inv
List of bits OFF.
const unsigned char set_nb_sync_mark8
bookmark sync point (8-bits)
const unsigned sblock_flag_max16
const unsigned sblock_flag_sb16
16-bit SB index (8-bit by default)
const unsigned char set_block_bit_interval
Interval block.
const unsigned char set_sblock_bienc_v3
super-block interpolated list v3
const unsigned char set_block_bit_digest0
H-compression with digest mask.
const unsigned char set_block_xor_ref32
..... 32-bit (should never happen)
const unsigned char set_block_arrgap_bienc_inv_v2
Interpolated GAP array (inverted)
const unsigned char set_block_bitgap_bienc
Interpolated bit-block as GAPs.
unsigned char check_pair_vect_vbr(const BMChain &mchain, const RVect &ref_vect)
Check effective bit-rate for the XOR encode vector.
const unsigned char set_block_arrgap_egamma_inv
Gamma compressed inverted delta GAP array.
const unsigned char set_block_arr_bienc_inv_v3
Interpolated array (v3)
void bit_recomb(It1 &it1, It2 &it2, BinaryOp &op, Encoder &enc, unsigned block_size=bm::set_block_size) noexcept
const unsigned set_block_mask
const unsigned short h3f_min0_8bit
min0 is not applied
const unsigned char set_sblock_bienc_gaps_v3
super-block of sparse GAPs
unsigned gamma_size(const T v) noexcept
Compute gamma size( in bytes)
T arr_recalc_min(T *tarr, const T *arr, unsigned arr_len, T min0, T delta_acc=0) noexcept
Recalculate array using minimal delta for better BIC compression.
const unsigned sblock_flag_sbgamma
const unsigned char set_block_arr_bienc_inv
Interpolated inverted block int array.
const unsigned set_sub_array_size
void get_block_coord(BI_TYPE nb, unsigned &i, unsigned &j) noexcept
Recalc linear bvector block index into 2D matrix coordinates.
const unsigned char set_block_gap_egamma_v3
Gamma compressed GAP block (v3)
const unsigned sblock_flag_sb32
32-bit SB index
bool check_block_zero(const bm::word_t *blk, bool deep_scan) noexcept
Checks all conditions and returns true if block consists of only 0 bits.
bm::gap_word_t recalc_tail_delta(bm::gap_word_t tail_delta, bm::gap_word_t &head, unsigned char &head_v3) noexcept
recalculate tail_delta for VBR compression: if tail_delta < 2048 then part of it (3 bits) is stored i...
const unsigned char set_block_16one
UP to 65536 all-set blocks.
const unsigned short h2f_max_v_8bit
min_v is 1 char, 8-bit (0 - 16-bit)
unsigned char compute_min_flags(bm::gap_word_t min0, bm::gap_word_t min1, unsigned char head_v3) noexcept
compute bienc_v3 serialization flags
bool is_const_set_operation(bm::set_operation op) noexcept
Returns true if set operation is constant (bitcount)
const unsigned short h3f_min1_8bit
min0 is 1 char, 8-bit (0 - 16-bit)
void encode_mins(BOUT &bout, unsigned head_v3, bm::gap_word_t min0, bm::gap_word_t min1) noexcept
save drange compression info
const unsigned char set_block_gap_bienc_v3s
Interpolated GAP array (v3)
const unsigned set_total_blocks
const unsigned char set_nb_sync_mark48
const unsigned char set_block_arrgap_bienc
Interpolated GAP array.
ByteOrder
Byte orders recognized by the library.
const unsigned char set_block_8one
Up to 256 all-set blocks.
const unsigned char set_nb_sync_mark16
const unsigned char set_block_32one
UP to 4G all-set blocks.
const unsigned char set_block_arr_bienc_v3
Interpolated array (v3)
const unsigned short h3f_min1_skip
min1 is 1 char, 8-bit (0 - 16-bit)
const unsigned char set_block_bit_0runs
Bit block with encoded zero intervals.
const unsigned char set_block_xor_ref8_um
block is un-masked XOR of a reference block (8-bit)
unsigned long long bmi_bslr_u64(unsigned long long w) noexcept
const unsigned char set_block_64one
lots of all-set blocks
const unsigned char set_block_gap_bienc
Interpolated GAP block (legacy)
const unsigned char set_block_xor_gap_ref16
..... 16-bit
bool check_block_one(const bm::word_t *blk, bool deep_scan) noexcept
Checks if block has only 1 bits.
const unsigned set_compression_default
Default compression level.
const unsigned char set_block_arrbit
List of bits ON.
const unsigned char set_block_1one
One block all-set (1111...)
void encode_min_max(BOUT &bout, bm::gap_word_t min_v, bm::gap_word_t tail_delta) noexcept
save range compression info [min_v..max_v]
const unsigned char set_block_arrgap_inv
List of bits OFF (GAP block)
void bit_block_change_bc(const bm::word_t *block, unsigned *gc, unsigned *bc) noexcept
void convert_sub_to_arr(const BV &bv, unsigned sb, VECT &vect)
convert sub-blocks to an array of set 1s (32-bit)
const unsigned gap_levels
const unsigned char set_block_gapbit
GAP compressed bitblock.
const unsigned char set_block_arr_bienc_8bh
BIC block 8bit header.
const unsigned char set_sblock_bienc
super-block interpolated list
const unsigned char set_block_xor_chain
XOR chain (composite of sub-blocks)
const unsigned char h3f_ex_upper2
void decode_min_max(BIN &bin, bm::gap_word_t gap_head, unsigned head_v3, bm::gap_word_t &min_v, bm::gap_word_t &max_v) noexcept
const unsigned char set_nb_bookmark32
jump ahead mark (32-bit)
const unsigned sblock_flag_max24
const unsigned set_sub_total_bits
const unsigned gap_len_cut_off_v3
const unsigned set_block_size
unsigned bit_to_gap(gap_word_t *dest, const unsigned *block, unsigned dest_len) noexcept
Convert bit block to GAP representation.
unsigned long long int id64_t
const unsigned block_waves
const unsigned char set_block_xor_ref16
block is masked XOR of a reference block (16-bit)
const unsigned char set_block_arrgap_bienc_inv
Interpolated GAP array (inverted)
const unsigned char set_block_arrgap_egamma
Gamma compressed delta GAP array.
const unsigned gap_equiv_len
const unsigned char set_block_1zero
One all-zero block.
const unsigned sblock_flag_min24
24-bit minv
const unsigned char set_block_end
End of serialization.
void arr_restore_min(T *arr, unsigned arr_len, T min0, T delta_acc=0) noexcept
Restore array using two minimal delta for better BIC compression.
const unsigned gap_max_buff_len
const unsigned char set_block_arrgap_bienc_v2
Interpolated GAP array (v2)
const unsigned char set_block_xor_gap_ref32
..... 32-bit (should never happen)
bm::operation setop2op(bm::set_operation op) noexcept
Convert set operation to operation.
const unsigned char set_block_arrgap
List of bits ON (GAP block)
const unsigned char set_nb_sync_mark64
..... 64-bit (should never happen)
const unsigned char set_nb_sync_mark32
const unsigned char set_block_sgapgap
SGAP compressed GAP block.
const unsigned char h3f_ex_arr_1
const unsigned sparse_max_l5
serialization_header_mask
@ BM_HM_NO_GAPL
no GAP levels
@ BM_HM_ID_LIST
id list stored
@ BM_HM_NO_BO
no byte-order
@ BM_HM_SPARSE
very sparse vector
@ BM_HM_64_BIT
64-bit vector
@ BM_HM_RESIZE
resized vector
@ BM_HM_HXOR
horizontal XOR compression turned ON
const unsigned char set_block_arr_bienc
Interpolated block as int array.
const unsigned char set_block_64zero
lots of zero blocks
const unsigned sblock_flag_dr_min
const unsigned short h3f_exceptions
min0 is not applied
const unsigned char set_block_gap_bienc_v2
Interpolated GAP block (v2)
const unsigned gap_max_safe_len
const unsigned char set_block_xor_ref8
block is masked XOR of a reference block (8-bit)
const unsigned char set_block_gap_egamma
Gamma compressed GAP block.
unsigned short gap_word_t
const unsigned char set_block_32zero
Up to 4G zero blocks.
const unsigned char set_block_8zero
Up to 256 zero blocks.
const unsigned char set_block_xor_ref16_um
block is un-masked XOR of a reference block (16-bit)
const unsigned gap_max_bits
const unsigned short h3f_min0_skip
max_v is 1 char, 8-bit (0 - 16-bit)
const unsigned char set_block_bit_1bit
Bit block with 1 bit ON.
const unsigned char set_block_aone
All other blocks one.
const unsigned char set_nb_bookmark16
jump ahead mark (16-bit)
const unsigned char set_block_gap_bienc_v3
Interpolated GAP array (v3)
const unsigned set_block_shift
const unsigned char set_block_arr_bienc_inv_v3s
Interpolated array (v3)
void decode_mins(BIN &bin, unsigned head_v3, bm::gap_word_t &min0, bm::gap_word_t &min1) noexcept
const unsigned sblock_flag_len16
16-bit len (8-bit by default)
const unsigned set_compression_max
Maximum supported compression level.
const unsigned short h2f_min_v_8bit
bit 0 is reserved (used for head start bit)
const unsigned char set_nb_sync_mark24
const unsigned char set_block_xor_gap_ref8
..... 8-bit
const unsigned sparse_max_l6
const unsigned char set_block_azero
All other blocks zero.
unsigned calc_hist_limit(const unsigned *hist0, const unsigned *hist1, unsigned hist_len, unsigned ex_limit, unsigned *ex_sum) noexcept
Calculate the effective exceptions limit using two histograms.
const unsigned bits_in_block
const unsigned char h3f_ex_arr_ex_EOC
use gamma code for size
const unsigned tmp_buff_alloc_factor
multiplier for alloc_bit_block()
const unsigned char set_block_16zero
Up to 65536 zero blocks.
const unsigned char set_block_bit
Plain bit block.
const unsigned char set_block_bitgap_bienc_v2
Interpolated bit-block as GAPs (v2 - reserved)
const unsigned char set_nb_bookmark24
jump ahead mark (24-bit)
unsigned long long bmi_blsi_u64(unsigned long long w)
void combine_count_operation_with_block(const bm::word_t *blk, const bm::word_t *arg_blk, distance_metric_descriptor *dmit, distance_metric_descriptor *dmit_end) noexcept
Internal function computes different distance metrics.
const unsigned sblock_flag_min16
16-bit minv
const unsigned char set_block_arr_bienc_v3s
Interpolated array (v3)
const unsigned char set_block_sgapbit
SGAP compressed bitblock.
void gap_survey(const T *gap_buf, unsigned &gamma_size_bits) noexcept
Compute various GAP encoding characteristics.
const GenericPointer< typename T::ValueType > T2 value
Int4 delta(size_t dimension_, const Int4 *score_)
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
static SLJIT_INLINE sljit_ins st(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
std::vector< bm::id64_t > ref_vect
Bit COUNT SUB AB functor.
bm::xor_complement_match match
Basic stats on second level group of blocks.
unsigned avg_gap_len() const noexcept
average length of GAP blocks
void init(void *bv, unsigned i) noexcept
void * bv_ptr
pointer to bit-vector
unsigned top_level_idx
index of the sub-block of blocks
bool is_only_gaps() const noexcept
true if sub contains multiple gap blocks and nothing else
static ByteOrder byte_order()
Bookmark state structure.
unsigned bm_type_
0:32-bit, 1: 24-bit, 2: 16-bit
size_t min_bytes_range_
Minimal distance (bytes) between marks
block_idx_type nb_
bookmark block idx
unsigned char * ptr_
bookmark pointer
block_idx_type nb_range_
target bookmark range in blocks
bookmark_state(block_idx_type nb_range) noexcept
Parameters for XOR similarity search.
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4