gap_count = 1;
105 unsignedbit_count = 0;
108w = w0 = *block ^ *xor_block;
111 const intw_shift =
int(
sizeof(w) * 8 - 1);
114gap_count -= (w_prev = (w0 >> w_shift));
117 for(++block, ++xor_block; block < block_end; ++block, ++xor_block)
119w = w0 = *block ^ *xor_block;
124gap_count -= !w_prev;
133gap_count -= (w0 >> w_shift);
134gap_count -= !(w_prev ^ w_l);
136w_prev = (w0 >> w_shift);
159 unsignedgap_count = 1;
160 unsignedbit_count = 0;
166w = w0 = *block ^ *xor_block;
169 const intw_shift =
int(
sizeof(w) * 8 - 1);
172gap_count -= unsigned(w_prev = (w0 >> w_shift));
175 for(++block, ++xor_block; block < block_end; ++block, ++xor_block)
177w = w0 = *block ^ *xor_block;
182gap_count -= !w_prev;
190gap_count -= unsigned(w0 >> w_shift);
191gap_count -= !(w_prev ^ w_l);
192w_prev = (w0 >> w_shift);
213 #ifdef VECT_BLOCK_XOR_CHANGE 325 template<
typenamePVT,
typenameVT>
326 typenameVT::size_type
329 typenameVT::size_type best_ref_idx,
334match_pairs_vect.resize(0);
340 typenameVT::size_type sz = match_vect.size();
341 for(
typenameVT::size_type
i= 0; (
i< sz) && (d64_acc != ~0ull); ++
i)
344 if(xmd.
ref_idx== best_ref_idx)
360 const unsignedmin_gain_cut_off = 50;
361 for(
typenameVT::size_type
i= 0; (
i< sz) && (d64_acc != ~0ull); ++
i)
372 if(xmd.
gc_gain> min_gain_cut_off)
373d64_new = ~d64_acc & xmd.
gc_d64;
376 if(xmd.
bc_gain> min_gain_cut_off)
377d64_new = ~d64_acc & xmd.
bc_d64;
380 if(xmd.
ibc_gain> min_gain_cut_off)
381d64_new = ~d64_acc & xmd.
ibc_d64;
395 returnmatch_pairs_vect.size();
403 template<
typenameBMChain,
typenameRVect>
407 for(
size_t i= 0;
i< mchain.chain_size; ++
i)
443 #if defined(VECT_BLOCK_CHANGE) 455x_descr.sb_bc[
i] = (
unsignedshort) bc;
464x_descr.sb_gc[
i] = (
unsignedshort) gc;
493 #ifdef VECT_BIT_BLOCK_XOR 508 for( ;sub_block < sub_block_end; )
510t_sub_block[0] = sub_block[0] ^ xor_sub_block[0];
511t_sub_block[1] = sub_block[1] ^ xor_sub_block[1];
512t_sub_block[2] = sub_block[2] ^ xor_sub_block[2];
513t_sub_block[3] = sub_block[3] ^ xor_sub_block[3];
514t_sub_block+=4; sub_block+=4; xor_sub_block+=4;
519 for(; sub_block < sub_block_end; t_sub_block+=4, sub_block+=4)
521t_sub_block[0] = sub_block[0];
522t_sub_block[1] = sub_block[1];
523t_sub_block[2] = sub_block[2];
524t_sub_block[3] = sub_block[3];
529 bm::word_t* t_sub_block = target_block + off;
533 const bm::word_t* xor_sub_block = xor_block + off;
534 for(; sub_block < sub_block_end; )
536t_sub_block[0] = sub_block[0] ^ xor_sub_block[0];
537t_sub_block[1] = sub_block[1] ^ xor_sub_block[1];
538t_sub_block[2] = sub_block[2] ^ xor_sub_block[2];
539t_sub_block[3] = sub_block[3] ^ xor_sub_block[3];
540t_sub_block+=4; sub_block+=4; xor_sub_block+=4;
545 for(; sub_block < sub_block_end; t_sub_block+=4, sub_block+=4)
547t_sub_block[0] = sub_block[0];
548t_sub_block[1] = sub_block[1];
549t_sub_block[2] = sub_block[2];
550t_sub_block[3] = sub_block[3];
576 #ifdef VECT_BIT_BLOCK_XOR_2WAY 589 for(; t_sub_block < t_sub_block_end; t_sub_block+=4, xor_sub_block+=4)
591t_sub_block[0] ^= xor_sub_block[0];
592t_sub_block[1] ^= xor_sub_block[1];
593t_sub_block[2] ^= xor_sub_block[2];
594t_sub_block[3] ^= xor_sub_block[3];
597 const bm::word_t* xor_sub_block = xor_block + off;
598 bm::word_t* t_sub_block = target_block + off;
600 for(; t_sub_block < t_sub_block_end; t_sub_block+=4, xor_sub_block+=4)
602t_sub_block[0] ^= xor_sub_block[0];
603t_sub_block[1] ^= xor_sub_block[1];
604t_sub_block[2] ^= xor_sub_block[2];
605t_sub_block[3] ^= xor_sub_block[3];
622 template<
typenameBV>
707bv_blocks.optimize(tb);
715 template<
classBMATR>
725 template<
typenameBMATR>
789 template<
typenameBV>
818 template<
typenameBV>
874 unsigned i,
unsignedj,
976{
return ref_vect_->
get_bv(ri)->get_blocks_manager().get_block_ptr(
i, j); }
1044template<typename BV>
1056 template<
typenameBV>
1069 template<
typenameBV>
1079xmd.xor_gc = xmd.xor_bc = 0;
1087 const bm::word_t* xor_sub_block = xor_block + off;
1089 unsignedxor_gc, xor_bc;
1093x_descr.sb_xor_bc[
i] = (
unsignedshort)xor_bc;
1097 bm::word_tw_l = (sub_block[-1] ^ xor_sub_block[-1]);
1098 bm::word_tw_r = (sub_block[0] ^ xor_sub_block[0]) & 1;
1100xor_gc -= (w_l == w_r);
1102x_descr.sb_xor_gc[
i] = (
unsignedshort)xor_gc;
1104xmd.xor_bc += xor_bc;
1105xmd.xor_gc += xor_gc;
1111 unsignedblock_gc_gain(0), block_bc_gain(0), block_ibc_gain(0);
1112 bm::id64_tgc_digest(0), bc_digest(0), ibc_digest(0);
1121 unsignedxor_gc = x_descr.sb_xor_gc[
i];
1125block_gc_gain += x_descr.sb_gc[
i];
1127 else if(xor_gc < x_descr.sb_gc[
i])
1130block_gc_gain += (x_descr.sb_gc[
i] - xor_gc);
1132 unsignedxor_bc = x_descr.sb_xor_bc[
i];
1133 if(xor_bc < x_descr.sb_bc[
i])
1136block_bc_gain += (x_descr.sb_bc[
i] - xor_bc);
1138 unsignedxor_ibc = wave_max_bits - xor_bc;
1139 unsignedwave_ibc = wave_max_bits - x_descr.sb_bc[
i];
1140 if(xor_ibc < wave_ibc)
1142ibc_digest |= dmask;
1143block_ibc_gain += (wave_ibc - xor_ibc);
1151xmd.gc_d64 = gc_digest;
1152xmd.bc_d64 = bc_digest;
1153xmd.ibc_d64 = ibc_digest;
1155xmd.gc_gain = block_gc_gain;
1156xmd.bc_gain = block_bc_gain;
1157xmd.ibc_gain = block_ibc_gain;
1162 if(!(block_gc_gain | block_bc_gain | block_ibc_gain))
1176xmd.block_gain = 0; xmd.xor_d64 = 0;
1192xmd.match_type =
best_metric(
unsigned(new_bc),
unsigned(new_gc), &best_m);
1193 switch(xmd.match_type)
1196 if(new_ibc < new_gc)
1198xmd.block_gain = block_ibc_gain; xmd.xor_d64 = ibc_digest;
1202xmd.block_gain = block_gc_gain; xmd.xor_d64 = gc_digest;
1206 if(new_ibc < new_bc)
1208xmd.block_gain = block_ibc_gain; xmd.xor_d64 = ibc_digest;
1212xmd.block_gain = block_bc_gain; xmd.xor_d64 = bc_digest;
1216xmd.block_gain = block_ibc_gain; xmd.xor_d64 = ibc_digest;
1225 if(block_gc_gain >= block_bc_gain && block_gc_gain >= block_ibc_gain)
1227xmd.block_gain = block_gc_gain; xmd.xor_d64 = gc_digest;
1230 if(block_bc_gain > block_gc_gain && block_bc_gain > block_ibc_gain)
1232xmd.block_gain = block_bc_gain; xmd.xor_d64 = bc_digest;
1236xmd.block_gain = block_ibc_gain; xmd.xor_d64 = ibc_digest;
1244 template<
typenameBV>
1250 unsigned i,
unsignedj,
1265 unsignedbest_block_gain = 0;
1303 unsignedgc_diff = r_gc - s_gc;
1304 if(gc_diff >= s_gc)
1358 const floatbie_bits_per_int = 3.0f;
1359 const unsignedbie_limit =
1362 unsignedxor_bc, xor_gc;
1403 unsignedgain_min = unsigned(
sizeof(
char) +
sizeof(
unsigned));
1405gain_min += (unsigned)
sizeof(
bm::id64_t);
1412 if(gain > gain_min)
1424 template<
typenameBV>
1439 template<
typenameBV>
1453 template<
typenameBV>
1467 typenamebvector_type::enumerator en(sim_model.
bv_blocks);
1468 for(
size_typecol = 0; en.valid(); ++en, ++col)
1478 template<
typenameBV>
1487 const floatbie_bits_per_int = 3.0f;
1488 const unsignedbie_limit =
1537 if(d64 && d64 != ~0ULL)
1541 switch(bmc.
match)
1552 unsignedxor_bc, xor_gc;
1564 for(
size_typek = 0; k < chain_size; ++k)
1576bmc.
ref_idx[0] = unsigned(ridx);
1580bmc.
ref_idx[0] = unsigned(ridx);
1587 autosz = pm_vect.
size();
1606 template<
typenameBV>
1620 autosz = pm_vect.size();
1639 template<
typenameBV>
1674 template<
typenameBV>
1678 for(
size_t i= 0;
i< sz; ++
i)
1682 alloc_.free_bit_block(blk);
1689 template<
typenameBV>
1711t_block =
alloc_.alloc_bit_block();
1731 template<
typenameBV>
ncbi::TMaskedQueryRegions mask
#define BM_DECLARE_TEMP_BLOCK(x)
#define VECT_BIT_BLOCK_XOR(t, src, src_xor, d)
#define VECT_BLOCK_CHANGE(block, size)
#define VECT_BIT_BLOCK_XOR_2WAY(t, src_xor, d)
#define VECT_BLOCK_XOR_CHANGE(block, xor_block, size, gc, bc)
#define IS_VALID_ADDR(addr)
Bit manipulation primitives (internal)
List of reference bit-vectors with their true index associations.
const bvector_type * get_bv(size_type idx) const noexcept
Get reference vector by the index in this ref-vector.
bvector_type::size_type size_type
void add_vectors(const BMATR &bmatr)
Append basic bit-matrix to the list of reference vectors.
bm::block_match_chain< size_type > block_match_chain_type
void fill_alloc_digest(bvector_type &bv_blocks) const
Fill block allocation digest for all vectors in the reference collection.
bm::heap_vector< std::size_t, bv_allocator_type, true > bv_plane_vector_type
static size_type not_found() noexcept
not-found value for find methods
bvector_type * bvector_type_ptr
bool build_nb_digest_and_xor_matrix(matrix_chain_type &matr, bvector_type &bv_blocks) const
Calculate blocks digest and resize XOR distance matrix based on total number of available blocks.
size_type find(std::size_t ref_idx) const noexcept
Find vector index by the reference index.
size_type find_bv(const bvector_type *bv) const noexcept
Find vector index by the pointer.
const bvector_type * bvector_type_const_ptr
void add(const bvector_type *bv, size_type ref_idx)
Add reference vector.
void reset()
reset the collection (resize(0))
void build(const BMATR &bmatr)
Reset and build vector of references from a basic bit-matrix all NULL rows are skipped,...
bm::dynamic_heap_matrix< block_match_chain_type, bv_allocator_type > matrix_chain_type
bm::heap_vector< bvector_type_const_ptr, bv_allocator_type, true > bvptr_vector_type
size_type size() const noexcept
Get reference list size.
size_type get_row_idx(size_type idx) const noexcept
Get reference row index by the index in this ref-vector.
void resize_xor_matrix(matrix_chain_type &matr, size_type total_blocks) const
Utility function to resize matrix based on number of vectors and blocks.
bvector_type::allocator_type bv_allocator_type
void add_sparse_vector(const SV &sv)
Add bit-transposed sparse vector as a bit-matrix.
bv_plane_vector_type ref_bvects_rows_
reference vector row idxs
unsigned rows_acc_
total rows accumulator
bvptr_vector_type ref_bvects_
reference vector pointers
const value_type * row(size_type row_idx) const noexcept
size_type cols() const noexcept
void resize(size_type rows_in, size_type cols_in, bool copy_content=true)
value_type * data() const noexcept
void resize(size_type new_size)
vector resize
value_type & at(size_type pos)
size_type size() const noexcept
void push_back(const value_type &v)
push new element to the back of the vector
buffer_type::size_type size_type
XOR scanner to search for complement-similarities in collections of bit-vectors.
const bv_ref_vector_type * ref_vect_
ref.vect for XOR filter
bm::bv_ref_vector< BV > bv_ref_vector_type
bm::heap_vector< unsigned, bv_allocator_type, true > bv_bcgc_vector_type
bm::id64_t get_xor_digest() const noexcept
void sync_nb_vect()
Sync TEMP vector size.
const bm::word_t * get_ref_block(size_type ri, unsigned i, unsigned j) const noexcept
Return block from the reference vector [vect_idx, block_i, block_j].
bvector_type::size_type size_type
bm::xor_complement_match search_best_xor_mask(const bm::word_t *s_block, size_type ri, size_type ridx_from, size_type ridx_to, unsigned i, unsigned j, bm::word_t *tx_block, const bm::xor_sim_params ¶ms)
Scan for all candidate bit-blocks to find mask or match.
bm::block_waves_xor_descr & get_descr() noexcept
bv_bcgc_vector_type nb_bc_vect_
bool validate_xor(const bm::word_t *xor_block) const noexcept
Check if XOR transform simplified block enough for compressibility objective.
bool compute_sim_model(xor_sim_model< BV > &sim_model, const bv_ref_vector_type &ref_vect, const bm::xor_sim_params ¶ms)
Calculate matrix of best XOR match metrics per block for the attached collection of bit-vectors.
size_type found_ridx_
match vector (in references)
const bm::word_t * found_block_xor_
void apply_xor_match_vector(bm::word_t *target_xor_block, const bm::word_t *s_block, size_type s_ri, const match_pairs_vector_type &pm_vect, unsigned i, unsigned j) const noexcept
XOR all match blocks to target using their digest masks.
bm::bit_block_t xor_tmp_block_
bv_xdescr_vector_type nb_xdescr_vect_
bm::id64_t x_d64_
search digest
size_type found_ridx() const noexcept
void compute_xor_complexity_descr(const bm::word_t *block, bm::id64_t block_d64, const bm::word_t *xor_block, bm::block_waves_xor_descr &x_descr, bm::block_xor_match_descr &xmd) const noexcept
Compute reference complexity descriptor based on XOR vector.
unsigned s_block_best_metric_
s-block orig.metric
match_pairs_vector_type & get_match_pairs() noexcept
bv_bcgc_vector_type nb_gc_vect_
size_type refine_match_chain()
Run a search to add possible XOR match chain additions.
match_pairs_vector_type chain_match_vect_
refined match pairs
bv_blocks_vector_type nb_blocks_vect_
pointers to temp blocks
bvector_type::allocator_type bv_allocator_type
xor_matches_vector_type match_vect_
vector of match descr
bm::block_waves_xor_descr x_descr_
XOR desriptor.
unsigned get_s_bc() const noexcept
bv_allocator_type alloc_
allocator to produce blocks
bv_ref_vector_type::matrix_chain_type matrix_chain_type
void deoptimize_gap_blocks(size_type nb, const xor_sim_params ¶ms)
Deoptimize vertical slice of GAP blocks.
const bm::word_t * get_found_block() const noexcept
void free_blocks() noexcept
Free the collection of temp blocks.
bm::heap_vector< bm::block_xor_match_descr, bv_allocator_type, true > xor_matches_vector_type
void compute_sim_model(typename xor_sim_model< BV >::matrix_chain_type &sim_model_matr, size_type nb, size_type nb_rank, const bm::xor_sim_params ¶ms)
Compute similarity model for block.
xor_matches_vector_type & get_match_vector() noexcept
bm::xor_complement_match x_block_mtype_
metric type
bm::heap_vector< bm::block_waves_xor_descr, bv_allocator_type, true > bv_xdescr_vector_type
void compute_s_block_stats(const bm::word_t *block) noexcept
Compute statistics for the r-(or s-) block.
void get_s_block_stats(size_type ri) noexcept
Get statistics for the r-(or s-) block.
void set_ref_vector(const bv_ref_vector_type *ref_vect) noexcept
bm::xor_complement_match get_best_match_type() const noexcept
Return best match type of a found block.
static bm::xor_complement_match best_metric(unsigned bc, unsigned gc, unsigned *best_metric) noexcept
const bv_ref_vector_type & get_ref_vector() const noexcept
unsigned get_s_block_best() const noexcept
unsigned get_x_best_metric() const noexcept
bool compute_sim_model(xor_sim_model< BV > &sim_model, const bm::xor_sim_params ¶ms)
Calculate matrix of best XOR match metrics per block for the attached collection of bit-vectors.
unsigned x_best_metric_
min(gc, bc, ibc)
bm::heap_vector< bm::match_pair, bv_allocator_type, true > match_pairs_vector_type
bm::heap_vector< bm::word_t *, bv_allocator_type, true > bv_blocks_vector_type
unsigned get_s_gc() const noexcept
static unsigned char depth[2 *(256+1+29)+1]
unsigned word_bitcount64(bm::id64_t x) noexcept
bm::id64_t bit_block_xor(bm::word_t *dst, const bm::word_t *src) noexcept
Plain bitblock XOR operation. Function does not analyse availability of source and destination blocks...
bm::id_t word_bitcount(bm::id_t w) noexcept
void bit_block_xor_change64(const bm::word_t *s_block, const bm::word_t *ref_block, unsigned size, unsigned *gc, unsigned *bc) noexcept
void bit_block_xor_change32(const bm::word_t *block, const bm::word_t *xor_block, unsigned size, unsigned *gc, unsigned *bc) noexcept
bm::id64_t calc_block_digest0(const bm::word_t *const block) noexcept
Compute digest for 64 non-zero areas.
unsigned bit_count_min_unroll(const bm::word_t *block, const bm::word_t *block_end) noexcept
Bitcount for bit block without agressive unrolling.
void gap_convert_to_bitset(unsigned *dest, const T *buf, unsigned len=0) noexcept
GAP block to bitblock conversion.
bm::gap_word_t gap_length(const bm::gap_word_t *buf) noexcept
Returs GAP block length.
unsigned int
A callback function used to compare two keys in a database.
xor_complement_match
XOR complementarity type between 2 blocks.
const unsigned set_block_digest_wave_size
unsigned char check_pair_vect_vbr(const BMChain &mchain, const RVect &ref_vect)
Check effective bit-rate for the XOR encode vector.
void compute_s_block_descr(const bm::word_t *block, block_waves_xor_descr &x_descr, unsigned *s_gc, unsigned *s_bc) noexcept
Compute reference (non-XOR) 64-dim complexity descriptor for the s-block.
void get_block_coord(BI_TYPE nb, unsigned &i, unsigned &j) noexcept
Recalc linear bvector block index into 2D matrix coordinates.
unsigned bit_block_change64(const bm::word_t *in_block, unsigned size) noexcept
VT::size_type greedy_refine_match_vector(PVT &match_pairs_vect, VT &match_vect, typename VT::size_type best_ref_idx, bm::id64_t d64, bm::xor_complement_match match_type)
Greedy algorithm to find additional matches improving the inital best match block on its match type.
unsigned bit_block_change32(const bm::word_t *block, unsigned size) noexcept
unsigned long long bmi_bslr_u64(unsigned long long w) noexcept
void bit_block_change_bc(const bm::word_t *block, unsigned *gc, unsigned *bc) noexcept
unsigned long long int id64_t
const unsigned block_waves
bool block_find_first_diff(const bm::word_t *blk, const bm::word_t *arg_blk, unsigned *pos) noexcept
Find first bit which is different between two blocks (GAP or bit)
unsigned short gap_word_t
void bit_block_xor_change(const bm::word_t *block, const bm::word_t *xor_block, unsigned size, unsigned *gc, unsigned *bc) noexcept
bm::id_t bvector_size_type
const unsigned gap_max_bits
unsigned long long bmi_blsi_u64(unsigned long long w)
const struct ncbi::grid::netcache::search::fields::SIZE size
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
std::vector< bm::id64_t > ref_vect
bool operator==(const block_match_chain &bmc) const noexcept
bm::xor_complement_match match
Structure to compute XOR gap-count profile by sub-block waves.
unsigned short sb_bc[bm::block_waves]
BIT counts.
unsigned short sb_gc[bm::block_waves]
GAP counts.
unsigned short sb_xor_gc[bm::block_waves]
XOR-mask GAP count.
unsigned short sb_xor_bc[bm::block_waves]
XOR-mask GAP count.
Capture the XOR filter results (xor block against ref.block)
size_type ref_idx
reference vector index
bm::id64_t xor_d64
recorded digest
bvector_size_type size_type
unsigned block_gain
XOR filter improvement (best)
bm::xor_complement_match match_type
match type
bvector_size_type ref_idx
reference vector index
bm::id64_t xor_d64
recorded digest
match_pair(bvector_size_type idx, bm::id64_t d64)
bm::dynamic_heap_matrix< block_match_chain_type, bv_allocator_type > matrix_chain_type
bm::block_match_chain< size_type > block_match_chain_type
bvector_type::size_type size_type
matrix_chain_type matr
model matrix
bvector_type bv_blocks
blocks digest
bvector_type::allocator_type bv_allocator_type
Parameters for XOR similarity search.
unsigned min_lookup_depth
unsigned max_lookup_depth
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4