<
classTOut>
55 unsigned len= (*gap_buf >> 3);
56tout <<
"["<< *gap_buf <<
" len="<<
len<<
"] ";
57 for(
unsigned i= 0;
i<
len; ++
i)
60tout << *gap_buf <<
"; ";
65 template<
classTOut>
69 unsigned len= gap_len ? gap_len : (*gap_buf >> 3);
70tout <<
"[" " len="<<
len<<
"] ";
71 unsigned i= gap_len ? 0 : 1;
72 for(;
i<
len; ++
i)
74tout << gap_buf[
i] <<
"; ";
82 while( (
value>>
l) > 1 ) ++
l;
86 template<
classTOut>
94 for(
unsigned i= 0;
i<
l; ++
i)
100 for(
unsigned i= 0;
i<
l; ++
i)
112 template<
typenameTOut>
116 unsigned len= gap_len ? gap_len : (*gap_buf >> 3);
117tout <<
"[" " len="<<
len<<
"] ";
118 unsigned i= gap_len ? 0 : 1;
119 for(;
i<
len; ++
i)
121 unsignedv = gap_buf[
i];
127tout <<
" gamma_bits="<< total <<
" src_bits ="<<
len* 16;
142std::ifstream fin(fname.c_str(),
std::ios::in| std::ios::binary);
147fin.seekg(0, std::ios::end);
148fsize = (size_t)fin.tellg();
156fin.seekg(0, std::ios::beg);
157fin.read((
char*) &
data[0], std::streamsize(fsize));
166 template<
classTBV>
169std::ifstream bv_file (fname,
std::ios::in| std::ios::binary);
172std::cerr <<
"Cannot open file: "<< fname << std::endl;
175bv_file.seekg(0, std::ios_base::end);
176 unsignedlength = (unsigned)bv_file.tellg();
179std::cerr <<
"Empty file:"<< fname << std::endl;
185bv_file.seekg(0, std::ios::beg);
187 char*
buffer=
new char[length];
189bv_file.read(
buffer, length);
196 template<
classTBV>
199std::ofstream bfile (fname,
std::ios::out| std::ios::binary);
202std::cerr <<
"Cannot open file: "<< fname << std::endl;
205 typenameTBV::statistics st1;
218 autoblob_size = sermem_buf.
size();
220bfile.write((
char*)sermem_buf.
buf(), std::streamsize(blob_size));
225 void SaveBlob(
const char* name_prefix,
unsignednum,
const char* ext,
226 const unsigned char* blob,
size_tblob_size)
228std::stringstream fname_str;
229fname_str << name_prefix <<
"-"<< num << ext;
232 const char* fname = s.c_str();
233std::ofstream bfile (fname,
std::ios::out| std::ios::binary);
236std::cerr <<
"Cannot open file: "<< fname << std::endl;
239bfile.write((
char*)blob, std::streamsize(blob_size));
244 template<
typenameV,
typenameTOut>
247 for(
unsigned i= 0;
i<
sizeof(V)*8;
i++)
249tout << (unsigned)((
val>>
i) & 1);
250 if(
i== 15 && (
sizeof(V)*8 > 16)) tout <<
"-";
254 template<
typenameTOut>
260 template<
typenameTOut>
266 const unsigned*
row= distance[
i];
270tout << std::setw(4) << std::setfill(
'0') <<
row[j] <<
" ";
276 template<
typenameTM,
typenameTOut>
284 if(
i< 10) tout <<
" ";
285 for(
unsignedj = 0; j <
columns; ++j)
289tout << std::setw(4) << std::setfill(
'0') <<
row[j] <<
" ";
307 unsignedbit_idx = 0;
316 value|= (1 << bit_idx);
322 if(bit_idx ==
sizeof(
unsigned) * 8)
328 template<
classBV,
typenameTOut>
331 const unsignedsz = 128000;
332 unsigned* bc_arr =
new unsigned[sz];
333 for(
unsignedx = 0; x < sz; ++x) bc_arr[x] = 0;
336 unsignedlast_block = bv.count_blocks(bc_arr);
339 for(
unsigned i= 0;
i<= last_block; ++
i)
344 for(;
i<= last_block; ++
i)
346tout << std::setw(5) << std::setfill(
'0') << bc_arr[
i] <<
" ";
348 if(++j == 10)
break;
350tout <<
" | "<< sum << std::endl;
352tout <<
"Total="<< sum << std::endl;
356 template<
typenameTOut>
359 static unsignedsum = 0;
360 static unsignedrow_idx = 0;
361 static unsigned prev= 0;
379tout << std::setw(5) << std::setfill(
'0') <<
count<<
" ";
386tout <<
" | "<< sum << std::endl;
390 template<
classBV,
typenameTOut>
393 typenameBV::statistics
st;
399 autossize =
buf.size();
401tout <<
" - Blocks: [ " 402<<
"B:"<<
st.bit_blocks
403<<
", G:"<<
st.gap_blocks <<
"] " 405<<
", mem = "<<
st.memory_used <<
" "<< (
st.memory_used / (1024 * 1024)) <<
"MB " 406<<
", max smem:"<<
st.max_serialize_mem <<
" "<< (
st.max_serialize_mem / (1024 * 1024)) <<
"MB " 407<<
" compressed = "<< ssize <<
" "<< (ssize / (1024 * 1024)) <<
"MB " 413 template<
classBV,
typenameTOut>
416 const typenameBV::blocks_manager_type& bman = bv.get_blocks_manager();
421 inttotal_gap_eff = 0;
428 typenameBV::block_idx_type nb;
429 typenameBV::block_idx_type nb_prev = 0;
430 for(nb = 0; nb <
blocks; ++nb)
434 const bm::word_t* blk = bman.get_block(i0, j0);
443tout <<
"[Alert!"<< nb <<
"]";
447 typenameBV::block_idx_type start = nb;
451blk = bman.get_block(i0, j0);
456tout <<
"[Alert!"<< nb <<
"]";
470tout <<
"{F."<< start <<
":"<< nb <<
"}";
475 if((nb-1) != nb_prev)
477tout <<
".."<< (size_t)nb-nb_prev <<
"..";
487 unsignedraw_size=bc*2;
488 unsignedcmr_len=
len*2;
489 size_tmem_eff = raw_size - cmr_len;
490total_gap_eff += unsigned(mem_eff);
494tout <<
" [GAP "<< nb <<
"("<<
i<<
","<< j <<
")" 495<<
"="<< bc <<
":"<< level <<
"-L"<<
len<<
"("<< mem_eff <<
")]";
505zw += (blk[
i] == 0);
509tout <<
" (BIT "<< nb <<
"="<< bc <<
"["<< zw <<
"])";
520tout << std::endl <<
"gap_efficiency="<< total_gap_eff << std::endl;
528 unsigned char*
buf= 0;
529 typenameBV::size_type blob_size = 0;
535 typenameBV::statistics
st;
538 buf=
new unsigned char[
st.max_serialize_mem];
539blob_size = (unsigned)bvs.
serialize(bv, (
unsigned char*)
buf,
st.max_serialize_mem);
552 template<
classSV,
typenameTOut>
553 voidprint_svector_xor_stat(TOut& toutconst SV& sv)
556 typenameSV::size_type sz = sv.size();
561 for(
typenameSV::size_type nb = 0; nb < nb_max; ++nb)
563tout <<
"nb = "<< nb << std::endl;
568 autoplanes = sv.planes();
569 for(
unsigned i= 0;
i< planes; ++
i)
571 const typenameSV::bvector_type* bv = sv.get_plane(
i);
574 const typenameSV::bvector_type::blocks_manager_type& bman = bv->get_blocks_manager();
575 const bm::word_t* block = bman.get_block_ptr(i0, j0);
581bm::compute_complexity_descr(block, x_descr);
583bm::bit_block_change_bc32(block, &gc, &bc);
584 unsignedbest_metric, block_metric;
585block_metric = best_metric = gc < bc ? gc : bc;
587 boolkb_found =
false;
589 for(
unsignedk =
i+ 1; k < planes; ++k)
591 const typenameSV::bvector_type* bv_x = sv.get_plane(
i);
594 const typenameSV::bvector_type::blocks_manager_type& bman_x = bv_x->get_blocks_manager();
595 const bm::word_t* block_x = bman_x.get_block_ptr(i0, j0);
601bm::compute_xor_complexity_descr(block, block_x, x_descr);
604bm::bit_block_xor_product(tb, block, block_x, kb_d64);
605 unsignedkb_bc, kb_gc;
606bm::bit_block_change_bc32(tb, &kb_gc, &kb_bc);
626tout <<
"XOR match "<<
"metric gain = "<< std::endl;
635 template<
classSV,
typenameTOut>
638 typedef typenameSV::bvector_type bvector_type;
648 typenamebvector_type::size_type cnt_and = dmit->
result;
651 typenamebvector_type::size_type cnt_or = dmit->
result;
652 if(cnt_and == 0 || cnt_or == 0)
658d = double(cnt_and) / double(cnt_or);
660 unsignedres = unsigned(d * 100);
661 if(res > 100) res = 100;
669similarity_batch_type sbatch;
679 typenamesimilarity_batch_type::vector_type& sim_vec = sbatch.descr_vect_;
682 for(
size_tk = 0; k < sim_vec.size(); ++k)
684 unsignedsim = sim_vec[k].similarity();
687 const typenameSV::bvector_type* bv1 = sim_vec[k].get_first();
688 const typenameSV::bvector_type* bv2 = sim_vec[k].get_second();
692 typenameSV::bvector_type bvx(*bv2);
696 if(bv_size_x < bv_size2)
698 size_tdiff = bv_size2 - bv_size_x;
701 size_tsz10p = bv_size2 / 10;
704tout <<
"["<< sim_vec[k].get_first_idx()
705<<
", "<< sim_vec[k].get_second_idx()
707<<
" size("<< sim_vec[k].get_second_idx() <<
")=" 709<<
" size(x)="<< bv_size_x
719 typenameSV::statistics
st;
720svect.calc_stat(&
st);
722tout <<
"size = "<< svect.size() << std::endl;
724tout <<
"Bit blocks: "<<
st.bit_blocks << std::endl;
725tout <<
"GAP blocks: "<<
st.gap_blocks << std::endl;
726tout <<
"GAP levels counts:";
731 case0: tout <<
"[ I: "<<
st.gap_levels[
g] <<
"] ";
break;
732 case1: tout <<
"[ II: "<<
st.gap_levels[
g] <<
"] ";
break;
733 case2: tout <<
"[ III:"<<
st.gap_levels[
g] <<
"] ";
break;
734 case3: tout <<
"[ IV: "<<
st.gap_levels[
g] <<
"] ";
break;
736tout <<
"[ "<<
g<<
": "<<
st.gap_levels[
g] <<
"] ";
break;
741tout <<
"Max serialize mem:"<<
st.max_serialize_mem <<
" " 742<< (
st.max_serialize_mem / (1024 * 1024)) <<
"MB"<< std::endl;
743tout <<
"Memory used: "<<
st.memory_used <<
" " 744<< (
st.memory_used / (1024 * 1024)) <<
"MB"<< std::endl;
746 autoeff_max_element = svect.effective_vector_max();
747 size_tstd_vect_size =
sizeof(
typename SV::value_type) * svect.size() * eff_max_element;
748tout <<
"Projected mem usage for vector<value_type>:" 749<< std_vect_size <<
" " 750<< std_vect_size / (1024 * 1024) <<
"MB" 752 if(
sizeof(
typename SV::value_type) > 4 && (eff_max_element == 1))
754tout <<
"Projected mem usage for vector<long long>:" 755<<
sizeof(
longlong) * svect.size() << std::endl;
758tout <<
"\nplanes:"<< std::endl;
760 size_tssize(0), octet_ssize(0);
762 typenameSV::bvector_type bv_join;
763 autoplanes = svect.get_bmatrix().rows();
765 unsignedoctet_cnt(0), octet(0);
766 for(
unsigned i= 0;
i< planes; ++
i)
768 const typenameSV::bvector_type* bv_plane = svect.get_slice(
i);
769tout <<
i<<
"-"<< octet_cnt <<
":";
773 boolany_else =
false;
774 for(
unsignedj =
i+1; j < planes; ++j)
776 if(svect.get_slice(j))
787bv_join |= *bv_plane;
790octet_ssize += pssize;
794tout <<
"--------------------"<< std::endl;
795tout <<
"octet N = "<< octet <<
796 " compressed = "<< octet_ssize <<
797 " "<< octet_ssize/(1024*1024) <<
"MB"<< std::endl;
798octet_cnt = 0; octet_ssize = 0;
807tout <<
"-------------------- END of OCTETS\n";
809 const typenameSV::bvector_type* bv_null = svect.get_null_bvector();
812tout <<
"NULL plane:\n";
814 typenameSV::size_type not_null_cnt = bv_null->count();
815tout <<
" - Bitcount: "<< not_null_cnt << std::endl;
817tout <<
"Projected mem usage for std::vector<pair<unsigned, value_type> >:" 818<< ((
sizeof(
typename SV::value_type) +
sizeof(
unsigned)) * not_null_cnt) <<
" " 819<< ((
sizeof(
typename SV::value_type) +
sizeof(
unsigned)) * not_null_cnt) / (1024 * 1024) <<
"MB" 824tout <<
"NO NULL plane:\n";
827tout <<
" Total serialized size (planes): "<< ssize
829<<
" "<< ssize / (1024 * 1024) <<
" MB"<< std::endl;
834 doublefr = double(bv_join_cnt) / double (svect.size());
835tout <<
"Non-zero elements: "<< bv_join_cnt <<
" " 838 size_tnon_zero_mem = size_t(bv_join_cnt) *
sizeof(
typename SV::value_type);
839tout <<
"Projected mem usage for non-zero elements: "<< non_zero_mem <<
" " 840<< non_zero_mem / (1024*1024) <<
" MB" 846 template<
classSV,
typenameTOut>
849 typenameSV::octet_freq_matrix_type octet_stat_matr;
851str_svect.calc_octet_stat(octet_stat_matr);
853 for(
unsigned i= 0;
i< octet_stat_matr.rows(); ++
i)
856= octet_stat_matr.row(
i);
858 for(
unsignedj = 0; j < octet_stat_matr.cols(); ++j)
871 for(
unsignedj = 0; j < octet_stat_matr.cols(); ++j)
881tout <<
"\t total= "<<
cnt;
885tout <<
" (empty) ";
893 template<
classVECT>
896std::ofstream fout(fname.c_str(), std::ios::binary);
899 size_tsz = vect.size();
900fout.write((
char*)&sz,
sizeof(sz));
905fout.write((
char*)vect.data(),
916 template<
classVECT>
919std::ifstream fin(fname.c_str(),
std::ios::in| std::ios::binary);
923fin.read((
char*) &sz,
sizeof(sz));
941 template<
classCBC>
945 typenameCBC::buffer_type sbuf;
949std::ofstream fout(fname.c_str(), std::ios::binary);
954 const char*
buf= (
char*)sbuf.buf();
955fout.write(
buf, sbuf.size());
965*blob_size = sbuf.size();
972 template<
classCBC>
975std::vector<unsigned char>
buffer;
988 const unsigned char*
buf= &
buffer[0];
1000 template<
classSV>
1002 size_t* sv_blob_size=0,
booluse_xor =
true)
1012std::ofstream fout(fname.c_str(), std::ios::binary);
1017 const char*
buf= (
char*)sv_lay.
buf();
1018 autosz = sv_lay.
size();
1019fout.write(
buf, std::streamsize(sz));
1029*sv_blob_size = sv_lay.
size();
1034 template<
classSV>
1037std::vector<unsigned char>
buffer;
1045 if(
buffer.size() == 0)
1050 const unsigned char*
buf= &
buffer[0];
1066 template<
classSV,
classV>
1069 if(sv.size() != vect.size())
1073 for(
size_t i= 0;
i< vect.size(); ++
i)
1075 unsignedv1 = sv[(unsigned)
i];
1076 unsigned v2= vect[
i];
1084 template<
classSV,
classBV>
1087 typenameSV::back_insert_iterator bit = sv.get_back_inserter();
1088 typenameBV::enumerator en = bv.first();
1089 for(; en.valid(); ++en)
1091 autov = en.value();
1102 size_tgetCurrentRSS( )
1106 if( (
fp= fopen(
"/proc/self/statm",
"r")) ==
NULL)
1107 return(
size_t)0L;
1108 if( fscanf(
fp,
"%*s%ld", &rss ) != 1 )
1111 return(
size_t)0L;
1114 return(
size_t)rss * (size_t)sysconf( _SC_PAGESIZE);
1119 template<
classBV,
typenameTOut>
1122tout << bv.count() <<
": ";
1123 typenameBV::enumerator en = bv.first();
1124 for(; en.valid(); ++en)
1125tout << *en <<
", ";
1135 #pragma warning( pop )#define BM_DECLARE_TEMP_BLOCK(x)
#define IS_FULL_BLOCK(addr)
#define IS_VALID_ADDR(addr)
Serialization for sparse_vector<>
Bitvector Bit-vector container with runtime compression of bits.
void calc_stat(struct bm::bvector< Alloc >::statistics *st) const noexcept
Calculates bitvector statistics.
size_type count() const noexcept
population count (count of ON bits)
size_t size() const noexcept
Get buffer size.
const unsigned char * buf() const noexcept
Get read access to buffer memory.
Deseriaizer for compressed collections.
int deserialize(CBC &buffer_coll, const unsigned char *buf, bm::word_t *temp_block=0)
Seriaizer for compressed collections.
void serialize(const CBC &buffer_coll, buffer_type &buf, bm::word_t *temp_block=0)
Serialize compressed collection into memory buffer.
Bit-vector serialization class.
void set_compression_level(unsigned clevel) noexcept
Set compression level.
void set_bic_dynamic_range_reduce(bool flag) noexcept
enable/disable dynamic range reduction for BIC compression where possible.
size_type serialize(const BV &bv, unsigned char *buf, size_t buf_size)
Bitvector serialization into memory block.
Serialize sparse vector into a memory buffer(s) structure.
void set_xor_ref(bool is_enabled) noexcept
Turn ON and OFF XOR compression of sparse vectors Enables XOR reference compression for the sparse ve...
void serialize(const SV &sv, sparse_vector_serial_layout< SV > &sv_layout)
Serialize sparse vector into a memory buffer(s) structure.
std::ofstream out("events_result.xml")
main entry point for tests
static DLIST_TYPE *DLIST_NAME() prev(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
static const char * str(char *buf, int n)
static const column_t columns[]
bm::id_t bit_block_count(const bm::word_t *block) noexcept
Bitcount for bit block.
size_t deserialize(BV &bv, const unsigned char *buf, bm::word_t *temp_block=0, const bm::bv_ref_vector< BV > *ref_vect=0)
Bitvector deserialization from a memory BLOB.
@ COUNT_AND
(A & B).count()
@ COUNT_OR
(A | B).count()
unsigned gap_bit_count(const T *buf, unsigned dsize=0) noexcept
Calculates number of bits ON in GAP buffer.
unsigned gap_control_sum(const T *buf) noexcept
Calculates sum of all words in GAP block. (For debugging purposes)
T gap_level(const T *buf) noexcept
Returs GAP blocks capacity level.
bm::gap_word_t gap_length(const bm::gap_word_t *buf) noexcept
Returs GAP block length.
int sparse_vector_deserialize(SV &sv, const unsigned char *buf, bm::word_t *temp_block=0)
Deserialize sparse vector.
const unsigned set_array_mask
const unsigned set_block_plane_cnt
void print_svector_stat(TOut &tout, const SV &svect, bool print_sim=false)
int svector_check(const SV &sv, const V &vect)
void PrintDGap(TOut &tout, const bm::gap_word_t *gap_buf, unsigned gap_len=0)
int file_save_compressed_collection(const CBC &cbc, const std::string &fname, size_t *blob_size=0)
unsigned BinStrLR(const char *str)
Binary code string converted to number Bits are expected left to right.
unsigned PrintGammaCode(TOut &tout, unsigned value)
void print_bc(TOut &tout, unsigned i, unsigned count)
void print_blocks_count(TOut &tout, const BV &bv)
void PrintTMatrix(TOut &tout, const TM &tmatrix, unsigned cols=0, bool binary=false)
void PrintGap(TOut &tout, const bm::gap_word_t *gap_buf)
void get_block_coord(BI_TYPE nb, unsigned &i, unsigned &j) noexcept
Recalc linear bvector block index into 2D matrix coordinates.
int file_load_compressed_collection(CBC &cbc, const std::string &fname)
void print_bv(TOut &tout, const BV &bv)
void print_str_svector_stat(TOut &tout, const SV &str_svect)
size_t print_bvector_stat(TOut &tout, const BV &bvect)
const unsigned set_total_blocks
void PrintDGapGamma(TOut &tout, const bm::gap_word_t *gap_buf, unsigned gap_len=0)
void PrintDistanceMatrix(TOut &tout, const unsigned distance[bm::set_block_plane_cnt][bm::set_block_plane_cnt])
int load_vector(VECT &vect, const std::string &fname)
const unsigned bie_cut_off
const unsigned gap_levels
size_t compute_serialization_size(const BV &bv)
void convert_bv2sv(SV &sv, const BV &bv)
const unsigned set_block_size
unsigned long long int id64_t
int read_dump_file(const std::string &fname, VT &data)
Read dump file into an STL container (vector of some basic type)
void PrintBits32(TOut &tout, unsigned val)
void build_jaccard_similarity_batch(SIMBATCH &sbatch, const SV &sv)
Utility function to build jaccard similarity batch for sparse_vector<>
unsigned int iLog2(unsigned int value)
const unsigned set_array_shift
void print_stat(TOut &tout, const BV &bv, typename BV::block_idx_type blocks=0)
unsigned short gap_word_t
void LoadBVector(const char *fname, TBV &bvector, unsigned *file_size=0)
const unsigned set_block_shift
int file_save_svector(const SV &sv, const std::string &fname, size_t *sv_blob_size=0, bool use_xor=true)
void PrintBinary(TOut &tout, V val)
void SaveBVector(const char *fname, const TBV &bvector, bool safe_mode=false)
int save_vector(const VECT &vect, const std::string &fname)
void SaveBlob(const char *name_prefix, unsigned num, const char *ext, const unsigned char *blob, size_t blob_size)
int file_load_svector(SV &sv, const std::string &fname)
double value_type
The numeric datatype used by the parser.
const GenericPointer< typename T::ValueType > T2 value
std::istream & in(std::istream &in_, double &x_)
static SLJIT_INLINE sljit_ins st(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
static SLJIT_INLINE sljit_ins l(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
#define row(bind, expected)
static DP_BlockInfo * blocks
Structure to compute XOR gap-count profile by sub-block waves.
Distance metric descriptor, holds metric code and result.
layout class for serialization buffer structure
const unsigned char * buf() const noexcept
Return serialization buffer pointer.
size_t size() const noexcept
return current serialized size
Mini-matrix for bit transposition purposes.
static unsigned cols() noexcept
static unsigned rows() noexcept
const T * row(unsigned row_idx) const noexcept
int g(Seg_Gsm *spe, Seq_Mtf *psm, Thd_Gsm *tdg)
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4