= 1024L*1024L;
54: out_stream( os ), alloc( alloc ), metadata( metadata ),
56fmt_gen_algo_ver( StatAlgoVersion ),
97 units.push_back( unit );
105string::size_type pos = name.find_first_of(
' ');
106 stringreal_name = name.substr( 0, pos );
108 for(
unsignedind = 0;
109ind <
sizeof(
PARAMS)/
sizeof(
const char* ); ++ind )
110 if( real_name ==
PARAMS[ind] )
116 LOG_POST(
Error<<
"Unknown parameter name "<< real_name );
123 typedefvector< Uint4 >::size_type size_type;
127 _TRACE(
"divisor: "<< divisor <<
" size: "<<
128(total/(2048*divisor)) <<
" KB");
129size_type
size= (size_type)( total/divisor );
135 catch( std::exception & e )
144fill( *cba, *cba +
size, 0 );
146 for( size_type
i= 0;
i<
units.size(); ++
i)
153 Uint4& word = (*cba)[unit/divisor];
154word |= (1ULL<<(unit%divisor));
155 Uint4& word1 = (*cba)[rcunit/divisor];
156word1 |= (1ULL<<(rcunit%divisor));
167 Uint4sz = (k < 32 ? (1ULL<<k) : 0x80000000ULL);
172 for(
Uint1 i= 0;
i<= u - k; ++
i)
174fill( ht, ht + sz, 0 );
177j !=
units.end(); ++j )
180mxcoll[
i] = *max_element( ht, ht + sz );
190 if( tc > 0 ) avcoll[
i] = (double)
t/tc;
191 elseavcoll[
i] = 0;
204 const double* minav = min_element( avcoll, avcoll + u - k + 1 );
205 Uint1roff = (minav - avcoll);
206max_coll = mxcoll[roff];
214 LOG_POST(
"optimizing the data structure");
218 Uint1roff( 0 ), max_coll( 0 );
223 for(
Uint1 i= 0;
i< k + 2; ++
i) emem *= 2;
235 if(
sizeof(
char* ) <=
sizeof(
Uint4) ) {
236 if( k > 8*
sizeof(
char* ) - 4 )
237k = 8*
sizeof(
char* ) - 4;
242 "Can not find parameters to satisfy memory requirements");
244 Uint1b0( 0 ), bc( 0 );
245 Uint4sz = (k < 32 ? (1ULL<<k) : 0x80000000ULL);
255 while( (1ULL<<bc) <= max_coll )
262 while( (1ULL<<b0) <=
M)
275 "Can not find parameters to satisfy memory requirements");
277 _TRACE(
"Using the following hash parameters: \n" 278<<
"hash key length = "<< (
int)k <<
" bits\n" 279<<
"right offset = "<< (
int)roff <<
" bits\n" 280<<
"estimated size = "<< 2*
M+ (1ULL<<(k+2)) <<
" bytes\n");
291fill( htp, htp + sz, 0 );
294j !=
units.end(); ++j )
300 Uint4coll_mask = ((1ULL<<bc) - 1);
304pair< Uint4, Uint1 >
hash 306 Uint1ccount = (htp[
hash.first]&coll_mask);
313htp[
hash.first] += hsb;
317 if( (htp[
hash.first]&~coll_mask) == 0 )
320htp[
hash.first] += ((vend - 1)<<bc);
322 elsehtp[
hash.first] -= (1ULL<<bc);
324vtp[htp[
hash.first]>>bc]
330 paramsp = {
M, k, roff, bc, htp, vtp, cba };
339 case eMemory:
return "insufficient memory";
Exceptions that CSeqMaskerOstatOpt might throw.
@ eMemory
Memory allocation problem.
virtual const char * GetErrCodeString() const override
Get a description string for this exception.
virtual void doSetUnitCount(Uint4 unit, Uint4 count)
Set count information for the given unit.
Uint1 UnitSize() const
Get the unit size value in bases.
virtual void doFinalize()
Generate a hash function and dump the optimized unit counts data to the output stream.
CSeqMaskerOstatOpt(CNcbiOstream &os, Uint2 sz, bool alloc, string const &metadata)
Object constructor.
virtual void write_out(const params &p) const =0
Dump the unit counts data to the output stream according to the requested format.
virtual void doSetUnitSize(Uint4 us)
Set the unit size value.
const vector< Uint4 > & GetParams() const
Get the values of masking parameters.
Uint1 findBestRoff(Uint1 k, Uint1 &max_coll, Uint4 &M, Uint4 *ht)
void createCacheBitArray(Uint4 **cba)
Base class for computing and saving unit counts data.
static const char * PARAMS[]
Algorithm parameter names.
virtual void doSetUnitSize(Uint4 us)
virtual void doSetParam(const string &, Uint4)
CSeqMaskerOstat(CNcbiOstream &os, bool alloc, string const &metadata)
Object constructor.
static pair< Uint4, Uint1 > hash_code(Uint4 unit, Uint1 k, Uint1 roff)
Compute a hash code of a unit.
static Uint4 reverse_complement(Uint4 seq, Uint1 size)
Reverse complement of a unit.
The NCBI C++ standard methods for dealing with std::string.
void reset(element_type *p=0, EOwnership ownership=eTakeOwnership)
Reset will delete the old pointer (if owned), set content to the new value, and assume the ownership ...
element_type * get(void) const
Get pointer.
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
void Error(CExceptionArgs_Base &args)
TErrCode GetErrCode(void) const
Get error code.
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
void Warning(CExceptionArgs_Base &args)
virtual const char * GetErrCodeString(void) const
Get error code interpreted as text.
uint8_t Uint1
1-byte (8-bit) unsigned integer
uint32_t Uint4
4-byte (32-bit) unsigned integer
uint16_t Uint2
2-byte (16-bit) unsigned integer
uint64_t Uint8
8-byte (64-bit) unsigned integer
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
const struct ncbi::grid::netcache::search::fields::SIZE size
const GenericPointer< typename T::ValueType > T2 value
Miscellaneous common-use basic types and functionality.
Portable reference counted smart and weak pointers using CWeakRef, CRef, CObject and CObjectEx.
Defines Limits for the types used in NCBI C/C++ toolkit.
static const unsigned long MB
static const unsigned long GROW_CHUNK
static SLJIT_INLINE sljit_ins l(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
Parameters of the optimized data structure.
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4