m_ListCapacity(capacity),
59 char* max_entry_str = getenv(
"MAX_LMDB_TXN_ENTRY");
82 for(
unsigned int i=0;
i< vol_names.size();
i++) {
89rc = volinfo.
put(txn,
i, vol_num_oids[
i]);
124 if(seqid->
IsGi()) {
135 m_list.push_back(kv_pir);
139 if(seqid->
IsPdb()) {
142kv_pdb_mol.
oid= oid;
143 m_list.push_back(kv_pdb_mol);
146 stringid_upper = kv_pdb_mol.
id;
148 if(kv_pdb_mol.
id!= id_upper) {
167 if( kv.
id!= id_v) {
186 stringid_upper = kv.
id;
188 if(kv.
id!= id_upper) {
203 const size_tMIN_PAGES = 3;
204 const size_tBRANCH_PAGES = 2;
206 size_tvol_name_size = (vol_names.front().size() + 24)* vol_names.size();
207 size_tvol_info_size = 24* vol_names.size();
216 size_tpage_max_size = page_size -16;
219 size_tleaf_pages_needed = vol_name_size/page_max_size + vol_info_size/page_max_size + 2;
220 size_ttotal_pages_needed = MIN_PAGES + BRANCH_PAGES + leaf_pages_needed;
221 if( (total_pages_needed + last_page_num) > max_num_pages ) {
222 size_tnewMapSize = (total_pages_needed + last_page_num) * page_size;
224 LOG_POST(
Info<<
"Increased lmdb mapsize to "<< newMapSize);
240 size_tpage_max_size = page_size -16;
243 size_tleaf_pages_needed =
size/page_max_size + 1;
244 size_tdup_pages = (leaf_pages_needed > 200) ? 14: 7;
245 size_tbranch_pages_needed = (avg_id_length + 16)* leaf_pages_needed/page_max_size + 1;
246 size_ttotal_pages_needed = leaf_pages_needed + branch_pages_needed + dup_pages;
247 if( (total_pages_needed + last_page_num) > max_num_pages) {
248 size_tnewMapSize = (total_pages_needed + last_page_num) * page_size;
250 LOG_POST(
Info<<
"Increased lmdb mapsize to "<< newMapSize);
254 void CWriteDB_LMDB::x_Split(vector<SKeyValuePair>::iterator
b, vector<SKeyValuePair>::iterator e,
const unsigned intmin_chunk_size)
257 unsigned intchunk = (e -
b);
258 if(chunk < min_chunk_size) {
266 x_Split(
b, (
b+chunk), min_chunk_size);
268 x_Split((
b+chunk),e, min_chunk_size);
277 if(
m_list.size() == 0) {
284 char* min_split_str = getenv(
"LMDB_MIN_SPLIT_SIZE");
285 char* chunk_str = getenv(
"LMDB_SPLIT_CHUNK_SIZE");
288 _TRACE(
"DEBUG: LMDB_SPLIT_CHUNK_SIZE "<< chunk_str);
292 _TRACE(
"DEBUG: LMDB LMDB_MIN_SPLIT_SIZE "<< min_split_str);
300 if(num_chunks < num_threads) {
301num_threads = num_chunks;
304omp_set_num_threads(num_threads);
306 #pragma omp single nowait 316 while(j <
m_list.size()){
348 const unsigned charbyte_max = 0xFF;
350 sort(ids.begin(), ids.end());
351 for(
unsigned intj =0; j < ids.size(); j++) {
352 Uint4id_len = ids[j].size();
353 if(id_len >= byte_max) {
354os.write((
char*)&byte_max, 1);
355os.write((
char*)&id_len, 4);
359 char l= byte_max & id_len;
363os.write(ids[j].c_str(), id_len);
371 if(
m_list.size() == 0) {
378vector<Uint4>
offsets(total_num_oids, 0);
380os.write((
char*)&total_num_oids, 8);
382 for(
unsigned int i=0;
i< total_num_oids;
i++) {
383os.write((
char*) &
offset, 8);
388vector<string> tmp_ids;
389 for(
unsigned int i= 0;
i<
m_list.size();
i++) {
404 if(!
m_list[
i].saveToOidList) {
407tmp_ids.push_back(
m_list[
i].
id);
419 for(
unsigned int i= 0;
i< total_num_oids;
i++) {
421os.write((
char*) &
offset, 8);
441 char* max_entry_str = getenv(
"MAX_LMDB_TXN_ENTRY");
460 if(tax_ids.
size() == 0) {
479 const size_tMIN_PAGES = 4;
487 size_tpage_max_size = stat.
ms_psize- 16;
490 size_tleaf_pages_needed =
size/page_max_size + 1;
491 size_tbranch_pages_needed = 24 * leaf_pages_needed/page_max_size + 1;
492 size_ttotal_pages_needed = leaf_pages_needed + branch_pages_needed + MIN_PAGES;
493 if( (total_pages_needed + last_page_num) > max_num_pages) {
494 size_tnewMapSize = (total_pages_needed + last_page_num) * page_size;
496 LOG_POST(
Info<<
"Increased lmdb mapsize to "<< newMapSize);
535 for(
unsigned intj =0; j < tax_ids.size(); j++) {
537os.write((
char*)&tid, 4);
539 returntax_ids.size();
551vector<Uint4>
offsets(total_num_oids, 0);
553os.write((
char*)&total_num_oids, 8);
555 for(
unsigned int i=0;
i< total_num_oids;
i++) {
556os.write((
char*) &
offset, 8);
561vector<TTaxId> tmp_tax_ids;
579 for(
unsigned int i= 0;
i< total_num_oids;
i++) {
581os.write((
char*) &
offset, 8);
590blastdb::SortAndUnique <blastdb::TOid> (oids);
591 Uint4num_oids = oids.size();
592os.write((
char*)&num_oids, 4);
593 for(
unsigned intj =0; j < num_oids; j++) {
594os.write((
char*)&oids[j], 4);
596 return((num_oids +1) * 4);
606vector<blastdb::TOid> tmp_oids;
Class for managing LMDB env; each env should only be opened once.
static CBlastLMDBManager & GetInstance()
void CloseEnv(const string &fname)
void x_IncreaseEnvMapSize()
vector< SKeyValuePair > m_list
void x_InsertEntry(const CRef< CSeq_id > &seqid, const blastdb::TOid oid)
void InsertVolumesInfo(const vector< string > &vol_names, const vector< blastdb::TOid > &vol_num_oids)
Create volume table This api should only be called once to create vol info for all vols in the db.
unsigned int m_MaxEntryPerTxn
void x_CreateOidToSeqidsLookupFile()
int InsertEntries(const list< CRef< CSeq_id >> &seqids, const blastdb::TOid oid)
Add entries in bulk as fetched from CSeqDB::GetSeqIDs.
void x_Split(vector< SKeyValuePair >::iterator b, vector< SKeyValuePair >::iterator e, const unsigned int min_chunk_size)
void x_CommitTransaction()
CWriteDB_LMDB(const string &dbname, Uint8 map_size=500000, Uint8 capacity=500000)
Constructor for LMDB write access.
CWriteDB_TaxID(const string &dbname, Uint8 map_size=500000, Uint8 capacity=500000)
Constructor for LMDB write access.
void x_IncreaseEnvMapSize()
vector< SKeyValuePair< blastdb::TOid > > m_TaxId2OidList
int InsertEntries(const set< TTaxId > &tax_ids, const blastdb::TOid oid)
Add tax id entries in bulk for each oid This api needs to be called in sequential order of OIDs This ...
vector< SKeyValuePair< Uint8 > > m_TaxId2OffsetsList
void x_CreateOidToTaxIdsLookupFile()
void x_CreateTaxIdToOidsLookupFile()
void x_CommitTransaction()
unsigned int m_MaxEntryPerTxn
Resource class for `MDB_dbi` handles.
static dbi open(MDB_txn *const txn, const char *const name=nullptr, const unsigned int flags=default_flags)
Opens a database handle.
bool put(MDB_txn *const txn, const val &key, const val &data, const unsigned int flags=default_put_flags)
Stores a key/value pair into this database.
MDB_dbi handle() const noexcept
Returns the underlying `MDB_dbi` handle.
MDB_env * handle() const noexcept
Returns the underlying `MDB_env*` handle.
env & set_mapsize(const std::size_t size)
Resource class for `MDB_txn*` handles.
void commit()
Commits this transaction.
static txn begin(MDB_env *const env, MDB_txn *const parent=nullptr, const unsigned int flags=default_flags)
Creates a new LMDB transaction.
Wrapper class for `MDB_val` structures.
static const int chunk_size
Include a standard set of the NCBI C++ Toolkit most basic headers.
std::ofstream out("events_result.xml")
main entry point for tests
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define TAX_ID_TO(T, tax_id)
SStrictId_Tax::TId TTaxId
Taxon id type.
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
void Info(CExceptionArgs_Base &args)
virtual bool Remove(TRemoveFlags flags=eRecursive) const
Remove a directory entry.
const TPrim & Get(void) const
const string AsFastaString(void) const
string GetSeqIdString(bool with_version=false) const
Return seqid string with optional version for text seqid type.
const CTextseq_id * GetTextseq_Id(void) const
Return embedded CTextseq_id, if any.
int32_t Int4
4-byte (32-bit) signed integer
uint32_t Uint4
4-byte (32-bit) unsigned integer
uint64_t Uint8
8-byte (64-bit) unsigned integer
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
IO_PREFIX::ofstream CNcbiOfstream
Portable alias for ofstream.
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
static unsigned int StringToUInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to unsigned int.
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
static string & ToUpper(string &str)
Convert string to upper case -- string& version.
const TPdb & GetPdb(void) const
Get the variant data.
bool IsPrf(void) const
Check if variant Prf is selected.
bool IsPdb(void) const
Check if variant Pdb is selected.
const TMol & GetMol(void) const
Get the Mol member data.
bool IsGi(void) const
Check if variant Gi is selected.
bool IsPir(void) const
Check if variant Pir is selected.
char * dbname(DBPROCESS *dbproc)
Get name of current database.
#define MDB_INTEGERKEY
numeric keys in native byte order: either unsigned int or size_t.
#define MDB_DUPFIXED
with MDB_DUPSORT, sorted dup items have fixed size
#define MDB_DUPSORT
use sorted duplicates
#define MDB_CREATE
create DB if not already existing
#define MDB_APPENDDUP
Duplicate data is being appended, don't split full pages.
size_t me_mapsize
Size of the data memory map.
unsigned int ms_psize
Size of a database page.
size_t me_last_pgno
ID of the last used page.
constexpr auto sort(_Init &&init)
static void env_info(MDB_env *env, MDB_envinfo *stat)
static bool dbi_put(MDB_txn *txn, MDB_dbi dbi, const MDB_val *key, const MDB_val *data, unsigned int flags)
static void env_stat(MDB_env *env, MDB_stat *stat)
const struct ncbi::grid::netcache::search::fields::SIZE size
const struct ncbi::grid::netcache::search::fields::KEY key
const GenericPointer< typename T::ValueType > T2 value
unsigned int GetCpuCount(void)
Return number of active CPUs (never less than 1).
Defines classes: CDirEntry, CFile, CDir, CSymLink, CMemoryFile, CFileUtil, CFileLock,...
static PCRE2_SIZE * offsets
Defines interface to interact with LMDB files.
const string taxid2offset_str
string GetFileNameFromExistingLMDBFile(const string &lmdb_filename, ELMDBFileType file_type)
Int4 TOid
Ordinal ID in BLAST databases.
static SLJIT_INLINE sljit_ins l(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
static bool cmp_key(const SKeyValuePair &v, const SKeyValuePair &k)
The database environment.
Information about the environment.
Statistics for a database in the environment.
Uint4 s_WirteTaxIds(CNcbiOfstream &os, vector< TTaxId > &tax_ids)
#define DEFAULT_MIN_SPLIT_CHUNK_SIZE
#define DEFAULT_MAX_ENTRY_PER_TXN
Uint4 s_WirteOids(CNcbiOfstream &os, vector< blastdb::TOid > &oids)
#define DEFAULT_MIN_SPLIT_SORT_SIZE
Uint4 s_WirteIds(CNcbiOfstream &os, vector< string > &ids)
Defines lmdb implementation of string-key database.
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4