oss << std::fixed <<
value;
53 #define SPEED(time, nentries) s_FormatNum((size_t)((nentries)/(time))) 79m_Filename(fname), m_FileType(
file_type),m_Env(
lmdb::
env::create()), m_Count(1), m_ReadOnly(read_only)
88 NCBI_THROW(
CSeqDBException, eFileErr,
"File "+ fname +
" not found. If you renamed any BLAST database files, please use original file names, and makeblastdb to rename the database. If you deleted any BLAST database files, you need to recreate the database.");
116 for(
unsigned int i=0;
i< m_dbis.size();
i++){
117 if(m_dbis[
i] != UINT_MAX) {
126 if(m_dbis[dbi_type] == UINT_MAX) {
127 stringerr =
"DB contains no ";
134err +=
"accession info.";
136 caseeDbiTaxid2offset:
137err +=
"tax id info";
145 returnm_dbis[dbi_type];
151m_Env.set_mapsize(map_size);
157 returnlmdb_manager.
Get();
187 if((*itr)->GetFilename() == fname) {
188(*itr)->AddReference();
189 if( opened && !*opened ) {
190(*itr)->AddReference();
198 if( opened && !*opened ) {
209 if((*itr)->GetFilename() == fname) {
210(*itr)->AddReference();
211 return(*itr)->GetEnv();
224 if((*itr)->GetFilename() == fname) {
225 if((*itr)->RemoveReference() == 0) {
248m_LMDBFileOpened(
false),
262 CSeqDBLMDB::GetOid(
const string& accession, vector<blastdb::TOid> & oids,
const boolallow_dup)
const 273 stringacc = accession;
276 if(cursor.get(data2find,
MDB_SET)) {
279 const char* d =
val.data();
280oids.push_back(((d[3] << 24)&0xFF000000) | ((d[2] << 16) & 0xFF0000) | ((d[1] << 8) & 0xFF00) | (d[0]&0xFF));
285oids.push_back(((d[3] << 24)&0xFF000000) | ((d[2] << 16) & 0xFF0000) | ((d[1] << 8) & 0xFF00) | (d[0]&0xFF));
312vol_num_oids.clear();
317 MDB_statvolinfo_stat, volname_stat;
325vol_num_oids.resize(volinfo_stat.
ms_entries);
329 for(
unsigned int i=0;
i< volinfo_stat.
ms_entries;
i++) {
331 if(cursor_volname.get(data2find,
MDB_SET)) {
335vol_names[
i].assign(
val.data(),
val.size());
337 if(cursor_volinfo.get(data2find,
MDB_SET)) {
340 const char* d =
val.data();
341vol_num_oids[
i] = (((d[3] << 24)&0xFF000000) | ((d[2] << 16) & 0xFF0000) | ((d[1] << 8) & 0xFF00) | (d[0]&0xFF));
348cursor_volname.close();
349cursor_volinfo.close();
354 for(
unsigned intj=0; j < vol_num_oids.size(); j++){
375 for(
i=0;
i< accessions.size();
i++) {
376 stringacc = accessions[
i];
378 if(cursor.get(data2find,
MDB_SET)) {
381 const char* d =
val.data();
382oids[
i] = (((d[3] << 24)&0xFF000000) | ((d[2] << 16) & 0xFF0000) | ((d[1] << 8) & 0xFF00) | (d[0]&0xFF));
409 return(v.
id< k.
id);
411 return(v.
oid< k.
oid);
441 while(begin < end) {
442 unsigned charid_len = *begin;
444 if(id_len == 0xFF) {
448 id.assign(begin, long_id_len);
449begin += long_id_len;
450idlist.push_back(
id);
454 id.assign(begin, id_len);
456idlist.push_back(
id);
464vector<string>::iterator f_itr = file_idlist.begin();
465vector<string>::iterator i_itr = input_idlist.begin();
466 while(f_itr != file_idlist.end() && i_itr != input_idlist.end()) {
467 if(*i_itr == *f_itr) {
479 stringtmp_pdb = *i_itr;
480 while((f_itr != file_idlist.end()) && ((*f_itr).find_first_of(tmp_pdb) == 0)){
484 while((i_itr != input_idlist.end()) && ((*i_itr).find_first_of(tmp_pdb) == 0)){
495 if((i_itr != input_idlist.end()) && (file_seq_id.
GetSeqIdString(
true) == *i_itr)){
505 if(f_itr == file_idlist.end()){
510input_idlist.clear();
518vector<blastdb::TOid> oids;
520vector<SOidSeqIdPair> pairs;
521 for(
unsigned int i=0;
i< ids.size();
i++) {
531 if(pairs.size() == 0) {
541 while(
i< pairs.size()) {
542vector<string> file_idlist;
543vector<string> input_idlist;
544current_oid = pairs[
i].oid;
545 lookup.GetSeqIdListForOid(current_oid, file_idlist);
546 while((
i< pairs.size()) && (current_oid == pairs[
i].oid)) {
547input_idlist.push_back(pairs[
i].
id);
551rv.push_back(current_oid);
567 autodbi(dbi_handle);
572tax_ids.push_back(taxid);
595tax_ids_found.clear();
607 if(cursor.get(data2find,
MDB_SET)) {
610 const char* d =
val.data();
611 offsets.push_back((((
Uint8) d[7] << 56) &0xFF00000000000000) | (((
Uint8) d[6] << 48) & 0xFF000000000000) |
612(((
Uint8) d[5] << 40) &0xFF0000000000) | (((
Uint8) d[4] << 32) & 0xFF00000000) |
613(((
Uint8) d[3] << 24) &0xFF000000) | (((
Uint8) d[2] << 16) & 0xFF0000) |
614(((
Uint8) d[1] << 8) &0xFF00) | ((
Uint8) d[0]&0xFF));
617 offsets.push_back((((
Uint8) d[7] << 56) &0xFF00000000000000) | (((
Uint8) d[6] << 48) & 0xFF000000000000) |
618(((
Uint8) d[5] << 40) &0xFF0000000000) | (((
Uint8) d[4] << 32) & 0xFF00000000) |
619(((
Uint8) d[3] << 24) &0xFF000000) | (((
Uint8) d[2] << 16) & 0xFF0000) |
620(((
Uint8) d[1] << 8) &0xFF00) | ((
Uint8) d[0]&0xFF));
622tax_ids_found.push_back(*itr);
629vector<bool> oids_set(
m_NumOids,
false);
632 const char* start_ptr = (
char*) oid_file.
GetPtr();
633 for(
unsigned int i=0;
i<
offsets.size();
i++) {
635 Uint4num_of_oids = *list_ptr;
638 while(
count< num_of_oids) {
639 if(!oids_set[*list_ptr]) {
640oids.push_back(*list_ptr);
641oids_set[*list_ptr] =
true;
648 intoids_sz = oids.size();
650 sort(oids.begin(), oids.end());
654oids.reserve(oids_sz);
655 for(
int i=0;
i< oids_set.size();
i++) {
702 while(begin < end) {
712vector<blastdb::TOid> oids;
715 set<TTaxId>tax_id_list(tax_ids_found.begin(), tax_ids_found.end());
717 for(
unsigned int i=0;
i< oids.size();
i++) {
718vector<TTaxId> file_list;
719 lookup.GetTaxIdListForOid(oids[
i], file_list);
720 if(file_list.size() > tax_ids.
size()) {
725 for(; j < file_list.size(); j++) {
726 if(tax_id_list.
find(file_list[j]) == tax_id_list.
end()) {
730 if(j == file_list.size()) {
731rv.push_back(oids[
i]);
741 for(
unsigned int i=0;
i< oids.size();
i++) {
742vector<TTaxId> taxid_list;
743 lookup.GetTaxIdListForOid(oids[
i], taxid_list);
744tax_ids.
insert(taxid_list.begin(), taxid_list.end());
752 throwinvalid_argument(
"Basename is empty");
757vol_str = (index > 9) ?
".":
".0";
760 return basename+ vol_str + (is_protein ?
".pdb":
".ndb");
766 stringfilename (lmdb_filename, 0, lmdb_filename.size() - 2);
794 ITERATE(vector<string>, itr, extn) {
795 CFile f(filename +
"."+ (*itr));
unsigned int AddReference()
MDB_dbi GetDbi(EDbiType dbi_type)
void SetMapSize(Uint8 map_size)
void InitDbi(lmdb::env &env, ELMDBFileType file_type)
CBlastEnv(const string &fname, ELMDBFileType file_type, bool read_only=true, Uint8 map_size=0)
Class for managing LMDB envs; each env should only be opened once.
static CBlastLMDBManager & GetInstance()
lmdb::env & GetWriteEnv(const string &fname, Uint8 map_size)
lmdb::env & GetReadEnvAcc(const string &fname, MDB_dbi &db_acc, bool *opened=0)
list< CBlastEnv * > m_EnvList
lmdb::env & GetReadEnvTax(const string &fname, MDB_dbi &db_tax, bool *opened=0)
void CloseEnv(const string &fname)
CBlastEnv * GetBlastEnv(const string &fname, ELMDBFileType file_type, bool *opened=0)
lmdb::env & GetReadEnvVol(const string &fname, MDB_dbi &db_volname, MDB_dbi &db_volinfo)
void GetSeqIdListForOid(blastdb::TOid oid, vector< string > &idlist)
CLookupSeqIds(CMemoryFile &file)
void GetTaxIdListForOid(blastdb::TOid oid, vector< TTaxId > &taxid_list)
CLookupTaxIds(CMemoryFile &file)
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string. Sample usage:
T & Get(void)
Create the variable if not created yet, return the reference.
void GetOids(const vector< string > &accessions, vector< blastdb::TOid > &oids) const
Get OIDs for a vector of string accessions.
void GetOid(const string &accession, vector< blastdb::TOid > &oids, const bool allow_dup=false) const
Get OIDs for single string accession.
string m_TaxId2OffsetsFile
void GetTaxIdsForOids(const vector< blastdb::TOid > &oids, set< TTaxId > &tax_ids) const
Get Tax Ids for oid list.
CSeqDBLMDB(const string &fname)
void GetOidsForTaxIds(const set< TTaxId > &tax_ids, vector< blastdb::TOid > &oids, vector< TTaxId > &tax_ids_found) const
Get Oids for Tax Ids list; identical Oids are merged.
void NegativeSeqIdsToOids(const vector< string > &ids, vector< blastdb::TOid > &rv) const
Get Oids excluded from a vector of input accessions. An oid only gets excluded if all its seqids are fo...
void NegativeTaxIdsToOids(const set< TTaxId > &ids, vector< blastdb::TOid > &rv, vector< TTaxId > &tax_ids_found) const
Get Oids to exclude for Tax ids. @param ids Input tax ids to exclude / Output tax ids found.
void GetVolumesInfo(vector< string > &vol_names, vector< blastdb::TOid > &vol_num_oids)
Return info for all volumes.
void GetDBTaxIds(vector< TTaxId > &tax_ids) const
Get all unique Tax Ids for db. @param tax_ids Returns all unique tax ids found in db.
CSeqDB_Substring FindBaseName() const
Returns the portion of this path containing the base name.
void GetString(string &s) const
Return the data by assigning it to a string.
static cursor open(MDB_txn *const txn, const MDB_dbi dbi)
Creates an LMDB cursor.
Resource class for `MDB_dbi` handles.
static dbi open(MDB_txn *const txn, const char *const name=nullptr, const unsigned int flags=default_flags)
Opens a database handle.
MDB_dbi handle() const noexcept
Returns the underlying `MDB_dbi` handle.
Resource class for `MDB_env*` handles.
env & open(const char *const path, const unsigned int flags=default_flags, const mode mode=default_mode)
Opens this environment.
env & set_max_dbs(const MDB_dbi count)
env & set_mapsize(const std::size_t size)
Base class for LMDB exception conditions.
virtual const char * what() const noexcept
Returns a string describing the underlying LMDB error.
int code() const noexcept
Returns the underlying LMDB error code.
static txn begin(MDB_env *const env, MDB_txn *const parent=nullptr, const unsigned int flags=default_flags)
Creates a new LMDB transaction.
Wrapper class for `MDB_val` structures.
iterator_bool insert(const value_type &val)
const_iterator find(const key_type &key) const
const_iterator end() const
static int lookup(const char *name, const struct lookup_int *table)
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define TAX_ID_TO(T, tax_id)
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
SStrictId_Tax::TId TTaxId
Taxon id type.
#define TAX_ID_FROM(T, value)
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
void Info(CExceptionArgs_Base &args)
Int8 GetLength(void) const
Get size of file.
void * GetPtr(void) const
Get pointer to beginning of data.
virtual bool Exists(void) const
Check existence of file.
const TPrim & Get(void) const
string GetSeqIdString(bool with_version=false) const
Return seqid string with optional version for text seqid type.
@ fParse_RawText
Try to ID raw non-numeric accessions.
@ fParse_PartialOK
Warn rather than throwing an exception when a FASTA-style ID set contains unparsable portions,...
@ fParse_AnyLocal
Treat otherwise unidentified strings as local accessions as long as they don't resemble FASTA-style I...
int32_t Int4
4-byte (32-bit) signed integer
char Char
Alias for char.
uint32_t Uint4
4-byte (32-bit) unsigned integer
uint64_t Uint8
8-byte (64-bit) unsigned integer
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
static string UIntToString(unsigned int value, TNumToStringFlags flags=0, int base=10)
Convert UInt to string.
const TPdb & GetPdb(void) const
Get the variant data.
bool IsSetChain_id(void) const
chain identifier; length-independent generalization of 'chain' Check if a value has been assigned to ...
bool IsPdb(void) const
Check if variant Pdb is selected.
const TMol & GetMol(void) const
Get the Mol member data.
char * dbname(DBPROCESS *dbproc)
Get name of current database.
#define MDB_NOTFOUND
key/data pair not found (EOF)
void mdb_dbi_close(MDB_env *env, MDB_dbi dbi)
Close a database handle.
@ MDB_SET
Position at specified key.
@ MDB_NEXT_DUP
Position at next data item of current key.
@ MDB_NEXT
Position at next data item.
@ MDB_GET_CURRENT
Return key/data at current cursor position.
#define MDB_INTEGERKEY
numeric keys in native byte order: either unsigned int or size_t.
#define MDB_DUPFIXED
with MDB_DUPSORT, sorted dup items have fixed size
#define MDB_DUPSORT
use sorted duplicates
#define MDB_NOLOCK
don't do any locking, caller must manage their own locks
#define MDB_NOSUBDIR
no environment directory
#define MDB_RDONLY
read only
size_t ms_entries
Number of data items.
unsigned int MDB_dbi
A handle for an individual database in the DB environment.
constexpr auto sort(_Init &&init)
static void dbi_stat(MDB_txn *txn, MDB_dbi dbi, MDB_stat *stat)
const struct ncbi::grid::netcache::search::fields::KEY key
const GenericPointer< typename T::ValueType > T2 value
Defines classes: CDirEntry, CFile, CDir, CSymLink, CMemoryFile, CFileUtil, CFileLock,...
static const char * locale
static PCRE2_SIZE * offsets
void DeleteLMDBFiles(bool db_is_protein, const string &filename)
string BuildLMDBFileName(const string &basename, bool is_protein, bool use_index, unsigned int index)
Build the canonical LMDB file name for BLAST databases.
bool s_CompareIdList(vector< string > &file_idlist, vector< string > &input_idlist)
static string s_FormatNum(T value)
string GetFileNameFromExistingLMDBFile(const string &lmdb_filename, ELMDBFileType file_type)
Defines interface to interact with LMDB files.
const string taxid2offset_str
void SeqDB_GetLMDBFileExtensions(bool db_is_protein, vector< string > &extn)
Retrieves file extensions for BLAST LMDB files.
const blastdb::TOid kSeqDBEntryNotFound
Int4 TOid
Ordinal ID in BLAST databases.
This file defines several SeqDB utility functions related to byte order and file system portability.
Statistics for a database in the environment.
SOidSeqIdPair(blastdb::TOid o, const string &i)
static bool cmp_oid(const SOidSeqIdPair &v, const SOidSeqIdPair &k)
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4