: m_AtlasHolder (
NULL, use_atlas_lock),
54m_Atlas (m_AtlasHolder.
Get()),
55m_DBNames (db_name_list),
56m_Aliases (m_Atlas, db_name_list, prot_nucl),
58m_Aliases.GetVolumeNames(),
63m_RestrictBegin (oid_begin),
64m_RestrictEnd (oid_end),
70m_ExactTotalLength(0),
71m_TotalLengthStats(0),
75m_SeqType (prot_nucl),
76m_OidListSetup (
false),
77m_UserGiList (gi_list),
78m_NegativeList (neg_list),
80m_NeedTotalsScan (
false),
81m_UseGiMask (m_Aliases.HasGiMask()),
82m_MaskDataColumn (kUnknownTitle),
88vector <string> mask_list;
159: m_AtlasHolder (
NULL, use_atlas_lock),
160m_Atlas (m_AtlasHolder.
Get()),
161m_Aliases (m_Atlas,
"",
'-'),
168m_ExactTotalLength(0),
171m_OidListSetup (
true),
172m_NeedTotalsScan (
false),
173m_UseGiMask (
false),
174m_MaskDataColumn (kUnknownTitle),
192 if((oid_begin == 0) && (oid_end == 0)) {
250 constvector< CRef<CSeqDB_FilterTree> >& nodes = ft->
GetNodes();
251 if(nodes.size() == 1) {
275 boolsuccess =
true;
304vector<int> & oid_list,
336begin_chunk = * state_obj;
342end_chunk = begin_chunk +
static_cast<int>(
buffer->results.size());
344end_chunk = begin_chunk + oid_size;
350*state_obj = end_chunk;
361 intnext_oid = begin_chunk;
364 while(next_oid < end_chunk) {
367next_oid < end_chunk) {
368oid_list.push_back(next_oid++);
370next_oid = end_chunk;
376oid_list.resize(oid_size);
377 while(iter < oid_size) {
382oid_list[iter++] = next_oid++;
388 if(iter < oid_size) {
389oid_list.resize(iter);
391*state_obj = next_oid;
417 returnvol->GetSeqLengthProt(vol_oid);
421 returnvol->GetSeqLengthExact(vol_oid);
436 returnvol->GetSeqLengthProt(vol_oid);
440 returnvol->GetSeqLengthApprox(vol_oid);
456gi_to_taxid.
clear();
462 if((! defline_set.
Empty()) && defline_set->
CanGet()) {
464 if(! (*defline)->CanGetSeqid()) {
468 if(! (*defline)->IsSetTaxid()) {
473 if(! (**seqid).IsGi()) {
477gi_to_taxid[(**seqid).GetGi()] = (*defline)->GetTaxid();
484vector<TTaxId> & taxids,
497 if((! defline_set.
Empty()) && defline_set->
CanGet()) {
499 if((*defline)->IsSetTaxid()) {
500taxids.push_back((*defline)->GetTaxid());
516 if((! defline_set.
Empty()) && defline_set->
CanGet()) {
535gi_to_taxid_set.clear();
541 if((! defline_set.
Empty()) && defline_set->
CanGet()) {
543 if(! (*defline)->CanGetSeqid()) {
548 if(! (**seqid).IsGi()) {
553gi_to_taxid_set[(**seqid).GetGi()].
insert(
563vector<TTaxId>& taxids,
577 if((! defline_set.
Empty()) && defline_set->
CanGet()) {
583 if((*defline)->CanGetSeqid()) {
587(*defline)->GetSeqid()
589 if((**seqid).IsGi()) {
591(*defline)->GetLeafTaxIds();
594leafTaxids.
begin(),
621 returnvol->GetBioseq(vol_oid,
659 if(
buffer->checked_out > 0) {
668 const char** seq)
const 672 if(index < buffer->
results.size()) {
673(
buffer->checked_out)++;
674*seq =
buffer->results[index].address;
675 return buffer->results[index].length;
679(
buffer->checked_out)++;
680*seq =
buffer->results[0].address;
681 return buffer->results[0].length;
699res.
length= vol->GetSequence(vol_oid++, &seq);
700 if(res.
length< 0)
return;
703tot_length -= res.
length;
705 buffer->results.push_back(res);
706res.
length= vol->GetSequence(vol_oid++, &seq);
726 returnvol->GetSequence(vol_oid,
buffer);
744 returnvol->GetSeqData(vol_oid, begin, end, locked);
761 returnvol->GetAmbigSeq(vol_oid,
782 returnvol->GetAmbigPartialSeq(vol_oid,
885 _ASSERT((rv & 0x7FFFFFFF) == rv);
904 TGigi = vol->GetSeqGI(vol_oid, locked);
907list< CRef<CSeq_id> > ids =
908vol->GetSeqIDs(vol_oid);
911 return(**id).GetGi();
928 _ASSERT((rv & 0x7FFFFFFF) == rv);
949 _ASSERT((num_oids & 0x7FFFFFFF) == num_oids);
951 return(
int) num_oids;
994 returnvol->GetSeqType();
1017}
else if(d != date) {
1061 returnvol->GetFilteredHeader(vol_oid, locked);
1096 for(
int i= 0;
i< (
int) s.size();
i++) {
1097 if(s[
i] ==
char(0)) {
1132 returnvol->GetPig(vol_oid, pig, locked);
1192 for(
unsigned i=0;
i< list.size();
i++) {
1262 returnvol->GetGi(vol_oid, gi, locked);
1282 for(
unsigned int i=0;
i<
tmp.size();
i++) {
1283 intoid2 =
tmp[
i];
1285oids.push_back(
tmp[
i]);
1290vector<int> vol_oids;
1298 if(vol_oids.empty()) {
1304 ITERATE(vector<int>, iter, vol_oids) {
1305 intoid1 = ((*iter) + vol_start);
1310 if(find(oids.begin(), oids.end(), oid1) != oids.end()) {
1317oids.push_back(oid1);
1331vector<blastdb::TOid> oids;
1335 for(
unsigned int i=0;
i< oids.size();
i++) {
1338rv.push_back(oids[
i]);
1344 "Taxonomy list is not supported in v4 BLAST db");
1360vector<blastdb::TOid> oids;
1362oids.push_back(oid);
1372 "Taxonomy list is not supported in v4 BLAST db");
1384 "Taxonomy list is not supported in v4 BLAST db");
1393oids.resize(accs.size());
1397 for(
unsigned int i=0;
i< oids.size();
i++) {
1408 for(
unsigned int i=0;
i< accs.size();
i++) {
1409vector<blastdb::TOid>
tmp;
1411 if(
tmp.empty()) {
1415oids[
i] =
tmp[0];
1436 boolis_BL_ORD_ID =
false;
1441 if(dbt.
GetDb() ==
"BL_ORD_ID") {
1442is_BL_ORD_ID =
true;
1450 if(seqid_in.
IsPir() || seqid_in.
IsPrf()) {
1456 for(
unsigned int i=0;
i<
tmp.size();
i++) {
1457 intoid2 =
tmp[
i];
1459oids.push_back(
tmp[
i]);
1466vector<int> vol_oids;
1474seqid.
Assign(seqid_in);
1480 if(vol_oids.empty()) {
1486 ITERATE(vector<int>, iter, vol_oids) {
1487 intoid1 = ((*iter) + vol_start);
1493oids.push_back(oid1);
1514 "OID not in valid range.");
1520 "Residue offset not in valid range.");
1533 if((first_seq < vol_cnt) && (residue < vol_len)) {
1534 returnvol_start + volp->
GetOidAtOffset(first_seq, residue, locked);
1539vol_start += vol_cnt;
1541 if(first_seq > vol_cnt) {
1542first_seq -= vol_cnt;
1547 if(residue > vol_len) {
1556 "Could not find valid split point oid.");
1562vector<string> & paths,
1563vector<string> * alias_paths,
1567 booluse_atlas_lock =
true;
1607 Uint8base_count(0);
1622 if(totlen || maxlen || minlen) {
1633max_count =
max(
len, max_count);
1634min_count =
min(
len, min_count);
1640*numseq = oid_count;
1644*totlen = base_count;
1648*maxlen = max_count;
1652*minlen = min_count;
1660oss <<
"Taxid "<< taxid <<
" not found";
1668 Uint8* total_length,
1709 int* ambig_length)
const 1716vol->GetRawSeqAndAmbig(vol_oid,
1746 template<
classTId>
1760*high_out = high_in;
1763*count_out = count_in;
1765 if(low_out && (*low_out > low_in)) {
1768 if(high_out && (*high_out < high_in)) {
1769*high_out = high_in;
1772*count_out += count_in;
1783 boolfound =
false;
1815 boolfound =
false;
1818 intvlow(0), vhigh(0), vcount(0);
1844 boolfound =
false;
1879vol->SetOffsetRanges(vol_oid,
1910 RetAmbigSeq(
const_cast<const char**
>(& datap));
1924vector<int> vol_oids;
1930 if(vol_oids.empty()) {
1936 ITERATE(vector<int>, iter, vol_oids) {
1937 intoid1 = (*iter) + vol_start;
1943oids.push_back(oid1);
1976 if(! ngis.empty()) {
1979}
else if(! ntis.empty()) {
1982}
else if(!stis.empty()) {
1995 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \ 1996 (!defined(NCBI_COMPILER_MIPSPRO)) ) 2009titles.assign(
all.begin(),
all.end());
2028vector<int> vol_ids;
2030 boolfound =
false;
2036vol_ids.push_back(
id);
2121 "This column ID was not found.");
2147 intvol_idx = -1, vol_oid = -1;
2152 if(vol_col_id >= 0) {
2174 template<
classK,
classC>
2177 returnc.find(k) != c.end();
2192 stringv = iter->second;
2193vector<string> items;
2196 if(items.size() == 4) {
2201 return& iter->second;
2211: m_NextId(100), m_Empty(
true), m_CacheRealAlgo(-1)
2220algorithms.push_back(iter->first);
2226 stringreal_desc = desc;
2227vector<string> items;
2229 if(items.size() == 4) {
2230real_desc = items[2];
2238 if((! found_id) || (
m_DescToId[real_desc] !=
id)) {
2290 "Cannot find volume in algorithm map.");
2297 "Cannot find volume algorithm in algorithm map.");
2300 returntrans[algo_id];
2307 "Cannot find string algorithm id in algorithm map.");
2313 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \ 2314 (!defined(NCBI_COMPILER_MIPSPRO)) ) 2330algorithms.resize(0);
2353vector<int> algorithms;
2355 if(algorithms.empty()) {
2361<<
"Available filtering algorithms applied to database sequences:" 2364retval << setw(13) << left <<
"Algorithm ID" 2365<< setw(40) << left <<
"Algorithm name" 2366<< setw(40) << left <<
"Algorithm options"<< endl;
2367 ITERATE(vector<int>, algo_id, algorithms) {
2368 string algo, algo_opts, algo_name;
2370 if(algo_opts.empty()) {
2371algo_opts.assign(
"default options used");
2374retval << setw(13) << left << (*algo_id)
2375<< setw(40) << left << algo_name
2376<< setw(40) << left << algo_opts << endl;
2378retval << setw(13) << left << (*algo_id)
2379<< setw(40) << left <<
algo 2380<< setw(40) << left << algo_opts << endl;
2394 string& program_name,
2398 if(enum_type_vals ==
NULL) {
2399enum_type_vals = GetTypeInfo_enum_EBlast_filter_program();
2403vector<string> items;
2406 if(items.size() == 2) {
2409program.assign(items[0]);
2410program_name.assign(enum_type_vals->
FindName(pid,
false));
2412}
else if(items.size() == 4) {
2418 "Error in stored mask algorithm description data.");
2424 string& program_name,
2445 if(found ==
false) {
2447oss <<
"Filtering algorithm ID "<< algorithm_id
2448<<
" is not supported."<< endl;
2484 if(vol_col_id < 0) {
2498oss <<
"Error: volume ("<< volp->
GetVolName()
2499<<
") mask data has duplicates value ("<< *dup <<
")";
2506 const string& desc1 = iter->second;
2526 const void* src = (
const void*) blob.
ReadRaw(
n*8);
2531 template<
classTRead>
2538 for(
intrng = 0; rng < num_ranges; rng++) {
2541 if(
algo== vol_algo) {
2545 intskip_amt = num_pairs * 2 * TRead::numeric_size;
2571 intvol_oid = 0, vol_idx = -1;
2585 if(blob.
Size() != 0) {
2589 intvol_algo_id = -1;
2597s_ReadRanges<SReadInt4>(vol_algo_id, ranges, blob);
2609 if(num_threads < 1) {
2611}
else if(num_threads == 1) {
2612num_threads = force_mt ? 1 : 0;
2617 for(
intthread =
m_NumThreads; thread < num_threads; ++thread) {
2623 for(
intthread = num_threads; thread <
m_NumThreads; ++thread) {
2669 for(
intvol = 0; vol < nvols; ++vol) {
2677 for(
intvol = 0; vol < nvols; ++vol) {
2734 if((! defline_set.
Empty()) && defline_set->
CanGet()) {
2736 if(! (*defline)->CanGetSeqid()) {
2741 if((*df_seqid)->Match(seq_id)) {
2743 if(!df_taxids.
empty()) {
2759 for(
unsigned int i=0;
i< oids.size();
i++) {
2763 if(!taxid_set.
empty()) {
2764taxids.insert(taxids.begin(), taxid_set.
begin(), taxid_set.
end());
Declaration of ADT to retrieve sequences for the BLAST engine.
#define BLAST_SEQSRC_MINLENGTH
Default minimal sequence length.
`Blob' Class for SeqDB (and WriteDB).
int GetReadOffset() const
Get the current read pointer offset.
Int4 ReadInt4()
Read a 4 byte integer at the pointer (and move the pointer).
int Size() const
Get size of blob contents.
void Clear()
Clear all owned data and reference an empty string.
void SeekRead(int offset)
Move the read pointer to a specific location.
const char * ReadRaw(int size)
Read raw data (moving the read pointer).
void SetFrame(const string &frame)
void Log(const string &name, const char *value, CDebugDumpFormatter::EValueType type=CDebugDumpFormatter::eValue, const string &comment=kEmptyStr)
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string Sample usage:
CObjectIStreamAsnBinary --.
void GetAliasFileValues(TAliasFileValues &afv, const CSeqDBVolSet &volset)
Get Name/Value Data From Alias Files.
Int8 GetNumSeqsStats(const CSeqDBVolSet &volset) const
Get the number of sequences available.
Uint8 GetTotalLength(const CSeqDBVolSet &volset) const
Get the total length of the set of databases.
Uint8 GetTotalLengthStats(const CSeqDBVolSet &volset) const
Get the total length of the set of databases.
bool NeedTotalsScan(const CSeqDBVolSet &volset) const
Check whether a db scan is needed to compute correct totals.
Int4 GetMinLength(const CSeqDBVolSet &volset) const
Get the length of the shortest sequence.
Int8 GetNumOIDs(const CSeqDBVolSet &volset) const
Get the size of the OID range.
void GetMaskList(vector< string > &mask_list)
Get Gi-based Mask Names From Alias Files.
void FindVolumePaths(vector< string > &vols, vector< string > *alias, bool recursive) const
Find the base names of volumes.
bool HasFilters()
Check if any volume filtering exists.
string GetTitle(const CSeqDBVolSet &volset) const
Get the title.
Int8 GetNumSeqs(const CSeqDBVolSet &volset) const
Get the number of sequences available.
CRef< CSeqDB_FilterTree > GetFilterTree()
Get filtering tree for all volumes.
Guard object for the SeqDBAtlas singleton.
CSeqDBAtlas & Get()
Get the CSeqDBAtlas object.
static void RetRegion(const char *datap)
Free allocated memory.
Uint8 GetSliceSize()
Get the current slice size.
void Lock(CSeqDBLockHold &locked)
Lock the atlas.
void Unlock(CSeqDBLockHold &locked)
Unlock the atlas.
int GetNumGis() const
Get the number of GIs in the array.
void GetPigList(vector< TPig > &pigs) const
void GetGiList(vector< TGi > &gis) const
Get the gi list.
void GetTiList(vector< TTi > &tis) const
Get the ti list.
int GetNumTis() const
Get the number of TIs in the array.
void GetMaskData(int algo_id, TGi gi, CSeqDB::TSequenceRanges &ranges, CSeqDBLockHold &locked)
Get the mask data for GI.
int GetAlgorithmId(const string &algo_name) const
Get the mask algorithm id for a string id.
const string & GetDesc(int algo_id, CSeqDBLockHold &locked)
Get the mask description for algo id.
void GetAvailableMaskAlgorithms(vector< int > &algo) const
Get the available mask algorithm ids.
SeqDB ID list for performing boolean set operations.
bool Blank() const
Check if an ID list is blank.
void GetTaxIDs(int oid, map< TGi, TTaxId > &gi_to_taxid, bool persist)
Get gi to taxid map for an OID.
int m_NumSeqsStats
Number of sequences in the overall database.
int GetSequence(int oid, const char **buffer)
Get the sequence data for a sequence.
void GetDBTaxIds(set< TTaxId > &tax_ids)
Get all unique tax ids from db.
CSeqDBAliasFile m_Aliases
Alias node hierarchy management object.
char GetSeqType() const
Get the sequence type.
int x_GetSeqBuffer(SSeqResBuffer *buffer, int oid, const char **seq) const
Get sequence from buffer.
CRef< CSeqDBGiList > m_UserGiList
The User GI list for the entire CSeqDB object.
void x_InitIdSet()
Initialize Id Set.
int GetOidAtOffset(int first_seq, Uint8 residue) const
Find the OID corresponding to the offset given in residues, into the database as a whole.
void GetRawSeqAndAmbig(int oid, const char **buffer, int *seq_length, int *ambig_length) const
Raw Sequence and Ambiguity Data.
int GetMinLength() const
Returns the length of the smallest sequence in the database.
int m_NumThreads
number of thread clients
std::shared_mutex m_CacheIDMutex
mapping thread ID to storage ID
void x_RetSeqBuffer(SSeqResBuffer *buffer) const
Return sequence to buffer.
CSeqDBIdSet m_IdSet
The positive or negative ID list for the entire CSeqDB object.
int x_GetMinLength() const
Returns the shortest sequence lengths of all volumes.
void GetLeafTaxIDs(int oid, map< TGi, set< TTaxId > > &gi_to_taxid_set, bool persist)
Get gi to leaf taxid set map for an OID.
~CSeqDBImpl()
Destructor.
string m_Date
Cached most recent date string for GetDate().
void AccessionsToOids(const vector< string > &accs, vector< blastdb::TOid > &oids)
CRef< CSeqDBOIDList > m_OIDList
The list of included OIDs (construction is deferred).
EBlastDbVersion GetBlastDbVersion() const
Return blast db version.
CSeqDBImpl(const string &db_name_list, char prot_nucl, int oid_begin, int oid_end, CSeqDBGiList *gi_list, CSeqDBNegativeList *neg_list, CSeqDBIdSet idset, bool use_atlas_lock)
Standard Constructor.
void x_BuildMaskAlgorithmList(CSeqDBLockHold &locked)
Get a list of algorithm IDs for which mask data exists.
int m_MaskDataColumn
Column ID for mask data column.
int GetMaskAlgorithmId(const string &algo_name)
Get the numeric ID for an algorithm name.
CFastMutex m_OIDLock
Mutex which synchronizes access to the OID list.
int GetColumnId(const string &title)
Get an ID number for a given column title.
bool GiToOidwFilterCheck(TGi gi, int &oid)
GiToOid is meant to simply return the oid for a gi if one exists. This method finds the oid and checks if...
void GetColumnBlob(int col_id, int oid, bool keep, CBlastDbBlob &blob)
Fetch the data blob for the given column and oid.
void GetStringBounds(string *low_id, string *high_id, int *count)
Get String Bounds.
void SetIterationRange(int oid_begin, int oid_end)
Set Iteration Range.
TGi x_GetSeqGI(int oid, CSeqDBLockHold &locked)
Look up for the GI of a sequence.
int GetNumOIDs() const
Returns the size of the (possibly sparse) OID range.
Uint8 m_TotalLength
Total length of database (in bases).
void GetMaskAlgorithmDetails(int algorithm_id, string &program, string &program_name, string &algo_opts)
Get information about one type of masking available here.
bool m_UseGiMask
Which type of masks are we using?
void x_CheckOid(T &list, CSeqDBLockHold &locked)
CSeqDBAtlas & m_Atlas
Reference to memory management layer.
CRef< CSeqDBNegativeList > m_NegativeList
The Negative ID list for the entire CSeqDB object.
void SetNumberOfThreads(int num_threads, bool force_mt=false)
Set the number of client threads; a count of 1 is treated as 0 unless force_mt is set.
TGi GetSeqGI(int oid)
Look up for the GI of a sequence.
int GetMaxLength() const
Returns the length of the largest sequence in the database.
int m_RestrictEnd
Ending OID as provided to the constructor.
void TaxIdsToOids(set< TTaxId > &tax_ids, vector< blastdb::TOid > &rv)
Get Oid list for input tax ids.
void RetAmbigSeq(const char **buffer) const
Returns any resources associated with the sequence.
void FlushOffsetRangeCache()
Flush all offset ranges cached.
void GetAllTaxIDs(int oid, set< TTaxId > &taxids)
Get all tax ids (leaf and non-leaf) for an oid.
Uint8 GetVolumeLength() const
Returns the sum of the lengths of all volumes.
void GetLMDBFileNames(vector< string > &lmdb_list) const
const string & GetDBNameList() const
Get list of database names.
void x_FillSeqBuffer(SSeqResBuffer *buffer, int oid) const
Fill up the buffer.
CObjectIStreamAsnBinary * reusable_inpstr
unsigned GetSequenceHash(int oid)
Get the sequence hash for a given OID.
char m_SeqType
Type of sequences used by this instance.
void x_GetTaxIdsForSeqId(const CSeq_id &seq_id, int oid, CBlast_def_line::TTaxIds &taxid_set)
CRef< CBioseq > GetBioseq(int oid, TGi target_gi, const CSeq_id *target_seq_id, bool seqdata)
Get a CBioseq for a sequence.
static void FindVolumePaths(const string &dbname, char prot_nucl, vector< string > &paths, vector< string > *alias_paths, bool recursive, bool expand_links)
Find volume paths.
void ListColumns(vector< string > &titles)
List columns titles found in this database.
int GetAmbigPartialSeq(int oid, char **buffer, int nucl_code, ESeqDBAllocType alloc_type, CSeqDB::TSequenceRanges *partial_ranges, CSeqDB::TSequenceRanges *masks) const
Uint8 m_TotalLengthStats
Total length of database (in bases).
bool OidToPig(int oid, int &pig) const
Translate an OID to a PIG.
std::atomic< bool > m_OidListSetup
True if OID list setup is done (or was not required).
void GetAliasFileValues(TAliasFileValues &afv)
Get Name/Value Data From Alias Files.
Uint8 x_GetTotalLength() const
Returns the sum of the lengths of all available sequences.
Uint8 GetExactTotalLength()
Returns the exact sum of the lengths of all available sequences.
void GetTaxIdsForSeqId(const CSeq_id &seq_id, vector< TTaxId > &taxids)
int x_GetCacheID()
Get local cache ID for current thread.
CRef< CSeqDBGiMask > m_GiMask
Gi-based mask.
bool IdsToOids(CSeqDBGiList &id_list)
Get OIDs from an ID list.
CRef< CSeq_data > GetSeqData(int oid, TSeqPos begin, TSeqPos end) const
Fetch data as a CSeq_data object.
map< string, int > m_ColumnTitleMap
Map string column titles to global column IDs.
const map< string, string > & GetColumnMetaData(int column_id)
Get all metadata for the specified column.
CRef< CBlast_def_line_set > x_GetHdr(int oid, CSeqDBLockHold &locked)
Get the sequence header data.
Uint8 x_GetVolumeLength() const
Returns the sum of the lengths of all volumes.
void x_GetOidList(CSeqDBLockHold &locked)
Build the OID list.
int m_NextChunkOID
"Bookmark" for multithreaded chunk-type OID iteration.
static void GetTaxInfo(TTaxId taxid, SSeqDBTaxInfo &info)
Get taxonomy information.
int x_GetNumOIDs() const
Returns the size of the (possibly sparse) OID range.
string GetAvailableMaskAlgorithmDescriptions()
Returns a formatted string with the list of available masking algorithms in this database for display...
bool CheckOrFindOID(int &next_oid)
Find an included OID, incrementing next_oid if necessary.
void GetPigBounds(int *low_id, int *high_id, int *count)
Get PIG Bounds.
void SeqidToOids(const CSeq_id &seqid, vector< int > &oids, bool multi)
Translate a CSeq-id to a list of OIDs.
int GetNumSeqsStats() const
Returns the number of sequences available.
@ kUnknownTitle
This column is not heard of yet.
@ kColumnNotFound
This column does not exist (we checked).
int x_GetMaxLength() const
Returns the longest sequence lengths of all volumes.
void SetVolsOidMaskType(int oid_mask_type)
int GetAmbigSeq(int oid, char **buffer, int nucl_code, SSeqDBSlice *region, ESeqDBAllocType strategy, CSeqDB::TSequenceRanges *masks=NULL) const
Get a pointer to a range of sequence data with ambiguities.
int m_MaxLength
Longest database sequence.
int m_RestrictBegin
Starting OID as provided to the constructor.
void GetGiBounds(TGi *low_id, TGi *high_id, int *count)
Get GI Bounds.
void GetAvailableMaskAlgorithms(vector< int > &algorithms)
Get a list of algorithm IDs for which mask data exists.
int GetSeqLengthApprox(int oid) const
Get the approximate sequence length.
std::atomic< int > m_NextCacheID
Uint8 m_VolumeLength
Total length of all database volumes combined (in bases).
void GetTaxIdsForOids(const vector< blastdb::TOid > &oids, set< TTaxId > &tax_ids)
void DebugDump(CDebugDumpContext ddc, unsigned int depth) const
Dump debug information for this object.
Uint8 x_GetTotalLengthStats() const
Returns the sum of the lengths of all available sequences.
void GetTotals(ESummaryType sumtype, int *oid_count, Uint8 *total_length, bool use_approx)
Returns the sum of the sequence lengths.
CSeqDB_IdRemapper m_AlgorithmIds
Algorithm ID mapping.
void RetSequence(const char **buffer)
Returns any resources associated with the sequence.
bool x_CheckOrFindOID(int &next_oid, CSeqDBLockHold &locked)
Get the next included oid.
int m_MinLength
Shortest database sequence.
void GetMaskData(int oid, int algo_id, CSeqDB::TSequenceRanges &ranges)
Get masked ranges of a sequence.
int x_GetColumnId(const string &title, CSeqDBLockHold &locked)
Get the Column ID for the column with the specified title.
bool TiToOid(Int8 ti, int &oid)
Translate a TI to an OID.
Uint8 GetTotalLengthStats() const
Returns the sum of the lengths of all available sequences.
int x_GetSeqLength(int oid) const
Get the sequence length.
void SetVolsMemBit(int mbit)
Set the membership bit of all volumes.
Uint8 m_ExactTotalLength
Total length of database (in bases).
bool GiToOid(TGi gi, int &oid) const
Translate a GI to an OID.
void HashToOids(unsigned hash, vector< int > &oids)
Get the OIDs for a given sequence hash.
void SetOffsetRanges(int oid, const TRangeList &offset_ranges, bool append_ranges, bool cache_data)
Apply a range of offsets to a database sequence.
list< CRef< CSeq_id > > GetSeqIDs(int oid)
Gets a list of sequence identifiers.
void AccessionToOids(const string &acc, vector< int > &oids)
Find OIDs matching the specified string.
int x_GetNumSeqs() const
Returns the number of sequences available.
string m_DBNames
The list of database names provided to the constructor.
void FlushSeqMemory()
Flush unnecessarily held memory.
CRef< CBlast_def_line_set > GetHdr(int oid)
Get the sequence header data.
vector< SSeqResBuffer * > m_CachedSeqs
Cached sequences.
int GetNumSeqs() const
Returns the number of sequences available.
vector< CRef< CSeqDB_ColumnEntry > > m_ColumnInfo
Map assigned global column IDs to column information.
int x_GetNumSeqsStats() const
Returns the number of sequences available.
Uint8 GetTotalLength() const
Returns the sum of the lengths of all available sequences.
std::map< int, int > m_CacheID
int x_GetMaskDataColumn(CSeqDBLockHold &locked)
Open the mask data column (if necessary) and return its id.
bool PigToOid(int pig, int &oid) const
Translate a PIG to an OID.
CSeqDB::EOidListType GetNextOIDChunk(int &begin_chunk, int &end_chunk, int oid_size, vector< int > &oid_list, int *oid_state)
Return a chunk of OIDs, and update the OID bookmark.
string GetDate() const
Returns the construction date of the database.
CSeqDBVolSet m_VolSet
Set of volumes used by this database instance.
string GetTitle() const
Returns the database title.
void ResetInternalChunkBookmark()
Restart chunk iteration at the beginning of the database.
string x_FixString(const string &s) const
Adjust string length to offset of first embedded NUL byte.
int m_NumOIDs
Size of databases OID range.
bool m_NeedTotalsScan
True if this configuration cannot deduce totals without a scan.
int x_SetCacheID(int threadID)
int m_NumSeqs
Number of sequences in the overall database.
void x_ScanTotals(bool approx, int *seq_count, Uint8 *base_count, int *max_count, int *min_count, CSeqDBLockHold &locked)
Compute totals via iteration.
int GetSeqLength(int oid) const
Get the sequence length.
CSeqDBIdSet GetIdSet()
Get IdSet list attached to this database.
bool OidToGi(int oid, TGi &gi)
Translate an OID to a GI.
void AccessionToOids(const string &acc, vector< TOid > &oids) const
void GetTaxIdsForOids(const vector< blastdb::TOid > &oids, set< TTaxId > &tax_ids) const
void GetLMDBFileNames(vector< string > &lmdb_list) const
void AccessionsToOids(const vector< string > &accs, vector< TOid > &oids) const
bool IsBlastDBVersion5() const
void GetDBTaxIds(set< TTaxId > &tax_ids) const
void TaxIdsToOids(set< TTaxId > &tax_ids, vector< blastdb::TOid > &rv) const
const vector< string > & GetSiList()
const vector< TTi > & GetTiList()
Build ID set for this negative list.
const vector< TGi > & GetGiList()
Build ID set for this negative list.
void UnLease()
Deallocate the memory ranges owned by this object.
bool CheckOrFindOID(TOID &next_oid) const
Find an included oid from the specified point.
static bool GetTaxNames(TTaxId tax_id, SSeqDBTaxInfo &info)
Get the taxonomy names for a given tax id.
void OptimizeGiLists()
Optimize the GI list configuration.
void UnLease()
Return storage held by the volumes.
const CSeqDBVol * GetVol(int i) const
Find a volume by index.
Uint8 GetVolumeSetLength() const
Find total volume length for all volumes.
CSeqDBVol * GetVolNonConst(int i)
Find a volume by index.
int GetNumVols() const
Get the number of volumes.
CSeqDBVol * FindVol(int oid, int &vol_oid) const
Find a volume by OID.
int GetNumOIDs() const
Get the size of the OID range.
int GetVolOIDStart(int i) const
Get the first OID in a volume.
void SeqidToOids(CSeq_id &seqid, vector< int > &oids, CSeqDBLockHold &locked) const
Find OIDs for the specified Seq-id.
void AccessionToOids(const string &acc, vector< int > &oids, CSeqDBLockHold &locked) const
Find OIDs for the specified accession or formatted Seq-id.
void GetColumnBlob(int col_id, int oid, CBlastDbBlob &blob, bool keep, CSeqDBLockHold &locked)
Fetch the data blob for the given column and oid.
const string & GetVolName() const
Get the volume name.
int GetSeqLengthExact(int oid) const
Exact sequence length for nucleotide databases.
int GetColumnId(const string &title, CSeqDBLockHold &locked)
Get an ID number for a given column title.
int GetNumOIDs() const
Get the number of OIDs for this volume.
void GetPigBounds(int &low_id, int &high_id, int &count, CSeqDBLockHold &locked) const
Get PIG Bounds.
void FlushOffsetRangeCache()
Flush all offset ranges cached.
void ListColumns(set< string > &titles, CSeqDBLockHold &locked)
List the titles of all columns for this volume.
int GetSeqLengthApprox(int oid) const
Approximate sequence length for nucleotide databases.
int GetSeqLengthProt(int oid) const
Sequence length for protein databases.
void IdsToOids(CSeqDBGiList &gis) const
Translate Gis to Oids for the given vector of Gi/Oid pairs.
void SetOidMaskType(int oid_masks) const
bool GiToOid(TGi gi, int &oid, CSeqDBLockHold &locked) const
Find the OID given a GI.
void GetGiBounds(TGi &low_id, TGi &high_id, int &count, CSeqDBLockHold &locked) const
Get GI Bounds.
int GetOidAtOffset(int first_seq, Uint8 residue, CSeqDBLockHold &locked) const
Find the OID at a given index into the database.
const map< string, string > & GetColumnMetaData(int col_id, CSeqDBLockHold &locked)
Get all metadata for the specified column.
bool PigToOid(int pig, int &oid) const
Find the OID given a PIG.
bool TiToOid(Int8 ti, int &oid, CSeqDBLockHold &locked) const
Find the OID given a TI.
void SetMemBit(int mbit) const
Set the MEMB_BIT filtering for this volume.
Uint8 GetVolumeLength() const
Get the total length of this volume (in bases).
string GetDate() const
Get the formatting date of the volume.
void GetStringBounds(string &low_id, string &high_id, int &count) const
Get String Bounds.
void HashToOids(unsigned hash, vector< int > &oids, CSeqDBLockHold &locked) const
Get the OIDs for a given sequence hash.
Something else yet again etc.
int GetEnd() const
Get OID after last included OID.
@ eOidRange
OID Range [start, end).
int GetBegin() const
Get first included OID.
Database-wide column information.
const map< string, string > & GetMap()
Get the metadata map.
void SetHaveMap()
Indicate that the metadata map is now complete.
int GetVolumeIndex(int volnum)
Get a volume-specific column ID.
bool HaveMap()
Determine if we have the metadata map yet.
void SetMapValue(const string &k, const string &v)
Add a meta-data key/value association.
bool HasFilter() const
Check whether this tree represents any volume filtering.
const vector< CRef< CSeqDB_FilterTree > > & GetNodes() const
Get child nodes attached to this node.
vector< CRef< CSeqDB_AliasMask > > TFilters
Type used to store lists of filters found here.
int m_CacheRealAlgo
Cached list of real algorithms for BuildVolAlgos.
int GetVolAlgo(int vol_idx, int algo_id)
Build a list of volume algorithm IDs.
bool GetDesc(int algorithm_id, string &desc)
Get the description for an algorithm ID.
void AddMapping(int vol_id, int id, const string &desc)
Register a volume's algorithm definition.
map< int, string > m_IdToDesc
Map of real IDs to descriptions.
void GetIdList(vector< int > &algorithms)
Get a list of user (real) IDs available here.
map< string, int > m_DescToId
Map of descriptions to real IDs.
CSeqDB_IdRemapper()
Constructor.
int GetAlgoId(const string &id)
Translate a string algorithm ID to a numeric algorithm ID.
int m_CacheVolAlgo
Cached list of volume algorithms for BuildVolAlgos.
int RealToVol(int vol_idx, int algo_id)
Translate a real algorithm ID to a volume algorithm ID.
void SetNotEmpty()
Mark this object as populated.
map< int, map< int, int > > m_RealIdToVolumeId
Map of volume# to map of real id to volume-based id.
int m_CacheVolIndex
Cached volume index for BuildVolAlgos.
bool Empty()
Is this object populated?
int m_NextId
Next unassigned synthetic ID.
static const char * kOidNotFound
String containing the error message in exceptions thrown when a given OID cannot be found.
EOidListType
Indicates how block of OIDs was returned.
ESummaryType
Types of summary information available.
@ eUnfilteredAll
Sum of all sequences, ignoring GI and OID lists and alias files.
@ eFilteredRange
Sum of included sequences with OIDs within the iteration range.
@ eFilteredAll
Values from alias files, or summation over all included sequences.
static const char * kBlastDbDateFormat
Format string for the date returned by CSeqDB::GetDate.
iterator_bool insert(const value_type &val)
const_iterator begin() const
const_iterator end() const
static unsigned char depth[2 *(256+1+29)+1]
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
SStrictId_Tax::TId TTaxId
Taxon id type.
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
void Info(CExceptionArgs_Base &args)
const string & FindName(TEnumValueType value, bool allowBadValue) const
Find name of the enum by its numeric value.
void Read(CObjectIStream &in, TObjectPtr object, const CTypeRef &type)
const string AsFastaString(void) const
string GetSeqIdString(bool with_version=false) const
Return seqid string with optional version for text seqid type.
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
virtual void DebugDump(CDebugDumpContext ddc, unsigned int depth) const
Define method for dumping debug information.
void Reset(void)
Reset reference object.
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty -- pointing to an object and has a non-null value.
bool Empty(void) const THROWS_NONE
Check if CRef is empty -- not pointing to any object, which means having a null value.
int32_t Int4
4-byte (32-bit) signed integer
uint32_t Uint4
4-byte (32-bit) unsigned integer
int64_t Int8
8-byte (64-bit) signed integer
uint64_t Uint8
8-byte (64-bit) unsigned integer
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
static TNumeric StringToNumeric(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to a numeric value.
static string & Replace(const string &src, const string &search, const string &replace, string &dst, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
@ fConvErr_NoThrow
Do not throw an exception on error.
EBlast_filter_program
This defines the possible sequence filtering algorithms to be used in a BLAST database.
bool CanGet(void) const
Check if it is safe to call Get method.
const Tdata & Get(void) const
Get the member data.
bool CanGetDb(void) const
Check if it is safe to call GetDb method.
const TDb & GetDb(void) const
Get the Db member data.
bool IsPrf(void) const
Check if variant Prf is selected.
E_Choice Which(void) const
Which variant is currently selected.
const TGeneral & GetGeneral(void) const
Get the variant data.
bool IsPir(void) const
Check if variant Pir is selected.
@ e_General
for other databases
char * dbname(DBPROCESS *dbproc)
Get name of current database.
unsigned int
A callback function used to compare two keys in a database.
const TYPE & Get(const CNamedParameterList *param)
map< string, string > TStringMap
#define INT4_MAX
largest number represented by signed int
bool approx(T x_, T y_, T eps_)
std::istream & in(std::istream &in_, double &x_)
#define INIT_CLASS_MARK()
Marker initializer for constructor.
#define CHECK_MARKER()
Assertion to verify the marker.
#define BREAK_MARKER()
Make the marker of this class invalid.
ESeqDBAllocType
Certain methods have an "Alloc" version.
const blastdb::TOid kSeqDBEntryNotFound
Int4 TOid
Ordinal ID in BLAST databases.
const int kSeqDBNuclNcbiNA8
Used to request ambiguities in Ncbi/NA8 format.
bool IsStringId(const CSeq_id &id)
Determine if id is a string id.
unsigned SeqDB_SequenceHash(const char *sequence, int length)
Returns a path minus filename.
EBlastDbVersion
BLAST database version.
const U & SeqDB_MapFind(const std::map< T, U > &m, const T &k, const U &dflt)
Find a map value or return a default.
static const string * s_CheckUniqueValues(const map< string, string > &m)
void s_ReadRanges(int vol_algo, CSeqDB::TSequenceRanges &ranges, CBlastDbBlob &blob)
static bool s_IsNumericId(const string &id)
static void s_GetDetails(const string &desc, string &program, string &program_name, string &algo_opts)
bool s_Contains(const C &c, const K &k)
void s_AccumulateMinMaxCount(TId low_in, TId high_in, int count_in, TId *low_out, TId *high_out, int *count_out, bool set_all)
Accumulate optional min, max, and count.
static const string s_RestoreColon(const string &in)
The top level of the private implementation layer for SeqDB.
static SLJIT_INLINE sljit_ins l(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
static SLJIT_INLINE sljit_ins msg(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
Structure to buffer multiple TSeqRes.
Structure to keep sequence retrieval results.
List of sequence offset ranges.
void append(const void *src, size_type num_elements)
Append extra elements at the end.
static void Read(CBlastDbBlob &blob, int n, CSeqDB::TSequenceRanges &ranges)
static int Read(CBlastDbBlob &blob)
OID-Range type to simplify interfaces.
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4