fn(name +
'.'+ prot_nucl +
"og");
409list< CRef<CSeq_id> >
GetSeqIDs(
intoid)
const;
478 bool PigToOid(
intpig,
int& oid)
const;
538 bool GetGi(
intoid,
690 int* ambig_length)
const;
732 int&
count)
const;
771 boolcache_data)
const;
881 "MEMB_BIT error: conflicting bit found.");
898 const string& str_id,
900vector<int> & oids)
const;
944 bool& have_vol)
const 1006 boolmatch_type =
false;
1007 boolfound = L.
FindId(
id, match_type);
1009 return(! found) && match_type;
1033 bool* changed)
const;
1068vector<char> & hdr_data)
const;
1089 bool* changed)
const;
1132vector<Int4> & ambchars)
const;
1210 const char**
buffer)
const;
1264 const CSeq_id* preferred_seq_id);
1289list< CRef<CSeqdesc> >
1292 const CSeq_id* preferred_seq_id);
1341vector<int> & oids)
const;
1449 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \ 1450 (!defined(NCBI_COMPILER_MIPSPRO)) )`Blob' Class for SeqDB (and WriteDB).
CObjectIStreamAsnBinary –.
CNcbiStreamoff TIndx
The type used for file offsets.
void Init(const string &filename)
Initializes a memory map object.
void Clear()
Clears the memory map object.
CSeqDBGiIndex(CSeqDBAtlas &atlas, const string &dbname, char prot_nucl)
static bool IndexExists(const string &name, const char prot_nucl)
TGi GetSeqGI(TOid oid, CSeqDBLockHold &locked)
bool HasIdFilters() const
bool FindId(const CSeq_id &id)
Test for existence of a Seq-id by type.
bool FindId(const CSeq_id &id, bool &match_type)
Test for existence of a TI or GI here and report whether the ID was one of those types.
bool HasIdFilters() const
~CSeqDBRangeList()
Destructor.
CSeqDBRangeList()
Constructor.
TRangeList m_Ranges
Range of offsets needed for this sequence.
void SetRanges(const TRangeList &ranges, bool append_ranges, bool cache_data)
Set ranges of the sequence that will be used.
const TRangeList & GetRanges()
Get ranges of sequence offsets that will be used.
static int ImmediateLength()
Sequences shorter than this will not use ranges in any case.
bool IsCached()
Returns true if the sequence data is cached.
bool m_CacheData
True if caching of sequence data is required for this sequence.
set< pair< int, int > > TRangeList
List of sequence offset ranges.
void FlushSequence()
Flush cached sequence data (if any).
void OptimizeGiLists() const
Simplify the GI list configuration.
bool m_HaveColumns
True if we have opened the columns for this volume.
list< CRef< CSeq_id > > GetSeqIDs(int oid) const
Get the Seq-ids associated with a sequence.
int x_GetSequence(int oid, const char **buffer, bool keep, CSeqDBLockHold &locked, bool can_release, SSeqDBSlice *region) const
Get partial sequence data.
CFastMutex m_MtxCachedRange
CRef< CSeqDBIsam > m_IsamGi
Handles translation of GIs to OIDs.
void SeqidToOids(CSeq_id &seqid, vector< int > &oids, CSeqDBLockHold &locked) const
Find OIDs for the specified Seq-id.
Uint8 x_GetSeqResidueOffset(int oid) const
Returns the base-offset of the specified oid.
void x_OpenHashFile(void) const
void x_UnleasePigFile(void) const
int GetAmbigPartialSeq(int oid, char **buffer, int nucl_code, ESeqDBAllocType alloc_type, CSeqDB::TSequenceRanges *partial_ranges, CSeqDB::TSequenceRanges *masks) const
void x_UnleaseTiFile(void) const
CSeqDBAtlas & m_Atlas
The memory management layer.
void AccessionToOids(const string &acc, vector< int > &oids, CSeqDBLockHold &locked) const
Find OIDs for the specified accession or formatted Seq-id.
void GetColumnBlob(int col_id, int oid, CBlastDbBlob &blob, bool keep, CSeqDBLockHold &locked)
Fetch the data blob for the given column and oid.
CRef< CSeqDBHdrFile > m_Hdr
Contains header (defline) information for this volume.
void x_OpenSeqFile(void) const
CSeqDBIntCache< CRef< CSeqdesc > > m_TaxCache
This cache allows CBioseqs to share taxonomic objects.
const string & GetVolName() const
Get the volume name.
void x_OpenTiFile(void) const
int GetAmbigSeq(int oid, char **buffer, int nucl_code, ESeqDBAllocType alloc_type, SSeqDBSlice *region, CSeqDB::TSequenceRanges *masks) const
Get a sequence with ambiguous regions.
vector< CRef< CSeqDBColumn > > m_Columns
Set of columns defined for this volume.
CRef< CSeqDBIdxFile > m_Idx
Metadata plus offsets into the sequence, header, and ambiguity data.
void x_OpenStrFile(void) const
int GetSeqLengthExact(int oid) const
Exact sequence length for nucleotide databases.
void x_StringToOids(const string &acc, ESeqDBIdType id_type, Int8 ident, const string &str_id, bool simplified, vector< int > &oids) const
void OpenSeqFile(CSeqDBLockHold &locked) const
Open sequence file.
int GetColumnId(const string &title, CSeqDBLockHold &locked)
Get an ID number for a given column title.
CRef< CSeqDBIsam > m_IsamStr
Handles translation of strings (accessions) to OIDs.
vector< CRef< CSeqDBGiList > > TGiLists
A set of GI lists.
CSeqDBIntCache< TDeflineCacheItem > m_DeflineCache
Cache of filtered deflines.
int x_GetAmbigSeq(int oid, char **buffer, int nucl_code, ESeqDBAllocType alloc_type, SSeqDBSlice *region, CSeqDB::TSequenceRanges *masks) const
Get a sequence with ambiguous regions.
int m_VolStart
Starting OID of this volume.
int GetNumOIDs() const
Get the number of OIDs for this volume.
map< int, CRef< CSeqDBRangeList > > TRangeCache
Cached/ranged sequence info type.
bool GetGi(int oid, TGi &gi, CSeqDBLockHold &locked) const
Find the GI given an OID.
CRef< CBlast_def_line_set > x_GetHdrAsn1(int oid, bool adjust_oids, bool *changed) const
Get sequence header object.
void GetPigBounds(int &low_id, int &high_id, int &count, CSeqDBLockHold &locked) const
Get PIG Bounds.
void x_FilterHasId(const CSeq_id &id, bool &have_user, bool &have_vol) const
Determine if a user ID list affects this ID, and how.
string m_VolName
The name of this volume.
CTempString x_GetHdrAsn1Binary(int oid) const
Get sequence header binary data.
void FlushOffsetRangeCache()
Flush all offset ranges cached.
CSeqDBVol(CSeqDBAtlas &atlas, const string &name, char prot_nucl, CSeqDBGiList *user_list, CSeqDBNegativeList *neg_list, int vol_start, CSeqDBLockHold &locked)
Constructor.
void GetRawSeqAndAmbig(int oid, const char **buffer, int *seq_length, int *ambig_length) const
Get Raw Sequence and Ambiguity Data.
void x_OpenHdrFile(void) const
bool x_HaveNegativeList(void) const
Returns true if this volume has a negative ID list.
string GetLMDBFileName() const
Get sqlite file name associated with this volume. Empty string if version 4.
void ListColumns(set< string > &titles, CSeqDBLockHold &locked)
List the titles of all columns for this volume.
int GetSeqLengthApprox(int oid) const
Approximate sequence length for nucleotide databases.
TRangeCache m_RangeCache
Cached/ranged sequence info.
int m_VolEnd
First OID past end of this volume.
int GetOidMaskType() const
bool x_ListIncludesId(CSeqDBNegativeList &L, const CSeq_id &id) const
Returns true if this ID is not found in the negative ID list.
bool m_SeqFileOpened
True if the volume file has been opened (or at least an attempt was made to open it).
bool GetPig(int oid, int &pig, CSeqDBLockHold &locked) const
Find the PIG given an OID.
int GetSeqLengthProt(int oid) const
Sequence length for protein databases.
CRef< CSeqDBSeqFile > m_Seq
Contains sequence data for this volume.
CRef< CSeqdesc > x_GetAsnDefline(int oid) const
Get sequence header information structures.
string GetTitle() const
Get the volume title.
CRef< CSeqDBGiIndex > m_GiIndex
The GI index file (for fast oid->gi conversion)
CRef< CSeqDBNegativeList > m_NegativeList
The negative ID list, if one exists.
TGiLists m_VolumeGiLists
The volume GI lists, if any exist.
CRef< CSeqDBIsam > m_IsamTi
Handles translation of TI (trace ids) to OIDs.
int x_GetSequence(int oid, const char **buffer) const
Get sequence data.
CRef< CSeqDBIsam > m_IsamHash
Handles translation of sequence hash value to OIDs.
void UnLease()
Return expendable resources held by this volume.
set< pair< int, int > > TRangeList
List of sequence offset ranges.
list< CRef< CSeqdesc > > x_GetTaxonomy(int oid, TGi preferred_gi, const CSeq_id *preferred_seq_id)
Get taxonomic descriptions of a sequence.
int GetSequence(int oid, const char **buffer) const
Get the sequence data.
void IdsToOids(CSeqDBGiList &gis) const
Translate Gis to Oids for the given vector of Gi/Oid pairs.
CRef< CBlast_def_line_set > x_GetFilteredHeader(int oid, bool *changed) const
Get sequence header information.
char * x_AllocType(size_t length, ESeqDBAllocType alloc_type) const
Allocate memory in one of several ways.
void SetOidMaskType(int oid_masks) const
void x_CheckVersions(const string &acc, vector< int > &oids) const
Check Seq-id versions for special sparse-id support case.
void AttachVolumeGiList(CRef< CSeqDBGiList > gilist) const
Filter this volume using the specified GI list.
bool GiToOid(TGi gi, int &oid, CSeqDBLockHold &locked) const
Find the OID given a GI.
bool x_HaveGiList(void) const
Returns true if this volume has a positive ID list.
TGi GetSeqGI(int oid, CSeqDBLockHold &locked) const
Get the GI of a sequence. This method returns the GI of the sequence.
CRef< CSeq_data > GetSeqData(int oid, TSeqPos begin, TSeqPos end, CSeqDBLockHold &locked) const
Fetch data as a CSeq_data object.
void GetGiBounds(TGi &low_id, TGi &high_id, int &count, CSeqDBLockHold &locked) const
Get GI Bounds.
int GetOidAtOffset(int first_seq, Uint8 residue, CSeqDBLockHold &locked) const
Find the OID at a given index into the database.
char GetSeqType() const
Get the sequence type stored in this database.
bool x_HaveIdFilter(void) const
Returns true if this volume has an ID list.
void x_OpenAllColumns(CSeqDBLockHold &locked)
Find all columns for this volume.
CRef< CBioseq > GetBioseq(int oid, TGi pref_gi, const CSeq_id *pref_seq_id, bool seqdata, CSeqDBLockHold &locked)
Get a CBioseq object for this sequence.
int GetMinLength() const
Get the length of the smallest sequence in this volume.
void x_UnleaseStrFile(void) const
CRef< CSeqDBGiList > m_UserGiList
The user ID list, if one exists.
CSeqDBAtlas::TIndx TIndx
Import TIndx definition from the CSeqDBAtlas class.
void x_OpenGiFile(void) const
pair< CRef< CBlast_def_line_set >, bool > TDeflineCacheItem
Filtered defline plus whether binary data needed changes.
CRef< CSeqDBIsam > m_IsamPig
Handles translation of PIGs to OIDs.
bool m_IsAA
True if the volume is protein, false for nucleotide.
const map< string, string > & GetColumnMetaData(int col_id, CSeqDBLockHold &locked)
Get all metadata for the specified column.
CRef< CBlast_def_line_set > x_GetTaxDefline(int oid, TGi preferred_gi, const CSeq_id *preferred_seq_id)
Get defline filtered by several criteria.
void x_OpenOidFile(void) const
int GetMaxLength() const
Get the length of the largest sequence in this volume.
bool PigToOid(int pig, int &oid) const
Find the OID given a PIG.
bool TiToOid(Int8 ti, int &oid, CSeqDBLockHold &locked) const
Find the OID given a TI.
void x_OpenPigFile(void) const
void SetMemBit(int mbit) const
Set the MEMB_BIT filtering for this volume.
void x_GetFilteredBinaryHeader(int oid, vector< char > &hdr_data) const
Get binary sequence header information.
bool x_ListIncludesId(CSeqDBGiList &L, const CSeq_id &id) const
Returns true if this volume's ID list has this Seq-id.
Uint8 GetVolumeLength() const
Get the total length of this volume (in bases).
CRef< CBlast_def_line_set > GetFilteredHeader(int oid, CSeqDBLockHold &locked) const
Get filtered sequence header information.
string GetDate() const
Get the formatting date of the volume.
unsigned GetSequenceHash(int oid)
Get the sequence hash for a given OID.
int m_MemBit
The filtering MEMB_BIT.
void x_UnleaseGiFile(void) const
void SetOffsetRanges(int oid, const TRangeList &offset_ranges, bool append_ranges, bool cache_data) const
Apply a range of offsets to a database sequence.
void GetStringBounds(string &low_id, string &high_id, int &count) const
Get String Bounds.
void HashToOids(unsigned hash, vector< int > &oids, CSeqDBLockHold &locked) const
Get the OIDs for a given sequence hash.
void x_GetAmbChar(int oid, vector< Int4 > &ambchars) const
Get ambiguity information.
char x_GetSeqType() const
Returns 'p' for protein databases, or 'n' for nucleotide.
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
unsigned int TSeqPos
Type for sequence locations and lengths.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
virtual bool Exists(void) const
Check existence of file.
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty — pointing to an object and has a non-null value.
bool Empty(void) const THROWS_NONE
Check if CRef is empty — not pointing to any object, which means having a null value.
int32_t Int4
4-byte (32-bit) signed integer
int64_t Int8
8-byte (64-bit) signed integer
uint64_t Uint8
8-byte (64-bit) unsigned integer
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
char * dbname(DBPROCESS *dbproc)
Get name of current database.
The SeqDB memory management layer.
Defines database column access classes.
ESeqDBAllocType
Certain methods have an "Alloc" version.
Int4 TOid
Ordinal ID in BLAST databases.
ESeqDBIdType
Various identifier formats used in Id lookup.
This file defines several SeqDB utility functions related to byte order and file system portability.
Defines database volume access classes.
USING_SCOPE(objects)
Import definitions from the objects namespace.
List of sequence offset ranges.
OID-Range type to simplify interfaces.
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4