extn[0] = protein ?
'p':
'n';
93extn[2] = is_index ?
'i':
'd';
184m_DataFile (datafile),
222 intisam_version = 1;
225 intmax_line_size = 0;
247 "Unknown id type specified.");
296key_off.reserve(nsamples);
303 intoutput_count = 0;
311 stringelement, prev_elem;
317element[0] = char(0);
319 while(iter != end_iter) {
320prev_elem.swap(element);
323 if(prev_elem == element) {
345key_off.push_back((
int) key_buffer.size());
351key_buffer.append(element.data(), element.length()-1);
352key_buffer.append(NUL);
368key_off.push_back((
int) key_buffer.size());
370 intkey_off_start =
eKeyOffset+ (nsamples + 1) * 8;
374 for(
i= 0;
i< key_off.size();
i++) {
407 for(
int i= 0;
i<
count;
i++) {
410 if(prevp && (*prevp == elem)) {
431 for(
int i= 0;
i<
count;
i++) {
434 if(prevp && (*prevp == elem)) {
492 "Cannot call AddIds() for this index type.");
506 intsz = sprintf(
buf,
"%u", (
unsigned)
hash);
514 const CSeq_id& seqid = **iter;
516 if(seqid.
IsGi()) {
527 const CSeq_id& seqid = **iter;
561 const CSeq_id& seqid = **iter;
563 switch(seqid.
Which()) {
613 if(objid.
IsStr()) {
649 if(! mol.
size()) {
652 "Empty molecule string in pdb Seq-id.");
662 stringshort_id(full_id, 4);
667 if(short_id[4] ==
'|')
677 if(
a.size() !=
b.size())
691 if(
id.CanGetAccession()) {
692acc =
id.GetAccession();
695 if(
id.CanGetName()) {
699 if(! acc.
empty()) {
711 intver =
id.CanGetVersion() ?
id.GetVersion() : 0;
713 if(ver && acc.
size()) {
745memcpy(
buf, sbuf, sz);
749 for(
int i= 0;
i< sz;
i++) {
754sz += sprintf(
buf+ sz,
"%d", oid);
775 if(acc.
size() && ver) {
779 intsz = acc.
size();
780sz += sprintf(
buf+ sz,
".%d", ver);
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is known and controlled.
CWriteDB_IndexFile class.
Uint8 m_MaxFileSize
Maximum file size in bytes.
unsigned int WriteInt4(int data)
Write an Int4 (in bigendian order) to the file.
bool m_Created
True if the file has already been opened.
const string & GetFilename() const
Get the current filename for this file.
virtual void RenameFileIndex(unsigned int num_digits)
void Create()
Create and open the file.
void Close()
Close the file, flushing any remaining data to disk.
unsigned int Write(const CTempString &data)
Write contents of a string to the file.
virtual void RenameSingle()
Rename this file, omitting the volume index from its name.
unsigned int WriteInt8(Int8 data)
Write an Int8 (in bigendian order) to the file.
~CWriteDB_IsamData()
Destructor.
CWriteDB_IsamData(EIsamType itype, const string &dbname, bool protein, int index, Uint8 max_file_size)
Constructor for an ISAM data file.
void x_Flush()
This should flush any unwritten data to disk.
CWriteDB_IsamIndex class.
void x_AddLocal(int oid, const CSeq_id &seqid)
Add a 'local' type Seq-id.
void AddPig(int oid, int pig)
Set PIG for a protein sequence.
@ eKeyDelim
Byte indicating end of key.
@ eIsamNumericType
Numeric ISAM file with Int4 key.
@ eMaxStringLine
Maximum line size for string.
@ eRecordDelim
Byte indicating end of data.
@ eIsamStringType
String ISAM file.
@ eIsamNumericLong
Numeric ISAM file with Int8 key.
@ eKeyOffset
Offset of the key offset table.
void x_AddTraceIds(int oid, const TIdList &idlist)
Store trace IDs found in Seq-id list.
bool m_UseInt8
Use an Int8 table for numeric IDs.
void AddHash(int oid, int hash)
Set a sequence's hash value.
void x_AddString(int oid, const CTempString &s, int ver)
Add an accession with a version.
bool CanFit(int num)
Tests whether there is room for a given number of IDs.
void x_AddStringData(int oid, const CTempString &s)
Add a string to the string table.
void x_AddStringIds(int oid, const TIdList &idlist)
Compute and store string IDs from Seq-ids.
bool Empty() const
Tests whether the index file is empty (has no entries).
void x_AddPatent(int oid, const CSeq_id &seqid)
Add a 'patent' type Seq-id.
Uint8 m_DataFileSize
Accumulated size of data file.
EIsamType m_Type
Type of identifier indexed here.
CRef< CWriteDB_IsamData > m_DataFile
The data file associated with this index file.
void x_AddPdb(int oid, const CSeq_id &seqid)
Add the index strings for the specified PDB identifier.
void x_Flush()
Flush index data in preparation for Close().
void x_AddTextId(int oid, const CTextseq_id &id)
Add a text ID.
void x_AddGis(int oid, const TIdList &idlist)
Store GIs found in Seq-id list.
CWriteDB_IsamIndex(EIsamType itype, const string &dbname, bool protein, int index, CRef< CWriteDB_IsamData > datafile, bool sparse, Int8 max_file_size=0)
Constructor for an ISAM index file.
vector< SIdOid > m_NumberTable
Sorted list of numbers.
void AddIds(int oid, const TIdList &ids)
Add sequence IDs to the index file.
CWriteDB_PackedSemiTree m_StringSort
Sorted list of strings.
int m_Oid
OID to which seqid strings are currently being added.
vector< CRef< CSeq_id > > TIdList
Type used for lists of sequence identifiers.
int m_PageSize
Ratio of samples to data records.
~CWriteDB_IsamIndex()
Destructor.
void x_FlushStringIndex()
Flush index data for a string ISAM file.
void x_WriteHeader()
Write the ISAM index header to disk.
void x_Free()
Free no longer needed array and string memory.
set< string > m_OidStringData
Keep track of string seqids associated with current value of m_Oid.
bool m_Sparse
If true, fewer strings are used.
void x_FlushNumericIndex()
Flush index data for a numeric ISAM file.
void x_AddStdString(int oid, const string &s)
Add a string to the string table.
int m_BytesPerElem
Byte (over)estimate per Seq-id.
void AddHash(int oid, int hash)
Set a sequence's hash value.
~CWriteDB_Isam()
Destructor.
CRef< CWriteDB_IsamData > m_DFile
Data file, contains one record for each key/oid pair.
bool CanFit(int num)
Tests whether there is room for a given number of IDs.
CRef< CWriteDB_IsamIndex > m_IFile
Index file, contains meta data and samples of the key/oid pairs.
void ListFiles(vector< string > &files) const
List Filenames.
vector< CRef< CSeq_id > > TIdList
Type used for lists of sequence identifiers.
CWriteDB_Isam(EIsamType itype, const string &dbname, bool protein, int index, Uint8 max_file_size, bool sparse)
Constructor for an ISAM index file.
void AddPig(int oid, int pig)
Set PIG for a protein sequence.
void AddIds(int oid, const TIdList &ids)
Add sequence IDs to the index file.
void RenameSingle()
Rename files to single-volume names.
void Close()
Flush data to disk and close all associated files.
void RenameFileIndex(unsigned int num_digits)
Class providing iteration over string data.
void Get(string &data)
Get the string pointed to by this iterator.
void Insert(const char *x, int L)
Insert string data into the container.
Iterator End()
Get an iterator to the end of this collection.
void Sort()
Sort all contained data.
int Size() const
Return the number of contained entries.
void Clear()
Clear all objects from this container.
Iterator Begin()
Get an iterator to the beginning of this collection.
iterator_bool insert(const value_type &val)
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
const TPrim & Get(void) const
const string AsFastaString(void) const
const CTextseq_id * GetTextseq_Id(void) const
Return embedded CTextseq_id, if any.
void Reset(void)
Reset reference object.
int64_t Int8
8-byte (64-bit) signed integer
uint64_t Uint8
8-byte (64-bit) unsigned integer
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
static Int8 StringToInt8(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to Int8.
static int strncasecmp(const char *s1, const char *s2, size_t n)
Case-insensitive comparison of two zero-terminated strings, narrowed to the specified number of characters.
const char * data(void) const
Return a pointer to the array represented.
bool empty(void) const
Return true if the represented string is empty (i.e., the length is zero)
size_type size(void) const
Return the length of the represented array.
bool IsStr(void) const
Check if variant Str is selected.
const TTag & GetTag(void) const
Get the Tag member data.
bool IsId(void) const
Check if variant Id is selected.
bool CanGetTag(void) const
Check if it is safe to call GetTag method.
const TDb & GetDb(void) const
Get the Db member data.
const TStr & GetStr(void) const
Get the variant data.
TId GetId(void) const
Get the variant data.
const TPdb & GetPdb(void) const
Get the variant data.
bool IsGeneral(void) const
Check if variant General is selected.
E_Choice Which(void) const
Which variant is currently selected.
bool CanGetMol(void) const
Check if it is safe to call GetMol method.
TGi GetGi(void) const
Get the variant data.
const TMol & GetMol(void) const
Get the Mol member data.
const TLocal & GetLocal(void) const
Get the variant data.
const TGeneral & GetGeneral(void) const
Get the variant data.
bool IsGi(void) const
Check if variant Gi is selected.
@ e_General
for other databases
@ e_Gi
GenInfo Integrated Database.
char * dbname(DBPROCESS *dbproc)
Get name of current database.
unsigned int
A callback function used to compare two keys in a database.
constexpr auto sort(_Init &&init)
#define row(bind, expected)
Element type for numeric tables.
int oid() const
Return the oid.
Int8 id() const
Return the numeric identifier.
Data conversion tools for CWriteDB and associated code.
Defines exception class for WriteDB.
int s_DivideRoundUp(int value, int blocksize)
Divide by a number, rounding up to a whole integer.
bool s_NoCaseEqual(CTempString &a, CTempString &b)
Compare two strings, ignoring case.
USING_SCOPE(std)
Import C++ std namespace.
static string s_IsamExtension(EWriteDBIsamType itype, bool protein, bool is_index)
Compute the file extension for an ISAM file.
Code for database isam construction.
EWriteDBIsamType
Type of ISAM index.
@ eAcc
Accession (string) Index.
@ ePig
Protein Identifier Group.
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4