<
typenameword_t >
67 template<
boolLEGACY >
77 template<
boolLEGACY >
87 template<
boolLEGACY >
91 template<
typenameiterator_t >
118{
Reset( offset_data,
key, ws ); }
166 template<
typenameiterator_t >
187 unsigned longstride,
unsigned longws_hint );
197 unsigned longstride,
unsigned longws_hint );
216 unsigned longh = offset_data.
hkey_width() - 1;
217 unsigned longs = offset_data.
getStride();
218 unsigned longw = offset_data.
getWSHint();
257 if(
curr_== 0 )
return false;
302{
return more_!= 0; }
305 template<
typenameiterator_t >
307 TWord**
map,
unsigned longhkey_width,
308 unsigned longstride,
unsigned longws_hint )
309:
TBase(
map, hkey_width, stride, ws_hint )
322 template<
boolLEGACY >
331 template<
boolLEGACY >
359 constvector< string > & idmap,
440 virtual void Remap();
454 constSSearchOptions & search_options );
466 template<
boolLEGACY >
469 constvector< string > & idmap,
TWord*
data)
470: mapfile_(
map), map_start_( 0 ), version_(
VERSION),
486 stride_, GetIndexWSHint< LEGACY >( header ) );
491 else if(
data!= 0 ) {
497 stride_, GetIndexWSHint< LEGACY >( header ) );
504 template<
boolLEGACY >
507 if( mapfile_ != 0 ) {
508 deletesubject_map_; subject_map_ = 0;
509 deleteoffset_data_; offset_data_ = 0;
511map_ = (
TWord*)(mapfile_->Map( subject_map_offset_ ));
512subject_map_ =
new TSubjectMap( &map_, start_, stop_, stride_ );
517 template<
boolLEGACY >
530 template<
boolLEGACY >
534vector< string > idmap;
535 stringidmap_fname = fname +
".map";
538 while( idmap_stream ) {
540idmap_stream >> line;
541idmap.push_back( line );
557 ERR_POST(
"not enough memory for index");
560 "not enough memory for index");
563s.read( (
char*)
data,
l);
564header = ReadIndexHeader< LEGACY >(
data);
569header = ReadIndexHeader< LEGACY >(
map->GetPtr() );
Types of exception the indexing library can throw.
Implementation of the BLAST database index.
const Uint1 * GetSeqStoreBase() const
Get the start of compressed raw sequence data.
size_t subject_map_offset_
Offset of the subject map in the index file.
unsigned long version_
Index format version.
virtual CConstRef< CSearchResults > DoSearch(const BLAST_SequenceBlk *query, const BlastSeqLoc *locs, const SSearchOptions &search_options)
The search procedure for this specialized index implementation.
TOffsetData * offset_data_
Offset lists.
static const unsigned long HEADER_SIZE
Size of the index file header for index format version >= 2.
TTraits::TOffsetData TOffsetData
CMemoryFile * mapfile_
Memory mapped file.
TWord * map_
Start of memory mapped file data.
TOffsetData::TIterator TOffsetIterator
TWord * map_start_
Start of the index data, when not mapped.
const TOffsetIterator OffsetIterator(TWord nmer, unsigned long mod) const
Create an offset list iterator corresponding to the given Nmer value.
TTraits::TSubjectMap TSubjectMap
virtual TSeqPos GetSeqLen(TSeqNum oid) const
Get the length of the subject sequence.
TSeqNum NumChunks() const
Get the total number of sequence chunks in the index.
const TSubjectMap & GetSubjectMap() const
Get the subject map instance from the index object.
CDbIndex_Traits< LEGACY > TTraits
Offset data and subject map types computer.
virtual void Remap()
If possible, reduce the index footprint by unmapping the portion that does not contain sequence data.
~CDbIndex_Impl()
Object destructor.
CDbIndex_Impl(CMemoryFile *map, const SIndexHeader &header, const vector< string > &idmap, TWord *data=0)
Create an index object from mapped memory segment.
TSeqNum NumSubjects() const
Get the total number of logical sequences in the index.
unsigned long stride_
Stride value used during index creation.
virtual unsigned long Version() const
Get the index format version.
virtual const Uint1 * GetSeqData(TSeqNum oid) const
Get the sequence data of the subject sequence.
unsigned long hkey_width() const
Get the hash key width of the index.
Base class providing high level interface to index objects.
static CRef< CDbIndex > LoadIndex(CNcbiIstream &is)
Load index from an open stream.
TSeqNum start_
OID of the first sequence in the index.
TSeqNum stop_chunk_
Number of the last chunk of the last sequence.
Uint4 TWord
Type representing main memory unit of the index structure.
SIndexHeader header_
The index header structure.
TSeqNum start_chunk_
Number of the first chunk of the first sequence.
TSubjectMap * subject_map_
The subject map object.
vector< string > idmap_
Mapping from source ids to bioseq ids.
TSeqNum stop_
OID of the last sequence in the index.
CSequenceIStream::TStreamPos TSeqNum
Type used to enumerate sequences in the index.
Class representing index hash table and offset list database.
THashTable hash_table_
The hash table (mapping from Nmer values to the lists of offsets).
TWord total_
Auxiliary data member used for importing the offset list data.
unsigned long getMinOffset() const
Accessor for minimum offset value.
unsigned long hkey_width() const
Get the width of the hash key in base pairs.
unsigned long getWSHint() const
Accessor for ws_hint value.
unsigned long getStride() const
Accessor for stride value.
Iterator specific functionality of offset list manager class.
TWord * data_start_
Start of the offset data.
TOffsets offsets_
Concatenated offset list data.
COffsetData_Base TBase
Base class alias.
CVectorWrap< TWord > TOffsets
Type used to store offset lists.
iterator_t TIterator
Type used to iterate over an offset list.
COffsetData(CNcbiIstream &is, unsigned long hkey_width, unsigned long stride, unsigned long ws_hint)
Construct the object from the data in the given input stream.
Iterator for 0-terminated pre-ordered offset lists.
const TWord * curr_
Current position in the offset list.
TOffsetValue getOffsetValue() const
bool More()
Check if more data is available in the iterator.
bool boundary_
Flag indicating the current offset is actually extra information for boundary cases.
COffsetData_Base::TOffsetValue TOffsetValue
CPreOrderedOffsetIterator()
unsigned long more_
Flag indicating that more values are available.
bool Next()
Advance the iterator.
unsigned long mod_
Determines which offsets to skip.
COffsetData< CPreOrderedOffsetIterator > TOffsetData
Type of offset data class supported by this iterator.
TWord offset_
Current cached offset value.
unsigned long min_offset_
Minimum offset used by the index.
CPreOrderedOffsetIterator(const TOffsetData &offset_data, TWord key, unsigned long ws)
Object constructor.
TWord Offset() const
Iterator dereference.
Type representing subject map data.
const Uint1 * GetSeqStoreBase() const
Return the start of the raw storage for compressed subject sequence data.
TSeqNum NumSubjects() const
Get the total number of logical sequences in the map.
TSeqNum NumChunks() const
Get the total number of sequence chunks in the map.
TSeqPos GetSeqLen(TSeqNum oid) const
Get the length of the subject sequence.
const Uint1 * GetSeqData(TSeqNum oid) const
Get the sequence data of the subject sequence.
TVector::size_type size_type
void SetPtr(T *base, size_type sz)
Make the object hold an external sequence.
void ReadWord(CNcbiIstream &is, word_t &data)
Read a word from the input stream.
unsigned long GetIndexStride(const SIndexHeader &header)
Get the stride value associated with the index.
const SIndexHeader ReadIndexHeader(void *map)
Read the index header information from the given input stream.
CDbIndex::TSeqNum TSeqNum
Forwarding declarations for convenience.
CMemoryFile * MapFile(const std::string &fname)
Memory map a file and return a pointer to the mapped area.
unsigned long GetIndexWSHint(const SIndexHeader &header)
Get the ws_hint value associated with the index.
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Int8 GetLength(void) const
Get size of file.
bool Unmap(void)
Unmap file if mapped.
void * GetPtr(void) const
Get pointer to beginning of data.
uint8_t Uint1
1-byte (8-bit) unsigned integer
uint32_t Uint4
4-byte (32-bit) unsigned integer
int64_t Int8
8-byte (64-bit) signed integer
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
IO_PREFIX::ifstream CNcbiIfstream
Portable alias for ifstream.
const struct ncbi::grid::netcache::search::fields::KEY key
Defines classes: CDirEntry, CFile, CDir, CSymLink, CMemoryFile, CFileUtil, CFileLock,...
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
static SLJIT_INLINE sljit_ins l(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
Structure to hold a sequence.
Used to hold a set of positions, mostly used for filtering.
Simple record type used to specify index search parameters.
Some computed type definitions.
COffsetData< CPreOrderedOffsetIterator > TOffsetData
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4