A RetroSearch Logo

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Search Query:

Showing content from http://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/doxyhtml/dbindex_8hpp_source.html below:

NCBI C++ ToolKit: include/algo/blast/dbindex/dbindex.hpp Source File

33 #ifndef C_DB_INDEX_HPP 34 #define C_DB_INDEX_HPP 71

extern

unsigned long GetCodeBits

(

unsigned long

stride );

103  switch

( GetErrCode() ) {

104  case

eFile:

return "access failure"

;

105  case eRead

:

return "read failure"

;

106  case

eWrite:

return "write failure"

;

107  case

eEndian:

return "endianness mismatch"

;

108  case

eVersion:

return "unknown index format version"

;

109  case eSize

:

return "wrong header size"

;

126  static const Uint4

INDEX_FORMAT_VERSION_0 = 0;

129  static const Uint4

INDEX_FORMAT_VERSION_1 = 1;

135  static Uint4

GetSystemEndianness(

void

);

193  static const size_t

COMMON_SIZE = 2*

sizeof

(

Uint4

);

202  void

Save( std::ostream & os,

const std::string

& fname );

271  static const size_t

EXPECTED_SIZE = COMMON_SIZE + 2*

sizeof

(

Uint4

);

316 template

<

typename

T >

425  virtual const char

* GetErrCodeString()

const override

;

441  static const unsigned long CR

= 4;

447  static const unsigned long

STRIDE = 5;

455  static const unsigned long

MIN_OFFSET = 64;

460  static const unsigned long

CODE_BITS = 3;

463  static const unsigned char VERSION

= (

unsigned

char)5;

515  unsigned long

word_size,

517  const TWord

*

map

,

size_t

map_size )

518

: word_size_( word_size ), start_( start ), results_(

size

, 0 )

520  for

(

size_t i

= 0;

i

< map_size; ++

i

) {

521

map_.push_back(

map

[

i

] );

531  if

( seq == 0 )

return

0;

532  else if

( seq - start_ - 1 >= results_.size() )

return

0;

533  else return

results_[seq - start_ - 1];

552  if

( subj >= map_.size() )

return

0;

553  return

(

TSeqNum

)(map_[subj]) + chunk;

564 

{

return

GetResults( MapSubject( subj, chunk ) ); }

575  if

( subj >= map_.size() )

return false

;

578  TSeqNum

start = MapSubject( subj, 0 );

579  TSeqNum

end = MapSubject( subj + 1, 0 );

580  if

( end == 0 ) end = start_ +

static_cast<TSeqNum>

(results_.size()) + 1;

582  for

(

TSeqNum

chunk = start; chunk < end; ++chunk ) {

583  if

( GetResults( chunk ) != 0 ) {

599  if

( seq > 0 && seq - start_ - 1 < results_.size() ) {

600

results_[seq - start_ - 1] = res;

607  for

( TResults::iterator it = results_.begin();

608

it != results_.end(); ++it ) {

661  static void

MakeIndex(

681

{ MakeIndex( fname, oname, start, 0, stop, stop_chunk, options ); }

690  static void

MakeIndex(

713  static void

MakeIndex(

733

{ MakeIndex(

input

, oname, start, 0, stop, stop_chunk, options ); }

742  static void

MakeIndex(

766  const

SSearchOptions & search_options

804  "GetSeqLen() is not supported in this index version."

);

818  "GetSeqData() is not supported in this index version."

);

843  template

<

bool

LEGACY >

874  ASSERT

( oid >= getStartOId() );

875  return

oid - getStartOId();

880  ASSERT

( sid <= getStopOId() - getStartOId() );

881  return

sid + getStartOId();

885  unsigned long getStride

()

const

{

return

header_.stride_; }

886  unsigned long getWSHint

()

const

{

return

header_.ws_hint_; }

896

pair< TSeqNum, TSeqNum > getSRCId(

TSeqNum

cid )

const

;

900 

{

return

getChunkLength( getCId( sid, rcid ) ); }

903 

{

return

getSIdByCId( getCIdByLRCId( lid, rcid ) ); }

904

pair< TSeqNum, TSeqPos > getRCIdOffByLIdOff(

TSeqNum

lid,

TSeqPos

loff )

const

;

908

pair< TSeqNum, TSeqPos >

t

= getRCIdOffByLIdOff( lid, loff );

909  return

make_pair( getCIdByLRCId( lid,

t

.first ),

t

.second );

916

pair< TSeqNum, TSeqNum >

t

= getSRCId( cid );

917  return

make_pair(

t

.first, getSOff(

t

.first,

t

.second, coff ) );

922

pair< TSeqNum, TSeqPos >

t

= getCIdOffByLIdOff( lid, loff );

923  return

getSIdOffByCIdOff(

t

.first,

t

.second );

926  TSeqNum

getNumSubjects()

const

;

932  TSeqNum

getLId(

const

TOffsetValue & v )

const

;

933  TSeqPos

getLOff(

const

TOffsetValue & v )

const

;

937  if

( sid < idmap_.size() )

return

idmap_[sid];

938  else return "unknown"

;

941  const

vector< string > &

getIdMap

()

const

{

return

idmap_; }

983  unsigned long

stride,

unsigned long

ws_hint );

1065  unsigned long

stride );

1079  unsigned long

stride );

1113  return

*(ptr + 1) - *ptr;

1152  return

std::make_pair(

1188  TWord

lid_start = *ptr;

1194  ASSERT

( siter != eiter );

1195

TChunksIter res = std::upper_bound( siter, eiter, abs_offset );

1199  return

std::make_pair(

1201

(

TSeqPos

)(soff - (*res - lid_start)*

CR

) );

1215  return

start + lchunk;

1296  return

start + rcid;

1309  TWord

lid_start = *ptr;

1316  ASSERT

( siter != eiter );

1317

TChunksIter res = std::upper_bound( siter, eiter, abs_offset );

1321  return

std::make_pair(

1323

(

TSeqPos

)(loff - (*res - lid_start)*

CR

) );

1397 inline

pair< CDbIndex::TSeqNum, CDbIndex::TSeqNum >

1408 inline

pair< CDbIndex::TSeqNum, TSeqPos >

Definitions used throughout BLAST.

#define NCBI_XBLAST_EXPORT

NULL operations for other cases.

Ungapped extension structures that are common to nucleotide and protein extension routines.

BlastInitHitList * BLAST_InitHitListFree(BlastInitHitList *init_hitlist)

Free memory for the BlastInitList structure.

This class represents a set of seeds obtained by searching all subjects represented by the index.

unsigned long GetWordSize() const

Get the search word size.

void SetResults(TSeqNum seq, BlastInitHitList *res)

Set the result set for a given logical subject.

TSeqNum start_

Starting logical subject number.

CSearchResults(unsigned long word_size, TSeqNum start, TSeqNum size, const TWord *map, size_t map_size)

Object constructor.

BlastInitHitList * GetResults(TSeqNum subj, TSeqNum chunk) const

Get the result set for a particular subject and chunk.

vector< Uint8 > map_

(subject,chunk)->(logical id) map.

~CSearchResults()

Object destructor.

TSeqNum MapSubject(TSeqNum subj, TSeqNum chunk) const

Map a subject sequence and a chunk number to internal logical id.

CDbIndex::TWord TWord

Convenience declaration.

unsigned long word_size_

Word size used for the search.

bool CheckResults(TSeqNum subj) const

Check if any results are available for a given subject sequence.

BlastInitHitList * GetResults(TSeqNum seq) const

Get the result set for a particular logical subject.

vector< BlastInitHitList * > TResults

Each vector item points to results for a particular logical subject.

TResults results_

The combined result set.

TSeqNum NumSeq() const

Get the number of logical sequences in the results set.

Types of exception the indexing library can throw.

EErrCode

Numerical error codes.

@ eBadOption

Bad index creation/search option.

@ eBadVersion

Wrong index version.

@ eBadData

Bad index data.

@ eBadSequence

Bad input sequence data.

NCBI_EXCEPTION_DEFAULT(CDbIndex_Exception, CException)

Base class providing high level interface to index objects.

TSeqNum StartSeq() const

Get the OID of the first sequence in the index.

pair< TSeqNum, TSeqPos > getSIdOffByCIdOff(TSeqNum cid, TSeqPos coff) const

TSeqNum getNumSubjects() const

static CRef< CDbIndex > LoadIndex(CNcbiIstream &is)

Load index from an open stream.

const Uint1 * getSeqData(TSeqNum sid) const

TSeqNum StopChunk() const

Get the number of the last chunk of the last sequence in the index.

TSeqNum getSIdByLRCId(TSeqNum lid, TSeqNum rcid) const

TSeqNum getSIdByOId(TSeqNum oid) const

static void MakeIndex(CSequenceIStream &input, const std::string &oname, TSeqNum start, TSeqNum &stop, TSeqNum &stop_chunk, const SOptions &options)

Create an index object.

TSeqPos getSOff(TSeqNum sid, TSeqNum rcid, TSeqPos coff) const

pair< TSeqNum, TSeqPos > getRCIdOffByLIdOff(TSeqNum lid, TSeqPos loff) const

TSeqNum getCIdByLRCId(TSeqNum lid, TSeqNum rcid) const

const vector< string > & getIdMap() const

TSeqNum getCId(TSeqNum sid, TSeqNum rcid) const

pair< TSeqNum, TSeqNum > getSRCId(TSeqNum cid) const

virtual ~CDbIndex()

Index object destructor.

pair< TSeqNum, TSeqPos > getCIdOffByLIdOff(TSeqNum lid, TSeqPos loff) const

const string getBioseqIdBySId(TSeqNum sid) const

TSeqNum start_

OID of the first sequence in the index.

SOffsetValue TOffsetValue

unsigned long getChunkOverlap() const

unsigned long getHKeyWidth() const

TSeqNum getCId(TSeqNum sid) const

unsigned long getMaxChunkSize() const

virtual const Uint1 * GetSeqData(TSeqNum) const

Get the sequence data of the subject sequence.

TSeqNum stop_chunk_

Number of the last chunk of the last sequence.

Uint4 TWord

Type representing main memory unit of the index structure.

virtual TSeqPos GetSeqLen(TSeqNum) const

Get the length of the subject sequence.

SIndexHeader header_

The index header structure.

TSeqNum start_chunk_

Number of the first chunk of the first sequence.

unsigned long getWSHint() const

static void MakeIndex(const std::string &fname, const std::string &oname, TSeqNum start, TSeqNum &stop, TSeqNum &stop_chunk, const SOptions &options)

Create an index object.

TSeqNum getLId(const TOffsetValue &v) const

TWord getChunkLength(TSeqNum cid) const

TSeqPos getLOff(const TOffsetValue &v) const

TSubjectMap * subject_map_

The subject map object.

TWord getChunkLength(TSeqNum sid, TSeqNum rcid) const

TSeqNum getSIdByCId(TSeqNum cid) const

unsigned long getStride() const

TSeqNum StartChunk() const

Get the number of the first chunk of the first sequence in the index.

vector< string > idmap_

Mapping from source ids to bioseq ids.

TSeqNum getNumChunks() const

virtual CConstRef< CSearchResults > DoSearch(const BLAST_SequenceBlk *, const BlastSeqLoc *, const SSearchOptions &)

Actual implementation of seed searching.

pair< TSeqNum, TSeqPos > getSIdOffByLIdOff(TSeqNum lid, TSeqPos loff) const

static const unsigned long CR

Letters per byte in the sequence store.

virtual void Remap()

If possible, reduce the index footprint by unmapping the portion that does not contain sequence data.

TSeqNum stop_

OID of the last sequence in the index.

CSequenceIStream::TStreamPos TSeqNum

Type used to enumerate sequences in the index.

TWord getSubjectLength(TSeqNum sid) const

TSeqNum getStopOId() const

TSeqNum getStartOId() const

TSeqNum getOIdBySId(TSeqNum sid) const

TSeqNum StopSeq() const

Get the OID of the last sequence in the index.

Class representing index hash table and offset list database.

THashTable hash_table_

The hash table (mapping from Nmer values to the lists of offsets).

CVectorWrap< TWord > THashTable

The type of the hash table.

COffsetData_Base(TWord **map, unsigned long hkey_width, unsigned long stride, unsigned long ws_hint)

Object constructor.

TWord total_

Auxiliary data member used for importing the offset list data.

unsigned long min_offset_

Minimum offset value used by the index.

unsigned long stride_

Stride value used by the index.

unsigned long getMinOffset() const

Accessor for minimum offset value.

CDbIndex::TWord TWord

Index word type (public to support Solaris).

unsigned long ws_hint_

ws_hint value used by the index.

unsigned long hkey_width_

Hash key width in bp.

unsigned long hkey_width() const

Get the width of the hash key in base pairs.

CDbIndex::SOffsetValue TOffsetValue

unsigned long getWSHint() const

Accessor for ws_hint value.

unsigned long getStride() const

Accessor for stride value.

Iterator for 0-terminated pre-ordered offset lists.

Class used to abstract reading nucleotide sequences from various sources.

Uint4 TStreamPos

Type used to represent positions within a sequence stream.

Type representing subject map data.

TSeqNum getLId(const TOffsetValue &v) const

TLengths lengths_

Subject lengths storage.

TWord getSubjectLength(TSeqNum sid) const

const Uint1 * GetSeqStoreBase() const

Return the start of the raw storage for compressed subject sequence data.

TSeqNum GetNumChunks(TSeqNum lid) const

Get number of chunks combined into a given logical sequence.

unsigned long GetStride() const

Accessor for stride value.

CVectorWrap< TWord > TChunks

Type for storing the chunk data.

CDbIndex::TOffsetValue TOffsetValue

TWord offset_mask_

Mask to extract offsets.

pair< TSeqNum, TSeqNum > TSCPair

CVectorWrap< TWord > TLIdMap

Local id -> chunks map storage type.

unsigned long stride_

Index stride value.

TSCPair getSRCId(TSeqNum cid) const

std::pair< TSeqNum, TSeqPos > DecodeOffset(TWord offset) const

Decode offset.

pair< TSeqNum, TSeqPos > TSOPair

TSeqNum getCIdByLRCId(TSeqNum lid, TSeqNum rcid) const

TSeqNum NumSubjects() const

Get the total number of logical sequences in the map.

unsigned long max_chunk_size_

TSeqNum MapSubject(TSeqNum subject, TSeqNum chunk) const

Get the logical sequence id from the database oid and the chunk number.

void Load(TWord **map, TSeqNum start, TSeqNum stop, unsigned long stride)

Loads index by mapping to the memory segment.

unsigned long min_offset_

Minimum offset used by the index.

const TWord * GetSubjectMap() const

Provides a mapping from real subject ids and chunk numbers to internal logical subject ids.

Uint1 offset_bits_

Number of bits used to encode offset.

TWord total_

Size in bytes of the raw sequence storage.

TLIdMap lid_map_

Local id -> chunk map storage.

TSeqNum NumChunks() const

Get the total number of sequence chunks in the map.

std::pair< TSeqNum, TSeqPos > MapSubjOff(TSeqNum lid, TSeqPos soff) const

Map logical sequence id and logical sequence offset to relative chunk number and chunk offset.

TSubjects subjects_

Mapping from database oids to the chunk info.

TSeqPos getSOff(TSeqNum sid, TSeqNum rcid, TSeqPos coff) const

TChunks chunks_

Collection of individual chunk descriptors.

TWord GetSeqStoreSize() const

Return the size in bytes of the raw sequence storage.

TSeqStore seq_store_

Storage for the raw subject sequence data.

CVectorWrap< TWord > TLengths

Subject lengths storage type.

TSeqPos GetSeqLen(TSeqNum oid) const

Get the length of the subject sequence.

unsigned long chunk_overlap_

TSeqNum getNumChunks(TSeqNum sid) const

void SetSeqDataFromMap(TWord **map)

Set up the sequence store from the memory segment.

const Uint1 * getSeqData(TSeqNum sid) const

void SetSubjInfo(TSeqNum subj, TWord &start, TWord &end) const

Return the subject information based on the given logical subject id.

const Uint1 * GetSeqData(TSeqNum oid) const

Get the sequence data of the subject sequence.

TSeqNum MapLId2Chunk(TSeqNum lid, TSeqNum lchunk) const

Map logical id and relative chunk to absolute chunk id.

CSubjectMap()

Trivial constructor.

TSeqPos getLOff(const TOffsetValue &v) const

TSeqNum getNumChunks() const

TSOPair getRCIdOffByLIdOff(TSeqNum lid, TSeqPos loff) const

TSCPairMap c2s_map_

CId -> (SId, RCId) map.

CVectorWrap< TWord > TSubjects

Type used to map database oids to the chunk info.

CVectorWrap< Uint1 > TSeqStore

Type used for compressed subject sequence data storage.

CDbIndex::TSeqNum TSeqNum

vector< TSCPair > TSCPairMap

TSeqNum getCId(TSeqNum sid, TSeqNum rcid) const

TWord getChunkLength(TSeqNum cid) const

TSeqNum getNumSubjects() const

A vector or pointer based sequence wrapper.

void resize(size_type n, T v=T())

Change the size of the sequence.

TVector::reference reference

bool vec_

Flag indicating whether it is a wrapper or a holder of external sequence.

std::vector< T > TVector

Sequence type being wrapped.

TVector::size_type size_type

T * base_

Pointer to the first element of the sequence.

const T * const_iterator

Iterator type pointing to const data.

TVector data_

std::vector object wrapped by this object.

void SetPtr(T *base, size_type sz)

Make the object hold an external sequence.

size_type size() const

Get the sequence size.

TVector::const_reference const_reference

TVector::value_type value_type

const_reference operator[](size_type n) const

Indexing operator.

const_iterator begin() const

Get the start of the sequence.

CVectorWrap(size_type sz=0, T v=T())

Object constructor.

reference operator[](size_type n)

Indexing operator.

size_type size_

Size of the external sequence.

const_iterator end() const

Get the end of the sequence.

const unsigned long WIDTH_32

32-bit index.

const unsigned long OFFSET_COMBINED

Combination of chunk number and chunk-based offset.

CRef< CIndexSuperHeader_Base > GetIndexSuperHeader(const std::string &fname)

Read superheader structure from the file.

const unsigned long TWO_HIT

Use two-hit search.

const unsigned long REPORT_QUIET

No progress reporting.

const unsigned long REPORT_NORMAL

Normal reporting.

unsigned long GetMinOffset(unsigned long stride)

Compute the minimum offset value needed to encode offsets based on stride.

const unsigned long ONE_HIT

Use one-hit search (normal).

const unsigned long UNCOMPRESSED

No compression.

const unsigned long REPORT_VERBOSE

Verbose reporting.

unsigned long GetCodeBits(unsigned long stride)

Compute the number of bits to encode special offsets based on stride.

size_t GetIdxVolNumOIDs(const std::string &fname)

Read the index header information from the given file.

static const unsigned long CR

CDbIndex::TSeqNum TSeqNum

Forwarding declarations for convenience.

unsigned int TSeqPos

Type for sequence locations and lengths.

#define NCBI_THROW(exception_class, err_code, message)

Generic macro to throw an exception, given the exception class, error code and message string.

virtual const char * GetErrCodeString(void) const

Get error code interpreted as text.

uint8_t Uint1

1-byte (8-bit) unsigned integer

uint32_t Uint4

4-byte (32-bit) unsigned integer

#define END_NCBI_SCOPE

End previously defined NCBI scope.

#define END_SCOPE(ns)

End the previously defined scope.

#define BEGIN_NCBI_SCOPE

Define ncbi namespace.

#define BEGIN_SCOPE(ns)

Define a new scope.

IO_PREFIX::istream CNcbiIstream

Portable alias for istream.

const string version

version string

double value_type

The numeric datatype used by the parser.

const struct ncbi::grid::netcache::search::fields::SIZE size

#define ASSERT

macro for assert.

Portable reference counted smart and weak pointers using CWeakRef, CRef, CObject and CObjectEx.

Structure to hold a sequence.

Structure to hold all initial HSPs for a given subject sequence.

Used to hold a set of positions, mostly used for filtering.

Simple record type used to specify index creation parameters.

bool legacy

Indicator of the legacy index format.

unsigned long report_level

Verbose index creation.

unsigned long max_index_size

Maximum index size in megabytes.

unsigned long chunk_size

Long sequences are split into chunks of this size.

std::string stat_file_name

File to write index statistics into.

unsigned long ws_hint

Most likely word size to use for searches.

unsigned long chunk_overlap

Amount by which individual chunks overlap.

bool idmap

Indicator of the index map creation.

unsigned long hkey_width

Width of the hash key in bits.

unsigned long stride

Stride to use for stored database locations.

Simple record type used to specify index search parameters.

unsigned long two_hits

Window for two-hit method (see megablast docs).

unsigned long word_size

Target seed length.


RetroSearch is an open source project built by @garambo | Open a GitHub Issue

Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo

HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4