A RetroSearch Logo

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Search Query:

Showing content from http://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/doxyhtml/blast__dbindex_8cpp_source.html below:

NCBI C++ ToolKit: src/algo/blast/api/blast_dbindex.cpp Source File

59 # define IDX_TRACE(_m) { std::cerr << _m << std::endl; } 61 # define IDX_TRACE(_m) 214  typedef

vector< CConstRef< CDbIndex::CSearchResults > >

TResultSet

;

227  for

( TSeqMap::size_type

i

= 0;

i

<

seqmap_

.size(); ++

i

) {

306

lut_options, word_options );

353  return a

.start_oid <

b

.start_oid;

369  typedef

std::vector< SVolumeDescriptor >

TVolList

;

463

TVolList::const_iterator

r

(

490  explicit CIndexedDb_New

(

const string

& indexname,

bool

& partial );

551  IDX_TRACE

(

"setting multiple threads to "

<<

552

(multiple_threads ?

"true"

:

"false"

) );

564  IDX_TRACE

(

"setting number of search threads to "

<< n_threads );

583  if

( idb == 0 )

return

;

584

idb->

DoPreSearch

( queries, lut_options, word_options );

594  if

( idb == 0 )

return

;

596  if

( idbn == 0 )

return

;

607  if

( idb == 0 )

return

;

609  if

( idbn == 0 )

return

;

623  if

( idb == 0 )

return

;

634  static const char

*

SEP

=

" "

;

636

string::size_type pos( 0 ), pos1( 0 );

638  while

( pos1 != string::npos ) {

639

pos1 = db_spec.find_first_of(

SEP

, pos );

640

db_names.push_back( db_spec.substr( pos, pos1 - pos ) );

667  bool

idx_not_resolved(

false

);

677  Info

<<

"index superheader for volume "

<< vol_name

678

<<

" was not loaded ("

<< e.

what

() <<

")"

);

679

idx_not_resolved =

true

;

682  if

( !idx_not_resolved && shdr->GetNumSeq() != dbnseq ) {

684  Error

<<

"numbers of OIDs reported by the database and " 685

<<

"by the index do not match. Index for volume " 686

<< vol_name <<

" will not be used"

);

687

idx_not_resolved =

true

;

690  if

( !idx_not_resolved ) {

691  size_t

curr_vols_size(

volumes_

.size() );

692  size_t

total_idxvol_oids( 0 );

694  for

(

size_t i

( 0 ), e( shdr->GetNumVol() );

i

< e; ++

i

) {

699  if

( name.empty() ) {

701  Error

<<

"index volume "

<< name

702

<<

" not resolved; index will not be used for " 704

idx_not_resolved =

true

;

707  if

( !idx_not_resolved ) {

710  if

( idxvol_oids == 0 ) {

711

idx_not_resolved =

true

;

713  Error

<<

"index volume "

<< name

714

<<

" reports no sequences; index will " 715

<<

"not be used for "

<< vol_name );

721

total_idxvol_oids += idxvol_oids;

725  if

( idx_not_resolved ) {

731  if

( !idx_not_resolved && dbnseq != total_idxvol_oids ) {

733  Error

<<

"total of oids reported by index volumes (" 734

<< total_idxvol_oids <<

") does not match " 735

<<

"the number of oids reported by the superheader (" 736

<< dbnseq <<

"); index will not be used for " 739

idx_not_resolved =

true

;

743

partial = (partial || idx_not_resolved);

745  if

( idx_not_resolved ) {

754

: queries_( 0 ), multiple_threads_(

false

), n_threads_( 1 )

757  IDX_TRACE

(

"creating new style CIndexedDb object"

);

762  IDX_TRACE

(

"db spec given: "

<< indexname );

772  IDX_TRACE

(

"list of database volumes in order:"

);

778  IDX_TRACE

(

"final index volume list:"

);

784  bool

has_index(

false

);

788  if

(

i

->has_index ) {

796  "no database volume has an index"

);

808  IDX_TRACE

(

"destroying new style CIndexedDb object"

);

814  Int4

& vol_idx( *vol_idx_p );

816  bool

find_volume(

true

);

823  if

( !find_volume )

return

;

824

TVolList::const_iterator vi(

FindVolume

( oid ) );

825

new_vol_idx = vi -

volumes_

.begin();

826  if

( !vi->has_index ) { vol_idx = new_vol_idx;

return

; }

829  Int4

min_vol_idx( vol_idx == -1 ? 0 : vol_idx );

833  IDX_TRACE

(

"loading volume "

<< new_vol_idx <<

": "

<< vi->name );

838

std::ostringstream os;

839

os <<

"CIndexedDb: could not load index volume: "

<< vi->name;

843  IDX_TRACE

(

"searching volume "

<< vi->name );

845  IDX_TRACE

(

"results loaded for "

<< vi->name );

848  for

( ; min_vol_idx < new_vol_idx; ++min_vol_idx ) {

851  IDX_TRACE

(

"unloaded results for volume "

<<

856

vol_idx = new_vol_idx;

863

TVolList::const_iterator vi(

FindVolume

( oid ) );

869

TVolList::const_iterator vi(

volumes_

.begin() + *last_vol_idx );

871

oid -= vi->start_oid;

907

TVolList::const_iterator vi(

FindVolume

( oid ) );

908  ASSERT

( vi->start_oid <= oid );

909  ASSERT

( vi->start_oid + vi->n_oids > oid );

911

oid -= vi->start_oid;

916  if

( (res = vr->

GetResults

( oid, chunk )) != 0 ) {

931  ERR_POST

(

Info

<<

"Minimal supported word size in "

<< fname <<

" is "

<< rv);

946  if

( !indexnames.empty() ) {

947

vector< string > dbnames;

948

string::size_type start = 0, end = 0;

952  while

( start != string::npos ) {

953

end = indexnames.find_first_of(

" "

, start );

954

dbnames.push_back( indexnames.substr( start, end - start ) );

955

start = indexnames.find_first_not_of(

" "

, end );

961

dbni != dbnames.end(); ++dbni ) {

962  const string

& indexname = *dbni;

965  unsigned long

start_vol = 0, stop_vol = 99;

967

end = indexname.find_first_of(

","

, start );

968  string

index_base = indexname.substr( start, end );

971  if

( start < indexname.length() && end != string::npos ) {

972

end = indexname.find_first_of(

","

, start );

975  if

( start < indexname.length() && end != string::npos ) {

976

end = indexname.find_first_of(

","

, start );

977  string

start_vol_str =

978

indexname.substr( start, end - start );

980  if

( !start_vol_str.empty() ) {

981

start_vol = atoi( start_vol_str.c_str() );

986  if

( start < indexname.length() && end != string::npos ) {

987

end = indexname.find_first_of(

","

, start );

988  string

stop_vol_str =

989

indexname.substr( start, end - start);

991  if

( !stop_vol_str.empty() ) {

992

stop_vol = atoi( stop_vol_str.c_str() );

998  if

( start_vol <= stop_vol ) {

1001  for

(

long i

= start_vol; (

unsigned

long)

i

<= stop_vol; ++

i

) {

1003

os << index_base <<

"."

<< setw( 2 ) << setfill(

'0'

)

1007  if

( !name.empty() ){

1008  if

(

i

- last_i > 1 ) {

1009  for

(

long

j = last_i + 1; j <

i

; ++j ) {

1011

<< j <<

" not resolved."

);

1024  string msg

(

"no index file specified or index '"

);

1025  msg

+= indexnames +

"*' not found."

;

1045  for

( vector< string >::size_type v = 0;

1055  string

(

"CIndexedDb: could not load index"

) +

1078  if

( (res =

results

->GetResults( oid, chunk )) != 0 ) {

1080  return results

->GetWordSize();

1104  if

(word_size < min_ws) {

1106

rv =

"MegaBLAST database index requires word size greater than "

;

1112

rv =

"Failed to read index MegaBLAST db min word size."

;

1117

rv =

"Empty index db instance"

;

1123  const string

& indexname,

bool

old_style,

bool

& partial,

const int

word_size )

1130  ERR_POST

(

Info

<<

"trying to load new style index at " 1143  else return "index allocation error"

;

1153  ERR_POST

(

Info

<<

"trying to load old style index at " 1162  else return "index allocation error"

;

1229  _ASSERT

( init_hitlist != 0 );

1235

oid, chunk, init_hitlist );

Declarations for indexed blast databases.

void(* DbIndexSetUsingThreadsFnType)(bool multiple_threads)

Type of a callback to set the concurrency state in the index structure.

void(* DbIndexRunSearchFnType)(BLAST_SequenceBlk *queries, LookupTableOptions *lut_options, BlastInitialWordOptions *word_options)

Type of a callback to run the indexed seed search.

void(* DbIndexSetNumThreadsFnType)(size_t n_threads)

Type of a callback to provide the number of threads to the indexing library, when multi-threaded sear...

void(* DbIndexSetQueryInfoFnType)(LookupTableWrap *lt_wrap, CRef< CBlastSeqLocWrap > locs_wrap)

Type of a callback to set the query information in the index structure.

void BlastInitHitListReset(BlastInitHitList *init_hitlist)

Free the ungapped data substructures and reset initial HSP count to 0.

void BlastInitHitListMove(BlastInitHitList *dst, BlastInitHitList *src)

Move the contents of a BlastInitHitList structure.

Structures and functions prototypes used for BLAST gapped extension.

Structures and API used for saving BLAST hits.

Various auxiliary BLAST utility functions.

BlastSeqLoc * getLocs() const

Get access to the held object.

unsigned long GetWordSize() const

Get the search word size.

BlastInitHitList * GetResults(TSeqNum seq) const

Get the result set for a particular logical subject.

Types of exception the indexing library can throw.

TSeqNum StartSeq() const

Get the OID of the first sequence in the index.

CConstRef< CSearchResults > Search(const BLAST_SequenceBlk *query, const BlastSeqLoc *locs, const SSearchOptions &search_options)

Search the index.

static CRef< CDbIndex > Load(const std::string &fname, bool nomap=false)

Load index.

CSequenceIStream::TStreamPos TSeqNum

Type used to enumerate sequences in the index.

TSeqNum StopSeq() const

Get the OID of the last sequence in the index.

Index wrapper exceptions.

Index wrapper for new style MegaBLAST indexing functionality.

Index wrapper for old style MegaBLAST indexing functionality.

This class is responsible for loading indices and doing the actual seed search.

static void FindVolumePaths(const string &dbname, ESeqType seqtype, vector< string > &paths, vector< string > *alias_paths=NULL, bool recursive=true, bool expand_links=true)

Find volume paths.

int GetNumOIDs() const

Returns the size of the (possibly sparse) OID range.

Include a standard set of the NCBI C++ Toolkit most basic headers.

const SIndexHeader ReadIndexHeader< false >(void *map)

CRef< CIndexSuperHeader_Base > GetIndexSuperHeader(const std::string &fname)

Read superheader structure from the file.

size_t GetIdxVolNumOIDs(const std::string &fname)

Read the index header information from the given file.

static const struct name_t names[]

std::string name

Fully qualified name of the volume.

virtual int MinIndexWordSize()=0

int ref_count

How many threads still need the result set.

virtual unsigned long GetResults(CDbIndex::TSeqNum oid, CDbIndex::TSeqNum chunk, BlastInitHitList *init_hitlist) const

Return results corresponding to a given subject sequence and chunk.

void TraceVolumes(void)

This is only used for debugging output.

DbIndexSetQueryInfoFnType GetDbIndexSetQueryInfoFn()

Return the appropriate callback to set query information in the index.

void ClearDbIndexCallbacks(void)

static DbIndexSetNumThreadsFnType SetNumThreadsFn

Global pointer to the appropriate callback to set the number of threads.

virtual ~CIndexedDb()

Object destructor.

static void IndexedDbSetUsingThreads(bool multiple_threads)

Set state of concurrency in the index structure.

vector< CConstRef< CDbIndex::CSearchResults > > TResultSet

Type used to represent collections of search result sets.

friend bool operator<(const SVolumeDescriptor &a, const SVolumeDescriptor &b)

Volumes are compared by their starting ordinal ids.

static void ParseDBNames(const std::string db_spec, TStrVec &db_names)

Generate a list of BLAST database names from a single string.

virtual int CheckOid(Int4 oid, Int4 *last_vol_id)

Check whether any results were reported for a given subject sequence.

friend std::ostream & operator<<(std::ostream &os, const SVolumeDescriptor &vd)

This is only used for debug tracing.

bool multiple_threads_

flag indicating that multithreading is in effect

CDbIndex::SSearchOptions sopt_

common search parameters

virtual ~CIndexedDb_New()

Object destructor.

virtual int CheckOid(Int4 oid, Int4 *last_vol_id)=0

Check whether any results were reported for a given subject sequence.

virtual void EndSearchIndication(Int4)

Not used.

virtual void DoPreSearch(BLAST_SequenceBlk *queries, LookupTableOptions *lut_options, BlastInitialWordOptions *word_options)

Run preliminary indexed search functionality.

std::vector< std::string > TStrVec

Alias for a vector of strings.

virtual void DoPreSearch(BLAST_SequenceBlk *queries, LookupTableOptions *lut_options, BlastInitialWordOptions *word_options)

Wrapper around PreSearch().

USING_SCOPE(ncbi::objects)

static DbIndexSetUsingThreadsFnType SetUsingThreadsFn

Global pointer to the appropriate callback to set the state of concurrency.

virtual void EndSearchIndication(Int4 last_vol_id)=0

Function used by threads to indicate that they are done with iterating over the database sequences.

void SetQueryInfo(CRef< CBlastSeqLocWrap > locs_wrap)

Set the current set of unmasked query segments.

void UpdateIndex(Int4 oid, Int4 *vol_idx)

Update the seed sets, if necessary.

virtual void EndSearchIndication(Int4 last_vol_id)

Function used by threads to indicate that they are done with iterating over the database sequences.

bool has_index

'true' if the volume is indexed.

static void TraceNames(const TStrVec &names)

This is only used for debugging output.

static void NullSetUsingThreads(bool)

No-op callback for setting concurrency state.

TResultsHolder results_holder_

reference counted seed set holders

static void NullRunSearch(BLAST_SequenceBlk *, LookupTableOptions *, BlastInitialWordOptions *)

No-op callback to run indexed search.

CIndexedDb_Old(const string &indexname)

Object constructor.

vector< string > index_names_

List of index volume names.

static unsigned long s_MB_IdbGetResults(Int4 oid_i, Int4 chunk_i, BlastInitHitList *init_hitlist)

Get the seed search results for a given subject id and chunk number.

TVolList volumes_

index volume descriptors

static void IndexedDbSetQueryInfo(LookupTableWrap *lt_wrap, CRef< CBlastSeqLocWrap > locs_wrap)

Set information about unmasked query segments.

virtual int MinIndexWordSize()

Get the minimum acceptable word size to use with indexed search.

virtual void DoPreSearch(BLAST_SequenceBlk *queries, LookupTableOptions *lut_options, BlastInitialWordOptions *word_options)=0

Run preliminary indexed search functionality.

static int s_MB_IdbCheckOid(Int4 oid, Int4 *last_vol_oid)

CFastMutex mtx_

mutex used for thread sync

static DbIndexSetQueryInfoFnType SetQueryInfoFn

Global pointer to the appropriate callback to set query info, based on whether or not index search is...

static int s_GetMinimumSupportedWordSizeByIndex(const string &fname)

CIndexedDb_New(const string &indexname, bool &partial)

Object constructor.

TResultSet results_

Set of result sets, one per loaded index.

static void EnumerateDbVolumes(const TStrVec &db_names, TStrVec &db_vols)

Generate a list of leaf database volumes from a list of database names.

CConstRef< CDbIndex::CSearchResults > TVolResults

This type captures the seeds found by search of an index volume.

std::string DbIndexInit(const string &indexname, bool old_style, bool &partial, const int word_size)

TVolResults res

Seed set or null.

static void NullSetNumThreads(size_t)

No-op callback for setting the number of threads.

static void IndexedDbSetNumThreads(size_t n_threads)

Set the number of concurrent search threads in the index structure.

static void IndexedDbRunSearch(BLAST_SequenceBlk *queries, LookupTableOptions *lut_options, BlastInitialWordOptions *word_options)

Run indexed search.

virtual unsigned long GetResults(CDbIndex::TSeqNum oid, CDbIndex::TSeqNum chunk, BlastInitHitList *init_hitlist) const

Return results corresponding to a given subject sequence and chunk.

static void s_MB_IdxEndSearchIndication(Int4 last_vol_id)

DbIndexSetUsingThreadsFnType GetDbIndexSetUsingThreadsFn()

Return the appropriate callback to set the concurrency state in the index structure.

TSeqMap seqmap_

For each element of indices_ with index i seqmap_[i] contains one plus the last oid of that database ...

BLAST_SequenceBlk * queries_

query data (from BLAST)

void SetMultipleThreads(bool multiple_threads)

Set the concurrency status.

DbIndexRunSearchFnType GetDbIndexRunSearchFn()

Return the appropriate callback to run indexed seed search.

std::string s_CheckMinWordSize(int word_size)

DbIndexSetNumThreadsFnType GetDbIndexSetNumThreadsFn()

Return the appropriate callback to set the number of threads in the index structure.

static DbIndexRunSearchFnType RunSearchFn

Global pointer to the appropriate callback to run indexed search, based on whether or not index searc...

std::vector< SVolResults > TResultsHolder

List of reference counted result holders.

std::vector< SVolumeDescriptor > TVolList

List of leaf index volumes.

void SetNumThreads(size_t n_threads)

Set the number of threads used for concurrent search.

TSeqMap::size_type LocateIndex(CDbIndex::TSeqNum oid) const

Find an index corresponding to the given subject id.

CRef< CBlastSeqLocWrap > locs_wrap_

Current set of unmasked query locations.

vector< CDbIndex::TSeqNum > TSeqMap

Type used to map loaded indices to subject ids.

TVolList::const_iterator FindVolume(SIZE_TYPE oid) const

Find a volume containing the given subject ordinal id.

CRef< CDbIndex > index_

Currently loaded index.

virtual int CheckOid(Int4 oid, Int4 *)

Check whether any results were reported for a given subject sequence.

static CRef< CIndexedDb > Index_Set_Instance

Shared representation of currently loaded index volumes.

void SetUpDbIndexCallbacks(void)

SIZE_TYPE start_oid

OId of the first sequence of the volume.

static void NullSetQueryInfo(LookupTableWrap *, CRef< CBlastSeqLocWrap >)

No-op callback for setting query info.

void AddIndexInfo(const std::string &vol_name, bool &idx_not_resolved)

virtual int MinIndexWordSize()

size_t n_threads_

number of search threads running

SIZE_TYPE GetNextUnusedOID(void) const

Auxiliary function that returns the oid value that is one more than the largest oid used so far.

void PreSearch(BLAST_SequenceBlk *queries, BlastSeqLoc *locs, LookupTableOptions *lut_options, BlastInitialWordOptions *word_options)

Invoke the seed search procedure on each of the loaded indices.

SIZE_TYPE n_oids

Number of sequences in the volume.

virtual unsigned long GetResults(CDbIndex::TSeqNum oid, CDbIndex::TSeqNum chunk, BlastInitHitList *init_hitlist) const =0

Return results corresponding to a given subject sequence and chunk.

#define ITERATE(Type, Var, Cont)

ITERATE macro to sequence through container elements.

#define ERR_POST(message)

Error posting with file, line number information but without error codes.

void Error(CExceptionArgs_Base &args)

#define NCBI_THROW(exception_class, err_code, message)

Generic macro to throw an exception, given the exception class, error code and message string.

virtual const char * what(void) const noexcept

Standard report (includes full backlog).

void Info(CExceptionArgs_Base &args)

void * GetPtr(void) const

Get pointer to beginning of data.

void Reset(void)

Reset reference object.

TObjectType * Release(void)

Release a reference to the object and return a pointer to the object.

int32_t Int4

4-byte (32-bit) signed integer

#define END_NCBI_SCOPE

End previously defined NCBI scope.

#define END_SCOPE(ns)

End the previously defined scope.

#define BEGIN_NCBI_SCOPE

Define ncbi namespace.

#define BEGIN_SCOPE(ns)

Define a new scope.

NCBI_NS_STD::string::size_type SIZE_TYPE

static string IntToString(int value, TNumToStringFlags flags=0, int base=10)

Convert int to string.

Declarations for functions that extract hits from indexed blast databases (specialized for megablast)

#define LAST_VOL_IDX_NULL

#define LAST_VOL_IDX_INIT

constexpr auto sort(_Init &&init)

Magic spell ;-) needed for some weird compilers... very empiric.

#define ASSERT

macro for assert.

Multi-threading – classes, functions, and features.

double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)

Defines BLAST database access classes.

Defines exception class and several constants for SeqDB.

bool SeqDB_CompareVolume(const string &volpath1, const string &volpath2)

Compares two volume file names and determine the volume order.

string SeqDB_ResolveDbPath(const string &filename)

Resolve a file path using SeqDB's path algorithms.

static SLJIT_INLINE sljit_ins msg(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)

Structure to hold a sequence.

Structure to hold all initial HSPs for a given subject sequence.

Options needed for initial word finding and processing.

Int4 window_size

Maximal allowed distance between 2 hits in case 2 hits are required to trigger the extension.

Used to hold a set of positions, mostly used for filtering.

Simple record type used to specify index search parameters.

unsigned long two_hits

Window for two-hit method (see megablast docs).

unsigned long word_size

Target seed length.

Reference count for the volume results.

Information about one leaf index volume.

Options needed to construct a lookup table. Also needed: query sequence and query length.

Int4 word_size

Determines the size of the lookup table.

Wrapper structure for different types of BLAST lookup tables.

void * end_search_indication

function used to report that a thread is done iterating over the database in preliminary search

void * check_index_oid

function used to check if seeds for a given oid are present

void * read_indexed_db

function used to retrieve hits from an indexed database


RetroSearch is an open source project built by @garambo | Open a GitHub Issue

Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo

HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4