vector< CConstRef< CDbIndex::CSearchResults > >
TResultSet;
227 for( TSeqMap::size_type
i= 0;
i<
seqmap_.size(); ++
i) {
306lut_options, word_options );
353 return a.start_oid <
b.start_oid;
369 typedefstd::vector< SVolumeDescriptor >
TVolList;
463TVolList::const_iterator
r(
490 explicit CIndexedDb_New(
const string& indexname,
bool& partial );
551 IDX_TRACE(
"setting multiple threads to "<<
552(multiple_threads ?
"true":
"false") );
564 IDX_TRACE(
"setting number of search threads to "<< n_threads );
583 if( idb == 0 )
return;
584idb->
DoPreSearch( queries, lut_options, word_options );
594 if( idb == 0 )
return;
596 if( idbn == 0 )
return;
607 if( idb == 0 )
return;
609 if( idbn == 0 )
return;
623 if( idb == 0 )
return;
634 static const char*
SEP=
" ";
636string::size_type pos( 0 ), pos1( 0 );
638 while( pos1 != string::npos ) {
639pos1 = db_spec.find_first_of(
SEP, pos );
640db_names.push_back( db_spec.substr( pos, pos1 - pos ) );
667 boolidx_not_resolved(
false);
677 Info<<
"index superheader for volume "<< vol_name
678<<
" was not loaded ("<< e.
what() <<
")");
679idx_not_resolved =
true;
682 if( !idx_not_resolved && shdr->GetNumSeq() != dbnseq ) {
684 Error<<
"numbers of OIDs reported by the database and " 685<<
"by the index do not match. Index for volume " 686<< vol_name <<
" will not be used");
687idx_not_resolved =
true;
690 if( !idx_not_resolved ) {
691 size_tcurr_vols_size(
volumes_.size() );
692 size_ttotal_idxvol_oids( 0 );
694 for(
size_t i( 0 ), e( shdr->GetNumVol() );
i< e; ++
i) {
699 if( name.empty() ) {
701 Error<<
"index volume "<< name
702<<
" not resolved; index will not be used for " 704idx_not_resolved =
true;
707 if( !idx_not_resolved ) {
710 if( idxvol_oids == 0 ) {
711idx_not_resolved =
true;
713 Error<<
"index volume "<< name
714<<
" reports no sequences; index will " 715<<
"not be used for "<< vol_name );
721total_idxvol_oids += idxvol_oids;
725 if( idx_not_resolved ) {
731 if( !idx_not_resolved && dbnseq != total_idxvol_oids ) {
733 Error<<
"total of oids reported by index volumes (" 734<< total_idxvol_oids <<
") does not match " 735<<
"the number of oids reported by the superheader (" 736<< dbnseq <<
"); index will not be used for " 739idx_not_resolved =
true;
743partial = (partial || idx_not_resolved);
745 if( idx_not_resolved ) {
754: queries_( 0 ), multiple_threads_(
false), n_threads_( 1 )
757 IDX_TRACE(
"creating new style CIndexedDb object");
762 IDX_TRACE(
"db spec given: "<< indexname );
772 IDX_TRACE(
"list of database volumes in order:");
778 IDX_TRACE(
"final index volume list:");
784 boolhas_index(
false);
788 if(
i->has_index ) {
796 "no database volume has an index");
808 IDX_TRACE(
"destroying new style CIndexedDb object");
814 Int4& vol_idx( *vol_idx_p );
816 boolfind_volume(
true);
823 if( !find_volume )
return;
824TVolList::const_iterator vi(
FindVolume( oid ) );
825new_vol_idx = vi -
volumes_.begin();
826 if( !vi->has_index ) { vol_idx = new_vol_idx;
return; }
829 Int4min_vol_idx( vol_idx == -1 ? 0 : vol_idx );
833 IDX_TRACE(
"loading volume "<< new_vol_idx <<
": "<< vi->name );
838std::ostringstream os;
839os <<
"CIndexedDb: could not load index volume: "<< vi->name;
843 IDX_TRACE(
"searching volume "<< vi->name );
845 IDX_TRACE(
"results loaded for "<< vi->name );
848 for( ; min_vol_idx < new_vol_idx; ++min_vol_idx ) {
851 IDX_TRACE(
"unloaded results for volume "<<
856vol_idx = new_vol_idx;
863TVolList::const_iterator vi(
FindVolume( oid ) );
869TVolList::const_iterator vi(
volumes_.begin() + *last_vol_idx );
871oid -= vi->start_oid;
907TVolList::const_iterator vi(
FindVolume( oid ) );
908 ASSERT( vi->start_oid <= oid );
909 ASSERT( vi->start_oid + vi->n_oids > oid );
911oid -= vi->start_oid;
916 if( (res = vr->
GetResults( oid, chunk )) != 0 ) {
931 ERR_POST(
Info<<
"Minimal supported word size in "<< fname <<
" is "<< rv);
946 if( !indexnames.empty() ) {
947vector< string > dbnames;
948string::size_type start = 0, end = 0;
952 while( start != string::npos ) {
953end = indexnames.find_first_of(
" ", start );
954dbnames.push_back( indexnames.substr( start, end - start ) );
955start = indexnames.find_first_not_of(
" ", end );
961dbni != dbnames.end(); ++dbni ) {
962 const string& indexname = *dbni;
965 unsigned longstart_vol = 0, stop_vol = 99;
967end = indexname.find_first_of(
",", start );
968 stringindex_base = indexname.substr( start, end );
971 if( start < indexname.length() && end != string::npos ) {
972end = indexname.find_first_of(
",", start );
975 if( start < indexname.length() && end != string::npos ) {
976end = indexname.find_first_of(
",", start );
977 stringstart_vol_str =
978indexname.substr( start, end - start );
980 if( !start_vol_str.empty() ) {
981start_vol = atoi( start_vol_str.c_str() );
986 if( start < indexname.length() && end != string::npos ) {
987end = indexname.find_first_of(
",", start );
988 stringstop_vol_str =
989indexname.substr( start, end - start);
991 if( !stop_vol_str.empty() ) {
992stop_vol = atoi( stop_vol_str.c_str() );
998 if( start_vol <= stop_vol ) {
1001 for(
long i= start_vol; (
unsignedlong)
i<= stop_vol; ++
i) {
1003os << index_base <<
"."<< setw( 2 ) << setfill(
'0')
1007 if( !name.empty() ){
1008 if(
i- last_i > 1 ) {
1009 for(
longj = last_i + 1; j <
i; ++j ) {
1011<< j <<
" not resolved.");
1024 string msg(
"no index file specified or index '");
1025 msg+= indexnames +
"*' not found.";
1045 for( vector< string >::size_type v = 0;
1055 string(
"CIndexedDb: could not load index") +
1078 if( (res =
results->GetResults( oid, chunk )) != 0 ) {
1080 return results->GetWordSize();
1104 if(word_size < min_ws) {
1106rv =
"MegaBLAST database index requires word size greater than ";
1112rv =
"Failed to read index MegaBLAST db min word size.";
1117rv =
"Empty index db instance";
1123 const string& indexname,
boolold_style,
bool& partial,
const intword_size )
1130 ERR_POST(
Info<<
"trying to load new style index at " 1143 else return "index allocation error";
1153 ERR_POST(
Info<<
"trying to load old style index at " 1162 else return "index allocation error";
1229 _ASSERT( init_hitlist != 0 );
1235oid, chunk, init_hitlist );
Declarations for indexed blast databases.
void(* DbIndexSetUsingThreadsFnType)(bool multiple_threads)
Type of a callback to set the concurrency state in the index structure.
void(* DbIndexRunSearchFnType)(BLAST_SequenceBlk *queries, LookupTableOptions *lut_options, BlastInitialWordOptions *word_options)
Type of a callback to run the indexed seed search.
void(* DbIndexSetNumThreadsFnType)(size_t n_threads)
Type of a callback to provide the number of threads to the indexing library, when multi-threaded sear...
void(* DbIndexSetQueryInfoFnType)(LookupTableWrap *lt_wrap, CRef< CBlastSeqLocWrap > locs_wrap)
Type of a callback to set the query information in the index structure.
void BlastInitHitListReset(BlastInitHitList *init_hitlist)
Free the ungapped data substructures and reset initial HSP count to 0.
void BlastInitHitListMove(BlastInitHitList *dst, BlastInitHitList *src)
Move the contents of a BlastInitHitList structure.
Structures and functions prototypes used for BLAST gapped extension.
Structures and API used for saving BLAST hits.
Various auxiliary BLAST utility functions.
BlastSeqLoc * getLocs() const
Get access to the held object.
unsigned long GetWordSize() const
Get the search word size.
BlastInitHitList * GetResults(TSeqNum seq) const
Get the result set for a particular logical subject.
Types of exception the indexing library can throw.
TSeqNum StartSeq() const
Get the OID of the first sequence in the index.
CConstRef< CSearchResults > Search(const BLAST_SequenceBlk *query, const BlastSeqLoc *locs, const SSearchOptions &search_options)
Search the index.
static CRef< CDbIndex > Load(const std::string &fname, bool nomap=false)
Load index.
CSequenceIStream::TStreamPos TSeqNum
Type used to enumerate sequences in the index.
TSeqNum StopSeq() const
Get the OID of the last sequence in the index.
Index wrapper exceptions.
Index wrapper for new style MegaBLAST indexing functionality.
Index wrapper for old style MegaBLAST indexing functionality.
This class is responsible for loading indices and doing the actual seed search.
static void FindVolumePaths(const string &dbname, ESeqType seqtype, vector< string > &paths, vector< string > *alias_paths=NULL, bool recursive=true, bool expand_links=true)
Find volume paths.
int GetNumOIDs() const
Returns the size of the (possibly sparse) OID range.
Include a standard set of the NCBI C++ Toolkit most basic headers.
const SIndexHeader ReadIndexHeader< false >(void *map)
CRef< CIndexSuperHeader_Base > GetIndexSuperHeader(const std::string &fname)
Read superheader structure from the file.
size_t GetIdxVolNumOIDs(const std::string &fname)
Read the index header information from the given file.
static const struct name_t names[]
std::string name
Fully qualified name of the volume.
virtual int MinIndexWordSize()=0
int ref_count
How many threads still need the result set.
virtual unsigned long GetResults(CDbIndex::TSeqNum oid, CDbIndex::TSeqNum chunk, BlastInitHitList *init_hitlist) const
Return results corresponding to a given subject sequence and chunk.
void TraceVolumes(void)
This is only used for debugging output.
DbIndexSetQueryInfoFnType GetDbIndexSetQueryInfoFn()
Return the appropriate callback to set query information in the index.
void ClearDbIndexCallbacks(void)
static DbIndexSetNumThreadsFnType SetNumThreadsFn
Global pointer to the appropriate callback to set the number of threads.
virtual ~CIndexedDb()
Object destructor.
static void IndexedDbSetUsingThreads(bool multiple_threads)
Set state of concurrency in the index structure.
vector< CConstRef< CDbIndex::CSearchResults > > TResultSet
Type used to represent collections of search result sets.
friend bool operator<(const SVolumeDescriptor &a, const SVolumeDescriptor &b)
Volumes are compared by their starting ordinal ids.
static void ParseDBNames(const std::string db_spec, TStrVec &db_names)
Generate a list of BLAST database names from a single string.
virtual int CheckOid(Int4 oid, Int4 *last_vol_id)
Check whether any results were reported for a given subject sequence.
friend std::ostream & operator<<(std::ostream &os, const SVolumeDescriptor &vd)
This is only used for debug tracing.
bool multiple_threads_
flag indicating that multithreading is in effect
CDbIndex::SSearchOptions sopt_
common search parameters
virtual ~CIndexedDb_New()
Object destructor.
virtual int CheckOid(Int4 oid, Int4 *last_vol_id)=0
Check whether any results were reported for a given subject sequence.
virtual void EndSearchIndication(Int4)
Not used.
virtual void DoPreSearch(BLAST_SequenceBlk *queries, LookupTableOptions *lut_options, BlastInitialWordOptions *word_options)
Run preliminary indexed search functionality.
std::vector< std::string > TStrVec
Alias for a vector of strings.
virtual void DoPreSearch(BLAST_SequenceBlk *queries, LookupTableOptions *lut_options, BlastInitialWordOptions *word_options)
Wrapper around PreSearch().
USING_SCOPE(ncbi::objects)
static DbIndexSetUsingThreadsFnType SetUsingThreadsFn
Global pointer to the appropriate callback to set the state of concurrency.
virtual void EndSearchIndication(Int4 last_vol_id)=0
Function used by threads to indicate that they are done with iterating over the database sequences.
void SetQueryInfo(CRef< CBlastSeqLocWrap > locs_wrap)
Set the current set of unmasked query segments.
void UpdateIndex(Int4 oid, Int4 *vol_idx)
Update the seed sets, if necessary.
virtual void EndSearchIndication(Int4 last_vol_id)
Function used by threads to indicate that they are done with iterating over the database sequences.
bool has_index
'true' if the volume is indexed.
static void TraceNames(const TStrVec &names)
This is only used for debugging output.
static void NullSetUsingThreads(bool)
No-op callback for setting concurrency state.
TResultsHolder results_holder_
reference counted seed set holders
static void NullRunSearch(BLAST_SequenceBlk *, LookupTableOptions *, BlastInitialWordOptions *)
No-op callback to run indexed search.
CIndexedDb_Old(const string &indexname)
Object constructor.
vector< string > index_names_
List of index volume names.
static unsigned long s_MB_IdbGetResults(Int4 oid_i, Int4 chunk_i, BlastInitHitList *init_hitlist)
Get the seed search results for a given subject id and chunk number.
TVolList volumes_
index volume descriptors
static void IndexedDbSetQueryInfo(LookupTableWrap *lt_wrap, CRef< CBlastSeqLocWrap > locs_wrap)
Set information about unmasked query segments.
virtual int MinIndexWordSize()
Get the minimum acceptable word size to use with indexed search.
virtual void DoPreSearch(BLAST_SequenceBlk *queries, LookupTableOptions *lut_options, BlastInitialWordOptions *word_options)=0
Run preliminary indexed search functionality.
static int s_MB_IdbCheckOid(Int4 oid, Int4 *last_vol_oid)
CFastMutex mtx_
mutex used for thread sync
static DbIndexSetQueryInfoFnType SetQueryInfoFn
Global pointer to the appropriate callback to set query info, based on whether or not index search is...
static int s_GetMinimumSupportedWordSizeByIndex(const string &fname)
CIndexedDb_New(const string &indexname, bool &partial)
Object constructor.
TResultSet results_
Set of result sets, one per loaded index.
static void EnumerateDbVolumes(const TStrVec &db_names, TStrVec &db_vols)
Generate a list of leaf database volumes from a list of database names.
CConstRef< CDbIndex::CSearchResults > TVolResults
This type captures the seeds found by search of an index volume.
std::string DbIndexInit(const string &indexname, bool old_style, bool &partial, const int word_size)
TVolResults res
Seed set or null.
static void NullSetNumThreads(size_t)
No-op callback for setting the number of threads.
static void IndexedDbSetNumThreads(size_t n_threads)
Set the number of concurrent search threads in the index structure.
static void IndexedDbRunSearch(BLAST_SequenceBlk *queries, LookupTableOptions *lut_options, BlastInitialWordOptions *word_options)
Run indexed search.
virtual unsigned long GetResults(CDbIndex::TSeqNum oid, CDbIndex::TSeqNum chunk, BlastInitHitList *init_hitlist) const
Return results corresponding to a given subject sequence and chunk.
static void s_MB_IdxEndSearchIndication(Int4 last_vol_id)
DbIndexSetUsingThreadsFnType GetDbIndexSetUsingThreadsFn()
Return the appropriate callback to set the concurrency state in the index structure.
TSeqMap seqmap_
For each element of indices_ with index i seqmap_[i] contains one plus the last oid of that database ...
BLAST_SequenceBlk * queries_
query data (from BLAST)
void SetMultipleThreads(bool multiple_threads)
Set the concurrency status.
DbIndexRunSearchFnType GetDbIndexRunSearchFn()
Return the appropriate callback to run indexed seed search.
std::string s_CheckMinWordSize(int word_size)
DbIndexSetNumThreadsFnType GetDbIndexSetNumThreadsFn()
Return the appropriate callback to set the number of threads in the index structure.
static DbIndexRunSearchFnType RunSearchFn
Global pointer to the appropriate callback to run indexed search, based on whether or not index searc...
std::vector< SVolResults > TResultsHolder
List of reference counted result holders.
std::vector< SVolumeDescriptor > TVolList
List of leaf index volumes.
void SetNumThreads(size_t n_threads)
Set the number of threads used for concurrent search.
TSeqMap::size_type LocateIndex(CDbIndex::TSeqNum oid) const
Find an index corresponding to the given subject id.
CRef< CBlastSeqLocWrap > locs_wrap_
Current set of unmasked query locations.
vector< CDbIndex::TSeqNum > TSeqMap
Type used to map loaded indices to subject ids.
TVolList::const_iterator FindVolume(SIZE_TYPE oid) const
Find a volume containing the given subject ordinal id.
CRef< CDbIndex > index_
Currently loaded index.
virtual int CheckOid(Int4 oid, Int4 *)
Check whether any results were reported for a given subject sequence.
static CRef< CIndexedDb > Index_Set_Instance
Shared representation of currently loaded index volumes.
void SetUpDbIndexCallbacks(void)
SIZE_TYPE start_oid
OID of the first sequence of the volume.
static void NullSetQueryInfo(LookupTableWrap *, CRef< CBlastSeqLocWrap >)
No-op callback for setting query info.
void AddIndexInfo(const std::string &vol_name, bool &idx_not_resolved)
virtual int MinIndexWordSize()
size_t n_threads_
number of search threads running
SIZE_TYPE GetNextUnusedOID(void) const
Auxiliary function that returns the oid value that is one more than the largest oid used so far.
void PreSearch(BLAST_SequenceBlk *queries, BlastSeqLoc *locs, LookupTableOptions *lut_options, BlastInitialWordOptions *word_options)
Invoke the seed search procedure on each of the loaded indices.
SIZE_TYPE n_oids
Number of sequences in the volume.
virtual unsigned long GetResults(CDbIndex::TSeqNum oid, CDbIndex::TSeqNum chunk, BlastInitHitList *init_hitlist) const =0
Return results corresponding to a given subject sequence and chunk.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
void Error(CExceptionArgs_Base &args)
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
virtual const char * what(void) const noexcept
Standard report (includes full backlog).
void Info(CExceptionArgs_Base &args)
void * GetPtr(void) const
Get pointer to beginning of data.
void Reset(void)
Reset reference object.
TObjectType * Release(void)
Release a reference to the object and return a pointer to the object.
int32_t Int4
4-byte (32-bit) signed integer
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
NCBI_NS_STD::string::size_type SIZE_TYPE
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Declarations for functions that extract hits from indexed blast databases (specialized for megablast)
#define LAST_VOL_IDX_NULL
#define LAST_VOL_IDX_INIT
constexpr auto sort(_Init &&init)
Magic spell ;-) needed for some weird compilers... very empiric.
#define ASSERT
macro for assert.
Multi-threading -- classes, functions, and features.
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
Defines BLAST database access classes.
Defines exception class and several constants for SeqDB.
bool SeqDB_CompareVolume(const string &volpath1, const string &volpath2)
Compares two volume file names and determine the volume order.
string SeqDB_ResolveDbPath(const string &filename)
Resolve a file path using SeqDB's path algorithms.
static SLJIT_INLINE sljit_ins msg(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
Structure to hold a sequence.
Structure to hold all initial HSPs for a given subject sequence.
Options needed for initial word finding and processing.
Int4 window_size
Maximal allowed distance between 2 hits in case 2 hits are required to trigger the extension.
Used to hold a set of positions, mostly used for filtering.
Simple record type used to specify index search parameters.
unsigned long two_hits
Window for two-hit method (see megablast docs).
unsigned long word_size
Target seed length.
Reference count for the volume results.
Information about one leaf index volume.
Options needed to construct a lookup table. Also needed: query sequence and query length.
Int4 word_size
Determines the size of the lookup table.
Wrapper structure for different types of BLAST lookup tables.
void * end_search_indication
function used to report that a thread is done iterating over the database in preliminary search
void * check_index_oid
function used to check if seeds for a given oid are present
void * read_indexed_db
function used to retrieve hits from an indexed database
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4