A RetroSearch Logo

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Search Query:

Showing content from http://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/doxyhtml/seqdbimpl_8cpp_source.html below:

NCBI C++ ToolKit: src/objtools/blast/seqdb_reader/seqdbimpl.cpp Source File

53

: m_AtlasHolder (

NULL

, use_atlas_lock),

54

m_Atlas (m_AtlasHolder.

Get

()),

55

m_DBNames (db_name_list),

56

m_Aliases (m_Atlas, db_name_list, prot_nucl),

58

m_Aliases.GetVolumeNames(),

63

m_RestrictBegin (oid_begin),

64

m_RestrictEnd (oid_end),

70

m_ExactTotalLength(0),

71

m_TotalLengthStats(0),

75

m_SeqType (prot_nucl),

76

m_OidListSetup (

false

),

77

m_UserGiList (gi_list),

78

m_NegativeList (neg_list),

80

m_NeedTotalsScan (

false

),

81

m_UseGiMask (m_Aliases.HasGiMask()),

82

m_MaskDataColumn (kUnknownTitle),

88

vector <string> mask_list;

159

: m_AtlasHolder (

NULL

, use_atlas_lock),

160

m_Atlas (m_AtlasHolder.

Get

()),

161

m_Aliases (m_Atlas,

""

,

'-'

),

168

m_ExactTotalLength(0),

171

m_OidListSetup (

true

),

172

m_NeedTotalsScan (

false

),

173

m_UseGiMask (

false

),

174

m_MaskDataColumn (kUnknownTitle),

192  if

((oid_begin == 0) && (oid_end == 0)) {

250  const

vector< CRef<CSeqDB_FilterTree> >& nodes = ft->

GetNodes

();

251  if

(nodes.size() == 1) {

275  bool

success =

true

;

304

vector<int> & oid_list,

336

begin_chunk = * state_obj;

342

end_chunk = begin_chunk +

static_cast<int>

(

buffer

->results.size());

344

end_chunk = begin_chunk + oid_size;

350

*state_obj = end_chunk;

361  int

next_oid = begin_chunk;

364  while

(next_oid < end_chunk) {

367

next_oid < end_chunk) {

368

oid_list.push_back(next_oid++);

370

next_oid = end_chunk;

376

oid_list.resize(oid_size);

377  while

(iter < oid_size) {

382

oid_list[iter++] = next_oid++;

388  if

(iter < oid_size) {

389

oid_list.resize(iter);

391

*state_obj = next_oid;

417  return

vol->GetSeqLengthProt(vol_oid);

421  return

vol->GetSeqLengthExact(vol_oid);

436  return

vol->GetSeqLengthProt(vol_oid);

440  return

vol->GetSeqLengthApprox(vol_oid);

456

gi_to_taxid.

clear

();

462  if

((! defline_set.

Empty

()) && defline_set->

CanGet

()) {

464  if

(! (*defline)->CanGetSeqid()) {

468  if

(! (*defline)->IsSetTaxid()) {

473  if

(! (**seqid).IsGi()) {

477

gi_to_taxid[(**seqid).GetGi()] = (*defline)->GetTaxid();

484

vector<TTaxId> & taxids,

497  if

((! defline_set.

Empty

()) && defline_set->

CanGet

()) {

499  if

((*defline)->IsSetTaxid()) {

500

taxids.push_back((*defline)->GetTaxid());

516  if

((! defline_set.

Empty

()) && defline_set->

CanGet

()) {

535

gi_to_taxid_set.clear();

541  if

((! defline_set.

Empty

()) && defline_set->

CanGet

()) {

543  if

(! (*defline)->CanGetSeqid()) {

548  if

(! (**seqid).IsGi()) {

553

gi_to_taxid_set[(**seqid).GetGi()].

insert

(

563

vector<TTaxId>& taxids,

577  if

((! defline_set.

Empty

()) && defline_set->

CanGet

()) {

583  if

((*defline)->CanGetSeqid()) {

587

(*defline)->GetSeqid()

589  if

((**seqid).IsGi()) {

591

(*defline)->GetLeafTaxIds();

594

leafTaxids.

begin

(),

621  return

vol->GetBioseq(vol_oid,

659  if

(

buffer

->checked_out > 0) {

668  const char

** seq)

const 672  if

(index < buffer->

results

.size()) {

673

(

buffer

->checked_out)++;

674

*seq =

buffer

->results[index].address;

675  return buffer

->results[index].length;

679

(

buffer

->checked_out)++;

680

*seq =

buffer

->results[0].address;

681  return buffer

->results[0].length;

699

res.

length

= vol->GetSequence(vol_oid++, &seq);

700  if

(res.

length

< 0)

return

;

703

tot_length -= res.

length

;

705  buffer

->results.push_back(res);

706

res.

length

= vol->GetSequence(vol_oid++, &seq);

726  return

vol->GetSequence(vol_oid,

buffer

);

744  return

vol->GetSeqData(vol_oid, begin, end, locked);

761  return

vol->GetAmbigSeq(vol_oid,

782  return

vol->GetAmbigPartialSeq(vol_oid,

885  _ASSERT

((rv & 0x7FFFFFFF) == rv);

904  TGi

gi = vol->GetSeqGI(vol_oid, locked);

907

list< CRef<CSeq_id> > ids =

908

vol->GetSeqIDs(vol_oid);

911  return

(**id).GetGi();

928  _ASSERT

((rv & 0x7FFFFFFF) == rv);

949  _ASSERT

((num_oids & 0x7FFFFFFF) == num_oids);

951  return

(

int

) num_oids;

994  return

vol->GetSeqType();

1017

}

else if

(d != date) {

1061  return

vol->GetFilteredHeader(vol_oid, locked);

1096  for

(

int i

= 0;

i

< (

int

) s.size();

i

++) {

1097  if

(s[

i

] ==

char

(0)) {

1132  return

vol->GetPig(vol_oid, pig, locked);

1192  for

(

unsigned i

=0;

i

< list.size();

i

++) {

1262  return

vol->GetGi(vol_oid, gi, locked);

1282  for

(

unsigned int i

=0;

i

<

tmp

.size();

i

++) {

1283  int

oid2 =

tmp

[

i

];

1285

oids.push_back(

tmp

[

i

]);

1290

vector<int> vol_oids;

1298  if

(vol_oids.empty()) {

1304  ITERATE

(vector<int>, iter, vol_oids) {

1305  int

oid1 = ((*iter) + vol_start);

1310  if

(find(oids.begin(), oids.end(), oid1) != oids.end()) {

1317

oids.push_back(oid1);

1331

vector<blastdb::TOid> oids;

1335  for

(

unsigned int i

=0;

i

< oids.size();

i

++) {

1338

rv.push_back(oids[

i

]);

1344  "Taxonomy list is not supported in v4 BLAST db"

);

1360

vector<blastdb::TOid> oids;

1362

oids.push_back(oid);

1372  "Taxonomy list is not supported in v4 BLAST db"

);

1384  "Taxonomy list is not supported in v4 BLAST db"

);

1393

oids.resize(accs.size());

1397  for

(

unsigned int i

=0;

i

< oids.size();

i

++) {

1408  for

(

unsigned int i

=0;

i

< accs.size();

i

++) {

1409

vector<blastdb::TOid>

tmp

;

1411  if

(

tmp

.empty()) {

1415

oids[

i

] =

tmp

[0];

1436  bool

is_BL_ORD_ID =

false

;

1441  if

(dbt.

GetDb

() ==

"BL_ORD_ID"

) {

1442

is_BL_ORD_ID =

true

;

1450  if

(seqid_in.

IsPir

() || seqid_in.

IsPrf

()) {

1456  for

(

unsigned int i

=0;

i

<

tmp

.size();

i

++) {

1457  int

oid2 =

tmp

[

i

];

1459

oids.push_back(

tmp

[

i

]);

1466

vector<int> vol_oids;

1474

seqid.

Assign

(seqid_in);

1480  if

(vol_oids.empty()) {

1486  ITERATE

(vector<int>, iter, vol_oids) {

1487  int

oid1 = ((*iter) + vol_start);

1493

oids.push_back(oid1);

1514  "OID not in valid range."

);

1520  "Residue offset not in valid range."

);

1533  if

((first_seq < vol_cnt) && (residue < vol_len)) {

1534  return

vol_start + volp->

GetOidAtOffset

(first_seq, residue, locked);

1539

vol_start += vol_cnt;

1541  if

(first_seq > vol_cnt) {

1542

first_seq -= vol_cnt;

1547  if

(residue > vol_len) {

1556  "Could not find valid split point oid."

);

1562

vector<string> & paths,

1563

vector<string> * alias_paths,

1567  bool

use_atlas_lock =

true

;

1607  Uint8

base_count(0);

1622  if

(totlen || maxlen || minlen) {

1633

max_count =

max

(

len

, max_count);

1634

min_count =

min

(

len

, min_count);

1640

*numseq = oid_count;

1644

*totlen = base_count;

1648

*maxlen = max_count;

1652

*minlen = min_count;

1660

oss <<

"Taxid "

<< taxid <<

" not found"

;

1668  Uint8

* total_length,

1709  int

* ambig_length)

const 1716

vol->GetRawSeqAndAmbig(vol_oid,

1746 template

<

class

TId>

1760

*high_out = high_in;

1763

*count_out = count_in;

1765  if

(low_out && (*low_out > low_in)) {

1768  if

(high_out && (*high_out < high_in)) {

1769

*high_out = high_in;

1772

*count_out += count_in;

1783  bool

found =

false

;

1815  bool

found =

false

;

1818  int

vlow(0), vhigh(0), vcount(0);

1844  bool

found =

false

;

1879

vol->SetOffsetRanges(vol_oid,

1910  RetAmbigSeq

(

const_cast<const char

**

>

(& datap));

1924

vector<int> vol_oids;

1930  if

(vol_oids.empty()) {

1936  ITERATE

(vector<int>, iter, vol_oids) {

1937  int

oid1 = (*iter) + vol_start;

1943

oids.push_back(oid1);

1976  if

(! ngis.empty()) {

1979

}

else if

(! ntis.empty()) {

1982

}

else if

(!stis.empty()) {

1995 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \ 1996  (!defined(NCBI_COMPILER_MIPSPRO)) ) 2009

titles.assign(

all

.begin(),

all

.end());

2028

vector<int> vol_ids;

2030  bool

found =

false

;

2036

vol_ids.push_back(

id

);

2121  "This column ID was not found."

);

2147  int

vol_idx = -1, vol_oid = -1;

2152  if

(vol_col_id >= 0) {

2174 template

<

class

K,

class

C>

2177  return

c.find(k) != c.end();

2192  string

v = iter->second;

2193

vector<string> items;

2196  if

(items.size() == 4) {

2201  return

& iter->second;

2211

: m_NextId(100), m_Empty(

true

), m_CacheRealAlgo(-1)

2220

algorithms.push_back(iter->first);

2226  string

real_desc = desc;

2227

vector<string> items;

2229  if

(items.size() == 4) {

2230

real_desc = items[2];

2238  if

((! found_id) || (

m_DescToId

[real_desc] !=

id

)) {

2290  "Cannot find volume in algorithm map."

);

2297  "Cannot find volume algorithm in algorithm map."

);

2300  return

trans[algo_id];

2307  "Cannot find string algorithm id in algorithm map."

);

2313 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \ 2314  (!defined(NCBI_COMPILER_MIPSPRO)) ) 2330

algorithms.resize(0);

2353

vector<int> algorithms;

2355  if

(algorithms.empty()) {

2361

<<

"Available filtering algorithms applied to database sequences:" 2364

retval << setw(13) << left <<

"Algorithm ID" 2365

<< setw(40) << left <<

"Algorithm name" 2366

<< setw(40) << left <<

"Algorithm options"

<< endl;

2367  ITERATE

(vector<int>, algo_id, algorithms) {

2368  string algo

, algo_opts, algo_name;

2370  if

(algo_opts.empty()) {

2371

algo_opts.assign(

"default options used"

);

2374

retval << setw(13) << left << (*algo_id)

2375

<< setw(40) << left << algo_name

2376

<< setw(40) << left << algo_opts << endl;

2378

retval << setw(13) << left << (*algo_id)

2379

<< setw(40) << left <<

algo 2380

<< setw(40) << left << algo_opts << endl;

2394  string

& program_name,

2398  if

(enum_type_vals ==

NULL

) {

2399

enum_type_vals = GetTypeInfo_enum_EBlast_filter_program();

2403

vector<string> items;

2406  if

(items.size() == 2) {

2409

program.assign(items[0]);

2410

program_name.assign(enum_type_vals->

FindName

(pid,

false

));

2412

}

else if

(items.size() == 4) {

2418  "Error in stored mask algorithm description data."

);

2424  string

& program_name,

2445  if

(found ==

false

) {

2447

oss <<

"Filtering algorithm ID "

<< algorithm_id

2448

<<

" is not supported."

<< endl;

2484  if

(vol_col_id < 0) {

2498

oss <<

"Error: volume ("

<< volp->

GetVolName

()

2499

<<

") mask data has duplicates value ("

<< *dup <<

")"

;

2506  const string

& desc1 = iter->second;

2526  const void

* src = (

const void

*) blob.

ReadRaw

(

n

*8);

2531 template

<

class

TRead>

2538  for

(

int

rng = 0; rng < num_ranges; rng++) {

2541  if

(

algo

== vol_algo) {

2545  int

skip_amt = num_pairs * 2 * TRead::numeric_size;

2571  int

vol_oid = 0, vol_idx = -1;

2585  if

(blob.

Size

() != 0) {

2589  int

vol_algo_id = -1;

2597

s_ReadRanges<SReadInt4>(vol_algo_id, ranges, blob);

2609  if

(num_threads < 1) {

2611

}

else if

(num_threads == 1) {

2612

num_threads = force_mt ? 1 : 0;

2617  for

(

int

thread =

m_NumThreads

; thread < num_threads; ++thread) {

2623  for

(

int

thread = num_threads; thread <

m_NumThreads

; ++thread) {

2669  for

(

int

vol = 0; vol < nvols; ++vol) {

2677  for

(

int

vol = 0; vol < nvols; ++vol) {

2734  if

((! defline_set.

Empty

()) && defline_set->

CanGet

()) {

2736  if

(! (*defline)->CanGetSeqid()) {

2741  if

((*df_seqid)->Match(seq_id)) {

2743  if

(!df_taxids.

empty

()) {

2759  for

(

unsigned int i

=0;

i

< oids.size();

i

++) {

2763  if

(!taxid_set.

empty

()) {

2764

taxids.insert(taxids.begin(), taxid_set.

begin

(), taxid_set.

end

());

Declaration of ADT to retrieve sequences for the BLAST engine.

#define BLAST_SEQSRC_MINLENGTH

Default minimal sequence length.

`Blob' Class for SeqDB (and WriteDB).

int GetReadOffset() const

Get the current read pointer offset.

Int4 ReadInt4()

Read a 4 byte integer at the pointer (and move the pointer).

int Size() const

Get size of blob contents.

void Clear()

Clear all owned data and reference an empty string.

void SeekRead(int offset)

Move the read pointer to a specific location.

const char * ReadRaw(int size)

Read raw data (moving the read pointer).

void SetFrame(const string &frame)

void Log(const string &name, const char *value, CDebugDumpFormatter::EValueType type=CDebugDumpFormatter::eValue, const string &comment=kEmptyStr)

CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string Sample usage:

CObjectIStreamAsnBinary –.

void GetAliasFileValues(TAliasFileValues &afv, const CSeqDBVolSet &volset)

Get Name/Value Data From Alias Files.

Int8 GetNumSeqsStats(const CSeqDBVolSet &volset) const

Get the number of sequences available.

Uint8 GetTotalLength(const CSeqDBVolSet &volset) const

Get the total length of the set of databases.

Uint8 GetTotalLengthStats(const CSeqDBVolSet &volset) const

Get the total length of the set of databases.

bool NeedTotalsScan(const CSeqDBVolSet &volset) const

Check whether a db scan is needed to compute correct totals.

Int4 GetMinLength(const CSeqDBVolSet &volset) const

Get the length of the shortest sequence.

Int8 GetNumOIDs(const CSeqDBVolSet &volset) const

Get the size of the OID range.

void GetMaskList(vector< string > &mask_list)

Get Gi-based Mask Names From Alias Files.

void FindVolumePaths(vector< string > &vols, vector< string > *alias, bool recursive) const

Find the base names of volumes.

bool HasFilters()

Check if any volume filtering exists.

string GetTitle(const CSeqDBVolSet &volset) const

Get the title.

Int8 GetNumSeqs(const CSeqDBVolSet &volset) const

Get the number of sequences available.

CRef< CSeqDB_FilterTree > GetFilterTree()

Get filtering tree for all volumes.

Guard object for the SeqDBAtlas singleton.

CSeqDBAtlas & Get()

Get the CSeqDBAtlas object.

static void RetRegion(const char *datap)

Free allocated memory.

Uint8 GetSliceSize()

Get the current slice size.

void Lock(CSeqDBLockHold &locked)

Lock the atlas.

void Unlock(CSeqDBLockHold &locked)

Unlock the atlas.

int GetNumGis() const

Get the number of GIs in the array.

void GetPigList(vector< TPig > &pigs) const

void GetGiList(vector< TGi > &gis) const

Get the gi list.

void GetTiList(vector< TTi > &tis) const

Get the ti list.

int GetNumTis() const

Get the number of TIs in the array.

void GetMaskData(int algo_id, TGi gi, CSeqDB::TSequenceRanges &ranges, CSeqDBLockHold &locked)

Get the mask data for GI.

int GetAlgorithmId(const string &algo_name) const

Get the mask algorithm id for a string id.

const string & GetDesc(int algo_id, CSeqDBLockHold &locked)

Get the mask description for algo id.

void GetAvailableMaskAlgorithms(vector< int > &algo) const

Get the available mask algorithm ids.

SeqDB ID list for performing boolean set operations.

bool Blank() const

Check if an ID list is blank.

void GetTaxIDs(int oid, map< TGi, TTaxId > &gi_to_taxid, bool persist)

Get gi to taxid map for an OID.

int m_NumSeqsStats

Number of sequences in the overall database.

int GetSequence(int oid, const char **buffer)

Get the sequence data for a sequence.

void GetDBTaxIds(set< TTaxId > &tax_ids)

Get all unique tax ids from db.

CSeqDBAliasFile m_Aliases

Alias node hierarchy management object.

char GetSeqType() const

Get the sequence type.

int x_GetSeqBuffer(SSeqResBuffer *buffer, int oid, const char **seq) const

Get sequence from buffer.

CRef< CSeqDBGiList > m_UserGiList

The User GI list for the entire CSeqDB object.

void x_InitIdSet()

Initialize Id Set.

int GetOidAtOffset(int first_seq, Uint8 residue) const

Find the OID corresponding to the offset given in residues, into the database as a whole.

void GetRawSeqAndAmbig(int oid, const char **buffer, int *seq_length, int *ambig_length) const

Raw Sequence and Ambiguity Data.

int GetMinLength() const

Returns the length of the smallest sequence in the database.

int m_NumThreads

number of thread clients

std::shared_mutex m_CacheIDMutex

mapping thread ID to storage ID

void x_RetSeqBuffer(SSeqResBuffer *buffer) const

Return sequence to buffer.

CSeqDBIdSet m_IdSet

The positive or negative ID list for the entire CSeqDB object.

int x_GetMinLength() const

Returns the shortest sequence lengths of all volumes.

void GetLeafTaxIDs(int oid, map< TGi, set< TTaxId > > &gi_to_taxid_set, bool persist)

Get gi to taxid map for an OID.

~CSeqDBImpl()

Destructor.

string m_Date

Cached most recent date string for GetDate().

void AccessionsToOids(const vector< string > &accs, vector< blastdb::TOid > &oids)

CRef< CSeqDBOIDList > m_OIDList

The list of included OIDs (construction is deferred).

EBlastDbVersion GetBlastDbVersion() const

Return blast db version.

CSeqDBImpl(const string &db_name_list, char prot_nucl, int oid_begin, int oid_end, CSeqDBGiList *gi_list, CSeqDBNegativeList *neg_list, CSeqDBIdSet idset, bool use_atlas_lock)

Standard Constructor.

void x_BuildMaskAlgorithmList(CSeqDBLockHold &locked)

Get a list of algorithm IDs for which mask data exists.

int m_MaskDataColumn

Column ID for mask data column.

int GetMaskAlgorithmId(const string &algo_name)

Get the numeric ID for a algorithm name.

CFastMutex m_OIDLock

Mutex which synchronizes access to the OID list.

int GetColumnId(const string &title)

Get an ID number for a given column title.

bool GiToOidwFilterCheck(TGi gi, int &oid)

GiToOid is meant to simply return the oid for a gi if one exists. This method finds the oid and checks if...

void GetColumnBlob(int col_id, int oid, bool keep, CBlastDbBlob &blob)

Fetch the data blob for the given column and oid.

void GetStringBounds(string *low_id, string *high_id, int *count)

Get String Bounds.

void SetIterationRange(int oid_begin, int oid_end)

Set Iteration Range.

TGi x_GetSeqGI(int oid, CSeqDBLockHold &locked)

Look up for the GI of a sequence.

int GetNumOIDs() const

Returns the size of the (possibly sparse) OID range.

Uint8 m_TotalLength

Total length of database (in bases).

void GetMaskAlgorithmDetails(int algorithm_id, string &program, string &program_name, string &algo_opts)

Get information about one type of masking available here.

bool m_UseGiMask

Which type of masks are we using?

void x_CheckOid(T &list, CSeqDBLockHold &locked)

CSeqDBAtlas & m_Atlas

Reference to memory management layer.

CRef< CSeqDBNegativeList > m_NegativeList

The Negative ID list for the entire CSeqDB object.

void SetNumberOfThreads(int num_threads, bool force_mt=false)

Invoke the garbage collector to free up memory.

TGi GetSeqGI(int oid)

Look up for the GI of a sequence.

int GetMaxLength() const

Returns the length of the largest sequence in the database.

int m_RestrictEnd

Ending OID as provided to the constructor.

void TaxIdsToOids(set< TTaxId > &tax_ids, vector< blastdb::TOid > &rv)

Get Oid list for input tax ids.

void RetAmbigSeq(const char **buffer) const

Returns any resources associated with the sequence.

void FlushOffsetRangeCache()

Flush all offset ranges cached.

void GetAllTaxIDs(int oid, set< TTaxId > &taxids)

Get all tax ids (leaf and non-leaf) for an oid.

Uint8 GetVolumeLength() const

Returns the sum of the lengths of all volumes.

void GetLMDBFileNames(vector< string > &lmdb_list) const

const string & GetDBNameList() const

Get list of database names.

void x_FillSeqBuffer(SSeqResBuffer *buffer, int oid) const

Fill up the buffer.

CObjectIStreamAsnBinary * reusable_inpstr

unsigned GetSequenceHash(int oid)

Get the sequence hash for a given OID.

char m_SeqType

Type of sequences used by this instance.

void x_GetTaxIdsForSeqId(const CSeq_id &seq_id, int oid, CBlast_def_line::TTaxIds &taxid_set)

CRef< CBioseq > GetBioseq(int oid, TGi target_gi, const CSeq_id *target_seq_id, bool seqdata)

Get a CBioseq for a sequence.

static void FindVolumePaths(const string &dbname, char prot_nucl, vector< string > &paths, vector< string > *alias_paths, bool recursive, bool expand_links)

Find volume paths.

void ListColumns(vector< string > &titles)

List columns titles found in this database.

int GetAmbigPartialSeq(int oid, char **buffer, int nucl_code, ESeqDBAllocType alloc_type, CSeqDB::TSequenceRanges *partial_ranges, CSeqDB::TSequenceRanges *masks) const

Uint8 m_TotalLengthStats

Total length of database (in bases).

bool OidToPig(int oid, int &pig) const

Translate an OID to a PIG.

std::atomic< bool > m_OidListSetup

True if OID list setup is done (or was not required).

void GetAliasFileValues(TAliasFileValues &afv)

Get Name/Value Data From Alias Files.

Uint8 x_GetTotalLength() const

Returns the sum of the lengths of all available sequences.

Uint8 GetExactTotalLength()

Returns the exact sum of the lengths of all available sequences.

void GetTaxIdsForSeqId(const CSeq_id &seq_id, vector< TTaxId > &taxids)

int x_GetCacheID()

Get local cache ID for current thread.

CRef< CSeqDBGiMask > m_GiMask

Gi-based mask.

bool IdsToOids(CSeqDBGiList &id_list)

Get OIDs from an ID list.

CRef< CSeq_data > GetSeqData(int oid, TSeqPos begin, TSeqPos end) const

Fetch data as a CSeq_data object.

map< string, int > m_ColumnTitleMap

Map string column titles to global column IDs.

const map< string, string > & GetColumnMetaData(int column_id)

Get all metadata for the specified column.

CRef< CBlast_def_line_set > x_GetHdr(int oid, CSeqDBLockHold &locked)

Get the sequence header data.

Uint8 x_GetVolumeLength() const

Returns the sum of the lengths of all volumes.

void x_GetOidList(CSeqDBLockHold &locked)

Build the OID list.

int m_NextChunkOID

"Bookmark" for multithreaded chunk-type OID iteration.

static void GetTaxInfo(TTaxId taxid, SSeqDBTaxInfo &info)

Get taxonomy information.

int x_GetNumOIDs() const

Returns the size of the (possibly sparse) OID range.

string GetAvailableMaskAlgorithmDescriptions()

Returns a formatted string with the list of available masking algorithms in this database for display...

bool CheckOrFindOID(int &next_oid)

Find an included OID, incrementing next_oid if necessary.

void GetPigBounds(int *low_id, int *high_id, int *count)

Get PIG Bounds.

void SeqidToOids(const CSeq_id &seqid, vector< int > &oids, bool multi)

Translate a CSeq-id to a list of OIDs.

int GetNumSeqsStats() const

Returns the number of sequences available.

@ kUnknownTitle

This column is not heard of yet.

@ kColumnNotFound

This column does not exist (we checked).

int x_GetMaxLength() const

Returns the longest sequence lengths of all volumes.

void SetVolsOidMaskType(int oid_mask_type)

int GetAmbigSeq(int oid, char **buffer, int nucl_code, SSeqDBSlice *region, ESeqDBAllocType strategy, CSeqDB::TSequenceRanges *masks=NULL) const

Get a pointer to a range of sequence data with ambiguities.

int m_MaxLength

Longest database sequence.

int m_RestrictBegin

Starting OID as provided to the constructor.

void GetGiBounds(TGi *low_id, TGi *high_id, int *count)

Get GI Bounds.

void GetAvailableMaskAlgorithms(vector< int > &algorithms)

Get a list of algorithm IDs for which mask data exists.

int GetSeqLengthApprox(int oid) const

Get the approximate sequence length.

std::atomic< int > m_NextCacheID

Uint8 m_VolumeLength

Total length of all database volumes combined (in bases).

void GetTaxIdsForOids(const vector< blastdb::TOid > &oids, set< TTaxId > &tax_ids)

void DebugDump(CDebugDumpContext ddc, unsigned int depth) const

Dump debug information for this object.

Uint8 x_GetTotalLengthStats() const

Returns the sum of the lengths of all available sequences.

void GetTotals(ESummaryType sumtype, int *oid_count, Uint8 *total_length, bool use_approx)

Returns the sum of the sequence lengths.

CSeqDB_IdRemapper m_AlgorithmIds

Algorithm ID mapping.

void RetSequence(const char **buffer)

Returns any resources associated with the sequence.

bool x_CheckOrFindOID(int &next_oid, CSeqDBLockHold &locked)

Get the next included oid.

int m_MinLength

Shortest database sequence.

void GetMaskData(int oid, int algo_id, CSeqDB::TSequenceRanges &ranges)

Get masked ranges of a sequence.

int x_GetColumnId(const string &title, CSeqDBLockHold &locked)

Get the Column ID for the column with the specified title.

bool TiToOid(Int8 ti, int &oid)

Translate a TI to an OID.

Uint8 GetTotalLengthStats() const

Returns the sum of the lengths of all available sequences.

int x_GetSeqLength(int oid) const

Get the sequence length.

void SetVolsMemBit(int mbit)

Set the membership bit of all volumes.

Uint8 m_ExactTotalLength

Total length of database (in bases).

bool GiToOid(TGi gi, int &oid) const

Translate a GI to an OID.

void HashToOids(unsigned hash, vector< int > &oids)

Get the OIDs for a given sequence hash.

void SetOffsetRanges(int oid, const TRangeList &offset_ranges, bool append_ranges, bool cache_data)

Apply a range of offsets to a database sequence.

list< CRef< CSeq_id > > GetSeqIDs(int oid)

Gets a list of sequence identifiers.

void AccessionToOids(const string &acc, vector< int > &oids)

Find OIDs matching the specified string.

int x_GetNumSeqs() const

Returns the number of sequences available.

string m_DBNames

The list of database names provided to the constructor.

void FlushSeqMemory()

Flush unnecessarily held memory.

CRef< CBlast_def_line_set > GetHdr(int oid)

Get the sequence header data.

vector< SSeqResBuffer * > m_CachedSeqs

Cached sequences.

int GetNumSeqs() const

Returns the number of sequences available.

vector< CRef< CSeqDB_ColumnEntry > > m_ColumnInfo

Map assigned global column IDs to column information.

int x_GetNumSeqsStats() const

Returns the number of sequences available.

Uint8 GetTotalLength() const

Returns the sum of the lengths of all available sequences.

std::map< int, int > m_CacheID

int x_GetMaskDataColumn(CSeqDBLockHold &locked)

Open the mask data column (if necessary) and return its id.

bool PigToOid(int pig, int &oid) const

Translate a PIG to an OID.

CSeqDB::EOidListType GetNextOIDChunk(int &begin_chunk, int &end_chunk, int oid_size, vector< int > &oid_list, int *oid_state)

Return a chunk of OIDs, and update the OID bookmark.

string GetDate() const

Returns the construction date of the database.

CSeqDBVolSet m_VolSet

Set of volumes used by this database instance.

string GetTitle() const

Returns the database title.

void ResetInternalChunkBookmark()

Restart chunk iteration at the beginning of the database.

string x_FixString(const string &s) const

Adjust string length to offset of first embedded NUL byte.

int m_NumOIDs

Size of databases OID range.

bool m_NeedTotalsScan

True if this configuration cannot deduce totals without a scan.

int x_SetCacheID(int threadID)

int m_NumSeqs

Number of sequences in the overall database.

void x_ScanTotals(bool approx, int *seq_count, Uint8 *base_count, int *max_count, int *min_count, CSeqDBLockHold &locked)

Compute totals via iteration.

int GetSeqLength(int oid) const

Get the sequence length.

CSeqDBIdSet GetIdSet()

Get IdSet list attached to this database.

bool OidToGi(int oid, TGi &gi)

Translate an OID to a GI.

void AccessionToOids(const string &acc, vector< TOid > &oids) const

void GetTaxIdsForOids(const vector< blastdb::TOid > &oids, set< TTaxId > &tax_ids) const

void GetLMDBFileNames(vector< string > &lmdb_list) const

void AccessionsToOids(const vector< string > &accs, vector< TOid > &oids) const

bool IsBlastDBVersion5() const

void GetDBTaxIds(set< TTaxId > &tax_ids) const

void TaxIdsToOids(set< TTaxId > &tax_ids, vector< blastdb::TOid > &rv) const

const vector< string > & GetSiList()

const vector< TTi > & GetTiList()

Build ID set for this negative list.

const vector< TGi > & GetGiList()

Build ID set for this negative list.

void UnLease()

Deallocate the memory ranges owned by this object.

bool CheckOrFindOID(TOID &next_oid) const

Find an included oid from the specified point.

static bool GetTaxNames(TTaxId tax_id, SSeqDBTaxInfo &info)

Get the taxonomy names for a given tax id.

void OptimizeGiLists()

Optimize the GI list configuration.

void UnLease()

Return storage held by the volumes.

const CSeqDBVol * GetVol(int i) const

Find a volume by index.

Uint8 GetVolumeSetLength() const

Find total volume length for all volumes.

CSeqDBVol * GetVolNonConst(int i)

Find a volume by index.

int GetNumVols() const

Get the number of volumes.

CSeqDBVol * FindVol(int oid, int &vol_oid) const

Find a volume by OID.

int GetNumOIDs() const

Get the size of the OID range.

int GetVolOIDStart(int i) const

Get the first OID in a volume.

void SeqidToOids(CSeq_id &seqid, vector< int > &oids, CSeqDBLockHold &locked) const

Find OIDs for the specified Seq-id.

void AccessionToOids(const string &acc, vector< int > &oids, CSeqDBLockHold &locked) const

Find OIDs for the specified accession or formatted Seq-id.

void GetColumnBlob(int col_id, int oid, CBlastDbBlob &blob, bool keep, CSeqDBLockHold &locked)

Fetch the data blob for the given column and oid.

const string & GetVolName() const

Get the volume name.

int GetSeqLengthExact(int oid) const

Exact sequence length for nucleotide databases.

int GetColumnId(const string &title, CSeqDBLockHold &locked)

Get an ID number for a given column title.

int GetNumOIDs() const

Get the number of OIDs for this volume.

void GetPigBounds(int &low_id, int &high_id, int &count, CSeqDBLockHold &locked) const

Get PIG Bounds.

void FlushOffsetRangeCache()

Flush all offset ranges cached.

void ListColumns(set< string > &titles, CSeqDBLockHold &locked)

List the titles of all columns for this volume.

int GetSeqLengthApprox(int oid) const

Approximate sequence length for nucleotide databases.

int GetSeqLengthProt(int oid) const

Sequence length for protein databases.

void IdsToOids(CSeqDBGiList &gis) const

Translate Gis to Oids for the given vector of Gi/Oid pairs.

void SetOidMaskType(int oid_masks) const

bool GiToOid(TGi gi, int &oid, CSeqDBLockHold &locked) const

Find the OID given a GI.

void GetGiBounds(TGi &low_id, TGi &high_id, int &count, CSeqDBLockHold &locked) const

Get GI Bounds.

int GetOidAtOffset(int first_seq, Uint8 residue, CSeqDBLockHold &locked) const

Find the OID at a given index into the database.

const map< string, string > & GetColumnMetaData(int col_id, CSeqDBLockHold &locked)

Get all metadata for the specified column.

bool PigToOid(int pig, int &oid) const

Find the OID given a PIG.

bool TiToOid(Int8 ti, int &oid, CSeqDBLockHold &locked) const

Find the OID given a TI.

void SetMemBit(int mbit) const

Set the MEMB_BIT filtering for this volume.

Uint8 GetVolumeLength() const

Get the total length of this volume (in bases).

string GetDate() const

Get the formatting date of the volume.

void GetStringBounds(string &low_id, string &high_id, int &count) const

Get String Bounds.

void HashToOids(unsigned hash, vector< int > &oids, CSeqDBLockHold &locked) const

Get the OIDs for a given sequence hash.

Something else yet again etc.

int GetEnd() const

Get OID after last included OID.

@ eOidRange

OID Range [start, end).

int GetBegin() const

Get first included OID.

Database-wide column information.

const map< string, string > & GetMap()

Get the metadata map.

void SetHaveMap()

Indicate that the metadata map is now complete.

int GetVolumeIndex(int volnum)

Get a volume-specific column ID.

bool HaveMap()

Determine if we have the metadata map yet.

void SetMapValue(const string &k, const string &v)

Add a meta-data key/value association.

bool HasFilter() const

Check whether this tree represents any volume filtering.

const vector< CRef< CSeqDB_FilterTree > > & GetNodes() const

Get child nodes attached to this node.

vector< CRef< CSeqDB_AliasMask > > TFilters

Type used to store lists of filters found here.

int m_CacheRealAlgo

Cached list of real algorithms for BuildVolAlgos.

int GetVolAlgo(int vol_idx, int algo_id)

Build a list of volume algorithm IDs.

bool GetDesc(int algorithm_id, string &desc)

Is this object populated?

void AddMapping(int vol_id, int id, const string &desc)

Register a volume's algorithm definition.

map< int, string > m_IdToDesc

Map of real IDs to descriptions.

void GetIdList(vector< int > &algorithms)

Get a list of user (real) IDs available here.

map< string, int > m_DescToId

Map of descriptions to real IDs.

CSeqDB_IdRemapper()

Constructor.

int GetAlgoId(const string &id)

Translate a string algorithm ID to a numeric algorithm ID.

int m_CacheVolAlgo

Cached list of volume algorithms for BuildVolAlgos.

int RealToVol(int vol_idx, int algo_id)

Translate a real algorithm ID to a volume algorithm ID.

void SetNotEmpty()

Is this object populated?

map< int, map< int, int > > m_RealIdToVolumeId

Map of volume# to map of real id to volume-based id.

int m_CacheVolIndex

Cached volume index for BuildVolAlgos.

bool Empty()

Is this object populated?

int m_NextId

Next unassigned synthetic ID.

static const char * kOidNotFound

String containing the error message in exceptions thrown when a given OID cannot be found.

EOidListType

Indicates how block of OIDs was returned.

ESummaryType

Types of summary information available.

@ eUnfilteredAll

Sum of all sequences, ignoring GI and OID lists and alias files.

@ eFilteredRange

Sum of included sequences with OIDs within the iteration range.

@ eFilteredAll

Values from alias files, or summation over all included sequences.

static const char * kBlastDbDateFormat

Format string for the date returned by CSeqDB::GetDate.

iterator_bool insert(const value_type &val)

const_iterator begin() const

const_iterator end() const

static unsigned char depth[2 *(256+1+29)+1]

unsigned int TSeqPos

Type for sequence locations and lengths.

#define ITERATE(Type, Var, Cont)

ITERATE macro to sequence through container elements.

SStrictId_Tax::TId TTaxId

Taxon id type.

#define LOG_POST(message)

This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...

#define NCBI_THROW(exception_class, err_code, message)

Generic macro to throw an exception, given the exception class, error code and message string.

void Info(CExceptionArgs_Base &args)

const string & FindName(TEnumValueType value, bool allowBadValue) const

Find name of the enum by its numeric value.

void Read(CObjectIStream &in, TObjectPtr object, const CTypeRef &type)

const string AsFastaString(void) const

string GetSeqIdString(bool with_version=false) const

Return seqid string with optional version for text seqid type.

virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)

Optimized implementation of CSerialObject::Assign, which is not so efficient.

virtual void DebugDump(CDebugDumpContext ddc, unsigned int depth) const

Define method for dumping debug information.

void Reset(void)

Reset reference object.

bool NotEmpty(void) const THROWS_NONE

Check if CRef is not empty – pointing to an object and has a non-null value.

bool Empty(void) const THROWS_NONE

Check if CRef is empty – not pointing to any object, which means having a null value.

int32_t Int4

4-byte (32-bit) signed integer

uint32_t Uint4

4-byte (32-bit) unsigned integer

int64_t Int8

8-byte (64-bit) signed integer

uint64_t Uint8

8-byte (64-bit) unsigned integer

#define END_NCBI_SCOPE

End previously defined NCBI scope.

#define BEGIN_NCBI_SCOPE

Define ncbi namespace.

static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)

Convert string to int.

static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)

Split a string using specified delimiters.

static TNumeric StringToNumeric(const CTempString str, TStringToNumFlags flags=0, int base=10)

Convert string to a numeric value.

static string & Replace(const string &src, const string &search, const string &replace, string &dst, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)

Replace occurrences of a substring within a string.

@ fConvErr_NoThrow

Do not throw an exception on error.

EBlast_filter_program

This defines the possible sequence filtering algorithms to be used in a BLAST database.

bool CanGet(void) const

Check if it is safe to call Get method.

const Tdata & Get(void) const

Get the member data.

bool CanGetDb(void) const

Check if it is safe to call GetDb method.

const TDb & GetDb(void) const

Get the Db member data.

bool IsPrf(void) const

Check if variant Prf is selected.

E_Choice Which(void) const

Which variant is currently selected.

const TGeneral & GetGeneral(void) const

Get the variant data.

bool IsPir(void) const

Check if variant Pir is selected.

@ e_General

for other databases

char * dbname(DBPROCESS *dbproc)

Get name of current database.

unsigned int

A callback function used to compare two keys in a database.

const TYPE & Get(const CNamedParameterList *param)

map< string, string > TStringMap

#define INT4_MAX

largest number represented by signed int

bool approx(T x_, T y_, T eps_)

std::istream & in(std::istream &in_, double &x_)

#define INIT_CLASS_MARK()

Marker initializer for constructor.

#define CHECK_MARKER()

Assertion to verify the marker.

#define BREAK_MARKER()

Make the marker of this class invalid.

ESeqDBAllocType

Certain methods have an "Alloc" version.

const blastdb::TOid kSeqDBEntryNotFound

Int4 TOid

Ordinal ID in BLAST databases.

const int kSeqDBNuclNcbiNA8

Used to request ambiguities in Ncbi/NA8 format.

bool IsStringId(const CSeq_id &id)

Determine if id is a string id.

unsigned SeqDB_SequenceHash(const char *sequence, int length)

Returns a path minus filename.

EBlastDbVersion

BLAST database version.

const U & SeqDB_MapFind(const std::map< T, U > &m, const T &k, const U &dflt)

Find a map value or return a default.

static const string * s_CheckUniqueValues(const map< string, string > &m)

void s_ReadRanges(int vol_algo, CSeqDB::TSequenceRanges &ranges, CBlastDbBlob &blob)

static bool s_IsNumericId(const string &id)

static void s_GetDetails(const string &desc, string &program, string &program_name, string &algo_opts)

bool s_Contains(const C &c, const K &k)

void s_AccumulateMinMaxCount(TId low_in, TId high_in, int count_in, TId *low_out, TId *high_out, int *count_out, bool set_all)

Accumulate optional min, max, and count.

static const string s_RestoreColon(const string &in)

The top level of the private implementation layer for SeqDB.

static SLJIT_INLINE sljit_ins l(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)

static SLJIT_INLINE sljit_ins msg(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)

Structure to buffer multiple TSeqRes.

Structure to keep sequence retrieval results.

List of sequence offset ranges.

void append(const void *src, size_type num_elements)

Append extra elements at the end.

static void Read(CBlastDbBlob &blob, int n, CSeqDB::TSequenceRanges &ranges)

static int Read(CBlastDbBlob &blob)

OID-Range type to simplify interfaces.


RetroSearch is an open source project built by @garambo | Open a GitHub Issue

Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo

HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4