A RetroSearch Logo

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Search Query:

Showing content from http://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/doxyhtml/seqdbisam_8cpp_source.html below:

NCBI C++ ToolKit: src/objtools/blast/seqdb_reader/seqdbisam.cpp Source File

46 #define ISAM_VERSION 1 49 #define DEFAULT_NISAM_SIZE 256 52 #define DEFAULT_SISAM_SIZE 64 55 #define MEMORY_ONLY_PAGE_SIZE 1 66  bool

found_index_file =

103  TIndx

disk_file_length(0);

104  bool

found_data_file =

126  Int4

num_elements(0);

165  while

(Stop >= Start) {

166

SampleNum = ((

Uint4

)(Stop + Start)) >> 1;

171  const void

* keydatap(0);

197

Start = SampleNum +1;

228  "Error: Unable to use ISAM index in batch mode."

);

248  int

gilist_index = 0;

251  const void

* data_page (0);

254  int

start = 0, num_elements = 0;

261  for

(

int i

= 0;

i

< num_elements;

i

++) {

276  if

(gilist_index < gilist_size) {

285  if

(isam_data < vol_end) {

308  bool

sameAccession =

false

;

313  if

(acc2 == accession) {

314

sameAccession =

true

;

318  return

sameAccession;

325  bool

sameAccession =

false

;

326  if

(currIndex < num_keys - 1) {

329  return

sameAccession;

339  if

(! gilist_size)

return

;

348  "Error: Unable to use ISAM index in batch mode."

);

352

vector<string> sample_keys;

353

vector<TIndx> page_offs;

365  int

gilist_index = 0;

366  int

sample_index = 0;

381  for

(

int i

= 0;

i

< num_keys;

i

++) {

385  if

(gilist_index < gilist_size) {

391  if

(vals[

i

] < vol_end) {

434  Int4 last

= Start + NumElements - 1;

436  const void

* KeyDataPage =

NULL

;

437  const void

* KeyDataPageStart =

NULL

;

445

KeyDataPage = (

char

*)KeyDataPageStart - Start *

m_TermSize

;

447  bool

found (

false

);

458

}

else if

(Key < Number) {

466  if

(found ==

false

) {

481

*Index = Start + current;

532  TIndx

offset_begin = KeyOffset;

533  TIndx

term_end = KeyOffset + term_in.size() + 1;

534  TIndx

map_end = term_end + at_least;

536  if

(map_end > file_length) {

537

map_end = file_length;

539  if

(term_end > map_end) {

541  result

=

int

(file_length - offset_begin);

550

file_data + term_in.size() + 1,

553  if

(dc_result != -1) {

597  const char

* file_data = begin;

598  int

bytes =

int

(end - begin);

600  for

(

i

= 0; (

i

< bytes) &&

i

< (

int

) term_in.size();

i

++) {

601  char ch1

= term_in[

i

];

602  char ch2

= file_data[

i

];

619  const char

* p = file_data +

i

;

621  while

((p < end) && ((*p) ==

' '

)) {

625  if

(((p == end) ||

ENDS_ISAM_KEY

(*p)) && (

i

== (

int

) term_in.size())) {

638

vector<TIndx> & indices_out,

639

vector<string> & keys_out,

640

vector<string> & data_out)

644  bool

ignore_case =

true

;

648  const char

* indexp(beginp);

649  bool

found_match(

false

);

651  while

(indexp < endp) {

665

indices_out.push_back(page_index + TermNum);

690

vector<TIndx> & indices_out,

691

vector<string> & keys_out,

692

vector<string> & data_out)

699  bool

ignore_case =

true

;

704  bool

done_b(

false

), done_e(

false

);

706  const char

* beginp(0);

707  const char

* endp(0);

712  while

(! (done_b && done_e)) {

713  if

(sample_index < pre_amt) {

717

beg_off = sample_index - pre_amt;

724

end_off = sample_index + post_amt;

727  x_LoadPage

(beg_off, end_off, & beginp, & endp);

735  if

(diff_begin != -1) {

743  const char

* last_term(0);

744  const char

* p(endp-1);

748  enum

{ eEndNulls, eLastTerm } search_stage = eEndNulls;

753  if

(search_stage == eEndNulls) {

755

search_stage = eLastTerm;

776  if

(diff_end != -1) {

794  const char

* map_end,

795

vector<string> & keys_out,

796

vector<string> & data_out)

798  const char

* data_ptr(0);

799  const char

* p(key_start);

801  while

(p < map_end) {

805

keys_out.push_back(

string

(key_start, data_ptr));

806

data_out.push_back(

string

(data_ptr+1, p));

808

keys_out.push_back(

string

(key_start, p));

809

data_out.push_back(

""

);

827  TIndx

offset_begin = sample_offset + (sample_num *

sizeof

(

Uint4

));

844  const char

* key_offset_addr =

849  for

(

int i

= 0;

i

<length;

i

++) {

850  if

(! key_offset_addr[

i

]) {

857  str

.assign(key_offset_addr, length);

874  bool

ignore_case(

true

);

882  TIndx

offset_begin = SampleOffset + (SampleNum *

sizeof

(

Uint4

));

901  const char

** beginp,

907  _ASSERT

(SampleNum2 > SampleNum1);

935

vector<string> & terms_out,

936

vector<string> & values_out,

937

vector<TIndx> & indices_out)

943  bool

short_match(

false

);

944  bool

follow_match(

false

);

946  size_t

preexisting_data_count = values_out.size();

957  bool

ignore_case =

true

;

964  int

Length = (

int

) term_in.size();

977  while

(Stop >= Start) {

978

SampleNum = ((

Uint4

)(Stop + Start)) >> 1;

982  int

diff =

x_DiffSample

(term_in, SampleNum, KeyOffset);

991  if

(BytesToEnd > (

TIndx

) max_lines_2) {

992

BytesToEnd = max_lines_2;

1009  if

(short_match && (diff >= Length)) {

1013  while

(SampleNum > 0) {

1027  if

(prefix != term_in) {

1035

found_short = SampleNum + 1;

1050

found_short = SampleNum;

1059

?

tolower

((

unsigned char

) term_in[diff]) <

tolower

((

unsigned char

) KeyData[diff])

1060

: term_in[diff] < KeyData[diff]) {

1063

Start = SampleNum + 1;

1070  if

( (SampleNum < 0) || (SampleNum >=

m_NumSamples

)) {

1076  const char

* beginp(0);

1077  const char

* endp(0);

1079  x_LoadPage

(SampleNum, SampleNum + 1, & beginp, & endp);

1095  if

(preexisting_data_count == values_out.size()) {

1108

m_IdentType (ident_type),

1109

m_IndexLease (atlas),

1110

m_DataLease (atlas),

1117

m_Initialized (

false

),

1118

m_KeySampleOffset(0),

1119

m_TestNonUnique (

true

),

1128  switch

(ident_type) {

1143  "Error: ident type argument not valid"

);

1157  string msg

(

"Error: Could not open input file ("

);

1176  string

& index_name,

1180

(!

isalpha

((

unsigned char

) prot_nucl)) ||

1181

(!

isalpha

((

unsigned char

) file_ext_char))) {

1185  "Error: argument not valid"

);

1188

index_name.reserve(

dbname

.size() + 4);

1189

data_name.reserve(

dbname

.size() + 4);

1193

index_name += prot_nucl;

1194

index_name += file_ext_char;

1196

data_name = index_name;

1205  string

iname, dname;

1237

vector<TOid> & oids,

1239  bool

& version_check)

1242  bool

strip_version = version_check;

1243

version_check =

false

;

1251  bool

found =

false

;

1253  string

accession(

string

(

"gb|"

) + acc +

"|"

);

1254  string

locus_str(

string

(

"gb||"

) + acc);

1258

vector<string> keys_out;

1259

vector<string> data_out;

1260

vector<TIndx> indices_out;

1266

indices_out)) < 0) {

1278

indices_out)) < 0) {

1292

indices_out)) < 0) {

1302  if

((! found) && strip_version) {

1303  size_t

pos = acc.find(

"."

);

1305  bool

is_version =

false

;

1307  if

(pos != string::npos) {

1308  int

ver_len =

static_cast<int>

(acc.size() - pos) - 1;

1310

is_version = (ver_len <= 3 && ver_len >= 1);

1312  for

(

size_t

vp = pos+1; vp < acc.size(); vp++) {

1314

is_version =

false

;

1321  string

nover(acc, 0, pos);

1329  if

(data_out.size()) {

1330

version_check =

true

;

1357  if

(

id

.

size

() &&

1361

indices_out)) < 0)) {

1372  ITERATE

(vector<string>, iter, data_out) {

1373

oids.push_back(atoi((*iter).c_str()));

1383

cerr <<

" this should be derived from readdb_acc2fastaEx().."

<< endl;

1399

x_TranslateGiList<TGi>(vol_start, ids);

1403

x_TranslateGiList<TTi>(vol_start, ids);

1407

x_TranslateGiList<string>(vol_start, ids);

1411

x_TranslateGiList<TPig>(vol_start, ids);

1417  "Error: Wrong type of idlist specified."

);

1473  int

num_elements(0);

1475  const void

* data_page(0);

1509

elem_index = num_elements - 1;

1521  const char

* beginp(0);

1522  const char

* endp(0);

1527  x_LoadPage

(Start, Start + 1, & beginp, & endp);

1531

vector<string> keys_out;

1532

vector<string> data_out;

1546  x_LoadPage

(Stop, Stop + 1, & beginp, & endp);

1550  const char

* lastp(0);

1551  const char

* indexp(beginp);

1553  while

(indexp < endp) {

1668

vector<TOid> & oids)

1677  bool

found =

false

;

1683

vector<string> keys_out;

1684

vector<string> data_out;

1685

vector<TIndx> indices_out;

1690

indices_out)) < 0) {

1700  ITERATE

(vector<string>, iter, data_out) {

1701

oids.push_back(atoi(iter->c_str()));

bool GetFileSizeL(const string &fname, TIndx &length)

Get size of a file.

const char * GetFileDataPtr(const string &fname, TIndx offset)

Get a pointer to the specified offset.

void Init(const string &filename)

Initializes a memory map object.

void Clear()

Clears the memory map object.

bool OutsideLastBound(Int8 ident)

Returns true if the provided integer compares as higher than the assigned upper boundary for this ISA...

bool IsSet()

Returns true if this object has an assigned value.

string GetString() const

Fetch the string value of this object.

void SetString(const string &ident)

Assign a string value to this object.

void SetNumeric(Int8 ident)

Assign a numeric value to this object.

Int8 GetNumeric() const

Fetch the numeric value of this object.

bool OutsideFirstBound(Int8 ident)

Returns true if the provided integer compares as lower than the assigned lower boundary for this ISAM...

EErrorCode x_StringSearch(const string &term_in, vector< string > &term_out, vector< string > &value_out, vector< TIndx > &index_out)

String identifier lookup.

EErrorCode x_SearchIndexNumeric(Int8 Number, int *Data, Uint4 *Index, Int4 &SampleNum, bool &done)

Index file search.

CSeqDBIsam(CSeqDBAtlas &atlas, const string &dbname, char prot_nucl, char file_ext_char, ESeqDBIdType ident_type)

Constructor.

@ eNumericLongId

This type is not supported.

@ eString

This type is not supported.

@ eNumericNoData

Numeric database with Key/Value pairs in the index file.

void x_SearchNegativeMulti(int vol_start, int vol_end, CSeqDBNegativeList &gis, bool use_tis)

Negative ID List Translation.

CSeqDBFileMemMap m_DataLease

A persistent lease on the ISAM data file.

TIndx m_IndexFileLength

The length of the ISAM index file.

bool m_LongId

Use Uint8 for the key.

int x_DiffCharLease(const string &term_in, CSeqDBFileMemMap &lease, const string &file_name, TIndx file_length, Uint4 at_least, TIndx KeyOffset, bool ignore_case)

Find the first character to differ in two strings.

int x_DiffChar(const string &term_in, const char *begin, const char *end, bool ignore_case)

Find the first character to differ in two strings.

int x_GetPageNumElements(Int4 SampleNum, Int4 *Start)

Determine the number of elements in the data page.

ESeqDBIdType m_IdentType

The type of identifier this class uses.

SIsamKey m_LastKey

Last volume key.

Int4 m_IdxOption

Options set by upper layer.

void x_LoadData(CSeqDBFileMemMap &lease, vector< T > &keys, vector< int > &vals, int num_keys, TIndx begin)

Load and extract a data page into array at once.

void x_GetDataElement(const void *dpage, int index, Int8 &key, int &data)

Get a particular data element from a data page.

Int4 m_NumSamples

Number of terms in ISAM index.

void HashToOids(unsigned hash, vector< TOid > &oids)

Sequence hash lookup.

EErrorCode

Exit conditions occurring in this code.

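@ eBadVersion

The format version of the ISAM file is unsupported.

@ eBadType

The requested ISAM type did not match the file.

@ eWrongFile

The file was not found, or was the wrong length.

@ eNoError

Lookup was successful.

@ eInitFailed

Initialization of the search object failed. (The description "The key was not found." presumably belongs to a not-found result code that is not listed here.)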

int x_DiffSample(const string &term_in, Uint4 SampleNum, TIndx &KeyOffset)

Find the first character to differ in two strings.

~CSeqDBIsam()

Destructor.

Uint8 x_GetNumericKey(const void *p)

void x_LoadIndex(CSeqDBFileMemMap &lease, vector< T > &keys, vector< TIndx > &offs)

Load and extract all index samples into array at once.

bool x_SparseStringToOids(const string &acc, vector< int > &oids, bool adjusted)

Lookup a string in a sparse table.

void x_FindIndexBounds()

Find the least and greatest keys in this ISAM file.

Int4 m_NumTerms

Number of terms in database.

void IdsToOids(int vol_start, int vol_end, CSeqDBGiList &ids)

Translate Gis and Tis to Oids for the given ID list.

EErrorCode x_SearchDataNumeric(Int8 Number, int *Data, Uint4 *Index, Int4 SampleNum)

Data file search.

int TOid

This class works with OIDs relative to a specific volume.

bool m_Initialized

Flag indicating whether initialization has been done.

TIndx x_GetIndexKeyOffset(TIndx sample_offset, Uint4 sample_num)

Get the offset of the specified sample.

static void x_MakeFilenames(const string &dbname, char prot_nucl, char file_ext_char, string &index_name, string &data_name)

Make filenames for ISAM file.

static void x_Lower(string &s)

Converts a string to lower case.

bool x_OutOfBounds(Int8 key)

Check whether a numeric key is within this volume's bounds.

void x_SearchNegativeMultiSeq(int vol_start, int vol_end, CSeqDBNegativeList &gis)

EErrorCode x_InitSearch(void)

Initialize the search object.

void x_GetIndexString(TIndx key_offset, int length, string &prefix, bool trim_to_null)

Read a string from the index file.

void x_ExtractPageData(const string &term_in, TIndx page_index, const char *beginp, const char *endp, vector< TIndx > &indices_out, vector< string > &keys_out, vector< string > &data_out)

Find matches in the given memory area of a string ISAM file.

void GetIdBounds(Int8 &low_id, Int8 &high_id, int &count)

Get Numeric Bounds.

Int4 m_PageSize

Page size of ISAM index.

TIndx m_DataFileLength

The length of the ISAM data file.

void UnLease()

Return any memory held by this object to the atlas.

int m_Type

The format type of database files found (eNumeric or eString).

TIndx m_KeySampleOffset

Offset of samples in index file.

SIsamKey m_FirstKey

First volume key.

void x_LoadPage(TIndx SampleNum1, TIndx SampleNum2, const char **beginp, const char **endp)

Map a page into memory.

void x_ExtractAllData(const string &term_in, TIndx sample_index, vector< TIndx > &indices_out, vector< string > &keys_out, vector< string > &data_out)

Find matches in the given page of a string ISAM file.

void x_MapDataPage(int sample_index, int &start, int &num_elements, const void **data_page_begin)

Map a data page.

CSeqDBAtlas::TIndx TIndx

Type which is large enough to span the bytes of an ISAM file.

int x_GetNumericData(const void *p)

void StringToOids(const string &acc, vector< TOid > &oids, bool adjusted, bool &version_check)

String translation.

bool x_FindInNegativeList(CSeqDBNegativeList &ids, int &index, Int8 key, bool use_tis)

Find ID in the negative GI list using PBS.

Int4 m_MaxLineSize

Maximum string length in the database.

void x_ExtractData(const char *key_start, const char *entry_end, vector< string > &key_out, vector< string > &data_out)

Extract the data from a key-value pair in memory.

EErrorCode x_NumericSearch(Int8 Number, int *Data, Uint4 *Index)

Numeric identifier lookup.

string m_DataFname

The filename of the ISAM data file.

static bool IndexExists(const string &dbname, char prot_nucl, char file_ext_char)

Check if a given ISAM index exists.

string m_IndexFname

The filename of the ISAM index file.

int m_TermSize

Size of the numeric key-data pair.

bool x_IdentToOid(Int8 id, TOid &oid)

Numeric identifier lookup.

CSeqDBFileMemMap m_IndexLease

A persistent lease on the ISAM index file.

CSeqDBAtlas & m_Atlas

The memory management layer.

int GetNumTis() const

Get the number of TIs in the array.

void AddIncludedOid(int oid)

Include an OID in the iteration.

void AddVisibleOid(int oid)

Indicate a visible OID.

int GetNumGis() const

Get the number of GIs in the array.

int GetNumSis() const

Get the number of SeqIds in the array.

void InsureOrder()

Sort list if not already sorted.

static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)

static DLIST_TYPE *DLIST_NAME() last(DLIST_LIST_TYPE *list)

static const char * str(char *buf, int n)

#define ITERATE(Type, Var, Cont)

ITERATE macro to sequence through container elements.

#define NCBI_THROW(exception_class, err_code, message)

Generic macro to throw an exception, given the exception class, error code and message string.

virtual bool Exists(void) const

Check existence of file.

const string AsFastaString(void) const

@ fParse_RawText

Try to ID raw non-numeric accessions.

@ fParse_AnyLocal

Treat otherwise unidentified strings as local accessions as long as they don't resemble FASTA-style I...

int32_t Int4

4-byte (32-bit) signed integer

uint32_t Uint4

4-byte (32-bit) unsigned integer

int64_t Int8

8-byte (64-bit) signed integer

#define END_NCBI_SCOPE

End previously defined NCBI scope.

#define BEGIN_NCBI_SCOPE

Define ncbi namespace.

static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)

Case-insensitive compare of a substring with another string.

static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)

Find the pattern in the string.

static string UIntToString(unsigned int value, TNumToStringFlags flags=0, int base=10)

Convert UInt to string.

static bool SplitInTwo(const CTempString str, const CTempString delim, string &str1, string &str2, TSplitFlags flags=0)

Split a string into two pieces using the specified delimiters.

char * dbname(DBPROCESS *dbproc)

Get name of current database.

unsigned int

A callback function used to compare two keys in a database.

const string version

version string

const struct ncbi::grid::netcache::search::fields::SIZE size

const struct ncbi::grid::netcache::search::fields::KEY key

static const BitmapCharRec ch1

static const BitmapCharRec ch2

Useful/utility classes and methods.

ESeqDBIdType

Various identifier formats used in Id lookup.

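@ eStringId

Some sequence sources use string identifiers.

@ eTiId

Trace ID is a numeric identifier for Trace sequences.

@ ePigId

Each PIG identifier refers to exactly one protein sequence.

@ eHashId

Sequence hash identifier, used for hash-to-OID lookup. (The description "Genomic ID is a relatively stable numeric identifier for sequences." presumably belongs to the GI identifier type, which is not listed here.)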

#define SEQDB_ISEOL(x)

Macro for EOL chars.

T SeqDB_GetStdOrd(const T *stdord_obj)

Read a network order integer value.

USING_SCOPE(objects)

Place these definitions in the ncbi namespace.

#define DEFAULT_SISAM_SIZE

Default page size for string indices.

static bool ENDS_ISAM_KEY(char P)

Returns true if the character is a terminator for an ISAM key.

#define DEFAULT_NISAM_SIZE

Default page size for numeric indices.

#define ISAM_VERSION

Format version of the ISAM files.

#define MEMORY_ONLY_PAGE_SIZE

Special page size value which indicates a memory-only string index.

static bool s_IsSameAccession(string acc1, string acc2)

static char s_SeqDBIsam_NullifyEOLs(char c)

Return NUL for nulls or EOL characters.

const char ISAM_DATA_CHAR

The terminating character for string ISAM keys when data is present.

ISAM index database access object.

static SLJIT_INLINE sljit_ins msg(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)


RetroSearch is an open source project built by @garambo | Open a GitHub Issue

Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo

HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4