A RetroSearch Logo

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Search Query:

Showing content from http://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/doxyhtml/writedb__impl_8cpp_source.html below:

NCBI C++ ToolKit: src/objtools/blast/seqdb_writer/writedb_impl.cpp Source File

66  bool

scan_bioseq_4_cfastareader_usrobj)

71

m_MaxVolumeLetters (0),

74

m_MaskDataColumn (-1),

75

m_ParseIDs (parse_ids),

76

m_UseGiMask (use_gi_mask),

81

m_HaveSequence (

false

),

82

m_LongSeqId (long_ids),

84

m_limitDefline (protein? limit_defline:

false

),

85

m_OidMasks (oid_masks),

86

m_ScanBioseq4CFastaReaderUsrObjct(scan_bioseq_4_cfastareader_usrobj)

94  t

.assign(

t

, 1,

t

.size() - 1);

131 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \ 132  (!defined(NCBI_COMPILER_MIPSPRO)) ) 169  msg

<<

"Invalid molecule type of sequence added (" 171

<<

"); expected "

<< (

m_Protein

?

"protein"

:

"nucleotide"

);

217  while

(!

source

->eof()) {

219  if

(next_key >= other.

key

) {

229  return

(

key

< other.

key

);

242 #ifdef WORDS_BIGENDIAN 252 template

<>

inline string 254 #define CHAR_BUFFER_SIZE 256 261  while

(*p != 0x02) ++p;

267  if

( (

in

.size() == 4)

268

&& ((

in

[0] -

'0'

) * (

in

[0] -

'9'

) <= 0) ) {

272  source

->read(next_token, 4);

273  source

->seekg(-4, ios_base::cur);

274  string

next_key(next_token, 4);

276  if

(next_key ==

in

) {

278  return

x_GetNextKey();

289  return

(*lhs < *rhs);

297  while

(!keys.empty()) {

309  if

(

key

->AdvanceKey(*

next

)) {

310  if

(keys.find(

key

) != keys.end()) {

312  msg

<<

"Error: Duplicate seq_id <" 314

<<

"> is found multiple times across volumes."

;

339  for

(

unsigned int i

=0;

i

<

m_GiMasks

.size(); ++

i

) {

353  LOG_POST

(

Info

<<

"Rename files index to "

<< num_digits <<

" digits"

);

383

vector<blastdb::TOid> vol_num_oids(

m_VolumeList

.size());

387

vol_num_oids[

i

] = v->

GetOID

();

418  string

masklist(

""

);

421  const string

& x =

m_GiMasks

[

i

]->GetName();

423

masklist += x +

" "

;

430

ofstream alias(nm.c_str());

432

alias <<

"#\n# Alias file created: "

<<

m_Date

<<

"\n#\n" 434

<<

"DBLIST "

<< dblist <<

"\n"

;

436  if

(masklist !=

""

) {

437

alias <<

"MASKLIST "

<< masklist <<

"\n"

;

444  if

(! bin_hdr.empty()) {

455

vector< vector< char >* > bindata;

458  if

((**iter).IsUser()) {

464  const

vector< CRef< CUser_field > > &

D

= uo.

GetData

();

468  D

[0]->CanGetLabel() &&

469  D

[0]->GetLabel().IsStr() &&

471  D

[0]->CanGetData() &&

472  D

[0]->GetData().IsOss()) {

474

bindata =

D

[0]->GetData().GetOss();

482  if

(! bindata.empty()) {

483  if

(bindata[0] && (! bindata[0]->

empty

())) {

484

vector<char> &

b

= *bindata[0];

486

bin_hdr.assign(&

b

[0],

b

.size());

494  static const int

kGenBankLimit = 5;

495  static const int

kGenBankScore = 500;

496  if

(dfs->

Get

().size() <= kGenBankLimit){

502

list<CRef<CBlast_def_line> > & df_set= deflines->

Set

();

509

CBlast_def_line_set::Tdata::iterator itr=df_set.

begin

();

511

list<CRef<CBlast_def_line> > tmp_gb_list;

512  while

(itr != df_set.end()){

516  if

(score >= kGenBankScore){

517  size_t

orig_size = tax_ids.

size

();

519  if

(orig_size == tax_ids.

size

()){

521

list<CRef<CBlast_def_line> >::iterator tmp_itr = itr;

523

tmp_gb_list.splice(tmp_gb_list.end(), df_set, tmp_itr);

526

itr = df_set.erase(itr);

540  while

((gb_count < kGenBankLimit) && (tmp_gb_list.size() > 0)){

541

df_set.splice(df_set.end(), tmp_gb_list, tmp_gb_list.begin());

575  if

(defline->CanGetMemberships() &&

576

defline->GetMemberships().size() == 0) {

578

defline->ResetMemberships();

581  if

(defline->CanGetLinks() &&

582

defline->GetLinks().size() == 0) {

584

defline->ResetLinks();

588

deflines.

Reset

(bdls);

594  const

vector< vector<int> > & membbits,

595  const

vector< vector<int> > & linkouts,

602

vector<TTaxId> taxids;

619  else if

( desc.

IsOrg

()) {

620

org_pt = &(desc.

GetOrg

());

628  if

((**dbiter).CanGetDb() &&

629

(**dbiter).GetDb() ==

"taxon"

) {

648

list< CRef<CSeq_id> > ids = bioseq.

GetId

();

650  unsigned

taxid_i(0), mship_i(0), links_i(0);

651  bool

used_pig(

false

);

658  while

(! ids.empty()) {

661

defline->SetSeqid() = ids;

694

defline->SetTitle(titles);

696  if

(taxid_i < taxids.size()) {

697

defline->SetTaxid(taxids[taxid_i++]);

700  if

(mship_i < membbits.size()) {

701  const

vector<int> & V = membbits[mship_i++];

702

defline->SetMemberships().assign(V.begin(), V.end());

705  if

(links_i < linkouts.size()) {

706  const

vector<int> & V = linkouts[mship_i++];

707

defline->SetLinks().assign(V.begin(), V.end());

710  if

((! used_pig) && pig) {

711

defline->SetOther_info().push_back(pig);

715

bdls->

Set

().push_back(defline);

728

istringstream iss(bin_hdr);

732

deflines.

Reset

(&* bdls);

738  if

(deflines.

Empty

())

743  if

(bioseq_id ==

NULL

||

748

(!long_seqid && (bioseq_id->

IsPrf

() || bioseq_id->

IsPir

()))) {

756  if

(

id

.NotEmpty() && !id->

IsLocal

()) {

768  const

vector< vector<int> > & membbits,

769  const

vector< vector<int> > & linkouts,

776  bool

scan_bioseq_4_cfastareader_usrobj)

778  bool

use_bin = (deflines.

Empty

() && pig == 0);

780  if

(! bin_hdr.empty() && OID<0) {

784  if

(deflines.

Empty

()) {

787  if

(bioseq.

Empty

()) {

790  "Error: Cannot find CBioseq or deflines."

);

802  if

(bin_hdr.empty()) {

812

scan_bioseq_4_cfastareader_usrobj);

822  if

(bin_hdr.empty() && deflines.

Empty

()) {

831  if

(bin_hdr.empty() &&

832

(deflines.

Empty

() || deflines->

Get

().empty())) {

836  "Error: No deflines provided."

);

840  const

list<int> * L = 0;

842  if

(deflines->

Get

().front()->CanGetOther_info()) {

843

L = & deflines->

Get

().front()->GetOther_info();

851  if

((L == 0) || L->empty()) {

853

bdls->

Set

().front()->SetOther_info().push_back(pig);

855

deflines.

Reset

(&* bdls);

857

}

else if

(L->front() != pig) {

859

bdls->

Set

().front()->SetOther_info().front() = pig;

861

deflines.

Reset

(&* bdls);

872

bdls->

Set

().front()->SetSeqid().front() = gnl_id;

874

deflines.

Reset

(&* bdls);

877  if

(deflines.

Empty

() && (! bin_hdr.empty())) {

887  if

(bin_hdr.empty() || OID>=0) {

895  if

((! deflines.

Empty

()) && deflines->

CanGet

()) {

927  if

(!

m_Ids

.empty()) {

935  "Error: Cannot find IDs or deflines."

);

942  const

list< CRef<CSeq_id> > & ids = (**iter).GetSeqid();

949  m_Ids

.push_back(*it);

977  "Need sequence data."

);

997  "Need sequence data."

);

1007  switch

(sd.

Which

()) {

1033  msg

=

"Unable to process sequence for entry ["

;

1038  if

(!

msg

.empty()) {

1047  "No sequence data in Bioseq, " 1048  "and no Bioseq_Handle available."

);

1062

na8.reserve(sz + 1);

1067

na4.resize((sz + 1) / 2);

1069  for

(

int i

= 0;

i

< sz;

i

+= 2) {

1070

na4[

i

/2] = (na8[

i

] << 4) + na8[

i

+1];

1075

(

int

)

si

.GetLength(),

1139  char

* map_sz_str = getenv(

"BLASTDB_LMDB_MAP_SIZE"

);

1200 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \ 1201  (!defined(NCBI_COMPILER_MIPSPRO)) ) 1237  "Cannot write sequence to volume."

);

1254

((

b

< 0) ? -

b

:

b

));

1304 template

<

class

TWriteSize,

class

TRanges>

1309  typedef

vector< pair<TSeqPos, TSeqPos> > TPairVector;

1311  Int4

num_written = 0;

1312

TWriteSize::WriteInt(blob,

count

);

1314  for

(

typename

TRanges::const_iterator

r1

= (ranges).begin(),

1315

r1_end = (ranges).end();

1319  if

(

r1

->offsets.size()) {

1321

TWriteSize::WriteInt(blob,

r1

->algorithm_id);

1322

TWriteSize::WriteInt(blob,

r1

->offsets.size());

1325

TWriteSize::WriteInt(blob,

r2

->first);

1326

TWriteSize::WriteInt(blob,

r2

->second);

1336 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \ 1337  (!defined(NCBI_COMPILER_MIPSPRO)) ) 1340  const

vector <TGi> & gis)

1356  typedef

vector< pair<TSeqPos, TSeqPos> > TPairVector;

1358  int

range_list_count = 0;

1359  int

offset_pairs_count = 0;

1363  if

(

r1

->empty()) {

1367

range_list_count ++;

1368

offset_pairs_count +=

r1

->offsets.size();

1371  string msg

(

"Error: Algorithm IDs must be registered before use."

);

1372  msg

+=

" Unknown algorithm ID = "

+

1379  if

((

r2

->first >

r2

->second) ||

1380

(

r2

->second > seq_length)) {

1384  "Error: Masked data offsets out of bounds."

);

1393  if

(

r1

->offsets.size()) {

1395

->AddGiMask(gis,

r1

->offsets);

1412  if

(

r1

->offsets.size()) {

1438  const string

& options,

1439  const string

& name)

1461  return

algorithm_id;

1466  const string

&description,

1467  const string

&options)

1472  string value

=

"100:"

+

1479  return

algorithm_id;

1497  size_t

col_id =

m_Blobs

.size() / 2;

1507  m_Blobs

.push_back(new_blob2);

1522  const string

&

key

,

1523  const string

&

value

)

1525  if

((col_id < 0) || (col_id >= (

int

)

m_ColumnMetas

.size())) {

1527  "Error: provided column ID is not valid"

);

1539  if

((col_id < 0) || (col_id * 2 >= (

int

)

m_Blobs

.size())) {

1541  "Error: provided column ID is not valid"

);

1546  "Error: Already have blob for this sequence and column"

);

1576  bool

scan_bioseq_4_cfastareader_usrobj)

1581  string

binary_header;

1582

vector< vector<int> > v1,

v2

;

1584  const bool

kLimitDefline =

false

;

1588

long_seqids, kLimitDefline, scan_bioseq_4_cfastareader_usrobj);

1605  "Error: Nucleotide masking not supported."

);

1610  if

(masked.empty()) {

1634  for

(

unsigned i

= 0;

i

< mask_bytes.size();

i

++) {

1635  int

ch = ((

int

) mask_bytes[

i

]) & 0xFF;

1642  string

mask_byte =

"X"

;

1660

vols.push_back((**iter).GetVolumeName());

1669

(**iter).ListFiles(files);

1714 #define TAB_REPLACEMENT " " 1721  const

vector< vector<int> > & membits,

1722  const

vector< vector<int> > & linkout,

1727  bool

scan_bioseq_4_cfastareader_usrobj)

1737  if

(scan_bioseq_4_cfastareader_usrobj) {

1747  const

vector< CRef< CUser_field > > &

D

= desc.

GetUser

().

GetData

();

1752  if

(

f

.CanGetLabel() &&

1753  f

.GetLabel().IsStr() &&

1754  f

.GetLabel().GetStr() ==

"DefLine"

&&

1756  f

.GetData().IsStr()) {

1774  unsigned

mship_i(0), links_i(0);

1775  bool

used_pig(

false

);

1791

defline->SetSeqid().push_back(gnl_id);

1793  string

title(fasta, 1, fasta.size());

1798

defline->SetTitle(title);

1800  if

(mship_i < membits.size()) {

1801  const

vector<int> & V = membits[mship_i++];

1802

defline->SetMemberships().assign(V.begin(), V.end());

1805  if

(links_i < linkout.size()) {

1806  const

vector<int> & V = linkout[mship_i++];

1807

defline->SetLinks().assign(V.begin(), V.end());

1810  if

((! used_pig) && pig) {

1811

defline->SetOther_info().push_back(pig);

1815

bdls->

Set

().push_back(defline);

1820  while

(fasta.size()) {

1821  size_t

id_start = skip;

1822  size_t

pos_title = fasta.find(

" "

, skip);

1823  size_t

pos_next = fasta.find(

"\001"

, skip);

1826  if

(pos_next == fasta.npos) {

1828

pos_next = fasta.find(

" >"

);

1836  if

(pos_next == fasta.npos) {

1837

pos_next = fasta.size();

1841  if

(pos_title == fasta.npos || pos_title >= pos_next) {

1843

pos_title = pos_next;

1846  string

ids(fasta, id_start, pos_title - id_start);

1847  if

(pos_title == pos_next) pos_title--;

1848  string

title(fasta, pos_title + 1, pos_next-pos_title - 1);

1849  string

remaining(fasta, pos_next, fasta.size() - pos_next);

1850

fasta.swap(remaining);

1853

list< CRef<CSeq_id> > seqids;

1854  if

(ids.find(

'|'

) !=

NPOS

){

1861  string label

=

id

->GetSeqIdString(

true

);

1868  string label

=

id

->GetSeqIdString(

true

);

1872

seqids.push_back(

id

);

1878

defline->SetSeqid().swap(seqids);

1879

defline->SetTitle(title);

1881  if

(mship_i < membits.size()) {

1882  const

vector<int> & V = membits[mship_i++];

1883

defline->SetMemberships().assign(V.begin(), V.end());

1886  if

(links_i < linkout.size()) {

1887  const

vector<int> & V = linkout[mship_i++];

1888

defline->SetLinks().assign(V.begin(), V.end());

1891  if

((! used_pig) && pig) {

1892

defline->SetOther_info().push_back(pig);

1896

bdls->

Set

().push_back(defline);

1903 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \ 1904  (!defined(NCBI_COMPILER_MIPSPRO)) )

vector< TRangeWithFuzz > TRanges

const CSeq_id * GetNonLocalId() const

Find a non-local ID if present, consulting assembly details if all IDs for the overall sequence are l...

`Blob' Class for SeqDB (and WriteDB).

@ eSimple

Just write NUL bytes until aligned.

void WritePadBytes(int align, EPadding fmt)

Align the offset by writing pad bytes.

void WriteInt2(int x)

Write a 2 byte integer to the blob.

void Clear()

Clear all owned data and reference an empty string.

void WriteInt4(Int4 x)

Write a 4 byte integer to the blob.

void WriteInt1(int x)

Write a 1 byte integer to the blob.

void WriteInt4_LE(Int4 x)

void SortBySeqIdRank(bool is_protein, bool useBlastRank=false)

Sort the deflines according to the toolkit established ranking of Seq-ids.

static Int4 GetInt4(const unsigned char *ptr)

bool IsRegistered(int algo_id) const

Verify whether the provided algorithm ID has been registered with this object.

int Add(EBlast_filter_program program, const string &options=string(), const string &progname=string())

Attempt to register the information about a masking algorithm.

This represents a set of masks for a given sequence.

CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string Sample usage:

static SIZE_TYPE Convert(const CTempString &src, TCoding src_coding, TSeqPos pos, TSeqPos length, string &dst, TCoding dst_coding)

CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...

CMaskInfoRegistry m_MaskAlgoRegistry

Registry for masking algorithms in this database.

int CreateColumn(const string &title, bool mbo=false)

Set up a generic CWriteDB metadata column.

void x_Publish()

Flush accumulated sequence data to volume.

CSeqVector m_SeqVector

SeqVector for next sequence to write.

void SetMaskedLetters(const string &masked)

Set bases that should not be used in sequences.

void x_MaskSequence()

Replace masked input letters with m_MaskByte value.

void x_CookIds()

Collect ids for ISAM files.

void SetPig(int pig)

Set the PIG identifier of this sequence.

string m_Sequence

Sequence data in format that will be written to disk.

bool m_Protein

True if DB is protein.

vector< vector< int > > m_Memberships

Membership bits - outer vector is per-defline, inner is bits.

void x_CookColumns()

Prepare column data to be appended to disk.

void AddColumnMetaData(int col_id, const string &key, const string &value)

Add meta data to a column.

CWriteDB_Column::TColumnMeta TColumnMeta

Per-column metadata.

void ListFiles(vector< string > &files)

List Filenames.

void x_SetHaveSequence()

Records that we now have unwritten sequence data.

CRef< CWriteDB_Volume > m_Volume

This volume is currently accepting sequences.

bool x_HaveSequence() const

Returns true if we have unwritten sequence data.

void AddSequence(const CTempString &sequence, const CTempString &ambiguities)

Add a new sequence as raw sequence and ambiguity data.

static void x_GetFastaReaderDeflines(const CBioseq &bioseq, CConstRef< CBlast_def_line_set > &deflines, const vector< vector< int > > &membits, const vector< vector< int > > &linkout, int pig, bool accept_gt, bool parse_ids, bool long_seqids, bool scan_bioseq_4_cfastareader_usrobj=false)

Extract a defline set from a CFastaReader generated CBioseq.

vector< char > m_MaskLookup

Is (blast-aa) byte masked?

vector< CRef< CWriteDB_Volume > > m_VolumeList

List of all volumes so far, up to and including m_Volume.

string m_Dbname

Database base name.

string m_Date

Time stamp (for all volumes.)

void x_MakeAlias()

Flush accumulated sequence data to volume.

void x_CookHeader()

Convert header data into usable forms.

void Close()

Close the file and flush any remaining data to disk.

void SetMaxFileSize(Uint8 sz)

Set the maximum size for any file in the database.

vector< CRef< CSeq_id > > m_Ids

Ids for next sequence to write, for use during ISAM construction.

void x_CookSequence()

Convert sequence data into usable forms.

~CWriteDB_Impl()

Destructor.

EBlastDbVersion m_DbVersion

BLASTDB version.

int m_Pig

PIG to attach to headers for protein sequences.

static CRef< CBlast_def_line_set > ExtractBioseqDeflines(const CBioseq &bs, bool parse_ids, bool long_seqids, bool scan_bioseq_4_cfastareader_usrobj=false)

Extract deflines from a CBioseq.

string m_MaskedLetters

Masked protein letters (IUPAC).

EIndexType m_Indices

Indexing mode.

static void x_BuildDeflinesFromBioseq(const CBioseq &bioseq, CConstRef< CBlast_def_line_set > &deflines, const vector< vector< int > > &membits, const vector< vector< int > > &linkout, int pig)

Construct deflines from a CBioseq and other meta-data.

int x_GetMaskDataColumnId()

Get the mask data column id.

vector< TColumnMeta > m_ColumnMetas

Meta data for all columns.

CConstRef< CBlast_def_line_set > m_Deflines

Deflines to write as header.

void x_ClearHaveSequence()

Records that we no longer have unwritten sequence data.

void SetMaskData(const CMaskedRangesVector &ranges, const vector< TGi > &gis)

Set filtering data for a sequence.

vector< string > m_ColumnTitles

Column titles.

int m_MaskDataColumn

Column ID for masking data column.

int x_ComputeSeqLength()

Compute the length of the current sequence.

vector< vector< int > > m_Linkouts

Linkout bits - outer vector is per-defline, inner is bits.

void x_ResetSequenceData()

Clear sequence data from last sequence.

void SetDeflines(const CBlast_def_line_set &deflines)

This method replaces any stored header data for the current sequence with the provided CBlast_def_lin...

void ListVolumes(vector< string > &vols)

List Volumes.

static void x_SetDeflinesFromBinary(const string &bin_hdr, CConstRef< CBlast_def_line_set > &deflines)

Extract a defline set from a binary ASN.1 blob.

CRef< CWriteDB_LMDB > m_Lmdbdb

Write lmdb handle.

int FindColumn(const string &title) const

Find an existing column.

CRef< CWriteDB_TaxID > m_Taxdb

Write tax info handle.

map< int, int > m_MaskAlgoMap

Mapping from algo_id to gi-mask id.

vector< CRef< CBlastDbBlob > > m_Blobs

Blob data for the current sequence, indexed by letter.

int m_Hash

Sequence hash for this sequence.

bool m_LongSeqId

If true, use long sequence id format (database|accession) for all accessions.

static void x_GetBioseqBinaryHeader(const CBioseq &bioseq, string &binhdr)

Get binary version of deflines from 'user' data in Bioseq.

int RegisterMaskAlgorithm(EBlast_filter_program program, const string &options, const string &name="")

Register a type of filtering data found in this database.

vector< int > m_HaveBlob

List of blob columns that are active for this sequence.

string m_Ambig

Ambiguities in format that will be written to disk.

Uint8 m_MaxVolumeLetters

Max letters per volume.

void x_ComputeHash(const CTempString &sequence, const CTempString &ambiguities)

Compute the hash of a (raw) sequence.

int m_LmdbOid

Current oid to use for lmdb.

CWriteDB_Impl(const string &dbname, bool protein, const string &title, EIndexType indices, bool parse_ids, bool long_ids, bool use_gi_mask, EBlastDbVersion dbver=eBDB_Version4, bool limit_defline=false, Uint8 oid_masks=EOidMaskType::fNone, bool scan_bioseq_4_cfastareader_usrobj=false)

Constructor.

void x_CookData()

Convert and compute final data formats.

string m_BinHdr

Binary header in format that will be written to disk.

bool m_UseGiMask

Generate GI-based mask files.

bool m_ScanBioseq4CFastaReaderUsrObjct

Uint8 m_MaxFileSize

Maximum size of any file.

CBlastDbBlob & SetBlobData(int col_id)

Get a blob to use for a given column letter.

bool m_ParseIDs

Generate ISAM files.

bool m_Closed

True if database has been closed.

vector< CRef< CWriteDB_GiMask > > m_GiMasks

Gi-based masks.

int m_SeqLength

When a sequence is added, this will be populated with the length of that sequence.

bool m_HaveSequence

True if we have a sequence to write.

static void x_ExtractDeflines(CConstRef< CBioseq > &bioseq, CConstRef< CBlast_def_line_set > &deflines, string &bin_hdr, const vector< vector< int > > &membbits, const vector< vector< int > > &linkouts, int pig, set< TTaxId > &tax_ids, int OID=-1, bool parse_ids=true, bool long_seqid=false, bool limit_defline=false, bool scan_bioseq_4_cfastareader_usrobj=false)

Get deflines from a CBioseq and other meta-data.

string m_Title

Title field of database.

CConstRef< CBioseq > m_Bioseq

Bioseq object for next sequence to write.

string m_MaskByte

Byte that replaced masked letters.

string x_MakeAliasName()

Compute name of alias file produced.

void SetMaxVolumeLetters(Uint8 sz)

Set the maximum letters in one volume.

class to support searching for duplicate isam keys

CWriteDB_IsamKey(const string &fn)

bool operator<(const CWriteDB_IsamKey &other) const

bool AdvanceKey(const CWriteDB_IsamKey &other)

This class supports creation of a string accession to integer OID lmdb database.

void InsertVolumesInfo(const vector< string > &vol_names, const vector< blastdb::TOid > &vol_num_oids)

Create volume table. This API should only be called once to create vol info for all vols in the db.

int InsertEntries(const list< CRef< CSeq_id >> &seqids, const blastdb::TOid oid)

Add entries in bulk as fetched from CSeqDB::GetSeqIDs.

This class supports creation of tax id list lookup files.

int InsertEntries(const set< TTaxId > &tax_ids, const blastdb::TOid oid)

Add tax id entries in bulk for each OID. This API needs to be called in sequential order of OIDs. This ...

void RenameSingle()

Rename all volumes files to single-volume names.

void AddColumnMetaData(int col_id, const string &key, const string &value)

Add meta data to a column.

void RenameFileIndex(unsigned int num_digits)

const int & GetOID() const

Get the current OID of the volume.

int CreateColumn(const string &title, const TColumnMeta &meta, Uint8 max_sz, bool mbo=true)

Create a new database column.

bool WriteSequence(const string &seq, const string &ambig, const string &binhdr, const TIdList &ids, int pig, int hash, const TBlobList &blobs, int maskcol_id=-1)

Add a sequence to this volume.

void Close()

Close the volume.

const string & GetVolumeName() const

Get the name of the volume.

EIndexType

Whether and what kind of indices to build.

@ eAddHash

Add an index from sequence hash to OID.

iterator_bool insert(const value_type &val)

const_iterator begin() const

const_iterator end() const

string GetSeqIdString(const CSeq_id &id)

static const char si[8][64]

static DLIST_TYPE *DLIST_NAME() next(DLIST_LIST_TYPE *list, DLIST_TYPE *item)

unsigned int TSeqPos

Type for sequence locations and lengths.

#define ITERATE(Type, Var, Cont)

ITERATE macro to sequence through container elements.

#define NON_CONST_ITERATE(Type, Var, Cont)

Non constant version of ITERATE macro.

#define TAX_ID_FROM(T, value)

#define ERR_POST(message)

Error posting with file, line number information but without error codes.

#define LOG_POST(message)

This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...

void Error(CExceptionArgs_Base &args)

#define NCBI_THROW(exception_class, err_code, message)

Generic macro to throw an exception, given the exception class, error code and message string.

const string & GetMsg(void) const

Get message string.

void Info(CExceptionArgs_Base &args)

string GetName(void) const

Get the base entry name with extension (if any).

C & SerialAssign(C &dest, const C &src, ESerialRecursionMode how=eRecursive)

Set object to copy of another one.

#define MSerial_AsnBinary

EAccessionInfo

For IdentifyAccession (below)

static SIZE_TYPE ParseFastaIds(CBioseq::TId &ids, const CTempString &s, bool allow_partial_failure=false)

Parse an entire set of |-delimited FASTA-style IDs, appending the results to IDS.

static int BestRank(const CRef< CSeq_id > &id)

static int BlastRank(const CRef< CSeq_id > &id)

@ fParse_RawText

Try to ID raw non-numeric accessions.

@ fParse_ValidLocal

Treat otherwise unidentified strings as raw accessions, provided that they pass rudimentary validatio...

CConstRef< CBioseq > GetCompleteBioseq(void) const

Get the complete bioseq.

void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const

Fill the buffer string with the sequence data for the interval [start, stop).

bool Empty(void) const THROWS_NONE

Check if CConstRef is empty – not pointing to any object which means having a null value.

TObjectType * GetPointer(void) const THROWS_NONE

Get pointer.

void Reset(void)

Reset reference object.

void Reset(void)

Reset reference object.

bool NotEmpty(void) const THROWS_NONE

Check if CRef is not empty – pointing to an object and has a non-null value.

bool NotEmpty(void) const THROWS_NONE

Check if CConstRef is not empty – pointing to an object and has a non-null value.

bool Empty(void) const THROWS_NONE

Check if CRef is empty – not pointing to any object, which means having a null value.

int32_t Int4

4-byte (32-bit) signed integer

uint64_t Uint8

8-byte (64-bit) unsigned integer

#define END_NCBI_SCOPE

End previously defined NCBI scope.

#define BEGIN_NCBI_SCOPE

Define ncbi namespace.

IO_PREFIX::ifstream CNcbiIfstream

Portable alias for ifstream.

static string IntToString(int value, TNumToStringFlags flags=0, int base=10)

Convert int to string.

static string ParseEscapes(const CTempString str, EEscSeqRange mode=eEscSeqRange_Standard, char user_char='?')

Parse C-style escape sequences in the specified string.

const char * data(void) const

Return a pointer to the array represented.

static string & Replace(const string &src, const string &search, const string &replace, string &dst, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)

Replace occurrences of a substring within a string.

static Uint8 StringToUInt8(const CTempString str, TStringToNumFlags flags=0, int base=10)

Convert string to Uint8.

size_type length(void) const

Return the length of the represented array.

static string & ReplaceInPlace(string &src, const string &search, const string &replace, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)

Replace occurrences of a substring within a string.

size_type size(void) const

Return the length of the represented array.

string AsString(const CTimeFormat &format=kEmptyStr, TSeconds out_tz=eCurrentTimeZone) const

Transform time to string.

@ eCurrent

Use current time. See also CCurrentTime.

C::value_type FindBestChoice(const C &container, F score_func)

Find the best choice (lowest score) for values in a container.

static const char label[]

const TOrg & GetOrg(void) const

Get the Org member data.

EBlast_filter_program

This defines the possible sequence filtering algorithms to be used in a BLAST database.

Tdata & Set(void)

Assign a value to data member.

bool CanGet(void) const

Check if it is safe to call Get method.

const Tdata & Get(void) const

Get the member data.

@ eBlast_filter_program_other

bool IsStr(void) const

Check if variant Str is selected.

bool CanGetType(void) const

Check if it is safe to call GetType method.

void SetTag(TTag &value)

Assign a value to Tag data member.

bool IsId(void) const

Check if variant Id is selected.

bool CanGetData(void) const

Check if it is safe to call GetData method.

const TStr & GetStr(void) const

Get the variant data.

const TData & GetData(void) const

Get the Data member data.

const TType & GetType(void) const

Get the Type member data.

void SetDb(const TDb &value)

Assign a value to Db data member.

TId GetId(void) const

Get the variant data.

const TDb & GetDb(void) const

Get the Db member data.

bool CanGetDb(void) const

Check if it is safe to call GetDb method.

TGeneral & SetGeneral(void)

Select the variant.

bool IsPrf(void) const

Check if variant Prf is selected.

E_Choice Which(void) const

Which variant is currently selected.

bool IsLocal(void) const

Check if variant Local is selected.

bool IsPir(void) const

Check if variant Pir is selected.

const TUser & GetUser(void) const

Get the variant data.

const TInst & GetInst(void) const

Get the Inst member data.

bool IsOrg(void) const

Check if variant Org is selected.

bool CanGetMol(void) const

Check if it is safe to call GetMol method.

const TTitle & GetTitle(void) const

Get the variant data.

const TSource & GetSource(void) const

Get the variant data.

bool IsSource(void) const

Check if variant Source is selected.

const TId & GetId(void) const

Get the Id member data.

const Tdata & Get(void) const

Get the member data.

bool CanGetDescr(void) const

Check if it is safe to call GetDescr method.

TLength GetLength(void) const

Get the Length member data.

const TOrg & GetOrg(void) const

Get the variant data.

bool CanGetId(void) const

Check if it is safe to call GetId method.

bool CanGetSeq_data(void) const

Check if it is safe to call GetSeq_data method.

bool IsTitle(void) const

Check if variant Title is selected.

const TDescr & GetDescr(void) const

Get the Descr member data.

bool CanGetInst(void) const

Check if it is safe to call GetInst method.

bool IsUser(void) const

Check if variant User is selected.

E_Choice Which(void) const

Which variant is currently selected.

@ e_Ncbieaa

extended ASCII 1 letter aa codes

@ e_Ncbistdaa

consecutive codes for std aas

@ e_Ncbi2na

2 bit nucleic acid code

@ e_Iupacna

IUPAC 1 letter nuc acid code.

@ e_Ncbi4na

4 bit nucleic acid code

@ e_Iupacaa

IUPAC 1 letter amino acid code.

char * dbname(DBPROCESS *dbproc)

Get name of current database.

unsigned int

A callback function used to compare two keys in a database.

constexpr bool empty(list< Ts... >) noexcept

const struct ncbi::grid::netcache::search::fields::KEY key

const GenericPointer< typename T::ValueType > T2 value

const CharType(& source)[N]

std::istream & in(std::istream &in_, double &x_)

double df(double x_, const double &y_)

string BuildLMDBFileName(const string &basename, bool is_protein, bool use_index=false, unsigned int index=0)

Build the canonical LMDB file name for BLAST databases.

string GetFileNameFromExistingLMDBFile(const string &lmdb_filename, ELMDBFileType file_type)

Defines exception class and several constants for SeqDB.

unsigned SeqDB_SequenceHash(const char *sequence, int length)

Returns a path minus filename.

EBlastDbVersion

BLAST database version.

Defines `expert' version of CSeqDB interfaces.

void SeqDB_UnpackAmbiguities(const CTempString &sequence, const CTempString &ambiguities, string &result)

Unpack an ambiguous nucleotide sequence.

static const sljit_gpr r1

static SLJIT_INLINE sljit_ins l(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)

static SLJIT_INLINE sljit_ins msg(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)

static const sljit_gpr r2

Comparison function for set<CWriteDB_IsamKey<T> *>

bool operator()(const CWriteDB_IsamKey< T > *lhs, const CWriteDB_IsamKey< T > *rhs) const

static bool ambig(char c)

Data conversion tools for CWriteDB and associated code.

void WriteDB_Ncbi2naToBinary(const CSeq_inst &si, string &seq)

Build blast db nucleotide format from Ncbi2na Seq-inst.

void WriteDB_EaaToBinary(const CSeq_inst &si, string &seq)

Build blast db protein format from Eaa protein Seq-inst.

void WriteDB_IupacaaToBinary(const CSeq_inst &si, string &seq)

Build blast db protein format from Iupacaa protein Seq-inst.

void WriteDB_Ncbi4naToBinary(const CSeq_inst &seqinst, string &seq, string &amb)

Build blast db nucleotide format from Ncbi4na Seq-inst.

void WriteDB_StdaaToBinary(const CSeq_inst &si, string &seq)

Build blast db protein format from Stdaa protein Seq-inst.

void WriteDB_IupacnaToBinary(const CSeq_inst &si, string &seq, string &amb)

Build blast db nucleotide format from Iupacna Seq-inst.

Defines exception class for WriteDB.

int WriteDB_FindSequenceLength(bool protein, const string &seq)

Compute length of sequence from raw packing.

static bool s_UseFastaReaderDeflines(CConstRef< CBioseq > &bioseq, CConstRef< CBlast_def_line_set > &deflines, bool long_seqid)

static CRef< CBlast_def_line_set > s_EditDeflineSet(CConstRef< CBlast_def_line_set > &deflines)

static void s_CheckEmptyLists(CRef< CBlast_def_line_set > &deflines, bool owner)

static const string s_EscapeColon(const string &in)

int s_AbsMax(int a, int b)

USING_SCOPE(std)

Import C++ std namespace.

static void s_CheckDuplicateIds(set< CWriteDB_IsamKey< T > *, CWriteDB_IsamKey_Compare< T > > &keys)

Check for duplicate ids across volumes.

void s_LimitDeflines(CConstRef< CBlast_def_line_set > &dfs)

Defines implementation class of WriteDB.


RetroSearch is an open source project built by @garambo | Open a GitHub Issue

Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo

HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4