A RetroSearch Logo

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Search Query:

Showing content from http://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/doxyhtml/blastdb__convert_8cpp_source.html below:

NCBI C++ ToolKit: src/app/blastdb/blastdb_convert.cpp Source File

49 #include "../blast/blast_app_util.hpp" 55 #ifndef SKIP_DOXYGEN_PROCESSING 75  const string

& extension,

106  virtual void Init

();

122

arg_desc->SetUsageContext(

GetArguments

().GetProgramBasename(),

123  "Converts a BLAST databases from version 4 to " 126

arg_desc->SetCurrentGroup(

"Input options"

);

127

arg_desc->AddKey(

kInput

,

"database_name"

,

"Input database name"

,

129

arg_desc->AddDefaultKey(

kArgDbType

,

"molecule_type"

,

130  "Molecule type of the BLAST database to read"

,

135

arg_desc->SetCurrentGroup(

"Configuration options"

);

136

arg_desc->AddFlag(

"update_timestamp"

,

137  "Update the date of last update in the output database"

,

139

arg_desc->AddFlag(

"new_index"

,

140  "Generate vol index for filename"

,

142

arg_desc->AddDefaultKey(

kMapSize

,

"memory_map_size_limit"

,

143  "Max mempry map size of output file"

,

146

arg_desc->SetCurrentGroup(

"Output options"

);

147

arg_desc->AddKey(

kArgOutput

,

"database_name"

,

148  "Name of BLAST database to be created"

,

151

arg_desc->AddOptionalKey(

"logfile"

,

"File_Name"

,

152  "File to which the program log should be redirected"

,

167  "databases with OID lists aren't supported"

);

174  const string

& alias_file_name,

175  const

vector<string> & new_vol_names)

187  for

(

unsigned int i

=0;

i

< new_vol_names.size();

i

++) {

188  out

<< new_vol_names[

i

] <<

" "

;

202  const string

& output_idx_basename,

203  const string

& lmdb_base,

210  char

*

data

= (

char

*)memfile.

Map

(0, 0);

212  throw

runtime_error(

"Failed to memory map "

+ input_idx_fname);

216  const string

idx_ext = is_prot?

"pin"

:

"nin"

;

218  Uint4

read_offset = 0;

224

read_offset +=

sizeof

(dbver);

229  out

.WriteInt4(mol_type);

230

read_offset +=

sizeof

(mol_type);

233  out

.WriteInt4(vol_num);

237

read_offset +=

sizeof

(title_len);

240

title.reserve(title_len);

242

read_offset += title_len;

243  out

.WriteInt4(title_len);

247  out

.WriteInt4(lmdb_fname.size());

248  out

.Write(lmdb_fname);

252

read_offset +=

sizeof

(date_len);

255

date.reserve(date_len);

257

read_offset += date_len;

261  out

.WriteInt4(date_len);

266

cerr <<

"FXIME: NEED TO WRITE UPDATED DATE"

<< endl;

267  out

.WriteInt4(date_len);

273  while

((kFileSize - read_offset) >

kBufSize

) {

275  out

.Write(raw_data);

279  out

.Write(raw_data);

287

extns.push_back(

"rps"

);

288

extns.push_back(

"loo"

);

289

extns.push_back(

"aux"

);

290

extns.push_back(

"freq"

);

291

extns.push_back(

"blocks"

);

292

extns.push_back(

"wcounts"

);

293

extns.push_back(

"obsr"

);

297  for

(

unsigned int i

=0;

i

< vols.size();

i

++){

298  if

(vols[

i

] == vol_name){

300  "Duplicate db volumes"

);

308  const string

& kInputDb = args[

kInput

].AsString();

310  const string

& kMol(args[

kArgDbType

].AsString());

315  const bool

kNewIndex = args[

"new_index"

].HasValue() ?

true

:

false

;

317  m_LogFile

= & (args[

"logfile"

].HasValue()? args[

"logfile"

].AsOutputFile() : cout);

327  "Memory mapping not supported in this platform"

);

332  "Cannot create a BLAST database from an existing one without " 333  "changing the output name, please provide a (different) database name "

);

337  m_LogFile

= & (args[

"logfile"

].HasValue() ? args[

"logfile"

].AsOutputFile() : cout);

347

vector<string> paths, alias_files;

348  bool

use_index_in_filename =

true

;

350  if

(getenv(

"NONDFLT"

) ==

NULL

) {

357  if

(alias_files.size() > 1) {

359  "database with multiple alias files is not supported"

);

361  if

(alias_files.size() == 1) {

366

cerr << kOutputAbsPath << endl;

367

*

m_LogFile

<<

"Deleted existing BLAST database with identical name."

<< endl;

370  if

(alias_files.size() == 0) {

371

use_index_in_filename =

false

;

374

vector<string> extns;

381  if

(args[

"update_timestamp"

]) {

382

copy_flags &= ~

CDirEntry

::fCF_PreserveTime;

387

vector<string> vol_names;

388

vector<blastdb::TOid> vol_num_oids;

392

unique_ptr<CWriteDB_LMDB> lmdbdb (

new CWriteDB_LMDB

(lmdb_fname_w_path, kMemoryMapSize));

397  for

(

unsigned int

p=0; p < paths.size(); p++) {

398  string

& vol_path = paths[p];

399  _TRACE

(

"Processing "

<< vol_path);

401  string

kOutputVol = output_dir.

GetName

();

402  if

(kNewIndex && use_index_in_filename) {

405  unsigned int l

= (path_size_str.size() < 2) ? 2 : path_size_str.size();

406  for

(

unsigned int

x = vol_num.size(); x <

l

; x++){

407

zero_padding +=

'0'

;

409

kOutputVol +=

"."

+ zero_padding + vol_num;

411  else if

(use_index_in_filename){

412

vector<string> parts;

414  if

(parts.size() < 2) {

423

kOutputVol +=

"."

+ parts.back();

429  for

(

int

oid = 0; oid < kNumOids; oid++) {

430

list<CRef<CSeq_id>> ids = vol->

GetSeqIDs

(oid);

431

lmdbdb->InsertEntries(ids, oid_total+oid);

434

taxdb->InsertEntries(tax_ids, oid_total+oid);

438

vol_names.push_back(kOutputVol);

439

vol_num_oids.push_back(kNumOids);

440

oid_total += kNumOids;

442  for

(

auto

ext : extns) {

443  if

(ext ==

"psi"

|| ext ==

"psd"

||

444

ext ==

"nsi"

|| ext ==

"nsd"

) {

447  const string

& kInputFile = vol_path +

"."

+ ext;

452  const string

kOutputFile = output_dir.

GetDir

() + kOutputVol +

"."

+ ext;

453  if

(ext ==

"pin"

|| ext ==

"nin"

) {

455

output_dir.

GetName

(), kIsProt, p, copy_flags);

458  if

(!de.

Copy

(kOutputFile, copy_flags)) {

459  throw

runtime_error(

"Failed to cp "

+ de.

GetPath

()

460

+

" to "

+ kOutputFile);

467

lmdbdb->InsertVolumesInfo(vol_names, vol_num_oids);

471  if

(alias_files.size() == 1) {

472  const string

kOutputFile =

kOutput

+

"."

+ (kIsProt ?

'p'

:

'n'

) +

"al"

;

478  if

((status != 0) &&

cleanup

) {

485 #ifndef SKIP_DOXYGEN_PROCESSING 486 int main

(

int

argc,

const char

* argv[]

)

Declares singleton objects to store the version and reference for the BLAST engine.

#define CATCH_ALL(exit_code)

Standard catch statement for all BLAST command line programs.

Interface for converting sources of sequence data into blast sequence input.

static void s_UpdateVolumesInAliasFile(const string &orig_alias_file, const string &alias_file_name, const vector< string > &new_vol_names)

Fixes the alias file contents to match the new database name.

static void s_ConvertV4toV5(const string &input_idx_fname, const string &output_idx_basename, const string &lmdb_base, bool is_prot, int vol_num, CDirEntry::TCopyFlags copy_flags)

static const SIZE_TYPE kBufSize(4096)

Used to copy index files.

static const char * kOidList

Alias file keyword for oidlist.

void s_GetProfileDBsExt(vector< string > &extns)

static void s_LookForOidlistInAliasFile(const string &fname)

static const char * kDbList

Alias file keyword for volumes.

void s_CheckDuplicateVols(const vector< string > &vols, const string &vol_name)

int main(int argc, const char *argv[])

static const string kMapSize("map_size")

static const string kInput("in")

Command line flag to represent the input.

Code to build a database given various sources of sequence data.

Defines BLAST error codes (user errors included)

void AddParam(EUsageParams p, int val)

Keeps track of the version of the BLAST engine in the NCBI C++ toolkit.

The main application class.

CBlastUsageReport m_UsageReport

virtual int Run()

@inheritDoc

CBlastdbConvertApp()

@inheritDoc

virtual void Init()

@inheritDoc

static void CreateDirectories(const string &dbname)

Create Directory for blast db.

Defines user input exceptions.

Defines invalid user input exceptions.

static void FindVolumePaths(const string &dbname, ESeqType seqtype, vector< string > &paths, vector< string > *alias_paths=NULL, bool recursive=true, bool expand_links=true)

Find volume paths.

int GetNumOIDs() const

Returns the size of the (possibly sparse) OID range.

list< CRef< CSeq_id > > GetSeqIDs(int oid) const

Gets a list of sequence identifiers.

ESeqType

Sequence types (eUnknown tries protein, then nucleotide).

void GetAllTaxIDs(int oid, set< TTaxId > &taxids) const

Get all tax ids for an oid.

static const char * kBlastDbDateFormat

Format string for the date returned by CSeqDB::GetDate.

EBlastDbVersion GetBlastDbVersion() const

Return blast db version.

virtual void x_Flush()

This should flush any unwritten data to disk.

CWriteDB_FileAutoFlush(const string &basename, const string &extension, int index, Uint8 max_file_size, bool always_create)

CWriteDB_IndexFile class.

This class supports creation of an LMDB database that maps string accessions to integer OIDs.

This class supports creation of tax id list lookup files.

Constant declarations for command line arguments for BLAST programs.

const string kArgOutput

Output file name.

const string kArgDbType

BLAST database molecule type.

void Print(const CCompactSAMApplication::AlignInfo &ai)

std::ofstream out("events_result.xml")

main entry point for tests

static void cleanup(void)

void SetFullVersion(CRef< CVersionAPI > version)

Set version data for the program.

void HideStdArgs(THideStdArgs hide_mask)

Set the hide mask for the Hide Std Flags.

virtual const CArgs & GetArgs(void) const

Get parsed command line arguments.

int AppMain(int argc, const char *const *argv, const char *const *envp=0, EAppDiagStream diag=eDS_Default, const char *conf=NcbiEmptyCStr, const string &name=NcbiEmptyString)

Main function (entry point) for the NCBI application.

CVersionInfo GetVersion(void) const

Get the program version information.

virtual void SetupArgDescriptions(CArgDescriptions *arg_desc)

Setup the command line argument descriptions.

const CNcbiArguments & GetArguments(void) const

Get the application's cached unprocessed command-line arguments.

@ fHideXmlHelp

Hide XML help description.

@ fHideFullVersion

Hide full version description.

@ fHideDryRun

Hide dryrun description.

@ fHideConffile

Hide configuration file description.

@ fAppend

Append to end-of-file; for eOutputFile or eIOFile.

@ eInt8

Convertible into an integer number (Int8 only)

@ eInputFile

Name of file (must exist and be readable)

@ eString

An arbitrary string.

@ eOutputFile

Name of file (must be writable)

void SetDiagPostPrefix(const char *prefix)

Specify a string to prefix all subsequent error postings with.

EDiagSev SetDiagPostLevel(EDiagSev post_sev=eDiag_Error)

Set the threshold severity for posting the messages.

@ eDiag_Warning

Warning message.

#define NCBI_THROW(exception_class, err_code, message)

Generic macro to throw an exception, given the exception class, error code and message string.

unsigned int TCopyFlags

Binary OR of "ECopyFlags".

static string CreateAbsolutePath(const string &path, ERelativeToWhat rtw=eRelativeToCwd)

Get an absolute path from some, possibly relative, path.

static bool IsSupported(void)

Check if memory-mapping is supported by the C++ Toolkit on this platform.

string GetDir(EIfEmptyPath mode=eIfEmptyPath_Current) const

Get the directory component for this directory entry.

virtual bool Exists(void) const

Check the entry existence.

void * Map(TOffsetType offset, size_t length)

Map file segment.

Int8 GetLength(void) const

Get size of file.

bool MemMapAdvise(void *ptr, EMemMapAdvise advise) const

Advise on mapped memory map usage.

string GetName(void) const

Get the base entry name with extension (if any).

bool UnmapAll(void)

Unmap all mapped segments.

const string & GetPath(void) const

Get entry path.

virtual bool Copy(const string &new_path, TCopyFlags flags=fCF_Default, size_t buf_size=0) const

Copy the entry to a location specified by "new_path".

@ fCF_FollowLinks

Copy entries following their sym.links, not the links themselves.

@ fCF_Overwrite

The following flags define what to do when the destination entry already exists:

@ fCF_PreserveTime

Preserve date/times.

int32_t Int4

4-byte (32-bit) signed integer

uint32_t Uint4

4-byte (32-bit) unsigned integer

int64_t Int8

8-byte (64-bit) signed integer

uint64_t Uint8

8-byte (64-bit) unsigned integer

bool IsEnabled(void)

Indicates whether application usage statistics collection is enabled for a current reporter instance.

CNcbiIstream & NcbiGetlineEOL(CNcbiIstream &is, string &str, string::size_type *count=NULL)

Read from "is" to "str" the next line (taking into account platform specifics of End-of-Line)

IO_PREFIX::ofstream CNcbiOfstream

Portable alias for ofstream.

IO_PREFIX::ostream CNcbiOstream

Portable alias for ostream.

IO_PREFIX::ifstream CNcbiIfstream

Portable alias for ifstream.

NCBI_NS_STD::string::size_type SIZE_TYPE

static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)

Convert string to int.

static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)

Split a string using specified delimiters.

static string IntToString(int value, TNumToStringFlags flags=0, int base=10)

Convert int to string.

static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)

Find the pattern in the string.

static string UIntToString(unsigned int value, TNumToStringFlags flags=0, int base=10)

Convert UInt to string.

double Elapsed(void) const

Return time elapsed since first Start() or last Restart() call (in seconds).

string AsString(const CTimeFormat &format=kEmptyStr, TSeconds out_tz=eCurrentTimeZone) const

Transform time to string.

void Start(void)

Start the timer.

@ eCurrent

Use current time. See also CCurrentTime.

const std::string kOutput

Command line flag to specify the output.

const string version

version string

Defines the CNcbiApplication and CAppException classes for creating NCBI applications.

Defines classes: CDirEntry, CFile, CDir, CSymLink, CMemoryFile, CFileUtil, CFileLock,...

std::istream & in(std::istream &in_, double &x_)

Defines BLAST database access classes.

CSeqDB::ESeqType ParseMoleculeTypeString(const string &str)

Convert a string to a CSeqDB ESeqType object.

bool DeleteBlastDb(const string &dbpath, CSeqDB::ESeqType seq_type)

Deletes all files associated with a BLAST database.

Defines interface to interact with LMDB files.

string BuildLMDBFileName(const string &basename, bool is_protein, bool use_index=false, unsigned int index=0)

Build the canonical LMDB file name for BLAST databases.

string GetFileNameFromExistingLMDBFile(const string &lmdb_filename, ELMDBFileType file_type)

Int4 TOid

Ordinal ID in BLAST databases.

void SeqDB_GetFileExtensions(bool db_is_protein, vector< string > &extensions, EBlastDbVersion dbver=eBDB_Version4)

Retrieves a list of all supported file extensions for BLAST databases.

This file defines several SeqDB utility functions related to byte order and file system portability.

T SeqDB_GetStdOrd(const T *stdord_obj)

Read a network order integer value.

void s_SeqDB_QuickAssign(string &dst, const char *bp, const char *ep)

Higher Performance String Assignment.

static SLJIT_INLINE sljit_ins l(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)

Defines exception class for WriteDB.

Code for database files construction.

Defines lmdb implementation of string-key database.


RetroSearch is an open source project built by @garambo | Open a GitHub Issue

Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo

HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4