& extension,
106 virtual void Init();
122arg_desc->SetUsageContext(
GetArguments().GetProgramBasename(),
123 "Converts a BLAST databases from version 4 to " 126arg_desc->SetCurrentGroup(
"Input options");
127arg_desc->AddKey(
kInput,
"database_name",
"Input database name",
129arg_desc->AddDefaultKey(
kArgDbType,
"molecule_type",
130 "Molecule type of the BLAST database to read",
135arg_desc->SetCurrentGroup(
"Configuration options");
136arg_desc->AddFlag(
"update_timestamp",
137 "Update the date of last update in the output database",
139arg_desc->AddFlag(
"new_index",
140 "Generate vol index for filename",
142arg_desc->AddDefaultKey(
kMapSize,
"memory_map_size_limit",
143 "Max mempry map size of output file",
146arg_desc->SetCurrentGroup(
"Output options");
147arg_desc->AddKey(
kArgOutput,
"database_name",
148 "Name of BLAST database to be created",
151arg_desc->AddOptionalKey(
"logfile",
"File_Name",
152 "File to which the program log should be redirected",
167 "databases with OID lists aren't supported");
174 const string& alias_file_name,
175 constvector<string> & new_vol_names)
187 for(
unsigned int i=0;
i< new_vol_names.size();
i++) {
188 out<< new_vol_names[
i] <<
" ";
202 const string& output_idx_basename,
203 const string& lmdb_base,
210 char*
data= (
char*)memfile.
Map(0, 0);
212 throwruntime_error(
"Failed to memory map "+ input_idx_fname);
216 const stringidx_ext = is_prot?
"pin":
"nin";
218 Uint4read_offset = 0;
224read_offset +=
sizeof(dbver);
229 out.WriteInt4(mol_type);
230read_offset +=
sizeof(mol_type);
233 out.WriteInt4(vol_num);
237read_offset +=
sizeof(title_len);
240title.reserve(title_len);
242read_offset += title_len;
243 out.WriteInt4(title_len);
247 out.WriteInt4(lmdb_fname.size());
248 out.Write(lmdb_fname);
252read_offset +=
sizeof(date_len);
255date.reserve(date_len);
257read_offset += date_len;
261 out.WriteInt4(date_len);
266cerr <<
"FXIME: NEED TO WRITE UPDATED DATE"<< endl;
267 out.WriteInt4(date_len);
273 while((kFileSize - read_offset) >
kBufSize) {
275 out.Write(raw_data);
279 out.Write(raw_data);
287extns.push_back(
"rps");
288extns.push_back(
"loo");
289extns.push_back(
"aux");
290extns.push_back(
"freq");
291extns.push_back(
"blocks");
292extns.push_back(
"wcounts");
293extns.push_back(
"obsr");
297 for(
unsigned int i=0;
i< vols.size();
i++){
298 if(vols[
i] == vol_name){
300 "Duplicate db volumes");
308 const string& kInputDb = args[
kInput].AsString();
310 const string& kMol(args[
kArgDbType].AsString());
315 const boolkNewIndex = args[
"new_index"].HasValue() ?
true:
false;
317 m_LogFile= & (args[
"logfile"].HasValue()? args[
"logfile"].AsOutputFile() : cout);
327 "Memory mapping not supported in this platform");
332 "Cannot create a BLAST database from an existing one without " 333 "changing the output name, please provide a (different) database name ");
337 m_LogFile= & (args[
"logfile"].HasValue() ? args[
"logfile"].AsOutputFile() : cout);
347vector<string> paths, alias_files;
348 booluse_index_in_filename =
true;
350 if(getenv(
"NONDFLT") ==
NULL) {
357 if(alias_files.size() > 1) {
359 "database with multiple alias files is not supported");
361 if(alias_files.size() == 1) {
366cerr << kOutputAbsPath << endl;
367*
m_LogFile<<
"Deleted existing BLAST database with identical name."<< endl;
370 if(alias_files.size() == 0) {
371use_index_in_filename =
false;
374vector<string> extns;
381 if(args[
"update_timestamp"]) {
382copy_flags &= ~
CDirEntry::fCF_PreserveTime;
387vector<string> vol_names;
388vector<blastdb::TOid> vol_num_oids;
392unique_ptr<CWriteDB_LMDB> lmdbdb (
new CWriteDB_LMDB(lmdb_fname_w_path, kMemoryMapSize));
397 for(
unsigned intp=0; p < paths.size(); p++) {
398 string& vol_path = paths[p];
399 _TRACE(
"Processing "<< vol_path);
401 stringkOutputVol = output_dir.
GetName();
402 if(kNewIndex && use_index_in_filename) {
405 unsigned int l= (path_size_str.size() < 2) ? 2 : path_size_str.size();
406 for(
unsigned intx = vol_num.size(); x <
l; x++){
407zero_padding +=
'0';
409kOutputVol +=
"."+ zero_padding + vol_num;
411 else if(use_index_in_filename){
412vector<string> parts;
414 if(parts.size() < 2) {
423kOutputVol +=
"."+ parts.back();
429 for(
intoid = 0; oid < kNumOids; oid++) {
430list<CRef<CSeq_id>> ids = vol->
GetSeqIDs(oid);
431lmdbdb->InsertEntries(ids, oid_total+oid);
434taxdb->InsertEntries(tax_ids, oid_total+oid);
438vol_names.push_back(kOutputVol);
439vol_num_oids.push_back(kNumOids);
440oid_total += kNumOids;
442 for(
autoext : extns) {
443 if(ext ==
"psi"|| ext ==
"psd"||
444ext ==
"nsi"|| ext ==
"nsd") {
447 const string& kInputFile = vol_path +
"."+ ext;
452 const stringkOutputFile = output_dir.
GetDir() + kOutputVol +
"."+ ext;
453 if(ext ==
"pin"|| ext ==
"nin") {
455output_dir.
GetName(), kIsProt, p, copy_flags);
458 if(!de.
Copy(kOutputFile, copy_flags)) {
459 throwruntime_error(
"Failed to cp "+ de.
GetPath()
460+
" to "+ kOutputFile);
467lmdbdb->InsertVolumesInfo(vol_names, vol_num_oids);
471 if(alias_files.size() == 1) {
472 const stringkOutputFile =
kOutput+
"."+ (kIsProt ?
'p':
'n') +
"al";
478 if((status != 0) &&
cleanup) {
485 #ifndef SKIP_DOXYGEN_PROCESSING 486 int main(
intargc,
const char* argv[]
)
Declares singleton objects to store the version and reference for the BLAST engine.
#define CATCH_ALL(exit_code)
Standard catch statement for all BLAST command line programs.
Interface for converting sources of sequence data into blast sequence input.
static void s_UpdateVolumesInAliasFile(const string &orig_alias_file, const string &alias_file_name, const vector< string > &new_vol_names)
Fixes the alias file contents to match the new database name.
static void s_ConvertV4toV5(const string &input_idx_fname, const string &output_idx_basename, const string &lmdb_base, bool is_prot, int vol_num, CDirEntry::TCopyFlags copy_flags)
static const SIZE_TYPE kBufSize(4096)
Used to copy index files.
static const char * kOidList
Alias file keyword for oidlist.
void s_GetProfileDBsExt(vector< string > &extns)
static void s_LookForOidlistInAliasFile(const string &fname)
static const char * kDbList
Alias file keyword for volumes.
void s_CheckDuplicateVols(const vector< string > &vols, const string &vol_name)
int main(int argc, const char *argv[])
static const string kMapSize("map_size")
static const string kInput("in")
Command line flag to represent the input.
Code to build a database given various sources of sequence data.
Defines BLAST error codes (user errors included)
void AddParam(EUsageParams p, int val)
Keeps track of the version of the BLAST engine in the NCBI C++ toolkit.
The main application class.
CBlastUsageReport m_UsageReport
virtual int Run()
@inheritDoc
CBlastdbConvertApp()
@inheritDoc
virtual void Init()
@inheritDoc
static void CreateDirectories(const string &dbname)
Create Directory for blast db.
Defines user input exceptions.
Defines invalid user input exceptions.
static void FindVolumePaths(const string &dbname, ESeqType seqtype, vector< string > &paths, vector< string > *alias_paths=NULL, bool recursive=true, bool expand_links=true)
Find volume paths.
int GetNumOIDs() const
Returns the size of the (possibly sparse) OID range.
list< CRef< CSeq_id > > GetSeqIDs(int oid) const
Gets a list of sequence identifiers.
ESeqType
Sequence types (eUnknown tries protein, then nucleotide).
void GetAllTaxIDs(int oid, set< TTaxId > &taxids) const
Get all tax ids for an oid.
static const char * kBlastDbDateFormat
Format string for the date returned by CSeqDB::GetDate.
EBlastDbVersion GetBlastDbVersion() const
Return blast db version.
virtual void x_Flush()
This should flush any unwritten data to disk.
CWriteDB_FileAutoFlush(const string &basename, const string &extension, int index, Uint8 max_file_size, bool always_create)
CWriteDB_IndexFile class.
This class supports creation of a string accession to integer OID lmdb database.
This class supports creation of tax id list lookup files.
Constant declarations for command line arguments for BLAST programs.
const string kArgOutput
Output file name.
const string kArgDbType
BLAST database molecule type.
void Print(const CCompactSAMApplication::AlignInfo &ai)
std::ofstream out("events_result.xml")
main entry point for tests
static void cleanup(void)
void SetFullVersion(CRef< CVersionAPI > version)
Set version data for the program.
void HideStdArgs(THideStdArgs hide_mask)
Set the hide mask for the Hide Std Flags.
virtual const CArgs & GetArgs(void) const
Get parsed command line arguments.
int AppMain(int argc, const char *const *argv, const char *const *envp=0, EAppDiagStream diag=eDS_Default, const char *conf=NcbiEmptyCStr, const string &name=NcbiEmptyString)
Main function (entry point) for the NCBI application.
CVersionInfo GetVersion(void) const
Get the program version information.
virtual void SetupArgDescriptions(CArgDescriptions *arg_desc)
Setup the command line argument descriptions.
const CNcbiArguments & GetArguments(void) const
Get the application's cached unprocessed command-line arguments.
@ fHideXmlHelp
Hide XML help description.
@ fHideFullVersion
Hide full version description.
@ fHideDryRun
Hide dryrun description.
@ fHideConffile
Hide configuration file description.
@ fAppend
Append to end-of-file; for eOutputFile or eIOFile.
@ eInt8
Convertible into an integer number (Int8 only)
@ eInputFile
Name of file (must exist and be readable)
@ eString
An arbitrary string.
@ eOutputFile
Name of file (must be writable)
void SetDiagPostPrefix(const char *prefix)
Specify a string to prefix all subsequent error postings with.
EDiagSev SetDiagPostLevel(EDiagSev post_sev=eDiag_Error)
Set the threshold severity for posting the messages.
@ eDiag_Warning
Warning message.
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
unsigned int TCopyFlags
Binary OR of "ECopyFlags".
static string CreateAbsolutePath(const string &path, ERelativeToWhat rtw=eRelativeToCwd)
Get an absolute path from some, possibly relative, path.
static bool IsSupported(void)
Check if memory-mapping is supported by the C++ Toolkit on this platform.
string GetDir(EIfEmptyPath mode=eIfEmptyPath_Current) const
Get the directory component for this directory entry.
virtual bool Exists(void) const
Check the entry existence.
void * Map(TOffsetType offset, size_t length)
Map file segment.
Int8 GetLength(void) const
Get size of file.
bool MemMapAdvise(void *ptr, EMemMapAdvise advise) const
Advise on mapped memory map usage.
string GetName(void) const
Get the base entry name with extension (if any).
bool UnmapAll(void)
Unmap all mapped segments.
const string & GetPath(void) const
Get entry path.
virtual bool Copy(const string &new_path, TCopyFlags flags=fCF_Default, size_t buf_size=0) const
Copy the entry to a location specified by "new_path".
@ fCF_FollowLinks
Copy entries following their symbolic links, not the links themselves.
@ fCF_Overwrite
The following flags define what to do when the destination entry already exists:
@ fCF_PreserveTime
Preserve date/times.
int32_t Int4
4-byte (32-bit) signed integer
uint32_t Uint4
4-byte (32-bit) unsigned integer
int64_t Int8
8-byte (64-bit) signed integer
uint64_t Uint8
8-byte (64-bit) unsigned integer
bool IsEnabled(void)
Indicates whether application usage statistics collection is enabled for a current reporter instance.
CNcbiIstream & NcbiGetlineEOL(CNcbiIstream &is, string &str, string::size_type *count=NULL)
Read the next line from "is" into "str" (taking into account platform-specific end-of-line conventions).
IO_PREFIX::ofstream CNcbiOfstream
Portable alias for ofstream.
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
IO_PREFIX::ifstream CNcbiIfstream
Portable alias for ifstream.
NCBI_NS_STD::string::size_type SIZE_TYPE
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
static string UIntToString(unsigned int value, TNumToStringFlags flags=0, int base=10)
Convert UInt to string.
double Elapsed(void) const
Return time elapsed since first Start() or last Restart() call (in seconds).
string AsString(const CTimeFormat &format=kEmptyStr, TSeconds out_tz=eCurrentTimeZone) const
Transform time to string.
void Start(void)
Start the timer.
@ eCurrent
Use current time. See also CCurrentTime.
const std::string kOutput
Command line flag to specify the output.
const string version
version string
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
Defines classes: CDirEntry, CFile, CDir, CSymLink, CMemoryFile, CFileUtil, CFileLock,...
std::istream & in(std::istream &in_, double &x_)
Defines BLAST database access classes.
CSeqDB::ESeqType ParseMoleculeTypeString(const string &str)
Convert a string to a CSeqDB ESeqType object.
bool DeleteBlastDb(const string &dbpath, CSeqDB::ESeqType seq_type)
Deletes all files associated with a BLAST database.
Defines interface to interact with LMDB files.
string BuildLMDBFileName(const string &basename, bool is_protein, bool use_index=false, unsigned int index=0)
Build the canonical LMDB file name for BLAST databases.
string GetFileNameFromExistingLMDBFile(const string &lmdb_filename, ELMDBFileType file_type)
Int4 TOid
Ordinal ID in BLAST databases.
void SeqDB_GetFileExtensions(bool db_is_protein, vector< string > &extensions, EBlastDbVersion dbver=eBDB_Version4)
Retrieves a list of all supported file extensions for BLAST databases.
This file defines several SeqDB utility functions related to byte order and file system portability.
T SeqDB_GetStdOrd(const T *stdord_obj)
Read a network order integer value.
void s_SeqDB_QuickAssign(string &dst, const char *bp, const char *ep)
Higher Performance String Assignment.
static SLJIT_INLINE sljit_ins l(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
Defines exception class for WriteDB.
Code for database files construction.
Defines lmdb implementation of string-key database.
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4