(
void);
56 virtual int Run(
void);
57 virtual void Exit(
void);
85: kTargetOnly(
"target_only"),
86kMembershipBits(
"membership_bits"),
87kCopyOnly(
"copy_only")
110arg_desc->SetUsageContext(
112 "Performs a (deep) copy of a subset of a BLAST database" 115arg_desc->SetCurrentGroup(
"BLAST database options");
117arg_desc->AddDefaultKey(
120 "Source BLAST database name",
124arg_desc->AddDefaultKey(
127 "Molecule type stored in the source BLAST database",
130arg_desc->SetConstraint(
135arg_desc->SetCurrentGroup(
"Configuration options");
137arg_desc->AddOptionalKey(
kArgGiList,
"input_file",
138 "Text or binary gi file to restrict the source BLAST " 140 "If text format is provided, it will be converted " 145 "Text sequence id or accession file to restrict " 146 "the source BLAST database",
152arg_desc->AddOptionalKey(
155 "Title for the output BLAST database",
161 "Copy the membership bits",
167 "Copy only entries specified in GI file",
171arg_desc->AddOptionalKey(
174 "Membership bit by which copied entries are filtered",
180 "Do not copy GI data",
184arg_desc->AddOptionalKey(
"blastdb_version",
"version",
185 "Version of BLAST database to be created. Default is the same version as source BLAST database",
187arg_desc->SetConstraint(
"blastdb_version",
190 const stringkSwissprot(
"swissprot");
191 const stringkPdb(
"pdb");
192 const stringkRefseq(
"refseq");
198arg_desc->SetConstraint(
203arg_desc->SetCurrentGroup(
"Output options");
205arg_desc->AddOptionalKey(
208 "Name of the output BLAST database to be created",
212arg_desc->AddDefaultKey(
"max_file_sz",
"number_of_bytes",
213 "Maximum file size for the output BLAST database files",
229 boolcopy_membership_bits =
false,
230 boolcopy_leaf_taxids =
true 233 CStopWatchtotal_timer, bioseq_timer, memb_timer, leaf_timer;
234total_timer.
Start();
236 for(
int i= 0;
i< numSeqs;
i++) {
248bioseq_timer.
Start();
250 if(bs_nc.
Empty()) {
253bioseq_timer.
Stop();
259bioseq_timer.
Stop();
261 if(copy_membership_bits) {
262memb_timer.
Start();
267 if(bdl->CanGetMemberships() &&
268!bdl->GetMemberships().empty()) {
269 intmemb_bits = bdl->GetMemberships().front();
270 if(memb_bits == 0) {
274bdl->GetSeqid().front()->AsFastaString();
281 if(copy_leaf_taxids) {
282leaf_timer.
Start();
289bdl->GetSeqid().front()->AsFastaString();
297total_timer.
Stop();
300<<
" sequences from the source database" 303 Info<<
"Processed all input data in " 307 Info<<
"Processed bioseqs in " 311 Info<<
"Processed membership bits in " 315 Info<<
"Processed leaf taxids in " 346 returnblastdb->
GiToOid(elem.
gi, oid);
381 if(bs_nc.
Empty()) {
398vector<string> file_paths;
401 boolretval =
false;
402 const char* isam_extensions[] = {
"si",
"sd",
"ni",
"nd",
"os",
NULL};
404 ITERATE(vector<string>,
f, file_paths) {
405 for(
int i= 0; isam_extensions[
i] !=
NULL;
i++) {
407oss << *
f<<
"."<<
type<< isam_extensions[
i];
410 if(
file.Exists() &&
file.GetLength() > 0) {
419 ITERATE(vector<string>,
f, file_paths) {
420std::size_t found =
f->find_last_of(
".");
422oss <<
f->substr(0, found) <<
"."<<
type<<
"os";
425 if(
file.Exists() &&
file.GetLength() > 0) {
442vector<string> file_paths;
444 ITERATE(vector<string>,
f, file_paths) {
446oss << *
f<<
"."<<
"ppd";
449 if(
file.Exists() &&
file.GetLength() > 0)
467args[
kArgDb].AsString(),
473args[
kArgDb].AsString(),
480 stringmbitName = args[
kCopyOnly].AsString();
504 const boolkIsSparse =
false;
508 const boolkUseGiMask =
false;
510 if(args[
"blastdb_version"]) {
511dbver =
static_cast<EBlastDbVersion>(args[
"blastdb_version"].AsInteger());
518kIsSparse, kParseSeqids, kUseGiMask,
520? args[
"logfile"].AsOutputFile() : cerr),
false, dbver);
550 const boolkIsSparse =
false;
554 const boolkUseGiMask =
false;
556 if(args[
"blastdb_version"]) {
557dbver =
static_cast<EBlastDbVersion>(args[
"blastdb_version"].AsInteger());
562kIsSparse, kParseSeqids, kUseGiMask,
564? args[
"logfile"].AsOutputFile() : cerr),
false, dbver);
593 if(args[
"logfile"]) {
597time_t now = time(0);
598 LOG_POST(
Info<<
string(72,
'-') <<
"\n"<<
"NEW LOG - "<< ctime(&now) );
602 static const Uint8MAX_VOL_FILE_SIZE = 0x100000000;
603 if(bytes >= MAX_VOL_FILE_SIZE) {
624 ERR_POST(
Error<<
"Unknown error in BlastdbCopyApplication::Run()");
646 int main(
intargc,
const char* argv[])
Interface for converting sources of sequence data into blast sequence input.
int main(int argc, const char *argv[])
Code to build a database given various sources of sequence data.
virtual int Run(void)
Run the application.
~BlastdbCopyApplication()
void x_CopyDB(const CArgs &args, CSeqDB::ESeqType seq_type, Uint8 bytes)
CBlastUsageReport m_UsageReport
bool x_ShouldCopyPIGs(const string &dbname, CSeqDB::ESeqType seq_type) const
const string kMembershipBits
void x_MakeDBwIDList(const CArgs &args, CSeqDB::ESeqType seq_type, Uint8 bytes)
virtual void Exit(void)
Cleanup on application exit.
bool x_ShouldParseSeqIds(const string &dbname, CSeqDB::ESeqType seq_type) const
virtual void Init(void)
Initialize the application.
TMemBitMap m_MembershipMap
map< string, ICriteria::EMembershipBit > TMemBitMap
CRef< CSeqDBExpert > m_BlastDb
virtual CConstRef< CBioseq > GetNext()
Get a Bioseq object if there are any more to get.
CBlastDbAllBioseqSource(CRef< CSeqDBExpert > blastdb)
bool x_GetOidFromSeqID(CRef< CSeqDBExpert > blastdb, CRef< CSeqDBGiList > gilist, CSeqDBFileGiList::EIdType idtype, int ind, int &oid)
virtual CConstRef< CBioseq > GetNext()
Get a Bioseq object if there are any more to get.
const TIdToLeafs GetLeafTaxIds() const
CBlastDbBioseqSource(CRef< CSeqDBExpert > blastdb, CRef< CSeqDBGiList > gilist, CSeqDBFileGiList::EIdType idtype, bool copy_membership_bits=false, bool copy_leaf_taxids=true)
list< CConstRef< CBioseq > > TBioseqs
TLinkoutMap m_MembershipBits
const TLinkoutMap GetMembershipBits() const
void AddParam(EUsageParams p, int val)
Build BlastDB format databases from various data sources.
bool AddSequences(IBioseqSource &src, bool add_pig=false)
Add sequences from an IBioseqSource object.
void SetMembBits(const TLinkoutMap &membbits, bool keep_mbits)
Specify a membership bit lookup object.
void SetUseRemote(bool use_remote)
Specify whether to use remote fetching for locally absent IDs.
void SetSourceDb(const string &src_db_name)
Specify source database(s) via the database name(s).
void StartBuild()
Start building a new database.
bool EndBuild(bool erase=false)
Finish building a new database.
void SetMaxFileSize(Uint8 max_file_size)
Set the maximum size of database component files.
void SetSkipCopyingGis(bool v)
void SetLeafTaxIds(const TIdToLeafs &taxids, bool keep_taxids)
Specify a leaf-taxids object.
Defines user input exceptions.
Defines invalid user input exceptions.
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string. Sample usage:
int GetNumGis() const
Get the number of GIs in the array.
const SGiOid & GetGiOid(int index) const
Access an element of the array.
int GetNumSis() const
Get the number of Seq-ids in the array.
const SSiOid & GetSiOid(int index) const
Access an element of the array.
static void FindVolumePaths(const string &dbname, ESeqType seqtype, vector< string > &paths, vector< string > *alias_paths=NULL, bool recursive=true, bool expand_links=true)
Find volume paths.
const string & GetDBNameList() const
Get list of database names.
ESeqType
Sequence types (eUnknown tries protein, then nucleotide).
bool SeqidToOid(const CSeq_id &seqid, int &oid) const
Translate a Seq-id to any matching OID.
CRef< CBioseq > GetBioseq(int oid, TGi target_gi=ZERO_GI, const CSeq_id *target_seq_id=NULL) const
Get a CBioseq for a sequence.
void SetVolsMemBit(int mbit)
Set the membership of all volumes.
string GetTitle() const
Returns the database title.
bool CheckOrFindOID(int &next_oid) const
Find an included OID, incrementing next_oid if necessary.
EBlastDbVersion GetBlastDbVersion() const
Return blast db version.
static CRef< CBlast_def_line_set > ExtractBlastDefline(const CBioseq &bioseq)
Extract a Blast-def-line-set object from a Bioseq retrieved by CSeqDB.
bool GiToOid(TGi gi, int &oid) const
Translate a GI to an OID.
Interface to a source of Bioseq objects.
container_type::iterator iterator
const_iterator end() const
const_iterator find(const key_type &key) const
iterator_bool insert(const value_type &val)
const_iterator begin() const
const_iterator end() const
Constant declarations for command line arguments for BLAST programs.
const string kArgDbTitle
Title for the BLAST database.
const string kArgOutput
Output file name.
const string kArgDbType
BLAST database molecule type.
const string kArgSeqIdList
seqid list file name to restrict BLAST database
const string kArgDb
BLAST database name.
const string kArgGiList
gi list file name to restrict BLAST database
void Print(const CCompactSAMApplication::AlignInfo &ai)
This is the header file for defining and working with criteria functions.
void SetFullVersion(CRef< CVersionAPI > version)
Set version data for the program.
void HideStdArgs(THideStdArgs hide_mask)
Set the hide mask for the Hide Std Flags.
virtual const CArgs & GetArgs(void) const
Get parsed command line arguments.
int AppMain(int argc, const char *const *argv, const char *const *envp=0, EAppDiagStream diag=eDS_Default, const char *conf=NcbiEmptyCStr, const string &name=NcbiEmptyString)
Main function (entry point) for the NCBI application.
CVersionInfo GetVersion(void) const
Get the program version information.
virtual void SetupArgDescriptions(CArgDescriptions *arg_desc)
Setup the command line argument descriptions.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
const CNcbiArguments & GetArguments(void) const
Get the application's cached unprocessed command-line arguments.
@ fHideXmlHelp
Hide XML help description.
@ fHideFullVersion
Hide full version description.
@ fHideDryRun
Hide dryrun description.
@ fHideConffile
Hide configuration file description.
bool Exist(const string &name) const
Check existence of argument description.
@ eExcludes
One argument excludes another.
@ eInputFile
Name of file (must exist and be readable)
@ eString
An arbitrary string.
@ eInteger
Convertible into an integer number (int or Int8)
void SetDiagPostFlag(EDiagPostFlag flag)
Set the specified flag (globally).
void SetDiagPostPrefix(const char *prefix)
Specify a string to prefix all subsequent error postings with.
EDiagSev SetDiagPostLevel(EDiagSev post_sev=eDiag_Error)
Set the threshold severity for posting the messages.
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
void SetDiagStream(CNcbiOstream *os, bool quick_flush=true, FDiagCleanup cleanup=0, void *cleanup_data=0, const string &stream_name="")
Set diagnostic stream.
@ eDPF_All
All flags (except for the "unusual" ones!)
@ eDiag_Info
Informational message.
void Error(CExceptionArgs_Base &args)
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
void Info(CExceptionArgs_Base &args)
void Reset(void)
Reset reference object.
bool Empty(void) const THROWS_NONE
Check if CRef is empty — not pointing to any object, which means having a null value.
uint64_t Uint8
8-byte (64-bit) unsigned integer
bool IsEnabled(void)
Indicates whether application usage statistics collection is enabled for a current reporter instance.
static Uint8 StringToUInt8_DataSize(const CTempString str, TStringToNumFlags flags=0)
Convert string that can contain "software" qualifiers to Uint8.
double Elapsed(void) const
Return time elapsed since first Start() or last Restart() call (in seconds).
void Stop(void)
Suspend the timer.
string AsSmartString(CTimeSpan::ESmartStringPrecision precision, ERound rounding, CTimeSpan::ESmartStringZeroMode zero_mode=CTimeSpan::eSSZ_SkipZero) const
Transform elapsed time to "smart" string.
void Start(void)
Start the timer.
const Tdata & Get(void) const
Get the member data.
list< CRef< CBlast_def_line > > Tdata
char * dbname(DBPROCESS *dbproc)
Get name of current database.
const string version
version string
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
CSeqDB::ESeqType ParseMoleculeTypeString(const string &str)
Convert a string to a CSeqDB ESeqType object.
bool DeleteBlastDb(const string &dbpath, CSeqDB::ESeqType seq_type)
Deletes all files associated with a BLAST database.
EBlastDbVersion
BLAST database version.
Structure that holds GI,OID pairs.
TGi gi
The GI or 0 if unknown.
Structure that holds Seq-id,OID pairs.
string si
The String-id or "" if unknown.
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4