* input_fname =
NULL,
85 const string* output_fname =
NULL)
const;
127 "This application has three modes of operation:\n\n" 128 "1) GI file conversion:\n" 129 " Converts a text file containing GIs (one per line) to a more efficient\n" 130 " binary format. This can be provided as an argument to the -gilist option\n" 131 " of the BLAST search command line binaries or to the -gilist option of\n" 132 " this program to create an alias file for a BLAST database (see below).\n\n" 133 "2) Alias file creation (restricting with GI List or Sequence ID List):\n" 134 " Creates an alias for a BLAST database and a GI or ID list which\n" 135 " restricts this database. This is useful if one often searches a subset\n" 136 " of a database (e.g., based on organism or a curated list). The alias\n" 137 " file makes the search appear as if one were searching a regular BLAST\n" 138 " database rather than the subset of one.\n\n" 139 "3) Alias file creation (aggregating BLAST databases):\n" 140 " Creates an alias for multiple BLAST databases. All databases must be of\n" 141 " the same molecule type (no validation is done). The relevant options are\n" 142 " -dblist and -num_volumes.\n";
152arg_desc->SetUsageContext(
GetArguments().GetProgramBasename(),
153 "Application to create BLAST database aliases, version " 156 stringdflt(
"Default = input file name provided to -gi_file_in argument");
157dflt +=
" with the .bgl extension";
161 "dblist",
"num_volumes",
"vdblist",
"seqid_file_in",
"seqid_file_out",
162 "seqid_db",
"seqid_dbtype",
"seqid_file_info" 165arg_desc->SetCurrentGroup(
"GI file conversion options");
167arg_desc->AddOptionalKey(
"gi_file_in",
"input_file",
168 "Text file to convert, should contain one GI per line",
170 for(
stringexclusion : exclusions) {
174arg_desc->AddOptionalKey(
"gi_file_out",
"output_file",
175 "File name of converted GI file\n"+ dflt,
180 for(
stringexclusion : exclusions) {
184arg_desc->SetCurrentGroup(
"Alias file creation options");
186arg_desc->AddOptionalKey(
kArgDb,
"dbname",
"BLAST database name",
190arg_desc->AddDefaultKey(
kArgDbType,
"molecule_type",
191 "Molecule type stored in BLAST database",
196arg_desc->AddOptionalKey(
kArgDbTitle,
"database_title",
197 "Title for BLAST database\n" 198 "Default = name of BLAST database provided to -db" 199 " argument with the -gifile argument appended to it",
203arg_desc->AddOptionalKey(
kArgGiList,
"input_file",
204 "Text or binary gi file to restrict the BLAST " 205 "database provided in -db argument\n" 206 "If text format is provided, it will be converted " 212 "Text sequence id or accession file to restrict " 213 "the BLAST database provided in -db argument",
220 "Text taxonomy id file to restrict " 221 "the BLAST database provided in -db argument",
228arg_desc->AddOptionalKey(
"oid_masks",
"oid_masks",
229 "Create alias db with pre-built oid masks\n" 238arg_desc->AddFlag(
"process_as_tis",
239 "Process all numeric ID lists as TIs instead of GIs",
true);
242arg_desc->AddOptionalKey(
kOutput,
"database_name",
243 "Name of BLAST database alias to be created",
246arg_desc->AddOptionalKey(
"dblist",
"database_names",
247 "A space separated list of BLAST database names to" 251arg_desc->AddOptionalKey(
"dblist_file",
"file_name",
252 "A file containing a list of BLAST database names" 253 " to aggregate, one per line",
257arg_desc->AddOptionalKey(
"vdblist",
"vdb_names",
258 "A space separated list of VDB names to aggregate",
261arg_desc->AddOptionalKey(
"vdblist_file",
"file_name",
262 "A file containing a list of vdb names" 263 " to aggregate, one per line",
265 const char*
key[] = {
"dblist",
"dblist_file",
"vdblist",
"vdblist_file"};
266 for(
size_t i= 0;
i<
sizeof(
key)/
sizeof(*
key);
i++) {
277 msg<<
"Number of volumes to aggregate, in which case the " 278<<
"basename for the database is extracted from the " 280arg_desc->AddOptionalKey(
"num_volumes",
"positive_integer",
291 stringdflt_seqid(
"Default = input file name provided to -seqid_file_in argument");
294 "dblist",
"num_volumes",
"vdblist" 299arg_desc->SetCurrentGroup(
"Seqd ID file conversion options");
301arg_desc->AddOptionalKey(
"seqid_file_in",
"input_file",
302 "Text file to convert, should contain one seq id per line",
304 for(
stringexclusion : seqid_exclusions) {
308arg_desc->AddOptionalKey(
"seqid_title",
"seqid_title",
"Title for seqid list.\n "+
312arg_desc->AddOptionalKey(
"seqid_file_out",
"output_file",
313 "File name of converted seq id file\n"+ dflt_seqid +
" with the .bsl extension",
316arg_desc->AddOptionalKey(
"seqid_db",
"dbname",
"BLAST database for seqidlist",
320arg_desc->AddOptionalKey(
"seqid_dbtype",
"molecule_type",
"Molecule type BLAST database",
324arg_desc->SetConstraint(
"seqid_dbtype", &(*
new CArgAllow_Strings,
"nucl",
"prot"));
326 for(
stringexclusion : seqid_exclusions) {
332 "dblist",
"num_volumes",
"vdblist",
"seqid_file_in",
"seqid_file_out" 336arg_desc->AddOptionalKey(
"seqid_file_info",
"seqid_file_info",
"Display seqidlist file info",
CArgDescriptions::eString);
337 for(
stringexclusion : seqid_info_exclusions) {
348 const string* input_fname
,
349 const string* output_fname
)
const 353 stringproduct(
"GI");
354 if(args.
Exist(
"process_as_tis") && args[
"process_as_tis"]) {
356product.assign(
"TI");
360 unsigned intline_ctr = 0;
365 if( !line.empty() ) {
370<<
": "<< e.GetMsg());
376 if(input_fname && output_fname) {
377 LOG_POST(
"Converted "<< builder.
Size() <<
" "<< product <<
"s from " 378<< *input_fname <<
" to binary format in "<< *output_fname);
380 LOG_POST(
"Converted "<< builder.
Size() <<
" "<< product <<
"s into " 381<<
"binary "<< product <<
" file");
391 boolisTiList =
false;
392 if(args.
Exist(
"process_as_tis") && args[
"process_as_tis"]) {
398!args[
"oid_masks"].
HasValue()) {
401 "or oid_masks must be specified if database name is used");
412title = orig_db_name +
" limited by ";
424title +=
"exclude model oid masks";
434 if( !gilist.empty() ) {
435 if( !
CFile(gilist).Exists() ) {
440 const charmol_type = args[
kArgDbType].AsString()[0];
441 _ASSERT(mol_type ==
'p'|| mol_type ==
'n');
443oss << args[
kOutput].AsString() <<
"."<< mol_type <<
444(isTiList ?
".btl":
".gil");
446 const string& ifname = args[
kArgGiList].AsString();
447ifstream
input(ifname.c_str());
448ofstream
output(gilist.c_str(), std::ios::binary);
453 if(args[
"dblist"].
HasValue()) {
457 CSeqDBExpertoriginal_db(args[
"dblist"].AsString(), db_seqtype);
461 if(args[
"dblist"].
HasValue() || args[
"dblist_file"].HasValue()) {
464seq_type, gilist, title, alias_type);
465}
else if(args[
"num_volumes"].
HasValue()) {
466 const unsigned intnum_vols =
467 static_cast<unsigned int>(args[
"num_volumes"].AsInteger());
472args[
kArgDb].AsString(),
477 if( !seqid_list.empty() ) {
478 if( !
CFile(seqid_list).Exists() ) {
483args[
kArgDb].AsString(),
484seq_type, seqid_list,
488 if( !taxid_list.empty() ) {
489 if( !
CFile(taxid_list).Exists() ) {
494args[
kArgDb].AsString(),
495seq_type, taxid_list,
499seq_type, args[
"oid_masks"].AsInteger(), title);
502 if(args[
"vdblist"].
HasValue() || args[
"vdblist_file"].
HasValue()) {
512title = args[
"title"].AsString();
529op_mode |= IOS_BASE::app;
538alias_file <<
"TITLE "<< title <<
"\n";
541alias_file <<
"VDBLIST ";
542 ITERATE(vector< string >, iter, vdbs) {
543alias_file <<
"\""<< *iter <<
"\" ";
550vector<string> retval;
553 const stringdblist = args[dbs].AsString();
558 while(getline(
in, line)) {
563retval.push_back(line);
579 if(args[
"seqid_file_out"].
HasValue()) {
580out_filename = args[
"seqid_file_out"].AsString();
583out_filename = args[
"seqid_file_in"].AsString() +
".bsl";
586 if(args[
"seqid_title"].
HasValue()) {
587title = args[
"seqid_title"].AsString();
594 unsigned intline_ctr = 0;
595vector<string> seqid_list;
600 if( !line.empty() ) {
602seqid_list.push_back(line);
606 if(args[
"seqid_db"].
HasValue()) {
608 if(args[
"seqid_dbtype"].
HasValue()) {
611 CSeqDBseqdb(args[
"seqid_db"].AsString(),
type);
638 if(args[
"gi_file_out"].
HasValue()) {
639gi_file_out = args[
"gi_file_out"].AsString();
641gi_file_out = args[
"gi_file_in"].AsString();
642gi_file_out +=
".bgl";
649 if(!
CFile(gi_file_out).Exists()) {
670 if(args[
"gi_file_in"].
HasValue()) {
673 else if(args[
"seqid_file_in"].
HasValue()) {
676 else if(args[
"seqid_file_info"].
HasValue()) {
693 if(args[
"vdblist"].
HasValue() || args[
"vdblist_file"].
HasValue()) {
703 #ifndef SKIP_DOXYGEN_PROCESSING 704 int main(
intargc,
const char* argv[]
)
Declares singleton objects to store the version and reference for the BLAST engine.
#define CATCH_ALL(exit_code)
Standard catch statement for all BLAST command line programs.
Interface for converting sources of sequence data into blast sequence input.
Class to constrain the values of an argument to those greater than or equal to the value specified in...
Binary GI or TI List Builder.
void Write(const string &fname)
Write the list to a file.
size_type Size() const
Returns the number of IDs stored in an instance of this class.
EIdType
Identifier types.
void AppendId(const Int8 &id)
Add an identifier to the list.
The main application class.
CBlastDBAliasApp()
@inheritDoc
void x_SeqIDFileInfo() const
void x_AddVDBsToAliasFile(string filename, bool append, string title=kEmptyStr) const
virtual int Run()
@inheritDoc
int x_ConvertSeqIDFile() const
vector< string > x_GetDbsToAggregate(const string dbs, const string file) const
EOperationMode
Describes the modes of operation of this application.
@ eConvertGiFile
Convert gi files from text to binary format.
@ eConvertSeqIDFile
Convert text seqidlist files to the proprietary binary format.
@ eSeqIDFileInfo
Display info about seqidlist file.
@ eCreateAlias
Create alias files.
EOperationMode x_GetOperationMode() const
Determine what mode of operation is being used.
CBlastUsageReport m_UsageReport
void CreateAliasFile() const
Invokes function to create an alias file with the arguments provided on the command line.
static const char *const DOCUMENTATION
Documentation for this program.
int ConvertGiFile(CNcbiIstream &input, CNcbiOstream &output, const string *input_fname=NULL, const string *output_fname=NULL) const
Converts gi files from text to binary format.
virtual void Init()
@inheritDoc
static void PrintSeqidlistInfo(const string &filename, CNcbiOstream &os)
void AddParam(EUsageParams p, int val)
Keeps track of the version of the BLAST engine in the NCBI C++ toolkit.
Defines user input exceptions.
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string. Sample usage:
CSeqDB_Substring FindFileName() const
Returns the portion of this path containing the file name.
CSeqDB_Substring FindBaseName() const
Returns the portion of this path containing the base name.
void GetString(string &s) const
Return the data by assigning it to a string.
ESeqType
Sequence types (eUnknown tries protein, then nucleotide).
@ eProtein
Protein database.
@ eNucleotide
Nucleotide database.
const string kArgDbTitle
Title for the BLAST database.
const string kArgOutput
Output file name.
const string kArgDbType
BLAST database molecule type.
const string kArgTaxIdListFile
Argument to specify file with taxonomy ids for filtering.
const string kArgSeqIdList
Seqid list file name to restrict BLAST database.
const string kArgDb
BLAST database name.
const string kArgGiList
GI list file name to restrict BLAST database.
void Print(const CCompactSAMApplication::AlignInfo &ai)
std::ofstream out("events_result.xml")
main entry point for tests
static void DLIST_NAME() append(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
static SQLCHAR output[256]
void SetFullVersion(CRef< CVersionAPI > version)
Set version data for the program.
void HideStdArgs(THideStdArgs hide_mask)
Set the hide mask for the Hide Std Flags.
virtual const CArgs & GetArgs(void) const
Get parsed command line arguments.
int AppMain(int argc, const char *const *argv, const char *const *envp=0, EAppDiagStream diag=eDS_Default, const char *conf=NcbiEmptyCStr, const string &name=NcbiEmptyString)
Main function (entry point) for the NCBI application.
CVersionInfo GetVersion(void) const
Get the program version information.
virtual void SetupArgDescriptions(CArgDescriptions *arg_desc)
Setup the command line argument descriptions.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
const CNcbiArguments & GetArguments(void) const
Get the application's cached unprocessed command-line arguments.
@ fHideXmlHelp
Hide XML help description.
@ fHideFullVersion
Hide full version description.
@ fHideDryRun
Hide dryrun description.
@ fHideConffile
Hide configuration file description.
bool Exist(const string &name) const
Check existence of argument description.
@ fBinary
Open as binary file; for eInputFile, eOutputFile, eIOFile.
@ fPreOpen
Open file right away; for eInputFile, eOutputFile, eIOFile.
@ eRequires
One argument requires another.
@ eExcludes
One argument excludes another.
@ eInputFile
Name of file (must exist and be readable)
@ eString
An arbitrary string.
@ eOutputFile
Name of file (must be writable)
@ eInteger
Convertible into an integer number (int or Int8)
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
void Warning(CExceptionArgs_Base &args)
bool IsEnabled(void)
Indicates whether application usage statistics collection is enabled for a current reporter instance.
CNcbiIstream & NcbiGetlineEOL(CNcbiIstream &is, string &str, string::size_type *count=NULL)
Read from "is" to "str" the next line (taking into account platform specifics of End-of-Line)
IO_PREFIX::ofstream CNcbiOfstream
Portable alias for ofstream.
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
static Int8 StringToInt8(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to Int8.
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
static string TruncateSpaces(const string &str, ETrunc where=eTrunc_Both)
Truncate whitespace in a string.
double Elapsed(void) const
Return time elapsed since first Start() or last Restart() call (in seconds).
string AsString(const CTimeFormat &format=kEmptyStr, TSeconds out_tz=eCurrentTimeZone) const
Transform time to string.
void Start(void)
Start the timer.
@ eCurrent
Use current time. See also CCurrentTime.
const string version
version string
const struct ncbi::grid::netcache::search::fields::KEY key
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
NCBI C++ stream class wrappers for triggering between "new" and "old" C++ stream libraries.
std::istream & in(std::istream &in_, double &x_)
Defines exception class and several constants for SeqDB.
bool SeqDB_IsBinaryTiList(const string &fname)
Returns true if the file name passed contains a binary TI list.
bool SeqDB_IsBinaryGiList(const string &fname)
Returns true if the file name passed contains a binary GI list.
Defines `expert' version of CSeqDB interfaces.
This file defines several SeqDB utility functions related to byte order and file system portability.
int WriteBlastSeqidlistFile(const vector< string > &idlist, CNcbiOstream &os, const string &title, const CSeqDB *seqdb=NULL)
static SLJIT_INLINE sljit_ins msg(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
Defines BLAST database construction classes.
void CWriteDB_CreateAliasFile(const string &file_name, const string &db_name, CWriteDB::ESeqType seq_type, const string &gi_file_name, const string &title=string(), EAliasFileFilterType alias_type=eGiList)
Writes an alias file that restricts a database with a gi list.
void CWriteDB_CreateOidMaskDB(const string &input_db, const string &output_db, CWriteDB::ESeqType seq_type, int oid_mask_type, const string &title=string())
EAliasFileFilterType
Defines the possible filtering types that can be applied to an alias file.
@ eTiList
Filter a BLAST database via TIs (Trace IDs)
@ eSeqIdList
Filter a BLAST database via a Seq-id list.
@ eTaxIdList
Filter a BLAST database via Taxonomy Id list.
@ eGiList
Filter a BLAST database via GIs.
Defines exception class for WriteDB.
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4