;
68arg_desc->SetUsageContext(
69GetArguments().GetProgramBasename(), USAGE_LINE );
70arg_desc->AddOptionalKey(
71 "input",
"input_file_name",
"input file name",
73arg_desc->AddOptionalKey(
74 "output",
"output_file_name",
"output file name",
76arg_desc->AddDefaultKey(
77 "verbosity",
"reporting_level",
"how much to report",
79arg_desc->AddOptionalKey(
80 "iformat",
"input_format",
81 "type of input used (default is \"blastdb\" for new style index, " 82 "\"fasta\" for old style index)",
84arg_desc->AddDefaultKey(
85 "legacy",
"use_legacy_index_format",
86 "use legacy (0-terminated offset lists) dbindex format",
88arg_desc->AddDefaultKey(
89 "idmap",
"generate_idmap",
90 "generate id map for the sequences in the index",
92arg_desc->AddOptionalKey(
93 "db_mask",
"filtering_algorithm",
94 "use the specified filtering algorithm from BLAST DB",
98 "show the info about available database filtering algorithms" 101arg_desc->AddOptionalKey(
102 "nmer",
"nmer_size",
103 "length of the indexed words",
105arg_desc->AddOptionalKey(
106 "ws_hint",
"word_size_hint",
107 "most likely word size used in searches",
109arg_desc->AddOptionalKey(
110 "volsize",
"volume_size",
"size of an index volume in MB",
112arg_desc->AddOptionalKey(
113 "stat",
"statistics_file",
114 "write index statistics into file with that name " 115 "(for testing and debugging purposes only).",
117arg_desc->AddOptionalKey(
118 "stride",
"stride",
119 "distance between stored database positions",
121arg_desc->AddDefaultKey(
122 "old_style_index",
"boolean",
123 "Use old style index (deprecated)",
125arg_desc->SetConstraint(
128arg_desc->SetConstraint(
131arg_desc->SetConstraint(
134arg_desc->SetConstraint(
137arg_desc->SetConstraint(
140arg_desc->SetConstraint(
143arg_desc->SetDependency(
145arg_desc->SetDependency(
147SetupArgDescriptions( arg_desc.release() );
157 boolold_style(
GetArgs()[
"old_style_index"].AsBoolean() );
165 if(
GetArgs()[
"volsize"] ) {
180 if(
GetArgs()[
"stride"] ) {
183 "legacy index creation");
188 if(
GetArgs()[
"ws_hint"] ) {
191 "legacy index creation");
194 unsigned longws_hint =
GetArgs()[
"ws_hint"].AsInteger();
199 "to the minimum value of "<< ws_hint );
206 unsigned intvol_num = 0;
215 stringiformat(
GetArgs()[
"iformat"] ?
GetArgs()[
"iformat"].AsString()
216: old_style ?
"fasta":
"blastdb");
218 if( !old_style && iformat ==
"fasta") {
219 ERR_POST(
Error<<
"new style index requires input format 'blastdb'");
223 if( iformat ==
"fasta") {
224 if(
GetArgs()[
"db_mask"] ) {
231(
GetArgs()[
"input"].AsString() ) );
234}
else if( iformat ==
"blastdb") {
236 if(
GetArgs()[
"show_filters"] ) {
238 GetArgs()[
"input"].AsString() ) << endl;
243 if(
GetArgs()[
"db_mask"] ) {
245 GetArgs()[
"input"].AsString(),
true,
246 GetArgs()[
"db_mask"].AsString() );
250 GetArgs()[
"input"].AsString(),
false, 0 );
255 ERR_POST(
Error<<
"input format 'blastdb' requires -input option");
262 if( iformat !=
"blastdb"&&
264 GetArgs()[
"db_mask"].AsString() !=
"") {
265 ERR_POST(
Error<<
"option 'db_mask' requires input format 'blastdb'");
269 if( !old_style && iformat ==
"blastdb") {
270 if(
GetArgs()[
"output"] ) {
272 "option 'output' is ignored for new style indices");
275 typedefstd::vector< std::string > TStrVec;
285 boolenable_mask(
GetArgs()[
"db_mask"] );
286 stringfilter( enable_mask ?
GetArgs()[
"db_mask"].AsString() :
"");
288 ITERATE( TStrVec, dbvi, db_vols ) {
292 Uint4vol_num_seq( 0 );
299 Uint4num_seq( 0 ), num_vol( 0 );
311os << dbv_name <<
"."<< setfill(
'0') << setw( 2 )
312<< vol_num++ <<
".idx";
313cerr <<
"creating "<< os.str() <<
"..."<< flush;
315*seqstream, os.str(), start, stop, options );
316num_seq += (stop - start);
318 if( start == stop ) cerr <<
"removed (empty)"<< endl;
321cerr <<
"done"<< endl;
323 "generated index volume with OIDs: "<<
324start <<
"--"<< stop );
327 while( start != stop );
329 if( num_seq != vol_num_seq ) {
331 "number of sequence reported by BLAST database" 332 " volume ("<< vol_num_seq <<
") is not the same" 333 " as in the index ("<< num_seq <<
")");
340shdr.Save( dbv_name +
".shd");
342 "index generated for BLAST database volume "<<
343dbv_name <<
" with "<< num_seq <<
" sequences");
350 Uint4num_seq( 0 ), num_vol( 0 );
352 GetArgs()[
"show_filters"] ?
"":
GetArgs()[
"output"].AsString();
358os << ofname_base <<
"."<< setfill(
'0') << setw( 2 )
359<< vol_num++ <<
".idx";
360cerr <<
"creating "<< os.str() <<
"..."<< flush;
363os.str(), start, stop, options );
364num_seq += (stop - start);
366 if( start == stop ) cerr <<
"removed (empty)"<< endl;
367 else{ ++num_vol; cerr <<
"done"<< endl; }
368}
while( start != stop );
374shdr.Save( ofname_base +
".shd");
static SOptions DefaultSOptions()
Creates an SOptions instance initialized with default values.
static void MakeIndex(const std::string &fname, const std::string &oname, TSeqNum start, TSeqNum start_chunk, TSeqNum &stop, TSeqNum &stop_chunk, const SOptions &options)
Create an index object.
CSequenceIStream::TStreamPos TSeqNum
Type used to enumerate sequences in the index.
static const char *const USAGE_LINE
String containing program usage information.
virtual int Run()
Application main procedure.
virtual void Init()
Application initialization.
static void FindVolumePaths(const string &dbname, ESeqType seqtype, vector< string > &paths, vector< string > *alias_paths=NULL, bool recursive=true, bool expand_links=true)
Find volume paths.
int GetNumOIDs() const
Returns the size of the (possibly sparse) OID range.
Sequence stream that reads BLAST nucleotide databases.
static string ShowSupportedFilters(const string &dbname)
Report on supported subject filter algorithms.
Sequence stream for reading FASTA formatted files.
Class used to abstract reading nucleotide sequences from various sources.
const unsigned long REPORT_QUIET
No progress reporting.
const unsigned long REPORT_VERBOSE
Verbose reporting.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
@ eRequires
One argument requires another.
@ eExcludes
One argument excludes another.
@ eBoolean
{'true', 't', 'false', 'f'}, case-insensitive
@ eString
An arbitrary string.
@ eInteger
Convertible into an integer number (int or Int8)
EDiagSev SetDiagPostLevel(EDiagSev post_sev=eDiag_Error)
Set the threshold severity for posting the messages.
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
@ eDiag_Warning
Warning message.
void Error(CExceptionArgs_Base &args)
void Warning(CExceptionArgs_Base &args)
void Info(CExceptionArgs_Base &args)
uint32_t Uint4
4-byte (32-bit) unsigned integer
USING_SCOPE(blastdbindex)
#define ASSERT
macro for assert.
#define GetArgs
Avoid preprocessor name clash with the NCBI C Toolkit.
Simple record type used to specify index creation parameters.
bool legacy
Indicator of the legacy index format.
unsigned long report_level
Verbose index creation.
unsigned long max_index_size
Maximum index size in megabytes.
std::string stat_file_name
File to write index statistics into.
unsigned long ws_hint
Most likely word size to use for searches.
bool idmap
Indicator of the index map creation.
unsigned long hkey_width
Width of the hash key in bits.
unsigned long stride
Stride to use for stored database locations.
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4