,
62arg_desc.
AddKey(
"ustat",
"unit_counts",
63 "file with unit counts",
68 "(not optional if used with -mk_counts or -convert options)",
75 "check for duplicate sequences",
79 "indicates that -input represents a file containing " 80 "a list of names of fasta files to process, one name " 84 "memory available for mk_counts option in megabytes",
87 "add metadata to the counts file",
90 "number of bases in a unit",
93 "total size of the genome",
99arg_desc.
AddOptionalKey(
"window",
"window_size",
"window size",
102 "window score above which it is allowed to extend masking",
105 "window score threshold used to trigger masking",
108 "alternative high score for a unit if the" 109 "original unit score is more than highscore",
112 "alternative low score for a unit if the" 113 "original unit score is lower than lowscore",
125arg_desc.
AddFlag(
"parse_seqids",
126 "Parse Seq-ids in FASTA input",
true);
128 "controls the format of the masker output (for masking stage only)",
138 "maximum useful unit score",
141 "maximum useful unit score as percentage",
144 "window score threshold used to trigger masking as percentage",
147 "window score above which it is allowed to extend masking as percentage",
150 "minimum useful unit score",
153 "minimum useful unit score as percentage",
168 "controls the format of the masker input",
170arg_desc.
AddDefaultKey(
"exclude_ids",
"exclude_id_list",
171 "file containing the list of ids to exclude from processing",
174 "file containing the list of ids to process",
177 "match ids as strings",
183strings_allowed->
Allow(
"seqids");
188 "controls the format of the output file containing the unit counts " 189 "(for counts generation and conversion only)",
196->Allow(
"obinary") );
198 "target size of the output file containing the unit counts",
203 "combine window masking with dusting",
206 "dust minimum level",
211arg_desc.
AddFlag(
"mk_counts",
"generate frequency counts for a database");
212arg_desc.
AddFlag(
"convert",
"convert counts between different formats");
264 if(args[
"mk_counts"])
266 else if(args[
"convert"])
268 else if(args[
"ustat"])
272 "one of '-mk_counts', '-convert' or '-ustat <stat_file>' " 273 "must be specified");
288 if(
format==
"interval") {
291}
else if(
format==
"fasta") {
313 throwruntime_error(
"Unknown output format");
320: app_type(s_DetermineAppType(args,
type)),
321is( app_type >= eGenerateMasks && args[
kInputFormat].AsString() !=
"blastdb" 323( !(args[
kInput].AsString() ==
"-")
326lstat_name( app_type >= eGenerateMasks ? args[
"ustat"].AsString() :
""),
327t_low_pct( app_type != eConvertCounts && args[
"t_low_pct"] ? args[
"t_low_pct"].AsDouble() : -1.0 ),
328t_extend_pct( app_type != eConvertCounts && args[
"t_extend_pct"] ? args[
"t_extend_pct"].AsDouble() : -1.0 ),
329t_thres_pct( app_type != eConvertCounts && args[
"t_thres_pct"] ? args[
"t_thres_pct"].AsDouble() : -1.0 ),
330t_high_pct( app_type != eConvertCounts && args[
"t_high_pct"] ? args[
"t_high_pct"].AsDouble() : -1.0 ),
331textend( app_type >= eGenerateMasks && args[
"t_extend"] ? args[
"t_extend"].AsInteger() : 0 ),
332cutoff_score( app_type >= eGenerateMasks && args[
"t_thres"] ? args[
"t_thres"].AsInteger() : 0 ),
333max_score( app_type != eConvertCounts && args[
"t_high"] ? args[
"t_high"].AsInteger() : 0 ),
334min_score( app_type != eConvertCounts && args[
"t_low"] ? args[
"t_low"].AsInteger() : 0 ),
335 window_size( app_type >= eGenerateMasks && args[
"window"] ? args[
"window"].AsInteger() : 0 ),
337merge_cutoff_score( 50 ),
338abs_merge_cutoff_dist( 8 ),
339mean_merge_cutoff_dist( 50 ),
346merge_unit_step( 1 ),
347fa_list( app_type == eComputeCounts && determine_input ? args[
"fa_list"].AsBoolean() :
false),
348mem( app_type == eComputeCounts ? args[
"mem"].AsInteger() : 0 ),
349unit_size( app_type == eComputeCounts && args[
"unit"] ? args[
"unit"].AsInteger() : 0 ),
350genome_size( app_type == eComputeCounts && args[
"genome_size"] ? args[
"genome_size"].AsInt8() : 0 ),
351 input( determine_input ? args[
kInput].AsString() :
""),
353 th(
"90,99,99.5,99.8"),
355dust_level( app_type == eGenerateMasksWithDuster ? args[
"dust_level"].AsInteger() : 0 ),
357checkdup( app_type == eComputeCounts ? args[
"checkdup"].AsBoolean() :
false),
358sformat( app_type < eGenerateMasks ? args[
"sformat"].AsString() :
""),
359smem( app_type < eGenerateMasks ? args[
"smem"].AsInteger() : 0 ),
360ids( 0 ), exclude_ids( 0 ),
361use_ba( app_type != eConvertCounts ),
362text_match( app_type != eConvertCounts && args[
"text_match"].AsBoolean() )
364 if(args.
Exist(
"meta") && args[
"meta"]) {
365 metadata= args[
"meta"].AsString();
367 _TRACE(
"Entering CWinMaskConfig::CWinMaskConfig()");
381args[
kInput].AsString() );
384 if(determine_input &&
iformatstr!=
"seqids"){
393eReaderAllocFail,
"");
399 set_max_score= args[
"set_t_high"] ? args[
"set_t_high"].AsInteger()
401 set_min_score= args[
"set_t_low"] ? args[
"set_t_low"].AsInteger()
405 stringids_file_name( args[
"ids"].AsString() );
406 stringexclude_ids_file_name( args[
"exclude_ids"].AsString() );
408 if( !ids_file_name.empty()
409&& !exclude_ids_file_name.empty() )
412 "only one of -ids or -exclude_ids can be specified");
415 if( !ids_file_name.empty() ) {
423 "-text_match false can be used only with " 430 if( !exclude_ids_file_name.empty() ) {
438 "-text_match false can be used only with " 445 _TRACE(
"Leaving CWinMaskConfig::CWinMaskConfig");
461 "User options caused reader not to be created; can't get reader");
476string::size_type stop( line.find_first_of(
" \t") );
477string::size_type start( line[0] ==
'>'? 1 : 0 );
478 stringid_str = line.substr( start, stop - start );
479id_list.
insert( id_str );
491 return "can not open input stream";
495 return "can not allocate fasta sequence reader";
499 return "inconsistent program options";
Class for reading sequences from BLAST databases.
Class for reading sequences from fasta files.
Virtual base class for all input readers.
Output filter to print masked sequence locations as Blast-db-mask-info objects.
Output filter to write masked data in fasta format.
Output filter to print masked sequences as sets of intervals.
Output filter to print masked sequence locations as NCBI Seq-loc objects.
A base class for winmasker output writers.
Winmasker configuration errors.
virtual const char * GetErrCodeString() const override
Get the description of an error.
@ eInconsistentOptions
Option validation failure.
@ eInputOpenFail
Can not open input file.
@ eReaderAllocFail
Memory allocation for input reader object failed.
string iformatstr
input format
static void FillIdList(const string &file_name, CIdSet &id_list)
Read the list of sequence ids from a given file.
CMaskWriter * writer
output writer object
CMaskReader * reader
input reader object
@ eGenerateMasksWithDuster
CWinMaskConfig(const CArgs &args, EAppType type=eAny, bool determine_input=true)
Object constructor.
CIstreamProxy is
input file resource manager
CMaskWriter * x_GetWriter(const CArgs &args)
Create the CMaskWriter instance for this class.
string output
output file name (may be empty to indicate stdout)
CIdSet * exclude_ids
set of ids to exclude from processing
CWinMaskUtil::CIdSet_TextMatch CIdSet_TextMatch
EAppType app_type
type of application to run
CMaskReader & Reader()
Get the input reader object.
Uint4 set_max_score
score to use for high scoring units
string metadata
metadata associated with counts file
~CWinMaskConfig()
Destructor.
CIdSet * ids
set of ids to process
bool text_match
identify seq ids by string matching
Uint4 set_min_score
score to use for low scoring units
static void AddWinMaskArgs(CArgDescriptions &arg_desc, EAppType type=eAny, bool determine_input=true)
CWinMaskUtil::CIdSet_SeqId CIdSet_SeqId
static EAppType s_DetermineAppType(const CArgs &args, EAppType user_specified_type)
Base class for sets of seq_id representations used with -ids and -exclude_ids options.
virtual void insert(const string &id_str)=0
Add a string to the id set.
static SQLCHAR output[256]
void AddFlag(const string &name, const string &comment, CBoolEnum< EFlagValue > set_value=eFlagHasValueIfSet, TFlags flags=0)
Add description for flag argument.
void SetConstraint(const string &name, const CArgAllow *constraint, EConstraintNegate negate=eConstraint)
Set additional user defined constraint on argument value.
bool Exist(const string &name) const
Check existence of argument description.
void AddKey(const string &name, const string &synopsis, const string &comment, EType type, TFlags flags=0)
Add description for mandatory key.
void AddOptionalKey(const string &name, const string &synopsis, const string &comment, EType type, TFlags flags=0)
Add description for optional key without default value.
CArgAllow_Strings * Allow(const string &value)
Add allowed string values.
void SetCurrentGroup(const string &group)
Set current arguments group name.
void AddDefaultKey(const string &name, const string &synopsis, const string &comment, EType type, const string &default_value, TFlags flags=0, const string &env_var=kEmptyStr, const char *display_value=nullptr)
Add description for optional key with default value.
@ eExcludes
One argument excludes another.
@ eInputFile
Name of file (must exist and be readable)
@ eBoolean
{'true', 't', 'false', 'f'}, case-insensitive
@ eDouble
Convertible into a floating point number (double)
@ eString
An arbitrary string.
@ eOutputFile
Name of file (must be writable)
@ eInteger
Convertible into an integer number (int or Int8)
@ fBinary
Open file in binary mode.
TErrCode GetErrCode(void) const
Get error code.
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
virtual const char * GetErrCodeString(void) const
Get error code interpreted as text.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
CNcbiIstream & NcbiGetlineEOL(CNcbiIstream &is, string &str, string::size_type *count=NULL)
Read from "is" to "str" the next line (taking into account platform specifics of End-of-Line)
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
IO_PREFIX::ifstream CNcbiIfstream
Portable alias for ifstream.
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
@ eBlast_filter_program_windowmasker
The blob sat and sat key Both must be positive integers</td > n< td > Non empty string The interpretation of the blob id depends on a processor Cassandra n processor expects the following format
<!DOCTYPE HTML >< html > n< header > n< title > PubSeq Gateway Help Page</title > n< style > n th
Contains the command line options common to filtering algorithms.
const size_t kNumInputFormats
Number of elements in kInputFormats.
const char * kOutputFormats[]
Output formats allowed, the first one is the default.
const size_t kNumOutputFormats
Number of elements in kOutputFormats.
const std::string kOutput
Command line flag to specify the output.
const std::string kOutputFormat
Command line flag to specify the output format.
const char * kInputFormats[]
Input formats allowed, the first one is the default.
const std::string kInput
Command line flag to specify the input.
const std::string kInputFormat
Command line flag to specify the input format.
string BuildAlgorithmParametersString(const CArgs &args)
Builds an algorithm options string for the filtering applications (segmasker, dustmasker) by examining the command line arguments.
NCBI C++ auxiliary debug macros.
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4