arg_window_size,
73 Uint4arg_window_step,
76 Uint4arg_cutoff_score,
79 Uint4arg_set_max_score,
80 Uint4arg_set_min_score,
82 Uint4arg_merge_cutoff_score,
83 Uint4arg_abs_merge_cutoff_dist,
84 Uint4arg_mean_merge_cutoff_dist,
85 Uint1arg_merge_unit_step,
86 const string& arg_trigger,
108 window_size( arg_window_size ), window_step( arg_window_step ),
109unit_step( arg_unit_step ),
110merge_pass( arg_merge_pass ),
111merge_cutoff_score( arg_merge_cutoff_score ),
112abs_merge_cutoff_dist( arg_abs_merge_cutoff_dist ),
113mean_merge_cutoff_dist( arg_mean_merge_cutoff_dist ),
114merge_unit_step( arg_merge_unit_step ),
115trigger( arg_trigger ==
"mean"? eTrigger_Mean
117discontig( arg_discontig ), pattern( arg_pattern )
121 if( window_size < ustat->UnitSize() ) {
122std::ostringstream os;
124 "must be greater or equal to unit size ("<<
141 if( arg_merge_pass )
179unique_ptr<CSeqMaskerWindow> window_ptr
191 Uint4start = 0, end = 0, cend = 0;
192 Uint4limit = textend;
200 Uint4ts = (*trigger_score)();
201 Uint4s = (*score)();
208 if( window.
Start() > cend )
211start = end = cend = 0;
215 if( od != 0 && od->
cba_!= 0 )
217adv = window.
Start();
219 if( !booster.
Check() )
225 else if( ts < cutoff_score )
229 if( window.
Start() > cend + 1 )
232start = end = cend = 0;
234 elsecend = window.
End();
241 if( window.
Start() > cend + 1 )
244start = window.
Start();
247 elsestart = window.
Start();
249cend = end = window.
End();
269 if(
mask->size() < 2 )
return mask.release();
272TMaskList::iterator jtmp =
mask->end();
275 for( TMaskList::iterator
i=
mask->begin(), j = --jtmp;
278masked.push_back(
mitem(
i->first,
i->second, unit_size,
280 Uint4nstart = (
i++)->second - unit_size + 2;
281unmasked.push_back(
mitem( nstart,
i->first + unit_size - 2,
282unit_size,
data, *
this) );
285masked.push_back(
mitem( (
mask->rbegin())->first,
286(
mask->rbegin())->second,
287unit_size,
data, *
this) );
291TMList::iterator ii = masked.begin();
292TMList::iterator j = unmasked.begin();
293TMList::iterator k = ii,
l= ii;
296 for( ; ii != masked.end(); k =
l= ii, --k, ++
l)
298 Uint4ldist = (ii != masked.begin())
299? ii->start - k->end - 1 : 0;
300TMList::iterator tmpend = masked.end();
302 Uint4rdist = (ii != tmpend)
303?
l->start - ii->end - 1 : 0;
304 doublelavg = 0.0, ravg = 0.0;
305 boolcan_go_left =
count&& ldist
307 boolcan_go_right = rdist
312TMList::iterator
tmp= j; --
tmp;
319ravg =
MergeAvg( ii, j, unit_size );
336k->avg =
MergeAvg( k, --j, unit_size );
338<< k->start <<
" - "<< k->end
340<< ii->start <<
" - "<< ii->end );
341 Merge( masked, k, unmasked, j );
359 else if( can_go_left )
362k->avg =
MergeAvg( k, --j, unit_size );
364<< k->start <<
" - "<< k->end
366<< ii->start <<
" - "<< ii->end );
367 Merge( masked, k, unmasked, j );
385 for( ii = masked.begin(), j = unmasked.begin(), k = ii++;
386ii != masked.end(); (k = ii++), j++ )
390 _TRACE(
"Unconditionally merging " 391<< k->start <<
" - "<< k->end
393<< ii->start <<
" - "<< ii->end );
394k->avg =
MergeAvg( k, j, unit_size );
395 Merge( masked, k, unmasked, j );
398 if( ++ii == masked.end() )
break;
404 for( TMList::const_iterator iii = masked.begin(); iii != masked.end(); ++iii )
408 return mask.release();
413 constTMList::iterator & umi,
414 Uint4unit_size )
const 416TMList::iterator
tmp= mi++;
421 doublea1 =
tmp->avg, a2 = umi->avg, a3 = mi->avg;
422 return(a1*n1 + a2*n2 + a3*n3)/
N;
427 TMList& um, TMList::iterator & umi )
const 429TMList::iterator
tmp= mi++;
432umi = um.erase( umi );
442 return "can not open input stream";
446 return "syntax error";
450 return "the following parameters could not be determined" 451 " from the unit frequency database or command line: ";
455 return "score function object allocation failed";
459 return "merge pass score function object allocation failed";
463 return "validation error";
474: start( arg_start ), end( arg_end ), avg( 0.0 )
496 while( window->
End() <
end)
513TMaskList::const_iterator
si( src->begin() );
514TMaskList::const_iterator send( src->end() );
515TMaskList::iterator di( dest->begin() );
516TMaskList::iterator dend( dest->end() );
521 if( di != dend && di->first <
si->first )
523 elseseg = *(
si++);
529 if(
si->first < di->first ) {
530next_seg = *(
si++);
535next_seg = *(
si++);
537}
else if( di != dend ) {
543 if( seg.second + 1 < next_seg.first ) {
544res.push_back( seg );
547 else if( seg.second < next_seg.second ) {
548seg.second = next_seg.second;
552res.push_back( seg );
ncbi::TMaskedQueryRegions mask
Interface to the bit array used to check if the score of a unit is below t_extend.
bool Check()
Check the current state of the window and advance.
Factory class to generate an appropriate CSeqMaskerIstat derived class based on the format name.
CSeqMaskerWindow::TUnit AmbigUnit() const
Get the value of the unit used to represent an ambiguity.
Uint4 get_textend() const
Get the value of T_extend.
virtual Uint1 UnitSize() const =0
Get the unit size.
const optimization_data * get_optimization_data() const
Get the data structure optimization parameters.
Uint4 get_threshold() const
Get the value of T_threshold.
Average unit score from the start of the sequence to the end of the current window.
Score function object computing mean of unit in a window.
The score function object that computes maxmin of k consecutive units in a window.
Abstract base class for score function objects.
virtual void PreAdvance(Uint4 step)=0
Window advancement notification.
void SetWindow(const CSeqMaskerWindow &new_window)
Set the window object that should be used for score computation.
virtual void PostAdvance(Uint4 step)=0
Window advancement notification.
static Uint1 BitCount(Uint4 mask, Uint1 bit_value=1)
Count the bits with given value in a given bit pattern.
Windows with units that may contain ambiguities.
Window iterator for discontiguous units used for the merging pass.
Window iterator used for discontiguous units.
Sliding window skipping over the ambiguities.
Uint4 Step() const
Get the current value of the window step.
Uint4 End() const
Get the current ending position of the window.
Uint4 Start() const
Get the current starting position of the window.
Uint4 TUnit
Integer type used to represent units within a window.
Represents different error situations that can occur in the masking process.
@ eValidation
Inconsistent internal parameters.
@ eLstatSyntax
Error parsing the length statistics file.
@ eLstatParam
Error deducing parameters from lstat or command line.
@ eScoreAllocFail
Error allocating the score function object.
@ eLstatStreamIpenFail
Error opening the length statistics file.
@ eScoreP3AllocFail
Error allocating the score function object for merging pass.
virtual const char * GetErrCodeString() const override
Get the exception description string.
Main interface to window based masker functionality.
void Merge(TMList &m, TMList::iterator mi, TMList &um, TMList::iterator &umi) const
~CSeqMasker()
Object destructor.
static void MergeMaskInfo(TMaskList *dest, const TMaskList *src)
Merge together two result lists.
@ eTrigger_Min
Using the min score of k units in the window.
CSeqMaskerScore * trigger_score
Uint4 abs_merge_cutoff_dist
CSeqMaskerScore * score_p3
pair< TSeqPos, TSeqPos > TMaskedInterval
Type representing a masked interval within a sequence.
Uint4 mean_merge_cutoff_dist
TMaskList * DoMask(const objects::CSeqVector &data, TSeqPos start, TSeqPos end) const
vector< TMaskedInterval > TMaskList
A type representing the total of masking information about a sequence.
TMaskList * operator()(const objects::CSeqVector &data) const
Sequence masking operator.
double MergeAvg(TMList::iterator mi, const TMList::iterator &umi, Uint4 unit_size) const
enum CSeqMasker::@32 trigger
static CSeqMaskerVersion AlgoVersion
Version of window masking algorithm.
CRef< CSeqMaskerIstat > ustat
CSeqMasker(const string &lstat_name, Uint1 arg_window_size, Uint4 arg_window_step, Uint1 arg_unit_step, Uint4 arg_textend, Uint4 arg_cutoff_score, Uint4 arg_max_score, Uint4 arg_min_score, Uint4 arg_set_max_score, Uint4 arg_set_min_score, bool arg_merge_pass, Uint4 arg_merge_cutoff_score, Uint4 arg_abs_merge_cutoff_dist, Uint4 arg_mean_merge_cutoff_dist, Uint1 arg_merge_unit_step, const string &arg_trigger, Uint1 tmin_count, bool arg_discontig, Uint4 arg_pattern, bool arg_use_ba, double min_pct=-1.0, double extend_pct=-1.0, double thres_pct=-1.0, double max_pct=-1.0)
Object constructor.
static const char si[8][64]
unsigned int TSeqPos
Type for sequence locations and lengths.
TErrCode GetErrCode(void) const
Get error code.
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
virtual const char * GetErrCodeString(void) const
Get error code interpreted as text.
uint8_t Uint1
1-byte (8-bit) unsigned integer
int32_t Int4
4-byte (32-bit) signed integer
uint32_t Uint4
4-byte (32-bit) unsigned integer
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
unsigned int
A callback function used to compare two keys in a database.
#define WIN_MASK_ALGO_VER_MAJOR
#define WIN_MASK_ALGO_NAME
#define WIN_MASK_ALGO_VER_PATCH
#define WIN_MASK_ALGO_VER_MINOR
static SLJIT_INLINE sljit_ins l(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
Structure containing information about optimization parameters used.
Uint4 * cba_
Bit array with zeroes where all corresponding units have counts below t_extend.
Uint4 start
Start of the interval.
Uint4 end
End of the interval.
mitem(Uint4 start, Uint4 end, Uint1 unit_size, const objects::CSeqVector &data, const CSeqMasker &owner)
Object constructor.
double avg
Average score of the units in the interval.
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4