A RetroSearch Logo

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Search Query:

Showing content from http://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/doxyhtml/win__mask__gen__counts_8cpp_source.html below:

NCBI C++ ToolKit: src/algo/winmask/win_mask_gen_counts.cpp Source File

66  case 'a'

:

case 'A'

:

return

0;

67  case 'c'

:

case 'C'

:

return

1;

68  case 'g'

:

case 'G'

:

return

2;

69  case 't'

:

case 'T'

:

return

3;

75 static inline bool ambig

(

char

c )

77  return

c !=

'a'

&& c !=

'A'

&& c !=

'c'

&& c !=

'C' 78

&& c !=

'g'

&& c !=

'G'

&& c !=

't'

&& c !=

'T'

;

87  if

( bioseq.CanGetInst()

88

&& bioseq.GetInst().CanGetLength()

89

&& bioseq.GetInst().CanGetSeq_data() )

92  const CSeq_data

& seqdata( bioseq.GetInst().GetSeq_data() );

93

unique_ptr< CSeq_data > dest(

new CSeq_data

);

96  return

dest->GetIupacna().Get();

125  const string

& arg_input,

127  const string

& infmt_arg,

128  const string

& sformat,

129  const string

& arg_th,

132  Uint8

arg_genome_size,

135  bool

arg_check_duplicates,

139  bool

use_ba,

string const

& metadata,

140  double

min_pct,

double

extend_pct,

double

thres_pct,

double

max_pct )

141

:

input

( arg_input ),

143

sformat, os, use_ba, metadata ) ),

144

max_mem( mem_avail*1024*1024ULL ), unit_size( arg_unit_size ),

145

genome_size( arg_genome_size ),

146

min_count( arg_min_count == 0 ? 1 : arg_min_count ),

149

t_high( arg_max_count ),

150

has_min_count( arg_min_count != 0 ),

151

no_extra_pass( arg_min_count != 0 && arg_max_count != 0 ),

152

check_duplicates( arg_check_duplicates ),use_list( arg_use_list ),

154

score_counts( max_count, 0 ),

155

ids( arg_ids ), exclude_ids( arg_exclude_ids ),

159

string::size_type pos( 0 );

162  while

( pos != string::npos &&

count

< 4 )

164

string::size_type newpos = arg_th.find_first_of(

","

, pos );

165  th

[

count

++] = atof( arg_th.substr( pos, newpos - pos ).c_str() );

166

pos = (newpos == string::npos ) ? newpos : newpos + 1;

172  const string

& arg_input,

174  const string

& infmt_arg,

175  const string

& sformat,

176  const string

& arg_th,

179  Uint8

arg_genome_size,

182  bool

arg_check_duplicates,

186  bool

use_ba,

string const

& metadata,

187  double

min_pct,

double

extend_pct,

double

thres_pct,

double

max_pct )

188

:

input

( arg_input ),

190

sformat,

output

, use_ba, metadata ) ),

191

max_mem( mem_avail*1024*1024ULL ), unit_size( arg_unit_size ),

192

genome_size( arg_genome_size ),

193

min_count( arg_min_count == 0 ? 1 : arg_min_count ),

196

t_high( arg_max_count ),

197

has_min_count( arg_min_count != 0 ),

198

no_extra_pass( arg_min_count != 0 && arg_max_count != 0 ),

199

check_duplicates( arg_check_duplicates ),use_list( arg_use_list ),

201

score_counts( max_count, 0 ),

202

ids( arg_ids ), exclude_ids( arg_exclude_ids ),

206

string::size_type pos( 0 );

209  while

( pos != string::npos &&

count

< 4 )

211

string::size_type newpos = arg_th.find_first_of(

","

, pos );

212  th

[

count

++] = atof( arg_th.substr( pos, newpos - pos ).c_str() );

213

pos = (newpos == string::npos ) ? newpos : newpos + 1;

216  if

( min_pct >= 0.0 )

th

[0] = min_pct;

217  if

( extend_pct >= 0.0 )

th

[1] = extend_pct;

218  if

( thres_pct >= 0.0 )

th

[2] = thres_pct;

219  if

( max_pct >= 0.0 )

th

[3] = max_pct;

229

vector< string > file_list;

237  while

( getline( fl_stream, line ) ) {

238  if

( !line.empty() ) {

239

file_list.push_back( line );

254  LOG_POST

(

"computing the genome length"

);

258  i

!= file_list.end(); ++

i

)

286  while

( suffix_size > 0 ) {

287  Uint8

units_needed( 1ULL<<(2*suffix_size) );

288  if

( units_needed <= n_units )

break

;

292  NCBI_ASSERT

( suffix_size > 0,

"suffix size is 0"

);

297  Uint4

prefix_exp( 1<<(2*prefix_size) );

301  for

(

Uint4

prefix( 0 ); prefix < prefix_exp; ++prefix ) {

312  Uint4

index[4] = {0, 0, 0, 0};

313  double

previous( 0.0 );

339  for

(

Uint1

j( 0 ); j < 4; ++j )

340  if

( previous <

th

[j] && current >=

th

[j] )

365  for

(

Uint4

prefix( 0 ); prefix < prefix_exp; ++prefix )

366  process

( prefix, prefix_size, file_list,

true

);

397

s <<

" "

<<

th

[

i

] <<

"%% threshold at "

<< index[

i

];

411  const

vector< string > & input_list,

415  Uint8

vector_size( 1ULL<<(2*suffix_size) );

416

vector< Uint4 > counts( vector_size, 0 );

418  Uint4

prefix_mask( ((1<<(2*prefix_size)) - 1)<<(2*suffix_size) );

419  Uint4

suffix_mask( (1<<2*suffix_size) - 1 );

420  if

(

unit_size

== 16 ) unit_mask = 0xFFFFFFFF;

422  if

( suffix_size == 16 )

424

suffix_mask = 0xFFFFFFFF;

428  _TRACE

(

"prefix: "

<< prefix <<

429  "\nprefix_size: "

<< (

int

)prefix_size <<

430  "\nsuffix_size: "

<< (

int

)suffix_size <<

431  "\nvector_size: "

<< vector_size <<

432  "\nunit_mask: "

<< unit_mask <<

433  "\nprefix_mask: "

<< prefix_mask <<

434  "\nsufffix_mask: "

<< suffix_mask );

446

prefix <<= (2*suffix_size);

450

it != input_list.end(); ++it )

468  for

(

Uint4 i

( 0 );

i

< length; ++

i

) {

477

unit = ((unit<<2)&unit_mask) +

letter

(

data

[

i

] );

483  if

( unit <= runit && (unit&prefix_mask) == prefix )

485  auto

& c( counts[unit&suffix_mask] );

487  if

( c < 0xffffffffUL )

494  if

( runit <= unit && (runit&prefix_mask) == prefix )

496  auto

& c( counts[runit&suffix_mask] );

498  if

( c < 0xffffffffUL )

527  for

(

Uint8 i

( 0 );

i

< vector_size; ++

i

)

529  Uint4

u( prefix +

i

), ru( 0 );

531  if

( counts[

i

] > 0 )

User-defined methods of the data storage class.

Factory of CSeqMaskerOstat objects.

void setComment(const string &msg)

Add a comment to the unit counts file.

void SetCount(Uint4 count, double pct)

void SetMaxCount(Uint4 mc)

void setUnitCount(Uint4 unit, Uint4 count)

Add count value for a particular unit.

void finalize()

Perform any final tasks required to generate unit counts in the particular format.

void setParam(const string &name, Uint4 value)

Set a value of a WindowMasker parameter.

void setUnitSize(Uint1 us)

Set the unit size value.

static Uint4 reverse_complement(Uint4 seq, Uint1 size)

Reverse complement of a unit.

static TSeqPos Convert(const CSeq_data &in_seq, CSeq_data *out_seq, CSeq_data::E_Choice to_code, TSeqPos uBeginIdx=0, TSeqPos uLength=0, bool bAmbig=false, Uint4 seed=17734276)

Exceptions that CWinMaskCountsGenerator may throw.

@ eNullGenome

Genome has 0 size.

virtual const char * GetErrCodeString() const override

Return description string corresponding to an error code.

~CWinMaskCountsGenerator()

Object destructor.

vector< Uint4 > score_counts

void process(Uint4 prefix, Uint1 prefix_size, const vector< string > &input, bool do_output)

CRef< CSeqMaskerOstat > ustat

const CWinMaskUtil::CIdSet * ids

void operator()()

This function does the actual n-mer counting.

Uint8 fastalen(const string &fname) const

CWinMaskCountsGenerator(const string &input, const string &output, const string &infmt, const string &sformat, const string &th, Uint4 mem_avail, Uint1 unit_size, Uint8 genome_size, Uint4 min_count, Uint4 max_count, bool check_duplicates, bool use_list, const CWinMaskUtil::CIdSet *ids, const CWinMaskUtil::CIdSet *exclude_ids, bool use_ba, string const &metadata, double min_pct=-1.0, double extend_pct=-1.0, double thres_pct=-1.0, double max_pct=-1.0)

Constructor.

const CWinMaskUtil::CIdSet * exclude_ids

Base class for sets of seq_id representations used with -ids and -exclude-ids options.

Function iterating over bioseqs in input.

static bool consider(const objects::CBioseq_Handle &bsh, const CIdSet *ids, const CIdSet *exclude_ids)

Check if the given bioseq should be considered for processing.

static SQLCHAR output[256]

unsigned int TSeqPos

Type for sequence locations and lengths.

#define NCBI_ASSERT(expr, mess)

#define LOG_POST(message)

This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...

TErrCode GetErrCode(void) const

Get error code.

#define NCBI_THROW(exception_class, err_code, message)

Generic macro to throw an exception, given the exception class, error code and message string.

virtual const char * GetErrCodeString(void) const

Get error code interpreted as text.

static CRef< CObjectManager > GetInstance(void)

Return the existing object manager or create one.

TSeqPos GetBioseqLength(void) const

@ eCoding_Iupac

Set coding to printable coding (Iupacna or Iupacaa)

uint8_t Uint1

1-byte (8-bit) unsigned integer

uint32_t Uint4

4-byte (32-bit) unsigned integer

uint64_t Uint8

8-byte (64-bit) unsigned integer

#define END_NCBI_SCOPE

End previously defined NCBI scope.

#define BEGIN_NCBI_SCOPE

Define ncbi namespace.

IO_PREFIX::ostream CNcbiOstream

Portable alias for ostream.

IO_PREFIX::ifstream CNcbiIfstream

Portable alias for ifstream.

static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)

Split a string using specified delimiters.

const TSeq & GetSeq(void) const

Get the variant data.

@ e_Iupacna

IUPAC 1-letter nucleic acid code.

const struct ncbi::grid::netcache::search::fields::SIZE size

CRef< objects::CObjectManager > om

void CheckDuplicates(const vector< string > &input, const string &infmt, const CWinMaskUtil::CIdSet *ids, const CWinMaskUtil::CIdSet *exclude_ids)

Check for possibly duplicate sequences in the input.

static Uint4 reverse_complement(Uint4 seq, Uint1 size)

static Uint4 letter(char c)

static bool ambig(char c)


RetroSearch is an open source project built by @garambo | Open a GitHub Issue

Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo

HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4