A RetroSearch Logo

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Search Query:

Showing content from http://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/doxyhtml/kmercounts_8cpp_source.html below:

NCBI C++ ToolKit: src/algo/cobalt/kmercounts.cpp Source File

70

objects::CScope& scope)

89  while

(index + kmer_len - 1 < sv.size() &&

i

< kmer_len) {

92  if

(sv[index +

i

] ==

kXaa

) {

99

pos |=

GetAALetter

(sv[index +

i

]) << (num_bits * (kmer_len -

i

- 1));

114  int

index = pos / chunk;

115  int offset

= pos - index * chunk;

122  unsigned int

num_bits)

136

counts =

new TCount

[num_elements];

138  catch

(std::bad_alloc) {

144

counts =

new TCount

[num_elements];

146  catch

(std::bad_alloc) {

148  "Memory cannot be allocated for k-mer counting." 149  " Try using compressed alphabet or smaller k."

);

158

objects::CScope& scope)

163  _ASSERT

(kmer_len > 0 && alphabet_size > 0);

167  "Compressed alphabet selected, but translation table not" 171  if

(!seq.IsWhole() && !seq.IsInt()) {

173  "Unsupported SeqLoc encountered"

);

177

objects::CSeqVector sv = scope.GetBioseqHandle(*seq.GetId()).GetSeqVector();

179  unsigned int

num_elements;

180  unsigned int

seq_len = sv.size();

188  "Sequence shorter than desired k-mer length"

);

192  unsigned int mask

= 1;

194  while

(alphabet_size >

mask

) {

198  const int

kNumBits = num;

204

counts = tmp_counts.

get

();

215

num_elements = 1 << (kNumBits * kmer_len);

216  const Uint4

kMask = num_elements - (1 << kNumBits);

219

memset(counts, 0, num_elements *

sizeof

(

TCount

));

221  const int

kBitChunk =

sizeof

(

Uint4

) * 8;

224

vector<Uint4> used_entries(num_elements / kBitChunk + 1);

229  bool

is_pos =

InitPosBits

(sv, pos,

i

, kNumBits, kmer_len);

233  MarkUsed

(pos, used_entries, kBitChunk);

237  for

(;

i

< seq_len && is_pos;

i

++) {

241  "Letter out of alphabet in sequnece"

);

246  if

(sv[

i

] ==

kXaa

) {

250  if

(

i

>= seq_len || !is_pos) {

260  MarkUsed

(pos, used_entries, kBitChunk);

268  Uint4

num_bit_chunks = num_elements / kBitChunk + 1;

269  while

(ind < num_elements / kBitChunk + 1) {

272  while

(ind < num_bit_chunks && used_entries[ind] == 0) {

276  if

(ind == num_bit_chunks) {

281  for

(

Uint4 mask

=0x80000000,j=0;used_entries[ind] != 0;

284  if

((used_entries[ind] &

mask

) != 0) {

285

pos = ind * kBitChunk + j;

290

used_entries[ind] ^=

mask

;

298  _ASSERT

(pow((

double

)alphabet_size, (

double

)kmer_len)

302  for

(

Uint4 i

=0;

i

< kmer_len;

i

++) {

303

base[

i

] = pow((

double

)alphabet_size, (

double

)

i

);

306

num_elements = (

Uint4

)pow((

double

)alphabet_size, (double)kmer_len);

309

memset(counts, 0, num_elements *

sizeof

(

TCount

));

312  const int

kBitChunk =

sizeof

(

Uint4

) * 8;

313

vector<Uint4> used_entries(num_elements / kBitChunk + 1);

316  for

(

unsigned i

=0;

i

< seq_len - kmer_len + 1;

i

++) {

319  if

(sv[

i

+ kmer_len - 1] ==

kXaa

) {

326  for

(

Uint4

j=1;j < kmer_len;j++) {

331  MarkUsed

(pos, used_entries, kBitChunk);

338  Uint4

num_bit_chunks = num_elements / kBitChunk + 1;

339  while

(ind < num_elements / kBitChunk + 1) {

342  while

(ind < num_bit_chunks && used_entries[ind] == 0) {

346  if

(ind == num_bit_chunks) {

351  for

(

Uint4 mask

=0x80000000,j=0;used_entries[ind] != 0;

354  if

((used_entries[ind] &

mask

) != 0) {

355

pos = ind * kBitChunk + j;

360

used_entries[ind] ^=

mask

;

378  unsigned int

num_counts2 =

v2

.GetNumCounts();

379  unsigned int

fewer_counts

380

= num_counts1 < num_counts2 ? num_counts1 : num_counts2;

385  return

1.0 - (double)num_common / (

double

)fewer_counts;

398  unsigned int

num_counts2 =

v2

.GetNumCounts();

399  unsigned int

more_counts

400

= num_counts1 > num_counts2 ? num_counts1 : num_counts2;

405  return

1.0 - (double)num_common / (

double

)more_counts;

424

&& it1->position == it2->position) {

428  result

+= (unsigned)(it1->value < it2->value

429

? it1->value : it2->value);

441

&& it1->position < it2->position) {

446

&& it2->position < it1->position) {

461  unsigned int mask

= 1;

485

ostr << it->position <<

":"

<< (

int

)it->value <<

" "

;

494

objects::CScope& scope)

501

objects::CScope& scope)

506  _ASSERT

(kmer_len > 0 && alphabet_size > 0);

510  "Compressed alphabet selected, but translation table not" 514  if

(!seq.IsWhole() && !seq.IsInt()) {

516  "Unsupported SeqLoc encountered"

);

520

objects::CSeqVector sv = scope.GetBioseqHandle(*seq.GetId()).GetSeqVector();

522  unsigned int

num_elements;

523  unsigned int

seq_len = sv.size();

531  "Sequence shorter than desired k-mer length"

);

534  const int

kBitChunk =

sizeof

(

Uint4

) * 8;

541  _ASSERT

(pow((

double

)alphabet_size, (

double

)kmer_len)

545  for

(

Uint4 i

=0;

i

< kmer_len;

i

++) {

546

base[

i

] = pow((

double

)alphabet_size, (

double

)

i

);

549

num_elements = (

Uint4

)pow((

double

)alphabet_size, (double)kmer_len);

560  for

(

unsigned

j=0;j < kmer_len &&

i

< seq_len - kmer_len + 1;j++) {

562  if

(sv[

i

+ j] ==

kXaa

) {

568

}

while

(

i

< seq_len - kmer_len + 1 && is_xaa);

570  if

(

i

>= seq_len - kmer_len + 1) {

575  for

(;

i

< seq_len - kmer_len + 1;

i

++) {

578  if

(sv[

i

+ kmer_len - 1] ==

kXaa

) {

586  for

(

unsigned

j=0;j < kmer_len &&

i

< seq_len - kmer_len + 1;

589  if

(sv[

i

+ j] ==

kXaa

) {

595

}

while

(

i

< seq_len - kmer_len + 1 && is_xaa);

598  if

(

i

>= seq_len - kmer_len + 1) {

605  for

(

Uint4

j=1;j < kmer_len;j++) {

627  unsigned int

num_counts2 =

v2

.GetNumCounts();

628  unsigned int

fewer_counts

629

= num_counts1 < num_counts2 ? num_counts1 : num_counts2;

634  return

1.0 - (double)num_common / (

double

)fewer_counts;

647  unsigned int

num_counts2 =

v2

.GetNumCounts();

648  unsigned int

more_counts

649

= num_counts1 > num_counts2 ? num_counts1 : num_counts2;

654  return

1.0 - (double)num_common / (

double

)more_counts;

667  for

(

size_t i

=0;

i

<

size

;

i

++) {

static const int kAlphabetSize

The aligner internally works only with the ncbistdaa alphabet.

ncbi::TMaskedQueryRegions mask

K-mer counts implemented as bit vectors.

static unsigned int GetKmerLength(void)

Get k-mer length.

static double FractionCommonKmersGlobalDist(const CBinaryKmerCounts &v1, const CBinaryKmerCounts &v2)

void Reset(const objects::CSeq_loc &seq, objects::CScope &scope)

Compute counts.

CBinaryKmerCounts(void)

Constructor.

unsigned int GetNumCounts(void) const

Get number of k-mers.

static unsigned int sm_AlphabetSize

static double FractionCommonKmersDist(const CBinaryKmerCounts &vect1, const CBinaryKmerCounts &vect2)

static Uint4 x_Popcount(Uint4 v)

Get number of set bits (adapted from http://graphics.stanford.edu/~seander/bithacks....

static CSafeStatic< vector< Uint1 > > sm_TransTable

static bool sm_UseCompressed

static Uint4 GetAALetter(Uint1 letter)

static unsigned int sm_KmerLength

static unsigned int CountCommonKmers(const CBinaryKmerCounts &v1, const CBinaryKmerCounts &v2)

Copmute number of common kmers between two count vectors.

Exception class for Kmer counts.

Kmer counts for alignment free sequence similarity computation implemented as a sparse vector.

static unsigned int sm_AlphabetSize

static unsigned int GetKmerLength(void)

Get default kmer length.

CSparseKmerCounts(void)

Create empty counts vector.

static TCount * ReserveCountsMem(unsigned int num_bits)

vector< SVectorElement > m_Counts

vector< SVectorElement >::const_iterator TNonZeroCounts_CI

static double FractionCommonKmersGlobalDist(const CSparseKmerCounts &v1, const CSparseKmerCounts &v2)

static TCount * sm_Buffer

static void PreCount(void)

Perform preparations before k-mer counting common to all sequences.

static CSafeStatic< vector< Uint1 > > sm_TransTable

static void PostCount(void)

Perform post-kmer counting tasks.

static const unsigned int kLengthBitsThreshold

TNonZeroCounts_CI BeginNonZero(void) const

Get non-zero counts iterator.

static unsigned int sm_KmerLength

static double FractionCommonKmersDist(const CSparseKmerCounts &vect1, const CSparseKmerCounts &vect2)

TNonZeroCounts_CI EndNonZero(void) const

Get non-zero counts iterator.

static Uint4 GetAALetter(Uint1 letter)

void Reset(const objects::CSeq_loc &seq, objects::CScope &scope)

Reset the counts vector.

unsigned int GetNumCounts(void) const

Get number of all k-mers found in the sequence.

static bool sm_UseCompressed

static unsigned int CountCommonKmers(const CSparseKmerCounts &v1, const CSparseKmerCounts &v2, bool repetitions=true)

Copmute number of common kmers between two count vectors.

static bool sm_ForceSmallerMem

static bool InitPosBits(const objects::CSeqVector &sv, Uint4 &pos, unsigned int &index, Uint4 num_bits, Uint4 kmer_len)

Initializes element index as bit vector for first k letters, skipping Xaa.

CNcbiOstream & Print(CNcbiOstream &ostr) const

Print counts.

element_type * get(void) const

Get pointer.

void reset(element_type *p=0)

Reset will delete the old pointer, set content to the new value, and assume the ownership upon the ne...

#define NCBI_THROW(exception_class, err_code, message)

Generic macro to throw an exception, given the exception class, error code and message string.

uint8_t Uint1

1-byte (8-bit) unsigned integer

uint32_t Uint4

4-byte (32-bit) unsigned integer

IO_PREFIX::ostream CNcbiOstream

Portable alias for ostream.

unsigned int

A callback function used to compare two keys in a database.

static void MarkUsed(Uint4 pos, vector< Uint4 > &entries, int chunk)

const struct ncbi::grid::netcache::search::fields::SIZE size

Defines NCBI C++ exception handling.

NCBI C++ stream class wrappers for triggering between "new" and "old" C++ stream libraries.

Element of the sparse vector.

static wxAcceleratorEntry entries[3]


RetroSearch is an open source project built by @garambo | Open a GitHub Issue

Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo

HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4