A RetroSearch Logo

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Search Query:

Showing content from http://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/doxyhtml/seq__dict_8hpp_source.html below:

NCBI C++ ToolKit: include/gui/utils/seq_dict.hpp Source File

1 #ifndef __GUI_SEQ_DICTIONARY_HPP_ 2 #define __GUI_SEQ_DICTIONARY_HPP_ 53 #ifndef __USE_BM_XOR_COMPRESSION__ 54 #define __USE_BM_XOR_COMPRESSION__ 69  'A'

,

'C'

,

'G'

,

'T'

,

'N'

,

'B'

,

'D'

,

'E'

,

'F'

,

'H'

,

'I'

,

'J'

,

'K'

,

70  'L'

,

'M'

,

'O'

,

'P'

,

'Q'

,

'R'

,

'S'

,

'U'

,

'V'

,

'W'

,

'X'

,

'Y'

,

'Z'

,

105 template

<

typename

Alphabet = TSeqAlphabet<true>>

120  if

(IsRegion(index) ==

false

) {

126  auto

it = m_Terms.get_const_iterator(

offset

);

127  for

(

size_t i

= 0;

i

<

len

&& it.valid(); ++

i

) {

139  return

x_GetOffset(index) == 0 && x_GetLen(index) > 0;

146  return len

== 0 ? 1 :

len

;

160 #ifdef __USE_BM_XOR_COMPRESSION__ 164

m_Terms.optimize(TB);

165

sv_serializer.

serialize

(m_Terms, sv_lay);

166  const unsigned char

*

buf

= sv_lay.

data

();

168  if

(!os.write(

reinterpret_cast<const char

*

>

(&sz),

sizeof

(sz)))

170  if

(sz && !os.write((

char

*)&

buf

[0], sz))

186

sv_deserializer.

deserialize

(m_Terms, (

const unsigned char

*)&

data

[pos]);

219  return

x_SetIndex(char_code, 0);

225  return

x_SetIndex(0,

len

);

251 template

<

typename

Alphabet = TSeqAlphabet<true>>

272  if

(seq.length() == 1) {

273

index = x_AddChar(seq[0]);

276

m_CheckSum.Reset(ncbi::CChecksum::eCRC32);

277

m_CheckSum.AddLine(seq);

278  auto crc32

= m_CheckSum.GetChecksum();

279  if

(m_SeqPos.test(

crc32

)) {

284  if

(x_FindSequence(seq, pos)) {

285

index = TSeqDict::x_SetIndex(pos + 1, seq.length());

287

index = x_AddSequence(seq);

290

m_SeqPos.set(

crc32

);

291

index = x_AddSequence(seq);

300  return

TSeqDict::x_SetIndexAsRegion(

len

);

306

unique_ptr<TSeqDict> seqdict(

new TSeqDict

());

307  auto

sz = m_SearchStr.size();

309

seqdict->m_Terms[0] = 0;

310  while

(curr_pos < sz) {

311

vector<unsigned>

buffer

(10 * 1024 * 1024 );

313  for

(;

i

<

buffer

.size() && curr_pos < sz; ++

i

, ++curr_pos) {

314  buffer

[

i

] = m_Coder[(unsigned)m_SearchStr[curr_pos]];

316

seqdict->m_Terms.import_back(&

buffer

[0],

i

);

325  auto

seq_sz = seq.size();

328

m_SearchStr.push_back(

'\0'

);

329  auto

ptr = strstr(&m_SearchStr[0], seq.c_str());

330

m_SearchStr.pop_back();

332

pos = ptr - &m_SearchStr[0];

341  auto code

= m_Coder[(unsigned)

C

];

343  throw

runtime_error(

"The sequence contains bad symbol '"

+

string

(1,

C

) +

"'"

);

344  return

TSeqDict::x_SetIndexAsChar(

code

);

350  auto

seq_size = seq.size();

354

vector<unsigned> seq_v(seq_size);

355  for

(

size_t i

= 0;

i

< seq_size; ++

i

) {

356  auto code

= m_Coder[(unsigned)seq[

i

]];

357  if

(

code

==

kBAD_CODE

)

throw

runtime_error(

"The sequence contains bad symbol '"

+

string

(1, seq[

i

]) +

"'"

);

360

idx = TSeqDict::x_SetIndex(m_CurrPos, seq_size);

361

m_CurrPos += seq_size;

362  copy

(seq.begin(), seq.end(), back_inserter(m_SearchStr));

371  size_t

m_CurrPos = 0;

Compressed bit-vector bvector<> container, set algebraic methods, traversal iterators.

#define BM_DECLARE_TEMP_BLOCK(x)

Algorithms for bit ranges and intervals.

Sparse container sparse_vector<> for integer types using bit-transposition transform.

Algorithms for bm::sparse_vector.

Compressed sparse container rsc_sparse_vector<> for integer types.

Serialization for sparse_vector<>

string sparse vector based on bit-transposed matrix

Checksum and hash calculation classes.

ncbi::CChecksum m_CheckSum

unique_ptr< TSeqDict > Build()

Builds the sequence dictionary from previously added sequences.

vector< unsigned > m_Coder

TIndex AddRegion(uint32_t len)

Create Region index.

TSeqDict::svector_u32 svector_u32

bool x_FindSequence(const string &seq, unsigned &pos)

TIndex x_AddSequence(const string &seq)

If necessary, adds a dictionary sequence and returns its index.

CSeqDictionary< Alphabet > TSeqDict

TIndex AddSequence(const string &seq)

Adds a sequence and returns its index.

vector< char > m_SearchStr

static bool IsRegion(TIndex index)

Checks if index references an actual sequence or it just contains a length of unaligned region.

string & GetSequence(TIndex index, string &seq) const

returns Sequence by Index

size_t Deserialize(const char *data)

Deserialization from a string starting from pos; returns the number of deserialized bytes.

static TIndex x_SetIndexAsChar(uint32_t char_code)

static TIndex x_SetIndex(uint32_t offset, uint32_t len)

Index mask functions.

static uint32_t x_GetOffset(TIndex index)

static size_t GetSeqLength(TIndex index)

Returns the sequence length encoded in the index.

bool Serialize(ostream &os)

Serialization to ostream.

bm::sparse_vector< unsigned, bm::bvector<> > svector_u32

static uint32_t x_GetLen(TIndex index)

unsigned long long TIndex

static TIndex x_SetIndexAsRegion(uint32_t len)

sparse vector de-serializer

void deserialize(SV &sv, const unsigned char *buf, bool clear_sv=true)

Serialize sparse vector into a memory buffer(s) structure.

void serialize(const SV &sv, sparse_vector_serial_layout< SV > &sv_layout)

Serialize sparse vector into a memory buffer(s) structure.

void enable_xor_compression() noexcept

Enable XOR compression on vector serialization.

succinct sparse vector with runtime compression using bit-slicing / transposition method

@ BM_GAP

GAP compression is ON.

static const BitmapCharRec *const chars[]

void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)

static const unsigned kBAD_CODE

layout class for serialization buffer structure

const unsigned char * data() const noexcept

Return serialization buffer pointer.

size_t size() const noexcept

return current serialized size


RetroSearch is an open source project built by @garambo | Open a GitHub Issue

Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo

HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4