A RetroSearch Logo

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Search Query:

Showing content from http://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/doxyhtml/sequence_2adapter__search_8cpp_source.html below:

NCBI C++ ToolKit: src/algo/sequence/adapter_search.cpp Source File

48  static const char

* alphabet =

"ACGT"

;

49  for

(

size_t i

= 0;

i

< mer_size;

i

++) {

50

s[mer_size - 1 -

i

] = alphabet[w & 3];

60  if

(words.size() == 0) {

65

iupac.resize(words.size() - 1);

66  static const char

* alphabet =

"ACGT"

;

67  for

(

size_t i

= 0;

i

< words.size() - 1;

i

++) {

68

iupac[

i

] = alphabet[words[

i

] >> (mer_size * 2 - 2)];

70

iupac +=

s_AsIUPAC

(words.back(), mer_size);

77

vector<Uint1> counts(64, 0);

80

counts[w2 & 0x3F] += 1;

84  for

(

size_t i

= 0;

i

< 64;

i

++) {

85  size_t

c = counts[

i

];

88  return

(144 - sumsq)/132.0;

94  const char

*

const

seq,

106  static const Uint1

char2letter[] = {

107

0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

108

0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

109

0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

110

0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

111

0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0,

112

0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

113

0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0,

114

0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

115

0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

116

0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

117

0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

118

0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

119

0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

120

0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

121

0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

122

0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0

131  Uint1

c = char2letter[

static_cast<int>

(seq[

i

])];

137  for

(

size_t i

= 1;

i

< words.size();

i

++) {

138  Uint1

c = char2letter[

static_cast<int>

(seq[

i

+ k])];

139

words[

i

] = ((words[

i

- 1] &

mask

) << 2) | c;

144  Uint1

c = 3 - char2letter[

static_cast<int>

(seq[seq_size - 1 -

i

])];

150  for

(

size_t i

= 1;

i

< words.size();

i

++) {

151  Uint1

c = 3 - char2letter[

static_cast<int>

(seq[k -

i

])];

152

words[

i

] = ((words[

i

- 1] &

mask

) << 2) | c;

159

::CPairedEndAdapterDetector

162

TWords::const_iterator begin,

163

TWords::const_iterator end)

165  size_t

words_size = end - begin;

166  size_t len

=

min

(words_size, m_len);

170  for

(

size_t i

= 0;

i

<

len

;

i

++) {

171

x_IncrCount(

i

, *(begin +

i

) >> 4);

175  if

(words_size > 0 && words_size < m_len) {

176  size_t len

= min<size_t>(2, m_len - words_size);

177  TWord

w = *(begin + words_size);

178  for

(

size_t i

= 0;

i

<

len

;

i

++) {

179  size_t

pos = words_size +

i

;

180  TWord

w2 = (w >> (2 -

i

*2)) & 0xFFFFF;

181

x_IncrCount(pos, w2);

188

::CPairedEndAdapterDetector

190

::x_NextWord(

size_t

pos,

TWord

prev_word)

const 193  size_t

best_count = 0;

195  TWord

word = ((prev_word << 2) & 0xFFFFF) |

letter

;

197  if

(

count

> best_count) {

206 string

NAdapterSearch

207

::CPairedEndAdapterDetector

209

::InferConsensus(

const SParams

& params)

const 213  size_t

secondbest_sup(0);

215  for

(

size_t

word = 0; word < NMERS10; word++) {

216  size_t

sup = m_counts[word];

218

secondbest_sup = top_sup;

229

words[0] = best_word;

233

<< s_AsIUPAC(best_word,10)

234

<<

"; overrepresentation: " 240  for

(

size_t i

= 1;

i

< words.size();

i

++) {

241  TWord

last_word = words[

i

- 1];

242  TCount

last_sup = x_GetCount(

i

- 1, last_word);

244  TWord

curr_word = x_NextWord(

i

- 1, last_word);

245  TCount

curr_sup = x_GetCount(

i

, curr_word);

248

words[

i

] = curr_word;

255  return

s_AsIUPAC(words, 10);

260

pair<size_t, size_t> NAdapterSearch

261

::CPairedEndAdapterDetector

262

::s_FindAdapterStartPos(

266  return

pair<size_t, size_t>(

267

find(fwd_read.begin(), fwd_read.end(), rev_read.back())

268

- fwd_read.begin() + MER_LENGTH,

270

find(rev_read.begin(), rev_read.end(), fwd_read.front())

276

::CPairedEndAdapterDetector

281  size_t len

= spot_len / 2;

282  const char

* fwd_read = spot;

283  const char

* rev_read = spot+

len

;

293

pair<size_t, size_t> pos = s_FindAdapterStartPos(fwd_words, rev_words);

295  bool

is_consistent = (pos.first + pos.second ==

len

);

298  size_t

adapter_len =

len

- pos.first;

299  if

(is_consistent && adapter_len >= MER_LENGTH) {

300

m_cons5.AddExemplar(fwd_words.begin() + pos.first, fwd_words.end());

307

s_Translate(rev_read + pos.first, adapter_len,

false

, words2);

308

m_cons3.AddExemplar(words2.begin(), words2.end());

335  size_t

top_count = m_counts[

seed

];

339

words.push_back(

seed

);

340

x_ExtendSeed(words, top_count,

false

);

341

reverse(words.begin(), words.end());

342

x_ExtendSeed(words, top_count,

true

);

350  char

c = seq[seq.size() - 1];

351  while

(seq.size() > 0 && seq[seq.size() - 1] == c) {

352

seq.resize(seq.size() - 1);

361

::CUnpairedAdapterDetector

362

::x_FindAdapterSeed()

const 364  typedef

pair<TCount, TWord>

TPair

;

365  typedef

priority_queue< TPair, vector<TPair>, greater<TPair> > TTopWords;

372  for

(

TWord

w = 0; w < m_counts.size(); w++) {

376  while

(top_words.size() > m_params.top_n) {

386  size_t n

= top_words.size();

387  for

(; !top_words.empty(); top_words.pop()) {

388

sup = top_words.top().first;

389

word = top_words.top().second;

390

sumlogs +=

log

(sup + 1.0);

393  size_t

gmean =

n

== 0 ? 0 : exp(sumlogs /

n

) - 1;

398

<<

"; overrepresentation: " 401

<<

static_cast<size_t>

(gmean));

403  return

m_params.HaveInitialSupport(sup, gmean) ? word : 0;

416

: (w >> 2) | (

letter

<< 22);

419  if

(

count

> best_count) {

434  TWord

prev_word = words.back();

435  TWord

curr_word = x_GetAdjacent(prev_word, right);

436  TCount

prev_sup = m_counts[prev_word];

437  TCount

curr_sup = m_counts[curr_word];

439  if

(!m_params.HaveContinuedSupport(top_sup, prev_sup, curr_sup)

440

|| find(words.begin(), words.end(), curr_word) != words.end())

446

words.push_back(curr_word);

459  return a

.second <

b

.second;

465

::CSimpleUngappedAligner

466

::GetSeqRange(

TPos

pos)

const 468

TRanges::const_iterator it = lower_bound(

469

m_subseq_ranges.begin(),

470

m_subseq_ranges.end(),

474  return

it == m_subseq_ranges.end() ?

TRange

(NULLPOS, NULLPOS) : *it;

479

::CSimpleUngappedAligner

482  if

(

a

.len > 0 &&

b

.len == 0) {

484

}

else if

(

a

.len == 0 &&

b

.len > 0) {

487  TRange

ar = GetSeqRange((

a

.second -

a

.first) / 2);

488  TRange br

= GetSeqRange((

b

.second -

b

.first) / 2);

492  TPos

a_un = (ar.second - ar.first) -

a

.len/2;

493  TPos

b_un = (

br

.second -

br

.first) -

b

.len/2;

497

<<

" "

<< (

a

.second -

a

.first) / 2

500

<<

" "

<< m_seq.substr(ar.first, ar.second - ar.first));

502

<<

" "

<< (

b

.second -

b

.first) / 2

505

<<

" "

<< m_seq.substr(

br

.first,

br

.second -

br

.first));

511  double

a_score =

a

.len -

log

(1.0 + a_un)*5;

512  double

b_score =

b

.len -

log

(1.0 + b_un)*5;

513  return

a_score < b_score ?

b

:

a

;

525

s_PermuteMismatches(word, approx_words);

535  TPos

& pos_ref = vec_index[w];

536  if

(pos_ref == pos || pos_ref == NULLPOS) {

539  if

(pos_ref != MULTIPOS) {

555

positions.resize(coordset.

size

());

556

TPositions::iterator dest = positions.begin();

560  TPos

pos = it->second;

563  if

(map_index.

find

(w) == map_index.

end

()) {

564

map_index[w].first = dest;

567

map_index[w].second = dest;

575

m_seq.assign(seq,

len

);

577

m_subseq_ranges.clear();

580

fill(m_vec_index.begin(), m_vec_index.end(), (

TPos

)NULLPOS);

587  const char

* endend = seq+

len

;

588  for

(

const char

*begin = seq, *end = find(begin, endend,

'-'

);

590

begin = end + 1, end = find(begin, endend,

'-'

))

592  TRange r

(begin - seq, end - seq);

593

m_subseq_ranges.push_back(

r

);

598  for

(

size_t i

= 0;

i

< words.size();

i

++) {

600

s_IndexWord(words[

i

], pos, m_vec_index, coordset);

604

s_CoordSetToMapIndex(coordset, m_map_index, m_positions);

613

TWords::iterator dest = words.begin();

614  for

(

size_t

i1 = 3; i1 < 9; i1++) {

616  for

(

Uint1

c1 = 0; c1 < 4; c1++) {

617  TWord

w1 = s_Put(w, i1, c1);

619  for

(

size_t

i2 = i1+1; i2 < 9; i2++) {

621  for

(

Uint1

c2 = 0; c2 < 4; c2++) {

622

*(dest++) = s_Put(w1, i2, c2);

634  if

(m1.

first

== NULLPOS) {

656

m.

first

= q_pos - s_pos;

657

m.

second

= q_pos + s_pos;

666  const size_t

query_len,

667  const bool

direction,

668  const int

match_score,

669  const int

mismatch_score,

670  const int

dropoff_score)

const 672  int

delta_pos = direction ? 1 : -1;

676  typedef

pair<TPos, TPos> TPoint;

677

TPoint p(seg.

first

+ (direction ? seg.

len

- 1 : 0),

678

seg.

second

+ (direction ? seg.

len

- 1 : 0));

680  Int8

score(0), max_score(0);

684  TRange

q_bounds(0, query_len);

688

p.first += delta_pos;

689

p.second += delta_pos;

690  while

(score + dropoff_score > max_score

691

&& p.first >= q_bounds.first

692

&& p.first < q_bounds.second

693

&& p.second >= s_bounds.first

694

&& p.second < s_bounds.second)

696

score += (q_seq[p.first] == m_seq[p.second]) ? match_score

700

<<

" "

<< q_seq[p.first]

701

<<

" "

<< m_seq[p.second]

703

<<

" "

<< best_p.first);

706  if

(score > max_score) {

711

p.first += delta_pos;

712

p.second += delta_pos;

717

seg.

len

= best_p.first - seg.

first

+ 1;

722

seg2.

first

= best_p.first;

723

seg2.

second

= best_p.second;

743  for

(

size_t i

= 0;

i

< words.size();

i

++) {

747

m_vec_index.begin() + w + 1);

749  if

(*

r

.first == NULLPOS) {

751

}

else if

(*

r

.first == MULTIPOS) {

752  r

= m_map_index.find(w)->second;

763  for

(TPositions::const_iterator it =

r

.first; it !=

r

.second; ++it) {

764  SMatch

new_match = x_CreateMatch(

i

, *it);

767  if

(!

s_Merge

(old_match, new_match)) {

768

best_match = x_GetBetterOf(best_match, old_match);

769

old_match = new_match;

774  ITERATE

(TMatches, it, matches) {

775

best_match = x_GetBetterOf(best_match, it->

second

);

LargeInt< 1 > revcomp(const LargeInt< 1 > &x, size_t sizeKmer)

CLocalRange< TOffset > TRange

define for the fundamental building block of sequence ranges

ncbi::TMaskedQueryRegions mask

static void s_CoordSetToMapIndex(const TCoordSet &coordset, TMapIndex &map_index, TPositions &positions)

static void s_PermuteMismatches(TWord w, TWords &words)

SMatch FindSingleBest(const char *query, size_t len) const

Return best ungapped alignment of at least MER_LENGTH.

pair< TPositions::const_iterator, TPositions::const_iterator > TPosRange

void Init(const char *seq, size_t len)

IUPAC sequence of target sequence(s) Multiple sequences may be concatenated and '-'-delimited e....

SMatch x_CreateMatch(TPos q_start, TPos s_start) const

Create a match in diag/antidiag coordspace for a matched word.

static bool s_Merge(SMatch &m, const SMatch &m2)

void x_Extend(SMatch &match, const char *query_seq, const size_t query_len, const bool direction, const int match_score=3, const int mismatch_score=-2, const int dropoff=5) const

Ungapped extension (match is in normal coordinates)

static void s_IndexWord(TWord w, TPos pos, TPositions &vec_index, TCoordSet &coordset)

pair< TPos, TPos > TRange

vector< TPos > TPositions

TWord x_GetAdjacent(TWord w, bool right) const

if right, return most frequent word whose 11-prefix = 11-suffix of w; otherwise, return most frequent...

virtual void AddExemplar(const char *seq, size_t len)

Add sequence of a spot from an SRA run (single or paired-end read)

virtual string InferAdapterSeq() const

returns IUPAC seq of the inferred adapter seq; empty if not found.

void x_ExtendSeed(TWords &words, size_t top_count, bool right) const

Extend the seed one way, as long as it satisfies extension thresholds.

static bool s_IsLowComplexity(TWord word)

With threshold of 0.9, 0.7% of all words are classified as such.

static double s_GetWordComplexity(TWord word)

Complexity is a value between 0 (for a homopolymer) and 1 (max complexity).

static const size_t MER_LENGTH

Will use 24-bit 12-mers as words.

static string s_AsIUPAC(TWord w, size_t mer_size=MER_LENGTH)

Convert word to IUPAC string [ACGT]{MER_LENGTH}.

static void s_Translate(const char *const iupac_na, size_t len, bool apply_revcomp, TWords &words)

Translate IUPAC sequence to a sequence of overlapping words (A,C,G,T,*) <-> (0,1,2,...

static bool s_IsHomopolymer(TWord w)

const_iterator end() const

const_iterator find(const key_type &key) const

iterator_bool insert(const value_type &val)

parent_type::value_type value_type

SStaticPair< const char *, const char * > TPair

#define ITERATE(Type, Var, Cont)

ITERATE macro to sequence through container elements.

#define ERR_POST(message)

Error posting with file, line number information but without error codes.

#define LOG_POST(message)

This macro is deprecated and it's strongly recomended to move in all projects (except tests) to macro...

void Info(CExceptionArgs_Base &args)

uint8_t Uint1

1-byte (8-bit) unsigned integer

int64_t Int8

8-byte (64-bit) signed integer

uint64_t Uint8

8-byte (64-bit) unsigned integer

void s_Merge(SExpression &l, SExpression &r)

NCBI C++ auxiliary debug macros.

Miscellaneous common-use basic types and functionality.

double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)

static int match(PCRE2_SPTR start_eptr, PCRE2_SPTR start_ecode, uint16_t top_bracket, PCRE2_SIZE frame_size, pcre2_match_data *match_data, match_block *mb)

static SLJIT_INLINE sljit_ins br(sljit_gpr target)

Represents single ungapped alignment.

An adapter sequence is presumed to occur at least min_support times in the input, and is overrepresen...

bool HaveContinuedSupport(size_t top_sup, size_t prev_sup, size_t candidate_sup) const

bool HaveInitialSupport(size_t top_candidate_sup, size_t non_candidate_sup) const

bool operator()(const NAdapterSearch::CSimpleUngappedAligner::TRange &a, const NAdapterSearch::CSimpleUngappedAligner::TRange &b) const

static Uint4 letter(char c)


RetroSearch is an open source project built by @garambo | Open a GitHub Issue

Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo

HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4