A RetroSearch Logo

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Search Query:

Showing content from http://www.ncbi.nlm.nih.gov/IEB/ToolBox/CPP_DOC/doxyhtml/variations_8cpp_source.html below:

NCBI C++ ToolKit: src/algo/gnomon/variations.cpp Source File

16

indels.reserve(sam_indels.size());

23  if

(indl->GetInDelV().empty())

24

insertlen = indl->Len();

26

deletion = indl->GetInDelV();

27

}

else if

(loc+insertlen == indl->Loc()) {

28  if

(indl->GetInDelV().empty())

29

insertlen += indl->Len();

31

deletion += indl->GetInDelV();

34

indels.push_back(

CLiteIndel

(loc, insertlen));

35  if

(!deletion.empty())

36

indels.push_back(

CLiteIndel

(loc+insertlen,(

int

)deletion.size(),deletion));

39  if

(indl->GetInDelV().empty()) {

40

insertlen = indl->Len();

43

deletion = indl->GetInDelV();

50

indels.push_back(

CLiteIndel

(loc, insertlen));

51  if

(!deletion.empty())

52

indels.push_back(

CLiteIndel

(loc+insertlen,(

int

)deletion.size(),deletion));

59

m_weight(

weight

), m_ident(ident), m_range(range) {

70  size_t

first_element = cigar.find_first_not_of(

"0123456789"

);

71  if

(first_element != string::npos && cigar[first_element] ==

'I'

)

72

cigar[first_element] =

'S'

;

73  if

(cigar[cigar.size()-1] ==

'I'

)

74

cigar[cigar.size()-1] =

'S'

;

83

istringstream istr_cigar(cigar);

86  const string

& seq = ad.

m_seq

;

87  while

(istr_cigar >>

len

>> c) {

95  for

(

int l

= 0;

l

<

len

; ++

l

) {

96  if

(seq[seq_pos] != contig[gstop]) {

98

sam_indels.push_back(

CLiteIndel

(gstop+1,1,seq.substr(seq_pos,1)));

109

seq_pos +=

len

; align_len +=

len

; gstop +=

len

;

116

seq_pos +=

len

; align_len +=

len

; gstop +=

len

;

121

seq_pos +=

len

; align_len +=

len

;

126

align_len +=

len

; gstop +=

len

;

130  throw

runtime_error(

"Alignments can't have introns"

);

138  m_ident

= (double)matches/align_len;

154  string

seq = contig.substr(

l

,

r

-

l

);

165

seq += contig.substr(

l

,

r

-

l

);

186  int

length (sv.

size

());

200  for

(

int i

= 0;

i

< (

int

)align.

Exons

().size(); ++

i

) {

204  if

(

i

->IntersectingWith(exon_lim.

GetFrom

(), exon_lim.

GetTo

()))

205

corrections.push_back(*

i

);

207  while

(!corrections.empty() && corrections.front().Loc() == exon_lim.

GetFrom

()) {

208

exon_lim.

SetFrom

(corrections.front().InDelEnd());

209

corrections.erase(corrections.begin());

211  while

(!corrections.empty() && corrections.back().InDelEnd() == exon_lim.

GetTo

()+1) {

212

exon_lim.

SetTo

(corrections.back().Loc()-1);

213

corrections.pop_back();

222

errors += indl->Len();

223  if

(indl->IsDeletion())

224

align_len += indl->Len();

225  if

(!indl->IsInsertion()) {

226  string

s = indl->GetInDelV();

227

ns +=

count

(s.begin(), s.end(),

'N'

);

235  if

(indl->IsMismatch()) {

236

indels.push_back(

CLiteIndel

(indl->Loc(), indl->Len()));

237

indels.push_back(

CLiteIndel

(indl->InDelEnd(), indl->Len(), indl->GetInDelV()));

238

}

else if

(indl->IsInsertion()) {

239

indels.push_back(

CLiteIndel

(indl->Loc(), indl->Len()));

241

indels.push_back(

CLiteIndel

(indl->Loc(), indl->Len(), indl->GetInDelV()));

254

map<TSignedSeqRange,TSIMap> variations;

255

list<TSignedSeqRange> confirmed_ranges;

260  ITERATE

(list<TSignedSeqRange>,

i

, confirmed_ranges) {

267

aligns.back().SetTargetId(*

id

);

272  TSIMap

& seq_counts =

i

->second;

274  if

(!correctionsonly) {

277  const string

& seq = j->first;

278  int count

= j->second;

288

aligns.back().SetTargetId(*

id

);

294  double

selected_weight = 0;

296  const string

& seq = j->first;

297  int count

= j->second;

300  if

(dist < selected_dist || (dist == selected_dist &&

count

> selected_weight)) {

301

selected_cigar = cigar;

303

selected_dist = dist;

304

selected_weight =

count

;

311  a

.SetWeight(selected_weight);

314

aligns.back().SetTargetId(*

id

);

322 #define ENTROPY_LEVEL_FOR_ALIGNS 0.51 327  string

read = al->TranscriptSeq(

m_contigt

);

328  string

base =

m_contigt

.substr(al->Limits().GetFrom(),al->Limits().GetLength());

332

all_alignsp.push_back(&(*al));

339  for

(

int

ir = 0; ir < (

int

)all_alignsp.size(); ++ir) {

353  for

(

int

ir = 0; ir < reads_num; ++ir) {

354  for

(

int

p =

m_alignsp

[ir]->Limits().GetFrom(); p <=

m_alignsp

[ir]->Limits().GetTo(); ++p)

358  if

((*indl)->IsDeletion()) {

359  int len

= (*indl)->Len();

360  if

(indl != indels.begin()) {

361

TLiteInDelsP::const_iterator

prev

= indl-1;

362  if

((*prev)->IsInsertion() && (*prev)->Loc()+(*prev)->Len() == (*indl)->Loc())

363  len

-= (*prev)->Len();

366

deletion_len[(*indl)->Loc()] =

max

(

len

, deletion_len[(*indl)->Loc()]);

373  int

contigp =

i

->first;

374  if

(del != deletion_len.

end

() && contigp == del->first) {

375  int

del_len = (del++)->second;

376  for

(

int l

= 0;

l

< del_len; ++

l

)

379  int

alignp = contigp+shift;

384  int

total_deletion_len = 0;

386

total_deletion_len +=

i

->second;

388  m_base

.reserve(contig_len+total_deletion_len);

390  for

(

int

p = 0; p < contig_len; ++p) {

392  if

(rslt != deletion_len.

end

()) {

393  int n

= rslt->second;

405  for

(

int

p = 0; p < (

int

)

m_base

.length(); ++p) {

406  if

(

m_base

[p] ==

'-'

) {

409  while

(

r

< reads_num &&

m_starts

[

r

]+dashes == p)

415  for

(

int

ir = 0; ir < reads_num; ++ir) {

422

list<pair<int,int> > indel_pos_length;

424  if

((*indl)->IsDeletion()) {

425  if

(!indel_pos_length.empty() && indel_pos_length.back().first-indel_pos_length.back().second == (*indl)->Loc()) {

426  _ASSERT

(indel_pos_length.back().second < 0);

427  int

new_len = indel_pos_length.back().second+(*indl)->Len();

429

indel_pos_length.back().second = new_len;

430

}

else if

(new_len > 0) {

431

indel_pos_length.back().first = (*indl)->Loc();

432

indel_pos_length.back().second = new_len;

434

indel_pos_length.pop_back();

437

indel_pos_length.push_back(make_pair((*indl)->Loc(), (*indl)->Len()));

440

indel_pos_length.push_back(make_pair((*indl)->Loc(), -(*indl)->Len()));

444

list<pair<int,int> >::iterator indl = indel_pos_length.begin();

445  for

(

int

p = start+1; p <

base_length

&& p < start+(

int

)read.size(); ) {

446  if

(

m_base

[p] ==

'-'

) {

450  int

insertp = p-start;

452  if

(indl != indel_pos_length.end() && indl->second > 0 &&

m_contig_to_align

[indl->first] == p) {

457

read.insert(insertp,

len

,

'-'

);

458

}

else if

(indl != indel_pos_length.end() && indl->second < 0 &&

m_contig_to_align

[indl->first] == p) {

460

read.

insert

(p-start,

n

,

'-'

);

473  for

(

int

ir = 0; ir < reads_num; ++ir) {

475  const string

& read =

m_reads

[ir];

479  for

(

int

p = legit_range.

GetFrom

(); p <= legit_range.

GetTo

(); ++p) {

480  char

c = read[p-start];

487

vector<const CLiteAlign*> all_alignsp;

490  int

aligns_size = (

int

)all_alignsp.size();

511  if

(

Include

(read_range,two_word_range)) {

517

seq_counts[read_seq] += w;

524  const string

& read =

m_reads

[ir];

526  int

end = start+(

int

)read.size()-1;

528  int

first_legit_match = start;

530  while

(shift <

m_min_edge

|| (first_legit_match <= end && (read[first_legit_match-start] ==

'-'

|| read[first_legit_match-start] !=

m_base

[first_legit_match]))) {

531  if

(

m_base

[first_legit_match] !=

'-'

)

535  int

last_legit_match = end;

537  while

(shift <

m_min_edge

|| (last_legit_match >= start && (read[last_legit_match-start] ==

'-'

|| read[last_legit_match-start] !=

m_base

[last_legit_match]))) {

538  if

(

m_base

[last_legit_match] !=

'-'

)

554  string

maximal_bases(

m_base

.size(),

'A'

);

555  for

(

int

p = 0; p < (

int

)

m_base

.size(); ++p) {

558

maximal_bases[p] =

'#'

;

567  if

(

i

->second > w) {

574

maximal_bases[p] =

'#'

;

577

maximal_bases[p] = c;

582  string

prev_strong_word;

583  for

(

int

p = 0; p < (

int

)maximal_bases.size(); ) {

584  if

(maximal_bases[p] ==

'#'

) {

593  if

(strong_word_range.

Empty

())

596

p = strong_word_range.

GetFrom

()+1;

598  bool

same_as_contig =

true

;

599  for

(

int

pos = strong_word_range.

GetFrom

(); pos <= strong_word_range.

GetTo

() && same_as_contig; ++pos)

600

same_as_contig = (maximal_bases[pos] ==

m_base

[pos]);

602  if

(!same_as_contig) {

604

}

else if

(first_gap >= 0) {

608  bool

there_is_weak_range = prev_strong_word_range.

NotEmpty

() && prev_strong_word_range.

GetTo

()+1 < strong_word_range.

GetFrom

();

616  if

(there_is_weak_range) {

619  int

accepted_cross = 0;

620  SeqCountsBetweenTwoStrongWords

(prev_strong_word_range, prev_strong_word, strong_word_range, strong_word, seq_counts, total_cross, accepted_cross);

624  if

(!seq_counts.

empty

()) {

630  if

(

i

->second > most_frequent_variant->second)

631

most_frequent_variant =

i

;

636  string

var_seq = prev_strong_word+it->first+strong_word;

638

var_counts[var_seq] = it->second;

641  if

(!var_counts.

empty

()) {

643  int

base_posr = swordr;

646  if

(var_counts.

size

() == 1 && var_counts.

begin

()->first ==

m_contigt

.substr(base_posl,base_posr-base_posl+1)) {

647

confirmed_ranges.back().SetTo(base_posr);

648

there_is_weak_range =

false

;

651

variations.insert(var);

658

prev_strong_word_range = strong_word_range;

659

prev_strong_word = strong_word;

661  if

(confirmed_ranges.empty() || there_is_weak_range || confirmed_ranges.back().GetTo()+1 < swordl)

664

confirmed_ranges.back().

SetTo

(swordr);

670  const string

& read =

m_reads

[ir];

672  int

stop = start+(

int

)read.size()-1;

675  if

(read[

r

-start] !=

'-'

)

676

read_word.push_back(read[

r

-start]);

683  for

(

int r

= word_range.

GetFrom

();

r

<= word_range.

GetTo

(); ++

r

) {

685

read_word.push_back(

m_base

[

r

]);

697  if

(

Include

(legit_range,word_range)) {

700  if

(word == read_word)

714  while

(nextp < (

int

)maximal_bases.size()) {

716  int

word_start = nextp;

717  int

word_end = nextp;

718  bool

low_complexity =

false

;

719  for

( ; word_end < (

int

)maximal_bases.size() && (

int

)word.size() <

m_word

&& maximal_bases[word_end] !=

'#'

; ++word_end) {

720  if

(maximal_bases[word_end] !=

'-'

) {

721

word.push_back(

toupper

(maximal_bases[word_end]));

722  if

(

islower

(maximal_bases[word_end]))

723

low_complexity =

true

;

727  if

((

int

)word.size() <

m_word

) {

728  if

(maximal_bases[word_end] ==

'#'

) {

729  if

(first_gap < 0) first_gap = word_end;

738  if

(!low_complexity &&

CheckWord

(word_range,word)) {

740

strong_word_range = word_range;

761  m_word

= args[

"word"

].AsInteger();

764  m_maxNs

= args[

"maxNs"

].AsInteger();

int Distance(const char *query, const char *subject) const

TInDels GetInDels(int sstart, const char *const query, const char *subject) const

const TExons & Exons() const

TSignedSeqRange Limits() const

CLiteAlign(TSignedSeqRange range, const TLiteInDels &indels, set< CLiteIndel > &indel_holder, double weight, double ident)

string TranscriptSeq(const string &contig) const

void InsertDashesInBase()

void InsertDashesInReads()

static void SetupArgDescriptions(CArgDescriptions *arg_desc)

int m_min_abs_support_for_variant

bool CheckWord(const TSignedSeqRange &word_range, const string &word)

void SelectAligns(vector< const CLiteAlign * > &all_alignsp)

string EmitSequenceFromBase(const TSignedSeqRange &word_range)

set< CLiteIndel > m_indel_holder

void ProcessArgs(const CArgs &args)

map< int, TCharIntMap > m_counts

void AddAlignment(const SSamData &align)

TAlignModelList GetVariationAlignList(bool correctionsonly)

TSignedSeqRange LegitRange(int ir)

void SetGenomic(const CConstRef< CSeq_id > &seqid, CScope &scope)

vector< const CLiteAlign * > m_alignsp

TIntMap m_contig_to_align

double m_min_rel_support_for_variant

int FindNextStrongWord(int nextp, const string &maximal_bases, string &strong_word, TSignedSeqRange &strong_word_range, int &first_gap)

string EmitSequenceFromRead(int ir, const TSignedSeqRange &word_range)

void SeqCountsBetweenTwoStrongWords(const TSignedSeqRange &prev_strong_word_range, const string &prev_strong_word, const TSignedSeqRange &strong_word_range, const string &strong_word, TSIMap &seq_counts, int &total_cross, int &accepted_cross)

void Variations(map< TSignedSeqRange, TSIMap > &variations, list< TSignedSeqRange > &confirmed_ranges)

double m_strong_consensus

void PrepareReads(const vector< const CLiteAlign * > &all_alignsp)

TIntMap m_align_to_contig

container_type::const_iterator const_iterator

container_type::iterator iterator

const_iterator begin() const

const_iterator end() const

iterator_bool insert(const value_type &val)

container_type::value_type value_type

const_iterator find(const key_type &key) const

iterator_bool insert(const value_type &val)

static int base_length[29]

static DLIST_TYPE *DLIST_NAME() prev(DLIST_LIST_TYPE *list, DLIST_TYPE *item)

double Entropy(const string &seq)

CCigar GlbAlign(const char *query, int querylen, const char *subject, int subjectlen, int gopen, int gapextend, const char delta[256][256])

list< CAlignModel > TAlignModelList

bool Include(TSignedSeqRange big, TSignedSeqRange small)

vector< CInDelInfo > TInDels

#define ITERATE(Type, Var, Cont)

ITERATE macro to sequence through container elements.

int TSignedSeqPos

Type for signed sequence position.

#define NON_CONST_ITERATE(Type, Var, Cont)

Non constant version of ITERATE macro.

void AddDefaultKey(const string &name, const string &synopsis, const string &comment, EType type, const string &default_value, TFlags flags=0, const string &env_var=kEmptyStr, const char *display_value=nullptr)

Add description for optional key with default value.

@ eDouble

Convertible into a floating point number (double)

@ eInteger

Convertible into an integer number (int or Int8)

string GetSeqIdString(bool with_version=false) const

Return seqid string with optional version for text seqid type.

CBioseq_Handle GetBioseqHandle(const CSeq_id &id)

Get bioseq handle by seq-id.

CSeqVector GetSeqVector(EVectorCoding coding, ENa_strand strand=eNa_strand_plus) const

Get sequence: Iupacna or Iupacaa if use_iupac_coding is true.

@ eCoding_Iupac

Set coding to printable coding (Iupacna or Iupacaa)

void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const

Fill the buffer string with the sequence data for the interval [start, stop).

position_type GetLength(void) const

bool NotEmpty(void) const

static TThisType GetEmpty(void)

CRange< TSignedSeqPos > TSignedSeqRange

#define END_NCBI_SCOPE

End previously defined NCBI scope.

#define END_SCOPE(ns)

End the previously defined scope.

#define BEGIN_NCBI_SCOPE

Define ncbi namespace.

#define BEGIN_SCOPE(ns)

Define a new scope.

static string IntToString(int value, TNumToStringFlags flags=0, int base=10)

Convert int to string.

void SetFrom(TFrom value)

Assign a value to From data member.

TTo GetTo(void) const

Get the To member data.

TFrom GetFrom(void) const

Get the From member data.

void SetTo(TTo value)

Assign a value to To data member.

unsigned int

A callback function used to compare two keys in a database.

constexpr auto sort(_Init &&init)

const struct ncbi::grid::netcache::search::fields::SIZE size

Int4 delta(size_t dimension_, const Int4 *score_)

double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)

static int match(PCRE2_SPTR start_eptr, PCRE2_SPTR start_ecode, uint16_t top_bracket, PCRE2_SIZE frame_size, pcre2_match_data *match_data, match_block *mb)

static SLJIT_INLINE sljit_ins l(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)

TLiteInDels GroupInDels(const TLiteInDels &sam_indels)

#define ENTROPY_LEVEL_FOR_ALIGNS

vector< const CLiteIndel * > TLiteInDelsP

vector< CLiteIndel > TLiteInDels

list< CLiteAlign > TLiteAlignList


RetroSearch is an open source project built by @garambo | Open a GitHub Issue

Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo

HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4