indels.reserve(sam_indels.size());
23 if(indl->GetInDelV().empty())
24insertlen = indl->Len();
26deletion = indl->GetInDelV();
27}
else if(loc+insertlen == indl->Loc()) {
28 if(indl->GetInDelV().empty())
29insertlen += indl->Len();
31deletion += indl->GetInDelV();
34indels.push_back(
CLiteIndel(loc, insertlen));
35 if(!deletion.empty())
36indels.push_back(
CLiteIndel(loc+insertlen,(
int)deletion.size(),deletion));
39 if(indl->GetInDelV().empty()) {
40insertlen = indl->Len();
43deletion = indl->GetInDelV();
50indels.push_back(
CLiteIndel(loc, insertlen));
51 if(!deletion.empty())
52indels.push_back(
CLiteIndel(loc+insertlen,(
int)deletion.size(),deletion));
59m_weight(
weight), m_ident(ident), m_range(range) {
70 size_tfirst_element = cigar.find_first_not_of(
"0123456789");
71 if(first_element != string::npos && cigar[first_element] ==
'I')
72cigar[first_element] =
'S';
73 if(cigar[cigar.size()-1] ==
'I')
74cigar[cigar.size()-1] =
'S';
83istringstream istr_cigar(cigar);
86 const string& seq = ad.
m_seq;
87 while(istr_cigar >>
len>> c) {
95 for(
int l= 0;
l<
len; ++
l) {
96 if(seq[seq_pos] != contig[gstop]) {
98sam_indels.push_back(
CLiteIndel(gstop+1,1,seq.substr(seq_pos,1)));
109seq_pos +=
len; align_len +=
len; gstop +=
len;
116seq_pos +=
len; align_len +=
len; gstop +=
len;
121seq_pos +=
len; align_len +=
len;
126align_len +=
len; gstop +=
len;
130 throwruntime_error(
"Alignments can't have introns");
138 m_ident= (double)matches/align_len;
154 stringseq = contig.substr(
l,
r-
l);
165seq += contig.substr(
l,
r-
l);
186 intlength (sv.
size());
200 for(
int i= 0;
i< (
int)align.
Exons().size(); ++
i) {
204 if(
i->IntersectingWith(exon_lim.
GetFrom(), exon_lim.
GetTo()))
205corrections.push_back(*
i);
207 while(!corrections.empty() && corrections.front().Loc() == exon_lim.
GetFrom()) {
208exon_lim.
SetFrom(corrections.front().InDelEnd());
209corrections.erase(corrections.begin());
211 while(!corrections.empty() && corrections.back().InDelEnd() == exon_lim.
GetTo()+1) {
212exon_lim.
SetTo(corrections.back().Loc()-1);
213corrections.pop_back();
222errors += indl->Len();
223 if(indl->IsDeletion())
224align_len += indl->Len();
225 if(!indl->IsInsertion()) {
226 strings = indl->GetInDelV();
227ns +=
count(s.begin(), s.end(),
'N');
235 if(indl->IsMismatch()) {
236indels.push_back(
CLiteIndel(indl->Loc(), indl->Len()));
237indels.push_back(
CLiteIndel(indl->InDelEnd(), indl->Len(), indl->GetInDelV()));
238}
else if(indl->IsInsertion()) {
239indels.push_back(
CLiteIndel(indl->Loc(), indl->Len()));
241indels.push_back(
CLiteIndel(indl->Loc(), indl->Len(), indl->GetInDelV()));
254map<TSignedSeqRange,TSIMap> variations;
255list<TSignedSeqRange> confirmed_ranges;
260 ITERATE(list<TSignedSeqRange>,
i, confirmed_ranges) {
267aligns.back().SetTargetId(*
id);
272 TSIMap& seq_counts =
i->second;
274 if(!correctionsonly) {
277 const string& seq = j->first;
278 int count= j->second;
288aligns.back().SetTargetId(*
id);
294 doubleselected_weight = 0;
296 const string& seq = j->first;
297 int count= j->second;
300 if(dist < selected_dist || (dist == selected_dist &&
count> selected_weight)) {
301selected_cigar = cigar;
303selected_dist = dist;
304selected_weight =
count;
311 a.SetWeight(selected_weight);
314aligns.back().SetTargetId(*
id);
322 #define ENTROPY_LEVEL_FOR_ALIGNS 0.51 327 stringread = al->TranscriptSeq(
m_contigt);
328 stringbase =
m_contigt.substr(al->Limits().GetFrom(),al->Limits().GetLength());
332all_alignsp.push_back(&(*al));
339 for(
intir = 0; ir < (
int)all_alignsp.size(); ++ir) {
353 for(
intir = 0; ir < reads_num; ++ir) {
354 for(
intp =
m_alignsp[ir]->Limits().GetFrom(); p <=
m_alignsp[ir]->Limits().GetTo(); ++p)
358 if((*indl)->IsDeletion()) {
359 int len= (*indl)->Len();
360 if(indl != indels.begin()) {
361TLiteInDelsP::const_iterator
prev= indl-1;
362 if((*prev)->IsInsertion() && (*prev)->Loc()+(*prev)->Len() == (*indl)->Loc())
363 len-= (*prev)->Len();
366deletion_len[(*indl)->Loc()] =
max(
len, deletion_len[(*indl)->Loc()]);
373 intcontigp =
i->first;
374 if(del != deletion_len.
end() && contigp == del->first) {
375 intdel_len = (del++)->second;
376 for(
int l= 0;
l< del_len; ++
l)
379 intalignp = contigp+shift;
384 inttotal_deletion_len = 0;
386total_deletion_len +=
i->second;
388 m_base.reserve(contig_len+total_deletion_len);
390 for(
intp = 0; p < contig_len; ++p) {
392 if(rslt != deletion_len.
end()) {
393 int n= rslt->second;
405 for(
intp = 0; p < (
int)
m_base.length(); ++p) {
406 if(
m_base[p] ==
'-') {
409 while(
r< reads_num &&
m_starts[
r]+dashes == p)
415 for(
intir = 0; ir < reads_num; ++ir) {
422list<pair<int,int> > indel_pos_length;
424 if((*indl)->IsDeletion()) {
425 if(!indel_pos_length.empty() && indel_pos_length.back().first-indel_pos_length.back().second == (*indl)->Loc()) {
426 _ASSERT(indel_pos_length.back().second < 0);
427 intnew_len = indel_pos_length.back().second+(*indl)->Len();
429indel_pos_length.back().second = new_len;
430}
else if(new_len > 0) {
431indel_pos_length.back().first = (*indl)->Loc();
432indel_pos_length.back().second = new_len;
434indel_pos_length.pop_back();
437indel_pos_length.push_back(make_pair((*indl)->Loc(), (*indl)->Len()));
440indel_pos_length.push_back(make_pair((*indl)->Loc(), -(*indl)->Len()));
444list<pair<int,int> >::iterator indl = indel_pos_length.begin();
445 for(
intp = start+1; p <
base_length&& p < start+(
int)read.size(); ) {
446 if(
m_base[p] ==
'-') {
450 intinsertp = p-start;
452 if(indl != indel_pos_length.end() && indl->second > 0 &&
m_contig_to_align[indl->first] == p) {
457read.insert(insertp,
len,
'-');
458}
else if(indl != indel_pos_length.end() && indl->second < 0 &&
m_contig_to_align[indl->first] == p) {
460read.
insert(p-start,
n,
'-');
473 for(
intir = 0; ir < reads_num; ++ir) {
475 const string& read =
m_reads[ir];
479 for(
intp = legit_range.
GetFrom(); p <= legit_range.
GetTo(); ++p) {
480 charc = read[p-start];
487vector<const CLiteAlign*> all_alignsp;
490 intaligns_size = (
int)all_alignsp.size();
511 if(
Include(read_range,two_word_range)) {
517seq_counts[read_seq] += w;
524 const string& read =
m_reads[ir];
526 intend = start+(
int)read.size()-1;
528 intfirst_legit_match = start;
530 while(shift <
m_min_edge|| (first_legit_match <= end && (read[first_legit_match-start] ==
'-'|| read[first_legit_match-start] !=
m_base[first_legit_match]))) {
531 if(
m_base[first_legit_match] !=
'-')
535 intlast_legit_match = end;
537 while(shift <
m_min_edge|| (last_legit_match >= start && (read[last_legit_match-start] ==
'-'|| read[last_legit_match-start] !=
m_base[last_legit_match]))) {
538 if(
m_base[last_legit_match] !=
'-')
554 stringmaximal_bases(
m_base.size(),
'A');
555 for(
intp = 0; p < (
int)
m_base.size(); ++p) {
558maximal_bases[p] =
'#';
567 if(
i->second > w) {
574maximal_bases[p] =
'#';
577maximal_bases[p] = c;
582 stringprev_strong_word;
583 for(
intp = 0; p < (
int)maximal_bases.size(); ) {
584 if(maximal_bases[p] ==
'#') {
593 if(strong_word_range.
Empty())
596p = strong_word_range.
GetFrom()+1;
598 boolsame_as_contig =
true;
599 for(
intpos = strong_word_range.
GetFrom(); pos <= strong_word_range.
GetTo() && same_as_contig; ++pos)
600same_as_contig = (maximal_bases[pos] ==
m_base[pos]);
602 if(!same_as_contig) {
604}
else if(first_gap >= 0) {
608 boolthere_is_weak_range = prev_strong_word_range.
NotEmpty() && prev_strong_word_range.
GetTo()+1 < strong_word_range.
GetFrom();
616 if(there_is_weak_range) {
619 intaccepted_cross = 0;
620 SeqCountsBetweenTwoStrongWords(prev_strong_word_range, prev_strong_word, strong_word_range, strong_word, seq_counts, total_cross, accepted_cross);
624 if(!seq_counts.
empty()) {
630 if(
i->second > most_frequent_variant->second)
631most_frequent_variant =
i;
636 stringvar_seq = prev_strong_word+it->first+strong_word;
638var_counts[var_seq] = it->second;
641 if(!var_counts.
empty()) {
643 intbase_posr = swordr;
646 if(var_counts.
size() == 1 && var_counts.
begin()->first ==
m_contigt.substr(base_posl,base_posr-base_posl+1)) {
647confirmed_ranges.back().SetTo(base_posr);
648there_is_weak_range =
false;
651variations.insert(var);
658prev_strong_word_range = strong_word_range;
659prev_strong_word = strong_word;
661 if(confirmed_ranges.empty() || there_is_weak_range || confirmed_ranges.back().GetTo()+1 < swordl)
664confirmed_ranges.back().
SetTo(swordr);
670 const string& read =
m_reads[ir];
672 intstop = start+(
int)read.size()-1;
675 if(read[
r-start] !=
'-')
676read_word.push_back(read[
r-start]);
683 for(
int r= word_range.
GetFrom();
r<= word_range.
GetTo(); ++
r) {
685read_word.push_back(
m_base[
r]);
697 if(
Include(legit_range,word_range)) {
700 if(word == read_word)
714 while(nextp < (
int)maximal_bases.size()) {
716 intword_start = nextp;
717 intword_end = nextp;
718 boollow_complexity =
false;
719 for( ; word_end < (
int)maximal_bases.size() && (
int)word.size() <
m_word&& maximal_bases[word_end] !=
'#'; ++word_end) {
720 if(maximal_bases[word_end] !=
'-') {
721word.push_back(
toupper(maximal_bases[word_end]));
722 if(
islower(maximal_bases[word_end]))
723low_complexity =
true;
727 if((
int)word.size() <
m_word) {
728 if(maximal_bases[word_end] ==
'#') {
729 if(first_gap < 0) first_gap = word_end;
738 if(!low_complexity &&
CheckWord(word_range,word)) {
740strong_word_range = word_range;
761 m_word= args[
"word"].AsInteger();
764 m_maxNs= args[
"maxNs"].AsInteger();
int Distance(const char *query, const char *subject) const
TInDels GetInDels(int sstart, const char *const query, const char *subject) const
const TExons & Exons() const
TSignedSeqRange Limits() const
CLiteAlign(TSignedSeqRange range, const TLiteInDels &indels, set< CLiteIndel > &indel_holder, double weight, double ident)
string TranscriptSeq(const string &contig) const
void InsertDashesInBase()
void InsertDashesInReads()
static void SetupArgDescriptions(CArgDescriptions *arg_desc)
int m_min_abs_support_for_variant
bool CheckWord(const TSignedSeqRange &word_range, const string &word)
void SelectAligns(vector< const CLiteAlign * > &all_alignsp)
string EmitSequenceFromBase(const TSignedSeqRange &word_range)
set< CLiteIndel > m_indel_holder
void ProcessArgs(const CArgs &args)
map< int, TCharIntMap > m_counts
void AddAlignment(const SSamData &align)
TAlignModelList GetVariationAlignList(bool correctionsonly)
TSignedSeqRange LegitRange(int ir)
void SetGenomic(const CConstRef< CSeq_id > &seqid, CScope &scope)
vector< const CLiteAlign * > m_alignsp
TIntMap m_contig_to_align
double m_min_rel_support_for_variant
int FindNextStrongWord(int nextp, const string &maximal_bases, string &strong_word, TSignedSeqRange &strong_word_range, int &first_gap)
string EmitSequenceFromRead(int ir, const TSignedSeqRange &word_range)
void SeqCountsBetweenTwoStrongWords(const TSignedSeqRange &prev_strong_word_range, const string &prev_strong_word, const TSignedSeqRange &strong_word_range, const string &strong_word, TSIMap &seq_counts, int &total_cross, int &accepted_cross)
void Variations(map< TSignedSeqRange, TSIMap > &variations, list< TSignedSeqRange > &confirmed_ranges)
double m_strong_consensus
void PrepareReads(const vector< const CLiteAlign * > &all_alignsp)
TIntMap m_align_to_contig
container_type::const_iterator const_iterator
container_type::iterator iterator
const_iterator begin() const
const_iterator end() const
iterator_bool insert(const value_type &val)
container_type::value_type value_type
const_iterator find(const key_type &key) const
iterator_bool insert(const value_type &val)
static int base_length[29]
static DLIST_TYPE *DLIST_NAME() prev(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
double Entropy(const string &seq)
CCigar GlbAlign(const char *query, int querylen, const char *subject, int subjectlen, int gopen, int gapextend, const char delta[256][256])
list< CAlignModel > TAlignModelList
bool Include(TSignedSeqRange big, TSignedSeqRange small)
vector< CInDelInfo > TInDels
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
int TSignedSeqPos
Type for signed sequence position.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
void AddDefaultKey(const string &name, const string &synopsis, const string &comment, EType type, const string &default_value, TFlags flags=0, const string &env_var=kEmptyStr, const char *display_value=nullptr)
Add description for optional key with default value.
@ eDouble
Convertible into a floating point number (double)
@ eInteger
Convertible into an integer number (int or Int8)
string GetSeqIdString(bool with_version=false) const
Return seqid string with optional version for text seqid type.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
CSeqVector GetSeqVector(EVectorCoding coding, ENa_strand strand=eNa_strand_plus) const
Get sequence: Iupacna or Iupacaa if use_iupac_coding is true.
@ eCoding_Iupac
Set coding to printable coding (Iupacna or Iupacaa)
void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const
Fill the buffer string with the sequence data for the interval [start, stop).
position_type GetLength(void) const
bool NotEmpty(void) const
static TThisType GetEmpty(void)
CRange< TSignedSeqPos > TSignedSeqRange
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
void SetFrom(TFrom value)
Assign a value to From data member.
TTo GetTo(void) const
Get the To member data.
TFrom GetFrom(void) const
Get the From member data.
void SetTo(TTo value)
Assign a value to To data member.
unsigned int
A callback function used to compare two keys in a database.
constexpr auto sort(_Init &&init)
const struct ncbi::grid::netcache::search::fields::SIZE size
Int4 delta(size_t dimension_, const Int4 *score_)
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
static int match(PCRE2_SPTR start_eptr, PCRE2_SPTR start_ecode, uint16_t top_bracket, PCRE2_SIZE frame_size, pcre2_match_data *match_data, match_block *mb)
static SLJIT_INLINE sljit_ins l(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
TLiteInDels GroupInDels(const TLiteInDels &sam_indels)
#define ENTROPY_LEVEL_FOR_ALIGNS
vector< const CLiteIndel * > TLiteInDelsP
vector< CLiteIndel > TLiteInDels
list< CLiteAlign > TLiteAlignList
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4