( sparce_aln, threshold );
119 x_Init( spliced_seg, scope, threshold );
127 if( input_aligns.size() == 1 && input_aligns[0]->GetSegs().IsSpliced() ){
129 const_cast<CSpliced_seg*
>( &input_aligns[0]->GetSegs().GetSpliced() )
136 if( input_aligns.size() > 0 ){
138container.
insert( **ait );
153 LOG_POST(
Error<<
"Skipping this alignment: "<< e.what() );
162 LOG_POST(
Error<<
"Input alignments cannot be anchored because they don't share at least one common sequence.");
165 eUnknown,
"Input alignments cannot be anchored because they don't share at least one common sequence." 190 x_Init( *sparse_aln, threshold );
241*handle -> GetBioseqHandle().
GetSeqId(),
367vector< IAlnSegmentIterator* > iters;
385vector<bool> seq_touched(
m_NumSeqs,
false);
399 while(a_range.
GetTo() < (
int)pos && ++it) {
400 if(!seq_touched[
row])
401seq_touched[
row] =
true;
412}
else if(too < next_pos) {
424vector<string> seq_spans;
431 boolseg_has_gap =
false;
432 boolseg_has_unaln =
false;
433 boolseg_is_translated =
false;
448seg_has_unaln = seq_touched[
row];
464 m_SparseAlns[0]->GetAlnSeqString(
row, seq_span, seq_range, seg_is_translated);
467 if(seg_is_translated) {
470seq_span.swap(new_seq);
474 if(!seq_span.empty()) {
475seq_spans.push_back(seq_span);
480 if(seq_spans.size() <= 1) {
482 if(seq_spans.empty()) {
483 LOG_POST(
Warning<<
"All gaps/discontinuity for aln range [ "<< pos <<
", "<< (next_pos - 1) <<
"]");
492 if(newRow.
type== 0) {
501aln_spans.push_back(newRow);
519 intlength = next_pos - pos;
521 int size= (
int)seq_spans.size();
530 boolf_mismatch =
false;
531 for(
intj = 1; j <
size; j++) {
532 _ASSERT(
i< (
int)seq_spans[j].length());
533 if(seq_spans[j][
i] != seq_spans[0][
i]) {
541 if(new_span_type != f_span_type) {
550 if(span_start >
i- 1)
551 LOG_POST(
Error<<
"Miscalculating span: pos="<< pos <<
", start="<< span_start <<
", oend="<<
i);
552newRow.
type= f_span_type;
555aln_spans.push_back(newRow);
559f_span_type = new_span_type;
561}
while(
i< length);
566 for(
introw_to_delete = 0; row_to_delete <
m_NumSeqs; row_to_delete++) {
567 deleteiters[row_to_delete];
572TAlnSpans::iterator iter = aln_spans.begin();
573 for(; iter != aln_spans.end();) {
589 if(from == -1 || to == -1) {
598}
else if(iter->type &
fGap) {
619vector<bool> seq_strand(
m_NumSeqs,
true);
622 for(
intseq_ix = 0; seq_ix <
m_NumSeqs; seq_ix++) {
623seq_strand[seq_ix] =
m_SparseAlns[0]->IsPositiveStrand(seq_ix);
626 for(
intseq_ix = 0; seq_ix <
m_NumSeqs; seq_ix++) {
631vector<TSignedSeqPos> seq_pos(
m_NumSeqs, -1);
636 for(
intseq_ix = 0; seq_ix <
m_NumSeqs; seq_ix++) {
639 if(!span_row.
ranges[seq_ix].Empty()) {
645 stringsplice3, splice5;
647 if(seq_strand[seq_ix]) {
649 if(cur_seq_pos > 0 && cur_span_from > cur_seq_pos) {
653gap_row.
ranges[seq_ix] =
665seq_ix, intron, seq_pos[seq_ix], cur_span_from - 1
667 if(intron.length() > 1) {
668 stringsplice5 = intron.substr(0, 2);
669 stringsplice3 = intron.substr(intron.length() - 2);
677iter = rows.insert(iter, gap_row);
680seq_pos[seq_ix] = cur_span_to + 1;
684 if(cur_seq_pos > 0 && cur_span_to < cur_seq_pos) {
688gap_row.
ranges[seq_ix] =
700seq_ix, intron, cur_span_to + 1, seq_pos[seq_ix]
702 if(intron.length() > 1) {
703 stringsplice5 = intron.substr(0, 2);
704 stringsplice3 = intron.substr(intron.length() - 2);
712iter = rows.insert(iter, gap_row);
715seq_pos[seq_ix] = cur_span_from - 1;
731TAlnSpans::iterator iter = rows.begin();
732TAlnSpans::iterator
prev= rows.end();
734 for(; iter != rows.end();
prev= iter, ++iter) {
738|| iter->type ==
fTail 747 boolmerge_prev =
false;
748 boolmerge_next =
false;
750TAlnSpans::iterator
next= iter + 1;
771 if(
row>= (
int)merged_row.
ranges.size()) {
775}
else if(
row>= (
int)
prev->ranges.size()) {
793 if(
row>= (
int)merged_row.
ranges.size()) {
797}
else if(
row>= (
int)
next->ranges.size()) {
806TAlnSpans::iterator from = merge_prev ? iter :
next;
807TAlnSpans::iterator to = merge_next ? (
next+ 1) :
next;
809rows.erase(from, to);
826TAlnSpans::iterator iter = rows.begin();
827 for(; iter != rows.end();) {
828 if(pos < (
int)iter->aln_range.GetFrom()) {
841pos = iter->aln_range.GetTo() + 1;
863 unsignedwarning_cnt = 0;
864TAlnSpans::iterator iter =
m_AlnSpans.begin();
867 if(iter->length == 0) {
868iter->length = iter->aln_range.GetLength() /
m_BaseWidth;
870 if(iter->length == 0) {
872 if(warning_cnt < 10) {
875 if(warning_cnt == 10) {
876 LOG_POST(
Warning<<
"Zero length span! (Further warnings supressed)");
888((double)(iter->length - iter->mismatch)) / iter->length
890iter->identity = floor(identity * 10000 + 0.5) / 100.0;
893iter->identity = 0.0;
921 if(from == -1 || to == -1) {
930}
else if(iter->type &
fGap) {
956span_row.
locs.push_back(loc);
964loc->
SetInt().SetStrand(
969loc->
SetInt().SetStrand(
974span_row.
locs.push_back(loc);
983 intproduct_length = 0;
1003 if( poly_a < start ){
1011span_rows.push_back( newRow );
1013 if( poly_a < start -1 ){
1018newRow.
length= start - 1 - poly_a;
1021span_rows.push_back( newRow );
1033newRow.
length= start - 1;
1036span_rows.push_back( newRow );
1061 switch(chunk.
Which()) {
1073span_rows.push_back(newRow);
1075prod_pos += prod_len;
1089span_rows.push_back(newRow);
1091prod_pos += prod_len;
1108 if(aln_to < aln_from)
1109 swap(aln_to, aln_from);
1115from += 3 - from % 3;
1119}
else if(off == 2) {
1129 m_SparseAlns[0]->GetAlnSeqString(1, p_str, seq_range,
false);
1131 _ASSERT(g_str.size() == p_str.size());
1132 for(
auto i= 0;
i< g_str.size(); ++
i) {
1133 if(
i< p_str.size()) {
1134 if(g_str[
i] != p_str[
i])
1137newRow.
mismatch+= (g_str.size() - p_str.size());
1149span_rows.push_back(newRow);
1151prod_pos += prod_len;
1166newRow.
length= prod_len;
1167span_rows.push_back( newRow );
1169prod_pos += prod_len;
1179span_rows.push_back( newRow );
1198 if( poly_a >= product_length ){
1203 if( stop < poly_a ){
1204 if( stop+1 < poly_a ){
1209newRow.
length= (poly_a - stop) + 1;
1211span_rows.push_back( newRow );
1218newRow.
length= (product_length - poly_a);
1220span_rows.push_back( newRow );
1226 if( stop < product_length - 1){
1230newRow.
length= product_length - stop;
1233span_rows.push_back( newRow );
1237 for(
auto&& aln_row : span_rows) {
1238 if(!aln_row.ranges.empty() && aln_row.ranges[0].NotEmpty()) {
1239 TSeqPosaln_from =
m_SparseAlns[0]->GetAlnPosFromSeqPos((
int)0, aln_row.ranges[0].GetFrom());
1240 TSeqPosaln_to =
m_SparseAlns[0]->GetAlnPosFromSeqPos((
int)0, aln_row.ranges[0].GetTo());
1241 if(aln_to < aln_from)
1242 swap(aln_to, aln_from);
1243aln_row.aln_range.Set(aln_from, aln_to + 1);
1316 return static_cast<int>(
m_AlnSpans.size());
1322 return static_cast<int>(
m_ColNames.size());
1337 return wxT(
"string");
1344 return wxT(
"int");
1347 return wxT(
"double");
1350 return wxT(
"string");
1355 return wxT(
"string");
1359 static void s_Append(
string& dst,
const string& src)
1361 if( !dst.empty() ) {
1374 eUnknown,
"CAlnSpanVertModel::GetValueAt(): Array index out-of-bounds" 1382 switch( extra_col ){
1420 returnwxVariant( spanRow.
length);
1423 returnwxVariant( spanRow.
mismatch);
1426 returnwxVariant( spanRow.
gap);
1429 returnwxVariant( spanRow.
identity);
1436|| col >= (
int)spanRow.
ranges.size()
1437|| spanRow.
ranges[col].Empty()
1480 eUnknown,
"CAlnSpanVertModel::GetData(): Array index out-of-bounds"User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
void BuildAln(TAnchoredAlnVec &in_alns, CAnchoredAln &out_aln, const CAlnUserOptions &options, TAlnSeqIdIRef pseudo_seqid=TAlnSeqIdIRef())
Build anchored alignment from a set of alignmnets.
void CreateAnchoredAlnVec(_TAlnStats &aln_stats, TAnchoredAlnVec &out_vec, const CAlnUserOptions &options)
Create anchored alignment from each seq-align in the stats.
static void s_Append(string &dst, const string &src)
const_iterator insert(const CSeq_align &seq_align)
Insert new CSeq_align into the list.
size_type size(void) const
Container mapping seq-aligns to vectors of participating seq-ids.
void push_back(const CSeq_align &aln)
Adding an alignment.
void x_ConvertRowsToSpans(TAlnSpans &rows)
finally, convert our spans into rows for display
void x_MergeRows(TAlnSpans &rows)
CRange< TSignedSeqPos > m_AlnRange
void SetThreshold(int th)
virtual int GetNumColumns() const
Returns the number of columns in the model.
TAlnSpans m_AlnSpans
the alignments we represent
void SetShowIndels(bool indel)
virtual wxVariant GetValueAt(int i, int j) const
vector< string > m_ColNames
const SSpanRow & GetData(size_t row) const
access a given row's data
void x_PopulateRows(TAlnSpans &rows)
virtual int GetNumRows() const
Returns the number of rows in the model.
virtual wxString GetColumnName(int aColIx) const
Returns a default name for the column using spreadsheet conventions: A, B, C, ...
CRef< CSpliced_seg > m_SplicedSeg
vector< CRef< CSparseAln > > m_SparseAlns
virtual wxString GetColumnType(int aColIx) const
Tries to extract actual type from row 0 value if it exists.
vector< SSpanRow > TAlnSpans
void x_InsertIntrons(TAlnSpans &rows)
CAnchoredAln m_AnchoredAln
CRef< IAlnMultiDataSource > m_AlnSrc
void x_PopulateRowsSpliced(TAlnSpans &rows)
Helper class which collects seq-align statistics: seq-ids participating in alignments and rows,...
bool CanBeAnchored(void) const
Check if there are any ids which can be used as anchors for the whole set of alignments.
Options for different alignment manager operations.
@ ePreserveRows
Preserve all rows as they were in the input (e.g.
Query-anchored alignment can be 2 or multi-dimentional.
TDim GetDim(void) const
How many rows.
static void TranslateNAToAA(const string &na, string &aa, int gen_code=kDefaultGenCode)
TSeqPos GetSeqStop(TDim row) const
TDim CheckNumRows(void) const
TSeqPos GetSeqStart(TDim row) const
ENa_strand GetSeqStrand(TDim row) const
Get strand (the first one if segments have different strands).
virtual void x_FireDataChanged()
IAlignRowHandle provides an abstract way to access alignment row data.
virtual bool UsesAATranslation() const =0
virtual const objects::CBioseq_Handle & GetBioseqHandle() const =0
virtual bool CanGetSeqString() const =0
virtual string & GetAlnSeqString(string &buffer, const IAlnExplorer::TSignedRange &aln_rng) const =0
IAlnMultiDataSource - interface to a data source representing an abstract multiple alignment.
virtual IAlnExplorer::EAlignType GetAlignType() const =0
virtual TSignedSeqPos GetSeqPosFromAlnPos(TNumrow for_row, TSeqPos aln_pos, IAlnExplorer::ESearchDirection dir=IAlnExplorer::eNone, bool try_reverse_dir=true) const =0
virtual const IAlignRowHandle * GetRowHandle(TNumrow row) const =0
virtual bool IsPositiveStrand(TNumrow row) const =0
virtual TSeqPos GetAlnStart(void) const =0
virtual bool CanGetId(TNumrow row) const =0
virtual IAlnSegmentIterator * CreateSegmentIterator(TNumrow row, const IAlnExplorer::TSignedRange &range, IAlnSegmentIterator::EFlags flags) const =0
virtual TNumrow GetNumRows(void) const =0
number of rows in alignment
virtual TSeqPos GetAlnStop(void) const =0
virtual const objects::CSeq_id & GetSeqId(TNumrow row) const =0
Alignment segment iterator interface.
@ eAllSegments
Iterate all segments.
@ fIndel
Either anchor or the selected row is not present in the segment.
@ fGap
Both anchor row and the selected row are not included in the segment (some other row is present and t...
virtual TSegTypeFlags GetType(void) const =0
Get current segment type.
virtual const TSignedRange & GetAlnRange(void) const =0
Get alignment range for the segment.
virtual int GetGenCode(IAlnExplorer::TNumrow row) const =0
bool IsConsensusSplice(const string &splice5, const string &splice3)
Consensus splice is GY..AG or AT..AC.
static DLIST_TYPE *DLIST_NAME() prev(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
static DLIST_TYPE *DLIST_NAME() next(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
int TSignedSeqPos
Type for signed sequence position.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
#define LOG_POST(message)
This macro is deprecated and it's strongly recomended to move in all projects (except tests) to macro...
void Error(CExceptionArgs_Base &args)
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
void Warning(CExceptionArgs_Base &args)
static void GetLabel(const CObject &obj, string *label, ELabelType type=eDefault)
void SetId(CSeq_id &id)
set the 'id' field in all parts of this location
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
void Reset(void)
Reset reference object.
position_type GetLength(void) const
position_type GetToOpen(void) const
static position_type GetPositionMax(void)
static TThisType GetEmpty(void)
TThisType & Set(position_type from, position_type to)
CRange< TSeqPos > TSeqRange
typedefs for sequence ranges
CRange< TSignedSeqPos > TSignedSeqRange
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
@ fWithCommas
Use commas as thousands separator.
static const char label[]
void SetFrom(TFrom value)
Assign a value to From data member.
TTo GetTo(void) const
Get the To member data.
TFrom GetFrom(void) const
Get the From member data.
void SetTo(TTo value)
Assign a value to To data member.
const TProtpos & GetProtpos(void) const
Get the variant data.
const TGenomic_id & GetGenomic_id(void) const
Get the Genomic_id member data.
bool IsSetParts(void) const
basic seqments always are in biologic order Check if a value has been assigned to Parts data member.
TMatch GetMatch(void) const
Get the variant data.
const TProduct_id & GetProduct_id(void) const
Get the Product_id member data.
TGenomic_start GetGenomic_start(void) const
Get the Genomic_start member data.
TProduct_length GetProduct_length(void) const
Get the Product_length member data.
bool IsSetPoly_a(void) const
start of poly(A) tail on the transcript For sense transcripts: aligned product positions < poly-a <= ...
TDiag GetDiag(void) const
Get the variant data.
TProduct_type GetProduct_type(void) const
Get the Product_type member data.
TMismatch GetMismatch(void) const
Get the variant data.
TAmin GetAmin(void) const
Get the Amin member data.
const TParts & GetParts(void) const
Get the Parts member data.
const TProduct_start & GetProduct_start(void) const
Get the Product_start member data.
TGenomic_ins GetGenomic_ins(void) const
Get the variant data.
list< CRef< CSpliced_exon > > TExons
const TExons & GetExons(void) const
Get the Exons member data.
list< CRef< CSpliced_exon_chunk > > TParts
bool IsSetProduct_length(void) const
length of the product, in bases/residues from this (or from poly-a if present), a 3' unaligned length...
TPoly_a GetPoly_a(void) const
Get the Poly_a member data.
TProduct_ins GetProduct_ins(void) const
Get the variant data.
TNucpos GetNucpos(void) const
Get the variant data.
E_Choice Which(void) const
Which variant is currently selected.
@ e_Product_ins
insertion in product sequence (i.e. gap in the genomic sequence)
@ e_Diag
both sequences are represented, there is sufficient similarity between product and genomic sequences....
@ e_Genomic_ins
insertion in genomic sequence (i.e. gap in the product sequence)
@ e_Match
both sequences represented, product and genomic sequences match
@ e_Mismatch
both sequences represented, product and genomic sequences do not match
unsigned int
A callback function used to compare two keys in a database.
<!DOCTYPE HTML >< html > n< header > n< title > PubSeq Gateway Help Page</title > n< style > n th
const struct ncbi::grid::netcache::search::fields::SIZE size
const CharType(& source)[N]
NCBI C++ stream class wrappers for triggering between "new" and "old" C++ stream libraries.
vector< CRef< CAnchoredAln > > TAnchoredAlnVec
Collection of anchored alignments.
static bool GetSeqId(const T &d, set< string > &labels, const string name="", bool detect=false, bool found=false)
#define row(bind, expected)
the alignment we store with its parsed data
vector< CConstRef< objects::CSeq_loc > > locs
vector< string > str_ranges
visible values
vector< TSignedSeqRange > ranges
wxString ToWxString(const string &s)
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4