m_splign_results(splign.GetResult())
86 const charkSeqId_not_set [] =
"lcl|ID_not_set";
114 const stringquerystr (use_fasta_style_ids?
117 const stringsubjstr (use_fasta_style_ids?
123 for(
size_t i(0), seg_dim (ii->m_Segments.size());
i< seg_dim; ++
i) {
127oss << (ii->m_QueryStrand?
'+':
'-')
132oss << seg.
m_idty<<
'\t';
138oss << seg.
m_len<<
'\t' 139<< seg.
m_box[0] + 1 <<
'\t'<< seg.
m_box[1] + 1 <<
'\t';
142oss << seg.
m_box[2] + 1 <<
'\t' 143<< seg.
m_box[3] + 1 <<
'\t';
153 if(print_exon_scores) {
162 else if(
i== seg_dim - 1) {
169 if(print_exon_scores) {
177 const boolpolya_present (ii->m_PolyA > 0 && ii->m_PolyA < ii->m_QueryLen);
184 if(ii->m_QueryStrand) {
185polya_len = ii->m_QueryLen - ii->m_PolyA;
188start = 1 + ii->m_PolyA;
189stop = ii->m_QueryLen;
192polya_len = 1 + ii->m_PolyA;
199oss << c1 << ii->m_Id <<
'\t'<< querystr <<
'\t'<< subjstr
200<<
"\t-\t"<< polya_len <<
'\t';
201oss << start <<
'\t'<< stop
202<<
"\t-\t-\t<poly-"<< c2 <<
">\t-";
203 if(print_exon_scores) {
218 copy(strx.begin(), strx.end(), s.begin() + 9 - strx.size());
233 const size_textra_chars = 5;
235 const stringkNotSet (
"id_not_set");
244 stringquery_sequence_sense;
253 const stringkTenner (10,
' ');
261 const boolqstrand = ii->m_QueryStrand;
262 const boolsstrand = ii->m_SubjStrand;
263 const charqc = qstrand?
'+':
'-';
264 const charsc = sstrand?
'+':
'-';
266oss << endl << '>
' << qc << ii->m_Id << '\t' 267 << querystr << '(
' << qc << ")\t" 268 << subjstr << '(
' << sc << ')
' << endl; 270 size_t exons_total = 0; 271 ITERATE(CSplign::TSegments, jj, ii->m_Segments) { 277 string query_sequence; 279 query_sequence = query_sequence_sense; 282 query_sequence.resize(query_sequence_sense.size()); 283 transform(query_sequence_sense.rbegin(), query_sequence_sense.rend(), 284 query_sequence.begin(),SCompliment()); 287 string cds_sequence, query_protein; 288 if(ii->m_Cds_start < ii->m_Cds_stop) { 290 cds_sequence.resize(ii->m_Cds_stop - ii->m_Cds_start + 1); 291 copy(query_sequence.begin() + ii->m_Cds_start, 292 query_sequence.begin() + ii->m_Cds_stop + 1, 293 cds_sequence.begin()); 294 CSeqTranslator::Translate(cds_sequence, query_protein); 297 size_t exon_count = 0, seg_count = 0; 298 int query_aa_idx (0); 299 int qframe(ii->m_Cds_start < ii->m_Cds_stop? -2: -3); 300 ITERATE(CSplign::TSegments, jj, ii->m_Segments) { 302 const CSplign::TSegment & s (*jj); 305 size_t qbeg = s.m_box[0]; 306 size_t qfin = s.m_box[1]; 307 size_t sbeg = s.m_box[2]; 308 size_t sfin = s.m_box[3]; 319 if(exon_count + 1 < exons_total) { 350 sv_subj.GetSeqData(sv_subj.begin() + TSeqPos(s0), 351 sv_subj.begin() + TSeqPos(s1 + 1), str); 352 vector<char> subj (str.size()); 354 copy(str.begin(), str.end(), subj.begin()); 357 reverse(str.begin(), str.end()); 358 transform(str.begin(), str.end(), 359 subj.begin(), SCompliment()); 363 const size_t Q0 (q0); 364 q0 = query_sequence.size() - q1 - 1; 365 q1 = query_sequence.size() - Q0 - 1; 368 vector<char> query (q1 - q0 + 1); 369 copy(query_sequence.begin() + q0, query_sequence.begin() + q1 + 1, 372 const bool do_print (segnum == -1 || int(seg_count) == segnum); 375 oss << endl << " Exon " << (exon_count + 1) << " (" 376 << (1 + s.m_box[0]) << '-
' << (1 + s.m_box[1]) << ',
' 377 << (1 + s.m_box[2]) << '-
' << (1 + s.m_box[3]) << ") " 378 << "Len = " << s.m_len << ' ' 379 << "Identity = " << s.m_idty << endl; 387 MakeLeftHeader(qbeg + 1, &l1); 388 MakeLeftHeader(sbeg + 1, &l3); 392 trans.assign(extra_chars, '#
'); 394 trans.append(s.m_details); 395 if(exon_count + 1 < exons_total) { 396 trans.append(extra_chars, '#
'); 399 for(size_t t = 0, td = trans.size(), iq = 0, is = 0; t < td; ++t) { 401 char c = trans[t], c1, c2, c3, c0, c4; 403 if(qframe == -2 && q0 + iq == ii->m_Cds_start) { 407 if(qframe >= 0 && q0 + iq >= ii->m_Cds_stop) { 444 NCBI_THROW(CAlgoAlignException, 446 g_msg_UnknownTranscriptSymbol + c); 450 if(qframe >= -1 && (c == 'M' || c == 'R' || c == 'D')) { 451 qframe = (qframe + 1) % 3; 453 if(c != '#
' && c != 'I
' && qframe == 1) { 454 c0 = query_protein[query_aa_idx++]; 462 if(l1.size() == 10 + line_width) { 465 oss << l0 << endl << l1 << endl 466 << l2 << endl << l3 << endl << endl; 469 l0 = l1 = l2 = l3 = kTenner; 485 MakeLeftHeader(q0 + 1, &l1); 486 MakeLeftHeader(s0 + 1, &l3); 493 oss << l0 << endl << l1 << endl << l2 << endl << l3 << endl; 496 l0 = l1 = l2 = l3 = kTenner; 502 if(qframe >= 0) qframe = -3; // disable further translation 509 return CNcbiOstrstreamToString(oss); 514 double CalcIdentity(const string& transcript) 517 ITERATE(string, ii, transcript) { 519 ++matches; // std::count() not supported by some compilers 522 return double(matches) / transcript.size(); 526 CRef<CSeq_align> CSplignFormatter::x_Compartment2SeqAlign ( 527 const vector<size_t>& boxes, 528 const vector<string>& transcripts, 529 const vector<float>& scores ) const 531 const size_t num_exons (boxes.size() / 4); 533 CRef<CSeq_align> sa (new CSeq_align); 537 // this is a discontinuous alignment 538 sa->SetType(CSeq_align::eType_disc); 541 // create seq-align-set 542 CSeq_align_set& sas = sa->SetSegs().SetDisc(); 543 list<CRef<CSeq_align> >& sas_data = sas.Set(); 545 for(size_t i = 0; i < num_exons; ++i) { 547 CRef<CSeq_align> sa (new CSeq_align); 550 sa->SetType(CSeq_align::eType_global); 553 CRef<CScore> score (new CScore); 554 score->SetId().SetStr("splign"); 555 score->SetValue().SetReal(scores[i]); 556 CSeq_align::TScore& scorelist = sa->SetScore(); 557 scorelist.push_back(score); 559 // add percent identity 560 CRef<CScore> idty (new CScore); 561 idty->SetId().SetStr("idty"); 562 idty->SetValue().SetReal(CalcIdentity(transcripts[i])); 563 scorelist.push_back(idty); 565 CDense_seg& ds = sa->SetSegs().SetDenseg(); 567 const size_t* box = &(*(boxes.begin() + i*4)); 568 const TSeqPos query_start = Convert(box[0]); 569 ENa_strand query_strand = box[0] <= box[1]? eNa_strand_plus: 571 const TSeqPos subj_start = Convert(box[2]); 572 ENa_strand subj_strand = box[2] <= box[3]? 
eNa_strand_plus: 574 ds.FromTranscript(query_start, query_strand, subj_start, subj_strand, 576 // don'tinclude strands when both are
positive 581vector< CRef< CSeq_id > > &ids = ds.SetIds();
585ids.push_back(id_query);
589ids.push_back(id_subj);
591sas_data.push_back(sa);
626 const boolspliced_seg (flag & 0x0001);
627 const boolwith_parts (flag & 0x0002);
669 if(polya > 0 && polya < query_len) {
674 for(
size_t i(0), seg_dim ((*ii).m_Segments.size());
i< seg_dim; ++
i) {
681 TSeqPosqmin, qmax, smin, smax;
700exon->SetProduct_start().SetNucpos(qmin);
701exon->SetProduct_end().SetNucpos(qmax);
703exon->SetGenomic_start(smin);
704exon->SetGenomic_end(smax);
711score->SetId().SetStr(
"splign");
712score->SetValue().SetReal(seg.
m_score);
713scores.push_back(score);
719score->SetId().SetStr(
"idty");
720score->SetValue().SetReal(seg.
m_idty);
721scores.push_back(score);
724 if(
i>0 && !(*ii).m_Segments[
i-1].m_exon) {
725exon->SetPartial(
true);
728 const size_tadim (seg.
m_annot.size());
729 if(
i>0 && (*ii).m_Segments[
i-1].m_exon) {
731 if(adim > 2 && seg.
m_annot[2] ==
'<') {
733acc.push_back(seg.
m_annot[0]);
734acc.push_back(seg.
m_annot[1]);
735exon->SetAcceptor_before_exon().SetBases(acc);
739 if(
i+1<seg_dim && !(*ii).m_Segments[
i+1].m_exon) {
740exon->SetPartial(
true);
743 if(
i+1<seg_dim && (*ii).m_Segments[
i+1].m_exon) {
745 if(adim > 2 && seg.
m_annot[adim - 3] ==
'>') {
747dnr.push_back(seg.
m_annot[adim - 2]);
748dnr.push_back(seg.
m_annot[adim - 1]);
749exon->SetDonor_after_exon().SetBases(dnr);
760 "Alignment details not available");
780exons.push_back(exon);
790uo->
SetType().SetStr(
"origin");
809vector<size_t> boxes;
810vector<string> transcripts;
811vector<float> scores;
813 for(
size_t i(0), seg_dim (ii->m_Segments.size());
i< seg_dim; ++
i) {
818scores.push_back(seg.
m_score);
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
void remove_if(Container &c, Predicate *__pred)
static string s_RunLengthEncode(const string &in)
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string. Sample usage:
CRef< objects::CSeq_align_set > AsSeqAlignSet(const CSplign::TResults *results=0, int flags=eAF_SplicedSegWithParts) const
Format alignment as a seq-align-set.
CConstRef< objects::CSeq_id > m_QueryId
void SetSeqIds(CConstRef< objects::CSeq_id > id1, CConstRef< objects::CSeq_id > id2)
CRef< objects::CSeq_align > x_Compartment2SeqAlign(const vector< size_t > &boxes, const vector< string > &transcripts, const vector< float > &scores) const
CSplignFormatter(const CSplign::TResults &results)
string AsExonTable(const CSplign::TResults *results=0, int flags=eTF_None) const
CConstRef< objects::CSeq_id > m_SubjId
const CSplign::TResults m_splign_results
string AsAlignmentText(CRef< objects::CScope > scope, const CSplign::TResults *results=0, size_t line_width=80, int segnum=-1) const
Format alignment as plain text.
CSplign is the central library object for computing spliced cDNA-to-genomic alignments.
static CVersionAPI & s_GetVersion(void)
Retrieve the library's version object.
vector< SAlignedCompartment > TResults
void Print(const CCompactSAMApplication::AlignInfo &ai)
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
CSeqVector GetSeqVector(EVectorCoding coding, ENa_strand strand=eNa_strand_plus) const
Get sequence: Iupacna or Iupacaa if use_iupac_coding is true.
@ eCoding_Iupac
Set coding to printable coding (Iupacna or Iupacaa)
void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const
Fill the buffer string with the sequence data for the interval [start, stop).
const_iterator begin(void) const
const_iterator end(void) const
bool IsNull(void) const THROWS_NONE
Check if pointer is null -- same effect as Empty().
TObjectType * GetNonNullPointer(void) const
Get pointer value and throw a null pointer exception if pointer is null.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
static string SizetToString(size_t value, TNumToStringFlags flags=0, int base=10)
Convert size_t to string.
static CStringUTF8 AsUTF8(const CTempString &src, EEncoding encoding, EValidate validate=eNoValidate)
Convert into UTF8 from a C/C++ string.
@ fVersionInfo
Print version info.
TData & SetData(void)
Assign a value to Data data member.
void SetLabel(TLabel &value)
Assign a value to Label data member.
TStr & SetStr(void)
Select the variant.
void SetType(TType &value)
Assign a value to Type data member.
void SetData(TData &value)
Assign a value to Data data member.
Tdata & Set(void)
Assign a value to data member.
list< CRef< CScore > > Tdata
void SetProduct_id(TProduct_id &value)
Assign a value to Product_id data member.
void SetSegs(TSegs &value)
Assign a value to Segs data member.
TExons & SetExons(void)
Assign a value to Exons data member.
void SetProduct_strand(TProduct_strand value)
Assign a value to Product_strand data member.
void SetProduct_length(TProduct_length value)
Assign a value to Product_length data member.
void SetDim(TDim value)
Assign a value to Dim data member.
list< CRef< CUser_object > > TExt
void SetType(TType value)
Assign a value to Type data member.
TExt & SetExt(void)
Assign a value to Ext data member.
void SetProduct_type(TProduct_type value)
Assign a value to Product_type data member.
list< CRef< CSpliced_exon > > TExons
void SetPoly_a(TPoly_a value)
Assign a value to Poly_a data member.
void SetGenomic_id(TGenomic_id &value)
Assign a value to Genomic_id data member.
list< CRef< CSpliced_exon_chunk > > TParts
void SetGenomic_strand(TGenomic_strand value)
Assign a value to Genomic_strand data member.
list< CRef< CSeq_align > > Tdata
@ eProduct_type_transcript
void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)
CRef< CSpliced_exon_chunk > CreateSplicedExonChunk(char cur, size_t count)
void MakeLeftHeader(size_t x, string *ps)
const char g_msg_UnknownTranscriptSymbol[]
const value_slice::CValueConvert< value_slice::SRunTimeCP, FROM > Convert(const FROM &value)
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4