m_set_GapChar(
false),
63m_set_EndChar(
false),
73m_set_GapChar(
false),
74m_set_EndChar(
false),
98 stringerrstr =
string(
"CAlnVec::GetBioseqHandle(): ")
99+
"Seq-id cannot be resolved: " 113seq_vec = iter->second;
160 for(
int i=0;
i<chunk_vec->size();
i++) {
163 if(chunk->GetType() &
fSeq) {
166seq_vec.
GetSeqData(chunk->GetRange().GetFrom(),
167chunk->GetRange().GetTo() + 1,
170seq_vec.
GetSeqData(seq_vec_size - chunk->GetRange().GetTo() - 1,
171seq_vec_size - chunk->GetRange().GetFrom(),
180 const int n= chunk->GetAlnRange().GetLength();
181 char* ch_buff =
new char[
n+1];
189memset(ch_buff, fill_ch,
n);
204 unsigned intscrn_width,
217scrn_lft_seq_pos = -1,
218scrn_rgt_seq_pos = -1,
222 intpos, nscrns,
delta;
232 const boolrecord_inserts = insert_starts && insert_lens;
233 const boolrecord_coords = scrn_width && scrn_lefts && scrn_rights;
244 for(seg = 0, pos =
row, aln_pos = 0, anchor_pos =
m_Anchor;
250 len= seg_len * width;
252 if(anchored &&
m_Starts[anchor_pos] < 0) {
255 if(record_inserts) {
258prev_start -
len)) {
261insert_lens->pop_back();
262insert_lens->push_back(ttl_len);
264insert_starts->pop_back();
265insert_starts->push_back(start);
268prev_aln_pos = aln_pos / width;
270insert_starts->push_back(start);
271insert_aln_starts->push_back(prev_aln_pos);
272insert_lens->push_back(
len);
280stop = start +
len- 1;
286 if(buf_len < seg_len) {
288buf_len = seg_len - buf_len;
291 if(seg < left_seg || seg > right_seg) {
297 for(
size_t i= 0;
i< buf_len; ++
i) {
304 if(scrn_lft_seq_pos < 0) {
305scrn_lft_seq_pos =
plus? start : stop;
306 if(scrn_rgt_seq_pos < 0) {
307scrn_rgt_seq_pos = scrn_lft_seq_pos;
311nscrns = (aln_pos - scrn_pos) / scrn_width;
312 for(
int i= 0;
i< nscrns;
i++) {
313scrn_lefts->push_back(scrn_lft_seq_pos);
314scrn_rights->push_back(scrn_rgt_seq_pos);
316scrn_lft_seq_pos =
plus? start : stop;
318scrn_pos += scrn_width;
321scrn_lft_seq_pos =
plus? start : stop;
324nscrns = (aln_pos +
len- scrn_pos) / scrn_width;
326 for(
int i= 0;
i< nscrns;
i++) {
328scrn_width - (curr_pos - scrn_pos) :
329curr_pos - scrn_pos - scrn_width);
331scrn_lefts->push_back(scrn_lft_seq_pos);
333scrn_lft_seq_pos < start :
334scrn_lft_seq_pos > stop) {
335scrn_lft_seq_pos = (
plus? start : stop) +
337scrn_rgt_seq_pos = scrn_lft_seq_pos +
340scrn_rgt_seq_pos = scrn_lft_seq_pos + (
plus? -1 : 1)
342scrn_lft_seq_pos +=
delta;
344 if(seg == left_seg &&
345scrn_lft_seq_pos == scrn_rgt_seq_pos) {
352scrn_rights->push_back(scrn_rgt_seq_pos);
353curr_pos = scrn_pos += scrn_width;
355 if(aln_pos +
len<= scrn_pos) {
356scrn_lft_seq_pos = -1;
358scrn_rgt_seq_pos =
plus? stop : start;
365 if(seg < left_seg || seg > right_seg) {
371 for(
size_t i= 0;
i< seg_len; ++
i) {
383 TSeqPospos_diff = aln_pos - scrn_pos;
385nscrns = pos_diff / scrn_width;
386 if(pos_diff % scrn_width) {
389 for(
int i= 0;
i< nscrns;
i++) {
390scrn_lefts->push_back(scrn_lft_seq_pos);
391scrn_rights->push_back(scrn_rgt_seq_pos);
393scrn_lft_seq_pos = scrn_rgt_seq_pos;
395scrn_pos += scrn_width;
419vector<string>* consens)
const 421consensus_seq.
Reset();
433 if(consens ==
NULL) {
457 for(
i= 0;
i< consens->size(); ++
i) {
459 for(j = 0; j < (size_t)
m_NumRows; ++j) {
472 if((*consens)[
i].length() != 0) {
473new_ds->
SetStarts().push_back(total_bases);
483 data+= (*consens)[
i];
487 for(
i= 0;
i<
m_Ids.size(); ++
i) {
496 id->Assign(consensus_id);
497consensus_seq.
SetId().push_back(
id);
499new_ds->
SetIds().push_back(
id);
504desc.
Set().push_back(d);
505d->
SetComment(
"This is a generated consensus sequence");
523consensus_row =
int(new_ds->
GetIds().size()) - 1;
531 size_trows = segs.size();
533 for(
size_t row= 0;
row< rows; ++
row) {
534 const string& s = segs[
row];
541 buf=
new char[(rows+1)*(cols+1)];
543 const char* src = s.c_str();
544 char* dst =
buf+(
row-gap_rows);
545 while((*dst = *src++)) {
550 for(
size_tcol = 0; col < cols; ++col) {
551 char* col_buf =
buf+ col*(rows+1);
552*(col_buf+(rows-gap_rows)) = 0;
553segs.push_back(
string(col_buf));
562fill_n(base_count, numBases, 0);
564 const char*
i= col.c_str();
566 while((c = *
i++)) {
640fill_n(base_count, numBases, 0);
642 const char*
i= col.c_str();
644 while((c = *
i++)) {
646 if(0<=pos && pos < numBases)
655 const intnumBases = isNucleotide ? 4 : 26;
663 for(
size_tj = 0; j < (size_t)
m_NumSegs; ++j) {
674 if( gap_count > gap_seg_thresh )
684consens[j].resize(
m_Lens[j]);
696 for(
size_t i= 0;
i<
m_Lens[j]; ++
i) {
711 for(
intk = 0; k < numBases; ++k) {
721 if(rev_map.count(rev_map.begin()->first) == 1 &&
722rev_map.begin()->first >= base_thresh) {
723consens[j][
i] = isNucleotide ?
724 ToIupac(rev_map.begin()->second) :
725(rev_map.begin()->second+
'A');
730 unsigned charc = 0x00;
732TRevMap::iterator curr = rev_map.begin();
733TRevMap::iterator
prev= rev_map.begin();
735curr != rev_map.end() &&
736(freq < base_thresh ||
prev->first == curr->first);
743 unsigned charcur_char = curr->second+
'A';
748 case 'N':
case 'D':
749c = (cur_char ==
'N'|| cur_char ==
'D') ?
'B':
'X';
751 case 'Q':
case 'E':
752c = (cur_char ==
'Q'|| cur_char ==
'E') ?
'Z':
'X';
754 case 'I':
case 'L':
755c = (cur_char ==
'I'|| cur_char ==
'L') ?
'J':
'X';
767consens[j][
i] = isNucleotide ?
'N':
'X';
769consens[j][
i] = isNucleotide ?
ToIupac(c) : c;
779 size_tsegment_row_index = segment*
m_NumRows;
780 for(
size_t i= 0;
i< (size_t)
m_NumRows; ++
i, ++segment_row_index) {
785 string& s = segs[
i];
802 const CSeq_id& consensus_id)
const 806*bioseq, consensus_id);
827 bools1_is_prot,
bools2_is_prot,
828 intgen_code1,
intgen_code2)
831 if(s1_is_prot == s2_is_prot && s1.length() != s2.length()) {
833 "CAlnVec::CalculateScore(): " 834 "Strings should have equal lenghts.");
835}
else if(s1.length() * (s1_is_prot ? 1 : 3) !=
836s2.length() * (s2_is_prot ? 1 : 3)) {
838 "CAlnVec::CalculateScore(): " 839 "Strings lengths do not match.");
844 const unsigned char* res1 = (
unsigned char*) s1.c_str();
845 const unsigned char* res2 = (
unsigned char*) s2.c_str();
846 const unsigned char* end1 = res1 + s1.length();
847 const unsigned char* end2 = res2 + s2.length();
849 static bools_FullScoreMatrixInitialized =
false;
850 if(s1_is_prot && s2_is_prot) {
851 if( !s_FullScoreMatrixInitialized ) {
852s_FullScoreMatrixInitialized =
true;
857 for( ; res1 != end1; res1++, res2++) {
862}
else if( !s1_is_prot && !s2_is_prot ) {
864 for( ; res1 != end1; res1++, res2++) {
865 if(*res1 == *res2) {
875 for( ; res1 != end1; res1++, res2++) {
882 for( ; res2 != end2; res1++, res2++) {
899 "CAlnVec::TranslateNAToAA(): " 900 "NA size expected to be divisible by 3");
905 size_tna_size = na.size();
908aa.resize(na_size / 3);
913 for(
size_tna_i = 0; na_i < na_size; ) {
914 for(
size_t i= 0;
i< 3;
i++) {
929 TNumrowindex1 = row1, index2 = row2;
951 if(start1 >=0 && start2 >= 0) {
986 boolgaps_in_count)
const 1012 stringna_buff, aa_buff;
1025 if(residue_count) {
1042 if(gaps_in_count && residue_count) {
1057residue_cnt.resize(16, 0);
1061 int max= 0, total = 0;
1063 if(*i_res >
max) {
1069 return100 *
max/ total;
static CRef< CScope > m_Scope
User-defined methods of the data storage class.
static SNCBIFullScoreMatrix s_FullScoreMatrix
bool IsSetAnchor(void) const
list< TSeqPos > TSeqPosList
int GetWidth(TNumrow row) const
const TNumseg & x_GetSeqLeftSeg(TNumrow row) const
const CDense_seg::TStarts & m_Starts
TSegTypeFlags GetSegType(TNumrow row, TNumseg seg, int offset=0) const
TSignedSeqPos GetStart(TNumrow row, TNumseg seg, int offset=0) const
const CSeq_id & GetSeqId(TNumrow row) const
bool IsPositiveStrand(TNumrow row) const
const CDense_seg::TIds & m_Ids
TNumseg GetSeg(TSeqPos aln_pos) const
TDim GetNumRows(void) const
const CDense_seg::TStrands & m_Strands
CConstRef< CDense_seg > m_DS
const TNumseg & x_GetSeqRightSeg(TNumrow row) const
CRef< CAlnChunkVec > GetAlnChunks(TNumrow row, const TSignedRange &range, TGetChunkFlags flags=fAlnSegsOnly) const
unsigned int TSegTypeFlags
TSeqPos GetAlnStart(void) const
TSeqPos GetLen(TNumseg seg, int offset=0) const
CDense_seg::TNumseg TNumseg
TSeqPos GetAlnStop(void) const
const CDense_seg::TLens & m_Lens
static void CollectNucleotideFrequences(const string &col, int base_count[], int numBases)
CAlnVec(const CDense_seg &ds, CScope &scope)
const CBioseq_Handle & GetBioseqHandle(TNumrow row) const
TResidue GetGapChar(TNumrow row) const
string & GetSeqString(string &buffer, TNumrow row, TSeqPos seq_from, TSeqPos seq_to) const
TResidue GetEndChar() const
static void TranslateNAToAA(const string &na, string &aa, int gen_code=kDefaultGenCode)
int GetGenCode(TNumrow row) const
string & GetColumnVector(string &buffer, TSeqPos aln_pos, TResidueCount *residue_count=0, bool gaps_in_count=false) const
string & GetWholeAlnSeqString(TNumrow row, string &buffer, TSeqPosList *insert_aln_starts=0, TSeqPosList *insert_starts=0, TSeqPosList *insert_lens=0, unsigned int scrn_width=0, TSeqPosList *scrn_lefts=0, TSeqPosList *scrn_rights=0) const
CSeqVector & x_GetSeqVector(TNumrow row) const
CScope & GetScope(void) const
static void CollectProteinFrequences(const string &col, int base_count[], int numBases)
static unsigned char ToIupac(unsigned char c)
CRef< CDense_seg > CreateConsensus(int &consensus_row) const
string & GetAlnSeqString(string &buffer, TNumrow row, const CAlnMap::TSignedRange &aln_rng) const
int CalculateScore(TNumrow row1, TNumrow row2) const
void RetrieveSegmentSequences(size_t segment, vector< string > &segs) const
TBioseqHandleCache m_BioseqHandlesCache
vector< int > TResidueCount
static unsigned char FromIupac(unsigned char c)
TSeqVectorCache m_SeqVectorCache
int CalculatePercentIdentity(TSeqPos aln_pos) const
static void TransposeSequences(vector< string > &segs)
static const CTrans_table & GetTransTable(int id)
@Seq_descr.hpp User-defined methods of the data storage class.
char GetCodonResidue(int state) const
static int NextCodonState(int state, unsigned char ch)
container_type::iterator iterator
const_iterator end() const
const_iterator find(const key_type &key) const
static DLIST_TYPE *DLIST_NAME() prev(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
static const char * column
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
int TSignedSeqPos
Type for signed sequence position.
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
const string AsFastaString(void) const
CSeq_entry_Handle AddTopLevelSeqEntry(CSeq_entry &top_entry, TPriority pri=kPriority_Default, EExist action=eExist_Default)
Add seq_entry, default priority is higher than for defaults or loaders Add object to the score with p...
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
bool IsNucleotide(void) const
TBioseqCore GetBioseqCore(void) const
Get bioseq core structure.
CSeqVector GetSeqVector(EVectorCoding coding, ENa_strand strand=eNa_strand_plus) const
Get sequence: Iupacna or Iupacaa if use_iupac_coding is true.
@ eStrand_Plus
Plus strand.
@ eStrand_Minus
Minus strand.
@ eCoding_Iupac
Set coding to printable coding (Iupacna or Iupacaa)
void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const
Fill the buffer string with the sequence data for the interval [start, stop).
bool IsProtein(void) const
void SetCoding(TCoding coding)
void SetIupacCoding(void)
Set coding to either Iupacaa or Iupacna depending on molecule type.
bool IsNucleotide(void) const
void Reset(void)
Reset reference object.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
TLens & SetLens(void)
Assign a value to Lens data member.
void SetDim(TDim value)
Assign a value to Dim data member.
TStarts & SetStarts(void)
Assign a value to Starts data member.
TStrands & SetStrands(void)
Assign a value to Strands data member.
void SetNumseg(TNumseg value)
Assign a value to Numseg data member.
const TIds & GetIds(void) const
Get the Ids member data.
TIds & SetIds(void)
Assign a value to Ids data member.
TSeq & SetSeq(void)
Select the variant.
TId & SetId(void)
Assign a value to Id data member.
const TInst & GetInst(void) const
Get the Inst member data.
TIupacna & SetIupacna(void)
Select the variant.
TMol GetMol(void) const
Get the Mol member data.
TComment & SetComment(void)
Select the variant.
void SetInst(TInst &value)
Assign a value to Inst data member.
virtual void Reset(void)
Reset the whole object.
void SetDescr(TDescr &value)
Assign a value to Descr data member.
void SetRepr(TRepr value)
Assign a value to Repr data member.
Tdata & Set(void)
Assign a value to data member.
void SetLength(TLength value)
Assign a value to Length data member.
void SetSeq_data(TSeq_data &value)
Assign a value to Seq_data data member.
TIupacaa & SetIupacaa(void)
Select the variant.
void SetMol(TMol value)
Assign a value to Mol data member.
@ eRepr_raw
continuous sequence
@ e_not_set
No variant selected.
@ eMol_na
just a nucleic acid
unsigned int
A callback function used to compare two keys in a database.
double value_type
The numeric datatype used by the parser.
const struct ncbi::grid::netcache::search::fields::SIZE size
Int4 delta(size_t dimension_, const Int4 *score_)
const SNCBIPackedScoreMatrix NCBISM_Blosum62
#define NCBI_FSM_DIM
Recommended approach: unpack and index directly.
void NCBISM_Unpack(const SNCBIPackedScoreMatrix *psm, SNCBIFullScoreMatrix *fsm)
Expand a packed score matrix into an unpacked one, which callers can proceed to index directly by sta...
#define row(bind, expected)
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4