bases[4];
66 for(
intna4 = 0; na4 < 16; na4++) {
69 for(
intbit = 0; bit < 4; bit++) {
71 if( !na4 || (na4 & (1 << bit)) ) {
87 for(
intbit = 0; bit < 4; bit++) {
93 for(
intbase = 0; base < 4; base++) {
94 if(!bases[base] ||
rnd> bases[base]) {
117 intbase4na = *
data;
142m_SeqMap(vec.m_SeqMap),
146m_Strand(vec.m_Strand),
147m_Coding(vec.m_Coding)
155m_SeqMap(&bioseq.GetSeqMap()),
156m_TSE(bioseq.GetTSE_Handle()),
196m_SeqMap(
CSeqMap::GetSeqMapForSeq_loc(loc, &scope)),
202 m_TSE= bh.GetTSE_Handle();
229m_SeqMap(
CSeqMap::CreateSeqMapForBioseq(bioseq)),
246 if( &vec !=
this) {
316src_end =
min(src_end,
size());
317 if( src_pos >= src_end ) {
323 "CSeqVector::GetPackedSeqData: " 324 "cannot get seq-data in range: " 325<<src_pos<<
"-"<<src_end);
329 switch( dst_coding ) {
346 "Can not pack data using the selected coding: "<<
355 size_tsrc_pos,
size_t count)
360dst_str.append(src_str.data()+src_pos,
count);
367 size_tsrc_pos,
size_t count)
372dst_str.append(&src_str[src_pos],
count);
381dst_str.append(
count, gap);
388 const char* src,
size_t count)
395dst += char((dst_c<<4)|*src);
401 for( ;
count>= 2; dst_pos += 2, src += 2,
count-= 2 ) {
402dst += char((src[0]<<4)|src[1]);
412 constvector<char>& src,
TSeqPossrc_pos,
420 if( (src_pos^dst_pos) & 1 ) {
424dst += char((dst_c<<4)|((src[src_pos>>1]>>4)&15));
431 size_tpos = src_pos>>1;
432 for( ;
count>= 2; dst_pos += 2, pos += 1,
count-= 2 ) {
433dst += char(((src[pos]<<4)&0xf0)|((src[pos+1]>>4)&0x0f));
437dst_c = (src[pos])&15;
444dst += char((dst_c<<4)|((src[src_pos>>1])&15));
452 size_toctets =
count>>1;
453 size_tpos = src_pos>>1;
455dst.append(&src[pos], octets);
459dst_c = (src[pos+octets]>>4)&15;
474dst_str += char((dst_c << 4)|gap);
480 size_toctets =
count>>1;
482dst_str.append(octets,
char((gap<<4)|gap));
497 _ASSERT(dst_str.size() == dst_pos>>2);
498 const char* unpacked =
buffer;
501 for( ;
count&& (dst_pos&3); --
count, ++dst_pos ) {
502c = char((c<<2)|*unpacked++);
504 if( (dst_pos&3) == 0 ) {
516 _ASSERT(dst_str.size() == dst_pos>>2);
518 char* packed_end = packed_buffer;
519 for( ;
count>= 4;
count-= 4, unpacked += 4 ) {
520*packed_end++ = char(
521(unpacked[0]<<6)|(unpacked[1]<<4)|(unpacked[2]<<2)|unpacked[3] );
523dst_str.append(packed_buffer, packed_end);
529dst_c = char((unpacked[0]<<2)|unpacked[1]);
532dst_c = char((unpacked[0]<<4)|(unpacked[1]<<2)|unpacked[2]);
543 constvector<char>& src,
TSeqPossrc_pos,
551 if( (src_pos^dst_pos) & 3 ) {
574 charc = char((dst_c<<(add*2))|(src[src_pos>>2]&((1<<(add*2))-1)));
576dst_c = char(c >> (2*(add-
count)));
587 size_toctets =
count>>2;
588 size_tpos = src_pos>>2;
590dst.append(&src[pos], octets);
592 size_trem =
count&3;
595dst_c = char((src[pos+octets]&255)>>(2*(4-rem)));
608 _ASSERT(dst_str.size() == dst_pos>>2);
612fill_n(
buffer, chunk, gap);
618 _ASSERT(dst_str.size() == dst_pos>>2);
627 const char*
table= 0,
boolreverse =
false)
640 switch( src_coding ) {
675 "Invalid data coding: "<<src_coding);
690 const char*
table,
boolreverse)
704 switch( src_coding ) {
739 "Invalid data coding: "<<src_coding);
755 const char*
table,
boolreverse,
770 switch( src_coding ) {
805 "Invalid data coding: "<<src_coding);
815randomizer_pos +=
count;
833dst_str.reserve(src_end-src_pos);
836 while( src_pos < src_end ) {
837 _ASSERT(dst_str.size() == dst_pos);
847 const char*
table= 0;
848 if( dst_coding != src_coding || reverse ||
851reverse, case_conversion);
852 if( !
table&& src_coding != dst_coding ) {
854 "Incompatible sequence codings: "<<
855src_coding<<
" -> "<<dst_coding);
871 switch( src_coding ) {
924dst_str.reserve((src_end-src_pos+1)>>1);
928 while( src_pos < src_end ) {
929 _ASSERT(dst_str.size() == dst_pos>>1);
939 const char*
table= 0;
940 if( dst_coding != src_coding || reverse ||
943reverse, case_conversion);
944 if( !
table&& src_coding != dst_coding ) {
946 "Incompatible sequence codings: "<<
947src_coding<<
" -> "<<dst_coding);
969 data.GetNcbi4na().Get(), dataPos,
count);
975 _ASSERT(dst_str.size() == dst_pos>>1);
978dst_str += char(dst_c<<4);
995dst_str.reserve((src_end-src_pos+3)>>2);
999 while( src_pos < src_end ) {
1000 _ASSERT(dst_str.size() == dst_pos>>2);
1005 "Cannot fill NCBI2na gap without randomizer");
1024 const char*
table= 0;
1025 if( dst_coding != src_coding || reverse ||
1028reverse, case_conversion);
1029 if( !
table&& src_coding != dst_coding ) {
1031 "Incompatible sequence codings: "<<
1032src_coding<<
" -> "<<dst_coding);
1052randomizer, src_pos);
1059 data.GetNcbi2na().Get(), dataPos,
count);
1065 _ASSERT(dst_str.size() == dst_pos>>2);
1104 "Can not indicate gap using the selected coding: "<<
1117 typedefpair<TCoding, TCoding> TMainConversion;
1118 typedefpair<bool, ECaseConversion> TConversionFlags;
1119 typedefpair<TMainConversion, TConversionFlags> TConversionKey;
1120 typedefvector<char> TConversionTable;
1124TConversionKey
key;
1125 key.first = TMainConversion(src, dst);
1126 key.second = TConversionFlags(reverse, case_cvt);
1127TTables::iterator it = tables->find(
key);
1128 if( it != tables->end() ) {
1130 switch(it->second.size()) {
1133 default:
return&it->second[0];
1136TConversionTable&
table= (*tables)[
key];
1147 if( srcIndex.second >= COUNT ) {
1157 catch( exception&
) {
1171pair<unsigned, unsigned> dstIndex =
1173 if( dstIndex.second >= COUNT ) {
1182 catch( exception&
) {
1192 table.resize(COUNT,
char(kInvalidCode));
1193 booldifferent =
false;
1194 for(
unsigned i= srcIndex.first;
i<= srcIndex.second; ++
i) {
1215 catch( exception&
) {
1228 '\x00',
'\x01',
'\x02',
'\x03',
'\x04',
'\x05',
'\x06',
'\x07',
1229 '\x08',
'\x09',
'\x0a',
'\x0b',
'\x0c',
'\x0d',
'\x0e',
'\x0f',
1230 '\x10',
'\x11',
'\x12',
'\x13',
'\x14',
'\x15',
'\x16',
'\x17',
1231 '\x18',
'\x19',
'\x1a',
'\x1b',
'\x1c',
'\x1d',
'\x1e',
'\x1f',
1232 '\x20',
'\x21',
'\x22',
'\x23',
'\x24',
'\x25',
'\x26',
'\x27',
1233 '\x28',
'\x29',
'\x2a',
'\x2b',
'\x2c',
'\x2d',
'\x2e',
'\x2f',
1234 '\x30',
'\x31',
'\x32',
'\x33',
'\x34',
'\x35',
'\x36',
'\x37',
1235 '\x38',
'\x39',
'\x3a',
'\x3b',
'\x3c',
'\x3d',
'\x3e',
'\x3f',
1236 '\x40',
'\x41',
'\x42',
'\x43',
'\x44',
'\x45',
'\x46',
'\x47',
1237 '\x48',
'\x49',
'\x4a',
'\x4b',
'\x4c',
'\x4d',
'\x4e',
'\x4f',
1238 '\x50',
'\x51',
'\x52',
'\x53',
'\x54',
'\x55',
'\x56',
'\x57',
1239 '\x58',
'\x59',
'\x5a',
'\x5b',
'\x5c',
'\x5d',
'\x5e',
'\x5f',
1240 '\x60',
'\x61',
'\x62',
'\x63',
'\x64',
'\x65',
'\x66',
'\x67',
1241 '\x68',
'\x69',
'\x6a',
'\x6b',
'\x6c',
'\x6d',
'\x6e',
'\x6f',
1242 '\x70',
'\x71',
'\x72',
'\x73',
'\x74',
'\x75',
'\x76',
'\x77',
1243 '\x78',
'\x79',
'\x7a',
'\x7b',
'\x7c',
'\x7d',
'\x7e',
'\x7f',
1244 '\x80',
'\x81',
'\x82',
'\x83',
'\x84',
'\x85',
'\x86',
'\x87',
1245 '\x88',
'\x89',
'\x8a',
'\x8b',
'\x8c',
'\x8d',
'\x8e',
'\x8f',
1246 '\x90',
'\x91',
'\x92',
'\x93',
'\x94',
'\x95',
'\x96',
'\x97',
1247 '\x98',
'\x99',
'\x9a',
'\x9b',
'\x9c',
'\x9d',
'\x9e',
'\x9f',
1248 '\xa0',
'\xa1',
'\xa2',
'\xa3',
'\xa4',
'\xa5',
'\xa6',
'\xa7',
1249 '\xa8',
'\xa9',
'\xaa',
'\xab',
'\xac',
'\xad',
'\xae',
'\xaf',
1250 '\xb0',
'\xb1',
'\xb2',
'\xb3',
'\xb4',
'\xb5',
'\xb6',
'\xb7',
1251 '\xb8',
'\xb9',
'\xba',
'\xbb',
'\xbc',
'\xbd',
'\xbe',
'\xbf',
1252 '\xc0',
'\xc1',
'\xc2',
'\xc3',
'\xc4',
'\xc5',
'\xc6',
'\xc7',
1253 '\xc8',
'\xc9',
'\xca',
'\xcb',
'\xcc',
'\xcd',
'\xce',
'\xcf',
1254 '\xd0',
'\xd1',
'\xd2',
'\xd3',
'\xd4',
'\xd5',
'\xd6',
'\xd7',
1255 '\xd8',
'\xd9',
'\xda',
'\xdb',
'\xdc',
'\xdd',
'\xde',
'\xdf',
1256 '\xe0',
'\xe1',
'\xe2',
'\xe3',
'\xe4',
'\xe5',
'\xe6',
'\xe7',
1257 '\xe8',
'\xe9',
'\xea',
'\xeb',
'\xec',
'\xed',
'\xee',
'\xef',
1258 '\xf0',
'\xf1',
'\xf2',
'\xf3',
'\xf4',
'\xf5',
'\xf6',
'\xf7',
1259 '\xf8',
'\xf9',
'\xfa',
'\xfb',
'\xfc',
'\xfd',
'\xfe',
'\xff'static CRef< CScope > m_Scope
CScope * GetScopeOrNull(void) const
SeqVector related exceptions.
static TPair GetCodeIndexFromTo(CSeq_data::E_Choice code_type)
static bool IsCodeAvailable(CSeq_data::E_Choice code_type)
static TIndex GetIndexComplement(CSeq_data::E_Choice code_type, TIndex idx)
static TIndex GetMapToIndex(CSeq_data::E_Choice from_type, CSeq_data::E_Choice to_type, TIndex from_idx)
void reset(element_type *p=0, EOwnership ownership=eTakeOwnership)
Reset will delete the old pointer (if owned), set content to the new value, and assume the ownership ...
unsigned int TSeqPos
Type for sequence locations and lengths.
int TSignedSeqPos
Type for signed sequence position.
element_type * get(void) const
Get pointer.
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
#define NCBI_THROW_FMT(exception_class, err_code, message)
The same as NCBI_THROW but with message processed as output to ostream.
const CSeq_id * GetId(void) const
Get the id of the location; return NULL if it has multiple ids or no id at all.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
TSeqPos GetBioseqLength(void) const
EVectorCoding
CSeqVector constructor flags.
TMol GetSequenceType(void) const
@ eCoding_Ncbi
Set coding to binary coding (Ncbi4na or Ncbistdaa)
@ eCoding_Iupac
Set coding to printable coding (Iupacna or Iupacaa)
virtual void RandomizeData(char *buffer, size_t count, TSeqPos pos)=0
Convert count unpacked bases in buffer 4na -> 2na with randomization.
TSeqPos GetEndPosition(void) const
return end position of current segment in sequence (exclusive)
const CSeq_data & GetRefData(void) const
will allow any data segments, user should check for position and strand
static const char sm_TrivialTable[256]
SSeqMapSelector & SetLinkUsedTSE(bool link=true)
TSeqPos GetRefPosition(void) const
void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer)
Fill the buffer string with the sequence data for the interval [start, stop).
TSeqPos GetGapSizeForward(void) const
Returns number of gap symbols ahead, including current symbol; returns 0 if current position is not in ...
CConstRef< CSeq_literal > GetGapSeq_literal(void) const
Returns gap Seq-data object ref; returns null if it's not a gap or an unspecified gap.
bool GetRefMinusStrand(void) const
CSeqMap::ESegmentType GetType(void) const
static const char * sx_GetConvertTable(TCoding src, TCoding dst, bool reverse, ECaseConversion case_cvt)
bool CanGetRange(TSeqPos start, TSeqPos stop)
Check if the sequence can be obtained for the interval [start, stop)
TSeqPos GetRefEndPosition(void) const
SSeqMapSelector & SetStrand(ENa_strand strand)
Set strand to iterate over.
static TResidue sx_GetGapChar(TCoding coding, ECaseConversion case_cvt)
TSeqPos GetPosition(void) const
return position of current segment in sequence
AutoPtr< CSeqVector_CI > m_Iterator
TCoding GetCoding(void) const
Target sequence coding.
~CNcbi2naRandomizer(void)
friend class CSeqVector_CI
bool CanGetRange(TSeqPos start, TSeqPos stop) const
Check if the sequence data is available for the interval [start, stop).
CSeqVector & operator=(const CSeqVector &vec)
CConstRef< CSeqMap > m_SeqMap
void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const
Fill the buffer string with the sequence data for the interval [start, stop).
CSeqVector_CI & x_GetIterator(TSeqPos pos) const
void SetNoAmbiguities(void)
void x_InitRandomizer(CRandom &random_gen)
CRef< INcbi2naRandomizer > m_Randomizer
virtual ~CSeqVector(void)
CConstRef< CSeq_literal > GetGapSeq_literal(TSeqPos pos) const
Returns gap Seq-literal object ref; returns null if it's not a gap or an unspecified gap.
void x_ResetIterator(void) const
CSeqVector_CI * x_CreateIterator(TSeqPos pos) const
TMutex & GetMutex(void) const
Get mutex for a few non-MT-safe methods to make them MT-safe at a cost of performance.
CNcbi2naRandomizer(CRandom &gen)
bool IsProtein(void) const
void SetCoding(TCoding coding)
void SetIupacCoding(void)
Set coding to either Iupacaa or Iupacna depending on molecule type.
char m_RandomTable[16][kRandomDataSize]
TSeqPos GetLength(CScope *scope) const
void x_GetPacked8SeqData(string &dst_str, TSeqPos src_pos, TSeqPos src_end)
void SetRandomizeAmbiguities(void)
Randomization of ambiguities and gaps in ncbi2na coding.
TSeqPos GetGapSizeForward(TSeqPos pos) const
Returns number of gap symbols ahead, including base at position 'pos'; returns 0 if the position is not...
void x_GetPacked2naSeqData(string &dst_str, TSeqPos src_pos, TSeqPos src_end)
void SetNcbiCoding(void)
Set coding to either Ncbi8aa or Ncbi8na depending on molecule type.
void SetStrand(ENa_strand strand)
void GetPackedSeqData(string &buffer, TSeqPos start=0, TSeqPos stop=kInvalidSeqPos)
void RandomizeData(char *buffer, size_t count, TSeqPos pos)
Convert count unpacked bases in buffer 4na -> 2na with randomization.
void x_GetPacked4naSeqData(string &dst_str, TSeqPos src_pos, TSeqPos src_end)
TResidue GetGapChar(ECaseConversion case_cvt=eCaseConversion_none) const
Return gap symbol corresponding to the selected coding.
TObjectType * GetPointer(void) THROWS_NONE
Get pointer.
uint32_t Uint4
4-byte (32-bit) unsigned integer
Uint4 TValue
Type of the generated integer value and/or the seed value.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
void set_bit(unsigned *dest, unsigned bitpos) noexcept
Set 1 bit in a block.
ENa_strand
strand of nucleic acid
const TInst & GetInst(void) const
Get the Inst member data.
E_Choice
Choice variants.
TMol GetMol(void) const
Get the Mol member data.
@ e_not_set
No variant selected.
@ e_Ncbipna
nucleic acid probabilities
@ e_Ncbieaa
extended ASCII 1 letter aa codes
@ e_Ncbistdaa
consecutive codes for std aas
@ e_Ncbi2na
2 bit nucleic acid code
@ e_Iupacna
IUPAC 1 letter nuc acid code.
@ e_Ncbipaa
amino acid probabilities
@ e_Ncbi8na
8 bit extended nucleic acid code
@ e_Ncbi4na
4 bit nucleic acid code
@ e_Iupacaa
IUPAC 1 letter amino acid code.
@ e_Ncbi8aa
8 bit extended amino acid codes
<!DOCTYPE HTML >< html > n< header > n< title > PubSeq Gateway Help Page</title > n< style > n table
const struct ncbi::grid::netcache::search::fields::KEY key
static size_t rnd(size_t minimal, size_t maximal)
Multi-threading -- mutexes; rw-locks; semaphore.
static unsigned long int total_count
static void x_AppendGapTo4(string &dst_str, char &dst_c, TSeqPos dst_pos, TSeqPos count, char gap)
static void x_AppendAnyTo4(string &dst_str, char &dst_c, TSeqPos dst_pos, const CSeq_data &data, TSeqPos dataPos, TSeqPos total_count, const char *table, bool reverse)
static void x_AppendAnyTo8(string &dst_str, const CSeq_data &data, TSeqPos dataPos, TSeqPos total_count, const char *table=0, bool reverse=false)
static void x_Append2To2(string &dst, char &dst_c, TSeqPos dst_pos, const vector< char > &src, TSeqPos src_pos, TSeqPos count)
static void x_Append8To2(string &dst_str, char &dst_c, TSeqPos dst_pos, const char *buffer, TSeqPos count)
static const size_t kBufferSize
static void x_AppendAnyTo2(string &dst_str, char &dst_c, TSeqPos dst_pos, const CSeq_data &data, TSeqPos dataPos, TSeqPos total_count, const char *table, bool reverse, INcbi2naRandomizer *randomizer, TSeqPos randomizer_pos)
static void x_AppendGapTo8(string &dst_str, size_t count, char gap)
static void x_Append8To8(string &dst_str, const string &src_str, size_t src_pos, size_t count)
static void x_Append4To4(string &dst, char &dst_c, TSeqPos dst_pos, const vector< char > &src, TSeqPos src_pos, TSeqPos count)
static void x_AppendRandomTo2(string &dst_str, char &dst_c, TSeqPos dst_pos, TSeqPos src_pos, TSeqPos count, INcbi2naRandomizer &randomizer, char gap)
static void x_Append8To4(string &dst, char &dst_c, TSeqPos dst_pos, const char *src, size_t count)
DEFINE_STATIC_FAST_MUTEX(s_ConvertTableMutex2)
void copy_8bit_any(DstIter dst, size_t count, const SrcCont &srcCont, size_t srcPos, const char *table, bool reverse)
void copy_4bit_any(DstIter dst, size_t count, const SrcCont &srcCont, size_t srcPos, const char *table, bool reverse)
void copy_2bit_any(DstIter dst, size_t count, const SrcCont &srcCont, size_t srcPos, const char *table, bool reverse)
void copy_2bit(DstIter dst, size_t count, const SrcCont &srcCont, size_t srcPos)
Selector used in CSeqMap methods returning iterators.
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4