0, 0, 0, 0, 0, 0, 0, 0,
540, 0, 0, 0, 0, 0, 0, 0,
580, 0, 0, 0, 0, 0, 0, 0,
610, 0, 0, 0, 0, 0, 0, 0,
630, 0, 0, 0, 0, 0, 0, 0,
640, 0, 0, 0, 0, 0, 0, 0,
660, 0, 0, 0, 0, 0, 0, 0,
670, 0, 0, 0, 0, 0, 0, 0,
690, 0, 0, 0, 0, 0, 0, 0,
700, 0, 0, 0, 0, 0, 0, 0,
720, 0, 0, 0, 0, 0, 0, 0,
730, 0, 0, 0, 0, 0, 0, 0,
750, 0, 0, 0, 0, 0, 0, 0,
760, 0, 0, 0, 0, 0, 0, 0,
780, 0, 0, 0, 0, 0, 0, 0,
790, 0, 0, 0, 0, 0, 0, 0,
810, 0, 0, 0, 0, 0, 0, 0,
820, 0, 0, 0, 0, 0, 0, 0,
840, 0, 0, 0, 0, 0, 0, 0,
850, 0, 0, 0, 0, 0, 0, 0,
870, 0, 0, 0, 0, 0, 0, 0,
880, 0, 0, 0, 0, 0, 0, 0,
900, 0, 0, 0, 0, 0, 0, 0,
910, 0, 0, 0, 0, 0, 0, 0,
960, 0, 0, 0, 0, 0, 0, 0,
138: m_initialized(
false), m_pool(0)
164 if(!ifs.is_open()) {
165 ERR_POST_X(1,
"UnicodeToAscii table not found: "<< name);
168 LOG_POST_X(2,
Info<<
"Loading UnicodeToAscii table at: "<< name);
172 size_tpoolsize = filelen/2;
175 ERR_POST_X(3,
"UnicodeToAscii table failed to load: not enough memory");
187 if(
x_ParseLine(line, symbol, translation) > 1) {
189 if(poolpos + translation.size() + 1 > poolsize) {
190 m_pool= (
char*)realloc(
m_pool, poolsize += filelen/4);
192 ERR_POST_X(3,
"UnicodeToAscii table failed to load: not enough memory");
197symbolToOffset[symbol] = poolpos;
198memcpy(
m_pool+poolpos, translation.data(), translation.size());
199poolpos += translation.size();
200*(
m_pool+poolpos) =
'\0';
205 ERR_POST_X(1,
"UnicodeToAscii table is empty: "<< name);
216sym != symend; ++sym) {
227 string& line,
TUnicode& symbol,
string& translation)
234string::size_type begin = line.find_first_not_of(
" \t", 0);
235 if(begin == string::npos) {
238string::size_type end = line.find_first_of(
" \t,#",begin);
242 if(end == string::npos) {
250 if( end == line.size() || line[end] ==
'#') {
254end = line.find(
',',end);
255 if(end == string::npos) {
258begin = line.find_first_not_of(
" \t", ++end);
259 if(begin == string::npos) {
262 if(*(line.data()+begin) !=
'\"') {
265 const char*
data= line.data()+begin;
266 const char* dataend = line.data()+line.size();
274 if(
data< dataend) {
278 case 'a': c = 0x7;
break;
279 case 'b': c = 0x8;
break;
280 case 't': c = 0x9;
break;
281 case 'n': c = 0xA;
break;
282 case 'v': c = 0xB;
break;
283 case 'f': c = 0xC;
break;
284 case 'r': c = 0xD;
break;
285 case '0': c = 0x0;
break;
287 if(
data+ 1 < dataend) {
288begin =
data+ 1 - line.data();
289end = line.find_first_not_of(
"0123456789abcdefABCDEF", begin);
290 if(end == string::npos) {
294 data= line.data() + end;
299 if(
data== dataend) {
303translation.append(1,c);
317 return&(
i->second);
329 if(
t.IsInitialized()) {
330 return t.GetTranslation(character);
335 if((character & (~0xFFFF)) == 0) {
336 unsigned intthePlanNo = (character & 0xFF00) >> 8;
337 unsigned inttheOffset = character & 0xFF;
340translation = &((*thePlan)[theOffset]);
344 if(!default_translation) {
349 "UnicodeToAscii: unknown Unicode symbol");
351translation = default_translation;
359 const char*p = theUTF;
362 if( ((*theUTF) & 0xC0) != 0xC0 ) {
364 RC|= (
unsignedchar)theUTF[0];
370 while((counter =
Int1(counter << 1)) < 0) {
371 unsigned charc = *p++;
372 if((c & ~077) != 0200) {
375acc = (acc << 6) | (c & 077);
384 const char*p = theUTF;
387 if( (
unsigned char)theUTF[0] < 0x80 ) {
389*theUnicode = *theUTF;
393 if( ((*theUTF) & 0xC0) != 0xC0 || ((*theUTF) & 0xFE) == 0xC0) {
399 if( ((*theUTF) & 0xF8) == 0xF0 ) {
403 while((counter =
Int1(counter << 1)) < 0) {
404 unsigned charc = *p++;
405 if((c & ~077) != 0200) {
408acc = (acc << 6) | (c & 077);
412 return(
size_t)(p - theUTF);
419 size_ttheLength =
UnicodeToUTF8( theUnicode, theBuffer, 10 );
420 return string( theBuffer, theLength );
425 size_ttheBufLength )
429 if(theUnicode < 0x80) {
431 if( Length > theBufLength )
return0;
432theBuffer[0] = char(theUnicode);
434 else if(theUnicode < 0x800) {
436 if( Length > theBufLength )
return0;
437theBuffer[0] = char( 0xC0 | (theUnicode>>6));
438theBuffer[1] = char( 0x80 | (theUnicode & 0x3F));
440 else if(theUnicode < 0x10000) {
442 if( Length > theBufLength )
return0;
443theBuffer[0] = char( 0xE0 | (theUnicode>>12));
444theBuffer[1] = char( 0x80 | ((theUnicode>>6) & 0x3F));
445theBuffer[2] = char( 0x80 | (theUnicode & 0x3F));
447 else if(theUnicode < 0x200000) {
449 if( Length > theBufLength )
return0;
450theBuffer[0] = char( 0xF0 | (theUnicode>>18));
451theBuffer[1] = char( 0x80 | ((theUnicode>>12) & 0x3F));
452theBuffer[2] = char( 0x80 | ((theUnicode>>6) & 0x3F));
453theBuffer[3] = char( 0x80 | (theUnicode & 0x3F));
466 if( !src || !dst || dstLen == 0 )
return0;
469 size_tsrcLen = strlen( src );
471 for( srcPos = 0; srcPos < srcLen; ) {
473 char* pDst = &(dst[dstPos]);
474 const char* pSrc = &(src[srcPos]);
490 if(
result&& pSubst == default_translation) {
501!(pSubst->
Subst) ) {
508memcpy( pDst, pSrc, utfLen );
514 size_tsubstLen = strlen( pSubst->
Subst);
515 if( (dstPos + substLen) > dstLen ) {
520memcpy( pDst, pSubst->
Subst, substLen );
537 size_tsrcLen = strlen( src );
539 for( srcPos = 0; srcPos < srcLen; ) {
541 const char* pSrc = &(src[srcPos]);
557 if(
result&& pSubst == default_translation) {
569!(pSubst->
Subst) ) {
577dst +=
string( pSrc, utfLen );
583dst += pSubst->
Subst;
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
CUnicodeToAsciiTranslation(void)
void x_Initialize(const string &name)
bool IsInitialized(void) const
static int x_ParseLine(string &line, TUnicode &symbol, string &translation)
virtual ~CUnicodeToAsciiTranslation(void)
map< TUnicode, SUnicodeTranslation > m_SymbolToTranslation
const SUnicodeTranslation * GetTranslation(TUnicode symbol) const
container_type::const_iterator const_iterator
const_iterator begin() const
const_iterator end() const
const_iterator find(const key_type &key) const
#define LOG_POST_X(err_subcode, message)
#define ERR_POST_X(err_subcode, message)
Error posting with default error code and given error subcode.
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
void Info(CExceptionArgs_Base &args)
Int8 GetLength(void) const
Get size of file.
#define NCBI_PARAM_TYPE(section, name)
Generate typename for a parameter from its {section, name} attributes.
int8_t Int1
1-byte (8-bit) signed integer
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
CNcbiIstream & NcbiGetlineEOL(CNcbiIstream &is, string &str, string::size_type *count=NULL)
Read from "is" to "str" the next line (taking into account platform specifics of End-of-Line)
IO_PREFIX::ifstream CNcbiIfstream
Portable alias for ifstream.
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
static unsigned int StringToUInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to unsigned int.
ESubstType Type
Type of the substitutor.
string UTF8ToAsciiString(const char *src, const SUnicodeTranslation *default_translation, const TUnicodeTable *table, EConversionResult *result)
Convert UTF8 into ASCII string.
SUnicodeTranslation TUnicodePlan[256]
string UnicodeToUTF8(TUnicode theUnicode)
Convert Unicode character into UTF8.
const SUnicodeTranslation * UnicodeToAscii(TUnicode character, const TUnicodeTable *table, const SUnicodeTranslation *default_translation)
Convert Unicode character into ASCII string.
const char * Subst
Substitutor for unicode.
TUnicodePlan * TUnicodeTable[256]
ssize_t UTF8ToAscii(const char *src, char *dst, size_t dstLen, const SUnicodeTranslation *default_translation, const TUnicodeTable *table, EConversionResult *result)
Convert UTF8 into ASCII character buffer.
@ eSkip
Unicode to be skipped in translation. Usually it is a combining mark.
@ eException
Throw exception (CUtilException, with type eWrongData)
@ eAsIs
Unicode characters which should go into the text as is.
@ eDefaultTranslationUsed
Definition of all error codes used in util (xutil.lib).
<!DOCTYPE HTML >< html > n< header > n< title > PubSeq Gateway Help Page</title > n< style > n table
Static variables safety - create on demand, destroy on application termination.
Defines classes: CDirEntry, CFile, CDir, CSymLink, CMemoryFile, CFileUtil, CFileLock,...
std::istream & in(std::istream &in_, double &x_)
Structure to keep substitutions for the particular unicode character.
CSafeStatic< CUnicodeToAsciiTranslation > g_UnicodeTranslation
NCBI_PARAM_DECL(string, NCBI, UnicodeToAscii)
static string s_FindUnicodeToAscii(void)
static TUnicodeTable g_DefaultUnicodeTable
TUnicode UTF8ToUnicode(const char *theUTF)
NCBI_PARAM_DEF_WITH_INIT(string, NCBI, UnicodeToAscii, "", s_FindUnicodeToAscii)
static TUnicodePlan s_Plan_26h
static TUnicodePlan s_Plan_E4h
static TUnicodePlan s_Plan_30h
static TUnicodePlan s_Plan_04h
static TUnicodePlan s_Plan_FEh
static TUnicodePlan s_Plan_E2h
static TUnicodePlan s_Plan_27h
static TUnicodePlan s_Plan_21h
static TUnicodePlan s_Plan_E5h
static TUnicodePlan s_Plan_E8h
static TUnicodePlan s_Plan_00h
static TUnicodePlan s_Plan_E6h
static TUnicodePlan s_Plan_22h
static TUnicodePlan s_Plan_20h
static TUnicodePlan s_Plan_24h
static TUnicodePlan s_Plan_25h
static TUnicodePlan s_Plan_01h
static TUnicodePlan s_Plan_EAh
static TUnicodePlan s_Plan_EBh
static TUnicodePlan s_Plan_FBh
static TUnicodePlan s_Plan_23h
static TUnicodePlan s_Plan_E0h
static TUnicodePlan s_Plan_E3h
static TUnicodePlan s_Plan_1Eh
static TUnicodePlan s_Plan_02h
static TUnicodePlan s_Plan_03h
static TUnicodePlan s_Plan_E7h
string g_FindDataFile(const CTempString &name, CDirEntry::EType type=CDirEntry::eFile)
Look for an NCBI application data file or directory of the given name and type; in general,...
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4