( ptr !=
NPOS) {
81 boolis_entity =
false;
84(ptr+2 <
str.length()) &&
85(semicolon !=
NPOS)) {
86 if( ptr >= semicolon )
87semicolon =
str.find(
";", ptr+1);
88 if( semicolon !=
NPOS) {
91 if(
str[ptr+1] ==
'#') {
96 for(; p < semicolon; ++p) {
106 for(; p < semicolon; ++p) {
113is_entity = (p == semicolon);
119 "\" contains HTML encoded entities");
165 while( (pos =
s.find(
"<!--", pos)) !=
NPOS) {
166 SIZE_TYPEpos_end =
s.find(
"-->", pos + 1);
167 if( pos_end ==
NPOS) {
170 s.erase(pos, pos_end - pos + 3);
174 while( (pos =
s.find(
"<@", pos)) !=
NPOS) {
176 if( pos_end ==
NPOS) {
179 s.erase(pos, pos_end - pos + 2);
184 while( (pos =
s.find(
"<", pos)) !=
NPOS) {
186 if( pos_end ==
NPOS) {
189 if(pos <
s.size() &&
190(
isalpha((
unsigned char)
s[pos + 1]) ||
s[pos + 1] ==
'/')) {
191 s.erase(pos, pos_end - pos + 1);
206 while( (pos =
s.find(
"&", pos)) !=
NPOS) {
208 if( pos_end ==
NPOS) {
211 if( (pos_end - pos) > 2 && (pos_end - pos) < 8 ) {
214 if(
s[start] ==
'#') {
220 boolneed_delete =
true;
222 if( !
check((
int)
s[
i]) ) {
223need_delete =
false;
228 s.erase(pos, pos_end - pos + 1);
514 "Unable to guess the source string encoding", 0);
518ustr.reserve(
str.size());
520string::const_iterator
i, e =
str.end();
524 for(
i=
str.begin();
i!= e;) {
527 if(
i!= e && ch ==
'&') {
528string::const_iterator itmp, end_of_entity, start_of_entity;
529itmp = end_of_entity = start_of_entity =
i;
530 boolent, dec,
hex, parsed=
false;
531ent =
isalpha((
unsigned char)(*itmp)) != 0;
532dec = !ent && *itmp ==
'#'&& ++itmp != e &&
533 isdigit((
unsigned char)(*itmp)) != 0;
534 hex= !dec && itmp != e &&
535(*itmp ==
'x'|| *itmp ==
'X') && ++itmp != e &&
536 isxdigit((
unsigned char)(*itmp)) != 0;
537start_of_entity = itmp;
538 if(itmp != e && (ent || dec ||
hex)) {
540 for(
int len=0;
len<16 && itmp != e; ++
len, ++itmp) {
541 if(*itmp ==
'&'|| *itmp ==
'#') {
545end_of_entity = itmp;
548ent = ent &&
isalnum( (
unsigned char)(*itmp)) != 0;
549dec = dec &&
isdigit( (
unsigned char)(*itmp)) != 0;
552 if(end_of_entity !=
i&& (ent || dec ||
hex)) {
555 stringentity(start_of_entity,end_of_entity);
557 for( ; p->
u!= 0; ++p) {
558 if(entity.compare(p->
s) == 0) {
568 for(itmp = start_of_entity;
569itmp != end_of_entity; ++itmp) {
574 if(
ud>=
'0'&&
ud<=
'9') {
576}
else if(
ud>=
'a'&&
ud<=
'f') {
579}
else if(
ud>=
'A'&&
ud<=
'F') {
597ustr.append( 1, ch );
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string. Sample usage:
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
std::ofstream out("events_result.xml")
main entry point for tests
static DLIST_TYPE *DLIST_NAME() last(DLIST_LIST_TYPE *list)
static const char * str(char *buf, int n)
#define ERR_POST_X_ONCE(err_subcode, message)
Error posting only once during program execution with default error code and given error subcode.
#define NCBI_THROW2(exception_class, err_code, message, extra)
Throw exception with extra parameter.
void Info(CExceptionArgs_Base &args)
static void SetNL(const string &nl)
static const char * sm_newline
static string HTMLEncode(const string &str, THTMLEncodeFlags flags=fEncodeAll)
HTML-encodes a string, e.g. "<" becomes "&lt;".
static CStringUTF8 HTMLDecode(const string &str, EEncoding encoding=eEncoding_Unknown, THTMLDecodeFlags *result_flags=NULL)
Decode HTML entities and character references.
static string StripSpecialChars(const string &str)
Strip all named and numeric character entities from a string.
static string StripTags(const string &str)
Strip all HTML tags from a string.
static string HTMLAttributeEncode(const string &str, THTMLEncodeFlags flags=fSkipEntities)
HTML encodes a tag attribute ('&' and '"' symbols).
@ fSkipLiteralEntities
Skip "&entity;".
@ fSkipNumericEntities
Skip "&#NNNN;".
@ fCheckPreencoded
Print warning if some preencoded entity found in the string.
@ fCharRef_Entity
One or more character entity references were found.
@ fCharRef_Numeric
One or more numeric character references were found.
@ fEncoding
Character encoding changed.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
NCBI_NS_STD::string::size_type SIZE_TYPE
static EEncoding GuessEncoding(const CTempString &src)
Guess the encoding of the C/C++ string.
char32_t TUnicodeSymbol
Unicode character.
static CStringUTF8 AsUTF8(const CTempString &src, EEncoding encoding, EValidate validate=eNoValidate)
Convert into UTF8 from a C/C++ string.
unsigned int
A callback function used to compare two keys in a database.
static struct tag_HtmlEntities s_HtmlEntities[]
static string s_HTMLEncode(const string &str, const string &set, CHTMLHelper::THTMLEncodeFlags flags)
Definition of all error codes used in html library (xhtml.lib).
static void hex(unsigned char c)
static unsigned int ud(time_t one, time_t two)
#define NcbiSysChar_strdup
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4