Update of /cvsroot/python/python/dist/src/Objects In directory usw-pr-cvs1:/tmp/cvs-serv22440 Modified Files: unicodeobject.c Log Message: Back out 2.140. Index: unicodeobject.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Objects/unicodeobject.c,v retrieving revision 2.141 retrieving revision 2.142 diff -C2 -d -r2.141 -r2.142 *** unicodeobject.c 21 Apr 2002 03:26:37 -0000 2.141 --- unicodeobject.c 21 Apr 2002 09:59:45 -0000 2.142 *************** *** 1173,1176 **** --- 1173,1182 ---- #endif + /* Allocation strategy: we default to Latin-1, then do one resize + whenever we hit an order boundary. The assumption is that + characters from higher orders usually occur often enough to warrant + this. + */ + PyObject * PyUnicode_EncodeUTF8(const Py_UNICODE *s, *************** *** 1180,1222 **** PyObject *v; char *p; ! int allocated = 0; ! int i; ! /* Short-cut for emtpy strings */ if (size == 0) return PyString_FromStringAndSize(NULL, 0); ! for (i = 0; i < size; ) { ! Py_UCS4 ch = s[i++]; ! if (ch < 0x80) ! allocated += 1; ! else if (ch < 0x0800) ! allocated += 2; ! else if (ch < 0x10000) { ! /* Check for high surrogate */ ! if (0xD800 <= ch && ch <= 0xDBFF && ! i != size && ! 0xDC00 <= s[i] && s[i] <= 0xDFFF) { ! allocated += 1; ! i++; ! } ! allocated += 3; ! } else ! allocated += 4; ! } ! ! v = PyString_FromStringAndSize(NULL, allocated); if (v == NULL) return NULL; p = PyString_AS_STRING(v); ! for (i = 0; i < size; ) { Py_UCS4 ch = s[i++]; ! if (ch < 0x80) { *p++ = (char) ch; - } else if (ch < 0x0800) { *p++ = (char)(0xc0 | (ch >> 6)); *p++ = (char)(0x80 | (ch & 0x3f)); --- 1186,1212 ---- PyObject *v; char *p; ! int i = 0; ! int overalloc = 2; ! int len; ! /* Short-cut for emtpy strings */ if (size == 0) return PyString_FromStringAndSize(NULL, 0); ! v = PyString_FromStringAndSize(NULL, overalloc * size); if (v == NULL) return NULL; p = PyString_AS_STRING(v); ! ! while (i < size) { Py_UCS4 ch = s[i++]; ! 
if (ch < 0x80) ! /* Encode ASCII */ *p++ = (char) ch; else if (ch < 0x0800) { + /* Encode Latin-1 */ *p++ = (char)(0xc0 | (ch >> 6)); *p++ = (char)(0x80 | (ch & 0x3f)); *************** *** 1224,1258 **** else { ! if (ch < 0x10000) { ! /* Check for high surrogate */ if (0xD800 <= ch && ch <= 0xDBFF && i != size) { Py_UCS4 ch2 = s[i]; ! /* Check for low surrogate */ if (0xDC00 <= ch2 && ch2 <= 0xDFFF) { ! ch = ((ch - 0xD800)<<10 | (ch2-0xDC00))+0x10000; ! *p++ = (char)((ch >> 18) | 0xf0); ! *p++ = (char)(0x80 | ((ch >> 12) & 0x3f)); ! *p++ = (char)(0x80 | ((ch >> 6) & 0x3f)); ! *p++ = (char)(0x80 | (ch & 0x3f)); ! i++; ! continue; } /* Fall through: handles isolated high surrogates */ } *p++ = (char)(0xe0 | (ch >> 12)); *p++ = (char)(0x80 | ((ch >> 6) & 0x3f)); *p++ = (char)(0x80 | (ch & 0x3f)); ! ! } else { ! *p++ = (char)(0xf0 | (ch>>18)); ! *p++ = (char)(0x80 | ((ch>>12) & 0x3f)); ! *p++ = (char)(0x80 | ((ch>>6) & 0x3f)); ! *p++ = (char)(0x80 | (ch & 0x3f)); } } } ! assert(p - PyString_AS_STRING(v) == allocated); return v; } --- 1214,1270 ---- else { ! /* Encode UCS2 Unicode ordinals */ if (ch < 0x10000) { ! ! /* Special case: check for high surrogate */ if (0xD800 <= ch && ch <= 0xDBFF && i != size) { Py_UCS4 ch2 = s[i]; ! /* Check for low surrogate and combine the two to ! form a UCS4 value */ if (0xDC00 <= ch2 && ch2 <= 0xDFFF) { ! ch = ((ch - 0xD800) << 10 | (ch2 - 0xDC00)) + 0x10000; ! i++; ! goto encodeUCS4; } /* Fall through: handles isolated high surrogates */ } + + if (overalloc < 3) { + len = (int)(p - PyString_AS_STRING(v)); + overalloc = 3; + if (_PyString_Resize(&v, overalloc * size)) + goto onError; + p = PyString_AS_STRING(v) + len; + } *p++ = (char)(0xe0 | (ch >> 12)); *p++ = (char)(0x80 | ((ch >> 6) & 0x3f)); *p++ = (char)(0x80 | (ch & 0x3f)); ! continue; ! } ! ! /* Encode UCS4 Unicode ordinals */ ! encodeUCS4: ! if (overalloc < 4) { ! len = (int)(p - PyString_AS_STRING(v)); ! overalloc = 4; ! if (_PyString_Resize(&v, overalloc * size)) ! 
goto onError; ! p = PyString_AS_STRING(v) + len; } + *p++ = (char)(0xf0 | (ch >> 18)); + *p++ = (char)(0x80 | ((ch >> 12) & 0x3f)); + *p++ = (char)(0x80 | ((ch >> 6) & 0x3f)); + *p++ = (char)(0x80 | (ch & 0x3f)); } } ! *p = '\0'; ! assert((p - PyString_AS_STRING(v)) <= overalloc*size); ! if (_PyString_Resize(&v, (int)(p - PyString_AS_STRING(v)))) ! goto onError; return v; + + onError: + Py_DECREF(v); + return NULL; }
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4