This is a multi-part message in MIME format. --------------FC351AC889B8145AF245DB36 Content-Type: text/plain; charset=us-ascii Content-Transfer-Encoding: 7bit The idea is not new: strings and Unicode should have more or less the same methods to enhance their compatibility. The attached patch adds encoding capabilities to normal strings and extends the builtin str() to accept an optional encoding (and errors) argument. It also tries to reuse the already available Unicode codecs for the purposes of strings (conversions are done via the default encoding in both directions). With it you can use the existing codecs to easily encode or decode strings and data into string objects: >>> 'abc'.encode('utf16') '\377\376a\000b\000c\000' >>> str(_, 'utf16') 'abc' Since the codec API and registry support more than just Unicode codecs, one could also think of things like: '...long...data...string...'.encode('data_gzip') or other complicated string conversions. The str(obj[, encoding[, errors]]) builtin does the exact opposite: it takes the string representation of obj and then decodes it using the given encoding. What do you think about this? Should I add it to CVS as an experiment? 
-- Marc-Andre Lemburg ______________________________________________________________________ Business: http://www.lemburg.com/ Python Pages: http://www.lemburg.com/python/ --------------FC351AC889B8145AF245DB36 Content-Type: text/plain; charset=us-ascii; name="str.encode.patch" Content-Transfer-Encoding: 7bit Content-Disposition: inline; filename="str.encode.patch" diff -u -rbP -x *.o -x *.pyc -x Makefile -x *~ -x *.so -x add2lib -x pgen -x buildno -x core -x config.* -x libpython* -x python -x Setup -x Setup.local -x Setup.thread -x Setup.config -x hassignal -x Makefile.pre -x configure -x *.bak -x *.s -x DEADJOE -x *.rej -x *.orig -x Demo -x CVS -x Doc -x *.orig -x .#* -x distutils -x */plat* -x *.py -x ACKS -x *.txt -x README CVS-Python/Include/stringobject.h Python+Unicode/Include/stringobject.h --- CVS-Python/Include/stringobject.h Sat Jul 1 10:30:46 2000 +++ Python+Unicode/Include/stringobject.h Wed Jul 5 18:59:46 2000 @@ -81,6 +81,37 @@ #define PyString_AS_STRING(op) (((PyStringObject *)(op))->ob_sval) #define PyString_GET_SIZE(op) (((PyStringObject *)(op))->ob_size) +/* --- Generic Codecs ----------------------------------------------------- */ + +/* Create a string object by decoding the encoded string s of the + given size. */ + +extern DL_IMPORT(PyObject*) PyString_Decode( + const char *s, /* encoded string */ + int size, /* size of buffer */ + const char *encoding, /* encoding */ + const char *errors /* error handling */ + ); + +/* Encodes a char buffer of the given size and returns a + Python string object. */ + +extern DL_IMPORT(PyObject*) PyString_Encode( + const char *s, /* string char buffer */ + int size, /* number of chars to encode */ + const char *encoding, /* encoding */ + const char *errors /* error handling */ + ); + +/* Encodes a string object and returns the result as Python string + object. 
*/ + +extern DL_IMPORT(PyObject*) PyString_AsEncodedString( + PyObject *str, /* string object */ + const char *encoding, /* encoding */ + const char *errors /* error handling */ + ); + #ifdef __cplusplus } #endif diff -u -rbP -x *.o -x *.pyc -x Makefile -x *~ -x *.so -x add2lib -x pgen -x buildno -x core -x config.* -x libpython* -x python -x Setup -x Setup.local -x Setup.thread -x Setup.config -x hassignal -x Makefile.pre -x configure -x *.bak -x *.s -x DEADJOE -x *.rej -x *.orig -x Demo -x CVS -x Doc -x *.orig -x .#* -x distutils -x */plat* -x *.py -x ACKS -x *.txt -x README CVS-Python/Objects/stringobject.c Python+Unicode/Objects/stringobject.c --- CVS-Python/Objects/stringobject.c Wed Jul 5 11:42:49 2000 +++ Python+Unicode/Objects/stringobject.c Wed Jul 5 19:47:44 2000 @@ -152,6 +152,100 @@ return (PyObject *) op; } +PyObject *PyString_Decode(const char *s, + int size, + const char *encoding, + const char *errors) +{ + PyObject *buffer = NULL, *str; + + if (encoding == NULL) + encoding = PyUnicode_GetDefaultEncoding(); + + /* Decode via the codec registry */ + buffer = PyBuffer_FromMemory((void *)s, size); + if (buffer == NULL) + goto onError; + str = PyCodec_Decode(buffer, encoding, errors); + if (str == NULL) + goto onError; + /* Convert Unicode to a string using the default encoding */ + if (PyUnicode_Check(str)) { + PyObject *temp = str; + str = PyUnicode_AsEncodedString(str, NULL, NULL); + Py_DECREF(temp); + if (str == NULL) + goto onError; + } + if (!PyString_Check(str)) { + PyErr_Format(PyExc_TypeError, + "decoder did not return an string object (type=%.400s)", + str->ob_type->tp_name); + Py_DECREF(str); + goto onError; + } + Py_DECREF(buffer); + return str; + + onError: + Py_XDECREF(buffer); + return NULL; +} + +PyObject *PyString_Encode(const char *s, + int size, + const char *encoding, + const char *errors) +{ + PyObject *v, *str; + + str = PyString_FromStringAndSize(s, size); + if (str == NULL) + return NULL; + v = PyString_AsEncodedString(str, 
encoding, errors); + Py_DECREF(str); + return v; +} + +PyObject *PyString_AsEncodedString(PyObject *str, + const char *encoding, + const char *errors) +{ + PyObject *v; + + if (!PyString_Check(str)) { + PyErr_BadArgument(); + goto onError; + } + + if (encoding == NULL) + encoding = PyUnicode_GetDefaultEncoding(); + + /* Encode via the codec registry */ + v = PyCodec_Encode(str, encoding, errors); + if (v == NULL) + goto onError; + /* Convert Unicode to a string using the default encoding */ + if (PyUnicode_Check(v)) { + PyObject *temp = v; + v = PyUnicode_AsEncodedString(v, NULL, NULL); + Py_DECREF(temp); + if (v == NULL) + goto onError; + } + if (!PyString_Check(v)) { + PyErr_Format(PyExc_TypeError, + "encoder did not return a string object (type=%.400s)", + v->ob_type->tp_name); + Py_DECREF(v); + goto onError; + } + return v; + + onError: + return NULL; +} + static void string_dealloc(op) PyObject *op; @@ -1686,6 +1780,25 @@ } +static char encode__doc__[] = +"S.encode([encoding[,errors]]) -> string\n\ +\n\ +Return an encoded string version of S. Default encoding is the current\n\ +default string encoding. errors may be given to set a different error\n\ +handling scheme. Default is 'strict' meaning that encoding errors raise\n\ +a ValueError. 
Other possible values are 'ignore' and 'replace'."; + +static PyObject * +string_encode(PyStringObject *self, PyObject *args) +{ + char *encoding = NULL; + char *errors = NULL; + if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors)) + return NULL; + return PyString_AsEncodedString((PyObject *)self, encoding, errors); +} + + static char expandtabs__doc__[] = "S.expandtabs([tabsize]) -> string\n\ \n\ @@ -2252,6 +2365,7 @@ {"ljust", (PyCFunction)string_ljust, 1, ljust__doc__}, {"rjust", (PyCFunction)string_rjust, 1, rjust__doc__}, {"center", (PyCFunction)string_center, 1, center__doc__}, + {"encode", (PyCFunction)string_encode, 1, encode__doc__}, {"expandtabs", (PyCFunction)string_expandtabs, 1, expandtabs__doc__}, {"splitlines", (PyCFunction)string_splitlines, 1, splitlines__doc__}, #if 0 diff -u -rbP -x *.o -x *.pyc -x Makefile -x *~ -x *.so -x add2lib -x pgen -x buildno -x core -x config.* -x libpython* -x python -x Setup -x Setup.local -x Setup.thread -x Setup.config -x hassignal -x Makefile.pre -x configure -x *.bak -x *.s -x DEADJOE -x *.rej -x *.orig -x Demo -x CVS -x Doc -x *.orig -x .#* -x distutils -x */plat* -x *.py -x ACKS -x *.txt -x README CVS-Python/Python/bltinmodule.c Python+Unicode/Python/bltinmodule.c --- CVS-Python/Python/bltinmodule.c Tue Jul 4 10:30:50 2000 +++ Python+Unicode/Python/bltinmodule.c Wed Jul 5 19:40:14 2000 @@ -2050,18 +2050,39 @@ PyObject *self; PyObject *args; { - PyObject *v; + PyObject *v, *w; + const void *buffer; + int len; + char *encoding = NULL; + char *errors = NULL; - if (!PyArg_ParseTuple(args, "O:str", &v)) + if ( !PyArg_ParseTuple(args, "O|ss:str", &v, &encoding, &errors) ) return NULL; - return PyObject_Str(v); + + /* Get string representation */ + if (PyString_Check(v)) + Py_INCREF(v); + else + v = PyObject_Str(v); + if (encoding == NULL) + return v; + + /* Decode data raw */ + if (PyObject_AsReadBuffer(v, &buffer, &len)) + return NULL; + w = PyString_Decode((const char *)buffer, len, encoding, errors); + 
Py_DECREF(v); + return w; } static char str_doc[] = -"str(object) -> string\n\ +"str(object [, encoding[, errors]]) -> string\n\ \n\ -Return a nice string representation of the object.\n\ -If the argument is a string, the return value is the same object."; +Return a string representation of the object.\n\ +If the argument is a string, the return value is the same object.\n\ +If encoding is given, the string representation is decoded prior\n\ +to returning it. errors, defining the error handling for the decoding\n\ +process, defaults to 'strict'."; static PyObject * --------------FC351AC889B8145AF245DB36--
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4