Update of /cvsroot/python/python/dist/src/Modules In directory usw-pr-cvs1:/tmp/cvs-serv18280/Modules Modified Files: ucnhash.c unicodedata.c Log Message: Move ucnhash functionality into unicodedata (after the recent crop of changes, the files are small enough to do this). Also adds "name" and "lookup" functions to unicodedata. Index: ucnhash.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Modules/ucnhash.c,v retrieving revision 1.9 retrieving revision 1.10 diff -C2 -r1.9 -r1.10 *** ucnhash.c 2001/01/21 22:41:07 1.9 --- ucnhash.c 2001/01/24 07:59:11 1.10 *************** *** 1,197 **** ! /* unicode character name tables */ ! /* rewritten for Python 2.1 by Fredrik Lundh (fredrik@pythonware.com) */ #include "Python.h" - #include "ucnhash.h" - /* data file generated by Tools/unicode/makeunicodedata.py */ - #include "unicodename_db.h" - - /* -------------------------------------------------------------------- */ - /* database code (cut and pasted from the unidb package) */ - - static unsigned long - gethash(const char *s, int len, int scale) - { - int i; - unsigned long h = 0; - unsigned long ix; - for (i = 0; i < len; i++) { - h = (h * scale) + (unsigned char) toupper(s[i]); - ix = h & 0xff000000; - if (ix) - h = (h ^ ((ix>>24) & 0xff)) & 0x00ffffff; - } - return h; - } - - static int - getname(Py_UCS4 code, char* buffer, int buflen) - { - int offset; - int i; - int word; - unsigned char* w; - - if (code < 0 || code >= 65536) - return 0; - - /* get offset into phrasebook */ - offset = phrasebook_offset1[(code>>phrasebook_shift)]; - offset = phrasebook_offset2[(offset<<phrasebook_shift) + - (code&((1<<phrasebook_shift)-1))]; - if (!offset) - return 0; - - i = 0; - - for (;;) { - /* get word index */ - word = phrasebook[offset] - phrasebook_short; - if (word >= 0) { - word = (word << 8) + phrasebook[offset+1]; - offset += 2; - } else - word = phrasebook[offset++]; - if (i) { - if (i > buflen) - return 0; /* 
buffer overflow */ - buffer[i++] = ' '; - } - /* copy word string from lexicon. the last character in the - word has bit 7 set. the last word in a string ends with - 0x80 */ - w = lexicon + lexicon_offset[word]; - while (*w < 128) { - if (i >= buflen) - return 0; /* buffer overflow */ - buffer[i++] = *w++; - } - if (i >= buflen) - return 0; /* buffer overflow */ - buffer[i++] = *w & 127; - if (*w == 128) - break; /* end of word */ - } - - return 1; - } - - static int - cmpname(int code, const char* name, int namelen) - { - /* check if code corresponds to the given name */ - int i; - char buffer[NAME_MAXLEN]; - if (!getname(code, buffer, sizeof(buffer))) - return 0; - for (i = 0; i < namelen; i++) { - if (toupper(name[i]) != buffer[i]) - return 0; - } - return buffer[namelen] == '\0'; - } - - static int - getcode(const char* name, int namelen, Py_UCS4* code) - { - unsigned int h, v; - unsigned int mask = code_size-1; - unsigned int i, incr; - - /* the following is the same as python's dictionary lookup, with - only minor changes. 
see the makeunicodedata script for more - details */ - - h = (unsigned int) gethash(name, namelen, code_magic); - i = (~h) & mask; - v = code_hash[i]; - if (!v) - return 0; - if (cmpname(v, name, namelen)) { - *code = v; - return 1; - } - incr = (h ^ (h >> 3)) & mask; - if (!incr) - incr = mask; - for (;;) { - i = (i + incr) & mask; - v = code_hash[i]; - if (!v) - return -1; - if (cmpname(v, name, namelen)) { - *code = v; - return 1; - } - incr = incr << 1; - if (incr > mask) - incr = incr ^ code_poly; - } - } - - static const _PyUnicode_Name_CAPI hashAPI = - { - sizeof(_PyUnicode_Name_CAPI), - getname, - getcode - }; - - /* -------------------------------------------------------------------- */ - /* Python bindings */ - - static PyObject * - ucnhash_getname(PyObject* self, PyObject* args) - { - char name[NAME_MAXLEN]; - - int code; - if (!PyArg_ParseTuple(args, "i", &code)) - return NULL; - - if (!getname((Py_UCS4) code, name, sizeof(name))) { - PyErr_SetString(PyExc_ValueError, "undefined character code"); - return NULL; - } - - return Py_BuildValue("s", name); - } - - static PyObject * - ucnhash_getcode(PyObject* self, PyObject* args) - { - Py_UCS4 code; - - char* name; - int namelen; - if (!PyArg_ParseTuple(args, "s#", &name, &namelen)) - return NULL; - - if (!getcode(name, namelen, &code)) { - PyErr_SetString(PyExc_ValueError, "undefined character name"); - return NULL; - } - - return Py_BuildValue("i", code); - } - static PyMethodDef ucnhash_methods[] = { - {"getname", ucnhash_getname, 1}, - {"getcode", ucnhash_getcode, 1}, {NULL, NULL}, }; ! static char *ucnhash_docstring = "ucnhash hash function module"; - - /* Create PyMethodObjects and register them in the module's dict */ DL_EXPORT(void) initucnhash(void) { ! PyObject *m, *d, *v; ! ! m = Py_InitModule4( "ucnhash", /* Module name */ ucnhash_methods, /* Method list */ --- 1,18 ---- ! /* obsolete -- remove this file! */ #include "Python.h" static PyMethodDef ucnhash_methods[] = { {NULL, NULL}, }; ! 
static char *ucnhash_docstring = "ucnhash hash function module (obsolete)"; DL_EXPORT(void) initucnhash(void) { ! Py_InitModule4( "ucnhash", /* Module name */ ucnhash_methods, /* Method list */ *************** *** 199,212 **** (PyObject *)NULL, /* always pass this as *self */ PYTHON_API_VERSION); /* API Version */ - if (!m) - return; - - d = PyModule_GetDict(m); - if (!d) - return; - - /* Export C API */ - v = PyCObject_FromVoidPtr((void *) &hashAPI, NULL); - PyDict_SetItemString(d, "Unicode_Names_CAPI", v); - Py_XDECREF(v); } --- 20,22 ---- Index: unicodedata.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Modules/unicodedata.c,v retrieving revision 2.8 retrieving revision 2.9 diff -C2 -r2.8 -r2.9 *** unicodedata.c 2001/01/21 23:31:52 2.8 --- unicodedata.c 2001/01/24 07:59:11 2.9 *************** *** 13,17 **** --- 13,20 ---- #include "Python.h" + #include "ucnhash.h" + /* character properties */ + typedef struct { const unsigned char category; /* index into *************** *** 53,58 **** long rc; ! if (!PyArg_ParseTuple(args, "O!|O:decimal", ! &PyUnicode_Type, &v, &defobj)) return NULL; if (PyUnicode_GET_SIZE(v) != 1) { --- 56,60 ---- long rc; ! if (!PyArg_ParseTuple(args, "O!|O:decimal", &PyUnicode_Type, &v, &defobj)) return NULL; if (PyUnicode_GET_SIZE(v) != 1) { *************** *** 83,88 **** long rc; ! if (!PyArg_ParseTuple(args, "O!|O:digit", ! &PyUnicode_Type, &v, &defobj)) return NULL; if (PyUnicode_GET_SIZE(v) != 1) { --- 85,89 ---- long rc; ! if (!PyArg_ParseTuple(args, "O!|O:digit", &PyUnicode_Type, &v, &defobj)) return NULL; if (PyUnicode_GET_SIZE(v) != 1) { *************** *** 94,99 **** if (rc < 0) { if (defobj == NULL) { ! PyErr_SetString(PyExc_ValueError, ! "not a digit"); return NULL; } --- 95,99 ---- if (rc < 0) { if (defobj == NULL) { ! PyErr_SetString(PyExc_ValueError, "not a digit"); return NULL; } *************** *** 113,118 **** double rc; ! 
if (!PyArg_ParseTuple(args, "O!|O:numeric", ! &PyUnicode_Type, &v, &defobj)) return NULL; if (PyUnicode_GET_SIZE(v) != 1) { --- 113,117 ---- double rc; ! if (!PyArg_ParseTuple(args, "O!|O:numeric", &PyUnicode_Type, &v, &defobj)) return NULL; if (PyUnicode_GET_SIZE(v) != 1) { *************** *** 124,129 **** if (rc < 0) { if (defobj == NULL) { ! PyErr_SetString(PyExc_ValueError, ! "not a numeric character"); return NULL; } --- 123,127 ---- if (rc < 0) { if (defobj == NULL) { ! PyErr_SetString(PyExc_ValueError, "not a numeric character"); return NULL; } *************** *** 253,273 **** } /* XXX Add doc strings. */ static PyMethodDef unicodedata_functions[] = { ! {"decimal", unicodedata_decimal, 1}, ! {"digit", unicodedata_digit, 1}, ! {"numeric", unicodedata_numeric, 1}, ! {"category", unicodedata_category, 1}, ! {"bidirectional", unicodedata_bidirectional, 1}, ! {"combining", unicodedata_combining, 1}, ! {"mirrored", unicodedata_mirrored, 1}, ! {"decomposition", unicodedata_decomposition, 1}, {NULL, NULL} /* sentinel */ }; DL_EXPORT(void) initunicodedata(void) { ! 
Py_InitModule("unicodedata", unicodedata_functions); } --- 251,480 ---- } + /* -------------------------------------------------------------------- */ + /* unicode character name tables */ + + /* data file generated by Tools/unicode/makeunicodedata.py */ + #include "unicodename_db.h" + + /* -------------------------------------------------------------------- */ + /* database code (cut and pasted from the unidb package) */ + + static unsigned long + gethash(const char *s, int len, int scale) + { + int i; + unsigned long h = 0; + unsigned long ix; + for (i = 0; i < len; i++) { + h = (h * scale) + (unsigned char) toupper(s[i]); + ix = h & 0xff000000; + if (ix) + h = (h ^ ((ix>>24) & 0xff)) & 0x00ffffff; + } + return h; + } + + static int + getname(Py_UCS4 code, char* buffer, int buflen) + { + int offset; + int i; + int word; + unsigned char* w; + + if (code < 0 || code >= 65536) + return 0; + + /* get offset into phrasebook */ + offset = phrasebook_offset1[(code>>phrasebook_shift)]; + offset = phrasebook_offset2[(offset<<phrasebook_shift) + + (code&((1<<phrasebook_shift)-1))]; + if (!offset) + return 0; + + i = 0; + + for (;;) { + /* get word index */ + word = phrasebook[offset] - phrasebook_short; + if (word >= 0) { + word = (word << 8) + phrasebook[offset+1]; + offset += 2; + } else + word = phrasebook[offset++]; + if (i) { + if (i > buflen) + return 0; /* buffer overflow */ + buffer[i++] = ' '; + } + /* copy word string from lexicon. the last character in the + word has bit 7 set. 
the last word in a string ends with + 0x80 */ + w = lexicon + lexicon_offset[word]; + while (*w < 128) { + if (i >= buflen) + return 0; /* buffer overflow */ + buffer[i++] = *w++; + } + if (i >= buflen) + return 0; /* buffer overflow */ + buffer[i++] = *w & 127; + if (*w == 128) + break; /* end of word */ + } + + return 1; + } + + static int + cmpname(int code, const char* name, int namelen) + { + /* check if code corresponds to the given name */ + int i; + char buffer[NAME_MAXLEN]; + if (!getname(code, buffer, sizeof(buffer))) + return 0; + for (i = 0; i < namelen; i++) { + if (toupper(name[i]) != buffer[i]) + return 0; + } + return buffer[namelen] == '\0'; + } + + static int + getcode(const char* name, int namelen, Py_UCS4* code) + { + unsigned int h, v; + unsigned int mask = code_size-1; + unsigned int i, incr; + + /* the following is the same as python's dictionary lookup, with + only minor changes. see the makeunicodedata script for more + details */ + + h = (unsigned int) gethash(name, namelen, code_magic); + i = (~h) & mask; + v = code_hash[i]; + if (!v) + return 0; + if (cmpname(v, name, namelen)) { + *code = v; + return 1; + } + incr = (h ^ (h >> 3)) & mask; + if (!incr) + incr = mask; + for (;;) { + i = (i + incr) & mask; + v = code_hash[i]; + if (!v) + return -1; + if (cmpname(v, name, namelen)) { + *code = v; + return 1; + } + incr = incr << 1; + if (incr > mask) + incr = incr ^ code_poly; + } + } + + static const _PyUnicode_Name_CAPI hashAPI = + { + sizeof(_PyUnicode_Name_CAPI), + getname, + getcode + }; + + /* -------------------------------------------------------------------- */ + /* Python bindings */ + + static PyObject * + unicodedata_name(PyObject* self, PyObject* args) + { + char name[NAME_MAXLEN]; + + PyUnicodeObject* v; + PyObject* defobj = NULL; + if (!PyArg_ParseTuple(args, "O!|O:name", &PyUnicode_Type, &v, &defobj)) + return NULL; + + if (PyUnicode_GET_SIZE(v) != 1) { + PyErr_SetString(PyExc_TypeError, + "need a single Unicode character as 
parameter"); + return NULL; + } + + if (!getname((Py_UCS4) *PyUnicode_AS_UNICODE(v), name, sizeof(name))) { + if (defobj == NULL) { + PyErr_SetString(PyExc_ValueError, "no such name"); + return NULL; + } + else { + Py_INCREF(defobj); + return defobj; + } + } + + return Py_BuildValue("s", name); + } + + static PyObject * + unicodedata_lookup(PyObject* self, PyObject* args) + { + Py_UCS4 code; + Py_UNICODE str[1]; + + char* name; + int namelen; + if (!PyArg_ParseTuple(args, "s#:lookup", &name, &namelen)) + return NULL; + + if (!getcode(name, namelen, &code)) { + PyErr_SetString(PyExc_KeyError, "undefined character name"); + return NULL; + } + + str[0] = (Py_UNICODE) code; + return PyUnicode_FromUnicode(str, 1); + } + /* XXX Add doc strings. */ static PyMethodDef unicodedata_functions[] = { ! {"decimal", unicodedata_decimal, METH_VARARGS}, ! {"digit", unicodedata_digit, METH_VARARGS}, ! {"numeric", unicodedata_numeric, METH_VARARGS}, ! {"category", unicodedata_category, METH_VARARGS}, ! {"bidirectional", unicodedata_bidirectional, METH_VARARGS}, ! {"combining", unicodedata_combining, METH_VARARGS}, ! {"mirrored", unicodedata_mirrored, METH_VARARGS}, ! {"decomposition",unicodedata_decomposition, METH_VARARGS}, ! {"name", unicodedata_name, METH_VARARGS}, ! {"lookup", unicodedata_lookup, METH_VARARGS}, {NULL, NULL} /* sentinel */ }; + static char *unicodedata_docstring = "unicode character database"; + DL_EXPORT(void) initunicodedata(void) { ! PyObject *m, *d, *v; ! ! m = Py_InitModule4( ! "unicodedata", unicodedata_functions, ! unicodedata_docstring, NULL, PYTHON_API_VERSION); ! if (!m) ! return; ! ! d = PyModule_GetDict(m); ! if (!d) ! return; ! ! /* Export C API */ ! v = PyCObject_FromVoidPtr((void *) &hashAPI, NULL); ! PyDict_SetItemString(d, "ucnhash_CAPI", v); ! Py_XDECREF(v); ! }
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4