A RetroSearch Logo

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Search Query:

Showing content from http://mail.python.org/pipermail/python-checkins/2001-January/015280.html below:

[Python-checkins] CVS: python/dist/src/Modules ucnhash.c,1.9,1.10 unicodedata.c,2.8,2.9

[Python-checkins] CVS: python/dist/src/Modules ucnhash.c,1.9,1.10 unicodedata.c,2.8,2.9
Fredrik Lundh effbot@users.sourceforge.net
Tue, 23 Jan 2001 23:59:13 -0800
Update of /cvsroot/python/python/dist/src/Modules
In directory usw-pr-cvs1:/tmp/cvs-serv18280/Modules

Modified Files:
	ucnhash.c unicodedata.c 
Log Message:


Move ucnhash functionality into unicodedata (after the recent
crop of changes, the files are small enough to do this).  Also
adds "name" and "lookup" functions to unicodedata.


Index: ucnhash.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Modules/ucnhash.c,v
retrieving revision 1.9
retrieving revision 1.10
diff -C2 -r1.9 -r1.10
*** ucnhash.c	2001/01/21 22:41:07	1.9
--- ucnhash.c	2001/01/24 07:59:11	1.10
***************
*** 1,197 ****
! /* unicode character name tables */
! /* rewritten for Python 2.1 by Fredrik Lundh (fredrik@pythonware.com) */
  
  #include "Python.h"
- #include "ucnhash.h"
  
- /* data file generated by Tools/unicode/makeunicodedata.py */
- #include "unicodename_db.h"
- 
- /* -------------------------------------------------------------------- */
- /* database code (cut and pasted from the unidb package) */
- 
- static unsigned long
- gethash(const char *s, int len, int scale)
- {
-     int i;
-     unsigned long h = 0;
-     unsigned long ix;
-     for (i = 0; i < len; i++) {
-         h = (h * scale) + (unsigned char) toupper(s[i]);
-         ix = h & 0xff000000;
-         if (ix)
-             h = (h ^ ((ix>>24) & 0xff)) & 0x00ffffff;
-     }
-     return h;
- }
- 
- static int
- getname(Py_UCS4 code, char* buffer, int buflen)
- {
-     int offset;
-     int i;
-     int word;
-     unsigned char* w;
- 
-     if (code < 0 || code >= 65536)
-         return 0;
- 
-     /* get offset into phrasebook */
-     offset = phrasebook_offset1[(code>>phrasebook_shift)];
-     offset = phrasebook_offset2[(offset<<phrasebook_shift) +
-                                (code&((1<<phrasebook_shift)-1))];
-     if (!offset)
-         return 0;
- 
-     i = 0;
- 
-     for (;;) {
-         /* get word index */
-         word = phrasebook[offset] - phrasebook_short;
-         if (word >= 0) {
-             word = (word << 8) + phrasebook[offset+1];
-             offset += 2;
-         } else
-             word = phrasebook[offset++];
-         if (i) {
-             if (i > buflen)
-                 return 0; /* buffer overflow */
-             buffer[i++] = ' ';
-         }
-         /* copy word string from lexicon.  the last character in the
-            word has bit 7 set.  the last word in a string ends with
-            0x80 */
-         w = lexicon + lexicon_offset[word];
-         while (*w < 128) {
-             if (i >= buflen)
-                 return 0; /* buffer overflow */
-             buffer[i++] = *w++;
-         }
-         if (i >= buflen)
-             return 0; /* buffer overflow */
-         buffer[i++] = *w & 127;
-         if (*w == 128)
-             break; /* end of word */
-     }
- 
-     return 1;
- }
- 
- static int
- cmpname(int code, const char* name, int namelen)
- {
-     /* check if code corresponds to the given name */
-     int i;
-     char buffer[NAME_MAXLEN];
-     if (!getname(code, buffer, sizeof(buffer)))
-         return 0;
-     for (i = 0; i < namelen; i++) {
-         if (toupper(name[i]) != buffer[i])
-             return 0;
-     }
-     return buffer[namelen] == '\0';
- }
- 
- static int
- getcode(const char* name, int namelen, Py_UCS4* code)
- {
-     unsigned int h, v;
-     unsigned int mask = code_size-1;
-     unsigned int i, incr;
- 
-     /* the following is the same as python's dictionary lookup, with
-        only minor changes.  see the makeunicodedata script for more
-        details */
- 
-     h = (unsigned int) gethash(name, namelen, code_magic);
-     i = (~h) & mask;
-     v = code_hash[i];
-     if (!v)
-         return 0;
-     if (cmpname(v, name, namelen)) {
-         *code = v;
-         return 1;
-     }
-     incr = (h ^ (h >> 3)) & mask;
-     if (!incr)
-         incr = mask;
-     for (;;) {
-         i = (i + incr) & mask;
-         v = code_hash[i];
-         if (!v)
-             return -1;
-         if (cmpname(v, name, namelen)) {
-             *code = v;
-             return 1;
-         }
-         incr = incr << 1;
-         if (incr > mask)
-             incr = incr ^ code_poly;
-     }
- }
- 
- static const _PyUnicode_Name_CAPI hashAPI = 
- {
-     sizeof(_PyUnicode_Name_CAPI),
-     getname,
-     getcode
- };
- 
- /* -------------------------------------------------------------------- */
- /* Python bindings */
- 
- static PyObject *
- ucnhash_getname(PyObject* self, PyObject* args)
- {
-     char name[NAME_MAXLEN];
- 
-     int code;
-     if (!PyArg_ParseTuple(args, "i", &code))
-         return NULL;
- 
-     if (!getname((Py_UCS4) code, name, sizeof(name))) {
-         PyErr_SetString(PyExc_ValueError, "undefined character code");
-         return NULL;
-     }
- 
-     return Py_BuildValue("s", name);
- }
- 
- static PyObject *
- ucnhash_getcode(PyObject* self, PyObject* args)
- {
-     Py_UCS4 code;
- 
-     char* name;
-     int namelen;
-     if (!PyArg_ParseTuple(args, "s#", &name, &namelen))
-         return NULL;
- 
-     if (!getcode(name, namelen, &code)) {
-         PyErr_SetString(PyExc_ValueError, "undefined character name");
-         return NULL;
-     }
- 
-     return Py_BuildValue("i", code);
- }
- 
  static  
  PyMethodDef ucnhash_methods[] =
  {   
-     {"getname", ucnhash_getname, 1},
-     {"getcode", ucnhash_getcode, 1},
      {NULL, NULL},
  };
  
! static char *ucnhash_docstring = "ucnhash hash function module";
  
- 
- /* Create PyMethodObjects and register them in the module's dict */
  DL_EXPORT(void) 
  initucnhash(void)
  {
!     PyObject *m, *d, *v;
! 
!     m = Py_InitModule4(
          "ucnhash", /* Module name */
          ucnhash_methods, /* Method list */
--- 1,18 ----
! /* obsolete -- remove this file! */
  
  #include "Python.h"
  
  static  
  PyMethodDef ucnhash_methods[] =
  {   
      {NULL, NULL},
  };
  
! static char *ucnhash_docstring = "ucnhash hash function module (obsolete)";
  
  DL_EXPORT(void) 
  initucnhash(void)
  {
!     Py_InitModule4(
          "ucnhash", /* Module name */
          ucnhash_methods, /* Method list */
***************
*** 199,212 ****
          (PyObject *)NULL, /* always pass this as *self */
          PYTHON_API_VERSION); /* API Version */
-     if (!m)
-         return;
- 
-     d = PyModule_GetDict(m);
-     if (!d)
-         return;
- 
-     /* Export C API */
-     v = PyCObject_FromVoidPtr((void *) &hashAPI, NULL);
-     PyDict_SetItemString(d, "Unicode_Names_CAPI", v);
-     Py_XDECREF(v);
  }
--- 20,22 ----

Index: unicodedata.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Modules/unicodedata.c,v
retrieving revision 2.8
retrieving revision 2.9
diff -C2 -r2.8 -r2.9
*** unicodedata.c	2001/01/21 23:31:52	2.8
--- unicodedata.c	2001/01/24 07:59:11	2.9
***************
*** 13,17 ****
--- 13,20 ----
  
  #include "Python.h"
+ #include "ucnhash.h"
  
+ /* character properties */
+ 
  typedef struct {
      const unsigned char category;	/* index into
***************
*** 53,58 ****
      long rc;
  
!     if (!PyArg_ParseTuple(args, "O!|O:decimal",
! 			  &PyUnicode_Type, &v, &defobj))
          return NULL;
      if (PyUnicode_GET_SIZE(v) != 1) {
--- 56,60 ----
      long rc;
  
!     if (!PyArg_ParseTuple(args, "O!|O:decimal", &PyUnicode_Type, &v, &defobj))
          return NULL;
      if (PyUnicode_GET_SIZE(v) != 1) {
***************
*** 83,88 ****
      long rc;
  
!     if (!PyArg_ParseTuple(args, "O!|O:digit",
! 			  &PyUnicode_Type, &v, &defobj))
          return NULL;
      if (PyUnicode_GET_SIZE(v) != 1) {
--- 85,89 ----
      long rc;
  
!     if (!PyArg_ParseTuple(args, "O!|O:digit", &PyUnicode_Type, &v, &defobj))
          return NULL;
      if (PyUnicode_GET_SIZE(v) != 1) {
***************
*** 94,99 ****
      if (rc < 0) {
  	if (defobj == NULL) {
! 	    PyErr_SetString(PyExc_ValueError,
! 			    "not a digit");
              return NULL;
  	}
--- 95,99 ----
      if (rc < 0) {
  	if (defobj == NULL) {
! 	    PyErr_SetString(PyExc_ValueError, "not a digit");
              return NULL;
  	}
***************
*** 113,118 ****
      double rc;
  
!     if (!PyArg_ParseTuple(args, "O!|O:numeric",
! 			  &PyUnicode_Type, &v, &defobj))
          return NULL;
      if (PyUnicode_GET_SIZE(v) != 1) {
--- 113,117 ----
      double rc;
  
!     if (!PyArg_ParseTuple(args, "O!|O:numeric", &PyUnicode_Type, &v, &defobj))
          return NULL;
      if (PyUnicode_GET_SIZE(v) != 1) {
***************
*** 124,129 ****
      if (rc < 0) {
  	if (defobj == NULL) {
! 	    PyErr_SetString(PyExc_ValueError,
! 			    "not a numeric character");
  	    return NULL;
  	}
--- 123,127 ----
      if (rc < 0) {
  	if (defobj == NULL) {
! 	    PyErr_SetString(PyExc_ValueError, "not a numeric character");
  	    return NULL;
  	}
***************
*** 253,273 ****
  }
  
  /* XXX Add doc strings. */
  
  static PyMethodDef unicodedata_functions[] = {
!     {"decimal",		unicodedata_decimal,			1},
!     {"digit",		unicodedata_digit,			1},
!     {"numeric",		unicodedata_numeric,			1},
!     {"category",	unicodedata_category,			1},
!     {"bidirectional",	unicodedata_bidirectional,		1},
!     {"combining",	unicodedata_combining,			1},
!     {"mirrored",	unicodedata_mirrored,			1},
!     {"decomposition",	unicodedata_decomposition,		1},
      {NULL, NULL}		/* sentinel */
  };
  
  DL_EXPORT(void)
  initunicodedata(void)
  {
!     Py_InitModule("unicodedata", unicodedata_functions);
  }
--- 251,480 ----
  }
  
+ /* -------------------------------------------------------------------- */
+ /* unicode character name tables */
+ 
+ /* data file generated by Tools/unicode/makeunicodedata.py */
+ #include "unicodename_db.h"
+ 
+ /* -------------------------------------------------------------------- */
+ /* database code (cut and pasted from the unidb package) */
+ 
+ static unsigned long
+ gethash(const char *s, int len, int scale)
+ {
+     int i;
+     unsigned long h = 0;
+     unsigned long ix;
+     for (i = 0; i < len; i++) {
+         h = (h * scale) + (unsigned char) toupper(s[i]);
+         ix = h & 0xff000000;
+         if (ix)
+             h = (h ^ ((ix>>24) & 0xff)) & 0x00ffffff;
+     }
+     return h;
+ }
+ 
+ static int
+ getname(Py_UCS4 code, char* buffer, int buflen)
+ {
+     int offset;
+     int i;
+     int word;
+     unsigned char* w;
+ 
+     if (code < 0 || code >= 65536)
+         return 0;
+ 
+     /* get offset into phrasebook */
+     offset = phrasebook_offset1[(code>>phrasebook_shift)];
+     offset = phrasebook_offset2[(offset<<phrasebook_shift) +
+                                (code&((1<<phrasebook_shift)-1))];
+     if (!offset)
+         return 0;
+ 
+     i = 0;
+ 
+     for (;;) {
+         /* get word index */
+         word = phrasebook[offset] - phrasebook_short;
+         if (word >= 0) {
+             word = (word << 8) + phrasebook[offset+1];
+             offset += 2;
+         } else
+             word = phrasebook[offset++];
+         if (i) {
+             if (i > buflen)
+                 return 0; /* buffer overflow */
+             buffer[i++] = ' ';
+         }
+         /* copy word string from lexicon.  the last character in the
+            word has bit 7 set.  the last word in a string ends with
+            0x80 */
+         w = lexicon + lexicon_offset[word];
+         while (*w < 128) {
+             if (i >= buflen)
+                 return 0; /* buffer overflow */
+             buffer[i++] = *w++;
+         }
+         if (i >= buflen)
+             return 0; /* buffer overflow */
+         buffer[i++] = *w & 127;
+         if (*w == 128)
+             break; /* end of word */
+     }
+ 
+     return 1;
+ }
+ 
+ static int
+ cmpname(int code, const char* name, int namelen)
+ {
+     /* check if code corresponds to the given name */
+     int i;
+     char buffer[NAME_MAXLEN];
+     if (!getname(code, buffer, sizeof(buffer)))
+         return 0;
+     for (i = 0; i < namelen; i++) {
+         if (toupper(name[i]) != buffer[i])
+             return 0;
+     }
+     return buffer[namelen] == '\0';
+ }
+ 
+ static int
+ getcode(const char* name, int namelen, Py_UCS4* code)
+ {
+     unsigned int h, v;
+     unsigned int mask = code_size-1;
+     unsigned int i, incr;
+ 
+     /* the following is the same as python's dictionary lookup, with
+        only minor changes.  see the makeunicodedata script for more
+        details */
+ 
+     h = (unsigned int) gethash(name, namelen, code_magic);
+     i = (~h) & mask;
+     v = code_hash[i];
+     if (!v)
+         return 0;
+     if (cmpname(v, name, namelen)) {
+         *code = v;
+         return 1;
+     }
+     incr = (h ^ (h >> 3)) & mask;
+     if (!incr)
+         incr = mask;
+     for (;;) {
+         i = (i + incr) & mask;
+         v = code_hash[i];
+         if (!v)
+             return -1;
+         if (cmpname(v, name, namelen)) {
+             *code = v;
+             return 1;
+         }
+         incr = incr << 1;
+         if (incr > mask)
+             incr = incr ^ code_poly;
+     }
+ }
+ 
+ static const _PyUnicode_Name_CAPI hashAPI = 
+ {
+     sizeof(_PyUnicode_Name_CAPI),
+     getname,
+     getcode
+ };
+ 
+ /* -------------------------------------------------------------------- */
+ /* Python bindings */
+ 
+ static PyObject *
+ unicodedata_name(PyObject* self, PyObject* args)
+ {
+     char name[NAME_MAXLEN];
+ 
+     PyUnicodeObject* v;
+     PyObject* defobj = NULL;
+     if (!PyArg_ParseTuple(args, "O!|O:name", &PyUnicode_Type, &v, &defobj))
+         return NULL;
+ 
+     if (PyUnicode_GET_SIZE(v) != 1) {
+ 	PyErr_SetString(PyExc_TypeError,
+ 			"need a single Unicode character as parameter");
+ 	return NULL;
+     }
+ 
+     if (!getname((Py_UCS4) *PyUnicode_AS_UNICODE(v), name, sizeof(name))) {
+ 	if (defobj == NULL) {
+ 	    PyErr_SetString(PyExc_ValueError, "no such name");
+             return NULL;
+ 	}
+ 	else {
+ 	    Py_INCREF(defobj);
+ 	    return defobj;
+ 	}
+     }
+ 
+     return Py_BuildValue("s", name);
+ }
+ 
+ static PyObject *
+ unicodedata_lookup(PyObject* self, PyObject* args)
+ {
+     Py_UCS4 code;
+     Py_UNICODE str[1];
+ 
+     char* name;
+     int namelen;
+     if (!PyArg_ParseTuple(args, "s#:lookup", &name, &namelen))
+         return NULL;
+ 
+     if (!getcode(name, namelen, &code)) {
+         PyErr_SetString(PyExc_KeyError, "undefined character name");
+         return NULL;
+     }
+ 
+     str[0] = (Py_UNICODE) code;
+     return PyUnicode_FromUnicode(str, 1);
+ }
+ 
  /* XXX Add doc strings. */
  
  static PyMethodDef unicodedata_functions[] = {
!     {"decimal", unicodedata_decimal, METH_VARARGS},
!     {"digit", unicodedata_digit, METH_VARARGS},
!     {"numeric", unicodedata_numeric, METH_VARARGS},
!     {"category", unicodedata_category, METH_VARARGS},
!     {"bidirectional", unicodedata_bidirectional, METH_VARARGS},
!     {"combining", unicodedata_combining, METH_VARARGS},
!     {"mirrored", unicodedata_mirrored, METH_VARARGS},
!     {"decomposition",unicodedata_decomposition, METH_VARARGS},
!     {"name", unicodedata_name, METH_VARARGS},
!     {"lookup", unicodedata_lookup, METH_VARARGS},
      {NULL, NULL}		/* sentinel */
  };
  
+ static char *unicodedata_docstring = "unicode character database";
+ 
  DL_EXPORT(void)
  initunicodedata(void)
  {
!     PyObject *m, *d, *v;
! 
!     m = Py_InitModule4(
!         "unicodedata", unicodedata_functions,
!         unicodedata_docstring, NULL, PYTHON_API_VERSION);
!     if (!m)
!         return;
! 
!     d = PyModule_GetDict(m);
!     if (!d)
!         return;
! 
!     /* Export C API */
!     v = PyCObject_FromVoidPtr((void *) &hashAPI, NULL);
!     PyDict_SetItemString(d, "ucnhash_CAPI", v);
!     Py_XDECREF(v);
! 
  }




RetroSearch is an open source project built by @garambo | Open a GitHub Issue

Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo

HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4