Update of /cvsroot/python/python/dist/src/Tools/unicode In directory slayer.i.sourceforge.net:/tmp/cvs-serv23556/Tools/unicode Modified Files: makeunicodedata.py Log Message: unicode database compression, step 2: - fixed attributions - moved decomposition data to a separate table, in preparation for step 3 (which won't happen before 2.0 final, promise!) - use relative paths in the generator script I have a lot more stuff in the works for 2.1, but let's leave that for another day... Index: makeunicodedata.py =================================================================== RCS file: /cvsroot/python/python/dist/src/Tools/unicode/makeunicodedata.py,v retrieving revision 1.2 retrieving revision 1.3 diff -C2 -r1.2 -r1.3 *** makeunicodedata.py 2000/09/25 07:13:41 1.2 --- makeunicodedata.py 2000/09/25 08:07:06 1.3 *************** *** 1,13 **** # ! # makeunidb.py -- generate a compact version of the unicode property ! # database (unicodedatabase.h) # import sys SCRIPT = sys.argv[0] ! VERSION = "1.0" ! UNICODE_DATA = "c:/pythonware/modules/unidb/etc/UnicodeData-Latest.txt" CATEGORY_NAMES = [ "Cn", "Lu", "Ll", "Lt", "Mn", "Mc", "Me", "Nd", --- 1,18 ---- # ! # generate a compact version of the unicode property database # + # history: + # 2000-09-24 fl created (based on bits and pieces from unidb) + # 2000-09-25 fl merged tim's splitbin fixes, separate decomposition table + # + # written by Fredrik Lundh (fredrik@pythonware.com), September 2000 + # import sys SCRIPT = sys.argv[0] ! VERSION = "1.1" ! UNICODE_DATA = "../UnicodeData-Latest.txt" CATEGORY_NAMES = [ "Cn", "Lu", "Ll", "Lt", "Mn", "Mc", "Me", "Nd", *************** *** 25,35 **** # extract unicode properties ! dummy = (0, 0, 0, 0, "NULL") table = [dummy] cache = {0: dummy} index = [0] * len(unicode.chars) - - DECOMPOSITION = [""] for char in unicode.chars: record = unicode.table[char] --- 30,39 ---- # extract unicode properties ! 
dummy = (0, 0, 0, 0) table = [dummy] cache = {0: dummy} index = [0] * len(unicode.chars) + # 1) database properties for char in unicode.chars: record = unicode.table[char] *************** *** 40,49 **** bidirectional = BIDIRECTIONAL_NAMES.index(record[4]) mirrored = record[9] == "Y" - if record[5]: - decomposition = '"%s"' % record[5] - else: - decomposition = "NULL" item = ( ! category, combining, bidirectional, mirrored, decomposition ) # add entry to index and item tables --- 44,49 ---- bidirectional = BIDIRECTIONAL_NAMES.index(record[4]) mirrored = record[9] == "Y" item = ( ! category, combining, bidirectional, mirrored ) # add entry to index and item tables *************** *** 54,59 **** index[char] = i ! # FIXME: we really should compress the decomposition stuff ! # (see the unidb utilities for one way to do this) FILE = "unicodedata_db.h" --- 54,77 ---- index[char] = i ! # 2) decomposition data ! ! # FIXME: <fl> using the encoding stuff from unidb would save ! # another 50k or so, but I'll leave that for 2.1... ! ! decomp_data = [""] ! decomp_index = [0] * len(unicode.chars) ! ! for char in unicode.chars: ! record = unicode.table[char] ! if record: ! if record[5]: ! try: ! i = decomp_data.index(record[5]) ! except ValueError: ! i = len(decomp_data) ! decomp_data.append(record[5]) ! else: ! i = 0 ! decomp_index[char] = i FILE = "unicodedata_db.h" *************** *** 66,70 **** print "const _PyUnicode_DatabaseRecord _PyUnicode_Database_Records[] = {" for item in table: ! print " {%d, %d, %d, %d, %s}," % item print "};" print --- 84,88 ---- print "const _PyUnicode_DatabaseRecord _PyUnicode_Database_Records[] = {" for item in table: ! 
print " {%d, %d, %d, %d}," % item print "};" print *************** *** 83,86 **** --- 101,110 ---- print "};" + print "static const char *decomp_data[] = {" + for name in decomp_data: + print " \"%s\"," % name + print " NULL" + print "};" + # split index table index1, index2, shift = splitbins(index) *************** *** 90,93 **** --- 114,125 ---- Array("index1", index1).dump(sys.stdout) Array("index2", index2).dump(sys.stdout) + + # split index table + index1, index2, shift = splitbins(decomp_index) + + print "/* same, for the decomposition data */" + print "#define DECOMP_SHIFT", shift + Array("decomp_index1", index1).dump(sys.stdout) + Array("decomp_index2", index2).dump(sys.stdout) sys.stdout = sys.__stdout__
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4