A RetroSearch Logo

Home - News ( United States | United Kingdom | Italy | Germany ) - Football scores

Search Query:

Showing content from https://mail.python.org/pipermail/python-dev/2000-December/011087.html below:

[Python-Dev] The Dictionary Gem is polished!

[Python-Dev] The Dictionary Gem is polished!Christian Tismer tismer@tismer.com
Sun, 17 Dec 2000 20:10:07 +0200
This is a multi-part message in MIME format.
--------------D1825E07B23FE5AC1D48DB49
Content-Type: text/plain; charset=us-ascii
Content-Transfer-Encoding: 7bit



Christian Tismer wrote:

...
(my timings)
Attached is the updated script with the timings mentioned
in the last posting. Sorry, I posted an older version before.

-- 
Christian Tismer             :^)   <mailto:tismer@tismer.com>
Mission Impossible 5oftware  :     Have a break! Take a ride on Python's
Kaunstr. 26                  :    *Starship* http://starship.python.net
14163 Berlin                 :     PGP key -> http://wwwkeys.pgp.net
PGP Fingerprint       E182 71C7 1A9D 66E9 9D15  D3CC D4D7 93E2 1FAE F6DF
     where do you want to jump today?   http://www.stackless.com
--------------D1825E07B23FE5AC1D48DB49
Content-Type: text/plain; charset=us-ascii;
 name="dictest.py"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
 filename="dictest.py"

## dictest.py
## Test of a new rehash algorithm
## Chris Tismer
## 2000-12-17
## Mission Impossible 5oftware Team

# The following is a partial re-implementation of
# Python dictionaries in Python.
# The original algorithm was literally turned
# into Python code.

##/*
##Table of irreducible polynomials to efficiently cycle through
##GF(2^n)-{0}, 2<=n<=30.
##*/
polys = [
    4 + 3,
    8 + 3,
    16 + 3,
    32 + 5,
    64 + 3,
    128 + 3,
    256 + 29,
    512 + 17,
    1024 + 9,
    2048 + 5,
    4096 + 83,
    8192 + 27,
    16384 + 43,
    32768 + 3,
    65536 + 45,
    131072 + 9,
    262144 + 39,
    524288 + 39,
    1048576 + 9,
    2097152 + 5,
    4194304 + 3,
    8388608 + 33,
    16777216 + 27,
    33554432 + 9,
    67108864 + 71,
    134217728 + 39,
    268435456 + 9,
    536870912 + 5,
    1073741824 + 83,
    0
]


class NULL: pass

class Dictionary:
    dummy = "<dummy key>"
    def __init__(mp, newalg=0):
        mp.ma_size = 0
        mp.ma_poly = 0
        mp.ma_table = []
        mp.ma_fill = 0
        mp.ma_used = 0
        mp.oldalg = not newalg

    def lookdict(mp, key, _hash):
        me_hash, me_key, me_value = range(3) # rec slots
        dummy = mp.dummy
        
        mask = mp.ma_size-1
        ep0 = mp.ma_table
        i = (~_hash) & mask
        ep = ep0[i]
        if ep[me_key] is NULL or ep[me_key] == key:
            return ep
        if ep[me_key] == dummy:
            freeslot = ep
        else:
            if (ep[me_hash] == _hash and
                cmp(ep[me_key], key) == 0) :
                return ep
            freeslot = NULL

###### FROM HERE
        if mp.oldalg:
            incr = (_hash ^ (_hash >> 3)) & mask
        else:
            # note that we do not mask!
            # even the shifting my not be worth it.
            incr = _hash ^ (_hash >> 3)
###### TO HERE
            
        if (not incr):
            incr = mask
        while 1:
            ep = ep0[(i+incr)&mask]
            if (ep[me_key] is NULL) :
                if (freeslot != NULL) :
                    return freeslot
                else:
                    return ep
            if (ep[me_key] == dummy) :
                if (freeslot == NULL):
                    freeslot = ep
            elif (ep[me_key] == key or
                 (ep[me_hash] == _hash and
                  cmp(ep[me_key], key) == 0)) :
                return ep

            # Cycle through GF(2^n)-{0}
###### FROM HERE
            if mp.oldalg:
                incr = incr << 1
                if (incr > mask):
                    incr = incr ^ mp.ma_poly
            else:
                # new algorithm: do a division
                if (incr & 1):
                    incr = incr ^ mp.ma_poly
                incr = incr >> 1
###### TO HERE

    def insertdict(mp, key, _hash, value):
        me_hash, me_key, me_value = range(3) # rec slots
        
        ep = mp.lookdict(key, _hash)
        if (ep[me_value] is not NULL) :
            old_value = ep[me_value]
            ep[me_value] = value
        else :
            if (ep[me_key] is NULL):
                mp.ma_fill=mp.ma_fill+1
            ep[me_key] = key
            ep[me_hash] = _hash
            ep[me_value] = value
            mp.ma_used = mp.ma_used+1

    def dictresize(mp, minused):
        me_hash, me_key, me_value = range(3) # rec slots

        oldsize = mp.ma_size
        oldtable = mp.ma_table
        MINSIZE = 4
        newsize = MINSIZE
        for i in range(len(polys)):
            if (newsize > minused) :
                newpoly = polys[i]
                break
            newsize = newsize << 1
        else:
            return -1

        _nullentry = range(3)
        _nullentry[me_hash] = 0
        _nullentry[me_key] = NULL
        _nullentry[me_value] = NULL

        newtable = map(lambda x,y=_nullentry:y[:], range(newsize))      

        mp.ma_size = newsize
        mp.ma_poly = newpoly
        mp.ma_table = newtable
        mp.ma_fill = 0
        mp.ma_used = 0

        for ep in oldtable:
            if (ep[me_value] is not NULL):
                mp.insertdict(ep[me_key],ep[me_hash],ep[me_value])
        return 0

    # PyDict_GetItem
    def __getitem__(op, key):
        me_hash, me_key, me_value = range(3) # rec slots

        if not op.ma_table:
            raise KeyError, key
        _hash = hash(key)
        return op.lookdict(key, _hash)[me_value]

    # PyDict_SetItem
    def __setitem__(op, key, value):
        mp = op
        _hash = hash(key)
##      /* if fill >= 2/3 size, double in size */
        if (mp.ma_fill*3 >= mp.ma_size*2) :
            if (mp.dictresize(mp.ma_used*2) != 0):
                if (mp.ma_fill+1 > mp.ma_size):
                    raise MemoryError
        mp.insertdict(key, _hash, value)

    # more interface functions
    def keys(self):
        me_hash, me_key, me_value = range(3) # rec slots
        res = []
        for _hash, _key, _value in self.ma_table:
            if _value is not NULL:
                res.append( _key)
        return res

    def values(self):
        me_hash, me_key, me_value = range(3) # rec slots
        res = []
        for _hash, _key, _value in self.ma_table:
            if _value is not NULL:
                res.append( _value)
        return res

    def items(self):
        me_hash, me_key, me_value = range(3) # rec slots
        res = []
        for _hash, _key, _value in self.ma_table:
            if _value is not NULL:
                res.append( (_key, _value) )
        return res

    def __cmp__(self, other):
        mine = self.items()
        others = other.items()
        mine.sort()
        others.sort()
        return cmp(mine, others)

######################################################
## tests

def timing(func, args=None, n=1, **keywords) :
	import time
	time=time.time
	appl=apply
	if args is None: args = ()
	if type(args) != type(()) : args=(args,)
	rep=range(n)
	dummyarg = ("",)
	dummykw = {}
	dummyfunc = len
	if keywords:
		before=time()
		for i in rep: res=appl(dummyfunc, dummyarg, dummykw)
		empty = time()-before
		before=time()
		for i in rep: res=appl(func, args, keywords)
	else:
		before=time()
		for i in rep: res=appl(dummyfunc, dummyarg)
		empty = time()-before
		before=time()
		for i in rep: res=appl(func, args)
	after = time()
	return round(after-before-empty,4), res

def test(lis, dic):
    for key in lis: dic[key]

def nulltest(lis, dic):
    for key in lis: dic

def string_dicts():
	d1 = Dictionary()   # original
	d2 = Dictionary(1)  # other rehash
	for i in range(1000):
		s = str(i) * 5
		d1[s] = d2[s] = i
	return d1, d2

def badnum_dicts():
	d1 = Dictionary()   # original
	d2 = Dictionary(1)  # other rehash
	shift = 10
	if EXTREME:
		shift = 16
	for i in range(1000):
		bad = i << 16
		d1[bad] = d2[bad] = i
	return d1, d2

def do_test(dict, keys, n):
	t0 = timing(nulltest, (keys, dict), n)[0]
	t1 = timing(test, (keys, dict), n)[0]
	return t1-t0

EXTREME=1

if __name__ == "__main__":
	sdold, sdnew = string_dicts()
	bdold, bdnew = badnum_dicts()
	print "timing for strings old=%.3f new=%.3f" % (
		  do_test(sdold, sdold.keys(), 100),
		  do_test(sdnew, sdnew.keys(), 100) )
	print "timing for bad integers old=%.3f new=%.3f" % (
		  do_test(bdold, bdold.keys(), 10) *10,
		  do_test(bdnew, bdnew.keys(), 10) *10)
  
"""
Results with a shift of 10 (EXTREME=0):
D:\crml_doc\platf\py>python dictest.py
timing for strings old=5.097 new=5.088
timing for bad integers old=101.540 new=12.610

Results with a shift of 16 (EXTREME=1):
D:\crml_doc\platf\py>python dictest.py
timing for strings old=5.218 new=5.147
timing for bad integers old=571.210 new=19.220
"""
--------------D1825E07B23FE5AC1D48DB49--




RetroSearch is an open source project built by @garambo | Open a GitHub Issue

Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo

HTML: 3.2 | Encoding: UTF-8 | Version: 0.7.4