On Sat, Nov 29, 2003 at 01:12:34AM -0500, Raymond Hettinger wrote: > [Guido] > > I would make one change: after looking at another use case, I'd like > > to change the outer iterator to produce (key, grouper) tuples. This > > way, you can write things like > > > > totals = {} > > for key, group in groupby(sequence): > > totals[key] = sum(group) Heh. I love that! > > Here is an implementation that translates readily into C. It uses > Guido's syntax and meets my requirement that bad things don't happen > when someone runs the outer iterator independently of the inner > iterator. > I updated my implementation according to your guideline. Please see attachments. Docstrings are still insufficient due to my english shortage. :) Thanks! Regards, Hye-Shik -------------- next part -------------- Index: Modules/itertoolsmodule.c =================================================================== RCS file: /cvsroot/python/python/dist/src/Modules/itertoolsmodule.c,v retrieving revision 1.26 diff -u -u -r1.26 itertoolsmodule.c --- Modules/itertoolsmodule.c 12 Nov 2003 14:32:26 -0000 1.26 +++ Modules/itertoolsmodule.c 29 Nov 2003 22:25:18 -0000 @@ -2081,6 +2081,332 @@ }; +/* groupby object ***********************************************************/ + +typedef struct { + PyObject_HEAD + PyObject *it; + PyObject *keyfunc; + PyObject *tgtkey; + PyObject *currkey; + PyObject *currvalue; +} groupbyobject; + +static PyTypeObject groupby_type; +static PyObject *_grouper_create(groupbyobject *, PyObject *); + +static PyObject * +groupby_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + groupbyobject *gbo; + PyObject *it, *keyfunc; + + if (!PyArg_UnpackTuple(args, "groupby", 2, 2, &keyfunc, &it)) + return NULL; + + if (keyfunc != Py_None && !PyCallable_Check(keyfunc)) { + PyErr_SetString(PyExc_ValueError, + "Key argument must be a callable object or None."); + return NULL; + } + + gbo = (groupbyobject *)type->tp_alloc(type, 0); + if (gbo == NULL) + return NULL; + gbo->tgtkey = NULL; + gbo->currkey = NULL; + gbo->currvalue = NULL; + gbo->keyfunc = keyfunc; + Py_INCREF(keyfunc); + gbo->it = PyObject_GetIter(it); + if (gbo->it == NULL) { + Py_DECREF(gbo); + return NULL; + } + return (PyObject *)gbo; +} + +static void +groupby_dealloc(groupbyobject *gbo) +{ + PyObject_GC_UnTrack(gbo); + Py_XDECREF(gbo->it); + Py_XDECREF(gbo->keyfunc); + Py_XDECREF(gbo->tgtkey); + Py_XDECREF(gbo->currkey); + Py_XDECREF(gbo->currvalue); + gbo->ob_type->tp_free(gbo); +} + +static int +groupby_traverse(groupbyobject *gbo, visitproc visit, void *arg) +{ + int err; + + if (gbo->it) { + err = visit(gbo->it, arg); + if (err) + return err; + } + + if (gbo->keyfunc) { + err = visit(gbo->keyfunc, arg); + if (err) + return err; + } + + if (gbo->tgtkey) { + err = visit(gbo->tgtkey, arg); + if (err) + return err; + } + + if (gbo->currkey) { + err = visit(gbo->currkey, arg); + if (err) + return err; + } + + if (gbo->currvalue) { + err = visit(gbo->currvalue, arg); + if (err) + return err; + } + + return 0; +} + +static PyObject * +groupby_next(groupbyobject *gbo) +{ + PyObject *newvalue, *newkey, *r, *grouper; + int rcmp; + + /* skip to next iteration group */ + for (;;) { + if (gbo->currkey == NULL) + rcmp = 0; + else if (gbo->tgtkey == NULL) + break; + else if (PyObject_Cmp(gbo->tgtkey, gbo->currkey, &rcmp) == -1) + return NULL; + + if (rcmp != 0) + break; + + newvalue = PyIter_Next(gbo->it); + if (newvalue == NULL) + return NULL; + + if (gbo->keyfunc == Py_None) { + newkey = newvalue; + Py_INCREF(newvalue); + } else { + newkey = PyObject_CallFunctionObjArgs(gbo->keyfunc, + newvalue, NULL); + if (newkey == NULL) { + Py_DECREF(newvalue); + return NULL; + } + } + + Py_XDECREF(gbo->currkey); + gbo->currkey = newkey; + Py_XDECREF(gbo->currvalue); + gbo->currvalue = newvalue; + } + + Py_XDECREF(gbo->tgtkey); + gbo->tgtkey = gbo->currkey; + Py_INCREF(gbo->currkey); + + grouper = _grouper_create(gbo, gbo->tgtkey); + if (grouper == NULL) + return NULL; + + r = PyTuple_New(2); + if (r == NULL) + return NULL; + PyTuple_SET_ITEM(r, 0, gbo->tgtkey); + Py_INCREF(gbo->tgtkey); + PyTuple_SET_ITEM(r, 1, grouper); + + return r; +} + +PyDoc_STRVAR(groupby_doc, +"groupby(keyfunc, iterable) -> create an iterator which returns\n\ +(key, sub-iterator) grouped by each value of key(value).\n"); + +static PyTypeObject groupby_type = { + PyObject_HEAD_INIT(NULL) + 0, /* ob_size */ + "itertools.groupby", /* tp_name */ + sizeof(groupbyobject), /* tp_basicsize */ + 0, /* tp_itemsize */ + /* methods */ + (destructor)groupby_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + PyObject_GenericGetAttr, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC | + Py_TPFLAGS_BASETYPE, /* tp_flags */ + groupby_doc, /* tp_doc */ + (traverseproc)groupby_traverse, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + PyObject_SelfIter, /* tp_iter */ + (iternextfunc)groupby_next, /* tp_iternext */ + 0, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + groupby_new, /* tp_new */ + PyObject_GC_Del, /* tp_free */ +}; + + +/* _grouper object (internal) ************************************************/ + +typedef struct { + PyObject_HEAD + PyObject *parent; + PyObject *tgtkey; +} _grouperobject; + +static PyTypeObject _grouper_type; + +static PyObject * +_grouper_create(groupbyobject *parent, PyObject *tgtkey) +{ + _grouperobject *igo; + + igo = PyObject_New(_grouperobject, &_grouper_type); + if (igo == NULL) + return PyErr_NoMemory(); + igo->parent = (PyObject *)parent; + Py_INCREF(parent); + igo->tgtkey = tgtkey; + Py_INCREF(tgtkey); + + return (PyObject *)igo; +} + +static void +_grouper_dealloc(_grouperobject *igo) +{ + Py_DECREF(igo->parent); + Py_DECREF(igo->tgtkey); + PyObject_Del(igo); +} + +static PyObject * +_grouper_next(_grouperobject *igo) +{ + groupbyobject *gbo = (groupbyobject *)igo->parent; + PyObject *newvalue, *newkey, *r; + int rcmp; + + if (gbo->currvalue == NULL) { + newvalue = PyIter_Next(gbo->it); + if (newvalue == NULL) + return NULL; + + if (gbo->keyfunc == Py_None) { + newkey = newvalue; + Py_INCREF(newvalue); + } else { + newkey = PyObject_CallFunctionObjArgs(gbo->keyfunc, + newvalue, NULL); + if (newkey == NULL) { + Py_DECREF(newvalue); + return NULL; + } + } + + assert(gbo->currkey == NULL); + gbo->currkey = newkey; + gbo->currvalue = newvalue; + } + + assert(gbo->currkey != NULL); + if (PyObject_Cmp(igo->tgtkey, gbo->currkey, &rcmp) == -1) + return NULL; + + if (rcmp != 0) + return NULL; + + r = gbo->currvalue; + gbo->currvalue = NULL; + Py_DECREF(gbo->currkey); + gbo->currkey = NULL; + + return r; +} + +static PyTypeObject _grouper_type = { + PyObject_HEAD_INIT(NULL) + 0, /* ob_size */ + "itertools._grouper", /* tp_name */ + sizeof(_grouperobject), /* tp_basicsize */ + 0, /* tp_itemsize */ + /* methods */ + (destructor)_grouper_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + PyObject_GenericGetAttr, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + 0, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + PyObject_SelfIter, /* tp_iter */ + (iternextfunc)_grouper_next, /* tp_iternext */ + 0, /* tp_methods */ + 0, /* tp_members */ + 0, /* tp_getset */ + 0, /* tp_base */ + 0, /* tp_dict */ + 0, /* tp_descr_get */ + 0, /* tp_descr_set */ + 0, /* tp_dictoffset */ + 0, /* tp_init */ + 0, /* tp_alloc */ + 0, /* tp_new */ + _PyObject_Del, /* tp_free */ +}; + + /* module level code ********************************************************/ PyDoc_STRVAR(module_doc, @@ -2103,6 +2429,7 @@ chain(p, q, ...) --> p0, p1, ... plast, q0, q1, ... \n\ takewhile(pred, seq) --> seq[0], seq[1], until pred fails\n\ dropwhile(pred, seq) --> seq[n], seq[n+1], starting when pred fails\n\ +groupby(keyfunc, iterable) --> sub-iteraters grouped by value of keyfunc(v)\n\ "); @@ -2130,6 +2457,7 @@ &count_type, &izip_type, &repeat_type, + &groupby_type, NULL }; @@ -2148,5 +2476,6 @@ return; if (PyType_Ready(&tee_type) < 0) return; - + if (PyType_Ready(&_grouper_type) < 0) + return; } -------------- next part -------------- import unittest from itertools import groupby class TestBasicOps(unittest.TestCase): def test_groupby(self): # Check zero length input self.assertEqual([], list(groupby(lambda r:r[0], []))) # Check normal input s = [(0, 10, 20), (0, 11,21), (0,12,21), (1,13,21), (1,14,22), (2,15,22), (3,16,23), (3,17,23)] dup = [] for k, g in groupby(lambda r:r[0], s): for elem in g: self.assertEqual(k, elem[0]) dup.append(elem) self.assertEqual(s, dup) # Check nested case dup = [] for k, g in groupby(lambda r:r[0], s): for ik, ig in groupby(lambda r:r[2], g): for elem in ig: self.assertEqual(k, elem[0]) self.assertEqual(ik, elem[2]) dup.append(elem) self.assertEqual(s, dup) # Check case where inner iterator is not used keys = [k for k, g in groupby(lambda r:r[0], s)] expectedkeys = set([r[0] for r in s]) self.assertEqual(set(keys), expectedkeys) self.assertEqual(len(keys), len(expectedkeys)) # Check case where key is None word = 'abracadabra' keys = [k for k, g in groupby(None, list.sorted(word))] expectedkeys = set(word) self.assertEqual(set(keys), expectedkeys) self.assertEqual(len(keys), len(expectedkeys)) # Exercise pipes and filters style s = 'abracadabra' ilen = lambda it: len(list(it)) # sort s | uniq r = [k for k, g in groupby(None, list.sorted(s))] self.assertEqual(r, ['a', 'b', 'c', 'd', 'r']) # sort s | uniq -d r = [k for k, g in groupby(None, list.sorted(s)) if ilen(g)>1] self.assertEqual(r, ['a', 'b', 'r']) # sort s | uniq -c r = [(ilen(g), k) for k, g in groupby(None, list.sorted(s))] self.assertEqual(r, [(5, 'a'), (2, 'b'), (1, 'c'), (1, 'd'), (2, 'r')]) # sort s | uniq -c | sort -rn | head -3 r = list.sorted([(ilen(g), k) for k, g in groupby(None, list.sorted(s))], reverse=True)[:3] self.assertEqual(r, [(5, 'a'), (2, 'r'), (2, 'b')]) # Uniteratable argument self.assertRaises(TypeError, groupby, None, None) # iter.next failure class ExpectedError(Exception): pass def delayed_raise(n=0): for i in range(n): yield 'yo' raise ExpectedError def gulp(key, iterable, func=list): return [func(g) for k, g in groupby(key, iterable)] # iter.next failure on outer object self.assertRaises(ExpectedError, gulp, None, delayed_raise(0)) # iter.next failure on inner object self.assertRaises(ExpectedError, gulp, None, delayed_raise(1)) # __cmp__ failure class DummyCmp: def __cmp__(self, dst): raise ExpectedError s = [DummyCmp(), DummyCmp(), None] # __cmp__ failure on outer object self.assertRaises(ExpectedError, gulp, None, s, id) # __cmp__ failure on inner object self.assertRaises(ExpectedError, gulp, None, s) # keyfunc failure def keyfunc(obj): if keyfunc.skip > 0: keyfunc.skip -= 1 return obj else: raise ExpectedError # keyfunc failure on outer object keyfunc.skip = 0 self.assertRaises(ExpectedError, gulp, keyfunc, [None]) keyfunc.skip = 1 self.assertRaises(ExpectedError, gulp, keyfunc, [None, None]) suite = unittest.TestSuite() suite.addTest(unittest.makeSuite(TestBasicOps)) unittest.TextTestRunner(verbosity=2).run(suite)
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4