I was profiling an embedded web server we have written in Python, and found that it spent a lot of time in FieldStorage.has_key() and FieldStorage.__getitem__() from cgi.py. So I looked at the code for the FieldStorage class, and found that it basically emulates a normal dictionary, but is implemented using a linear list of fields. Thus, has_key() and __getitem__() must do linear searches to find things. Is there any reason why this is done, instead of having a dictionary of lists? I did a quick stab at replacing the list with a dictionary, and in our application, we got a threefold increase in speed per request... Of course, in our application we have forms sometimes containing well over a thousand entries. It is probably not very common for people to have such large forms, but it is a pity that the cgi module does not scale well. Anyway, here is a diff of the changes I made. I haven't tested it very extensively (in our application we have subclassed the FieldStorage class, and use a modified __init__() method, which makes it difficult to fully test the real FieldStorage class), but from what I have tested, it seems to work. I started with revision 1.45 of cgi.py, which I checked out from the CVS archives this Monday. =================================================================== RCS file: /projects/cvsroot/python/dist/src/Lib/cgi.py,v retrieving revision 1.45 diff -u -5 -r1.45 cgi.py --- cgi.py 1999/06/11 18:26:09 1.45 +++ cgi.py 1999/11/17 12:30:23 @@ -724,11 +724,11 @@ """Like FieldStorage, for use when no file uploads are possible.""" # Dummy attributes filename = None - list = None + dict = None type = None file = None type_options = {} disposition = None disposition_options = {} @@ -890,11 +890,12 @@ pass if maxlen and clen > maxlen: raise ValueError, 'Maximum content length exceeded' self.length = clen - self.list = self.file = None + self.file = None + self.dict = None self.done = 0 self.lines = [] if ctype == 'application/x-www-form-urlencoded': self.read_urlencoded() elif ctype[:10] == 'multipart/': @@ -912,74 +913,69 @@ raise AttributeError, name if self.file: self.file.seek(0) value = self.file.read() self.file.seek(0) - elif self.list is not None: - value = self.list + elif self.dict is not None: + value = self.dict.values() else: value = None return value def __getitem__(self, key): """Dictionary style indexing.""" - if self.list is None: + if self.dict is None: raise TypeError, "not indexable" - found = [] - for item in self.list: - if item.name == key: found.append(item) - if not found: - raise KeyError, key + found = self.dict[key] if len(found) == 1: return found[0] else: return found def keys(self): """Dictionary style keys() method.""" - if self.list is None: + if self.dict is None: raise TypeError, "not indexable" - keys = [] - for item in self.list: - if item.name not in keys: keys.append(item.name) - return keys + return self.dict.keys() def has_key(self, key): """Dictionary style has_key() method.""" - if self.list is None: + if self.dict is None: raise TypeError, "not indexable" - for item in self.list: - if item.name == key: return 1 - return 0 + return self.dict.has_key(key) def __len__(self): """Dictionary style len(x) support.""" - return len(self.keys()) + return len(self.dict) def read_urlencoded(self): """Internal: read data in query string format.""" qs = self.fp.read(self.length) - self.list = list = [] + self.dict = dict = {} for key, value in parse_qsl(qs, self.keep_blank_values, self.strict_parsing): - list.append(MiniFieldStorage(key, value)) + l = dict.get(key, []) + l.append(MiniFieldStorage(key, value)) + dict[key] = l self.skip_lines() FieldStorageClass = None def read_multi(self, environ, keep_blank_values, strict_parsing): """Internal: read a part that is itself multipart.""" - self.list = [] + self.dict = {} klass = self.FieldStorageClass or self.__class__ part = klass(self.fp, {}, self.innerboundary, environ, keep_blank_values, strict_parsing) # Throw first part away while not part.done: headers = rfc822.Message(self.fp) part = klass(self.fp, headers, self.innerboundary, environ, keep_blank_values, strict_parsing) - self.list.append(part) + l = self.dict.get(part.name, []) + l.append(part) + self.dict[part.name] = l self.skip_lines() def read_single(self): """Internal: read an atomic part.""" if self.length >= 0: =================================================================== -- Thomas Bellman, Lysator Computer Club, Linköping University, Sweden "Beware of bugs in the above code; I have ! bellman @ lysator.liu.se only proved it correct, not tried it." ! Make Love -- Nicht Wahr!
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4