>>>>> "JCA" == James C Ahlstrom <jim@interet.com> writes: JCA> The following is based on trying (a great learning experience) JCA> to write a better Python lint. JCA> There are IMHO two problems with the current Python grammar JCA> file. It is not possible to express operator precedence, so JCA> deliberate shift/reduce conflicts are used instead. That makes JCA> the parse tree complicated and non intuitive. And there is no JCA> provision for error productions. YACC has both of these as JCA> built-in features. JCA> I also found speed problems with tokenize.py. AFAIK, it only JCA> exists because tokenizer.c does not provide comments as tokens, JCA> but eats them instead. We could modify tokenizer.c, then make JCA> tokenize.py be the interface to the fast C tokenizer. This JCA> eliminates the problem of updating both too. JCA> So how about re-writing the Python grammar in YACC in order to JCA> use its more advanced features?? The simple YACC grammar I JCA> wrote for 1.5.2 plus an altered tokenizer.c parsed the whole JCA> Lib/*.py in a couple seconds vs. 30 seconds for the first file JCA> using Aaron Watters' Python lint grammar written in Python. Why not use the actual Python parser instead of tokenize.py? I assume it is also faster than Aaron's Python lint grammar written in Python. The compiler in Tools/compiler uses the parser module internally and produces an AST that is straightforward to use. (The parse tree produced by the parser module is fairly low-level.) There was a thread (on the compiler-sig, I believe) where Moshe and I noodled with a simple lint-like warnings framework based on the compiler package. I don't have the code we ended up with, but I found an example checker in the mail archives and have included it below. It checks for NameErrors. 
I believe one useful change that Moshe and I arrived at was to avoid the explicit stack that the code uses (via enterNamespace and exitNamespace) and instead pass the namespace as an optional extra argument to the visitXXX methods. Jeremy """Check for NameErrors""" from compiler import parseFile, walk from compiler.misc import Stack, Set import __builtin__ from UserDict import UserDict class Warning: def __init__(self, filename, funcname, lineno): self.filename = filename self.funcname = funcname self.lineno = lineno def __str__(self): return self._template % self.__dict__ class UndefinedLocal(Warning): super_init = Warning.__init__ def __init__(self, filename, funcname, lineno, name): self.super_init(filename, funcname, lineno) self.name = name _template = "%(filename)s:%(lineno)s " \ "%(funcname)s undefined local %(name)s" class NameError(UndefinedLocal): _template = "%(filename)s:%(lineno)s " \ "%(funcname)s undefined name %(name)s" class NameSet(UserDict): """Track names and the line numbers where they are referenced""" def __init__(self): self.data = self.names = {} def add(self, name, lineno): l = self.names.get(name, []) l.append(lineno) self.names[name] = l class CheckNames: def __init__(self, filename): self.filename = filename self.warnings = [] self.scope = Stack() self.gUse = NameSet() self.gDef = NameSet() # _locals is the stack of local namespaces # locals is the top of the stack self._locals = Stack() self.lUse = None self.lDef = None self.lGlobals = None # var declared global # holds scope,def,use,global triples for later analysis self.todo = [] def enterNamespace(self, node): self.scope.push(node) self.lUse = use = NameSet() self.lDef = _def = NameSet() self.lGlobals = gbl = NameSet() self._locals.push((use, _def, gbl)) def exitNamespace(self): self.todo.append((self.scope.top(), self.lDef, self.lUse, self.lGlobals)) self.scope.pop() self._locals.pop() if self._locals: self.lUse, self.lDef, self.lGlobals = self._locals.top() else: self.lUse = 
self.lDef = self.lGlobals = None def warn(self, warning, funcname, lineno, *args): args = (self.filename, funcname, lineno) + args self.warnings.append(apply(warning, args)) def defName(self, name, lineno, local=1): if self.lUse is None: self.gDef.add(name, lineno) elif local == 0: self.gDef.add(name, lineno) self.lGlobals.add(name, lineno) else: self.lDef.add(name, lineno) def useName(self, name, lineno, local=1): if self.lUse is None: self.gUse.add(name, lineno) elif local == 0: self.gUse.add(name, lineno) self.lUse.add(name, lineno) else: self.lUse.add(name, lineno) def check(self): for s, d, u, g in self.todo: self._check(s, d, u, g, self.gDef) # XXX then check the globals def _check(self, scope, _def, use, gbl, globals): # check for NameError # a name is defined iff it is in def.keys() # a name is global iff it is in gdefs.keys() gdefs = UserDict() gdefs.update(globals) gdefs.update(__builtin__.__dict__) defs = UserDict() defs.update(gdefs) defs.update(_def) errors = Set() for name in use.keys(): if not defs.has_key(name): firstuse = use[name][0] self.warn(NameError, scope.name, firstuse, name) errors.add(name) # check for UndefinedLocalNameError # order == use & def sorted by lineno # elements are lineno, flag, name # flag = 0 if use, flag = 1 if def order = [] for name, lines in use.items(): if gdefs.has_key(name) and not _def.has_key(name): # this is a global ref, we can skip it continue for lineno in lines: order.append((lineno, 0, name)) for name, lines in _def.items(): for lineno in lines: order.append((lineno, 1, name)) order.sort() # ready contains names that have been defined or warned about ready = Set() for lineno, flag, name in order: if flag == 0: # use if not ready.has_elt(name) and not errors.has_elt(name): self.warn(UndefinedLocal, scope.name, lineno, name) ready.add(name) # don't warn again else: ready.add(name) # below are visitor methods def visitFunction(self, node, noname=0): for expr in node.defaults: self.visit(expr) if not noname: 
self.defName(node.name, node.lineno) self.enterNamespace(node) for name in node.argnames: self.defName(name, node.lineno) self.visit(node.code) self.exitNamespace() return 1 def visitLambda(self, node): return self.visitFunction(node, noname=1) def visitClass(self, node): for expr in node.bases: self.visit(expr) self.defName(node.name, node.lineno) self.enterNamespace(node) self.visit(node.code) self.exitNamespace() return 1 def visitName(self, node): self.useName(node.name, node.lineno) def visitGlobal(self, node): for name in node.names: self.defName(name, node.lineno, local=0) def visitImport(self, node): for name, alias in node.names: self.defName(alias or name, node.lineno) visitFrom = visitImport def visitAssName(self, node): self.defName(node.name, node.lineno) def check(filename): global p, checker p = parseFile(filename) checker = CheckNames(filename) walk(p, checker) checker.check() for w in checker.warnings: print w if __name__ == "__main__": import sys # XXX need to do real arg processing check(sys.argv[1])
RetroSearch is an open source project built by @garambo | Open a GitHub Issue
Search and Browse the WWW like it's 1997 | Search results from DuckDuckGo
HTML:
3.2
| Encoding:
UTF-8
| Version:
0.7.4