1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/compiler/symbols.py Fri May 18 20:51:41 2012 +0200
1.3 @@ -0,0 +1,461 @@
1.4 +"""Module symbol-table generator"""
1.5 +
1.6 +from compiler import ast
1.7 +from compiler.consts import SC_LOCAL, SC_GLOBAL, SC_FREE, SC_CELL, SC_UNKNOWN
1.8 +from compiler.misc import mangle
1.9 +import types
1.10 +
1.11 +
1.12 +import sys
1.13 +
# NOTE(review): not referenced anywhere else in the visible source of this
# module (mangle() itself is imported from compiler.misc); presumably a limit
# on the length of a mangled name -- confirm before relying on it.
MANGLE_LEN = 256
1.15 +
class Scope:
    """Symbol information collected for one lexical scope.

    Every name table (defs, uses, globals, params, frees, cells) is a
    dictionary used as a set: the keys are names and the values are 1.
    """

    # XXX how much information do I need about each name?
    def __init__(self, name, module, klass=None):
        """Create an empty scope.

        name   -- name of the scope, used in __repr__ and error messages
        module -- the scope that receives definitions made through
                  add_global()
        klass  -- name of the enclosing class, or None; used to mangle
                  names via compiler.misc.mangle
        """
        self.name = name
        self.module = module
        self.defs = {}
        self.uses = {}
        self.globals = {}
        self.params = {}
        self.frees = {}
        self.cells = {}
        self.children = []
        # nested is true if the class could contain free variables,
        # i.e. if it is nested within another function.
        self.nested = None
        self.generator = None
        # The class name is stored without its leading underscores.  A
        # name consisting only of underscores (or an empty name) leaves
        # klass as None, which disables mangling.
        self.klass = None
        if klass is not None:
            self.klass = klass.lstrip('_') or None

    def __repr__(self):
        return "<%s: %s>" % (self.__class__.__name__, self.name)

    def mangle(self, name):
        """Return name mangled for the enclosing class, if there is one."""
        if self.klass is None:
            return name
        return mangle(name, self.klass)

    def add_def(self, name):
        """Record a binding of name in this scope."""
        self.defs[self.mangle(name)] = 1

    def add_use(self, name):
        """Record a reference to name in this scope."""
        self.uses[self.mangle(name)] = 1

    def add_global(self, name):
        """Record a global declaration of name.

        The name is also added as a definition of the module scope.
        Raises SyntaxError if the name is a parameter of this scope.
        """
        name = self.mangle(name)
        if name in self.uses or name in self.defs:
            pass # XXX warn about global following def/use
        if name in self.params:
            raise SyntaxError("%s in %s is global and parameter" %
                              (name, self.name))
        self.globals[name] = 1
        self.module.add_def(name)

    def add_param(self, name):
        """Record name as a parameter, which is also a definition."""
        name = self.mangle(name)
        self.defs[name] = 1
        self.params[name] = 1

    def get_names(self):
        """Return a list of all names defined, used or declared global."""
        d = {}
        d.update(self.defs)
        d.update(self.uses)
        d.update(self.globals)
        return list(d)

    def add_child(self, child):
        self.children.append(child)

    def get_children(self):
        return self.children

    def DEBUG(self):
        """Write the name tables of this scope to sys.stderr."""
        label = ""
        if self.nested:
            label = "nested"
        write = sys.stderr.write
        # The spacing reproduces what the former print statements emitted
        # (a separating space after each comma-separated item).
        write("%s %s\n" % (self.name, label))
        write("\tglobals:  %s\n" % (self.globals,))
        write("\tcells:  %s\n" % (self.cells,))
        write("\tdefs:  %s\n" % (self.defs,))
        write("\tuses:  %s\n" % (self.uses,))
        write("\tfrees: %s\n" % (self.frees,))

    def check_name(self, name):
        """Return scope of name.

        The scope of a name could be LOCAL, GLOBAL, FREE, or CELL.  In a
        nested scope a name found in none of the tables is UNKNOWN; in
        a scope that is not nested it is GLOBAL.
        """
        if name in self.globals:
            return SC_GLOBAL
        if name in self.cells:
            return SC_CELL
        if name in self.defs:
            return SC_LOCAL
        if self.nested and (name in self.frees or name in self.uses):
            return SC_FREE
        if self.nested:
            return SC_UNKNOWN
        else:
            return SC_GLOBAL

    def get_free_vars(self):
        """Return the free variables of this scope.

        A scope that is not nested has none and returns an empty tuple.
        Otherwise the result is a list holding the recorded frees plus
        every used name that is neither defined nor declared global here.
        """
        if not self.nested:
            return ()
        free = {}
        free.update(self.frees)
        for name in self.uses:
            if name not in self.defs and name not in self.globals:
                free[name] = 1
        return list(free)

    def handle_children(self):
        """Resolve the free variables of each child against this scope."""
        for child in self.children:
            frees = child.get_free_vars()
            globals = self.add_frees(frees)
            for name in globals:
                child.force_global(name)

    def force_global(self, name):
        """Force name to be global in scope.

        Some child of the current node had a free reference to name.
        When the child was processed, it was labelled a free
        variable.  Now that all its enclosing scope have been
        processed, the name is known to be a global or builtin.  So
        walk back down the child chain and set the name to be global
        rather than free.

        Be careful to stop if a child does not think the name is
        free.
        """
        self.globals[name] = 1
        if name in self.frees:
            del self.frees[name]
        for child in self.children:
            if child.check_name(name) == SC_FREE:
                child.force_global(name)

    def add_frees(self, names):
        """Process list of free vars from nested scope.

        Returns a list of names that are either 1) declared global in the
        parent or 2) undefined in a top-level parent.  In either case,
        the nested scope should treat them as globals.
        """
        child_globals = []
        for name in names:
            sc = self.check_name(name)
            if self.nested:
                if sc == SC_UNKNOWN or sc == SC_FREE \
                   or isinstance(self, ClassScope):
                    # Pass the name on as a free variable of this scope.
                    self.frees[name] = 1
                elif sc == SC_GLOBAL:
                    child_globals.append(name)
                elif isinstance(self, FunctionScope) and sc == SC_LOCAL:
                    # A local referenced from a nested scope becomes a cell.
                    self.cells[name] = 1
                elif sc != SC_CELL:
                    child_globals.append(name)
            else:
                if sc == SC_LOCAL:
                    self.cells[name] = 1
                elif sc != SC_CELL:
                    child_globals.append(name)
        return child_globals

    def get_cell_vars(self):
        """Return a list of the names recorded as cell variables."""
        return list(self.cells)
1.173 +
class ModuleScope(Scope):
    """Scope of a whole module.

    The scope is named "global" and acts as its own module scope.
    """

    def __init__(self):
        Scope.__init__(self, "global", self)
1.179 +
class FunctionScope(Scope):
    """Scope of a function body; adds nothing to Scope.

    The class exists so that other code can tell function scopes apart
    with isinstance().
    """
1.182 +
class GenExprScope(Scope):
    """Scope of a generator expression.

    Each instance gets a numbered name and an implicit '.0' parameter.
    """

    # Number given to the next generator expression scope.
    __counter = 1

    def __init__(self, module, klass=None):
        # Advance the counter on the class.  Writing "self.__counter += 1"
        # would only create an instance attribute, leaving the class
        # counter at 1 so that every scope received the same number.
        i = GenExprScope.__counter
        GenExprScope.__counter += 1
        Scope.__init__(self, "generator expression<%d>" % i, module, klass)
        self.add_param('.0')

    def get_names(self):
        """Return the names of this scope, exactly as Scope.get_names."""
        return Scope.get_names(self)
1.197 +
class LambdaScope(FunctionScope):
    """Scope of a lambda expression, named "lambda.<n>"."""

    # Number given to the next lambda scope.
    __counter = 1

    def __init__(self, module, klass=None):
        # Advance the counter on the class.  Writing "self.__counter += 1"
        # would only create an instance attribute, leaving the class
        # counter at 1 so that every lambda was named "lambda.1".
        i = LambdaScope.__counter
        LambdaScope.__counter += 1
        Scope.__init__(self, "lambda.%d" % i, module, klass)
1.207 +
class ClassScope(Scope):
    """Scope of a class body.

    The class name serves both as the scope name and as the class name
    used for mangling.
    """

    def __init__(self, name, module):
        Scope.__init__(self, name, module, klass=name)
1.213 +
class SymbolVisitor:
    """AST visitor that builds a Scope object for each scope-defining node.

    The resulting scopes are stored in self.scopes, keyed by AST node.
    The class does not define self.visit itself; it is presumably attached
    by the tree walker (the script section of this file drives the visitor
    through compiler.walk).
    """

    def __init__(self):
        self.scopes = {}
        # Name of the class whose body is currently being visited, if any.
        self.klass = None

    # nodes that define new scopes

    def visitModule(self, node):
        scope = self.module = self.scopes[node] = ModuleScope()
        self.visit(node.node, scope)

    visitExpression = visitModule

    def visitFunction(self, node, parent):
        # Decorators, the function name and the default values belong to
        # the enclosing scope.
        if node.decorators:
            self.visit(node.decorators, parent)
        parent.add_def(node.name)
        for n in node.defaults:
            self.visit(n, parent)
        scope = FunctionScope(node.name, self.module, self.klass)
        if parent.nested or isinstance(parent, FunctionScope):
            scope.nested = 1
        self.scopes[node] = scope
        self._do_args(scope, node.argnames)
        self.visit(node.code, scope)
        self.handle_free_vars(scope, parent)

    def visitGenExpr(self, node, parent):
        scope = GenExprScope(self.module, self.klass)
        if parent.nested or isinstance(parent, FunctionScope) \
                or isinstance(parent, GenExprScope):
            scope.nested = 1

        self.scopes[node] = scope
        self.visit(node.code, scope)

        self.handle_free_vars(scope, parent)

    def visitGenExprInner(self, node, scope):
        for genfor in node.quals:
            self.visit(genfor, scope)

        self.visit(node.expr, scope)

    def visitGenExprFor(self, node, scope):
        self.visit(node.assign, scope, 1)
        self.visit(node.iter, scope)
        for if_ in node.ifs:
            self.visit(if_, scope)

    def visitGenExprIf(self, node, scope):
        self.visit(node.test, scope)

    def visitLambda(self, node, parent, assign=0):
        # Lambda is an expression, so it could appear in an expression
        # context where assign is passed.  The transformer should catch
        # any code that has a lambda on the left-hand side.
        assert not assign

        for n in node.defaults:
            self.visit(n, parent)
        scope = LambdaScope(self.module, self.klass)
        if parent.nested or isinstance(parent, FunctionScope):
            scope.nested = 1
        self.scopes[node] = scope
        self._do_args(scope, node.argnames)
        self.visit(node.code, scope)
        self.handle_free_vars(scope, parent)

    def _do_args(self, scope, args):
        """Add every name in args to scope as a parameter.

        Tuples nested in args are flattened recursively.
        """
        for name in args:
            if isinstance(name, tuple):
                self._do_args(scope, name)
            else:
                scope.add_param(name)

    def handle_free_vars(self, scope, parent):
        """Attach scope to parent and resolve the free vars of its children."""
        parent.add_child(scope)
        scope.handle_children()

    def visitClass(self, node, parent):
        # The class name and the base class expressions belong to the
        # enclosing scope.
        parent.add_def(node.name)
        for n in node.bases:
            self.visit(n, parent)
        scope = ClassScope(node.name, self.module)
        if parent.nested or isinstance(parent, FunctionScope):
            scope.nested = 1
        if node.doc is not None:
            scope.add_def('__doc__')
        scope.add_def('__module__')
        self.scopes[node] = scope
        # Scopes created while visiting the body are given this class
        # name; the previous one is restored afterwards.
        prev = self.klass
        self.klass = node.name
        self.visit(node.code, scope)
        self.klass = prev
        self.handle_free_vars(scope, parent)

    # name can be a def or a use

    # XXX a few calls and nodes expect a third "assign" arg that is
    # true if the name is being used as an assignment.  only
    # expressions contained within statements may have the assign arg.

    def visitName(self, node, scope, assign=0):
        if assign:
            scope.add_def(node.name)
        else:
            scope.add_use(node.name)

    # operations that bind new names

    def visitFor(self, node, scope):
        self.visit(node.assign, scope, 1)
        self.visit(node.list, scope)
        self.visit(node.body, scope)
        if node.else_:
            self.visit(node.else_, scope)

    def visitFrom(self, node, scope):
        for name, asname in node.names:
            # "from m import *" binds no name that is known here.
            if name == "*":
                continue
            scope.add_def(asname or name)

    def visitImport(self, node, scope):
        for name, asname in node.names:
            # Without an "as" clause only the part before the first dot
            # is bound.
            i = name.find(".")
            if i > -1:
                name = name[:i]
            scope.add_def(asname or name)

    def visitGlobal(self, node, scope):
        for name in node.names:
            scope.add_global(name)

    def visitAssign(self, node, scope):
        """Propagate assignment flag down to child nodes.

        The Assign node doesn't itself contain the variables being
        assigned to.  Instead, the children in node.nodes are visited
        with the assign flag set to true.  When the names occur in
        those nodes, they are marked as defs.

        Some names that occur in an assignment target are not bound by
        the assignment, e.g. a name occurring inside a slice.  The
        visitor handles these nodes specially; they do not propagate
        the assign flag to their children.
        """
        for n in node.nodes:
            self.visit(n, scope, 1)
        self.visit(node.expr, scope)

    def visitAssName(self, node, scope, assign=1):
        scope.add_def(node.name)

    def visitAssAttr(self, node, scope, assign=0):
        self.visit(node.expr, scope, 0)

    def visitSubscript(self, node, scope, assign=0):
        self.visit(node.expr, scope, 0)
        for n in node.subs:
            self.visit(n, scope, 0)

    def visitSlice(self, node, scope, assign=0):
        self.visit(node.expr, scope, 0)
        if node.lower:
            self.visit(node.lower, scope, 0)
        if node.upper:
            self.visit(node.upper, scope, 0)

    def visitAugAssign(self, node, scope):
        # If the LHS is a name, then this counts as assignment.
        # Otherwise, it's just use.
        self.visit(node.node, scope)
        if isinstance(node.node, ast.Name):
            self.visit(node.node, scope, 1) # XXX worry about this
        self.visit(node.expr, scope)

    # prune if statements if tests are false

    # On Python 2 these builtins are the same objects as
    # types.StringType, types.IntType and types.FloatType.
    _const_types = str, int, float

    def visitIf(self, node, scope):
        for test, body in node.tests:
            # A branch whose test is a false constant of one of the
            # _const_types is skipped entirely.
            if isinstance(test, ast.Const):
                if type(test.value) in self._const_types:
                    if not test.value:
                        continue
            self.visit(test, scope)
            self.visit(body, scope)
        if node.else_:
            self.visit(node.else_, scope)

    # a yield statement signals a generator

    def visitYield(self, node, scope):
        scope.generator = 1
        self.visit(node.value, scope)
1.412 +
def list_eq(l1, l2):
    """Return true if l1 and l2 hold equal items, regardless of order."""
    first = list(l1)
    first.sort()
    second = list(l2)
    second.sort()
    return first == second
1.415 +
if __name__ == "__main__":
    # Self-test: for every file named on the command line, compare the
    # names found by SymbolVisitor with those reported by the symtable
    # module, and exit with -1 on the first mismatch.
    import sys
    from compiler import parseFile, walk
    import symtable

    def get_names(syms):
        """Return the symbol names of syms.

        Names starting with '_[' or '.' are left out.
        """
        return [name for name in [sym.get_name() for sym in syms.get_symbols()]
                if not (name.startswith('_[') or name.startswith('.'))]

    for path in sys.argv[1:]:
        print(path)
        f = open(path)
        try:
            buf = f.read()
        finally:
            # Close the file even if reading fails.
            f.close()
        syms = symtable.symtable(buf, path, "exec")
        mod_names = get_names(syms)
        tree = parseFile(path)
        visitor = SymbolVisitor()
        walk(tree, visitor)

        # compare module-level symbols
        names2 = visitor.scopes[tree].get_names()

        if not list_eq(mod_names, names2):
            print("")
            print("oops %s" % path)
            print(sorted(mod_names))
            print(sorted(names2))
            sys.exit(-1)

        # All scopes except the module scope itself.
        d = {}
        d.update(visitor.scopes)
        del d[tree]
        scopes = d.values()
        del d

        for sym in syms.get_symbols():
            if sym.is_namespace():
                matches = [sc for sc in scopes
                           if sc.name == sym.get_name()]
                # Only an unambiguous match can be compared.  When no scope
                # has the namespace's name there is nothing to compare
                # against either, so that case is skipped as well instead of
                # failing with an IndexError.
                if len(matches) != 1:
                    print("skipping %s" % sym.get_name())
                else:
                    if not list_eq(get_names(sym.get_namespace()),
                                   matches[0].get_names()):
                        print(sym.get_name())
                        print(sorted(get_names(sym.get_namespace())))
                        print(sorted(matches[0].get_names()))
                        sys.exit(-1)