1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/compiler/symbols.py Tue May 01 22:04:53 2012 +0200
1.3 @@ -0,0 +1,463 @@
1.4 +"""Module symbol-table generator"""
1.5 +
1.6 +from compiler import ast
1.7 +from compiler.consts import SC_LOCAL, SC_GLOBAL, SC_FREE, SC_CELL, SC_UNKNOWN
1.8 +from compiler.misc import mangle
1.9 +import types
1.10 +
1.11 +
1.12 +import sys
1.13 +
1.14 +MANGLE_LEN = 256
1.15 +
class Scope:
    """Names bound and referenced in one lexical scope.

    ``defs``, ``uses``, ``globals``, ``params``, ``frees`` and ``cells``
    are dicts used as sets (name -> 1).  ``children`` holds the scopes
    registered through add_child().
    """

    # XXX how much information do I need about each name?
    def __init__(self, name, module, klass=None):
        """Create an empty scope.

        name   -- label used by __repr__, DEBUG and error messages
        module -- module-level scope; add_global() records names on it
        klass  -- enclosing class name or None; leading underscores are
                  stripped and the rest is used for private-name mangling
        """
        self.name = name
        self.module = module
        self.defs = {}
        self.uses = {}
        self.globals = {}
        self.params = {}
        self.frees = {}
        self.cells = {}
        self.children = []
        # nested is true if the scope could contain free variables,
        # i.e. if it is nested within another function.
        self.nested = None
        self.generator = None
        self.klass = None
        if klass is not None:
            stripped = klass.lstrip('_')
            # A name made only of underscores (or empty) leaves klass unset.
            if stripped:
                self.klass = stripped

    def __repr__(self):
        return "<%s: %s>" % (self.__class__.__name__, self.name)

    def mangle(self, name):
        """Return name, mangled against the enclosing class if there is one."""
        if self.klass is None:
            return name
        # Inside this method ``mangle`` is the module-level helper.
        return mangle(name, self.klass)

    def add_def(self, name):
        """Record name as bound in this scope."""
        self.defs[self.mangle(name)] = 1

    def add_use(self, name):
        """Record name as referenced in this scope."""
        self.uses[self.mangle(name)] = 1

    def add_global(self, name):
        """Record name as declared global here and defined in the module.

        Raises SyntaxError if name is also a parameter of this scope.
        """
        name = self.mangle(name)
        # XXX warn about global following def/use
        if name in self.params:
            raise SyntaxError("%s in %s is global and parameter" %
                              (name, self.name))
        self.globals[name] = 1
        self.module.add_def(name)

    def add_param(self, name):
        """Record name as a parameter; parameters also count as defs."""
        name = self.mangle(name)
        self.defs[name] = 1
        self.params[name] = 1

    def get_names(self):
        """Return a list of every name in defs, uses or globals."""
        d = {}
        d.update(self.defs)
        d.update(self.uses)
        d.update(self.globals)
        return list(d)

    def add_child(self, child):
        self.children.append(child)

    def get_children(self):
        return self.children

    def DEBUG(self):
        """Write this scope's name tables to stderr."""
        write = sys.stderr.write
        write("%s %s\n" % (self.name, self.nested and "nested" or ""))
        tables = (
            ("\tglobals: ", self.globals),
            ("\tcells: ", self.cells),
            ("\tdefs: ", self.defs),
            ("\tuses: ", self.uses),
            ("\tfrees:", self.frees),
        )
        for label, table in tables:
            write("%s %s\n" % (label, table))

    def check_name(self, name):
        """Return scope of name.

        The scope of a name could be LOCAL, GLOBAL, FREE, or CELL.  A
        nested scope returns UNKNOWN for a name it has no record of; a
        non-nested scope returns GLOBAL in that case.
        """
        if name in self.globals:
            return SC_GLOBAL
        if name in self.cells:
            return SC_CELL
        if name in self.defs:
            return SC_LOCAL
        if self.nested and (name in self.frees or name in self.uses):
            return SC_FREE
        if self.nested:
            return SC_UNKNOWN
        return SC_GLOBAL

    def get_free_vars(self):
        """Return the free names of this scope.

        A non-nested scope returns an empty tuple.  Otherwise the result
        is a list of the recorded frees plus every used name that is
        neither defined nor declared global here.
        """
        if not self.nested:
            return ()
        free = {}
        free.update(self.frees)
        for name in self.uses:
            if not (name in self.defs or name in self.globals):
                free[name] = 1
        return list(free)

    def handle_children(self):
        """Resolve each child's free variables against this scope."""
        for child in self.children:
            frees = child.get_free_vars()
            forced = self.add_frees(frees)
            for name in forced:
                child.force_global(name)

    def force_global(self, name):
        """Force name to be global in scope.

        Some child of the current node had a free reference to name.
        When the child was processed, it was labelled a free
        variable.  Now that all its enclosing scopes have been
        processed, the name is known to be a global or builtin.  So
        walk back down the child chain and set the name to be global
        rather than free.

        Be careful to stop if a child does not think the name is
        free.
        """
        self.globals[name] = 1
        self.frees.pop(name, None)
        for child in self.children:
            if child.check_name(name) == SC_FREE:
                child.force_global(name)

    def add_frees(self, names):
        """Process list of free vars from nested scope.

        Returns a list of names that are either 1) declared global in the
        parent or 2) undefined in a top-level parent.  In either case,
        the nested scope should treat them as globals.
        """
        child_globals = []
        for name in names:
            sc = self.check_name(name)
            if self.nested:
                if (sc == SC_UNKNOWN or sc == SC_FREE
                        or isinstance(self, ClassScope)):
                    # Pass the name on as free in this scope as well.
                    self.frees[name] = 1
                elif sc == SC_GLOBAL:
                    child_globals.append(name)
                elif isinstance(self, FunctionScope) and sc == SC_LOCAL:
                    self.cells[name] = 1
                elif sc != SC_CELL:
                    child_globals.append(name)
            else:
                if sc == SC_LOCAL:
                    self.cells[name] = 1
                elif sc != SC_CELL:
                    child_globals.append(name)
        return child_globals

    def get_cell_vars(self):
        """Return a list of the names recorded as cell variables."""
        return list(self.cells)
1.175 +
class ModuleScope(Scope):
    """Top-level scope, named "global"; it is its own module scope."""

    def __init__(self):
        # The scope passes itself as ``module`` so that add_global()
        # on the module scope records the definition here.
        Scope.__init__(self, "global", self)
1.181 +
class FunctionScope(Scope):
    """Scope of a function body.

    Adds no behaviour of its own; the class exists so that code can
    tell function scopes apart with isinstance().
    """
1.184 +
class GenExprScope(Scope):
    """Scope of a generator expression.

    Each instance gets a name of the form "generator expression<N>"
    and starts with the implicit parameter '.0'.
    """

    # Next sequence number, shared by all instances.
    __counter = 1

    def __init__(self, module, klass=None):
        # Update the class attribute explicitly: ``self.__counter += 1``
        # would only create an instance attribute, leaving the shared
        # counter stuck at 1.
        i = GenExprScope.__counter
        GenExprScope.__counter += 1
        Scope.__init__(self, "generator expression<%d>" % i, module, klass)
        self.add_param('.0')

    def get_names(self):
        """Return the names known to this scope (same as Scope.get_names)."""
        return Scope.get_names(self)
1.199 +
class LambdaScope(FunctionScope):
    """Scope of a lambda; each instance is named "lambda.N"."""

    # Next sequence number, shared by all instances.
    __counter = 1

    def __init__(self, module, klass=None):
        # Update the class attribute explicitly: ``self.__counter += 1``
        # would only create an instance attribute, leaving the shared
        # counter stuck at 1.
        i = LambdaScope.__counter
        LambdaScope.__counter += 1
        Scope.__init__(self, "lambda.%d" % i, module, klass)
1.209 +
class ClassScope(Scope):
    """Scope of a class body; the class name is also the mangling key."""

    def __init__(self, name, module):
        Scope.__init__(self, name, module, klass=name)
1.215 +
class SymbolVisitor:
    """AST visitor that builds a Scope for every scope-defining node.

    ``scopes`` maps each such node to its Scope object.  ``klass`` is
    the name of the class whose body is being visited, or None.

    ``self.visit`` is not defined here; presumably the walker that
    drives this visitor (see the ``walk(tree, s)`` call in the
    self-test below) installs it -- confirm against the compiler
    package.
    """

    def __init__(self):
        self.scopes = {}
        self.klass = None

    # nodes that define new scopes

    def visitModule(self, node):
        scope = self.module = self.scopes[node] = ModuleScope()
        self.visit(node.node, scope)

    visitExpression = visitModule

    def visitFunction(self, node, parent):
        # Decorators, the function name and default values belong to
        # the enclosing scope, not to the function body.
        if node.decorators:
            self.visit(node.decorators, parent)
        parent.add_def(node.name)
        for n in node.defaults:
            self.visit(n, parent)
        scope = FunctionScope(node.name, self.module, self.klass)
        if parent.nested or isinstance(parent, FunctionScope):
            scope.nested = 1
        self.scopes[node] = scope
        self._do_args(scope, node.argnames)
        self.visit(node.code, scope)
        self.handle_free_vars(scope, parent)

    def visitGenExpr(self, node, parent):
        scope = GenExprScope(self.module, self.klass)
        if parent.nested or isinstance(parent,
                                       (FunctionScope, GenExprScope)):
            scope.nested = 1

        self.scopes[node] = scope
        self.visit(node.code, scope)

        self.handle_free_vars(scope, parent)

    def visitGenExprInner(self, node, scope):
        for genfor in node.quals:
            self.visit(genfor, scope)

        self.visit(node.expr, scope)

    def visitGenExprFor(self, node, scope):
        self.visit(node.assign, scope, 1)
        self.visit(node.iter, scope)
        for if_ in node.ifs:
            self.visit(if_, scope)

    def visitGenExprIf(self, node, scope):
        self.visit(node.test, scope)

    def visitLambda(self, node, parent, assign=0):
        # Lambda is an expression, so it could appear in an expression
        # context where assign is passed.  The transformer should catch
        # any code that has a lambda on the left-hand side.
        assert not assign

        # Default values are evaluated in the enclosing scope.
        for n in node.defaults:
            self.visit(n, parent)
        scope = LambdaScope(self.module, self.klass)
        if parent.nested or isinstance(parent, FunctionScope):
            scope.nested = 1
        self.scopes[node] = scope
        self._do_args(scope, node.argnames)
        self.visit(node.code, scope)
        self.handle_free_vars(scope, parent)

    def _do_args(self, scope, args):
        """Add every argument name to scope, flattening nested tuples."""
        for name in args:
            if isinstance(name, tuple):
                self._do_args(scope, name)
            else:
                scope.add_param(name)

    def handle_free_vars(self, scope, parent):
        """Attach scope to parent, then resolve scope's children."""
        parent.add_child(scope)
        scope.handle_children()

    def visitClass(self, node, parent):
        parent.add_def(node.name)
        for n in node.bases:
            self.visit(n, parent)
        scope = ClassScope(node.name, self.module)
        if parent.nested or isinstance(parent, FunctionScope):
            scope.nested = 1
        if node.doc is not None:
            scope.add_def('__doc__')
        scope.add_def('__module__')
        self.scopes[node] = scope
        # self.klass names the current class only while its body is
        # being visited; restore the outer value afterwards.
        prev = self.klass
        self.klass = node.name
        self.visit(node.code, scope)
        self.klass = prev
        self.handle_free_vars(scope, parent)

    # name can be a def or a use

    # XXX a few calls and nodes expect a third "assign" arg that is
    # true if the name is being used as an assignment.  only
    # expressions contained within statements may have the assign arg.

    def visitName(self, node, scope, assign=0):
        if assign:
            scope.add_def(node.name)
        else:
            scope.add_use(node.name)

    # operations that bind new names

    def visitFor(self, node, scope):
        self.visit(node.assign, scope, 1)
        self.visit(node.list, scope)
        self.visit(node.body, scope)
        if node.else_:
            self.visit(node.else_, scope)

    def visitFrom(self, node, scope):
        for name, asname in node.names:
            if name == "*":
                continue
            scope.add_def(asname or name)

    def visitImport(self, node, scope):
        for name, asname in node.names:
            # "import a.b.c" binds only the top-level package name "a".
            top = name.split(".", 1)[0]
            scope.add_def(asname or top)

    def visitGlobal(self, node, scope):
        for name in node.names:
            scope.add_global(name)

    def visitAssign(self, node, scope):
        """Propagate assignment flag down to child nodes.

        The Assign node does not itself contain the variables being
        assigned to.  Instead, the children in node.nodes are visited
        with the assign flag set to true.  When the names occur in
        those nodes, they are marked as defs.

        Some names that occur in an assignment target are not bound by
        the assignment, e.g. a name occurring inside a slice.  The
        visitor handles these nodes specially; they do not propagate
        the assign flag to their children.
        """
        for n in node.nodes:
            self.visit(n, scope, 1)
        self.visit(node.expr, scope)

    def visitAssName(self, node, scope, assign=1):
        scope.add_def(node.name)

    def visitAssAttr(self, node, scope, assign=0):
        self.visit(node.expr, scope, 0)

    def visitSubscript(self, node, scope, assign=0):
        self.visit(node.expr, scope, 0)
        for n in node.subs:
            self.visit(n, scope, 0)

    def visitSlice(self, node, scope, assign=0):
        self.visit(node.expr, scope, 0)
        if node.lower:
            self.visit(node.lower, scope, 0)
        if node.upper:
            self.visit(node.upper, scope, 0)

    def visitAugAssign(self, node, scope):
        # If the LHS is a name, then this counts as assignment.
        # Otherwise, it's just use.
        self.visit(node.node, scope)
        if isinstance(node.node, ast.Name):
            self.visit(node.node, scope, 1)  # XXX worry about this
        self.visit(node.expr, scope)

    # prune if statements if tests are false

    # Same objects as types.StringType, types.IntType and
    # types.FloatType on Python 2.
    _const_types = (str, int, float)

    def visitIf(self, node, scope):
        for test, body in node.tests:
            # Skip a branch whose test is a false constant.  The exact
            # type() match is deliberate: constants of any other type
            # are never pruned.
            if isinstance(test, ast.Const):
                if type(test.value) in self._const_types:
                    if not test.value:
                        continue
            self.visit(test, scope)
            self.visit(body, scope)
        if node.else_:
            self.visit(node.else_, scope)

    # a yield statement signals a generator

    def visitYield(self, node, scope):
        scope.generator = 1
        self.visit(node.value, scope)
1.414 +
def list_eq(l1, l2):
    """Return true if l1 and l2 hold equal elements, ignoring order."""
    left = sorted(l1)
    right = sorted(l2)
    return left == right
1.417 +
if __name__ == "__main__":
    # Self-test: for each file named on the command line, compare the
    # names found by SymbolVisitor with those reported by the symtable
    # module, and exit with status -1 on the first mismatch.
    import sys
    from compiler import parseFile, walk
    import symtable

    def get_names(syms):
        """Return the symbol names in syms, leaving out any name that
        starts with '_[' or '.'."""
        names = []
        for sym in syms.get_symbols():
            n = sym.get_name()
            if n.startswith('_[') or n.startswith('.'):
                continue
            names.append(n)
        return names

    for path in sys.argv[1:]:
        print(path)
        f = open(path)
        try:
            # Close the file even if reading fails.
            buf = f.read()
        finally:
            f.close()
        syms = symtable.symtable(buf, path, "exec")
        mod_names = get_names(syms)
        tree = parseFile(path)
        visitor = SymbolVisitor()
        walk(tree, visitor)

        # compare module-level symbols
        names2 = visitor.scopes[tree].get_names()

        if not list_eq(mod_names, names2):
            print("")
            print("oops %s" % (path,))
            print(sorted(mod_names))
            print(sorted(names2))
            sys.exit(-1)

        # Every scope except the module's own.
        others = {}
        others.update(visitor.scopes)
        del others[tree]
        scopes = list(others.values())
        del others

        for sym in syms.get_symbols():
            if not sym.is_namespace():
                continue
            sym_name = sym.get_name()
            matches = [sc for sc in scopes if sc.name == sym_name]
            if len(matches) != 1:
                # Ambiguous (several scopes share the name) or missing
                # (e.g. a definition inside an "if 0:" body, which
                # visitIf prunes); nothing to compare against.
                print("skipping %s" % (sym_name,))
                continue
            expected = get_names(sym.get_namespace())
            found = matches[0].get_names()
            if not list_eq(expected, found):
                print(sym_name)
                print(sorted(expected))
                print(sorted(found))
                sys.exit(-1)