paul@0 | 1 | """Module symbol-table generator""" |
paul@0 | 2 | |
paul@0 | 3 | from compiler import ast |
paul@0 | 4 | from compiler.consts import SC_LOCAL, SC_GLOBAL, SC_FREE, SC_CELL, SC_UNKNOWN |
paul@0 | 5 | from compiler.misc import mangle |
paul@0 | 6 | import types |
paul@0 | 7 | |
paul@0 | 8 | |
paul@0 | 9 | import sys |
paul@0 | 10 | |
# NOTE(review): not referenced anywhere else in the visible part of this
# module; name mangling itself is delegated to compiler.misc.mangle (see
# Scope.mangle).  Presumably a length limit for mangled names -- confirm
# before relying on or removing it.
MANGLE_LEN = 256
paul@0 | 12 | |
class Scope:
    """Symbol information collected for one namespace.

    The name tables (``defs``, ``uses``, ``globals``, ``params``, ``frees``
    and ``cells``) are dictionaries used as sets: a name is present when it
    is a key, and the stored value is always 1.

    name   -- label of the scope (used by __repr__ and DEBUG)
    module -- the scope that receives a definition for every name declared
              global here (see add_global)
    klass  -- optional enclosing class name; when given, names are passed
              through compiler.misc.mangle before being recorded
    """

    # XXX how much information do I need about each name?
    def __init__(self, name, module, klass=None):
        self.name = name
        self.module = module
        self.defs = {}
        self.uses = {}
        self.globals = {}
        self.params = {}
        self.frees = {}
        self.cells = {}
        self.children = []
        # nested is true if the class could contain free variables,
        # i.e. if it is nested within another function.
        self.nested = None
        self.generator = None
        # The class name is stored without its leading underscores.  A
        # name made up only of underscores (or an empty one) leaves
        # self.klass as None, which disables mangling.
        self.klass = None
        if klass is not None:
            self.klass = klass.lstrip('_') or None

    def __repr__(self):
        return "<%s: %s>" % (self.__class__.__name__, self.name)

    def mangle(self, name):
        """Return name mangled for the enclosing class, if there is one."""
        if self.klass is None:
            return name
        return mangle(name, self.klass)

    def add_def(self, name):
        """Record name as defined (bound) in this scope."""
        self.defs[self.mangle(name)] = 1

    def add_use(self, name):
        """Record name as used (read) in this scope."""
        self.uses[self.mangle(name)] = 1

    def add_global(self, name):
        """Record name as declared global here and defined in the module.

        Raises SyntaxError if the name is also a parameter of this scope.
        """
        name = self.mangle(name)
        # XXX warn about global following def/use (i.e. when name is
        # already in self.uses or self.defs)
        if name in self.params:
            raise SyntaxError("%s in %s is global and parameter" %
                              (name, self.name))
        self.globals[name] = 1
        self.module.add_def(name)

    def add_param(self, name):
        """Record name as a parameter; parameters also count as defs."""
        name = self.mangle(name)
        self.defs[name] = 1
        self.params[name] = 1

    def get_names(self):
        """Return a list of every name defined, used or declared global."""
        d = {}
        d.update(self.defs)
        d.update(self.uses)
        d.update(self.globals)
        return list(d)

    def add_child(self, child):
        """Register child as a scope nested directly inside this one."""
        self.children.append(child)

    def get_children(self):
        """Return the list of directly nested scopes (not a copy)."""
        return self.children

    def DEBUG(self):
        """Write the name tables of this scope to sys.stderr."""
        write = sys.stderr.write
        tag = ""
        if self.nested:
            tag = "nested"
        write("%s %s\n" % (self.name, tag))
        write("\tglobals:  %s\n" % (self.globals,))
        write("\tcells:  %s\n" % (self.cells,))
        write("\tdefs:  %s\n" % (self.defs,))
        write("\tuses:  %s\n" % (self.uses,))
        write("\tfrees: %s\n" % (self.frees,))

    def check_name(self, name):
        """Return scope of name.

        The scope of a name could be LOCAL, GLOBAL, FREE, or CELL.  In a
        nested scope that has no record of the name at all the result is
        UNKNOWN; in a non-nested scope such a name is GLOBAL.
        """
        if name in self.globals:
            return SC_GLOBAL
        if name in self.cells:
            return SC_CELL
        if name in self.defs:
            return SC_LOCAL
        if self.nested and (name in self.frees or name in self.uses):
            return SC_FREE
        if self.nested:
            return SC_UNKNOWN
        return SC_GLOBAL

    def get_free_vars(self):
        """Return the names this scope needs from an enclosing scope.

        A non-nested scope has none and returns an empty tuple.  Otherwise
        the result is a list of the recorded free names plus every used
        name that is neither defined nor declared global here.
        """
        if not self.nested:
            return ()
        free = {}
        free.update(self.frees)
        for name in self.uses.keys():
            if name not in self.defs and name not in self.globals:
                free[name] = 1
        return list(free)

    def handle_children(self):
        """Resolve the free variables of every directly nested scope.

        Names that add_frees reports back as globals are forced to be
        global in the child that asked for them.
        """
        for child in self.children:
            frees = child.get_free_vars()
            forced = self.add_frees(frees)
            for name in forced:
                child.force_global(name)

    def force_global(self, name):
        """Force name to be global in scope.

        Some child of the current node had a free reference to name.
        When the child was processed, it was labelled a free
        variable.  Now that all its enclosing scopes have been
        processed, the name is known to be a global or builtin.  So
        walk back down the child chain and set the name to be global
        rather than free.

        Be careful to stop if a child does not think the name is
        free.
        """
        self.globals[name] = 1
        if name in self.frees:
            del self.frees[name]
        for child in self.children:
            if child.check_name(name) == SC_FREE:
                child.force_global(name)

    def add_frees(self, names):
        """Process list of free vars from nested scope.

        Returns a list of names that are either 1) declared global in the
        parent or 2) undefined in a top-level parent.  In either case,
        the nested scope should treat them as globals.
        """
        child_globals = []
        for name in names:
            sc = self.check_name(name)
            if self.nested:
                if sc == SC_UNKNOWN or sc == SC_FREE \
                   or isinstance(self, ClassScope):
                    # Pass the request on to our own enclosing scope.
                    self.frees[name] = 1
                elif sc == SC_GLOBAL:
                    child_globals.append(name)
                elif isinstance(self, FunctionScope) and sc == SC_LOCAL:
                    # A local of this function is captured by the child.
                    self.cells[name] = 1
                elif sc != SC_CELL:
                    child_globals.append(name)
            else:
                if sc == SC_LOCAL:
                    self.cells[name] = 1
                elif sc != SC_CELL:
                    child_globals.append(name)
        return child_globals

    def get_cell_vars(self):
        """Return a list of the names captured by nested scopes."""
        return list(self.cells)
paul@0 | 172 | |
class ModuleScope(Scope):
    """Outermost scope of a compilation unit.

    It is always named "global" and serves as its own ``module``
    reference, so names declared global in it are defined in itself.
    """

    def __init__(self):
        Scope.__init__(self, "global", self)
paul@0 | 178 | |
class FunctionScope(Scope):
    """Scope of a function body.

    It adds nothing to Scope; the class exists so that isinstance checks
    elsewhere in this module can tell function scopes apart.
    """
paul@0 | 181 | |
class GenExprScope(Scope):
    """Scope of a generator expression.

    Each instance gets a numbered name of the form
    "generator expression<N>" and starts out with the parameter '.0'.
    """

    # Number handed to the next instance; shared by all instances.
    __counter = 1

    def __init__(self, module, klass=None):
        # The counter must be advanced on the class.  Writing
        # "self.__counter += 1" would only create an instance attribute,
        # leaving the class value at 1 so that every scope got number 1.
        i = GenExprScope.__counter
        GenExprScope.__counter += 1
        Scope.__init__(self, "generator expression<%d>" % i, module, klass)
        self.add_param('.0')

    def get_names(self):
        """Return the names known to this scope (same as Scope.get_names)."""
        keys = Scope.get_names(self)
        return keys
paul@0 | 196 | |
class LambdaScope(FunctionScope):
    """Scope of a lambda expression, named "lambda.N" with a running N."""

    # Number handed to the next instance; shared by all instances.
    __counter = 1

    def __init__(self, module, klass=None):
        # The counter must be advanced on the class.  Writing
        # "self.__counter += 1" would only create an instance attribute,
        # leaving the class value at 1 so that every scope was "lambda.1".
        i = LambdaScope.__counter
        LambdaScope.__counter += 1
        Scope.__init__(self, "lambda.%d" % i, module, klass)
paul@0 | 206 | |
class ClassScope(Scope):
    """Scope of a class body.

    The class name doubles as the ``klass`` argument of Scope, so names
    recorded in the body are mangled against the class itself.
    """

    def __init__(self, name, module):
        Scope.__init__(self, name, module, klass=name)
paul@0 | 212 | |
class SymbolVisitor:
    """AST visitor that builds a Scope for every namespace-defining node.

    ``scopes`` maps each Module/Expression, Function, GenExpr, Lambda and
    Class node to the scope object created for it.  ``klass`` holds the
    name of the class whose body is currently being visited, or None.

    The ``visit`` callable used by the methods is not defined here; it
    must be attached to the instance by whatever drives the traversal
    (presumably compiler.walk, as used in this module's self-test --
    confirm).
    """

    def __init__(self):
        self.scopes = {}
        self.klass = None

    # nodes that define new scopes

    def visitModule(self, node):
        """Create the module scope and visit the module body in it."""
        scope = self.module = self.scopes[node] = ModuleScope()
        self.visit(node.node, scope)

    visitExpression = visitModule

    def visitFunction(self, node, parent):
        """Define the function in parent and build a scope for its body.

        Decorators and default values are evaluated in the parent scope.
        """
        if node.decorators:
            self.visit(node.decorators, parent)
        parent.add_def(node.name)
        for n in node.defaults:
            self.visit(n, parent)
        scope = FunctionScope(node.name, self.module, self.klass)
        if parent.nested or isinstance(parent, FunctionScope):
            scope.nested = 1
        self.scopes[node] = scope
        self._do_args(scope, node.argnames)
        self.visit(node.code, scope)
        self.handle_free_vars(scope, parent)

    def visitGenExpr(self, node, parent):
        """Build a scope for a generator expression."""
        scope = GenExprScope(self.module, self.klass)
        if parent.nested or isinstance(parent, FunctionScope) \
                or isinstance(parent, GenExprScope):
            scope.nested = 1

        self.scopes[node] = scope
        self.visit(node.code, scope)

        self.handle_free_vars(scope, parent)

    def visitGenExprInner(self, node, scope):
        # The qualifiers are visited before the result expression, so the
        # loop targets are recorded before the expression's names.
        for genfor in node.quals:
            self.visit(genfor, scope)

        self.visit(node.expr, scope)

    def visitGenExprFor(self, node, scope):
        self.visit(node.assign, scope, 1)
        self.visit(node.iter, scope)
        for if_ in node.ifs:
            self.visit(if_, scope)

    def visitGenExprIf(self, node, scope):
        self.visit(node.test, scope)

    def visitLambda(self, node, parent, assign=0):
        """Build a scope for a lambda; defaults belong to the parent."""
        # Lambda is an expression, so it could appear in an expression
        # context where assign is passed.  The transformer should catch
        # any code that has a lambda on the left-hand side.
        assert not assign

        for n in node.defaults:
            self.visit(n, parent)
        scope = LambdaScope(self.module, self.klass)
        if parent.nested or isinstance(parent, FunctionScope):
            scope.nested = 1
        self.scopes[node] = scope
        self._do_args(scope, node.argnames)
        self.visit(node.code, scope)
        self.handle_free_vars(scope, parent)

    def _do_args(self, scope, args):
        """Add every argument name to scope, flattening nested tuples."""
        for name in args:
            if isinstance(name, tuple):
                self._do_args(scope, name)
            else:
                scope.add_param(name)

    def handle_free_vars(self, scope, parent):
        """Attach scope to parent and resolve the free names of its children."""
        parent.add_child(scope)
        scope.handle_children()

    def visitClass(self, node, parent):
        """Define the class in parent and build a scope for its body.

        Base classes are evaluated in the parent scope.  While the body
        is visited, self.klass is set to the class name so that scopes
        created inside it mangle names against this class.
        """
        parent.add_def(node.name)
        for n in node.bases:
            self.visit(n, parent)
        scope = ClassScope(node.name, self.module)
        if parent.nested or isinstance(parent, FunctionScope):
            scope.nested = 1
        if node.doc is not None:
            scope.add_def('__doc__')
        scope.add_def('__module__')
        self.scopes[node] = scope
        prev = self.klass
        self.klass = node.name
        self.visit(node.code, scope)
        self.klass = prev
        self.handle_free_vars(scope, parent)

    # name can be a def or a use

    # XXX a few calls and nodes expect a third "assign" arg that is
    # true if the name is being used as an assignment.  only
    # expressions contained within statements may have the assign arg.

    def visitName(self, node, scope, assign=0):
        if assign:
            scope.add_def(node.name)
        else:
            scope.add_use(node.name)

    # operations that bind new names

    def visitFor(self, node, scope):
        self.visit(node.assign, scope, 1)
        self.visit(node.list, scope)
        self.visit(node.body, scope)
        if node.else_:
            self.visit(node.else_, scope)

    def visitFrom(self, node, scope):
        for name, asname in node.names:
            # "from m import *" binds names that cannot be known here.
            if name == "*":
                continue
            scope.add_def(asname or name)

    def visitImport(self, node, scope):
        for name, asname in node.names:
            # Without an alias, "import a.b.c" binds only "a".
            i = name.find(".")
            if i > -1:
                name = name[:i]
            scope.add_def(asname or name)

    def visitGlobal(self, node, scope):
        for name in node.names:
            scope.add_global(name)

    def visitAssign(self, node, scope):
        """Propagate assignment flag down to child nodes.

        The Assign node doesn't itself contain the variables being
        assigned to.  Instead, the children in node.nodes are visited
        with the assign flag set to true.  When the names occur in
        those nodes, they are marked as defs.

        Some names that occur in an assignment target are not bound by
        the assignment, e.g. a name occurring inside a slice.  The
        visitor handles these nodes specially; they do not propagate
        the assign flag to their children.
        """
        for n in node.nodes:
            self.visit(n, scope, 1)
        self.visit(node.expr, scope)

    def visitAssName(self, node, scope, assign=1):
        scope.add_def(node.name)

    def visitAssAttr(self, node, scope, assign=0):
        self.visit(node.expr, scope, 0)

    def visitSubscript(self, node, scope, assign=0):
        self.visit(node.expr, scope, 0)
        for n in node.subs:
            self.visit(n, scope, 0)

    def visitSlice(self, node, scope, assign=0):
        self.visit(node.expr, scope, 0)
        if node.lower:
            self.visit(node.lower, scope, 0)
        if node.upper:
            self.visit(node.upper, scope, 0)

    def visitAugAssign(self, node, scope):
        # If the LHS is a name, then this counts as assignment.
        # Otherwise, it's just use.
        self.visit(node.node, scope)
        if isinstance(node.node, ast.Name):
            self.visit(node.node, scope, 1) # XXX worry about this
        self.visit(node.expr, scope)

    # prune if statements if tests are false

    # Exact types of constants whose truth value is trusted for pruning.
    _const_types = str, int, float

    def visitIf(self, node, scope):
        for test, body in node.tests:
            if isinstance(test, ast.Const):
                if type(test.value) in self._const_types:
                    if not test.value:
                        # Constant-false test: skip this branch entirely.
                        continue
            self.visit(test, scope)
            self.visit(body, scope)
        if node.else_:
            self.visit(node.else_, scope)

    # a yield statement signals a generator

    def visitYield(self, node, scope):
        scope.generator = 1
        self.visit(node.value, scope)
paul@0 | 411 | |
def list_eq(l1, l2):
    """Return true if both iterables hold the same items, in any order."""
    first = list(l1)
    second = list(l2)
    first.sort()
    second.sort()
    return first == second
paul@0 | 414 | |
if __name__ == "__main__":
    # Self-test: for every file named on the command line, compare the
    # names found by SymbolVisitor with those reported by the built-in
    # symtable module, and exit with a failure status on the first
    # mismatch.
    import sys
    from compiler import parseFile, walk
    import symtable

    def get_names(syms):
        """Return the symbol names in syms, minus '_[' and '.' names."""
        names = [sym.get_name() for sym in syms.get_symbols()]
        return [name for name in names
                if not (name.startswith('_[') or name.startswith('.'))]

    for path in sys.argv[1:]:
        print(path)
        f = open(path)
        try:
            buf = f.read()
        finally:
            # Close the file even if reading fails.
            f.close()
        syms = symtable.symtable(buf, path, "exec")
        mod_names = get_names(syms)
        tree = parseFile(path)
        visitor = SymbolVisitor()
        walk(tree, visitor)

        # compare module-level symbols
        names2 = visitor.scopes[tree].get_names()

        if not list_eq(mod_names, names2):
            print("")
            print("oops %s" % path)
            print(sorted(mod_names))
            print(sorted(names2))
            sys.exit(-1)

        # every scope except the module's own
        scopes = [sc for node, sc in visitor.scopes.items()
                  if node is not tree]

        for sym in syms.get_symbols():
            if not sym.is_namespace():
                continue
            name = sym.get_name()
            matches = [sc for sc in scopes if sc.name == name]
            if len(matches) != 1:
                # Several scopes share the name, or none was recorded
                # (e.g. a definition inside a constant-false "if" that
                # SymbolVisitor.visitIf prunes): nothing to compare.
                print("skipping %s" % name)
                continue
            expected = get_names(sym.get_namespace())
            found = matches[0].get_names()
            if not list_eq(expected, found):
                print(name)
                print(sorted(expected))
                print(sorted(found))
                sys.exit(-1)