1 #!/usr/bin/env python 2 3 """ 4 Annotate program node structures. The code in this module operates upon nodes 5 which are produced when simplifying AST node trees originating from the compiler 6 module. 7 8 Copyright (C) 2006 Paul Boddie <paul@boddie.org.uk> 9 10 This software is free software; you can redistribute it and/or 11 modify it under the terms of the GNU General Public License as 12 published by the Free Software Foundation; either version 2 of 13 the License, or (at your option) any later version. 14 15 This software is distributed in the hope that it will be useful, 16 but WITHOUT ANY WARRANTY; without even the implied warranty of 17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 GNU General Public License for more details. 19 20 You should have received a copy of the GNU General Public 21 License along with this library; see the file LICENCE.txt 22 If not, write to the Free Software Foundation, Inc., 23 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA 24 25 -------- 26 27 To use this module, the easiest approach is to use the annotate function: 28 29 annotate(module, builtins) 30 31 The more complicated approach involves obtaining an Annotator: 32 33 annotator = Annotator() 34 35 Then, processing an existing module with it: 36 37 annotator.process(module) 38 39 If a module containing built-in classes and functions has already been 40 annotated, such a module should be passed in as an additional argument: 41 42 annotator.process(module, builtins) 43 """ 44 45 from simplified import * 46 import compiler 47 48 class System: 49 50 """ 51 A class maintaining the state of the annotation system. When the system 52 counter can no longer be incremented by any annotation operation, the 53 system may be considered stable and fully annotated. 54 """ 55 56 def __init__(self): 57 self.count = 0 58 def init(self, node): 59 if not hasattr(node, "types"): 60 node.types = [] 61 def annotate(self, node, types): 62 self.init(node) 63 for type in types: 64 if type not in node.types: 65 node.types.append(type) 66 self.count += 1 67 68 system = System() 69 70 # Exceptions. 71 72 class AnnotationError(Exception): 73 def __init__(self, exc, node, *args): 74 Exception.__init__(self, *args) 75 self.nodes = [node] 76 self.exc = exc 77 def add(self, node): 78 self.nodes.append(node) 79 def __str__(self): 80 return "%s, %s" % (self.exc, self.nodes) 81 82 class AnnotationMessage(Exception): 83 pass 84 85 # Annotation. 86 87 class Annotator(Visitor): 88 89 """ 90 The type annotator which traverses the program nodes, typically depth-first, 91 and maintains a record of the current set of types applying to the currently 92 considered operation. Such types are also recorded on the nodes, and a 93 special "system" record is maintained to monitor the level of annotation 94 activity with a view to recognising when no more annotations are possible. 95 96 Throughout the annotation activity, type information consists of lists of 97 Attribute objects where such objects retain information about the context of 98 the type (since a value in the program may be associated with an object or 99 class) and the actual type of the value being manipulated. Upon accessing 100 attribute information on namespaces, additional accessor information is also 101 exchanged - this provides a means of distinguishing between the different 102 types possible when the means of constructing the namespace may depend on 103 run-time behaviour. 104 """ 105 106 def __init__(self): 107 108 "Initialise the visitor." 109 110 Visitor.__init__(self) 111 self.system = system 112 113 # Satisfy visitor issues. 114 115 self.visitor = self 116 117 def process(self, module, builtins=None): 118 119 """ 120 Process the given 'module', using the optional 'builtins' to access 121 built-in classes and functions. 122 """ 123 124 self.subprograms = [] 125 self.current_subprograms = [] 126 self.current_namespaces = [] 127 128 # Give constants their own namespace. 129 130 for value, constant in module.simplifier.constants.items(): 131 constant.namespace = Namespace() 132 133 # Process the module, supplying builtins if possible. 134 135 self.builtins = builtins 136 self.global_namespace = Namespace() 137 138 if builtins is not None: 139 self.builtins_namespace = builtins.namespace 140 else: 141 self.builtins_namespace = self.global_namespace 142 143 return self.process_node(module) 144 145 def process_node(self, node, locals=None): 146 147 """ 148 Process a subprogram or module 'node', indicating any initial 'locals'. 149 Return an annotated subprogram or module. Note that this method may 150 mutate nodes in the original program. 151 """ 152 153 # Determine the namespace. 154 155 if locals is not None: 156 self.namespace = locals 157 else: 158 self.namespace = self.global_namespace 159 160 # Record the current subprogram and namespace. 161 162 self.current_subprograms.append(node) 163 self.current_namespaces.append(self.namespace) 164 165 # Add namespace details to any structure involved. 166 167 if getattr(node, "structure", None) is not None: 168 node.structure.namespace = Namespace() 169 170 # Initialise bases where appropriate. 171 172 if hasattr(node.structure, "bases"): 173 base_refs = [] 174 for base in node.structure.bases: 175 self.dispatch(base) 176 base_refs.append(self.namespace.types) 177 node.structure.base_refs = base_refs 178 179 # Dispatch to the code itself. 180 181 node.namespace = self.namespace 182 result = self.dispatch(node) 183 result.namespace = self.namespace 184 185 # Obtain the return values. 186 187 self.last_returns = self.namespace.returns 188 self.returned_locals = self.namespace.return_locals 189 190 # Restore the previous subprogram and namespace. 191 192 self.current_namespaces.pop() 193 if self.current_namespaces: 194 self.namespace = self.current_namespaces[-1] 195 196 self.current_subprograms.pop() 197 198 return result 199 200 def annotate(self, node): 201 202 "Annotate the given 'node' in the system." 203 204 self.system.annotate(node, self.namespace.types) 205 206 # Visitor methods. 207 208 def default(self, node): 209 210 """ 211 Process the given 'node', given that it does not have a specific 212 handler. 213 """ 214 215 for attr in ("expr", "lvalue", "test", "handler"): 216 value = getattr(node, attr, None) 217 if value is not None: 218 setattr(node, attr, self.dispatch(value)) 219 for attr in ("body", "else_", "finally_", "code"): 220 value = getattr(node, attr, None) 221 if value is not None: 222 setattr(node, attr, self.dispatches(value)) 223 return node 224 225 def dispatch(self, node, *args): 226 try: 227 return Visitor.dispatch(self, node, *args) 228 except AnnotationError, exc: 229 exc.add(node) 230 raise 231 except AnnotationMessage, exc: 232 raise AnnotationError(exc, node) 233 234 def visitLoadRef(self, loadref): 235 self.namespace.set_types([Attribute(None, loadref.ref)]) 236 self.annotate(loadref) 237 return loadref 238 239 def visitLoadName(self, loadname): 240 self.namespace.set_types(self.namespace.load(loadname.name)) 241 result = loadname 242 self.annotate(result) 243 return result 244 245 def visitStoreName(self, storename): 246 storename.expr = self.dispatch(storename.expr) 247 self.namespace.store(storename.name, self.namespace.types) 248 return storename 249 250 def visitLoadTemp(self, loadtemp): 251 index = getattr(loadtemp, "index", None) 252 try: 253 if getattr(loadtemp, "release", 0): 254 self.namespace.set_types(self.namespace.temp[index].pop()) 255 else: 256 self.namespace.set_types(self.namespace.temp[index][-1]) 257 except KeyError: 258 raise AnnotationMessage, "Temporary store index '%s' not defined." % index 259 self.annotate(loadtemp) 260 return loadtemp 261 262 def visitStoreTemp(self, storetemp): 263 storetemp.expr = self.dispatch(storetemp.expr) 264 index = getattr(storetemp, "index", None) 265 if not self.namespace.temp.has_key(index): 266 self.namespace.temp[index] = [] 267 self.namespace.temp[index].append(self.namespace.types) 268 return storetemp 269 270 def visitReleaseTemp(self, releasetemp): 271 index = getattr(releasetemp, "index", None) 272 try: 273 self.namespace.temp[index].pop() 274 except KeyError: 275 raise AnnotationMessage, "Temporary store index '%s' not defined." % index 276 except IndexError: 277 pass #raise AnnotationMessage, "Temporary store index '%s' is empty." % index 278 return releasetemp 279 280 def visitLoadAttr(self, loadattr): 281 loadattr.expr = self.dispatch(loadattr.expr) 282 types = [] 283 accesses = {} 284 non_accesses = {} 285 for attr in self.namespace.types: 286 for attribute, accessor in get_attributes(attr.type, loadattr.name): 287 if attribute is not None: 288 types.append(attribute) 289 if not accesses.has_key(attr.type): 290 accesses[attr.type] = [] 291 accesses[attr.type].append((attribute, accessor)) 292 else: 293 print "Empty attribute", loadattr.name, "via accessor", accessor 294 if not non_accesses.has_key(attr.type): 295 non_accesses[attr.type] = [] 296 non_accesses[attr.type].append((attribute, accessor)) 297 self.namespace.set_types(types) 298 loadattr.accesses = accesses 299 loadattr.non_accesses = non_accesses 300 self.annotate(loadattr) 301 return loadattr 302 303 def visitStoreAttr(self, storeattr): 304 storeattr.expr = self.dispatch(storeattr.expr) 305 expr = self.namespace.types 306 storeattr.lvalue = self.dispatch(storeattr.lvalue) 307 writes = {} 308 for attr in self.namespace.types: 309 if attr is None: 310 print "Empty attribute storage attempt" 311 continue 312 attr.type.namespace.store(storeattr.name, expr) 313 writes[attr.type] = attr.type.namespace.load(storeattr.name) 314 storeattr.writes = writes 315 return storeattr 316 317 def visitConditional(self, conditional): 318 319 # Conditionals keep local namespace changes isolated. 320 # With Return nodes inside the body/else sections, the changes are 321 # communicated to the caller. 322 323 conditional.test = self.dispatch(conditional.test) 324 saved_namespace = self.namespace 325 326 self.namespace = Namespace() 327 self.namespace.merge_namespace(saved_namespace) 328 conditional.body = self.dispatches(conditional.body) 329 body_namespace = self.namespace 330 331 self.namespace = Namespace() 332 self.namespace.merge_namespace(saved_namespace) 333 conditional.else_ = self.dispatches(conditional.else_) 334 else_namespace = self.namespace 335 336 self.namespace = Namespace() 337 self.namespace.merge_namespace(body_namespace) 338 self.namespace.merge_namespace(else_namespace) 339 return conditional 340 341 def visitReturn(self, return_): 342 if hasattr(return_, "expr"): 343 return_.expr = self.dispatch(return_.expr) 344 combine(self.namespace.returns, self.namespace.types) 345 self.annotate(return_) 346 self.namespace.snapshot() 347 return return_ 348 349 def visitInvoke(self, invoke): 350 invoke.expr = self.dispatch(invoke.expr) 351 invocation_types = self.namespace.types 352 353 if isinstance(invoke, InvokeFunction): 354 self.process_args(invoke) 355 356 # Now locate and invoke the subprogram. This can be complicated because 357 # the target may be a class or object, and there may be many different 358 # related subprograms. 359 360 invocations = [] 361 362 # Visit each callable in turn, finding subprograms. 363 364 for attr in invocation_types: 365 366 # Deal with class invocations by providing instance objects. 367 # Here, each class is queried for the __init__ method, which may 368 # exist for some combinations of classes in a hierarchy but not for 369 # others. 370 371 if isinstance(attr.type, Class): 372 attributes = get_attributes(attr.type, "__init__") 373 374 # Deal with object invocations by using __call__ methods. 375 376 elif isinstance(attr.type, Instance): 377 attributes = get_attributes(attr.type, "__call__") 378 379 # Normal functions or methods are more straightforward. 380 # Here, we model them using an attribute with no context and with 381 # no associated accessor. 382 383 else: 384 attributes = [(attr, None)] 385 386 # Inspect each attribute and extract the subprogram. 387 388 for attribute, accessor in attributes: 389 390 # If a class is involved, presume that it must create a new 391 # object. 392 393 if isinstance(attr.type, Class): 394 395 # Instantiate the class. 396 # NOTE: Should probably only allocate a single instance. 397 398 instance = self.new_instance(invoke, "new", attr.type.full_name(), attr.type) 399 400 # For instantiations, switch the context. 401 402 if attribute is not None: 403 attribute = Attribute(instance, attribute.type) 404 405 # Skip cases where no callable is found. 406 407 if attribute is not None: 408 409 # If a subprogram is defined, invoke it. 410 411 self.invoke_subprogram(invoke, attribute) 412 if attribute.type not in invocations: 413 invocations.append(attribute.type) 414 415 elif not isinstance(attr.type, Class): 416 print "Invocation type is None for", accessor 417 418 # Special case: initialisation. 419 420 if isinstance(attr.type, Class): 421 422 # Associate the instance with the result of this invocation. 423 424 self.namespace.set_types([Attribute(None, instance)]) 425 self.annotate(invoke) 426 427 invoke.invocations = invocations 428 self.namespace.set_types(getattr(invoke, "types", [])) 429 return invoke 430 431 visitInvokeFunction = visitInvoke 432 visitInvokeBlock = visitInvoke 433 434 # Utility methods. 435 436 def new_instance(self, node, reason, target, type): 437 438 "Create, on the given 'node', a new instance with the given 'type'." 439 440 if not hasattr(node, "instances"): 441 node.instances = {} 442 443 if not node.instances.has_key((reason, target, type)): 444 instance = Instance() 445 instance.namespace = Namespace() 446 instance.namespace.store("__class__", [Attribute(None, type)]) 447 node.instances[(reason, target, type)] = instance 448 449 return node.instances[(reason, target, type)] 450 451 def invoke_subprogram(self, invoke, subprogram): 452 453 "Invoke using the given 'invoke' node the given 'subprogram'." 454 455 # Test to see if anything has changed. 456 457 if hasattr(invoke, "syscount") and invoke.syscount == self.system.count: 458 return 459 460 # Remember the state of the system. 461 462 else: 463 invoke.syscount = self.system.count 464 465 # Test for context information, making it into a real attribute. 466 467 if subprogram.context is not None: 468 context = Attribute(None, subprogram.context) 469 target = subprogram.type 470 else: 471 context = None 472 target = subprogram.type 473 474 # Provide the correct namespace for the invocation. 475 476 if isinstance(invoke, InvokeBlock): 477 namespace = Namespace() 478 namespace.merge_namespace(self.namespace) 479 else: 480 items = self.make_items(invoke, target, context) 481 namespace = self.make_namespace(items) 482 483 # Process the subprogram. 484 485 self.process_node(target, namespace) 486 487 # NOTE: Improve and verify this. 488 489 if getattr(target, "returns_value", 0): 490 self.namespace.set_types(self.last_returns) 491 self.annotate(invoke) 492 493 if isinstance(invoke, InvokeBlock): 494 for locals in self.returned_locals: 495 self.namespace.merge_namespace(locals) 496 497 def process_args(self, invoke): 498 499 # NOTE: Consider initialiser invocation for classes. 500 501 types = [] 502 args = [] 503 504 # Get type information for regular arguments. 505 506 for arg in invoke.args: 507 args.append(self.dispatch(arg)) 508 types.append(self.namespace.types) 509 510 # Get type information for star and dstar arguments. 511 512 if invoke.star is not None: 513 param, default = invoke.star 514 default = self.dispatch(default) 515 invoke.star = param, default 516 types.append(default.types) 517 518 if invoke.dstar is not None: 519 param, default = invoke.dstar 520 default = self.dispatch(default) 521 invoke.dstar = param, default 522 types.append(default.types) 523 524 invoke.args = args 525 526 def make_items(self, invocation, subprogram, context): 527 528 """ 529 Make an items mapping for the 'invocation' of the 'subprogram' using the 530 given 'context' (which may be None). 531 """ 532 533 if context is not None: 534 args = [Self(context)] + invocation.args 535 else: 536 args = invocation.args 537 538 # Sort the arguments into positional and keyword arguments. 539 540 pos_args = [] 541 kw_args = [] 542 add_kw = 0 543 for arg in args: 544 if not add_kw: 545 if not isinstance(arg, Keyword): 546 pos_args.append(arg) 547 else: 548 add_kw = 1 549 if add_kw: 550 if isinstance(arg, Keyword): 551 kw_args.append(arg) 552 else: 553 raise AnnotationMessage, "Positional argument appears after keyword arguments in '%s'." % callfunc 554 555 params = subprogram.params 556 items = [] 557 star_args = [] 558 559 # Match each positional argument, taking excess arguments as star args. 560 561 for arg in pos_args: 562 if params: 563 param, default = params[0] 564 if arg is None: 565 arg = default 566 items.append((param, arg.types)) 567 params = params[1:] 568 else: 569 star_args.append(arg) 570 571 # Collect the remaining defaults. 572 573 while params: 574 param, default = params[0] 575 if kw_args.has_key(param): 576 arg = kw_args[param] 577 del kw_args[param] 578 elif default is None: 579 raise AnnotationMessage, "No argument supplied in '%s' for parameter '%s'." % (subprogram, param) 580 items.append((param, arg.types)) 581 params = params[1:] 582 583 dstar_args = kw_args 584 585 # Construct temporary objects. 586 587 if star_args: 588 star_invocation = self.make_star_args(invocation, subprogram, star_args) 589 self.dispatch(star_invocation) 590 star_invocation.pprint() 591 star_types = star_invocation.types 592 else: 593 star_types = None 594 595 if dstar_args: 596 dstar_invocation = self.make_dstar_args(invocation, subprogram, dstar_args) 597 self.dispatch(dstar_invocation) 598 dstar_types = dstar_invocation.types 599 else: 600 dstar_types = None 601 602 # NOTE: Merge the objects properly. 603 604 star_types = star_types or invocation.star and invocation.star.types 605 dstar_types = dstar_types or invocation.dstar and invocation.dstar.types 606 607 # Add star and dstar. 608 609 if star_types is not None: 610 if subprogram.star is not None: 611 param, default = subprogram.star 612 items.append((param, star_types)) 613 else: 614 raise AnnotationMessage, "Invocation provides unwanted *args." 615 elif subprogram.star is not None: 616 param, default = subprogram.star 617 arg = self.dispatch(default) # NOTE: Review reprocessing. 618 items.append((param, arg.types)) 619 620 if dstar_types is not None: 621 if subprogram.dstar is not None: 622 param, default = subprogram.dstar 623 items.append((param, dstar_types)) 624 else: 625 raise AnnotationMessage, "Invocation provides unwanted **args." 626 elif subprogram.dstar is not None: 627 param, default = subprogram.dstar 628 arg = self.dispatch(default) # NOTE: Review reprocessing. 629 items.append((param, arg.types)) 630 631 return items 632 633 def make_star_args(self, invocation, subprogram, star_args): 634 635 "Make a subprogram which initialises a list containing 'star_args'." 636 637 if not hasattr(invocation, "stars"): 638 invocation.stars = {} 639 640 if not invocation.stars.has_key(subprogram.full_name()): 641 code=[ 642 StoreTemp( 643 expr=InvokeFunction( 644 expr=LoadAttr( 645 expr=LoadRef( 646 ref=self.builtins 647 ), 648 name="list", 649 nstype="module", 650 ), 651 args=[], 652 star=None, 653 dstar=None 654 ) 655 ) 656 ] 657 658 for arg in star_args: 659 code.append( 660 InvokeFunction( 661 expr=LoadAttr( 662 expr=LoadTemp(), 663 name="append" 664 ), 665 args=[arg], 666 star=None, 667 dstar=None 668 ) 669 ) 670 671 code += [ 672 Return(expr=LoadTemp(release=1)) 673 ] 674 675 invocation.stars[subprogram.full_name()] = InvokeBlock( 676 produces_result=1, 677 expr=LoadRef( 678 ref=Subprogram( 679 name=None, 680 returns_value=1, 681 params=[], 682 star=None, 683 dstar=None, 684 code=code 685 ) 686 ) 687 ) 688 689 return invocation.stars[subprogram.full_name()] 690 691 def make_namespace(self, items): 692 namespace = Namespace() 693 namespace.merge_items(items) 694 return namespace 695 696 # Namespace-related abstractions. 697 698 class Namespace: 699 700 """ 701 A local namespace which may either relate to a genuine set of function 702 locals or the initialisation of a structure or module. 703 """ 704 705 def __init__(self): 706 707 """ 708 Initialise the namespace with a mapping of local names to possible 709 types, a list of return values and of possible returned local 710 namespaces. The namespace also tracks the "current" types and a mapping 711 of temporary value names to types. 712 """ 713 714 self.names = {} 715 self.returns = [] 716 self.return_locals = [] 717 self.temp = {} 718 self.types = [] 719 720 def set_types(self, types): 721 self.types = types 722 723 def store(self, name, types): 724 self.names[name] = types 725 726 __setattr_ = store 727 728 def load(self, name): 729 return self.names[name] 730 731 __getattr__ = load 732 733 def merge_namespace(self, namespace): 734 self.merge_items(namespace.names.items()) 735 combine(self.returns, namespace.returns) 736 self.temp = namespace.temp 737 738 def merge_items(self, items): 739 for name, types in items: 740 self.merge(name, types) 741 742 def merge(self, name, types): 743 if not self.names.has_key(name): 744 self.names[name] = types[:] 745 else: 746 existing = self.names[name] 747 combine(existing, types) 748 749 def snapshot(self): 750 751 "Make a snapshot of the locals and remember them." 752 753 namespace = Namespace() 754 namespace.merge_namespace(self) 755 self.return_locals.append(namespace) 756 757 def __repr__(self): 758 return repr(self.names) 759 760 class Attribute: 761 762 """ 763 An attribute abstraction, indicating the type of the attribute along with 764 its context or origin. 765 """ 766 767 def __init__(self, context, type): 768 self.context = context 769 self.type = type 770 771 def __eq__(self, other): 772 return hasattr(other, "type") and other.type == self.type or other == self.type 773 774 def __repr__(self): 775 return "Attribute(%s, %s)" % (repr(self.context), repr(self.type)) 776 777 class Self: 778 def __init__(self, attribute): 779 self.types = [attribute] 780 781 def combine(target, additions): 782 for addition in additions: 783 if addition not in target: 784 target.append(addition) 785 786 def find_attributes(structure, name): 787 788 """ 789 Find for the given 'structure' all attributes for the given 'name', visiting 790 base classes where appropriate and returning the attributes in order of 791 descending precedence for all possible base classes. 792 793 The elements in the result list are 2-tuples which contain the attribute and 794 the structure involved in accessing the attribute. 795 """ 796 797 # First attempt to search the instance/class namespace. 798 799 try: 800 l = structure.namespace.load(name) 801 attributes = [] 802 for attribute in l: 803 attributes.append((attribute, structure)) 804 805 # If that does not work, attempt to investigate any class or base classes. 806 807 except KeyError: 808 attributes = [] 809 810 # Investigate any instance's implementing class. 811 812 if isinstance(structure, Instance): 813 for attr in structure.namespace.load("__class__"): 814 cls = attr.type 815 l = get_attributes(cls, name) 816 combine(attributes, l) 817 818 # Investigate any class's base classes. 819 820 elif isinstance(structure, Class): 821 822 # If no base classes exist, return an indicator that no attribute 823 # exists. 824 825 if not structure.base_refs: 826 return [(None, structure)] 827 828 # Otherwise, find all possible base classes. 829 830 for base_refs in structure.base_refs: 831 base_attributes = [] 832 833 # For each base class, find attributes either in the base 834 # class or its own base classes. 835 836 for base_ref in base_refs: 837 l = get_attributes(base_ref, name) 838 combine(base_attributes, l) 839 840 combine(attributes, base_attributes) 841 842 return attributes 843 844 def get_attributes(structure, name): 845 846 """ 847 Return all possible attributes for the given 'structure' having the given 848 'name', wrapping each attribute in an Attribute object which includes 849 context information for the attribute access. 850 851 The elements in the result list are 2-tuples which contain the attribute and 852 the structure involved in accessing the attribute. 853 """ 854 855 if isinstance(structure, Attribute): 856 structure = structure.type 857 results = [] 858 for attribute, accessor in find_attributes(structure, name): 859 if attribute is not None and isinstance(structure, Structure): 860 results.append((Attribute(structure, attribute.type), accessor)) 861 else: 862 results.append((attribute, accessor)) 863 return results 864 865 # Convenience functions. 866 867 def annotate(module, builtins=None): 868 869 """ 870 Annotate the given 'module', also employing the optional 'builtins' module, 871 if specified. 872 """ 873 874 annotator = Annotator() 875 if builtins is not None: 876 annotator.process(module, builtins) 877 else: 878 annotator.process(module) 879 880 def annotate_all(modules, builtins): 881 annotate(builtins) 882 for module in modules: 883 annotate(module, builtins) 884 885 # vim: tabstop=4 expandtab shiftwidth=4