1 #!/usr/bin/env python 2 3 """ 4 Common functions. 5 6 Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013, 7 2014, 2015, 2016, 2017 Paul Boddie <paul@boddie.org.uk> 8 9 This program is free software; you can redistribute it and/or modify it under 10 the terms of the GNU General Public License as published by the Free Software 11 Foundation; either version 3 of the License, or (at your option) any later 12 version. 13 14 This program is distributed in the hope that it will be useful, but WITHOUT 15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 16 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 17 details. 18 19 You should have received a copy of the GNU General Public License along with 20 this program. If not, see <http://www.gnu.org/licenses/>. 21 """ 22 23 from compiler.transformer import Transformer 24 from errors import InspectError 25 from os import listdir, makedirs, remove 26 from os.path import exists, getmtime, isdir, join, split 27 from results import ConstantValueRef, LiteralSequenceRef, NameRef 28 import compiler.ast 29 30 class CommonOutput: 31 32 "Common output functionality." 33 34 def check_output(self, options=None): 35 36 "Check the existing output and remove it if irrelevant." 37 38 if not exists(self.output): 39 makedirs(self.output) 40 41 details = self.importer.get_cache_details() 42 recorded_details = self.get_output_details() 43 44 # Combine cache details with any options. 45 46 full_details = options and (details + " " + options) or details 47 48 if recorded_details != full_details: 49 self.remove_output() 50 51 writefile(self.get_output_details_filename(), full_details) 52 53 def get_output_details_filename(self): 54 55 "Return the output details filename." 56 57 return join(self.output, "$details") 58 59 def get_output_details(self): 60 61 "Return details of the existing output." 62 63 details_filename = self.get_output_details_filename() 64 65 if not exists(details_filename): 66 return None 67 else: 68 return readfile(details_filename) 69 70 def remove_output(self, dirname=None): 71 72 "Remove the output." 73 74 dirname = dirname or self.output 75 76 for filename in listdir(dirname): 77 path = join(dirname, filename) 78 if isdir(path): 79 self.remove_output(path) 80 else: 81 remove(path) 82 83 def copy(source, target, only_if_newer=True): 84 85 "Copy a text file from 'source' to 'target'." 86 87 if isdir(target): 88 target = join(target, split(source)[-1]) 89 90 if only_if_newer and not is_newer(source, target): 91 return 92 93 infile = open(source) 94 outfile = open(target, "w") 95 96 try: 97 outfile.write(infile.read()) 98 finally: 99 outfile.close() 100 infile.close() 101 102 def is_newer(source, target): 103 104 "Return whether 'source' is newer than 'target'." 105 106 if exists(target): 107 target_mtime = getmtime(target) 108 source_mtime = getmtime(source) 109 return source_mtime > target_mtime 110 111 return True 112 113 class CommonModule: 114 115 "A common module representation." 116 117 def __init__(self, name, importer): 118 119 """ 120 Initialise this module with the given 'name' and an 'importer' which is 121 used to provide access to other modules when required. 122 """ 123 124 self.name = name 125 self.importer = importer 126 self.filename = None 127 128 # Inspection-related attributes. 129 130 self.astnode = None 131 self.encoding = None 132 self.temp = {} 133 self.lambdas = {} 134 135 # Constants, literals and values. 136 137 self.constants = {} 138 self.constant_values = {} 139 self.literals = {} 140 self.literal_types = {} 141 142 # Nested namespaces. 143 144 self.namespace_path = [] 145 self.in_function = False 146 147 # Retain the assignment value expression and track invocations. 148 149 self.in_assignment = None 150 self.in_invocation = None 151 152 # Attribute chain state management. 153 154 self.attrs = [] 155 self.chain_assignment = [] 156 self.chain_invocation = [] 157 158 def __repr__(self): 159 return "CommonModule(%r, %r)" % (self.name, self.importer) 160 161 def parse_file(self, filename): 162 163 "Parse the file with the given 'filename', initialising attributes." 164 165 self.filename = filename 166 167 # Use the Transformer directly to obtain encoding information. 168 169 t = Transformer() 170 f = open(filename) 171 172 try: 173 self.astnode = t.parsesuite(f.read() + "\n") 174 self.encoding = t.encoding 175 finally: 176 f.close() 177 178 # Module-relative naming. 179 180 def get_global_path(self, name): 181 return "%s.%s" % (self.name, name) 182 183 def get_namespace_path(self): 184 return ".".join([self.name] + self.namespace_path) 185 186 def get_object_path(self, name): 187 return ".".join([self.name] + self.namespace_path + [name]) 188 189 def get_parent_path(self): 190 return ".".join([self.name] + self.namespace_path[:-1]) 191 192 # Namespace management. 193 194 def enter_namespace(self, name): 195 196 "Enter the namespace having the given 'name'." 197 198 self.namespace_path.append(name) 199 200 def exit_namespace(self): 201 202 "Exit the current namespace." 203 204 self.namespace_path.pop() 205 206 # Constant reference naming. 207 208 def get_constant_name(self, value, value_type, encoding=None): 209 210 """ 211 Add a new constant to the current namespace for 'value' with 212 'value_type'. 213 """ 214 215 path = self.get_namespace_path() 216 init_item(self.constants, path, dict) 217 return "$c%d" % add_counter_item(self.constants[path], (value, value_type, encoding)) 218 219 # Literal reference naming. 220 221 def get_literal_name(self): 222 223 "Add a new literal to the current namespace." 224 225 path = self.get_namespace_path() 226 init_item(self.literals, path, lambda: 0) 227 return "$C%d" % self.literals[path] 228 229 def next_literal(self): 230 self.literals[self.get_namespace_path()] += 1 231 232 # Temporary variable naming. 233 234 def get_temporary_name(self): 235 path = self.get_namespace_path() 236 init_item(self.temp, path, lambda: 0) 237 return "$t%d" % self.temp[path] 238 239 def next_temporary(self): 240 self.temp[self.get_namespace_path()] += 1 241 242 # Arbitrary function naming. 243 244 def get_lambda_name(self): 245 path = self.get_namespace_path() 246 init_item(self.lambdas, path, lambda: 0) 247 name = "$l%d" % self.lambdas[path] 248 self.lambdas[path] += 1 249 return name 250 251 def reset_lambdas(self): 252 self.lambdas = {} 253 254 # Constant and literal recording. 255 256 def get_constant_value(self, value, literals=None): 257 258 """ 259 Encode the 'value' if appropriate, returning a value, a typename and any 260 encoding. 261 """ 262 263 if isinstance(value, unicode): 264 return value.encode("utf-8"), "unicode", self.encoding 265 266 # Attempt to convert plain strings to text. 267 268 elif isinstance(value, str) and self.encoding: 269 try: 270 return get_string_details(literals, self.encoding) 271 except UnicodeDecodeError: 272 pass 273 274 return value, value.__class__.__name__, None 275 276 def get_constant_reference(self, ref, value, encoding=None): 277 278 """ 279 Return a constant reference for the given 'ref' type and 'value', with 280 the optional 'encoding' applying to text values. 281 """ 282 283 constant_name = self.get_constant_name(value, ref.get_origin(), encoding) 284 285 # Return a reference for the constant. 286 287 objpath = self.get_object_path(constant_name) 288 name_ref = ConstantValueRef(constant_name, ref.instance_of(objpath), value) 289 290 # Record the value and type for the constant. 291 292 self._reserve_constant(objpath, name_ref.value, name_ref.get_origin(), encoding) 293 return name_ref 294 295 def reserve_constant(self, objpath, value, origin, encoding=None): 296 297 """ 298 Reserve a constant within 'objpath' with the given 'value' and having a 299 type with the given 'origin', with the optional 'encoding' applying to 300 text values. 301 """ 302 303 constant_name = self.get_constant_name(value, origin) 304 objpath = self.get_object_path(constant_name) 305 self._reserve_constant(objpath, value, origin, encoding) 306 307 def _reserve_constant(self, objpath, value, origin, encoding): 308 309 """ 310 Store a constant for 'objpath' with the given 'value' and 'origin', with 311 the optional 'encoding' applying to text values. 312 """ 313 314 self.constant_values[objpath] = value, origin, encoding 315 316 def get_literal_reference(self, name, ref, items, cls): 317 318 """ 319 Return a literal reference for the given type 'name', literal 'ref', 320 node 'items' and employing the given 'cls' as the class of the returned 321 reference object. 322 """ 323 324 # Construct an invocation using the items as arguments. 325 326 typename = "$L%s" % name 327 328 invocation = compiler.ast.CallFunc( 329 compiler.ast.Name(typename), 330 items 331 ) 332 333 # Get a name for the actual literal. 334 335 instname = self.get_literal_name() 336 self.next_literal() 337 338 # Record the type for the literal. 339 340 objpath = self.get_object_path(instname) 341 self.literal_types[objpath] = ref.get_origin() 342 343 # Return a wrapper for the invocation exposing the items. 344 345 return cls( 346 instname, 347 ref.instance_of(), 348 self.process_structure_node(invocation), 349 invocation.args 350 ) 351 352 # Node handling. 353 354 def process_structure(self, node): 355 356 """ 357 Within the given 'node', process the program structure. 358 359 During inspection, this will process global declarations, adjusting the 360 module namespace, and import statements, building a module dependency 361 hierarchy. 362 363 During translation, this will consult deduced program information and 364 output translated code. 365 """ 366 367 l = [] 368 for n in node.getChildNodes(): 369 l.append(self.process_structure_node(n)) 370 return l 371 372 def process_augassign_node(self, n): 373 374 "Process the given augmented assignment node 'n'." 375 376 op = operator_functions[n.op] 377 378 if isinstance(n.node, compiler.ast.Getattr): 379 target = compiler.ast.AssAttr(n.node.expr, n.node.attrname, "OP_ASSIGN") 380 elif isinstance(n.node, compiler.ast.Name): 381 target = compiler.ast.AssName(n.node.name, "OP_ASSIGN") 382 else: 383 target = n.node 384 385 assignment = compiler.ast.Assign( 386 [target], 387 compiler.ast.CallFunc( 388 compiler.ast.Name("$op%s" % op), 389 [n.node, n.expr])) 390 391 return self.process_structure_node(assignment) 392 393 def process_assignment_for_object(self, original_name, source): 394 395 """ 396 Return an assignment operation making 'original_name' refer to the given 397 'source'. 398 """ 399 400 assignment = compiler.ast.Assign( 401 [compiler.ast.AssName(original_name, "OP_ASSIGN")], 402 source 403 ) 404 405 return self.process_structure_node(assignment) 406 407 def process_assignment_node_items(self, n, expr): 408 409 """ 410 Process the given assignment node 'n' whose children are to be assigned 411 items of 'expr'. 412 """ 413 414 name_ref = self.process_structure_node(expr) 415 416 # Either unpack the items and present them directly to each assignment 417 # node. 418 419 if isinstance(name_ref, LiteralSequenceRef) and \ 420 self.process_literal_sequence_items(n, name_ref): 421 422 pass 423 424 # Or have the assignment nodes access each item via the sequence API. 425 426 else: 427 self.process_assignment_node_items_by_position(n, expr, name_ref) 428 429 def process_assignment_node_items_by_position(self, n, expr, name_ref): 430 431 """ 432 Process the given sequence assignment node 'n', converting the node to 433 the separate assignment of each target using positional access on a 434 temporary variable representing the sequence. Use 'expr' as the assigned 435 value and 'name_ref' as the reference providing any existing temporary 436 variable. 437 """ 438 439 assignments = [] 440 441 # Employ existing names to access the sequence. 442 # Literal sequences do not provide names of accessible objects. 443 444 if isinstance(name_ref, NameRef) and not isinstance(name_ref, LiteralSequenceRef): 445 temp = name_ref.name 446 447 # For other expressions, create a temporary name to reference the items. 448 449 else: 450 temp = self.get_temporary_name() 451 self.next_temporary() 452 453 assignments.append( 454 compiler.ast.Assign([compiler.ast.AssName(temp, "OP_ASSIGN")], expr) 455 ) 456 457 # Assign the items to the target nodes. 458 459 for i, node in enumerate(n.nodes): 460 assignments.append( 461 compiler.ast.Assign([node], compiler.ast.Subscript( 462 compiler.ast.Name(temp), "OP_APPLY", [compiler.ast.Const(i, str(i))])) 463 ) 464 465 return self.process_structure_node(compiler.ast.Stmt(assignments)) 466 467 def process_literal_sequence_items(self, n, name_ref): 468 469 """ 470 Process the given assignment node 'n', obtaining from the given 471 'name_ref' the items to be assigned to the assignment targets. 472 473 Return whether this method was able to process the assignment node as 474 a sequence of direct assignments. 475 """ 476 477 if len(n.nodes) == len(name_ref.items): 478 assigned_names, count = get_names_from_nodes(n.nodes) 479 accessed_names, _count = get_names_from_nodes(name_ref.items) 480 481 # Only assign directly between items if all assigned names are 482 # plain names (not attribute assignments), and if the assigned names 483 # do not appear in the accessed names. 484 485 if len(assigned_names) == count and \ 486 not assigned_names.intersection(accessed_names): 487 488 for node, item in zip(n.nodes, name_ref.items): 489 self.process_assignment_node(node, item) 490 491 return True 492 493 # Otherwise, use the position-based mechanism to obtain values. 494 495 else: 496 return False 497 else: 498 raise InspectError("In %s, item assignment needing %d items is given %d items." % ( 499 self.get_namespace_path(), len(n.nodes), len(name_ref.items))) 500 501 def process_compare_node(self, n): 502 503 """ 504 Process the given comparison node 'n', converting an operator sequence 505 from... 506 507 <expr1> <op1> <expr2> <op2> <expr3> 508 509 ...to... 510 511 <op1>(<expr1>, <expr2>) and <op2>(<expr2>, <expr3>) 512 """ 513 514 invocations = [] 515 last = n.expr 516 517 for op, op_node in n.ops: 518 op = operator_functions.get(op) 519 520 invocations.append(compiler.ast.CallFunc( 521 compiler.ast.Name("$op%s" % op), 522 [last, op_node])) 523 524 last = op_node 525 526 if len(invocations) > 1: 527 result = compiler.ast.And(invocations) 528 else: 529 result = invocations[0] 530 531 return self.process_structure_node(result) 532 533 def process_dict_node(self, node): 534 535 """ 536 Process the given dictionary 'node', returning a list of (key, value) 537 tuples. 538 """ 539 540 l = [] 541 for key, value in node.items: 542 l.append(( 543 self.process_structure_node(key), 544 self.process_structure_node(value))) 545 return l 546 547 def process_for_node(self, n): 548 549 """ 550 Generate attribute accesses for {n.list}.__iter__ and the next method on 551 the iterator, producing a replacement node for the original. 552 """ 553 554 t0 = self.get_temporary_name() 555 self.next_temporary() 556 t1 = self.get_temporary_name() 557 self.next_temporary() 558 559 node = compiler.ast.Stmt([ 560 561 # <t0> = {n.list} 562 # <t1> = <t0>.__iter__() 563 564 compiler.ast.Assign( 565 [compiler.ast.AssName(t0, "OP_ASSIGN")], 566 n.list), 567 568 compiler.ast.Assign( 569 [compiler.ast.AssName(t1, "OP_ASSIGN")], 570 compiler.ast.CallFunc( 571 compiler.ast.Getattr(compiler.ast.Name(t0), "__iter__"), 572 [])), 573 574 # try: 575 # while True: 576 # <var>... = <t1>.next() 577 # ... 578 # except StopIteration: 579 # pass 580 581 compiler.ast.TryExcept( 582 compiler.ast.While( 583 compiler.ast.Name("True"), 584 compiler.ast.Stmt([ 585 compiler.ast.Assign( 586 [n.assign], 587 compiler.ast.CallFunc( 588 compiler.ast.Getattr(compiler.ast.Name(t1), "next"), 589 [] 590 )), 591 n.body]), 592 None), 593 [(compiler.ast.Name("StopIteration"), None, compiler.ast.Stmt([compiler.ast.Pass()]))], 594 None) 595 ]) 596 597 self.process_structure_node(node) 598 599 def process_literal_sequence_node(self, n, name, ref, cls): 600 601 """ 602 Process the given literal sequence node 'n' as a function invocation, 603 with 'name' indicating the type of the sequence, and 'ref' being a 604 reference to the type. The 'cls' is used to instantiate a suitable name 605 reference. 606 """ 607 608 if name == "dict": 609 items = [] 610 for key, value in n.items: 611 items.append(compiler.ast.Tuple([key, value])) 612 else: # name in ("list", "tuple"): 613 items = n.nodes 614 615 return self.get_literal_reference(name, ref, items, cls) 616 617 def process_operator_node(self, n): 618 619 """ 620 Process the given operator node 'n' as an operator function invocation. 621 """ 622 623 op = operator_functions[n.__class__.__name__] 624 invocation = compiler.ast.CallFunc( 625 compiler.ast.Name("$op%s" % op), 626 list(n.getChildNodes()) 627 ) 628 return self.process_structure_node(invocation) 629 630 def process_print_node(self, n): 631 632 """ 633 Process the given print node 'n' as an invocation on a stream of the 634 form... 635 636 $print(dest, args, nl) 637 638 The special function name will be translated elsewhere. 639 """ 640 641 nl = isinstance(n, compiler.ast.Printnl) 642 invocation = compiler.ast.CallFunc( 643 compiler.ast.Name("$print"), 644 [n.dest or compiler.ast.Name("None"), 645 compiler.ast.List(list(n.nodes)), 646 nl and compiler.ast.Name("True") or compiler.ast.Name("False")] 647 ) 648 return self.process_structure_node(invocation) 649 650 def process_slice_node(self, n, expr=None): 651 652 """ 653 Process the given slice node 'n' as a method invocation. 654 """ 655 656 if n.flags == "OP_ASSIGN": op = "__setslice__" 657 elif n.flags == "OP_DELETE": op = "__delslice__" 658 else: op = "__getslice__" 659 660 invocation = compiler.ast.CallFunc( 661 compiler.ast.Getattr(n.expr, op), 662 [n.lower or compiler.ast.Name("None"), n.upper or compiler.ast.Name("None")] + 663 (expr and [expr] or []) 664 ) 665 666 # Fix parse tree structure. 667 668 if op == "__delslice__": 669 invocation = compiler.ast.Discard(invocation) 670 671 return self.process_structure_node(invocation) 672 673 def process_sliceobj_node(self, n): 674 675 """ 676 Process the given slice object node 'n' as a slice constructor. 677 """ 678 679 op = "slice" 680 invocation = compiler.ast.CallFunc( 681 compiler.ast.Name("$op%s" % op), 682 n.nodes 683 ) 684 return self.process_structure_node(invocation) 685 686 def process_subscript_node(self, n, expr=None): 687 688 """ 689 Process the given subscript node 'n' as a method invocation. 690 """ 691 692 if n.flags == "OP_ASSIGN": op = "__setitem__" 693 elif n.flags == "OP_DELETE": op = "__delitem__" 694 else: op = "__getitem__" 695 696 invocation = compiler.ast.CallFunc( 697 compiler.ast.Getattr(n.expr, op), 698 list(n.subs) + (expr and [expr] or []) 699 ) 700 701 # Fix parse tree structure. 702 703 if op == "__delitem__": 704 invocation = compiler.ast.Discard(invocation) 705 706 return self.process_structure_node(invocation) 707 708 def process_attribute_chain(self, n): 709 710 """ 711 Process the given attribute access node 'n'. Return a reference 712 describing the expression. 713 """ 714 715 # AssAttr/Getattr are nested with the outermost access being the last 716 # access in any chain. 717 718 self.attrs.insert(0, n.attrname) 719 attrs = self.attrs 720 721 # Break attribute chains where non-access nodes are found. 722 723 if not self.have_access_expression(n): 724 self.reset_attribute_chain() 725 726 # Descend into the expression, extending backwards any existing chain, 727 # or building another for the expression. 728 729 name_ref = self.process_structure_node(n.expr) 730 731 # Restore chain information applying to this node. 732 733 if not self.have_access_expression(n): 734 self.restore_attribute_chain(attrs) 735 736 # Return immediately if the expression was another access and thus a 737 # continuation backwards along the chain. The above processing will 738 # have followed the chain all the way to its conclusion. 739 740 if self.have_access_expression(n): 741 del self.attrs[0] 742 743 return name_ref 744 745 # Attribute chain handling. 746 747 def reset_attribute_chain(self): 748 749 "Reset the attribute chain for a subexpression of an attribute access." 750 751 self.attrs = [] 752 self.chain_assignment.append(self.in_assignment) 753 self.chain_invocation.append(self.in_invocation) 754 self.in_assignment = None 755 self.in_invocation = None 756 757 def restore_attribute_chain(self, attrs): 758 759 "Restore the attribute chain for an attribute access." 760 761 self.attrs = attrs 762 self.in_assignment = self.chain_assignment.pop() 763 self.in_invocation = self.chain_invocation.pop() 764 765 def have_access_expression(self, node): 766 767 "Return whether the expression associated with 'node' is Getattr." 768 769 return isinstance(node.expr, compiler.ast.Getattr) 770 771 def get_name_for_tracking(self, name, name_ref=None, is_global=False): 772 773 """ 774 Return the name to be used for attribute usage observations involving 775 the given 'name' in the current namespace. 776 777 If the name is being used outside a function, and if 'name_ref' is 778 given and indicates a global or if 'is_global' is specified as a true 779 value, a path featuring the name in the global namespace is returned. 780 Otherwise, a path computed using the current namespace and the given 781 name is returned. 782 783 The intention of this method is to provide a suitably-qualified name 784 that can be tracked across namespaces. Where globals are being 785 referenced in class namespaces, they should be referenced using their 786 path within the module, not using a path within each class. 787 788 It may not be possible to identify a global within a function at the 789 time of inspection (since a global may appear later in a file). 790 Consequently, globals are identified by their local name rather than 791 their module-qualified path. 792 """ 793 794 # For functions, use the appropriate local names. 795 796 if self.in_function: 797 return name 798 799 # For global names outside functions, use a global name. 800 801 elif is_global or name_ref and name_ref.is_global_name(): 802 return self.get_global_path(name) 803 804 # Otherwise, establish a name in the current namespace. 805 806 else: 807 return self.get_object_path(name) 808 809 def get_path_for_access(self): 810 811 "Outside functions, register accesses at the module level." 812 813 if not self.in_function: 814 return self.name 815 else: 816 return self.get_namespace_path() 817 818 def get_module_name(self, node): 819 820 """ 821 Using the given From 'node' in this module, calculate any relative import 822 information, returning a tuple containing a module to import along with any 823 names to import based on the node's name information. 824 825 Where the returned module is given as None, whole module imports should 826 be performed for the returned modules using the returned names. 827 """ 828 829 # Absolute import. 830 831 if node.level == 0: 832 return node.modname, node.names 833 834 # Relative to an ancestor of this module. 835 836 else: 837 path = self.name.split(".") 838 level = node.level 839 840 # Relative imports treat package roots as submodules. 841 842 if split(self.filename)[-1] == "__init__.py": 843 level -= 1 844 845 if level > len(path): 846 raise InspectError("Relative import %r involves too many levels up from module %r" % ( 847 ("%s%s" % ("." * node.level, node.modname or "")), self.name)) 848 849 basename = ".".join(path[:len(path)-level]) 850 851 # Name imports from a module. 852 853 if node.modname: 854 return "%s.%s" % (basename, node.modname), node.names 855 856 # Relative whole module imports. 857 858 else: 859 return basename, node.names 860 861 def get_argnames(args): 862 863 """ 864 Return a list of all names provided by 'args'. Since tuples may be 865 employed, the arguments are traversed depth-first. 866 """ 867 868 l = [] 869 for arg in args: 870 if isinstance(arg, tuple): 871 l += get_argnames(arg) 872 else: 873 l.append(arg) 874 return l 875 876 def get_names_from_nodes(nodes): 877 878 """ 879 Return the names employed in the given 'nodes' along with the number of 880 nodes excluding sequences. 881 """ 882 883 names = set() 884 count = 0 885 886 for node in nodes: 887 888 # Add names and count them. 889 890 if isinstance(node, (compiler.ast.AssName, compiler.ast.Name)): 891 names.add(node.name) 892 count += 1 893 894 # Add names from sequences and incorporate their counts. 895 896 elif isinstance(node, (compiler.ast.AssList, compiler.ast.AssTuple, 897 compiler.ast.List, compiler.ast.Set, 898 compiler.ast.Tuple)): 899 _names, _count = get_names_from_nodes(node.nodes) 900 names.update(_names) 901 count += _count 902 903 # Count non-name, non-sequence nodes. 904 905 else: 906 count += 1 907 908 return names, count 909 910 # Location classes. 911 912 class Location: 913 914 "A generic program location." 915 916 def __init__(self, path, name, attrnames=None, version=None, access_number=None): 917 self.path = path 918 self.name = name 919 self.attrnames = attrnames 920 self.version = version 921 self.access_number = access_number 922 923 def __repr__(self): 924 return "Location(%r, %r, %r, %r, %r)" % self.as_tuple() 925 926 def as_tuple(self): 927 return (self.path, self.name, self.attrnames, self.version, self.access_number) 928 929 def __hash__(self): 930 return hash(self.as_tuple()) 931 932 def __eq__(self, other): 933 return self.as_tuple() == other.as_tuple() 934 935 def __cmp__(self, other): 936 return cmp(self.as_tuple(), other.as_tuple()) 937 938 def get_attrname(self): 939 940 """ 941 Extract the first attribute from the attribute names employed in this 942 location. 943 """ 944 945 attrnames = self.attrnames 946 if not attrnames: 947 return attrnames 948 return get_attrnames(attrnames)[0] 949 950 class AccessLocation(Location): 951 952 "A specialised access location." 953 954 def __init__(self, path, name, attrnames, access_number): 955 956 """ 957 Initialise an access location featuring 'path', 'name', 'attrnames' and 958 'access_number'. 959 """ 960 961 Location.__init__(self, path, name, attrnames, None, access_number) 962 963 def __repr__(self): 964 return "AccessLocation(%r, %r, %r, %r)" % (self.path, self.name, self.attrnames, self.access_number) 965 966 # Result classes. 967 968 class InstructionSequence: 969 970 "A generic sequence of instructions." 971 972 def __init__(self, instructions): 973 self.instructions = instructions 974 975 def get_value_instruction(self): 976 return self.instructions[-1] 977 978 def get_init_instructions(self): 979 return self.instructions[:-1] 980 981 # Dictionary utilities. 982 983 def init_item(d, key, fn): 984 985 """ 986 Add to 'd' an entry for 'key' using the callable 'fn' to make an initial 987 value where no entry already exists. 988 """ 989 990 if not d.has_key(key): 991 d[key] = fn() 992 return d[key] 993 994 def dict_for_keys(d, keys): 995 996 "Return a new dictionary containing entries from 'd' for the given 'keys'." 997 998 nd = {} 999 for key in keys: 1000 if d.has_key(key): 1001 nd[key] = d[key] 1002 return nd 1003 1004 def make_key(s): 1005 1006 "Make sequence 's' into a tuple-based key, first sorting its contents." 1007 1008 l = list(s) 1009 l.sort() 1010 return tuple(l) 1011 1012 def add_counter_item(d, key): 1013 1014 """ 1015 Make a mapping in 'd' for 'key' to the number of keys added before it, thus 1016 maintaining a mapping of keys to their order of insertion. 1017 """ 1018 1019 if not d.has_key(key): 1020 d[key] = len(d.keys()) 1021 return d[key] 1022 1023 def remove_items(d1, d2): 1024 1025 "Remove from 'd1' all items from 'd2'." 1026 1027 for key in d2.keys(): 1028 if d1.has_key(key): 1029 del d1[key] 1030 1031 # Set utilities. 1032 1033 def first(s): 1034 return list(s)[0] 1035 1036 def same(s1, s2): 1037 return set(s1) == set(s2) 1038 1039 def order_dependencies(all_depends): 1040 1041 """ 1042 Produce a dependency ordering for the 'all_depends' mapping. This mapping 1043 has the form "A depends on B, C...". The result will order A, B, C, and so 1044 on. 1045 """ 1046 1047 usage = init_reverse_dependencies(all_depends) 1048 1049 # Produce an ordering by obtaining exposed items (required by items already 1050 # processed) and putting them at the start of the list. 1051 1052 ordered = [] 1053 1054 while usage: 1055 have_next = False 1056 1057 for key, n in usage.items(): 1058 1059 # Add items needed by no other items to the ordering. 1060 1061 if not n: 1062 remove_dependency(key, all_depends, usage, ordered) 1063 have_next = True 1064 1065 if not have_next: 1066 raise ValueError, usage 1067 1068 return ordered 1069 1070 def order_dependencies_partial(all_depends): 1071 1072 """ 1073 Produce a dependency ordering for the 'all_depends' mapping. This mapping 1074 has the form "A depends on B, C...". The result will order A, B, C, and so 1075 on. Where cycles exist, they will be broken and a partial ordering returned. 1076 """ 1077 1078 usage = init_reverse_dependencies(all_depends) 1079 1080 # Duplicate the dependencies for subsequent modification. 1081 1082 new_depends = {} 1083 for key, values in all_depends.items(): 1084 new_depends[key] = set(values) 1085 1086 all_depends = new_depends 1087 1088 # Produce an ordering by obtaining exposed items (required by items already 1089 # processed) and putting them at the start of the list. 1090 1091 ordered = [] 1092 1093 while usage: 1094 least = None 1095 least_key = None 1096 1097 for key, n in usage.items(): 1098 1099 # Add items needed by no other items to the ordering. 1100 1101 if not n: 1102 remove_dependency(key, all_depends, usage, ordered) 1103 least = 0 1104 1105 # When breaking cycles, note the least used items. 1106 1107 elif least is None or len(n) < least: 1108 least_key = key 1109 least = len(n) 1110 1111 if least: 1112 transfer_dependencies(least_key, all_depends, usage, ordered) 1113 1114 return ordered 1115 1116 def init_reverse_dependencies(all_depends): 1117 1118 """ 1119 From 'all_depends', providing a mapping of the form "A depends on B, C...", 1120 record the reverse dependencies, making a mapping of the form 1121 "B is needed by A", "C is needed by A", and so on. 1122 """ 1123 1124 usage = {} 1125 1126 # Record path-based dependencies. 1127 1128 for key in all_depends.keys(): 1129 usage[key] = set() 1130 1131 for key, depends in all_depends.items(): 1132 for depend in depends: 1133 init_item(usage, depend, set) 1134 usage[depend].add(key) 1135 1136 return usage 1137 1138 def transfer_dependencies(key, all_depends, usage, ordered): 1139 1140 """ 1141 Transfer items needed by 'key' to those items needing 'key', found using 1142 'all_depends', and updating 'usage'. Insert 'key' into the 'ordered' 1143 collection of dependencies. 1144 1145 If "A is needed by X" and "B is needed by A", then transferring items needed 1146 by A will cause "B is needed by X" to be recorded as a consequence. 1147 1148 Transferring items also needs to occur in the reverse mapping, so that 1149 "A needs B" and "X needs A", then the consequence must be recorded as 1150 "X needs B". 1151 """ 1152 1153 ordered.insert(0, key) 1154 1155 needing = usage[key] # A is needed by X 1156 needed = all_depends.get(key) # A needs B 1157 1158 if needing: 1159 for depend in needing: 1160 l = all_depends.get(depend) 1161 if not l: 1162 continue 1163 1164 l.remove(key) # X needs (A) 1165 1166 if needed: 1167 l.update(needed) # X needs B... 1168 1169 # Prevent self references. 1170 1171 if depend in needed: 1172 l.remove(depend) 1173 1174 if needed: 1175 for depend in needed: 1176 l = usage.get(depend) 1177 if not l: 1178 continue 1179 1180 l.remove(key) # B is needed by (A) 1181 l.update(needing) # B is needed by X... 1182 1183 # Prevent self references. 1184 1185 if depend in needing: 1186 l.remove(depend) 1187 1188 if needed: 1189 del all_depends[key] 1190 del usage[key] 1191 1192 def remove_dependency(key, all_depends, usage, ordered): 1193 1194 """ 1195 Remove 'key', found in 'all_depends', from 'usage', inserting it into the 1196 'ordered' collection of dependencies. 1197 1198 Given that 'usage' for a given key A would indicate that "A needs <nothing>" 1199 upon removing A from 'usage', the outcome is that all keys needing A will 1200 have A removed from their 'usage' records. 1201 1202 So, if "B needs A", removing A will cause "B needs <nothing>" to be recorded 1203 as a consequence. 1204 """ 1205 1206 ordered.insert(0, key) 1207 1208 depends = all_depends.get(key) 1209 1210 # Reduce usage of the referenced items. 1211 1212 if depends: 1213 for depend in depends: 1214 usage[depend].remove(key) 1215 1216 del usage[key] 1217 1218 # General input/output. 1219 1220 def readfile(filename): 1221 1222 "Return the contents of 'filename'." 1223 1224 f = open(filename) 1225 try: 1226 return f.read() 1227 finally: 1228 f.close() 1229 1230 def writefile(filename, s): 1231 1232 "Write to 'filename' the string 's'." 1233 1234 f = open(filename, "w") 1235 try: 1236 f.write(s) 1237 finally: 1238 f.close() 1239 1240 # General encoding. 1241 1242 def sorted_output(x): 1243 1244 "Sort sequence 'x' and return a string with commas separating the values." 1245 1246 x = map(str, x) 1247 x.sort() 1248 return ", ".join(x) 1249 1250 def get_string_details(literals, encoding): 1251 1252 """ 1253 Determine whether 'literals' represent Unicode strings or byte strings, 1254 using 'encoding' to reproduce byte sequences. 1255 1256 Each literal is the full program representation including prefix and quotes 1257 recoded by the parser to UTF-8. Thus, any literal found to represent a byte 1258 string needs to be translated back to its original encoding. 1259 1260 Return a single encoded literal value, a type name, and the original 1261 encoding as a tuple. 1262 """ 1263 1264 typename = "unicode" 1265 1266 l = [] 1267 1268 for s in literals: 1269 out, _typename = get_literal_details(s) 1270 if _typename == "str": 1271 typename = "str" 1272 l.append(out) 1273 1274 out = "".join(l) 1275 1276 # For Unicode values, convert to the UTF-8 program representation. 1277 1278 if typename == "unicode": 1279 return out.encode("utf-8"), typename, encoding 1280 1281 # For byte string values, convert back to the original encoding. 1282 1283 else: 1284 return out.encode(encoding), typename, encoding 1285 1286 def get_literal_details(s): 1287 1288 """ 1289 Determine whether 's' represents a Unicode string or a byte string, where 1290 's' contains the full program representation of a literal including prefix 1291 and quotes, recoded by the parser to UTF-8. 1292 1293 Find and convert Unicode values starting with <backslash>u or <backslash>U, 1294 and byte or Unicode values starting with <backslash><octal digit> or 1295 <backslash>x. 1296 1297 Literals prefixed with "u" cause <backslash><octal digit> and <backslash>x 1298 to be considered as Unicode values. Otherwise, they produce byte values and 1299 cause unprefixed strings to be considered as byte strings. 1300 1301 Literals prefixed with "r" do not have their backslash-encoded values 1302 converted unless also prefixed with "u", in which case only the above value 1303 formats are converted, not any of the other special sequences for things 1304 like newlines. 1305 1306 Return the literal value as a Unicode object together with the appropriate 1307 type name in a tuple. 1308 """ 1309 1310 l = [] 1311 1312 # Identify the quote character and use it to identify the prefix. 1313 1314 quote_type = s[-1] 1315 prefix_end = s.find(quote_type) 1316 prefix = s[:prefix_end].lower() 1317 1318 if prefix not in ("", "b", "br", "r", "u", "ur"): 1319 raise ValueError, "String literal does not have a supported prefix: %s" % s 1320 1321 if "b" in prefix: 1322 typename = "str" 1323 else: 1324 typename = "unicode" 1325 1326 # Identify triple quotes or single quotes. 1327 1328 if len(s) >= 6 and s[-2] == quote_type and s[-3] == quote_type: 1329 quote = s[prefix_end:prefix_end+3] 1330 current = prefix_end + 3 1331 end = len(s) - 3 1332 else: 1333 quote = s[prefix_end] 1334 current = prefix_end + 1 1335 end = len(s) - 1 1336 1337 # Conversions of some quoted values. 1338 1339 searches = { 1340 "u" : (6, 16), 1341 "U" : (10, 16), 1342 "x" : (4, 16), 1343 } 1344 1345 octal_digits = map(str, range(0, 8)) 1346 1347 # Translations of some quoted values. 1348 1349 escaped = { 1350 "\\" : "\\", "'" : "'", '"' : '"', 1351 "a" : "\a", "b" : "\b", "f" : "\f", 1352 "n" : "\n", "r" : "\r", "t" : "\t", 1353 } 1354 1355 while current < end: 1356 1357 # Look for quoted values. 1358 1359 index = s.find("\\", current) 1360 if index == -1 or index + 1 == end: 1361 l.append(s[current:end]) 1362 break 1363 1364 # Add the preceding text. 1365 1366 l.append(s[current:index]) 1367 1368 # Handle quoted text. 1369 1370 term = s[index+1] 1371 1372 # Add Unicode values. Where a string is u-prefixed, even \o and \x 1373 # produce Unicode values. 1374 1375 if typename == "unicode" and ( 1376 term in ("u", "U") or 1377 "u" in prefix and (term == "x" or term in octal_digits)): 1378 1379 needed, base = searches.get(term, (4, 8)) 1380 value = convert_quoted_value(s, index, needed, end, base, unichr) 1381 l.append(value) 1382 current = index + needed 1383 1384 # Add raw byte values, changing the string type. 1385 1386 elif "r" not in prefix and ( 1387 term == "x" or term in octal_digits): 1388 1389 needed, base = searches.get(term, (4, 8)) 1390 value = convert_quoted_value(s, index, needed, end, base, chr) 1391 l.append(value) 1392 typename = "str" 1393 current = index + needed 1394 1395 # Add other escaped values. 1396 1397 elif "r" not in prefix and escaped.has_key(term): 1398 l.append(escaped[term]) 1399 current = index + 2 1400 1401 # Add other text as found. 1402 1403 else: 1404 l.append(s[index:index+2]) 1405 current = index + 2 1406 1407 # Collect the components into a single Unicode object. Since the literal 1408 # text was already in UTF-8 form, interpret plain strings as UTF-8 1409 # sequences. 1410 1411 out = [] 1412 1413 for value in l: 1414 if isinstance(value, unicode): 1415 out.append(value) 1416 else: 1417 out.append(unicode(value, "utf-8")) 1418 1419 return "".join(out), typename 1420 1421 def convert_quoted_value(s, index, needed, end, base, fn): 1422 1423 """ 1424 Interpret a quoted value in 's' at 'index' with the given 'needed' number of 1425 positions, and with the given 'end' indicating the first position after the 1426 end of the actual string content. 1427 1428 Use 'base' as the numerical base when interpreting the value, and use 'fn' 1429 to convert the value to an appropriate type. 1430 """ 1431 1432 s = s[index:min(index+needed, end)] 1433 1434 # Not a complete occurrence. 1435 1436 if len(s) < needed: 1437 return s 1438 1439 # Test for a well-formed value. 1440 1441 try: 1442 first = base == 8 and 1 or 2 1443 value = int(s[first:needed], base) 1444 except ValueError: 1445 return s 1446 else: 1447 return fn(value) 1448 1449 # Attribute chain decoding. 1450 1451 def get_attrnames(attrnames): 1452 1453 """ 1454 Split the qualified attribute chain 'attrnames' into its components, 1455 handling special attributes starting with "#" that indicate type 1456 conformance. 1457 """ 1458 1459 if attrnames.startswith("#"): 1460 return [attrnames] 1461 else: 1462 return attrnames.split(".") 1463 1464 def get_name_path(path, name): 1465 1466 "Return a suitable qualified name from the given 'path' and 'name'." 1467 1468 if "." in name: 1469 return name 1470 else: 1471 return "%s.%s" % (path, name) 1472 1473 # Usage-related functions. 1474 1475 def get_types_for_usage(attrnames, objects): 1476 1477 """ 1478 Identify the types that can support the given 'attrnames', using the 1479 given 'objects' as the catalogue of type details. 1480 """ 1481 1482 types = [] 1483 for name, _attrnames in objects.items(): 1484 if set(attrnames).issubset(_attrnames): 1485 types.append(name) 1486 return types 1487 1488 def get_invoked_attributes(usage): 1489 1490 "Obtain invoked attribute from the given 'usage'." 1491 1492 invoked = [] 1493 if usage: 1494 for attrname, invocation, assignment in usage: 1495 if invocation: 1496 invoked.append(attrname) 1497 return invoked 1498 1499 def get_assigned_attributes(usage): 1500 1501 "Obtain assigned attribute from the given 'usage'." 1502 1503 assigned = [] 1504 if usage: 1505 for attrname, invocation, assignment in usage: 1506 if assignment: 1507 assigned.append(attrname) 1508 return assigned 1509 1510 # Type and module functions. 1511 # NOTE: This makes assumptions about the __builtins__ structure. 1512 1513 def get_builtin_module(name): 1514 1515 "Return the module name containing the given type 'name'." 1516 1517 if name == "string": 1518 modname = "str" 1519 elif name == "utf8string": 1520 modname = "unicode" 1521 elif name == "NoneType": 1522 modname = "none" 1523 else: 1524 modname = name 1525 1526 return "__builtins__.%s" % modname 1527 1528 def get_builtin_type(name): 1529 1530 "Return the type name provided by the given Python value 'name'." 1531 1532 if name == "str": 1533 return "string" 1534 elif name == "unicode": 1535 return "utf8string" 1536 else: 1537 return name 1538 1539 def get_builtin_class(name): 1540 1541 "Return the full name of the built-in class having the given 'name'." 1542 1543 typename = get_builtin_type(name) 1544 module = get_builtin_module(typename) 1545 return "%s.%s" % (module, typename) 1546 1547 # Useful data. 1548 1549 predefined_constants = "False", "None", "NotImplemented", "True" 1550 1551 operator_functions = { 1552 1553 # Fundamental operations. 1554 1555 "is" : "is_", 1556 "is not" : "is_not", 1557 1558 # Binary operations. 1559 1560 "in" : "in_", 1561 "not in" : "not_in", 1562 "Add" : "add", 1563 "Bitand" : "and_", 1564 "Bitor" : "or_", 1565 "Bitxor" : "xor", 1566 "Div" : "div", 1567 "FloorDiv" : "floordiv", 1568 "LeftShift" : "lshift", 1569 "Mod" : "mod", 1570 "Mul" : "mul", 1571 "Power" : "pow", 1572 "RightShift" : "rshift", 1573 "Sub" : "sub", 1574 1575 # Unary operations. 1576 1577 "Invert" : "invert", 1578 "UnaryAdd" : "pos", 1579 "UnarySub" : "neg", 1580 1581 # Augmented assignment. 1582 1583 "+=" : "iadd", 1584 "-=" : "isub", 1585 "*=" : "imul", 1586 "/=" : "idiv", 1587 "//=" : "ifloordiv", 1588 "%=" : "imod", 1589 "**=" : "ipow", 1590 "<<=" : "ilshift", 1591 ">>=" : "irshift", 1592 "&=" : "iand", 1593 "^=" : "ixor", 1594 "|=" : "ior", 1595 1596 # Comparisons. 1597 1598 "==" : "eq", 1599 "!=" : "ne", 1600 "<" : "lt", 1601 "<=" : "le", 1602 ">=" : "ge", 1603 ">" : "gt", 1604 } 1605 1606 # vim: tabstop=4 expandtab shiftwidth=4