1 #!/usr/bin/env python 2 3 """ 4 Common functions. 5 6 Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013, 7 2014, 2015, 2016, 2017 Paul Boddie <paul@boddie.org.uk> 8 9 This program is free software; you can redistribute it and/or modify it under 10 the terms of the GNU General Public License as published by the Free Software 11 Foundation; either version 3 of the License, or (at your option) any later 12 version. 13 14 This program is distributed in the hope that it will be useful, but WITHOUT 15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 16 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 17 details. 18 19 You should have received a copy of the GNU General Public License along with 20 this program. If not, see <http://www.gnu.org/licenses/>. 21 """ 22 23 from compiler.transformer import Transformer 24 from errors import InspectError 25 from os import listdir, makedirs, remove 26 from os.path import exists, getmtime, isdir, join, split 27 from results import ConstantValueRef, LiteralSequenceRef, NameRef 28 import compiler.ast 29 30 class CommonOutput: 31 32 "Common output functionality." 33 34 def check_output(self, options=None): 35 36 "Check the existing output and remove it if irrelevant." 37 38 if not exists(self.output): 39 makedirs(self.output) 40 41 details = self.importer.get_cache_details() 42 recorded_details = self.get_output_details() 43 44 # Combine cache details with any options. 45 46 full_details = options and (details + " " + options) or details 47 48 if recorded_details != full_details: 49 self.remove_output() 50 51 writefile(self.get_output_details_filename(), full_details) 52 53 def get_output_details_filename(self): 54 55 "Return the output details filename." 56 57 return join(self.output, "$details") 58 59 def get_output_details(self): 60 61 "Return details of the existing output." 62 63 details_filename = self.get_output_details_filename() 64 65 if not exists(details_filename): 66 return None 67 else: 68 return readfile(details_filename) 69 70 def remove_output(self, dirname=None): 71 72 "Remove the output." 73 74 dirname = dirname or self.output 75 76 for filename in listdir(dirname): 77 path = join(dirname, filename) 78 if isdir(path): 79 self.remove_output(path) 80 else: 81 remove(path) 82 83 def copy(source, target, only_if_newer=True): 84 85 "Copy a text file from 'source' to 'target'." 86 87 if isdir(target): 88 target = join(target, split(source)[-1]) 89 90 if only_if_newer and not is_newer(source, target): 91 return 92 93 infile = open(source) 94 outfile = open(target, "w") 95 96 try: 97 outfile.write(infile.read()) 98 finally: 99 outfile.close() 100 infile.close() 101 102 def is_newer(source, target): 103 104 "Return whether 'source' is newer than 'target'." 105 106 if exists(target): 107 target_mtime = getmtime(target) 108 source_mtime = getmtime(source) 109 return source_mtime > target_mtime 110 111 return True 112 113 class CommonModule: 114 115 "A common module representation." 116 117 def __init__(self, name, importer): 118 119 """ 120 Initialise this module with the given 'name' and an 'importer' which is 121 used to provide access to other modules when required. 122 """ 123 124 self.name = name 125 self.importer = importer 126 self.filename = None 127 128 # Inspection-related attributes. 129 130 self.astnode = None 131 self.encoding = None 132 self.temp = {} 133 self.lambdas = {} 134 135 # Constants, literals and values. 136 137 self.constants = {} 138 self.constant_values = {} 139 self.literals = {} 140 self.literal_types = {} 141 142 # Nested namespaces. 143 144 self.namespace_path = [] 145 self.in_function = False 146 147 # Retain the assignment value expression and track invocations. 148 149 self.in_assignment = None 150 self.in_invocation = None 151 152 # Attribute chain state management. 153 154 self.attrs = [] 155 self.chain_assignment = [] 156 self.chain_invocation = [] 157 158 def __repr__(self): 159 return "CommonModule(%r, %r)" % (self.name, self.importer) 160 161 def parse_file(self, filename): 162 163 "Parse the file with the given 'filename', initialising attributes." 164 165 self.filename = filename 166 167 # Use the Transformer directly to obtain encoding information. 168 169 t = Transformer() 170 f = open(filename) 171 172 try: 173 self.astnode = t.parsesuite(f.read() + "\n") 174 self.encoding = t.encoding 175 finally: 176 f.close() 177 178 # Module-relative naming. 179 180 def get_global_path(self, name): 181 return "%s.%s" % (self.name, name) 182 183 def get_namespace_path(self): 184 return ".".join([self.name] + self.namespace_path) 185 186 def get_object_path(self, name): 187 return ".".join([self.name] + self.namespace_path + [name]) 188 189 def get_parent_path(self): 190 return ".".join([self.name] + self.namespace_path[:-1]) 191 192 # Namespace management. 193 194 def enter_namespace(self, name): 195 196 "Enter the namespace having the given 'name'." 197 198 self.namespace_path.append(name) 199 200 def exit_namespace(self): 201 202 "Exit the current namespace." 203 204 self.namespace_path.pop() 205 206 # Constant reference naming. 207 208 def get_constant_name(self, value, value_type, encoding=None): 209 210 """ 211 Add a new constant to the current namespace for 'value' with 212 'value_type'. 213 """ 214 215 path = self.get_namespace_path() 216 init_item(self.constants, path, dict) 217 return "$c%d" % add_counter_item(self.constants[path], (value, value_type, encoding)) 218 219 # Literal reference naming. 220 221 def get_literal_name(self): 222 223 "Add a new literal to the current namespace." 224 225 path = self.get_namespace_path() 226 init_item(self.literals, path, lambda: 0) 227 return "$C%d" % self.literals[path] 228 229 def next_literal(self): 230 self.literals[self.get_namespace_path()] += 1 231 232 # Temporary variable naming. 233 234 def get_temporary_name(self): 235 path = self.get_namespace_path() 236 init_item(self.temp, path, lambda: 0) 237 return "$t%d" % self.temp[path] 238 239 def next_temporary(self): 240 self.temp[self.get_namespace_path()] += 1 241 242 # Arbitrary function naming. 243 244 def get_lambda_name(self): 245 path = self.get_namespace_path() 246 init_item(self.lambdas, path, lambda: 0) 247 name = "$l%d" % self.lambdas[path] 248 self.lambdas[path] += 1 249 return name 250 251 def reset_lambdas(self): 252 self.lambdas = {} 253 254 # Constant and literal recording. 255 256 def get_constant_value(self, value, literals=None): 257 258 """ 259 Encode the 'value' if appropriate, returning a value, a typename and any 260 encoding. 261 """ 262 263 if isinstance(value, unicode): 264 return value.encode("utf-8"), "unicode", self.encoding 265 266 # Attempt to convert plain strings to text. 267 268 elif isinstance(value, str) and self.encoding: 269 try: 270 return get_string_details(literals, self.encoding) 271 except UnicodeDecodeError: 272 pass 273 274 return value, value.__class__.__name__, None 275 276 def get_constant_reference(self, ref, value, encoding=None): 277 278 """ 279 Return a constant reference for the given 'ref' type and 'value', with 280 the optional 'encoding' applying to text values. 281 """ 282 283 constant_name = self.get_constant_name(value, ref.get_origin(), encoding) 284 285 # Return a reference for the constant. 286 287 objpath = self.get_object_path(constant_name) 288 name_ref = ConstantValueRef(constant_name, ref.instance_of(objpath), value) 289 290 # Record the value and type for the constant. 291 292 self._reserve_constant(objpath, name_ref.value, name_ref.get_origin(), encoding) 293 return name_ref 294 295 def reserve_constant(self, objpath, value, origin, encoding=None): 296 297 """ 298 Reserve a constant within 'objpath' with the given 'value' and having a 299 type with the given 'origin', with the optional 'encoding' applying to 300 text values. 301 """ 302 303 constant_name = self.get_constant_name(value, origin) 304 objpath = self.get_object_path(constant_name) 305 self._reserve_constant(objpath, value, origin, encoding) 306 307 def _reserve_constant(self, objpath, value, origin, encoding): 308 309 """ 310 Store a constant for 'objpath' with the given 'value' and 'origin', with 311 the optional 'encoding' applying to text values. 312 """ 313 314 self.constant_values[objpath] = value, origin, encoding 315 316 def get_literal_reference(self, name, ref, items, cls): 317 318 """ 319 Return a literal reference for the given type 'name', literal 'ref', 320 node 'items' and employing the given 'cls' as the class of the returned 321 reference object. 322 """ 323 324 # Construct an invocation using the items as arguments. 325 326 typename = "$L%s" % name 327 328 invocation = compiler.ast.CallFunc( 329 compiler.ast.Name(typename), 330 items 331 ) 332 333 # Get a name for the actual literal. 334 335 instname = self.get_literal_name() 336 self.next_literal() 337 338 # Record the type for the literal. 339 340 objpath = self.get_object_path(instname) 341 self.literal_types[objpath] = ref.get_origin() 342 343 # Return a wrapper for the invocation exposing the items. 344 345 return cls( 346 instname, 347 ref.instance_of(), 348 self.process_structure_node(invocation), 349 invocation.args 350 ) 351 352 # Node handling. 353 354 def process_structure(self, node): 355 356 """ 357 Within the given 'node', process the program structure. 358 359 During inspection, this will process global declarations, adjusting the 360 module namespace, and import statements, building a module dependency 361 hierarchy. 362 363 During translation, this will consult deduced program information and 364 output translated code. 365 """ 366 367 l = [] 368 for n in node.getChildNodes(): 369 l.append(self.process_structure_node(n)) 370 return l 371 372 def process_augassign_node(self, n): 373 374 "Process the given augmented assignment node 'n'." 375 376 op = operator_functions[n.op] 377 378 if isinstance(n.node, compiler.ast.Getattr): 379 target = compiler.ast.AssAttr(n.node.expr, n.node.attrname, "OP_ASSIGN") 380 elif isinstance(n.node, compiler.ast.Name): 381 target = compiler.ast.AssName(n.node.name, "OP_ASSIGN") 382 else: 383 target = n.node 384 385 assignment = compiler.ast.Assign( 386 [target], 387 compiler.ast.CallFunc( 388 compiler.ast.Name("$op%s" % op), 389 [n.node, n.expr])) 390 391 return self.process_structure_node(assignment) 392 393 def process_assignment_for_object(self, original_name, source): 394 395 """ 396 Return an assignment operation making 'original_name' refer to the given 397 'source'. 398 """ 399 400 assignment = compiler.ast.Assign( 401 [compiler.ast.AssName(original_name, "OP_ASSIGN")], 402 source 403 ) 404 405 return self.process_structure_node(assignment) 406 407 def process_assignment_node_items(self, n, expr): 408 409 """ 410 Process the given assignment node 'n' whose children are to be assigned 411 items of 'expr'. 412 """ 413 414 name_ref = self.process_structure_node(expr) 415 416 # Either unpack the items and present them directly to each assignment 417 # node. 418 419 if isinstance(name_ref, LiteralSequenceRef) and \ 420 self.process_literal_sequence_items(n, name_ref): 421 422 pass 423 424 # Or have the assignment nodes access each item via the sequence API. 425 426 else: 427 self.process_assignment_node_items_by_position(n, expr, name_ref) 428 429 def process_assignment_node_items_by_position(self, n, expr, name_ref): 430 431 """ 432 Process the given sequence assignment node 'n', converting the node to 433 the separate assignment of each target using positional access on a 434 temporary variable representing the sequence. Use 'expr' as the assigned 435 value and 'name_ref' as the reference providing any existing temporary 436 variable. 437 """ 438 439 assignments = [] 440 441 # Employ existing names to access the sequence. 442 # Literal sequences do not provide names of accessible objects. 443 444 if isinstance(name_ref, NameRef) and not isinstance(name_ref, LiteralSequenceRef): 445 temp = name_ref.name 446 447 # For other expressions, create a temporary name to reference the items. 448 449 else: 450 temp = self.get_temporary_name() 451 self.next_temporary() 452 453 assignments.append( 454 compiler.ast.Assign([compiler.ast.AssName(temp, "OP_ASSIGN")], expr) 455 ) 456 457 # Assign the items to the target nodes. 458 459 for i, node in enumerate(n.nodes): 460 assignments.append( 461 compiler.ast.Assign([node], compiler.ast.Subscript( 462 compiler.ast.Name(temp), "OP_APPLY", [compiler.ast.Const(i, str(i))])) 463 ) 464 465 return self.process_structure_node(compiler.ast.Stmt(assignments)) 466 467 def process_literal_sequence_items(self, n, name_ref): 468 469 """ 470 Process the given assignment node 'n', obtaining from the given 471 'name_ref' the items to be assigned to the assignment targets. 472 473 Return whether this method was able to process the assignment node as 474 a sequence of direct assignments. 475 """ 476 477 if len(n.nodes) == len(name_ref.items): 478 assigned_names, count = get_names_from_nodes(n.nodes) 479 accessed_names, _count = get_names_from_nodes(name_ref.items) 480 481 # Only assign directly between items if all assigned names are 482 # plain names (not attribute assignments), and if the assigned names 483 # do not appear in the accessed names. 484 485 if len(assigned_names) == count and \ 486 not assigned_names.intersection(accessed_names): 487 488 for node, item in zip(n.nodes, name_ref.items): 489 self.process_assignment_node(node, item) 490 491 return True 492 493 # Otherwise, use the position-based mechanism to obtain values. 494 495 else: 496 return False 497 else: 498 raise InspectError("In %s, item assignment needing %d items is given %d items." % ( 499 self.get_namespace_path(), len(n.nodes), len(name_ref.items))) 500 501 def process_compare_node(self, n): 502 503 """ 504 Process the given comparison node 'n', converting an operator sequence 505 from... 506 507 <expr1> <op1> <expr2> <op2> <expr3> 508 509 ...to... 510 511 <op1>(<expr1>, <expr2>) and <op2>(<expr2>, <expr3>) 512 """ 513 514 invocations = [] 515 last = n.expr 516 517 for op, op_node in n.ops: 518 op = operator_functions.get(op) 519 520 invocations.append(compiler.ast.CallFunc( 521 compiler.ast.Name("$op%s" % op), 522 [last, op_node])) 523 524 last = op_node 525 526 if len(invocations) > 1: 527 result = compiler.ast.And(invocations) 528 else: 529 result = invocations[0] 530 531 return self.process_structure_node(result) 532 533 def process_dict_node(self, node): 534 535 """ 536 Process the given dictionary 'node', returning a list of (key, value) 537 tuples. 538 """ 539 540 l = [] 541 for key, value in node.items: 542 l.append(( 543 self.process_structure_node(key), 544 self.process_structure_node(value))) 545 return l 546 547 def process_for_node(self, n): 548 549 """ 550 Generate attribute accesses for {n.list}.__iter__ and the next method on 551 the iterator, producing a replacement node for the original. 552 """ 553 554 t0 = self.get_temporary_name() 555 self.next_temporary() 556 t1 = self.get_temporary_name() 557 self.next_temporary() 558 i0 = self.get_temporary_name() 559 self.next_temporary() 560 561 node = compiler.ast.Stmt([ 562 563 # <t0> = {n.list} 564 # <t1> = <t0>.__iter__() 565 566 compiler.ast.Assign( 567 [compiler.ast.AssName(t0, "OP_ASSIGN")], 568 n.list), 569 570 compiler.ast.Assign( 571 [compiler.ast.AssName(t1, "OP_ASSIGN")], 572 compiler.ast.CallFunc( 573 compiler.ast.Getattr(compiler.ast.Name(t0), "__iter__"), 574 [])), 575 576 # try: 577 # while True: 578 # <var>... = <t1>.next() 579 # ... 580 # except StopIteration: 581 # pass 582 583 compiler.ast.TryExcept( 584 compiler.ast.While( 585 compiler.ast.Name("True"), 586 compiler.ast.Stmt([ 587 compiler.ast.Assign( 588 [n.assign], 589 compiler.ast.CallFunc( 590 compiler.ast.Getattr(compiler.ast.Name(t1), "next"), 591 [] 592 )), 593 n.body]), 594 None), 595 [(compiler.ast.Name("StopIteration"), None, compiler.ast.Stmt([compiler.ast.Pass()]))], 596 None) 597 ]) 598 599 self.process_structure_node(node) 600 601 def process_literal_sequence_node(self, n, name, ref, cls): 602 603 """ 604 Process the given literal sequence node 'n' as a function invocation, 605 with 'name' indicating the type of the sequence, and 'ref' being a 606 reference to the type. The 'cls' is used to instantiate a suitable name 607 reference. 608 """ 609 610 if name == "dict": 611 items = [] 612 for key, value in n.items: 613 items.append(compiler.ast.Tuple([key, value])) 614 else: # name in ("list", "tuple"): 615 items = n.nodes 616 617 return self.get_literal_reference(name, ref, items, cls) 618 619 def process_operator_node(self, n): 620 621 """ 622 Process the given operator node 'n' as an operator function invocation. 623 """ 624 625 op = operator_functions[n.__class__.__name__] 626 invocation = compiler.ast.CallFunc( 627 compiler.ast.Name("$op%s" % op), 628 list(n.getChildNodes()) 629 ) 630 return self.process_structure_node(invocation) 631 632 def process_print_node(self, n): 633 634 """ 635 Process the given print node 'n' as an invocation on a stream of the 636 form... 637 638 $print(dest, args, nl) 639 640 The special function name will be translated elsewhere. 641 """ 642 643 nl = isinstance(n, compiler.ast.Printnl) 644 invocation = compiler.ast.CallFunc( 645 compiler.ast.Name("$print"), 646 [n.dest or compiler.ast.Name("None"), 647 compiler.ast.List(list(n.nodes)), 648 nl and compiler.ast.Name("True") or compiler.ast.Name("False")] 649 ) 650 return self.process_structure_node(invocation) 651 652 def process_slice_node(self, n, expr=None): 653 654 """ 655 Process the given slice node 'n' as an operator function invocation. 656 """ 657 658 if n.flags == "OP_ASSIGN": op = "setslice" 659 elif n.flags == "OP_DELETE": op = "delslice" 660 else: op = "getslice" 661 662 invocation = compiler.ast.CallFunc( 663 compiler.ast.Name("$op%s" % op), 664 [n.expr, n.lower or compiler.ast.Name("None"), n.upper or compiler.ast.Name("None")] + 665 (expr and [expr] or []) 666 ) 667 668 # Fix parse tree structure. 669 670 if op == "delslice": 671 invocation = compiler.ast.Discard(invocation) 672 673 return self.process_structure_node(invocation) 674 675 def process_sliceobj_node(self, n): 676 677 """ 678 Process the given slice object node 'n' as a slice constructor. 679 """ 680 681 op = "slice" 682 invocation = compiler.ast.CallFunc( 683 compiler.ast.Name("$op%s" % op), 684 n.nodes 685 ) 686 return self.process_structure_node(invocation) 687 688 def process_subscript_node(self, n, expr=None): 689 690 """ 691 Process the given subscript node 'n' as an operator function invocation. 692 """ 693 694 if n.flags == "OP_ASSIGN": op = "setitem" 695 elif n.flags == "OP_DELETE": op = "delitem" 696 else: op = "getitem" 697 698 invocation = compiler.ast.CallFunc( 699 compiler.ast.Name("$op%s" % op), 700 [n.expr] + list(n.subs) + (expr and [expr] or []) 701 ) 702 703 # Fix parse tree structure. 704 705 if op == "delitem": 706 invocation = compiler.ast.Discard(invocation) 707 708 return self.process_structure_node(invocation) 709 710 def process_attribute_chain(self, n): 711 712 """ 713 Process the given attribute access node 'n'. Return a reference 714 describing the expression. 715 """ 716 717 # AssAttr/Getattr are nested with the outermost access being the last 718 # access in any chain. 719 720 self.attrs.insert(0, n.attrname) 721 attrs = self.attrs 722 723 # Break attribute chains where non-access nodes are found. 724 725 if not self.have_access_expression(n): 726 self.reset_attribute_chain() 727 728 # Descend into the expression, extending backwards any existing chain, 729 # or building another for the expression. 730 731 name_ref = self.process_structure_node(n.expr) 732 733 # Restore chain information applying to this node. 734 735 if not self.have_access_expression(n): 736 self.restore_attribute_chain(attrs) 737 738 # Return immediately if the expression was another access and thus a 739 # continuation backwards along the chain. The above processing will 740 # have followed the chain all the way to its conclusion. 741 742 if self.have_access_expression(n): 743 del self.attrs[0] 744 745 return name_ref 746 747 # Attribute chain handling. 748 749 def reset_attribute_chain(self): 750 751 "Reset the attribute chain for a subexpression of an attribute access." 752 753 self.attrs = [] 754 self.chain_assignment.append(self.in_assignment) 755 self.chain_invocation.append(self.in_invocation) 756 self.in_assignment = None 757 self.in_invocation = None 758 759 def restore_attribute_chain(self, attrs): 760 761 "Restore the attribute chain for an attribute access." 762 763 self.attrs = attrs 764 self.in_assignment = self.chain_assignment.pop() 765 self.in_invocation = self.chain_invocation.pop() 766 767 def have_access_expression(self, node): 768 769 "Return whether the expression associated with 'node' is Getattr." 770 771 return isinstance(node.expr, compiler.ast.Getattr) 772 773 def get_name_for_tracking(self, name, name_ref=None, is_global=False): 774 775 """ 776 Return the name to be used for attribute usage observations involving 777 the given 'name' in the current namespace. 778 779 If the name is being used outside a function, and if 'name_ref' is 780 given and indicates a global or if 'is_global' is specified as a true 781 value, a path featuring the name in the global namespace is returned. 782 Otherwise, a path computed using the current namespace and the given 783 name is returned. 784 785 The intention of this method is to provide a suitably-qualified name 786 that can be tracked across namespaces. Where globals are being 787 referenced in class namespaces, they should be referenced using their 788 path within the module, not using a path within each class. 789 790 It may not be possible to identify a global within a function at the 791 time of inspection (since a global may appear later in a file). 792 Consequently, globals are identified by their local name rather than 793 their module-qualified path. 794 """ 795 796 # For functions, use the appropriate local names. 797 798 if self.in_function: 799 return name 800 801 # For global names outside functions, use a global name. 802 803 elif is_global or name_ref and name_ref.is_global_name(): 804 return self.get_global_path(name) 805 806 # Otherwise, establish a name in the current namespace. 807 808 else: 809 return self.get_object_path(name) 810 811 def get_path_for_access(self): 812 813 "Outside functions, register accesses at the module level." 814 815 if not self.in_function: 816 return self.name 817 else: 818 return self.get_namespace_path() 819 820 def get_module_name(self, node): 821 822 """ 823 Using the given From 'node' in this module, calculate any relative import 824 information, returning a tuple containing a module to import along with any 825 names to import based on the node's name information. 826 827 Where the returned module is given as None, whole module imports should 828 be performed for the returned modules using the returned names. 829 """ 830 831 # Absolute import. 832 833 if node.level == 0: 834 return node.modname, node.names 835 836 # Relative to an ancestor of this module. 837 838 else: 839 path = self.name.split(".") 840 level = node.level 841 842 # Relative imports treat package roots as submodules. 843 844 if split(self.filename)[-1] == "__init__.py": 845 level -= 1 846 847 if level > len(path): 848 raise InspectError("Relative import %r involves too many levels up from module %r" % ( 849 ("%s%s" % ("." * node.level, node.modname or "")), self.name)) 850 851 basename = ".".join(path[:len(path)-level]) 852 853 # Name imports from a module. 854 855 if node.modname: 856 return "%s.%s" % (basename, node.modname), node.names 857 858 # Relative whole module imports. 859 860 else: 861 return basename, node.names 862 863 def get_argnames(args): 864 865 """ 866 Return a list of all names provided by 'args'. Since tuples may be 867 employed, the arguments are traversed depth-first. 868 """ 869 870 l = [] 871 for arg in args: 872 if isinstance(arg, tuple): 873 l += get_argnames(arg) 874 else: 875 l.append(arg) 876 return l 877 878 def get_names_from_nodes(nodes): 879 880 """ 881 Return the names employed in the given 'nodes' along with the number of 882 nodes excluding sequences. 883 """ 884 885 names = set() 886 count = 0 887 888 for node in nodes: 889 890 # Add names and count them. 891 892 if isinstance(node, (compiler.ast.AssName, compiler.ast.Name)): 893 names.add(node.name) 894 count += 1 895 896 # Add names from sequences and incorporate their counts. 897 898 elif isinstance(node, (compiler.ast.AssList, compiler.ast.AssTuple, 899 compiler.ast.List, compiler.ast.Set, 900 compiler.ast.Tuple)): 901 _names, _count = get_names_from_nodes(node.nodes) 902 names.update(_names) 903 count += _count 904 905 # Count non-name, non-sequence nodes. 906 907 else: 908 count += 1 909 910 return names, count 911 912 # Result classes. 913 914 class InstructionSequence: 915 916 "A generic sequence of instructions." 917 918 def __init__(self, instructions): 919 self.instructions = instructions 920 921 def get_value_instruction(self): 922 return self.instructions[-1] 923 924 def get_init_instructions(self): 925 return self.instructions[:-1] 926 927 # Dictionary utilities. 928 929 def init_item(d, key, fn): 930 931 """ 932 Add to 'd' an entry for 'key' using the callable 'fn' to make an initial 933 value where no entry already exists. 934 """ 935 936 if not d.has_key(key): 937 d[key] = fn() 938 return d[key] 939 940 def dict_for_keys(d, keys): 941 942 "Return a new dictionary containing entries from 'd' for the given 'keys'." 943 944 nd = {} 945 for key in keys: 946 if d.has_key(key): 947 nd[key] = d[key] 948 return nd 949 950 def make_key(s): 951 952 "Make sequence 's' into a tuple-based key, first sorting its contents." 953 954 l = list(s) 955 l.sort() 956 return tuple(l) 957 958 def add_counter_item(d, key): 959 960 """ 961 Make a mapping in 'd' for 'key' to the number of keys added before it, thus 962 maintaining a mapping of keys to their order of insertion. 963 """ 964 965 if not d.has_key(key): 966 d[key] = len(d.keys()) 967 return d[key] 968 969 def remove_items(d1, d2): 970 971 "Remove from 'd1' all items from 'd2'." 972 973 for key in d2.keys(): 974 if d1.has_key(key): 975 del d1[key] 976 977 # Set utilities. 978 979 def first(s): 980 return list(s)[0] 981 982 def same(s1, s2): 983 return set(s1) == set(s2) 984 985 def order_dependencies(all_depends): 986 987 """ 988 Produce a dependency ordering for the 'all_depends' mapping. This mapping 989 has the form "A depends on B, C...". The result will order A, B, C, and so 990 on. 991 """ 992 993 usage = init_reverse_dependencies(all_depends) 994 995 # Produce an ordering by obtaining exposed items (required by items already 996 # processed) and putting them at the start of the list. 997 998 ordered = [] 999 1000 while usage: 1001 have_next = False 1002 1003 for key, n in usage.items(): 1004 1005 # Add items needed by no other items to the ordering. 1006 1007 if not n: 1008 remove_dependency(key, all_depends, usage, ordered) 1009 have_next = True 1010 1011 if not have_next: 1012 raise ValueError, usage 1013 1014 return ordered 1015 1016 def order_dependencies_partial(all_depends): 1017 1018 """ 1019 Produce a dependency ordering for the 'all_depends' mapping. This mapping 1020 has the form "A depends on B, C...". The result will order A, B, C, and so 1021 on. Where cycles exist, they will be broken and a partial ordering returned. 1022 """ 1023 1024 usage = init_reverse_dependencies(all_depends) 1025 1026 # Duplicate the dependencies for subsequent modification. 1027 1028 new_depends = {} 1029 for key, values in all_depends.items(): 1030 new_depends[key] = set(values) 1031 1032 all_depends = new_depends 1033 1034 # Produce an ordering by obtaining exposed items (required by items already 1035 # processed) and putting them at the start of the list. 1036 1037 ordered = [] 1038 1039 while usage: 1040 least = None 1041 least_key = None 1042 1043 for key, n in usage.items(): 1044 1045 # Add items needed by no other items to the ordering. 1046 1047 if not n: 1048 remove_dependency(key, all_depends, usage, ordered) 1049 least = 0 1050 1051 # When breaking cycles, note the least used items. 1052 1053 elif least is None or len(n) < least: 1054 least_key = key 1055 least = len(n) 1056 1057 if least: 1058 transfer_dependencies(least_key, all_depends, usage, ordered) 1059 1060 return ordered 1061 1062 def init_reverse_dependencies(all_depends): 1063 1064 """ 1065 From 'all_depends', providing a mapping of the form "A depends on B, C...", 1066 record the reverse dependencies, making a mapping of the form 1067 "B is needed by A", "C is needed by A", and so on. 1068 """ 1069 1070 usage = {} 1071 1072 # Record path-based dependencies. 1073 1074 for key in all_depends.keys(): 1075 usage[key] = set() 1076 1077 for key, depends in all_depends.items(): 1078 for depend in depends: 1079 init_item(usage, depend, set) 1080 usage[depend].add(key) 1081 1082 return usage 1083 1084 def transfer_dependencies(key, all_depends, usage, ordered): 1085 1086 """ 1087 Transfer items needed by 'key' to those items needing 'key', found using 1088 'all_depends', and updating 'usage'. Insert 'key' into the 'ordered' 1089 collection of dependencies. 1090 1091 If "A is needed by X" and "B is needed by A", then transferring items needed 1092 by A will cause "B is needed by X" to be recorded as a consequence. 1093 1094 Transferring items also needs to occur in the reverse mapping, so that 1095 "A needs B" and "X needs A", then the consequence must be recorded as 1096 "X needs B". 1097 """ 1098 1099 ordered.insert(0, key) 1100 1101 needing = usage[key] # A is needed by X 1102 needed = all_depends.get(key) # A needs B 1103 1104 if needing: 1105 for depend in needing: 1106 l = all_depends.get(depend) 1107 if not l: 1108 continue 1109 1110 l.remove(key) # X needs (A) 1111 1112 if needed: 1113 l.update(needed) # X needs B... 1114 1115 # Prevent self references. 1116 1117 if depend in needed: 1118 l.remove(depend) 1119 1120 if needed: 1121 for depend in needed: 1122 l = usage.get(depend) 1123 if not l: 1124 continue 1125 1126 l.remove(key) # B is needed by (A) 1127 l.update(needing) # B is needed by X... 1128 1129 # Prevent self references. 1130 1131 if depend in needing: 1132 l.remove(depend) 1133 1134 if needed: 1135 del all_depends[key] 1136 del usage[key] 1137 1138 def remove_dependency(key, all_depends, usage, ordered): 1139 1140 """ 1141 Remove 'key', found in 'all_depends', from 'usage', inserting it into the 1142 'ordered' collection of dependencies. 1143 1144 Given that 'usage' for a given key A would indicate that "A needs <nothing>" 1145 upon removing A from 'usage', the outcome is that all keys needing A will 1146 have A removed from their 'usage' records. 1147 1148 So, if "B needs A", removing A will cause "B needs <nothing>" to be recorded 1149 as a consequence. 1150 """ 1151 1152 ordered.insert(0, key) 1153 1154 depends = all_depends.get(key) 1155 1156 # Reduce usage of the referenced items. 1157 1158 if depends: 1159 for depend in depends: 1160 usage[depend].remove(key) 1161 1162 del usage[key] 1163 1164 # General input/output. 1165 1166 def readfile(filename): 1167 1168 "Return the contents of 'filename'." 1169 1170 f = open(filename) 1171 try: 1172 return f.read() 1173 finally: 1174 f.close() 1175 1176 def writefile(filename, s): 1177 1178 "Write to 'filename' the string 's'." 1179 1180 f = open(filename, "w") 1181 try: 1182 f.write(s) 1183 finally: 1184 f.close() 1185 1186 # General encoding. 1187 1188 def sorted_output(x): 1189 1190 "Sort sequence 'x' and return a string with commas separating the values." 1191 1192 x = map(str, x) 1193 x.sort() 1194 return ", ".join(x) 1195 1196 def get_string_details(literals, encoding): 1197 1198 """ 1199 Determine whether 'literals' represent Unicode strings or byte strings, 1200 using 'encoding' to reproduce byte sequences. 1201 1202 Each literal is the full program representation including prefix and quotes 1203 recoded by the parser to UTF-8. Thus, any literal found to represent a byte 1204 string needs to be translated back to its original encoding. 1205 1206 Return a single encoded literal value, a type name, and the original 1207 encoding as a tuple. 1208 """ 1209 1210 typename = "unicode" 1211 1212 l = [] 1213 1214 for s in literals: 1215 out, _typename = get_literal_details(s) 1216 if _typename == "str": 1217 typename = "str" 1218 l.append(out) 1219 1220 out = "".join(l) 1221 1222 # For Unicode values, convert to the UTF-8 program representation. 1223 1224 if typename == "unicode": 1225 return out.encode("utf-8"), typename, encoding 1226 1227 # For byte string values, convert back to the original encoding. 1228 1229 else: 1230 return out.encode(encoding), typename, encoding 1231 1232 def get_literal_details(s): 1233 1234 """ 1235 Determine whether 's' represents a Unicode string or a byte string, where 1236 's' contains the full program representation of a literal including prefix 1237 and quotes, recoded by the parser to UTF-8. 1238 1239 Find and convert Unicode values starting with <backslash>u or <backslash>U, 1240 and byte or Unicode values starting with <backslash><octal digit> or 1241 <backslash>x. 1242 1243 Literals prefixed with "u" cause <backslash><octal digit> and <backslash>x 1244 to be considered as Unicode values. Otherwise, they produce byte values and 1245 cause unprefixed strings to be considered as byte strings. 1246 1247 Literals prefixed with "r" do not have their backslash-encoded values 1248 converted unless also prefixed with "u", in which case only the above value 1249 formats are converted, not any of the other special sequences for things 1250 like newlines. 1251 1252 Return the literal value as a Unicode object together with the appropriate 1253 type name in a tuple. 1254 """ 1255 1256 l = [] 1257 1258 # Identify the quote character and use it to identify the prefix. 1259 1260 quote_type = s[-1] 1261 prefix_end = s.find(quote_type) 1262 prefix = s[:prefix_end].lower() 1263 1264 if prefix not in ("", "b", "br", "r", "u", "ur"): 1265 raise ValueError, "String literal does not have a supported prefix: %s" % s 1266 1267 if "b" in prefix: 1268 typename = "str" 1269 else: 1270 typename = "unicode" 1271 1272 # Identify triple quotes or single quotes. 1273 1274 if len(s) >= 6 and s[-2] == quote_type and s[-3] == quote_type: 1275 quote = s[prefix_end:prefix_end+3] 1276 current = prefix_end + 3 1277 end = len(s) - 3 1278 else: 1279 quote = s[prefix_end] 1280 current = prefix_end + 1 1281 end = len(s) - 1 1282 1283 # Conversions of some quoted values. 1284 1285 searches = { 1286 "u" : (6, 16), 1287 "U" : (10, 16), 1288 "x" : (4, 16), 1289 } 1290 1291 octal_digits = map(str, range(0, 8)) 1292 1293 # Translations of some quoted values. 1294 1295 escaped = { 1296 "\\" : "\\", "'" : "'", '"' : '"', 1297 "a" : "\a", "b" : "\b", "f" : "\f", 1298 "n" : "\n", "r" : "\r", "t" : "\t", 1299 } 1300 1301 while current < end: 1302 1303 # Look for quoted values. 1304 1305 index = s.find("\\", current) 1306 if index == -1 or index + 1 == end: 1307 l.append(s[current:end]) 1308 break 1309 1310 # Add the preceding text. 1311 1312 l.append(s[current:index]) 1313 1314 # Handle quoted text. 1315 1316 term = s[index+1] 1317 1318 # Add Unicode values. Where a string is u-prefixed, even \o and \x 1319 # produce Unicode values. 1320 1321 if typename == "unicode" and ( 1322 term in ("u", "U") or 1323 "u" in prefix and (term == "x" or term in octal_digits)): 1324 1325 needed, base = searches.get(term, (4, 8)) 1326 value = convert_quoted_value(s, index, needed, end, base, unichr) 1327 l.append(value) 1328 current = index + needed 1329 1330 # Add raw byte values, changing the string type. 1331 1332 elif "r" not in prefix and ( 1333 term == "x" or term in octal_digits): 1334 1335 needed, base = searches.get(term, (4, 8)) 1336 value = convert_quoted_value(s, index, needed, end, base, chr) 1337 l.append(value) 1338 typename = "str" 1339 current = index + needed 1340 1341 # Add other escaped values. 1342 1343 elif "r" not in prefix and escaped.has_key(term): 1344 l.append(escaped[term]) 1345 current = index + 2 1346 1347 # Add other text as found. 1348 1349 else: 1350 l.append(s[index:index+2]) 1351 current = index + 2 1352 1353 # Collect the components into a single Unicode object. Since the literal 1354 # text was already in UTF-8 form, interpret plain strings as UTF-8 1355 # sequences. 1356 1357 out = [] 1358 1359 for value in l: 1360 if isinstance(value, unicode): 1361 out.append(value) 1362 else: 1363 out.append(unicode(value, "utf-8")) 1364 1365 return "".join(out), typename 1366 1367 def convert_quoted_value(s, index, needed, end, base, fn): 1368 1369 """ 1370 Interpret a quoted value in 's' at 'index' with the given 'needed' number of 1371 positions, and with the given 'end' indicating the first position after the 1372 end of the actual string content. 1373 1374 Use 'base' as the numerical base when interpreting the value, and use 'fn' 1375 to convert the value to an appropriate type. 1376 """ 1377 1378 s = s[index:min(index+needed, end)] 1379 1380 # Not a complete occurrence. 1381 1382 if len(s) < needed: 1383 return s 1384 1385 # Test for a well-formed value. 1386 1387 try: 1388 first = base == 8 and 1 or 2 1389 value = int(s[first:needed], base) 1390 except ValueError: 1391 return s 1392 else: 1393 return fn(value) 1394 1395 # Attribute chain decoding. 1396 1397 def get_attrnames(attrnames): 1398 1399 """ 1400 Split the qualified attribute chain 'attrnames' into its components, 1401 handling special attributes starting with "#" that indicate type 1402 conformance. 1403 """ 1404 1405 if attrnames.startswith("#"): 1406 return [attrnames] 1407 else: 1408 return attrnames.split(".") 1409 1410 def get_attrname_from_location(location): 1411 1412 """ 1413 Extract the first attribute from the attribute names employed in a 1414 'location'. 1415 """ 1416 1417 path, name, attrnames, access = location 1418 if not attrnames: 1419 return attrnames 1420 return get_attrnames(attrnames)[0] 1421 1422 def get_name_path(path, name): 1423 1424 "Return a suitable qualified name from the given 'path' and 'name'." 1425 1426 if "." in name: 1427 return name 1428 else: 1429 return "%s.%s" % (path, name) 1430 1431 # Usage-related functions. 1432 1433 def get_types_for_usage(attrnames, objects): 1434 1435 """ 1436 Identify the types that can support the given 'attrnames', using the 1437 given 'objects' as the catalogue of type details. 1438 """ 1439 1440 types = [] 1441 for name, _attrnames in objects.items(): 1442 if set(attrnames).issubset(_attrnames): 1443 types.append(name) 1444 return types 1445 1446 def get_invoked_attributes(usage): 1447 1448 "Obtain invoked attribute from the given 'usage'." 1449 1450 invoked = [] 1451 if usage: 1452 for attrname, invocation, assignment in usage: 1453 if invocation: 1454 invoked.append(attrname) 1455 return invoked 1456 1457 def get_assigned_attributes(usage): 1458 1459 "Obtain assigned attribute from the given 'usage'." 1460 1461 assigned = [] 1462 if usage: 1463 for attrname, invocation, assignment in usage: 1464 if assignment: 1465 assigned.append(attrname) 1466 return assigned 1467 1468 # Type and module functions. 1469 # NOTE: This makes assumptions about the __builtins__ structure. 1470 1471 def get_builtin_module(name): 1472 1473 "Return the module name containing the given type 'name'." 1474 1475 if name == "string": 1476 modname = "str" 1477 elif name == "utf8string": 1478 modname = "unicode" 1479 elif name == "NoneType": 1480 modname = "none" 1481 else: 1482 modname = name 1483 1484 return "__builtins__.%s" % modname 1485 1486 def get_builtin_type(name): 1487 1488 "Return the type name provided by the given Python value 'name'." 1489 1490 if name == "str": 1491 return "string" 1492 elif name == "unicode": 1493 return "utf8string" 1494 else: 1495 return name 1496 1497 def get_builtin_class(name): 1498 1499 "Return the full name of the built-in class having the given 'name'." 1500 1501 typename = get_builtin_type(name) 1502 module = get_builtin_module(typename) 1503 return "%s.%s" % (module, typename) 1504 1505 # Useful data. 1506 1507 predefined_constants = "False", "None", "NotImplemented", "True" 1508 1509 operator_functions = { 1510 1511 # Fundamental operations. 1512 1513 "is" : "is_", 1514 "is not" : "is_not", 1515 1516 # Binary operations. 1517 1518 "in" : "in_", 1519 "not in" : "not_in", 1520 "Add" : "add", 1521 "Bitand" : "and_", 1522 "Bitor" : "or_", 1523 "Bitxor" : "xor", 1524 "Div" : "div", 1525 "FloorDiv" : "floordiv", 1526 "LeftShift" : "lshift", 1527 "Mod" : "mod", 1528 "Mul" : "mul", 1529 "Power" : "pow", 1530 "RightShift" : "rshift", 1531 "Sub" : "sub", 1532 1533 # Unary operations. 1534 1535 "Invert" : "invert", 1536 "UnaryAdd" : "pos", 1537 "UnarySub" : "neg", 1538 1539 # Augmented assignment. 1540 1541 "+=" : "iadd", 1542 "-=" : "isub", 1543 "*=" : "imul", 1544 "/=" : "idiv", 1545 "//=" : "ifloordiv", 1546 "%=" : "imod", 1547 "**=" : "ipow", 1548 "<<=" : "ilshift", 1549 ">>=" : "irshift", 1550 "&=" : "iand", 1551 "^=" : "ixor", 1552 "|=" : "ior", 1553 1554 # Comparisons. 1555 1556 "==" : "eq", 1557 "!=" : "ne", 1558 "<" : "lt", 1559 "<=" : "le", 1560 ">=" : "ge", 1561 ">" : "gt", 1562 } 1563 1564 # vim: tabstop=4 expandtab shiftwidth=4