1 #!/usr/bin/env python 2 3 """ 4 Common functions. 5 6 Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013, 7 2014, 2015, 2016, 2017 Paul Boddie <paul@boddie.org.uk> 8 9 This program is free software; you can redistribute it and/or modify it under 10 the terms of the GNU General Public License as published by the Free Software 11 Foundation; either version 3 of the License, or (at your option) any later 12 version. 13 14 This program is distributed in the hope that it will be useful, but WITHOUT 15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 16 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 17 details. 18 19 You should have received a copy of the GNU General Public License along with 20 this program. If not, see <http://www.gnu.org/licenses/>. 21 """ 22 23 from compiler.transformer import Transformer 24 from errors import InspectError 25 from os import listdir, makedirs, remove 26 from os.path import exists, getmtime, isdir, join, split 27 from results import ConstantValueRef, LiteralSequenceRef, NameRef 28 import compiler.ast 29 30 class CommonOutput: 31 32 "Common output functionality." 33 34 def check_output(self, options=None): 35 36 "Check the existing output and remove it if irrelevant." 37 38 if not exists(self.output): 39 makedirs(self.output) 40 41 details = self.importer.get_cache_details() 42 recorded_details = self.get_output_details() 43 44 # Combine cache details with any options. 45 46 full_details = options and (details + " " + options) or details 47 48 if recorded_details != full_details: 49 self.remove_output() 50 51 writefile(self.get_output_details_filename(), full_details) 52 53 def get_output_details_filename(self): 54 55 "Return the output details filename." 56 57 return join(self.output, "$details") 58 59 def get_output_details(self): 60 61 "Return details of the existing output." 62 63 details_filename = self.get_output_details_filename() 64 65 if not exists(details_filename): 66 return None 67 else: 68 return readfile(details_filename) 69 70 def remove_output(self, dirname=None): 71 72 "Remove the output." 73 74 dirname = dirname or self.output 75 76 for filename in listdir(dirname): 77 path = join(dirname, filename) 78 if isdir(path): 79 self.remove_output(path) 80 else: 81 remove(path) 82 83 def copy(source, target, only_if_newer=True): 84 85 "Copy a text file from 'source' to 'target'." 86 87 if isdir(target): 88 target = join(target, split(source)[-1]) 89 90 if only_if_newer and not is_newer(source, target): 91 return 92 93 infile = open(source) 94 outfile = open(target, "w") 95 96 try: 97 outfile.write(infile.read()) 98 finally: 99 outfile.close() 100 infile.close() 101 102 def is_newer(source, target): 103 104 "Return whether 'source' is newer than 'target'." 105 106 if exists(target): 107 target_mtime = getmtime(target) 108 source_mtime = getmtime(source) 109 return source_mtime > target_mtime 110 111 return True 112 113 class CommonModule: 114 115 "A common module representation." 116 117 def __init__(self, name, importer): 118 119 """ 120 Initialise this module with the given 'name' and an 'importer' which is 121 used to provide access to other modules when required. 122 """ 123 124 self.name = name 125 self.importer = importer 126 self.filename = None 127 128 # Inspection-related attributes. 129 130 self.astnode = None 131 self.encoding = None 132 self.temp = {} 133 self.lambdas = {} 134 135 # Constants, literals and values. 136 137 self.constants = {} 138 self.constant_values = {} 139 self.literals = {} 140 self.literal_types = {} 141 142 # Nested namespaces. 143 144 self.namespace_path = [] 145 self.in_function = False 146 147 # Retain the assignment value expression and track invocations. 148 149 self.in_assignment = None 150 self.in_invocation = None 151 152 # Attribute chain state management. 153 154 self.attrs = [] 155 self.chain_assignment = [] 156 self.chain_invocation = [] 157 158 def __repr__(self): 159 return "CommonModule(%r, %r)" % (self.name, self.importer) 160 161 def parse_file(self, filename): 162 163 "Parse the file with the given 'filename', initialising attributes." 164 165 self.filename = filename 166 167 # Use the Transformer directly to obtain encoding information. 168 169 t = Transformer() 170 f = open(filename) 171 172 try: 173 self.astnode = t.parsesuite(f.read() + "\n") 174 self.encoding = t.encoding 175 finally: 176 f.close() 177 178 # Module-relative naming. 179 180 def get_global_path(self, name): 181 return "%s.%s" % (self.name, name) 182 183 def get_namespace_path(self): 184 return ".".join([self.name] + self.namespace_path) 185 186 def get_object_path(self, name): 187 return ".".join([self.name] + self.namespace_path + [name]) 188 189 def get_parent_path(self): 190 return ".".join([self.name] + self.namespace_path[:-1]) 191 192 # Namespace management. 193 194 def enter_namespace(self, name): 195 196 "Enter the namespace having the given 'name'." 197 198 self.namespace_path.append(name) 199 200 def exit_namespace(self): 201 202 "Exit the current namespace." 203 204 self.namespace_path.pop() 205 206 # Constant reference naming. 207 208 def get_constant_name(self, value, value_type, encoding=None): 209 210 """ 211 Add a new constant to the current namespace for 'value' with 212 'value_type'. 213 """ 214 215 path = self.get_namespace_path() 216 init_item(self.constants, path, dict) 217 return "$c%d" % add_counter_item(self.constants[path], (value, value_type, encoding)) 218 219 # Literal reference naming. 220 221 def get_literal_name(self): 222 223 "Add a new literal to the current namespace." 224 225 path = self.get_namespace_path() 226 init_item(self.literals, path, lambda: 0) 227 return "$C%d" % self.literals[path] 228 229 def next_literal(self): 230 self.literals[self.get_namespace_path()] += 1 231 232 # Temporary variable naming. 233 234 def get_temporary_name(self): 235 path = self.get_namespace_path() 236 init_item(self.temp, path, lambda: 0) 237 return "$t%d" % self.temp[path] 238 239 def next_temporary(self): 240 self.temp[self.get_namespace_path()] += 1 241 242 # Arbitrary function naming. 243 244 def get_lambda_name(self): 245 path = self.get_namespace_path() 246 init_item(self.lambdas, path, lambda: 0) 247 name = "$l%d" % self.lambdas[path] 248 self.lambdas[path] += 1 249 return name 250 251 def reset_lambdas(self): 252 self.lambdas = {} 253 254 # Constant and literal recording. 255 256 def get_constant_value(self, value, literals=None): 257 258 """ 259 Encode the 'value' if appropriate, returning a value, a typename and any 260 encoding. 261 """ 262 263 if isinstance(value, unicode): 264 return value.encode("utf-8"), "unicode", self.encoding 265 266 # Attempt to convert plain strings to text. 267 268 elif isinstance(value, str) and self.encoding: 269 try: 270 return get_string_details(literals, self.encoding) 271 except UnicodeDecodeError: 272 pass 273 274 return value, value.__class__.__name__, None 275 276 def get_constant_reference(self, ref, value, encoding=None): 277 278 """ 279 Return a constant reference for the given 'ref' type and 'value', with 280 the optional 'encoding' applying to text values. 281 """ 282 283 constant_name = self.get_constant_name(value, ref.get_origin(), encoding) 284 285 # Return a reference for the constant. 286 287 objpath = self.get_object_path(constant_name) 288 name_ref = ConstantValueRef(constant_name, ref.instance_of(objpath), value) 289 290 # Record the value and type for the constant. 291 292 self._reserve_constant(objpath, name_ref.value, name_ref.get_origin(), encoding) 293 return name_ref 294 295 def reserve_constant(self, objpath, value, origin, encoding=None): 296 297 """ 298 Reserve a constant within 'objpath' with the given 'value' and having a 299 type with the given 'origin', with the optional 'encoding' applying to 300 text values. 301 """ 302 303 constant_name = self.get_constant_name(value, origin) 304 objpath = self.get_object_path(constant_name) 305 self._reserve_constant(objpath, value, origin, encoding) 306 307 def _reserve_constant(self, objpath, value, origin, encoding): 308 309 """ 310 Store a constant for 'objpath' with the given 'value' and 'origin', with 311 the optional 'encoding' applying to text values. 312 """ 313 314 self.constant_values[objpath] = value, origin, encoding 315 316 def get_literal_reference(self, name, ref, items, cls): 317 318 """ 319 Return a literal reference for the given type 'name', literal 'ref', 320 node 'items' and employing the given 'cls' as the class of the returned 321 reference object. 322 """ 323 324 # Construct an invocation using the items as arguments. 325 326 typename = "$L%s" % name 327 328 invocation = compiler.ast.CallFunc( 329 compiler.ast.Name(typename), 330 items 331 ) 332 333 # Get a name for the actual literal. 334 335 instname = self.get_literal_name() 336 self.next_literal() 337 338 # Record the type for the literal. 339 340 objpath = self.get_object_path(instname) 341 self.literal_types[objpath] = ref.get_origin() 342 343 # Return a wrapper for the invocation exposing the items. 344 345 return cls( 346 instname, 347 ref.instance_of(), 348 self.process_structure_node(invocation), 349 invocation.args 350 ) 351 352 # Node handling. 353 354 def process_structure(self, node): 355 356 """ 357 Within the given 'node', process the program structure. 358 359 During inspection, this will process global declarations, adjusting the 360 module namespace, and import statements, building a module dependency 361 hierarchy. 362 363 During translation, this will consult deduced program information and 364 output translated code. 365 """ 366 367 l = [] 368 for n in node.getChildNodes(): 369 l.append(self.process_structure_node(n)) 370 return l 371 372 def process_augassign_node(self, n): 373 374 "Process the given augmented assignment node 'n'." 375 376 op = operator_functions[n.op] 377 378 if isinstance(n.node, compiler.ast.Getattr): 379 target = compiler.ast.AssAttr(n.node.expr, n.node.attrname, "OP_ASSIGN") 380 elif isinstance(n.node, compiler.ast.Name): 381 target = compiler.ast.AssName(n.node.name, "OP_ASSIGN") 382 else: 383 target = n.node 384 385 assignment = compiler.ast.Assign( 386 [target], 387 compiler.ast.CallFunc( 388 compiler.ast.Name("$op%s" % op), 389 [n.node, n.expr])) 390 391 return self.process_structure_node(assignment) 392 393 def process_assignment_for_object(self, original_name, source): 394 395 """ 396 Return an assignment operation making 'original_name' refer to the given 397 'source'. 398 """ 399 400 assignment = compiler.ast.Assign( 401 [compiler.ast.AssName(original_name, "OP_ASSIGN")], 402 source 403 ) 404 405 return self.process_structure_node(assignment) 406 407 def process_assignment_node_items(self, n, expr): 408 409 """ 410 Process the given assignment node 'n' whose children are to be assigned 411 items of 'expr'. 412 """ 413 414 name_ref = self.process_structure_node(expr) 415 416 # Either unpack the items and present them directly to each assignment 417 # node. 418 419 if isinstance(name_ref, LiteralSequenceRef) and \ 420 self.process_literal_sequence_items(n, name_ref): 421 422 pass 423 424 # Or have the assignment nodes access each item via the sequence API. 425 426 else: 427 self.process_assignment_node_items_by_position(n, expr, name_ref) 428 429 def process_assignment_node_items_by_position(self, n, expr, name_ref): 430 431 """ 432 Process the given sequence assignment node 'n', converting the node to 433 the separate assignment of each target using positional access on a 434 temporary variable representing the sequence. Use 'expr' as the assigned 435 value and 'name_ref' as the reference providing any existing temporary 436 variable. 437 """ 438 439 assignments = [] 440 441 # Employ existing names to access the sequence. 442 # Literal sequences do not provide names of accessible objects. 443 444 if isinstance(name_ref, NameRef) and not isinstance(name_ref, LiteralSequenceRef): 445 temp = name_ref.name 446 447 # For other expressions, create a temporary name to reference the items. 448 449 else: 450 temp = self.get_temporary_name() 451 self.next_temporary() 452 453 assignments.append( 454 compiler.ast.Assign([compiler.ast.AssName(temp, "OP_ASSIGN")], expr) 455 ) 456 457 # Assign the items to the target nodes. 458 459 for i, node in enumerate(n.nodes): 460 assignments.append( 461 compiler.ast.Assign([node], compiler.ast.Subscript( 462 compiler.ast.Name(temp), "OP_APPLY", [compiler.ast.Const(i, str(i))])) 463 ) 464 465 return self.process_structure_node(compiler.ast.Stmt(assignments)) 466 467 def process_literal_sequence_items(self, n, name_ref): 468 469 """ 470 Process the given assignment node 'n', obtaining from the given 471 'name_ref' the items to be assigned to the assignment targets. 472 473 Return whether this method was able to process the assignment node as 474 a sequence of direct assignments. 475 """ 476 477 if len(n.nodes) == len(name_ref.items): 478 assigned_names, count = get_names_from_nodes(n.nodes) 479 accessed_names, _count = get_names_from_nodes(name_ref.items) 480 481 # Only assign directly between items if all assigned names are 482 # plain names (not attribute assignments), and if the assigned names 483 # do not appear in the accessed names. 484 485 if len(assigned_names) == count and \ 486 not assigned_names.intersection(accessed_names): 487 488 for node, item in zip(n.nodes, name_ref.items): 489 self.process_assignment_node(node, item) 490 491 return True 492 493 # Otherwise, use the position-based mechanism to obtain values. 494 495 else: 496 return False 497 else: 498 raise InspectError("In %s, item assignment needing %d items is given %d items." % ( 499 self.get_namespace_path(), len(n.nodes), len(name_ref.items))) 500 501 def process_compare_node(self, n): 502 503 """ 504 Process the given comparison node 'n', converting an operator sequence 505 from... 506 507 <expr1> <op1> <expr2> <op2> <expr3> 508 509 ...to... 510 511 <op1>(<expr1>, <expr2>) and <op2>(<expr2>, <expr3>) 512 """ 513 514 invocations = [] 515 last = n.expr 516 517 for op, op_node in n.ops: 518 op = operator_functions.get(op) 519 520 invocations.append(compiler.ast.CallFunc( 521 compiler.ast.Name("$op%s" % op), 522 [last, op_node])) 523 524 last = op_node 525 526 if len(invocations) > 1: 527 result = compiler.ast.And(invocations) 528 else: 529 result = invocations[0] 530 531 return self.process_structure_node(result) 532 533 def process_dict_node(self, node): 534 535 """ 536 Process the given dictionary 'node', returning a list of (key, value) 537 tuples. 538 """ 539 540 l = [] 541 for key, value in node.items: 542 l.append(( 543 self.process_structure_node(key), 544 self.process_structure_node(value))) 545 return l 546 547 def process_for_node(self, n): 548 549 """ 550 Generate attribute accesses for {n.list}.__iter__ and the next method on 551 the iterator, producing a replacement node for the original. 552 """ 553 554 t0 = self.get_temporary_name() 555 self.next_temporary() 556 t1 = self.get_temporary_name() 557 self.next_temporary() 558 559 node = compiler.ast.Stmt([ 560 561 # <t0> = {n.list} 562 # <t1> = <t0>.__iter__() 563 564 compiler.ast.Assign( 565 [compiler.ast.AssName(t0, "OP_ASSIGN")], 566 n.list), 567 568 compiler.ast.Assign( 569 [compiler.ast.AssName(t1, "OP_ASSIGN")], 570 compiler.ast.CallFunc( 571 compiler.ast.Getattr(compiler.ast.Name(t0), "__iter__"), 572 [])), 573 574 # try: 575 # while True: 576 # <var>... = <t1>.next() 577 # ... 578 # except StopIteration: 579 # pass 580 581 compiler.ast.TryExcept( 582 compiler.ast.While( 583 compiler.ast.Name("True"), 584 compiler.ast.Stmt([ 585 compiler.ast.Assign( 586 [n.assign], 587 compiler.ast.CallFunc( 588 compiler.ast.Getattr(compiler.ast.Name(t1), "next"), 589 [] 590 )), 591 n.body]), 592 None), 593 [(compiler.ast.Name("StopIteration"), None, compiler.ast.Stmt([compiler.ast.Pass()]))], 594 None) 595 ]) 596 597 self.process_structure_node(node) 598 599 def process_literal_sequence_node(self, n, name, ref, cls): 600 601 """ 602 Process the given literal sequence node 'n' as a function invocation, 603 with 'name' indicating the type of the sequence, and 'ref' being a 604 reference to the type. The 'cls' is used to instantiate a suitable name 605 reference. 606 """ 607 608 if name == "dict": 609 items = [] 610 for key, value in n.items: 611 items.append(compiler.ast.Tuple([key, value])) 612 else: # name in ("list", "tuple"): 613 items = n.nodes 614 615 return self.get_literal_reference(name, ref, items, cls) 616 617 def process_operator_node(self, n): 618 619 """ 620 Process the given operator node 'n' as an operator function invocation. 621 """ 622 623 opname = n.__class__.__name__ 624 operands = n.getChildNodes() 625 626 # Convert a unary operation to an invocation. 627 628 op = unary_operator_functions.get(opname) 629 630 if op: 631 invocation = compiler.ast.CallFunc( 632 compiler.ast.Name("$op%s" % op), 633 [operands[0]] 634 ) 635 636 # Convert a single operator with a list of operands to a combination of 637 # pairwise operations. 638 639 else: 640 op = operator_functions[opname] 641 invocation = self._process_operator_node(op, operands) 642 643 return self.process_structure_node(invocation) 644 645 def _process_operator_node(self, op, operands): 646 647 """ 648 Process the given 'op', being an operator function, together with the 649 supplied 'operands', returning either a single remaining operand or an 650 invocation combining the operands. 651 """ 652 653 remaining = operands[1:] 654 if not remaining: 655 return operands[0] 656 657 return compiler.ast.CallFunc( 658 compiler.ast.Name("$op%s" % op), 659 [operands[0], self._process_operator_node(op, remaining)] 660 ) 661 662 def process_print_node(self, n): 663 664 """ 665 Process the given print node 'n' as an invocation on a stream of the 666 form... 667 668 $print(dest, args, nl) 669 670 The special function name will be translated elsewhere. 671 """ 672 673 nl = isinstance(n, compiler.ast.Printnl) 674 invocation = compiler.ast.CallFunc( 675 compiler.ast.Name("$print"), 676 [n.dest or compiler.ast.Name("None"), 677 compiler.ast.List(list(n.nodes)), 678 nl and compiler.ast.Name("True") or compiler.ast.Name("False")] 679 ) 680 return self.process_structure_node(invocation) 681 682 def process_slice_node(self, n, expr=None): 683 684 """ 685 Process the given slice node 'n' as a method invocation. 686 """ 687 688 if n.flags == "OP_ASSIGN": op = "__setslice__" 689 elif n.flags == "OP_DELETE": op = "__delslice__" 690 else: op = "__getslice__" 691 692 invocation = compiler.ast.CallFunc( 693 compiler.ast.Getattr(n.expr, op), 694 [n.lower or compiler.ast.Name("None"), n.upper or compiler.ast.Name("None")] + 695 (expr and [expr] or []) 696 ) 697 698 # Fix parse tree structure. 699 700 if op == "__delslice__": 701 invocation = compiler.ast.Discard(invocation) 702 703 return self.process_structure_node(invocation) 704 705 def process_sliceobj_node(self, n): 706 707 """ 708 Process the given slice object node 'n' as a slice constructor. 709 """ 710 711 op = "slice" 712 invocation = compiler.ast.CallFunc( 713 compiler.ast.Name("$op%s" % op), 714 n.nodes 715 ) 716 return self.process_structure_node(invocation) 717 718 def process_subscript_node(self, n, expr=None): 719 720 """ 721 Process the given subscript node 'n' as a method invocation. 722 """ 723 724 if n.flags == "OP_ASSIGN": op = "__setitem__" 725 elif n.flags == "OP_DELETE": op = "__delitem__" 726 else: op = "__getitem__" 727 728 invocation = compiler.ast.CallFunc( 729 compiler.ast.Getattr(n.expr, op), 730 list(n.subs) + (expr and [expr] or []) 731 ) 732 733 # Fix parse tree structure. 734 735 if op == "__delitem__": 736 invocation = compiler.ast.Discard(invocation) 737 738 return self.process_structure_node(invocation) 739 740 def process_attribute_chain(self, n): 741 742 """ 743 Process the given attribute access node 'n'. Return a reference 744 describing the expression. 745 """ 746 747 # AssAttr/Getattr are nested with the outermost access being the last 748 # access in any chain. 749 750 self.attrs.insert(0, n.attrname) 751 attrs = self.attrs 752 753 # Break attribute chains where non-access nodes are found. 754 755 if not self.have_access_expression(n): 756 self.reset_attribute_chain() 757 758 # Descend into the expression, extending backwards any existing chain, 759 # or building another for the expression. 760 761 name_ref = self.process_structure_node(n.expr) 762 763 # Restore chain information applying to this node. 764 765 if not self.have_access_expression(n): 766 self.restore_attribute_chain(attrs) 767 768 # Return immediately if the expression was another access and thus a 769 # continuation backwards along the chain. The above processing will 770 # have followed the chain all the way to its conclusion. 771 772 if self.have_access_expression(n): 773 del self.attrs[0] 774 775 return name_ref 776 777 # Attribute chain handling. 778 779 def reset_attribute_chain(self): 780 781 "Reset the attribute chain for a subexpression of an attribute access." 782 783 self.attrs = [] 784 self.chain_assignment.append(self.in_assignment) 785 self.chain_invocation.append(self.in_invocation) 786 self.in_assignment = None 787 self.in_invocation = None 788 789 def restore_attribute_chain(self, attrs): 790 791 "Restore the attribute chain for an attribute access." 792 793 self.attrs = attrs 794 self.in_assignment = self.chain_assignment.pop() 795 self.in_invocation = self.chain_invocation.pop() 796 797 def have_access_expression(self, node): 798 799 "Return whether the expression associated with 'node' is Getattr." 800 801 return isinstance(node.expr, compiler.ast.Getattr) 802 803 def get_name_for_tracking(self, name, name_ref=None, is_global=False): 804 805 """ 806 Return the name to be used for attribute usage observations involving 807 the given 'name' in the current namespace. 808 809 If the name is being used outside a function, and if 'name_ref' is 810 given and indicates a global or if 'is_global' is specified as a true 811 value, a path featuring the name in the global namespace is returned. 812 Otherwise, a path computed using the current namespace and the given 813 name is returned. 814 815 The intention of this method is to provide a suitably-qualified name 816 that can be tracked across namespaces. Where globals are being 817 referenced in class namespaces, they should be referenced using their 818 path within the module, not using a path within each class. 819 820 It may not be possible to identify a global within a function at the 821 time of inspection (since a global may appear later in a file). 822 Consequently, globals are identified by their local name rather than 823 their module-qualified path. 824 """ 825 826 # For functions, use the appropriate local names. 827 828 if self.in_function: 829 return name 830 831 # For global names outside functions, use a global name. 832 833 elif is_global or name_ref and name_ref.is_global_name(): 834 return self.get_global_path(name) 835 836 # Otherwise, establish a name in the current namespace. 837 838 else: 839 return self.get_object_path(name) 840 841 def get_path_for_access(self): 842 843 "Outside functions, register accesses at the module level." 844 845 if not self.in_function: 846 return self.name 847 else: 848 return self.get_namespace_path() 849 850 def get_module_name(self, node): 851 852 """ 853 Using the given From 'node' in this module, calculate any relative import 854 information, returning a tuple containing a module to import along with any 855 names to import based on the node's name information. 856 857 Where the returned module is given as None, whole module imports should 858 be performed for the returned modules using the returned names. 859 """ 860 861 # Absolute import. 862 863 if node.level == 0: 864 return node.modname, node.names 865 866 # Relative to an ancestor of this module. 867 868 else: 869 path = self.name.split(".") 870 level = node.level 871 872 # Relative imports treat package roots as submodules. 873 874 if split(self.filename)[-1] == "__init__.py": 875 level -= 1 876 877 if level > len(path): 878 raise InspectError("Relative import %r involves too many levels up from module %r" % ( 879 ("%s%s" % ("." * node.level, node.modname or "")), self.name)) 880 881 basename = ".".join(path[:len(path)-level]) 882 883 # Name imports from a module. 884 885 if node.modname: 886 return "%s.%s" % (basename, node.modname), node.names 887 888 # Relative whole module imports. 889 890 else: 891 return basename, node.names 892 893 def get_argnames(args): 894 895 """ 896 Return a list of all names provided by 'args'. Since tuples may be 897 employed, the arguments are traversed depth-first. 898 """ 899 900 l = [] 901 for arg in args: 902 if isinstance(arg, tuple): 903 l += get_argnames(arg) 904 else: 905 l.append(arg) 906 return l 907 908 def get_names_from_nodes(nodes): 909 910 """ 911 Return the names employed in the given 'nodes' along with the number of 912 nodes excluding sequences. 913 """ 914 915 names = set() 916 count = 0 917 918 for node in nodes: 919 920 # Add names and count them. 921 922 if isinstance(node, (compiler.ast.AssName, compiler.ast.Name)): 923 names.add(node.name) 924 count += 1 925 926 # Add names from sequences and incorporate their counts. 927 928 elif isinstance(node, (compiler.ast.AssList, compiler.ast.AssTuple, 929 compiler.ast.List, compiler.ast.Set, 930 compiler.ast.Tuple)): 931 _names, _count = get_names_from_nodes(node.nodes) 932 names.update(_names) 933 count += _count 934 935 # Count non-name, non-sequence nodes. 936 937 else: 938 count += 1 939 940 return names, count 941 942 # Location classes. 943 944 class Location: 945 946 "A generic program location." 947 948 def __init__(self, path, name, attrnames=None, version=None, access_number=None): 949 self.path = path 950 self.name = name 951 self.attrnames = attrnames 952 self.version = version 953 self.access_number = access_number 954 955 def __repr__(self): 956 return "Location(%r, %r, %r, %r, %r)" % self.as_tuple() 957 958 def as_tuple(self): 959 return (self.path, self.name, self.attrnames, self.version, self.access_number) 960 961 def __hash__(self): 962 return hash(self.as_tuple()) 963 964 def __eq__(self, other): 965 return self.as_tuple() == other.as_tuple() 966 967 def __cmp__(self, other): 968 return cmp(self.as_tuple(), other.as_tuple()) 969 970 def get_attrname(self): 971 972 """ 973 Extract the first attribute from the attribute names employed in this 974 location. 975 """ 976 977 attrnames = self.attrnames 978 if not attrnames: 979 return attrnames 980 return get_attrnames(attrnames)[0] 981 982 class AccessLocation(Location): 983 984 "A specialised access location." 985 986 def __init__(self, path, name, attrnames, access_number): 987 988 """ 989 Initialise an access location featuring 'path', 'name', 'attrnames' and 990 'access_number'. 991 """ 992 993 Location.__init__(self, path, name, attrnames, None, access_number) 994 995 def __repr__(self): 996 return "AccessLocation(%r, %r, %r, %r)" % (self.path, self.name, self.attrnames, self.access_number) 997 998 # Result classes. 999 1000 class InstructionSequence: 1001 1002 "A generic sequence of instructions." 1003 1004 def __init__(self, instructions): 1005 self.instructions = instructions 1006 1007 def get_value_instruction(self): 1008 return self.instructions[-1] 1009 1010 def get_init_instructions(self): 1011 return self.instructions[:-1] 1012 1013 # Dictionary utilities. 1014 1015 def init_item(d, key, fn): 1016 1017 """ 1018 Add to 'd' an entry for 'key' using the callable 'fn' to make an initial 1019 value where no entry already exists. 1020 """ 1021 1022 if not d.has_key(key): 1023 d[key] = fn() 1024 return d[key] 1025 1026 def dict_for_keys(d, keys): 1027 1028 "Return a new dictionary containing entries from 'd' for the given 'keys'." 1029 1030 nd = {} 1031 for key in keys: 1032 if d.has_key(key): 1033 nd[key] = d[key] 1034 return nd 1035 1036 def make_key(s): 1037 1038 "Make sequence 's' into a tuple-based key, first sorting its contents." 1039 1040 l = list(s) 1041 l.sort() 1042 return tuple(l) 1043 1044 def add_counter_item(d, key): 1045 1046 """ 1047 Make a mapping in 'd' for 'key' to the number of keys added before it, thus 1048 maintaining a mapping of keys to their order of insertion. 1049 """ 1050 1051 if not d.has_key(key): 1052 d[key] = len(d.keys()) 1053 return d[key] 1054 1055 def remove_items(d1, d2): 1056 1057 "Remove from 'd1' all items from 'd2'." 1058 1059 for key in d2.keys(): 1060 if d1.has_key(key): 1061 del d1[key] 1062 1063 # Set utilities. 1064 1065 def first(s): 1066 return list(s)[0] 1067 1068 def same(s1, s2): 1069 return set(s1) == set(s2) 1070 1071 def order_dependencies(all_depends): 1072 1073 """ 1074 Produce a dependency ordering for the 'all_depends' mapping. This mapping 1075 has the form "A depends on B, C...". The result will order A, B, C, and so 1076 on. 1077 """ 1078 1079 usage = init_reverse_dependencies(all_depends) 1080 1081 # Produce an ordering by obtaining exposed items (required by items already 1082 # processed) and putting them at the start of the list. 1083 1084 ordered = [] 1085 1086 while usage: 1087 have_next = False 1088 1089 for key, n in usage.items(): 1090 1091 # Add items needed by no other items to the ordering. 1092 1093 if not n: 1094 remove_dependency(key, all_depends, usage, ordered) 1095 have_next = True 1096 1097 if not have_next: 1098 raise ValueError, usage 1099 1100 return ordered 1101 1102 def order_dependencies_partial(all_depends): 1103 1104 """ 1105 Produce a dependency ordering for the 'all_depends' mapping. This mapping 1106 has the form "A depends on B, C...". The result will order A, B, C, and so 1107 on. Where cycles exist, they will be broken and a partial ordering returned. 1108 """ 1109 1110 usage = init_reverse_dependencies(all_depends) 1111 1112 # Duplicate the dependencies for subsequent modification. 1113 1114 new_depends = {} 1115 for key, values in all_depends.items(): 1116 new_depends[key] = set(values) 1117 1118 all_depends = new_depends 1119 1120 # Produce an ordering by obtaining exposed items (required by items already 1121 # processed) and putting them at the start of the list. 1122 1123 ordered = [] 1124 1125 while usage: 1126 least = None 1127 least_key = None 1128 1129 for key, n in usage.items(): 1130 1131 # Add items needed by no other items to the ordering. 1132 1133 if not n: 1134 remove_dependency(key, all_depends, usage, ordered) 1135 least = 0 1136 1137 # When breaking cycles, note the least used items. 1138 1139 elif least is None or len(n) < least: 1140 least_key = key 1141 least = len(n) 1142 1143 if least: 1144 transfer_dependencies(least_key, all_depends, usage, ordered) 1145 1146 return ordered 1147 1148 def init_reverse_dependencies(all_depends): 1149 1150 """ 1151 From 'all_depends', providing a mapping of the form "A depends on B, C...", 1152 record the reverse dependencies, making a mapping of the form 1153 "B is needed by A", "C is needed by A", and so on. 1154 """ 1155 1156 usage = {} 1157 1158 # Record path-based dependencies. 1159 1160 for key in all_depends.keys(): 1161 usage[key] = set() 1162 1163 for key, depends in all_depends.items(): 1164 for depend in depends: 1165 init_item(usage, depend, set) 1166 usage[depend].add(key) 1167 1168 return usage 1169 1170 def transfer_dependencies(key, all_depends, usage, ordered): 1171 1172 """ 1173 Transfer items needed by 'key' to those items needing 'key', found using 1174 'all_depends', and updating 'usage'. Insert 'key' into the 'ordered' 1175 collection of dependencies. 1176 1177 If "A is needed by X" and "B is needed by A", then transferring items needed 1178 by A will cause "B is needed by X" to be recorded as a consequence. 1179 1180 Transferring items also needs to occur in the reverse mapping, so that 1181 "A needs B" and "X needs A", then the consequence must be recorded as 1182 "X needs B". 1183 """ 1184 1185 ordered.insert(0, key) 1186 1187 needing = usage[key] # A is needed by X 1188 needed = all_depends.get(key) # A needs B 1189 1190 if needing: 1191 for depend in needing: 1192 l = all_depends.get(depend) 1193 if not l: 1194 continue 1195 1196 l.remove(key) # X needs (A) 1197 1198 if needed: 1199 l.update(needed) # X needs B... 1200 1201 # Prevent self references. 1202 1203 if depend in needed: 1204 l.remove(depend) 1205 1206 if needed: 1207 for depend in needed: 1208 l = usage.get(depend) 1209 if not l: 1210 continue 1211 1212 l.remove(key) # B is needed by (A) 1213 l.update(needing) # B is needed by X... 1214 1215 # Prevent self references. 1216 1217 if depend in needing: 1218 l.remove(depend) 1219 1220 if needed: 1221 del all_depends[key] 1222 del usage[key] 1223 1224 def remove_dependency(key, all_depends, usage, ordered): 1225 1226 """ 1227 Remove 'key', found in 'all_depends', from 'usage', inserting it into the 1228 'ordered' collection of dependencies. 1229 1230 Given that 'usage' for a given key A would indicate that "A needs <nothing>" 1231 upon removing A from 'usage', the outcome is that all keys needing A will 1232 have A removed from their 'usage' records. 1233 1234 So, if "B needs A", removing A will cause "B needs <nothing>" to be recorded 1235 as a consequence. 1236 """ 1237 1238 ordered.insert(0, key) 1239 1240 depends = all_depends.get(key) 1241 1242 # Reduce usage of the referenced items. 1243 1244 if depends: 1245 for depend in depends: 1246 usage[depend].remove(key) 1247 1248 del usage[key] 1249 1250 # General input/output. 1251 1252 def readfile(filename): 1253 1254 "Return the contents of 'filename'." 1255 1256 f = open(filename) 1257 try: 1258 return f.read() 1259 finally: 1260 f.close() 1261 1262 def writefile(filename, s): 1263 1264 "Write to 'filename' the string 's'." 1265 1266 f = open(filename, "w") 1267 try: 1268 f.write(s) 1269 finally: 1270 f.close() 1271 1272 # General encoding. 1273 1274 def sorted_output(x): 1275 1276 "Sort sequence 'x' and return a string with commas separating the values." 1277 1278 x = map(str, x) 1279 x.sort() 1280 return ", ".join(x) 1281 1282 def get_string_details(literals, encoding): 1283 1284 """ 1285 Determine whether 'literals' represent Unicode strings or byte strings, 1286 using 'encoding' to reproduce byte sequences. 1287 1288 Each literal is the full program representation including prefix and quotes 1289 recoded by the parser to UTF-8. Thus, any literal found to represent a byte 1290 string needs to be translated back to its original encoding. 1291 1292 Return a single encoded literal value, a type name, and the original 1293 encoding as a tuple. 1294 """ 1295 1296 typename = "unicode" 1297 1298 l = [] 1299 1300 for s in literals: 1301 out, _typename = get_literal_details(s) 1302 if _typename == "str": 1303 typename = "str" 1304 l.append(out) 1305 1306 out = "".join(l) 1307 1308 # For Unicode values, convert to the UTF-8 program representation. 1309 1310 if typename == "unicode": 1311 return out.encode("utf-8"), typename, encoding 1312 1313 # For byte string values, convert back to the original encoding. 1314 1315 else: 1316 return out.encode(encoding), typename, encoding 1317 1318 def get_literal_details(s): 1319 1320 """ 1321 Determine whether 's' represents a Unicode string or a byte string, where 1322 's' contains the full program representation of a literal including prefix 1323 and quotes, recoded by the parser to UTF-8. 1324 1325 Find and convert Unicode values starting with <backslash>u or <backslash>U, 1326 and byte or Unicode values starting with <backslash><octal digit> or 1327 <backslash>x. 1328 1329 Literals prefixed with "u" cause <backslash><octal digit> and <backslash>x 1330 to be considered as Unicode values. Otherwise, they produce byte values and 1331 cause unprefixed strings to be considered as byte strings. 1332 1333 Literals prefixed with "r" do not have their backslash-encoded values 1334 converted unless also prefixed with "u", in which case only the above value 1335 formats are converted, not any of the other special sequences for things 1336 like newlines. 1337 1338 Return the literal value as a Unicode object together with the appropriate 1339 type name in a tuple. 1340 """ 1341 1342 l = [] 1343 1344 # Identify the quote character and use it to identify the prefix. 1345 1346 quote_type = s[-1] 1347 prefix_end = s.find(quote_type) 1348 prefix = s[:prefix_end].lower() 1349 1350 if prefix not in ("", "b", "br", "r", "u", "ur"): 1351 raise ValueError, "String literal does not have a supported prefix: %s" % s 1352 1353 if "b" in prefix: 1354 typename = "str" 1355 else: 1356 typename = "unicode" 1357 1358 # Identify triple quotes or single quotes. 1359 1360 if len(s) >= 6 and s[-2] == quote_type and s[-3] == quote_type: 1361 quote = s[prefix_end:prefix_end+3] 1362 current = prefix_end + 3 1363 end = len(s) - 3 1364 else: 1365 quote = s[prefix_end] 1366 current = prefix_end + 1 1367 end = len(s) - 1 1368 1369 # Conversions of some quoted values. 1370 1371 searches = { 1372 "u" : (6, 16), 1373 "U" : (10, 16), 1374 "x" : (4, 16), 1375 } 1376 1377 octal_digits = map(str, range(0, 8)) 1378 1379 # Translations of some quoted values. 1380 1381 escaped = { 1382 "\\" : "\\", "'" : "'", '"' : '"', 1383 "a" : "\a", "b" : "\b", "f" : "\f", 1384 "n" : "\n", "r" : "\r", "t" : "\t", 1385 } 1386 1387 while current < end: 1388 1389 # Look for quoted values. 1390 1391 index = s.find("\\", current) 1392 if index == -1 or index + 1 == end: 1393 l.append(s[current:end]) 1394 break 1395 1396 # Add the preceding text. 1397 1398 l.append(s[current:index]) 1399 1400 # Handle quoted text. 1401 1402 term = s[index+1] 1403 1404 # Add Unicode values. Where a string is u-prefixed, even \o and \x 1405 # produce Unicode values. 1406 1407 if typename == "unicode" and ( 1408 term in ("u", "U") or 1409 "u" in prefix and (term == "x" or term in octal_digits)): 1410 1411 needed, base = searches.get(term, (4, 8)) 1412 value = convert_quoted_value(s, index, needed, end, base, unichr) 1413 l.append(value) 1414 current = index + needed 1415 1416 # Add raw byte values, changing the string type. 1417 1418 elif "r" not in prefix and ( 1419 term == "x" or term in octal_digits): 1420 1421 needed, base = searches.get(term, (4, 8)) 1422 value = convert_quoted_value(s, index, needed, end, base, chr) 1423 l.append(value) 1424 typename = "str" 1425 current = index + needed 1426 1427 # Add other escaped values. 1428 1429 elif "r" not in prefix and escaped.has_key(term): 1430 l.append(escaped[term]) 1431 current = index + 2 1432 1433 # Add other text as found. 1434 1435 else: 1436 l.append(s[index:index+2]) 1437 current = index + 2 1438 1439 # Collect the components into a single Unicode object. Since the literal 1440 # text was already in UTF-8 form, interpret plain strings as UTF-8 1441 # sequences. 1442 1443 out = [] 1444 1445 for value in l: 1446 if isinstance(value, unicode): 1447 out.append(value) 1448 else: 1449 out.append(unicode(value, "utf-8")) 1450 1451 return "".join(out), typename 1452 1453 def convert_quoted_value(s, index, needed, end, base, fn): 1454 1455 """ 1456 Interpret a quoted value in 's' at 'index' with the given 'needed' number of 1457 positions, and with the given 'end' indicating the first position after the 1458 end of the actual string content. 1459 1460 Use 'base' as the numerical base when interpreting the value, and use 'fn' 1461 to convert the value to an appropriate type. 1462 """ 1463 1464 s = s[index:min(index+needed, end)] 1465 1466 # Not a complete occurrence. 1467 1468 if len(s) < needed: 1469 return s 1470 1471 # Test for a well-formed value. 1472 1473 try: 1474 first = base == 8 and 1 or 2 1475 value = int(s[first:needed], base) 1476 except ValueError: 1477 return s 1478 else: 1479 return fn(value) 1480 1481 # Attribute chain decoding. 1482 1483 def get_attrnames(attrnames): 1484 1485 """ 1486 Split the qualified attribute chain 'attrnames' into its components, 1487 handling special attributes starting with "#" that indicate type 1488 conformance. 1489 """ 1490 1491 if attrnames.startswith("#"): 1492 return [attrnames] 1493 else: 1494 return attrnames.split(".") 1495 1496 def get_name_path(path, name): 1497 1498 "Return a suitable qualified name from the given 'path' and 'name'." 1499 1500 if "." in name: 1501 return name 1502 else: 1503 return "%s.%s" % (path, name) 1504 1505 # Usage-related functions. 1506 1507 def get_types_for_usage(attrnames, objects): 1508 1509 """ 1510 Identify the types that can support the given 'attrnames', using the 1511 given 'objects' as the catalogue of type details. 1512 """ 1513 1514 types = [] 1515 for name, _attrnames in objects.items(): 1516 if set(attrnames).issubset(_attrnames): 1517 types.append(name) 1518 return types 1519 1520 def get_invoked_attributes(usage): 1521 1522 "Obtain invoked attribute from the given 'usage'." 1523 1524 invoked = [] 1525 if usage: 1526 for attrname, invocation, assignment in usage: 1527 if invocation: 1528 invoked.append(attrname) 1529 return invoked 1530 1531 def get_assigned_attributes(usage): 1532 1533 "Obtain assigned attribute from the given 'usage'." 1534 1535 assigned = [] 1536 if usage: 1537 for attrname, invocation, assignment in usage: 1538 if assignment: 1539 assigned.append(attrname) 1540 return assigned 1541 1542 # Type and module functions. 1543 # NOTE: This makes assumptions about the __builtins__ structure. 1544 1545 def get_builtin_module(name): 1546 1547 "Return the module name containing the given type 'name'." 1548 1549 if name == "string": 1550 modname = "str" 1551 elif name == "utf8string": 1552 modname = "unicode" 1553 elif name == "NoneType": 1554 modname = "none" 1555 else: 1556 modname = name 1557 1558 return "__builtins__.%s" % modname 1559 1560 def get_builtin_type(name): 1561 1562 "Return the type name provided by the given Python value 'name'." 1563 1564 if name == "str": 1565 return "string" 1566 elif name == "unicode": 1567 return "utf8string" 1568 else: 1569 return name 1570 1571 def get_builtin_class(name): 1572 1573 "Return the full name of the built-in class having the given 'name'." 1574 1575 typename = get_builtin_type(name) 1576 module = get_builtin_module(typename) 1577 return "%s.%s" % (module, typename) 1578 1579 # Useful data. 1580 1581 predefined_constants = "False", "None", "NotImplemented", "True" 1582 1583 unary_operator_functions = { 1584 1585 # Unary operations. 1586 1587 "Invert" : "invert", 1588 "UnaryAdd" : "pos", 1589 "UnarySub" : "neg", 1590 } 1591 1592 operator_functions = { 1593 1594 # Fundamental operations. 1595 1596 "is" : "is_", 1597 "is not" : "is_not", 1598 1599 # Binary operations. 1600 1601 "in" : "in_", 1602 "not in" : "not_in", 1603 "Add" : "add", 1604 "Bitand" : "and_", 1605 "Bitor" : "or_", 1606 "Bitxor" : "xor", 1607 "Div" : "div", 1608 "FloorDiv" : "floordiv", 1609 "LeftShift" : "lshift", 1610 "Mod" : "mod", 1611 "Mul" : "mul", 1612 "Power" : "pow", 1613 "RightShift" : "rshift", 1614 "Sub" : "sub", 1615 1616 # Augmented assignment. 1617 1618 "+=" : "iadd", 1619 "-=" : "isub", 1620 "*=" : "imul", 1621 "/=" : "idiv", 1622 "//=" : "ifloordiv", 1623 "%=" : "imod", 1624 "**=" : "ipow", 1625 "<<=" : "ilshift", 1626 ">>=" : "irshift", 1627 "&=" : "iand", 1628 "^=" : "ixor", 1629 "|=" : "ior", 1630 1631 # Comparisons. 1632 1633 "==" : "eq", 1634 "!=" : "ne", 1635 "<" : "lt", 1636 "<=" : "le", 1637 ">=" : "ge", 1638 ">" : "gt", 1639 } 1640 1641 operator_functions.update(unary_operator_functions) 1642 1643 # vim: tabstop=4 expandtab shiftwidth=4