1 #!/usr/bin/env python 2 3 """ 4 Common functions. 5 6 Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013, 7 2014, 2015, 2016, 2017 Paul Boddie <paul@boddie.org.uk> 8 9 This program is free software; you can redistribute it and/or modify it under 10 the terms of the GNU General Public License as published by the Free Software 11 Foundation; either version 3 of the License, or (at your option) any later 12 version. 13 14 This program is distributed in the hope that it will be useful, but WITHOUT 15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 16 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 17 details. 18 19 You should have received a copy of the GNU General Public License along with 20 this program. If not, see <http://www.gnu.org/licenses/>. 21 """ 22 23 from compiler.transformer import Transformer 24 from errors import InspectError 25 from os import listdir, makedirs, remove 26 from os.path import exists, getmtime, isdir, join, split 27 from results import ConstantValueRef, LiteralSequenceRef, NameRef 28 import compiler.ast 29 30 class CommonOutput: 31 32 "Common output functionality." 33 34 def check_output(self, options=None): 35 36 "Check the existing output and remove it if irrelevant." 37 38 if not exists(self.output): 39 makedirs(self.output) 40 41 details = self.importer.get_cache_details() 42 recorded_details = self.get_output_details() 43 44 # Combine cache details with any options. 45 46 full_details = options and (details + " " + options) or details 47 48 if recorded_details != full_details: 49 self.remove_output() 50 51 writefile(self.get_output_details_filename(), full_details) 52 53 def get_output_details_filename(self): 54 55 "Return the output details filename." 56 57 return join(self.output, "$details") 58 59 def get_output_details(self): 60 61 "Return details of the existing output." 62 63 details_filename = self.get_output_details_filename() 64 65 if not exists(details_filename): 66 return None 67 else: 68 return readfile(details_filename) 69 70 def remove_output(self, dirname=None): 71 72 "Remove the output." 73 74 dirname = dirname or self.output 75 76 for filename in listdir(dirname): 77 path = join(dirname, filename) 78 if isdir(path): 79 self.remove_output(path) 80 else: 81 remove(path) 82 83 def copy(source, target, only_if_newer=True): 84 85 "Copy a text file from 'source' to 'target'." 86 87 if isdir(target): 88 target = join(target, split(source)[-1]) 89 90 if only_if_newer and not is_newer(source, target): 91 return 92 93 infile = open(source) 94 outfile = open(target, "w") 95 96 try: 97 outfile.write(infile.read()) 98 finally: 99 outfile.close() 100 infile.close() 101 102 def is_newer(source, target): 103 104 "Return whether 'source' is newer than 'target'." 105 106 if exists(target): 107 target_mtime = getmtime(target) 108 source_mtime = getmtime(source) 109 return source_mtime > target_mtime 110 111 return True 112 113 class CommonModule: 114 115 "A common module representation." 116 117 def __init__(self, name, importer): 118 119 """ 120 Initialise this module with the given 'name' and an 'importer' which is 121 used to provide access to other modules when required. 122 """ 123 124 self.name = name 125 self.importer = importer 126 self.filename = None 127 128 # Inspection-related attributes. 129 130 self.astnode = None 131 self.encoding = None 132 self.temp = {} 133 self.lambdas = {} 134 135 # Constants, literals and values. 136 137 self.constants = {} 138 self.constant_values = {} 139 self.literals = {} 140 self.literal_types = {} 141 142 # Nested namespaces. 143 144 self.namespace_path = [] 145 self.in_function = False 146 147 # Retain the assignment value expression and track invocations. 148 149 self.in_assignment = None 150 self.in_invocation = None 151 152 # Attribute chain state management. 153 154 self.attrs = [] 155 self.chain_assignment = [] 156 self.chain_invocation = [] 157 158 def __repr__(self): 159 return "CommonModule(%r, %r)" % (self.name, self.importer) 160 161 def parse_file(self, filename): 162 163 "Parse the file with the given 'filename', initialising attributes." 164 165 self.filename = filename 166 167 # Use the Transformer directly to obtain encoding information. 168 169 t = Transformer() 170 f = open(filename) 171 172 try: 173 self.astnode = t.parsesuite(f.read() + "\n") 174 self.encoding = t.encoding 175 finally: 176 f.close() 177 178 # Module-relative naming. 179 180 def get_global_path(self, name): 181 return "%s.%s" % (self.name, name) 182 183 def get_namespace_path(self): 184 return ".".join([self.name] + self.namespace_path) 185 186 def get_object_path(self, name): 187 return ".".join([self.name] + self.namespace_path + [name]) 188 189 # Namespace management. 190 191 def enter_namespace(self, name): 192 193 "Enter the namespace having the given 'name'." 194 195 self.namespace_path.append(name) 196 197 def exit_namespace(self): 198 199 "Exit the current namespace." 200 201 self.namespace_path.pop() 202 203 # Constant reference naming. 204 205 def get_constant_name(self, value, value_type, encoding=None): 206 207 """ 208 Add a new constant to the current namespace for 'value' with 209 'value_type'. 210 """ 211 212 path = self.get_namespace_path() 213 init_item(self.constants, path, dict) 214 return "$c%d" % add_counter_item(self.constants[path], (value, value_type, encoding)) 215 216 # Literal reference naming. 217 218 def get_literal_name(self): 219 220 "Add a new literal to the current namespace." 221 222 path = self.get_namespace_path() 223 init_item(self.literals, path, lambda: 0) 224 return "$C%d" % self.literals[path] 225 226 def next_literal(self): 227 self.literals[self.get_namespace_path()] += 1 228 229 # Temporary variable naming. 230 231 def get_temporary_name(self): 232 path = self.get_namespace_path() 233 init_item(self.temp, path, lambda: 0) 234 return "$t%d" % self.temp[path] 235 236 def next_temporary(self): 237 self.temp[self.get_namespace_path()] += 1 238 239 # Arbitrary function naming. 240 241 def get_lambda_name(self): 242 path = self.get_namespace_path() 243 init_item(self.lambdas, path, lambda: 0) 244 name = "$l%d" % self.lambdas[path] 245 self.lambdas[path] += 1 246 return name 247 248 def reset_lambdas(self): 249 self.lambdas = {} 250 251 # Constant and literal recording. 252 253 def get_constant_value(self, value, literals=None): 254 255 """ 256 Encode the 'value' if appropriate, returning a value, a typename and any 257 encoding. 258 """ 259 260 if isinstance(value, unicode): 261 return value.encode("utf-8"), "unicode", self.encoding 262 263 # Attempt to convert plain strings to text. 264 265 elif isinstance(value, str) and self.encoding: 266 try: 267 return get_string_details(literals, self.encoding) 268 except UnicodeDecodeError: 269 pass 270 271 return value, value.__class__.__name__, None 272 273 def get_constant_reference(self, ref, value, encoding=None): 274 275 """ 276 Return a constant reference for the given 'ref' type and 'value', with 277 the optional 'encoding' applying to text values. 278 """ 279 280 constant_name = self.get_constant_name(value, ref.get_origin(), encoding) 281 282 # Return a reference for the constant. 283 284 objpath = self.get_object_path(constant_name) 285 name_ref = ConstantValueRef(constant_name, ref.instance_of(objpath), value) 286 287 # Record the value and type for the constant. 288 289 self._reserve_constant(objpath, name_ref.value, name_ref.get_origin(), encoding) 290 return name_ref 291 292 def reserve_constant(self, objpath, value, origin, encoding=None): 293 294 """ 295 Reserve a constant within 'objpath' with the given 'value' and having a 296 type with the given 'origin', with the optional 'encoding' applying to 297 text values. 298 """ 299 300 constant_name = self.get_constant_name(value, origin) 301 objpath = self.get_object_path(constant_name) 302 self._reserve_constant(objpath, value, origin, encoding) 303 304 def _reserve_constant(self, objpath, value, origin, encoding): 305 306 """ 307 Store a constant for 'objpath' with the given 'value' and 'origin', with 308 the optional 'encoding' applying to text values. 309 """ 310 311 self.constant_values[objpath] = value, origin, encoding 312 313 def get_literal_reference(self, name, ref, items, cls): 314 315 """ 316 Return a literal reference for the given type 'name', literal 'ref', 317 node 'items' and employing the given 'cls' as the class of the returned 318 reference object. 319 """ 320 321 # Construct an invocation using the items as arguments. 322 323 typename = "$L%s" % name 324 325 invocation = compiler.ast.CallFunc( 326 compiler.ast.Name(typename), 327 items 328 ) 329 330 # Get a name for the actual literal. 331 332 instname = self.get_literal_name() 333 self.next_literal() 334 335 # Record the type for the literal. 336 337 objpath = self.get_object_path(instname) 338 self.literal_types[objpath] = ref.get_origin() 339 340 # Return a wrapper for the invocation exposing the items. 341 342 return cls( 343 instname, 344 ref.instance_of(), 345 self.process_structure_node(invocation), 346 invocation.args 347 ) 348 349 # Node handling. 350 351 def process_structure(self, node): 352 353 """ 354 Within the given 'node', process the program structure. 355 356 During inspection, this will process global declarations, adjusting the 357 module namespace, and import statements, building a module dependency 358 hierarchy. 359 360 During translation, this will consult deduced program information and 361 output translated code. 362 """ 363 364 l = [] 365 for n in node.getChildNodes(): 366 l.append(self.process_structure_node(n)) 367 return l 368 369 def process_augassign_node(self, n): 370 371 "Process the given augmented assignment node 'n'." 372 373 op = operator_functions[n.op] 374 375 if isinstance(n.node, compiler.ast.Getattr): 376 target = compiler.ast.AssAttr(n.node.expr, n.node.attrname, "OP_ASSIGN") 377 elif isinstance(n.node, compiler.ast.Name): 378 target = compiler.ast.AssName(n.node.name, "OP_ASSIGN") 379 else: 380 target = n.node 381 382 assignment = compiler.ast.Assign( 383 [target], 384 compiler.ast.CallFunc( 385 compiler.ast.Name("$op%s" % op), 386 [n.node, n.expr])) 387 388 return self.process_structure_node(assignment) 389 390 def process_assignment_for_object(self, original_name, source): 391 392 """ 393 Return an assignment operation making 'original_name' refer to the given 394 'source'. 395 """ 396 397 assignment = compiler.ast.Assign( 398 [compiler.ast.AssName(original_name, "OP_ASSIGN")], 399 source 400 ) 401 402 return self.process_structure_node(assignment) 403 404 def process_assignment_node_items(self, n, expr): 405 406 """ 407 Process the given assignment node 'n' whose children are to be assigned 408 items of 'expr'. 409 """ 410 411 name_ref = self.process_structure_node(expr) 412 413 # Either unpack the items and present them directly to each assignment 414 # node. 415 416 if isinstance(name_ref, LiteralSequenceRef) and \ 417 self.process_literal_sequence_items(n, name_ref): 418 419 pass 420 421 # Or have the assignment nodes access each item via the sequence API. 422 423 else: 424 self.process_assignment_node_items_by_position(n, expr, name_ref) 425 426 def process_assignment_node_items_by_position(self, n, expr, name_ref): 427 428 """ 429 Process the given sequence assignment node 'n', converting the node to 430 the separate assignment of each target using positional access on a 431 temporary variable representing the sequence. Use 'expr' as the assigned 432 value and 'name_ref' as the reference providing any existing temporary 433 variable. 434 """ 435 436 assignments = [] 437 438 # Employ existing names to access the sequence. 439 # Literal sequences do not provide names of accessible objects. 440 441 if isinstance(name_ref, NameRef) and not isinstance(name_ref, LiteralSequenceRef): 442 temp = name_ref.name 443 444 # For other expressions, create a temporary name to reference the items. 445 446 else: 447 temp = self.get_temporary_name() 448 self.next_temporary() 449 450 assignments.append( 451 compiler.ast.Assign([compiler.ast.AssName(temp, "OP_ASSIGN")], expr) 452 ) 453 454 # Assign the items to the target nodes. 455 456 for i, node in enumerate(n.nodes): 457 assignments.append( 458 compiler.ast.Assign([node], compiler.ast.Subscript( 459 compiler.ast.Name(temp), "OP_APPLY", [compiler.ast.Const(i, str(i))])) 460 ) 461 462 return self.process_structure_node(compiler.ast.Stmt(assignments)) 463 464 def process_literal_sequence_items(self, n, name_ref): 465 466 """ 467 Process the given assignment node 'n', obtaining from the given 468 'name_ref' the items to be assigned to the assignment targets. 469 470 Return whether this method was able to process the assignment node as 471 a sequence of direct assignments. 472 """ 473 474 if len(n.nodes) == len(name_ref.items): 475 assigned_names, count = get_names_from_nodes(n.nodes) 476 accessed_names, _count = get_names_from_nodes(name_ref.items) 477 478 # Only assign directly between items if all assigned names are 479 # plain names (not attribute assignments), and if the assigned names 480 # do not appear in the accessed names. 481 482 if len(assigned_names) == count and \ 483 not assigned_names.intersection(accessed_names): 484 485 for node, item in zip(n.nodes, name_ref.items): 486 self.process_assignment_node(node, item) 487 488 return True 489 490 # Otherwise, use the position-based mechanism to obtain values. 491 492 else: 493 return False 494 else: 495 raise InspectError("In %s, item assignment needing %d items is given %d items." % ( 496 self.get_namespace_path(), len(n.nodes), len(name_ref.items))) 497 498 def process_compare_node(self, n): 499 500 """ 501 Process the given comparison node 'n', converting an operator sequence 502 from... 503 504 <expr1> <op1> <expr2> <op2> <expr3> 505 506 ...to... 507 508 <op1>(<expr1>, <expr2>) and <op2>(<expr2>, <expr3>) 509 """ 510 511 invocations = [] 512 last = n.expr 513 514 for op, op_node in n.ops: 515 op = operator_functions.get(op) 516 517 invocations.append(compiler.ast.CallFunc( 518 compiler.ast.Name("$op%s" % op), 519 [last, op_node])) 520 521 last = op_node 522 523 if len(invocations) > 1: 524 result = compiler.ast.And(invocations) 525 else: 526 result = invocations[0] 527 528 return self.process_structure_node(result) 529 530 def process_dict_node(self, node): 531 532 """ 533 Process the given dictionary 'node', returning a list of (key, value) 534 tuples. 535 """ 536 537 l = [] 538 for key, value in node.items: 539 l.append(( 540 self.process_structure_node(key), 541 self.process_structure_node(value))) 542 return l 543 544 def process_for_node(self, n): 545 546 """ 547 Generate attribute accesses for {n.list}.__iter__ and the next method on 548 the iterator, producing a replacement node for the original. 549 """ 550 551 t0 = self.get_temporary_name() 552 self.next_temporary() 553 t1 = self.get_temporary_name() 554 self.next_temporary() 555 556 node = compiler.ast.Stmt([ 557 558 # <t0> = {n.list} 559 # <t1> = <t0>.__iter__() 560 561 compiler.ast.Assign( 562 [compiler.ast.AssName(t0, "OP_ASSIGN")], 563 n.list), 564 565 compiler.ast.Assign( 566 [compiler.ast.AssName(t1, "OP_ASSIGN")], 567 compiler.ast.CallFunc( 568 compiler.ast.Getattr(compiler.ast.Name(t0), "__iter__"), 569 [])), 570 571 # try: 572 # while True: 573 # <var>... = <t1>.next() 574 # ... 575 # except StopIteration: 576 # pass 577 578 compiler.ast.TryExcept( 579 compiler.ast.While( 580 compiler.ast.Name("True"), 581 compiler.ast.Stmt([ 582 compiler.ast.Assign( 583 [n.assign], 584 compiler.ast.CallFunc( 585 compiler.ast.Getattr(compiler.ast.Name(t1), "next"), 586 [] 587 )), 588 n.body]), 589 None), 590 [(compiler.ast.Name("StopIteration"), None, compiler.ast.Stmt([compiler.ast.Pass()]))], 591 None) 592 ]) 593 594 self.process_structure_node(node) 595 596 def process_literal_sequence_node(self, n, name, ref, cls): 597 598 """ 599 Process the given literal sequence node 'n' as a function invocation, 600 with 'name' indicating the type of the sequence, and 'ref' being a 601 reference to the type. The 'cls' is used to instantiate a suitable name 602 reference. 603 """ 604 605 if name == "dict": 606 items = [] 607 for key, value in n.items: 608 items.append(compiler.ast.Tuple([key, value])) 609 else: # name in ("list", "tuple"): 610 items = n.nodes 611 612 return self.get_literal_reference(name, ref, items, cls) 613 614 def process_operator_node(self, n): 615 616 """ 617 Process the given operator node 'n' as an operator function invocation. 618 """ 619 620 opname = n.__class__.__name__ 621 operands = n.getChildNodes() 622 623 # Convert a unary operation to an invocation. 624 625 op = unary_operator_functions.get(opname) 626 627 if op: 628 invocation = compiler.ast.CallFunc( 629 compiler.ast.Name("$op%s" % op), 630 [operands[0]] 631 ) 632 633 # Convert a single operator with a list of operands to a combination of 634 # pairwise operations. 635 636 else: 637 op = operator_functions[opname] 638 invocation = self._process_operator_node(op, operands) 639 640 return self.process_structure_node(invocation) 641 642 def _process_operator_node(self, op, operands): 643 644 """ 645 Process the given 'op', being an operator function, together with the 646 supplied 'operands', returning either a single remaining operand or an 647 invocation combining the operands. 648 """ 649 650 remaining = operands[1:] 651 if not remaining: 652 return operands[0] 653 654 return compiler.ast.CallFunc( 655 compiler.ast.Name("$op%s" % op), 656 [operands[0], self._process_operator_node(op, remaining)] 657 ) 658 659 def process_print_node(self, n): 660 661 """ 662 Process the given print node 'n' as an invocation on a stream of the 663 form... 664 665 $print(dest, args, nl) 666 667 The special function name will be translated elsewhere. 668 """ 669 670 nl = isinstance(n, compiler.ast.Printnl) 671 invocation = compiler.ast.CallFunc( 672 compiler.ast.Name("$print"), 673 [n.dest or compiler.ast.Name("None"), 674 compiler.ast.List(list(n.nodes)), 675 nl and compiler.ast.Name("True") or compiler.ast.Name("False")] 676 ) 677 return self.process_structure_node(invocation) 678 679 def process_slice_node(self, n, expr=None): 680 681 """ 682 Process the given slice node 'n' as a method invocation. 683 """ 684 685 if n.flags == "OP_ASSIGN": op = "__setslice__" 686 elif n.flags == "OP_DELETE": op = "__delslice__" 687 else: op = "__getslice__" 688 689 invocation = compiler.ast.CallFunc( 690 compiler.ast.Getattr(n.expr, op), 691 [n.lower or compiler.ast.Name("None"), n.upper or compiler.ast.Name("None")] + 692 (expr and [expr] or []) 693 ) 694 695 # Fix parse tree structure. 696 697 if op == "__delslice__": 698 invocation = compiler.ast.Discard(invocation) 699 700 return self.process_structure_node(invocation) 701 702 def process_sliceobj_node(self, n): 703 704 """ 705 Process the given slice object node 'n' as a slice constructor. 706 """ 707 708 op = "slice" 709 invocation = compiler.ast.CallFunc( 710 compiler.ast.Name("$op%s" % op), 711 n.nodes 712 ) 713 return self.process_structure_node(invocation) 714 715 def process_subscript_node(self, n, expr=None): 716 717 """ 718 Process the given subscript node 'n' as a method invocation. 719 """ 720 721 if n.flags == "OP_ASSIGN": op = "__setitem__" 722 elif n.flags == "OP_DELETE": op = "__delitem__" 723 else: op = "__getitem__" 724 725 invocation = compiler.ast.CallFunc( 726 compiler.ast.Getattr(n.expr, op), 727 list(n.subs) + (expr and [expr] or []) 728 ) 729 730 # Fix parse tree structure. 731 732 if op == "__delitem__": 733 invocation = compiler.ast.Discard(invocation) 734 735 return self.process_structure_node(invocation) 736 737 def process_attribute_chain(self, n): 738 739 """ 740 Process the given attribute access node 'n'. Return a reference 741 describing the expression. 742 """ 743 744 # AssAttr/Getattr are nested with the outermost access being the last 745 # access in any chain. 746 747 self.attrs.insert(0, n.attrname) 748 attrs = self.attrs 749 750 # Break attribute chains where non-access nodes are found. 751 752 if not self.have_access_expression(n): 753 self.reset_attribute_chain() 754 755 # Descend into the expression, extending backwards any existing chain, 756 # or building another for the expression. 757 758 name_ref = self.process_structure_node(n.expr) 759 760 # Restore chain information applying to this node. 761 762 if not self.have_access_expression(n): 763 self.restore_attribute_chain(attrs) 764 765 # Return immediately if the expression was another access and thus a 766 # continuation backwards along the chain. The above processing will 767 # have followed the chain all the way to its conclusion. 768 769 if self.have_access_expression(n): 770 del self.attrs[0] 771 772 return name_ref 773 774 # Attribute chain handling. 775 776 def reset_attribute_chain(self): 777 778 "Reset the attribute chain for a subexpression of an attribute access." 779 780 self.attrs = [] 781 self.chain_assignment.append(self.in_assignment) 782 self.chain_invocation.append(self.in_invocation) 783 self.in_assignment = None 784 self.in_invocation = None 785 786 def restore_attribute_chain(self, attrs): 787 788 "Restore the attribute chain for an attribute access." 789 790 self.attrs = attrs 791 self.in_assignment = self.chain_assignment.pop() 792 self.in_invocation = self.chain_invocation.pop() 793 794 def have_access_expression(self, node): 795 796 "Return whether the expression associated with 'node' is Getattr." 797 798 return isinstance(node.expr, compiler.ast.Getattr) 799 800 def get_name_for_tracking(self, name, name_ref=None, is_global=False): 801 802 """ 803 Return the name to be used for attribute usage observations involving 804 the given 'name' in the current namespace. 805 806 If the name is being used outside a function, and if 'name_ref' is 807 given and indicates a global or if 'is_global' is specified as a true 808 value, a path featuring the name in the global namespace is returned. 809 Otherwise, a path computed using the current namespace and the given 810 name is returned. 811 812 The intention of this method is to provide a suitably-qualified name 813 that can be tracked across namespaces. Where globals are being 814 referenced in class namespaces, they should be referenced using their 815 path within the module, not using a path within each class. 816 817 It may not be possible to identify a global within a function at the 818 time of inspection (since a global may appear later in a file). 819 Consequently, globals are identified by their local name rather than 820 their module-qualified path. 821 """ 822 823 # For functions, use the appropriate local names. 824 825 if self.in_function: 826 return name 827 828 # For global names outside functions, use a global name. 829 830 elif is_global or name_ref and name_ref.is_global_name(): 831 return self.get_global_path(name) 832 833 # Otherwise, establish a name in the current namespace. 834 835 else: 836 return self.get_object_path(name) 837 838 def get_path_for_access(self): 839 840 "Outside functions, register accesses at the module level." 841 842 if not self.in_function: 843 return self.name 844 else: 845 return self.get_namespace_path() 846 847 def get_module_name(self, node): 848 849 """ 850 Using the given From 'node' in this module, calculate any relative import 851 information, returning a tuple containing a module to import along with any 852 names to import based on the node's name information. 853 854 Where the returned module is given as None, whole module imports should 855 be performed for the returned modules using the returned names. 856 """ 857 858 # Absolute import. 859 860 if node.level == 0: 861 return node.modname, node.names 862 863 # Relative to an ancestor of this module. 864 865 else: 866 path = self.name.split(".") 867 level = node.level 868 869 # Relative imports treat package roots as submodules. 870 871 if split(self.filename)[-1] == "__init__.py": 872 level -= 1 873 874 if level > len(path): 875 raise InspectError("Relative import %r involves too many levels up from module %r" % ( 876 ("%s%s" % ("." * node.level, node.modname or "")), self.name)) 877 878 basename = ".".join(path[:len(path)-level]) 879 880 # Name imports from a module. 881 882 if node.modname: 883 return "%s.%s" % (basename, node.modname), node.names 884 885 # Relative whole module imports. 886 887 else: 888 return basename, node.names 889 890 def get_argnames(args): 891 892 """ 893 Return a list of all names provided by 'args'. Since tuples may be 894 employed, the arguments are traversed depth-first. 895 """ 896 897 l = [] 898 for arg in args: 899 if isinstance(arg, tuple): 900 l += get_argnames(arg) 901 else: 902 l.append(arg) 903 return l 904 905 def get_names_from_nodes(nodes): 906 907 """ 908 Return the names employed in the given 'nodes' along with the number of 909 nodes excluding sequences. 910 """ 911 912 names = set() 913 count = 0 914 915 for node in nodes: 916 917 # Add names and count them. 918 919 if isinstance(node, (compiler.ast.AssName, compiler.ast.Name)): 920 names.add(node.name) 921 count += 1 922 923 # Add names from sequences and incorporate their counts. 924 925 elif isinstance(node, (compiler.ast.AssList, compiler.ast.AssTuple, 926 compiler.ast.List, compiler.ast.Set, 927 compiler.ast.Tuple)): 928 _names, _count = get_names_from_nodes(node.nodes) 929 names.update(_names) 930 count += _count 931 932 # Count non-name, non-sequence nodes. 933 934 else: 935 count += 1 936 937 return names, count 938 939 # Location classes. 940 941 class Location: 942 943 "A generic program location." 944 945 def __init__(self, path, name, attrnames=None, version=None, access_number=None): 946 self.path = path 947 self.name = name 948 self.attrnames = attrnames 949 self.version = version 950 self.access_number = access_number 951 952 def __repr__(self): 953 return "Location(%r, %r, %r, %r, %r)" % self.as_tuple() 954 955 def as_tuple(self): 956 return (self.path, self.name, self.attrnames, self.version, self.access_number) 957 958 def __hash__(self): 959 return hash(self.as_tuple()) 960 961 def __eq__(self, other): 962 return self.as_tuple() == other.as_tuple() 963 964 def __cmp__(self, other): 965 return cmp(self.as_tuple(), other.as_tuple()) 966 967 def get_attrname(self): 968 969 """ 970 Extract the first attribute from the attribute names employed in this 971 location. 972 """ 973 974 attrnames = self.attrnames 975 if not attrnames: 976 return attrnames 977 return get_attrnames(attrnames)[0] 978 979 class AccessLocation(Location): 980 981 "A specialised access location." 982 983 def __init__(self, path, name, attrnames, access_number): 984 985 """ 986 Initialise an access location featuring 'path', 'name', 'attrnames' and 987 'access_number'. 988 """ 989 990 Location.__init__(self, path, name, attrnames, None, access_number) 991 992 def __repr__(self): 993 return "AccessLocation(%r, %r, %r, %r)" % (self.path, self.name, self.attrnames, self.access_number) 994 995 # Result classes. 996 997 class InstructionSequence: 998 999 "A generic sequence of instructions." 1000 1001 def __init__(self, instructions): 1002 self.instructions = instructions 1003 1004 def get_value_instruction(self): 1005 return self.instructions[-1] 1006 1007 def get_init_instructions(self): 1008 return self.instructions[:-1] 1009 1010 # Dictionary utilities. 1011 1012 def init_item(d, key, fn): 1013 1014 """ 1015 Add to 'd' an entry for 'key' using the callable 'fn' to make an initial 1016 value where no entry already exists. 1017 """ 1018 1019 if not d.has_key(key): 1020 d[key] = fn() 1021 return d[key] 1022 1023 def dict_for_keys(d, keys): 1024 1025 "Return a new dictionary containing entries from 'd' for the given 'keys'." 1026 1027 nd = {} 1028 for key in keys: 1029 if d.has_key(key): 1030 nd[key] = d[key] 1031 return nd 1032 1033 def make_key(s): 1034 1035 "Make sequence 's' into a tuple-based key, first sorting its contents." 1036 1037 l = list(s) 1038 l.sort() 1039 return tuple(l) 1040 1041 def add_counter_item(d, key): 1042 1043 """ 1044 Make a mapping in 'd' for 'key' to the number of keys added before it, thus 1045 maintaining a mapping of keys to their order of insertion. 1046 """ 1047 1048 if not d.has_key(key): 1049 d[key] = len(d.keys()) 1050 return d[key] 1051 1052 def remove_items(d1, d2): 1053 1054 "Remove from 'd1' all items from 'd2'." 1055 1056 for key in d2.keys(): 1057 if d1.has_key(key): 1058 del d1[key] 1059 1060 # Set utilities. 1061 1062 def first(s): 1063 return list(s)[0] 1064 1065 def same(s1, s2): 1066 return set(s1) == set(s2) 1067 1068 def order_dependencies(all_depends): 1069 1070 """ 1071 Produce a dependency ordering for the 'all_depends' mapping. This mapping 1072 has the form "A depends on B, C...". The result will order A, B, C, and so 1073 on. 1074 """ 1075 1076 usage = init_reverse_dependencies(all_depends) 1077 1078 # Produce an ordering by obtaining exposed items (required by items already 1079 # processed) and putting them at the start of the list. 1080 1081 ordered = [] 1082 1083 while usage: 1084 have_next = False 1085 1086 for key, n in usage.items(): 1087 1088 # Add items needed by no other items to the ordering. 1089 1090 if not n: 1091 remove_dependency(key, all_depends, usage, ordered) 1092 have_next = True 1093 1094 if not have_next: 1095 raise ValueError, usage 1096 1097 return ordered 1098 1099 def order_dependencies_partial(all_depends): 1100 1101 """ 1102 Produce a dependency ordering for the 'all_depends' mapping. This mapping 1103 has the form "A depends on B, C...". The result will order A, B, C, and so 1104 on. Where cycles exist, they will be broken and a partial ordering returned. 1105 """ 1106 1107 usage = init_reverse_dependencies(all_depends) 1108 1109 # Duplicate the dependencies for subsequent modification. 1110 1111 new_depends = {} 1112 for key, values in all_depends.items(): 1113 new_depends[key] = set(values) 1114 1115 all_depends = new_depends 1116 1117 # Produce an ordering by obtaining exposed items (required by items already 1118 # processed) and putting them at the start of the list. 1119 1120 ordered = [] 1121 1122 while usage: 1123 least = None 1124 least_key = None 1125 1126 for key, n in usage.items(): 1127 1128 # Add items needed by no other items to the ordering. 1129 1130 if not n: 1131 remove_dependency(key, all_depends, usage, ordered) 1132 least = 0 1133 1134 # When breaking cycles, note the least used items. 1135 1136 elif least is None or len(n) < least: 1137 least_key = key 1138 least = len(n) 1139 1140 if least: 1141 transfer_dependencies(least_key, all_depends, usage, ordered) 1142 1143 return ordered 1144 1145 def init_reverse_dependencies(all_depends): 1146 1147 """ 1148 From 'all_depends', providing a mapping of the form "A depends on B, C...", 1149 record the reverse dependencies, making a mapping of the form 1150 "B is needed by A", "C is needed by A", and so on. 1151 """ 1152 1153 usage = {} 1154 1155 # Record path-based dependencies. 1156 1157 for key in all_depends.keys(): 1158 usage[key] = set() 1159 1160 for key, depends in all_depends.items(): 1161 for depend in depends: 1162 init_item(usage, depend, set) 1163 usage[depend].add(key) 1164 1165 return usage 1166 1167 def transfer_dependencies(key, all_depends, usage, ordered): 1168 1169 """ 1170 Transfer items needed by 'key' to those items needing 'key', found using 1171 'all_depends', and updating 'usage'. Insert 'key' into the 'ordered' 1172 collection of dependencies. 1173 1174 If "A is needed by X" and "B is needed by A", then transferring items needed 1175 by A will cause "B is needed by X" to be recorded as a consequence. 1176 1177 Transferring items also needs to occur in the reverse mapping, so that 1178 "A needs B" and "X needs A", then the consequence must be recorded as 1179 "X needs B". 1180 """ 1181 1182 ordered.insert(0, key) 1183 1184 needing = usage[key] # A is needed by X 1185 needed = all_depends.get(key) # A needs B 1186 1187 if needing: 1188 for depend in needing: 1189 l = all_depends.get(depend) 1190 if not l: 1191 continue 1192 1193 l.remove(key) # X needs (A) 1194 1195 if needed: 1196 l.update(needed) # X needs B... 1197 1198 # Prevent self references. 1199 1200 if depend in needed: 1201 l.remove(depend) 1202 1203 if needed: 1204 for depend in needed: 1205 l = usage.get(depend) 1206 if not l: 1207 continue 1208 1209 l.remove(key) # B is needed by (A) 1210 l.update(needing) # B is needed by X... 1211 1212 # Prevent self references. 1213 1214 if depend in needing: 1215 l.remove(depend) 1216 1217 if needed: 1218 del all_depends[key] 1219 del usage[key] 1220 1221 def remove_dependency(key, all_depends, usage, ordered): 1222 1223 """ 1224 Remove 'key', found in 'all_depends', from 'usage', inserting it into the 1225 'ordered' collection of dependencies. 1226 1227 Given that 'usage' for a given key A would indicate that "A needs <nothing>" 1228 upon removing A from 'usage', the outcome is that all keys needing A will 1229 have A removed from their 'usage' records. 1230 1231 So, if "B needs A", removing A will cause "B needs <nothing>" to be recorded 1232 as a consequence. 1233 """ 1234 1235 ordered.insert(0, key) 1236 1237 depends = all_depends.get(key) 1238 1239 # Reduce usage of the referenced items. 1240 1241 if depends: 1242 for depend in depends: 1243 usage[depend].remove(key) 1244 1245 del usage[key] 1246 1247 # General input/output. 1248 1249 def readfile(filename): 1250 1251 "Return the contents of 'filename'." 1252 1253 f = open(filename) 1254 try: 1255 return f.read() 1256 finally: 1257 f.close() 1258 1259 def writefile(filename, s): 1260 1261 "Write to 'filename' the string 's'." 1262 1263 f = open(filename, "w") 1264 try: 1265 f.write(s) 1266 finally: 1267 f.close() 1268 1269 # General encoding. 1270 1271 def sorted_output(x): 1272 1273 "Sort sequence 'x' and return a string with commas separating the values." 1274 1275 x = map(str, x) 1276 x.sort() 1277 return ", ".join(x) 1278 1279 def get_string_details(literals, encoding): 1280 1281 """ 1282 Determine whether 'literals' represent Unicode strings or byte strings, 1283 using 'encoding' to reproduce byte sequences. 1284 1285 Each literal is the full program representation including prefix and quotes 1286 recoded by the parser to UTF-8. Thus, any literal found to represent a byte 1287 string needs to be translated back to its original encoding. 1288 1289 Return a single encoded literal value, a type name, and the original 1290 encoding as a tuple. 1291 """ 1292 1293 typename = "unicode" 1294 1295 l = [] 1296 1297 for s in literals: 1298 out, _typename = get_literal_details(s) 1299 if _typename == "str": 1300 typename = "str" 1301 l.append(out) 1302 1303 out = "".join(l) 1304 1305 # For Unicode values, convert to the UTF-8 program representation. 1306 1307 if typename == "unicode": 1308 return out.encode("utf-8"), typename, encoding 1309 1310 # For byte string values, convert back to the original encoding. 1311 1312 else: 1313 return out.encode(encoding), typename, encoding 1314 1315 def get_literal_details(s): 1316 1317 """ 1318 Determine whether 's' represents a Unicode string or a byte string, where 1319 's' contains the full program representation of a literal including prefix 1320 and quotes, recoded by the parser to UTF-8. 1321 1322 Find and convert Unicode values starting with <backslash>u or <backslash>U, 1323 and byte or Unicode values starting with <backslash><octal digit> or 1324 <backslash>x. 1325 1326 Literals prefixed with "u" cause <backslash><octal digit> and <backslash>x 1327 to be considered as Unicode values. Otherwise, they produce byte values and 1328 cause unprefixed strings to be considered as byte strings. 1329 1330 Literals prefixed with "r" do not have their backslash-encoded values 1331 converted unless also prefixed with "u", in which case only the above value 1332 formats are converted, not any of the other special sequences for things 1333 like newlines. 1334 1335 Return the literal value as a Unicode object together with the appropriate 1336 type name in a tuple. 1337 """ 1338 1339 l = [] 1340 1341 # Identify the quote character and use it to identify the prefix. 1342 1343 quote_type = s[-1] 1344 prefix_end = s.find(quote_type) 1345 prefix = s[:prefix_end].lower() 1346 1347 if prefix not in ("", "b", "br", "r", "u", "ur"): 1348 raise ValueError, "String literal does not have a supported prefix: %s" % s 1349 1350 if "b" in prefix: 1351 typename = "str" 1352 else: 1353 typename = "unicode" 1354 1355 # Identify triple quotes or single quotes. 1356 1357 if len(s) >= 6 and s[-2] == quote_type and s[-3] == quote_type: 1358 quote = s[prefix_end:prefix_end+3] 1359 current = prefix_end + 3 1360 end = len(s) - 3 1361 else: 1362 quote = s[prefix_end] 1363 current = prefix_end + 1 1364 end = len(s) - 1 1365 1366 # Conversions of some quoted values. 1367 1368 searches = { 1369 "u" : (6, 16), 1370 "U" : (10, 16), 1371 "x" : (4, 16), 1372 } 1373 1374 octal_digits = map(str, range(0, 8)) 1375 1376 # Translations of some quoted values. 1377 1378 escaped = { 1379 "\\" : "\\", "'" : "'", '"' : '"', 1380 "a" : "\a", "b" : "\b", "f" : "\f", 1381 "n" : "\n", "r" : "\r", "t" : "\t", 1382 } 1383 1384 while current < end: 1385 1386 # Look for quoted values. 1387 1388 index = s.find("\\", current) 1389 if index == -1 or index + 1 == end: 1390 l.append(s[current:end]) 1391 break 1392 1393 # Add the preceding text. 1394 1395 l.append(s[current:index]) 1396 1397 # Handle quoted text. 1398 1399 term = s[index+1] 1400 1401 # Add Unicode values. Where a string is u-prefixed, even \o and \x 1402 # produce Unicode values. 1403 1404 if typename == "unicode" and ( 1405 term in ("u", "U") or 1406 "u" in prefix and (term == "x" or term in octal_digits)): 1407 1408 needed, base = searches.get(term, (4, 8)) 1409 value = convert_quoted_value(s, index, needed, end, base, unichr) 1410 l.append(value) 1411 current = index + needed 1412 1413 # Add raw byte values, changing the string type. 1414 1415 elif "r" not in prefix and ( 1416 term == "x" or term in octal_digits): 1417 1418 needed, base = searches.get(term, (4, 8)) 1419 value = convert_quoted_value(s, index, needed, end, base, chr) 1420 l.append(value) 1421 typename = "str" 1422 current = index + needed 1423 1424 # Add other escaped values. 1425 1426 elif "r" not in prefix and escaped.has_key(term): 1427 l.append(escaped[term]) 1428 current = index + 2 1429 1430 # Add other text as found. 1431 1432 else: 1433 l.append(s[index:index+2]) 1434 current = index + 2 1435 1436 # Collect the components into a single Unicode object. Since the literal 1437 # text was already in UTF-8 form, interpret plain strings as UTF-8 1438 # sequences. 1439 1440 out = [] 1441 1442 for value in l: 1443 if isinstance(value, unicode): 1444 out.append(value) 1445 else: 1446 out.append(unicode(value, "utf-8")) 1447 1448 return "".join(out), typename 1449 1450 def convert_quoted_value(s, index, needed, end, base, fn): 1451 1452 """ 1453 Interpret a quoted value in 's' at 'index' with the given 'needed' number of 1454 positions, and with the given 'end' indicating the first position after the 1455 end of the actual string content. 1456 1457 Use 'base' as the numerical base when interpreting the value, and use 'fn' 1458 to convert the value to an appropriate type. 1459 """ 1460 1461 s = s[index:min(index+needed, end)] 1462 1463 # Not a complete occurrence. 1464 1465 if len(s) < needed: 1466 return s 1467 1468 # Test for a well-formed value. 1469 1470 try: 1471 first = base == 8 and 1 or 2 1472 value = int(s[first:needed], base) 1473 except ValueError: 1474 return s 1475 else: 1476 return fn(value) 1477 1478 # Attribute chain decoding. 1479 1480 def get_attrnames(attrnames): 1481 1482 """ 1483 Split the qualified attribute chain 'attrnames' into its components, 1484 handling special attributes starting with "#" that indicate type 1485 conformance. 1486 """ 1487 1488 if attrnames.startswith("#"): 1489 return [attrnames] 1490 else: 1491 return attrnames.split(".") 1492 1493 def get_name_path(path, name): 1494 1495 "Return a suitable qualified name from the given 'path' and 'name'." 1496 1497 if "." in name: 1498 return name 1499 else: 1500 return "%s.%s" % (path, name) 1501 1502 # Usage-related functions. 1503 1504 def get_types_for_usage(attrnames, objects): 1505 1506 """ 1507 Identify the types that can support the given 'attrnames', using the 1508 given 'objects' as the catalogue of type details. 1509 """ 1510 1511 types = [] 1512 for name, _attrnames in objects.items(): 1513 if set(attrnames).issubset(_attrnames): 1514 types.append(name) 1515 return types 1516 1517 def get_invoked_attributes(usage): 1518 1519 "Obtain invoked attribute from the given 'usage'." 1520 1521 invoked = [] 1522 if usage: 1523 for attrname, invocation, assignment in usage: 1524 if invocation: 1525 invoked.append(attrname) 1526 return invoked 1527 1528 def get_assigned_attributes(usage): 1529 1530 "Obtain assigned attribute from the given 'usage'." 1531 1532 assigned = [] 1533 if usage: 1534 for attrname, invocation, assignment in usage: 1535 if assignment: 1536 assigned.append(attrname) 1537 return assigned 1538 1539 # Type and module functions. 1540 # NOTE: This makes assumptions about the __builtins__ structure. 1541 1542 def get_builtin_module(name): 1543 1544 "Return the module name containing the given type 'name'." 1545 1546 if name == "string": 1547 modname = "str" 1548 elif name == "utf8string": 1549 modname = "unicode" 1550 elif name == "NoneType": 1551 modname = "none" 1552 else: 1553 modname = name 1554 1555 return "__builtins__.%s" % modname 1556 1557 def get_builtin_type(name): 1558 1559 "Return the type name provided by the given Python value 'name'." 1560 1561 if name == "str": 1562 return "string" 1563 elif name == "unicode": 1564 return "utf8string" 1565 else: 1566 return name 1567 1568 def get_builtin_class(name): 1569 1570 "Return the full name of the built-in class having the given 'name'." 1571 1572 typename = get_builtin_type(name) 1573 module = get_builtin_module(typename) 1574 return "%s.%s" % (module, typename) 1575 1576 # Useful data. 1577 1578 predefined_constants = "False", "None", "NotImplemented", "True" 1579 1580 unary_operator_functions = { 1581 1582 # Unary operations. 1583 1584 "Invert" : "invert", 1585 "UnaryAdd" : "pos", 1586 "UnarySub" : "neg", 1587 } 1588 1589 operator_functions = { 1590 1591 # Fundamental operations. 1592 1593 "is" : "is_", 1594 "is not" : "is_not", 1595 1596 # Binary operations. 1597 1598 "in" : "in_", 1599 "not in" : "not_in", 1600 "Add" : "add", 1601 "Bitand" : "and_", 1602 "Bitor" : "or_", 1603 "Bitxor" : "xor", 1604 "Div" : "div", 1605 "FloorDiv" : "floordiv", 1606 "LeftShift" : "lshift", 1607 "Mod" : "mod", 1608 "Mul" : "mul", 1609 "Power" : "pow", 1610 "RightShift" : "rshift", 1611 "Sub" : "sub", 1612 1613 # Augmented assignment. 1614 1615 "+=" : "iadd", 1616 "-=" : "isub", 1617 "*=" : "imul", 1618 "/=" : "idiv", 1619 "//=" : "ifloordiv", 1620 "%=" : "imod", 1621 "**=" : "ipow", 1622 "<<=" : "ilshift", 1623 ">>=" : "irshift", 1624 "&=" : "iand", 1625 "^=" : "ixor", 1626 "|=" : "ior", 1627 1628 # Comparisons. 1629 1630 "==" : "eq", 1631 "!=" : "ne", 1632 "<" : "lt", 1633 "<=" : "le", 1634 ">=" : "ge", 1635 ">" : "gt", 1636 } 1637 1638 operator_functions.update(unary_operator_functions) 1639 1640 # vim: tabstop=4 expandtab shiftwidth=4