1 #!/usr/bin/env python 2 3 """ 4 Common functions. 5 6 Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013, 7 2014, 2015, 2016, 2017, 2018 Paul Boddie <paul@boddie.org.uk> 8 9 This program is free software; you can redistribute it and/or modify it under 10 the terms of the GNU General Public License as published by the Free Software 11 Foundation; either version 3 of the License, or (at your option) any later 12 version. 13 14 This program is distributed in the hope that it will be useful, but WITHOUT 15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 16 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 17 details. 18 19 You should have received a copy of the GNU General Public License along with 20 this program. If not, see <http://www.gnu.org/licenses/>. 21 """ 22 23 from compiler.transformer import Transformer 24 from errors import InspectError 25 from os import listdir, makedirs, remove 26 from os.path import exists, getmtime, isdir, join, split 27 from results import ConstantValueRef, LiteralSequenceRef, NameRef 28 import compiler.ast 29 30 class CommonOutput: 31 32 "Common output functionality." 33 34 def check_output(self, options=None): 35 36 "Check the existing output and remove it if irrelevant." 37 38 if not exists(self.output): 39 makedirs(self.output) 40 41 details = self.importer.get_cache_details() 42 recorded_details = self.get_output_details() 43 44 # Combine cache details with any options. 45 46 full_details = options and (details + " " + options) or details 47 48 if recorded_details != full_details: 49 self.remove_output() 50 51 writefile(self.get_output_details_filename(), full_details) 52 53 def get_output_details_filename(self): 54 55 "Return the output details filename." 56 57 return join(self.output, "$details") 58 59 def get_output_details(self): 60 61 "Return details of the existing output." 62 63 details_filename = self.get_output_details_filename() 64 65 if not exists(details_filename): 66 return None 67 else: 68 return readfile(details_filename) 69 70 def remove_output(self, dirname=None): 71 72 "Remove the output." 73 74 dirname = dirname or self.output 75 76 for filename in listdir(dirname): 77 path = join(dirname, filename) 78 if isdir(path): 79 self.remove_output(path) 80 else: 81 remove(path) 82 83 def copy(source, target, only_if_newer=True): 84 85 "Copy a text file from 'source' to 'target'." 86 87 if isdir(target): 88 target = join(target, split(source)[-1]) 89 90 if only_if_newer and not is_newer(source, target): 91 return 92 93 infile = open(source) 94 outfile = open(target, "w") 95 96 try: 97 outfile.write(infile.read()) 98 finally: 99 outfile.close() 100 infile.close() 101 102 def is_newer(source, target): 103 104 "Return whether 'source' is newer than 'target'." 105 106 if exists(target): 107 target_mtime = getmtime(target) 108 source_mtime = getmtime(source) 109 return source_mtime > target_mtime 110 111 return True 112 113 class CommonModule: 114 115 "A common module representation." 116 117 def __init__(self, name, importer): 118 119 """ 120 Initialise this module with the given 'name' and an 'importer' which is 121 used to provide access to other modules when required. 122 """ 123 124 self.name = name 125 self.importer = importer 126 self.filename = None 127 128 # Inspection-related attributes. 129 130 self.astnode = None 131 self.encoding = None 132 self.temp = {} 133 self.lambdas = {} 134 135 # Constants, literals and values. 136 137 self.constants = {} 138 self.constant_values = {} 139 self.literals = {} 140 self.literal_types = {} 141 142 # Nested namespaces. 143 144 self.namespace_path = [] 145 self.in_function = False 146 147 # Retain the assignment value expression and track invocations. 148 149 self.in_assignment = None 150 self.in_invocation = None 151 152 # Attribute chain state management. 153 154 self.attrs = [] 155 self.chain_assignment = [] 156 self.chain_invocation = [] 157 158 def __repr__(self): 159 return "CommonModule(%r, %r)" % (self.name, self.importer) 160 161 def parse_file(self, filename): 162 163 "Parse the file with the given 'filename', initialising attributes." 164 165 self.filename = filename 166 167 # Use the Transformer directly to obtain encoding information. 168 169 t = Transformer() 170 f = open(filename) 171 172 try: 173 self.astnode = t.parsesuite(f.read() + "\n") 174 self.encoding = t.encoding 175 finally: 176 f.close() 177 178 # Module-relative naming. 179 180 def get_global_path(self, name): 181 return "%s.%s" % (self.name, name) 182 183 def get_namespace_path(self): 184 return ".".join([self.name] + self.namespace_path) 185 186 def get_object_path(self, name): 187 return ".".join([self.name] + self.namespace_path + [name]) 188 189 # Namespace management. 190 191 def enter_namespace(self, name): 192 193 "Enter the namespace having the given 'name'." 194 195 self.namespace_path.append(name) 196 197 def exit_namespace(self): 198 199 "Exit the current namespace." 200 201 self.namespace_path.pop() 202 203 # Constant reference naming. 204 205 def get_constant_name(self, value, value_type, encoding=None): 206 207 """ 208 Add a new constant to the current namespace for 'value' with 209 'value_type'. 210 """ 211 212 path = self.get_namespace_path() 213 init_item(self.constants, path, dict) 214 return "$c%d" % add_counter_item(self.constants[path], (value, value_type, encoding)) 215 216 # Literal reference naming. 217 218 def get_literal_name(self): 219 220 "Add a new literal to the current namespace." 221 222 path = self.get_namespace_path() 223 init_item(self.literals, path, lambda: 0) 224 return "$C%d" % self.literals[path] 225 226 def next_literal(self): 227 self.literals[self.get_namespace_path()] += 1 228 229 # Temporary variable naming. 230 231 def get_temporary_name(self): 232 path = self.get_namespace_path() 233 init_item(self.temp, path, lambda: 0) 234 return "$t%d" % self.temp[path] 235 236 def next_temporary(self): 237 self.temp[self.get_namespace_path()] += 1 238 239 # Arbitrary function naming. 240 241 def get_lambda_name(self): 242 path = self.get_namespace_path() 243 init_item(self.lambdas, path, lambda: 0) 244 name = "$l%d" % self.lambdas[path] 245 self.lambdas[path] += 1 246 return name 247 248 def reset_lambdas(self): 249 self.lambdas = {} 250 251 # Constant and literal recording. 252 253 def get_constant_value(self, value, literals=None): 254 255 """ 256 Encode the 'value' if appropriate, returning a value, a typename and any 257 encoding. 258 """ 259 260 if isinstance(value, unicode): 261 return value.encode("utf-8"), "unicode", self.encoding 262 263 # Attempt to convert plain strings to text. 264 265 elif isinstance(value, str) and self.encoding: 266 try: 267 return get_string_details(literals, self.encoding) 268 except UnicodeDecodeError: 269 pass 270 271 return value, value.__class__.__name__, None 272 273 def get_constant_reference(self, ref, value, encoding=None): 274 275 """ 276 Return a constant reference for the given 'ref' type and 'value', with 277 the optional 'encoding' applying to text values. 278 """ 279 280 constant_name = self.get_constant_name(value, ref.get_origin(), encoding) 281 282 # Return a reference for the constant. 283 284 objpath = self.get_object_path(constant_name) 285 name_ref = ConstantValueRef(constant_name, ref.instance_of(objpath), value) 286 287 # Record the value and type for the constant. 288 289 self._reserve_constant(objpath, name_ref.value, name_ref.get_origin(), encoding) 290 return name_ref 291 292 def reserve_constant(self, objpath, value, origin, encoding=None): 293 294 """ 295 Reserve a constant within 'objpath' with the given 'value' and having a 296 type with the given 'origin', with the optional 'encoding' applying to 297 text values. 298 """ 299 300 constant_name = self.get_constant_name(value, origin) 301 objpath = self.get_object_path(constant_name) 302 self._reserve_constant(objpath, value, origin, encoding) 303 304 def _reserve_constant(self, objpath, value, origin, encoding): 305 306 """ 307 Store a constant for 'objpath' with the given 'value' and 'origin', with 308 the optional 'encoding' applying to text values. 309 """ 310 311 self.constant_values[objpath] = value, origin, encoding 312 313 def get_literal_reference(self, name, ref, items, cls): 314 315 """ 316 Return a literal reference for the given type 'name', literal 'ref', 317 node 'items' and employing the given 'cls' as the class of the returned 318 reference object. 319 """ 320 321 # Construct an invocation using the items as arguments. 322 323 typename = "$L%s" % name 324 325 invocation = compiler.ast.CallFunc( 326 compiler.ast.Name(typename), 327 items 328 ) 329 330 # Get a name for the actual literal. 331 332 instname = self.get_literal_name() 333 self.next_literal() 334 335 # Record the type for the literal. 336 337 objpath = self.get_object_path(instname) 338 self.literal_types[objpath] = ref.get_origin() 339 340 # Return a wrapper for the invocation exposing the items. 341 342 return cls( 343 instname, 344 ref.instance_of(), 345 self.process_structure_node(invocation), 346 invocation.args 347 ) 348 349 # Node handling. 350 351 def process_structure(self, node): 352 353 """ 354 Within the given 'node', process the program structure. 355 356 During inspection, this will process global declarations, adjusting the 357 module namespace, and import statements, building a module dependency 358 hierarchy. 359 360 During translation, this will consult deduced program information and 361 output translated code. 362 """ 363 364 l = [] 365 for n in node.getChildNodes(): 366 l.append(self.process_structure_node(n)) 367 return l 368 369 def process_augassign_node(self, n): 370 371 "Process the given augmented assignment node 'n'." 372 373 op = operator_functions[n.op] 374 375 if isinstance(n.node, compiler.ast.Getattr): 376 target = compiler.ast.AssAttr(n.node.expr, n.node.attrname, "OP_ASSIGN") 377 elif isinstance(n.node, compiler.ast.Name): 378 target = compiler.ast.AssName(n.node.name, "OP_ASSIGN") 379 else: 380 target = n.node 381 382 assignment = compiler.ast.Assign( 383 [target], 384 compiler.ast.CallFunc( 385 compiler.ast.Name("$op%s" % op), 386 [n.node, n.expr])) 387 388 return self.process_structure_node(assignment) 389 390 def process_assignment_for_object(self, original_name, source): 391 392 """ 393 Return an assignment operation making 'original_name' refer to the given 394 'source'. 395 """ 396 397 assignment = compiler.ast.Assign( 398 [compiler.ast.AssName(original_name, "OP_ASSIGN")], 399 source 400 ) 401 402 return self.process_structure_node(assignment) 403 404 def process_assignment_node_items(self, n, expr): 405 406 """ 407 Process the given assignment node 'n' whose children are to be assigned 408 items of 'expr'. 409 """ 410 411 name_ref = self.process_structure_node(expr) 412 413 # Either unpack the items and present them directly to each assignment 414 # node. 415 416 if isinstance(name_ref, LiteralSequenceRef) and \ 417 self.process_literal_sequence_items(n, name_ref): 418 419 pass 420 421 # Or have the assignment nodes access each item via the sequence API. 422 423 else: 424 self.process_assignment_node_items_by_position(n, expr, name_ref) 425 426 def process_assignment_node_items_by_position(self, n, expr, name_ref): 427 428 """ 429 Process the given sequence assignment node 'n', converting the node to 430 the separate assignment of each target using positional access on a 431 temporary variable representing the sequence. Use 'expr' as the assigned 432 value and 'name_ref' as the reference providing any existing temporary 433 variable. 434 """ 435 436 statements = [] 437 438 # Employ existing names to access the sequence. 439 # Literal sequences do not provide names of accessible objects. 440 441 if isinstance(name_ref, NameRef) and not isinstance(name_ref, LiteralSequenceRef): 442 temp = name_ref.name 443 444 # For other expressions, create a temporary name to reference the items. 445 446 else: 447 temp = self.get_temporary_name() 448 self.next_temporary() 449 450 statements.append( 451 compiler.ast.Assign([compiler.ast.AssName(temp, "OP_ASSIGN")], expr) 452 ) 453 454 # Generate a test for the length of the expression object. 455 456 statements.append(compiler.ast.Discard( 457 compiler.ast.CallFunc(compiler.ast.Name("$seq_test_length"), 458 [compiler.ast.Name(temp), compiler.ast.Const(len(n.nodes))]))) 459 460 # Assign the items to the target nodes. 461 462 for i, node in enumerate(n.nodes): 463 statements.append( 464 compiler.ast.Assign([node], compiler.ast.CallFunc( 465 compiler.ast.Getattr(compiler.ast.Name(temp), "__get_single_item_unchecked__"), 466 [compiler.ast.Const(i, str(i))])) 467 ) 468 469 return self.process_structure_node(compiler.ast.Stmt(statements)) 470 471 def process_literal_sequence_items(self, n, name_ref): 472 473 """ 474 Process the given assignment node 'n', obtaining from the given 475 'name_ref' the items to be assigned to the assignment targets. 476 477 Return whether this method was able to process the assignment node as 478 a sequence of direct assignments. 479 """ 480 481 if len(n.nodes) == len(name_ref.items): 482 assigned_names, count = get_names_from_nodes(n.nodes) 483 accessed_names, _count = get_names_from_nodes(name_ref.items) 484 485 # Only assign directly between items if all assigned names are 486 # plain names (not attribute assignments), and if the assigned names 487 # do not appear in the accessed names. 488 489 if len(assigned_names) == count and \ 490 not assigned_names.intersection(accessed_names): 491 492 for node, item in zip(n.nodes, name_ref.items): 493 self.process_assignment_node(node, item) 494 495 return True 496 497 # Otherwise, use the position-based mechanism to obtain values. 498 499 else: 500 return False 501 else: 502 raise InspectError("In %s, item assignment needing %d items is given %d items." % ( 503 self.get_namespace_path(), len(n.nodes), len(name_ref.items))) 504 505 def process_compare_node(self, n): 506 507 """ 508 Process the given comparison node 'n', converting an operator sequence 509 from... 510 511 <expr1> <op1> <expr2> <op2> <expr3> 512 513 ...to... 514 515 <op1>(<expr1>, <expr2>) and <op2>(<expr2>, <expr3>) 516 """ 517 518 invocations = [] 519 last = n.expr 520 521 for op, op_node in n.ops: 522 op = operator_functions.get(op) 523 524 invocations.append(compiler.ast.CallFunc( 525 compiler.ast.Name("$op%s" % op), 526 [last, op_node])) 527 528 last = op_node 529 530 if len(invocations) > 1: 531 result = compiler.ast.And(invocations) 532 else: 533 result = invocations[0] 534 535 return self.process_structure_node(result) 536 537 def process_dict_node(self, node): 538 539 """ 540 Process the given dictionary 'node', returning a list of (key, value) 541 tuples. 542 """ 543 544 l = [] 545 for key, value in node.items: 546 l.append(( 547 self.process_structure_node(key), 548 self.process_structure_node(value))) 549 return l 550 551 def process_for_node(self, n): 552 553 """ 554 Generate attribute accesses for {n.list}.__iter__ and the next method on 555 the iterator, producing a replacement node for the original. 556 """ 557 558 t0 = self.get_temporary_name() 559 self.next_temporary() 560 t1 = self.get_temporary_name() 561 self.next_temporary() 562 t2 = self.get_temporary_name() 563 self.next_temporary() 564 565 node = compiler.ast.Stmt([ 566 567 # <t0> = {n.list} 568 # <t1> = <t0>.__iter__() 569 570 compiler.ast.Assign( 571 [compiler.ast.AssName(t0, "OP_ASSIGN")], 572 n.list), 573 574 compiler.ast.Assign( 575 [compiler.ast.AssName(t1, "OP_ASSIGN")], 576 compiler.ast.CallFunc( 577 compiler.ast.Getattr(compiler.ast.Name(t0), "__iter__"), 578 [])), 579 580 # <t2> = <t1>.next 581 # try: 582 # while True: 583 # <var>... = <t2>() 584 # ... 585 # except StopIteration: 586 # pass 587 588 compiler.ast.Assign( 589 [compiler.ast.AssName(t2, "OP_ASSIGN")], 590 compiler.ast.Getattr(compiler.ast.Name(t1), "next")), 591 592 compiler.ast.TryExcept( 593 compiler.ast.While( 594 compiler.ast.Name("True"), 595 compiler.ast.Stmt([ 596 compiler.ast.Assign( 597 [n.assign], 598 compiler.ast.CallFunc( 599 compiler.ast.Name(t2), 600 [] 601 )), 602 n.body]), 603 None), 604 [(compiler.ast.Name("StopIteration"), None, compiler.ast.Stmt([compiler.ast.Pass()]))], 605 None) 606 ]) 607 608 self.process_structure_node(node) 609 610 def process_literal_sequence_node(self, n, name, ref, cls): 611 612 """ 613 Process the given literal sequence node 'n' as a function invocation, 614 with 'name' indicating the type of the sequence, and 'ref' being a 615 reference to the type. The 'cls' is used to instantiate a suitable name 616 reference. 617 """ 618 619 if name == "dict": 620 items = [] 621 for key, value in n.items: 622 items.append(compiler.ast.Tuple([key, value])) 623 else: # name in ("list", "tuple"): 624 items = n.nodes 625 626 return self.get_literal_reference(name, ref, items, cls) 627 628 def process_operator_node(self, n): 629 630 """ 631 Process the given operator node 'n' as an operator function invocation. 632 """ 633 634 opname = n.__class__.__name__ 635 operands = n.getChildNodes() 636 637 # Convert a unary operation to an invocation. 638 639 op = unary_operator_functions.get(opname) 640 641 if op: 642 invocation = compiler.ast.CallFunc( 643 compiler.ast.Name("$op%s" % op), 644 [operands[0]] 645 ) 646 647 # Convert a single operator with a list of operands to a combination of 648 # pairwise operations. 649 650 else: 651 op = operator_functions[opname] 652 invocation = self._process_operator_node(op, operands) 653 654 return self.process_structure_node(invocation) 655 656 def _process_operator_node(self, op, operands): 657 658 """ 659 Process the given 'op', being an operator function, together with the 660 supplied 'operands', returning either a single remaining operand or an 661 invocation combining the operands. 662 """ 663 664 remaining = operands[1:] 665 if not remaining: 666 return operands[0] 667 668 return compiler.ast.CallFunc( 669 compiler.ast.Name("$op%s" % op), 670 [operands[0], self._process_operator_node(op, remaining)] 671 ) 672 673 def process_print_node(self, n): 674 675 """ 676 Process the given print node 'n' as an invocation on a stream of the 677 form... 678 679 $print(dest, args, nl) 680 681 The special function name will be translated elsewhere. 682 """ 683 684 nl = isinstance(n, compiler.ast.Printnl) 685 invocation = compiler.ast.CallFunc( 686 compiler.ast.Name("$print"), 687 [n.dest or compiler.ast.Name("None"), 688 compiler.ast.List(list(n.nodes)), 689 nl and compiler.ast.Name("True") or compiler.ast.Name("False")] 690 ) 691 return self.process_structure_node(invocation) 692 693 def process_slice_node(self, n, expr=None): 694 695 """ 696 Process the given slice node 'n' as a method invocation. 697 """ 698 699 if n.flags == "OP_ASSIGN": op = "__setslice__" 700 elif n.flags == "OP_DELETE": op = "__delslice__" 701 else: op = "__getslice__" 702 703 invocation = compiler.ast.CallFunc( 704 compiler.ast.Getattr(n.expr, op), 705 [n.lower or compiler.ast.Name("None"), n.upper or compiler.ast.Name("None")] + 706 (expr and [expr] or []) 707 ) 708 709 # Fix parse tree structure. 710 711 if op == "__delslice__": 712 invocation = compiler.ast.Discard(invocation) 713 714 return self.process_structure_node(invocation) 715 716 def process_sliceobj_node(self, n): 717 718 """ 719 Process the given slice object node 'n' as a slice constructor. 720 """ 721 722 op = "slice" 723 invocation = compiler.ast.CallFunc( 724 compiler.ast.Name("$op%s" % op), 725 n.nodes 726 ) 727 return self.process_structure_node(invocation) 728 729 def process_subscript_node(self, n, expr=None): 730 731 """ 732 Process the given subscript node 'n' as a method invocation. 733 """ 734 735 if n.flags == "OP_ASSIGN": op = "__setitem__" 736 elif n.flags == "OP_DELETE": op = "__delitem__" 737 else: op = "__getitem__" 738 739 invocation = compiler.ast.CallFunc( 740 compiler.ast.Getattr(n.expr, op), 741 list(n.subs) + (expr and [expr] or []) 742 ) 743 744 # Fix parse tree structure. 745 746 if op == "__delitem__": 747 invocation = compiler.ast.Discard(invocation) 748 749 return self.process_structure_node(invocation) 750 751 def process_attribute_chain(self, n): 752 753 """ 754 Process the given attribute access node 'n'. Return a reference 755 describing the expression. 756 """ 757 758 # AssAttr/Getattr are nested with the outermost access being the last 759 # access in any chain. 760 761 self.attrs.insert(0, n.attrname) 762 attrs = self.attrs 763 764 # Break attribute chains where non-access nodes are found. 765 766 if not self.have_access_expression(n): 767 self.reset_attribute_chain() 768 769 # Descend into the expression, extending backwards any existing chain, 770 # or building another for the expression. 771 772 name_ref = self.process_structure_node(n.expr) 773 774 # Restore chain information applying to this node. 775 776 if not self.have_access_expression(n): 777 self.restore_attribute_chain(attrs) 778 779 # Return immediately if the expression was another access and thus a 780 # continuation backwards along the chain. The above processing will 781 # have followed the chain all the way to its conclusion. 782 783 if self.have_access_expression(n): 784 del self.attrs[0] 785 786 return name_ref 787 788 # Attribute chain handling. 789 790 def reset_attribute_chain(self): 791 792 "Reset the attribute chain for a subexpression of an attribute access." 793 794 self.attrs = [] 795 self.chain_assignment.append(self.in_assignment) 796 self.chain_invocation.append(self.in_invocation) 797 self.in_assignment = None 798 self.in_invocation = None 799 800 def restore_attribute_chain(self, attrs): 801 802 "Restore the attribute chain for an attribute access." 803 804 self.attrs = attrs 805 self.in_assignment = self.chain_assignment.pop() 806 self.in_invocation = self.chain_invocation.pop() 807 808 def have_access_expression(self, node): 809 810 "Return whether the expression associated with 'node' is Getattr." 811 812 return isinstance(node.expr, compiler.ast.Getattr) 813 814 def get_name_for_tracking(self, name, name_ref=None, is_global=False): 815 816 """ 817 Return the name to be used for attribute usage observations involving 818 the given 'name' in the current namespace. 819 820 If the name is being used outside a function, and if 'name_ref' is 821 given and indicates a global or if 'is_global' is specified as a true 822 value, a path featuring the name in the global namespace is returned. 823 Otherwise, a path computed using the current namespace and the given 824 name is returned. 825 826 The intention of this method is to provide a suitably-qualified name 827 that can be tracked across namespaces. Where globals are being 828 referenced in class namespaces, they should be referenced using their 829 path within the module, not using a path within each class. 830 831 It may not be possible to identify a global within a function at the 832 time of inspection (since a global may appear later in a file). 833 Consequently, globals are identified by their local name rather than 834 their module-qualified path. 835 """ 836 837 # For functions, use the appropriate local names. 838 839 if self.in_function: 840 return name 841 842 # For global names outside functions, use a global name. 843 844 elif is_global or name_ref and name_ref.is_global_name(): 845 return self.get_global_path(name) 846 847 # Otherwise, establish a name in the current namespace. 848 849 else: 850 return self.get_object_path(name) 851 852 def get_path_for_access(self): 853 854 "Outside functions, register accesses at the module level." 855 856 if not self.in_function: 857 return self.name 858 else: 859 return self.get_namespace_path() 860 861 def get_module_name(self, node): 862 863 """ 864 Using the given From 'node' in this module, calculate any relative import 865 information, returning a tuple containing a module to import along with any 866 names to import based on the node's name information. 867 868 Where the returned module is given as None, whole module imports should 869 be performed for the returned modules using the returned names. 870 """ 871 872 # Absolute import. 873 874 if node.level == 0: 875 return node.modname, node.names 876 877 # Relative to an ancestor of this module. 878 879 else: 880 path = self.name.split(".") 881 level = node.level 882 883 # Relative imports treat package roots as submodules. 884 885 if split(self.filename)[-1] == "__init__.py": 886 level -= 1 887 888 if level > len(path): 889 raise InspectError("Relative import %r involves too many levels up from module %r" % ( 890 ("%s%s" % ("." * node.level, node.modname or "")), self.name)) 891 892 basename = ".".join(path[:len(path)-level]) 893 894 # Name imports from a module. 895 896 if node.modname: 897 return "%s.%s" % (basename, node.modname), node.names 898 899 # Relative whole module imports. 900 901 else: 902 return basename, node.names 903 904 def get_argnames(args): 905 906 """ 907 Return a list of all names provided by 'args'. Since tuples may be 908 employed, the arguments are traversed depth-first. 909 """ 910 911 l = [] 912 for arg in args: 913 if isinstance(arg, tuple): 914 l += get_argnames(arg) 915 else: 916 l.append(arg) 917 return l 918 919 def get_names_from_nodes(nodes): 920 921 """ 922 Return the names employed in the given 'nodes' along with the number of 923 nodes excluding sequences. 924 """ 925 926 names = set() 927 count = 0 928 929 for node in nodes: 930 931 # Add names and count them. 932 933 if isinstance(node, (compiler.ast.AssName, compiler.ast.Name)): 934 names.add(node.name) 935 count += 1 936 937 # Add names from sequences and incorporate their counts. 938 939 elif isinstance(node, (compiler.ast.AssList, compiler.ast.AssTuple, 940 compiler.ast.List, compiler.ast.Set, 941 compiler.ast.Tuple)): 942 _names, _count = get_names_from_nodes(node.nodes) 943 names.update(_names) 944 count += _count 945 946 # Count non-name, non-sequence nodes. 947 948 else: 949 count += 1 950 951 return names, count 952 953 # Location classes. 954 955 class Location: 956 957 "A generic program location." 958 959 def __init__(self, path, name, attrnames=None, version=None, access_number=None): 960 self.path = path 961 self.name = name 962 self.attrnames = attrnames 963 self.version = version 964 self.access_number = access_number 965 966 def __repr__(self): 967 return "Location(%r, %r, %r, %r, %r)" % self.as_tuple() 968 969 def as_tuple(self): 970 return (self.path, self.name, self.attrnames, self.version, self.access_number) 971 972 def __hash__(self): 973 return hash(self.as_tuple()) 974 975 def __eq__(self, other): 976 return self.as_tuple() == other.as_tuple() 977 978 def __cmp__(self, other): 979 return cmp(self.as_tuple(), other.as_tuple()) 980 981 def get_attrname(self): 982 983 """ 984 Extract the first attribute from the attribute names employed in this 985 location. 986 """ 987 988 attrnames = self.attrnames 989 if not attrnames: 990 return attrnames 991 return get_attrnames(attrnames)[0] 992 993 class AccessLocation(Location): 994 995 "A specialised access location." 996 997 def __init__(self, path, name, attrnames, access_number): 998 999 """ 1000 Initialise an access location featuring 'path', 'name', 'attrnames' and 1001 'access_number'. 1002 """ 1003 1004 Location.__init__(self, path, name, attrnames, None, access_number) 1005 1006 def __repr__(self): 1007 return "AccessLocation(%r, %r, %r, %r)" % (self.path, self.name, self.attrnames, self.access_number) 1008 1009 # Result classes. 1010 1011 class InstructionSequence: 1012 1013 "A generic sequence of instructions." 1014 1015 def __init__(self, instructions): 1016 self.instructions = instructions 1017 1018 def get_value_instruction(self): 1019 return self.instructions[-1] 1020 1021 def get_init_instructions(self): 1022 return self.instructions[:-1] 1023 1024 # Dictionary utilities. 1025 1026 def init_item(d, key, fn): 1027 1028 """ 1029 Add to 'd' an entry for 'key' using the callable 'fn' to make an initial 1030 value where no entry already exists. 1031 """ 1032 1033 if not d.has_key(key): 1034 d[key] = fn() 1035 return d[key] 1036 1037 def dict_for_keys(d, keys): 1038 1039 "Return a new dictionary containing entries from 'd' for the given 'keys'." 1040 1041 nd = {} 1042 for key in keys: 1043 if d.has_key(key): 1044 nd[key] = d[key] 1045 return nd 1046 1047 def make_key(s): 1048 1049 "Make sequence 's' into a tuple-based key, first sorting its contents." 1050 1051 l = list(s) 1052 l.sort() 1053 return tuple(l) 1054 1055 def add_counter_item(d, key): 1056 1057 """ 1058 Make a mapping in 'd' for 'key' to the number of keys added before it, thus 1059 maintaining a mapping of keys to their order of insertion. 1060 """ 1061 1062 if not d.has_key(key): 1063 d[key] = len(d.keys()) 1064 return d[key] 1065 1066 def remove_items(d1, d2): 1067 1068 "Remove from 'd1' all items from 'd2'." 1069 1070 for key in d2.keys(): 1071 if d1.has_key(key): 1072 del d1[key] 1073 1074 # Set utilities. 1075 1076 def first(s): 1077 return list(s)[0] 1078 1079 def same(s1, s2): 1080 return set(s1) == set(s2) 1081 1082 def order_dependencies(all_depends): 1083 1084 """ 1085 Produce a dependency ordering for the 'all_depends' mapping. This mapping 1086 has the form "A depends on B, C...". The result will order A, B, C, and so 1087 on. 1088 """ 1089 1090 usage = init_reverse_dependencies(all_depends) 1091 1092 # Produce an ordering by obtaining exposed items (required by items already 1093 # processed) and putting them at the start of the list. 1094 1095 ordered = [] 1096 1097 while usage: 1098 have_next = False 1099 1100 for key, n in usage.items(): 1101 1102 # Add items needed by no other items to the ordering. 1103 1104 if not n: 1105 remove_dependency(key, all_depends, usage, ordered) 1106 have_next = True 1107 1108 if not have_next: 1109 raise ValueError, usage 1110 1111 return ordered 1112 1113 def order_dependencies_partial(all_depends): 1114 1115 """ 1116 Produce a dependency ordering for the 'all_depends' mapping. This mapping 1117 has the form "A depends on B, C...". The result will order A, B, C, and so 1118 on. Where cycles exist, they will be broken and a partial ordering returned. 1119 """ 1120 1121 usage = init_reverse_dependencies(all_depends) 1122 1123 # Duplicate the dependencies for subsequent modification. 1124 1125 new_depends = {} 1126 for key, values in all_depends.items(): 1127 new_depends[key] = set(values) 1128 1129 all_depends = new_depends 1130 1131 # Produce an ordering by obtaining exposed items (required by items already 1132 # processed) and putting them at the start of the list. 1133 1134 ordered = [] 1135 1136 while usage: 1137 least = None 1138 least_key = None 1139 1140 for key, n in usage.items(): 1141 1142 # Add items needed by no other items to the ordering. 1143 1144 if not n: 1145 remove_dependency(key, all_depends, usage, ordered) 1146 least = 0 1147 1148 # When breaking cycles, note the least used items. 1149 1150 elif least is None or len(n) < least: 1151 least_key = key 1152 least = len(n) 1153 1154 if least: 1155 transfer_dependencies(least_key, all_depends, usage, ordered) 1156 1157 return ordered 1158 1159 def init_reverse_dependencies(all_depends): 1160 1161 """ 1162 From 'all_depends', providing a mapping of the form "A depends on B, C...", 1163 record the reverse dependencies, making a mapping of the form 1164 "B is needed by A", "C is needed by A", and so on. 1165 """ 1166 1167 usage = {} 1168 1169 # Record path-based dependencies. 1170 1171 for key in all_depends.keys(): 1172 usage[key] = set() 1173 1174 for key, depends in all_depends.items(): 1175 for depend in depends: 1176 init_item(usage, depend, set) 1177 usage[depend].add(key) 1178 1179 return usage 1180 1181 def transfer_dependencies(key, all_depends, usage, ordered): 1182 1183 """ 1184 Transfer items needed by 'key' to those items needing 'key', found using 1185 'all_depends', and updating 'usage'. Insert 'key' into the 'ordered' 1186 collection of dependencies. 1187 1188 If "A is needed by X" and "B is needed by A", then transferring items needed 1189 by A will cause "B is needed by X" to be recorded as a consequence. 1190 1191 Transferring items also needs to occur in the reverse mapping, so that 1192 "A needs B" and "X needs A", then the consequence must be recorded as 1193 "X needs B". 1194 """ 1195 1196 ordered.insert(0, key) 1197 1198 needing = usage[key] # A is needed by X 1199 needed = all_depends.get(key) # A needs B 1200 1201 if needing: 1202 for depend in needing: 1203 l = all_depends.get(depend) 1204 if not l: 1205 continue 1206 1207 l.remove(key) # X needs (A) 1208 1209 if needed: 1210 l.update(needed) # X needs B... 1211 1212 # Prevent self references. 1213 1214 if depend in needed: 1215 l.remove(depend) 1216 1217 if needed: 1218 for depend in needed: 1219 l = usage.get(depend) 1220 if not l: 1221 continue 1222 1223 l.remove(key) # B is needed by (A) 1224 l.update(needing) # B is needed by X... 1225 1226 # Prevent self references. 1227 1228 if depend in needing: 1229 l.remove(depend) 1230 1231 if needed: 1232 del all_depends[key] 1233 del usage[key] 1234 1235 def remove_dependency(key, all_depends, usage, ordered): 1236 1237 """ 1238 Remove 'key', found in 'all_depends', from 'usage', inserting it into the 1239 'ordered' collection of dependencies. 1240 1241 Given that 'usage' for a given key A would indicate that "A needs <nothing>" 1242 upon removing A from 'usage', the outcome is that all keys needing A will 1243 have A removed from their 'usage' records. 1244 1245 So, if "B needs A", removing A will cause "B needs <nothing>" to be recorded 1246 as a consequence. 1247 """ 1248 1249 ordered.insert(0, key) 1250 1251 depends = all_depends.get(key) 1252 1253 # Reduce usage of the referenced items. 1254 1255 if depends: 1256 for depend in depends: 1257 usage[depend].remove(key) 1258 1259 del usage[key] 1260 1261 # General input/output. 1262 1263 def readfile(filename): 1264 1265 "Return the contents of 'filename'." 1266 1267 f = open(filename) 1268 try: 1269 return f.read() 1270 finally: 1271 f.close() 1272 1273 def writefile(filename, s): 1274 1275 "Write to 'filename' the string 's'." 1276 1277 f = open(filename, "w") 1278 try: 1279 f.write(s) 1280 finally: 1281 f.close() 1282 1283 # General encoding. 1284 1285 def sorted_output(x): 1286 1287 "Sort sequence 'x' and return a string with commas separating the values." 1288 1289 x = map(str, x) 1290 x.sort() 1291 return ", ".join(x) 1292 1293 def get_string_details(literals, encoding): 1294 1295 """ 1296 Determine whether 'literals' represent Unicode strings or byte strings, 1297 using 'encoding' to reproduce byte sequences. 1298 1299 Each literal is the full program representation including prefix and quotes 1300 recoded by the parser to UTF-8. Thus, any literal found to represent a byte 1301 string needs to be translated back to its original encoding. 1302 1303 Return a single encoded literal value, a type name, and the original 1304 encoding as a tuple. 1305 """ 1306 1307 typename = "unicode" 1308 1309 l = [] 1310 1311 for s in literals: 1312 out, _typename = get_literal_details(s) 1313 if _typename == "str": 1314 typename = "str" 1315 l.append(out) 1316 1317 out = "".join(l) 1318 1319 # For Unicode values, convert to the UTF-8 program representation. 1320 1321 if typename == "unicode": 1322 return out.encode("utf-8"), typename, encoding 1323 1324 # For byte string values, convert back to the original encoding. 1325 1326 else: 1327 return out.encode(encoding), typename, encoding 1328 1329 def get_literal_details(s): 1330 1331 """ 1332 Determine whether 's' represents a Unicode string or a byte string, where 1333 's' contains the full program representation of a literal including prefix 1334 and quotes, recoded by the parser to UTF-8. 1335 1336 Find and convert Unicode values starting with <backslash>u or <backslash>U, 1337 and byte or Unicode values starting with <backslash><octal digit> or 1338 <backslash>x. 1339 1340 Literals prefixed with "u" cause <backslash><octal digit> and <backslash>x 1341 to be considered as Unicode values. Otherwise, they produce byte values and 1342 cause unprefixed strings to be considered as byte strings. 1343 1344 Literals prefixed with "r" do not have their backslash-encoded values 1345 converted unless also prefixed with "u", in which case only the above value 1346 formats are converted, not any of the other special sequences for things 1347 like newlines. 1348 1349 Return the literal value as a Unicode object together with the appropriate 1350 type name in a tuple. 1351 """ 1352 1353 l = [] 1354 1355 # Identify the quote character and use it to identify the prefix. 1356 1357 quote_type = s[-1] 1358 prefix_end = s.find(quote_type) 1359 prefix = s[:prefix_end].lower() 1360 1361 if prefix not in ("", "b", "br", "r", "u", "ur"): 1362 raise ValueError, "String literal does not have a supported prefix: %s" % s 1363 1364 if "b" in prefix: 1365 typename = "str" 1366 else: 1367 typename = "unicode" 1368 1369 # Identify triple quotes or single quotes. 1370 1371 if len(s) >= 6 and s[-2] == quote_type and s[-3] == quote_type: 1372 quote = s[prefix_end:prefix_end+3] 1373 current = prefix_end + 3 1374 end = len(s) - 3 1375 else: 1376 quote = s[prefix_end] 1377 current = prefix_end + 1 1378 end = len(s) - 1 1379 1380 # Conversions of some quoted values. 1381 1382 searches = { 1383 "u" : (6, 16), 1384 "U" : (10, 16), 1385 "x" : (4, 16), 1386 } 1387 1388 octal_digits = map(str, range(0, 8)) 1389 1390 # Translations of some quoted values. 1391 1392 escaped = { 1393 "\\" : "\\", "'" : "'", '"' : '"', 1394 "a" : "\a", "b" : "\b", "f" : "\f", 1395 "n" : "\n", "r" : "\r", "t" : "\t", 1396 } 1397 1398 while current < end: 1399 1400 # Look for quoted values. 1401 1402 index = s.find("\\", current) 1403 if index == -1 or index + 1 == end: 1404 l.append(s[current:end]) 1405 break 1406 1407 # Add the preceding text. 1408 1409 l.append(s[current:index]) 1410 1411 # Handle quoted text. 1412 1413 term = s[index+1] 1414 1415 # Add Unicode values. Where a string is u-prefixed, even \o and \x 1416 # produce Unicode values. 1417 1418 if typename == "unicode" and ( 1419 term in ("u", "U") or 1420 "u" in prefix and (term == "x" or term in octal_digits)): 1421 1422 needed, base = searches.get(term, (4, 8)) 1423 value = convert_quoted_value(s, index, needed, end, base, unichr) 1424 l.append(value) 1425 current = index + needed 1426 1427 # Add raw byte values, changing the string type. 1428 1429 elif "r" not in prefix and ( 1430 term == "x" or term in octal_digits): 1431 1432 needed, base = searches.get(term, (4, 8)) 1433 value = convert_quoted_value(s, index, needed, end, base, chr) 1434 l.append(value) 1435 typename = "str" 1436 current = index + needed 1437 1438 # Add other escaped values. 1439 1440 elif "r" not in prefix and escaped.has_key(term): 1441 l.append(escaped[term]) 1442 current = index + 2 1443 1444 # Add other text as found. 1445 1446 else: 1447 l.append(s[index:index+2]) 1448 current = index + 2 1449 1450 # Collect the components into a single Unicode object. Since the literal 1451 # text was already in UTF-8 form, interpret plain strings as UTF-8 1452 # sequences. 1453 1454 out = [] 1455 1456 for value in l: 1457 if isinstance(value, unicode): 1458 out.append(value) 1459 else: 1460 out.append(unicode(value, "utf-8")) 1461 1462 return "".join(out), typename 1463 1464 def convert_quoted_value(s, index, needed, end, base, fn): 1465 1466 """ 1467 Interpret a quoted value in 's' at 'index' with the given 'needed' number of 1468 positions, and with the given 'end' indicating the first position after the 1469 end of the actual string content. 1470 1471 Use 'base' as the numerical base when interpreting the value, and use 'fn' 1472 to convert the value to an appropriate type. 1473 """ 1474 1475 s = s[index:min(index+needed, end)] 1476 1477 # Not a complete occurrence. 1478 1479 if len(s) < needed: 1480 return s 1481 1482 # Test for a well-formed value. 1483 1484 try: 1485 first = base == 8 and 1 or 2 1486 value = int(s[first:needed], base) 1487 except ValueError: 1488 return s 1489 else: 1490 return fn(value) 1491 1492 # Attribute chain decoding. 1493 1494 def get_attrnames(attrnames): 1495 1496 """ 1497 Split the qualified attribute chain 'attrnames' into its components, 1498 handling special attributes starting with "#" that indicate type 1499 conformance. 1500 """ 1501 1502 if attrnames.startswith("#"): 1503 return [attrnames] 1504 else: 1505 return attrnames.split(".") 1506 1507 def get_name_path(path, name): 1508 1509 "Return a suitable qualified name from the given 'path' and 'name'." 1510 1511 if "." in name: 1512 return name 1513 else: 1514 return "%s.%s" % (path, name) 1515 1516 # Usage-related functions. 1517 1518 def get_types_for_usage(attrnames, objects): 1519 1520 """ 1521 Identify the types that can support the given 'attrnames', using the 1522 given 'objects' as the catalogue of type details. 1523 """ 1524 1525 types = [] 1526 for name, _attrnames in objects.items(): 1527 if set(attrnames).issubset(_attrnames): 1528 types.append(name) 1529 return types 1530 1531 def get_invoked_attributes(usage): 1532 1533 "Obtain invoked attribute from the given 'usage'." 1534 1535 invoked = [] 1536 if usage: 1537 for attrname, invocation, assignment in usage: 1538 if invocation: 1539 invoked.append(attrname) 1540 return invoked 1541 1542 def get_assigned_attributes(usage): 1543 1544 "Obtain assigned attribute from the given 'usage'." 1545 1546 assigned = [] 1547 if usage: 1548 for attrname, invocation, assignment in usage: 1549 if assignment: 1550 assigned.append(attrname) 1551 return assigned 1552 1553 # Type and module functions. 1554 # NOTE: This makes assumptions about the __builtins__ structure. 1555 1556 def get_builtin_module(name): 1557 1558 "Return the module name containing the given type 'name'." 1559 1560 if name == "string": 1561 modname = "str" 1562 elif name == "utf8string": 1563 modname = "unicode" 1564 elif name == "NoneType": 1565 modname = "none" 1566 else: 1567 modname = name 1568 1569 return "__builtins__.%s" % modname 1570 1571 def get_builtin_type(name): 1572 1573 "Return the type name provided by the given Python value 'name'." 1574 1575 if name == "str": 1576 return "string" 1577 elif name == "unicode": 1578 return "utf8string" 1579 else: 1580 return name 1581 1582 def get_builtin_class(name): 1583 1584 "Return the full name of the built-in class having the given 'name'." 1585 1586 typename = get_builtin_type(name) 1587 module = get_builtin_module(typename) 1588 return "%s.%s" % (module, typename) 1589 1590 # Useful data. 1591 1592 predefined_constants = "False", "None", "NotImplemented", "True" 1593 1594 unary_operator_functions = { 1595 1596 # Unary operations. 1597 1598 "Invert" : "invert", 1599 "UnaryAdd" : "pos", 1600 "UnarySub" : "neg", 1601 } 1602 1603 operator_functions = { 1604 1605 # Fundamental operations. 1606 1607 "is" : "is_", 1608 "is not" : "is_not", 1609 1610 # Binary operations. 1611 1612 "in" : "in_", 1613 "not in" : "not_in", 1614 "Add" : "add", 1615 "Bitand" : "and_", 1616 "Bitor" : "or_", 1617 "Bitxor" : "xor", 1618 "Div" : "div", 1619 "FloorDiv" : "floordiv", 1620 "LeftShift" : "lshift", 1621 "Mod" : "mod", 1622 "Mul" : "mul", 1623 "Power" : "pow", 1624 "RightShift" : "rshift", 1625 "Sub" : "sub", 1626 1627 # Augmented assignment. 1628 1629 "+=" : "iadd", 1630 "-=" : "isub", 1631 "*=" : "imul", 1632 "/=" : "idiv", 1633 "//=" : "ifloordiv", 1634 "%=" : "imod", 1635 "**=" : "ipow", 1636 "<<=" : "ilshift", 1637 ">>=" : "irshift", 1638 "&=" : "iand", 1639 "^=" : "ixor", 1640 "|=" : "ior", 1641 1642 # Comparisons. 1643 1644 "==" : "eq", 1645 "!=" : "ne", 1646 "<" : "lt", 1647 "<=" : "le", 1648 ">=" : "ge", 1649 ">" : "gt", 1650 } 1651 1652 operator_functions.update(unary_operator_functions) 1653 1654 # vim: tabstop=4 expandtab shiftwidth=4