1 #!/usr/bin/env python 2 3 """ 4 Common functions. 5 6 Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 7 2017, 2018, 2019 Paul Boddie <paul@boddie.org.uk> 8 9 This program is free software; you can redistribute it and/or modify it under 10 the terms of the GNU General Public License as published by the Free Software 11 Foundation; either version 3 of the License, or (at your option) any later 12 version. 13 14 This program is distributed in the hope that it will be useful, but WITHOUT 15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 16 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 17 details. 18 19 You should have received a copy of the GNU General Public License along with 20 this program. If not, see <http://www.gnu.org/licenses/>. 21 """ 22 23 from compiler.transformer import Transformer 24 from errors import InspectError 25 from os import listdir, makedirs, remove 26 from os.path import exists, getmtime, isdir, join, split 27 from results import ConstantValueRef, LiteralSequenceRef, NameRef 28 import compiler.ast 29 30 class CommonOutput: 31 32 "Common output functionality." 33 34 def check_output(self, options=None): 35 36 "Check the existing output and remove it if irrelevant." 37 38 if not exists(self.output): 39 makedirs(self.output) 40 41 details = self.importer.get_cache_details() 42 recorded_details = self.get_output_details() 43 44 # Combine cache details with any options. 45 46 full_details = options and (details + " " + options) or details 47 48 if recorded_details != full_details: 49 self.remove_output() 50 51 writefile(self.get_output_details_filename(), full_details) 52 53 def get_output_details_filename(self): 54 55 "Return the output details filename." 56 57 return join(self.output, "$details") 58 59 def get_output_details(self): 60 61 "Return details of the existing output." 62 63 details_filename = self.get_output_details_filename() 64 65 if not exists(details_filename): 66 return None 67 else: 68 return readfile(details_filename) 69 70 def remove_output(self, dirname=None): 71 72 "Remove the output." 73 74 dirname = dirname or self.output 75 76 for filename in listdir(dirname): 77 path = join(dirname, filename) 78 if isdir(path): 79 self.remove_output(path) 80 else: 81 remove(path) 82 83 def copy(source, target, only_if_newer=True): 84 85 "Copy a text file from 'source' to 'target'." 86 87 if isdir(target): 88 target = join(target, split(source)[-1]) 89 90 if only_if_newer and not is_newer(source, target): 91 return 92 93 infile = open(source) 94 outfile = open(target, "w") 95 96 try: 97 outfile.write(infile.read()) 98 finally: 99 outfile.close() 100 infile.close() 101 102 def is_newer(source, target): 103 104 "Return whether 'source' is newer than 'target'." 105 106 if exists(target): 107 target_mtime = getmtime(target) 108 source_mtime = getmtime(source) 109 return source_mtime > target_mtime 110 111 return True 112 113 class CommonModule: 114 115 "A common module representation." 116 117 def __init__(self, name, importer): 118 119 """ 120 Initialise this module with the given 'name' and an 'importer' which is 121 used to provide access to other modules when required. 122 """ 123 124 self.name = name 125 self.importer = importer 126 self.filename = None 127 128 # Inspection-related attributes. 129 130 self.astnode = None 131 self.encoding = None 132 self.temp = {} 133 self.lambdas = {} 134 135 # Constants, literals and values. 136 137 self.constants = {} 138 self.constant_values = {} 139 self.literals = {} 140 self.literal_types = {} 141 142 # Nested namespaces. 143 144 self.namespace_path = [] 145 self.in_function = False 146 147 # Retain the assignment value expression and track invocations. 148 149 self.in_assignment = None 150 self.in_invocation = None 151 152 # Attribute chain state management. 153 154 self.attrs = [] 155 self.chain_assignment = [] 156 self.chain_invocation = [] 157 158 def __repr__(self): 159 return "CommonModule(%r, %r)" % (self.name, self.importer) 160 161 def parse_file(self, filename): 162 163 "Parse the file with the given 'filename', initialising attributes." 164 165 self.filename = filename 166 167 # Use the Transformer directly to obtain encoding information. 168 169 t = Transformer() 170 f = open(filename) 171 172 try: 173 self.astnode = t.parsesuite(f.read() + "\n") 174 self.encoding = t.encoding 175 finally: 176 f.close() 177 178 # Module-relative naming. 179 180 def get_global_path(self, name): 181 return "%s.%s" % (self.name, name) 182 183 def get_namespace_path(self): 184 return ".".join([self.name] + self.namespace_path) 185 186 def get_object_path(self, name): 187 return ".".join([self.name] + self.namespace_path + [name]) 188 189 # Namespace management. 190 191 def enter_namespace(self, name): 192 193 "Enter the namespace having the given 'name'." 194 195 self.namespace_path.append(name) 196 197 def exit_namespace(self): 198 199 "Exit the current namespace." 200 201 self.namespace_path.pop() 202 203 # Constant reference naming. 204 205 def get_constant_name(self, value, value_type, encoding=None): 206 207 """ 208 Add a new constant to the current namespace for 'value' with 209 'value_type'. 210 """ 211 212 path = self.get_namespace_path() 213 init_item(self.constants, path, dict) 214 return "$c%d" % add_counter_item(self.constants[path], (value, value_type, encoding)) 215 216 # Literal reference naming. 217 218 def get_literal_name(self): 219 220 "Add a new literal to the current namespace." 221 222 path = self.get_namespace_path() 223 init_item(self.literals, path, lambda: 0) 224 return "$C%d" % self.literals[path] 225 226 def next_literal(self): 227 self.literals[self.get_namespace_path()] += 1 228 229 # Temporary variable naming. 230 231 def get_temporary_name(self): 232 path = self.get_namespace_path() 233 init_item(self.temp, path, lambda: 0) 234 return "$t%d" % self.temp[path] 235 236 def next_temporary(self): 237 self.temp[self.get_namespace_path()] += 1 238 239 # Arbitrary function naming. 240 241 def get_lambda_name(self): 242 path = self.get_namespace_path() 243 init_item(self.lambdas, path, lambda: 0) 244 name = "$l%d" % self.lambdas[path] 245 self.lambdas[path] += 1 246 return name 247 248 def reset_lambdas(self): 249 self.lambdas = {} 250 251 # Constant and literal recording. 252 253 def get_constant_value(self, value, literals=None): 254 255 """ 256 Encode the 'value' if appropriate, returning a value, a typename and any 257 encoding. 258 """ 259 260 if isinstance(value, unicode): 261 return value.encode("utf-8"), "unicode", self.encoding 262 263 # Attempt to convert plain strings to text. 264 265 elif isinstance(value, str) and self.encoding: 266 try: 267 return get_string_details(literals, self.encoding) 268 except UnicodeDecodeError: 269 pass 270 271 return value, value.__class__.__name__, None 272 273 def get_constant_reference(self, ref, value, encoding=None): 274 275 """ 276 Return a constant reference for the given 'ref' type and 'value', with 277 the optional 'encoding' applying to text values. 278 """ 279 280 constant_name = self.get_constant_name(value, ref.get_origin(), encoding) 281 282 # Return a reference for the constant. 283 284 objpath = self.get_object_path(constant_name) 285 name_ref = ConstantValueRef(constant_name, ref.instance_of(objpath), value) 286 287 # Record the value and type for the constant. 288 289 self._reserve_constant(objpath, name_ref.value, name_ref.get_origin(), encoding) 290 return name_ref 291 292 def reserve_constant(self, objpath, value, origin, encoding=None): 293 294 """ 295 Reserve a constant within 'objpath' with the given 'value' and having a 296 type with the given 'origin', with the optional 'encoding' applying to 297 text values. 298 """ 299 300 constant_name = self.get_constant_name(value, origin) 301 objpath = self.get_object_path(constant_name) 302 self._reserve_constant(objpath, value, origin, encoding) 303 304 def _reserve_constant(self, objpath, value, origin, encoding): 305 306 """ 307 Store a constant for 'objpath' with the given 'value' and 'origin', with 308 the optional 'encoding' applying to text values. 309 """ 310 311 self.constant_values[objpath] = value, origin, encoding 312 313 def get_literal_reference(self, name, ref, items, cls): 314 315 """ 316 Return a literal reference for the given type 'name', literal 'ref', 317 node 'items' and employing the given 'cls' as the class of the returned 318 reference object. 319 """ 320 321 # Construct an invocation using the items as arguments. 322 323 typename = "$L%s" % name 324 325 invocation = compiler.ast.CallFunc( 326 compiler.ast.Name(typename), 327 items 328 ) 329 330 # Get a name for the actual literal. 331 332 instname = self.get_literal_name() 333 self.next_literal() 334 335 # Record the type for the literal. 336 337 objpath = self.get_object_path(instname) 338 self.literal_types[objpath] = ref.get_origin() 339 340 # Return a wrapper for the invocation exposing the items. 341 342 return cls( 343 instname, 344 ref.instance_of(), 345 self.process_structure_node(invocation), 346 invocation.args 347 ) 348 349 # Node handling. 350 351 def process_structure(self, node): 352 353 """ 354 Within the given 'node', process the program structure. 355 356 During inspection, this will process global declarations, adjusting the 357 module namespace, and import statements, building a module dependency 358 hierarchy. 359 360 During translation, this will consult deduced program information and 361 output translated code. 362 """ 363 364 l = [] 365 for n in node.getChildNodes(): 366 l.append(self.process_structure_node(n)) 367 return l 368 369 def process_augassign_node(self, n): 370 371 "Process the given augmented assignment node 'n'." 372 373 op = operator_functions[n.op] 374 375 if isinstance(n.node, compiler.ast.Getattr): 376 target = compiler.ast.AssAttr(n.node.expr, n.node.attrname, "OP_ASSIGN") 377 elif isinstance(n.node, compiler.ast.Name): 378 target = compiler.ast.AssName(n.node.name, "OP_ASSIGN") 379 else: 380 target = n.node 381 382 assignment = compiler.ast.Assign( 383 [target], 384 compiler.ast.CallFunc( 385 compiler.ast.Name("$op%s" % op), 386 [n.node, n.expr])) 387 388 return self.process_structure_node(assignment) 389 390 def process_assignment_for_object(self, original_name, source): 391 392 """ 393 Return an assignment operation making 'original_name' refer to the given 394 'source'. 395 """ 396 397 assignment = compiler.ast.Assign( 398 [compiler.ast.AssName(original_name, "OP_ASSIGN")], 399 source 400 ) 401 402 return self.process_structure_node(assignment) 403 404 def process_assignment_node_items(self, n, expr): 405 406 """ 407 Process the given assignment node 'n' whose children are to be assigned 408 items of 'expr'. 409 """ 410 411 name_ref = self.process_structure_node(expr) 412 413 # Either unpack the items and present them directly to each assignment 414 # node. 415 416 if isinstance(name_ref, LiteralSequenceRef) and \ 417 self.process_literal_sequence_items(n, name_ref): 418 419 pass 420 421 # Or have the assignment nodes access each item via the sequence API. 422 423 else: 424 self.process_assignment_node_items_by_position(n, expr, name_ref) 425 426 def process_assignment_node_items_by_position(self, n, expr, name_ref): 427 428 """ 429 Process the given sequence assignment node 'n', converting the node to 430 the separate assignment of each target using positional access on a 431 temporary variable representing the sequence. Use 'expr' as the assigned 432 value and 'name_ref' as the reference providing any existing temporary 433 variable. 434 """ 435 436 statements = [] 437 438 # Employ existing names to access the sequence. 439 # Literal sequences do not provide names of accessible objects. 440 441 if isinstance(name_ref, NameRef) and not isinstance(name_ref, LiteralSequenceRef): 442 temp = name_ref.name 443 444 # For other expressions, create a temporary name to reference the items. 445 446 else: 447 temp = self.get_temporary_name() 448 self.next_temporary() 449 450 statements.append( 451 compiler.ast.Assign([compiler.ast.AssName(temp, "OP_ASSIGN")], expr) 452 ) 453 454 # Generate a test for the length of the expression object. 455 456 statements.append(compiler.ast.Discard( 457 compiler.ast.CallFunc(compiler.ast.Name("$seq_test_length"), 458 [compiler.ast.Name(temp), compiler.ast.Const(len(n.nodes))]))) 459 460 # Assign the items to the target nodes. 461 462 for i, node in enumerate(n.nodes): 463 statements.append( 464 compiler.ast.Assign([node], compiler.ast.Subscript( 465 compiler.ast.Name(temp), "OP_APPLY", [compiler.ast.Const(i, str(i))])) 466 ) 467 468 return self.process_structure_node(compiler.ast.Stmt(statements)) 469 470 def process_literal_sequence_items(self, n, name_ref): 471 472 """ 473 Process the given assignment node 'n', obtaining from the given 474 'name_ref' the items to be assigned to the assignment targets. 475 476 Return whether this method was able to process the assignment node as 477 a sequence of direct assignments. 478 """ 479 480 if len(n.nodes) == len(name_ref.items): 481 assigned_names, count = get_names_from_nodes(n.nodes) 482 accessed_names, _count = get_names_from_nodes(name_ref.items) 483 484 # Only assign directly between items if all assigned names are 485 # plain names (not attribute assignments), and if the assigned names 486 # do not appear in the accessed names. 487 488 if len(assigned_names) == count and \ 489 not assigned_names.intersection(accessed_names): 490 491 for node, item in zip(n.nodes, name_ref.items): 492 self.process_assignment_node(node, item) 493 494 return True 495 496 # Otherwise, use the position-based mechanism to obtain values. 497 498 else: 499 return False 500 else: 501 raise InspectError("In %s, item assignment needing %d items is given %d items." % ( 502 self.get_namespace_path(), len(n.nodes), len(name_ref.items))) 503 504 def process_compare_node(self, n): 505 506 """ 507 Process the given comparison node 'n', converting an operator sequence 508 from... 509 510 <expr1> <op1> <expr2> <op2> <expr3> 511 512 ...to... 513 514 <op1>(<expr1>, <expr2>) and <op2>(<expr2>, <expr3>) 515 """ 516 517 invocations = [] 518 last = n.expr 519 520 for op, op_node in n.ops: 521 op = operator_functions.get(op) 522 523 invocations.append(compiler.ast.CallFunc( 524 compiler.ast.Name("$op%s" % op), 525 [last, op_node])) 526 527 last = op_node 528 529 if len(invocations) > 1: 530 result = compiler.ast.And(invocations) 531 else: 532 result = invocations[0] 533 534 return self.process_structure_node(result) 535 536 def process_dict_node(self, node): 537 538 """ 539 Process the given dictionary 'node', returning a list of (key, value) 540 tuples. 541 """ 542 543 l = [] 544 for key, value in node.items: 545 l.append(( 546 self.process_structure_node(key), 547 self.process_structure_node(value))) 548 return l 549 550 def process_for_node(self, n): 551 552 """ 553 Generate attribute accesses for {n.list}.__iter__ and the next method on 554 the iterator, producing a replacement node for the original. 555 """ 556 557 t0 = self.get_temporary_name() 558 self.next_temporary() 559 t1 = self.get_temporary_name() 560 self.next_temporary() 561 t2 = self.get_temporary_name() 562 self.next_temporary() 563 564 node = compiler.ast.Stmt([ 565 566 # <t0> = {n.list} 567 # <t1> = <t0>.__iter__() 568 569 compiler.ast.Assign( 570 [compiler.ast.AssName(t0, "OP_ASSIGN")], 571 n.list), 572 573 compiler.ast.Assign( 574 [compiler.ast.AssName(t1, "OP_ASSIGN")], 575 compiler.ast.CallFunc( 576 compiler.ast.Getattr(compiler.ast.Name(t0), "__iter__"), 577 [])), 578 579 # <t2> = <t1>.next 580 # try: 581 # while True: 582 # try: 583 # <var>... = <t2>() 584 # except StopIteration: 585 # raise LoopExit 586 # {n.body} 587 # except LoopExit: 588 # {n.else_} 589 # pass 590 591 compiler.ast.Assign( 592 [compiler.ast.AssName(t2, "OP_ASSIGN")], 593 compiler.ast.Getattr(compiler.ast.Name(t1), "next")), 594 595 compiler.ast.TryExcept( 596 compiler.ast.While( 597 compiler.ast.Name("True"), 598 compiler.ast.Stmt([ 599 compiler.ast.TryExcept( 600 compiler.ast.Assign( 601 [n.assign], 602 compiler.ast.CallFunc( 603 compiler.ast.Name(t2), 604 [])), 605 [(compiler.ast.Name("StopIteration"), None, 606 compiler.ast.Raise(compiler.ast.Name("LoopExit")))], 607 None), 608 n.body]), 609 None), 610 [(compiler.ast.Name("LoopExit"), None, n.else_ or compiler.ast.Pass())], 611 None) 612 ]) 613 614 self.process_structure_node(node) 615 616 def process_literal_sequence_node(self, n, name, ref, cls): 617 618 """ 619 Process the given literal sequence node 'n' as a function invocation, 620 with 'name' indicating the type of the sequence, and 'ref' being a 621 reference to the type. The 'cls' is used to instantiate a suitable name 622 reference. 623 """ 624 625 if name == "dict": 626 items = [] 627 for key, value in n.items: 628 items.append(compiler.ast.Tuple([key, value])) 629 else: # name in ("list", "tuple"): 630 items = n.nodes 631 632 return self.get_literal_reference(name, ref, items, cls) 633 634 def process_operator_node(self, n): 635 636 """ 637 Process the given operator node 'n' as an operator function invocation. 638 """ 639 640 opname = n.__class__.__name__ 641 operands = n.getChildNodes() 642 643 # Convert a unary operation to an invocation. 644 645 op = unary_operator_functions.get(opname) 646 647 if op: 648 invocation = compiler.ast.CallFunc( 649 compiler.ast.Name("$op%s" % op), 650 [operands[0]] 651 ) 652 653 # Convert a single operator with a list of operands to a combination of 654 # pairwise operations. 655 656 else: 657 op = operator_functions[opname] 658 invocation = self._process_operator_node(op, operands) 659 660 return self.process_structure_node(invocation) 661 662 def _process_operator_node(self, op, operands): 663 664 """ 665 Process the given 'op', being an operator function, together with the 666 supplied 'operands', returning either a single remaining operand or an 667 invocation combining the operands. 668 """ 669 670 remaining = operands[1:] 671 if not remaining: 672 return operands[0] 673 674 return compiler.ast.CallFunc( 675 compiler.ast.Name("$op%s" % op), 676 [operands[0], self._process_operator_node(op, remaining)] 677 ) 678 679 def process_print_node(self, n): 680 681 """ 682 Process the given print node 'n' as an invocation on a stream of the 683 form... 684 685 $print(dest, args, nl) 686 687 The special function name will be translated elsewhere. 688 """ 689 690 nl = isinstance(n, compiler.ast.Printnl) 691 invocation = compiler.ast.CallFunc( 692 compiler.ast.Name("$print"), 693 [n.dest or compiler.ast.Name("None"), 694 compiler.ast.List(list(n.nodes)), 695 nl and compiler.ast.Name("True") or compiler.ast.Name("False")] 696 ) 697 return self.process_structure_node(invocation) 698 699 def process_slice_node(self, n, expr=None): 700 701 """ 702 Process the given slice node 'n' as a method invocation. 703 """ 704 705 if n.flags == "OP_ASSIGN": op = "__setslice__" 706 elif n.flags == "OP_DELETE": op = "__delslice__" 707 else: op = "__getslice__" 708 709 invocation = compiler.ast.CallFunc( 710 compiler.ast.Getattr(n.expr, op), 711 [n.lower or compiler.ast.Name("None"), n.upper or compiler.ast.Name("None")] + 712 (expr and [expr] or []) 713 ) 714 715 # Fix parse tree structure. 716 717 if op == "__delslice__": 718 invocation = compiler.ast.Discard(invocation) 719 720 return self.process_structure_node(invocation) 721 722 def process_sliceobj_node(self, n): 723 724 """ 725 Process the given slice object node 'n' as a slice constructor. 726 """ 727 728 op = "slice" 729 invocation = compiler.ast.CallFunc( 730 compiler.ast.Name("$op%s" % op), 731 n.nodes 732 ) 733 return self.process_structure_node(invocation) 734 735 def process_subscript_node(self, n, expr=None): 736 737 """ 738 Process the given subscript node 'n' as a method invocation. 739 """ 740 741 if n.flags == "OP_ASSIGN": op = "__setitem__" 742 elif n.flags == "OP_DELETE": op = "__delitem__" 743 else: op = "__getitem__" 744 745 invocation = compiler.ast.CallFunc( 746 compiler.ast.Getattr(n.expr, op), 747 list(n.subs) + (expr and [expr] or []) 748 ) 749 750 # Fix parse tree structure. 751 752 if op == "__delitem__": 753 invocation = compiler.ast.Discard(invocation) 754 755 return self.process_structure_node(invocation) 756 757 def process_attribute_chain(self, n): 758 759 """ 760 Process the given attribute access node 'n'. Return a reference 761 describing the expression. 762 """ 763 764 # AssAttr/Getattr are nested with the outermost access being the last 765 # access in any chain. 766 767 self.attrs.insert(0, n.attrname) 768 attrs = self.attrs 769 770 # Break attribute chains where non-access nodes are found. 771 772 if not self.have_access_expression(n): 773 self.reset_attribute_chain() 774 775 # Descend into the expression, extending backwards any existing chain, 776 # or building another for the expression. 777 778 name_ref = self.process_structure_node(n.expr) 779 780 # Restore chain information applying to this node. 781 782 if not self.have_access_expression(n): 783 self.restore_attribute_chain(attrs) 784 785 # Return immediately if the expression was another access and thus a 786 # continuation backwards along the chain. The above processing will 787 # have followed the chain all the way to its conclusion. 788 789 if self.have_access_expression(n): 790 del self.attrs[0] 791 792 return name_ref 793 794 # Attribute chain handling. 795 796 def reset_attribute_chain(self): 797 798 "Reset the attribute chain for a subexpression of an attribute access." 799 800 self.attrs = [] 801 self.chain_assignment.append(self.in_assignment) 802 self.chain_invocation.append(self.in_invocation) 803 self.in_assignment = None 804 self.in_invocation = None 805 806 def restore_attribute_chain(self, attrs): 807 808 "Restore the attribute chain for an attribute access." 809 810 self.attrs = attrs 811 self.in_assignment = self.chain_assignment.pop() 812 self.in_invocation = self.chain_invocation.pop() 813 814 def have_access_expression(self, node): 815 816 "Return whether the expression associated with 'node' is Getattr." 817 818 return isinstance(node.expr, compiler.ast.Getattr) 819 820 def get_name_for_tracking(self, name, name_ref=None, is_global=False): 821 822 """ 823 Return the name to be used for attribute usage observations involving 824 the given 'name' in the current namespace. 825 826 If the name is being used outside a function, and if 'name_ref' is 827 given and indicates a global or if 'is_global' is specified as a true 828 value, a path featuring the name in the global namespace is returned. 829 Otherwise, a path computed using the current namespace and the given 830 name is returned. 831 832 The intention of this method is to provide a suitably-qualified name 833 that can be tracked across namespaces. Where globals are being 834 referenced in class namespaces, they should be referenced using their 835 path within the module, not using a path within each class. 836 837 It may not be possible to identify a global within a function at the 838 time of inspection (since a global may appear later in a file). 839 Consequently, globals are identified by their local name rather than 840 their module-qualified path. 841 """ 842 843 # For functions, use the appropriate local names. 844 845 if self.in_function: 846 return name 847 848 # For global names outside functions, use a global name. 849 850 elif is_global or name_ref and name_ref.is_global_name(): 851 return self.get_global_path(name) 852 853 # Otherwise, establish a name in the current namespace. 854 855 else: 856 return self.get_object_path(name) 857 858 def get_path_for_access(self): 859 860 "Outside functions, register accesses at the module level." 861 862 if not self.in_function: 863 return self.name 864 else: 865 return self.get_namespace_path() 866 867 def get_module_name(self, node): 868 869 """ 870 Using the given From 'node' in this module, calculate any relative import 871 information, returning a tuple containing a module to import along with any 872 names to import based on the node's name information. 873 874 Where the returned module is given as None, whole module imports should 875 be performed for the returned modules using the returned names. 876 """ 877 878 # Absolute import. 879 880 if node.level == 0: 881 return node.modname, node.names 882 883 # Relative to an ancestor of this module. 884 885 else: 886 path = self.name.split(".") 887 level = node.level 888 889 # Relative imports treat package roots as submodules. 890 891 if split(self.filename)[-1] == "__init__.py": 892 level -= 1 893 894 if level > len(path): 895 raise InspectError("Relative import %r involves too many levels up from module %r" % ( 896 ("%s%s" % ("." * node.level, node.modname or "")), self.name)) 897 898 basename = ".".join(path[:len(path)-level]) 899 900 # Name imports from a module. 901 902 if node.modname: 903 return "%s.%s" % (basename, node.modname), node.names 904 905 # Relative whole module imports. 906 907 else: 908 return basename, node.names 909 910 def get_argnames(args): 911 912 """ 913 Return a list of all names provided by 'args'. Since tuples may be 914 employed, the arguments are traversed depth-first. 915 """ 916 917 l = [] 918 for arg in args: 919 if isinstance(arg, tuple): 920 l += get_argnames(arg) 921 else: 922 l.append(arg) 923 return l 924 925 def get_names_from_nodes(nodes): 926 927 """ 928 Return the names employed in the given 'nodes' along with the number of 929 nodes excluding sequences. 930 """ 931 932 names = set() 933 count = 0 934 935 for node in nodes: 936 937 # Add names and count them. 938 939 if isinstance(node, (compiler.ast.AssName, compiler.ast.Name)): 940 names.add(node.name) 941 count += 1 942 943 # Add names from sequences and incorporate their counts. 944 945 elif isinstance(node, (compiler.ast.AssList, compiler.ast.AssTuple, 946 compiler.ast.List, compiler.ast.Set, 947 compiler.ast.Tuple)): 948 _names, _count = get_names_from_nodes(node.nodes) 949 names.update(_names) 950 count += _count 951 952 # Count non-name, non-sequence nodes. 953 954 else: 955 count += 1 956 957 return names, count 958 959 # Location classes. 960 961 class Location: 962 963 "A generic program location." 964 965 def __init__(self, path, name, attrnames=None, version=None, access_number=None): 966 self.path = path 967 self.name = name 968 self.attrnames = attrnames 969 self.version = version 970 self.access_number = access_number 971 972 def __repr__(self): 973 return "Location(%r, %r, %r, %r, %r)" % self.as_tuple() 974 975 def as_tuple(self): 976 return (self.path, self.name, self.attrnames, self.version, self.access_number) 977 978 def __hash__(self): 979 return hash(self.as_tuple()) 980 981 def __eq__(self, other): 982 return self.as_tuple() == other.as_tuple() 983 984 def __cmp__(self, other): 985 return cmp(self.as_tuple(), other.as_tuple()) 986 987 def get_attrname(self): 988 989 """ 990 Extract the first attribute from the attribute names employed in this 991 location. 992 """ 993 994 attrnames = self.attrnames 995 if not attrnames: 996 return attrnames 997 return get_attrnames(attrnames)[0] 998 999 class AccessLocation(Location): 1000 1001 "A specialised access location." 1002 1003 def __init__(self, path, name, attrnames, access_number): 1004 1005 """ 1006 Initialise an access location featuring 'path', 'name', 'attrnames' and 1007 'access_number'. 1008 """ 1009 1010 Location.__init__(self, path, name, attrnames, None, access_number) 1011 1012 def __repr__(self): 1013 return "AccessLocation(%r, %r, %r, %r)" % (self.path, self.name, self.attrnames, self.access_number) 1014 1015 # Result classes. 1016 1017 class InstructionSequence: 1018 1019 "A generic sequence of instructions." 1020 1021 def __init__(self, instructions): 1022 self.instructions = instructions 1023 1024 def get_value_instruction(self): 1025 return self.instructions[-1] 1026 1027 def get_init_instructions(self): 1028 return self.instructions[:-1] 1029 1030 # Dictionary utilities. 1031 1032 def init_item(d, key, fn): 1033 1034 """ 1035 Add to 'd' an entry for 'key' using the callable 'fn' to make an initial 1036 value where no entry already exists. 1037 """ 1038 1039 if not d.has_key(key): 1040 d[key] = fn() 1041 return d[key] 1042 1043 def dict_for_keys(d, keys): 1044 1045 "Return a new dictionary containing entries from 'd' for the given 'keys'." 1046 1047 nd = {} 1048 for key in keys: 1049 if d.has_key(key): 1050 nd[key] = d[key] 1051 return nd 1052 1053 def make_key(s): 1054 1055 "Make sequence 's' into a tuple-based key, first sorting its contents." 1056 1057 l = list(s) 1058 l.sort() 1059 return tuple(l) 1060 1061 def add_counter_item(d, key): 1062 1063 """ 1064 Make a mapping in 'd' for 'key' to the number of keys added before it, thus 1065 maintaining a mapping of keys to their order of insertion. 1066 """ 1067 1068 if not d.has_key(key): 1069 d[key] = len(d.keys()) 1070 return d[key] 1071 1072 def remove_items(d1, d2): 1073 1074 "Remove from 'd1' all items from 'd2'." 1075 1076 for key in d2.keys(): 1077 if d1.has_key(key): 1078 del d1[key] 1079 1080 # Set utilities. 1081 1082 def first(s): 1083 return list(s)[0] 1084 1085 def same(s1, s2): 1086 return set(s1) == set(s2) 1087 1088 def order_dependencies(all_depends): 1089 1090 """ 1091 Produce a dependency ordering for the 'all_depends' mapping. This mapping 1092 has the form "A depends on B, C...". The result will order A, B, C, and so 1093 on. 1094 """ 1095 1096 usage = init_reverse_dependencies(all_depends) 1097 1098 # Produce an ordering by obtaining exposed items (required by items already 1099 # processed) and putting them at the start of the list. 1100 1101 ordered = [] 1102 1103 while usage: 1104 have_next = False 1105 1106 for key, n in usage.items(): 1107 1108 # Add items needed by no other items to the ordering. 1109 1110 if not n: 1111 remove_dependency(key, all_depends, usage, ordered) 1112 have_next = True 1113 1114 if not have_next: 1115 raise ValueError, usage 1116 1117 return ordered 1118 1119 def order_dependencies_partial(all_depends): 1120 1121 """ 1122 Produce a dependency ordering for the 'all_depends' mapping. This mapping 1123 has the form "A depends on B, C...". The result will order A, B, C, and so 1124 on. Where cycles exist, they will be broken and a partial ordering returned. 1125 """ 1126 1127 usage = init_reverse_dependencies(all_depends) 1128 1129 # Duplicate the dependencies for subsequent modification. 1130 1131 new_depends = {} 1132 for key, values in all_depends.items(): 1133 new_depends[key] = set(values) 1134 1135 all_depends = new_depends 1136 1137 # Produce an ordering by obtaining exposed items (required by items already 1138 # processed) and putting them at the start of the list. 1139 1140 ordered = [] 1141 1142 while usage: 1143 least = None 1144 least_key = None 1145 1146 for key, n in usage.items(): 1147 1148 # Add items needed by no other items to the ordering. 1149 1150 if not n: 1151 remove_dependency(key, all_depends, usage, ordered) 1152 least = 0 1153 1154 # When breaking cycles, note the least used items. 1155 1156 elif least is None or len(n) < least: 1157 least_key = key 1158 least = len(n) 1159 1160 if least: 1161 transfer_dependencies(least_key, all_depends, usage, ordered) 1162 1163 return ordered 1164 1165 def init_reverse_dependencies(all_depends): 1166 1167 """ 1168 From 'all_depends', providing a mapping of the form "A depends on B, C...", 1169 record the reverse dependencies, making a mapping of the form 1170 "B is needed by A", "C is needed by A", and so on. 1171 """ 1172 1173 usage = {} 1174 1175 # Record path-based dependencies. 1176 1177 for key in all_depends.keys(): 1178 usage[key] = set() 1179 1180 for key, depends in all_depends.items(): 1181 for depend in depends: 1182 init_item(usage, depend, set) 1183 usage[depend].add(key) 1184 1185 return usage 1186 1187 def transfer_dependencies(key, all_depends, usage, ordered): 1188 1189 """ 1190 Transfer items needed by 'key' to those items needing 'key', found using 1191 'all_depends', and updating 'usage'. Insert 'key' into the 'ordered' 1192 collection of dependencies. 1193 1194 If "A is needed by X" and "B is needed by A", then transferring items needed 1195 by A will cause "B is needed by X" to be recorded as a consequence. 1196 1197 Transferring items also needs to occur in the reverse mapping, so that 1198 "A needs B" and "X needs A", then the consequence must be recorded as 1199 "X needs B". 1200 """ 1201 1202 ordered.insert(0, key) 1203 1204 needing = usage[key] # A is needed by X 1205 needed = all_depends.get(key) # A needs B 1206 1207 if needing: 1208 for depend in needing: 1209 l = all_depends.get(depend) 1210 if not l: 1211 continue 1212 1213 l.remove(key) # X needs (A) 1214 1215 if needed: 1216 l.update(needed) # X needs B... 1217 1218 # Prevent self references. 1219 1220 if depend in needed: 1221 l.remove(depend) 1222 1223 if needed: 1224 for depend in needed: 1225 l = usage.get(depend) 1226 if not l: 1227 continue 1228 1229 l.remove(key) # B is needed by (A) 1230 l.update(needing) # B is needed by X... 1231 1232 # Prevent self references. 1233 1234 if depend in needing: 1235 l.remove(depend) 1236 1237 if needed: 1238 del all_depends[key] 1239 del usage[key] 1240 1241 def remove_dependency(key, all_depends, usage, ordered): 1242 1243 """ 1244 Remove 'key', found in 'all_depends', from 'usage', inserting it into the 1245 'ordered' collection of dependencies. 1246 1247 Given that 'usage' for a given key A would indicate that "A needs <nothing>" 1248 upon removing A from 'usage', the outcome is that all keys needing A will 1249 have A removed from their 'usage' records. 1250 1251 So, if "B needs A", removing A will cause "B needs <nothing>" to be recorded 1252 as a consequence. 1253 """ 1254 1255 ordered.insert(0, key) 1256 1257 depends = all_depends.get(key) 1258 1259 # Reduce usage of the referenced items. 1260 1261 if depends: 1262 for depend in depends: 1263 usage[depend].remove(key) 1264 1265 del usage[key] 1266 1267 # General input/output. 1268 1269 def readfile(filename): 1270 1271 "Return the contents of 'filename'." 1272 1273 f = open(filename) 1274 try: 1275 return f.read() 1276 finally: 1277 f.close() 1278 1279 def writefile(filename, s): 1280 1281 "Write to 'filename' the string 's'." 1282 1283 f = open(filename, "w") 1284 try: 1285 f.write(s) 1286 finally: 1287 f.close() 1288 1289 # General encoding. 1290 1291 def sorted_output(x): 1292 1293 "Sort sequence 'x' and return a string with commas separating the values." 1294 1295 x = map(str, x) 1296 x.sort() 1297 return ", ".join(x) 1298 1299 def get_string_details(literals, encoding): 1300 1301 """ 1302 Determine whether 'literals' represent Unicode strings or byte strings, 1303 using 'encoding' to reproduce byte sequences. 1304 1305 Each literal is the full program representation including prefix and quotes 1306 recoded by the parser to UTF-8. Thus, any literal found to represent a byte 1307 string needs to be translated back to its original encoding. 1308 1309 Return a single encoded literal value, a type name, and the original 1310 encoding as a tuple. 1311 """ 1312 1313 typename = "unicode" 1314 1315 l = [] 1316 1317 for s in literals: 1318 out, _typename = get_literal_details(s) 1319 if _typename == "str": 1320 typename = "str" 1321 l.append(out) 1322 1323 out = "".join(l) 1324 1325 # For Unicode values, convert to the UTF-8 program representation. 1326 1327 if typename == "unicode": 1328 return out.encode("utf-8"), typename, encoding 1329 1330 # For byte string values, convert back to the original encoding. 1331 1332 else: 1333 return out.encode(encoding), typename, encoding 1334 1335 def get_literal_details(s): 1336 1337 """ 1338 Determine whether 's' represents a Unicode string or a byte string, where 1339 's' contains the full program representation of a literal including prefix 1340 and quotes, recoded by the parser to UTF-8. 1341 1342 Find and convert Unicode values starting with <backslash>u or <backslash>U, 1343 and byte or Unicode values starting with <backslash><octal digit> or 1344 <backslash>x. 1345 1346 Literals prefixed with "u" cause <backslash><octal digit> and <backslash>x 1347 to be considered as Unicode values. Otherwise, they produce byte values and 1348 cause unprefixed strings to be considered as byte strings. 1349 1350 Literals prefixed with "r" do not have their backslash-encoded values 1351 converted unless also prefixed with "u", in which case only the above value 1352 formats are converted, not any of the other special sequences for things 1353 like newlines. 1354 1355 Return the literal value as a Unicode object together with the appropriate 1356 type name in a tuple. 1357 """ 1358 1359 l = [] 1360 1361 # Identify the quote character and use it to identify the prefix. 1362 1363 quote_type = s[-1] 1364 prefix_end = s.find(quote_type) 1365 prefix = s[:prefix_end].lower() 1366 1367 if prefix not in ("", "b", "br", "r", "u", "ur"): 1368 raise ValueError, "String literal does not have a supported prefix: %s" % s 1369 1370 if "b" in prefix: 1371 typename = "str" 1372 else: 1373 typename = "unicode" 1374 1375 # Identify triple quotes or single quotes. 1376 1377 if len(s) >= 6 and s[-2] == quote_type and s[-3] == quote_type: 1378 quote = s[prefix_end:prefix_end+3] 1379 current = prefix_end + 3 1380 end = len(s) - 3 1381 else: 1382 quote = s[prefix_end] 1383 current = prefix_end + 1 1384 end = len(s) - 1 1385 1386 # Conversions of some quoted values. 1387 1388 searches = { 1389 "u" : (6, 16), 1390 "U" : (10, 16), 1391 "x" : (4, 16), 1392 } 1393 1394 octal_digits = map(str, range(0, 8)) 1395 1396 # Translations of some quoted values. 1397 1398 escaped = { 1399 "\\" : "\\", "'" : "'", '"' : '"', 1400 "a" : "\a", "b" : "\b", "f" : "\f", 1401 "n" : "\n", "r" : "\r", "t" : "\t", 1402 } 1403 1404 while current < end: 1405 1406 # Look for quoted values. 1407 1408 index = s.find("\\", current) 1409 if index == -1 or index + 1 == end: 1410 l.append(s[current:end]) 1411 break 1412 1413 # Add the preceding text. 1414 1415 l.append(s[current:index]) 1416 1417 # Handle quoted text. 1418 1419 term = s[index+1] 1420 1421 # Add Unicode values. Where a string is u-prefixed, even \o and \x 1422 # produce Unicode values. 1423 1424 if typename == "unicode" and ( 1425 term in ("u", "U") or 1426 "u" in prefix and (term == "x" or term in octal_digits)): 1427 1428 needed, base = searches.get(term, (4, 8)) 1429 value = convert_quoted_value(s, index, needed, end, base, unichr) 1430 l.append(value) 1431 current = index + needed 1432 1433 # Add raw byte values, changing the string type. 1434 1435 elif "r" not in prefix and ( 1436 term == "x" or term in octal_digits): 1437 1438 needed, base = searches.get(term, (4, 8)) 1439 value = convert_quoted_value(s, index, needed, end, base, chr) 1440 l.append(value) 1441 typename = "str" 1442 current = index + needed 1443 1444 # Add other escaped values. 1445 1446 elif "r" not in prefix and escaped.has_key(term): 1447 l.append(escaped[term]) 1448 current = index + 2 1449 1450 # Add other text as found. 1451 1452 else: 1453 l.append(s[index:index+2]) 1454 current = index + 2 1455 1456 # Collect the components into a single Unicode object. Since the literal 1457 # text was already in UTF-8 form, interpret plain strings as UTF-8 1458 # sequences. 1459 1460 out = [] 1461 1462 for value in l: 1463 if isinstance(value, unicode): 1464 out.append(value) 1465 else: 1466 out.append(unicode(value, "utf-8")) 1467 1468 return "".join(out), typename 1469 1470 def convert_quoted_value(s, index, needed, end, base, fn): 1471 1472 """ 1473 Interpret a quoted value in 's' at 'index' with the given 'needed' number of 1474 positions, and with the given 'end' indicating the first position after the 1475 end of the actual string content. 1476 1477 Use 'base' as the numerical base when interpreting the value, and use 'fn' 1478 to convert the value to an appropriate type. 1479 """ 1480 1481 s = s[index:min(index+needed, end)] 1482 1483 # Not a complete occurrence. 1484 1485 if len(s) < needed: 1486 return s 1487 1488 # Test for a well-formed value. 1489 1490 try: 1491 first = base == 8 and 1 or 2 1492 value = int(s[first:needed], base) 1493 except ValueError: 1494 return s 1495 else: 1496 return fn(value) 1497 1498 # Attribute chain decoding. 1499 1500 def get_attrnames(attrnames): 1501 1502 """ 1503 Split the qualified attribute chain 'attrnames' into its components, 1504 handling special attributes starting with "#" that indicate type 1505 conformance. 1506 """ 1507 1508 if attrnames.startswith("#"): 1509 return [attrnames] 1510 else: 1511 return attrnames.split(".") 1512 1513 def get_name_path(path, name): 1514 1515 "Return a suitable qualified name from the given 'path' and 'name'." 1516 1517 if "." in name: 1518 return name 1519 else: 1520 return "%s.%s" % (path, name) 1521 1522 # Usage-related functions. 1523 1524 def get_types_for_usage(attrnames, objects): 1525 1526 """ 1527 Identify the types that can support the given 'attrnames', using the 1528 given 'objects' as the catalogue of type details. 1529 """ 1530 1531 types = [] 1532 for name, _attrnames in objects.items(): 1533 if set(attrnames).issubset(_attrnames): 1534 types.append(name) 1535 return types 1536 1537 def get_invoked_attributes(usage): 1538 1539 "Obtain invoked attribute from the given 'usage'." 1540 1541 invoked = [] 1542 if usage: 1543 for attrname, invocation, assignment in usage: 1544 if invocation: 1545 invoked.append(attrname) 1546 return invoked 1547 1548 def get_assigned_attributes(usage): 1549 1550 "Obtain assigned attribute from the given 'usage'." 1551 1552 assigned = [] 1553 if usage: 1554 for attrname, invocation, assignment in usage: 1555 if assignment: 1556 assigned.append(attrname) 1557 return assigned 1558 1559 # Type and module functions. 1560 # NOTE: This makes assumptions about the __builtins__ structure. 1561 1562 def get_builtin_module(name): 1563 1564 "Return the module name containing the given type 'name'." 1565 1566 if name == "string": 1567 modname = "str" 1568 elif name == "utf8string": 1569 modname = "unicode" 1570 elif name == "NoneType": 1571 modname = "none" 1572 else: 1573 modname = name 1574 1575 return "__builtins__.%s" % modname 1576 1577 def get_builtin_type(name): 1578 1579 "Return the type name provided by the given Python value 'name'." 1580 1581 if name == "str": 1582 return "string" 1583 elif name == "unicode": 1584 return "utf8string" 1585 else: 1586 return name 1587 1588 def get_builtin_class(name): 1589 1590 "Return the full name of the built-in class having the given 'name'." 1591 1592 typename = get_builtin_type(name) 1593 module = get_builtin_module(typename) 1594 return "%s.%s" % (module, typename) 1595 1596 # Useful data. 1597 1598 predefined_constants = "False", "None", "NotImplemented", "True" 1599 1600 unary_operator_functions = { 1601 1602 # Unary operations. 1603 1604 "Invert" : "invert", 1605 "UnaryAdd" : "pos", 1606 "UnarySub" : "neg", 1607 } 1608 1609 operator_functions = { 1610 1611 # Fundamental operations. 1612 1613 "is" : "is_", 1614 "is not" : "is_not", 1615 1616 # Binary operations. 1617 1618 "in" : "in_", 1619 "not in" : "not_in", 1620 "Add" : "add", 1621 "Bitand" : "and_", 1622 "Bitor" : "or_", 1623 "Bitxor" : "xor", 1624 "Div" : "div", 1625 "FloorDiv" : "floordiv", 1626 "LeftShift" : "lshift", 1627 "Mod" : "mod", 1628 "Mul" : "mul", 1629 "Power" : "pow", 1630 "RightShift" : "rshift", 1631 "Sub" : "sub", 1632 1633 # Augmented assignment. 1634 1635 "+=" : "iadd", 1636 "-=" : "isub", 1637 "*=" : "imul", 1638 "/=" : "idiv", 1639 "//=" : "ifloordiv", 1640 "%=" : "imod", 1641 "**=" : "ipow", 1642 "<<=" : "ilshift", 1643 ">>=" : "irshift", 1644 "&=" : "iand", 1645 "^=" : "ixor", 1646 "|=" : "ior", 1647 1648 # Comparisons. 1649 1650 "==" : "eq", 1651 "!=" : "ne", 1652 "<" : "lt", 1653 "<=" : "le", 1654 ">=" : "ge", 1655 ">" : "gt", 1656 } 1657 1658 operator_functions.update(unary_operator_functions) 1659 1660 # vim: tabstop=4 expandtab shiftwidth=4