1 #!/usr/bin/env python 2 3 """ 4 Common functions. 5 6 Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 7 2017, 2018, 2019 Paul Boddie <paul@boddie.org.uk> 8 9 This program is free software; you can redistribute it and/or modify it under 10 the terms of the GNU General Public License as published by the Free Software 11 Foundation; either version 3 of the License, or (at your option) any later 12 version. 13 14 This program is distributed in the hope that it will be useful, but WITHOUT 15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 16 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 17 details. 18 19 You should have received a copy of the GNU General Public License along with 20 this program. If not, see <http://www.gnu.org/licenses/>. 21 """ 22 23 from compiler.transformer import Transformer 24 from errors import InspectError 25 from os import listdir, makedirs, remove 26 from os.path import exists, getmtime, isdir, join, split 27 from results import ConstantValueRef, LiteralSequenceRef, NameRef 28 import compiler.ast 29 30 class CommonOutput: 31 32 "Common output functionality." 33 34 def check_output(self, options=None): 35 36 "Check the existing output and remove it if irrelevant." 37 38 if not exists(self.output): 39 makedirs(self.output) 40 41 details = self.importer.get_cache_details() 42 recorded_details = self.get_output_details() 43 44 # Combine cache details with any options. 45 46 full_details = options and (details + " " + options) or details 47 48 if recorded_details != full_details: 49 self.remove_output() 50 51 writefile(self.get_output_details_filename(), full_details) 52 53 def get_output_details_filename(self): 54 55 "Return the output details filename." 56 57 return join(self.output, "$details") 58 59 def get_output_details(self): 60 61 "Return details of the existing output." 62 63 details_filename = self.get_output_details_filename() 64 65 if not exists(details_filename): 66 return None 67 else: 68 return readfile(details_filename) 69 70 def remove_output(self, dirname=None): 71 72 "Remove the output." 73 74 dirname = dirname or self.output 75 76 for filename in listdir(dirname): 77 path = join(dirname, filename) 78 if isdir(path): 79 self.remove_output(path) 80 else: 81 remove(path) 82 83 def copy(source, target, only_if_newer=True): 84 85 "Copy a text file from 'source' to 'target'." 86 87 if isdir(target): 88 target = join(target, split(source)[-1]) 89 90 if only_if_newer and not is_newer(source, target): 91 return 92 93 infile = open(source) 94 outfile = open(target, "w") 95 96 try: 97 outfile.write(infile.read()) 98 finally: 99 outfile.close() 100 infile.close() 101 102 def is_newer(source, target): 103 104 "Return whether 'source' is newer than 'target'." 105 106 if exists(target): 107 target_mtime = getmtime(target) 108 source_mtime = getmtime(source) 109 return source_mtime > target_mtime 110 111 return True 112 113 class CommonModule: 114 115 "A common module representation." 116 117 def __init__(self, name, importer): 118 119 """ 120 Initialise this module with the given 'name' and an 'importer' which is 121 used to provide access to other modules when required. 122 """ 123 124 self.name = name 125 self.importer = importer 126 self.filename = None 127 128 # Inspection-related attributes. 129 130 self.astnode = None 131 self.encoding = None 132 self.temp = {} 133 self.lambdas = {} 134 135 # Constants, literals and values. 136 137 self.constants = {} 138 self.constant_values = {} 139 self.literals = {} 140 self.literal_types = {} 141 142 # Nested namespaces. 143 144 self.namespace_path = [] 145 self.in_function = False 146 147 # Retain the assignment value expression and track invocations. 148 149 self.in_assignment = None 150 self.in_invocation = None 151 152 # Attribute chain state management. 153 154 self.attrs = [] 155 self.chain_assignment = [] 156 self.chain_invocation = [] 157 158 def __repr__(self): 159 return "CommonModule(%r, %r)" % (self.name, self.importer) 160 161 def parse_file(self, filename): 162 163 "Parse the file with the given 'filename', initialising attributes." 164 165 self.filename = filename 166 167 # Use the Transformer directly to obtain encoding information. 168 169 t = Transformer() 170 f = open(filename) 171 172 try: 173 self.astnode = t.parsesuite(f.read() + "\n") 174 self.encoding = t.encoding 175 finally: 176 f.close() 177 178 # Module-relative naming. 179 180 def get_global_path(self, name): 181 return "%s.%s" % (self.name, name) 182 183 def get_namespace_path(self): 184 return ".".join([self.name] + self.namespace_path) 185 186 def get_object_path(self, name): 187 return ".".join([self.name] + self.namespace_path + [name]) 188 189 # Namespace management. 190 191 def enter_namespace(self, name): 192 193 "Enter the namespace having the given 'name'." 194 195 self.namespace_path.append(name) 196 197 def exit_namespace(self): 198 199 "Exit the current namespace." 200 201 self.namespace_path.pop() 202 203 # Constant reference naming. 204 205 def get_constant_name(self, value, value_type, encoding=None): 206 207 """ 208 Add a new constant to the current namespace for 'value' with 209 'value_type'. 210 """ 211 212 path = self.get_namespace_path() 213 init_item(self.constants, path, dict) 214 return "$c%d" % add_counter_item(self.constants[path], (value, value_type, encoding)) 215 216 # Literal reference naming. 217 218 def get_literal_name(self): 219 220 "Add a new literal to the current namespace." 221 222 path = self.get_namespace_path() 223 init_item(self.literals, path, lambda: 0) 224 return "$C%d" % self.literals[path] 225 226 def next_literal(self): 227 self.literals[self.get_namespace_path()] += 1 228 229 # Temporary variable naming. 230 231 def get_temporary_name(self): 232 path = self.get_namespace_path() 233 init_item(self.temp, path, lambda: 0) 234 return "$t%d" % self.temp[path] 235 236 def next_temporary(self): 237 self.temp[self.get_namespace_path()] += 1 238 239 # Arbitrary function naming. 240 241 def get_lambda_name(self): 242 path = self.get_namespace_path() 243 init_item(self.lambdas, path, lambda: 0) 244 name = "$l%d" % self.lambdas[path] 245 self.lambdas[path] += 1 246 return name 247 248 def reset_lambdas(self): 249 self.lambdas = {} 250 251 # Constant and literal recording. 252 253 def get_constant_value(self, value, literals=None): 254 255 """ 256 Encode the 'value' if appropriate, returning a value, a typename and any 257 encoding. 258 """ 259 260 if isinstance(value, unicode): 261 return value.encode("utf-8"), "unicode", self.encoding 262 263 # Attempt to convert plain strings to text. 264 265 elif isinstance(value, str) and self.encoding: 266 try: 267 return get_string_details(literals, self.encoding) 268 except UnicodeDecodeError: 269 pass 270 271 return value, value.__class__.__name__, None 272 273 def get_constant_reference(self, ref, value, encoding=None): 274 275 """ 276 Return a constant reference for the given 'ref' type and 'value', with 277 the optional 'encoding' applying to text values. 278 """ 279 280 constant_name = self.get_constant_name(value, ref.get_origin(), encoding) 281 282 # Return a reference for the constant. 283 284 objpath = self.get_object_path(constant_name) 285 name_ref = ConstantValueRef(constant_name, ref.instance_of(objpath), value) 286 287 # Record the value and type for the constant. 288 289 self._reserve_constant(objpath, name_ref.value, name_ref.get_origin(), encoding) 290 return name_ref 291 292 def reserve_constant(self, objpath, value, origin, encoding=None): 293 294 """ 295 Reserve a constant within 'objpath' with the given 'value' and having a 296 type with the given 'origin', with the optional 'encoding' applying to 297 text values. 298 """ 299 300 constant_name = self.get_constant_name(value, origin) 301 objpath = self.get_object_path(constant_name) 302 self._reserve_constant(objpath, value, origin, encoding) 303 304 def _reserve_constant(self, objpath, value, origin, encoding): 305 306 """ 307 Store a constant for 'objpath' with the given 'value' and 'origin', with 308 the optional 'encoding' applying to text values. 309 """ 310 311 self.constant_values[objpath] = value, origin, encoding 312 313 def get_literal_reference(self, name, ref, items, cls): 314 315 """ 316 Return a literal reference for the given type 'name', literal 'ref', 317 node 'items' and employing the given 'cls' as the class of the returned 318 reference object. 319 """ 320 321 # Construct an invocation using the items as arguments. 322 323 typename = "$L%s" % name 324 325 invocation = compiler.ast.CallFunc( 326 compiler.ast.Name(typename), 327 items 328 ) 329 330 # Get a name for the actual literal. 331 332 instname = self.get_literal_name() 333 self.next_literal() 334 335 # Record the type for the literal. 336 337 objpath = self.get_object_path(instname) 338 self.literal_types[objpath] = ref.get_origin() 339 340 # Return a wrapper for the invocation exposing the items. 341 342 return cls( 343 instname, 344 ref.instance_of(), 345 self.process_structure_node(invocation), 346 invocation.args 347 ) 348 349 # Node handling. 350 351 def process_structure(self, node): 352 353 """ 354 Within the given 'node', process the program structure. 355 356 During inspection, this will process global declarations, adjusting the 357 module namespace, and import statements, building a module dependency 358 hierarchy. 359 360 During translation, this will consult deduced program information and 361 output translated code. 362 """ 363 364 l = [] 365 for n in node.getChildNodes(): 366 l.append(self.process_structure_node(n)) 367 return l 368 369 def process_augassign_node(self, n): 370 371 "Process the given augmented assignment node 'n'." 372 373 op = operator_functions[n.op] 374 375 if isinstance(n.node, compiler.ast.Getattr): 376 target = compiler.ast.AssAttr(n.node.expr, n.node.attrname, "OP_ASSIGN") 377 elif isinstance(n.node, compiler.ast.Name): 378 target = compiler.ast.AssName(n.node.name, "OP_ASSIGN") 379 else: 380 target = n.node 381 382 assignment = compiler.ast.Assign( 383 [target], 384 compiler.ast.CallFunc( 385 compiler.ast.Name("$op%s" % op), 386 [n.node, n.expr])) 387 388 return self.process_structure_node(assignment) 389 390 def process_assignment_for_object(self, original_name, source): 391 392 """ 393 Return an assignment operation making 'original_name' refer to the given 394 'source'. 395 """ 396 397 assignment = compiler.ast.Assign( 398 [compiler.ast.AssName(original_name, "OP_ASSIGN")], 399 source 400 ) 401 402 return self.process_structure_node(assignment) 403 404 def process_assignment_node_items(self, n, expr): 405 406 """ 407 Process the given assignment node 'n' whose children are to be assigned 408 items of 'expr'. 409 """ 410 411 name_ref = self.process_structure_node(expr) 412 413 # Either unpack the items and present them directly to each assignment 414 # node. 415 416 if isinstance(name_ref, LiteralSequenceRef) and \ 417 self.process_literal_sequence_items(n, name_ref): 418 419 pass 420 421 # Or have the assignment nodes access each item via the sequence API. 422 423 else: 424 self.process_assignment_node_items_by_position(n, expr, name_ref) 425 426 def process_assignment_node_items_by_position(self, n, expr, name_ref): 427 428 """ 429 Process the given sequence assignment node 'n', converting the node to 430 the separate assignment of each target using positional access on a 431 temporary variable representing the sequence. Use 'expr' as the assigned 432 value and 'name_ref' as the reference providing any existing temporary 433 variable. 434 """ 435 436 statements = [] 437 438 # Employ existing names to access the sequence. 439 # Literal sequences do not provide names of accessible objects. 440 441 if isinstance(name_ref, NameRef) and not isinstance(name_ref, LiteralSequenceRef): 442 temp = name_ref.name 443 444 # For other expressions, create a temporary name to reference the items. 445 446 else: 447 temp = self.get_temporary_name() 448 self.next_temporary() 449 450 statements.append( 451 compiler.ast.Assign([compiler.ast.AssName(temp, "OP_ASSIGN")], expr) 452 ) 453 454 # Generate a test for the length of the expression object. 455 456 statements.append(compiler.ast.Discard( 457 compiler.ast.CallFunc(compiler.ast.Name("$seq_test_length"), 458 [compiler.ast.Name(temp), compiler.ast.Const(len(n.nodes))]))) 459 460 # Assign the items to the target nodes. 461 462 for i, node in enumerate(n.nodes): 463 statements.append( 464 compiler.ast.Assign([node], compiler.ast.CallFunc( 465 compiler.ast.Getattr(compiler.ast.Name(temp), 466 "__get_single_item_unchecked__", 467 privileged=True), 468 [compiler.ast.Const(i, str(i))])) 469 ) 470 471 return self.process_structure_node(compiler.ast.Stmt(statements)) 472 473 def process_literal_sequence_items(self, n, name_ref): 474 475 """ 476 Process the given assignment node 'n', obtaining from the given 477 'name_ref' the items to be assigned to the assignment targets. 478 479 Return whether this method was able to process the assignment node as 480 a sequence of direct assignments. 481 """ 482 483 if len(n.nodes) == len(name_ref.items): 484 assigned_names, count = get_names_from_nodes(n.nodes) 485 accessed_names, _count = get_names_from_nodes(name_ref.items) 486 487 # Only assign directly between items if all assigned names are 488 # plain names (not attribute assignments), and if the assigned names 489 # do not appear in the accessed names. 490 491 if len(assigned_names) == count and \ 492 not assigned_names.intersection(accessed_names): 493 494 for node, item in zip(n.nodes, name_ref.items): 495 self.process_assignment_node(node, item) 496 497 return True 498 499 # Otherwise, use the position-based mechanism to obtain values. 500 501 else: 502 return False 503 else: 504 raise InspectError("In %s, item assignment needing %d items is given %d items." % ( 505 self.get_namespace_path(), len(n.nodes), len(name_ref.items))) 506 507 def process_compare_node(self, n): 508 509 """ 510 Process the given comparison node 'n', converting an operator sequence 511 from... 512 513 <expr1> <op1> <expr2> <op2> <expr3> 514 515 ...to... 516 517 <op1>(<expr1>, <expr2>) and <op2>(<expr2>, <expr3>) 518 """ 519 520 invocations = [] 521 last = n.expr 522 523 for op, op_node in n.ops: 524 op = operator_functions.get(op) 525 526 invocations.append(compiler.ast.CallFunc( 527 compiler.ast.Name("$op%s" % op), 528 [last, op_node])) 529 530 last = op_node 531 532 if len(invocations) > 1: 533 result = compiler.ast.And(invocations) 534 else: 535 result = invocations[0] 536 537 return self.process_structure_node(result) 538 539 def process_dict_node(self, node): 540 541 """ 542 Process the given dictionary 'node', returning a list of (key, value) 543 tuples. 544 """ 545 546 l = [] 547 for key, value in node.items: 548 l.append(( 549 self.process_structure_node(key), 550 self.process_structure_node(value))) 551 return l 552 553 def process_for_node(self, n): 554 555 """ 556 Generate attribute accesses for {n.list}.__iter__ and the next method on 557 the iterator, producing a replacement node for the original. 558 """ 559 560 t0 = self.get_temporary_name() 561 self.next_temporary() 562 t1 = self.get_temporary_name() 563 self.next_temporary() 564 t2 = self.get_temporary_name() 565 self.next_temporary() 566 567 node = compiler.ast.Stmt([ 568 569 # <t0> = {n.list} 570 # <t1> = <t0>.__iter__() 571 572 compiler.ast.Assign( 573 [compiler.ast.AssName(t0, "OP_ASSIGN")], 574 n.list), 575 576 compiler.ast.Assign( 577 [compiler.ast.AssName(t1, "OP_ASSIGN")], 578 compiler.ast.CallFunc( 579 compiler.ast.Getattr(compiler.ast.Name(t0), "__iter__"), 580 [])), 581 582 # <t2> = <t1>.next 583 # try: 584 # while True: 585 # try: 586 # <var>... = <t2>() 587 # except StopIteration: 588 # raise LoopExit 589 # {n.body} 590 # except LoopExit: 591 # {n.else_} 592 # pass 593 594 compiler.ast.Assign( 595 [compiler.ast.AssName(t2, "OP_ASSIGN")], 596 compiler.ast.Getattr(compiler.ast.Name(t1), "next")), 597 598 compiler.ast.TryExcept( 599 compiler.ast.While( 600 compiler.ast.Name("True"), 601 compiler.ast.Stmt([ 602 compiler.ast.TryExcept( 603 compiler.ast.Assign( 604 [n.assign], 605 compiler.ast.CallFunc( 606 compiler.ast.Name(t2), 607 [])), 608 [(compiler.ast.Name("StopIteration"), None, 609 compiler.ast.Raise(compiler.ast.Name("LoopExit")))], 610 None), 611 n.body]), 612 None), 613 [(compiler.ast.Name("LoopExit"), None, n.else_ or compiler.ast.Pass())], 614 None) 615 ]) 616 617 self.process_structure_node(node) 618 619 def process_literal_sequence_node(self, n, name, ref, cls): 620 621 """ 622 Process the given literal sequence node 'n' as a function invocation, 623 with 'name' indicating the type of the sequence, and 'ref' being a 624 reference to the type. The 'cls' is used to instantiate a suitable name 625 reference. 626 """ 627 628 if name == "dict": 629 items = [] 630 for key, value in n.items: 631 items.append(compiler.ast.Tuple([key, value])) 632 else: # name in ("list", "tuple"): 633 items = n.nodes 634 635 return self.get_literal_reference(name, ref, items, cls) 636 637 def process_operator_node(self, n): 638 639 """ 640 Process the given operator node 'n' as an operator function invocation. 641 """ 642 643 opname = n.__class__.__name__ 644 operands = n.getChildNodes() 645 646 # Convert a unary operation to an invocation. 647 648 op = unary_operator_functions.get(opname) 649 650 if op: 651 invocation = compiler.ast.CallFunc( 652 compiler.ast.Name("$op%s" % op), 653 [operands[0]] 654 ) 655 656 # Convert a single operator with a list of operands to a combination of 657 # pairwise operations. 658 659 else: 660 op = operator_functions[opname] 661 invocation = self._process_operator_node(op, operands) 662 663 return self.process_structure_node(invocation) 664 665 def _process_operator_node(self, op, operands): 666 667 """ 668 Process the given 'op', being an operator function, together with the 669 supplied 'operands', returning either a single remaining operand or an 670 invocation combining the operands. 671 """ 672 673 remaining = operands[1:] 674 if not remaining: 675 return operands[0] 676 677 return compiler.ast.CallFunc( 678 compiler.ast.Name("$op%s" % op), 679 [operands[0], self._process_operator_node(op, remaining)] 680 ) 681 682 def process_print_node(self, n): 683 684 """ 685 Process the given print node 'n' as an invocation on a stream of the 686 form... 687 688 $print(dest, args, nl) 689 690 The special function name will be translated elsewhere. 691 """ 692 693 nl = isinstance(n, compiler.ast.Printnl) 694 invocation = compiler.ast.CallFunc( 695 compiler.ast.Name("$print"), 696 [n.dest or compiler.ast.Name("None"), 697 compiler.ast.List(list(n.nodes)), 698 nl and compiler.ast.Name("True") or compiler.ast.Name("False")] 699 ) 700 return self.process_structure_node(invocation) 701 702 def process_slice_node(self, n, expr=None): 703 704 """ 705 Process the given slice node 'n' as a method invocation. 706 """ 707 708 if n.flags == "OP_ASSIGN": op = "__setslice__" 709 elif n.flags == "OP_DELETE": op = "__delslice__" 710 else: op = "__getslice__" 711 712 invocation = compiler.ast.CallFunc( 713 compiler.ast.Getattr(n.expr, op), 714 [n.lower or compiler.ast.Name("None"), n.upper or compiler.ast.Name("None")] + 715 (expr and [expr] or []) 716 ) 717 718 # Fix parse tree structure. 719 720 if op == "__delslice__": 721 invocation = compiler.ast.Discard(invocation) 722 723 return self.process_structure_node(invocation) 724 725 def process_sliceobj_node(self, n): 726 727 """ 728 Process the given slice object node 'n' as a slice constructor. 729 """ 730 731 op = "slice" 732 invocation = compiler.ast.CallFunc( 733 compiler.ast.Name("$op%s" % op), 734 n.nodes 735 ) 736 return self.process_structure_node(invocation) 737 738 def process_subscript_node(self, n, expr=None): 739 740 """ 741 Process the given subscript node 'n' as a method invocation. 742 """ 743 744 if n.flags == "OP_ASSIGN": op = "__setitem__" 745 elif n.flags == "OP_DELETE": op = "__delitem__" 746 else: op = "__getitem__" 747 748 invocation = compiler.ast.CallFunc( 749 compiler.ast.Getattr(n.expr, op), 750 list(n.subs) + (expr and [expr] or []) 751 ) 752 753 # Fix parse tree structure. 754 755 if op == "__delitem__": 756 invocation = compiler.ast.Discard(invocation) 757 758 return self.process_structure_node(invocation) 759 760 def process_attribute_chain(self, n): 761 762 """ 763 Process the given attribute access node 'n'. Return a reference 764 describing the expression. 765 """ 766 767 # AssAttr/Getattr are nested with the outermost access being the last 768 # access in any chain. 769 770 self.attrs.insert(0, n.attrname) 771 attrs = self.attrs 772 773 # Break attribute chains where non-access nodes are found. 774 775 if not self.have_access_expression(n): 776 self.reset_attribute_chain() 777 778 # Descend into the expression, extending backwards any existing chain, 779 # or building another for the expression. 780 781 name_ref = self.process_structure_node(n.expr) 782 783 # Restore chain information applying to this node. 784 785 if not self.have_access_expression(n): 786 self.restore_attribute_chain(attrs) 787 788 # Return immediately if the expression was another access and thus a 789 # continuation backwards along the chain. The above processing will 790 # have followed the chain all the way to its conclusion. 791 792 if self.have_access_expression(n): 793 del self.attrs[0] 794 795 return name_ref 796 797 # Attribute chain handling. 798 799 def reset_attribute_chain(self): 800 801 "Reset the attribute chain for a subexpression of an attribute access." 802 803 self.attrs = [] 804 self.chain_assignment.append(self.in_assignment) 805 self.chain_invocation.append(self.in_invocation) 806 self.in_assignment = None 807 self.in_invocation = None 808 809 def restore_attribute_chain(self, attrs): 810 811 "Restore the attribute chain for an attribute access." 812 813 self.attrs = attrs 814 self.in_assignment = self.chain_assignment.pop() 815 self.in_invocation = self.chain_invocation.pop() 816 817 def have_access_expression(self, node): 818 819 "Return whether the expression associated with 'node' is Getattr." 820 821 return isinstance(node.expr, compiler.ast.Getattr) 822 823 def get_name_for_tracking(self, name, name_ref=None, is_global=False): 824 825 """ 826 Return the name to be used for attribute usage observations involving 827 the given 'name' in the current namespace. 828 829 If the name is being used outside a function, and if 'name_ref' is 830 given and indicates a global or if 'is_global' is specified as a true 831 value, a path featuring the name in the global namespace is returned. 832 Otherwise, a path computed using the current namespace and the given 833 name is returned. 834 835 The intention of this method is to provide a suitably-qualified name 836 that can be tracked across namespaces. Where globals are being 837 referenced in class namespaces, they should be referenced using their 838 path within the module, not using a path within each class. 839 840 It may not be possible to identify a global within a function at the 841 time of inspection (since a global may appear later in a file). 842 Consequently, globals are identified by their local name rather than 843 their module-qualified path. 844 """ 845 846 # For functions, use the appropriate local names. 847 848 if self.in_function: 849 return name 850 851 # For global names outside functions, use a global name. 852 853 elif is_global or name_ref and name_ref.is_global_name(): 854 return self.get_global_path(name) 855 856 # Otherwise, establish a name in the current namespace. 857 858 else: 859 return self.get_object_path(name) 860 861 def get_path_for_access(self): 862 863 "Outside functions, register accesses at the module level." 864 865 if not self.in_function: 866 return self.name 867 else: 868 return self.get_namespace_path() 869 870 def get_module_name(self, node): 871 872 """ 873 Using the given From 'node' in this module, calculate any relative import 874 information, returning a tuple containing a module to import along with any 875 names to import based on the node's name information. 876 877 Where the returned module is given as None, whole module imports should 878 be performed for the returned modules using the returned names. 879 """ 880 881 # Absolute import. 882 883 if node.level == 0: 884 return node.modname, node.names 885 886 # Relative to an ancestor of this module. 887 888 else: 889 path = self.name.split(".") 890 level = node.level 891 892 # Relative imports treat package roots as submodules. 893 894 if split(self.filename)[-1] == "__init__.py": 895 level -= 1 896 897 if level > len(path): 898 raise InspectError("Relative import %r involves too many levels up from module %r" % ( 899 ("%s%s" % ("." * node.level, node.modname or "")), self.name)) 900 901 basename = ".".join(path[:len(path)-level]) 902 903 # Name imports from a module. 904 905 if node.modname: 906 return "%s.%s" % (basename, node.modname), node.names 907 908 # Relative whole module imports. 909 910 else: 911 return basename, node.names 912 913 def get_argnames(args): 914 915 """ 916 Return a list of all names provided by 'args'. Since tuples may be 917 employed, the arguments are traversed depth-first. 918 """ 919 920 l = [] 921 for arg in args: 922 if isinstance(arg, tuple): 923 l += get_argnames(arg) 924 else: 925 l.append(arg) 926 return l 927 928 def get_names_from_nodes(nodes): 929 930 """ 931 Return the names employed in the given 'nodes' along with the number of 932 nodes excluding sequences. 933 """ 934 935 names = set() 936 count = 0 937 938 for node in nodes: 939 940 # Add names and count them. 941 942 if isinstance(node, (compiler.ast.AssName, compiler.ast.Name)): 943 names.add(node.name) 944 count += 1 945 946 # Add names from sequences and incorporate their counts. 947 948 elif isinstance(node, (compiler.ast.AssList, compiler.ast.AssTuple, 949 compiler.ast.List, compiler.ast.Set, 950 compiler.ast.Tuple)): 951 _names, _count = get_names_from_nodes(node.nodes) 952 names.update(_names) 953 count += _count 954 955 # Count non-name, non-sequence nodes. 956 957 else: 958 count += 1 959 960 return names, count 961 962 # Location classes. 963 964 class Location: 965 966 "A generic program location." 967 968 def __init__(self, path, name, attrnames=None, version=None, access_number=None): 969 self.path = path 970 self.name = name 971 self.attrnames = attrnames 972 self.version = version 973 self.access_number = access_number 974 975 def __repr__(self): 976 return "Location(%r, %r, %r, %r, %r)" % self.as_tuple() 977 978 def as_tuple(self): 979 return (self.path, self.name, self.attrnames, self.version, self.access_number) 980 981 def __hash__(self): 982 return hash(self.as_tuple()) 983 984 def __eq__(self, other): 985 return self.as_tuple() == other.as_tuple() 986 987 def __cmp__(self, other): 988 return cmp(self.as_tuple(), other.as_tuple()) 989 990 def get_attrname(self): 991 992 """ 993 Extract the first attribute from the attribute names employed in this 994 location. 995 """ 996 997 attrnames = self.attrnames 998 if not attrnames: 999 return attrnames 1000 return get_attrnames(attrnames)[0] 1001 1002 class AccessLocation(Location): 1003 1004 "A specialised access location." 1005 1006 def __init__(self, path, name, attrnames, access_number): 1007 1008 """ 1009 Initialise an access location featuring 'path', 'name', 'attrnames' and 1010 'access_number'. 1011 """ 1012 1013 Location.__init__(self, path, name, attrnames, None, access_number) 1014 1015 def __repr__(self): 1016 return "AccessLocation(%r, %r, %r, %r)" % (self.path, self.name, self.attrnames, self.access_number) 1017 1018 # Result classes. 1019 1020 class InstructionSequence: 1021 1022 "A generic sequence of instructions." 1023 1024 def __init__(self, instructions): 1025 self.instructions = instructions 1026 1027 def get_value_instruction(self): 1028 return self.instructions[-1] 1029 1030 def get_init_instructions(self): 1031 return self.instructions[:-1] 1032 1033 # Dictionary utilities. 1034 1035 def init_item(d, key, fn): 1036 1037 """ 1038 Add to 'd' an entry for 'key' using the callable 'fn' to make an initial 1039 value where no entry already exists. 1040 """ 1041 1042 if not d.has_key(key): 1043 d[key] = fn() 1044 return d[key] 1045 1046 def dict_for_keys(d, keys): 1047 1048 "Return a new dictionary containing entries from 'd' for the given 'keys'." 1049 1050 nd = {} 1051 for key in keys: 1052 if d.has_key(key): 1053 nd[key] = d[key] 1054 return nd 1055 1056 def make_key(s): 1057 1058 "Make sequence 's' into a tuple-based key, first sorting its contents." 1059 1060 l = list(s) 1061 l.sort() 1062 return tuple(l) 1063 1064 def add_counter_item(d, key): 1065 1066 """ 1067 Make a mapping in 'd' for 'key' to the number of keys added before it, thus 1068 maintaining a mapping of keys to their order of insertion. 1069 """ 1070 1071 if not d.has_key(key): 1072 d[key] = len(d.keys()) 1073 return d[key] 1074 1075 def remove_items(d1, d2): 1076 1077 "Remove from 'd1' all items from 'd2'." 1078 1079 for key in d2.keys(): 1080 if d1.has_key(key): 1081 del d1[key] 1082 1083 # Set utilities. 1084 1085 def first(s): 1086 return list(s)[0] 1087 1088 def same(s1, s2): 1089 return set(s1) == set(s2) 1090 1091 def order_dependencies(all_depends): 1092 1093 """ 1094 Produce a dependency ordering for the 'all_depends' mapping. This mapping 1095 has the form "A depends on B, C...". The result will order A, B, C, and so 1096 on. 1097 """ 1098 1099 usage = init_reverse_dependencies(all_depends) 1100 1101 # Produce an ordering by obtaining exposed items (required by items already 1102 # processed) and putting them at the start of the list. 1103 1104 ordered = [] 1105 1106 while usage: 1107 have_next = False 1108 1109 for key, n in usage.items(): 1110 1111 # Add items needed by no other items to the ordering. 1112 1113 if not n: 1114 remove_dependency(key, all_depends, usage, ordered) 1115 have_next = True 1116 1117 if not have_next: 1118 raise ValueError, usage 1119 1120 return ordered 1121 1122 def order_dependencies_partial(all_depends): 1123 1124 """ 1125 Produce a dependency ordering for the 'all_depends' mapping. This mapping 1126 has the form "A depends on B, C...". The result will order A, B, C, and so 1127 on. Where cycles exist, they will be broken and a partial ordering returned. 1128 """ 1129 1130 usage = init_reverse_dependencies(all_depends) 1131 1132 # Duplicate the dependencies for subsequent modification. 1133 1134 new_depends = {} 1135 for key, values in all_depends.items(): 1136 new_depends[key] = set(values) 1137 1138 all_depends = new_depends 1139 1140 # Produce an ordering by obtaining exposed items (required by items already 1141 # processed) and putting them at the start of the list. 1142 1143 ordered = [] 1144 1145 while usage: 1146 least = None 1147 least_key = None 1148 1149 for key, n in usage.items(): 1150 1151 # Add items needed by no other items to the ordering. 1152 1153 if not n: 1154 remove_dependency(key, all_depends, usage, ordered) 1155 least = 0 1156 1157 # When breaking cycles, note the least used items. 1158 1159 elif least is None or len(n) < least: 1160 least_key = key 1161 least = len(n) 1162 1163 if least: 1164 transfer_dependencies(least_key, all_depends, usage, ordered) 1165 1166 return ordered 1167 1168 def init_reverse_dependencies(all_depends): 1169 1170 """ 1171 From 'all_depends', providing a mapping of the form "A depends on B, C...", 1172 record the reverse dependencies, making a mapping of the form 1173 "B is needed by A", "C is needed by A", and so on. 1174 """ 1175 1176 usage = {} 1177 1178 # Record path-based dependencies. 1179 1180 for key in all_depends.keys(): 1181 usage[key] = set() 1182 1183 for key, depends in all_depends.items(): 1184 for depend in depends: 1185 init_item(usage, depend, set) 1186 usage[depend].add(key) 1187 1188 return usage 1189 1190 def transfer_dependencies(key, all_depends, usage, ordered): 1191 1192 """ 1193 Transfer items needed by 'key' to those items needing 'key', found using 1194 'all_depends', and updating 'usage'. Insert 'key' into the 'ordered' 1195 collection of dependencies. 1196 1197 If "A is needed by X" and "B is needed by A", then transferring items needed 1198 by A will cause "B is needed by X" to be recorded as a consequence. 1199 1200 Transferring items also needs to occur in the reverse mapping, so that 1201 "A needs B" and "X needs A", then the consequence must be recorded as 1202 "X needs B". 1203 """ 1204 1205 ordered.insert(0, key) 1206 1207 needing = usage[key] # A is needed by X 1208 needed = all_depends.get(key) # A needs B 1209 1210 if needing: 1211 for depend in needing: 1212 l = all_depends.get(depend) 1213 if not l: 1214 continue 1215 1216 l.remove(key) # X needs (A) 1217 1218 if needed: 1219 l.update(needed) # X needs B... 1220 1221 # Prevent self references. 1222 1223 if depend in needed: 1224 l.remove(depend) 1225 1226 if needed: 1227 for depend in needed: 1228 l = usage.get(depend) 1229 if not l: 1230 continue 1231 1232 l.remove(key) # B is needed by (A) 1233 l.update(needing) # B is needed by X... 1234 1235 # Prevent self references. 1236 1237 if depend in needing: 1238 l.remove(depend) 1239 1240 if needed: 1241 del all_depends[key] 1242 del usage[key] 1243 1244 def remove_dependency(key, all_depends, usage, ordered): 1245 1246 """ 1247 Remove 'key', found in 'all_depends', from 'usage', inserting it into the 1248 'ordered' collection of dependencies. 1249 1250 Given that 'usage' for a given key A would indicate that "A needs <nothing>" 1251 upon removing A from 'usage', the outcome is that all keys needing A will 1252 have A removed from their 'usage' records. 1253 1254 So, if "B needs A", removing A will cause "B needs <nothing>" to be recorded 1255 as a consequence. 1256 """ 1257 1258 ordered.insert(0, key) 1259 1260 depends = all_depends.get(key) 1261 1262 # Reduce usage of the referenced items. 1263 1264 if depends: 1265 for depend in depends: 1266 usage[depend].remove(key) 1267 1268 del usage[key] 1269 1270 # General input/output. 1271 1272 def readfile(filename): 1273 1274 "Return the contents of 'filename'." 1275 1276 f = open(filename) 1277 try: 1278 return f.read() 1279 finally: 1280 f.close() 1281 1282 def writefile(filename, s): 1283 1284 "Write to 'filename' the string 's'." 1285 1286 f = open(filename, "w") 1287 try: 1288 f.write(s) 1289 finally: 1290 f.close() 1291 1292 # General encoding. 1293 1294 def sorted_output(x): 1295 1296 "Sort sequence 'x' and return a string with commas separating the values." 1297 1298 x = map(str, x) 1299 x.sort() 1300 return ", ".join(x) 1301 1302 def get_string_details(literals, encoding): 1303 1304 """ 1305 Determine whether 'literals' represent Unicode strings or byte strings, 1306 using 'encoding' to reproduce byte sequences. 1307 1308 Each literal is the full program representation including prefix and quotes 1309 recoded by the parser to UTF-8. Thus, any literal found to represent a byte 1310 string needs to be translated back to its original encoding. 1311 1312 Return a single encoded literal value, a type name, and the original 1313 encoding as a tuple. 1314 """ 1315 1316 typename = "unicode" 1317 1318 l = [] 1319 1320 for s in literals: 1321 out, _typename = get_literal_details(s) 1322 if _typename == "str": 1323 typename = "str" 1324 l.append(out) 1325 1326 out = "".join(l) 1327 1328 # For Unicode values, convert to the UTF-8 program representation. 1329 1330 if typename == "unicode": 1331 return out.encode("utf-8"), typename, encoding 1332 1333 # For byte string values, convert back to the original encoding. 1334 1335 else: 1336 return out.encode(encoding), typename, encoding 1337 1338 def get_literal_details(s): 1339 1340 """ 1341 Determine whether 's' represents a Unicode string or a byte string, where 1342 's' contains the full program representation of a literal including prefix 1343 and quotes, recoded by the parser to UTF-8. 1344 1345 Find and convert Unicode values starting with <backslash>u or <backslash>U, 1346 and byte or Unicode values starting with <backslash><octal digit> or 1347 <backslash>x. 1348 1349 Literals prefixed with "u" cause <backslash><octal digit> and <backslash>x 1350 to be considered as Unicode values. Otherwise, they produce byte values and 1351 cause unprefixed strings to be considered as byte strings. 1352 1353 Literals prefixed with "r" do not have their backslash-encoded values 1354 converted unless also prefixed with "u", in which case only the above value 1355 formats are converted, not any of the other special sequences for things 1356 like newlines. 1357 1358 Return the literal value as a Unicode object together with the appropriate 1359 type name in a tuple. 1360 """ 1361 1362 l = [] 1363 1364 # Identify the quote character and use it to identify the prefix. 1365 1366 quote_type = s[-1] 1367 prefix_end = s.find(quote_type) 1368 prefix = s[:prefix_end].lower() 1369 1370 if prefix not in ("", "b", "br", "r", "u", "ur"): 1371 raise ValueError, "String literal does not have a supported prefix: %s" % s 1372 1373 if "b" in prefix: 1374 typename = "str" 1375 else: 1376 typename = "unicode" 1377 1378 # Identify triple quotes or single quotes. 1379 1380 if len(s) >= 6 and s[-2] == quote_type and s[-3] == quote_type: 1381 quote = s[prefix_end:prefix_end+3] 1382 current = prefix_end + 3 1383 end = len(s) - 3 1384 else: 1385 quote = s[prefix_end] 1386 current = prefix_end + 1 1387 end = len(s) - 1 1388 1389 # Conversions of some quoted values. 1390 1391 searches = { 1392 "u" : (6, 16), 1393 "U" : (10, 16), 1394 "x" : (4, 16), 1395 } 1396 1397 octal_digits = map(str, range(0, 8)) 1398 1399 # Translations of some quoted values. 1400 1401 escaped = { 1402 "\\" : "\\", "'" : "'", '"' : '"', 1403 "a" : "\a", "b" : "\b", "f" : "\f", 1404 "n" : "\n", "r" : "\r", "t" : "\t", 1405 } 1406 1407 while current < end: 1408 1409 # Look for quoted values. 1410 1411 index = s.find("\\", current) 1412 if index == -1 or index + 1 == end: 1413 l.append(s[current:end]) 1414 break 1415 1416 # Add the preceding text. 1417 1418 l.append(s[current:index]) 1419 1420 # Handle quoted text. 1421 1422 term = s[index+1] 1423 1424 # Add Unicode values. Where a string is u-prefixed, even \o and \x 1425 # produce Unicode values. 1426 1427 if typename == "unicode" and ( 1428 term in ("u", "U") or 1429 "u" in prefix and (term == "x" or term in octal_digits)): 1430 1431 needed, base = searches.get(term, (4, 8)) 1432 value = convert_quoted_value(s, index, needed, end, base, unichr) 1433 l.append(value) 1434 current = index + needed 1435 1436 # Add raw byte values, changing the string type. 1437 1438 elif "r" not in prefix and ( 1439 term == "x" or term in octal_digits): 1440 1441 needed, base = searches.get(term, (4, 8)) 1442 value = convert_quoted_value(s, index, needed, end, base, chr) 1443 l.append(value) 1444 typename = "str" 1445 current = index + needed 1446 1447 # Add other escaped values. 1448 1449 elif "r" not in prefix and escaped.has_key(term): 1450 l.append(escaped[term]) 1451 current = index + 2 1452 1453 # Add other text as found. 1454 1455 else: 1456 l.append(s[index:index+2]) 1457 current = index + 2 1458 1459 # Collect the components into a single Unicode object. Since the literal 1460 # text was already in UTF-8 form, interpret plain strings as UTF-8 1461 # sequences. 1462 1463 out = [] 1464 1465 for value in l: 1466 if isinstance(value, unicode): 1467 out.append(value) 1468 else: 1469 out.append(unicode(value, "utf-8")) 1470 1471 return "".join(out), typename 1472 1473 def convert_quoted_value(s, index, needed, end, base, fn): 1474 1475 """ 1476 Interpret a quoted value in 's' at 'index' with the given 'needed' number of 1477 positions, and with the given 'end' indicating the first position after the 1478 end of the actual string content. 1479 1480 Use 'base' as the numerical base when interpreting the value, and use 'fn' 1481 to convert the value to an appropriate type. 1482 """ 1483 1484 s = s[index:min(index+needed, end)] 1485 1486 # Not a complete occurrence. 1487 1488 if len(s) < needed: 1489 return s 1490 1491 # Test for a well-formed value. 1492 1493 try: 1494 first = base == 8 and 1 or 2 1495 value = int(s[first:needed], base) 1496 except ValueError: 1497 return s 1498 else: 1499 return fn(value) 1500 1501 # Attribute chain decoding. 1502 1503 def get_attrnames(attrnames): 1504 1505 """ 1506 Split the qualified attribute chain 'attrnames' into its components, 1507 handling special attributes starting with "#" that indicate type 1508 conformance. 1509 """ 1510 1511 if attrnames.startswith("#"): 1512 return [attrnames] 1513 else: 1514 return attrnames.split(".") 1515 1516 def get_name_path(path, name): 1517 1518 "Return a suitable qualified name from the given 'path' and 'name'." 1519 1520 if "." in name: 1521 return name 1522 else: 1523 return "%s.%s" % (path, name) 1524 1525 # Usage-related functions. 1526 1527 def get_types_for_usage(attrnames, objects): 1528 1529 """ 1530 Identify the types that can support the given 'attrnames', using the 1531 given 'objects' as the catalogue of type details. 1532 """ 1533 1534 types = [] 1535 for name, _attrnames in objects.items(): 1536 if set(attrnames).issubset(_attrnames): 1537 types.append(name) 1538 return types 1539 1540 def get_invoked_attributes(usage): 1541 1542 "Obtain invoked attribute from the given 'usage'." 1543 1544 invoked = [] 1545 if usage: 1546 for attrname, invocation, assignment in usage: 1547 if invocation: 1548 invoked.append(attrname) 1549 return invoked 1550 1551 def get_assigned_attributes(usage): 1552 1553 "Obtain assigned attribute from the given 'usage'." 1554 1555 assigned = [] 1556 if usage: 1557 for attrname, invocation, assignment in usage: 1558 if assignment: 1559 assigned.append(attrname) 1560 return assigned 1561 1562 # Type and module functions. 1563 # NOTE: This makes assumptions about the __builtins__ structure. 1564 1565 def get_builtin_module(name): 1566 1567 "Return the module name containing the given type 'name'." 1568 1569 if name == "string": 1570 modname = "str" 1571 elif name == "utf8string": 1572 modname = "unicode" 1573 elif name == "NoneType": 1574 modname = "none" 1575 else: 1576 modname = name 1577 1578 return "__builtins__.%s" % modname 1579 1580 def get_builtin_type(name): 1581 1582 "Return the type name provided by the given Python value 'name'." 1583 1584 if name == "str": 1585 return "string" 1586 elif name == "unicode": 1587 return "utf8string" 1588 else: 1589 return name 1590 1591 def get_builtin_class(name): 1592 1593 "Return the full name of the built-in class having the given 'name'." 1594 1595 typename = get_builtin_type(name) 1596 module = get_builtin_module(typename) 1597 return "%s.%s" % (module, typename) 1598 1599 # Useful data. 1600 1601 predefined_constants = "False", "None", "NotImplemented", "True" 1602 1603 privileged_attributes = [ 1604 "__get_single_item_unchecked__", 1605 ] 1606 1607 unary_operator_functions = { 1608 1609 # Unary operations. 1610 1611 "Invert" : "invert", 1612 "UnaryAdd" : "pos", 1613 "UnarySub" : "neg", 1614 } 1615 1616 operator_functions = { 1617 1618 # Fundamental operations. 1619 1620 "is" : "is_", 1621 "is not" : "is_not", 1622 1623 # Binary operations. 1624 1625 "in" : "in_", 1626 "not in" : "not_in", 1627 "Add" : "add", 1628 "Bitand" : "and_", 1629 "Bitor" : "or_", 1630 "Bitxor" : "xor", 1631 "Div" : "div", 1632 "FloorDiv" : "floordiv", 1633 "LeftShift" : "lshift", 1634 "Mod" : "mod", 1635 "Mul" : "mul", 1636 "Power" : "pow", 1637 "RightShift" : "rshift", 1638 "Sub" : "sub", 1639 1640 # Augmented assignment. 1641 1642 "+=" : "iadd", 1643 "-=" : "isub", 1644 "*=" : "imul", 1645 "/=" : "idiv", 1646 "//=" : "ifloordiv", 1647 "%=" : "imod", 1648 "**=" : "ipow", 1649 "<<=" : "ilshift", 1650 ">>=" : "irshift", 1651 "&=" : "iand", 1652 "^=" : "ixor", 1653 "|=" : "ior", 1654 1655 # Comparisons. 1656 1657 "==" : "eq", 1658 "!=" : "ne", 1659 "<" : "lt", 1660 "<=" : "le", 1661 ">=" : "ge", 1662 ">" : "gt", 1663 } 1664 1665 operator_functions.update(unary_operator_functions) 1666 1667 # vim: tabstop=4 expandtab shiftwidth=4