1 #!/usr/bin/env python 2 3 """ 4 Common functions. 5 6 Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013, 7 2014, 2015, 2016, 2017, 2018 Paul Boddie <paul@boddie.org.uk> 8 9 This program is free software; you can redistribute it and/or modify it under 10 the terms of the GNU General Public License as published by the Free Software 11 Foundation; either version 3 of the License, or (at your option) any later 12 version. 13 14 This program is distributed in the hope that it will be useful, but WITHOUT 15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 16 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 17 details. 18 19 You should have received a copy of the GNU General Public License along with 20 this program. If not, see <http://www.gnu.org/licenses/>. 21 """ 22 23 from compiler.transformer import Transformer 24 from errors import InspectError 25 from os import listdir, makedirs, remove 26 from os.path import exists, getmtime, isdir, join, split 27 from results import ConstantValueRef, LiteralSequenceRef, NameRef 28 import compiler.ast 29 30 class CommonOutput: 31 32 "Common output functionality." 33 34 def check_output(self, options=None): 35 36 "Check the existing output and remove it if irrelevant." 37 38 if not exists(self.output): 39 makedirs(self.output) 40 41 details = self.importer.get_cache_details() 42 recorded_details = self.get_output_details() 43 44 # Combine cache details with any options. 45 46 full_details = options and (details + " " + options) or details 47 48 if recorded_details != full_details: 49 self.remove_output() 50 51 writefile(self.get_output_details_filename(), full_details) 52 53 def get_output_details_filename(self): 54 55 "Return the output details filename." 56 57 return join(self.output, "$details") 58 59 def get_output_details(self): 60 61 "Return details of the existing output." 62 63 details_filename = self.get_output_details_filename() 64 65 if not exists(details_filename): 66 return None 67 else: 68 return readfile(details_filename) 69 70 def remove_output(self, dirname=None): 71 72 "Remove the output." 73 74 dirname = dirname or self.output 75 76 for filename in listdir(dirname): 77 path = join(dirname, filename) 78 if isdir(path): 79 self.remove_output(path) 80 else: 81 remove(path) 82 83 def copy(source, target, only_if_newer=True): 84 85 "Copy a text file from 'source' to 'target'." 86 87 if isdir(target): 88 target = join(target, split(source)[-1]) 89 90 if only_if_newer and not is_newer(source, target): 91 return 92 93 infile = open(source) 94 outfile = open(target, "w") 95 96 try: 97 outfile.write(infile.read()) 98 finally: 99 outfile.close() 100 infile.close() 101 102 def is_newer(source, target): 103 104 "Return whether 'source' is newer than 'target'." 105 106 if exists(target): 107 target_mtime = getmtime(target) 108 source_mtime = getmtime(source) 109 return source_mtime > target_mtime 110 111 return True 112 113 class CommonModule: 114 115 "A common module representation." 116 117 def __init__(self, name, importer): 118 119 """ 120 Initialise this module with the given 'name' and an 'importer' which is 121 used to provide access to other modules when required. 122 """ 123 124 self.name = name 125 self.importer = importer 126 self.filename = None 127 128 # Inspection-related attributes. 129 130 self.astnode = None 131 self.encoding = None 132 self.temp = {} 133 self.lambdas = {} 134 135 # Constants, literals and values. 136 137 self.constants = {} 138 self.constant_values = {} 139 self.literals = {} 140 self.literal_types = {} 141 142 # Nested namespaces. 143 144 self.namespace_path = [] 145 self.in_function = False 146 147 # Retain the assignment value expression and track invocations. 148 149 self.in_assignment = None 150 self.in_invocation = None 151 152 # Attribute chain state management. 153 154 self.attrs = [] 155 self.chain_assignment = [] 156 self.chain_invocation = [] 157 158 def __repr__(self): 159 return "CommonModule(%r, %r)" % (self.name, self.importer) 160 161 def parse_file(self, filename): 162 163 "Parse the file with the given 'filename', initialising attributes." 164 165 self.filename = filename 166 167 # Use the Transformer directly to obtain encoding information. 168 169 t = Transformer() 170 f = open(filename) 171 172 try: 173 self.astnode = t.parsesuite(f.read() + "\n") 174 self.encoding = t.encoding 175 finally: 176 f.close() 177 178 # Module-relative naming. 179 180 def get_global_path(self, name): 181 return "%s.%s" % (self.name, name) 182 183 def get_namespace_path(self): 184 return ".".join([self.name] + self.namespace_path) 185 186 def get_object_path(self, name): 187 return ".".join([self.name] + self.namespace_path + [name]) 188 189 # Namespace management. 190 191 def enter_namespace(self, name): 192 193 "Enter the namespace having the given 'name'." 194 195 self.namespace_path.append(name) 196 197 def exit_namespace(self): 198 199 "Exit the current namespace." 200 201 self.namespace_path.pop() 202 203 # Constant reference naming. 204 205 def get_constant_name(self, value, value_type, encoding=None): 206 207 """ 208 Add a new constant to the current namespace for 'value' with 209 'value_type'. 210 """ 211 212 path = self.get_namespace_path() 213 init_item(self.constants, path, dict) 214 return "$c%d" % add_counter_item(self.constants[path], (value, value_type, encoding)) 215 216 # Literal reference naming. 217 218 def get_literal_name(self): 219 220 "Add a new literal to the current namespace." 221 222 path = self.get_namespace_path() 223 init_item(self.literals, path, lambda: 0) 224 return "$C%d" % self.literals[path] 225 226 def next_literal(self): 227 self.literals[self.get_namespace_path()] += 1 228 229 # Temporary variable naming. 230 231 def get_temporary_name(self): 232 path = self.get_namespace_path() 233 init_item(self.temp, path, lambda: 0) 234 return "$t%d" % self.temp[path] 235 236 def next_temporary(self): 237 self.temp[self.get_namespace_path()] += 1 238 239 # Arbitrary function naming. 240 241 def get_lambda_name(self): 242 path = self.get_namespace_path() 243 init_item(self.lambdas, path, lambda: 0) 244 name = "$l%d" % self.lambdas[path] 245 self.lambdas[path] += 1 246 return name 247 248 def reset_lambdas(self): 249 self.lambdas = {} 250 251 # Constant and literal recording. 252 253 def get_constant_value(self, value, literals=None): 254 255 """ 256 Encode the 'value' if appropriate, returning a value, a typename and any 257 encoding. 258 """ 259 260 if isinstance(value, unicode): 261 return value.encode("utf-8"), "unicode", self.encoding 262 263 # Attempt to convert plain strings to text. 264 265 elif isinstance(value, str) and self.encoding: 266 try: 267 return get_string_details(literals, self.encoding) 268 except UnicodeDecodeError: 269 pass 270 271 return value, value.__class__.__name__, None 272 273 def get_constant_reference(self, ref, value, encoding=None): 274 275 """ 276 Return a constant reference for the given 'ref' type and 'value', with 277 the optional 'encoding' applying to text values. 278 """ 279 280 constant_name = self.get_constant_name(value, ref.get_origin(), encoding) 281 282 # Return a reference for the constant. 283 284 objpath = self.get_object_path(constant_name) 285 name_ref = ConstantValueRef(constant_name, ref.instance_of(objpath), value) 286 287 # Record the value and type for the constant. 288 289 self._reserve_constant(objpath, name_ref.value, name_ref.get_origin(), encoding) 290 return name_ref 291 292 def reserve_constant(self, objpath, value, origin, encoding=None): 293 294 """ 295 Reserve a constant within 'objpath' with the given 'value' and having a 296 type with the given 'origin', with the optional 'encoding' applying to 297 text values. 298 """ 299 300 constant_name = self.get_constant_name(value, origin) 301 objpath = self.get_object_path(constant_name) 302 self._reserve_constant(objpath, value, origin, encoding) 303 304 def _reserve_constant(self, objpath, value, origin, encoding): 305 306 """ 307 Store a constant for 'objpath' with the given 'value' and 'origin', with 308 the optional 'encoding' applying to text values. 309 """ 310 311 self.constant_values[objpath] = value, origin, encoding 312 313 def get_literal_reference(self, name, ref, items, cls): 314 315 """ 316 Return a literal reference for the given type 'name', literal 'ref', 317 node 'items' and employing the given 'cls' as the class of the returned 318 reference object. 319 """ 320 321 # Construct an invocation using the items as arguments. 322 323 typename = "$L%s" % name 324 325 invocation = compiler.ast.CallFunc( 326 compiler.ast.Name(typename), 327 items 328 ) 329 330 # Get a name for the actual literal. 331 332 instname = self.get_literal_name() 333 self.next_literal() 334 335 # Record the type for the literal. 336 337 objpath = self.get_object_path(instname) 338 self.literal_types[objpath] = ref.get_origin() 339 340 # Return a wrapper for the invocation exposing the items. 341 342 return cls( 343 instname, 344 ref.instance_of(), 345 self.process_structure_node(invocation), 346 invocation.args 347 ) 348 349 # Node handling. 350 351 def process_structure(self, node): 352 353 """ 354 Within the given 'node', process the program structure. 355 356 During inspection, this will process global declarations, adjusting the 357 module namespace, and import statements, building a module dependency 358 hierarchy. 359 360 During translation, this will consult deduced program information and 361 output translated code. 362 """ 363 364 l = [] 365 for n in node.getChildNodes(): 366 l.append(self.process_structure_node(n)) 367 return l 368 369 def process_augassign_node(self, n): 370 371 "Process the given augmented assignment node 'n'." 372 373 op = operator_functions[n.op] 374 375 if isinstance(n.node, compiler.ast.Getattr): 376 target = compiler.ast.AssAttr(n.node.expr, n.node.attrname, "OP_ASSIGN") 377 elif isinstance(n.node, compiler.ast.Name): 378 target = compiler.ast.AssName(n.node.name, "OP_ASSIGN") 379 else: 380 target = n.node 381 382 assignment = compiler.ast.Assign( 383 [target], 384 compiler.ast.CallFunc( 385 compiler.ast.Name("$op%s" % op), 386 [n.node, n.expr])) 387 388 return self.process_structure_node(assignment) 389 390 def process_assignment_for_object(self, original_name, source): 391 392 """ 393 Return an assignment operation making 'original_name' refer to the given 394 'source'. 395 """ 396 397 assignment = compiler.ast.Assign( 398 [compiler.ast.AssName(original_name, "OP_ASSIGN")], 399 source 400 ) 401 402 return self.process_structure_node(assignment) 403 404 def process_assignment_node_items(self, n, expr): 405 406 """ 407 Process the given assignment node 'n' whose children are to be assigned 408 items of 'expr'. 409 """ 410 411 name_ref = self.process_structure_node(expr) 412 413 # Either unpack the items and present them directly to each assignment 414 # node. 415 416 if isinstance(name_ref, LiteralSequenceRef) and \ 417 self.process_literal_sequence_items(n, name_ref): 418 419 pass 420 421 # Or have the assignment nodes access each item via the sequence API. 422 423 else: 424 self.process_assignment_node_items_by_position(n, expr, name_ref) 425 426 def process_assignment_node_items_by_position(self, n, expr, name_ref): 427 428 """ 429 Process the given sequence assignment node 'n', converting the node to 430 the separate assignment of each target using positional access on a 431 temporary variable representing the sequence. Use 'expr' as the assigned 432 value and 'name_ref' as the reference providing any existing temporary 433 variable. 434 """ 435 436 statements = [] 437 438 # Employ existing names to access the sequence. 439 # Literal sequences do not provide names of accessible objects. 440 441 if isinstance(name_ref, NameRef) and not isinstance(name_ref, LiteralSequenceRef): 442 temp = name_ref.name 443 444 # For other expressions, create a temporary name to reference the items. 445 446 else: 447 temp = self.get_temporary_name() 448 self.next_temporary() 449 450 statements.append( 451 compiler.ast.Assign([compiler.ast.AssName(temp, "OP_ASSIGN")], expr) 452 ) 453 454 # Generate a test for the length of the expression object. 455 456 statements.append(compiler.ast.Discard( 457 compiler.ast.CallFunc(compiler.ast.Name("$seq_test_length"), 458 [compiler.ast.Name(temp), compiler.ast.Const(len(n.nodes))]))) 459 460 # Assign the items to the target nodes. 461 462 for i, node in enumerate(n.nodes): 463 statements.append( 464 compiler.ast.Assign([node], compiler.ast.CallFunc( 465 compiler.ast.Getattr(compiler.ast.Name(temp), 466 "__get_single_item_unchecked__", 467 privileged=True), 468 [compiler.ast.Const(i, str(i))])) 469 ) 470 471 return self.process_structure_node(compiler.ast.Stmt(statements)) 472 473 def process_literal_sequence_items(self, n, name_ref): 474 475 """ 476 Process the given assignment node 'n', obtaining from the given 477 'name_ref' the items to be assigned to the assignment targets. 478 479 Return whether this method was able to process the assignment node as 480 a sequence of direct assignments. 481 """ 482 483 if len(n.nodes) == len(name_ref.items): 484 assigned_names, count = get_names_from_nodes(n.nodes) 485 accessed_names, _count = get_names_from_nodes(name_ref.items) 486 487 # Only assign directly between items if all assigned names are 488 # plain names (not attribute assignments), and if the assigned names 489 # do not appear in the accessed names. 490 491 if len(assigned_names) == count and \ 492 not assigned_names.intersection(accessed_names): 493 494 for node, item in zip(n.nodes, name_ref.items): 495 self.process_assignment_node(node, item) 496 497 return True 498 499 # Otherwise, use the position-based mechanism to obtain values. 500 501 else: 502 return False 503 else: 504 raise InspectError("In %s, item assignment needing %d items is given %d items." % ( 505 self.get_namespace_path(), len(n.nodes), len(name_ref.items))) 506 507 def process_compare_node(self, n): 508 509 """ 510 Process the given comparison node 'n', converting an operator sequence 511 from... 512 513 <expr1> <op1> <expr2> <op2> <expr3> 514 515 ...to... 516 517 <op1>(<expr1>, <expr2>) and <op2>(<expr2>, <expr3>) 518 """ 519 520 invocations = [] 521 last = n.expr 522 523 for op, op_node in n.ops: 524 op = operator_functions.get(op) 525 526 invocations.append(compiler.ast.CallFunc( 527 compiler.ast.Name("$op%s" % op), 528 [last, op_node])) 529 530 last = op_node 531 532 if len(invocations) > 1: 533 result = compiler.ast.And(invocations) 534 else: 535 result = invocations[0] 536 537 return self.process_structure_node(result) 538 539 def process_dict_node(self, node): 540 541 """ 542 Process the given dictionary 'node', returning a list of (key, value) 543 tuples. 544 """ 545 546 l = [] 547 for key, value in node.items: 548 l.append(( 549 self.process_structure_node(key), 550 self.process_structure_node(value))) 551 return l 552 553 def process_for_node(self, n): 554 555 """ 556 Generate attribute accesses for {n.list}.__iter__ and the next method on 557 the iterator, producing a replacement node for the original. 558 """ 559 560 t0 = self.get_temporary_name() 561 self.next_temporary() 562 t1 = self.get_temporary_name() 563 self.next_temporary() 564 t2 = self.get_temporary_name() 565 self.next_temporary() 566 567 node = compiler.ast.Stmt([ 568 569 # <t0> = {n.list} 570 # <t1> = <t0>.__iter__() 571 572 compiler.ast.Assign( 573 [compiler.ast.AssName(t0, "OP_ASSIGN")], 574 n.list), 575 576 compiler.ast.Assign( 577 [compiler.ast.AssName(t1, "OP_ASSIGN")], 578 compiler.ast.CallFunc( 579 compiler.ast.Getattr(compiler.ast.Name(t0), "__iter__"), 580 [])), 581 582 # <t2> = <t1>.next 583 # try: 584 # while True: 585 # <var>... = <t2>() 586 # ... 587 # except StopIteration: 588 # pass 589 590 compiler.ast.Assign( 591 [compiler.ast.AssName(t2, "OP_ASSIGN")], 592 compiler.ast.Getattr(compiler.ast.Name(t1), "next")), 593 594 compiler.ast.TryExcept( 595 compiler.ast.While( 596 compiler.ast.Name("True"), 597 compiler.ast.Stmt([ 598 compiler.ast.Assign( 599 [n.assign], 600 compiler.ast.CallFunc( 601 compiler.ast.Name(t2), 602 [] 603 )), 604 n.body]), 605 None), 606 [(compiler.ast.Name("StopIteration"), None, compiler.ast.Stmt([compiler.ast.Pass()]))], 607 None) 608 ]) 609 610 self.process_structure_node(node) 611 612 def process_literal_sequence_node(self, n, name, ref, cls): 613 614 """ 615 Process the given literal sequence node 'n' as a function invocation, 616 with 'name' indicating the type of the sequence, and 'ref' being a 617 reference to the type. The 'cls' is used to instantiate a suitable name 618 reference. 619 """ 620 621 if name == "dict": 622 items = [] 623 for key, value in n.items: 624 items.append(compiler.ast.Tuple([key, value])) 625 else: # name in ("list", "tuple"): 626 items = n.nodes 627 628 return self.get_literal_reference(name, ref, items, cls) 629 630 def process_operator_node(self, n): 631 632 """ 633 Process the given operator node 'n' as an operator function invocation. 634 """ 635 636 opname = n.__class__.__name__ 637 operands = n.getChildNodes() 638 639 # Convert a unary operation to an invocation. 640 641 op = unary_operator_functions.get(opname) 642 643 if op: 644 invocation = compiler.ast.CallFunc( 645 compiler.ast.Name("$op%s" % op), 646 [operands[0]] 647 ) 648 649 # Convert a single operator with a list of operands to a combination of 650 # pairwise operations. 651 652 else: 653 op = operator_functions[opname] 654 invocation = self._process_operator_node(op, operands) 655 656 return self.process_structure_node(invocation) 657 658 def _process_operator_node(self, op, operands): 659 660 """ 661 Process the given 'op', being an operator function, together with the 662 supplied 'operands', returning either a single remaining operand or an 663 invocation combining the operands. 664 """ 665 666 remaining = operands[1:] 667 if not remaining: 668 return operands[0] 669 670 return compiler.ast.CallFunc( 671 compiler.ast.Name("$op%s" % op), 672 [operands[0], self._process_operator_node(op, remaining)] 673 ) 674 675 def process_print_node(self, n): 676 677 """ 678 Process the given print node 'n' as an invocation on a stream of the 679 form... 680 681 $print(dest, args, nl) 682 683 The special function name will be translated elsewhere. 684 """ 685 686 nl = isinstance(n, compiler.ast.Printnl) 687 invocation = compiler.ast.CallFunc( 688 compiler.ast.Name("$print"), 689 [n.dest or compiler.ast.Name("None"), 690 compiler.ast.List(list(n.nodes)), 691 nl and compiler.ast.Name("True") or compiler.ast.Name("False")] 692 ) 693 return self.process_structure_node(invocation) 694 695 def process_slice_node(self, n, expr=None): 696 697 """ 698 Process the given slice node 'n' as a method invocation. 699 """ 700 701 if n.flags == "OP_ASSIGN": op = "__setslice__" 702 elif n.flags == "OP_DELETE": op = "__delslice__" 703 else: op = "__getslice__" 704 705 invocation = compiler.ast.CallFunc( 706 compiler.ast.Getattr(n.expr, op), 707 [n.lower or compiler.ast.Name("None"), n.upper or compiler.ast.Name("None")] + 708 (expr and [expr] or []) 709 ) 710 711 # Fix parse tree structure. 712 713 if op == "__delslice__": 714 invocation = compiler.ast.Discard(invocation) 715 716 return self.process_structure_node(invocation) 717 718 def process_sliceobj_node(self, n): 719 720 """ 721 Process the given slice object node 'n' as a slice constructor. 722 """ 723 724 op = "slice" 725 invocation = compiler.ast.CallFunc( 726 compiler.ast.Name("$op%s" % op), 727 n.nodes 728 ) 729 return self.process_structure_node(invocation) 730 731 def process_subscript_node(self, n, expr=None): 732 733 """ 734 Process the given subscript node 'n' as a method invocation. 735 """ 736 737 if n.flags == "OP_ASSIGN": op = "__setitem__" 738 elif n.flags == "OP_DELETE": op = "__delitem__" 739 else: op = "__getitem__" 740 741 invocation = compiler.ast.CallFunc( 742 compiler.ast.Getattr(n.expr, op), 743 list(n.subs) + (expr and [expr] or []) 744 ) 745 746 # Fix parse tree structure. 747 748 if op == "__delitem__": 749 invocation = compiler.ast.Discard(invocation) 750 751 return self.process_structure_node(invocation) 752 753 def process_attribute_chain(self, n): 754 755 """ 756 Process the given attribute access node 'n'. Return a reference 757 describing the expression. 758 """ 759 760 # AssAttr/Getattr are nested with the outermost access being the last 761 # access in any chain. 762 763 self.attrs.insert(0, n.attrname) 764 attrs = self.attrs 765 766 # Break attribute chains where non-access nodes are found. 767 768 if not self.have_access_expression(n): 769 self.reset_attribute_chain() 770 771 # Descend into the expression, extending backwards any existing chain, 772 # or building another for the expression. 773 774 name_ref = self.process_structure_node(n.expr) 775 776 # Restore chain information applying to this node. 777 778 if not self.have_access_expression(n): 779 self.restore_attribute_chain(attrs) 780 781 # Return immediately if the expression was another access and thus a 782 # continuation backwards along the chain. The above processing will 783 # have followed the chain all the way to its conclusion. 784 785 if self.have_access_expression(n): 786 del self.attrs[0] 787 788 return name_ref 789 790 # Attribute chain handling. 791 792 def reset_attribute_chain(self): 793 794 "Reset the attribute chain for a subexpression of an attribute access." 795 796 self.attrs = [] 797 self.chain_assignment.append(self.in_assignment) 798 self.chain_invocation.append(self.in_invocation) 799 self.in_assignment = None 800 self.in_invocation = None 801 802 def restore_attribute_chain(self, attrs): 803 804 "Restore the attribute chain for an attribute access." 805 806 self.attrs = attrs 807 self.in_assignment = self.chain_assignment.pop() 808 self.in_invocation = self.chain_invocation.pop() 809 810 def have_access_expression(self, node): 811 812 "Return whether the expression associated with 'node' is Getattr." 813 814 return isinstance(node.expr, compiler.ast.Getattr) 815 816 def get_name_for_tracking(self, name, name_ref=None, is_global=False): 817 818 """ 819 Return the name to be used for attribute usage observations involving 820 the given 'name' in the current namespace. 821 822 If the name is being used outside a function, and if 'name_ref' is 823 given and indicates a global or if 'is_global' is specified as a true 824 value, a path featuring the name in the global namespace is returned. 825 Otherwise, a path computed using the current namespace and the given 826 name is returned. 827 828 The intention of this method is to provide a suitably-qualified name 829 that can be tracked across namespaces. Where globals are being 830 referenced in class namespaces, they should be referenced using their 831 path within the module, not using a path within each class. 832 833 It may not be possible to identify a global within a function at the 834 time of inspection (since a global may appear later in a file). 835 Consequently, globals are identified by their local name rather than 836 their module-qualified path. 837 """ 838 839 # For functions, use the appropriate local names. 840 841 if self.in_function: 842 return name 843 844 # For global names outside functions, use a global name. 845 846 elif is_global or name_ref and name_ref.is_global_name(): 847 return self.get_global_path(name) 848 849 # Otherwise, establish a name in the current namespace. 850 851 else: 852 return self.get_object_path(name) 853 854 def get_path_for_access(self): 855 856 "Outside functions, register accesses at the module level." 857 858 if not self.in_function: 859 return self.name 860 else: 861 return self.get_namespace_path() 862 863 def get_module_name(self, node): 864 865 """ 866 Using the given From 'node' in this module, calculate any relative import 867 information, returning a tuple containing a module to import along with any 868 names to import based on the node's name information. 869 870 Where the returned module is given as None, whole module imports should 871 be performed for the returned modules using the returned names. 872 """ 873 874 # Absolute import. 875 876 if node.level == 0: 877 return node.modname, node.names 878 879 # Relative to an ancestor of this module. 880 881 else: 882 path = self.name.split(".") 883 level = node.level 884 885 # Relative imports treat package roots as submodules. 886 887 if split(self.filename)[-1] == "__init__.py": 888 level -= 1 889 890 if level > len(path): 891 raise InspectError("Relative import %r involves too many levels up from module %r" % ( 892 ("%s%s" % ("." * node.level, node.modname or "")), self.name)) 893 894 basename = ".".join(path[:len(path)-level]) 895 896 # Name imports from a module. 897 898 if node.modname: 899 return "%s.%s" % (basename, node.modname), node.names 900 901 # Relative whole module imports. 902 903 else: 904 return basename, node.names 905 906 def get_argnames(args): 907 908 """ 909 Return a list of all names provided by 'args'. Since tuples may be 910 employed, the arguments are traversed depth-first. 911 """ 912 913 l = [] 914 for arg in args: 915 if isinstance(arg, tuple): 916 l += get_argnames(arg) 917 else: 918 l.append(arg) 919 return l 920 921 def get_names_from_nodes(nodes): 922 923 """ 924 Return the names employed in the given 'nodes' along with the number of 925 nodes excluding sequences. 926 """ 927 928 names = set() 929 count = 0 930 931 for node in nodes: 932 933 # Add names and count them. 934 935 if isinstance(node, (compiler.ast.AssName, compiler.ast.Name)): 936 names.add(node.name) 937 count += 1 938 939 # Add names from sequences and incorporate their counts. 940 941 elif isinstance(node, (compiler.ast.AssList, compiler.ast.AssTuple, 942 compiler.ast.List, compiler.ast.Set, 943 compiler.ast.Tuple)): 944 _names, _count = get_names_from_nodes(node.nodes) 945 names.update(_names) 946 count += _count 947 948 # Count non-name, non-sequence nodes. 949 950 else: 951 count += 1 952 953 return names, count 954 955 # Location classes. 956 957 class Location: 958 959 "A generic program location." 960 961 def __init__(self, path, name, attrnames=None, version=None, access_number=None): 962 self.path = path 963 self.name = name 964 self.attrnames = attrnames 965 self.version = version 966 self.access_number = access_number 967 968 def __repr__(self): 969 return "Location(%r, %r, %r, %r, %r)" % self.as_tuple() 970 971 def as_tuple(self): 972 return (self.path, self.name, self.attrnames, self.version, self.access_number) 973 974 def __hash__(self): 975 return hash(self.as_tuple()) 976 977 def __eq__(self, other): 978 return self.as_tuple() == other.as_tuple() 979 980 def __cmp__(self, other): 981 return cmp(self.as_tuple(), other.as_tuple()) 982 983 def get_attrname(self): 984 985 """ 986 Extract the first attribute from the attribute names employed in this 987 location. 988 """ 989 990 attrnames = self.attrnames 991 if not attrnames: 992 return attrnames 993 return get_attrnames(attrnames)[0] 994 995 class AccessLocation(Location): 996 997 "A specialised access location." 998 999 def __init__(self, path, name, attrnames, access_number): 1000 1001 """ 1002 Initialise an access location featuring 'path', 'name', 'attrnames' and 1003 'access_number'. 1004 """ 1005 1006 Location.__init__(self, path, name, attrnames, None, access_number) 1007 1008 def __repr__(self): 1009 return "AccessLocation(%r, %r, %r, %r)" % (self.path, self.name, self.attrnames, self.access_number) 1010 1011 # Result classes. 1012 1013 class InstructionSequence: 1014 1015 "A generic sequence of instructions." 1016 1017 def __init__(self, instructions): 1018 self.instructions = instructions 1019 1020 def get_value_instruction(self): 1021 return self.instructions[-1] 1022 1023 def get_init_instructions(self): 1024 return self.instructions[:-1] 1025 1026 # Dictionary utilities. 1027 1028 def init_item(d, key, fn): 1029 1030 """ 1031 Add to 'd' an entry for 'key' using the callable 'fn' to make an initial 1032 value where no entry already exists. 1033 """ 1034 1035 if not d.has_key(key): 1036 d[key] = fn() 1037 return d[key] 1038 1039 def dict_for_keys(d, keys): 1040 1041 "Return a new dictionary containing entries from 'd' for the given 'keys'." 1042 1043 nd = {} 1044 for key in keys: 1045 if d.has_key(key): 1046 nd[key] = d[key] 1047 return nd 1048 1049 def make_key(s): 1050 1051 "Make sequence 's' into a tuple-based key, first sorting its contents." 1052 1053 l = list(s) 1054 l.sort() 1055 return tuple(l) 1056 1057 def add_counter_item(d, key): 1058 1059 """ 1060 Make a mapping in 'd' for 'key' to the number of keys added before it, thus 1061 maintaining a mapping of keys to their order of insertion. 1062 """ 1063 1064 if not d.has_key(key): 1065 d[key] = len(d.keys()) 1066 return d[key] 1067 1068 def remove_items(d1, d2): 1069 1070 "Remove from 'd1' all items from 'd2'." 1071 1072 for key in d2.keys(): 1073 if d1.has_key(key): 1074 del d1[key] 1075 1076 # Set utilities. 1077 1078 def first(s): 1079 return list(s)[0] 1080 1081 def same(s1, s2): 1082 return set(s1) == set(s2) 1083 1084 def order_dependencies(all_depends): 1085 1086 """ 1087 Produce a dependency ordering for the 'all_depends' mapping. This mapping 1088 has the form "A depends on B, C...". The result will order A, B, C, and so 1089 on. 1090 """ 1091 1092 usage = init_reverse_dependencies(all_depends) 1093 1094 # Produce an ordering by obtaining exposed items (required by items already 1095 # processed) and putting them at the start of the list. 1096 1097 ordered = [] 1098 1099 while usage: 1100 have_next = False 1101 1102 for key, n in usage.items(): 1103 1104 # Add items needed by no other items to the ordering. 1105 1106 if not n: 1107 remove_dependency(key, all_depends, usage, ordered) 1108 have_next = True 1109 1110 if not have_next: 1111 raise ValueError, usage 1112 1113 return ordered 1114 1115 def order_dependencies_partial(all_depends): 1116 1117 """ 1118 Produce a dependency ordering for the 'all_depends' mapping. This mapping 1119 has the form "A depends on B, C...". The result will order A, B, C, and so 1120 on. Where cycles exist, they will be broken and a partial ordering returned. 1121 """ 1122 1123 usage = init_reverse_dependencies(all_depends) 1124 1125 # Duplicate the dependencies for subsequent modification. 1126 1127 new_depends = {} 1128 for key, values in all_depends.items(): 1129 new_depends[key] = set(values) 1130 1131 all_depends = new_depends 1132 1133 # Produce an ordering by obtaining exposed items (required by items already 1134 # processed) and putting them at the start of the list. 1135 1136 ordered = [] 1137 1138 while usage: 1139 least = None 1140 least_key = None 1141 1142 for key, n in usage.items(): 1143 1144 # Add items needed by no other items to the ordering. 1145 1146 if not n: 1147 remove_dependency(key, all_depends, usage, ordered) 1148 least = 0 1149 1150 # When breaking cycles, note the least used items. 1151 1152 elif least is None or len(n) < least: 1153 least_key = key 1154 least = len(n) 1155 1156 if least: 1157 transfer_dependencies(least_key, all_depends, usage, ordered) 1158 1159 return ordered 1160 1161 def init_reverse_dependencies(all_depends): 1162 1163 """ 1164 From 'all_depends', providing a mapping of the form "A depends on B, C...", 1165 record the reverse dependencies, making a mapping of the form 1166 "B is needed by A", "C is needed by A", and so on. 1167 """ 1168 1169 usage = {} 1170 1171 # Record path-based dependencies. 1172 1173 for key in all_depends.keys(): 1174 usage[key] = set() 1175 1176 for key, depends in all_depends.items(): 1177 for depend in depends: 1178 init_item(usage, depend, set) 1179 usage[depend].add(key) 1180 1181 return usage 1182 1183 def transfer_dependencies(key, all_depends, usage, ordered): 1184 1185 """ 1186 Transfer items needed by 'key' to those items needing 'key', found using 1187 'all_depends', and updating 'usage'. Insert 'key' into the 'ordered' 1188 collection of dependencies. 1189 1190 If "A is needed by X" and "B is needed by A", then transferring items needed 1191 by A will cause "B is needed by X" to be recorded as a consequence. 1192 1193 Transferring items also needs to occur in the reverse mapping, so that 1194 "A needs B" and "X needs A", then the consequence must be recorded as 1195 "X needs B". 1196 """ 1197 1198 ordered.insert(0, key) 1199 1200 needing = usage[key] # A is needed by X 1201 needed = all_depends.get(key) # A needs B 1202 1203 if needing: 1204 for depend in needing: 1205 l = all_depends.get(depend) 1206 if not l: 1207 continue 1208 1209 l.remove(key) # X needs (A) 1210 1211 if needed: 1212 l.update(needed) # X needs B... 1213 1214 # Prevent self references. 1215 1216 if depend in needed: 1217 l.remove(depend) 1218 1219 if needed: 1220 for depend in needed: 1221 l = usage.get(depend) 1222 if not l: 1223 continue 1224 1225 l.remove(key) # B is needed by (A) 1226 l.update(needing) # B is needed by X... 1227 1228 # Prevent self references. 1229 1230 if depend in needing: 1231 l.remove(depend) 1232 1233 if needed: 1234 del all_depends[key] 1235 del usage[key] 1236 1237 def remove_dependency(key, all_depends, usage, ordered): 1238 1239 """ 1240 Remove 'key', found in 'all_depends', from 'usage', inserting it into the 1241 'ordered' collection of dependencies. 1242 1243 Given that 'usage' for a given key A would indicate that "A needs <nothing>" 1244 upon removing A from 'usage', the outcome is that all keys needing A will 1245 have A removed from their 'usage' records. 1246 1247 So, if "B needs A", removing A will cause "B needs <nothing>" to be recorded 1248 as a consequence. 1249 """ 1250 1251 ordered.insert(0, key) 1252 1253 depends = all_depends.get(key) 1254 1255 # Reduce usage of the referenced items. 1256 1257 if depends: 1258 for depend in depends: 1259 usage[depend].remove(key) 1260 1261 del usage[key] 1262 1263 # General input/output. 1264 1265 def readfile(filename): 1266 1267 "Return the contents of 'filename'." 1268 1269 f = open(filename) 1270 try: 1271 return f.read() 1272 finally: 1273 f.close() 1274 1275 def writefile(filename, s): 1276 1277 "Write to 'filename' the string 's'." 1278 1279 f = open(filename, "w") 1280 try: 1281 f.write(s) 1282 finally: 1283 f.close() 1284 1285 # General encoding. 1286 1287 def sorted_output(x): 1288 1289 "Sort sequence 'x' and return a string with commas separating the values." 1290 1291 x = map(str, x) 1292 x.sort() 1293 return ", ".join(x) 1294 1295 def get_string_details(literals, encoding): 1296 1297 """ 1298 Determine whether 'literals' represent Unicode strings or byte strings, 1299 using 'encoding' to reproduce byte sequences. 1300 1301 Each literal is the full program representation including prefix and quotes 1302 recoded by the parser to UTF-8. Thus, any literal found to represent a byte 1303 string needs to be translated back to its original encoding. 1304 1305 Return a single encoded literal value, a type name, and the original 1306 encoding as a tuple. 1307 """ 1308 1309 typename = "unicode" 1310 1311 l = [] 1312 1313 for s in literals: 1314 out, _typename = get_literal_details(s) 1315 if _typename == "str": 1316 typename = "str" 1317 l.append(out) 1318 1319 out = "".join(l) 1320 1321 # For Unicode values, convert to the UTF-8 program representation. 1322 1323 if typename == "unicode": 1324 return out.encode("utf-8"), typename, encoding 1325 1326 # For byte string values, convert back to the original encoding. 1327 1328 else: 1329 return out.encode(encoding), typename, encoding 1330 1331 def get_literal_details(s): 1332 1333 """ 1334 Determine whether 's' represents a Unicode string or a byte string, where 1335 's' contains the full program representation of a literal including prefix 1336 and quotes, recoded by the parser to UTF-8. 1337 1338 Find and convert Unicode values starting with <backslash>u or <backslash>U, 1339 and byte or Unicode values starting with <backslash><octal digit> or 1340 <backslash>x. 1341 1342 Literals prefixed with "u" cause <backslash><octal digit> and <backslash>x 1343 to be considered as Unicode values. Otherwise, they produce byte values and 1344 cause unprefixed strings to be considered as byte strings. 1345 1346 Literals prefixed with "r" do not have their backslash-encoded values 1347 converted unless also prefixed with "u", in which case only the above value 1348 formats are converted, not any of the other special sequences for things 1349 like newlines. 1350 1351 Return the literal value as a Unicode object together with the appropriate 1352 type name in a tuple. 1353 """ 1354 1355 l = [] 1356 1357 # Identify the quote character and use it to identify the prefix. 1358 1359 quote_type = s[-1] 1360 prefix_end = s.find(quote_type) 1361 prefix = s[:prefix_end].lower() 1362 1363 if prefix not in ("", "b", "br", "r", "u", "ur"): 1364 raise ValueError, "String literal does not have a supported prefix: %s" % s 1365 1366 if "b" in prefix: 1367 typename = "str" 1368 else: 1369 typename = "unicode" 1370 1371 # Identify triple quotes or single quotes. 1372 1373 if len(s) >= 6 and s[-2] == quote_type and s[-3] == quote_type: 1374 quote = s[prefix_end:prefix_end+3] 1375 current = prefix_end + 3 1376 end = len(s) - 3 1377 else: 1378 quote = s[prefix_end] 1379 current = prefix_end + 1 1380 end = len(s) - 1 1381 1382 # Conversions of some quoted values. 1383 1384 searches = { 1385 "u" : (6, 16), 1386 "U" : (10, 16), 1387 "x" : (4, 16), 1388 } 1389 1390 octal_digits = map(str, range(0, 8)) 1391 1392 # Translations of some quoted values. 1393 1394 escaped = { 1395 "\\" : "\\", "'" : "'", '"' : '"', 1396 "a" : "\a", "b" : "\b", "f" : "\f", 1397 "n" : "\n", "r" : "\r", "t" : "\t", 1398 } 1399 1400 while current < end: 1401 1402 # Look for quoted values. 1403 1404 index = s.find("\\", current) 1405 if index == -1 or index + 1 == end: 1406 l.append(s[current:end]) 1407 break 1408 1409 # Add the preceding text. 1410 1411 l.append(s[current:index]) 1412 1413 # Handle quoted text. 1414 1415 term = s[index+1] 1416 1417 # Add Unicode values. Where a string is u-prefixed, even \o and \x 1418 # produce Unicode values. 1419 1420 if typename == "unicode" and ( 1421 term in ("u", "U") or 1422 "u" in prefix and (term == "x" or term in octal_digits)): 1423 1424 needed, base = searches.get(term, (4, 8)) 1425 value = convert_quoted_value(s, index, needed, end, base, unichr) 1426 l.append(value) 1427 current = index + needed 1428 1429 # Add raw byte values, changing the string type. 1430 1431 elif "r" not in prefix and ( 1432 term == "x" or term in octal_digits): 1433 1434 needed, base = searches.get(term, (4, 8)) 1435 value = convert_quoted_value(s, index, needed, end, base, chr) 1436 l.append(value) 1437 typename = "str" 1438 current = index + needed 1439 1440 # Add other escaped values. 1441 1442 elif "r" not in prefix and escaped.has_key(term): 1443 l.append(escaped[term]) 1444 current = index + 2 1445 1446 # Add other text as found. 1447 1448 else: 1449 l.append(s[index:index+2]) 1450 current = index + 2 1451 1452 # Collect the components into a single Unicode object. Since the literal 1453 # text was already in UTF-8 form, interpret plain strings as UTF-8 1454 # sequences. 1455 1456 out = [] 1457 1458 for value in l: 1459 if isinstance(value, unicode): 1460 out.append(value) 1461 else: 1462 out.append(unicode(value, "utf-8")) 1463 1464 return "".join(out), typename 1465 1466 def convert_quoted_value(s, index, needed, end, base, fn): 1467 1468 """ 1469 Interpret a quoted value in 's' at 'index' with the given 'needed' number of 1470 positions, and with the given 'end' indicating the first position after the 1471 end of the actual string content. 1472 1473 Use 'base' as the numerical base when interpreting the value, and use 'fn' 1474 to convert the value to an appropriate type. 1475 """ 1476 1477 s = s[index:min(index+needed, end)] 1478 1479 # Not a complete occurrence. 1480 1481 if len(s) < needed: 1482 return s 1483 1484 # Test for a well-formed value. 1485 1486 try: 1487 first = base == 8 and 1 or 2 1488 value = int(s[first:needed], base) 1489 except ValueError: 1490 return s 1491 else: 1492 return fn(value) 1493 1494 # Attribute chain decoding. 1495 1496 def get_attrnames(attrnames): 1497 1498 """ 1499 Split the qualified attribute chain 'attrnames' into its components, 1500 handling special attributes starting with "#" that indicate type 1501 conformance. 1502 """ 1503 1504 if attrnames.startswith("#"): 1505 return [attrnames] 1506 else: 1507 return attrnames.split(".") 1508 1509 def get_name_path(path, name): 1510 1511 "Return a suitable qualified name from the given 'path' and 'name'." 1512 1513 if "." in name: 1514 return name 1515 else: 1516 return "%s.%s" % (path, name) 1517 1518 # Usage-related functions. 1519 1520 def get_types_for_usage(attrnames, objects): 1521 1522 """ 1523 Identify the types that can support the given 'attrnames', using the 1524 given 'objects' as the catalogue of type details. 1525 """ 1526 1527 types = [] 1528 for name, _attrnames in objects.items(): 1529 if set(attrnames).issubset(_attrnames): 1530 types.append(name) 1531 return types 1532 1533 def get_invoked_attributes(usage): 1534 1535 "Obtain invoked attribute from the given 'usage'." 1536 1537 invoked = [] 1538 if usage: 1539 for attrname, invocation, assignment in usage: 1540 if invocation: 1541 invoked.append(attrname) 1542 return invoked 1543 1544 def get_assigned_attributes(usage): 1545 1546 "Obtain assigned attribute from the given 'usage'." 1547 1548 assigned = [] 1549 if usage: 1550 for attrname, invocation, assignment in usage: 1551 if assignment: 1552 assigned.append(attrname) 1553 return assigned 1554 1555 # Type and module functions. 1556 # NOTE: This makes assumptions about the __builtins__ structure. 1557 1558 def get_builtin_module(name): 1559 1560 "Return the module name containing the given type 'name'." 1561 1562 if name == "string": 1563 modname = "str" 1564 elif name == "utf8string": 1565 modname = "unicode" 1566 elif name == "NoneType": 1567 modname = "none" 1568 else: 1569 modname = name 1570 1571 return "__builtins__.%s" % modname 1572 1573 def get_builtin_type(name): 1574 1575 "Return the type name provided by the given Python value 'name'." 1576 1577 if name == "str": 1578 return "string" 1579 elif name == "unicode": 1580 return "utf8string" 1581 else: 1582 return name 1583 1584 def get_builtin_class(name): 1585 1586 "Return the full name of the built-in class having the given 'name'." 1587 1588 typename = get_builtin_type(name) 1589 module = get_builtin_module(typename) 1590 return "%s.%s" % (module, typename) 1591 1592 # Useful data. 1593 1594 predefined_constants = "False", "None", "NotImplemented", "True" 1595 1596 privileged_attributes = [ 1597 "__get_single_item_unchecked__", 1598 ] 1599 1600 unary_operator_functions = { 1601 1602 # Unary operations. 1603 1604 "Invert" : "invert", 1605 "UnaryAdd" : "pos", 1606 "UnarySub" : "neg", 1607 } 1608 1609 operator_functions = { 1610 1611 # Fundamental operations. 1612 1613 "is" : "is_", 1614 "is not" : "is_not", 1615 1616 # Binary operations. 1617 1618 "in" : "in_", 1619 "not in" : "not_in", 1620 "Add" : "add", 1621 "Bitand" : "and_", 1622 "Bitor" : "or_", 1623 "Bitxor" : "xor", 1624 "Div" : "div", 1625 "FloorDiv" : "floordiv", 1626 "LeftShift" : "lshift", 1627 "Mod" : "mod", 1628 "Mul" : "mul", 1629 "Power" : "pow", 1630 "RightShift" : "rshift", 1631 "Sub" : "sub", 1632 1633 # Augmented assignment. 1634 1635 "+=" : "iadd", 1636 "-=" : "isub", 1637 "*=" : "imul", 1638 "/=" : "idiv", 1639 "//=" : "ifloordiv", 1640 "%=" : "imod", 1641 "**=" : "ipow", 1642 "<<=" : "ilshift", 1643 ">>=" : "irshift", 1644 "&=" : "iand", 1645 "^=" : "ixor", 1646 "|=" : "ior", 1647 1648 # Comparisons. 1649 1650 "==" : "eq", 1651 "!=" : "ne", 1652 "<" : "lt", 1653 "<=" : "le", 1654 ">=" : "ge", 1655 ">" : "gt", 1656 } 1657 1658 operator_functions.update(unary_operator_functions) 1659 1660 # vim: tabstop=4 expandtab shiftwidth=4