1 #!/usr/bin/env python 2 3 """ 4 Common functions. 5 6 Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013, 7 2014, 2015, 2016, 2017, 2018 Paul Boddie <paul@boddie.org.uk> 8 9 This program is free software; you can redistribute it and/or modify it under 10 the terms of the GNU General Public License as published by the Free Software 11 Foundation; either version 3 of the License, or (at your option) any later 12 version. 13 14 This program is distributed in the hope that it will be useful, but WITHOUT 15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 16 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 17 details. 18 19 You should have received a copy of the GNU General Public License along with 20 this program. If not, see <http://www.gnu.org/licenses/>. 21 """ 22 23 from compiler.transformer import Transformer 24 from errors import InspectError 25 from os import listdir, makedirs, remove 26 from os.path import exists, getmtime, isdir, join, split 27 from results import ConstantValueRef, LiteralSequenceRef, NameRef 28 import compiler.ast 29 30 class CommonOutput: 31 32 "Common output functionality." 33 34 def check_output(self, options=None): 35 36 "Check the existing output and remove it if irrelevant." 37 38 if not exists(self.output): 39 makedirs(self.output) 40 41 details = self.importer.get_cache_details() 42 recorded_details = self.get_output_details() 43 44 # Combine cache details with any options. 45 46 full_details = options and (details + " " + options) or details 47 48 if recorded_details != full_details: 49 self.remove_output() 50 51 writefile(self.get_output_details_filename(), full_details) 52 53 def get_output_details_filename(self): 54 55 "Return the output details filename." 56 57 return join(self.output, "$details") 58 59 def get_output_details(self): 60 61 "Return details of the existing output." 62 63 details_filename = self.get_output_details_filename() 64 65 if not exists(details_filename): 66 return None 67 else: 68 return readfile(details_filename) 69 70 def remove_output(self, dirname=None): 71 72 "Remove the output." 73 74 dirname = dirname or self.output 75 76 for filename in listdir(dirname): 77 path = join(dirname, filename) 78 if isdir(path): 79 self.remove_output(path) 80 else: 81 remove(path) 82 83 def copy(source, target, only_if_newer=True): 84 85 "Copy a text file from 'source' to 'target'." 86 87 if isdir(target): 88 target = join(target, split(source)[-1]) 89 90 if only_if_newer and not is_newer(source, target): 91 return 92 93 infile = open(source) 94 outfile = open(target, "w") 95 96 try: 97 outfile.write(infile.read()) 98 finally: 99 outfile.close() 100 infile.close() 101 102 def is_newer(source, target): 103 104 "Return whether 'source' is newer than 'target'." 105 106 if exists(target): 107 target_mtime = getmtime(target) 108 source_mtime = getmtime(source) 109 return source_mtime > target_mtime 110 111 return True 112 113 class CommonModule: 114 115 "A common module representation." 116 117 def __init__(self, name, importer): 118 119 """ 120 Initialise this module with the given 'name' and an 'importer' which is 121 used to provide access to other modules when required. 122 """ 123 124 self.name = name 125 self.importer = importer 126 self.filename = None 127 128 # Inspection-related attributes. 129 130 self.astnode = None 131 self.encoding = None 132 self.temp = {} 133 self.lambdas = {} 134 135 # Constants, literals and values. 136 137 self.constants = {} 138 self.constant_values = {} 139 self.literals = {} 140 self.literal_types = {} 141 142 # Nested namespaces. 143 144 self.namespace_path = [] 145 self.in_function = False 146 147 # Retain the assignment value expression and track invocations. 148 149 self.in_assignment = None 150 self.in_invocation = None 151 152 # Attribute chain state management. 153 154 self.attrs = [] 155 self.chain_assignment = [] 156 self.chain_invocation = [] 157 158 def __repr__(self): 159 return "CommonModule(%r, %r)" % (self.name, self.importer) 160 161 def parse_file(self, filename): 162 163 "Parse the file with the given 'filename', initialising attributes." 164 165 self.filename = filename 166 167 # Use the Transformer directly to obtain encoding information. 168 169 t = Transformer() 170 f = open(filename) 171 172 try: 173 self.astnode = t.parsesuite(f.read() + "\n") 174 self.encoding = t.encoding 175 finally: 176 f.close() 177 178 # Module-relative naming. 179 180 def get_global_path(self, name): 181 return "%s.%s" % (self.name, name) 182 183 def get_namespace_path(self): 184 return ".".join([self.name] + self.namespace_path) 185 186 def get_object_path(self, name): 187 return ".".join([self.name] + self.namespace_path + [name]) 188 189 # Namespace management. 190 191 def enter_namespace(self, name): 192 193 "Enter the namespace having the given 'name'." 194 195 self.namespace_path.append(name) 196 197 def exit_namespace(self): 198 199 "Exit the current namespace." 200 201 self.namespace_path.pop() 202 203 # Constant reference naming. 204 205 def get_constant_name(self, value, value_type, encoding=None): 206 207 """ 208 Add a new constant to the current namespace for 'value' with 209 'value_type'. 210 """ 211 212 path = self.get_namespace_path() 213 init_item(self.constants, path, dict) 214 return "$c%d" % add_counter_item(self.constants[path], (value, value_type, encoding)) 215 216 # Literal reference naming. 217 218 def get_literal_name(self): 219 220 "Add a new literal to the current namespace." 221 222 path = self.get_namespace_path() 223 init_item(self.literals, path, lambda: 0) 224 return "$C%d" % self.literals[path] 225 226 def next_literal(self): 227 self.literals[self.get_namespace_path()] += 1 228 229 # Temporary variable naming. 230 231 def get_temporary_name(self): 232 path = self.get_namespace_path() 233 init_item(self.temp, path, lambda: 0) 234 return "$t%d" % self.temp[path] 235 236 def next_temporary(self): 237 self.temp[self.get_namespace_path()] += 1 238 239 # Arbitrary function naming. 240 241 def get_lambda_name(self): 242 path = self.get_namespace_path() 243 init_item(self.lambdas, path, lambda: 0) 244 name = "$l%d" % self.lambdas[path] 245 self.lambdas[path] += 1 246 return name 247 248 def reset_lambdas(self): 249 self.lambdas = {} 250 251 # Constant and literal recording. 252 253 def get_constant_value(self, value, literals=None): 254 255 """ 256 Encode the 'value' if appropriate, returning a value, a typename and any 257 encoding. 258 """ 259 260 if isinstance(value, unicode): 261 return value.encode("utf-8"), "unicode", self.encoding 262 263 # Attempt to convert plain strings to text. 264 265 elif isinstance(value, str) and self.encoding: 266 try: 267 return get_string_details(literals, self.encoding) 268 except UnicodeDecodeError: 269 pass 270 271 return value, value.__class__.__name__, None 272 273 def get_constant_reference(self, ref, value, encoding=None): 274 275 """ 276 Return a constant reference for the given 'ref' type and 'value', with 277 the optional 'encoding' applying to text values. 278 """ 279 280 constant_name = self.get_constant_name(value, ref.get_origin(), encoding) 281 282 # Return a reference for the constant. 283 284 objpath = self.get_object_path(constant_name) 285 name_ref = ConstantValueRef(constant_name, ref.instance_of(objpath), value) 286 287 # Record the value and type for the constant. 288 289 self._reserve_constant(objpath, name_ref.value, name_ref.get_origin(), encoding) 290 return name_ref 291 292 def reserve_constant(self, objpath, value, origin, encoding=None): 293 294 """ 295 Reserve a constant within 'objpath' with the given 'value' and having a 296 type with the given 'origin', with the optional 'encoding' applying to 297 text values. 298 """ 299 300 constant_name = self.get_constant_name(value, origin) 301 objpath = self.get_object_path(constant_name) 302 self._reserve_constant(objpath, value, origin, encoding) 303 304 def _reserve_constant(self, objpath, value, origin, encoding): 305 306 """ 307 Store a constant for 'objpath' with the given 'value' and 'origin', with 308 the optional 'encoding' applying to text values. 309 """ 310 311 self.constant_values[objpath] = value, origin, encoding 312 313 def get_literal_reference(self, name, ref, items, cls): 314 315 """ 316 Return a literal reference for the given type 'name', literal 'ref', 317 node 'items' and employing the given 'cls' as the class of the returned 318 reference object. 319 """ 320 321 # Construct an invocation using the items as arguments. 322 323 typename = "$L%s" % name 324 325 invocation = compiler.ast.CallFunc( 326 compiler.ast.Name(typename), 327 items 328 ) 329 330 # Get a name for the actual literal. 331 332 instname = self.get_literal_name() 333 self.next_literal() 334 335 # Record the type for the literal. 336 337 objpath = self.get_object_path(instname) 338 self.literal_types[objpath] = ref.get_origin() 339 340 # Return a wrapper for the invocation exposing the items. 341 342 return cls( 343 instname, 344 ref.instance_of(), 345 self.process_structure_node(invocation), 346 invocation.args 347 ) 348 349 # Node handling. 350 351 def process_structure(self, node): 352 353 """ 354 Within the given 'node', process the program structure. 355 356 During inspection, this will process global declarations, adjusting the 357 module namespace, and import statements, building a module dependency 358 hierarchy. 359 360 During translation, this will consult deduced program information and 361 output translated code. 362 """ 363 364 l = [] 365 for n in node.getChildNodes(): 366 l.append(self.process_structure_node(n)) 367 return l 368 369 def process_augassign_node(self, n): 370 371 "Process the given augmented assignment node 'n'." 372 373 op = operator_functions[n.op] 374 375 if isinstance(n.node, compiler.ast.Getattr): 376 target = compiler.ast.AssAttr(n.node.expr, n.node.attrname, "OP_ASSIGN") 377 elif isinstance(n.node, compiler.ast.Name): 378 target = compiler.ast.AssName(n.node.name, "OP_ASSIGN") 379 else: 380 target = n.node 381 382 assignment = compiler.ast.Assign( 383 [target], 384 compiler.ast.CallFunc( 385 compiler.ast.Name("$op%s" % op), 386 [n.node, n.expr])) 387 388 return self.process_structure_node(assignment) 389 390 def process_assignment_for_object(self, original_name, source): 391 392 """ 393 Return an assignment operation making 'original_name' refer to the given 394 'source'. 395 """ 396 397 assignment = compiler.ast.Assign( 398 [compiler.ast.AssName(original_name, "OP_ASSIGN")], 399 source 400 ) 401 402 return self.process_structure_node(assignment) 403 404 def process_assignment_node_items(self, n, expr): 405 406 """ 407 Process the given assignment node 'n' whose children are to be assigned 408 items of 'expr'. 409 """ 410 411 name_ref = self.process_structure_node(expr) 412 413 # Either unpack the items and present them directly to each assignment 414 # node. 415 416 if isinstance(name_ref, LiteralSequenceRef) and \ 417 self.process_literal_sequence_items(n, name_ref): 418 419 pass 420 421 # Or have the assignment nodes access each item via the sequence API. 422 423 else: 424 self.process_assignment_node_items_by_position(n, expr, name_ref) 425 426 def process_assignment_node_items_by_position(self, n, expr, name_ref): 427 428 """ 429 Process the given sequence assignment node 'n', converting the node to 430 the separate assignment of each target using positional access on a 431 temporary variable representing the sequence. Use 'expr' as the assigned 432 value and 'name_ref' as the reference providing any existing temporary 433 variable. 434 """ 435 436 statements = [] 437 438 # Employ existing names to access the sequence. 439 # Literal sequences do not provide names of accessible objects. 440 441 if isinstance(name_ref, NameRef) and not isinstance(name_ref, LiteralSequenceRef): 442 temp = name_ref.name 443 444 # For other expressions, create a temporary name to reference the items. 445 446 else: 447 temp = self.get_temporary_name() 448 self.next_temporary() 449 450 statements.append( 451 compiler.ast.Assign([compiler.ast.AssName(temp, "OP_ASSIGN")], expr) 452 ) 453 454 # Generate a test for the length of the expression object. 455 456 statements.append(compiler.ast.Discard( 457 compiler.ast.CallFunc(compiler.ast.Name("$seq_test_length"), 458 [compiler.ast.Name(temp), compiler.ast.Const(len(n.nodes))]))) 459 460 # Assign the items to the target nodes. 461 462 for i, node in enumerate(n.nodes): 463 statements.append( 464 compiler.ast.Assign([node], compiler.ast.Subscript( 465 compiler.ast.Name(temp), "OP_APPLY", [compiler.ast.Const(i, str(i))])) 466 ) 467 468 return self.process_structure_node(compiler.ast.Stmt(statements)) 469 470 def process_literal_sequence_items(self, n, name_ref): 471 472 """ 473 Process the given assignment node 'n', obtaining from the given 474 'name_ref' the items to be assigned to the assignment targets. 475 476 Return whether this method was able to process the assignment node as 477 a sequence of direct assignments. 478 """ 479 480 if len(n.nodes) == len(name_ref.items): 481 assigned_names, count = get_names_from_nodes(n.nodes) 482 accessed_names, _count = get_names_from_nodes(name_ref.items) 483 484 # Only assign directly between items if all assigned names are 485 # plain names (not attribute assignments), and if the assigned names 486 # do not appear in the accessed names. 487 488 if len(assigned_names) == count and \ 489 not assigned_names.intersection(accessed_names): 490 491 for node, item in zip(n.nodes, name_ref.items): 492 self.process_assignment_node(node, item) 493 494 return True 495 496 # Otherwise, use the position-based mechanism to obtain values. 497 498 else: 499 return False 500 else: 501 raise InspectError("In %s, item assignment needing %d items is given %d items." % ( 502 self.get_namespace_path(), len(n.nodes), len(name_ref.items))) 503 504 def process_compare_node(self, n): 505 506 """ 507 Process the given comparison node 'n', converting an operator sequence 508 from... 509 510 <expr1> <op1> <expr2> <op2> <expr3> 511 512 ...to... 513 514 <op1>(<expr1>, <expr2>) and <op2>(<expr2>, <expr3>) 515 """ 516 517 invocations = [] 518 last = n.expr 519 520 for op, op_node in n.ops: 521 op = operator_functions.get(op) 522 523 invocations.append(compiler.ast.CallFunc( 524 compiler.ast.Name("$op%s" % op), 525 [last, op_node])) 526 527 last = op_node 528 529 if len(invocations) > 1: 530 result = compiler.ast.And(invocations) 531 else: 532 result = invocations[0] 533 534 return self.process_structure_node(result) 535 536 def process_dict_node(self, node): 537 538 """ 539 Process the given dictionary 'node', returning a list of (key, value) 540 tuples. 541 """ 542 543 l = [] 544 for key, value in node.items: 545 l.append(( 546 self.process_structure_node(key), 547 self.process_structure_node(value))) 548 return l 549 550 def process_for_node(self, n): 551 552 """ 553 Generate attribute accesses for {n.list}.__iter__ and the next method on 554 the iterator, producing a replacement node for the original. 555 """ 556 557 t0 = self.get_temporary_name() 558 self.next_temporary() 559 t1 = self.get_temporary_name() 560 self.next_temporary() 561 t2 = self.get_temporary_name() 562 self.next_temporary() 563 564 node = compiler.ast.Stmt([ 565 566 # <t0> = {n.list} 567 # <t1> = <t0>.__iter__() 568 569 compiler.ast.Assign( 570 [compiler.ast.AssName(t0, "OP_ASSIGN")], 571 n.list), 572 573 compiler.ast.Assign( 574 [compiler.ast.AssName(t1, "OP_ASSIGN")], 575 compiler.ast.CallFunc( 576 compiler.ast.Getattr(compiler.ast.Name(t0), "__iter__"), 577 [])), 578 579 # <t2> = <t1>.next 580 # try: 581 # while True: 582 # <var>... = <t2>() 583 # ... 584 # except StopIteration: 585 # pass 586 587 compiler.ast.Assign( 588 [compiler.ast.AssName(t2, "OP_ASSIGN")], 589 compiler.ast.Getattr(compiler.ast.Name(t1), "next")), 590 591 compiler.ast.TryExcept( 592 compiler.ast.While( 593 compiler.ast.Name("True"), 594 compiler.ast.Stmt([ 595 compiler.ast.Assign( 596 [n.assign], 597 compiler.ast.CallFunc( 598 compiler.ast.Name(t2), 599 [] 600 )), 601 n.body]), 602 None), 603 [(compiler.ast.Name("StopIteration"), None, compiler.ast.Stmt([compiler.ast.Pass()]))], 604 None) 605 ]) 606 607 self.process_structure_node(node) 608 609 def process_literal_sequence_node(self, n, name, ref, cls): 610 611 """ 612 Process the given literal sequence node 'n' as a function invocation, 613 with 'name' indicating the type of the sequence, and 'ref' being a 614 reference to the type. The 'cls' is used to instantiate a suitable name 615 reference. 616 """ 617 618 if name == "dict": 619 items = [] 620 for key, value in n.items: 621 items.append(compiler.ast.Tuple([key, value])) 622 else: # name in ("list", "tuple"): 623 items = n.nodes 624 625 return self.get_literal_reference(name, ref, items, cls) 626 627 def process_operator_node(self, n): 628 629 """ 630 Process the given operator node 'n' as an operator function invocation. 631 """ 632 633 opname = n.__class__.__name__ 634 operands = n.getChildNodes() 635 636 # Convert a unary operation to an invocation. 637 638 op = unary_operator_functions.get(opname) 639 640 if op: 641 invocation = compiler.ast.CallFunc( 642 compiler.ast.Name("$op%s" % op), 643 [operands[0]] 644 ) 645 646 # Convert a single operator with a list of operands to a combination of 647 # pairwise operations. 648 649 else: 650 op = operator_functions[opname] 651 invocation = self._process_operator_node(op, operands) 652 653 return self.process_structure_node(invocation) 654 655 def _process_operator_node(self, op, operands): 656 657 """ 658 Process the given 'op', being an operator function, together with the 659 supplied 'operands', returning either a single remaining operand or an 660 invocation combining the operands. 661 """ 662 663 remaining = operands[1:] 664 if not remaining: 665 return operands[0] 666 667 return compiler.ast.CallFunc( 668 compiler.ast.Name("$op%s" % op), 669 [operands[0], self._process_operator_node(op, remaining)] 670 ) 671 672 def process_print_node(self, n): 673 674 """ 675 Process the given print node 'n' as an invocation on a stream of the 676 form... 677 678 $print(dest, args, nl) 679 680 The special function name will be translated elsewhere. 681 """ 682 683 nl = isinstance(n, compiler.ast.Printnl) 684 invocation = compiler.ast.CallFunc( 685 compiler.ast.Name("$print"), 686 [n.dest or compiler.ast.Name("None"), 687 compiler.ast.List(list(n.nodes)), 688 nl and compiler.ast.Name("True") or compiler.ast.Name("False")] 689 ) 690 return self.process_structure_node(invocation) 691 692 def process_slice_node(self, n, expr=None): 693 694 """ 695 Process the given slice node 'n' as a method invocation. 696 """ 697 698 if n.flags == "OP_ASSIGN": op = "__setslice__" 699 elif n.flags == "OP_DELETE": op = "__delslice__" 700 else: op = "__getslice__" 701 702 invocation = compiler.ast.CallFunc( 703 compiler.ast.Getattr(n.expr, op), 704 [n.lower or compiler.ast.Name("None"), n.upper or compiler.ast.Name("None")] + 705 (expr and [expr] or []) 706 ) 707 708 # Fix parse tree structure. 709 710 if op == "__delslice__": 711 invocation = compiler.ast.Discard(invocation) 712 713 return self.process_structure_node(invocation) 714 715 def process_sliceobj_node(self, n): 716 717 """ 718 Process the given slice object node 'n' as a slice constructor. 719 """ 720 721 op = "slice" 722 invocation = compiler.ast.CallFunc( 723 compiler.ast.Name("$op%s" % op), 724 n.nodes 725 ) 726 return self.process_structure_node(invocation) 727 728 def process_subscript_node(self, n, expr=None): 729 730 """ 731 Process the given subscript node 'n' as a method invocation. 732 """ 733 734 if n.flags == "OP_ASSIGN": op = "__setitem__" 735 elif n.flags == "OP_DELETE": op = "__delitem__" 736 else: op = "__getitem__" 737 738 invocation = compiler.ast.CallFunc( 739 compiler.ast.Getattr(n.expr, op), 740 list(n.subs) + (expr and [expr] or []) 741 ) 742 743 # Fix parse tree structure. 744 745 if op == "__delitem__": 746 invocation = compiler.ast.Discard(invocation) 747 748 return self.process_structure_node(invocation) 749 750 def process_attribute_chain(self, n): 751 752 """ 753 Process the given attribute access node 'n'. Return a reference 754 describing the expression. 755 """ 756 757 # AssAttr/Getattr are nested with the outermost access being the last 758 # access in any chain. 759 760 self.attrs.insert(0, n.attrname) 761 attrs = self.attrs 762 763 # Break attribute chains where non-access nodes are found. 764 765 if not self.have_access_expression(n): 766 self.reset_attribute_chain() 767 768 # Descend into the expression, extending backwards any existing chain, 769 # or building another for the expression. 770 771 name_ref = self.process_structure_node(n.expr) 772 773 # Restore chain information applying to this node. 774 775 if not self.have_access_expression(n): 776 self.restore_attribute_chain(attrs) 777 778 # Return immediately if the expression was another access and thus a 779 # continuation backwards along the chain. The above processing will 780 # have followed the chain all the way to its conclusion. 781 782 if self.have_access_expression(n): 783 del self.attrs[0] 784 785 return name_ref 786 787 # Attribute chain handling. 788 789 def reset_attribute_chain(self): 790 791 "Reset the attribute chain for a subexpression of an attribute access." 792 793 self.attrs = [] 794 self.chain_assignment.append(self.in_assignment) 795 self.chain_invocation.append(self.in_invocation) 796 self.in_assignment = None 797 self.in_invocation = None 798 799 def restore_attribute_chain(self, attrs): 800 801 "Restore the attribute chain for an attribute access." 802 803 self.attrs = attrs 804 self.in_assignment = self.chain_assignment.pop() 805 self.in_invocation = self.chain_invocation.pop() 806 807 def have_access_expression(self, node): 808 809 "Return whether the expression associated with 'node' is Getattr." 810 811 return isinstance(node.expr, compiler.ast.Getattr) 812 813 def get_name_for_tracking(self, name, name_ref=None, is_global=False): 814 815 """ 816 Return the name to be used for attribute usage observations involving 817 the given 'name' in the current namespace. 818 819 If the name is being used outside a function, and if 'name_ref' is 820 given and indicates a global or if 'is_global' is specified as a true 821 value, a path featuring the name in the global namespace is returned. 822 Otherwise, a path computed using the current namespace and the given 823 name is returned. 824 825 The intention of this method is to provide a suitably-qualified name 826 that can be tracked across namespaces. Where globals are being 827 referenced in class namespaces, they should be referenced using their 828 path within the module, not using a path within each class. 829 830 It may not be possible to identify a global within a function at the 831 time of inspection (since a global may appear later in a file). 832 Consequently, globals are identified by their local name rather than 833 their module-qualified path. 834 """ 835 836 # For functions, use the appropriate local names. 837 838 if self.in_function: 839 return name 840 841 # For global names outside functions, use a global name. 842 843 elif is_global or name_ref and name_ref.is_global_name(): 844 return self.get_global_path(name) 845 846 # Otherwise, establish a name in the current namespace. 847 848 else: 849 return self.get_object_path(name) 850 851 def get_path_for_access(self): 852 853 "Outside functions, register accesses at the module level." 854 855 if not self.in_function: 856 return self.name 857 else: 858 return self.get_namespace_path() 859 860 def get_module_name(self, node): 861 862 """ 863 Using the given From 'node' in this module, calculate any relative import 864 information, returning a tuple containing a module to import along with any 865 names to import based on the node's name information. 866 867 Where the returned module is given as None, whole module imports should 868 be performed for the returned modules using the returned names. 869 """ 870 871 # Absolute import. 872 873 if node.level == 0: 874 return node.modname, node.names 875 876 # Relative to an ancestor of this module. 877 878 else: 879 path = self.name.split(".") 880 level = node.level 881 882 # Relative imports treat package roots as submodules. 883 884 if split(self.filename)[-1] == "__init__.py": 885 level -= 1 886 887 if level > len(path): 888 raise InspectError("Relative import %r involves too many levels up from module %r" % ( 889 ("%s%s" % ("." * node.level, node.modname or "")), self.name)) 890 891 basename = ".".join(path[:len(path)-level]) 892 893 # Name imports from a module. 894 895 if node.modname: 896 return "%s.%s" % (basename, node.modname), node.names 897 898 # Relative whole module imports. 899 900 else: 901 return basename, node.names 902 903 def get_argnames(args): 904 905 """ 906 Return a list of all names provided by 'args'. Since tuples may be 907 employed, the arguments are traversed depth-first. 908 """ 909 910 l = [] 911 for arg in args: 912 if isinstance(arg, tuple): 913 l += get_argnames(arg) 914 else: 915 l.append(arg) 916 return l 917 918 def get_names_from_nodes(nodes): 919 920 """ 921 Return the names employed in the given 'nodes' along with the number of 922 nodes excluding sequences. 923 """ 924 925 names = set() 926 count = 0 927 928 for node in nodes: 929 930 # Add names and count them. 931 932 if isinstance(node, (compiler.ast.AssName, compiler.ast.Name)): 933 names.add(node.name) 934 count += 1 935 936 # Add names from sequences and incorporate their counts. 937 938 elif isinstance(node, (compiler.ast.AssList, compiler.ast.AssTuple, 939 compiler.ast.List, compiler.ast.Set, 940 compiler.ast.Tuple)): 941 _names, _count = get_names_from_nodes(node.nodes) 942 names.update(_names) 943 count += _count 944 945 # Count non-name, non-sequence nodes. 946 947 else: 948 count += 1 949 950 return names, count 951 952 # Location classes. 953 954 class Location: 955 956 "A generic program location." 957 958 def __init__(self, path, name, attrnames=None, version=None, access_number=None): 959 self.path = path 960 self.name = name 961 self.attrnames = attrnames 962 self.version = version 963 self.access_number = access_number 964 965 def __repr__(self): 966 return "Location(%r, %r, %r, %r, %r)" % self.as_tuple() 967 968 def as_tuple(self): 969 return (self.path, self.name, self.attrnames, self.version, self.access_number) 970 971 def __hash__(self): 972 return hash(self.as_tuple()) 973 974 def __eq__(self, other): 975 return self.as_tuple() == other.as_tuple() 976 977 def __cmp__(self, other): 978 return cmp(self.as_tuple(), other.as_tuple()) 979 980 def get_attrname(self): 981 982 """ 983 Extract the first attribute from the attribute names employed in this 984 location. 985 """ 986 987 attrnames = self.attrnames 988 if not attrnames: 989 return attrnames 990 return get_attrnames(attrnames)[0] 991 992 class AccessLocation(Location): 993 994 "A specialised access location." 995 996 def __init__(self, path, name, attrnames, access_number): 997 998 """ 999 Initialise an access location featuring 'path', 'name', 'attrnames' and 1000 'access_number'. 1001 """ 1002 1003 Location.__init__(self, path, name, attrnames, None, access_number) 1004 1005 def __repr__(self): 1006 return "AccessLocation(%r, %r, %r, %r)" % (self.path, self.name, self.attrnames, self.access_number) 1007 1008 # Result classes. 1009 1010 class InstructionSequence: 1011 1012 "A generic sequence of instructions." 1013 1014 def __init__(self, instructions): 1015 self.instructions = instructions 1016 1017 def get_value_instruction(self): 1018 return self.instructions[-1] 1019 1020 def get_init_instructions(self): 1021 return self.instructions[:-1] 1022 1023 # Dictionary utilities. 1024 1025 def init_item(d, key, fn): 1026 1027 """ 1028 Add to 'd' an entry for 'key' using the callable 'fn' to make an initial 1029 value where no entry already exists. 1030 """ 1031 1032 if not d.has_key(key): 1033 d[key] = fn() 1034 return d[key] 1035 1036 def dict_for_keys(d, keys): 1037 1038 "Return a new dictionary containing entries from 'd' for the given 'keys'." 1039 1040 nd = {} 1041 for key in keys: 1042 if d.has_key(key): 1043 nd[key] = d[key] 1044 return nd 1045 1046 def make_key(s): 1047 1048 "Make sequence 's' into a tuple-based key, first sorting its contents." 1049 1050 l = list(s) 1051 l.sort() 1052 return tuple(l) 1053 1054 def add_counter_item(d, key): 1055 1056 """ 1057 Make a mapping in 'd' for 'key' to the number of keys added before it, thus 1058 maintaining a mapping of keys to their order of insertion. 1059 """ 1060 1061 if not d.has_key(key): 1062 d[key] = len(d.keys()) 1063 return d[key] 1064 1065 def remove_items(d1, d2): 1066 1067 "Remove from 'd1' all items from 'd2'." 1068 1069 for key in d2.keys(): 1070 if d1.has_key(key): 1071 del d1[key] 1072 1073 # Set utilities. 1074 1075 def first(s): 1076 return list(s)[0] 1077 1078 def same(s1, s2): 1079 return set(s1) == set(s2) 1080 1081 def order_dependencies(all_depends): 1082 1083 """ 1084 Produce a dependency ordering for the 'all_depends' mapping. This mapping 1085 has the form "A depends on B, C...". The result will order A, B, C, and so 1086 on. 1087 """ 1088 1089 usage = init_reverse_dependencies(all_depends) 1090 1091 # Produce an ordering by obtaining exposed items (required by items already 1092 # processed) and putting them at the start of the list. 1093 1094 ordered = [] 1095 1096 while usage: 1097 have_next = False 1098 1099 for key, n in usage.items(): 1100 1101 # Add items needed by no other items to the ordering. 1102 1103 if not n: 1104 remove_dependency(key, all_depends, usage, ordered) 1105 have_next = True 1106 1107 if not have_next: 1108 raise ValueError, usage 1109 1110 return ordered 1111 1112 def order_dependencies_partial(all_depends): 1113 1114 """ 1115 Produce a dependency ordering for the 'all_depends' mapping. This mapping 1116 has the form "A depends on B, C...". The result will order A, B, C, and so 1117 on. Where cycles exist, they will be broken and a partial ordering returned. 1118 """ 1119 1120 usage = init_reverse_dependencies(all_depends) 1121 1122 # Duplicate the dependencies for subsequent modification. 1123 1124 new_depends = {} 1125 for key, values in all_depends.items(): 1126 new_depends[key] = set(values) 1127 1128 all_depends = new_depends 1129 1130 # Produce an ordering by obtaining exposed items (required by items already 1131 # processed) and putting them at the start of the list. 1132 1133 ordered = [] 1134 1135 while usage: 1136 least = None 1137 least_key = None 1138 1139 for key, n in usage.items(): 1140 1141 # Add items needed by no other items to the ordering. 1142 1143 if not n: 1144 remove_dependency(key, all_depends, usage, ordered) 1145 least = 0 1146 1147 # When breaking cycles, note the least used items. 1148 1149 elif least is None or len(n) < least: 1150 least_key = key 1151 least = len(n) 1152 1153 if least: 1154 transfer_dependencies(least_key, all_depends, usage, ordered) 1155 1156 return ordered 1157 1158 def init_reverse_dependencies(all_depends): 1159 1160 """ 1161 From 'all_depends', providing a mapping of the form "A depends on B, C...", 1162 record the reverse dependencies, making a mapping of the form 1163 "B is needed by A", "C is needed by A", and so on. 1164 """ 1165 1166 usage = {} 1167 1168 # Record path-based dependencies. 1169 1170 for key in all_depends.keys(): 1171 usage[key] = set() 1172 1173 for key, depends in all_depends.items(): 1174 for depend in depends: 1175 init_item(usage, depend, set) 1176 usage[depend].add(key) 1177 1178 return usage 1179 1180 def transfer_dependencies(key, all_depends, usage, ordered): 1181 1182 """ 1183 Transfer items needed by 'key' to those items needing 'key', found using 1184 'all_depends', and updating 'usage'. Insert 'key' into the 'ordered' 1185 collection of dependencies. 1186 1187 If "A is needed by X" and "B is needed by A", then transferring items needed 1188 by A will cause "B is needed by X" to be recorded as a consequence. 1189 1190 Transferring items also needs to occur in the reverse mapping, so that 1191 "A needs B" and "X needs A", then the consequence must be recorded as 1192 "X needs B". 1193 """ 1194 1195 ordered.insert(0, key) 1196 1197 needing = usage[key] # A is needed by X 1198 needed = all_depends.get(key) # A needs B 1199 1200 if needing: 1201 for depend in needing: 1202 l = all_depends.get(depend) 1203 if not l: 1204 continue 1205 1206 l.remove(key) # X needs (A) 1207 1208 if needed: 1209 l.update(needed) # X needs B... 1210 1211 # Prevent self references. 1212 1213 if depend in needed: 1214 l.remove(depend) 1215 1216 if needed: 1217 for depend in needed: 1218 l = usage.get(depend) 1219 if not l: 1220 continue 1221 1222 l.remove(key) # B is needed by (A) 1223 l.update(needing) # B is needed by X... 1224 1225 # Prevent self references. 1226 1227 if depend in needing: 1228 l.remove(depend) 1229 1230 if needed: 1231 del all_depends[key] 1232 del usage[key] 1233 1234 def remove_dependency(key, all_depends, usage, ordered): 1235 1236 """ 1237 Remove 'key', found in 'all_depends', from 'usage', inserting it into the 1238 'ordered' collection of dependencies. 1239 1240 Given that 'usage' for a given key A would indicate that "A needs <nothing>" 1241 upon removing A from 'usage', the outcome is that all keys needing A will 1242 have A removed from their 'usage' records. 1243 1244 So, if "B needs A", removing A will cause "B needs <nothing>" to be recorded 1245 as a consequence. 1246 """ 1247 1248 ordered.insert(0, key) 1249 1250 depends = all_depends.get(key) 1251 1252 # Reduce usage of the referenced items. 1253 1254 if depends: 1255 for depend in depends: 1256 usage[depend].remove(key) 1257 1258 del usage[key] 1259 1260 # General input/output. 1261 1262 def readfile(filename): 1263 1264 "Return the contents of 'filename'." 1265 1266 f = open(filename) 1267 try: 1268 return f.read() 1269 finally: 1270 f.close() 1271 1272 def writefile(filename, s): 1273 1274 "Write to 'filename' the string 's'." 1275 1276 f = open(filename, "w") 1277 try: 1278 f.write(s) 1279 finally: 1280 f.close() 1281 1282 # General encoding. 1283 1284 def sorted_output(x): 1285 1286 "Sort sequence 'x' and return a string with commas separating the values." 1287 1288 x = map(str, x) 1289 x.sort() 1290 return ", ".join(x) 1291 1292 def get_string_details(literals, encoding): 1293 1294 """ 1295 Determine whether 'literals' represent Unicode strings or byte strings, 1296 using 'encoding' to reproduce byte sequences. 1297 1298 Each literal is the full program representation including prefix and quotes 1299 recoded by the parser to UTF-8. Thus, any literal found to represent a byte 1300 string needs to be translated back to its original encoding. 1301 1302 Return a single encoded literal value, a type name, and the original 1303 encoding as a tuple. 1304 """ 1305 1306 typename = "unicode" 1307 1308 l = [] 1309 1310 for s in literals: 1311 out, _typename = get_literal_details(s) 1312 if _typename == "str": 1313 typename = "str" 1314 l.append(out) 1315 1316 out = "".join(l) 1317 1318 # For Unicode values, convert to the UTF-8 program representation. 1319 1320 if typename == "unicode": 1321 return out.encode("utf-8"), typename, encoding 1322 1323 # For byte string values, convert back to the original encoding. 1324 1325 else: 1326 return out.encode(encoding), typename, encoding 1327 1328 def get_literal_details(s): 1329 1330 """ 1331 Determine whether 's' represents a Unicode string or a byte string, where 1332 's' contains the full program representation of a literal including prefix 1333 and quotes, recoded by the parser to UTF-8. 1334 1335 Find and convert Unicode values starting with <backslash>u or <backslash>U, 1336 and byte or Unicode values starting with <backslash><octal digit> or 1337 <backslash>x. 1338 1339 Literals prefixed with "u" cause <backslash><octal digit> and <backslash>x 1340 to be considered as Unicode values. Otherwise, they produce byte values and 1341 cause unprefixed strings to be considered as byte strings. 1342 1343 Literals prefixed with "r" do not have their backslash-encoded values 1344 converted unless also prefixed with "u", in which case only the above value 1345 formats are converted, not any of the other special sequences for things 1346 like newlines. 1347 1348 Return the literal value as a Unicode object together with the appropriate 1349 type name in a tuple. 1350 """ 1351 1352 l = [] 1353 1354 # Identify the quote character and use it to identify the prefix. 1355 1356 quote_type = s[-1] 1357 prefix_end = s.find(quote_type) 1358 prefix = s[:prefix_end].lower() 1359 1360 if prefix not in ("", "b", "br", "r", "u", "ur"): 1361 raise ValueError, "String literal does not have a supported prefix: %s" % s 1362 1363 if "b" in prefix: 1364 typename = "str" 1365 else: 1366 typename = "unicode" 1367 1368 # Identify triple quotes or single quotes. 1369 1370 if len(s) >= 6 and s[-2] == quote_type and s[-3] == quote_type: 1371 quote = s[prefix_end:prefix_end+3] 1372 current = prefix_end + 3 1373 end = len(s) - 3 1374 else: 1375 quote = s[prefix_end] 1376 current = prefix_end + 1 1377 end = len(s) - 1 1378 1379 # Conversions of some quoted values. 1380 1381 searches = { 1382 "u" : (6, 16), 1383 "U" : (10, 16), 1384 "x" : (4, 16), 1385 } 1386 1387 octal_digits = map(str, range(0, 8)) 1388 1389 # Translations of some quoted values. 1390 1391 escaped = { 1392 "\\" : "\\", "'" : "'", '"' : '"', 1393 "a" : "\a", "b" : "\b", "f" : "\f", 1394 "n" : "\n", "r" : "\r", "t" : "\t", 1395 } 1396 1397 while current < end: 1398 1399 # Look for quoted values. 1400 1401 index = s.find("\\", current) 1402 if index == -1 or index + 1 == end: 1403 l.append(s[current:end]) 1404 break 1405 1406 # Add the preceding text. 1407 1408 l.append(s[current:index]) 1409 1410 # Handle quoted text. 1411 1412 term = s[index+1] 1413 1414 # Add Unicode values. Where a string is u-prefixed, even \o and \x 1415 # produce Unicode values. 1416 1417 if typename == "unicode" and ( 1418 term in ("u", "U") or 1419 "u" in prefix and (term == "x" or term in octal_digits)): 1420 1421 needed, base = searches.get(term, (4, 8)) 1422 value = convert_quoted_value(s, index, needed, end, base, unichr) 1423 l.append(value) 1424 current = index + needed 1425 1426 # Add raw byte values, changing the string type. 1427 1428 elif "r" not in prefix and ( 1429 term == "x" or term in octal_digits): 1430 1431 needed, base = searches.get(term, (4, 8)) 1432 value = convert_quoted_value(s, index, needed, end, base, chr) 1433 l.append(value) 1434 typename = "str" 1435 current = index + needed 1436 1437 # Add other escaped values. 1438 1439 elif "r" not in prefix and escaped.has_key(term): 1440 l.append(escaped[term]) 1441 current = index + 2 1442 1443 # Add other text as found. 1444 1445 else: 1446 l.append(s[index:index+2]) 1447 current = index + 2 1448 1449 # Collect the components into a single Unicode object. Since the literal 1450 # text was already in UTF-8 form, interpret plain strings as UTF-8 1451 # sequences. 1452 1453 out = [] 1454 1455 for value in l: 1456 if isinstance(value, unicode): 1457 out.append(value) 1458 else: 1459 out.append(unicode(value, "utf-8")) 1460 1461 return "".join(out), typename 1462 1463 def convert_quoted_value(s, index, needed, end, base, fn): 1464 1465 """ 1466 Interpret a quoted value in 's' at 'index' with the given 'needed' number of 1467 positions, and with the given 'end' indicating the first position after the 1468 end of the actual string content. 1469 1470 Use 'base' as the numerical base when interpreting the value, and use 'fn' 1471 to convert the value to an appropriate type. 1472 """ 1473 1474 s = s[index:min(index+needed, end)] 1475 1476 # Not a complete occurrence. 1477 1478 if len(s) < needed: 1479 return s 1480 1481 # Test for a well-formed value. 1482 1483 try: 1484 first = base == 8 and 1 or 2 1485 value = int(s[first:needed], base) 1486 except ValueError: 1487 return s 1488 else: 1489 return fn(value) 1490 1491 # Attribute chain decoding. 1492 1493 def get_attrnames(attrnames): 1494 1495 """ 1496 Split the qualified attribute chain 'attrnames' into its components, 1497 handling special attributes starting with "#" that indicate type 1498 conformance. 1499 """ 1500 1501 if attrnames.startswith("#"): 1502 return [attrnames] 1503 else: 1504 return attrnames.split(".") 1505 1506 def get_name_path(path, name): 1507 1508 "Return a suitable qualified name from the given 'path' and 'name'." 1509 1510 if "." in name: 1511 return name 1512 else: 1513 return "%s.%s" % (path, name) 1514 1515 # Usage-related functions. 1516 1517 def get_types_for_usage(attrnames, objects): 1518 1519 """ 1520 Identify the types that can support the given 'attrnames', using the 1521 given 'objects' as the catalogue of type details. 1522 """ 1523 1524 types = [] 1525 for name, _attrnames in objects.items(): 1526 if set(attrnames).issubset(_attrnames): 1527 types.append(name) 1528 return types 1529 1530 def get_invoked_attributes(usage): 1531 1532 "Obtain invoked attribute from the given 'usage'." 1533 1534 invoked = [] 1535 if usage: 1536 for attrname, invocation, assignment in usage: 1537 if invocation: 1538 invoked.append(attrname) 1539 return invoked 1540 1541 def get_assigned_attributes(usage): 1542 1543 "Obtain assigned attribute from the given 'usage'." 1544 1545 assigned = [] 1546 if usage: 1547 for attrname, invocation, assignment in usage: 1548 if assignment: 1549 assigned.append(attrname) 1550 return assigned 1551 1552 # Type and module functions. 1553 # NOTE: This makes assumptions about the __builtins__ structure. 1554 1555 def get_builtin_module(name): 1556 1557 "Return the module name containing the given type 'name'." 1558 1559 if name == "string": 1560 modname = "str" 1561 elif name == "utf8string": 1562 modname = "unicode" 1563 elif name == "NoneType": 1564 modname = "none" 1565 else: 1566 modname = name 1567 1568 return "__builtins__.%s" % modname 1569 1570 def get_builtin_type(name): 1571 1572 "Return the type name provided by the given Python value 'name'." 1573 1574 if name == "str": 1575 return "string" 1576 elif name == "unicode": 1577 return "utf8string" 1578 else: 1579 return name 1580 1581 def get_builtin_class(name): 1582 1583 "Return the full name of the built-in class having the given 'name'." 1584 1585 typename = get_builtin_type(name) 1586 module = get_builtin_module(typename) 1587 return "%s.%s" % (module, typename) 1588 1589 # Useful data. 1590 1591 predefined_constants = "False", "None", "NotImplemented", "True" 1592 1593 unary_operator_functions = { 1594 1595 # Unary operations. 1596 1597 "Invert" : "invert", 1598 "UnaryAdd" : "pos", 1599 "UnarySub" : "neg", 1600 } 1601 1602 operator_functions = { 1603 1604 # Fundamental operations. 1605 1606 "is" : "is_", 1607 "is not" : "is_not", 1608 1609 # Binary operations. 1610 1611 "in" : "in_", 1612 "not in" : "not_in", 1613 "Add" : "add", 1614 "Bitand" : "and_", 1615 "Bitor" : "or_", 1616 "Bitxor" : "xor", 1617 "Div" : "div", 1618 "FloorDiv" : "floordiv", 1619 "LeftShift" : "lshift", 1620 "Mod" : "mod", 1621 "Mul" : "mul", 1622 "Power" : "pow", 1623 "RightShift" : "rshift", 1624 "Sub" : "sub", 1625 1626 # Augmented assignment. 1627 1628 "+=" : "iadd", 1629 "-=" : "isub", 1630 "*=" : "imul", 1631 "/=" : "idiv", 1632 "//=" : "ifloordiv", 1633 "%=" : "imod", 1634 "**=" : "ipow", 1635 "<<=" : "ilshift", 1636 ">>=" : "irshift", 1637 "&=" : "iand", 1638 "^=" : "ixor", 1639 "|=" : "ior", 1640 1641 # Comparisons. 1642 1643 "==" : "eq", 1644 "!=" : "ne", 1645 "<" : "lt", 1646 "<=" : "le", 1647 ">=" : "ge", 1648 ">" : "gt", 1649 } 1650 1651 operator_functions.update(unary_operator_functions) 1652 1653 # vim: tabstop=4 expandtab shiftwidth=4