1 #!/usr/bin/env python 2 3 """ 4 Common functions. 5 6 Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013, 7 2014, 2015, 2016, 2017 Paul Boddie <paul@boddie.org.uk> 8 9 This program is free software; you can redistribute it and/or modify it under 10 the terms of the GNU General Public License as published by the Free Software 11 Foundation; either version 3 of the License, or (at your option) any later 12 version. 13 14 This program is distributed in the hope that it will be useful, but WITHOUT 15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 16 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 17 details. 18 19 You should have received a copy of the GNU General Public License along with 20 this program. If not, see <http://www.gnu.org/licenses/>. 21 """ 22 23 from compiler.transformer import Transformer 24 from errors import InspectError 25 from os import listdir, makedirs, remove 26 from os.path import exists, getmtime, isdir, join, split 27 from results import ConstantValueRef, LiteralSequenceRef, NameRef 28 import compiler.ast 29 30 class CommonOutput: 31 32 "Common output functionality." 33 34 def check_output(self): 35 36 "Check the existing output and remove it if irrelevant." 37 38 if not exists(self.output): 39 makedirs(self.output) 40 41 details = self.importer.get_cache_details() 42 recorded_details = self.get_output_details() 43 44 if recorded_details != details: 45 self.remove_output() 46 47 writefile(self.get_output_details_filename(), details) 48 49 def get_output_details_filename(self): 50 51 "Return the output details filename." 52 53 return join(self.output, "$details") 54 55 def get_output_details(self): 56 57 "Return details of the existing output." 58 59 details_filename = self.get_output_details_filename() 60 61 if not exists(details_filename): 62 return None 63 else: 64 return readfile(details_filename) 65 66 def remove_output(self, dirname=None): 67 68 "Remove the output." 69 70 dirname = dirname or self.output 71 72 for filename in listdir(dirname): 73 path = join(dirname, filename) 74 if isdir(path): 75 self.remove_output(path) 76 else: 77 remove(path) 78 79 def copy(source, target, only_if_newer=True): 80 81 "Copy a text file from 'source' to 'target'." 82 83 if isdir(target): 84 target = join(target, split(source)[-1]) 85 86 if only_if_newer and not is_newer(source, target): 87 return 88 89 infile = open(source) 90 outfile = open(target, "w") 91 92 try: 93 outfile.write(infile.read()) 94 finally: 95 outfile.close() 96 infile.close() 97 98 def is_newer(source, target): 99 100 "Return whether 'source' is newer than 'target'." 101 102 if exists(target): 103 target_mtime = getmtime(target) 104 source_mtime = getmtime(source) 105 return source_mtime > target_mtime 106 107 return True 108 109 class CommonModule: 110 111 "A common module representation." 112 113 def __init__(self, name, importer): 114 115 """ 116 Initialise this module with the given 'name' and an 'importer' which is 117 used to provide access to other modules when required. 118 """ 119 120 self.name = name 121 self.importer = importer 122 self.filename = None 123 124 # Inspection-related attributes. 125 126 self.astnode = None 127 self.encoding = None 128 self.iterators = {} 129 self.temp = {} 130 self.lambdas = {} 131 132 # Constants, literals and values. 133 134 self.constants = {} 135 self.constant_values = {} 136 self.literals = {} 137 self.literal_types = {} 138 139 # Nested namespaces. 140 141 self.namespace_path = [] 142 self.in_function = False 143 144 # Retain the assignment value expression and track invocations. 145 146 self.in_assignment = None 147 self.in_invocation = None 148 149 # Attribute chain state management. 150 151 self.attrs = [] 152 self.chain_assignment = [] 153 self.chain_invocation = [] 154 155 def __repr__(self): 156 return "CommonModule(%r, %r)" % (self.name, self.importer) 157 158 def parse_file(self, filename): 159 160 "Parse the file with the given 'filename', initialising attributes." 161 162 self.filename = filename 163 164 # Use the Transformer directly to obtain encoding information. 165 166 t = Transformer() 167 f = open(filename) 168 169 try: 170 self.astnode = t.parsesuite(f.read() + "\n") 171 self.encoding = t.encoding 172 finally: 173 f.close() 174 175 # Module-relative naming. 176 177 def get_global_path(self, name): 178 return "%s.%s" % (self.name, name) 179 180 def get_namespace_path(self): 181 return ".".join([self.name] + self.namespace_path) 182 183 def get_object_path(self, name): 184 return ".".join([self.name] + self.namespace_path + [name]) 185 186 def get_parent_path(self): 187 return ".".join([self.name] + self.namespace_path[:-1]) 188 189 # Namespace management. 190 191 def enter_namespace(self, name): 192 193 "Enter the namespace having the given 'name'." 194 195 self.namespace_path.append(name) 196 197 def exit_namespace(self): 198 199 "Exit the current namespace." 200 201 self.namespace_path.pop() 202 203 # Constant reference naming. 204 205 def get_constant_name(self, value, value_type, encoding=None): 206 207 """ 208 Add a new constant to the current namespace for 'value' with 209 'value_type'. 210 """ 211 212 path = self.get_namespace_path() 213 init_item(self.constants, path, dict) 214 return "$c%d" % add_counter_item(self.constants[path], (value, value_type, encoding)) 215 216 # Literal reference naming. 217 218 def get_literal_name(self): 219 220 "Add a new literal to the current namespace." 221 222 path = self.get_namespace_path() 223 init_item(self.literals, path, lambda: 0) 224 return "$C%d" % self.literals[path] 225 226 def next_literal(self): 227 self.literals[self.get_namespace_path()] += 1 228 229 # Temporary iterator naming. 230 231 def get_iterator_path(self): 232 return self.in_function and self.get_namespace_path() or self.name 233 234 def get_iterator_name(self): 235 path = self.get_iterator_path() 236 init_item(self.iterators, path, lambda: 0) 237 return "$i%d" % self.iterators[path] 238 239 def next_iterator(self): 240 self.iterators[self.get_iterator_path()] += 1 241 242 # Temporary variable naming. 243 244 def get_temporary_name(self): 245 path = self.get_namespace_path() 246 init_item(self.temp, path, lambda: 0) 247 return "$t%d" % self.temp[path] 248 249 def next_temporary(self): 250 self.temp[self.get_namespace_path()] += 1 251 252 # Arbitrary function naming. 253 254 def get_lambda_name(self): 255 path = self.get_namespace_path() 256 init_item(self.lambdas, path, lambda: 0) 257 name = "$l%d" % self.lambdas[path] 258 self.lambdas[path] += 1 259 return name 260 261 def reset_lambdas(self): 262 self.lambdas = {} 263 264 # Constant and literal recording. 265 266 def get_constant_value(self, value, literals=None): 267 268 """ 269 Encode the 'value' if appropriate, returning a value, a typename and any 270 encoding. 271 """ 272 273 if isinstance(value, unicode): 274 return value.encode("utf-8"), "unicode", self.encoding 275 276 # Attempt to convert plain strings to text. 277 278 elif isinstance(value, str) and self.encoding: 279 try: 280 return get_string_details(literals, self.encoding) 281 except UnicodeDecodeError: 282 pass 283 284 return value, value.__class__.__name__, None 285 286 def get_constant_reference(self, ref, value, encoding=None): 287 288 """ 289 Return a constant reference for the given 'ref' type and 'value', with 290 the optional 'encoding' applying to text values. 291 """ 292 293 constant_name = self.get_constant_name(value, ref.get_origin(), encoding) 294 295 # Return a reference for the constant. 296 297 objpath = self.get_object_path(constant_name) 298 name_ref = ConstantValueRef(constant_name, ref.instance_of(objpath), value) 299 300 # Record the value and type for the constant. 301 302 self._reserve_constant(objpath, name_ref.value, name_ref.get_origin(), encoding) 303 return name_ref 304 305 def reserve_constant(self, objpath, value, origin, encoding=None): 306 307 """ 308 Reserve a constant within 'objpath' with the given 'value' and having a 309 type with the given 'origin', with the optional 'encoding' applying to 310 text values. 311 """ 312 313 constant_name = self.get_constant_name(value, origin) 314 objpath = self.get_object_path(constant_name) 315 self._reserve_constant(objpath, value, origin, encoding) 316 317 def _reserve_constant(self, objpath, value, origin, encoding): 318 319 """ 320 Store a constant for 'objpath' with the given 'value' and 'origin', with 321 the optional 'encoding' applying to text values. 322 """ 323 324 self.constant_values[objpath] = value, origin, encoding 325 326 def get_literal_reference(self, name, ref, items, cls): 327 328 """ 329 Return a literal reference for the given type 'name', literal 'ref', 330 node 'items' and employing the given 'cls' as the class of the returned 331 reference object. 332 """ 333 334 # Construct an invocation using the items as arguments. 335 336 typename = "$L%s" % name 337 338 invocation = compiler.ast.CallFunc( 339 compiler.ast.Name(typename), 340 items 341 ) 342 343 # Get a name for the actual literal. 344 345 instname = self.get_literal_name() 346 self.next_literal() 347 348 # Record the type for the literal. 349 350 objpath = self.get_object_path(instname) 351 self.literal_types[objpath] = ref.get_origin() 352 353 # Return a wrapper for the invocation exposing the items. 354 355 return cls( 356 instname, 357 ref.instance_of(), 358 self.process_structure_node(invocation), 359 invocation.args 360 ) 361 362 # Node handling. 363 364 def process_structure(self, node): 365 366 """ 367 Within the given 'node', process the program structure. 368 369 During inspection, this will process global declarations, adjusting the 370 module namespace, and import statements, building a module dependency 371 hierarchy. 372 373 During translation, this will consult deduced program information and 374 output translated code. 375 """ 376 377 l = [] 378 for n in node.getChildNodes(): 379 l.append(self.process_structure_node(n)) 380 return l 381 382 def process_augassign_node(self, n): 383 384 "Process the given augmented assignment node 'n'." 385 386 op = operator_functions[n.op] 387 388 if isinstance(n.node, compiler.ast.Getattr): 389 target = compiler.ast.AssAttr(n.node.expr, n.node.attrname, "OP_ASSIGN") 390 elif isinstance(n.node, compiler.ast.Name): 391 target = compiler.ast.AssName(n.node.name, "OP_ASSIGN") 392 else: 393 target = n.node 394 395 assignment = compiler.ast.Assign( 396 [target], 397 compiler.ast.CallFunc( 398 compiler.ast.Name("$op%s" % op), 399 [n.node, n.expr])) 400 401 return self.process_structure_node(assignment) 402 403 def process_assignment_for_object(self, original_name, source): 404 405 """ 406 Return an assignment operation making 'original_name' refer to the given 407 'source'. 408 """ 409 410 assignment = compiler.ast.Assign( 411 [compiler.ast.AssName(original_name, "OP_ASSIGN")], 412 source 413 ) 414 415 return self.process_structure_node(assignment) 416 417 def process_assignment_node_items(self, n, expr): 418 419 """ 420 Process the given assignment node 'n' whose children are to be assigned 421 items of 'expr'. 422 """ 423 424 name_ref = self.process_structure_node(expr) 425 426 # Either unpack the items and present them directly to each assignment 427 # node. 428 429 if isinstance(name_ref, LiteralSequenceRef) and \ 430 self.process_literal_sequence_items(n, name_ref): 431 432 pass 433 434 # Or have the assignment nodes access each item via the sequence API. 435 436 else: 437 self.process_assignment_node_items_by_position(n, expr, name_ref) 438 439 def process_assignment_node_items_by_position(self, n, expr, name_ref): 440 441 """ 442 Process the given sequence assignment node 'n', converting the node to 443 the separate assignment of each target using positional access on a 444 temporary variable representing the sequence. Use 'expr' as the assigned 445 value and 'name_ref' as the reference providing any existing temporary 446 variable. 447 """ 448 449 assignments = [] 450 451 # Employ existing names to access the sequence. 452 # Literal sequences do not provide names of accessible objects. 453 454 if isinstance(name_ref, NameRef) and not isinstance(name_ref, LiteralSequenceRef): 455 temp = name_ref.name 456 457 # For other expressions, create a temporary name to reference the items. 458 459 else: 460 temp = self.get_temporary_name() 461 self.next_temporary() 462 463 assignments.append( 464 compiler.ast.Assign([compiler.ast.AssName(temp, "OP_ASSIGN")], expr) 465 ) 466 467 # Assign the items to the target nodes. 468 469 for i, node in enumerate(n.nodes): 470 assignments.append( 471 compiler.ast.Assign([node], compiler.ast.Subscript( 472 compiler.ast.Name(temp), "OP_APPLY", [compiler.ast.Const(i, str(i))])) 473 ) 474 475 return self.process_structure_node(compiler.ast.Stmt(assignments)) 476 477 def process_literal_sequence_items(self, n, name_ref): 478 479 """ 480 Process the given assignment node 'n', obtaining from the given 481 'name_ref' the items to be assigned to the assignment targets. 482 483 Return whether this method was able to process the assignment node as 484 a sequence of direct assignments. 485 """ 486 487 if len(n.nodes) == len(name_ref.items): 488 assigned_names, count = get_names_from_nodes(n.nodes) 489 accessed_names, _count = get_names_from_nodes(name_ref.items) 490 491 # Only assign directly between items if all assigned names are 492 # plain names (not attribute assignments), and if the assigned names 493 # do not appear in the accessed names. 494 495 if len(assigned_names) == count and \ 496 not assigned_names.intersection(accessed_names): 497 498 for node, item in zip(n.nodes, name_ref.items): 499 self.process_assignment_node(node, item) 500 501 return True 502 503 # Otherwise, use the position-based mechanism to obtain values. 504 505 else: 506 return False 507 else: 508 raise InspectError("In %s, item assignment needing %d items is given %d items." % ( 509 self.get_namespace_path(), len(n.nodes), len(name_ref.items))) 510 511 def process_compare_node(self, n): 512 513 """ 514 Process the given comparison node 'n', converting an operator sequence 515 from... 516 517 <expr1> <op1> <expr2> <op2> <expr3> 518 519 ...to... 520 521 <op1>(<expr1>, <expr2>) and <op2>(<expr2>, <expr3>) 522 """ 523 524 invocations = [] 525 last = n.expr 526 527 for op, op_node in n.ops: 528 op = operator_functions.get(op) 529 530 invocations.append(compiler.ast.CallFunc( 531 compiler.ast.Name("$op%s" % op), 532 [last, op_node])) 533 534 last = op_node 535 536 if len(invocations) > 1: 537 result = compiler.ast.And(invocations) 538 else: 539 result = invocations[0] 540 541 return self.process_structure_node(result) 542 543 def process_dict_node(self, node): 544 545 """ 546 Process the given dictionary 'node', returning a list of (key, value) 547 tuples. 548 """ 549 550 l = [] 551 for key, value in node.items: 552 l.append(( 553 self.process_structure_node(key), 554 self.process_structure_node(value))) 555 return l 556 557 def process_for_node(self, n): 558 559 """ 560 Generate attribute accesses for {n.list}.__iter__ and the next method on 561 the iterator, producing a replacement node for the original. 562 """ 563 564 node = compiler.ast.Stmt([ 565 566 # <next> = {n.list}.__iter__().next 567 568 compiler.ast.Assign( 569 [compiler.ast.AssName(self.get_iterator_name(), "OP_ASSIGN")], 570 compiler.ast.Getattr( 571 compiler.ast.CallFunc( 572 compiler.ast.Getattr(n.list, "__iter__"), 573 [] 574 ), "next")), 575 576 # try: 577 # while True: 578 # <var>... = <next>() 579 # ... 580 # except StopIteration: 581 # pass 582 583 compiler.ast.TryExcept( 584 compiler.ast.While( 585 compiler.ast.Name("True"), 586 compiler.ast.Stmt([ 587 compiler.ast.Assign( 588 [n.assign], 589 compiler.ast.CallFunc( 590 compiler.ast.Name(self.get_iterator_name()), 591 [] 592 )), 593 n.body]), 594 None), 595 [(compiler.ast.Name("StopIteration"), None, compiler.ast.Stmt([compiler.ast.Pass()]))], 596 None) 597 ]) 598 599 self.next_iterator() 600 self.process_structure_node(node) 601 602 def process_literal_sequence_node(self, n, name, ref, cls): 603 604 """ 605 Process the given literal sequence node 'n' as a function invocation, 606 with 'name' indicating the type of the sequence, and 'ref' being a 607 reference to the type. The 'cls' is used to instantiate a suitable name 608 reference. 609 """ 610 611 if name == "dict": 612 items = [] 613 for key, value in n.items: 614 items.append(compiler.ast.Tuple([key, value])) 615 else: # name in ("list", "tuple"): 616 items = n.nodes 617 618 return self.get_literal_reference(name, ref, items, cls) 619 620 def process_operator_node(self, n): 621 622 """ 623 Process the given operator node 'n' as an operator function invocation. 624 """ 625 626 op = operator_functions[n.__class__.__name__] 627 invocation = compiler.ast.CallFunc( 628 compiler.ast.Name("$op%s" % op), 629 list(n.getChildNodes()) 630 ) 631 return self.process_structure_node(invocation) 632 633 def process_print_node(self, n): 634 635 """ 636 Process the given print node 'n' as an invocation on a stream of the 637 form... 638 639 $print(dest, args, nl) 640 641 The special function name will be translated elsewhere. 642 """ 643 644 nl = isinstance(n, compiler.ast.Printnl) 645 invocation = compiler.ast.CallFunc( 646 compiler.ast.Name("$print"), 647 [n.dest or compiler.ast.Name("None"), 648 compiler.ast.List(list(n.nodes)), 649 nl and compiler.ast.Name("True") or compiler.ast.Name("False")] 650 ) 651 return self.process_structure_node(invocation) 652 653 def process_slice_node(self, n, expr=None): 654 655 """ 656 Process the given slice node 'n' as an operator function invocation. 657 """ 658 659 if n.flags == "OP_ASSIGN": op = "setslice" 660 elif n.flags == "OP_DELETE": op = "delslice" 661 else: op = "getslice" 662 663 invocation = compiler.ast.CallFunc( 664 compiler.ast.Name("$op%s" % op), 665 [n.expr, n.lower or compiler.ast.Name("None"), n.upper or compiler.ast.Name("None")] + 666 (expr and [expr] or []) 667 ) 668 669 # Fix parse tree structure. 670 671 if op == "delslice": 672 invocation = compiler.ast.Discard(invocation) 673 674 return self.process_structure_node(invocation) 675 676 def process_sliceobj_node(self, n): 677 678 """ 679 Process the given slice object node 'n' as a slice constructor. 680 """ 681 682 op = "slice" 683 invocation = compiler.ast.CallFunc( 684 compiler.ast.Name("$op%s" % op), 685 n.nodes 686 ) 687 return self.process_structure_node(invocation) 688 689 def process_subscript_node(self, n, expr=None): 690 691 """ 692 Process the given subscript node 'n' as an operator function invocation. 693 """ 694 695 if n.flags == "OP_ASSIGN": op = "setitem" 696 elif n.flags == "OP_DELETE": op = "delitem" 697 else: op = "getitem" 698 699 invocation = compiler.ast.CallFunc( 700 compiler.ast.Name("$op%s" % op), 701 [n.expr] + list(n.subs) + (expr and [expr] or []) 702 ) 703 704 # Fix parse tree structure. 705 706 if op == "delitem": 707 invocation = compiler.ast.Discard(invocation) 708 709 return self.process_structure_node(invocation) 710 711 def process_attribute_chain(self, n): 712 713 """ 714 Process the given attribute access node 'n'. Return a reference 715 describing the expression. 716 """ 717 718 # AssAttr/Getattr are nested with the outermost access being the last 719 # access in any chain. 720 721 self.attrs.insert(0, n.attrname) 722 attrs = self.attrs 723 724 # Break attribute chains where non-access nodes are found. 725 726 if not self.have_access_expression(n): 727 self.reset_attribute_chain() 728 729 # Descend into the expression, extending backwards any existing chain, 730 # or building another for the expression. 731 732 name_ref = self.process_structure_node(n.expr) 733 734 # Restore chain information applying to this node. 735 736 if not self.have_access_expression(n): 737 self.restore_attribute_chain(attrs) 738 739 # Return immediately if the expression was another access and thus a 740 # continuation backwards along the chain. The above processing will 741 # have followed the chain all the way to its conclusion. 742 743 if self.have_access_expression(n): 744 del self.attrs[0] 745 746 return name_ref 747 748 # Attribute chain handling. 749 750 def reset_attribute_chain(self): 751 752 "Reset the attribute chain for a subexpression of an attribute access." 753 754 self.attrs = [] 755 self.chain_assignment.append(self.in_assignment) 756 self.chain_invocation.append(self.in_invocation) 757 self.in_assignment = None 758 self.in_invocation = None 759 760 def restore_attribute_chain(self, attrs): 761 762 "Restore the attribute chain for an attribute access." 763 764 self.attrs = attrs 765 self.in_assignment = self.chain_assignment.pop() 766 self.in_invocation = self.chain_invocation.pop() 767 768 def have_access_expression(self, node): 769 770 "Return whether the expression associated with 'node' is Getattr." 771 772 return isinstance(node.expr, compiler.ast.Getattr) 773 774 def get_name_for_tracking(self, name, name_ref=None): 775 776 """ 777 Return the name to be used for attribute usage observations involving 778 the given 'name' in the current namespace. 779 780 If the name is being used outside a function, and if 'name_ref' is 781 given, a path featuring the name in the global namespace is returned 782 where 'name_ref' indicates a global. Otherwise, a path computed using 783 the current namespace and the given name is returned. 784 785 The intention of this method is to provide a suitably-qualified name 786 that can be tracked across namespaces. Where globals are being 787 referenced in class namespaces, they should be referenced using their 788 path within the module, not using a path within each class. 789 790 It may not be possible to identify a global within a function at the 791 time of inspection (since a global may appear later in a file). 792 Consequently, globals are identified by their local name rather than 793 their module-qualified path. 794 """ 795 796 # For functions, use the appropriate local names. 797 798 if self.in_function: 799 return name 800 801 # For global names outside functions, use a global name. 802 803 elif name_ref and name_ref.is_global_name(): 804 return self.get_global_path(name) 805 806 # Otherwise, establish a name in the current namespace. 807 808 else: 809 return self.get_object_path(name) 810 811 def get_path_for_access(self): 812 813 "Outside functions, register accesses at the module level." 814 815 if not self.in_function: 816 return self.name 817 else: 818 return self.get_namespace_path() 819 820 def get_module_name(self, node): 821 822 """ 823 Using the given From 'node' in this module, calculate any relative import 824 information, returning a tuple containing a module to import along with any 825 names to import based on the node's name information. 826 827 Where the returned module is given as None, whole module imports should 828 be performed for the returned modules using the returned names. 829 """ 830 831 # Absolute import. 832 833 if node.level == 0: 834 return node.modname, node.names 835 836 # Relative to an ancestor of this module. 837 838 else: 839 path = self.name.split(".") 840 level = node.level 841 842 # Relative imports treat package roots as submodules. 843 844 if split(self.filename)[-1] == "__init__.py": 845 level -= 1 846 847 if level > len(path): 848 raise InspectError("Relative import %r involves too many levels up from module %r" % ( 849 ("%s%s" % ("." * node.level, node.modname or "")), self.name)) 850 851 basename = ".".join(path[:len(path)-level]) 852 853 # Name imports from a module. 854 855 if node.modname: 856 return "%s.%s" % (basename, node.modname), node.names 857 858 # Relative whole module imports. 859 860 else: 861 return basename, node.names 862 863 def get_argnames(args): 864 865 """ 866 Return a list of all names provided by 'args'. Since tuples may be 867 employed, the arguments are traversed depth-first. 868 """ 869 870 l = [] 871 for arg in args: 872 if isinstance(arg, tuple): 873 l += get_argnames(arg) 874 else: 875 l.append(arg) 876 return l 877 878 def get_names_from_nodes(nodes): 879 880 """ 881 Return the names employed in the given 'nodes' along with the number of 882 nodes excluding sequences. 883 """ 884 885 names = set() 886 count = 0 887 888 for node in nodes: 889 890 # Add names and count them. 891 892 if isinstance(node, (compiler.ast.AssName, compiler.ast.Name)): 893 names.add(node.name) 894 count += 1 895 896 # Add names from sequences and incorporate their counts. 897 898 elif isinstance(node, (compiler.ast.AssList, compiler.ast.AssTuple, 899 compiler.ast.List, compiler.ast.Set, 900 compiler.ast.Tuple)): 901 _names, _count = get_names_from_nodes(node.nodes) 902 names.update(_names) 903 count += _count 904 905 # Count non-name, non-sequence nodes. 906 907 else: 908 count += 1 909 910 return names, count 911 912 # Result classes. 913 914 class InstructionSequence: 915 916 "A generic sequence of instructions." 917 918 def __init__(self, instructions): 919 self.instructions = instructions 920 921 def get_value_instruction(self): 922 return self.instructions[-1] 923 924 def get_init_instructions(self): 925 return self.instructions[:-1] 926 927 # Dictionary utilities. 928 929 def init_item(d, key, fn): 930 931 """ 932 Add to 'd' an entry for 'key' using the callable 'fn' to make an initial 933 value where no entry already exists. 934 """ 935 936 if not d.has_key(key): 937 d[key] = fn() 938 return d[key] 939 940 def dict_for_keys(d, keys): 941 942 "Return a new dictionary containing entries from 'd' for the given 'keys'." 943 944 nd = {} 945 for key in keys: 946 if d.has_key(key): 947 nd[key] = d[key] 948 return nd 949 950 def make_key(s): 951 952 "Make sequence 's' into a tuple-based key, first sorting its contents." 953 954 l = list(s) 955 l.sort() 956 return tuple(l) 957 958 def add_counter_item(d, key): 959 960 """ 961 Make a mapping in 'd' for 'key' to the number of keys added before it, thus 962 maintaining a mapping of keys to their order of insertion. 963 """ 964 965 if not d.has_key(key): 966 d[key] = len(d.keys()) 967 return d[key] 968 969 def remove_items(d1, d2): 970 971 "Remove from 'd1' all items from 'd2'." 972 973 for key in d2.keys(): 974 if d1.has_key(key): 975 del d1[key] 976 977 # Set utilities. 978 979 def first(s): 980 return list(s)[0] 981 982 def same(s1, s2): 983 return set(s1) == set(s2) 984 985 # General input/output. 986 987 def readfile(filename): 988 989 "Return the contents of 'filename'." 990 991 f = open(filename) 992 try: 993 return f.read() 994 finally: 995 f.close() 996 997 def writefile(filename, s): 998 999 "Write to 'filename' the string 's'." 1000 1001 f = open(filename, "w") 1002 try: 1003 f.write(s) 1004 finally: 1005 f.close() 1006 1007 # General encoding. 1008 1009 def sorted_output(x): 1010 1011 "Sort sequence 'x' and return a string with commas separating the values." 1012 1013 x = map(str, x) 1014 x.sort() 1015 return ", ".join(x) 1016 1017 def get_string_details(literals, encoding): 1018 1019 """ 1020 Determine whether 'literals' represent Unicode strings or byte strings, 1021 using 'encoding' to reproduce byte sequences. 1022 1023 Each literal is the full program representation including prefix and quotes 1024 recoded by the parser to UTF-8. Thus, any literal found to represent a byte 1025 string needs to be translated back to its original encoding. 1026 1027 Return a single encoded literal value, a type name, and the original 1028 encoding as a tuple. 1029 """ 1030 1031 typename = "unicode" 1032 1033 l = [] 1034 1035 for s in literals: 1036 out, _typename = get_literal_details(s) 1037 if _typename == "str": 1038 typename = "str" 1039 l.append(out) 1040 1041 out = "".join(l) 1042 1043 # For Unicode values, convert to the UTF-8 program representation. 1044 1045 if typename == "unicode": 1046 return out.encode("utf-8"), typename, encoding 1047 1048 # For byte string values, convert back to the original encoding. 1049 1050 else: 1051 return out.encode(encoding), typename, encoding 1052 1053 def get_literal_details(s): 1054 1055 """ 1056 Determine whether 's' represents a Unicode string or a byte string, where 1057 's' contains the full program representation of a literal including prefix 1058 and quotes, recoded by the parser to UTF-8. 1059 1060 Find and convert Unicode values starting with <backslash>u or <backslash>U, 1061 and byte or Unicode values starting with <backslash><octal digit> or 1062 <backslash>x. 1063 1064 Literals prefixed with "u" cause <backslash><octal digit> and <backslash>x 1065 to be considered as Unicode values. Otherwise, they produce byte values and 1066 cause unprefixed strings to be considered as byte strings. 1067 1068 Literals prefixed with "r" do not have their backslash-encoded values 1069 converted unless also prefixed with "u", in which case only the above value 1070 formats are converted, not any of the other special sequences for things 1071 like newlines. 1072 1073 Return the literal value as a Unicode object together with the appropriate 1074 type name in a tuple. 1075 """ 1076 1077 l = [] 1078 1079 # Identify the quote character and use it to identify the prefix. 1080 1081 quote_type = s[-1] 1082 prefix_end = s.find(quote_type) 1083 prefix = s[:prefix_end].lower() 1084 1085 if prefix not in ("", "b", "br", "r", "u", "ur"): 1086 raise ValueError, "String literal does not have a supported prefix: %s" % s 1087 1088 if "b" in prefix: 1089 typename = "str" 1090 else: 1091 typename = "unicode" 1092 1093 # Identify triple quotes or single quotes. 1094 1095 if len(s) >= 6 and s[-2] == quote_type and s[-3] == quote_type: 1096 quote = s[prefix_end:prefix_end+3] 1097 current = prefix_end + 3 1098 end = len(s) - 3 1099 else: 1100 quote = s[prefix_end] 1101 current = prefix_end + 1 1102 end = len(s) - 1 1103 1104 # Conversions of some quoted values. 1105 1106 searches = { 1107 "u" : (6, 16), 1108 "U" : (10, 16), 1109 "x" : (4, 16), 1110 } 1111 1112 octal_digits = map(str, range(0, 8)) 1113 1114 # Translations of some quoted values. 1115 1116 escaped = { 1117 "\\" : "\\", "'" : "'", '"' : '"', 1118 "a" : "\a", "b" : "\b", "f" : "\f", 1119 "n" : "\n", "r" : "\r", "t" : "\t", 1120 } 1121 1122 while current < end: 1123 1124 # Look for quoted values. 1125 1126 index = s.find("\\", current) 1127 if index == -1 or index + 1 == end: 1128 l.append(s[current:end]) 1129 break 1130 1131 # Add the preceding text. 1132 1133 l.append(s[current:index]) 1134 1135 # Handle quoted text. 1136 1137 term = s[index+1] 1138 1139 # Add Unicode values. Where a string is u-prefixed, even \o and \x 1140 # produce Unicode values. 1141 1142 if typename == "unicode" and ( 1143 term in ("u", "U") or 1144 "u" in prefix and (term == "x" or term in octal_digits)): 1145 1146 needed, base = searches.get(term, (4, 8)) 1147 value = convert_quoted_value(s, index, needed, end, base, unichr) 1148 l.append(value) 1149 current = index + needed 1150 1151 # Add raw byte values, changing the string type. 1152 1153 elif "r" not in prefix and ( 1154 term == "x" or term in octal_digits): 1155 1156 needed, base = searches.get(term, (4, 8)) 1157 value = convert_quoted_value(s, index, needed, end, base, chr) 1158 l.append(value) 1159 typename = "str" 1160 current = index + needed 1161 1162 # Add other escaped values. 1163 1164 elif "r" not in prefix and escaped.has_key(term): 1165 l.append(escaped[term]) 1166 current = index + 2 1167 1168 # Add other text as found. 1169 1170 else: 1171 l.append(s[index:index+2]) 1172 current = index + 2 1173 1174 # Collect the components into a single Unicode object. Since the literal 1175 # text was already in UTF-8 form, interpret plain strings as UTF-8 1176 # sequences. 1177 1178 out = [] 1179 1180 for value in l: 1181 if isinstance(value, unicode): 1182 out.append(value) 1183 else: 1184 out.append(unicode(value, "utf-8")) 1185 1186 return "".join(out), typename 1187 1188 def convert_quoted_value(s, index, needed, end, base, fn): 1189 1190 """ 1191 Interpret a quoted value in 's' at 'index' with the given 'needed' number of 1192 positions, and with the given 'end' indicating the first position after the 1193 end of the actual string content. 1194 1195 Use 'base' as the numerical base when interpreting the value, and use 'fn' 1196 to convert the value to an appropriate type. 1197 """ 1198 1199 s = s[index:min(index+needed, end)] 1200 1201 # Not a complete occurrence. 1202 1203 if len(s) < needed: 1204 return s 1205 1206 # Test for a well-formed value. 1207 1208 try: 1209 first = base == 8 and 1 or 2 1210 value = int(s[first:needed], base) 1211 except ValueError: 1212 return s 1213 else: 1214 return fn(value) 1215 1216 # Attribute chain decoding. 1217 1218 def get_attrnames(attrnames): 1219 1220 """ 1221 Split the qualified attribute chain 'attrnames' into its components, 1222 handling special attributes starting with "#" that indicate type 1223 conformance. 1224 """ 1225 1226 if attrnames.startswith("#"): 1227 return [attrnames] 1228 else: 1229 return attrnames.split(".") 1230 1231 def get_attrname_from_location(location): 1232 1233 """ 1234 Extract the first attribute from the attribute names employed in a 1235 'location'. 1236 """ 1237 1238 path, name, attrnames, access = location 1239 if not attrnames: 1240 return attrnames 1241 return get_attrnames(attrnames)[0] 1242 1243 def get_name_path(path, name): 1244 1245 "Return a suitable qualified name from the given 'path' and 'name'." 1246 1247 if "." in name: 1248 return name 1249 else: 1250 return "%s.%s" % (path, name) 1251 1252 # Usage-related functions. 1253 1254 def get_types_for_usage(attrnames, objects): 1255 1256 """ 1257 Identify the types that can support the given 'attrnames', using the 1258 given 'objects' as the catalogue of type details. 1259 """ 1260 1261 types = [] 1262 for name, _attrnames in objects.items(): 1263 if set(attrnames).issubset(_attrnames): 1264 types.append(name) 1265 return types 1266 1267 def get_invoked_attributes(usage): 1268 1269 "Obtain invoked attribute from the given 'usage'." 1270 1271 invoked = [] 1272 if usage: 1273 for attrname, invocation, assignment in usage: 1274 if invocation: 1275 invoked.append(attrname) 1276 return invoked 1277 1278 def get_assigned_attributes(usage): 1279 1280 "Obtain assigned attribute from the given 'usage'." 1281 1282 assigned = [] 1283 if usage: 1284 for attrname, invocation, assignment in usage: 1285 if assignment: 1286 assigned.append(attrname) 1287 return assigned 1288 1289 # Type and module functions. 1290 # NOTE: This makes assumptions about the __builtins__ structure. 1291 1292 def get_builtin_module(name): 1293 1294 "Return the module name containing the given type 'name'." 1295 1296 if name == "string": 1297 modname = "str" 1298 elif name == "utf8string": 1299 modname = "unicode" 1300 elif name == "NoneType": 1301 modname = "none" 1302 else: 1303 modname = name 1304 1305 return "__builtins__.%s" % modname 1306 1307 def get_builtin_type(name): 1308 1309 "Return the type name provided by the given Python value 'name'." 1310 1311 if name == "str": 1312 return "string" 1313 elif name == "unicode": 1314 return "utf8string" 1315 else: 1316 return name 1317 1318 def get_builtin_class(name): 1319 1320 "Return the full name of the built-in class having the given 'name'." 1321 1322 typename = get_builtin_type(name) 1323 module = get_builtin_module(typename) 1324 return "%s.%s" % (module, typename) 1325 1326 # Useful data. 1327 1328 predefined_constants = "False", "None", "NotImplemented", "True" 1329 1330 operator_functions = { 1331 1332 # Fundamental operations. 1333 1334 "is" : "is_", 1335 "is not" : "is_not", 1336 1337 # Binary operations. 1338 1339 "in" : "in_", 1340 "not in" : "not_in", 1341 "Add" : "add", 1342 "Bitand" : "and_", 1343 "Bitor" : "or_", 1344 "Bitxor" : "xor", 1345 "Div" : "div", 1346 "FloorDiv" : "floordiv", 1347 "LeftShift" : "lshift", 1348 "Mod" : "mod", 1349 "Mul" : "mul", 1350 "Power" : "pow", 1351 "RightShift" : "rshift", 1352 "Sub" : "sub", 1353 1354 # Unary operations. 1355 1356 "Invert" : "invert", 1357 "UnaryAdd" : "pos", 1358 "UnarySub" : "neg", 1359 1360 # Augmented assignment. 1361 1362 "+=" : "iadd", 1363 "-=" : "isub", 1364 "*=" : "imul", 1365 "/=" : "idiv", 1366 "//=" : "ifloordiv", 1367 "%=" : "imod", 1368 "**=" : "ipow", 1369 "<<=" : "ilshift", 1370 ">>=" : "irshift", 1371 "&=" : "iand", 1372 "^=" : "ixor", 1373 "|=" : "ior", 1374 1375 # Comparisons. 1376 1377 "==" : "eq", 1378 "!=" : "ne", 1379 "<" : "lt", 1380 "<=" : "le", 1381 ">=" : "ge", 1382 ">" : "gt", 1383 } 1384 1385 # vim: tabstop=4 expandtab shiftwidth=4