1 #!/usr/bin/env python 2 3 """ 4 Common functions. 5 6 Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013, 7 2014, 2015, 2016, 2017 Paul Boddie <paul@boddie.org.uk> 8 9 This program is free software; you can redistribute it and/or modify it under 10 the terms of the GNU General Public License as published by the Free Software 11 Foundation; either version 3 of the License, or (at your option) any later 12 version. 13 14 This program is distributed in the hope that it will be useful, but WITHOUT 15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 16 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 17 details. 18 19 You should have received a copy of the GNU General Public License along with 20 this program. If not, see <http://www.gnu.org/licenses/>. 21 """ 22 23 from compiler.transformer import Transformer 24 from errors import InspectError 25 from os import listdir, makedirs, remove 26 from os.path import exists, getmtime, isdir, join, split 27 from results import ConstantValueRef, LiteralSequenceRef, NameRef 28 import compiler.ast 29 30 class CommonOutput: 31 32 "Common output functionality." 33 34 def check_output(self, options=None): 35 36 "Check the existing output and remove it if irrelevant." 37 38 if not exists(self.output): 39 makedirs(self.output) 40 41 details = self.importer.get_cache_details() 42 recorded_details = self.get_output_details() 43 44 # Combine cache details with any options. 45 46 full_details = options and (details + " " + options) or details 47 48 if recorded_details != full_details: 49 self.remove_output() 50 51 writefile(self.get_output_details_filename(), full_details) 52 53 def get_output_details_filename(self): 54 55 "Return the output details filename." 56 57 return join(self.output, "$details") 58 59 def get_output_details(self): 60 61 "Return details of the existing output." 62 63 details_filename = self.get_output_details_filename() 64 65 if not exists(details_filename): 66 return None 67 else: 68 return readfile(details_filename) 69 70 def remove_output(self, dirname=None): 71 72 "Remove the output." 73 74 dirname = dirname or self.output 75 76 for filename in listdir(dirname): 77 path = join(dirname, filename) 78 if isdir(path): 79 self.remove_output(path) 80 else: 81 remove(path) 82 83 def copy(source, target, only_if_newer=True): 84 85 "Copy a text file from 'source' to 'target'." 86 87 if isdir(target): 88 target = join(target, split(source)[-1]) 89 90 if only_if_newer and not is_newer(source, target): 91 return 92 93 infile = open(source) 94 outfile = open(target, "w") 95 96 try: 97 outfile.write(infile.read()) 98 finally: 99 outfile.close() 100 infile.close() 101 102 def is_newer(source, target): 103 104 "Return whether 'source' is newer than 'target'." 105 106 if exists(target): 107 target_mtime = getmtime(target) 108 source_mtime = getmtime(source) 109 return source_mtime > target_mtime 110 111 return True 112 113 class CommonModule: 114 115 "A common module representation." 116 117 def __init__(self, name, importer): 118 119 """ 120 Initialise this module with the given 'name' and an 'importer' which is 121 used to provide access to other modules when required. 122 """ 123 124 self.name = name 125 self.importer = importer 126 self.filename = None 127 128 # Inspection-related attributes. 129 130 self.astnode = None 131 self.encoding = None 132 self.iterators = {} 133 self.temp = {} 134 self.lambdas = {} 135 136 # Constants, literals and values. 137 138 self.constants = {} 139 self.constant_values = {} 140 self.literals = {} 141 self.literal_types = {} 142 143 # Nested namespaces. 144 145 self.namespace_path = [] 146 self.in_function = False 147 148 # Retain the assignment value expression and track invocations. 149 150 self.in_assignment = None 151 self.in_invocation = None 152 153 # Attribute chain state management. 154 155 self.attrs = [] 156 self.chain_assignment = [] 157 self.chain_invocation = [] 158 159 def __repr__(self): 160 return "CommonModule(%r, %r)" % (self.name, self.importer) 161 162 def parse_file(self, filename): 163 164 "Parse the file with the given 'filename', initialising attributes." 165 166 self.filename = filename 167 168 # Use the Transformer directly to obtain encoding information. 169 170 t = Transformer() 171 f = open(filename) 172 173 try: 174 self.astnode = t.parsesuite(f.read() + "\n") 175 self.encoding = t.encoding 176 finally: 177 f.close() 178 179 # Module-relative naming. 180 181 def get_global_path(self, name): 182 return "%s.%s" % (self.name, name) 183 184 def get_namespace_path(self): 185 return ".".join([self.name] + self.namespace_path) 186 187 def get_object_path(self, name): 188 return ".".join([self.name] + self.namespace_path + [name]) 189 190 def get_parent_path(self): 191 return ".".join([self.name] + self.namespace_path[:-1]) 192 193 # Namespace management. 194 195 def enter_namespace(self, name): 196 197 "Enter the namespace having the given 'name'." 198 199 self.namespace_path.append(name) 200 201 def exit_namespace(self): 202 203 "Exit the current namespace." 204 205 self.namespace_path.pop() 206 207 # Constant reference naming. 208 209 def get_constant_name(self, value, value_type, encoding=None): 210 211 """ 212 Add a new constant to the current namespace for 'value' with 213 'value_type'. 214 """ 215 216 path = self.get_namespace_path() 217 init_item(self.constants, path, dict) 218 return "$c%d" % add_counter_item(self.constants[path], (value, value_type, encoding)) 219 220 # Literal reference naming. 221 222 def get_literal_name(self): 223 224 "Add a new literal to the current namespace." 225 226 path = self.get_namespace_path() 227 init_item(self.literals, path, lambda: 0) 228 return "$C%d" % self.literals[path] 229 230 def next_literal(self): 231 self.literals[self.get_namespace_path()] += 1 232 233 # Temporary iterator naming. 234 235 def get_iterator_path(self): 236 return self.in_function and self.get_namespace_path() or self.name 237 238 def get_iterator_name(self): 239 path = self.get_iterator_path() 240 init_item(self.iterators, path, lambda: 0) 241 return "$i%d" % self.iterators[path] 242 243 def next_iterator(self): 244 self.iterators[self.get_iterator_path()] += 1 245 246 # Temporary variable naming. 247 248 def get_temporary_name(self): 249 path = self.get_namespace_path() 250 init_item(self.temp, path, lambda: 0) 251 return "$t%d" % self.temp[path] 252 253 def next_temporary(self): 254 self.temp[self.get_namespace_path()] += 1 255 256 # Arbitrary function naming. 257 258 def get_lambda_name(self): 259 path = self.get_namespace_path() 260 init_item(self.lambdas, path, lambda: 0) 261 name = "$l%d" % self.lambdas[path] 262 self.lambdas[path] += 1 263 return name 264 265 def reset_lambdas(self): 266 self.lambdas = {} 267 268 # Constant and literal recording. 269 270 def get_constant_value(self, value, literals=None): 271 272 """ 273 Encode the 'value' if appropriate, returning a value, a typename and any 274 encoding. 275 """ 276 277 if isinstance(value, unicode): 278 return value.encode("utf-8"), "unicode", self.encoding 279 280 # Attempt to convert plain strings to text. 281 282 elif isinstance(value, str) and self.encoding: 283 try: 284 return get_string_details(literals, self.encoding) 285 except UnicodeDecodeError: 286 pass 287 288 return value, value.__class__.__name__, None 289 290 def get_constant_reference(self, ref, value, encoding=None): 291 292 """ 293 Return a constant reference for the given 'ref' type and 'value', with 294 the optional 'encoding' applying to text values. 295 """ 296 297 constant_name = self.get_constant_name(value, ref.get_origin(), encoding) 298 299 # Return a reference for the constant. 300 301 objpath = self.get_object_path(constant_name) 302 name_ref = ConstantValueRef(constant_name, ref.instance_of(objpath), value) 303 304 # Record the value and type for the constant. 305 306 self._reserve_constant(objpath, name_ref.value, name_ref.get_origin(), encoding) 307 return name_ref 308 309 def reserve_constant(self, objpath, value, origin, encoding=None): 310 311 """ 312 Reserve a constant within 'objpath' with the given 'value' and having a 313 type with the given 'origin', with the optional 'encoding' applying to 314 text values. 315 """ 316 317 constant_name = self.get_constant_name(value, origin) 318 objpath = self.get_object_path(constant_name) 319 self._reserve_constant(objpath, value, origin, encoding) 320 321 def _reserve_constant(self, objpath, value, origin, encoding): 322 323 """ 324 Store a constant for 'objpath' with the given 'value' and 'origin', with 325 the optional 'encoding' applying to text values. 326 """ 327 328 self.constant_values[objpath] = value, origin, encoding 329 330 def get_literal_reference(self, name, ref, items, cls): 331 332 """ 333 Return a literal reference for the given type 'name', literal 'ref', 334 node 'items' and employing the given 'cls' as the class of the returned 335 reference object. 336 """ 337 338 # Construct an invocation using the items as arguments. 339 340 typename = "$L%s" % name 341 342 invocation = compiler.ast.CallFunc( 343 compiler.ast.Name(typename), 344 items 345 ) 346 347 # Get a name for the actual literal. 348 349 instname = self.get_literal_name() 350 self.next_literal() 351 352 # Record the type for the literal. 353 354 objpath = self.get_object_path(instname) 355 self.literal_types[objpath] = ref.get_origin() 356 357 # Return a wrapper for the invocation exposing the items. 358 359 return cls( 360 instname, 361 ref.instance_of(), 362 self.process_structure_node(invocation), 363 invocation.args 364 ) 365 366 # Node handling. 367 368 def process_structure(self, node): 369 370 """ 371 Within the given 'node', process the program structure. 372 373 During inspection, this will process global declarations, adjusting the 374 module namespace, and import statements, building a module dependency 375 hierarchy. 376 377 During translation, this will consult deduced program information and 378 output translated code. 379 """ 380 381 l = [] 382 for n in node.getChildNodes(): 383 l.append(self.process_structure_node(n)) 384 return l 385 386 def process_augassign_node(self, n): 387 388 "Process the given augmented assignment node 'n'." 389 390 op = operator_functions[n.op] 391 392 if isinstance(n.node, compiler.ast.Getattr): 393 target = compiler.ast.AssAttr(n.node.expr, n.node.attrname, "OP_ASSIGN") 394 elif isinstance(n.node, compiler.ast.Name): 395 target = compiler.ast.AssName(n.node.name, "OP_ASSIGN") 396 else: 397 target = n.node 398 399 assignment = compiler.ast.Assign( 400 [target], 401 compiler.ast.CallFunc( 402 compiler.ast.Name("$op%s" % op), 403 [n.node, n.expr])) 404 405 return self.process_structure_node(assignment) 406 407 def process_assignment_for_object(self, original_name, source): 408 409 """ 410 Return an assignment operation making 'original_name' refer to the given 411 'source'. 412 """ 413 414 assignment = compiler.ast.Assign( 415 [compiler.ast.AssName(original_name, "OP_ASSIGN")], 416 source 417 ) 418 419 return self.process_structure_node(assignment) 420 421 def process_assignment_node_items(self, n, expr): 422 423 """ 424 Process the given assignment node 'n' whose children are to be assigned 425 items of 'expr'. 426 """ 427 428 name_ref = self.process_structure_node(expr) 429 430 # Either unpack the items and present them directly to each assignment 431 # node. 432 433 if isinstance(name_ref, LiteralSequenceRef) and \ 434 self.process_literal_sequence_items(n, name_ref): 435 436 pass 437 438 # Or have the assignment nodes access each item via the sequence API. 439 440 else: 441 self.process_assignment_node_items_by_position(n, expr, name_ref) 442 443 def process_assignment_node_items_by_position(self, n, expr, name_ref): 444 445 """ 446 Process the given sequence assignment node 'n', converting the node to 447 the separate assignment of each target using positional access on a 448 temporary variable representing the sequence. Use 'expr' as the assigned 449 value and 'name_ref' as the reference providing any existing temporary 450 variable. 451 """ 452 453 assignments = [] 454 455 # Employ existing names to access the sequence. 456 # Literal sequences do not provide names of accessible objects. 457 458 if isinstance(name_ref, NameRef) and not isinstance(name_ref, LiteralSequenceRef): 459 temp = name_ref.name 460 461 # For other expressions, create a temporary name to reference the items. 462 463 else: 464 temp = self.get_temporary_name() 465 self.next_temporary() 466 467 assignments.append( 468 compiler.ast.Assign([compiler.ast.AssName(temp, "OP_ASSIGN")], expr) 469 ) 470 471 # Assign the items to the target nodes. 472 473 for i, node in enumerate(n.nodes): 474 assignments.append( 475 compiler.ast.Assign([node], compiler.ast.Subscript( 476 compiler.ast.Name(temp), "OP_APPLY", [compiler.ast.Const(i, str(i))])) 477 ) 478 479 return self.process_structure_node(compiler.ast.Stmt(assignments)) 480 481 def process_literal_sequence_items(self, n, name_ref): 482 483 """ 484 Process the given assignment node 'n', obtaining from the given 485 'name_ref' the items to be assigned to the assignment targets. 486 487 Return whether this method was able to process the assignment node as 488 a sequence of direct assignments. 489 """ 490 491 if len(n.nodes) == len(name_ref.items): 492 assigned_names, count = get_names_from_nodes(n.nodes) 493 accessed_names, _count = get_names_from_nodes(name_ref.items) 494 495 # Only assign directly between items if all assigned names are 496 # plain names (not attribute assignments), and if the assigned names 497 # do not appear in the accessed names. 498 499 if len(assigned_names) == count and \ 500 not assigned_names.intersection(accessed_names): 501 502 for node, item in zip(n.nodes, name_ref.items): 503 self.process_assignment_node(node, item) 504 505 return True 506 507 # Otherwise, use the position-based mechanism to obtain values. 508 509 else: 510 return False 511 else: 512 raise InspectError("In %s, item assignment needing %d items is given %d items." % ( 513 self.get_namespace_path(), len(n.nodes), len(name_ref.items))) 514 515 def process_compare_node(self, n): 516 517 """ 518 Process the given comparison node 'n', converting an operator sequence 519 from... 520 521 <expr1> <op1> <expr2> <op2> <expr3> 522 523 ...to... 524 525 <op1>(<expr1>, <expr2>) and <op2>(<expr2>, <expr3>) 526 """ 527 528 invocations = [] 529 last = n.expr 530 531 for op, op_node in n.ops: 532 op = operator_functions.get(op) 533 534 invocations.append(compiler.ast.CallFunc( 535 compiler.ast.Name("$op%s" % op), 536 [last, op_node])) 537 538 last = op_node 539 540 if len(invocations) > 1: 541 result = compiler.ast.And(invocations) 542 else: 543 result = invocations[0] 544 545 return self.process_structure_node(result) 546 547 def process_dict_node(self, node): 548 549 """ 550 Process the given dictionary 'node', returning a list of (key, value) 551 tuples. 552 """ 553 554 l = [] 555 for key, value in node.items: 556 l.append(( 557 self.process_structure_node(key), 558 self.process_structure_node(value))) 559 return l 560 561 def process_for_node(self, n): 562 563 """ 564 Generate attribute accesses for {n.list}.__iter__ and the next method on 565 the iterator, producing a replacement node for the original. 566 """ 567 568 node = compiler.ast.Stmt([ 569 570 # <next> = {n.list}.__iter__().next 571 572 compiler.ast.Assign( 573 [compiler.ast.AssName(self.get_iterator_name(), "OP_ASSIGN")], 574 compiler.ast.Getattr( 575 compiler.ast.CallFunc( 576 compiler.ast.Getattr(n.list, "__iter__"), 577 [] 578 ), "next")), 579 580 # try: 581 # while True: 582 # <var>... = <next>() 583 # ... 584 # except StopIteration: 585 # pass 586 587 compiler.ast.TryExcept( 588 compiler.ast.While( 589 compiler.ast.Name("True"), 590 compiler.ast.Stmt([ 591 compiler.ast.Assign( 592 [n.assign], 593 compiler.ast.CallFunc( 594 compiler.ast.Name(self.get_iterator_name()), 595 [] 596 )), 597 n.body]), 598 None), 599 [(compiler.ast.Name("StopIteration"), None, compiler.ast.Stmt([compiler.ast.Pass()]))], 600 None) 601 ]) 602 603 self.next_iterator() 604 self.process_structure_node(node) 605 606 def process_literal_sequence_node(self, n, name, ref, cls): 607 608 """ 609 Process the given literal sequence node 'n' as a function invocation, 610 with 'name' indicating the type of the sequence, and 'ref' being a 611 reference to the type. The 'cls' is used to instantiate a suitable name 612 reference. 613 """ 614 615 if name == "dict": 616 items = [] 617 for key, value in n.items: 618 items.append(compiler.ast.Tuple([key, value])) 619 else: # name in ("list", "tuple"): 620 items = n.nodes 621 622 return self.get_literal_reference(name, ref, items, cls) 623 624 def process_operator_node(self, n): 625 626 """ 627 Process the given operator node 'n' as an operator function invocation. 628 """ 629 630 op = operator_functions[n.__class__.__name__] 631 invocation = compiler.ast.CallFunc( 632 compiler.ast.Name("$op%s" % op), 633 list(n.getChildNodes()) 634 ) 635 return self.process_structure_node(invocation) 636 637 def process_print_node(self, n): 638 639 """ 640 Process the given print node 'n' as an invocation on a stream of the 641 form... 642 643 $print(dest, args, nl) 644 645 The special function name will be translated elsewhere. 646 """ 647 648 nl = isinstance(n, compiler.ast.Printnl) 649 invocation = compiler.ast.CallFunc( 650 compiler.ast.Name("$print"), 651 [n.dest or compiler.ast.Name("None"), 652 compiler.ast.List(list(n.nodes)), 653 nl and compiler.ast.Name("True") or compiler.ast.Name("False")] 654 ) 655 return self.process_structure_node(invocation) 656 657 def process_slice_node(self, n, expr=None): 658 659 """ 660 Process the given slice node 'n' as an operator function invocation. 661 """ 662 663 if n.flags == "OP_ASSIGN": op = "setslice" 664 elif n.flags == "OP_DELETE": op = "delslice" 665 else: op = "getslice" 666 667 invocation = compiler.ast.CallFunc( 668 compiler.ast.Name("$op%s" % op), 669 [n.expr, n.lower or compiler.ast.Name("None"), n.upper or compiler.ast.Name("None")] + 670 (expr and [expr] or []) 671 ) 672 673 # Fix parse tree structure. 674 675 if op == "delslice": 676 invocation = compiler.ast.Discard(invocation) 677 678 return self.process_structure_node(invocation) 679 680 def process_sliceobj_node(self, n): 681 682 """ 683 Process the given slice object node 'n' as a slice constructor. 684 """ 685 686 op = "slice" 687 invocation = compiler.ast.CallFunc( 688 compiler.ast.Name("$op%s" % op), 689 n.nodes 690 ) 691 return self.process_structure_node(invocation) 692 693 def process_subscript_node(self, n, expr=None): 694 695 """ 696 Process the given subscript node 'n' as an operator function invocation. 697 """ 698 699 if n.flags == "OP_ASSIGN": op = "setitem" 700 elif n.flags == "OP_DELETE": op = "delitem" 701 else: op = "getitem" 702 703 invocation = compiler.ast.CallFunc( 704 compiler.ast.Name("$op%s" % op), 705 [n.expr] + list(n.subs) + (expr and [expr] or []) 706 ) 707 708 # Fix parse tree structure. 709 710 if op == "delitem": 711 invocation = compiler.ast.Discard(invocation) 712 713 return self.process_structure_node(invocation) 714 715 def process_attribute_chain(self, n): 716 717 """ 718 Process the given attribute access node 'n'. Return a reference 719 describing the expression. 720 """ 721 722 # AssAttr/Getattr are nested with the outermost access being the last 723 # access in any chain. 724 725 self.attrs.insert(0, n.attrname) 726 attrs = self.attrs 727 728 # Break attribute chains where non-access nodes are found. 729 730 if not self.have_access_expression(n): 731 self.reset_attribute_chain() 732 733 # Descend into the expression, extending backwards any existing chain, 734 # or building another for the expression. 735 736 name_ref = self.process_structure_node(n.expr) 737 738 # Restore chain information applying to this node. 739 740 if not self.have_access_expression(n): 741 self.restore_attribute_chain(attrs) 742 743 # Return immediately if the expression was another access and thus a 744 # continuation backwards along the chain. The above processing will 745 # have followed the chain all the way to its conclusion. 746 747 if self.have_access_expression(n): 748 del self.attrs[0] 749 750 return name_ref 751 752 # Attribute chain handling. 753 754 def reset_attribute_chain(self): 755 756 "Reset the attribute chain for a subexpression of an attribute access." 757 758 self.attrs = [] 759 self.chain_assignment.append(self.in_assignment) 760 self.chain_invocation.append(self.in_invocation) 761 self.in_assignment = None 762 self.in_invocation = None 763 764 def restore_attribute_chain(self, attrs): 765 766 "Restore the attribute chain for an attribute access." 767 768 self.attrs = attrs 769 self.in_assignment = self.chain_assignment.pop() 770 self.in_invocation = self.chain_invocation.pop() 771 772 def have_access_expression(self, node): 773 774 "Return whether the expression associated with 'node' is Getattr." 775 776 return isinstance(node.expr, compiler.ast.Getattr) 777 778 def get_name_for_tracking(self, name, name_ref=None): 779 780 """ 781 Return the name to be used for attribute usage observations involving 782 the given 'name' in the current namespace. 783 784 If the name is being used outside a function, and if 'name_ref' is 785 given, a path featuring the name in the global namespace is returned 786 where 'name_ref' indicates a global. Otherwise, a path computed using 787 the current namespace and the given name is returned. 788 789 The intention of this method is to provide a suitably-qualified name 790 that can be tracked across namespaces. Where globals are being 791 referenced in class namespaces, they should be referenced using their 792 path within the module, not using a path within each class. 793 794 It may not be possible to identify a global within a function at the 795 time of inspection (since a global may appear later in a file). 796 Consequently, globals are identified by their local name rather than 797 their module-qualified path. 798 """ 799 800 # For functions, use the appropriate local names. 801 802 if self.in_function: 803 return name 804 805 # For global names outside functions, use a global name. 806 807 elif name_ref and name_ref.is_global_name(): 808 return self.get_global_path(name) 809 810 # Otherwise, establish a name in the current namespace. 811 812 else: 813 return self.get_object_path(name) 814 815 def get_path_for_access(self): 816 817 "Outside functions, register accesses at the module level." 818 819 if not self.in_function: 820 return self.name 821 else: 822 return self.get_namespace_path() 823 824 def get_module_name(self, node): 825 826 """ 827 Using the given From 'node' in this module, calculate any relative import 828 information, returning a tuple containing a module to import along with any 829 names to import based on the node's name information. 830 831 Where the returned module is given as None, whole module imports should 832 be performed for the returned modules using the returned names. 833 """ 834 835 # Absolute import. 836 837 if node.level == 0: 838 return node.modname, node.names 839 840 # Relative to an ancestor of this module. 841 842 else: 843 path = self.name.split(".") 844 level = node.level 845 846 # Relative imports treat package roots as submodules. 847 848 if split(self.filename)[-1] == "__init__.py": 849 level -= 1 850 851 if level > len(path): 852 raise InspectError("Relative import %r involves too many levels up from module %r" % ( 853 ("%s%s" % ("." * node.level, node.modname or "")), self.name)) 854 855 basename = ".".join(path[:len(path)-level]) 856 857 # Name imports from a module. 858 859 if node.modname: 860 return "%s.%s" % (basename, node.modname), node.names 861 862 # Relative whole module imports. 863 864 else: 865 return basename, node.names 866 867 def get_argnames(args): 868 869 """ 870 Return a list of all names provided by 'args'. Since tuples may be 871 employed, the arguments are traversed depth-first. 872 """ 873 874 l = [] 875 for arg in args: 876 if isinstance(arg, tuple): 877 l += get_argnames(arg) 878 else: 879 l.append(arg) 880 return l 881 882 def get_names_from_nodes(nodes): 883 884 """ 885 Return the names employed in the given 'nodes' along with the number of 886 nodes excluding sequences. 887 """ 888 889 names = set() 890 count = 0 891 892 for node in nodes: 893 894 # Add names and count them. 895 896 if isinstance(node, (compiler.ast.AssName, compiler.ast.Name)): 897 names.add(node.name) 898 count += 1 899 900 # Add names from sequences and incorporate their counts. 901 902 elif isinstance(node, (compiler.ast.AssList, compiler.ast.AssTuple, 903 compiler.ast.List, compiler.ast.Set, 904 compiler.ast.Tuple)): 905 _names, _count = get_names_from_nodes(node.nodes) 906 names.update(_names) 907 count += _count 908 909 # Count non-name, non-sequence nodes. 910 911 else: 912 count += 1 913 914 return names, count 915 916 # Result classes. 917 918 class InstructionSequence: 919 920 "A generic sequence of instructions." 921 922 def __init__(self, instructions): 923 self.instructions = instructions 924 925 def get_value_instruction(self): 926 return self.instructions[-1] 927 928 def get_init_instructions(self): 929 return self.instructions[:-1] 930 931 # Dictionary utilities. 932 933 def init_item(d, key, fn): 934 935 """ 936 Add to 'd' an entry for 'key' using the callable 'fn' to make an initial 937 value where no entry already exists. 938 """ 939 940 if not d.has_key(key): 941 d[key] = fn() 942 return d[key] 943 944 def dict_for_keys(d, keys): 945 946 "Return a new dictionary containing entries from 'd' for the given 'keys'." 947 948 nd = {} 949 for key in keys: 950 if d.has_key(key): 951 nd[key] = d[key] 952 return nd 953 954 def make_key(s): 955 956 "Make sequence 's' into a tuple-based key, first sorting its contents." 957 958 l = list(s) 959 l.sort() 960 return tuple(l) 961 962 def add_counter_item(d, key): 963 964 """ 965 Make a mapping in 'd' for 'key' to the number of keys added before it, thus 966 maintaining a mapping of keys to their order of insertion. 967 """ 968 969 if not d.has_key(key): 970 d[key] = len(d.keys()) 971 return d[key] 972 973 def remove_items(d1, d2): 974 975 "Remove from 'd1' all items from 'd2'." 976 977 for key in d2.keys(): 978 if d1.has_key(key): 979 del d1[key] 980 981 # Set utilities. 982 983 def first(s): 984 return list(s)[0] 985 986 def same(s1, s2): 987 return set(s1) == set(s2) 988 989 # General input/output. 990 991 def readfile(filename): 992 993 "Return the contents of 'filename'." 994 995 f = open(filename) 996 try: 997 return f.read() 998 finally: 999 f.close() 1000 1001 def writefile(filename, s): 1002 1003 "Write to 'filename' the string 's'." 1004 1005 f = open(filename, "w") 1006 try: 1007 f.write(s) 1008 finally: 1009 f.close() 1010 1011 # General encoding. 1012 1013 def sorted_output(x): 1014 1015 "Sort sequence 'x' and return a string with commas separating the values." 1016 1017 x = map(str, x) 1018 x.sort() 1019 return ", ".join(x) 1020 1021 def get_string_details(literals, encoding): 1022 1023 """ 1024 Determine whether 'literals' represent Unicode strings or byte strings, 1025 using 'encoding' to reproduce byte sequences. 1026 1027 Each literal is the full program representation including prefix and quotes 1028 recoded by the parser to UTF-8. Thus, any literal found to represent a byte 1029 string needs to be translated back to its original encoding. 1030 1031 Return a single encoded literal value, a type name, and the original 1032 encoding as a tuple. 1033 """ 1034 1035 typename = "unicode" 1036 1037 l = [] 1038 1039 for s in literals: 1040 out, _typename = get_literal_details(s) 1041 if _typename == "str": 1042 typename = "str" 1043 l.append(out) 1044 1045 out = "".join(l) 1046 1047 # For Unicode values, convert to the UTF-8 program representation. 1048 1049 if typename == "unicode": 1050 return out.encode("utf-8"), typename, encoding 1051 1052 # For byte string values, convert back to the original encoding. 1053 1054 else: 1055 return out.encode(encoding), typename, encoding 1056 1057 def get_literal_details(s): 1058 1059 """ 1060 Determine whether 's' represents a Unicode string or a byte string, where 1061 's' contains the full program representation of a literal including prefix 1062 and quotes, recoded by the parser to UTF-8. 1063 1064 Find and convert Unicode values starting with <backslash>u or <backslash>U, 1065 and byte or Unicode values starting with <backslash><octal digit> or 1066 <backslash>x. 1067 1068 Literals prefixed with "u" cause <backslash><octal digit> and <backslash>x 1069 to be considered as Unicode values. Otherwise, they produce byte values and 1070 cause unprefixed strings to be considered as byte strings. 1071 1072 Literals prefixed with "r" do not have their backslash-encoded values 1073 converted unless also prefixed with "u", in which case only the above value 1074 formats are converted, not any of the other special sequences for things 1075 like newlines. 1076 1077 Return the literal value as a Unicode object together with the appropriate 1078 type name in a tuple. 1079 """ 1080 1081 l = [] 1082 1083 # Identify the quote character and use it to identify the prefix. 1084 1085 quote_type = s[-1] 1086 prefix_end = s.find(quote_type) 1087 prefix = s[:prefix_end].lower() 1088 1089 if prefix not in ("", "b", "br", "r", "u", "ur"): 1090 raise ValueError, "String literal does not have a supported prefix: %s" % s 1091 1092 if "b" in prefix: 1093 typename = "str" 1094 else: 1095 typename = "unicode" 1096 1097 # Identify triple quotes or single quotes. 1098 1099 if len(s) >= 6 and s[-2] == quote_type and s[-3] == quote_type: 1100 quote = s[prefix_end:prefix_end+3] 1101 current = prefix_end + 3 1102 end = len(s) - 3 1103 else: 1104 quote = s[prefix_end] 1105 current = prefix_end + 1 1106 end = len(s) - 1 1107 1108 # Conversions of some quoted values. 1109 1110 searches = { 1111 "u" : (6, 16), 1112 "U" : (10, 16), 1113 "x" : (4, 16), 1114 } 1115 1116 octal_digits = map(str, range(0, 8)) 1117 1118 # Translations of some quoted values. 1119 1120 escaped = { 1121 "\\" : "\\", "'" : "'", '"' : '"', 1122 "a" : "\a", "b" : "\b", "f" : "\f", 1123 "n" : "\n", "r" : "\r", "t" : "\t", 1124 } 1125 1126 while current < end: 1127 1128 # Look for quoted values. 1129 1130 index = s.find("\\", current) 1131 if index == -1 or index + 1 == end: 1132 l.append(s[current:end]) 1133 break 1134 1135 # Add the preceding text. 1136 1137 l.append(s[current:index]) 1138 1139 # Handle quoted text. 1140 1141 term = s[index+1] 1142 1143 # Add Unicode values. Where a string is u-prefixed, even \o and \x 1144 # produce Unicode values. 1145 1146 if typename == "unicode" and ( 1147 term in ("u", "U") or 1148 "u" in prefix and (term == "x" or term in octal_digits)): 1149 1150 needed, base = searches.get(term, (4, 8)) 1151 value = convert_quoted_value(s, index, needed, end, base, unichr) 1152 l.append(value) 1153 current = index + needed 1154 1155 # Add raw byte values, changing the string type. 1156 1157 elif "r" not in prefix and ( 1158 term == "x" or term in octal_digits): 1159 1160 needed, base = searches.get(term, (4, 8)) 1161 value = convert_quoted_value(s, index, needed, end, base, chr) 1162 l.append(value) 1163 typename = "str" 1164 current = index + needed 1165 1166 # Add other escaped values. 1167 1168 elif "r" not in prefix and escaped.has_key(term): 1169 l.append(escaped[term]) 1170 current = index + 2 1171 1172 # Add other text as found. 1173 1174 else: 1175 l.append(s[index:index+2]) 1176 current = index + 2 1177 1178 # Collect the components into a single Unicode object. Since the literal 1179 # text was already in UTF-8 form, interpret plain strings as UTF-8 1180 # sequences. 1181 1182 out = [] 1183 1184 for value in l: 1185 if isinstance(value, unicode): 1186 out.append(value) 1187 else: 1188 out.append(unicode(value, "utf-8")) 1189 1190 return "".join(out), typename 1191 1192 def convert_quoted_value(s, index, needed, end, base, fn): 1193 1194 """ 1195 Interpret a quoted value in 's' at 'index' with the given 'needed' number of 1196 positions, and with the given 'end' indicating the first position after the 1197 end of the actual string content. 1198 1199 Use 'base' as the numerical base when interpreting the value, and use 'fn' 1200 to convert the value to an appropriate type. 1201 """ 1202 1203 s = s[index:min(index+needed, end)] 1204 1205 # Not a complete occurrence. 1206 1207 if len(s) < needed: 1208 return s 1209 1210 # Test for a well-formed value. 1211 1212 try: 1213 first = base == 8 and 1 or 2 1214 value = int(s[first:needed], base) 1215 except ValueError: 1216 return s 1217 else: 1218 return fn(value) 1219 1220 # Attribute chain decoding. 1221 1222 def get_attrnames(attrnames): 1223 1224 """ 1225 Split the qualified attribute chain 'attrnames' into its components, 1226 handling special attributes starting with "#" that indicate type 1227 conformance. 1228 """ 1229 1230 if attrnames.startswith("#"): 1231 return [attrnames] 1232 else: 1233 return attrnames.split(".") 1234 1235 def get_attrname_from_location(location): 1236 1237 """ 1238 Extract the first attribute from the attribute names employed in a 1239 'location'. 1240 """ 1241 1242 path, name, attrnames, access = location 1243 if not attrnames: 1244 return attrnames 1245 return get_attrnames(attrnames)[0] 1246 1247 def get_name_path(path, name): 1248 1249 "Return a suitable qualified name from the given 'path' and 'name'." 1250 1251 if "." in name: 1252 return name 1253 else: 1254 return "%s.%s" % (path, name) 1255 1256 # Usage-related functions. 1257 1258 def get_types_for_usage(attrnames, objects): 1259 1260 """ 1261 Identify the types that can support the given 'attrnames', using the 1262 given 'objects' as the catalogue of type details. 1263 """ 1264 1265 types = [] 1266 for name, _attrnames in objects.items(): 1267 if set(attrnames).issubset(_attrnames): 1268 types.append(name) 1269 return types 1270 1271 def get_invoked_attributes(usage): 1272 1273 "Obtain invoked attribute from the given 'usage'." 1274 1275 invoked = [] 1276 if usage: 1277 for attrname, invocation, assignment in usage: 1278 if invocation: 1279 invoked.append(attrname) 1280 return invoked 1281 1282 def get_assigned_attributes(usage): 1283 1284 "Obtain assigned attribute from the given 'usage'." 1285 1286 assigned = [] 1287 if usage: 1288 for attrname, invocation, assignment in usage: 1289 if assignment: 1290 assigned.append(attrname) 1291 return assigned 1292 1293 # Type and module functions. 1294 # NOTE: This makes assumptions about the __builtins__ structure. 1295 1296 def get_builtin_module(name): 1297 1298 "Return the module name containing the given type 'name'." 1299 1300 if name == "string": 1301 modname = "str" 1302 elif name == "utf8string": 1303 modname = "unicode" 1304 elif name == "NoneType": 1305 modname = "none" 1306 else: 1307 modname = name 1308 1309 return "__builtins__.%s" % modname 1310 1311 def get_builtin_type(name): 1312 1313 "Return the type name provided by the given Python value 'name'." 1314 1315 if name == "str": 1316 return "string" 1317 elif name == "unicode": 1318 return "utf8string" 1319 else: 1320 return name 1321 1322 def get_builtin_class(name): 1323 1324 "Return the full name of the built-in class having the given 'name'." 1325 1326 typename = get_builtin_type(name) 1327 module = get_builtin_module(typename) 1328 return "%s.%s" % (module, typename) 1329 1330 # Useful data. 1331 1332 predefined_constants = "False", "None", "NotImplemented", "True" 1333 1334 operator_functions = { 1335 1336 # Fundamental operations. 1337 1338 "is" : "is_", 1339 "is not" : "is_not", 1340 1341 # Binary operations. 1342 1343 "in" : "in_", 1344 "not in" : "not_in", 1345 "Add" : "add", 1346 "Bitand" : "and_", 1347 "Bitor" : "or_", 1348 "Bitxor" : "xor", 1349 "Div" : "div", 1350 "FloorDiv" : "floordiv", 1351 "LeftShift" : "lshift", 1352 "Mod" : "mod", 1353 "Mul" : "mul", 1354 "Power" : "pow", 1355 "RightShift" : "rshift", 1356 "Sub" : "sub", 1357 1358 # Unary operations. 1359 1360 "Invert" : "invert", 1361 "UnaryAdd" : "pos", 1362 "UnarySub" : "neg", 1363 1364 # Augmented assignment. 1365 1366 "+=" : "iadd", 1367 "-=" : "isub", 1368 "*=" : "imul", 1369 "/=" : "idiv", 1370 "//=" : "ifloordiv", 1371 "%=" : "imod", 1372 "**=" : "ipow", 1373 "<<=" : "ilshift", 1374 ">>=" : "irshift", 1375 "&=" : "iand", 1376 "^=" : "ixor", 1377 "|=" : "ior", 1378 1379 # Comparisons. 1380 1381 "==" : "eq", 1382 "!=" : "ne", 1383 "<" : "lt", 1384 "<=" : "le", 1385 ">=" : "ge", 1386 ">" : "gt", 1387 } 1388 1389 # vim: tabstop=4 expandtab shiftwidth=4