1 #!/usr/bin/env python 2 3 """ 4 Common functions. 5 6 Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013, 7 2014, 2015, 2016, 2017 Paul Boddie <paul@boddie.org.uk> 8 9 This program is free software; you can redistribute it and/or modify it under 10 the terms of the GNU General Public License as published by the Free Software 11 Foundation; either version 3 of the License, or (at your option) any later 12 version. 13 14 This program is distributed in the hope that it will be useful, but WITHOUT 15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 16 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 17 details. 18 19 You should have received a copy of the GNU General Public License along with 20 this program. If not, see <http://www.gnu.org/licenses/>. 21 """ 22 23 from compiler.transformer import Transformer 24 from errors import InspectError 25 from os import listdir, makedirs, remove 26 from os.path import exists, isdir, join, split 27 from results import ConstantValueRef, LiteralSequenceRef, NameRef 28 import compiler.ast 29 30 class CommonOutput: 31 32 "Common output functionality." 33 34 def check_output(self): 35 36 "Check the existing output and remove it if irrelevant." 37 38 if not exists(self.output): 39 makedirs(self.output) 40 41 details = self.importer.get_cache_details() 42 recorded_details = self.get_output_details() 43 44 if recorded_details != details: 45 self.remove_output() 46 47 writefile(self.get_output_details_filename(), details) 48 49 def get_output_details_filename(self): 50 51 "Return the output details filename." 52 53 return join(self.output, "$details") 54 55 def get_output_details(self): 56 57 "Return details of the existing output." 58 59 details_filename = self.get_output_details_filename() 60 61 if not exists(details_filename): 62 return None 63 else: 64 return readfile(details_filename) 65 66 def remove_output(self, dirname=None): 67 68 "Remove the output." 69 70 dirname = dirname or self.output 71 72 for filename in listdir(dirname): 73 path = join(dirname, filename) 74 if isdir(path): 75 self.remove_output(path) 76 else: 77 remove(path) 78 79 class CommonModule: 80 81 "A common module representation." 82 83 def __init__(self, name, importer): 84 85 """ 86 Initialise this module with the given 'name' and an 'importer' which is 87 used to provide access to other modules when required. 88 """ 89 90 self.name = name 91 self.importer = importer 92 self.filename = None 93 94 # Inspection-related attributes. 95 96 self.astnode = None 97 self.encoding = None 98 self.iterators = {} 99 self.temp = {} 100 self.lambdas = {} 101 102 # Constants, literals and values. 103 104 self.constants = {} 105 self.constant_values = {} 106 self.literals = {} 107 self.literal_types = {} 108 109 # Nested namespaces. 110 111 self.namespace_path = [] 112 self.in_function = False 113 114 # Retain the assignment value expression and track invocations. 115 116 self.in_assignment = None 117 self.in_invocation = None 118 119 # Attribute chain state management. 120 121 self.attrs = [] 122 self.chain_assignment = [] 123 self.chain_invocation = [] 124 125 def __repr__(self): 126 return "CommonModule(%r, %r)" % (self.name, self.importer) 127 128 def parse_file(self, filename): 129 130 "Parse the file with the given 'filename', initialising attributes." 131 132 self.filename = filename 133 134 # Use the Transformer directly to obtain encoding information. 135 136 t = Transformer() 137 f = open(filename) 138 139 try: 140 self.astnode = t.parsesuite(f.read() + "\n") 141 self.encoding = t.encoding 142 finally: 143 f.close() 144 145 # Module-relative naming. 146 147 def get_global_path(self, name): 148 return "%s.%s" % (self.name, name) 149 150 def get_namespace_path(self): 151 return ".".join([self.name] + self.namespace_path) 152 153 def get_object_path(self, name): 154 return ".".join([self.name] + self.namespace_path + [name]) 155 156 def get_parent_path(self): 157 return ".".join([self.name] + self.namespace_path[:-1]) 158 159 # Namespace management. 160 161 def enter_namespace(self, name): 162 163 "Enter the namespace having the given 'name'." 164 165 self.namespace_path.append(name) 166 167 def exit_namespace(self): 168 169 "Exit the current namespace." 170 171 self.namespace_path.pop() 172 173 # Constant reference naming. 174 175 def get_constant_name(self, value, value_type, encoding=None): 176 177 """ 178 Add a new constant to the current namespace for 'value' with 179 'value_type'. 180 """ 181 182 path = self.get_namespace_path() 183 init_item(self.constants, path, dict) 184 return "$c%d" % add_counter_item(self.constants[path], (value, value_type, encoding)) 185 186 # Literal reference naming. 187 188 def get_literal_name(self): 189 190 "Add a new literal to the current namespace." 191 192 path = self.get_namespace_path() 193 init_item(self.literals, path, lambda: 0) 194 return "$C%d" % self.literals[path] 195 196 def next_literal(self): 197 self.literals[self.get_namespace_path()] += 1 198 199 # Temporary iterator naming. 200 201 def get_iterator_path(self): 202 return self.in_function and self.get_namespace_path() or self.name 203 204 def get_iterator_name(self): 205 path = self.get_iterator_path() 206 init_item(self.iterators, path, lambda: 0) 207 return "$i%d" % self.iterators[path] 208 209 def next_iterator(self): 210 self.iterators[self.get_iterator_path()] += 1 211 212 # Temporary variable naming. 213 214 def get_temporary_name(self): 215 path = self.get_namespace_path() 216 init_item(self.temp, path, lambda: 0) 217 return "$t%d" % self.temp[path] 218 219 def next_temporary(self): 220 self.temp[self.get_namespace_path()] += 1 221 222 # Arbitrary function naming. 223 224 def get_lambda_name(self): 225 path = self.get_namespace_path() 226 init_item(self.lambdas, path, lambda: 0) 227 name = "$l%d" % self.lambdas[path] 228 self.lambdas[path] += 1 229 return name 230 231 def reset_lambdas(self): 232 self.lambdas = {} 233 234 # Constant and literal recording. 235 236 def get_constant_value(self, value, literals=None): 237 238 """ 239 Encode the 'value' if appropriate, returning a value, a typename and any 240 encoding. 241 """ 242 243 if isinstance(value, unicode): 244 return value.encode("utf-8"), "unicode", self.encoding 245 246 # Attempt to convert plain strings to text. 247 248 elif isinstance(value, str) and self.encoding: 249 try: 250 return get_string_details(literals, self.encoding) 251 except UnicodeDecodeError: 252 pass 253 254 return value, value.__class__.__name__, None 255 256 def get_constant_reference(self, ref, value, encoding=None): 257 258 """ 259 Return a constant reference for the given 'ref' type and 'value', with 260 the optional 'encoding' applying to text values. 261 """ 262 263 constant_name = self.get_constant_name(value, ref.get_origin(), encoding) 264 265 # Return a reference for the constant. 266 267 objpath = self.get_object_path(constant_name) 268 name_ref = ConstantValueRef(constant_name, ref.instance_of(objpath), value) 269 270 # Record the value and type for the constant. 271 272 self._reserve_constant(objpath, name_ref.value, name_ref.get_origin(), encoding) 273 return name_ref 274 275 def reserve_constant(self, objpath, value, origin, encoding=None): 276 277 """ 278 Reserve a constant within 'objpath' with the given 'value' and having a 279 type with the given 'origin', with the optional 'encoding' applying to 280 text values. 281 """ 282 283 constant_name = self.get_constant_name(value, origin) 284 objpath = self.get_object_path(constant_name) 285 self._reserve_constant(objpath, value, origin, encoding) 286 287 def _reserve_constant(self, objpath, value, origin, encoding): 288 289 """ 290 Store a constant for 'objpath' with the given 'value' and 'origin', with 291 the optional 'encoding' applying to text values. 292 """ 293 294 self.constant_values[objpath] = value, origin, encoding 295 296 def get_literal_reference(self, name, ref, items, cls): 297 298 """ 299 Return a literal reference for the given type 'name', literal 'ref', 300 node 'items' and employing the given 'cls' as the class of the returned 301 reference object. 302 """ 303 304 # Construct an invocation using the items as arguments. 305 306 typename = "$L%s" % name 307 308 invocation = compiler.ast.CallFunc( 309 compiler.ast.Name(typename), 310 items 311 ) 312 313 # Get a name for the actual literal. 314 315 instname = self.get_literal_name() 316 self.next_literal() 317 318 # Record the type for the literal. 319 320 objpath = self.get_object_path(instname) 321 self.literal_types[objpath] = ref.get_origin() 322 323 # Return a wrapper for the invocation exposing the items. 324 325 return cls( 326 instname, 327 ref.instance_of(), 328 self.process_structure_node(invocation), 329 invocation.args 330 ) 331 332 # Node handling. 333 334 def process_structure(self, node): 335 336 """ 337 Within the given 'node', process the program structure. 338 339 During inspection, this will process global declarations, adjusting the 340 module namespace, and import statements, building a module dependency 341 hierarchy. 342 343 During translation, this will consult deduced program information and 344 output translated code. 345 """ 346 347 l = [] 348 for n in node.getChildNodes(): 349 l.append(self.process_structure_node(n)) 350 return l 351 352 def process_augassign_node(self, n): 353 354 "Process the given augmented assignment node 'n'." 355 356 op = operator_functions[n.op] 357 358 if isinstance(n.node, compiler.ast.Getattr): 359 target = compiler.ast.AssAttr(n.node.expr, n.node.attrname, "OP_ASSIGN") 360 elif isinstance(n.node, compiler.ast.Name): 361 target = compiler.ast.AssName(n.node.name, "OP_ASSIGN") 362 else: 363 target = n.node 364 365 assignment = compiler.ast.Assign( 366 [target], 367 compiler.ast.CallFunc( 368 compiler.ast.Name("$op%s" % op), 369 [n.node, n.expr])) 370 371 return self.process_structure_node(assignment) 372 373 def process_assignment_for_object(self, original_name, source): 374 375 """ 376 Return an assignment operation making 'original_name' refer to the given 377 'source'. 378 """ 379 380 assignment = compiler.ast.Assign( 381 [compiler.ast.AssName(original_name, "OP_ASSIGN")], 382 source 383 ) 384 385 return self.process_structure_node(assignment) 386 387 def process_assignment_node_items(self, n, expr): 388 389 """ 390 Process the given assignment node 'n' whose children are to be assigned 391 items of 'expr'. 392 """ 393 394 name_ref = self.process_structure_node(expr) 395 396 # Either unpack the items and present them directly to each assignment 397 # node. 398 399 if isinstance(name_ref, LiteralSequenceRef) and \ 400 self.process_literal_sequence_items(n, name_ref): 401 402 pass 403 404 # Or have the assignment nodes access each item via the sequence API. 405 406 else: 407 self.process_assignment_node_items_by_position(n, expr, name_ref) 408 409 def process_assignment_node_items_by_position(self, n, expr, name_ref): 410 411 """ 412 Process the given sequence assignment node 'n', converting the node to 413 the separate assignment of each target using positional access on a 414 temporary variable representing the sequence. Use 'expr' as the assigned 415 value and 'name_ref' as the reference providing any existing temporary 416 variable. 417 """ 418 419 assignments = [] 420 421 # Employ existing names to access the sequence. 422 # Literal sequences do not provide names of accessible objects. 423 424 if isinstance(name_ref, NameRef) and not isinstance(name_ref, LiteralSequenceRef): 425 temp = name_ref.name 426 427 # For other expressions, create a temporary name to reference the items. 428 429 else: 430 temp = self.get_temporary_name() 431 self.next_temporary() 432 433 assignments.append( 434 compiler.ast.Assign([compiler.ast.AssName(temp, "OP_ASSIGN")], expr) 435 ) 436 437 # Assign the items to the target nodes. 438 439 for i, node in enumerate(n.nodes): 440 assignments.append( 441 compiler.ast.Assign([node], compiler.ast.Subscript( 442 compiler.ast.Name(temp), "OP_APPLY", [compiler.ast.Const(i, str(i))])) 443 ) 444 445 return self.process_structure_node(compiler.ast.Stmt(assignments)) 446 447 def process_literal_sequence_items(self, n, name_ref): 448 449 """ 450 Process the given assignment node 'n', obtaining from the given 451 'name_ref' the items to be assigned to the assignment targets. 452 453 Return whether this method was able to process the assignment node as 454 a sequence of direct assignments. 455 """ 456 457 if len(n.nodes) == len(name_ref.items): 458 assigned_names, count = get_names_from_nodes(n.nodes) 459 accessed_names, _count = get_names_from_nodes(name_ref.items) 460 461 # Only assign directly between items if all assigned names are 462 # plain names (not attribute assignments), and if the assigned names 463 # do not appear in the accessed names. 464 465 if len(assigned_names) == count and \ 466 not assigned_names.intersection(accessed_names): 467 468 for node, item in zip(n.nodes, name_ref.items): 469 self.process_assignment_node(node, item) 470 471 return True 472 473 # Otherwise, use the position-based mechanism to obtain values. 474 475 else: 476 return False 477 else: 478 raise InspectError("In %s, item assignment needing %d items is given %d items." % ( 479 self.get_namespace_path(), len(n.nodes), len(name_ref.items))) 480 481 def process_compare_node(self, n): 482 483 """ 484 Process the given comparison node 'n', converting an operator sequence 485 from... 486 487 <expr1> <op1> <expr2> <op2> <expr3> 488 489 ...to... 490 491 <op1>(<expr1>, <expr2>) and <op2>(<expr2>, <expr3>) 492 """ 493 494 invocations = [] 495 last = n.expr 496 497 for op, op_node in n.ops: 498 op = operator_functions.get(op) 499 500 invocations.append(compiler.ast.CallFunc( 501 compiler.ast.Name("$op%s" % op), 502 [last, op_node])) 503 504 last = op_node 505 506 if len(invocations) > 1: 507 result = compiler.ast.And(invocations) 508 else: 509 result = invocations[0] 510 511 return self.process_structure_node(result) 512 513 def process_dict_node(self, node): 514 515 """ 516 Process the given dictionary 'node', returning a list of (key, value) 517 tuples. 518 """ 519 520 l = [] 521 for key, value in node.items: 522 l.append(( 523 self.process_structure_node(key), 524 self.process_structure_node(value))) 525 return l 526 527 def process_for_node(self, n): 528 529 """ 530 Generate attribute accesses for {n.list}.__iter__ and the next method on 531 the iterator, producing a replacement node for the original. 532 """ 533 534 node = compiler.ast.Stmt([ 535 536 # <next> = {n.list}.__iter__().next 537 538 compiler.ast.Assign( 539 [compiler.ast.AssName(self.get_iterator_name(), "OP_ASSIGN")], 540 compiler.ast.Getattr( 541 compiler.ast.CallFunc( 542 compiler.ast.Getattr(n.list, "__iter__"), 543 [] 544 ), "next")), 545 546 # try: 547 # while True: 548 # <var>... = <next>() 549 # ... 550 # except StopIteration: 551 # pass 552 553 compiler.ast.TryExcept( 554 compiler.ast.While( 555 compiler.ast.Name("True"), 556 compiler.ast.Stmt([ 557 compiler.ast.Assign( 558 [n.assign], 559 compiler.ast.CallFunc( 560 compiler.ast.Name(self.get_iterator_name()), 561 [] 562 )), 563 n.body]), 564 None), 565 [(compiler.ast.Name("StopIteration"), None, compiler.ast.Stmt([compiler.ast.Pass()]))], 566 None) 567 ]) 568 569 self.next_iterator() 570 self.process_structure_node(node) 571 572 def process_literal_sequence_node(self, n, name, ref, cls): 573 574 """ 575 Process the given literal sequence node 'n' as a function invocation, 576 with 'name' indicating the type of the sequence, and 'ref' being a 577 reference to the type. The 'cls' is used to instantiate a suitable name 578 reference. 579 """ 580 581 if name == "dict": 582 items = [] 583 for key, value in n.items: 584 items.append(compiler.ast.Tuple([key, value])) 585 else: # name in ("list", "tuple"): 586 items = n.nodes 587 588 return self.get_literal_reference(name, ref, items, cls) 589 590 def process_operator_node(self, n): 591 592 """ 593 Process the given operator node 'n' as an operator function invocation. 594 """ 595 596 op = operator_functions[n.__class__.__name__] 597 invocation = compiler.ast.CallFunc( 598 compiler.ast.Name("$op%s" % op), 599 list(n.getChildNodes()) 600 ) 601 return self.process_structure_node(invocation) 602 603 def process_print_node(self, n): 604 605 """ 606 Process the given print node 'n' as an invocation on a stream of the 607 form... 608 609 $print(dest, args, nl) 610 611 The special function name will be translated elsewhere. 612 """ 613 614 nl = isinstance(n, compiler.ast.Printnl) 615 invocation = compiler.ast.CallFunc( 616 compiler.ast.Name("$print"), 617 [n.dest or compiler.ast.Name("None"), 618 compiler.ast.List(list(n.nodes)), 619 nl and compiler.ast.Name("True") or compiler.ast.Name("False")] 620 ) 621 return self.process_structure_node(invocation) 622 623 def process_slice_node(self, n, expr=None): 624 625 """ 626 Process the given slice node 'n' as an operator function invocation. 627 """ 628 629 if n.flags == "OP_ASSIGN": op = "setslice" 630 elif n.flags == "OP_DELETE": op = "delslice" 631 else: op = "getslice" 632 633 invocation = compiler.ast.CallFunc( 634 compiler.ast.Name("$op%s" % op), 635 [n.expr, n.lower or compiler.ast.Name("None"), n.upper or compiler.ast.Name("None")] + 636 (expr and [expr] or []) 637 ) 638 639 # Fix parse tree structure. 640 641 if op == "delslice": 642 invocation = compiler.ast.Discard(invocation) 643 644 return self.process_structure_node(invocation) 645 646 def process_sliceobj_node(self, n): 647 648 """ 649 Process the given slice object node 'n' as a slice constructor. 650 """ 651 652 op = "slice" 653 invocation = compiler.ast.CallFunc( 654 compiler.ast.Name("$op%s" % op), 655 n.nodes 656 ) 657 return self.process_structure_node(invocation) 658 659 def process_subscript_node(self, n, expr=None): 660 661 """ 662 Process the given subscript node 'n' as an operator function invocation. 663 """ 664 665 if n.flags == "OP_ASSIGN": op = "setitem" 666 elif n.flags == "OP_DELETE": op = "delitem" 667 else: op = "getitem" 668 669 invocation = compiler.ast.CallFunc( 670 compiler.ast.Name("$op%s" % op), 671 [n.expr] + list(n.subs) + (expr and [expr] or []) 672 ) 673 674 # Fix parse tree structure. 675 676 if op == "delitem": 677 invocation = compiler.ast.Discard(invocation) 678 679 return self.process_structure_node(invocation) 680 681 def process_attribute_chain(self, n): 682 683 """ 684 Process the given attribute access node 'n'. Return a reference 685 describing the expression. 686 """ 687 688 # AssAttr/Getattr are nested with the outermost access being the last 689 # access in any chain. 690 691 self.attrs.insert(0, n.attrname) 692 attrs = self.attrs 693 694 # Break attribute chains where non-access nodes are found. 695 696 if not self.have_access_expression(n): 697 self.reset_attribute_chain() 698 699 # Descend into the expression, extending backwards any existing chain, 700 # or building another for the expression. 701 702 name_ref = self.process_structure_node(n.expr) 703 704 # Restore chain information applying to this node. 705 706 if not self.have_access_expression(n): 707 self.restore_attribute_chain(attrs) 708 709 # Return immediately if the expression was another access and thus a 710 # continuation backwards along the chain. The above processing will 711 # have followed the chain all the way to its conclusion. 712 713 if self.have_access_expression(n): 714 del self.attrs[0] 715 716 return name_ref 717 718 # Attribute chain handling. 719 720 def reset_attribute_chain(self): 721 722 "Reset the attribute chain for a subexpression of an attribute access." 723 724 self.attrs = [] 725 self.chain_assignment.append(self.in_assignment) 726 self.chain_invocation.append(self.in_invocation) 727 self.in_assignment = None 728 self.in_invocation = None 729 730 def restore_attribute_chain(self, attrs): 731 732 "Restore the attribute chain for an attribute access." 733 734 self.attrs = attrs 735 self.in_assignment = self.chain_assignment.pop() 736 self.in_invocation = self.chain_invocation.pop() 737 738 def have_access_expression(self, node): 739 740 "Return whether the expression associated with 'node' is Getattr." 741 742 return isinstance(node.expr, compiler.ast.Getattr) 743 744 def get_name_for_tracking(self, name, name_ref=None): 745 746 """ 747 Return the name to be used for attribute usage observations involving 748 the given 'name' in the current namespace. 749 750 If the name is being used outside a function, and if 'name_ref' is 751 given, a path featuring the name in the global namespace is returned 752 where 'name_ref' indicates a global, or a static reference is used if 753 'name_ref' provides such a reference. Otherwise, a path computed using 754 the current namespace and the given name is returned. 755 756 The intention of this method is to provide a suitably-qualified name 757 that can be tracked across namespaces. Where globals are being 758 referenced in class namespaces, they should be referenced using their 759 path within the module, not using a path within each class. 760 761 It may not be possible to identify a global within a function at the 762 time of inspection (since a global may appear later in a file). 763 Consequently, globals are identified by their local name rather than 764 their module-qualified path. 765 """ 766 767 # For functions, use the appropriate local names. 768 769 if self.in_function: 770 return name 771 772 # For static references, use the reference origin. 773 774 elif name_ref and name_ref.final(): 775 return name_ref.final() 776 777 # For global names outside functions, use a global name. 778 779 elif name_ref and name_ref.is_global_name(): 780 return self.get_global_path(name) 781 782 # Otherwise, establish a name in the current namespace. 783 784 else: 785 return self.get_object_path(name) 786 787 def get_path_for_access(self): 788 789 "Outside functions, register accesses at the module level." 790 791 if not self.in_function: 792 return self.name 793 else: 794 return self.get_namespace_path() 795 796 def get_module_name(self, node): 797 798 """ 799 Using the given From 'node' in this module, calculate any relative import 800 information, returning a tuple containing a module to import along with any 801 names to import based on the node's name information. 802 803 Where the returned module is given as None, whole module imports should 804 be performed for the returned modules using the returned names. 805 """ 806 807 # Absolute import. 808 809 if node.level == 0: 810 return node.modname, node.names 811 812 # Relative to an ancestor of this module. 813 814 else: 815 path = self.name.split(".") 816 level = node.level 817 818 # Relative imports treat package roots as submodules. 819 820 if split(self.filename)[-1] == "__init__.py": 821 level -= 1 822 823 if level > len(path): 824 raise InspectError("Relative import %r involves too many levels up from module %r" % ( 825 ("%s%s" % ("." * node.level, node.modname or "")), self.name)) 826 827 basename = ".".join(path[:len(path)-level]) 828 829 # Name imports from a module. 830 831 if node.modname: 832 return "%s.%s" % (basename, node.modname), node.names 833 834 # Relative whole module imports. 835 836 else: 837 return basename, node.names 838 839 def get_argnames(args): 840 841 """ 842 Return a list of all names provided by 'args'. Since tuples may be 843 employed, the arguments are traversed depth-first. 844 """ 845 846 l = [] 847 for arg in args: 848 if isinstance(arg, tuple): 849 l += get_argnames(arg) 850 else: 851 l.append(arg) 852 return l 853 854 def get_names_from_nodes(nodes): 855 856 """ 857 Return the names employed in the given 'nodes' along with the number of 858 nodes excluding sequences. 859 """ 860 861 names = set() 862 count = 0 863 864 for node in nodes: 865 866 # Add names and count them. 867 868 if isinstance(node, (compiler.ast.AssName, compiler.ast.Name)): 869 names.add(node.name) 870 count += 1 871 872 # Add names from sequences and incorporate their counts. 873 874 elif isinstance(node, (compiler.ast.AssList, compiler.ast.AssTuple, 875 compiler.ast.List, compiler.ast.Set, 876 compiler.ast.Tuple)): 877 _names, _count = get_names_from_nodes(node.nodes) 878 names.update(_names) 879 count += _count 880 881 # Count non-name, non-sequence nodes. 882 883 else: 884 count += 1 885 886 return names, count 887 888 # Result classes. 889 890 class InstructionSequence: 891 892 "A generic sequence of instructions." 893 894 def __init__(self, instructions): 895 self.instructions = instructions 896 897 def get_value_instruction(self): 898 return self.instructions[-1] 899 900 def get_init_instructions(self): 901 return self.instructions[:-1] 902 903 # Dictionary utilities. 904 905 def init_item(d, key, fn): 906 907 """ 908 Add to 'd' an entry for 'key' using the callable 'fn' to make an initial 909 value where no entry already exists. 910 """ 911 912 if not d.has_key(key): 913 d[key] = fn() 914 return d[key] 915 916 def dict_for_keys(d, keys): 917 918 "Return a new dictionary containing entries from 'd' for the given 'keys'." 919 920 nd = {} 921 for key in keys: 922 if d.has_key(key): 923 nd[key] = d[key] 924 return nd 925 926 def make_key(s): 927 928 "Make sequence 's' into a tuple-based key, first sorting its contents." 929 930 l = list(s) 931 l.sort() 932 return tuple(l) 933 934 def add_counter_item(d, key): 935 936 """ 937 Make a mapping in 'd' for 'key' to the number of keys added before it, thus 938 maintaining a mapping of keys to their order of insertion. 939 """ 940 941 if not d.has_key(key): 942 d[key] = len(d.keys()) 943 return d[key] 944 945 def remove_items(d1, d2): 946 947 "Remove from 'd1' all items from 'd2'." 948 949 for key in d2.keys(): 950 if d1.has_key(key): 951 del d1[key] 952 953 # Set utilities. 954 955 def first(s): 956 return list(s)[0] 957 958 def same(s1, s2): 959 return set(s1) == set(s2) 960 961 # General input/output. 962 963 def readfile(filename): 964 965 "Return the contents of 'filename'." 966 967 f = open(filename) 968 try: 969 return f.read() 970 finally: 971 f.close() 972 973 def writefile(filename, s): 974 975 "Write to 'filename' the string 's'." 976 977 f = open(filename, "w") 978 try: 979 f.write(s) 980 finally: 981 f.close() 982 983 # General encoding. 984 985 def sorted_output(x): 986 987 "Sort sequence 'x' and return a string with commas separating the values." 988 989 x = map(str, x) 990 x.sort() 991 return ", ".join(x) 992 993 def get_string_details(literals, encoding): 994 995 """ 996 Determine whether 'literals' represent Unicode strings or byte strings, 997 using 'encoding' to reproduce byte sequences. 998 999 Each literal is the full program representation including prefix and quotes 1000 recoded by the parser to UTF-8. Thus, any literal found to represent a byte 1001 string needs to be translated back to its original encoding. 1002 1003 Return a single encoded literal value, a type name, and the original 1004 encoding as a tuple. 1005 """ 1006 1007 typename = "unicode" 1008 1009 l = [] 1010 1011 for s in literals: 1012 out, _typename = get_literal_details(s) 1013 if _typename == "str": 1014 typename = "str" 1015 l.append(out) 1016 1017 out = "".join(l) 1018 1019 # For Unicode values, convert to the UTF-8 program representation. 1020 1021 if typename == "unicode": 1022 return out.encode("utf-8"), typename, encoding 1023 1024 # For byte string values, convert back to the original encoding. 1025 1026 else: 1027 return out.encode(encoding), typename, encoding 1028 1029 def get_literal_details(s): 1030 1031 """ 1032 Determine whether 's' represents a Unicode string or a byte string, where 1033 's' contains the full program representation of a literal including prefix 1034 and quotes, recoded by the parser to UTF-8. 1035 1036 Find and convert Unicode values starting with <backslash>u or <backslash>U, 1037 and byte or Unicode values starting with <backslash><octal digit> or 1038 <backslash>x. 1039 1040 Literals prefixed with "u" cause <backslash><octal digit> and <backslash>x 1041 to be considered as Unicode values. Otherwise, they produce byte values and 1042 cause unprefixed strings to be considered as byte strings. 1043 1044 Literals prefixed with "r" do not have their backslash-encoded values 1045 converted unless also prefixed with "u", in which case only the above value 1046 formats are converted, not any of the other special sequences for things 1047 like newlines. 1048 1049 Return the literal value as a Unicode object together with the appropriate 1050 type name in a tuple. 1051 """ 1052 1053 l = [] 1054 1055 # Identify the quote character and use it to identify the prefix. 1056 1057 quote_type = s[-1] 1058 prefix_end = s.find(quote_type) 1059 prefix = s[:prefix_end].lower() 1060 1061 if prefix not in ("", "b", "br", "r", "u", "ur"): 1062 raise ValueError, "String literal does not have a supported prefix: %s" % s 1063 1064 if "b" in prefix: 1065 typename = "str" 1066 else: 1067 typename = "unicode" 1068 1069 # Identify triple quotes or single quotes. 1070 1071 if len(s) >= 6 and s[-2] == quote_type and s[-3] == quote_type: 1072 quote = s[prefix_end:prefix_end+3] 1073 current = prefix_end + 3 1074 end = len(s) - 3 1075 else: 1076 quote = s[prefix_end] 1077 current = prefix_end + 1 1078 end = len(s) - 1 1079 1080 # Conversions of some quoted values. 1081 1082 searches = { 1083 "u" : (6, 16), 1084 "U" : (10, 16), 1085 "x" : (4, 16), 1086 } 1087 1088 octal_digits = map(str, range(0, 8)) 1089 1090 # Translations of some quoted values. 1091 1092 escaped = { 1093 "\\" : "\\", "'" : "'", '"' : '"', 1094 "a" : "\a", "b" : "\b", "f" : "\f", 1095 "n" : "\n", "r" : "\r", "t" : "\t", 1096 } 1097 1098 while current < end: 1099 1100 # Look for quoted values. 1101 1102 index = s.find("\\", current) 1103 if index == -1 or index + 1 == end: 1104 l.append(s[current:end]) 1105 break 1106 1107 # Add the preceding text. 1108 1109 l.append(s[current:index]) 1110 1111 # Handle quoted text. 1112 1113 term = s[index+1] 1114 1115 # Add Unicode values. Where a string is u-prefixed, even \o and \x 1116 # produce Unicode values. 1117 1118 if typename == "unicode" and ( 1119 term in ("u", "U") or 1120 "u" in prefix and (term == "x" or term in octal_digits)): 1121 1122 needed, base = searches.get(term, (4, 8)) 1123 value = convert_quoted_value(s, index, needed, end, base, unichr) 1124 l.append(value) 1125 current = index + needed 1126 1127 # Add raw byte values, changing the string type. 1128 1129 elif "r" not in prefix and ( 1130 term == "x" or term in octal_digits): 1131 1132 needed, base = searches.get(term, (4, 8)) 1133 value = convert_quoted_value(s, index, needed, end, base, chr) 1134 l.append(value) 1135 typename = "str" 1136 current = index + needed 1137 1138 # Add other escaped values. 1139 1140 elif "r" not in prefix and escaped.has_key(term): 1141 l.append(escaped[term]) 1142 current = index + 2 1143 1144 # Add other text as found. 1145 1146 else: 1147 l.append(s[index:index+2]) 1148 current = index + 2 1149 1150 # Collect the components into a single Unicode object. Since the literal 1151 # text was already in UTF-8 form, interpret plain strings as UTF-8 1152 # sequences. 1153 1154 out = [] 1155 1156 for value in l: 1157 if isinstance(value, unicode): 1158 out.append(value) 1159 else: 1160 out.append(unicode(value, "utf-8")) 1161 1162 return "".join(out), typename 1163 1164 def convert_quoted_value(s, index, needed, end, base, fn): 1165 1166 """ 1167 Interpret a quoted value in 's' at 'index' with the given 'needed' number of 1168 positions, and with the given 'end' indicating the first position after the 1169 end of the actual string content. 1170 1171 Use 'base' as the numerical base when interpreting the value, and use 'fn' 1172 to convert the value to an appropriate type. 1173 """ 1174 1175 s = s[index:min(index+needed, end)] 1176 1177 # Not a complete occurrence. 1178 1179 if len(s) < needed: 1180 return s 1181 1182 # Test for a well-formed value. 1183 1184 try: 1185 first = base == 8 and 1 or 2 1186 value = int(s[first:needed], base) 1187 except ValueError: 1188 return s 1189 else: 1190 return fn(value) 1191 1192 # Attribute chain decoding. 1193 1194 def get_attrnames(attrnames): 1195 1196 """ 1197 Split the qualified attribute chain 'attrnames' into its components, 1198 handling special attributes starting with "#" that indicate type 1199 conformance. 1200 """ 1201 1202 if attrnames.startswith("#"): 1203 return [attrnames] 1204 else: 1205 return attrnames.split(".") 1206 1207 def get_attrname_from_location(location): 1208 1209 """ 1210 Extract the first attribute from the attribute names employed in a 1211 'location'. 1212 """ 1213 1214 path, name, attrnames, access = location 1215 if not attrnames: 1216 return attrnames 1217 return get_attrnames(attrnames)[0] 1218 1219 def get_name_path(path, name): 1220 1221 "Return a suitable qualified name from the given 'path' and 'name'." 1222 1223 if "." in name: 1224 return name 1225 else: 1226 return "%s.%s" % (path, name) 1227 1228 # Usage-related functions. 1229 1230 def get_types_for_usage(attrnames, objects): 1231 1232 """ 1233 Identify the types that can support the given 'attrnames', using the 1234 given 'objects' as the catalogue of type details. 1235 """ 1236 1237 types = [] 1238 for name, _attrnames in objects.items(): 1239 if set(attrnames).issubset(_attrnames): 1240 types.append(name) 1241 return types 1242 1243 def get_invoked_attributes(usage): 1244 1245 "Obtain invoked attribute from the given 'usage'." 1246 1247 invoked = [] 1248 if usage: 1249 for attrname, invocation, assignment in usage: 1250 if invocation: 1251 invoked.append(attrname) 1252 return invoked 1253 1254 def get_assigned_attributes(usage): 1255 1256 "Obtain assigned attribute from the given 'usage'." 1257 1258 assigned = [] 1259 if usage: 1260 for attrname, invocation, assignment in usage: 1261 if assignment: 1262 assigned.append(attrname) 1263 return assigned 1264 1265 # Type and module functions. 1266 # NOTE: This makes assumptions about the __builtins__ structure. 1267 1268 def get_builtin_module(name): 1269 1270 "Return the module name containing the given type 'name'." 1271 1272 if name == "string": 1273 modname = "str" 1274 elif name == "utf8string": 1275 modname = "unicode" 1276 elif name == "NoneType": 1277 modname = "none" 1278 else: 1279 modname = name 1280 1281 return "__builtins__.%s" % modname 1282 1283 def get_builtin_type(name): 1284 1285 "Return the type name provided by the given Python value 'name'." 1286 1287 if name == "str": 1288 return "string" 1289 elif name == "unicode": 1290 return "utf8string" 1291 else: 1292 return name 1293 1294 def get_builtin_class(name): 1295 1296 "Return the full name of the built-in class having the given 'name'." 1297 1298 typename = get_builtin_type(name) 1299 module = get_builtin_module(typename) 1300 return "%s.%s" % (module, typename) 1301 1302 # Useful data. 1303 1304 predefined_constants = "False", "None", "NotImplemented", "True" 1305 1306 operator_functions = { 1307 1308 # Fundamental operations. 1309 1310 "is" : "is_", 1311 "is not" : "is_not", 1312 1313 # Binary operations. 1314 1315 "in" : "in_", 1316 "not in" : "not_in", 1317 "Add" : "add", 1318 "Bitand" : "and_", 1319 "Bitor" : "or_", 1320 "Bitxor" : "xor", 1321 "Div" : "div", 1322 "FloorDiv" : "floordiv", 1323 "LeftShift" : "lshift", 1324 "Mod" : "mod", 1325 "Mul" : "mul", 1326 "Power" : "pow", 1327 "RightShift" : "rshift", 1328 "Sub" : "sub", 1329 1330 # Unary operations. 1331 1332 "Invert" : "invert", 1333 "UnaryAdd" : "pos", 1334 "UnarySub" : "neg", 1335 1336 # Augmented assignment. 1337 1338 "+=" : "iadd", 1339 "-=" : "isub", 1340 "*=" : "imul", 1341 "/=" : "idiv", 1342 "//=" : "ifloordiv", 1343 "%=" : "imod", 1344 "**=" : "ipow", 1345 "<<=" : "ilshift", 1346 ">>=" : "irshift", 1347 "&=" : "iand", 1348 "^=" : "ixor", 1349 "|=" : "ior", 1350 1351 # Comparisons. 1352 1353 "==" : "eq", 1354 "!=" : "ne", 1355 "<" : "lt", 1356 "<=" : "le", 1357 ">=" : "ge", 1358 ">" : "gt", 1359 } 1360 1361 # vim: tabstop=4 expandtab shiftwidth=4