#!/usr/bin/env python

"""
Common functions.

Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013,
              2014, 2015, 2016, 2017 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
this program.  If not, see <http://www.gnu.org/licenses/>.
"""

from compiler.transformer import Transformer
from errors import InspectError
from os import listdir, makedirs, remove
from os.path import exists, isdir, join, split
from results import ConstantValueRef, LiteralSequenceRef, NameRef
import compiler.ast

class CommonOutput:

    """
    Common output functionality.

    This is a mix-in: the methods read 'self.output' (the output directory
    name) and 'self.importer', neither of which is defined here.
    """

    def check_output(self):

        "Check the existing output and remove it if irrelevant."

        if not exists(self.output):
            makedirs(self.output)

        details = self.importer.get_cache_details()
        recorded_details = self.get_output_details()

        # Output recorded for different details (or with no recorded details
        # at all) is discarded.

        if recorded_details != details:
            self.remove_output()

        writefile(self.get_output_details_filename(), details)

    def get_output_details_filename(self):

        "Return the output details filename."

        return join(self.output, "$details")

    def get_output_details(self):

        """
        Return details of the existing output, or None where no details have
        been recorded.
        """

        details_filename = self.get_output_details_filename()

        if not exists(details_filename):
            return None

        return readfile(details_filename)

    def remove_output(self, dirname=None):

        """
        Remove the output files found in 'dirname' or, if omitted, in the
        output directory. Directories are descended into but are not
        themselves removed.
        """

        dirname = dirname or self.output

        for filename in listdir(dirname):
            path = join(dirname, filename)
            if isdir(path):
                self.remove_output(path)
            else:
                remove(path)

class CommonModule:

    """
    A common module representation.

    The node processing methods rely on 'process_structure_node' and
    'process_assignment_node', which are not defined in this class.
    """

    def __init__(self, name, importer):

        """
        Initialise this module with the given 'name' and an 'importer' which is
        used to provide access to other modules when required.
        """

        self.name = name
        self.importer = importer
        self.filename = None

        # Inspection-related attributes.

        self.astnode = None
        self.encoding = None
        self.iterators = {}
        self.temp = {}
        self.lambdas = {}

        # Constants, literals and values.

        self.constants = {}
        self.constant_values = {}
        self.literals = {}
        self.literal_types = {}

        # Nested namespaces.

        self.namespace_path = []
        self.in_function = False

        # Retain the assignment value expression and track invocations.

        self.in_assignment = None
        self.in_invocation = False

        # Attribute chain state management.

        self.attrs = []
        self.chain_assignment = []
        self.chain_invocation = []

    def __repr__(self):
        return "CommonModule(%r, %r)" % (self.name, self.importer)

    def parse_file(self, filename):

        "Parse the file with the given 'filename', initialising attributes."

        self.filename = filename

        # Use the Transformer directly to obtain encoding information.

        t = Transformer()

        with open(filename) as f:
            self.astnode = t.parsesuite(f.read() + "\n")
            self.encoding = t.encoding

    # Module-relative naming.

    def get_global_path(self, name):

        "Return the path of 'name' as a global of this module."

        return "%s.%s" % (self.name, name)

    def get_namespace_path(self):

        "Return the dotted path of the current namespace."

        return ".".join([self.name] + self.namespace_path)

    def get_object_path(self, name):

        "Return the dotted path of 'name' within the current namespace."

        return ".".join([self.name] + self.namespace_path + [name])

    def get_parent_path(self):

        "Return the dotted path of the parent of the current namespace."

        return ".".join([self.name] + self.namespace_path[:-1])

    # Namespace management.

    def enter_namespace(self, name):

        "Enter the namespace having the given 'name'."

        self.namespace_path.append(name)

    def exit_namespace(self):

        "Exit the current namespace."

        self.namespace_path.pop()

    # Constant reference naming.

    def get_constant_name(self, value, value_type, encoding=None):

        """
        Add a new constant to the current namespace for 'value' with
        'value_type' and any 'encoding', returning the constant's name. An
        identical combination seen before yields the existing name.
        """

        path = self.get_namespace_path()
        init_item(self.constants, path, dict)
        return "$c%d" % add_counter_item(self.constants[path], (value, value_type, encoding))

    # Literal reference naming.

    def get_literal_name(self):

        """
        Return the name of the current literal in the current namespace. The
        name only changes once 'next_literal' has been called.
        """

        path = self.get_namespace_path()
        init_item(self.literals, path, lambda: 0)
        return "$C%d" % self.literals[path]

    def next_literal(self):

        "Advance the literal counter for the current namespace."

        self.literals[self.get_namespace_path()] += 1

    # Temporary iterator naming.

    def get_iterator_path(self):

        """
        Return the path under which iterators are counted: the current
        namespace inside functions, the module otherwise.
        """

        return self.get_namespace_path() if self.in_function else self.name

    def get_iterator_name(self):

        "Return the name of the current temporary iterator."

        path = self.get_iterator_path()
        init_item(self.iterators, path, lambda: 0)
        return "$i%d" % self.iterators[path]

    def next_iterator(self):

        "Advance the iterator counter for the current iterator path."

        self.iterators[self.get_iterator_path()] += 1

    # Temporary variable naming.

    def get_temporary_name(self):

        "Return the name of the current temporary variable."

        path = self.get_namespace_path()
        init_item(self.temp, path, lambda: 0)
        return "$t%d" % self.temp[path]

    def next_temporary(self):

        "Advance the temporary variable counter for the current namespace."

        self.temp[self.get_namespace_path()] += 1

    # Arbitrary function naming.

    def get_lambda_name(self):

        """
        Return a new name for a lambda in the current namespace, advancing the
        lambda counter in the process.
        """

        path = self.get_namespace_path()
        init_item(self.lambdas, path, lambda: 0)
        name = "$l%d" % self.lambdas[path]
        self.lambdas[path] += 1
        return name

    def reset_lambdas(self):

        "Forget all lambda counters."

        self.lambdas = {}

    # Constant and literal recording.

    def get_constant_value(self, value, literal=None):

        """
        Encode the 'value' if appropriate, returning a value, a typename and any
        encoding. Where 'value' is a plain string, the 'literal' (the source
        text of the string including any prefix and quotes) is needed to
        interpret it as text; without it, the value is returned unchanged.
        """

        if isinstance(value, unicode):
            return value.encode("utf-8"), "unicode", self.encoding

        # Attempt to convert plain strings to text. This requires the literal:
        # without it, get_string_details would fail on the missing text.

        elif isinstance(value, str) and self.encoding and literal:
            try:
                return get_string_details(literal, self.encoding)
            except UnicodeDecodeError:
                pass

        return value, value.__class__.__name__, None

    def get_constant_reference(self, ref, value, encoding=None):

        """
        Return a constant reference for the given 'ref' type and 'value', with
        the optional 'encoding' applying to text values.
        """

        constant_name = self.get_constant_name(value, ref.get_origin(), encoding)

        # Return a reference for the constant.

        objpath = self.get_object_path(constant_name)
        name_ref = ConstantValueRef(constant_name, ref.instance_of(objpath), value)

        # Record the value and type for the constant.

        self._reserve_constant(objpath, name_ref.value, name_ref.get_origin(), encoding)
        return name_ref

    def reserve_constant(self, objpath, value, origin, encoding=None):

        """
        Reserve a constant within 'objpath' with the given 'value' and having a
        type with the given 'origin', with the optional 'encoding' applying to
        text values.
        """

        # NOTE(review): the 'objpath' argument is replaced by a path computed
        # NOTE(review): from the current namespace, and 'encoding' does not
        # NOTE(review): take part in the name lookup, unlike in
        # NOTE(review): get_constant_reference. Confirm whether intended.

        constant_name = self.get_constant_name(value, origin)
        objpath = self.get_object_path(constant_name)
        self._reserve_constant(objpath, value, origin, encoding)

    def _reserve_constant(self, objpath, value, origin, encoding):

        """
        Store a constant for 'objpath' with the given 'value' and 'origin', with
        the optional 'encoding' applying to text values.
        """

        self.constant_values[objpath] = value, origin, encoding

    def get_literal_reference(self, name, ref, items, cls):

        """
        Return a literal reference for the given type 'name', literal 'ref',
        node 'items' and employing the given 'cls' as the class of the returned
        reference object.
        """

        # Construct an invocation using the items as arguments.

        typename = "$L%s" % name

        invocation = compiler.ast.CallFunc(
            compiler.ast.Name(typename),
            items
            )

        # Get a name for the actual literal.

        instname = self.get_literal_name()
        self.next_literal()

        # Record the type for the literal.

        objpath = self.get_object_path(instname)
        self.literal_types[objpath] = ref.get_origin()

        # Return a wrapper for the invocation exposing the items.

        return cls(
            instname,
            ref.instance_of(),
            self.process_structure_node(invocation),
            invocation.args
            )

    # Node handling.

    def process_structure(self, node):

        """
        Within the given 'node', process the program structure, returning a
        list of the results obtained for each child node.

        During inspection, this will process global declarations, adjusting the
        module namespace, and import statements, building a module dependency
        hierarchy.

        During translation, this will consult deduced program information and
        output translated code.
        """

        return [self.process_structure_node(n) for n in node.getChildNodes()]

    def process_augassign_node(self, n):

        """
        Process the given augmented assignment node 'n' as the assignment of
        the result of an in-place operator function to the target.
        """

        op = operator_functions[n.op]

        # Accessor nodes must be converted to their assignment equivalents.

        if isinstance(n.node, compiler.ast.Getattr):
            target = compiler.ast.AssAttr(n.node.expr, n.node.attrname, "OP_ASSIGN")
        elif isinstance(n.node, compiler.ast.Name):
            target = compiler.ast.AssName(n.node.name, "OP_ASSIGN")
        else:
            target = n.node

        assignment = compiler.ast.Assign(
            [target],
            compiler.ast.CallFunc(
                compiler.ast.Name("$op%s" % op),
                [n.node, n.expr]))

        return self.process_structure_node(assignment)

    def process_assignment_for_object(self, original_name, source):

        """
        Return an assignment operation making 'original_name' refer to the given
        'source'.
        """

        assignment = compiler.ast.Assign(
            [compiler.ast.AssName(original_name, "OP_ASSIGN")],
            source
            )

        return self.process_structure_node(assignment)

    def process_assignment_node_items(self, n, expr):

        """
        Process the given assignment node 'n' whose children are to be assigned
        items of 'expr'.
        """

        name_ref = self.process_structure_node(expr)

        # Either unpack the items and present them directly to each assignment
        # node.

        if isinstance(name_ref, LiteralSequenceRef) and \
           self.process_literal_sequence_items(n, name_ref):

            pass

        # Or have the assignment nodes access each item via the sequence API.

        else:
            self.process_assignment_node_items_by_position(n, expr, name_ref)

    def process_assignment_node_items_by_position(self, n, expr, name_ref):

        """
        Process the given sequence assignment node 'n', converting the node to
        the separate assignment of each target using positional access on a
        temporary variable representing the sequence. Use 'expr' as the assigned
        value and 'name_ref' as the reference providing any existing temporary
        variable.
        """

        assignments = []

        # Employ existing names to access the sequence.
        # Literal sequences do not provide names of accessible objects.

        if isinstance(name_ref, NameRef) and not isinstance(name_ref, LiteralSequenceRef):
            temp = name_ref.name

        # For other expressions, create a temporary name to reference the items.

        else:
            temp = self.get_temporary_name()
            self.next_temporary()

            assignments.append(
                compiler.ast.Assign([compiler.ast.AssName(temp, "OP_ASSIGN")], expr)
                )

        # Assign the items to the target nodes.

        for i, node in enumerate(n.nodes):
            assignments.append(
                compiler.ast.Assign([node], compiler.ast.Subscript(
                    compiler.ast.Name(temp), "OP_APPLY", [compiler.ast.Const(i, str(i))]))
                )

        return self.process_structure_node(compiler.ast.Stmt(assignments))

    def process_literal_sequence_items(self, n, name_ref):

        """
        Process the given assignment node 'n', obtaining from the given
        'name_ref' the items to be assigned to the assignment targets.

        Return whether this method was able to process the assignment node as
        a sequence of direct assignments. Raise InspectError where the number
        of targets differs from the number of items.
        """

        if len(n.nodes) != len(name_ref.items):
            raise InspectError("In %s, item assignment needing %d items is given %d items." % (
                self.get_namespace_path(), len(n.nodes), len(name_ref.items)))

        assigned_names, count = get_names_from_nodes(n.nodes)
        accessed_names, _count = get_names_from_nodes(name_ref.items)

        # Only assign directly between items if all assigned names are
        # plain names (not attribute assignments), and if the assigned names
        # do not appear in the accessed names.

        if len(assigned_names) == count and \
           not assigned_names.intersection(accessed_names):

            for node, item in zip(n.nodes, name_ref.items):
                self.process_assignment_node(node, item)

            return True

        # Otherwise, use the position-based mechanism to obtain values.

        return False

    def process_compare_node(self, n):

        """
        Process the given comparison node 'n', converting an operator sequence
        from...

        <expr1> <op1> <expr2> <op2> <expr3>

        ...to...

        <op1>(<expr1>, <expr2>) and <op2>(<expr2>, <expr3>)

        An operator absent from 'operator_functions' causes a KeyError.
        """

        invocations = []
        last = n.expr

        for op, op_node in n.ops:

            # Index the table, as the other operator handlers do, so that an
            # unknown operator fails here and not as a "$opNone" invocation.

            op = operator_functions[op]

            invocations.append(compiler.ast.CallFunc(
                compiler.ast.Name("$op%s" % op),
                [last, op_node]))

            last = op_node

        if len(invocations) > 1:
            result = compiler.ast.And(invocations)
        else:
            result = invocations[0]

        return self.process_structure_node(result)

    def process_dict_node(self, node):

        """
        Process the given dictionary 'node', returning a list of (key, value)
        tuples.
        """

        l = []
        for key, value in node.items:
            l.append((
                self.process_structure_node(key),
                self.process_structure_node(value)))
        return l

    def process_for_node(self, n):

        """
        Generate attribute accesses for {n.list}.__iter__ and the next method on
        the iterator, producing a replacement node for the original.
        """

        # The same iterator name is used throughout the replacement node: the
        # counter is only advanced once the node has been built.

        iterator_name = self.get_iterator_name()

        node = compiler.ast.Stmt([

            # <iterator> = {n.list}.__iter__

            compiler.ast.Assign(
                [compiler.ast.AssName(iterator_name, "OP_ASSIGN")],
                compiler.ast.CallFunc(
                    compiler.ast.Getattr(n.list, "__iter__"),
                    []
                    )),

            # try:
            #     while True:
            #         <var>... = <iterator>.next()
            #         ...
            # except StopIteration:
            #     pass

            compiler.ast.TryExcept(
                compiler.ast.While(
                    compiler.ast.Name("True"),
                    compiler.ast.Stmt([
                        compiler.ast.Assign(
                            [n.assign],
                            compiler.ast.CallFunc(
                                compiler.ast.Getattr(compiler.ast.Name(iterator_name), "next"),
                                []
                                )),
                        n.body]),
                    None),
                [(compiler.ast.Name("StopIteration"), None, compiler.ast.Stmt([compiler.ast.Pass()]))],
                None)
            ])

        self.next_iterator()
        self.process_structure_node(node)

    def process_literal_sequence_node(self, n, name, ref, cls):

        """
        Process the given literal sequence node 'n' as a function invocation,
        with 'name' indicating the type of the sequence, and 'ref' being a
        reference to the type. The 'cls' is used to instantiate a suitable name
        reference.
        """

        # Dictionary entries are presented as (key, value) tuples.

        if name == "dict":
            items = [compiler.ast.Tuple([key, value]) for key, value in n.items]
        else: # name in ("list", "tuple"):
            items = n.nodes

        return self.get_literal_reference(name, ref, items, cls)

    def process_operator_node(self, n):

        """
        Process the given operator node 'n' as an operator function invocation.
        """

        op = operator_functions[n.__class__.__name__]
        invocation = compiler.ast.CallFunc(
            compiler.ast.Name("$op%s" % op),
            list(n.getChildNodes())
            )
        return self.process_structure_node(invocation)

    def process_print_node(self, n):

        """
        Process the given print node 'n' as an invocation on a stream of the
        form...

        $print(dest, args, nl)

        The special function name will be translated elsewhere.
        """

        nl = isinstance(n, compiler.ast.Printnl)
        invocation = compiler.ast.CallFunc(
            compiler.ast.Name("$print"),
            [n.dest or compiler.ast.Name("None"),
             compiler.ast.List(list(n.nodes)),
             compiler.ast.Name("True" if nl else "False")]
            )
        return self.process_structure_node(invocation)

    def process_slice_node(self, n, expr=None):

        """
        Process the given slice node 'n' as an operator function invocation,
        with any 'expr' being supplied as the value to be assigned.
        """

        op = "setslice" if n.flags == "OP_ASSIGN" else "getslice"
        invocation = compiler.ast.CallFunc(
            compiler.ast.Name("$op%s" % op),
            [n.expr, n.lower or compiler.ast.Name("None"), n.upper or compiler.ast.Name("None")] +
                ([expr] if expr else [])
            )
        return self.process_structure_node(invocation)

    def process_sliceobj_node(self, n):

        """
        Process the given slice object node 'n' as a slice constructor.
        """

        op = "slice"
        invocation = compiler.ast.CallFunc(
            compiler.ast.Name("$op%s" % op),
            n.nodes
            )
        return self.process_structure_node(invocation)

    def process_subscript_node(self, n, expr=None):

        """
        Process the given subscript node 'n' as an operator function invocation,
        with any 'expr' being supplied as the value to be assigned.
        """

        op = "setitem" if n.flags == "OP_ASSIGN" else "getitem"
        invocation = compiler.ast.CallFunc(
            compiler.ast.Name("$op%s" % op),
            [n.expr] + list(n.subs) + ([expr] if expr else [])
            )
        return self.process_structure_node(invocation)

    def process_attribute_chain(self, n):

        """
        Process the given attribute access node 'n'. Return a reference
        describing the expression.
        """

        # AssAttr/Getattr are nested with the outermost access being the last
        # access in any chain.

        self.attrs.insert(0, n.attrname)
        attrs = self.attrs

        # Break attribute chains where non-access nodes are found.

        if not self.have_access_expression(n):
            self.reset_attribute_chain()

        # Descend into the expression, extending backwards any existing chain,
        # or building another for the expression.

        name_ref = self.process_structure_node(n.expr)

        # Restore chain information applying to this node.

        if not self.have_access_expression(n):
            self.restore_attribute_chain(attrs)

        # Return immediately if the expression was another access and thus a
        # continuation backwards along the chain. The above processing will
        # have followed the chain all the way to its conclusion.

        if self.have_access_expression(n):
            del self.attrs[0]

        return name_ref

    # Attribute chain handling.

    def reset_attribute_chain(self):

        "Reset the attribute chain for a subexpression of an attribute access."

        self.attrs = []
        self.chain_assignment.append(self.in_assignment)
        self.chain_invocation.append(self.in_invocation)
        self.in_assignment = None
        self.in_invocation = False

    def restore_attribute_chain(self, attrs):

        "Restore the attribute chain for an attribute access."

        self.attrs = attrs
        self.in_assignment = self.chain_assignment.pop()
        self.in_invocation = self.chain_invocation.pop()

    def have_access_expression(self, node):

        "Return whether the expression associated with 'node' is Getattr."

        return isinstance(node.expr, compiler.ast.Getattr)

    def get_name_for_tracking(self, name, path=None):

        """
        Return the name to be used for attribute usage observations involving
        the given 'name' in the current namespace. If 'path' is indicated and
        the name is being used outside a function, return the path value;
        otherwise, return a path computed using the current namespace and the
        given name.

        The intention of this method is to provide a suitably-qualified name
        that can be tracked across namespaces. Where globals are being
        referenced in class namespaces, they should be referenced using their
        path within the module, not using a path within each class.

        It may not be possible to identify a global within a function at the
        time of inspection (since a global may appear later in a file).
        Consequently, globals are identified by their local name rather than
        their module-qualified path.
        """

        # For functions, use the appropriate local names.

        if self.in_function:
            return name

        # For static namespaces, use the given qualified name.

        elif path:
            return path

        # Otherwise, establish a name in the current namespace.

        else:
            return self.get_object_path(name)

    def get_path_for_access(self):

        "Outside functions, register accesses at the module level."

        if not self.in_function:
            return self.name
        else:
            return self.get_namespace_path()

    def get_module_name(self, node):

        """
        Using the given From 'node' in this module, calculate any relative import
        information, returning a tuple containing a module to import along with any
        names to import based on the node's name information.

        Where the returned module is given as None, whole module imports should
        be performed for the returned modules using the returned names.

        Raise InspectError where a relative import reaches beyond the
        components of this module's name.
        """

        # Absolute import.

        if node.level == 0:
            return node.modname, node.names

        # Relative to an ancestor of this module.

        else:
            path = self.name.split(".")
            level = node.level

            # Relative imports treat package roots as submodules.

            if split(self.filename)[-1] == "__init__.py":
                level -= 1

            if level > len(path):
                raise InspectError("Relative import %r involves too many levels up from module %r" % (
                    ("%s%s" % ("." * node.level, node.modname or "")), self.name))

            basename = ".".join(path[:len(path)-level])

        # Name imports from a module.

        if node.modname:
            return "%s.%s" % (basename, node.modname), node.names

        # Relative whole module imports.

        else:
            return basename, node.names

def get_argnames(args):

    """
    Return a list of all names provided by 'args'. Since tuples may be
    employed, the arguments are traversed depth-first.
    """

    l = []
    for arg in args:
        if isinstance(arg, tuple):
            l.extend(get_argnames(arg))
        else:
            l.append(arg)
    return l

def get_names_from_nodes(nodes):

    """
    Return the names employed in the given 'nodes' along with the number of
    nodes excluding sequences.
    """

    names = set()
    count = 0

    for node in nodes:

        # Add names and count them.

        if isinstance(node, (compiler.ast.AssName, compiler.ast.Name)):
            names.add(node.name)
            count += 1

        # Add names from sequences and incorporate their counts.

        elif isinstance(node, (compiler.ast.AssList, compiler.ast.AssTuple,
                               compiler.ast.List, compiler.ast.Set,
                               compiler.ast.Tuple)):
            _names, _count = get_names_from_nodes(node.nodes)
            names.update(_names)
            count += _count

        # Count non-name, non-sequence nodes.

        else:
            count += 1

    return names, count

# Result classes.

class InstructionSequence:

    "A generic sequence of instructions."

    def __init__(self, instructions):
        self.instructions = instructions

    def get_value_instruction(self):

        "Return the last instruction, this providing the value."

        return self.instructions[-1]

    def get_init_instructions(self):

        "Return all instructions preceding the value instruction."

        return self.instructions[:-1]

# Dictionary utilities.

def init_item(d, key, fn):

    """
    Add to 'd' an entry for 'key' using the callable 'fn' to make an initial
    value where no entry already exists. Return the entry's value.
    """

    if key not in d:
        d[key] = fn()
    return d[key]

def dict_for_keys(d, keys):

    "Return a new dictionary containing entries from 'd' for the given 'keys'."

    nd = {}
    for key in keys:
        if key in d:
            nd[key] = d[key]
    return nd

def make_key(s):

    "Make sequence 's' into a tuple-based key, first sorting its contents."

    return tuple(sorted(s))

def add_counter_item(d, key):

    """
    Make a mapping in 'd' for 'key' to the number of keys added before it, thus
    maintaining a mapping of keys to their order of insertion. Return the
    number associated with 'key'.
    """

    if key not in d:
        d[key] = len(d)
    return d[key]

def remove_items(d1, d2):

    "Remove from 'd1' all items from 'd2'."

    # Iterate over a copy of the keys in case both arguments are the same
    # dictionary.

    for key in list(d2):
        if key in d1:
            del d1[key]

# Set utilities.

def first(s):

    "Return the first item obtained from the collection 's'."

    return list(s)[0]

def same(s1, s2):

    "Return whether 's1' and 's2' contain the same distinct items."

    return set(s1) == set(s2)

# General input/output.

def readfile(filename):

    "Return the contents of 'filename'."

    with open(filename) as f:
        return f.read()

def writefile(filename, s):

    "Write to 'filename' the string 's'."

    with open(filename, "w") as f:
        f.write(s)

# General encoding.

def sorted_output(x):

    "Sort sequence 'x' and return a string with commas separating the values."

    return ", ".join(sorted(map(str, x)))

def get_string_details(s, encoding):

    """
    Determine whether 's' represents a Unicode string or a byte string, using
    'encoding' to interpret byte sequences. The contents of 's' is the full
    literal representation including prefix and quotes.

    Find and convert Unicode values starting with <backslash>u or <backslash>U,
    and byte or Unicode values starting with <backslash><octal digit> or
    <backslash>x.

    Literals prefixed with "u" cause <backslash><octal digit> and <backslash>x
    to be considered as Unicode values. Otherwise, they produce byte values and
    cause unprefixed strings to be considered as byte strings.

    Literals prefixed with "r" do not have their backslash-encoded values
    converted unless also prefixed with "u", in which case only the above value
    formats are converted, not any of the other special sequences for things
    like newlines.

    Return the encoded literal value, type name, and original encoding as a
    tuple. Raise ValueError where the literal has an unsupported prefix.
    """

    l = []

    # Identify the quote character and use it to identify the prefix.

    quote_type = s[-1]
    prefix_end = s.find(quote_type)
    prefix = s[:prefix_end].lower()

    if prefix not in ("", "b", "br", "r", "u", "ur"):
        raise ValueError("String literal does not have a supported prefix: %s" % s)

    if "b" in prefix:
        typename = "str"
    else:
        typename = "unicode"

    # Identify triple quotes or single quotes.

    if len(s) >= 6 and s[-2] == quote_type and s[-3] == quote_type:
        quote = s[prefix_end:prefix_end+3]
        current = prefix_end + 3
        end = len(s) - 3
    else:
        quote = s[prefix_end]
        current = prefix_end + 1
        end = len(s) - 1

    # Conversions of some quoted values: the number of characters occupied by
    # the complete sequence (including the backslash) and the numeric base.
    # Octal sequences are not listed and are handled using a default.

    searches = {
        "u" : (6, 16),
        "U" : (10, 16),
        "x" : (4, 16),
        }

    octal_digits = "01234567"

    # Translations of some quoted values.

    escaped = {
        "\\" : "\\", "'" : "'", '"' : '"',
        "a" : "\a", "b" : "\b", "f" : "\f",
        "n" : "\n", "r" : "\r", "t" : "\t",
        }

    while current < end:

        # Look for quoted values.

        index = s.find("\\", current)
        if index == -1 or index + 1 == end:
            l.append(s[current:end])
            break

        # Add the preceding text.

        l.append(s[current:index])

        # Handle quoted text.

        term = s[index+1]

        # Add Unicode values. Where a string is u-prefixed, even \o and \x
        # produce Unicode values.

        if typename == "unicode" and (
            term in ("u", "U") or
            "u" in prefix and (term == "x" or term in octal_digits)):

            needed, base = searches.get(term, (4, 8))
            value = convert_quoted_value(s, index, needed, end, base, unichr)
            l.append(value)
            current = index + needed

        # Add raw byte values, changing the string type.

        elif "r" not in prefix and (
             term == "x" or term in octal_digits):

            needed, base = searches.get(term, (4, 8))
            value = convert_quoted_value(s, index, needed, end, base, chr)
            l.append(value)
            typename = "str"
            current = index + needed

        # Add other escaped values.

        elif "r" not in prefix and term in escaped:
            l.append(escaped[term])
            current = index + 2

        # Add other text as found.

        else:
            l.append(s[index:index+2])
            current = index + 2

    # For byte string values, convert any Unicode values to the original
    # encoding.

    if typename == "str":
        out = []
        for value in l:
            if isinstance(value, unicode):
                out.append(value.encode(encoding))
            else:
                out.append(value)
        out = "".join(out)

    # For Unicode values, convert byte sequences to Unicode.

    else:
        out = []
        for value in l:
            if isinstance(value, unicode):
                out.append(value)
            else:
                out.append(unicode(value, encoding))
        out = "".join(out).encode("utf-8")

    return out, typename, encoding

def convert_quoted_value(s, index, needed, end, base, fn):

    """
    Interpret a quoted value in 's' at 'index' with the given 'needed' number of
    positions, and with the given 'end' indicating the first position after the
    end of the actual string content.

    Use 'base' as the numerical base when interpreting the value, and use 'fn'
    to convert the value to an appropriate type.

    Where the value is incomplete or malformed, return the text found at
    'index' unconverted.
    """

    s = s[index:min(index+needed, end)]

    # Not a complete occurrence.

    if len(s) < needed:
        return s

    # Test for a well-formed value. Octal digits follow the backslash
    # directly; other values have a letter preceding the digits.

    try:
        first = 1 if base == 8 else 2
        value = int(s[first:needed], base)
    except ValueError:
        return s
    else:
        return fn(value)

# Attribute chain decoding.

def get_attrnames(attrnames):

    """
    Split the qualified attribute chain 'attrnames' into its components,
    handling special attributes starting with "#" that indicate type
    conformance.
    """

    if attrnames.startswith("#"):
        return [attrnames]
    else:
        return attrnames.split(".")

def get_attrname_from_location(location):

    """
    Extract the first attribute from the attribute names employed in a
    'location'. Where no attribute names are present, return the absent
    value itself.
    """

    path, name, attrnames, access = location
    if not attrnames:
        return attrnames
    return get_attrnames(attrnames)[0]

def get_name_path(path, name):

    "Return a suitable qualified name from the given 'path' and 'name'."

    if "." in name:
        return name
    else:
        return "%s.%s" % (path, name)

# Usage-related functions.

def get_types_for_usage(attrnames, objects):

    """
    Identify the types that can support the given 'attrnames', using the
    given 'objects' as the catalogue of type details.
    """

    required = set(attrnames)
    types = []
    for name, _attrnames in objects.items():
        if required.issubset(_attrnames):
            types.append(name)
    return types

def get_invoked_attributes(usage):

    "Obtain invoked attribute from the given 'usage'."

    invoked = []
    if usage:
        for attrname, invocation, assignment in usage:
            if invocation:
                invoked.append(attrname)
    return invoked

def get_assigned_attributes(usage):

    "Obtain assigned attribute from the given 'usage'."

    assigned = []
    if usage:
        for attrname, invocation, assignment in usage:
            if assignment:
                assigned.append(attrname)
    return assigned

# Type and module functions.

def get_builtin_module(name):

    "Return the module name containing the given type 'name'."

    # NOTE: This makes assumptions about the __builtins__ structure.

    if name == "string":
        return "str"
    elif name == "utf8string":
        return "unicode"
    elif name == "NoneType":
        return "none"
    else:
        return name

def get_builtin_type(name):

    "Return the type name provided by the given Python value 'name'."

    if name == "str":
        return "string"
    elif name == "unicode":
        return "utf8string"
    else:
        return name

# Useful data.

predefined_constants = "False", "None", "NotImplemented", "True"

operator_functions = {

    # Fundamental operations.

    "is" : "is_",
    "is not" : "is_not",

    # Binary operations.

    "in" : "in_",
    "not in" : "not_in",
    "Add" : "add",
    "Bitand" : "and_",
    "Bitor" : "or_",
    "Bitxor" : "xor",
    "Div" : "div",
    "FloorDiv" : "floordiv",
    "LeftShift" : "lshift",
    "Mod" : "mod",
    "Mul" : "mul",
    "Power" : "pow",
    "RightShift" : "rshift",
    "Sub" : "sub",

    # Unary operations.

    "Invert" : "invert",
    "UnaryAdd" : "pos",
    "UnarySub" : "neg",

    # Augmented assignment.

    "+=" : "iadd",
    "-=" : "isub",
    "*=" : "imul",
    "/=" : "idiv",
    "//=" : "ifloordiv",
    "%=" : "imod",
    "**=" : "ipow",
    "<<=" : "ilshift",
    ">>=" : "irshift",
    "&=" : "iand",
    "^=" : "ixor",
    "|=" : "ior",

    # Comparisons.

    "==" : "eq",
    "!=" : "ne",
    "<" : "lt",
    "<=" : "le",
    ">=" : "ge",
    ">" : "gt",
    }

# vim: tabstop=4 expandtab shiftwidth=4