1 #!/usr/bin/env python 2 3 """ 4 Common functions. 5 6 Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013, 7 2014, 2015, 2016, 2017 Paul Boddie <paul@boddie.org.uk> 8 9 This program is free software; you can redistribute it and/or modify it under 10 the terms of the GNU General Public License as published by the Free Software 11 Foundation; either version 3 of the License, or (at your option) any later 12 version. 13 14 This program is distributed in the hope that it will be useful, but WITHOUT 15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 16 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 17 details. 18 19 You should have received a copy of the GNU General Public License along with 20 this program. If not, see <http://www.gnu.org/licenses/>. 21 """ 22 23 from compiler.transformer import Transformer 24 from errors import InspectError 25 from os import listdir, makedirs, remove 26 from os.path import exists, getmtime, isdir, join, split 27 from results import ConstantValueRef, LiteralSequenceRef, NameRef 28 import compiler.ast 29 30 class CommonOutput: 31 32 "Common output functionality." 33 34 def check_output(self, options=None): 35 36 "Check the existing output and remove it if irrelevant." 37 38 if not exists(self.output): 39 makedirs(self.output) 40 41 details = self.importer.get_cache_details() 42 recorded_details = self.get_output_details() 43 44 # Combine cache details with any options. 45 46 full_details = options and (details + " " + options) or details 47 48 if recorded_details != full_details: 49 self.remove_output() 50 51 writefile(self.get_output_details_filename(), full_details) 52 53 def get_output_details_filename(self): 54 55 "Return the output details filename." 56 57 return join(self.output, "$details") 58 59 def get_output_details(self): 60 61 "Return details of the existing output." 62 63 details_filename = self.get_output_details_filename() 64 65 if not exists(details_filename): 66 return None 67 else: 68 return readfile(details_filename) 69 70 def remove_output(self, dirname=None): 71 72 "Remove the output." 73 74 dirname = dirname or self.output 75 76 for filename in listdir(dirname): 77 path = join(dirname, filename) 78 if isdir(path): 79 self.remove_output(path) 80 else: 81 remove(path) 82 83 def copy(source, target, only_if_newer=True): 84 85 "Copy a text file from 'source' to 'target'." 86 87 if isdir(target): 88 target = join(target, split(source)[-1]) 89 90 if only_if_newer and not is_newer(source, target): 91 return 92 93 infile = open(source) 94 outfile = open(target, "w") 95 96 try: 97 outfile.write(infile.read()) 98 finally: 99 outfile.close() 100 infile.close() 101 102 def is_newer(source, target): 103 104 "Return whether 'source' is newer than 'target'." 105 106 if exists(target): 107 target_mtime = getmtime(target) 108 source_mtime = getmtime(source) 109 return source_mtime > target_mtime 110 111 return True 112 113 class CommonModule: 114 115 "A common module representation." 116 117 def __init__(self, name, importer): 118 119 """ 120 Initialise this module with the given 'name' and an 'importer' which is 121 used to provide access to other modules when required. 122 """ 123 124 self.name = name 125 self.importer = importer 126 self.filename = None 127 128 # Inspection-related attributes. 129 130 self.astnode = None 131 self.encoding = None 132 self.temp = {} 133 self.lambdas = {} 134 135 # Constants, literals and values. 136 137 self.constants = {} 138 self.constant_values = {} 139 self.literals = {} 140 self.literal_types = {} 141 142 # Nested namespaces. 143 144 self.namespace_path = [] 145 self.in_function = False 146 147 # Retain the assignment value expression and track invocations. 148 149 self.in_assignment = None 150 self.in_invocation = None 151 152 # Attribute chain state management. 153 154 self.attrs = [] 155 self.chain_assignment = [] 156 self.chain_invocation = [] 157 158 def __repr__(self): 159 return "CommonModule(%r, %r)" % (self.name, self.importer) 160 161 def parse_file(self, filename): 162 163 "Parse the file with the given 'filename', initialising attributes." 164 165 self.filename = filename 166 167 # Use the Transformer directly to obtain encoding information. 168 169 t = Transformer() 170 f = open(filename) 171 172 try: 173 self.astnode = t.parsesuite(f.read() + "\n") 174 self.encoding = t.encoding 175 finally: 176 f.close() 177 178 # Module-relative naming. 179 180 def get_global_path(self, name): 181 return "%s.%s" % (self.name, name) 182 183 def get_namespace_path(self): 184 return ".".join([self.name] + self.namespace_path) 185 186 def get_object_path(self, name): 187 return ".".join([self.name] + self.namespace_path + [name]) 188 189 def get_parent_path(self): 190 return ".".join([self.name] + self.namespace_path[:-1]) 191 192 # Namespace management. 193 194 def enter_namespace(self, name): 195 196 "Enter the namespace having the given 'name'." 197 198 self.namespace_path.append(name) 199 200 def exit_namespace(self): 201 202 "Exit the current namespace." 203 204 self.namespace_path.pop() 205 206 # Constant reference naming. 207 208 def get_constant_name(self, value, value_type, encoding=None): 209 210 """ 211 Add a new constant to the current namespace for 'value' with 212 'value_type'. 213 """ 214 215 path = self.get_namespace_path() 216 init_item(self.constants, path, dict) 217 return "$c%d" % add_counter_item(self.constants[path], (value, value_type, encoding)) 218 219 # Literal reference naming. 220 221 def get_literal_name(self): 222 223 "Add a new literal to the current namespace." 224 225 path = self.get_namespace_path() 226 init_item(self.literals, path, lambda: 0) 227 return "$C%d" % self.literals[path] 228 229 def next_literal(self): 230 self.literals[self.get_namespace_path()] += 1 231 232 # Temporary variable naming. 233 234 def get_temporary_name(self): 235 path = self.get_namespace_path() 236 init_item(self.temp, path, lambda: 0) 237 return "$t%d" % self.temp[path] 238 239 def next_temporary(self): 240 self.temp[self.get_namespace_path()] += 1 241 242 # Arbitrary function naming. 243 244 def get_lambda_name(self): 245 path = self.get_namespace_path() 246 init_item(self.lambdas, path, lambda: 0) 247 name = "$l%d" % self.lambdas[path] 248 self.lambdas[path] += 1 249 return name 250 251 def reset_lambdas(self): 252 self.lambdas = {} 253 254 # Constant and literal recording. 255 256 def get_constant_value(self, value, literals=None): 257 258 """ 259 Encode the 'value' if appropriate, returning a value, a typename and any 260 encoding. 261 """ 262 263 if isinstance(value, unicode): 264 return value.encode("utf-8"), "unicode", self.encoding 265 266 # Attempt to convert plain strings to text. 267 268 elif isinstance(value, str) and self.encoding: 269 try: 270 return get_string_details(literals, self.encoding) 271 except UnicodeDecodeError: 272 pass 273 274 return value, value.__class__.__name__, None 275 276 def get_constant_reference(self, ref, value, encoding=None): 277 278 """ 279 Return a constant reference for the given 'ref' type and 'value', with 280 the optional 'encoding' applying to text values. 281 """ 282 283 constant_name = self.get_constant_name(value, ref.get_origin(), encoding) 284 285 # Return a reference for the constant. 286 287 objpath = self.get_object_path(constant_name) 288 name_ref = ConstantValueRef(constant_name, ref.instance_of(objpath), value) 289 290 # Record the value and type for the constant. 291 292 self._reserve_constant(objpath, name_ref.value, name_ref.get_origin(), encoding) 293 return name_ref 294 295 def reserve_constant(self, objpath, value, origin, encoding=None): 296 297 """ 298 Reserve a constant within 'objpath' with the given 'value' and having a 299 type with the given 'origin', with the optional 'encoding' applying to 300 text values. 301 """ 302 303 constant_name = self.get_constant_name(value, origin) 304 objpath = self.get_object_path(constant_name) 305 self._reserve_constant(objpath, value, origin, encoding) 306 307 def _reserve_constant(self, objpath, value, origin, encoding): 308 309 """ 310 Store a constant for 'objpath' with the given 'value' and 'origin', with 311 the optional 'encoding' applying to text values. 312 """ 313 314 self.constant_values[objpath] = value, origin, encoding 315 316 def get_literal_reference(self, name, ref, items, cls): 317 318 """ 319 Return a literal reference for the given type 'name', literal 'ref', 320 node 'items' and employing the given 'cls' as the class of the returned 321 reference object. 322 """ 323 324 # Construct an invocation using the items as arguments. 325 326 typename = "$L%s" % name 327 328 invocation = compiler.ast.CallFunc( 329 compiler.ast.Name(typename), 330 items 331 ) 332 333 # Get a name for the actual literal. 334 335 instname = self.get_literal_name() 336 self.next_literal() 337 338 # Record the type for the literal. 339 340 objpath = self.get_object_path(instname) 341 self.literal_types[objpath] = ref.get_origin() 342 343 # Return a wrapper for the invocation exposing the items. 344 345 return cls( 346 instname, 347 ref.instance_of(), 348 self.process_structure_node(invocation), 349 invocation.args 350 ) 351 352 # Node handling. 353 354 def process_structure(self, node): 355 356 """ 357 Within the given 'node', process the program structure. 358 359 During inspection, this will process global declarations, adjusting the 360 module namespace, and import statements, building a module dependency 361 hierarchy. 362 363 During translation, this will consult deduced program information and 364 output translated code. 365 """ 366 367 l = [] 368 for n in node.getChildNodes(): 369 l.append(self.process_structure_node(n)) 370 return l 371 372 def process_augassign_node(self, n): 373 374 "Process the given augmented assignment node 'n'." 375 376 op = operator_functions[n.op] 377 378 if isinstance(n.node, compiler.ast.Getattr): 379 target = compiler.ast.AssAttr(n.node.expr, n.node.attrname, "OP_ASSIGN") 380 elif isinstance(n.node, compiler.ast.Name): 381 target = compiler.ast.AssName(n.node.name, "OP_ASSIGN") 382 else: 383 target = n.node 384 385 assignment = compiler.ast.Assign( 386 [target], 387 compiler.ast.CallFunc( 388 compiler.ast.Name("$op%s" % op), 389 [n.node, n.expr])) 390 391 return self.process_structure_node(assignment) 392 393 def process_assignment_for_object(self, original_name, source): 394 395 """ 396 Return an assignment operation making 'original_name' refer to the given 397 'source'. 398 """ 399 400 assignment = compiler.ast.Assign( 401 [compiler.ast.AssName(original_name, "OP_ASSIGN")], 402 source 403 ) 404 405 return self.process_structure_node(assignment) 406 407 def process_assignment_node_items(self, n, expr): 408 409 """ 410 Process the given assignment node 'n' whose children are to be assigned 411 items of 'expr'. 412 """ 413 414 name_ref = self.process_structure_node(expr) 415 416 # Either unpack the items and present them directly to each assignment 417 # node. 418 419 if isinstance(name_ref, LiteralSequenceRef) and \ 420 self.process_literal_sequence_items(n, name_ref): 421 422 pass 423 424 # Or have the assignment nodes access each item via the sequence API. 425 426 else: 427 self.process_assignment_node_items_by_position(n, expr, name_ref) 428 429 def process_assignment_node_items_by_position(self, n, expr, name_ref): 430 431 """ 432 Process the given sequence assignment node 'n', converting the node to 433 the separate assignment of each target using positional access on a 434 temporary variable representing the sequence. Use 'expr' as the assigned 435 value and 'name_ref' as the reference providing any existing temporary 436 variable. 437 """ 438 439 assignments = [] 440 441 # Employ existing names to access the sequence. 442 # Literal sequences do not provide names of accessible objects. 443 444 if isinstance(name_ref, NameRef) and not isinstance(name_ref, LiteralSequenceRef): 445 temp = name_ref.name 446 447 # For other expressions, create a temporary name to reference the items. 448 449 else: 450 temp = self.get_temporary_name() 451 self.next_temporary() 452 453 assignments.append( 454 compiler.ast.Assign([compiler.ast.AssName(temp, "OP_ASSIGN")], expr) 455 ) 456 457 # Assign the items to the target nodes. 458 459 for i, node in enumerate(n.nodes): 460 assignments.append( 461 compiler.ast.Assign([node], compiler.ast.Subscript( 462 compiler.ast.Name(temp), "OP_APPLY", [compiler.ast.Const(i, str(i))])) 463 ) 464 465 return self.process_structure_node(compiler.ast.Stmt(assignments)) 466 467 def process_literal_sequence_items(self, n, name_ref): 468 469 """ 470 Process the given assignment node 'n', obtaining from the given 471 'name_ref' the items to be assigned to the assignment targets. 472 473 Return whether this method was able to process the assignment node as 474 a sequence of direct assignments. 475 """ 476 477 if len(n.nodes) == len(name_ref.items): 478 assigned_names, count = get_names_from_nodes(n.nodes) 479 accessed_names, _count = get_names_from_nodes(name_ref.items) 480 481 # Only assign directly between items if all assigned names are 482 # plain names (not attribute assignments), and if the assigned names 483 # do not appear in the accessed names. 484 485 if len(assigned_names) == count and \ 486 not assigned_names.intersection(accessed_names): 487 488 for node, item in zip(n.nodes, name_ref.items): 489 self.process_assignment_node(node, item) 490 491 return True 492 493 # Otherwise, use the position-based mechanism to obtain values. 494 495 else: 496 return False 497 else: 498 raise InspectError("In %s, item assignment needing %d items is given %d items." % ( 499 self.get_namespace_path(), len(n.nodes), len(name_ref.items))) 500 501 def process_compare_node(self, n): 502 503 """ 504 Process the given comparison node 'n', converting an operator sequence 505 from... 506 507 <expr1> <op1> <expr2> <op2> <expr3> 508 509 ...to... 510 511 <op1>(<expr1>, <expr2>) and <op2>(<expr2>, <expr3>) 512 """ 513 514 invocations = [] 515 last = n.expr 516 517 for op, op_node in n.ops: 518 op = operator_functions.get(op) 519 520 invocations.append(compiler.ast.CallFunc( 521 compiler.ast.Name("$op%s" % op), 522 [last, op_node])) 523 524 last = op_node 525 526 if len(invocations) > 1: 527 result = compiler.ast.And(invocations) 528 else: 529 result = invocations[0] 530 531 return self.process_structure_node(result) 532 533 def process_dict_node(self, node): 534 535 """ 536 Process the given dictionary 'node', returning a list of (key, value) 537 tuples. 538 """ 539 540 l = [] 541 for key, value in node.items: 542 l.append(( 543 self.process_structure_node(key), 544 self.process_structure_node(value))) 545 return l 546 547 def process_for_node(self, n): 548 549 """ 550 Generate attribute accesses for {n.list}.__iter__ and the next method on 551 the iterator, producing a replacement node for the original. 552 """ 553 554 t0 = self.get_temporary_name() 555 self.next_temporary() 556 t1 = self.get_temporary_name() 557 self.next_temporary() 558 i0 = self.get_temporary_name() 559 self.next_temporary() 560 561 node = compiler.ast.Stmt([ 562 563 # <t0> = {n.list} 564 # <t1> = <t0>.__iter__() 565 # <next> = <t1>.next 566 567 compiler.ast.Assign( 568 [compiler.ast.AssName(t0, "OP_ASSIGN")], 569 n.list), 570 571 compiler.ast.Assign( 572 [compiler.ast.AssName(t1, "OP_ASSIGN")], 573 compiler.ast.CallFunc( 574 compiler.ast.Getattr(compiler.ast.Name(t0), "__iter__"), 575 [])), 576 577 compiler.ast.Assign( 578 [compiler.ast.AssName(i0, "OP_ASSIGN")], 579 compiler.ast.Getattr(compiler.ast.Name(t1), "next")), 580 581 # try: 582 # while True: 583 # <var>... = <next>() 584 # ... 585 # except StopIteration: 586 # pass 587 588 compiler.ast.TryExcept( 589 compiler.ast.While( 590 compiler.ast.Name("True"), 591 compiler.ast.Stmt([ 592 compiler.ast.Assign( 593 [n.assign], 594 compiler.ast.CallFunc( 595 compiler.ast.Name(i0), 596 [] 597 )), 598 n.body]), 599 None), 600 [(compiler.ast.Name("StopIteration"), None, compiler.ast.Stmt([compiler.ast.Pass()]))], 601 None) 602 ]) 603 604 self.process_structure_node(node) 605 606 def process_literal_sequence_node(self, n, name, ref, cls): 607 608 """ 609 Process the given literal sequence node 'n' as a function invocation, 610 with 'name' indicating the type of the sequence, and 'ref' being a 611 reference to the type. The 'cls' is used to instantiate a suitable name 612 reference. 613 """ 614 615 if name == "dict": 616 items = [] 617 for key, value in n.items: 618 items.append(compiler.ast.Tuple([key, value])) 619 else: # name in ("list", "tuple"): 620 items = n.nodes 621 622 return self.get_literal_reference(name, ref, items, cls) 623 624 def process_operator_node(self, n): 625 626 """ 627 Process the given operator node 'n' as an operator function invocation. 628 """ 629 630 op = operator_functions[n.__class__.__name__] 631 invocation = compiler.ast.CallFunc( 632 compiler.ast.Name("$op%s" % op), 633 list(n.getChildNodes()) 634 ) 635 return self.process_structure_node(invocation) 636 637 def process_print_node(self, n): 638 639 """ 640 Process the given print node 'n' as an invocation on a stream of the 641 form... 642 643 $print(dest, args, nl) 644 645 The special function name will be translated elsewhere. 646 """ 647 648 nl = isinstance(n, compiler.ast.Printnl) 649 invocation = compiler.ast.CallFunc( 650 compiler.ast.Name("$print"), 651 [n.dest or compiler.ast.Name("None"), 652 compiler.ast.List(list(n.nodes)), 653 nl and compiler.ast.Name("True") or compiler.ast.Name("False")] 654 ) 655 return self.process_structure_node(invocation) 656 657 def process_slice_node(self, n, expr=None): 658 659 """ 660 Process the given slice node 'n' as an operator function invocation. 661 """ 662 663 if n.flags == "OP_ASSIGN": op = "setslice" 664 elif n.flags == "OP_DELETE": op = "delslice" 665 else: op = "getslice" 666 667 invocation = compiler.ast.CallFunc( 668 compiler.ast.Name("$op%s" % op), 669 [n.expr, n.lower or compiler.ast.Name("None"), n.upper or compiler.ast.Name("None")] + 670 (expr and [expr] or []) 671 ) 672 673 # Fix parse tree structure. 674 675 if op == "delslice": 676 invocation = compiler.ast.Discard(invocation) 677 678 return self.process_structure_node(invocation) 679 680 def process_sliceobj_node(self, n): 681 682 """ 683 Process the given slice object node 'n' as a slice constructor. 684 """ 685 686 op = "slice" 687 invocation = compiler.ast.CallFunc( 688 compiler.ast.Name("$op%s" % op), 689 n.nodes 690 ) 691 return self.process_structure_node(invocation) 692 693 def process_subscript_node(self, n, expr=None): 694 695 """ 696 Process the given subscript node 'n' as an operator function invocation. 697 """ 698 699 if n.flags == "OP_ASSIGN": op = "setitem" 700 elif n.flags == "OP_DELETE": op = "delitem" 701 else: op = "getitem" 702 703 invocation = compiler.ast.CallFunc( 704 compiler.ast.Name("$op%s" % op), 705 [n.expr] + list(n.subs) + (expr and [expr] or []) 706 ) 707 708 # Fix parse tree structure. 709 710 if op == "delitem": 711 invocation = compiler.ast.Discard(invocation) 712 713 return self.process_structure_node(invocation) 714 715 def process_attribute_chain(self, n): 716 717 """ 718 Process the given attribute access node 'n'. Return a reference 719 describing the expression. 720 """ 721 722 # AssAttr/Getattr are nested with the outermost access being the last 723 # access in any chain. 724 725 self.attrs.insert(0, n.attrname) 726 attrs = self.attrs 727 728 # Break attribute chains where non-access nodes are found. 729 730 if not self.have_access_expression(n): 731 self.reset_attribute_chain() 732 733 # Descend into the expression, extending backwards any existing chain, 734 # or building another for the expression. 735 736 name_ref = self.process_structure_node(n.expr) 737 738 # Restore chain information applying to this node. 739 740 if not self.have_access_expression(n): 741 self.restore_attribute_chain(attrs) 742 743 # Return immediately if the expression was another access and thus a 744 # continuation backwards along the chain. The above processing will 745 # have followed the chain all the way to its conclusion. 746 747 if self.have_access_expression(n): 748 del self.attrs[0] 749 750 return name_ref 751 752 # Attribute chain handling. 753 754 def reset_attribute_chain(self): 755 756 "Reset the attribute chain for a subexpression of an attribute access." 757 758 self.attrs = [] 759 self.chain_assignment.append(self.in_assignment) 760 self.chain_invocation.append(self.in_invocation) 761 self.in_assignment = None 762 self.in_invocation = None 763 764 def restore_attribute_chain(self, attrs): 765 766 "Restore the attribute chain for an attribute access." 767 768 self.attrs = attrs 769 self.in_assignment = self.chain_assignment.pop() 770 self.in_invocation = self.chain_invocation.pop() 771 772 def have_access_expression(self, node): 773 774 "Return whether the expression associated with 'node' is Getattr." 775 776 return isinstance(node.expr, compiler.ast.Getattr) 777 778 def get_name_for_tracking(self, name, name_ref=None, is_global=False): 779 780 """ 781 Return the name to be used for attribute usage observations involving 782 the given 'name' in the current namespace. 783 784 If the name is being used outside a function, and if 'name_ref' is 785 given and indicates a global or if 'is_global' is specified as a true 786 value, a path featuring the name in the global namespace is returned. 787 Otherwise, a path computed using the current namespace and the given 788 name is returned. 789 790 The intention of this method is to provide a suitably-qualified name 791 that can be tracked across namespaces. Where globals are being 792 referenced in class namespaces, they should be referenced using their 793 path within the module, not using a path within each class. 794 795 It may not be possible to identify a global within a function at the 796 time of inspection (since a global may appear later in a file). 797 Consequently, globals are identified by their local name rather than 798 their module-qualified path. 799 """ 800 801 # For functions, use the appropriate local names. 802 803 if self.in_function: 804 return name 805 806 # For global names outside functions, use a global name. 807 808 elif is_global or name_ref and name_ref.is_global_name(): 809 return self.get_global_path(name) 810 811 # Otherwise, establish a name in the current namespace. 812 813 else: 814 return self.get_object_path(name) 815 816 def get_path_for_access(self): 817 818 "Outside functions, register accesses at the module level." 819 820 if not self.in_function: 821 return self.name 822 else: 823 return self.get_namespace_path() 824 825 def get_module_name(self, node): 826 827 """ 828 Using the given From 'node' in this module, calculate any relative import 829 information, returning a tuple containing a module to import along with any 830 names to import based on the node's name information. 831 832 Where the returned module is given as None, whole module imports should 833 be performed for the returned modules using the returned names. 834 """ 835 836 # Absolute import. 837 838 if node.level == 0: 839 return node.modname, node.names 840 841 # Relative to an ancestor of this module. 842 843 else: 844 path = self.name.split(".") 845 level = node.level 846 847 # Relative imports treat package roots as submodules. 848 849 if split(self.filename)[-1] == "__init__.py": 850 level -= 1 851 852 if level > len(path): 853 raise InspectError("Relative import %r involves too many levels up from module %r" % ( 854 ("%s%s" % ("." * node.level, node.modname or "")), self.name)) 855 856 basename = ".".join(path[:len(path)-level]) 857 858 # Name imports from a module. 859 860 if node.modname: 861 return "%s.%s" % (basename, node.modname), node.names 862 863 # Relative whole module imports. 864 865 else: 866 return basename, node.names 867 868 def get_argnames(args): 869 870 """ 871 Return a list of all names provided by 'args'. Since tuples may be 872 employed, the arguments are traversed depth-first. 873 """ 874 875 l = [] 876 for arg in args: 877 if isinstance(arg, tuple): 878 l += get_argnames(arg) 879 else: 880 l.append(arg) 881 return l 882 883 def get_names_from_nodes(nodes): 884 885 """ 886 Return the names employed in the given 'nodes' along with the number of 887 nodes excluding sequences. 888 """ 889 890 names = set() 891 count = 0 892 893 for node in nodes: 894 895 # Add names and count them. 896 897 if isinstance(node, (compiler.ast.AssName, compiler.ast.Name)): 898 names.add(node.name) 899 count += 1 900 901 # Add names from sequences and incorporate their counts. 902 903 elif isinstance(node, (compiler.ast.AssList, compiler.ast.AssTuple, 904 compiler.ast.List, compiler.ast.Set, 905 compiler.ast.Tuple)): 906 _names, _count = get_names_from_nodes(node.nodes) 907 names.update(_names) 908 count += _count 909 910 # Count non-name, non-sequence nodes. 911 912 else: 913 count += 1 914 915 return names, count 916 917 # Result classes. 918 919 class InstructionSequence: 920 921 "A generic sequence of instructions." 922 923 def __init__(self, instructions): 924 self.instructions = instructions 925 926 def get_value_instruction(self): 927 return self.instructions[-1] 928 929 def get_init_instructions(self): 930 return self.instructions[:-1] 931 932 # Dictionary utilities. 933 934 def init_item(d, key, fn): 935 936 """ 937 Add to 'd' an entry for 'key' using the callable 'fn' to make an initial 938 value where no entry already exists. 939 """ 940 941 if not d.has_key(key): 942 d[key] = fn() 943 return d[key] 944 945 def dict_for_keys(d, keys): 946 947 "Return a new dictionary containing entries from 'd' for the given 'keys'." 948 949 nd = {} 950 for key in keys: 951 if d.has_key(key): 952 nd[key] = d[key] 953 return nd 954 955 def make_key(s): 956 957 "Make sequence 's' into a tuple-based key, first sorting its contents." 958 959 l = list(s) 960 l.sort() 961 return tuple(l) 962 963 def add_counter_item(d, key): 964 965 """ 966 Make a mapping in 'd' for 'key' to the number of keys added before it, thus 967 maintaining a mapping of keys to their order of insertion. 968 """ 969 970 if not d.has_key(key): 971 d[key] = len(d.keys()) 972 return d[key] 973 974 def remove_items(d1, d2): 975 976 "Remove from 'd1' all items from 'd2'." 977 978 for key in d2.keys(): 979 if d1.has_key(key): 980 del d1[key] 981 982 # Set utilities. 983 984 def first(s): 985 return list(s)[0] 986 987 def same(s1, s2): 988 return set(s1) == set(s2) 989 990 def order_dependencies(all_depends): 991 992 """ 993 Produce a dependency ordering for the 'all_depends' mapping. This mapping 994 has the form "A depends on B, C...". The result will order A, B, C, and so 995 on. 996 """ 997 998 usage = init_reverse_dependencies(all_depends) 999 1000 # Produce an ordering by obtaining exposed items (required by items already 1001 # processed) and putting them at the start of the list. 1002 1003 ordered = [] 1004 1005 while usage: 1006 have_next = False 1007 1008 for key, n in usage.items(): 1009 1010 # Add items needed by no other items to the ordering. 1011 1012 if not n: 1013 remove_dependency(key, all_depends, usage, ordered) 1014 have_next = True 1015 1016 if not have_next: 1017 raise ValueError, usage 1018 1019 return ordered 1020 1021 def order_dependencies_partial(all_depends): 1022 1023 """ 1024 Produce a dependency ordering for the 'all_depends' mapping. This mapping 1025 has the form "A depends on B, C...". The result will order A, B, C, and so 1026 on. Where cycles exist, they will be broken and a partial ordering returned. 1027 """ 1028 1029 usage = init_reverse_dependencies(all_depends) 1030 1031 # Duplicate the dependencies for subsequent modification. 1032 1033 new_depends = {} 1034 for key, values in all_depends.items(): 1035 new_depends[key] = set(values) 1036 1037 all_depends = new_depends 1038 1039 # Produce an ordering by obtaining exposed items (required by items already 1040 # processed) and putting them at the start of the list. 1041 1042 ordered = [] 1043 1044 while usage: 1045 least = None 1046 least_key = None 1047 1048 for key, n in usage.items(): 1049 1050 # Add items needed by no other items to the ordering. 1051 1052 if not n: 1053 remove_dependency(key, all_depends, usage, ordered) 1054 least = 0 1055 1056 # When breaking cycles, note the least used items. 1057 1058 elif least is None or len(n) < least: 1059 least_key = key 1060 least = len(n) 1061 1062 if least: 1063 transfer_dependencies(least_key, all_depends, usage, ordered) 1064 1065 return ordered 1066 1067 def init_reverse_dependencies(all_depends): 1068 1069 """ 1070 From 'all_depends', providing a mapping of the form "A depends on B, C...", 1071 record the reverse dependencies, making a mapping of the form 1072 "B is needed by A", "C is needed by A", and so on. 1073 """ 1074 1075 usage = {} 1076 1077 # Record path-based dependencies. 1078 1079 for key in all_depends.keys(): 1080 usage[key] = set() 1081 1082 for key, depends in all_depends.items(): 1083 for depend in depends: 1084 init_item(usage, depend, set) 1085 usage[depend].add(key) 1086 1087 return usage 1088 1089 def transfer_dependencies(key, all_depends, usage, ordered): 1090 1091 """ 1092 Transfer items needed by 'key' to those items needing 'key', found using 1093 'all_depends', and updating 'usage'. Insert 'key' into the 'ordered' 1094 collection of dependencies. 1095 1096 If "A is needed by X" and "B is needed by A", then transferring items needed 1097 by A will cause "B is needed by X" to be recorded as a consequence. 1098 1099 Transferring items also needs to occur in the reverse mapping, so that 1100 "A needs B" and "X needs A", then the consequence must be recorded as 1101 "X needs B". 1102 """ 1103 1104 ordered.insert(0, key) 1105 1106 needing = usage[key] # A is needed by X 1107 needed = all_depends.get(key) # A needs B 1108 1109 if needing: 1110 for depend in needing: 1111 l = all_depends.get(depend) 1112 if not l: 1113 continue 1114 1115 l.remove(key) # X needs (A) 1116 1117 if needed: 1118 l.update(needed) # X needs B... 1119 1120 # Prevent self references. 1121 1122 if depend in needed: 1123 l.remove(depend) 1124 1125 if needed: 1126 for depend in needed: 1127 l = usage.get(depend) 1128 if not l: 1129 continue 1130 1131 l.remove(key) # B is needed by (A) 1132 l.update(needing) # B is needed by X... 1133 1134 # Prevent self references. 1135 1136 if depend in needing: 1137 l.remove(depend) 1138 1139 if needed: 1140 del all_depends[key] 1141 del usage[key] 1142 1143 def remove_dependency(key, all_depends, usage, ordered): 1144 1145 """ 1146 Remove 'key', found in 'all_depends', from 'usage', inserting it into the 1147 'ordered' collection of dependencies. 1148 1149 Given that 'usage' for a given key A would indicate that "A needs <nothing>" 1150 upon removing A from 'usage', the outcome is that all keys needing A will 1151 have A removed from their 'usage' records. 1152 1153 So, if "B needs A", removing A will cause "B needs <nothing>" to be recorded 1154 as a consequence. 1155 """ 1156 1157 ordered.insert(0, key) 1158 1159 depends = all_depends.get(key) 1160 1161 # Reduce usage of the referenced items. 1162 1163 if depends: 1164 for depend in depends: 1165 usage[depend].remove(key) 1166 1167 del usage[key] 1168 1169 # General input/output. 1170 1171 def readfile(filename): 1172 1173 "Return the contents of 'filename'." 1174 1175 f = open(filename) 1176 try: 1177 return f.read() 1178 finally: 1179 f.close() 1180 1181 def writefile(filename, s): 1182 1183 "Write to 'filename' the string 's'." 1184 1185 f = open(filename, "w") 1186 try: 1187 f.write(s) 1188 finally: 1189 f.close() 1190 1191 # General encoding. 1192 1193 def sorted_output(x): 1194 1195 "Sort sequence 'x' and return a string with commas separating the values." 1196 1197 x = map(str, x) 1198 x.sort() 1199 return ", ".join(x) 1200 1201 def get_string_details(literals, encoding): 1202 1203 """ 1204 Determine whether 'literals' represent Unicode strings or byte strings, 1205 using 'encoding' to reproduce byte sequences. 1206 1207 Each literal is the full program representation including prefix and quotes 1208 recoded by the parser to UTF-8. Thus, any literal found to represent a byte 1209 string needs to be translated back to its original encoding. 1210 1211 Return a single encoded literal value, a type name, and the original 1212 encoding as a tuple. 1213 """ 1214 1215 typename = "unicode" 1216 1217 l = [] 1218 1219 for s in literals: 1220 out, _typename = get_literal_details(s) 1221 if _typename == "str": 1222 typename = "str" 1223 l.append(out) 1224 1225 out = "".join(l) 1226 1227 # For Unicode values, convert to the UTF-8 program representation. 1228 1229 if typename == "unicode": 1230 return out.encode("utf-8"), typename, encoding 1231 1232 # For byte string values, convert back to the original encoding. 1233 1234 else: 1235 return out.encode(encoding), typename, encoding 1236 1237 def get_literal_details(s): 1238 1239 """ 1240 Determine whether 's' represents a Unicode string or a byte string, where 1241 's' contains the full program representation of a literal including prefix 1242 and quotes, recoded by the parser to UTF-8. 1243 1244 Find and convert Unicode values starting with <backslash>u or <backslash>U, 1245 and byte or Unicode values starting with <backslash><octal digit> or 1246 <backslash>x. 1247 1248 Literals prefixed with "u" cause <backslash><octal digit> and <backslash>x 1249 to be considered as Unicode values. Otherwise, they produce byte values and 1250 cause unprefixed strings to be considered as byte strings. 1251 1252 Literals prefixed with "r" do not have their backslash-encoded values 1253 converted unless also prefixed with "u", in which case only the above value 1254 formats are converted, not any of the other special sequences for things 1255 like newlines. 1256 1257 Return the literal value as a Unicode object together with the appropriate 1258 type name in a tuple. 1259 """ 1260 1261 l = [] 1262 1263 # Identify the quote character and use it to identify the prefix. 1264 1265 quote_type = s[-1] 1266 prefix_end = s.find(quote_type) 1267 prefix = s[:prefix_end].lower() 1268 1269 if prefix not in ("", "b", "br", "r", "u", "ur"): 1270 raise ValueError, "String literal does not have a supported prefix: %s" % s 1271 1272 if "b" in prefix: 1273 typename = "str" 1274 else: 1275 typename = "unicode" 1276 1277 # Identify triple quotes or single quotes. 1278 1279 if len(s) >= 6 and s[-2] == quote_type and s[-3] == quote_type: 1280 quote = s[prefix_end:prefix_end+3] 1281 current = prefix_end + 3 1282 end = len(s) - 3 1283 else: 1284 quote = s[prefix_end] 1285 current = prefix_end + 1 1286 end = len(s) - 1 1287 1288 # Conversions of some quoted values. 1289 1290 searches = { 1291 "u" : (6, 16), 1292 "U" : (10, 16), 1293 "x" : (4, 16), 1294 } 1295 1296 octal_digits = map(str, range(0, 8)) 1297 1298 # Translations of some quoted values. 1299 1300 escaped = { 1301 "\\" : "\\", "'" : "'", '"' : '"', 1302 "a" : "\a", "b" : "\b", "f" : "\f", 1303 "n" : "\n", "r" : "\r", "t" : "\t", 1304 } 1305 1306 while current < end: 1307 1308 # Look for quoted values. 1309 1310 index = s.find("\\", current) 1311 if index == -1 or index + 1 == end: 1312 l.append(s[current:end]) 1313 break 1314 1315 # Add the preceding text. 1316 1317 l.append(s[current:index]) 1318 1319 # Handle quoted text. 1320 1321 term = s[index+1] 1322 1323 # Add Unicode values. Where a string is u-prefixed, even \o and \x 1324 # produce Unicode values. 1325 1326 if typename == "unicode" and ( 1327 term in ("u", "U") or 1328 "u" in prefix and (term == "x" or term in octal_digits)): 1329 1330 needed, base = searches.get(term, (4, 8)) 1331 value = convert_quoted_value(s, index, needed, end, base, unichr) 1332 l.append(value) 1333 current = index + needed 1334 1335 # Add raw byte values, changing the string type. 1336 1337 elif "r" not in prefix and ( 1338 term == "x" or term in octal_digits): 1339 1340 needed, base = searches.get(term, (4, 8)) 1341 value = convert_quoted_value(s, index, needed, end, base, chr) 1342 l.append(value) 1343 typename = "str" 1344 current = index + needed 1345 1346 # Add other escaped values. 1347 1348 elif "r" not in prefix and escaped.has_key(term): 1349 l.append(escaped[term]) 1350 current = index + 2 1351 1352 # Add other text as found. 1353 1354 else: 1355 l.append(s[index:index+2]) 1356 current = index + 2 1357 1358 # Collect the components into a single Unicode object. Since the literal 1359 # text was already in UTF-8 form, interpret plain strings as UTF-8 1360 # sequences. 1361 1362 out = [] 1363 1364 for value in l: 1365 if isinstance(value, unicode): 1366 out.append(value) 1367 else: 1368 out.append(unicode(value, "utf-8")) 1369 1370 return "".join(out), typename 1371 1372 def convert_quoted_value(s, index, needed, end, base, fn): 1373 1374 """ 1375 Interpret a quoted value in 's' at 'index' with the given 'needed' number of 1376 positions, and with the given 'end' indicating the first position after the 1377 end of the actual string content. 1378 1379 Use 'base' as the numerical base when interpreting the value, and use 'fn' 1380 to convert the value to an appropriate type. 1381 """ 1382 1383 s = s[index:min(index+needed, end)] 1384 1385 # Not a complete occurrence. 1386 1387 if len(s) < needed: 1388 return s 1389 1390 # Test for a well-formed value. 1391 1392 try: 1393 first = base == 8 and 1 or 2 1394 value = int(s[first:needed], base) 1395 except ValueError: 1396 return s 1397 else: 1398 return fn(value) 1399 1400 # Attribute chain decoding. 1401 1402 def get_attrnames(attrnames): 1403 1404 """ 1405 Split the qualified attribute chain 'attrnames' into its components, 1406 handling special attributes starting with "#" that indicate type 1407 conformance. 1408 """ 1409 1410 if attrnames.startswith("#"): 1411 return [attrnames] 1412 else: 1413 return attrnames.split(".") 1414 1415 def get_attrname_from_location(location): 1416 1417 """ 1418 Extract the first attribute from the attribute names employed in a 1419 'location'. 1420 """ 1421 1422 path, name, attrnames, access = location 1423 if not attrnames: 1424 return attrnames 1425 return get_attrnames(attrnames)[0] 1426 1427 def get_name_path(path, name): 1428 1429 "Return a suitable qualified name from the given 'path' and 'name'." 1430 1431 if "." in name: 1432 return name 1433 else: 1434 return "%s.%s" % (path, name) 1435 1436 # Usage-related functions. 1437 1438 def get_types_for_usage(attrnames, objects): 1439 1440 """ 1441 Identify the types that can support the given 'attrnames', using the 1442 given 'objects' as the catalogue of type details. 1443 """ 1444 1445 types = [] 1446 for name, _attrnames in objects.items(): 1447 if set(attrnames).issubset(_attrnames): 1448 types.append(name) 1449 return types 1450 1451 def get_invoked_attributes(usage): 1452 1453 "Obtain invoked attribute from the given 'usage'." 1454 1455 invoked = [] 1456 if usage: 1457 for attrname, invocation, assignment in usage: 1458 if invocation: 1459 invoked.append(attrname) 1460 return invoked 1461 1462 def get_assigned_attributes(usage): 1463 1464 "Obtain assigned attribute from the given 'usage'." 1465 1466 assigned = [] 1467 if usage: 1468 for attrname, invocation, assignment in usage: 1469 if assignment: 1470 assigned.append(attrname) 1471 return assigned 1472 1473 # Type and module functions. 1474 # NOTE: This makes assumptions about the __builtins__ structure. 1475 1476 def get_builtin_module(name): 1477 1478 "Return the module name containing the given type 'name'." 1479 1480 if name == "string": 1481 modname = "str" 1482 elif name == "utf8string": 1483 modname = "unicode" 1484 elif name == "NoneType": 1485 modname = "none" 1486 else: 1487 modname = name 1488 1489 return "__builtins__.%s" % modname 1490 1491 def get_builtin_type(name): 1492 1493 "Return the type name provided by the given Python value 'name'." 1494 1495 if name == "str": 1496 return "string" 1497 elif name == "unicode": 1498 return "utf8string" 1499 else: 1500 return name 1501 1502 def get_builtin_class(name): 1503 1504 "Return the full name of the built-in class having the given 'name'." 1505 1506 typename = get_builtin_type(name) 1507 module = get_builtin_module(typename) 1508 return "%s.%s" % (module, typename) 1509 1510 # Useful data. 1511 1512 predefined_constants = "False", "None", "NotImplemented", "True" 1513 1514 operator_functions = { 1515 1516 # Fundamental operations. 1517 1518 "is" : "is_", 1519 "is not" : "is_not", 1520 1521 # Binary operations. 1522 1523 "in" : "in_", 1524 "not in" : "not_in", 1525 "Add" : "add", 1526 "Bitand" : "and_", 1527 "Bitor" : "or_", 1528 "Bitxor" : "xor", 1529 "Div" : "div", 1530 "FloorDiv" : "floordiv", 1531 "LeftShift" : "lshift", 1532 "Mod" : "mod", 1533 "Mul" : "mul", 1534 "Power" : "pow", 1535 "RightShift" : "rshift", 1536 "Sub" : "sub", 1537 1538 # Unary operations. 1539 1540 "Invert" : "invert", 1541 "UnaryAdd" : "pos", 1542 "UnarySub" : "neg", 1543 1544 # Augmented assignment. 1545 1546 "+=" : "iadd", 1547 "-=" : "isub", 1548 "*=" : "imul", 1549 "/=" : "idiv", 1550 "//=" : "ifloordiv", 1551 "%=" : "imod", 1552 "**=" : "ipow", 1553 "<<=" : "ilshift", 1554 ">>=" : "irshift", 1555 "&=" : "iand", 1556 "^=" : "ixor", 1557 "|=" : "ior", 1558 1559 # Comparisons. 1560 1561 "==" : "eq", 1562 "!=" : "ne", 1563 "<" : "lt", 1564 "<=" : "le", 1565 ">=" : "ge", 1566 ">" : "gt", 1567 } 1568 1569 # vim: tabstop=4 expandtab shiftwidth=4