Lichen (file common.py at 1211f066046b)

     1 #!/usr/bin/env python     2      3 """     4 Common functions.     5      6 Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013,     7               2014, 2015, 2016, 2017 Paul Boddie <paul@boddie.org.uk>     8      9 This program is free software; you can redistribute it and/or modify it under    10 the terms of the GNU General Public License as published by the Free Software    11 Foundation; either version 3 of the License, or (at your option) any later    12 version.    13     14 This program is distributed in the hope that it will be useful, but WITHOUT    15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS    16 FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more    17 details.    18     19 You should have received a copy of the GNU General Public License along with    20 this program.  If not, see <http://www.gnu.org/licenses/>.    21 """    22     23 from compiler.transformer import Transformer    24 from errors import InspectError    25 from os import listdir, makedirs, remove    26 from os.path import exists, isdir, join, split    27 from results import ConstantValueRef, LiteralSequenceRef, NameRef    28 import compiler.ast    29     30 class CommonOutput:    31     32     "Common output functionality."    33     34     def check_output(self):    35     36         "Check the existing output and remove it if irrelevant."    37     38         if not exists(self.output):    39             makedirs(self.output)    40     41         details = self.importer.get_cache_details()    42         recorded_details = self.get_output_details()    43     44         if recorded_details != details:    45             self.remove_output()    46     47         writefile(self.get_output_details_filename(), details)    48     49     def get_output_details_filename(self):    50     51         "Return the output details filename."    
class CommonModule:

    "A common module representation."

    def __init__(self, name, importer):

        """
        Initialise this module with the given 'name' and an 'importer' which is
        used to provide access to other modules when required.
        """

        self.name = name
        self.importer = importer
        self.filename = None

        # Inspection-related attributes.

        self.astnode = None
        self.encoding = None
        self.iterators = {}
        self.temp = {}
        self.lambdas = {}

        # Constants, literals and values.

        self.constants = {}
        self.constant_values = {}
        self.literals = {}
        self.literal_types = {}

        # Nested namespaces.

        self.namespace_path = []
        self.in_function = False

        # Retain the assignment value expression and track invocations.

        self.in_assignment = None
        self.in_invocation = False

        # Attribute chain state management.

        self.attrs = []
        self.chain_assignment = []
        self.chain_invocation = []

    def __repr__(self):
        return "CommonModule(%r, %r)" % (self.name, self.importer)

    def parse_file(self, filename):

        "Parse the file with the given 'filename', initialising attributes."

        self.filename = filename

        # Use the Transformer directly to obtain encoding information.

        t = Transformer()
        f = open(filename)

        try:
            self.astnode = t.parsesuite(f.read() + "\n")
            self.encoding = t.encoding
        finally:
            f.close()

    # Module-relative naming.

    def get_global_path(self, name):

        "Return the path of 'name' qualified by this module's name."

        return "%s.%s" % (self.name, name)

    def get_namespace_path(self):

        "Return the dotted path of the current namespace within this module."

        return ".".join([self.name] + self.namespace_path)

    def get_object_path(self, name):

        "Return the dotted path of 'name' within the current namespace."

        return ".".join([self.name] + self.namespace_path + [name])

    def get_parent_path(self):

        "Return the dotted path of the namespace enclosing the current one."

        return ".".join([self.name] + self.namespace_path[:-1])

    # Namespace management.

    def enter_namespace(self, name):

        "Enter the namespace having the given 'name'."

        self.namespace_path.append(name)

    def exit_namespace(self):

        "Exit the current namespace."

        self.namespace_path.pop()

    # Constant reference naming.

    def get_constant_name(self, value, value_type, encoding=None):

        """
        Add a new constant to the current namespace for 'value' with
        'value_type'.

        The returned name has the form "$c<n>" where <n> is the position at
        which the (value, value_type, encoding) combination was first recorded
        in the current namespace.
        """

        path = self.get_namespace_path()
        init_item(self.constants, path, dict)
        return "$c%d" % add_counter_item(self.constants[path], (value, value_type, encoding))

    # Literal reference naming.

    def get_literal_name(self):

        "Add a new literal to the current namespace."

        path = self.get_namespace_path()
        init_item(self.literals, path, lambda: 0)
        return "$C%d" % self.literals[path]

    def next_literal(self):

        "Advance the literal counter for the current namespace."

        self.literals[self.get_namespace_path()] += 1

    # Temporary iterator naming.

    def get_iterator_path(self):

        """
        Return the path under which iterator names are counted: the current
        namespace inside functions, the module name otherwise.
        """

        return self.in_function and self.get_namespace_path() or self.name

    def get_iterator_name(self):

        "Return the current iterator name for the iterator path."

        path = self.get_iterator_path()
        init_item(self.iterators, path, lambda: 0)
        return "$i%d" % self.iterators[path]

    def next_iterator(self):

        "Advance the iterator counter for the iterator path."

        self.iterators[self.get_iterator_path()] += 1

    # Temporary variable naming.

    def get_temporary_name(self):

        "Return the current temporary variable name for the current namespace."

        path = self.get_namespace_path()
        init_item(self.temp, path, lambda: 0)
        return "$t%d" % self.temp[path]

    def next_temporary(self):

        "Advance the temporary variable counter for the current namespace."

        self.temp[self.get_namespace_path()] += 1

    # Arbitrary function naming.

    def get_lambda_name(self):

        """
        Return a new lambda name for the current namespace, advancing the
        counter so that each call yields a different name.
        """

        path = self.get_namespace_path()
        init_item(self.lambdas, path, lambda: 0)
        name = "$l%d" % self.lambdas[path]
        self.lambdas[path] += 1
        return name

    def reset_lambdas(self):

        "Discard all lambda counters."

        self.lambdas = {}

    # Constant and literal recording.

    def get_constant_value(self, value, literals=None):

        """
        Encode the 'value' if appropriate, returning a value, a typename and any
        encoding.

        Where 'literals' is omitted, plain strings are returned unchanged as
        there is no program representation from which to decode them.
        """

        if isinstance(value, unicode):
            return value.encode("utf-8"), "unicode", self.encoding

        # Attempt to convert plain strings to text. Without literals there is
        # nothing to examine, and iterating over None would raise a TypeError
        # not handled below.

        elif isinstance(value, str) and self.encoding and literals is not None:
            try:
                return get_string_details(literals, self.encoding)
            except UnicodeDecodeError:
                pass

        return value, value.__class__.__name__, None

    def get_constant_reference(self, ref, value, encoding=None):

        """
        Return a constant reference for the given 'ref' type and 'value', with
        the optional 'encoding' applying to text values.
        """

        constant_name = self.get_constant_name(value, ref.get_origin(), encoding)

        # Return a reference for the constant.

        objpath = self.get_object_path(constant_name)
        name_ref = ConstantValueRef(constant_name, ref.instance_of(objpath), value)

        # Record the value and type for the constant.

        self._reserve_constant(objpath, name_ref.value, name_ref.get_origin(), encoding)
        return name_ref

    def reserve_constant(self, objpath, value, origin, encoding=None):

        """
        Reserve a constant within 'objpath' with the given 'value' and having a
        type with the given 'origin', with the optional 'encoding' applying to
        text values.
        """

        # NOTE(review): the 'objpath' argument is replaced below by a path in
        # the current namespace, and 'encoding' is not passed on when naming
        # the constant (unlike get_constant_reference) -- confirm both are
        # intended.

        constant_name = self.get_constant_name(value, origin)
        objpath = self.get_object_path(constant_name)
        self._reserve_constant(objpath, value, origin, encoding)

    def _reserve_constant(self, objpath, value, origin, encoding):

        """
        Store a constant for 'objpath' with the given 'value' and 'origin', with
        the optional 'encoding' applying to text values.
        """

        self.constant_values[objpath] = value, origin, encoding

    def get_literal_reference(self, name, ref, items, cls):

        """
        Return a literal reference for the given type 'name', literal 'ref',
        node 'items' and employing the given 'cls' as the class of the returned
        reference object.
        """

        # Construct an invocation using the items as arguments.

        typename = "$L%s" % name

        invocation = compiler.ast.CallFunc(
            compiler.ast.Name(typename),
            items
            )

        # Get a name for the actual literal.

        instname = self.get_literal_name()
        self.next_literal()

        # Record the type for the literal.

        objpath = self.get_object_path(instname)
        self.literal_types[objpath] = ref.get_origin()

        # Return a wrapper for the invocation exposing the items.

        return cls(
            instname,
            ref.instance_of(),
            self.process_structure_node(invocation),
            invocation.args
            )

    # Node handling.

    def process_structure(self, node):

        """
        Within the given 'node', process the program structure.

        During inspection, this will process global declarations, adjusting the
        module namespace, and import statements, building a module dependency
        hierarchy.

        During translation, this will consult deduced program information and
        output translated code.

        Return a list of the results obtained for each child node.
        """

        l = []
        for n in node.getChildNodes():
            l.append(self.process_structure_node(n))
        return l

    def process_augassign_node(self, n):

        "Process the given augmented assignment node 'n'."

        op = operator_functions[n.op]

        # Convert the accessed node into the corresponding assignment target.

        if isinstance(n.node, compiler.ast.Getattr):
            target = compiler.ast.AssAttr(n.node.expr, n.node.attrname, "OP_ASSIGN")
        elif isinstance(n.node, compiler.ast.Name):
            target = compiler.ast.AssName(n.node.name, "OP_ASSIGN")
        else:
            target = n.node

        # Assign the result of the operator function to the target.

        assignment = compiler.ast.Assign(
            [target],
            compiler.ast.CallFunc(
                compiler.ast.Name("$op%s" % op),
                [n.node, n.expr]))

        return self.process_structure_node(assignment)

    def process_assignment_for_object(self, original_name, source):

        """
        Return an assignment operation making 'original_name' refer to the given
        'source'.
        """

        assignment = compiler.ast.Assign(
            [compiler.ast.AssName(original_name, "OP_ASSIGN")],
            source
            )

        return self.process_structure_node(assignment)

    def process_assignment_node_items(self, n, expr):

        """
        Process the given assignment node 'n' whose children are to be assigned
        items of 'expr'.
        """

        name_ref = self.process_structure_node(expr)

        # Either unpack the items and present them directly to each assignment
        # node.

        if isinstance(name_ref, LiteralSequenceRef) and \
           self.process_literal_sequence_items(n, name_ref):

            pass

        # Or have the assignment nodes access each item via the sequence API.

        else:
            self.process_assignment_node_items_by_position(n, expr, name_ref)

    def process_assignment_node_items_by_position(self, n, expr, name_ref):

        """
        Process the given sequence assignment node 'n', converting the node to
        the separate assignment of each target using positional access on a
        temporary variable representing the sequence. Use 'expr' as the assigned
        value and 'name_ref' as the reference providing any existing temporary
        variable.
        """

        assignments = []

        # Employ existing names to access the sequence.
        # Literal sequences do not provide names of accessible objects.

        if isinstance(name_ref, NameRef) and not isinstance(name_ref, LiteralSequenceRef):
            temp = name_ref.name

        # For other expressions, create a temporary name to reference the items.

        else:
            temp = self.get_temporary_name()
            self.next_temporary()

            assignments.append(
                compiler.ast.Assign([compiler.ast.AssName(temp, "OP_ASSIGN")], expr)
                )

        # Assign the items to the target nodes.

        for i, node in enumerate(n.nodes):
            assignments.append(
                compiler.ast.Assign([node], compiler.ast.Subscript(
                    compiler.ast.Name(temp), "OP_APPLY", [compiler.ast.Const(i, str(i))]))
                )

        return self.process_structure_node(compiler.ast.Stmt(assignments))

    def process_literal_sequence_items(self, n, name_ref):

        """
        Process the given assignment node 'n', obtaining from the given
        'name_ref' the items to be assigned to the assignment targets.

        Return whether this method was able to process the assignment node as
        a sequence of direct assignments.

        Raise InspectError where the number of targets differs from the number
        of items.
        """

        if len(n.nodes) == len(name_ref.items):
            assigned_names, count = get_names_from_nodes(n.nodes)
            accessed_names, _count = get_names_from_nodes(name_ref.items)

            # Only assign directly between items if all assigned names are
            # plain names (not attribute assignments), and if the assigned names
            # do not appear in the accessed names.

            if len(assigned_names) == count and \
               not assigned_names.intersection(accessed_names):

                for node, item in zip(n.nodes, name_ref.items):
                    self.process_assignment_node(node, item)

                return True

            # Otherwise, use the position-based mechanism to obtain values.

            else:
                return False
        else:
            raise InspectError("In %s, item assignment needing %d items is given %d items." % (
                self.get_namespace_path(), len(n.nodes), len(name_ref.items)))

    def process_compare_node(self, n):

        """
        Process the given comparison node 'n', converting an operator sequence
        from...

        <expr1> <op1> <expr2> <op2> <expr3>

        ...to...

        <op1>(<expr1>, <expr2>) and <op2>(<expr2>, <expr3>)
        """

        invocations = []
        last = n.expr

        for op, op_node in n.ops:
            op = operator_functions.get(op)

            invocations.append(compiler.ast.CallFunc(
                compiler.ast.Name("$op%s" % op),
                [last, op_node]))

            # The right operand becomes the left operand of the next operator.

            last = op_node

        if len(invocations) > 1:
            result = compiler.ast.And(invocations)
        else:
            result = invocations[0]

        return self.process_structure_node(result)

    def process_dict_node(self, node):

        """
        Process the given dictionary 'node', returning a list of (key, value)
        tuples.
        """

        l = []
        for key, value in node.items:
            l.append((
                self.process_structure_node(key),
                self.process_structure_node(value)))
        return l

    def process_for_node(self, n):

        """
        Generate attribute accesses for {n.list}.__iter__ and the next method on
        the iterator, producing a replacement node for the original.
        """

        node = compiler.ast.Stmt([

            # <next> = {n.list}.__iter__().next

            compiler.ast.Assign(
                [compiler.ast.AssName(self.get_iterator_name(), "OP_ASSIGN")],
                compiler.ast.Getattr(
                    compiler.ast.CallFunc(
                        compiler.ast.Getattr(n.list, "__iter__"),
                        []
                        ), "next")),

            # try:
            #     while True:
            #         <var>... = <next>()
            #         ...
            # except StopIteration:
            #     pass

            compiler.ast.TryExcept(
                compiler.ast.While(
                    compiler.ast.Name("True"),
                    compiler.ast.Stmt([
                        compiler.ast.Assign(
                            [n.assign],
                            compiler.ast.CallFunc(
                                compiler.ast.Name(self.get_iterator_name()),
                                []
                                )),
                        n.body]),
                    None),
                [(compiler.ast.Name("StopIteration"), None, compiler.ast.Stmt([compiler.ast.Pass()]))],
                None)
            ])

        # Advance the iterator name only after both uses above so that they
        # refer to the same name.

        self.next_iterator()
        self.process_structure_node(node)

    def process_literal_sequence_node(self, n, name, ref, cls):

        """
        Process the given literal sequence node 'n' as a function invocation,
        with 'name' indicating the type of the sequence, and 'ref' being a
        reference to the type. The 'cls' is used to instantiate a suitable name
        reference.
        """

        if name == "dict":
            items = []
            for key, value in n.items:
                items.append(compiler.ast.Tuple([key, value]))
        else: # name in ("list", "tuple"):
            items = n.nodes

        return self.get_literal_reference(name, ref, items, cls)

    def process_operator_node(self, n):

        """
        Process the given operator node 'n' as an operator function invocation.
        """

        op = operator_functions[n.__class__.__name__]
        invocation = compiler.ast.CallFunc(
            compiler.ast.Name("$op%s" % op),
            list(n.getChildNodes())
            )
        return self.process_structure_node(invocation)

    def process_print_node(self, n):

        """
        Process the given print node 'n' as an invocation on a stream of the
        form...

        $print(dest, args, nl)

        The special function name will be translated elsewhere.
        """

        nl = isinstance(n, compiler.ast.Printnl)
        invocation = compiler.ast.CallFunc(
            compiler.ast.Name("$print"),
            [n.dest or compiler.ast.Name("None"),
             compiler.ast.List(list(n.nodes)),
             nl and compiler.ast.Name("True") or compiler.ast.Name("False")]
            )
        return self.process_structure_node(invocation)

    def process_slice_node(self, n, expr=None):

        """
        Process the given slice node 'n' as an operator function invocation.

        Any 'expr' is supplied as a final argument to the invocation.
        """

        op = n.flags == "OP_ASSIGN" and "setslice" or "getslice"
        invocation = compiler.ast.CallFunc(
            compiler.ast.Name("$op%s" % op),
            [n.expr, n.lower or compiler.ast.Name("None"), n.upper or compiler.ast.Name("None")] +
                (expr and [expr] or [])
            )
        return self.process_structure_node(invocation)

    def process_sliceobj_node(self, n):

        """
        Process the given slice object node 'n' as a slice constructor.
        """

        op = "slice"
        invocation = compiler.ast.CallFunc(
            compiler.ast.Name("$op%s" % op),
            n.nodes
            )
        return self.process_structure_node(invocation)

    def process_subscript_node(self, n, expr=None):

        """
        Process the given subscript node 'n' as an operator function invocation.

        Any 'expr' is supplied as a final argument to the invocation.
        """

        op = n.flags == "OP_ASSIGN" and "setitem" or "getitem"
        invocation = compiler.ast.CallFunc(
            compiler.ast.Name("$op%s" % op),
            [n.expr] + list(n.subs) + (expr and [expr] or [])
            )
        return self.process_structure_node(invocation)

    def process_attribute_chain(self, n):

        """
        Process the given attribute access node 'n'. Return a reference
        describing the expression.
        """

        # AssAttr/Getattr are nested with the outermost access being the last
        # access in any chain.

        self.attrs.insert(0, n.attrname)
        attrs = self.attrs

        # Break attribute chains where non-access nodes are found.

        if not self.have_access_expression(n):
            self.reset_attribute_chain()

        # Descend into the expression, extending backwards any existing chain,
        # or building another for the expression.

        name_ref = self.process_structure_node(n.expr)

        # Restore chain information applying to this node.

        if not self.have_access_expression(n):
            self.restore_attribute_chain(attrs)

        # Return immediately if the expression was another access and thus a
        # continuation backwards along the chain. The above processing will
        # have followed the chain all the way to its conclusion.

        if self.have_access_expression(n):
            del self.attrs[0]

        return name_ref

    # Attribute chain handling.

    def reset_attribute_chain(self):

        "Reset the attribute chain for a subexpression of an attribute access."

        self.attrs = []

        # Save the assignment and invocation state for later restoration.

        self.chain_assignment.append(self.in_assignment)
        self.chain_invocation.append(self.in_invocation)
        self.in_assignment = None
        self.in_invocation = False

    def restore_attribute_chain(self, attrs):

        "Restore the attribute chain for an attribute access."

        self.attrs = attrs
        self.in_assignment = self.chain_assignment.pop()
        self.in_invocation = self.chain_invocation.pop()

    def have_access_expression(self, node):

        "Return whether the expression associated with 'node' is Getattr."

        return isinstance(node.expr, compiler.ast.Getattr)

    def get_name_for_tracking(self, name, path=None):

        """
        Return the name to be used for attribute usage observations involving
        the given 'name' in the current namespace. If 'path' is indicated and
        the name is being used outside a function, return the path value;
        otherwise, return a path computed using the current namespace and the
        given name.

        The intention of this method is to provide a suitably-qualified name
        that can be tracked across namespaces. Where globals are being
        referenced in class namespaces, they should be referenced using their
        path within the module, not using a path within each class.

        It may not be possible to identify a global within a function at the
        time of inspection (since a global may appear later in a file).
        Consequently, globals are identified by their local name rather than
        their module-qualified path.
        """

        # For functions, use the appropriate local names.

        if self.in_function:
            return name

        # For static namespaces, use the given qualified name.

        elif path:
            return path

        # Otherwise, establish a name in the current namespace.

        else:
            return self.get_object_path(name)

    def get_path_for_access(self):

        "Outside functions, register accesses at the module level."

        if not self.in_function:
            return self.name
        else:
            return self.get_namespace_path()

    def get_module_name(self, node):

        """
        Using the given From 'node' in this module, calculate any relative import
        information, returning a tuple containing a module to import along with any
        names to import based on the node's name information.

        Where the returned module is given as None, whole module imports should
        be performed for the returned modules using the returned names.

        Raise InspectError where a relative import refers to more levels than
        this module's name provides.
        """

        # Absolute import.

        if node.level == 0:
            return node.modname, node.names

        # Relative to an ancestor of this module.

        else:
            path = self.name.split(".")
            level = node.level

            # Relative imports treat package roots as submodules.

            if split(self.filename)[-1] == "__init__.py":
                level -= 1

            if level > len(path):
                raise InspectError("Relative import %r involves too many levels up from module %r" % (
                    ("%s%s" % ("." * node.level, node.modname or "")), self.name))

            basename = ".".join(path[:len(path)-level])

        # Name imports from a module.

        if node.modname:
            return "%s.%s" % (basename, node.modname), node.names

        # Relative whole module imports.

        else:
            return basename, node.names

def get_argnames(args):

    """
    Return a list of all names provided by 'args'. Since tuples may be
    employed, the arguments are traversed depth-first.
    """

    l = []
    for arg in args:
        if isinstance(arg, tuple):
            l += get_argnames(arg)
        else:
            l.append(arg)
    return l
def get_names_from_nodes(nodes):

    """
    Return the names employed in the given 'nodes' along with the number of
    nodes excluding sequences.

    The result is a (names, count) tuple where 'names' is a set. Sequence
    nodes contribute the names and counts of their members instead of being
    counted themselves.
    """

    names = set()
    count = 0

    for node in nodes:

        # Add names and count them.

        if isinstance(node, (compiler.ast.AssName, compiler.ast.Name)):
            names.add(node.name)
            count += 1

        # Add names from sequences and incorporate their counts.

        elif isinstance(node, (compiler.ast.AssList, compiler.ast.AssTuple,
                               compiler.ast.List, compiler.ast.Set,
                               compiler.ast.Tuple)):
            _names, _count = get_names_from_nodes(node.nodes)
            names.update(_names)
            count += _count

        # Count non-name, non-sequence nodes.

        else:
            count += 1

    return names, count

# Result classes.

class InstructionSequence:

    "A generic sequence of instructions."

    def __init__(self, instructions):

        "Initialise the sequence with the given 'instructions'."

        self.instructions = instructions

    def get_value_instruction(self):

        "Return the last instruction in the sequence."

        return self.instructions[-1]

    def get_init_instructions(self):

        "Return all instructions preceding the last one."

        return self.instructions[:-1]

# Dictionary utilities.

def init_item(d, key, fn):

    """
    Add to 'd' an entry for 'key' using the callable 'fn' to make an initial
    value where no entry already exists.

    Return the value now stored for 'key'.
    """

    # Membership testing replaces the deprecated has_key method.

    if key not in d:
        d[key] = fn()
    return d[key]
def dict_for_keys(d, keys):

    "Return a new dictionary containing entries from 'd' for the given 'keys'."

    # Keys absent from 'd' are ignored.

    nd = {}
    for key in keys:
        if key in d:
            nd[key] = d[key]
    return nd

def make_key(s):

    "Make sequence 's' into a tuple-based key, first sorting its contents."

    return tuple(sorted(s))

def add_counter_item(d, key):

    """
    Make a mapping in 'd' for 'key' to the number of keys added before it, thus
    maintaining a mapping of keys to their order of insertion.

    Return the number associated with 'key'.
    """

    if key not in d:
        d[key] = len(d)
    return d[key]

def remove_items(d1, d2):

    "Remove from 'd1' all items from 'd2'."

    # Iterate over a copy of the keys so that 'd1' and 'd2' may be the same
    # dictionary without the iteration being disturbed by the deletions.

    for key in list(d2):
        if key in d1:
            del d1[key]

# Set utilities.

def first(s):

    "Return the first item obtained by iterating over 's'."

    return list(s)[0]

def same(s1, s2):

    "Return whether 's1' and 's2' contain the same distinct items."

    return set(s1) == set(s2)

# General input/output.

def readfile(filename):

    "Return the contents of 'filename'."

    with open(filename) as f:
        return f.read()

def writefile(filename, s):

    "Write to 'filename' the string 's'."

    with open(filename, "w") as f:
        f.write(s)

# General encoding.

def sorted_output(x):

    "Sort sequence 'x' and return a string with commas separating the values."

    # The values are converted to strings before sorting, so the ordering is
    # that of the string forms.

    return ", ".join(sorted(map(str, x)))
962    963     x = map(str, x)   964     x.sort()   965     return ", ".join(x)   966    967 def get_string_details(literals, encoding):   968    969     """   970     Determine whether 'literals' represent Unicode strings or byte strings,   971     using 'encoding' to reproduce byte sequences.   972    973     Each literal is the full program representation including prefix and quotes   974     recoded by the parser to UTF-8. Thus, any literal found to represent a byte   975     string needs to be translated back to its original encoding.   976    977     Return a single encoded literal value, a type name, and the original   978     encoding as a tuple.   979     """   980    981     typename = "unicode"   982    983     l = []   984    985     for s in literals:   986         out, _typename = get_literal_details(s)   987         if _typename == "str":   988             typename = "str"   989         l.append(out)   990    991     out = "".join(l)   992    993     # For Unicode values, convert to the UTF-8 program representation.   994    995     if typename == "unicode":   996         return out.encode("utf-8"), typename, encoding   997    998     # For byte string values, convert back to the original encoding.   999   1000     else:  1001         return out.encode(encoding), typename, encoding  1002   1003 def get_literal_details(s):  1004   1005     """  1006     Determine whether 's' represents a Unicode string or a byte string, where  1007     's' contains the full program representation of a literal including prefix  1008     and quotes, recoded by the parser to UTF-8.  1009   1010     Find and convert Unicode values starting with <backslash>u or <backslash>U,  1011     and byte or Unicode values starting with <backslash><octal digit> or  1012     <backslash>x.  1013   1014     Literals prefixed with "u" cause <backslash><octal digit> and <backslash>x  1015     to be considered as Unicode values. 
Otherwise, they produce byte values and  1016     cause unprefixed strings to be considered as byte strings.  1017   1018     Literals prefixed with "r" do not have their backslash-encoded values  1019     converted unless also prefixed with "u", in which case only the above value  1020     formats are converted, not any of the other special sequences for things  1021     like newlines.  1022   1023     Return the literal value as a Unicode object together with the appropriate  1024     type name in a tuple.  1025     """  1026   1027     l = []  1028   1029     # Identify the quote character and use it to identify the prefix.  1030   1031     quote_type = s[-1]  1032     prefix_end = s.find(quote_type)  1033     prefix = s[:prefix_end].lower()  1034   1035     if prefix not in ("", "b", "br", "r", "u", "ur"):  1036         raise ValueError, "String literal does not have a supported prefix: %s" % s  1037   1038     if "b" in prefix:  1039         typename = "str"  1040     else:  1041         typename = "unicode"  1042   1043     # Identify triple quotes or single quotes.  1044   1045     if len(s) >= 6 and s[-2] == quote_type and s[-3] == quote_type:  1046         quote = s[prefix_end:prefix_end+3]  1047         current = prefix_end + 3  1048         end = len(s) - 3  1049     else:  1050         quote = s[prefix_end]  1051         current = prefix_end + 1  1052         end = len(s) - 1  1053   1054     # Conversions of some quoted values.  1055   1056     searches = {  1057         "u" : (6, 16),  1058         "U" : (10, 16),  1059         "x" : (4, 16),  1060         }  1061   1062     octal_digits = map(str, range(0, 8))  1063   1064     # Translations of some quoted values.  1065   1066     escaped = {  1067         "\\" : "\\", "'" : "'", '"' : '"',  1068         "a" : "\a", "b" : "\b", "f" : "\f",  1069         "n" : "\n", "r" : "\r", "t" : "\t",  1070         }  1071   1072     while current < end:  1073   1074         # Look for quoted values.  
1075   1076         index = s.find("\\", current)  1077         if index == -1 or index + 1 == end:  1078             l.append(s[current:end])  1079             break  1080   1081         # Add the preceding text.  1082   1083         l.append(s[current:index])  1084   1085         # Handle quoted text.  1086   1087         term = s[index+1]  1088   1089         # Add Unicode values. Where a string is u-prefixed, even \o and \x  1090         # produce Unicode values.  1091   1092         if typename == "unicode" and (  1093             term in ("u", "U") or   1094             "u" in prefix and (term == "x" or term in octal_digits)):  1095   1096             needed, base = searches.get(term, (4, 8))  1097             value = convert_quoted_value(s, index, needed, end, base, unichr)  1098             l.append(value)  1099             current = index + needed  1100   1101         # Add raw byte values, changing the string type.  1102   1103         elif "r" not in prefix and (  1104              term == "x" or term in octal_digits):  1105   1106             needed, base = searches.get(term, (4, 8))  1107             value = convert_quoted_value(s, index, needed, end, base, chr)  1108             l.append(value)  1109             typename = "str"  1110             current = index + needed  1111   1112         # Add other escaped values.  1113   1114         elif "r" not in prefix and escaped.has_key(term):  1115             l.append(escaped[term])  1116             current = index + 2  1117   1118         # Add other text as found.  1119   1120         else:  1121             l.append(s[index:index+2])  1122             current = index + 2  1123   1124     # Collect the components into a single Unicode object. Since the literal  1125     # text was already in UTF-8 form, interpret plain strings as UTF-8  1126     # sequences.  
1127   1128     out = []  1129   1130     for value in l:  1131         if isinstance(value, unicode):  1132             out.append(value)  1133         else:  1134             out.append(unicode(value, "utf-8"))  1135   1136     return "".join(out), typename  1137   1138 def convert_quoted_value(s, index, needed, end, base, fn):  1139   1140     """  1141     Interpret a quoted value in 's' at 'index' with the given 'needed' number of  1142     positions, and with the given 'end' indicating the first position after the  1143     end of the actual string content.  1144   1145     Use 'base' as the numerical base when interpreting the value, and use 'fn'  1146     to convert the value to an appropriate type.  1147     """  1148   1149     s = s[index:min(index+needed, end)]  1150   1151     # Not a complete occurrence.  1152   1153     if len(s) < needed:  1154         return s  1155   1156     # Test for a well-formed value.  1157   1158     try:  1159         first = base == 8 and 1 or 2  1160         value = int(s[first:needed], base)  1161     except ValueError:  1162         return s  1163     else:  1164         return fn(value)  1165   1166 # Attribute chain decoding.  1167   1168 def get_attrnames(attrnames):  1169   1170     """  1171     Split the qualified attribute chain 'attrnames' into its components,  1172     handling special attributes starting with "#" that indicate type  1173     conformance.  1174     """  1175   1176     if attrnames.startswith("#"):  1177         return [attrnames]  1178     else:  1179         return attrnames.split(".")  1180   1181 def get_attrname_from_location(location):  1182   1183     """  1184     Extract the first attribute from the attribute names employed in a  1185     'location'.  
1186     """  1187   1188     path, name, attrnames, access = location  1189     if not attrnames:  1190         return attrnames  1191     return get_attrnames(attrnames)[0]  1192   1193 def get_name_path(path, name):  1194   1195     "Return a suitable qualified name from the given 'path' and 'name'."  1196   1197     if "." in name:  1198         return name  1199     else:  1200         return "%s.%s" % (path, name)  1201   1202 # Usage-related functions.  1203   1204 def get_types_for_usage(attrnames, objects):  1205   1206     """  1207     Identify the types that can support the given 'attrnames', using the  1208     given 'objects' as the catalogue of type details.  1209     """  1210   1211     types = []  1212     for name, _attrnames in objects.items():  1213         if set(attrnames).issubset(_attrnames):  1214             types.append(name)  1215     return types  1216   1217 def get_invoked_attributes(usage):  1218   1219     "Obtain invoked attribute from the given 'usage'."  1220   1221     invoked = []  1222     if usage:  1223         for attrname, invocation, assignment in usage:  1224             if invocation:  1225                 invoked.append(attrname)  1226     return invoked  1227   1228 def get_assigned_attributes(usage):  1229   1230     "Obtain assigned attribute from the given 'usage'."  1231   1232     assigned = []  1233     if usage:  1234         for attrname, invocation, assignment in usage:  1235             if assignment:  1236                 assigned.append(attrname)  1237     return assigned  1238   1239 # Type and module functions.  1240 # NOTE: This makes assumptions about the __builtins__ structure.  1241   1242 def get_builtin_module(name):  1243   1244     "Return the module name containing the given type 'name'."  
1245   1246     if name == "string":  1247         modname = "str"  1248     elif name == "utf8string":  1249         modname = "unicode"  1250     elif name == "NoneType":  1251         modname = "none"  1252     else:  1253         modname = name  1254   1255     return "__builtins__.%s" % modname  1256   1257 def get_builtin_type(name):  1258   1259     "Return the type name provided by the given Python value 'name'."  1260   1261     if name == "str":  1262         return "string"  1263     elif name == "unicode":  1264         return "utf8string"  1265     else:  1266         return name  1267   1268 def get_builtin_class(name):  1269   1270     "Return the full name of the built-in class having the given 'name'."  1271   1272     typename = get_builtin_type(name)  1273     module = get_builtin_module(typename)  1274     return "%s.%s" % (module, typename)  1275   1276 # Useful data.  1277   1278 predefined_constants = "False", "None", "NotImplemented", "True"  1279   1280 operator_functions = {  1281   1282     # Fundamental operations.  1283   1284     "is" : "is_",  1285     "is not" : "is_not",  1286   1287     # Binary operations.  1288   1289     "in" : "in_",  1290     "not in" : "not_in",  1291     "Add" : "add",  1292     "Bitand" : "and_",  1293     "Bitor" : "or_",  1294     "Bitxor" : "xor",  1295     "Div" : "div",  1296     "FloorDiv" : "floordiv",  1297     "LeftShift" : "lshift",  1298     "Mod" : "mod",  1299     "Mul" : "mul",  1300     "Power" : "pow",  1301     "RightShift" : "rshift",  1302     "Sub" : "sub",  1303   1304     # Unary operations.  1305   1306     "Invert" : "invert",  1307     "UnaryAdd" : "pos",  1308     "UnarySub" : "neg",  1309   1310     # Augmented assignment.  
1311   1312     "+=" : "iadd",  1313     "-=" : "isub",  1314     "*=" : "imul",  1315     "/=" : "idiv",  1316     "//=" : "ifloordiv",  1317     "%=" : "imod",  1318     "**=" : "ipow",  1319     "<<=" : "ilshift",  1320     ">>=" : "irshift",  1321     "&=" : "iand",  1322     "^=" : "ixor",  1323     "|=" : "ior",  1324   1325     # Comparisons.  1326   1327     "==" : "eq",  1328     "!=" : "ne",  1329     "<" : "lt",  1330     "<=" : "le",  1331     ">=" : "ge",  1332     ">" : "gt",  1333     }  1334   1335 # vim: tabstop=4 expandtab shiftwidth=4