Lichen (file common.py at be30371d7a37)

     1 #!/usr/bin/env python     2      3 """     4 Common functions.     5      6 Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013,     7               2014, 2015, 2016, 2017 Paul Boddie <paul@boddie.org.uk>     8      9 This program is free software; you can redistribute it and/or modify it under    10 the terms of the GNU General Public License as published by the Free Software    11 Foundation; either version 3 of the License, or (at your option) any later    12 version.    13     14 This program is distributed in the hope that it will be useful, but WITHOUT    15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS    16 FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more    17 details.    18     19 You should have received a copy of the GNU General Public License along with    20 this program.  If not, see <http://www.gnu.org/licenses/>.    21 """    22     23 from compiler.transformer import Transformer    24 from errors import InspectError    25 from os import listdir, makedirs, remove    26 from os.path import exists, isdir, join, split    27 from results import ConstantValueRef, LiteralSequenceRef, NameRef    28 import compiler.ast    29     30 class CommonOutput:    31     32     "Common output functionality."    33     34     def check_output(self):    35     36         "Check the existing output and remove it if irrelevant."    37     38         if not exists(self.output):    39             makedirs(self.output)    40     41         details = self.importer.get_cache_details()    42         recorded_details = self.get_output_details()    43     44         if recorded_details != details:    45             self.remove_output()    46     47         writefile(self.get_output_details_filename(), details)    48     49     def get_output_details_filename(self):    50     51         "Return the output details filename."    52     53         return join(self.output, "$details")    54     55     def get_output_details(self):    56     57         "Return details of the existing output."    58     59         details_filename = self.get_output_details_filename()    60     61         if not exists(details_filename):    62             return None    63         else:    64             return readfile(details_filename)    65     66     def remove_output(self, dirname=None):    67     68         "Remove the output."    69     70         dirname = dirname or self.output    71     72         for filename in listdir(dirname):    73             path = join(dirname, filename)    74             if isdir(path):    75                 self.remove_output(path)    76             else:    77                 remove(path)    78     79 class CommonModule:    80     81     "A common module representation."    82     83     def __init__(self, name, importer):    84     85         """    86         Initialise this module with the given 'name' and an 'importer' which is    87         used to provide access to other modules when required.    88         """    89     90         self.name = name    91         self.importer = importer    92         self.filename = None    93     94         # Inspection-related attributes.    95     96         self.astnode = None    97         self.encoding = None    98         self.iterators = {}    99         self.temp = {}   100         self.lambdas = {}   101    102         # Constants, literals and values.   103    104         self.constants = {}   105         self.constant_values = {}   106         self.literals = {}   107         self.literal_types = {}   108    109         # Nested namespaces.   110    111         self.namespace_path = []   112         self.in_function = False   113    114         # Retain the assignment value expression and track invocations.   115    116         self.in_assignment = None   117         self.in_invocation = None   118    119         # Attribute chain state management.   120    121         self.attrs = []   122         self.chain_assignment = []   123         self.chain_invocation = []   124    125     def __repr__(self):   126         return "CommonModule(%r, %r)" % (self.name, self.importer)   127    128     def parse_file(self, filename):   129    130         "Parse the file with the given 'filename', initialising attributes."   131    132         self.filename = filename   133    134         # Use the Transformer directly to obtain encoding information.   135    136         t = Transformer()   137         f = open(filename)   138    139         try:   140             self.astnode = t.parsesuite(f.read() + "\n")   141             self.encoding = t.encoding   142         finally:   143             f.close()   144    145     # Module-relative naming.   146    147     def get_global_path(self, name):   148         return "%s.%s" % (self.name, name)   149    150     def get_namespace_path(self):   151         return ".".join([self.name] + self.namespace_path)   152    153     def get_object_path(self, name):   154         return ".".join([self.name] + self.namespace_path + [name])   155    156     def get_parent_path(self):   157         return ".".join([self.name] + self.namespace_path[:-1])   158    159     # Namespace management.   160    161     def enter_namespace(self, name):   162    163         "Enter the namespace having the given 'name'."   164    165         self.namespace_path.append(name)   166    167     def exit_namespace(self):   168    169         "Exit the current namespace."   170    171         self.namespace_path.pop()   172    173     # Constant reference naming.   174    175     def get_constant_name(self, value, value_type, encoding=None):   176    177         """   178         Add a new constant to the current namespace for 'value' with   179         'value_type'.   180         """   181    182         path = self.get_namespace_path()   183         init_item(self.constants, path, dict)   184         return "$c%d" % add_counter_item(self.constants[path], (value, value_type, encoding))   185    186     # Literal reference naming.   187    188     def get_literal_name(self):   189    190         "Add a new literal to the current namespace."   191    192         path = self.get_namespace_path()   193         init_item(self.literals, path, lambda: 0)   194         return "$C%d" % self.literals[path]   195    196     def next_literal(self):   197         self.literals[self.get_namespace_path()] += 1   198    199     # Temporary iterator naming.   200    201     def get_iterator_path(self):   202         return self.in_function and self.get_namespace_path() or self.name   203    204     def get_iterator_name(self):   205         path = self.get_iterator_path()   206         init_item(self.iterators, path, lambda: 0)   207         return "$i%d" % self.iterators[path]   208    209     def next_iterator(self):   210         self.iterators[self.get_iterator_path()] += 1   211    212     # Temporary variable naming.   213    214     def get_temporary_name(self):   215         path = self.get_namespace_path()   216         init_item(self.temp, path, lambda: 0)   217         return "$t%d" % self.temp[path]   218    219     def next_temporary(self):   220         self.temp[self.get_namespace_path()] += 1   221    222     # Arbitrary function naming.   223    224     def get_lambda_name(self):   225         path = self.get_namespace_path()   226         init_item(self.lambdas, path, lambda: 0)   227         name = "$l%d" % self.lambdas[path]   228         self.lambdas[path] += 1   229         return name   230    231     def reset_lambdas(self):   232         self.lambdas = {}   233    234     # Constant and literal recording.   235    236     def get_constant_value(self, value, literals=None):   237    238         """   239         Encode the 'value' if appropriate, returning a value, a typename and any   240         encoding.   241         """   242    243         if isinstance(value, unicode):   244             return value.encode("utf-8"), "unicode", self.encoding   245    246         # Attempt to convert plain strings to text.   247    248         elif isinstance(value, str) and self.encoding:   249             try:   250                 return get_string_details(literals, self.encoding)   251             except UnicodeDecodeError:   252                 pass   253    254         return value, value.__class__.__name__, None   255    256     def get_constant_reference(self, ref, value, encoding=None):   257    258         """   259         Return a constant reference for the given 'ref' type and 'value', with   260         the optional 'encoding' applying to text values.   261         """   262    263         constant_name = self.get_constant_name(value, ref.get_origin(), encoding)   264    265         # Return a reference for the constant.   266    267         objpath = self.get_object_path(constant_name)   268         name_ref = ConstantValueRef(constant_name, ref.instance_of(objpath), value)   269    270         # Record the value and type for the constant.   271    272         self._reserve_constant(objpath, name_ref.value, name_ref.get_origin(), encoding)   273         return name_ref   274    275     def reserve_constant(self, objpath, value, origin, encoding=None):   276    277         """   278         Reserve a constant within 'objpath' with the given 'value' and having a   279         type with the given 'origin', with the optional 'encoding' applying to   280         text values.   281         """   282    283         constant_name = self.get_constant_name(value, origin)   284         objpath = self.get_object_path(constant_name)   285         self._reserve_constant(objpath, value, origin, encoding)   286    287     def _reserve_constant(self, objpath, value, origin, encoding):   288    289         """   290         Store a constant for 'objpath' with the given 'value' and 'origin', with   291         the optional 'encoding' applying to text values.   292         """   293    294         self.constant_values[objpath] = value, origin, encoding   295    296     def get_literal_reference(self, name, ref, items, cls):   297    298         """   299         Return a literal reference for the given type 'name', literal 'ref',   300         node 'items' and employing the given 'cls' as the class of the returned   301         reference object.   302         """   303    304         # Construct an invocation using the items as arguments.   305    306         typename = "$L%s" % name   307    308         invocation = compiler.ast.CallFunc(   309             compiler.ast.Name(typename),   310             items   311             )   312    313         # Get a name for the actual literal.   314    315         instname = self.get_literal_name()   316         self.next_literal()   317    318         # Record the type for the literal.   319    320         objpath = self.get_object_path(instname)   321         self.literal_types[objpath] = ref.get_origin()   322    323         # Return a wrapper for the invocation exposing the items.   324    325         return cls(   326             instname,   327             ref.instance_of(),   328             self.process_structure_node(invocation),   329             invocation.args   330             )   331    332     # Node handling.   333    334     def process_structure(self, node):   335    336         """   337         Within the given 'node', process the program structure.   338    339         During inspection, this will process global declarations, adjusting the   340         module namespace, and import statements, building a module dependency   341         hierarchy.   342    343         During translation, this will consult deduced program information and   344         output translated code.   345         """   346    347         l = []   348         for n in node.getChildNodes():   349             l.append(self.process_structure_node(n))   350         return l   351    352     def process_augassign_node(self, n):   353    354         "Process the given augmented assignment node 'n'."   355    356         op = operator_functions[n.op]   357    358         if isinstance(n.node, compiler.ast.Getattr):   359             target = compiler.ast.AssAttr(n.node.expr, n.node.attrname, "OP_ASSIGN")   360         elif isinstance(n.node, compiler.ast.Name):   361             target = compiler.ast.AssName(n.node.name, "OP_ASSIGN")   362         else:   363             target = n.node   364    365         assignment = compiler.ast.Assign(   366             [target],   367             compiler.ast.CallFunc(   368                 compiler.ast.Name("$op%s" % op),   369                 [n.node, n.expr]))   370    371         return self.process_structure_node(assignment)   372    373     def process_assignment_for_object(self, original_name, source):   374    375         """   376         Return an assignment operation making 'original_name' refer to the given   377         'source'.   378         """   379    380         assignment = compiler.ast.Assign(   381             [compiler.ast.AssName(original_name, "OP_ASSIGN")],   382             source   383             )   384    385         return self.process_structure_node(assignment)   386    387     def process_assignment_node_items(self, n, expr):   388    389         """   390         Process the given assignment node 'n' whose children are to be assigned   391         items of 'expr'.   392         """   393    394         name_ref = self.process_structure_node(expr)   395    396         # Either unpack the items and present them directly to each assignment   397         # node.   398    399         if isinstance(name_ref, LiteralSequenceRef) and \   400            self.process_literal_sequence_items(n, name_ref):   401    402             pass   403    404         # Or have the assignment nodes access each item via the sequence API.   405    406         else:   407             self.process_assignment_node_items_by_position(n, expr, name_ref)   408    409     def process_assignment_node_items_by_position(self, n, expr, name_ref):   410    411         """   412         Process the given sequence assignment node 'n', converting the node to   413         the separate assignment of each target using positional access on a   414         temporary variable representing the sequence. Use 'expr' as the assigned   415         value and 'name_ref' as the reference providing any existing temporary   416         variable.   417         """   418    419         assignments = []   420    421         # Employ existing names to access the sequence.   422         # Literal sequences do not provide names of accessible objects.   423    424         if isinstance(name_ref, NameRef) and not isinstance(name_ref, LiteralSequenceRef):   425             temp = name_ref.name   426    427         # For other expressions, create a temporary name to reference the items.   428    429         else:   430             temp = self.get_temporary_name()   431             self.next_temporary()   432    433             assignments.append(   434                 compiler.ast.Assign([compiler.ast.AssName(temp, "OP_ASSIGN")], expr)   435                 )   436    437         # Assign the items to the target nodes.   438    439         for i, node in enumerate(n.nodes):   440             assignments.append(   441                 compiler.ast.Assign([node], compiler.ast.Subscript(   442                     compiler.ast.Name(temp), "OP_APPLY", [compiler.ast.Const(i, str(i))]))   443                 )   444    445         return self.process_structure_node(compiler.ast.Stmt(assignments))   446    447     def process_literal_sequence_items(self, n, name_ref):   448    449         """   450         Process the given assignment node 'n', obtaining from the given   451         'name_ref' the items to be assigned to the assignment targets.   452    453         Return whether this method was able to process the assignment node as   454         a sequence of direct assignments.   455         """   456    457         if len(n.nodes) == len(name_ref.items):   458             assigned_names, count = get_names_from_nodes(n.nodes)   459             accessed_names, _count = get_names_from_nodes(name_ref.items)   460    461             # Only assign directly between items if all assigned names are   462             # plain names (not attribute assignments), and if the assigned names   463             # do not appear in the accessed names.   464    465             if len(assigned_names) == count and \   466                not assigned_names.intersection(accessed_names):   467    468                 for node, item in zip(n.nodes, name_ref.items):   469                     self.process_assignment_node(node, item)   470    471                 return True   472    473             # Otherwise, use the position-based mechanism to obtain values.   474    475             else:   476                 return False   477         else:   478             raise InspectError("In %s, item assignment needing %d items is given %d items." % (   479                 self.get_namespace_path(), len(n.nodes), len(name_ref.items)))   480    481     def process_compare_node(self, n):   482    483         """   484         Process the given comparison node 'n', converting an operator sequence   485         from...   486    487         <expr1> <op1> <expr2> <op2> <expr3>   488    489         ...to...   490    491         <op1>(<expr1>, <expr2>) and <op2>(<expr2>, <expr3>)   492         """   493    494         invocations = []   495         last = n.expr   496    497         for op, op_node in n.ops:   498             op = operator_functions.get(op)   499    500             invocations.append(compiler.ast.CallFunc(   501                 compiler.ast.Name("$op%s" % op),   502                 [last, op_node]))   503    504             last = op_node   505    506         if len(invocations) > 1:   507             result = compiler.ast.And(invocations)   508         else:   509             result = invocations[0]   510    511         return self.process_structure_node(result)   512    513     def process_dict_node(self, node):   514    515         """   516         Process the given dictionary 'node', returning a list of (key, value)   517         tuples.   518         """   519    520         l = []   521         for key, value in node.items:   522             l.append((   523                 self.process_structure_node(key),   524                 self.process_structure_node(value)))   525         return l   526    527     def process_for_node(self, n):   528    529         """   530         Generate attribute accesses for {n.list}.__iter__ and the next method on   531         the iterator, producing a replacement node for the original.   532         """   533    534         node = compiler.ast.Stmt([   535    536             # <next> = {n.list}.__iter__().next   537    538             compiler.ast.Assign(   539                 [compiler.ast.AssName(self.get_iterator_name(), "OP_ASSIGN")],   540                 compiler.ast.Getattr(   541                     compiler.ast.CallFunc(   542                         compiler.ast.Getattr(n.list, "__iter__"),   543                         []   544                         ), "next")),   545    546             # try:   547             #     while True:   548             #         <var>... = <next>()   549             #         ...   550             # except StopIteration:   551             #     pass   552    553             compiler.ast.TryExcept(   554                 compiler.ast.While(   555                     compiler.ast.Name("True"),   556                     compiler.ast.Stmt([   557                         compiler.ast.Assign(   558                             [n.assign],   559                             compiler.ast.CallFunc(   560                                 compiler.ast.Name(self.get_iterator_name()),   561                                 []   562                                 )),   563                         n.body]),   564                     None),   565                 [(compiler.ast.Name("StopIteration"), None, compiler.ast.Stmt([compiler.ast.Pass()]))],   566                 None)   567             ])   568    569         self.next_iterator()   570         self.process_structure_node(node)   571    572     def process_literal_sequence_node(self, n, name, ref, cls):   573    574         """   575         Process the given literal sequence node 'n' as a function invocation,   576         with 'name' indicating the type of the sequence, and 'ref' being a   577         reference to the type. The 'cls' is used to instantiate a suitable name   578         reference.   579         """   580    581         if name == "dict":   582             items = []   583             for key, value in n.items:   584                 items.append(compiler.ast.Tuple([key, value]))   585         else: # name in ("list", "tuple"):   586             items = n.nodes   587    588         return self.get_literal_reference(name, ref, items, cls)   589    590     def process_operator_node(self, n):   591    592         """   593         Process the given operator node 'n' as an operator function invocation.   594         """   595    596         op = operator_functions[n.__class__.__name__]   597         invocation = compiler.ast.CallFunc(   598             compiler.ast.Name("$op%s" % op),   599             list(n.getChildNodes())   600             )   601         return self.process_structure_node(invocation)   602    603     def process_print_node(self, n):   604    605         """   606         Process the given print node 'n' as an invocation on a stream of the   607         form...   608    609         $print(dest, args, nl)   610    611         The special function name will be translated elsewhere.   612         """   613    614         nl = isinstance(n, compiler.ast.Printnl)   615         invocation = compiler.ast.CallFunc(   616             compiler.ast.Name("$print"),   617             [n.dest or compiler.ast.Name("None"),   618              compiler.ast.List(list(n.nodes)),   619              nl and compiler.ast.Name("True") or compiler.ast.Name("False")]   620             )   621         return self.process_structure_node(invocation)   622    623     def process_slice_node(self, n, expr=None):   624    625         """   626         Process the given slice node 'n' as an operator function invocation.   627         """   628    629         if n.flags == "OP_ASSIGN": op = "setslice"   630         elif n.flags == "OP_DELETE": op = "delslice"   631         else: op = "getslice"   632    633         invocation = compiler.ast.CallFunc(   634             compiler.ast.Name("$op%s" % op),   635             [n.expr, n.lower or compiler.ast.Name("None"), n.upper or compiler.ast.Name("None")] +   636                 (expr and [expr] or [])   637             )   638    639         # Fix parse tree structure.   640    641         if op == "delslice":   642             invocation = compiler.ast.Discard(invocation)   643    644         return self.process_structure_node(invocation)   645    646     def process_sliceobj_node(self, n):   647    648         """   649         Process the given slice object node 'n' as a slice constructor.   650         """   651    652         op = "slice"   653         invocation = compiler.ast.CallFunc(   654             compiler.ast.Name("$op%s" % op),   655             n.nodes   656             )   657         return self.process_structure_node(invocation)   658    659     def process_subscript_node(self, n, expr=None):   660    661         """   662         Process the given subscript node 'n' as an operator function invocation.   663         """   664    665         if n.flags == "OP_ASSIGN": op = "setitem"   666         elif n.flags == "OP_DELETE": op = "delitem"   667         else: op = "getitem"   668    669         invocation = compiler.ast.CallFunc(   670             compiler.ast.Name("$op%s" % op),   671             [n.expr] + list(n.subs) + (expr and [expr] or [])   672             )   673    674         # Fix parse tree structure.   675    676         if op == "delitem":   677             invocation = compiler.ast.Discard(invocation)   678    679         return self.process_structure_node(invocation)   680    681     def process_attribute_chain(self, n):   682    683         """   684         Process the given attribute access node 'n'. Return a reference   685         describing the expression.   686         """   687    688         # AssAttr/Getattr are nested with the outermost access being the last   689         # access in any chain.   690    691         self.attrs.insert(0, n.attrname)   692         attrs = self.attrs   693    694         # Break attribute chains where non-access nodes are found.   695    696         if not self.have_access_expression(n):   697             self.reset_attribute_chain()   698    699         # Descend into the expression, extending backwards any existing chain,   700         # or building another for the expression.   701    702         name_ref = self.process_structure_node(n.expr)   703    704         # Restore chain information applying to this node.   705    706         if not self.have_access_expression(n):   707             self.restore_attribute_chain(attrs)   708    709         # Return immediately if the expression was another access and thus a   710         # continuation backwards along the chain. The above processing will   711         # have followed the chain all the way to its conclusion.   712    713         if self.have_access_expression(n):   714             del self.attrs[0]   715    716         return name_ref   717    718     # Attribute chain handling.   719    720     def reset_attribute_chain(self):   721    722         "Reset the attribute chain for a subexpression of an attribute access."   723    724         self.attrs = []   725         self.chain_assignment.append(self.in_assignment)   726         self.chain_invocation.append(self.in_invocation)   727         self.in_assignment = None   728         self.in_invocation = None   729    730     def restore_attribute_chain(self, attrs):   731    732         "Restore the attribute chain for an attribute access."   733    734         self.attrs = attrs   735         self.in_assignment = self.chain_assignment.pop()   736         self.in_invocation = self.chain_invocation.pop()   737    738     def have_access_expression(self, node):   739    740         "Return whether the expression associated with 'node' is Getattr."   741    742         return isinstance(node.expr, compiler.ast.Getattr)   743    744     def get_name_for_tracking(self, name, name_ref=None):   745    746         """   747         Return the name to be used for attribute usage observations involving   748         the given 'name' in the current namespace.   749    750         If the name is being used outside a function, and if 'name_ref' is   751         given, a path featuring the name in the global namespace is returned   752         where 'name_ref' indicates a global, or a static reference is used if   753         'name_ref' provides such a reference. Otherwise, a path computed using   754         the current namespace and the given name is returned.   755    756         The intention of this method is to provide a suitably-qualified name   757         that can be tracked across namespaces. Where globals are being   758         referenced in class namespaces, they should be referenced using their   759         path within the module, not using a path within each class.   760    761         It may not be possible to identify a global within a function at the   762         time of inspection (since a global may appear later in a file).   763         Consequently, globals are identified by their local name rather than   764         their module-qualified path.   765         """   766    767         # For functions, use the appropriate local names.   768    769         if self.in_function:   770             return name   771    772         # For static references, use the reference origin.   773    774         elif name_ref and name_ref.final():   775             return name_ref.final()   776    777         # For global names outside functions, use a global name.   778    779         elif name_ref and name_ref.is_global_name():   780             return self.get_global_path(name)   781    782         # Otherwise, establish a name in the current namespace.   783    784         else:   785             return self.get_object_path(name)   786    787     def get_path_for_access(self):   788    789         "Outside functions, register accesses at the module level."   790    791         if not self.in_function:   792             return self.name   793         else:   794             return self.get_namespace_path()   795    796     def get_module_name(self, node):   797    798         """   799         Using the given From 'node' in this module, calculate any relative import   800         information, returning a tuple containing a module to import along with any   801         names to import based on the node's name information.   802    803         Where the returned module is given as None, whole module imports should   804         be performed for the returned modules using the returned names.   805         """   806    807         # Absolute import.   808    809         if node.level == 0:   810             return node.modname, node.names   811    812         # Relative to an ancestor of this module.   813    814         else:   815             path = self.name.split(".")   816             level = node.level   817    818             # Relative imports treat package roots as submodules.   819    820             if split(self.filename)[-1] == "__init__.py":   821                 level -= 1   822    823             if level > len(path):   824                 raise InspectError("Relative import %r involves too many levels up from module %r" % (   825                     ("%s%s" % ("." * node.level, node.modname or "")), self.name))   826    827             basename = ".".join(path[:len(path)-level])   828    829         # Name imports from a module.   830    831         if node.modname:   832             return "%s.%s" % (basename, node.modname), node.names   833    834         # Relative whole module imports.   835    836         else:   837             return basename, node.names   838    839 def get_argnames(args):   840    841     """   842     Return a list of all names provided by 'args'. Since tuples may be   843     employed, the arguments are traversed depth-first.   844     """   845    846     l = []   847     for arg in args:   848         if isinstance(arg, tuple):   849             l += get_argnames(arg)   850         else:   851             l.append(arg)   852     return l   853    854 def get_names_from_nodes(nodes):   855    856     """   857     Return the names employed in the given 'nodes' along with the number of   858     nodes excluding sequences.   859     """   860    861     names = set()   862     count = 0   863    864     for node in nodes:   865    866         # Add names and count them.   867    868         if isinstance(node, (compiler.ast.AssName, compiler.ast.Name)):   869             names.add(node.name)   870             count += 1   871    872         # Add names from sequences and incorporate their counts.   873    874         elif isinstance(node, (compiler.ast.AssList, compiler.ast.AssTuple,   875                                compiler.ast.List, compiler.ast.Set,   876                                compiler.ast.Tuple)):   877             _names, _count = get_names_from_nodes(node.nodes)   878             names.update(_names)   879             count += _count   880    881         # Count non-name, non-sequence nodes.   882    883         else:   884             count += 1   885    886     return names, count   887    888 # Result classes.   889    890 class InstructionSequence:   891    892     "A generic sequence of instructions."   893    894     def __init__(self, instructions):   895         self.instructions = instructions   896    897     def get_value_instruction(self):   898         return self.instructions[-1]   899    900     def get_init_instructions(self):   901         return self.instructions[:-1]   902    903 # Dictionary utilities.   904    905 def init_item(d, key, fn):   906    907     """   908     Add to 'd' an entry for 'key' using the callable 'fn' to make an initial   909     value where no entry already exists.   910     """   911    912     if not d.has_key(key):   913         d[key] = fn()   914     return d[key]   915    916 def dict_for_keys(d, keys):   917    918     "Return a new dictionary containing entries from 'd' for the given 'keys'."   919    920     nd = {}   921     for key in keys:   922         if d.has_key(key):   923             nd[key] = d[key]   924     return nd   925    926 def make_key(s):   927    928     "Make sequence 's' into a tuple-based key, first sorting its contents."   929    930     l = list(s)   931     l.sort()   932     return tuple(l)   933    934 def add_counter_item(d, key):   935    936     """   937     Make a mapping in 'd' for 'key' to the number of keys added before it, thus   938     maintaining a mapping of keys to their order of insertion.   939     """   940    941     if not d.has_key(key):   942         d[key] = len(d.keys())   943     return d[key]    944    945 def remove_items(d1, d2):   946    947     "Remove from 'd1' all items from 'd2'."   948    949     for key in d2.keys():   950         if d1.has_key(key):   951             del d1[key]   952    953 # Set utilities.   954    955 def first(s):   956     return list(s)[0]   957    958 def same(s1, s2):   959     return set(s1) == set(s2)   960    961 # General input/output.   962    963 def readfile(filename):   964    965     "Return the contents of 'filename'."   966    967     f = open(filename)   968     try:   969         return f.read()   970     finally:   971         f.close()   972    973 def writefile(filename, s):   974    975     "Write to 'filename' the string 's'."   976    977     f = open(filename, "w")   978     try:   979         f.write(s)   980     finally:   981         f.close()   982    983 # General encoding.   984    985 def sorted_output(x):   986    987     "Sort sequence 'x' and return a string with commas separating the values."   988    989     x = map(str, x)   990     x.sort()   991     return ", ".join(x)   992    993 def get_string_details(literals, encoding):   994    995     """   996     Determine whether 'literals' represent Unicode strings or byte strings,   997     using 'encoding' to reproduce byte sequences.   998    999     Each literal is the full program representation including prefix and quotes  1000     recoded by the parser to UTF-8. Thus, any literal found to represent a byte  1001     string needs to be translated back to its original encoding.  1002   1003     Return a single encoded literal value, a type name, and the original  1004     encoding as a tuple.  1005     """  1006   1007     typename = "unicode"  1008   1009     l = []  1010   1011     for s in literals:  1012         out, _typename = get_literal_details(s)  1013         if _typename == "str":  1014             typename = "str"  1015         l.append(out)  1016   1017     out = "".join(l)  1018   1019     # For Unicode values, convert to the UTF-8 program representation.  1020   1021     if typename == "unicode":  1022         return out.encode("utf-8"), typename, encoding  1023   1024     # For byte string values, convert back to the original encoding.  1025   1026     else:  1027         return out.encode(encoding), typename, encoding  1028   1029 def get_literal_details(s):  1030   1031     """  1032     Determine whether 's' represents a Unicode string or a byte string, where  1033     's' contains the full program representation of a literal including prefix  1034     and quotes, recoded by the parser to UTF-8.  1035   1036     Find and convert Unicode values starting with <backslash>u or <backslash>U,  1037     and byte or Unicode values starting with <backslash><octal digit> or  1038     <backslash>x.  1039   1040     Literals prefixed with "u" cause <backslash><octal digit> and <backslash>x  1041     to be considered as Unicode values. Otherwise, they produce byte values and  1042     cause unprefixed strings to be considered as byte strings.  1043   1044     Literals prefixed with "r" do not have their backslash-encoded values  1045     converted unless also prefixed with "u", in which case only the above value  1046     formats are converted, not any of the other special sequences for things  1047     like newlines.  1048   1049     Return the literal value as a Unicode object together with the appropriate  1050     type name in a tuple.  1051     """  1052   1053     l = []  1054   1055     # Identify the quote character and use it to identify the prefix.  1056   1057     quote_type = s[-1]  1058     prefix_end = s.find(quote_type)  1059     prefix = s[:prefix_end].lower()  1060   1061     if prefix not in ("", "b", "br", "r", "u", "ur"):  1062         raise ValueError, "String literal does not have a supported prefix: %s" % s  1063   1064     if "b" in prefix:  1065         typename = "str"  1066     else:  1067         typename = "unicode"  1068   1069     # Identify triple quotes or single quotes.  1070   1071     if len(s) >= 6 and s[-2] == quote_type and s[-3] == quote_type:  1072         quote = s[prefix_end:prefix_end+3]  1073         current = prefix_end + 3  1074         end = len(s) - 3  1075     else:  1076         quote = s[prefix_end]  1077         current = prefix_end + 1  1078         end = len(s) - 1  1079   1080     # Conversions of some quoted values.  1081   1082     searches = {  1083         "u" : (6, 16),  1084         "U" : (10, 16),  1085         "x" : (4, 16),  1086         }  1087   1088     octal_digits = map(str, range(0, 8))  1089   1090     # Translations of some quoted values.  1091   1092     escaped = {  1093         "\\" : "\\", "'" : "'", '"' : '"',  1094         "a" : "\a", "b" : "\b", "f" : "\f",  1095         "n" : "\n", "r" : "\r", "t" : "\t",  1096         }  1097   1098     while current < end:  1099   1100         # Look for quoted values.  1101   1102         index = s.find("\\", current)  1103         if index == -1 or index + 1 == end:  1104             l.append(s[current:end])  1105             break  1106   1107         # Add the preceding text.  1108   1109         l.append(s[current:index])  1110   1111         # Handle quoted text.  1112   1113         term = s[index+1]  1114   1115         # Add Unicode values. Where a string is u-prefixed, even \o and \x  1116         # produce Unicode values.  1117   1118         if typename == "unicode" and (  1119             term in ("u", "U") or   1120             "u" in prefix and (term == "x" or term in octal_digits)):  1121   1122             needed, base = searches.get(term, (4, 8))  1123             value = convert_quoted_value(s, index, needed, end, base, unichr)  1124             l.append(value)  1125             current = index + needed  1126   1127         # Add raw byte values, changing the string type.  1128   1129         elif "r" not in prefix and (  1130              term == "x" or term in octal_digits):  1131   1132             needed, base = searches.get(term, (4, 8))  1133             value = convert_quoted_value(s, index, needed, end, base, chr)  1134             l.append(value)  1135             typename = "str"  1136             current = index + needed  1137   1138         # Add other escaped values.  1139   1140         elif "r" not in prefix and escaped.has_key(term):  1141             l.append(escaped[term])  1142             current = index + 2  1143   1144         # Add other text as found.  1145   1146         else:  1147             l.append(s[index:index+2])  1148             current = index + 2  1149   1150     # Collect the components into a single Unicode object. Since the literal  1151     # text was already in UTF-8 form, interpret plain strings as UTF-8  1152     # sequences.  1153   1154     out = []  1155   1156     for value in l:  1157         if isinstance(value, unicode):  1158             out.append(value)  1159         else:  1160             out.append(unicode(value, "utf-8"))  1161   1162     return "".join(out), typename  1163   1164 def convert_quoted_value(s, index, needed, end, base, fn):  1165   1166     """  1167     Interpret a quoted value in 's' at 'index' with the given 'needed' number of  1168     positions, and with the given 'end' indicating the first position after the  1169     end of the actual string content.  1170   1171     Use 'base' as the numerical base when interpreting the value, and use 'fn'  1172     to convert the value to an appropriate type.  1173     """  1174   1175     s = s[index:min(index+needed, end)]  1176   1177     # Not a complete occurrence.  1178   1179     if len(s) < needed:  1180         return s  1181   1182     # Test for a well-formed value.  1183   1184     try:  1185         first = base == 8 and 1 or 2  1186         value = int(s[first:needed], base)  1187     except ValueError:  1188         return s  1189     else:  1190         return fn(value)  1191   1192 # Attribute chain decoding.  1193   1194 def get_attrnames(attrnames):  1195   1196     """  1197     Split the qualified attribute chain 'attrnames' into its components,  1198     handling special attributes starting with "#" that indicate type  1199     conformance.  1200     """  1201   1202     if attrnames.startswith("#"):  1203         return [attrnames]  1204     else:  1205         return attrnames.split(".")  1206   1207 def get_attrname_from_location(location):  1208   1209     """  1210     Extract the first attribute from the attribute names employed in a  1211     'location'.  1212     """  1213   1214     path, name, attrnames, access = location  1215     if not attrnames:  1216         return attrnames  1217     return get_attrnames(attrnames)[0]  1218   1219 def get_name_path(path, name):  1220   1221     "Return a suitable qualified name from the given 'path' and 'name'."  1222   1223     if "." in name:  1224         return name  1225     else:  1226         return "%s.%s" % (path, name)  1227   1228 # Usage-related functions.  1229   1230 def get_types_for_usage(attrnames, objects):  1231   1232     """  1233     Identify the types that can support the given 'attrnames', using the  1234     given 'objects' as the catalogue of type details.  1235     """  1236   1237     types = []  1238     for name, _attrnames in objects.items():  1239         if set(attrnames).issubset(_attrnames):  1240             types.append(name)  1241     return types  1242   1243 def get_invoked_attributes(usage):  1244   1245     "Obtain invoked attribute from the given 'usage'."  1246   1247     invoked = []  1248     if usage:  1249         for attrname, invocation, assignment in usage:  1250             if invocation:  1251                 invoked.append(attrname)  1252     return invoked  1253   1254 def get_assigned_attributes(usage):  1255   1256     "Obtain assigned attribute from the given 'usage'."  1257   1258     assigned = []  1259     if usage:  1260         for attrname, invocation, assignment in usage:  1261             if assignment:  1262                 assigned.append(attrname)  1263     return assigned  1264   1265 # Type and module functions.  1266 # NOTE: This makes assumptions about the __builtins__ structure.  1267   1268 def get_builtin_module(name):  1269   1270     "Return the module name containing the given type 'name'."  1271   1272     if name == "string":  1273         modname = "str"  1274     elif name == "utf8string":  1275         modname = "unicode"  1276     elif name == "NoneType":  1277         modname = "none"  1278     else:  1279         modname = name  1280   1281     return "__builtins__.%s" % modname  1282   1283 def get_builtin_type(name):  1284   1285     "Return the type name provided by the given Python value 'name'."  1286   1287     if name == "str":  1288         return "string"  1289     elif name == "unicode":  1290         return "utf8string"  1291     else:  1292         return name  1293   1294 def get_builtin_class(name):  1295   1296     "Return the full name of the built-in class having the given 'name'."  1297   1298     typename = get_builtin_type(name)  1299     module = get_builtin_module(typename)  1300     return "%s.%s" % (module, typename)  1301   1302 # Useful data.  1303   1304 predefined_constants = "False", "None", "NotImplemented", "True"  1305   1306 operator_functions = {  1307   1308     # Fundamental operations.  1309   1310     "is" : "is_",  1311     "is not" : "is_not",  1312   1313     # Binary operations.  1314   1315     "in" : "in_",  1316     "not in" : "not_in",  1317     "Add" : "add",  1318     "Bitand" : "and_",  1319     "Bitor" : "or_",  1320     "Bitxor" : "xor",  1321     "Div" : "div",  1322     "FloorDiv" : "floordiv",  1323     "LeftShift" : "lshift",  1324     "Mod" : "mod",  1325     "Mul" : "mul",  1326     "Power" : "pow",  1327     "RightShift" : "rshift",  1328     "Sub" : "sub",  1329   1330     # Unary operations.  1331   1332     "Invert" : "invert",  1333     "UnaryAdd" : "pos",  1334     "UnarySub" : "neg",  1335   1336     # Augmented assignment.  1337   1338     "+=" : "iadd",  1339     "-=" : "isub",  1340     "*=" : "imul",  1341     "/=" : "idiv",  1342     "//=" : "ifloordiv",  1343     "%=" : "imod",  1344     "**=" : "ipow",  1345     "<<=" : "ilshift",  1346     ">>=" : "irshift",  1347     "&=" : "iand",  1348     "^=" : "ixor",  1349     "|=" : "ior",  1350   1351     # Comparisons.  1352   1353     "==" : "eq",  1354     "!=" : "ne",  1355     "<" : "lt",  1356     "<=" : "le",  1357     ">=" : "ge",  1358     ">" : "gt",  1359     }  1360   1361 # vim: tabstop=4 expandtab shiftwidth=4