Lichen (file common.py at e6d60706c5a0)

#!/usr/bin/env python

"""
Common functions.

Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013,
              2014, 2015, 2016, 2017 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
this program.  If not, see <http://www.gnu.org/licenses/>.
"""

from compiler.transformer import Transformer
from errors import InspectError
from os import listdir, makedirs, remove
from os.path import exists, isdir, join, split
from results import ConstantValueRef, LiteralSequenceRef, NameRef
import compiler.ast

class CommonOutput:

    "Common output functionality."

    # NOTE: The methods here read 'self.output' and 'self.importer', neither of
    # NOTE: which is assigned in this class; presumably they are provided by
    # NOTE: the classes employing this functionality.

    def check_output(self):

        """
        Check the existing output and remove it if irrelevant.

        The output directory is created if absent. The details obtained from
        the importer are compared with those recorded in the output directory,
        and the existing output files are removed if the two differ. The
        current details are then written to the details file.
        """

        if not exists(self.output):
            makedirs(self.output)

        details = self.importer.get_cache_details()
        recorded_details = self.get_output_details()

        # Absent details (None) also count as a mismatch.

        if recorded_details != details:
            self.remove_output()

        writefile(self.get_output_details_filename(), details)

    def get_output_details_filename(self):

        "Return the output details filename."

        return join(self.output, "$details")

    def get_output_details(self):

        """
        Return details of the existing output, or None if no details file is
        present in the output directory.
        """

        details_filename = self.get_output_details_filename()

        if not exists(details_filename):
            return None
        else:
            return readfile(details_filename)

    def remove_output(self, dirname=None):

        """
        Remove the output.

        Files found in 'dirname' (or in the output directory if 'dirname' is
        omitted or empty) are removed, descending into subdirectories. The
        directories themselves are left in place.
        """

        dirname = dirname or self.output

        for filename in listdir(dirname):
            path = join(dirname, filename)
            if isdir(path):
                self.remove_output(path)
            else:
                remove(path)

class CommonModule:

    "A common module representation."

    # NOTE: Several methods call 'self.process_structure_node' and
    # NOTE: 'self.process_assignment_node', which are not defined in this
    # NOTE: class; presumably subclasses provide them.

    def __init__(self, name, importer):

        """
        Initialise this module with the given 'name' and an 'importer' which is
        used to provide access to other modules when required.
        """

        self.name = name
        self.importer = importer
        self.filename = None

        # Inspection-related attributes.

        self.astnode = None
        self.encoding = None
        self.iterators = {}
        self.temp = {}
        self.lambdas = {}

        # Constants, literals and values.

        self.constants = {}
        self.constant_values = {}
        self.literals = {}
        self.literal_types = {}

        # Nested namespaces.

        self.namespace_path = []
        self.in_function = False

        # Retain the assignment value expression and track invocations.

        self.in_assignment = None
        self.in_invocation = None

        # Attribute chain state management.

        self.attrs = []
        self.chain_assignment = []
        self.chain_invocation = []

    def __repr__(self):

        "Return a representation showing the module name and importer."

        return "CommonModule(%r, %r)" % (self.name, self.importer)

    def parse_file(self, filename):

        """
        Parse the file with the given 'filename', initialising attributes.

        The filename, the parsed syntax tree and the encoding reported by the
        parser are recorded on this object.
        """

        self.filename = filename

        # Use the Transformer directly to obtain encoding information.

        t = Transformer()
        f = open(filename)

        try:
            # A newline is appended to the file contents before parsing
            # (presumably to tolerate a missing final newline).

            self.astnode = t.parsesuite(f.read() + "\n")
            self.encoding = t.encoding
        finally:
            f.close()

    # Module-relative naming.

    def get_global_path(self, name):

        "Return 'name' qualified by the module name only."

        return "%s.%s" % (self.name, name)

    def get_namespace_path(self):

        "Return the dotted path of the current namespace within the module."

        return ".".join([self.name] + self.namespace_path)

    def get_object_path(self, name):

        "Return the dotted path of 'name' within the current namespace."

        return ".".join([self.name] + self.namespace_path + [name])

    def get_parent_path(self):

        "Return the dotted path of the namespace enclosing the current one."

        return ".".join([self.name] + self.namespace_path[:-1])

    # Namespace management.

    def enter_namespace(self, name):

        "Enter the namespace having the given 'name'."

        self.namespace_path.append(name)

    def exit_namespace(self):

        "Exit the current namespace."

        self.namespace_path.pop()

    # Constant reference naming.

    def get_constant_name(self, value, value_type, encoding=None):

        """
        Add a new constant to the current namespace for 'value' with
        'value_type'.

        Return a name of the form "$c<n>" where the number is the position at
        which the (value, value_type, encoding) combination was first recorded
        for the current namespace. Repeated requests for the same combination
        yield the same name.
        """

        path = self.get_namespace_path()
        init_item(self.constants, path, dict)
        return "$c%d" % add_counter_item(self.constants[path], (value, value_type, encoding))

    # Literal reference naming.

    def get_literal_name(self):

        """
        Add a new literal to the current namespace.

        Return a name of the form "$C<n>" using the current literal counter for
        the namespace. The counter is only advanced by next_literal.
        """

        path = self.get_namespace_path()
        init_item(self.literals, path, lambda: 0)
        return "$C%d" % self.literals[path]

    def next_literal(self):

        """
        Advance the literal counter for the current namespace. The counter must
        already exist, as established by get_literal_name.
        """

        self.literals[self.get_namespace_path()] += 1

    # Temporary iterator naming.

    def get_iterator_path(self):

        """
        Return the path under which iterator names are counted: the current
        namespace path inside functions, the module name otherwise.
        """

        return self.in_function and self.get_namespace_path() or self.name

    def get_iterator_name(self):

        """
        Return a name of the form "$i<n>" using the current iterator counter.
        The counter is only advanced by next_iterator.
        """

        path = self.get_iterator_path()
        init_item(self.iterators, path, lambda: 0)
        return "$i%d" % self.iterators[path]

    def next_iterator(self):

        """
        Advance the iterator counter for the current iterator path. The counter
        must already exist, as established by get_iterator_name.
        """

        self.iterators[self.get_iterator_path()] += 1

    # Temporary variable naming.

    def get_temporary_name(self):

        """
        Return a name of the form "$t<n>" using the current temporary counter
        for the namespace. The counter is only advanced by next_temporary.
        """

        path = self.get_namespace_path()
        init_item(self.temp, path, lambda: 0)
        return "$t%d" % self.temp[path]

    def next_temporary(self):

        """
        Advance the temporary counter for the current namespace. The counter
        must already exist, as established by get_temporary_name.
        """

        self.temp[self.get_namespace_path()] += 1

    # Arbitrary function naming.

    def get_lambda_name(self):

        """
        Return a new name of the form "$l<n>" for the current namespace,
        advancing the counter so that each call yields a distinct name.
        """

        path = self.get_namespace_path()
        init_item(self.lambdas, path, lambda: 0)
        name = "$l%d" % self.lambdas[path]
        self.lambdas[path] += 1
        return name

    def reset_lambdas(self):

        "Discard the lambda counters for all namespaces."

        self.lambdas = {}

    # Constant and literal recording.

    def get_constant_value(self, value, literals=None):

        """
        Encode the 'value' if appropriate, returning a value, a typename and any
        encoding.

        Unicode values are returned encoded as UTF-8. Plain strings are handled
        using 'literals' when the module has an encoding. All other values, and
        plain strings whose conversion raises UnicodeDecodeError, are returned
        unchanged with the name of their class and no encoding.
        """

        if isinstance(value, unicode):
            return value.encode("utf-8"), "unicode", self.encoding

        # Attempt to convert plain strings to text.

        elif isinstance(value, str) and self.encoding:

            # NOTE(review): 'literals' is iterated by get_string_details, so
            # NOTE(review): callers presumably always supply it for strings
            # NOTE(review): despite the None default - confirm.

            try:
                return get_string_details(literals, self.encoding)
            except UnicodeDecodeError:
                pass

        return value, value.__class__.__name__, None

    def get_constant_reference(self, ref, value, encoding=None):

        """
        Return a constant reference for the given 'ref' type and 'value', with
        the optional 'encoding' applying to text values.
        """

        constant_name = self.get_constant_name(value, ref.get_origin(), encoding)

        # Return a reference for the constant.

        objpath = self.get_object_path(constant_name)
        name_ref = ConstantValueRef(constant_name, ref.instance_of(objpath), value)

        # Record the value and type for the constant.

        self._reserve_constant(objpath, name_ref.value, name_ref.get_origin(), encoding)
        return name_ref

    def reserve_constant(self, objpath, value, origin, encoding=None):

        """
        Reserve a constant within 'objpath' with the given 'value' and having a
        type with the given 'origin', with the optional 'encoding' applying to
        text values.
        """

        # NOTE(review): The 'objpath' argument is overwritten below before it
        # NOTE(review): is used, and 'encoding' is not passed on when naming
        # NOTE(review): the constant (unlike in get_constant_reference) -
        # NOTE(review): confirm that both are intended.

        constant_name = self.get_constant_name(value, origin)
        objpath = self.get_object_path(constant_name)
        self._reserve_constant(objpath, value, origin, encoding)

    def _reserve_constant(self, objpath, value, origin, encoding):

        """
        Store a constant for 'objpath' with the given 'value' and 'origin', with
        the optional 'encoding' applying to text values.
        """

        self.constant_values[objpath] = value, origin, encoding

    def get_literal_reference(self, name, ref, items, cls):

        """
        Return a literal reference for the given type 'name', literal 'ref',
        node 'items' and employing the given 'cls' as the class of the returned
        reference object.
        """

        # Construct an invocation using the items as arguments.

        typename = "$L%s" % name

        invocation = compiler.ast.CallFunc(
            compiler.ast.Name(typename),
            items
            )

        # Get a name for the actual literal.

        instname = self.get_literal_name()
        self.next_literal()

        # Record the type for the literal.

        objpath = self.get_object_path(instname)
        self.literal_types[objpath] = ref.get_origin()

        # Return a wrapper for the invocation exposing the items.

        return cls(
            instname,
            ref.instance_of(),
            self.process_structure_node(invocation),
            invocation.args
            )

    # Node handling.

    def process_structure(self, node):

        """
        Within the given 'node', process the program structure.

        During inspection, this will process global declarations, adjusting the
        module namespace, and import statements, building a module dependency
        hierarchy.

        During translation, this will consult deduced program information and
        output translated code.

        Return a list of the results obtained for each child node.
        """

        l = []
        for n in node.getChildNodes():
            l.append(self.process_structure_node(n))
        return l

    def process_augassign_node(self, n):

        """
        Process the given augmented assignment node 'n'.

        The node is rewritten as a plain assignment of the result of invoking
        the corresponding operator function on the target and the operand.
        """

        # NOTE: 'operator_functions' is not defined in the visible part of this
        # NOTE: file; presumably it is a module-level mapping defined elsewhere.

        op = operator_functions[n.op]

        # Convert the accessed target into its assignment counterpart.

        if isinstance(n.node, compiler.ast.Getattr):
            target = compiler.ast.AssAttr(n.node.expr, n.node.attrname, "OP_ASSIGN")
        elif isinstance(n.node, compiler.ast.Name):
            target = compiler.ast.AssName(n.node.name, "OP_ASSIGN")
        else:
            target = n.node

        assignment = compiler.ast.Assign(
            [target],
            compiler.ast.CallFunc(
                compiler.ast.Name("$op%s" % op),
                [n.node, n.expr]))

        return self.process_structure_node(assignment)

    def process_assignment_for_object(self, original_name, source):

        """
        Return an assignment operation making 'original_name' refer to the given
        'source'.
        """

        assignment = compiler.ast.Assign(
            [compiler.ast.AssName(original_name, "OP_ASSIGN")],
            source
            )

        return self.process_structure_node(assignment)

    def process_assignment_node_items(self, n, expr):

        """
        Process the given assignment node 'n' whose children are to be assigned
        items of 'expr'.
        """

        name_ref = self.process_structure_node(expr)

        # Either unpack the items and present them directly to each assignment
        # node.

        if isinstance(name_ref, LiteralSequenceRef) and \
           self.process_literal_sequence_items(n, name_ref):

            pass

        # Or have the assignment nodes access each item via the sequence API.

        else:
            self.process_assignment_node_items_by_position(n, expr, name_ref)

    def process_assignment_node_items_by_position(self, n, expr, name_ref):

        """
        Process the given sequence assignment node 'n', converting the node to
        the separate assignment of each target using positional access on a
        temporary variable representing the sequence. Use 'expr' as the assigned
        value and 'name_ref' as the reference providing any existing temporary
        variable.
        """

        assignments = []

        # Employ existing names to access the sequence.
        # Literal sequences do not provide names of accessible objects.

        if isinstance(name_ref, NameRef) and not isinstance(name_ref, LiteralSequenceRef):
            temp = name_ref.name

        # For other expressions, create a temporary name to reference the items.

        else:
            temp = self.get_temporary_name()
            self.next_temporary()

            assignments.append(
                compiler.ast.Assign([compiler.ast.AssName(temp, "OP_ASSIGN")], expr)
                )

        # Assign the items to the target nodes.

        for i, node in enumerate(n.nodes):
            assignments.append(
                compiler.ast.Assign([node], compiler.ast.Subscript(
                    compiler.ast.Name(temp), "OP_APPLY", [compiler.ast.Const(i, str(i))]))
                )

        return self.process_structure_node(compiler.ast.Stmt(assignments))

    def process_literal_sequence_items(self, n, name_ref):

        """
        Process the given assignment node 'n', obtaining from the given
        'name_ref' the items to be assigned to the assignment targets.

        Return whether this method was able to process the assignment node as
        a sequence of direct assignments.

        Raise InspectError if the number of targets differs from the number of
        items.
        """

        if len(n.nodes) == len(name_ref.items):
            assigned_names, count = get_names_from_nodes(n.nodes)
            accessed_names, _count = get_names_from_nodes(name_ref.items)

            # Only assign directly between items if all assigned names are
            # plain names (not attribute assignments), and if the assigned names
            # do not appear in the accessed names.

            if len(assigned_names) == count and \
               not assigned_names.intersection(accessed_names):

                for node, item in zip(n.nodes, name_ref.items):
                    self.process_assignment_node(node, item)

                return True

            # Otherwise, use the position-based mechanism to obtain values.

            else:
                return False
        else:
            raise InspectError("In %s, item assignment needing %d items is given %d items." % (
                self.get_namespace_path(), len(n.nodes), len(name_ref.items)))

    def process_compare_node(self, n):

        """
        Process the given comparison node 'n', converting an operator sequence
        from...

        <expr1> <op1> <expr2> <op2> <expr3>

        ...to...

        <op1>(<expr1>, <expr2>) and <op2>(<expr2>, <expr3>)
        """

        invocations = []
        last = n.expr

        for op, op_node in n.ops:

            # NOTE(review): Using get means that an operator missing from
            # NOTE(review): 'operator_functions' yields None and thus the name
            # NOTE(review): "$opNone"; other methods index the mapping directly
            # NOTE(review): - confirm whether this is intended.

            op = operator_functions.get(op)

            invocations.append(compiler.ast.CallFunc(
                compiler.ast.Name("$op%s" % op),
                [last, op_node]))

            # The right operand becomes the left operand of the next operator.

            last = op_node

        if len(invocations) > 1:
            result = compiler.ast.And(invocations)
        else:
            result = invocations[0]

        return self.process_structure_node(result)

    def process_dict_node(self, node):

        """
        Process the given dictionary 'node', returning a list of (key, value)
        tuples.
        """

        l = []
        for key, value in node.items:
            l.append((
                self.process_structure_node(key),
                self.process_structure_node(value)))
        return l

    def process_for_node(self, n):

        """
        Generate attribute accesses for {n.list}.__iter__ and the next method on
        the iterator, producing a replacement node for the original.
        """

        # The same iterator name is obtained twice below: the counter is only
        # advanced after the replacement node has been built.

        node = compiler.ast.Stmt([

            # <next> = {n.list}.__iter__().next

            compiler.ast.Assign(
                [compiler.ast.AssName(self.get_iterator_name(), "OP_ASSIGN")],
                compiler.ast.Getattr(
                    compiler.ast.CallFunc(
                        compiler.ast.Getattr(n.list, "__iter__"),
                        []
                        ), "next")),

            # try:
            #     while True:
            #         <var>... = <next>()
            #         ...
            # except StopIteration:
            #     pass

            compiler.ast.TryExcept(
                compiler.ast.While(
                    compiler.ast.Name("True"),
                    compiler.ast.Stmt([
                        compiler.ast.Assign(
                            [n.assign],
                            compiler.ast.CallFunc(
                                compiler.ast.Name(self.get_iterator_name()),
                                []
                                )),
                        n.body]),
                    None),
                [(compiler.ast.Name("StopIteration"), None, compiler.ast.Stmt([compiler.ast.Pass()]))],
                None)
            ])

        self.next_iterator()

        # NOTE(review): Unlike the other node processing methods, the result is
        # NOTE(review): not returned here - confirm that this is intended.

        self.process_structure_node(node)

    def process_literal_sequence_node(self, n, name, ref, cls):

        """
        Process the given literal sequence node 'n' as a function invocation,
        with 'name' indicating the type of the sequence, and 'ref' being a
        reference to the type. The 'cls' is used to instantiate a suitable name
        reference.
        """

        # Dictionary items are presented as (key, value) tuple nodes.

        if name == "dict":
            items = []
            for key, value in n.items:
                items.append(compiler.ast.Tuple([key, value]))
        else: # name in ("list", "tuple"):
            items = n.nodes

        return self.get_literal_reference(name, ref, items, cls)

    def process_operator_node(self, n):

        """
        Process the given operator node 'n' as an operator function invocation.

        The operator function is selected using the name of the node's class,
        and the node's children become the arguments.
        """

        op = operator_functions[n.__class__.__name__]
        invocation = compiler.ast.CallFunc(
            compiler.ast.Name("$op%s" % op),
            list(n.getChildNodes())
            )
        return self.process_structure_node(invocation)

    def process_print_node(self, n):

        """
        Process the given print node 'n' as an invocation on a stream of the
        form...

        $print(dest, args, nl)

        The special function name will be translated elsewhere.
        """

        # Only Printnl nodes request a trailing newline.

        nl = isinstance(n, compiler.ast.Printnl)
        invocation = compiler.ast.CallFunc(
            compiler.ast.Name("$print"),
            [n.dest or compiler.ast.Name("None"),
             compiler.ast.List(list(n.nodes)),
             nl and compiler.ast.Name("True") or compiler.ast.Name("False")]
            )
        return self.process_structure_node(invocation)

    def process_slice_node(self, n, expr=None):

        """
        Process the given slice node 'n' as an operator function invocation.

        The node's flags select a "setslice", "delslice" or "getslice"
        operation. Absent bounds are replaced by the name None, and any given
        'expr' is supplied as a final argument.
        """

        if n.flags == "OP_ASSIGN": op = "setslice"
        elif n.flags == "OP_DELETE": op = "delslice"
        else: op = "getslice"

        invocation = compiler.ast.CallFunc(
            compiler.ast.Name("$op%s" % op),
            [n.expr, n.lower or compiler.ast.Name("None"), n.upper or compiler.ast.Name("None")] +
                (expr and [expr] or [])
            )

        # Fix parse tree structure.

        if op == "delslice":
            invocation = compiler.ast.Discard(invocation)

        return self.process_structure_node(invocation)

    def process_sliceobj_node(self, n):

        """
        Process the given slice object node 'n' as a slice constructor.
        """

        op = "slice"
        invocation = compiler.ast.CallFunc(
            compiler.ast.Name("$op%s" % op),
            n.nodes
            )
        return self.process_structure_node(invocation)

    def process_subscript_node(self, n, expr=None):

        """
        Process the given subscript node 'n' as an operator function invocation.

        The node's flags select a "setitem", "delitem" or "getitem" operation.
        The subscripts follow the subscripted expression as arguments, and any
        given 'expr' is supplied as a final argument.
        """

        if n.flags == "OP_ASSIGN": op = "setitem"
        elif n.flags == "OP_DELETE": op = "delitem"
        else: op = "getitem"

        invocation = compiler.ast.CallFunc(
            compiler.ast.Name("$op%s" % op),
            [n.expr] + list(n.subs) + (expr and [expr] or [])
            )

        # Fix parse tree structure.

        if op == "delitem":
            invocation = compiler.ast.Discard(invocation)

        return self.process_structure_node(invocation)

    def process_attribute_chain(self, n):

        """
        Process the given attribute access node 'n'. Return a reference
        describing the expression.
        """

        # AssAttr/Getattr are nested with the outermost access being the last
        # access in any chain.

        self.attrs.insert(0, n.attrname)
        attrs = self.attrs

        # Break attribute chains where non-access nodes are found.

        if not self.have_access_expression(n):
            self.reset_attribute_chain()

        # Descend into the expression, extending backwards any existing chain,
        # or building another for the expression.

        name_ref = self.process_structure_node(n.expr)

        # Restore chain information applying to this node.

        if not self.have_access_expression(n):
            self.restore_attribute_chain(attrs)

        # Return immediately if the expression was another access and thus a
        # continuation backwards along the chain. The above processing will
        # have followed the chain all the way to its conclusion.

        if self.have_access_expression(n):
            del self.attrs[0]

        return name_ref

    # Attribute chain handling.

    def reset_attribute_chain(self):

        """
        Reset the attribute chain for a subexpression of an attribute access.

        The current assignment and invocation state is saved for retrieval by
        restore_attribute_chain.
        """

        self.attrs = []
        self.chain_assignment.append(self.in_assignment)
        self.chain_invocation.append(self.in_invocation)
        self.in_assignment = None
        self.in_invocation = None

    def restore_attribute_chain(self, attrs):

        """
        Restore the attribute chain for an attribute access, using 'attrs' as
        the chain and reinstating the most recently saved assignment and
        invocation state.
        """

        self.attrs = attrs
        self.in_assignment = self.chain_assignment.pop()
        self.in_invocation = self.chain_invocation.pop()

    def have_access_expression(self, node):

        "Return whether the expression associated with 'node' is Getattr."

        return isinstance(node.expr, compiler.ast.Getattr)

    def get_name_for_tracking(self, name, name_ref=None):

        """
        Return the name to be used for attribute usage observations involving
        the given 'name' in the current namespace.

        If the name is being used outside a function, and if 'name_ref' is
        given, a path featuring the name in the global namespace is returned
        where 'name_ref' indicates a global. Otherwise, a path computed using
        the current namespace and the given name is returned.

        The intention of this method is to provide a suitably-qualified name
        that can be tracked across namespaces. Where globals are being
        referenced in class namespaces, they should be referenced using their
        path within the module, not using a path within each class.

        It may not be possible to identify a global within a function at the
        time of inspection (since a global may appear later in a file).
        Consequently, globals are identified by their local name rather than
        their module-qualified path.
        """

        # For functions, use the appropriate local names.

        if self.in_function:
            return name

        # For global names outside functions, use a global name.

        elif name_ref and name_ref.is_global_name():
            return self.get_global_path(name)

        # Otherwise, establish a name in the current namespace.

        else:
            return self.get_object_path(name)

    def get_path_for_access(self):

        """
        Outside functions, register accesses at the module level.

        Return the module name outside functions and the current namespace path
        inside functions.
        """

        if not self.in_function:
            return self.name
        else:
            return self.get_namespace_path()

    def get_module_name(self, node):

        """
        Using the given From 'node' in this module, calculate any relative import
        information, returning a tuple containing a module to import along with any
        names to import based on the node's name information.

        Where the returned module is given as None, whole module imports should
        be performed for the returned modules using the returned names.

        Raise InspectError if the import refers to more levels than this
        module's name provides.
        """

        # Absolute import.

        if node.level == 0:
            return node.modname, node.names

        # Relative to an ancestor of this module.

        else:
            path = self.name.split(".")
            level = node.level

            # Relative imports treat package roots as submodules.

            if split(self.filename)[-1] == "__init__.py":
                level -= 1

            if level > len(path):
                raise InspectError("Relative import %r involves too many levels up from module %r" % (
                    ("%s%s" % ("." * node.level, node.modname or "")), self.name))

            # NOTE(review): Where the level equals the number of path
            # NOTE(review): components, the base name is an empty string -
            # NOTE(review): confirm that callers expect this.

            basename = ".".join(path[:len(path)-level])

        # Name imports from a module.

        if node.modname:
            return "%s.%s" % (basename, node.modname), node.names

        # Relative whole module imports.

        else:
            return basename, node.names

def get_argnames(args):

    """
    Return a list of all names provided by 'args'. Since tuples may be
    employed, the arguments are traversed depth-first.
    """

    l = []
    for arg in args:
        if isinstance(arg, tuple):
            l += get_argnames(arg)
        else:
            l.append(arg)
    return l

def get_names_from_nodes(nodes):

    """
    Return the names employed in the given 'nodes' along with the number of
    nodes excluding sequences.

    The result is a tuple containing a set of names and the count. Sequence
    nodes contribute the names and counts of their elements instead of being
    counted themselves.
    """

    names = set()
    count = 0

    for node in nodes:

        # Add names and count them.

        if isinstance(node, (compiler.ast.AssName, compiler.ast.Name)):
            names.add(node.name)
            count += 1

        # Add names from sequences and incorporate their counts.

        elif isinstance(node, (compiler.ast.AssList, compiler.ast.AssTuple,
                               compiler.ast.List, compiler.ast.Set,
                               compiler.ast.Tuple)):
            _names, _count = get_names_from_nodes(node.nodes)
            names.update(_names)
            count += _count

        # Count non-name, non-sequence nodes.

        else:
            count += 1

    return names, count

# Result classes.

class InstructionSequence:

    "A generic sequence of instructions."

    def __init__(self, instructions):

        "Initialise the sequence with the given list of 'instructions'."

        self.instructions = instructions

    def get_value_instruction(self):

        "Return the last instruction in the sequence."

        return self.instructions[-1]

    def get_init_instructions(self):

        "Return all instructions in the sequence except the last."

        return self.instructions[:-1]

# Dictionary utilities.

def init_item(d, key, fn):

    """
    Add to 'd' an entry for 'key' using the callable 'fn' to make an initial
    value where no entry already exists.

    Return the entry for 'key', whether newly created or already present.
    """

    if key not in d:
        d[key] = fn()
    return d[key]

def dict_for_keys(d, keys):

    """
    Return a new dictionary containing entries from 'd' for the given 'keys'.
    Keys absent from 'd' are ignored.
    """

    nd = {}
    for key in keys:
        if key in d:
            nd[key] = d[key]
    return nd

def make_key(s):

    "Make sequence 's' into a tuple-based key, first sorting its contents."

    return tuple(sorted(s))

def add_counter_item(d, key):

    """
    Make a mapping in 'd' for 'key' to the number of keys added before it, thus
    maintaining a mapping of keys to their order of insertion.

    Return the number associated with 'key'.
    """

    if key not in d:
        d[key] = len(d)
    return d[key]

def remove_items(d1, d2):

    "Remove from 'd1' all items from 'd2'."

    # Iterate over a snapshot of the keys so that deletion remains safe even
    # where 'd1' and 'd2' are the same dictionary.

    for key in list(d2):
        if key in d1:
            del d1[key]

# Set utilities.

def first(s):

    """
    Return the first item obtained by iterating over 's'. An IndexError is
    raised if 's' is empty.
    """

    return list(s)[0]

def same(s1, s2):

    "Return whether 's1' and 's2' contain the same distinct items."

    return set(s1) == set(s2)

# General input/output.

def readfile(filename):

    "Return the contents of 'filename'."

    with open(filename) as f:
        return f.read()

def writefile(filename, s):

    "Write to 'filename' the string 's'."

    with open(filename, "w") as f:
        f.write(s)

# General encoding.

def sorted_output(x):

    "Sort sequence 'x' and return a string with commas separating the values."

    # The values are converted to strings before sorting, so the ordering is
    # that of the string representations.

    return ", ".join(sorted(map(str, x)))

def get_string_details(literals, encoding):

    """
    Determine whether 'literals' represent Unicode strings or byte strings,
    using 'encoding' to reproduce byte sequences.

    Each literal is the full program representation including prefix and quotes
    recoded by the parser to UTF-8. Thus, any literal found to represent a byte
    string needs to be translated back to its original encoding.

    Return a single encoded literal value, a type name, and the original
    encoding as a tuple.
    """

    typename = "unicode"

    parts = []

    # A single byte string literal makes the combined value a byte string.

    for s in literals:
        out, _typename = get_literal_details(s)
        if _typename == "str":
            typename = "str"
        parts.append(out)

    out = "".join(parts)

    # For Unicode values, convert to the UTF-8 program representation.

    if typename == "unicode":
        return out.encode("utf-8"), typename, encoding

    # For byte string values, convert back to the original encoding.

    else:
        return out.encode(encoding), typename, encoding

def get_literal_details(s):

    """
    Determine whether 's' represents a Unicode string or a byte string, where
    's' contains the full program representation of a literal including prefix
    and quotes, recoded by the parser to UTF-8.

    Find and convert Unicode values starting with <backslash>u or <backslash>U,
    and byte or Unicode values starting with <backslash><octal digit> or
    <backslash>x.

    Literals prefixed with "u" cause <backslash><octal digit> and <backslash>x
    to be considered as Unicode values. Otherwise, they produce byte values and
    cause unprefixed strings to be considered as byte strings.

    Literals prefixed with "r" do not have their backslash-encoded values
    converted unless also prefixed with "u", in which case only the above value
    formats are converted, not any of the other special sequences for things
    like newlines.

    Return the literal value as a Unicode object together with the appropriate
    type name in a tuple.
    """

    l = []

    # Identify the quote character and use it to identify the prefix.

    quote_type = s[-1]
    prefix_end = s.find(quote_type)
    prefix = s[:prefix_end].lower()

    if prefix not in ("", "b", "br", "r", "u", "ur"):
        raise ValueError("String literal does not have a supported prefix: %s" % s)

    if "b" in prefix:
        typename = "str"
    else:
        typename = "unicode"

    # Identify triple quotes or single quotes.
1064   1065     if len(s) >= 6 and s[-2] == quote_type and s[-3] == quote_type:  1066         quote = s[prefix_end:prefix_end+3]  1067         current = prefix_end + 3  1068         end = len(s) - 3  1069     else:  1070         quote = s[prefix_end]  1071         current = prefix_end + 1  1072         end = len(s) - 1  1073   1074     # Conversions of some quoted values.  1075   1076     searches = {  1077         "u" : (6, 16),  1078         "U" : (10, 16),  1079         "x" : (4, 16),  1080         }  1081   1082     octal_digits = map(str, range(0, 8))  1083   1084     # Translations of some quoted values.  1085   1086     escaped = {  1087         "\\" : "\\", "'" : "'", '"' : '"',  1088         "a" : "\a", "b" : "\b", "f" : "\f",  1089         "n" : "\n", "r" : "\r", "t" : "\t",  1090         }  1091   1092     while current < end:  1093   1094         # Look for quoted values.  1095   1096         index = s.find("\\", current)  1097         if index == -1 or index + 1 == end:  1098             l.append(s[current:end])  1099             break  1100   1101         # Add the preceding text.  1102   1103         l.append(s[current:index])  1104   1105         # Handle quoted text.  1106   1107         term = s[index+1]  1108   1109         # Add Unicode values. Where a string is u-prefixed, even \o and \x  1110         # produce Unicode values.  1111   1112         if typename == "unicode" and (  1113             term in ("u", "U") or   1114             "u" in prefix and (term == "x" or term in octal_digits)):  1115   1116             needed, base = searches.get(term, (4, 8))  1117             value = convert_quoted_value(s, index, needed, end, base, unichr)  1118             l.append(value)  1119             current = index + needed  1120   1121         # Add raw byte values, changing the string type.  
1122   1123         elif "r" not in prefix and (  1124              term == "x" or term in octal_digits):  1125   1126             needed, base = searches.get(term, (4, 8))  1127             value = convert_quoted_value(s, index, needed, end, base, chr)  1128             l.append(value)  1129             typename = "str"  1130             current = index + needed  1131   1132         # Add other escaped values.  1133   1134         elif "r" not in prefix and escaped.has_key(term):  1135             l.append(escaped[term])  1136             current = index + 2  1137   1138         # Add other text as found.  1139   1140         else:  1141             l.append(s[index:index+2])  1142             current = index + 2  1143   1144     # Collect the components into a single Unicode object. Since the literal  1145     # text was already in UTF-8 form, interpret plain strings as UTF-8  1146     # sequences.  1147   1148     out = []  1149   1150     for value in l:  1151         if isinstance(value, unicode):  1152             out.append(value)  1153         else:  1154             out.append(unicode(value, "utf-8"))  1155   1156     return "".join(out), typename  1157   1158 def convert_quoted_value(s, index, needed, end, base, fn):  1159   1160     """  1161     Interpret a quoted value in 's' at 'index' with the given 'needed' number of  1162     positions, and with the given 'end' indicating the first position after the  1163     end of the actual string content.  1164   1165     Use 'base' as the numerical base when interpreting the value, and use 'fn'  1166     to convert the value to an appropriate type.  1167     """  1168   1169     s = s[index:min(index+needed, end)]  1170   1171     # Not a complete occurrence.  1172   1173     if len(s) < needed:  1174         return s  1175   1176     # Test for a well-formed value.  
1177   1178     try:  1179         first = base == 8 and 1 or 2  1180         value = int(s[first:needed], base)  1181     except ValueError:  1182         return s  1183     else:  1184         return fn(value)  1185   1186 # Attribute chain decoding.  1187   1188 def get_attrnames(attrnames):  1189   1190     """  1191     Split the qualified attribute chain 'attrnames' into its components,  1192     handling special attributes starting with "#" that indicate type  1193     conformance.  1194     """  1195   1196     if attrnames.startswith("#"):  1197         return [attrnames]  1198     else:  1199         return attrnames.split(".")  1200   1201 def get_attrname_from_location(location):  1202   1203     """  1204     Extract the first attribute from the attribute names employed in a  1205     'location'.  1206     """  1207   1208     path, name, attrnames, access = location  1209     if not attrnames:  1210         return attrnames  1211     return get_attrnames(attrnames)[0]  1212   1213 def get_name_path(path, name):  1214   1215     "Return a suitable qualified name from the given 'path' and 'name'."  1216   1217     if "." in name:  1218         return name  1219     else:  1220         return "%s.%s" % (path, name)  1221   1222 # Usage-related functions.  1223   1224 def get_types_for_usage(attrnames, objects):  1225   1226     """  1227     Identify the types that can support the given 'attrnames', using the  1228     given 'objects' as the catalogue of type details.  1229     """  1230   1231     types = []  1232     for name, _attrnames in objects.items():  1233         if set(attrnames).issubset(_attrnames):  1234             types.append(name)  1235     return types  1236   1237 def get_invoked_attributes(usage):  1238   1239     "Obtain invoked attribute from the given 'usage'."  
1240   1241     invoked = []  1242     if usage:  1243         for attrname, invocation, assignment in usage:  1244             if invocation:  1245                 invoked.append(attrname)  1246     return invoked  1247   1248 def get_assigned_attributes(usage):  1249   1250     "Obtain assigned attribute from the given 'usage'."  1251   1252     assigned = []  1253     if usage:  1254         for attrname, invocation, assignment in usage:  1255             if assignment:  1256                 assigned.append(attrname)  1257     return assigned  1258   1259 # Type and module functions.  1260 # NOTE: This makes assumptions about the __builtins__ structure.  1261   1262 def get_builtin_module(name):  1263   1264     "Return the module name containing the given type 'name'."  1265   1266     if name == "string":  1267         modname = "str"  1268     elif name == "utf8string":  1269         modname = "unicode"  1270     elif name == "NoneType":  1271         modname = "none"  1272     else:  1273         modname = name  1274   1275     return "__builtins__.%s" % modname  1276   1277 def get_builtin_type(name):  1278   1279     "Return the type name provided by the given Python value 'name'."  1280   1281     if name == "str":  1282         return "string"  1283     elif name == "unicode":  1284         return "utf8string"  1285     else:  1286         return name  1287   1288 def get_builtin_class(name):  1289   1290     "Return the full name of the built-in class having the given 'name'."  1291   1292     typename = get_builtin_type(name)  1293     module = get_builtin_module(typename)  1294     return "%s.%s" % (module, typename)  1295   1296 # Useful data.  1297   1298 predefined_constants = "False", "None", "NotImplemented", "True"  1299   1300 operator_functions = {  1301   1302     # Fundamental operations.  1303   1304     "is" : "is_",  1305     "is not" : "is_not",  1306   1307     # Binary operations.  
1308   1309     "in" : "in_",  1310     "not in" : "not_in",  1311     "Add" : "add",  1312     "Bitand" : "and_",  1313     "Bitor" : "or_",  1314     "Bitxor" : "xor",  1315     "Div" : "div",  1316     "FloorDiv" : "floordiv",  1317     "LeftShift" : "lshift",  1318     "Mod" : "mod",  1319     "Mul" : "mul",  1320     "Power" : "pow",  1321     "RightShift" : "rshift",  1322     "Sub" : "sub",  1323   1324     # Unary operations.  1325   1326     "Invert" : "invert",  1327     "UnaryAdd" : "pos",  1328     "UnarySub" : "neg",  1329   1330     # Augmented assignment.  1331   1332     "+=" : "iadd",  1333     "-=" : "isub",  1334     "*=" : "imul",  1335     "/=" : "idiv",  1336     "//=" : "ifloordiv",  1337     "%=" : "imod",  1338     "**=" : "ipow",  1339     "<<=" : "ilshift",  1340     ">>=" : "irshift",  1341     "&=" : "iand",  1342     "^=" : "ixor",  1343     "|=" : "ior",  1344   1345     # Comparisons.  1346   1347     "==" : "eq",  1348     "!=" : "ne",  1349     "<" : "lt",  1350     "<=" : "le",  1351     ">=" : "ge",  1352     ">" : "gt",  1353     }  1354   1355 # vim: tabstop=4 expandtab shiftwidth=4