Lichen (file common.py at b6cda55e96b9)

     1 #!/usr/bin/env python     2      3 """     4 Common functions.     5      6 Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013,     7               2014, 2015, 2016, 2017 Paul Boddie <paul@boddie.org.uk>     8      9 This program is free software; you can redistribute it and/or modify it under    10 the terms of the GNU General Public License as published by the Free Software    11 Foundation; either version 3 of the License, or (at your option) any later    12 version.    13     14 This program is distributed in the hope that it will be useful, but WITHOUT    15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS    16 FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more    17 details.    18     19 You should have received a copy of the GNU General Public License along with    20 this program.  If not, see <http://www.gnu.org/licenses/>.    21 """    22     23 from compiler.transformer import Transformer    24 from errors import InspectError    25 from os import listdir, makedirs, remove    26 from os.path import exists, isdir, join, split    27 from results import ConstantValueRef, LiteralSequenceRef, NameRef    28 import compiler.ast    29     30 class CommonOutput:    31     32     "Common output functionality."    33     34     def check_output(self):    35     36         "Check the existing output and remove it if irrelevant."    37     38         if not exists(self.output):    39             makedirs(self.output)    40     41         details = self.importer.get_cache_details()    42         recorded_details = self.get_output_details()    43     44         if recorded_details != details:    45             self.remove_output()    46     47         writefile(self.get_output_details_filename(), details)    48     49     def get_output_details_filename(self):    50     51         "Return the output details filename."    
52     53         return join(self.output, "$details")    54     55     def get_output_details(self):    56     57         "Return details of the existing output."    58     59         details_filename = self.get_output_details_filename()    60     61         if not exists(details_filename):    62             return None    63         else:    64             return readfile(details_filename)    65     66     def remove_output(self, dirname=None):    67     68         "Remove the output."    69     70         dirname = dirname or self.output    71     72         for filename in listdir(dirname):    73             path = join(dirname, filename)    74             if isdir(path):    75                 self.remove_output(path)    76             else:    77                 remove(path)    78     79 class CommonModule:    80     81     "A common module representation."    82     83     def __init__(self, name, importer):    84     85         """    86         Initialise this module with the given 'name' and an 'importer' which is    87         used to provide access to other modules when required.    88         """    89     90         self.name = name    91         self.importer = importer    92         self.filename = None    93     94         # Inspection-related attributes.    95     96         self.astnode = None    97         self.encoding = None    98         self.iterators = {}    99         self.temp = {}   100         self.lambdas = {}   101    102         # Constants, literals and values.   103    104         self.constants = {}   105         self.constant_values = {}   106         self.literals = {}   107         self.literal_types = {}   108    109         # Nested namespaces.   110    111         self.namespace_path = []   112         self.in_function = False   113    114         # Retain the assignment value expression and track invocations.   
115    116         self.in_assignment = None   117         self.in_invocation = None   118    119         # Attribute chain state management.   120    121         self.attrs = []   122         self.chain_assignment = []   123         self.chain_invocation = []   124    125     def __repr__(self):   126         return "CommonModule(%r, %r)" % (self.name, self.importer)   127    128     def parse_file(self, filename):   129    130         "Parse the file with the given 'filename', initialising attributes."   131    132         self.filename = filename   133    134         # Use the Transformer directly to obtain encoding information.   135    136         t = Transformer()   137         f = open(filename)   138    139         try:   140             self.astnode = t.parsesuite(f.read() + "\n")   141             self.encoding = t.encoding   142         finally:   143             f.close()   144    145     # Module-relative naming.   146    147     def get_global_path(self, name):   148         return "%s.%s" % (self.name, name)   149    150     def get_namespace_path(self):   151         return ".".join([self.name] + self.namespace_path)   152    153     def get_object_path(self, name):   154         return ".".join([self.name] + self.namespace_path + [name])   155    156     def get_parent_path(self):   157         return ".".join([self.name] + self.namespace_path[:-1])   158    159     # Namespace management.   160    161     def enter_namespace(self, name):   162    163         "Enter the namespace having the given 'name'."   164    165         self.namespace_path.append(name)   166    167     def exit_namespace(self):   168    169         "Exit the current namespace."   170    171         self.namespace_path.pop()   172    173     # Constant reference naming.   174    175     def get_constant_name(self, value, value_type, encoding=None):   176    177         """   178         Add a new constant to the current namespace for 'value' with   179         'value_type'.   
180         """   181    182         path = self.get_namespace_path()   183         init_item(self.constants, path, dict)   184         return "$c%d" % add_counter_item(self.constants[path], (value, value_type, encoding))   185    186     # Literal reference naming.   187    188     def get_literal_name(self):   189    190         "Add a new literal to the current namespace."   191    192         path = self.get_namespace_path()   193         init_item(self.literals, path, lambda: 0)   194         return "$C%d" % self.literals[path]   195    196     def next_literal(self):   197         self.literals[self.get_namespace_path()] += 1   198    199     # Temporary iterator naming.   200    201     def get_iterator_path(self):   202         return self.in_function and self.get_namespace_path() or self.name   203    204     def get_iterator_name(self):   205         path = self.get_iterator_path()   206         init_item(self.iterators, path, lambda: 0)   207         return "$i%d" % self.iterators[path]   208    209     def next_iterator(self):   210         self.iterators[self.get_iterator_path()] += 1   211    212     # Temporary variable naming.   213    214     def get_temporary_name(self):   215         path = self.get_namespace_path()   216         init_item(self.temp, path, lambda: 0)   217         return "$t%d" % self.temp[path]   218    219     def next_temporary(self):   220         self.temp[self.get_namespace_path()] += 1   221    222     # Arbitrary function naming.   223    224     def get_lambda_name(self):   225         path = self.get_namespace_path()   226         init_item(self.lambdas, path, lambda: 0)   227         name = "$l%d" % self.lambdas[path]   228         self.lambdas[path] += 1   229         return name   230    231     def reset_lambdas(self):   232         self.lambdas = {}   233    234     # Constant and literal recording.   
235    236     def get_constant_value(self, value, literals=None):   237    238         """   239         Encode the 'value' if appropriate, returning a value, a typename and any   240         encoding.   241         """   242    243         if isinstance(value, unicode):   244             return value.encode("utf-8"), "unicode", self.encoding   245    246         # Attempt to convert plain strings to text.   247    248         elif isinstance(value, str) and self.encoding:   249             try:   250                 return get_string_details(literals, self.encoding)   251             except UnicodeDecodeError:   252                 pass   253    254         return value, value.__class__.__name__, None   255    256     def get_constant_reference(self, ref, value, encoding=None):   257    258         """   259         Return a constant reference for the given 'ref' type and 'value', with   260         the optional 'encoding' applying to text values.   261         """   262    263         constant_name = self.get_constant_name(value, ref.get_origin(), encoding)   264    265         # Return a reference for the constant.   266    267         objpath = self.get_object_path(constant_name)   268         name_ref = ConstantValueRef(constant_name, ref.instance_of(objpath), value)   269    270         # Record the value and type for the constant.   271    272         self._reserve_constant(objpath, name_ref.value, name_ref.get_origin(), encoding)   273         return name_ref   274    275     def reserve_constant(self, objpath, value, origin, encoding=None):   276    277         """   278         Reserve a constant within 'objpath' with the given 'value' and having a   279         type with the given 'origin', with the optional 'encoding' applying to   280         text values.   
281         """   282    283         constant_name = self.get_constant_name(value, origin)   284         objpath = self.get_object_path(constant_name)   285         self._reserve_constant(objpath, value, origin, encoding)   286    287     def _reserve_constant(self, objpath, value, origin, encoding):   288    289         """   290         Store a constant for 'objpath' with the given 'value' and 'origin', with   291         the optional 'encoding' applying to text values.   292         """   293    294         self.constant_values[objpath] = value, origin, encoding   295    296     def get_literal_reference(self, name, ref, items, cls):   297    298         """   299         Return a literal reference for the given type 'name', literal 'ref',   300         node 'items' and employing the given 'cls' as the class of the returned   301         reference object.   302         """   303    304         # Construct an invocation using the items as arguments.   305    306         typename = "$L%s" % name   307    308         invocation = compiler.ast.CallFunc(   309             compiler.ast.Name(typename),   310             items   311             )   312    313         # Get a name for the actual literal.   314    315         instname = self.get_literal_name()   316         self.next_literal()   317    318         # Record the type for the literal.   319    320         objpath = self.get_object_path(instname)   321         self.literal_types[objpath] = ref.get_origin()   322    323         # Return a wrapper for the invocation exposing the items.   324    325         return cls(   326             instname,   327             ref.instance_of(),   328             self.process_structure_node(invocation),   329             invocation.args   330             )   331    332     # Node handling.   333    334     def process_structure(self, node):   335    336         """   337         Within the given 'node', process the program structure.   
338    339         During inspection, this will process global declarations, adjusting the   340         module namespace, and import statements, building a module dependency   341         hierarchy.   342    343         During translation, this will consult deduced program information and   344         output translated code.   345         """   346    347         l = []   348         for n in node.getChildNodes():   349             l.append(self.process_structure_node(n))   350         return l   351    352     def process_augassign_node(self, n):   353    354         "Process the given augmented assignment node 'n'."   355    356         op = operator_functions[n.op]   357    358         if isinstance(n.node, compiler.ast.Getattr):   359             target = compiler.ast.AssAttr(n.node.expr, n.node.attrname, "OP_ASSIGN")   360         elif isinstance(n.node, compiler.ast.Name):   361             target = compiler.ast.AssName(n.node.name, "OP_ASSIGN")   362         else:   363             target = n.node   364    365         assignment = compiler.ast.Assign(   366             [target],   367             compiler.ast.CallFunc(   368                 compiler.ast.Name("$op%s" % op),   369                 [n.node, n.expr]))   370    371         return self.process_structure_node(assignment)   372    373     def process_assignment_for_object(self, original_name, source):   374    375         """   376         Return an assignment operation making 'original_name' refer to the given   377         'source'.   378         """   379    380         assignment = compiler.ast.Assign(   381             [compiler.ast.AssName(original_name, "OP_ASSIGN")],   382             source   383             )   384    385         return self.process_structure_node(assignment)   386    387     def process_assignment_node_items(self, n, expr):   388    389         """   390         Process the given assignment node 'n' whose children are to be assigned   391         items of 'expr'.   
392         """   393    394         name_ref = self.process_structure_node(expr)   395    396         # Either unpack the items and present them directly to each assignment   397         # node.   398    399         if isinstance(name_ref, LiteralSequenceRef) and \   400            self.process_literal_sequence_items(n, name_ref):   401    402             pass   403    404         # Or have the assignment nodes access each item via the sequence API.   405    406         else:   407             self.process_assignment_node_items_by_position(n, expr, name_ref)   408    409     def process_assignment_node_items_by_position(self, n, expr, name_ref):   410    411         """   412         Process the given sequence assignment node 'n', converting the node to   413         the separate assignment of each target using positional access on a   414         temporary variable representing the sequence. Use 'expr' as the assigned   415         value and 'name_ref' as the reference providing any existing temporary   416         variable.   417         """   418    419         assignments = []   420    421         # Employ existing names to access the sequence.   422         # Literal sequences do not provide names of accessible objects.   423    424         if isinstance(name_ref, NameRef) and not isinstance(name_ref, LiteralSequenceRef):   425             temp = name_ref.name   426    427         # For other expressions, create a temporary name to reference the items.   428    429         else:   430             temp = self.get_temporary_name()   431             self.next_temporary()   432    433             assignments.append(   434                 compiler.ast.Assign([compiler.ast.AssName(temp, "OP_ASSIGN")], expr)   435                 )   436    437         # Assign the items to the target nodes.   
438    439         for i, node in enumerate(n.nodes):   440             assignments.append(   441                 compiler.ast.Assign([node], compiler.ast.Subscript(   442                     compiler.ast.Name(temp), "OP_APPLY", [compiler.ast.Const(i, str(i))]))   443                 )   444    445         return self.process_structure_node(compiler.ast.Stmt(assignments))   446    447     def process_literal_sequence_items(self, n, name_ref):   448    449         """   450         Process the given assignment node 'n', obtaining from the given   451         'name_ref' the items to be assigned to the assignment targets.   452    453         Return whether this method was able to process the assignment node as   454         a sequence of direct assignments.   455         """   456    457         if len(n.nodes) == len(name_ref.items):   458             assigned_names, count = get_names_from_nodes(n.nodes)   459             accessed_names, _count = get_names_from_nodes(name_ref.items)   460    461             # Only assign directly between items if all assigned names are   462             # plain names (not attribute assignments), and if the assigned names   463             # do not appear in the accessed names.   464    465             if len(assigned_names) == count and \   466                not assigned_names.intersection(accessed_names):   467    468                 for node, item in zip(n.nodes, name_ref.items):   469                     self.process_assignment_node(node, item)   470    471                 return True   472    473             # Otherwise, use the position-based mechanism to obtain values.   474    475             else:   476                 return False   477         else:   478             raise InspectError("In %s, item assignment needing %d items is given %d items." 
% (   479                 self.get_namespace_path(), len(n.nodes), len(name_ref.items)))   480    481     def process_compare_node(self, n):   482    483         """   484         Process the given comparison node 'n', converting an operator sequence   485         from...   486    487         <expr1> <op1> <expr2> <op2> <expr3>   488    489         ...to...   490    491         <op1>(<expr1>, <expr2>) and <op2>(<expr2>, <expr3>)   492         """   493    494         invocations = []   495         last = n.expr   496    497         for op, op_node in n.ops:   498             op = operator_functions.get(op)   499    500             invocations.append(compiler.ast.CallFunc(   501                 compiler.ast.Name("$op%s" % op),   502                 [last, op_node]))   503    504             last = op_node   505    506         if len(invocations) > 1:   507             result = compiler.ast.And(invocations)   508         else:   509             result = invocations[0]   510    511         return self.process_structure_node(result)   512    513     def process_dict_node(self, node):   514    515         """   516         Process the given dictionary 'node', returning a list of (key, value)   517         tuples.   518         """   519    520         l = []   521         for key, value in node.items:   522             l.append((   523                 self.process_structure_node(key),   524                 self.process_structure_node(value)))   525         return l   526    527     def process_for_node(self, n):   528    529         """   530         Generate attribute accesses for {n.list}.__iter__ and the next method on   531         the iterator, producing a replacement node for the original.   
532         """   533    534         node = compiler.ast.Stmt([   535    536             # <next> = {n.list}.__iter__().next   537    538             compiler.ast.Assign(   539                 [compiler.ast.AssName(self.get_iterator_name(), "OP_ASSIGN")],   540                 compiler.ast.Getattr(   541                     compiler.ast.CallFunc(   542                         compiler.ast.Getattr(n.list, "__iter__"),   543                         []   544                         ), "next")),   545    546             # try:   547             #     while True:   548             #         <var>... = <next>()   549             #         ...   550             # except StopIteration:   551             #     pass   552    553             compiler.ast.TryExcept(   554                 compiler.ast.While(   555                     compiler.ast.Name("True"),   556                     compiler.ast.Stmt([   557                         compiler.ast.Assign(   558                             [n.assign],   559                             compiler.ast.CallFunc(   560                                 compiler.ast.Name(self.get_iterator_name()),   561                                 []   562                                 )),   563                         n.body]),   564                     None),   565                 [(compiler.ast.Name("StopIteration"), None, compiler.ast.Stmt([compiler.ast.Pass()]))],   566                 None)   567             ])   568    569         self.next_iterator()   570         self.process_structure_node(node)   571    572     def process_literal_sequence_node(self, n, name, ref, cls):   573    574         """   575         Process the given literal sequence node 'n' as a function invocation,   576         with 'name' indicating the type of the sequence, and 'ref' being a   577         reference to the type. The 'cls' is used to instantiate a suitable name   578         reference.   
579         """   580    581         if name == "dict":   582             items = []   583             for key, value in n.items:   584                 items.append(compiler.ast.Tuple([key, value]))   585         else: # name in ("list", "tuple"):   586             items = n.nodes   587    588         return self.get_literal_reference(name, ref, items, cls)   589    590     def process_operator_node(self, n):   591    592         """   593         Process the given operator node 'n' as an operator function invocation.   594         """   595    596         op = operator_functions[n.__class__.__name__]   597         invocation = compiler.ast.CallFunc(   598             compiler.ast.Name("$op%s" % op),   599             list(n.getChildNodes())   600             )   601         return self.process_structure_node(invocation)   602    603     def process_print_node(self, n):   604    605         """   606         Process the given print node 'n' as an invocation on a stream of the   607         form...   608    609         $print(dest, args, nl)   610    611         The special function name will be translated elsewhere.   612         """   613    614         nl = isinstance(n, compiler.ast.Printnl)   615         invocation = compiler.ast.CallFunc(   616             compiler.ast.Name("$print"),   617             [n.dest or compiler.ast.Name("None"),   618              compiler.ast.List(list(n.nodes)),   619              nl and compiler.ast.Name("True") or compiler.ast.Name("False")]   620             )   621         return self.process_structure_node(invocation)   622    623     def process_slice_node(self, n, expr=None):   624    625         """   626         Process the given slice node 'n' as an operator function invocation.   
627         """   628    629         if n.flags == "OP_ASSIGN": op = "setslice"   630         elif n.flags == "OP_DELETE": op = "delslice"   631         else: op = "getslice"   632    633         invocation = compiler.ast.CallFunc(   634             compiler.ast.Name("$op%s" % op),   635             [n.expr, n.lower or compiler.ast.Name("None"), n.upper or compiler.ast.Name("None")] +   636                 (expr and [expr] or [])   637             )   638    639         # Fix parse tree structure.   640    641         if op == "delslice":   642             invocation = compiler.ast.Discard(invocation)   643    644         return self.process_structure_node(invocation)   645    646     def process_sliceobj_node(self, n):   647    648         """   649         Process the given slice object node 'n' as a slice constructor.   650         """   651    652         op = "slice"   653         invocation = compiler.ast.CallFunc(   654             compiler.ast.Name("$op%s" % op),   655             n.nodes   656             )   657         return self.process_structure_node(invocation)   658    659     def process_subscript_node(self, n, expr=None):   660    661         """   662         Process the given subscript node 'n' as an operator function invocation.   663         """   664    665         if n.flags == "OP_ASSIGN": op = "setitem"   666         elif n.flags == "OP_DELETE": op = "delitem"   667         else: op = "getitem"   668    669         invocation = compiler.ast.CallFunc(   670             compiler.ast.Name("$op%s" % op),   671             [n.expr] + list(n.subs) + (expr and [expr] or [])   672             )   673    674         # Fix parse tree structure.   
675    676         if op == "delitem":   677             invocation = compiler.ast.Discard(invocation)   678    679         return self.process_structure_node(invocation)   680    681     def process_attribute_chain(self, n):   682    683         """   684         Process the given attribute access node 'n'. Return a reference   685         describing the expression.   686         """   687    688         # AssAttr/Getattr are nested with the outermost access being the last   689         # access in any chain.   690    691         self.attrs.insert(0, n.attrname)   692         attrs = self.attrs   693    694         # Break attribute chains where non-access nodes are found.   695    696         if not self.have_access_expression(n):   697             self.reset_attribute_chain()   698    699         # Descend into the expression, extending backwards any existing chain,   700         # or building another for the expression.   701    702         name_ref = self.process_structure_node(n.expr)   703    704         # Restore chain information applying to this node.   705    706         if not self.have_access_expression(n):   707             self.restore_attribute_chain(attrs)   708    709         # Return immediately if the expression was another access and thus a   710         # continuation backwards along the chain. The above processing will   711         # have followed the chain all the way to its conclusion.   712    713         if self.have_access_expression(n):   714             del self.attrs[0]   715    716         return name_ref   717    718     # Attribute chain handling.   719    720     def reset_attribute_chain(self):   721    722         "Reset the attribute chain for a subexpression of an attribute access."   
723    724         self.attrs = []   725         self.chain_assignment.append(self.in_assignment)   726         self.chain_invocation.append(self.in_invocation)   727         self.in_assignment = None   728         self.in_invocation = None   729    730     def restore_attribute_chain(self, attrs):   731    732         "Restore the attribute chain for an attribute access."   733    734         self.attrs = attrs   735         self.in_assignment = self.chain_assignment.pop()   736         self.in_invocation = self.chain_invocation.pop()   737    738     def have_access_expression(self, node):   739    740         "Return whether the expression associated with 'node' is Getattr."   741    742         return isinstance(node.expr, compiler.ast.Getattr)   743    744     def get_name_for_tracking(self, name, ref=None):   745    746         """   747         Return the name to be used for attribute usage observations involving   748         the given 'name' in the current namespace. If 'ref' is indicated and   749         the name is being used outside a function, return the origin information   750         from 'ref'; otherwise, return a path computed using the current   751         namespace and the given name.   752    753         The intention of this method is to provide a suitably-qualified name   754         that can be tracked across namespaces. Where globals are being   755         referenced in class namespaces, they should be referenced using their   756         path within the module, not using a path within each class.   757    758         It may not be possible to identify a global within a function at the   759         time of inspection (since a global may appear later in a file).   760         Consequently, globals are identified by their local name rather than   761         their module-qualified path.   762         """   763    764         # For functions, use the appropriate local names.   
765    766         if self.in_function:   767             return name   768    769         # For static namespaces, use the given qualified name.   770    771         elif ref and ref.static():   772             return ref.get_origin()   773    774         # For non-static objects in static namespaces, use any alias.   775    776         elif ref and ref.get_name():   777             return ref.get_name()   778    779         # Otherwise, establish a name in the current namespace.   780    781         else:   782             return self.get_object_path(name)   783    784     def get_path_for_access(self):   785    786         "Outside functions, register accesses at the module level."   787    788         if not self.in_function:   789             return self.name   790         else:   791             return self.get_namespace_path()   792    793     def get_module_name(self, node):   794    795         """   796         Using the given From 'node' in this module, calculate any relative import   797         information, returning a tuple containing a module to import along with any   798         names to import based on the node's name information.   799    800         Where the returned module is given as None, whole module imports should   801         be performed for the returned modules using the returned names.   802         """   803    804         # Absolute import.   805    806         if node.level == 0:   807             return node.modname, node.names   808    809         # Relative to an ancestor of this module.   810    811         else:   812             path = self.name.split(".")   813             level = node.level   814    815             # Relative imports treat package roots as submodules.   
def get_argnames(args):

    """
    Return a list of all names provided by 'args'. Since tuples may be
    employed, the arguments are traversed depth-first.
    """

    l = []
    for arg in args:
        if isinstance(arg, tuple):
            l.extend(get_argnames(arg))
        else:
            l.append(arg)
    return l

def get_names_from_nodes(nodes):

    """
    Return the names employed in the given 'nodes' along with the number of
    nodes excluding sequences.

    The result is a tuple containing a set of names and a count. Sequence
    nodes are not themselves counted: only their (recursively visited)
    elements contribute to the count.
    """

    names = set()
    count = 0

    for node in nodes:

        # Add names and count them.

        if isinstance(node, (compiler.ast.AssName, compiler.ast.Name)):
            names.add(node.name)
            count += 1

        # Add names from sequences and incorporate their counts.

        elif isinstance(node, (compiler.ast.AssList, compiler.ast.AssTuple,
                               compiler.ast.List, compiler.ast.Set,
                               compiler.ast.Tuple)):
            _names, _count = get_names_from_nodes(node.nodes)
            names.update(_names)
            count += _count

        # Count non-name, non-sequence nodes.

        else:
            count += 1

    return names, count

# Result classes.

class InstructionSequence:

    "A generic sequence of instructions."

    def __init__(self, instructions):

        "Initialise the sequence with the given list of 'instructions'."

        self.instructions = instructions

    def get_value_instruction(self):

        "Return the final instruction, this providing the sequence's value."

        return self.instructions[-1]

    def get_init_instructions(self):

        "Return all instructions preceding the final, value instruction."

        return self.instructions[:-1]

# Dictionary utilities.

def init_item(d, key, fn):

    """
    Add to 'd' an entry for 'key' using the callable 'fn' to make an initial
    value where no entry already exists. Return the entry's value.
    """

    if key not in d:
        d[key] = fn()
    return d[key]

def dict_for_keys(d, keys):

    """
    Return a new dictionary containing entries from 'd' for the given 'keys'.
    Keys not present in 'd' are ignored.
    """

    nd = {}
    for key in keys:
        if key in d:
            nd[key] = d[key]
    return nd

def make_key(s):

    "Make sequence 's' into a tuple-based key, first sorting its contents."

    return tuple(sorted(s))

def add_counter_item(d, key):

    """
    Make a mapping in 'd' for 'key' to the number of keys added before it, thus
    maintaining a mapping of keys to their order of insertion. Return the
    position recorded for 'key'.
    """

    if key not in d:
        d[key] = len(d)
    return d[key]

def remove_items(d1, d2):

    "Remove from 'd1' all items from 'd2'."

    # Iterate over a snapshot of the keys so that the operation remains safe
    # even if 'd1' and 'd2' are the same dictionary.

    for key in list(d2):
        if key in d1:
            del d1[key]

# Set utilities.

def first(s):

    """
    Return the first item obtained by iterating over 's', raising IndexError
    if 's' is empty.
    """

    # Avoid copying the entire collection just to obtain one item.

    for item in s:
        return item
    raise IndexError("collection is empty")

def same(s1, s2):

    "Return whether 's1' and 's2' contain the same distinct items."

    return set(s1) == set(s2)

# General input/output.

def readfile(filename):

    "Return the contents of 'filename'."

    f = open(filename)
    try:
        return f.read()
    finally:
        f.close()

def writefile(filename, s):

    "Write to 'filename' the string 's'."

    f = open(filename, "w")
    try:
        f.write(s)
    finally:
        f.close()

# General encoding.

def sorted_output(x):

    "Sort sequence 'x' and return a string with commas separating the values."

    # The values are converted to strings before sorting, so the ordering is
    # textual and not numeric.

    return ", ".join(sorted(map(str, x)))

def get_string_details(literals, encoding):

    """
    Determine whether 'literals' represent Unicode strings or byte strings,
    using 'encoding' to reproduce byte sequences.

    Each literal is the full program representation including prefix and quotes
    recoded by the parser to UTF-8. Thus, any literal found to represent a byte
    string needs to be translated back to its original encoding.

    Return a single encoded literal value, a type name, and the original
    encoding as a tuple.
    """

    typename = "unicode"

    l = []

    # A single byte string literal makes the entire concatenation a byte
    # string.

    for s in literals:
        out, _typename = get_literal_details(s)
        if _typename == "str":
            typename = "str"
        l.append(out)

    out = "".join(l)

    # For Unicode values, convert to the UTF-8 program representation.

    if typename == "unicode":
        return out.encode("utf-8"), typename, encoding

    # For byte string values, convert back to the original encoding.

    else:
        return out.encode(encoding), typename, encoding

def get_octal_escape_length(s, index, end):

    """
    Return the length of the octal escape sequence in 's' starting with the
    backslash at 'index', including the backslash itself, with 'end' indicating
    the first position after the end of the actual string content.

    Octal escape sequences employ between one and three octal digits.
    """

    limit = min(index + 4, end)
    pos = index + 1

    while pos < limit and s[pos] in "01234567":
        pos += 1

    return pos - index

def get_literal_details(s):

    """
    Determine whether 's' represents a Unicode string or a byte string, where
    's' contains the full program representation of a literal including prefix
    and quotes, recoded by the parser to UTF-8.

    Find and convert Unicode values starting with <backslash>u or <backslash>U,
    and byte or Unicode values starting with <backslash><octal digit> or
    <backslash>x. Octal values may employ one, two or three digits.

    Literals prefixed with "u" cause <backslash><octal digit> and <backslash>x
    to be considered as Unicode values. Otherwise, they produce byte values and
    cause unprefixed strings to be considered as byte strings.

    Literals prefixed with "r" do not have their backslash-encoded values
    converted unless also prefixed with "u", in which case only the above value
    formats are converted, not any of the other special sequences for things
    like newlines.

    Return the literal value as a Unicode object together with the appropriate
    type name in a tuple. Raise ValueError if the literal has an unsupported
    prefix.
    """

    l = []

    # Identify the quote character and use it to identify the prefix.

    quote_type = s[-1]
    prefix_end = s.find(quote_type)
    prefix = s[:prefix_end].lower()

    if prefix not in ("", "b", "br", "r", "u", "ur"):
        raise ValueError("String literal does not have a supported prefix: %s" % s)

    if "b" in prefix:
        typename = "str"
    else:
        typename = "unicode"

    # Identify triple quotes or single quotes.

    if len(s) >= 6 and s[-2] == quote_type and s[-3] == quote_type:
        current = prefix_end + 3
        end = len(s) - 3
    else:
        current = prefix_end + 1
        end = len(s) - 1

    # Conversions of some quoted values: the total length of the sequence
    # including the backslash and format character, plus the numerical base.

    searches = {
        "u" : (6, 16),
        "U" : (10, 16),
        "x" : (4, 16),
        }

    octal_digits = ("0", "1", "2", "3", "4", "5", "6", "7")

    # Translations of some quoted values.

    escaped = {
        "\\" : "\\", "'" : "'", '"' : '"',
        "a" : "\a", "b" : "\b", "f" : "\f",
        "n" : "\n", "r" : "\r", "t" : "\t",
        }

    while current < end:

        # Look for quoted values.

        index = s.find("\\", current)
        if index == -1 or index + 1 == end:
            l.append(s[current:end])
            break

        # Add the preceding text.

        l.append(s[current:index])

        # Handle quoted text.

        term = s[index+1]
        is_octal = term in octal_digits

        # Determine the extent and base of any numeric value. Octal values
        # have a variable length of up to three digits.

        if term in searches:
            needed, base = searches[term]
        elif is_octal:
            needed, base = get_octal_escape_length(s, index, end), 8
        else:
            needed, base = None, None

        # Add Unicode values. Where a string is u-prefixed, even \o and \x
        # produce Unicode values.

        if typename == "unicode" and (
            term in ("u", "U") or
            "u" in prefix and (term == "x" or is_octal)):

            value = convert_quoted_value(s, index, needed, end, base, unichr)
            l.append(value)
            current = index + needed

        # Add raw byte values, changing the string type.

        elif "r" not in prefix and (term == "x" or is_octal):

            value = convert_quoted_value(s, index, needed, end, base, chr)
            l.append(value)
            typename = "str"
            current = index + needed

        # Add other escaped values.

        elif "r" not in prefix and term in escaped:
            l.append(escaped[term])
            current = index + 2

        # Add other text as found.

        else:
            l.append(s[index:index+2])
            current = index + 2

    # Collect the components into a single Unicode object. Since the literal
    # text was already in UTF-8 form, interpret plain strings as UTF-8
    # sequences.

    out = []

    for value in l:
        if isinstance(value, unicode):
            out.append(value)
        else:
            out.append(unicode(value, "utf-8"))

    return "".join(out), typename

def convert_quoted_value(s, index, needed, end, base, fn):

    """
    Interpret a quoted value in 's' at 'index' with the given 'needed' number of
    positions, and with the given 'end' indicating the first position after the
    end of the actual string content.

    Use 'base' as the numerical base when interpreting the value, and use 'fn'
    to convert the value to an appropriate type.

    Return the converted value or, where the value is incomplete, malformed or
    cannot be converted by 'fn', the text of the value as found in 's'.
    """

    s = s[index:min(index+needed, end)]

    # Not a complete occurrence.

    if len(s) < needed:
        return s

    # Octal values start immediately after the backslash, whereas other values
    # are preceded by a format character.

    start = 1 if base == 8 else 2

    # Test for a well-formed value that can also be represented using 'fn'.

    try:
        return fn(int(s[start:needed], base))
    except (ValueError, OverflowError):
        return s

# Attribute chain decoding.

def get_attrnames(attrnames):

    """
    Split the qualified attribute chain 'attrnames' into its components,
    handling special attributes starting with "#" that indicate type
    conformance.
    """

    if attrnames.startswith("#"):
        return [attrnames]
    else:
        return attrnames.split(".")

def get_attrname_from_location(location):

    """
    Extract the first attribute from the attribute names employed in a
    'location'. Where no attribute names are present, the empty attribute names
    value is itself returned.
    """

    path, name, attrnames, access = location
    if not attrnames:
        return attrnames
    return get_attrnames(attrnames)[0]

def get_name_path(path, name):

    """
    Return a suitable qualified name from the given 'path' and 'name'. A 'name'
    already containing a dot is returned unchanged.
    """

    if "." in name:
        return name
    else:
        return "%s.%s" % (path, name)

# Usage-related functions.

def get_types_for_usage(attrnames, objects):

    """
    Identify the types that can support the given 'attrnames', using the
    given 'objects' as the catalogue of type details.
    """

    # Prepare the required attribute names once for all comparisons.

    required = set(attrnames)

    types = []
    for name, _attrnames in objects.items():
        if required.issubset(_attrnames):
            types.append(name)
    return types

def get_invoked_attributes(usage):

    "Obtain invoked attribute from the given 'usage'."

    if not usage:
        return []
    return [attrname for (attrname, invocation, assignment) in usage
                     if invocation]

def get_assigned_attributes(usage):

    "Obtain assigned attribute from the given 'usage'."

    if not usage:
        return []
    return [attrname for (attrname, invocation, assignment) in usage
                     if assignment]

# Type and module functions.
# NOTE: This makes assumptions about the __builtins__ structure.

def get_builtin_module(name):

    "Return the module name containing the given type 'name'."

    # Types residing in modules whose names differ from the type names.

    modnames = {
        "string" : "str",
        "utf8string" : "unicode",
        "NoneType" : "none",
        }

    return "__builtins__.%s" % modnames.get(name, name)

def get_builtin_type(name):

    "Return the type name provided by the given Python value 'name'."

    # Python type names that are provided by differently named types.

    typenames = {
        "str" : "string",
        "unicode" : "utf8string",
        }

    return typenames.get(name, name)

def get_builtin_class(name):

    "Return the full name of the built-in class having the given 'name'."

    typename = get_builtin_type(name)
    module = get_builtin_module(typename)
    return "%s.%s" % (module, typename)

# Useful data.

# Names of the predefined constants.

predefined_constants = "False", "None", "NotImplemented", "True"

# A mapping from operator symbols and operator node names to function names.

operator_functions = {

    # Fundamental operations.

    "is" : "is_",
    "is not" : "is_not",

    # Binary operations.

    "in" : "in_",
    "not in" : "not_in",
    "Add" : "add",
    "Bitand" : "and_",
    "Bitor" : "or_",
    "Bitxor" : "xor",
    "Div" : "div",
    "FloorDiv" : "floordiv",
    "LeftShift" : "lshift",
    "Mod" : "mod",
    "Mul" : "mul",
    "Power" : "pow",
    "RightShift" : "rshift",
    "Sub" : "sub",

    # Unary operations.

    "Invert" : "invert",
    "UnaryAdd" : "pos",
    "UnarySub" : "neg",

    # Augmented assignment.

    "+=" : "iadd",
    "-=" : "isub",
    "*=" : "imul",
    "/=" : "idiv",
    "//=" : "ifloordiv",
    "%=" : "imod",
    "**=" : "ipow",
    "<<=" : "ilshift",
    ">>=" : "irshift",
    "&=" : "iand",
    "^=" : "ixor",
    "|=" : "ior",

    # Comparisons.

    "==" : "eq",
    "!=" : "ne",
    "<" : "lt",
    "<=" : "le",
    ">=" : "ge",
    ">" : "gt",
    }

# vim: tabstop=4 expandtab shiftwidth=4