Lichen (file common.py at 7c5a7efcaf14)

     1 #!/usr/bin/env python     2      3 """     4 Common functions.     5      6 Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013,     7               2014, 2015, 2016, 2017 Paul Boddie <paul@boddie.org.uk>     8      9 This program is free software; you can redistribute it and/or modify it under    10 the terms of the GNU General Public License as published by the Free Software    11 Foundation; either version 3 of the License, or (at your option) any later    12 version.    13     14 This program is distributed in the hope that it will be useful, but WITHOUT    15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS    16 FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more    17 details.    18     19 You should have received a copy of the GNU General Public License along with    20 this program.  If not, see <http://www.gnu.org/licenses/>.    21 """    22     23 from compiler.transformer import Transformer    24 from errors import InspectError    25 from os import listdir, makedirs, remove    26 from os.path import exists, isdir, join, split    27 from results import ConstantValueRef, LiteralSequenceRef, NameRef    28 import compiler.ast    29     30 class CommonOutput:    31     32     "Common output functionality."    33     34     def check_output(self):    35     36         "Check the existing output and remove it if irrelevant."    37     38         if not exists(self.output):    39             makedirs(self.output)    40     41         details = self.importer.get_cache_details()    42         recorded_details = self.get_output_details()    43     44         if recorded_details != details:    45             self.remove_output()    46     47         writefile(self.get_output_details_filename(), details)    48     49     def get_output_details_filename(self):    50     51         "Return the output details filename."    
52     53         return join(self.output, "$details")    54     55     def get_output_details(self):    56     57         "Return details of the existing output."    58     59         details_filename = self.get_output_details_filename()    60     61         if not exists(details_filename):    62             return None    63         else:    64             return readfile(details_filename)    65     66     def remove_output(self, dirname=None):    67     68         "Remove the output."    69     70         dirname = dirname or self.output    71     72         for filename in listdir(dirname):    73             path = join(dirname, filename)    74             if isdir(path):    75                 self.remove_output(path)    76             else:    77                 remove(path)    78     79 class CommonModule:    80     81     "A common module representation."    82     83     def __init__(self, name, importer):    84     85         """    86         Initialise this module with the given 'name' and an 'importer' which is    87         used to provide access to other modules when required.    88         """    89     90         self.name = name    91         self.importer = importer    92         self.filename = None    93     94         # Inspection-related attributes.    95     96         self.astnode = None    97         self.encoding = None    98         self.iterators = {}    99         self.temp = {}   100         self.lambdas = {}   101    102         # Constants, literals and values.   103    104         self.constants = {}   105         self.constant_values = {}   106         self.literals = {}   107         self.literal_types = {}   108    109         # Nested namespaces.   110    111         self.namespace_path = []   112         self.in_function = False   113    114         # Retain the assignment value expression and track invocations.   
115    116         self.in_assignment = None   117         self.in_invocation = None   118    119         # Attribute chain state management.   120    121         self.attrs = []   122         self.chain_assignment = []   123         self.chain_invocation = []   124    125     def __repr__(self):   126         return "CommonModule(%r, %r)" % (self.name, self.importer)   127    128     def parse_file(self, filename):   129    130         "Parse the file with the given 'filename', initialising attributes."   131    132         self.filename = filename   133    134         # Use the Transformer directly to obtain encoding information.   135    136         t = Transformer()   137         f = open(filename)   138    139         try:   140             self.astnode = t.parsesuite(f.read() + "\n")   141             self.encoding = t.encoding   142         finally:   143             f.close()   144    145     # Module-relative naming.   146    147     def get_global_path(self, name):   148         return "%s.%s" % (self.name, name)   149    150     def get_namespace_path(self):   151         return ".".join([self.name] + self.namespace_path)   152    153     def get_object_path(self, name):   154         return ".".join([self.name] + self.namespace_path + [name])   155    156     def get_parent_path(self):   157         return ".".join([self.name] + self.namespace_path[:-1])   158    159     # Namespace management.   160    161     def enter_namespace(self, name):   162    163         "Enter the namespace having the given 'name'."   164    165         self.namespace_path.append(name)   166    167     def exit_namespace(self):   168    169         "Exit the current namespace."   170    171         self.namespace_path.pop()   172    173     # Constant reference naming.   174    175     def get_constant_name(self, value, value_type, encoding=None):   176    177         """   178         Add a new constant to the current namespace for 'value' with   179         'value_type'.   
180         """   181    182         path = self.get_namespace_path()   183         init_item(self.constants, path, dict)   184         return "$c%d" % add_counter_item(self.constants[path], (value, value_type, encoding))   185    186     # Literal reference naming.   187    188     def get_literal_name(self):   189    190         "Add a new literal to the current namespace."   191    192         path = self.get_namespace_path()   193         init_item(self.literals, path, lambda: 0)   194         return "$C%d" % self.literals[path]   195    196     def next_literal(self):   197         self.literals[self.get_namespace_path()] += 1   198    199     # Temporary iterator naming.   200    201     def get_iterator_path(self):   202         return self.in_function and self.get_namespace_path() or self.name   203    204     def get_iterator_name(self):   205         path = self.get_iterator_path()   206         init_item(self.iterators, path, lambda: 0)   207         return "$i%d" % self.iterators[path]   208    209     def next_iterator(self):   210         self.iterators[self.get_iterator_path()] += 1   211    212     # Temporary variable naming.   213    214     def get_temporary_name(self):   215         path = self.get_namespace_path()   216         init_item(self.temp, path, lambda: 0)   217         return "$t%d" % self.temp[path]   218    219     def next_temporary(self):   220         self.temp[self.get_namespace_path()] += 1   221    222     # Arbitrary function naming.   223    224     def get_lambda_name(self):   225         path = self.get_namespace_path()   226         init_item(self.lambdas, path, lambda: 0)   227         name = "$l%d" % self.lambdas[path]   228         self.lambdas[path] += 1   229         return name   230    231     def reset_lambdas(self):   232         self.lambdas = {}   233    234     # Constant and literal recording.   
235    236     def get_constant_value(self, value, literals=None):   237    238         """   239         Encode the 'value' if appropriate, returning a value, a typename and any   240         encoding.   241         """   242    243         if isinstance(value, unicode):   244             return value.encode("utf-8"), "unicode", self.encoding   245    246         # Attempt to convert plain strings to text.   247    248         elif isinstance(value, str) and self.encoding:   249             try:   250                 return get_string_details(literals, self.encoding)   251             except UnicodeDecodeError:   252                 pass   253    254         return value, value.__class__.__name__, None   255    256     def get_constant_reference(self, ref, value, encoding=None):   257    258         """   259         Return a constant reference for the given 'ref' type and 'value', with   260         the optional 'encoding' applying to text values.   261         """   262    263         constant_name = self.get_constant_name(value, ref.get_origin(), encoding)   264    265         # Return a reference for the constant.   266    267         objpath = self.get_object_path(constant_name)   268         name_ref = ConstantValueRef(constant_name, ref.instance_of(objpath), value)   269    270         # Record the value and type for the constant.   271    272         self._reserve_constant(objpath, name_ref.value, name_ref.get_origin(), encoding)   273         return name_ref   274    275     def reserve_constant(self, objpath, value, origin, encoding=None):   276    277         """   278         Reserve a constant within 'objpath' with the given 'value' and having a   279         type with the given 'origin', with the optional 'encoding' applying to   280         text values.   
281         """   282    283         constant_name = self.get_constant_name(value, origin)   284         objpath = self.get_object_path(constant_name)   285         self._reserve_constant(objpath, value, origin, encoding)   286    287     def _reserve_constant(self, objpath, value, origin, encoding):   288    289         """   290         Store a constant for 'objpath' with the given 'value' and 'origin', with   291         the optional 'encoding' applying to text values.   292         """   293    294         self.constant_values[objpath] = value, origin, encoding   295    296     def get_literal_reference(self, name, ref, items, cls):   297    298         """   299         Return a literal reference for the given type 'name', literal 'ref',   300         node 'items' and employing the given 'cls' as the class of the returned   301         reference object.   302         """   303    304         # Construct an invocation using the items as arguments.   305    306         typename = "$L%s" % name   307    308         invocation = compiler.ast.CallFunc(   309             compiler.ast.Name(typename),   310             items   311             )   312    313         # Get a name for the actual literal.   314    315         instname = self.get_literal_name()   316         self.next_literal()   317    318         # Record the type for the literal.   319    320         objpath = self.get_object_path(instname)   321         self.literal_types[objpath] = ref.get_origin()   322    323         # Return a wrapper for the invocation exposing the items.   324    325         return cls(   326             instname,   327             ref.instance_of(),   328             self.process_structure_node(invocation),   329             invocation.args   330             )   331    332     # Node handling.   333    334     def process_structure(self, node):   335    336         """   337         Within the given 'node', process the program structure.   
338    339         During inspection, this will process global declarations, adjusting the   340         module namespace, and import statements, building a module dependency   341         hierarchy.   342    343         During translation, this will consult deduced program information and   344         output translated code.   345         """   346    347         l = []   348         for n in node.getChildNodes():   349             l.append(self.process_structure_node(n))   350         return l   351    352     def process_augassign_node(self, n):   353    354         "Process the given augmented assignment node 'n'."   355    356         op = operator_functions[n.op]   357    358         if isinstance(n.node, compiler.ast.Getattr):   359             target = compiler.ast.AssAttr(n.node.expr, n.node.attrname, "OP_ASSIGN")   360         elif isinstance(n.node, compiler.ast.Name):   361             target = compiler.ast.AssName(n.node.name, "OP_ASSIGN")   362         else:   363             target = n.node   364    365         assignment = compiler.ast.Assign(   366             [target],   367             compiler.ast.CallFunc(   368                 compiler.ast.Name("$op%s" % op),   369                 [n.node, n.expr]))   370    371         return self.process_structure_node(assignment)   372    373     def process_assignment_for_object(self, original_name, source):   374    375         """   376         Return an assignment operation making 'original_name' refer to the given   377         'source'.   378         """   379    380         assignment = compiler.ast.Assign(   381             [compiler.ast.AssName(original_name, "OP_ASSIGN")],   382             source   383             )   384    385         return self.process_structure_node(assignment)   386    387     def process_assignment_node_items(self, n, expr):   388    389         """   390         Process the given assignment node 'n' whose children are to be assigned   391         items of 'expr'.   
392         """   393    394         name_ref = self.process_structure_node(expr)   395    396         # Either unpack the items and present them directly to each assignment   397         # node.   398    399         if isinstance(name_ref, LiteralSequenceRef) and \   400            self.process_literal_sequence_items(n, name_ref):   401    402             pass   403    404         # Or have the assignment nodes access each item via the sequence API.   405    406         else:   407             self.process_assignment_node_items_by_position(n, expr, name_ref)   408    409     def process_assignment_node_items_by_position(self, n, expr, name_ref):   410    411         """   412         Process the given sequence assignment node 'n', converting the node to   413         the separate assignment of each target using positional access on a   414         temporary variable representing the sequence. Use 'expr' as the assigned   415         value and 'name_ref' as the reference providing any existing temporary   416         variable.   417         """   418    419         assignments = []   420    421         # Employ existing names to access the sequence.   422         # Literal sequences do not provide names of accessible objects.   423    424         if isinstance(name_ref, NameRef) and not isinstance(name_ref, LiteralSequenceRef):   425             temp = name_ref.name   426    427         # For other expressions, create a temporary name to reference the items.   428    429         else:   430             temp = self.get_temporary_name()   431             self.next_temporary()   432    433             assignments.append(   434                 compiler.ast.Assign([compiler.ast.AssName(temp, "OP_ASSIGN")], expr)   435                 )   436    437         # Assign the items to the target nodes.   
438    439         for i, node in enumerate(n.nodes):   440             assignments.append(   441                 compiler.ast.Assign([node], compiler.ast.Subscript(   442                     compiler.ast.Name(temp), "OP_APPLY", [compiler.ast.Const(i, str(i))]))   443                 )   444    445         return self.process_structure_node(compiler.ast.Stmt(assignments))   446    447     def process_literal_sequence_items(self, n, name_ref):   448    449         """   450         Process the given assignment node 'n', obtaining from the given   451         'name_ref' the items to be assigned to the assignment targets.   452    453         Return whether this method was able to process the assignment node as   454         a sequence of direct assignments.   455         """   456    457         if len(n.nodes) == len(name_ref.items):   458             assigned_names, count = get_names_from_nodes(n.nodes)   459             accessed_names, _count = get_names_from_nodes(name_ref.items)   460    461             # Only assign directly between items if all assigned names are   462             # plain names (not attribute assignments), and if the assigned names   463             # do not appear in the accessed names.   464    465             if len(assigned_names) == count and \   466                not assigned_names.intersection(accessed_names):   467    468                 for node, item in zip(n.nodes, name_ref.items):   469                     self.process_assignment_node(node, item)   470    471                 return True   472    473             # Otherwise, use the position-based mechanism to obtain values.   474    475             else:   476                 return False   477         else:   478             raise InspectError("In %s, item assignment needing %d items is given %d items." 
% (   479                 self.get_namespace_path(), len(n.nodes), len(name_ref.items)))   480    481     def process_compare_node(self, n):   482    483         """   484         Process the given comparison node 'n', converting an operator sequence   485         from...   486    487         <expr1> <op1> <expr2> <op2> <expr3>   488    489         ...to...   490    491         <op1>(<expr1>, <expr2>) and <op2>(<expr2>, <expr3>)   492         """   493    494         invocations = []   495         last = n.expr   496    497         for op, op_node in n.ops:   498             op = operator_functions.get(op)   499    500             invocations.append(compiler.ast.CallFunc(   501                 compiler.ast.Name("$op%s" % op),   502                 [last, op_node]))   503    504             last = op_node   505    506         if len(invocations) > 1:   507             result = compiler.ast.And(invocations)   508         else:   509             result = invocations[0]   510    511         return self.process_structure_node(result)   512    513     def process_dict_node(self, node):   514    515         """   516         Process the given dictionary 'node', returning a list of (key, value)   517         tuples.   518         """   519    520         l = []   521         for key, value in node.items:   522             l.append((   523                 self.process_structure_node(key),   524                 self.process_structure_node(value)))   525         return l   526    527     def process_for_node(self, n):   528    529         """   530         Generate attribute accesses for {n.list}.__iter__ and the next method on   531         the iterator, producing a replacement node for the original.   
532         """   533    534         node = compiler.ast.Stmt([   535    536             # <next> = {n.list}.__iter__().next   537    538             compiler.ast.Assign(   539                 [compiler.ast.AssName(self.get_iterator_name(), "OP_ASSIGN")],   540                 compiler.ast.Getattr(   541                     compiler.ast.CallFunc(   542                         compiler.ast.Getattr(n.list, "__iter__"),   543                         []   544                         ), "next")),   545    546             # try:   547             #     while True:   548             #         <var>... = <next>()   549             #         ...   550             # except StopIteration:   551             #     pass   552    553             compiler.ast.TryExcept(   554                 compiler.ast.While(   555                     compiler.ast.Name("True"),   556                     compiler.ast.Stmt([   557                         compiler.ast.Assign(   558                             [n.assign],   559                             compiler.ast.CallFunc(   560                                 compiler.ast.Name(self.get_iterator_name()),   561                                 []   562                                 )),   563                         n.body]),   564                     None),   565                 [(compiler.ast.Name("StopIteration"), None, compiler.ast.Stmt([compiler.ast.Pass()]))],   566                 None)   567             ])   568    569         self.next_iterator()   570         self.process_structure_node(node)   571    572     def process_literal_sequence_node(self, n, name, ref, cls):   573    574         """   575         Process the given literal sequence node 'n' as a function invocation,   576         with 'name' indicating the type of the sequence, and 'ref' being a   577         reference to the type. The 'cls' is used to instantiate a suitable name   578         reference.   
579         """   580    581         if name == "dict":   582             items = []   583             for key, value in n.items:   584                 items.append(compiler.ast.Tuple([key, value]))   585         else: # name in ("list", "tuple"):   586             items = n.nodes   587    588         return self.get_literal_reference(name, ref, items, cls)   589    590     def process_operator_node(self, n):   591    592         """   593         Process the given operator node 'n' as an operator function invocation.   594         """   595    596         op = operator_functions[n.__class__.__name__]   597         invocation = compiler.ast.CallFunc(   598             compiler.ast.Name("$op%s" % op),   599             list(n.getChildNodes())   600             )   601         return self.process_structure_node(invocation)   602    603     def process_print_node(self, n):   604    605         """   606         Process the given print node 'n' as an invocation on a stream of the   607         form...   608    609         $print(dest, args, nl)   610    611         The special function name will be translated elsewhere.   612         """   613    614         nl = isinstance(n, compiler.ast.Printnl)   615         invocation = compiler.ast.CallFunc(   616             compiler.ast.Name("$print"),   617             [n.dest or compiler.ast.Name("None"),   618              compiler.ast.List(list(n.nodes)),   619              nl and compiler.ast.Name("True") or compiler.ast.Name("False")]   620             )   621         return self.process_structure_node(invocation)   622    623     def process_slice_node(self, n, expr=None):   624    625         """   626         Process the given slice node 'n' as an operator function invocation.   
627         """   628    629         if n.flags == "OP_ASSIGN": op = "setslice"   630         elif n.flags == "OP_DELETE": op = "delslice"   631         else: op = "getslice"   632    633         invocation = compiler.ast.CallFunc(   634             compiler.ast.Name("$op%s" % op),   635             [n.expr, n.lower or compiler.ast.Name("None"), n.upper or compiler.ast.Name("None")] +   636                 (expr and [expr] or [])   637             )   638    639         # Fix parse tree structure.   640    641         if op == "delslice":   642             invocation = compiler.ast.Discard(invocation)   643    644         return self.process_structure_node(invocation)   645    646     def process_sliceobj_node(self, n):   647    648         """   649         Process the given slice object node 'n' as a slice constructor.   650         """   651    652         op = "slice"   653         invocation = compiler.ast.CallFunc(   654             compiler.ast.Name("$op%s" % op),   655             n.nodes   656             )   657         return self.process_structure_node(invocation)   658    659     def process_subscript_node(self, n, expr=None):   660    661         """   662         Process the given subscript node 'n' as an operator function invocation.   663         """   664    665         if n.flags == "OP_ASSIGN": op = "setitem"   666         elif n.flags == "OP_DELETE": op = "delitem"   667         else: op = "getitem"   668    669         invocation = compiler.ast.CallFunc(   670             compiler.ast.Name("$op%s" % op),   671             [n.expr] + list(n.subs) + (expr and [expr] or [])   672             )   673    674         # Fix parse tree structure.   
675    676         if op == "delitem":   677             invocation = compiler.ast.Discard(invocation)   678    679         return self.process_structure_node(invocation)   680    681     def process_attribute_chain(self, n):   682    683         """   684         Process the given attribute access node 'n'. Return a reference   685         describing the expression.   686         """   687    688         # AssAttr/Getattr are nested with the outermost access being the last   689         # access in any chain.   690    691         self.attrs.insert(0, n.attrname)   692         attrs = self.attrs   693    694         # Break attribute chains where non-access nodes are found.   695    696         if not self.have_access_expression(n):   697             self.reset_attribute_chain()   698    699         # Descend into the expression, extending backwards any existing chain,   700         # or building another for the expression.   701    702         name_ref = self.process_structure_node(n.expr)   703    704         # Restore chain information applying to this node.   705    706         if not self.have_access_expression(n):   707             self.restore_attribute_chain(attrs)   708    709         # Return immediately if the expression was another access and thus a   710         # continuation backwards along the chain. The above processing will   711         # have followed the chain all the way to its conclusion.   712    713         if self.have_access_expression(n):   714             del self.attrs[0]   715    716         return name_ref   717    718     # Attribute chain handling.   719    720     def reset_attribute_chain(self):   721    722         "Reset the attribute chain for a subexpression of an attribute access."   
723    724         self.attrs = []   725         self.chain_assignment.append(self.in_assignment)   726         self.chain_invocation.append(self.in_invocation)   727         self.in_assignment = None   728         self.in_invocation = None   729    730     def restore_attribute_chain(self, attrs):   731    732         "Restore the attribute chain for an attribute access."   733    734         self.attrs = attrs   735         self.in_assignment = self.chain_assignment.pop()   736         self.in_invocation = self.chain_invocation.pop()   737    738     def have_access_expression(self, node):   739    740         "Return whether the expression associated with 'node' is Getattr."   741    742         return isinstance(node.expr, compiler.ast.Getattr)   743    744     def get_name_for_tracking(self, name, path=None):   745    746         """   747         Return the name to be used for attribute usage observations involving   748         the given 'name' in the current namespace. If 'path' is indicated and   749         the name is being used outside a function, return the path value;   750         otherwise, return a path computed using the current namespace and the   751         given name.   752    753         The intention of this method is to provide a suitably-qualified name   754         that can be tracked across namespaces. Where globals are being   755         referenced in class namespaces, they should be referenced using their   756         path within the module, not using a path within each class.   757    758         It may not be possible to identify a global within a function at the   759         time of inspection (since a global may appear later in a file).   760         Consequently, globals are identified by their local name rather than   761         their module-qualified path.   762         """   763    764         # For functions, use the appropriate local names.   
765    766         if self.in_function:   767             return name   768    769         # For static namespaces, use the given qualified name.   770    771         elif path:   772             return path   773    774         # Otherwise, establish a name in the current namespace.   775    776         else:   777             return self.get_object_path(name)   778    779     def get_path_for_access(self):   780    781         "Outside functions, register accesses at the module level."   782    783         if not self.in_function:   784             return self.name   785         else:   786             return self.get_namespace_path()   787    788     def get_module_name(self, node):   789    790         """   791         Using the given From 'node' in this module, calculate any relative import   792         information, returning a tuple containing a module to import along with any   793         names to import based on the node's name information.   794    795         Where the returned module is given as None, whole module imports should   796         be performed for the returned modules using the returned names.   797         """   798    799         # Absolute import.   800    801         if node.level == 0:   802             return node.modname, node.names   803    804         # Relative to an ancestor of this module.   805    806         else:   807             path = self.name.split(".")   808             level = node.level   809    810             # Relative imports treat package roots as submodules.   811    812             if split(self.filename)[-1] == "__init__.py":   813                 level -= 1   814    815             if level > len(path):   816                 raise InspectError("Relative import %r involves too many levels up from module %r" % (   817                     ("%s%s" % ("." 
* node.level, node.modname or "")), self.name))   818    819             basename = ".".join(path[:len(path)-level])   820    821         # Name imports from a module.   822    823         if node.modname:   824             return "%s.%s" % (basename, node.modname), node.names   825    826         # Relative whole module imports.   827    828         else:   829             return basename, node.names   830    831 def get_argnames(args):   832    833     """   834     Return a list of all names provided by 'args'. Since tuples may be   835     employed, the arguments are traversed depth-first.   836     """   837    838     l = []   839     for arg in args:   840         if isinstance(arg, tuple):   841             l += get_argnames(arg)   842         else:   843             l.append(arg)   844     return l   845    846 def get_names_from_nodes(nodes):   847    848     """   849     Return the names employed in the given 'nodes' along with the number of   850     nodes excluding sequences.   851     """   852    853     names = set()   854     count = 0   855    856     for node in nodes:   857    858         # Add names and count them.   859    860         if isinstance(node, (compiler.ast.AssName, compiler.ast.Name)):   861             names.add(node.name)   862             count += 1   863    864         # Add names from sequences and incorporate their counts.   865    866         elif isinstance(node, (compiler.ast.AssList, compiler.ast.AssTuple,   867                                compiler.ast.List, compiler.ast.Set,   868                                compiler.ast.Tuple)):   869             _names, _count = get_names_from_nodes(node.nodes)   870             names.update(_names)   871             count += _count   872    873         # Count non-name, non-sequence nodes.   874    875         else:   876             count += 1   877    878     return names, count   879    880 # Result classes.   

class InstructionSequence:

    """
    A generic sequence of instructions, where the last instruction provides
    the value and the preceding instructions perform any initialisation.
    """

    def __init__(self, instructions):

        "Initialise the sequence with the given list of 'instructions'."

        self.instructions = instructions

    def get_value_instruction(self):

        "Return the last instruction in the sequence."

        return self.instructions[-1]

    def get_init_instructions(self):

        "Return all instructions except the last."

        return self.instructions[:-1]

# Dictionary utilities.

def init_item(d, key, fn):

    """
    Add to 'd' an entry for 'key' using the callable 'fn' to make an initial
    value where no entry already exists. Return the entry for 'key'.
    """

    if key not in d:
        d[key] = fn()
    return d[key]

def dict_for_keys(d, keys):

    """
    Return a new dictionary containing entries from 'd' for the given 'keys'.
    Keys that are absent from 'd' are ignored.
    """

    nd = {}
    for key in keys:
        if key in d:
            nd[key] = d[key]
    return nd

def make_key(s):

    "Make sequence 's' into a tuple-based key, first sorting its contents."

    l = list(s)
    l.sort()
    return tuple(l)

def add_counter_item(d, key):

    """
    Make a mapping in 'd' for 'key' to the number of keys added before it, thus
    maintaining a mapping of keys to their order of insertion. Return the
    number recorded for 'key'.
    """

    if key not in d:
        d[key] = len(d)
    return d[key]

def remove_items(d1, d2):

    "Remove from 'd1' all items from 'd2'."

    # Iterate over a snapshot of the keys so that removal remains safe even
    # where 'd1' and 'd2' are the same dictionary.

    for key in list(d2):
        if key in d1:
            del d1[key]

# Set utilities.

def first(s):

    "Return the first item obtained when iterating over 's'."

    return list(s)[0]

def same(s1, s2):

    "Return whether 's1' and 's2' contain the same distinct items."

    return set(s1) == set(s2)

# General input/output.

def readfile(filename):

    "Return the contents of 'filename'."

    f = open(filename)
    try:
        return f.read()
    finally:
        f.close()

def writefile(filename, s):

    "Write to 'filename' the string 's'."

    f = open(filename, "w")
    try:
        f.write(s)
    finally:
        f.close()

# General encoding.

def sorted_output(x):

    "Sort sequence 'x' and return a string with commas separating the values."

    # The values are converted to strings before sorting, so the ordering is
    # that of the string representations.

    return ", ".join(sorted(map(str, x)))

def get_string_details(literals, encoding):

    """
    Determine whether 'literals' represent Unicode strings or byte strings,
    using 'encoding' to reproduce byte sequences.

    Each literal is the full program representation including prefix and quotes
    recoded by the parser to UTF-8. Thus, any literal found to represent a byte
    string needs to be translated back to its original encoding.

    Return a single encoded literal value, a type name, and the original
    encoding as a tuple.
    """

    typename = "unicode"

    l = []

    # Any byte string literal makes the combined value a byte string.

    for s in literals:
        out, _typename = get_literal_details(s)
        if _typename == "str":
            typename = "str"
        l.append(out)

    out = "".join(l)

    # For Unicode values, convert to the UTF-8 program representation.

    if typename == "unicode":
        return out.encode("utf-8"), typename, encoding

    # For byte string values, convert back to the original encoding.

    else:
        return out.encode(encoding), typename, encoding

def get_literal_details(s):

    """
    Determine whether 's' represents a Unicode string or a byte string, where
    's' contains the full program representation of a literal including prefix
    and quotes, recoded by the parser to UTF-8.

    Find and convert Unicode values starting with <backslash>u or <backslash>U,
    and byte or Unicode values starting with <backslash><octal digit> or
    <backslash>x.

    Literals prefixed with "u" cause <backslash><octal digit> and <backslash>x
    to be considered as Unicode values. Otherwise, they produce byte values and
    cause unprefixed strings to be considered as byte strings.

    Literals prefixed with "r" do not have their backslash-encoded values
    converted unless also prefixed with "u", in which case only the above value
    formats are converted, not any of the other special sequences for things
    like newlines.

    Return the literal value as a Unicode object together with the appropriate
    type name in a tuple.
    """

    l = []

    # Identify the quote character and use it to identify the prefix.

    quote_type = s[-1]
    prefix_end = s.find(quote_type)
    prefix = s[:prefix_end].lower()

    if prefix not in ("", "b", "br", "r", "u", "ur"):
        raise ValueError("String literal does not have a supported prefix: %s" % s)

    if "b" in prefix:
        typename = "str"
    else:
        typename = "unicode"

    # Identify triple quotes or single quotes.
1062   1063     if len(s) >= 6 and s[-2] == quote_type and s[-3] == quote_type:  1064         quote = s[prefix_end:prefix_end+3]  1065         current = prefix_end + 3  1066         end = len(s) - 3  1067     else:  1068         quote = s[prefix_end]  1069         current = prefix_end + 1  1070         end = len(s) - 1  1071   1072     # Conversions of some quoted values.  1073   1074     searches = {  1075         "u" : (6, 16),  1076         "U" : (10, 16),  1077         "x" : (4, 16),  1078         }  1079   1080     octal_digits = map(str, range(0, 8))  1081   1082     # Translations of some quoted values.  1083   1084     escaped = {  1085         "\\" : "\\", "'" : "'", '"' : '"',  1086         "a" : "\a", "b" : "\b", "f" : "\f",  1087         "n" : "\n", "r" : "\r", "t" : "\t",  1088         }  1089   1090     while current < end:  1091   1092         # Look for quoted values.  1093   1094         index = s.find("\\", current)  1095         if index == -1 or index + 1 == end:  1096             l.append(s[current:end])  1097             break  1098   1099         # Add the preceding text.  1100   1101         l.append(s[current:index])  1102   1103         # Handle quoted text.  1104   1105         term = s[index+1]  1106   1107         # Add Unicode values. Where a string is u-prefixed, even \o and \x  1108         # produce Unicode values.  1109   1110         if typename == "unicode" and (  1111             term in ("u", "U") or   1112             "u" in prefix and (term == "x" or term in octal_digits)):  1113   1114             needed, base = searches.get(term, (4, 8))  1115             value = convert_quoted_value(s, index, needed, end, base, unichr)  1116             l.append(value)  1117             current = index + needed  1118   1119         # Add raw byte values, changing the string type.  
1120   1121         elif "r" not in prefix and (  1122              term == "x" or term in octal_digits):  1123   1124             needed, base = searches.get(term, (4, 8))  1125             value = convert_quoted_value(s, index, needed, end, base, chr)  1126             l.append(value)  1127             typename = "str"  1128             current = index + needed  1129   1130         # Add other escaped values.  1131   1132         elif "r" not in prefix and escaped.has_key(term):  1133             l.append(escaped[term])  1134             current = index + 2  1135   1136         # Add other text as found.  1137   1138         else:  1139             l.append(s[index:index+2])  1140             current = index + 2  1141   1142     # Collect the components into a single Unicode object. Since the literal  1143     # text was already in UTF-8 form, interpret plain strings as UTF-8  1144     # sequences.  1145   1146     out = []  1147   1148     for value in l:  1149         if isinstance(value, unicode):  1150             out.append(value)  1151         else:  1152             out.append(unicode(value, "utf-8"))  1153   1154     return "".join(out), typename  1155   1156 def convert_quoted_value(s, index, needed, end, base, fn):  1157   1158     """  1159     Interpret a quoted value in 's' at 'index' with the given 'needed' number of  1160     positions, and with the given 'end' indicating the first position after the  1161     end of the actual string content.  1162   1163     Use 'base' as the numerical base when interpreting the value, and use 'fn'  1164     to convert the value to an appropriate type.  1165     """  1166   1167     s = s[index:min(index+needed, end)]  1168   1169     # Not a complete occurrence.  1170   1171     if len(s) < needed:  1172         return s  1173   1174     # Test for a well-formed value.  
1175   1176     try:  1177         first = base == 8 and 1 or 2  1178         value = int(s[first:needed], base)  1179     except ValueError:  1180         return s  1181     else:  1182         return fn(value)  1183   1184 # Attribute chain decoding.  1185   1186 def get_attrnames(attrnames):  1187   1188     """  1189     Split the qualified attribute chain 'attrnames' into its components,  1190     handling special attributes starting with "#" that indicate type  1191     conformance.  1192     """  1193   1194     if attrnames.startswith("#"):  1195         return [attrnames]  1196     else:  1197         return attrnames.split(".")  1198   1199 def get_attrname_from_location(location):  1200   1201     """  1202     Extract the first attribute from the attribute names employed in a  1203     'location'.  1204     """  1205   1206     path, name, attrnames, access = location  1207     if not attrnames:  1208         return attrnames  1209     return get_attrnames(attrnames)[0]  1210   1211 def get_name_path(path, name):  1212   1213     "Return a suitable qualified name from the given 'path' and 'name'."  1214   1215     if "." in name:  1216         return name  1217     else:  1218         return "%s.%s" % (path, name)  1219   1220 # Usage-related functions.  1221   1222 def get_types_for_usage(attrnames, objects):  1223   1224     """  1225     Identify the types that can support the given 'attrnames', using the  1226     given 'objects' as the catalogue of type details.  1227     """  1228   1229     types = []  1230     for name, _attrnames in objects.items():  1231         if set(attrnames).issubset(_attrnames):  1232             types.append(name)  1233     return types  1234   1235 def get_invoked_attributes(usage):  1236   1237     "Obtain invoked attribute from the given 'usage'."  
1238   1239     invoked = []  1240     if usage:  1241         for attrname, invocation, assignment in usage:  1242             if invocation:  1243                 invoked.append(attrname)  1244     return invoked  1245   1246 def get_assigned_attributes(usage):  1247   1248     "Obtain assigned attribute from the given 'usage'."  1249   1250     assigned = []  1251     if usage:  1252         for attrname, invocation, assignment in usage:  1253             if assignment:  1254                 assigned.append(attrname)  1255     return assigned  1256   1257 # Type and module functions.  1258 # NOTE: This makes assumptions about the __builtins__ structure.  1259   1260 def get_builtin_module(name):  1261   1262     "Return the module name containing the given type 'name'."  1263   1264     if name == "string":  1265         modname = "str"  1266     elif name == "utf8string":  1267         modname = "unicode"  1268     elif name == "NoneType":  1269         modname = "none"  1270     else:  1271         modname = name  1272   1273     return "__builtins__.%s" % modname  1274   1275 def get_builtin_type(name):  1276   1277     "Return the type name provided by the given Python value 'name'."  1278   1279     if name == "str":  1280         return "string"  1281     elif name == "unicode":  1282         return "utf8string"  1283     else:  1284         return name  1285   1286 def get_builtin_class(name):  1287   1288     "Return the full name of the built-in class having the given 'name'."  1289   1290     typename = get_builtin_type(name)  1291     module = get_builtin_module(typename)  1292     return "%s.%s" % (module, typename)  1293   1294 # Useful data.  1295   1296 predefined_constants = "False", "None", "NotImplemented", "True"  1297   1298 operator_functions = {  1299   1300     # Fundamental operations.  1301   1302     "is" : "is_",  1303     "is not" : "is_not",  1304   1305     # Binary operations.  
1306   1307     "in" : "in_",  1308     "not in" : "not_in",  1309     "Add" : "add",  1310     "Bitand" : "and_",  1311     "Bitor" : "or_",  1312     "Bitxor" : "xor",  1313     "Div" : "div",  1314     "FloorDiv" : "floordiv",  1315     "LeftShift" : "lshift",  1316     "Mod" : "mod",  1317     "Mul" : "mul",  1318     "Power" : "pow",  1319     "RightShift" : "rshift",  1320     "Sub" : "sub",  1321   1322     # Unary operations.  1323   1324     "Invert" : "invert",  1325     "UnaryAdd" : "pos",  1326     "UnarySub" : "neg",  1327   1328     # Augmented assignment.  1329   1330     "+=" : "iadd",  1331     "-=" : "isub",  1332     "*=" : "imul",  1333     "/=" : "idiv",  1334     "//=" : "ifloordiv",  1335     "%=" : "imod",  1336     "**=" : "ipow",  1337     "<<=" : "ilshift",  1338     ">>=" : "irshift",  1339     "&=" : "iand",  1340     "^=" : "ixor",  1341     "|=" : "ior",  1342   1343     # Comparisons.  1344   1345     "==" : "eq",  1346     "!=" : "ne",  1347     "<" : "lt",  1348     "<=" : "le",  1349     ">=" : "ge",  1350     ">" : "gt",  1351     }  1352   1353 # vim: tabstop=4 expandtab shiftwidth=4