Lichen (file common.py at 00e902870a29)

     1 #!/usr/bin/env python     2      3 """     4 Common functions.     5      6 Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013,     7               2014, 2015, 2016, 2017 Paul Boddie <paul@boddie.org.uk>     8      9 This program is free software; you can redistribute it and/or modify it under    10 the terms of the GNU General Public License as published by the Free Software    11 Foundation; either version 3 of the License, or (at your option) any later    12 version.    13     14 This program is distributed in the hope that it will be useful, but WITHOUT    15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS    16 FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more    17 details.    18     19 You should have received a copy of the GNU General Public License along with    20 this program.  If not, see <http://www.gnu.org/licenses/>.    21 """    22     23 from compiler.transformer import Transformer    24 from errors import InspectError    25 from os import listdir, makedirs, remove    26 from os.path import exists, getmtime, isdir, join, split    27 from results import ConstantValueRef, LiteralSequenceRef, NameRef    28 import compiler.ast    29     30 class CommonOutput:    31     32     "Common output functionality."    33     34     def check_output(self, options=None):    35     36         "Check the existing output and remove it if irrelevant."    37     38         if not exists(self.output):    39             makedirs(self.output)    40     41         details = self.importer.get_cache_details()    42         recorded_details = self.get_output_details()    43     44         # Combine cache details with any options.    45     46         full_details = options and (details + " " + options) or details    47     48         if recorded_details != full_details:    49             self.remove_output()    50     51         writefile(self.get_output_details_filename(), full_details)    52     53     def get_output_details_filename(self):    54     55         "Return the output details filename."    56     57         return join(self.output, "$details")    58     59     def get_output_details(self):    60     61         "Return details of the existing output."    62     63         details_filename = self.get_output_details_filename()    64     65         if not exists(details_filename):    66             return None    67         else:    68             return readfile(details_filename)    69     70     def remove_output(self, dirname=None):    71     72         "Remove the output."    73     74         dirname = dirname or self.output    75     76         for filename in listdir(dirname):    77             path = join(dirname, filename)    78             if isdir(path):    79                 self.remove_output(path)    80             else:    81                 remove(path)    82     83 def copy(source, target, only_if_newer=True):    84     85     "Copy a text file from 'source' to 'target'."    86     87     if isdir(target):    88         target = join(target, split(source)[-1])    89     90     if only_if_newer and not is_newer(source, target):    91         return    92     93     infile = open(source)    94     outfile = open(target, "w")    95     96     try:    97         outfile.write(infile.read())    98     finally:    99         outfile.close()   100         infile.close()   101    102 def is_newer(source, target):   103    104     "Return whether 'source' is newer than 'target'."   105    106     if exists(target):   107         target_mtime = getmtime(target)   108         source_mtime = getmtime(source)   109         return source_mtime > target_mtime   110    111     return True   112    113 class CommonModule:   114    115     "A common module representation."   116    117     def __init__(self, name, importer):   118    119         """   120         Initialise this module with the given 'name' and an 'importer' which is   121         used to provide access to other modules when required.   122         """   123    124         self.name = name   125         self.importer = importer   126         self.filename = None   127    128         # Inspection-related attributes.   129    130         self.astnode = None   131         self.encoding = None   132         self.temp = {}   133         self.lambdas = {}   134    135         # Constants, literals and values.   136    137         self.constants = {}   138         self.constant_values = {}   139         self.literals = {}   140         self.literal_types = {}   141    142         # Nested namespaces.   143    144         self.namespace_path = []   145         self.in_function = False   146    147         # Retain the assignment value expression and track invocations.   148    149         self.in_assignment = None   150         self.in_invocation = None   151    152         # Attribute chain state management.   153    154         self.attrs = []   155         self.chain_assignment = []   156         self.chain_invocation = []   157    158     def __repr__(self):   159         return "CommonModule(%r, %r)" % (self.name, self.importer)   160    161     def parse_file(self, filename):   162    163         "Parse the file with the given 'filename', initialising attributes."   164    165         self.filename = filename   166    167         # Use the Transformer directly to obtain encoding information.   168    169         t = Transformer()   170         f = open(filename)   171    172         try:   173             self.astnode = t.parsesuite(f.read() + "\n")   174             self.encoding = t.encoding   175         finally:   176             f.close()   177    178     # Module-relative naming.   179    180     def get_global_path(self, name):   181         return "%s.%s" % (self.name, name)   182    183     def get_namespace_path(self):   184         return ".".join([self.name] + self.namespace_path)   185    186     def get_object_path(self, name):   187         return ".".join([self.name] + self.namespace_path + [name])   188    189     def get_parent_path(self):   190         return ".".join([self.name] + self.namespace_path[:-1])   191    192     # Namespace management.   193    194     def enter_namespace(self, name):   195    196         "Enter the namespace having the given 'name'."   197    198         self.namespace_path.append(name)   199    200     def exit_namespace(self):   201    202         "Exit the current namespace."   203    204         self.namespace_path.pop()   205    206     # Constant reference naming.   207    208     def get_constant_name(self, value, value_type, encoding=None):   209    210         """   211         Add a new constant to the current namespace for 'value' with   212         'value_type'.   213         """   214    215         path = self.get_namespace_path()   216         init_item(self.constants, path, dict)   217         return "$c%d" % add_counter_item(self.constants[path], (value, value_type, encoding))   218    219     # Literal reference naming.   220    221     def get_literal_name(self):   222    223         "Add a new literal to the current namespace."   224    225         path = self.get_namespace_path()   226         init_item(self.literals, path, lambda: 0)   227         return "$C%d" % self.literals[path]   228    229     def next_literal(self):   230         self.literals[self.get_namespace_path()] += 1   231    232     # Temporary variable naming.   233    234     def get_temporary_name(self):   235         path = self.get_namespace_path()   236         init_item(self.temp, path, lambda: 0)   237         return "$t%d" % self.temp[path]   238    239     def next_temporary(self):   240         self.temp[self.get_namespace_path()] += 1   241    242     # Arbitrary function naming.   243    244     def get_lambda_name(self):   245         path = self.get_namespace_path()   246         init_item(self.lambdas, path, lambda: 0)   247         name = "$l%d" % self.lambdas[path]   248         self.lambdas[path] += 1   249         return name   250    251     def reset_lambdas(self):   252         self.lambdas = {}   253    254     # Constant and literal recording.   255    256     def get_constant_value(self, value, literals=None):   257    258         """   259         Encode the 'value' if appropriate, returning a value, a typename and any   260         encoding.   261         """   262    263         if isinstance(value, unicode):   264             return value.encode("utf-8"), "unicode", self.encoding   265    266         # Attempt to convert plain strings to text.   267    268         elif isinstance(value, str) and self.encoding:   269             try:   270                 return get_string_details(literals, self.encoding)   271             except UnicodeDecodeError:   272                 pass   273    274         return value, value.__class__.__name__, None   275    276     def get_constant_reference(self, ref, value, encoding=None):   277    278         """   279         Return a constant reference for the given 'ref' type and 'value', with   280         the optional 'encoding' applying to text values.   281         """   282    283         constant_name = self.get_constant_name(value, ref.get_origin(), encoding)   284    285         # Return a reference for the constant.   286    287         objpath = self.get_object_path(constant_name)   288         name_ref = ConstantValueRef(constant_name, ref.instance_of(objpath), value)   289    290         # Record the value and type for the constant.   291    292         self._reserve_constant(objpath, name_ref.value, name_ref.get_origin(), encoding)   293         return name_ref   294    295     def reserve_constant(self, objpath, value, origin, encoding=None):   296    297         """   298         Reserve a constant within 'objpath' with the given 'value' and having a   299         type with the given 'origin', with the optional 'encoding' applying to   300         text values.   301         """   302    303         constant_name = self.get_constant_name(value, origin)   304         objpath = self.get_object_path(constant_name)   305         self._reserve_constant(objpath, value, origin, encoding)   306    307     def _reserve_constant(self, objpath, value, origin, encoding):   308    309         """   310         Store a constant for 'objpath' with the given 'value' and 'origin', with   311         the optional 'encoding' applying to text values.   312         """   313    314         self.constant_values[objpath] = value, origin, encoding   315    316     def get_literal_reference(self, name, ref, items, cls):   317    318         """   319         Return a literal reference for the given type 'name', literal 'ref',   320         node 'items' and employing the given 'cls' as the class of the returned   321         reference object.   322         """   323    324         # Construct an invocation using the items as arguments.   325    326         typename = "$L%s" % name   327    328         invocation = compiler.ast.CallFunc(   329             compiler.ast.Name(typename),   330             items   331             )   332    333         # Get a name for the actual literal.   334    335         instname = self.get_literal_name()   336         self.next_literal()   337    338         # Record the type for the literal.   339    340         objpath = self.get_object_path(instname)   341         self.literal_types[objpath] = ref.get_origin()   342    343         # Return a wrapper for the invocation exposing the items.   344    345         return cls(   346             instname,   347             ref.instance_of(),   348             self.process_structure_node(invocation),   349             invocation.args   350             )   351    352     # Node handling.   353    354     def process_structure(self, node):   355    356         """   357         Within the given 'node', process the program structure.   358    359         During inspection, this will process global declarations, adjusting the   360         module namespace, and import statements, building a module dependency   361         hierarchy.   362    363         During translation, this will consult deduced program information and   364         output translated code.   365         """   366    367         l = []   368         for n in node.getChildNodes():   369             l.append(self.process_structure_node(n))   370         return l   371    372     def process_augassign_node(self, n):   373    374         "Process the given augmented assignment node 'n'."   375    376         op = operator_functions[n.op]   377    378         if isinstance(n.node, compiler.ast.Getattr):   379             target = compiler.ast.AssAttr(n.node.expr, n.node.attrname, "OP_ASSIGN")   380         elif isinstance(n.node, compiler.ast.Name):   381             target = compiler.ast.AssName(n.node.name, "OP_ASSIGN")   382         else:   383             target = n.node   384    385         assignment = compiler.ast.Assign(   386             [target],   387             compiler.ast.CallFunc(   388                 compiler.ast.Name("$op%s" % op),   389                 [n.node, n.expr]))   390    391         return self.process_structure_node(assignment)   392    393     def process_assignment_for_object(self, original_name, source):   394    395         """   396         Return an assignment operation making 'original_name' refer to the given   397         'source'.   398         """   399    400         assignment = compiler.ast.Assign(   401             [compiler.ast.AssName(original_name, "OP_ASSIGN")],   402             source   403             )   404    405         return self.process_structure_node(assignment)   406    407     def process_assignment_node_items(self, n, expr):   408    409         """   410         Process the given assignment node 'n' whose children are to be assigned   411         items of 'expr'.   412         """   413    414         name_ref = self.process_structure_node(expr)   415    416         # Either unpack the items and present them directly to each assignment   417         # node.   418    419         if isinstance(name_ref, LiteralSequenceRef) and \   420            self.process_literal_sequence_items(n, name_ref):   421    422             pass   423    424         # Or have the assignment nodes access each item via the sequence API.   425    426         else:   427             self.process_assignment_node_items_by_position(n, expr, name_ref)   428    429     def process_assignment_node_items_by_position(self, n, expr, name_ref):   430    431         """   432         Process the given sequence assignment node 'n', converting the node to   433         the separate assignment of each target using positional access on a   434         temporary variable representing the sequence. Use 'expr' as the assigned   435         value and 'name_ref' as the reference providing any existing temporary   436         variable.   437         """   438    439         assignments = []   440    441         # Employ existing names to access the sequence.   442         # Literal sequences do not provide names of accessible objects.   443    444         if isinstance(name_ref, NameRef) and not isinstance(name_ref, LiteralSequenceRef):   445             temp = name_ref.name   446    447         # For other expressions, create a temporary name to reference the items.   448    449         else:   450             temp = self.get_temporary_name()   451             self.next_temporary()   452    453             assignments.append(   454                 compiler.ast.Assign([compiler.ast.AssName(temp, "OP_ASSIGN")], expr)   455                 )   456    457         # Assign the items to the target nodes.   458    459         for i, node in enumerate(n.nodes):   460             assignments.append(   461                 compiler.ast.Assign([node], compiler.ast.Subscript(   462                     compiler.ast.Name(temp), "OP_APPLY", [compiler.ast.Const(i, str(i))]))   463                 )   464    465         return self.process_structure_node(compiler.ast.Stmt(assignments))   466    467     def process_literal_sequence_items(self, n, name_ref):   468    469         """   470         Process the given assignment node 'n', obtaining from the given   471         'name_ref' the items to be assigned to the assignment targets.   472    473         Return whether this method was able to process the assignment node as   474         a sequence of direct assignments.   475         """   476    477         if len(n.nodes) == len(name_ref.items):   478             assigned_names, count = get_names_from_nodes(n.nodes)   479             accessed_names, _count = get_names_from_nodes(name_ref.items)   480    481             # Only assign directly between items if all assigned names are   482             # plain names (not attribute assignments), and if the assigned names   483             # do not appear in the accessed names.   484    485             if len(assigned_names) == count and \   486                not assigned_names.intersection(accessed_names):   487    488                 for node, item in zip(n.nodes, name_ref.items):   489                     self.process_assignment_node(node, item)   490    491                 return True   492    493             # Otherwise, use the position-based mechanism to obtain values.   494    495             else:   496                 return False   497         else:   498             raise InspectError("In %s, item assignment needing %d items is given %d items." % (   499                 self.get_namespace_path(), len(n.nodes), len(name_ref.items)))   500    501     def process_compare_node(self, n):   502    503         """   504         Process the given comparison node 'n', converting an operator sequence   505         from...   506    507         <expr1> <op1> <expr2> <op2> <expr3>   508    509         ...to...   510    511         <op1>(<expr1>, <expr2>) and <op2>(<expr2>, <expr3>)   512         """   513    514         invocations = []   515         last = n.expr   516    517         for op, op_node in n.ops:   518             op = operator_functions.get(op)   519    520             invocations.append(compiler.ast.CallFunc(   521                 compiler.ast.Name("$op%s" % op),   522                 [last, op_node]))   523    524             last = op_node   525    526         if len(invocations) > 1:   527             result = compiler.ast.And(invocations)   528         else:   529             result = invocations[0]   530    531         return self.process_structure_node(result)   532    533     def process_dict_node(self, node):   534    535         """   536         Process the given dictionary 'node', returning a list of (key, value)   537         tuples.   538         """   539    540         l = []   541         for key, value in node.items:   542             l.append((   543                 self.process_structure_node(key),   544                 self.process_structure_node(value)))   545         return l   546    547     def process_for_node(self, n):   548    549         """   550         Generate attribute accesses for {n.list}.__iter__ and the next method on   551         the iterator, producing a replacement node for the original.   552         """   553    554         t0 = self.get_temporary_name()   555         self.next_temporary()   556         t1 = self.get_temporary_name()   557         self.next_temporary()   558         i0 = self.get_temporary_name()   559         self.next_temporary()   560    561         node = compiler.ast.Stmt([   562    563             # <t0> = {n.list}   564             # <t1> = <t0>.__iter__()   565    566             compiler.ast.Assign(   567                 [compiler.ast.AssName(t0, "OP_ASSIGN")],   568                 n.list),   569    570             compiler.ast.Assign(   571                 [compiler.ast.AssName(t1, "OP_ASSIGN")],   572                 compiler.ast.CallFunc(   573                     compiler.ast.Getattr(compiler.ast.Name(t0), "__iter__"),   574                     [])),   575    576             # try:   577             #     while True:   578             #         <var>... = <t1>.next()   579             #         ...   580             # except StopIteration:   581             #     pass   582    583             compiler.ast.TryExcept(   584                 compiler.ast.While(   585                     compiler.ast.Name("True"),   586                     compiler.ast.Stmt([   587                         compiler.ast.Assign(   588                             [n.assign],   589                             compiler.ast.CallFunc(   590                                 compiler.ast.Getattr(compiler.ast.Name(t1), "next"),   591                                 []   592                                 )),   593                         n.body]),   594                     None),   595                 [(compiler.ast.Name("StopIteration"), None, compiler.ast.Stmt([compiler.ast.Pass()]))],   596                 None)   597             ])   598    599         self.process_structure_node(node)   600    601     def process_literal_sequence_node(self, n, name, ref, cls):   602    603         """   604         Process the given literal sequence node 'n' as a function invocation,   605         with 'name' indicating the type of the sequence, and 'ref' being a   606         reference to the type. The 'cls' is used to instantiate a suitable name   607         reference.   608         """   609    610         if name == "dict":   611             items = []   612             for key, value in n.items:   613                 items.append(compiler.ast.Tuple([key, value]))   614         else: # name in ("list", "tuple"):   615             items = n.nodes   616    617         return self.get_literal_reference(name, ref, items, cls)   618    619     def process_operator_node(self, n):   620    621         """   622         Process the given operator node 'n' as an operator function invocation.   623         """   624    625         op = operator_functions[n.__class__.__name__]   626         invocation = compiler.ast.CallFunc(   627             compiler.ast.Name("$op%s" % op),   628             list(n.getChildNodes())   629             )   630         return self.process_structure_node(invocation)   631    632     def process_print_node(self, n):   633    634         """   635         Process the given print node 'n' as an invocation on a stream of the   636         form...   637    638         $print(dest, args, nl)   639    640         The special function name will be translated elsewhere.   641         """   642    643         nl = isinstance(n, compiler.ast.Printnl)   644         invocation = compiler.ast.CallFunc(   645             compiler.ast.Name("$print"),   646             [n.dest or compiler.ast.Name("None"),   647              compiler.ast.List(list(n.nodes)),   648              nl and compiler.ast.Name("True") or compiler.ast.Name("False")]   649             )   650         return self.process_structure_node(invocation)   651    652     def process_slice_node(self, n, expr=None):   653    654         """   655         Process the given slice node 'n' as an operator function invocation.   656         """   657    658         if n.flags == "OP_ASSIGN": op = "setslice"   659         elif n.flags == "OP_DELETE": op = "delslice"   660         else: op = "getslice"   661    662         invocation = compiler.ast.CallFunc(   663             compiler.ast.Name("$op%s" % op),   664             [n.expr, n.lower or compiler.ast.Name("None"), n.upper or compiler.ast.Name("None")] +   665                 (expr and [expr] or [])   666             )   667    668         # Fix parse tree structure.   669    670         if op == "delslice":   671             invocation = compiler.ast.Discard(invocation)   672    673         return self.process_structure_node(invocation)   674    675     def process_sliceobj_node(self, n):   676    677         """   678         Process the given slice object node 'n' as a slice constructor.   679         """   680    681         op = "slice"   682         invocation = compiler.ast.CallFunc(   683             compiler.ast.Name("$op%s" % op),   684             n.nodes   685             )   686         return self.process_structure_node(invocation)   687    688     def process_subscript_node(self, n, expr=None):   689    690         """   691         Process the given subscript node 'n' as an operator function invocation.   692         """   693    694         if n.flags == "OP_ASSIGN": op = "setitem"   695         elif n.flags == "OP_DELETE": op = "delitem"   696         else: op = "getitem"   697    698         invocation = compiler.ast.CallFunc(   699             compiler.ast.Name("$op%s" % op),   700             [n.expr] + list(n.subs) + (expr and [expr] or [])   701             )   702    703         # Fix parse tree structure.   704    705         if op == "delitem":   706             invocation = compiler.ast.Discard(invocation)   707    708         return self.process_structure_node(invocation)   709    710     def process_attribute_chain(self, n):   711    712         """   713         Process the given attribute access node 'n'. Return a reference   714         describing the expression.   715         """   716    717         # AssAttr/Getattr are nested with the outermost access being the last   718         # access in any chain.   719    720         self.attrs.insert(0, n.attrname)   721         attrs = self.attrs   722    723         # Break attribute chains where non-access nodes are found.   724    725         if not self.have_access_expression(n):   726             self.reset_attribute_chain()   727    728         # Descend into the expression, extending backwards any existing chain,   729         # or building another for the expression.   730    731         name_ref = self.process_structure_node(n.expr)   732    733         # Restore chain information applying to this node.   734    735         if not self.have_access_expression(n):   736             self.restore_attribute_chain(attrs)   737    738         # Return immediately if the expression was another access and thus a   739         # continuation backwards along the chain. The above processing will   740         # have followed the chain all the way to its conclusion.   741    742         if self.have_access_expression(n):   743             del self.attrs[0]   744    745         return name_ref   746    747     # Attribute chain handling.   748    749     def reset_attribute_chain(self):   750    751         "Reset the attribute chain for a subexpression of an attribute access."   752    753         self.attrs = []   754         self.chain_assignment.append(self.in_assignment)   755         self.chain_invocation.append(self.in_invocation)   756         self.in_assignment = None   757         self.in_invocation = None   758    759     def restore_attribute_chain(self, attrs):   760    761         "Restore the attribute chain for an attribute access."   762    763         self.attrs = attrs   764         self.in_assignment = self.chain_assignment.pop()   765         self.in_invocation = self.chain_invocation.pop()   766    767     def have_access_expression(self, node):   768    769         "Return whether the expression associated with 'node' is Getattr."   770    771         return isinstance(node.expr, compiler.ast.Getattr)   772    773     def get_name_for_tracking(self, name, name_ref=None, is_global=False):   774    775         """   776         Return the name to be used for attribute usage observations involving   777         the given 'name' in the current namespace.   778    779         If the name is being used outside a function, and if 'name_ref' is   780         given and indicates a global or if 'is_global' is specified as a true   781         value, a path featuring the name in the global namespace is returned.   782         Otherwise, a path computed using the current namespace and the given   783         name is returned.   784    785         The intention of this method is to provide a suitably-qualified name   786         that can be tracked across namespaces. Where globals are being   787         referenced in class namespaces, they should be referenced using their   788         path within the module, not using a path within each class.   789    790         It may not be possible to identify a global within a function at the   791         time of inspection (since a global may appear later in a file).   792         Consequently, globals are identified by their local name rather than   793         their module-qualified path.   794         """   795    796         # For functions, use the appropriate local names.   797    798         if self.in_function:   799             return name   800    801         # For global names outside functions, use a global name.   802    803         elif is_global or name_ref and name_ref.is_global_name():   804             return self.get_global_path(name)   805    806         # Otherwise, establish a name in the current namespace.   807    808         else:   809             return self.get_object_path(name)   810    811     def get_path_for_access(self):   812    813         "Outside functions, register accesses at the module level."   814    815         if not self.in_function:   816             return self.name   817         else:   818             return self.get_namespace_path()   819    820     def get_module_name(self, node):   821    822         """   823         Using the given From 'node' in this module, calculate any relative import   824         information, returning a tuple containing a module to import along with any   825         names to import based on the node's name information.   826    827         Where the returned module is given as None, whole module imports should   828         be performed for the returned modules using the returned names.   829         """   830    831         # Absolute import.   832    833         if node.level == 0:   834             return node.modname, node.names   835    836         # Relative to an ancestor of this module.   837    838         else:   839             path = self.name.split(".")   840             level = node.level   841    842             # Relative imports treat package roots as submodules.   843    844             if split(self.filename)[-1] == "__init__.py":   845                 level -= 1   846    847             if level > len(path):   848                 raise InspectError("Relative import %r involves too many levels up from module %r" % (   849                     ("%s%s" % ("." * node.level, node.modname or "")), self.name))   850    851             basename = ".".join(path[:len(path)-level])   852    853         # Name imports from a module.   854    855         if node.modname:   856             return "%s.%s" % (basename, node.modname), node.names   857    858         # Relative whole module imports.   859    860         else:   861             return basename, node.names   862    863 def get_argnames(args):   864    865     """   866     Return a list of all names provided by 'args'. Since tuples may be   867     employed, the arguments are traversed depth-first.   868     """   869    870     l = []   871     for arg in args:   872         if isinstance(arg, tuple):   873             l += get_argnames(arg)   874         else:   875             l.append(arg)   876     return l   877    878 def get_names_from_nodes(nodes):   879    880     """   881     Return the names employed in the given 'nodes' along with the number of   882     nodes excluding sequences.   883     """   884    885     names = set()   886     count = 0   887    888     for node in nodes:   889    890         # Add names and count them.   891    892         if isinstance(node, (compiler.ast.AssName, compiler.ast.Name)):   893             names.add(node.name)   894             count += 1   895    896         # Add names from sequences and incorporate their counts.   897    898         elif isinstance(node, (compiler.ast.AssList, compiler.ast.AssTuple,   899                                compiler.ast.List, compiler.ast.Set,   900                                compiler.ast.Tuple)):   901             _names, _count = get_names_from_nodes(node.nodes)   902             names.update(_names)   903             count += _count   904    905         # Count non-name, non-sequence nodes.   906    907         else:   908             count += 1   909    910     return names, count   911    912 # Result classes.   913    914 class InstructionSequence:   915    916     "A generic sequence of instructions."   917    918     def __init__(self, instructions):   919         self.instructions = instructions   920    921     def get_value_instruction(self):   922         return self.instructions[-1]   923    924     def get_init_instructions(self):   925         return self.instructions[:-1]   926    927 # Dictionary utilities.   928    929 def init_item(d, key, fn):   930    931     """   932     Add to 'd' an entry for 'key' using the callable 'fn' to make an initial   933     value where no entry already exists.   934     """   935    936     if not d.has_key(key):   937         d[key] = fn()   938     return d[key]   939    940 def dict_for_keys(d, keys):   941    942     "Return a new dictionary containing entries from 'd' for the given 'keys'."   943    944     nd = {}   945     for key in keys:   946         if d.has_key(key):   947             nd[key] = d[key]   948     return nd   949    950 def make_key(s):   951    952     "Make sequence 's' into a tuple-based key, first sorting its contents."   953    954     l = list(s)   955     l.sort()   956     return tuple(l)   957    958 def add_counter_item(d, key):   959    960     """   961     Make a mapping in 'd' for 'key' to the number of keys added before it, thus   962     maintaining a mapping of keys to their order of insertion.   963     """   964    965     if not d.has_key(key):   966         d[key] = len(d.keys())   967     return d[key]    968    969 def remove_items(d1, d2):   970    971     "Remove from 'd1' all items from 'd2'."   972    973     for key in d2.keys():   974         if d1.has_key(key):   975             del d1[key]   976    977 # Set utilities.   978    979 def first(s):   980     return list(s)[0]   981    982 def same(s1, s2):   983     return set(s1) == set(s2)   984    985 def order_dependencies(all_depends):   986    987     """   988     Produce a dependency ordering for the 'all_depends' mapping. This mapping   989     has the form "A depends on B, C...". The result will order A, B, C, and so   990     on.   991     """   992    993     usage = init_reverse_dependencies(all_depends)   994    995     # Produce an ordering by obtaining exposed items (required by items already   996     # processed) and putting them at the start of the list.   997    998     ordered = []   999   1000     while usage:  1001         have_next = False  1002   1003         for key, n in usage.items():  1004   1005             # Add items needed by no other items to the ordering.  1006   1007             if not n:  1008                 remove_dependency(key, all_depends, usage, ordered)  1009                 have_next = True  1010   1011         if not have_next:  1012             raise ValueError, usage  1013   1014     return ordered  1015   1016 def order_dependencies_partial(all_depends):  1017   1018     """  1019     Produce a dependency ordering for the 'all_depends' mapping. This mapping  1020     has the form "A depends on B, C...". The result will order A, B, C, and so  1021     on. Where cycles exist, they will be broken and a partial ordering returned.  1022     """  1023   1024     usage = init_reverse_dependencies(all_depends)  1025   1026     # Duplicate the dependencies for subsequent modification.  1027   1028     new_depends = {}  1029     for key, values in all_depends.items():  1030         new_depends[key] = set(values)  1031   1032     all_depends = new_depends  1033   1034     # Produce an ordering by obtaining exposed items (required by items already  1035     # processed) and putting them at the start of the list.  1036   1037     ordered = []  1038   1039     while usage:  1040         least = None  1041         least_key = None  1042   1043         for key, n in usage.items():  1044   1045             # Add items needed by no other items to the ordering.  1046   1047             if not n:  1048                 remove_dependency(key, all_depends, usage, ordered)  1049                 least = 0  1050   1051             # When breaking cycles, note the least used items.  1052   1053             elif least is None or len(n) < least:  1054                 least_key = key  1055                 least = len(n)  1056   1057         if least:  1058             transfer_dependencies(least_key, all_depends, usage, ordered)  1059   1060     return ordered  1061   1062 def init_reverse_dependencies(all_depends):  1063   1064     """  1065     From 'all_depends', providing a mapping of the form "A depends on B, C...",  1066     record the reverse dependencies, making a mapping of the form  1067     "B is needed by A", "C is needed by A", and so on.  1068     """  1069   1070     usage = {}  1071   1072     # Record path-based dependencies.  1073   1074     for key in all_depends.keys():  1075         usage[key] = set()  1076   1077     for key, depends in all_depends.items():  1078         for depend in depends:  1079             init_item(usage, depend, set)  1080             usage[depend].add(key)  1081   1082     return usage  1083   1084 def transfer_dependencies(key, all_depends, usage, ordered):  1085   1086     """  1087     Transfer items needed by 'key' to those items needing 'key', found using  1088     'all_depends', and updating 'usage'. Insert 'key' into the 'ordered'  1089     collection of dependencies.  1090   1091     If "A is needed by X" and "B is needed by A", then transferring items needed  1092     by A will cause "B is needed by X" to be recorded as a consequence.  1093   1094     Transferring items also needs to occur in the reverse mapping, so that  1095     "A needs B" and "X needs A", then the consequence must be recorded as  1096     "X needs B".  1097     """  1098   1099     ordered.insert(0, key)  1100   1101     needing = usage[key]                        # A is needed by X  1102     needed = all_depends.get(key)               # A needs B  1103   1104     if needing:  1105         for depend in needing:  1106             l = all_depends.get(depend)  1107             if not l:  1108                 continue  1109   1110             l.remove(key)                       # X needs (A)  1111   1112             if needed:  1113                 l.update(needed)                # X needs B...  1114   1115                 # Prevent self references.  1116   1117                 if depend in needed:  1118                     l.remove(depend)  1119   1120     if needed:  1121         for depend in needed:  1122             l = usage.get(depend)  1123             if not l:  1124                 continue  1125   1126             l.remove(key)                       # B is needed by (A)  1127             l.update(needing)                   # B is needed by X...  1128   1129             # Prevent self references.  1130   1131             if depend in needing:  1132                 l.remove(depend)  1133   1134     if needed:  1135         del all_depends[key]  1136     del usage[key]  1137   1138 def remove_dependency(key, all_depends, usage, ordered):  1139   1140     """  1141     Remove 'key', found in 'all_depends', from 'usage', inserting it into the  1142     'ordered' collection of dependencies.  1143   1144     Given that 'usage' for a given key A would indicate that "A needs <nothing>"  1145     upon removing A from 'usage', the outcome is that all keys needing A will  1146     have A removed from their 'usage' records.  1147   1148     So, if "B needs A", removing A will cause "B needs <nothing>" to be recorded  1149     as a consequence.  1150     """  1151   1152     ordered.insert(0, key)  1153   1154     depends = all_depends.get(key)  1155   1156     # Reduce usage of the referenced items.  1157   1158     if depends:  1159         for depend in depends:  1160             usage[depend].remove(key)  1161   1162     del usage[key]  1163   1164 # General input/output.  1165   1166 def readfile(filename):  1167   1168     "Return the contents of 'filename'."  1169   1170     f = open(filename)  1171     try:  1172         return f.read()  1173     finally:  1174         f.close()  1175   1176 def writefile(filename, s):  1177   1178     "Write to 'filename' the string 's'."  1179   1180     f = open(filename, "w")  1181     try:  1182         f.write(s)  1183     finally:  1184         f.close()  1185   1186 # General encoding.  1187   1188 def sorted_output(x):  1189   1190     "Sort sequence 'x' and return a string with commas separating the values."  1191   1192     x = map(str, x)  1193     x.sort()  1194     return ", ".join(x)  1195   1196 def get_string_details(literals, encoding):  1197   1198     """  1199     Determine whether 'literals' represent Unicode strings or byte strings,  1200     using 'encoding' to reproduce byte sequences.  1201   1202     Each literal is the full program representation including prefix and quotes  1203     recoded by the parser to UTF-8. Thus, any literal found to represent a byte  1204     string needs to be translated back to its original encoding.  1205   1206     Return a single encoded literal value, a type name, and the original  1207     encoding as a tuple.  1208     """  1209   1210     typename = "unicode"  1211   1212     l = []  1213   1214     for s in literals:  1215         out, _typename = get_literal_details(s)  1216         if _typename == "str":  1217             typename = "str"  1218         l.append(out)  1219   1220     out = "".join(l)  1221   1222     # For Unicode values, convert to the UTF-8 program representation.  1223   1224     if typename == "unicode":  1225         return out.encode("utf-8"), typename, encoding  1226   1227     # For byte string values, convert back to the original encoding.  1228   1229     else:  1230         return out.encode(encoding), typename, encoding  1231   1232 def get_literal_details(s):  1233   1234     """  1235     Determine whether 's' represents a Unicode string or a byte string, where  1236     's' contains the full program representation of a literal including prefix  1237     and quotes, recoded by the parser to UTF-8.  1238   1239     Find and convert Unicode values starting with <backslash>u or <backslash>U,  1240     and byte or Unicode values starting with <backslash><octal digit> or  1241     <backslash>x.  1242   1243     Literals prefixed with "u" cause <backslash><octal digit> and <backslash>x  1244     to be considered as Unicode values. Otherwise, they produce byte values and  1245     cause unprefixed strings to be considered as byte strings.  1246   1247     Literals prefixed with "r" do not have their backslash-encoded values  1248     converted unless also prefixed with "u", in which case only the above value  1249     formats are converted, not any of the other special sequences for things  1250     like newlines.  1251   1252     Return the literal value as a Unicode object together with the appropriate  1253     type name in a tuple.  1254     """  1255   1256     l = []  1257   1258     # Identify the quote character and use it to identify the prefix.  1259   1260     quote_type = s[-1]  1261     prefix_end = s.find(quote_type)  1262     prefix = s[:prefix_end].lower()  1263   1264     if prefix not in ("", "b", "br", "r", "u", "ur"):  1265         raise ValueError, "String literal does not have a supported prefix: %s" % s  1266   1267     if "b" in prefix:  1268         typename = "str"  1269     else:  1270         typename = "unicode"  1271   1272     # Identify triple quotes or single quotes.  1273   1274     if len(s) >= 6 and s[-2] == quote_type and s[-3] == quote_type:  1275         quote = s[prefix_end:prefix_end+3]  1276         current = prefix_end + 3  1277         end = len(s) - 3  1278     else:  1279         quote = s[prefix_end]  1280         current = prefix_end + 1  1281         end = len(s) - 1  1282   1283     # Conversions of some quoted values.  1284   1285     searches = {  1286         "u" : (6, 16),  1287         "U" : (10, 16),  1288         "x" : (4, 16),  1289         }  1290   1291     octal_digits = map(str, range(0, 8))  1292   1293     # Translations of some quoted values.  1294   1295     escaped = {  1296         "\\" : "\\", "'" : "'", '"' : '"',  1297         "a" : "\a", "b" : "\b", "f" : "\f",  1298         "n" : "\n", "r" : "\r", "t" : "\t",  1299         }  1300   1301     while current < end:  1302   1303         # Look for quoted values.  1304   1305         index = s.find("\\", current)  1306         if index == -1 or index + 1 == end:  1307             l.append(s[current:end])  1308             break  1309   1310         # Add the preceding text.  1311   1312         l.append(s[current:index])  1313   1314         # Handle quoted text.  1315   1316         term = s[index+1]  1317   1318         # Add Unicode values. Where a string is u-prefixed, even \o and \x  1319         # produce Unicode values.  1320   1321         if typename == "unicode" and (  1322             term in ("u", "U") or   1323             "u" in prefix and (term == "x" or term in octal_digits)):  1324   1325             needed, base = searches.get(term, (4, 8))  1326             value = convert_quoted_value(s, index, needed, end, base, unichr)  1327             l.append(value)  1328             current = index + needed  1329   1330         # Add raw byte values, changing the string type.  1331   1332         elif "r" not in prefix and (  1333              term == "x" or term in octal_digits):  1334   1335             needed, base = searches.get(term, (4, 8))  1336             value = convert_quoted_value(s, index, needed, end, base, chr)  1337             l.append(value)  1338             typename = "str"  1339             current = index + needed  1340   1341         # Add other escaped values.  1342   1343         elif "r" not in prefix and escaped.has_key(term):  1344             l.append(escaped[term])  1345             current = index + 2  1346   1347         # Add other text as found.  1348   1349         else:  1350             l.append(s[index:index+2])  1351             current = index + 2  1352   1353     # Collect the components into a single Unicode object. Since the literal  1354     # text was already in UTF-8 form, interpret plain strings as UTF-8  1355     # sequences.  1356   1357     out = []  1358   1359     for value in l:  1360         if isinstance(value, unicode):  1361             out.append(value)  1362         else:  1363             out.append(unicode(value, "utf-8"))  1364   1365     return "".join(out), typename  1366   1367 def convert_quoted_value(s, index, needed, end, base, fn):  1368   1369     """  1370     Interpret a quoted value in 's' at 'index' with the given 'needed' number of  1371     positions, and with the given 'end' indicating the first position after the  1372     end of the actual string content.  1373   1374     Use 'base' as the numerical base when interpreting the value, and use 'fn'  1375     to convert the value to an appropriate type.  1376     """  1377   1378     s = s[index:min(index+needed, end)]  1379   1380     # Not a complete occurrence.  1381   1382     if len(s) < needed:  1383         return s  1384   1385     # Test for a well-formed value.  1386   1387     try:  1388         first = base == 8 and 1 or 2  1389         value = int(s[first:needed], base)  1390     except ValueError:  1391         return s  1392     else:  1393         return fn(value)  1394   1395 # Attribute chain decoding.  1396   1397 def get_attrnames(attrnames):  1398   1399     """  1400     Split the qualified attribute chain 'attrnames' into its components,  1401     handling special attributes starting with "#" that indicate type  1402     conformance.  1403     """  1404   1405     if attrnames.startswith("#"):  1406         return [attrnames]  1407     else:  1408         return attrnames.split(".")  1409   1410 def get_attrname_from_location(location):  1411   1412     """  1413     Extract the first attribute from the attribute names employed in a  1414     'location'.  1415     """  1416   1417     path, name, attrnames, access = location  1418     if not attrnames:  1419         return attrnames  1420     return get_attrnames(attrnames)[0]  1421   1422 def get_name_path(path, name):  1423   1424     "Return a suitable qualified name from the given 'path' and 'name'."  1425   1426     if "." in name:  1427         return name  1428     else:  1429         return "%s.%s" % (path, name)  1430   1431 # Usage-related functions.  1432   1433 def get_types_for_usage(attrnames, objects):  1434   1435     """  1436     Identify the types that can support the given 'attrnames', using the  1437     given 'objects' as the catalogue of type details.  1438     """  1439   1440     types = []  1441     for name, _attrnames in objects.items():  1442         if set(attrnames).issubset(_attrnames):  1443             types.append(name)  1444     return types  1445   1446 def get_invoked_attributes(usage):  1447   1448     "Obtain invoked attribute from the given 'usage'."  1449   1450     invoked = []  1451     if usage:  1452         for attrname, invocation, assignment in usage:  1453             if invocation:  1454                 invoked.append(attrname)  1455     return invoked  1456   1457 def get_assigned_attributes(usage):  1458   1459     "Obtain assigned attribute from the given 'usage'."  1460   1461     assigned = []  1462     if usage:  1463         for attrname, invocation, assignment in usage:  1464             if assignment:  1465                 assigned.append(attrname)  1466     return assigned  1467   1468 # Type and module functions.  1469 # NOTE: This makes assumptions about the __builtins__ structure.  1470   1471 def get_builtin_module(name):  1472   1473     "Return the module name containing the given type 'name'."  1474   1475     if name == "string":  1476         modname = "str"  1477     elif name == "utf8string":  1478         modname = "unicode"  1479     elif name == "NoneType":  1480         modname = "none"  1481     else:  1482         modname = name  1483   1484     return "__builtins__.%s" % modname  1485   1486 def get_builtin_type(name):  1487   1488     "Return the type name provided by the given Python value 'name'."  1489   1490     if name == "str":  1491         return "string"  1492     elif name == "unicode":  1493         return "utf8string"  1494     else:  1495         return name  1496   1497 def get_builtin_class(name):  1498   1499     "Return the full name of the built-in class having the given 'name'."  1500   1501     typename = get_builtin_type(name)  1502     module = get_builtin_module(typename)  1503     return "%s.%s" % (module, typename)  1504   1505 # Useful data.  1506   1507 predefined_constants = "False", "None", "NotImplemented", "True"  1508   1509 operator_functions = {  1510   1511     # Fundamental operations.  1512   1513     "is" : "is_",  1514     "is not" : "is_not",  1515   1516     # Binary operations.  1517   1518     "in" : "in_",  1519     "not in" : "not_in",  1520     "Add" : "add",  1521     "Bitand" : "and_",  1522     "Bitor" : "or_",  1523     "Bitxor" : "xor",  1524     "Div" : "div",  1525     "FloorDiv" : "floordiv",  1526     "LeftShift" : "lshift",  1527     "Mod" : "mod",  1528     "Mul" : "mul",  1529     "Power" : "pow",  1530     "RightShift" : "rshift",  1531     "Sub" : "sub",  1532   1533     # Unary operations.  1534   1535     "Invert" : "invert",  1536     "UnaryAdd" : "pos",  1537     "UnarySub" : "neg",  1538   1539     # Augmented assignment.  1540   1541     "+=" : "iadd",  1542     "-=" : "isub",  1543     "*=" : "imul",  1544     "/=" : "idiv",  1545     "//=" : "ifloordiv",  1546     "%=" : "imod",  1547     "**=" : "ipow",  1548     "<<=" : "ilshift",  1549     ">>=" : "irshift",  1550     "&=" : "iand",  1551     "^=" : "ixor",  1552     "|=" : "ior",  1553   1554     # Comparisons.  1555   1556     "==" : "eq",  1557     "!=" : "ne",  1558     "<" : "lt",  1559     "<=" : "le",  1560     ">=" : "ge",  1561     ">" : "gt",  1562     }  1563   1564 # vim: tabstop=4 expandtab shiftwidth=4