Lichen

common.py

827:57652d9c6f24
2018-06-23 Paul Boddie Allow -R (--reset-all) to ignore any missing output directory to remove.
     1 #!/usr/bin/env python     2      3 """     4 Common functions.     5      6 Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013,     7               2014, 2015, 2016, 2017 Paul Boddie <paul@boddie.org.uk>     8      9 This program is free software; you can redistribute it and/or modify it under    10 the terms of the GNU General Public License as published by the Free Software    11 Foundation; either version 3 of the License, or (at your option) any later    12 version.    13     14 This program is distributed in the hope that it will be useful, but WITHOUT    15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS    16 FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more    17 details.    18     19 You should have received a copy of the GNU General Public License along with    20 this program.  If not, see <http://www.gnu.org/licenses/>.    21 """    22     23 from compiler.transformer import Transformer    24 from errors import InspectError    25 from os import listdir, makedirs, remove    26 from os.path import exists, getmtime, isdir, join, split    27 from results import ConstantValueRef, LiteralSequenceRef, NameRef    28 import compiler.ast    29     30 class CommonOutput:    31     32     "Common output functionality."    33     34     def check_output(self, options=None):    35     36         "Check the existing output and remove it if irrelevant."    37     38         if not exists(self.output):    39             makedirs(self.output)    40     41         details = self.importer.get_cache_details()    42         recorded_details = self.get_output_details()    43     44         # Combine cache details with any options.    45     46         full_details = options and (details + " " + options) or details    47     48         if recorded_details != full_details:    49             self.remove_output()    50     51         writefile(self.get_output_details_filename(), full_details)    52     53     def get_output_details_filename(self):    54     55         "Return the output details filename."    56     57         return join(self.output, "$details")    58     59     def get_output_details(self):    60     61         "Return details of the existing output."    62     63         details_filename = self.get_output_details_filename()    64     65         if not exists(details_filename):    66             return None    67         else:    68             return readfile(details_filename)    69     70     def remove_output(self, dirname=None):    71     72         "Remove the output."    73     74         dirname = dirname or self.output    75     76         for filename in listdir(dirname):    77             path = join(dirname, filename)    78             if isdir(path):    79                 self.remove_output(path)    80             else:    81                 remove(path)    82     83 def copy(source, target, only_if_newer=True):    84     85     "Copy a text file from 'source' to 'target'."    86     87     if isdir(target):    88         target = join(target, split(source)[-1])    89     90     if only_if_newer and not is_newer(source, target):    91         return    92     93     infile = open(source)    94     outfile = open(target, "w")    95     96     try:    97         outfile.write(infile.read())    98     finally:    99         outfile.close()   100         infile.close()   101    102 def is_newer(source, target):   103    104     "Return whether 'source' is newer than 'target'."   105    106     if exists(target):   107         target_mtime = getmtime(target)   108         source_mtime = getmtime(source)   109         return source_mtime > target_mtime   110    111     return True   112    113 class CommonModule:   114    115     "A common module representation."   116    117     def __init__(self, name, importer):   118    119         """   120         Initialise this module with the given 'name' and an 'importer' which is   121         used to provide access to other modules when required.   122         """   123    124         self.name = name   125         self.importer = importer   126         self.filename = None   127    128         # Inspection-related attributes.   129    130         self.astnode = None   131         self.encoding = None   132         self.temp = {}   133         self.lambdas = {}   134    135         # Constants, literals and values.   136    137         self.constants = {}   138         self.constant_values = {}   139         self.literals = {}   140         self.literal_types = {}   141    142         # Nested namespaces.   143    144         self.namespace_path = []   145         self.in_function = False   146    147         # Retain the assignment value expression and track invocations.   148    149         self.in_assignment = None   150         self.in_invocation = None   151    152         # Attribute chain state management.   153    154         self.attrs = []   155         self.chain_assignment = []   156         self.chain_invocation = []   157    158     def __repr__(self):   159         return "CommonModule(%r, %r)" % (self.name, self.importer)   160    161     def parse_file(self, filename):   162    163         "Parse the file with the given 'filename', initialising attributes."   164    165         self.filename = filename   166    167         # Use the Transformer directly to obtain encoding information.   168    169         t = Transformer()   170         f = open(filename)   171    172         try:   173             self.astnode = t.parsesuite(f.read() + "\n")   174             self.encoding = t.encoding   175         finally:   176             f.close()   177    178     # Module-relative naming.   179    180     def get_global_path(self, name):   181         return "%s.%s" % (self.name, name)   182    183     def get_namespace_path(self):   184         return ".".join([self.name] + self.namespace_path)   185    186     def get_object_path(self, name):   187         return ".".join([self.name] + self.namespace_path + [name])   188    189     # Namespace management.   190    191     def enter_namespace(self, name):   192    193         "Enter the namespace having the given 'name'."   194    195         self.namespace_path.append(name)   196    197     def exit_namespace(self):   198    199         "Exit the current namespace."   200    201         self.namespace_path.pop()   202    203     # Constant reference naming.   204    205     def get_constant_name(self, value, value_type, encoding=None):   206    207         """   208         Add a new constant to the current namespace for 'value' with   209         'value_type'.   210         """   211    212         path = self.get_namespace_path()   213         init_item(self.constants, path, dict)   214         return "$c%d" % add_counter_item(self.constants[path], (value, value_type, encoding))   215    216     # Literal reference naming.   217    218     def get_literal_name(self):   219    220         "Add a new literal to the current namespace."   221    222         path = self.get_namespace_path()   223         init_item(self.literals, path, lambda: 0)   224         return "$C%d" % self.literals[path]   225    226     def next_literal(self):   227         self.literals[self.get_namespace_path()] += 1   228    229     # Temporary variable naming.   230    231     def get_temporary_name(self):   232         path = self.get_namespace_path()   233         init_item(self.temp, path, lambda: 0)   234         return "$t%d" % self.temp[path]   235    236     def next_temporary(self):   237         self.temp[self.get_namespace_path()] += 1   238    239     # Arbitrary function naming.   240    241     def get_lambda_name(self):   242         path = self.get_namespace_path()   243         init_item(self.lambdas, path, lambda: 0)   244         name = "$l%d" % self.lambdas[path]   245         self.lambdas[path] += 1   246         return name   247    248     def reset_lambdas(self):   249         self.lambdas = {}   250    251     # Constant and literal recording.   252    253     def get_constant_value(self, value, literals=None):   254    255         """   256         Encode the 'value' if appropriate, returning a value, a typename and any   257         encoding.   258         """   259    260         if isinstance(value, unicode):   261             return value.encode("utf-8"), "unicode", self.encoding   262    263         # Attempt to convert plain strings to text.   264    265         elif isinstance(value, str) and self.encoding:   266             try:   267                 return get_string_details(literals, self.encoding)   268             except UnicodeDecodeError:   269                 pass   270    271         return value, value.__class__.__name__, None   272    273     def get_constant_reference(self, ref, value, encoding=None):   274    275         """   276         Return a constant reference for the given 'ref' type and 'value', with   277         the optional 'encoding' applying to text values.   278         """   279    280         constant_name = self.get_constant_name(value, ref.get_origin(), encoding)   281    282         # Return a reference for the constant.   283    284         objpath = self.get_object_path(constant_name)   285         name_ref = ConstantValueRef(constant_name, ref.instance_of(objpath), value)   286    287         # Record the value and type for the constant.   288    289         self._reserve_constant(objpath, name_ref.value, name_ref.get_origin(), encoding)   290         return name_ref   291    292     def reserve_constant(self, objpath, value, origin, encoding=None):   293    294         """   295         Reserve a constant within 'objpath' with the given 'value' and having a   296         type with the given 'origin', with the optional 'encoding' applying to   297         text values.   298         """   299    300         constant_name = self.get_constant_name(value, origin)   301         objpath = self.get_object_path(constant_name)   302         self._reserve_constant(objpath, value, origin, encoding)   303    304     def _reserve_constant(self, objpath, value, origin, encoding):   305    306         """   307         Store a constant for 'objpath' with the given 'value' and 'origin', with   308         the optional 'encoding' applying to text values.   309         """   310    311         self.constant_values[objpath] = value, origin, encoding   312    313     def get_literal_reference(self, name, ref, items, cls):   314    315         """   316         Return a literal reference for the given type 'name', literal 'ref',   317         node 'items' and employing the given 'cls' as the class of the returned   318         reference object.   319         """   320    321         # Construct an invocation using the items as arguments.   322    323         typename = "$L%s" % name   324    325         invocation = compiler.ast.CallFunc(   326             compiler.ast.Name(typename),   327             items   328             )   329    330         # Get a name for the actual literal.   331    332         instname = self.get_literal_name()   333         self.next_literal()   334    335         # Record the type for the literal.   336    337         objpath = self.get_object_path(instname)   338         self.literal_types[objpath] = ref.get_origin()   339    340         # Return a wrapper for the invocation exposing the items.   341    342         return cls(   343             instname,   344             ref.instance_of(),   345             self.process_structure_node(invocation),   346             invocation.args   347             )   348    349     # Node handling.   350    351     def process_structure(self, node):   352    353         """   354         Within the given 'node', process the program structure.   355    356         During inspection, this will process global declarations, adjusting the   357         module namespace, and import statements, building a module dependency   358         hierarchy.   359    360         During translation, this will consult deduced program information and   361         output translated code.   362         """   363    364         l = []   365         for n in node.getChildNodes():   366             l.append(self.process_structure_node(n))   367         return l   368    369     def process_augassign_node(self, n):   370    371         "Process the given augmented assignment node 'n'."   372    373         op = operator_functions[n.op]   374    375         if isinstance(n.node, compiler.ast.Getattr):   376             target = compiler.ast.AssAttr(n.node.expr, n.node.attrname, "OP_ASSIGN")   377         elif isinstance(n.node, compiler.ast.Name):   378             target = compiler.ast.AssName(n.node.name, "OP_ASSIGN")   379         else:   380             target = n.node   381    382         assignment = compiler.ast.Assign(   383             [target],   384             compiler.ast.CallFunc(   385                 compiler.ast.Name("$op%s" % op),   386                 [n.node, n.expr]))   387    388         return self.process_structure_node(assignment)   389    390     def process_assignment_for_object(self, original_name, source):   391    392         """   393         Return an assignment operation making 'original_name' refer to the given   394         'source'.   395         """   396    397         assignment = compiler.ast.Assign(   398             [compiler.ast.AssName(original_name, "OP_ASSIGN")],   399             source   400             )   401    402         return self.process_structure_node(assignment)   403    404     def process_assignment_node_items(self, n, expr):   405    406         """   407         Process the given assignment node 'n' whose children are to be assigned   408         items of 'expr'.   409         """   410    411         name_ref = self.process_structure_node(expr)   412    413         # Either unpack the items and present them directly to each assignment   414         # node.   415    416         if isinstance(name_ref, LiteralSequenceRef) and \   417            self.process_literal_sequence_items(n, name_ref):   418    419             pass   420    421         # Or have the assignment nodes access each item via the sequence API.   422    423         else:   424             self.process_assignment_node_items_by_position(n, expr, name_ref)   425    426     def process_assignment_node_items_by_position(self, n, expr, name_ref):   427    428         """   429         Process the given sequence assignment node 'n', converting the node to   430         the separate assignment of each target using positional access on a   431         temporary variable representing the sequence. Use 'expr' as the assigned   432         value and 'name_ref' as the reference providing any existing temporary   433         variable.   434         """   435    436         assignments = []   437    438         # Employ existing names to access the sequence.   439         # Literal sequences do not provide names of accessible objects.   440    441         if isinstance(name_ref, NameRef) and not isinstance(name_ref, LiteralSequenceRef):   442             temp = name_ref.name   443    444         # For other expressions, create a temporary name to reference the items.   445    446         else:   447             temp = self.get_temporary_name()   448             self.next_temporary()   449    450             assignments.append(   451                 compiler.ast.Assign([compiler.ast.AssName(temp, "OP_ASSIGN")], expr)   452                 )   453    454         # Assign the items to the target nodes.   455    456         for i, node in enumerate(n.nodes):   457             assignments.append(   458                 compiler.ast.Assign([node], compiler.ast.Subscript(   459                     compiler.ast.Name(temp), "OP_APPLY", [compiler.ast.Const(i, str(i))]))   460                 )   461    462         return self.process_structure_node(compiler.ast.Stmt(assignments))   463    464     def process_literal_sequence_items(self, n, name_ref):   465    466         """   467         Process the given assignment node 'n', obtaining from the given   468         'name_ref' the items to be assigned to the assignment targets.   469    470         Return whether this method was able to process the assignment node as   471         a sequence of direct assignments.   472         """   473    474         if len(n.nodes) == len(name_ref.items):   475             assigned_names, count = get_names_from_nodes(n.nodes)   476             accessed_names, _count = get_names_from_nodes(name_ref.items)   477    478             # Only assign directly between items if all assigned names are   479             # plain names (not attribute assignments), and if the assigned names   480             # do not appear in the accessed names.   481    482             if len(assigned_names) == count and \   483                not assigned_names.intersection(accessed_names):   484    485                 for node, item in zip(n.nodes, name_ref.items):   486                     self.process_assignment_node(node, item)   487    488                 return True   489    490             # Otherwise, use the position-based mechanism to obtain values.   491    492             else:   493                 return False   494         else:   495             raise InspectError("In %s, item assignment needing %d items is given %d items." % (   496                 self.get_namespace_path(), len(n.nodes), len(name_ref.items)))   497    498     def process_compare_node(self, n):   499    500         """   501         Process the given comparison node 'n', converting an operator sequence   502         from...   503    504         <expr1> <op1> <expr2> <op2> <expr3>   505    506         ...to...   507    508         <op1>(<expr1>, <expr2>) and <op2>(<expr2>, <expr3>)   509         """   510    511         invocations = []   512         last = n.expr   513    514         for op, op_node in n.ops:   515             op = operator_functions.get(op)   516    517             invocations.append(compiler.ast.CallFunc(   518                 compiler.ast.Name("$op%s" % op),   519                 [last, op_node]))   520    521             last = op_node   522    523         if len(invocations) > 1:   524             result = compiler.ast.And(invocations)   525         else:   526             result = invocations[0]   527    528         return self.process_structure_node(result)   529    530     def process_dict_node(self, node):   531    532         """   533         Process the given dictionary 'node', returning a list of (key, value)   534         tuples.   535         """   536    537         l = []   538         for key, value in node.items:   539             l.append((   540                 self.process_structure_node(key),   541                 self.process_structure_node(value)))   542         return l   543    544     def process_for_node(self, n):   545    546         """   547         Generate attribute accesses for {n.list}.__iter__ and the next method on   548         the iterator, producing a replacement node for the original.   549         """   550    551         t0 = self.get_temporary_name()   552         self.next_temporary()   553         t1 = self.get_temporary_name()   554         self.next_temporary()   555    556         node = compiler.ast.Stmt([   557    558             # <t0> = {n.list}   559             # <t1> = <t0>.__iter__()   560    561             compiler.ast.Assign(   562                 [compiler.ast.AssName(t0, "OP_ASSIGN")],   563                 n.list),   564    565             compiler.ast.Assign(   566                 [compiler.ast.AssName(t1, "OP_ASSIGN")],   567                 compiler.ast.CallFunc(   568                     compiler.ast.Getattr(compiler.ast.Name(t0), "__iter__"),   569                     [])),   570    571             # try:   572             #     while True:   573             #         <var>... = <t1>.next()   574             #         ...   575             # except StopIteration:   576             #     pass   577    578             compiler.ast.TryExcept(   579                 compiler.ast.While(   580                     compiler.ast.Name("True"),   581                     compiler.ast.Stmt([   582                         compiler.ast.Assign(   583                             [n.assign],   584                             compiler.ast.CallFunc(   585                                 compiler.ast.Getattr(compiler.ast.Name(t1), "next"),   586                                 []   587                                 )),   588                         n.body]),   589                     None),   590                 [(compiler.ast.Name("StopIteration"), None, compiler.ast.Stmt([compiler.ast.Pass()]))],   591                 None)   592             ])   593    594         self.process_structure_node(node)   595    596     def process_literal_sequence_node(self, n, name, ref, cls):   597    598         """   599         Process the given literal sequence node 'n' as a function invocation,   600         with 'name' indicating the type of the sequence, and 'ref' being a   601         reference to the type. The 'cls' is used to instantiate a suitable name   602         reference.   603         """   604    605         if name == "dict":   606             items = []   607             for key, value in n.items:   608                 items.append(compiler.ast.Tuple([key, value]))   609         else: # name in ("list", "tuple"):   610             items = n.nodes   611    612         return self.get_literal_reference(name, ref, items, cls)   613    614     def process_operator_node(self, n):   615    616         """   617         Process the given operator node 'n' as an operator function invocation.   618         """   619    620         opname = n.__class__.__name__   621         operands = n.getChildNodes()   622    623         # Convert a unary operation to an invocation.   624    625         op = unary_operator_functions.get(opname)   626    627         if op:   628             invocation = compiler.ast.CallFunc(   629                 compiler.ast.Name("$op%s" % op),   630                 [operands[0]]   631                 )   632    633         # Convert a single operator with a list of operands to a combination of   634         # pairwise operations.   635    636         else:   637             op = operator_functions[opname]   638             invocation = self._process_operator_node(op, operands)   639    640         return self.process_structure_node(invocation)   641    642     def _process_operator_node(self, op, operands):   643    644         """   645         Process the given 'op', being an operator function, together with the   646         supplied 'operands', returning either a single remaining operand or an   647         invocation combining the operands.   648         """   649    650         remaining = operands[1:]   651         if not remaining:   652             return operands[0]   653    654         return compiler.ast.CallFunc(   655             compiler.ast.Name("$op%s" % op),   656             [operands[0], self._process_operator_node(op, remaining)]   657             )   658    659     def process_print_node(self, n):   660    661         """   662         Process the given print node 'n' as an invocation on a stream of the   663         form...   664    665         $print(dest, args, nl)   666    667         The special function name will be translated elsewhere.   668         """   669    670         nl = isinstance(n, compiler.ast.Printnl)   671         invocation = compiler.ast.CallFunc(   672             compiler.ast.Name("$print"),   673             [n.dest or compiler.ast.Name("None"),   674              compiler.ast.List(list(n.nodes)),   675              nl and compiler.ast.Name("True") or compiler.ast.Name("False")]   676             )   677         return self.process_structure_node(invocation)   678    679     def process_slice_node(self, n, expr=None):   680    681         """   682         Process the given slice node 'n' as a method invocation.   683         """   684    685         if n.flags == "OP_ASSIGN": op = "__setslice__"   686         elif n.flags == "OP_DELETE": op = "__delslice__"   687         else: op = "__getslice__"   688    689         invocation = compiler.ast.CallFunc(   690             compiler.ast.Getattr(n.expr, op),   691             [n.lower or compiler.ast.Name("None"), n.upper or compiler.ast.Name("None")] +   692                 (expr and [expr] or [])   693             )   694    695         # Fix parse tree structure.   696    697         if op == "__delslice__":   698             invocation = compiler.ast.Discard(invocation)   699    700         return self.process_structure_node(invocation)   701    702     def process_sliceobj_node(self, n):   703    704         """   705         Process the given slice object node 'n' as a slice constructor.   706         """   707    708         op = "slice"   709         invocation = compiler.ast.CallFunc(   710             compiler.ast.Name("$op%s" % op),   711             n.nodes   712             )   713         return self.process_structure_node(invocation)   714    715     def process_subscript_node(self, n, expr=None):   716    717         """   718         Process the given subscript node 'n' as a method invocation.   719         """   720    721         if n.flags == "OP_ASSIGN": op = "__setitem__"   722         elif n.flags == "OP_DELETE": op = "__delitem__"   723         else: op = "__getitem__"   724    725         invocation = compiler.ast.CallFunc(   726             compiler.ast.Getattr(n.expr, op),   727             list(n.subs) + (expr and [expr] or [])   728             )   729    730         # Fix parse tree structure.   731    732         if op == "__delitem__":   733             invocation = compiler.ast.Discard(invocation)   734    735         return self.process_structure_node(invocation)   736    737     def process_attribute_chain(self, n):   738    739         """   740         Process the given attribute access node 'n'. Return a reference   741         describing the expression.   742         """   743    744         # AssAttr/Getattr are nested with the outermost access being the last   745         # access in any chain.   746    747         self.attrs.insert(0, n.attrname)   748         attrs = self.attrs   749    750         # Break attribute chains where non-access nodes are found.   751    752         if not self.have_access_expression(n):   753             self.reset_attribute_chain()   754    755         # Descend into the expression, extending backwards any existing chain,   756         # or building another for the expression.   757    758         name_ref = self.process_structure_node(n.expr)   759    760         # Restore chain information applying to this node.   761    762         if not self.have_access_expression(n):   763             self.restore_attribute_chain(attrs)   764    765         # Return immediately if the expression was another access and thus a   766         # continuation backwards along the chain. The above processing will   767         # have followed the chain all the way to its conclusion.   768    769         if self.have_access_expression(n):   770             del self.attrs[0]   771    772         return name_ref   773    774     # Attribute chain handling.   775    776     def reset_attribute_chain(self):   777    778         "Reset the attribute chain for a subexpression of an attribute access."   779    780         self.attrs = []   781         self.chain_assignment.append(self.in_assignment)   782         self.chain_invocation.append(self.in_invocation)   783         self.in_assignment = None   784         self.in_invocation = None   785    786     def restore_attribute_chain(self, attrs):   787    788         "Restore the attribute chain for an attribute access."   789    790         self.attrs = attrs   791         self.in_assignment = self.chain_assignment.pop()   792         self.in_invocation = self.chain_invocation.pop()   793    794     def have_access_expression(self, node):   795    796         "Return whether the expression associated with 'node' is Getattr."   797    798         return isinstance(node.expr, compiler.ast.Getattr)   799    800     def get_name_for_tracking(self, name, name_ref=None, is_global=False):   801    802         """   803         Return the name to be used for attribute usage observations involving   804         the given 'name' in the current namespace.   805    806         If the name is being used outside a function, and if 'name_ref' is   807         given and indicates a global or if 'is_global' is specified as a true   808         value, a path featuring the name in the global namespace is returned.   809         Otherwise, a path computed using the current namespace and the given   810         name is returned.   811    812         The intention of this method is to provide a suitably-qualified name   813         that can be tracked across namespaces. Where globals are being   814         referenced in class namespaces, they should be referenced using their   815         path within the module, not using a path within each class.   816    817         It may not be possible to identify a global within a function at the   818         time of inspection (since a global may appear later in a file).   819         Consequently, globals are identified by their local name rather than   820         their module-qualified path.   821         """   822    823         # For functions, use the appropriate local names.   824    825         if self.in_function:   826             return name   827    828         # For global names outside functions, use a global name.   829    830         elif is_global or name_ref and name_ref.is_global_name():   831             return self.get_global_path(name)   832    833         # Otherwise, establish a name in the current namespace.   834    835         else:   836             return self.get_object_path(name)   837    838     def get_path_for_access(self):   839    840         "Outside functions, register accesses at the module level."   841    842         if not self.in_function:   843             return self.name   844         else:   845             return self.get_namespace_path()   846    847     def get_module_name(self, node):   848    849         """   850         Using the given From 'node' in this module, calculate any relative import   851         information, returning a tuple containing a module to import along with any   852         names to import based on the node's name information.   853    854         Where the returned module is given as None, whole module imports should   855         be performed for the returned modules using the returned names.   856         """   857    858         # Absolute import.   859    860         if node.level == 0:   861             return node.modname, node.names   862    863         # Relative to an ancestor of this module.   864    865         else:   866             path = self.name.split(".")   867             level = node.level   868    869             # Relative imports treat package roots as submodules.   870    871             if split(self.filename)[-1] == "__init__.py":   872                 level -= 1   873    874             if level > len(path):   875                 raise InspectError("Relative import %r involves too many levels up from module %r" % (   876                     ("%s%s" % ("." * node.level, node.modname or "")), self.name))   877    878             basename = ".".join(path[:len(path)-level])   879    880         # Name imports from a module.   881    882         if node.modname:   883             return "%s.%s" % (basename, node.modname), node.names   884    885         # Relative whole module imports.   886    887         else:   888             return basename, node.names   889    890 def get_argnames(args):   891    892     """   893     Return a list of all names provided by 'args'. Since tuples may be   894     employed, the arguments are traversed depth-first.   895     """   896    897     l = []   898     for arg in args:   899         if isinstance(arg, tuple):   900             l += get_argnames(arg)   901         else:   902             l.append(arg)   903     return l   904    905 def get_names_from_nodes(nodes):   906    907     """   908     Return the names employed in the given 'nodes' along with the number of   909     nodes excluding sequences.   910     """   911    912     names = set()   913     count = 0   914    915     for node in nodes:   916    917         # Add names and count them.   918    919         if isinstance(node, (compiler.ast.AssName, compiler.ast.Name)):   920             names.add(node.name)   921             count += 1   922    923         # Add names from sequences and incorporate their counts.   924    925         elif isinstance(node, (compiler.ast.AssList, compiler.ast.AssTuple,   926                                compiler.ast.List, compiler.ast.Set,   927                                compiler.ast.Tuple)):   928             _names, _count = get_names_from_nodes(node.nodes)   929             names.update(_names)   930             count += _count   931    932         # Count non-name, non-sequence nodes.   933    934         else:   935             count += 1   936    937     return names, count   938    939 # Location classes.   940    941 class Location:   942    943     "A generic program location."   944    945     def __init__(self, path, name, attrnames=None, version=None, access_number=None):   946         self.path = path   947         self.name = name   948         self.attrnames = attrnames   949         self.version = version   950         self.access_number = access_number   951    952     def __repr__(self):   953         return "Location(%r, %r, %r, %r, %r)" % self.as_tuple()   954    955     def as_tuple(self):   956         return (self.path, self.name, self.attrnames, self.version, self.access_number)   957    958     def __hash__(self):   959         return hash(self.as_tuple())   960    961     def __eq__(self, other):   962         return self.as_tuple() == other.as_tuple()   963    964     def __cmp__(self, other):   965         return cmp(self.as_tuple(), other.as_tuple())   966    967     def get_attrname(self):   968    969         """   970         Extract the first attribute from the attribute names employed in this   971         location.   972         """   973    974         attrnames = self.attrnames   975         if not attrnames:   976             return attrnames   977         return get_attrnames(attrnames)[0]   978    979 class AccessLocation(Location):   980    981     "A specialised access location."   982    983     def __init__(self, path, name, attrnames, access_number):   984    985         """   986         Initialise an access location featuring 'path', 'name', 'attrnames' and   987         'access_number'.   988         """   989    990         Location.__init__(self, path, name, attrnames, None, access_number)   991    992     def __repr__(self):   993         return "AccessLocation(%r, %r, %r, %r)" % (self.path, self.name, self.attrnames, self.access_number)   994    995 # Result classes.   996    997 class InstructionSequence:   998    999     "A generic sequence of instructions."  1000   1001     def __init__(self, instructions):  1002         self.instructions = instructions  1003   1004     def get_value_instruction(self):  1005         return self.instructions[-1]  1006   1007     def get_init_instructions(self):  1008         return self.instructions[:-1]  1009   1010 # Dictionary utilities.  1011   1012 def init_item(d, key, fn):  1013   1014     """  1015     Add to 'd' an entry for 'key' using the callable 'fn' to make an initial  1016     value where no entry already exists.  1017     """  1018   1019     if not d.has_key(key):  1020         d[key] = fn()  1021     return d[key]  1022   1023 def dict_for_keys(d, keys):  1024   1025     "Return a new dictionary containing entries from 'd' for the given 'keys'."  1026   1027     nd = {}  1028     for key in keys:  1029         if d.has_key(key):  1030             nd[key] = d[key]  1031     return nd  1032   1033 def make_key(s):  1034   1035     "Make sequence 's' into a tuple-based key, first sorting its contents."  1036   1037     l = list(s)  1038     l.sort()  1039     return tuple(l)  1040   1041 def add_counter_item(d, key):  1042   1043     """  1044     Make a mapping in 'd' for 'key' to the number of keys added before it, thus  1045     maintaining a mapping of keys to their order of insertion.  1046     """  1047   1048     if not d.has_key(key):  1049         d[key] = len(d.keys())  1050     return d[key]   1051   1052 def remove_items(d1, d2):  1053   1054     "Remove from 'd1' all items from 'd2'."  1055   1056     for key in d2.keys():  1057         if d1.has_key(key):  1058             del d1[key]  1059   1060 # Set utilities.  1061   1062 def first(s):  1063     return list(s)[0]  1064   1065 def same(s1, s2):  1066     return set(s1) == set(s2)  1067   1068 def order_dependencies(all_depends):  1069   1070     """  1071     Produce a dependency ordering for the 'all_depends' mapping. This mapping  1072     has the form "A depends on B, C...". The result will order A, B, C, and so  1073     on.  1074     """  1075   1076     usage = init_reverse_dependencies(all_depends)  1077   1078     # Produce an ordering by obtaining exposed items (required by items already  1079     # processed) and putting them at the start of the list.  1080   1081     ordered = []  1082   1083     while usage:  1084         have_next = False  1085   1086         for key, n in usage.items():  1087   1088             # Add items needed by no other items to the ordering.  1089   1090             if not n:  1091                 remove_dependency(key, all_depends, usage, ordered)  1092                 have_next = True  1093   1094         if not have_next:  1095             raise ValueError, usage  1096   1097     return ordered  1098   1099 def order_dependencies_partial(all_depends):  1100   1101     """  1102     Produce a dependency ordering for the 'all_depends' mapping. This mapping  1103     has the form "A depends on B, C...". The result will order A, B, C, and so  1104     on. Where cycles exist, they will be broken and a partial ordering returned.  1105     """  1106   1107     usage = init_reverse_dependencies(all_depends)  1108   1109     # Duplicate the dependencies for subsequent modification.  1110   1111     new_depends = {}  1112     for key, values in all_depends.items():  1113         new_depends[key] = set(values)  1114   1115     all_depends = new_depends  1116   1117     # Produce an ordering by obtaining exposed items (required by items already  1118     # processed) and putting them at the start of the list.  1119   1120     ordered = []  1121   1122     while usage:  1123         least = None  1124         least_key = None  1125   1126         for key, n in usage.items():  1127   1128             # Add items needed by no other items to the ordering.  1129   1130             if not n:  1131                 remove_dependency(key, all_depends, usage, ordered)  1132                 least = 0  1133   1134             # When breaking cycles, note the least used items.  1135   1136             elif least is None or len(n) < least:  1137                 least_key = key  1138                 least = len(n)  1139   1140         if least:  1141             transfer_dependencies(least_key, all_depends, usage, ordered)  1142   1143     return ordered  1144   1145 def init_reverse_dependencies(all_depends):  1146   1147     """  1148     From 'all_depends', providing a mapping of the form "A depends on B, C...",  1149     record the reverse dependencies, making a mapping of the form  1150     "B is needed by A", "C is needed by A", and so on.  1151     """  1152   1153     usage = {}  1154   1155     # Record path-based dependencies.  1156   1157     for key in all_depends.keys():  1158         usage[key] = set()  1159   1160     for key, depends in all_depends.items():  1161         for depend in depends:  1162             init_item(usage, depend, set)  1163             usage[depend].add(key)  1164   1165     return usage  1166   1167 def transfer_dependencies(key, all_depends, usage, ordered):  1168   1169     """  1170     Transfer items needed by 'key' to those items needing 'key', found using  1171     'all_depends', and updating 'usage'. Insert 'key' into the 'ordered'  1172     collection of dependencies.  1173   1174     If "A is needed by X" and "B is needed by A", then transferring items needed  1175     by A will cause "B is needed by X" to be recorded as a consequence.  1176   1177     Transferring items also needs to occur in the reverse mapping, so that  1178     "A needs B" and "X needs A", then the consequence must be recorded as  1179     "X needs B".  1180     """  1181   1182     ordered.insert(0, key)  1183   1184     needing = usage[key]                        # A is needed by X  1185     needed = all_depends.get(key)               # A needs B  1186   1187     if needing:  1188         for depend in needing:  1189             l = all_depends.get(depend)  1190             if not l:  1191                 continue  1192   1193             l.remove(key)                       # X needs (A)  1194   1195             if needed:  1196                 l.update(needed)                # X needs B...  1197   1198                 # Prevent self references.  1199   1200                 if depend in needed:  1201                     l.remove(depend)  1202   1203     if needed:  1204         for depend in needed:  1205             l = usage.get(depend)  1206             if not l:  1207                 continue  1208   1209             l.remove(key)                       # B is needed by (A)  1210             l.update(needing)                   # B is needed by X...  1211   1212             # Prevent self references.  1213   1214             if depend in needing:  1215                 l.remove(depend)  1216   1217     if needed:  1218         del all_depends[key]  1219     del usage[key]  1220   1221 def remove_dependency(key, all_depends, usage, ordered):  1222   1223     """  1224     Remove 'key', found in 'all_depends', from 'usage', inserting it into the  1225     'ordered' collection of dependencies.  1226   1227     Given that 'usage' for a given key A would indicate that "A needs <nothing>"  1228     upon removing A from 'usage', the outcome is that all keys needing A will  1229     have A removed from their 'usage' records.  1230   1231     So, if "B needs A", removing A will cause "B needs <nothing>" to be recorded  1232     as a consequence.  1233     """  1234   1235     ordered.insert(0, key)  1236   1237     depends = all_depends.get(key)  1238   1239     # Reduce usage of the referenced items.  1240   1241     if depends:  1242         for depend in depends:  1243             usage[depend].remove(key)  1244   1245     del usage[key]  1246   1247 # General input/output.  1248   1249 def readfile(filename):  1250   1251     "Return the contents of 'filename'."  1252   1253     f = open(filename)  1254     try:  1255         return f.read()  1256     finally:  1257         f.close()  1258   1259 def writefile(filename, s):  1260   1261     "Write to 'filename' the string 's'."  1262   1263     f = open(filename, "w")  1264     try:  1265         f.write(s)  1266     finally:  1267         f.close()  1268   1269 # General encoding.  1270   1271 def sorted_output(x):  1272   1273     "Sort sequence 'x' and return a string with commas separating the values."  1274   1275     x = map(str, x)  1276     x.sort()  1277     return ", ".join(x)  1278   1279 def get_string_details(literals, encoding):  1280   1281     """  1282     Determine whether 'literals' represent Unicode strings or byte strings,  1283     using 'encoding' to reproduce byte sequences.  1284   1285     Each literal is the full program representation including prefix and quotes  1286     recoded by the parser to UTF-8. Thus, any literal found to represent a byte  1287     string needs to be translated back to its original encoding.  1288   1289     Return a single encoded literal value, a type name, and the original  1290     encoding as a tuple.  1291     """  1292   1293     typename = "unicode"  1294   1295     l = []  1296   1297     for s in literals:  1298         out, _typename = get_literal_details(s)  1299         if _typename == "str":  1300             typename = "str"  1301         l.append(out)  1302   1303     out = "".join(l)  1304   1305     # For Unicode values, convert to the UTF-8 program representation.  1306   1307     if typename == "unicode":  1308         return out.encode("utf-8"), typename, encoding  1309   1310     # For byte string values, convert back to the original encoding.  1311   1312     else:  1313         return out.encode(encoding), typename, encoding  1314   1315 def get_literal_details(s):  1316   1317     """  1318     Determine whether 's' represents a Unicode string or a byte string, where  1319     's' contains the full program representation of a literal including prefix  1320     and quotes, recoded by the parser to UTF-8.  1321   1322     Find and convert Unicode values starting with <backslash>u or <backslash>U,  1323     and byte or Unicode values starting with <backslash><octal digit> or  1324     <backslash>x.  1325   1326     Literals prefixed with "u" cause <backslash><octal digit> and <backslash>x  1327     to be considered as Unicode values. Otherwise, they produce byte values and  1328     cause unprefixed strings to be considered as byte strings.  1329   1330     Literals prefixed with "r" do not have their backslash-encoded values  1331     converted unless also prefixed with "u", in which case only the above value  1332     formats are converted, not any of the other special sequences for things  1333     like newlines.  1334   1335     Return the literal value as a Unicode object together with the appropriate  1336     type name in a tuple.  1337     """  1338   1339     l = []  1340   1341     # Identify the quote character and use it to identify the prefix.  1342   1343     quote_type = s[-1]  1344     prefix_end = s.find(quote_type)  1345     prefix = s[:prefix_end].lower()  1346   1347     if prefix not in ("", "b", "br", "r", "u", "ur"):  1348         raise ValueError, "String literal does not have a supported prefix: %s" % s  1349   1350     if "b" in prefix:  1351         typename = "str"  1352     else:  1353         typename = "unicode"  1354   1355     # Identify triple quotes or single quotes.  1356   1357     if len(s) >= 6 and s[-2] == quote_type and s[-3] == quote_type:  1358         quote = s[prefix_end:prefix_end+3]  1359         current = prefix_end + 3  1360         end = len(s) - 3  1361     else:  1362         quote = s[prefix_end]  1363         current = prefix_end + 1  1364         end = len(s) - 1  1365   1366     # Conversions of some quoted values.  1367   1368     searches = {  1369         "u" : (6, 16),  1370         "U" : (10, 16),  1371         "x" : (4, 16),  1372         }  1373   1374     octal_digits = map(str, range(0, 8))  1375   1376     # Translations of some quoted values.  1377   1378     escaped = {  1379         "\\" : "\\", "'" : "'", '"' : '"',  1380         "a" : "\a", "b" : "\b", "f" : "\f",  1381         "n" : "\n", "r" : "\r", "t" : "\t",  1382         }  1383   1384     while current < end:  1385   1386         # Look for quoted values.  1387   1388         index = s.find("\\", current)  1389         if index == -1 or index + 1 == end:  1390             l.append(s[current:end])  1391             break  1392   1393         # Add the preceding text.  1394   1395         l.append(s[current:index])  1396   1397         # Handle quoted text.  1398   1399         term = s[index+1]  1400   1401         # Add Unicode values. Where a string is u-prefixed, even \o and \x  1402         # produce Unicode values.  1403   1404         if typename == "unicode" and (  1405             term in ("u", "U") or   1406             "u" in prefix and (term == "x" or term in octal_digits)):  1407   1408             needed, base = searches.get(term, (4, 8))  1409             value = convert_quoted_value(s, index, needed, end, base, unichr)  1410             l.append(value)  1411             current = index + needed  1412   1413         # Add raw byte values, changing the string type.  1414   1415         elif "r" not in prefix and (  1416              term == "x" or term in octal_digits):  1417   1418             needed, base = searches.get(term, (4, 8))  1419             value = convert_quoted_value(s, index, needed, end, base, chr)  1420             l.append(value)  1421             typename = "str"  1422             current = index + needed  1423   1424         # Add other escaped values.  1425   1426         elif "r" not in prefix and escaped.has_key(term):  1427             l.append(escaped[term])  1428             current = index + 2  1429   1430         # Add other text as found.  1431   1432         else:  1433             l.append(s[index:index+2])  1434             current = index + 2  1435   1436     # Collect the components into a single Unicode object. Since the literal  1437     # text was already in UTF-8 form, interpret plain strings as UTF-8  1438     # sequences.  1439   1440     out = []  1441   1442     for value in l:  1443         if isinstance(value, unicode):  1444             out.append(value)  1445         else:  1446             out.append(unicode(value, "utf-8"))  1447   1448     return "".join(out), typename  1449   1450 def convert_quoted_value(s, index, needed, end, base, fn):  1451   1452     """  1453     Interpret a quoted value in 's' at 'index' with the given 'needed' number of  1454     positions, and with the given 'end' indicating the first position after the  1455     end of the actual string content.  1456   1457     Use 'base' as the numerical base when interpreting the value, and use 'fn'  1458     to convert the value to an appropriate type.  1459     """  1460   1461     s = s[index:min(index+needed, end)]  1462   1463     # Not a complete occurrence.  1464   1465     if len(s) < needed:  1466         return s  1467   1468     # Test for a well-formed value.  1469   1470     try:  1471         first = base == 8 and 1 or 2  1472         value = int(s[first:needed], base)  1473     except ValueError:  1474         return s  1475     else:  1476         return fn(value)  1477   1478 # Attribute chain decoding.  1479   1480 def get_attrnames(attrnames):  1481   1482     """  1483     Split the qualified attribute chain 'attrnames' into its components,  1484     handling special attributes starting with "#" that indicate type  1485     conformance.  1486     """  1487   1488     if attrnames.startswith("#"):  1489         return [attrnames]  1490     else:  1491         return attrnames.split(".")  1492   1493 def get_name_path(path, name):  1494   1495     "Return a suitable qualified name from the given 'path' and 'name'."  1496   1497     if "." in name:  1498         return name  1499     else:  1500         return "%s.%s" % (path, name)  1501   1502 # Usage-related functions.  1503   1504 def get_types_for_usage(attrnames, objects):  1505   1506     """  1507     Identify the types that can support the given 'attrnames', using the  1508     given 'objects' as the catalogue of type details.  1509     """  1510   1511     types = []  1512     for name, _attrnames in objects.items():  1513         if set(attrnames).issubset(_attrnames):  1514             types.append(name)  1515     return types  1516   1517 def get_invoked_attributes(usage):  1518   1519     "Obtain invoked attribute from the given 'usage'."  1520   1521     invoked = []  1522     if usage:  1523         for attrname, invocation, assignment in usage:  1524             if invocation:  1525                 invoked.append(attrname)  1526     return invoked  1527   1528 def get_assigned_attributes(usage):  1529   1530     "Obtain assigned attribute from the given 'usage'."  1531   1532     assigned = []  1533     if usage:  1534         for attrname, invocation, assignment in usage:  1535             if assignment:  1536                 assigned.append(attrname)  1537     return assigned  1538   1539 # Type and module functions.  1540 # NOTE: This makes assumptions about the __builtins__ structure.  1541   1542 def get_builtin_module(name):  1543   1544     "Return the module name containing the given type 'name'."  1545   1546     if name == "string":  1547         modname = "str"  1548     elif name == "utf8string":  1549         modname = "unicode"  1550     elif name == "NoneType":  1551         modname = "none"  1552     else:  1553         modname = name  1554   1555     return "__builtins__.%s" % modname  1556   1557 def get_builtin_type(name):  1558   1559     "Return the type name provided by the given Python value 'name'."  1560   1561     if name == "str":  1562         return "string"  1563     elif name == "unicode":  1564         return "utf8string"  1565     else:  1566         return name  1567   1568 def get_builtin_class(name):  1569   1570     "Return the full name of the built-in class having the given 'name'."  1571   1572     typename = get_builtin_type(name)  1573     module = get_builtin_module(typename)  1574     return "%s.%s" % (module, typename)  1575   1576 # Useful data.  1577   1578 predefined_constants = "False", "None", "NotImplemented", "True"  1579   1580 unary_operator_functions = {  1581   1582     # Unary operations.  1583   1584     "Invert" : "invert",  1585     "UnaryAdd" : "pos",  1586     "UnarySub" : "neg",  1587     }  1588   1589 operator_functions = {  1590   1591     # Fundamental operations.  1592   1593     "is" : "is_",  1594     "is not" : "is_not",  1595   1596     # Binary operations.  1597   1598     "in" : "in_",  1599     "not in" : "not_in",  1600     "Add" : "add",  1601     "Bitand" : "and_",  1602     "Bitor" : "or_",  1603     "Bitxor" : "xor",  1604     "Div" : "div",  1605     "FloorDiv" : "floordiv",  1606     "LeftShift" : "lshift",  1607     "Mod" : "mod",  1608     "Mul" : "mul",  1609     "Power" : "pow",  1610     "RightShift" : "rshift",  1611     "Sub" : "sub",  1612   1613     # Augmented assignment.  1614   1615     "+=" : "iadd",  1616     "-=" : "isub",  1617     "*=" : "imul",  1618     "/=" : "idiv",  1619     "//=" : "ifloordiv",  1620     "%=" : "imod",  1621     "**=" : "ipow",  1622     "<<=" : "ilshift",  1623     ">>=" : "irshift",  1624     "&=" : "iand",  1625     "^=" : "ixor",  1626     "|=" : "ior",  1627   1628     # Comparisons.  1629   1630     "==" : "eq",  1631     "!=" : "ne",  1632     "<" : "lt",  1633     "<=" : "le",  1634     ">=" : "ge",  1635     ">" : "gt",  1636     }  1637   1638 operator_functions.update(unary_operator_functions)  1639   1640 # vim: tabstop=4 expandtab shiftwidth=4