Lichen

common.py

800:6097bc0ef5ec
2017-04-03 Paul Boddie Added initial support for string formatting.
     1 #!/usr/bin/env python     2      3 """     4 Common functions.     5      6 Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013,     7               2014, 2015, 2016, 2017 Paul Boddie <paul@boddie.org.uk>     8      9 This program is free software; you can redistribute it and/or modify it under    10 the terms of the GNU General Public License as published by the Free Software    11 Foundation; either version 3 of the License, or (at your option) any later    12 version.    13     14 This program is distributed in the hope that it will be useful, but WITHOUT    15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS    16 FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more    17 details.    18     19 You should have received a copy of the GNU General Public License along with    20 this program.  If not, see <http://www.gnu.org/licenses/>.    21 """    22     23 from compiler.transformer import Transformer    24 from errors import InspectError    25 from os import listdir, makedirs, remove    26 from os.path import exists, getmtime, isdir, join, split    27 from results import ConstantValueRef, LiteralSequenceRef, NameRef    28 import compiler.ast    29     30 class CommonOutput:    31     32     "Common output functionality."    33     34     def check_output(self, options=None):    35     36         "Check the existing output and remove it if irrelevant."    37     38         if not exists(self.output):    39             makedirs(self.output)    40     41         details = self.importer.get_cache_details()    42         recorded_details = self.get_output_details()    43     44         # Combine cache details with any options.    45     46         full_details = options and (details + " " + options) or details    47     48         if recorded_details != full_details:    49             self.remove_output()    50     51         writefile(self.get_output_details_filename(), full_details)    52     53     def get_output_details_filename(self):    54     55         "Return the output details filename."    56     57         return join(self.output, "$details")    58     59     def get_output_details(self):    60     61         "Return details of the existing output."    62     63         details_filename = self.get_output_details_filename()    64     65         if not exists(details_filename):    66             return None    67         else:    68             return readfile(details_filename)    69     70     def remove_output(self, dirname=None):    71     72         "Remove the output."    73     74         dirname = dirname or self.output    75     76         for filename in listdir(dirname):    77             path = join(dirname, filename)    78             if isdir(path):    79                 self.remove_output(path)    80             else:    81                 remove(path)    82     83 def copy(source, target, only_if_newer=True):    84     85     "Copy a text file from 'source' to 'target'."    86     87     if isdir(target):    88         target = join(target, split(source)[-1])    89     90     if only_if_newer and not is_newer(source, target):    91         return    92     93     infile = open(source)    94     outfile = open(target, "w")    95     96     try:    97         outfile.write(infile.read())    98     finally:    99         outfile.close()   100         infile.close()   101    102 def is_newer(source, target):   103    104     "Return whether 'source' is newer than 'target'."   105    106     if exists(target):   107         target_mtime = getmtime(target)   108         source_mtime = getmtime(source)   109         return source_mtime > target_mtime   110    111     return True   112    113 class CommonModule:   114    115     "A common module representation."   116    117     def __init__(self, name, importer):   118    119         """   120         Initialise this module with the given 'name' and an 'importer' which is   121         used to provide access to other modules when required.   122         """   123    124         self.name = name   125         self.importer = importer   126         self.filename = None   127    128         # Inspection-related attributes.   129    130         self.astnode = None   131         self.encoding = None   132         self.temp = {}   133         self.lambdas = {}   134    135         # Constants, literals and values.   136    137         self.constants = {}   138         self.constant_values = {}   139         self.literals = {}   140         self.literal_types = {}   141    142         # Nested namespaces.   143    144         self.namespace_path = []   145         self.in_function = False   146    147         # Retain the assignment value expression and track invocations.   148    149         self.in_assignment = None   150         self.in_invocation = None   151    152         # Attribute chain state management.   153    154         self.attrs = []   155         self.chain_assignment = []   156         self.chain_invocation = []   157    158     def __repr__(self):   159         return "CommonModule(%r, %r)" % (self.name, self.importer)   160    161     def parse_file(self, filename):   162    163         "Parse the file with the given 'filename', initialising attributes."   164    165         self.filename = filename   166    167         # Use the Transformer directly to obtain encoding information.   168    169         t = Transformer()   170         f = open(filename)   171    172         try:   173             self.astnode = t.parsesuite(f.read() + "\n")   174             self.encoding = t.encoding   175         finally:   176             f.close()   177    178     # Module-relative naming.   179    180     def get_global_path(self, name):   181         return "%s.%s" % (self.name, name)   182    183     def get_namespace_path(self):   184         return ".".join([self.name] + self.namespace_path)   185    186     def get_object_path(self, name):   187         return ".".join([self.name] + self.namespace_path + [name])   188    189     def get_parent_path(self):   190         return ".".join([self.name] + self.namespace_path[:-1])   191    192     # Namespace management.   193    194     def enter_namespace(self, name):   195    196         "Enter the namespace having the given 'name'."   197    198         self.namespace_path.append(name)   199    200     def exit_namespace(self):   201    202         "Exit the current namespace."   203    204         self.namespace_path.pop()   205    206     # Constant reference naming.   207    208     def get_constant_name(self, value, value_type, encoding=None):   209    210         """   211         Add a new constant to the current namespace for 'value' with   212         'value_type'.   213         """   214    215         path = self.get_namespace_path()   216         init_item(self.constants, path, dict)   217         return "$c%d" % add_counter_item(self.constants[path], (value, value_type, encoding))   218    219     # Literal reference naming.   220    221     def get_literal_name(self):   222    223         "Add a new literal to the current namespace."   224    225         path = self.get_namespace_path()   226         init_item(self.literals, path, lambda: 0)   227         return "$C%d" % self.literals[path]   228    229     def next_literal(self):   230         self.literals[self.get_namespace_path()] += 1   231    232     # Temporary variable naming.   233    234     def get_temporary_name(self):   235         path = self.get_namespace_path()   236         init_item(self.temp, path, lambda: 0)   237         return "$t%d" % self.temp[path]   238    239     def next_temporary(self):   240         self.temp[self.get_namespace_path()] += 1   241    242     # Arbitrary function naming.   243    244     def get_lambda_name(self):   245         path = self.get_namespace_path()   246         init_item(self.lambdas, path, lambda: 0)   247         name = "$l%d" % self.lambdas[path]   248         self.lambdas[path] += 1   249         return name   250    251     def reset_lambdas(self):   252         self.lambdas = {}   253    254     # Constant and literal recording.   255    256     def get_constant_value(self, value, literals=None):   257    258         """   259         Encode the 'value' if appropriate, returning a value, a typename and any   260         encoding.   261         """   262    263         if isinstance(value, unicode):   264             return value.encode("utf-8"), "unicode", self.encoding   265    266         # Attempt to convert plain strings to text.   267    268         elif isinstance(value, str) and self.encoding:   269             try:   270                 return get_string_details(literals, self.encoding)   271             except UnicodeDecodeError:   272                 pass   273    274         return value, value.__class__.__name__, None   275    276     def get_constant_reference(self, ref, value, encoding=None):   277    278         """   279         Return a constant reference for the given 'ref' type and 'value', with   280         the optional 'encoding' applying to text values.   281         """   282    283         constant_name = self.get_constant_name(value, ref.get_origin(), encoding)   284    285         # Return a reference for the constant.   286    287         objpath = self.get_object_path(constant_name)   288         name_ref = ConstantValueRef(constant_name, ref.instance_of(objpath), value)   289    290         # Record the value and type for the constant.   291    292         self._reserve_constant(objpath, name_ref.value, name_ref.get_origin(), encoding)   293         return name_ref   294    295     def reserve_constant(self, objpath, value, origin, encoding=None):   296    297         """   298         Reserve a constant within 'objpath' with the given 'value' and having a   299         type with the given 'origin', with the optional 'encoding' applying to   300         text values.   301         """   302    303         constant_name = self.get_constant_name(value, origin)   304         objpath = self.get_object_path(constant_name)   305         self._reserve_constant(objpath, value, origin, encoding)   306    307     def _reserve_constant(self, objpath, value, origin, encoding):   308    309         """   310         Store a constant for 'objpath' with the given 'value' and 'origin', with   311         the optional 'encoding' applying to text values.   312         """   313    314         self.constant_values[objpath] = value, origin, encoding   315    316     def get_literal_reference(self, name, ref, items, cls):   317    318         """   319         Return a literal reference for the given type 'name', literal 'ref',   320         node 'items' and employing the given 'cls' as the class of the returned   321         reference object.   322         """   323    324         # Construct an invocation using the items as arguments.   325    326         typename = "$L%s" % name   327    328         invocation = compiler.ast.CallFunc(   329             compiler.ast.Name(typename),   330             items   331             )   332    333         # Get a name for the actual literal.   334    335         instname = self.get_literal_name()   336         self.next_literal()   337    338         # Record the type for the literal.   339    340         objpath = self.get_object_path(instname)   341         self.literal_types[objpath] = ref.get_origin()   342    343         # Return a wrapper for the invocation exposing the items.   344    345         return cls(   346             instname,   347             ref.instance_of(),   348             self.process_structure_node(invocation),   349             invocation.args   350             )   351    352     # Node handling.   353    354     def process_structure(self, node):   355    356         """   357         Within the given 'node', process the program structure.   358    359         During inspection, this will process global declarations, adjusting the   360         module namespace, and import statements, building a module dependency   361         hierarchy.   362    363         During translation, this will consult deduced program information and   364         output translated code.   365         """   366    367         l = []   368         for n in node.getChildNodes():   369             l.append(self.process_structure_node(n))   370         return l   371    372     def process_augassign_node(self, n):   373    374         "Process the given augmented assignment node 'n'."   375    376         op = operator_functions[n.op]   377    378         if isinstance(n.node, compiler.ast.Getattr):   379             target = compiler.ast.AssAttr(n.node.expr, n.node.attrname, "OP_ASSIGN")   380         elif isinstance(n.node, compiler.ast.Name):   381             target = compiler.ast.AssName(n.node.name, "OP_ASSIGN")   382         else:   383             target = n.node   384    385         assignment = compiler.ast.Assign(   386             [target],   387             compiler.ast.CallFunc(   388                 compiler.ast.Name("$op%s" % op),   389                 [n.node, n.expr]))   390    391         return self.process_structure_node(assignment)   392    393     def process_assignment_for_object(self, original_name, source):   394    395         """   396         Return an assignment operation making 'original_name' refer to the given   397         'source'.   398         """   399    400         assignment = compiler.ast.Assign(   401             [compiler.ast.AssName(original_name, "OP_ASSIGN")],   402             source   403             )   404    405         return self.process_structure_node(assignment)   406    407     def process_assignment_node_items(self, n, expr):   408    409         """   410         Process the given assignment node 'n' whose children are to be assigned   411         items of 'expr'.   412         """   413    414         name_ref = self.process_structure_node(expr)   415    416         # Either unpack the items and present them directly to each assignment   417         # node.   418    419         if isinstance(name_ref, LiteralSequenceRef) and \   420            self.process_literal_sequence_items(n, name_ref):   421    422             pass   423    424         # Or have the assignment nodes access each item via the sequence API.   425    426         else:   427             self.process_assignment_node_items_by_position(n, expr, name_ref)   428    429     def process_assignment_node_items_by_position(self, n, expr, name_ref):   430    431         """   432         Process the given sequence assignment node 'n', converting the node to   433         the separate assignment of each target using positional access on a   434         temporary variable representing the sequence. Use 'expr' as the assigned   435         value and 'name_ref' as the reference providing any existing temporary   436         variable.   437         """   438    439         assignments = []   440    441         # Employ existing names to access the sequence.   442         # Literal sequences do not provide names of accessible objects.   443    444         if isinstance(name_ref, NameRef) and not isinstance(name_ref, LiteralSequenceRef):   445             temp = name_ref.name   446    447         # For other expressions, create a temporary name to reference the items.   448    449         else:   450             temp = self.get_temporary_name()   451             self.next_temporary()   452    453             assignments.append(   454                 compiler.ast.Assign([compiler.ast.AssName(temp, "OP_ASSIGN")], expr)   455                 )   456    457         # Assign the items to the target nodes.   458    459         for i, node in enumerate(n.nodes):   460             assignments.append(   461                 compiler.ast.Assign([node], compiler.ast.Subscript(   462                     compiler.ast.Name(temp), "OP_APPLY", [compiler.ast.Const(i, str(i))]))   463                 )   464    465         return self.process_structure_node(compiler.ast.Stmt(assignments))   466    467     def process_literal_sequence_items(self, n, name_ref):   468    469         """   470         Process the given assignment node 'n', obtaining from the given   471         'name_ref' the items to be assigned to the assignment targets.   472    473         Return whether this method was able to process the assignment node as   474         a sequence of direct assignments.   475         """   476    477         if len(n.nodes) == len(name_ref.items):   478             assigned_names, count = get_names_from_nodes(n.nodes)   479             accessed_names, _count = get_names_from_nodes(name_ref.items)   480    481             # Only assign directly between items if all assigned names are   482             # plain names (not attribute assignments), and if the assigned names   483             # do not appear in the accessed names.   484    485             if len(assigned_names) == count and \   486                not assigned_names.intersection(accessed_names):   487    488                 for node, item in zip(n.nodes, name_ref.items):   489                     self.process_assignment_node(node, item)   490    491                 return True   492    493             # Otherwise, use the position-based mechanism to obtain values.   494    495             else:   496                 return False   497         else:   498             raise InspectError("In %s, item assignment needing %d items is given %d items." % (   499                 self.get_namespace_path(), len(n.nodes), len(name_ref.items)))   500    501     def process_compare_node(self, n):   502    503         """   504         Process the given comparison node 'n', converting an operator sequence   505         from...   506    507         <expr1> <op1> <expr2> <op2> <expr3>   508    509         ...to...   510    511         <op1>(<expr1>, <expr2>) and <op2>(<expr2>, <expr3>)   512         """   513    514         invocations = []   515         last = n.expr   516    517         for op, op_node in n.ops:   518             op = operator_functions.get(op)   519    520             invocations.append(compiler.ast.CallFunc(   521                 compiler.ast.Name("$op%s" % op),   522                 [last, op_node]))   523    524             last = op_node   525    526         if len(invocations) > 1:   527             result = compiler.ast.And(invocations)   528         else:   529             result = invocations[0]   530    531         return self.process_structure_node(result)   532    533     def process_dict_node(self, node):   534    535         """   536         Process the given dictionary 'node', returning a list of (key, value)   537         tuples.   538         """   539    540         l = []   541         for key, value in node.items:   542             l.append((   543                 self.process_structure_node(key),   544                 self.process_structure_node(value)))   545         return l   546    547     def process_for_node(self, n):   548    549         """   550         Generate attribute accesses for {n.list}.__iter__ and the next method on   551         the iterator, producing a replacement node for the original.   552         """   553    554         t0 = self.get_temporary_name()   555         self.next_temporary()   556         t1 = self.get_temporary_name()   557         self.next_temporary()   558    559         node = compiler.ast.Stmt([   560    561             # <t0> = {n.list}   562             # <t1> = <t0>.__iter__()   563    564             compiler.ast.Assign(   565                 [compiler.ast.AssName(t0, "OP_ASSIGN")],   566                 n.list),   567    568             compiler.ast.Assign(   569                 [compiler.ast.AssName(t1, "OP_ASSIGN")],   570                 compiler.ast.CallFunc(   571                     compiler.ast.Getattr(compiler.ast.Name(t0), "__iter__"),   572                     [])),   573    574             # try:   575             #     while True:   576             #         <var>... = <t1>.next()   577             #         ...   578             # except StopIteration:   579             #     pass   580    581             compiler.ast.TryExcept(   582                 compiler.ast.While(   583                     compiler.ast.Name("True"),   584                     compiler.ast.Stmt([   585                         compiler.ast.Assign(   586                             [n.assign],   587                             compiler.ast.CallFunc(   588                                 compiler.ast.Getattr(compiler.ast.Name(t1), "next"),   589                                 []   590                                 )),   591                         n.body]),   592                     None),   593                 [(compiler.ast.Name("StopIteration"), None, compiler.ast.Stmt([compiler.ast.Pass()]))],   594                 None)   595             ])   596    597         self.process_structure_node(node)   598    599     def process_literal_sequence_node(self, n, name, ref, cls):   600    601         """   602         Process the given literal sequence node 'n' as a function invocation,   603         with 'name' indicating the type of the sequence, and 'ref' being a   604         reference to the type. The 'cls' is used to instantiate a suitable name   605         reference.   606         """   607    608         if name == "dict":   609             items = []   610             for key, value in n.items:   611                 items.append(compiler.ast.Tuple([key, value]))   612         else: # name in ("list", "tuple"):   613             items = n.nodes   614    615         return self.get_literal_reference(name, ref, items, cls)   616    617     def process_operator_node(self, n):   618    619         """   620         Process the given operator node 'n' as an operator function invocation.   621         """   622    623         opname = n.__class__.__name__   624         operands = n.getChildNodes()   625    626         # Convert a unary operation to an invocation.   627    628         op = unary_operator_functions.get(opname)   629    630         if op:   631             invocation = compiler.ast.CallFunc(   632                 compiler.ast.Name("$op%s" % op),   633                 [operands[0]]   634                 )   635    636         # Convert a single operator with a list of operands to a combination of   637         # pairwise operations.   638    639         else:   640             op = operator_functions[opname]   641             invocation = self._process_operator_node(op, operands)   642    643         return self.process_structure_node(invocation)   644    645     def _process_operator_node(self, op, operands):   646    647         """   648         Process the given 'op', being an operator function, together with the   649         supplied 'operands', returning either a single remaining operand or an   650         invocation combining the operands.   651         """   652    653         remaining = operands[1:]   654         if not remaining:   655             return operands[0]   656    657         return compiler.ast.CallFunc(   658             compiler.ast.Name("$op%s" % op),   659             [operands[0], self._process_operator_node(op, remaining)]   660             )   661    662     def process_print_node(self, n):   663    664         """   665         Process the given print node 'n' as an invocation on a stream of the   666         form...   667    668         $print(dest, args, nl)   669    670         The special function name will be translated elsewhere.   671         """   672    673         nl = isinstance(n, compiler.ast.Printnl)   674         invocation = compiler.ast.CallFunc(   675             compiler.ast.Name("$print"),   676             [n.dest or compiler.ast.Name("None"),   677              compiler.ast.List(list(n.nodes)),   678              nl and compiler.ast.Name("True") or compiler.ast.Name("False")]   679             )   680         return self.process_structure_node(invocation)   681    682     def process_slice_node(self, n, expr=None):   683    684         """   685         Process the given slice node 'n' as a method invocation.   686         """   687    688         if n.flags == "OP_ASSIGN": op = "__setslice__"   689         elif n.flags == "OP_DELETE": op = "__delslice__"   690         else: op = "__getslice__"   691    692         invocation = compiler.ast.CallFunc(   693             compiler.ast.Getattr(n.expr, op),   694             [n.lower or compiler.ast.Name("None"), n.upper or compiler.ast.Name("None")] +   695                 (expr and [expr] or [])   696             )   697    698         # Fix parse tree structure.   699    700         if op == "__delslice__":   701             invocation = compiler.ast.Discard(invocation)   702    703         return self.process_structure_node(invocation)   704    705     def process_sliceobj_node(self, n):   706    707         """   708         Process the given slice object node 'n' as a slice constructor.   709         """   710    711         op = "slice"   712         invocation = compiler.ast.CallFunc(   713             compiler.ast.Name("$op%s" % op),   714             n.nodes   715             )   716         return self.process_structure_node(invocation)   717    718     def process_subscript_node(self, n, expr=None):   719    720         """   721         Process the given subscript node 'n' as a method invocation.   722         """   723    724         if n.flags == "OP_ASSIGN": op = "__setitem__"   725         elif n.flags == "OP_DELETE": op = "__delitem__"   726         else: op = "__getitem__"   727    728         invocation = compiler.ast.CallFunc(   729             compiler.ast.Getattr(n.expr, op),   730             list(n.subs) + (expr and [expr] or [])   731             )   732    733         # Fix parse tree structure.   734    735         if op == "__delitem__":   736             invocation = compiler.ast.Discard(invocation)   737    738         return self.process_structure_node(invocation)   739    740     def process_attribute_chain(self, n):   741    742         """   743         Process the given attribute access node 'n'. Return a reference   744         describing the expression.   745         """   746    747         # AssAttr/Getattr are nested with the outermost access being the last   748         # access in any chain.   749    750         self.attrs.insert(0, n.attrname)   751         attrs = self.attrs   752    753         # Break attribute chains where non-access nodes are found.   754    755         if not self.have_access_expression(n):   756             self.reset_attribute_chain()   757    758         # Descend into the expression, extending backwards any existing chain,   759         # or building another for the expression.   760    761         name_ref = self.process_structure_node(n.expr)   762    763         # Restore chain information applying to this node.   764    765         if not self.have_access_expression(n):   766             self.restore_attribute_chain(attrs)   767    768         # Return immediately if the expression was another access and thus a   769         # continuation backwards along the chain. The above processing will   770         # have followed the chain all the way to its conclusion.   771    772         if self.have_access_expression(n):   773             del self.attrs[0]   774    775         return name_ref   776    777     # Attribute chain handling.   778    779     def reset_attribute_chain(self):   780    781         "Reset the attribute chain for a subexpression of an attribute access."   782    783         self.attrs = []   784         self.chain_assignment.append(self.in_assignment)   785         self.chain_invocation.append(self.in_invocation)   786         self.in_assignment = None   787         self.in_invocation = None   788    789     def restore_attribute_chain(self, attrs):   790    791         "Restore the attribute chain for an attribute access."   792    793         self.attrs = attrs   794         self.in_assignment = self.chain_assignment.pop()   795         self.in_invocation = self.chain_invocation.pop()   796    797     def have_access_expression(self, node):   798    799         "Return whether the expression associated with 'node' is Getattr."   800    801         return isinstance(node.expr, compiler.ast.Getattr)   802    803     def get_name_for_tracking(self, name, name_ref=None, is_global=False):   804    805         """   806         Return the name to be used for attribute usage observations involving   807         the given 'name' in the current namespace.   808    809         If the name is being used outside a function, and if 'name_ref' is   810         given and indicates a global or if 'is_global' is specified as a true   811         value, a path featuring the name in the global namespace is returned.   812         Otherwise, a path computed using the current namespace and the given   813         name is returned.   814    815         The intention of this method is to provide a suitably-qualified name   816         that can be tracked across namespaces. Where globals are being   817         referenced in class namespaces, they should be referenced using their   818         path within the module, not using a path within each class.   819    820         It may not be possible to identify a global within a function at the   821         time of inspection (since a global may appear later in a file).   822         Consequently, globals are identified by their local name rather than   823         their module-qualified path.   824         """   825    826         # For functions, use the appropriate local names.   827    828         if self.in_function:   829             return name   830    831         # For global names outside functions, use a global name.   832    833         elif is_global or name_ref and name_ref.is_global_name():   834             return self.get_global_path(name)   835    836         # Otherwise, establish a name in the current namespace.   837    838         else:   839             return self.get_object_path(name)   840    841     def get_path_for_access(self):   842    843         "Outside functions, register accesses at the module level."   844    845         if not self.in_function:   846             return self.name   847         else:   848             return self.get_namespace_path()   849    850     def get_module_name(self, node):   851    852         """   853         Using the given From 'node' in this module, calculate any relative import   854         information, returning a tuple containing a module to import along with any   855         names to import based on the node's name information.   856    857         Where the returned module is given as None, whole module imports should   858         be performed for the returned modules using the returned names.   859         """   860    861         # Absolute import.   862    863         if node.level == 0:   864             return node.modname, node.names   865    866         # Relative to an ancestor of this module.   867    868         else:   869             path = self.name.split(".")   870             level = node.level   871    872             # Relative imports treat package roots as submodules.   873    874             if split(self.filename)[-1] == "__init__.py":   875                 level -= 1   876    877             if level > len(path):   878                 raise InspectError("Relative import %r involves too many levels up from module %r" % (   879                     ("%s%s" % ("." * node.level, node.modname or "")), self.name))   880    881             basename = ".".join(path[:len(path)-level])   882    883         # Name imports from a module.   884    885         if node.modname:   886             return "%s.%s" % (basename, node.modname), node.names   887    888         # Relative whole module imports.   889    890         else:   891             return basename, node.names   892    893 def get_argnames(args):   894    895     """   896     Return a list of all names provided by 'args'. Since tuples may be   897     employed, the arguments are traversed depth-first.   898     """   899    900     l = []   901     for arg in args:   902         if isinstance(arg, tuple):   903             l += get_argnames(arg)   904         else:   905             l.append(arg)   906     return l   907    908 def get_names_from_nodes(nodes):   909    910     """   911     Return the names employed in the given 'nodes' along with the number of   912     nodes excluding sequences.   913     """   914    915     names = set()   916     count = 0   917    918     for node in nodes:   919    920         # Add names and count them.   921    922         if isinstance(node, (compiler.ast.AssName, compiler.ast.Name)):   923             names.add(node.name)   924             count += 1   925    926         # Add names from sequences and incorporate their counts.   927    928         elif isinstance(node, (compiler.ast.AssList, compiler.ast.AssTuple,   929                                compiler.ast.List, compiler.ast.Set,   930                                compiler.ast.Tuple)):   931             _names, _count = get_names_from_nodes(node.nodes)   932             names.update(_names)   933             count += _count   934    935         # Count non-name, non-sequence nodes.   936    937         else:   938             count += 1   939    940     return names, count   941    942 # Location classes.   943    944 class Location:   945    946     "A generic program location."   947    948     def __init__(self, path, name, attrnames=None, version=None, access_number=None):   949         self.path = path   950         self.name = name   951         self.attrnames = attrnames   952         self.version = version   953         self.access_number = access_number   954    955     def __repr__(self):   956         return "Location(%r, %r, %r, %r, %r)" % self.as_tuple()   957    958     def as_tuple(self):   959         return (self.path, self.name, self.attrnames, self.version, self.access_number)   960    961     def __hash__(self):   962         return hash(self.as_tuple())   963    964     def __eq__(self, other):   965         return self.as_tuple() == other.as_tuple()   966    967     def __cmp__(self, other):   968         return cmp(self.as_tuple(), other.as_tuple())   969    970     def get_attrname(self):   971    972         """   973         Extract the first attribute from the attribute names employed in this   974         location.   975         """   976    977         attrnames = self.attrnames   978         if not attrnames:   979             return attrnames   980         return get_attrnames(attrnames)[0]   981    982 class AccessLocation(Location):   983    984     "A specialised access location."   985    986     def __init__(self, path, name, attrnames, access_number):   987    988         """   989         Initialise an access location featuring 'path', 'name', 'attrnames' and   990         'access_number'.   991         """   992    993         Location.__init__(self, path, name, attrnames, None, access_number)   994    995     def __repr__(self):   996         return "AccessLocation(%r, %r, %r, %r)" % (self.path, self.name, self.attrnames, self.access_number)   997    998 # Result classes.   999   1000 class InstructionSequence:  1001   1002     "A generic sequence of instructions."  1003   1004     def __init__(self, instructions):  1005         self.instructions = instructions  1006   1007     def get_value_instruction(self):  1008         return self.instructions[-1]  1009   1010     def get_init_instructions(self):  1011         return self.instructions[:-1]  1012   1013 # Dictionary utilities.  1014   1015 def init_item(d, key, fn):  1016   1017     """  1018     Add to 'd' an entry for 'key' using the callable 'fn' to make an initial  1019     value where no entry already exists.  1020     """  1021   1022     if not d.has_key(key):  1023         d[key] = fn()  1024     return d[key]  1025   1026 def dict_for_keys(d, keys):  1027   1028     "Return a new dictionary containing entries from 'd' for the given 'keys'."  1029   1030     nd = {}  1031     for key in keys:  1032         if d.has_key(key):  1033             nd[key] = d[key]  1034     return nd  1035   1036 def make_key(s):  1037   1038     "Make sequence 's' into a tuple-based key, first sorting its contents."  1039   1040     l = list(s)  1041     l.sort()  1042     return tuple(l)  1043   1044 def add_counter_item(d, key):  1045   1046     """  1047     Make a mapping in 'd' for 'key' to the number of keys added before it, thus  1048     maintaining a mapping of keys to their order of insertion.  1049     """  1050   1051     if not d.has_key(key):  1052         d[key] = len(d.keys())  1053     return d[key]   1054   1055 def remove_items(d1, d2):  1056   1057     "Remove from 'd1' all items from 'd2'."  1058   1059     for key in d2.keys():  1060         if d1.has_key(key):  1061             del d1[key]  1062   1063 # Set utilities.  1064   1065 def first(s):  1066     return list(s)[0]  1067   1068 def same(s1, s2):  1069     return set(s1) == set(s2)  1070   1071 def order_dependencies(all_depends):  1072   1073     """  1074     Produce a dependency ordering for the 'all_depends' mapping. This mapping  1075     has the form "A depends on B, C...". The result will order A, B, C, and so  1076     on.  1077     """  1078   1079     usage = init_reverse_dependencies(all_depends)  1080   1081     # Produce an ordering by obtaining exposed items (required by items already  1082     # processed) and putting them at the start of the list.  1083   1084     ordered = []  1085   1086     while usage:  1087         have_next = False  1088   1089         for key, n in usage.items():  1090   1091             # Add items needed by no other items to the ordering.  1092   1093             if not n:  1094                 remove_dependency(key, all_depends, usage, ordered)  1095                 have_next = True  1096   1097         if not have_next:  1098             raise ValueError, usage  1099   1100     return ordered  1101   1102 def order_dependencies_partial(all_depends):  1103   1104     """  1105     Produce a dependency ordering for the 'all_depends' mapping. This mapping  1106     has the form "A depends on B, C...". The result will order A, B, C, and so  1107     on. Where cycles exist, they will be broken and a partial ordering returned.  1108     """  1109   1110     usage = init_reverse_dependencies(all_depends)  1111   1112     # Duplicate the dependencies for subsequent modification.  1113   1114     new_depends = {}  1115     for key, values in all_depends.items():  1116         new_depends[key] = set(values)  1117   1118     all_depends = new_depends  1119   1120     # Produce an ordering by obtaining exposed items (required by items already  1121     # processed) and putting them at the start of the list.  1122   1123     ordered = []  1124   1125     while usage:  1126         least = None  1127         least_key = None  1128   1129         for key, n in usage.items():  1130   1131             # Add items needed by no other items to the ordering.  1132   1133             if not n:  1134                 remove_dependency(key, all_depends, usage, ordered)  1135                 least = 0  1136   1137             # When breaking cycles, note the least used items.  1138   1139             elif least is None or len(n) < least:  1140                 least_key = key  1141                 least = len(n)  1142   1143         if least:  1144             transfer_dependencies(least_key, all_depends, usage, ordered)  1145   1146     return ordered  1147   1148 def init_reverse_dependencies(all_depends):  1149   1150     """  1151     From 'all_depends', providing a mapping of the form "A depends on B, C...",  1152     record the reverse dependencies, making a mapping of the form  1153     "B is needed by A", "C is needed by A", and so on.  1154     """  1155   1156     usage = {}  1157   1158     # Record path-based dependencies.  1159   1160     for key in all_depends.keys():  1161         usage[key] = set()  1162   1163     for key, depends in all_depends.items():  1164         for depend in depends:  1165             init_item(usage, depend, set)  1166             usage[depend].add(key)  1167   1168     return usage  1169   1170 def transfer_dependencies(key, all_depends, usage, ordered):  1171   1172     """  1173     Transfer items needed by 'key' to those items needing 'key', found using  1174     'all_depends', and updating 'usage'. Insert 'key' into the 'ordered'  1175     collection of dependencies.  1176   1177     If "A is needed by X" and "B is needed by A", then transferring items needed  1178     by A will cause "B is needed by X" to be recorded as a consequence.  1179   1180     Transferring items also needs to occur in the reverse mapping, so that  1181     "A needs B" and "X needs A", then the consequence must be recorded as  1182     "X needs B".  1183     """  1184   1185     ordered.insert(0, key)  1186   1187     needing = usage[key]                        # A is needed by X  1188     needed = all_depends.get(key)               # A needs B  1189   1190     if needing:  1191         for depend in needing:  1192             l = all_depends.get(depend)  1193             if not l:  1194                 continue  1195   1196             l.remove(key)                       # X needs (A)  1197   1198             if needed:  1199                 l.update(needed)                # X needs B...  1200   1201                 # Prevent self references.  1202   1203                 if depend in needed:  1204                     l.remove(depend)  1205   1206     if needed:  1207         for depend in needed:  1208             l = usage.get(depend)  1209             if not l:  1210                 continue  1211   1212             l.remove(key)                       # B is needed by (A)  1213             l.update(needing)                   # B is needed by X...  1214   1215             # Prevent self references.  1216   1217             if depend in needing:  1218                 l.remove(depend)  1219   1220     if needed:  1221         del all_depends[key]  1222     del usage[key]  1223   1224 def remove_dependency(key, all_depends, usage, ordered):  1225   1226     """  1227     Remove 'key', found in 'all_depends', from 'usage', inserting it into the  1228     'ordered' collection of dependencies.  1229   1230     Given that 'usage' for a given key A would indicate that "A needs <nothing>"  1231     upon removing A from 'usage', the outcome is that all keys needing A will  1232     have A removed from their 'usage' records.  1233   1234     So, if "B needs A", removing A will cause "B needs <nothing>" to be recorded  1235     as a consequence.  1236     """  1237   1238     ordered.insert(0, key)  1239   1240     depends = all_depends.get(key)  1241   1242     # Reduce usage of the referenced items.  1243   1244     if depends:  1245         for depend in depends:  1246             usage[depend].remove(key)  1247   1248     del usage[key]  1249   1250 # General input/output.  1251   1252 def readfile(filename):  1253   1254     "Return the contents of 'filename'."  1255   1256     f = open(filename)  1257     try:  1258         return f.read()  1259     finally:  1260         f.close()  1261   1262 def writefile(filename, s):  1263   1264     "Write to 'filename' the string 's'."  1265   1266     f = open(filename, "w")  1267     try:  1268         f.write(s)  1269     finally:  1270         f.close()  1271   1272 # General encoding.  1273   1274 def sorted_output(x):  1275   1276     "Sort sequence 'x' and return a string with commas separating the values."  1277   1278     x = map(str, x)  1279     x.sort()  1280     return ", ".join(x)  1281   1282 def get_string_details(literals, encoding):  1283   1284     """  1285     Determine whether 'literals' represent Unicode strings or byte strings,  1286     using 'encoding' to reproduce byte sequences.  1287   1288     Each literal is the full program representation including prefix and quotes  1289     recoded by the parser to UTF-8. Thus, any literal found to represent a byte  1290     string needs to be translated back to its original encoding.  1291   1292     Return a single encoded literal value, a type name, and the original  1293     encoding as a tuple.  1294     """  1295   1296     typename = "unicode"  1297   1298     l = []  1299   1300     for s in literals:  1301         out, _typename = get_literal_details(s)  1302         if _typename == "str":  1303             typename = "str"  1304         l.append(out)  1305   1306     out = "".join(l)  1307   1308     # For Unicode values, convert to the UTF-8 program representation.  1309   1310     if typename == "unicode":  1311         return out.encode("utf-8"), typename, encoding  1312   1313     # For byte string values, convert back to the original encoding.  1314   1315     else:  1316         return out.encode(encoding), typename, encoding  1317   1318 def get_literal_details(s):  1319   1320     """  1321     Determine whether 's' represents a Unicode string or a byte string, where  1322     's' contains the full program representation of a literal including prefix  1323     and quotes, recoded by the parser to UTF-8.  1324   1325     Find and convert Unicode values starting with <backslash>u or <backslash>U,  1326     and byte or Unicode values starting with <backslash><octal digit> or  1327     <backslash>x.  1328   1329     Literals prefixed with "u" cause <backslash><octal digit> and <backslash>x  1330     to be considered as Unicode values. Otherwise, they produce byte values and  1331     cause unprefixed strings to be considered as byte strings.  1332   1333     Literals prefixed with "r" do not have their backslash-encoded values  1334     converted unless also prefixed with "u", in which case only the above value  1335     formats are converted, not any of the other special sequences for things  1336     like newlines.  1337   1338     Return the literal value as a Unicode object together with the appropriate  1339     type name in a tuple.  1340     """  1341   1342     l = []  1343   1344     # Identify the quote character and use it to identify the prefix.  1345   1346     quote_type = s[-1]  1347     prefix_end = s.find(quote_type)  1348     prefix = s[:prefix_end].lower()  1349   1350     if prefix not in ("", "b", "br", "r", "u", "ur"):  1351         raise ValueError, "String literal does not have a supported prefix: %s" % s  1352   1353     if "b" in prefix:  1354         typename = "str"  1355     else:  1356         typename = "unicode"  1357   1358     # Identify triple quotes or single quotes.  1359   1360     if len(s) >= 6 and s[-2] == quote_type and s[-3] == quote_type:  1361         quote = s[prefix_end:prefix_end+3]  1362         current = prefix_end + 3  1363         end = len(s) - 3  1364     else:  1365         quote = s[prefix_end]  1366         current = prefix_end + 1  1367         end = len(s) - 1  1368   1369     # Conversions of some quoted values.  1370   1371     searches = {  1372         "u" : (6, 16),  1373         "U" : (10, 16),  1374         "x" : (4, 16),  1375         }  1376   1377     octal_digits = map(str, range(0, 8))  1378   1379     # Translations of some quoted values.  1380   1381     escaped = {  1382         "\\" : "\\", "'" : "'", '"' : '"',  1383         "a" : "\a", "b" : "\b", "f" : "\f",  1384         "n" : "\n", "r" : "\r", "t" : "\t",  1385         }  1386   1387     while current < end:  1388   1389         # Look for quoted values.  1390   1391         index = s.find("\\", current)  1392         if index == -1 or index + 1 == end:  1393             l.append(s[current:end])  1394             break  1395   1396         # Add the preceding text.  1397   1398         l.append(s[current:index])  1399   1400         # Handle quoted text.  1401   1402         term = s[index+1]  1403   1404         # Add Unicode values. Where a string is u-prefixed, even \o and \x  1405         # produce Unicode values.  1406   1407         if typename == "unicode" and (  1408             term in ("u", "U") or   1409             "u" in prefix and (term == "x" or term in octal_digits)):  1410   1411             needed, base = searches.get(term, (4, 8))  1412             value = convert_quoted_value(s, index, needed, end, base, unichr)  1413             l.append(value)  1414             current = index + needed  1415   1416         # Add raw byte values, changing the string type.  1417   1418         elif "r" not in prefix and (  1419              term == "x" or term in octal_digits):  1420   1421             needed, base = searches.get(term, (4, 8))  1422             value = convert_quoted_value(s, index, needed, end, base, chr)  1423             l.append(value)  1424             typename = "str"  1425             current = index + needed  1426   1427         # Add other escaped values.  1428   1429         elif "r" not in prefix and escaped.has_key(term):  1430             l.append(escaped[term])  1431             current = index + 2  1432   1433         # Add other text as found.  1434   1435         else:  1436             l.append(s[index:index+2])  1437             current = index + 2  1438   1439     # Collect the components into a single Unicode object. Since the literal  1440     # text was already in UTF-8 form, interpret plain strings as UTF-8  1441     # sequences.  1442   1443     out = []  1444   1445     for value in l:  1446         if isinstance(value, unicode):  1447             out.append(value)  1448         else:  1449             out.append(unicode(value, "utf-8"))  1450   1451     return "".join(out), typename  1452   1453 def convert_quoted_value(s, index, needed, end, base, fn):  1454   1455     """  1456     Interpret a quoted value in 's' at 'index' with the given 'needed' number of  1457     positions, and with the given 'end' indicating the first position after the  1458     end of the actual string content.  1459   1460     Use 'base' as the numerical base when interpreting the value, and use 'fn'  1461     to convert the value to an appropriate type.  1462     """  1463   1464     s = s[index:min(index+needed, end)]  1465   1466     # Not a complete occurrence.  1467   1468     if len(s) < needed:  1469         return s  1470   1471     # Test for a well-formed value.  1472   1473     try:  1474         first = base == 8 and 1 or 2  1475         value = int(s[first:needed], base)  1476     except ValueError:  1477         return s  1478     else:  1479         return fn(value)  1480   1481 # Attribute chain decoding.  1482   1483 def get_attrnames(attrnames):  1484   1485     """  1486     Split the qualified attribute chain 'attrnames' into its components,  1487     handling special attributes starting with "#" that indicate type  1488     conformance.  1489     """  1490   1491     if attrnames.startswith("#"):  1492         return [attrnames]  1493     else:  1494         return attrnames.split(".")  1495   1496 def get_name_path(path, name):  1497   1498     "Return a suitable qualified name from the given 'path' and 'name'."  1499   1500     if "." in name:  1501         return name  1502     else:  1503         return "%s.%s" % (path, name)  1504   1505 # Usage-related functions.  1506   1507 def get_types_for_usage(attrnames, objects):  1508   1509     """  1510     Identify the types that can support the given 'attrnames', using the  1511     given 'objects' as the catalogue of type details.  1512     """  1513   1514     types = []  1515     for name, _attrnames in objects.items():  1516         if set(attrnames).issubset(_attrnames):  1517             types.append(name)  1518     return types  1519   1520 def get_invoked_attributes(usage):  1521   1522     "Obtain invoked attribute from the given 'usage'."  1523   1524     invoked = []  1525     if usage:  1526         for attrname, invocation, assignment in usage:  1527             if invocation:  1528                 invoked.append(attrname)  1529     return invoked  1530   1531 def get_assigned_attributes(usage):  1532   1533     "Obtain assigned attribute from the given 'usage'."  1534   1535     assigned = []  1536     if usage:  1537         for attrname, invocation, assignment in usage:  1538             if assignment:  1539                 assigned.append(attrname)  1540     return assigned  1541   1542 # Type and module functions.  1543 # NOTE: This makes assumptions about the __builtins__ structure.  1544   1545 def get_builtin_module(name):  1546   1547     "Return the module name containing the given type 'name'."  1548   1549     if name == "string":  1550         modname = "str"  1551     elif name == "utf8string":  1552         modname = "unicode"  1553     elif name == "NoneType":  1554         modname = "none"  1555     else:  1556         modname = name  1557   1558     return "__builtins__.%s" % modname  1559   1560 def get_builtin_type(name):  1561   1562     "Return the type name provided by the given Python value 'name'."  1563   1564     if name == "str":  1565         return "string"  1566     elif name == "unicode":  1567         return "utf8string"  1568     else:  1569         return name  1570   1571 def get_builtin_class(name):  1572   1573     "Return the full name of the built-in class having the given 'name'."  1574   1575     typename = get_builtin_type(name)  1576     module = get_builtin_module(typename)  1577     return "%s.%s" % (module, typename)  1578   1579 # Useful data.  1580   1581 predefined_constants = "False", "None", "NotImplemented", "True"  1582   1583 unary_operator_functions = {  1584   1585     # Unary operations.  1586   1587     "Invert" : "invert",  1588     "UnaryAdd" : "pos",  1589     "UnarySub" : "neg",  1590     }  1591   1592 operator_functions = {  1593   1594     # Fundamental operations.  1595   1596     "is" : "is_",  1597     "is not" : "is_not",  1598   1599     # Binary operations.  1600   1601     "in" : "in_",  1602     "not in" : "not_in",  1603     "Add" : "add",  1604     "Bitand" : "and_",  1605     "Bitor" : "or_",  1606     "Bitxor" : "xor",  1607     "Div" : "div",  1608     "FloorDiv" : "floordiv",  1609     "LeftShift" : "lshift",  1610     "Mod" : "mod",  1611     "Mul" : "mul",  1612     "Power" : "pow",  1613     "RightShift" : "rshift",  1614     "Sub" : "sub",  1615   1616     # Augmented assignment.  1617   1618     "+=" : "iadd",  1619     "-=" : "isub",  1620     "*=" : "imul",  1621     "/=" : "idiv",  1622     "//=" : "ifloordiv",  1623     "%=" : "imod",  1624     "**=" : "ipow",  1625     "<<=" : "ilshift",  1626     ">>=" : "irshift",  1627     "&=" : "iand",  1628     "^=" : "ixor",  1629     "|=" : "ior",  1630   1631     # Comparisons.  1632   1633     "==" : "eq",  1634     "!=" : "ne",  1635     "<" : "lt",  1636     "<=" : "le",  1637     ">=" : "ge",  1638     ">" : "gt",  1639     }  1640   1641 operator_functions.update(unary_operator_functions)  1642   1643 # vim: tabstop=4 expandtab shiftwidth=4