Lichen

common.py

786:7cfb3ba08460
2017-03-26 Paul Boddie Moved literal instantiator definitions to the templates, reordering the parameters and employing bare arguments.
     1 #!/usr/bin/env python     2      3 """     4 Common functions.     5      6 Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013,     7               2014, 2015, 2016, 2017 Paul Boddie <paul@boddie.org.uk>     8      9 This program is free software; you can redistribute it and/or modify it under    10 the terms of the GNU General Public License as published by the Free Software    11 Foundation; either version 3 of the License, or (at your option) any later    12 version.    13     14 This program is distributed in the hope that it will be useful, but WITHOUT    15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS    16 FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more    17 details.    18     19 You should have received a copy of the GNU General Public License along with    20 this program.  If not, see <http://www.gnu.org/licenses/>.    21 """    22     23 from compiler.transformer import Transformer    24 from errors import InspectError    25 from os import listdir, makedirs, remove    26 from os.path import exists, getmtime, isdir, join, split    27 from results import ConstantValueRef, LiteralSequenceRef, NameRef    28 import compiler.ast    29     30 class CommonOutput:    31     32     "Common output functionality."    33     34     def check_output(self, options=None):    35     36         "Check the existing output and remove it if irrelevant."    37     38         if not exists(self.output):    39             makedirs(self.output)    40     41         details = self.importer.get_cache_details()    42         recorded_details = self.get_output_details()    43     44         # Combine cache details with any options.    45     46         full_details = options and (details + " " + options) or details    47     48         if recorded_details != full_details:    49             self.remove_output()    50     51         writefile(self.get_output_details_filename(), full_details)    52     53     def get_output_details_filename(self):    54     55         "Return the output details filename."    56     57         return join(self.output, "$details")    58     59     def get_output_details(self):    60     61         "Return details of the existing output."    62     63         details_filename = self.get_output_details_filename()    64     65         if not exists(details_filename):    66             return None    67         else:    68             return readfile(details_filename)    69     70     def remove_output(self, dirname=None):    71     72         "Remove the output."    73     74         dirname = dirname or self.output    75     76         for filename in listdir(dirname):    77             path = join(dirname, filename)    78             if isdir(path):    79                 self.remove_output(path)    80             else:    81                 remove(path)    82     83 def copy(source, target, only_if_newer=True):    84     85     "Copy a text file from 'source' to 'target'."    86     87     if isdir(target):    88         target = join(target, split(source)[-1])    89     90     if only_if_newer and not is_newer(source, target):    91         return    92     93     infile = open(source)    94     outfile = open(target, "w")    95     96     try:    97         outfile.write(infile.read())    98     finally:    99         outfile.close()   100         infile.close()   101    102 def is_newer(source, target):   103    104     "Return whether 'source' is newer than 'target'."   105    106     if exists(target):   107         target_mtime = getmtime(target)   108         source_mtime = getmtime(source)   109         return source_mtime > target_mtime   110    111     return True   112    113 class CommonModule:   114    115     "A common module representation."   116    117     def __init__(self, name, importer):   118    119         """   120         Initialise this module with the given 'name' and an 'importer' which is   121         used to provide access to other modules when required.   122         """   123    124         self.name = name   125         self.importer = importer   126         self.filename = None   127    128         # Inspection-related attributes.   129    130         self.astnode = None   131         self.encoding = None   132         self.temp = {}   133         self.lambdas = {}   134    135         # Constants, literals and values.   136    137         self.constants = {}   138         self.constant_values = {}   139         self.literals = {}   140         self.literal_types = {}   141    142         # Nested namespaces.   143    144         self.namespace_path = []   145         self.in_function = False   146    147         # Retain the assignment value expression and track invocations.   148    149         self.in_assignment = None   150         self.in_invocation = None   151    152         # Attribute chain state management.   153    154         self.attrs = []   155         self.chain_assignment = []   156         self.chain_invocation = []   157    158     def __repr__(self):   159         return "CommonModule(%r, %r)" % (self.name, self.importer)   160    161     def parse_file(self, filename):   162    163         "Parse the file with the given 'filename', initialising attributes."   164    165         self.filename = filename   166    167         # Use the Transformer directly to obtain encoding information.   168    169         t = Transformer()   170         f = open(filename)   171    172         try:   173             self.astnode = t.parsesuite(f.read() + "\n")   174             self.encoding = t.encoding   175         finally:   176             f.close()   177    178     # Module-relative naming.   179    180     def get_global_path(self, name):   181         return "%s.%s" % (self.name, name)   182    183     def get_namespace_path(self):   184         return ".".join([self.name] + self.namespace_path)   185    186     def get_object_path(self, name):   187         return ".".join([self.name] + self.namespace_path + [name])   188    189     def get_parent_path(self):   190         return ".".join([self.name] + self.namespace_path[:-1])   191    192     # Namespace management.   193    194     def enter_namespace(self, name):   195    196         "Enter the namespace having the given 'name'."   197    198         self.namespace_path.append(name)   199    200     def exit_namespace(self):   201    202         "Exit the current namespace."   203    204         self.namespace_path.pop()   205    206     # Constant reference naming.   207    208     def get_constant_name(self, value, value_type, encoding=None):   209    210         """   211         Add a new constant to the current namespace for 'value' with   212         'value_type'.   213         """   214    215         path = self.get_namespace_path()   216         init_item(self.constants, path, dict)   217         return "$c%d" % add_counter_item(self.constants[path], (value, value_type, encoding))   218    219     # Literal reference naming.   220    221     def get_literal_name(self):   222    223         "Add a new literal to the current namespace."   224    225         path = self.get_namespace_path()   226         init_item(self.literals, path, lambda: 0)   227         return "$C%d" % self.literals[path]   228    229     def next_literal(self):   230         self.literals[self.get_namespace_path()] += 1   231    232     # Temporary variable naming.   233    234     def get_temporary_name(self):   235         path = self.get_namespace_path()   236         init_item(self.temp, path, lambda: 0)   237         return "$t%d" % self.temp[path]   238    239     def next_temporary(self):   240         self.temp[self.get_namespace_path()] += 1   241    242     # Arbitrary function naming.   243    244     def get_lambda_name(self):   245         path = self.get_namespace_path()   246         init_item(self.lambdas, path, lambda: 0)   247         name = "$l%d" % self.lambdas[path]   248         self.lambdas[path] += 1   249         return name   250    251     def reset_lambdas(self):   252         self.lambdas = {}   253    254     # Constant and literal recording.   255    256     def get_constant_value(self, value, literals=None):   257    258         """   259         Encode the 'value' if appropriate, returning a value, a typename and any   260         encoding.   261         """   262    263         if isinstance(value, unicode):   264             return value.encode("utf-8"), "unicode", self.encoding   265    266         # Attempt to convert plain strings to text.   267    268         elif isinstance(value, str) and self.encoding:   269             try:   270                 return get_string_details(literals, self.encoding)   271             except UnicodeDecodeError:   272                 pass   273    274         return value, value.__class__.__name__, None   275    276     def get_constant_reference(self, ref, value, encoding=None):   277    278         """   279         Return a constant reference for the given 'ref' type and 'value', with   280         the optional 'encoding' applying to text values.   281         """   282    283         constant_name = self.get_constant_name(value, ref.get_origin(), encoding)   284    285         # Return a reference for the constant.   286    287         objpath = self.get_object_path(constant_name)   288         name_ref = ConstantValueRef(constant_name, ref.instance_of(objpath), value)   289    290         # Record the value and type for the constant.   291    292         self._reserve_constant(objpath, name_ref.value, name_ref.get_origin(), encoding)   293         return name_ref   294    295     def reserve_constant(self, objpath, value, origin, encoding=None):   296    297         """   298         Reserve a constant within 'objpath' with the given 'value' and having a   299         type with the given 'origin', with the optional 'encoding' applying to   300         text values.   301         """   302    303         constant_name = self.get_constant_name(value, origin)   304         objpath = self.get_object_path(constant_name)   305         self._reserve_constant(objpath, value, origin, encoding)   306    307     def _reserve_constant(self, objpath, value, origin, encoding):   308    309         """   310         Store a constant for 'objpath' with the given 'value' and 'origin', with   311         the optional 'encoding' applying to text values.   312         """   313    314         self.constant_values[objpath] = value, origin, encoding   315    316     def get_literal_reference(self, name, ref, items, cls):   317    318         """   319         Return a literal reference for the given type 'name', literal 'ref',   320         node 'items' and employing the given 'cls' as the class of the returned   321         reference object.   322         """   323    324         # Construct an invocation using the items as arguments.   325    326         typename = "$L%s" % name   327    328         invocation = compiler.ast.CallFunc(   329             compiler.ast.Name(typename),   330             items   331             )   332    333         # Get a name for the actual literal.   334    335         instname = self.get_literal_name()   336         self.next_literal()   337    338         # Record the type for the literal.   339    340         objpath = self.get_object_path(instname)   341         self.literal_types[objpath] = ref.get_origin()   342    343         # Return a wrapper for the invocation exposing the items.   344    345         return cls(   346             instname,   347             ref.instance_of(),   348             self.process_structure_node(invocation),   349             invocation.args   350             )   351    352     # Node handling.   353    354     def process_structure(self, node):   355    356         """   357         Within the given 'node', process the program structure.   358    359         During inspection, this will process global declarations, adjusting the   360         module namespace, and import statements, building a module dependency   361         hierarchy.   362    363         During translation, this will consult deduced program information and   364         output translated code.   365         """   366    367         l = []   368         for n in node.getChildNodes():   369             l.append(self.process_structure_node(n))   370         return l   371    372     def process_augassign_node(self, n):   373    374         "Process the given augmented assignment node 'n'."   375    376         op = operator_functions[n.op]   377    378         if isinstance(n.node, compiler.ast.Getattr):   379             target = compiler.ast.AssAttr(n.node.expr, n.node.attrname, "OP_ASSIGN")   380         elif isinstance(n.node, compiler.ast.Name):   381             target = compiler.ast.AssName(n.node.name, "OP_ASSIGN")   382         else:   383             target = n.node   384    385         assignment = compiler.ast.Assign(   386             [target],   387             compiler.ast.CallFunc(   388                 compiler.ast.Name("$op%s" % op),   389                 [n.node, n.expr]))   390    391         return self.process_structure_node(assignment)   392    393     def process_assignment_for_object(self, original_name, source):   394    395         """   396         Return an assignment operation making 'original_name' refer to the given   397         'source'.   398         """   399    400         assignment = compiler.ast.Assign(   401             [compiler.ast.AssName(original_name, "OP_ASSIGN")],   402             source   403             )   404    405         return self.process_structure_node(assignment)   406    407     def process_assignment_node_items(self, n, expr):   408    409         """   410         Process the given assignment node 'n' whose children are to be assigned   411         items of 'expr'.   412         """   413    414         name_ref = self.process_structure_node(expr)   415    416         # Either unpack the items and present them directly to each assignment   417         # node.   418    419         if isinstance(name_ref, LiteralSequenceRef) and \   420            self.process_literal_sequence_items(n, name_ref):   421    422             pass   423    424         # Or have the assignment nodes access each item via the sequence API.   425    426         else:   427             self.process_assignment_node_items_by_position(n, expr, name_ref)   428    429     def process_assignment_node_items_by_position(self, n, expr, name_ref):   430    431         """   432         Process the given sequence assignment node 'n', converting the node to   433         the separate assignment of each target using positional access on a   434         temporary variable representing the sequence. Use 'expr' as the assigned   435         value and 'name_ref' as the reference providing any existing temporary   436         variable.   437         """   438    439         assignments = []   440    441         # Employ existing names to access the sequence.   442         # Literal sequences do not provide names of accessible objects.   443    444         if isinstance(name_ref, NameRef) and not isinstance(name_ref, LiteralSequenceRef):   445             temp = name_ref.name   446    447         # For other expressions, create a temporary name to reference the items.   448    449         else:   450             temp = self.get_temporary_name()   451             self.next_temporary()   452    453             assignments.append(   454                 compiler.ast.Assign([compiler.ast.AssName(temp, "OP_ASSIGN")], expr)   455                 )   456    457         # Assign the items to the target nodes.   458    459         for i, node in enumerate(n.nodes):   460             assignments.append(   461                 compiler.ast.Assign([node], compiler.ast.Subscript(   462                     compiler.ast.Name(temp), "OP_APPLY", [compiler.ast.Const(i, str(i))]))   463                 )   464    465         return self.process_structure_node(compiler.ast.Stmt(assignments))   466    467     def process_literal_sequence_items(self, n, name_ref):   468    469         """   470         Process the given assignment node 'n', obtaining from the given   471         'name_ref' the items to be assigned to the assignment targets.   472    473         Return whether this method was able to process the assignment node as   474         a sequence of direct assignments.   475         """   476    477         if len(n.nodes) == len(name_ref.items):   478             assigned_names, count = get_names_from_nodes(n.nodes)   479             accessed_names, _count = get_names_from_nodes(name_ref.items)   480    481             # Only assign directly between items if all assigned names are   482             # plain names (not attribute assignments), and if the assigned names   483             # do not appear in the accessed names.   484    485             if len(assigned_names) == count and \   486                not assigned_names.intersection(accessed_names):   487    488                 for node, item in zip(n.nodes, name_ref.items):   489                     self.process_assignment_node(node, item)   490    491                 return True   492    493             # Otherwise, use the position-based mechanism to obtain values.   494    495             else:   496                 return False   497         else:   498             raise InspectError("In %s, item assignment needing %d items is given %d items." % (   499                 self.get_namespace_path(), len(n.nodes), len(name_ref.items)))   500    501     def process_compare_node(self, n):   502    503         """   504         Process the given comparison node 'n', converting an operator sequence   505         from...   506    507         <expr1> <op1> <expr2> <op2> <expr3>   508    509         ...to...   510    511         <op1>(<expr1>, <expr2>) and <op2>(<expr2>, <expr3>)   512         """   513    514         invocations = []   515         last = n.expr   516    517         for op, op_node in n.ops:   518             op = operator_functions.get(op)   519    520             invocations.append(compiler.ast.CallFunc(   521                 compiler.ast.Name("$op%s" % op),   522                 [last, op_node]))   523    524             last = op_node   525    526         if len(invocations) > 1:   527             result = compiler.ast.And(invocations)   528         else:   529             result = invocations[0]   530    531         return self.process_structure_node(result)   532    533     def process_dict_node(self, node):   534    535         """   536         Process the given dictionary 'node', returning a list of (key, value)   537         tuples.   538         """   539    540         l = []   541         for key, value in node.items:   542             l.append((   543                 self.process_structure_node(key),   544                 self.process_structure_node(value)))   545         return l   546    547     def process_for_node(self, n):   548    549         """   550         Generate attribute accesses for {n.list}.__iter__ and the next method on   551         the iterator, producing a replacement node for the original.   552         """   553    554         t0 = self.get_temporary_name()   555         self.next_temporary()   556         t1 = self.get_temporary_name()   557         self.next_temporary()   558    559         node = compiler.ast.Stmt([   560    561             # <t0> = {n.list}   562             # <t1> = <t0>.__iter__()   563    564             compiler.ast.Assign(   565                 [compiler.ast.AssName(t0, "OP_ASSIGN")],   566                 n.list),   567    568             compiler.ast.Assign(   569                 [compiler.ast.AssName(t1, "OP_ASSIGN")],   570                 compiler.ast.CallFunc(   571                     compiler.ast.Getattr(compiler.ast.Name(t0), "__iter__"),   572                     [])),   573    574             # try:   575             #     while True:   576             #         <var>... = <t1>.next()   577             #         ...   578             # except StopIteration:   579             #     pass   580    581             compiler.ast.TryExcept(   582                 compiler.ast.While(   583                     compiler.ast.Name("True"),   584                     compiler.ast.Stmt([   585                         compiler.ast.Assign(   586                             [n.assign],   587                             compiler.ast.CallFunc(   588                                 compiler.ast.Getattr(compiler.ast.Name(t1), "next"),   589                                 []   590                                 )),   591                         n.body]),   592                     None),   593                 [(compiler.ast.Name("StopIteration"), None, compiler.ast.Stmt([compiler.ast.Pass()]))],   594                 None)   595             ])   596    597         self.process_structure_node(node)   598    599     def process_literal_sequence_node(self, n, name, ref, cls):   600    601         """   602         Process the given literal sequence node 'n' as a function invocation,   603         with 'name' indicating the type of the sequence, and 'ref' being a   604         reference to the type. The 'cls' is used to instantiate a suitable name   605         reference.   606         """   607    608         if name == "dict":   609             items = []   610             for key, value in n.items:   611                 items.append(compiler.ast.Tuple([key, value]))   612         else: # name in ("list", "tuple"):   613             items = n.nodes   614    615         return self.get_literal_reference(name, ref, items, cls)   616    617     def process_operator_node(self, n):   618    619         """   620         Process the given operator node 'n' as an operator function invocation.   621         """   622    623         op = operator_functions[n.__class__.__name__]   624         invocation = compiler.ast.CallFunc(   625             compiler.ast.Name("$op%s" % op),   626             list(n.getChildNodes())   627             )   628         return self.process_structure_node(invocation)   629    630     def process_print_node(self, n):   631    632         """   633         Process the given print node 'n' as an invocation on a stream of the   634         form...   635    636         $print(dest, args, nl)   637    638         The special function name will be translated elsewhere.   639         """   640    641         nl = isinstance(n, compiler.ast.Printnl)   642         invocation = compiler.ast.CallFunc(   643             compiler.ast.Name("$print"),   644             [n.dest or compiler.ast.Name("None"),   645              compiler.ast.List(list(n.nodes)),   646              nl and compiler.ast.Name("True") or compiler.ast.Name("False")]   647             )   648         return self.process_structure_node(invocation)   649    650     def process_slice_node(self, n, expr=None):   651    652         """   653         Process the given slice node 'n' as a method invocation.   654         """   655    656         if n.flags == "OP_ASSIGN": op = "__setslice__"   657         elif n.flags == "OP_DELETE": op = "__delslice__"   658         else: op = "__getslice__"   659    660         invocation = compiler.ast.CallFunc(   661             compiler.ast.Getattr(n.expr, op),   662             [n.lower or compiler.ast.Name("None"), n.upper or compiler.ast.Name("None")] +   663                 (expr and [expr] or [])   664             )   665    666         # Fix parse tree structure.   667    668         if op == "__delslice__":   669             invocation = compiler.ast.Discard(invocation)   670    671         return self.process_structure_node(invocation)   672    673     def process_sliceobj_node(self, n):   674    675         """   676         Process the given slice object node 'n' as a slice constructor.   677         """   678    679         op = "slice"   680         invocation = compiler.ast.CallFunc(   681             compiler.ast.Name("$op%s" % op),   682             n.nodes   683             )   684         return self.process_structure_node(invocation)   685    686     def process_subscript_node(self, n, expr=None):   687    688         """   689         Process the given subscript node 'n' as a method invocation.   690         """   691    692         if n.flags == "OP_ASSIGN": op = "__setitem__"   693         elif n.flags == "OP_DELETE": op = "__delitem__"   694         else: op = "__getitem__"   695    696         invocation = compiler.ast.CallFunc(   697             compiler.ast.Getattr(n.expr, op),   698             list(n.subs) + (expr and [expr] or [])   699             )   700    701         # Fix parse tree structure.   702    703         if op == "__delitem__":   704             invocation = compiler.ast.Discard(invocation)   705    706         return self.process_structure_node(invocation)   707    708     def process_attribute_chain(self, n):   709    710         """   711         Process the given attribute access node 'n'. Return a reference   712         describing the expression.   713         """   714    715         # AssAttr/Getattr are nested with the outermost access being the last   716         # access in any chain.   717    718         self.attrs.insert(0, n.attrname)   719         attrs = self.attrs   720    721         # Break attribute chains where non-access nodes are found.   722    723         if not self.have_access_expression(n):   724             self.reset_attribute_chain()   725    726         # Descend into the expression, extending backwards any existing chain,   727         # or building another for the expression.   728    729         name_ref = self.process_structure_node(n.expr)   730    731         # Restore chain information applying to this node.   732    733         if not self.have_access_expression(n):   734             self.restore_attribute_chain(attrs)   735    736         # Return immediately if the expression was another access and thus a   737         # continuation backwards along the chain. The above processing will   738         # have followed the chain all the way to its conclusion.   739    740         if self.have_access_expression(n):   741             del self.attrs[0]   742    743         return name_ref   744    745     # Attribute chain handling.   746    747     def reset_attribute_chain(self):   748    749         "Reset the attribute chain for a subexpression of an attribute access."   750    751         self.attrs = []   752         self.chain_assignment.append(self.in_assignment)   753         self.chain_invocation.append(self.in_invocation)   754         self.in_assignment = None   755         self.in_invocation = None   756    757     def restore_attribute_chain(self, attrs):   758    759         "Restore the attribute chain for an attribute access."   760    761         self.attrs = attrs   762         self.in_assignment = self.chain_assignment.pop()   763         self.in_invocation = self.chain_invocation.pop()   764    765     def have_access_expression(self, node):   766    767         "Return whether the expression associated with 'node' is Getattr."   768    769         return isinstance(node.expr, compiler.ast.Getattr)   770    771     def get_name_for_tracking(self, name, name_ref=None, is_global=False):   772    773         """   774         Return the name to be used for attribute usage observations involving   775         the given 'name' in the current namespace.   776    777         If the name is being used outside a function, and if 'name_ref' is   778         given and indicates a global or if 'is_global' is specified as a true   779         value, a path featuring the name in the global namespace is returned.   780         Otherwise, a path computed using the current namespace and the given   781         name is returned.   782    783         The intention of this method is to provide a suitably-qualified name   784         that can be tracked across namespaces. Where globals are being   785         referenced in class namespaces, they should be referenced using their   786         path within the module, not using a path within each class.   787    788         It may not be possible to identify a global within a function at the   789         time of inspection (since a global may appear later in a file).   790         Consequently, globals are identified by their local name rather than   791         their module-qualified path.   792         """   793    794         # For functions, use the appropriate local names.   795    796         if self.in_function:   797             return name   798    799         # For global names outside functions, use a global name.   800    801         elif is_global or name_ref and name_ref.is_global_name():   802             return self.get_global_path(name)   803    804         # Otherwise, establish a name in the current namespace.   805    806         else:   807             return self.get_object_path(name)   808    809     def get_path_for_access(self):   810    811         "Outside functions, register accesses at the module level."   812    813         if not self.in_function:   814             return self.name   815         else:   816             return self.get_namespace_path()   817    818     def get_module_name(self, node):   819    820         """   821         Using the given From 'node' in this module, calculate any relative import   822         information, returning a tuple containing a module to import along with any   823         names to import based on the node's name information.   824    825         Where the returned module is given as None, whole module imports should   826         be performed for the returned modules using the returned names.   827         """   828    829         # Absolute import.   830    831         if node.level == 0:   832             return node.modname, node.names   833    834         # Relative to an ancestor of this module.   835    836         else:   837             path = self.name.split(".")   838             level = node.level   839    840             # Relative imports treat package roots as submodules.   841    842             if split(self.filename)[-1] == "__init__.py":   843                 level -= 1   844    845             if level > len(path):   846                 raise InspectError("Relative import %r involves too many levels up from module %r" % (   847                     ("%s%s" % ("." * node.level, node.modname or "")), self.name))   848    849             basename = ".".join(path[:len(path)-level])   850    851         # Name imports from a module.   852    853         if node.modname:   854             return "%s.%s" % (basename, node.modname), node.names   855    856         # Relative whole module imports.   857    858         else:   859             return basename, node.names   860    861 def get_argnames(args):   862    863     """   864     Return a list of all names provided by 'args'. Since tuples may be   865     employed, the arguments are traversed depth-first.   866     """   867    868     l = []   869     for arg in args:   870         if isinstance(arg, tuple):   871             l += get_argnames(arg)   872         else:   873             l.append(arg)   874     return l   875    876 def get_names_from_nodes(nodes):   877    878     """   879     Return the names employed in the given 'nodes' along with the number of   880     nodes excluding sequences.   881     """   882    883     names = set()   884     count = 0   885    886     for node in nodes:   887    888         # Add names and count them.   889    890         if isinstance(node, (compiler.ast.AssName, compiler.ast.Name)):   891             names.add(node.name)   892             count += 1   893    894         # Add names from sequences and incorporate their counts.   895    896         elif isinstance(node, (compiler.ast.AssList, compiler.ast.AssTuple,   897                                compiler.ast.List, compiler.ast.Set,   898                                compiler.ast.Tuple)):   899             _names, _count = get_names_from_nodes(node.nodes)   900             names.update(_names)   901             count += _count   902    903         # Count non-name, non-sequence nodes.   904    905         else:   906             count += 1   907    908     return names, count   909    910 # Result classes.   911    912 class InstructionSequence:   913    914     "A generic sequence of instructions."   915    916     def __init__(self, instructions):   917         self.instructions = instructions   918    919     def get_value_instruction(self):   920         return self.instructions[-1]   921    922     def get_init_instructions(self):   923         return self.instructions[:-1]   924    925 # Dictionary utilities.   926    927 def init_item(d, key, fn):   928    929     """   930     Add to 'd' an entry for 'key' using the callable 'fn' to make an initial   931     value where no entry already exists.   932     """   933    934     if not d.has_key(key):   935         d[key] = fn()   936     return d[key]   937    938 def dict_for_keys(d, keys):   939    940     "Return a new dictionary containing entries from 'd' for the given 'keys'."   941    942     nd = {}   943     for key in keys:   944         if d.has_key(key):   945             nd[key] = d[key]   946     return nd   947    948 def make_key(s):   949    950     "Make sequence 's' into a tuple-based key, first sorting its contents."   951    952     l = list(s)   953     l.sort()   954     return tuple(l)   955    956 def add_counter_item(d, key):   957    958     """   959     Make a mapping in 'd' for 'key' to the number of keys added before it, thus   960     maintaining a mapping of keys to their order of insertion.   961     """   962    963     if not d.has_key(key):   964         d[key] = len(d.keys())   965     return d[key]    966    967 def remove_items(d1, d2):   968    969     "Remove from 'd1' all items from 'd2'."   970    971     for key in d2.keys():   972         if d1.has_key(key):   973             del d1[key]   974    975 # Set utilities.   976    977 def first(s):   978     return list(s)[0]   979    980 def same(s1, s2):   981     return set(s1) == set(s2)   982    983 def order_dependencies(all_depends):   984    985     """   986     Produce a dependency ordering for the 'all_depends' mapping. This mapping   987     has the form "A depends on B, C...". The result will order A, B, C, and so   988     on.   989     """   990    991     usage = init_reverse_dependencies(all_depends)   992    993     # Produce an ordering by obtaining exposed items (required by items already   994     # processed) and putting them at the start of the list.   995    996     ordered = []   997    998     while usage:   999         have_next = False  1000   1001         for key, n in usage.items():  1002   1003             # Add items needed by no other items to the ordering.  1004   1005             if not n:  1006                 remove_dependency(key, all_depends, usage, ordered)  1007                 have_next = True  1008   1009         if not have_next:  1010             raise ValueError, usage  1011   1012     return ordered  1013   1014 def order_dependencies_partial(all_depends):  1015   1016     """  1017     Produce a dependency ordering for the 'all_depends' mapping. This mapping  1018     has the form "A depends on B, C...". The result will order A, B, C, and so  1019     on. Where cycles exist, they will be broken and a partial ordering returned.  1020     """  1021   1022     usage = init_reverse_dependencies(all_depends)  1023   1024     # Duplicate the dependencies for subsequent modification.  1025   1026     new_depends = {}  1027     for key, values in all_depends.items():  1028         new_depends[key] = set(values)  1029   1030     all_depends = new_depends  1031   1032     # Produce an ordering by obtaining exposed items (required by items already  1033     # processed) and putting them at the start of the list.  1034   1035     ordered = []  1036   1037     while usage:  1038         least = None  1039         least_key = None  1040   1041         for key, n in usage.items():  1042   1043             # Add items needed by no other items to the ordering.  1044   1045             if not n:  1046                 remove_dependency(key, all_depends, usage, ordered)  1047                 least = 0  1048   1049             # When breaking cycles, note the least used items.  1050   1051             elif least is None or len(n) < least:  1052                 least_key = key  1053                 least = len(n)  1054   1055         if least:  1056             transfer_dependencies(least_key, all_depends, usage, ordered)  1057   1058     return ordered  1059   1060 def init_reverse_dependencies(all_depends):  1061   1062     """  1063     From 'all_depends', providing a mapping of the form "A depends on B, C...",  1064     record the reverse dependencies, making a mapping of the form  1065     "B is needed by A", "C is needed by A", and so on.  1066     """  1067   1068     usage = {}  1069   1070     # Record path-based dependencies.  1071   1072     for key in all_depends.keys():  1073         usage[key] = set()  1074   1075     for key, depends in all_depends.items():  1076         for depend in depends:  1077             init_item(usage, depend, set)  1078             usage[depend].add(key)  1079   1080     return usage  1081   1082 def transfer_dependencies(key, all_depends, usage, ordered):  1083   1084     """  1085     Transfer items needed by 'key' to those items needing 'key', found using  1086     'all_depends', and updating 'usage'. Insert 'key' into the 'ordered'  1087     collection of dependencies.  1088   1089     If "A is needed by X" and "B is needed by A", then transferring items needed  1090     by A will cause "B is needed by X" to be recorded as a consequence.  1091   1092     Transferring items also needs to occur in the reverse mapping, so that  1093     "A needs B" and "X needs A", then the consequence must be recorded as  1094     "X needs B".  1095     """  1096   1097     ordered.insert(0, key)  1098   1099     needing = usage[key]                        # A is needed by X  1100     needed = all_depends.get(key)               # A needs B  1101   1102     if needing:  1103         for depend in needing:  1104             l = all_depends.get(depend)  1105             if not l:  1106                 continue  1107   1108             l.remove(key)                       # X needs (A)  1109   1110             if needed:  1111                 l.update(needed)                # X needs B...  1112   1113                 # Prevent self references.  1114   1115                 if depend in needed:  1116                     l.remove(depend)  1117   1118     if needed:  1119         for depend in needed:  1120             l = usage.get(depend)  1121             if not l:  1122                 continue  1123   1124             l.remove(key)                       # B is needed by (A)  1125             l.update(needing)                   # B is needed by X...  1126   1127             # Prevent self references.  1128   1129             if depend in needing:  1130                 l.remove(depend)  1131   1132     if needed:  1133         del all_depends[key]  1134     del usage[key]  1135   1136 def remove_dependency(key, all_depends, usage, ordered):  1137   1138     """  1139     Remove 'key', found in 'all_depends', from 'usage', inserting it into the  1140     'ordered' collection of dependencies.  1141   1142     Given that 'usage' for a given key A would indicate that "A needs <nothing>"  1143     upon removing A from 'usage', the outcome is that all keys needing A will  1144     have A removed from their 'usage' records.  1145   1146     So, if "B needs A", removing A will cause "B needs <nothing>" to be recorded  1147     as a consequence.  1148     """  1149   1150     ordered.insert(0, key)  1151   1152     depends = all_depends.get(key)  1153   1154     # Reduce usage of the referenced items.  1155   1156     if depends:  1157         for depend in depends:  1158             usage[depend].remove(key)  1159   1160     del usage[key]  1161   1162 # General input/output.  1163   1164 def readfile(filename):  1165   1166     "Return the contents of 'filename'."  1167   1168     f = open(filename)  1169     try:  1170         return f.read()  1171     finally:  1172         f.close()  1173   1174 def writefile(filename, s):  1175   1176     "Write to 'filename' the string 's'."  1177   1178     f = open(filename, "w")  1179     try:  1180         f.write(s)  1181     finally:  1182         f.close()  1183   1184 # General encoding.  1185   1186 def sorted_output(x):  1187   1188     "Sort sequence 'x' and return a string with commas separating the values."  1189   1190     x = map(str, x)  1191     x.sort()  1192     return ", ".join(x)  1193   1194 def get_string_details(literals, encoding):  1195   1196     """  1197     Determine whether 'literals' represent Unicode strings or byte strings,  1198     using 'encoding' to reproduce byte sequences.  1199   1200     Each literal is the full program representation including prefix and quotes  1201     recoded by the parser to UTF-8. Thus, any literal found to represent a byte  1202     string needs to be translated back to its original encoding.  1203   1204     Return a single encoded literal value, a type name, and the original  1205     encoding as a tuple.  1206     """  1207   1208     typename = "unicode"  1209   1210     l = []  1211   1212     for s in literals:  1213         out, _typename = get_literal_details(s)  1214         if _typename == "str":  1215             typename = "str"  1216         l.append(out)  1217   1218     out = "".join(l)  1219   1220     # For Unicode values, convert to the UTF-8 program representation.  1221   1222     if typename == "unicode":  1223         return out.encode("utf-8"), typename, encoding  1224   1225     # For byte string values, convert back to the original encoding.  1226   1227     else:  1228         return out.encode(encoding), typename, encoding  1229   1230 def get_literal_details(s):  1231   1232     """  1233     Determine whether 's' represents a Unicode string or a byte string, where  1234     's' contains the full program representation of a literal including prefix  1235     and quotes, recoded by the parser to UTF-8.  1236   1237     Find and convert Unicode values starting with <backslash>u or <backslash>U,  1238     and byte or Unicode values starting with <backslash><octal digit> or  1239     <backslash>x.  1240   1241     Literals prefixed with "u" cause <backslash><octal digit> and <backslash>x  1242     to be considered as Unicode values. Otherwise, they produce byte values and  1243     cause unprefixed strings to be considered as byte strings.  1244   1245     Literals prefixed with "r" do not have their backslash-encoded values  1246     converted unless also prefixed with "u", in which case only the above value  1247     formats are converted, not any of the other special sequences for things  1248     like newlines.  1249   1250     Return the literal value as a Unicode object together with the appropriate  1251     type name in a tuple.  1252     """  1253   1254     l = []  1255   1256     # Identify the quote character and use it to identify the prefix.  1257   1258     quote_type = s[-1]  1259     prefix_end = s.find(quote_type)  1260     prefix = s[:prefix_end].lower()  1261   1262     if prefix not in ("", "b", "br", "r", "u", "ur"):  1263         raise ValueError, "String literal does not have a supported prefix: %s" % s  1264   1265     if "b" in prefix:  1266         typename = "str"  1267     else:  1268         typename = "unicode"  1269   1270     # Identify triple quotes or single quotes.  1271   1272     if len(s) >= 6 and s[-2] == quote_type and s[-3] == quote_type:  1273         quote = s[prefix_end:prefix_end+3]  1274         current = prefix_end + 3  1275         end = len(s) - 3  1276     else:  1277         quote = s[prefix_end]  1278         current = prefix_end + 1  1279         end = len(s) - 1  1280   1281     # Conversions of some quoted values.  1282   1283     searches = {  1284         "u" : (6, 16),  1285         "U" : (10, 16),  1286         "x" : (4, 16),  1287         }  1288   1289     octal_digits = map(str, range(0, 8))  1290   1291     # Translations of some quoted values.  1292   1293     escaped = {  1294         "\\" : "\\", "'" : "'", '"' : '"',  1295         "a" : "\a", "b" : "\b", "f" : "\f",  1296         "n" : "\n", "r" : "\r", "t" : "\t",  1297         }  1298   1299     while current < end:  1300   1301         # Look for quoted values.  1302   1303         index = s.find("\\", current)  1304         if index == -1 or index + 1 == end:  1305             l.append(s[current:end])  1306             break  1307   1308         # Add the preceding text.  1309   1310         l.append(s[current:index])  1311   1312         # Handle quoted text.  1313   1314         term = s[index+1]  1315   1316         # Add Unicode values. Where a string is u-prefixed, even \o and \x  1317         # produce Unicode values.  1318   1319         if typename == "unicode" and (  1320             term in ("u", "U") or   1321             "u" in prefix and (term == "x" or term in octal_digits)):  1322   1323             needed, base = searches.get(term, (4, 8))  1324             value = convert_quoted_value(s, index, needed, end, base, unichr)  1325             l.append(value)  1326             current = index + needed  1327   1328         # Add raw byte values, changing the string type.  1329   1330         elif "r" not in prefix and (  1331              term == "x" or term in octal_digits):  1332   1333             needed, base = searches.get(term, (4, 8))  1334             value = convert_quoted_value(s, index, needed, end, base, chr)  1335             l.append(value)  1336             typename = "str"  1337             current = index + needed  1338   1339         # Add other escaped values.  1340   1341         elif "r" not in prefix and escaped.has_key(term):  1342             l.append(escaped[term])  1343             current = index + 2  1344   1345         # Add other text as found.  1346   1347         else:  1348             l.append(s[index:index+2])  1349             current = index + 2  1350   1351     # Collect the components into a single Unicode object. Since the literal  1352     # text was already in UTF-8 form, interpret plain strings as UTF-8  1353     # sequences.  1354   1355     out = []  1356   1357     for value in l:  1358         if isinstance(value, unicode):  1359             out.append(value)  1360         else:  1361             out.append(unicode(value, "utf-8"))  1362   1363     return "".join(out), typename  1364   1365 def convert_quoted_value(s, index, needed, end, base, fn):  1366   1367     """  1368     Interpret a quoted value in 's' at 'index' with the given 'needed' number of  1369     positions, and with the given 'end' indicating the first position after the  1370     end of the actual string content.  1371   1372     Use 'base' as the numerical base when interpreting the value, and use 'fn'  1373     to convert the value to an appropriate type.  1374     """  1375   1376     s = s[index:min(index+needed, end)]  1377   1378     # Not a complete occurrence.  1379   1380     if len(s) < needed:  1381         return s  1382   1383     # Test for a well-formed value.  1384   1385     try:  1386         first = base == 8 and 1 or 2  1387         value = int(s[first:needed], base)  1388     except ValueError:  1389         return s  1390     else:  1391         return fn(value)  1392   1393 # Attribute chain decoding.  1394   1395 def get_attrnames(attrnames):  1396   1397     """  1398     Split the qualified attribute chain 'attrnames' into its components,  1399     handling special attributes starting with "#" that indicate type  1400     conformance.  1401     """  1402   1403     if attrnames.startswith("#"):  1404         return [attrnames]  1405     else:  1406         return attrnames.split(".")  1407   1408 def get_attrname_from_location(location):  1409   1410     """  1411     Extract the first attribute from the attribute names employed in a  1412     'location'.  1413     """  1414   1415     path, name, attrnames, access = location  1416     if not attrnames:  1417         return attrnames  1418     return get_attrnames(attrnames)[0]  1419   1420 def get_name_path(path, name):  1421   1422     "Return a suitable qualified name from the given 'path' and 'name'."  1423   1424     if "." in name:  1425         return name  1426     else:  1427         return "%s.%s" % (path, name)  1428   1429 # Usage-related functions.  1430   1431 def get_types_for_usage(attrnames, objects):  1432   1433     """  1434     Identify the types that can support the given 'attrnames', using the  1435     given 'objects' as the catalogue of type details.  1436     """  1437   1438     types = []  1439     for name, _attrnames in objects.items():  1440         if set(attrnames).issubset(_attrnames):  1441             types.append(name)  1442     return types  1443   1444 def get_invoked_attributes(usage):  1445   1446     "Obtain invoked attribute from the given 'usage'."  1447   1448     invoked = []  1449     if usage:  1450         for attrname, invocation, assignment in usage:  1451             if invocation:  1452                 invoked.append(attrname)  1453     return invoked  1454   1455 def get_assigned_attributes(usage):  1456   1457     "Obtain assigned attribute from the given 'usage'."  1458   1459     assigned = []  1460     if usage:  1461         for attrname, invocation, assignment in usage:  1462             if assignment:  1463                 assigned.append(attrname)  1464     return assigned  1465   1466 # Type and module functions.  1467 # NOTE: This makes assumptions about the __builtins__ structure.  1468   1469 def get_builtin_module(name):  1470   1471     "Return the module name containing the given type 'name'."  1472   1473     if name == "string":  1474         modname = "str"  1475     elif name == "utf8string":  1476         modname = "unicode"  1477     elif name == "NoneType":  1478         modname = "none"  1479     else:  1480         modname = name  1481   1482     return "__builtins__.%s" % modname  1483   1484 def get_builtin_type(name):  1485   1486     "Return the type name provided by the given Python value 'name'."  1487   1488     if name == "str":  1489         return "string"  1490     elif name == "unicode":  1491         return "utf8string"  1492     else:  1493         return name  1494   1495 def get_builtin_class(name):  1496   1497     "Return the full name of the built-in class having the given 'name'."  1498   1499     typename = get_builtin_type(name)  1500     module = get_builtin_module(typename)  1501     return "%s.%s" % (module, typename)  1502   1503 # Useful data.  1504   1505 predefined_constants = "False", "None", "NotImplemented", "True"  1506   1507 operator_functions = {  1508   1509     # Fundamental operations.  1510   1511     "is" : "is_",  1512     "is not" : "is_not",  1513   1514     # Binary operations.  1515   1516     "in" : "in_",  1517     "not in" : "not_in",  1518     "Add" : "add",  1519     "Bitand" : "and_",  1520     "Bitor" : "or_",  1521     "Bitxor" : "xor",  1522     "Div" : "div",  1523     "FloorDiv" : "floordiv",  1524     "LeftShift" : "lshift",  1525     "Mod" : "mod",  1526     "Mul" : "mul",  1527     "Power" : "pow",  1528     "RightShift" : "rshift",  1529     "Sub" : "sub",  1530   1531     # Unary operations.  1532   1533     "Invert" : "invert",  1534     "UnaryAdd" : "pos",  1535     "UnarySub" : "neg",  1536   1537     # Augmented assignment.  1538   1539     "+=" : "iadd",  1540     "-=" : "isub",  1541     "*=" : "imul",  1542     "/=" : "idiv",  1543     "//=" : "ifloordiv",  1544     "%=" : "imod",  1545     "**=" : "ipow",  1546     "<<=" : "ilshift",  1547     ">>=" : "irshift",  1548     "&=" : "iand",  1549     "^=" : "ixor",  1550     "|=" : "ior",  1551   1552     # Comparisons.  1553   1554     "==" : "eq",  1555     "!=" : "ne",  1556     "<" : "lt",  1557     "<=" : "le",  1558     ">=" : "ge",  1559     ">" : "gt",  1560     }  1561   1562 # vim: tabstop=4 expandtab shiftwidth=4