Lichen

common.py

641:c71722209a3d
2017-03-01 Paul Boddie Added support for preserving attribute and parameter locations, avoiding misidentification issues in existing, preserved object code.
     1 #!/usr/bin/env python     2      3 """     4 Common functions.     5      6 Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013,     7               2014, 2015, 2016, 2017 Paul Boddie <paul@boddie.org.uk>     8      9 This program is free software; you can redistribute it and/or modify it under    10 the terms of the GNU General Public License as published by the Free Software    11 Foundation; either version 3 of the License, or (at your option) any later    12 version.    13     14 This program is distributed in the hope that it will be useful, but WITHOUT    15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS    16 FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more    17 details.    18     19 You should have received a copy of the GNU General Public License along with    20 this program.  If not, see <http://www.gnu.org/licenses/>.    21 """    22     23 from compiler.transformer import Transformer    24 from errors import InspectError    25 from os import listdir, makedirs, remove    26 from os.path import exists, getmtime, isdir, join, split    27 from results import ConstantValueRef, LiteralSequenceRef, NameRef    28 import compiler.ast    29     30 class CommonOutput:    31     32     "Common output functionality."    33     34     def check_output(self, options=None):    35     36         "Check the existing output and remove it if irrelevant."    37     38         if not exists(self.output):    39             makedirs(self.output)    40     41         details = self.importer.get_cache_details()    42         recorded_details = self.get_output_details()    43     44         # Combine cache details with any options.    45     46         full_details = options and (details + " " + options) or details    47     48         if recorded_details != full_details:    49             self.remove_output()    50     51         writefile(self.get_output_details_filename(), full_details)    52     53     def get_output_details_filename(self):    54     55         "Return the output details filename."    56     57         return join(self.output, "$details")    58     59     def get_output_details(self):    60     61         "Return details of the existing output."    62     63         details_filename = self.get_output_details_filename()    64     65         if not exists(details_filename):    66             return None    67         else:    68             return readfile(details_filename)    69     70     def remove_output(self, dirname=None):    71     72         "Remove the output."    73     74         dirname = dirname or self.output    75     76         for filename in listdir(dirname):    77             path = join(dirname, filename)    78             if isdir(path):    79                 self.remove_output(path)    80             else:    81                 remove(path)    82     83 def copy(source, target, only_if_newer=True):    84     85     "Copy a text file from 'source' to 'target'."    86     87     if isdir(target):    88         target = join(target, split(source)[-1])    89     90     if only_if_newer and not is_newer(source, target):    91         return    92     93     infile = open(source)    94     outfile = open(target, "w")    95     96     try:    97         outfile.write(infile.read())    98     finally:    99         outfile.close()   100         infile.close()   101    102 def is_newer(source, target):   103    104     "Return whether 'source' is newer than 'target'."   105    106     if exists(target):   107         target_mtime = getmtime(target)   108         source_mtime = getmtime(source)   109         return source_mtime > target_mtime   110    111     return True   112    113 class CommonModule:   114    115     "A common module representation."   116    117     def __init__(self, name, importer):   118    119         """   120         Initialise this module with the given 'name' and an 'importer' which is   121         used to provide access to other modules when required.   122         """   123    124         self.name = name   125         self.importer = importer   126         self.filename = None   127    128         # Inspection-related attributes.   129    130         self.astnode = None   131         self.encoding = None   132         self.iterators = {}   133         self.temp = {}   134         self.lambdas = {}   135    136         # Constants, literals and values.   137    138         self.constants = {}   139         self.constant_values = {}   140         self.literals = {}   141         self.literal_types = {}   142    143         # Nested namespaces.   144    145         self.namespace_path = []   146         self.in_function = False   147    148         # Retain the assignment value expression and track invocations.   149    150         self.in_assignment = None   151         self.in_invocation = None   152    153         # Attribute chain state management.   154    155         self.attrs = []   156         self.chain_assignment = []   157         self.chain_invocation = []   158    159     def __repr__(self):   160         return "CommonModule(%r, %r)" % (self.name, self.importer)   161    162     def parse_file(self, filename):   163    164         "Parse the file with the given 'filename', initialising attributes."   165    166         self.filename = filename   167    168         # Use the Transformer directly to obtain encoding information.   169    170         t = Transformer()   171         f = open(filename)   172    173         try:   174             self.astnode = t.parsesuite(f.read() + "\n")   175             self.encoding = t.encoding   176         finally:   177             f.close()   178    179     # Module-relative naming.   180    181     def get_global_path(self, name):   182         return "%s.%s" % (self.name, name)   183    184     def get_namespace_path(self):   185         return ".".join([self.name] + self.namespace_path)   186    187     def get_object_path(self, name):   188         return ".".join([self.name] + self.namespace_path + [name])   189    190     def get_parent_path(self):   191         return ".".join([self.name] + self.namespace_path[:-1])   192    193     # Namespace management.   194    195     def enter_namespace(self, name):   196    197         "Enter the namespace having the given 'name'."   198    199         self.namespace_path.append(name)   200    201     def exit_namespace(self):   202    203         "Exit the current namespace."   204    205         self.namespace_path.pop()   206    207     # Constant reference naming.   208    209     def get_constant_name(self, value, value_type, encoding=None):   210    211         """   212         Add a new constant to the current namespace for 'value' with   213         'value_type'.   214         """   215    216         path = self.get_namespace_path()   217         init_item(self.constants, path, dict)   218         return "$c%d" % add_counter_item(self.constants[path], (value, value_type, encoding))   219    220     # Literal reference naming.   221    222     def get_literal_name(self):   223    224         "Add a new literal to the current namespace."   225    226         path = self.get_namespace_path()   227         init_item(self.literals, path, lambda: 0)   228         return "$C%d" % self.literals[path]   229    230     def next_literal(self):   231         self.literals[self.get_namespace_path()] += 1   232    233     # Temporary iterator naming.   234    235     def get_iterator_path(self):   236         return self.in_function and self.get_namespace_path() or self.name   237    238     def get_iterator_name(self):   239         path = self.get_iterator_path()   240         init_item(self.iterators, path, lambda: 0)   241         return "$i%d" % self.iterators[path]   242    243     def next_iterator(self):   244         self.iterators[self.get_iterator_path()] += 1   245    246     # Temporary variable naming.   247    248     def get_temporary_name(self):   249         path = self.get_namespace_path()   250         init_item(self.temp, path, lambda: 0)   251         return "$t%d" % self.temp[path]   252    253     def next_temporary(self):   254         self.temp[self.get_namespace_path()] += 1   255    256     # Arbitrary function naming.   257    258     def get_lambda_name(self):   259         path = self.get_namespace_path()   260         init_item(self.lambdas, path, lambda: 0)   261         name = "$l%d" % self.lambdas[path]   262         self.lambdas[path] += 1   263         return name   264    265     def reset_lambdas(self):   266         self.lambdas = {}   267    268     # Constant and literal recording.   269    270     def get_constant_value(self, value, literals=None):   271    272         """   273         Encode the 'value' if appropriate, returning a value, a typename and any   274         encoding.   275         """   276    277         if isinstance(value, unicode):   278             return value.encode("utf-8"), "unicode", self.encoding   279    280         # Attempt to convert plain strings to text.   281    282         elif isinstance(value, str) and self.encoding:   283             try:   284                 return get_string_details(literals, self.encoding)   285             except UnicodeDecodeError:   286                 pass   287    288         return value, value.__class__.__name__, None   289    290     def get_constant_reference(self, ref, value, encoding=None):   291    292         """   293         Return a constant reference for the given 'ref' type and 'value', with   294         the optional 'encoding' applying to text values.   295         """   296    297         constant_name = self.get_constant_name(value, ref.get_origin(), encoding)   298    299         # Return a reference for the constant.   300    301         objpath = self.get_object_path(constant_name)   302         name_ref = ConstantValueRef(constant_name, ref.instance_of(objpath), value)   303    304         # Record the value and type for the constant.   305    306         self._reserve_constant(objpath, name_ref.value, name_ref.get_origin(), encoding)   307         return name_ref   308    309     def reserve_constant(self, objpath, value, origin, encoding=None):   310    311         """   312         Reserve a constant within 'objpath' with the given 'value' and having a   313         type with the given 'origin', with the optional 'encoding' applying to   314         text values.   315         """   316    317         constant_name = self.get_constant_name(value, origin)   318         objpath = self.get_object_path(constant_name)   319         self._reserve_constant(objpath, value, origin, encoding)   320    321     def _reserve_constant(self, objpath, value, origin, encoding):   322    323         """   324         Store a constant for 'objpath' with the given 'value' and 'origin', with   325         the optional 'encoding' applying to text values.   326         """   327    328         self.constant_values[objpath] = value, origin, encoding   329    330     def get_literal_reference(self, name, ref, items, cls):   331    332         """   333         Return a literal reference for the given type 'name', literal 'ref',   334         node 'items' and employing the given 'cls' as the class of the returned   335         reference object.   336         """   337    338         # Construct an invocation using the items as arguments.   339    340         typename = "$L%s" % name   341    342         invocation = compiler.ast.CallFunc(   343             compiler.ast.Name(typename),   344             items   345             )   346    347         # Get a name for the actual literal.   348    349         instname = self.get_literal_name()   350         self.next_literal()   351    352         # Record the type for the literal.   353    354         objpath = self.get_object_path(instname)   355         self.literal_types[objpath] = ref.get_origin()   356    357         # Return a wrapper for the invocation exposing the items.   358    359         return cls(   360             instname,   361             ref.instance_of(),   362             self.process_structure_node(invocation),   363             invocation.args   364             )   365    366     # Node handling.   367    368     def process_structure(self, node):   369    370         """   371         Within the given 'node', process the program structure.   372    373         During inspection, this will process global declarations, adjusting the   374         module namespace, and import statements, building a module dependency   375         hierarchy.   376    377         During translation, this will consult deduced program information and   378         output translated code.   379         """   380    381         l = []   382         for n in node.getChildNodes():   383             l.append(self.process_structure_node(n))   384         return l   385    386     def process_augassign_node(self, n):   387    388         "Process the given augmented assignment node 'n'."   389    390         op = operator_functions[n.op]   391    392         if isinstance(n.node, compiler.ast.Getattr):   393             target = compiler.ast.AssAttr(n.node.expr, n.node.attrname, "OP_ASSIGN")   394         elif isinstance(n.node, compiler.ast.Name):   395             target = compiler.ast.AssName(n.node.name, "OP_ASSIGN")   396         else:   397             target = n.node   398    399         assignment = compiler.ast.Assign(   400             [target],   401             compiler.ast.CallFunc(   402                 compiler.ast.Name("$op%s" % op),   403                 [n.node, n.expr]))   404    405         return self.process_structure_node(assignment)   406    407     def process_assignment_for_object(self, original_name, source):   408    409         """   410         Return an assignment operation making 'original_name' refer to the given   411         'source'.   412         """   413    414         assignment = compiler.ast.Assign(   415             [compiler.ast.AssName(original_name, "OP_ASSIGN")],   416             source   417             )   418    419         return self.process_structure_node(assignment)   420    421     def process_assignment_node_items(self, n, expr):   422    423         """   424         Process the given assignment node 'n' whose children are to be assigned   425         items of 'expr'.   426         """   427    428         name_ref = self.process_structure_node(expr)   429    430         # Either unpack the items and present them directly to each assignment   431         # node.   432    433         if isinstance(name_ref, LiteralSequenceRef) and \   434            self.process_literal_sequence_items(n, name_ref):   435    436             pass   437    438         # Or have the assignment nodes access each item via the sequence API.   439    440         else:   441             self.process_assignment_node_items_by_position(n, expr, name_ref)   442    443     def process_assignment_node_items_by_position(self, n, expr, name_ref):   444    445         """   446         Process the given sequence assignment node 'n', converting the node to   447         the separate assignment of each target using positional access on a   448         temporary variable representing the sequence. Use 'expr' as the assigned   449         value and 'name_ref' as the reference providing any existing temporary   450         variable.   451         """   452    453         assignments = []   454    455         # Employ existing names to access the sequence.   456         # Literal sequences do not provide names of accessible objects.   457    458         if isinstance(name_ref, NameRef) and not isinstance(name_ref, LiteralSequenceRef):   459             temp = name_ref.name   460    461         # For other expressions, create a temporary name to reference the items.   462    463         else:   464             temp = self.get_temporary_name()   465             self.next_temporary()   466    467             assignments.append(   468                 compiler.ast.Assign([compiler.ast.AssName(temp, "OP_ASSIGN")], expr)   469                 )   470    471         # Assign the items to the target nodes.   472    473         for i, node in enumerate(n.nodes):   474             assignments.append(   475                 compiler.ast.Assign([node], compiler.ast.Subscript(   476                     compiler.ast.Name(temp), "OP_APPLY", [compiler.ast.Const(i, str(i))]))   477                 )   478    479         return self.process_structure_node(compiler.ast.Stmt(assignments))   480    481     def process_literal_sequence_items(self, n, name_ref):   482    483         """   484         Process the given assignment node 'n', obtaining from the given   485         'name_ref' the items to be assigned to the assignment targets.   486    487         Return whether this method was able to process the assignment node as   488         a sequence of direct assignments.   489         """   490    491         if len(n.nodes) == len(name_ref.items):   492             assigned_names, count = get_names_from_nodes(n.nodes)   493             accessed_names, _count = get_names_from_nodes(name_ref.items)   494    495             # Only assign directly between items if all assigned names are   496             # plain names (not attribute assignments), and if the assigned names   497             # do not appear in the accessed names.   498    499             if len(assigned_names) == count and \   500                not assigned_names.intersection(accessed_names):   501    502                 for node, item in zip(n.nodes, name_ref.items):   503                     self.process_assignment_node(node, item)   504    505                 return True   506    507             # Otherwise, use the position-based mechanism to obtain values.   508    509             else:   510                 return False   511         else:   512             raise InspectError("In %s, item assignment needing %d items is given %d items." % (   513                 self.get_namespace_path(), len(n.nodes), len(name_ref.items)))   514    515     def process_compare_node(self, n):   516    517         """   518         Process the given comparison node 'n', converting an operator sequence   519         from...   520    521         <expr1> <op1> <expr2> <op2> <expr3>   522    523         ...to...   524    525         <op1>(<expr1>, <expr2>) and <op2>(<expr2>, <expr3>)   526         """   527    528         invocations = []   529         last = n.expr   530    531         for op, op_node in n.ops:   532             op = operator_functions.get(op)   533    534             invocations.append(compiler.ast.CallFunc(   535                 compiler.ast.Name("$op%s" % op),   536                 [last, op_node]))   537    538             last = op_node   539    540         if len(invocations) > 1:   541             result = compiler.ast.And(invocations)   542         else:   543             result = invocations[0]   544    545         return self.process_structure_node(result)   546    547     def process_dict_node(self, node):   548    549         """   550         Process the given dictionary 'node', returning a list of (key, value)   551         tuples.   552         """   553    554         l = []   555         for key, value in node.items:   556             l.append((   557                 self.process_structure_node(key),   558                 self.process_structure_node(value)))   559         return l   560    561     def process_for_node(self, n):   562    563         """   564         Generate attribute accesses for {n.list}.__iter__ and the next method on   565         the iterator, producing a replacement node for the original.   566         """   567    568         node = compiler.ast.Stmt([   569    570             # <next> = {n.list}.__iter__().next   571    572             compiler.ast.Assign(   573                 [compiler.ast.AssName(self.get_iterator_name(), "OP_ASSIGN")],   574                 compiler.ast.Getattr(   575                     compiler.ast.CallFunc(   576                         compiler.ast.Getattr(n.list, "__iter__"),   577                         []   578                         ), "next")),   579    580             # try:   581             #     while True:   582             #         <var>... = <next>()   583             #         ...   584             # except StopIteration:   585             #     pass   586    587             compiler.ast.TryExcept(   588                 compiler.ast.While(   589                     compiler.ast.Name("True"),   590                     compiler.ast.Stmt([   591                         compiler.ast.Assign(   592                             [n.assign],   593                             compiler.ast.CallFunc(   594                                 compiler.ast.Name(self.get_iterator_name()),   595                                 []   596                                 )),   597                         n.body]),   598                     None),   599                 [(compiler.ast.Name("StopIteration"), None, compiler.ast.Stmt([compiler.ast.Pass()]))],   600                 None)   601             ])   602    603         self.next_iterator()   604         self.process_structure_node(node)   605    606     def process_literal_sequence_node(self, n, name, ref, cls):   607    608         """   609         Process the given literal sequence node 'n' as a function invocation,   610         with 'name' indicating the type of the sequence, and 'ref' being a   611         reference to the type. The 'cls' is used to instantiate a suitable name   612         reference.   613         """   614    615         if name == "dict":   616             items = []   617             for key, value in n.items:   618                 items.append(compiler.ast.Tuple([key, value]))   619         else: # name in ("list", "tuple"):   620             items = n.nodes   621    622         return self.get_literal_reference(name, ref, items, cls)   623    624     def process_operator_node(self, n):   625    626         """   627         Process the given operator node 'n' as an operator function invocation.   628         """   629    630         op = operator_functions[n.__class__.__name__]   631         invocation = compiler.ast.CallFunc(   632             compiler.ast.Name("$op%s" % op),   633             list(n.getChildNodes())   634             )   635         return self.process_structure_node(invocation)   636    637     def process_print_node(self, n):   638    639         """   640         Process the given print node 'n' as an invocation on a stream of the   641         form...   642    643         $print(dest, args, nl)   644    645         The special function name will be translated elsewhere.   646         """   647    648         nl = isinstance(n, compiler.ast.Printnl)   649         invocation = compiler.ast.CallFunc(   650             compiler.ast.Name("$print"),   651             [n.dest or compiler.ast.Name("None"),   652              compiler.ast.List(list(n.nodes)),   653              nl and compiler.ast.Name("True") or compiler.ast.Name("False")]   654             )   655         return self.process_structure_node(invocation)   656    657     def process_slice_node(self, n, expr=None):   658    659         """   660         Process the given slice node 'n' as an operator function invocation.   661         """   662    663         if n.flags == "OP_ASSIGN": op = "setslice"   664         elif n.flags == "OP_DELETE": op = "delslice"   665         else: op = "getslice"   666    667         invocation = compiler.ast.CallFunc(   668             compiler.ast.Name("$op%s" % op),   669             [n.expr, n.lower or compiler.ast.Name("None"), n.upper or compiler.ast.Name("None")] +   670                 (expr and [expr] or [])   671             )   672    673         # Fix parse tree structure.   674    675         if op == "delslice":   676             invocation = compiler.ast.Discard(invocation)   677    678         return self.process_structure_node(invocation)   679    680     def process_sliceobj_node(self, n):   681    682         """   683         Process the given slice object node 'n' as a slice constructor.   684         """   685    686         op = "slice"   687         invocation = compiler.ast.CallFunc(   688             compiler.ast.Name("$op%s" % op),   689             n.nodes   690             )   691         return self.process_structure_node(invocation)   692    693     def process_subscript_node(self, n, expr=None):   694    695         """   696         Process the given subscript node 'n' as an operator function invocation.   697         """   698    699         if n.flags == "OP_ASSIGN": op = "setitem"   700         elif n.flags == "OP_DELETE": op = "delitem"   701         else: op = "getitem"   702    703         invocation = compiler.ast.CallFunc(   704             compiler.ast.Name("$op%s" % op),   705             [n.expr] + list(n.subs) + (expr and [expr] or [])   706             )   707    708         # Fix parse tree structure.   709    710         if op == "delitem":   711             invocation = compiler.ast.Discard(invocation)   712    713         return self.process_structure_node(invocation)   714    715     def process_attribute_chain(self, n):   716    717         """   718         Process the given attribute access node 'n'. Return a reference   719         describing the expression.   720         """   721    722         # AssAttr/Getattr are nested with the outermost access being the last   723         # access in any chain.   724    725         self.attrs.insert(0, n.attrname)   726         attrs = self.attrs   727    728         # Break attribute chains where non-access nodes are found.   729    730         if not self.have_access_expression(n):   731             self.reset_attribute_chain()   732    733         # Descend into the expression, extending backwards any existing chain,   734         # or building another for the expression.   735    736         name_ref = self.process_structure_node(n.expr)   737    738         # Restore chain information applying to this node.   739    740         if not self.have_access_expression(n):   741             self.restore_attribute_chain(attrs)   742    743         # Return immediately if the expression was another access and thus a   744         # continuation backwards along the chain. The above processing will   745         # have followed the chain all the way to its conclusion.   746    747         if self.have_access_expression(n):   748             del self.attrs[0]   749    750         return name_ref   751    752     # Attribute chain handling.   753    754     def reset_attribute_chain(self):   755    756         "Reset the attribute chain for a subexpression of an attribute access."   757    758         self.attrs = []   759         self.chain_assignment.append(self.in_assignment)   760         self.chain_invocation.append(self.in_invocation)   761         self.in_assignment = None   762         self.in_invocation = None   763    764     def restore_attribute_chain(self, attrs):   765    766         "Restore the attribute chain for an attribute access."   767    768         self.attrs = attrs   769         self.in_assignment = self.chain_assignment.pop()   770         self.in_invocation = self.chain_invocation.pop()   771    772     def have_access_expression(self, node):   773    774         "Return whether the expression associated with 'node' is Getattr."   775    776         return isinstance(node.expr, compiler.ast.Getattr)   777    778     def get_name_for_tracking(self, name, name_ref=None):   779    780         """   781         Return the name to be used for attribute usage observations involving   782         the given 'name' in the current namespace.   783    784         If the name is being used outside a function, and if 'name_ref' is   785         given, a path featuring the name in the global namespace is returned   786         where 'name_ref' indicates a global. Otherwise, a path computed using   787         the current namespace and the given name is returned.   788    789         The intention of this method is to provide a suitably-qualified name   790         that can be tracked across namespaces. Where globals are being   791         referenced in class namespaces, they should be referenced using their   792         path within the module, not using a path within each class.   793    794         It may not be possible to identify a global within a function at the   795         time of inspection (since a global may appear later in a file).   796         Consequently, globals are identified by their local name rather than   797         their module-qualified path.   798         """   799    800         # For functions, use the appropriate local names.   801    802         if self.in_function:   803             return name   804    805         # For global names outside functions, use a global name.   806    807         elif name_ref and name_ref.is_global_name():   808             return self.get_global_path(name)   809    810         # Otherwise, establish a name in the current namespace.   811    812         else:   813             return self.get_object_path(name)   814    815     def get_path_for_access(self):   816    817         "Outside functions, register accesses at the module level."   818    819         if not self.in_function:   820             return self.name   821         else:   822             return self.get_namespace_path()   823    824     def get_module_name(self, node):   825    826         """   827         Using the given From 'node' in this module, calculate any relative import   828         information, returning a tuple containing a module to import along with any   829         names to import based on the node's name information.   830    831         Where the returned module is given as None, whole module imports should   832         be performed for the returned modules using the returned names.   833         """   834    835         # Absolute import.   836    837         if node.level == 0:   838             return node.modname, node.names   839    840         # Relative to an ancestor of this module.   841    842         else:   843             path = self.name.split(".")   844             level = node.level   845    846             # Relative imports treat package roots as submodules.   847    848             if split(self.filename)[-1] == "__init__.py":   849                 level -= 1   850    851             if level > len(path):   852                 raise InspectError("Relative import %r involves too many levels up from module %r" % (   853                     ("%s%s" % ("." * node.level, node.modname or "")), self.name))   854    855             basename = ".".join(path[:len(path)-level])   856    857         # Name imports from a module.   858    859         if node.modname:   860             return "%s.%s" % (basename, node.modname), node.names   861    862         # Relative whole module imports.   863    864         else:   865             return basename, node.names   866    867 def get_argnames(args):   868    869     """   870     Return a list of all names provided by 'args'. Since tuples may be   871     employed, the arguments are traversed depth-first.   872     """   873    874     l = []   875     for arg in args:   876         if isinstance(arg, tuple):   877             l += get_argnames(arg)   878         else:   879             l.append(arg)   880     return l   881    882 def get_names_from_nodes(nodes):   883    884     """   885     Return the names employed in the given 'nodes' along with the number of   886     nodes excluding sequences.   887     """   888    889     names = set()   890     count = 0   891    892     for node in nodes:   893    894         # Add names and count them.   895    896         if isinstance(node, (compiler.ast.AssName, compiler.ast.Name)):   897             names.add(node.name)   898             count += 1   899    900         # Add names from sequences and incorporate their counts.   901    902         elif isinstance(node, (compiler.ast.AssList, compiler.ast.AssTuple,   903                                compiler.ast.List, compiler.ast.Set,   904                                compiler.ast.Tuple)):   905             _names, _count = get_names_from_nodes(node.nodes)   906             names.update(_names)   907             count += _count   908    909         # Count non-name, non-sequence nodes.   910    911         else:   912             count += 1   913    914     return names, count   915    916 # Result classes.   917    918 class InstructionSequence:   919    920     "A generic sequence of instructions."   921    922     def __init__(self, instructions):   923         self.instructions = instructions   924    925     def get_value_instruction(self):   926         return self.instructions[-1]   927    928     def get_init_instructions(self):   929         return self.instructions[:-1]   930    931 # Dictionary utilities.   932    933 def init_item(d, key, fn):   934    935     """   936     Add to 'd' an entry for 'key' using the callable 'fn' to make an initial   937     value where no entry already exists.   938     """   939    940     if not d.has_key(key):   941         d[key] = fn()   942     return d[key]   943    944 def dict_for_keys(d, keys):   945    946     "Return a new dictionary containing entries from 'd' for the given 'keys'."   947    948     nd = {}   949     for key in keys:   950         if d.has_key(key):   951             nd[key] = d[key]   952     return nd   953    954 def make_key(s):   955    956     "Make sequence 's' into a tuple-based key, first sorting its contents."   957    958     l = list(s)   959     l.sort()   960     return tuple(l)   961    962 def add_counter_item(d, key):   963    964     """   965     Make a mapping in 'd' for 'key' to the number of keys added before it, thus   966     maintaining a mapping of keys to their order of insertion.   967     """   968    969     if not d.has_key(key):   970         d[key] = len(d.keys())   971     return d[key]    972    973 def remove_items(d1, d2):   974    975     "Remove from 'd1' all items from 'd2'."   976    977     for key in d2.keys():   978         if d1.has_key(key):   979             del d1[key]   980    981 # Set utilities.   982    983 def first(s):   984     return list(s)[0]   985    986 def same(s1, s2):   987     return set(s1) == set(s2)   988    989 # General input/output.   990    991 def readfile(filename):   992    993     "Return the contents of 'filename'."   994    995     f = open(filename)   996     try:   997         return f.read()   998     finally:   999         f.close()  1000   1001 def writefile(filename, s):  1002   1003     "Write to 'filename' the string 's'."  1004   1005     f = open(filename, "w")  1006     try:  1007         f.write(s)  1008     finally:  1009         f.close()  1010   1011 # General encoding.  1012   1013 def sorted_output(x):  1014   1015     "Sort sequence 'x' and return a string with commas separating the values."  1016   1017     x = map(str, x)  1018     x.sort()  1019     return ", ".join(x)  1020   1021 def get_string_details(literals, encoding):  1022   1023     """  1024     Determine whether 'literals' represent Unicode strings or byte strings,  1025     using 'encoding' to reproduce byte sequences.  1026   1027     Each literal is the full program representation including prefix and quotes  1028     recoded by the parser to UTF-8. Thus, any literal found to represent a byte  1029     string needs to be translated back to its original encoding.  1030   1031     Return a single encoded literal value, a type name, and the original  1032     encoding as a tuple.  1033     """  1034   1035     typename = "unicode"  1036   1037     l = []  1038   1039     for s in literals:  1040         out, _typename = get_literal_details(s)  1041         if _typename == "str":  1042             typename = "str"  1043         l.append(out)  1044   1045     out = "".join(l)  1046   1047     # For Unicode values, convert to the UTF-8 program representation.  1048   1049     if typename == "unicode":  1050         return out.encode("utf-8"), typename, encoding  1051   1052     # For byte string values, convert back to the original encoding.  1053   1054     else:  1055         return out.encode(encoding), typename, encoding  1056   1057 def get_literal_details(s):  1058   1059     """  1060     Determine whether 's' represents a Unicode string or a byte string, where  1061     's' contains the full program representation of a literal including prefix  1062     and quotes, recoded by the parser to UTF-8.  1063   1064     Find and convert Unicode values starting with <backslash>u or <backslash>U,  1065     and byte or Unicode values starting with <backslash><octal digit> or  1066     <backslash>x.  1067   1068     Literals prefixed with "u" cause <backslash><octal digit> and <backslash>x  1069     to be considered as Unicode values. Otherwise, they produce byte values and  1070     cause unprefixed strings to be considered as byte strings.  1071   1072     Literals prefixed with "r" do not have their backslash-encoded values  1073     converted unless also prefixed with "u", in which case only the above value  1074     formats are converted, not any of the other special sequences for things  1075     like newlines.  1076   1077     Return the literal value as a Unicode object together with the appropriate  1078     type name in a tuple.  1079     """  1080   1081     l = []  1082   1083     # Identify the quote character and use it to identify the prefix.  1084   1085     quote_type = s[-1]  1086     prefix_end = s.find(quote_type)  1087     prefix = s[:prefix_end].lower()  1088   1089     if prefix not in ("", "b", "br", "r", "u", "ur"):  1090         raise ValueError, "String literal does not have a supported prefix: %s" % s  1091   1092     if "b" in prefix:  1093         typename = "str"  1094     else:  1095         typename = "unicode"  1096   1097     # Identify triple quotes or single quotes.  1098   1099     if len(s) >= 6 and s[-2] == quote_type and s[-3] == quote_type:  1100         quote = s[prefix_end:prefix_end+3]  1101         current = prefix_end + 3  1102         end = len(s) - 3  1103     else:  1104         quote = s[prefix_end]  1105         current = prefix_end + 1  1106         end = len(s) - 1  1107   1108     # Conversions of some quoted values.  1109   1110     searches = {  1111         "u" : (6, 16),  1112         "U" : (10, 16),  1113         "x" : (4, 16),  1114         }  1115   1116     octal_digits = map(str, range(0, 8))  1117   1118     # Translations of some quoted values.  1119   1120     escaped = {  1121         "\\" : "\\", "'" : "'", '"' : '"',  1122         "a" : "\a", "b" : "\b", "f" : "\f",  1123         "n" : "\n", "r" : "\r", "t" : "\t",  1124         }  1125   1126     while current < end:  1127   1128         # Look for quoted values.  1129   1130         index = s.find("\\", current)  1131         if index == -1 or index + 1 == end:  1132             l.append(s[current:end])  1133             break  1134   1135         # Add the preceding text.  1136   1137         l.append(s[current:index])  1138   1139         # Handle quoted text.  1140   1141         term = s[index+1]  1142   1143         # Add Unicode values. Where a string is u-prefixed, even \o and \x  1144         # produce Unicode values.  1145   1146         if typename == "unicode" and (  1147             term in ("u", "U") or   1148             "u" in prefix and (term == "x" or term in octal_digits)):  1149   1150             needed, base = searches.get(term, (4, 8))  1151             value = convert_quoted_value(s, index, needed, end, base, unichr)  1152             l.append(value)  1153             current = index + needed  1154   1155         # Add raw byte values, changing the string type.  1156   1157         elif "r" not in prefix and (  1158              term == "x" or term in octal_digits):  1159   1160             needed, base = searches.get(term, (4, 8))  1161             value = convert_quoted_value(s, index, needed, end, base, chr)  1162             l.append(value)  1163             typename = "str"  1164             current = index + needed  1165   1166         # Add other escaped values.  1167   1168         elif "r" not in prefix and escaped.has_key(term):  1169             l.append(escaped[term])  1170             current = index + 2  1171   1172         # Add other text as found.  1173   1174         else:  1175             l.append(s[index:index+2])  1176             current = index + 2  1177   1178     # Collect the components into a single Unicode object. Since the literal  1179     # text was already in UTF-8 form, interpret plain strings as UTF-8  1180     # sequences.  1181   1182     out = []  1183   1184     for value in l:  1185         if isinstance(value, unicode):  1186             out.append(value)  1187         else:  1188             out.append(unicode(value, "utf-8"))  1189   1190     return "".join(out), typename  1191   1192 def convert_quoted_value(s, index, needed, end, base, fn):  1193   1194     """  1195     Interpret a quoted value in 's' at 'index' with the given 'needed' number of  1196     positions, and with the given 'end' indicating the first position after the  1197     end of the actual string content.  1198   1199     Use 'base' as the numerical base when interpreting the value, and use 'fn'  1200     to convert the value to an appropriate type.  1201     """  1202   1203     s = s[index:min(index+needed, end)]  1204   1205     # Not a complete occurrence.  1206   1207     if len(s) < needed:  1208         return s  1209   1210     # Test for a well-formed value.  1211   1212     try:  1213         first = base == 8 and 1 or 2  1214         value = int(s[first:needed], base)  1215     except ValueError:  1216         return s  1217     else:  1218         return fn(value)  1219   1220 # Attribute chain decoding.  1221   1222 def get_attrnames(attrnames):  1223   1224     """  1225     Split the qualified attribute chain 'attrnames' into its components,  1226     handling special attributes starting with "#" that indicate type  1227     conformance.  1228     """  1229   1230     if attrnames.startswith("#"):  1231         return [attrnames]  1232     else:  1233         return attrnames.split(".")  1234   1235 def get_attrname_from_location(location):  1236   1237     """  1238     Extract the first attribute from the attribute names employed in a  1239     'location'.  1240     """  1241   1242     path, name, attrnames, access = location  1243     if not attrnames:  1244         return attrnames  1245     return get_attrnames(attrnames)[0]  1246   1247 def get_name_path(path, name):  1248   1249     "Return a suitable qualified name from the given 'path' and 'name'."  1250   1251     if "." in name:  1252         return name  1253     else:  1254         return "%s.%s" % (path, name)  1255   1256 # Usage-related functions.  1257   1258 def get_types_for_usage(attrnames, objects):  1259   1260     """  1261     Identify the types that can support the given 'attrnames', using the  1262     given 'objects' as the catalogue of type details.  1263     """  1264   1265     types = []  1266     for name, _attrnames in objects.items():  1267         if set(attrnames).issubset(_attrnames):  1268             types.append(name)  1269     return types  1270   1271 def get_invoked_attributes(usage):  1272   1273     "Obtain invoked attribute from the given 'usage'."  1274   1275     invoked = []  1276     if usage:  1277         for attrname, invocation, assignment in usage:  1278             if invocation:  1279                 invoked.append(attrname)  1280     return invoked  1281   1282 def get_assigned_attributes(usage):  1283   1284     "Obtain assigned attribute from the given 'usage'."  1285   1286     assigned = []  1287     if usage:  1288         for attrname, invocation, assignment in usage:  1289             if assignment:  1290                 assigned.append(attrname)  1291     return assigned  1292   1293 # Type and module functions.  1294 # NOTE: This makes assumptions about the __builtins__ structure.  1295   1296 def get_builtin_module(name):  1297   1298     "Return the module name containing the given type 'name'."  1299   1300     if name == "string":  1301         modname = "str"  1302     elif name == "utf8string":  1303         modname = "unicode"  1304     elif name == "NoneType":  1305         modname = "none"  1306     else:  1307         modname = name  1308   1309     return "__builtins__.%s" % modname  1310   1311 def get_builtin_type(name):  1312   1313     "Return the type name provided by the given Python value 'name'."  1314   1315     if name == "str":  1316         return "string"  1317     elif name == "unicode":  1318         return "utf8string"  1319     else:  1320         return name  1321   1322 def get_builtin_class(name):  1323   1324     "Return the full name of the built-in class having the given 'name'."  1325   1326     typename = get_builtin_type(name)  1327     module = get_builtin_module(typename)  1328     return "%s.%s" % (module, typename)  1329   1330 # Useful data.  1331   1332 predefined_constants = "False", "None", "NotImplemented", "True"  1333   1334 operator_functions = {  1335   1336     # Fundamental operations.  1337   1338     "is" : "is_",  1339     "is not" : "is_not",  1340   1341     # Binary operations.  1342   1343     "in" : "in_",  1344     "not in" : "not_in",  1345     "Add" : "add",  1346     "Bitand" : "and_",  1347     "Bitor" : "or_",  1348     "Bitxor" : "xor",  1349     "Div" : "div",  1350     "FloorDiv" : "floordiv",  1351     "LeftShift" : "lshift",  1352     "Mod" : "mod",  1353     "Mul" : "mul",  1354     "Power" : "pow",  1355     "RightShift" : "rshift",  1356     "Sub" : "sub",  1357   1358     # Unary operations.  1359   1360     "Invert" : "invert",  1361     "UnaryAdd" : "pos",  1362     "UnarySub" : "neg",  1363   1364     # Augmented assignment.  1365   1366     "+=" : "iadd",  1367     "-=" : "isub",  1368     "*=" : "imul",  1369     "/=" : "idiv",  1370     "//=" : "ifloordiv",  1371     "%=" : "imod",  1372     "**=" : "ipow",  1373     "<<=" : "ilshift",  1374     ">>=" : "irshift",  1375     "&=" : "iand",  1376     "^=" : "ixor",  1377     "|=" : "ior",  1378   1379     # Comparisons.  1380   1381     "==" : "eq",  1382     "!=" : "ne",  1383     "<" : "lt",  1384     "<=" : "le",  1385     ">=" : "ge",  1386     ">" : "gt",  1387     }  1388   1389 # vim: tabstop=4 expandtab shiftwidth=4