Lichen (file common.py at ef0e0f95f50d)

     1 #!/usr/bin/env python     2      3 """     4 Common functions.     5      6 Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013,     7               2014, 2015, 2016, 2017 Paul Boddie <paul@boddie.org.uk>     8      9 This program is free software; you can redistribute it and/or modify it under    10 the terms of the GNU General Public License as published by the Free Software    11 Foundation; either version 3 of the License, or (at your option) any later    12 version.    13     14 This program is distributed in the hope that it will be useful, but WITHOUT    15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS    16 FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more    17 details.    18     19 You should have received a copy of the GNU General Public License along with    20 this program.  If not, see <http://www.gnu.org/licenses/>.    21 """    22     23 from compiler.transformer import Transformer    24 from errors import InspectError    25 from os import listdir, makedirs, remove    26 from os.path import exists, getmtime, isdir, join, split    27 from results import ConstantValueRef, LiteralSequenceRef, NameRef    28 import compiler.ast    29     30 class CommonOutput:    31     32     "Common output functionality."    33     34     def check_output(self, options=None):    35     36         "Check the existing output and remove it if irrelevant."    37     38         if not exists(self.output):    39             makedirs(self.output)    40     41         details = self.importer.get_cache_details()    42         recorded_details = self.get_output_details()    43     44         # Combine cache details with any options.    45     46         full_details = options and (details + " " + options) or details    47     48         if recorded_details != full_details:    49             self.remove_output()    50     51         writefile(self.get_output_details_filename(), full_details)    52     53     def get_output_details_filename(self):    54     55         "Return the output details filename."    56     57         return join(self.output, "$details")    58     59     def get_output_details(self):    60     61         "Return details of the existing output."    62     63         details_filename = self.get_output_details_filename()    64     65         if not exists(details_filename):    66             return None    67         else:    68             return readfile(details_filename)    69     70     def remove_output(self, dirname=None):    71     72         "Remove the output."    73     74         dirname = dirname or self.output    75     76         for filename in listdir(dirname):    77             path = join(dirname, filename)    78             if isdir(path):    79                 self.remove_output(path)    80             else:    81                 remove(path)    82     83 def copy(source, target, only_if_newer=True):    84     85     "Copy a text file from 'source' to 'target'."    86     87     if isdir(target):    88         target = join(target, split(source)[-1])    89     90     if only_if_newer and not is_newer(source, target):    91         return    92     93     infile = open(source)    94     outfile = open(target, "w")    95     96     try:    97         outfile.write(infile.read())    98     finally:    99         outfile.close()   100         infile.close()   101    102 def is_newer(source, target):   103    104     "Return whether 'source' is newer than 'target'."   105    106     if exists(target):   107         target_mtime = getmtime(target)   108         source_mtime = getmtime(source)   109         return source_mtime > target_mtime   110    111     return True   112    113 class CommonModule:   114    115     "A common module representation."   116    117     def __init__(self, name, importer):   118    119         """   120         Initialise this module with the given 'name' and an 'importer' which is   121         used to provide access to other modules when required.   122         """   123    124         self.name = name   125         self.importer = importer   126         self.filename = None   127    128         # Inspection-related attributes.   129    130         self.astnode = None   131         self.encoding = None   132         self.iterators = {}   133         self.temp = {}   134         self.lambdas = {}   135    136         # Constants, literals and values.   137    138         self.constants = {}   139         self.constant_values = {}   140         self.literals = {}   141         self.literal_types = {}   142    143         # Nested namespaces.   144    145         self.namespace_path = []   146         self.in_function = False   147    148         # Retain the assignment value expression and track invocations.   149    150         self.in_assignment = None   151         self.in_invocation = None   152    153         # Attribute chain state management.   154    155         self.attrs = []   156         self.chain_assignment = []   157         self.chain_invocation = []   158    159     def __repr__(self):   160         return "CommonModule(%r, %r)" % (self.name, self.importer)   161    162     def parse_file(self, filename):   163    164         "Parse the file with the given 'filename', initialising attributes."   165    166         self.filename = filename   167    168         # Use the Transformer directly to obtain encoding information.   169    170         t = Transformer()   171         f = open(filename)   172    173         try:   174             self.astnode = t.parsesuite(f.read() + "\n")   175             self.encoding = t.encoding   176         finally:   177             f.close()   178    179     # Module-relative naming.   180    181     def get_global_path(self, name):   182         return "%s.%s" % (self.name, name)   183    184     def get_namespace_path(self):   185         return ".".join([self.name] + self.namespace_path)   186    187     def get_object_path(self, name):   188         return ".".join([self.name] + self.namespace_path + [name])   189    190     def get_parent_path(self):   191         return ".".join([self.name] + self.namespace_path[:-1])   192    193     # Namespace management.   194    195     def enter_namespace(self, name):   196    197         "Enter the namespace having the given 'name'."   198    199         self.namespace_path.append(name)   200    201     def exit_namespace(self):   202    203         "Exit the current namespace."   204    205         self.namespace_path.pop()   206    207     # Constant reference naming.   208    209     def get_constant_name(self, value, value_type, encoding=None):   210    211         """   212         Add a new constant to the current namespace for 'value' with   213         'value_type'.   214         """   215    216         path = self.get_namespace_path()   217         init_item(self.constants, path, dict)   218         return "$c%d" % add_counter_item(self.constants[path], (value, value_type, encoding))   219    220     # Literal reference naming.   221    222     def get_literal_name(self):   223    224         "Add a new literal to the current namespace."   225    226         path = self.get_namespace_path()   227         init_item(self.literals, path, lambda: 0)   228         return "$C%d" % self.literals[path]   229    230     def next_literal(self):   231         self.literals[self.get_namespace_path()] += 1   232    233     # Temporary iterator naming.   234    235     def get_iterator_path(self):   236         return self.in_function and self.get_namespace_path() or self.name   237    238     def get_iterator_name(self):   239         path = self.get_iterator_path()   240         init_item(self.iterators, path, lambda: 0)   241         return "$i%d" % self.iterators[path]   242    243     def next_iterator(self):   244         self.iterators[self.get_iterator_path()] += 1   245    246     # Temporary variable naming.   247    248     def get_temporary_name(self):   249         path = self.get_namespace_path()   250         init_item(self.temp, path, lambda: 0)   251         return "$t%d" % self.temp[path]   252    253     def next_temporary(self):   254         self.temp[self.get_namespace_path()] += 1   255    256     # Arbitrary function naming.   257    258     def get_lambda_name(self):   259         path = self.get_namespace_path()   260         init_item(self.lambdas, path, lambda: 0)   261         name = "$l%d" % self.lambdas[path]   262         self.lambdas[path] += 1   263         return name   264    265     def reset_lambdas(self):   266         self.lambdas = {}   267    268     # Constant and literal recording.   269    270     def get_constant_value(self, value, literals=None):   271    272         """   273         Encode the 'value' if appropriate, returning a value, a typename and any   274         encoding.   275         """   276    277         if isinstance(value, unicode):   278             return value.encode("utf-8"), "unicode", self.encoding   279    280         # Attempt to convert plain strings to text.   281    282         elif isinstance(value, str) and self.encoding:   283             try:   284                 return get_string_details(literals, self.encoding)   285             except UnicodeDecodeError:   286                 pass   287    288         return value, value.__class__.__name__, None   289    290     def get_constant_reference(self, ref, value, encoding=None):   291    292         """   293         Return a constant reference for the given 'ref' type and 'value', with   294         the optional 'encoding' applying to text values.   295         """   296    297         constant_name = self.get_constant_name(value, ref.get_origin(), encoding)   298    299         # Return a reference for the constant.   300    301         objpath = self.get_object_path(constant_name)   302         name_ref = ConstantValueRef(constant_name, ref.instance_of(objpath), value)   303    304         # Record the value and type for the constant.   305    306         self._reserve_constant(objpath, name_ref.value, name_ref.get_origin(), encoding)   307         return name_ref   308    309     def reserve_constant(self, objpath, value, origin, encoding=None):   310    311         """   312         Reserve a constant within 'objpath' with the given 'value' and having a   313         type with the given 'origin', with the optional 'encoding' applying to   314         text values.   315         """   316    317         constant_name = self.get_constant_name(value, origin)   318         objpath = self.get_object_path(constant_name)   319         self._reserve_constant(objpath, value, origin, encoding)   320    321     def _reserve_constant(self, objpath, value, origin, encoding):   322    323         """   324         Store a constant for 'objpath' with the given 'value' and 'origin', with   325         the optional 'encoding' applying to text values.   326         """   327    328         self.constant_values[objpath] = value, origin, encoding   329    330     def get_literal_reference(self, name, ref, items, cls):   331    332         """   333         Return a literal reference for the given type 'name', literal 'ref',   334         node 'items' and employing the given 'cls' as the class of the returned   335         reference object.   336         """   337    338         # Construct an invocation using the items as arguments.   339    340         typename = "$L%s" % name   341    342         invocation = compiler.ast.CallFunc(   343             compiler.ast.Name(typename),   344             items   345             )   346    347         # Get a name for the actual literal.   348    349         instname = self.get_literal_name()   350         self.next_literal()   351    352         # Record the type for the literal.   353    354         objpath = self.get_object_path(instname)   355         self.literal_types[objpath] = ref.get_origin()   356    357         # Return a wrapper for the invocation exposing the items.   358    359         return cls(   360             instname,   361             ref.instance_of(),   362             self.process_structure_node(invocation),   363             invocation.args   364             )   365    366     # Node handling.   367    368     def process_structure(self, node):   369    370         """   371         Within the given 'node', process the program structure.   372    373         During inspection, this will process global declarations, adjusting the   374         module namespace, and import statements, building a module dependency   375         hierarchy.   376    377         During translation, this will consult deduced program information and   378         output translated code.   379         """   380    381         l = []   382         for n in node.getChildNodes():   383             l.append(self.process_structure_node(n))   384         return l   385    386     def process_augassign_node(self, n):   387    388         "Process the given augmented assignment node 'n'."   389    390         op = operator_functions[n.op]   391    392         if isinstance(n.node, compiler.ast.Getattr):   393             target = compiler.ast.AssAttr(n.node.expr, n.node.attrname, "OP_ASSIGN")   394         elif isinstance(n.node, compiler.ast.Name):   395             target = compiler.ast.AssName(n.node.name, "OP_ASSIGN")   396         else:   397             target = n.node   398    399         assignment = compiler.ast.Assign(   400             [target],   401             compiler.ast.CallFunc(   402                 compiler.ast.Name("$op%s" % op),   403                 [n.node, n.expr]))   404    405         return self.process_structure_node(assignment)   406    407     def process_assignment_for_object(self, original_name, source):   408    409         """   410         Return an assignment operation making 'original_name' refer to the given   411         'source'.   412         """   413    414         assignment = compiler.ast.Assign(   415             [compiler.ast.AssName(original_name, "OP_ASSIGN")],   416             source   417             )   418    419         return self.process_structure_node(assignment)   420    421     def process_assignment_node_items(self, n, expr):   422    423         """   424         Process the given assignment node 'n' whose children are to be assigned   425         items of 'expr'.   426         """   427    428         name_ref = self.process_structure_node(expr)   429    430         # Either unpack the items and present them directly to each assignment   431         # node.   432    433         if isinstance(name_ref, LiteralSequenceRef) and \   434            self.process_literal_sequence_items(n, name_ref):   435    436             pass   437    438         # Or have the assignment nodes access each item via the sequence API.   439    440         else:   441             self.process_assignment_node_items_by_position(n, expr, name_ref)   442    443     def process_assignment_node_items_by_position(self, n, expr, name_ref):   444    445         """   446         Process the given sequence assignment node 'n', converting the node to   447         the separate assignment of each target using positional access on a   448         temporary variable representing the sequence. Use 'expr' as the assigned   449         value and 'name_ref' as the reference providing any existing temporary   450         variable.   451         """   452    453         assignments = []   454    455         # Employ existing names to access the sequence.   456         # Literal sequences do not provide names of accessible objects.   457    458         if isinstance(name_ref, NameRef) and not isinstance(name_ref, LiteralSequenceRef):   459             temp = name_ref.name   460    461         # For other expressions, create a temporary name to reference the items.   462    463         else:   464             temp = self.get_temporary_name()   465             self.next_temporary()   466    467             assignments.append(   468                 compiler.ast.Assign([compiler.ast.AssName(temp, "OP_ASSIGN")], expr)   469                 )   470    471         # Assign the items to the target nodes.   472    473         for i, node in enumerate(n.nodes):   474             assignments.append(   475                 compiler.ast.Assign([node], compiler.ast.Subscript(   476                     compiler.ast.Name(temp), "OP_APPLY", [compiler.ast.Const(i, str(i))]))   477                 )   478    479         return self.process_structure_node(compiler.ast.Stmt(assignments))   480    481     def process_literal_sequence_items(self, n, name_ref):   482    483         """   484         Process the given assignment node 'n', obtaining from the given   485         'name_ref' the items to be assigned to the assignment targets.   486    487         Return whether this method was able to process the assignment node as   488         a sequence of direct assignments.   489         """   490    491         if len(n.nodes) == len(name_ref.items):   492             assigned_names, count = get_names_from_nodes(n.nodes)   493             accessed_names, _count = get_names_from_nodes(name_ref.items)   494    495             # Only assign directly between items if all assigned names are   496             # plain names (not attribute assignments), and if the assigned names   497             # do not appear in the accessed names.   498    499             if len(assigned_names) == count and \   500                not assigned_names.intersection(accessed_names):   501    502                 for node, item in zip(n.nodes, name_ref.items):   503                     self.process_assignment_node(node, item)   504    505                 return True   506    507             # Otherwise, use the position-based mechanism to obtain values.   508    509             else:   510                 return False   511         else:   512             raise InspectError("In %s, item assignment needing %d items is given %d items." % (   513                 self.get_namespace_path(), len(n.nodes), len(name_ref.items)))   514    515     def process_compare_node(self, n):   516    517         """   518         Process the given comparison node 'n', converting an operator sequence   519         from...   520    521         <expr1> <op1> <expr2> <op2> <expr3>   522    523         ...to...   524    525         <op1>(<expr1>, <expr2>) and <op2>(<expr2>, <expr3>)   526         """   527    528         invocations = []   529         last = n.expr   530    531         for op, op_node in n.ops:   532             op = operator_functions.get(op)   533    534             invocations.append(compiler.ast.CallFunc(   535                 compiler.ast.Name("$op%s" % op),   536                 [last, op_node]))   537    538             last = op_node   539    540         if len(invocations) > 1:   541             result = compiler.ast.And(invocations)   542         else:   543             result = invocations[0]   544    545         return self.process_structure_node(result)   546    547     def process_dict_node(self, node):   548    549         """   550         Process the given dictionary 'node', returning a list of (key, value)   551         tuples.   552         """   553    554         l = []   555         for key, value in node.items:   556             l.append((   557                 self.process_structure_node(key),   558                 self.process_structure_node(value)))   559         return l   560    561     def process_for_node(self, n):   562    563         """   564         Generate attribute accesses for {n.list}.__iter__ and the next method on   565         the iterator, producing a replacement node for the original.   566         """   567    568         node = compiler.ast.Stmt([   569    570             # <next> = {n.list}.__iter__().next   571    572             compiler.ast.Assign(   573                 [compiler.ast.AssName(self.get_iterator_name(), "OP_ASSIGN")],   574                 compiler.ast.Getattr(   575                     compiler.ast.CallFunc(   576                         compiler.ast.Getattr(n.list, "__iter__"),   577                         []   578                         ), "next")),   579    580             # try:   581             #     while True:   582             #         <var>... = <next>()   583             #         ...   584             # except StopIteration:   585             #     pass   586    587             compiler.ast.TryExcept(   588                 compiler.ast.While(   589                     compiler.ast.Name("True"),   590                     compiler.ast.Stmt([   591                         compiler.ast.Assign(   592                             [n.assign],   593                             compiler.ast.CallFunc(   594                                 compiler.ast.Name(self.get_iterator_name()),   595                                 []   596                                 )),   597                         n.body]),   598                     None),   599                 [(compiler.ast.Name("StopIteration"), None, compiler.ast.Stmt([compiler.ast.Pass()]))],   600                 None)   601             ])   602    603         self.next_iterator()   604         self.process_structure_node(node)   605    606     def process_literal_sequence_node(self, n, name, ref, cls):   607    608         """   609         Process the given literal sequence node 'n' as a function invocation,   610         with 'name' indicating the type of the sequence, and 'ref' being a   611         reference to the type. The 'cls' is used to instantiate a suitable name   612         reference.   613         """   614    615         if name == "dict":   616             items = []   617             for key, value in n.items:   618                 items.append(compiler.ast.Tuple([key, value]))   619         else: # name in ("list", "tuple"):   620             items = n.nodes   621    622         return self.get_literal_reference(name, ref, items, cls)   623    624     def process_operator_node(self, n):   625    626         """   627         Process the given operator node 'n' as an operator function invocation.   628         """   629    630         op = operator_functions[n.__class__.__name__]   631         invocation = compiler.ast.CallFunc(   632             compiler.ast.Name("$op%s" % op),   633             list(n.getChildNodes())   634             )   635         return self.process_structure_node(invocation)   636    637     def process_print_node(self, n):   638    639         """   640         Process the given print node 'n' as an invocation on a stream of the   641         form...   642    643         $print(dest, args, nl)   644    645         The special function name will be translated elsewhere.   646         """   647    648         nl = isinstance(n, compiler.ast.Printnl)   649         invocation = compiler.ast.CallFunc(   650             compiler.ast.Name("$print"),   651             [n.dest or compiler.ast.Name("None"),   652              compiler.ast.List(list(n.nodes)),   653              nl and compiler.ast.Name("True") or compiler.ast.Name("False")]   654             )   655         return self.process_structure_node(invocation)   656    657     def process_slice_node(self, n, expr=None):   658    659         """   660         Process the given slice node 'n' as an operator function invocation.   661         """   662    663         if n.flags == "OP_ASSIGN": op = "setslice"   664         elif n.flags == "OP_DELETE": op = "delslice"   665         else: op = "getslice"   666    667         invocation = compiler.ast.CallFunc(   668             compiler.ast.Name("$op%s" % op),   669             [n.expr, n.lower or compiler.ast.Name("None"), n.upper or compiler.ast.Name("None")] +   670                 (expr and [expr] or [])   671             )   672    673         # Fix parse tree structure.   674    675         if op == "delslice":   676             invocation = compiler.ast.Discard(invocation)   677    678         return self.process_structure_node(invocation)   679    680     def process_sliceobj_node(self, n):   681    682         """   683         Process the given slice object node 'n' as a slice constructor.   684         """   685    686         op = "slice"   687         invocation = compiler.ast.CallFunc(   688             compiler.ast.Name("$op%s" % op),   689             n.nodes   690             )   691         return self.process_structure_node(invocation)   692    693     def process_subscript_node(self, n, expr=None):   694    695         """   696         Process the given subscript node 'n' as an operator function invocation.   697         """   698    699         if n.flags == "OP_ASSIGN": op = "setitem"   700         elif n.flags == "OP_DELETE": op = "delitem"   701         else: op = "getitem"   702    703         invocation = compiler.ast.CallFunc(   704             compiler.ast.Name("$op%s" % op),   705             [n.expr] + list(n.subs) + (expr and [expr] or [])   706             )   707    708         # Fix parse tree structure.   709    710         if op == "delitem":   711             invocation = compiler.ast.Discard(invocation)   712    713         return self.process_structure_node(invocation)   714    715     def process_attribute_chain(self, n):   716    717         """   718         Process the given attribute access node 'n'. Return a reference   719         describing the expression.   720         """   721    722         # AssAttr/Getattr are nested with the outermost access being the last   723         # access in any chain.   724    725         self.attrs.insert(0, n.attrname)   726         attrs = self.attrs   727    728         # Break attribute chains where non-access nodes are found.   729    730         if not self.have_access_expression(n):   731             self.reset_attribute_chain()   732    733         # Descend into the expression, extending backwards any existing chain,   734         # or building another for the expression.   735    736         name_ref = self.process_structure_node(n.expr)   737    738         # Restore chain information applying to this node.   739    740         if not self.have_access_expression(n):   741             self.restore_attribute_chain(attrs)   742    743         # Return immediately if the expression was another access and thus a   744         # continuation backwards along the chain. The above processing will   745         # have followed the chain all the way to its conclusion.   746    747         if self.have_access_expression(n):   748             del self.attrs[0]   749    750         return name_ref   751    752     # Attribute chain handling.   753    754     def reset_attribute_chain(self):   755    756         "Reset the attribute chain for a subexpression of an attribute access."   757    758         self.attrs = []   759         self.chain_assignment.append(self.in_assignment)   760         self.chain_invocation.append(self.in_invocation)   761         self.in_assignment = None   762         self.in_invocation = None   763    764     def restore_attribute_chain(self, attrs):   765    766         "Restore the attribute chain for an attribute access."   767    768         self.attrs = attrs   769         self.in_assignment = self.chain_assignment.pop()   770         self.in_invocation = self.chain_invocation.pop()   771    772     def have_access_expression(self, node):   773    774         "Return whether the expression associated with 'node' is Getattr."   775    776         return isinstance(node.expr, compiler.ast.Getattr)   777    778     def get_name_for_tracking(self, name, name_ref=None, is_global=False):   779    780         """   781         Return the name to be used for attribute usage observations involving   782         the given 'name' in the current namespace.   783    784         If the name is being used outside a function, and if 'name_ref' is   785         given and indicates a global or if 'is_global' is specified as a true   786         value, a path featuring the name in the global namespace is returned.   787         Otherwise, a path computed using the current namespace and the given   788         name is returned.   789    790         The intention of this method is to provide a suitably-qualified name   791         that can be tracked across namespaces. Where globals are being   792         referenced in class namespaces, they should be referenced using their   793         path within the module, not using a path within each class.   794    795         It may not be possible to identify a global within a function at the   796         time of inspection (since a global may appear later in a file).   797         Consequently, globals are identified by their local name rather than   798         their module-qualified path.   799         """   800    801         # For functions, use the appropriate local names.   802    803         if self.in_function:   804             return name   805    806         # For global names outside functions, use a global name.   807    808         elif is_global or name_ref and name_ref.is_global_name():   809             return self.get_global_path(name)   810    811         # Otherwise, establish a name in the current namespace.   812    813         else:   814             return self.get_object_path(name)   815    816     def get_path_for_access(self):   817    818         "Outside functions, register accesses at the module level."   819    820         if not self.in_function:   821             return self.name   822         else:   823             return self.get_namespace_path()   824    825     def get_module_name(self, node):   826    827         """   828         Using the given From 'node' in this module, calculate any relative import   829         information, returning a tuple containing a module to import along with any   830         names to import based on the node's name information.   831    832         Where the returned module is given as None, whole module imports should   833         be performed for the returned modules using the returned names.   834         """   835    836         # Absolute import.   837    838         if node.level == 0:   839             return node.modname, node.names   840    841         # Relative to an ancestor of this module.   842    843         else:   844             path = self.name.split(".")   845             level = node.level   846    847             # Relative imports treat package roots as submodules.   848    849             if split(self.filename)[-1] == "__init__.py":   850                 level -= 1   851    852             if level > len(path):   853                 raise InspectError("Relative import %r involves too many levels up from module %r" % (   854                     ("%s%s" % ("." * node.level, node.modname or "")), self.name))   855    856             basename = ".".join(path[:len(path)-level])   857    858         # Name imports from a module.   859    860         if node.modname:   861             return "%s.%s" % (basename, node.modname), node.names   862    863         # Relative whole module imports.   864    865         else:   866             return basename, node.names   867    868 def get_argnames(args):   869    870     """   871     Return a list of all names provided by 'args'. Since tuples may be   872     employed, the arguments are traversed depth-first.   873     """   874    875     l = []   876     for arg in args:   877         if isinstance(arg, tuple):   878             l += get_argnames(arg)   879         else:   880             l.append(arg)   881     return l   882    883 def get_names_from_nodes(nodes):   884    885     """   886     Return the names employed in the given 'nodes' along with the number of   887     nodes excluding sequences.   888     """   889    890     names = set()   891     count = 0   892    893     for node in nodes:   894    895         # Add names and count them.   896    897         if isinstance(node, (compiler.ast.AssName, compiler.ast.Name)):   898             names.add(node.name)   899             count += 1   900    901         # Add names from sequences and incorporate their counts.   902    903         elif isinstance(node, (compiler.ast.AssList, compiler.ast.AssTuple,   904                                compiler.ast.List, compiler.ast.Set,   905                                compiler.ast.Tuple)):   906             _names, _count = get_names_from_nodes(node.nodes)   907             names.update(_names)   908             count += _count   909    910         # Count non-name, non-sequence nodes.   911    912         else:   913             count += 1   914    915     return names, count   916    917 # Result classes.   918    919 class InstructionSequence:   920    921     "A generic sequence of instructions."   922    923     def __init__(self, instructions):   924         self.instructions = instructions   925    926     def get_value_instruction(self):   927         return self.instructions[-1]   928    929     def get_init_instructions(self):   930         return self.instructions[:-1]   931    932 # Dictionary utilities.   933    934 def init_item(d, key, fn):   935    936     """   937     Add to 'd' an entry for 'key' using the callable 'fn' to make an initial   938     value where no entry already exists.   939     """   940    941     if not d.has_key(key):   942         d[key] = fn()   943     return d[key]   944    945 def dict_for_keys(d, keys):   946    947     "Return a new dictionary containing entries from 'd' for the given 'keys'."   948    949     nd = {}   950     for key in keys:   951         if d.has_key(key):   952             nd[key] = d[key]   953     return nd   954    955 def make_key(s):   956    957     "Make sequence 's' into a tuple-based key, first sorting its contents."   958    959     l = list(s)   960     l.sort()   961     return tuple(l)   962    963 def add_counter_item(d, key):   964    965     """   966     Make a mapping in 'd' for 'key' to the number of keys added before it, thus   967     maintaining a mapping of keys to their order of insertion.   968     """   969    970     if not d.has_key(key):   971         d[key] = len(d.keys())   972     return d[key]    973    974 def remove_items(d1, d2):   975    976     "Remove from 'd1' all items from 'd2'."   977    978     for key in d2.keys():   979         if d1.has_key(key):   980             del d1[key]   981    982 # Set utilities.   983    984 def first(s):   985     return list(s)[0]   986    987 def same(s1, s2):   988     return set(s1) == set(s2)   989    990 # General input/output.   991    992 def readfile(filename):   993    994     "Return the contents of 'filename'."   995    996     f = open(filename)   997     try:   998         return f.read()   999     finally:  1000         f.close()  1001   1002 def writefile(filename, s):  1003   1004     "Write to 'filename' the string 's'."  1005   1006     f = open(filename, "w")  1007     try:  1008         f.write(s)  1009     finally:  1010         f.close()  1011   1012 # General encoding.  1013   1014 def sorted_output(x):  1015   1016     "Sort sequence 'x' and return a string with commas separating the values."  1017   1018     x = map(str, x)  1019     x.sort()  1020     return ", ".join(x)  1021   1022 def get_string_details(literals, encoding):  1023   1024     """  1025     Determine whether 'literals' represent Unicode strings or byte strings,  1026     using 'encoding' to reproduce byte sequences.  1027   1028     Each literal is the full program representation including prefix and quotes  1029     recoded by the parser to UTF-8. Thus, any literal found to represent a byte  1030     string needs to be translated back to its original encoding.  1031   1032     Return a single encoded literal value, a type name, and the original  1033     encoding as a tuple.  1034     """  1035   1036     typename = "unicode"  1037   1038     l = []  1039   1040     for s in literals:  1041         out, _typename = get_literal_details(s)  1042         if _typename == "str":  1043             typename = "str"  1044         l.append(out)  1045   1046     out = "".join(l)  1047   1048     # For Unicode values, convert to the UTF-8 program representation.  1049   1050     if typename == "unicode":  1051         return out.encode("utf-8"), typename, encoding  1052   1053     # For byte string values, convert back to the original encoding.  1054   1055     else:  1056         return out.encode(encoding), typename, encoding  1057   1058 def get_literal_details(s):  1059   1060     """  1061     Determine whether 's' represents a Unicode string or a byte string, where  1062     's' contains the full program representation of a literal including prefix  1063     and quotes, recoded by the parser to UTF-8.  1064   1065     Find and convert Unicode values starting with <backslash>u or <backslash>U,  1066     and byte or Unicode values starting with <backslash><octal digit> or  1067     <backslash>x.  1068   1069     Literals prefixed with "u" cause <backslash><octal digit> and <backslash>x  1070     to be considered as Unicode values. Otherwise, they produce byte values and  1071     cause unprefixed strings to be considered as byte strings.  1072   1073     Literals prefixed with "r" do not have their backslash-encoded values  1074     converted unless also prefixed with "u", in which case only the above value  1075     formats are converted, not any of the other special sequences for things  1076     like newlines.  1077   1078     Return the literal value as a Unicode object together with the appropriate  1079     type name in a tuple.  1080     """  1081   1082     l = []  1083   1084     # Identify the quote character and use it to identify the prefix.  1085   1086     quote_type = s[-1]  1087     prefix_end = s.find(quote_type)  1088     prefix = s[:prefix_end].lower()  1089   1090     if prefix not in ("", "b", "br", "r", "u", "ur"):  1091         raise ValueError, "String literal does not have a supported prefix: %s" % s  1092   1093     if "b" in prefix:  1094         typename = "str"  1095     else:  1096         typename = "unicode"  1097   1098     # Identify triple quotes or single quotes.  1099   1100     if len(s) >= 6 and s[-2] == quote_type and s[-3] == quote_type:  1101         quote = s[prefix_end:prefix_end+3]  1102         current = prefix_end + 3  1103         end = len(s) - 3  1104     else:  1105         quote = s[prefix_end]  1106         current = prefix_end + 1  1107         end = len(s) - 1  1108   1109     # Conversions of some quoted values.  1110   1111     searches = {  1112         "u" : (6, 16),  1113         "U" : (10, 16),  1114         "x" : (4, 16),  1115         }  1116   1117     octal_digits = map(str, range(0, 8))  1118   1119     # Translations of some quoted values.  1120   1121     escaped = {  1122         "\\" : "\\", "'" : "'", '"' : '"',  1123         "a" : "\a", "b" : "\b", "f" : "\f",  1124         "n" : "\n", "r" : "\r", "t" : "\t",  1125         }  1126   1127     while current < end:  1128   1129         # Look for quoted values.  1130   1131         index = s.find("\\", current)  1132         if index == -1 or index + 1 == end:  1133             l.append(s[current:end])  1134             break  1135   1136         # Add the preceding text.  1137   1138         l.append(s[current:index])  1139   1140         # Handle quoted text.  1141   1142         term = s[index+1]  1143   1144         # Add Unicode values. Where a string is u-prefixed, even \o and \x  1145         # produce Unicode values.  1146   1147         if typename == "unicode" and (  1148             term in ("u", "U") or   1149             "u" in prefix and (term == "x" or term in octal_digits)):  1150   1151             needed, base = searches.get(term, (4, 8))  1152             value = convert_quoted_value(s, index, needed, end, base, unichr)  1153             l.append(value)  1154             current = index + needed  1155   1156         # Add raw byte values, changing the string type.  1157   1158         elif "r" not in prefix and (  1159              term == "x" or term in octal_digits):  1160   1161             needed, base = searches.get(term, (4, 8))  1162             value = convert_quoted_value(s, index, needed, end, base, chr)  1163             l.append(value)  1164             typename = "str"  1165             current = index + needed  1166   1167         # Add other escaped values.  1168   1169         elif "r" not in prefix and escaped.has_key(term):  1170             l.append(escaped[term])  1171             current = index + 2  1172   1173         # Add other text as found.  1174   1175         else:  1176             l.append(s[index:index+2])  1177             current = index + 2  1178   1179     # Collect the components into a single Unicode object. Since the literal  1180     # text was already in UTF-8 form, interpret plain strings as UTF-8  1181     # sequences.  1182   1183     out = []  1184   1185     for value in l:  1186         if isinstance(value, unicode):  1187             out.append(value)  1188         else:  1189             out.append(unicode(value, "utf-8"))  1190   1191     return "".join(out), typename  1192   1193 def convert_quoted_value(s, index, needed, end, base, fn):  1194   1195     """  1196     Interpret a quoted value in 's' at 'index' with the given 'needed' number of  1197     positions, and with the given 'end' indicating the first position after the  1198     end of the actual string content.  1199   1200     Use 'base' as the numerical base when interpreting the value, and use 'fn'  1201     to convert the value to an appropriate type.  1202     """  1203   1204     s = s[index:min(index+needed, end)]  1205   1206     # Not a complete occurrence.  1207   1208     if len(s) < needed:  1209         return s  1210   1211     # Test for a well-formed value.  1212   1213     try:  1214         first = base == 8 and 1 or 2  1215         value = int(s[first:needed], base)  1216     except ValueError:  1217         return s  1218     else:  1219         return fn(value)  1220   1221 # Attribute chain decoding.  1222   1223 def get_attrnames(attrnames):  1224   1225     """  1226     Split the qualified attribute chain 'attrnames' into its components,  1227     handling special attributes starting with "#" that indicate type  1228     conformance.  1229     """  1230   1231     if attrnames.startswith("#"):  1232         return [attrnames]  1233     else:  1234         return attrnames.split(".")  1235   1236 def get_attrname_from_location(location):  1237   1238     """  1239     Extract the first attribute from the attribute names employed in a  1240     'location'.  1241     """  1242   1243     path, name, attrnames, access = location  1244     if not attrnames:  1245         return attrnames  1246     return get_attrnames(attrnames)[0]  1247   1248 def get_name_path(path, name):  1249   1250     "Return a suitable qualified name from the given 'path' and 'name'."  1251   1252     if "." in name:  1253         return name  1254     else:  1255         return "%s.%s" % (path, name)  1256   1257 # Usage-related functions.  1258   1259 def get_types_for_usage(attrnames, objects):  1260   1261     """  1262     Identify the types that can support the given 'attrnames', using the  1263     given 'objects' as the catalogue of type details.  1264     """  1265   1266     types = []  1267     for name, _attrnames in objects.items():  1268         if set(attrnames).issubset(_attrnames):  1269             types.append(name)  1270     return types  1271   1272 def get_invoked_attributes(usage):  1273   1274     "Obtain invoked attribute from the given 'usage'."  1275   1276     invoked = []  1277     if usage:  1278         for attrname, invocation, assignment in usage:  1279             if invocation:  1280                 invoked.append(attrname)  1281     return invoked  1282   1283 def get_assigned_attributes(usage):  1284   1285     "Obtain assigned attribute from the given 'usage'."  1286   1287     assigned = []  1288     if usage:  1289         for attrname, invocation, assignment in usage:  1290             if assignment:  1291                 assigned.append(attrname)  1292     return assigned  1293   1294 # Type and module functions.  1295 # NOTE: This makes assumptions about the __builtins__ structure.  1296   1297 def get_builtin_module(name):  1298   1299     "Return the module name containing the given type 'name'."  1300   1301     if name == "string":  1302         modname = "str"  1303     elif name == "utf8string":  1304         modname = "unicode"  1305     elif name == "NoneType":  1306         modname = "none"  1307     else:  1308         modname = name  1309   1310     return "__builtins__.%s" % modname  1311   1312 def get_builtin_type(name):  1313   1314     "Return the type name provided by the given Python value 'name'."  1315   1316     if name == "str":  1317         return "string"  1318     elif name == "unicode":  1319         return "utf8string"  1320     else:  1321         return name  1322   1323 def get_builtin_class(name):  1324   1325     "Return the full name of the built-in class having the given 'name'."  1326   1327     typename = get_builtin_type(name)  1328     module = get_builtin_module(typename)  1329     return "%s.%s" % (module, typename)  1330   1331 # Useful data.  1332   1333 predefined_constants = "False", "None", "NotImplemented", "True"  1334   1335 operator_functions = {  1336   1337     # Fundamental operations.  1338   1339     "is" : "is_",  1340     "is not" : "is_not",  1341   1342     # Binary operations.  1343   1344     "in" : "in_",  1345     "not in" : "not_in",  1346     "Add" : "add",  1347     "Bitand" : "and_",  1348     "Bitor" : "or_",  1349     "Bitxor" : "xor",  1350     "Div" : "div",  1351     "FloorDiv" : "floordiv",  1352     "LeftShift" : "lshift",  1353     "Mod" : "mod",  1354     "Mul" : "mul",  1355     "Power" : "pow",  1356     "RightShift" : "rshift",  1357     "Sub" : "sub",  1358   1359     # Unary operations.  1360   1361     "Invert" : "invert",  1362     "UnaryAdd" : "pos",  1363     "UnarySub" : "neg",  1364   1365     # Augmented assignment.  1366   1367     "+=" : "iadd",  1368     "-=" : "isub",  1369     "*=" : "imul",  1370     "/=" : "idiv",  1371     "//=" : "ifloordiv",  1372     "%=" : "imod",  1373     "**=" : "ipow",  1374     "<<=" : "ilshift",  1375     ">>=" : "irshift",  1376     "&=" : "iand",  1377     "^=" : "ixor",  1378     "|=" : "ior",  1379   1380     # Comparisons.  1381   1382     "==" : "eq",  1383     "!=" : "ne",  1384     "<" : "lt",  1385     "<=" : "le",  1386     ">=" : "ge",  1387     ">" : "gt",  1388     }  1389   1390 # vim: tabstop=4 expandtab shiftwidth=4