Lichen (file common.py at d1d907801d42)

     1 #!/usr/bin/env python     2      3 """     4 Common functions.     5      6 Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013,     7               2014, 2015, 2016, 2017 Paul Boddie <paul@boddie.org.uk>     8      9 This program is free software; you can redistribute it and/or modify it under    10 the terms of the GNU General Public License as published by the Free Software    11 Foundation; either version 3 of the License, or (at your option) any later    12 version.    13     14 This program is distributed in the hope that it will be useful, but WITHOUT    15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS    16 FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more    17 details.    18     19 You should have received a copy of the GNU General Public License along with    20 this program.  If not, see <http://www.gnu.org/licenses/>.    21 """    22     23 from compiler.transformer import Transformer    24 from errors import InspectError    25 from os import listdir, makedirs, remove    26 from os.path import exists, getmtime, isdir, join, split    27 from results import ConstantValueRef, LiteralSequenceRef, NameRef    28 import compiler.ast    29     30 class CommonOutput:    31     32     "Common output functionality."    33     34     def check_output(self):    35     36         "Check the existing output and remove it if irrelevant."    37     38         if not exists(self.output):    39             makedirs(self.output)    40     41         details = self.importer.get_cache_details()    42         recorded_details = self.get_output_details()    43     44         if recorded_details != details:    45             self.remove_output()    46     47         writefile(self.get_output_details_filename(), details)    48     49     def get_output_details_filename(self):    50     51         "Return the output details filename."    52     53         return join(self.output, "$details")    54     55     def get_output_details(self):    56     57         "Return details of the existing output."    58     59         details_filename = self.get_output_details_filename()    60     61         if not exists(details_filename):    62             return None    63         else:    64             return readfile(details_filename)    65     66     def remove_output(self, dirname=None):    67     68         "Remove the output."    69     70         dirname = dirname or self.output    71     72         for filename in listdir(dirname):    73             path = join(dirname, filename)    74             if isdir(path):    75                 self.remove_output(path)    76             else:    77                 remove(path)    78     79 def copy(source, target, only_if_newer=True):    80     81     "Copy a text file from 'source' to 'target'."    82     83     if isdir(target):    84         target = join(target, split(source)[-1])    85     86     if only_if_newer and not is_newer(source, target):    87         return    88     89     infile = open(source)    90     outfile = open(target, "w")    91     92     try:    93         outfile.write(infile.read())    94     finally:    95         outfile.close()    96         infile.close()    97     98 def is_newer(source, target):    99    100     "Return whether 'source' is newer than 'target'."   101    102     if exists(target):   103         target_mtime = getmtime(target)   104         source_mtime = getmtime(source)   105         return source_mtime > target_mtime   106    107     return True   108    109 class CommonModule:   110    111     "A common module representation."   112    113     def __init__(self, name, importer):   114    115         """   116         Initialise this module with the given 'name' and an 'importer' which is   117         used to provide access to other modules when required.   118         """   119    120         self.name = name   121         self.importer = importer   122         self.filename = None   123    124         # Inspection-related attributes.   125    126         self.astnode = None   127         self.encoding = None   128         self.iterators = {}   129         self.temp = {}   130         self.lambdas = {}   131    132         # Constants, literals and values.   133    134         self.constants = {}   135         self.constant_values = {}   136         self.literals = {}   137         self.literal_types = {}   138    139         # Nested namespaces.   140    141         self.namespace_path = []   142         self.in_function = False   143    144         # Retain the assignment value expression and track invocations.   145    146         self.in_assignment = None   147         self.in_invocation = None   148    149         # Attribute chain state management.   150    151         self.attrs = []   152         self.chain_assignment = []   153         self.chain_invocation = []   154    155     def __repr__(self):   156         return "CommonModule(%r, %r)" % (self.name, self.importer)   157    158     def parse_file(self, filename):   159    160         "Parse the file with the given 'filename', initialising attributes."   161    162         self.filename = filename   163    164         # Use the Transformer directly to obtain encoding information.   165    166         t = Transformer()   167         f = open(filename)   168    169         try:   170             self.astnode = t.parsesuite(f.read() + "\n")   171             self.encoding = t.encoding   172         finally:   173             f.close()   174    175     # Module-relative naming.   176    177     def get_global_path(self, name):   178         return "%s.%s" % (self.name, name)   179    180     def get_namespace_path(self):   181         return ".".join([self.name] + self.namespace_path)   182    183     def get_object_path(self, name):   184         return ".".join([self.name] + self.namespace_path + [name])   185    186     def get_parent_path(self):   187         return ".".join([self.name] + self.namespace_path[:-1])   188    189     # Namespace management.   190    191     def enter_namespace(self, name):   192    193         "Enter the namespace having the given 'name'."   194    195         self.namespace_path.append(name)   196    197     def exit_namespace(self):   198    199         "Exit the current namespace."   200    201         self.namespace_path.pop()   202    203     # Constant reference naming.   204    205     def get_constant_name(self, value, value_type, encoding=None):   206    207         """   208         Add a new constant to the current namespace for 'value' with   209         'value_type'.   210         """   211    212         path = self.get_namespace_path()   213         init_item(self.constants, path, dict)   214         return "$c%d" % add_counter_item(self.constants[path], (value, value_type, encoding))   215    216     # Literal reference naming.   217    218     def get_literal_name(self):   219    220         "Add a new literal to the current namespace."   221    222         path = self.get_namespace_path()   223         init_item(self.literals, path, lambda: 0)   224         return "$C%d" % self.literals[path]   225    226     def next_literal(self):   227         self.literals[self.get_namespace_path()] += 1   228    229     # Temporary iterator naming.   230    231     def get_iterator_path(self):   232         return self.in_function and self.get_namespace_path() or self.name   233    234     def get_iterator_name(self):   235         path = self.get_iterator_path()   236         init_item(self.iterators, path, lambda: 0)   237         return "$i%d" % self.iterators[path]   238    239     def next_iterator(self):   240         self.iterators[self.get_iterator_path()] += 1   241    242     # Temporary variable naming.   243    244     def get_temporary_name(self):   245         path = self.get_namespace_path()   246         init_item(self.temp, path, lambda: 0)   247         return "$t%d" % self.temp[path]   248    249     def next_temporary(self):   250         self.temp[self.get_namespace_path()] += 1   251    252     # Arbitrary function naming.   253    254     def get_lambda_name(self):   255         path = self.get_namespace_path()   256         init_item(self.lambdas, path, lambda: 0)   257         name = "$l%d" % self.lambdas[path]   258         self.lambdas[path] += 1   259         return name   260    261     def reset_lambdas(self):   262         self.lambdas = {}   263    264     # Constant and literal recording.   265    266     def get_constant_value(self, value, literals=None):   267    268         """   269         Encode the 'value' if appropriate, returning a value, a typename and any   270         encoding.   271         """   272    273         if isinstance(value, unicode):   274             return value.encode("utf-8"), "unicode", self.encoding   275    276         # Attempt to convert plain strings to text.   277    278         elif isinstance(value, str) and self.encoding:   279             try:   280                 return get_string_details(literals, self.encoding)   281             except UnicodeDecodeError:   282                 pass   283    284         return value, value.__class__.__name__, None   285    286     def get_constant_reference(self, ref, value, encoding=None):   287    288         """   289         Return a constant reference for the given 'ref' type and 'value', with   290         the optional 'encoding' applying to text values.   291         """   292    293         constant_name = self.get_constant_name(value, ref.get_origin(), encoding)   294    295         # Return a reference for the constant.   296    297         objpath = self.get_object_path(constant_name)   298         name_ref = ConstantValueRef(constant_name, ref.instance_of(objpath), value)   299    300         # Record the value and type for the constant.   301    302         self._reserve_constant(objpath, name_ref.value, name_ref.get_origin(), encoding)   303         return name_ref   304    305     def reserve_constant(self, objpath, value, origin, encoding=None):   306    307         """   308         Reserve a constant within 'objpath' with the given 'value' and having a   309         type with the given 'origin', with the optional 'encoding' applying to   310         text values.   311         """   312    313         constant_name = self.get_constant_name(value, origin)   314         objpath = self.get_object_path(constant_name)   315         self._reserve_constant(objpath, value, origin, encoding)   316    317     def _reserve_constant(self, objpath, value, origin, encoding):   318    319         """   320         Store a constant for 'objpath' with the given 'value' and 'origin', with   321         the optional 'encoding' applying to text values.   322         """   323    324         self.constant_values[objpath] = value, origin, encoding   325    326     def get_literal_reference(self, name, ref, items, cls):   327    328         """   329         Return a literal reference for the given type 'name', literal 'ref',   330         node 'items' and employing the given 'cls' as the class of the returned   331         reference object.   332         """   333    334         # Construct an invocation using the items as arguments.   335    336         typename = "$L%s" % name   337    338         invocation = compiler.ast.CallFunc(   339             compiler.ast.Name(typename),   340             items   341             )   342    343         # Get a name for the actual literal.   344    345         instname = self.get_literal_name()   346         self.next_literal()   347    348         # Record the type for the literal.   349    350         objpath = self.get_object_path(instname)   351         self.literal_types[objpath] = ref.get_origin()   352    353         # Return a wrapper for the invocation exposing the items.   354    355         return cls(   356             instname,   357             ref.instance_of(),   358             self.process_structure_node(invocation),   359             invocation.args   360             )   361    362     # Node handling.   363    364     def process_structure(self, node):   365    366         """   367         Within the given 'node', process the program structure.   368    369         During inspection, this will process global declarations, adjusting the   370         module namespace, and import statements, building a module dependency   371         hierarchy.   372    373         During translation, this will consult deduced program information and   374         output translated code.   375         """   376    377         l = []   378         for n in node.getChildNodes():   379             l.append(self.process_structure_node(n))   380         return l   381    382     def process_augassign_node(self, n):   383    384         "Process the given augmented assignment node 'n'."   385    386         op = operator_functions[n.op]   387    388         if isinstance(n.node, compiler.ast.Getattr):   389             target = compiler.ast.AssAttr(n.node.expr, n.node.attrname, "OP_ASSIGN")   390         elif isinstance(n.node, compiler.ast.Name):   391             target = compiler.ast.AssName(n.node.name, "OP_ASSIGN")   392         else:   393             target = n.node   394    395         assignment = compiler.ast.Assign(   396             [target],   397             compiler.ast.CallFunc(   398                 compiler.ast.Name("$op%s" % op),   399                 [n.node, n.expr]))   400    401         return self.process_structure_node(assignment)   402    403     def process_assignment_for_object(self, original_name, source):   404    405         """   406         Return an assignment operation making 'original_name' refer to the given   407         'source'.   408         """   409    410         assignment = compiler.ast.Assign(   411             [compiler.ast.AssName(original_name, "OP_ASSIGN")],   412             source   413             )   414    415         return self.process_structure_node(assignment)   416    417     def process_assignment_node_items(self, n, expr):   418    419         """   420         Process the given assignment node 'n' whose children are to be assigned   421         items of 'expr'.   422         """   423    424         name_ref = self.process_structure_node(expr)   425    426         # Either unpack the items and present them directly to each assignment   427         # node.   428    429         if isinstance(name_ref, LiteralSequenceRef) and \   430            self.process_literal_sequence_items(n, name_ref):   431    432             pass   433    434         # Or have the assignment nodes access each item via the sequence API.   435    436         else:   437             self.process_assignment_node_items_by_position(n, expr, name_ref)   438    439     def process_assignment_node_items_by_position(self, n, expr, name_ref):   440    441         """   442         Process the given sequence assignment node 'n', converting the node to   443         the separate assignment of each target using positional access on a   444         temporary variable representing the sequence. Use 'expr' as the assigned   445         value and 'name_ref' as the reference providing any existing temporary   446         variable.   447         """   448    449         assignments = []   450    451         # Employ existing names to access the sequence.   452         # Literal sequences do not provide names of accessible objects.   453    454         if isinstance(name_ref, NameRef) and not isinstance(name_ref, LiteralSequenceRef):   455             temp = name_ref.name   456    457         # For other expressions, create a temporary name to reference the items.   458    459         else:   460             temp = self.get_temporary_name()   461             self.next_temporary()   462    463             assignments.append(   464                 compiler.ast.Assign([compiler.ast.AssName(temp, "OP_ASSIGN")], expr)   465                 )   466    467         # Assign the items to the target nodes.   468    469         for i, node in enumerate(n.nodes):   470             assignments.append(   471                 compiler.ast.Assign([node], compiler.ast.Subscript(   472                     compiler.ast.Name(temp), "OP_APPLY", [compiler.ast.Const(i, str(i))]))   473                 )   474    475         return self.process_structure_node(compiler.ast.Stmt(assignments))   476    477     def process_literal_sequence_items(self, n, name_ref):   478    479         """   480         Process the given assignment node 'n', obtaining from the given   481         'name_ref' the items to be assigned to the assignment targets.   482    483         Return whether this method was able to process the assignment node as   484         a sequence of direct assignments.   485         """   486    487         if len(n.nodes) == len(name_ref.items):   488             assigned_names, count = get_names_from_nodes(n.nodes)   489             accessed_names, _count = get_names_from_nodes(name_ref.items)   490    491             # Only assign directly between items if all assigned names are   492             # plain names (not attribute assignments), and if the assigned names   493             # do not appear in the accessed names.   494    495             if len(assigned_names) == count and \   496                not assigned_names.intersection(accessed_names):   497    498                 for node, item in zip(n.nodes, name_ref.items):   499                     self.process_assignment_node(node, item)   500    501                 return True   502    503             # Otherwise, use the position-based mechanism to obtain values.   504    505             else:   506                 return False   507         else:   508             raise InspectError("In %s, item assignment needing %d items is given %d items." % (   509                 self.get_namespace_path(), len(n.nodes), len(name_ref.items)))   510    511     def process_compare_node(self, n):   512    513         """   514         Process the given comparison node 'n', converting an operator sequence   515         from...   516    517         <expr1> <op1> <expr2> <op2> <expr3>   518    519         ...to...   520    521         <op1>(<expr1>, <expr2>) and <op2>(<expr2>, <expr3>)   522         """   523    524         invocations = []   525         last = n.expr   526    527         for op, op_node in n.ops:   528             op = operator_functions.get(op)   529    530             invocations.append(compiler.ast.CallFunc(   531                 compiler.ast.Name("$op%s" % op),   532                 [last, op_node]))   533    534             last = op_node   535    536         if len(invocations) > 1:   537             result = compiler.ast.And(invocations)   538         else:   539             result = invocations[0]   540    541         return self.process_structure_node(result)   542    543     def process_dict_node(self, node):   544    545         """   546         Process the given dictionary 'node', returning a list of (key, value)   547         tuples.   548         """   549    550         l = []   551         for key, value in node.items:   552             l.append((   553                 self.process_structure_node(key),   554                 self.process_structure_node(value)))   555         return l   556    557     def process_for_node(self, n):   558    559         """   560         Generate attribute accesses for {n.list}.__iter__ and the next method on   561         the iterator, producing a replacement node for the original.   562         """   563    564         node = compiler.ast.Stmt([   565    566             # <next> = {n.list}.__iter__().next   567    568             compiler.ast.Assign(   569                 [compiler.ast.AssName(self.get_iterator_name(), "OP_ASSIGN")],   570                 compiler.ast.Getattr(   571                     compiler.ast.CallFunc(   572                         compiler.ast.Getattr(n.list, "__iter__"),   573                         []   574                         ), "next")),   575    576             # try:   577             #     while True:   578             #         <var>... = <next>()   579             #         ...   580             # except StopIteration:   581             #     pass   582    583             compiler.ast.TryExcept(   584                 compiler.ast.While(   585                     compiler.ast.Name("True"),   586                     compiler.ast.Stmt([   587                         compiler.ast.Assign(   588                             [n.assign],   589                             compiler.ast.CallFunc(   590                                 compiler.ast.Name(self.get_iterator_name()),   591                                 []   592                                 )),   593                         n.body]),   594                     None),   595                 [(compiler.ast.Name("StopIteration"), None, compiler.ast.Stmt([compiler.ast.Pass()]))],   596                 None)   597             ])   598    599         self.next_iterator()   600         self.process_structure_node(node)   601    602     def process_literal_sequence_node(self, n, name, ref, cls):   603    604         """   605         Process the given literal sequence node 'n' as a function invocation,   606         with 'name' indicating the type of the sequence, and 'ref' being a   607         reference to the type. The 'cls' is used to instantiate a suitable name   608         reference.   609         """   610    611         if name == "dict":   612             items = []   613             for key, value in n.items:   614                 items.append(compiler.ast.Tuple([key, value]))   615         else: # name in ("list", "tuple"):   616             items = n.nodes   617    618         return self.get_literal_reference(name, ref, items, cls)   619    620     def process_operator_node(self, n):   621    622         """   623         Process the given operator node 'n' as an operator function invocation.   624         """   625    626         op = operator_functions[n.__class__.__name__]   627         invocation = compiler.ast.CallFunc(   628             compiler.ast.Name("$op%s" % op),   629             list(n.getChildNodes())   630             )   631         return self.process_structure_node(invocation)   632    633     def process_print_node(self, n):   634    635         """   636         Process the given print node 'n' as an invocation on a stream of the   637         form...   638    639         $print(dest, args, nl)   640    641         The special function name will be translated elsewhere.   642         """   643    644         nl = isinstance(n, compiler.ast.Printnl)   645         invocation = compiler.ast.CallFunc(   646             compiler.ast.Name("$print"),   647             [n.dest or compiler.ast.Name("None"),   648              compiler.ast.List(list(n.nodes)),   649              nl and compiler.ast.Name("True") or compiler.ast.Name("False")]   650             )   651         return self.process_structure_node(invocation)   652    653     def process_slice_node(self, n, expr=None):   654    655         """   656         Process the given slice node 'n' as an operator function invocation.   657         """   658    659         if n.flags == "OP_ASSIGN": op = "setslice"   660         elif n.flags == "OP_DELETE": op = "delslice"   661         else: op = "getslice"   662    663         invocation = compiler.ast.CallFunc(   664             compiler.ast.Name("$op%s" % op),   665             [n.expr, n.lower or compiler.ast.Name("None"), n.upper or compiler.ast.Name("None")] +   666                 (expr and [expr] or [])   667             )   668    669         # Fix parse tree structure.   670    671         if op == "delslice":   672             invocation = compiler.ast.Discard(invocation)   673    674         return self.process_structure_node(invocation)   675    676     def process_sliceobj_node(self, n):   677    678         """   679         Process the given slice object node 'n' as a slice constructor.   680         """   681    682         op = "slice"   683         invocation = compiler.ast.CallFunc(   684             compiler.ast.Name("$op%s" % op),   685             n.nodes   686             )   687         return self.process_structure_node(invocation)   688    689     def process_subscript_node(self, n, expr=None):   690    691         """   692         Process the given subscript node 'n' as an operator function invocation.   693         """   694    695         if n.flags == "OP_ASSIGN": op = "setitem"   696         elif n.flags == "OP_DELETE": op = "delitem"   697         else: op = "getitem"   698    699         invocation = compiler.ast.CallFunc(   700             compiler.ast.Name("$op%s" % op),   701             [n.expr] + list(n.subs) + (expr and [expr] or [])   702             )   703    704         # Fix parse tree structure.   705    706         if op == "delitem":   707             invocation = compiler.ast.Discard(invocation)   708    709         return self.process_structure_node(invocation)   710    711     def process_attribute_chain(self, n):   712    713         """   714         Process the given attribute access node 'n'. Return a reference   715         describing the expression.   716         """   717    718         # AssAttr/Getattr are nested with the outermost access being the last   719         # access in any chain.   720    721         self.attrs.insert(0, n.attrname)   722         attrs = self.attrs   723    724         # Break attribute chains where non-access nodes are found.   725    726         if not self.have_access_expression(n):   727             self.reset_attribute_chain()   728    729         # Descend into the expression, extending backwards any existing chain,   730         # or building another for the expression.   731    732         name_ref = self.process_structure_node(n.expr)   733    734         # Restore chain information applying to this node.   735    736         if not self.have_access_expression(n):   737             self.restore_attribute_chain(attrs)   738    739         # Return immediately if the expression was another access and thus a   740         # continuation backwards along the chain. The above processing will   741         # have followed the chain all the way to its conclusion.   742    743         if self.have_access_expression(n):   744             del self.attrs[0]   745    746         return name_ref   747    748     # Attribute chain handling.   749    750     def reset_attribute_chain(self):   751    752         "Reset the attribute chain for a subexpression of an attribute access."   753    754         self.attrs = []   755         self.chain_assignment.append(self.in_assignment)   756         self.chain_invocation.append(self.in_invocation)   757         self.in_assignment = None   758         self.in_invocation = None   759    760     def restore_attribute_chain(self, attrs):   761    762         "Restore the attribute chain for an attribute access."   763    764         self.attrs = attrs   765         self.in_assignment = self.chain_assignment.pop()   766         self.in_invocation = self.chain_invocation.pop()   767    768     def have_access_expression(self, node):   769    770         "Return whether the expression associated with 'node' is Getattr."   771    772         return isinstance(node.expr, compiler.ast.Getattr)   773    774     def get_name_for_tracking(self, name, name_ref=None):   775    776         """   777         Return the name to be used for attribute usage observations involving   778         the given 'name' in the current namespace.   779    780         If the name is being used outside a function, and if 'name_ref' is   781         given, a path featuring the name in the global namespace is returned   782         where 'name_ref' indicates a global. Otherwise, a path computed using   783         the current namespace and the given name is returned.   784    785         The intention of this method is to provide a suitably-qualified name   786         that can be tracked across namespaces. Where globals are being   787         referenced in class namespaces, they should be referenced using their   788         path within the module, not using a path within each class.   789    790         It may not be possible to identify a global within a function at the   791         time of inspection (since a global may appear later in a file).   792         Consequently, globals are identified by their local name rather than   793         their module-qualified path.   794         """   795    796         # For functions, use the appropriate local names.   797    798         if self.in_function:   799             return name   800    801         # For global names outside functions, use a global name.   802    803         elif name_ref and name_ref.is_global_name():   804             return self.get_global_path(name)   805    806         # Otherwise, establish a name in the current namespace.   807    808         else:   809             return self.get_object_path(name)   810    811     def get_path_for_access(self):   812    813         "Outside functions, register accesses at the module level."   814    815         if not self.in_function:   816             return self.name   817         else:   818             return self.get_namespace_path()   819    820     def get_module_name(self, node):   821    822         """   823         Using the given From 'node' in this module, calculate any relative import   824         information, returning a tuple containing a module to import along with any   825         names to import based on the node's name information.   826    827         Where the returned module is given as None, whole module imports should   828         be performed for the returned modules using the returned names.   829         """   830    831         # Absolute import.   832    833         if node.level == 0:   834             return node.modname, node.names   835    836         # Relative to an ancestor of this module.   837    838         else:   839             path = self.name.split(".")   840             level = node.level   841    842             # Relative imports treat package roots as submodules.   843    844             if split(self.filename)[-1] == "__init__.py":   845                 level -= 1   846    847             if level > len(path):   848                 raise InspectError("Relative import %r involves too many levels up from module %r" % (   849                     ("%s%s" % ("." * node.level, node.modname or "")), self.name))   850    851             basename = ".".join(path[:len(path)-level])   852    853         # Name imports from a module.   854    855         if node.modname:   856             return "%s.%s" % (basename, node.modname), node.names   857    858         # Relative whole module imports.   859    860         else:   861             return basename, node.names   862    863 def get_argnames(args):   864    865     """   866     Return a list of all names provided by 'args'. Since tuples may be   867     employed, the arguments are traversed depth-first.   868     """   869    870     l = []   871     for arg in args:   872         if isinstance(arg, tuple):   873             l += get_argnames(arg)   874         else:   875             l.append(arg)   876     return l   877    878 def get_names_from_nodes(nodes):   879    880     """   881     Return the names employed in the given 'nodes' along with the number of   882     nodes excluding sequences.   883     """   884    885     names = set()   886     count = 0   887    888     for node in nodes:   889    890         # Add names and count them.   891    892         if isinstance(node, (compiler.ast.AssName, compiler.ast.Name)):   893             names.add(node.name)   894             count += 1   895    896         # Add names from sequences and incorporate their counts.   897    898         elif isinstance(node, (compiler.ast.AssList, compiler.ast.AssTuple,   899                                compiler.ast.List, compiler.ast.Set,   900                                compiler.ast.Tuple)):   901             _names, _count = get_names_from_nodes(node.nodes)   902             names.update(_names)   903             count += _count   904    905         # Count non-name, non-sequence nodes.   906    907         else:   908             count += 1   909    910     return names, count   911    912 # Result classes.   913    914 class InstructionSequence:   915    916     "A generic sequence of instructions."   917    918     def __init__(self, instructions):   919         self.instructions = instructions   920    921     def get_value_instruction(self):   922         return self.instructions[-1]   923    924     def get_init_instructions(self):   925         return self.instructions[:-1]   926    927 # Dictionary utilities.   928    929 def init_item(d, key, fn):   930    931     """   932     Add to 'd' an entry for 'key' using the callable 'fn' to make an initial   933     value where no entry already exists.   934     """   935    936     if not d.has_key(key):   937         d[key] = fn()   938     return d[key]   939    940 def dict_for_keys(d, keys):   941    942     "Return a new dictionary containing entries from 'd' for the given 'keys'."   943    944     nd = {}   945     for key in keys:   946         if d.has_key(key):   947             nd[key] = d[key]   948     return nd   949    950 def make_key(s):   951    952     "Make sequence 's' into a tuple-based key, first sorting its contents."   953    954     l = list(s)   955     l.sort()   956     return tuple(l)   957    958 def add_counter_item(d, key):   959    960     """   961     Make a mapping in 'd' for 'key' to the number of keys added before it, thus   962     maintaining a mapping of keys to their order of insertion.   963     """   964    965     if not d.has_key(key):   966         d[key] = len(d.keys())   967     return d[key]    968    969 def remove_items(d1, d2):   970    971     "Remove from 'd1' all items from 'd2'."   972    973     for key in d2.keys():   974         if d1.has_key(key):   975             del d1[key]   976    977 # Set utilities.   978    979 def first(s):   980     return list(s)[0]   981    982 def same(s1, s2):   983     return set(s1) == set(s2)   984    985 # General input/output.   986    987 def readfile(filename):   988    989     "Return the contents of 'filename'."   990    991     f = open(filename)   992     try:   993         return f.read()   994     finally:   995         f.close()   996    997 def writefile(filename, s):   998    999     "Write to 'filename' the string 's'."  1000   1001     f = open(filename, "w")  1002     try:  1003         f.write(s)  1004     finally:  1005         f.close()  1006   1007 # General encoding.  1008   1009 def sorted_output(x):  1010   1011     "Sort sequence 'x' and return a string with commas separating the values."  1012   1013     x = map(str, x)  1014     x.sort()  1015     return ", ".join(x)  1016   1017 def get_string_details(literals, encoding):  1018   1019     """  1020     Determine whether 'literals' represent Unicode strings or byte strings,  1021     using 'encoding' to reproduce byte sequences.  1022   1023     Each literal is the full program representation including prefix and quotes  1024     recoded by the parser to UTF-8. Thus, any literal found to represent a byte  1025     string needs to be translated back to its original encoding.  1026   1027     Return a single encoded literal value, a type name, and the original  1028     encoding as a tuple.  1029     """  1030   1031     typename = "unicode"  1032   1033     l = []  1034   1035     for s in literals:  1036         out, _typename = get_literal_details(s)  1037         if _typename == "str":  1038             typename = "str"  1039         l.append(out)  1040   1041     out = "".join(l)  1042   1043     # For Unicode values, convert to the UTF-8 program representation.  1044   1045     if typename == "unicode":  1046         return out.encode("utf-8"), typename, encoding  1047   1048     # For byte string values, convert back to the original encoding.  1049   1050     else:  1051         return out.encode(encoding), typename, encoding  1052   1053 def get_literal_details(s):  1054   1055     """  1056     Determine whether 's' represents a Unicode string or a byte string, where  1057     's' contains the full program representation of a literal including prefix  1058     and quotes, recoded by the parser to UTF-8.  1059   1060     Find and convert Unicode values starting with <backslash>u or <backslash>U,  1061     and byte or Unicode values starting with <backslash><octal digit> or  1062     <backslash>x.  1063   1064     Literals prefixed with "u" cause <backslash><octal digit> and <backslash>x  1065     to be considered as Unicode values. Otherwise, they produce byte values and  1066     cause unprefixed strings to be considered as byte strings.  1067   1068     Literals prefixed with "r" do not have their backslash-encoded values  1069     converted unless also prefixed with "u", in which case only the above value  1070     formats are converted, not any of the other special sequences for things  1071     like newlines.  1072   1073     Return the literal value as a Unicode object together with the appropriate  1074     type name in a tuple.  1075     """  1076   1077     l = []  1078   1079     # Identify the quote character and use it to identify the prefix.  1080   1081     quote_type = s[-1]  1082     prefix_end = s.find(quote_type)  1083     prefix = s[:prefix_end].lower()  1084   1085     if prefix not in ("", "b", "br", "r", "u", "ur"):  1086         raise ValueError, "String literal does not have a supported prefix: %s" % s  1087   1088     if "b" in prefix:  1089         typename = "str"  1090     else:  1091         typename = "unicode"  1092   1093     # Identify triple quotes or single quotes.  1094   1095     if len(s) >= 6 and s[-2] == quote_type and s[-3] == quote_type:  1096         quote = s[prefix_end:prefix_end+3]  1097         current = prefix_end + 3  1098         end = len(s) - 3  1099     else:  1100         quote = s[prefix_end]  1101         current = prefix_end + 1  1102         end = len(s) - 1  1103   1104     # Conversions of some quoted values.  1105   1106     searches = {  1107         "u" : (6, 16),  1108         "U" : (10, 16),  1109         "x" : (4, 16),  1110         }  1111   1112     octal_digits = map(str, range(0, 8))  1113   1114     # Translations of some quoted values.  1115   1116     escaped = {  1117         "\\" : "\\", "'" : "'", '"' : '"',  1118         "a" : "\a", "b" : "\b", "f" : "\f",  1119         "n" : "\n", "r" : "\r", "t" : "\t",  1120         }  1121   1122     while current < end:  1123   1124         # Look for quoted values.  1125   1126         index = s.find("\\", current)  1127         if index == -1 or index + 1 == end:  1128             l.append(s[current:end])  1129             break  1130   1131         # Add the preceding text.  1132   1133         l.append(s[current:index])  1134   1135         # Handle quoted text.  1136   1137         term = s[index+1]  1138   1139         # Add Unicode values. Where a string is u-prefixed, even \o and \x  1140         # produce Unicode values.  1141   1142         if typename == "unicode" and (  1143             term in ("u", "U") or   1144             "u" in prefix and (term == "x" or term in octal_digits)):  1145   1146             needed, base = searches.get(term, (4, 8))  1147             value = convert_quoted_value(s, index, needed, end, base, unichr)  1148             l.append(value)  1149             current = index + needed  1150   1151         # Add raw byte values, changing the string type.  1152   1153         elif "r" not in prefix and (  1154              term == "x" or term in octal_digits):  1155   1156             needed, base = searches.get(term, (4, 8))  1157             value = convert_quoted_value(s, index, needed, end, base, chr)  1158             l.append(value)  1159             typename = "str"  1160             current = index + needed  1161   1162         # Add other escaped values.  1163   1164         elif "r" not in prefix and escaped.has_key(term):  1165             l.append(escaped[term])  1166             current = index + 2  1167   1168         # Add other text as found.  1169   1170         else:  1171             l.append(s[index:index+2])  1172             current = index + 2  1173   1174     # Collect the components into a single Unicode object. Since the literal  1175     # text was already in UTF-8 form, interpret plain strings as UTF-8  1176     # sequences.  1177   1178     out = []  1179   1180     for value in l:  1181         if isinstance(value, unicode):  1182             out.append(value)  1183         else:  1184             out.append(unicode(value, "utf-8"))  1185   1186     return "".join(out), typename  1187   1188 def convert_quoted_value(s, index, needed, end, base, fn):  1189   1190     """  1191     Interpret a quoted value in 's' at 'index' with the given 'needed' number of  1192     positions, and with the given 'end' indicating the first position after the  1193     end of the actual string content.  1194   1195     Use 'base' as the numerical base when interpreting the value, and use 'fn'  1196     to convert the value to an appropriate type.  1197     """  1198   1199     s = s[index:min(index+needed, end)]  1200   1201     # Not a complete occurrence.  1202   1203     if len(s) < needed:  1204         return s  1205   1206     # Test for a well-formed value.  1207   1208     try:  1209         first = base == 8 and 1 or 2  1210         value = int(s[first:needed], base)  1211     except ValueError:  1212         return s  1213     else:  1214         return fn(value)  1215   1216 # Attribute chain decoding.  1217   1218 def get_attrnames(attrnames):  1219   1220     """  1221     Split the qualified attribute chain 'attrnames' into its components,  1222     handling special attributes starting with "#" that indicate type  1223     conformance.  1224     """  1225   1226     if attrnames.startswith("#"):  1227         return [attrnames]  1228     else:  1229         return attrnames.split(".")  1230   1231 def get_attrname_from_location(location):  1232   1233     """  1234     Extract the first attribute from the attribute names employed in a  1235     'location'.  1236     """  1237   1238     path, name, attrnames, access = location  1239     if not attrnames:  1240         return attrnames  1241     return get_attrnames(attrnames)[0]  1242   1243 def get_name_path(path, name):  1244   1245     "Return a suitable qualified name from the given 'path' and 'name'."  1246   1247     if "." in name:  1248         return name  1249     else:  1250         return "%s.%s" % (path, name)  1251   1252 # Usage-related functions.  1253   1254 def get_types_for_usage(attrnames, objects):  1255   1256     """  1257     Identify the types that can support the given 'attrnames', using the  1258     given 'objects' as the catalogue of type details.  1259     """  1260   1261     types = []  1262     for name, _attrnames in objects.items():  1263         if set(attrnames).issubset(_attrnames):  1264             types.append(name)  1265     return types  1266   1267 def get_invoked_attributes(usage):  1268   1269     "Obtain invoked attribute from the given 'usage'."  1270   1271     invoked = []  1272     if usage:  1273         for attrname, invocation, assignment in usage:  1274             if invocation:  1275                 invoked.append(attrname)  1276     return invoked  1277   1278 def get_assigned_attributes(usage):  1279   1280     "Obtain assigned attribute from the given 'usage'."  1281   1282     assigned = []  1283     if usage:  1284         for attrname, invocation, assignment in usage:  1285             if assignment:  1286                 assigned.append(attrname)  1287     return assigned  1288   1289 # Type and module functions.  1290 # NOTE: This makes assumptions about the __builtins__ structure.  1291   1292 def get_builtin_module(name):  1293   1294     "Return the module name containing the given type 'name'."  1295   1296     if name == "string":  1297         modname = "str"  1298     elif name == "utf8string":  1299         modname = "unicode"  1300     elif name == "NoneType":  1301         modname = "none"  1302     else:  1303         modname = name  1304   1305     return "__builtins__.%s" % modname  1306   1307 def get_builtin_type(name):  1308   1309     "Return the type name provided by the given Python value 'name'."  1310   1311     if name == "str":  1312         return "string"  1313     elif name == "unicode":  1314         return "utf8string"  1315     else:  1316         return name  1317   1318 def get_builtin_class(name):  1319   1320     "Return the full name of the built-in class having the given 'name'."  1321   1322     typename = get_builtin_type(name)  1323     module = get_builtin_module(typename)  1324     return "%s.%s" % (module, typename)  1325   1326 # Useful data.  1327   1328 predefined_constants = "False", "None", "NotImplemented", "True"  1329   1330 operator_functions = {  1331   1332     # Fundamental operations.  1333   1334     "is" : "is_",  1335     "is not" : "is_not",  1336   1337     # Binary operations.  1338   1339     "in" : "in_",  1340     "not in" : "not_in",  1341     "Add" : "add",  1342     "Bitand" : "and_",  1343     "Bitor" : "or_",  1344     "Bitxor" : "xor",  1345     "Div" : "div",  1346     "FloorDiv" : "floordiv",  1347     "LeftShift" : "lshift",  1348     "Mod" : "mod",  1349     "Mul" : "mul",  1350     "Power" : "pow",  1351     "RightShift" : "rshift",  1352     "Sub" : "sub",  1353   1354     # Unary operations.  1355   1356     "Invert" : "invert",  1357     "UnaryAdd" : "pos",  1358     "UnarySub" : "neg",  1359   1360     # Augmented assignment.  1361   1362     "+=" : "iadd",  1363     "-=" : "isub",  1364     "*=" : "imul",  1365     "/=" : "idiv",  1366     "//=" : "ifloordiv",  1367     "%=" : "imod",  1368     "**=" : "ipow",  1369     "<<=" : "ilshift",  1370     ">>=" : "irshift",  1371     "&=" : "iand",  1372     "^=" : "ixor",  1373     "|=" : "ior",  1374   1375     # Comparisons.  1376   1377     "==" : "eq",  1378     "!=" : "ne",  1379     "<" : "lt",  1380     "<=" : "le",  1381     ">=" : "ge",  1382     ">" : "gt",  1383     }  1384   1385 # vim: tabstop=4 expandtab shiftwidth=4