Lichen

common.py

844:ea1e1157afc4
2018-07-05 Paul Boddie Merged changes from the default branch. tuple-optimisations
     1 #!/usr/bin/env python     2      3 """     4 Common functions.     5      6 Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013,     7               2014, 2015, 2016, 2017, 2018 Paul Boddie <paul@boddie.org.uk>     8      9 This program is free software; you can redistribute it and/or modify it under    10 the terms of the GNU General Public License as published by the Free Software    11 Foundation; either version 3 of the License, or (at your option) any later    12 version.    13     14 This program is distributed in the hope that it will be useful, but WITHOUT    15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS    16 FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more    17 details.    18     19 You should have received a copy of the GNU General Public License along with    20 this program.  If not, see <http://www.gnu.org/licenses/>.    21 """    22     23 from compiler.transformer import Transformer    24 from errors import InspectError    25 from os import listdir, makedirs, remove    26 from os.path import exists, getmtime, isdir, join, split    27 from results import ConstantValueRef, LiteralSequenceRef, NameRef    28 import compiler.ast    29     30 class CommonOutput:    31     32     "Common output functionality."    33     34     def check_output(self, options=None):    35     36         "Check the existing output and remove it if irrelevant."    37     38         if not exists(self.output):    39             makedirs(self.output)    40     41         details = self.importer.get_cache_details()    42         recorded_details = self.get_output_details()    43     44         # Combine cache details with any options.    45     46         full_details = options and (details + " " + options) or details    47     48         if recorded_details != full_details:    49             self.remove_output()    50     51         writefile(self.get_output_details_filename(), full_details)    52     53     def get_output_details_filename(self):    54     55         "Return the output details filename."    56     57         return join(self.output, "$details")    58     59     def get_output_details(self):    60     61         "Return details of the existing output."    62     63         details_filename = self.get_output_details_filename()    64     65         if not exists(details_filename):    66             return None    67         else:    68             return readfile(details_filename)    69     70     def remove_output(self, dirname=None):    71     72         "Remove the output."    73     74         dirname = dirname or self.output    75     76         for filename in listdir(dirname):    77             path = join(dirname, filename)    78             if isdir(path):    79                 self.remove_output(path)    80             else:    81                 remove(path)    82     83 def copy(source, target, only_if_newer=True):    84     85     "Copy a text file from 'source' to 'target'."    86     87     if isdir(target):    88         target = join(target, split(source)[-1])    89     90     if only_if_newer and not is_newer(source, target):    91         return    92     93     infile = open(source)    94     outfile = open(target, "w")    95     96     try:    97         outfile.write(infile.read())    98     finally:    99         outfile.close()   100         infile.close()   101    102 def is_newer(source, target):   103    104     "Return whether 'source' is newer than 'target'."   105    106     if exists(target):   107         target_mtime = getmtime(target)   108         source_mtime = getmtime(source)   109         return source_mtime > target_mtime   110    111     return True   112    113 class CommonModule:   114    115     "A common module representation."   116    117     def __init__(self, name, importer):   118    119         """   120         Initialise this module with the given 'name' and an 'importer' which is   121         used to provide access to other modules when required.   122         """   123    124         self.name = name   125         self.importer = importer   126         self.filename = None   127    128         # Inspection-related attributes.   129    130         self.astnode = None   131         self.encoding = None   132         self.temp = {}   133         self.lambdas = {}   134    135         # Constants, literals and values.   136    137         self.constants = {}   138         self.constant_values = {}   139         self.literals = {}   140         self.literal_types = {}   141    142         # Nested namespaces.   143    144         self.namespace_path = []   145         self.in_function = False   146    147         # Retain the assignment value expression and track invocations.   148    149         self.in_assignment = None   150         self.in_invocation = None   151    152         # Attribute chain state management.   153    154         self.attrs = []   155         self.chain_assignment = []   156         self.chain_invocation = []   157    158     def __repr__(self):   159         return "CommonModule(%r, %r)" % (self.name, self.importer)   160    161     def parse_file(self, filename):   162    163         "Parse the file with the given 'filename', initialising attributes."   164    165         self.filename = filename   166    167         # Use the Transformer directly to obtain encoding information.   168    169         t = Transformer()   170         f = open(filename)   171    172         try:   173             self.astnode = t.parsesuite(f.read() + "\n")   174             self.encoding = t.encoding   175         finally:   176             f.close()   177    178     # Module-relative naming.   179    180     def get_global_path(self, name):   181         return "%s.%s" % (self.name, name)   182    183     def get_namespace_path(self):   184         return ".".join([self.name] + self.namespace_path)   185    186     def get_object_path(self, name):   187         return ".".join([self.name] + self.namespace_path + [name])   188    189     # Namespace management.   190    191     def enter_namespace(self, name):   192    193         "Enter the namespace having the given 'name'."   194    195         self.namespace_path.append(name)   196    197     def exit_namespace(self):   198    199         "Exit the current namespace."   200    201         self.namespace_path.pop()   202    203     # Constant reference naming.   204    205     def get_constant_name(self, value, value_type, encoding=None):   206    207         """   208         Add a new constant to the current namespace for 'value' with   209         'value_type'.   210         """   211    212         path = self.get_namespace_path()   213         init_item(self.constants, path, dict)   214         return "$c%d" % add_counter_item(self.constants[path], (value, value_type, encoding))   215    216     # Literal reference naming.   217    218     def get_literal_name(self):   219    220         "Add a new literal to the current namespace."   221    222         path = self.get_namespace_path()   223         init_item(self.literals, path, lambda: 0)   224         return "$C%d" % self.literals[path]   225    226     def next_literal(self):   227         self.literals[self.get_namespace_path()] += 1   228    229     # Temporary variable naming.   230    231     def get_temporary_name(self):   232         path = self.get_namespace_path()   233         init_item(self.temp, path, lambda: 0)   234         return "$t%d" % self.temp[path]   235    236     def next_temporary(self):   237         self.temp[self.get_namespace_path()] += 1   238    239     # Arbitrary function naming.   240    241     def get_lambda_name(self):   242         path = self.get_namespace_path()   243         init_item(self.lambdas, path, lambda: 0)   244         name = "$l%d" % self.lambdas[path]   245         self.lambdas[path] += 1   246         return name   247    248     def reset_lambdas(self):   249         self.lambdas = {}   250    251     # Constant and literal recording.   252    253     def get_constant_value(self, value, literals=None):   254    255         """   256         Encode the 'value' if appropriate, returning a value, a typename and any   257         encoding.   258         """   259    260         if isinstance(value, unicode):   261             return value.encode("utf-8"), "unicode", self.encoding   262    263         # Attempt to convert plain strings to text.   264    265         elif isinstance(value, str) and self.encoding:   266             try:   267                 return get_string_details(literals, self.encoding)   268             except UnicodeDecodeError:   269                 pass   270    271         return value, value.__class__.__name__, None   272    273     def get_constant_reference(self, ref, value, encoding=None):   274    275         """   276         Return a constant reference for the given 'ref' type and 'value', with   277         the optional 'encoding' applying to text values.   278         """   279    280         constant_name = self.get_constant_name(value, ref.get_origin(), encoding)   281    282         # Return a reference for the constant.   283    284         objpath = self.get_object_path(constant_name)   285         name_ref = ConstantValueRef(constant_name, ref.instance_of(objpath), value)   286    287         # Record the value and type for the constant.   288    289         self._reserve_constant(objpath, name_ref.value, name_ref.get_origin(), encoding)   290         return name_ref   291    292     def reserve_constant(self, objpath, value, origin, encoding=None):   293    294         """   295         Reserve a constant within 'objpath' with the given 'value' and having a   296         type with the given 'origin', with the optional 'encoding' applying to   297         text values.   298         """   299    300         constant_name = self.get_constant_name(value, origin)   301         objpath = self.get_object_path(constant_name)   302         self._reserve_constant(objpath, value, origin, encoding)   303    304     def _reserve_constant(self, objpath, value, origin, encoding):   305    306         """   307         Store a constant for 'objpath' with the given 'value' and 'origin', with   308         the optional 'encoding' applying to text values.   309         """   310    311         self.constant_values[objpath] = value, origin, encoding   312    313     def get_literal_reference(self, name, ref, items, cls):   314    315         """   316         Return a literal reference for the given type 'name', literal 'ref',   317         node 'items' and employing the given 'cls' as the class of the returned   318         reference object.   319         """   320    321         # Construct an invocation using the items as arguments.   322    323         typename = "$L%s" % name   324    325         invocation = compiler.ast.CallFunc(   326             compiler.ast.Name(typename),   327             items   328             )   329    330         # Get a name for the actual literal.   331    332         instname = self.get_literal_name()   333         self.next_literal()   334    335         # Record the type for the literal.   336    337         objpath = self.get_object_path(instname)   338         self.literal_types[objpath] = ref.get_origin()   339    340         # Return a wrapper for the invocation exposing the items.   341    342         return cls(   343             instname,   344             ref.instance_of(),   345             self.process_structure_node(invocation),   346             invocation.args   347             )   348    349     # Node handling.   350    351     def process_structure(self, node):   352    353         """   354         Within the given 'node', process the program structure.   355    356         During inspection, this will process global declarations, adjusting the   357         module namespace, and import statements, building a module dependency   358         hierarchy.   359    360         During translation, this will consult deduced program information and   361         output translated code.   362         """   363    364         l = []   365         for n in node.getChildNodes():   366             l.append(self.process_structure_node(n))   367         return l   368    369     def process_augassign_node(self, n):   370    371         "Process the given augmented assignment node 'n'."   372    373         op = operator_functions[n.op]   374    375         if isinstance(n.node, compiler.ast.Getattr):   376             target = compiler.ast.AssAttr(n.node.expr, n.node.attrname, "OP_ASSIGN")   377         elif isinstance(n.node, compiler.ast.Name):   378             target = compiler.ast.AssName(n.node.name, "OP_ASSIGN")   379         else:   380             target = n.node   381    382         assignment = compiler.ast.Assign(   383             [target],   384             compiler.ast.CallFunc(   385                 compiler.ast.Name("$op%s" % op),   386                 [n.node, n.expr]))   387    388         return self.process_structure_node(assignment)   389    390     def process_assignment_for_object(self, original_name, source):   391    392         """   393         Return an assignment operation making 'original_name' refer to the given   394         'source'.   395         """   396    397         assignment = compiler.ast.Assign(   398             [compiler.ast.AssName(original_name, "OP_ASSIGN")],   399             source   400             )   401    402         return self.process_structure_node(assignment)   403    404     def process_assignment_node_items(self, n, expr):   405    406         """   407         Process the given assignment node 'n' whose children are to be assigned   408         items of 'expr'.   409         """   410    411         name_ref = self.process_structure_node(expr)   412    413         # Either unpack the items and present them directly to each assignment   414         # node.   415    416         if isinstance(name_ref, LiteralSequenceRef) and \   417            self.process_literal_sequence_items(n, name_ref):   418    419             pass   420    421         # Or have the assignment nodes access each item via the sequence API.   422    423         else:   424             self.process_assignment_node_items_by_position(n, expr, name_ref)   425    426     def process_assignment_node_items_by_position(self, n, expr, name_ref):   427    428         """   429         Process the given sequence assignment node 'n', converting the node to   430         the separate assignment of each target using positional access on a   431         temporary variable representing the sequence. Use 'expr' as the assigned   432         value and 'name_ref' as the reference providing any existing temporary   433         variable.   434         """   435    436         statements = []   437    438         # Employ existing names to access the sequence.   439         # Literal sequences do not provide names of accessible objects.   440    441         if isinstance(name_ref, NameRef) and not isinstance(name_ref, LiteralSequenceRef):   442             temp = name_ref.name   443    444         # For other expressions, create a temporary name to reference the items.   445    446         else:   447             temp = self.get_temporary_name()   448             self.next_temporary()   449    450             statements.append(   451                 compiler.ast.Assign([compiler.ast.AssName(temp, "OP_ASSIGN")], expr)   452                 )   453    454         # Generate a test for the length of the expression object.   455    456         statements.append(compiler.ast.Discard(   457             compiler.ast.CallFunc(compiler.ast.Name("$seq_test_length"),   458                 [compiler.ast.Name(temp), compiler.ast.Const(len(n.nodes))])))   459    460         # Assign the items to the target nodes.   461    462         for i, node in enumerate(n.nodes):   463             statements.append(   464                 compiler.ast.Assign([node], compiler.ast.CallFunc(   465                     compiler.ast.Getattr(compiler.ast.Name(temp), "__get_single_item_unchecked__"),   466                     [compiler.ast.Const(i, str(i))]))   467                 )   468    469         return self.process_structure_node(compiler.ast.Stmt(statements))   470    471     def process_literal_sequence_items(self, n, name_ref):   472    473         """   474         Process the given assignment node 'n', obtaining from the given   475         'name_ref' the items to be assigned to the assignment targets.   476    477         Return whether this method was able to process the assignment node as   478         a sequence of direct assignments.   479         """   480    481         if len(n.nodes) == len(name_ref.items):   482             assigned_names, count = get_names_from_nodes(n.nodes)   483             accessed_names, _count = get_names_from_nodes(name_ref.items)   484    485             # Only assign directly between items if all assigned names are   486             # plain names (not attribute assignments), and if the assigned names   487             # do not appear in the accessed names.   488    489             if len(assigned_names) == count and \   490                not assigned_names.intersection(accessed_names):   491    492                 for node, item in zip(n.nodes, name_ref.items):   493                     self.process_assignment_node(node, item)   494    495                 return True   496    497             # Otherwise, use the position-based mechanism to obtain values.   498    499             else:   500                 return False   501         else:   502             raise InspectError("In %s, item assignment needing %d items is given %d items." % (   503                 self.get_namespace_path(), len(n.nodes), len(name_ref.items)))   504    505     def process_compare_node(self, n):   506    507         """   508         Process the given comparison node 'n', converting an operator sequence   509         from...   510    511         <expr1> <op1> <expr2> <op2> <expr3>   512    513         ...to...   514    515         <op1>(<expr1>, <expr2>) and <op2>(<expr2>, <expr3>)   516         """   517    518         invocations = []   519         last = n.expr   520    521         for op, op_node in n.ops:   522             op = operator_functions.get(op)   523    524             invocations.append(compiler.ast.CallFunc(   525                 compiler.ast.Name("$op%s" % op),   526                 [last, op_node]))   527    528             last = op_node   529    530         if len(invocations) > 1:   531             result = compiler.ast.And(invocations)   532         else:   533             result = invocations[0]   534    535         return self.process_structure_node(result)   536    537     def process_dict_node(self, node):   538    539         """   540         Process the given dictionary 'node', returning a list of (key, value)   541         tuples.   542         """   543    544         l = []   545         for key, value in node.items:   546             l.append((   547                 self.process_structure_node(key),   548                 self.process_structure_node(value)))   549         return l   550    551     def process_for_node(self, n):   552    553         """   554         Generate attribute accesses for {n.list}.__iter__ and the next method on   555         the iterator, producing a replacement node for the original.   556         """   557    558         t0 = self.get_temporary_name()   559         self.next_temporary()   560         t1 = self.get_temporary_name()   561         self.next_temporary()   562         t2 = self.get_temporary_name()   563         self.next_temporary()   564    565         node = compiler.ast.Stmt([   566    567             # <t0> = {n.list}   568             # <t1> = <t0>.__iter__()   569    570             compiler.ast.Assign(   571                 [compiler.ast.AssName(t0, "OP_ASSIGN")],   572                 n.list),   573    574             compiler.ast.Assign(   575                 [compiler.ast.AssName(t1, "OP_ASSIGN")],   576                 compiler.ast.CallFunc(   577                     compiler.ast.Getattr(compiler.ast.Name(t0), "__iter__"),   578                     [])),   579    580             # <t2> = <t1>.next   581             # try:   582             #     while True:   583             #         <var>... = <t2>()   584             #         ...   585             # except StopIteration:   586             #     pass   587    588             compiler.ast.Assign(   589                 [compiler.ast.AssName(t2, "OP_ASSIGN")],   590                 compiler.ast.Getattr(compiler.ast.Name(t1), "next")),   591    592             compiler.ast.TryExcept(   593                 compiler.ast.While(   594                     compiler.ast.Name("True"),   595                     compiler.ast.Stmt([   596                         compiler.ast.Assign(   597                             [n.assign],   598                             compiler.ast.CallFunc(   599                                 compiler.ast.Name(t2),   600                                 []   601                                 )),   602                         n.body]),   603                     None),   604                 [(compiler.ast.Name("StopIteration"), None, compiler.ast.Stmt([compiler.ast.Pass()]))],   605                 None)   606             ])   607    608         self.process_structure_node(node)   609    610     def process_literal_sequence_node(self, n, name, ref, cls):   611    612         """   613         Process the given literal sequence node 'n' as a function invocation,   614         with 'name' indicating the type of the sequence, and 'ref' being a   615         reference to the type. The 'cls' is used to instantiate a suitable name   616         reference.   617         """   618    619         if name == "dict":   620             items = []   621             for key, value in n.items:   622                 items.append(compiler.ast.Tuple([key, value]))   623         else: # name in ("list", "tuple"):   624             items = n.nodes   625    626         return self.get_literal_reference(name, ref, items, cls)   627    628     def process_operator_node(self, n):   629    630         """   631         Process the given operator node 'n' as an operator function invocation.   632         """   633    634         opname = n.__class__.__name__   635         operands = n.getChildNodes()   636    637         # Convert a unary operation to an invocation.   638    639         op = unary_operator_functions.get(opname)   640    641         if op:   642             invocation = compiler.ast.CallFunc(   643                 compiler.ast.Name("$op%s" % op),   644                 [operands[0]]   645                 )   646    647         # Convert a single operator with a list of operands to a combination of   648         # pairwise operations.   649    650         else:   651             op = operator_functions[opname]   652             invocation = self._process_operator_node(op, operands)   653    654         return self.process_structure_node(invocation)   655    656     def _process_operator_node(self, op, operands):   657    658         """   659         Process the given 'op', being an operator function, together with the   660         supplied 'operands', returning either a single remaining operand or an   661         invocation combining the operands.   662         """   663    664         remaining = operands[1:]   665         if not remaining:   666             return operands[0]   667    668         return compiler.ast.CallFunc(   669             compiler.ast.Name("$op%s" % op),   670             [operands[0], self._process_operator_node(op, remaining)]   671             )   672    673     def process_print_node(self, n):   674    675         """   676         Process the given print node 'n' as an invocation on a stream of the   677         form...   678    679         $print(dest, args, nl)   680    681         The special function name will be translated elsewhere.   682         """   683    684         nl = isinstance(n, compiler.ast.Printnl)   685         invocation = compiler.ast.CallFunc(   686             compiler.ast.Name("$print"),   687             [n.dest or compiler.ast.Name("None"),   688              compiler.ast.List(list(n.nodes)),   689              nl and compiler.ast.Name("True") or compiler.ast.Name("False")]   690             )   691         return self.process_structure_node(invocation)   692    693     def process_slice_node(self, n, expr=None):   694    695         """   696         Process the given slice node 'n' as a method invocation.   697         """   698    699         if n.flags == "OP_ASSIGN": op = "__setslice__"   700         elif n.flags == "OP_DELETE": op = "__delslice__"   701         else: op = "__getslice__"   702    703         invocation = compiler.ast.CallFunc(   704             compiler.ast.Getattr(n.expr, op),   705             [n.lower or compiler.ast.Name("None"), n.upper or compiler.ast.Name("None")] +   706                 (expr and [expr] or [])   707             )   708    709         # Fix parse tree structure.   710    711         if op == "__delslice__":   712             invocation = compiler.ast.Discard(invocation)   713    714         return self.process_structure_node(invocation)   715    716     def process_sliceobj_node(self, n):   717    718         """   719         Process the given slice object node 'n' as a slice constructor.   720         """   721    722         op = "slice"   723         invocation = compiler.ast.CallFunc(   724             compiler.ast.Name("$op%s" % op),   725             n.nodes   726             )   727         return self.process_structure_node(invocation)   728    729     def process_subscript_node(self, n, expr=None):   730    731         """   732         Process the given subscript node 'n' as a method invocation.   733         """   734    735         if n.flags == "OP_ASSIGN": op = "__setitem__"   736         elif n.flags == "OP_DELETE": op = "__delitem__"   737         else: op = "__getitem__"   738    739         invocation = compiler.ast.CallFunc(   740             compiler.ast.Getattr(n.expr, op),   741             list(n.subs) + (expr and [expr] or [])   742             )   743    744         # Fix parse tree structure.   745    746         if op == "__delitem__":   747             invocation = compiler.ast.Discard(invocation)   748    749         return self.process_structure_node(invocation)   750    751     def process_attribute_chain(self, n):   752    753         """   754         Process the given attribute access node 'n'. Return a reference   755         describing the expression.   756         """   757    758         # AssAttr/Getattr are nested with the outermost access being the last   759         # access in any chain.   760    761         self.attrs.insert(0, n.attrname)   762         attrs = self.attrs   763    764         # Break attribute chains where non-access nodes are found.   765    766         if not self.have_access_expression(n):   767             self.reset_attribute_chain()   768    769         # Descend into the expression, extending backwards any existing chain,   770         # or building another for the expression.   771    772         name_ref = self.process_structure_node(n.expr)   773    774         # Restore chain information applying to this node.   775    776         if not self.have_access_expression(n):   777             self.restore_attribute_chain(attrs)   778    779         # Return immediately if the expression was another access and thus a   780         # continuation backwards along the chain. The above processing will   781         # have followed the chain all the way to its conclusion.   782    783         if self.have_access_expression(n):   784             del self.attrs[0]   785    786         return name_ref   787    788     # Attribute chain handling.   789    790     def reset_attribute_chain(self):   791    792         "Reset the attribute chain for a subexpression of an attribute access."   793    794         self.attrs = []   795         self.chain_assignment.append(self.in_assignment)   796         self.chain_invocation.append(self.in_invocation)   797         self.in_assignment = None   798         self.in_invocation = None   799    800     def restore_attribute_chain(self, attrs):   801    802         "Restore the attribute chain for an attribute access."   803    804         self.attrs = attrs   805         self.in_assignment = self.chain_assignment.pop()   806         self.in_invocation = self.chain_invocation.pop()   807    808     def have_access_expression(self, node):   809    810         "Return whether the expression associated with 'node' is Getattr."   811    812         return isinstance(node.expr, compiler.ast.Getattr)   813    814     def get_name_for_tracking(self, name, name_ref=None, is_global=False):   815    816         """   817         Return the name to be used for attribute usage observations involving   818         the given 'name' in the current namespace.   819    820         If the name is being used outside a function, and if 'name_ref' is   821         given and indicates a global or if 'is_global' is specified as a true   822         value, a path featuring the name in the global namespace is returned.   823         Otherwise, a path computed using the current namespace and the given   824         name is returned.   825    826         The intention of this method is to provide a suitably-qualified name   827         that can be tracked across namespaces. Where globals are being   828         referenced in class namespaces, they should be referenced using their   829         path within the module, not using a path within each class.   830    831         It may not be possible to identify a global within a function at the   832         time of inspection (since a global may appear later in a file).   833         Consequently, globals are identified by their local name rather than   834         their module-qualified path.   835         """   836    837         # For functions, use the appropriate local names.   838    839         if self.in_function:   840             return name   841    842         # For global names outside functions, use a global name.   843    844         elif is_global or name_ref and name_ref.is_global_name():   845             return self.get_global_path(name)   846    847         # Otherwise, establish a name in the current namespace.   848    849         else:   850             return self.get_object_path(name)   851    852     def get_path_for_access(self):   853    854         "Outside functions, register accesses at the module level."   855    856         if not self.in_function:   857             return self.name   858         else:   859             return self.get_namespace_path()   860    861     def get_module_name(self, node):   862    863         """   864         Using the given From 'node' in this module, calculate any relative import   865         information, returning a tuple containing a module to import along with any   866         names to import based on the node's name information.   867    868         Where the returned module is given as None, whole module imports should   869         be performed for the returned modules using the returned names.   870         """   871    872         # Absolute import.   873    874         if node.level == 0:   875             return node.modname, node.names   876    877         # Relative to an ancestor of this module.   878    879         else:   880             path = self.name.split(".")   881             level = node.level   882    883             # Relative imports treat package roots as submodules.   884    885             if split(self.filename)[-1] == "__init__.py":   886                 level -= 1   887    888             if level > len(path):   889                 raise InspectError("Relative import %r involves too many levels up from module %r" % (   890                     ("%s%s" % ("." * node.level, node.modname or "")), self.name))   891    892             basename = ".".join(path[:len(path)-level])   893    894         # Name imports from a module.   895    896         if node.modname:   897             return "%s.%s" % (basename, node.modname), node.names   898    899         # Relative whole module imports.   900    901         else:   902             return basename, node.names   903    904 def get_argnames(args):   905    906     """   907     Return a list of all names provided by 'args'. Since tuples may be   908     employed, the arguments are traversed depth-first.   909     """   910    911     l = []   912     for arg in args:   913         if isinstance(arg, tuple):   914             l += get_argnames(arg)   915         else:   916             l.append(arg)   917     return l   918    919 def get_names_from_nodes(nodes):   920    921     """   922     Return the names employed in the given 'nodes' along with the number of   923     nodes excluding sequences.   924     """   925    926     names = set()   927     count = 0   928    929     for node in nodes:   930    931         # Add names and count them.   932    933         if isinstance(node, (compiler.ast.AssName, compiler.ast.Name)):   934             names.add(node.name)   935             count += 1   936    937         # Add names from sequences and incorporate their counts.   938    939         elif isinstance(node, (compiler.ast.AssList, compiler.ast.AssTuple,   940                                compiler.ast.List, compiler.ast.Set,   941                                compiler.ast.Tuple)):   942             _names, _count = get_names_from_nodes(node.nodes)   943             names.update(_names)   944             count += _count   945    946         # Count non-name, non-sequence nodes.   947    948         else:   949             count += 1   950    951     return names, count   952    953 # Location classes.   954    955 class Location:   956    957     "A generic program location."   958    959     def __init__(self, path, name, attrnames=None, version=None, access_number=None):   960         self.path = path   961         self.name = name   962         self.attrnames = attrnames   963         self.version = version   964         self.access_number = access_number   965    966     def __repr__(self):   967         return "Location(%r, %r, %r, %r, %r)" % self.as_tuple()   968    969     def as_tuple(self):   970         return (self.path, self.name, self.attrnames, self.version, self.access_number)   971    972     def __hash__(self):   973         return hash(self.as_tuple())   974    975     def __eq__(self, other):   976         return self.as_tuple() == other.as_tuple()   977    978     def __cmp__(self, other):   979         return cmp(self.as_tuple(), other.as_tuple())   980    981     def get_attrname(self):   982    983         """   984         Extract the first attribute from the attribute names employed in this   985         location.   986         """   987    988         attrnames = self.attrnames   989         if not attrnames:   990             return attrnames   991         return get_attrnames(attrnames)[0]   992    993 class AccessLocation(Location):   994    995     "A specialised access location."   996    997     def __init__(self, path, name, attrnames, access_number):   998    999         """  1000         Initialise an access location featuring 'path', 'name', 'attrnames' and  1001         'access_number'.  1002         """  1003   1004         Location.__init__(self, path, name, attrnames, None, access_number)  1005   1006     def __repr__(self):  1007         return "AccessLocation(%r, %r, %r, %r)" % (self.path, self.name, self.attrnames, self.access_number)  1008   1009 # Result classes.  1010   1011 class InstructionSequence:  1012   1013     "A generic sequence of instructions."  1014   1015     def __init__(self, instructions):  1016         self.instructions = instructions  1017   1018     def get_value_instruction(self):  1019         return self.instructions[-1]  1020   1021     def get_init_instructions(self):  1022         return self.instructions[:-1]  1023   1024 # Dictionary utilities.  1025   1026 def init_item(d, key, fn):  1027   1028     """  1029     Add to 'd' an entry for 'key' using the callable 'fn' to make an initial  1030     value where no entry already exists.  1031     """  1032   1033     if not d.has_key(key):  1034         d[key] = fn()  1035     return d[key]  1036   1037 def dict_for_keys(d, keys):  1038   1039     "Return a new dictionary containing entries from 'd' for the given 'keys'."  1040   1041     nd = {}  1042     for key in keys:  1043         if d.has_key(key):  1044             nd[key] = d[key]  1045     return nd  1046   1047 def make_key(s):  1048   1049     "Make sequence 's' into a tuple-based key, first sorting its contents."  1050   1051     l = list(s)  1052     l.sort()  1053     return tuple(l)  1054   1055 def add_counter_item(d, key):  1056   1057     """  1058     Make a mapping in 'd' for 'key' to the number of keys added before it, thus  1059     maintaining a mapping of keys to their order of insertion.  1060     """  1061   1062     if not d.has_key(key):  1063         d[key] = len(d.keys())  1064     return d[key]   1065   1066 def remove_items(d1, d2):  1067   1068     "Remove from 'd1' all items from 'd2'."  1069   1070     for key in d2.keys():  1071         if d1.has_key(key):  1072             del d1[key]  1073   1074 # Set utilities.  1075   1076 def first(s):  1077     return list(s)[0]  1078   1079 def same(s1, s2):  1080     return set(s1) == set(s2)  1081   1082 def order_dependencies(all_depends):  1083   1084     """  1085     Produce a dependency ordering for the 'all_depends' mapping. This mapping  1086     has the form "A depends on B, C...". The result will order A, B, C, and so  1087     on.  1088     """  1089   1090     usage = init_reverse_dependencies(all_depends)  1091   1092     # Produce an ordering by obtaining exposed items (required by items already  1093     # processed) and putting them at the start of the list.  1094   1095     ordered = []  1096   1097     while usage:  1098         have_next = False  1099   1100         for key, n in usage.items():  1101   1102             # Add items needed by no other items to the ordering.  1103   1104             if not n:  1105                 remove_dependency(key, all_depends, usage, ordered)  1106                 have_next = True  1107   1108         if not have_next:  1109             raise ValueError, usage  1110   1111     return ordered  1112   1113 def order_dependencies_partial(all_depends):  1114   1115     """  1116     Produce a dependency ordering for the 'all_depends' mapping. This mapping  1117     has the form "A depends on B, C...". The result will order A, B, C, and so  1118     on. Where cycles exist, they will be broken and a partial ordering returned.  1119     """  1120   1121     usage = init_reverse_dependencies(all_depends)  1122   1123     # Duplicate the dependencies for subsequent modification.  1124   1125     new_depends = {}  1126     for key, values in all_depends.items():  1127         new_depends[key] = set(values)  1128   1129     all_depends = new_depends  1130   1131     # Produce an ordering by obtaining exposed items (required by items already  1132     # processed) and putting them at the start of the list.  1133   1134     ordered = []  1135   1136     while usage:  1137         least = None  1138         least_key = None  1139   1140         for key, n in usage.items():  1141   1142             # Add items needed by no other items to the ordering.  1143   1144             if not n:  1145                 remove_dependency(key, all_depends, usage, ordered)  1146                 least = 0  1147   1148             # When breaking cycles, note the least used items.  1149   1150             elif least is None or len(n) < least:  1151                 least_key = key  1152                 least = len(n)  1153   1154         if least:  1155             transfer_dependencies(least_key, all_depends, usage, ordered)  1156   1157     return ordered  1158   1159 def init_reverse_dependencies(all_depends):  1160   1161     """  1162     From 'all_depends', providing a mapping of the form "A depends on B, C...",  1163     record the reverse dependencies, making a mapping of the form  1164     "B is needed by A", "C is needed by A", and so on.  1165     """  1166   1167     usage = {}  1168   1169     # Record path-based dependencies.  1170   1171     for key in all_depends.keys():  1172         usage[key] = set()  1173   1174     for key, depends in all_depends.items():  1175         for depend in depends:  1176             init_item(usage, depend, set)  1177             usage[depend].add(key)  1178   1179     return usage  1180   1181 def transfer_dependencies(key, all_depends, usage, ordered):  1182   1183     """  1184     Transfer items needed by 'key' to those items needing 'key', found using  1185     'all_depends', and updating 'usage'. Insert 'key' into the 'ordered'  1186     collection of dependencies.  1187   1188     If "A is needed by X" and "B is needed by A", then transferring items needed  1189     by A will cause "B is needed by X" to be recorded as a consequence.  1190   1191     Transferring items also needs to occur in the reverse mapping, so that  1192     "A needs B" and "X needs A", then the consequence must be recorded as  1193     "X needs B".  1194     """  1195   1196     ordered.insert(0, key)  1197   1198     needing = usage[key]                        # A is needed by X  1199     needed = all_depends.get(key)               # A needs B  1200   1201     if needing:  1202         for depend in needing:  1203             l = all_depends.get(depend)  1204             if not l:  1205                 continue  1206   1207             l.remove(key)                       # X needs (A)  1208   1209             if needed:  1210                 l.update(needed)                # X needs B...  1211   1212                 # Prevent self references.  1213   1214                 if depend in needed:  1215                     l.remove(depend)  1216   1217     if needed:  1218         for depend in needed:  1219             l = usage.get(depend)  1220             if not l:  1221                 continue  1222   1223             l.remove(key)                       # B is needed by (A)  1224             l.update(needing)                   # B is needed by X...  1225   1226             # Prevent self references.  1227   1228             if depend in needing:  1229                 l.remove(depend)  1230   1231     if needed:  1232         del all_depends[key]  1233     del usage[key]  1234   1235 def remove_dependency(key, all_depends, usage, ordered):  1236   1237     """  1238     Remove 'key', found in 'all_depends', from 'usage', inserting it into the  1239     'ordered' collection of dependencies.  1240   1241     Given that 'usage' for a given key A would indicate that "A needs <nothing>"  1242     upon removing A from 'usage', the outcome is that all keys needing A will  1243     have A removed from their 'usage' records.  1244   1245     So, if "B needs A", removing A will cause "B needs <nothing>" to be recorded  1246     as a consequence.  1247     """  1248   1249     ordered.insert(0, key)  1250   1251     depends = all_depends.get(key)  1252   1253     # Reduce usage of the referenced items.  1254   1255     if depends:  1256         for depend in depends:  1257             usage[depend].remove(key)  1258   1259     del usage[key]  1260   1261 # General input/output.  1262   1263 def readfile(filename):  1264   1265     "Return the contents of 'filename'."  1266   1267     f = open(filename)  1268     try:  1269         return f.read()  1270     finally:  1271         f.close()  1272   1273 def writefile(filename, s):  1274   1275     "Write to 'filename' the string 's'."  1276   1277     f = open(filename, "w")  1278     try:  1279         f.write(s)  1280     finally:  1281         f.close()  1282   1283 # General encoding.  1284   1285 def sorted_output(x):  1286   1287     "Sort sequence 'x' and return a string with commas separating the values."  1288   1289     x = map(str, x)  1290     x.sort()  1291     return ", ".join(x)  1292   1293 def get_string_details(literals, encoding):  1294   1295     """  1296     Determine whether 'literals' represent Unicode strings or byte strings,  1297     using 'encoding' to reproduce byte sequences.  1298   1299     Each literal is the full program representation including prefix and quotes  1300     recoded by the parser to UTF-8. Thus, any literal found to represent a byte  1301     string needs to be translated back to its original encoding.  1302   1303     Return a single encoded literal value, a type name, and the original  1304     encoding as a tuple.  1305     """  1306   1307     typename = "unicode"  1308   1309     l = []  1310   1311     for s in literals:  1312         out, _typename = get_literal_details(s)  1313         if _typename == "str":  1314             typename = "str"  1315         l.append(out)  1316   1317     out = "".join(l)  1318   1319     # For Unicode values, convert to the UTF-8 program representation.  1320   1321     if typename == "unicode":  1322         return out.encode("utf-8"), typename, encoding  1323   1324     # For byte string values, convert back to the original encoding.  1325   1326     else:  1327         return out.encode(encoding), typename, encoding  1328   1329 def get_literal_details(s):  1330   1331     """  1332     Determine whether 's' represents a Unicode string or a byte string, where  1333     's' contains the full program representation of a literal including prefix  1334     and quotes, recoded by the parser to UTF-8.  1335   1336     Find and convert Unicode values starting with <backslash>u or <backslash>U,  1337     and byte or Unicode values starting with <backslash><octal digit> or  1338     <backslash>x.  1339   1340     Literals prefixed with "u" cause <backslash><octal digit> and <backslash>x  1341     to be considered as Unicode values. Otherwise, they produce byte values and  1342     cause unprefixed strings to be considered as byte strings.  1343   1344     Literals prefixed with "r" do not have their backslash-encoded values  1345     converted unless also prefixed with "u", in which case only the above value  1346     formats are converted, not any of the other special sequences for things  1347     like newlines.  1348   1349     Return the literal value as a Unicode object together with the appropriate  1350     type name in a tuple.  1351     """  1352   1353     l = []  1354   1355     # Identify the quote character and use it to identify the prefix.  1356   1357     quote_type = s[-1]  1358     prefix_end = s.find(quote_type)  1359     prefix = s[:prefix_end].lower()  1360   1361     if prefix not in ("", "b", "br", "r", "u", "ur"):  1362         raise ValueError, "String literal does not have a supported prefix: %s" % s  1363   1364     if "b" in prefix:  1365         typename = "str"  1366     else:  1367         typename = "unicode"  1368   1369     # Identify triple quotes or single quotes.  1370   1371     if len(s) >= 6 and s[-2] == quote_type and s[-3] == quote_type:  1372         quote = s[prefix_end:prefix_end+3]  1373         current = prefix_end + 3  1374         end = len(s) - 3  1375     else:  1376         quote = s[prefix_end]  1377         current = prefix_end + 1  1378         end = len(s) - 1  1379   1380     # Conversions of some quoted values.  1381   1382     searches = {  1383         "u" : (6, 16),  1384         "U" : (10, 16),  1385         "x" : (4, 16),  1386         }  1387   1388     octal_digits = map(str, range(0, 8))  1389   1390     # Translations of some quoted values.  1391   1392     escaped = {  1393         "\\" : "\\", "'" : "'", '"' : '"',  1394         "a" : "\a", "b" : "\b", "f" : "\f",  1395         "n" : "\n", "r" : "\r", "t" : "\t",  1396         }  1397   1398     while current < end:  1399   1400         # Look for quoted values.  1401   1402         index = s.find("\\", current)  1403         if index == -1 or index + 1 == end:  1404             l.append(s[current:end])  1405             break  1406   1407         # Add the preceding text.  1408   1409         l.append(s[current:index])  1410   1411         # Handle quoted text.  1412   1413         term = s[index+1]  1414   1415         # Add Unicode values. Where a string is u-prefixed, even \o and \x  1416         # produce Unicode values.  1417   1418         if typename == "unicode" and (  1419             term in ("u", "U") or   1420             "u" in prefix and (term == "x" or term in octal_digits)):  1421   1422             needed, base = searches.get(term, (4, 8))  1423             value = convert_quoted_value(s, index, needed, end, base, unichr)  1424             l.append(value)  1425             current = index + needed  1426   1427         # Add raw byte values, changing the string type.  1428   1429         elif "r" not in prefix and (  1430              term == "x" or term in octal_digits):  1431   1432             needed, base = searches.get(term, (4, 8))  1433             value = convert_quoted_value(s, index, needed, end, base, chr)  1434             l.append(value)  1435             typename = "str"  1436             current = index + needed  1437   1438         # Add other escaped values.  1439   1440         elif "r" not in prefix and escaped.has_key(term):  1441             l.append(escaped[term])  1442             current = index + 2  1443   1444         # Add other text as found.  1445   1446         else:  1447             l.append(s[index:index+2])  1448             current = index + 2  1449   1450     # Collect the components into a single Unicode object. Since the literal  1451     # text was already in UTF-8 form, interpret plain strings as UTF-8  1452     # sequences.  1453   1454     out = []  1455   1456     for value in l:  1457         if isinstance(value, unicode):  1458             out.append(value)  1459         else:  1460             out.append(unicode(value, "utf-8"))  1461   1462     return "".join(out), typename  1463   1464 def convert_quoted_value(s, index, needed, end, base, fn):  1465   1466     """  1467     Interpret a quoted value in 's' at 'index' with the given 'needed' number of  1468     positions, and with the given 'end' indicating the first position after the  1469     end of the actual string content.  1470   1471     Use 'base' as the numerical base when interpreting the value, and use 'fn'  1472     to convert the value to an appropriate type.  1473     """  1474   1475     s = s[index:min(index+needed, end)]  1476   1477     # Not a complete occurrence.  1478   1479     if len(s) < needed:  1480         return s  1481   1482     # Test for a well-formed value.  1483   1484     try:  1485         first = base == 8 and 1 or 2  1486         value = int(s[first:needed], base)  1487     except ValueError:  1488         return s  1489     else:  1490         return fn(value)  1491   1492 # Attribute chain decoding.  1493   1494 def get_attrnames(attrnames):  1495   1496     """  1497     Split the qualified attribute chain 'attrnames' into its components,  1498     handling special attributes starting with "#" that indicate type  1499     conformance.  1500     """  1501   1502     if attrnames.startswith("#"):  1503         return [attrnames]  1504     else:  1505         return attrnames.split(".")  1506   1507 def get_name_path(path, name):  1508   1509     "Return a suitable qualified name from the given 'path' and 'name'."  1510   1511     if "." in name:  1512         return name  1513     else:  1514         return "%s.%s" % (path, name)  1515   1516 # Usage-related functions.  1517   1518 def get_types_for_usage(attrnames, objects):  1519   1520     """  1521     Identify the types that can support the given 'attrnames', using the  1522     given 'objects' as the catalogue of type details.  1523     """  1524   1525     types = []  1526     for name, _attrnames in objects.items():  1527         if set(attrnames).issubset(_attrnames):  1528             types.append(name)  1529     return types  1530   1531 def get_invoked_attributes(usage):  1532   1533     "Obtain invoked attribute from the given 'usage'."  1534   1535     invoked = []  1536     if usage:  1537         for attrname, invocation, assignment in usage:  1538             if invocation:  1539                 invoked.append(attrname)  1540     return invoked  1541   1542 def get_assigned_attributes(usage):  1543   1544     "Obtain assigned attribute from the given 'usage'."  1545   1546     assigned = []  1547     if usage:  1548         for attrname, invocation, assignment in usage:  1549             if assignment:  1550                 assigned.append(attrname)  1551     return assigned  1552   1553 # Type and module functions.  1554 # NOTE: This makes assumptions about the __builtins__ structure.  1555   1556 def get_builtin_module(name):  1557   1558     "Return the module name containing the given type 'name'."  1559   1560     if name == "string":  1561         modname = "str"  1562     elif name == "utf8string":  1563         modname = "unicode"  1564     elif name == "NoneType":  1565         modname = "none"  1566     else:  1567         modname = name  1568   1569     return "__builtins__.%s" % modname  1570   1571 def get_builtin_type(name):  1572   1573     "Return the type name provided by the given Python value 'name'."  1574   1575     if name == "str":  1576         return "string"  1577     elif name == "unicode":  1578         return "utf8string"  1579     else:  1580         return name  1581   1582 def get_builtin_class(name):  1583   1584     "Return the full name of the built-in class having the given 'name'."  1585   1586     typename = get_builtin_type(name)  1587     module = get_builtin_module(typename)  1588     return "%s.%s" % (module, typename)  1589   1590 # Useful data.  1591   1592 predefined_constants = "False", "None", "NotImplemented", "True"  1593   1594 unary_operator_functions = {  1595   1596     # Unary operations.  1597   1598     "Invert" : "invert",  1599     "UnaryAdd" : "pos",  1600     "UnarySub" : "neg",  1601     }  1602   1603 operator_functions = {  1604   1605     # Fundamental operations.  1606   1607     "is" : "is_",  1608     "is not" : "is_not",  1609   1610     # Binary operations.  1611   1612     "in" : "in_",  1613     "not in" : "not_in",  1614     "Add" : "add",  1615     "Bitand" : "and_",  1616     "Bitor" : "or_",  1617     "Bitxor" : "xor",  1618     "Div" : "div",  1619     "FloorDiv" : "floordiv",  1620     "LeftShift" : "lshift",  1621     "Mod" : "mod",  1622     "Mul" : "mul",  1623     "Power" : "pow",  1624     "RightShift" : "rshift",  1625     "Sub" : "sub",  1626   1627     # Augmented assignment.  1628   1629     "+=" : "iadd",  1630     "-=" : "isub",  1631     "*=" : "imul",  1632     "/=" : "idiv",  1633     "//=" : "ifloordiv",  1634     "%=" : "imod",  1635     "**=" : "ipow",  1636     "<<=" : "ilshift",  1637     ">>=" : "irshift",  1638     "&=" : "iand",  1639     "^=" : "ixor",  1640     "|=" : "ior",  1641   1642     # Comparisons.  1643   1644     "==" : "eq",  1645     "!=" : "ne",  1646     "<" : "lt",  1647     "<=" : "le",  1648     ">=" : "ge",  1649     ">" : "gt",  1650     }  1651   1652 operator_functions.update(unary_operator_functions)  1653   1654 # vim: tabstop=4 expandtab shiftwidth=4