Lichen

common.py

852:7c314a9d1724
2018-07-12 Paul Boddie Attempt to reintroduce function acquisition optimisations by extending context tests related to invocations of access results.
     1 #!/usr/bin/env python     2      3 """     4 Common functions.     5      6 Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013,     7               2014, 2015, 2016, 2017, 2018 Paul Boddie <paul@boddie.org.uk>     8      9 This program is free software; you can redistribute it and/or modify it under    10 the terms of the GNU General Public License as published by the Free Software    11 Foundation; either version 3 of the License, or (at your option) any later    12 version.    13     14 This program is distributed in the hope that it will be useful, but WITHOUT    15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS    16 FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more    17 details.    18     19 You should have received a copy of the GNU General Public License along with    20 this program.  If not, see <http://www.gnu.org/licenses/>.    21 """    22     23 from compiler.transformer import Transformer    24 from errors import InspectError    25 from os import listdir, makedirs, remove    26 from os.path import exists, getmtime, isdir, join, split    27 from results import ConstantValueRef, LiteralSequenceRef, NameRef    28 import compiler.ast    29     30 class CommonOutput:    31     32     "Common output functionality."    33     34     def check_output(self, options=None):    35     36         "Check the existing output and remove it if irrelevant."    37     38         if not exists(self.output):    39             makedirs(self.output)    40     41         details = self.importer.get_cache_details()    42         recorded_details = self.get_output_details()    43     44         # Combine cache details with any options.    45     46         full_details = options and (details + " " + options) or details    47     48         if recorded_details != full_details:    49             self.remove_output()    50     51         writefile(self.get_output_details_filename(), full_details)    52     53     def get_output_details_filename(self):    54     55         "Return the output details filename."    56     57         return join(self.output, "$details")    58     59     def get_output_details(self):    60     61         "Return details of the existing output."    62     63         details_filename = self.get_output_details_filename()    64     65         if not exists(details_filename):    66             return None    67         else:    68             return readfile(details_filename)    69     70     def remove_output(self, dirname=None):    71     72         "Remove the output."    73     74         dirname = dirname or self.output    75     76         for filename in listdir(dirname):    77             path = join(dirname, filename)    78             if isdir(path):    79                 self.remove_output(path)    80             else:    81                 remove(path)    82     83 def copy(source, target, only_if_newer=True):    84     85     "Copy a text file from 'source' to 'target'."    86     87     if isdir(target):    88         target = join(target, split(source)[-1])    89     90     if only_if_newer and not is_newer(source, target):    91         return    92     93     infile = open(source)    94     outfile = open(target, "w")    95     96     try:    97         outfile.write(infile.read())    98     finally:    99         outfile.close()   100         infile.close()   101    102 def is_newer(source, target):   103    104     "Return whether 'source' is newer than 'target'."   105    106     if exists(target):   107         target_mtime = getmtime(target)   108         source_mtime = getmtime(source)   109         return source_mtime > target_mtime   110    111     return True   112    113 class CommonModule:   114    115     "A common module representation."   116    117     def __init__(self, name, importer):   118    119         """   120         Initialise this module with the given 'name' and an 'importer' which is   121         used to provide access to other modules when required.   122         """   123    124         self.name = name   125         self.importer = importer   126         self.filename = None   127    128         # Inspection-related attributes.   129    130         self.astnode = None   131         self.encoding = None   132         self.temp = {}   133         self.lambdas = {}   134    135         # Constants, literals and values.   136    137         self.constants = {}   138         self.constant_values = {}   139         self.literals = {}   140         self.literal_types = {}   141    142         # Nested namespaces.   143    144         self.namespace_path = []   145         self.in_function = False   146    147         # Retain the assignment value expression and track invocations.   148    149         self.in_assignment = None   150         self.in_invocation = None   151    152         # Attribute chain state management.   153    154         self.attrs = []   155         self.chain_assignment = []   156         self.chain_invocation = []   157    158     def __repr__(self):   159         return "CommonModule(%r, %r)" % (self.name, self.importer)   160    161     def parse_file(self, filename):   162    163         "Parse the file with the given 'filename', initialising attributes."   164    165         self.filename = filename   166    167         # Use the Transformer directly to obtain encoding information.   168    169         t = Transformer()   170         f = open(filename)   171    172         try:   173             self.astnode = t.parsesuite(f.read() + "\n")   174             self.encoding = t.encoding   175         finally:   176             f.close()   177    178     # Module-relative naming.   179    180     def get_global_path(self, name):   181         return "%s.%s" % (self.name, name)   182    183     def get_namespace_path(self):   184         return ".".join([self.name] + self.namespace_path)   185    186     def get_object_path(self, name):   187         return ".".join([self.name] + self.namespace_path + [name])   188    189     # Namespace management.   190    191     def enter_namespace(self, name):   192    193         "Enter the namespace having the given 'name'."   194    195         self.namespace_path.append(name)   196    197     def exit_namespace(self):   198    199         "Exit the current namespace."   200    201         self.namespace_path.pop()   202    203     # Constant reference naming.   204    205     def get_constant_name(self, value, value_type, encoding=None):   206    207         """   208         Add a new constant to the current namespace for 'value' with   209         'value_type'.   210         """   211    212         path = self.get_namespace_path()   213         init_item(self.constants, path, dict)   214         return "$c%d" % add_counter_item(self.constants[path], (value, value_type, encoding))   215    216     # Literal reference naming.   217    218     def get_literal_name(self):   219    220         "Add a new literal to the current namespace."   221    222         path = self.get_namespace_path()   223         init_item(self.literals, path, lambda: 0)   224         return "$C%d" % self.literals[path]   225    226     def next_literal(self):   227         self.literals[self.get_namespace_path()] += 1   228    229     # Temporary variable naming.   230    231     def get_temporary_name(self):   232         path = self.get_namespace_path()   233         init_item(self.temp, path, lambda: 0)   234         return "$t%d" % self.temp[path]   235    236     def next_temporary(self):   237         self.temp[self.get_namespace_path()] += 1   238    239     # Arbitrary function naming.   240    241     def get_lambda_name(self):   242         path = self.get_namespace_path()   243         init_item(self.lambdas, path, lambda: 0)   244         name = "$l%d" % self.lambdas[path]   245         self.lambdas[path] += 1   246         return name   247    248     def reset_lambdas(self):   249         self.lambdas = {}   250    251     # Constant and literal recording.   252    253     def get_constant_value(self, value, literals=None):   254    255         """   256         Encode the 'value' if appropriate, returning a value, a typename and any   257         encoding.   258         """   259    260         if isinstance(value, unicode):   261             return value.encode("utf-8"), "unicode", self.encoding   262    263         # Attempt to convert plain strings to text.   264    265         elif isinstance(value, str) and self.encoding:   266             try:   267                 return get_string_details(literals, self.encoding)   268             except UnicodeDecodeError:   269                 pass   270    271         return value, value.__class__.__name__, None   272    273     def get_constant_reference(self, ref, value, encoding=None):   274    275         """   276         Return a constant reference for the given 'ref' type and 'value', with   277         the optional 'encoding' applying to text values.   278         """   279    280         constant_name = self.get_constant_name(value, ref.get_origin(), encoding)   281    282         # Return a reference for the constant.   283    284         objpath = self.get_object_path(constant_name)   285         name_ref = ConstantValueRef(constant_name, ref.instance_of(objpath), value)   286    287         # Record the value and type for the constant.   288    289         self._reserve_constant(objpath, name_ref.value, name_ref.get_origin(), encoding)   290         return name_ref   291    292     def reserve_constant(self, objpath, value, origin, encoding=None):   293    294         """   295         Reserve a constant within 'objpath' with the given 'value' and having a   296         type with the given 'origin', with the optional 'encoding' applying to   297         text values.   298         """   299    300         constant_name = self.get_constant_name(value, origin)   301         objpath = self.get_object_path(constant_name)   302         self._reserve_constant(objpath, value, origin, encoding)   303    304     def _reserve_constant(self, objpath, value, origin, encoding):   305    306         """   307         Store a constant for 'objpath' with the given 'value' and 'origin', with   308         the optional 'encoding' applying to text values.   309         """   310    311         self.constant_values[objpath] = value, origin, encoding   312    313     def get_literal_reference(self, name, ref, items, cls):   314    315         """   316         Return a literal reference for the given type 'name', literal 'ref',   317         node 'items' and employing the given 'cls' as the class of the returned   318         reference object.   319         """   320    321         # Construct an invocation using the items as arguments.   322    323         typename = "$L%s" % name   324    325         invocation = compiler.ast.CallFunc(   326             compiler.ast.Name(typename),   327             items   328             )   329    330         # Get a name for the actual literal.   331    332         instname = self.get_literal_name()   333         self.next_literal()   334    335         # Record the type for the literal.   336    337         objpath = self.get_object_path(instname)   338         self.literal_types[objpath] = ref.get_origin()   339    340         # Return a wrapper for the invocation exposing the items.   341    342         return cls(   343             instname,   344             ref.instance_of(),   345             self.process_structure_node(invocation),   346             invocation.args   347             )   348    349     # Node handling.   350    351     def process_structure(self, node):   352    353         """   354         Within the given 'node', process the program structure.   355    356         During inspection, this will process global declarations, adjusting the   357         module namespace, and import statements, building a module dependency   358         hierarchy.   359    360         During translation, this will consult deduced program information and   361         output translated code.   362         """   363    364         l = []   365         for n in node.getChildNodes():   366             l.append(self.process_structure_node(n))   367         return l   368    369     def process_augassign_node(self, n):   370    371         "Process the given augmented assignment node 'n'."   372    373         op = operator_functions[n.op]   374    375         if isinstance(n.node, compiler.ast.Getattr):   376             target = compiler.ast.AssAttr(n.node.expr, n.node.attrname, "OP_ASSIGN")   377         elif isinstance(n.node, compiler.ast.Name):   378             target = compiler.ast.AssName(n.node.name, "OP_ASSIGN")   379         else:   380             target = n.node   381    382         assignment = compiler.ast.Assign(   383             [target],   384             compiler.ast.CallFunc(   385                 compiler.ast.Name("$op%s" % op),   386                 [n.node, n.expr]))   387    388         return self.process_structure_node(assignment)   389    390     def process_assignment_for_object(self, original_name, source):   391    392         """   393         Return an assignment operation making 'original_name' refer to the given   394         'source'.   395         """   396    397         assignment = compiler.ast.Assign(   398             [compiler.ast.AssName(original_name, "OP_ASSIGN")],   399             source   400             )   401    402         return self.process_structure_node(assignment)   403    404     def process_assignment_node_items(self, n, expr):   405    406         """   407         Process the given assignment node 'n' whose children are to be assigned   408         items of 'expr'.   409         """   410    411         name_ref = self.process_structure_node(expr)   412    413         # Either unpack the items and present them directly to each assignment   414         # node.   415    416         if isinstance(name_ref, LiteralSequenceRef) and \   417            self.process_literal_sequence_items(n, name_ref):   418    419             pass   420    421         # Or have the assignment nodes access each item via the sequence API.   422    423         else:   424             self.process_assignment_node_items_by_position(n, expr, name_ref)   425    426     def process_assignment_node_items_by_position(self, n, expr, name_ref):   427    428         """   429         Process the given sequence assignment node 'n', converting the node to   430         the separate assignment of each target using positional access on a   431         temporary variable representing the sequence. Use 'expr' as the assigned   432         value and 'name_ref' as the reference providing any existing temporary   433         variable.   434         """   435    436         statements = []   437    438         # Employ existing names to access the sequence.   439         # Literal sequences do not provide names of accessible objects.   440    441         if isinstance(name_ref, NameRef) and not isinstance(name_ref, LiteralSequenceRef):   442             temp = name_ref.name   443    444         # For other expressions, create a temporary name to reference the items.   445    446         else:   447             temp = self.get_temporary_name()   448             self.next_temporary()   449    450             statements.append(   451                 compiler.ast.Assign([compiler.ast.AssName(temp, "OP_ASSIGN")], expr)   452                 )   453    454         # Generate a test for the length of the expression object.   455    456         statements.append(compiler.ast.Discard(   457             compiler.ast.CallFunc(compiler.ast.Name("$seq_test_length"),   458                 [compiler.ast.Name(temp), compiler.ast.Const(len(n.nodes))])))   459    460         # Assign the items to the target nodes.   461    462         for i, node in enumerate(n.nodes):   463             statements.append(   464                 compiler.ast.Assign([node], compiler.ast.Subscript(   465                     compiler.ast.Name(temp), "OP_APPLY", [compiler.ast.Const(i, str(i))]))   466                 )   467    468         return self.process_structure_node(compiler.ast.Stmt(statements))   469    470     def process_literal_sequence_items(self, n, name_ref):   471    472         """   473         Process the given assignment node 'n', obtaining from the given   474         'name_ref' the items to be assigned to the assignment targets.   475    476         Return whether this method was able to process the assignment node as   477         a sequence of direct assignments.   478         """   479    480         if len(n.nodes) == len(name_ref.items):   481             assigned_names, count = get_names_from_nodes(n.nodes)   482             accessed_names, _count = get_names_from_nodes(name_ref.items)   483    484             # Only assign directly between items if all assigned names are   485             # plain names (not attribute assignments), and if the assigned names   486             # do not appear in the accessed names.   487    488             if len(assigned_names) == count and \   489                not assigned_names.intersection(accessed_names):   490    491                 for node, item in zip(n.nodes, name_ref.items):   492                     self.process_assignment_node(node, item)   493    494                 return True   495    496             # Otherwise, use the position-based mechanism to obtain values.   497    498             else:   499                 return False   500         else:   501             raise InspectError("In %s, item assignment needing %d items is given %d items." % (   502                 self.get_namespace_path(), len(n.nodes), len(name_ref.items)))   503    504     def process_compare_node(self, n):   505    506         """   507         Process the given comparison node 'n', converting an operator sequence   508         from...   509    510         <expr1> <op1> <expr2> <op2> <expr3>   511    512         ...to...   513    514         <op1>(<expr1>, <expr2>) and <op2>(<expr2>, <expr3>)   515         """   516    517         invocations = []   518         last = n.expr   519    520         for op, op_node in n.ops:   521             op = operator_functions.get(op)   522    523             invocations.append(compiler.ast.CallFunc(   524                 compiler.ast.Name("$op%s" % op),   525                 [last, op_node]))   526    527             last = op_node   528    529         if len(invocations) > 1:   530             result = compiler.ast.And(invocations)   531         else:   532             result = invocations[0]   533    534         return self.process_structure_node(result)   535    536     def process_dict_node(self, node):   537    538         """   539         Process the given dictionary 'node', returning a list of (key, value)   540         tuples.   541         """   542    543         l = []   544         for key, value in node.items:   545             l.append((   546                 self.process_structure_node(key),   547                 self.process_structure_node(value)))   548         return l   549    550     def process_for_node(self, n):   551    552         """   553         Generate attribute accesses for {n.list}.__iter__ and the next method on   554         the iterator, producing a replacement node for the original.   555         """   556    557         t0 = self.get_temporary_name()   558         self.next_temporary()   559         t1 = self.get_temporary_name()   560         self.next_temporary()   561         t2 = self.get_temporary_name()   562         self.next_temporary()   563    564         node = compiler.ast.Stmt([   565    566             # <t0> = {n.list}   567             # <t1> = <t0>.__iter__()   568    569             compiler.ast.Assign(   570                 [compiler.ast.AssName(t0, "OP_ASSIGN")],   571                 n.list),   572    573             compiler.ast.Assign(   574                 [compiler.ast.AssName(t1, "OP_ASSIGN")],   575                 compiler.ast.CallFunc(   576                     compiler.ast.Getattr(compiler.ast.Name(t0), "__iter__"),   577                     [])),   578    579             # <t2> = <t1>.next   580             # try:   581             #     while True:   582             #         <var>... = <t2>()   583             #         ...   584             # except StopIteration:   585             #     pass   586    587             compiler.ast.Assign(   588                 [compiler.ast.AssName(t2, "OP_ASSIGN")],   589                 compiler.ast.Getattr(compiler.ast.Name(t1), "next")),   590    591             compiler.ast.TryExcept(   592                 compiler.ast.While(   593                     compiler.ast.Name("True"),   594                     compiler.ast.Stmt([   595                         compiler.ast.Assign(   596                             [n.assign],   597                             compiler.ast.CallFunc(   598                                 compiler.ast.Name(t2),   599                                 []   600                                 )),   601                         n.body]),   602                     None),   603                 [(compiler.ast.Name("StopIteration"), None, compiler.ast.Stmt([compiler.ast.Pass()]))],   604                 None)   605             ])   606    607         self.process_structure_node(node)   608    609     def process_literal_sequence_node(self, n, name, ref, cls):   610    611         """   612         Process the given literal sequence node 'n' as a function invocation,   613         with 'name' indicating the type of the sequence, and 'ref' being a   614         reference to the type. The 'cls' is used to instantiate a suitable name   615         reference.   616         """   617    618         if name == "dict":   619             items = []   620             for key, value in n.items:   621                 items.append(compiler.ast.Tuple([key, value]))   622         else: # name in ("list", "tuple"):   623             items = n.nodes   624    625         return self.get_literal_reference(name, ref, items, cls)   626    627     def process_operator_node(self, n):   628    629         """   630         Process the given operator node 'n' as an operator function invocation.   631         """   632    633         opname = n.__class__.__name__   634         operands = n.getChildNodes()   635    636         # Convert a unary operation to an invocation.   637    638         op = unary_operator_functions.get(opname)   639    640         if op:   641             invocation = compiler.ast.CallFunc(   642                 compiler.ast.Name("$op%s" % op),   643                 [operands[0]]   644                 )   645    646         # Convert a single operator with a list of operands to a combination of   647         # pairwise operations.   648    649         else:   650             op = operator_functions[opname]   651             invocation = self._process_operator_node(op, operands)   652    653         return self.process_structure_node(invocation)   654    655     def _process_operator_node(self, op, operands):   656    657         """   658         Process the given 'op', being an operator function, together with the   659         supplied 'operands', returning either a single remaining operand or an   660         invocation combining the operands.   661         """   662    663         remaining = operands[1:]   664         if not remaining:   665             return operands[0]   666    667         return compiler.ast.CallFunc(   668             compiler.ast.Name("$op%s" % op),   669             [operands[0], self._process_operator_node(op, remaining)]   670             )   671    672     def process_print_node(self, n):   673    674         """   675         Process the given print node 'n' as an invocation on a stream of the   676         form...   677    678         $print(dest, args, nl)   679    680         The special function name will be translated elsewhere.   681         """   682    683         nl = isinstance(n, compiler.ast.Printnl)   684         invocation = compiler.ast.CallFunc(   685             compiler.ast.Name("$print"),   686             [n.dest or compiler.ast.Name("None"),   687              compiler.ast.List(list(n.nodes)),   688              nl and compiler.ast.Name("True") or compiler.ast.Name("False")]   689             )   690         return self.process_structure_node(invocation)   691    692     def process_slice_node(self, n, expr=None):   693    694         """   695         Process the given slice node 'n' as a method invocation.   696         """   697    698         if n.flags == "OP_ASSIGN": op = "__setslice__"   699         elif n.flags == "OP_DELETE": op = "__delslice__"   700         else: op = "__getslice__"   701    702         invocation = compiler.ast.CallFunc(   703             compiler.ast.Getattr(n.expr, op),   704             [n.lower or compiler.ast.Name("None"), n.upper or compiler.ast.Name("None")] +   705                 (expr and [expr] or [])   706             )   707    708         # Fix parse tree structure.   709    710         if op == "__delslice__":   711             invocation = compiler.ast.Discard(invocation)   712    713         return self.process_structure_node(invocation)   714    715     def process_sliceobj_node(self, n):   716    717         """   718         Process the given slice object node 'n' as a slice constructor.   719         """   720    721         op = "slice"   722         invocation = compiler.ast.CallFunc(   723             compiler.ast.Name("$op%s" % op),   724             n.nodes   725             )   726         return self.process_structure_node(invocation)   727    728     def process_subscript_node(self, n, expr=None):   729    730         """   731         Process the given subscript node 'n' as a method invocation.   732         """   733    734         if n.flags == "OP_ASSIGN": op = "__setitem__"   735         elif n.flags == "OP_DELETE": op = "__delitem__"   736         else: op = "__getitem__"   737    738         invocation = compiler.ast.CallFunc(   739             compiler.ast.Getattr(n.expr, op),   740             list(n.subs) + (expr and [expr] or [])   741             )   742    743         # Fix parse tree structure.   744    745         if op == "__delitem__":   746             invocation = compiler.ast.Discard(invocation)   747    748         return self.process_structure_node(invocation)   749    750     def process_attribute_chain(self, n):   751    752         """   753         Process the given attribute access node 'n'. Return a reference   754         describing the expression.   755         """   756    757         # AssAttr/Getattr are nested with the outermost access being the last   758         # access in any chain.   759    760         self.attrs.insert(0, n.attrname)   761         attrs = self.attrs   762    763         # Break attribute chains where non-access nodes are found.   764    765         if not self.have_access_expression(n):   766             self.reset_attribute_chain()   767    768         # Descend into the expression, extending backwards any existing chain,   769         # or building another for the expression.   770    771         name_ref = self.process_structure_node(n.expr)   772    773         # Restore chain information applying to this node.   774    775         if not self.have_access_expression(n):   776             self.restore_attribute_chain(attrs)   777    778         # Return immediately if the expression was another access and thus a   779         # continuation backwards along the chain. The above processing will   780         # have followed the chain all the way to its conclusion.   781    782         if self.have_access_expression(n):   783             del self.attrs[0]   784    785         return name_ref   786    787     # Attribute chain handling.   788    789     def reset_attribute_chain(self):   790    791         "Reset the attribute chain for a subexpression of an attribute access."   792    793         self.attrs = []   794         self.chain_assignment.append(self.in_assignment)   795         self.chain_invocation.append(self.in_invocation)   796         self.in_assignment = None   797         self.in_invocation = None   798    799     def restore_attribute_chain(self, attrs):   800    801         "Restore the attribute chain for an attribute access."   802    803         self.attrs = attrs   804         self.in_assignment = self.chain_assignment.pop()   805         self.in_invocation = self.chain_invocation.pop()   806    807     def have_access_expression(self, node):   808    809         "Return whether the expression associated with 'node' is Getattr."   810    811         return isinstance(node.expr, compiler.ast.Getattr)   812    813     def get_name_for_tracking(self, name, name_ref=None, is_global=False):   814    815         """   816         Return the name to be used for attribute usage observations involving   817         the given 'name' in the current namespace.   818    819         If the name is being used outside a function, and if 'name_ref' is   820         given and indicates a global or if 'is_global' is specified as a true   821         value, a path featuring the name in the global namespace is returned.   822         Otherwise, a path computed using the current namespace and the given   823         name is returned.   824    825         The intention of this method is to provide a suitably-qualified name   826         that can be tracked across namespaces. Where globals are being   827         referenced in class namespaces, they should be referenced using their   828         path within the module, not using a path within each class.   829    830         It may not be possible to identify a global within a function at the   831         time of inspection (since a global may appear later in a file).   832         Consequently, globals are identified by their local name rather than   833         their module-qualified path.   834         """   835    836         # For functions, use the appropriate local names.   837    838         if self.in_function:   839             return name   840    841         # For global names outside functions, use a global name.   842    843         elif is_global or name_ref and name_ref.is_global_name():   844             return self.get_global_path(name)   845    846         # Otherwise, establish a name in the current namespace.   847    848         else:   849             return self.get_object_path(name)   850    851     def get_path_for_access(self):   852    853         "Outside functions, register accesses at the module level."   854    855         if not self.in_function:   856             return self.name   857         else:   858             return self.get_namespace_path()   859    860     def get_module_name(self, node):   861    862         """   863         Using the given From 'node' in this module, calculate any relative import   864         information, returning a tuple containing a module to import along with any   865         names to import based on the node's name information.   866    867         Where the returned module is given as None, whole module imports should   868         be performed for the returned modules using the returned names.   869         """   870    871         # Absolute import.   872    873         if node.level == 0:   874             return node.modname, node.names   875    876         # Relative to an ancestor of this module.   877    878         else:   879             path = self.name.split(".")   880             level = node.level   881    882             # Relative imports treat package roots as submodules.   883    884             if split(self.filename)[-1] == "__init__.py":   885                 level -= 1   886    887             if level > len(path):   888                 raise InspectError("Relative import %r involves too many levels up from module %r" % (   889                     ("%s%s" % ("." * node.level, node.modname or "")), self.name))   890    891             basename = ".".join(path[:len(path)-level])   892    893         # Name imports from a module.   894    895         if node.modname:   896             return "%s.%s" % (basename, node.modname), node.names   897    898         # Relative whole module imports.   899    900         else:   901             return basename, node.names   902    903 def get_argnames(args):   904    905     """   906     Return a list of all names provided by 'args'. Since tuples may be   907     employed, the arguments are traversed depth-first.   908     """   909    910     l = []   911     for arg in args:   912         if isinstance(arg, tuple):   913             l += get_argnames(arg)   914         else:   915             l.append(arg)   916     return l   917    918 def get_names_from_nodes(nodes):   919    920     """   921     Return the names employed in the given 'nodes' along with the number of   922     nodes excluding sequences.   923     """   924    925     names = set()   926     count = 0   927    928     for node in nodes:   929    930         # Add names and count them.   931    932         if isinstance(node, (compiler.ast.AssName, compiler.ast.Name)):   933             names.add(node.name)   934             count += 1   935    936         # Add names from sequences and incorporate their counts.   937    938         elif isinstance(node, (compiler.ast.AssList, compiler.ast.AssTuple,   939                                compiler.ast.List, compiler.ast.Set,   940                                compiler.ast.Tuple)):   941             _names, _count = get_names_from_nodes(node.nodes)   942             names.update(_names)   943             count += _count   944    945         # Count non-name, non-sequence nodes.   946    947         else:   948             count += 1   949    950     return names, count   951    952 # Location classes.   953    954 class Location:   955    956     "A generic program location."   957    958     def __init__(self, path, name, attrnames=None, version=None, access_number=None):   959         self.path = path   960         self.name = name   961         self.attrnames = attrnames   962         self.version = version   963         self.access_number = access_number   964    965     def __repr__(self):   966         return "Location(%r, %r, %r, %r, %r)" % self.as_tuple()   967    968     def as_tuple(self):   969         return (self.path, self.name, self.attrnames, self.version, self.access_number)   970    971     def __hash__(self):   972         return hash(self.as_tuple())   973    974     def __eq__(self, other):   975         return self.as_tuple() == other.as_tuple()   976    977     def __cmp__(self, other):   978         return cmp(self.as_tuple(), other.as_tuple())   979    980     def get_attrname(self):   981    982         """   983         Extract the first attribute from the attribute names employed in this   984         location.   985         """   986    987         attrnames = self.attrnames   988         if not attrnames:   989             return attrnames   990         return get_attrnames(attrnames)[0]   991    992 class AccessLocation(Location):   993    994     "A specialised access location."   995    996     def __init__(self, path, name, attrnames, access_number):   997    998         """   999         Initialise an access location featuring 'path', 'name', 'attrnames' and  1000         'access_number'.  1001         """  1002   1003         Location.__init__(self, path, name, attrnames, None, access_number)  1004   1005     def __repr__(self):  1006         return "AccessLocation(%r, %r, %r, %r)" % (self.path, self.name, self.attrnames, self.access_number)  1007   1008 # Result classes.  1009   1010 class InstructionSequence:  1011   1012     "A generic sequence of instructions."  1013   1014     def __init__(self, instructions):  1015         self.instructions = instructions  1016   1017     def get_value_instruction(self):  1018         return self.instructions[-1]  1019   1020     def get_init_instructions(self):  1021         return self.instructions[:-1]  1022   1023 # Dictionary utilities.  1024   1025 def init_item(d, key, fn):  1026   1027     """  1028     Add to 'd' an entry for 'key' using the callable 'fn' to make an initial  1029     value where no entry already exists.  1030     """  1031   1032     if not d.has_key(key):  1033         d[key] = fn()  1034     return d[key]  1035   1036 def dict_for_keys(d, keys):  1037   1038     "Return a new dictionary containing entries from 'd' for the given 'keys'."  1039   1040     nd = {}  1041     for key in keys:  1042         if d.has_key(key):  1043             nd[key] = d[key]  1044     return nd  1045   1046 def make_key(s):  1047   1048     "Make sequence 's' into a tuple-based key, first sorting its contents."  1049   1050     l = list(s)  1051     l.sort()  1052     return tuple(l)  1053   1054 def add_counter_item(d, key):  1055   1056     """  1057     Make a mapping in 'd' for 'key' to the number of keys added before it, thus  1058     maintaining a mapping of keys to their order of insertion.  1059     """  1060   1061     if not d.has_key(key):  1062         d[key] = len(d.keys())  1063     return d[key]   1064   1065 def remove_items(d1, d2):  1066   1067     "Remove from 'd1' all items from 'd2'."  1068   1069     for key in d2.keys():  1070         if d1.has_key(key):  1071             del d1[key]  1072   1073 # Set utilities.  1074   1075 def first(s):  1076     return list(s)[0]  1077   1078 def same(s1, s2):  1079     return set(s1) == set(s2)  1080   1081 def order_dependencies(all_depends):  1082   1083     """  1084     Produce a dependency ordering for the 'all_depends' mapping. This mapping  1085     has the form "A depends on B, C...". The result will order A, B, C, and so  1086     on.  1087     """  1088   1089     usage = init_reverse_dependencies(all_depends)  1090   1091     # Produce an ordering by obtaining exposed items (required by items already  1092     # processed) and putting them at the start of the list.  1093   1094     ordered = []  1095   1096     while usage:  1097         have_next = False  1098   1099         for key, n in usage.items():  1100   1101             # Add items needed by no other items to the ordering.  1102   1103             if not n:  1104                 remove_dependency(key, all_depends, usage, ordered)  1105                 have_next = True  1106   1107         if not have_next:  1108             raise ValueError, usage  1109   1110     return ordered  1111   1112 def order_dependencies_partial(all_depends):  1113   1114     """  1115     Produce a dependency ordering for the 'all_depends' mapping. This mapping  1116     has the form "A depends on B, C...". The result will order A, B, C, and so  1117     on. Where cycles exist, they will be broken and a partial ordering returned.  1118     """  1119   1120     usage = init_reverse_dependencies(all_depends)  1121   1122     # Duplicate the dependencies for subsequent modification.  1123   1124     new_depends = {}  1125     for key, values in all_depends.items():  1126         new_depends[key] = set(values)  1127   1128     all_depends = new_depends  1129   1130     # Produce an ordering by obtaining exposed items (required by items already  1131     # processed) and putting them at the start of the list.  1132   1133     ordered = []  1134   1135     while usage:  1136         least = None  1137         least_key = None  1138   1139         for key, n in usage.items():  1140   1141             # Add items needed by no other items to the ordering.  1142   1143             if not n:  1144                 remove_dependency(key, all_depends, usage, ordered)  1145                 least = 0  1146   1147             # When breaking cycles, note the least used items.  1148   1149             elif least is None or len(n) < least:  1150                 least_key = key  1151                 least = len(n)  1152   1153         if least:  1154             transfer_dependencies(least_key, all_depends, usage, ordered)  1155   1156     return ordered  1157   1158 def init_reverse_dependencies(all_depends):  1159   1160     """  1161     From 'all_depends', providing a mapping of the form "A depends on B, C...",  1162     record the reverse dependencies, making a mapping of the form  1163     "B is needed by A", "C is needed by A", and so on.  1164     """  1165   1166     usage = {}  1167   1168     # Record path-based dependencies.  1169   1170     for key in all_depends.keys():  1171         usage[key] = set()  1172   1173     for key, depends in all_depends.items():  1174         for depend in depends:  1175             init_item(usage, depend, set)  1176             usage[depend].add(key)  1177   1178     return usage  1179   1180 def transfer_dependencies(key, all_depends, usage, ordered):  1181   1182     """  1183     Transfer items needed by 'key' to those items needing 'key', found using  1184     'all_depends', and updating 'usage'. Insert 'key' into the 'ordered'  1185     collection of dependencies.  1186   1187     If "A is needed by X" and "B is needed by A", then transferring items needed  1188     by A will cause "B is needed by X" to be recorded as a consequence.  1189   1190     Transferring items also needs to occur in the reverse mapping, so that  1191     "A needs B" and "X needs A", then the consequence must be recorded as  1192     "X needs B".  1193     """  1194   1195     ordered.insert(0, key)  1196   1197     needing = usage[key]                        # A is needed by X  1198     needed = all_depends.get(key)               # A needs B  1199   1200     if needing:  1201         for depend in needing:  1202             l = all_depends.get(depend)  1203             if not l:  1204                 continue  1205   1206             l.remove(key)                       # X needs (A)  1207   1208             if needed:  1209                 l.update(needed)                # X needs B...  1210   1211                 # Prevent self references.  1212   1213                 if depend in needed:  1214                     l.remove(depend)  1215   1216     if needed:  1217         for depend in needed:  1218             l = usage.get(depend)  1219             if not l:  1220                 continue  1221   1222             l.remove(key)                       # B is needed by (A)  1223             l.update(needing)                   # B is needed by X...  1224   1225             # Prevent self references.  1226   1227             if depend in needing:  1228                 l.remove(depend)  1229   1230     if needed:  1231         del all_depends[key]  1232     del usage[key]  1233   1234 def remove_dependency(key, all_depends, usage, ordered):  1235   1236     """  1237     Remove 'key', found in 'all_depends', from 'usage', inserting it into the  1238     'ordered' collection of dependencies.  1239   1240     Given that 'usage' for a given key A would indicate that "A needs <nothing>"  1241     upon removing A from 'usage', the outcome is that all keys needing A will  1242     have A removed from their 'usage' records.  1243   1244     So, if "B needs A", removing A will cause "B needs <nothing>" to be recorded  1245     as a consequence.  1246     """  1247   1248     ordered.insert(0, key)  1249   1250     depends = all_depends.get(key)  1251   1252     # Reduce usage of the referenced items.  1253   1254     if depends:  1255         for depend in depends:  1256             usage[depend].remove(key)  1257   1258     del usage[key]  1259   1260 # General input/output.  1261   1262 def readfile(filename):  1263   1264     "Return the contents of 'filename'."  1265   1266     f = open(filename)  1267     try:  1268         return f.read()  1269     finally:  1270         f.close()  1271   1272 def writefile(filename, s):  1273   1274     "Write to 'filename' the string 's'."  1275   1276     f = open(filename, "w")  1277     try:  1278         f.write(s)  1279     finally:  1280         f.close()  1281   1282 # General encoding.  1283   1284 def sorted_output(x):  1285   1286     "Sort sequence 'x' and return a string with commas separating the values."  1287   1288     x = map(str, x)  1289     x.sort()  1290     return ", ".join(x)  1291   1292 def get_string_details(literals, encoding):  1293   1294     """  1295     Determine whether 'literals' represent Unicode strings or byte strings,  1296     using 'encoding' to reproduce byte sequences.  1297   1298     Each literal is the full program representation including prefix and quotes  1299     recoded by the parser to UTF-8. Thus, any literal found to represent a byte  1300     string needs to be translated back to its original encoding.  1301   1302     Return a single encoded literal value, a type name, and the original  1303     encoding as a tuple.  1304     """  1305   1306     typename = "unicode"  1307   1308     l = []  1309   1310     for s in literals:  1311         out, _typename = get_literal_details(s)  1312         if _typename == "str":  1313             typename = "str"  1314         l.append(out)  1315   1316     out = "".join(l)  1317   1318     # For Unicode values, convert to the UTF-8 program representation.  1319   1320     if typename == "unicode":  1321         return out.encode("utf-8"), typename, encoding  1322   1323     # For byte string values, convert back to the original encoding.  1324   1325     else:  1326         return out.encode(encoding), typename, encoding  1327   1328 def get_literal_details(s):  1329   1330     """  1331     Determine whether 's' represents a Unicode string or a byte string, where  1332     's' contains the full program representation of a literal including prefix  1333     and quotes, recoded by the parser to UTF-8.  1334   1335     Find and convert Unicode values starting with <backslash>u or <backslash>U,  1336     and byte or Unicode values starting with <backslash><octal digit> or  1337     <backslash>x.  1338   1339     Literals prefixed with "u" cause <backslash><octal digit> and <backslash>x  1340     to be considered as Unicode values. Otherwise, they produce byte values and  1341     cause unprefixed strings to be considered as byte strings.  1342   1343     Literals prefixed with "r" do not have their backslash-encoded values  1344     converted unless also prefixed with "u", in which case only the above value  1345     formats are converted, not any of the other special sequences for things  1346     like newlines.  1347   1348     Return the literal value as a Unicode object together with the appropriate  1349     type name in a tuple.  1350     """  1351   1352     l = []  1353   1354     # Identify the quote character and use it to identify the prefix.  1355   1356     quote_type = s[-1]  1357     prefix_end = s.find(quote_type)  1358     prefix = s[:prefix_end].lower()  1359   1360     if prefix not in ("", "b", "br", "r", "u", "ur"):  1361         raise ValueError, "String literal does not have a supported prefix: %s" % s  1362   1363     if "b" in prefix:  1364         typename = "str"  1365     else:  1366         typename = "unicode"  1367   1368     # Identify triple quotes or single quotes.  1369   1370     if len(s) >= 6 and s[-2] == quote_type and s[-3] == quote_type:  1371         quote = s[prefix_end:prefix_end+3]  1372         current = prefix_end + 3  1373         end = len(s) - 3  1374     else:  1375         quote = s[prefix_end]  1376         current = prefix_end + 1  1377         end = len(s) - 1  1378   1379     # Conversions of some quoted values.  1380   1381     searches = {  1382         "u" : (6, 16),  1383         "U" : (10, 16),  1384         "x" : (4, 16),  1385         }  1386   1387     octal_digits = map(str, range(0, 8))  1388   1389     # Translations of some quoted values.  1390   1391     escaped = {  1392         "\\" : "\\", "'" : "'", '"' : '"',  1393         "a" : "\a", "b" : "\b", "f" : "\f",  1394         "n" : "\n", "r" : "\r", "t" : "\t",  1395         }  1396   1397     while current < end:  1398   1399         # Look for quoted values.  1400   1401         index = s.find("\\", current)  1402         if index == -1 or index + 1 == end:  1403             l.append(s[current:end])  1404             break  1405   1406         # Add the preceding text.  1407   1408         l.append(s[current:index])  1409   1410         # Handle quoted text.  1411   1412         term = s[index+1]  1413   1414         # Add Unicode values. Where a string is u-prefixed, even \o and \x  1415         # produce Unicode values.  1416   1417         if typename == "unicode" and (  1418             term in ("u", "U") or   1419             "u" in prefix and (term == "x" or term in octal_digits)):  1420   1421             needed, base = searches.get(term, (4, 8))  1422             value = convert_quoted_value(s, index, needed, end, base, unichr)  1423             l.append(value)  1424             current = index + needed  1425   1426         # Add raw byte values, changing the string type.  1427   1428         elif "r" not in prefix and (  1429              term == "x" or term in octal_digits):  1430   1431             needed, base = searches.get(term, (4, 8))  1432             value = convert_quoted_value(s, index, needed, end, base, chr)  1433             l.append(value)  1434             typename = "str"  1435             current = index + needed  1436   1437         # Add other escaped values.  1438   1439         elif "r" not in prefix and escaped.has_key(term):  1440             l.append(escaped[term])  1441             current = index + 2  1442   1443         # Add other text as found.  1444   1445         else:  1446             l.append(s[index:index+2])  1447             current = index + 2  1448   1449     # Collect the components into a single Unicode object. Since the literal  1450     # text was already in UTF-8 form, interpret plain strings as UTF-8  1451     # sequences.  1452   1453     out = []  1454   1455     for value in l:  1456         if isinstance(value, unicode):  1457             out.append(value)  1458         else:  1459             out.append(unicode(value, "utf-8"))  1460   1461     return "".join(out), typename  1462   1463 def convert_quoted_value(s, index, needed, end, base, fn):  1464   1465     """  1466     Interpret a quoted value in 's' at 'index' with the given 'needed' number of  1467     positions, and with the given 'end' indicating the first position after the  1468     end of the actual string content.  1469   1470     Use 'base' as the numerical base when interpreting the value, and use 'fn'  1471     to convert the value to an appropriate type.  1472     """  1473   1474     s = s[index:min(index+needed, end)]  1475   1476     # Not a complete occurrence.  1477   1478     if len(s) < needed:  1479         return s  1480   1481     # Test for a well-formed value.  1482   1483     try:  1484         first = base == 8 and 1 or 2  1485         value = int(s[first:needed], base)  1486     except ValueError:  1487         return s  1488     else:  1489         return fn(value)  1490   1491 # Attribute chain decoding.  1492   1493 def get_attrnames(attrnames):  1494   1495     """  1496     Split the qualified attribute chain 'attrnames' into its components,  1497     handling special attributes starting with "#" that indicate type  1498     conformance.  1499     """  1500   1501     if attrnames.startswith("#"):  1502         return [attrnames]  1503     else:  1504         return attrnames.split(".")  1505   1506 def get_name_path(path, name):  1507   1508     "Return a suitable qualified name from the given 'path' and 'name'."  1509   1510     if "." in name:  1511         return name  1512     else:  1513         return "%s.%s" % (path, name)  1514   1515 # Usage-related functions.  1516   1517 def get_types_for_usage(attrnames, objects):  1518   1519     """  1520     Identify the types that can support the given 'attrnames', using the  1521     given 'objects' as the catalogue of type details.  1522     """  1523   1524     types = []  1525     for name, _attrnames in objects.items():  1526         if set(attrnames).issubset(_attrnames):  1527             types.append(name)  1528     return types  1529   1530 def get_invoked_attributes(usage):  1531   1532     "Obtain invoked attribute from the given 'usage'."  1533   1534     invoked = []  1535     if usage:  1536         for attrname, invocation, assignment in usage:  1537             if invocation:  1538                 invoked.append(attrname)  1539     return invoked  1540   1541 def get_assigned_attributes(usage):  1542   1543     "Obtain assigned attribute from the given 'usage'."  1544   1545     assigned = []  1546     if usage:  1547         for attrname, invocation, assignment in usage:  1548             if assignment:  1549                 assigned.append(attrname)  1550     return assigned  1551   1552 # Type and module functions.  1553 # NOTE: This makes assumptions about the __builtins__ structure.  1554   1555 def get_builtin_module(name):  1556   1557     "Return the module name containing the given type 'name'."  1558   1559     if name == "string":  1560         modname = "str"  1561     elif name == "utf8string":  1562         modname = "unicode"  1563     elif name == "NoneType":  1564         modname = "none"  1565     else:  1566         modname = name  1567   1568     return "__builtins__.%s" % modname  1569   1570 def get_builtin_type(name):  1571   1572     "Return the type name provided by the given Python value 'name'."  1573   1574     if name == "str":  1575         return "string"  1576     elif name == "unicode":  1577         return "utf8string"  1578     else:  1579         return name  1580   1581 def get_builtin_class(name):  1582   1583     "Return the full name of the built-in class having the given 'name'."  1584   1585     typename = get_builtin_type(name)  1586     module = get_builtin_module(typename)  1587     return "%s.%s" % (module, typename)  1588   1589 # Useful data.  1590   1591 predefined_constants = "False", "None", "NotImplemented", "True"  1592   1593 unary_operator_functions = {  1594   1595     # Unary operations.  1596   1597     "Invert" : "invert",  1598     "UnaryAdd" : "pos",  1599     "UnarySub" : "neg",  1600     }  1601   1602 operator_functions = {  1603   1604     # Fundamental operations.  1605   1606     "is" : "is_",  1607     "is not" : "is_not",  1608   1609     # Binary operations.  1610   1611     "in" : "in_",  1612     "not in" : "not_in",  1613     "Add" : "add",  1614     "Bitand" : "and_",  1615     "Bitor" : "or_",  1616     "Bitxor" : "xor",  1617     "Div" : "div",  1618     "FloorDiv" : "floordiv",  1619     "LeftShift" : "lshift",  1620     "Mod" : "mod",  1621     "Mul" : "mul",  1622     "Power" : "pow",  1623     "RightShift" : "rshift",  1624     "Sub" : "sub",  1625   1626     # Augmented assignment.  1627   1628     "+=" : "iadd",  1629     "-=" : "isub",  1630     "*=" : "imul",  1631     "/=" : "idiv",  1632     "//=" : "ifloordiv",  1633     "%=" : "imod",  1634     "**=" : "ipow",  1635     "<<=" : "ilshift",  1636     ">>=" : "irshift",  1637     "&=" : "iand",  1638     "^=" : "ixor",  1639     "|=" : "ior",  1640   1641     # Comparisons.  1642   1643     "==" : "eq",  1644     "!=" : "ne",  1645     "<" : "lt",  1646     "<=" : "le",  1647     ">=" : "ge",  1648     ">" : "gt",  1649     }  1650   1651 operator_functions.update(unary_operator_functions)  1652   1653 # vim: tabstop=4 expandtab shiftwidth=4