Lichen

common.py

845:9adc6e8a6fed
2018-07-09 Paul Boddie Introduced restrictions on privileged attributes, allowing internal access only. tuple-optimisations
     1 #!/usr/bin/env python     2      3 """     4 Common functions.     5      6 Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013,     7               2014, 2015, 2016, 2017, 2018 Paul Boddie <paul@boddie.org.uk>     8      9 This program is free software; you can redistribute it and/or modify it under    10 the terms of the GNU General Public License as published by the Free Software    11 Foundation; either version 3 of the License, or (at your option) any later    12 version.    13     14 This program is distributed in the hope that it will be useful, but WITHOUT    15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS    16 FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more    17 details.    18     19 You should have received a copy of the GNU General Public License along with    20 this program.  If not, see <http://www.gnu.org/licenses/>.    21 """    22     23 from compiler.transformer import Transformer    24 from errors import InspectError    25 from os import listdir, makedirs, remove    26 from os.path import exists, getmtime, isdir, join, split    27 from results import ConstantValueRef, LiteralSequenceRef, NameRef    28 import compiler.ast    29     30 class CommonOutput:    31     32     "Common output functionality."    33     34     def check_output(self, options=None):    35     36         "Check the existing output and remove it if irrelevant."    37     38         if not exists(self.output):    39             makedirs(self.output)    40     41         details = self.importer.get_cache_details()    42         recorded_details = self.get_output_details()    43     44         # Combine cache details with any options.    45     46         full_details = options and (details + " " + options) or details    47     48         if recorded_details != full_details:    49             self.remove_output()    50     51         writefile(self.get_output_details_filename(), full_details)    52     53     def get_output_details_filename(self):    54     55         "Return the output details filename."    56     57         return join(self.output, "$details")    58     59     def get_output_details(self):    60     61         "Return details of the existing output."    62     63         details_filename = self.get_output_details_filename()    64     65         if not exists(details_filename):    66             return None    67         else:    68             return readfile(details_filename)    69     70     def remove_output(self, dirname=None):    71     72         "Remove the output."    73     74         dirname = dirname or self.output    75     76         for filename in listdir(dirname):    77             path = join(dirname, filename)    78             if isdir(path):    79                 self.remove_output(path)    80             else:    81                 remove(path)    82     83 def copy(source, target, only_if_newer=True):    84     85     "Copy a text file from 'source' to 'target'."    86     87     if isdir(target):    88         target = join(target, split(source)[-1])    89     90     if only_if_newer and not is_newer(source, target):    91         return    92     93     infile = open(source)    94     outfile = open(target, "w")    95     96     try:    97         outfile.write(infile.read())    98     finally:    99         outfile.close()   100         infile.close()   101    102 def is_newer(source, target):   103    104     "Return whether 'source' is newer than 'target'."   105    106     if exists(target):   107         target_mtime = getmtime(target)   108         source_mtime = getmtime(source)   109         return source_mtime > target_mtime   110    111     return True   112    113 class CommonModule:   114    115     "A common module representation."   116    117     def __init__(self, name, importer):   118    119         """   120         Initialise this module with the given 'name' and an 'importer' which is   121         used to provide access to other modules when required.   122         """   123    124         self.name = name   125         self.importer = importer   126         self.filename = None   127    128         # Inspection-related attributes.   129    130         self.astnode = None   131         self.encoding = None   132         self.temp = {}   133         self.lambdas = {}   134    135         # Constants, literals and values.   136    137         self.constants = {}   138         self.constant_values = {}   139         self.literals = {}   140         self.literal_types = {}   141    142         # Nested namespaces.   143    144         self.namespace_path = []   145         self.in_function = False   146    147         # Retain the assignment value expression and track invocations.   148    149         self.in_assignment = None   150         self.in_invocation = None   151    152         # Attribute chain state management.   153    154         self.attrs = []   155         self.chain_assignment = []   156         self.chain_invocation = []   157    158     def __repr__(self):   159         return "CommonModule(%r, %r)" % (self.name, self.importer)   160    161     def parse_file(self, filename):   162    163         "Parse the file with the given 'filename', initialising attributes."   164    165         self.filename = filename   166    167         # Use the Transformer directly to obtain encoding information.   168    169         t = Transformer()   170         f = open(filename)   171    172         try:   173             self.astnode = t.parsesuite(f.read() + "\n")   174             self.encoding = t.encoding   175         finally:   176             f.close()   177    178     # Module-relative naming.   179    180     def get_global_path(self, name):   181         return "%s.%s" % (self.name, name)   182    183     def get_namespace_path(self):   184         return ".".join([self.name] + self.namespace_path)   185    186     def get_object_path(self, name):   187         return ".".join([self.name] + self.namespace_path + [name])   188    189     # Namespace management.   190    191     def enter_namespace(self, name):   192    193         "Enter the namespace having the given 'name'."   194    195         self.namespace_path.append(name)   196    197     def exit_namespace(self):   198    199         "Exit the current namespace."   200    201         self.namespace_path.pop()   202    203     # Constant reference naming.   204    205     def get_constant_name(self, value, value_type, encoding=None):   206    207         """   208         Add a new constant to the current namespace for 'value' with   209         'value_type'.   210         """   211    212         path = self.get_namespace_path()   213         init_item(self.constants, path, dict)   214         return "$c%d" % add_counter_item(self.constants[path], (value, value_type, encoding))   215    216     # Literal reference naming.   217    218     def get_literal_name(self):   219    220         "Add a new literal to the current namespace."   221    222         path = self.get_namespace_path()   223         init_item(self.literals, path, lambda: 0)   224         return "$C%d" % self.literals[path]   225    226     def next_literal(self):   227         self.literals[self.get_namespace_path()] += 1   228    229     # Temporary variable naming.   230    231     def get_temporary_name(self):   232         path = self.get_namespace_path()   233         init_item(self.temp, path, lambda: 0)   234         return "$t%d" % self.temp[path]   235    236     def next_temporary(self):   237         self.temp[self.get_namespace_path()] += 1   238    239     # Arbitrary function naming.   240    241     def get_lambda_name(self):   242         path = self.get_namespace_path()   243         init_item(self.lambdas, path, lambda: 0)   244         name = "$l%d" % self.lambdas[path]   245         self.lambdas[path] += 1   246         return name   247    248     def reset_lambdas(self):   249         self.lambdas = {}   250    251     # Constant and literal recording.   252    253     def get_constant_value(self, value, literals=None):   254    255         """   256         Encode the 'value' if appropriate, returning a value, a typename and any   257         encoding.   258         """   259    260         if isinstance(value, unicode):   261             return value.encode("utf-8"), "unicode", self.encoding   262    263         # Attempt to convert plain strings to text.   264    265         elif isinstance(value, str) and self.encoding:   266             try:   267                 return get_string_details(literals, self.encoding)   268             except UnicodeDecodeError:   269                 pass   270    271         return value, value.__class__.__name__, None   272    273     def get_constant_reference(self, ref, value, encoding=None):   274    275         """   276         Return a constant reference for the given 'ref' type and 'value', with   277         the optional 'encoding' applying to text values.   278         """   279    280         constant_name = self.get_constant_name(value, ref.get_origin(), encoding)   281    282         # Return a reference for the constant.   283    284         objpath = self.get_object_path(constant_name)   285         name_ref = ConstantValueRef(constant_name, ref.instance_of(objpath), value)   286    287         # Record the value and type for the constant.   288    289         self._reserve_constant(objpath, name_ref.value, name_ref.get_origin(), encoding)   290         return name_ref   291    292     def reserve_constant(self, objpath, value, origin, encoding=None):   293    294         """   295         Reserve a constant within 'objpath' with the given 'value' and having a   296         type with the given 'origin', with the optional 'encoding' applying to   297         text values.   298         """   299    300         constant_name = self.get_constant_name(value, origin)   301         objpath = self.get_object_path(constant_name)   302         self._reserve_constant(objpath, value, origin, encoding)   303    304     def _reserve_constant(self, objpath, value, origin, encoding):   305    306         """   307         Store a constant for 'objpath' with the given 'value' and 'origin', with   308         the optional 'encoding' applying to text values.   309         """   310    311         self.constant_values[objpath] = value, origin, encoding   312    313     def get_literal_reference(self, name, ref, items, cls):   314    315         """   316         Return a literal reference for the given type 'name', literal 'ref',   317         node 'items' and employing the given 'cls' as the class of the returned   318         reference object.   319         """   320    321         # Construct an invocation using the items as arguments.   322    323         typename = "$L%s" % name   324    325         invocation = compiler.ast.CallFunc(   326             compiler.ast.Name(typename),   327             items   328             )   329    330         # Get a name for the actual literal.   331    332         instname = self.get_literal_name()   333         self.next_literal()   334    335         # Record the type for the literal.   336    337         objpath = self.get_object_path(instname)   338         self.literal_types[objpath] = ref.get_origin()   339    340         # Return a wrapper for the invocation exposing the items.   341    342         return cls(   343             instname,   344             ref.instance_of(),   345             self.process_structure_node(invocation),   346             invocation.args   347             )   348    349     # Node handling.   350    351     def process_structure(self, node):   352    353         """   354         Within the given 'node', process the program structure.   355    356         During inspection, this will process global declarations, adjusting the   357         module namespace, and import statements, building a module dependency   358         hierarchy.   359    360         During translation, this will consult deduced program information and   361         output translated code.   362         """   363    364         l = []   365         for n in node.getChildNodes():   366             l.append(self.process_structure_node(n))   367         return l   368    369     def process_augassign_node(self, n):   370    371         "Process the given augmented assignment node 'n'."   372    373         op = operator_functions[n.op]   374    375         if isinstance(n.node, compiler.ast.Getattr):   376             target = compiler.ast.AssAttr(n.node.expr, n.node.attrname, "OP_ASSIGN")   377         elif isinstance(n.node, compiler.ast.Name):   378             target = compiler.ast.AssName(n.node.name, "OP_ASSIGN")   379         else:   380             target = n.node   381    382         assignment = compiler.ast.Assign(   383             [target],   384             compiler.ast.CallFunc(   385                 compiler.ast.Name("$op%s" % op),   386                 [n.node, n.expr]))   387    388         return self.process_structure_node(assignment)   389    390     def process_assignment_for_object(self, original_name, source):   391    392         """   393         Return an assignment operation making 'original_name' refer to the given   394         'source'.   395         """   396    397         assignment = compiler.ast.Assign(   398             [compiler.ast.AssName(original_name, "OP_ASSIGN")],   399             source   400             )   401    402         return self.process_structure_node(assignment)   403    404     def process_assignment_node_items(self, n, expr):   405    406         """   407         Process the given assignment node 'n' whose children are to be assigned   408         items of 'expr'.   409         """   410    411         name_ref = self.process_structure_node(expr)   412    413         # Either unpack the items and present them directly to each assignment   414         # node.   415    416         if isinstance(name_ref, LiteralSequenceRef) and \   417            self.process_literal_sequence_items(n, name_ref):   418    419             pass   420    421         # Or have the assignment nodes access each item via the sequence API.   422    423         else:   424             self.process_assignment_node_items_by_position(n, expr, name_ref)   425    426     def process_assignment_node_items_by_position(self, n, expr, name_ref):   427    428         """   429         Process the given sequence assignment node 'n', converting the node to   430         the separate assignment of each target using positional access on a   431         temporary variable representing the sequence. Use 'expr' as the assigned   432         value and 'name_ref' as the reference providing any existing temporary   433         variable.   434         """   435    436         statements = []   437    438         # Employ existing names to access the sequence.   439         # Literal sequences do not provide names of accessible objects.   440    441         if isinstance(name_ref, NameRef) and not isinstance(name_ref, LiteralSequenceRef):   442             temp = name_ref.name   443    444         # For other expressions, create a temporary name to reference the items.   445    446         else:   447             temp = self.get_temporary_name()   448             self.next_temporary()   449    450             statements.append(   451                 compiler.ast.Assign([compiler.ast.AssName(temp, "OP_ASSIGN")], expr)   452                 )   453    454         # Generate a test for the length of the expression object.   455    456         statements.append(compiler.ast.Discard(   457             compiler.ast.CallFunc(compiler.ast.Name("$seq_test_length"),   458                 [compiler.ast.Name(temp), compiler.ast.Const(len(n.nodes))])))   459    460         # Assign the items to the target nodes.   461    462         for i, node in enumerate(n.nodes):   463             statements.append(   464                 compiler.ast.Assign([node], compiler.ast.CallFunc(   465                     compiler.ast.Getattr(compiler.ast.Name(temp),   466                                          "__get_single_item_unchecked__",   467                                          privileged=True),   468                     [compiler.ast.Const(i, str(i))]))   469                 )   470    471         return self.process_structure_node(compiler.ast.Stmt(statements))   472    473     def process_literal_sequence_items(self, n, name_ref):   474    475         """   476         Process the given assignment node 'n', obtaining from the given   477         'name_ref' the items to be assigned to the assignment targets.   478    479         Return whether this method was able to process the assignment node as   480         a sequence of direct assignments.   481         """   482    483         if len(n.nodes) == len(name_ref.items):   484             assigned_names, count = get_names_from_nodes(n.nodes)   485             accessed_names, _count = get_names_from_nodes(name_ref.items)   486    487             # Only assign directly between items if all assigned names are   488             # plain names (not attribute assignments), and if the assigned names   489             # do not appear in the accessed names.   490    491             if len(assigned_names) == count and \   492                not assigned_names.intersection(accessed_names):   493    494                 for node, item in zip(n.nodes, name_ref.items):   495                     self.process_assignment_node(node, item)   496    497                 return True   498    499             # Otherwise, use the position-based mechanism to obtain values.   500    501             else:   502                 return False   503         else:   504             raise InspectError("In %s, item assignment needing %d items is given %d items." % (   505                 self.get_namespace_path(), len(n.nodes), len(name_ref.items)))   506    507     def process_compare_node(self, n):   508    509         """   510         Process the given comparison node 'n', converting an operator sequence   511         from...   512    513         <expr1> <op1> <expr2> <op2> <expr3>   514    515         ...to...   516    517         <op1>(<expr1>, <expr2>) and <op2>(<expr2>, <expr3>)   518         """   519    520         invocations = []   521         last = n.expr   522    523         for op, op_node in n.ops:   524             op = operator_functions.get(op)   525    526             invocations.append(compiler.ast.CallFunc(   527                 compiler.ast.Name("$op%s" % op),   528                 [last, op_node]))   529    530             last = op_node   531    532         if len(invocations) > 1:   533             result = compiler.ast.And(invocations)   534         else:   535             result = invocations[0]   536    537         return self.process_structure_node(result)   538    539     def process_dict_node(self, node):   540    541         """   542         Process the given dictionary 'node', returning a list of (key, value)   543         tuples.   544         """   545    546         l = []   547         for key, value in node.items:   548             l.append((   549                 self.process_structure_node(key),   550                 self.process_structure_node(value)))   551         return l   552    553     def process_for_node(self, n):   554    555         """   556         Generate attribute accesses for {n.list}.__iter__ and the next method on   557         the iterator, producing a replacement node for the original.   558         """   559    560         t0 = self.get_temporary_name()   561         self.next_temporary()   562         t1 = self.get_temporary_name()   563         self.next_temporary()   564         t2 = self.get_temporary_name()   565         self.next_temporary()   566    567         node = compiler.ast.Stmt([   568    569             # <t0> = {n.list}   570             # <t1> = <t0>.__iter__()   571    572             compiler.ast.Assign(   573                 [compiler.ast.AssName(t0, "OP_ASSIGN")],   574                 n.list),   575    576             compiler.ast.Assign(   577                 [compiler.ast.AssName(t1, "OP_ASSIGN")],   578                 compiler.ast.CallFunc(   579                     compiler.ast.Getattr(compiler.ast.Name(t0), "__iter__"),   580                     [])),   581    582             # <t2> = <t1>.next   583             # try:   584             #     while True:   585             #         <var>... = <t2>()   586             #         ...   587             # except StopIteration:   588             #     pass   589    590             compiler.ast.Assign(   591                 [compiler.ast.AssName(t2, "OP_ASSIGN")],   592                 compiler.ast.Getattr(compiler.ast.Name(t1), "next")),   593    594             compiler.ast.TryExcept(   595                 compiler.ast.While(   596                     compiler.ast.Name("True"),   597                     compiler.ast.Stmt([   598                         compiler.ast.Assign(   599                             [n.assign],   600                             compiler.ast.CallFunc(   601                                 compiler.ast.Name(t2),   602                                 []   603                                 )),   604                         n.body]),   605                     None),   606                 [(compiler.ast.Name("StopIteration"), None, compiler.ast.Stmt([compiler.ast.Pass()]))],   607                 None)   608             ])   609    610         self.process_structure_node(node)   611    612     def process_literal_sequence_node(self, n, name, ref, cls):   613    614         """   615         Process the given literal sequence node 'n' as a function invocation,   616         with 'name' indicating the type of the sequence, and 'ref' being a   617         reference to the type. The 'cls' is used to instantiate a suitable name   618         reference.   619         """   620    621         if name == "dict":   622             items = []   623             for key, value in n.items:   624                 items.append(compiler.ast.Tuple([key, value]))   625         else: # name in ("list", "tuple"):   626             items = n.nodes   627    628         return self.get_literal_reference(name, ref, items, cls)   629    630     def process_operator_node(self, n):   631    632         """   633         Process the given operator node 'n' as an operator function invocation.   634         """   635    636         opname = n.__class__.__name__   637         operands = n.getChildNodes()   638    639         # Convert a unary operation to an invocation.   640    641         op = unary_operator_functions.get(opname)   642    643         if op:   644             invocation = compiler.ast.CallFunc(   645                 compiler.ast.Name("$op%s" % op),   646                 [operands[0]]   647                 )   648    649         # Convert a single operator with a list of operands to a combination of   650         # pairwise operations.   651    652         else:   653             op = operator_functions[opname]   654             invocation = self._process_operator_node(op, operands)   655    656         return self.process_structure_node(invocation)   657    658     def _process_operator_node(self, op, operands):   659    660         """   661         Process the given 'op', being an operator function, together with the   662         supplied 'operands', returning either a single remaining operand or an   663         invocation combining the operands.   664         """   665    666         remaining = operands[1:]   667         if not remaining:   668             return operands[0]   669    670         return compiler.ast.CallFunc(   671             compiler.ast.Name("$op%s" % op),   672             [operands[0], self._process_operator_node(op, remaining)]   673             )   674    675     def process_print_node(self, n):   676    677         """   678         Process the given print node 'n' as an invocation on a stream of the   679         form...   680    681         $print(dest, args, nl)   682    683         The special function name will be translated elsewhere.   684         """   685    686         nl = isinstance(n, compiler.ast.Printnl)   687         invocation = compiler.ast.CallFunc(   688             compiler.ast.Name("$print"),   689             [n.dest or compiler.ast.Name("None"),   690              compiler.ast.List(list(n.nodes)),   691              nl and compiler.ast.Name("True") or compiler.ast.Name("False")]   692             )   693         return self.process_structure_node(invocation)   694    695     def process_slice_node(self, n, expr=None):   696    697         """   698         Process the given slice node 'n' as a method invocation.   699         """   700    701         if n.flags == "OP_ASSIGN": op = "__setslice__"   702         elif n.flags == "OP_DELETE": op = "__delslice__"   703         else: op = "__getslice__"   704    705         invocation = compiler.ast.CallFunc(   706             compiler.ast.Getattr(n.expr, op),   707             [n.lower or compiler.ast.Name("None"), n.upper or compiler.ast.Name("None")] +   708                 (expr and [expr] or [])   709             )   710    711         # Fix parse tree structure.   712    713         if op == "__delslice__":   714             invocation = compiler.ast.Discard(invocation)   715    716         return self.process_structure_node(invocation)   717    718     def process_sliceobj_node(self, n):   719    720         """   721         Process the given slice object node 'n' as a slice constructor.   722         """   723    724         op = "slice"   725         invocation = compiler.ast.CallFunc(   726             compiler.ast.Name("$op%s" % op),   727             n.nodes   728             )   729         return self.process_structure_node(invocation)   730    731     def process_subscript_node(self, n, expr=None):   732    733         """   734         Process the given subscript node 'n' as a method invocation.   735         """   736    737         if n.flags == "OP_ASSIGN": op = "__setitem__"   738         elif n.flags == "OP_DELETE": op = "__delitem__"   739         else: op = "__getitem__"   740    741         invocation = compiler.ast.CallFunc(   742             compiler.ast.Getattr(n.expr, op),   743             list(n.subs) + (expr and [expr] or [])   744             )   745    746         # Fix parse tree structure.   747    748         if op == "__delitem__":   749             invocation = compiler.ast.Discard(invocation)   750    751         return self.process_structure_node(invocation)   752    753     def process_attribute_chain(self, n):   754    755         """   756         Process the given attribute access node 'n'. Return a reference   757         describing the expression.   758         """   759    760         # AssAttr/Getattr are nested with the outermost access being the last   761         # access in any chain.   762    763         self.attrs.insert(0, n.attrname)   764         attrs = self.attrs   765    766         # Break attribute chains where non-access nodes are found.   767    768         if not self.have_access_expression(n):   769             self.reset_attribute_chain()   770    771         # Descend into the expression, extending backwards any existing chain,   772         # or building another for the expression.   773    774         name_ref = self.process_structure_node(n.expr)   775    776         # Restore chain information applying to this node.   777    778         if not self.have_access_expression(n):   779             self.restore_attribute_chain(attrs)   780    781         # Return immediately if the expression was another access and thus a   782         # continuation backwards along the chain. The above processing will   783         # have followed the chain all the way to its conclusion.   784    785         if self.have_access_expression(n):   786             del self.attrs[0]   787    788         return name_ref   789    790     # Attribute chain handling.   791    792     def reset_attribute_chain(self):   793    794         "Reset the attribute chain for a subexpression of an attribute access."   795    796         self.attrs = []   797         self.chain_assignment.append(self.in_assignment)   798         self.chain_invocation.append(self.in_invocation)   799         self.in_assignment = None   800         self.in_invocation = None   801    802     def restore_attribute_chain(self, attrs):   803    804         "Restore the attribute chain for an attribute access."   805    806         self.attrs = attrs   807         self.in_assignment = self.chain_assignment.pop()   808         self.in_invocation = self.chain_invocation.pop()   809    810     def have_access_expression(self, node):   811    812         "Return whether the expression associated with 'node' is Getattr."   813    814         return isinstance(node.expr, compiler.ast.Getattr)   815    816     def get_name_for_tracking(self, name, name_ref=None, is_global=False):   817    818         """   819         Return the name to be used for attribute usage observations involving   820         the given 'name' in the current namespace.   821    822         If the name is being used outside a function, and if 'name_ref' is   823         given and indicates a global or if 'is_global' is specified as a true   824         value, a path featuring the name in the global namespace is returned.   825         Otherwise, a path computed using the current namespace and the given   826         name is returned.   827    828         The intention of this method is to provide a suitably-qualified name   829         that can be tracked across namespaces. Where globals are being   830         referenced in class namespaces, they should be referenced using their   831         path within the module, not using a path within each class.   832    833         It may not be possible to identify a global within a function at the   834         time of inspection (since a global may appear later in a file).   835         Consequently, globals are identified by their local name rather than   836         their module-qualified path.   837         """   838    839         # For functions, use the appropriate local names.   840    841         if self.in_function:   842             return name   843    844         # For global names outside functions, use a global name.   845    846         elif is_global or name_ref and name_ref.is_global_name():   847             return self.get_global_path(name)   848    849         # Otherwise, establish a name in the current namespace.   850    851         else:   852             return self.get_object_path(name)   853    854     def get_path_for_access(self):   855    856         "Outside functions, register accesses at the module level."   857    858         if not self.in_function:   859             return self.name   860         else:   861             return self.get_namespace_path()   862    863     def get_module_name(self, node):   864    865         """   866         Using the given From 'node' in this module, calculate any relative import   867         information, returning a tuple containing a module to import along with any   868         names to import based on the node's name information.   869    870         Where the returned module is given as None, whole module imports should   871         be performed for the returned modules using the returned names.   872         """   873    874         # Absolute import.   875    876         if node.level == 0:   877             return node.modname, node.names   878    879         # Relative to an ancestor of this module.   880    881         else:   882             path = self.name.split(".")   883             level = node.level   884    885             # Relative imports treat package roots as submodules.   886    887             if split(self.filename)[-1] == "__init__.py":   888                 level -= 1   889    890             if level > len(path):   891                 raise InspectError("Relative import %r involves too many levels up from module %r" % (   892                     ("%s%s" % ("." * node.level, node.modname or "")), self.name))   893    894             basename = ".".join(path[:len(path)-level])   895    896         # Name imports from a module.   897    898         if node.modname:   899             return "%s.%s" % (basename, node.modname), node.names   900    901         # Relative whole module imports.   902    903         else:   904             return basename, node.names   905    906 def get_argnames(args):   907    908     """   909     Return a list of all names provided by 'args'. Since tuples may be   910     employed, the arguments are traversed depth-first.   911     """   912    913     l = []   914     for arg in args:   915         if isinstance(arg, tuple):   916             l += get_argnames(arg)   917         else:   918             l.append(arg)   919     return l   920    921 def get_names_from_nodes(nodes):   922    923     """   924     Return the names employed in the given 'nodes' along with the number of   925     nodes excluding sequences.   926     """   927    928     names = set()   929     count = 0   930    931     for node in nodes:   932    933         # Add names and count them.   934    935         if isinstance(node, (compiler.ast.AssName, compiler.ast.Name)):   936             names.add(node.name)   937             count += 1   938    939         # Add names from sequences and incorporate their counts.   940    941         elif isinstance(node, (compiler.ast.AssList, compiler.ast.AssTuple,   942                                compiler.ast.List, compiler.ast.Set,   943                                compiler.ast.Tuple)):   944             _names, _count = get_names_from_nodes(node.nodes)   945             names.update(_names)   946             count += _count   947    948         # Count non-name, non-sequence nodes.   949    950         else:   951             count += 1   952    953     return names, count   954    955 # Location classes.   956    957 class Location:   958    959     "A generic program location."   960    961     def __init__(self, path, name, attrnames=None, version=None, access_number=None):   962         self.path = path   963         self.name = name   964         self.attrnames = attrnames   965         self.version = version   966         self.access_number = access_number   967    968     def __repr__(self):   969         return "Location(%r, %r, %r, %r, %r)" % self.as_tuple()   970    971     def as_tuple(self):   972         return (self.path, self.name, self.attrnames, self.version, self.access_number)   973    974     def __hash__(self):   975         return hash(self.as_tuple())   976    977     def __eq__(self, other):   978         return self.as_tuple() == other.as_tuple()   979    980     def __cmp__(self, other):   981         return cmp(self.as_tuple(), other.as_tuple())   982    983     def get_attrname(self):   984    985         """   986         Extract the first attribute from the attribute names employed in this   987         location.   988         """   989    990         attrnames = self.attrnames   991         if not attrnames:   992             return attrnames   993         return get_attrnames(attrnames)[0]   994    995 class AccessLocation(Location):   996    997     "A specialised access location."   998    999     def __init__(self, path, name, attrnames, access_number):  1000   1001         """  1002         Initialise an access location featuring 'path', 'name', 'attrnames' and  1003         'access_number'.  1004         """  1005   1006         Location.__init__(self, path, name, attrnames, None, access_number)  1007   1008     def __repr__(self):  1009         return "AccessLocation(%r, %r, %r, %r)" % (self.path, self.name, self.attrnames, self.access_number)  1010   1011 # Result classes.  1012   1013 class InstructionSequence:  1014   1015     "A generic sequence of instructions."  1016   1017     def __init__(self, instructions):  1018         self.instructions = instructions  1019   1020     def get_value_instruction(self):  1021         return self.instructions[-1]  1022   1023     def get_init_instructions(self):  1024         return self.instructions[:-1]  1025   1026 # Dictionary utilities.  1027   1028 def init_item(d, key, fn):  1029   1030     """  1031     Add to 'd' an entry for 'key' using the callable 'fn' to make an initial  1032     value where no entry already exists.  1033     """  1034   1035     if not d.has_key(key):  1036         d[key] = fn()  1037     return d[key]  1038   1039 def dict_for_keys(d, keys):  1040   1041     "Return a new dictionary containing entries from 'd' for the given 'keys'."  1042   1043     nd = {}  1044     for key in keys:  1045         if d.has_key(key):  1046             nd[key] = d[key]  1047     return nd  1048   1049 def make_key(s):  1050   1051     "Make sequence 's' into a tuple-based key, first sorting its contents."  1052   1053     l = list(s)  1054     l.sort()  1055     return tuple(l)  1056   1057 def add_counter_item(d, key):  1058   1059     """  1060     Make a mapping in 'd' for 'key' to the number of keys added before it, thus  1061     maintaining a mapping of keys to their order of insertion.  1062     """  1063   1064     if not d.has_key(key):  1065         d[key] = len(d.keys())  1066     return d[key]   1067   1068 def remove_items(d1, d2):  1069   1070     "Remove from 'd1' all items from 'd2'."  1071   1072     for key in d2.keys():  1073         if d1.has_key(key):  1074             del d1[key]  1075   1076 # Set utilities.  1077   1078 def first(s):  1079     return list(s)[0]  1080   1081 def same(s1, s2):  1082     return set(s1) == set(s2)  1083   1084 def order_dependencies(all_depends):  1085   1086     """  1087     Produce a dependency ordering for the 'all_depends' mapping. This mapping  1088     has the form "A depends on B, C...". The result will order A, B, C, and so  1089     on.  1090     """  1091   1092     usage = init_reverse_dependencies(all_depends)  1093   1094     # Produce an ordering by obtaining exposed items (required by items already  1095     # processed) and putting them at the start of the list.  1096   1097     ordered = []  1098   1099     while usage:  1100         have_next = False  1101   1102         for key, n in usage.items():  1103   1104             # Add items needed by no other items to the ordering.  1105   1106             if not n:  1107                 remove_dependency(key, all_depends, usage, ordered)  1108                 have_next = True  1109   1110         if not have_next:  1111             raise ValueError, usage  1112   1113     return ordered  1114   1115 def order_dependencies_partial(all_depends):  1116   1117     """  1118     Produce a dependency ordering for the 'all_depends' mapping. This mapping  1119     has the form "A depends on B, C...". The result will order A, B, C, and so  1120     on. Where cycles exist, they will be broken and a partial ordering returned.  1121     """  1122   1123     usage = init_reverse_dependencies(all_depends)  1124   1125     # Duplicate the dependencies for subsequent modification.  1126   1127     new_depends = {}  1128     for key, values in all_depends.items():  1129         new_depends[key] = set(values)  1130   1131     all_depends = new_depends  1132   1133     # Produce an ordering by obtaining exposed items (required by items already  1134     # processed) and putting them at the start of the list.  1135   1136     ordered = []  1137   1138     while usage:  1139         least = None  1140         least_key = None  1141   1142         for key, n in usage.items():  1143   1144             # Add items needed by no other items to the ordering.  1145   1146             if not n:  1147                 remove_dependency(key, all_depends, usage, ordered)  1148                 least = 0  1149   1150             # When breaking cycles, note the least used items.  1151   1152             elif least is None or len(n) < least:  1153                 least_key = key  1154                 least = len(n)  1155   1156         if least:  1157             transfer_dependencies(least_key, all_depends, usage, ordered)  1158   1159     return ordered  1160   1161 def init_reverse_dependencies(all_depends):  1162   1163     """  1164     From 'all_depends', providing a mapping of the form "A depends on B, C...",  1165     record the reverse dependencies, making a mapping of the form  1166     "B is needed by A", "C is needed by A", and so on.  1167     """  1168   1169     usage = {}  1170   1171     # Record path-based dependencies.  1172   1173     for key in all_depends.keys():  1174         usage[key] = set()  1175   1176     for key, depends in all_depends.items():  1177         for depend in depends:  1178             init_item(usage, depend, set)  1179             usage[depend].add(key)  1180   1181     return usage  1182   1183 def transfer_dependencies(key, all_depends, usage, ordered):  1184   1185     """  1186     Transfer items needed by 'key' to those items needing 'key', found using  1187     'all_depends', and updating 'usage'. Insert 'key' into the 'ordered'  1188     collection of dependencies.  1189   1190     If "A is needed by X" and "B is needed by A", then transferring items needed  1191     by A will cause "B is needed by X" to be recorded as a consequence.  1192   1193     Transferring items also needs to occur in the reverse mapping, so that  1194     "A needs B" and "X needs A", then the consequence must be recorded as  1195     "X needs B".  1196     """  1197   1198     ordered.insert(0, key)  1199   1200     needing = usage[key]                        # A is needed by X  1201     needed = all_depends.get(key)               # A needs B  1202   1203     if needing:  1204         for depend in needing:  1205             l = all_depends.get(depend)  1206             if not l:  1207                 continue  1208   1209             l.remove(key)                       # X needs (A)  1210   1211             if needed:  1212                 l.update(needed)                # X needs B...  1213   1214                 # Prevent self references.  1215   1216                 if depend in needed:  1217                     l.remove(depend)  1218   1219     if needed:  1220         for depend in needed:  1221             l = usage.get(depend)  1222             if not l:  1223                 continue  1224   1225             l.remove(key)                       # B is needed by (A)  1226             l.update(needing)                   # B is needed by X...  1227   1228             # Prevent self references.  1229   1230             if depend in needing:  1231                 l.remove(depend)  1232   1233     if needed:  1234         del all_depends[key]  1235     del usage[key]  1236   1237 def remove_dependency(key, all_depends, usage, ordered):  1238   1239     """  1240     Remove 'key', found in 'all_depends', from 'usage', inserting it into the  1241     'ordered' collection of dependencies.  1242   1243     Given that 'usage' for a given key A would indicate that "A needs <nothing>"  1244     upon removing A from 'usage', the outcome is that all keys needing A will  1245     have A removed from their 'usage' records.  1246   1247     So, if "B needs A", removing A will cause "B needs <nothing>" to be recorded  1248     as a consequence.  1249     """  1250   1251     ordered.insert(0, key)  1252   1253     depends = all_depends.get(key)  1254   1255     # Reduce usage of the referenced items.  1256   1257     if depends:  1258         for depend in depends:  1259             usage[depend].remove(key)  1260   1261     del usage[key]  1262   1263 # General input/output.  1264   1265 def readfile(filename):  1266   1267     "Return the contents of 'filename'."  1268   1269     f = open(filename)  1270     try:  1271         return f.read()  1272     finally:  1273         f.close()  1274   1275 def writefile(filename, s):  1276   1277     "Write to 'filename' the string 's'."  1278   1279     f = open(filename, "w")  1280     try:  1281         f.write(s)  1282     finally:  1283         f.close()  1284   1285 # General encoding.  1286   1287 def sorted_output(x):  1288   1289     "Sort sequence 'x' and return a string with commas separating the values."  1290   1291     x = map(str, x)  1292     x.sort()  1293     return ", ".join(x)  1294   1295 def get_string_details(literals, encoding):  1296   1297     """  1298     Determine whether 'literals' represent Unicode strings or byte strings,  1299     using 'encoding' to reproduce byte sequences.  1300   1301     Each literal is the full program representation including prefix and quotes  1302     recoded by the parser to UTF-8. Thus, any literal found to represent a byte  1303     string needs to be translated back to its original encoding.  1304   1305     Return a single encoded literal value, a type name, and the original  1306     encoding as a tuple.  1307     """  1308   1309     typename = "unicode"  1310   1311     l = []  1312   1313     for s in literals:  1314         out, _typename = get_literal_details(s)  1315         if _typename == "str":  1316             typename = "str"  1317         l.append(out)  1318   1319     out = "".join(l)  1320   1321     # For Unicode values, convert to the UTF-8 program representation.  1322   1323     if typename == "unicode":  1324         return out.encode("utf-8"), typename, encoding  1325   1326     # For byte string values, convert back to the original encoding.  1327   1328     else:  1329         return out.encode(encoding), typename, encoding  1330   1331 def get_literal_details(s):  1332   1333     """  1334     Determine whether 's' represents a Unicode string or a byte string, where  1335     's' contains the full program representation of a literal including prefix  1336     and quotes, recoded by the parser to UTF-8.  1337   1338     Find and convert Unicode values starting with <backslash>u or <backslash>U,  1339     and byte or Unicode values starting with <backslash><octal digit> or  1340     <backslash>x.  1341   1342     Literals prefixed with "u" cause <backslash><octal digit> and <backslash>x  1343     to be considered as Unicode values. Otherwise, they produce byte values and  1344     cause unprefixed strings to be considered as byte strings.  1345   1346     Literals prefixed with "r" do not have their backslash-encoded values  1347     converted unless also prefixed with "u", in which case only the above value  1348     formats are converted, not any of the other special sequences for things  1349     like newlines.  1350   1351     Return the literal value as a Unicode object together with the appropriate  1352     type name in a tuple.  1353     """  1354   1355     l = []  1356   1357     # Identify the quote character and use it to identify the prefix.  1358   1359     quote_type = s[-1]  1360     prefix_end = s.find(quote_type)  1361     prefix = s[:prefix_end].lower()  1362   1363     if prefix not in ("", "b", "br", "r", "u", "ur"):  1364         raise ValueError, "String literal does not have a supported prefix: %s" % s  1365   1366     if "b" in prefix:  1367         typename = "str"  1368     else:  1369         typename = "unicode"  1370   1371     # Identify triple quotes or single quotes.  1372   1373     if len(s) >= 6 and s[-2] == quote_type and s[-3] == quote_type:  1374         quote = s[prefix_end:prefix_end+3]  1375         current = prefix_end + 3  1376         end = len(s) - 3  1377     else:  1378         quote = s[prefix_end]  1379         current = prefix_end + 1  1380         end = len(s) - 1  1381   1382     # Conversions of some quoted values.  1383   1384     searches = {  1385         "u" : (6, 16),  1386         "U" : (10, 16),  1387         "x" : (4, 16),  1388         }  1389   1390     octal_digits = map(str, range(0, 8))  1391   1392     # Translations of some quoted values.  1393   1394     escaped = {  1395         "\\" : "\\", "'" : "'", '"' : '"',  1396         "a" : "\a", "b" : "\b", "f" : "\f",  1397         "n" : "\n", "r" : "\r", "t" : "\t",  1398         }  1399   1400     while current < end:  1401   1402         # Look for quoted values.  1403   1404         index = s.find("\\", current)  1405         if index == -1 or index + 1 == end:  1406             l.append(s[current:end])  1407             break  1408   1409         # Add the preceding text.  1410   1411         l.append(s[current:index])  1412   1413         # Handle quoted text.  1414   1415         term = s[index+1]  1416   1417         # Add Unicode values. Where a string is u-prefixed, even \o and \x  1418         # produce Unicode values.  1419   1420         if typename == "unicode" and (  1421             term in ("u", "U") or   1422             "u" in prefix and (term == "x" or term in octal_digits)):  1423   1424             needed, base = searches.get(term, (4, 8))  1425             value = convert_quoted_value(s, index, needed, end, base, unichr)  1426             l.append(value)  1427             current = index + needed  1428   1429         # Add raw byte values, changing the string type.  1430   1431         elif "r" not in prefix and (  1432              term == "x" or term in octal_digits):  1433   1434             needed, base = searches.get(term, (4, 8))  1435             value = convert_quoted_value(s, index, needed, end, base, chr)  1436             l.append(value)  1437             typename = "str"  1438             current = index + needed  1439   1440         # Add other escaped values.  1441   1442         elif "r" not in prefix and escaped.has_key(term):  1443             l.append(escaped[term])  1444             current = index + 2  1445   1446         # Add other text as found.  1447   1448         else:  1449             l.append(s[index:index+2])  1450             current = index + 2  1451   1452     # Collect the components into a single Unicode object. Since the literal  1453     # text was already in UTF-8 form, interpret plain strings as UTF-8  1454     # sequences.  1455   1456     out = []  1457   1458     for value in l:  1459         if isinstance(value, unicode):  1460             out.append(value)  1461         else:  1462             out.append(unicode(value, "utf-8"))  1463   1464     return "".join(out), typename  1465   1466 def convert_quoted_value(s, index, needed, end, base, fn):  1467   1468     """  1469     Interpret a quoted value in 's' at 'index' with the given 'needed' number of  1470     positions, and with the given 'end' indicating the first position after the  1471     end of the actual string content.  1472   1473     Use 'base' as the numerical base when interpreting the value, and use 'fn'  1474     to convert the value to an appropriate type.  1475     """  1476   1477     s = s[index:min(index+needed, end)]  1478   1479     # Not a complete occurrence.  1480   1481     if len(s) < needed:  1482         return s  1483   1484     # Test for a well-formed value.  1485   1486     try:  1487         first = base == 8 and 1 or 2  1488         value = int(s[first:needed], base)  1489     except ValueError:  1490         return s  1491     else:  1492         return fn(value)  1493   1494 # Attribute chain decoding.  1495   1496 def get_attrnames(attrnames):  1497   1498     """  1499     Split the qualified attribute chain 'attrnames' into its components,  1500     handling special attributes starting with "#" that indicate type  1501     conformance.  1502     """  1503   1504     if attrnames.startswith("#"):  1505         return [attrnames]  1506     else:  1507         return attrnames.split(".")  1508   1509 def get_name_path(path, name):  1510   1511     "Return a suitable qualified name from the given 'path' and 'name'."  1512   1513     if "." in name:  1514         return name  1515     else:  1516         return "%s.%s" % (path, name)  1517   1518 # Usage-related functions.  1519   1520 def get_types_for_usage(attrnames, objects):  1521   1522     """  1523     Identify the types that can support the given 'attrnames', using the  1524     given 'objects' as the catalogue of type details.  1525     """  1526   1527     types = []  1528     for name, _attrnames in objects.items():  1529         if set(attrnames).issubset(_attrnames):  1530             types.append(name)  1531     return types  1532   1533 def get_invoked_attributes(usage):  1534   1535     "Obtain invoked attribute from the given 'usage'."  1536   1537     invoked = []  1538     if usage:  1539         for attrname, invocation, assignment in usage:  1540             if invocation:  1541                 invoked.append(attrname)  1542     return invoked  1543   1544 def get_assigned_attributes(usage):  1545   1546     "Obtain assigned attribute from the given 'usage'."  1547   1548     assigned = []  1549     if usage:  1550         for attrname, invocation, assignment in usage:  1551             if assignment:  1552                 assigned.append(attrname)  1553     return assigned  1554   1555 # Type and module functions.  1556 # NOTE: This makes assumptions about the __builtins__ structure.  1557   1558 def get_builtin_module(name):  1559   1560     "Return the module name containing the given type 'name'."  1561   1562     if name == "string":  1563         modname = "str"  1564     elif name == "utf8string":  1565         modname = "unicode"  1566     elif name == "NoneType":  1567         modname = "none"  1568     else:  1569         modname = name  1570   1571     return "__builtins__.%s" % modname  1572   1573 def get_builtin_type(name):  1574   1575     "Return the type name provided by the given Python value 'name'."  1576   1577     if name == "str":  1578         return "string"  1579     elif name == "unicode":  1580         return "utf8string"  1581     else:  1582         return name  1583   1584 def get_builtin_class(name):  1585   1586     "Return the full name of the built-in class having the given 'name'."  1587   1588     typename = get_builtin_type(name)  1589     module = get_builtin_module(typename)  1590     return "%s.%s" % (module, typename)  1591   1592 # Useful data.  1593   1594 predefined_constants = "False", "None", "NotImplemented", "True"  1595   1596 privileged_attributes = [  1597     "__get_single_item_unchecked__",  1598     ]  1599   1600 unary_operator_functions = {  1601   1602     # Unary operations.  1603   1604     "Invert" : "invert",  1605     "UnaryAdd" : "pos",  1606     "UnarySub" : "neg",  1607     }  1608   1609 operator_functions = {  1610   1611     # Fundamental operations.  1612   1613     "is" : "is_",  1614     "is not" : "is_not",  1615   1616     # Binary operations.  1617   1618     "in" : "in_",  1619     "not in" : "not_in",  1620     "Add" : "add",  1621     "Bitand" : "and_",  1622     "Bitor" : "or_",  1623     "Bitxor" : "xor",  1624     "Div" : "div",  1625     "FloorDiv" : "floordiv",  1626     "LeftShift" : "lshift",  1627     "Mod" : "mod",  1628     "Mul" : "mul",  1629     "Power" : "pow",  1630     "RightShift" : "rshift",  1631     "Sub" : "sub",  1632   1633     # Augmented assignment.  1634   1635     "+=" : "iadd",  1636     "-=" : "isub",  1637     "*=" : "imul",  1638     "/=" : "idiv",  1639     "//=" : "ifloordiv",  1640     "%=" : "imod",  1641     "**=" : "ipow",  1642     "<<=" : "ilshift",  1643     ">>=" : "irshift",  1644     "&=" : "iand",  1645     "^=" : "ixor",  1646     "|=" : "ior",  1647   1648     # Comparisons.  1649   1650     "==" : "eq",  1651     "!=" : "ne",  1652     "<" : "lt",  1653     "<=" : "le",  1654     ">=" : "ge",  1655     ">" : "gt",  1656     }  1657   1658 operator_functions.update(unary_operator_functions)  1659   1660 # vim: tabstop=4 expandtab shiftwidth=4