Lichen

common.py

934:2989aab1b4f7
2021-06-29 Paul Boddie Renamed the utf8string class to unicode, eliminating the unicode function. This means that the simple case of merely returning an object if it is already a Unicode object no longer occurs when using the unicode callable, but such behaviour might be better supported with more general customised instantiation functionality.
     1 #!/usr/bin/env python     2      3 """     4 Common functions.     5      6 Copyright (C) 2007-2019, 2021 Paul Boddie <paul@boddie.org.uk>     7      8 This program is free software; you can redistribute it and/or modify it under     9 the terms of the GNU General Public License as published by the Free Software    10 Foundation; either version 3 of the License, or (at your option) any later    11 version.    12     13 This program is distributed in the hope that it will be useful, but WITHOUT    14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS    15 FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more    16 details.    17     18 You should have received a copy of the GNU General Public License along with    19 this program.  If not, see <http://www.gnu.org/licenses/>.    20 """    21     22 from compiler.transformer import Transformer    23 from errors import InspectError    24 from os import listdir, makedirs, remove    25 from os.path import exists, getmtime, isdir, join, split    26 from results import ConstantValueRef, LiteralSequenceRef, NameRef    27 import compiler.ast    28     29 class CommonOutput:    30     31     "Common output functionality."    32     33     def check_output(self, options=None):    34     35         "Check the existing output and remove it if irrelevant."    36     37         if not exists(self.output):    38             makedirs(self.output)    39     40         details = self.importer.get_cache_details()    41         recorded_details = self.get_output_details()    42     43         # Combine cache details with any options.    44     45         full_details = options and (details + " " + options) or details    46     47         if recorded_details != full_details:    48             self.remove_output()    49     50         writefile(self.get_output_details_filename(), full_details)    51     52     def get_output_details_filename(self):    53     54         "Return the output details filename."    55     56         return join(self.output, "$details")    57     58     def get_output_details(self):    59     60         "Return details of the existing output."    61     62         details_filename = self.get_output_details_filename()    63     64         if not exists(details_filename):    65             return None    66         else:    67             return readfile(details_filename)    68     69     def remove_output(self, dirname=None):    70     71         "Remove the output."    72     73         dirname = dirname or self.output    74     75         for filename in listdir(dirname):    76             path = join(dirname, filename)    77             if isdir(path):    78                 self.remove_output(path)    79             else:    80                 remove(path)    81     82 def copy(source, target, only_if_newer=True):    83     84     "Copy a text file from 'source' to 'target'."    85     86     if isdir(target):    87         target = join(target, split(source)[-1])    88     89     if only_if_newer and not is_newer(source, target):    90         return    91     92     infile = open(source)    93     outfile = open(target, "w")    94     95     try:    96         outfile.write(infile.read())    97     finally:    98         outfile.close()    99         infile.close()   100    101 def is_newer(source, target):   102    103     "Return whether 'source' is newer than 'target'."   104    105     if exists(target):   106         target_mtime = getmtime(target)   107         source_mtime = getmtime(source)   108         return source_mtime > target_mtime   109    110     return True   111    112 class CommonModule:   113    114     "A common module representation."   115    116     def __init__(self, name, importer):   117    118         """   119         Initialise this module with the given 'name' and an 'importer' which is   120         used to provide access to other modules when required.   121         """   122    123         self.name = name   124         self.importer = importer   125         self.filename = None   126    127         # Inspection-related attributes.   128    129         self.astnode = None   130         self.encoding = None   131         self.temp = {}   132         self.lambdas = {}   133    134         # Constants, literals and values.   135    136         self.constants = {}   137         self.constant_values = {}   138         self.literals = {}   139         self.literal_types = {}   140    141         # Nested namespaces.   142    143         self.namespace_path = []   144         self.in_function = False   145    146         # Retain the assignment value expression and track invocations.   147    148         self.in_assignment = None   149         self.in_invocation = None   150    151         # Attribute chain state management.   152    153         self.attrs = []   154         self.chain_assignment = []   155         self.chain_invocation = []   156    157     def __repr__(self):   158         return "CommonModule(%r, %r)" % (self.name, self.importer)   159    160     def parse_file(self, filename):   161    162         "Parse the file with the given 'filename', initialising attributes."   163    164         self.filename = filename   165    166         # Use the Transformer directly to obtain encoding information.   167    168         t = Transformer()   169         f = open(filename)   170    171         try:   172             self.astnode = t.parsesuite(f.read() + "\n")   173             self.encoding = t.encoding   174         finally:   175             f.close()   176    177     # Module-relative naming.   178    179     def get_global_path(self, name):   180         return "%s.%s" % (self.name, name)   181    182     def get_namespace_path(self):   183         return ".".join([self.name] + self.namespace_path)   184    185     def get_object_path(self, name):   186         return ".".join([self.name] + self.namespace_path + [name])   187    188     # Namespace management.   189    190     def enter_namespace(self, name):   191    192         "Enter the namespace having the given 'name'."   193    194         self.namespace_path.append(name)   195    196     def exit_namespace(self):   197    198         "Exit the current namespace."   199    200         self.namespace_path.pop()   201    202     # Constant reference naming.   203    204     def get_constant_name(self, value, value_type, encoding=None):   205    206         """   207         Add a new constant to the current namespace for 'value' with   208         'value_type'.   209         """   210    211         path = self.get_namespace_path()   212         init_item(self.constants, path, dict)   213         return "$c%d" % add_counter_item(self.constants[path], (value, value_type, encoding))   214    215     # Literal reference naming.   216    217     def get_literal_name(self):   218    219         "Add a new literal to the current namespace."   220    221         path = self.get_namespace_path()   222         init_item(self.literals, path, lambda: 0)   223         return "$C%d" % self.literals[path]   224    225     def next_literal(self):   226         self.literals[self.get_namespace_path()] += 1   227    228     # Temporary variable naming.   229    230     def get_temporary_name(self):   231         path = self.get_namespace_path()   232         init_item(self.temp, path, lambda: 0)   233         return "$t%d" % self.temp[path]   234    235     def next_temporary(self):   236         self.temp[self.get_namespace_path()] += 1   237    238     # Arbitrary function naming.   239    240     def get_lambda_name(self):   241         path = self.get_namespace_path()   242         init_item(self.lambdas, path, lambda: 0)   243         name = "$l%d" % self.lambdas[path]   244         self.lambdas[path] += 1   245         return name   246    247     def reset_lambdas(self):   248         self.lambdas = {}   249    250     # Constant and literal recording.   251    252     def get_constant_value(self, value, literals=None):   253    254         """   255         Encode the 'value' if appropriate, returning a value, a typename and any   256         encoding.   257         """   258    259         if isinstance(value, unicode):   260             return value.encode("utf-8"), "unicode", self.encoding   261    262         # Attempt to convert plain strings to text.   263    264         elif isinstance(value, str) and self.encoding:   265             try:   266                 return get_string_details(literals, self.encoding)   267             except UnicodeDecodeError:   268                 pass   269    270         return value, value.__class__.__name__, None   271    272     def get_constant_reference(self, ref, value, encoding=None):   273    274         """   275         Return a constant reference for the given 'ref' type and 'value', with   276         the optional 'encoding' applying to text values.   277         """   278    279         constant_name = self.get_constant_name(value, ref.get_origin(), encoding)   280    281         # Return a reference for the constant.   282    283         objpath = self.get_object_path(constant_name)   284         name_ref = ConstantValueRef(constant_name, ref.instance_of(objpath), value)   285    286         # Record the value and type for the constant.   287    288         self._reserve_constant(objpath, name_ref.value, name_ref.get_origin(), encoding)   289         return name_ref   290    291     def reserve_constant(self, objpath, value, origin, encoding=None):   292    293         """   294         Reserve a constant within 'objpath' with the given 'value' and having a   295         type with the given 'origin', with the optional 'encoding' applying to   296         text values.   297         """   298    299         constant_name = self.get_constant_name(value, origin)   300         objpath = self.get_object_path(constant_name)   301         self._reserve_constant(objpath, value, origin, encoding)   302    303     def _reserve_constant(self, objpath, value, origin, encoding):   304    305         """   306         Store a constant for 'objpath' with the given 'value' and 'origin', with   307         the optional 'encoding' applying to text values.   308         """   309    310         self.constant_values[objpath] = value, origin, encoding   311    312     def get_literal_reference(self, name, ref, items, cls):   313    314         """   315         Return a literal reference for the given type 'name', literal 'ref',   316         node 'items' and employing the given 'cls' as the class of the returned   317         reference object.   318         """   319    320         # Construct an invocation using the items as arguments.   321    322         typename = "$L%s" % name   323    324         invocation = compiler.ast.CallFunc(   325             compiler.ast.Name(typename),   326             items   327             )   328    329         # Get a name for the actual literal.   330    331         instname = self.get_literal_name()   332         self.next_literal()   333    334         # Record the type for the literal.   335    336         objpath = self.get_object_path(instname)   337         self.literal_types[objpath] = ref.get_origin()   338    339         # Return a wrapper for the invocation exposing the items.   340    341         return cls(   342             instname,   343             ref.instance_of(),   344             self.process_structure_node(invocation),   345             invocation.args   346             )   347    348     # Node handling.   349    350     def process_structure(self, node):   351    352         """   353         Within the given 'node', process the program structure.   354    355         During inspection, this will process global declarations, adjusting the   356         module namespace, and import statements, building a module dependency   357         hierarchy.   358    359         During translation, this will consult deduced program information and   360         output translated code.   361         """   362    363         l = []   364         for n in node.getChildNodes():   365             l.append(self.process_structure_node(n))   366         return l   367    368     def process_augassign_node(self, n):   369    370         "Process the given augmented assignment node 'n'."   371    372         op = operator_functions[n.op]   373    374         if isinstance(n.node, compiler.ast.Getattr):   375             target = compiler.ast.AssAttr(n.node.expr, n.node.attrname, "OP_ASSIGN")   376         elif isinstance(n.node, compiler.ast.Name):   377             target = compiler.ast.AssName(n.node.name, "OP_ASSIGN")   378         else:   379             target = n.node   380    381         assignment = compiler.ast.Assign(   382             [target],   383             compiler.ast.CallFunc(   384                 compiler.ast.Name("$op%s" % op),   385                 [n.node, n.expr]))   386    387         return self.process_structure_node(assignment)   388    389     def process_assignment_for_object(self, original_name, source):   390    391         """   392         Return an assignment operation making 'original_name' refer to the given   393         'source'.   394         """   395    396         assignment = compiler.ast.Assign(   397             [compiler.ast.AssName(original_name, "OP_ASSIGN")],   398             source   399             )   400    401         return self.process_structure_node(assignment)   402    403     def process_assignment_node_items(self, n, expr):   404    405         """   406         Process the given assignment node 'n' whose children are to be assigned   407         items of 'expr'.   408         """   409    410         name_ref = self.process_structure_node(expr)   411    412         # Either unpack the items and present them directly to each assignment   413         # node.   414    415         if isinstance(name_ref, LiteralSequenceRef) and \   416            self.process_literal_sequence_items(n, name_ref):   417    418             pass   419    420         # Or have the assignment nodes access each item via the sequence API.   421    422         else:   423             self.process_assignment_node_items_by_position(n, expr, name_ref)   424    425     def process_assignment_node_items_by_position(self, n, expr, name_ref):   426    427         """   428         Process the given sequence assignment node 'n', converting the node to   429         the separate assignment of each target using positional access on a   430         temporary variable representing the sequence. Use 'expr' as the assigned   431         value and 'name_ref' as the reference providing any existing temporary   432         variable.   433         """   434    435         statements = []   436    437         # Employ existing names to access the sequence.   438         # Literal sequences do not provide names of accessible objects.   439    440         if isinstance(name_ref, NameRef) and not isinstance(name_ref, LiteralSequenceRef):   441             temp = name_ref.name   442    443         # For other expressions, create a temporary name to reference the items.   444    445         else:   446             temp = self.get_temporary_name()   447             self.next_temporary()   448    449             statements.append(   450                 compiler.ast.Assign([compiler.ast.AssName(temp, "OP_ASSIGN")], expr)   451                 )   452    453         # Generate a test for the length of the expression object.   454    455         statements.append(compiler.ast.Discard(   456             compiler.ast.CallFunc(compiler.ast.Name("$seq_test_length"),   457                 [compiler.ast.Name(temp), compiler.ast.Const(len(n.nodes))])))   458    459         # Assign the items to the target nodes.   460    461         for i, node in enumerate(n.nodes):   462             statements.append(   463                 compiler.ast.Assign([node], compiler.ast.CallFunc(   464                     compiler.ast.Getattr(compiler.ast.Name(temp),   465                                          "__get_single_item_unchecked__",   466                                          privileged=True),   467                     [compiler.ast.Const(i, str(i))]))   468                 )   469    470         return self.process_structure_node(compiler.ast.Stmt(statements))   471    472     def process_literal_sequence_items(self, n, name_ref):   473    474         """   475         Process the given assignment node 'n', obtaining from the given   476         'name_ref' the items to be assigned to the assignment targets.   477    478         Return whether this method was able to process the assignment node as   479         a sequence of direct assignments.   480         """   481    482         if len(n.nodes) == len(name_ref.items):   483             assigned_names, count = get_names_from_nodes(n.nodes)   484             accessed_names, _count = get_names_from_nodes(name_ref.items)   485    486             # Only assign directly between items if all assigned names are   487             # plain names (not attribute assignments), and if the assigned names   488             # do not appear in the accessed names.   489    490             if len(assigned_names) == count and \   491                not assigned_names.intersection(accessed_names):   492    493                 for node, item in zip(n.nodes, name_ref.items):   494                     self.process_assignment_node(node, item)   495    496                 return True   497    498             # Otherwise, use the position-based mechanism to obtain values.   499    500             else:   501                 return False   502         else:   503             raise InspectError("In %s, item assignment needing %d items is given %d items." % (   504                 self.get_namespace_path(), len(n.nodes), len(name_ref.items)))   505    506     def process_compare_node(self, n):   507    508         """   509         Process the given comparison node 'n', converting an operator sequence   510         from...   511    512         <expr1> <op1> <expr2> <op2> <expr3>   513    514         ...to...   515    516         <op1>(<expr1>, <expr2>) and <op2>(<expr2>, <expr3>)   517         """   518    519         invocations = []   520         last = n.expr   521    522         for op, op_node in n.ops:   523             op = operator_functions.get(op)   524    525             invocations.append(compiler.ast.CallFunc(   526                 compiler.ast.Name("$op%s" % op),   527                 [last, op_node]))   528    529             last = op_node   530    531         if len(invocations) > 1:   532             result = compiler.ast.And(invocations)   533         else:   534             result = invocations[0]   535    536         return self.process_structure_node(result)   537    538     def process_dict_node(self, node):   539    540         """   541         Process the given dictionary 'node', returning a list of (key, value)   542         tuples.   543         """   544    545         l = []   546         for key, value in node.items:   547             l.append((   548                 self.process_structure_node(key),   549                 self.process_structure_node(value)))   550         return l   551    552     def process_for_node(self, n):   553    554         """   555         Generate attribute accesses for {n.list}.__iter__ and the next method on   556         the iterator, producing a replacement node for the original.   557         """   558    559         t0 = self.get_temporary_name()   560         self.next_temporary()   561         t1 = self.get_temporary_name()   562         self.next_temporary()   563         t2 = self.get_temporary_name()   564         self.next_temporary()   565         t3 = self.get_temporary_name()   566         self.next_temporary()   567    568         node = compiler.ast.Stmt([   569    570             # <t0> = {n.list}   571             # <t1> = <t0>.__iter__()   572    573             compiler.ast.Assign(   574                 [compiler.ast.AssName(t0, "OP_ASSIGN")],   575                 n.list),   576    577             compiler.ast.Assign(   578                 [compiler.ast.AssName(t1, "OP_ASSIGN")],   579                 compiler.ast.CallFunc(   580                     compiler.ast.Getattr(compiler.ast.Name(t0), "__iter__"),   581                     [])),   582    583             # <t2> = <t1>.next   584             # try:   585             #     while True:   586             #         try:   587             #             <t3> = <t2>()   588             #         except StopIteration:   589             #             raise LoopExit   590             #         <var>... = <t3>   591             #         {n.body}   592             # except LoopExit:   593             #     {n.else_}   594             #     pass   595    596             compiler.ast.Assign(   597                 [compiler.ast.AssName(t2, "OP_ASSIGN")],   598                 compiler.ast.Getattr(compiler.ast.Name(t1), "next")),   599    600             # try:   601    602             compiler.ast.TryExcept(   603    604                 # while True:   605    606                 compiler.ast.While(   607                     compiler.ast.Name("True"),   608                     compiler.ast.Stmt([   609    610                         # try:   611    612                         compiler.ast.TryExcept(   613    614                             # <t3> = <t2>()   615    616                             compiler.ast.Assign(   617                                 [compiler.ast.AssName(t3, "OP_ASSIGN")],   618                                 compiler.ast.CallFunc(   619                                     compiler.ast.Name(t2),   620                                     [])),   621    622                         # except StopIteration:   623                         #     raise LoopExit   624    625                             [(compiler.ast.Name("StopIteration"), None,   626                               compiler.ast.Raise(compiler.ast.Name("LoopExit")))],   627                             None),   628    629                         # <var>... = <t3>   630    631                         compiler.ast.Assign(   632                             [n.assign],   633                             compiler.ast.Name(t3)),   634                         n.body]),   635                     None),   636    637             # except LoopExit:   638             #     {n.else_}   639             #     pass   640    641                 [(compiler.ast.Name("LoopExit"), None, n.else_ or compiler.ast.Pass())],   642                 None)   643             ])   644    645         self.process_structure_node(node)   646    647     def process_literal_sequence_node(self, n, name, ref, cls):   648    649         """   650         Process the given literal sequence node 'n' as a function invocation,   651         with 'name' indicating the type of the sequence, and 'ref' being a   652         reference to the type. The 'cls' is used to instantiate a suitable name   653         reference.   654         """   655    656         if name == "dict":   657             items = []   658             for key, value in n.items:   659                 items.append(compiler.ast.Tuple([key, value]))   660         else: # name in ("list", "tuple"):   661             items = n.nodes   662    663         return self.get_literal_reference(name, ref, items, cls)   664    665     def process_operator_node(self, n):   666    667         """   668         Process the given operator node 'n' as an operator function invocation.   669         """   670    671         opname = n.__class__.__name__   672         operands = n.getChildNodes()   673    674         # Convert a unary operation to an invocation.   675    676         op = unary_operator_functions.get(opname)   677    678         if op:   679             invocation = compiler.ast.CallFunc(   680                 compiler.ast.Name("$op%s" % op),   681                 [operands[0]]   682                 )   683    684         # Convert a single operator with a list of operands to a combination of   685         # pairwise operations.   686    687         else:   688             op = operator_functions[opname]   689             invocation = self._process_operator_node(op, operands)   690    691         return self.process_structure_node(invocation)   692    693     def _process_operator_node(self, op, operands):   694    695         """   696         Process the given 'op', being an operator function, together with the   697         supplied 'operands', returning either a single remaining operand or an   698         invocation combining the operands.   699         """   700    701         remaining = operands[1:]   702         if not remaining:   703             return operands[0]   704    705         return compiler.ast.CallFunc(   706             compiler.ast.Name("$op%s" % op),   707             [operands[0], self._process_operator_node(op, remaining)]   708             )   709    710     def process_print_node(self, n):   711    712         """   713         Process the given print node 'n' as an invocation on a stream of the   714         form...   715    716         $print(dest, args, nl)   717    718         The special function name will be translated elsewhere.   719         """   720    721         nl = isinstance(n, compiler.ast.Printnl)   722         invocation = compiler.ast.CallFunc(   723             compiler.ast.Name("$print"),   724             [n.dest or compiler.ast.Name("None"),   725              compiler.ast.List(list(n.nodes)),   726              nl and compiler.ast.Name("True") or compiler.ast.Name("False")]   727             )   728         return self.process_structure_node(invocation)   729    730     def process_slice_node(self, n, expr=None):   731    732         """   733         Process the given slice node 'n' as a method invocation.   734         """   735    736         if n.flags == "OP_ASSIGN": op = "__setslice__"   737         elif n.flags == "OP_DELETE": op = "__delslice__"   738         else: op = "__getslice__"   739    740         invocation = compiler.ast.CallFunc(   741             compiler.ast.Getattr(n.expr, op),   742             [n.lower or compiler.ast.Name("None"), n.upper or compiler.ast.Name("None")] +   743                 (expr and [expr] or [])   744             )   745    746         # Fix parse tree structure.   747    748         if op == "__delslice__":   749             invocation = compiler.ast.Discard(invocation)   750    751         return self.process_structure_node(invocation)   752    753     def process_sliceobj_node(self, n):   754    755         """   756         Process the given slice object node 'n' as a slice constructor.   757         """   758    759         op = "slice"   760         invocation = compiler.ast.CallFunc(   761             compiler.ast.Name("$op%s" % op),   762             n.nodes   763             )   764         return self.process_structure_node(invocation)   765    766     def process_subscript_node(self, n, expr=None):   767    768         """   769         Process the given subscript node 'n' as a method invocation.   770         """   771    772         if n.flags == "OP_ASSIGN": op = "__setitem__"   773         elif n.flags == "OP_DELETE": op = "__delitem__"   774         else: op = "__getitem__"   775    776         invocation = compiler.ast.CallFunc(   777             compiler.ast.Getattr(n.expr, op),   778             list(n.subs) + (expr and [expr] or [])   779             )   780    781         # Fix parse tree structure.   782    783         if op == "__delitem__":   784             invocation = compiler.ast.Discard(invocation)   785    786         return self.process_structure_node(invocation)   787    788     def process_attribute_chain(self, n):   789    790         """   791         Process the given attribute access node 'n'. Return a reference   792         describing the expression.   793         """   794    795         # AssAttr/Getattr are nested with the outermost access being the last   796         # access in any chain.   797    798         self.attrs.insert(0, n.attrname)   799         attrs = self.attrs   800    801         # Break attribute chains where non-access nodes are found.   802    803         if not self.have_access_expression(n):   804             self.reset_attribute_chain()   805    806         # Descend into the expression, extending backwards any existing chain,   807         # or building another for the expression.   808    809         name_ref = self.process_structure_node(n.expr)   810    811         # Restore chain information applying to this node.   812    813         if not self.have_access_expression(n):   814             self.restore_attribute_chain(attrs)   815    816         # Return immediately if the expression was another access and thus a   817         # continuation backwards along the chain. The above processing will   818         # have followed the chain all the way to its conclusion.   819    820         if self.have_access_expression(n):   821             del self.attrs[0]   822    823         return name_ref   824    825     # Attribute chain handling.   826    827     def reset_attribute_chain(self):   828    829         "Reset the attribute chain for a subexpression of an attribute access."   830    831         self.attrs = []   832         self.chain_assignment.append(self.in_assignment)   833         self.chain_invocation.append(self.in_invocation)   834         self.in_assignment = None   835         self.in_invocation = None   836    837     def restore_attribute_chain(self, attrs):   838    839         "Restore the attribute chain for an attribute access."   840    841         self.attrs = attrs   842         self.in_assignment = self.chain_assignment.pop()   843         self.in_invocation = self.chain_invocation.pop()   844    845     def have_access_expression(self, node):   846    847         "Return whether the expression associated with 'node' is Getattr."   848    849         return isinstance(node.expr, compiler.ast.Getattr)   850    851     def get_name_for_tracking(self, name, name_ref=None, is_global=False):   852    853         """   854         Return the name to be used for attribute usage observations involving   855         the given 'name' in the current namespace.   856    857         If the name is being used outside a function, and if 'name_ref' is   858         given and indicates a global or if 'is_global' is specified as a true   859         value, a path featuring the name in the global namespace is returned.   860         Otherwise, a path computed using the current namespace and the given   861         name is returned.   862    863         The intention of this method is to provide a suitably-qualified name   864         that can be tracked across namespaces. Where globals are being   865         referenced in class namespaces, they should be referenced using their   866         path within the module, not using a path within each class.   867    868         It may not be possible to identify a global within a function at the   869         time of inspection (since a global may appear later in a file).   870         Consequently, globals are identified by their local name rather than   871         their module-qualified path.   872         """   873    874         # For functions, use the appropriate local names.   875    876         if self.in_function:   877             return name   878    879         # For global names outside functions, use a global name.   880    881         elif is_global or name_ref and name_ref.is_global_name():   882             return self.get_global_path(name)   883    884         # Otherwise, establish a name in the current namespace.   885    886         else:   887             return self.get_object_path(name)   888    889     def get_path_for_access(self):   890    891         "Outside functions, register accesses at the module level."   892    893         if not self.in_function:   894             return self.name   895         else:   896             return self.get_namespace_path()   897    898     def get_module_name(self, node):   899    900         """   901         Using the given From 'node' in this module, calculate any relative import   902         information, returning a tuple containing a module to import along with any   903         names to import based on the node's name information.   904    905         Where the returned module is given as None, whole module imports should   906         be performed for the returned modules using the returned names.   907         """   908    909         # Absolute import.   910    911         if node.level == 0:   912             return node.modname, node.names   913    914         # Relative to an ancestor of this module.   915    916         else:   917             path = self.name.split(".")   918             level = node.level   919    920             # Relative imports treat package roots as submodules.   921    922             if split(self.filename)[-1] == "__init__.py":   923                 level -= 1   924    925             if level > len(path):   926                 raise InspectError("Relative import %r involves too many levels up from module %r" % (   927                     ("%s%s" % ("." * node.level, node.modname or "")), self.name))   928    929             basename = ".".join(path[:len(path)-level])   930    931         # Name imports from a module.   932    933         if node.modname:   934             return "%s.%s" % (basename, node.modname), node.names   935    936         # Relative whole module imports.   937    938         else:   939             return basename, node.names   940    941 def get_argnames(args):   942    943     """   944     Return a list of all names provided by 'args'. Since tuples may be   945     employed, the arguments are traversed depth-first.   946     """   947    948     l = []   949     for arg in args:   950         if isinstance(arg, tuple):   951             l += get_argnames(arg)   952         else:   953             l.append(arg)   954     return l   955    956 def get_names_from_nodes(nodes):   957    958     """   959     Return the names employed in the given 'nodes' along with the number of   960     nodes excluding sequences.   961     """   962    963     names = set()   964     count = 0   965    966     for node in nodes:   967    968         # Add names and count them.   969    970         if isinstance(node, (compiler.ast.AssName, compiler.ast.Name)):   971             names.add(node.name)   972             count += 1   973    974         # Add names from sequences and incorporate their counts.   975    976         elif isinstance(node, (compiler.ast.AssList, compiler.ast.AssTuple,   977                                compiler.ast.List, compiler.ast.Set,   978                                compiler.ast.Tuple)):   979             _names, _count = get_names_from_nodes(node.nodes)   980             names.update(_names)   981             count += _count   982    983         # Count non-name, non-sequence nodes.   984    985         else:   986             count += 1   987    988     return names, count   989    990 # Location classes.   991    992 class Location:   993    994     "A generic program location."   995    996     def __init__(self, path, name, attrnames=None, version=None, access_number=None):   997         self.path = path   998         self.name = name   999         self.attrnames = attrnames  1000         self.version = version  1001         self.access_number = access_number  1002   1003     def __repr__(self):  1004         return "Location(%r, %r, %r, %r, %r)" % self.as_tuple()  1005   1006     def as_tuple(self):  1007         return (self.path, self.name, self.attrnames, self.version, self.access_number)  1008   1009     def __hash__(self):  1010         return hash(self.as_tuple())  1011   1012     def __eq__(self, other):  1013         return self.as_tuple() == other.as_tuple()  1014   1015     def __cmp__(self, other):  1016         return cmp(self.as_tuple(), other.as_tuple())  1017   1018     def get_attrname(self):  1019   1020         """  1021         Extract the first attribute from the attribute names employed in this  1022         location.  1023         """  1024   1025         attrnames = self.attrnames  1026         if not attrnames:  1027             return attrnames  1028         return get_attrnames(attrnames)[0]  1029   1030 class AccessLocation(Location):  1031   1032     "A specialised access location."  1033   1034     def __init__(self, path, name, attrnames, access_number):  1035   1036         """  1037         Initialise an access location featuring 'path', 'name', 'attrnames' and  1038         'access_number'.  1039         """  1040   1041         Location.__init__(self, path, name, attrnames, None, access_number)  1042   1043     def __repr__(self):  1044         return "AccessLocation(%r, %r, %r, %r)" % (self.path, self.name, self.attrnames, self.access_number)  1045   1046 # Result classes.  1047   1048 class InstructionSequence:  1049   1050     "A generic sequence of instructions."  1051   1052     def __init__(self, instructions):  1053         self.instructions = instructions  1054   1055     def get_value_instruction(self):  1056         return self.instructions[-1]  1057   1058     def get_init_instructions(self):  1059         return self.instructions[:-1]  1060   1061 # Dictionary utilities.  1062   1063 def init_item(d, key, fn):  1064   1065     """  1066     Add to 'd' an entry for 'key' using the callable 'fn' to make an initial  1067     value where no entry already exists.  1068     """  1069   1070     if not d.has_key(key):  1071         d[key] = fn()  1072     return d[key]  1073   1074 def dict_for_keys(d, keys):  1075   1076     "Return a new dictionary containing entries from 'd' for the given 'keys'."  1077   1078     nd = {}  1079     for key in keys:  1080         if d.has_key(key):  1081             nd[key] = d[key]  1082     return nd  1083   1084 def make_key(s):  1085   1086     "Make sequence 's' into a tuple-based key, first sorting its contents."  1087   1088     l = list(s)  1089     l.sort()  1090     return tuple(l)  1091   1092 def add_counter_item(d, key):  1093   1094     """  1095     Make a mapping in 'd' for 'key' to the number of keys added before it, thus  1096     maintaining a mapping of keys to their order of insertion.  1097     """  1098   1099     if not d.has_key(key):  1100         d[key] = len(d.keys())  1101     return d[key]   1102   1103 def remove_items(d1, d2):  1104   1105     "Remove from 'd1' all items from 'd2'."  1106   1107     for key in d2.keys():  1108         if d1.has_key(key):  1109             del d1[key]  1110   1111 # Set utilities.  1112   1113 def first(s):  1114     return list(s)[0]  1115   1116 def same(s1, s2):  1117     return set(s1) == set(s2)  1118   1119 def order_dependencies(all_depends):  1120   1121     """  1122     Produce a dependency ordering for the 'all_depends' mapping. This mapping  1123     has the form "A depends on B, C...". The result will order A, B, C, and so  1124     on.  1125     """  1126   1127     usage = init_reverse_dependencies(all_depends)  1128   1129     # Produce an ordering by obtaining exposed items (required by items already  1130     # processed) and putting them at the start of the list.  1131   1132     ordered = []  1133   1134     while usage:  1135         have_next = False  1136   1137         for key, n in usage.items():  1138   1139             # Add items needed by no other items to the ordering.  1140   1141             if not n:  1142                 remove_dependency(key, all_depends, usage, ordered)  1143                 have_next = True  1144   1145         if not have_next:  1146             raise ValueError, usage  1147   1148     return ordered  1149   1150 def order_dependencies_partial(all_depends):  1151   1152     """  1153     Produce a dependency ordering for the 'all_depends' mapping. This mapping  1154     has the form "A depends on B, C...". The result will order A, B, C, and so  1155     on. Where cycles exist, they will be broken and a partial ordering returned.  1156     """  1157   1158     usage = init_reverse_dependencies(all_depends)  1159   1160     # Duplicate the dependencies for subsequent modification.  1161   1162     new_depends = {}  1163     for key, values in all_depends.items():  1164         new_depends[key] = set(values)  1165   1166     all_depends = new_depends  1167   1168     # Produce an ordering by obtaining exposed items (required by items already  1169     # processed) and putting them at the start of the list.  1170   1171     ordered = []  1172   1173     while usage:  1174         least = None  1175         least_key = None  1176   1177         for key, n in usage.items():  1178   1179             # Add items needed by no other items to the ordering.  1180   1181             if not n:  1182                 remove_dependency(key, all_depends, usage, ordered)  1183                 least = 0  1184   1185             # When breaking cycles, note the least used items.  1186   1187             elif least is None or len(n) < least:  1188                 least_key = key  1189                 least = len(n)  1190   1191         if least:  1192             transfer_dependencies(least_key, all_depends, usage, ordered)  1193   1194     return ordered  1195   1196 def init_reverse_dependencies(all_depends):  1197   1198     """  1199     From 'all_depends', providing a mapping of the form "A depends on B, C...",  1200     record the reverse dependencies, making a mapping of the form  1201     "B is needed by A", "C is needed by A", and so on.  1202     """  1203   1204     usage = {}  1205   1206     # Record path-based dependencies.  1207   1208     for key in all_depends.keys():  1209         usage[key] = set()  1210   1211     for key, depends in all_depends.items():  1212         for depend in depends:  1213             init_item(usage, depend, set)  1214             usage[depend].add(key)  1215   1216     return usage  1217   1218 def transfer_dependencies(key, all_depends, usage, ordered):  1219   1220     """  1221     Transfer items needed by 'key' to those items needing 'key', found using  1222     'all_depends', and updating 'usage'. Insert 'key' into the 'ordered'  1223     collection of dependencies.  1224   1225     If "A is needed by X" and "B is needed by A", then transferring items needed  1226     by A will cause "B is needed by X" to be recorded as a consequence.  1227   1228     Transferring items also needs to occur in the reverse mapping, so that  1229     "A needs B" and "X needs A", then the consequence must be recorded as  1230     "X needs B".  1231     """  1232   1233     ordered.insert(0, key)  1234   1235     needing = usage[key]                        # A is needed by X  1236     needed = all_depends.get(key)               # A needs B  1237   1238     if needing:  1239         for depend in needing:  1240             l = all_depends.get(depend)  1241             if not l:  1242                 continue  1243   1244             l.remove(key)                       # X needs (A)  1245   1246             if needed:  1247                 l.update(needed)                # X needs B...  1248   1249                 # Prevent self references.  1250   1251                 if depend in needed:  1252                     l.remove(depend)  1253   1254     if needed:  1255         for depend in needed:  1256             l = usage.get(depend)  1257             if not l:  1258                 continue  1259   1260             l.remove(key)                       # B is needed by (A)  1261             l.update(needing)                   # B is needed by X...  1262   1263             # Prevent self references.  1264   1265             if depend in needing:  1266                 l.remove(depend)  1267   1268     if needed:  1269         del all_depends[key]  1270     del usage[key]  1271   1272 def remove_dependency(key, all_depends, usage, ordered):  1273   1274     """  1275     Remove 'key', found in 'all_depends', from 'usage', inserting it into the  1276     'ordered' collection of dependencies.  1277   1278     Given that 'usage' for a given key A would indicate that "A needs <nothing>"  1279     upon removing A from 'usage', the outcome is that all keys needing A will  1280     have A removed from their 'usage' records.  1281   1282     So, if "B needs A", removing A will cause "B needs <nothing>" to be recorded  1283     as a consequence.  1284     """  1285   1286     ordered.insert(0, key)  1287   1288     depends = all_depends.get(key)  1289   1290     # Reduce usage of the referenced items.  1291   1292     if depends:  1293         for depend in depends:  1294             usage[depend].remove(key)  1295   1296     del usage[key]  1297   1298 # General input/output.  1299   1300 def readfile(filename):  1301   1302     "Return the contents of 'filename'."  1303   1304     f = open(filename)  1305     try:  1306         return f.read()  1307     finally:  1308         f.close()  1309   1310 def writefile(filename, s):  1311   1312     "Write to 'filename' the string 's'."  1313   1314     f = open(filename, "w")  1315     try:  1316         f.write(s)  1317     finally:  1318         f.close()  1319   1320 # General encoding.  1321   1322 def sorted_output(x):  1323   1324     "Sort sequence 'x' and return a string with commas separating the values."  1325   1326     x = map(str, x)  1327     x.sort()  1328     return ", ".join(x)  1329   1330 def get_string_details(literals, encoding):  1331   1332     """  1333     Determine whether 'literals' represent Unicode strings or byte strings,  1334     using 'encoding' to reproduce byte sequences.  1335   1336     Each literal is the full program representation including prefix and quotes  1337     recoded by the parser to UTF-8. Thus, any literal found to represent a byte  1338     string needs to be translated back to its original encoding.  1339   1340     Return a single encoded literal value, a type name, and the original  1341     encoding as a tuple.  1342     """  1343   1344     typename = "unicode"  1345   1346     l = []  1347   1348     for s in literals:  1349         out, _typename = get_literal_details(s)  1350         if _typename == "str":  1351             typename = "str"  1352         l.append(out)  1353   1354     out = "".join(l)  1355   1356     # For Unicode values, convert to the UTF-8 program representation.  1357   1358     if typename == "unicode":  1359         return out.encode("utf-8"), typename, encoding  1360   1361     # For byte string values, convert back to the original encoding.  1362   1363     else:  1364         return out.encode(encoding), typename, encoding  1365   1366 def get_literal_details(s):  1367   1368     """  1369     Determine whether 's' represents a Unicode string or a byte string, where  1370     's' contains the full program representation of a literal including prefix  1371     and quotes, recoded by the parser to UTF-8.  1372   1373     Find and convert Unicode values starting with <backslash>u or <backslash>U,  1374     and byte or Unicode values starting with <backslash><octal digit> or  1375     <backslash>x.  1376   1377     Literals prefixed with "u" cause <backslash><octal digit> and <backslash>x  1378     to be considered as Unicode values. Otherwise, they produce byte values and  1379     cause unprefixed strings to be considered as byte strings.  1380   1381     Literals prefixed with "r" do not have their backslash-encoded values  1382     converted unless also prefixed with "u", in which case only the above value  1383     formats are converted, not any of the other special sequences for things  1384     like newlines.  1385   1386     Return the literal value as a Unicode object together with the appropriate  1387     type name in a tuple.  1388     """  1389   1390     l = []  1391   1392     # Identify the quote character and use it to identify the prefix.  1393   1394     quote_type = s[-1]  1395     prefix_end = s.find(quote_type)  1396     prefix = s[:prefix_end].lower()  1397   1398     if prefix not in ("", "b", "br", "r", "u", "ur"):  1399         raise ValueError, "String literal does not have a supported prefix: %s" % s  1400   1401     if "b" in prefix:  1402         typename = "str"  1403     else:  1404         typename = "unicode"  1405   1406     # Identify triple quotes or single quotes.  1407   1408     if len(s) >= 6 and s[-2] == quote_type and s[-3] == quote_type:  1409         quote = s[prefix_end:prefix_end+3]  1410         current = prefix_end + 3  1411         end = len(s) - 3  1412     else:  1413         quote = s[prefix_end]  1414         current = prefix_end + 1  1415         end = len(s) - 1  1416   1417     # Conversions of some quoted values.  1418   1419     searches = {  1420         "u" : (6, 16),  1421         "U" : (10, 16),  1422         "x" : (4, 16),  1423         }  1424   1425     octal_digits = map(str, range(0, 8))  1426   1427     # Translations of some quoted values.  1428   1429     escaped = {  1430         "\\" : "\\", "'" : "'", '"' : '"',  1431         "a" : "\a", "b" : "\b", "f" : "\f",  1432         "n" : "\n", "r" : "\r", "t" : "\t",  1433         }  1434   1435     while current < end:  1436   1437         # Look for quoted values.  1438   1439         index = s.find("\\", current)  1440         if index == -1 or index + 1 == end:  1441             l.append(s[current:end])  1442             break  1443   1444         # Add the preceding text.  1445   1446         l.append(s[current:index])  1447   1448         # Handle quoted text.  1449   1450         term = s[index+1]  1451   1452         # Add Unicode values. Where a string is u-prefixed, even \o and \x  1453         # produce Unicode values.  1454   1455         if typename == "unicode" and (  1456             term in ("u", "U") or   1457             "u" in prefix and (term == "x" or term in octal_digits)):  1458   1459             needed, base = searches.get(term, (4, 8))  1460             value = convert_quoted_value(s, index, needed, end, base, unichr)  1461             l.append(value)  1462             current = index + needed  1463   1464         # Add raw byte values, changing the string type.  1465   1466         elif "r" not in prefix and (  1467              term == "x" or term in octal_digits):  1468   1469             needed, base = searches.get(term, (4, 8))  1470             value = convert_quoted_value(s, index, needed, end, base, chr)  1471             l.append(value)  1472             typename = "str"  1473             current = index + needed  1474   1475         # Add other escaped values.  1476   1477         elif "r" not in prefix and escaped.has_key(term):  1478             l.append(escaped[term])  1479             current = index + 2  1480   1481         # Add other text as found.  1482   1483         else:  1484             l.append(s[index:index+2])  1485             current = index + 2  1486   1487     # Collect the components into a single Unicode object. Since the literal  1488     # text was already in UTF-8 form, interpret plain strings as UTF-8  1489     # sequences.  1490   1491     out = []  1492   1493     for value in l:  1494         if isinstance(value, unicode):  1495             out.append(value)  1496         else:  1497             out.append(unicode(value, "utf-8"))  1498   1499     return "".join(out), typename  1500   1501 def convert_quoted_value(s, index, needed, end, base, fn):  1502   1503     """  1504     Interpret a quoted value in 's' at 'index' with the given 'needed' number of  1505     positions, and with the given 'end' indicating the first position after the  1506     end of the actual string content.  1507   1508     Use 'base' as the numerical base when interpreting the value, and use 'fn'  1509     to convert the value to an appropriate type.  1510     """  1511   1512     s = s[index:min(index+needed, end)]  1513   1514     # Not a complete occurrence.  1515   1516     if len(s) < needed:  1517         return s  1518   1519     # Test for a well-formed value.  1520   1521     try:  1522         first = base == 8 and 1 or 2  1523         value = int(s[first:needed], base)  1524     except ValueError:  1525         return s  1526     else:  1527         return fn(value)  1528   1529 # Attribute chain decoding.  1530   1531 def get_attrnames(attrnames):  1532   1533     """  1534     Split the qualified attribute chain 'attrnames' into its components,  1535     handling special attributes starting with "#" that indicate type  1536     conformance.  1537     """  1538   1539     if attrnames.startswith("#"):  1540         return [attrnames]  1541     else:  1542         return attrnames.split(".")  1543   1544 def get_name_path(path, name):  1545   1546     "Return a suitable qualified name from the given 'path' and 'name'."  1547   1548     if "." in name:  1549         return name  1550     else:  1551         return "%s.%s" % (path, name)  1552   1553 # Usage-related functions.  1554   1555 def get_types_for_usage(attrnames, objects):  1556   1557     """  1558     Identify the types that can support the given 'attrnames', using the  1559     given 'objects' as the catalogue of type details.  1560     """  1561   1562     types = []  1563     for name, _attrnames in objects.items():  1564         if set(attrnames).issubset(_attrnames):  1565             types.append(name)  1566     return types  1567   1568 def get_invoked_attributes(usage):  1569   1570     "Obtain invoked attribute from the given 'usage'."  1571   1572     invoked = []  1573     if usage:  1574         for attrname, invocation, assignment in usage:  1575             if invocation:  1576                 invoked.append(attrname)  1577     return invoked  1578   1579 def get_assigned_attributes(usage):  1580   1581     "Obtain assigned attribute from the given 'usage'."  1582   1583     assigned = []  1584     if usage:  1585         for attrname, invocation, assignment in usage:  1586             if assignment:  1587                 assigned.append(attrname)  1588     return assigned  1589   1590 # Type and module functions.  1591 # NOTE: This makes assumptions about the __builtins__ structure.  1592   1593 def get_builtin_module(name):  1594   1595     "Return the module name containing the given type 'name'."  1596   1597     if name == "string":  1598         modname = "str"  1599     elif name == "NoneType":  1600         modname = "none"  1601     else:  1602         modname = name  1603   1604     return "__builtins__.%s" % modname  1605   1606 def get_builtin_type(name):  1607   1608     "Return the type name provided by the given Python value 'name'."  1609   1610     if name == "str":  1611         return "string"  1612     else:  1613         return name  1614   1615 def get_builtin_class(name):  1616   1617     "Return the full name of the built-in class having the given 'name'."  1618   1619     typename = get_builtin_type(name)  1620     module = get_builtin_module(typename)  1621     return "%s.%s" % (module, typename)  1622   1623 # Useful data.  1624   1625 predefined_constants = "False", "None", "NotImplemented", "True"  1626   1627 privileged_attributes = [  1628     "__get_single_item_unchecked__",  1629     ]  1630   1631 unary_operator_functions = {  1632   1633     # Unary operations.  1634   1635     "Invert" : "invert",  1636     "UnaryAdd" : "pos",  1637     "UnarySub" : "neg",  1638     }  1639   1640 operator_functions = {  1641   1642     # Fundamental operations.  1643   1644     "is" : "is_",  1645     "is not" : "is_not",  1646   1647     # Binary operations.  1648   1649     "in" : "in_",  1650     "not in" : "not_in",  1651     "Add" : "add",  1652     "Bitand" : "and_",  1653     "Bitor" : "or_",  1654     "Bitxor" : "xor",  1655     "Div" : "div",  1656     "FloorDiv" : "floordiv",  1657     "LeftShift" : "lshift",  1658     "Mod" : "mod",  1659     "Mul" : "mul",  1660     "Power" : "pow",  1661     "RightShift" : "rshift",  1662     "Sub" : "sub",  1663   1664     # Augmented assignment.  1665   1666     "+=" : "iadd",  1667     "-=" : "isub",  1668     "*=" : "imul",  1669     "/=" : "idiv",  1670     "//=" : "ifloordiv",  1671     "%=" : "imod",  1672     "**=" : "ipow",  1673     "<<=" : "ilshift",  1674     ">>=" : "irshift",  1675     "&=" : "iand",  1676     "^=" : "ixor",  1677     "|=" : "ior",  1678   1679     # Comparisons.  1680   1681     "==" : "eq",  1682     "!=" : "ne",  1683     "<" : "lt",  1684     "<=" : "le",  1685     ">=" : "ge",  1686     ">" : "gt",  1687     }  1688   1689 operator_functions.update(unary_operator_functions)  1690   1691 # vim: tabstop=4 expandtab shiftwidth=4