Lichen

importer.py

26:f47c63967c59
2016-09-05 Paul Boddie Separated inspection-related naming methods from common module methods.
     1 #!/usr/bin/env python     2      3 """     4 Import logic.     5      6 Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013,     7               2014, 2015, 2016 Paul Boddie <paul@boddie.org.uk>     8      9 This program is free software; you can redistribute it and/or modify it under    10 the terms of the GNU General Public License as published by the Free Software    11 Foundation; either version 3 of the License, or (at your option) any later    12 version.    13     14 This program is distributed in the hope that it will be useful, but WITHOUT    15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS    16 FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more    17 details.    18     19 You should have received a copy of the GNU General Public License along with    20 this program.  If not, see <http://www.gnu.org/licenses/>.    21 """    22     23 from errors import ProgramError    24 from os.path import exists, extsep, getmtime, join    25 from os import listdir, makedirs, remove    26 from common import init_item, readfile, writefile    27 from modules import CachedModule    28 from referencing import Reference    29 import inspector    30 import sys    31     32 class Importer:    33     34     "An import machine, searching for and loading modules."    35     36     def __init__(self, path, cache=None, verbose=False):    37     38         """    39         Initialise the importer with the given search 'path' - a list of    40         directories to search for Python modules.    41     42         The optional 'cache' should be the name of a directory used to store    43         cached module information.    44     45         The optional 'verbose' parameter causes output concerning the activities    46         of the object to be produced if set to a true value (not the default).    47         """    48     49         self.path = path    50         self.cache = cache    51         self.verbose = verbose    52     53         self.to_import = set()    54         self.required = set(["__main__"])    55         self.removed = {}    56     57         self.modules = {}    58         self.accessing_modules = {}    59         self.invalidated = set()    60     61         self.objects = {}    62         self.classes = {}    63         self.function_parameters = {}    64         self.function_defaults = {}    65         self.function_targets = {}    66         self.function_arguments = {}    67     68         # Derived information.    69     70         self.subclasses = {}    71     72         # Attributes of different object types.    73     74         self.all_class_attrs = {}    75         self.all_instance_attrs = {}    76         self.all_instance_attr_constants = {}    77         self.all_combined_attrs = {}    78         self.all_module_attrs = {}    79         self.all_shadowed_attrs = {}    80     81         # References to external names and aliases within program units.    82     83         self.all_name_references = {}    84         self.all_initialised_names = {}    85         self.all_aliased_names = {}    86     87         # General attribute accesses.    88     89         self.all_attr_accesses = {}    90         self.all_const_accesses = {}    91         self.all_attr_access_modifiers = {}    92     93         # Constant literals and values.    94     95         self.all_constants = {}    96         self.all_constant_values = {}    97     98         self.make_cache()    99    100     def make_cache(self):   101         if self.cache and not exists(self.cache):   102             makedirs(self.cache)   103    104     def check_cache(self, details):   105    106         """   107         Check whether the cache applies for the given 'details', invalidating it   108         if it does not.   109         """   110    111         recorded_details = self.get_cache_details()   112    113         if recorded_details != details:   114             self.remove_cache()   115    116         writefile(self.get_cache_details_filename(), details)   117    118     def get_cache_details_filename(self):   119    120         "Return the filename for the cache details."   121    122         return join(self.cache, "$details")   123    124     def get_cache_details(self):   125    126         "Return details of the cache."   127    128         details_filename = self.get_cache_details_filename()   129    130         if not exists(details_filename):   131             return None   132         else:   133             return readfile(details_filename)   134    135     def remove_cache(self):   136    137         "Remove the contents of the cache."   138    139         for filename in listdir(self.cache):   140             remove(join(self.cache, filename))   141    142     def to_cache(self):   143    144         "Write modules to the cache."   145    146         if self.cache:   147             for module_name, module in self.modules.items():   148                 module.to_cache(join(self.cache, module_name))   149    150     # Object retrieval and storage.   151    152     def get_object(self, name):   153    154         """   155         Return a reference for the given 'name' or None if no such object   156         exists.   157         """   158    159         return self.objects.get(name)   160    161     def set_object(self, name, value=None):   162    163         "Set the object with the given 'name' and the given 'value'."   164    165         if isinstance(value, Reference):   166             ref = value.alias(name)   167         else:   168             ref = Reference(value, name)   169    170         self.objects[name] = ref   171    172     # Indirect object retrieval.   173    174     def get_attributes(self, ref, attrname):   175    176         """   177         Return attributes provided by 'ref' for 'attrname'. Class attributes   178         may be provided by instances.   179         """   180    181         kind = ref.get_kind()   182         if kind == "<class>":   183             ref = self.get_class_attribute(ref.get_origin(), attrname)   184             return ref and set([ref]) or set()   185         elif kind == "<instance>":   186             return self.get_combined_attributes(ref.get_origin(), attrname)   187         elif kind == "<module>":   188             ref = self.get_module_attribute(ref.get_origin(), attrname)   189             return ref and set([ref]) or set()   190         else:   191             return set()   192    193     def get_class_attribute(self, object_type, attrname):   194    195         "Return from 'object_type' the details of class attribute 'attrname'."   196    197         attr = self.all_class_attrs[object_type].get(attrname)   198         return attr and self.get_object(attr)   199    200     def get_instance_attributes(self, object_type, attrname):   201    202         """   203         Return from 'object_type' the details of instance attribute 'attrname'.   204         """   205    206         consts = self.all_instance_attr_constants.get(object_type)   207         attrs = set()   208         for attr in self.all_instance_attrs[object_type].get(attrname, []):   209             attrs.add(consts and consts.get(attrname) or Reference("<var>", attr))   210         return attrs   211    212     def get_combined_attributes(self, object_type, attrname):   213    214         """   215         Return from 'object_type' the details of class or instance attribute   216         'attrname'.   217         """   218    219         ref = self.get_class_attribute(object_type, attrname)   220         refs = ref and set([ref]) or set()   221         refs.update(self.get_instance_attributes(object_type, attrname))   222         return refs   223    224     def get_module_attribute(self, object_type, attrname):   225    226         "Return from 'object_type' the details of module attribute 'attrname'."   227    228         if attrname in self.all_module_attrs[object_type]:   229             return self.get_object("%s.%s" % (object_type, attrname))   230         else:   231             return None   232    233     # Module management.   234    235     def queue_module(self, name, accessor, required=False):   236    237         """   238         Queue the module with the given 'name' for import from the given   239         'accessor' module. If 'required' is true (it is false by default), the   240         module will be required in the final program.   241         """   242    243         if not self.modules.has_key(name):   244             self.to_import.add(name)   245    246         if required:   247             self.required.add(name)   248    249         init_item(self.accessing_modules, name, set)   250         self.accessing_modules[name].add(accessor.name)   251    252     def get_modules(self):   253    254         "Return all modules known to the importer."   255    256         return self.modules.values()   257    258     def get_module(self, name):   259    260         "Return the module with the given 'name'."   261    262         if not self.modules.has_key(name):   263             return None   264    265         return self.modules[name]   266    267     # Program operations.   268    269     def initialise(self, filename, reset=False):   270    271         """   272         Initialise a program whose main module is 'filename', resetting the   273         cache if 'reset' is true. Return the main module.   274         """   275    276         if reset:   277             self.remove_cache()   278         self.check_cache(filename)   279    280         # Load the program itself.   281    282         m = self.load_from_file(filename)   283    284         # Load any queued modules.   285    286         while self.to_import:   287             for name in list(self.to_import): # avoid mutation issue   288                 self.load(name)   289    290         # Resolve dependencies between modules.   291    292         self.resolve()   293    294         # Record the type of all classes.   295    296         self.type_ref = self.get_object("__builtins__.type")   297    298         # Resolve dependencies within the program.   299    300         for module in self.modules.values():   301             module.complete()   302    303         # Remove unneeded modules.   304    305         all_modules = self.modules.items()   306    307         for name, module in all_modules:   308             if name not in self.required:   309                 module.unpropagate()   310                 del self.modules[name]   311                 self.removed[name] = module   312    313         return m   314    315     def finalise(self):   316    317         "Finalise the inspected program."   318    319         self.finalise_classes()   320         self.to_cache()   321         self.set_class_types()   322         self.define_instantiators()   323         self.collect_constants()   324    325     # Supporting operations.   326    327     def resolve(self):   328    329         "Resolve dependencies between modules."   330    331         resolved = {}   332    333         for name, ref in self.objects.items():   334             if ref.has_kind("<depends>"):   335                 found = self.find_dependency(ref)   336                 if found:   337                     resolved[name] = found   338                 else:   339                     print >>sys.stderr, "Name %s references an unknown object: %s" % (name, ref.get_origin())   340    341         # Record the resolved names and identify required modules.   342    343         for name, ref in resolved.items():   344             self.objects[name] = ref   345    346             module_name = self.get_module_provider(ref)   347             if module_name:   348                 self.required.add(module_name)   349    350     def find_dependency(self, ref):   351    352         "Find the ultimate dependency for 'ref'."   353    354         found = set()   355         while ref and ref.has_kind("<depends>") and not ref in found:   356             found.add(ref)   357             ref = self.objects.get(ref.get_origin())   358         return ref   359    360     def get_module_provider(self, ref):   361    362         "Identify the provider of the given 'ref'."   363    364         for ancestor in ref.ancestors():   365             if self.modules.has_key(ancestor):   366                 return ancestor   367         return None   368    369     def finalise_classes(self):   370    371         "Finalise the class relationships and attributes."   372    373         self.derive_inherited_attrs()   374         self.derive_subclasses()   375         self.derive_shadowed_attrs()   376    377     def derive_inherited_attrs(self):   378    379         "Derive inherited attributes for classes throughout the program."   380    381         for name in self.classes.keys():   382             self.propagate_attrs_for_class(name)   383    384     def propagate_attrs_for_class(self, name, visited=None):   385    386         "Propagate inherited attributes for class 'name'."   387    388         # Visit classes only once.   389    390         if self.all_combined_attrs.has_key(name):   391             return   392    393         visited = visited or []   394    395         if name in visited:   396             raise ProgramError, "Class %s may not inherit from itself: %s -> %s." % (name, " -> ".join(visited), name)   397    398         visited.append(name)   399    400         class_attrs = {}   401         instance_attrs = {}   402    403         # Aggregate the attributes from base classes, recording the origins of   404         # applicable attributes.   405    406         for base in self.classes[name][::-1]:   407    408             # Get the identity of the class from the reference.   409    410             base = base.get_origin()   411    412             # Define the base class completely before continuing with this   413             # class.   414    415             self.propagate_attrs_for_class(base, visited)   416             class_attrs.update(self.all_class_attrs[base])   417    418             # Instance attribute origins are combined if different.   419    420             for key, values in self.all_instance_attrs[base].items():   421                 init_item(instance_attrs, key, set)   422                 instance_attrs[key].update(values)   423    424         # Class attributes override those defined earlier in the hierarchy.   425    426         class_attrs.update(self.all_class_attrs.get(name, {}))   427    428         # Instance attributes are merely added if not already defined.   429    430         for key in self.all_instance_attrs.get(name, []):   431             if not instance_attrs.has_key(key):   432                 instance_attrs[key] = set(["%s.%s" % (name, key)])   433    434         self.all_class_attrs[name] = class_attrs   435         self.all_instance_attrs[name] = instance_attrs   436         self.all_combined_attrs[name] = set(class_attrs.keys()).union(instance_attrs.keys())   437    438     def derive_subclasses(self):   439    440         "Derive subclass details for classes."   441    442         for name, bases in self.classes.items():   443             for base in bases:   444    445                 # Get the identity of the class from the reference.   446    447                 base = base.get_origin()   448                 self.subclasses[base].add(name)   449    450     def derive_shadowed_attrs(self):   451    452         "Derive shadowed attributes for classes."   453    454         for name, attrs in self.all_instance_attrs.items():   455             attrs = set(attrs.keys()).intersection(self.all_class_attrs[name].keys())   456             if attrs:   457                 self.all_shadowed_attrs[name] = attrs   458    459     def set_class_types(self):   460    461         "Set the type of each class."   462    463         for attrs in self.all_class_attrs.values():   464             attrs["__class__"] = self.type_ref.get_origin()   465    466     def define_instantiators(self):   467    468         """   469         Consolidate parameter and default details, incorporating initialiser   470         details to define instantiator signatures.   471         """   472    473         for cls, attrs in self.all_class_attrs.items():   474             initialiser = attrs["__init__"]   475             self.function_parameters[cls] = self.function_parameters[initialiser][1:]   476             self.function_defaults[cls] = self.function_defaults[initialiser]   477    478     def collect_constants(self):   479    480         "Get constants from all active modules."   481    482         for module in self.modules.values():   483             self.all_constants.update(module.constants)   484    485     # Import methods.   486    487     def find_in_path(self, name):   488    489         """   490         Find the given module 'name' in the search path, returning None where no   491         such module could be found, or a 2-tuple from the 'find' method   492         otherwise.   493         """   494    495         for d in self.path:   496             m = self.find(d, name)   497             if m: return m   498         return None   499    500     def find(self, d, name):   501    502         """   503         In the directory 'd', find the given module 'name', where 'name' can   504         either refer to a single file module or to a package. Return None if the   505         'name' cannot be associated with either a file or a package directory,   506         or a 2-tuple from '_find_package' or '_find_module' otherwise.   507         """   508    509         m = self._find_package(d, name)   510         if m: return m   511         m = self._find_module(d, name)   512         if m: return m   513         return None   514    515     def _find_module(self, d, name):   516    517         """   518         In the directory 'd', find the given module 'name', returning None where   519         no suitable file exists in the directory, or a 2-tuple consisting of   520         None (indicating that no package directory is involved) and a filename   521         indicating the location of the module.   522         """   523    524         name_py = name + extsep + "py"   525         filename = self._find_file(d, name_py)   526         if filename:   527             return None, filename   528         return None   529    530     def _find_package(self, d, name):   531    532         """   533         In the directory 'd', find the given package 'name', returning None   534         where no suitable package directory exists, or a 2-tuple consisting of   535         a directory (indicating the location of the package directory itself)   536         and a filename indicating the location of the __init__.py module which   537         declares the package's top-level contents.   538         """   539    540         filename = self._find_file(d, name)   541         if filename:   542             init_py = "__init__" + extsep + "py"   543             init_py_filename = self._find_file(filename, init_py)   544             if init_py_filename:   545                 return filename, init_py_filename   546         return None   547    548     def _find_file(self, d, filename):   549    550         """   551         Return the filename obtained when searching the directory 'd' for the   552         given 'filename', or None if no actual file exists for the filename.   553         """   554    555         filename = join(d, filename)   556         if exists(filename):   557             return filename   558         else:   559             return None   560    561     def load(self, name):   562    563         """   564         Load the module or package with the given 'name'. Return an object   565         referencing the loaded module or package, or None if no such module or   566         package exists.   567         """   568    569         # Loaded modules are returned immediately.   570         # Modules may be known but not yet loading (having been registered as   571         # submodules), loading, loaded, or completely unknown.   572    573         module = self.get_module(name)   574    575         if module:   576             return self.modules[name]   577    578         # Otherwise, modules are loaded.   579    580         if self.verbose:   581             print >>sys.stderr, "Loading", name   582    583         # Split the name into path components, and try to find the uppermost in   584         # the search path.   585    586         path = name.split(".")   587         path_so_far = []   588         module = None   589    590         for p in path:   591    592             # Get the module's filesystem details.   593    594             if not path_so_far:   595                 m = self.find_in_path(p)   596             elif d:   597                 m = self.find(d, p)   598             else:   599                 m = None   600    601             path_so_far.append(p)   602             module_name = ".".join(path_so_far)   603    604             if not m:   605                 if self.verbose:   606                     print >>sys.stderr, "Not found (%s)" % name   607    608                 return None # NOTE: Import error.   609    610             # Get the module itself.   611    612             d, filename = m   613             module = self.load_from_file(filename, module_name)   614    615         return module   616    617     def load_from_file(self, filename, module_name=None):   618    619         "Load the module from the given 'filename'."   620    621         if module_name is None:   622             module_name = "__main__"   623    624         module = self.modules.get(module_name)   625    626         if not module:   627    628             # Try to load from cache.   629    630             module = self.load_from_cache(filename, module_name)   631             if module:   632                 return module   633    634             # If no cache entry exists, load from file.   635    636             module = inspector.InspectedModule(module_name, self)   637             self.add_module(module_name, module)   638             self.update_cache_validity(module)   639    640             self._load(module, module_name, lambda m: m.parse, filename)   641    642         return module   643    644     def update_cache_validity(self, module):   645    646         "Make 'module' valid in the cache, but invalidate accessing modules."   647    648         accessing = self.accessing_modules.get(module.name)   649         if accessing:   650             self.invalidated.update(accessing)   651         if module.name in self.invalidated:   652             self.invalidated.remove(module.name)   653    654     def source_is_new(self, filename, module_name):   655    656         "Return whether 'filename' is newer than the cached 'module_name'."   657    658         if self.cache:   659             cache_filename = join(self.cache, module_name)   660             return not exists(cache_filename) or \   661                 getmtime(filename) > getmtime(cache_filename) or \   662                 module_name in self.invalidated   663         else:   664             return True   665    666     def load_from_cache(self, filename, module_name):   667    668         "Return a module residing in the cache."   669    670         module = self.modules.get(module_name)   671    672         if not module and not self.source_is_new(filename, module_name):   673             module = CachedModule(module_name, self)   674             self.add_module(module_name, module)   675    676             filename = join(self.cache, module_name)   677             self._load(module, module_name, lambda m: m.from_cache, filename)   678    679         return module   680    681     def _load(self, module, module_name, fn, filename):   682    683         """   684         Load 'module' for the given 'module_name', and with 'fn' performing an   685         invocation on the module with the given 'filename'.   686         """   687    688         # Load the module.   689    690         if self.verbose:   691             print >>sys.stderr, "Loading", filename   692         fn(module)(filename)   693         if self.verbose:   694             print >>sys.stderr, "Loaded", filename   695    696     def add_module(self, module_name, module):   697    698         """   699         Return the module with the given 'module_name', adding a new module   700         object if one does not already exist.   701         """   702    703         self.modules[module_name] = module   704         self.objects[module_name] = Reference("<module>", module_name)   705         if module_name in self.to_import:   706             self.to_import.remove(module_name)   707    708 # vim: tabstop=4 expandtab shiftwidth=4