Lichen

importer.py

358:3c11956e943f
2016-12-09 Paul Boddie Report missing symbols only after serialising the module data.
     #!/usr/bin/env python

"""
Import logic.

Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013,
              2014, 2015, 2016 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
this program.  If not, see <http://www.gnu.org/licenses/>.
"""

from errors import ProgramError
from os.path import exists, extsep, getmtime, join
from os import listdir, makedirs, remove
from common import init_item, readfile, writefile
from modules import CachedModule
from referencing import Reference
import inspector
import sys

class Importer:

    "An import machine, searching for and loading modules."

    def __init__(self, path, cache=None, verbose=False):

        """
        Initialise the importer with the given search 'path' - a list of
        directories to search for Python modules.

        The optional 'cache' should be the name of a directory used to store
        cached module information.

        The optional 'verbose' parameter causes output concerning the activities
        of the object to be produced if set to a true value (not the default).
        """

        self.path = path
        self.cache = cache
        self.verbose = verbose

        # Module importing queue, required modules, removed modules and active
        # modules in the final program.

        self.to_import = set()
        self.required = set(["__main__"])
        self.removed = {}
        self.modules = {}

        # Module relationships and invalidated cached modules.

        self.accessing_modules = {}
        self.invalidated = set()

        # Basic program information.

        self.objects = {}
        self.classes = {}
        self.function_parameters = {}
        self.function_defaults = {}
        self.function_locals = {}
        self.function_targets = {}
        self.function_arguments = {}

        # Unresolved names.

        self.missing = set()

        # Derived information.

        self.subclasses = {}

        # Attributes of different object types.

        self.all_class_attrs = {}
        self.all_instance_attrs = {}
        self.all_instance_attr_constants = {}
        self.all_combined_attrs = {}
        self.all_module_attrs = {}
        self.all_shadowed_attrs = {}

        # References to external names and aliases within program units.

        self.all_name_references = {}
        self.all_initialised_names = {}
        self.all_aliased_names = {}

        # General attribute accesses.

        self.all_attr_accesses = {}
        self.all_const_accesses = {}
        self.all_attr_access_modifiers = {}

        # Constant literals and values.

        self.all_constants = {}
        self.all_constant_values = {}

        self.make_cache()

    def _log(self, *args):

        """
        Write the given 'args', converted to strings and separated by spaces,
        as a single line to standard error, but only if verbose output was
        requested.
        """

        if self.verbose:
            sys.stderr.write(" ".join(map(str, args)) + "\n")

    def make_cache(self):

        "Create the cache directory if a cache is configured but absent."

        if self.cache and not exists(self.cache):
            makedirs(self.cache)

    def check_cache(self, details):

        """
        Check whether the cache applies for the given 'details', invalidating it
        if it does not. The given 'details' are then recorded in the cache.
        Nothing is done if no cache directory is configured.
        """

        # Without a cache directory there is nothing to check or record.

        if not self.cache:
            return

        recorded_details = self.get_cache_details()

        if recorded_details != details:
            self.remove_cache()

        writefile(self.get_cache_details_filename(), details)

    def get_cache_details_filename(self):

        "Return the filename for the cache details."

        return join(self.cache, "$details")

    def get_cache_details(self):

        """
        Return details of the cache, or None if no cache is configured or if no
        details have been recorded.
        """

        if not self.cache:
            return None

        details_filename = self.get_cache_details_filename()

        if not exists(details_filename):
            return None
        else:
            return readfile(details_filename)

    def remove_cache(self):

        """
        Remove the contents of the cache. Nothing is done if no cache directory
        is configured or if the directory does not exist.
        """

        if not self.cache or not exists(self.cache):
            return

        for filename in listdir(self.cache):
            remove(join(self.cache, filename))

    def to_cache(self):

        "Write modules to the cache."

        if self.cache:
            for module_name, module in self.modules.items():
                module.to_cache(join(self.cache, module_name))

    # Object retrieval and storage.

    def get_object(self, name):

        """
        Return a reference for the given 'name' or None if no such object
        exists.
        """

        return self.objects.get(name)

    def set_object(self, name, value=None):

        """
        Set the object with the given 'name' and the given 'value'. A reference
        given as the 'value' is stored as an alias under 'name'; any other value
        is wrapped in a new reference.
        """

        if isinstance(value, Reference):
            ref = value.alias(name)
        else:
            ref = Reference(value, name)

        self.objects[name] = ref

    # Identification of both stored object names and name references.

    def identify(self, name):

        "Identify 'name' using stored object and external name records."

        return self.objects.get(name) or self.all_name_references.get(name)

    # Indirect object retrieval.

    def get_attributes(self, ref, attrname):

        """
        Return attributes provided by 'ref' for 'attrname'. Class attributes
        may be provided by instances. The result is always a set, being empty
        where no attribute is found or where 'ref' has another kind.
        """

        kind = ref.get_kind()
        if kind == "<class>":
            ref = self.get_class_attribute(ref.get_origin(), attrname)
            return set([ref]) if ref else set()
        elif kind == "<instance>":
            return self.get_combined_attributes(ref.get_origin(), attrname)
        elif kind == "<module>":
            ref = self.get_module_attribute(ref.get_origin(), attrname)
            return set([ref]) if ref else set()
        else:
            return set()

    def get_class_attribute(self, object_type, attrname):

        "Return from 'object_type' the details of class attribute 'attrname'."

        attrs = self.all_class_attrs.get(object_type)
        attr = attrs and attrs.get(attrname)
        return attr and self.get_object(attr)

    def get_instance_attributes(self, object_type, attrname):

        """
        Return from 'object_type' the details of instance attribute 'attrname'.
        """

        consts = self.all_instance_attr_constants.get(object_type)
        attrs = set()
        for attr in self.all_instance_attrs[object_type].get(attrname, []):

            # Prefer any recorded constant for the attribute over a plain
            # variable reference.

            attrs.add(consts and consts.get(attrname) or Reference("<var>", attr))
        return attrs

    def get_combined_attributes(self, object_type, attrname):

        """
        Return from 'object_type' the details of class or instance attribute
        'attrname'.
        """

        ref = self.get_class_attribute(object_type, attrname)
        refs = set([ref]) if ref else set()
        refs.update(self.get_instance_attributes(object_type, attrname))
        return refs

    def get_module_attribute(self, object_type, attrname):

        "Return from 'object_type' the details of module attribute 'attrname'."

        if attrname in self.all_module_attrs[object_type]:
            return self.get_object("%s.%s" % (object_type, attrname))
        else:
            return None

    # Convenience methods for deducing which kind of object provided an
    # attribute.

    def get_attribute_provider(self, ref, attrname):

        """
        Return the kind of provider of the attribute accessed via 'ref' using
        'attrname'.
        """

        kind = ref.get_kind()

        if kind in ["<class>", "<module>"]:
            return kind
        else:
            return self.get_instance_attribute_provider(ref.get_origin(), attrname)

    def get_instance_attribute_provider(self, object_type, attrname):

        """
        Return the kind of provider of the attribute accessed via an instance of
        'object_type' using 'attrname'.
        """

        if self.get_class_attribute(object_type, attrname):
            return "<class>"
        else:
            return "<instance>"

    # Module management.

    def queue_module(self, name, accessor, required=False):

        """
        Queue the module with the given 'name' for import from the given
        'accessor' module. If 'required' is true (it is false by default), the
        module will be required in the final program.
        """

        if name not in self.modules:
            self.to_import.add(name)

        if required:
            self.required.add(name)

        init_item(self.accessing_modules, name, set)
        self.accessing_modules[name].add(accessor.name)

    def get_modules(self):

        "Return all modules known to the importer."

        return self.modules.values()

    def get_module(self, name):

        "Return the module with the given 'name' or None if it is not known."

        return self.modules.get(name)

    # Program operations.

    def initialise(self, filename, reset=False):

        """
        Initialise a program whose main module is 'filename', resetting the
        cache if 'reset' is true. Return the main module.
        """

        if reset:
            self.remove_cache()
        self.check_cache(filename)

        # Load the program itself.

        m = self.load_from_file(filename)

        # Load any queued modules.

        while self.to_import:
            for name in list(self.to_import): # avoid mutation issue
                self.load(name)

                # A module that could not be found is never added and would
                # otherwise remain queued forever, so drop it after the attempt.

                self.to_import.discard(name)

        # Resolve dependencies between modules.

        self.resolve()

        # Record the type of all classes.

        self.type_ref = self.get_object("__builtins__.type")

        # Resolve dependencies within the program.

        for module in self.modules.values():
            module.complete()

        # Remove unneeded modules.

        for name, module in list(self.modules.items()):
            if name not in self.required:
                module.unpropagate()
                del self.modules[name]
                self.removed[name] = module

        # Collect redundant objects.

        for module in self.removed.values():
            module.collect()

        # Assert module objects where aliases have been removed.

        for name in self.required:
            if name not in self.objects:
                self.objects[name] = Reference("<module>", name)

        return m

    def finalise(self):

        """
        Finalise the inspected program, returning whether the program could be
        finalised.
        """

        self.finalise_classes()
        self.to_cache()

        # Missing names are only acted upon after the module data has been
        # written to the cache.

        if self.missing:
            return False

        self.set_class_types()
        self.define_instantiators()
        self.collect_constants()

        return True

    # Supporting operations.

    def resolve(self):

        "Resolve dependencies between modules."

        self.waiting = {}
        self.depends = {}

        for module in self.modules.values():

            # Resolve all deferred references in each module.

            for ref in module.deferred:
                found = self.find_dependency(ref)
                if not found:
                    self.missing.add((module.name, ref.get_origin()))

                # Record the resolved names and identify required modules.

                else:
                    # Find the providing module of this reference.
                    # Where definitive details of the origin cannot be found,
                    # identify the provider using the deferred reference.
                    # NOTE: This may need to test for static origins.

                    provider = self.get_module_provider(found.unresolved() and ref or found)
                    ref.mutate(found)

                    # Record any external dependency.

                    if provider and provider != module.name:

                        # Record the provider dependency. Providers that were
                        # never queued have no accessing modules entry yet.

                        module.required.add(provider)
                        init_item(self.accessing_modules, provider, set)
                        self.accessing_modules[provider].add(module.name)

                        # Postpone any inclusion of the provider until this
                        # module becomes required.

                        if module.name not in self.required:
                            init_item(self.waiting, module.name, set)
                            self.waiting[module.name].add(provider)

                        # Make this module required in the accessing module.

                        elif provider not in self.required:
                            self.required.add(provider)
                            self._log("Requiring", provider, "for", ref)

                        # Record a module ordering dependency.

                        if not found.static() or self.uses_dynamic_callable(found):
                            init_item(self.depends, module.name, set)
                            self.depends[module.name].add(provider)

        # Check modules again to see if they are now required and should now
        # cause the inclusion of other modules providing objects to the program.

        for module_name in list(self.waiting):
            self.require_providers(module_name)

    def require_providers(self, module_name):

        """
        Test if 'module_name' is itself required and, if so, require modules
        containing objects provided to the module.
        """

        if module_name in self.required and module_name in self.waiting:
            for provider in self.waiting[module_name]:
                if provider not in self.required:
                    self.required.add(provider)
                    self._log("Requiring", provider)
                    self.require_providers(provider)

    def uses_dynamic_callable(self, ref):

        """
        Return whether 'ref' refers to a callable employing defaults that may
        need initialising before the callable can be used.
        """

        # Find the function or method associated with the reference.

        if ref.has_kind("<function>"):
            origin = ref.get_origin()
        elif ref.has_kind("<class>"):
            origin = "%s.__init__" % ref.get_origin()
        else:
            return False

        # Find any defaults for the function or method.

        defaults = self.function_defaults.get(origin)
        if not defaults:
            return False

        # Identify non-constant defaults.

        for name, default in defaults:
            if not default.is_constant_alias():
                return True

        return False

    def order_modules(self):

        """
        Produce a module initialisation ordering, returning a list of module
        names with "__main__" last. A ProgramError is raised where the recorded
        dependencies between modules are cyclic.
        """

        self.check_ordering()

        # Record the number of modules using or depending on each module.

        usage = {}

        for module_name in self.modules.keys():
            usage[module_name] = 0

        for module_name, depend_names in self.depends.items():
            if module_name in self.modules:
                for depend_name in depend_names:
                    if depend_name in self.modules:
                        usage[depend_name] += 1

        # Produce an ordering by obtaining exposed modules (required by modules
        # already processed) and putting them at the start of the list.

        ordered = []

        while usage:
            progressed = False

            # Work from a snapshot so that counts reduced during this pass are
            # only acted upon in the next pass.

            for module_name, n in list(usage.items()):
                if n == 0:
                    ordered.insert(0, module_name)
                    progressed = True

                    # Reduce usage of the referenced modules, ignoring any
                    # that are not active modules and were thus never counted.

                    for name in self.depends.get(module_name) or ():
                        if name in usage:
                            usage[name] -= 1

                    del usage[module_name]

            # Where every remaining module is still in use, the dependencies
            # are cyclic and no further pass can make progress.

            if not progressed:
                raise ProgramError("Modules %s depend on each other cyclically for non-static objects." % ", ".join(sorted(usage)))

        ordered.remove("__main__")
        ordered.append("__main__")
        return ordered

    def check_ordering(self):

        """
        Check the ordering dependencies, raising a ProgramError where two
        modules depend directly on each other.
        """

        for module_name, modules in self.depends.items():
            for provider in modules:
                if provider in self.depends and module_name in self.depends[provider]:
                    raise ProgramError("Modules %s and %s may not depend on each other for non-static objects." % (module_name, provider))

    def find_dependency(self, ref):

        "Find the ultimate dependency for 'ref'."

        # Follow "<depends>" references, stopping if one is seen again.

        found = set()
        while ref and ref.has_kind("<depends>") and ref not in found:
            found.add(ref)
            ref = self.identify(ref.get_origin())
        return ref

    def get_module_provider(self, ref):

        """
        Identify the provider of the given 'ref', returning the first of its
        ancestors that is a known module, or None if there is no such module.
        """

        for ancestor in ref.ancestors():
            if ancestor in self.modules:
                return ancestor
        return None

    def finalise_classes(self):

        "Finalise the class relationships and attributes."

        self.derive_inherited_attrs()
        self.derive_subclasses()
        self.derive_shadowed_attrs()

    def derive_inherited_attrs(self):

        "Derive inherited attributes for classes throughout the program."

        for name in self.classes.keys():
            self.propagate_attrs_for_class(name)

    def propagate_attrs_for_class(self, name, visited=None):

        """
        Propagate inherited attributes for class 'name'. The optional 'visited'
        list records the classes already being processed and is used to detect
        a class inheriting from itself, for which a ProgramError is raised.
        """

        # Visit classes only once.

        if name in self.all_combined_attrs:
            return

        visited = visited or []

        if name in visited:
            raise ProgramError("Class %s may not inherit from itself: %s -> %s." % (name, " -> ".join(visited), name))

        visited.append(name)

        class_attrs = {}
        instance_attrs = {}

        # Aggregate the attributes from base classes, recording the origins of
        # applicable attributes.

        for base in self.classes[name][::-1]:

            # Get the identity of the class from the reference.

            base = base.get_origin()

            # Define the base class completely before continuing with this
            # class.

            self.propagate_attrs_for_class(base, visited)
            class_attrs.update(self.all_class_attrs[base])

            # Instance attribute origins are combined if different.

            for key, values in self.all_instance_attrs[base].items():
                init_item(instance_attrs, key, set)
                instance_attrs[key].update(values)

        # Class attributes override those defined earlier in the hierarchy.

        class_attrs.update(self.all_class_attrs.get(name, {}))

        # Instance attributes are merely added if not already defined.

        for key in self.all_instance_attrs.get(name, []):
            if key not in instance_attrs:
                instance_attrs[key] = set(["%s.%s" % (name, key)])

        self.all_class_attrs[name] = class_attrs
        self.all_instance_attrs[name] = instance_attrs
        self.all_combined_attrs[name] = set(class_attrs.keys()).union(instance_attrs.keys())

    def derive_subclasses(self):

        "Derive subclass details for classes."

        # NOTE(review): this assumes an entry already exists in self.subclasses
        # for every base class; nothing in this file creates such entries, so
        # confirm that they are provided elsewhere.

        for name, bases in self.classes.items():
            for base in bases:

                # Get the identity of the class from the reference.

                base = base.get_origin()
                self.subclasses[base].add(name)

    def derive_shadowed_attrs(self):

        "Derive shadowed attributes for classes."

        # Shadowed attributes are those defined both as instance attributes and
        # as class attributes for the same class.

        for name, attrs in self.all_instance_attrs.items():
            attrs = set(attrs.keys()).intersection(self.all_class_attrs[name].keys())
            if attrs:
                self.all_shadowed_attrs[name] = attrs

    def set_class_types(self):

        "Set the type of each class."

        for attrs in self.all_class_attrs.values():
            attrs["__class__"] = self.type_ref.get_origin()

    def define_instantiators(self):

        """
        Consolidate parameter and default details, incorporating initialiser
        details to define instantiator signatures.
        """

        for cls, attrs in self.all_class_attrs.items():
            initialiser = attrs["__init__"]
            self.function_parameters[cls] = self.function_parameters[initialiser]
            self.function_defaults[cls] = self.function_defaults[initialiser]

    def collect_constants(self):

        "Get constants from all active modules."

        for module in self.modules.values():
            self.all_constants.update(module.constants)

    # Import methods.

    def find_in_path(self, name):

        """
        Find the given module 'name' in the search path, returning None where no
        such module could be found, or a 2-tuple from the 'find' method
        otherwise.
        """

        for d in self.path:
            m = self.find(d, name)
            if m: return m
        return None

    def find(self, d, name):

        """
        In the directory 'd', find the given module 'name', where 'name' can
        either refer to a single file module or to a package. Return None if the
        'name' cannot be associated with either a file or a package directory,
        or a 2-tuple from '_find_package' or '_find_module' otherwise.
        """

        # Packages take precedence over single file modules.

        m = self._find_package(d, name)
        if m: return m
        m = self._find_module(d, name)
        if m: return m
        return None

    def _find_module(self, d, name):

        """
        In the directory 'd', find the given module 'name', returning None where
        no suitable file exists in the directory, or a 2-tuple consisting of
        None (indicating that no package directory is involved) and a filename
        indicating the location of the module.
        """

        name_py = name + extsep + "py"
        filename = self._find_file(d, name_py)
        if filename:
            return None, filename
        return None

    def _find_package(self, d, name):

        """
        In the directory 'd', find the given package 'name', returning None
        where no suitable package directory exists, or a 2-tuple consisting of
        a directory (indicating the location of the package directory itself)
        and a filename indicating the location of the __init__.py module which
        declares the package's top-level contents.
        """

        filename = self._find_file(d, name)
        if filename:
            init_py = "__init__" + extsep + "py"
            init_py_filename = self._find_file(filename, init_py)
            if init_py_filename:
                return filename, init_py_filename
        return None

    def _find_file(self, d, filename):

        """
        Return the filename obtained when searching the directory 'd' for the
        given 'filename', or None if no actual file exists for the filename.
        """

        filename = join(d, filename)
        if exists(filename):
            return filename
        else:
            return None

    def load(self, name):

        """
        Load the module or package with the given 'name'. Return an object
        referencing the loaded module or package, or None if no such module or
        package exists.
        """

        # Loaded modules are returned immediately.
        # Modules may be known but not yet loading (having been registered as
        # submodules), loading, loaded, or completely unknown.

        module = self.get_module(name)

        if module:
            return module

        # Otherwise, modules are loaded.

        # Split the name into path components, and try to find the uppermost in
        # the search path.

        path = name.split(".")
        path_so_far = []

        # The package directory of the previous component, if any.

        d = None

        for p in path:

            # Get the module's filesystem details.

            if not path_so_far:
                m = self.find_in_path(p)
            elif d:
                m = self.find(d, p)
            else:
                m = None

            path_so_far.append(p)
            module_name = ".".join(path_so_far)

            # Return None if the module could not be located.

            if not m:
                self._log("Not found (%s)" % name)
                return None

            # Get the directory and module filename.

            d, filename = m

        # Get the module itself.

        return self.load_from_file(filename, module_name)

    def load_from_file(self, filename, module_name=None):

        """
        Load the module from the given 'filename', using the optional
        'module_name' or "__main__" if no name is given. Return the module.
        """

        if module_name is None:
            module_name = "__main__"

        module = self.modules.get(module_name)

        if not module:

            # Try to load from cache.

            module = self.load_from_cache(filename, module_name)
            if module:
                return module

            # If no cache entry exists, load from file.

            module = inspector.InspectedModule(module_name, self)
            self.add_module(module_name, module)
            self.update_cache_validity(module)

            self._load(module, module_name, lambda m: m.parse, filename)

        return module

    def update_cache_validity(self, module):

        "Make 'module' valid in the cache, but invalidate accessing modules."

        accessing = self.accessing_modules.get(module.name)
        if accessing:
            self.invalidated.update(accessing)
        self.invalidated.discard(module.name)

    def source_is_new(self, filename, module_name):

        """
        Return whether 'filename' is newer than the cached 'module_name'. The
        source is also considered new where no cache is configured, where no
        cached module exists, or where the module has been invalidated.
        """

        if self.cache:
            cache_filename = join(self.cache, module_name)
            return not exists(cache_filename) or \
                getmtime(filename) > getmtime(cache_filename) or \
                module_name in self.invalidated
        else:
            return True

    def load_from_cache(self, filename, module_name):

        """
        Return a module residing in the cache, or None where the source in
        'filename' is newer than any cached form of 'module_name'.
        """

        module = self.modules.get(module_name)

        if not module and not self.source_is_new(filename, module_name):
            module = CachedModule(module_name, self)
            self.add_module(module_name, module)

            filename = join(self.cache, module_name)
            self._load(module, module_name, lambda m: m.from_cache, filename)

        return module

    def _load(self, module, module_name, fn, filename):

        """
        Load 'module' for the given 'module_name', and with 'fn' performing an
        invocation on the module with the given 'filename'.
        """

        # Load the module.

        self._log("Required" if module_name in self.required else "Loading", module_name, "from", filename)
        fn(module)(filename)

        # Add the module object if not already defined.

        if module_name not in self.objects:
            self.objects[module_name] = Reference("<module>", module_name)

    def add_module(self, module_name, module):

        """
        Record 'module' under the given 'module_name', removing the name from
        the queue of modules awaiting import.
        """

        self.modules[module_name] = module
        self.to_import.discard(module_name)

# vim: tabstop=4 expandtab shiftwidth=4