1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/importer.py Tue Aug 30 16:51:10 2016 +0200
1.3 @@ -0,0 +1,710 @@
1.4 +#!/usr/bin/env python
1.5 +
1.6 +"""
1.7 +Import logic.
1.8 +
1.9 +Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013,
1.10 + 2014, 2015, 2016 Paul Boddie <paul@boddie.org.uk>
1.11 +
1.12 +This program is free software; you can redistribute it and/or modify it under
1.13 +the terms of the GNU General Public License as published by the Free Software
1.14 +Foundation; either version 3 of the License, or (at your option) any later
1.15 +version.
1.16 +
1.17 +This program is distributed in the hope that it will be useful, but WITHOUT
1.18 +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
1.19 +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
1.20 +details.
1.21 +
1.22 +You should have received a copy of the GNU General Public License along with
1.23 +this program. If not, see <http://www.gnu.org/licenses/>.
1.24 +"""
1.25 +
1.26 +from errors import ProgramError
1.27 +from os.path import exists, extsep, getmtime, join
1.28 +from os import listdir, makedirs, remove
1.29 +from common import init_item, readfile, writefile
1.30 +from referencing import Reference
1.31 +import inspector
1.32 +import sys
1.33 +
1.34 +class Importer:
1.35 +
1.36 + "An import machine, searching for and loading modules."
1.37 +
1.38 + def __init__(self, path, cache=None, verbose=False):
1.39 +
1.40 + """
1.41 + Initialise the importer with the given search 'path' - a list of
1.42 + directories to search for Python modules.
1.43 +
1.44 + The optional 'cache' should be the name of a directory used to store
1.45 + cached module information.
1.46 +
1.47 + The optional 'verbose' parameter causes output concerning the activities
1.48 + of the object to be produced if set to a true value (not the default).
1.49 + """
1.50 +
1.51 + self.path = path
1.52 + self.cache = cache
1.53 + self.verbose = verbose
1.54 +
1.55 + self.modules = {}
1.56 + self.modules_ordered = []
1.57 + self.loading = set()
1.58 + self.hidden = {}
1.59 + self.revealing = {}
1.60 + self.invalidated = set()
1.61 +
1.62 + self.objects = {}
1.63 + self.classes = {}
1.64 + self.function_parameters = {}
1.65 + self.function_defaults = {}
1.66 + self.function_targets = {}
1.67 + self.function_arguments = {}
1.68 +
1.69 + # Derived information.
1.70 +
1.71 + self.subclasses = {}
1.72 +
1.73 + # Attributes of different object types.
1.74 +
1.75 + self.all_class_attrs = {}
1.76 + self.all_instance_attrs = {}
1.77 + self.all_instance_attr_constants = {}
1.78 + self.all_combined_attrs = {}
1.79 + self.all_module_attrs = {}
1.80 + self.all_shadowed_attrs = {}
1.81 +
1.82 + # References to external names and aliases within program units.
1.83 +
1.84 + self.all_name_references = {}
1.85 + self.all_initialised_names = {}
1.86 + self.all_aliased_names = {}
1.87 +
1.88 + # General attribute accesses.
1.89 +
1.90 + self.all_attr_accesses = {}
1.91 + self.all_const_accesses = {}
1.92 + self.all_attr_access_modifiers = {}
1.93 +
1.94 + # Constant literals and values.
1.95 +
1.96 + self.all_constants = {}
1.97 + self.all_constant_values = {}
1.98 +
1.99 + self.make_cache()
1.100 +
1.101 + def make_cache(self):
1.102 + if self.cache and not exists(self.cache):
1.103 + makedirs(self.cache)
1.104 +
1.105 + def check_cache(self, details):
1.106 +
1.107 + """
1.108 + Check whether the cache applies for the given 'details', invalidating it
1.109 + if it does not.
1.110 + """
1.111 +
1.112 + recorded_details = self.get_cache_details()
1.113 +
1.114 + if recorded_details != details:
1.115 + self.remove_cache()
1.116 +
1.117 + writefile(self.get_cache_details_filename(), details)
1.118 +
1.119 + def get_cache_details_filename(self):
1.120 +
1.121 + "Return the filename for the cache details."
1.122 +
1.123 + return join(self.cache, "$details")
1.124 +
1.125 + def get_cache_details(self):
1.126 +
1.127 + "Return details of the cache."
1.128 +
1.129 + details_filename = self.get_cache_details_filename()
1.130 +
1.131 + if not exists(details_filename):
1.132 + return None
1.133 + else:
1.134 + return readfile(details_filename)
1.135 +
1.136 + def remove_cache(self):
1.137 +
1.138 + "Remove the contents of the cache."
1.139 +
1.140 + for filename in listdir(self.cache):
1.141 + remove(join(self.cache, filename))
1.142 +
1.143 + def to_cache(self):
1.144 +
1.145 + "Write modules to the cache."
1.146 +
1.147 + if self.cache:
1.148 + for module_name, module in self.modules.items():
1.149 + module.to_cache(join(self.cache, module_name))
1.150 +
1.151 + # Object retrieval and storage.
1.152 +
1.153 + def get_object(self, name):
1.154 +
1.155 + """
1.156 + Return a reference for the given 'name' or None if no such object
1.157 + exists.
1.158 + """
1.159 +
1.160 + return self.objects.get(name)
1.161 +
1.162 + def set_object(self, name, value=None):
1.163 +
1.164 + "Set the object with the given 'name' and the given 'value'."
1.165 +
1.166 + if isinstance(value, Reference):
1.167 + ref = value.alias(name)
1.168 + else:
1.169 + ref = Reference(value, name)
1.170 +
1.171 + self.objects[name] = ref
1.172 +
1.173 + # Indirect object retrieval.
1.174 +
1.175 + def get_attributes(self, ref, attrname):
1.176 +
1.177 + """
1.178 + Return attributes provided by 'ref' for 'attrname'. Class attributes
1.179 + may be provided by instances.
1.180 + """
1.181 +
1.182 + kind = ref.get_kind()
1.183 + if kind == "<class>":
1.184 + ref = self.get_class_attribute(ref.get_origin(), attrname)
1.185 + return ref and set([ref]) or set()
1.186 + elif kind == "<instance>":
1.187 + return self.get_combined_attributes(ref.get_origin(), attrname)
1.188 + elif kind == "<module>":
1.189 + ref = self.get_module_attribute(ref.get_origin(), attrname)
1.190 + return ref and set([ref]) or set()
1.191 + else:
1.192 + return set()
1.193 +
1.194 + def get_class_attribute(self, object_type, attrname):
1.195 +
1.196 + "Return from 'object_type' the details of class attribute 'attrname'."
1.197 +
1.198 + attr = self.all_class_attrs[object_type].get(attrname)
1.199 + return attr and self.get_object(attr)
1.200 +
1.201 + def get_instance_attributes(self, object_type, attrname):
1.202 +
1.203 + """
1.204 + Return from 'object_type' the details of instance attribute 'attrname'.
1.205 + """
1.206 +
1.207 + consts = self.all_instance_attr_constants.get(object_type)
1.208 + attrs = set()
1.209 + for attr in self.all_instance_attrs[object_type].get(attrname, []):
1.210 + attrs.add(consts and consts.get(attrname) or Reference("<var>", attr))
1.211 + return attrs
1.212 +
1.213 + def get_combined_attributes(self, object_type, attrname):
1.214 +
1.215 + """
1.216 + Return from 'object_type' the details of class or instance attribute
1.217 + 'attrname'.
1.218 + """
1.219 +
1.220 + ref = self.get_class_attribute(object_type, attrname)
1.221 + refs = ref and set([ref]) or set()
1.222 + refs.update(self.get_instance_attributes(object_type, attrname))
1.223 + return refs
1.224 +
1.225 + def get_module_attribute(self, object_type, attrname):
1.226 +
1.227 + "Return from 'object_type' the details of module attribute 'attrname'."
1.228 +
1.229 + if attrname in self.all_module_attrs[object_type]:
1.230 + return self.get_object("%s.%s" % (object_type, attrname))
1.231 + else:
1.232 + return None
1.233 +
1.234 + # Module management.
1.235 +
1.236 + def get_modules(self):
1.237 +
1.238 + "Return all modules known to the importer."
1.239 +
1.240 + return self.modules.values()
1.241 +
1.242 + def get_module(self, name, hidden=False):
1.243 +
1.244 + "Return the module with the given 'name'."
1.245 +
1.246 + if not self.modules.has_key(name):
1.247 + return None
1.248 +
1.249 + # Obtain the module and attempt to reveal it.
1.250 +
1.251 + module = self.modules[name]
1.252 + if not hidden:
1.253 + self.reveal_module(module)
1.254 + return module
1.255 +
1.256 + def reveal_module(self, module):
1.257 +
1.258 + "Check if 'module' is hidden and reveal it."
1.259 +
1.260 + if module.name in self.hidden:
1.261 + del self.hidden[module.name]
1.262 +
1.263 + # Reveal referenced modules.
1.264 +
1.265 + module.reveal_referenced()
1.266 +
1.267 + def set_revealing(self, module, name, instigator):
1.268 +
1.269 + """
1.270 + Make the revealing of 'module' conditional on 'name' for the given
1.271 + 'instigator' of the reveal operation.
1.272 + """
1.273 +
1.274 + self.revealing[module.name].add((name, instigator))
1.275 +
1.276 + # Program operations.
1.277 +
1.278 + def initialise(self, filename, reset=False):
1.279 +
1.280 + """
1.281 + Initialise a program whose main module is 'filename', resetting the
1.282 + cache if 'reset' is true. Return the main module.
1.283 + """
1.284 +
1.285 + if reset:
1.286 + self.remove_cache()
1.287 + self.check_cache(filename)
1.288 +
1.289 + # Load the program itself.
1.290 +
1.291 + m = self.load_from_file(filename)
1.292 +
1.293 + # Resolve dependencies within the program.
1.294 +
1.295 + for module in self.modules_ordered:
1.296 + module.resolve()
1.297 +
1.298 + return m
1.299 +
1.300 + def finalise(self):
1.301 +
1.302 + "Finalise the inspected program."
1.303 +
1.304 + self.finalise_classes()
1.305 + self.remove_hidden()
1.306 + self.to_cache()
1.307 + self.set_class_types()
1.308 + self.define_instantiators()
1.309 + self.collect_constants()
1.310 +
1.311 + def finalise_classes(self):
1.312 +
1.313 + "Finalise the class relationships and attributes."
1.314 +
1.315 + self.derive_inherited_attrs()
1.316 + self.derive_subclasses()
1.317 + self.derive_shadowed_attrs()
1.318 +
1.319 + def derive_inherited_attrs(self):
1.320 +
1.321 + "Derive inherited attributes for classes throughout the program."
1.322 +
1.323 + for name in self.classes.keys():
1.324 + self.propagate_attrs_for_class(name)
1.325 +
1.326 + def propagate_attrs_for_class(self, name, visited=None):
1.327 +
1.328 + "Propagate inherited attributes for class 'name'."
1.329 +
1.330 + # Visit classes only once.
1.331 +
1.332 + if self.all_combined_attrs.has_key(name):
1.333 + return
1.334 +
1.335 + visited = visited or []
1.336 +
1.337 + if name in visited:
1.338 + raise ProgramError, "Class %s may not inherit from itself: %s -> %s." % (name, " -> ".join(visited), name)
1.339 +
1.340 + visited.append(name)
1.341 +
1.342 + class_attrs = {}
1.343 + instance_attrs = {}
1.344 +
1.345 + # Aggregate the attributes from base classes, recording the origins of
1.346 + # applicable attributes.
1.347 +
1.348 + for base in self.classes[name][::-1]:
1.349 +
1.350 + # Get the identity of the class from the reference.
1.351 +
1.352 + base = base.get_origin()
1.353 +
1.354 + # Define the base class completely before continuing with this
1.355 + # class.
1.356 +
1.357 + self.propagate_attrs_for_class(base, visited)
1.358 + class_attrs.update(self.all_class_attrs[base])
1.359 +
1.360 + # Instance attribute origins are combined if different.
1.361 +
1.362 + for key, values in self.all_instance_attrs[base].items():
1.363 + init_item(instance_attrs, key, set)
1.364 + instance_attrs[key].update(values)
1.365 +
1.366 + # Class attributes override those defined earlier in the hierarchy.
1.367 +
1.368 + class_attrs.update(self.all_class_attrs.get(name, {}))
1.369 +
1.370 + # Instance attributes are merely added if not already defined.
1.371 +
1.372 + for key in self.all_instance_attrs.get(name, []):
1.373 + if not instance_attrs.has_key(key):
1.374 + instance_attrs[key] = set(["%s.%s" % (name, key)])
1.375 +
1.376 + self.all_class_attrs[name] = class_attrs
1.377 + self.all_instance_attrs[name] = instance_attrs
1.378 + self.all_combined_attrs[name] = set(class_attrs.keys()).union(instance_attrs.keys())
1.379 +
1.380 + def derive_subclasses(self):
1.381 +
1.382 + "Derive subclass details for classes."
1.383 +
1.384 + for name, bases in self.classes.items():
1.385 + for base in bases:
1.386 +
1.387 + # Get the identity of the class from the reference.
1.388 +
1.389 + base = base.get_origin()
1.390 + self.subclasses[base].add(name)
1.391 +
1.392 + def derive_shadowed_attrs(self):
1.393 +
1.394 + "Derive shadowed attributes for classes."
1.395 +
1.396 + for name, attrs in self.all_instance_attrs.items():
1.397 + attrs = set(attrs.keys()).intersection(self.all_class_attrs[name].keys())
1.398 + if attrs:
1.399 + self.all_shadowed_attrs[name] = attrs
1.400 +
1.401 + def remove_hidden(self):
1.402 +
1.403 + "Remove all hidden modules."
1.404 +
1.405 + # First reveal any modules exposing names.
1.406 +
1.407 + for modname, names in self.revealing.items():
1.408 + module = self.modules[modname]
1.409 +
1.410 + # Obtain the imported names and determine whether they should cause
1.411 + # the module to be revealed.
1.412 +
1.413 + for (name, instigator) in names:
1.414 + if module is not instigator:
1.415 +
1.416 + # Only if an object is provided by the module should the
1.417 + # module be revealed. References to objects in other modules
1.418 + # should not in themselves expose the module in which those
1.419 + # references occur.
1.420 +
1.421 + ref = module.get_global(name)
1.422 + if ref and ref.provided_by_module(module.name):
1.423 + self.reveal_module(module)
1.424 + instigator.revealed.add(module)
1.425 +
1.426 + # Then remove all modules that are still hidden.
1.427 +
1.428 + for modname in self.hidden:
1.429 + module = self.modules[modname]
1.430 + module.unpropagate()
1.431 + del self.modules[modname]
1.432 + ref = self.objects.get(modname)
1.433 + if ref and ref.get_kind() == "<module>":
1.434 + del self.objects[modname]
1.435 +
1.436 + def set_class_types(self):
1.437 +
1.438 + "Set the type of each class."
1.439 +
1.440 + ref = self.get_object("__builtins__.type")
1.441 + for attrs in self.all_class_attrs.values():
1.442 + attrs["__class__"] = ref.get_origin()
1.443 +
1.444 + def define_instantiators(self):
1.445 +
1.446 + """
1.447 + Consolidate parameter and default details, incorporating initialiser
1.448 + details to define instantiator signatures.
1.449 + """
1.450 +
1.451 + for cls, attrs in self.all_class_attrs.items():
1.452 + initialiser = attrs["__init__"]
1.453 + self.function_parameters[cls] = self.function_parameters[initialiser][1:]
1.454 + self.function_defaults[cls] = self.function_defaults[initialiser]
1.455 +
1.456 + def collect_constants(self):
1.457 +
1.458 + "Get constants from all active modules."
1.459 +
1.460 + for module in self.modules.values():
1.461 + self.all_constants.update(module.constants)
1.462 +
1.463 + # Import methods.
1.464 +
1.465 + def find_in_path(self, name):
1.466 +
1.467 + """
1.468 + Find the given module 'name' in the search path, returning None where no
1.469 + such module could be found, or a 2-tuple from the 'find' method
1.470 + otherwise.
1.471 + """
1.472 +
1.473 + for d in self.path:
1.474 + m = self.find(d, name)
1.475 + if m: return m
1.476 + return None
1.477 +
1.478 + def find(self, d, name):
1.479 +
1.480 + """
1.481 + In the directory 'd', find the given module 'name', where 'name' can
1.482 + either refer to a single file module or to a package. Return None if the
1.483 + 'name' cannot be associated with either a file or a package directory,
1.484 + or a 2-tuple from '_find_package' or '_find_module' otherwise.
1.485 + """
1.486 +
1.487 + m = self._find_package(d, name)
1.488 + if m: return m
1.489 + m = self._find_module(d, name)
1.490 + if m: return m
1.491 + return None
1.492 +
1.493 + def _find_module(self, d, name):
1.494 +
1.495 + """
1.496 + In the directory 'd', find the given module 'name', returning None where
1.497 + no suitable file exists in the directory, or a 2-tuple consisting of
1.498 + None (indicating that no package directory is involved) and a filename
1.499 + indicating the location of the module.
1.500 + """
1.501 +
1.502 + name_py = name + extsep + "py"
1.503 + filename = self._find_file(d, name_py)
1.504 + if filename:
1.505 + return None, filename
1.506 + return None
1.507 +
1.508 + def _find_package(self, d, name):
1.509 +
1.510 + """
1.511 + In the directory 'd', find the given package 'name', returning None
1.512 + where no suitable package directory exists, or a 2-tuple consisting of
1.513 + a directory (indicating the location of the package directory itself)
1.514 + and a filename indicating the location of the __init__.py module which
1.515 + declares the package's top-level contents.
1.516 + """
1.517 +
1.518 + filename = self._find_file(d, name)
1.519 + if filename:
1.520 + init_py = "__init__" + extsep + "py"
1.521 + init_py_filename = self._find_file(filename, init_py)
1.522 + if init_py_filename:
1.523 + return filename, init_py_filename
1.524 + return None
1.525 +
1.526 + def _find_file(self, d, filename):
1.527 +
1.528 + """
1.529 + Return the filename obtained when searching the directory 'd' for the
1.530 + given 'filename', or None if no actual file exists for the filename.
1.531 + """
1.532 +
1.533 + filename = join(d, filename)
1.534 + if exists(filename):
1.535 + return filename
1.536 + else:
1.537 + return None
1.538 +
1.539 + def load(self, name, return_leaf=False, hidden=False):
1.540 +
1.541 + """
1.542 + Load the module or package with the given 'name'. Return an object
1.543 + referencing the loaded module or package, or None if no such module or
1.544 + package exists.
1.545 +
1.546 + Where 'return_leaf' is specified, the final module in the chain is
1.547 + returned. Where 'hidden' is specified, the module is marked as hidden.
1.548 + """
1.549 +
1.550 + if return_leaf:
1.551 + name_for_return = name
1.552 + else:
1.553 + name_for_return = name.split(".")[0]
1.554 +
1.555 + # Loaded modules are returned immediately.
1.556 + # Modules may be known but not yet loading (having been registered as
1.557 + # submodules), loading, loaded, or completely unknown.
1.558 +
1.559 + module = self.get_module(name, hidden)
1.560 +
1.561 + if module:
1.562 + return self.modules[name_for_return]
1.563 +
1.564 + # Otherwise, modules are loaded.
1.565 +
1.566 + if self.verbose:
1.567 + print >>sys.stderr, "Loading", name
1.568 +
1.569 + # Split the name into path components, and try to find the uppermost in
1.570 + # the search path.
1.571 +
1.572 + path = name.split(".")
1.573 + path_so_far = []
1.574 + top = module = None
1.575 +
1.576 + for p in path:
1.577 +
1.578 + # Get the module's filesystem details.
1.579 +
1.580 + if not path_so_far:
1.581 + m = self.find_in_path(p)
1.582 + elif d:
1.583 + m = self.find(d, p)
1.584 + else:
1.585 + m = None
1.586 +
1.587 + path_so_far.append(p)
1.588 + module_name = ".".join(path_so_far)
1.589 +
1.590 + if not m:
1.591 + if self.verbose:
1.592 + print >>sys.stderr, "Not found (%s)" % name
1.593 +
1.594 + return None # NOTE: Import error.
1.595 +
1.596 + # Get the module itself.
1.597 +
1.598 + d, filename = m
1.599 + submodule = self.load_from_file(filename, module_name, hidden)
1.600 +
1.601 + if module is None:
1.602 + top = submodule
1.603 +
1.604 + module = submodule
1.605 +
1.606 + # Return either the deepest or the uppermost module.
1.607 +
1.608 + return return_leaf and module or top
1.609 +
1.610 + def load_from_file(self, filename, module_name=None, hidden=False):
1.611 +
1.612 + "Load the module from the given 'filename'."
1.613 +
1.614 + if module_name is None:
1.615 + module_name = "__main__"
1.616 +
1.617 + module = self.modules.get(module_name)
1.618 +
1.619 + if not module:
1.620 +
1.621 + # Try to load from cache.
1.622 +
1.623 + module = self.load_from_cache(filename, module_name, hidden)
1.624 + if module:
1.625 + return module
1.626 +
1.627 + # If no cache entry exists, load from file.
1.628 +
1.629 + module = inspector.InspectedModule(module_name, self)
1.630 + self.add_module(module_name, module)
1.631 + self.update_cache_validity(module)
1.632 +
1.633 + # Initiate loading if not already in progress.
1.634 +
1.635 + if not module.loaded and module not in self.loading:
1.636 + self._load(module, module_name, hidden, lambda m: m.parse, filename)
1.637 +
1.638 + return module
1.639 +
1.640 + def update_cache_validity(self, module):
1.641 +
1.642 + "Make 'module' valid in the cache, but invalidate accessing modules."
1.643 +
1.644 + self.invalidated.update(module.accessing_modules)
1.645 + if module.name in self.invalidated:
1.646 + self.invalidated.remove(module.name)
1.647 +
1.648 + def source_is_new(self, filename, module_name):
1.649 +
1.650 + "Return whether 'filename' is newer than the cached 'module_name'."
1.651 +
1.652 + if self.cache:
1.653 + cache_filename = join(self.cache, module_name)
1.654 + return not exists(cache_filename) or \
1.655 + getmtime(filename) > getmtime(cache_filename) or \
1.656 + module_name in self.invalidated
1.657 + else:
1.658 + return True
1.659 +
1.660 + def load_from_cache(self, filename, module_name, hidden=False):
1.661 +
1.662 + "Return a module residing in the cache."
1.663 +
1.664 + module = self.modules.get(module_name)
1.665 +
1.666 + if not self.source_is_new(filename, module_name):
1.667 +
1.668 + if not module:
1.669 + module = inspector.CachedModule(module_name, self)
1.670 + self.add_module(module_name, module)
1.671 +
1.672 + if not module.loaded and module not in self.loading:
1.673 + filename = join(self.cache, module_name)
1.674 + self._load(module, module_name, hidden, lambda m: m.from_cache, filename)
1.675 +
1.676 + return module
1.677 +
1.678 + def _load(self, module, module_name, hidden, fn, filename):
1.679 +
1.680 + """
1.681 + Load 'module' for the given 'module_name', with the module being hidden
1.682 + if 'hidden' is a true value, and with 'fn' performing an invocation on
1.683 + the module with the given 'filename'.
1.684 + """
1.685 +
1.686 + # Indicate that the module is hidden if requested.
1.687 +
1.688 + if hidden:
1.689 + self.hidden[module_name] = module
1.690 +
1.691 + # Indicate that loading is in progress and load the module.
1.692 +
1.693 + self.loading.add(module)
1.694 + if self.verbose:
1.695 + print >>sys.stderr, "Loading", filename
1.696 + fn(module)(filename)
1.697 + if self.verbose:
1.698 + print >>sys.stderr, "Loaded", filename
1.699 + self.loading.remove(module)
1.700 +
1.701 + self.modules_ordered.append(module)
1.702 +
1.703 + def add_module(self, module_name, module):
1.704 +
1.705 + """
1.706 + Return the module with the given 'module_name', adding a new module
1.707 + object if one does not already exist.
1.708 + """
1.709 +
1.710 + self.modules[module_name] = module
1.711 + self.objects[module_name] = Reference("<module>", module_name)
1.712 +
1.713 +# vim: tabstop=4 expandtab shiftwidth=4