#!/usr/bin/env python

"""
Import logic.

Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013,
              2014, 2015, 2016 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
this program.  If not, see <http://www.gnu.org/licenses/>.
"""

from errors import ProgramError
from os.path import exists, extsep, getmtime, join
from os import listdir, makedirs, remove
from common import init_item, readfile, writefile
from modules import CachedModule
from referencing import Reference
import inspector
import sys

class Importer:

    "An import machine, searching for and loading modules."

    def __init__(self, path, cache=None, verbose=False):

        """
        Initialise the importer with the given search 'path' - a list of
        directories to search for Python modules.

        The optional 'cache' should be the name of a directory used to store
        cached module information.

        The optional 'verbose' parameter causes output concerning the activities
        of the object to be produced if set to a true value (not the default).
        """

        self.path = path
        self.cache = cache
        self.verbose = verbose

        # Names queued for import, names required in the final program, and
        # modules removed from the program by 'initialise'.

        self.to_import = set()
        self.required = set(["__main__"])
        self.removed = {}

        # Loaded modules, the names of modules accessing each module, and the
        # names of modules whose cached form must not be used.

        self.modules = {}
        self.accessing_modules = {}
        self.invalidated = set()

        self.objects = {}
        self.classes = {}
        self.function_parameters = {}
        self.function_defaults = {}
        self.function_targets = {}
        self.function_arguments = {}

        # Derived information.

        self.subclasses = {}

        # Attributes of different object types.

        self.all_class_attrs = {}
        self.all_instance_attrs = {}
        self.all_instance_attr_constants = {}
        self.all_combined_attrs = {}
        self.all_module_attrs = {}
        self.all_shadowed_attrs = {}

        # References to external names and aliases within program units.

        self.all_name_references = {}
        self.all_initialised_names = {}
        self.all_aliased_names = {}

        # General attribute accesses.

        self.all_attr_accesses = {}
        self.all_const_accesses = {}
        self.all_attr_access_modifiers = {}

        # Constant literals and values.

        self.all_constants = {}
        self.all_constant_values = {}

        # State populated by 'resolve' and 'initialise' respectively, defined
        # here so that the attributes always exist.

        self.waiting = {}
        self.type_ref = None

        self.make_cache()

    def make_cache(self):

        "Create the cache directory if one is configured and does not exist."

        if self.cache and not exists(self.cache):
            makedirs(self.cache)

    def check_cache(self, details):

        """
        Check whether the cache applies for the given 'details', invalidating it
        if it does not. Where no cache directory is configured, nothing is done.
        """

        if not self.cache:
            return

        recorded_details = self.get_cache_details()

        if recorded_details != details:
            self.remove_cache()

        writefile(self.get_cache_details_filename(), details)

    def get_cache_details_filename(self):

        "Return the filename for the cache details."

        return join(self.cache, "$details")

    def get_cache_details(self):

        """
        Return details of the cache, or None if no cache is configured or no
        details have been recorded.
        """

        if not self.cache:
            return None

        details_filename = self.get_cache_details_filename()

        if not exists(details_filename):
            return None
        else:
            return readfile(details_filename)

    def remove_cache(self):

        """
        Remove the contents of the cache. Where no cache directory is
        configured or present, nothing is done.
        """

        # Without this test, a missing cache name would be passed straight to
        # listdir.

        if not self.cache or not exists(self.cache):
            return

        for filename in listdir(self.cache):
            remove(join(self.cache, filename))

    def to_cache(self):

        "Write modules to the cache."

        if self.cache:
            for module_name, module in self.modules.items():
                module.to_cache(join(self.cache, module_name))

    # Object retrieval and storage.

    def get_object(self, name):

        """
        Return a reference for the given 'name' or None if no such object
        exists.
        """

        return self.objects.get(name)

    def set_object(self, name, value=None):

        """
        Set the object with the given 'name' and the given 'value'. A 'value'
        that is already a reference is aliased under 'name'; any other value is
        wrapped in a new reference.
        """

        if isinstance(value, Reference):
            ref = value.alias(name)
        else:
            ref = Reference(value, name)

        self.objects[name] = ref

    # Identification of both stored object names and name references.

    def identify(self, name):

        "Identify 'name' using stored object and external name records."

        return self.objects.get(name) or self.all_name_references.get(name)

    # Indirect object retrieval.

    def get_attributes(self, ref, attrname):

        """
        Return attributes provided by 'ref' for 'attrname'. Class attributes
        may be provided by instances. The result is always a set, being empty
        where no attribute is found or where 'ref' has an unsupported kind.
        """

        kind = ref.get_kind()
        if kind == "<class>":
            ref = self.get_class_attribute(ref.get_origin(), attrname)
            return set([ref]) if ref else set()
        elif kind == "<instance>":
            return self.get_combined_attributes(ref.get_origin(), attrname)
        elif kind == "<module>":
            ref = self.get_module_attribute(ref.get_origin(), attrname)
            return set([ref]) if ref else set()
        else:
            return set()

    def get_class_attribute(self, object_type, attrname):

        "Return from 'object_type' the details of class attribute 'attrname'."

        attr = self.all_class_attrs[object_type].get(attrname)
        return attr and self.get_object(attr)

    def get_instance_attributes(self, object_type, attrname):

        """
        Return from 'object_type' the details of instance attribute 'attrname'.
        """

        consts = self.all_instance_attr_constants.get(object_type)

        # Any recorded constant for the attribute takes the place of the
        # general variable reference.

        const = consts and consts.get(attrname)

        attrs = set()
        for attr in self.all_instance_attrs[object_type].get(attrname, []):
            attrs.add(const or Reference("<var>", attr))
        return attrs

    def get_combined_attributes(self, object_type, attrname):

        """
        Return from 'object_type' the details of class or instance attribute
        'attrname'.
        """

        ref = self.get_class_attribute(object_type, attrname)
        refs = set([ref]) if ref else set()
        refs.update(self.get_instance_attributes(object_type, attrname))
        return refs

    def get_module_attribute(self, object_type, attrname):

        "Return from 'object_type' the details of module attribute 'attrname'."

        if attrname in self.all_module_attrs[object_type]:
            return self.get_object("%s.%s" % (object_type, attrname))
        else:
            return None

    # Module management.

    def queue_module(self, name, accessor, required=False):

        """
        Queue the module with the given 'name' for import from the given
        'accessor' module. If 'required' is true (it is false by default), the
        module will be required in the final program.
        """

        if name not in self.modules:
            self.to_import.add(name)

        if required:
            self.required.add(name)

        init_item(self.accessing_modules, name, set)
        self.accessing_modules[name].add(accessor.name)

    def get_modules(self):

        "Return all modules known to the importer."

        return self.modules.values()

    def get_module(self, name):

        "Return the module with the given 'name', or None if it is not known."

        return self.modules.get(name)

    # Program operations.

    def initialise(self, filename, reset=False):

        """
        Initialise a program whose main module is 'filename', resetting the
        cache if 'reset' is true. Return the main module.
        """

        if reset:
            self.remove_cache()
        self.check_cache(filename)

        # Load the program itself.

        m = self.load_from_file(filename)

        # Load any queued modules.

        while self.to_import:
            for name in list(self.to_import): # avoid mutation issue
                if self.load(name) is None:
                    sys.stderr.write("Module %s could not be found.\n" % name)

                # Names are normally dequeued when the module is added, but a
                # module that cannot be found would otherwise remain queued and
                # cause this loop never to terminate.

                self.to_import.discard(name)

        # Resolve dependencies between modules.

        self.resolve()

        # Record the type of all classes.

        self.type_ref = self.get_object("__builtins__.type")

        # Resolve dependencies within the program.

        for module in self.modules.values():
            module.complete()

        # Remove unneeded modules, working from a snapshot since the module
        # dictionary is modified.

        all_modules = list(self.modules.items())

        for name, module in all_modules:
            if name not in self.required:
                module.unpropagate()
                del self.modules[name]
                self.removed[name] = module

        return m

    def finalise(self):

        "Finalise the inspected program."

        self.finalise_classes()
        self.to_cache()
        self.set_class_types()
        self.define_instantiators()
        self.collect_constants()

    # Supporting operations.

    def resolve(self):

        "Resolve dependencies between modules."

        self.waiting = {}

        for module in self.modules.values():

            # Resolve all deferred references in each module.

            for ref in module.deferred:
                found = self.find_dependency(ref)
                if not found:
                    sys.stderr.write(
                        "Module %s references an unknown object: %s\n" % (
                        module.name, ref.get_origin()))

                # Record the resolved names and identify required modules.

                else:
                    ref.mutate(found)

                    # Find the providing module of this reference.

                    provider = self.get_module_provider(ref)
                    if provider:

                        module.required.add(provider)

                        # Providers need not have been queued by any module,
                        # so an accessor record may not yet exist for them.

                        init_item(self.accessing_modules, provider, set)
                        self.accessing_modules[provider].add(module.name)

                        # Postpone any inclusion of the provider until this
                        # module becomes required.

                        if module.name not in self.required:
                            init_item(self.waiting, module.name, set)
                            self.waiting[module.name].add(provider)

                        # Make this module required in the accessing module.

                        else:
                            self.required.add(provider)

        for module_name in list(self.waiting):
            self.require_providers(module_name)

    def require_providers(self, module_name):

        """
        Where 'module_name' is required and has postponed providers, make each
        such provider required, doing the same for the providers of any newly
        required module.
        """

        if module_name in self.required and module_name in self.waiting:
            for provider in self.waiting[module_name]:
                if provider not in self.required:
                    self.required.add(provider)
                    self.require_providers(provider)

    def find_dependency(self, ref):

        """
        Find the ultimate dependency for 'ref', following references of the
        "<depends>" kind until a different kind of reference, an unknown name
        or an already-visited reference is encountered.
        """

        found = set()
        while ref and ref.has_kind("<depends>") and ref not in found:
            found.add(ref)
            ref = self.identify(ref.get_origin())
        return ref

    def get_module_provider(self, ref):

        """
        Identify the provider of the given 'ref', this being the first of the
        reference's ancestors that names a known module, or None if there is no
        such ancestor.
        """

        for ancestor in ref.ancestors():
            if ancestor in self.modules:
                return ancestor
        return None

    def finalise_classes(self):

        "Finalise the class relationships and attributes."

        self.derive_inherited_attrs()
        self.derive_subclasses()
        self.derive_shadowed_attrs()

    def derive_inherited_attrs(self):

        "Derive inherited attributes for classes throughout the program."

        for name in list(self.classes):
            self.propagate_attrs_for_class(name)

    def propagate_attrs_for_class(self, name, visited=None):

        """
        Propagate inherited attributes for class 'name'. The optional 'visited'
        list records the classes being processed in order to detect classes
        inheriting from themselves, for which a ProgramError is raised.
        """

        # Visit classes only once.

        if name in self.all_combined_attrs:
            return

        visited = visited or []

        if name in visited:
            raise ProgramError("Class %s may not inherit from itself: %s -> %s." % (name, " -> ".join(visited), name))

        visited.append(name)

        class_attrs = {}
        instance_attrs = {}

        # Aggregate the attributes from base classes, recording the origins of
        # applicable attributes.

        for base in self.classes[name][::-1]:

            # Get the identity of the class from the reference.

            base = base.get_origin()

            # Define the base class completely before continuing with this
            # class.

            self.propagate_attrs_for_class(base, visited)
            class_attrs.update(self.all_class_attrs[base])

            # Instance attribute origins are combined if different.

            for key, values in self.all_instance_attrs[base].items():
                init_item(instance_attrs, key, set)
                instance_attrs[key].update(values)

        # Class attributes override those defined earlier in the hierarchy.

        class_attrs.update(self.all_class_attrs.get(name, {}))

        # Instance attributes are merely added if not already defined.

        for key in self.all_instance_attrs.get(name, []):
            if key not in instance_attrs:
                instance_attrs[key] = set(["%s.%s" % (name, key)])

        self.all_class_attrs[name] = class_attrs
        self.all_instance_attrs[name] = instance_attrs
        self.all_combined_attrs[name] = set(class_attrs.keys()).union(instance_attrs.keys())

    def derive_subclasses(self):

        "Derive subclass details for classes."

        for name, bases in self.classes.items():
            for base in bases:

                # Get the identity of the class from the reference.

                base = base.get_origin()

                # The subclass record for a base class is created on demand.

                init_item(self.subclasses, base, set)
                self.subclasses[base].add(name)

    def derive_shadowed_attrs(self):

        "Derive shadowed attributes for classes."

        for name, attrs in self.all_instance_attrs.items():
            attrs = set(attrs.keys()).intersection(self.all_class_attrs[name].keys())
            if attrs:
                self.all_shadowed_attrs[name] = attrs

    def set_class_types(self):

        "Set the type of each class."

        for attrs in self.all_class_attrs.values():
            attrs["__class__"] = self.type_ref.get_origin()

    def define_instantiators(self):

        """
        Consolidate parameter and default details, incorporating initialiser
        details to define instantiator signatures.
        """

        for cls, attrs in self.all_class_attrs.items():
            initialiser = attrs["__init__"]

            # The first parameter of the initialiser is omitted from the
            # instantiator signature.

            self.function_parameters[cls] = self.function_parameters[initialiser][1:]
            self.function_defaults[cls] = self.function_defaults[initialiser]

    def collect_constants(self):

        "Get constants from all active modules."

        for module in self.modules.values():
            self.all_constants.update(module.constants)

    # Import methods.

    def find_in_path(self, name):

        """
        Find the given module 'name' in the search path, returning None where no
        such module could be found, or a 2-tuple from the 'find' method
        otherwise.
        """

        for d in self.path:
            m = self.find(d, name)
            if m: return m
        return None

    def find(self, d, name):

        """
        In the directory 'd', find the given module 'name', where 'name' can
        either refer to a single file module or to a package. Return None if the
        'name' cannot be associated with either a file or a package directory,
        or a 2-tuple from '_find_package' or '_find_module' otherwise.
        """

        m = self._find_package(d, name)
        if m: return m
        m = self._find_module(d, name)
        if m: return m
        return None

    def _find_module(self, d, name):

        """
        In the directory 'd', find the given module 'name', returning None where
        no suitable file exists in the directory, or a 2-tuple consisting of
        None (indicating that no package directory is involved) and a filename
        indicating the location of the module.
        """

        name_py = name + extsep + "py"
        filename = self._find_file(d, name_py)
        if filename:
            return None, filename
        return None

    def _find_package(self, d, name):

        """
        In the directory 'd', find the given package 'name', returning None
        where no suitable package directory exists, or a 2-tuple consisting of
        a directory (indicating the location of the package directory itself)
        and a filename indicating the location of the __init__.py module which
        declares the package's top-level contents.
        """

        filename = self._find_file(d, name)
        if filename:
            init_py = "__init__" + extsep + "py"
            init_py_filename = self._find_file(filename, init_py)
            if init_py_filename:
                return filename, init_py_filename
        return None

    def _find_file(self, d, filename):

        """
        Return the filename obtained when searching the directory 'd' for the
        given 'filename', or None if no actual file exists for the filename.
        """

        filename = join(d, filename)
        if exists(filename):
            return filename
        else:
            return None

    def load(self, name):

        """
        Load the module or package with the given 'name'. Return an object
        referencing the loaded module or package, or None if no such module or
        package exists.
        """

        # Loaded modules are returned immediately.
        # Modules may be known but not yet loading (having been registered as
        # submodules), loading, loaded, or completely unknown.

        module = self.get_module(name)

        if module:
            return module

        # Otherwise, modules are loaded.

        if self.verbose:
            sys.stderr.write("Loading %s\n" % name)

        # Split the name into path components, and try to find the uppermost in
        # the search path.

        path = name.split(".")
        path_so_far = []
        module = None

        # The package directory of the previously loaded component, if any.

        d = None

        for p in path:

            # Get the module's filesystem details.

            if not path_so_far:
                m = self.find_in_path(p)
            elif d:
                m = self.find(d, p)
            else:
                m = None

            path_so_far.append(p)
            module_name = ".".join(path_so_far)

            if not m:
                if self.verbose:
                    sys.stderr.write("Not found (%s)\n" % name)

                return None # NOTE: Import error.

            # Get the module itself.

            d, filename = m
            module = self.load_from_file(filename, module_name)

        return module

    def load_from_file(self, filename, module_name=None):

        """
        Load the module from the given 'filename', employing the optional
        'module_name' or "__main__" if no name is given. A module already known
        by that name is returned without being loaded again.
        """

        if module_name is None:
            module_name = "__main__"

        module = self.modules.get(module_name)

        if not module:

            # Try to load from cache.

            module = self.load_from_cache(filename, module_name)
            if module:
                return module

            # If no cache entry exists, load from file.

            module = inspector.InspectedModule(module_name, self)
            self.add_module(module_name, module)
            self.update_cache_validity(module)

            self._load(module, module_name, lambda m: m.parse, filename)

        return module

    def update_cache_validity(self, module):

        "Make 'module' valid in the cache, but invalidate accessing modules."

        accessing = self.accessing_modules.get(module.name)
        if accessing:
            self.invalidated.update(accessing)
        self.invalidated.discard(module.name)

    def source_is_new(self, filename, module_name):

        """
        Return whether 'filename' is newer than the cached 'module_name'. The
        source is also considered new if no cache is configured, if no cached
        form exists, or if the module has been invalidated.
        """

        if self.cache:
            cache_filename = join(self.cache, module_name)
            return not exists(cache_filename) or \
                getmtime(filename) > getmtime(cache_filename) or \
                module_name in self.invalidated
        else:
            return True

    def load_from_cache(self, filename, module_name):

        """
        Return a module residing in the cache for the given source 'filename'
        and 'module_name', or any module already known by that name. Return
        None if the module is unknown and its cached form cannot be used.
        """

        module = self.modules.get(module_name)

        if not module and not self.source_is_new(filename, module_name):
            module = CachedModule(module_name, self)
            self.add_module(module_name, module)

            filename = join(self.cache, module_name)
            self._load(module, module_name, lambda m: m.from_cache, filename)

        return module

    def _load(self, module, module_name, fn, filename):

        """
        Load 'module' for the given 'module_name', and with 'fn' performing an
        invocation on the module with the given 'filename'.
        """

        # Load the module.

        if self.verbose:
            sys.stderr.write("Loading %s\n" % filename)
        fn(module)(filename)
        if self.verbose:
            sys.stderr.write("Loaded %s\n" % filename)

    def add_module(self, module_name, module):

        """
        Register 'module' under the given 'module_name', recording a module
        reference for the name and removing the name from the import queue.
        """

        self.modules[module_name] = module
        self.objects[module_name] = Reference("<module>", module_name)
        self.to_import.discard(module_name)

# vim: tabstop=4 expandtab shiftwidth=4