#!/usr/bin/env python

"""
Import logic.

Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013,
              2014, 2015, 2016 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
this program.  If not, see <http://www.gnu.org/licenses/>.
"""

from errors import ProgramError
from os.path import exists, extsep, getmtime, join
from os import listdir, makedirs, remove
from common import init_item, readfile, writefile
from modules import CachedModule
from referencing import Reference
import inspector
import sys

class Importer:

    "An import machine, searching for and loading modules."

    def __init__(self, path, cache=None, verbose=False):

        """
        Initialise the importer with the given search 'path' - a list of
        directories to search for Python modules.

        The optional 'cache' should be the name of a directory used to store
        cached module information. The directory is created if it does not
        already exist.

        The optional 'verbose' parameter causes output concerning the activities
        of the object to be produced if set to a true value (not the default).
        """

        self.path = path
        self.cache = cache
        self.verbose = verbose

        # Module importing queue (names awaiting loading), required modules
        # (names of modules to be retained), removed modules (name to module
        # mappings for modules dropped by 'initialise') and active modules in
        # the final program (name to module mappings).

        self.to_import = set()
        self.required = set(["__main__"])
        self.removed = {}
        self.modules = {}

        # Module relationships (mapping module names to the names of modules
        # accessing them) and invalidated cached modules (names of modules
        # whose cache entries must not be used).

        self.accessing_modules = {}
        self.invalidated = set()

        # Basic program information.

        self.objects = {}
        self.classes = {}
        self.function_parameters = {}
        self.function_defaults = {}
        self.function_locals = {}
        self.function_targets = {}
        self.function_arguments = {}

        # Unresolved names: (module name, origin) tuples recorded by 'resolve'.

        self.missing = set()

        # Derived information.

        self.subclasses = {}

        # Attributes of different object types.

        self.all_class_attrs = {}
        self.all_instance_attrs = {}
        self.all_instance_attr_constants = {}
        self.all_combined_attrs = {}
        self.all_module_attrs = {}
        self.all_shadowed_attrs = {}

        # References to external names and aliases within program units.

        self.all_name_references = {}
        self.all_initialised_names = {}
        self.all_aliased_names = {}

        # General attribute accesses.

        self.all_attr_accesses = {}
        self.all_const_accesses = {}
        self.all_attr_access_modifiers = {}

        # Constant literals and values.

        self.all_constants = {}
        self.all_constant_values = {}

        self.make_cache()

    def make_cache(self):

        "Create the cache directory if a cache is configured but absent."

        if self.cache and not exists(self.cache):
            makedirs(self.cache)

    def check_cache(self, details):

        """
        Check whether the cache applies for the given 'details', invalidating it
        if it does not. The given 'details' are then recorded in the cache.

        Nothing is done if no cache directory has been configured.
        """

        # The cache is optional: without one there is nothing to check.

        if not self.cache:
            return

        recorded_details = self.get_cache_details()

        if recorded_details != details:
            self.remove_cache()

        writefile(self.get_cache_details_filename(), details)

    def get_cache_details_filename(self):

        """
        Return the filename for the cache details. A cache directory must have
        been configured.
        """

        return join(self.cache, "$details")

    def get_cache_details(self):

        """
        Return details of the cache, or None if no details have been recorded
        or if no cache directory has been configured.
        """

        if not self.cache:
            return None

        details_filename = self.get_cache_details_filename()

        if not exists(details_filename):
            return None
        else:
            return readfile(details_filename)

    def remove_cache(self):

        """
        Remove the contents of the cache. Nothing is done if no cache directory
        has been configured.
        """

        if not self.cache:
            return

        for filename in listdir(self.cache):
            remove(join(self.cache, filename))

    def to_cache(self):

        "Write modules to the cache, if a cache directory has been configured."

        if self.cache:
            for module_name, module in self.modules.items():
                module.to_cache(join(self.cache, module_name))

    # Object retrieval and storage.

    def get_object(self, name):

        """
        Return a reference for the given 'name' or None if no such object
        exists.
        """

        return self.objects.get(name)

    def set_object(self, name, value=None):

        """
        Set the object with the given 'name' and the given 'value'. Where
        'value' is already a reference, an alias of it for 'name' is stored;
        otherwise, a new reference is made from 'value' and 'name'.
        """

        if isinstance(value, Reference):
            ref = value.alias(name)
        else:
            ref = Reference(value, name)

        self.objects[name] = ref

    # Identification of both stored object names and name references.

    def identify(self, name):

        """
        Identify 'name' using stored object and external name records. Where
        the result is a "<depends>" reference, its origin is followed once more
        and the outcome of that step is returned instead.
        """

        ref = self.follow(name)
        if ref and ref.has_kind("<depends>"):
            ref = self.follow(ref.get_origin())
        return ref

    def follow(self, name):

        """
        Perform a single identification step for 'name': return the stored
        object, except where no object is stored or where the stored object is
        a module, in which case any external name reference recorded for 'name'
        takes precedence.
        """

        ref = self.objects.get(name)
        if not ref or ref.has_kind("<module>"):
            ref = self.all_name_references.get(name) or ref
        return ref

    # Indirect object retrieval.

    def get_attributes(self, ref, attrname):

        """
        Return attributes provided by 'ref' for 'attrname'. Class attributes
        may be provided by instances. The result is a set of references which
        is empty if no attribute is found or if 'ref' is not a class, instance
        or module.
        """

        kind = ref.get_kind()
        if kind == "<class>":
            ref = self.get_class_attribute(ref.get_origin(), attrname)
            return ref and set([ref]) or set()
        elif kind == "<instance>":
            return self.get_combined_attributes(ref.get_origin(), attrname)
        elif kind == "<module>":
            ref = self.get_module_attribute(ref.get_origin(), attrname)
            return ref and set([ref]) or set()
        else:
            return set()

    def get_class_attribute(self, object_type, attrname):

        """
        Return from 'object_type' the details of class attribute 'attrname', or
        a false value if the class provides no such attribute.
        """

        attr = self.all_class_attrs[object_type].get(attrname)
        return attr and self.get_object(attr)

    def get_instance_attributes(self, object_type, attrname):

        """
        Return from 'object_type' the details of instance attribute 'attrname'
        as a set of references. Any constant recorded for the attribute is
        used in preference to a general "<var>" reference.
        """

        consts = self.all_instance_attr_constants.get(object_type)
        attrs = set()
        for attr in self.all_instance_attrs[object_type].get(attrname, []):
            attrs.add(consts and consts.get(attrname) or Reference("<var>", attr))
        return attrs

    def get_combined_attributes(self, object_type, attrname):

        """
        Return from 'object_type' the details of class or instance attribute
        'attrname' as a set of references.
        """

        ref = self.get_class_attribute(object_type, attrname)
        refs = ref and set([ref]) or set()
        refs.update(self.get_instance_attributes(object_type, attrname))
        return refs

    def get_module_attribute(self, object_type, attrname):

        "Return from 'object_type' the details of module attribute 'attrname'."

        return self.identify("%s.%s" % (object_type, attrname))

    # Convenience methods for deducing which kind of object provided an
    # attribute.

    def get_attribute_provider(self, ref, attrname):

        """
        Return the kind of provider of the attribute accessed via 'ref' using
        'attrname'.
        """

        kind = ref.get_kind()

        if kind in ["<class>", "<module>"]:
            return kind
        else:
            return self.get_instance_attribute_provider(ref.get_origin(), attrname)

    def get_instance_attribute_provider(self, object_type, attrname):

        """
        Return the kind of provider of the attribute accessed via an instance of
        'object_type' using 'attrname'.
        """

        if self.get_class_attribute(object_type, attrname):
            return "<class>"
        else:
            return "<instance>"

    # Module management.

    def queue_module(self, name, accessor, required=False):

        """
        Queue the module with the given 'name' for import from the given
        'accessor' module. If 'required' is true (it is false by default), the
        module will be required in the final program.
        """

        if name not in self.modules:
            self.to_import.add(name)

        if required:
            self.required.add(name)

        init_item(self.accessing_modules, name, set)
        self.accessing_modules[name].add(accessor.name)

    def get_modules(self):

        "Return all modules known to the importer."

        return self.modules.values()

    def get_module(self, name):

        """
        Return the module with the given 'name', or None if no such module is
        known.
        """

        if name not in self.modules:
            return None

        return self.modules[name]

    # Program operations.

    def initialise(self, filename, reset=False):

        """
        Initialise a program whose main module is 'filename', resetting the
        cache if 'reset' is true. Return the main module.
        """

        if reset:
            self.remove_cache()
        self.check_cache(filename)

        # Load the program itself.

        m = self.load_from_file(filename)

        # Load any queued modules. Each call to 'load' takes the name out of
        # the queue, whether the module is found or not, so this terminates.

        while self.to_import:
            for name in list(self.to_import): # avoid mutation issue
                self.load(name)

        # Resolve dependencies between modules.

        self.resolve()

        # Record the type of all classes.

        self.type_ref = self.get_object("__builtins__.core.type")

        # Resolve dependencies within the program.

        for module in self.modules.values():
            module.complete()

        # Remove unneeded modules. A snapshot is taken because entries are
        # deleted from the mapping inside the loop.

        all_modules = list(self.modules.items())

        for name, module in all_modules:
            if name not in self.required:
                module.unpropagate()
                del self.modules[name]
                self.removed[name] = module

        # Collect redundant objects.

        for module in self.removed.values():
            module.collect()

        # Assert module objects where aliases have been removed.

        for name in self.required:
            if name not in self.objects:
                self.objects[name] = Reference("<module>", name)

        return m

    def finalise(self):

        """
        Finalise the inspected program, returning whether the program could be
        finalised. Finalisation is refused if unresolved names were recorded.
        """

        if self.missing:
            return False

        self.finalise_classes()
        self.to_cache()
        self.set_class_types()
        self.define_instantiators()
        self.collect_constants()

        return True

    # Supporting operations.

    def resolve(self):

        "Resolve dependencies between modules."

        # Providers whose inclusion awaits the accessing module being required:
        # a mapping from accessing module names to sets of provider names.

        self.waiting = {}

        for module in self.modules.values():

            # Resolve all deferred references in each module.

            for ref in module.deferred:
                dependency = self.find_dependency(ref)
                if not dependency:
                    self.missing.add((module.name, ref.get_origin()))

                # Record the resolved names and identify required modules.

                else:
                    # Find the providing module of this reference.
                    # Where definitive details of the origin cannot be found,
                    # identify the provider using the deferred reference.

                    provider = self.get_module_provider(dependency.unresolved() and ref or dependency)

                    # Only mutate references to the eventual object if it is
                    # static. Otherwise, maintain the closest referrer.

                    if dependency.static():
                        found = dependency
                    else:
                        found = self.find_reference(ref)

                    ref.mutate(found)

                    if provider:

                        # NOTE(review): this assumes the provider already has
                        # an entry in 'accessing_modules' - confirm that every
                        # loaded module is queued via 'queue_module'.

                        module.required.add(provider)
                        self.accessing_modules[provider].add(module.name)

                        # Postpone any inclusion of the provider until this
                        # module becomes required.

                        if module.name not in self.required:
                            init_item(self.waiting, module.name, set)
                            self.waiting[module.name].add(provider)

                        # Make this module required in the accessing module.

                        elif provider not in self.required:
                            self.required.add(provider)
                            if self.verbose:
                                print >>sys.stderr, "Requiring", provider, "for", ref

        # Check modules again to see if they are now required and should now
        # cause the inclusion of other modules providing objects to the program.

        for module_name in self.waiting.keys():
            self.require_providers(module_name)

    def require_providers(self, module_name):

        """
        Test if 'module_name' is itself required and, if so, require modules
        containing objects provided to the module.
        """

        if module_name in self.required and module_name in self.waiting:
            for provider in self.waiting[module_name]:

                # Newly required providers may in turn have providers waiting
                # upon them. Providers already required are not revisited.

                if provider not in self.required:
                    self.required.add(provider)
                    if self.verbose:
                        print >>sys.stderr, "Requiring", provider
                    self.require_providers(provider)

    def find_dependency(self, ref):

        """
        Find the ultimate dependency for 'ref', following "<depends>"
        references until something else is found. None is returned if a
        reference cannot be followed. Following stops at any reference already
        encountered, and that reference is returned.
        """

        found = set()
        while ref and ref.has_kind("<depends>") and ref not in found:
            found.add(ref)
            ref = self.follow(ref.get_origin())
        return ref

    def find_reference(self, ref):

        """
        Find the ultimate usable reference for 'ref': the last reference in
        the chain before one that cannot be followed, has already been
        encountered, or is neither a "<depends>" reference nor static.
        """

        found = set()
        while ref:
            found.add(ref)
            next_ref = self.follow(ref.get_origin())
            if not next_ref or next_ref in found:
                return ref
            if not next_ref.has_kind("<depends>") and not next_ref.static():
                return ref
            ref = next_ref
        return ref

    def get_module_provider(self, ref):

        """
        Identify the provider of the given 'ref': the first of its ancestors
        that names a known module, or None if there is no such ancestor.
        """

        for ancestor in ref.ancestors():
            if ancestor in self.modules:
                return ancestor
        return None

    def finalise_classes(self):

        "Finalise the class relationships and attributes."

        self.derive_inherited_attrs()
        self.derive_subclasses()
        self.derive_shadowed_attrs()

    def derive_inherited_attrs(self):

        "Derive inherited attributes for classes throughout the program."

        for name in self.classes.keys():
            self.propagate_attrs_for_class(name)

    def propagate_attrs_for_class(self, name, visited=None):

        """
        Propagate inherited attributes for class 'name'. The optional 'visited'
        list records the classes encountered during the current traversal and
        is used to detect classes inheriting from themselves, for which a
        ProgramError is raised.
        """

        # Visit classes only once.

        if name in self.all_combined_attrs:
            return

        visited = visited or []

        # A class that has been visited but not completed is still being
        # processed further up the call stack.

        if name in visited:
            raise ProgramError("Class %s may not inherit from itself: %s -> %s." % (name, " -> ".join(visited), name))

        visited.append(name)

        class_attrs = {}
        instance_attrs = {}

        # Aggregate the attributes from base classes, recording the origins of
        # applicable attributes. Bases are processed in reverse so that
        # attributes from earlier bases override those from later ones.

        for base in self.classes[name][::-1]:

            # Get the identity of the class from the reference.

            base = base.get_origin()

            # Define the base class completely before continuing with this
            # class.

            self.propagate_attrs_for_class(base, visited)
            class_attrs.update(self.all_class_attrs[base])

            # Instance attribute origins are combined if different.

            for key, values in self.all_instance_attrs[base].items():
                init_item(instance_attrs, key, set)
                instance_attrs[key].update(values)

        # Class attributes override those defined earlier in the hierarchy.

        class_attrs.update(self.all_class_attrs.get(name, {}))

        # Instance attributes are merely added if not already defined.

        for key in self.all_instance_attrs.get(name, []):
            if key not in instance_attrs:
                instance_attrs[key] = set(["%s.%s" % (name, key)])

        self.all_class_attrs[name] = class_attrs
        self.all_instance_attrs[name] = instance_attrs
        self.all_combined_attrs[name] = set(class_attrs.keys()).union(instance_attrs.keys())

    def derive_subclasses(self):

        "Derive subclass details for classes."

        for name, bases in self.classes.items():
            for base in bases:

                # Get the identity of the class from the reference.
                # NOTE(review): an entry for each base is presumably created
                # in 'subclasses' elsewhere - verify, since none is made here.

                base = base.get_origin()
                self.subclasses[base].add(name)

    def derive_shadowed_attrs(self):

        """
        Derive shadowed attributes for classes: those names defined both as
        instance attributes and as class attributes of the same class.
        """

        for name, attrs in self.all_instance_attrs.items():
            attrs = set(attrs.keys()).intersection(self.all_class_attrs[name].keys())
            if attrs:
                self.all_shadowed_attrs[name] = attrs

    def set_class_types(self):

        """
        Set the type of each class, using the type reference recorded by
        'initialise'.
        """

        for attrs in self.all_class_attrs.values():
            attrs["__class__"] = self.type_ref.get_origin()

    def define_instantiators(self):

        """
        Consolidate parameter and default details, incorporating initialiser
        details to define instantiator signatures.
        """

        # NOTE(review): every class is expected to provide or inherit an
        # '__init__' attribute here - confirm against the built-in classes.

        for cls, attrs in self.all_class_attrs.items():
            initialiser = attrs["__init__"]
            self.function_parameters[cls] = self.function_parameters[initialiser]
            self.function_defaults[cls] = self.function_defaults[initialiser]

    def collect_constants(self):

        "Get constants from all active modules."

        for module in self.modules.values():
            self.all_constants.update(module.constants)

    # Import methods.

    def find_in_path(self, name):

        """
        Find the given module 'name' in the search path, returning None where no
        such module could be found, or a 2-tuple from the 'find' method
        otherwise.
        """

        for d in self.path:
            m = self.find(d, name)
            if m: return m
        return None

    def find(self, d, name):

        """
        In the directory 'd', find the given module 'name', where 'name' can
        either refer to a single file module or to a package. Return None if the
        'name' cannot be associated with either a file or a package directory,
        or a 2-tuple from '_find_package' or '_find_module' otherwise.

        Packages take precedence over single file modules of the same name.
        """

        m = self._find_package(d, name)
        if m: return m
        m = self._find_module(d, name)
        if m: return m
        return None

    def _find_module(self, d, name):

        """
        In the directory 'd', find the given module 'name', returning None where
        no suitable file exists in the directory, or a 2-tuple consisting of
        None (indicating that no package directory is involved) and a filename
        indicating the location of the module.
        """

        name_py = name + extsep + "py"
        filename = self._find_file(d, name_py)
        if filename:
            return None, filename
        return None

    def _find_package(self, d, name):

        """
        In the directory 'd', find the given package 'name', returning None
        where no suitable package directory exists, or a 2-tuple consisting of
        a directory (indicating the location of the package directory itself)
        and a filename indicating the location of the __init__.py module which
        declares the package's top-level contents.
        """

        filename = self._find_file(d, name)
        if filename:
            init_py = "__init__" + extsep + "py"
            init_py_filename = self._find_file(filename, init_py)
            if init_py_filename:
                return filename, init_py_filename
        return None

    def _find_file(self, d, filename):

        """
        Return the filename obtained when searching the directory 'd' for the
        given 'filename', or None if no actual file exists for the filename.
        """

        filename = join(d, filename)
        if exists(filename):
            return filename
        else:
            return None

    def load(self, name):

        """
        Load the module or package with the given 'name'. Return an object
        referencing the loaded module or package, or None if no such module or
        package exists.

        Any packages containing the named module are loaded first. Where the
        module cannot be found, its name is removed from the import queue so
        that it is not requested again.
        """

        # Loaded modules are returned immediately.
        # Modules may be known but not yet loading (having been registered as
        # submodules), loading, loaded, or completely unknown.

        module = self.get_module(name)

        if module:
            return module

        # Otherwise, modules are loaded.

        # Split the name into path components, and try to find the uppermost in
        # the search path.

        path = name.split(".")
        path_so_far = []
        module = None

        # The package directory of the previous component, if any.

        d = None

        for p in path:

            # Get the module's filesystem details.

            if not path_so_far:
                m = self.find_in_path(p)
            elif d:
                m = self.find(d, p)
            else:
                m = None

            path_so_far.append(p)
            module_name = ".".join(path_so_far)

            if not m:
                if self.verbose:
                    print >>sys.stderr, "Not found (%s)" % name

                # Only 'add_module' otherwise dequeues names, so a missing
                # module would remain queued and 'initialise' would never
                # finish processing its queue.

                self.to_import.discard(name)

                return None # NOTE: Import error.

            # Get the module itself.

            d, filename = m
            module = self.load_from_file(filename, module_name)

        return module

    def load_from_file(self, filename, module_name=None):

        """
        Load the module from the given 'filename', employing the given
        'module_name' or "__main__" if no name is given. Any module already
        known by that name is returned instead, and a usable cache entry is
        preferred to the source file.
        """

        if module_name is None:
            module_name = "__main__"

        module = self.modules.get(module_name)

        if not module:

            # Try to load from cache.

            module = self.load_from_cache(filename, module_name)
            if module:
                return module

            # If no cache entry exists, load from file.

            module = inspector.InspectedModule(module_name, self)
            self.add_module(module_name, module)
            self.update_cache_validity(module)

            self._load(module, module_name, lambda m: m.parse, filename)

        return module

    def update_cache_validity(self, module):

        "Make 'module' valid in the cache, but invalidate accessing modules."

        accessing = self.accessing_modules.get(module.name)
        if accessing:
            self.invalidated.update(accessing)
        if module.name in self.invalidated:
            self.invalidated.remove(module.name)

    def source_is_new(self, filename, module_name):

        """
        Return whether 'filename' is newer than the cached 'module_name'. The
        source is also considered new if there is no cache, if no cache entry
        exists for the module, or if the module has been invalidated.
        """

        if self.cache:
            cache_filename = join(self.cache, module_name)
            return not exists(cache_filename) or \
                getmtime(filename) > getmtime(cache_filename) or \
                module_name in self.invalidated
        else:
            return True

    def load_from_cache(self, filename, module_name):

        """
        Return a module residing in the cache for the given 'module_name',
        any module already known by that name, or None if the source
        'filename' is new and no cached module can be used.
        """

        module = self.modules.get(module_name)

        if not module and not self.source_is_new(filename, module_name):
            module = CachedModule(module_name, self)
            self.add_module(module_name, module)

            filename = join(self.cache, module_name)
            self._load(module, module_name, lambda m: m.from_cache, filename)

        return module

    def _load(self, module, module_name, fn, filename):

        """
        Load 'module' for the given 'module_name', and with 'fn' performing an
        invocation on the module with the given 'filename'.

        Here, 'fn' accepts the module and returns the callable that is then
        invoked with the filename.
        """

        # Load the module.

        if self.verbose:
            print >>sys.stderr, module_name in self.required and "Required" or "Loading", module_name, "from", filename
        fn(module)(filename)

        # Add the module object if not already defined.

        if module_name not in self.objects:
            self.objects[module_name] = Reference("<module>", module_name)

    def add_module(self, module_name, module):

        """
        Register 'module' under the given 'module_name', replacing any module
        previously registered under that name, and remove the name from the
        import queue.
        """

        self.modules[module_name] = module
        if module_name in self.to_import:
            self.to_import.remove(module_name)

# vim: tabstop=4 expandtab shiftwidth=4