1 #!/usr/bin/env python 2 3 """ 4 Import logic. 5 6 Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 7 2014, 2015, 2016 Paul Boddie <paul@boddie.org.uk> 8 9 This program is free software; you can redistribute it and/or modify it under 10 the terms of the GNU General Public License as published by the Free Software 11 Foundation; either version 3 of the License, or (at your option) any later 12 version. 13 14 This program is distributed in the hope that it will be useful, but WITHOUT 15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 16 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 17 details. 18 19 You should have received a copy of the GNU General Public License along with 20 this program. If not, see <http://www.gnu.org/licenses/>. 21 """ 22 23 from errors import ProgramError 24 from os.path import exists, extsep, getmtime, join 25 from os import listdir, makedirs, remove 26 from common import init_item, readfile, writefile 27 from modules import CachedModule 28 from referencing import Reference 29 import inspector 30 import sys 31 32 class Importer: 33 34 "An import machine, searching for and loading modules." 35 36 def __init__(self, path, cache=None, verbose=False): 37 38 """ 39 Initialise the importer with the given search 'path' - a list of 40 directories to search for Python modules. 41 42 The optional 'cache' should be the name of a directory used to store 43 cached module information. 44 45 The optional 'verbose' parameter causes output concerning the activities 46 of the object to be produced if set to a true value (not the default). 47 """ 48 49 self.path = path 50 self.cache = cache 51 self.verbose = verbose 52 53 # Module importing queue, required modules, removed modules and active 54 # modules in the final program. 55 56 self.to_import = set() 57 self.required = set(["__main__"]) 58 self.removed = {} 59 self.modules = {} 60 61 # Module relationships and invalidated cached modules. 62 63 self.accessing_modules = {} 64 self.invalidated = set() 65 66 # Basic program information. 67 68 self.objects = {} 69 self.classes = {} 70 self.function_parameters = {} 71 self.function_defaults = {} 72 self.function_locals = {} 73 self.function_targets = {} 74 self.function_arguments = {} 75 76 # Unresolved names. 77 78 self.missing = set() 79 80 # Derived information. 81 82 self.subclasses = {} 83 84 # Attributes of different object types. 85 86 self.all_class_attrs = {} 87 self.all_instance_attrs = {} 88 self.all_instance_attr_constants = {} 89 self.all_combined_attrs = {} 90 self.all_module_attrs = {} 91 self.all_shadowed_attrs = {} 92 93 # References to external names and aliases within program units. 94 95 self.all_name_references = {} 96 self.all_initialised_names = {} 97 self.all_aliased_names = {} 98 99 # General attribute accesses. 100 101 self.all_attr_accesses = {} 102 self.all_const_accesses = {} 103 self.all_attr_access_modifiers = {} 104 105 # Constant literals and values. 106 107 self.all_constants = {} 108 self.all_constant_values = {} 109 110 self.make_cache() 111 112 def make_cache(self): 113 if self.cache and not exists(self.cache): 114 makedirs(self.cache) 115 116 def check_cache(self, details): 117 118 """ 119 Check whether the cache applies for the given 'details', invalidating it 120 if it does not. 121 """ 122 123 recorded_details = self.get_cache_details() 124 125 if recorded_details != details: 126 self.remove_cache() 127 128 writefile(self.get_cache_details_filename(), details) 129 130 def get_cache_details_filename(self): 131 132 "Return the filename for the cache details." 133 134 return join(self.cache, "$details") 135 136 def get_cache_details(self): 137 138 "Return details of the cache." 139 140 details_filename = self.get_cache_details_filename() 141 142 if not exists(details_filename): 143 return None 144 else: 145 return readfile(details_filename) 146 147 def remove_cache(self): 148 149 "Remove the contents of the cache." 150 151 for filename in listdir(self.cache): 152 remove(join(self.cache, filename)) 153 154 def to_cache(self): 155 156 "Write modules to the cache." 157 158 if self.cache: 159 for module_name, module in self.modules.items(): 160 module.to_cache(join(self.cache, module_name)) 161 162 # Object retrieval and storage. 163 164 def get_object(self, name): 165 166 """ 167 Return a reference for the given 'name' or None if no such object 168 exists. 169 """ 170 171 return self.objects.get(name) 172 173 def set_object(self, name, value=None): 174 175 "Set the object with the given 'name' and the given 'value'." 176 177 if isinstance(value, Reference): 178 ref = value.alias(name) 179 else: 180 ref = Reference(value, name) 181 182 self.objects[name] = ref 183 184 # Identification of both stored object names and name references. 185 186 def identify(self, name): 187 188 "Identify 'name' using stored object and external name records." 189 190 return self.objects.get(name) or self.all_name_references.get(name) 191 192 # Indirect object retrieval. 193 194 def get_attributes(self, ref, attrname): 195 196 """ 197 Return attributes provided by 'ref' for 'attrname'. Class attributes 198 may be provided by instances. 199 """ 200 201 kind = ref.get_kind() 202 if kind == "<class>": 203 ref = self.get_class_attribute(ref.get_origin(), attrname) 204 return ref and set([ref]) or set() 205 elif kind == "<instance>": 206 return self.get_combined_attributes(ref.get_origin(), attrname) 207 elif kind == "<module>": 208 ref = self.get_module_attribute(ref.get_origin(), attrname) 209 return ref and set([ref]) or set() 210 else: 211 return set() 212 213 def get_class_attribute(self, object_type, attrname): 214 215 "Return from 'object_type' the details of class attribute 'attrname'." 216 217 attr = self.all_class_attrs[object_type].get(attrname) 218 return attr and self.get_object(attr) 219 220 def get_instance_attributes(self, object_type, attrname): 221 222 """ 223 Return from 'object_type' the details of instance attribute 'attrname'. 224 """ 225 226 consts = self.all_instance_attr_constants.get(object_type) 227 attrs = set() 228 for attr in self.all_instance_attrs[object_type].get(attrname, []): 229 attrs.add(consts and consts.get(attrname) or Reference("<var>", attr)) 230 return attrs 231 232 def get_combined_attributes(self, object_type, attrname): 233 234 """ 235 Return from 'object_type' the details of class or instance attribute 236 'attrname'. 237 """ 238 239 ref = self.get_class_attribute(object_type, attrname) 240 refs = ref and set([ref]) or set() 241 refs.update(self.get_instance_attributes(object_type, attrname)) 242 return refs 243 244 def get_module_attribute(self, object_type, attrname): 245 246 "Return from 'object_type' the details of module attribute 'attrname'." 247 248 if attrname in self.all_module_attrs[object_type]: 249 return self.get_object("%s.%s" % (object_type, attrname)) 250 else: 251 return None 252 253 # Convenience methods for deducing which kind of object provided an 254 # attribute. 255 256 def get_attribute_provider(self, ref, attrname): 257 258 """ 259 Return the kind of provider of the attribute accessed via 'ref' using 260 'attrname'. 261 """ 262 263 kind = ref.get_kind() 264 265 if kind in ["<class>", "<module>"]: 266 return kind 267 else: 268 return self.get_instance_attribute_provider(ref.get_origin(), attrname) 269 270 def get_instance_attribute_provider(self, object_type, attrname): 271 272 """ 273 Return the kind of provider of the attribute accessed via an instance of 274 'object_type' using 'attrname'. 275 """ 276 277 if self.get_class_attribute(object_type, attrname): 278 return "<class>" 279 else: 280 return "<instance>" 281 282 # Module management. 283 284 def queue_module(self, name, accessor, required=False): 285 286 """ 287 Queue the module with the given 'name' for import from the given 288 'accessor' module. If 'required' is true (it is false by default), the 289 module will be required in the final program. 290 """ 291 292 if not self.modules.has_key(name): 293 self.to_import.add(name) 294 295 if required: 296 self.required.add(name) 297 298 init_item(self.accessing_modules, name, set) 299 self.accessing_modules[name].add(accessor.name) 300 301 def get_modules(self): 302 303 "Return all modules known to the importer." 304 305 return self.modules.values() 306 307 def get_module(self, name): 308 309 "Return the module with the given 'name'." 310 311 if not self.modules.has_key(name): 312 return None 313 314 return self.modules[name] 315 316 # Program operations. 317 318 def initialise(self, filename, reset=False): 319 320 """ 321 Initialise a program whose main module is 'filename', resetting the 322 cache if 'reset' is true. Return the main module. 323 """ 324 325 if reset: 326 self.remove_cache() 327 self.check_cache(filename) 328 329 # Load the program itself. 330 331 m = self.load_from_file(filename) 332 333 # Load any queued modules. 334 335 while self.to_import: 336 for name in list(self.to_import): # avoid mutation issue 337 self.load(name) 338 339 # Resolve dependencies between modules. 340 341 self.resolve() 342 343 # Record the type of all classes. 344 345 self.type_ref = self.get_object("__builtins__.type") 346 347 # Resolve dependencies within the program. 348 349 for module in self.modules.values(): 350 module.complete() 351 352 # Remove unneeded modules. 353 354 all_modules = self.modules.items() 355 356 for name, module in all_modules: 357 if name not in self.required: 358 module.unpropagate() 359 del self.modules[name] 360 self.removed[name] = module 361 362 # Collect redundant objects. 363 364 for module in self.removed.values(): 365 module.collect() 366 367 # Assert module objects where aliases have been removed. 368 369 for name in self.required: 370 if not self.objects.has_key(name): 371 self.objects[name] = Reference("<module>", name) 372 373 return m 374 375 def finalise(self): 376 377 """ 378 Finalise the inspected program, returning whether the program could be 379 finalised. 380 """ 381 382 if self.missing: 383 return False 384 385 self.finalise_classes() 386 self.to_cache() 387 self.set_class_types() 388 self.define_instantiators() 389 self.collect_constants() 390 391 return True 392 393 # Supporting operations. 394 395 def resolve(self): 396 397 "Resolve dependencies between modules." 398 399 self.waiting = {} 400 self.depends = {} 401 402 for module in self.modules.values(): 403 404 # Resolve all deferred references in each module. 405 406 for ref in module.deferred: 407 found = self.find_dependency(ref) 408 if not found: 409 self.missing.add((module.name, ref.get_origin())) 410 411 # Record the resolved names and identify required modules. 412 413 else: 414 # Find the providing module of this reference. 415 # Where definitive details of the origin cannot be found, 416 # identify the provider using the deferred reference. 417 # NOTE: This may need to test for static origins. 418 419 provider = self.get_module_provider(found.unresolved() and ref or found) 420 ref.mutate(found) 421 422 # Record any external dependency. 423 424 if provider and provider != module.name: 425 426 # Record the provider dependency. 427 428 module.required.add(provider) 429 self.accessing_modules[provider].add(module.name) 430 431 # Postpone any inclusion of the provider until this 432 # module becomes required. 433 434 if module.name not in self.required: 435 init_item(self.waiting, module.name, set) 436 self.waiting[module.name].add(provider) 437 438 # Make this module required in the accessing module. 439 440 elif provider not in self.required: 441 self.required.add(provider) 442 if self.verbose: 443 print >>sys.stderr, "Requiring", provider, "for", ref 444 445 # Record a module ordering dependency. 446 447 if not found.static(): 448 init_item(self.depends, module.name, set) 449 self.depends[module.name].add(provider) 450 451 # Check modules again to see if they are now required and should now 452 # cause the inclusion of other modules providing objects to the program. 453 454 for module_name in self.waiting.keys(): 455 self.require_providers(module_name) 456 457 def require_providers(self, module_name): 458 459 """ 460 Test if 'module_name' is itself required and, if so, require modules 461 containing objects provided to the module. 462 """ 463 464 if module_name in self.required and self.waiting.has_key(module_name): 465 for provider in self.waiting[module_name]: 466 if provider not in self.required: 467 self.required.add(provider) 468 if self.verbose: 469 print >>sys.stderr, "Requiring", provider 470 self.require_providers(provider) 471 472 def order_modules(self): 473 474 "Produce a module initialisation ordering." 475 476 self.check_ordering() 477 478 module_names = self.modules.keys() 479 480 # Record the number of modules using or depending on each module. 481 482 usage = {} 483 484 for module_name in module_names: 485 usage[module_name] = 0 486 487 for module_name, depend_names in self.depends.items(): 488 if module_name in module_names: 489 for depend_name in depend_names: 490 if depend_name in module_names: 491 usage[depend_name] += 1 492 493 # Produce an ordering by obtaining exposed modules (required by modules 494 # already processed) and putting them at the start of the list. 495 496 ordered = [] 497 498 while usage: 499 for module_name, n in usage.items(): 500 if n == 0: 501 ordered.insert(0, module_name) 502 module_names = self.depends.get(module_name) 503 504 # Reduce usage of the referenced modules. 505 506 if module_names: 507 for name in module_names: 508 usage[name] -= 1 509 510 del usage[module_name] 511 512 ordered.remove("__main__") 513 ordered.append("__main__") 514 return ordered 515 516 def check_ordering(self): 517 518 "Check the ordering dependencies." 519 520 for module_name, modules in self.depends.items(): 521 for provider in modules: 522 if self.depends.has_key(provider) and module_name in self.depends[provider]: 523 raise ProgramError, "Modules %s and %s may not depend on each other for non-static objects." % (module_name, provider) 524 525 def find_dependency(self, ref): 526 527 "Find the ultimate dependency for 'ref'." 528 529 found = set() 530 while ref and ref.has_kind("<depends>") and not ref in found: 531 found.add(ref) 532 ref = self.identify(ref.get_origin()) 533 return ref 534 535 def get_module_provider(self, ref): 536 537 "Identify the provider of the given 'ref'." 538 539 for ancestor in ref.ancestors(): 540 if self.modules.has_key(ancestor): 541 return ancestor 542 return None 543 544 def finalise_classes(self): 545 546 "Finalise the class relationships and attributes." 547 548 self.derive_inherited_attrs() 549 self.derive_subclasses() 550 self.derive_shadowed_attrs() 551 552 def derive_inherited_attrs(self): 553 554 "Derive inherited attributes for classes throughout the program." 555 556 for name in self.classes.keys(): 557 self.propagate_attrs_for_class(name) 558 559 def propagate_attrs_for_class(self, name, visited=None): 560 561 "Propagate inherited attributes for class 'name'." 562 563 # Visit classes only once. 564 565 if self.all_combined_attrs.has_key(name): 566 return 567 568 visited = visited or [] 569 570 if name in visited: 571 raise ProgramError, "Class %s may not inherit from itself: %s -> %s." % (name, " -> ".join(visited), name) 572 573 visited.append(name) 574 575 class_attrs = {} 576 instance_attrs = {} 577 578 # Aggregate the attributes from base classes, recording the origins of 579 # applicable attributes. 580 581 for base in self.classes[name][::-1]: 582 583 # Get the identity of the class from the reference. 584 585 base = base.get_origin() 586 587 # Define the base class completely before continuing with this 588 # class. 589 590 self.propagate_attrs_for_class(base, visited) 591 class_attrs.update(self.all_class_attrs[base]) 592 593 # Instance attribute origins are combined if different. 594 595 for key, values in self.all_instance_attrs[base].items(): 596 init_item(instance_attrs, key, set) 597 instance_attrs[key].update(values) 598 599 # Class attributes override those defined earlier in the hierarchy. 600 601 class_attrs.update(self.all_class_attrs.get(name, {})) 602 603 # Instance attributes are merely added if not already defined. 604 605 for key in self.all_instance_attrs.get(name, []): 606 if not instance_attrs.has_key(key): 607 instance_attrs[key] = set(["%s.%s" % (name, key)]) 608 609 self.all_class_attrs[name] = class_attrs 610 self.all_instance_attrs[name] = instance_attrs 611 self.all_combined_attrs[name] = set(class_attrs.keys()).union(instance_attrs.keys()) 612 613 def derive_subclasses(self): 614 615 "Derive subclass details for classes." 616 617 for name, bases in self.classes.items(): 618 for base in bases: 619 620 # Get the identity of the class from the reference. 621 622 base = base.get_origin() 623 self.subclasses[base].add(name) 624 625 def derive_shadowed_attrs(self): 626 627 "Derive shadowed attributes for classes." 628 629 for name, attrs in self.all_instance_attrs.items(): 630 attrs = set(attrs.keys()).intersection(self.all_class_attrs[name].keys()) 631 if attrs: 632 self.all_shadowed_attrs[name] = attrs 633 634 def set_class_types(self): 635 636 "Set the type of each class." 637 638 for attrs in self.all_class_attrs.values(): 639 attrs["__class__"] = self.type_ref.get_origin() 640 641 def define_instantiators(self): 642 643 """ 644 Consolidate parameter and default details, incorporating initialiser 645 details to define instantiator signatures. 646 """ 647 648 for cls, attrs in self.all_class_attrs.items(): 649 initialiser = attrs["__init__"] 650 self.function_parameters[cls] = self.function_parameters[initialiser] 651 self.function_defaults[cls] = self.function_defaults[initialiser] 652 653 def collect_constants(self): 654 655 "Get constants from all active modules." 656 657 for module in self.modules.values(): 658 self.all_constants.update(module.constants) 659 660 # Import methods. 661 662 def find_in_path(self, name): 663 664 """ 665 Find the given module 'name' in the search path, returning None where no 666 such module could be found, or a 2-tuple from the 'find' method 667 otherwise. 668 """ 669 670 for d in self.path: 671 m = self.find(d, name) 672 if m: return m 673 return None 674 675 def find(self, d, name): 676 677 """ 678 In the directory 'd', find the given module 'name', where 'name' can 679 either refer to a single file module or to a package. Return None if the 680 'name' cannot be associated with either a file or a package directory, 681 or a 2-tuple from '_find_package' or '_find_module' otherwise. 682 """ 683 684 m = self._find_package(d, name) 685 if m: return m 686 m = self._find_module(d, name) 687 if m: return m 688 return None 689 690 def _find_module(self, d, name): 691 692 """ 693 In the directory 'd', find the given module 'name', returning None where 694 no suitable file exists in the directory, or a 2-tuple consisting of 695 None (indicating that no package directory is involved) and a filename 696 indicating the location of the module. 697 """ 698 699 name_py = name + extsep + "py" 700 filename = self._find_file(d, name_py) 701 if filename: 702 return None, filename 703 return None 704 705 def _find_package(self, d, name): 706 707 """ 708 In the directory 'd', find the given package 'name', returning None 709 where no suitable package directory exists, or a 2-tuple consisting of 710 a directory (indicating the location of the package directory itself) 711 and a filename indicating the location of the __init__.py module which 712 declares the package's top-level contents. 713 """ 714 715 filename = self._find_file(d, name) 716 if filename: 717 init_py = "__init__" + extsep + "py" 718 init_py_filename = self._find_file(filename, init_py) 719 if init_py_filename: 720 return filename, init_py_filename 721 return None 722 723 def _find_file(self, d, filename): 724 725 """ 726 Return the filename obtained when searching the directory 'd' for the 727 given 'filename', or None if no actual file exists for the filename. 728 """ 729 730 filename = join(d, filename) 731 if exists(filename): 732 return filename 733 else: 734 return None 735 736 def load(self, name): 737 738 """ 739 Load the module or package with the given 'name'. Return an object 740 referencing the loaded module or package, or None if no such module or 741 package exists. 742 """ 743 744 # Loaded modules are returned immediately. 745 # Modules may be known but not yet loading (having been registered as 746 # submodules), loading, loaded, or completely unknown. 747 748 module = self.get_module(name) 749 750 if module: 751 return self.modules[name] 752 753 # Otherwise, modules are loaded. 754 755 # Split the name into path components, and try to find the uppermost in 756 # the search path. 757 758 path = name.split(".") 759 path_so_far = [] 760 module = None 761 762 for p in path: 763 764 # Get the module's filesystem details. 765 766 if not path_so_far: 767 m = self.find_in_path(p) 768 elif d: 769 m = self.find(d, p) 770 else: 771 m = None 772 773 path_so_far.append(p) 774 module_name = ".".join(path_so_far) 775 776 if not m: 777 if self.verbose: 778 print >>sys.stderr, "Not found (%s)" % name 779 780 return None # NOTE: Import error. 781 782 # Get the module itself. 783 784 d, filename = m 785 module = self.load_from_file(filename, module_name) 786 787 return module 788 789 def load_from_file(self, filename, module_name=None): 790 791 "Load the module from the given 'filename'." 792 793 if module_name is None: 794 module_name = "__main__" 795 796 module = self.modules.get(module_name) 797 798 if not module: 799 800 # Try to load from cache. 801 802 module = self.load_from_cache(filename, module_name) 803 if module: 804 return module 805 806 # If no cache entry exists, load from file. 807 808 module = inspector.InspectedModule(module_name, self) 809 self.add_module(module_name, module) 810 self.update_cache_validity(module) 811 812 self._load(module, module_name, lambda m: m.parse, filename) 813 814 return module 815 816 def update_cache_validity(self, module): 817 818 "Make 'module' valid in the cache, but invalidate accessing modules." 819 820 accessing = self.accessing_modules.get(module.name) 821 if accessing: 822 self.invalidated.update(accessing) 823 if module.name in self.invalidated: 824 self.invalidated.remove(module.name) 825 826 def source_is_new(self, filename, module_name): 827 828 "Return whether 'filename' is newer than the cached 'module_name'." 829 830 if self.cache: 831 cache_filename = join(self.cache, module_name) 832 return not exists(cache_filename) or \ 833 getmtime(filename) > getmtime(cache_filename) or \ 834 module_name in self.invalidated 835 else: 836 return True 837 838 def load_from_cache(self, filename, module_name): 839 840 "Return a module residing in the cache." 841 842 module = self.modules.get(module_name) 843 844 if not module and not self.source_is_new(filename, module_name): 845 module = CachedModule(module_name, self) 846 self.add_module(module_name, module) 847 848 filename = join(self.cache, module_name) 849 self._load(module, module_name, lambda m: m.from_cache, filename) 850 851 return module 852 853 def _load(self, module, module_name, fn, filename): 854 855 """ 856 Load 'module' for the given 'module_name', and with 'fn' performing an 857 invocation on the module with the given 'filename'. 858 """ 859 860 # Load the module. 861 862 if self.verbose: 863 print >>sys.stderr, module_name in self.required and "Required" or "Loading", module_name, "from", filename 864 fn(module)(filename) 865 866 # Add the module object if not already defined. 867 868 if not self.objects.has_key(module_name): 869 self.objects[module_name] = Reference("<module>", module_name) 870 871 def add_module(self, module_name, module): 872 873 """ 874 Return the module with the given 'module_name', adding a new module 875 object if one does not already exist. 876 """ 877 878 self.modules[module_name] = module 879 if module_name in self.to_import: 880 self.to_import.remove(module_name) 881 882 # vim: tabstop=4 expandtab shiftwidth=4