#!/usr/bin/env python

"""
Import logic.

Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013,
              2014, 2015, 2016 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
this program.  If not, see <http://www.gnu.org/licenses/>.
"""

from errors import ProgramError
from os.path import exists, extsep, getmtime, join
from os import listdir, makedirs, remove
from common import init_item, readfile, writefile
from modules import CachedModule
from referencing import Reference
import inspector
import sys

class Importer:

    "An import machine, searching for and loading modules."

    def __init__(self, path, cache=None, verbose=False):

        """
        Initialise the importer with the given search 'path' - a list of
        directories to search for Python modules.

        The optional 'cache' should be the name of a directory used to store
        cached module information. Where it is omitted, no caching is
        performed.

        The optional 'verbose' parameter causes output concerning the activities
        of the object to be produced if set to a true value (not the default).
        """

        self.path = path
        self.cache = cache
        self.verbose = verbose

        # Module importing queue, required modules, removed modules and active
        # modules in the final program.

        self.to_import = set()
        self.required = set(["__main__"])
        self.removed = {}
        self.modules = {}

        # Module relationships and invalidated cached modules.

        self.accessing_modules = {}
        self.invalidated = set()

        # Modules awaiting inclusion and module ordering dependencies. These
        # are reset and populated by the 'resolve' method.

        self.waiting = {}
        self.depends = {}

        # The reference to the type of all classes, set by 'initialise'.

        self.type_ref = None

        # Basic program information.

        self.objects = {}
        self.classes = {}
        self.function_parameters = {}
        self.function_defaults = {}
        self.function_locals = {}
        self.function_targets = {}
        self.function_arguments = {}

        # Unresolved names.

        self.missing = set()

        # Derived information.

        self.subclasses = {}

        # Attributes of different object types.

        self.all_class_attrs = {}
        self.all_instance_attrs = {}
        self.all_instance_attr_constants = {}
        self.all_combined_attrs = {}
        self.all_module_attrs = {}
        self.all_shadowed_attrs = {}

        # References to external names and aliases within program units.

        self.all_name_references = {}
        self.all_initialised_names = {}
        self.all_aliased_names = {}

        # General attribute accesses.

        self.all_attr_accesses = {}
        self.all_const_accesses = {}
        self.all_attr_access_modifiers = {}

        # Constant literals and values.

        self.all_constants = {}
        self.all_constant_values = {}

        self.make_cache()

    def make_cache(self):

        "Create the cache directory if one is configured and it does not exist."

        if self.cache and not exists(self.cache):
            makedirs(self.cache)

    def check_cache(self, details):

        """
        Check whether the cache applies for the given 'details', invalidating it
        if it does not. The given 'details' are then recorded in the cache.
        Nothing is done if no cache directory is configured.
        """

        # The cache is optional: without it there is nothing to validate.

        if not self.cache:
            return

        recorded_details = self.get_cache_details()

        if recorded_details != details:
            self.remove_cache()

        writefile(self.get_cache_details_filename(), details)

    def get_cache_details_filename(self):

        """
        Return the filename for the cache details. A cache directory must be
        configured for this to be meaningful.
        """

        return join(self.cache, "$details")

    def get_cache_details(self):

        """
        Return details of the cache, or None if no cache is configured or if no
        details have been recorded.
        """

        if not self.cache:
            return None

        details_filename = self.get_cache_details_filename()

        if not exists(details_filename):
            return None
        else:
            return readfile(details_filename)

    def remove_cache(self):

        """
        Remove the contents of the cache. Nothing is done if no cache directory
        is configured.
        """

        if not self.cache:
            return

        for filename in listdir(self.cache):
            remove(join(self.cache, filename))

    def to_cache(self):

        "Write modules to the cache."

        if self.cache:
            for module_name, module in self.modules.items():
                module.to_cache(join(self.cache, module_name))

    # Object retrieval and storage.

    def get_object(self, name):

        """
        Return a reference for the given 'name' or None if no such object
        exists.
        """

        return self.objects.get(name)

    def set_object(self, name, value=None):

        """
        Set the object with the given 'name' and the given 'value'. Where
        'value' is already a reference, an alias of it is stored under 'name';
        otherwise, a new reference is made from 'value' and 'name'.
        """

        if isinstance(value, Reference):
            ref = value.alias(name)
        else:
            ref = Reference(value, name)

        self.objects[name] = ref

    # Identification of both stored object names and name references.

    def identify(self, name):

        "Identify 'name' using stored object and external name records."

        # NOTE: A stored object that tests false defers to any name reference.

        return self.objects.get(name) or self.all_name_references.get(name)

    # Indirect object retrieval.

    def get_attributes(self, ref, attrname):

        """
        Return attributes provided by 'ref' for 'attrname'. Class attributes
        may be provided by instances. The result is always a set, being empty
        where no attribute is found or where 'ref' is of an unsupported kind.
        """

        kind = ref.get_kind()

        if kind == "<class>":
            ref = self.get_class_attribute(ref.get_origin(), attrname)
            return set([ref]) if ref else set()
        elif kind == "<instance>":
            return self.get_combined_attributes(ref.get_origin(), attrname)
        elif kind == "<module>":
            ref = self.get_module_attribute(ref.get_origin(), attrname)
            return set([ref]) if ref else set()
        else:
            return set()

    def get_class_attribute(self, object_type, attrname):

        """
        Return from 'object_type' the details of class attribute 'attrname'. A
        false value is returned where the type or attribute is not known.
        """

        # Each stage yields its own false value where a lookup fails, this
        # being retained deliberately since callers only test the result.

        attrs = self.all_class_attrs.get(object_type)
        attr = attrs and attrs.get(attrname)
        return attr and self.get_object(attr)

    def get_instance_attributes(self, object_type, attrname):

        """
        Return from 'object_type' the details of instance attribute 'attrname'
        as a set of references.
        """

        # Any constant recorded for the attribute takes the place of a general
        # reference for each attribute origin.

        consts = self.all_instance_attr_constants.get(object_type)
        const = consts and consts.get(attrname)

        attrs = set()
        for attr in self.all_instance_attrs[object_type].get(attrname, []):
            attrs.add(const or Reference("<var>", attr))
        return attrs

    def get_combined_attributes(self, object_type, attrname):

        """
        Return from 'object_type' the details of class or instance attribute
        'attrname'.
        """

        ref = self.get_class_attribute(object_type, attrname)
        refs = set([ref]) if ref else set()
        refs.update(self.get_instance_attributes(object_type, attrname))
        return refs

    def get_module_attribute(self, object_type, attrname):

        """
        Return from 'object_type' the details of module attribute 'attrname',
        or None if the module does not provide the attribute.
        """

        if attrname in self.all_module_attrs[object_type]:
            return self.get_object("%s.%s" % (object_type, attrname))
        else:
            return None

    # Convenience methods for deducing which kind of object provided an
    # attribute.

    def get_attribute_provider(self, ref, attrname):

        """
        Return the kind of provider of the attribute accessed via 'ref' using
        'attrname'.
        """

        kind = ref.get_kind()

        if kind in ["<class>", "<module>"]:
            return kind
        else:
            return self.get_instance_attribute_provider(ref.get_origin(), attrname)

    def get_instance_attribute_provider(self, object_type, attrname):

        """
        Return the kind of provider of the attribute accessed via an instance of
        'object_type' using 'attrname'.
        """

        if self.get_class_attribute(object_type, attrname):
            return "<class>"
        else:
            return "<instance>"

    # Module management.

    def queue_module(self, name, accessor, required=False):

        """
        Queue the module with the given 'name' for import from the given
        'accessor' module. If 'required' is true (it is false by default), the
        module will be required in the final program.
        """

        if name not in self.modules:
            self.to_import.add(name)

        if required:
            self.required.add(name)

        init_item(self.accessing_modules, name, set)
        self.accessing_modules[name].add(accessor.name)

    def get_modules(self):

        "Return all modules known to the importer."

        return list(self.modules.values())

    def get_module(self, name):

        """
        Return the module with the given 'name', or None if no such module is
        known.
        """

        return self.modules.get(name)

    # Program operations.

    def initialise(self, filename, reset=False):

        """
        Initialise a program whose main module is 'filename', resetting the
        cache if 'reset' is true. Return the main module.
        """

        if reset:
            self.remove_cache()
        self.check_cache(filename)

        # Load the program itself.

        m = self.load_from_file(filename)

        # Load any queued modules.

        while self.to_import:
            for name in list(self.to_import): # avoid mutation issue
                self.load(name)

                # Modules that could not be found are never registered and
                # would otherwise remain queued forever.

                self.to_import.discard(name)

        # Resolve dependencies between modules.

        self.resolve()

        # Record the type of all classes.

        self.type_ref = self.get_object("__builtins__.type")

        # Resolve dependencies within the program.

        for module in list(self.modules.values()):
            module.complete()

        # Remove unneeded modules. A snapshot is taken since the module
        # collection is modified during the traversal.

        for name, module in list(self.modules.items()):
            if name not in self.required:
                module.unpropagate()
                del self.modules[name]
                self.removed[name] = module

        # Collect redundant objects.

        for module in list(self.removed.values()):
            module.collect()

        # Assert module objects where aliases have been removed.

        for name in self.required:
            if name not in self.objects:
                self.objects[name] = Reference("<module>", name)

        return m

    def finalise(self):

        """
        Finalise the inspected program, returning whether the program could be
        finalised. Classes are finalised and modules written to the cache even
        where unresolved names prevent finalisation.
        """

        self.finalise_classes()
        self.to_cache()

        if self.missing:
            return False

        self.set_class_types()
        self.define_instantiators()
        self.collect_constants()

        return True

    # Supporting operations.

    def resolve(self):

        """
        Resolve dependencies between modules, recording unresolved names in the
        'missing' attribute, modules awaiting inclusion in 'waiting', and
        module ordering dependencies in 'depends'.
        """

        self.waiting = {}
        self.depends = {}

        for module in list(self.modules.values()):

            # Resolve all deferred references in each module.

            original_deferred = []

            for ref in module.deferred:

                # Retain original references for caching.

                original_deferred.append(ref.copy())

                # Update references throughout the program.

                found = self.find_dependency(ref)
                if not found:
                    self.missing.add((module.name, ref.get_origin()))

                # Record the resolved names and identify required modules.

                else:
                    # Find the providing module of this reference.
                    # Where definitive details of the origin cannot be found,
                    # identify the provider using the deferred reference.
                    # NOTE: This may need to test for static origins.

                    provider = self.get_module_provider(found.unresolved() and ref or found)
                    ref.mutate(found)

                    # Record any external dependency.

                    if provider and provider != module.name:

                        # Record the provider dependency.

                        module.required.add(provider)
                        self.accessing_modules[provider].add(module.name)

                        # Postpone any inclusion of the provider until this
                        # module becomes required.

                        if module.name not in self.required:
                            init_item(self.waiting, module.name, set)
                            self.waiting[module.name].add(provider)

                        # Make this module required in the accessing module.

                        elif provider not in self.required:
                            self.required.add(provider)
                            if self.verbose:
                                sys.stderr.write("Requiring %s for %s\n" % (provider, ref))

                        # Record a module ordering dependency.

                        if not found.static() or self.uses_dynamic_callable(found):
                            init_item(self.depends, module.name, set)
                            self.depends[module.name].add(provider)

            module.deferred = original_deferred

        # Check modules again to see if they are now required and should now
        # cause the inclusion of other modules providing objects to the program.

        for module_name in list(self.waiting.keys()):
            self.require_providers(module_name)

    def require_providers(self, module_name):

        """
        Test if 'module_name' is itself required and, if so, require modules
        containing objects provided to the module.
        """

        if module_name in self.required and module_name in self.waiting:
            for provider in self.waiting[module_name]:
                if provider not in self.required:
                    self.required.add(provider)
                    if self.verbose:
                        sys.stderr.write("Requiring %s\n" % (provider,))

                    # Newly required providers may have providers of their own.

                    self.require_providers(provider)

    def uses_dynamic_callable(self, ref):

        """
        Return whether 'ref' refers to a callable employing defaults that may
        need initialising before the callable can be used.
        """

        # Find the function or method associated with the reference.

        if ref.has_kind("<function>"):
            origin = ref.get_origin()
        elif ref.has_kind("<class>"):
            origin = "%s.__init__" % ref.get_origin()
        else:
            return False

        # Find any defaults for the function or method.

        defaults = self.function_defaults.get(origin)
        if not defaults:
            return False

        # Identify non-constant defaults.

        for name, default in defaults:
            if not default.is_constant_alias():
                return True

        return False

    def order_modules(self):

        """
        Produce a module initialisation ordering as a list of module names,
        with "__main__" always appearing last. A ProgramError is raised where
        modules depend on each other in a way that permits no ordering.
        """

        self.check_ordering()

        # Record the number of modules using or depending on each module.

        usage = {}

        for module_name in self.modules.keys():
            usage[module_name] = 0

        for module_name, depend_names in self.depends.items():
            if module_name in usage:
                for depend_name in depend_names:
                    if depend_name in usage:
                        usage[depend_name] += 1

        # Produce an ordering by obtaining exposed modules (required by modules
        # already processed) and putting them at the start of the list.

        ordered = []

        while usage:

            # Obtain the modules not used by any remaining module.

            exposed = [name for name, n in usage.items() if n == 0]

            # Without exposed modules, the remaining modules form one or more
            # dependency cycles and the ordering can never be completed.

            if not exposed:
                raise ProgramError(
                    "Modules %s may not depend on each other for non-static objects." %
                    ", ".join(sorted(usage.keys())))

            for module_name in exposed:
                ordered.insert(0, module_name)

                # Reduce usage of the referenced modules, ignoring those that
                # are not active and were therefore never counted.

                for name in self.depends.get(module_name) or ():
                    if name in usage:
                        usage[name] -= 1

                del usage[module_name]

        ordered.remove("__main__")
        ordered.append("__main__")
        return ordered

    def check_ordering(self):

        """
        Check the ordering dependencies, raising a ProgramError where two
        modules depend directly on each other.
        """

        for module_name, modules in self.depends.items():
            for provider in modules:
                if provider in self.depends and module_name in self.depends[provider]:
                    raise ProgramError("Modules %s and %s may not depend on each other for non-static objects." % (module_name, provider))

    def find_dependency(self, ref):

        """
        Find the ultimate dependency for 'ref', following references of the
        "<depends>" kind until some other kind of reference, an unidentified
        name or an already-visited reference is encountered.
        """

        found = set()
        while ref and ref.has_kind("<depends>") and ref not in found:
            found.add(ref)
            ref = self.identify(ref.get_origin())
        return ref

    def get_module_provider(self, ref):

        """
        Identify the provider of the given 'ref', this being the first of its
        ancestors that is a known module, or None if there is no such ancestor.
        """

        for ancestor in ref.ancestors():
            if ancestor in self.modules:
                return ancestor
        return None

    def finalise_classes(self):

        "Finalise the class relationships and attributes."

        self.derive_inherited_attrs()
        self.derive_subclasses()
        self.derive_shadowed_attrs()

    def derive_inherited_attrs(self):

        "Derive inherited attributes for classes throughout the program."

        for name in list(self.classes.keys()):
            self.propagate_attrs_for_class(name)

    def propagate_attrs_for_class(self, name, visited=None):

        """
        Propagate inherited attributes for class 'name'. The optional 'visited'
        list records the classes encountered so far and is used to detect
        classes inheriting from themselves, for which a ProgramError is raised.
        """

        # Visit classes only once.

        if name in self.all_combined_attrs:
            return

        visited = visited or []

        if name in visited:
            raise ProgramError("Class %s may not inherit from itself: %s -> %s." % (name, " -> ".join(visited), name))

        visited.append(name)

        class_attrs = {}
        instance_attrs = {}

        # Aggregate the attributes from base classes, recording the origins of
        # applicable attributes. Bases are visited in reverse so that earlier
        # bases override later ones.

        for base in self.classes[name][::-1]:

            # Get the identity of the class from the reference.

            base = base.get_origin()

            # Define the base class completely before continuing with this
            # class.

            self.propagate_attrs_for_class(base, visited)
            class_attrs.update(self.all_class_attrs[base])

            # Instance attribute origins are combined if different.

            for key, values in self.all_instance_attrs[base].items():
                init_item(instance_attrs, key, set)
                instance_attrs[key].update(values)

        # Class attributes override those defined earlier in the hierarchy.

        class_attrs.update(self.all_class_attrs.get(name, {}))

        # Instance attributes are merely added if not already defined.

        for key in self.all_instance_attrs.get(name, []):
            if key not in instance_attrs:
                instance_attrs[key] = set(["%s.%s" % (name, key)])

        self.all_class_attrs[name] = class_attrs
        self.all_instance_attrs[name] = instance_attrs
        self.all_combined_attrs[name] = set(class_attrs.keys()).union(instance_attrs.keys())

    def derive_subclasses(self):

        "Derive subclass details for classes."

        # NOTE: An entry for each base class is expected to exist already in
        # NOTE: the subclasses mapping; no entries are created in this class.

        for name, bases in self.classes.items():
            for base in bases:

                # Get the identity of the class from the reference.

                base = base.get_origin()
                self.subclasses[base].add(name)

    def derive_shadowed_attrs(self):

        """
        Derive shadowed attributes for classes, these being the names provided
        both as instance attributes and as class attributes.
        """

        for name, attrs in self.all_instance_attrs.items():
            attrs = set(attrs.keys()).intersection(self.all_class_attrs[name].keys())
            if attrs:
                self.all_shadowed_attrs[name] = attrs

    def set_class_types(self):

        "Set the type of each class."

        for attrs in self.all_class_attrs.values():
            attrs["__class__"] = self.type_ref.get_origin()

    def define_instantiators(self):

        """
        Consolidate parameter and default details, incorporating initialiser
        details to define instantiator signatures.
        """

        for cls, attrs in self.all_class_attrs.items():
            initialiser = attrs["__init__"]
            self.function_parameters[cls] = self.function_parameters[initialiser]
            self.function_defaults[cls] = self.function_defaults[initialiser]

    def collect_constants(self):

        "Get constants from all active modules."

        for module in self.modules.values():
            self.all_constants.update(module.constants)

    # Import methods.

    def find_in_path(self, name):

        """
        Find the given module 'name' in the search path, returning None where no
        such module could be found, or a 2-tuple from the 'find' method
        otherwise.
        """

        for d in self.path:
            m = self.find(d, name)
            if m: return m
        return None

    def find(self, d, name):

        """
        In the directory 'd', find the given module 'name', where 'name' can
        either refer to a single file module or to a package. Return None if the
        'name' cannot be associated with either a file or a package directory,
        or a 2-tuple from '_find_package' or '_find_module' otherwise.
        """

        # Packages take precedence over single file modules.

        m = self._find_package(d, name)
        if m: return m
        m = self._find_module(d, name)
        if m: return m
        return None

    def _find_module(self, d, name):

        """
        In the directory 'd', find the given module 'name', returning None where
        no suitable file exists in the directory, or a 2-tuple consisting of
        None (indicating that no package directory is involved) and a filename
        indicating the location of the module.
        """

        name_py = name + extsep + "py"
        filename = self._find_file(d, name_py)
        if filename:
            return None, filename
        return None

    def _find_package(self, d, name):

        """
        In the directory 'd', find the given package 'name', returning None
        where no suitable package directory exists, or a 2-tuple consisting of
        a directory (indicating the location of the package directory itself)
        and a filename indicating the location of the __init__.py module which
        declares the package's top-level contents.
        """

        filename = self._find_file(d, name)
        if filename:
            init_py = "__init__" + extsep + "py"
            init_py_filename = self._find_file(filename, init_py)
            if init_py_filename:
                return filename, init_py_filename
        return None

    def _find_file(self, d, filename):

        """
        Return the filename obtained when searching the directory 'd' for the
        given 'filename', or None if no actual file exists for the filename.
        """

        filename = join(d, filename)
        if exists(filename):
            return filename
        else:
            return None

    def load(self, name):

        """
        Load the module or package with the given 'name'. Return an object
        referencing the loaded module or package, or None if no such module or
        package exists.
        """

        # Loaded modules are returned immediately.
        # Modules may be known but not yet loading (having been registered as
        # submodules), loading, loaded, or completely unknown.

        module = self.get_module(name)

        if module:
            return module

        # Otherwise, modules are loaded.

        # Split the name into path components, and try to find the uppermost in
        # the search path.

        path = name.split(".")
        path_so_far = []

        # The directory of the enclosing package, if any, for each component.

        d = None

        for p in path:

            # Get the module's filesystem details.

            if not path_so_far:
                m = self.find_in_path(p)
            elif d:
                m = self.find(d, p)
            else:
                m = None

            path_so_far.append(p)
            module_name = ".".join(path_so_far)

            # Return None if the module could not be located.

            if not m:
                if self.verbose:
                    sys.stderr.write("Not found (%s)\n" % (name,))
                return None

            # Get the directory and module filename.

            d, filename = m

        # Get the module itself.

        return self.load_from_file(filename, module_name)

    def load_from_file(self, filename, module_name=None):

        """
        Load the module from the given 'filename', employing the optional
        'module_name' or "__main__" if no name is given. Any already-loaded
        module of that name is returned instead, and any applicable cached
        module is preferred to the source file.
        """

        if module_name is None:
            module_name = "__main__"

        module = self.modules.get(module_name)

        if not module:

            # Try to load from cache.

            module = self.load_from_cache(filename, module_name)
            if module:
                return module

            # If no cache entry exists, load from file.

            module = inspector.InspectedModule(module_name, self)
            self.add_module(module_name, module)
            self.update_cache_validity(module)

            self._load(module, module_name, lambda m: m.parse, filename)

        return module

    def update_cache_validity(self, module):

        "Make 'module' valid in the cache, but invalidate accessing modules."

        accessing = self.accessing_modules.get(module.name)
        if accessing:
            self.invalidated.update(accessing)
        self.invalidated.discard(module.name)

    def source_is_new(self, filename, module_name):

        """
        Return whether 'filename' is newer than the cached 'module_name'. The
        source is also considered new if no cached module exists, if the module
        has been invalidated, or if no cache is configured.
        """

        if self.cache:
            cache_filename = join(self.cache, module_name)
            return not exists(cache_filename) or \
                getmtime(filename) > getmtime(cache_filename) or \
                module_name in self.invalidated
        else:
            return True

    def load_from_cache(self, filename, module_name):

        """
        Return a module residing in the cache for the given 'module_name' whose
        source is 'filename', or any already-loaded module of that name. A false
        value is returned if the source is newer than the cached module.
        """

        module = self.modules.get(module_name)

        if not module and not self.source_is_new(filename, module_name):
            module = CachedModule(module_name, self)
            self.add_module(module_name, module)

            filename = join(self.cache, module_name)
            self._load(module, module_name, lambda m: m.from_cache, filename)

        return module

    def _load(self, module, module_name, fn, filename):

        """
        Load 'module' for the given 'module_name', and with 'fn' performing an
        invocation on the module with the given 'filename'.
        """

        # Load the module.

        if self.verbose:
            status = "Required" if module_name in self.required else "Loading"
            sys.stderr.write("%s %s from %s\n" % (status, module_name, filename))

        # Obtain the loading operation from the module and apply it.

        fn(module)(filename)

        # Add the module object if not already defined.

        if module_name not in self.objects:
            self.objects[module_name] = Reference("<module>", module_name)

    def add_module(self, module_name, module):

        """
        Register 'module' under the given 'module_name', replacing any existing
        module of that name, and remove the name from the import queue.
        """

        self.modules[module_name] = module
        self.to_import.discard(module_name)

# vim: tabstop=4 expandtab shiftwidth=4