1 #!/usr/bin/env python 2 3 """ 4 Import logic. 5 6 Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 7 2014, 2015, 2016 Paul Boddie <paul@boddie.org.uk> 8 9 This program is free software; you can redistribute it and/or modify it under 10 the terms of the GNU General Public License as published by the Free Software 11 Foundation; either version 3 of the License, or (at your option) any later 12 version. 13 14 This program is distributed in the hope that it will be useful, but WITHOUT 15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 16 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more 17 details. 18 19 You should have received a copy of the GNU General Public License along with 20 this program. If not, see <http://www.gnu.org/licenses/>. 21 """ 22 23 from errors import ProgramError 24 from os.path import exists, extsep, getmtime, join 25 from os import listdir, makedirs, remove 26 from common import init_item, readfile, writefile 27 from modules import CachedModule 28 from referencing import Reference 29 import inspector 30 import sys 31 32 class Importer: 33 34 "An import machine, searching for and loading modules." 35 36 def __init__(self, path, cache=None, verbose=False): 37 38 """ 39 Initialise the importer with the given search 'path' - a list of 40 directories to search for Python modules. 41 42 The optional 'cache' should be the name of a directory used to store 43 cached module information. 44 45 The optional 'verbose' parameter causes output concerning the activities 46 of the object to be produced if set to a true value (not the default). 47 """ 48 49 self.path = path 50 self.cache = cache 51 self.verbose = verbose 52 53 # Module importing queue, required modules, removed modules and active 54 # modules in the final program. 55 56 self.to_import = set() 57 self.required = set(["__main__"]) 58 self.removed = {} 59 self.modules = {} 60 61 # Module relationships and invalidated cached modules. 62 63 self.accessing_modules = {} 64 self.invalidated = set() 65 66 # Basic program information. 67 68 self.objects = {} 69 self.classes = {} 70 self.function_parameters = {} 71 self.function_defaults = {} 72 self.function_locals = {} 73 self.function_targets = {} 74 self.function_arguments = {} 75 76 # Unresolved names. 77 78 self.missing = set() 79 80 # Derived information. 81 82 self.subclasses = {} 83 84 # Attributes of different object types. 85 86 self.all_class_attrs = {} 87 self.all_instance_attrs = {} 88 self.all_instance_attr_constants = {} 89 self.all_combined_attrs = {} 90 self.all_module_attrs = {} 91 self.all_shadowed_attrs = {} 92 93 # References to external names and aliases within program units. 94 95 self.all_name_references = {} 96 self.all_initialised_names = {} 97 self.all_aliased_names = {} 98 99 # General attribute accesses. 100 101 self.all_attr_accesses = {} 102 self.all_const_accesses = {} 103 self.all_attr_access_modifiers = {} 104 105 # Constant literals and values. 106 107 self.all_constants = {} 108 self.all_constant_values = {} 109 110 self.make_cache() 111 112 def make_cache(self): 113 if self.cache and not exists(self.cache): 114 makedirs(self.cache) 115 116 def check_cache(self, details): 117 118 """ 119 Check whether the cache applies for the given 'details', invalidating it 120 if it does not. 121 """ 122 123 recorded_details = self.get_cache_details() 124 125 if recorded_details != details: 126 self.remove_cache() 127 128 writefile(self.get_cache_details_filename(), details) 129 130 def get_cache_details_filename(self): 131 132 "Return the filename for the cache details." 133 134 return join(self.cache, "$details") 135 136 def get_cache_details(self): 137 138 "Return details of the cache." 139 140 details_filename = self.get_cache_details_filename() 141 142 if not exists(details_filename): 143 return None 144 else: 145 return readfile(details_filename) 146 147 def remove_cache(self): 148 149 "Remove the contents of the cache." 150 151 for filename in listdir(self.cache): 152 remove(join(self.cache, filename)) 153 154 def to_cache(self): 155 156 "Write modules to the cache." 157 158 if self.cache: 159 for module_name, module in self.modules.items(): 160 module.to_cache(join(self.cache, module_name)) 161 162 # Object retrieval and storage. 163 164 def get_object(self, name): 165 166 """ 167 Return a reference for the given 'name' or None if no such object 168 exists. 169 """ 170 171 return self.objects.get(name) 172 173 def set_object(self, name, value=None): 174 175 "Set the object with the given 'name' and the given 'value'." 176 177 if isinstance(value, Reference): 178 ref = value.alias(name) 179 else: 180 ref = Reference(value, name) 181 182 self.objects[name] = ref 183 184 # Identification of both stored object names and name references. 185 186 def identify(self, name): 187 188 "Identify 'name' using stored object and external name records." 189 190 return self.objects.get(name) or self.all_name_references.get(name) 191 192 # Indirect object retrieval. 193 194 def get_attributes(self, ref, attrname): 195 196 """ 197 Return attributes provided by 'ref' for 'attrname'. Class attributes 198 may be provided by instances. 199 """ 200 201 kind = ref.get_kind() 202 if kind == "<class>": 203 ref = self.get_class_attribute(ref.get_origin(), attrname) 204 return ref and set([ref]) or set() 205 elif kind == "<instance>": 206 return self.get_combined_attributes(ref.get_origin(), attrname) 207 elif kind == "<module>": 208 ref = self.get_module_attribute(ref.get_origin(), attrname) 209 return ref and set([ref]) or set() 210 else: 211 return set() 212 213 def get_class_attribute(self, object_type, attrname): 214 215 "Return from 'object_type' the details of class attribute 'attrname'." 216 217 attrs = self.all_class_attrs.get(object_type) 218 attr = attrs and attrs.get(attrname) 219 return attr and self.get_object(attr) 220 221 def get_instance_attributes(self, object_type, attrname): 222 223 """ 224 Return from 'object_type' the details of instance attribute 'attrname'. 225 """ 226 227 consts = self.all_instance_attr_constants.get(object_type) 228 attrs = set() 229 for attr in self.all_instance_attrs[object_type].get(attrname, []): 230 attrs.add(consts and consts.get(attrname) or Reference("<var>", attr)) 231 return attrs 232 233 def get_combined_attributes(self, object_type, attrname): 234 235 """ 236 Return from 'object_type' the details of class or instance attribute 237 'attrname'. 238 """ 239 240 ref = self.get_class_attribute(object_type, attrname) 241 refs = ref and set([ref]) or set() 242 refs.update(self.get_instance_attributes(object_type, attrname)) 243 return refs 244 245 def get_module_attribute(self, object_type, attrname): 246 247 "Return from 'object_type' the details of module attribute 'attrname'." 248 249 if attrname in self.all_module_attrs[object_type]: 250 return self.get_object("%s.%s" % (object_type, attrname)) 251 else: 252 return None 253 254 # Convenience methods for deducing which kind of object provided an 255 # attribute. 256 257 def get_attribute_provider(self, ref, attrname): 258 259 """ 260 Return the kind of provider of the attribute accessed via 'ref' using 261 'attrname'. 262 """ 263 264 kind = ref.get_kind() 265 266 if kind in ["<class>", "<module>"]: 267 return kind 268 else: 269 return self.get_instance_attribute_provider(ref.get_origin(), attrname) 270 271 def get_instance_attribute_provider(self, object_type, attrname): 272 273 """ 274 Return the kind of provider of the attribute accessed via an instance of 275 'object_type' using 'attrname'. 276 """ 277 278 if self.get_class_attribute(object_type, attrname): 279 return "<class>" 280 else: 281 return "<instance>" 282 283 # Module management. 284 285 def queue_module(self, name, accessor, required=False): 286 287 """ 288 Queue the module with the given 'name' for import from the given 289 'accessor' module. If 'required' is true (it is false by default), the 290 module will be required in the final program. 291 """ 292 293 if not self.modules.has_key(name): 294 self.to_import.add(name) 295 296 if required: 297 self.required.add(name) 298 299 init_item(self.accessing_modules, name, set) 300 self.accessing_modules[name].add(accessor.name) 301 302 def get_modules(self): 303 304 "Return all modules known to the importer." 305 306 return self.modules.values() 307 308 def get_module(self, name): 309 310 "Return the module with the given 'name'." 311 312 if not self.modules.has_key(name): 313 return None 314 315 return self.modules[name] 316 317 # Program operations. 318 319 def initialise(self, filename, reset=False): 320 321 """ 322 Initialise a program whose main module is 'filename', resetting the 323 cache if 'reset' is true. Return the main module. 324 """ 325 326 if reset: 327 self.remove_cache() 328 self.check_cache(filename) 329 330 # Load the program itself. 331 332 m = self.load_from_file(filename) 333 334 # Load any queued modules. 335 336 while self.to_import: 337 for name in list(self.to_import): # avoid mutation issue 338 self.load(name) 339 340 # Resolve dependencies between modules. 341 342 self.resolve() 343 344 # Record the type of all classes. 345 346 self.type_ref = self.get_object("__builtins__.type") 347 348 # Resolve dependencies within the program. 349 350 for module in self.modules.values(): 351 module.complete() 352 353 # Remove unneeded modules. 354 355 all_modules = self.modules.items() 356 357 for name, module in all_modules: 358 if name not in self.required: 359 module.unpropagate() 360 del self.modules[name] 361 self.removed[name] = module 362 363 # Collect redundant objects. 364 365 for module in self.removed.values(): 366 module.collect() 367 368 # Assert module objects where aliases have been removed. 369 370 for name in self.required: 371 if not self.objects.has_key(name): 372 self.objects[name] = Reference("<module>", name) 373 374 return m 375 376 def finalise(self): 377 378 """ 379 Finalise the inspected program, returning whether the program could be 380 finalised. 381 """ 382 383 if self.missing: 384 return False 385 386 self.finalise_classes() 387 self.to_cache() 388 self.set_class_types() 389 self.define_instantiators() 390 self.collect_constants() 391 392 return True 393 394 # Supporting operations. 395 396 def resolve(self): 397 398 "Resolve dependencies between modules." 399 400 self.waiting = {} 401 self.depends = {} 402 403 for module in self.modules.values(): 404 405 # Resolve all deferred references in each module. 406 407 for ref in module.deferred: 408 found = self.find_dependency(ref) 409 if not found: 410 self.missing.add((module.name, ref.get_origin())) 411 412 # Record the resolved names and identify required modules. 413 414 else: 415 # Find the providing module of this reference. 416 # Where definitive details of the origin cannot be found, 417 # identify the provider using the deferred reference. 418 # NOTE: This may need to test for static origins. 419 420 provider = self.get_module_provider(found.unresolved() and ref or found) 421 ref.mutate(found) 422 423 # Record any external dependency. 424 425 if provider and provider != module.name: 426 427 # Record the provider dependency. 428 429 module.required.add(provider) 430 self.accessing_modules[provider].add(module.name) 431 432 # Postpone any inclusion of the provider until this 433 # module becomes required. 434 435 if module.name not in self.required: 436 init_item(self.waiting, module.name, set) 437 self.waiting[module.name].add(provider) 438 439 # Make this module required in the accessing module. 440 441 elif provider not in self.required: 442 self.required.add(provider) 443 if self.verbose: 444 print >>sys.stderr, "Requiring", provider, "for", ref 445 446 # Record a module ordering dependency. 447 448 if not found.static() or self.uses_dynamic_callable(found): 449 init_item(self.depends, module.name, set) 450 self.depends[module.name].add(provider) 451 452 # Check modules again to see if they are now required and should now 453 # cause the inclusion of other modules providing objects to the program. 454 455 for module_name in self.waiting.keys(): 456 self.require_providers(module_name) 457 458 def require_providers(self, module_name): 459 460 """ 461 Test if 'module_name' is itself required and, if so, require modules 462 containing objects provided to the module. 463 """ 464 465 if module_name in self.required and self.waiting.has_key(module_name): 466 for provider in self.waiting[module_name]: 467 if provider not in self.required: 468 self.required.add(provider) 469 if self.verbose: 470 print >>sys.stderr, "Requiring", provider 471 self.require_providers(provider) 472 473 def uses_dynamic_callable(self, ref): 474 475 """ 476 Return whether 'ref' refers to a callable employing defaults that may 477 need initialising before the callable can be used. 478 """ 479 480 # Find the function or method associated with the reference. 481 482 if ref.has_kind("<function>"): 483 origin = ref.get_origin() 484 elif ref.has_kind("<class>"): 485 origin = "%s.__init__" % ref.get_origin() 486 else: 487 return False 488 489 # Find any defaults for the function or method. 490 491 defaults = self.function_defaults.get(origin) 492 if not defaults: 493 return False 494 495 # Identify non-constant defaults. 496 497 for name, ref in defaults: 498 if not ref.is_constant_alias(): 499 return True 500 501 return False 502 503 def order_modules(self): 504 505 "Produce a module initialisation ordering." 506 507 self.check_ordering() 508 509 module_names = self.modules.keys() 510 511 # Record the number of modules using or depending on each module. 512 513 usage = {} 514 515 for module_name in module_names: 516 usage[module_name] = 0 517 518 for module_name, depend_names in self.depends.items(): 519 if module_name in module_names: 520 for depend_name in depend_names: 521 if depend_name in module_names: 522 usage[depend_name] += 1 523 524 # Produce an ordering by obtaining exposed modules (required by modules 525 # already processed) and putting them at the start of the list. 526 527 ordered = [] 528 529 while usage: 530 for module_name, n in usage.items(): 531 if n == 0: 532 ordered.insert(0, module_name) 533 module_names = self.depends.get(module_name) 534 535 # Reduce usage of the referenced modules. 536 537 if module_names: 538 for name in module_names: 539 usage[name] -= 1 540 541 del usage[module_name] 542 543 ordered.remove("__main__") 544 ordered.append("__main__") 545 return ordered 546 547 def check_ordering(self): 548 549 "Check the ordering dependencies." 550 551 for module_name, modules in self.depends.items(): 552 for provider in modules: 553 if self.depends.has_key(provider) and module_name in self.depends[provider]: 554 raise ProgramError, "Modules %s and %s may not depend on each other for non-static objects." % (module_name, provider) 555 556 def find_dependency(self, ref): 557 558 "Find the ultimate dependency for 'ref'." 559 560 found = set() 561 while ref and ref.has_kind("<depends>") and not ref in found: 562 found.add(ref) 563 ref = self.identify(ref.get_origin()) 564 return ref 565 566 def get_module_provider(self, ref): 567 568 "Identify the provider of the given 'ref'." 569 570 for ancestor in ref.ancestors(): 571 if self.modules.has_key(ancestor): 572 return ancestor 573 return None 574 575 def finalise_classes(self): 576 577 "Finalise the class relationships and attributes." 578 579 self.derive_inherited_attrs() 580 self.derive_subclasses() 581 self.derive_shadowed_attrs() 582 583 def derive_inherited_attrs(self): 584 585 "Derive inherited attributes for classes throughout the program." 586 587 for name in self.classes.keys(): 588 self.propagate_attrs_for_class(name) 589 590 def propagate_attrs_for_class(self, name, visited=None): 591 592 "Propagate inherited attributes for class 'name'." 593 594 # Visit classes only once. 595 596 if self.all_combined_attrs.has_key(name): 597 return 598 599 visited = visited or [] 600 601 if name in visited: 602 raise ProgramError, "Class %s may not inherit from itself: %s -> %s." % (name, " -> ".join(visited), name) 603 604 visited.append(name) 605 606 class_attrs = {} 607 instance_attrs = {} 608 609 # Aggregate the attributes from base classes, recording the origins of 610 # applicable attributes. 611 612 for base in self.classes[name][::-1]: 613 614 # Get the identity of the class from the reference. 615 616 base = base.get_origin() 617 618 # Define the base class completely before continuing with this 619 # class. 620 621 self.propagate_attrs_for_class(base, visited) 622 class_attrs.update(self.all_class_attrs[base]) 623 624 # Instance attribute origins are combined if different. 625 626 for key, values in self.all_instance_attrs[base].items(): 627 init_item(instance_attrs, key, set) 628 instance_attrs[key].update(values) 629 630 # Class attributes override those defined earlier in the hierarchy. 631 632 class_attrs.update(self.all_class_attrs.get(name, {})) 633 634 # Instance attributes are merely added if not already defined. 635 636 for key in self.all_instance_attrs.get(name, []): 637 if not instance_attrs.has_key(key): 638 instance_attrs[key] = set(["%s.%s" % (name, key)]) 639 640 self.all_class_attrs[name] = class_attrs 641 self.all_instance_attrs[name] = instance_attrs 642 self.all_combined_attrs[name] = set(class_attrs.keys()).union(instance_attrs.keys()) 643 644 def derive_subclasses(self): 645 646 "Derive subclass details for classes." 647 648 for name, bases in self.classes.items(): 649 for base in bases: 650 651 # Get the identity of the class from the reference. 652 653 base = base.get_origin() 654 self.subclasses[base].add(name) 655 656 def derive_shadowed_attrs(self): 657 658 "Derive shadowed attributes for classes." 659 660 for name, attrs in self.all_instance_attrs.items(): 661 attrs = set(attrs.keys()).intersection(self.all_class_attrs[name].keys()) 662 if attrs: 663 self.all_shadowed_attrs[name] = attrs 664 665 def set_class_types(self): 666 667 "Set the type of each class." 668 669 for attrs in self.all_class_attrs.values(): 670 attrs["__class__"] = self.type_ref.get_origin() 671 672 def define_instantiators(self): 673 674 """ 675 Consolidate parameter and default details, incorporating initialiser 676 details to define instantiator signatures. 677 """ 678 679 for cls, attrs in self.all_class_attrs.items(): 680 initialiser = attrs["__init__"] 681 self.function_parameters[cls] = self.function_parameters[initialiser] 682 self.function_defaults[cls] = self.function_defaults[initialiser] 683 684 def collect_constants(self): 685 686 "Get constants from all active modules." 687 688 for module in self.modules.values(): 689 self.all_constants.update(module.constants) 690 691 # Import methods. 692 693 def find_in_path(self, name): 694 695 """ 696 Find the given module 'name' in the search path, returning None where no 697 such module could be found, or a 2-tuple from the 'find' method 698 otherwise. 699 """ 700 701 for d in self.path: 702 m = self.find(d, name) 703 if m: return m 704 return None 705 706 def find(self, d, name): 707 708 """ 709 In the directory 'd', find the given module 'name', where 'name' can 710 either refer to a single file module or to a package. Return None if the 711 'name' cannot be associated with either a file or a package directory, 712 or a 2-tuple from '_find_package' or '_find_module' otherwise. 713 """ 714 715 m = self._find_package(d, name) 716 if m: return m 717 m = self._find_module(d, name) 718 if m: return m 719 return None 720 721 def _find_module(self, d, name): 722 723 """ 724 In the directory 'd', find the given module 'name', returning None where 725 no suitable file exists in the directory, or a 2-tuple consisting of 726 None (indicating that no package directory is involved) and a filename 727 indicating the location of the module. 728 """ 729 730 name_py = name + extsep + "py" 731 filename = self._find_file(d, name_py) 732 if filename: 733 return None, filename 734 return None 735 736 def _find_package(self, d, name): 737 738 """ 739 In the directory 'd', find the given package 'name', returning None 740 where no suitable package directory exists, or a 2-tuple consisting of 741 a directory (indicating the location of the package directory itself) 742 and a filename indicating the location of the __init__.py module which 743 declares the package's top-level contents. 744 """ 745 746 filename = self._find_file(d, name) 747 if filename: 748 init_py = "__init__" + extsep + "py" 749 init_py_filename = self._find_file(filename, init_py) 750 if init_py_filename: 751 return filename, init_py_filename 752 return None 753 754 def _find_file(self, d, filename): 755 756 """ 757 Return the filename obtained when searching the directory 'd' for the 758 given 'filename', or None if no actual file exists for the filename. 759 """ 760 761 filename = join(d, filename) 762 if exists(filename): 763 return filename 764 else: 765 return None 766 767 def load(self, name): 768 769 """ 770 Load the module or package with the given 'name'. Return an object 771 referencing the loaded module or package, or None if no such module or 772 package exists. 773 """ 774 775 # Loaded modules are returned immediately. 776 # Modules may be known but not yet loading (having been registered as 777 # submodules), loading, loaded, or completely unknown. 778 779 module = self.get_module(name) 780 781 if module: 782 return self.modules[name] 783 784 # Otherwise, modules are loaded. 785 786 # Split the name into path components, and try to find the uppermost in 787 # the search path. 788 789 path = name.split(".") 790 path_so_far = [] 791 module = None 792 793 for p in path: 794 795 # Get the module's filesystem details. 796 797 if not path_so_far: 798 m = self.find_in_path(p) 799 elif d: 800 m = self.find(d, p) 801 else: 802 m = None 803 804 path_so_far.append(p) 805 module_name = ".".join(path_so_far) 806 807 # Return None if the module could not be located. 808 809 if not m: 810 if self.verbose: 811 print >>sys.stderr, "Not found (%s)" % name 812 return None 813 814 # Get the directory and module filename. 815 816 d, filename = m 817 818 # Get the module itself. 819 820 return self.load_from_file(filename, module_name) 821 822 def load_from_file(self, filename, module_name=None): 823 824 "Load the module from the given 'filename'." 825 826 if module_name is None: 827 module_name = "__main__" 828 829 module = self.modules.get(module_name) 830 831 if not module: 832 833 # Try to load from cache. 834 835 module = self.load_from_cache(filename, module_name) 836 if module: 837 return module 838 839 # If no cache entry exists, load from file. 840 841 module = inspector.InspectedModule(module_name, self) 842 self.add_module(module_name, module) 843 self.update_cache_validity(module) 844 845 self._load(module, module_name, lambda m: m.parse, filename) 846 847 return module 848 849 def update_cache_validity(self, module): 850 851 "Make 'module' valid in the cache, but invalidate accessing modules." 852 853 accessing = self.accessing_modules.get(module.name) 854 if accessing: 855 self.invalidated.update(accessing) 856 if module.name in self.invalidated: 857 self.invalidated.remove(module.name) 858 859 def source_is_new(self, filename, module_name): 860 861 "Return whether 'filename' is newer than the cached 'module_name'." 862 863 if self.cache: 864 cache_filename = join(self.cache, module_name) 865 return not exists(cache_filename) or \ 866 getmtime(filename) > getmtime(cache_filename) or \ 867 module_name in self.invalidated 868 else: 869 return True 870 871 def load_from_cache(self, filename, module_name): 872 873 "Return a module residing in the cache." 874 875 module = self.modules.get(module_name) 876 877 if not module and not self.source_is_new(filename, module_name): 878 module = CachedModule(module_name, self) 879 self.add_module(module_name, module) 880 881 filename = join(self.cache, module_name) 882 self._load(module, module_name, lambda m: m.from_cache, filename) 883 884 return module 885 886 def _load(self, module, module_name, fn, filename): 887 888 """ 889 Load 'module' for the given 'module_name', and with 'fn' performing an 890 invocation on the module with the given 'filename'. 891 """ 892 893 # Load the module. 894 895 if self.verbose: 896 print >>sys.stderr, module_name in self.required and "Required" or "Loading", module_name, "from", filename 897 fn(module)(filename) 898 899 # Add the module object if not already defined. 900 901 if not self.objects.has_key(module_name): 902 self.objects[module_name] = Reference("<module>", module_name) 903 904 def add_module(self, module_name, module): 905 906 """ 907 Return the module with the given 'module_name', adding a new module 908 object if one does not already exist. 909 """ 910 911 self.modules[module_name] = module 912 if module_name in self.to_import: 913 self.to_import.remove(module_name) 914 915 # vim: tabstop=4 expandtab shiftwidth=4