#!/usr/bin/env python

"""
Import logic.

Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013,
              2014, 2015, 2016 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
this program.  If not, see <http://www.gnu.org/licenses/>.
"""

from errors import ProgramError
from os.path import exists, extsep, getmtime, join
from os import listdir, makedirs, remove
from common import init_item, readfile, writefile
from modules import CachedModule
from referencing import Reference
import inspector
import sys

class Importer:

    "An import machine, searching for and loading modules."

    def __init__(self, path, cache=None, verbose=False):

        """
        Initialise the importer with the given search 'path' - a list of
        directories to search for Python modules.

        The optional 'cache' should be the name of a directory used to store
        cached module information.

        The optional 'verbose' parameter causes output concerning the activities
        of the object to be produced if set to a true value (not the default).
        """

        self.path = path
        self.cache = cache
        self.verbose = verbose

        # Module importing queue, required modules, removed modules and active
        # modules in the final program.

        self.to_import = set()
        self.required = set(["__main__"])
        self.removed = {}
        self.modules = {}

        # Module relationships and invalidated cached modules.

        self.accessing_modules = {}
        self.invalidated = set()

        # Providers awaiting inclusion until their accessing module becomes
        # required (populated by the resolve method).

        self.waiting = {}

        # The reference for the type of all classes (set by the initialise
        # method).

        self.type_ref = None

        # Basic program information.

        self.objects = {}
        self.classes = {}
        self.function_parameters = {}
        self.function_defaults = {}
        self.function_targets = {}
        self.function_arguments = {}

        # Unresolved names.

        self.missing = set()

        # Derived information.

        self.subclasses = {}

        # Attributes of different object types.

        self.all_class_attrs = {}
        self.all_instance_attrs = {}
        self.all_instance_attr_constants = {}
        self.all_combined_attrs = {}
        self.all_module_attrs = {}
        self.all_shadowed_attrs = {}

        # References to external names and aliases within program units.

        self.all_name_references = {}
        self.all_initialised_names = {}
        self.all_aliased_names = {}

        # General attribute accesses.

        self.all_attr_accesses = {}
        self.all_const_accesses = {}
        self.all_attr_access_modifiers = {}

        # Constant literals and values.

        self.all_constants = {}
        self.all_constant_values = {}

        self.make_cache()

    def make_cache(self):

        "Create the cache directory if one is configured and does not exist."

        if self.cache and not exists(self.cache):
            makedirs(self.cache)

    def check_cache(self, details):

        """
        Check whether the cache applies for the given 'details', invalidating it
        if it does not. Where no cache directory is configured, nothing is
        checked or recorded.
        """

        # Without a cache directory there is nothing to validate or record.

        if not self.cache:
            return

        recorded_details = self.get_cache_details()

        if recorded_details != details:
            self.remove_cache()

        writefile(self.get_cache_details_filename(), details)

    def get_cache_details_filename(self):

        """
        Return the filename for the cache details. This is only meaningful
        where a cache directory has been configured.
        """

        return join(self.cache, "$details")

    def get_cache_details(self):

        """
        Return details of the cache, or None if no cache is configured or no
        details have been recorded.
        """

        if not self.cache:
            return None

        details_filename = self.get_cache_details_filename()

        if not exists(details_filename):
            return None
        else:
            return readfile(details_filename)

    def remove_cache(self):

        """
        Remove the contents of the cache. Nothing is done where no cache
        directory is configured or where the directory does not exist.
        """

        if not self.cache or not exists(self.cache):
            return

        for filename in listdir(self.cache):
            remove(join(self.cache, filename))

    def to_cache(self):

        "Write modules to the cache."

        if self.cache:
            for module_name, module in self.modules.items():
                module.to_cache(join(self.cache, module_name))

    # Object retrieval and storage.

    def get_object(self, name):

        """
        Return a reference for the given 'name' or None if no such object
        exists.
        """

        return self.objects.get(name)

    def set_object(self, name, value=None):

        "Set the object with the given 'name' and the given 'value'."

        # Existing references are aliased under the new name; anything else is
        # wrapped in a new reference.

        if isinstance(value, Reference):
            ref = value.alias(name)
        else:
            ref = Reference(value, name)

        self.objects[name] = ref

    # Identification of both stored object names and name references.

    def identify(self, name):

        "Identify 'name' using stored object and external name records."

        return self.objects.get(name) or self.all_name_references.get(name)

    # Indirect object retrieval.

    def get_attributes(self, ref, attrname):

        """
        Return attributes provided by 'ref' for 'attrname'. Class attributes
        may be provided by instances. The result is always a set, being empty
        where no attribute is found or where 'ref' is of an unsupported kind.
        """

        kind = ref.get_kind()
        if kind == "<class>":
            ref = self.get_class_attribute(ref.get_origin(), attrname)
            return ref and set([ref]) or set()
        elif kind == "<instance>":
            return self.get_combined_attributes(ref.get_origin(), attrname)
        elif kind == "<module>":
            ref = self.get_module_attribute(ref.get_origin(), attrname)
            return ref and set([ref]) or set()
        else:
            return set()

    def get_class_attribute(self, object_type, attrname):

        "Return from 'object_type' the details of class attribute 'attrname'."

        attr = self.all_class_attrs[object_type].get(attrname)
        return attr and self.get_object(attr)

    def get_instance_attributes(self, object_type, attrname):

        """
        Return from 'object_type' the details of instance attribute 'attrname'.
        """

        consts = self.all_instance_attr_constants.get(object_type)
        attrs = set()

        # A recorded constant for the attribute takes precedence over a plain
        # variable reference.

        for attr in self.all_instance_attrs[object_type].get(attrname, []):
            attrs.add(consts and consts.get(attrname) or Reference("<var>", attr))
        return attrs

    def get_combined_attributes(self, object_type, attrname):

        """
        Return from 'object_type' the details of class or instance attribute
        'attrname'.
        """

        ref = self.get_class_attribute(object_type, attrname)
        refs = ref and set([ref]) or set()
        refs.update(self.get_instance_attributes(object_type, attrname))
        return refs

    def get_module_attribute(self, object_type, attrname):

        "Return from 'object_type' the details of module attribute 'attrname'."

        if attrname in self.all_module_attrs[object_type]:
            return self.get_object("%s.%s" % (object_type, attrname))
        else:
            return None

    # Convenience methods for deducing which kind of object provided an
    # attribute.

    def get_attribute_provider(self, ref, attrname):

        """
        Return the kind of provider of the attribute accessed via 'ref' using
        'attrname'.
        """

        kind = ref.get_kind()

        if kind in ["<class>", "<module>"]:
            return kind
        else:
            return self.get_instance_attribute_provider(ref.get_origin(), attrname)

    def get_instance_attribute_provider(self, object_type, attrname):

        """
        Return the kind of provider of the attribute accessed via an instance of
        'object_type' using 'attrname'.
        """

        if self.get_class_attribute(object_type, attrname):
            return "<class>"
        else:
            return "<instance>"

    # Module management.

    def queue_module(self, name, accessor, required=False):

        """
        Queue the module with the given 'name' for import from the given
        'accessor' module. If 'required' is true (it is false by default), the
        module will be required in the final program.
        """

        if name not in self.modules:
            self.to_import.add(name)

        if required:
            self.required.add(name)

        init_item(self.accessing_modules, name, set)
        self.accessing_modules[name].add(accessor.name)

    def get_modules(self):

        "Return all modules known to the importer."

        return self.modules.values()

    def get_module(self, name):

        "Return the module with the given 'name', or None if it is not known."

        if name not in self.modules:
            return None

        return self.modules[name]

    # Program operations.

    def initialise(self, filename, reset=False):

        """
        Initialise a program whose main module is 'filename', resetting the
        cache if 'reset' is true. Return the main module.
        """

        if reset:
            self.remove_cache()
        self.check_cache(filename)

        # Load the program itself.

        m = self.load_from_file(filename)

        # Load any queued modules.

        while self.to_import:
            for name in list(self.to_import): # avoid mutation issue
                self.load(name)

                # Modules that cannot be found are never registered and would
                # otherwise remain queued forever. Any references to them
                # remain unresolved and are recorded as missing by the resolve
                # method.

                self.to_import.discard(name)

        # Resolve dependencies between modules.

        self.resolve()

        # Record the type of all classes.

        self.type_ref = self.get_object("__builtins__.type")

        # Resolve dependencies within the program.

        for module in self.modules.values():
            module.complete()

        # Remove unneeded modules. A copy of the items is taken because the
        # module collection is modified in the loop.

        all_modules = list(self.modules.items())

        for name, module in all_modules:
            if name not in self.required:
                module.unpropagate()
                del self.modules[name]
                self.removed[name] = module

        # Collect redundant objects.

        for module in self.removed.values():
            module.collect()

        # Assert module objects where aliases have been removed.

        for name in self.required:
            if name not in self.objects:
                self.objects[name] = Reference("<module>", name)

        return m

    def finalise(self):

        """
        Finalise the inspected program, returning whether the program could be
        finalised. Finalisation is refused where unresolved names remain.
        """

        if self.missing:
            return False

        self.finalise_classes()
        self.to_cache()
        self.set_class_types()
        self.define_instantiators()
        self.collect_constants()

        return True

    # Supporting operations.

    def resolve(self):

        """
        Resolve dependencies between modules, recording unresolved references
        in the 'missing' collection and identifying required modules.
        """

        self.waiting = {}

        for module in self.modules.values():

            # Resolve all deferred references in each module.

            for ref in module.deferred:
                found = self.find_dependency(ref)
                if not found:
                    self.missing.add((module.name, ref.get_origin()))
                    continue

                # Record the resolved names and identify required modules.

                ref.mutate(found)

                # Find the providing module of this reference.

                provider = self.get_module_provider(ref)
                if not provider:
                    continue

                module.required.add(provider)

                # Providers need not have been queued for import explicitly,
                # so the accessing module record may not yet exist.

                init_item(self.accessing_modules, provider, set)
                self.accessing_modules[provider].add(module.name)

                # Postpone any inclusion of the provider until this
                # module becomes required.

                if module.name not in self.required:
                    init_item(self.waiting, module.name, set)
                    self.waiting[module.name].add(provider)

                # Make this module required in the accessing module.

                elif provider not in self.required:
                    self.required.add(provider)
                    if self.verbose:
                        print >>sys.stderr, "Requiring", provider, "for", ref

        # Check modules again to see if they are now required and should now
        # cause the inclusion of other modules providing objects to the program.

        for module_name in list(self.waiting):
            self.require_providers(module_name)

    def require_providers(self, module_name):

        """
        Test if 'module_name' is itself required and, if so, require modules
        containing objects provided to the module.
        """

        if module_name in self.required and module_name in self.waiting:
            for provider in self.waiting[module_name]:

                # Only newly required providers are followed, which also
                # prevents endless recursion for mutually dependent modules.

                if provider not in self.required:
                    self.required.add(provider)
                    if self.verbose:
                        print >>sys.stderr, "Requiring", provider
                    self.require_providers(provider)

    def find_dependency(self, ref):

        """
        Find the ultimate dependency for 'ref', following references of the
        "<depends>" kind until another kind of reference is found, no reference
        can be identified, or a reference is encountered again.
        """

        found = set()
        while ref and ref.has_kind("<depends>") and ref not in found:
            found.add(ref)
            ref = self.identify(ref.get_origin())
        return ref

    def get_module_provider(self, ref):

        """
        Identify the provider of the given 'ref', this being the first of the
        reference's ancestors that is a known module, or None if there is no
        such ancestor.
        """

        for ancestor in ref.ancestors():
            if ancestor in self.modules:
                return ancestor
        return None

    def finalise_classes(self):

        "Finalise the class relationships and attributes."

        self.derive_inherited_attrs()
        self.derive_subclasses()
        self.derive_shadowed_attrs()

    def derive_inherited_attrs(self):

        "Derive inherited attributes for classes throughout the program."

        for name in self.classes.keys():
            self.propagate_attrs_for_class(name)

    def propagate_attrs_for_class(self, name, visited=None):

        """
        Propagate inherited attributes for class 'name'. The optional 'visited'
        list records the classes being processed in order to detect classes
        inheriting from themselves, with ProgramError being raised in such
        cases.
        """

        # Visit classes only once.

        if name in self.all_combined_attrs:
            return

        visited = visited or []

        # Completed classes return above, so a class found here is still being
        # processed and must therefore be one of its own ancestors.

        if name in visited:
            raise ProgramError("Class %s may not inherit from itself: %s -> %s." % (name, " -> ".join(visited), name))

        visited.append(name)

        class_attrs = {}
        instance_attrs = {}

        # Aggregate the attributes from base classes, recording the origins of
        # applicable attributes. Bases are processed in reverse so that those
        # listed first override those listed later.

        for base in self.classes[name][::-1]:

            # Get the identity of the class from the reference.

            base = base.get_origin()

            # Define the base class completely before continuing with this
            # class.

            self.propagate_attrs_for_class(base, visited)
            class_attrs.update(self.all_class_attrs[base])

            # Instance attribute origins are combined if different.

            for key, values in self.all_instance_attrs[base].items():
                init_item(instance_attrs, key, set)
                instance_attrs[key].update(values)

        # Class attributes override those defined earlier in the hierarchy.

        class_attrs.update(self.all_class_attrs.get(name, {}))

        # Instance attributes are merely added if not already defined.

        for key in self.all_instance_attrs.get(name, []):
            if key not in instance_attrs:
                instance_attrs[key] = set(["%s.%s" % (name, key)])

        self.all_class_attrs[name] = class_attrs
        self.all_instance_attrs[name] = instance_attrs
        self.all_combined_attrs[name] = set(class_attrs.keys()).union(instance_attrs.keys())

    def derive_subclasses(self):

        "Derive subclass details for classes."

        # NOTE: This assumes that an entry in the subclasses mapping has
        # NOTE: already been made for each base class elsewhere.

        for name, bases in self.classes.items():
            for base in bases:

                # Get the identity of the class from the reference.

                base = base.get_origin()
                self.subclasses[base].add(name)

    def derive_shadowed_attrs(self):

        """
        Derive shadowed attributes for classes, these being names provided both
        as instance attributes and as class attributes.
        """

        for name, attrs in self.all_instance_attrs.items():
            attrs = set(attrs.keys()).intersection(self.all_class_attrs[name].keys())
            if attrs:
                self.all_shadowed_attrs[name] = attrs

    def set_class_types(self):

        "Set the type of each class."

        # NOTE: This relies on the type reference having been obtained by the
        # NOTE: initialise method.

        for attrs in self.all_class_attrs.values():
            attrs["__class__"] = self.type_ref.get_origin()

    def define_instantiators(self):

        """
        Consolidate parameter and default details, incorporating initialiser
        details to define instantiator signatures.
        """

        for cls, attrs in self.all_class_attrs.items():
            initialiser = attrs["__init__"]

            # The first initialiser parameter is omitted from the signature.

            self.function_parameters[cls] = self.function_parameters[initialiser][1:]
            self.function_defaults[cls] = self.function_defaults[initialiser]

    def collect_constants(self):

        "Get constants from all active modules."

        for module in self.modules.values():
            self.all_constants.update(module.constants)

    # Import methods.

    def find_in_path(self, name):

        """
        Find the given module 'name' in the search path, returning None where no
        such module could be found, or a 2-tuple from the 'find' method
        otherwise.
        """

        for d in self.path:
            m = self.find(d, name)
            if m: return m
        return None

    def find(self, d, name):

        """
        In the directory 'd', find the given module 'name', where 'name' can
        either refer to a single file module or to a package. Return None if the
        'name' cannot be associated with either a file or a package directory,
        or a 2-tuple from '_find_package' or '_find_module' otherwise.
        """

        # Packages take precedence over single file modules.

        m = self._find_package(d, name)
        if m: return m
        m = self._find_module(d, name)
        if m: return m
        return None

    def _find_module(self, d, name):

        """
        In the directory 'd', find the given module 'name', returning None where
        no suitable file exists in the directory, or a 2-tuple consisting of
        None (indicating that no package directory is involved) and a filename
        indicating the location of the module.
        """

        name_py = name + extsep + "py"
        filename = self._find_file(d, name_py)
        if filename:
            return None, filename
        return None

    def _find_package(self, d, name):

        """
        In the directory 'd', find the given package 'name', returning None
        where no suitable package directory exists, or a 2-tuple consisting of
        a directory (indicating the location of the package directory itself)
        and a filename indicating the location of the __init__.py module which
        declares the package's top-level contents.
        """

        filename = self._find_file(d, name)
        if filename:
            init_py = "__init__" + extsep + "py"
            init_py_filename = self._find_file(filename, init_py)
            if init_py_filename:
                return filename, init_py_filename
        return None

    def _find_file(self, d, filename):

        """
        Return the filename obtained when searching the directory 'd' for the
        given 'filename', or None if no actual file exists for the filename.
        """

        filename = join(d, filename)
        if exists(filename):
            return filename
        else:
            return None

    def load(self, name):

        """
        Load the module or package with the given 'name'. Return an object
        referencing the loaded module or package, or None if no such module or
        package exists.
        """

        # Loaded modules are returned immediately.
        # Modules may be known but not yet loading (having been registered as
        # submodules), loading, loaded, or completely unknown.

        module = self.get_module(name)

        if module:
            return module

        # Otherwise, modules are loaded.

        # Split the name into path components, and try to find the uppermost in
        # the search path.

        path = name.split(".")
        path_so_far = []
        module = None

        # The package directory of the previous component, if any.

        d = None

        for p in path:

            # Get the module's filesystem details.

            if not path_so_far:
                m = self.find_in_path(p)
            elif d:
                m = self.find(d, p)
            else:
                m = None

            path_so_far.append(p)
            module_name = ".".join(path_so_far)

            if not m:
                if self.verbose:
                    print >>sys.stderr, "Not found (%s)" % name

                return None # NOTE: Import error.

            # Get the module itself.

            d, filename = m
            module = self.load_from_file(filename, module_name)

        return module

    def load_from_file(self, filename, module_name=None):

        """
        Load the module from the given 'filename', employing the optional
        'module_name' or "__main__" if no name is given. Modules already known
        by name are returned without being loaded again.
        """

        if module_name is None:
            module_name = "__main__"

        module = self.modules.get(module_name)

        if not module:

            # Try to load from cache.

            module = self.load_from_cache(filename, module_name)
            if module:
                return module

            # If no cache entry exists, load from file.

            module = inspector.InspectedModule(module_name, self)
            self.add_module(module_name, module)
            self.update_cache_validity(module)

            self._load(module, module_name, lambda m: m.parse, filename)

        return module

    def update_cache_validity(self, module):

        "Make 'module' valid in the cache, but invalidate accessing modules."

        accessing = self.accessing_modules.get(module.name)
        if accessing:
            self.invalidated.update(accessing)
        if module.name in self.invalidated:
            self.invalidated.remove(module.name)

    def source_is_new(self, filename, module_name):

        """
        Return whether 'filename' is newer than the cached 'module_name'.
        Sources are always considered new where no cache is configured, where
        no cached module exists, or where the cached module has been
        invalidated.
        """

        if self.cache:
            cache_filename = join(self.cache, module_name)
            return not exists(cache_filename) or \
                getmtime(filename) > getmtime(cache_filename) or \
                module_name in self.invalidated
        else:
            return True

    def load_from_cache(self, filename, module_name):

        """
        Return a module residing in the cache for the given 'filename' and
        'module_name', or None if the module is not already known and its
        cached form cannot be used.
        """

        module = self.modules.get(module_name)

        if not module and not self.source_is_new(filename, module_name):
            module = CachedModule(module_name, self)
            self.add_module(module_name, module)

            filename = join(self.cache, module_name)
            self._load(module, module_name, lambda m: m.from_cache, filename)

        return module

    def _load(self, module, module_name, fn, filename):

        """
        Load 'module' for the given 'module_name', and with 'fn' performing an
        invocation on the module with the given 'filename'.
        """

        # Load the module.

        if self.verbose:
            print >>sys.stderr, module_name in self.required and "Required" or "Loading", module_name, "from", filename
        fn(module)(filename)

        # Add the module object if not already defined.

        if module_name not in self.objects:
            self.objects[module_name] = Reference("<module>", module_name)

    def add_module(self, module_name, module):

        """
        Register 'module' under the given 'module_name', removing the name from
        the import queue if present.
        """

        self.modules[module_name] = module
        if module_name in self.to_import:
            self.to_import.remove(module_name)

# vim: tabstop=4 expandtab shiftwidth=4