#!/usr/bin/env python

"""
Import logic.

Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013,
              2014, 2015, 2016 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
this program.  If not, see <http://www.gnu.org/licenses/>.
"""

from errors import ProgramError
from os.path import exists, extsep, getmtime, join
from os import listdir, makedirs, remove
from common import init_item, readfile, writefile
from modules import CachedModule
from referencing import Reference
import inspector
import sys

class Importer:

    "An import machine, searching for and loading modules."

    def __init__(self, path, cache=None, verbose=False):

        """
        Initialise the importer with the given search 'path' - a list of
        directories to search for Python modules.

        The optional 'cache' should be the name of a directory used to store
        cached module information. Where no cache is given, no cached
        information is read or written.

        The optional 'verbose' parameter causes output concerning the activities
        of the object to be produced if set to a true value (not the default).
        """

        self.path = path
        self.cache = cache
        self.verbose = verbose

        # Module importing queue, required modules, removed modules and active
        # modules in the final program.

        self.to_import = set()
        self.required = set(["__main__"])
        self.removed = {}
        self.modules = {}

        # Module relationships and invalidated cached modules.

        self.accessing_modules = {}
        self.invalidated = set()

        # Basic program information.

        self.objects = {}
        self.classes = {}
        self.function_parameters = {}
        self.function_defaults = {}
        self.function_targets = {}
        self.function_arguments = {}

        # Unresolved names.

        self.missing = set()

        # Derived information.

        self.subclasses = {}

        # Attributes of different object types.

        self.all_class_attrs = {}
        self.all_instance_attrs = {}
        self.all_instance_attr_constants = {}
        self.all_combined_attrs = {}
        self.all_module_attrs = {}
        self.all_shadowed_attrs = {}

        # References to external names and aliases within program units.

        self.all_name_references = {}
        self.all_initialised_names = {}
        self.all_aliased_names = {}

        # General attribute accesses.

        self.all_attr_accesses = {}
        self.all_const_accesses = {}
        self.all_attr_access_modifiers = {}

        # Constant literals and values.

        self.all_constants = {}
        self.all_constant_values = {}

        self.make_cache()

    def make_cache(self):

        "Create the cache directory if a cache is configured but absent."

        if self.cache and not exists(self.cache):
            makedirs(self.cache)

    def check_cache(self, details):

        """
        Check whether the cache applies for the given 'details', invalidating it
        if it does not, and record 'details' as the current cache details.

        Nothing is done where no cache has been configured.
        """

        if not self.cache:
            return

        recorded_details = self.get_cache_details()

        if recorded_details != details:
            self.remove_cache()

        writefile(self.get_cache_details_filename(), details)

    def get_cache_details_filename(self):

        """
        Return the filename for the cache details. A cache directory must have
        been configured for this to be meaningful.
        """

        return join(self.cache, "$details")

    def get_cache_details(self):

        """
        Return details of the cache, or None if no cache is configured or if no
        details have been recorded.
        """

        if not self.cache:
            return None

        details_filename = self.get_cache_details_filename()

        if not exists(details_filename):
            return None
        else:
            return readfile(details_filename)

    def remove_cache(self):

        """
        Remove the contents of the cache. Nothing is done where no cache has
        been configured.
        """

        if not self.cache:
            return

        for filename in listdir(self.cache):
            remove(join(self.cache, filename))

    def to_cache(self):

        "Write modules to the cache."

        if self.cache:
            for module_name, module in self.modules.items():
                module.to_cache(join(self.cache, module_name))

    # Object retrieval and storage.

    def get_object(self, name):

        """
        Return a reference for the given 'name' or None if no such object
        exists.
        """

        return self.objects.get(name)

    def set_object(self, name, value=None):

        """
        Set the object with the given 'name' and the given 'value'. Where
        'value' is already a reference, an alias of it for 'name' is stored;
        otherwise, a new reference is made from 'value' and 'name'.
        """

        if isinstance(value, Reference):
            ref = value.alias(name)
        else:
            ref = Reference(value, name)

        self.objects[name] = ref

    # Identification of both stored object names and name references.

    def identify(self, name):

        "Identify 'name' using stored object and external name records."

        return self.objects.get(name) or self.all_name_references.get(name)

    # Indirect object retrieval.

    def get_attributes(self, ref, attrname):

        """
        Return attributes provided by 'ref' for 'attrname' as a set of
        references. Class attributes may be provided by instances. An empty set
        is returned for references of any other kind or where no attribute is
        found.
        """

        kind = ref.get_kind()

        if kind == "<class>":
            attr = self.get_class_attribute(ref.get_origin(), attrname)
            return set([attr]) if attr else set()

        elif kind == "<instance>":
            return self.get_combined_attributes(ref.get_origin(), attrname)

        elif kind == "<module>":
            attr = self.get_module_attribute(ref.get_origin(), attrname)
            return set([attr]) if attr else set()

        else:
            return set()

    def get_class_attribute(self, object_type, attrname):

        "Return from 'object_type' the details of class attribute 'attrname'."

        attr = self.all_class_attrs[object_type].get(attrname)
        return attr and self.get_object(attr)

    def get_instance_attributes(self, object_type, attrname):

        """
        Return from 'object_type' the details of instance attribute 'attrname'
        as a set of references. Any constant recorded for the attribute is used
        in preference to a "<var>" reference.
        """

        consts = self.all_instance_attr_constants.get(object_type)
        const = consts.get(attrname) if consts else None

        attrs = set()
        for attr in self.all_instance_attrs[object_type].get(attrname, []):
            attrs.add(const or Reference("<var>", attr))
        return attrs

    def get_combined_attributes(self, object_type, attrname):

        """
        Return from 'object_type' the details of class or instance attribute
        'attrname'.
        """

        ref = self.get_class_attribute(object_type, attrname)
        refs = set([ref]) if ref else set()
        refs.update(self.get_instance_attributes(object_type, attrname))
        return refs

    def get_module_attribute(self, object_type, attrname):

        """
        Return from 'object_type' the details of module attribute 'attrname',
        or None if the module does not provide the attribute.
        """

        if attrname in self.all_module_attrs[object_type]:
            return self.get_object("%s.%s" % (object_type, attrname))
        else:
            return None

    # Module management.

    def queue_module(self, name, accessor, required=False):

        """
        Queue the module with the given 'name' for import from the given
        'accessor' module. If 'required' is true (it is false by default), the
        module will be required in the final program.
        """

        if name not in self.modules:
            self.to_import.add(name)

        if required:
            self.required.add(name)

        init_item(self.accessing_modules, name, set)
        self.accessing_modules[name].add(accessor.name)

    def get_modules(self):

        "Return all modules known to the importer."

        return self.modules.values()

    def get_module(self, name):

        """
        Return the module with the given 'name', or None if no such module is
        known.
        """

        return self.modules.get(name)

    # Program operations.

    def initialise(self, filename, reset=False):

        """
        Initialise a program whose main module is 'filename', resetting the
        cache if 'reset' is true. Return the main module.
        """

        if reset:
            self.remove_cache()
        self.check_cache(filename)

        # Load the program itself.

        m = self.load_from_file(filename)

        # Load any queued modules.

        while self.to_import:
            for name in list(self.to_import): # avoid mutation issue
                self.load(name)

                # Only successfully added modules are removed from the queue by
                # add_module, so names that could not be found are discarded
                # here to prevent this loop from running forever.

                self.to_import.discard(name)

        # Resolve dependencies between modules.

        self.resolve()

        # Record the type of all classes.

        self.type_ref = self.get_object("__builtins__.type")

        # Resolve dependencies within the program.

        for module in self.modules.values():
            module.complete()

        # Remove unneeded modules. A copy of the items is taken since the
        # dictionary is modified during the iteration.

        all_modules = list(self.modules.items())

        for name, module in all_modules:
            if name not in self.required:
                module.unpropagate()
                del self.modules[name]
                self.removed[name] = module

        # Collect redundant objects.

        for module in self.removed.values():
            module.collect()

        # Assert module objects where aliases have been removed.

        for name in self.required:
            if name not in self.objects:
                self.objects[name] = Reference("<module>", name)

        return m

    def finalise(self):

        """
        Finalise the inspected program, returning whether the program could be
        finalised. Programs with missing (unresolved) names are not finalised.
        """

        if self.missing:
            return False

        self.finalise_classes()
        self.to_cache()
        self.set_class_types()
        self.define_instantiators()
        self.collect_constants()

        return True

    # Supporting operations.

    def resolve(self):

        """
        Resolve dependencies between modules, recording unresolved references
        as missing and requiring the modules that provide resolved references.
        """

        self.waiting = {}

        for module in self.modules.values():

            # Resolve all deferred references in each module.

            for ref in module.deferred:
                found = self.find_dependency(ref)
                if not found:
                    self.missing.add((module.name, ref.get_origin()))

                # Record the resolved names and identify required modules.

                else:
                    ref.mutate(found)

                    # Find the providing module of this reference.

                    provider = self.get_module_provider(ref)
                    if provider:

                        module.required.add(provider)

                        # Providers need not have been queued (and thus given
                        # an entry) before being referenced here.

                        init_item(self.accessing_modules, provider, set)
                        self.accessing_modules[provider].add(module.name)

                        # Postpone any inclusion of the provider until this
                        # module becomes required.

                        if module.name not in self.required:
                            init_item(self.waiting, module.name, set)
                            self.waiting[module.name].add(provider)

                        # Make this module required in the accessing module.

                        elif provider not in self.required:
                            self.required.add(provider)
                            if self.verbose:
                                print >>sys.stderr, "Requiring", provider, "for", ref

        # Check modules again to see if they are now required and should now
        # cause the inclusion of other modules providing objects to the program.

        for module_name in self.waiting.keys():
            self.require_providers(module_name)

    def require_providers(self, module_name):

        """
        Test if 'module_name' is itself required and, if so, require modules
        containing objects provided to the module.
        """

        if module_name in self.required and module_name in self.waiting:
            for provider in self.waiting[module_name]:

                # Only newly required providers are visited in turn, this
                # preventing endless recursion with mutually dependent modules.

                if provider not in self.required:
                    self.required.add(provider)
                    if self.verbose:
                        print >>sys.stderr, "Requiring", provider
                    self.require_providers(provider)

    def find_dependency(self, ref):

        """
        Find the ultimate dependency for 'ref', following "<depends>"
        references until some other kind of reference, an unidentifiable name
        or an already-visited reference is encountered.
        """

        found = set()
        while ref and ref.has_kind("<depends>") and ref not in found:
            found.add(ref)
            ref = self.identify(ref.get_origin())
        return ref

    def get_module_provider(self, ref):

        """
        Identify the provider of the given 'ref', this being the first of its
        ancestors that is a known module, or None if there is no such module.
        """

        for ancestor in ref.ancestors():
            if ancestor in self.modules:
                return ancestor
        return None

    def finalise_classes(self):

        "Finalise the class relationships and attributes."

        self.derive_inherited_attrs()
        self.derive_subclasses()
        self.derive_shadowed_attrs()

    def derive_inherited_attrs(self):

        "Derive inherited attributes for classes throughout the program."

        for name in self.classes.keys():
            self.propagate_attrs_for_class(name)

    def propagate_attrs_for_class(self, name, visited=None):

        """
        Propagate inherited attributes for class 'name'. The optional 'visited'
        list records the classes being processed so that a class inheriting
        from itself causes a ProgramError to be raised.
        """

        # Visit classes only once.

        if name in self.all_combined_attrs:
            return

        visited = visited or []

        if name in visited:
            raise ProgramError("Class %s may not inherit from itself: %s -> %s." % (name, " -> ".join(visited), name))

        visited.append(name)

        class_attrs = {}
        instance_attrs = {}

        # Aggregate the attributes from base classes, recording the origins of
        # applicable attributes. Bases are visited in reverse so that earlier
        # bases override later ones.

        for base in self.classes[name][::-1]:

            # Get the identity of the class from the reference.

            base = base.get_origin()

            # Define the base class completely before continuing with this
            # class.

            self.propagate_attrs_for_class(base, visited)
            class_attrs.update(self.all_class_attrs[base])

            # Instance attribute origins are combined if different.

            for key, values in self.all_instance_attrs[base].items():
                init_item(instance_attrs, key, set)
                instance_attrs[key].update(values)

        # Class attributes override those defined earlier in the hierarchy.

        class_attrs.update(self.all_class_attrs.get(name, {}))

        # Instance attributes are merely added if not already defined.

        for key in self.all_instance_attrs.get(name, []):
            if key not in instance_attrs:
                instance_attrs[key] = set(["%s.%s" % (name, key)])

        self.all_class_attrs[name] = class_attrs
        self.all_instance_attrs[name] = instance_attrs
        self.all_combined_attrs[name] = set(class_attrs.keys()).union(instance_attrs.keys())

    def derive_subclasses(self):

        "Derive subclass details for classes."

        for name, bases in self.classes.items():
            for base in bases:

                # Get the identity of the class from the reference.

                base = base.get_origin()

                # Entries are created upon first use since the mapping is
                # initially empty.

                init_item(self.subclasses, base, set)
                self.subclasses[base].add(name)

    def derive_shadowed_attrs(self):

        """
        Derive shadowed attributes for classes, these being the names found
        amongst both the instance and class attributes of a class.
        """

        for name, attrs in self.all_instance_attrs.items():
            attrs = set(attrs.keys()).intersection(self.all_class_attrs[name].keys())
            if attrs:
                self.all_shadowed_attrs[name] = attrs

    def set_class_types(self):

        """
        Set the type of each class, using the type reference recorded by the
        'initialise' method.
        """

        for attrs in self.all_class_attrs.values():
            attrs["__class__"] = self.type_ref.get_origin()

    def define_instantiators(self):

        """
        Consolidate parameter and default details, incorporating initialiser
        details to define instantiator signatures.
        """

        for cls, attrs in self.all_class_attrs.items():
            initialiser = attrs["__init__"]

            # The first initialiser parameter is omitted from the instantiator
            # signature.

            self.function_parameters[cls] = self.function_parameters[initialiser][1:]
            self.function_defaults[cls] = self.function_defaults[initialiser]

    def collect_constants(self):

        "Get constants from all active modules."

        for module in self.modules.values():
            self.all_constants.update(module.constants)

    # Import methods.

    def find_in_path(self, name):

        """
        Find the given module 'name' in the search path, returning None where no
        such module could be found, or a 2-tuple from the 'find' method
        otherwise.
        """

        for d in self.path:
            m = self.find(d, name)
            if m: return m
        return None

    def find(self, d, name):

        """
        In the directory 'd', find the given module 'name', where 'name' can
        either refer to a single file module or to a package. Return None if the
        'name' cannot be associated with either a file or a package directory,
        or a 2-tuple from '_find_package' or '_find_module' otherwise.
        """

        # Packages take precedence over single file modules.

        return self._find_package(d, name) or self._find_module(d, name) or None

    def _find_module(self, d, name):

        """
        In the directory 'd', find the given module 'name', returning None where
        no suitable file exists in the directory, or a 2-tuple consisting of
        None (indicating that no package directory is involved) and a filename
        indicating the location of the module.
        """

        name_py = name + extsep + "py"
        filename = self._find_file(d, name_py)
        if filename:
            return None, filename
        return None

    def _find_package(self, d, name):

        """
        In the directory 'd', find the given package 'name', returning None
        where no suitable package directory exists, or a 2-tuple consisting of
        a directory (indicating the location of the package directory itself)
        and a filename indicating the location of the __init__.py module which
        declares the package's top-level contents.
        """

        filename = self._find_file(d, name)
        if filename:
            init_py = "__init__" + extsep + "py"
            init_py_filename = self._find_file(filename, init_py)
            if init_py_filename:
                return filename, init_py_filename
        return None

    def _find_file(self, d, filename):

        """
        Return the filename obtained when searching the directory 'd' for the
        given 'filename', or None if no actual file exists for the filename.
        """

        filename = join(d, filename)
        if exists(filename):
            return filename
        else:
            return None

    def load(self, name):

        """
        Load the module or package with the given 'name'. Return an object
        referencing the loaded module or package, or None if no such module or
        package exists.
        """

        # Loaded modules are returned immediately.
        # Modules may be known but not yet loading (having been registered as
        # submodules), loading, loaded, or completely unknown.

        module = self.get_module(name)

        if module:
            return module

        # Otherwise, modules are loaded.

        # Split the name into path components, and try to find the uppermost in
        # the search path.

        path = name.split(".")
        path_so_far = []
        module = None

        # The package directory of the previously loaded component, if any.

        d = None

        for p in path:

            # Get the module's filesystem details. Only the first component is
            # sought in the search path; others must reside in a package.

            if not path_so_far:
                m = self.find_in_path(p)
            elif d:
                m = self.find(d, p)
            else:
                m = None

            path_so_far.append(p)
            module_name = ".".join(path_so_far)

            if not m:
                if self.verbose:
                    print >>sys.stderr, "Not found (%s)" % name

                return None # NOTE: Import error.

            # Get the module itself.

            d, filename = m
            module = self.load_from_file(filename, module_name)

        return module

    def load_from_file(self, filename, module_name=None):

        """
        Load the module from the given 'filename', using the optional
        'module_name' or "__main__" if no name is given. Return the module,
        which may have been loaded previously or obtained from the cache.
        """

        if module_name is None:
            module_name = "__main__"

        module = self.modules.get(module_name)

        if not module:

            # Try to load from cache.

            module = self.load_from_cache(filename, module_name)
            if module:
                return module

            # If no cache entry exists, load from file.

            module = inspector.InspectedModule(module_name, self)
            self.add_module(module_name, module)
            self.update_cache_validity(module)

            self._load(module, module_name, lambda m: m.parse, filename)

        return module

    def update_cache_validity(self, module):

        "Make 'module' valid in the cache, but invalidate accessing modules."

        accessing = self.accessing_modules.get(module.name)
        if accessing:
            self.invalidated.update(accessing)

        self.invalidated.discard(module.name)

    def source_is_new(self, filename, module_name):

        """
        Return whether 'filename' is newer than the cached 'module_name'.
        Sources are always new where no cache is configured, where no cached
        module exists, or where the cached module has been invalidated.
        """

        if self.cache:
            cache_filename = join(self.cache, module_name)
            return not exists(cache_filename) or \
                getmtime(filename) > getmtime(cache_filename) or \
                module_name in self.invalidated
        else:
            return True

    def load_from_cache(self, filename, module_name):

        """
        Return a module residing in the cache for the given 'module_name', or
        any already known module of that name. None is returned where the
        source 'filename' is newer than any cached module.
        """

        module = self.modules.get(module_name)

        if not module and not self.source_is_new(filename, module_name):
            module = CachedModule(module_name, self)
            self.add_module(module_name, module)

            filename = join(self.cache, module_name)
            self._load(module, module_name, lambda m: m.from_cache, filename)

        return module

    def _load(self, module, module_name, fn, filename):

        """
        Load 'module' for the given 'module_name', and with 'fn' performing an
        invocation on the module with the given 'filename'.
        """

        # Load the module.

        if self.verbose:
            action = "Required" if module_name in self.required else "Loading"
            print >>sys.stderr, action, module_name, "from", filename

        # Here, 'fn' selects the loading method of the module.

        fn(module)(filename)

        # Add the module object if not already defined.

        if module_name not in self.objects:
            self.objects[module_name] = Reference("<module>", module_name)

    def add_module(self, module_name, module):

        """
        Record 'module' under the given 'module_name', replacing any existing
        record, and remove the name from the import queue.
        """

        self.modules[module_name] = module
        self.to_import.discard(module_name)

# vim: tabstop=4 expandtab shiftwidth=4