#!/usr/bin/env python

"""
Import logic.

Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013,
              2014, 2015, 2016 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
this program.  If not, see <http://www.gnu.org/licenses/>.
"""

from errors import ProgramError
from os.path import exists, extsep, getmtime, join
from os import listdir, makedirs, remove
from common import init_item, readfile, writefile
from modules import CachedModule
from referencing import Reference
import inspector
import sys

class Importer:

    "An import machine, searching for and loading modules."

    def __init__(self, path, cache=None, verbose=False):

        """
        Initialise the importer with the given search 'path' - a list of
        directories to search for Python modules.

        The optional 'cache' should be the name of a directory used to store
        cached module information.

        The optional 'verbose' parameter causes output concerning the activities
        of the object to be produced if set to a true value (not the default).
        """

        self.path = path
        self.cache = cache
        self.verbose = verbose

        # Module importing queue, required modules, removed modules and active
        # modules in the final program.

        self.to_import = set()
        self.required = set(["__main__"])
        self.removed = {}
        self.modules = {}

        # Module relationships and invalidated cached modules.

        self.accessing_modules = {}
        self.invalidated = set()

        # Providers waiting for their accessing modules to become required
        # (populated by 'resolve').

        self.waiting = {}

        # Basic program information.

        self.objects = {}
        self.classes = {}
        self.function_parameters = {}
        self.function_defaults = {}
        self.function_targets = {}
        self.function_arguments = {}

        # The reference for the type of all classes (set by 'initialise').

        self.type_ref = None

        # Unresolved names.

        self.missing = set()

        # Derived information.

        self.subclasses = {}

        # Attributes of different object types.

        self.all_class_attrs = {}
        self.all_instance_attrs = {}
        self.all_instance_attr_constants = {}
        self.all_combined_attrs = {}
        self.all_module_attrs = {}
        self.all_shadowed_attrs = {}

        # References to external names and aliases within program units.

        self.all_name_references = {}
        self.all_initialised_names = {}
        self.all_aliased_names = {}

        # General attribute accesses.

        self.all_attr_accesses = {}
        self.all_const_accesses = {}
        self.all_attr_access_modifiers = {}

        # Constant literals and values.

        self.all_constants = {}
        self.all_constant_values = {}

        self.make_cache()

    def make_cache(self):

        "Create the cache directory if a cache is configured and is absent."

        if self.cache and not exists(self.cache):
            makedirs(self.cache)

    def check_cache(self, details):

        """
        Check whether the cache applies for the given 'details', invalidating it
        if it does not. Where no cache is configured, nothing is done.
        """

        # Without a cache directory there is nothing to check or record.

        if not self.cache:
            return

        recorded_details = self.get_cache_details()

        if recorded_details != details:
            self.remove_cache()

        writefile(self.get_cache_details_filename(), details)

    def get_cache_details_filename(self):

        """
        Return the filename for the cache details. This requires a cache to
        have been configured.
        """

        return join(self.cache, "$details")

    def get_cache_details(self):

        """
        Return details of the cache, or None if no cache is configured or if no
        details have been recorded.
        """

        if not self.cache:
            return None

        details_filename = self.get_cache_details_filename()

        if not exists(details_filename):
            return None
        else:
            return readfile(details_filename)

    def remove_cache(self):

        """
        Remove the contents of the cache. Where no cache is configured, nothing
        is done.
        """

        if not self.cache:
            return

        for filename in listdir(self.cache):
            remove(join(self.cache, filename))

    def to_cache(self):

        "Write modules to the cache."

        if self.cache:
            for module_name, module in self.modules.items():
                module.to_cache(join(self.cache, module_name))

    # Object retrieval and storage.

    def get_object(self, name):

        """
        Return a reference for the given 'name' or None if no such object
        exists.
        """

        return self.objects.get(name)

    def set_object(self, name, value=None):

        "Set the object with the given 'name' and the given 'value'."

        # Existing references are aliased under the new name; other values are
        # wrapped in a new reference.

        if isinstance(value, Reference):
            ref = value.alias(name)
        else:
            ref = Reference(value, name)

        self.objects[name] = ref

    # Identification of both stored object names and name references.

    def identify(self, name):

        "Identify 'name' using stored object and external name records."

        return self.objects.get(name) or self.all_name_references.get(name)

    # Indirect object retrieval.

    def get_attributes(self, ref, attrname):

        """
        Return attributes provided by 'ref' for 'attrname'. Class attributes
        may be provided by instances.

        A set of references is returned, being empty where no attribute is
        found or where 'ref' is not a class, instance or module.
        """

        kind = ref.get_kind()
        if kind == "<class>":
            ref = self.get_class_attribute(ref.get_origin(), attrname)
            return set([ref]) if ref else set()
        elif kind == "<instance>":
            return self.get_combined_attributes(ref.get_origin(), attrname)
        elif kind == "<module>":
            ref = self.get_module_attribute(ref.get_origin(), attrname)
            return set([ref]) if ref else set()
        else:
            return set()

    def get_class_attribute(self, object_type, attrname):

        "Return from 'object_type' the details of class attribute 'attrname'."

        attr = self.all_class_attrs[object_type].get(attrname)
        return attr and self.get_object(attr)

    def get_instance_attributes(self, object_type, attrname):

        """
        Return from 'object_type' the details of instance attribute 'attrname'.
        """

        consts = self.all_instance_attr_constants.get(object_type)
        attrs = set()

        # A recorded constant for the attribute is preferred over a plain
        # variable reference.

        for attr in self.all_instance_attrs[object_type].get(attrname, []):
            attrs.add(consts and consts.get(attrname) or Reference("<var>", attr))
        return attrs

    def get_combined_attributes(self, object_type, attrname):

        """
        Return from 'object_type' the details of class or instance attribute
        'attrname'.
        """

        ref = self.get_class_attribute(object_type, attrname)
        refs = set([ref]) if ref else set()
        refs.update(self.get_instance_attributes(object_type, attrname))
        return refs

    def get_module_attribute(self, object_type, attrname):

        "Return from 'object_type' the details of module attribute 'attrname'."

        if attrname in self.all_module_attrs[object_type]:
            return self.get_object("%s.%s" % (object_type, attrname))
        else:
            return None

    # Module management.

    def queue_module(self, name, accessor, required=False):

        """
        Queue the module with the given 'name' for import from the given
        'accessor' module. If 'required' is true (it is false by default), the
        module will be required in the final program.
        """

        if name not in self.modules:
            self.to_import.add(name)

        if required:
            self.required.add(name)

        init_item(self.accessing_modules, name, set)
        self.accessing_modules[name].add(accessor.name)

    def get_modules(self):

        "Return all modules known to the importer."

        return self.modules.values()

    def get_module(self, name):

        """
        Return the module with the given 'name', or None if no such module is
        known.
        """

        return self.modules.get(name)

    # Program operations.

    def initialise(self, filename, reset=False):

        """
        Initialise a program whose main module is 'filename', resetting the
        cache if 'reset' is true. Return the main module.
        """

        if reset:
            self.remove_cache()
        self.check_cache(filename)

        # Load the program itself.

        m = self.load_from_file(filename)

        # Load any queued modules.

        while self.to_import:
            for name in list(self.to_import): # avoid mutation issue
                self.load(name)

                # Modules that cannot be found are never added and would
                # otherwise remain queued, causing this loop never to finish.

                self.to_import.discard(name)

        # Resolve dependencies between modules.

        self.resolve()

        # Record the type of all classes.

        self.type_ref = self.get_object("__builtins__.type")

        # Resolve dependencies within the program.

        for module in self.modules.values():
            module.complete()

        # Remove unneeded modules, iterating over a copy of the items since the
        # dictionary is modified.

        all_modules = list(self.modules.items())

        for name, module in all_modules:
            if name not in self.required:
                module.unpropagate()
                del self.modules[name]
                self.removed[name] = module

        return m

    def finalise(self):

        """
        Finalise the inspected program, returning whether the program could be
        finalised.
        """

        # Unresolved names prevent finalisation.

        if self.missing:
            return False

        self.finalise_classes()
        self.to_cache()
        self.set_class_types()
        self.define_instantiators()
        self.collect_constants()

        return True

    # Supporting operations.

    def resolve(self):

        "Resolve dependencies between modules."

        self.waiting = {}

        for module in self.modules.values():

            # Resolve all deferred references in each module.

            for ref in module.deferred:
                found = self.find_dependency(ref)
                if not found:
                    self.missing.add((module.name, ref.get_origin()))

                # Record the resolved names and identify required modules.

                else:
                    ref.mutate(found)

                    # Find the providing module of this reference.

                    provider = self.get_module_provider(ref)
                    if provider:

                        module.required.add(provider)

                        # Providers need not have been queued (and thus
                        # registered) by any accessing module.

                        init_item(self.accessing_modules, provider, set)
                        self.accessing_modules[provider].add(module.name)

                        # Postpone any inclusion of the provider until this
                        # module becomes required.

                        if module.name not in self.required:
                            init_item(self.waiting, module.name, set)
                            self.waiting[module.name].add(provider)

                        # Make this module required in the accessing module.

                        elif provider not in self.required:
                            self.required.add(provider)
                            if self.verbose:
                                print >>sys.stderr, "Requiring", provider, "for", ref

        # Check modules again to see if they are now required and should now
        # cause the inclusion of other modules providing objects to the program.

        for module_name in self.waiting.keys():
            self.require_providers(module_name)

    def require_providers(self, module_name):

        """
        Test if 'module_name' is itself required and, if so, require modules
        containing objects provided to the module.
        """

        if module_name in self.required and module_name in self.waiting:
            for provider in self.waiting[module_name]:
                if provider not in self.required:
                    self.required.add(provider)
                    if self.verbose:
                        print >>sys.stderr, "Requiring", provider

                    # Newly required providers may in turn have providers.

                    self.require_providers(provider)

    def find_dependency(self, ref):

        "Find the ultimate dependency for 'ref'."

        # Follow dependencies until a non-dependency or nothing is found,
        # stopping if a reference is encountered again.

        found = set()
        while ref and ref.has_kind("<depends>") and ref not in found:
            found.add(ref)
            ref = self.identify(ref.get_origin())
        return ref

    def get_module_provider(self, ref):

        """
        Identify the provider of the given 'ref', returning the name of the
        first ancestor of the reference that is a known module, or None if no
        such ancestor exists.
        """

        for ancestor in ref.ancestors():
            if ancestor in self.modules:
                return ancestor
        return None

    def finalise_classes(self):

        "Finalise the class relationships and attributes."

        self.derive_inherited_attrs()
        self.derive_subclasses()
        self.derive_shadowed_attrs()

    def derive_inherited_attrs(self):

        "Derive inherited attributes for classes throughout the program."

        for name in self.classes.keys():
            self.propagate_attrs_for_class(name)

    def propagate_attrs_for_class(self, name, visited=None):

        """
        Propagate inherited attributes for class 'name'. The optional 'visited'
        list records the classes encountered so far and is used to detect
        classes inheriting from themselves, causing a ProgramError to be raised.
        """

        # Visit classes only once.

        if name in self.all_combined_attrs:
            return

        visited = visited or []

        if name in visited:
            raise ProgramError("Class %s may not inherit from itself: %s -> %s." % (name, " -> ".join(visited), name))

        visited.append(name)

        class_attrs = {}
        instance_attrs = {}

        # Aggregate the attributes from base classes, recording the origins of
        # applicable attributes.

        for base in self.classes[name][::-1]:

            # Get the identity of the class from the reference.

            base = base.get_origin()

            # Define the base class completely before continuing with this
            # class.

            self.propagate_attrs_for_class(base, visited)
            class_attrs.update(self.all_class_attrs[base])

            # Instance attribute origins are combined if different.

            for key, values in self.all_instance_attrs[base].items():
                init_item(instance_attrs, key, set)
                instance_attrs[key].update(values)

        # Class attributes override those defined earlier in the hierarchy.

        class_attrs.update(self.all_class_attrs.get(name, {}))

        # Instance attributes are merely added if not already defined.

        for key in self.all_instance_attrs.get(name, []):
            if key not in instance_attrs:
                instance_attrs[key] = set(["%s.%s" % (name, key)])

        self.all_class_attrs[name] = class_attrs
        self.all_instance_attrs[name] = instance_attrs
        self.all_combined_attrs[name] = set(class_attrs.keys()).union(instance_attrs.keys())

    def derive_subclasses(self):

        "Derive subclass details for classes."

        # NOTE: Entries in 'subclasses' are not created here and are presumably
        # NOTE: established elsewhere for each class before this is called.

        for name, bases in self.classes.items():
            for base in bases:

                # Get the identity of the class from the reference.

                base = base.get_origin()
                self.subclasses[base].add(name)

    def derive_shadowed_attrs(self):

        "Derive shadowed attributes for classes."

        # Shadowed attributes are those defined both as instance and class
        # attributes.

        for name, attrs in self.all_instance_attrs.items():
            attrs = set(attrs.keys()).intersection(self.all_class_attrs[name].keys())
            if attrs:
                self.all_shadowed_attrs[name] = attrs

    def set_class_types(self):

        "Set the type of each class."

        for attrs in self.all_class_attrs.values():
            attrs["__class__"] = self.type_ref.get_origin()

    def define_instantiators(self):

        """
        Consolidate parameter and default details, incorporating initialiser
        details to define instantiator signatures.
        """

        for cls, attrs in self.all_class_attrs.items():
            initialiser = attrs["__init__"]

            # The first initialiser parameter is omitted from the instantiator
            # signature.

            self.function_parameters[cls] = self.function_parameters[initialiser][1:]
            self.function_defaults[cls] = self.function_defaults[initialiser]

    def collect_constants(self):

        "Get constants from all active modules."

        for module in self.modules.values():
            self.all_constants.update(module.constants)

    # Import methods.

    def find_in_path(self, name):

        """
        Find the given module 'name' in the search path, returning None where no
        such module could be found, or a 2-tuple from the 'find' method
        otherwise.
        """

        for d in self.path:
            m = self.find(d, name)
            if m: return m
        return None

    def find(self, d, name):

        """
        In the directory 'd', find the given module 'name', where 'name' can
        either refer to a single file module or to a package. Return None if the
        'name' cannot be associated with either a file or a package directory,
        or a 2-tuple from '_find_package' or '_find_module' otherwise.
        """

        # Packages take precedence over single file modules.

        m = self._find_package(d, name)
        if m: return m
        m = self._find_module(d, name)
        if m: return m
        return None

    def _find_module(self, d, name):

        """
        In the directory 'd', find the given module 'name', returning None where
        no suitable file exists in the directory, or a 2-tuple consisting of
        None (indicating that no package directory is involved) and a filename
        indicating the location of the module.
        """

        name_py = name + extsep + "py"
        filename = self._find_file(d, name_py)
        if filename:
            return None, filename
        return None

    def _find_package(self, d, name):

        """
        In the directory 'd', find the given package 'name', returning None
        where no suitable package directory exists, or a 2-tuple consisting of
        a directory (indicating the location of the package directory itself)
        and a filename indicating the location of the __init__.py module which
        declares the package's top-level contents.
        """

        filename = self._find_file(d, name)
        if filename:
            init_py = "__init__" + extsep + "py"
            init_py_filename = self._find_file(filename, init_py)
            if init_py_filename:
                return filename, init_py_filename
        return None

    def _find_file(self, d, filename):

        """
        Return the filename obtained when searching the directory 'd' for the
        given 'filename', or None if no actual file exists for the filename.
        """

        filename = join(d, filename)
        if exists(filename):
            return filename
        else:
            return None

    def load(self, name):

        """
        Load the module or package with the given 'name'. Return an object
        referencing the loaded module or package, or None if no such module or
        package exists.
        """

        # Loaded modules are returned immediately.
        # Modules may be known but not yet loading (having been registered as
        # submodules), loading, loaded, or completely unknown.

        module = self.get_module(name)

        if module:
            return module

        # Otherwise, modules are loaded.

        # Split the name into path components, and try to find the uppermost in
        # the search path.

        path = name.split(".")
        path_so_far = []
        module = None

        # The package directory of the previous component, if any.

        d = None

        for p in path:

            # Get the module's filesystem details.

            if not path_so_far:
                m = self.find_in_path(p)
            elif d:
                m = self.find(d, p)
            else:
                m = None

            path_so_far.append(p)
            module_name = ".".join(path_so_far)

            if not m:
                if self.verbose:
                    print >>sys.stderr, "Not found (%s)" % name

                return None # NOTE: Import error.

            # Get the module itself.

            d, filename = m
            module = self.load_from_file(filename, module_name)

        return module

    def load_from_file(self, filename, module_name=None):

        """
        Load the module from the given 'filename', using the optional
        'module_name' or "__main__" if no name is given. Return the module.
        """

        if module_name is None:
            module_name = "__main__"

        module = self.modules.get(module_name)

        if not module:

            # Try to load from cache.

            module = self.load_from_cache(filename, module_name)
            if module:
                return module

            # If no cache entry exists, load from file.

            module = inspector.InspectedModule(module_name, self)
            self.add_module(module_name, module)
            self.update_cache_validity(module)

            self._load(module, module_name, lambda m: m.parse, filename)

        return module

    def update_cache_validity(self, module):

        "Make 'module' valid in the cache, but invalidate accessing modules."

        accessing = self.accessing_modules.get(module.name)
        if accessing:
            self.invalidated.update(accessing)
        if module.name in self.invalidated:
            self.invalidated.remove(module.name)

    def source_is_new(self, filename, module_name):

        """
        Return whether 'filename' is newer than the cached 'module_name'. The
        source is also considered new if there is no cache or cached module, or
        if the cached module has been invalidated.
        """

        if self.cache:
            cache_filename = join(self.cache, module_name)
            return not exists(cache_filename) or \
                getmtime(filename) > getmtime(cache_filename) or \
                module_name in self.invalidated
        else:
            return True

    def load_from_cache(self, filename, module_name):

        """
        Return a module residing in the cache for the source 'filename' and
        'module_name', or None if no usable cached module exists.
        """

        module = self.modules.get(module_name)

        if not module and not self.source_is_new(filename, module_name):
            module = CachedModule(module_name, self)
            self.add_module(module_name, module)

            # The cached form is read instead of the source file.

            filename = join(self.cache, module_name)
            self._load(module, module_name, lambda m: m.from_cache, filename)

        return module

    def _load(self, module, module_name, fn, filename):

        """
        Load 'module' for the given 'module_name', and with 'fn' performing an
        invocation on the module with the given 'filename'.
        """

        # Load the module.

        if self.verbose:
            print >>sys.stderr, module_name in self.required and "Required" or "Loading", module_name, "from", filename
        fn(module)(filename)

        # Add the module object if not already defined.

        if module_name not in self.objects:
            self.objects[module_name] = Reference("<module>", module_name)

    def add_module(self, module_name, module):

        """
        Register 'module' under the given 'module_name', removing the name from
        the queue of modules to be imported.
        """

        self.modules[module_name] = module
        if module_name in self.to_import:
            self.to_import.remove(module_name)

# vim: tabstop=4 expandtab shiftwidth=4