#!/usr/bin/env python

"""
Import logic.

Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013,
              2014, 2015, 2016 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
this program.  If not, see <http://www.gnu.org/licenses/>.
"""

from errors import ProgramError
from os.path import exists, extsep, getmtime, join
from os import listdir, makedirs, remove
from common import init_item, readfile, writefile
from modules import CachedModule
from referencing import Reference
import inspector
import sys

class Importer:

    "An import machine, searching for and loading modules."

    def __init__(self, path, cache=None, verbose=False):

        """
        Initialise the importer with the given search 'path' - a list of
        directories to search for Python modules.

        The optional 'cache' should be the name of a directory used to store
        cached module information. The directory is created if it does not
        already exist.

        The optional 'verbose' parameter causes output concerning the activities
        of the object to be produced if set to a true value (not the default).
        """

        self.path = path
        self.cache = cache
        self.verbose = verbose

        # Names of modules queued for import, names of modules required in the
        # final program, and modules removed for not being required.

        self.to_import = set()
        self.required = set(["__main__"])
        self.removed = {}

        # Loaded modules by name, the names of the modules accessing each
        # module, and the names of modules whose cache entries are stale.

        self.modules = {}
        self.accessing_modules = {}
        self.invalidated = set()

        self.objects = {}
        self.classes = {}
        self.function_parameters = {}
        self.function_defaults = {}
        self.function_targets = {}
        self.function_arguments = {}

        # Derived information.

        self.subclasses = {}

        # Attributes of different object types.

        self.all_class_attrs = {}
        self.all_instance_attrs = {}
        self.all_instance_attr_constants = {}
        self.all_combined_attrs = {}
        self.all_module_attrs = {}
        self.all_shadowed_attrs = {}

        # References to external names and aliases within program units.

        self.all_name_references = {}
        self.all_initialised_names = {}
        self.all_aliased_names = {}

        # General attribute accesses.

        self.all_attr_accesses = {}
        self.all_const_accesses = {}
        self.all_attr_access_modifiers = {}

        # Constant literals and values.

        self.all_constants = {}
        self.all_constant_values = {}

        self.make_cache()

    def make_cache(self):

        "Create the cache directory if one is configured and it is missing."

        if self.cache and not exists(self.cache):
            makedirs(self.cache)

    def check_cache(self, details):

        """
        Check whether the cache applies for the given 'details', invalidating it
        if it does not. The given 'details' are then recorded in the cache.

        Nothing is done if no cache directory is configured.
        """

        if not self.cache:
            return

        recorded_details = self.get_cache_details()

        if recorded_details != details:
            self.remove_cache()

        writefile(self.get_cache_details_filename(), details)

    def get_cache_details_filename(self):

        """
        Return the filename for the cache details. A cache directory must be
        configured for this to be meaningful.
        """

        return join(self.cache, "$details")

    def get_cache_details(self):

        """
        Return details of the cache, or None if no cache is configured or no
        details have been recorded.
        """

        if not self.cache:
            return None

        details_filename = self.get_cache_details_filename()

        if not exists(details_filename):
            return None
        else:
            return readfile(details_filename)

    def remove_cache(self):

        """
        Remove the contents of the cache. Nothing is done if no cache directory
        is configured.
        """

        if not self.cache:
            return

        for filename in listdir(self.cache):
            remove(join(self.cache, filename))

    def to_cache(self):

        "Write modules to the cache."

        if self.cache:
            for module_name, module in self.modules.items():
                module.to_cache(join(self.cache, module_name))

    # Object retrieval and storage.

    def get_object(self, name):

        """
        Return a reference for the given 'name' or None if no such object
        exists.
        """

        return self.objects.get(name)

    def set_object(self, name, value=None):

        """
        Set the object with the given 'name' and the given 'value'. Where
        'value' is a reference, an alias of it for 'name' is stored; otherwise,
        a new reference is made from 'value' and 'name'.
        """

        if isinstance(value, Reference):
            ref = value.alias(name)
        else:
            ref = Reference(value, name)

        self.objects[name] = ref

    # Identification of both stored object names and name references.

    def identify(self, name):

        "Identify 'name' using stored object and external name records."

        return self.objects.get(name) or self.all_name_references.get(name)

    # Indirect object retrieval.

    def get_attributes(self, ref, attrname):

        """
        Return attributes provided by 'ref' for 'attrname'. Class attributes
        may be provided by instances.

        The result is always a set of references, being empty where 'ref' is
        of some other kind or where no attribute is found.
        """

        kind = ref.get_kind()
        if kind == "<class>":
            ref = self.get_class_attribute(ref.get_origin(), attrname)
            return set([ref]) if ref else set()
        elif kind == "<instance>":
            return self.get_combined_attributes(ref.get_origin(), attrname)
        elif kind == "<module>":
            ref = self.get_module_attribute(ref.get_origin(), attrname)
            return set([ref]) if ref else set()
        else:
            return set()

    def get_class_attribute(self, object_type, attrname):

        "Return from 'object_type' the details of class attribute 'attrname'."

        attr = self.all_class_attrs[object_type].get(attrname)

        # A missing attribute yields the false value obtained above.

        return attr and self.get_object(attr)

    def get_instance_attributes(self, object_type, attrname):

        """
        Return from 'object_type' the details of instance attribute 'attrname'.
        The result is a set of references.
        """

        consts = self.all_instance_attr_constants.get(object_type)

        # Any constant recorded for the attribute takes precedence over a
        # general variable reference.

        const = consts and consts.get(attrname)

        attrs = set()
        for attr in self.all_instance_attrs[object_type].get(attrname, []):
            attrs.add(const or Reference("<var>", attr))
        return attrs

    def get_combined_attributes(self, object_type, attrname):

        """
        Return from 'object_type' the details of class or instance attribute
        'attrname'.
        """

        ref = self.get_class_attribute(object_type, attrname)
        refs = set([ref]) if ref else set()
        refs.update(self.get_instance_attributes(object_type, attrname))
        return refs

    def get_module_attribute(self, object_type, attrname):

        "Return from 'object_type' the details of module attribute 'attrname'."

        if attrname in self.all_module_attrs[object_type]:
            return self.get_object("%s.%s" % (object_type, attrname))
        else:
            return None

    # Module management.

    def queue_module(self, name, accessor, required=False):

        """
        Queue the module with the given 'name' for import from the given
        'accessor' module. If 'required' is true (it is false by default), the
        module will be required in the final program.
        """

        if name not in self.modules:
            self.to_import.add(name)

        if required:
            self.required.add(name)

        init_item(self.accessing_modules, name, set)
        self.accessing_modules[name].add(accessor.name)

    def get_modules(self):

        "Return all modules known to the importer."

        return self.modules.values()

    def get_module(self, name):

        """
        Return the module with the given 'name', or None if no such module is
        known.
        """

        return self.modules.get(name)

    # Program operations.

    def initialise(self, filename, reset=False):

        """
        Initialise a program whose main module is 'filename', resetting the
        cache if 'reset' is true. Return the main module.
        """

        if reset:
            self.remove_cache()
        self.check_cache(filename)

        # Load the program itself.

        m = self.load_from_file(filename)

        # Load any queued modules.

        while self.to_import:
            for name in list(self.to_import): # avoid mutation issue
                self.load(name)

                # Names are only removed from the queue when a module is
                # added, so a module that cannot be found would otherwise
                # remain queued and this loop would never terminate.

                self.to_import.discard(name)

        # Resolve dependencies between modules.

        self.resolve()

        # Record the type of all classes.

        self.type_ref = self.get_object("__builtins__.type")

        # Resolve dependencies within the program.

        for module in self.modules.values():
            module.complete()

        # Remove unneeded modules, iterating over a snapshot since the
        # collection of modules is modified.

        all_modules = list(self.modules.items())

        for name, module in all_modules:
            if name not in self.required:
                module.unpropagate()
                del self.modules[name]
                self.removed[name] = module

        return m

    def finalise(self):

        "Finalise the inspected program."

        self.finalise_classes()
        self.to_cache()
        self.set_class_types()
        self.define_instantiators()
        self.collect_constants()

    # Supporting operations.

    def resolve(self):

        """
        Resolve dependencies between modules, replacing "<depends>" references
        amongst the stored objects and name references with the references
        they ultimately depend on, and making the modules providing such
        references required.
        """

        for d in [self.objects, self.all_name_references]:
            resolved = {}

            for name, ref in d.items():
                if ref.has_kind("<depends>"):
                    found = self.find_dependency(ref)
                    if found:
                        resolved[name] = found
                    else:
                        sys.stderr.write("Name %s references an unknown object: %s\n" % (name, ref.get_origin()))

            # Record the resolved names and identify required modules.

            for name, ref in resolved.items():
                d[name] = ref

                # Find the providing module of this reference.

                module_name = self.get_module_provider(ref)
                if module_name:
                    self.required.add(module_name)

                    # Make this module required in all accessing modules.
                    # Modules which were never queued have no recorded
                    # accessors.

                    for accessor_name in self.accessing_modules.get(module_name, ()):
                        self.modules[accessor_name].required.add(module_name)

    def find_dependency(self, ref):

        """
        Find the ultimate dependency for 'ref', following "<depends>"
        references through the stored objects. The result may be None where
        an origin is not stored, or a "<depends>" reference where the
        references form a cycle.
        """

        found = set()
        while ref and ref.has_kind("<depends>") and ref not in found:
            found.add(ref)
            ref = self.objects.get(ref.get_origin())
        return ref

    def get_module_provider(self, ref):

        """
        Identify the provider of the given 'ref', returning the first of its
        ancestors that names a known module, or None if there is no such
        ancestor.
        """

        for ancestor in ref.ancestors():
            if ancestor in self.modules:
                return ancestor
        return None

    def finalise_classes(self):

        "Finalise the class relationships and attributes."

        self.derive_inherited_attrs()
        self.derive_subclasses()
        self.derive_shadowed_attrs()

    def derive_inherited_attrs(self):

        "Derive inherited attributes for classes throughout the program."

        for name in self.classes.keys():
            self.propagate_attrs_for_class(name)

    def propagate_attrs_for_class(self, name, visited=None):

        """
        Propagate inherited attributes for class 'name'. The optional 'visited'
        list records the classes encountered so far, permitting the detection
        of classes inheriting from themselves, which causes a ProgramError to
        be raised.
        """

        # Visit classes only once.

        if name in self.all_combined_attrs:
            return

        visited = visited or []

        if name in visited:
            raise ProgramError("Class %s may not inherit from itself: %s -> %s." % (name, " -> ".join(visited), name))

        visited.append(name)

        class_attrs = {}
        instance_attrs = {}

        # Aggregate the attributes from base classes, recording the origins of
        # applicable attributes. Bases are visited in reverse order so that
        # the attributes of earlier bases replace those of later ones.

        for base in self.classes[name][::-1]:

            # Get the identity of the class from the reference.

            base = base.get_origin()

            # Define the base class completely before continuing with this
            # class.

            self.propagate_attrs_for_class(base, visited)
            class_attrs.update(self.all_class_attrs[base])

            # Instance attribute origins are combined if different.

            for key, values in self.all_instance_attrs[base].items():
                init_item(instance_attrs, key, set)
                instance_attrs[key].update(values)

        # Class attributes override those defined earlier in the hierarchy.

        class_attrs.update(self.all_class_attrs.get(name, {}))

        # Instance attributes are merely added if not already defined.

        for key in self.all_instance_attrs.get(name, []):
            if key not in instance_attrs:
                instance_attrs[key] = set(["%s.%s" % (name, key)])

        self.all_class_attrs[name] = class_attrs
        self.all_instance_attrs[name] = instance_attrs
        self.all_combined_attrs[name] = set(class_attrs.keys()).union(instance_attrs.keys())

    def derive_subclasses(self):

        """
        Derive subclass details for classes, recording for each base class the
        names of the classes naming it as a base.
        """

        for name, bases in self.classes.items():
            for base in bases:

                # Get the identity of the class from the reference.

                base = base.get_origin()

                # Entries are created on demand since the mapping is
                # initially empty.

                init_item(self.subclasses, base, set)
                self.subclasses[base].add(name)

    def derive_shadowed_attrs(self):

        """
        Derive shadowed attributes for classes, these being the names provided
        as both instance and class attributes.
        """

        for name, attrs in self.all_instance_attrs.items():
            attrs = set(attrs.keys()).intersection(self.all_class_attrs[name].keys())
            if attrs:
                self.all_shadowed_attrs[name] = attrs

    def set_class_types(self):

        """
        Set the type of each class. This relies on the type reference recorded
        by the 'initialise' method.
        """

        for attrs in self.all_class_attrs.values():
            attrs["__class__"] = self.type_ref.get_origin()

    def define_instantiators(self):

        """
        Consolidate parameter and default details, incorporating initialiser
        details to define instantiator signatures.
        """

        for cls, attrs in self.all_class_attrs.items():
            initialiser = attrs["__init__"]

            # The first initialiser parameter is omitted from the
            # instantiator's parameters.

            self.function_parameters[cls] = self.function_parameters[initialiser][1:]
            self.function_defaults[cls] = self.function_defaults[initialiser]

    def collect_constants(self):

        "Get constants from all active modules."

        for module in self.modules.values():
            self.all_constants.update(module.constants)

    # Import methods.

    def find_in_path(self, name):

        """
        Find the given module 'name' in the search path, returning None where no
        such module could be found, or a 2-tuple from the 'find' method
        otherwise.
        """

        for d in self.path:
            m = self.find(d, name)
            if m: return m
        return None

    def find(self, d, name):

        """
        In the directory 'd', find the given module 'name', where 'name' can
        either refer to a single file module or to a package. Return None if the
        'name' cannot be associated with either a file or a package directory,
        or a 2-tuple from '_find_package' or '_find_module' otherwise.

        Packages are sought before single file modules.
        """

        m = self._find_package(d, name)
        if m: return m
        m = self._find_module(d, name)
        if m: return m
        return None

    def _find_module(self, d, name):

        """
        In the directory 'd', find the given module 'name', returning None where
        no suitable file exists in the directory, or a 2-tuple consisting of
        None (indicating that no package directory is involved) and a filename
        indicating the location of the module.
        """

        name_py = name + extsep + "py"
        filename = self._find_file(d, name_py)
        if filename:
            return None, filename
        return None

    def _find_package(self, d, name):

        """
        In the directory 'd', find the given package 'name', returning None
        where no suitable package directory exists, or a 2-tuple consisting of
        a directory (indicating the location of the package directory itself)
        and a filename indicating the location of the __init__.py module which
        declares the package's top-level contents.
        """

        filename = self._find_file(d, name)
        if filename:
            init_py = "__init__" + extsep + "py"
            init_py_filename = self._find_file(filename, init_py)
            if init_py_filename:
                return filename, init_py_filename
        return None

    def _find_file(self, d, filename):

        """
        Return the filename obtained when searching the directory 'd' for the
        given 'filename', or None if no actual file exists for the filename.
        """

        filename = join(d, filename)
        if exists(filename):
            return filename
        else:
            return None

    def load(self, name):

        """
        Load the module or package with the given 'name'. Return an object
        referencing the loaded module or package, or None if no such module or
        package exists.

        Where 'name' is a dotted name, each package along the path is loaded in
        turn.
        """

        # Loaded modules are returned immediately.
        # Modules may be known but not yet loading (having been registered as
        # submodules), loading, loaded, or completely unknown.

        module = self.get_module(name)

        if module:
            return module

        # Otherwise, modules are loaded.

        if self.verbose:
            sys.stderr.write("Loading %s\n" % (name,))

        # Split the name into path components, and try to find the uppermost in
        # the search path.

        path = name.split(".")
        path_so_far = []
        module = None

        for p in path:

            # Get the module's filesystem details. After the first component,
            # 'd' is the package directory found for the preceding component,
            # being None where that component was a single file module.

            if not path_so_far:
                m = self.find_in_path(p)
            elif d:
                m = self.find(d, p)
            else:
                m = None

            path_so_far.append(p)
            module_name = ".".join(path_so_far)

            if not m:
                if self.verbose:
                    sys.stderr.write("Not found (%s)\n" % (name,))

                return None # NOTE: Import error.

            # Get the module itself.

            d, filename = m
            module = self.load_from_file(filename, module_name)

        return module

    def load_from_file(self, filename, module_name=None):

        """
        Load the module from the given 'filename', employing the given
        'module_name' or "__main__" if no name is given. Return the module,
        which may be one that is already known or one obtained from the cache.
        """

        if module_name is None:
            module_name = "__main__"

        module = self.modules.get(module_name)

        if not module:

            # Try to load from cache.

            module = self.load_from_cache(filename, module_name)
            if module:
                return module

            # If no cache entry exists, load from file.

            module = inspector.InspectedModule(module_name, self)
            self.add_module(module_name, module)
            self.update_cache_validity(module)

            self._load(module, module_name, lambda m: m.parse, filename)

        return module

    def update_cache_validity(self, module):

        "Make 'module' valid in the cache, but invalidate accessing modules."

        accessing = self.accessing_modules.get(module.name)
        if accessing:
            self.invalidated.update(accessing)
        self.invalidated.discard(module.name)

    def source_is_new(self, filename, module_name):

        """
        Return whether 'filename' is newer than the cached 'module_name'. The
        source is also considered new if no cache is configured, if no cache
        entry exists, or if the module has been invalidated.
        """

        if self.cache:
            cache_filename = join(self.cache, module_name)
            return not exists(cache_filename) or \
                getmtime(filename) > getmtime(cache_filename) or \
                module_name in self.invalidated
        else:
            return True

    def load_from_cache(self, filename, module_name):

        """
        Return a module residing in the cache for the given 'module_name', or
        any module already known by that name. None is returned where the
        source 'filename' is newer than the cache entry.
        """

        module = self.modules.get(module_name)

        if not module and not self.source_is_new(filename, module_name):
            module = CachedModule(module_name, self)
            self.add_module(module_name, module)

            filename = join(self.cache, module_name)
            self._load(module, module_name, lambda m: m.from_cache, filename)

        return module

    def _load(self, module, module_name, fn, filename):

        """
        Load 'module' for the given 'module_name', and with 'fn' performing an
        invocation on the module with the given 'filename'.
        """

        # Load the module.

        if self.verbose:
            sys.stderr.write("Loading %s\n" % (filename,))
        fn(module)(filename)
        if self.verbose:
            sys.stderr.write("Loaded %s\n" % (filename,))

    def add_module(self, module_name, module):

        """
        Register 'module' under the given 'module_name', replacing any module
        previously registered under that name, recording a module reference
        for it amongst the stored objects, and removing the name from the
        import queue.
        """

        self.modules[module_name] = module
        self.objects[module_name] = Reference("<module>", module_name)
        self.to_import.discard(module_name)

# vim: tabstop=4 expandtab shiftwidth=4