#!/usr/bin/env python

"""
Import logic.

Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013,
              2014, 2015, 2016 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
this program.  If not, see <http://www.gnu.org/licenses/>.
"""

from errors import ProgramError
from os.path import exists, extsep, getmtime, join
from os import listdir, makedirs, remove
from common import init_item, readfile, writefile
from modules import CachedModule
from referencing import Reference
import inspector
import sys

class Importer:

    "An import machine, searching for and loading modules."

    def __init__(self, path, cache=None, verbose=False):

        """
        Initialise the importer with the given search 'path' - a list of
        directories to search for Python modules.

        The optional 'cache' should be the name of a directory used to store
        cached module information. Where it is omitted, no caching is done.

        The optional 'verbose' parameter causes output concerning the activities
        of the object to be produced if set to a true value (not the default).
        """

        self.path = path
        self.cache = cache
        self.verbose = verbose

        # Names queued for import, names required in the final program, and
        # modules removed from the program for not being required.

        self.to_import = set()
        self.required = set(["__main__"])
        self.removed = {}

        # Loaded modules, the names of modules accessing each module, and the
        # names of modules whose cached form is no longer valid.

        self.modules = {}
        self.accessing_modules = {}
        self.invalidated = set()

        self.objects = {}
        self.classes = {}
        self.function_parameters = {}
        self.function_defaults = {}
        self.function_targets = {}
        self.function_arguments = {}

        # Derived information.

        self.subclasses = {}

        # The reference for "__builtins__.type", set by 'initialise'.

        self.type_ref = None

        # Attributes of different object types.

        self.all_class_attrs = {}
        self.all_instance_attrs = {}
        self.all_instance_attr_constants = {}
        self.all_combined_attrs = {}
        self.all_module_attrs = {}
        self.all_shadowed_attrs = {}

        # References to external names and aliases within program units.

        self.all_name_references = {}
        self.all_initialised_names = {}
        self.all_aliased_names = {}

        # General attribute accesses.

        self.all_attr_accesses = {}
        self.all_const_accesses = {}
        self.all_attr_access_modifiers = {}

        # Constant literals and values.

        self.all_constants = {}
        self.all_constant_values = {}

        self.make_cache()

    def make_cache(self):

        "Create the cache directory if one is configured and it is missing."

        if self.cache and not exists(self.cache):
            makedirs(self.cache)

    def check_cache(self, details):

        """
        Check whether the cache applies for the given 'details', invalidating it
        if it does not. The given 'details' are then recorded in the cache.

        Nothing is done if no cache directory has been configured.
        """

        if not self.cache:
            return

        recorded_details = self.get_cache_details()

        if recorded_details != details:
            self.remove_cache()

        writefile(self.get_cache_details_filename(), details)

    def get_cache_details_filename(self):

        """
        Return the filename for the cache details. This requires a cache
        directory to have been configured.
        """

        return join(self.cache, "$details")

    def get_cache_details(self):

        """
        Return details of the cache, or None if no cache is configured or no
        details have been recorded.
        """

        if not self.cache:
            return None

        details_filename = self.get_cache_details_filename()

        if not exists(details_filename):
            return None

        return readfile(details_filename)

    def remove_cache(self):

        """
        Remove the contents of the cache. Nothing is done if no cache directory
        is configured or if the directory does not exist.
        """

        if not self.cache or not exists(self.cache):
            return

        for filename in listdir(self.cache):
            remove(join(self.cache, filename))

    def to_cache(self):

        "Write modules to the cache."

        if self.cache:
            for module_name, module in self.modules.items():
                module.to_cache(join(self.cache, module_name))

    # Object retrieval and storage.

    def get_object(self, name):

        """
        Return a reference for the given 'name' or None if no such object
        exists.
        """

        return self.objects.get(name)

    def set_object(self, name, value=None):

        """
        Set the object with the given 'name' and the given 'value'. A
        Reference 'value' is stored as an alias under 'name'; any other value
        is wrapped in a new Reference.
        """

        if isinstance(value, Reference):
            ref = value.alias(name)
        else:
            ref = Reference(value, name)

        self.objects[name] = ref

    # Indirect object retrieval.

    def get_attributes(self, ref, attrname):

        """
        Return a set of attributes provided by 'ref' for 'attrname'. Class
        attributes may be provided by instances. An empty set is returned for
        other kinds of reference or where no attribute is found.
        """

        kind = ref.get_kind()

        if kind == "<class>":
            attr = self.get_class_attribute(ref.get_origin(), attrname)
        elif kind == "<instance>":
            return self.get_combined_attributes(ref.get_origin(), attrname)
        elif kind == "<module>":
            attr = self.get_module_attribute(ref.get_origin(), attrname)
        else:
            attr = None

        return set([attr]) if attr else set()

    def get_class_attribute(self, object_type, attrname):

        "Return from 'object_type' the details of class attribute 'attrname'."

        attr = self.all_class_attrs[object_type].get(attrname)
        return attr and self.get_object(attr)

    def get_instance_attributes(self, object_type, attrname):

        """
        Return from 'object_type' the details of instance attribute 'attrname'
        as a set of references. A recorded constant for the attribute is used
        in preference to a "<var>" reference for each attribute origin.
        """

        # The constant, if any, is the same for every origin of the attribute.

        consts = self.all_instance_attr_constants.get(object_type)
        const = consts.get(attrname) if consts else None

        attrs = set()
        for attr in self.all_instance_attrs[object_type].get(attrname, []):
            attrs.add(const or Reference("<var>", attr))
        return attrs

    def get_combined_attributes(self, object_type, attrname):

        """
        Return from 'object_type' the details of class or instance attribute
        'attrname'.
        """

        ref = self.get_class_attribute(object_type, attrname)
        refs = set([ref]) if ref else set()
        refs.update(self.get_instance_attributes(object_type, attrname))
        return refs

    def get_module_attribute(self, object_type, attrname):

        """
        Return from 'object_type' the details of module attribute 'attrname',
        or None if the module has no such attribute.
        """

        if attrname in self.all_module_attrs[object_type]:
            return self.get_object("%s.%s" % (object_type, attrname))
        else:
            return None

    # Module management.

    def queue_module(self, name, accessor, required=False):

        """
        Queue the module with the given 'name' for import from the given
        'accessor' module. If 'required' is true (it is false by default), the
        module will be required in the final program.
        """

        if name not in self.modules:
            self.to_import.add(name)

        if required:
            self.required.add(name)

        init_item(self.accessing_modules, name, set)
        self.accessing_modules[name].add(accessor.name)

    def get_modules(self):

        "Return all modules known to the importer."

        return self.modules.values()

    def get_module(self, name):

        """
        Return the module with the given 'name', or None if no such module is
        known.
        """

        return self.modules.get(name)

    # Program operations.

    def initialise(self, filename, reset=False):

        """
        Initialise a program whose main module is 'filename', resetting the
        cache if 'reset' is true. Return the main module.
        """

        if reset:
            self.remove_cache()
        self.check_cache(filename)

        # Load the program itself.

        m = self.load_from_file(filename)

        # Load any queued modules.

        while self.to_import:
            for name in list(self.to_import): # avoid mutation issue
                self.load(name)

                # Names are only removed from the queue when a module is added,
                # so a name that could not be found must be dropped here to
                # prevent this loop from never terminating.

                self.to_import.discard(name)

        # Resolve dependencies between modules.

        self.resolve()

        # Record the type of all classes.

        self.type_ref = self.get_object("__builtins__.type")

        # Resolve dependencies within the program.

        for module in self.modules.values():
            module.complete()

        # Remove unneeded modules, iterating over a snapshot since entries are
        # deleted from the mapping within the loop.

        for name, module in list(self.modules.items()):
            if name not in self.required:
                module.unpropagate()
                del self.modules[name]
                self.removed[name] = module

        return m

    def finalise(self):

        "Finalise the inspected program."

        self.finalise_classes()
        self.to_cache()
        self.set_class_types()
        self.define_instantiators()
        self.collect_constants()

    # Supporting operations.

    def resolve(self):

        """
        Resolve dependencies between modules, replacing "<depends>" references
        with the objects they ultimately refer to, and marking the modules
        providing those objects as required. Unresolvable names are reported on
        standard error.
        """

        resolved = {}

        for name, ref in self.objects.items():
            if ref.has_kind("<depends>"):
                found = self.find_dependency(ref)
                if found:
                    resolved[name] = found
                else:
                    sys.stderr.write("Name %s references an unknown object: %s\n" % (name, ref.get_origin()))

        # Record the resolved names and identify required modules.

        for name, ref in resolved.items():
            self.objects[name] = ref

            module_name = self.get_module_provider(ref)
            if module_name:
                self.required.add(module_name)

    def find_dependency(self, ref):

        """
        Find the ultimate dependency for 'ref', following "<depends>" references
        until some other kind of reference, a missing object or an already
        visited reference is encountered.
        """

        found = set()
        while ref and ref.has_kind("<depends>") and ref not in found:
            found.add(ref)
            ref = self.objects.get(ref.get_origin())
        return ref

    def get_module_provider(self, ref):

        """
        Identify the provider of the given 'ref', returning the first of its
        ancestors that is a known module name, or None if there is none.
        """

        for ancestor in ref.ancestors():
            if ancestor in self.modules:
                return ancestor
        return None

    def finalise_classes(self):

        "Finalise the class relationships and attributes."

        self.derive_inherited_attrs()
        self.derive_subclasses()
        self.derive_shadowed_attrs()

    def derive_inherited_attrs(self):

        "Derive inherited attributes for classes throughout the program."

        for name in self.classes.keys():
            self.propagate_attrs_for_class(name)

    def propagate_attrs_for_class(self, name, visited=None):

        """
        Propagate inherited attributes for class 'name'. The optional 'visited'
        list records the classes being processed in the current chain of base
        classes, and ProgramError is raised if 'name' is already among them.
        """

        # Visit classes only once.

        if name in self.all_combined_attrs:
            return

        if visited is None:
            visited = []

        if name in visited:
            raise ProgramError("Class %s may not inherit from itself: %s -> %s." % (name, " -> ".join(visited), name))

        visited.append(name)

        class_attrs = {}
        instance_attrs = {}

        # Aggregate the attributes from base classes, recording the origins of
        # applicable attributes. Bases are processed in reverse so that those
        # listed first are applied last.

        for base in self.classes[name][::-1]:

            # Get the identity of the class from the reference.

            base = base.get_origin()

            # Define the base class completely before continuing with this
            # class.

            self.propagate_attrs_for_class(base, visited)
            class_attrs.update(self.all_class_attrs[base])

            # Instance attribute origins are combined if different.

            for key, values in self.all_instance_attrs[base].items():
                init_item(instance_attrs, key, set)
                instance_attrs[key].update(values)

        # Class attributes override those defined earlier in the hierarchy.

        class_attrs.update(self.all_class_attrs.get(name, {}))

        # Instance attributes are merely added if not already defined.

        for key in self.all_instance_attrs.get(name, []):
            if key not in instance_attrs:
                instance_attrs[key] = set(["%s.%s" % (name, key)])

        self.all_class_attrs[name] = class_attrs
        self.all_instance_attrs[name] = instance_attrs
        self.all_combined_attrs[name] = set(class_attrs.keys()).union(instance_attrs.keys())

    def derive_subclasses(self):

        "Derive subclass details for classes."

        for name, bases in self.classes.items():
            for base in bases:

                # Get the identity of the class from the reference.

                base = base.get_origin()

                # Ensure that a collection exists for the base class before
                # adding to it.

                init_item(self.subclasses, base, set)
                self.subclasses[base].add(name)

    def derive_shadowed_attrs(self):

        """
        Derive shadowed attributes for classes: those names defined both as
        instance attributes and as class attributes.
        """

        for name, attrs in self.all_instance_attrs.items():
            attrs = set(attrs.keys()).intersection(self.all_class_attrs[name].keys())
            if attrs:
                self.all_shadowed_attrs[name] = attrs

    def set_class_types(self):

        """
        Set the type of each class using the type reference recorded by
        'initialise'.
        """

        type_origin = self.type_ref.get_origin()

        for attrs in self.all_class_attrs.values():
            attrs["__class__"] = type_origin

    def define_instantiators(self):

        """
        Consolidate parameter and default details, incorporating initialiser
        details to define instantiator signatures.
        """

        for cls, attrs in self.all_class_attrs.items():
            initialiser = attrs["__init__"]

            # The first initialiser parameter is omitted from the instantiator.

            self.function_parameters[cls] = self.function_parameters[initialiser][1:]
            self.function_defaults[cls] = self.function_defaults[initialiser]

    def collect_constants(self):

        "Get constants from all active modules."

        for module in self.modules.values():
            self.all_constants.update(module.constants)

    # Import methods.

    def find_in_path(self, name):

        """
        Find the given module 'name' in the search path, returning None where no
        such module could be found, or a 2-tuple from the 'find' method
        otherwise.
        """

        for d in self.path:
            m = self.find(d, name)
            if m:
                return m
        return None

    def find(self, d, name):

        """
        In the directory 'd', find the given module 'name', where 'name' can
        either refer to a single file module or to a package. Return None if the
        'name' cannot be associated with either a file or a package directory,
        or a 2-tuple from '_find_package' or '_find_module' otherwise.
        """

        # Packages take precedence over single file modules.

        return self._find_package(d, name) or self._find_module(d, name) or None

    def _find_module(self, d, name):

        """
        In the directory 'd', find the given module 'name', returning None where
        no suitable file exists in the directory, or a 2-tuple consisting of
        None (indicating that no package directory is involved) and a filename
        indicating the location of the module.
        """

        name_py = name + extsep + "py"
        filename = self._find_file(d, name_py)
        if filename:
            return None, filename
        return None

    def _find_package(self, d, name):

        """
        In the directory 'd', find the given package 'name', returning None
        where no suitable package directory exists, or a 2-tuple consisting of
        a directory (indicating the location of the package directory itself)
        and a filename indicating the location of the __init__.py module which
        declares the package's top-level contents.
        """

        filename = self._find_file(d, name)
        if filename:
            init_py = "__init__" + extsep + "py"
            init_py_filename = self._find_file(filename, init_py)
            if init_py_filename:
                return filename, init_py_filename
        return None

    def _find_file(self, d, filename):

        """
        Return the filename obtained when searching the directory 'd' for the
        given 'filename', or None if no actual file exists for the filename.
        """

        filename = join(d, filename)
        if exists(filename):
            return filename
        else:
            return None

    def load(self, name):

        """
        Load the module or package with the given 'name'. Return an object
        referencing the loaded module or package, or None if no such module or
        package exists.
        """

        # Loaded modules are returned immediately.
        # Modules may be known but not yet loading (having been registered as
        # submodules), loading, loaded, or completely unknown.

        module = self.get_module(name)

        if module:
            return module

        # Otherwise, modules are loaded.

        if self.verbose:
            sys.stderr.write("Loading %s\n" % (name,))

        # Split the name into path components, and try to find the uppermost in
        # the search path.

        path = name.split(".")
        path_so_far = []
        module = None

        # The package directory of the previously loaded component, if any.

        d = None

        for p in path:

            # Get the module's filesystem details. The first component is
            # sought in the search path, subsequent ones in the directory of
            # the enclosing package.

            if not path_so_far:
                m = self.find_in_path(p)
            elif d:
                m = self.find(d, p)
            else:
                m = None

            path_so_far.append(p)
            module_name = ".".join(path_so_far)

            if not m:
                if self.verbose:
                    sys.stderr.write("Not found (%s)\n" % (name,))

                return None # NOTE: Import error.

            # Get the module itself.

            d, filename = m
            module = self.load_from_file(filename, module_name)

        return module

    def load_from_file(self, filename, module_name=None):

        """
        Load the module from the given 'filename', naming it 'module_name' or
        "__main__" if no name is given. An already loaded module of that name is
        returned as it is; otherwise, a valid cached form is used in preference
        to inspecting the source file.
        """

        if module_name is None:
            module_name = "__main__"

        module = self.modules.get(module_name)

        if not module:

            # Try to load from cache.

            module = self.load_from_cache(filename, module_name)
            if module:
                return module

            # If no cache entry exists, load from file.

            module = inspector.InspectedModule(module_name, self)
            self.add_module(module_name, module)
            self.update_cache_validity(module)

            self._load(module, module_name, lambda m: m.parse, filename)

        return module

    def update_cache_validity(self, module):

        "Make 'module' valid in the cache, but invalidate accessing modules."

        accessing = self.accessing_modules.get(module.name)
        if accessing:
            self.invalidated.update(accessing)
        self.invalidated.discard(module.name)

    def source_is_new(self, filename, module_name):

        """
        Return whether 'filename' is newer than the cached 'module_name'. The
        source is also considered new if there is no cache, no cached form of
        the module, or if the module has been invalidated.
        """

        if not self.cache:
            return True

        cache_filename = join(self.cache, module_name)
        return not exists(cache_filename) or \
            getmtime(filename) > getmtime(cache_filename) or \
            module_name in self.invalidated

    def load_from_cache(self, filename, module_name):

        """
        Return a module residing in the cache for 'module_name', or None if the
        source 'filename' is newer than any cached form. An already loaded
        module of that name is returned as it is.
        """

        module = self.modules.get(module_name)

        if not module and not self.source_is_new(filename, module_name):
            module = CachedModule(module_name, self)
            self.add_module(module_name, module)

            filename = join(self.cache, module_name)
            self._load(module, module_name, lambda m: m.from_cache, filename)

        return module

    def _load(self, module, module_name, fn, filename):

        """
        Load 'module' for the given 'module_name', and with 'fn' performing an
        invocation on the module with the given 'filename'.
        """

        # Load the module, with 'fn' selecting the method that is then called
        # with the filename.

        if self.verbose:
            sys.stderr.write("Loading %s\n" % (filename,))
        fn(module)(filename)
        if self.verbose:
            sys.stderr.write("Loaded %s\n" % (filename,))

    def add_module(self, module_name, module):

        """
        Register 'module' under the given 'module_name', recording a module
        reference for it and removing the name from the import queue.
        """

        self.modules[module_name] = module
        self.objects[module_name] = Reference("<module>", module_name)
        self.to_import.discard(module_name)

# vim: tabstop=4 expandtab shiftwidth=4