#!/usr/bin/env python

"""
Import logic.

Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013,
              2014, 2015, 2016 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
this program.  If not, see <http://www.gnu.org/licenses/>.
"""

from errors import ProgramError
from os.path import exists, extsep, getmtime, join
from os import listdir, makedirs, remove
from common import init_item, readfile, writefile
from referencing import Reference
import inspector
import sys

class Importer:

    "An import machine, searching for and loading modules."

    def __init__(self, path, cache=None, verbose=False):

        """
        Initialise the importer with the given search 'path' - a list of
        directories to search for Python modules.

        The optional 'cache' should be the name of a directory used to store
        cached module information. The directory is created if it does not
        already exist. Without a cache, all cache operations do nothing.

        The optional 'verbose' parameter causes output concerning the activities
        of the object to be produced if set to a true value (not the default).
        """

        self.path = path
        self.cache = cache
        self.verbose = verbose

        # Names of modules queued for import.

        self.to_import = set()

        # Loaded modules, the modules accessing each module name, and the
        # modules whose cached form is no longer usable.

        self.modules = {}
        self.accessing_modules = {}
        self.invalidated = set()

        self.objects = {}
        self.classes = {}
        self.function_parameters = {}
        self.function_defaults = {}
        self.function_targets = {}
        self.function_arguments = {}

        # Derived information.

        self.subclasses = {}

        # Attributes of different object types.

        self.all_class_attrs = {}
        self.all_instance_attrs = {}
        self.all_instance_attr_constants = {}
        self.all_combined_attrs = {}
        self.all_module_attrs = {}
        self.all_shadowed_attrs = {}

        # References to external names and aliases within program units.

        self.all_name_references = {}
        self.all_initialised_names = {}
        self.all_aliased_names = {}

        # General attribute accesses.

        self.all_attr_accesses = {}
        self.all_const_accesses = {}
        self.all_attr_access_modifiers = {}

        # Constant literals and values.

        self.all_constants = {}
        self.all_constant_values = {}

        self.make_cache()

    def _report(self, *words):

        """
        Write the given 'words', separated by spaces and followed by a newline,
        to standard error if verbose output has been requested.
        """

        if self.verbose:
            sys.stderr.write(" ".join(map(str, words)) + "\n")

    def make_cache(self):

        "Create the cache directory if one is configured but does not exist."

        if self.cache and not exists(self.cache):
            makedirs(self.cache)

    def check_cache(self, details):

        """
        Check whether the cache applies for the given 'details', invalidating it
        if it does not, and record 'details' as the current cache details.

        Nothing is done if no cache directory is configured.
        """

        if not self.cache:
            return

        if self.get_cache_details() != details:
            self.remove_cache()

        writefile(self.get_cache_details_filename(), details)

    def get_cache_details_filename(self):

        """
        Return the filename for the cache details, or None if no cache
        directory is configured.
        """

        if not self.cache:
            return None

        return join(self.cache, "$details")

    def get_cache_details(self):

        """
        Return details of the cache, or None if no details have been recorded
        or no cache directory is configured.
        """

        details_filename = self.get_cache_details_filename()

        if not details_filename or not exists(details_filename):
            return None

        return readfile(details_filename)

    def remove_cache(self):

        """
        Remove the contents of the cache. Nothing is done if no cache directory
        is configured.
        """

        if not self.cache:
            return

        for filename in listdir(self.cache):
            remove(join(self.cache, filename))

    def to_cache(self):

        "Write modules to the cache, each in a file named after the module."

        if self.cache:
            for module_name, module in self.modules.items():
                module.to_cache(join(self.cache, module_name))

    # Object retrieval and storage.

    def get_object(self, name):

        """
        Return a reference for the given 'name' or None if no such object
        exists.
        """

        return self.objects.get(name)

    def set_object(self, name, value=None):

        """
        Set the object with the given 'name' and the given 'value'. A reference
        given as 'value' is aliased to 'name'; any other value is wrapped in a
        new reference for 'name'.
        """

        if isinstance(value, Reference):
            ref = value.alias(name)
        else:
            ref = Reference(value, name)

        self.objects[name] = ref

    # Indirect object retrieval.

    def get_attributes(self, ref, attrname):

        """
        Return a set of attributes provided by 'ref' for 'attrname'. Class
        attributes may be provided by instances. An empty set is returned where
        'ref' is not a class, instance or module, or where no such attribute is
        known.
        """

        kind = ref.get_kind()

        if kind == "<instance>":
            return self.get_combined_attributes(ref.get_origin(), attrname)

        if kind == "<class>":
            attr = self.get_class_attribute(ref.get_origin(), attrname)
        elif kind == "<module>":
            attr = self.get_module_attribute(ref.get_origin(), attrname)
        else:
            attr = None

        return set([attr]) if attr else set()

    def get_class_attribute(self, object_type, attrname):

        """
        Return from 'object_type' the details of class attribute 'attrname', or
        a false value if the class has no such attribute.
        """

        attr = self.all_class_attrs[object_type].get(attrname)
        return attr and self.get_object(attr)

    def get_instance_attributes(self, object_type, attrname):

        """
        Return from 'object_type' the details of instance attribute 'attrname'
        as a set of references. A constant recorded for the attribute is used in
        preference to a plain variable reference.
        """

        consts = self.all_instance_attr_constants.get(object_type)
        const = consts and consts.get(attrname)

        attrs = set()
        for attr in self.all_instance_attrs[object_type].get(attrname, []):
            attrs.add(const or Reference("<var>", attr))
        return attrs

    def get_combined_attributes(self, object_type, attrname):

        """
        Return from 'object_type' the details of class or instance attribute
        'attrname' as a set of references.
        """

        ref = self.get_class_attribute(object_type, attrname)
        refs = set([ref]) if ref else set()
        refs.update(self.get_instance_attributes(object_type, attrname))
        return refs

    def get_module_attribute(self, object_type, attrname):

        """
        Return from 'object_type' the details of module attribute 'attrname', or
        None if the module has no such attribute.
        """

        if attrname in self.all_module_attrs[object_type]:
            return self.get_object("%s.%s" % (object_type, attrname))
        else:
            return None

    # Module management.

    def queue_module(self, name, module):

        """
        Queue the module with the given 'name' for import from the given
        'module', recording 'module' as one accessing the named module.
        """

        if name not in self.modules:
            self.to_import.add(name)

        init_item(self.accessing_modules, name, set)
        self.accessing_modules[name].add(module.name)

    def get_modules(self):

        "Return all modules known to the importer."

        return self.modules.values()

    def get_module(self, name):

        "Return the module with the given 'name', or None if it is not known."

        return self.modules.get(name)

    # Program operations.

    def initialise(self, filename, reset=False):

        """
        Initialise a program whose main module is 'filename', resetting the
        cache if 'reset' is true. Return the main module.
        """

        if reset:
            self.remove_cache()
        self.check_cache(filename)

        # Load the program itself.

        m = self.load_from_file(filename)

        # Load any queued modules. Loading a module may queue further modules,
        # hence the repeated inspection of the queue.

        while self.to_import:
            for name in list(self.to_import): # avoid mutation issue
                self.load(name)

                # Loaded modules are dequeued when added, but names that could
                # not be found must be dequeued here, otherwise this loop would
                # never terminate.

                self.to_import.discard(name)

        # Resolve dependencies between modules.

        self.resolve()

        # Resolve dependencies within the program.

        for module in self.modules.values():
            module.complete()

        return m

    def finalise(self):

        "Finalise the inspected program."

        self.finalise_classes()
        self.to_cache()
        self.set_class_types()
        self.define_instantiators()
        self.collect_constants()

    # Supporting operations.

    def resolve(self):

        "Resolve dependencies between modules."

        # Collect replacements first so that the object mapping is not changed
        # while being traversed.

        resolved = {}

        for name, ref in self.objects.items():
            if ref.has_kind("<depends>"):
                ref = self.find_dependency(ref)
                if ref:
                    resolved[name] = ref

        for name, ref in resolved.items():
            self.objects[name] = ref

    def find_dependency(self, ref):

        """
        Find the ultimate dependency for 'ref', following dependency references
        until something else is found. None is returned where the chain leads
        to an unknown object; a dependency reference is returned where the
        chain is circular.
        """

        found = set()
        while ref and ref.has_kind("<depends>") and ref not in found:
            found.add(ref)
            ref = self.objects.get(ref.get_origin())
        return ref

    def finalise_classes(self):

        "Finalise the class relationships and attributes."

        self.derive_inherited_attrs()
        self.derive_subclasses()
        self.derive_shadowed_attrs()

    def derive_inherited_attrs(self):

        "Derive inherited attributes for classes throughout the program."

        for name in self.classes.keys():
            self.propagate_attrs_for_class(name)

    def propagate_attrs_for_class(self, name, visited=None):

        """
        Propagate inherited attributes for class 'name'. The optional 'visited'
        list records the classes already encountered when descending from a
        class to its bases, and is used to detect circular inheritance, in
        which case a ProgramError is raised.
        """

        # Visit classes only once.

        if name in self.all_combined_attrs:
            return

        visited = visited or []

        if name in visited:
            raise ProgramError("Class %s may not inherit from itself: %s -> %s." % (name, " -> ".join(visited), name))

        visited.append(name)

        class_attrs = {}
        instance_attrs = {}

        # Aggregate the attributes from base classes, recording the origins of
        # applicable attributes. Bases are processed in reverse so that those
        # listed first are applied last and thus take precedence.

        for base in self.classes[name][::-1]:

            # Get the identity of the class from the reference.

            base = base.get_origin()

            # Define the base class completely before continuing with this
            # class.

            self.propagate_attrs_for_class(base, visited)
            class_attrs.update(self.all_class_attrs[base])

            # Instance attribute origins are combined if different.

            for key, values in self.all_instance_attrs[base].items():
                init_item(instance_attrs, key, set)
                instance_attrs[key].update(values)

        # Class attributes override those defined earlier in the hierarchy.

        class_attrs.update(self.all_class_attrs.get(name, {}))

        # Instance attributes are merely added if not already defined.

        for key in self.all_instance_attrs.get(name, []):
            if key not in instance_attrs:
                instance_attrs[key] = set(["%s.%s" % (name, key)])

        self.all_class_attrs[name] = class_attrs
        self.all_instance_attrs[name] = instance_attrs
        self.all_combined_attrs[name] = set(class_attrs.keys()).union(instance_attrs.keys())

    def derive_subclasses(self):

        "Derive subclass details for classes."

        for name, bases in self.classes.items():
            for base in bases:

                # Get the identity of the class from the reference.

                base = base.get_origin()

                # Ensure that a collection exists for the base class, since
                # nothing in this class otherwise creates one.

                init_item(self.subclasses, base, set)
                self.subclasses[base].add(name)

    def derive_shadowed_attrs(self):

        """
        Derive shadowed attributes for classes: those names defined both as
        instance attributes and as class attributes.
        """

        for name, instance_attrs in self.all_instance_attrs.items():
            shadowed = set(instance_attrs.keys()).intersection(self.all_class_attrs[name].keys())
            if shadowed:
                self.all_shadowed_attrs[name] = shadowed

    def set_class_types(self):

        """
        Set the type of each class, this being the origin of the object known
        as '__builtins__.type'.
        """

        ref = self.get_object("__builtins__.type")
        for attrs in self.all_class_attrs.values():
            attrs["__class__"] = ref.get_origin()

    def define_instantiators(self):

        """
        Consolidate parameter and default details, incorporating initialiser
        details to define instantiator signatures.
        """

        for cls, attrs in self.all_class_attrs.items():
            initialiser = attrs["__init__"]

            # The first initialiser parameter (presumably 'self') is not part
            # of the instantiator signature.

            self.function_parameters[cls] = self.function_parameters[initialiser][1:]
            self.function_defaults[cls] = self.function_defaults[initialiser]

    def collect_constants(self):

        "Get constants from all active modules."

        for module in self.modules.values():
            self.all_constants.update(module.constants)

    # Import methods.

    def find_in_path(self, name):

        """
        Find the given module 'name' in the search path, returning None where no
        such module could be found, or a 2-tuple from the 'find' method
        otherwise.
        """

        for d in self.path:
            m = self.find(d, name)
            if m:
                return m
        return None

    def find(self, d, name):

        """
        In the directory 'd', find the given module 'name', where 'name' can
        either refer to a single file module or to a package. Return None if the
        'name' cannot be associated with either a file or a package directory,
        or a 2-tuple from '_find_package' or '_find_module' otherwise.

        Packages take precedence over single file modules.
        """

        return self._find_package(d, name) or self._find_module(d, name) or None

    def _find_module(self, d, name):

        """
        In the directory 'd', find the given module 'name', returning None where
        no suitable file exists in the directory, or a 2-tuple consisting of
        None (indicating that no package directory is involved) and a filename
        indicating the location of the module.
        """

        name_py = name + extsep + "py"
        filename = self._find_file(d, name_py)
        if filename:
            return None, filename
        return None

    def _find_package(self, d, name):

        """
        In the directory 'd', find the given package 'name', returning None
        where no suitable package directory exists, or a 2-tuple consisting of
        a directory (indicating the location of the package directory itself)
        and a filename indicating the location of the __init__.py module which
        declares the package's top-level contents.
        """

        filename = self._find_file(d, name)
        if filename:
            init_py = "__init__" + extsep + "py"
            init_py_filename = self._find_file(filename, init_py)
            if init_py_filename:
                return filename, init_py_filename
        return None

    def _find_file(self, d, filename):

        """
        Return the filename obtained when searching the directory 'd' for the
        given 'filename', or None if no actual file exists for the filename.
        """

        filename = join(d, filename)
        if exists(filename):
            return filename
        else:
            return None

    def load(self, name):

        """
        Load the module or package with the given 'name'. Return an object
        referencing the loaded module or package, or None if no such module or
        package exists.
        """

        # Loaded modules are returned immediately.
        # Modules may be known but not yet loading (having been registered as
        # submodules), loading, loaded, or completely unknown.

        module = self.get_module(name)

        if module:
            return module

        # Otherwise, modules are loaded.

        self._report("Loading", name)

        # Split the name into path components, and try to find the uppermost in
        # the search path.

        path_so_far = []
        module = None

        # The package directory of the previous component, if any.

        d = None

        for p in name.split("."):

            # Get the module's filesystem details. Only the first component is
            # sought in the search path; later ones must reside in the package
            # directory of their parent.

            if not path_so_far:
                m = self.find_in_path(p)
            elif d:
                m = self.find(d, p)
            else:
                m = None

            path_so_far.append(p)
            module_name = ".".join(path_so_far)

            if not m:
                self._report("Not found (%s)" % name)
                return None # NOTE: Import error.

            # Get the module itself.

            d, filename = m
            module = self.load_from_file(filename, module_name)

        return module

    def load_from_file(self, filename, module_name=None):

        """
        Load the module from the given 'filename', employing the optional
        'module_name' or "__main__" if no name is given. Return the module,
        which may be one already loaded, one obtained from the cache, or one
        freshly inspected.
        """

        if module_name is None:
            module_name = "__main__"

        module = self.modules.get(module_name)

        if not module:

            # Try to load from cache.

            module = self.load_from_cache(filename, module_name)
            if module:
                return module

            # If no cache entry exists, load from file.

            module = inspector.InspectedModule(module_name, self)
            self.add_module(module_name, module)
            self.update_cache_validity(module)

            self._load(module, module_name, lambda m: m.parse, filename)

        return module

    def update_cache_validity(self, module):

        "Make 'module' valid in the cache, but invalidate accessing modules."

        accessing = self.accessing_modules.get(module.name)
        if accessing:
            self.invalidated.update(accessing)

        self.invalidated.discard(module.name)

    def source_is_new(self, filename, module_name):

        """
        Return whether 'filename' is newer than the cached 'module_name'. The
        source is also considered new if no cache is configured, if no cached
        form exists, or if the module has been invalidated.
        """

        if self.cache:
            cache_filename = join(self.cache, module_name)
            return not exists(cache_filename) or \
                getmtime(filename) > getmtime(cache_filename) or \
                module_name in self.invalidated
        else:
            return True

    def load_from_cache(self, filename, module_name):

        """
        Return a module residing in the cache for the source 'filename' and the
        given 'module_name', or the module already loaded under that name.
        Return None if the cached form cannot be used.
        """

        module = self.modules.get(module_name)

        if not module and not self.source_is_new(filename, module_name):
            module = inspector.CachedModule(module_name, self)
            self.add_module(module_name, module)

            filename = join(self.cache, module_name)
            self._load(module, module_name, lambda m: m.from_cache, filename)

        return module

    def _load(self, module, module_name, fn, filename):

        """
        Load 'module' for the given 'module_name', and with 'fn' performing an
        invocation on the module with the given 'filename'.

        Here, 'fn' is called with the module and must return a callable which
        is then called with 'filename'.
        """

        # Load the module.

        self._report("Loading", filename)
        fn(module)(filename)
        self._report("Loaded", filename)

    def add_module(self, module_name, module):

        """
        Register 'module' under the given 'module_name', replacing any module
        already known by that name, recording a module reference for the name,
        and removing the name from the import queue.
        """

        self.modules[module_name] = module
        self.objects[module_name] = Reference("<module>", module_name)
        self.to_import.discard(module_name)

# vim: tabstop=4 expandtab shiftwidth=4