Lichen

Annotated importer.py

90:c7ddfc4525da
2016-10-08 Paul Boddie Added some support for eliminating accessor class types where the provided attributes are invoked and are unbound methods. This uses a more sophisticated method involving usage observations that incorporate invocation information, permitting classes as accessors if paths through the code support them, even if other paths require instances as accessors to invoke methods.
paul@0 1
#!/usr/bin/env python
paul@0 2
paul@0 3
"""
paul@0 4
Import logic.
paul@0 5
paul@0 6
Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013,
paul@0 7
              2014, 2015, 2016 Paul Boddie <paul@boddie.org.uk>
paul@0 8
paul@0 9
This program is free software; you can redistribute it and/or modify it under
paul@0 10
the terms of the GNU General Public License as published by the Free Software
paul@0 11
Foundation; either version 3 of the License, or (at your option) any later
paul@0 12
version.
paul@0 13
paul@0 14
This program is distributed in the hope that it will be useful, but WITHOUT
paul@0 15
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
paul@0 16
FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
paul@0 17
details.
paul@0 18
paul@0 19
You should have received a copy of the GNU General Public License along with
paul@0 20
this program.  If not, see <http://www.gnu.org/licenses/>.
paul@0 21
"""
paul@0 22
paul@0 23
from errors import ProgramError
paul@0 24
from os.path import exists, extsep, getmtime, join
paul@0 25
from os import listdir, makedirs, remove
paul@0 26
from common import init_item, readfile, writefile
paul@13 27
from modules import CachedModule
paul@0 28
from referencing import Reference
paul@0 29
import inspector
paul@0 30
import sys
paul@0 31
paul@0 32
class Importer:

    "An import machine, searching for and loading modules."

    def __init__(self, path, cache=None, verbose=False):

        """
        Initialise the importer with the given search 'path' - a list of
        directories to search for Python modules.

        The optional 'cache' should be the name of a directory used to store
        cached module information. The directory is created if it is given but
        does not yet exist.

        The optional 'verbose' parameter causes output concerning the activities
        of the object to be produced if set to a true value (not the default).
        """

        self.path = path
        self.cache = cache
        self.verbose = verbose

        # Module importing queue, required modules, removed modules and active
        # modules in the final program.

        self.to_import = set()
        self.required = set(["__main__"])
        self.removed = {}
        self.modules = {}

        # Module relationships and invalidated cached modules.

        self.accessing_modules = {}
        self.invalidated = set()

        # Providers awaiting inclusion (populated by 'resolve') and the
        # reference to the type of all classes (set by 'initialise').

        self.waiting = {}
        self.type_ref = None

        # Basic program information.

        self.objects = {}
        self.classes = {}
        self.function_parameters = {}
        self.function_defaults = {}
        self.function_targets = {}
        self.function_arguments = {}

        # Unresolved names.

        self.missing = set()

        # Derived information.

        self.subclasses = {}

        # Attributes of different object types.

        self.all_class_attrs = {}
        self.all_instance_attrs = {}
        self.all_instance_attr_constants = {}
        self.all_combined_attrs = {}
        self.all_module_attrs = {}
        self.all_shadowed_attrs = {}

        # References to external names and aliases within program units.

        self.all_name_references = {}
        self.all_initialised_names = {}
        self.all_aliased_names = {}

        # General attribute accesses.

        self.all_attr_accesses = {}
        self.all_const_accesses = {}
        self.all_attr_access_modifiers = {}

        # Constant literals and values.

        self.all_constants = {}
        self.all_constant_values = {}

        self.make_cache()

    def make_cache(self):

        "Create the cache directory if one is configured and it is absent."

        if self.cache and not exists(self.cache):
            makedirs(self.cache)

    def check_cache(self, details):

        """
        Check whether the cache applies for the given 'details', invalidating it
        if it does not, and record the 'details' for subsequent checks. Nothing
        is done if no cache directory is configured.
        """

        if not self.cache:
            return

        if self.get_cache_details() != details:
            self.remove_cache()

        writefile(self.get_cache_details_filename(), details)

    def get_cache_details_filename(self):

        "Return the filename for the cache details."

        return join(self.cache, "$details")

    def get_cache_details(self):

        """
        Return details of the cache, or None if no cache is configured or no
        details have been recorded.
        """

        if not self.cache:
            return None

        details_filename = self.get_cache_details_filename()

        if exists(details_filename):
            return readfile(details_filename)
        return None

    def remove_cache(self):

        """
        Remove the contents of the cache. Nothing is done if no cache directory
        is configured or if the directory does not exist.
        """

        if not self.cache or not exists(self.cache):
            return

        for filename in listdir(self.cache):
            remove(join(self.cache, filename))

    def to_cache(self):

        "Write modules to the cache."

        if self.cache:
            for module_name, module in self.modules.items():
                module.to_cache(join(self.cache, module_name))

    # Object retrieval and storage.

    def get_object(self, name):

        """
        Return a reference for the given 'name' or None if no such object
        exists.
        """

        return self.objects.get(name)

    def set_object(self, name, value=None):

        """
        Set the object with the given 'name' and the given 'value'. An existing
        reference given as 'value' is aliased under 'name'; any other value is
        used to construct a new reference.
        """

        if isinstance(value, Reference):
            ref = value.alias(name)
        else:
            ref = Reference(value, name)

        self.objects[name] = ref

    # Identification of both stored object names and name references.

    def identify(self, name):

        "Identify 'name' using stored object and external name records."

        return self.objects.get(name) or self.all_name_references.get(name)

    # Indirect object retrieval.

    def get_attributes(self, ref, attrname):

        """
        Return a set of attributes provided by 'ref' for 'attrname'. Class
        attributes may be provided by instances. An empty set is returned where
        no attribute is found or where 'ref' is of some other kind.
        """

        kind = ref.get_kind()

        if kind == "<class>":
            attr = self.get_class_attribute(ref.get_origin(), attrname)
        elif kind == "<instance>":
            return self.get_combined_attributes(ref.get_origin(), attrname)
        elif kind == "<module>":
            attr = self.get_module_attribute(ref.get_origin(), attrname)
        else:
            return set()

        return set([attr]) if attr else set()

    def get_class_attribute(self, object_type, attrname):

        "Return from 'object_type' the details of class attribute 'attrname'."

        attr = self.all_class_attrs[object_type].get(attrname)
        return attr and self.get_object(attr)

    def get_instance_attributes(self, object_type, attrname):

        """
        Return from 'object_type' the details of instance attribute 'attrname'
        as a set of references. A recorded constant for the attribute takes
        precedence over a general variable reference.
        """

        consts = self.all_instance_attr_constants.get(object_type)
        const = consts.get(attrname) if consts else None

        attrs = set()
        for attr in self.all_instance_attrs[object_type].get(attrname, []):
            attrs.add(const or Reference("<var>", attr))
        return attrs

    def get_combined_attributes(self, object_type, attrname):

        """
        Return from 'object_type' the details of class or instance attribute
        'attrname'.
        """

        ref = self.get_class_attribute(object_type, attrname)
        refs = set([ref]) if ref else set()
        refs.update(self.get_instance_attributes(object_type, attrname))
        return refs

    def get_module_attribute(self, object_type, attrname):

        "Return from 'object_type' the details of module attribute 'attrname'."

        if attrname in self.all_module_attrs[object_type]:
            return self.get_object("%s.%s" % (object_type, attrname))
        return None

    # Module management.

    def queue_module(self, name, accessor, required=False):

        """
        Queue the module with the given 'name' for import from the given
        'accessor' module. If 'required' is true (it is false by default), the
        module will be required in the final program.
        """

        if name not in self.modules:
            self.to_import.add(name)

        if required:
            self.required.add(name)

        init_item(self.accessing_modules, name, set)
        self.accessing_modules[name].add(accessor.name)

    def get_modules(self):

        "Return all modules known to the importer."

        return self.modules.values()

    def get_module(self, name):

        "Return the module with the given 'name', or None if it is not known."

        return self.modules.get(name)

    # Program operations.

    def initialise(self, filename, reset=False):

        """
        Initialise a program whose main module is 'filename', resetting the
        cache if 'reset' is true. Return the main module.
        """

        if reset:
            self.remove_cache()
        self.check_cache(filename)

        # Load the program itself.

        m = self.load_from_file(filename)

        # Load any queued modules.

        while self.to_import:
            for name in list(self.to_import): # avoid mutation issue
                self.load(name)

                # Modules that cannot be found are never registered and would
                # otherwise remain queued forever. References to such modules
                # remain unresolved and are recorded as missing by 'resolve'.

                self.to_import.discard(name)

        # Resolve dependencies between modules.

        self.resolve()

        # Record the type of all classes.

        self.type_ref = self.get_object("__builtins__.type")

        # Resolve dependencies within the program.

        for module in self.modules.values():
            module.complete()

        # Remove unneeded modules, iterating over a copy since the module
        # dictionary is modified.

        for name, module in list(self.modules.items()):
            if name not in self.required:
                module.unpropagate()
                del self.modules[name]
                self.removed[name] = module

        # Collect redundant objects.

        for module in self.removed.values():
            module.collect()

        # Assert module objects where aliases have been removed.

        for name in self.required:
            if name not in self.objects:
                self.objects[name] = Reference("<module>", name)

        return m

    def finalise(self):

        """
        Finalise the inspected program, returning whether the program could be
        finalised. Programs with unresolved names cannot be finalised.
        """

        if self.missing:
            return False

        self.finalise_classes()
        self.to_cache()
        self.set_class_types()
        self.define_instantiators()
        self.collect_constants()

        return True

    # Supporting operations.

    def resolve(self):

        """
        Resolve dependencies between modules, recording unresolved references
        in the 'missing' collection and requiring the modules that provide
        objects to required modules.
        """

        self.waiting = {}

        for module in self.modules.values():

            # Resolve all deferred references in each module.

            for ref in module.deferred:
                found = self.find_dependency(ref)
                if not found:
                    self.missing.add((module.name, ref.get_origin()))
                    continue

                # Record the resolved names and identify required modules.

                ref.mutate(found)

                # Find the providing module of this reference.

                provider = self.get_module_provider(ref)
                if not provider:
                    continue

                module.required.add(provider)

                # Providers loaded implicitly (and not via 'queue_module') have
                # no accessing module record yet.

                init_item(self.accessing_modules, provider, set)
                self.accessing_modules[provider].add(module.name)

                # Postpone any inclusion of the provider until this module
                # becomes required.

                if module.name not in self.required:
                    init_item(self.waiting, module.name, set)
                    self.waiting[module.name].add(provider)

                # Make this module required in the accessing module.

                elif provider not in self.required:
                    self.required.add(provider)
                    if self.verbose:
                        sys.stderr.write("Requiring %s for %s\n" % (provider, ref))

        # Check modules again to see if they are now required and should now
        # cause the inclusion of other modules providing objects to the program.

        for module_name in self.waiting.keys():
            self.require_providers(module_name)

    def require_providers(self, module_name):

        """
        Test if 'module_name' is itself required and, if so, require modules
        containing objects provided to the module.
        """

        if module_name in self.required and module_name in self.waiting:
            for provider in self.waiting[module_name]:
                if provider not in self.required:
                    self.required.add(provider)
                    if self.verbose:
                        sys.stderr.write("Requiring %s\n" % (provider,))

                    # Newly required providers may have providers of their own.

                    self.require_providers(provider)

    def find_dependency(self, ref):

        """
        Find the ultimate dependency for 'ref', following chains of dependent
        references until something else is found, nothing is found, or a
        reference is encountered again.
        """

        found = set()
        while ref and ref.has_kind("<depends>") and ref not in found:
            found.add(ref)
            ref = self.identify(ref.get_origin())
        return ref

    def get_module_provider(self, ref):

        """
        Identify the provider of the given 'ref', this being the first of its
        ancestors that is a known module, or None if there is no such ancestor.
        """

        for ancestor in ref.ancestors():
            if ancestor in self.modules:
                return ancestor
        return None

    def finalise_classes(self):

        "Finalise the class relationships and attributes."

        self.derive_inherited_attrs()
        self.derive_subclasses()
        self.derive_shadowed_attrs()

    def derive_inherited_attrs(self):

        "Derive inherited attributes for classes throughout the program."

        for name in self.classes.keys():
            self.propagate_attrs_for_class(name)

    def propagate_attrs_for_class(self, name, visited=None):

        """
        Propagate inherited attributes for class 'name'. The optional 'visited'
        list records the classes encountered so far and is used to detect
        classes inheriting from themselves, for which a ProgramError is raised.
        """

        # Visit classes only once.

        if name in self.all_combined_attrs:
            return

        visited = visited or []

        if name in visited:
            raise ProgramError("Class %s may not inherit from itself: %s -> %s." % (name, " -> ".join(visited), name))

        visited.append(name)

        class_attrs = {}
        instance_attrs = {}

        # Aggregate the attributes from base classes, recording the origins of
        # applicable attributes. Bases are visited in reverse so that earlier
        # bases override later ones.

        for base in self.classes[name][::-1]:

            # Get the identity of the class from the reference.

            base = base.get_origin()

            # Define the base class completely before continuing with this
            # class.

            self.propagate_attrs_for_class(base, visited)
            class_attrs.update(self.all_class_attrs[base])

            # Instance attribute origins are combined if different.

            for key, values in self.all_instance_attrs[base].items():
                init_item(instance_attrs, key, set)
                instance_attrs[key].update(values)

        # Class attributes override those defined earlier in the hierarchy.

        class_attrs.update(self.all_class_attrs.get(name, {}))

        # Instance attributes are merely added if not already defined.

        for key in self.all_instance_attrs.get(name, []):
            if key not in instance_attrs:
                instance_attrs[key] = set(["%s.%s" % (name, key)])

        self.all_class_attrs[name] = class_attrs
        self.all_instance_attrs[name] = instance_attrs
        self.all_combined_attrs[name] = set(class_attrs.keys()).union(instance_attrs.keys())

    def derive_subclasses(self):

        "Derive subclass details for classes."

        for name, bases in self.classes.items():
            for base in bases:

                # Get the identity of the class from the reference.

                base = base.get_origin()
                self.subclasses[base].add(name)

    def derive_shadowed_attrs(self):

        """
        Derive shadowed attributes for classes, these being the names defined
        both as instance attributes and as class attributes.
        """

        for name, attrs in self.all_instance_attrs.items():
            attrs = set(attrs.keys()).intersection(self.all_class_attrs[name].keys())
            if attrs:
                self.all_shadowed_attrs[name] = attrs

    def set_class_types(self):

        "Set the type of each class."

        for attrs in self.all_class_attrs.values():
            attrs["__class__"] = self.type_ref.get_origin()

    def define_instantiators(self):

        """
        Consolidate parameter and default details, incorporating initialiser
        details to define instantiator signatures.
        """

        for cls, attrs in self.all_class_attrs.items():
            initialiser = attrs["__init__"]

            # The first initialiser parameter is omitted from the signature.

            self.function_parameters[cls] = self.function_parameters[initialiser][1:]
            self.function_defaults[cls] = self.function_defaults[initialiser]

    def collect_constants(self):

        "Get constants from all active modules."

        for module in self.modules.values():
            self.all_constants.update(module.constants)

    # Import methods.

    def find_in_path(self, name):

        """
        Find the given module 'name' in the search path, returning None where no
        such module could be found, or a 2-tuple from the 'find' method
        otherwise.
        """

        for d in self.path:
            m = self.find(d, name)
            if m:
                return m
        return None

    def find(self, d, name):

        """
        In the directory 'd', find the given module 'name', where 'name' can
        either refer to a single file module or to a package. Return None if the
        'name' cannot be associated with either a file or a package directory,
        or a 2-tuple from '_find_package' or '_find_module' otherwise. Packages
        take precedence over single file modules.
        """

        return self._find_package(d, name) or self._find_module(d, name) or None

    def _find_module(self, d, name):

        """
        In the directory 'd', find the given module 'name', returning None where
        no suitable file exists in the directory, or a 2-tuple consisting of
        None (indicating that no package directory is involved) and a filename
        indicating the location of the module.
        """

        filename = self._find_file(d, name + extsep + "py")
        if filename:
            return None, filename
        return None

    def _find_package(self, d, name):

        """
        In the directory 'd', find the given package 'name', returning None
        where no suitable package directory exists, or a 2-tuple consisting of
        a directory (indicating the location of the package directory itself)
        and a filename indicating the location of the __init__.py module which
        declares the package's top-level contents.
        """

        filename = self._find_file(d, name)
        if filename:
            init_py_filename = self._find_file(filename, "__init__" + extsep + "py")
            if init_py_filename:
                return filename, init_py_filename
        return None

    def _find_file(self, d, filename):

        """
        Return the filename obtained when searching the directory 'd' for the
        given 'filename', or None if no actual file exists for the filename.
        """

        filename = join(d, filename)
        if exists(filename):
            return filename
        return None

    def load(self, name):

        """
        Load the module or package with the given 'name'. Return an object
        referencing the loaded module or package, or None if no such module or
        package exists.
        """

        # Loaded modules are returned immediately.
        # Modules may be known but not yet loading (having been registered as
        # submodules), loading, loaded, or completely unknown.

        module = self.get_module(name)

        if module:
            return module

        # Otherwise, modules are loaded.

        # Split the name into path components, and try to find the uppermost in
        # the search path.

        path_so_far = []
        module = None

        # The package directory of the previous component, if any.

        d = None

        for p in name.split("."):

            # Get the module's filesystem details.

            if not path_so_far:
                m = self.find_in_path(p)
            elif d:
                m = self.find(d, p)
            else:
                m = None

            path_so_far.append(p)
            module_name = ".".join(path_so_far)

            if not m:
                if self.verbose:
                    sys.stderr.write("Not found (%s)\n" % name)

                return None # NOTE: Import error.

            # Get the module itself.

            d, filename = m
            module = self.load_from_file(filename, module_name)

        return module

    def load_from_file(self, filename, module_name=None):

        """
        Load the module from the given 'filename', using the optional
        'module_name' or "__main__" if no name is given. A cached form of the
        module is used where available and still valid.
        """

        if module_name is None:
            module_name = "__main__"

        module = self.modules.get(module_name)

        if not module:

            # Try to load from cache.

            module = self.load_from_cache(filename, module_name)
            if module:
                return module

            # If no cache entry exists, load from file.

            module = inspector.InspectedModule(module_name, self)
            self.add_module(module_name, module)
            self.update_cache_validity(module)

            self._load(module, module_name, lambda m: m.parse, filename)

        return module

    def update_cache_validity(self, module):

        "Make 'module' valid in the cache, but invalidate accessing modules."

        accessing = self.accessing_modules.get(module.name)
        if accessing:
            self.invalidated.update(accessing)
        self.invalidated.discard(module.name)

    def source_is_new(self, filename, module_name):

        """
        Return whether 'filename' is newer than the cached 'module_name'.
        Sources are always considered new where no cache is configured, where no
        cached form exists, or where the module has been invalidated.
        """

        if not self.cache:
            return True

        cache_filename = join(self.cache, module_name)
        return not exists(cache_filename) or \
            getmtime(filename) > getmtime(cache_filename) or \
            module_name in self.invalidated

    def load_from_cache(self, filename, module_name):

        """
        Return a module residing in the cache for the given source 'filename'
        and 'module_name', or None if the module is not already known and no
        valid cached form exists.
        """

        module = self.modules.get(module_name)

        if not module and not self.source_is_new(filename, module_name):
            module = CachedModule(module_name, self)
            self.add_module(module_name, module)

            filename = join(self.cache, module_name)
            self._load(module, module_name, lambda m: m.from_cache, filename)

        return module

    def _load(self, module, module_name, fn, filename):

        """
        Load 'module' for the given 'module_name', and with 'fn' performing an
        invocation on the module with the given 'filename'.
        """

        # Load the module.

        if self.verbose:
            action = "Required" if module_name in self.required else "Loading"
            sys.stderr.write("%s %s from %s\n" % (action, module_name, filename))
        fn(module)(filename)

        # Add the module object if not already defined.

        if module_name not in self.objects:
            self.objects[module_name] = Reference("<module>", module_name)

    def add_module(self, module_name, module):

        """
        Register 'module' under the given 'module_name', removing the name from
        the queue of modules to be imported.
        """

        self.modules[module_name] = module
        self.to_import.discard(module_name)
paul@0 784
paul@0 785
# vim: tabstop=4 expandtab shiftwidth=4