paul@18 | 1 | # -*- coding: iso-8859-1 -*- |
paul@18 | 2 | """ |
paul@18 | 3 | MoinMoin - MoinRemoteSupport library |
paul@18 | 4 | |
paul@111 | 5 | @copyright: 2011, 2012, 2013, 2014 by Paul Boddie <paul@boddie.org.uk> |
paul@18 | 6 | @license: GNU GPL (v2 or later), see COPYING.txt for details. |
paul@18 | 7 | """ |
paul@18 | 8 | |
paul@52 | 9 | from ContentTypeSupport import getContentTypeAndEncoding |
paul@18 | 10 | from MoinMoin.action import cache |
paul@18 | 11 | from MoinMoin import caching |
paul@21 | 12 | import urllib2, time |
paul@18 | 13 | |
def getCachedResource(request, url, arena, scope, max_cache_age, reader=None):

    """
    Return the resource data for 'url', served from a cache entry identified
    by 'arena' and 'scope' and keyed on the 'url' (via the given 'request').

    A cache entry is considered usable if it exists and was modified less than
    'max_cache_age' seconds ago. Otherwise, the entry is opened for writing and
    the 'reader' callable is invoked as reader(url, cache_entry) to populate
    it. Where no 'reader' is supplied, the module's 'urlreader' is used.

    If the reader raises IOError, any existing cache entry is removed and None
    is returned. Otherwise, the complete contents of the cache entry are
    returned, these having the following form:

      url <newline>
      [ content-type-header ] <newline>
      [ other-header <newline> ]*
      <newline>
      content-body
    """

    if not reader:
        reader = urlreader

    # Locate the cache entry corresponding to the URL.

    entry = caching.CacheEntry(
        request, arena, cache.key(request, content=url), scope=scope)

    # Establish whether the entry is missing or has outlived its permitted age.

    checked_at = time.time()
    modified_at = entry.mtime()

    # NOTE: The URL could be checked and the 'If-Modified-Since' header
    # NOTE: (see MoinMoin.action.pollsistersites) could be checked.

    stale = not entry.exists() or checked_at - modified_at >= max_cache_age

    if stale:

        # Refresh the entry from the remote data source: the reader obtains
        # the data and writes it to the entry opened here.

        entry.open(mode="w")

        try:
            reader(url, entry)

        # A failed download leaves no entry behind and yields None.

        except IOError:
            if entry.exists():
                entry.remove()
            return None

    # Deliver the cached data.

    entry.open()
    try:
        return entry.read()
    finally:
        entry.close()
paul@18 | 77 | |
def urlreader(url, cache_entry):

    """
    Retrieve data from the given 'url', writing it to the 'cache_entry'.

    The 'cache_entry' must already have been opened for writing by the caller.
    It is always closed before this function returns or raises, including when
    the 'url' cannot be opened at all, so that an entry opened by the caller is
    never left open after a failed download.

    The data written has the following form:

      url <newline>
      [ content-type-header ] <newline>
      [ other-header <newline> ]*
      <newline>
      content-body

    Any exception raised when opening or reading the 'url', or when writing to
    the 'cache_entry', is propagated to the caller.
    """

    try:
        # Opening the URL is done inside the outer block so that a failure
        # here still closes the cache entry.

        f = urllib2.urlopen(url)

        try:
            cache_entry.write(url + "\n")

            # The content type always occupies the second line, being left
            # empty if the response did not provide one.

            cache_entry.write((f.headers.get("content-type") or "") + "\n")

            # All other headers follow, one per line.

            for key, value in f.headers.items():
                if key.lower() != "content-type":
                    cache_entry.write("%s: %s\n" % (key, value))

            # A blank line separates the headers from the body.

            cache_entry.write("\n")
            cache_entry.write(f.read())

        # Release the response even if writing to the cache entry fails.

        finally:
            f.close()

    # Release the cache entry regardless of how the download went.

    finally:
        cache_entry.close()
paul@111 | 94 | |
def getCachedResourceMetadata(f):

    """
    Read the header section of a cached resource from the file-like object 'f'
    and return a tuple of the form (url, content_type, encoding, metadata).

    The first line read provides the 'url', returned exactly as obtained from
    f.readline() and thus normally including its line terminator. The second
    line is passed to getContentTypeAndEncoding to provide 'content_type' and
    'encoding'. Subsequent lines of the form "key: value" are collected in the
    'metadata' dictionary until a blank line or the end of the file is reached,
    leaving 'f' positioned at the start of the content body.

    A line starting with a space or tab that follows a header is treated as a
    continuation of that header's value, as produced when a folded header value
    is written out. Any other line lacking a colon is ignored instead of
    causing an exception.
    """

    url = f.readline()
    content_type, encoding = getContentTypeAndEncoding(f.readline())

    metadata = {}
    last_key = None
    line = f.readline()

    while line.strip():

        # Continuation lines extend the value of the preceding header.

        if last_key is not None and line[:1] in (" ", "\t"):
            joined = "%s %s" % (metadata[last_key], line.strip())
            metadata[last_key] = joined.strip()

        # Ordinary header lines define (or redefine) a metadata entry.

        elif ":" in line:
            key, value = [v.strip() for v in line.split(":", 1)]
            metadata[key] = value
            last_key = key

        # Lines of any other form cannot be interpreted and are skipped.

        line = f.readline()

    return url, content_type, encoding, metadata
paul@52 | 111 | |
paul@18 | 112 | # vim: tabstop=4 expandtab shiftwidth=4 |