paul@0 | 1 | # -*- coding: iso-8859-1 -*- |
paul@0 | 2 | """ |
paul@0 | 3 | MoinMoin - MoinShare library |
paul@0 | 4 | |
paul@56 | 5 | @copyright: 2011, 2012, 2013, 2014 by Paul Boddie <paul@boddie.org.uk> |
paul@34 | 6 | @copyright: 2003-2006 Edgewall Software |
paul@34 | 7 | @copyright: 2006 MoinMoin:AlexanderSchremmer |
paul@0 | 8 | @license: GNU GPL (v2 or later), see COPYING.txt for details. |
paul@0 | 9 | """ |
paul@0 | 10 | |
paul@17 | 11 | from ContentTypeSupport import getContentPreferences |
paul@58 | 12 | from DateSupport import getCurrentTime, getDateTimeFromRFC2822, \ |
paul@58 | 13 | getDateTimeFromISO8601, DateTime |
paul@0 | 14 | from MoinSupport import * |
paul@58 | 15 | from MoinRemoteSupport import * |
paul@51 | 16 | from ItemSupport import ItemStore |
paul@50 | 17 | from MoinMessage import GPG, is_encrypted, is_signed, MoinMessageError |
paul@54 | 18 | from MoinMessageSupport import get_homedir, get_username_for_fingerprint |
paul@37 | 19 | from MoinMoin.support.htmlmarkup import HTMLParseError, HTMLSanitizer, Markup |
paul@15 | 20 | from MoinMoin import wikiutil |
paul@33 | 21 | from email.parser import Parser |
paul@58 | 22 | from email.utils import parsedate |
paul@37 | 23 | from codecs import getwriter |
paul@58 | 24 | import xml.dom.pulldom |
paul@0 | 25 | |
paul@25 | 26 | try: |
paul@25 | 27 | from cStringIO import StringIO |
paul@25 | 28 | except ImportError: |
paul@25 | 29 | from StringIO import StringIO |
paul@25 | 30 | |
# Keep a reference to the getFragments function supplied by the wildcard
# imports above: this module defines its own getFragments further down which
# filters the results of the imported function.

_getFragments = getFragments

# The version of this library.

__version__ = "0.1"

# The XML namespace used to recognise the root element of Atom feeds.

ATOM_NS = "http://www.w3.org/2005/Atom"
paul@58 | 36 | |
paul@58 | 37 | # Utility functions. |
paul@58 | 38 | |
def text(element):

    """
    Return the concatenated values of the text nodes found directly inside
    'element'. Text inside nested elements is not included.
    """

    return "".join([child.nodeValue
                    for child in element.childNodes
                    if child.nodeType == child.TEXT_NODE])
paul@58 | 45 | |
def children(element):

    """
    Return the serialised XML of all the child nodes of 'element', joined
    together as a single string.
    """

    return "".join([child.toxml() for child in element.childNodes])
paul@58 | 51 | |
def unescape(text):

    """
    Return 'text' with the XML entity references for "<", ">" and "&" replaced
    by the characters they represent.
    """

    # The ampersand reference is handled last so that doubly-escaped text such
    # as "&amp;lt;" yields "&lt;" instead of being unescaped twice.

    return text.replace("&lt;", "<").replace("&gt;", ">").replace("&amp;", "&")
paul@58 | 54 | |
def linktext(element, feed_type):

    """
    Return the link held by the given link 'element'. For the "rss"
    'feed_type' the link is the text inside the element; for any other feed
    type it is the value of the element's "href" attribute.
    """

    if feed_type != "rss":
        return element.getAttribute("href")

    return text(element)
paul@58 | 60 | |
def need_content(show_content, tagname):

    """
    Return whether full content has been requested by 'show_content' and the
    element called 'tagname' is one that provides it. Both values must be
    either "content" or "description" for this to be the case.
    """

    content_names = ("content", "description")

    if show_content not in content_names:
        return False

    return tagname in content_names
paul@58 | 63 | |
paul@0 | 64 | # More Moin 1.9 compatibility functions. |
paul@0 | 65 | |
def has_member(request, groupname, username):

    """
    Return whether 'username' belongs to the group called 'groupname', using
    the group information available from the 'request'.
    """

    groups = request.dicts

    # Without a has_member method, fall back to a membership test on the
    # collection registered under the group name.

    if not hasattr(groups, "has_member"):
        return username in groups.get(groupname, [])

    return groups.has_member(groupname, username)
paul@0 | 71 | |
# Fragments employ a "moinshare" attribute.
# Only fragments whose attributes include this name are returned by the
# getFragments function defined in this module.

fragment_attribute = "moinshare"
paul@2 | 75 | |
def getFragments(s):

    """
    Return all fragments in 's' having the MoinShare fragment attribute. Each
    fragment is a (format, attributes, body) tuple as produced by the imported
    getFragments function.
    """

    # Membership is tested using "in" instead of the deprecated has_key method.

    return [(fmt, attributes, body)
            for fmt, attributes, body in _getFragments(s)
            if fragment_attribute in attributes]
paul@2 | 85 | |
def getPreferredOutputTypes(request, mimetypes):

    """
    Negotiate content types using the 'request', returning those of the given
    'mimetypes' (the types available for a fragment) that are preferred
    according to the client's Accept header. Where no Accept header is
    provided, 'mimetypes' is returned unchanged.
    """

    accept = getHeader(request, "Accept", "HTTP")

    if not accept:
        return mimetypes

    return getContentPreferences(accept).get_preferred_types(mimetypes)
paul@9 | 100 | |
def getUpdatedTime(metadata):

    """
    Return the "last-modified" value from the given 'metadata' or, where that
    is absent or empty, the current time.
    """

    # NOTE: We could attempt to get the last edit time of a fragment.

    return metadata.get("last-modified") or getCurrentTime()
paul@9 | 115 | |
paul@30 | 116 | # Entry/update classes. |
paul@30 | 117 | |
class Update:

    "A single update entry obtained from a feed, mailbox, page or store."

    def __init__(self):

        "Initialise the update with all details unset and an empty path."

        # Content, its type, the time of the update and its author.

        self.content = None
        self.content_type = None
        self.updated = None
        self.author = None

        # Message-related attributes: any subparts of the update.

        self.parts = None

        # Feed-related attributes.

        self.title = None
        self.link = None

        # Page-related attributes.

        self.fragment = None
        self.preferred = None

        # Store-related attributes.

        self.message_number = None

        # Store- and page-related attributes.

        self.page = None

        # Identification: the part numbers leading to this update.

        self.path = []

    def unique_id(self):

        """
        Return a unique identifier used for anchors to parts of presented
        updates, built from the message number and the path of part numbers.
        """

        steps = "-".join([str(step) for step in self.path])
        return "moinshare-tab-%s-%s" % (self.message_number, steps)

    def __cmp__(self, other):

        """
        Order updates by their updated time, with updates lacking such a time
        ordered after those having one.
        """

        mine = self.updated
        theirs = other.updated

        if mine is None:
            if theirs is not None:
                return 1
        elif theirs is None:
            return -1

        return cmp(mine, theirs)

    def copy(self, part_number=None):

        """
        Return a new update having the same descriptive details as this one
        and a copy of its path, extended by 'part_number' if specified. The
        content, content type and parts are not copied.
        """

        duplicate = Update()

        for name in ("title", "link", "updated", "author", "fragment",
                     "preferred", "message_number", "page"):
            setattr(duplicate, name, getattr(self, name))

        duplicate.path = list(self.path)
        if part_number is not None:
            duplicate.path.append(part_number)

        return duplicate
paul@40 | 184 | |
paul@58 | 185 | # Error classes. |
paul@58 | 186 | |
class FeedError(Exception):

    "The base class of errors raised when obtaining updates from feeds."
paul@58 | 189 | |
class FeedMissingError(FeedError):

    "Raised when a feed may not be accessed or provides no data."
paul@58 | 192 | |
class FeedContentTypeError(FeedError):

    "Raised when a feed has a content type that is not supported."
paul@58 | 195 | |
paul@60 | 196 | # Update retrieval from URLs. |
paul@58 | 197 | |
def getUpdates(request, feed_url, max_entries, show_content):

    """
    Using the given 'request', retrieve from 'feed_url' up to the given number
    'max_entries' of update entries. The 'show_content' parameter can indicate
    that a "summary" is to be obtained for each update, that the "content" of
    each update is to be obtained (falling back to a summary if no content is
    provided), or no content (indicated by a false value) is to be obtained.

    A tuple of the form ((feed_type, channel_title, channel_link), updates) is
    returned.

    FeedMissingError is raised if 'feed_url' refers to a local file or if no
    data could be obtained for it. FeedContentTypeError is raised if the
    content type recorded for the resource is neither a supported feed type
    nor "multipart/mixed".
    """

    # Prevent local file access.
    # URL schemes are case-insensitive, and surrounding whitespace may be
    # ignored by whatever eventually opens the URL, so the scheme is tested on
    # a normalised copy of the location. The location itself is passed on
    # unchanged.

    location = feed_url.strip().lower()

    if location.startswith("file:"):
        raise FeedMissingError

    # Mailbox locations need a dedicated reader.

    elif location.startswith("imap"):
        reader = imapreader

    else:
        reader = None

    # Obtain the resource, using a cached version if appropriate.

    max_cache_age = int(getattr(request.cfg, "moin_share_max_cache_age", "300"))
    data = getCachedResource(request, feed_url, "MoinShare", "wiki", max_cache_age, reader)
    if not data:
        raise FeedMissingError

    # Interpret the cached feed. The metadata is read from the start of the
    # cached data, leaving the stream positioned for the chosen handler.

    f = StringIO(data)
    try:
        _url, content_type, _encoding, _metadata = getCachedResourceMetadata(f)

        if content_type in ("application/atom+xml", "application/rss+xml", "application/xml"):
            return getUpdatesFromFeed(f, max_entries, show_content)

        elif content_type == "multipart/mixed":
            return getUpdatesFromMailbox(f, max_entries, show_content, request)

        else:
            raise FeedContentTypeError

    finally:
        f.close()
paul@60 | 246 | |
paul@60 | 247 | # Update retrieval from feeds. |
paul@58 | 248 | |
def getUpdatesFromFeed(feed, max_entries, show_content):

    """
    Retrieve from 'feed' up to the given number 'max_entries' of update entries.
    The 'show_content' parameter can indicate that a "summary" is to be obtained
    for each update, that the "content" of each update is to be obtained
    (falling back to a summary if no content is provided), or no content
    (indicated by a false value) is to be obtained.

    A tuple of the form ((feed_type, channel_title, channel_link), updates) is
    returned.

    The 'feed' is a file-like object providing Atom or RSS data.

    NOTE: The 'max_entries' parameter is not consulted by this function: every
    NOTE: item or entry found in the feed is returned.
    """

    feed_updates = []

    # Parse each node from the feed.

    channel_title = channel_link = None

    # The state of the parsing: the kind of feed detected, the update being
    # populated (if inside an item or entry), and whether an Atom source
    # element is being skipped.

    feed_type = None
    update = None
    in_source = False

    events = xml.dom.pulldom.parse(feed)

    for event, value in events:

        # Elements opened inside an Atom source element are ignored.

        if not in_source and event == xml.dom.pulldom.START_ELEMENT:
            tagname = value.localName

            # Detect the feed type and items.

            if tagname == "feed" and value.namespaceURI == ATOM_NS:
                feed_type = "atom"

            elif tagname == "rss":
                feed_type = "rss"

            # Detect items.

            elif feed_type == "rss" and tagname == "item" or \
                feed_type == "atom" and tagname == "entry":

                update = Update()

            # Detect source declarations.

            elif feed_type == "atom" and tagname == "source":
                in_source = True

            # Handle item elements.
            # Outside any item or entry, titles and links describe the
            # channel itself.

            elif tagname == "title":
                events.expandNode(value)
                if update:
                    update.title = text(value)
                else:
                    channel_title = text(value)

            elif tagname == "link":
                events.expandNode(value)
                if update:
                    update.link = linktext(value, feed_type)
                else:
                    channel_link = linktext(value, feed_type)

            elif show_content and (
                feed_type == "atom" and tagname in ("content", "summary") or
                feed_type == "rss" and tagname == "description"):

                events.expandNode(value)

                # Obtain content where requested or, failing that, a
                # summary.

                if update and (need_content(show_content, tagname) or tagname == "summary" and not update.content):
                    if feed_type == "atom":
                        update.content_type = value.getAttribute("type") or "text"

                        # Normalise the content types and extract the
                        # content.

                        if update.content_type in ("xhtml", "application/xhtml+xml", "application/xml"):
                            update.content = children(value)
                            update.content_type = "application/xhtml+xml"
                        elif update.content_type in ("html", "text/html"):
                            update.content = text(value)
                            update.content_type = "text/html"
                        else:
                            update.content = text(value)
                            update.content_type = "text/plain"

                    # Content from RSS feeds is recorded as HTML.

                    else:
                        update.content_type = "text/html"
                        update.content = text(value)

            elif feed_type == "atom" and tagname == "updated" or \
                feed_type == "rss" and tagname == "pubDate":

                events.expandNode(value)

                # Atom timestamps are interpreted as ISO 8601 and RSS
                # timestamps using the email date parser. Note that 'value'
                # is rebound here from the element to the resulting datetime.

                if update:
                    if feed_type == "atom":
                        value = getDateTimeFromISO8601(text(value))
                    else:
                        value = DateTime(parsedate(text(value)))
                    update.updated = value

        elif event == xml.dom.pulldom.END_ELEMENT:
            tagname = value.localName

            # Complete the current update when its item or entry closes.

            if feed_type == "rss" and tagname == "item" or \
                feed_type == "atom" and tagname == "entry":

                feed_updates.append(update)

                update = None

            # Resume normal handling once a source element has closed.

            elif feed_type == "atom" and tagname == "source":
                in_source = False

    return (feed_type, channel_title, channel_link), feed_updates
paul@58 | 370 | |
paul@62 | 371 | # Update retrieval from mailboxes and multipart messages. |
paul@62 | 372 | |
def getUpdatesFromMailbox(feed, max_entries, show_content, request):

    """
    Retrieve from 'feed' up to the given number 'max_entries' of update entries.
    The 'show_content' parameter can indicate that a "summary" is to be obtained
    for each update, that the "content" of each update is to be obtained
    (falling back to a summary if no content is provided), or no content
    (indicated by a false value) is to be obtained.

    A tuple of the form ((feed_type, channel_title, channel_link), updates) is
    returned, with "mbox" as the feed type and no channel title or link.

    NOTE: The 'max_entries' and 'show_content' parameters are not consulted by
    NOTE: this function: every message found is returned with its content.
    """

    feed_updates = []

    # The feed is parsed as a single message whose payload provides the
    # individual messages, each of which becomes a separate update.

    container = Parser().parse(feed)

    for number, message in enumerate(container.get_payload()):
        entry = Update()
        entry.updated = getDateTimeFromRFC2822(message.get("date"))
        entry.title = message.get("subject", "Update #%d" % number)
        entry.message_number = number

        content, content_type, parts, author = getUpdateContentFromPart(message, request)

        entry.content = content
        entry.content_type = content_type
        entry.parts = parts

        # Only record an author where one was reported for the content.

        if author:
            entry.author = author

        feed_updates.append(entry)

    return ("mbox", None, None), feed_updates
paul@62 | 407 | |
paul@30 | 408 | # Update retrieval from pages. |
paul@30 | 409 | |
def getUpdatesFromPage(page, request):

    """
    Get updates from the given 'page' using the 'request'. A list of update
    objects is returned.

    One update is produced for each fragment of the page returned by
    getFragments. Content is only provided for an update where "text/html" is
    amongst the preferred output types for the fragment.
    """

    updates = []

    # NOTE: Use the updated datetime from the page for updates.
    # NOTE: The published and updated details would need to be deduced from
    # NOTE: the page history instead of being taken from the page as a whole.

    metadata = getMetadata(page)
    updated = getUpdatedTime(metadata)

    # Get the fragment regions for the page.

    for n, (format, attributes, body) in enumerate(getFragments(page.get_raw_body())):

        update = Update()

        # Produce a fragment identifier.
        # NOTE: Choose a more robust identifier where none is explicitly given.
        # The position of the fragment is used where no "fragment" or
        # "summary" attribute is given.

        update.fragment = attributes.get("fragment", str(n))
        update.title = attributes.get("summary", "Update #%d" % n)

        # Get the preferred content types available for the fragment.

        update.preferred = getPreferredOutputTypes(request, getOutputTypes(request, format))

        # Try and obtain some suitable content for the entry.
        # NOTE: Could potentially get a summary for the fragment.

        update.content = None

        if "text/html" in update.preferred:
            parser_cls = getParserClass(request, format)

            # HTML fragments are used as they are; other formats are
            # converted either by the parser's own output type support or by
            # formatting the text using the request's HTML formatter.

            if format == "html":
                update.content = body
            elif hasattr(parser_cls, "formatForOutputType"):
                update.content = formatTextForOutputType(body, request, parser_cls, "text/html")
            else:
                fmt = request.html_formatter
                fmt.setPage(page)
                update.content = formatText(body, request, fmt, parser_cls)

            # NOTE(review): this assignment is assumed to belong to the
            # NOTE(review): "text/html" branch above - confirm.

            update.content_type = "text/html"

        update.page = page

        # NOTE: The anchor would be supported in the page, but this requires
        # NOTE: formatter modifications for the regions providing updates.

        update.link = page.url(request, anchor=update.fragment)
        update.updated = updated

        updates.append(update)

    return updates
paul@25 | 472 | |
paul@33 | 473 | # Update retrieval from message stores. |
paul@33 | 474 | |
def getUpdatesFromStore(page, request):

    """
    Get updates from the message store associated with the given 'page' using
    the 'request'. A list of update objects is returned, ordered by the keys
    under which the messages are stored.
    """

    store = ItemStore(page, "messages", "message-locks")

    updates = []

    # Each key doubles as the message number of the resulting update.

    for key in sorted(store.keys()):
        update = getUpdateFromMessageText(store[key], key, request)
        update.page = page
        updates.append(update)

    return updates
paul@33 | 499 | |
def getUpdateFromMessageText(message_text, message_number, request):

    """
    Return an update for the given 'message_text' and 'message_number', using
    the 'request' when obtaining the content of the message.
    """

    update = Update()
    message = Parser().parsestr(message_text)

    # Describe the update using the message headers.

    update.updated = getDateTimeFromRFC2822(message.get("date"))
    update.title = message.get("subject", "Update #%d" % message_number)
    update.message_number = message_number

    # Obtain the content together with any author reported for it.

    content, content_type, parts, content_author = getUpdateContentFromPart(message, request)

    update.content = content
    update.content_type = content_type
    update.parts = parts

    # An author reported for the content takes precedence over the author
    # given by the "moin-user" header.

    update.author = content_author or message.get("moin-user")

    return update
paul@46 | 522 | |
def getUpdateContentFromPart(part, request):

    """
    Return a tuple of the form (content, content_type, parts, author) for the
    given 'part', using the 'request' where decryption is needed. The content
    is None where the part must be presented using its subparts, the parts are
    None where the part has no subparts, and the author is None unless one was
    identified when decrypting the part.
    """

    # A single part provides the update content itself.

    if not part.is_multipart():
        content, content_type = getPartContent(part)
        return content, content_type, None, None

    subtype = part.get_content_subtype()

    # For a collection of related parts, the first provides the update content
    # and the formatter is assumed to reference the other parts.

    if subtype == "related":
        main_part = part.get_payload()[0]
        content, content_type = getPartContent(main_part)
        return content, content_type, [main_part], None

    # Encrypted content cannot be meaningfully separated, so it is decrypted
    # and the result examined in its place.

    if subtype == "encrypted":
        try:
            part, author = getDecryptedParts(part, request)
            content, content_type, parts, _author = getUpdateContentFromPart(part, request)
            return content, content_type, parts, author
        except MoinMessageError:
            return None, part.get_content_type(), part.get_payload(), None

    # Any other multipart merely provides its parts for separate display.

    return None, part.get_content_type(), part.get_payload(), None
paul@40 | 560 | |
def getDecryptedParts(part, request):

    """
    Decrypt the given 'part' if it is encrypted and verify the result if it is
    signed, using the 'request' to locate the key details. A tuple of the form
    (part, author) is returned: for a successfully verified message, this
    holds the verified content and the username found for the signing key's
    fingerprint; otherwise, it holds the possibly decrypted part and None.
    """

    gpg = GPG(get_homedir(request))

    # Decrypt the part, parsing the plain text as a message.

    if is_encrypted(part):
        part = Parser().parsestr(gpg.decryptMessage(part))

    # Extract any signature details.

    if is_signed(part):
        verified = gpg.verifyMessage(part)
        if verified:
            fingerprint, identity, content = verified
            return content, get_username_for_fingerprint(request, fingerprint)

    return part, None
paul@47 | 583 | |
def getPartContent(part):

    """
    Decode the 'part', returning the decoded payload and the content type in a
    tuple. Where the part declares a charset, the payload is converted to
    Unicode text using it; otherwise, the payload is returned as obtained from
    the part.
    """

    charset = part.get_content_charset()
    payload = part.get_payload(decode=True)

    # An explicit test is used instead of the "and/or" idiom, which would
    # return the undecoded payload whenever the decoded text is empty. Parts
    # without a payload of their own yield None, which cannot be decoded.

    if charset and payload is not None:
        payload = payload.decode(charset)

    return payload, part.get_content_type()
paul@40 | 591 | |
def getUpdateFromPart(parent, part, part_number, request):

    """
    Using the 'parent' update, return an update object for the given 'part'
    having the given 'part_number' amongst the parent's parts. The 'request'
    is used when obtaining the content of the part.
    """

    child = parent.copy(part_number)

    content, content_type, parts, author = getUpdateContentFromPart(part, request)

    child.content = content
    child.content_type = content_type
    child.parts = parts

    # The author copied from the parent is always replaced, even where no
    # author was reported for the part.

    child.author = author

    return child
paul@40 | 599 | |
def getUpdatesForFormatting(update, request):

    """
    Get a list of updates for formatting given 'update', using the 'request'
    when obtaining the content of any parts. An update without parts is
    returned by itself in the list; otherwise, the list holds the updates
    obtained recursively from each of the parts.
    """

    if not update.parts:
        return [update]

    # Handle multipart/alternative and other non-related multiparts.

    collected = []

    for number, part in enumerate(update.parts):
        part_update = getUpdateFromPart(update, part, number, request)
        collected.extend(getUpdatesForFormatting(part_update, request))

    return collected
paul@46 | 616 | |
paul@46 | 617 | # Update formatting. |
paul@46 | 618 | |
def getFormattedUpdate(update, request, fmt):

    """
    Return the formatted form of the given 'update' using the given 'request'
    and 'fmt'. None is returned if the update has no content or if no parser
    is available for its content type.
    """

    # NOTE: Some control over the HTML and XHTML should be exercised.

    if not update.content:
        return None

    # HTML from stored messages is handled by a parser configured for the
    # message; other content uses the parsers registered for its type.

    if update.content_type == "text/html" and update.message_number is not None:
        parsers = [get_make_parser(update.page, update.message_number)]
    else:
        parsers = getParsersForContentType(request.cfg, update.content_type)

    if not parsers:
        return None

    # Only the first parser is ever employed, since the content is formatted
    # and returned immediately.

    for parser_cls in parsers:
        if hasattr(parser_cls, "formatForOutputType"):
            return formatTextForOutputType(update.content, request, parser_cls, "text/html")
        return formatText(update.content, request, fmt, parser_cls=parser_cls)

    return None
paul@46 | 645 | |
def formatUpdate(update, request, fmt):

    """
    Format the given 'update' using the given 'request' and 'fmt', returning
    the formatted output as a string. Where the update yields several updates
    for formatting, navigation links to each of them are produced, followed by
    each of them encapsulated in its own identified region.
    """

    result = []
    append = result.append

    updates = getUpdatesForFormatting(update, request)
    single = len(updates) == 1

    # Format some navigation tabs.
    # This only occurs for multipart updates.

    if not single:
        append(fmt.div(on=1, css_class="moinshare-alternatives"))

        for update_part in updates:
            append(fmt.url(1, "#%s" % update_part.unique_id()))
            append(fmt.text(update_part.content_type))
            append(fmt.url(0))

        append(fmt.div(on=0))

    # Format the content.

    first = True

    for update_part in updates:

        # Encapsulate each alternative if many exist, marking the first as
        # the default.

        if not single:
            if first:
                css_class = "moinshare-default"
            else:
                css_class = "moinshare-other"
            append(fmt.div(on=1, css_class="moinshare-alternative %s" % css_class, id=update_part.unique_id()))

        # Include the content.

        append(formatUpdatePart(update_part, request, fmt))

        if not single:
            append(fmt.div(on=0))

        first = False

    return "".join(result)
paul@46 | 695 | |
def formatUpdatePart(update, request, fmt):

    """
    Format the given 'update' using the given 'request' and 'fmt', returning
    its formatted content inside a content region. Where the content cannot be
    formatted, a message mentioning the content type is included instead.
    """

    _ = request.getText

    # Encapsulate the content, opening the region before formatting occurs.

    opening = fmt.div(on=1, css_class="moinshare-content")

    body = getFormattedUpdate(update, request, fmt)
    if not body:
        body = fmt.text(_("Update cannot be shown for content of type %s.") % update.content_type)

    closing = fmt.div(on=0)

    return "".join([opening, body, closing])
paul@46 | 716 | |
paul@31 | 717 | # Source management. |
paul@31 | 718 | |
def getUpdateSources(pagename, request):

    """
    Return the update sources from the given 'pagename' using the 'request'.
    A dictionary is returned mapping each name defined by the page's wiki
    dictionary to the parameters obtained from its definition. Where the page
    provides no definitions, the dictionary is empty.
    """

    source_definitions = getWikiDict(pagename, request)

    if not source_definitions:
        return {}

    return dict([(name, getSourceParameters(definition))
                 for name, definition in source_definitions.items()])
paul@31 | 732 | |
def getSourceParameters(source_definition):

    """
    Return the parameters from the given 'source_definition' string as
    interpreted by parseDictEntry, which is given the parameter names "type"
    and "location".
    """

    # NOTE(review): the names presumably label leading unnamed values in the
    # NOTE(review): definition - confirm against parseDictEntry.

    names = ("type", "location")
    return parseDictEntry(source_definition, names)
paul@31 | 738 | |
paul@34 | 739 | # HTML parsing support. |
paul@34 | 740 | |
class IncomingHTMLSanitizer(HTMLSanitizer):

    "An HTML parser that rewrites references to attachments."

    def __init__(self, out, request, page, message_number):

        """
        Initialise the sanitizer to write to 'out', retaining the 'request',
        'page' and 'message_number' for use when rewriting references.
        """

        HTMLSanitizer.__init__(self, out)
        self.request = request
        self.page = page
        self.message_number = message_number

    def rewrite_reference(self, ref):

        """
        Return a replacement for the reference 'ref'. References employing
        the "cid:" prefix become links to the ReadMessage action on the page,
        indicating the message and the referenced part; all other references
        are returned unchanged.
        """

        prefix = "cid:"

        if not ref.startswith(prefix):
            return ref

        query = {
            "action" : "ReadMessage", "doit" : "1",
            "message" : self.message_number, "part" : ref[len(prefix):]
            }
        return self.page.url(self.request, query)

    def handle_starttag(self, tag, attrs):

        """
        Rewrite the values of those 'attrs' holding references before passing
        the 'tag' and its attributes on to the base class.
        """

        rewritten = []

        for name, value in attrs:
            if name in self.uri_attrs:
                value = self.rewrite_reference(value)
            rewritten.append((name, value))

        HTMLSanitizer.handle_starttag(self, tag, rewritten)
paul@34 | 770 | |
class IncomingMarkup(Markup):

    "A special markup processor for incoming HTML."

    def sanitize(self, request, page, message_number):

        """
        Return a new markup object holding this markup as processed by the
        incoming HTML sanitizer configured with the given 'request', 'page'
        and 'message_number'.
        """

        # The sanitizer output is collected as UTF-8 and converted back to
        # Unicode for the resulting markup object.

        buffer = StringIO()
        out = getwriter("utf-8")(buffer)

        sanitizer = IncomingHTMLSanitizer(out, request, page, message_number)
        sanitizer.feed(self.stripentities(keepxmlentities=True))

        return IncomingMarkup(unicode(buffer.getvalue(), "utf-8"))
paul@34 | 780 | |
paul@34 | 781 | class IncomingHTMLParser: |
paul@34 | 782 | |
paul@34 | 783 | "Filters and rewrites incoming HTML content." |
paul@34 | 784 | |
paul@34 | 785 | def __init__(self, raw, request, **kw): |
paul@34 | 786 | self.raw = raw |
paul@34 | 787 | self.request = request |
paul@34 | 788 | self.message_number = None |
paul@34 | 789 | self.page = None |
paul@34 | 790 | |
paul@34 | 791 | def format(self, formatter, **kw): |
paul@34 | 792 | |
paul@34 | 793 | "Send the text." |
paul@34 | 794 | |
paul@34 | 795 | try: |
paul@34 | 796 | self.request.write(formatter.rawHTML(IncomingMarkup(self.raw).sanitize(self.request, self.page, self.message_number))) |
paul@34 | 797 | except HTMLParseError, e: |
paul@34 | 798 | self.request.write(formatter.sysmsg(1) + |
paul@34 | 799 | formatter.text(u'HTML parsing error: %s in "%s"' % (e.msg, |
paul@34 | 800 | self.raw.splitlines()[e.lineno - 1].strip())) + |
paul@34 | 801 | formatter.sysmsg(0)) |
paul@34 | 802 | |
class MakeIncomingHTMLParser:

    "A class that makes parsers configured for messages."

    def __init__(self, page, message_number):

        "Initialise with state that is used to configure instantiated parsers."

        self.page = page
        self.message_number = message_number

    def __call__(self, *args, **kw):

        """
        Return a new incoming HTML parser created using the given arguments
        and configured with the retained page and message number.
        """

        parser = IncomingHTMLParser(*args, **kw)
        parser.page, parser.message_number = self.page, self.message_number
        return parser
paul@34 | 819 | |
def get_make_parser(page, message_number):

    """
    Return a callable that will return a parser configured for the message from
    the given 'page' with the given 'message_number'.
    """

    parser_factory = MakeIncomingHTMLParser(page, message_number)
    return parser_factory
paul@34 | 828 | |
paul@0 | 829 | # vim: tabstop=4 expandtab shiftwidth=4 |