paul@0 | 1 | # -*- coding: iso-8859-1 -*- |
paul@0 | 2 | """ |
paul@0 | 3 | MoinMoin - MoinShare library |
paul@0 | 4 | |
paul@56 | 5 | @copyright: 2011, 2012, 2013, 2014 by Paul Boddie <paul@boddie.org.uk> |
paul@34 | 6 | @copyright: 2003-2006 Edgewall Software |
paul@34 | 7 | @copyright: 2006 MoinMoin:AlexanderSchremmer |
paul@0 | 8 | @license: GNU GPL (v2 or later), see COPYING.txt for details. |
paul@0 | 9 | """ |
paul@0 | 10 | |
paul@17 | 11 | from ContentTypeSupport import getContentPreferences |
paul@58 | 12 | from DateSupport import getCurrentTime, getDateTimeFromRFC2822, \ |
paul@58 | 13 | getDateTimeFromISO8601, DateTime |
paul@0 | 14 | from MoinSupport import * |
paul@58 | 15 | from MoinRemoteSupport import * |
paul@51 | 16 | from ItemSupport import ItemStore |
paul@50 | 17 | from MoinMessage import GPG, is_encrypted, is_signed, MoinMessageError |
paul@54 | 18 | from MoinMessageSupport import get_homedir, get_username_for_fingerprint |
paul@37 | 19 | from MoinMoin.support.htmlmarkup import HTMLParseError, HTMLSanitizer, Markup |
paul@15 | 20 | from MoinMoin import wikiutil |
paul@33 | 21 | from email.parser import Parser |
paul@58 | 22 | from email.utils import parsedate |
paul@37 | 23 | from codecs import getwriter |
paul@58 | 24 | import xml.dom.pulldom |
paul@0 | 25 | |
paul@25 | 26 | try: |
paul@25 | 27 | from cStringIO import StringIO |
paul@25 | 28 | except ImportError: |
paul@25 | 29 | from StringIO import StringIO |
paul@25 | 30 | |
# Keep a reference to the getFragments function supplied by one of the
# wildcard imports above, because this module defines its own getFragments
# wrapper further down that would otherwise hide it.

_getFragments = getFragments

__version__ = "0.1"

# The XML namespace used to recognise the root element of Atom feeds.

ATOM_NS = "http://www.w3.org/2005/Atom"
paul@58 | 36 | |
paul@58 | 37 | # Utility functions. |
paul@58 | 38 | |
def text(element):

    """
    Return the concatenated values of the text nodes that are direct children
    of 'element'. Text inside nested elements is not included.
    """

    return "".join([node.nodeValue for node in element.childNodes
                    if node.nodeType == node.TEXT_NODE])
paul@58 | 45 | |
def children(element):

    """
    Return the XML serialisation of every child node of 'element', joined
    together without the enclosing element itself.
    """

    return "".join([node.toxml() for node in element.childNodes])
paul@58 | 51 | |
def unescape(text):

    """
    Return 'text' with the XML entities for "<", ">" and "&" replaced by the
    characters they stand for.
    """

    # The ampersand entity is decoded last so that doubly-escaped text such as
    # "&amp;lt;" yields "&lt;" and not "<".

    return text.replace("&lt;", "<").replace("&gt;", ">").replace("&amp;", "&")
paul@58 | 54 | |
def linktext(element, feed_type):

    """
    Return the link target held by the link 'element' for the given
    'feed_type': the element's text for "rss", and the value of its "href"
    attribute for any other feed type.
    """

    if feed_type == "rss":
        return text(element)

    return element.getAttribute("href")
paul@58 | 60 | |
def need_content(show_content, tagname):

    """
    Return whether full content was requested via 'show_content' and the
    element named 'tagname' is one that carries full content.
    """

    content_names = ("content", "description")
    return show_content in content_names and tagname in content_names
paul@58 | 63 | |
paul@0 | 64 | # More Moin 1.9 compatibility functions. |
paul@0 | 65 | |
def has_member(request, groupname, username):

    """
    Return whether 'username' belongs to the group called 'groupname', using
    the dictionaries available via the 'request'.
    """

    dicts = request.dicts

    # Use the dedicated method where the dictionaries object provides one.

    if hasattr(dicts, "has_member"):
        return dicts.has_member(groupname, username)

    # Otherwise, look the group up as a mapping entry, treating an unknown
    # group as having no members.

    return username in dicts.get(groupname, [])
paul@0 | 71 | |
# Fragments employ a "moinshare" attribute.

fragment_attribute = "moinshare"

def getFragments(s):

    "Return all fragments in 's' having the MoinShare fragment attribute."

    # Each fragment is a (format, attributes, body) tuple. Membership is tested
    # with "in" since dict.has_key is deprecated and absent from later Python
    # versions.

    return [(region_format, attributes, body)
            for region_format, attributes, body in _getFragments(s)
            if fragment_attribute in attributes]
paul@2 | 85 | |
def getPreferredOutputTypes(request, mimetypes):

    """
    Using the 'request', perform content negotiation, obtaining mimetypes common
    to the fragment (given by 'mimetypes') and the client (found in the Accept
    header). Where the client sends no Accept header, 'mimetypes' is returned
    unchanged.
    """

    accept = getHeader(request, "Accept", "HTTP")

    if not accept:
        return mimetypes

    prefs = getContentPreferences(accept)
    return prefs.get_preferred_types(mimetypes)
paul@9 | 100 | |
def getUpdatedTime(metadata):

    """
    Return the last updated time based on the given 'metadata', using the
    current time if no explicit last modified time is specified.
    """

    # NOTE: We could attempt to get the last edit time of a fragment.

    latest_timestamp = metadata.get("last-modified")

    if latest_timestamp:
        return latest_timestamp

    return getCurrentTime()
paul@9 | 115 | |
paul@30 | 116 | # Entry/update classes. |
paul@30 | 117 | |
class Update:

    "A feed update entry."

    def __init__(self):

        "Initialise an empty update with all details unset."

        self.title = None
        self.link = None
        self.content = None
        self.content_type = None
        self.updated = None
        self.author = None

        # Page-related attributes.

        self.fragment = None
        self.preferred = None

        # Message-related attributes.

        self.message_number = None
        self.parts = None

        # Message- and page-related attributes.

        self.page = None

        # Identification: the part numbers leading to this update within a
        # message, as extended by the copy method.

        self.path = []

    def unique_id(self):

        """
        Return an identifier built from the message number and the part
        numbers in the path of this update.
        """

        return "moinshare-tab-%s-%s" % (self.message_number, "-".join(map(str, self.path)))

    def __cmp__(self, other):

        """
        Compare this update with 'other' using the updated times, ordering
        updates without an updated time after those having one.
        """

        if self.updated is None and other.updated is not None:
            return 1
        elif self.updated is not None and other.updated is None:
            return -1
        else:
            return cmp(self.updated, other.updated)

    def copy(self, part_number=None):

        """
        Return a new update having the same descriptive details as this one.
        If 'part_number' is given, it is appended to the path of the copy,
        leaving the path of this update untouched.

        The content, content type and parts are not copied.
        """

        update = Update()
        update.title = self.title
        update.link = self.link
        update.updated = self.updated
        update.author = self.author
        update.fragment = self.fragment
        update.preferred = self.preferred
        update.message_number = self.message_number
        update.page = self.page

        # Copy the path so that extending it does not affect this update.

        update.path = self.path[:]
        if part_number is not None:
            update.path.append(part_number)

        return update
paul@40 | 173 | |
paul@58 | 174 | # Error classes. |
paul@58 | 175 | |
class FeedError(Exception):

    "The base class of errors raised when retrieving updates from a feed."

class FeedMissingError(FeedError):

    "Raised when a feed may not be accessed or yields no data."

class FeedContentTypeError(FeedError):

    "Raised when a feed has a content type that cannot be interpreted."
paul@58 | 184 | |
paul@60 | 185 | # Update retrieval from URLs. |
paul@58 | 186 | |
def getUpdates(request, feed_url, max_entries, show_content):

    """
    Using the given 'request', retrieve from 'feed_url' up to the given number
    'max_entries' of update entries. The 'show_content' parameter can indicate
    that a "summary" is to be obtained for each update, that the "content" of
    each update is to be obtained (falling back to a summary if no content is
    provided), or no content (indicated by a false value) is to be obtained.

    A tuple of the form ((feed_type, channel_title, channel_link), updates) is
    returned.

    FeedMissingError is raised for "file:" URLs and where no data can be
    obtained for the feed. FeedContentTypeError is raised where the retrieved
    resource has a content type that is not handled here.
    """

    # URL schemes are case-insensitive, so the scheme tests are made against a
    # normalised copy of the URL. Otherwise, "FILE:..." or a URL with leading
    # whitespace would evade the local file restriction below.
    # NOTE(review): whether the fetching code also accepts URLs without any
    # NOTE(review): scheme as local paths cannot be seen here - confirm.

    normalised_url = feed_url.lstrip().lower()

    # Prevent local file access.

    if normalised_url.startswith("file:"):
        raise FeedMissingError

    elif normalised_url.startswith("imap"):
        reader = imapreader

    else:
        reader = None

    # Obtain the resource, using a cached version if appropriate.

    max_cache_age = int(getattr(request.cfg, "moin_share_max_cache_age", "300"))
    data = getCachedResource(request, feed_url, "MoinShare", "wiki", max_cache_age, reader)
    if not data:
        raise FeedMissingError

    # Interpret the cached feed. The metadata is read from the start of the
    # stream and the same stream is then handed on to the appropriate parser.

    f = StringIO(data)
    try:
        _url, content_type, _encoding, _metadata = getCachedResourceMetadata(f)

        if content_type in ("application/atom+xml", "application/rss+xml", "application/xml"):
            return getUpdatesFromFeed(f, max_entries, show_content)

        elif content_type == "multipart/mixed":
            return getUpdatesFromMailbox(f, max_entries, show_content, request)

        else:
            raise FeedContentTypeError

    finally:
        f.close()
paul@60 | 235 | |
paul@60 | 236 | # Update retrieval from feeds. |
paul@58 | 237 | |
def getUpdatesFromFeed(feed, max_entries, show_content):

    """
    Retrieve update entries from the Atom or RSS document readable from 'feed'.
    The 'show_content' parameter can indicate that a "summary" is to be obtained
    for each update, that the "content" of each update is to be obtained
    (falling back to a summary if no content is provided), or no content
    (indicated by a false value) is to be obtained.

    NOTE: The 'max_entries' parameter is accepted but is not applied here: all
    NOTE: entries found in the feed are returned.

    A tuple of the form ((feed_type, channel_title, channel_link), updates) is
    returned.
    """

    feed_updates = []

    channel_title = channel_link = None

    feed_type = None

    # The update being populated: only set while inside an item or entry.

    update = None

    # Atom source elements describe the feed an entry originated from and so
    # their contents are ignored.

    in_source = False

    START_ELEMENT = xml.dom.pulldom.START_ELEMENT
    END_ELEMENT = xml.dom.pulldom.END_ELEMENT

    # Parse each node from the feed.

    events = xml.dom.pulldom.parse(feed)

    for event, node in events:

        if not in_source and event == START_ELEMENT:
            tagname = node.localName

            # Detect the feed type.

            if tagname == "feed" and node.namespaceURI == ATOM_NS:
                feed_type = "atom"

            elif tagname == "rss":
                feed_type = "rss"

            # Detect items.

            elif feed_type == "rss" and tagname == "item" or \
                feed_type == "atom" and tagname == "entry":

                update = Update()

            # Detect source declarations.

            elif feed_type == "atom" and tagname == "source":
                in_source = True

            # Handle item elements. Expanding a node builds its subtree and
            # consumes the events for it, so no end event is seen for it.

            elif tagname == "title":
                events.expandNode(node)
                if update:
                    update.title = text(node)
                else:
                    channel_title = text(node)

            elif tagname == "link":
                events.expandNode(node)
                if update:
                    update.link = linktext(node, feed_type)
                else:
                    channel_link = linktext(node, feed_type)

            elif show_content and (
                feed_type == "atom" and tagname in ("content", "summary") or
                feed_type == "rss" and tagname == "description"):

                events.expandNode(node)

                # Obtain content where requested or, failing that, a
                # summary.

                if update and (need_content(show_content, tagname) or
                    tagname == "summary" and not update.content):

                    if feed_type == "atom":
                        declared_type = node.getAttribute("type") or "text"

                        # Normalise the content types and extract the
                        # content.

                        if declared_type in ("xhtml", "application/xhtml+xml", "application/xml"):
                            update.content = children(node)
                            update.content_type = "application/xhtml+xml"
                        elif declared_type in ("html", "text/html"):
                            update.content = text(node)
                            update.content_type = "text/html"
                        else:
                            update.content = text(node)
                            update.content_type = "text/plain"

                    # RSS descriptions are treated as HTML.

                    else:
                        update.content_type = "text/html"
                        update.content = text(node)

            elif feed_type == "atom" and tagname == "updated" or \
                feed_type == "rss" and tagname == "pubDate":

                events.expandNode(node)

                if update:
                    if feed_type == "atom":
                        update.updated = getDateTimeFromISO8601(text(node))
                    else:

                        # parsedate returns None for dates it cannot
                        # interpret: leave the update without a time in that
                        # case instead of building a datetime from None.

                        date_tuple = parsedate(text(node))
                        if date_tuple is not None:
                            update.updated = DateTime(date_tuple)

        elif event == END_ELEMENT:
            tagname = node.localName

            # Complete the current update at the end of an item or entry.

            if feed_type == "rss" and tagname == "item" or \
                feed_type == "atom" and tagname == "entry":

                feed_updates.append(update)
                update = None

            elif feed_type == "atom" and tagname == "source":
                in_source = False

    return (feed_type, channel_title, channel_link), feed_updates
paul@58 | 359 | |
paul@62 | 360 | # Update retrieval from mailboxes and multipart messages. |
paul@62 | 361 | |
def getUpdatesFromMailbox(feed, max_entries, show_content, request):

    """
    Retrieve update entries from the multipart message readable from 'feed',
    treating each part of the message as a separate update and using the
    'request' when obtaining the content of each part.

    NOTE: The 'max_entries' and 'show_content' parameters are accepted but are
    NOTE: not used here: every part yields an update complete with content.

    A tuple of the form ((feed_type, channel_title, channel_link), updates) is
    returned, with "mbox" as the feed type and no channel details.
    """

    mailbox = Parser().parse(feed)

    feed_updates = []

    # Parse each message from the feed as a separate update.

    for message_number, part in enumerate(mailbox.get_payload()):
        update = Update()

        # The message date serves as both fragment identifier and update time.

        update.fragment = update.updated = getDateTimeFromRFC2822(part.get("date"))
        update.title = part.get("subject", "Update #%d" % message_number)
        update.message_number = message_number

        update.content, update.content_type, update.parts, actual_author = \
            getUpdateContentFromPart(part, request)

        # Only record an author where one was established from the content.

        if actual_author:
            update.author = actual_author

        feed_updates.append(update)

    return ("mbox", None, None), feed_updates
paul@62 | 396 | |
paul@30 | 397 | # Update retrieval from pages. |
paul@30 | 398 | |
def getUpdatesFromPage(page, request):

    """
    Get updates from the given 'page' using the 'request'. A list of update
    objects is returned, one for each fragment in the page having the
    MoinShare fragment attribute.
    """

    updates = []

    # NOTE: Use the updated datetime from the page for updates.
    # NOTE: The published and updated details would need to be deduced from
    # NOTE: the page history instead of being taken from the page as a whole.

    metadata = getMetadata(page)
    updated = getUpdatedTime(metadata)

    # Get the fragment regions for the page.

    for n, (region_format, attributes, body) in enumerate(getFragments(page.get_raw_body())):

        update = Update()

        # Produce a fragment identifier.
        # NOTE: Choose a more robust identifier where none is explicitly given.

        update.fragment = attributes.get("fragment", str(n))
        update.title = attributes.get("summary", "Update #%d" % n)

        # Get the preferred content types available for the fragment.

        update.preferred = getPreferredOutputTypes(request, getOutputTypes(request, region_format))

        # Try and obtain some suitable content for the entry. The content stays
        # unset unless HTML is amongst the preferred types.
        # NOTE: Could potentially get a summary for the fragment.

        update.content = None

        if "text/html" in update.preferred:
            parser_cls = getParserClass(request, region_format)

            # HTML regions are used directly.

            if region_format == "html":
                update.content = body

            # Parsers able to target output types are asked for HTML.

            elif hasattr(parser_cls, "formatForOutputType"):
                update.content = formatTextForOutputType(body, request, parser_cls, "text/html")

            # Other parsers are used together with the HTML formatter.

            else:
                fmt = request.html_formatter
                fmt.setPage(page)
                update.content = formatText(body, request, fmt, parser_cls)

            update.content_type = "text/html"

        update.page = page

        # NOTE: The anchor would be supported in the page, but this requires
        # NOTE: formatter modifications for the regions providing updates.

        update.link = page.url(request, anchor=update.fragment)
        update.updated = updated

        updates.append(update)

    return updates
paul@25 | 461 | |
paul@33 | 462 | # Update retrieval from message stores. |
paul@33 | 463 | |
def getUpdatesFromStore(page, request):

    """
    Get updates from the message store associated with the given 'page' using
    the 'request'. A list of update objects is returned, ordered according to
    the sorted keys of the store.
    """

    updates = []

    store = ItemStore(page, "messages", "message-locks")

    # Each key also serves as the message number of the resulting update.

    for key in sorted(store.keys()):
        update = getUpdateFromMessageText(store[key], key, request)
        update.page = page
        updates.append(update)

    return updates
paul@33 | 488 | |
def getUpdateFromMessageText(message_text, message_number, request):

    """
    Return an update for the given 'message_text' and 'message_number', using
    the 'request' when obtaining the message content.
    """

    message = Parser().parsestr(message_text)

    update = Update()

    # Produce a fragment identifier: the message date also provides the update
    # time.

    update.fragment = update.updated = getDateTimeFromRFC2822(message.get("date"))
    update.title = message.get("subject", "Update #%d" % message_number)
    update.author = message.get("moin-user")

    update.message_number = message_number

    update.content, update.content_type, update.parts, actual_author = \
        getUpdateContentFromPart(message, request)

    # An author established from the content replaces the one declared in the
    # message headers.

    if actual_author:
        update.author = actual_author

    return update
paul@46 | 511 | |
def getUpdateContentFromPart(part, request):

    """
    Return decoded content, the content type, any subparts, and any author
    identity in a tuple for a given 'part', using the 'request' when
    decrypting encrypted parts.
    """

    # For a single part, use it as the update content.

    if not part.is_multipart():
        content, content_type = getPartContent(part)
        return content, content_type, None, None

    subtype = part.get_content_subtype()

    # For a collection of related parts, use the first as the update content
    # and assume that the formatter will reference the other parts.

    if subtype == "related":
        main_part = part.get_payload()[0]
        content, content_type = getPartContent(main_part)
        return content, content_type, [main_part], None

    # Encrypted content cannot be meaningfully separated.

    if subtype == "encrypted":
        try:
            decrypted, author = getDecryptedParts(part, request)

            # Where nothing was decrypted or verified, the very same part is
            # handed back: processing it again would recurse without end, so
            # its parts are presented separately below instead.

            if decrypted is not part:
                content, content_type, parts, _author = getUpdateContentFromPart(decrypted, request)
                return content, content_type, parts, author

        # Content that cannot be decrypted is presented as separate parts.

        except MoinMessageError:
            pass

    # Otherwise, just obtain the parts for separate display.

    return None, part.get_content_type(), part.get_payload(), None
paul@40 | 549 | |
def getDecryptedParts(part, request):

    """
    Decrypt the given 'part' using the 'request' to locate the GPG home
    directory, returning a tuple of the form (content, username).

    Where the content is signed and the signature is verified, the signed
    content is returned with the username found for the signing key's
    fingerprint. Otherwise, the decrypted part (or the original 'part' if it
    was not encrypted) is returned with None as the username.
    """

    homedir = get_homedir(request)
    gpg = GPG(homedir)

    # Decrypt the part.

    if is_encrypted(part):
        decrypted_text = gpg.decryptMessage(part)
        part = Parser().parsestr(decrypted_text)

    # Extract any signature details.

    if is_signed(part):
        result = gpg.verifyMessage(part)
        if result:
            fingerprint, identity, content = result
            return content, get_username_for_fingerprint(request, fingerprint)

    return part, None
paul@47 | 572 | |
def getPartContent(part):

    """
    Decode the 'part', returning the decoded payload and the content type.

    Where the part declares a character set, the payload is returned as text
    decoded using that character set. Otherwise, the payload is returned as
    obtained after undoing any transfer encoding.
    """

    charset = part.get_content_charset()
    payload = part.get_payload(decode=True)

    # The payload is None for multipart messages, where there is nothing to
    # decode. An explicit test is used instead of the and/or idiom so that
    # empty text is still returned as decoded text.

    if charset and payload is not None:
        payload = payload.decode(charset)

    return payload, part.get_content_type()
paul@40 | 580 | |
def getUpdateFromPart(parent, part, part_number, request):

    """
    Using the 'parent' update, return an update object for the given 'part',
    with 'part_number' added to the path identifying the new update and the
    'request' being used to obtain the content of the part.
    """

    update = parent.copy(part_number)

    # The content details and the author are replaced by those of the part.

    update.content, update.content_type, update.parts, update.author = \
        getUpdateContentFromPart(part, request)

    return update
paul@40 | 588 | |
def getUpdatesForFormatting(update, request):

    """
    Get a list of updates for formatting given 'update', using the 'request'
    to obtain the content of any parts. An update without parts is returned
    as the only element of the list.
    """

    # Updates without parts are formatted as they are.

    if not update.parts:
        return [update]

    # Handle multipart/alternative and other non-related multiparts, descending
    # into each part in turn.

    updates = []

    for n, part in enumerate(update.parts):
        update_part = getUpdateFromPart(update, part, n, request)
        updates.extend(getUpdatesForFormatting(update_part, request))

    return updates
paul@46 | 605 | |
paul@46 | 606 | # Update formatting. |
paul@46 | 607 | |
def getFormattedUpdate(update, request, fmt):

    """
    Return the formatted form of the given 'update' using the given 'request'
    and 'fmt'. None is returned where the update has no content or where no
    parser is available for the content type of the update.
    """

    # NOTE: Some control over the HTML and XHTML should be exercised.

    if not update.content:
        return None

    # HTML originating from messages is handled by a parser that knows about
    # the page and message involved.

    if update.content_type == "text/html" and update.message_number is not None:
        parsers = [get_make_parser(update.page, update.message_number)]
    else:
        parsers = getParsersForContentType(request.cfg, update.content_type)

    if not parsers:
        return None

    # Only the first parser is ever used.

    for parser_cls in parsers:
        if hasattr(parser_cls, "formatForOutputType"):
            return formatTextForOutputType(update.content, request, parser_cls, "text/html")
        return formatText(update.content, request, fmt, parser_cls=parser_cls)
paul@46 | 634 | |
def formatUpdate(update, request, fmt):

    """
    Format the given 'update' using the given 'request' and 'fmt', returning
    the formatted output as a string.

    Where the update yields several alternatives, a region of links to the
    alternatives precedes the content, and each alternative is enclosed in its
    own region, the first being marked as the default.
    """

    result = []
    append = result.append

    updates = getUpdatesForFormatting(update, request)
    single = len(updates) == 1

    # Format some navigation tabs, labelling each with the content type of the
    # alternative it refers to.

    if not single:
        append(fmt.div(on=1, css_class="moinshare-alternatives"))

        for update_part in updates:
            append(fmt.url(1, "#%s" % update_part.unique_id()))
            append(fmt.text(update_part.content_type))
            append(fmt.url(0))

        append(fmt.div(on=0))

    # Format the content.

    first = True

    for update_part in updates:

        # Encapsulate each alternative if many exist.

        if not single:
            if first:
                css_class = "moinshare-default"
            else:
                css_class = "moinshare-other"

            append(fmt.div(on=1, css_class="moinshare-alternative %s" % css_class, id=update_part.unique_id()))

        # Include the content.

        append(formatUpdatePart(update_part, request, fmt))

        if not single:
            append(fmt.div(on=0))

        first = False

    return "".join(result)
paul@46 | 683 | |
def formatUpdatePart(update, request, fmt):

    """
    Format the given 'update' using the given 'request' and 'fmt', returning
    the content enclosed in a content region. Where the content cannot be
    formatted, a message mentioning the content type is shown instead.
    """

    _ = request.getText

    result = []
    append = result.append

    # Encapsulate the content.

    append(fmt.div(on=1, css_class="moinshare-content"))

    formatted = getFormattedUpdate(update, request, fmt)

    if formatted:
        append(formatted)
    else:
        append(fmt.text(_("Update cannot be shown for content of type %s.") % update.content_type))

    append(fmt.div(on=0))

    return "".join(result)
paul@46 | 704 | |
paul@31 | 705 | # Source management. |
paul@31 | 706 | |
def getUpdateSources(pagename, request):

    """
    Return the update sources from the given 'pagename' using the 'request'.
    The result is a dictionary mapping each source name to the parameters
    parsed from the definition of that source. An empty dictionary is returned
    where the page provides no definitions.
    """

    sources = {}

    source_definitions = getWikiDict(pagename, request)

    if not source_definitions:
        return sources

    for name, definition in source_definitions.items():
        sources[name] = getSourceParameters(definition)

    return sources
paul@31 | 720 | |
def getSourceParameters(source_definition):

    """
    Return the parameters from the given 'source_definition' string, as
    produced by parseDictEntry with "type" and "location" given as the names
    to be used.
    """

    return parseDictEntry(source_definition, ("type", "location"))
paul@31 | 726 | |
paul@34 | 727 | # HTML parsing support. |
paul@34 | 728 | |
class IncomingHTMLSanitizer(HTMLSanitizer):

    "An HTML parser that rewrites references to attachments."

    def __init__(self, out, request, page, message_number):

        """
        Initialise the sanitizer to write to 'out', recording the 'request',
        'page' and 'message_number' needed to rewrite references.
        """

        HTMLSanitizer.__init__(self, out)
        self.request = request
        self.message_number = message_number
        self.page = page

    def rewrite_reference(self, ref):

        """
        Return a replacement for the reference 'ref'. A "cid:" reference
        becomes a link to the ReadMessage action on the page, identifying the
        message and the referenced part. Other references are returned
        unchanged.
        """

        if not ref.startswith("cid:"):
            return ref

        part = ref[len("cid:"):]

        return self.page.url(self.request, {
            "action" : "ReadMessage", "doit" : "1",
            "message" : self.message_number, "part" : part
            })

    def handle_starttag(self, tag, attrs):

        """
        Handle the start of the element 'tag' having the given 'attrs',
        rewriting the values of attributes that hold references before
        passing the element on to the sanitizer.
        """

        new_attrs = []

        for attrname, attrvalue in attrs:
            if attrname in self.uri_attrs:
                attrvalue = self.rewrite_reference(attrvalue)
            new_attrs.append((attrname, attrvalue))

        HTMLSanitizer.handle_starttag(self, tag, new_attrs)
paul@34 | 758 | |
class IncomingMarkup(Markup):

    "A special markup processor for incoming HTML."

    def sanitize(self, request, page, message_number):

        """
        Return a sanitized copy of this markup, with references rewritten
        using the 'request', 'page' and 'message_number'.
        """

        # The sanitizer output is collected as UTF-8 and decoded afterwards.

        out = getwriter("utf-8")(StringIO())

        sanitizer = IncomingHTMLSanitizer(out, request, page, message_number)
        sanitizer.feed(self.stripentities(keepxmlentities=True))

        return IncomingMarkup(unicode(out.getvalue(), "utf-8"))
paul@34 | 768 | |
class IncomingHTMLParser:

    "Filters and rewrites incoming HTML content."

    def __init__(self, raw, request, **kw):

        """
        Initialise the parser with the 'raw' HTML text and the 'request'. The
        message number and page are left unset and are to be assigned before
        formatting if references are to be rewritten for a message.
        """

        self.raw = raw
        self.request = request
        self.message_number = None
        self.page = None

    def format(self, formatter, **kw):

        """
        Send the text, writing the sanitized HTML to the request using the
        'formatter'. Where the HTML cannot be parsed, a system message
        describing the error is written instead.
        """

        try:
            self.request.write(formatter.rawHTML(IncomingMarkup(self.raw).sanitize(self.request, self.page, self.message_number)))
        except HTMLParseError as e:

            # Quote the offending line only where the reported line number
            # refers to a line of the text: the number may be absent.

            lines = self.raw.splitlines()
            lineno = e.lineno

            if lineno is not None and 1 <= lineno <= len(lines):
                context = lines[lineno - 1].strip()
            else:
                context = u""

            self.request.write(formatter.sysmsg(1) +
                formatter.text(u'HTML parsing error: %s in "%s"' % (e.msg, context)) +
                formatter.sysmsg(0))
paul@34 | 790 | |
class MakeIncomingHTMLParser:

    "A class that makes parsers configured for messages."

    def __init__(self, page, message_number):

        "Initialise with state that is used to configure instantiated parsers."

        self.message_number = message_number
        self.page = page

    def __call__(self, *args, **kw):

        """
        Return a new parser created using the given arguments and configured
        with the message number and page held by this object. This lets an
        instance be used wherever a parser class would be called.
        """

        parser = IncomingHTMLParser(*args, **kw)
        parser.message_number = self.message_number
        parser.page = self.page
        return parser
paul@34 | 807 | |
def get_make_parser(page, message_number):

    """
    Return a callable that will return a parser configured for the message from
    the given 'page' with the given 'message_number'.
    """

    return MakeIncomingHTMLParser(page, message_number)
paul@34 | 816 | |
paul@0 | 817 | # vim: tabstop=4 expandtab shiftwidth=4 |