# -*- coding: iso-8859-1 -*-
"""
MoinMoin - MoinShare library

@copyright: 2011, 2012, 2013, 2014 by Paul Boddie <paul@boddie.org.uk>
@copyright: 2003-2006 Edgewall Software
@copyright: 2006 MoinMoin:AlexanderSchremmer
@license: GNU GPL (v2 or later), see COPYING.txt for details.
"""

from ContentTypeSupport import getContentPreferences
from DateSupport import getCurrentTime, getDateTimeFromRFC2822, \
                        getDateTimeFromISO8601, DateTime
from MoinSupport import *
from MoinRemoteSupport import *
from ItemSupport import ItemStore
from MoinMessage import GPG, is_encrypted, is_signed, MoinMessageError
from MoinMessageSupport import get_homedir, get_username_for_fingerprint
from MoinMoin.support.htmlmarkup import HTMLParseError, HTMLSanitizer, Markup
from MoinMoin import wikiutil
from email.parser import Parser
from email.utils import parsedate
from codecs import getwriter
import xml.dom.pulldom

try:
    from cStringIO import StringIO
except ImportError:
    from StringIO import StringIO

# Keep a reference to the general fragment finder from MoinSupport, since a
# MoinShare-specific getFragments is defined below.

_getFragments = getFragments

__version__ = "0.1"

ATOM_NS = "http://www.w3.org/2005/Atom"

# Utility functions.

def text(element):

    "Return the concatenated text node content of the given DOM 'element'."

    nodes = []
    for node in element.childNodes:
        if node.nodeType == node.TEXT_NODE:
            nodes.append(node.nodeValue)
    return "".join(nodes)

def children(element):

    "Return the child nodes of the given DOM 'element' serialised as markup."

    nodes = []
    for node in element.childNodes:
        nodes.append(node.toxml())
    return "".join(nodes)

def unescape(text):

    """
    Replace the basic XML character entities in 'text' with their plain
    character equivalents. The ampersand entity is replaced last so that it
    cannot produce text which is then reinterpreted as another entity.
    """

    return text.replace("&lt;", "<").replace("&gt;", ">").replace("&amp;", "&")

def linktext(element, feed_type):

    """
    Return the link URL provided by 'element': RSS supplies it as element
    text, whereas Atom supplies it in the "href" attribute.
    """

    if feed_type == "rss":
        return text(element)
    else:
        return element.getAttribute("href")

def need_content(show_content, tagname):

    """
    Return whether full content is requested (via 'show_content') from the
    feed element called 'tagname'.
    """

    return show_content in ("content", "description") and tagname in ("content", "description")

# More Moin 1.9 compatibility functions.
65 66 def has_member(request, groupname, username): 67 if hasattr(request.dicts, "has_member"): 68 return request.dicts.has_member(groupname, username) 69 else: 70 return username in request.dicts.get(groupname, []) 71 72 # Fragments employ a "moinshare" attribute. 73 74 fragment_attribute = "moinshare" 75 76 def getFragments(s): 77 78 "Return all fragments in 's' having the MoinShare fragment attribute." 79 80 fragments = [] 81 for format, attributes, body in _getFragments(s): 82 if attributes.has_key(fragment_attribute): 83 fragments.append((format, attributes, body)) 84 return fragments 85 86 def getPreferredOutputTypes(request, mimetypes): 87 88 """ 89 Using the 'request', perform content negotiation, obtaining mimetypes common 90 to the fragment (given by 'mimetypes') and the client (found in the Accept 91 header). 92 """ 93 94 accept = getHeader(request, "Accept", "HTTP") 95 if accept: 96 prefs = getContentPreferences(accept) 97 return prefs.get_preferred_types(mimetypes) 98 else: 99 return mimetypes 100 101 def getUpdatedTime(metadata): 102 103 """ 104 Return the last updated time based on the given 'metadata', using the 105 current time if no explicit last modified time is specified. 106 """ 107 108 # NOTE: We could attempt to get the last edit time of a fragment. 109 110 latest_timestamp = metadata.get("last-modified") 111 if latest_timestamp: 112 return latest_timestamp 113 else: 114 return getCurrentTime() 115 116 # Entry/update classes. 117 118 class Update: 119 120 "A feed update entry." 121 122 def __init__(self): 123 self.content = None 124 self.content_type = None 125 self.updated = None 126 self.author = None 127 128 # Message-related attributes. 129 130 self.parts = None 131 132 # Feed-related attributes. 133 134 self.title = None 135 self.link = None 136 137 # Page-related attributes. 138 139 self.fragment = None 140 self.preferred = None 141 142 # Store-related attributes. 
143 144 self.message_number = None 145 146 # Store- and page-related attributes. 147 148 self.page = None 149 150 # Identification. 151 152 self.path = [] 153 154 def unique_id(self): 155 156 """ 157 A unique identifier used for anchors to parts of presented updates. 158 """ 159 160 return "moinshare-tab-%s-%s" % (self.message_number, "-".join(map(str, self.path))) 161 162 def __cmp__(self, other): 163 if self.updated is None and other.updated is not None: 164 return 1 165 elif self.updated is not None and other.updated is None: 166 return -1 167 else: 168 return cmp(self.updated, other.updated) 169 170 def copy(self, part_number=None): 171 update = Update() 172 update.title = self.title 173 update.link = self.link 174 update.updated = self.updated 175 update.author = self.author 176 update.fragment = self.fragment 177 update.preferred = self.preferred 178 update.message_number = self.message_number 179 update.page = self.page 180 update.path = self.path[:] 181 if part_number is not None: 182 update.path.append(part_number) 183 return update 184 185 # Error classes. 186 187 class FeedError(Exception): 188 pass 189 190 class FeedMissingError(FeedError): 191 pass 192 193 class FeedContentTypeError(FeedError): 194 pass 195 196 # Update retrieval from URLs. 197 198 def getUpdates(request, feed_url, max_entries, show_content): 199 200 """ 201 Using the given 'request', retrieve from 'feed_url' up to the given number 202 'max_entries' of update entries. The 'show_content' parameter can indicate 203 that a "summary" is to be obtained for each update, that the "content" of 204 each update is to be obtained (falling back to a summary if no content is 205 provided), or no content (indicated by a false value) is to be obtained. 206 207 A tuple of the form ((feed_type, channel_title, channel_link), updates) is 208 returned. 209 """ 210 211 # Prevent local file access. 
212 213 if feed_url.startswith("file:"): 214 raise FeedMissingError 215 216 elif feed_url.startswith("imap"): 217 reader = imapreader 218 219 else: 220 reader = None 221 222 # Obtain the resource, using a cached version if appropriate. 223 224 max_cache_age = int(getattr(request.cfg, "moin_share_max_cache_age", "300")) 225 data = getCachedResource(request, feed_url, "MoinShare", "wiki", max_cache_age, reader) 226 if not data: 227 raise FeedMissingError 228 229 # Interpret the cached feed. 230 231 f = StringIO(data) 232 try: 233 _url, content_type, _encoding, _metadata = getCachedResourceMetadata(f) 234 235 if content_type in ("application/atom+xml", "application/rss+xml", "application/xml"): 236 return getUpdatesFromFeed(f, max_entries, show_content) 237 238 elif content_type == "multipart/mixed": 239 return getUpdatesFromMailbox(f, max_entries, show_content, request) 240 241 else: 242 raise FeedContentTypeError 243 244 finally: 245 f.close() 246 247 # Update retrieval from feeds. 248 249 def getUpdatesFromFeed(feed, max_entries, show_content): 250 251 """ 252 Retrieve from 'feed' up to the given number 'max_entries' of update entries. 253 The 'show_content' parameter can indicate that a "summary" is to be obtained 254 for each update, that the "content" of each update is to be obtained 255 (falling back to a summary if no content is provided), or no content 256 (indicated by a false value) is to be obtained. 257 258 A tuple of the form ((feed_type, channel_title, channel_link), updates) is 259 returned. 260 """ 261 262 feed_updates = [] 263 264 # Parse each node from the feed. 265 266 channel_title = channel_link = None 267 268 feed_type = None 269 update = None 270 in_source = False 271 272 events = xml.dom.pulldom.parse(feed) 273 274 for event, value in events: 275 276 if not in_source and event == xml.dom.pulldom.START_ELEMENT: 277 tagname = value.localName 278 279 # Detect the feed type and items. 
280 281 if tagname == "feed" and value.namespaceURI == ATOM_NS: 282 feed_type = "atom" 283 284 elif tagname == "rss": 285 feed_type = "rss" 286 287 # Detect items. 288 289 elif feed_type == "rss" and tagname == "item" or \ 290 feed_type == "atom" and tagname == "entry": 291 292 update = Update() 293 294 # Detect source declarations. 295 296 elif feed_type == "atom" and tagname == "source": 297 in_source = True 298 299 # Handle item elements. 300 301 elif tagname == "title": 302 events.expandNode(value) 303 if update: 304 update.title = text(value) 305 else: 306 channel_title = text(value) 307 308 elif tagname == "link": 309 events.expandNode(value) 310 if update: 311 update.link = linktext(value, feed_type) 312 else: 313 channel_link = linktext(value, feed_type) 314 315 elif show_content and ( 316 feed_type == "atom" and tagname in ("content", "summary") or 317 feed_type == "rss" and tagname == "description"): 318 319 events.expandNode(value) 320 321 # Obtain content where requested or, failing that, a 322 # summary. 323 324 if update and (need_content(show_content, tagname) or tagname == "summary" and not update.content): 325 if feed_type == "atom": 326 update.content_type = value.getAttribute("type") or "text" 327 328 # Normalise the content types and extract the 329 # content. 
330 331 if update.content_type in ("xhtml", "application/xhtml+xml", "application/xml"): 332 update.content = children(value) 333 update.content_type = "application/xhtml+xml" 334 elif update.content_type in ("html", "text/html"): 335 update.content = text(value) 336 update.content_type = "text/html" 337 else: 338 update.content = text(value) 339 update.content_type = "text/plain" 340 else: 341 update.content_type = "text/html" 342 update.content = text(value) 343 344 elif feed_type == "atom" and tagname == "updated" or \ 345 feed_type == "rss" and tagname == "pubDate": 346 347 events.expandNode(value) 348 349 if update: 350 if feed_type == "atom": 351 value = getDateTimeFromISO8601(text(value)) 352 else: 353 value = DateTime(parsedate(text(value))) 354 update.updated = value 355 356 elif event == xml.dom.pulldom.END_ELEMENT: 357 tagname = value.localName 358 359 if feed_type == "rss" and tagname == "item" or \ 360 feed_type == "atom" and tagname == "entry": 361 362 feed_updates.append(update) 363 364 update = None 365 366 elif feed_type == "atom" and tagname == "source": 367 in_source = False 368 369 return (feed_type, channel_title, channel_link), feed_updates 370 371 # Update retrieval from mailboxes and multipart messages. 372 373 def getUpdatesFromMailbox(feed, max_entries, show_content, request): 374 375 """ 376 Retrieve from 'feed' up to the given number 'max_entries' of update entries. 377 The 'show_content' parameter can indicate that a "summary" is to be obtained 378 for each update, that the "content" of each update is to be obtained 379 (falling back to a summary if no content is provided), or no content 380 (indicated by a false value) is to be obtained. 381 382 A tuple of the form ((feed_type, channel_title, channel_link), updates) is 383 returned. 384 """ 385 386 mailbox = Parser().parse(feed) 387 388 feed_updates = [] 389 390 # Parse each message from the feed as a separate update. 
391 392 for message_number, part in enumerate(mailbox.get_payload()): 393 update = Update() 394 update.updated = getDateTimeFromRFC2822(part.get("date")) 395 update.title = part.get("subject", "Update #%d" % message_number) 396 397 update.message_number = message_number 398 399 update.content, update.content_type, update.parts, actual_author = \ 400 getUpdateContentFromPart(part, request) 401 402 if actual_author: 403 update.author = actual_author 404 405 feed_updates.append(update) 406 407 return ("mbox", None, None), feed_updates 408 409 # Update retrieval from pages. 410 411 def getUpdatesFromPage(page, request): 412 413 """ 414 Get updates from the given 'page' using the 'request'. A list of update 415 objects is returned. 416 """ 417 418 updates = [] 419 420 # NOTE: Use the updated datetime from the page for updates. 421 # NOTE: The published and updated details would need to be deduced from 422 # NOTE: the page history instead of being taken from the page as a whole. 423 424 metadata = getMetadata(page) 425 updated = getUpdatedTime(metadata) 426 427 # Get the fragment regions for the page. 428 429 for n, (format, attributes, body) in enumerate(getFragments(page.get_raw_body())): 430 431 update = Update() 432 433 # Produce a fragment identifier. 434 # NOTE: Choose a more robust identifier where none is explicitly given. 435 436 update.fragment = attributes.get("fragment", str(n)) 437 update.title = attributes.get("summary", "Update #%d" % n) 438 439 # Get the preferred content types available for the fragment. 440 441 update.preferred = getPreferredOutputTypes(request, getOutputTypes(request, format)) 442 443 # Try and obtain some suitable content for the entry. 444 # NOTE: Could potentially get a summary for the fragment. 
445 446 update.content = None 447 448 if "text/html" in update.preferred: 449 parser_cls = getParserClass(request, format) 450 451 if format == "html": 452 update.content = body 453 elif hasattr(parser_cls, "formatForOutputType"): 454 update.content = formatTextForOutputType(body, request, parser_cls, "text/html") 455 else: 456 fmt = request.html_formatter 457 fmt.setPage(page) 458 update.content = formatText(body, request, fmt, parser_cls) 459 460 update.content_type = "text/html" 461 462 update.page = page 463 464 # NOTE: The anchor would be supported in the page, but this requires 465 # NOTE: formatter modifications for the regions providing updates. 466 467 update.link = page.url(request, anchor=update.fragment) 468 update.updated = updated 469 470 updates.append(update) 471 472 return updates 473 474 # Update retrieval from message stores. 475 476 def getUpdatesFromStore(page, request): 477 478 """ 479 Get updates from the message store associated with the given 'page' using 480 the 'request'. A list of update objects is returned. 481 """ 482 483 updates = [] 484 485 metadata = getMetadata(page) 486 updated = getUpdatedTime(metadata) 487 488 store = ItemStore(page, "messages", "message-locks") 489 490 keys = store.keys() 491 keys.sort() 492 493 for key in keys: 494 message_text = store[key] 495 update = getUpdateFromMessageText(message_text, key, request) 496 update.page = page 497 updates.append(update) 498 499 return updates 500 501 def getUpdateFromMessageText(message_text, message_number, request): 502 503 "Return an update for the given 'message_text' and 'message_number'." 504 505 update = Update() 506 message = Parser().parsestr(message_text) 507 508 # Produce a fragment identifier. 
509 510 update.updated = getDateTimeFromRFC2822(message.get("date")) 511 update.title = message.get("subject", "Update #%d" % message_number) 512 update.author = message.get("moin-user") 513 514 update.message_number = message_number 515 516 update.content, update.content_type, update.parts, actual_author = \ 517 getUpdateContentFromPart(message, request) 518 519 if actual_author: 520 update.author = actual_author 521 522 return update 523 524 def getUpdateContentFromPart(part, request): 525 526 """ 527 Return decoded content, the content type, any subparts, and any author 528 identity in a tuple for a given 'part'. 529 """ 530 531 # Determine whether the part has several representations. 532 533 # For a single part, use it as the update content. 534 535 if not part.is_multipart(): 536 content, content_type = getPartContent(part) 537 return content, content_type, None, None 538 539 # For a collection of related parts, use the first as the update content 540 # and assume that the formatter will reference the other parts. 541 542 elif part.get_content_subtype() == "related": 543 main_part = part.get_payload()[0] 544 content, content_type = getPartContent(main_part) 545 return content, content_type, [main_part], None 546 547 # Encrypted content cannot be meaningfully separated. 548 549 elif part.get_content_subtype() == "encrypted": 550 try: 551 part, author = getDecryptedParts(part, request) 552 content, content_type, parts, _author = getUpdateContentFromPart(part, request) 553 return content, content_type, parts, author 554 except MoinMessageError: 555 return None, part.get_content_type(), part.get_payload(), None 556 557 # Otherwise, just obtain the parts for separate display. 558 559 else: 560 return None, part.get_content_type(), part.get_payload(), None 561 562 def getDecryptedParts(part, request): 563 564 "Decrypt the given 'part', returning the decoded content." 565 566 homedir = get_homedir(request) 567 gpg = GPG(homedir) 568 569 # Decrypt the part. 
570 571 if is_encrypted(part): 572 text = gpg.decryptMessage(part) 573 part = Parser().parsestr(text) 574 575 # Extract any signature details. 576 577 if is_signed(part): 578 result = gpg.verifyMessage(part) 579 if result: 580 fingerprint, identity, content = result 581 return content, get_username_for_fingerprint(request, fingerprint) 582 583 return part, None 584 585 def getPartContent(part): 586 587 "Decode the 'part', returning the decoded payload and the content type." 588 589 charset = part.get_content_charset() 590 payload = part.get_payload(decode=True) 591 return (charset and unicode(payload, charset) or payload), part.get_content_type() 592 593 def getUpdateFromPart(parent, part, part_number, request): 594 595 "Using the 'parent' update, return an update object for the given 'part'." 596 597 update = parent.copy(part_number) 598 update.content, update.content_type, update.parts, update.author = getUpdateContentFromPart(part, request) 599 return update 600 601 def getUpdatesForFormatting(update, request): 602 603 "Get a list of updates for formatting given 'update'." 604 605 updates = [] 606 607 # Handle multipart/alternative and other non-related multiparts. 608 609 if update.parts: 610 for n, part in enumerate(update.parts): 611 update_part = getUpdateFromPart(update, part, n, request) 612 updates += getUpdatesForFormatting(update_part, request) 613 else: 614 updates.append(update) 615 616 return updates 617 618 # Update formatting. 619 620 def getFormattedUpdate(update, request, fmt): 621 622 """ 623 Return the formatted form of the given 'update' using the given 'request' 624 and 'fmt'. 625 """ 626 627 # NOTE: Some control over the HTML and XHTML should be exercised. 
628 629 if update.content_type == "text/html" and update.page is not None and update.message_number is not None: 630 parsers = [get_make_parser(update.page, update.message_number)] 631 else: 632 parsers = getParsersForContentType(request.cfg, update.content_type) 633 634 if parsers: 635 for parser_cls in parsers: 636 if hasattr(parser_cls, "formatForOutputType"): 637 return formatTextForOutputType(update.content, request, parser_cls, "text/html") 638 else: 639 return formatText(update.content, request, fmt, parser_cls=parser_cls) 640 break 641 else: 642 return None 643 644 def formatUpdate(update, request, fmt): 645 646 "Format the given 'update' using the given 'request' and 'fmt'." 647 648 result = [] 649 append = result.append 650 651 updates = getUpdatesForFormatting(update, request) 652 single = len(updates) == 1 653 654 # Format some navigation tabs. 655 # This only occurs for multipart updates. 656 657 if not single: 658 append(fmt.div(on=1, css_class="moinshare-alternatives")) 659 660 first = True 661 662 for update_part in updates: 663 if update_part.content: 664 append(fmt.url(1, "#%s" % update_part.unique_id())) 665 append(fmt.text(update_part.content_type)) 666 append(fmt.url(0)) 667 668 first = False 669 670 append(fmt.div(on=0)) 671 672 # Format the content. 673 674 first = True 675 676 for update_part in updates: 677 if update_part.content: 678 679 # Encapsulate each alternative if many exist. 680 681 if not single: 682 css_class = first and "moinshare-default" or "moinshare-other" 683 append(fmt.div(on=1, css_class="moinshare-alternative %s" % css_class, id=update_part.unique_id())) 684 685 # Include the content. 686 687 append(formatUpdatePart(update_part, request, fmt)) 688 689 if not single: 690 append(fmt.div(on=0)) 691 692 first = False 693 694 return "".join(result) 695 696 def formatUpdatePart(update, request, fmt): 697 698 "Format the given 'update' using the given 'request' and 'fmt'." 
699 700 _ = request.getText 701 702 result = [] 703 append = result.append 704 705 # Encapsulate the content. 706 707 append(fmt.div(on=1, css_class="moinshare-content")) 708 text = getFormattedUpdate(update, request, fmt) 709 if text: 710 append(text) 711 else: 712 append(fmt.text(_("Update cannot be shown for content of type %s.") % update.content_type)) 713 append(fmt.div(on=0)) 714 715 return "".join(result) 716 717 # Source management. 718 719 def getUpdateSources(pagename, request): 720 721 "Return the update sources from the given 'pagename' using the 'request'." 722 723 sources = {} 724 725 source_definitions = getWikiDict(pagename, request) 726 727 if source_definitions: 728 for name, value in source_definitions.items(): 729 sources[name] = getSourceParameters(value) 730 731 return sources 732 733 def getSourceParameters(source_definition): 734 735 "Return the parameters from the given 'source_definition' string." 736 737 return parseDictEntry(source_definition, ("type", "location")) 738 739 # HTML parsing support. 740 741 class IncomingHTMLSanitizer(HTMLSanitizer): 742 743 """ 744 An HTML parser that rewrites references to attachments. Instead of referring 745 to content identifier URLs with a scheme of "cid:", the resulting HTML will 746 refer to action URLs that extract parts from messages in message stores. 747 748 NOTE: This rewriting does not occur for other sources of HTML bundles 749 NOTE: because other actions would be required to support the extraction of 750 NOTE: resources from such sources. 751 """ 752 753 def __init__(self, out, request, page, message_number): 754 755 """ 756 Initialise the sanitizer with an 'out' stream for output, the given 757 'request', a 'page' from which the HTML originates, together with the 758 'message_number' providing the content. 
759 """ 760 761 HTMLSanitizer.__init__(self, out) 762 self.request = request 763 self.message_number = message_number 764 self.page = page 765 766 def rewrite_reference(self, ref): 767 if ref.startswith("cid:"): 768 part = ref[len("cid:"):] 769 action_link = self.page.url(self.request, { 770 "action" : "ReadMessage", "doit" : "1", 771 "message" : self.message_number, "part" : part 772 }) 773 return action_link 774 else: 775 return ref 776 777 def handle_starttag(self, tag, attrs): 778 new_attrs = [] 779 for attrname, attrvalue in attrs: 780 if attrname in self.uri_attrs: 781 new_attrs.append((attrname, self.rewrite_reference(attrvalue))) 782 else: 783 new_attrs.append((attrname, attrvalue)) 784 HTMLSanitizer.handle_starttag(self, tag, new_attrs) 785 786 class IncomingMarkup(Markup): 787 788 "A special markup processor for incoming HTML." 789 790 def sanitize(self, request, page, message_number): 791 out = getwriter("utf-8")(StringIO()) 792 sanitizer = IncomingHTMLSanitizer(out, request, page, message_number) 793 sanitizer.feed(self.stripentities(keepxmlentities=True)) 794 return IncomingMarkup(unicode(out.getvalue(), "utf-8")) 795 796 class IncomingHTMLParser: 797 798 "Filters and rewrites incoming HTML content." 799 800 def __init__(self, raw, request, **kw): 801 self.raw = raw 802 self.request = request 803 self.message_number = None 804 self.page = None 805 806 def format(self, formatter, **kw): 807 808 "Send the text." 809 810 try: 811 self.request.write(formatter.rawHTML(IncomingMarkup(self.raw).sanitize(self.request, self.page, self.message_number))) 812 except HTMLParseError, e: 813 self.request.write(formatter.sysmsg(1) + 814 formatter.text(u'HTML parsing error: %s in "%s"' % (e.msg, 815 self.raw.splitlines()[e.lineno - 1].strip())) + 816 formatter.sysmsg(0)) 817 818 class MakeIncomingHTMLParser: 819 820 "A class that makes parsers configured for messages." 
821 822 def __init__(self, page, message_number): 823 824 "Initialise with state that is used to configure instantiated parsers." 825 826 self.message_number = message_number 827 self.page = page 828 829 def __call__(self, *args, **kw): 830 parser = IncomingHTMLParser(*args, **kw) 831 parser.message_number = self.message_number 832 parser.page = self.page 833 return parser 834 835 def get_make_parser(page, message_number): 836 837 """ 838 Return a callable that will return a parser configured for the message from 839 the given 'page' with the given 'message_number'. 840 """ 841 842 return MakeIncomingHTMLParser(page, message_number) 843 844 # vim: tabstop=4 expandtab shiftwidth=4