# -*- coding: iso-8859-1 -*-
"""
MoinMoin - MoinShare library

@copyright: 2011, 2012, 2013, 2014 by Paul Boddie <paul@boddie.org.uk>
@copyright: 2003-2006 Edgewall Software
@copyright: 2006 MoinMoin:AlexanderSchremmer
@license: GNU GPL (v2 or later), see COPYING.txt for details.
"""

from ContentTypeSupport import getContentPreferences
from DateSupport import getCurrentTime, getDateTimeFromRFC2822, \
                        getDateTimeFromISO8601, DateTime
from MoinSupport import *
from MoinRemoteSupport import *
from ItemSupport import ItemStore
from MoinMessage import GPG, is_encrypted, is_signed, MoinMessageError
from MoinMessageSupport import get_homedir, get_username_for_fingerprint
from MoinMoin.support.htmlmarkup import HTMLParseError, HTMLSanitizer, Markup
from MoinMoin import wikiutil
from email.parser import Parser
from email.utils import parsedate
from codecs import getwriter
import xml.dom.pulldom

try:
    from cStringIO import StringIO
except ImportError:
    from StringIO import StringIO

# Keep a reference to the general fragment finder from MoinSupport, since a
# MoinShare-specific getFragments is defined below.

_getFragments = getFragments

__version__ = "0.1"

ATOM_NS = "http://www.w3.org/2005/Atom"

# Utility functions.

def text(element):

    "Return the concatenated text node content of the given DOM 'element'."

    nodes = []
    for node in element.childNodes:
        if node.nodeType == node.TEXT_NODE:
            nodes.append(node.nodeValue)
    return "".join(nodes)

def children(element):

    "Return the child nodes of the given DOM 'element' serialised as markup."

    nodes = []
    for node in element.childNodes:
        nodes.append(node.toxml())
    return "".join(nodes)

def unescape(text):

    """
    Replace the basic XML character entities in 'text' with their plain
    character equivalents. The ampersand entity is replaced last so that it
    cannot produce text which is then reinterpreted as another entity.
    """

    return text.replace("&lt;", "<").replace("&gt;", ">").replace("&amp;", "&")

def linktext(element, feed_type):

    """
    Return the link URL provided by 'element': RSS supplies it as element
    text, whereas Atom supplies it in the "href" attribute.
    """

    if feed_type == "rss":
        return text(element)
    else:
        return element.getAttribute("href")

def need_content(show_content, tagname):

    """
    Return whether full content is requested (via 'show_content') from the
    feed element called 'tagname'.
    """

    return show_content in ("content", "description") and tagname in ("content", "description")

# More Moin 1.9 compatibility functions.
65 66 def has_member(request, groupname, username): 67 if hasattr(request.dicts, "has_member"): 68 return request.dicts.has_member(groupname, username) 69 else: 70 return username in request.dicts.get(groupname, []) 71 72 # Fragments employ a "moinshare" attribute. 73 74 fragment_attribute = "moinshare" 75 76 def getFragments(s): 77 78 "Return all fragments in 's' having the MoinShare fragment attribute." 79 80 fragments = [] 81 for format, attributes, body in _getFragments(s): 82 if attributes.has_key(fragment_attribute): 83 fragments.append((format, attributes, body)) 84 return fragments 85 86 def getPreferredOutputTypes(request, mimetypes): 87 88 """ 89 Using the 'request', perform content negotiation, obtaining mimetypes common 90 to the fragment (given by 'mimetypes') and the client (found in the Accept 91 header). 92 """ 93 94 accept = getHeader(request, "Accept", "HTTP") 95 if accept: 96 prefs = getContentPreferences(accept) 97 return prefs.get_preferred_types(mimetypes) 98 else: 99 return mimetypes 100 101 def getUpdatedTime(metadata): 102 103 """ 104 Return the last updated time based on the given 'metadata', using the 105 current time if no explicit last modified time is specified. 106 """ 107 108 # NOTE: We could attempt to get the last edit time of a fragment. 109 110 latest_timestamp = metadata.get("last-modified") 111 if latest_timestamp: 112 return latest_timestamp 113 else: 114 return getCurrentTime() 115 116 # Entry/update classes. 117 118 class Update: 119 120 "A feed update entry." 121 122 def __init__(self): 123 self.content = None 124 self.content_type = None 125 self.updated = None 126 self.author = None 127 128 # Message-related attributes. 129 130 self.parts = None 131 132 # Feed-related attributes. 133 134 self.title = None 135 self.link = None 136 137 # Page-related attributes. 138 139 self.fragment = None 140 self.preferred = None 141 142 # Store-related attributes. 
143 144 self.message_number = None 145 146 # Store- and page-related attributes. 147 148 self.page = None 149 150 # Identification. 151 152 self.path = [] 153 154 def unique_id(self): 155 156 """ 157 A unique identifier used for anchors to parts of presented updates. 158 """ 159 160 return "moinshare-tab-%s-%s" % (self.message_number, "-".join(map(str, self.path))) 161 162 def __cmp__(self, other): 163 if self.updated is None and other.updated is not None: 164 return 1 165 elif self.updated is not None and other.updated is None: 166 return -1 167 else: 168 return cmp(self.updated, other.updated) 169 170 def copy(self, part_number=None): 171 update = Update() 172 update.title = self.title 173 update.link = self.link 174 update.updated = self.updated 175 update.author = self.author 176 update.fragment = self.fragment 177 update.preferred = self.preferred 178 update.message_number = self.message_number 179 update.page = self.page 180 update.path = self.path[:] 181 if part_number is not None: 182 update.path.append(part_number) 183 return update 184 185 # Error classes. 186 187 class FeedError(Exception): 188 pass 189 190 class FeedMissingError(FeedError): 191 pass 192 193 class FeedContentTypeError(FeedError): 194 pass 195 196 # Update retrieval from URLs. 197 198 def getUpdates(request, feed_url, max_entries, show_content): 199 200 """ 201 Using the given 'request', retrieve from 'feed_url' up to the given number 202 'max_entries' of update entries. The 'show_content' parameter can indicate 203 that a "summary" is to be obtained for each update, that the "content" of 204 each update is to be obtained (falling back to a summary if no content is 205 provided), or no content (indicated by a false value) is to be obtained. 206 207 A tuple of the form ((feed_type, channel_title, channel_link), updates) is 208 returned. 209 """ 210 211 # Prevent local file access. 
212 213 if feed_url.startswith("file:"): 214 raise FeedMissingError 215 216 elif feed_url.startswith("imap"): 217 reader = imapreader 218 219 else: 220 reader = None 221 222 # Obtain the resource, using a cached version if appropriate. 223 224 max_cache_age = int(getattr(request.cfg, "moin_share_max_cache_age", "300")) 225 data = getCachedResource(request, feed_url, "MoinShare", "wiki", max_cache_age, reader) 226 if not data: 227 raise FeedMissingError 228 229 # Interpret the cached feed. 230 231 f = StringIO(data) 232 try: 233 _url, content_type, _encoding, _metadata = getCachedResourceMetadata(f) 234 235 if content_type in ("application/atom+xml", "application/rss+xml", "application/xml"): 236 return getUpdatesFromFeed(f, max_entries, show_content) 237 238 elif content_type == "multipart/mixed": 239 return getUpdatesFromMailbox(f, max_entries, show_content, request) 240 241 else: 242 raise FeedContentTypeError 243 244 finally: 245 f.close() 246 247 # Update retrieval from feeds. 248 249 def getUpdatesFromFeed(feed, max_entries, show_content): 250 251 """ 252 Retrieve from 'feed' up to the given number 'max_entries' of update entries. 253 The 'show_content' parameter can indicate that a "summary" is to be obtained 254 for each update, that the "content" of each update is to be obtained 255 (falling back to a summary if no content is provided), or no content 256 (indicated by a false value) is to be obtained. 257 258 A tuple of the form ((feed_type, channel_title, channel_link), updates) is 259 returned. 260 """ 261 262 feed_updates = [] 263 264 # Parse each node from the feed. 265 266 channel_title = channel_link = None 267 268 feed_type = None 269 update = None 270 in_source = False 271 272 events = xml.dom.pulldom.parse(feed) 273 274 for event, value in events: 275 276 if not in_source and event == xml.dom.pulldom.START_ELEMENT: 277 tagname = value.localName 278 279 # Detect the feed type and items. 
280 281 if tagname == "feed" and value.namespaceURI == ATOM_NS: 282 feed_type = "atom" 283 284 elif tagname == "rss": 285 feed_type = "rss" 286 287 # Detect items. 288 289 elif feed_type == "rss" and tagname == "item" or \ 290 feed_type == "atom" and tagname == "entry": 291 292 update = Update() 293 294 # Detect source declarations. 295 296 elif feed_type == "atom" and tagname == "source": 297 in_source = True 298 299 # Handle item elements. 300 301 elif tagname == "title": 302 events.expandNode(value) 303 if update: 304 update.title = text(value) 305 else: 306 channel_title = text(value) 307 308 elif tagname == "link": 309 events.expandNode(value) 310 if update: 311 update.link = linktext(value, feed_type) 312 else: 313 channel_link = linktext(value, feed_type) 314 315 elif show_content and ( 316 feed_type == "atom" and tagname in ("content", "summary") or 317 feed_type == "rss" and tagname == "description"): 318 319 events.expandNode(value) 320 321 # Obtain content where requested or, failing that, a 322 # summary. 323 324 if update and (need_content(show_content, tagname) or tagname == "summary" and not update.content): 325 if feed_type == "atom": 326 update.content_type = value.getAttribute("type") or "text" 327 328 # Normalise the content types and extract the 329 # content. 
330 331 if update.content_type in ("xhtml", "application/xhtml+xml", "application/xml"): 332 update.content = children(value) 333 update.content_type = "application/xhtml+xml" 334 elif update.content_type in ("html", "text/html"): 335 update.content = text(value) 336 update.content_type = "text/html" 337 else: 338 update.content = text(value) 339 update.content_type = "text/plain" 340 else: 341 update.content_type = "text/html" 342 update.content = text(value) 343 344 elif feed_type == "atom" and tagname == "updated" or \ 345 feed_type == "rss" and tagname == "pubDate": 346 347 events.expandNode(value) 348 349 if update: 350 if feed_type == "atom": 351 value = getDateTimeFromISO8601(text(value)) 352 else: 353 value = DateTime(parsedate(text(value))) 354 update.updated = value 355 356 elif event == xml.dom.pulldom.END_ELEMENT: 357 tagname = value.localName 358 359 if feed_type == "rss" and tagname == "item" or \ 360 feed_type == "atom" and tagname == "entry": 361 362 feed_updates.append(update) 363 364 update = None 365 366 elif feed_type == "atom" and tagname == "source": 367 in_source = False 368 369 return (feed_type, channel_title, channel_link), feed_updates 370 371 # Update retrieval from mailboxes and multipart messages. 372 373 def getUpdatesFromMailbox(feed, max_entries, show_content, request): 374 375 """ 376 Retrieve from 'feed' up to the given number 'max_entries' of update entries. 377 The 'show_content' parameter can indicate that a "summary" is to be obtained 378 for each update, that the "content" of each update is to be obtained 379 (falling back to a summary if no content is provided), or no content 380 (indicated by a false value) is to be obtained. 381 382 A tuple of the form ((feed_type, channel_title, channel_link), updates) is 383 returned. 384 """ 385 386 mailbox = Parser().parse(feed) 387 388 feed_updates = [] 389 390 # Parse each message from the feed as a separate update. 
391 392 for message_number, part in enumerate(mailbox.get_payload()): 393 update = Update() 394 update.updated = getDateTimeFromRFC2822(part.get("date")) 395 update.title = part.get("subject", "Update #%d" % message_number) 396 397 update.message_number = message_number 398 399 update.content, update.content_type, update.parts, actual_author = \ 400 getUpdateContentFromPart(part, request) 401 402 if actual_author: 403 update.author = actual_author 404 405 feed_updates.append(update) 406 407 return ("mbox", None, None), feed_updates 408 409 # Update retrieval from pages. 410 411 def getUpdatesFromPage(page, request): 412 413 """ 414 Get updates from the given 'page' using the 'request'. A list of update 415 objects is returned. 416 """ 417 418 updates = [] 419 420 # NOTE: Use the updated datetime from the page for updates. 421 # NOTE: The published and updated details would need to be deduced from 422 # NOTE: the page history instead of being taken from the page as a whole. 423 424 metadata = getMetadata(page) 425 updated = getUpdatedTime(metadata) 426 427 # Get the fragment regions for the page. 428 429 for n, (format, attributes, body) in enumerate(getFragments(page.get_raw_body())): 430 431 update = Update() 432 433 # Produce a fragment identifier. 434 # NOTE: Choose a more robust identifier where none is explicitly given. 435 436 update.fragment = attributes.get("fragment", str(n)) 437 update.title = attributes.get("summary", "Update #%d" % n) 438 439 # Get the preferred content types available for the fragment. 440 441 update.preferred = getPreferredOutputTypes(request, getOutputTypes(request, format)) 442 443 # Try and obtain some suitable content for the entry. 444 # NOTE: Could potentially get a summary for the fragment. 
445 446 update.content = None 447 448 if "text/html" in update.preferred: 449 parser_cls = getParserClass(request, format) 450 451 if format == "html": 452 update.content = body 453 elif hasattr(parser_cls, "formatForOutputType"): 454 update.content = formatTextForOutputType(body, request, parser_cls, "text/html") 455 else: 456 fmt = request.html_formatter 457 fmt.setPage(page) 458 update.content = formatText(body, request, fmt, parser_cls) 459 460 update.content_type = "text/html" 461 462 update.page = page 463 464 # NOTE: The anchor would be supported in the page, but this requires 465 # NOTE: formatter modifications for the regions providing updates. 466 467 update.link = page.url(request, anchor=update.fragment) 468 update.updated = updated 469 470 updates.append(update) 471 472 return updates 473 474 # Update retrieval from message stores. 475 476 def getUpdatesFromStore(page, request): 477 478 """ 479 Get updates from the message store associated with the given 'page' using 480 the 'request'. A list of update objects is returned. 481 """ 482 483 updates = [] 484 485 metadata = getMetadata(page) 486 updated = getUpdatedTime(metadata) 487 488 store = ItemStore(page, "messages", "message-locks") 489 490 keys = store.keys() 491 keys.sort() 492 493 for key in keys: 494 message_text = store[key] 495 update = getUpdateFromMessageText(message_text, key, request) 496 update.page = page 497 updates.append(update) 498 499 return updates 500 501 def getUpdateFromMessageText(message_text, message_number, request): 502 503 "Return an update for the given 'message_text' and 'message_number'." 504 505 update = Update() 506 message = Parser().parsestr(message_text) 507 508 # Produce a fragment identifier. 
509 510 update.updated = getDateTimeFromRFC2822(message.get("date")) 511 update.title = message.get("subject", "Update #%d" % message_number) 512 update.author = message.get("moin-user") 513 514 update.message_number = message_number 515 516 update.content, update.content_type, update.parts, actual_author = \ 517 getUpdateContentFromPart(message, request) 518 519 if actual_author: 520 update.author = actual_author 521 522 return update 523 524 def getUpdateContentFromPart(part, request): 525 526 """ 527 Return decoded content, the content type, any subparts, and any author 528 identity in a tuple for a given 'part'. 529 """ 530 531 # Determine whether the part has several representations. 532 533 # For a single part, use it as the update content. 534 535 if not part.is_multipart(): 536 content, content_type = getPartContent(part) 537 return content, content_type, None, None 538 539 # For a collection of related parts, use the first as the update content 540 # and assume that the formatter will reference the other parts. 541 542 elif part.get_content_subtype() == "related": 543 main_part = part.get_payload()[0] 544 content, content_type = getPartContent(main_part) 545 return content, content_type, [main_part], None 546 547 # Encrypted content cannot be meaningfully separated. 548 549 elif part.get_content_subtype() == "encrypted": 550 try: 551 part, author = getDecryptedParts(part, request) 552 content, content_type, parts, _author = getUpdateContentFromPart(part, request) 553 return content, content_type, parts, author 554 except MoinMessageError: 555 return None, part.get_content_type(), part.get_payload(), None 556 557 # Otherwise, just obtain the parts for separate display. 558 559 else: 560 return None, part.get_content_type(), part.get_payload(), None 561 562 def getDecryptedParts(part, request): 563 564 "Decrypt the given 'part', returning the decoded content." 565 566 homedir = get_homedir(request) 567 gpg = GPG(homedir) 568 569 # Decrypt the part. 
570 571 if is_encrypted(part): 572 text = gpg.decryptMessage(part) 573 part = Parser().parsestr(text) 574 575 # Extract any signature details. 576 577 if is_signed(part): 578 result = gpg.verifyMessage(part) 579 if result: 580 fingerprint, identity, content = result 581 return content, get_username_for_fingerprint(request, fingerprint) 582 583 return part, None 584 585 def getPartContent(part): 586 587 "Decode the 'part', returning the decoded payload and the content type." 588 589 charset = part.get_content_charset() 590 payload = part.get_payload(decode=True) 591 return (charset and unicode(payload, charset) or payload), part.get_content_type() 592 593 def getUpdateFromPart(parent, part, part_number, request): 594 595 "Using the 'parent' update, return an update object for the given 'part'." 596 597 update = parent.copy(part_number) 598 update.content, update.content_type, update.parts, update.author = getUpdateContentFromPart(part, request) 599 return update 600 601 def getUpdatesForFormatting(update, request): 602 603 "Get a list of updates for formatting given 'update'." 604 605 updates = [] 606 607 # Handle multipart/alternative and other non-related multiparts. 608 609 if update.parts: 610 for n, part in enumerate(update.parts): 611 update_part = getUpdateFromPart(update, part, n, request) 612 updates += getUpdatesForFormatting(update_part, request) 613 else: 614 updates.append(update) 615 616 return updates 617 618 # Update formatting. 619 620 def getFormattedUpdate(update, request, fmt): 621 622 """ 623 Return the formatted form of the given 'update' using the given 'request' 624 and 'fmt'. 625 """ 626 627 # NOTE: Some control over the HTML and XHTML should be exercised. 
628 629 if update.content_type == "text/html" and update.page is not None and update.message_number is not None: 630 parsers = [get_make_parser(update.page, update.message_number)] 631 else: 632 parsers = getParsersForContentType(request.cfg, update.content_type) 633 634 if parsers: 635 for parser_cls in parsers: 636 if hasattr(parser_cls, "formatForOutputType"): 637 return formatTextForOutputType(update.content, request, parser_cls, "text/html") 638 else: 639 return formatText(update.content, request, fmt, parser_cls=parser_cls) 640 break 641 else: 642 return None 643 644 def formatUpdate(update, request, fmt): 645 646 "Format the given 'update' using the given 'request' and 'fmt'." 647 648 result = [] 649 append = result.append 650 651 updates = getUpdatesForFormatting(update, request) 652 single = len(updates) == 1 653 654 # Format some navigation tabs. 655 # This only occurs for multipart updates. 656 657 if not single: 658 append(fmt.div(on=1, css_class="moinshare-alternatives")) 659 660 first = True 661 662 for update_part in updates: 663 if update_part.content: 664 append(fmt.url(1, "#%s" % update_part.unique_id())) 665 append(fmt.text(update_part.content_type)) 666 append(fmt.url(0)) 667 668 first = False 669 670 append(fmt.div(on=0)) 671 672 # Format the content. 673 674 first = True 675 676 for update_part in updates: 677 if update_part.content: 678 679 # Encapsulate each alternative if many exist. 680 681 if not single: 682 css_class = first and "moinshare-default" or "moinshare-other" 683 append(fmt.div(on=1, css_class="moinshare-alternative %s" % css_class, id=update_part.unique_id())) 684 685 # Include the content. 686 687 append(formatUpdatePart(update_part, request, fmt)) 688 689 if not single: 690 append(fmt.div(on=0)) 691 692 first = False 693 694 return "".join(result) 695 696 def formatUpdatePart(update, request, fmt): 697 698 "Format the given 'update' using the given 'request' and 'fmt'." 
699 700 _ = request.getText 701 702 result = [] 703 append = result.append 704 705 # Encapsulate the content. 706 707 append(fmt.div(on=1, css_class="moinshare-content")) 708 text = getFormattedUpdate(update, request, fmt) 709 if text: 710 append(text) 711 else: 712 append(fmt.text(_("Update cannot be shown for content of type %s.") % update.content_type)) 713 append(fmt.div(on=0)) 714 715 return "".join(result) 716 717 # Source management. 718 719 def getUpdateSources(pagename, request): 720 721 "Return the update sources from the given 'pagename' using the 'request'." 722 723 sources = {} 724 725 source_definitions = getWikiDict(pagename, request) 726 727 if source_definitions: 728 for name, value in source_definitions.items(): 729 sources[name] = getSourceParameters(value) 730 731 return sources 732 733 def getSourceParameters(source_definition): 734 735 "Return the parameters from the given 'source_definition' string." 736 737 return parseDictEntry(source_definition, ("type", "location")) 738 739 # HTML parsing support. 740 741 class IncomingHTMLSanitizer(HTMLSanitizer): 742 743 """ 744 An HTML parser that rewrites references to attachments. Instead of referring 745 to content identifier URLs with a scheme of "cid:", the resulting HTML will 746 refer to action URLs that extract parts from messages in message stores. 747 748 NOTE: This rewriting does not occur for other sources of HTML bundles 749 NOTE: because other actions would be required to support the extraction of 750 NOTE: resources from such sources. 751 """ 752 753 def __init__(self, out, request, page, message_number): 754 755 """ 756 Initialise the sanitizer with an 'out' stream for output, the given 757 'request', a 'page' from which the HTML originates, together with the 758 'message_number' providing the content. 
759 """ 760 761 HTMLSanitizer.__init__(self, out) 762 self.request = request 763 self.message_number = message_number 764 self.page = page 765 766 def rewrite_reference(self, ref): 767 if ref.startswith("cid:"): 768 part = ref[len("cid:"):] 769 action_link = self.page.url(self.request, { 770 "action" : "ReadMessage", "doit" : "1", 771 "message" : self.message_number, "part" : part 772 }) 773 return action_link 774 else: 775 return ref 776 777 def handle_starttag(self, tag, attrs): 778 new_attrs = [] 779 for attrname, attrvalue in attrs: 780 if attrname in self.uri_attrs: 781 new_attrs.append((attrname, self.rewrite_reference(attrvalue))) 782 else: 783 new_attrs.append((attrname, attrvalue)) 784 HTMLSanitizer.handle_starttag(self, tag, new_attrs) 785 786 class IncomingMarkup(Markup): 787 788 "A special markup processor for incoming HTML." 789 790 def sanitize(self, request, page, message_number): 791 out = getwriter("utf-8")(StringIO()) 792 sanitizer = IncomingHTMLSanitizer(out, request, page, message_number) 793 sanitizer.feed(self.stripentities(keepxmlentities=True)) 794 return IncomingMarkup(unicode(out.getvalue(), "utf-8")) 795 796 class IncomingHTMLParser: 797 798 "Filters and rewrites incoming HTML content." 799 800 def __init__(self, raw, request, **kw): 801 self.raw = raw 802 self.request = request 803 self.message_number = None 804 self.page = None 805 806 def format(self, formatter, **kw): 807 808 "Send the text." 809 810 try: 811 self.request.write(formatter.rawHTML(IncomingMarkup(self.raw).sanitize(self.request, self.page, self.message_number))) 812 except HTMLParseError, e: 813 self.request.write(formatter.sysmsg(1) + 814 formatter.text(u'HTML parsing error: %s in "%s"' % (e.msg, 815 self.raw.splitlines()[e.lineno - 1].strip())) + 816 formatter.sysmsg(0)) 817 818 class MakeIncomingHTMLParser: 819 820 "A class that makes parsers configured for messages." 
821 822 def __init__(self, page, message_number): 823 824 "Initialise with state that is used to configure instantiated parsers." 825 826 self.message_number = message_number 827 self.page = page 828 829 def __call__(self, *args, **kw): 830 parser = IncomingHTMLParser(*args, **kw) 831 parser.message_number = self.message_number 832 parser.page = self.page 833 return parser 834 835 def get_make_parser(page, message_number): 836 837 """ 838 Return a callable that will return a parser configured for the message from 839 the given 'page' with the given 'message_number'. 840 """ 841 842 return MakeIncomingHTMLParser(page, message_number) 843 844 # vim: tabstop=4 expandtab shiftwidth=4