ATOM_NS = "http://www.w3.org/2005/Atom"

# Utility functions.

def text(element):

    """
    Return the character data of 'element', concatenating the values of its
    immediate text node children. Child elements are ignored.
    """

    nodes = []
    for node in element.childNodes:
        if node.nodeType == node.TEXT_NODE:
            nodes.append(node.nodeValue)
    return "".join(nodes)

def children(element):

    """
    Return the serialised XML of all child nodes of 'element', concatenated
    in document order. Used to extract embedded XHTML content verbatim.
    """

    nodes = []
    for node in element.childNodes:
        nodes.append(node.toxml())
    return "".join(nodes)

def unescape(text):

    """
    Replace the standard XML character entities in 'text' with their literal
    characters, returning the decoded string.
    """

    # Fix: the entity names had been mangled (each replacement was a no-op
    # such as replace("<", "<")). Decode "&amp;" last so that sequences like
    # "&amp;lt;" decode once to "&lt;" rather than twice to "<".

    return text.replace("&lt;", "<").replace("&gt;", ">").replace("&amp;", "&")

def linktext(element, feed_type):

    """
    Return the link URL conveyed by 'element' according to the 'feed_type':
    RSS supplies the URL as character data, Atom as an href attribute.
    """

    if feed_type == "rss":
        return text(element)
    else:
        return element.getAttribute("href")

def need_content(show_content, tagname):

    """
    Return whether full content was requested via 'show_content' and the
    element 'tagname' is a content-bearing element ("content" for Atom,
    "description" for RSS).
    """

    return show_content in ("content", "description") and tagname in ("content", "description")
# Error classes.

class FeedError(Exception):

    "Base class for feed retrieval errors."

    pass

class FeedMissingError(FeedError):

    "Raised when no data could be obtained for a feed URL."

    pass

class FeedContentTypeError(FeedError):

    "Raised when a retrieved resource is not a recognised feed content type."

    pass

# Feed retrieval from URLs.

def getUpdates(request, feed_url, max_entries, show_content):

    """
    Using the given 'request', retrieve from 'feed_url' up to the given number
    'max_entries' of update entries. The 'show_content' parameter can indicate
    that a "summary" is to be obtained for each update, that the "content" of
    each update is to be obtained (falling back to a summary if no content is
    provided), or no content (indicated by a false value) is to be obtained.

    A tuple of the form ((feed_type, channel_title, channel_link), updates) is
    returned.

    Raise FeedMissingError if no data could be obtained for 'feed_url', and
    FeedContentTypeError if the resource is not an Atom, RSS or generic XML
    document.

    NOTE(review): 'max_entries' is not applied anywhere in this function -
    presumably truncation is performed by the caller; confirm.
    """

    feed_updates = []

    # Obtain the resource, using a cached version if appropriate.
    # The cache age (in seconds) is configurable via the wiki configuration.

    max_cache_age = int(getattr(request.cfg, "moin_share_max_cache_age", "300"))
    data = getCachedResource(request, feed_url, "MoinShare", "wiki", max_cache_age)
    if not data:
        raise FeedMissingError

    # Interpret the cached feed. The cached data carries its own metadata
    # header which is consumed from the stream before the feed body.

    feed = StringIO(data)
    _url, content_type, _encoding, _metadata = getCachedResourceMetadata(feed)

    if content_type not in ("application/atom+xml", "application/rss+xml", "application/xml"):
        raise FeedContentTypeError

    try:
        # Parse each node from the feed using a streaming (pulldom) parser,
        # expanding individual elements into DOM nodes only where their
        # content is needed.

        channel_title = channel_link = None

        feed_type = None        # "atom" or "rss", decided by the root element
        update = None           # current Update being populated, if any
        in_source = False       # inside an Atom source element: suppress
                                # handling so source metadata is not mistaken
                                # for entry metadata

        events = xml.dom.pulldom.parse(feed)

        for event, value in events:

            if not in_source and event == xml.dom.pulldom.START_ELEMENT:
                tagname = value.localName

                # Detect the feed type and items.

                if tagname == "feed" and value.namespaceURI == ATOM_NS:
                    feed_type = "atom"

                elif tagname == "rss":
                    feed_type = "rss"

                # Detect items.

                elif feed_type == "rss" and tagname == "item" or \
                     feed_type == "atom" and tagname == "entry":

                    update = Update()

                # Detect source declarations.

                elif feed_type == "atom" and tagname == "source":
                    in_source = True

                # Handle item elements. Before any item is seen, title and
                # link describe the channel itself; afterwards they belong to
                # the current update.

                elif tagname == "title":
                    events.expandNode(value)
                    if update:
                        update.title = text(value)
                    else:
                        channel_title = text(value)

                elif tagname == "link":
                    events.expandNode(value)
                    if update:
                        update.link = linktext(value, feed_type)
                    else:
                        channel_link = linktext(value, feed_type)

                elif show_content and (
                    feed_type == "atom" and tagname in ("content", "summary") or
                    feed_type == "rss" and tagname == "description"):

                    events.expandNode(value)

                    # Obtain content where requested or, failing that, a
                    # summary (only when no content has been recorded yet).

                    if update and (need_content(show_content, tagname) or tagname == "summary" and not update.content):
                        if feed_type == "atom":
                            update.content_type = value.getAttribute("type") or "text"

                            # Normalise the content types and extract the
                            # content: XHTML variants keep their markup,
                            # HTML and plain text keep character data only.

                            if update.content_type in ("xhtml", "application/xhtml+xml", "application/xml"):
                                update.content = children(value)
                                update.content_type = "application/xhtml+xml"
                            elif update.content_type in ("html", "text/html"):
                                update.content = text(value)
                                update.content_type = "text/html"
                            else:
                                update.content = text(value)
                                update.content_type = "text/plain"
                        else:
                            # RSS descriptions are conventionally HTML.
                            update.content_type = "text/html"
                            update.content = text(value)

                elif feed_type == "atom" and tagname == "updated" or \
                     feed_type == "rss" and tagname == "pubDate":

                    events.expandNode(value)

                    # Atom uses ISO 8601 timestamps, RSS uses RFC 2822 dates.

                    if update:
                        if feed_type == "atom":
                            value = getDateTimeFromISO8601(text(value))
                        else:
                            value = DateTime(parsedate(text(value)))
                        update.updated = value

            elif event == xml.dom.pulldom.END_ELEMENT:
                tagname = value.localName

                # The end of an item commits the accumulated update.

                if feed_type == "rss" and tagname == "item" or \
                   feed_type == "atom" and tagname == "entry":

                    feed_updates.append(update)

                    update = None

                elif feed_type == "atom" and tagname == "source":
                    in_source = False

    finally:
        feed.close()

    return (feed_type, channel_title, channel_link), feed_updates
""" -from DateSupport import getDateTimeFromISO8601, DateTime from MoinMoin.Page import Page -from MoinRemoteSupport import * from MoinSupport import parseMacroArguments -from MoinShare import getUpdateSources, getUpdatesFromPage, \ - getUpdatesFromStore, formatUpdate, \ - Update -from email.utils import parsedate -import xml.dom.pulldom - -try: - from cStringIO import StringIO -except ImportError: - from StringIO import StringIO +from MoinShare import getUpdateSources, getUpdates, \ + getUpdatesFromPage, getUpdatesFromStore, \ + formatUpdate, Update Dependencies = ["time"] MAX_ENTRIES = 5 -ATOM_NS = "http://www.w3.org/2005/Atom" - -# Utility functions. - -def text(element): - nodes = [] - for node in element.childNodes: - if node.nodeType == node.TEXT_NODE: - nodes.append(node.nodeValue) - return "".join(nodes) - -def children(element): - nodes = [] - for node in element.childNodes: - nodes.append(node.toxml()) - return "".join(nodes) - -def unescape(text): - return text.replace("<", "<").replace(">", ">").replace("&", "&") - -def linktext(element, feed_type): - if feed_type == "rss": - return text(element) - else: - return element.getAttribute("href") - -def need_content(show_content, tagname): - return show_content in ("content", "description") and tagname in ("content", "description") - -# Error classes. - -class FeedError(Exception): - pass - -class FeedMissingError(FeedError): - pass - -class FeedContentTypeError(FeedError): - pass - -# Feed retrieval. - -def getUpdates(request, feed_url, max_entries, show_content): - - """ - Using the given 'request', retrieve from 'feed_url' up to the given number - 'max_entries' of update entries. The 'show_content' parameter can indicate - that a "summary" is to be obtained for each update, that the "content" of - each update is to be obtained (falling back to a summary if no content is - provided), or no content (indicated by a false value) is to be obtained. 
- - A tuple of the form ((feed_type, channel_title, channel_link), updates) is - returned. - """ - - feed_updates = [] - - # Obtain the resource, using a cached version if appropriate. - - max_cache_age = int(getattr(request.cfg, "moin_share_max_cache_age", "300")) - data = getCachedResource(request, feed_url, "MoinShare", "wiki", max_cache_age) - if not data: - raise FeedMissingError - - # Interpret the cached feed. - - feed = StringIO(data) - _url, content_type, _encoding, _metadata = getCachedResourceMetadata(feed) - - if content_type not in ("application/atom+xml", "application/rss+xml", "application/xml"): - raise FeedContentTypeError - - try: - # Parse each node from the feed. - - channel_title = channel_link = None - - feed_type = None - update = None - in_source = False - - events = xml.dom.pulldom.parse(feed) - - for event, value in events: - - if not in_source and event == xml.dom.pulldom.START_ELEMENT: - tagname = value.localName - - # Detect the feed type and items. - - if tagname == "feed" and value.namespaceURI == ATOM_NS: - feed_type = "atom" - - elif tagname == "rss": - feed_type = "rss" - - # Detect items. - - elif feed_type == "rss" and tagname == "item" or \ - feed_type == "atom" and tagname == "entry": - - update = Update() - - # Detect source declarations. - - elif feed_type == "atom" and tagname == "source": - in_source = True - - # Handle item elements. - - elif tagname == "title": - events.expandNode(value) - if update: - update.title = text(value) - else: - channel_title = text(value) - - elif tagname == "link": - events.expandNode(value) - if update: - update.link = linktext(value, feed_type) - else: - channel_link = linktext(value, feed_type) - - elif show_content and ( - feed_type == "atom" and tagname in ("content", "summary") or - feed_type == "rss" and tagname == "description"): - - events.expandNode(value) - - # Obtain content where requested or, failing that, a - # summary. 
- - if update and (need_content(show_content, tagname) or tagname == "summary" and not update.content): - if feed_type == "atom": - update.content_type = value.getAttribute("type") or "text" - - # Normalise the content types and extract the - # content. - - if update.content_type in ("xhtml", "application/xhtml+xml", "application/xml"): - update.content = children(value) - update.content_type = "application/xhtml+xml" - elif update.content_type in ("html", "text/html"): - update.content = text(value) - update.content_type = "text/html" - else: - update.content = text(value) - update.content_type = "text/plain" - else: - update.content_type = "text/html" - update.content = text(value) - - elif feed_type == "atom" and tagname == "updated" or \ - feed_type == "rss" and tagname == "pubDate": - - events.expandNode(value) - - if update: - if feed_type == "atom": - value = getDateTimeFromISO8601(text(value)) - else: - value = DateTime(parsedate(text(value))) - update.updated = value - - elif event == xml.dom.pulldom.END_ELEMENT: - tagname = value.localName - - if feed_type == "rss" and tagname == "item" or \ - feed_type == "atom" and tagname == "entry": - - feed_updates.append(update) - - update = None - - elif feed_type == "atom" and tagname == "source": - in_source = False - - finally: - feed.close() - - return (feed_type, channel_title, channel_link), feed_updates # The macro itself.