MoinShare (annotate macros/SharedContent.py in 4967a73df0a5)

MoinShare

Annotated macros/SharedContent.py

29:4967a73df0a5

2013-05-14

Paul Boddie

Added support for getting content from RSS 2.0 feed entries.

paul@13	1	# -*- coding: iso-8859-1 -*-
paul@13	2	"""
paul@13	3	MoinMoin - SharedContent macro, based on the FeedReader macro
paul@13	4
paul@18	5	@copyright: 2008, 2012, 2013 by Paul Boddie <paul@boddie.org.uk>
paul@13	6	@license: GNU GPL (v2 or later), see COPYING.txt for details.
paul@13	7	"""
paul@13	8
paul@24	9	from DateSupport import getDateTimeFromISO8601, DateTime
paul@13	10	from MoinMoin.Page import Page
paul@14	11	from MoinRemoteSupport import *
paul@20	12	from MoinSupport import parseMacroArguments
paul@22	13	from email.utils import parsedate
paul@13	14	import xml.dom.pulldom
paul@13	15
paul@14	16	try:
paul@14	17	from cStringIO import StringIO
paul@14	18	except ImportError:
paul@14	19	from StringIO import StringIO
paul@14	20
# Dependency declaration exposed at module level for the wiki engine.
# NOTE(review): "time" presumably tells MoinMoin that the macro output varies
# over time and must not be cached indefinitely - confirm against the MoinMoin
# macro documentation.
Dependencies = ["time"]

# Number of updates shown by the macro when no usable "limit" is given.
MAX_ENTRIES = 5

# Namespace URI identifying the root "feed" element of an Atom document.
ATOM_NS = "http://www.w3.org/2005/Atom"
paul@13	25
paul@22	26	# Utility functions.
paul@22	27
def text(element):

    """
    Return the concatenated values of the text nodes that are direct children
    of the given 'element'. Text held inside nested child elements is not
    included.
    """

    return "".join([
        child.nodeValue
        for child in element.childNodes
        if child.nodeType == child.TEXT_NODE
        ])
paul@13	34
def unescape(text):

    """
    Return the given 'text' with the character entity references "&lt;",
    "&gt;" and "&amp;" replaced by "<", ">" and "&" respectively.

    The "&amp;" reference is deliberately replaced last so that doubly-escaped
    input such as "&amp;lt;" is only unescaped by a single level, yielding
    "&lt;" rather than "<".
    """

    return text.replace("&lt;", "<").replace("&gt;", ">").replace("&amp;", "&")
paul@20	37
def linktext(element, feed_type):

    """
    Return the link target provided by the given link 'element'. Where
    'feed_type' is "rss", the target is the text of the element; for any other
    feed type, it is the value of the element's "href" attribute.
    """

    if feed_type != "rss":
        return element.getAttribute("href")

    return text(element)
paul@13	43
def need_content(show_content, tagname):

    """
    Return whether full content has been requested via 'show_content' and the
    element having the given 'tagname' is one that provides such content. Both
    values must be either "content" or "description" for this to be true.
    """

    content_names = ("content", "description")

    if show_content not in content_names:
        return False

    return tagname in content_names
paul@29	46
paul@22	47	# Error classes.
paul@22	48
class FeedError(Exception):

    "The common base class of the errors raised when obtaining feed updates."

class FeedMissingError(FeedError):

    "Raised when no data could be obtained for a feed."

class FeedContentTypeError(FeedError):

    "Raised when a feed is provided with an unsupported content type."
paul@22	57
paul@22	58	# Entry/update classes.
paul@22	59
class Update:

    "A single update entry obtained from a feed."

    def __init__(self):

        "Initialise the entry with none of its details set."

        self.title = self.link = None
        self.content = self.content_type = None
        self.updated = None

    def __cmp__(self, other):

        """
        Compare this entry with 'other' using their 'updated' values. An entry
        without such a value is ordered after any entry having one.

        NOTE: __cmp__ and the cmp function are only provided by Python 2.
        """

        mine, theirs = self.updated, other.updated

        if mine is None and theirs is not None:
            return 1

        if theirs is None and mine is not None:
            return -1

        # Either both values are present or both are absent.

        return cmp(mine, theirs)
paul@22	78
paul@22	79	# Feed retrieval.
paul@20	80
def getUpdates(request, feed_url, max_entries, show_content):

    """
    Using the given 'request', retrieve update entries from 'feed_url'. The
    'show_content' parameter can indicate that a "summary" is to be obtained
    for each update, that the "content" of each update is to be obtained
    (falling back to a summary if no content is provided), or no content
    (indicated by a false value) is to be obtained.

    NOTE: The 'max_entries' parameter is accepted but is not applied here: all
    NOTE: entries found in the feed are returned, leaving the caller to sort
    NOTE: and truncate the combined updates from all feeds.

    A tuple of the form ((feed_type, channel_title, channel_link), updates) is
    returned, with 'feed_type' being "atom", "rss" or None if no recognised
    root element was found.

    A FeedMissingError is raised if no data could be obtained for the feed. A
    FeedContentTypeError is raised if the feed was not provided with one of
    the supported Atom, RSS or generic XML content types.
    """

    feed_updates = []

    # Obtain the resource, using a cached version if appropriate.

    max_cache_age = int(getattr(request.cfg, "moin_share_max_cache_age", "300"))
    data = getCachedResource(request, feed_url, "MoinShare", "wiki", max_cache_age)
    if not data:
        raise FeedMissingError

    # Interpret the cached feed.

    feed = StringIO(data)
    _url, content_type, _encoding, _metadata = getCachedResourceMetadata(feed)

    if content_type not in ("application/atom+xml", "application/rss+xml", "application/xml"):
        raise FeedContentTypeError

    try:
        # Parse each node from the feed.

        channel_title = channel_link = None

        feed_type = None
        update = None
        in_source = False

        events = xml.dom.pulldom.parse(feed)

        for event, value in events:

            # Elements starting inside an Atom source element are ignored so
            # that they do not overwrite details of the enclosing entry.

            if not in_source and event == xml.dom.pulldom.START_ELEMENT:
                tagname = value.localName

                # Detect the feed type and items.

                if tagname == "feed" and value.namespaceURI == ATOM_NS:
                    feed_type = "atom"

                elif tagname == "rss":
                    feed_type = "rss"

                # Detect items.

                elif feed_type == "rss" and tagname == "item" or \
                    feed_type == "atom" and tagname == "entry":

                    update = Update()

                # Detect source declarations.

                elif feed_type == "atom" and tagname == "source":
                    in_source = True

                # Handle item elements. Outside any item, titles and links
                # describe the feed (channel) itself.

                elif tagname == "title":
                    events.expandNode(value)
                    if update:
                        update.title = text(value)
                    else:
                        channel_title = text(value)

                elif tagname == "link":
                    events.expandNode(value)
                    if update:
                        update.link = linktext(value, feed_type)
                    else:
                        channel_link = linktext(value, feed_type)

                elif show_content and (
                    feed_type == "atom" and tagname in ("content", "summary") or
                    feed_type == "rss" and tagname == "description"):

                    events.expandNode(value)

                    # Obtain content where requested or, failing that, a
                    # summary.

                    if update and (need_content(show_content, tagname) or tagname == "summary" and not update.content):
                        if feed_type == "atom":
                            update.content_type = value.getAttribute("type") or "text"

                            # XML-based content is retained as markup; other
                            # content is taken from the element's text.

                            if update.content_type in ("xhtml", "application/xhtml+xml", "application/xml"):
                                update.content = value.toxml()
                            else:
                                update.content = text(value)
                        else:
                            update.content_type = "html"
                            update.content = text(value)

                elif feed_type == "atom" and tagname == "updated" or \
                    feed_type == "rss" and tagname == "pubDate":

                    events.expandNode(value)

                    if update:
                        if feed_type == "atom":
                            updated = getDateTimeFromISO8601(text(value))
                        else:
                            # The parsedate function returns None for text it
                            # cannot interpret: leave such entries undated
                            # instead of passing None to DateTime.

                            parsed = parsedate(text(value))
                            if parsed is not None:
                                updated = DateTime(parsed)
                            else:
                                updated = None

                        update.updated = updated

            elif event == xml.dom.pulldom.END_ELEMENT:
                tagname = value.localName

                # Completed items are recorded; subsequent details then apply
                # to the feed again until the next item starts.

                if feed_type == "rss" and tagname == "item" or \
                    feed_type == "atom" and tagname == "entry":

                    feed_updates.append(update)

                    update = None

                elif feed_type == "atom" and tagname == "source":
                    in_source = False

    finally:
        feed.close()

    return (feed_type, channel_title, channel_link), feed_updates
paul@22	212
paul@22	213	# The macro itself.
paul@22	214
def execute(macro, args):

    """
    Produce the output of the given 'macro' using the given 'args' string,
    which may provide the following arguments:

    url     a feed URL (which may be given more than once)
    show    the kind of content to show for each update, converted to lower
            case before use; if omitted, a list of linked titles is shown
    limit   the maximum number of updates to show (MAX_ENTRIES if omitted)

    The formatted updates from all feeds are returned as a string, followed by
    details of each feed and then a message for each feed that could not be
    retrieved or was not provided with a supported content type. If the
    arguments are not usable, only a message describing the problem is
    returned.
    """

    request = macro.request
    fmt = macro.formatter
    _ = request.getText

    feed_urls = []
    show_content = None
    max_entries = None

    for arg, value in parseMacroArguments(args):
        if arg == "url":
            feed_urls.append(value)
        elif arg == "show":
            show_content = value.lower()
        elif arg == "limit":
            try:
                max_entries = int(value)
            except ValueError:
                return fmt.text(_("SharedContent: limit must be set to the maximum number of entries to be shown"))

    if not feed_urls:
        return fmt.text(_("SharedContent: a feed URL must be specified"))

    show_content = show_content or False
    max_entries = max_entries or MAX_ENTRIES

    # Retrieve updates from feeds, classifying them as missing or bad and
    # excluding them if appropriate.

    updates = []
    feeds = []
    missing = []
    bad_content = []

    for feed_url in feed_urls:
        try:
            feed_info, feed_updates = getUpdates(request, feed_url, max_entries, show_content)
            updates += feed_updates
            feeds.append((feed_url, feed_info))
        except FeedMissingError:
            missing.append(feed_url)
        except FeedContentTypeError:
            bad_content.append(feed_url)

    output = []
    append = output.append

    # Show the updates.

    if not show_content:
        append(fmt.bullet_list(on=1))

    # NOTE: Permit configurable sorting.

    updates.sort()
    updates.reverse()

    # Truncate the number of updates to the maximum number.

    updates = updates[:max_entries]

    for update in updates:

        # Emit content where appropriate.
        # NOTE: HTML and XHTML should be sanitised.

        if show_content:
            append(fmt.div(on=1, css_class="moinshare-update"))
            append(fmt.div(on=1, css_class="moinshare-content"))
            if update.content:
                if update.content_type in ("html", "text/html"):
                    append(fmt.rawHTML(unescape(update.content)))
                elif update.content_type in ("xhtml", "application/xhtml+xml"):
                    append(fmt.rawHTML(update.content))
                elif update.content_type in ("text", "text/plain"):
                    append(fmt.text(update.content))
            append(fmt.div(on=0))
            append(fmt.div(on=1, css_class="moinshare-date"))
            append(fmt.text(str(update.updated)))
            append(fmt.div(on=0))
            append(fmt.div(on=0))

        # Or emit title and link information for items.

        elif update.title and update.link:
            append(fmt.listitem(on=1, css_class="moinshare-update"))
            append(fmt.url(on=1, href=update.link))
            append(fmt.icon('www'))
            append(fmt.text(" " + update.title))
            append(fmt.url(on=0))
            append(fmt.listitem(on=0))

    if not show_content:
        append(fmt.bullet_list(on=0))

    # Show the feeds.

    for feed_url, (feed_type, channel_title, channel_link) in feeds:
        if channel_title and channel_link:
            append(fmt.paragraph(on=1, css_class="moinshare-feed"))
            append(fmt.url(on=1, href=channel_link))
            append(fmt.text(channel_title))
            append(fmt.url(on=0))
            append(fmt.text(" "))
            append(fmt.url(on=1, href=feed_url))
            append(fmt.icon('rss'))
            append(fmt.url(on=0))
            append(fmt.paragraph(on=0))

    # Show errors.

    for feed_url in missing:
        append(fmt.paragraph(on=1, css_class="moinshare-missing-feed-error"))
        append(fmt.text(_("SharedContent: updates could not be retrieved for %s") % feed_url))
        append(fmt.paragraph(on=0))

    # The message is appended, not returned, so that the output already
    # produced for other feeds is kept and the paragraph is closed.

    for feed_url in bad_content:
        append(fmt.paragraph(on=1, css_class="moinshare-content-type-feed-error"))
        append(fmt.text(_("SharedContent: updates for %s were not provided in Atom or RSS format") % feed_url))
        append(fmt.paragraph(on=0))

    return ''.join(output)
paul@13	337
paul@13	338	# vim: tabstop=4 expandtab shiftwidth=4