MoinShare (annotate MoinShare.py in bb86e3e374a3)

MoinShare

Annotated MoinShare.py

64:bb86e3e374a3

2014-04-03

Paul Boddie

Some tidying and commenting.

paul@0	1	# -*- coding: iso-8859-1 -*-
paul@0	2	"""
paul@0	3	MoinMoin - MoinShare library
paul@0	4
paul@56	5	@copyright: 2011, 2012, 2013, 2014 by Paul Boddie <paul@boddie.org.uk>
paul@34	6	@copyright: 2003-2006 Edgewall Software
paul@34	7	@copyright: 2006 MoinMoin:AlexanderSchremmer
paul@0	8	@license: GNU GPL (v2 or later), see COPYING.txt for details.
paul@0	9	"""
paul@0	10
paul@17	11	from ContentTypeSupport import getContentPreferences
paul@58	12	from DateSupport import getCurrentTime, getDateTimeFromRFC2822, \
paul@58	13	getDateTimeFromISO8601, DateTime
paul@0	14	from MoinSupport import *
paul@58	15	from MoinRemoteSupport import *
paul@51	16	from ItemSupport import ItemStore
paul@50	17	from MoinMessage import GPG, is_encrypted, is_signed, MoinMessageError
paul@54	18	from MoinMessageSupport import get_homedir, get_username_for_fingerprint
paul@37	19	from MoinMoin.support.htmlmarkup import HTMLParseError, HTMLSanitizer, Markup
paul@15	20	from MoinMoin import wikiutil
paul@33	21	from email.parser import Parser
paul@58	22	from email.utils import parsedate
paul@37	23	from codecs import getwriter
paul@58	24	import xml.dom.pulldom
paul@0	25
paul@25	26	try:
paul@25	27	from cStringIO import StringIO
paul@25	28	except ImportError:
paul@25	29	from StringIO import StringIO
paul@25	30
# Keep a reference to the getFragments function that is in scope at this point
# (presumably supplied by one of the wildcard imports above - confirm), because
# this module later defines its own getFragments which filters the results of
# the original.

_getFragments = getFragments

# The version of this library.

__version__ = "0.1"

# The XML namespace that the root "feed" element must have for a document to
# be treated as an Atom feed.

ATOM_NS = "http://www.w3.org/2005/Atom"
paul@58	36
paul@58	37	# Utility functions.
paul@58	38
def text(element):

    """
    Return the concatenated values of the text nodes found directly inside
    'element'. Text held by nested elements is not included.
    """

    return "".join([
        child.nodeValue
        for child in element.childNodes
        if child.nodeType == child.TEXT_NODE
        ])
paul@58	45
def children(element):

    """
    Return the serialised XML of every child node of 'element', joined into a
    single string.
    """

    return "".join([child.toxml() for child in element.childNodes])
paul@58	51
def unescape(text):

    """
    Return 'text' with the entity references for "<", ">" and "&" replaced by
    the characters they represent.
    """

    # The ampersand is handled last so that an escaped reference such as
    # "&amp;lt;" yields "&lt;" instead of being unescaped twice.

    return text.replace("&lt;", "<").replace("&gt;", ">").replace("&amp;", "&")
paul@58	54
def linktext(element, feed_type):

    """
    Return the link target held by the link 'element'. For the "rss"
    'feed_type' this is the text inside the element; for any other feed type it
    is the value of the element's "href" attribute.
    """

    if feed_type != "rss":
        return element.getAttribute("href")

    return text(element)
paul@58	60
def need_content(show_content, tagname):

    """
    Return whether full content was requested by 'show_content' and 'tagname'
    names an element providing it. Both values must be either "content" or
    "description" for the result to be true.
    """

    content_names = ("content", "description")

    if show_content not in content_names:
        return False

    return tagname in content_names
paul@58	63
paul@0	64	# More Moin 1.9 compatibility functions.
paul@0	65
def has_member(request, groupname, username):

    """
    Return whether 'username' belongs to the group 'groupname', using the
    group information available from the 'request'.
    """

    groups = request.dicts

    # Fall back to treating the group information as a mapping from group
    # names to member collections where no has_member method is provided.

    if not hasattr(groups, "has_member"):
        return username in groups.get(groupname, [])

    return groups.has_member(groupname, username)
paul@0	71
# Fragments employ a "moinshare" attribute: only fragments whose attributes
# include this name are returned by this module's getFragments function.

fragment_attribute = "moinshare"
paul@2	75
def getFragments(s):

    """
    Return a list of (format, attributes, body) tuples for the fragments in 's'
    whose attributes include the MoinShare fragment attribute.
    """

    return [
        (format, attributes, body)
        for format, attributes, body in _getFragments(s)
        if fragment_attribute in attributes
        ]
paul@2	85
def getPreferredOutputTypes(request, mimetypes):

    """
    Perform content negotiation for the 'request', returning those of the given
    'mimetypes' preferred according to the client's Accept header. Where no
    Accept header value is available, 'mimetypes' is returned unchanged.
    """

    accept_header = getHeader(request, "Accept", "HTTP")

    if not accept_header:
        return mimetypes

    return getContentPreferences(accept_header).get_preferred_types(mimetypes)
paul@9	100
def getUpdatedTime(metadata):

    """
    Return the "last-modified" value from the given 'metadata' mapping, or the
    current time where that value is absent or false.
    """

    # NOTE: We could attempt to get the last edit time of a fragment.

    last_modified = metadata.get("last-modified")

    if not last_modified:
        return getCurrentTime()

    return last_modified
paul@9	115
paul@30	116	# Entry/update classes.
paul@30	117
class Update:

    """
    A feed update entry, recording the content of an update together with
    details of the message, feed, page or store that provided it.
    """

    def __init__(self):

        "Initialise an empty update: every attribute is None except the path."

        # General details.

        self.content = None
        self.content_type = None
        self.updated = None
        self.author = None

        # Message-related: the subparts of a multipart message.

        self.parts = None

        # Feed-related.

        self.title = None
        self.link = None

        # Page-related.

        self.fragment = None
        self.preferred = None

        # Store-related.

        self.message_number = None

        # Store- and page-related.

        self.page = None

        # Identification: the part numbers leading to this update.

        self.path = []

    def unique_id(self):

        """
        Return an identifier built from the message number and the path, used
        for anchors to parts of presented updates.
        """

        path_text = "-".join([str(step) for step in self.path])
        return "moinshare-tab-%s-%s" % (self.message_number, path_text)

    def __cmp__(self, other):

        """
        Compare this update with 'other' using the updated times. An update
        without an updated time compares as greater than one having a time.
        """

        mine, theirs = self.updated, other.updated

        if mine is None and theirs is not None:
            return 1

        if mine is not None and theirs is None:
            return -1

        return cmp(mine, theirs)

    def copy(self, part_number=None):

        """
        Return a new update sharing this update's descriptive details but not
        its content, content type or parts. Where 'part_number' is given, it is
        appended to the path of the new update only.
        """

        duplicate = Update()

        for name in ("title", "link", "updated", "author", "fragment",
            "preferred", "message_number", "page"):

            setattr(duplicate, name, getattr(self, name))

        # Give the copy its own path list so that this update is unaffected.

        duplicate.path = list(self.path)

        if part_number is not None:
            duplicate.path.append(part_number)

        return duplicate
paul@40	184
paul@58	185	# Error classes.
paul@58	186
class FeedError(Exception):

    "The base class of the feed-related errors defined in this module."
paul@58	189
class FeedMissingError(FeedError):

    "Raised where a feed may not be accessed or where no feed data is obtained."
paul@58	192
class FeedContentTypeError(FeedError):

    "Raised where the content type of retrieved feed data is not supported."
paul@58	195
paul@60	196	# Update retrieval from URLs.
paul@58	197
def getUpdates(request, feed_url, max_entries, show_content):

    """
    Using the given 'request', retrieve from 'feed_url' up to the given number
    'max_entries' of update entries. The 'show_content' parameter can indicate
    that a "summary" is to be obtained for each update, that the "content" of
    each update is to be obtained (falling back to a summary if no content is
    provided), or no content (indicated by a false value) is to be obtained.

    A tuple of the form ((feed_type, channel_title, channel_link), updates) is
    returned.

    FeedMissingError is raised where 'feed_url' refers to a local file or where
    no data is obtained. FeedContentTypeError is raised where the content type
    recorded for the retrieved data is not one of the supported feed or
    mailbox types.
    """

    # Prevent local file access.
    # URL schemes are case-insensitive and URL openers typically ignore
    # surrounding whitespace, so the location is normalised before testing,
    # otherwise "FILE:..." or " file:..." would not be rejected. The
    # "local_file" scheme is an alias for "file" in the Python 2 urllib opener.
    # NOTE: The opener actually used by getCachedResource is not visible here.

    location = feed_url.strip().lower()

    if location.startswith("file:") or \
        location.startswith("local_file:") or \
        location.startswith("local-file:"):

        raise FeedMissingError

    elif feed_url.startswith("imap"):
        reader = imapreader

    else:
        reader = None

    # Obtain the resource, using a cached version if appropriate.

    max_cache_age = int(getattr(request.cfg, "moin_share_max_cache_age", "300"))
    data = getCachedResource(request, feed_url, "MoinShare", "wiki", max_cache_age, reader)
    if not data:
        raise FeedMissingError

    # Interpret the cached feed.

    f = StringIO(data)
    try:
        # Reading the metadata leaves the stream positioned for the parsers.

        _url, content_type, _encoding, _metadata = getCachedResourceMetadata(f)

        if content_type in ("application/atom+xml", "application/rss+xml", "application/xml"):
            return getUpdatesFromFeed(f, max_entries, show_content)

        elif content_type == "multipart/mixed":
            return getUpdatesFromMailbox(f, max_entries, show_content, request)

        else:
            raise FeedContentTypeError

    finally:
        f.close()
paul@60	246
paul@60	247	# Update retrieval from feeds.
paul@58	248
def getUpdatesFromFeed(feed, max_entries, show_content):

    """
    Parse the Atom or RSS document read from 'feed' and return its update
    entries. The 'show_content' parameter can indicate that a "summary" is to
    be obtained for each update, that the "content" of each update is to be
    obtained (falling back to a summary if no content is provided), or no
    content (indicated by a false value) is to be obtained.

    NOTE(review): 'max_entries' is accepted but never consulted in this
    function, so all entries found in the feed are returned. Confirm whether
    callers are expected to truncate the result.

    NOTE(review): for RSS feeds, a description is only stored where
    'show_content' is "content" or "description", not where it is "summary".

    A tuple of the form ((feed_type, channel_title, channel_link), updates) is
    returned. The feed type is "atom", "rss", or None where neither kind of
    root element was seen.
    """

    feed_updates = []

    # Parse each node from the feed.

    channel_title = channel_link = None

    feed_type = None

    # The update being populated, or None when outside any item or entry.

    update = None

    # Whether the parser is inside an Atom source element, whose child
    # elements describe the originating feed and must be ignored.

    in_source = False

    events = xml.dom.pulldom.parse(feed)

    for event, value in events:

        if not in_source and event == xml.dom.pulldom.START_ELEMENT:
            tagname = value.localName

            # Detect the feed type and items.

            if tagname == "feed" and value.namespaceURI == ATOM_NS:
                feed_type = "atom"

            elif tagname == "rss":
                feed_type = "rss"

            # Detect items.

            elif feed_type == "rss" and tagname == "item" or \
                feed_type == "atom" and tagname == "entry":

                update = Update()

            # Detect source declarations.

            elif feed_type == "atom" and tagname == "source":
                in_source = True

            # Handle item elements.
            # Expanding a node makes the parser build its complete subtree,
            # consuming the events up to and including its end tag.
            # Outside any item or entry, titles and links describe the channel.

            elif tagname == "title":
                events.expandNode(value)
                if update:
                    update.title = text(value)
                else:
                    channel_title = text(value)

            elif tagname == "link":
                events.expandNode(value)
                if update:
                    update.link = linktext(value, feed_type)
                else:
                    channel_link = linktext(value, feed_type)

            elif show_content and (
                feed_type == "atom" and tagname in ("content", "summary") or
                feed_type == "rss" and tagname == "description"):

                events.expandNode(value)

                # Obtain content where requested or, failing that, a
                # summary.
                # A summary is only used while no content has been stored, so
                # a summary appearing after the content does not replace it.

                if update and (need_content(show_content, tagname) or tagname == "summary" and not update.content):
                    if feed_type == "atom":
                        update.content_type = value.getAttribute("type") or "text"

                        # Normalise the content types and extract the
                        # content.
                        # XHTML content is kept as serialised markup, whereas
                        # other types are taken from the element's text.

                        if update.content_type in ("xhtml", "application/xhtml+xml", "application/xml"):
                            update.content = children(value)
                            update.content_type = "application/xhtml+xml"
                        elif update.content_type in ("html", "text/html"):
                            update.content = text(value)
                            update.content_type = "text/html"
                        else:
                            update.content = text(value)
                            update.content_type = "text/plain"

                    # RSS descriptions are always recorded as HTML.

                    else:
                        update.content_type = "text/html"
                        update.content = text(value)

            elif feed_type == "atom" and tagname == "updated" or \
                feed_type == "rss" and tagname == "pubDate":

                events.expandNode(value)

                # Dates outside any item or entry are ignored.
                # NOTE(review): email.utils.parsedate returns None for a date
                # it cannot parse; confirm that DateTime tolerates this.

                if update:
                    if feed_type == "atom":
                        value = getDateTimeFromISO8601(text(value))
                    else:
                        value = DateTime(parsedate(text(value)))
                    update.updated = value

        elif event == xml.dom.pulldom.END_ELEMENT:
            tagname = value.localName

            # Complete the current update at the end of its item or entry.

            if feed_type == "rss" and tagname == "item" or \
                feed_type == "atom" and tagname == "entry":

                feed_updates.append(update)

                update = None

            # Resume normal processing after a source declaration.

            elif feed_type == "atom" and tagname == "source":
                in_source = False

    return (feed_type, channel_title, channel_link), feed_updates
paul@58	370
paul@62	371	# Update retrieval from mailboxes and multipart messages.
paul@62	372
def getUpdatesFromMailbox(feed, max_entries, show_content, request):

    """
    Parse the multipart message read from 'feed', treating each of its parts as
    a separate message providing one update. The 'request' is used when
    obtaining the content of each message.

    The 'max_entries' and 'show_content' parameters are accepted but are not
    consulted by this function.

    A tuple of the form ((feed_type, channel_title, channel_link), updates) is
    returned, with the feed type being "mbox" and the channel details being
    None.
    """

    messages = Parser().parse(feed).get_payload()

    collected = []

    # Each update is numbered according to the position of its message.

    for number, message in enumerate(messages):
        entry = Update()
        entry.updated = getDateTimeFromRFC2822(message.get("date"))
        entry.title = message.get("subject", "Update #%d" % number)
        entry.message_number = number

        content, content_type, parts, actual_author = \
            getUpdateContentFromPart(message, request)

        entry.content = content
        entry.content_type = content_type
        entry.parts = parts

        # Only record an author where one was identified for the content.

        if actual_author:
            entry.author = actual_author

        collected.append(entry)

    return ("mbox", None, None), collected
paul@62	407
paul@30	408	# Update retrieval from pages.
paul@30	409
def getUpdatesFromPage(page, request):

    """
    Return a list of update objects, one for each MoinShare fragment found in
    the raw body of the given 'page', using the 'request' to negotiate and
    produce the content of each update.
    """

    # NOTE: Use the updated datetime from the page for updates.
    # NOTE: The published and updated details would need to be deduced from
    # NOTE: the page history instead of being taken from the page as a whole.

    page_updated = getUpdatedTime(getMetadata(page))

    collected = []

    # Visit the fragment regions of the page in order of appearance.

    fragments = getFragments(page.get_raw_body())

    for position, (format, attributes, body) in enumerate(fragments):

        entry = Update()

        # Identify and name the fragment, using its position as a fallback.
        # NOTE: Choose a more robust identifier where none is explicitly given.

        entry.fragment = attributes.get("fragment", str(position))
        entry.title = attributes.get("summary", "Update #%d" % position)

        # Find the content types acceptable for the fragment.

        available = getOutputTypes(request, format)
        entry.preferred = getPreferredOutputTypes(request, available)

        # Produce HTML content where it is acceptable.
        # NOTE: Could potentially get a summary for the fragment.

        entry.content = None

        if "text/html" in entry.preferred:
            parser_cls = getParserClass(request, format)

            # HTML fragments need no conversion.

            if format == "html":
                html = body

            # Parsers able to target output types are asked for HTML.

            elif hasattr(parser_cls, "formatForOutputType"):
                html = formatTextForOutputType(body, request, parser_cls, "text/html")

            # Other parsers are used with the HTML formatter of the request.

            else:
                fmt = request.html_formatter
                fmt.setPage(page)
                html = formatText(body, request, fmt, parser_cls)

            entry.content = html
            entry.content_type = "text/html"

        entry.page = page

        # NOTE: The anchor would be supported in the page, but this requires
        # NOTE: formatter modifications for the regions providing updates.

        entry.link = page.url(request, anchor=entry.fragment)
        entry.updated = page_updated

        collected.append(entry)

    return collected
paul@25	472
paul@33	473	# Update retrieval from message stores.
paul@33	474
def getUpdatesFromStore(page, request):

    """
    Get updates from the message store associated with the given 'page' using
    the 'request'. A list of update objects is returned, ordered according to
    the sorted keys of the store, with each update referring to 'page'.
    """

    # The page metadata is not needed here: the details of each update are
    # taken from the stored message itself.

    store = ItemStore(page, "messages", "message-locks")

    updates = []

    for key in sorted(store.keys()):
        update = getUpdateFromMessageText(store[key], key, request)
        update.page = page
        updates.append(update)

    return updates
paul@33	499
def getUpdateFromMessageText(message_text, message_number, request):

    """
    Return an update for the message parsed from 'message_text', identified by
    the given 'message_number', using the 'request' to obtain its content.
    """

    message = Parser().parsestr(message_text)

    entry = Update()

    # Take the descriptive details from the message headers.

    entry.updated = getDateTimeFromRFC2822(message.get("date"))
    entry.title = message.get("subject", "Update #%d" % message_number)
    entry.author = message.get("moin-user")
    entry.message_number = message_number

    content, content_type, parts, actual_author = \
        getUpdateContentFromPart(message, request)

    entry.content = content
    entry.content_type = content_type
    entry.parts = parts

    # An author identified for the content replaces the header value.

    if actual_author:
        entry.author = actual_author

    return entry
paul@46	522
def getUpdateContentFromPart(part, request):

    """
    Return a tuple of the form (content, content type, subparts, author) for
    the given message 'part', using the 'request' when decrypting content.
    The content, subparts and author may each be None.
    """

    # A single part provides the update content directly.

    if not part.is_multipart():
        content, content_type = getPartContent(part)
        return content, content_type, None, None

    subtype = part.get_content_subtype()

    # For a collection of related parts, use the first as the update content
    # and assume that the formatter will reference the other parts.

    if subtype == "related":
        main_part = part.get_payload()[0]
        content, content_type = getPartContent(main_part)
        return content, content_type, [main_part], None

    # Encrypted content cannot be meaningfully separated, so it is decrypted
    # and the result is then examined in place of the original part.

    if subtype == "encrypted":
        try:
            part, author = getDecryptedParts(part, request)
            content, content_type, parts, _author = getUpdateContentFromPart(part, request)
            return content, content_type, parts, author
        except MoinMessageError:

            # Present the parts separately, as for other multipart content.

            pass

    # Otherwise, just obtain the parts for separate display.

    return None, part.get_content_type(), part.get_payload(), None
paul@40	560
def getDecryptedParts(part, request):

    """
    Decrypt the given 'part' where it is encrypted and verify it where it is
    signed, using the GPG home directory obtained for the 'request'.

    A tuple of the form (part, username) is returned. For verified signed
    content, this provides the signed content and the username found for the
    signing key fingerprint; otherwise it provides the possibly decrypted part
    and None.
    """

    gpg = GPG(get_homedir(request))

    # Replace an encrypted part with the message parsed from its plaintext.

    if is_encrypted(part):
        part = Parser().parsestr(gpg.decryptMessage(part))

    # Extract any signature details.

    if is_signed(part):
        verified = gpg.verifyMessage(part)
        if verified:
            fingerprint, identity, content = verified
            return content, get_username_for_fingerprint(request, fingerprint)

    return part, None
paul@47	583
def getPartContent(part):

    """
    Decode the 'part', returning a tuple of the form (payload, content type).
    The payload has any transfer encoding removed and is converted to Unicode
    where the part declares a character set. It is None where the part provides
    no decodable payload, as is the case for multipart content.
    """

    charset = part.get_content_charset()
    payload = part.get_payload(decode=True)

    # Test the payload explicitly instead of relying on the "and/or" idiom,
    # so that an empty payload is still converted and an absent payload is not
    # passed to the Unicode conversion.

    if charset and payload is not None:
        payload = unicode(payload, charset)

    return payload, part.get_content_type()
paul@40	591
def getUpdateFromPart(parent, part, part_number, request):

    """
    Return an update for the given 'part', copying the details of the 'parent'
    update and extending its path with 'part_number'. The content, content
    type, subparts and author of the new update are those obtained from the
    part using the 'request'.
    """

    child = parent.copy(part_number)

    content, content_type, parts, author = getUpdateContentFromPart(part, request)

    child.content = content
    child.content_type = content_type
    child.parts = parts
    child.author = author

    return child
paul@40	599
def getUpdatesForFormatting(update, request):

    """
    Return a list of the updates to be formatted for the given 'update'. An
    update without parts is returned on its own; otherwise, the updates
    obtained recursively from each of its parts are returned in order.
    """

    if not update.parts:
        return [update]

    # Handle multipart/alternative and other non-related multiparts.

    flattened = []

    for number, part in enumerate(update.parts):
        part_update = getUpdateFromPart(update, part, number, request)
        flattened.extend(getUpdatesForFormatting(part_update, request))

    return flattened
paul@46	616
paul@46	617	# Update formatting.
paul@46	618
def getFormattedUpdate(update, request, fmt):

    """
    Return the formatted form of the given 'update' using the given 'request'
    and 'fmt'. None is returned where the update has no content or where no
    parser is available for its content type.
    """

    # NOTE: Some control over the HTML and XHTML should be exercised.

    if not update.content:
        return None

    # HTML from messages is handled by a parser configured for the message.

    if update.content_type == "text/html" and update.message_number is not None:
        parsers = [get_make_parser(update.page, update.message_number)]
    else:
        parsers = getParsersForContentType(request.cfg, update.content_type)

    if not parsers:
        return None

    # Only the first parser is ever used, since both branches return.

    for parser_cls in parsers:
        if hasattr(parser_cls, "formatForOutputType"):
            return formatTextForOutputType(update.content, request, parser_cls, "text/html")
        return formatText(update.content, request, fmt, parser_cls=parser_cls)

    return None
paul@46	645
def formatUpdate(update, request, fmt):

    """
    Format the given 'update' using the given 'request' and 'fmt', returning
    the output as a string. Where the update provides several parts, links to
    each part are produced before the parts, and each part is enclosed in its
    own region, with only the first being given the default style.
    """

    result = []
    append = result.append

    updates = getUpdatesForFormatting(update, request)
    single = len(updates) == 1

    # Format some navigation tabs.
    # This only occurs for multipart updates.

    if not single:
        append(fmt.div(on=1, css_class="moinshare-alternatives"))

        # Each tab links to the region of a part and shows its content type.

        for update_part in updates:
            append(fmt.url(1, "#%s" % update_part.unique_id()))
            append(fmt.text(update_part.content_type))
            append(fmt.url(0))

        append(fmt.div(on=0))

    # Format the content.

    for n, update_part in enumerate(updates):

        # Encapsulate each alternative if many exist.

        if not single:
            css_class = (n == 0) and "moinshare-default" or "moinshare-other"
            append(fmt.div(on=1, css_class="moinshare-alternative %s" % css_class, id=update_part.unique_id()))

        # Include the content.

        append(formatUpdatePart(update_part, request, fmt))

        if not single:
            append(fmt.div(on=0))

    return "".join(result)
paul@46	695
def formatUpdatePart(update, request, fmt):

    """
    Format a single 'update' using the given 'request' and 'fmt', returning a
    string in which the formatted content is enclosed in a content region. A
    translated notice naming the content type is shown instead where no
    formatted content can be produced.
    """

    _ = request.getText

    # Encapsulate the content.

    pieces = [fmt.div(on=1, css_class="moinshare-content")]

    formatted = getFormattedUpdate(update, request, fmt)

    if not formatted:
        formatted = fmt.text(_("Update cannot be shown for content of type %s.") % update.content_type)

    pieces.append(formatted)
    pieces.append(fmt.div(on=0))

    return "".join(pieces)
paul@46	716
paul@31	717	# Source management.
paul@31	718
def getUpdateSources(pagename, request):

    """
    Return a dictionary mapping source names to source parameters, using the
    definitions obtained from the wiki dictionary for the given 'pagename' and
    'request'. An empty dictionary is returned where no definitions are found.
    """

    definitions = getWikiDict(pagename, request)

    if not definitions:
        return {}

    return dict([
        (name, getSourceParameters(value))
        for name, value in definitions.items()
        ])
paul@31	732
def getSourceParameters(source_definition):

    """
    Return the parameters obtained by parsing the given 'source_definition'
    string as a dictionary entry described by the names "type" and "location".
    """

    parameter_names = ("type", "location")
    return parseDictEntry(source_definition, parameter_names)
paul@31	738
paul@34	739	# HTML parsing support.
paul@34	740
class IncomingHTMLSanitizer(HTMLSanitizer):

    "An HTML parser that rewrites references to attachments."

    def __init__(self, out, request, page, message_number):

        """
        Initialise the sanitizer to write to 'out', retaining the 'request',
        'page' and 'message_number' for use when rewriting references.
        """

        HTMLSanitizer.__init__(self, out)
        self.request = request
        self.message_number = message_number
        self.page = page

    def rewrite_reference(self, ref):

        """
        Return a link to the ReadMessage action on the page for a 'ref' of the
        form "cid:<part>", identifying the message and the part. Any other
        'ref', including an absent one, is returned unchanged.
        """

        # Attributes given without a value are reported with a value of None.

        if ref is None or not ref.startswith("cid:"):
            return ref

        part = ref[len("cid:"):]

        return self.page.url(self.request, {
            "action" : "ReadMessage", "doit" : "1",
            "message" : self.message_number, "part" : part
            })

    def handle_starttag(self, tag, attrs):

        """
        Rewrite the values of attributes holding references before passing the
        'tag' and the updated 'attrs' to the inherited handler.
        """

        new_attrs = []

        for attrname, attrvalue in attrs:
            if attrname in self.uri_attrs:
                attrvalue = self.rewrite_reference(attrvalue)
            new_attrs.append((attrname, attrvalue))

        HTMLSanitizer.handle_starttag(self, tag, new_attrs)
paul@34	770
class IncomingMarkup(Markup):

    "A special markup processor for incoming HTML."

    def sanitize(self, request, page, message_number):

        """
        Return new markup produced by passing this markup through a sanitizer
        configured with the given 'request', 'page' and 'message_number'.
        """

        # The sanitizer output is collected as UTF-8 and decoded afterwards.

        buffer = StringIO()
        out = getwriter("utf-8")(buffer)

        sanitizer = IncomingHTMLSanitizer(out, request, page, message_number)
        sanitizer.feed(self.stripentities(keepxmlentities=True))

        return IncomingMarkup(unicode(buffer.getvalue(), "utf-8"))
paul@34	780
paul@34	781	class IncomingHTMLParser:
paul@34	782
paul@34	783	"Filters and rewrites incoming HTML content."
paul@34	784
paul@34	785	def __init__(self, raw, request, **kw):
paul@34	786	self.raw = raw
paul@34	787	self.request = request
paul@34	788	self.message_number = None
paul@34	789	self.page = None
paul@34	790
paul@34	791	def format(self, formatter, **kw):
paul@34	792
paul@34	793	"Send the text."
paul@34	794
paul@34	795	try:
paul@34	796	self.request.write(formatter.rawHTML(IncomingMarkup(self.raw).sanitize(self.request, self.page, self.message_number)))
paul@34	797	except HTMLParseError, e:
paul@34	798	self.request.write(formatter.sysmsg(1) +
paul@34	799	formatter.text(u'HTML parsing error: %s in "%s"' % (e.msg,
paul@34	800	self.raw.splitlines()[e.lineno - 1].strip())) +
paul@34	801	formatter.sysmsg(0))
paul@34	802
class MakeIncomingHTMLParser:

    "A class that makes parsers configured for messages."

    def __init__(self, page, message_number):

        "Initialise with state that is used to configure instantiated parsers."

        self.message_number = message_number
        self.page = page

    def __call__(self, *args, **kw):

        """
        Return a parser instantiated with the given positional and keyword
        arguments, configured with the message number and page held by this
        object. This allows the object to be used in place of a parser class.
        """

        parser = IncomingHTMLParser(*args, **kw)
        parser.message_number = self.message_number
        parser.page = self.page
        return parser
paul@34	819
def get_make_parser(page, message_number):

    """
    Return a callable producing parsers that are configured for the message
    having the given 'message_number' from the given 'page'.
    """

    parser_factory = MakeIncomingHTMLParser(page, message_number)
    return parser_factory
paul@34	828
paul@0	829	# vim: tabstop=4 expandtab shiftwidth=4