MoinShare (annotate MoinShare.py in bca53f2afce8)

MoinShare

Annotated MoinShare.py

63:bca53f2afce8

2014-04-03

Paul Boddie

Added a plain text parser, useful for e-mail messages. Fixed docstrings in the PGP keys parser.

paul@0	1	# -*- coding: iso-8859-1 -*-
paul@0	2	"""
paul@0	3	MoinMoin - MoinShare library
paul@0	4
paul@56	5	@copyright: 2011, 2012, 2013, 2014 by Paul Boddie <paul@boddie.org.uk>
paul@34	6	@copyright: 2003-2006 Edgewall Software
paul@34	7	@copyright: 2006 MoinMoin:AlexanderSchremmer
paul@0	8	@license: GNU GPL (v2 or later), see COPYING.txt for details.
paul@0	9	"""
paul@0	10
paul@17	11	from ContentTypeSupport import getContentPreferences
paul@58	12	from DateSupport import getCurrentTime, getDateTimeFromRFC2822, \
paul@58	13	getDateTimeFromISO8601, DateTime
paul@0	14	from MoinSupport import *
paul@58	15	from MoinRemoteSupport import *
paul@51	16	from ItemSupport import ItemStore
paul@50	17	from MoinMessage import GPG, is_encrypted, is_signed, MoinMessageError
paul@54	18	from MoinMessageSupport import get_homedir, get_username_for_fingerprint
paul@37	19	from MoinMoin.support.htmlmarkup import HTMLParseError, HTMLSanitizer, Markup
paul@15	20	from MoinMoin import wikiutil
paul@33	21	from email.parser import Parser
paul@58	22	from email.utils import parsedate
paul@37	23	from codecs import getwriter
paul@58	24	import xml.dom.pulldom
paul@0	25
paul@25	26	try:
paul@25	27	from cStringIO import StringIO
paul@25	28	except ImportError:
paul@25	29	from StringIO import StringIO
paul@25	30
# Keep a reference to the getFragments function already in scope (presumably
# provided by one of the star imports above - confirm), because this module
# defines its own getFragments further down which filters the original's
# results.

_getFragments = getFragments

__version__ = "0.1"

# XML namespace used to recognise Atom feed elements.

ATOM_NS = "http://www.w3.org/2005/Atom"
paul@58	36
paul@58	37	# Utility functions.
paul@58	38
def text(element):

    """
    Return the concatenated values of the text nodes found directly beneath
    'element'. Text inside child elements is not included.
    """

    return "".join([
        child.nodeValue
        for child in element.childNodes
        if child.nodeType == child.TEXT_NODE
        ])
paul@58	45
def children(element):

    """
    Return the serialised XML of all nodes found directly beneath 'element',
    joined together as a single string.
    """

    return "".join([child.toxml() for child in element.childNodes])
paul@58	51
def unescape(text):

    """
    Return 'text' with the basic XML character entities for the less-than
    sign, the greater-than sign and the ampersand replaced by the characters
    they stand for.

    The ampersand entity is handled last so that doubly-escaped input is only
    unescaped by one level.
    """

    # The entity strings are assembled from their names so that the source
    # text itself never contains a literal entity that tooling might decode.

    for name, char in (("lt", "<"), ("gt", ">"), ("amp", "&")):
        text = text.replace("&%s;" % name, char)
    return text
paul@58	54
def linktext(element, feed_type):

    """
    Return the link target held by 'element': for the "rss" 'feed_type' this
    is the element's text, and for any other feed type it is the value of the
    element's "href" attribute.
    """

    if feed_type != "rss":
        return element.getAttribute("href")
    return text(element)
paul@58	60
def need_content(show_content, tagname):

    """
    Return whether full content has been requested via 'show_content' and the
    element named 'tagname' is one that provides such content.
    """

    content_names = ("content", "description")
    return show_content in content_names and tagname in content_names
paul@58	63
paul@0	64	# More Moin 1.9 compatibility functions.
paul@0	65
def has_member(request, groupname, username):

    """
    Return whether 'username' belongs to the group 'groupname', using the
    has_member method of the dicts object on 'request' where it exists, and
    otherwise testing membership of the group obtained from that object.
    """

    groups = request.dicts
    if not hasattr(groups, "has_member"):
        return username in groups.get(groupname, [])
    return groups.has_member(groupname, username)
paul@0	71
# Fragments employ a "moinshare" attribute: getFragments only returns
# fragments whose attributes include this name.

fragment_attribute = "moinshare"
paul@2	75
def getFragments(s):

    """
    Return all fragments in 's' having the MoinShare fragment attribute.

    Each fragment is a (format, attributes, body) tuple as produced by the
    underlying fragment extraction function.
    """

    # Membership is tested with "in" rather than the deprecated has_key
    # method.

    return [
        (format_name, attributes, body)
        for format_name, attributes, body in _getFragments(s)
        if fragment_attribute in attributes
        ]
paul@2	85
def getPreferredOutputTypes(request, mimetypes):

    """
    Negotiate content types for the given 'request': where an Accept header is
    present, return the preferred types computed from that header for the
    given 'mimetypes'; otherwise return 'mimetypes' unchanged.
    """

    accept_header = getHeader(request, "Accept", "HTTP")
    if not accept_header:
        return mimetypes

    preferences = getContentPreferences(accept_header)
    return preferences.get_preferred_types(mimetypes)
paul@9	100
def getUpdatedTime(metadata):

    """
    Return the "last-modified" value from 'metadata' or, where that value is
    absent or false, the current time.
    """

    # NOTE: We could attempt to get the last edit time of a fragment.

    return metadata.get("last-modified") or getCurrentTime()
paul@9	115
paul@30	116	# Entry/update classes.
paul@30	117
class Update:

    "A feed update entry."

    def __init__(self):

        "Initialise an empty update with no details set."

        self.title = None
        self.link = None
        self.content = None
        self.content_type = None
        self.updated = None
        self.author = None

        # Page-related attributes.

        self.fragment = None
        self.preferred = None

        # Message-related attributes.

        self.message_number = None
        self.parts = None

        # Message- and page-related attributes.

        self.page = None

        # Identification: the sequence of part numbers leading to this update
        # within its message.

        self.path = []

    def unique_id(self):

        """
        Return an identifier built from the message number and the path of
        part numbers.
        """

        return "moinshare-tab-%s-%s" % (self.message_number, "-".join(map(str, self.path)))

    def __cmp__(self, other):

        """
        Compare this update with 'other' using the updated times, with updates
        lacking an updated time ordered after those having one.
        """

        if self.updated is None and other.updated is not None:
            return 1
        elif self.updated is not None and other.updated is None:
            return -1
        else:
            return cmp(self.updated, other.updated)

    def __lt__(self, other):

        """
        Return whether this update is ordered before 'other', giving the same
        ordering as __cmp__ but without relying on the cmp function, so that
        sorting also works where __cmp__ is not consulted.
        """

        if other.updated is None:
            return self.updated is not None
        if self.updated is None:
            return False
        return self.updated < other.updated

    def copy(self, part_number=None):

        """
        Return a new update sharing this update's descriptive details, with
        'part_number' (if given) appended to the new update's own path.

        The content, content type and parts are not copied.
        """

        update = Update()
        update.title = self.title
        update.link = self.link
        update.updated = self.updated
        update.author = self.author
        update.fragment = self.fragment
        update.preferred = self.preferred
        update.message_number = self.message_number
        update.page = self.page

        # Copy the path so that the original is not modified.

        update.path = self.path[:]
        if part_number is not None:
            update.path.append(part_number)
        return update
paul@40	173
paul@58	174	# Error classes.
paul@58	175
class FeedError(Exception):

    "The base class of the errors raised when retrieving updates from a feed."

    pass

class FeedMissingError(FeedError):

    "Raised by getUpdates for refused URLs and where no data was obtained."

    pass

class FeedContentTypeError(FeedError):

    "Raised by getUpdates where the resource has an unsupported content type."

    pass
paul@58	184
paul@60	185	# Update retrieval from URLs.
paul@58	186
def getUpdates(request, feed_url, max_entries, show_content):

    """
    Using the given 'request', retrieve from 'feed_url' up to the given number
    'max_entries' of update entries. The 'show_content' parameter can indicate
    that a "summary" is to be obtained for each update, that the "content" of
    each update is to be obtained (falling back to a summary if no content is
    provided), or no content (indicated by a false value) is to be obtained.

    A tuple of the form ((feed_type, channel_title, channel_link), updates) is
    returned.

    FeedMissingError is raised for local file URLs and where no data could be
    obtained; FeedContentTypeError is raised where the resource has a content
    type that cannot be interpreted.
    """

    # URL schemes are case-insensitive and may be preceded by whitespace, so
    # normalise the URL before testing its scheme. The original URL is still
    # the one passed on for retrieval.

    normalised_url = feed_url.strip().lower()

    # Prevent local file access.

    if normalised_url.startswith("file:"):
        raise FeedMissingError

    elif normalised_url.startswith("imap"):
        reader = imapreader

    else:
        reader = None

    # Obtain the resource, using a cached version if appropriate.

    max_cache_age = int(getattr(request.cfg, "moin_share_max_cache_age", "300"))
    data = getCachedResource(request, feed_url, "MoinShare", "wiki", max_cache_age, reader)
    if not data:
        raise FeedMissingError

    # Interpret the cached feed.

    f = StringIO(data)
    try:
        _url, content_type, _encoding, _metadata = getCachedResourceMetadata(f)

        if content_type in ("application/atom+xml", "application/rss+xml", "application/xml"):
            return getUpdatesFromFeed(f, max_entries, show_content)

        elif content_type == "multipart/mixed":
            return getUpdatesFromMailbox(f, max_entries, show_content, request)

        else:
            raise FeedContentTypeError

    finally:
        f.close()
paul@60	235
paul@60	236	# Update retrieval from feeds.
paul@58	237
def getUpdatesFromFeed(feed, max_entries, show_content):

    """
    Retrieve from 'feed' up to the given number 'max_entries' of update entries.
    The 'show_content' parameter can indicate that a "summary" is to be obtained
    for each update, that the "content" of each update is to be obtained
    (falling back to a summary if no content is provided), or no content
    (indicated by a false value) is to be obtained.

    A tuple of the form ((feed_type, channel_title, channel_link), updates) is
    returned.

    NOTE: 'max_entries' is not consulted by this function: every item or entry
    NOTE: found in the feed is returned.
    """

    feed_updates = []

    # Parse each node from the feed.

    channel_title = channel_link = None

    # The state of the parse: the kind of feed, the update currently being
    # populated (None when outside any item or entry), and whether the parser
    # is inside an Atom source element.

    feed_type = None
    update = None
    in_source = False

    events = xml.dom.pulldom.parse(feed)

    for event, value in events:

        # Element starts are ignored while inside a source element.

        if not in_source and event == xml.dom.pulldom.START_ELEMENT:
            tagname = value.localName

            # Detect the feed type and items.

            if tagname == "feed" and value.namespaceURI == ATOM_NS:
                feed_type = "atom"

            elif tagname == "rss":
                feed_type = "rss"

            # Detect items.

            elif feed_type == "rss" and tagname == "item" or \
                feed_type == "atom" and tagname == "entry":

                update = Update()

            # Detect source declarations.

            elif feed_type == "atom" and tagname == "source":
                in_source = True

            # Handle item elements.
            # Titles and links found outside any item describe the channel.

            elif tagname == "title":
                events.expandNode(value)
                if update:
                    update.title = text(value)
                else:
                    channel_title = text(value)

            elif tagname == "link":
                events.expandNode(value)
                if update:
                    update.link = linktext(value, feed_type)
                else:
                    channel_link = linktext(value, feed_type)

            elif show_content and (
                feed_type == "atom" and tagname in ("content", "summary") or
                feed_type == "rss" and tagname == "description"):

                events.expandNode(value)

                # Obtain content where requested or, failing that, a
                # summary.
                # A summary never replaces content that has already been
                # obtained for the update.

                if update and (need_content(show_content, tagname) or tagname == "summary" and not update.content):
                    if feed_type == "atom":
                        update.content_type = value.getAttribute("type") or "text"

                        # Normalise the content types and extract the
                        # content.

                        if update.content_type in ("xhtml", "application/xhtml+xml", "application/xml"):
                            update.content = children(value)
                            update.content_type = "application/xhtml+xml"
                        elif update.content_type in ("html", "text/html"):
                            update.content = text(value)
                            update.content_type = "text/html"
                        else:
                            update.content = text(value)
                            update.content_type = "text/plain"

                    # RSS descriptions are always treated as HTML.

                    else:
                        update.content_type = "text/html"
                        update.content = text(value)

            elif feed_type == "atom" and tagname == "updated" or \
                feed_type == "rss" and tagname == "pubDate":

                events.expandNode(value)

                # Dates outside any item are expanded but otherwise ignored.

                if update:
                    if feed_type == "atom":
                        value = getDateTimeFromISO8601(text(value))
                    else:
                        value = DateTime(parsedate(text(value)))
                    update.updated = value

        elif event == xml.dom.pulldom.END_ELEMENT:
            tagname = value.localName

            # Complete the current update at the end of its item or entry.

            if feed_type == "rss" and tagname == "item" or \
                feed_type == "atom" and tagname == "entry":

                feed_updates.append(update)

                update = None

            elif feed_type == "atom" and tagname == "source":
                in_source = False

    return (feed_type, channel_title, channel_link), feed_updates
paul@58	359
paul@62	360	# Update retrieval from mailboxes and multipart messages.
paul@62	361
def getUpdatesFromMailbox(feed, max_entries, show_content, request):

    """
    Parse 'feed' as a message whose payload is a collection of messages, and
    produce one update for each of those messages using the 'request'.

    The 'max_entries' and 'show_content' parameters are accepted for
    consistency with the other retrieval functions but are not consulted here.

    A tuple of the form ((feed_type, channel_title, channel_link), updates) is
    returned, with the feed type being "mbox" and the channel details being
    None.
    """

    messages = Parser().parse(feed).get_payload()
    collected = []

    # Treat each contained message as a separate update.

    for number, message in enumerate(messages):
        entry = Update()

        # The message date serves as both the fragment identifier and the
        # updated time.

        timestamp = getDateTimeFromRFC2822(message.get("date"))
        entry.fragment = timestamp
        entry.updated = timestamp

        entry.title = message.get("subject", "Update #%d" % number)
        entry.message_number = number

        details = getUpdateContentFromPart(message, request)
        entry.content, entry.content_type, entry.parts, verified_author = details

        if verified_author:
            entry.author = verified_author

        collected.append(entry)

    return ("mbox", None, None), collected
paul@62	396
paul@30	397	# Update retrieval from pages.
paul@30	398
def getUpdatesFromPage(page, request):

    """
    Get updates from the given 'page' using the 'request'. A list of update
    objects is returned.

    One update is produced for each fragment in the page body carrying the
    MoinShare fragment attribute. All updates share the updated time obtained
    for the page as a whole.
    """

    updates = []

    # NOTE: Use the updated datetime from the page for updates.
    # NOTE: The published and updated details would need to be deduced from
    # NOTE: the page history instead of being taken from the page as a whole.

    metadata = getMetadata(page)
    updated = getUpdatedTime(metadata)

    # Get the fragment regions for the page.

    for n, (format, attributes, body) in enumerate(getFragments(page.get_raw_body())):

        update = Update()

        # Produce a fragment identifier.
        # NOTE: Choose a more robust identifier where none is explicitly given.

        update.fragment = attributes.get("fragment", str(n))
        update.title = attributes.get("summary", "Update #%d" % n)

        # Get the preferred content types available for the fragment.

        update.preferred = getPreferredOutputTypes(request, getOutputTypes(request, format))

        # Try and obtain some suitable content for the entry.
        # NOTE: Could potentially get a summary for the fragment.

        update.content = None

        # Content is only produced where HTML is acceptable.

        if "text/html" in update.preferred:
            parser_cls = getParserClass(request, format)

            # HTML fragments are used as they are; other formats are converted
            # by the parser itself where it supports output types, or by the
            # HTML formatter otherwise.

            if format == "html":
                update.content = body
            elif hasattr(parser_cls, "formatForOutputType"):
                update.content = formatTextForOutputType(body, request, parser_cls, "text/html")
            else:
                fmt = request.html_formatter
                fmt.setPage(page)
                update.content = formatText(body, request, fmt, parser_cls)

            # NOTE(review): the nesting of this statement is assumed to be
            # NOTE(review): within the HTML case; confirm against the
            # NOTE(review): original indentation.

            update.content_type = "text/html"

        update.page = page

        # NOTE: The anchor would be supported in the page, but this requires
        # NOTE: formatter modifications for the regions providing updates.

        update.link = page.url(request, anchor=update.fragment)
        update.updated = updated

        updates.append(update)

    return updates
paul@25	461
paul@33	462	# Update retrieval from message stores.
paul@33	463
def getUpdatesFromStore(page, request):

    """
    Get updates from the message store associated with the given 'page' using
    the 'request'. A list of update objects is returned, ordered by store key.
    """

    updates = []

    store = ItemStore(page, "messages", "message-locks")

    # Visit the messages in key order, using the key as the message number.
    # NOTE: The page metadata and updated time were previously computed here
    # NOTE: but never used; each update takes its time from its own message.

    for key in sorted(store.keys()):
        update = getUpdateFromMessageText(store[key], key, request)
        update.page = page
        updates.append(update)

    return updates
paul@33	488
def getUpdateFromMessageText(message_text, message_number, request):

    """
    Parse 'message_text' and return an update describing the message, recording
    'message_number' on the update and using 'request' to obtain the content.
    """

    message = Parser().parsestr(message_text)
    entry = Update()

    # The message date serves as both the fragment identifier and the updated
    # time.

    timestamp = getDateTimeFromRFC2822(message.get("date"))
    entry.fragment = timestamp
    entry.updated = timestamp

    entry.title = message.get("subject", "Update #%d" % message_number)
    entry.author = message.get("moin-user")
    entry.message_number = message_number

    details = getUpdateContentFromPart(message, request)
    entry.content, entry.content_type, entry.parts, verified_author = details

    # An author obtained from the content takes precedence over the header.

    if verified_author:
        entry.author = verified_author

    return entry
paul@46	511
def getUpdateContentFromPart(part, request):

    """
    Return a tuple (content, content type, subparts, author) for the given
    message 'part', using 'request' when decrypting content.

    Content is None where the part must be presented through its subparts;
    the subparts are None for a single part; the author is only provided for
    decrypted content whose signature identified a user.
    """

    # A single part provides the update content directly.

    if not part.is_multipart():
        content, content_type = getPartContent(part)
        return content, content_type, None, None

    subtype = part.get_content_subtype()

    # For related parts, the first part provides the content and the
    # formatter is assumed to reference the others.

    if subtype == "related":
        main_part = part.get_payload()[0]
        content, content_type = getPartContent(main_part)
        return content, content_type, [main_part], None

    # Encrypted content cannot be meaningfully separated, so decrypt it and
    # examine the result instead. Failure falls through to the general case.

    if subtype == "encrypted":
        try:
            part, author = getDecryptedParts(part, request)
            content, content_type, parts, _author = getUpdateContentFromPart(part, request)
            return content, content_type, parts, author
        except MoinMessageError:
            pass

    # Otherwise, provide the parts for separate display.

    return None, part.get_content_type(), part.get_payload(), None
paul@40	549
def getDecryptedParts(part, request):

    """
    Decrypt the given 'part' where it is encrypted, and verify it where it is
    signed, using the key store found via 'request'.

    A tuple (part, username) is returned: for successfully verified content,
    the verified content and the username associated with the signing key's
    fingerprint; otherwise the (possibly decrypted) part and None.
    """

    gpg = GPG(get_homedir(request))

    # Replace an encrypted part with its parsed, decrypted form.

    if is_encrypted(part):
        part = Parser().parsestr(gpg.decryptMessage(part))

    # Without a signature, or without a verification result, no identity can
    # be supplied.

    if not is_signed(part):
        return part, None

    verification = gpg.verifyMessage(part)
    if not verification:
        return part, None

    fingerprint, identity, content = verification
    return content, get_username_for_fingerprint(request, fingerprint)
paul@47	572
def getPartContent(part):

    """
    Decode the 'part', returning the decoded payload and the content type.

    Where the part declares a character set, the payload is converted to
    Unicode text, with undecodable bytes being replaced. The payload is
    returned as it is where no character set is declared, where the part has
    no decodable payload (as with multipart containers), or where the declared
    character set is not known.
    """

    content_type = part.get_content_type()
    payload = part.get_payload(decode=True)
    charset = part.get_content_charset()

    if payload is None or not charset:
        return payload, content_type

    # Messages are untrusted input: a bogus charset name must not prevent the
    # update from being shown.

    try:
        return payload.decode(charset, "replace"), content_type
    except LookupError:
        return payload, content_type
paul@40	580
def getUpdateFromPart(parent, part, part_number, request):

    """
    Return a copy of the 'parent' update extended with 'part_number', having
    the content, content type, subparts and author obtained from 'part' using
    the 'request'.
    """

    details = getUpdateContentFromPart(part, request)
    child = parent.copy(part_number)
    child.content, child.content_type, child.parts, child.author = details
    return child
paul@40	588
def getUpdatesForFormatting(update, request):

    """
    Return a flat list of the updates to be formatted for 'update': the update
    itself where it has no parts, or otherwise the updates obtained recursively
    from each of its parts using the 'request'.
    """

    # An update without parts is formatted as it is.

    if not update.parts:
        return [update]

    # Handle multipart/alternative and other non-related multiparts.

    flattened = []
    for number, part in enumerate(update.parts):
        part_update = getUpdateFromPart(update, part, number, request)
        flattened.extend(getUpdatesForFormatting(part_update, request))
    return flattened
paul@46	605
paul@46	606	# Update formatting.
paul@46	607
def getFormattedUpdate(update, request, fmt):

    """
    Return the formatted form of the given 'update' using the given 'request'
    and 'fmt', or None where the update has no content or where no parser is
    available for its content type.
    """

    # NOTE: Some control over the HTML and XHTML should be exercised.

    if not update.content:
        return None

    # HTML from messages is handled by a parser that knows about the message.

    if update.content_type == "text/html" and update.message_number is not None:
        parsers = [get_make_parser(update.page, update.message_number)]
    else:
        parsers = getParsersForContentType(request.cfg, update.content_type)

    if not parsers:
        return None

    # Only the first parser is ever used: both branches return immediately.

    for parser_cls in parsers:
        if hasattr(parser_cls, "formatForOutputType"):
            return formatTextForOutputType(update.content, request, parser_cls, "text/html")
        return formatText(update.content, request, fmt, parser_cls=parser_cls)

    return None
paul@46	634
def formatUpdate(update, request, fmt):

    """
    Return the output for the given 'update' using the given 'request' and
    'fmt'. Where the update yields anything other than exactly one update for
    formatting, a set of navigation links is produced and each alternative is
    wrapped in its own region.
    """

    alternatives = getUpdatesForFormatting(update, request)
    multiple = len(alternatives) != 1

    output = []

    # Format some navigation tabs.

    if multiple:
        output.append(fmt.div(on=1, css_class="moinshare-alternatives"))

        for alternative in alternatives:
            output.append(fmt.url(1, "#%s" % alternative.unique_id()))
            output.append(fmt.text(alternative.content_type))
            output.append(fmt.url(0))

        output.append(fmt.div(on=0))

    # Format the content.

    for position, alternative in enumerate(alternatives):

        # Encapsulate each alternative if many exist, marking the first as
        # the default.

        if multiple:
            if position == 0:
                css_class = "moinshare-default"
            else:
                css_class = "moinshare-other"
            output.append(fmt.div(on=1, css_class="moinshare-alternative %s" % css_class, id=alternative.unique_id()))

        # Include the content.

        output.append(formatUpdatePart(alternative, request, fmt))

        if multiple:
            output.append(fmt.div(on=0))

    return "".join(output)
paul@46	683
def formatUpdatePart(update, request, fmt):

    """
    Return the content of the given 'update' wrapped in a content region,
    using the given 'request' and 'fmt'. A translated notice is shown instead
    where the update could not be formatted.
    """

    _ = request.getText

    # The formatter calls are made in output order.

    opening = fmt.div(on=1, css_class="moinshare-content")

    body = getFormattedUpdate(update, request, fmt)
    if not body:
        body = fmt.text(_("Update cannot be shown for content of type %s.") % update.content_type)

    closing = fmt.div(on=0)

    return "".join([opening, body, closing])
paul@46	704
paul@31	705	# Source management.
paul@31	706
def getUpdateSources(pagename, request):

    """
    Return a dictionary mapping source names to source parameters, obtained
    from the wiki dictionary for 'pagename' using the 'request'. An empty
    dictionary is returned where no definitions are found.
    """

    definitions = getWikiDict(pagename, request)
    if not definitions:
        return {}

    return dict(
        (name, getSourceParameters(definition))
        for name, definition in definitions.items()
        )
paul@31	720
def getSourceParameters(source_definition):

    """
    Return the parameters parsed from the 'source_definition' string, with
    "type" and "location" given as the parameter names for the entry.
    """

    parameter_names = ("type", "location")
    return parseDictEntry(source_definition, parameter_names)
paul@31	726
paul@34	727	# HTML parsing support.
paul@34	728
class IncomingHTMLSanitizer(HTMLSanitizer):

    "An HTML sanitizer that redirects attachment references to a page action."

    def __init__(self, out, request, page, message_number):

        """
        Initialise the sanitizer to write to 'out', retaining the 'request',
        'page' and 'message_number' needed to rewrite references.
        """

        HTMLSanitizer.__init__(self, out)
        self.page = page
        self.request = request
        self.message_number = message_number

    def rewrite_reference(self, ref):

        """
        Return a link to the ReadMessage action for 'ref' where it is a "cid:"
        reference to a message part, or 'ref' itself otherwise.
        """

        scheme = "cid:"
        if not ref.startswith(scheme):
            return ref

        query = {
            "action" : "ReadMessage", "doit" : "1",
            "message" : self.message_number, "part" : ref[len(scheme):]
            }
        return self.page.url(self.request, query)

    def handle_starttag(self, tag, attrs):

        """
        Handle the start of 'tag', rewriting the values of those 'attrs' that
        hold URIs before passing them on to the inherited handler.
        """

        rewritten = []
        for name, value in attrs:
            if name in self.uri_attrs:
                value = self.rewrite_reference(value)
            rewritten.append((name, value))
        HTMLSanitizer.handle_starttag(self, tag, rewritten)
paul@34	758
class IncomingMarkup(Markup):

    "Markup for incoming HTML, sanitized with rewritten attachment references."

    def sanitize(self, request, page, message_number):

        """
        Return a new markup object holding this markup after it has been fed
        through a sanitizer configured with the 'request', 'page' and
        'message_number'.
        """

        # The sanitizer output is collected as UTF-8 and decoded afterwards.

        encoding = "utf-8"
        buffer = getwriter(encoding)(StringIO())

        sanitizer = IncomingHTMLSanitizer(buffer, request, page, message_number)
        sanitizer.feed(self.stripentities(keepxmlentities=True))

        sanitized = unicode(buffer.getvalue(), encoding)
        return IncomingMarkup(sanitized)
paul@34	768
class IncomingHTMLParser:

    "Filters and rewrites incoming HTML content."

    def __init__(self, raw, request, **kw):

        """
        Initialise the parser with the 'raw' HTML and the 'request'. The
        message number and page are left unset for the creator to configure.
        """

        self.raw = raw
        self.request = request
        self.message_number = None
        self.page = None

    def format(self, formatter, **kw):

        """
        Send the sanitized text to the request using the 'formatter', sending
        a system message describing the problem where the HTML cannot be
        parsed.
        """

        try:
            self.request.write(formatter.rawHTML(IncomingMarkup(self.raw).sanitize(self.request, self.page, self.message_number)))
        except HTMLParseError as e:
            self.request.write(formatter.sysmsg(1) +
                formatter.text(self._describe_error(e)) +
                formatter.sysmsg(0))

    def _describe_error(self, e):

        """
        Return the message text for the parse error 'e', quoting the offending
        line of the raw text where the error identifies one.
        """

        # The error may carry no line number, or one outside the text, and
        # reporting it must not itself raise an exception.

        lines = self.raw.splitlines()
        lineno = getattr(e, "lineno", None)

        if lineno is not None and 1 <= lineno <= len(lines):
            context = lines[lineno - 1].strip()
        else:
            context = u""

        return u'HTML parsing error: %s in "%s"' % (e.msg, context)
paul@34	790
class MakeIncomingHTMLParser:

    "A class that makes parsers configured for messages."

    def __init__(self, page, message_number):

        "Initialise with state that is used to configure instantiated parsers."

        self.message_number = message_number
        self.page = page

    def __call__(self, *args, **kw):

        """
        Return a parser instantiated with the given positional and keyword
        arguments, configured with the message number and page held by this
        object.
        """

        # Both positional and keyword arguments are forwarded, since the
        # parser accepts arbitrary keyword arguments.

        parser = IncomingHTMLParser(*args, **kw)
        parser.message_number = self.message_number
        parser.page = self.page
        return parser
paul@34	807
def get_make_parser(page, message_number):

    """
    Return a callable producing parsers that are configured for the message
    with the given 'message_number' held by the given 'page'.
    """

    make_parser = MakeIncomingHTMLParser(page, message_number)
    return make_parser
paul@34	816
paul@0	817	# vim: tabstop=4 expandtab shiftwidth=4