MoinLight (annotate moinformat/links/html.py in d5b2b71a486d)

MoinLight

Annotated moinformat/links/html.py

159:d5b2b71a486d

2018-08-13

Paul Boddie

Improved link translation, handling the root page as a special case, employing resolved link targets instead of reusing target details in HTML links. This should fix certain kinds of links which behave rather differently in Moin, such as those employing the "." notation.

paul@91	1	#!/usr/bin/env python
paul@91	2
paul@91	3	"""
paul@91	4	HTML linking scheme.
paul@91	5
paul@91	6	Copyright (C) 2018 Paul Boddie <paul@boddie.org.uk>
paul@91	7
paul@91	8	This program is free software; you can redistribute it and/or modify it under
paul@91	9	the terms of the GNU General Public License as published by the Free Software
paul@91	10	Foundation; either version 3 of the License, or (at your option) any later
paul@91	11	version.
paul@91	12
paul@91	13	This program is distributed in the hope that it will be useful, but WITHOUT
paul@91	14	ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
paul@91	15	FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
paul@91	16	details.
paul@91	17
paul@91	18	You should have received a copy of the GNU General Public License along with
paul@91	19	this program. If not, see <http://www.gnu.org/licenses/>.
paul@91	20	"""
paul@91	21
paul@159	22	from moinformat.links.common import Linker, resolve
paul@128	23	from urllib import quote, quote_plus
paul@91	24	from urlparse import urlparse
paul@91	25
class HTMLLinker(Linker):

    "Translate Moin links into HTML links."

    name = "html"

    def get_top_level(self):

        """
        Return a relative link to the top level.

        The result is the empty string when the current page is the root page.
        Otherwise, it is a path of ".." components, one for each level of the
        current pagename.
        """

        # The root page is at the top level already.

        if self.pagename == self.root_pagename:
            return ""

        # Siblings of the root page are actually one level below.

        levels = self.pagename.count("/") + 1
        return "/".join([".."] * levels)

    def is_url(self, target):

        """
        Return the 'target' itself if it references a URL, this being
        indicated by the presence of a scheme, or None otherwise.
        """

        return target if urlparse(target).scheme else None

    def normalise(self, path):

        """
        Return a normalised form of 'path', this being the path with exactly
        one trailing slash added if it did not already end with one.
        """

        return path if path.endswith("/") else "%s/" % path

    def translate(self, target):

        """
        Translate the 'target', returning a tuple containing the rewritten
        target string and a suitable default label. The label is None where
        no specific default label is proposed.
        """

        target = target.rstrip("/")

        # Fragments. Remove the leading hash for the label.

        if target.startswith("#"):
            return self.quote(target), target.lstrip("#")

        # Sub-pages. Remove the leading slash for the label.

        if target.startswith("/"):
            return self.translate_pagename(target), target.lstrip("/")

        # Sibling (of ancestor) pages.

        if target.startswith("../"):
            return self.translate_pagename(target), None

        # Attachment or interwiki link.

        rewritten = self.translate_qualified_link(target)
        if rewritten:
            return rewritten # includes label

        # Plain URL.

        rewritten = self.is_url(target)
        if rewritten:
            return rewritten, None

        # Top-level pages.

        return self.translate_pagename(target), None

    def translate_pagename(self, target):

        """
        Translate the pagename in 'target', returning a quoted relative link
        consisting of a path to the top level followed by the resolved
        pagename and any fragment given in 'target'.
        """

        # Obtain the target pagename and the fragment.

        t = target.split("#", 1)

        # Determine the actual pagename referenced.
        # Replace the root pagename if it appears.

        resolved = resolve(t[0], self.pagename, self.root_pagename)

        # Rewrite the target using a relative link to the top level and then the
        # resolved pagename. No separator is needed where the top level link is
        # empty.

        top_level = self.get_top_level()
        prefix = "%s/" % top_level if top_level else ""
        t[0] = "%s%s" % (prefix, resolved)

        return self.quote("#".join(t))

    def translate_qualified_link(self, target):

        """
        Translate a possible qualified link 'target', returning a tuple
        containing a rewritten target and a suitable default label.

        A qualified link has the form "prefix:target" where the prefix is
        either "attachment" or a key found in the linker's mapping.

        Return None if the link is not suitable.
        """

        t = target.split(":", 1)
        if len(t) != 2:
            return None

        prefix, target = t

        # Attachment links.

        if prefix == "attachment":
            return self.translate_attachment(target), target

        # Interwiki links.
        # NOTE: The mapping is presumably provided by the Linker base class.

        url = self.mapping.get(prefix)
        if url:
            return self.translate_interwiki(url, target), target

        return None

    # Specific link translators.

    def translate_attachment(self, target):

        "Return a translation of the given attachment 'target'."

        return self.quote("./attachments/%s" % target)

    def translate_interwiki(self, url, target):

        """
        Return a translation of the given interwiki 'target', appending the
        quoted target to the normalised base 'url'.
        """

        return "%s%s" % (self.normalise(url), self.quote(target))

    # Path encoding.

    def quote(self, s):

        """
        Quote URL path 's', preserving path separators and fragment indicators,
        encoding fragment identifiers.

        Only the first "#" in 's' is treated as the fragment indicator.
        """

        parts = s.split("#", 1)

        if len(parts) > 1:
            parts[1] = self.make_id(parts[1])

        return "#".join(map(quote, parts))

    # Identifier encoding.

    def make_id(self, s):

        """
        Make a suitable identifier for HTML element identification from 's'.

        An empty 's' yields the identifier "A" instead of causing an error.
        """

        # NOTE: This reproduces the Moin algorithm for compatibility.
        # NOTE: There may well be improvements possible, possibly by replacing plus
        # NOTE: with something less cumbersome, even though plus may be unusual in
        # NOTE: things like headings, anyway.

        # The desired output is the following pattern:

        # [A-Za-z][-_:.A-Za-z0-9]*

        # The Python UTF-7 encoder preserves symbols and it encodes + as +- with an
        # output range as follows (in addition to A-Za-z0-9):

        # -_:.%+ !"#$&\'()*,/;<=>?@[]^`{\|}

        # The quote_plus function converts space to plus, preserves -_:. and encodes
        # all other symbols (including original occurrences of plus and percent) and
        # non-alphanumeric (ASCII) characters using percent encoding.

        # With colons preserved, the resulting output is in the following range
        # (in addition to A-Za-z0-9):

        # -_:.%+

        # Percent will only occur as an encoding prefix. Plus will only occur as a
        # replacement for space.

        # Combining quote_plus and UTF-7 gives the following range (in addition to
        # A-Za-z0-9):

        # -_:.%+

        # Examples:

        #          UTF-7    quote_plus  replace percent and plus
        # :     -> :     -> :        -> :
        # -     -> -     -> -        -> -
        # .     -> .     -> .        -> .
        # %     -> %     -> %25      -> .25
        # +     -> +-    -> %2B-     -> .2B-
        # _     -> _     -> _        -> _
        # space -> space -> +        -> _

        # See: RFC2152 - UTF-7 A Mail-Safe Transformation Format of Unicode

        quoted = quote_plus(s.encode("utf-7"), ":").replace("%", ".").replace("+", "_")

        # Ensure that the identifier starts with an alphabetical character.
        # A slice is tested instead of an index so that an empty identifier
        # (from a bare "#" or a trailing "#") is prefixed rather than causing
        # an IndexError.

        if not quoted[:1].isalpha():
            return "A%s" % quoted
        else:
            return quoted

linker = HTMLLinker
paul@91	240
paul@91	241	# vim: tabstop=4 expandtab shiftwidth=4