1.1 --- a/moinformat/links/html.py Fri Aug 03 22:34:55 2018 +0200
1.2 +++ b/moinformat/links/html.py Sat Aug 04 16:57:49 2018 +0200
1.3 @@ -20,15 +20,9 @@
1.4 """
1.5
1.6 from moinformat.links.common import Linker
1.7 -from urllib import quote as _quote
1.8 +from urllib import quote, quote_plus
1.9 from urlparse import urlparse
1.10
1.11 -def quote(s):
1.12 -
1.13 - "Quote URL path 's', preserving path separators and fragment indicators."
1.14 -
1.15 - return "#".join(map(_quote, s.split("#", 1)))
1.16 -
1.17 class HTMLLinker(Linker):
1.18
1.19 "Translate Moin links into HTML links."
1.20 @@ -89,7 +83,7 @@
1.21 # Top-level pages.
1.22
1.23 top_level = self.get_top_level()
1.24 - return quote("%s%s" % (top_level and "%s/" % top_level or "", target)), None
1.25 + return self.quote("%s%s" % (top_level and "%s/" % top_level or "", target)), None
1.26
1.27 def translate_qualified_link(self, target):
1.28
1.29 @@ -125,26 +119,101 @@
1.30
1.31 "Return a translation of the given attachment 'target'."
1.32
1.33 - return quote("%sattachments/%s/%s" % (
1.34 + return self.quote("%sattachments/%s/%s" % (
1.35 self.get_top_level(), self.pagename, target))
1.36
1.37 def translate_interwiki(self, url, target):
1.38
1.39 "Return a translation of the given interwiki 'target'."
1.40
1.41 - return "%s%s" % (self.normalise(url), quote(target))
1.42 + return "%s%s" % (self.normalise(url), self.quote(target))
1.43
1.44 def translate_relative(self, target):
1.45
1.46 "Return a translation of the given relative 'target'."
1.47
1.48 - return quote(target[len("../"):])
1.49 + return self.quote(target[len("../"):])
1.50
1.51 def translate_subpage(self, target):
1.52
1.53 "Return a translation of the given subpage 'target'."
1.54
1.55 - return quote(".%s" % target)
1.56 + return self.quote(".%s" % target)
1.57 +
1.58 + # Path encoding.
1.59 +
1.60 +    def quote(self, s):
1.61 +
1.62 +        """
1.63 +        Quote the URL path 's', leaving path separators and the fragment
1.64 +        indicator intact and turning any fragment into an identifier.
1.65 +        """
1.66 +
1.67 +        path, indicator, fragment = s.partition("#")
1.68 +
1.69 +        if indicator:
1.70 +            identifier = self.make_id(fragment)
1.71 +            return "%s#%s" % (quote(path), quote(identifier))
1.72 +        return quote(path)
1.73 +
1.74 + # Identifier encoding.
1.75 +
1.76 +    def make_id(self, s):
1.77 +
1.78 +        """
1.79 +        Return an identifier for 's' suitable for HTML element identification.
1.80 +        The result uses only A-Za-z0-9 and -_:. and always starts with a letter:
1.81 +        "A" is prepended where it would not, which includes an empty 's'.
1.82 +        """
1.83 +
1.84 +        # NOTE: This reproduces the Moin algorithm for compatibility.
1.85 +        # NOTE: Improvements may be possible, such as replacing plus with something
1.86 +        # NOTE: less cumbersome, although plus is probably unusual in headings anyway.
1.87 +
1.88 +        # The desired output is the pattern [A-Za-z][-_:.A-Za-z0-9]*
1.89 +
1.90 +        # The Python UTF-7 encoder preserves symbols and it encodes + as +- with an
1.91 +        # output range as follows (in addition to A-Za-z0-9):
1.92 +
1.93 +        # -_:.%+ !"#$&\'()*,/;<=>?@[]^`{|}
1.94 +
1.95 +        # The quote_plus function converts space to plus, preserves -_. (and : when
1.96 +        # given as a safe character, as here) and percent-encodes everything else,
1.97 +        # including original occurrences of plus and percent.
1.98 +
1.99 +        # With colons preserved, the resulting output is in the following range
1.100 +        # (in addition to A-Za-z0-9):
1.101 +
1.102 +        # -_:.%+
1.103 +
1.104 +        # Percent will only occur as an encoding prefix. Plus will only occur as a
1.105 +        # replacement for space.
1.106 +
1.107 +        # Combining quote_plus and UTF-7 gives the following range (in addition to
1.108 +        # A-Za-z0-9):
1.109 +
1.110 +        # -_:.%+
1.111 +
1.112 +        # Examples:
1.113 +
1.114 +        #          UTF-7    quote_plus    replace percent and plus
1.115 +        # :     -> :     -> :          -> :
1.116 +        # -     -> -     -> -          -> -
1.117 +        # .     -> .     -> .          -> .
1.118 +        # %     -> %     -> %25        -> .25
1.119 +        # +     -> +-    -> %2B-       -> .2B-
1.120 +        # _     -> _     -> _          -> _
1.121 +        # space -> space -> +          -> _
1.122 +
1.123 +        # See: RFC2152 - UTF-7 A Mail-Safe Transformation Format of Unicode
1.124 +
1.125 +        quoted = quote_plus(s.encode("utf-7"), ":").replace("%", ".").replace("+", "_")
1.126 +
1.127 +        # Ensure an alphabetical first character; slicing tolerates an empty result.
1.128 +
1.129 +        if not quoted[:1].isalpha():
1.130 +            return "A%s" % quoted
1.131 +        return quoted
1.132
1.133 linker = HTMLLinker
1.134