#!/usr/bin/env python

"""
HTML linking scheme.

Copyright (C) 2018 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
this program.  If not, see <http://www.gnu.org/licenses/>.
"""

from moinformat.links.common import Linker
from urllib import quote, quote_plus
from urlparse import urlparse

class HTMLLinker(Linker):

    "Translate Moin links into HTML links."

    name = "html"

    # NOTE(review): 'self.pagename' and 'self.mapping' are not defined in this
    # NOTE(review): module; presumably they are provided by Linker - confirm.

    def get_top_level(self):

        """
        Return a relative link to the top level.

        One ".." component is produced for each "/" in the page name, with the
        components joined by "/" and no trailing separator. A page name
        without any "/" therefore yields the empty string.
        """

        levels = self.pagename.count("/")
        return "/".join([".."] * levels)

    def _top_level_prefix(self):

        """
        Return the relative link to the top level in a form to which a path
        can be appended directly: with a trailing "/" when the link is not
        empty, or as the empty string otherwise.
        """

        top_level = self.get_top_level()
        return "%s/" % top_level if top_level else ""

    def is_url(self, target):

        """
        Return whether the 'target' references a URL: the 'target' itself is
        returned if it has a URL scheme, None otherwise.
        """

        if urlparse(target).scheme:
            return target

        return None

    def normalise(self, path):

        "Return a normalised form of 'path', always ending with a slash."

        return path if path.endswith("/") else "%s/" % path

    def translate(self, target):

        """
        Translate the 'target', returning a tuple containing the rewritten
        target string and a suitable default label.

        The label is None except for attachment and interwiki links, where
        the part of the target following the prefix is used.
        """

        target = target.rstrip("/")

        # Fragments.

        if target.startswith("#"):
            return self.quote(target), None

        # Sub-pages.

        if target.startswith("/"):
            return self.translate_subpage(target), None

        # Sibling (of ancestor) pages.

        if target.startswith("../"):
            return self.translate_relative(target), None

        # Attachment or interwiki link.

        rewritten = self.translate_qualified_link(target)
        if rewritten:
            return rewritten # includes label

        # Plain URL.

        rewritten = self.is_url(target)
        if rewritten:
            return rewritten, None

        # Top-level pages.

        return self.quote("%s%s" % (self._top_level_prefix(), target)), None

    def translate_qualified_link(self, target):

        """
        Translate a possible qualified link 'target', returning a tuple
        containing a rewritten target and a suitable default label.

        Return None if the link is not suitable: that is, if it has no
        "prefix:" part, or if the prefix is neither "attachment" nor found in
        the interwiki mapping.
        """

        t = target.split(":", 1)
        if len(t) != 2:
            return None

        prefix, target = t

        # Attachment links.

        if prefix == "attachment":
            return self.translate_attachment(target), target

        # Interwiki links.

        url = self.mapping.get(prefix)
        if url:
            return self.translate_interwiki(url, target), target

        return None

    # Specific link translators.

    def translate_attachment(self, target):

        """
        Return a translation of the given attachment 'target'.

        The result is the quoted path "attachments/<pagename>/<target>"
        expressed relative to the top level.
        """

        # The top-level link has no trailing separator of its own, so the
        # prefix form is used to avoid producing paths like "..attachments".

        return self.quote("%sattachments/%s/%s" % (
            self._top_level_prefix(), self.pagename, target))

    def translate_interwiki(self, url, target):

        """
        Return a translation of the given interwiki 'target', appending the
        quoted target to the normalised base 'url'.
        """

        return "%s%s" % (self.normalise(url), self.quote(target))

    def translate_relative(self, target):

        """
        Return a translation of the given relative 'target', this being the
        quoted target with its leading "../" removed.
        """

        return self.quote(target[len("../"):])

    def translate_subpage(self, target):

        """
        Return a translation of the given subpage 'target', this being the
        quoted target prefixed with "." (so that "/Sub" becomes "./Sub").
        """

        return self.quote(".%s" % target)

    # Path encoding.

    def quote(self, s):

        """
        Quote URL path 's', preserving path separators and fragment indicators,
        encoding fragment identifiers.

        Only the first "#" is treated as the fragment indicator; the text
        following it is converted to an identifier using make_id.
        """

        parts = s.split("#", 1)

        if len(parts) > 1:
            parts[1] = self.make_id(parts[1])

        # Here, quote is the module-level urllib function, not this method.

        return "#".join([quote(part) for part in parts])

    # Identifier encoding.

    def make_id(self, s):

        """
        Make a suitable identifier for HTML element identification.

        The result always starts with an alphabetical character: "A" is
        prepended if necessary, which means that an empty 's' yields "A".
        """

        # NOTE: This reproduces the Moin algorithm for compatibility.
        # NOTE: There may well be improvements possible, possibly by replacing plus
        # NOTE: with something less cumbersome, even though plus may be unusual in
        # NOTE: things like headings, anyway.

        # The desired output is the following pattern:

        # [A-Za-z][-_:.A-Za-z0-9]*

        # The Python UTF-7 encoder preserves symbols and it encodes + as +- with an
        # output range as follows (in addition to A-Za-z0-9):

        # -_:.%+ !"#$&\'()*,/;<=>?@[]^`{|}

        # The quote_plus function converts space to plus, preserves -_:. and encodes
        # all other symbols (including original occurrences of plus and percent) and
        # non-alphanumeric (ASCII) characters using percent encoding.

        # With colons preserved, the resulting output is in the following range
        # (in addition to A-Za-z0-9):

        # -_:.%+

        # Percent will only occur as an encoding prefix. Plus will only occur as a
        # replacement for space.

        # Combining quote_plus and UTF-7 gives the following range (in addition to
        # A-Za-z0-9):

        # -_:.%+

        # Examples:

        #          UTF-7      quote_plus     replace percent and plus
        # :     -> :       -> :           -> :
        # -     -> -       -> -           -> -
        # .     -> .       -> .           -> .
        # %     -> %       -> %25         -> .25
        # +     -> +-      -> %2B-        -> .2B-
        # _     -> _       -> _           -> _
        # space -> space   -> +           -> _

        # See: RFC2152 - UTF-7 A Mail-Safe Transformation Format of Unicode

        quoted = quote_plus(s.encode("utf-7"), ":").replace("%", ".").replace("+", "_")

        # Ensure that the identifier starts with an alphabetical character.
        # Slicing rather than indexing is used so that an empty identifier is
        # also handled instead of causing an IndexError.

        if not quoted[:1].isalpha():
            return "A%s" % quoted
        else:
            return quoted

linker = HTMLLinker

# vim: tabstop=4 expandtab shiftwidth=4