#!/usr/bin/env python

"""
HTML linking scheme.

Copyright (C) 2018, 2019, 2022, 2023 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
this program.  If not, see <http://www.gnu.org/licenses/>.
"""

from moinformat.links.common import Link, Linker, resolve
from urllib import quote, quote_plus

class HTMLLinker(Linker):

    "Translate Moin links into HTML links."

    name = "html"

    # Link types whose target text is a pagename to be rewritten relative to
    # the current page.

    pagename_types = ("sub-page", "sibling-page", "page")

    def get_top_level(self):

        """
        Return a relative link to the top level, this being an empty string
        for the root page and a sequence of ".." components otherwise.
        """

        # The root page is at the top level already.

        pagename = self.metadata.get("pagename", "")

        if pagename == self.root_pagename:
            return ""

        # Siblings of the root page are actually one level below.

        levels = pagename.count("/") + 1
        return "/".join([".."] * levels)

    def normalise(self, path):

        "Return a normalised form of 'path', ensuring a trailing slash."

        return path if path.endswith("/") else "%s/" % path

    def translate(self, target):

        """
        Translate the 'target', returning a link object containing the rewritten
        target and a suitable default label.

        Targets that are neither fragments, pages nor plain URLs are handled as
        qualified links, with None being returned if they are not suitable.
        """

        identifier = target.get_identifier()
        text = target.get_text()
        link_type = target.get_type()

        # Fragments.

        if link_type == "fragment":
            return Link(self.quote(text), identifier, target)

        # Sub-pages, sibling (of ancestor) pages and top-level pages are all
        # rewritten as pagenames.

        if link_type in self.pagename_types:
            return Link(self.translate_pagename(text), identifier, target)

        # Plain URL.

        if link_type == "url":
            return Link(text, identifier, target)

        # Attachment or interwiki link.

        return self.translate_qualified_link(target)

    def translate_pagename(self, text):

        """
        Translate the pagename in 'text', returning a quoted relative link
        that retains any fragment identifier following the pagename.
        """

        # Obtain the target pagename and the fragment.

        t = text.split("#", 1)

        # Determine the actual pagename referenced.
        # Replace the root pagename if it appears.

        pagename = self.metadata.get("pagename", "")
        resolved = resolve(t[0], pagename, self.root_pagename)

        # Rewrite the target using a relative link to the top level and then the
        # resolved pagename.

        top_level = self.get_top_level()
        prefix = "%s/" % top_level if top_level else ""

        # Support an explicit "DocumentIndex" filename for file browsing.

        document_index = self.metadata.get("document_index")

        t[0] = "%s%s%s%s" % (prefix,
                             resolved,
                             "/" if resolved else "",
                             document_index or "")

        return self.quote("#".join(t))

    def translate_qualified_link(self, target):

        """
        Translate a possible qualified link 'target', returning a link object
        retaining a rewritten target and a suitable default label.

        Return None if the link is not suitable.
        """

        identifier = target.get_identifier()
        pagename = target.get_pagename()
        link_type = target.get_type()

        # Attachment links.

        if link_type == "attachment":
            return Link(self.translate_attachment(identifier, pagename),
                        identifier, target)

        # Interwiki links: the link type selects the base URL from the mapping.

        url = self.mapping.get(link_type)
        if url:
            return Link(self.translate_interwiki(url, identifier),
                        identifier or link_type, target)

        return None

    # Specific link translators.

    def translate_attachment(self, target, pagename):

        """
        Return a translation of the given attachment 'target' associated with
        the given 'pagename'.

        The pagename is omitted from the result if the "common_attachments"
        metadata setting is enabled.
        """

        common_attachments = self.metadata.get("common_attachments")
        top_level = self.get_top_level()

        prefix = "%s/" % top_level if top_level else ""
        page_dir = "" if common_attachments else "%s/" % pagename

        return self.quote("%s%s/%s%s" % (prefix,
                                         self.attachments_dir,
                                         page_dir,
                                         target))

    def translate_interwiki(self, url, target):

        """
        Return a translation of the given interwiki 'target', appending it in
        quoted form to the normalised base 'url'.
        """

        return "%s%s" % (self.normalise(url), self.quote(target))

    # Path encoding.

    def quote(self, s):

        """
        Quote URL path 's', preserving path separators and fragment indicators,
        encoding fragment identifiers.
        """

        s = self.replace_whitespace(s)
        parts = s.split("#", 1)

        # Only the text after the first fragment indicator is an identifier.

        if len(parts) > 1:
            parts[1] = self.make_id(parts[1])

        return "#".join(map(quote, parts))

    # Whitespace conversion in pagenames.

    def replace_whitespace(self, pagename):

        """
        Map whitespace in 'pagename' to appropriate characters, applying each
        (old, new) replacement from the "whitespace" metadata setting or, if
        this is absent, from the default whitespace map.
        """

        wsmap = self.metadata.get("whitespace", self.default_whitespace_map)

        for old, new in wsmap:
            pagename = pagename.replace(old, new)

        return pagename

    # Identifier encoding.

    def make_id(self, s):

        """
        Make a suitable identifier for HTML element identification from 's'.

        An "A" prefix is added if the encoded text does not start with an
        alphabetical character, this including the case of 's' being empty.
        """

        # NOTE: This reproduces the Moin algorithm for compatibility.
        # NOTE: There may well be improvements possible, possibly by replacing plus
        # NOTE: with something less cumbersome, even though plus may be unusual in
        # NOTE: things like headings, anyway.

        # The desired output is the following pattern:

        # [A-Za-z][-_:.A-Za-z0-9]*

        # The Python UTF-7 encoder preserves symbols and it encodes + as +- with an
        # output range as follows (in addition to A-Za-z0-9):

        # -_:.%+ !"#$&\'()*,/;<=>?@[]^`{|}

        # The quote_plus function converts space to plus, preserves -_:. and encodes
        # all other symbols (including original occurrences of plus and percent) and
        # non-alphanumeric (ASCII) characters using percent encoding.

        # With colons preserved, the resulting output is in the following range
        # (in addition to A-Za-z0-9):

        # -_:.%+

        # Percent will only occur as an encoding prefix. Plus will only occur as a
        # replacement for space.

        # Combining quote_plus and UTF-7 gives the following range (in addition to
        # A-Za-z0-9):

        # -_:.%+

        # Examples:

        #          UTF-7     quote_plus   replace percent and plus
        # :     -> :      -> :         -> :
        # -     -> -      -> -         -> -
        # .     -> .      -> .         -> .
        # %     -> %      -> %25       -> .25
        # +     -> +-     -> %2B-      -> .2B-
        # _     -> _      -> _         -> _
        # space -> space  -> +         -> _

        # See: RFC2152 - UTF-7 A Mail-Safe Transformation Format of Unicode

        quoted = quote_plus(s.encode("utf-7"), ":").replace("%", ".").replace("+", "_")

        # Ensure that the identifier starts with an alphabetical character.
        # Slicing instead of indexing avoids an IndexError for an empty
        # fragment, which then yields "A".

        if not quoted[:1].isalpha():
            return "A%s" % quoted

        return quoted

linker = HTMLLinker

# vim: tabstop=4 expandtab shiftwidth=4