paul@91 | 1 | #!/usr/bin/env python |
paul@91 | 2 | |
paul@91 | 3 | """ |
paul@91 | 4 | HTML linking scheme. |
paul@91 | 5 | |
paul@91 | 6 | Copyright (C) 2018 Paul Boddie <paul@boddie.org.uk> |
paul@91 | 7 | |
paul@91 | 8 | This program is free software; you can redistribute it and/or modify it under |
paul@91 | 9 | the terms of the GNU General Public License as published by the Free Software |
paul@91 | 10 | Foundation; either version 3 of the License, or (at your option) any later |
paul@91 | 11 | version. |
paul@91 | 12 | |
paul@91 | 13 | This program is distributed in the hope that it will be useful, but WITHOUT |
paul@91 | 14 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
paul@91 | 15 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for more |
paul@91 | 16 | details. |
paul@91 | 17 | |
paul@91 | 18 | You should have received a copy of the GNU General Public License along with |
paul@91 | 19 | this program. If not, see <http://www.gnu.org/licenses/>. |
paul@91 | 20 | """ |
paul@91 | 21 | |
paul@159 | 22 | from moinformat.links.common import Linker, resolve |
paul@128 | 23 | from urllib import quote, quote_plus |
paul@91 | 24 | from urlparse import urlparse |
paul@91 | 25 | |
class HTMLLinker(Linker):

    "Translate Moin links into HTML links."

    # The name identifying this linking scheme.
    # NOTE(review): presumably used to select this linker by name - confirm
    # against the code using moinformat.links.

    name = "html"

    def get_top_level(self):

        """
        Return a relative link to the top level. This is an empty string for
        the root page and a sequence of ".." components, one per level, for any
        other page.
        """

        # The root page is at the top level already.

        if self.pagename == self.root_pagename:
            return ""

        # Siblings of the root page are actually one level below, meaning that
        # even a pagename without path separators needs one ".." component.

        levels = self.pagename.count("/") + 1
        return "/".join([".."] * levels)

    def is_url(self, target):

        """
        Return the 'target' itself if it references a URL (meaning that it
        employs a URL scheme), or None otherwise.
        """

        scheme = urlparse(target)[0]

        if scheme:
            return target
        return None

    def normalise(self, path):

        """
        Return a normalised form of 'path', this always ending with a path
        separator.
        """

        if path.endswith("/"):
            return path
        return "%s/" % path

    def translate(self, target):

        """
        Translate the 'target', returning a tuple containing the rewritten
        target string and a suitable default label.

        The label is None where no specific label is suggested for the link.
        """

        target = target.rstrip("/")

        # Fragments. Remove the leading hash for the label.

        if target.startswith("#"):
            return self.quote(target), target.lstrip("#")

        # Sub-pages. Remove the leading slash for the label.

        if target.startswith("/"):
            return self.translate_pagename(target), target.lstrip("/")

        # Sibling (of ancestor) pages.

        if target.startswith("../"):
            return self.translate_pagename(target), None

        # Attachment or interwiki link. These are tested before plain URLs
        # since their prefixes would otherwise be interpreted as URL schemes.

        rewritten = self.translate_qualified_link(target)
        if rewritten:
            return rewritten # includes label

        # Plain URL.

        rewritten = self.is_url(target)
        if rewritten:
            return rewritten, None

        # Top-level pages.

        return self.translate_pagename(target), None

    def translate_pagename(self, target):

        """
        Translate the pagename in 'target', returning a quoted relative link
        incorporating any fragment identifier present in 'target'.
        """

        # Obtain the target pagename and the fragment.

        t = target.split("#", 1)

        # Determine the actual pagename referenced.
        # Replace the root pagename if it appears.

        resolved = resolve(t[0], self.pagename, self.root_pagename)

        # Rewrite the target using a relative link to the top level and then the
        # resolved pagename. No prefix is needed when already at the top level.

        top_level = self.get_top_level()
        prefix = ("%s/" % top_level) if top_level else ""
        t[0] = "%s%s" % (prefix, resolved)

        return self.quote("#".join(t))

    def translate_qualified_link(self, target):

        """
        Translate a possible qualified link 'target', returning a tuple
        containing a rewritten target and a suitable default label.

        A qualified link has the form "prefix:remainder", with the remainder
        being used as the label.

        Return None if the link is not suitable.
        """

        t = target.split(":", 1)
        if len(t) != 2:
            return None

        prefix, target = t

        # Attachment links.

        if prefix == "attachment":
            return self.translate_attachment(target), target

        # Interwiki links. The mapping provides a base URL for each known
        # interwiki prefix.

        url = self.mapping.get(prefix)
        if url:
            return self.translate_interwiki(url, target), target

        return None

    # Specific link translators.

    def translate_attachment(self, target):

        """
        Return a translation of the given attachment 'target', this being a
        quoted path within the "attachments" directory relative to the page.
        """

        return self.quote("./attachments/%s" % target)

    def translate_interwiki(self, url, target):

        """
        Return a translation of the given interwiki 'target', this being the
        quoted target appended to the normalised base 'url'.
        """

        return "%s%s" % (self.normalise(url), self.quote(target))

    # Path encoding.

    def quote(self, s):

        """
        Quote URL path 's', preserving path separators and fragment indicators,
        encoding fragment identifiers.
        """

        parts = s.split("#", 1)

        # Only the text after the first hash is a fragment identifier, and this
        # is converted to a HTML identifier before being quoted.

        if len(parts) > 1:
            parts[1] = self.make_id(parts[1])

        return "#".join([quote(part) for part in parts])

    # Identifier encoding.

    def make_id(self, s):

        """
        Make a suitable identifier for HTML element identification from the
        text 's', returning "A" for empty text.
        """

        # NOTE: This reproduces the Moin algorithm for compatibility.
        # NOTE: There may well be improvements possible, possibly by replacing plus
        # NOTE: with something less cumbersome, even though plus may be unusual in
        # NOTE: things like headings, anyway.

        # The desired output is the following pattern:

        # [A-Za-z][-_:.A-Za-z0-9]*

        # The Python UTF-7 encoder preserves symbols and it encodes + as +- with an
        # output range as follows (in addition to A-Za-z0-9):

        # -_:.%+ !"#$&\'()*,/;<=>?@[]^`{|}

        # The quote_plus function converts space to plus, preserves -_:. and encodes
        # all other symbols (including original occurrences of plus and percent) and
        # non-alphanumeric (ASCII) characters using percent encoding.

        # With colons preserved, the resulting output is in the following range
        # (in addition to A-Za-z0-9):

        # -_:.%+

        # Percent will only occur as an encoding prefix. Plus will only occur as a
        # replacement for space.

        # Combining quote_plus and UTF-7 gives the following range (in addition to
        # A-Za-z0-9):

        # -_:.%+

        # Examples:

        #           UTF-7       quote_plus  replace percent and plus
        # :     ->  :       ->  :       ->  :
        # -     ->  -       ->  -       ->  -
        # .     ->  .       ->  .       ->  .
        # %     ->  %       ->  %25     ->  .25
        # +     ->  +-      ->  %2B-    ->  .2B-
        # _     ->  _       ->  _       ->  _
        # space ->  space   ->  +       ->  _

        # See: RFC2152 - UTF-7 A Mail-Safe Transformation Format of Unicode

        quoted = quote_plus(s.encode("utf-7"), ":").replace("%", ".").replace("+", "_")

        # Ensure that the identifier starts with an alphabetical character.
        # Slicing instead of indexing permits empty text (as provided by a bare
        # "#" target) to be handled without raising IndexError.

        if not quoted[:1].isalpha():
            return "A%s" % quoted
        return quoted
paul@91 | 238 | |
# Expose the linker class under the generic module-level name "linker".
# NOTE(review): presumably this name is looked up by the code that loads
# linking schemes - confirm against moinformat.links.
linker = HTMLLinker
paul@91 | 240 | |
paul@91 | 241 | # vim: tabstop=4 expandtab shiftwidth=4 |