paul@91 | 1 | #!/usr/bin/env python |
paul@91 | 2 | |
paul@91 | 3 | """ |
paul@91 | 4 | HTML linking scheme. |
paul@91 | 5 | |
paul@91 | 6 | Copyright (C) 2018 Paul Boddie <paul@boddie.org.uk> |
paul@91 | 7 | |
paul@91 | 8 | This program is free software; you can redistribute it and/or modify it under |
paul@91 | 9 | the terms of the GNU General Public License as published by the Free Software |
paul@91 | 10 | Foundation; either version 3 of the License, or (at your option) any later |
paul@91 | 11 | version. |
paul@91 | 12 | |
paul@91 | 13 | This program is distributed in the hope that it will be useful, but WITHOUT |
paul@91 | 14 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
paul@91 | 15 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for more |
paul@91 | 16 | details. |
paul@91 | 17 | |
paul@91 | 18 | You should have received a copy of the GNU General Public License along with |
paul@91 | 19 | this program. If not, see <http://www.gnu.org/licenses/>. |
paul@91 | 20 | """ |
paul@91 | 21 | |
paul@91 | 22 | from moinformat.links.common import Linker |
paul@128 | 23 | from urllib import quote, quote_plus |
paul@91 | 24 | from urlparse import urlparse |
paul@91 | 25 | |
class HTMLLinker(Linker):

    """
    Translate Moin links into HTML links.

    The methods read 'self.pagename' (the name of the page containing the
    links) and 'self.mapping' (a dictionary-like object mapping interwiki
    prefixes to base URLs). Neither is defined in this class.
    NOTE(review): both are presumably provided by the Linker base class;
    confirm against moinformat.links.common.
    """

    name = "html"

    def get_top_level(self):

        """
        Return a relative link to the top level.

        The result contains one ".." element for each "/" in the page name,
        joined using "/" and without a trailing separator. It is the empty
        string for a page name containing no "/".
        """

        levels = self.pagename.count("/")
        return "/".join([".."] * levels)

    def _top_level_prefix(self):

        """
        Return the top level link in a form that can be directly prepended to
        a path: either the empty string or the top level link followed by "/".
        """

        top_level = self.get_top_level()
        return "%s/" % top_level if top_level else ""

    def is_url(self, target):

        """
        Return whether the 'target' references a URL.

        The 'target' itself is returned if it has a URL scheme; otherwise, None
        is returned.
        """

        # Only the scheme (the first element of the parsed result) matters.

        scheme = urlparse(target)[0]
        return target if scheme else None

    def normalise(self, path):

        "Return a normalised form of 'path', always ending with a slash."

        return path if path.endswith("/") else "%s/" % path

    def translate(self, target):

        """
        Translate the 'target', returning a tuple containing the rewritten
        target string and a suitable default label.

        The label is None for all kinds of link except attachment and interwiki
        links, where it is the part of the target following the prefix.

        Trailing slashes are removed from 'target' before it is tested, in
        order, as a fragment, sub-page, sibling page, qualified link, plain URL
        and, finally, top-level page.
        """

        target = target.rstrip("/")

        # Fragments.

        if target.startswith("#"):
            return self.quote(target), None

        # Sub-pages.

        if target.startswith("/"):
            return self.translate_subpage(target), None

        # Sibling (of ancestor) pages.

        if target.startswith("../"):
            return self.translate_relative(target), None

        # Attachment or interwiki link.

        rewritten = self.translate_qualified_link(target)
        if rewritten:
            return rewritten # includes label

        # Plain URL.

        rewritten = self.is_url(target)
        if rewritten:
            return rewritten, None

        # Top-level pages.

        return self.quote("%s%s" % (self._top_level_prefix(), target)), None

    def translate_qualified_link(self, target):

        """
        Translate a possible qualified link 'target', returning a tuple
        containing a rewritten target and a suitable default label.

        A qualified link has the form "prefix:remainder" where the prefix is
        either "attachment" or a key found in the interwiki mapping. The default
        label is the remainder.

        Return None if the link is not suitable.
        """

        t = target.split(":", 1)
        if len(t) != 2:
            return None

        prefix, target = t

        # Attachment links.

        if prefix == "attachment":
            return self.translate_attachment(target), target

        # Interwiki links.

        url = self.mapping.get(prefix)
        if url:
            return self.translate_interwiki(url, target), target

        return None

    # Specific link translators.

    def translate_attachment(self, target):

        """
        Return a translation of the given attachment 'target'.

        The result is the quoted path to 'target' inside the "attachments"
        directory for the current page, relative to the top level.
        """

        # The prefix supplies the separator needed between any top level link
        # and the attachments directory: "../attachments/...", as opposed to
        # "..attachments/...".

        return self.quote("%sattachments/%s/%s" % (
            self._top_level_prefix(), self.pagename, target))

    def translate_interwiki(self, url, target):

        """
        Return a translation of the given interwiki 'target', this being the
        quoted target appended to the normalised base 'url'.
        """

        return "%s%s" % (self.normalise(url), self.quote(target))

    def translate_relative(self, target):

        """
        Return a translation of the given relative 'target', this being the
        quoted target with its initial "../" removed.
        """

        return self.quote(target[len("../"):])

    def translate_subpage(self, target):

        """
        Return a translation of the given subpage 'target', this being the
        quoted target (which starts with "/") prefixed with ".".
        """

        return self.quote(".%s" % target)

    # Path encoding.

    def quote(self, s):

        """
        Quote URL path 's', preserving path separators and fragment indicators,
        encoding fragment identifiers.

        Only the first "#" in 's' is treated as the fragment indicator. Any
        text following it is converted to an identifier using make_id before
        being quoted.
        """

        parts = s.split("#", 1)

        if len(parts) > 1:
            parts[1] = self.make_id(parts[1])

        return "#".join(map(quote, parts))

    # Identifier encoding.

    def make_id(self, s):

        """
        Make a suitable identifier for HTML element identification from 's'.

        The result always starts with an alphabetical character, "A" being
        prepended where necessary. An empty 's' therefore yields "A".
        """

        # NOTE: This reproduces the Moin algorithm for compatibility.
        # NOTE: There may well be improvements possible, possibly by replacing
        # NOTE: plus with something less cumbersome, even though plus may be
        # NOTE: unusual in things like headings, anyway.

        # The desired output is the following pattern:

        # [A-Za-z][-_:.A-Za-z0-9]*

        # The Python UTF-7 encoder preserves symbols and it encodes + as +- with
        # an output range as follows (in addition to A-Za-z0-9):

        # -_:.%+ !"#$&\'()*,/;<=>?@[]^`{|}

        # The quote_plus function converts space to plus, preserves -_:. and
        # encodes all other symbols (including original occurrences of plus and
        # percent) and non-alphanumeric (ASCII) characters using percent
        # encoding.

        # With colons preserved, the resulting output is in the following range
        # (in addition to A-Za-z0-9):

        # -_:.%+

        # Percent will only occur as an encoding prefix. Plus will only occur as
        # a replacement for space.

        # Combining quote_plus and UTF-7 gives the following range (in addition
        # to A-Za-z0-9):

        # -_:.%+

        # Examples:

        #           UTF-7     quote_plus    replace percent and plus
        # :     ->  :     ->  :         ->  :
        # -     ->  -     ->  -         ->  -
        # .     ->  .     ->  .         ->  .
        # %     ->  %     ->  %25       ->  .25
        # +     ->  +-    ->  %2B-      ->  .2B-
        # _     ->  _     ->  _         ->  _
        # space ->  space ->  +         ->  _

        # See: RFC2152 - UTF-7 A Mail-Safe Transformation Format of Unicode

        quoted = quote_plus(s.encode("utf-7"), ":").replace("%", ".").replace("+", "_")

        # Ensure that the identifier starts with an alphabetical character.
        # A slice is tested instead of an index so that an empty identifier
        # does not raise IndexError and is given the prefix alone.

        if not quoted[:1].isalpha():
            return "A%s" % quoted

        return quoted

# Module-level alias for the linker class defined in this module.
# NOTE(review): presumably looked up by name when linkers are loaded; confirm
# against the code importing moinformat.links modules.

linker = HTMLLinker
paul@91 | 224 | |
paul@91 | 225 | # vim: tabstop=4 expandtab shiftwidth=4 |