paul@91 | 1 | #!/usr/bin/env python |
paul@91 | 2 | |
paul@91 | 3 | """ |
paul@91 | 4 | HTML linking scheme. |
paul@91 | 5 | |
paul@91 | 6 | Copyright (C) 2018 Paul Boddie <paul@boddie.org.uk> |
paul@91 | 7 | |
paul@91 | 8 | This program is free software; you can redistribute it and/or modify it under |
paul@91 | 9 | the terms of the GNU General Public License as published by the Free Software |
paul@91 | 10 | Foundation; either version 3 of the License, or (at your option) any later |
paul@91 | 11 | version. |
paul@91 | 12 | |
paul@91 | 13 | This program is distributed in the hope that it will be useful, but WITHOUT |
paul@91 | 14 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
paul@91 | 15 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for more |
paul@91 | 16 | details. |
paul@91 | 17 | |
paul@91 | 18 | You should have received a copy of the GNU General Public License along with |
paul@91 | 19 | this program. If not, see <http://www.gnu.org/licenses/>. |
paul@91 | 20 | """ |
paul@91 | 21 | |
paul@91 | 22 | from moinformat.links.common import Linker |
paul@128 | 23 | from urllib import quote, quote_plus |
paul@91 | 24 | from urlparse import urlparse |
paul@91 | 25 | |
class HTMLLinker(Linker):

    """
    Translate Moin links into HTML links.

    The methods read 'self.pagename' (the name of the page containing the
    links, with "/" separating page levels) and 'self.mapping' (a dictionary
    of interwiki prefixes and their base URLs). Neither is set in this class.
    NOTE(review): presumably both are provided by Linker - confirm.
    """

    name = "html"

    def get_top_level(self):

        """
        Return a relative link to the top level: one ".." component for each
        "/" in the page name, joined using "/". The result is an empty string
        for a page name without "/", and it never ends with "/".
        """

        levels = self.pagename.count("/")
        return "/".join([".."] * levels)

    def _top_level_prefix(self):

        """
        Return the top level link in a form that can be placed directly before
        a path: an empty string where get_top_level yields nothing, otherwise
        the top level link followed by "/".
        """

        top_level = self.get_top_level()

        if top_level:
            return "%s/" % top_level

        return ""

    def is_url(self, target):

        """
        Return the 'target' itself if it employs a URL scheme, or None if it
        does not.
        """

        if urlparse(target).scheme:
            return target

        return None

    def normalise(self, path):

        "Return a normalised form of 'path', this always ending with a slash."

        if path.endswith("/"):
            return path

        return "%s/" % path

    def translate(self, target):

        """
        Translate the 'target', returning a tuple containing the rewritten
        target string and a suitable default label.

        The label is None except for attachment and interwiki links, where it
        is the part of the target following the prefix.
        """

        # Trailing slashes are never significant in the recognised forms.

        target = target.rstrip("/")

        # Sub-pages.

        if target.startswith("/"):
            return self.translate_subpage(target), None

        # Sibling (of ancestor) pages.

        if target.startswith("../"):
            return self.translate_relative(target), None

        # Attachment or interwiki link: the result already includes the label.

        rewritten = self.translate_qualified_link(target)
        if rewritten:
            return rewritten

        # Plain URL, which is passed through without any quoting.

        rewritten = self.is_url(target)
        if rewritten:
            return rewritten, None

        # Top-level pages.

        return self.quote("%s%s" % (self._top_level_prefix(), target)), None

    def translate_qualified_link(self, target):

        """
        Translate a possible qualified link 'target', returning a tuple
        containing a rewritten target and a suitable default label.

        Return None if the link is not suitable: either it has no colon or its
        prefix is neither "attachment" nor a key with a non-empty value in the
        interwiki mapping.
        """

        prefix, separator, remainder = target.partition(":")

        if not separator:
            return None

        # Attachment links.

        if prefix == "attachment":
            return self.translate_attachment(remainder), remainder

        # Interwiki links.

        url = self.mapping.get(prefix)
        if url:
            return self.translate_interwiki(url, remainder), remainder

        return None

    # Specific link translators.

    def translate_attachment(self, target):

        """
        Return a translation of the given attachment 'target': a quoted path
        of the form "attachments/<pagename>/<target>" preceded by the relative
        link to the top level.
        """

        # The prefix supplies the separator needed after any ".." components.
        # Without it, a page below the top level would yield a path starting
        # with "..attachments".

        return self.quote("%sattachments/%s/%s" % (
            self._top_level_prefix(), self.pagename, target))

    def translate_interwiki(self, url, target):

        """
        Return a translation of the given interwiki 'target': the quoted target
        appended to the base 'url', with the latter made to end with a slash.
        """

        return "%s%s" % (self.normalise(url), self.quote(target))

    def translate_relative(self, target):

        """
        Return a translation of the given relative 'target', this being the
        quoted target without its leading "../" (only the first occurrence is
        removed).
        """

        return self.quote(target[len("../"):])

    def translate_subpage(self, target):

        """
        Return a translation of the given subpage 'target', this being the
        quoted target (which starts with "/") preceded by ".".
        """

        return self.quote(".%s" % target)

    # Path encoding.

    def quote(self, s):

        """
        Quote URL path 's', preserving path separators and fragment indicators,
        encoding fragment identifiers.

        Only the first "#" in 's' is treated as the fragment indicator. The
        text following it is converted using make_id before being quoted.
        """

        parts = s.split("#", 1)
        path = parts[0]

        # The Python 2 quote function raises KeyError for Unicode text with
        # characters outside ASCII, so such text is encoded as UTF-8 first.
        # Byte strings are left untouched.

        if not isinstance(path, bytes):
            path = path.encode("utf-8")

        quoted = [quote(path)]

        if len(parts) > 1:
            quoted.append(quote(self.make_id(parts[1])))

        return "#".join(quoted)

    # Identifier encoding.

    def make_id(self, s):

        """
        Make a suitable identifier for HTML element identification from 's',
        returning text that always starts with an alphabetical character.

        An empty 's' yields "A".
        """

        # NOTE: This reproduces the Moin algorithm for compatibility.
        # NOTE: There may well be improvements possible, possibly by replacing
        # NOTE: plus with something less cumbersome, even though plus may be
        # NOTE: unusual in things like headings, anyway.

        # The desired output is the following pattern:

        # [A-Za-z][-_:.A-Za-z0-9]*

        # The Python UTF-7 encoder preserves symbols and it encodes + as +-
        # with an output range as follows (in addition to A-Za-z0-9):

        # -_:.%+ !"#$&\'()*,/;<=>?@[]^`{|}

        # The quote_plus function converts space to plus, preserves -_:. and
        # encodes all other symbols (including original occurrences of plus
        # and percent) and non-alphanumeric (ASCII) characters using percent
        # encoding.

        # With colons preserved, the resulting output is in the following
        # range (in addition to A-Za-z0-9):

        # -_:.%+

        # Percent will only occur as an encoding prefix. Plus will only occur
        # as a replacement for space.

        # Combining quote_plus and UTF-7 gives the following range (in
        # addition to A-Za-z0-9):

        # -_:.%+

        # Examples:

        #          UTF-7    quote_plus    replace percent and plus
        # :     -> :     -> :          -> :
        # -     -> -     -> -          -> -
        # .     -> .     -> .          -> .
        # %     -> %     -> %25        -> .25
        # +     -> +-    -> %2B-       -> .2B-
        # _     -> _     -> _          -> _
        # space -> space -> +          -> _

        # See: RFC2152 - UTF-7 A Mail-Safe Transformation Format of Unicode

        encoded = quote_plus(s.encode("utf-7"), ":")
        quoted = encoded.replace("%", ".").replace("+", "_")

        # Ensure that the identifier starts with an alphabetical character.
        # Slicing instead of indexing lets an empty result be handled without
        # raising IndexError.

        if quoted[:1].isalpha():
            return quoted

        return "A%s" % quoted
paul@91 | 217 | |
# Module-level alias for the linker class defined in this file.
# NOTE(review): presumably looked up under this generic name by the code that
# selects a linking scheme - confirm against the moinformat.links package.
linker = HTMLLinker
paul@91 | 219 | |
paul@91 | 220 | # vim: tabstop=4 expandtab shiftwidth=4 |