MoinLight (file moinformat/links/html.py at ea5b599f5155)

     1 #!/usr/bin/env python     2      3 """     4 HTML linking scheme.     5      6 Copyright (C) 2018 Paul Boddie <paul@boddie.org.uk>     7      8 This program is free software; you can redistribute it and/or modify it under     9 the terms of the GNU General Public License as published by the Free Software    10 Foundation; either version 3 of the License, or (at your option) any later    11 version.    12     13 This program is distributed in the hope that it will be useful, but WITHOUT    14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS    15 FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more    16 details.    17     18 You should have received a copy of the GNU General Public License along with    19 this program.  If not, see <http://www.gnu.org/licenses/>.    20 """    21     22 from moinformat.links.common import Linker    23 from urllib import quote, quote_plus    24 from urlparse import urlparse    25     26 class HTMLLinker(Linker):    27     28     "Translate Moin links into HTML links."    29     30     name = "html"    31     32     def get_top_level(self):    33     34         "Return a relative link to the top level."    35     36         levels = self.pagename.count("/")    37         return "/".join([".."] * levels)    38     39     def is_url(self, target):    40     41         "Return whether the 'target' references a URL."    42     43         scheme, host, path, params, query, fragment = urlparse(target)    44         return scheme and target or None    45     46     def normalise(self, path):    47     48         "Return a normalised form of 'path'."    49     50         return not path.endswith("/") and "%s/" % path or path    51     52     def translate(self, target):    53     54         """    55         Translate the 'target', returning a tuple containing the rewritten    56         target string and a suitable default label.    57         """    58     59         target = target.rstrip("/")    60     61         # Fragments.    62     63         if target.startswith("#"):    64             return self.quote(target), None    65     66         # Sub-pages.    67     68         elif target.startswith("/"):    69             return self.translate_subpage(target), None    70     71         # Sibling (of ancestor) pages.    72     73         if target.startswith("../"):    74             return self.translate_relative(target), None    75     76         # Attachment or interwiki link.    77     78         rewritten = self.translate_qualified_link(target)    79         if rewritten:    80             return rewritten # includes label    81     82         # Plain URL.    83     84         rewritten = self.is_url(target)    85         if rewritten:    86             return rewritten, None    87     88         # Top-level pages.    89     90         top_level = self.get_top_level()    91         return self.quote("%s%s" % (top_level and "%s/" % top_level or "", target)), None    92     93     def translate_qualified_link(self, target):    94     95         """    96         Translate a possible qualified link 'target', returning a tuple    97         containing a rewritten target and a suitable default label.    98     99         Return None if the link is not suitable.   100         """   101    102         t = target.split(":", 1)   103         if len(t) != 2:   104             return None   105    106         prefix, target = t   107    108         # Attachment links.   109    110         if prefix == "attachment":   111             return self.translate_attachment(target), target   112    113         # Interwiki links.   114    115         url = self.mapping.get(prefix)   116         if url:   117             return self.translate_interwiki(url, target), target   118    119         return None   120    121     # Specific link translators.   122    123     def translate_attachment(self, target):   124    125         "Return a translation of the given attachment 'target'."   126    127         return self.quote("%sattachments/%s/%s" % (   128             self.get_top_level(), self.pagename, target))   129    130     def translate_interwiki(self, url, target):   131    132         "Return a translation of the given interwiki 'target'."   133    134         return "%s%s" % (self.normalise(url), self.quote(target))   135    136     def translate_relative(self, target):   137    138         "Return a translation of the given relative 'target'."   139    140         return self.quote(target[len("../"):])   141    142     def translate_subpage(self, target):   143    144         "Return a translation of the given subpage 'target'."   145    146         return self.quote(".%s" % target)   147    148     # Path encoding.   149    150     def quote(self, s):   151    152         """   153         Quote URL path 's', preserving path separators and fragment indicators,   154         encoding fragment identifiers.   155         """   156    157         parts = s.split("#", 1)   158    159         if len(parts) > 1:   160             parts[1] = self.make_id(parts[1])   161    162         return "#".join(map(quote, parts))   163    164     # Identifier encoding.   165    166     def make_id(self, s):   167    168         "Make a suitable identifier for HTML element identification."   169    170         # NOTE: This reproduces the Moin algorithm for compatibility.   171         # NOTE: There may well be improvements possible, possibly by replacing plus   172         # NOTE: with something less cumbersome, even though plus may be unusual in   173         # NOTE: things like headings, anyway.   174    175         # The desired output is the following pattern:   176    177         # [A-Za-z][-_:.A-Za-z0-9]*   178    179         # The Python UTF-7 encoder preserves symbols and it encodes + as +- with an   180         # output range as follows (in addition to A-Za-z0-9):   181    182         # -_:.%+ !"#$&\'()*,/;<=>?@[]^`{|}   183    184         # The quote_plus function converts space to plus, preserves -_:. and encodes   185         # all other symbols (including original occurrences of plus and percent) and   186         # non-alphanumeric (ASCII) characters using percent encoding.   187    188         # With colons preserved, the resulting output is in the following range   189         # (in addition to A-Za-z0-9):   190    191         # -_:.%+   192    193         # Percent will only occur as an encoding prefix. Plus will only occur as a   194         # replacement for space.   195    196         # Combining quote_plus and UTF-7 gives the following range (in addition to   197         # A-Za-z0-9):   198    199         # -_:.%+   200    201         # Examples:   202    203         #          UTF-7         quote_plus    replace percent and plus   204         # :     -> :          -> :          -> :   205         # -     -> -          -> -          -> -   206         # .     -> .          -> .          -> .   207         # %     -> %          -> %25        -> .25   208         # +     -> +-         -> %2B-       -> .2B-   209         # _     -> _          -> _          -> _   210         # space -> space      -> +          -> _   211    212         # See: RFC2152 - UTF-7 A Mail-Safe Transformation Format of Unicode   213    214         quoted = quote_plus(s.encode("utf-7"), ":").replace("%", ".").replace("+", "_")   215    216         # Ensure that the identifier starts with an alphabetical character.   217    218         if not quoted[0].isalpha():   219             return "A%s" % quoted   220         else:   221             return quoted   222    223 linker = HTMLLinker   224    225 # vim: tabstop=4 expandtab shiftwidth=4