# MoinLight (file moinformat/links/html.py at 6ce4da9da429)

     1 #!/usr/bin/env python     2      3 """     4 HTML linking scheme.     5      6 Copyright (C) 2018, 2019, 2022, 2023 Paul Boddie <paul@boddie.org.uk>     7      8 This program is free software; you can redistribute it and/or modify it under     9 the terms of the GNU General Public License as published by the Free Software    10 Foundation; either version 3 of the License, or (at your option) any later    11 version.    12     13 This program is distributed in the hope that it will be useful, but WITHOUT    14 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS    15 FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more    16 details.    17     18 You should have received a copy of the GNU General Public License along with    19 this program.  If not, see <http://www.gnu.org/licenses/>.    20 """    21     22 from moinformat.links.common import Link, Linker, resolve    23 from urllib import quote, quote_plus    24     25 class HTMLLinker(Linker):    26     27     "Translate Moin links into HTML links."    28     29     name = "html"    30     31     def get_top_level(self):    32     33         "Return a relative link to the top level."    34     35         # The root page is at the top level already.    36     37         pagename = self.metadata.get("pagename", "")    38     39         if pagename == self.root_pagename:    40             return ""    41     42         # Siblings of the root page are actually one level below.    43     44         levels = pagename.count("/") + 1    45         return "/".join([".."] * levels)    46     47     def normalise(self, path):    48     49         "Return a normalised form of 'path'."    50     51         return not path.endswith("/") and "%s/" % path or path    52     53     def translate(self, target):    54     55         """    56         Translate the 'target', returning a link object containing the rewritten    57         target and a suitable default label.    
58         """    59     60         identifier = target.get_identifier()    61         text = target.get_text()    62         type = target.get_type()    63     64         # Fragments.    65     66         if type == "fragment":    67             return Link(self.quote(text), identifier, target)    68     69         # Sub-pages. Remove the leading slash for the label.    70     71         if type == "sub-page":    72             return Link(self.translate_pagename(text), identifier, target)    73     74         # Sibling (of ancestor) pages.    75     76         if type == "sibling-page":    77             return Link(self.translate_pagename(text), identifier, target)    78     79         # Plain URL.    80     81         if type == "url":    82             return Link(text, identifier, target)    83     84         # Top-level pages.    85     86         if type == "page":    87             return Link(self.translate_pagename(text), identifier, target)    88     89         # Attachment or interwiki link.    90     91         return self.translate_qualified_link(target)    92     93     def translate_pagename(self, text):    94     95         "Translate the pagename in 'text'."    96     97         # Obtain the target pagename and the fragment.    98         # Split the pagename into path components.    99    100         t = text.split("#", 1)   101    102         # Determine the actual pagename referenced.   103         # Replace the root pagename if it appears.   104    105         pagename = self.metadata.get("pagename", "")   106         resolved = resolve(t[0], pagename, self.root_pagename)   107    108         # Rewrite the target using a relative link to the top level and then the   109         # resolved pagename.   110    111         top_level = self.get_top_level()   112    113         # Support an explicit "DocumentIndex" filename for file browsing.   
114    115         document_index = self.metadata.get("document_index")   116    117         t[0] = "%s%s%s%s" % (top_level and "%s/" % top_level or "",   118                              resolved,   119                              resolved and "/" or "",   120                              document_index or "")   121    122         return self.quote("#".join(t))   123    124     def translate_qualified_link(self, target):   125    126         """   127         Translate a possible qualified link 'target', returning a link object   128         retaining a rewritten target and a suitable default label.   129    130         Return None if the link is not suitable.   131         """   132    133         identifier = target.get_identifier()   134         pagename = target.get_pagename()   135         text = target.get_text()   136         type = target.get_type()   137    138         # Attachment links.   139    140         if type == "attachment":   141             return Link(self.translate_attachment(identifier, pagename),   142                         identifier, target)   143    144         # Interwiki links.   145    146         url = self.mapping.get(type)   147         if url:   148             return Link(self.translate_interwiki(url, identifier),   149                         identifier or type, target)   150    151         return None   152    153     # Specific link translators.   154    155     def translate_attachment(self, target, pagename):   156    157         """   158         Return a translation of the given attachment 'target' associated with   159         the given 'pagename'.   
160         """   161    162         common_attachments = self.metadata.get("common_attachments")   163         top_level = self.get_top_level()   164    165         return self.quote("%s%s/%s%s" % (top_level and "%s/" % top_level or "",   166                                          self.attachments_dir,   167                                          not common_attachments and "%s/" % pagename or "",   168                                          target))   169    170     def translate_interwiki(self, url, target):   171    172         "Return a translation of the given interwiki 'target'."   173    174         return "%s%s" % (self.normalise(url), self.quote(target))   175    176     # Path encoding.   177    178     def quote(self, s):   179    180         """   181         Quote URL path 's', preserving path separators and fragment indicators,   182         encoding fragment identifiers.   183         """   184    185         s = self.replace_whitespace(s)   186         parts = s.split("#", 1)   187    188         if len(parts) > 1:   189             parts[1] = self.make_id(parts[1])   190    191         return "#".join(map(quote, parts))   192    193     # Whitespace conversion in pagenames.   194    195     def replace_whitespace(self, pagename):   196    197         "Map whitespace in 'pagename' to appropriate characters."   198    199         wsmap = self.metadata.get("whitespace", self.default_whitespace_map)   200    201         for old, new in wsmap:   202             pagename = pagename.replace(old, new)   203    204         return pagename   205    206     # Identifier encoding.   207    208     def make_id(self, s):   209    210         "Make a suitable identifier for HTML element identification."   211    212         # NOTE: This reproduces the Moin algorithm for compatibility.   
213         # NOTE: There may well be improvements possible, possibly by replacing plus   214         # NOTE: with something less cumbersome, even though plus may be unusual in   215         # NOTE: things like headings, anyway.   216    217         # The desired output is the following pattern:   218    219         # [A-Za-z][-_:.A-Za-z0-9]*   220    221         # The Python UTF-7 encoder preserves symbols and it encodes + as +- with an   222         # output range as follows (in addition to A-Za-z0-9):   223    224         # -_:.%+ !"#$&\'()*,/;<=>?@[]^`{|}   225    226         # The quote_plus function converts space to plus, preserves -_:. and encodes   227         # all other symbols (including original occurrences of plus and percent) and   228         # non-alphanumeric (ASCII) characters using percent encoding.   229    230         # With colons preserved, the resulting output is in the following range   231         # (in addition to A-Za-z0-9):   232    233         # -_:.%+   234    235         # Percent will only occur as an encoding prefix. Plus will only occur as a   236         # replacement for space.   237    238         # Combining quote_plus and UTF-7 gives the following range (in addition to   239         # A-Za-z0-9):   240    241         # -_:.%+   242    243         # Examples:   244    245         #          UTF-7         quote_plus    replace percent and plus   246         # :     -> :          -> :          -> :   247         # -     -> -          -> -          -> -   248         # .     -> .          -> .          -> .   
249         # %     -> %          -> %25        -> .25   250         # +     -> +-         -> %2B-       -> .2B-   251         # _     -> _          -> _          -> _   252         # space -> space      -> +          -> _   253    254         # See: RFC2152 - UTF-7 A Mail-Safe Transformation Format of Unicode   255    256         quoted = quote_plus(s.encode("utf-7"), ":").replace("%", ".").replace("+", "_")   257    258         # Ensure that the identifier starts with an alphabetical character.   259    260         if not quoted[0].isalpha():   261             return "A%s" % quoted   262         else:   263             return quoted   264    265 linker = HTMLLinker   266    267 # vim: tabstop=4 expandtab shiftwidth=4