paul@38 | 1 | #!/usr/bin/env python |
paul@38 | 2 | |
paul@38 | 3 | """ |
paul@38 | 4 | Moin serialiser support. |
paul@38 | 5 | |
paul@46 | 6 | Copyright (C) 2017, 2018 Paul Boddie <paul@boddie.org.uk> |
paul@38 | 7 | |
paul@38 | 8 | This program is free software; you can redistribute it and/or modify it under |
paul@38 | 9 | the terms of the GNU General Public License as published by the Free Software |
paul@38 | 10 | Foundation; either version 3 of the License, or (at your option) any later |
paul@38 | 11 | version. |
paul@38 | 12 | |
paul@38 | 13 | This program is distributed in the hope that it will be useful, but WITHOUT |
paul@38 | 14 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
paul@38 | 15 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for more |
paul@38 | 16 | details. |
paul@38 | 17 | |
paul@38 | 18 | You should have received a copy of the GNU General Public License along with |
paul@38 | 19 | this program. If not, see <http://www.gnu.org/licenses/>. |
paul@38 | 20 | """ |
paul@38 | 21 | |
paul@113 | 22 | from urllib import quote_plus |
paul@113 | 23 | |
class Serialiser:

    "Common support shared by serialisers."

    # Subclasses are expected to override this.

    format = None

    def __init__(self, output, formats=None, linker=None):

        """
        Set up the serialiser using the given 'output' context, together with
        an optional 'formats' mapping (from format names to serialiser classes)
        and an optional 'linker' object employed to translate links.
        """

        self.output = output
        self.formats = formats
        self.linker = linker

        # Keep a direct reference to the output context's emitting callable.

        self.out = output.out

        # Give subclasses the chance to establish further state.

        self.init()

    def init(self):

        "Hook for subclass-specific initialisation; does nothing here."

        pass

    def __repr__(self):
        details = (self.__class__.__name__, self.output, self.formats,
                   self.linker)
        return "%s(%r, %r, %r)" % details

    def get_serialiser(self, format):

        """
        Look up 'format' in the known formats and return a serialiser for it.
        Where no formats are known or no class is registered for 'format',
        this instance is returned instead.
        """

        if self.formats:
            cls = self.formats.get(format)
            if cls:
                return self.instantiate(cls)

        return self

    def get_output(self):

        "Return the string representation provided by the output context."

        output = self.output
        return output.to_string()

    def instantiate(self, cls):

        """
        Return this instance if 'cls' is exactly its class. Otherwise, return
        a new instance of 'cls' sharing this instance's output context, formats
        and linker.
        """

        if cls is not self.__class__:
            return cls(self.output, self.formats, self.linker)

        return self
paul@100 | 90 | |
def escape_attr(s):

    """
    Escape the string 's' for use as an XML document attribute value. The
    text escaping is applied first, after which single and double quotes are
    replaced by entity references.
    """

    # Quotes are handled after the text escaping so that the ampersands
    # introduced here are not themselves escaped again.

    return escape_text(s).replace("'", "&apos;").replace('"', "&quot;")

def escape_text(s):

    """
    Escape the string 's' for use as XML document text, replacing the
    ampersand, less-than and greater-than characters with entity references.
    """

    # The ampersand must be replaced first: the other replacements introduce
    # ampersands of their own.

    return s.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
paul@38 | 102 | |
def make_id(s):

    """
    Make a suitable identifier for XML element identification from the text
    's'. The result always starts with an alphabetical character: where the
    encoded text does not (or where 's' is empty), "A" is prefixed.
    """

    # NOTE: This reproduces the Moin algorithm for compatibility.
    # NOTE: There may well be improvements possible, possibly by replacing plus
    # NOTE: with something less cumbersome, even though plus may be unusual in
    # NOTE: things like headings, anyway.

    # The desired output is the following pattern:

    # [A-Za-z][-_:.A-Za-z0-9]*

    # The Python UTF-7 encoder preserves symbols and it encodes + as +- with an
    # output range as follows (in addition to A-Za-z0-9):

    # -_:.%+ !"#$&\'()*,/;<=>?@[]^`{|}

    # The quote_plus function converts space to plus, preserves -_:. and encodes
    # all other symbols (including original occurrences of plus and percent) and
    # non-alphanumeric (ASCII) characters using percent encoding.

    # With colons preserved, the resulting output is in the following range
    # (in addition to A-Za-z0-9):

    # -_:.%+

    # Percent will only occur as an encoding prefix. Plus will only occur as a
    # replacement for space.

    # Combining quote_plus and UTF-7 gives the following range (in addition to
    # A-Za-z0-9):

    # -_:.%+

    # Examples:

    #          UTF-7     quote_plus     replace percent and plus
    # :     -> :      -> :           -> :
    # -     -> -      -> -           -> -
    # .     -> .      -> .           -> .
    # %     -> %      -> %25         -> .25
    # +     -> +-     -> %2B-        -> .2B-
    # _     -> _      -> _           -> _
    # space -> space  -> +           -> _

    # See: RFC2152 - UTF-7 A Mail-Safe Transformation Format of Unicode

    quoted = quote_plus(s.encode("utf-7"), ":").replace("%", ".").replace("+", "_")

    # Ensure that the identifier starts with an alphabetical character. A slice
    # is tested instead of the first element so that empty text yields "A"
    # rather than raising IndexError.

    if not quoted[:1].isalpha():
        return "A%s" % quoted
    else:
        return quoted
paul@113 | 159 | |
paul@38 | 160 | # vim: tabstop=4 expandtab shiftwidth=4 |