#!/usr/bin/env python

"""
Moin serialiser support.

Copyright (C) 2017, 2018 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
this program.  If not, see <http://www.gnu.org/licenses/>.
"""

# The quote_plus function lives in urllib on Python 2 and in urllib.parse on
# Python 3. Try the original location first so that existing behaviour is
# unchanged.

try:
    from urllib import quote_plus
except ImportError:
    from urllib.parse import quote_plus

class Serialiser:

    "General serialisation support."

    format = None # defined by subclasses

    def __init__(self, output, formats=None, linker=None):

        """
        Initialise the serialiser with an 'output' context, an optional
        'formats' mapping from names to serialiser classes, and an optional
        'linker' object for translating links.
        """

        self.output = output
        self.formats = formats
        self.linker = linker

        # Initialise a callable for use in serialisation.

        self.out = output.out

        # Initialisation of any other state.

        self.init()

    def init(self):

        "Initialisation method to be overridden by subclasses."

        pass

    def __repr__(self):

        "Return a representation showing the class name and constructor arguments."

        return "%s(%r, %r, %r)" % (self.__class__.__name__, self.output,
                                   self.formats, self.linker)

    def get_serialiser(self, format):

        """
        Return a serialiser for the given 'format'. Return self if no suitable
        serialiser can be obtained, this being the case if no formats mapping
        was supplied or if the mapping has no usable entry for 'format'.
        """

        cls = self.formats and self.formats.get(format)
        if cls:
            return self.instantiate(cls)
        else:
            return self

    def get_output(self):

        "Return the output as a string."

        return self.output.to_string()

    def instantiate(self, cls):

        """
        Instantiate 'cls' and return the result if 'cls' is a different class to
        this instance. Otherwise, return this instance. Any new instance shares
        the output context, formats mapping and linker of this instance.
        """

        if cls is self.__class__:
            return self
        else:
            return cls(self.output, self.formats, self.linker)

def escape_attr(s):

    """
    Escape XML document attribute, returning 's' with the markup characters
    handled by escape_text replaced together with single and double quotes.
    """

    return escape_text(s).replace("'", "&apos;").replace('"', "&quot;")

def escape_text(s):

    """
    Escape XML document text, returning 's' with ampersand, less-than and
    greater-than replaced by the corresponding entity references.
    """

    # Ampersand must be replaced first so that the ampersands introduced by the
    # other replacements are not themselves escaped.

    return s.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")

def make_id(s):

    """
    Make a suitable identifier for XML element identification from 's'. The
    result always starts with an alphabetical character, with "A" being
    prefixed where necessary. An empty 's' yields "A".
    """

    # NOTE: This reproduces the Moin algorithm for compatibility.
    # NOTE: There may well be improvements possible, possibly by replacing plus
    # NOTE: with something less cumbersome, even though plus may be unusual in
    # NOTE: things like headings, anyway.

    # The desired output is the following pattern:

    # [A-Za-z][-_:.A-Za-z0-9]*

    # The Python UTF-7 encoder preserves symbols and it encodes + as +- with an
    # output range as follows (in addition to A-Za-z0-9):

    # -_:.%+ !"#$&\'()*,/;<=>?@[]^`{|}

    # The quote_plus function converts space to plus, preserves -_:. and encodes
    # all other symbols (including original occurrences of plus and percent) and
    # non-alphanumeric (ASCII) characters using percent encoding.

    # With colons preserved, the resulting output is in the following range
    # (in addition to A-Za-z0-9):

    # -_:.%+

    # Percent will only occur as an encoding prefix. Plus will only occur as a
    # replacement for space.

    # Combining quote_plus and UTF-7 gives the following range (in addition to
    # A-Za-z0-9):

    # -_:.%+

    # Examples:

    #          UTF-7     quote_plus     replace percent and plus
    # :     -> :      -> :           -> :
    # -     -> -      -> -           -> -
    # .     -> .      -> .           -> .
    # %     -> %      -> %25         -> .25
    # +     -> +-     -> %2B-        -> .2B-
    # _     -> _      -> _           -> _
    # space -> space  -> +           -> _

    # See: RFC2152 - UTF-7 A Mail-Safe Transformation Format of Unicode

    quoted = quote_plus(s.encode("utf-7"), ":").replace("%", ".").replace("+", "_")

    # Ensure that the identifier starts with an alphabetical character. A slice
    # is tested instead of the first element so that an empty string is handled
    # without an IndexError.

    if not quoted[:1].isalpha():
        return "A%s" % quoted
    else:
        return quoted

# vim: tabstop=4 expandtab shiftwidth=4