# HG changeset patch
# User Paul Boddie
# Date 1533051568 -7200
# Node ID f9321fa1c1de18a55a6f108fcda651741564cf25
# Parent  6f40d4d5320a82e6c769cd99d9489245097b470c
Introduced identifiers for headings to permit direct linking.

diff -r 6f40d4d5320a -r f9321fa1c1de moinformat/serialisers/common.py
--- a/moinformat/serialisers/common.py	Tue Jul 31 17:38:45 2018 +0200
+++ b/moinformat/serialisers/common.py	Tue Jul 31 17:39:28 2018 +0200
@@ -19,6 +19,8 @@
 this program.  If not, see <http://www.gnu.org/licenses/>.
 """
 
+from urllib import quote_plus
+
 class Serialiser:
 
     "General serialisation support."
@@ -98,4 +100,62 @@
 
     return s.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
 
+def make_id(s):
+
+    "Make a suitable identifier for XML element identification."
+
+    # NOTE: This reproduces the Moin algorithm for compatibility.
+    # NOTE: There may well be improvements possible, possibly by replacing plus
+    # NOTE: with something less cumbersome, even though plus may be unusual in
+    # NOTE: things like headings, anyway.
+
+    # The desired output is the following pattern:
+
+    # [A-Za-z][-_:.A-Za-z0-9]*
+
+    # The Python UTF-7 encoder preserves symbols and it encodes + as +- with an
+    # output range as follows (in addition to A-Za-z0-9):
+
+    # -_:.%+ !"#$&\'()*,/;<=>?@[]^`{|}
+
+    # The quote_plus function converts space to plus, preserves -_:. and encodes
+    # all other symbols (including original occurrences of plus and percent) and
+    # non-alphanumeric (ASCII) characters using percent encoding.
+
+    # With colons preserved, the resulting output is in the following range
+    # (in addition to A-Za-z0-9):
+
+    # -_:.%+
+
+    # Percent will only occur as an encoding prefix. Plus will only occur as a
+    # replacement for space.
+
+    # Combining quote_plus and UTF-7 gives the following range (in addition to
+    # A-Za-z0-9):
+
+    # -_:.%+
+
+    # Examples:
+
+    #          UTF-7       quote_plus    replace percent and plus
+    # :     -> :        -> :          -> :
+    # -     -> -        -> -          -> -
+    # .     -> .        -> .          -> .
+    # %     -> %        -> %25        -> .25
+    # +     -> +-       -> %2B-       -> .2B-
+    # _     -> _        -> _          -> _
+    # space -> space    -> +          -> _
+
+    # See: RFC2152 - UTF-7 A Mail-Safe Transformation Format of Unicode
+
+    quoted = quote_plus(s.encode("utf-7"), ":").replace("%", ".").replace("+", "_")
+
+    # Ensure that the identifier starts with an alphabetical character. Slicing
+    # instead of indexing makes empty input yield "A", not an IndexError.
+
+    if not quoted[:1].isalpha():
+        return "A%s" % quoted
+    else:
+        return quoted
+
 # vim: tabstop=4 expandtab shiftwidth=4
diff -r 6f40d4d5320a -r f9321fa1c1de moinformat/serialisers/html/moin.py
--- a/moinformat/serialisers/html/moin.py	Tue Jul 31 17:38:45 2018 +0200
+++ b/moinformat/serialisers/html/moin.py	Tue Jul 31 17:39:28 2018 +0200
@@ -19,7 +19,8 @@
 this program.  If not, see <http://www.gnu.org/licenses/>.
 """
 
-from moinformat.serialisers.common import escape_attr, escape_text, Serialiser
+from moinformat.serialisers.common import escape_attr, escape_text, make_id, \
+                                          Serialiser
 
 class HTMLSerialiser(Serialiser):
 
@@ -88,8 +89,8 @@
     def end_emphasis(self):
         self.out("</em>")
 
-    def start_heading(self, level, extra, pad):
-        self.out("<h%d>" % level)
+    def start_heading(self, level, extra, pad, text):
+        self.out("<h%d id='%s'>" % (level, make_id(text)))
 
     def end_heading(self, level, pad, extra):
         self.out("</h%d>" % level)
diff -r 6f40d4d5320a -r f9321fa1c1de moinformat/serialisers/moin/moin.py
--- a/moinformat/serialisers/moin/moin.py	Tue Jul 31 17:38:45 2018 +0200
+++ b/moinformat/serialisers/moin/moin.py	Tue Jul 31 17:39:28 2018 +0200
@@ -66,7 +66,7 @@
     def end_emphasis(self):
         self.out("''")
 
-    def start_heading(self, level, extra, pad):
+    def start_heading(self, level, extra, pad, text):
         self.out(extra + "=" * level + pad)
 
     def end_heading(self, level, pad, extra):
diff -r 6f40d4d5320a -r f9321fa1c1de moinformat/tree/moin.py
--- a/moinformat/tree/moin.py	Tue Jul 31 17:38:45 2018 +0200
+++ b/moinformat/tree/moin.py	Tue Jul 31 17:39:28 2018 +0200
@@ -113,6 +113,23 @@
         i = self.nodes.index(old)
         self.nodes[i] = new
 
+    def text_content(self):
+
+        """
+        Return a string containing the content of text nodes within this
+        container.
+        """
+
+        l = []
+
+        for node in self.nodes:
+            if isinstance(node, Text):
+                l.append(node.s)
+            elif isinstance(node, Container):
+                l.append(node.text_content())
+
+        return "".join(l)
+
     def __str__(self):
         return self.prettyprint()
 
@@ -307,7 +324,7 @@
         return self._prettyprint(l, indent)
 
     def to_string(self, out):
-        out.start_heading(self.level, self.start_extra, self.start_pad)
+        out.start_heading(self.level, self.start_extra, self.start_pad, self.text_content())
         self._to_string(out)
         out.end_heading(self.level, self.end_pad, self.end_extra)
 