# HG changeset patch
# User Paul Boddie <paul@boddie.org.uk>
# Date 1533051568 -7200
# Node ID f9321fa1c1de18a55a6f108fcda651741564cf25
# Parent  6f40d4d5320a82e6c769cd99d9489245097b470c
Introduced identifiers for headings to permit direct linking.

diff -r 6f40d4d5320a -r f9321fa1c1de moinformat/serialisers/common.py
--- a/moinformat/serialisers/common.py	Tue Jul 31 17:38:45 2018 +0200
+++ b/moinformat/serialisers/common.py	Tue Jul 31 17:39:28 2018 +0200
@@ -19,6 +19,8 @@
 this program.  If not, see <http://www.gnu.org/licenses/>.
 """
 
+from urllib import quote_plus
+
 class Serialiser:
 
     "General serialisation support."
@@ -98,4 +100,61 @@
 
     return s.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
 
+def make_id(s):
+
+    "Make a suitable identifier from 's' for XML element identification."
+
+    # NOTE: This reproduces the Moin algorithm for compatibility.
+    # NOTE: There may well be improvements possible, possibly by replacing plus
+    # NOTE: with something less cumbersome, even though plus may be unusual in
+    # NOTE: things like headings, anyway.
+
+    # The desired output is the following pattern:
+
+    # [A-Za-z][-_:.A-Za-z0-9]*
+
+    # The Python UTF-7 encoder preserves symbols and it encodes + as +- with an
+    # output range as follows (in addition to A-Za-z0-9):
+
+    # -_:.%+ !"#$&\'()*,/;<=>?@[]^`{|}
+
+    # The quote_plus function converts space to plus, preserves -_:. and encodes
+    # all other symbols (including original occurrences of plus and percent) and
+    # non-alphanumeric (ASCII) characters using percent encoding.
+
+    # With colons preserved, the resulting output is in the following range
+    # (in addition to A-Za-z0-9):
+
+    # -_:.%+
+
+    # Percent will only occur as an encoding prefix. Plus will only occur as a
+    # replacement for space.
+
+    # Combining quote_plus and UTF-7 gives the following range (in addition to
+    # A-Za-z0-9):
+
+    # -_:.%+
+
+    # Examples:
+
+    #          UTF-7         quote_plus    replace percent and plus
+    # :     -> :          -> :          -> :
+    # -     -> -          -> -          -> -
+    # .     -> .          -> .          -> .
+    # %     -> %          -> %25        -> .25
+    # +     -> +-         -> %2B-       -> .2B-
+    # _     -> _          -> _          -> _
+    # space -> space      -> +          -> _
+
+    # See: RFC2152 - UTF-7 A Mail-Safe Transformation Format of Unicode
+
+    quoted = quote_plus(s.encode("utf-7"), ":").replace("%", ".").replace("+", "_")
+
+    # Ensure that the identifier starts with an alphabetical character. Slicing
+    # (rather than indexing) also handles an empty result, yielding just "A".
+
+    if not quoted[:1].isalpha():
+        return "A%s" % quoted
+    return quoted
+
 # vim: tabstop=4 expandtab shiftwidth=4
diff -r 6f40d4d5320a -r f9321fa1c1de moinformat/serialisers/html/moin.py
--- a/moinformat/serialisers/html/moin.py	Tue Jul 31 17:38:45 2018 +0200
+++ b/moinformat/serialisers/html/moin.py	Tue Jul 31 17:39:28 2018 +0200
@@ -19,7 +19,8 @@
 this program.  If not, see <http://www.gnu.org/licenses/>.
 """
 
-from moinformat.serialisers.common import escape_attr, escape_text, Serialiser
+from moinformat.serialisers.common import escape_attr, escape_text, make_id, \
+                                          Serialiser
 
 class HTMLSerialiser(Serialiser):
 
@@ -88,8 +89,8 @@
     def end_emphasis(self):
         self.out("</em>")
 
-    def start_heading(self, level, extra, pad):
-        self.out("<h%d>" % level)
+    def start_heading(self, level, extra, pad, text):
+        self.out("<h%d id='%s'>" % (level, make_id(text)))  # id built from the heading text permits direct linking
 
     def end_heading(self, level, pad, extra):
         self.out("</h%d>" % level)
diff -r 6f40d4d5320a -r f9321fa1c1de moinformat/serialisers/moin/moin.py
--- a/moinformat/serialisers/moin/moin.py	Tue Jul 31 17:38:45 2018 +0200
+++ b/moinformat/serialisers/moin/moin.py	Tue Jul 31 17:39:28 2018 +0200
@@ -66,7 +66,7 @@
     def end_emphasis(self):
         self.out("''")
 
-    def start_heading(self, level, extra, pad):
+    def start_heading(self, level, extra, pad, text):  # text is accepted for signature parity and is not used here
         self.out(extra + "=" * level + pad)
 
     def end_heading(self, level, pad, extra):
diff -r 6f40d4d5320a -r f9321fa1c1de moinformat/tree/moin.py
--- a/moinformat/tree/moin.py	Tue Jul 31 17:38:45 2018 +0200
+++ b/moinformat/tree/moin.py	Tue Jul 31 17:39:28 2018 +0200
@@ -113,6 +113,23 @@
         i = self.nodes.index(old)
         self.nodes[i] = new
 
+    def text_content(self):
+
+        """
+        Return the text held by the text nodes of this container, including
+        those of nested containers, joined in the order the nodes appear.
+        """
+
+        parts = []
+
+        for child in self.nodes:
+            if isinstance(child, Text):
+                parts += [child.s]
+            elif isinstance(child, Container):
+                parts += [child.text_content()]
+
+        return "".join(parts)
+
     def __str__(self):
         return self.prettyprint()
 
@@ -307,7 +324,7 @@
         return self._prettyprint(l, indent)
 
     def to_string(self, out):
-        out.start_heading(self.level, self.start_extra, self.start_pad)
+        out.start_heading(self.level, self.start_extra, self.start_pad, self.text_content())
         self._to_string(out)
         out.end_heading(self.level, self.end_pad, self.end_extra)