# HG changeset patch
# User Paul Boddie <paul@boddie.org.uk>
# Date 1692310722 -7200
# Node ID 3bf425390801ad3e127b522a05b80729691faef2
# Parent  23098f02bda7e266cc6495d4fdc022563b33bc98
# Parent  9f6181276b350eb813f5006090e4963350538e8d
Merged changes from the generic-visitors branch.

diff -r 23098f02bda7 -r 3bf425390801 moinconvert
--- a/moinconvert	Tue Jun 20 18:58:47 2023 +0200
+++ b/moinconvert	Fri Aug 18 00:18:42 2023 +0200
@@ -3,7 +3,7 @@
 """
 Moin wiki format converter.
 
-Copyright (C) 2018, 2019, 2021 Paul Boddie <paul@boddie.org.uk>
+Copyright (C) 2018, 2019, 2021, 2023 Paul Boddie <paul@boddie.org.uk>
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -36,6 +36,10 @@
 
 To indicate pagenames within an input directory, omit any --pagename flags."""
 
+message_tree_format_usage = """\
+The --tree option cannot be used together with the --format or --output-format
+options since the --tree option indicates use of the "pretty" format."""
+
 
 
 # Options management.
@@ -88,7 +92,7 @@
     attachments_dir = []
     document_indexes = []
     filenames = []
-    formats = []
+    input_formats = []
     input_dir_types = []
     input_dirs = []
     input_encodings = []
@@ -96,6 +100,7 @@
     mappings = []
     output_dirs = []
     output_encodings = []
+    output_formats = []
     output_page_seps = []
     pagenames = []
     root_pagenames = []
@@ -152,6 +157,9 @@
         # Detect tree output.
 
         elif arg == "--tree":
+            if output_formats:
+                print >>sys.stderr, message_tree_format_usage
+                sys.exit(1)
             tree = True
 
         # Options with following arguments.
@@ -170,8 +178,11 @@
 
         # Switch to collecting formats.
 
-        elif arg == "--format":
-            l = formats
+        elif arg in ("--format", "--output-format"):
+            if tree:
+                print >>sys.stderr, message_tree_format_usage
+                sys.exit(1)
+            l = output_formats
             continue
 
         # Switch to collecting input locations.
@@ -192,6 +203,12 @@
             l = input_encodings
             continue
 
+        # Switch to collecting input formats.
+
+        elif arg == "--input-format":
+            l = input_formats
+            continue
+
         # Switch to collecting input page hierarchy separators.
 
         elif arg == "--input-page-sep":
@@ -254,7 +271,8 @@
 
         l = filenames
 
-    format = formats and formats[0] or "html"
+    input_format = input_formats and input_formats[0] or "moin"
+    output_format = tree and "pretty" or output_formats and output_formats[0] or "html"
     input_dir = getvalue(input_dirs)
     output_dir = getvalue(output_dirs)
 
@@ -265,23 +283,25 @@
         "bundle"            : bundle,
         "common_attachments": common,
         "document_index"    : getvalue(document_indexes),
+        "fragment"          : fragment,
         "input_context"     : input_dir and \
                               getvalue(input_dir_types, "directory") or \
                               "standalone",
         "input_encoding"    : getvalue(input_encodings),
         "input_filename"    : input_dir,
+        "input_format"      : input_format,
         "input_separator"   : getvalue(input_page_seps),
-        "link_format"       : format,
+        "link_format"       : output_format,
         "mapping"           : getmapping(mappings),
         "no_inline"         : no_inline,
         "output_context"    : output_dir and "directory" or "standalone",
         "output_encoding"   : getvalue(output_encodings),
-        "output_format"     : format,
+        "output_format"     : output_format,
         "output_filename"   : output_dir,
         "output_separator"  : getvalue(output_page_seps),
         "root_pagename"     : getvalue(root_pagenames, "FrontPage"),
         "theme_name"        : not fragment and \
-                              "%s.%s" % (getvalue(theme_names, "default"), format) or None,
+                              "%s.%s" % (getvalue(theme_names, "default"), output_format) or None,
         })
 
     # Define the input context and theme.
@@ -333,19 +353,17 @@
 
         p.update_metadata(metadata)
 
-        # Show a document tree for debugging purposes, if requested.
-
-        if tree:
-            print d.prettyprint()
-            continue
-
-        # Otherwise, serialise the document.
-
         # Obtain a serialiser using the configuration.
 
         serialiser = make_serialiser(metadata)
         outtext = serialise(d, serialiser)
 
+        # Show a document tree for debugging purposes, if requested.
+
+        if tree:
+            print outtext
+            continue
+
         # With a theme, apply it to the text.
 
         if theme:
@@ -386,11 +404,13 @@
 
 --common            Obtain attachments from a common directory for all pages,
                     rather than each page having its own subdirectory of a
-                    top-level attachments directory.
+                    top-level attachments directory
 --input-dir         Indicate an input directory containing document files
 --input-dir-type    Indicate the type of input directory involved
                     (default: directory)
 --input-encoding    Indicate the character encoding used in document files
+--input-format      Indicate the format of the parsed documents
+                    (default: moin)
 --input-page-sep    Indicate the separator used in filenames to encode
                     hierarchical relationships (subpages and descendant pages)
 --pagename          Indicate the page name corresponding to an indicated
@@ -401,20 +421,24 @@
 
 --bundle            Bundle resources such as stylesheets within every document,
                     useful for publishing documents that need to be copied or
-                    distributed individually.
+                    distributed individually
 --document-index    Provide a "DocumentIndex" filename to be used in links in
                     HTML format output, useful for local file browsing instead
                     of Web-published content
---format            Indicate the format to be used for serialised documents
+--format            Indicate the format to be used for serialised documents;
+                    equivalent to --output-format
                     (default: html)
 --fragment          Indicates that an output fragment, not an entire document,
                     is to be generated, skipping any theming activities
 --no-inline         Suppress inline objects in serialised documents, linking to
-                    separate objects instead.
+                    separate objects instead
 --output-dir        Indicate an output directory to contain serialised document
                     files
 --output-encoding   Indicate the character encoding used in serialised document
                     files
+--output-format     Indicate the format to be used for serialised documents;
+                    equivalent to --format 
+                    (default: html)
 --output-page-sep   Indicate the separator used in filenames to encode
                     hierarchical relationships (subpages and descendant pages)
 --theme             Indicate a theme for serialised documents, typically
diff -r 23098f02bda7 -r 3bf425390801 moinformat/metadata.py
--- a/moinformat/metadata.py	Tue Jun 20 18:58:47 2023 +0200
+++ b/moinformat/metadata.py	Fri Aug 18 00:18:42 2023 +0200
@@ -3,7 +3,7 @@
 """
 Metadata for document conversion.
 
-Copyright (C) 2018, 2019, 2021 Paul Boddie <paul@boddie.org.uk>
+Copyright (C) 2018, 2019, 2021, 2023 Paul Boddie <paul@boddie.org.uk>
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -43,13 +43,13 @@
         }
 
     effects = {
-        "input_context"     : "input",
-        "input_format"      : "parser",
-        "input_separator"   : "input",
-        "link_format"       : "linker",
-        "output_context"    : "output",
-        "output_format"     : "serialiser",
-        "theme_name"        : "theme",
+        "input_context"     : ["input"],
+        "input_format"      : ["parser", "serialiser"],
+        "input_separator"   : ["input"],
+        "link_format"       : ["linker"],
+        "output_context"    : ["output"],
+        "output_format"     : ["serialiser"],
+        "theme_name"        : ["theme"],
         }
 
     def __init__(self, parameters=None):
@@ -92,12 +92,14 @@
 
         self.parameters[name] = value
 
-        # Invalidate any affected setting.
+        # Invalidate any affected settings.
 
         affected = self.effects.get(name)
 
-        if affected and self.has_key(affected):
-            del self.parameters[affected]
+        if affected:
+            for affected_name in affected:
+                if self.has_key(affected_name):
+                    del self.parameters[affected_name]
 
         # Set any default values.
 
@@ -106,36 +108,43 @@
         if affected and not self.get(affected):
             self.set(affected, value)
 
-    def make_object(self, name, fn, typename, typevalue=None):
+    def get_update(self, name, value=None):
 
         """
-        Make an object to be stored in the setting 'name', using 'fn' to
-        acquire the object class, with the object type being retrieved from the
-        'typename' setting, this being overwritten by 'typevalue' if specified.
-        Return None if no class is obtained.
+        Obtain the 'name' setting, this being overwritten by 'value' if
+        specified. Return the updated setting.
         """
 
-        # Return any existing object if not reset.
+        # Overwrite any existing setting.
 
-        if not typevalue:
-            obj = self.get(name)
-            if obj:
-                return obj
+        if value:
+            self.set(name, value)
+            return value
+        else:
+            return self.get(name)
+
+    def make_object(self, name, cls):
 
-        # Overwrite any existing typename setting.
+        """
+        Make an object to be stored in the setting 'name', using 'cls' as the
+        object class.
+        """
 
-        else:
-            self.set(typename, typevalue)
+        # Return any existing, preserved object. Since updates to various
+        # properties will discard objects, any preserved object should still be
+        # applicable.
 
-        # Obtain the class.
+        obj = self.get(name)
+        if obj:
+            return obj
 
-        cls = fn(self.get(typename))
+        # Without any object class, return None.
 
         if not cls:
             self.set(name, None)
             return None
 
-        # Instantiate the class.
+        # Instantiate the class and record the object.
 
         obj = cls(self)
         self.set(name, obj)
@@ -148,7 +157,9 @@
         "input_context" setting which will be replaced by any given 'name'.
         """
 
-        return self.make_object("input", get_input, "input_context", name)
+        cls = get_input(self.get_update("input_context", name))
+
+        return self.make_object("input", cls)
 
     def get_linker(self, name=None):
 
@@ -157,7 +168,9 @@
         "link_format" setting which will be replaced by any given 'name'.
         """
 
-        return self.make_object("linker", get_linker, "link_format", name)
+        cls = get_linker(self.get_update("link_format", name))
+
+        return self.make_object("linker", cls)
 
     def get_output(self, name=None):
 
@@ -166,7 +179,9 @@
         "output_context" setting which will be replaced by any given 'name'.
         """
 
-        return self.make_object("output", get_output, "output_context", name)
+        cls = get_output(self.get_update("output_context", name))
+
+        return self.make_object("output", cls)
 
     def get_parser(self, name=None):
 
@@ -175,7 +190,9 @@
         "input_format" setting which will be replaced by any given 'name'.
         """
 
-        parser = self.make_object("parser", get_parser, "input_format", name)
+        cls = get_parser(self.get_update("input_format", name))
+
+        parser = self.make_object("parser", cls)
         parser.parsers = parsers
         return parser
 
@@ -186,8 +203,10 @@
         "output_format" setting which will be replaced by any given 'name'.
         """
 
-        serialiser = self.make_object("serialiser", get_serialiser,
-                                      "output_format", name)
+        cls = get_serialiser(self.get_update("output_format", name),
+                             self.get("input_format"))
+
+        serialiser = self.make_object("serialiser", cls)
         serialiser.serialisers = serialisers
         return serialiser
 
@@ -198,6 +217,8 @@
         setting which will be replaced by any given 'name'.
         """
 
-        return self.make_object("theme", get_theme, "theme_name", name)
+        cls = get_theme(self.get_update("theme_name", name))
+
+        return self.make_object("theme", cls)
 
 # vim: tabstop=4 expandtab shiftwidth=4
diff -r 23098f02bda7 -r 3bf425390801 moinformat/output/common.py
--- a/moinformat/output/common.py	Tue Jun 20 18:58:47 2023 +0200
+++ b/moinformat/output/common.py	Fri Aug 18 00:18:42 2023 +0200
@@ -3,7 +3,7 @@
 """
 Output context common functionality.
 
-Copyright (C) 2018 Paul Boddie <paul@boddie.org.uk>
+Copyright (C) 2018, 2023 Paul Boddie <paul@boddie.org.uk>
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -40,9 +40,10 @@
 
     def reset(self):
 
-        "Set up an output collector."
+        "Set up an output collector and output state."
 
         self.output = []
+        self.indent = ""
 
     def encode(self, text):
 
diff -r 23098f02bda7 -r 3bf425390801 moinformat/parsers/__init__.py
--- a/moinformat/parsers/__init__.py	Tue Jun 20 18:58:47 2023 +0200
+++ b/moinformat/parsers/__init__.py	Fri Aug 18 00:18:42 2023 +0200
@@ -3,7 +3,7 @@
 """
 Moin wiki parsers.
 
-Copyright (C) 2017, 2018 Paul Boddie <paul@boddie.org.uk>
+Copyright (C) 2017, 2018, 2023 Paul Boddie <paul@boddie.org.uk>
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -24,13 +24,13 @@
 
 # Top-level functions.
 
-def get_parser(name="moin"):
+def get_parser(name=None):
 
     "Return the parser class supporting the format with the given 'name'."
 
-    return parsers[name]
+    return parsers[name or "moin"]
 
-def make_parser(metadata, name="moin"):
+def make_parser(metadata, name=None):
 
     "Return a parser instance using the given 'metadata' and optional 'name'."
 
diff -r 23098f02bda7 -r 3bf425390801 moinformat/parsers/common.py
--- a/moinformat/parsers/common.py	Tue Jun 20 18:58:47 2023 +0200
+++ b/moinformat/parsers/common.py	Fri Aug 18 00:18:42 2023 +0200
@@ -3,7 +3,7 @@
 """
 Moin wiki parsing functionality.
 
-Copyright (C) 2017, 2018, 2019, 2021 Paul Boddie <paul@boddie.org.uk>
+Copyright (C) 2017, 2018, 2019, 2021, 2023 Paul Boddie <paul@boddie.org.uk>
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -279,6 +279,9 @@
         self.parsers = parsers
         self.root = root
 
+    def update_metadata(self, metadata):
+        pass
+
     def get_parser(self, format_type):
 
         """
diff -r 23098f02bda7 -r 3bf425390801 moinformat/parsers/html.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/moinformat/parsers/html.py	Fri Aug 18 00:18:42 2023 +0200
@@ -0,0 +1,82 @@
+#!/usr/bin/env python
+
+"""
+HTML document fragment parser.
+
+Copyright (C) 2023 Paul Boddie <paul@boddie.org.uk>
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; either version 3 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+details.
+
+You should have received a copy of the GNU General Public License along with
+this program.  If not, see <http://www.gnu.org/licenses/>.
+"""
+
+from moinformat.parsers.common import ParserBase
+from moinformat.tree.html import Element, Fragment
+from moinformat.utils.htmlparse import Parser
+
+class HTMLParser(ParserBase):
+
+    "An HTML document fragment parser."
+
+    formats = ["html"]
+
+    def __init__(self, metadata):
+        self.metadata = metadata
+
+    def parse(self, s):
+
+        "Parse the HTML document text in 's'."
+
+        doc = Parser(s).parse()
+
+        # If only a fragment is involved, find the body node and return its
+        # children in a fragment.
+
+        if self.metadata.get("fragment") or self.metadata.get("theme_name"):
+            body = self._find_body(doc)
+
+            if body:
+                return Fragment(body.nodes)
+            else:
+                return None
+
+        # Otherwise, return the top-level node.
+
+        else:
+            return doc
+
+    def _find_body(self, node):
+
+        """
+        Find the body element from 'node', returning the element if found or
+        None otherwise.
+        """
+
+        # Search all nodes with children.
+
+        if isinstance(node, Fragment):
+
+            # Return the node if it is a body element.
+
+            if isinstance(node, Element) and node.name == "body":
+                return node
+
+            for n in node.nodes:
+                body = self._find_body(n)
+                if body:
+                    return body
+
+        return None
+
+parser = HTMLParser
+
+# vim: tabstop=4 expandtab shiftwidth=4
diff -r 23098f02bda7 -r 3bf425390801 moinformat/parsers/moin.py
--- a/moinformat/parsers/moin.py	Tue Jun 20 18:58:47 2023 +0200
+++ b/moinformat/parsers/moin.py	Fri Aug 18 00:18:42 2023 +0200
@@ -56,15 +56,14 @@
 
     formats = ["moin", "wiki"]
 
-    def __init__(self, metadata, parsers=None, root=None):
+    # Principal parser methods.
+
+    def parse(self, s):
 
         """
-        Initialise the parser with the given 'metadata' and optional 'parsers'.
-        An optional 'root' indicates the document-level parser.
+        Parse page text 's'. Pages consist of regions delimited by markers.
         """
 
-        ParserBase.__init__(self, metadata, parsers, root)
-
         # Record certain node occurrences for later evaluation.
 
         self.macros = []
@@ -77,13 +76,7 @@
 
         self.link_targets = []
 
-    # Principal parser methods.
-
-    def parse(self, s):
-
-        """
-        Parse page text 's'. Pages consist of regions delimited by markers.
-        """
+        # Obtain the token stream and a region to populate.
 
         self.items = self.get_items(s)
         self.region = Region([], type="moin")
diff -r 23098f02bda7 -r 3bf425390801 moinformat/parsers/pretty.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/moinformat/parsers/pretty.py	Fri Aug 18 00:18:42 2023 +0200
@@ -0,0 +1,78 @@
+#!/usr/bin/env python
+
+"""
+Prettyprinted document tree parser.
+
+Copyright (C) 2017, 2018, 2019, 2023 Paul Boddie <paul@boddie.org.uk>
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; either version 3 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+details.
+
+You should have received a copy of the GNU General Public License along with
+this program.  If not, see <http://www.gnu.org/licenses/>.
+"""
+
+from moinformat.tree.pretty import Node
+
+class PrettyParser:
+
+    "A prettyprinted document tree parser."
+
+    formats = ["pretty"]
+
+    def __init__(self, metadata):
+        self.metadata = metadata
+
+    def parse(self, s):
+
+        "Parse the tree structure representation in 's'."
+
+        indent = 0
+        branches = []
+
+        for line in s.split("\n"):
+            line = line.rstrip()
+            if not line:
+                continue
+
+            new_indent = line.rfind(" ") + 1
+            node = Node(line[new_indent:])
+
+            # Establish a branch to add nodes to.
+
+            if not branches:
+                branches.append(node)
+            else:
+                # Note the current node as outermost branch.
+
+                if new_indent > indent:
+                    branches.append(node)
+                else:
+                    # Reduced indent involves obtaining an inner branch again.
+
+                    while indent > new_indent:
+                        del branches[-1]
+                        indent -= 2
+
+                    # Note the current node as outermost branch.
+
+                    branches[-1] = node
+
+                # Append the current node to the parent branch.
+
+                branches[-2].append(node)
+
+            indent = new_indent
+
+        return branches[0]
+
+parser = PrettyParser
+
+# vim: tabstop=4 expandtab shiftwidth=4
diff -r 23098f02bda7 -r 3bf425390801 moinformat/serialisers/__init__.py
--- a/moinformat/serialisers/__init__.py	Tue Jun 20 18:58:47 2023 +0200
+++ b/moinformat/serialisers/__init__.py	Fri Aug 18 00:18:42 2023 +0200
@@ -3,7 +3,7 @@
 """
 Moin wiki serialisers.
 
-Copyright (C) 2017, 2018 Paul Boddie <paul@boddie.org.uk>
+Copyright (C) 2017, 2018, 2023 Paul Boddie <paul@boddie.org.uk>
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -23,11 +23,15 @@
 
 # Top-level functions.
 
-def get_serialiser(name):
+def get_serialiser(name, doctype=None):
 
-    "Return the main serialiser class for the format having the given 'name'."
+    """
+    Return the main serialiser class for the format having the given 'name'.
+    If 'doctype' is indicated, obtain a serialiser class specific to that
+    document type. Otherwise, a general Moin serialiser class is obtained.
+    """
 
-    return serialisers["%s.moin" % name]
+    return serialisers["%s.%s" % (name, doctype or "moin")]
 
 def make_serialiser(metadata, format=None):
 
@@ -43,7 +47,7 @@
     "Serialise 'doc' using the given 'serialiser' instance."
 
     serialiser.reset()
-    doc.to_string(serialiser)
+    doc.visit(serialiser)
     return serialiser.get_output()
 
 # vim: tabstop=4 expandtab shiftwidth=4
diff -r 23098f02bda7 -r 3bf425390801 moinformat/serialisers/common.py
--- a/moinformat/serialisers/common.py	Tue Jun 20 18:58:47 2023 +0200
+++ b/moinformat/serialisers/common.py	Fri Aug 18 00:18:42 2023 +0200
@@ -3,7 +3,7 @@
 """
 Moin serialiser support.
 
-Copyright (C) 2017, 2018, 2019, 2021 Paul Boddie <paul@boddie.org.uk>
+Copyright (C) 2017, 2018, 2019, 2021, 2023 Paul Boddie <paul@boddie.org.uk>
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -92,6 +92,36 @@
         else:
             return cls(self.metadata, self.serialisers)
 
+    # Serialisation visitor methods.
+
+    def visit(self, node):
+
+        "Visit the 'node' to invoke the appropriate serialisation handler."
+
+        node.visit(self)
+
+    def visit_region(self, region):
+
+        """
+        Obtain a serialiser for the region from the same format family. Retain
+        the same serialiser if no appropriate serialiser could be obtained.
+        """
+
+        serialiser_name = self.formats and "%s.%s" % (self.formats[0], region.type) or None
+        serialiser = self.get_serialiser(serialiser_name)
+
+        # Serialise the region.
+
+        serialiser.container(region)
+
+    def container(self, container):
+
+        "Visit all nodes in 'container'."
+
+        if container.nodes:
+            for node in container.nodes:
+                self.visit(node)
+
 def escape_attr(s):
 
     "Escape XML document attribute."
diff -r 23098f02bda7 -r 3bf425390801 moinformat/serialisers/html/graphviz.py
--- a/moinformat/serialisers/html/graphviz.py	Tue Jun 20 18:58:47 2023 +0200
+++ b/moinformat/serialisers/html/graphviz.py	Fri Aug 18 00:18:42 2023 +0200
@@ -3,7 +3,7 @@
 """
 Graphviz serialiser, generating content for embedding in HTML documents.
 
-Copyright (C) 2018, 2019, 2022 Paul Boddie <paul@boddie.org.uk>
+Copyright (C) 2018, 2019, 2022, 2023 Paul Boddie <paul@boddie.org.uk>
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -55,19 +55,16 @@
     def init(self):
         self.directives = {}
 
-    def start_block(self):
-        pass
+    def block(self, block):
+        self.container(block)
 
-    def end_block(self):
-        pass
-
-    def directive(self, key, value, directive):
-        if not self.directives.has_key(key):
-            self.directives[key] = []
-        self.directives[key].append(value)
+    def directive(self, directive):
+        if not self.directives.has_key(directive.key):
+            self.directives[directive.key] = []
+        self.directives[directive.key].append(directive.value)
 
     def text(self, text):
-        self.process_graph(text)
+        self.process_graph(text.s)
 
 
 
diff -r 23098f02bda7 -r 3bf425390801 moinformat/serialisers/html/html.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/moinformat/serialisers/html/html.py	Fri Aug 18 00:18:42 2023 +0200
@@ -0,0 +1,72 @@
+#!/usr/bin/env python
+
+"""
+HTML serialiser.
+
+Copyright (C) 2023 Paul Boddie <paul@boddie.org.uk>
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; either version 3 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+details.
+
+You should have received a copy of the GNU General Public License along with
+this program.  If not, see <http://www.gnu.org/licenses/>.
+"""
+
+from moinformat.serialisers.common import Serialiser
+
+
+
+# The serialiser class.
+
+class HTMLSerialiser(Serialiser):
+
+    "Serialisation of HTML fragments."
+
+    input_formats = ["html"]
+    formats = ["html"]
+
+    def attribute(self, attribute):
+        self.out(attribute.name)
+        if attribute.value is not None:
+            self.out("=")
+            self.visit(attribute.value)
+
+    def attribute_value(self, attribute_value):
+        self.out("%s%s%s" % (attribute_value.quote, attribute_value.value, attribute_value.quote))
+
+    def element(self, element):
+        self.out("<%s" % element.name)
+        for attribute in element.attributes:
+            self.out(" ")
+            self.visit(attribute)
+        self.out(">")
+        self.container(element)
+        self.out("</%s>" % element.name)
+
+    def comment(self, comment):
+        self.out("<%s>" % comment.value)
+
+    def directive(self, directive):
+        self.out("<%s>" % directive.value)
+
+    def inclusion(self, inclusion):
+        self.out("<%s>" % inclusion.value)
+
+    def node(self, node):
+        self.out(node.value)
+
+    text = node
+
+    def fragment(self, fragment):
+        self.container(fragment)
+
+serialiser = HTMLSerialiser
+
+# vim: tabstop=4 expandtab shiftwidth=4
diff -r 23098f02bda7 -r 3bf425390801 moinformat/serialisers/html/moin.py
--- a/moinformat/serialisers/html/moin.py	Tue Jun 20 18:58:47 2023 +0200
+++ b/moinformat/serialisers/html/moin.py	Fri Aug 18 00:18:42 2023 +0200
@@ -3,7 +3,8 @@
 """
 HTML serialiser.
 
-Copyright (C) 2017, 2018, 2019, 2021, 2022 Paul Boddie <paul@boddie.org.uk>
+Copyright (C) 2017, 2018, 2019, 2021, 2022,
+              2023 Paul Boddie <paul@boddie.org.uk>
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -30,90 +31,7 @@
     input_formats = ["moin", "wiki"]
     formats = ["html"]
 
-    def _region_tag(self, type):
-
-        # NOTE: Need to support types in general.
-
-        type = type and type.split()[0]
-
-        if type == "inline":
-            return "tt"
-        elif type in (None, "python"):
-            return "pre"
-        else:
-            return "span"
-
-    def start_region(self, level, indent, type, args, extra):
-
-        # Generate attributes, joining them when preparing the tag.
-
-        l = []
-        out = l.append
-
-        if level:
-            out("level-%d" % level)
-
-        if indent:
-            out("indent-%d" % indent)
-
-        # NOTE: Encode type details for CSS.
-
-        out("type-%s" % escape_attr(type or "opaque"))
-
-        tag = self._region_tag(type)
-
-        # Inline regions must preserve "indent" as space in the text.
-
-        if type == "inline" and indent:
-            self.out(" " * indent)
-
-        self.out("<%s class='%s'>" % (tag, " ".join(l)))
-
-    def end_region(self, level, indent, type, args, extra):
-        tag = self._region_tag(type)
-        self.out("</%s>" % tag)
-
-    def start_block(self):
-        self.out("<p>")
-
-    def end_block(self):
-        self.out("</p>")
-
-    def start_defitem(self, pad, extra):
-        self.out("<dd>")
-
-    def end_defitem(self, pad, extra):
-        self.out("</dd>")
-
-    def start_defterm(self, pad, extra):
-        self.out("<dt>")
-
-    def end_defterm(self, pad, extra):
-        self.out("</dt>")
-
-    def start_emphasis(self):
-        self.out("<em>")
-
-    def end_emphasis(self):
-        self.out("</em>")
-
-    def start_heading(self, level, extra, pad, identifier):
-        self.out("<h%d id='%s'>" % (level, escape_attr(self.linker.make_id(identifier))))
-
-    def end_heading(self, level, pad, extra):
-        self.out("</h%d>" % level)
-
-    def start_larger(self):
-        self.out("<big>")
-
-    def end_larger(self):
-        self.out("</big>")
-
-    def start_linktext(self):
-        pass
-
-    def end_linktext(self):
-        pass
+    # Support methods.
 
     list_tags = {
         "i" : "lower-roman",
@@ -132,141 +50,246 @@
 
         return "ul", None
 
-    def start_list(self, indent, marker, num):
-        tag, style_type = self._get_list_tag(marker)
-        style = style_type and ' style="list-style-type: %s"' % escape_attr(style_type) or ""
-        start = style_type and num is not None and ' start="%s"' % escape_attr(num) or ""
-        self.out("<%s%s%s>" % (tag, style, start))
+    def _link(self, target, nodes, tag, attr):
+        "Write 'tag' with 'attr' set to the translated 'target', using 'nodes'."
+
+        link = self.linker and self.linker.translate(target) or None
+        self.out('<%s %s="%s"' % (tag, attr, escape_attr(link.get_target())))
+
+        # Provide link parameters as attributes.
+
+        for node in nodes or []:
+            if isinstance(node, LinkParameter):
+                self.out(" ")
+                node.visit(self)
+
+        # Close the tag if an image.
+
+        if tag == "img":
+            self.out(" />")
+
+        # Provide the first link label found, if any. Otherwise, use a
+        # generated default for the label.
+
+        else:
+            self.out(">")
+
+            for node in nodes or []:
+                if isinstance(node, LinkLabel):
+                    node.visit(self)
+                    break
+            else:
+                self.out(escape_text(link.get_label()))
+
+            self.out("</%s>" % tag)
+
+    def _region_tag(self, type):
+
+        # NOTE: Need to support types in general.
+
+        type = type and type.split()[0]
 
-    def end_list(self, indent, marker, num):
-        tag, style = self._get_list_tag(marker)
+        if type == "inline":
+            return "tt"
+        elif type in (None, "python"):
+            return "pre"
+        else:
+            return "span"
+
+    # Node handler methods.
+
+    def region(self, region):
+        tag = self._region_tag(region.type)
+
+        # Generate attributes, joining them when preparing the tag.
+
+        attrs = []
+        attr = attrs.append
+
+        if region.level:
+            attr("region-level-%d" % region.level)
+
+        if region.indent:
+            attr("region-indent-%d" % region.indent)
+
+        # NOTE: Encode type details for CSS.
+
+        attr("region-type-%s" % escape_attr(region.type or "opaque"))
+
+        # Inline regions must preserve "indent" as space in the text.
+
+        if region.type == "inline" and region.indent:
+            self.out(" " * region.indent)
+
+        self.out("<%s class='%s'>" % (tag, " ".join(attrs)))
+
+        # Serialise the region content.
+
+        self.visit_region(region)
+
+        # End the region with the previous serialiser.
+
         self.out("</%s>" % tag)
 
-    def start_listitem(self, indent, marker, space, num):
+    # Block node methods.
+
+    def block(self, block):
+        self.out("<p>")
+        self.container(block)
+        self.out("</p>")
+
+    def defitem(self, defitem):
+        self.out("<dd>")
+        self.container(defitem)
+        self.out("</dd>")
+
+    def defterm(self, defterm):
+        self.out("<dt>")
+        self.container(defterm)
+        self.out("</dt>")
+
+    def fontstyle(self, fontstyle):
+        if fontstyle.emphasis:
+            self.out("<em>")
+        elif fontstyle.strong:
+            self.out("<strong>")
+        self.container(fontstyle)
+        if fontstyle.emphasis:
+            self.out("</em>")
+        elif fontstyle.strong:
+            self.out("</strong>")
+
+    def heading(self, heading):
+        self.out("<h%d id='%s'>" % (
+            heading.level,
+            escape_attr(self.linker.make_id(heading.identifier))))
+        self.container(heading)
+        self.out("</h%d>" % heading.level)
+
+    def larger(self, larger):
+        self.out("<big>")
+        self.container(larger)
+        self.out("</big>")
+
+    def list(self, list):
+        tag, style_type = self._get_list_tag(list.marker)
+        style = style_type and \
+            ' style="list-style-type: %s"' % escape_attr(style_type) or ""
+        start = style_type and \
+            list.num is not None and ' start="%s"' % escape_attr(list.num) or ""
+        self.out("<%s%s%s>" % (tag, style, start))
+        self.container(list)
+        self.out("</%s>" % tag)
+
+    def listitem(self, listitem):
         self.out("<li>")
-
-    def end_listitem(self, indent, marker, space, num):
+        self.container(listitem)
         self.out("</li>")
 
-    def start_macro(self, name, args, nodes, inline):
+    def macro(self, macro):
 
         # Special case of a deliberately unexpanded macro.
 
-        if nodes is None:
+        if macro.nodes is None:
             return
 
-        tag = inline and "span" or "div"
-        self.out("<%s class='macro %s'>" % (tag, escape_text(name)))
+        tag = macro.inline and "span" or "div"
+        self.out("<%s class='macro %s'>" % (tag, escape_text(macro.name)))
 
         # Fallback case for when macros are not replaced.
 
-        if not nodes:
+        if not macro.nodes:
             self.out(escape_text("<<"))
-            self.out("<span class='name'>%s</span>" % escape_text(name))
-            if args:
+            self.out("<span class='name'>%s</span>" % escape_text(macro.name))
+            if macro.args:
                 self.out("(")
             first = True
-            for arg in args:
+            for arg in macro.args:
                 if not first:
                     self.out(",")
                 self.out("<span class='arg'>%s</span>" % escape_text(arg))
                 first = False
-            if args:
+            if macro.args:
                 self.out(")")
             self.out(escape_text(">>"))
 
-    def end_macro(self, inline):
-        tag = inline and "span" or "div"
+        # Produce the expanded macro content.
+
+        else:
+            self.container(macro)
+
+        tag = macro.inline and "span" or "div"
         self.out("</%s>" % tag)
 
-    def start_monospace(self):
+    def monospace(self, monospace):
         self.out("<tt>")
-
-    def end_monospace(self):
+        self.container(monospace)
         self.out("</tt>")
 
-    def start_smaller(self):
+    def smaller(self, smaller):
         self.out("<small>")
-
-    def end_smaller(self):
+        self.container(smaller)
         self.out("</small>")
 
-    def start_strikethrough(self):
+    def strikethrough(self, strikethrough):
         self.out("<del>")
-
-    def end_strikethrough(self):
+        self.container(strikethrough)
         self.out("</del>")
 
-    def start_strong(self):
-        self.out("<strong>")
-
-    def end_strong(self):
-        self.out("</strong>")
-
-    def start_subscript(self):
+    def subscript(self, subscript):
         self.out("<sub>")
-
-    def end_subscript(self):
+        self.container(subscript)
         self.out("</sub>")
 
-    def start_superscript(self):
+    def superscript(self, superscript):
         self.out("<sup>")
-
-    def end_superscript(self):
+        self.container(superscript)
         self.out("</sup>")
 
-    def start_table(self):
+    def table(self, table):
         self.out("<table>")
-
-    def end_table(self):
+        self.container(table)
         self.out("</table>")
 
-    def start_table_attrs(self):
-        pass
-
-    def end_table_attrs(self):
-        pass
-
-    def start_table_cell(self, attrs, leading, padding):
+    def table_cell(self, table_cell):
         self.out("<td")
 
         # Handle the attributes separately from their container.
 
-        if attrs and not attrs.empty():
-            for attr in attrs.nodes:
-                attr.to_string(self)
+        if table_cell.attrs and not table_cell.attrs.empty():
+            for attr in table_cell.attrs.nodes:
+                attr.visit(self)
 
         self.out(">")
-
-    def end_table_cell(self):
+        self.container(table_cell)
         self.out("</td>")
 
-    def start_table_row(self, leading, padding):
+    def table_row(self, table_row):
         self.out("<tr>")
-
-    def end_table_row(self, trailing):
+        self.container(table_row)
         self.out("</tr>")
 
-    def start_underline(self):
+    def underline(self, underline):
         self.out("<span style='text-decoration: underline'>")
-
-    def end_underline(self):
+        self.container(underline)
         self.out("</span>")
 
-    def anchor(self, target):
-        self.out("<a name='%s' />" % escape_attr(self.linker.make_id(target)))
+    # Inline node methods.
 
-    def break_(self):
+    def anchor(self, anchor):
+        self.out("<a name='%s' />" % escape_attr(self.linker.make_id(anchor.target)))
+
+    def break_(self, break_):
         pass
 
-    def comment(self, comment, extra):
+    def comment(self, comment):
         pass
 
-    def directive(self, directive, extra):
+    def directive(self, directive):
 
         # Obtain a blank value if the value is missing.
 
-        name, text = (directive.split(None, 1) + [""])[:2]
+        name, text = (directive.directive.split(None, 1) + [""])[:2]
 
         # Produce a readable redirect.
 
@@ -281,80 +304,51 @@
 
             self.end_block()
 
-    def linebreak(self):
+    def linebreak(self, linebreak):
         self.out("<br />")
 
-    def _link(self, target, nodes, tag, attr):
-        link = self.linker and self.linker.translate(target) or None
-
-        self.out('<%s %s="%s"' % (tag, attr, escape_attr(link.get_target())))
-
-        # Provide link parameters as attributes.
-
-        if nodes:
-            for node in nodes:
-                if isinstance(node, LinkParameter):
-                    self.out(" ")
-                    node.to_string(self)
-
-        # Close the tag if an image.
-
-        if tag == "img":
-            self.out(" />")
-
-        # Provide the link label if specified. Otherwise, use a generated
-        # default for the label.
+    def link(self, link):
+        self._link(link.target, link.nodes, "a", "href")
 
-        else:
-            self.out(">")
-
-            for node in nodes or []:
-                if isinstance(node, LinkLabel):
-                    node.to_string(self)
-                break
-            else:
-                self.out(escape_text(link.get_label()))
+    def link_label(self, link_label):
+        self.container(link_label)
 
-            self.out("</%s>" % tag)
-
-    def link(self, target, nodes):
-        self._link(target, nodes, "a", "href")
+    def link_parameter(self, link_parameter):
+        s = link_parameter.text_content()
+        key_value = s.split("=", 1)
 
-    def link_label(self, nodes):
-        for node in nodes:
-            node.to_string(self)
-
-    def link_parameter(self, key_value):
         if len(key_value) == 1:
             self.out(key_value[0])
         else:
             key, value = key_value
             self.out("%s='%s'" % (key, escape_attr(value)))
 
-    def nbsp(self):
+    def nbsp(self, nbsp):
         self.out("&nbsp;")
 
-    def rule(self, height):
-        self.out("<hr style='height: %dpt' />" % min(height, 10))
+    def rule(self, rule):
+        self.out("<hr style='height: %dpt' />" % min(rule.height, 10))
 
-    def table_attrs(self, nodes):
+    def table_attrs(self, table_attrs):
 
         # Skip the attributes in their original form.
 
         pass
 
-    def table_attr(self, name, value, concise, quote):
-        self.out(" %s%s" % (escape_text(name), value is not None and
-            "='%s'" % escape_attr(value) or ""))
+    def table_attr(self, table_attr):
+        self.out(" %s%s" % (
+            escape_text(table_attr.name),
+            table_attr.value is not None and
+            "='%s'" % escape_attr(table_attr.value) or ""))
 
-    def text(self, s):
-        self.out(escape_text(s))
+    def text(self, text):
+        self.out(escape_text(text.s))
 
-    def transclusion(self, target, nodes):
-        self._link(target, nodes, "img", "src")
+    def transclusion(self, transclusion):
+        self._link(transclusion.target, transclusion.nodes, "img", "src")
 
-    def verbatim(self, s):
-        self.text(s)
+    def verbatim(self, verbatim):
+        self.out(escape_text(verbatim.text))
 
 serialiser = HTMLSerialiser
 
diff -r 23098f02bda7 -r 3bf425390801 moinformat/serialisers/html/table.py
--- a/moinformat/serialisers/html/table.py	Tue Jun 20 18:58:47 2023 +0200
+++ b/moinformat/serialisers/html/table.py	Fri Aug 18 00:18:42 2023 +0200
@@ -28,7 +28,7 @@
 
     input_formats = ["table"]
 
-    def continuation(self, text):
+    def continuation(self, continuation):
         self.out(" ")
 
 serialiser = HTMLTableSerialiser
diff -r 23098f02bda7 -r 3bf425390801 moinformat/serialisers/moin/graphviz.py
--- a/moinformat/serialisers/moin/graphviz.py	Tue Jun 20 18:58:47 2023 +0200
+++ b/moinformat/serialisers/moin/graphviz.py	Fri Aug 18 00:18:42 2023 +0200
@@ -3,7 +3,7 @@
 """
 Moin Graphviz region serialiser.
 
-Copyright (C) 2018, 2021 Paul Boddie <paul@boddie.org.uk>
+Copyright (C) 2018, 2021, 2023 Paul Boddie <paul@boddie.org.uk>
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -28,20 +28,19 @@
     input_formats = ["graphviz", "dot"]
     formats = ["moin", "wiki"]
 
-    def start_block(self):
-        pass
-
-    def end_block(self):
-        pass
+    def block(self, block):
+        self.container(block)
 
-    def directive(self, key, value, directive):
-        if directive:
-            self.out("#%s\n" % directive)
+    def directive(self, directive):
+        if directive.directive:
+            self.out("#%s\n" % directive.directive)
         else:
-            self.out("//%s%s\n" % (value and "%s=" % key or key, value or ""))
+            self.out("//%s%s\n" % (
+                directive.value and "%s=" % directive.key or directive.key,
+                directive.value or ""))
 
     def text(self, text):
-        self.out(text)
+        self.out(text.s)
 
 serialiser = MoinGraphvizSerialiser
 
diff -r 23098f02bda7 -r 3bf425390801 moinformat/serialisers/moin/moin.py
--- a/moinformat/serialisers/moin/moin.py	Tue Jun 20 18:58:47 2023 +0200
+++ b/moinformat/serialisers/moin/moin.py	Fri Aug 18 00:18:42 2023 +0200
@@ -3,7 +3,7 @@
 """
 Moin wiki text serialiser.
 
-Copyright (C) 2017, 2018, 2021, 2022 Paul Boddie <paul@boddie.org.uk>
+Copyright (C) 2017, 2018, 2021, 2022, 2023 Paul Boddie <paul@boddie.org.uk>
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -28,224 +28,216 @@
     input_formats = ["moin", "wiki"]
     formats = ["moin", "wiki"]
 
-    def start_region(self, level, indent, type, args, extra):
+    # Node handler methods.
+
+    def region(self, region):
         out = self.out
-        if level:
-            out(" " * indent + "{" * level)
 
-        # Produce a header for regions within a top-level region.
+        if region.level:
+            out(" " * region.indent + "{" * region.level)
 
-        if type and type != "inline" and level:
+            # Produce a header for regions within a top-level region.
 
-            # Obtain individual arguments, excluding the region type.
+            if region.type and region.type != "inline":
+
+                # Obtain individual arguments, excluding the region type.
 
-            args = args.split(" ")[1:]
-            args_str = args and (" %s" % " ".join(args)) or ""
+                args = region.args and region.args.split(" ")[1:] or None
+                args_str = args and (" %s" % " ".join(args)) or ""
 
-            out("#!%s%s\n" % (type, args_str))
+                out("#!%s%s\n" % (region.type, args_str))
+
+        # Serialise the region content.
 
-    def end_region(self, level, indent, type, args, extra):
-        out = self.out
-        if level:
-            out("%s%s" % ("}" * level, extra or ""))
+        self.visit_region(region)
 
-    def start_block(self):
-        pass
+        if region.level:
+            out("%s%s" % ("}" * region.level, region.extra or ""))
 
-    def end_block(self):
-        pass
+    # Block node methods.
 
-    def start_defitem(self, pad, extra):
-        self.out((extra and extra + "::" or "") + pad)
-
-    def end_defitem(self, pad, extra):
-        pass
+    def block(self, block):
+        self.container(block)
 
-    def start_defterm(self, pad, extra):
-        self.out(pad)
+    def defitem(self, defitem):
+        self.out((defitem.extra and defitem.extra + "::" or "") + defitem.pad)
+        self.container(defitem)
 
-    def end_defterm(self, pad, extra):
-        self.out("::" + extra)
+    def defterm(self, defterm):
+        self.out(defterm.pad)
+        self.container(defterm)
+        self.out("::" + defterm.extra)
 
-    def start_emphasis(self):
-        self.out("''")
-
-    def end_emphasis(self):
-        self.out("''")
+    def fontstyle(self, fontstyle):
+        if fontstyle.emphasis:
+            self.out("''")
+        elif fontstyle.strong:
+            self.out("'''")
+        self.container(fontstyle)
+        if fontstyle.emphasis:
+            self.out("''")
+        elif fontstyle.strong:
+            self.out("'''")
 
-    def start_heading(self, level, extra, pad, identifier):
-        self.out(extra + "=" * level + pad)
+    def heading(self, heading):
+        self.out(heading.start_extra + "=" * heading.level + heading.start_pad)
+        self.container(heading)
+        self.out(heading.end_pad + "=" * heading.level + heading.end_extra)
 
-    def end_heading(self, level, pad, extra):
-        self.out(pad + "=" * level + extra)
-
-    def start_larger(self):
+    def larger(self, larger):
         self.out("~+")
-
-    def end_larger(self):
+        self.container(larger)
         self.out("+~")
 
-    def start_list(self, indent, marker, num):
-        pass
-
-    def end_list(self, indent, marker, num):
-        pass
+    def list(self, list):
+        self.container(list)
 
-    def start_listitem(self, indent, marker, space, num):
-        self.out("%s%s%s%s" % (indent * " ", marker, num and "#%s" % num or "", space))
+    def listitem(self, listitem):
+        self.out("%s%s%s%s" % (
+            listitem.indent * " ",
+            listitem.marker,
+            listitem.num and "#%s" % listitem.num or "",
+            listitem.space))
+        self.container(listitem)
 
-    def end_listitem(self, indent, marker, space, num):
-        pass
-
-    def start_macro(self, name, args, nodes, inline):
+    def macro(self, macro):
 
         # Special case of a deliberately unexpanded macro.
 
-        if nodes is None:
+        if macro.nodes is None:
             return
 
         # Fallback case for when macros are not replaced.
 
-        if not nodes:
-            self.out("<<%s%s>>" % (name, args and "(%s)" % ",".join(args) or ""))
+        if not macro.nodes:
+            self.out("<<%s%s>>" % (macro.name, macro.args and "(%s)" % ",".join(macro.args) or ""))
 
-    def end_macro(self, inline):
-        pass
-
-    def start_monospace(self):
+    def monospace(self, monospace):
+        self.out("`")
+        self.container(monospace)
         self.out("`")
 
-    def end_monospace(self):
-        self.out("`")
-
-    def start_smaller(self):
+    def smaller(self, smaller):
         self.out("~-")
-
-    def end_smaller(self):
+        self.container(smaller)
         self.out("-~")
 
-    def start_strong(self):
-        self.out("'''")
-
-    def end_strong(self):
-        self.out("'''")
-
-    def start_strikethrough(self):
+    def strikethrough(self, strikethrough):
         self.out("--(")
-
-    def end_strikethrough(self):
+        self.container(strikethrough)
         self.out(")--")
 
-    def start_subscript(self):
+    def subscript(self, subscript):
+        self.out(",,")
+        self.container(subscript)
         self.out(",,")
 
-    def end_subscript(self):
-        self.out(",,")
-
-    def start_superscript(self):
+    def superscript(self, superscript):
         self.out("^")
-
-    def end_superscript(self):
+        self.container(superscript)
         self.out("^")
 
-    def start_table(self):
-        pass
-
-    def end_table(self):
-        pass
+    def table(self, table):
+        self.container(table)
 
-    def start_table_attrs(self):
-        self.out("<")
-
-    def end_table_attrs(self):
-        self.out(">")
+    def table_cell(self, table_cell):
+        self.out("||")
+        self.container(table_cell)
 
-    def start_table_cell(self, attrs, leading, padding):
+    def table_row(self, table_row):
+        self.container(table_row)
         self.out("||")
-
-    def end_table_cell(self):
-        pass
+        self.out(table_row.trailing)
 
-    def start_table_row(self, leading, padding):
-        pass
-
-    def end_table_row(self, trailing):
-        self.out("||")
-        self.out(trailing)
-
-    def start_underline(self):
+    def underline(self, underline):
+        self.out("__")
+        self.container(underline)
         self.out("__")
 
-    def end_underline(self):
-        self.out("__")
+    # Inline node methods.
 
-    def anchor(self, target):
-        self.out("((%s))" % target)
+    def anchor(self, anchor):
+        self.out("((%s))" % anchor.target)
 
-    def break_(self):
+    def break_(self, break_):
         self.out("\n")
 
-    def comment(self, comment, extra):
-        self.out("##%s%s" % (comment, extra))
+    def comment(self, comment):
+        self.out("##%s%s" % (comment.comment, comment.extra))
 
-    def directive(self, directive, extra):
-        self.out("#%s%s" % (directive, extra))
+    def directive(self, directive):
+        self.out("#%s%s" % (directive.directive, directive.extra))
 
-    def linebreak(self):
+    def linebreak(self, linebreak):
         self.out(r"\\")
 
-    def link(self, target, nodes):
-        self.out("[[%s" % target)
-        for node in nodes:
+    def link(self, link):
+        self.out("[[%s" % link.target)
+        for node in link.nodes:
             self.out("|")
-            node.to_string(self)
+            node.visit(self)
         self.out("]]")
 
-    def link_label(self, nodes):
-        for node in nodes:
-            node.to_string(self)
+    def link_label(self, link_label):
+        self.container(link_label)
 
-    def link_parameter(self, key_value):
+    def link_parameter(self, link_parameter):
+        s = link_parameter.text_content()
+        key_value = s.split("=", 1)
+
         if len(key_value) == 1:
             self.out(key_value[0])
         else:
             self.out("=".join(key_value))
 
-    def nbsp(self):
+    def nbsp(self, nbsp):
         self.out(r"\_")
 
-    def rule(self, height):
-        self.out("-" * (height + 4))
+    def rule(self, rule):
+        self.out("-" * (rule.height + 4))
 
-    def table_attrs(self, nodes):
-        for node in nodes:
-            node.to_string(self)
+    def table_attrs(self, table_attrs):
+        self.out("<")
+        self.container(table_attrs)
+        if not table_attrs.incomplete:
+            self.out(">")
 
-    def table_attr(self, name, value, concise, quote):
-        if concise:
-            if name == "bgcolor": self.out(value)
-            elif name == "colspan": self.out("-%s" % value)
-            elif name == "align" : self.out(value == "left" and "(" or value == "right" and ")" or ":")
-            elif name == "rowspan": self.out("|%s" % value)
-            elif name == "valign" : self.out(value == "top" and "^" or "v")
-            elif name == "width" : self.out(value)
+    def table_attr(self, table_attr):
+        if table_attr.concise:
+            if table_attr.name == "bgcolor":
+                self.out(table_attr.value)
+            elif table_attr.name == "colspan":
+                self.out("-%s" % table_attr.value)
+            elif table_attr.name == "align":
+                self.out(table_attr.value == "left" and "(" or table_attr.value == "right" and ")" or ":")
+            elif table_attr.name == "rowspan":
+                self.out("|%s" % table_attr.value)
+            elif table_attr.name == "valign":
+                self.out(table_attr.value == "top" and "^" or "v")
+            elif table_attr.name == "width":
+                self.out(table_attr.value)
         else:
-            self.out("%s%s" % (escape_text(name), value is not None and
-                               "=%s%s%s" % (quote or '"', escape_attr(value), quote or '"') or ""))
-
-    def text(self, s):
-        self.out(s)
+            self.out("%s%s" % (
+                escape_text(table_attr.name),
+                table_attr.value is not None and "=%s%s%s" % (
+                    table_attr.quote or '"',
+                    escape_attr(table_attr.value),
+                    table_attr.quote or '"')
+                    or ""))
 
-    def transclusion(self, target, nodes):
-        self.out("{{%s" % target)
-        for node in nodes:
+    def text(self, text):
+        self.out(text.s)
+
+    def transclusion(self, transclusion):
+        self.out("{{%s" % transclusion.target)
+        for node in transclusion.nodes:
             self.out("|")
-            node.to_string(self)
+            node.visit(self)
         self.out("}}")
 
-    def verbatim(self, text):
+    def verbatim(self, verbatim):
         self.out("<<<")
-        self.out(text)
+        self.out(verbatim.text)
         self.out(">>>")
 
 serialiser = MoinSerialiser
diff -r 23098f02bda7 -r 3bf425390801 moinformat/serialisers/moin/table.py
--- a/moinformat/serialisers/moin/table.py	Tue Jun 20 18:58:47 2023 +0200
+++ b/moinformat/serialisers/moin/table.py	Fri Aug 18 00:18:42 2023 +0200
@@ -3,7 +3,7 @@
 """
 Moin wiki table serialiser.
 
-Copyright (C) 2017, 2018, 2021 Paul Boddie <paul@boddie.org.uk>
+Copyright (C) 2017, 2018, 2021, 2023 Paul Boddie <paul@boddie.org.uk>
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -31,31 +31,35 @@
         self.first_cell = False
         self.first_row = False
 
-    def start_table(self):
+    def table(self, table):
         self.first_row = True
+        self.container(table)
 
-    def start_table_cell(self, attrs, leading, padding):
+    def table_cell(self, table_cell):
         if not self.first_cell:
-            self.out(leading)
+            self.out(table_cell.leading)
             self.out("||")
         else:
             self.first_cell = False
-        self.out(padding)
 
-    def start_table_row(self, leading, padding):
+        self.out(table_cell.padding)
+        self.container(table_cell)
+
+    def table_row(self, table_row):
         self.first_cell = True
+
         if not self.first_row:
-            self.out(leading)
+            self.out(table_row.leading)
             self.out("==")
-            self.out(padding)
+            self.out(table_row.padding)
         else:
             self.first_row = False
 
-    def end_table_row(self, trailing):
-        self.out(trailing)
+        self.container(table_row)
+        self.out(table_row.trailing)
 
-    def continuation(self, text):
-        self.out(text)
+    def continuation(self, continuation):
+        self.out(continuation.text)
 
 serialiser = MoinTableSerialiser
 
diff -r 23098f02bda7 -r 3bf425390801 moinformat/serialisers/pretty/__init__.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/moinformat/serialisers/pretty/__init__.py	Fri Aug 18 00:18:42 2023 +0200
@@ -0,0 +1,22 @@
+#!/usr/bin/env python
+
+"""
+A package of modules containing prettyprinting serialisers.
+
+Copyright (C) 2023 Paul Boddie <paul@boddie.org.uk>
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; either version 3 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+details.
+
+You should have received a copy of the GNU General Public License along with
+this program.  If not, see <http://www.gnu.org/licenses/>.
+"""
+
+# vim: tabstop=4 expandtab shiftwidth=4
diff -r 23098f02bda7 -r 3bf425390801 moinformat/serialisers/pretty/common.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/moinformat/serialisers/pretty/common.py	Fri Aug 18 00:18:42 2023 +0200
@@ -0,0 +1,38 @@
+#!/usr/bin/env python
+
+"""
+Generic prettyprinted text serialiser.
+
+Copyright (C) 2023 Paul Boddie <paul@boddie.org.uk>
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; either version 3 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+details.
+
+You should have received a copy of the GNU General Public License along with
+this program.  If not, see <http://www.gnu.org/licenses/>.
+"""
+
+from moinformat.serialisers.common import Serialiser as CommonSerialiser
+
+class Serialiser(CommonSerialiser):
+
+    "Common support for serialisers presenting nodes for inspection."
+
+    def container(self, container):
+        "Visit the nodes of 'container' with the output indent deepened."
+
+        if not container.nodes:
+            return
+        self.output.indent += "  "
+        for child in container.nodes:
+            self.visit(child)
+        self.output.indent = self.output.indent[:-2]
+
+# vim: tabstop=4 expandtab shiftwidth=4
diff -r 23098f02bda7 -r 3bf425390801 moinformat/serialisers/pretty/graphviz.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/moinformat/serialisers/pretty/graphviz.py	Fri Aug 18 00:18:42 2023 +0200
@@ -0,0 +1,47 @@
+#!/usr/bin/env python
+
+"""
+Prettyprinted text serialiser for Graphviz nodes.
+
+Copyright (C) 2023 Paul Boddie <paul@boddie.org.uk>
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; either version 3 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+details.
+
+You should have received a copy of the GNU General Public License along with
+this program.  If not, see <http://www.gnu.org/licenses/>.
+"""
+
+from moinformat.serialisers.pretty.common import Serialiser
+
+class GraphvizSerialiser(Serialiser):
+
+    "Prettyprinter writing each Graphviz node as an indented line of text."
+
+    input_formats = ["dot", "graphviz"]
+    formats = ["pretty"]
+
+    # Node handlers, each writing a line prefixed with the current indent.
+
+    def block(self, block):
+        self.out("%sBlock\n" % self.output.indent)
+        self.container(block)
+
+    def directive(self, directive):
+        details = (self.output.indent, directive.key, directive.value,
+                   directive.directive)
+        self.out("%sDirective: key=%r value=%r directive=%r\n" % details)
+
+    def text(self, text):
+        self.out("%sText: %r\n" % (self.output.indent, text.s))
+
+serialiser = GraphvizSerialiser
+
+# vim: tabstop=4 expandtab shiftwidth=4
diff -r 23098f02bda7 -r 3bf425390801 moinformat/serialisers/pretty/html.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/moinformat/serialisers/pretty/html.py	Fri Aug 18 00:18:42 2023 +0200
@@ -0,0 +1,63 @@
+#!/usr/bin/env python
+
+"""
+Prettyprinted text serialiser for HTML nodes.
+
+Copyright (C) 2023 Paul Boddie <paul@boddie.org.uk>
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; either version 3 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+details.
+
+You should have received a copy of the GNU General Public License along with
+this program.  If not, see <http://www.gnu.org/licenses/>.
+"""
+
+from moinformat.serialisers.pretty.common import Serialiser
+
+class HTMLSerialiser(Serialiser):
+
+    "Serialisation of HTML nodes for inspection."
+
+    input_formats = ["html"]
+    formats = ["pretty"]
+
+    def attribute(self, attribute):
+        self.out("%sAttribute: %s" % (self.output.indent, attribute.name))
+        if attribute.value is not None:
+            self.out("=")
+        self.visit(attribute.value)
+        self.out("\n")
+
+    def attribute_value(self, attribute_value):
+        self.out("%s%s%s" % (attribute_value.quote, attribute_value.value, attribute_value.quote))
+
+    def element(self, element):
+        self.out("%sElement: name=%r\n" % (self.output.indent, element.name))
+        self.output.indent += "  "
+        for attribute in element.attributes:
+            self.visit(attribute)
+        self.output.indent = self.output.indent[:-2]
+        self.container(element)
+
+    def node(self, node):
+        self.out("%s%s: %r\n" % (self.output.indent, node.__class__.__name__, node.value))
+
+    comment = node
+    directive = node
+    inclusion = node
+    text = node
+
+    def fragment(self, fragment):
+        self.out("%s%s\n" % (self.output.indent, fragment.__class__.__name__))
+        self.container(fragment)
+
+serialiser = HTMLSerialiser
+
+# vim: tabstop=4 expandtab shiftwidth=4
diff -r 23098f02bda7 -r 3bf425390801 moinformat/serialisers/pretty/moin.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/moinformat/serialisers/pretty/moin.py	Fri Aug 18 00:18:42 2023 +0200
@@ -0,0 +1,168 @@
+#!/usr/bin/env python
+
+"""
+Prettyprinted text serialiser for Moin nodes.
+
+Copyright (C) 2023 Paul Boddie <paul@boddie.org.uk>
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; either version 3 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+details.
+
+You should have received a copy of the GNU General Public License along with
+this program.  If not, see <http://www.gnu.org/licenses/>.
+"""
+
+from moinformat.serialisers.pretty.common import Serialiser
+
+class MoinSerialiser(Serialiser):
+
+    "Serialisation of Moin nodes for inspection."
+
+    input_formats = ["moin", "wiki"]
+    formats = ["pretty"]
+
+    # Node handler methods.
+
+    def region(self, region):
+        self.out("%sRegion: level=%d indent=%d type=%s args=%r extra=%r\n" % (
+            self.output.indent, region.level, region.indent, region.type, region.args,
+            region.extra))
+        self.visit_region(region)
+
+    # Block node methods.
+
+    def block(self, block):
+        self.out("%sBlock\n" % self.output.indent)
+        self.container(block)
+
+    def defitem(self, defitem):
+        self.out("%sDefItem: pad=%r extra=%r\n" % (self.output.indent,
+            defitem.pad, defitem.extra))
+        self.container(defitem)
+
+    def defterm(self, defterm):
+        self.out("%sDefTerm: pad=%r extra=%r\n" % (self.output.indent,
+            defterm.pad, defterm.extra))
+        self.container(defterm)
+
+    def fontstyle(self, fontstyle):
+        self.out("%sFontStyle: emphasis=%r strong=%r\n" % (self.output.indent,
+            fontstyle.emphasis, fontstyle.strong))
+        self.container(fontstyle)
+
+    def heading(self, heading):
+        self.out("%sHeading: level=%d start_extra=%r start_pad=%r end_pad=%r"
+            " end_extra=%r identifier=%r\n" % (
+            self.output.indent, heading.level, heading.start_extra,
+            heading.start_pad, heading.end_pad, heading.end_extra,
+            heading.identifier))
+        self.container(heading)
+
+    def link_label(self, link_label):
+        self.out("%sLinkLabel\n" % self.output.indent)
+        self.container(link_label)
+
+    def link_parameter(self, link_parameter):
+        self.out("%sLinkParameter\n" % self.output.indent)
+        self.container(link_parameter)
+
+    def list(self, list):
+        self.out("%sList: indent=%r marker=%r num=%r\n" % (
+            self.output.indent, list.indent, list.marker, list.num))
+        self.container(list)
+
+    def listitem(self, listitem):
+        self.out("%sListItem: indent=%d marker=%r space=%r num=%r\n" % (
+            self.output.indent, listitem.indent, listitem.marker, listitem.space, listitem.num))
+        self.container(listitem)
+
+    def table(self, table):
+        self.out("%sTable:\n" % self.output.indent)
+        self.container(table)
+
+    def table_attrs(self, table_attrs):
+        self.out("%sTableAttrs:\n" % self.output.indent)
+        self.container(table_attrs)
+
+    def table_cell(self, table_cell):
+        self.out("%sTableCell: leading=%r padding=%r\n" % (
+            self.output.indent, table_cell.leading, table_cell.padding))
+        self.container(table_cell)
+
+    def table_row(self, table_row):
+        self.out("%sTableRow: trailing=%r leading=%r padding=%r\n" % (
+            self.output.indent, table_row.trailing, table_row.leading,
+            table_row.padding))
+        self.container(table_row)
+
+    def inline(self, inline):
+        self.out("%s%s\n" % (self.output.indent, inline.__class__.__name__))
+
+    # Inline nodes with children.
+
+    def inline_container(self, inline):
+        self.inline(inline)
+        self.container(inline)
+
+    larger = inline_container
+
+    def link(self, link):
+        self.out("%sLink: target=%r\n" % (self.output.indent, link.target))
+        self.container(link)
+
+    def macro(self, macro):
+        self.out("%sMacro: name=%r args=%r\n" % (self.output.indent, macro.name, macro.args))
+        self.container(macro)
+
+    monospace = inline_container
+    smaller = inline_container
+    strikethrough = inline_container
+    subscript = inline_container
+    superscript = inline_container
+
+    def transclusion(self, transclusion):
+        self.out("%sTransclusion: target=%r\n" % (self.output.indent, transclusion.target))
+        self.container(transclusion)
+
+    underline = inline_container
+
+    # Inline nodes without children.
+
+    def anchor(self, anchor):
+        self.out("%sAnchor: target=%r\n" % (self.output.indent, anchor.target))
+
+    break_ = inline
+
+    def comment(self, comment):
+        self.out("%sComment: comment=%r extra=%r\n" % (self.output.indent, comment.comment, comment.extra))
+
+    def directive(self, directive):
+        self.out("%sDirective: directive=%r extra=%r\n" % (self.output.indent, directive.directive, directive.extra))
+
+    linebreak = inline
+    nbsp = inline
+
+    def rule(self, rule):
+        self.out("%sRule: height=%d\n" % (self.output.indent, rule.height))
+
+    def table_attr(self, table_attr):
+        self.out("%sTableAttr: name=%r value=%r concise=%r quote=%r\n" % (
+            self.output.indent, table_attr.name, table_attr.value,
+            table_attr.concise, table_attr.quote))
+
+    def text(self, text):
+        self.out("%sText: %r\n" % (self.output.indent, text.s))
+
+    def verbatim(self, verbatim):
+        self.out("%sVerbatim: text=%r\n" % (self.output.indent, verbatim.text))
+
+serialiser = MoinSerialiser
+
+# vim: tabstop=4 expandtab shiftwidth=4
diff -r 23098f02bda7 -r 3bf425390801 moinformat/serialisers/pretty/pretty.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/moinformat/serialisers/pretty/pretty.py	Fri Aug 18 00:18:42 2023 +0200
@@ -0,0 +1,38 @@
+#!/usr/bin/env python
+
+"""
+Prettyprinted text serialiser for prettyprinted document tree nodes.
+
+Copyright (C) 2023 Paul Boddie <paul@boddie.org.uk>
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; either version 3 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+details.
+
+You should have received a copy of the GNU General Public License along with
+this program.  If not, see <http://www.gnu.org/licenses/>.
+"""
+
+from moinformat.serialisers.pretty.common import Serialiser
+
+class PrettySerialiser(Serialiser):
+
+    "Serialisation of prettyprinted document nodes for inspection."
+
+    input_formats = ["pretty"]
+    formats = ["pretty"]
+
+    def node(self, node):
+        self.out("%s%s%s\n" % (self.output.indent, node.name,
+            len(node.nodes) and " nodes=%d" % len(node.nodes) or ""))
+        self.container(node)
+
+serialiser = PrettySerialiser
+
+# vim: tabstop=4 expandtab shiftwidth=4
diff -r 23098f02bda7 -r 3bf425390801 moinformat/serialisers/pretty/table.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/moinformat/serialisers/pretty/table.py	Fri Aug 18 00:18:42 2023 +0200
@@ -0,0 +1,35 @@
+#!/usr/bin/env python
+
+"""
+Moin wiki table prettyprinter.
+
+Copyright (C) 2023 Paul Boddie <paul@boddie.org.uk>
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; either version 3 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+details.
+
+You should have received a copy of the GNU General Public License along with
+this program.  If not, see <http://www.gnu.org/licenses/>.
+"""
+
+from moinformat.serialisers.pretty.moin import MoinSerialiser
+
+class MoinTableSerialiser(MoinSerialiser):
+
+    "Serialisation of table nodes for inspection."
+
+    input_formats = ["table"]
+
+    def continuation(self, continuation):
+        self.out("%sContinuation: %r\n" % (self.output.indent, continuation.text))
+
+serialiser = MoinTableSerialiser
+
+# vim: tabstop=4 expandtab shiftwidth=4
diff -r 23098f02bda7 -r 3bf425390801 moinformat/tree/graphviz.py
--- a/moinformat/tree/graphviz.py	Tue Jun 20 18:58:47 2023 +0200
+++ b/moinformat/tree/graphviz.py	Fri Aug 18 00:18:42 2023 +0200
@@ -3,7 +3,7 @@
 """
 Graphviz document tree nodes.
 
-Copyright (C) 2018 Paul Boddie <paul@boddie.org.uk>
+Copyright (C) 2018, 2023 Paul Boddie <paul@boddie.org.uk>
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -39,10 +39,7 @@
     def __repr__(self):
         return "Directive(%r, %r, %r)" % (self.key, self.value, self.directive)
 
-    def prettyprint(self, indent=""):
-        return "%sDirective: key=%r value=%r directive=%r" % (indent, self.key, self.value, self.directive)
-
-    def to_string(self, out):
-        out.directive(self.key, self.value, self.directive)
+    def visit(self, visitor):
+        return visitor.directive(self)
 
 # vim: tabstop=4 expandtab shiftwidth=4
diff -r 23098f02bda7 -r 3bf425390801 moinformat/tree/html.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/moinformat/tree/html.py	Fri Aug 18 00:18:42 2023 +0200
@@ -0,0 +1,26 @@
+#!/usr/bin/env python
+
+"""
+HTML document nodes.
+
+Copyright (C) 2023 Paul Boddie <paul@boddie.org.uk>
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; either version 3 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+details.
+
+You should have received a copy of the GNU General Public License along with
+this program.  If not, see <http://www.gnu.org/licenses/>.
+"""
+
+from moinformat.utils.htmlparse.tree import Attribute, AttributeValue, \
+                                            Comment, Directive, Element, \
+                                            Fragment, Node, Inclusion, Text
+
+# vim: tabstop=4 expandtab shiftwidth=4
diff -r 23098f02bda7 -r 3bf425390801 moinformat/tree/moin.py
--- a/moinformat/tree/moin.py	Tue Jun 20 18:58:47 2023 +0200
+++ b/moinformat/tree/moin.py	Fri Aug 18 00:18:42 2023 +0200
@@ -3,7 +3,8 @@
 """
 Moin wiki format document tree nodes.
 
-Copyright (C) 2017, 2018, 2019, 2020, 2021, 2022 Paul Boddie <paul@boddie.org.uk>
+Copyright (C) 2017, 2018, 2019, 2020, 2021, 2022,
+              2023 Paul Boddie <paul@boddie.org.uk>
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -174,18 +175,6 @@
 
         return not self.text_content().strip()
 
-    def __str__(self):
-        return self.prettyprint()
-
-    def _prettyprint(self, l, indent=""):
-        for node in self.nodes:
-            l.append(node.prettyprint(indent + "  "))
-        return "\n".join(l)
-
-    def _to_string(self, out):
-        for node in self.nodes:
-            node.to_string(out)
-
 class Region(Container):
 
     "A region of the page."
@@ -216,28 +205,8 @@
         return "Region(%r, %r, %r, %r, %r, %r, %r)" % (self.nodes, self.level,
             self.indent, self.type, self.args, self.transparent, self.extra)
 
-    def prettyprint(self, indent=""):
-        l = ["%sRegion: level=%d indent=%d type=%s args=%r extra=%r" % (indent,
-             self.level, self.indent, self.type, self.args, self.extra)]
-        return self._prettyprint(l, indent)
-
-    def to_string(self, out):
-        out.start_region(self.level, self.indent, self.type, self.args, self.extra)
-
-        # Obtain a serialiser for the region from the same format family.
-        # Retain the same serialiser if no appropriate serialiser could be
-        # obtained.
-
-        serialiser_name = "%s.%s" % (out.formats[0], self.type)
-        serialiser = out.get_serialiser(serialiser_name)
-
-        # Serialise the region.
-
-        self._to_string(serialiser)
-
-        # End the region with the previous serialiser.
-
-        out.end_region(self.level, self.indent, self.type, self.args, self.extra)
+    def visit(self, visitor):
+        return visitor.region(self)
 
 
 
@@ -250,14 +219,8 @@
     def __repr__(self):
         return "Block(%r)" % self.nodes
 
-    def prettyprint(self, indent=""):
-        l = ["%sBlock" % indent]
-        return self._prettyprint(l, indent)
-
-    def to_string(self, out):
-        out.start_block()
-        self._to_string(out)
-        out.end_block()
+    def visit(self, visitor):
+        return visitor.block(self)
 
 class DefItem(Container):
 
@@ -271,14 +234,8 @@
     def __repr__(self):
         return "DefItem(%r, %r, %r)" % (self.nodes, self.pad, self.extra)
 
-    def prettyprint(self, indent=""):
-        l = ["%sDefItem: pad=%r extra=%r" % (indent, self.pad, self.extra)]
-        return self._prettyprint(l, indent)
-
-    def to_string(self, out):
-        out.start_defitem(self.pad, self.extra)
-        self._to_string(out)
-        out.end_defitem(self.pad, self.extra)
+    def visit(self, visitor):
+        return visitor.defitem(self)
 
 class DefTerm(Container):
 
@@ -292,14 +249,8 @@
     def __repr__(self):
         return "DefTerm(%r, %r, %r)" % (self.nodes, self.pad, self.extra)
 
-    def prettyprint(self, indent=""):
-        l = ["%sDefTerm: pad=%r extra=%r" % (indent, self.pad, self.extra)]
-        return self._prettyprint(l, indent)
-
-    def to_string(self, out):
-        out.start_defterm(self.pad, self.extra)
-        self._to_string(out)
-        out.end_defterm(self.pad, self.extra)
+    def visit(self, visitor):
+        return visitor.defterm(self)
 
 class FontStyle(Container):
 
@@ -327,20 +278,8 @@
     def __repr__(self):
         return "FontStyle(%r, %r, %r)" % (self.nodes, self.emphasis, self.strong)
 
-    def prettyprint(self, indent=""):
-        l = ["%sFontStyle: emphasis=%r strong=%r" % (indent, self.emphasis, self.strong)]
-        return self._prettyprint(l, indent)
-
-    def to_string(self, out):
-        if self.emphasis:
-            out.start_emphasis()
-        elif self.strong:
-            out.start_strong()
-        self._to_string(out)
-        if self.emphasis:
-            out.end_emphasis()
-        elif self.strong:
-            out.end_strong()
+    def visit(self, visitor):
+        return visitor.fontstyle(self)
 
 class Heading(Container):
 
@@ -361,17 +300,8 @@
             self.nodes, self.level, self.start_extra, self.start_pad,
             self.end_pad, self.end_extra, self.identifier)
 
-    def prettyprint(self, indent=""):
-        l = ["%sHeading: level=%d start_extra=%r start_pad=%r end_pad=%r"
-             " end_extra=%r identifier=%r" % (
-             indent, self.level, self.start_extra, self.start_pad, self.end_pad,
-             self.end_extra, self.identifier)]
-        return self._prettyprint(l, indent)
-
-    def to_string(self, out):
-        out.start_heading(self.level, self.start_extra, self.start_pad, self.identifier)
-        self._to_string(out)
-        out.end_heading(self.level, self.end_pad, self.end_extra)
+    def visit(self, visitor):
+        return visitor.heading(self)
 
 class LinkLabel(Container):
 
@@ -380,12 +310,8 @@
     def __repr__(self):
         return "LinkLabel(%r)" % self.nodes
 
-    def prettyprint(self, indent=""):
-        l = ["%sLinkLabel" % indent]
-        return self._prettyprint(l, indent)
-
-    def to_string(self, out):
-        out.link_label(self.nodes)
+    def visit(self, visitor):
+        return visitor.link_label(self)
 
 class LinkParameter(Container):
 
@@ -394,14 +320,8 @@
     def __repr__(self):
         return "LinkParameter(%r)" % self.nodes
 
-    def prettyprint(self, indent=""):
-        l = ["%sLinkParameter" % indent]
-        return self._prettyprint(l, indent)
-
-    def to_string(self, out):
-        s = self.text_content()
-        t = s.split("=", 1)
-        out.link_parameter(t)
+    def visit(self, visitor):
+        return visitor.link_parameter(self)
 
 class List(Container):
 
@@ -420,18 +340,10 @@
     def __repr__(self):
         return "List(%r)" % self.nodes
 
-    def prettyprint(self, indent=""):
+    def visit(self, visitor):
         if not self.first:
             self.init()
-        l = ["%sList: indent=%r marker=%r num=%r" % (indent, self.indent, self.marker, self.num)]
-        return self._prettyprint(l, indent)
-
-    def to_string(self, out):
-        if not self.first:
-            self.init()
-        out.start_list(self.indent, self.marker, self.num)
-        self._to_string(out)
-        out.end_list(self.indent, self.marker, self.num)
+        return visitor.list(self)
 
 class ListItem(Container):
 
@@ -451,14 +363,18 @@
     def __repr__(self):
         return "ListItem(%r, %r, %r, %r, %r)" % (self.nodes, self.indent, self.marker, self.space, self.num)
 
-    def prettyprint(self, indent=""):
-        l = ["%sListItem: indent=%d marker=%r space=%r num=%r" % (indent, self.indent, self.marker, self.space, self.num)]
-        return self._prettyprint(l, indent)
+    def visit(self, visitor):
+        return visitor.listitem(self)
+
+class Table(Container):
 
-    def to_string(self, out):
-        out.start_listitem(self.indent, self.marker, self.space, self.num)
-        self._to_string(out)
-        out.end_listitem(self.indent, self.marker, self.space, self.num)
+    "A table."
+
+    def __repr__(self):
+        return "Table(%r)" % self.nodes
+
+    def visit(self, visitor):
+        return visitor.table(self)
 
 class TableAttrs(Container):
 
@@ -476,31 +392,8 @@
     def __repr__(self):
         return "TableAttrs(%r)" % self.nodes
 
-    def prettyprint(self, indent=""):
-        l = ["%sTableAttrs:" % indent]
-        return self._prettyprint(l, indent)
-
-    def to_string(self, out):
-        out.start_table_attrs()
-        out.table_attrs(self.nodes)
-        if not self.incomplete:
-            out.end_table_attrs()
-
-class Table(Container):
-
-    "A table."
-
-    def __repr__(self):
-        return "Table(%r)" % self.nodes
-
-    def prettyprint(self, indent=""):
-        l = ["%sTable:" % indent]
-        return self._prettyprint(l, indent)
-
-    def to_string(self, out):
-        out.start_table()
-        self._to_string(out)
-        out.end_table()
+    def visit(self, visitor):
+        return visitor.table_attrs(self)
 
 class TableCell(Container):
 
@@ -516,15 +409,8 @@
         return "TableCell(%r, %r, %r, %r)" % (self.nodes, self.attrs,
                                               self.leading, self.padding)
 
-    def prettyprint(self, indent=""):
-        l = ["%sTableCell: leading=%r padding=%r" % (indent, self.leading,
-                                                     self.padding)]
-        return self._prettyprint(l, indent)
-
-    def to_string(self, out):
-        out.start_table_cell(self.attrs, self.leading, self.padding)
-        self._to_string(out)
-        out.end_table_cell()
+    def visit(self, visitor):
+        return visitor.table_cell(self)
 
 class TableRow(Container):
 
@@ -540,15 +426,8 @@
         return "TableRow(%r, %r, %r, %r)" % (self.nodes, self.trailing,
                                              self.leading, self.padding)
 
-    def prettyprint(self, indent=""):
-        l = ["%sTableRow: trailing=%r leading=%r padding=%r" % (
-            indent, self.trailing, self.leading, self.padding)]
-        return self._prettyprint(l, indent)
-
-    def to_string(self, out):
-        out.start_table_row(self.leading, self.padding)
-        self._to_string(out)
-        out.end_table_row(self.trailing)
+    def visit(self, visitor):
+        return visitor.table_row(self)
 
 
 
@@ -561,18 +440,12 @@
     def __repr__(self):
         return "%s(%r)" % (self.__class__.__name__, self.nodes)
 
-    def prettyprint(self, indent=""):
-        l = ["%s%s" % (indent, self.__class__.__name__)]
-        return self._prettyprint(l, indent)
-
 class Larger(Inline):
 
     "Larger text."
 
-    def to_string(self, out):
-        out.start_larger()
-        self._to_string(out)
-        out.end_larger()
+    def visit(self, visitor):
+        return visitor.larger(self)
 
 class Link(Container):
 
@@ -585,12 +458,8 @@
     def __repr__(self):
         return "Link(%r, %r)" % (self.nodes, self.target)
 
-    def prettyprint(self, indent=""):
-        l = ["%sLink: target=%r" % (indent, self.target)]
-        return self._prettyprint(l, indent)
-
-    def to_string(self, out):
-        out.link(self.target, self.nodes)
+    def visit(self, visitor):
+        return visitor.link(self)
 
 class Macro(Container):
 
@@ -609,60 +478,43 @@
                                                   self.parent, self.region,
                                                   self.nodes, self.inline)
 
-    def prettyprint(self, indent=""):
-        l = ["%sMacro: name=%r args=%r" % (indent, self.name, self.args)]
-        return self._prettyprint(l, indent)
-
-    def to_string(self, out):
-        out.start_macro(self.name, self.args, self.nodes, self.inline)
-        if self.nodes:
-            self._to_string(out)
-        out.end_macro(self.inline)
+    def visit(self, visitor):
+        return visitor.macro(self)
 
 class Monospace(Inline):
 
     "Monospaced text."
 
-    def to_string(self, out):
-        out.start_monospace()
-        self._to_string(out)
-        out.end_monospace()
+    def visit(self, visitor):
+        return visitor.monospace(self)
 
 class Smaller(Inline):
 
     "Smaller text."
 
-    def to_string(self, out):
-        out.start_smaller()
-        self._to_string(out)
-        out.end_smaller()
+    def visit(self, visitor):
+        return visitor.smaller(self)
 
 class Strikethrough(Inline):
 
-    "Crossed-out text."
+    "Crossed-out text."
 
-    def to_string(self, out):
-        out.start_strikethrough()
-        self._to_string(out)
-        out.end_strikethrough()
+    def visit(self, visitor):
+        return visitor.strikethrough(self)
 
 class Subscript(Inline):
 
     "Subscripted text."
 
-    def to_string(self, out):
-        out.start_subscript()
-        self._to_string(out)
-        out.end_subscript()
+    def visit(self, visitor):
+        return visitor.subscript(self)
 
 class Superscript(Inline):
 
     "Superscripted text."
 
-    def to_string(self, out):
-        out.start_superscript()
-        self._to_string(out)
-        out.end_superscript()
+    def visit(self, visitor):
+        return visitor.superscript(self)
 
 class Transclusion(Container):
 
@@ -675,21 +527,15 @@
     def __repr__(self):
         return "Transclusion(%r, %r)" % (self.nodes, self.target)
 
-    def prettyprint(self, indent=""):
-        l = ["%sTransclusion: target=%r" % (indent, self.target)]
-        return self._prettyprint(l, indent)
-
-    def to_string(self, out):
-        out.transclusion(self.target, self.nodes)
+    def visit(self, visitor):
+        return visitor.transclusion(self)
 
 class Underline(Inline):
 
     "Underlined text."
 
-    def to_string(self, out):
-        out.start_underline()
-        self._to_string(out)
-        out.end_underline()
+    def visit(self, visitor):
+        return visitor.underline(self)
 
 
 
@@ -712,11 +558,8 @@
     def __repr__(self):
         return "Anchor(%r)" % self.target
 
-    def prettyprint(self, indent=""):
-        return "%sAnchor: target=%r" % (indent, self.target)
-
-    def to_string(self, out):
-        out.anchor(self.target)
+    def visit(self, visitor):
+        return visitor.anchor(self)
 
 class Break(Node):
 
@@ -725,11 +568,8 @@
     def __repr__(self):
         return "Break()"
 
-    def prettyprint(self, indent=""):
-        return "%sBreak" % indent
-
-    def to_string(self, out):
-        out.break_()
+    def visit(self, visitor):
+        return visitor.break_(self)
 
 class Comment(Node):
 
@@ -742,11 +582,8 @@
     def __repr__(self):
         return "Comment(%r, %r)" % (self.comment, self.extra)
 
-    def prettyprint(self, indent=""):
-        return "%sComment: comment=%r extra=%r" % (indent, self.comment, self.extra)
-
-    def to_string(self, out):
-        out.comment(self.comment, self.extra)
+    def visit(self, visitor):
+        return visitor.comment(self)
 
 class Directive(Node):
 
@@ -759,11 +596,8 @@
     def __repr__(self):
         return "Directive(%r, %r)" % (self.directive, self.extra)
 
-    def prettyprint(self, indent=""):
-        return "%sDirective: directive=%r extra=%r" % (indent, self.directive, self.extra)
-
-    def to_string(self, out):
-        out.directive(self.directive, self.extra)
+    def visit(self, visitor):
+        return visitor.directive(self)
 
 class LineBreak(Node):
 
@@ -772,11 +606,8 @@
     def __repr__(self):
         return "LineBreak()"
 
-    def prettyprint(self, indent=""):
-        return "%sLineBreak" % indent
-
-    def to_string(self, out):
-        out.linebreak()
+    def visit(self, visitor):
+        return visitor.linebreak(self)
 
 class NonBreakingSpace(Node):
 
@@ -785,11 +616,8 @@
     def __repr__(self):
         return "NonBreakingSpace()"
 
-    def prettyprint(self, indent=""):
-        return "%sNonBreakingSpace" % indent
-
-    def to_string(self, out):
-        out.nbsp()
+    def visit(self, visitor):
+        return visitor.nbsp(self)
 
 class Rule(Node):
 
@@ -801,11 +629,8 @@
     def __repr__(self):
         return "Rule(%d)" % self.height
 
-    def prettyprint(self, indent=""):
-        return "%sRule: height=%d" % (indent, self.height)
-
-    def to_string(self, out):
-        out.rule(self.height)
+    def visit(self, visitor):
+        return visitor.rule(self)
 
 class TableAttr(Node):
 
@@ -820,11 +645,8 @@
     def __repr__(self):
         return "TableAttr(%r, %r, %r, %r)" % (self.name, self.value, self.concise, self.quote)
 
-    def prettyprint(self, indent=""):
-        return "%sTableAttr: name=%r value=%r concise=%r quote=%r" % (indent, self.name, self.value, self.concise, self.quote)
-
-    def to_string(self, out):
-        out.table_attr(self.name, self.value, self.concise, self.quote)
+    def visit(self, visitor):
+        return visitor.table_attr(self)
 
 class Text(Node):
 
@@ -845,11 +667,8 @@
     def __repr__(self):
         return "Text(%r)" % self.s
 
-    def prettyprint(self, indent=""):
-        return "%sText: %r" % (indent, self.s)
-
-    def to_string(self, out):
-        out.text(self.s)
+    def visit(self, visitor):
+        return visitor.text(self)
 
 class Verbatim(Node):
 
@@ -861,10 +680,7 @@
     def __repr__(self):
         return "Verbatim(%r)" % self.text
 
-    def prettyprint(self, indent=""):
-        return "%sVerbatim: text=%r" % (indent, self.text)
-
-    def to_string(self, out):
-        out.verbatim(self.text)
+    def visit(self, visitor):
+        return visitor.verbatim(self)
 
 # vim: tabstop=4 expandtab shiftwidth=4
diff -r 23098f02bda7 -r 3bf425390801 moinformat/tree/pretty.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/moinformat/tree/pretty.py	Fri Aug 18 00:18:42 2023 +0200
@@ -0,0 +1,65 @@
+#!/usr/bin/env python
+
+"""
+Prettyprinted document tree nodes.
+
+Copyright (C) 2017, 2018, 2019, 2023 Paul Boddie <paul@boddie.org.uk>
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; either version 3 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+details.
+
+You should have received a copy of the GNU General Public License along with
+this program.  If not, see <http://www.gnu.org/licenses/>.
+"""
+
+from moinformat.tree.moin import Container
+
+class Node:
+
+    "A simplified tree node representation."
+
+    def __init__(self, name):
+        self.name = name
+        self.nodes = []
+
+    def __repr__(self):
+        return "Node(%r, %r)" % (self.name, self.nodes)
+
+    def visit(self, visitor):
+        return visitor.node(self)
+
+    def append(self, node):
+        self.nodes.append(node)
+
+    def test(self, other):
+
+        """
+        Test whether this node is considered equivalent to 'other', where
+        'other' is a moinformat.tree node.
+
+        Return any failing tree nodes or None.
+        """
+
+        if other.__class__.__name__ != self.name:
+            return self, other, "name"
+
+        if isinstance(other, Container):
+            for node, other_node in map(None, self.nodes, other.nodes):
+                if node is None or other_node is None:
+                    return self, other, node is None and "simple" or "document"
+                t = node.test(other_node)
+                if t:
+                    return t
+        elif self.nodes:
+            return self, other, "empty"
+
+        return None
+
+# vim: tabstop=4 expandtab shiftwidth=4
diff -r 23098f02bda7 -r 3bf425390801 moinformat/tree/table.py
--- a/moinformat/tree/table.py	Tue Jun 20 18:58:47 2023 +0200
+++ b/moinformat/tree/table.py	Fri Aug 18 00:18:42 2023 +0200
@@ -3,7 +3,7 @@
 """
 Extended table syntax document tree nodes.
 
-Copyright (C) 2018 Paul Boddie <paul@boddie.org.uk>
+Copyright (C) 2018, 2023 Paul Boddie <paul@boddie.org.uk>
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -31,10 +31,7 @@
     def __repr__(self):
         return "Continuation(%r)" % self.text
 
-    def prettyprint(self, indent=""):
-        return "%sContinuation: %r" % (indent, self.text)
-
-    def to_string(self, out):
-        out.continuation(self.text)
+    def visit(self, visitor):
+        return visitor.continuation(self)
 
 # vim: tabstop=4 expandtab shiftwidth=4
diff -r 23098f02bda7 -r 3bf425390801 moinformat/utils/htmlparse/__init__.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/moinformat/utils/htmlparse/__init__.py	Fri Aug 18 00:18:42 2023 +0200
@@ -0,0 +1,24 @@
+#!/usr/bin/env python
+
+"""
+HTML parsing modules.
+
+Copyright (C) 2023 Paul Boddie <paul@boddie.org.uk>
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; either version 3 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+details.
+
+You should have received a copy of the GNU General Public License along with
+this program.  If not, see <http://www.gnu.org/licenses/>.
+"""
+
+from moinformat.utils.htmlparse.parse import Parser
+
+# vim: tabstop=4 expandtab shiftwidth=4
diff -r 23098f02bda7 -r 3bf425390801 moinformat/utils/htmlparse/lex.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/moinformat/utils/htmlparse/lex.py	Fri Aug 18 00:18:42 2023 +0200
@@ -0,0 +1,234 @@
+#!/usr/bin/env python
+
+"""
+Lexical partitioning of HTML document content.
+
+Copyright (C) 2023 Paul Boddie <paul@boddie.org.uk>
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; either version 3 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+details.
+
+You should have received a copy of the GNU General Public License along with
+this program.  If not, see <http://www.gnu.org/licenses/>.
+"""
+
+# Lexical analysis state transition handler functions.
+
+def tag_or_similar(text, pos):
+
+    "Return the span class for the construct started by the '<' at 'pos'."
+
+    if text[pos:pos+2] == "<!":
+        if text[pos+2:pos+3] == "[":        # "<![" as in "<![CDATA["
+            return IN_INCLUSION
+        elif text[pos+2:pos+4] == "--":     # "<!--"
+            return IN_COMMENT
+        else:
+            return IN_DIRECTIVE
+    else:
+        return IN_TAG
+
+def at_attribute_value(text, pos):
+    return AT_ATTRIBUTE_VALUE
+
+def in_dq_attribute_value(text, pos):
+    return IN_DQ_ATTRIBUTE_VALUE
+
+def in_sq_attribute_value(text, pos):
+    return IN_SQ_ATTRIBUTE_VALUE
+
+def after_attribute_value(text, pos):
+    return AFTER_ATTRIBUTE_VALUE
+
+def end_of_standalone_tag(text, pos):
+    return AT_END_OF_TAG
+
+def end_of_tag(text, pos):
+    return BETWEEN_TAGS
+
+
+
+# Lexical analysis states/spans.
+
+class Span:
+    def __init__(self, text):
+        self.text = text
+
+    def empty(self):
+        return not self.text
+
+    def __repr__(self):
+        return "%s(%r)" % (self.__class__.__name__, self.text)
+
+class AT_END_OF_TAG(Span):
+    transitions = [(None, "", end_of_tag)]
+
+    def empty(self):
+        return False
+
+    def visit(self, visitor):
+        return visitor.at_end_of_tag(self)
+
+class BETWEEN_TAGS(Span):
+    transitions = [("<", "", tag_or_similar)]
+
+    def visit(self, visitor):
+        return visitor.between_tags(self)
+
+class IN_TAG(Span):
+    transitions = [
+        ("=", "", at_attribute_value),
+        ("/>", "", end_of_standalone_tag),
+        (">", "", end_of_tag),
+        ]
+
+    def visit(self, visitor):
+        return visitor.in_tag(self)
+
+class IN_COMMENT(Span):
+    transitions = [("-->", "--", end_of_tag)]
+
+    def visit(self, visitor):
+        return visitor.in_comment(self)
+
+class IN_DIRECTIVE(Span):
+    transitions = [(">", "", end_of_tag)]
+
+    def visit(self, visitor):
+        return visitor.in_directive(self)
+
+class IN_INCLUSION(Span):
+    transitions = [("]]>", "]]", end_of_tag)]
+
+    def visit(self, visitor):
+        return visitor.in_inclusion(self)
+
+class AFTER_ATTRIBUTE_VALUE(Span):
+    transitions = [
+        ("=", "", at_attribute_value),
+        ("/>", "", end_of_standalone_tag),
+        (">", "", end_of_tag),
+        ]
+
+    def empty(self):
+        return not self.text.strip()
+
+    def visit(self, visitor):
+        return visitor.after_attribute_value(self)
+
+class AT_ATTRIBUTE_VALUE(Span):
+    transitions = [
+        ("=", "", at_attribute_value),
+        ('"', "", in_dq_attribute_value),
+        ("'", "", in_sq_attribute_value),
+        ("/>", "", end_of_standalone_tag),
+        (">", "", end_of_tag),
+        ]
+
+    def empty(self):
+        return not self.text.strip()
+
+    def visit(self, visitor):
+        return visitor.at_attribute_value(self)
+
+class IN_DQ_ATTRIBUTE_VALUE(Span):
+    transitions = [('"', "", after_attribute_value)]
+
+    def visit(self, visitor):
+        return visitor.in_dq_attribute_value(self)
+
+class IN_SQ_ATTRIBUTE_VALUE(Span):
+    transitions = [("'", "", after_attribute_value)]
+
+    def visit(self, visitor):
+        return visitor.in_sq_attribute_value(self)
+
+
+
+# Utility functions.
+
+def find_one(text, pos, choices):
+
+    """
+    Find in 'text' from 'pos' the earliest occurring instance of one of the
+    given 'choices', these being a list of (token string, extra string, state)
+    tuples. A token string of None matches immediately at 'pos'.
+
+    The token string is a token marking the start of the next span, the extra
+    string is the portion of the token to be added to the end of the current
+    span upon matching, and the state applies to the next span.
+
+    Return a tuple of (state, position of the occurrence, extra string,
+    position following the occurrence), all None if no choice is found.
+    """
+
+    next_state = None
+    first_pos = None
+    first_extra = None
+    next_pos = None
+
+    for token, extra, state in choices:
+        if token is None:
+            return state, pos, extra, pos
+
+        found_pos = text.find(token, pos)
+
+        if found_pos != -1 and (next_state is None or found_pos < first_pos):
+            next_state = state
+            first_pos = found_pos
+            first_extra = extra
+            next_pos = found_pos + len(token)
+
+    return next_state, first_pos, first_extra, next_pos
+
+
+
+# Lexical partitioning.
+
+class Lexer:
+    def __init__(self, text):
+        self.text = text
+        self.state = BETWEEN_TAGS
+        self.pos = 0
+
+    def _end_of_input(self):
+        start = self.pos
+        self.pos = None
+        return self._span(self.text[start:])
+
+    def _span(self, text):
+        return self.state(text)
+
+    def __iter__(self):
+        return self
+
+    def next(self):
+        if self.pos is None:
+            raise StopIteration
+
+        # Obtain details of a state transition: a handler function to determine
+        # the next state, the start of the token causing the transition, any
+        # token text to keep in the current span, and the position after it.
+
+        handler, pos, extra, next_pos = find_one(self.text, self.pos, self.state.transitions)
+
+        if handler is None:
+            return self._end_of_input()
+
+        # Obtain the lexical span and update the state and position.
+
+        span = self._span(self.text[self.pos:pos] + extra)
+
+        self.state = handler(self.text, pos)
+        self.pos = next_pos
+
+        return span
+
+# vim: tabstop=4 expandtab shiftwidth=4
diff -r 23098f02bda7 -r 3bf425390801 moinformat/utils/htmlparse/parse.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/moinformat/utils/htmlparse/parse.py	Fri Aug 18 00:18:42 2023 +0200
@@ -0,0 +1,110 @@
+#!/usr/bin/env python
+
+"""
+An absurdly minimal HTML parser.
+
+Copyright (C) 2023 Paul Boddie <paul@boddie.org.uk>
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; either version 3 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+details.
+
+You should have received a copy of the GNU General Public License along with
+this program.  If not, see <http://www.gnu.org/licenses/>.
+"""
+
+from moinformat.utils.htmlparse.token import Tokeniser
+from moinformat.utils.htmlparse.tree import Attribute, AttributeValue, \
+                                            Comment, Directive, Element, \
+                                            Fragment, Inclusion, Node, Text
+
+
+
+# Token processing employing the tokens from tokenisation.
+
+class Visitor:
+    def __init__(self):
+        self.node = Fragment()      # node currently receiving children
+        self.stack = [self.node]    # open nodes, root fragment first
+
+    def append(self, node):
+        self.node.nodes.append(node)
+
+    def push(self, node):
+        self.stack.append(node)
+        self.append(node)           # attach to the parent before descending
+        self.node = node
+
+    def pop(self):
+        self.stack.pop()
+        self.node = self.stack[-1]
+
+    def visit(self, token):
+        token.visit(self)
+
+    # Handler methods invoked by tokens via their visit methods.
+
+    def attribute(self, token):
+        if isinstance(self.node, Element):
+            self.node.attributes.append(Attribute(token.value))
+        else:
+            raise ValueError, token
+
+    def attribute_value(self, token):
+        if isinstance(self.node, Element):
+            self.node.attributes[-1].value = AttributeValue(token.value, token.quote)
+        else:
+            raise ValueError, token
+
+    def comment(self, token):
+        self.append(Comment(token.value))
+
+    def directive(self, token):
+        self.append(Directive(token.value))
+
+    def inclusion(self, token):
+        self.append(Inclusion(token.value))
+
+    def tag(self, token):
+        if not token.is_end():
+            self.push(Element(token.tag_name()))
+        elif getattr(self.node, "name", None) == token.tag_name():
+            self.pop()
+        else:
+            raise ValueError, token     # includes end tags at the nameless root
+
+    def tag_close(self, token):
+        self.pop()
+
+    def text(self, token):
+        self.append(Text(token.value))
+
+
+
+# Parsing and document construction.
+
+class Parser:
+    def __init__(self, text):
+        self.tokeniser = Tokeniser(text)
+        self.visitor = Visitor()
+
+    def __iter__(self):
+        return self
+
+    def next(self):
+        token = self.tokeniser.next()
+        self.visitor.visit(token)
+
+    def parse(self):
+        for _none in self:          # each step feeds one token to the visitor
+            pass
+        # Return the root fragment even if elements remain unclosed.
+        return self.visitor.stack[0]
+
+# vim: tabstop=4 expandtab shiftwidth=4
diff -r 23098f02bda7 -r 3bf425390801 moinformat/utils/htmlparse/token.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/moinformat/utils/htmlparse/token.py	Fri Aug 18 00:18:42 2023 +0200
@@ -0,0 +1,160 @@
+#!/usr/bin/env python
+
+"""
+An absurdly minimal HTML tokeniser.
+
+Copyright (C) 2023 Paul Boddie <paul@boddie.org.uk>
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; either version 3 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+details.
+
+You should have received a copy of the GNU General Public License along with
+this program.  If not, see <http://www.gnu.org/licenses/>.
+"""
+
+from collections import deque
+from moinformat.utils.htmlparse.lex import Lexer
+
+
+
+# Document token classes.
+
+class Token:
+    def __init__(self, value):
+        self.value = value
+
+    def __repr__(self):
+        return "%s(%r)" % (self.__class__.__name__, self.value)
+
+class Attribute(Token):
+    def visit(self, visitor):
+        return visitor.attribute(self)
+
+class AttributeValue(Token):
+    def __init__(self, value, quote):
+        self.value = value
+        self.quote = quote
+
+    def __repr__(self):
+        return "%s(%r, %r)" % (self.__class__.__name__, self.value, self.quote)
+
+    def visit(self, visitor):
+        return visitor.attribute_value(self)
+
+class Comment(Token):
+    def visit(self, visitor):
+        return visitor.comment(self)
+
+class Directive(Token):
+    def visit(self, visitor):
+        return visitor.directive(self)
+
+class Inclusion(Token):
+    def visit(self, visitor):
+        return visitor.inclusion(self)
+
+class Tag(Token):
+    def visit(self, visitor):
+        return visitor.tag(self)
+
+    def is_end(self):
+        return self.value.startswith("/")
+
+    def tag_name(self):
+        return self.is_end() and self.value[1:] or self.value
+
+class TagClose:
+    def visit(self, visitor):
+        return visitor.tag_close(self)
+
+    def __repr__(self):
+        return "%s()" % self.__class__.__name__
+
+class Text(Token):
+    def visit(self, visitor):
+        return visitor.text(self)
+
+
+
+# Tidying visitor employing the spans from lexical partitioning.
+
+class Visitor:
+    def __init__(self):
+        self.queued = deque()
+
+    def visit(self, span):
+        return span.visit(self)
+
+    # Specific handler methods.
+
+    def between_tags(self, span):
+        return Text(span.text)
+
+    def in_comment(self, span):
+        return Comment(span.text)
+
+    def in_directive(self, span):
+        return Directive(span.text)
+
+    def in_inclusion(self, span):
+        return Inclusion(span.text)
+
+    def _queue_attributes(self, tokens):
+        for token in tokens:
+            self.queued.append(Attribute(token))
+
+    def in_tag(self, span):
+        tokens = span.text.split()
+        self._queue_attributes(tokens[1:])
+        return Tag(tokens[0])
+
+    def at_end_of_tag(self, span):
+        return TagClose()
+
+    def after_attribute_value(self, span):
+        tokens = span.text.split()
+        self._queue_attributes(tokens)
+        return self.queued.popleft()
+
+    def at_attribute_value(self, span):
+        tokens = span.text.split()
+        self._queue_attributes(tokens[1:])
+        return AttributeValue(tokens[0], "")
+
+    def in_dq_attribute_value(self, span):
+        return AttributeValue(span.text, '"')
+
+    def in_sq_attribute_value(self, span):
+        return AttributeValue(span.text, "'")
+
+
+
+# Tokenising.
+
+class Tokeniser:
+    def __init__(self, text):
+        self.lexer = Lexer(text)
+        self.visitor = Visitor()
+
+    def __iter__(self):
+        return self
+
+    def next(self):
+        pending = self.visitor.queued
+        if pending:
+            return pending.popleft()
+
+        # Skip spans reporting themselves as empty.
+        span = self.lexer.next()
+        while span.empty():
+            span = self.lexer.next()
+        return self.visitor.visit(span)
+
+# vim: tabstop=4 expandtab shiftwidth=4
diff -r 23098f02bda7 -r 3bf425390801 moinformat/utils/htmlparse/tree.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/moinformat/utils/htmlparse/tree.py	Fri Aug 18 00:18:42 2023 +0200
@@ -0,0 +1,102 @@
+#!/usr/bin/env python
+
+"""
+HTML document nodes.
+
+Copyright (C) 2023 Paul Boddie <paul@boddie.org.uk>
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; either version 3 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+details.
+
+You should have received a copy of the GNU General Public License along with
+this program.  If not, see <http://www.gnu.org/licenses/>.
+"""
+
+# Element attributes.
+
+class Attribute:
+    def __init__(self, name, value=None):
+        self.name = name
+        self.value = value
+
+    def __repr__(self):
+        return "%s(%r, %r)" % (self.__class__.__name__, self.name, self.value)
+
+    def visit(self, visitor):
+        return visitor.attribute(self)
+
+
+
+# Nodes containing other nodes.
+
+class Fragment:
+    def __init__(self, nodes=None):
+        self.nodes = nodes or []
+
+    def __repr__(self):
+        return "%s(%r)" % (self.__class__.__name__, self.nodes)
+
+    def visit(self, visitor):
+        return visitor.fragment(self)
+
+class Element(Fragment):
+    def __init__(self, name, attributes=None, nodes=None):
+        Fragment.__init__(self, nodes)
+        self.name = name
+        self.attributes = attributes or []
+
+    def __repr__(self):
+        return "%s(%r, %r, %r)" % (self.__class__.__name__, self.name, self.attributes, self.nodes)
+
+    def visit(self, visitor):
+        return visitor.element(self)
+
+
+
+# Nodes having values.
+
+class Node:
+    def __init__(self, value):
+        self.value = value
+
+    def __repr__(self):
+        return "%s(%r)" % (self.__class__.__name__, self.value)
+
+    def visit(self, visitor):
+        return visitor.node(self)
+
+class AttributeValue(Node):
+    def __init__(self, value, quote):
+        Node.__init__(self, value)
+        self.quote = quote
+
+    def __repr__(self):
+        return "%s(%r, %r)" % (self.__class__.__name__, self.value, self.quote)
+
+    def visit(self, visitor):
+        return visitor.attribute_value(self)
+
+class Comment(Node):
+    def visit(self, visitor):
+        return visitor.comment(self)
+
+class Directive(Node):
+    def visit(self, visitor):
+        return visitor.directive(self)
+
+class Inclusion(Node):
+    def visit(self, visitor):
+        return visitor.inclusion(self)
+
+class Text(Node):
+    def visit(self, visitor):
+        return visitor.text(self)
+
+# vim: tabstop=4 expandtab shiftwidth=4
diff -r 23098f02bda7 -r 3bf425390801 tests/test_parser.py
--- a/tests/test_parser.py	Tue Jun 20 18:58:47 2023 +0200
+++ b/tests/test_parser.py	Fri Aug 18 00:18:42 2023 +0200
@@ -1,5 +1,24 @@
 #!/usr/bin/env python
 
+"""
+Test document parsing and serialisation.
+
+Copyright (C) 2017, 2018, 2019, 2023 Paul Boddie <paul@boddie.org.uk>
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; either version 3 of the License, or (at your option) any later
+version.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+details.
+
+You should have received a copy of the GNU General Public License along with
+this program.  If not, see <http://www.gnu.org/licenses/>.
+"""
+
 from os import listdir
 from os.path import abspath, split
 import sys
@@ -17,10 +36,12 @@
 
 # Import specific objects.
 
-from moinformat import Metadata, make_input, make_output, make_parser, \
-                       make_serialiser, parse, serialise
+from moinformat import get_parser, Metadata, make_input, make_output, \
+                       make_parser, make_serialiser, parse, serialise
 from moinformat.tree.moin import Container
 
+
+
 def test_input(d, s):
 
     "Compare serialised output from 'd' with its original form 's'."
@@ -61,7 +82,8 @@
     metadata.set("output_format", "html")
     metadata.set("mapping", {"MoinMoin" : "https://moinmo.in/"})
 
-    print serialise(d, make_serialiser(metadata))
+    result = serialise(d, make_serialiser(metadata))
+    print output.encode(result)
     print "-" * 60
     print
 
@@ -69,7 +91,9 @@
 
 def test_tree(d, t, ts):
 
-    "Compare tree structure 'd' with simplified, expected form 't' from 'ts'."
+    """
+    Compare tree structure 'd' with simplified, expected form 't' from 'ts'.
+    """
 
     failing = t.test(d)
 
@@ -78,9 +102,12 @@
 
     # Show tree versus expected forms.
 
+    moin_prettyprinter = make_serialiser(Metadata({"input_format" : "moin"}), "pretty")
+    tree_prettyprinter = make_serialiser(Metadata({"input_format" : "pretty"}), "pretty")
+
     print not failing
     print "-" * 60
-    print d.prettyprint()
+    print serialise(d, moin_prettyprinter)
     if failing:
         print "-" * 60
         print ts
@@ -90,102 +117,14 @@
         print repr(simple)
         print repr(tree)
         print "-" * 60
-        print tree.prettyprint()
+        print serialise(tree, tree_prettyprinter)
         print "-" * 60
-        print simple.prettyprint()
+        print serialise(simple, tree_prettyprinter)
     print "-" * 60
     print
 
     return not failing
 
-class Node:
-
-    "A simplified tree node representation."
-
-    def __init__(self, name):
-        self.name = name
-        self.nodes = []
-
-    def __repr__(self):
-        return "Node(%r, %r)" % (self.name, self.nodes)
-
-    def prettyprint(self, indent=""):
-        l = []
-        l.append("%s%s%s" % (indent, self.name, len(self.nodes) and " nodes=%d" % len(self.nodes) or ""))
-        for node in self.nodes:
-            l.append(node.prettyprint(indent + "  "))
-        return "\n".join(l)
-
-    def append(self, node):
-        self.nodes.append(node)
-
-    def test(self, other):
-
-        """
-        Test whether this node is considered equivalent to 'other', where
-        'other' is a moinparser.tree node.
-
-        Return any failing tree nodes or None.
-        """
-
-        if other.__class__.__name__ != self.name:
-            return self, other, "name"
-
-        if isinstance(other, Container):
-            for node, other_node in map(None, self.nodes, other.nodes):
-                if node is None or other_node is None:
-                    return self, other, node is None and "simple" or "document"
-                t = node.test(other_node)
-                if t:
-                    return t
-        elif self.nodes:
-            return self, other, "empty"
-
-        return None
-
-def parse_tree(s):
-
-    "Parse the tree structure representation in 's'."
-
-    indent = 0
-    branches = []
-
-    for line in s.split("\n"):
-        line = line.rstrip()
-        if not line:
-            continue
-
-        new_indent = line.rfind(" ") + 1
-        node = Node(line[new_indent:])
-
-        # Establish a branch to add nodes to.
-
-        if not branches:
-            branches.append(node)
-        else:
-            # Note the current node as outermost branch.
-
-            if new_indent > indent:
-                branches.append(node)
-            else:
-                # Reduced indent involves obtaining an inner branch again.
-
-                while indent > new_indent:
-                    del branches[-1]
-                    indent -= 2
-
-                # Note the current node as outermost branch.
-
-                branches[-1] = node
-
-            # Append the current node to the parent branch.
-
-            branches[-2].append(node)
-
-        indent = new_indent
-
-    return branches[0]
-
 def get_filename(filename):
 
     "Using 'filename', return the core text filename and any encoding."
@@ -206,10 +145,14 @@
 
     if input.dir.exists(tree_filename):
         ts = input.readfile(tree_filename)
-        return ts, parse_tree(ts)
+        return ts, parse(ts, make_parser(Metadata(), "pretty"))
     else:
         return None, None
 
+
+
+# Main program.
+
 if __name__ == "__main__":
     args = sys.argv[1:]
 
@@ -222,7 +165,7 @@
 
 -q          Suppress test output, reporting only success or failure
 --quiet     Equivalent to -q
-"""
+""" % sys.argv[0]
         sys.exit(1)
 
     for arg in ["-q", "--quiet"]: