Reorganised the code into a package.

     1.1 --- a/moinformat.py	Fri Apr 28 18:56:50 2017 +0200
     1.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.3 @@ -1,530 +0,0 @@
     1.4 -#!/usr/bin/env python
     1.5 -
     1.6 -"""
     1.7 -Moin wiki format parser.
     1.8 -
     1.9 -Copyright (C) 2012, 2013, 2015, 2017 Paul Boddie <paul@boddie.org.uk>
    1.10 -
    1.11 -This program is free software; you can redistribute it and/or modify it under
    1.12 -the terms of the GNU General Public License as published by the Free Software
    1.13 -Foundation; either version 3 of the License, or (at your option) any later
    1.14 -version.
    1.15 -
    1.16 -This program is distributed in the hope that it will be useful, but WITHOUT
    1.17 -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
    1.18 -FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
    1.19 -details.
    1.20 -
    1.21 -You should have received a copy of the GNU General Public License along with
    1.22 -this program.  If not, see <http://www.gnu.org/licenses/>.
    1.23 -"""
    1.24 -
    1.25 -from cgi import escape
    1.26 -import re
    1.27 -
    1.28 -# Regular expressions.
    1.29 -
    1.30 -syntax = {
    1.31 -    # Page regions:
    1.32 -    "regionstart"   : (r"((^\s*)([{]{3,}))",            re.MULTILINE | re.DOTALL),  # {{{...
    1.33 -    "regionend"     : (r"^\s*([}]{3,})",                re.MULTILINE | re.DOTALL),  # }}}...
    1.34 -    "header"        : (r"#!(.*?)\n",                    0),                         # #! char-excl-nl
    1.35 -
    1.36 -    # Region contents:
    1.37 -    "break"         : (r"^(\s*?)\n",                    re.MULTILINE),              # blank line
    1.38 -    "listitem"      : (r"^((\s+)([*]|\d+[.]))",         re.MULTILINE),              # indent (list-item or number-item)
    1.39 -
    1.40 -    # List contents:
    1.41 -    "listitemend"   : (r"^",                            re.MULTILINE),              # next line
    1.42 -    }
    1.43 -
    1.44 -# Define patterns for the regular expressions.
    1.45 -
    1.46 -patterns = {}
    1.47 -for name, (value, flags) in syntax.items():
    1.48 -    patterns[name] = re.compile(value, re.UNICODE | flags)
    1.49 -
    1.50 -
    1.51 -
    1.52 -# Document nodes.
    1.53 -
    1.54 -class Container:
    1.55 -
    1.56 -    "A container of document nodes."
    1.57 -
    1.58 -    def __init__(self, nodes):
    1.59 -        self.nodes = nodes
    1.60 -
    1.61 -    def append(self, node):
    1.62 -        self.nodes.append(node)
    1.63 -
    1.64 -    append_text = append
    1.65 -
    1.66 -    def empty(self):
    1.67 -        return not self.nodes
    1.68 -
    1.69 -    def normalise(self):
    1.70 -
    1.71 -        "Combine adjacent text nodes."
    1.72 -
    1.73 -        nodes = self.nodes
    1.74 -        self.nodes = []
    1.75 -        text = None
    1.76 -
    1.77 -        for node in nodes:
    1.78 -
    1.79 -            # Open a text node or merge text into an open node.
    1.80 -
    1.81 -            if isinstance(node, Text):
    1.82 -                if not text:
    1.83 -                    text = node
    1.84 -                else:
    1.85 -                    text.merge(node)
    1.86 -
    1.87 -            # Close any open text node and append the current node.
    1.88 -
    1.89 -            else:
    1.90 -                if text:
    1.91 -                    self.append(text)
    1.92 -                    text = None
    1.93 -                self.append(node)
    1.94 -
    1.95 -        # Add any open text node.
    1.96 -
    1.97 -        if text:
    1.98 -            self.append(text)
    1.99 -
   1.100 -    def __str__(self):
   1.101 -        return self.prettyprint()
   1.102 -
   1.103 -    def prettyprint(self, indent=""):
   1.104 -        pass
   1.105 -
   1.106 -class Region(Container):
   1.107 -
   1.108 -    "A region of the page."
   1.109 -
   1.110 -    transparent_region_types = ["wiki"]
   1.111 -
   1.112 -    def __init__(self, nodes, level=0, indent=0, type=None):
   1.113 -        Container.__init__(self, nodes)
   1.114 -        self.level = level
   1.115 -        self.indent = indent
   1.116 -        self.type = type
   1.117 -
   1.118 -    def append(self, node):
   1.119 -        last = self.nodes and self.nodes[-1]
   1.120 -        if last and last.empty():
   1.121 -            self.nodes[-1] = node
   1.122 -        else:
   1.123 -            self.nodes.append(node)
   1.124 -
   1.125 -    def append_text(self, s):
   1.126 -        if self.is_transparent():
   1.127 -            self.nodes[-1].append(s)
   1.128 -        else:
   1.129 -            self.append(s)
   1.130 -
   1.131 -    def have_end(self, s):
   1.132 -        return self.level and s.startswith("}") and self.level == len(s)
   1.133 -
   1.134 -    def is_transparent(self):
   1.135 -        return not self.level or self.type in self.transparent_region_types
   1.136 -
   1.137 -    def __repr__(self):
   1.138 -        return "Region(%r, %r, %r, %r)" % (self.nodes, self.level, self.indent, self.type)
   1.139 -
   1.140 -    def prettyprint(self, indent=""):
   1.141 -        l = ["%sRegion: level=%d indent=%d type=%s" % (indent, self.level, self.indent, self.type)]
   1.142 -        for node in self.nodes:
   1.143 -            l.append(node.prettyprint(indent + "  "))
   1.144 -        return "\n".join(l)
   1.145 -
   1.146 -    def to_string(self, out):
   1.147 -        out.start_region(self.level, self.indent, self.type)
   1.148 -        for node in self.nodes:
   1.149 -            node.to_string(out)
   1.150 -        out.end_region(self.level, self.indent, self.type)
   1.151 -
   1.152 -class Block(Container):
   1.153 -
   1.154 -    "A block in the page."
   1.155 -
   1.156 -    def __init__(self, nodes, final=True):
   1.157 -        Container.__init__(self, nodes)
   1.158 -        self.final = final
   1.159 -
   1.160 -    def __repr__(self):
   1.161 -        return "Block(%r)" % self.nodes
   1.162 -
   1.163 -    def prettyprint(self, indent=""):
   1.164 -        l = ["%sBlock: final=%s" % (indent, self.final)]
   1.165 -        for node in self.nodes:
   1.166 -            l.append(node.prettyprint(indent + "  "))
   1.167 -        return "\n".join(l)
   1.168 -
   1.169 -    def to_string(self, out):
   1.170 -        out.start_block(self.final)
   1.171 -        for node in self.nodes:
   1.172 -            node.to_string(out)
   1.173 -        out.end_block(self.final)
   1.174 -
   1.175 -class ListItem(Container):
   1.176 -
   1.177 -    "A list item."
   1.178 -
   1.179 -    def __repr__(self):
   1.180 -        return "ListItem(%r)" % self.nodes
   1.181 -
   1.182 -    def prettyprint(self, indent=""):
   1.183 -        l = ["%sListItem:" % indent]
   1.184 -        for node in self.nodes:
   1.185 -            l.append(node.prettyprint(indent + "  "))
   1.186 -        return "\n".join(l)
   1.187 -
   1.188 -    def to_string(self, out):
   1.189 -        out.start_listitem()
   1.190 -        for node in self.nodes:
   1.191 -            node.to_string(out)
   1.192 -        out.end_listitem()
   1.193 -
   1.194 -
   1.195 -class Text:
   1.196 -
   1.197 -    "A text node."
   1.198 -
   1.199 -    def __init__(self, s):
   1.200 -        self.s = s
   1.201 -
   1.202 -    def empty(self):
   1.203 -        return not self.s
   1.204 -
   1.205 -    def merge(self, text):
   1.206 -        self.s += text.s
   1.207 -
   1.208 -    def __repr__(self):
   1.209 -        return "Text(%r)" % self.s
   1.210 -
   1.211 -    def prettyprint(self, indent=""):
   1.212 -        return "%sText: %r" % (indent, self.s)
   1.213 -
   1.214 -    def to_string(self, out):
   1.215 -        out.text(self.s)
   1.216 -
   1.217 -
   1.218 -
   1.219 -# Serialisation.
   1.220 -
   1.221 -class Serialiser:
   1.222 -
   1.223 -    "General serialisation support."
   1.224 -
   1.225 -    def __init__(self, out):
   1.226 -        self.out = out
   1.227 -
   1.228 -class MoinSerialiser(Serialiser):
   1.229 -
   1.230 -    "Serialisation of the page."
   1.231 -
   1.232 -    def start_region(self, level, indent, type):
   1.233 -        out = self.out
   1.234 -        if level:
   1.235 -            out(" " * indent + "{" * level)
   1.236 -        if type and level:
   1.237 -            out("#!%s\n" % type)
   1.238 -
   1.239 -    def end_region(self, level, indent, type):
   1.240 -        out = self.out
   1.241 -        if level:
   1.242 -            out("}" * level)
   1.243 -
   1.244 -    def start_block(self, final):
   1.245 -        pass
   1.246 -
   1.247 -    def end_block(self, final):
   1.248 -        if not final:
   1.249 -            self.out("\n")
   1.250 -
   1.251 -    def start_listitem(self):
   1.252 -        self.out(" *")
   1.253 -
   1.254 -    def end_listitem(self):
   1.255 -        pass
   1.256 -
   1.257 -    def text(self, s):
   1.258 -        self.out(s)
   1.259 -
   1.260 -class HTMLSerialiser(Serialiser):
   1.261 -
   1.262 -    "Serialisation of the page."
   1.263 -
   1.264 -    def start_region(self, level, indent, type):
   1.265 -        l = []
   1.266 -        out = l.append
   1.267 -        if level:
   1.268 -            out("level-%d" % level)
   1.269 -
   1.270 -        if indent:
   1.271 -            out("indent-%d" % indent)
   1.272 -
   1.273 -        # NOTE: Encode type details for CSS.
   1.274 -
   1.275 -        if type:
   1.276 -            out("type-%s" % escape(type, True))
   1.277 -
   1.278 -        self.out("<span class='%s'>" % " ".join(l))
   1.279 -
   1.280 -    def end_region(self, level, indent, type):
   1.281 -        self.out("</span>")
   1.282 -
   1.283 -    def start_block(self, final):
   1.284 -        self.out("<p>")
   1.285 -
   1.286 -    def end_block(self, final):
   1.287 -        self.out("</p>")
   1.288 -
   1.289 -    def start_listitem(self):
   1.290 -        self.out("<li>")
   1.291 -
   1.292 -    def end_listitem(self):
   1.293 -        self.out("</li>")
   1.294 -
   1.295 -    def text(self, s):
   1.296 -        self.out(escape(s))
   1.297 -
   1.298 -
   1.299 -
   1.300 -# Tokenising functions.
   1.301 -
   1.302 -class TokenStream:
   1.303 -
   1.304 -    "A stream of tokens taken from a string."
   1.305 -
   1.306 -    def __init__(self, s):
   1.307 -        self.s = s
   1.308 -        self.pos = 0
   1.309 -        self.match = None
   1.310 -        self.matching = None
   1.311 -
   1.312 -    def read_until(self, pattern_names, remaining=True):
   1.313 -
   1.314 -        """
   1.315 -        Find the first match for the given 'pattern_names'. Return the text
   1.316 -        preceding any match, the remaining text if no match was found, or None
   1.317 -        if no match was found and 'remaining' is given as a false value.
   1.318 -        """
   1.319 -
   1.320 -        first = None
   1.321 -        self.matching = None
   1.322 -
   1.323 -        # Find the first matching pattern.
   1.324 -
   1.325 -        for pattern_name in pattern_names:
   1.326 -            match = patterns[pattern_name].search(self.s, self.pos)
   1.327 -            if match:
   1.328 -                start, end = match.span()
   1.329 -                if self.matching is None or start < first:
   1.330 -                    first = start
   1.331 -                    self.matching = pattern_name
   1.332 -                    self.match = match
   1.333 -
   1.334 -        if self.matching is None:
   1.335 -            if remaining:
   1.336 -                return self.s[self.pos:]
   1.337 -            else:
   1.338 -                return None
   1.339 -        else:
   1.340 -            return self.s[self.pos:first]
   1.341 -
   1.342 -    def read_match(self, group=1):
   1.343 -
   1.344 -        """
   1.345 -        Return the matched text, updating the position in the stream. If 'group'
   1.346 -        is specified, the indicated group in a match will be returned.
   1.347 -        Typically, group 1 should contain all pertinent data, but groups defined
   1.348 -        within group 1 can provide sections of the data.
   1.349 -        """
   1.350 -
   1.351 -        if self.match:
   1.352 -            _start, self.pos = self.match.span()
   1.353 -            try:
   1.354 -                return self.match.group(group)
   1.355 -            except IndexError:
   1.356 -                return ""
   1.357 -        else:
   1.358 -            self.pos = len(self.s)
   1.359 -            return None
   1.360 -
   1.361 -
   1.362 -
   1.363 -# Parser functions.
   1.364 -
   1.365 -def parse_page(s):
   1.366 -
   1.367 -    """
   1.368 -    Parse page text 's'. Pages consist of regions delimited by markers.
   1.369 -    """
   1.370 -
   1.371 -    return parse_region(TokenStream(s))
   1.372 -
   1.373 -def parse_region(items, level=0, indent=0):
   1.374 -
   1.375 -    """
   1.376 -    Parse the data provided by 'items' to populate a region with the given
   1.377 -    'level' at the given 'indent'.
   1.378 -    """
   1.379 -
   1.380 -    region = Region([], level, indent)
   1.381 -
   1.382 -    # Parse section headers.
   1.383 -
   1.384 -    parse_region_header(items, region)
   1.385 -
   1.386 -    # Parse section body.
   1.387 -
   1.388 -    if region.is_transparent():
   1.389 -        parse_region_wiki(items, region)
   1.390 -    else:
   1.391 -        parse_region_opaque(items, region)
   1.392 -
   1.393 -    return region
   1.394 -
   1.395 -def parse_region_header(items, region):
   1.396 -
   1.397 -    """
   1.398 -    Parse the region header from the 'items', setting it for the given 'region'.
   1.399 -    """
   1.400 -
   1.401 -    if items.read_until(["header"], False) == "": # None means no header
   1.402 -        region.type = items.read_match()
   1.403 -
   1.404 -def parse_region_wiki(items, region):
   1.405 -
   1.406 -    "Parse the data provided by 'items' to populate a wiki 'region'."
   1.407 -
   1.408 -    new_block(region)
   1.409 -    parse_region_details(items, region, ["break", "listitem", "regionstart", "regionend"])
   1.410 -
   1.411 -def parse_region_opaque(items, region):
   1.412 -
   1.413 -    "Parse the data provided by 'items' to populate an opaque 'region'."
   1.414 -
   1.415 -    parse_region_details(items, region, ["regionend"])
   1.416 -
   1.417 -def parse_region_details(items, region, pattern_names):
   1.418 -
   1.419 -    "Parse 'items' within 'region' searching using 'pattern_names'."
   1.420 -
   1.421 -    try:
   1.422 -        while True:
   1.423 -
   1.424 -            # Obtain text before any marker or the end of the input.
   1.425 -
   1.426 -            preceding = items.read_until(pattern_names)
   1.427 -            if preceding:
   1.428 -                region.append_text(Text(preceding))
   1.429 -
   1.430 -            # End of input.
   1.431 -
   1.432 -            if not items.matching:
   1.433 -                break
   1.434 -
   1.435 -            # Obtain any feature.
   1.436 -
   1.437 -            feature = items.read_match()
   1.438 -            handler = handlers.get(items.matching)
   1.439 -
   1.440 -            # Handle each feature or add text to the region.
   1.441 -
   1.442 -            if handler:
   1.443 -                handler(items, region)
   1.444 -            else:
   1.445 -                region.append_text(Text(feature))
   1.446 -
   1.447 -    except StopIteration:
   1.448 -        pass
   1.449 -
   1.450 -    region.normalise()
   1.451 -
   1.452 -def end_region(items, region):
   1.453 -
   1.454 -    "End the parsing of 'region'."
   1.455 -
   1.456 -    raise StopIteration
   1.457 -
   1.458 -def parse_break(items, region):
   1.459 -
   1.460 -    "Handle a paragraph break within 'region'."
   1.461 -
   1.462 -    # Mark any previous block as not being the final one in a sequence.
   1.463 -
   1.464 -    block = region.nodes[-1]
   1.465 -    block.final = False
   1.466 -    new_block(region)
   1.467 -
   1.468 -def parse_listitem_end(items, region):
   1.469 -
   1.470 -    "Handle the end of a list."
   1.471 -
   1.472 -    raise StopIteration
   1.473 -
   1.474 -def parse_listitem(items, region):
   1.475 -
   1.476 -    "Handle a list item marker within 'region'."
   1.477 -
   1.478 -    item = ListItem([])
   1.479 -    parse_region_details(items, item, ["listitemend"])
   1.480 -    region.append(item)
   1.481 -    new_block(region)
   1.482 -
   1.483 -def parse_section(items, region):
   1.484 -
   1.485 -    "Handle the start of a new section within 'region'."
   1.486 -
   1.487 -    # Parse the section and start a new block after the section.
   1.488 -
   1.489 -    indent = len(items.read_match(2))
   1.490 -    level = len(items.read_match(3))
   1.491 -    region.append(parse_region(items, level, indent))
   1.492 -    new_block(region)
   1.493 -
   1.494 -def parse_section_end(items, region):
   1.495 -
   1.496 -    "Handle the end of a new section within 'region'."
   1.497 -
   1.498 -    feature = items.read_match()
   1.499 -    if region.have_end(feature):
   1.500 -        raise StopIteration
   1.501 -    else:
   1.502 -        region.append_text(Text(feature))
   1.503 -
   1.504 -# Pattern handlers.
   1.505 -
   1.506 -handlers = {
   1.507 -    None : end_region,
   1.508 -    "break" : parse_break,
   1.509 -    "listitemend" : parse_listitem_end,
   1.510 -    "listitem" : parse_listitem,
   1.511 -    "regionstart" : parse_section,
   1.512 -    "regionend" : parse_section_end,
   1.513 -    }
   1.514 -
   1.515 -def new_block(region):
   1.516 -
   1.517 -    "Start a new block in 'region'."
   1.518 -
   1.519 -    block = Block([])
   1.520 -    region.append(block)
   1.521 -
   1.522 -
   1.523 -
   1.524 -# Top-level functions.
   1.525 -
   1.526 -parse = parse_page
   1.527 -
   1.528 -def serialise(doc, serialiser=MoinSerialiser):
   1.529 -    l = []
   1.530 -    doc.to_string(serialiser(l.append))
   1.531 -    return "".join(l)
   1.532 -
   1.533 -# vim: tabstop=4 expandtab shiftwidth=4

     2.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     2.2 +++ b/moinformat/__init__.py	Sat Apr 29 17:47:03 2017 +0200
     2.3 @@ -0,0 +1,277 @@
     2.4 +#!/usr/bin/env python
     2.5 +
     2.6 +"""
     2.7 +Moin wiki format parser.
     2.8 +
     2.9 +Copyright (C) 2017 Paul Boddie <paul@boddie.org.uk>
    2.10 +
    2.11 +This program is free software; you can redistribute it and/or modify it under
    2.12 +the terms of the GNU General Public License as published by the Free Software
    2.13 +Foundation; either version 3 of the License, or (at your option) any later
    2.14 +version.
    2.15 +
    2.16 +This program is distributed in the hope that it will be useful, but WITHOUT
    2.17 +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
    2.18 +FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
    2.19 +details.
    2.20 +
    2.21 +You should have received a copy of the GNU General Public License along with
    2.22 +this program.  If not, see <http://www.gnu.org/licenses/>.
    2.23 +"""
    2.24 +
    2.25 +from moinformat.tree import Region, Block, ListItem, Text
    2.26 +import re
    2.27 +
    2.28 +# Regular expressions.
    2.29 +
    2.30 +syntax = {
    2.31 +    # Page regions:
    2.32 +    "regionstart"   : (r"((^\s*)([{]{3,}))",            re.MULTILINE | re.DOTALL),  # {{{...
    2.33 +    "regionend"     : (r"^\s*([}]{3,})",                re.MULTILINE | re.DOTALL),  # }}}...
    2.34 +    "header"        : (r"#!(.*?)\n",                    0),                         # #! char-excl-nl
    2.35 +
    2.36 +    # Region contents:
    2.37 +    "break"         : (r"^(\s*?)\n",                    re.MULTILINE),              # blank line
    2.38 +    "listitem"      : (r"^((\s+)([*]|\d+[.]))",         re.MULTILINE),              # indent (list-item or number-item)
    2.39 +
    2.40 +    # List contents:
    2.41 +    "listitemend"   : (r"^",                            re.MULTILINE),              # next line
    2.42 +    }
    2.43 +
    2.44 +# Define patterns for the regular expressions.
    2.45 +
    2.46 +patterns = {}
    2.47 +for name, (value, flags) in syntax.items():
    2.48 +    patterns[name] = re.compile(value, re.UNICODE | flags)
    2.49 +
    2.50 +
    2.51 +
    2.52 +# Tokenising functions.
    2.53 +
    2.54 +class TokenStream:
    2.55 +
    2.56 +    "A stream of tokens taken from a string."
    2.57 +
    2.58 +    def __init__(self, s):
    2.59 +        self.s = s
    2.60 +        self.pos = 0
    2.61 +        self.match = None
    2.62 +        self.matching = None
    2.63 +
    2.64 +    def read_until(self, pattern_names, remaining=True):
    2.65 +
    2.66 +        """
    2.67 +        Find the first match for the given 'pattern_names'. Return the text
    2.68 +        preceding any match, the remaining text if no match was found, or None
    2.69 +        if no match was found and 'remaining' is given as a false value.
    2.70 +        """
    2.71 +
    2.72 +        first = None
    2.73 +        self.matching = None
    2.74 +
    2.75 +        # Find the first matching pattern.
    2.76 +
    2.77 +        for pattern_name in pattern_names:
    2.78 +            match = patterns[pattern_name].search(self.s, self.pos)
    2.79 +            if match:
    2.80 +                start, end = match.span()
    2.81 +                if self.matching is None or start < first:
    2.82 +                    first = start
    2.83 +                    self.matching = pattern_name
    2.84 +                    self.match = match
    2.85 +
    2.86 +        if self.matching is None:
    2.87 +            if remaining:
    2.88 +                return self.s[self.pos:]
    2.89 +            else:
    2.90 +                return None
    2.91 +        else:
    2.92 +            return self.s[self.pos:first]
    2.93 +
    2.94 +    def read_match(self, group=1):
    2.95 +
    2.96 +        """
    2.97 +        Return the matched text, updating the position in the stream. If 'group'
    2.98 +        is specified, the indicated group in a match will be returned.
    2.99 +        Typically, group 1 should contain all pertinent data, but groups defined
   2.100 +        within group 1 can provide sections of the data.
   2.101 +        """
   2.102 +
   2.103 +        if self.match:
   2.104 +            _start, self.pos = self.match.span()
   2.105 +            try:
   2.106 +                return self.match.group(group)
   2.107 +            except IndexError:
   2.108 +                return ""
   2.109 +        else:
   2.110 +            self.pos = len(self.s)
   2.111 +            return None
   2.112 +
   2.113 +
   2.114 +
   2.115 +# Parser functions.
   2.116 +
   2.117 +def parse_page(s):
   2.118 +
   2.119 +    """
   2.120 +    Parse page text 's'. Pages consist of regions delimited by markers.
   2.121 +    """
   2.122 +
   2.123 +    return parse_region(TokenStream(s))
   2.124 +
   2.125 +def parse_region(items, level=0, indent=0):
   2.126 +
   2.127 +    """
   2.128 +    Parse the data provided by 'items' to populate a region with the given
   2.129 +    'level' at the given 'indent'.
   2.130 +    """
   2.131 +
   2.132 +    region = Region([], level, indent)
   2.133 +
   2.134 +    # Parse section headers.
   2.135 +
   2.136 +    parse_region_header(items, region)
   2.137 +
   2.138 +    # Parse section body.
   2.139 +
   2.140 +    if region.is_transparent():
   2.141 +        parse_region_wiki(items, region)
   2.142 +    else:
   2.143 +        parse_region_opaque(items, region)
   2.144 +
   2.145 +    return region
   2.146 +
   2.147 +def parse_region_header(items, region):
   2.148 +
   2.149 +    """
   2.150 +    Parse the region header from the 'items', setting it for the given 'region'.
   2.151 +    """
   2.152 +
   2.153 +    if items.read_until(["header"], False) == "": # None means no header
   2.154 +        region.type = items.read_match()
   2.155 +
   2.156 +def parse_region_wiki(items, region):
   2.157 +
   2.158 +    "Parse the data provided by 'items' to populate a wiki 'region'."
   2.159 +
   2.160 +    new_block(region)
   2.161 +    parse_region_details(items, region, ["break", "listitem", "regionstart", "regionend"])
   2.162 +
   2.163 +def parse_region_opaque(items, region):
   2.164 +
   2.165 +    "Parse the data provided by 'items' to populate an opaque 'region'."
   2.166 +
   2.167 +    parse_region_details(items, region, ["regionend"])
   2.168 +
   2.169 +def parse_region_details(items, region, pattern_names):
   2.170 +
   2.171 +    "Parse 'items' within 'region' searching using 'pattern_names'."
   2.172 +
   2.173 +    try:
   2.174 +        while True:
   2.175 +
   2.176 +            # Obtain text before any marker or the end of the input.
   2.177 +
   2.178 +            preceding = items.read_until(pattern_names)
   2.179 +            if preceding:
   2.180 +                region.append_text(Text(preceding))
   2.181 +
   2.182 +            # End of input.
   2.183 +
   2.184 +            if not items.matching:
   2.185 +                break
   2.186 +
   2.187 +            # Obtain any feature.
   2.188 +
   2.189 +            feature = items.read_match()
   2.190 +            handler = handlers.get(items.matching)
   2.191 +
   2.192 +            # Handle each feature or add text to the region.
   2.193 +
   2.194 +            if handler:
   2.195 +                handler(items, region)
   2.196 +            else:
   2.197 +                region.append_text(Text(feature))
   2.198 +
   2.199 +    except StopIteration:
   2.200 +        pass
   2.201 +
   2.202 +    region.normalise()
   2.203 +
   2.204 +def end_region(items, region):
   2.205 +
   2.206 +    "End the parsing of 'region'."
   2.207 +
   2.208 +    raise StopIteration
   2.209 +
   2.210 +def parse_break(items, region):
   2.211 +
   2.212 +    "Handle a paragraph break within 'region'."
   2.213 +
   2.214 +    # Mark any previous block as not being the final one in a sequence.
   2.215 +
   2.216 +    block = region.nodes[-1]
   2.217 +    block.final = False
   2.218 +    new_block(region)
   2.219 +
   2.220 +def parse_listitem_end(items, region):
   2.221 +
   2.222 +    "Handle the end of a list item."
   2.223 +
   2.224 +    raise StopIteration
   2.225 +
   2.226 +def parse_listitem(items, region):
   2.227 +
   2.228 +    "Handle a list item marker within 'region'."
   2.229 +
   2.230 +    item = ListItem([])
   2.231 +    parse_region_details(items, item, ["listitemend"])
   2.232 +    region.append(item)
   2.233 +    new_block(region)
   2.234 +
   2.235 +def parse_section(items, region):
   2.236 +
   2.237 +    "Handle the start of a new section within 'region'."
   2.238 +
   2.239 +    # Parse the section and start a new block after the section.
   2.240 +
   2.241 +    indent = len(items.read_match(2))
   2.242 +    level = len(items.read_match(3))
   2.243 +    region.append(parse_region(items, level, indent))
   2.244 +    new_block(region)
   2.245 +
   2.246 +def parse_section_end(items, region):
   2.247 +
   2.248 +    "Handle a section end marker found within 'region'."
   2.249 +
   2.250 +    feature = items.read_match()
   2.251 +    if region.have_end(feature):
   2.252 +        raise StopIteration
   2.253 +    else:
   2.254 +        region.append_text(Text(feature))
   2.255 +
   2.256 +# Pattern handlers.
   2.257 +
   2.258 +handlers = {
   2.259 +    None : end_region,
   2.260 +    "break" : parse_break,
   2.261 +    "listitemend" : parse_listitem_end,
   2.262 +    "listitem" : parse_listitem,
   2.263 +    "regionstart" : parse_section,
   2.264 +    "regionend" : parse_section_end,
   2.265 +    }
   2.266 +
   2.267 +def new_block(region):
   2.268 +
   2.269 +    "Start a new block in 'region'."
   2.270 +
   2.271 +    block = Block([])
   2.272 +    region.append(block)
   2.273 +
   2.274 +
   2.275 +
   2.276 +# Top-level functions.
   2.277 +
   2.278 +parse = parse_page
   2.279 +
   2.280 +# vim: tabstop=4 expandtab shiftwidth=4

     3.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     3.2 +++ b/moinformat/serialisers.py	Sat Apr 29 17:47:03 2017 +0200
     3.3 @@ -0,0 +1,108 @@
     3.4 +#!/usr/bin/env python
     3.5 +
     3.6 +"""
     3.7 +Moin wiki serialisers.
     3.8 +
     3.9 +Copyright (C) 2017 Paul Boddie <paul@boddie.org.uk>
    3.10 +
    3.11 +This program is free software; you can redistribute it and/or modify it under
    3.12 +the terms of the GNU General Public License as published by the Free Software
    3.13 +Foundation; either version 3 of the License, or (at your option) any later
    3.14 +version.
    3.15 +
    3.16 +This program is distributed in the hope that it will be useful, but WITHOUT
    3.17 +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
    3.18 +FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
    3.19 +details.
    3.20 +
    3.21 +You should have received a copy of the GNU General Public License along with
    3.22 +this program.  If not, see <http://www.gnu.org/licenses/>.
    3.23 +"""
    3.24 +
    3.25 +from cgi import escape
    3.26 +
    3.27 +class Serialiser:
    3.28 +
    3.29 +    "General serialisation support."
    3.30 +
    3.31 +    def __init__(self, out):
    3.32 +        self.out = out
    3.33 +
    3.34 +class MoinSerialiser(Serialiser):
    3.35 +
    3.36 +    "Serialisation of the page in Moin wiki format."
    3.37 +
    3.38 +    def start_region(self, level, indent, type):
    3.39 +        out = self.out
    3.40 +        if level:
    3.41 +            out(" " * indent + "{" * level)
    3.42 +        if type and level:
    3.43 +            out("#!%s\n" % type)
    3.44 +
    3.45 +    def end_region(self, level, indent, type):
    3.46 +        out = self.out
    3.47 +        if level:
    3.48 +            out("}" * level)
    3.49 +
    3.50 +    def start_block(self, final):
    3.51 +        pass
    3.52 +
    3.53 +    def end_block(self, final):
    3.54 +        if not final:
    3.55 +            self.out("\n")
    3.56 +
    3.57 +    def start_listitem(self):
    3.58 +        self.out(" *")
    3.59 +
    3.60 +    def end_listitem(self):
    3.61 +        pass
    3.62 +
    3.63 +    def text(self, s):
    3.64 +        self.out(s)
    3.65 +
    3.66 +class HTMLSerialiser(Serialiser):
    3.67 +
    3.68 +    "Serialisation of the page as HTML."
    3.69 +
    3.70 +    def start_region(self, level, indent, type):
    3.71 +        l = []
    3.72 +        out = l.append
    3.73 +        if level:
    3.74 +            out("level-%d" % level)
    3.75 +
    3.76 +        if indent:
    3.77 +            out("indent-%d" % indent)
    3.78 +
    3.79 +        # NOTE: Encode type details for CSS.
    3.80 +
    3.81 +        if type:
    3.82 +            out("type-%s" % escape(type, True))
    3.83 +
    3.84 +        self.out("<span class='%s'>" % " ".join(l))
    3.85 +
    3.86 +    def end_region(self, level, indent, type):
    3.87 +        self.out("</span>")
    3.88 +
    3.89 +    def start_block(self, final):
    3.90 +        self.out("<p>")
    3.91 +
    3.92 +    def end_block(self, final):
    3.93 +        self.out("</p>")
    3.94 +
    3.95 +    def start_listitem(self):
    3.96 +        self.out("<li>")
    3.97 +
    3.98 +    def end_listitem(self):
    3.99 +        self.out("</li>")
   3.100 +
   3.101 +    def text(self, s):
   3.102 +        self.out(escape(s))
   3.103 +
   3.104 +# Top-level functions.
   3.105 +
   3.106 +def serialise(doc, serialiser=MoinSerialiser):
   3.107 +    l = []
   3.108 +    doc.to_string(serialiser(l.append))
   3.109 +    return "".join(l)
   3.110 +
   3.111 +# vim: tabstop=4 expandtab shiftwidth=4

     4.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     4.2 +++ b/moinformat/tree.py	Sat Apr 29 17:47:03 2017 +0200
     4.3 @@ -0,0 +1,184 @@
     4.4 +#!/usr/bin/env python
     4.5 +
     4.6 +"""
     4.7 +Moin wiki format document tree nodes.
     4.8 +
     4.9 +Copyright (C) 2017 Paul Boddie <paul@boddie.org.uk>
    4.10 +
    4.11 +This program is free software; you can redistribute it and/or modify it under
    4.12 +the terms of the GNU General Public License as published by the Free Software
    4.13 +Foundation; either version 3 of the License, or (at your option) any later
    4.14 +version.
    4.15 +
    4.16 +This program is distributed in the hope that it will be useful, but WITHOUT
    4.17 +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
    4.18 +FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
    4.19 +details.
    4.20 +
    4.21 +You should have received a copy of the GNU General Public License along with
    4.22 +this program.  If not, see <http://www.gnu.org/licenses/>.
    4.23 +"""
    4.24 +
    4.25 +class Container:
    4.26 +
    4.27 +    "A container of document nodes."
    4.28 +
    4.29 +    def __init__(self, nodes):
    4.30 +        self.nodes = nodes
    4.31 +
    4.32 +    def append(self, node):
    4.33 +        self.nodes.append(node)
    4.34 +
    4.35 +    append_text = append
    4.36 +
    4.37 +    def empty(self):
    4.38 +        return not self.nodes
    4.39 +
    4.40 +    def normalise(self):
    4.41 +
    4.42 +        "Combine adjacent text nodes."
    4.43 +
    4.44 +        nodes = self.nodes
    4.45 +        self.nodes = []
    4.46 +        text = None
    4.47 +
    4.48 +        for node in nodes:
    4.49 +
    4.50 +            # Open a text node or merge text into an open node.
    4.51 +
    4.52 +            if isinstance(node, Text):
    4.53 +                if not text:
    4.54 +                    text = node
    4.55 +                else:
    4.56 +                    text.merge(node)
    4.57 +
    4.58 +            # Close any open text node and append the current node.
    4.59 +
    4.60 +            else:
    4.61 +                if text:
    4.62 +                    self.append(text)
    4.63 +                    text = None
    4.64 +                self.append(node)
    4.65 +
    4.66 +        # Add any open text node.
    4.67 +
    4.68 +        if text:
    4.69 +            self.append(text)
    4.70 +
    4.71 +    def __str__(self):
    4.72 +        return self.prettyprint()
    4.73 +
    4.74 +    def prettyprint(self, indent=""):
    4.75 +        pass
    4.76 +
    4.77 +class Region(Container):
    4.78 +
    4.79 +    "A region of the page."
    4.80 +
    4.81 +    transparent_region_types = ["wiki"]
    4.82 +
    4.83 +    def __init__(self, nodes, level=0, indent=0, type=None):
    4.84 +        Container.__init__(self, nodes)
    4.85 +        self.level = level
    4.86 +        self.indent = indent
    4.87 +        self.type = type
    4.88 +
    4.89 +    def append(self, node):
    4.90 +        last = self.nodes and self.nodes[-1]
    4.91 +        if last and last.empty():
    4.92 +            self.nodes[-1] = node
    4.93 +        else:
    4.94 +            self.nodes.append(node)
    4.95 +
    4.96 +    def append_text(self, s):
    4.97 +        if self.is_transparent():
    4.98 +            self.nodes[-1].append(s)
    4.99 +        else:
   4.100 +            self.append(s)
   4.101 +
   4.102 +    def have_end(self, s):
   4.103 +        return self.level and s.startswith("}") and self.level == len(s)
   4.104 +
   4.105 +    def is_transparent(self):
   4.106 +        return not self.level or self.type in self.transparent_region_types
   4.107 +
   4.108 +    def __repr__(self):
   4.109 +        return "Region(%r, %r, %r, %r)" % (self.nodes, self.level, self.indent, self.type)
   4.110 +
   4.111 +    def prettyprint(self, indent=""):
   4.112 +        l = ["%sRegion: level=%d indent=%d type=%s" % (indent, self.level, self.indent, self.type)]
   4.113 +        for node in self.nodes:
   4.114 +            l.append(node.prettyprint(indent + "  "))
   4.115 +        return "\n".join(l)
   4.116 +
   4.117 +    def to_string(self, out):
   4.118 +        out.start_region(self.level, self.indent, self.type)
   4.119 +        for node in self.nodes:
   4.120 +            node.to_string(out)
   4.121 +        out.end_region(self.level, self.indent, self.type)
   4.122 +
   4.123 +class Block(Container):
   4.124 +
   4.125 +    "A block in the page."
   4.126 +
   4.127 +    def __init__(self, nodes, final=True):
   4.128 +        Container.__init__(self, nodes)
   4.129 +        self.final = final
   4.130 +
   4.131 +    def __repr__(self):
   4.132 +        return "Block(%r)" % self.nodes
   4.133 +
   4.134 +    def prettyprint(self, indent=""):
   4.135 +        l = ["%sBlock: final=%s" % (indent, self.final)]
   4.136 +        for node in self.nodes:
   4.137 +            l.append(node.prettyprint(indent + "  "))
   4.138 +        return "\n".join(l)
   4.139 +
   4.140 +    def to_string(self, out):
   4.141 +        out.start_block(self.final)
   4.142 +        for node in self.nodes:
   4.143 +            node.to_string(out)
   4.144 +        out.end_block(self.final)
   4.145 +
   4.146 +class ListItem(Container):
   4.147 +
   4.148 +    "A list item."
   4.149 +
   4.150 +    def __repr__(self):
   4.151 +        return "ListItem(%r)" % self.nodes
   4.152 +
   4.153 +    def prettyprint(self, indent=""):
   4.154 +        l = ["%sListItem:" % indent]
   4.155 +        for node in self.nodes:
   4.156 +            l.append(node.prettyprint(indent + "  "))
   4.157 +        return "\n".join(l)
   4.158 +
   4.159 +    def to_string(self, out):
   4.160 +        out.start_listitem()
   4.161 +        for node in self.nodes:
   4.162 +            node.to_string(out)
   4.163 +        out.end_listitem()
   4.164 +
   4.165 +class Text:
   4.166 +
   4.167 +    "A text node."
   4.168 +
   4.169 +    def __init__(self, s):
   4.170 +        self.s = s
   4.171 +
   4.172 +    def empty(self):
   4.173 +        return not self.s
   4.174 +
   4.175 +    def merge(self, text):
   4.176 +        self.s += text.s
   4.177 +
   4.178 +    def __repr__(self):
   4.179 +        return "Text(%r)" % self.s
   4.180 +
   4.181 +    def prettyprint(self, indent=""):
   4.182 +        return "%sText: %r" % (indent, self.s)
   4.183 +
   4.184 +    def to_string(self, out):
   4.185 +        out.text(self.s)
   4.186 +
   4.187 +# vim: tabstop=4 expandtab shiftwidth=4

     5.1 --- a/tests/test_parser.py	Fri Apr 28 18:56:50 2017 +0200
     5.2 +++ b/tests/test_parser.py	Sat Apr 29 17:47:03 2017 +0200
     5.3 @@ -1,6 +1,7 @@
     5.4  #!/usr/bin/env python
     5.5  
     5.6 -from moinformat import parse, serialise, HTMLSerialiser
     5.7 +from moinformat import parse
     5.8 +from moinformat.serialisers import serialise, HTMLSerialiser
     5.9  
    5.10  s0 = """\
    5.11  Hello
2017-04-29	Paul Boddie	raw files shortlog changelog graph	Reorganised the code into a package.
			moinformat.py moinformat/__init__.py (file) moinformat/serialisers.py (file) moinformat/tree.py (file) tests/test_parser.py (file)