1.1 --- a/moinformat.py Fri Apr 28 18:56:50 2017 +0200
1.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000
1.3 @@ -1,530 +0,0 @@
1.4 -#!/usr/bin/env python
1.5 -
1.6 -"""
1.7 -Moin wiki format parser.
1.8 -
1.9 -Copyright (C) 2012, 2013, 2015, 2017 Paul Boddie <paul@boddie.org.uk>
1.10 -
1.11 -This program is free software; you can redistribute it and/or modify it under
1.12 -the terms of the GNU General Public License as published by the Free Software
1.13 -Foundation; either version 3 of the License, or (at your option) any later
1.14 -version.
1.15 -
1.16 -This program is distributed in the hope that it will be useful, but WITHOUT
1.17 -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
1.18 -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
1.19 -details.
1.20 -
1.21 -You should have received a copy of the GNU General Public License along with
1.22 -this program. If not, see <http://www.gnu.org/licenses/>.
1.23 -"""
1.24 -
1.25 -from cgi import escape
1.26 -import re
1.27 -
1.28 -# Regular expressions.
1.29 -
1.30 -syntax = {
1.31 - # Page regions:
1.32 - "regionstart" : (r"((^\s*)([{]{3,}))", re.MULTILINE | re.DOTALL), # {{{...
1.33 - "regionend" : (r"^\s*([}]{3,})", re.MULTILINE | re.DOTALL), # }}}...
1.34 - "header" : (r"#!(.*?)\n", 0), # #! char-excl-nl
1.35 -
1.36 - # Region contents:
1.37 - "break" : (r"^(\s*?)\n", re.MULTILINE), # blank line
1.38 - "listitem" : (r"^((\s+)([*]|\d+[.]))", re.MULTILINE), # indent (list-item or number-item)
1.39 -
1.40 - # List contents:
1.41 - "listitemend" : (r"^", re.MULTILINE), # next line
1.42 - }
1.43 -
1.44 -# Define patterns for the regular expressions.
1.45 -
1.46 -patterns = {}
1.47 -for name, (value, flags) in syntax.items():
1.48 - patterns[name] = re.compile(value, re.UNICODE | flags)
1.49 -
1.50 -
1.51 -
1.52 -# Document nodes.
1.53 -
1.54 -class Container:
1.55 -
1.56 - "A container of document nodes."
1.57 -
1.58 - def __init__(self, nodes):
1.59 - self.nodes = nodes
1.60 -
1.61 - def append(self, node):
1.62 - self.nodes.append(node)
1.63 -
1.64 - append_text = append
1.65 -
1.66 - def empty(self):
1.67 - return not self.nodes
1.68 -
1.69 - def normalise(self):
1.70 -
1.71 - "Combine adjacent text nodes."
1.72 -
1.73 - nodes = self.nodes
1.74 - self.nodes = []
1.75 - text = None
1.76 -
1.77 - for node in nodes:
1.78 -
1.79 - # Open a text node or merge text into an open node.
1.80 -
1.81 - if isinstance(node, Text):
1.82 - if not text:
1.83 - text = node
1.84 - else:
1.85 - text.merge(node)
1.86 -
1.87 - # Close any open text node and append the current node.
1.88 -
1.89 - else:
1.90 - if text:
1.91 - self.append(text)
1.92 - text = None
1.93 - self.append(node)
1.94 -
1.95 - # Add any open text node.
1.96 -
1.97 - if text:
1.98 - self.append(text)
1.99 -
1.100 - def __str__(self):
1.101 - return self.prettyprint()
1.102 -
1.103 - def prettyprint(self, indent=""):
1.104 - pass
1.105 -
1.106 -class Region(Container):
1.107 -
1.108 - "A region of the page."
1.109 -
1.110 - transparent_region_types = ["wiki"]
1.111 -
1.112 - def __init__(self, nodes, level=0, indent=0, type=None):
1.113 - Container.__init__(self, nodes)
1.114 - self.level = level
1.115 - self.indent = indent
1.116 - self.type = type
1.117 -
1.118 - def append(self, node):
1.119 - last = self.nodes and self.nodes[-1]
1.120 - if last and last.empty():
1.121 - self.nodes[-1] = node
1.122 - else:
1.123 - self.nodes.append(node)
1.124 -
1.125 - def append_text(self, s):
1.126 - if self.is_transparent():
1.127 - self.nodes[-1].append(s)
1.128 - else:
1.129 - self.append(s)
1.130 -
1.131 - def have_end(self, s):
1.132 - return self.level and s.startswith("}") and self.level == len(s)
1.133 -
1.134 - def is_transparent(self):
1.135 - return not self.level or self.type in self.transparent_region_types
1.136 -
1.137 - def __repr__(self):
1.138 - return "Region(%r, %r, %r, %r)" % (self.nodes, self.level, self.indent, self.type)
1.139 -
1.140 - def prettyprint(self, indent=""):
1.141 - l = ["%sRegion: level=%d indent=%d type=%s" % (indent, self.level, self.indent, self.type)]
1.142 - for node in self.nodes:
1.143 - l.append(node.prettyprint(indent + " "))
1.144 - return "\n".join(l)
1.145 -
1.146 - def to_string(self, out):
1.147 - out.start_region(self.level, self.indent, self.type)
1.148 - for node in self.nodes:
1.149 - node.to_string(out)
1.150 - out.end_region(self.level, self.indent, self.type)
1.151 -
1.152 -class Block(Container):
1.153 -
1.154 - "A block in the page."
1.155 -
1.156 - def __init__(self, nodes, final=True):
1.157 - Container.__init__(self, nodes)
1.158 - self.final = final
1.159 -
1.160 - def __repr__(self):
1.161 - return "Block(%r)" % self.nodes
1.162 -
1.163 - def prettyprint(self, indent=""):
1.164 - l = ["%sBlock: final=%s" % (indent, self.final)]
1.165 - for node in self.nodes:
1.166 - l.append(node.prettyprint(indent + " "))
1.167 - return "\n".join(l)
1.168 -
1.169 - def to_string(self, out):
1.170 - out.start_block(self.final)
1.171 - for node in self.nodes:
1.172 - node.to_string(out)
1.173 - out.end_block(self.final)
1.174 -
1.175 -class ListItem(Container):
1.176 -
1.177 - "A list item."
1.178 -
1.179 - def __repr__(self):
1.180 - return "ListItem(%r)" % self.nodes
1.181 -
1.182 - def prettyprint(self, indent=""):
1.183 - l = ["%sListItem:" % indent]
1.184 - for node in self.nodes:
1.185 - l.append(node.prettyprint(indent + " "))
1.186 - return "\n".join(l)
1.187 -
1.188 - def to_string(self, out):
1.189 - out.start_listitem()
1.190 - for node in self.nodes:
1.191 - node.to_string(out)
1.192 - out.end_listitem()
1.193 -
1.194 -
1.195 -class Text:
1.196 -
1.197 - "A text node."
1.198 -
1.199 - def __init__(self, s):
1.200 - self.s = s
1.201 -
1.202 - def empty(self):
1.203 - return not self.s
1.204 -
1.205 - def merge(self, text):
1.206 - self.s += text.s
1.207 -
1.208 - def __repr__(self):
1.209 - return "Text(%r)" % self.s
1.210 -
1.211 - def prettyprint(self, indent=""):
1.212 - return "%sText: %r" % (indent, self.s)
1.213 -
1.214 - def to_string(self, out):
1.215 - out.text(self.s)
1.216 -
1.217 -
1.218 -
1.219 -# Serialisation.
1.220 -
1.221 -class Serialiser:
1.222 -
1.223 - "General serialisation support."
1.224 -
1.225 - def __init__(self, out):
1.226 - self.out = out
1.227 -
1.228 -class MoinSerialiser(Serialiser):
1.229 -
1.230 - "Serialisation of the page."
1.231 -
1.232 - def start_region(self, level, indent, type):
1.233 - out = self.out
1.234 - if level:
1.235 - out(" " * indent + "{" * level)
1.236 - if type and level:
1.237 - out("#!%s\n" % type)
1.238 -
1.239 - def end_region(self, level, indent, type):
1.240 - out = self.out
1.241 - if level:
1.242 - out("}" * level)
1.243 -
1.244 - def start_block(self, final):
1.245 - pass
1.246 -
1.247 - def end_block(self, final):
1.248 - if not final:
1.249 - self.out("\n")
1.250 -
1.251 - def start_listitem(self):
1.252 - self.out(" *")
1.253 -
1.254 - def end_listitem(self):
1.255 - pass
1.256 -
1.257 - def text(self, s):
1.258 - self.out(s)
1.259 -
1.260 -class HTMLSerialiser(Serialiser):
1.261 -
1.262 - "Serialisation of the page."
1.263 -
1.264 - def start_region(self, level, indent, type):
1.265 - l = []
1.266 - out = l.append
1.267 - if level:
1.268 - out("level-%d" % level)
1.269 -
1.270 - if indent:
1.271 - out("indent-%d" % indent)
1.272 -
1.273 - # NOTE: Encode type details for CSS.
1.274 -
1.275 - if type:
1.276 - out("type-%s" % escape(type, True))
1.277 -
1.278 - self.out("<span class='%s'>" % " ".join(l))
1.279 -
1.280 - def end_region(self, level, indent, type):
1.281 - self.out("</span>")
1.282 -
1.283 - def start_block(self, final):
1.284 - self.out("<p>")
1.285 -
1.286 - def end_block(self, final):
1.287 - self.out("</p>")
1.288 -
1.289 - def start_listitem(self):
1.290 - self.out("<li>")
1.291 -
1.292 - def end_listitem(self):
1.293 - self.out("</li>")
1.294 -
1.295 - def text(self, s):
1.296 - self.out(escape(s))
1.297 -
1.298 -
1.299 -
1.300 -# Tokenising functions.
1.301 -
1.302 -class TokenStream:
1.303 -
1.304 - "A stream of tokens taken from a string."
1.305 -
1.306 - def __init__(self, s):
1.307 - self.s = s
1.308 - self.pos = 0
1.309 - self.match = None
1.310 - self.matching = None
1.311 -
1.312 - def read_until(self, pattern_names, remaining=True):
1.313 -
1.314 - """
1.315 - Find the first match for the given 'pattern_names'. Return the text
1.316 - preceding any match, the remaining text if no match was found, or None
1.317 - if no match was found and 'remaining' is given as a false value.
1.318 - """
1.319 -
1.320 - first = None
1.321 - self.matching = None
1.322 -
1.323 - # Find the first matching pattern.
1.324 -
1.325 - for pattern_name in pattern_names:
1.326 - match = patterns[pattern_name].search(self.s, self.pos)
1.327 - if match:
1.328 - start, end = match.span()
1.329 - if self.matching is None or start < first:
1.330 - first = start
1.331 - self.matching = pattern_name
1.332 - self.match = match
1.333 -
1.334 - if self.matching is None:
1.335 - if remaining:
1.336 - return self.s[self.pos:]
1.337 - else:
1.338 - return None
1.339 - else:
1.340 - return self.s[self.pos:first]
1.341 -
1.342 - def read_match(self, group=1):
1.343 -
1.344 - """
1.345 - Return the matched text, updating the position in the stream. If 'group'
1.346 - is specified, the indicated group in a match will be returned.
1.347 - Typically, group 1 should contain all pertinent data, but groups defined
1.348 - within group 1 can provide sections of the data.
1.349 - """
1.350 -
1.351 - if self.match:
1.352 - _start, self.pos = self.match.span()
1.353 - try:
1.354 - return self.match.group(group)
1.355 - except IndexError:
1.356 - return ""
1.357 - else:
1.358 - self.pos = len(self.s)
1.359 - return None
1.360 -
1.361 -
1.362 -
1.363 -# Parser functions.
1.364 -
1.365 -def parse_page(s):
1.366 -
1.367 - """
1.368 - Parse page text 's'. Pages consist of regions delimited by markers.
1.369 - """
1.370 -
1.371 - return parse_region(TokenStream(s))
1.372 -
1.373 -def parse_region(items, level=0, indent=0):
1.374 -
1.375 - """
1.376 - Parse the data provided by 'items' to populate a region with the given
1.377 - 'level' at the given 'indent'.
1.378 - """
1.379 -
1.380 - region = Region([], level, indent)
1.381 -
1.382 - # Parse section headers.
1.383 -
1.384 - parse_region_header(items, region)
1.385 -
1.386 - # Parse section body.
1.387 -
1.388 - if region.is_transparent():
1.389 - parse_region_wiki(items, region)
1.390 - else:
1.391 - parse_region_opaque(items, region)
1.392 -
1.393 - return region
1.394 -
1.395 -def parse_region_header(items, region):
1.396 -
1.397 - """
1.398 - Parse the region header from the 'items', setting it for the given 'region'.
1.399 - """
1.400 -
1.401 - if items.read_until(["header"], False) == "": # None means no header
1.402 - region.type = items.read_match()
1.403 -
1.404 -def parse_region_wiki(items, region):
1.405 -
1.406 - "Parse the data provided by 'items' to populate a wiki 'region'."
1.407 -
1.408 - new_block(region)
1.409 - parse_region_details(items, region, ["break", "listitem", "regionstart", "regionend"])
1.410 -
1.411 -def parse_region_opaque(items, region):
1.412 -
1.413 - "Parse the data provided by 'items' to populate an opaque 'region'."
1.414 -
1.415 - parse_region_details(items, region, ["regionend"])
1.416 -
1.417 -def parse_region_details(items, region, pattern_names):
1.418 -
1.419 - "Parse 'items' within 'region' searching using 'pattern_names'."
1.420 -
1.421 - try:
1.422 - while True:
1.423 -
1.424 - # Obtain text before any marker or the end of the input.
1.425 -
1.426 - preceding = items.read_until(pattern_names)
1.427 - if preceding:
1.428 - region.append_text(Text(preceding))
1.429 -
1.430 - # End of input.
1.431 -
1.432 - if not items.matching:
1.433 - break
1.434 -
1.435 - # Obtain any feature.
1.436 -
1.437 - feature = items.read_match()
1.438 - handler = handlers.get(items.matching)
1.439 -
1.440 - # Handle each feature or add text to the region.
1.441 -
1.442 - if handler:
1.443 - handler(items, region)
1.444 - else:
1.445 - region.append_text(Text(feature))
1.446 -
1.447 - except StopIteration:
1.448 - pass
1.449 -
1.450 - region.normalise()
1.451 -
1.452 -def end_region(items, region):
1.453 -
1.454 - "End the parsing of 'region'."
1.455 -
1.456 - raise StopIteration
1.457 -
1.458 -def parse_break(items, region):
1.459 -
1.460 - "Handle a paragraph break within 'region'."
1.461 -
1.462 - # Mark any previous block as not being the final one in a sequence.
1.463 -
1.464 - block = region.nodes[-1]
1.465 - block.final = False
1.466 - new_block(region)
1.467 -
1.468 -def parse_listitem_end(items, region):
1.469 -
1.470 - "Handle the end of a list."
1.471 -
1.472 - raise StopIteration
1.473 -
1.474 -def parse_listitem(items, region):
1.475 -
1.476 - "Handle a list item marker within 'region'."
1.477 -
1.478 - item = ListItem([])
1.479 - parse_region_details(items, item, ["listitemend"])
1.480 - region.append(item)
1.481 - new_block(region)
1.482 -
1.483 -def parse_section(items, region):
1.484 -
1.485 - "Handle the start of a new section within 'region'."
1.486 -
1.487 - # Parse the section and start a new block after the section.
1.488 -
1.489 - indent = len(items.read_match(2))
1.490 - level = len(items.read_match(3))
1.491 - region.append(parse_region(items, level, indent))
1.492 - new_block(region)
1.493 -
1.494 -def parse_section_end(items, region):
1.495 -
1.496 - "Handle the end of a new section within 'region'."
1.497 -
1.498 - feature = items.read_match()
1.499 - if region.have_end(feature):
1.500 - raise StopIteration
1.501 - else:
1.502 - region.append_text(Text(feature))
1.503 -
1.504 -# Pattern handlers.
1.505 -
1.506 -handlers = {
1.507 - None : end_region,
1.508 - "break" : parse_break,
1.509 - "listitemend" : parse_listitem_end,
1.510 - "listitem" : parse_listitem,
1.511 - "regionstart" : parse_section,
1.512 - "regionend" : parse_section_end,
1.513 - }
1.514 -
1.515 -def new_block(region):
1.516 -
1.517 - "Start a new block in 'region'."
1.518 -
1.519 - block = Block([])
1.520 - region.append(block)
1.521 -
1.522 -
1.523 -
1.524 -# Top-level functions.
1.525 -
1.526 -parse = parse_page
1.527 -
1.528 -def serialise(doc, serialiser=MoinSerialiser):
1.529 - l = []
1.530 - doc.to_string(serialiser(l.append))
1.531 - return "".join(l)
1.532 -
1.533 -# vim: tabstop=4 expandtab shiftwidth=4
2.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
2.2 +++ b/moinformat/__init__.py Sat Apr 29 17:47:03 2017 +0200
2.3 @@ -0,0 +1,277 @@
2.4 +#!/usr/bin/env python
2.5 +
2.6 +"""
2.7 +Moin wiki format parser.
2.8 +
2.9 +Copyright (C) 2017 Paul Boddie <paul@boddie.org.uk>
2.10 +
2.11 +This program is free software; you can redistribute it and/or modify it under
2.12 +the terms of the GNU General Public License as published by the Free Software
2.13 +Foundation; either version 3 of the License, or (at your option) any later
2.14 +version.
2.15 +
2.16 +This program is distributed in the hope that it will be useful, but WITHOUT
2.17 +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
2.18 +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
2.19 +details.
2.20 +
2.21 +You should have received a copy of the GNU General Public License along with
2.22 +this program. If not, see <http://www.gnu.org/licenses/>.
2.23 +"""
2.24 +
2.25 +from moinformat.tree import Region, Block, ListItem, Text
2.26 +import re
2.27 +
2.28 +# Regular expressions.
2.29 +
2.30 +syntax = {
2.31 + # Page regions:
2.32 + "regionstart" : (r"((^\s*)([{]{3,}))", re.MULTILINE | re.DOTALL), # {{{...
2.33 + "regionend" : (r"^\s*([}]{3,})", re.MULTILINE | re.DOTALL), # }}}...
2.34 + "header" : (r"#!(.*?)\n", 0), # #! char-excl-nl
2.35 +
2.36 + # Region contents:
2.37 + "break" : (r"^(\s*?)\n", re.MULTILINE), # blank line
2.38 + "listitem" : (r"^((\s+)([*]|\d+[.]))", re.MULTILINE), # indent (list-item or number-item)
2.39 +
2.40 + # List contents:
2.41 + "listitemend" : (r"^", re.MULTILINE), # next line
2.42 + }
2.43 +
2.44 +# Define patterns for the regular expressions.
2.45 +
2.46 +patterns = {}
2.47 +for name, (value, flags) in syntax.items():
2.48 + patterns[name] = re.compile(value, re.UNICODE | flags)
2.49 +
2.50 +
2.51 +
2.52 +# Tokenising functions.
2.53 +
2.54 +class TokenStream:
2.55 +
2.56 + "A stream of tokens taken from a string."
2.57 +
2.58 + def __init__(self, s):
2.59 + self.s = s
2.60 + self.pos = 0
2.61 + self.match = None
2.62 + self.matching = None
2.63 +
2.64 + def read_until(self, pattern_names, remaining=True):
2.65 +
2.66 + """
2.67 + Find the first match for the given 'pattern_names'. Return the text
2.68 + preceding any match, the remaining text if no match was found, or None
2.69 + if no match was found and 'remaining' is given as a false value.
2.70 + """
2.71 +
2.72 + first = None
2.73 + self.matching = None
2.74 +
2.75 + # Find the first matching pattern.
2.76 +
2.77 + for pattern_name in pattern_names:
2.78 + match = patterns[pattern_name].search(self.s, self.pos)
2.79 + if match:
2.80 + start, end = match.span()
2.81 + if self.matching is None or start < first:
2.82 + first = start
2.83 + self.matching = pattern_name
2.84 + self.match = match
2.85 +
2.86 + if self.matching is None:
2.87 + if remaining:
2.88 + return self.s[self.pos:]
2.89 + else:
2.90 + return None
2.91 + else:
2.92 + return self.s[self.pos:first]
2.93 +
2.94 + def read_match(self, group=1):
2.95 +
2.96 + """
2.97 + Return the matched text, updating the position in the stream. If 'group'
2.98 + is specified, the indicated group in a match will be returned.
2.99 + Typically, group 1 should contain all pertinent data, but groups defined
2.100 + within group 1 can provide sections of the data.
2.101 + """
2.102 +
2.103 + if self.match:
2.104 + _start, self.pos = self.match.span()
2.105 + try:
2.106 + return self.match.group(group)
2.107 + except IndexError:
2.108 + return ""
2.109 + else:
2.110 + self.pos = len(self.s)
2.111 + return None
2.112 +
2.113 +
2.114 +
2.115 +# Parser functions.
2.116 +
2.117 +def parse_page(s):
2.118 +
2.119 + """
2.120 + Parse page text 's'. Pages consist of regions delimited by markers.
2.121 + """
2.122 +
2.123 + return parse_region(TokenStream(s))
2.124 +
2.125 +def parse_region(items, level=0, indent=0):
2.126 +
2.127 + """
2.128 + Parse the data provided by 'items' to populate a region with the given
2.129 + 'level' at the given 'indent'.
2.130 + """
2.131 +
2.132 + region = Region([], level, indent)
2.133 +
2.134 + # Parse section headers.
2.135 +
2.136 + parse_region_header(items, region)
2.137 +
2.138 + # Parse section body.
2.139 +
2.140 + if region.is_transparent():
2.141 + parse_region_wiki(items, region)
2.142 + else:
2.143 + parse_region_opaque(items, region)
2.144 +
2.145 + return region
2.146 +
2.147 +def parse_region_header(items, region):
2.148 +
2.149 + """
2.150 + Parse the region header from the 'items', setting it for the given 'region'.
2.151 + """
2.152 +
2.153 + if items.read_until(["header"], False) == "": # None means no header
2.154 + region.type = items.read_match()
2.155 +
2.156 +def parse_region_wiki(items, region):
2.157 +
2.158 + "Parse the data provided by 'items' to populate a wiki 'region'."
2.159 +
2.160 + new_block(region)
2.161 + parse_region_details(items, region, ["break", "listitem", "regionstart", "regionend"])
2.162 +
2.163 +def parse_region_opaque(items, region):
2.164 +
2.165 + "Parse the data provided by 'items' to populate an opaque 'region'."
2.166 +
2.167 + parse_region_details(items, region, ["regionend"])
2.168 +
2.169 +def parse_region_details(items, region, pattern_names):
2.170 +
2.171 + "Parse 'items' within 'region' searching using 'pattern_names'."
2.172 +
2.173 + try:
2.174 + while True:
2.175 +
2.176 + # Obtain text before any marker or the end of the input.
2.177 +
2.178 + preceding = items.read_until(pattern_names)
2.179 + if preceding:
2.180 + region.append_text(Text(preceding))
2.181 +
2.182 + # End of input.
2.183 +
2.184 + if not items.matching:
2.185 + break
2.186 +
2.187 + # Obtain any feature.
2.188 +
2.189 + feature = items.read_match()
2.190 + handler = handlers.get(items.matching)
2.191 +
2.192 + # Handle each feature or add text to the region.
2.193 +
2.194 + if handler:
2.195 + handler(items, region)
2.196 + else:
2.197 + region.append_text(Text(feature))
2.198 +
2.199 + except StopIteration:
2.200 + pass
2.201 +
2.202 + region.normalise()
2.203 +
2.204 +def end_region(items, region):
2.205 +
2.206 + "End the parsing of 'region'."
2.207 +
2.208 + raise StopIteration
2.209 +
2.210 +def parse_break(items, region):
2.211 +
2.212 + "Handle a paragraph break within 'region'."
2.213 +
2.214 + # Mark any previous block as not being the final one in a sequence.
2.215 +
2.216 + block = region.nodes[-1]
2.217 + block.final = False
2.218 + new_block(region)
2.219 +
2.220 +def parse_listitem_end(items, region):
2.221 +
2.222 + "Handle the end of a list."
2.223 +
2.224 + raise StopIteration
2.225 +
2.226 +def parse_listitem(items, region):
2.227 +
2.228 + "Handle a list item marker within 'region'."
2.229 +
2.230 + item = ListItem([])
2.231 + parse_region_details(items, item, ["listitemend"])
2.232 + region.append(item)
2.233 + new_block(region)
2.234 +
2.235 +def parse_section(items, region):
2.236 +
2.237 + "Handle the start of a new section within 'region'."
2.238 +
2.239 + # Parse the section and start a new block after the section.
2.240 +
2.241 + indent = len(items.read_match(2))
2.242 + level = len(items.read_match(3))
2.243 + region.append(parse_region(items, level, indent))
2.244 + new_block(region)
2.245 +
2.246 +def parse_section_end(items, region):
2.247 +
2.248 + "Handle the end of a new section within 'region'."
2.249 +
2.250 + feature = items.read_match()
2.251 + if region.have_end(feature):
2.252 + raise StopIteration
2.253 + else:
2.254 + region.append_text(Text(feature))
2.255 +
2.256 +# Pattern handlers.
2.257 +
2.258 +handlers = {
2.259 + None : end_region,
2.260 + "break" : parse_break,
2.261 + "listitemend" : parse_listitem_end,
2.262 + "listitem" : parse_listitem,
2.263 + "regionstart" : parse_section,
2.264 + "regionend" : parse_section_end,
2.265 + }
2.266 +
2.267 +def new_block(region):
2.268 +
2.269 + "Start a new block in 'region'."
2.270 +
2.271 + block = Block([])
2.272 + region.append(block)
2.273 +
2.274 +
2.275 +
2.276 +# Top-level functions.
2.277 +
2.278 +parse = parse_page
2.279 +
2.280 +# vim: tabstop=4 expandtab shiftwidth=4
3.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
3.2 +++ b/moinformat/serialisers.py Sat Apr 29 17:47:03 2017 +0200
3.3 @@ -0,0 +1,108 @@
3.4 +#!/usr/bin/env python
3.5 +
3.6 +"""
3.7 +Moin wiki serialisers.
3.8 +
3.9 +Copyright (C) 2017 Paul Boddie <paul@boddie.org.uk>
3.10 +
3.11 +This program is free software; you can redistribute it and/or modify it under
3.12 +the terms of the GNU General Public License as published by the Free Software
3.13 +Foundation; either version 3 of the License, or (at your option) any later
3.14 +version.
3.15 +
3.16 +This program is distributed in the hope that it will be useful, but WITHOUT
3.17 +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
3.18 +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
3.19 +details.
3.20 +
3.21 +You should have received a copy of the GNU General Public License along with
3.22 +this program. If not, see <http://www.gnu.org/licenses/>.
3.23 +"""
3.24 +
3.25 +from cgi import escape
3.26 +
3.27 +class Serialiser:
3.28 +
3.29 + "General serialisation support."
3.30 +
3.31 + def __init__(self, out):
3.32 + self.out = out
3.33 +
3.34 +class MoinSerialiser(Serialiser):
3.35 +
3.36 + "Serialisation of the page."
3.37 +
3.38 + def start_region(self, level, indent, type):
3.39 + out = self.out
3.40 + if level:
3.41 + out(" " * indent + "{" * level)
3.42 + if type and level:
3.43 + out("#!%s\n" % type)
3.44 +
3.45 + def end_region(self, level, indent, type):
3.46 + out = self.out
3.47 + if level:
3.48 + out("}" * level)
3.49 +
3.50 + def start_block(self, final):
3.51 + pass
3.52 +
3.53 + def end_block(self, final):
3.54 + if not final:
3.55 + self.out("\n")
3.56 +
3.57 + def start_listitem(self):
3.58 + self.out(" *")
3.59 +
3.60 + def end_listitem(self):
3.61 + pass
3.62 +
3.63 + def text(self, s):
3.64 + self.out(s)
3.65 +
3.66 +class HTMLSerialiser(Serialiser):
3.67 +
3.68 + "Serialisation of the page."
3.69 +
3.70 + def start_region(self, level, indent, type):
3.71 + l = []
3.72 + out = l.append
3.73 + if level:
3.74 + out("level-%d" % level)
3.75 +
3.76 + if indent:
3.77 + out("indent-%d" % indent)
3.78 +
3.79 + # NOTE: Encode type details for CSS.
3.80 +
3.81 + if type:
3.82 + out("type-%s" % escape(type, True))
3.83 +
3.84 + self.out("<span class='%s'>" % " ".join(l))
3.85 +
3.86 + def end_region(self, level, indent, type):
3.87 + self.out("</span>")
3.88 +
3.89 + def start_block(self, final):
3.90 + self.out("<p>")
3.91 +
3.92 + def end_block(self, final):
3.93 + self.out("</p>")
3.94 +
3.95 + def start_listitem(self):
3.96 + self.out("<li>")
3.97 +
3.98 + def end_listitem(self):
3.99 + self.out("</li>")
3.100 +
3.101 + def text(self, s):
3.102 + self.out(escape(s))
3.103 +
3.104 +# Top-level functions.
3.105 +
3.106 +def serialise(doc, serialiser=MoinSerialiser):
3.107 + l = []
3.108 + doc.to_string(serialiser(l.append))
3.109 + return "".join(l)
3.110 +
3.111 +# vim: tabstop=4 expandtab shiftwidth=4
4.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
4.2 +++ b/moinformat/tree.py Sat Apr 29 17:47:03 2017 +0200
4.3 @@ -0,0 +1,184 @@
4.4 +#!/usr/bin/env python
4.5 +
4.6 +"""
4.7 +Moin wiki format document tree nodes.
4.8 +
4.9 +Copyright (C) 2017 Paul Boddie <paul@boddie.org.uk>
4.10 +
4.11 +This program is free software; you can redistribute it and/or modify it under
4.12 +the terms of the GNU General Public License as published by the Free Software
4.13 +Foundation; either version 3 of the License, or (at your option) any later
4.14 +version.
4.15 +
4.16 +This program is distributed in the hope that it will be useful, but WITHOUT
4.17 +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
4.18 +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
4.19 +details.
4.20 +
4.21 +You should have received a copy of the GNU General Public License along with
4.22 +this program. If not, see <http://www.gnu.org/licenses/>.
4.23 +"""
4.24 +
4.25 +class Container:
4.26 +
4.27 + "A container of document nodes."
4.28 +
4.29 + def __init__(self, nodes):
4.30 + self.nodes = nodes
4.31 +
4.32 + def append(self, node):
4.33 + self.nodes.append(node)
4.34 +
4.35 + append_text = append
4.36 +
4.37 + def empty(self):
4.38 + return not self.nodes
4.39 +
4.40 + def normalise(self):
4.41 +
4.42 + "Combine adjacent text nodes."
4.43 +
4.44 + nodes = self.nodes
4.45 + self.nodes = []
4.46 + text = None
4.47 +
4.48 + for node in nodes:
4.49 +
4.50 + # Open a text node or merge text into an open node.
4.51 +
4.52 + if isinstance(node, Text):
4.53 + if not text:
4.54 + text = node
4.55 + else:
4.56 + text.merge(node)
4.57 +
4.58 + # Close any open text node and append the current node.
4.59 +
4.60 + else:
4.61 + if text:
4.62 + self.append(text)
4.63 + text = None
4.64 + self.append(node)
4.65 +
4.66 + # Add any open text node.
4.67 +
4.68 + if text:
4.69 + self.append(text)
4.70 +
4.71 + def __str__(self):
4.72 + return self.prettyprint()
4.73 +
4.74 + def prettyprint(self, indent=""):
4.75 + pass
4.76 +
4.77 +class Region(Container):
4.78 +
4.79 + "A region of the page."
4.80 +
4.81 + transparent_region_types = ["wiki"]
4.82 +
4.83 + def __init__(self, nodes, level=0, indent=0, type=None):
4.84 + Container.__init__(self, nodes)
4.85 + self.level = level
4.86 + self.indent = indent
4.87 + self.type = type
4.88 +
4.89 + def append(self, node):
4.90 + last = self.nodes and self.nodes[-1]
4.91 + if last and last.empty():
4.92 + self.nodes[-1] = node
4.93 + else:
4.94 + self.nodes.append(node)
4.95 +
4.96 + def append_text(self, s):
4.97 + if self.is_transparent():
4.98 + self.nodes[-1].append(s)
4.99 + else:
4.100 + self.append(s)
4.101 +
4.102 + def have_end(self, s):
4.103 + return self.level and s.startswith("}") and self.level == len(s)
4.104 +
4.105 + def is_transparent(self):
4.106 + return not self.level or self.type in self.transparent_region_types
4.107 +
4.108 + def __repr__(self):
4.109 + return "Region(%r, %r, %r, %r)" % (self.nodes, self.level, self.indent, self.type)
4.110 +
4.111 + def prettyprint(self, indent=""):
4.112 + l = ["%sRegion: level=%d indent=%d type=%s" % (indent, self.level, self.indent, self.type)]
4.113 + for node in self.nodes:
4.114 + l.append(node.prettyprint(indent + " "))
4.115 + return "\n".join(l)
4.116 +
4.117 + def to_string(self, out):
4.118 + out.start_region(self.level, self.indent, self.type)
4.119 + for node in self.nodes:
4.120 + node.to_string(out)
4.121 + out.end_region(self.level, self.indent, self.type)
4.122 +
4.123 +class Block(Container):
4.124 +
4.125 + "A block in the page."
4.126 +
4.127 + def __init__(self, nodes, final=True):
4.128 + Container.__init__(self, nodes)
4.129 + self.final = final
4.130 +
4.131 + def __repr__(self):
4.132 + return "Block(%r)" % self.nodes
4.133 +
4.134 + def prettyprint(self, indent=""):
4.135 + l = ["%sBlock: final=%s" % (indent, self.final)]
4.136 + for node in self.nodes:
4.137 + l.append(node.prettyprint(indent + " "))
4.138 + return "\n".join(l)
4.139 +
4.140 + def to_string(self, out):
4.141 + out.start_block(self.final)
4.142 + for node in self.nodes:
4.143 + node.to_string(out)
4.144 + out.end_block(self.final)
4.145 +
4.146 +class ListItem(Container):
4.147 +
4.148 + "A list item."
4.149 +
4.150 + def __repr__(self):
4.151 + return "ListItem(%r)" % self.nodes
4.152 +
4.153 + def prettyprint(self, indent=""):
4.154 + l = ["%sListItem:" % indent]
4.155 + for node in self.nodes:
4.156 + l.append(node.prettyprint(indent + " "))
4.157 + return "\n".join(l)
4.158 +
4.159 + def to_string(self, out):
4.160 + out.start_listitem()
4.161 + for node in self.nodes:
4.162 + node.to_string(out)
4.163 + out.end_listitem()
4.164 +
4.165 +class Text:
4.166 +
4.167 + "A text node."
4.168 +
4.169 + def __init__(self, s):
4.170 + self.s = s
4.171 +
4.172 + def empty(self):
4.173 + return not self.s
4.174 +
4.175 + def merge(self, text):
4.176 + self.s += text.s
4.177 +
4.178 + def __repr__(self):
4.179 + return "Text(%r)" % self.s
4.180 +
4.181 + def prettyprint(self, indent=""):
4.182 + return "%sText: %r" % (indent, self.s)
4.183 +
4.184 + def to_string(self, out):
4.185 + out.text(self.s)
4.186 +
4.187 +# vim: tabstop=4 expandtab shiftwidth=4
5.1 --- a/tests/test_parser.py Fri Apr 28 18:56:50 2017 +0200
5.2 +++ b/tests/test_parser.py Sat Apr 29 17:47:03 2017 +0200
5.3 @@ -1,6 +1,7 @@
5.4 #!/usr/bin/env python
5.5
5.6 -from moinformat import parse, serialise, HTMLSerialiser
5.7 +from moinformat import parse
5.8 +from moinformat.serialisers import serialise, HTMLSerialiser
5.9
5.10 s0 = """\
5.11 Hello