1.1 --- a/moinformat/__init__.py Thu May 04 21:41:13 2017 +0200
1.2 +++ b/moinformat/__init__.py Thu May 04 22:39:00 2017 +0200
1.3 @@ -19,11 +19,13 @@
1.4 this program. If not, see <http://www.gnu.org/licenses/>.
1.5 """
1.6
1.7 +from moinformat.parsing import ParserBase, TokenStream, new_block
1.8 from moinformat.serialisers import serialise
1.9 -from moinformat.tree import Block, Break, DefItem, DefTerm, FontStyle, Heading, \
1.10 +from moinformat.tree import Break, DefItem, DefTerm, FontStyle, Heading, \
1.11 Larger, ListItem, Monospace, Region, Rule, Smaller, \
1.12 Subscript, Superscript, TableAttr, TableAttrs, \
1.13 TableCell, TableRow, Text, Underline
1.14 +
1.15 import re
1.16
1.17 # Regular expressions.
1.18 @@ -120,141 +122,60 @@
1.19
1.20
1.21
1.22 -# Tokenising functions.
1.23 -
1.24 -class TokenStream:
1.25 -
1.26 - "A stream of tokens taken from a string."
1.27 +class Parser(ParserBase):
1.28
1.29 - def __init__(self, s):
1.30 - self.s = s
1.31 - self.pos = 0
1.32 - self.match = None
1.33 - self.matching = None
1.34 + "A wiki region parser."
1.35
1.36 - def rewind(self, length):
1.37 -
1.38 - "Rewind in the string by 'length'."
1.39 -
1.40 - self.pos -= min(length, self.pos)
1.41 -
1.42 - def read_until(self, pattern_names, remaining=True):
1.43 + def __init__(self, formats=None):
1.44
1.45 """
1.46 - Find the first match for the given 'pattern_names'. Return the text
1.47 - preceding any match, the remaining text if no match was found, or None
1.48 - if no match was found and 'remaining' is given as a false value.
1.49 + Initialise the parser with any given 'formats' mapping from region type
1.50 + names to parser objects.
1.51 """
1.52
1.53 - first = None
1.54 - self.matching = None
1.55 -
1.56 - # Find the first matching pattern.
1.57 -
1.58 - for pattern_name in pattern_names:
1.59 - match = patterns[pattern_name].search(self.s, self.pos)
1.60 - if match:
1.61 - start, end = match.span()
1.62 - if self.matching is None or start < first:
1.63 - first = start
1.64 - self.matching = pattern_name
1.65 - self.match = match
1.66 + defaults = {"wiki" : self} # built separately so 'formats' is not clobbered
1.67 + defaults.update(formats or {}) # caller-supplied parsers extend/override
1.68 + formats = defaults
1.69
1.70 - if self.matching is None:
1.71 - if remaining:
1.72 - return self.s[self.pos:]
1.73 - else:
1.74 - return None
1.75 - else:
1.76 - return self.s[self.pos:first]
1.77 -
1.78 - def read_match(self, group=1):
1.79 -
1.80 - """
1.81 - Return the matched text, updating the position in the stream. If 'group'
1.82 - is specified, the indicated group in a match will be returned.
1.83 - Typically, group 1 should contain all pertinent data, but groups defined
1.84 - within group 1 can provide sections of the data.
1.85 - """
1.86 + ParserBase.__init__(self, formats)
1.87
1.88 - if self.match:
1.89 - _start, self.pos = self.match.span()
1.90 - try:
1.91 - return self.match.group(group)
1.92 - except IndexError:
1.93 - return ""
1.94 - else:
1.95 - self.pos = len(self.s)
1.96 - return None
1.97 -
1.98 -
1.99 -
1.100 -# Utility functions.
1.101 -
1.102 -def new_block(region):
1.103 + def get_items(self, s):
1.104
1.105 - "Start a new block in 'region'."
1.106 -
1.107 - block = Block([])
1.108 - region.add(block)
1.109 -
1.110 -
1.111 + "Return a sequence of token items for 's'."
1.112
1.113 -# Parser abstraction.
1.114 -
1.115 -class Parser:
1.116 -
1.117 - "An extensible parser."
1.118 -
1.119 - def __init__(self, formats=None):
1.120 - self.formats = formats
1.121 + return TokenStream(s, patterns)
1.122
1.123 # Principal parser methods.
1.124
1.125 - def parse_page(self, s):
1.126 + def parse(self, s):
1.127
1.128 """
1.129 Parse page text 's'. Pages consist of regions delimited by markers.
1.130 """
1.131
1.132 - return self.parse_region(TokenStream(s))
1.133 -
1.134 - def parse_region(self, items, level=0, indent=0):
1.135 + items = self.get_items(s)
1.136 + region = Region([])
1.137
1.138 - """
1.139 - Parse the data provided by 'items' to populate a region with the given
1.140 - 'level' at the given 'indent'.
1.141 - """
1.142 -
1.143 - region = Region([], level, indent)
1.144 -
1.145 - # Parse section headers.
1.146 + # Parse page header.
1.147
1.148 self.parse_region_header(items, region)
1.149
1.150 - # Parse section body.
1.151 + # Handle pages directly with this parser.
1.152 + # Otherwise, test the type and find an appropriate parser.
1.153
1.154 - if region.is_transparent():
1.155 - self.parse_region_wiki(items, region)
1.156 + if not region.type:
1.157 + self.parse_region_content(items, region)
1.158 else:
1.159 - self.parse_region_opaque(items, region)
1.160 + self.parse_region_type(items, region)
1.161
1.162 return region
1.163
1.164 - def parse_region_header(self, items, region):
1.165 -
1.166 - """
1.167 - Parse the region header from the 'items', setting it for the given 'region'.
1.168 - """
1.169 -
1.170 - if items.read_until(["header"], False) == "": # None means no header
1.171 - region.type = items.read_match()
1.172 -
1.173 - def parse_region_wiki(self, items, region):
1.174 + def parse_region_content(self, items, region):
1.175
1.176 "Parse the data provided by 'items' to populate a wiki 'region'."
1.177
1.178 new_block(region)
1.179 +
1.180 self.parse_region_details(items, region, inline_pattern_names + [
1.181 "break", "heading",
1.182 "defterm", "defterm_empty",
1.183 @@ -265,12 +186,6 @@
1.184 "tablerow",
1.185 ])
1.186
1.187 - def parse_region_opaque(self, items, region):
1.188 -
1.189 - "Parse the data provided by 'items' to populate an opaque 'region'."
1.190 -
1.191 - self.parse_region_details(items, region, ["regionend"])
1.192 -
1.193 # Parser methods supporting different page features.
1.194
1.195 def parse_attrname(self, items, attrs):
1.196 @@ -575,52 +490,9 @@
1.197
1.198
1.199
1.200 - # Parsing utilities.
1.201 -
1.202 - def parse_region_details(self, items, region, pattern_names):
1.203 -
1.204 - "Parse 'items' within 'region' searching using 'pattern_names'."
1.205 -
1.206 - try:
1.207 - while True:
1.208 -
1.209 - # Obtain text before any marker or the end of the input.
1.210 -
1.211 - preceding = items.read_until(pattern_names)
1.212 - if preceding:
1.213 - region.append_inline(Text(preceding))
1.214 -
1.215 - # End of input.
1.216 -
1.217 - if not items.matching:
1.218 - break
1.219 -
1.220 - # Obtain any feature.
1.221 + # Pattern handlers.
1.222
1.223 - feature = items.read_match()
1.224 - handler = self.handlers.get(items.matching)
1.225 -
1.226 - # Handle each feature or add text to the region.
1.227 -
1.228 - if handler:
1.229 - handler(self, items, region)
1.230 - else:
1.231 - region.append_inline(Text(feature))
1.232 -
1.233 - except StopIteration:
1.234 - pass
1.235 -
1.236 - region.normalise()
1.237 -
1.238 - def end_region(self, items, region):
1.239 -
1.240 - "End the parsing of 'region', breaking out of the parsing loop."
1.241 -
1.242 - raise StopIteration
1.243 -
1.244 -
1.245 -
1.246 - # Pattern handlers.
1.247 + end_region = ParserBase.end_region
1.248
1.249 handlers = {
1.250 None : end_region,
1.251 @@ -672,6 +544,6 @@
1.252 # Top-level functions.
1.253
1.254 def parse(s, formats=None):
1.255 - return Parser(formats).parse_page(s)
1.256 + return Parser(formats).parse(s)
1.257
1.258 # vim: tabstop=4 expandtab shiftwidth=4