1.1 --- a/moinformat/__init__.py Fri May 05 17:39:31 2017 +0200
1.2 +++ b/moinformat/__init__.py Fri May 05 22:38:31 2017 +0200
1.3 @@ -19,12 +19,13 @@
1.4 this program. If not, see <http://www.gnu.org/licenses/>.
1.5 """
1.6
1.7 -from moinformat.parsing import ParserBase, TokenStream, get_patterns, new_block
1.8 +from moinformat.parsing import ParserBase, TokenStream, get_patterns, \
1.9 + init_formats, new_block
1.10 from moinformat.serialisers import serialise
1.11 from moinformat.tree import Break, DefItem, DefTerm, FontStyle, Heading, \
1.12 Larger, ListItem, Monospace, Region, Rule, Smaller, \
1.13 - Subscript, Superscript, TableAttr, TableAttrs, \
1.14 - TableCell, TableRow, Text, Underline
1.15 + Subscript, Superscript, Table, TableAttr, \
1.16 + TableAttrs, TableCell, TableRow, Text, Underline
1.17
1.18 # Regular expressions.
1.19
1.20 @@ -150,11 +151,11 @@
1.21
1.22 # Principal parser methods.
1.23
1.24 - def get_items(self, s):
1.25 + def get_items(self, s, pos=0):
1.26
1.27 - "Return a sequence of token items for 's'."
1.28 + "Return a sequence of token items for 's' and 'pos'."
1.29
1.30 - return TokenStream(s, self.patterns)
1.31 + return TokenStream(s, self.patterns, pos)
1.32
1.33 def parse(self, s):
1.34
1.35 @@ -183,9 +184,19 @@
1.36
1.37 "Parse the data provided by 'items' to populate a wiki 'region'."
1.38
1.39 + # Obtain a suitable token stream.
1.40 +
1.41 + items = self.replace_items(items)
1.42 +
1.43 + # Define a block to hold text and start parsing.
1.44 +
1.45 new_block(region)
1.46 self.parse_region_details(items, region, self.region_pattern_names)
1.47
1.48 + # Update the previous token stream.
1.49 +
1.50 + self.update_items(items)
1.51 +
1.52 # Parser methods supporting different page features.
1.53
1.54 def parse_attrname(self, items, attrs):
1.55 @@ -389,6 +400,16 @@
1.56
1.57 "Handle the start of a table row within 'region'."
1.58
1.59 + # Identify any active table.
1.60 +
1.61 + table = region.node(-2)
1.62 + block = region.node(-1)
1.63 +
1.64 + if not (isinstance(table, Table) and block.empty()):
1.65 + new_table = table = Table([])
1.66 + else:
1.67 + new_table = None
1.68 +
1.69 row = TableRow([])
1.70
1.71 while True:
1.72 @@ -424,7 +445,12 @@
1.73
1.74 row.append(cell)
1.75
1.76 - region.add(row)
1.77 + # Add the row to the table and any new table to the region.
1.78 +
1.79 + table.add(row)
1.80 + if new_table:
1.81 + region.add(new_table)
1.82 +
1.83 new_block(region)
1.84
1.85 def parse_valign(self, items, attrs):
1.86 @@ -544,6 +570,6 @@
1.87 # Top-level functions.
1.88
1.89 def parse(s, formats=None):
1.90 - return Parser(formats).parse(s)
1.91 + return Parser(init_formats(formats)).parse(s)
1.92
1.93 # vim: tabstop=4 expandtab shiftwidth=4
2.1 --- a/moinformat/parsing.py Fri May 05 17:39:31 2017 +0200
2.2 +++ b/moinformat/parsing.py Fri May 05 22:38:31 2017 +0200
2.3 @@ -24,12 +24,19 @@
2.4
2.5 # Pattern management.
2.6
2.7 +ws_excl_nl = r"[ \f\r\t\v]"
2.8 +
2.9 def get_patterns(syntax):
2.10
2.11 - "Define patterns for the regular expressions in the 'syntax' mapping."
2.12 + r"""
2.13 + Define patterns for the regular expressions in the 'syntax' mapping. In each
2.14 + pattern, replace \N with a pattern for matching whitespace excluding
2.15 + newlines.
2.16 + """
2.17
2.18 patterns = {}
2.19 for name, value in syntax.items():
2.20 + value = value.replace(r"\N", ws_excl_nl)
2.21 patterns[name] = re.compile(value, re.UNICODE | re.MULTILINE)
2.22 return patterns
2.23
2.24 @@ -37,10 +44,18 @@
2.25
2.26 "Combine 'patterns' with those defined by the given 'syntax' mapping."
2.27
2.28 - p = {}
2.29 - p.update(patterns)
2.30 - p.update(get_patterns(syntax))
2.31 - return p
2.32 + return combine_dicts([patterns, get_patterns(syntax)])
2.33 +
2.34 +def combine_dicts(dicts):
2.35 +
2.36 + "Return a new dictionary combining 'dicts', later ones taking precedence."
2.37 +
2.38 + combined = {}
2.39 + for d in dicts:
2.40 + combined.update(d)
2.41 + return combined
2.42 +
2.43 +
2.44
2.45 # Tokenising functions.
2.46
2.47 @@ -48,10 +63,10 @@
2.48
2.49 "A stream of tokens taken from a string."
2.50
2.51 - def __init__(self, s, patterns):
2.52 + def __init__(self, s, patterns, pos=0):
2.53 self.s = s
2.54 self.patterns = patterns
2.55 - self.pos = 0
2.56 + self.pos = pos
2.57 self.match = None
2.58 self.matching = None
2.59
2.60 @@ -136,13 +151,27 @@
2.61 """
2.62
2.63 self.formats = formats
2.64 + self.replaced_items = None
2.65
2.66 - def get_items(self, s):
2.67 + def get_items(self, s, pos=0):
2.68
2.69 - "Return a sequence of token items for 's'."
2.70 + "Return a sequence of token items for 's' and 'pos'."
2.71
2.72 raise NotImplementedError
2.73
2.74 + def replace_items(self, items):
2.75 +
2.76 + "Record 'items', returning a new sequence at the same text position."
2.77 +
2.78 + self.replaced_items = items
2.79 + return self.get_items(items.s, items.pos)
2.80 +
2.81 + def update_items(self, items):
2.82 +
2.83 + "Copy the position of 'items' to the items recorded by replace_items."
2.84 +
2.85 + self.replaced_items.pos = items.pos
2.86 +
2.87 def parse(self, s):
2.88
2.89 """
2.90 @@ -249,4 +278,20 @@
2.91
2.92 raise StopIteration
2.93
2.94 +
2.95 +# Format mapping initialisation.
2.96 +
2.97 +def init_formats(formats):
2.98 +
2.99 + """
2.100 + Convert the given 'formats' mapping from a name-to-class mapping to a
2.101 + name-to-instance mapping with each parser instance employing the converted
2.102 + mapping itself. Return the converted mapping, empty if 'formats' is None.
2.103 + """
2.104 +
2.105 + d = {}
2.106 + for name, cls in (formats or {}).items():
2.107 + d[name] = cls(d)
2.108 + return d
2.109 +
2.110 # vim: tabstop=4 expandtab shiftwidth=4
4.1 --- a/moinformat/tree.py Fri May 05 17:39:31 2017 +0200
4.2 +++ b/moinformat/tree.py Fri May 05 22:38:31 2017 +0200
4.3 @@ -296,6 +296,22 @@
4.4 self._to_string(out)
4.5 out.end_table_attrs()
4.6
4.7 +class Table(Container):
4.8 +
4.9 + "A table, containing the table rows added by the parser."
4.10 +
4.11 + def __repr__(self):
4.12 + return "Table(%r)" % self.nodes
4.13 +
4.14 + def prettyprint(self, indent=""):
4.15 + l = ["%sTable:" % indent]
4.16 + return self._prettyprint(l, indent)
4.17 +
4.18 + def to_string(self, out):
4.19 + out.start_table()
4.20 + self._to_string(out)
4.21 + out.end_table()
4.22 +
4.23 class TableCell(Container):
4.24
4.25 "A table cell."
5.1 --- a/tests/test_parser.py Fri May 05 17:39:31 2017 +0200
5.2 +++ b/tests/test_parser.py Fri May 05 22:38:31 2017 +0200
5.3 @@ -1,6 +1,7 @@
5.4 #!/usr/bin/env python
5.5
5.6 from moinformat import parse
5.7 +from moinformat.parsers import table
5.8 from moinformat.serialisers import serialise, HTMLSerialiser
5.9 from glob import glob
5.10 from os.path import join, split
5.11 @@ -8,8 +9,12 @@
5.12
5.13 dirname = split(sys.argv[0])[0]
5.14
5.15 +formats = {
5.16 + "table" : table.TableParser,
5.17 + }
5.18 +
5.19 def test_input(s):
5.20 - d = parse(s)
5.21 + d = parse(s, formats)
5.22 o = serialise(d)
5.23
5.24 print o == s