1.1 --- a/moinformat/parsers/common.py Wed Dec 13 00:50:09 2017 +0100
1.2 +++ b/moinformat/parsers/common.py Fri Jun 01 15:18:32 2018 +0200
1.3 @@ -19,6 +19,7 @@
1.4 this program. If not, see <http://www.gnu.org/licenses/>.
1.5 """
1.6
1.7 +from collections import defaultdict
1.8 from moinformat.tree import Block, Region, Text
1.9 import re
1.10
1.11 @@ -120,16 +121,6 @@
1.12
1.13
1.14
1.15 -# Utility functions.
1.16 -
1.17 -def new_block(region):
1.18 -
1.19 - "Start a new block in 'region'."
1.20 -
1.21 - region.add(Block([]))
1.22 -
1.23 -
1.24 -
1.25 # Parser abstractions.
1.26
1.27 class ParserBase:
1.28 @@ -146,6 +137,7 @@
1.29 """
1.30
1.31 self.formats = formats
1.32 + self.queued = defaultdict(list)
1.33
1.34 def get_parser(self, format_type):
1.35
1.36 @@ -225,7 +217,7 @@
1.37
1.38 # Define a block to hold text and start parsing.
1.39
1.40 - new_block(region)
1.41 + self.new_block(region)
1.42
1.43 if self.region_pattern_names:
1.44 self.parse_region_details(region, self.region_pattern_names)
1.45 @@ -284,9 +276,12 @@
1.46
1.47 # Parsing utilities.
1.48
1.49 - def parse_region_details(self, region, pattern_names):
1.50 + def parse_region_details(self, region, pattern_names, strict=False):
1.51
1.52 - "Search 'region' using the 'pattern_names'."
1.53 + """
1.54 + Search 'region' using the 'pattern_names'. If 'strict' is set to a true
1.55 + value, forbid the accumulation of additional textual padding.
1.56 + """
1.57
1.58 try:
1.59 while True:
1.60 @@ -295,7 +290,10 @@
1.61
1.62 preceding = self.read_until(pattern_names)
1.63 if preceding:
1.64 - region.append_inline(Text(preceding))
1.65 + if not strict:
1.66 + region.append_inline(Text(preceding))
1.67 + else:
1.68 + break
1.69
1.70 # End of input.
1.71
1.72 @@ -311,18 +309,60 @@
1.73
1.74 if handler:
1.75 handler(self, region)
1.76 + elif not strict:
1.77 + region.append_inline(Text(feature))
1.78 else:
1.79 - region.append_inline(Text(feature))
1.80 + break
1.81
1.82 except StopIteration:
1.83 pass
1.84
1.85 region.normalise()
1.86
1.87 + def add_node(self, region, node):
1.88 +
1.89 + "Add 'node' to 'region', then add any regions queued after 'node'."
1.90 +
1.91 + region.add(node)
1.92 + self.unqueue_region(region, node)
1.93 +
1.94 + def append_node(self, region, node):
1.95 +
1.96 + "Append 'node' to 'region', then add any regions queued after 'node'."
1.97 +
1.98 + region.append(node)
1.99 + self.unqueue_region(region, node)
1.100 +
1.101 def end_region(self, region):
1.102
1.103 "End the parsing of 'region', breaking out of the parsing loop."
1.104
1.105 raise StopIteration
1.106
1.107 + def queue_region(self, region, current):
1.108 +
1.109 + "Queue 'region' to be added after 'current' when 'current' is added."
1.110 +
1.111 + self.queued[current].append(region)
1.112 +
1.113 + def unqueue_region(self, region, ended):
1.114 +
1.115 + "Add to 'region' all regions queued after 'ended', clearing its queue."
1.116 +
1.117 + nodes = self.queued.get(ended)
1.118 +
1.119 + while nodes:
1.120 + node = nodes.pop()
1.121 + region.add(node)
1.122 + self.unqueue_region(region, node)
1.123 +
1.124 + if ended in self.queued:
1.125 + del self.queued[ended]
1.126 +
1.127 + def new_block(self, region):
1.128 +
1.129 + "Start a new, initially empty block in 'region'."
1.130 +
1.131 + self.add_node(region, Block([]))
1.132 +
1.133 # vim: tabstop=4 expandtab shiftwidth=4
2.1 --- a/moinformat/parsers/moin.py Wed Dec 13 00:50:09 2017 +0100
2.2 +++ b/moinformat/parsers/moin.py Fri Jun 01 15:18:32 2018 +0200
2.3 @@ -19,11 +19,11 @@
2.4 this program. If not, see <http://www.gnu.org/licenses/>.
2.5 """
2.6
2.7 -from moinformat.parsers.common import ParserBase, get_patterns, get_subset, new_block
2.8 +from moinformat.parsers.common import ParserBase, get_patterns, get_subset
2.9 from moinformat.serialisers import serialise
2.10 from moinformat.tree import Break, DefItem, DefTerm, FontStyle, Heading, \
2.11 - Larger, ListItem, Monospace, Region, Rule, Smaller, \
2.12 - Subscript, Superscript, Table, TableAttr, \
2.13 + Larger, List, ListItem, Monospace, Region, Rule, \
2.14 + Smaller, Subscript, Superscript, Table, TableAttr, \
2.15 TableAttrs, TableCell, TableRow, Text, Underline
2.16
2.17 class MoinParser(ParserBase):
2.18 @@ -95,8 +95,8 @@
2.19
2.20 "Handle a paragraph break within 'region'."
2.21
2.22 - region.add(Break())
2.23 - new_block(region)
2.24 + self.add_node(region, Break())
2.25 + self.new_block(region)
2.26
2.27 def parse_defitem(self, region, extra=""):
2.28
2.29 @@ -105,8 +105,8 @@
2.30 pad = self.read_match(1)
2.31 item = DefItem([], pad, extra)
2.32 self.parse_region_details(item, ["listitemend"])
2.33 - region.add(item)
2.34 - new_block(region)
2.35 + self.add_node(region, item)
2.36 + self.new_block(region)
2.37
2.38 def parse_defterm(self, region):
2.39
2.40 @@ -115,7 +115,7 @@
2.41 pad = self.read_match(1)
2.42 term = DefTerm([], pad)
2.43 self.parse_region_details(term, ["deftermend", "deftermsep"])
2.44 - region.add(term)
2.45 + self.add_node(region, term)
2.46 if self.read_matching() == "deftermsep":
2.47 self.parse_defitem(region)
2.48
2.49 @@ -183,8 +183,8 @@
2.50 start_pad = self.read_match(3)
2.51 heading = Heading([], level, start_extra, start_pad)
2.52 self.parse_region_details(heading, ["headingend"] + self.inline_pattern_names)
2.53 - region.add(heading)
2.54 - new_block(region)
2.55 + self.add_node(region, heading)
2.56 + self.new_block(region)
2.57
2.58 def parse_heading_end(self, heading):
2.59
2.60 @@ -196,6 +196,14 @@
2.61 heading.end_extra = self.read_match(3)
2.62 raise StopIteration
2.63
2.64 + def parse_list(self, item):
2.65 +
2.66 + "Create a list starting with 'item', strictly parsing further items."
2.67 +
2.68 + new_list = List([item], item.indent, item.marker)
2.69 + self.parse_region_details(new_list, self.list_pattern_names, True)
2.70 + return new_list
2.71 +
2.72 def parse_listitem(self, region):
2.73
2.74 "Handle a list item marker within 'region'."
2.75 @@ -203,10 +211,32 @@
2.76 indent = len(self.read_match(1))
2.77 marker = self.read_match(2)
2.78 space = self.read_match(3)
2.79 +
2.80 item = ListItem([], indent, marker, space)
2.81 self.parse_region_details(item, self.listitem_pattern_names)
2.82 - region.add(item)
2.83 - new_block(region)
2.84 +
2.85 + last = region.node(-1)
2.86 +
2.87 + # Start a new list if not preceded by a list item.
2.88 +
2.89 + if not isinstance(last, ListItem):
2.90 + item = self.parse_list(item)
2.91 +
2.92 + # End the current list if the indent or marker is different from the
2.93 + # last list item.
2.94 +
2.95 + elif last.indent != indent or last.marker != marker:
2.96 +
2.97 + # Queue the new list, end this list, causing the new list to be
2.98 + # added after this one.
2.99 +
2.100 + self.queue_region(self.parse_list(item), region)
2.101 + self.end_region(region)
2.102 +
2.103 + # Add a new item in a list or a completed nested list.
2.104 +
2.105 + self.add_node(region, item)
2.106 + self.new_block(region)
2.107
2.108 def parse_rule(self, region):
2.109
2.110 @@ -214,8 +244,8 @@
2.111
2.112 length = len(self.read_match(1))
2.113 rule = Rule(length)
2.114 - region.add(rule)
2.115 - new_block(region)
2.116 + self.add_node(region, rule)
2.117 + self.new_block(region)
2.118
2.119 def parse_section(self, region):
2.120
2.121 @@ -225,8 +255,8 @@
2.122
2.123 indent = len(self.read_match(2))
2.124 level = len(self.read_match(3))
2.125 - region.add(self.parse_region(level, indent))
2.126 - new_block(region)
2.127 + self.add_node(region, self.parse_region(level, indent))
2.128 + self.new_block(region)
2.129
2.130 def parse_section_end(self, region):
2.131
2.132 @@ -307,7 +337,7 @@
2.133 region.append_inline(Text(serialise(cell)))
2.134 region.append_inline(Text(trailing))
2.135
2.136 - new_block(region)
2.137 + self.new_block(region)
2.138 return
2.139
2.140 # Append the final cell, if not empty.
2.141 @@ -327,9 +357,9 @@
2.142
2.143 table.add(row)
2.144 if new_table:
2.145 - region.add(new_table)
2.146 + self.add_node(region, new_table)
2.147
2.148 - new_block(region)
2.149 + self.new_block(region)
2.150
2.151 def parse_valign(self, attrs):
2.152
2.153 @@ -483,11 +513,15 @@
2.154 "fontstyle", "larger", "monospace", "smaller", "sub", "super", "underline",
2.155 ]
2.156
2.157 + list_pattern_names = [
2.158 + "listitem", "listitem_alpha", "listitem_dot", "listitem_num",
2.159 + "listitem_roman",
2.160 + ]
2.161 +
2.162 listitem_pattern_names = inline_pattern_names + ["listitemend"]
2.163
2.164 - region_pattern_names = inline_pattern_names + [
2.165 - "break", "heading", "defterm", "defterm_empty", "listitem",
2.166 - "listitem_alpha", "listitem_dot", "listitem_num", "listitem_roman",
2.167 + region_pattern_names = inline_pattern_names + list_pattern_names + [
2.168 + "break", "heading", "defterm", "defterm_empty",
2.169 "regionstart", "regionend", "rule", "tablerow",
2.170 ]
2.171