# HG changeset patch # User Paul Boddie # Date 1493935827 -7200 # Node ID 8dbedbb8ef8b60cb736e216f88acc364409380be # Parent 3993165616f88b901f0897fe41fd0d7115d1454f Moved patterns into the parser, introducing functionality for pattern re-use. diff -r 3993165616f8 -r 8dbedbb8ef8b moinformat/__init__.py --- a/moinformat/__init__.py Thu May 04 22:39:00 2017 +0200 +++ b/moinformat/__init__.py Fri May 05 00:10:27 2017 +0200 @@ -19,15 +19,13 @@ this program. If not, see <http://www.gnu.org/licenses/>. """ -from moinformat.parsing import ParserBase, TokenStream, new_block +from moinformat.parsing import ParserBase, TokenStream, get_patterns, new_block from moinformat.serialisers import serialise from moinformat.tree import Break, DefItem, DefTerm, FontStyle, Heading, \ Larger, ListItem, Monospace, Region, Rule, Smaller, \ Subscript, Superscript, TableAttr, TableAttrs, \ TableCell, TableRow, Text, Underline -import re - # Regular expressions. syntax = { @@ -103,23 +101,6 @@ "attrvalue" : r"""=(?P<x>['"])(.*?)(?P=x)""", } -# Define pattern details. - -table_pattern_names = ["attrname", "colour", "colspan", "halign", "rowspan", "tableattrsend", "valign", "width"] - -inline_pattern_names = ["fontstyle", "larger", "monospace", "smaller", "sub", "super", "underline"] - -def inline_patterns_for(name): - names = inline_pattern_names[:] - names[names.index(name)] = "%send" % name - return names - -# Define patterns for the regular expressions. - -patterns = {} -for name, value in syntax.items(): - patterns[name] = re.compile(value, re.UNICODE | re.MULTILINE) - class Parser(ParserBase): @@ -139,13 +120,37 @@ ParserBase.__init__(self, formats) + # Pattern details. 
+ + patterns = get_patterns(syntax) + + table_pattern_names = [ + "attrname", "colour", "colspan", "halign", "rowspan", "tableattrsend", + "valign", "width" + ] + + inline_pattern_names = [ + "fontstyle", "larger", "monospace", "smaller", "sub", "super", "underline", + ] + + region_pattern_names = inline_pattern_names + [ + "break", "heading", "defterm", "defterm_empty", "listitem", + "listitem_alpha", "listitem_dot", "listitem_num", "listitem_roman", + "regionstart", "regionend", "rule", "tablerow", + ] + + def inline_patterns_for(self, name): + names = self.inline_pattern_names[:] + names[names.index(name)] = "%send" % name + return names + + # Principal parser methods. + def get_items(self, s): "Return a sequence of token items for 's'." - return TokenStream(s, patterns) - - # Principal parser methods. + return TokenStream(s, self.patterns) def parse(self, s): @@ -175,16 +180,7 @@ "Parse the data provided by 'items' to populate a wiki 'region'." new_block(region) - - self.parse_region_details(items, region, inline_pattern_names + [ - "break", "heading", - "defterm", "defterm_empty", - "listitem", "listitem_alpha", "listitem_dot", "listitem_num", - "listitem_roman", - "regionstart", "regionend", - "rule", - "tablerow", - ]) + self.parse_region_details(items, region, self.region_pattern_names) # Parser methods supporting different page features. 
@@ -274,7 +270,7 @@ span = FontStyle([], emphasis, strong) if not double: - self.parse_region_details(items, span, inline_pattern_names) + self.parse_region_details(items, span, self.inline_pattern_names) region.append_inline(span) def parse_halign(self, items, attrs): @@ -293,7 +289,7 @@ level = len(items.read_match(2)) start_pad = items.read_match(3) heading = Heading([], level, start_extra, start_pad) - self.parse_region_details(items, heading, ["headingend"] + inline_pattern_names) + self.parse_region_details(items, heading, ["headingend"] + self.inline_pattern_names) region.add(heading) new_block(region) @@ -354,7 +350,7 @@ "Handle the start of table attributes within 'cell'." attrs = TableAttrs([]) - self.parse_region_details(items, attrs, table_pattern_names) + self.parse_region_details(items, attrs, self.table_pattern_names) # Test the validity of the attributes. @@ -444,7 +440,7 @@ "Handle an inline region." span = cls([]) - self.parse_region_details(items, span, inline_patterns_for(pattern_name)) + self.parse_region_details(items, span, self.inline_patterns_for(pattern_name)) region.append_inline(span) def parse_larger(self, items, region): diff -r 3993165616f8 -r 8dbedbb8ef8b moinformat/parsing.py --- a/moinformat/parsing.py Thu May 04 22:39:00 2017 +0200 +++ b/moinformat/parsing.py Fri May 05 00:10:27 2017 +0200 @@ -20,6 +20,27 @@ """ from moinformat.tree import Block, Region, Text +import re + +# Pattern management. + +def get_patterns(syntax): + + "Define patterns for the regular expressions in the 'syntax' mapping." + + patterns = {} + for name, value in syntax.items(): + patterns[name] = re.compile(value, re.UNICODE | re.MULTILINE) + return patterns + +def combine_patterns(patterns, syntax): + + "Combine 'patterns' with those defined by the given 'syntax' mapping." + + p = {} + p.update(patterns) + p.update(get_patterns(syntax)) + return p # Tokenising functions.