# HG changeset patch
# User Paul Boddie <paul@boddie.org.uk>
# Date 1562962037 -7200
# Node ID 90003650d826a4ea57f0ea9a42ba8b52ef7bdc5b
# Parent  2b8cbd82ee13ce4f6f8487224fcc30d175b57f2a
# Parent  ee5d6cf035d9a9cf080f85cb3daa2c5f1110c696
Merged the single-regexp-searching branch at last.

diff -r 2b8cbd82ee13 -r 90003650d826 moinformat/parsers/common.py
--- a/moinformat/parsers/common.py	Fri Jul 12 22:06:58 2019 +0200
+++ b/moinformat/parsers/common.py	Fri Jul 12 22:07:17 2019 +0200
@@ -50,7 +50,10 @@
 
 def group(name, s):
 
-    "Return a pattern group having 'name' and the pattern string 's'."
+    """
+    Return a pattern for the group having the given 'name' and employing the
+    pattern string 's'.
+    """
 
     return "(?P<%s>%s)" % (name, s)
 
@@ -73,37 +76,52 @@
     return "%s{%s,%s}" % (s, min is not None and min or "",
                              max is not None and max or "")
 
-def get_pattern(s):
-
-    "Return a compiled regular expression for the given pattern 's'."
-
-    return re.compile(s, re.UNICODE | re.MULTILINE)
-
 def get_patterns(syntax):
 
     """
     Define patterns for the regular expressions in the 'syntax' mapping. In each
-    pattern, replace \N with a pattern for matching whitespace excluding
-    newlines.
+    pattern, replace...
+
+    \E with a pattern for matching all characters including newlines
+    \N with a pattern for matching whitespace excluding newlines
+    \P with a pattern for matching all characters within a paragraph
+    \Q with a pattern for matching quotation marks
+
+    Group names are also qualified with a pattern name prefix.
     """
 
     patterns = {}
+
     for name, value in syntax.items():
         value = value.replace(r"\N", ws_excl_nl)
         value = value.replace(r"\Q", quotes)
         value = value.replace(r"\E", dotall)
         value = value.replace(r"\P", dotparagraph)
-        patterns[name] = get_pattern(value)
+
+        # Add the name to group names as a prefix.
+
+        value = value.replace("(?P<", "(?P<%s_" % name)
+        value = value.replace("(?P=", "(?P=%s_" % name)
+
+        # Record the updated expression and add an identifying null group.
+
+        patterns[name] = "%s(?P<group_%s>)" % (value, name)
+
     return patterns
 
-def get_subset(d, keys):
+def get_expression(d, keys):
 
-    "Return a subset of 'd' having the given 'keys'."
+    """
+    Return a compiled expression combining patterns in 'd' having the given
+    'keys'.
+    """
 
-    subset = {}
+    subset = []
+
     for key in keys:
-        subset[key] = d[key]
-    return subset
+        subset.append(d[key])
+
+    return re.compile("|".join(subset), re.UNICODE | re.MULTILINE)
 
 
 
@@ -121,7 +139,7 @@
 
         self.match = None
         self.queued = None
-        self.match_start = None
+        self.groups = {}
 
         # Pattern name details.
 
@@ -139,56 +157,75 @@
 
         self.queued = self.match
 
-    def read_until(self, patterns, remaining=True):
+    def read_until(self, expression, remaining=True):
 
         """
-        Find the first match for the given 'patterns'. Return the text preceding
-        any match, the remaining text if no match was found, or None if no match
-        was found and 'remaining' is given as a false value.
+        Find the first match for the given 'expression'. Return the text
+        preceding any match, the remaining text if no match was found, or None
+        if no match was found and 'remaining' is given as a false value.
         """
 
         if self.queued:
             self.match = self.queued
             self.queued = None
         else:
-            self.match_start = None
             self.matching = None
 
             # Find the first matching pattern.
 
-            for pattern_name, pattern in patterns.items():
-                match = pattern.search(self.s, self.pos)
-                if match:
-                    start, end = match.span()
-                    if self.matching is None or start < self.start:
-                        self.start = start
-                        self.matching = pattern_name
+            match = expression.search(self.s, self.pos)
+
+            if match:
+                for name, value in match.groupdict().items():
+
+                    # Use a group with a non-null value to identify the
+                    # matching pattern.
+
+                    if name.startswith("group_") and value is not None:
+                        self.matching = name[len("group_"):]
+                        self.start, self.end = match.span()
                         self.match = match
+                        break
+
+        # Return the remaining text, if appropriate.
 
         if self.matching is None:
+            self.groups = {}
             if remaining:
                 return self.s[self.pos:]
             else:
                 return None
         else:
+            self.groups = self.filter_groups()
             return self.s[self.pos:self.start]
 
-    def match_group(self, group=1):
+    def filter_groups(self):
+
+        "Filter groups from the current match for the matching pattern."
+
+        prefix = "%s_" % self.matching
+
+        d = {}
+        for key, value in self.match.groupdict().items():
+            if key.startswith(prefix):
+                d[key[len(prefix):]] = value
+        return d
+
+    def match_group(self, group=None):
 
         """
         Return the matched text, updating the position in the stream. If 'group'
         is specified, the indicated group in a match will be returned.
-        Typically, group 1 should contain all pertinent data, but groups defined
-        within group 1 can provide sections of the data.
+        Otherwise, the entire match is returned.
         """
 
         self.update_pos()
 
         if self.match:
-            try:
-                return self.match.group(group)
-            except IndexError:
-                return ""
+            if group is None:
+                return self.s[self.start:self.end]
+            else:
+                return self.groups.get(group)
         else:
             return None
 
@@ -200,9 +237,12 @@
 
         if self.match:
             if groups is None:
-                return self.match.groups()
+                return self.groups
             else:
-                return self.match.groups(groups)
+                l = []
+                for group in groups:
+                    l.append(self.groups.get(group))
+                return l
         else:
             return []
 
@@ -248,11 +288,11 @@
         else:
             return None
 
-    def get_patterns(self, pattern_names):
+    def get_expression(self, pattern_names):
 
         "Return a mapping of the given 'pattern_names' to patterns."
 
-        return get_subset(self.patterns, pattern_names)
+        return get_expression(self.patterns, pattern_names)
 
     def get_items(self, s, pos=0):
 
@@ -275,12 +315,13 @@
         or None if no match was found and 'remaining' is given as a false value.
         """
 
-        return self.items.read_until(self.get_patterns(pattern_names))
+        return self.items.read_until(self.get_expression(pattern_names))
 
-    def match_group(self, group=1):
+    def match_group(self, group=None):
 
         """
-        Return the group of the matching pattern with the given 'group' number.
+        Return the group of the matching pattern with the given 'group'
+        identifier. If 'group' is omitted or None, return the entire match.
         """
 
         return self.items.match_group(group)