1.1 --- a/moinformat/parsers/common.py Sun Jul 15 23:59:08 2018 +0200
1.2 +++ b/moinformat/parsers/common.py Mon Jul 16 00:04:23 2018 +0200
1.3 @@ -42,7 +42,10 @@
1.4
1.5 def group(name, s):
1.6
1.7 - "Return a pattern group having 'name' and the pattern string 's'."
1.8 + """
1.9 + Return a pattern for the group having the given 'name' and employing the
1.10 + pattern string 's'.
1.11 + """
1.12
1.13 return "(?P<%s>%s)" % (name, s)
1.14
1.15 @@ -69,25 +72,44 @@
1.16
1.17 """
1.18 Define patterns for the regular expressions in the 'syntax' mapping. In each
1.19 - pattern, replace \N with a pattern for matching whitespace excluding
1.20 - newlines.
1.21 + pattern, replace...
1.22 +
1.23 + \N with a pattern for matching whitespace excluding newlines
1.24 + \Q with a pattern for matching quotation marks
1.25 +
1.26 + Group names are also qualified with a pattern name prefix.
1.27 """
1.28
1.29 patterns = {}
1.30 +
1.31 for name, value in syntax.items():
1.32 value = value.replace(r"\N", ws_excl_nl)
1.33 value = value.replace(r"\Q", quotes)
1.34 - patterns[name] = re.compile(value, re.UNICODE | re.MULTILINE)
1.35 +
1.36 + # Add the name to group names as a prefix.
1.37 +
1.38 + value = value.replace("(?P<", "(?P<%s_" % name)
1.39 + value = value.replace("(?P=", "(?P=%s_" % name)
1.40 +
1.41 + # Record the updated expression and add an identifying null group.
1.42 +
1.43 + patterns[name] = "%s(?P<group_%s>)" % (value, name)
1.44 +
1.45 return patterns
1.46
1.47 -def get_subset(d, keys):
1.48 +def get_expression(d, keys):
1.49
1.50 - "Return a subset of 'd' having the given 'keys'."
1.51 + """
1.52 + Return a compiled expression combining patterns in 'd' having the given
1.53 + 'keys'.
1.54 + """
1.55
1.56 - subset = {}
1.57 + subset = []
1.58 +
1.59 for key in keys:
1.60 - subset[key] = d[key]
1.61 - return subset
1.62 + subset.append(d[key])
1.63 +
1.64 + return re.compile("|".join(subset), re.UNICODE | re.MULTILINE)
1.65
1.66
1.67
1.68 @@ -105,7 +127,7 @@
1.69
1.70 self.match = None
1.71 self.queued = None
1.72 - self.match_start = None
1.73 + self.groups = {}
1.74
1.75 # Pattern name details.
1.76
1.77 @@ -123,56 +145,73 @@
1.78
1.79 self.queued = self.match
1.80
1.81 - def read_until(self, patterns, remaining=True):
1.82 + def read_until(self, expression, remaining=True):
1.83
1.84 """
1.85 - Find the first match for the given 'patterns'. Return the text preceding
1.86 - any match, the remaining text if no match was found, or None if no match
1.87 - was found and 'remaining' is given as a false value.
1.88 + Find the first match for the given 'expression'. Return the text
1.89 + preceding any match, the remaining text if no match was found, or None
1.90 + if no match was found and 'remaining' is given as a false value.
1.91 """
1.92
1.93 if self.queued:
1.94 self.match = self.queued
1.95 self.queued = None
1.96 else:
1.97 - self.match_start = None
1.98 self.matching = None
1.99
1.100 # Find the first matching pattern.
1.101
1.102 - for pattern_name, pattern in patterns.items():
1.103 - match = pattern.search(self.s, self.pos)
1.104 - if match:
1.105 - start, end = match.span()
1.106 - if self.matching is None or start < self.start:
1.107 - self.start = start
1.108 - self.matching = pattern_name
1.109 + match = expression.search(self.s, self.pos)
1.110 +
1.111 + if match:
1.112 + for name, value in match.groupdict().items():
1.113 +
1.114 + # Use a group with a non-null value to identify the
1.115 + # matching pattern.
1.116 +
1.117 + if name.startswith("group_") and value is not None:
1.118 + self.matching = name[len("group_"):]
1.119 + self.start, self.end = match.span()
1.120 self.match = match
1.121 + break
1.122 +
1.123 + # Return the remaining text, if appropriate.
1.124
1.125 if self.matching is None:
1.126 + self.groups = {}
1.127 if remaining:
1.128 return self.s[self.pos:]
1.129 else:
1.130 return None
1.131 else:
1.132 + self.groups = self.filter_groups()
1.133 return self.s[self.pos:self.start]
1.134
1.135 - def match_group(self, group=1):
1.136 + def filter_groups(self):
1.137 +
1.138 + "Filter groups from the current match for the matching pattern."
1.139 +
1.140 + d = {}
1.141 + for key, value in self.match.groupdict().items():
1.142 + if key.startswith("%s_" % self.matching):
1.143 + d[key] = value
1.144 + return d
1.145 +
1.146 + def match_group(self, group=None):
1.147
1.148 """
1.149 Return the matched text, updating the position in the stream. If 'group'
1.150 is specified, the indicated group in a match will be returned.
1.151 - Typically, group 1 should contain all pertinent data, but groups defined
1.152 - within group 1 can provide sections of the data.
1.153 + Otherwise, the entire match is returned.
1.154 """
1.155
1.156 self.update_pos()
1.157
1.158 if self.match:
1.159 - try:
1.160 - return self.match.group(group)
1.161 - except IndexError:
1.162 - return ""
1.163 + if group is None:
1.164 + return self.s[self.start:self.end]
1.165 + else:
1.166 + return self.groups.get("%s_%s" % (self.matching, group))
1.167 else:
1.168 return None
1.169
1.170 @@ -184,9 +223,12 @@
1.171
1.172 if self.match:
1.173 if groups is None:
1.174 - return self.match.groups()
1.175 + return self.groups
1.176 else:
1.177 - return self.match.groups(groups)
1.178 + l = []
1.179 + for group in groups:
1.180 + l.append(self.groups.get("%s_%s" % (self.matching, group)))
1.181 + return l
1.182 else:
1.183 return []
1.184
1.185 @@ -233,11 +275,11 @@
1.186 else:
1.187 return None
1.188
1.189 - def get_patterns(self, pattern_names):
1.190 + def get_expression(self, pattern_names):
1.191
1.192 "Return a mapping of the given 'pattern_names' to patterns."
1.193
1.194 - return get_subset(self.patterns, pattern_names)
1.195 + return get_expression(self.patterns, pattern_names)
1.196
1.197 def get_items(self, s, pos=0):
1.198
1.199 @@ -260,12 +302,13 @@
1.200 or None if no match was found and 'remaining' is given as a false value.
1.201 """
1.202
1.203 - return self.items.read_until(self.get_patterns(pattern_names))
1.204 + return self.items.read_until(self.get_expression(pattern_names))
1.205
1.206 - def match_group(self, group=1):
1.207 + def match_group(self, group=None):
1.208
1.209 """
1.210 - Return the group of the matching pattern with the given 'group' number.
1.211 + Return the group of the matching pattern with the given 'group'
1.212 + identifier. If 'group' is omitted or None, return the entire match.
1.213 """
1.214
1.215 return self.items.match_group(group)
1.216 @@ -407,7 +450,7 @@
1.217
1.218 # Obtain any feature.
1.219
1.220 - feature = self.match_group()
1.221 + feature = self.match_group(None)
1.222 handler = self.handlers.get(self.matching_pattern())
1.223
1.224 # Handle each feature or add text to the region.