1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/tests/test_tables_sections_mixed.txt Mon Jun 10 18:40:35 2013 +0200
1.3 @@ -0,0 +1,21 @@
1.4 +Here are some examples of possible regular expressions and matching lines:
1.5 +|| Regular expression || Matching lines ||
1.6 +| zuff | Keywords: zuff |
1.7 +| zuff | Keywords: ZUFF |
1.8 +| zuff | Keywords: Zuff |
1.9 +| zuff | Keywords: amaryllis, zuff, applesauce |
1.10 +| zuff | Subject: \[zuff\] Do you have the right stuff for zuff? |
1.11 +| zuff | Subject: Do you have the right stuff for zuff? |
1.12 +| zuff | Subject: What is zuff? |
1.13 +| {noformat:nopanel=true}
1.14 + \[zuff\]
1.15 +{noformat} | Keywords: \[zuff\] |
1.16 +| {noformat:nopanel=true}
1.17 + \[zuff\]
1.18 +{noformat} | Subject: \[zuff\] Do you have the right stuff? |
1.19 +| {noformat:nopanel=true}
1.20 + \[zuff\]
1.21 +{noformat} | Subject: Online zuff tutorials (was Re: \[zuff\] What is zuff?) |
1.22 +A few notes:
1.23 +* The matching is case-insensitive, so if zuff matches, so will ZUFF, zuFF, and any other variations in capitalization.
1.24 +* Some characters have special meaning in a regular expression, so to match those characters specifically, they must be "escaped" with a backslash (). As you can see in the above example, \[ and \] are such characters. (Others include ".", "?", and "*"). The backslash is also used for other things (I wasn't kidding about regular expressions being complex: consult other documentation for details about other uses of the backslash character), but this is the most likely use in a topic expression.
2.1 --- a/wikiparser.py Mon Jun 10 13:45:29 2013 +0200
2.2 +++ b/wikiparser.py Mon Jun 10 18:40:35 2013 +0200
2.3 @@ -39,7 +39,7 @@
2.4
2.5 # Section extraction.
2.6
2.7 -sections_regexp_str = r"(?<!{){(?P<type>[^-_*+{}\n:]+)(?P<options>:[^}\n]+)?}"
2.8 +sections_regexp_str = r"(?<!{){(?P<type>[^-_*+{}\n:]+)(?P<options>:[^}\n]+)?}|^(?P<rowstart>[|]{1,2})|(?P<rowend>[|]{1,2})(\n|$)"
2.9 sections_regexp = re.compile(sections_regexp_str, re.DOTALL | re.MULTILINE)
2.10
2.11 def get_regions(s):
2.12 @@ -52,11 +52,13 @@
2.13 last = 0
2.14 regions = [""]
2.15 depth = 0
2.16 + had_row = False
2.17
2.18 for match in sections_regexp.finditer(s):
2.19 start, end = match.span()
2.20 - is_start = match.group("options")
2.21 + is_start = match.group("options") or match.group("rowstart")
2.22 is_section = is_section_marker(match.group("type"))
2.23 + is_row = match.group("rowstart") or match.group("rowend")
2.24
2.25 # The start of a region is either indicated by a marker with options or
2.26 # by a marker where no region is currently active.
2.27 @@ -74,6 +76,16 @@
2.28 if is_section:
2.29 regions.append(s[start:end])
2.30
2.31 + # A row marker directly following another row marker continues the
2.32 + # current table region; any other row marker starts a new table region.
2.33 +
2.34 + elif is_row:
2.35 + if (last != start or not had_row):
2.36 + regions.append(s[start:end])
2.37 + else:
2.38 + regions[-2] += regions[-1] + s[start:end]
2.39 + regions.pop()
2.40 +
2.41 # Certain markers may be standalone macros.
2.42
2.43 else:
2.44 @@ -85,7 +97,7 @@
2.45 else:
2.46 regions[-1] += s[last:end]
2.47
2.48 - if is_section:
2.49 + if is_section or is_row:
2.50 depth += 1
2.51
2.52 # The end of a region is indicated by a marker with no options.
2.53 @@ -106,7 +118,7 @@
2.54 # current region and the details of the region are then obtained.
2.55
2.56 else:
2.57 - if depth > 1 or not is_section:
2.58 + if depth > 1 or (not is_section and not is_row):
2.59 regions[-1] += s[last:end]
2.60
2.61 # Terminate the active region, interpreting its contents.
2.62 @@ -115,9 +127,10 @@
2.63 regions[-1] += s[last:end]
2.64 regions.append("")
2.65
2.66 - if is_section:
2.67 + if is_section or is_row:
2.68 depth -= 1
2.69
2.70 + had_row = is_row
2.71 last = end
2.72
2.73 # Where a region is still active, terminate it.