# HG changeset patch
# User Paul Boddie <paul@boddie.org.uk>
# Date 1370882435 -7200
# Node ID 3f0fbef873932cee301991cdd9bec7e4cb903eca
# Parent  4a10cbd14a491bc903e2e3607f004067acc1a931
Supported table recognition in region extraction in order to handle sections
within tables, where the appearance of sections would otherwise break up the
tables around those sections.

diff -r 4a10cbd14a49 -r 3f0fbef87393 tests/test_tables_sections_mixed.txt
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test_tables_sections_mixed.txt	Mon Jun 10 18:40:35 2013 +0200
@@ -0,0 +1,21 @@
+Here are some examples of possible regular expressions and matching lines:
+|| Regular expression || Matching lines ||
+| zuff | Keywords: zuff |
+| zuff | Keywords: ZUFF |
+| zuff | Keywords: Zuff |
+| zuff | Keywords: amaryllis, zuff, applesauce |
+| zuff | Subject: \[zuff\] Do you have the right stuff for zuff? |
+| zuff | Subject: Do you have the right stuff for zuff? |
+| zuff | Subject: What is zuff? |
+| {noformat:nopanel=true}
+ \[zuff\]
+{noformat} | Keywords: \[zuff\] |
+| {noformat:nopanel=true}
+ \[zuff\]
+{noformat} | Subject: \[zuff\] Do you have the right stuff? |
+| {noformat:nopanel=true}
+ \[zuff\]
+{noformat} | Subject: Online zuff tutorials (was Re: \[zuff\] What is zuff?) |
+A few notes:
+* The matching is case-insensitive, so if zuff matches, so will ZUFF, zuFF, and any other variations in capitalization.
+* Some characters have special meaning in a regular expression, so to match those characters specifically, they must be "escaped" with a backslash (). As you can see in the above example, \[ and \] are such characters. (Others include ".", "?", and "*"). The backslash is also used for other things (I wasn't kidding about regular expressions being complex: consult other documentation for details about other uses of the backslash character), but this is the most likely use in a topic expression.
diff -r 4a10cbd14a49 -r 3f0fbef87393 wikiparser.py
--- a/wikiparser.py	Mon Jun 10 13:45:29 2013 +0200
+++ b/wikiparser.py	Mon Jun 10 18:40:35 2013 +0200
@@ -39,7 +39,7 @@
 
 # Section extraction.
 
-sections_regexp_str = r"(?<!{){(?P<type>[^-_*+{}\n:]+)(?P<options>:[^}\n]+)?}"
+sections_regexp_str = r"(?<!{){(?P<type>[^-_*+{}\n:]+)(?P<options>:[^}\n]+)?}|^(?P<rowstart>[|]{1,2})|(?P<rowend>[|]{1,2})(\n|$)"
 sections_regexp = re.compile(sections_regexp_str, re.DOTALL | re.MULTILINE)
 
 def get_regions(s):
@@ -52,11 +52,13 @@
     last = 0
     regions = [""]
     depth = 0
+    had_row = False
 
     for match in sections_regexp.finditer(s):
         start, end = match.span()
-        is_start = match.group("options")
+        is_start = match.group("options") or match.group("rowstart")
         is_section = is_section_marker(match.group("type"))
+        is_row = match.group("rowstart") or match.group("rowend")
 
         # The start of a region is either indicated by a marker with options or
         # by a marker where no region is currently active.
@@ -74,6 +76,16 @@
                 if is_section:
                     regions.append(s[start:end])
 
+                # A new row may either continue a table region or start a new
+                # table region.
+
+                elif is_row:
+                    if (last != start or not had_row):
+                        regions.append(s[start:end])
+                    else:
+                        regions[-2] += regions[-1] + s[start:end]
+                        regions.pop()
+
                 # Certain markers may be standalone macros.
 
                 else:
@@ -85,7 +97,7 @@
             else:
                 regions[-1] += s[last:end]
 
-            if is_section:
+            if is_section or is_row:
                 depth += 1
 
         # The end of a region is indicated by a marker with no options.
@@ -106,7 +118,7 @@
             # current region and the details of the region are then obtained.
 
             else:
-                if depth > 1 or not is_section:
+                if depth > 1 or (not is_section and not is_row):
                     regions[-1] += s[last:end]
 
                 # Terminate the active region, interpreting its contents.
@@ -115,9 +127,10 @@
                     regions[-1] += s[last:end]
                     regions.append("")
 
-                if is_section:
+                if is_section or is_row:
                     depth -= 1
 
+        had_row = is_row
         last = end
 
     # Where a region is still active, terminate it.