paul@6 | 1 | #!/usr/bin/env python |
paul@6 | 2 | |
paul@6 | 3 | import re |
paul@6 | 4 | |
# Section extraction.
#
# Locates every "{type}...{type}" section inside a larger document. The pieces
# below concatenate to a single pattern string.

sections_regexp_str = (
    r"(?<!{)"                   # the opening brace must not follow another "{"
    r"(?P<section>"             # whole section, markers included
    r"{(?P<type>[^{}]+)}"       # opening marker: a brace-free, non-empty type
    r".*?"                      # shortest possible body
    r"{(?P=type)}"              # closing marker repeating the same type
    r")"
)

# DOTALL lets the section body span several lines.
sections_regexp = re.compile(
    sections_regexp_str,
    re.MULTILINE | re.DOTALL,
)
paul@6 | 9 | |
# Section inspection.
#
# Splits one already-isolated "{type}body{type}" section into its type and its
# body. The pieces below concatenate to a single pattern string.

section_regexp_str = (
    r"{(?P<sectiontype>.*?)}"   # opening marker; the type is matched lazily
    r"(?P<section>.*)"          # body, greedy so it runs to the last closing marker
    r"{(?P=sectiontype)}"       # closing marker repeating the same type
)

# DOTALL lets the section body span several lines.
section_regexp = re.compile(
    section_regexp_str,
    re.MULTILINE | re.DOTALL,
)
paul@6 | 14 | |
def get_regions(s):

    """
    Split 's' into a list of regions, each a tuple of the form (type, text).

    The text between sections found by 'sections_regexp' is reported with a
    type of None, and each section is reported as described by
    'get_section_details'. A None-typed region is emitted before every section
    and once more after the last one, even when the text it holds is empty.
    """

    found = []
    cursor = 0

    for section in sections_regexp.finditer(s):
        begin = section.start()
        finish = section.end()

        # Plain text leading up to the section, then the section itself.
        found += [
            (None, s[cursor:begin]),
            get_section_details(section.group()),
        ]
        cursor = finish

    # Whatever follows the final section (or all of 's' if there were none).
    found.append((None, s[cursor:]))
    return found
paul@6 | 31 | |
def get_section_details(s):

    """
    Return the details of the section 's' as a tuple of the form (type, text).

    If 's' does not start with text matching 'section_regexp', the result is
    (None, s), with 's' returned unchanged.
    """

    found = section_regexp.match(s)

    # Not recognisable as a section: hand the text back untyped.
    if not found:
        return None, s

    return found.group("sectiontype", "section")
paul@6 | 41 | |
if __name__ == "__main__":
    import sys

    # Read the entire document from standard input and report each region
    # found in it.
    s = sys.stdin.read()

    # Writing through sys.stdout.write produces the same output as the
    # Python 2 print statement for these values, and also runs on Python 3,
    # where the print statement is a syntax error.
    write = sys.stdout.write

    # 'region_type' rather than 'type', so the builtin is not shadowed.
    for region_type, text in get_regions(s):
        write("Region type: %s\n" % (region_type,))
        write("Region:\n")
        write("%s\n" % (text,))
        write("\n")
        write("-" * 60 + "\n")
paul@6 | 53 | |
paul@6 | 54 | # vim: tabstop=4 expandtab shiftwidth=4 |