#!/usr/bin/env python

"""
Split text into untyped regions and typed sections.

A section is a span of text enclosed by a pair of identical markers of the
form {type}, for example:

    {code}some text{code}

Text outside such spans is reported as a region whose type is None.

When run as a script, the module reads text from standard input and prints
each region with its type.
"""

import re
import sys

# Section extraction.
#
# Matches a whole "{type}...{type}" span. The opening marker must not be
# preceded by another "{", the type may not contain braces, and the body is
# matched non-greedily so the first matching closing marker ends the section.
# DOTALL lets the body span several lines. MULTILINE only changes the meaning
# of "^" and "$", which the patterns do not use; it is kept so the compiled
# patterns carry the same flags as before.

sections_regexp_str = r"(?<!{)(?P<section>{(?P<type>[^{}]+)}.*?{(?P=type)})"
sections_regexp = re.compile(sections_regexp_str, re.DOTALL | re.MULTILINE)

# Section inspection.
#
# Applied to a single, already extracted section in order to separate the
# type named in the markers from the text between them.

section_regexp_str = r"{(?P<sectiontype>.*?)}(?P<section>.*){(?P=sectiontype)}"
section_regexp = re.compile(section_regexp_str, re.DOTALL | re.MULTILINE)


def get_regions(s):
    """
    Return a list of regions from 's'.

    Each region is a tuple of the form (type, text). Text found between a
    pair of {type} markers yields (type, text) without the markers; text
    outside any section yields (None, text).

    An untyped region is emitted before every section and after the last
    one, even when it is empty, so a string containing n sections always
    produces 2n + 1 regions.
    """

    regions = []
    last = 0

    for match in sections_regexp.finditer(s):
        start, end = match.span()

        # Plain text between the previous section (or the start of the
        # string) and this section.
        regions.append((None, s[last:start]))
        regions.append(get_section_details(s[start:end]))
        last = end

    # Whatever follows the final section; the whole string if none matched.
    regions.append((None, s[last:]))
    return regions


def get_section_details(s):
    """
    Return the details of the section 's' in the form (type, text).

    's' is expected to start with a {type} marker and contain a matching
    {type} marker later on. If it does not, (None, s) is returned so that
    the text is treated as an untyped region.
    """

    match = section_regexp.match(s)
    if match is None:
        return None, s
    return match.group("sectiontype"), match.group("section")


def main():
    """
    Read text from standard input and print each region found in it.
    """

    s = sys.stdin.read()

    # Each print call receives exactly one argument so that the output is
    # the same whether print is a statement (Python 2) or a function
    # (Python 3).
    for region_type, text in get_regions(s):
        print("Region type: %s" % (region_type,))
        print("Region:")
        print(text)
        print("")
        print("-" * 60)


if __name__ == "__main__":
    main()

# vim: tabstop=4 expandtab shiftwidth=4