1.1 --- a/moinformat.py Wed Apr 26 13:57:10 2017 +0200
1.2 +++ b/moinformat.py Wed Apr 26 17:32:08 2017 +0200
1.3 @@ -29,7 +29,7 @@
1.4 "markers" : (r"^\s*([{]{3,}|[}]{3,})", re.MULTILINE | re.DOTALL), # {{{... or }}}...
1.5
1.6 # Region contents:
1.7 - "header" : (r"\A#!(.*?)$", re.MULTILINE), # #! char-excl-nl
1.8 + "header" : (r"\A#!(.*?)\n", 0), # #! char-excl-nl nl
1.9 "region text" : (r"(^\s*$)", re.MULTILINE), # blank line
1.10 }
1.11
1.12 @@ -53,18 +53,74 @@
1.13 def append(self, node):
1.14 self.nodes.append(node)
1.15
1.16 + def normalise(self):
1.17 +
1.18 + "Combine adjacent text nodes."
1.19 +
1.20 + nodes = self.nodes
1.21 + self.nodes = []
1.22 + text = None
1.23 +
1.24 + for node in nodes:
1.25 +
1.26 + # Open a text node or merge text into an open node.
1.27 +
1.28 + if isinstance(node, Text):
1.29 + if not text:
1.30 + text = node
1.31 + else:
1.32 + text.merge(node)
1.33 +
1.34 + # Close any open text node and append the current node.
1.35 +
1.36 + else:
1.37 + if text:
1.38 + self.append(text)
1.39 + text = None
1.40 + self.append(node)
1.41 +
1.42 + # Add any open text node.
1.43 +
1.44 + if text:
1.45 + self.append(text)
1.46 +
1.47 class Region(Container):
1.48
1.49 "A region of the page."
1.50
1.51 + transparent_region_types = ["wiki"]
1.52 +
1.53 def __init__(self, nodes, level=0, type=None):
1.54 Container.__init__(self, nodes)
1.55 self.level = level
1.56 self.type = type
1.57
1.58 + def expand(self):
1.59 +
1.60 + """
1.61 + Expand text nodes by parsing them as region text, if the region is
1.62 + understandable to the standard parser.
1.63 + """
1.64 +
1.65 + if self.is_transparent():
1.66 + nodes = self.nodes
1.67 + self.nodes = []
1.68 +
1.69 + for node in nodes:
1.70 + if isinstance(node, Text):
1.71 + parse_region_text(node.s, self)
1.72 + else:
1.73 + self.append(node)
1.74 +
1.75 + def have_start(self, s):
1.76 + return self.is_transparent() and s.startswith("{")
1.77 +
1.78 def have_end(self, s):
1.79 return self.level and s.startswith("}") and self.level == len(s)
1.80
1.81 + def is_transparent(self):
1.82 + return not self.level or self.type in self.transparent_region_types
1.83 +
1.84 def __repr__(self):
1.85 return "Region(%r, %r, %r)" % (self.nodes, self.level, self.type)
1.86
1.87 @@ -98,6 +154,9 @@
1.88 def __init__(self, s):
1.89 self.s = s
1.90
1.91 + def merge(self, text):
1.92 + self.s += text.s
1.93 +
1.94 def __repr__(self):
1.95 return "Text(%r)" % self.s
1.96
1.97 @@ -123,8 +182,8 @@
1.98 out = self.out
1.99 if level:
1.100 out("{" * level) # marker
1.101 - if type:
1.102 - out("#!%s" % type) # header
1.103 + if type and level:
1.104 + out("#!%s\n" % type) # header
1.105
1.106 def end_region(self, level, type):
1.107 out = self.out
1.108 @@ -180,8 +239,13 @@
1.109 Parse page text 's'. Pages consist of regions delimited by markers.
1.110 """
1.111
1.112 + # Define tokens for interpretation by the parser.
1.113 +
1.114 + items = iter(patterns["markers"].split(s))
1.115 +
1.116 + # Define a region for the page and parse it.
1.117 +
1.118 region = Region([])
1.119 - items = iter(patterns["markers"].split(s))
1.120 parse_region(items, region)
1.121 return region
1.122
1.123 @@ -195,37 +259,50 @@
1.124 # Process exposed text and sections.
1.125
1.126 try:
1.127 - while True:
1.128 + try:
1.129 + while True:
1.130
1.131 - # Parse section headers.
1.132 + # Parse section headers.
1.133
1.134 - if first:
1.135 - match_text = parse_region_header(items.next(), region)
1.136 - first = False
1.137 - else:
1.138 - match_text = items.next()
1.139 + if first:
1.140 + match_text = parse_region_header(items.next(), region)
1.141 + first = False
1.142 + else:
1.143 + match_text = items.next()
1.144 +
1.145 + # Start a section if an appropriate marker is given.
1.146
1.147 - # Start a section if an appropriate marker is given.
1.148 + if region.have_start(match_text):
1.149 +
1.150 + # Define the section and parse it.
1.151 +
1.152 + _region = Region([], len(match_text))
1.153 + region.append(_region)
1.154 + parse_region(items, _region)
1.155
1.156 - if match_text.startswith("{"):
1.157 - _region = Region([], len(match_text))
1.158 - region.append(_region)
1.159 - parse_region(items, _region)
1.160 + # Interpret the given marker, closing the current section if the
1.161 + # given marker is the corresponding end marker for the current
1.162 + # section.
1.163
1.164 - # Interpret the given marker, closing the current section if the
1.165 - # given marker is the corresponding end marker for the current
1.166 - # section.
1.167 + elif region.have_end(match_text):
1.168 + return
1.169 +
1.170 + # Otherwise, add the text to the region for later expansion.
1.171 +
1.172 + else:
1.173 + region.append(Text(match_text))
1.174
1.175 - elif region.have_end(match_text):
1.176 - return
1.177 + # End of input.
1.178
1.179 - # Otherwise, parse text in the region.
1.180 + except StopIteration:
1.181 + pass
1.182
1.183 - else:
1.184 - parse_region_text(match_text, region)
1.185 + finally:
1.186 + region.normalise()
1.187
1.188 - except StopIteration:
1.189 - pass
1.190 + # Parse region contents, if possible.
1.191 +
1.192 + region.expand()
1.193
1.194 def parse_region_header(s, region):
1.195
1.196 @@ -265,12 +342,12 @@
1.197 except StopIteration:
1.198 pass
1.199
1.200 -# Top-level parsing function.
1.201 +
1.202 +
1.203 +# Top-level functions.
1.204
1.205 parse = parse_page
1.206
1.207 -# Top-level serialising functions.
1.208 -
1.209 def serialise(doc, serialiser=MoinSerialiser):
1.210 l = []
1.211 doc.to_string(serialiser(l.append))
2.1 --- a/tests/test_parser.py Wed Apr 26 13:57:10 2017 +0200
2.2 +++ b/tests/test_parser.py Wed Apr 26 17:32:08 2017 +0200
2.3 @@ -2,7 +2,19 @@
2.4
2.5 from moinformat import parse, serialise, HTMLSerialiser
2.6
2.7 -s = """\
2.8 +s0 = """\
2.9 +Hello
2.10 +{{{{#!wiki
2.11 +A region
2.12 +{{{
2.13 +Another
2.14 +}}}
2.15 +End
2.16 +}}}}
2.17 +XXX
2.18 +"""
2.19 +
2.20 +s1 = """\
2.21 Hello
2.22 {{{{#!xxx
2.23 A region
2.24 @@ -27,32 +39,24 @@
2.25 s3 = """\
2.26 Hello {{{world}}} again"""
2.27
2.28 -d = parse(s)
2.29 +d0 = parse(s0)
2.30 +d1 = parse(s1)
2.31 d2 = parse(s2)
2.32 d3 = parse(s3)
2.33
2.34 -ns = serialise(d)
2.35 +ns0 = serialise(d0)
2.36 +ns1 = serialise(d1)
2.37 ns2 = serialise(d2)
2.38 ns3 = serialise(d3)
2.39
2.40 -print ns == s
2.41 -print
2.42 -print ns
2.43 -print "----"
2.44 -print ns2 == s2
2.45 -print
2.46 -print ns2
2.47 -print "----"
2.48 -print ns3 == s3
2.49 -print
2.50 -print ns3
2.51 -print "----"
2.52 +for s, n in zip([s0, s1, s2, s3], [ns0, ns1, ns2, ns3]):
2.53 + print n == s
2.54 + print
2.55 + print n
2.56 + print "----"
2.57
2.58 -print serialise(d, HTMLSerialiser)
2.59 -print "----"
2.60 -print serialise(d2, HTMLSerialiser)
2.61 -print "----"
2.62 -print serialise(d3, HTMLSerialiser)
2.63 -print "----"
2.64 +for d in [d0, d1, d2, d3]:
2.65 + print serialise(d, HTMLSerialiser)
2.66 + print "----"
2.67
2.68 # vim: tabstop=4 expandtab shiftwidth=4