1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/pyparser/test/test_parser.py Sun Jan 08 20:20:39 2017 +0100
1.3 @@ -0,0 +1,293 @@
1.4 +# New parser tests.
1.5 +import py
1.6 +import tokenize
1.7 +import token
1.8 +import StringIO
1.9 +from pyparser import parser, metaparser, pygram
1.10 +from pyparser.test.test_metaparser import MyGrammar
1.11 +
1.12 +
class SimpleParser(parser.Parser):
    """Parser subclass that feeds itself from the stdlib tokenizer."""

    def parse(self, input):
        """Tokenize ``input`` and push every token into the parser.

        A newline is appended to ``input`` before tokenizing.  Whenever
        ``add_token`` returns a true value the token generator is required to
        be exhausted (presumably this is how the parser signals that it
        accepted the input -- confirm against ``parser.Parser``).

        Returns ``self.root``.
        """
        self.prepare()
        source = StringIO.StringIO(input + "\n")
        tokens = tokenize.generate_tokens(source.readline)
        for tok_type, tok_value, start, _end, tok_line in tokens:
            # ``start`` is the (row, column) pair reported by tokenize.
            row, column = start[0], start[1]
            accepted = self.add_token(tok_type, tok_value, row, column,
                                      tok_line)
            if accepted:
                # No further tokens may follow once the parser is done.
                py.test.raises(StopIteration, tokens.next)
        return self.root
1.23 +
1.24 +
def tree_from_string(expected, gram):
    """Build a parse tree from an indented textual description.

    ``expected`` holds one node per line, with children indented four spaces
    deeper than their parent.  A line whose first word is upper case names a
    constant from the ``token`` module and becomes a ``parser.Terminal``; an
    optional second word (surrounding double quotes are stripped) is its
    value.  Any other line names a grammar symbol, looked up in
    ``gram.symbol_ids``, and becomes a ``parser.Nonterminal``.

    Leading blank lines in ``expected`` are dropped and the indentation of
    the first remaining line is taken as the base indentation, which must be
    a multiple of four.

    Returns the root node (the first node described).
    Raises ``AssertionError`` if the base indentation is not a multiple of
    four, and ``IndexError`` if ``expected`` describes no node at all.
    """
    def count_indent(s):
        # Number of leading space characters (tabs are not counted).
        return len(s) - len(s.lstrip(" "))

    # Find the last newline inside the leading run of spaces/newlines.  None
    # (rather than 0) marks "no newline seen": index 0 is a valid position
    # and is exactly where the newline sits in a literal that starts right
    # after the opening triple quote.
    last_newline_index = None
    for i, char in enumerate(expected):
        if char == "\n":
            last_newline_index = i
        elif char != " ":
            break
    if last_newline_index is not None:
        expected = expected[last_newline_index + 1:]

    base_indent = count_indent(expected)
    assert not divmod(base_indent, 4)[1], "not using 4 space indentation"
    lines = [line[base_indent:] for line in expected.splitlines()]

    last_indent = 0
    # Path from the root down to the most recently created node.
    node_stack = []
    for line in lines:
        if not line.strip():
            continue
        data = line.split()
        if data[0].isupper():
            tp = getattr(token, data[0])
            if len(data) == 2:
                value = data[1].strip("\"")
            elif tp == token.NEWLINE:
                value = "\n"
            else:
                value = ""
            # NOTE(review): the two zeros are presumably position info
            # (line/column) -- confirm against parser.Terminal.
            n = parser.Terminal(tp, value, 0, 0)
        else:
            tp = gram.symbol_ids[data[0]]
            children = []
            n = parser.Nonterminal(tp, children)

        new_indent = count_indent(line)
        if new_indent < last_indent:
            # Dedent: drop the previous node plus one ancestor per level.
            pop_nodes = (last_indent - new_indent) // 4 + 1
            del node_stack[-pop_nodes:]
            node_stack[-1].append_child(n)
        else:
            if new_indent == last_indent and node_stack:
                # Sibling of the previous node: replace it on the stack.
                node_stack.pop()
            if node_stack:
                node_stack[-1].append_child(n)
        node_stack.append(n)
        last_indent = new_indent
    return node_stack[0]
1.78 +
1.79 +
class TestParser:
    """Parser tests driven by small ad-hoc grammars.

    Each test builds a parser from a grammar snippet, parses a short input
    and compares the result with a tree written as indented text.
    """

    def parser_for(self, gram, add_endmarker=True):
        """Return a ``(SimpleParser, grammar)`` pair built from ``gram``.

        When ``add_endmarker`` is true a trailing NEWLINE ENDMARKER sequence
        is appended to the grammar source before it is compiled.
        """
        source = (gram + " NEWLINE ENDMARKER\n") if add_endmarker else gram
        grammar = metaparser.ParserGenerator(source).build_grammar(MyGrammar)
        return SimpleParser(grammar), grammar

    def _check(self, prs, grammar, tree, source):
        # The textual tree must equal what the parser builds for ``source``.
        wanted = tree_from_string(tree, grammar)
        assert wanted == prs.parse(source)

    def test_multiple_rules(self):
        rules = """foo: 'next_rule' bar 'end' NEWLINE ENDMARKER
bar: NAME NUMBER\n"""
        prs, grammar = self.parser_for(rules, False)
        tree = """
        foo
            NAME "next_rule"
            bar
                NAME "a_name"
                NUMBER "42"
            NAME "end"
            NEWLINE
            ENDMARKER"""
        self._check(prs, grammar, tree, "next_rule a_name 42 end")

    def test_recursive_rule(self):
        rules = """foo: NAME bar STRING NEWLINE ENDMARKER
bar: NAME [bar] NUMBER\n"""
        prs, grammar = self.parser_for(rules, False)
        tree = """
        foo
            NAME "hi"
            bar
                NAME "hello"
                bar
                    NAME "a_name"
                    NUMBER "32"
                NUMBER "42"
            STRING "'string'"
            NEWLINE
            ENDMARKER"""
        self._check(prs, grammar, tree, "hi hello a_name 32 42 'string'")

    def test_symbol(self):
        rules = """parent: first_child second_child NEWLINE ENDMARKER
first_child: NAME age
second_child: STRING
age: NUMBER\n"""
        prs, grammar = self.parser_for(rules, False)
        tree = """
        parent
            first_child
                NAME "harry"
                age
                    NUMBER "13"
            second_child
                STRING "'fred'"
            NEWLINE
            ENDMARKER"""
        self._check(prs, grammar, tree, "harry 13 'fred'")

    def test_token(self):
        prs, grammar = self.parser_for("foo: NAME")
        tree = """
        foo
            NAME "hi"
            NEWLINE
            ENDMARKER"""
        self._check(prs, grammar, tree, "hi")
        py.test.raises(parser.ParseError, prs.parse, "567")

        prs, grammar = self.parser_for("foo: NUMBER NAME STRING")
        tree = """
        foo
            NUMBER "42"
            NAME "hi"
            STRING "'bar'"
            NEWLINE
            ENDMARKER"""
        self._check(prs, grammar, tree, "42 hi 'bar'")

    def test_optional(self):
        prs, grammar = self.parser_for("foo: [NAME] 'end'")
        tree = """
        foo
            NAME "hi"
            NAME "end"
            NEWLINE
            ENDMARKER"""
        self._check(prs, grammar, tree, "hi end")
        tree = """
        foo
            NAME "end"
            NEWLINE
            ENDMARKER"""
        self._check(prs, grammar, tree, "end")

    def test_grouping(self):
        prs, grammar = self.parser_for(
            "foo: ((NUMBER NAME | STRING) | 'second_option')")
        tree = """
        foo
            NUMBER "42"
            NAME "hi"
            NEWLINE
            ENDMARKER"""
        self._check(prs, grammar, tree, "42 hi")
        tree = """
        foo
            STRING "'hi'"
            NEWLINE
            ENDMARKER"""
        self._check(prs, grammar, tree, "'hi'")
        tree = """
        foo
            NAME "second_option"
            NEWLINE
            ENDMARKER"""
        self._check(prs, grammar, tree, "second_option")
        py.test.raises(parser.ParseError, prs.parse, "42 a_name 'hi'")
        py.test.raises(parser.ParseError, prs.parse, "42 second_option")

    def test_alternative(self):
        prs, grammar = self.parser_for("foo: (NAME | NUMBER)")
        tree = """
        foo
            NAME "hi"
            NEWLINE
            ENDMARKER"""
        self._check(prs, grammar, tree, "hi")
        tree = """
        foo
            NUMBER "42"
            NEWLINE
            ENDMARKER"""
        self._check(prs, grammar, tree, "42")
        py.test.raises(parser.ParseError, prs.parse, "hi 23")
        py.test.raises(parser.ParseError, prs.parse, "23 hi")
        py.test.raises(parser.ParseError, prs.parse, "'some string'")

    def test_keyword(self):
        prs, grammar = self.parser_for("foo: 'key'")
        tree = """
        foo
            NAME "key"
            NEWLINE
            ENDMARKER"""
        self._check(prs, grammar, tree, "key")
        py.test.raises(parser.ParseError, prs.parse, "")

        prs, grammar = self.parser_for("foo: NAME 'key'")
        tree = """
        foo
            NAME "some_name"
            NAME "key"
            NEWLINE
            ENDMARKER"""
        self._check(prs, grammar, tree, "some_name key")
        py.test.raises(parser.ParseError, prs.parse, "some_name")

    def test_repeaters(self):
        # One-or-more repetition of a single token.
        prs, grammar = self.parser_for("foo: NAME+ 'end'")
        tree = """
        foo
            NAME "hi"
            NAME "bye"
            NAME "nothing"
            NAME "end"
            NEWLINE
            ENDMARKER"""
        self._check(prs, grammar, tree, "hi bye nothing end")
        py.test.raises(parser.ParseError, prs.parse, "end")
        py.test.raises(parser.ParseError, prs.parse, "hi bye")

        # Zero-or-more repetition of a single token.
        prs, grammar = self.parser_for("foo: NAME* 'end'")
        tree = """
        foo
            NAME "hi"
            NAME "bye"
            NAME "end"
            NEWLINE
            ENDMARKER"""
        self._check(prs, grammar, tree, "hi bye end")
        py.test.raises(parser.ParseError, prs.parse, "hi bye")
        tree = """
        foo
            NAME "end"
            NEWLINE
            ENDMARKER"""
        self._check(prs, grammar, tree, "end")

        # One-or-more repetition of an alternative group.
        prs, grammar = self.parser_for("foo: (NAME | NUMBER)+ 'end'")
        tree = """
        foo
            NAME "a_name"
            NAME "name_two"
            NAME "end"
            NEWLINE
            ENDMARKER"""
        self._check(prs, grammar, tree, "a_name name_two end")
        tree = """
        foo
            NUMBER "42"
            NAME "name"
            NAME "end"
            NEWLINE
            ENDMARKER"""
        self._check(prs, grammar, tree, "42 name end")
        py.test.raises(parser.ParseError, prs.parse, "end")

        # Zero-or-more repetition of an alternative group.
        prs, grammar = self.parser_for("foo: (NAME | NUMBER)* 'end'")
        tree = """
        foo
            NAME "hi"
            NUMBER 42
            NAME "end"
            NEWLINE
            ENDMARKER"""
        self._check(prs, grammar, tree, "hi 42 end")