Lichen

pyparser/parser.py

1027:dd0745ab8b8a
5 months ago Paul Boddie Reordered GCC arguments to prevent linking failures. Someone decided to change the GCC invocation or linking semantics at some point, meaning that libraries specified "too early" in the argument list no longer provide the symbols required by the program objects, whereas specifying them at the end of the argument list allows those symbols to be found and obtained.
     1 """     2 A CPython inspired RPython parser.     3 """     4      5      6 class Grammar(object):     7     """     8     Base Grammar object.     9     10     Pass this to ParserGenerator.build_grammar to fill it with useful values for    11     the Parser.    12     """    13     14     def __init__(self):    15         self.symbol_ids = {}    16         self.symbol_names = {}    17         self.symbol_to_label = {}    18         self.keyword_ids = {}    19         self.dfas = []    20         self.labels = [0]    21         self.token_ids = {}    22         self.start = -1    23     24     def shared_copy(self):    25         new = self.__class__()    26         new.symbol_ids = self.symbol_ids    27         new.symbols_names = self.symbol_names    28         new.keyword_ids = self.keyword_ids    29         new.dfas = self.dfas    30         new.labels = self.labels    31         new.token_ids = self.token_ids    32         return new    33     34     def _freeze_(self):    35         # Remove some attributes not used in parsing.    36         try:    37             del self.symbol_to_label    38             del self.symbol_names    39             del self.symbol_ids    40         except AttributeError:    41             pass    42         return True    43     44     45 class Node(object):    46     47     __slots__ = ("type", )    48     49     def __init__(self, type):    50         self.type = type    51     52     def __eq__(self, other):    53         raise NotImplementedError("abstract base class")    54     55     def __ne__(self, other):    56         return not self == other    57     58     def get_value(self):    59         return None    60     61     def get_child(self, i):    62         raise NotImplementedError("abstract base class")    63     64     def num_children(self):    65         return 0    66     67     def append_child(self, child):    68         raise NotImplementedError("abstract base class")    69     70     def get_lineno(self):    71         raise NotImplementedError("abstract base class")    72     73     def get_column(self):    74         raise NotImplementedError("abstract base class")    75     76     77 class Terminal(Node):    78     __slots__ = ("value", "lineno", "column")    79     def __init__(self, type, value, lineno, column):    80         Node.__init__(self, type)    81         self.value = value    82         self.lineno = lineno    83         self.column = column    84     85     def __repr__(self):    86         return "Terminal(type=%s, value=%r)" % (self.type, self.value)    87     88     def __eq__(self, other):    89         # For tests.    90         return (type(self) == type(other) and    91                 self.type == other.type and    92                 self.value == other.value)    93     94     def get_value(self):    95         return self.value    96     97     def get_lineno(self):    98         return self.lineno    99    100     def get_column(self):   101         return self.column   102    103    104 class AbstractNonterminal(Node):   105     __slots__ = ()   106    107     def get_lineno(self):   108         return self.get_child(0).get_lineno()   109    110     def get_column(self):   111         return self.get_child(0).get_column()   112    113     def __eq__(self, other):   114         # For tests.   115         # grumble, annoying   116         if not isinstance(other, AbstractNonterminal):   117             return False   118         if self.type != other.type:   119             return False   120         if self.num_children() != other.num_children():   121             return False   122         for i in range(self.num_children()):   123             if self.get_child(i) != other.get_child(i):   124                 return False   125         return True   126    127    128 class Nonterminal(AbstractNonterminal):   129     __slots__ = ("_children", )   130     def __init__(self, type, children):   131         Node.__init__(self, type)   132         self._children = children   133    134     def __repr__(self):   135         return "Nonterminal(type=%s, children=%r)" % (self.type, self._children)   136    137     def get_child(self, i):   138         return self._children[i]   139    140     def num_children(self):   141         return len(self._children)   142    143     def append_child(self, child):   144         self._children.append(child)   145    146    147 class Nonterminal1(AbstractNonterminal):   148     __slots__ = ("_child", )   149     def __init__(self, type, child):   150         Node.__init__(self, type)   151         self._child = child   152    153     def __repr__(self):   154         return "Nonterminal(type=%s, children=[%r])" % (self.type, self._child)   155    156     def get_child(self, i):   157         assert i == 0 or i == -1   158         return self._child   159    160     def num_children(self):   161         return 1   162    163     def append_child(self, child):   164         assert 0, "should be unreachable"   165    166    167 class NonterminalEnc(Nonterminal1):   168     def __init__(self, type, child, encoding):   169         Nonterminal1.__init__(self, type, child)   170         self.encoding = encoding   171    172     def __repr__(self):   173         return "NonterminalEnc(type=%s, child=%r, encoding=%r)" % (self.type, self._child, self.encoding)   174    175    176 class ParseError(Exception):   177    178     def __init__(self, msg, token_type, value, lineno, column, line,   179                  expected=-1):   180         self.msg = msg   181         self.token_type = token_type   182         self.value = value   183         self.lineno = lineno   184         self.column = column   185         self.line = line   186         self.expected = expected   187    188     def __str__(self):   189         return "ParserError(%s, %r)" % (self.token_type, self.value)   190    191    192 class Parser(object):   193    194     def __init__(self, grammar):   195         self.grammar = grammar   196         self.root = None   197         self.stack = None   198    199     def prepare(self, start=-1):   200         """Setup the parser for parsing.   201    202         Takes the starting symbol as an argument.   203         """   204         if start == -1:   205             start = self.grammar.start   206         self.root = None   207         current_node = Nonterminal(start, [])   208         self.stack = []   209         self.stack.append((self.grammar.dfas[start - 256], 0, current_node))   210    211     def add_token(self, token_type, value, lineno, column, line):   212         label_index = self.classify(token_type, value, lineno, column, line)   213         sym_id = 0 # for the annotator   214         while True:   215             dfa, state_index, node = self.stack[-1]   216             states, first = dfa   217             arcs, is_accepting = states[state_index]   218             for i, next_state in arcs:   219                 sym_id = self.grammar.labels[i]   220                 if label_index == i:   221                     # We matched a non-terminal.   222                     self.shift(next_state, token_type, value, lineno, column)   223                     state = states[next_state]   224                     # While the only possible action is to accept, pop nodes off   225                     # the stack.   226                     while state[1] and not state[0]:   227                         self.pop()   228                         if not self.stack:   229                             # Parsing is done.   230                             return True   231                         dfa, state_index, node = self.stack[-1]   232                         state = dfa[0][state_index]   233                     return False   234                 elif sym_id >= 256:   235                     sub_node_dfa = self.grammar.dfas[sym_id - 256]   236                     # Check if this token can start a child node.   237                     if label_index in sub_node_dfa[1]:   238                         self.push(sub_node_dfa, next_state, sym_id, lineno,   239                                   column)   240                         break   241             else:   242                 # We failed to find any arcs to another state, so unless this   243                 # state is accepting, it's invalid input.   244                 if is_accepting:   245                     self.pop()   246                     if not self.stack:   247                         raise ParseError("too much input", token_type, value,   248                                          lineno, column, line)   249                 else:   250                     # If only one possible input would satisfy, attach it to the   251                     # error.   252                     if len(arcs) == 1:   253                         expected = sym_id   254                     else:   255                         expected = -1   256                     raise ParseError("bad input", token_type, value, lineno,   257                                      column, line, expected)   258    259     def classify(self, token_type, value, lineno, column, line):   260         """Find the label for a token."""   261         if token_type == self.grammar.KEYWORD_TOKEN:   262             label_index = self.grammar.keyword_ids.get(value, -1)   263             if label_index != -1:   264                 return label_index   265         label_index = self.grammar.token_ids.get(token_type, -1)   266         if label_index == -1:   267             raise ParseError("invalid token", token_type, value, lineno, column,   268                              line)   269         return label_index   270    271     def shift(self, next_state, token_type, value, lineno, column):   272         """Shift a non-terminal and prepare for the next state."""   273         dfa, state, node = self.stack[-1]   274         new_node = Terminal(token_type, value, lineno, column)   275         node.append_child(new_node)   276         self.stack[-1] = (dfa, next_state, node)   277    278     def push(self, next_dfa, next_state, node_type, lineno, column):   279         """Push a terminal and adjust the current state."""   280         dfa, state, node = self.stack[-1]   281         new_node = Nonterminal(node_type, [])   282         self.stack[-1] = (dfa, next_state, node)   283         self.stack.append((next_dfa, 0, new_node))   284    285     def pop(self):   286         """Pop an entry off the stack and make its node a child of the last."""   287         dfa, state, node = self.stack.pop()   288         if self.stack:   289             # we are now done with node, so we can store it more efficiently if   290             # it has just one child   291             if node.num_children() == 1:   292                 node = Nonterminal1(node.type, node.get_child(0))   293             self.stack[-1][2].append_child(node)   294         else:   295             self.root = node