Lichen

pyparser/automata.py

1027:dd0745ab8b8a
5 months ago Paul Boddie Reordered GCC arguments to prevent linking failures. Someone decided to change the GCC invocation or linking semantics at some point, meaning that libraries specified "too early" in the argument list no longer provide the symbols required by the program objects, whereas specifying them at the end of the argument list allows those symbols to be found and obtained.
     1 # ______________________________________________________________________     2 """Module automata     3      4 THIS FILE WAS COPIED FROM pypy/module/parser/pytokenize.py AND ADAPTED     5 TO BE ANNOTABLE (Mainly made the DFA's __init__ accept two lists     6 instead of a unique nested one)     7      8 $Id: automata.py,v 1.2 2003/10/02 17:37:17 jriehl Exp $     9 """    10 # ______________________________________________________________________    11 # Module level definitions    12     13 # PYPY Modification: removed the EMPTY class as it's not needed here    14     15     16 # PYPY Modification: DEFAULT is a singleton, used only in the pre-RPython    17 # dicts (see pytokenize.py).  Then DFA.__init__() turns these dicts into    18 # more compact strings.    19 DEFAULT = object()    20     21 # PYPY Modification : removed all automata functions (any, maybe,    22 #                     newArcPair, etc.)    23     24 ERROR_STATE = chr(255)    25     26 class DFA:    27     # ____________________________________________________________    28     def __init__(self, states, accepts, start = 0):    29         """ NOT_RPYTHON """    30         assert len(states) < 255 # no support for huge amounts of states    31         # construct string for looking up state transitions    32         string_states = [] * len(states)    33         # compute maximum    34         maximum = 0    35         for state in states:    36             for key in state:    37                 if key == DEFAULT:    38                     continue    39                 maximum = max(ord(key), maximum)    40         self.max_char = maximum + 1    41     42         defaults = []    43         for i, state in enumerate(states):    44             default = ERROR_STATE    45             if DEFAULT in state:    46                 default = chr(state[DEFAULT])    47             defaults.append(default)    48             string_state = [default] * self.max_char    49             for key, value in state.iteritems():    50                 if key == DEFAULT:    51                     continue    52                 assert len(key) == 1    53                 assert ord(key) < self.max_char    54                 string_state[ord(key)] = chr(value)    55             string_states.extend(string_state)    56         self.states = "".join(string_states)    57         self.defaults = "".join(defaults)    58         self.accepts = accepts    59         self.start = start    60     61     # ____________________________________________________________    62     63     def _next_state(self, item, crntState):    64         if ord(item) >= self.max_char:    65             return self.defaults[crntState]    66         else:    67             return self.states[crntState * self.max_char + ord(item)]    68     69     def recognize(self, inVec, pos = 0):    70         crntState = self.start    71         lastAccept = False    72         i = pos    73         for i in range(pos, len(inVec)):    74             item = inVec[i]    75             accept = self.accepts[crntState]    76             crntState = self._next_state(item, crntState)    77             if crntState != ERROR_STATE:    78                 pass    79             elif accept:    80                 return i    81             elif lastAccept:    82                 # This is now needed b/c of exception cases where there are    83                 # transitions to dead states    84                 return i - 1    85             else:    86                 return -1    87             crntState = ord(crntState)    88             lastAccept = accept    89         # if self.states[crntState][1]:    90         if self.accepts[crntState]:    91             return i + 1    92         elif lastAccept:    93             return i    94         else:    95             return -1    96     97 # ______________________________________________________________________    98     99 class NonGreedyDFA (DFA):   100    101     def recognize(self, inVec, pos = 0):   102         crntState = self.start   103         i = pos   104         for i in range(pos, len(inVec)):   105             item = inVec[i]   106             accept = self.accepts[crntState]   107             if accept:   108                 return i   109             crntState = self._next_state(item, crntState)   110             if crntState == ERROR_STATE:   111                 return -1   112             crntState = ord(crntState)   113             i += 1   114         if self.accepts[crntState]:   115             return i   116         else:   117             return -1   118    119 # ______________________________________________________________________   120 # End of automata.py