1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/pyparser/automata.py Sun Jan 08 20:20:39 2017 +0100
1.3 @@ -0,0 +1,120 @@
1.4 +# ______________________________________________________________________
1.5 +"""Module automata
1.6 +
1.7 +THIS FILE WAS COPIED FROM pypy/module/parser/pytokenize.py AND ADAPTED
1.8 +TO BE ANNOTABLE (Mainly made the DFA's __init__ accept two lists
1.9 +instead of a unique nested one)
1.10 +
1.11 +$Id: automata.py,v 1.2 2003/10/02 17:37:17 jriehl Exp $
1.12 +"""
1.13 +# ______________________________________________________________________
1.14 +# Module level definitions
1.15 +
1.16 +# PYPY Modification: removed the EMPTY class as it's not needed here
1.17 +
1.18 +
1.19 +# PYPY Modification: DEFAULT is a singleton, used only in the pre-RPython
1.20 +# dicts (see pytokenize.py). Then DFA.__init__() turns these dicts into
1.21 +# more compact strings.
1.22 +DEFAULT = object()
1.23 +
1.24 +# PYPY Modification : removed all automata functions (any, maybe,
1.25 +# newArcPair, etc.)
1.26 +
1.27 +ERROR_STATE = chr(255)
1.28 +
1.29 +class DFA:
1.30 + # ____________________________________________________________
1.31 + def __init__(self, states, accepts, start = 0):
1.32 + """ NOT_RPYTHON """
1.33 + assert len(states) < 255 # no support for huge amounts of states
1.34 + # construct string for looking up state transitions
1.35 + string_states = [] * len(states)
1.36 + # compute maximum
1.37 + maximum = 0
1.38 + for state in states:
1.39 + for key in state:
1.40 + if key == DEFAULT:
1.41 + continue
1.42 + maximum = max(ord(key), maximum)
1.43 + self.max_char = maximum + 1
1.44 +
1.45 + defaults = []
1.46 + for i, state in enumerate(states):
1.47 + default = ERROR_STATE
1.48 + if DEFAULT in state:
1.49 + default = chr(state[DEFAULT])
1.50 + defaults.append(default)
1.51 + string_state = [default] * self.max_char
1.52 + for key, value in state.iteritems():
1.53 + if key == DEFAULT:
1.54 + continue
1.55 + assert len(key) == 1
1.56 + assert ord(key) < self.max_char
1.57 + string_state[ord(key)] = chr(value)
1.58 + string_states.extend(string_state)
1.59 + self.states = "".join(string_states)
1.60 + self.defaults = "".join(defaults)
1.61 + self.accepts = accepts
1.62 + self.start = start
1.63 +
1.64 + # ____________________________________________________________
1.65 +
1.66 + def _next_state(self, item, crntState):
1.67 + if ord(item) >= self.max_char:
1.68 + return self.defaults[crntState]
1.69 + else:
1.70 + return self.states[crntState * self.max_char + ord(item)]
1.71 +
1.72 + def recognize(self, inVec, pos = 0):
1.73 + crntState = self.start
1.74 + lastAccept = False
1.75 + i = pos
1.76 + for i in range(pos, len(inVec)):
1.77 + item = inVec[i]
1.78 + accept = self.accepts[crntState]
1.79 + crntState = self._next_state(item, crntState)
1.80 + if crntState != ERROR_STATE:
1.81 + pass
1.82 + elif accept:
1.83 + return i
1.84 + elif lastAccept:
1.85 + # This is now needed b/c of exception cases where there are
1.86 + # transitions to dead states
1.87 + return i - 1
1.88 + else:
1.89 + return -1
1.90 + crntState = ord(crntState)
1.91 + lastAccept = accept
1.92 + # if self.states[crntState][1]:
1.93 + if self.accepts[crntState]:
1.94 + return i + 1
1.95 + elif lastAccept:
1.96 + return i
1.97 + else:
1.98 + return -1
1.99 +
1.100 +# ______________________________________________________________________
1.101 +
1.102 +class NonGreedyDFA (DFA):
1.103 +
1.104 + def recognize(self, inVec, pos = 0):
1.105 + crntState = self.start
1.106 + i = pos
1.107 + for i in range(pos, len(inVec)):
1.108 + item = inVec[i]
1.109 + accept = self.accepts[crntState]
1.110 + if accept:
1.111 + return i
1.112 + crntState = self._next_state(item, crntState)
1.113 + if crntState == ERROR_STATE:
1.114 + return -1
1.115 + crntState = ord(crntState)
1.116 + i += 1
1.117 + if self.accepts[crntState]:
1.118 + return i
1.119 + else:
1.120 + return -1
1.121 +
1.122 +# ______________________________________________________________________
1.123 +# End of automata.py