Lichen

pyparser/pylexer.py

1027:dd0745ab8b8a
5 months ago Paul Boddie Reordered GCC arguments to prevent linking failures. Someone decided to change the GCC invocation or linking semantics at some point, meaning that libraries specified "too early" in the argument list no longer provide the symbols required by the program objects, whereas specifying them at the end of the argument list allows those symbols to be found and obtained.
     1 # Used by genpytokenize.py to generate the parser in pytokenize.py     2 from pyparser.automata import DFA, DEFAULT     3      4 class EMPTY: pass     5      6 def newArcPair (states, transitionLabel):     7     s1Index = len(states)     8     s2Index = s1Index + 1     9     states.append([(transitionLabel, s2Index)])    10     states.append([])    11     return s1Index, s2Index    12     13 # ______________________________________________________________________    14     15 def chain (states, *stateIndexPairs):    16     if len(stateIndexPairs) > 1:    17         start, lastFinish = stateIndexPairs[0]    18         for nStart, nFinish in stateIndexPairs[1:]:    19             states[lastFinish].append((EMPTY, nStart))    20             lastFinish = nFinish    21         return start, nFinish    22     else:    23         return stateIndexPairs[0]    24     25     26 # ______________________________________________________________________    27     28 def chainStr (states, str):    29     return chain(states, *map(lambda x : newArcPair(states, x), str))    30     31 # ______________________________________________________________________    32     33 def notChainStr (states, str):    34     """XXX I'm not sure this is how it should be done, but I'm going to    35     try it anyway.  Note that for this case, I require only single character    36     arcs, since I would have to basically invert all accepting states and    37     non-accepting states of any sub-NFA's.    38     """    39     assert len(str) > 0    40     arcs = map(lambda x : newArcPair(states, x), str)    41     finish = len(states)    42     states.append([])    43     start, lastFinish = arcs[0]    44     states[start].append((EMPTY, finish))    45     for crntStart, crntFinish in arcs[1:]:    46         states[lastFinish].append((EMPTY, crntStart))    47         states[crntStart].append((EMPTY, finish))    48     return start, finish    49     50 # ______________________________________________________________________    51     52 def group (states, *stateIndexPairs):    53     if len(stateIndexPairs) > 1:    54         start = len(states)    55         finish = start + 1    56         startList = []    57         states.append(startList)    58         states.append([])    59         for eStart, eFinish in stateIndexPairs:    60             startList.append((EMPTY, eStart))    61             states[eFinish].append((EMPTY, finish))    62         return start, finish    63     else:    64         return stateIndexPairs[0]    65     66 # ______________________________________________________________________    67     68 def groupStr (states, str):    69     return group(states, *map(lambda x : newArcPair(states, x), str))    70     71 # ______________________________________________________________________    72     73 def notGroup (states, *stateIndexPairs):    74     """Like group, but will add a DEFAULT transition to a new end state,    75     causing anything in the group to not match by going to a dead state.    76     XXX I think this is right...    77     """    78     start, dead = group(states, *stateIndexPairs)    79     finish = len(states)    80     states.append([])    81     states[start].append((DEFAULT, finish))    82     return start, finish    83     84 # ______________________________________________________________________    85     86 def notGroupStr (states, str):    87     return notGroup(states, *map(lambda x : newArcPair(states, x), str))    88 # ______________________________________________________________________    89     90 def any (states, *stateIndexPairs):    91     start, finish = group(states, *stateIndexPairs)    92     states[finish].append((EMPTY, start))    93     return start, start    94     95 # ______________________________________________________________________    96     97 def maybe (states, *stateIndexPairs):    98     start, finish = group(states, *stateIndexPairs)    99     states[start].append((EMPTY, finish))   100     return start, finish   101    102 # ______________________________________________________________________   103    104 def atleastonce (states, *stateIndexPairs):   105     start, finish = group(states, *stateIndexPairs)   106     states[finish].append((EMPTY, start))   107     return start, finish   108    109 # ______________________________________________________________________   110    111 def closure (states, start, result = 0L):   112     if None == result:   113         result = 0L   114     if 0 == (result & (1L << start)):   115         result |= (1L << start)   116         for label, arrow in states[start]:   117             if label == EMPTY:   118                 result |= closure(states, arrow, result)   119     return result   120    121 # ______________________________________________________________________   122    123 def nfaToDfa (states, start, finish):   124     tempStates = []   125     startClosure = closure(states, start)   126     crntTempState = [startClosure, [], 0 != (startClosure & (1L << finish))]   127     tempStates.append(crntTempState)   128     index = 0   129     while index < len(tempStates):   130         crntTempState = tempStates[index]   131         crntClosure, crntArcs, crntAccept = crntTempState   132         for index2 in range(0, len(states)):   133             if 0 != (crntClosure & (1L << index2)):   134                 for label, nfaArrow in states[index2]:   135                     if label == EMPTY:   136                         continue   137                     foundTempArc = False   138                     for tempArc in crntArcs:   139                         if tempArc[0] == label:   140                             foundTempArc = True   141                             break   142                     if not foundTempArc:   143                         tempArc = [label, -1, 0L]   144                         crntArcs.append(tempArc)   145                     tempArc[2] = closure(states, nfaArrow, tempArc[2])   146         for arcIndex in range(0, len(crntArcs)):   147             label, arrow, targetStates = crntArcs[arcIndex]   148             targetFound = False   149             arrow = 0   150             for destTempState in tempStates:   151                 if destTempState[0] == targetStates:   152                     targetFound = True   153                     break   154                 arrow += 1   155             if not targetFound:   156                 assert arrow == len(tempStates)   157                 newState = [targetStates, [], 0 != (targetStates &   158                                                     (1L << finish))]   159                 tempStates.append(newState)   160             crntArcs[arcIndex][1] = arrow   161         index += 1   162     tempStates = simplifyTempDfa(tempStates)   163     states = finalizeTempDfa(tempStates)   164     return states   165    166 # ______________________________________________________________________   167    168 def sameState (s1, s2):   169     """sameState(s1, s2)   170     Note:   171     state := [ nfaclosure : Long, [ arc ], accept : Boolean ]   172     arc := [ label, arrow : Int, nfaClosure : Long ]   173     """   174     if (len(s1[1]) != len(s2[1])) or (s1[2] != s2[2]):   175         return False   176     for arcIndex in range(0, len(s1[1])):   177         arc1 = s1[1][arcIndex]   178         arc2 = s2[1][arcIndex]   179         if arc1[:-1] != arc2[:-1]:   180             return False   181     return True   182    183 # ______________________________________________________________________   184    185 def simplifyTempDfa (tempStates):   186     """simplifyTempDfa (tempStates)   187     """   188     changes = True   189     deletedStates = []   190     while changes:   191         changes = False   192         for i in range(1, len(tempStates)):   193             if i in deletedStates:   194                 continue   195             for j in range(0, i):   196                 if j in deletedStates:   197                     continue   198                 if sameState(tempStates[i], tempStates[j]):   199                     deletedStates.append(i)   200                     for k in range(0, len(tempStates)):   201                         if k in deletedStates:   202                             continue   203                         for arc in tempStates[k][1]:   204                             if arc[1] == i:   205                                 arc[1] = j   206                     changes = True   207                     break   208     for stateIndex in deletedStates:   209         tempStates[stateIndex] = None   210     return tempStates   211 # ______________________________________________________________________   212    213 def finalizeTempDfa (tempStates):   214     """finalizeTempDfa (tempStates)   215        216     Input domain:   217     tempState := [ nfaClosure : Long, [ tempArc ], accept : Boolean ]   218     tempArc := [ label, arrow, nfaClosure ]   219    220     Output domain:   221     state := [ arcMap, accept : Boolean ]   222     """   223     states = []   224     accepts = []   225     stateMap = {}   226     tempIndex = 0   227     for tempIndex in range(0, len(tempStates)):   228         tempState = tempStates[tempIndex]   229         if None != tempState:   230             stateMap[tempIndex] = len(states)   231             states.append({})   232             accepts.append(tempState[2])   233     for tempIndex in stateMap.keys():   234         stateBitset, tempArcs, accepting = tempStates[tempIndex]   235         newIndex = stateMap[tempIndex]   236         arcMap = states[newIndex]   237         for tempArc in tempArcs:   238             arcMap[tempArc[0]] = stateMap[tempArc[1]]   239     return states, accepts   240