1.1 --- a/iixr/phrases.py Tue Jan 25 01:55:43 2011 +0100
1.2 +++ b/iixr/phrases.py Fri Jan 28 01:36:25 2011 +0100
1.3 @@ -4,7 +4,7 @@
1.4 Phrase iterators providing navigation over common positions for a number of
1.5 different terms.
1.6
1.7 -Copyright (C) 2009, 2010 Paul Boddie <paul@boddie.org.uk>
1.8 +Copyright (C) 2009, 2010, 2011 Paul Boddie <paul@boddie.org.uk>
1.9
1.10 This program is free software; you can redistribute it and/or modify it under
1.11 the terms of the GNU General Public License as published by the Free Software
1.12 @@ -140,45 +140,75 @@
1.13 if self.iters:
1.14 while 1:
1.15 current, first_token, next = self.iters[0]
1.16 - values = [current]
1.17 - last = current
1.18 - last_token = first_token
1.19 +
1.20 + # Only examine the sequence if the first iterator is in the right
1.21 + # position.
1.22 +
1.23 + if first_token == 0:
1.24 + values = [current]
1.25 + last = current
1.26 + last_token = first_token
1.27 +
1.28 + # Find a sequence of positions providing a phrase.
1.29 +
1.30 + expected_token = 1
1.31
1.32 - # Find a sequence of positions providing a phrase.
1.33 + for current, current_token, _next in self.iters[1:]:
1.34 +
1.35 + # If an iterator appears out of sequence, request a new
1.36 + # result from it. Likewise, if an iterator produces a
1.37 + # position equal to the last, another result is
1.38 + # required.
1.39 +
1.40 + if current_token != expected_token or \
1.41 + self.sub_positions(current, last) == 0:
1.42 +
1.43 + del self.iters[expected_token]
1.44 + self._add_next(_next, current_token)
1.45 + break
1.46
1.47 - for current, current_token, _next in self.iters[1:]:
1.48 - if not self.is_phrase_position(last, last_token, current, current_token):
1.49 - break
1.50 - values.append(current)
1.51 - last = current
1.52 - last_token = current_token
1.53 + # If the current position is more than one place after
1.54 + # the last, reset the sequence.
1.55 +
1.56 + if self.sub_positions(current, last) > 1:
1.57 + del self.iters[0]
1.58 + self._add_next(next, first_token)
1.59 + break
1.60 +
1.61 + values.append(current)
1.62 + last = current
1.63 + last_token = current_token
1.64 +
1.65 + expected_token += 1
1.66 +
1.67 + else:
1.68 + del self.iters[0]
1.69 +
1.70 + # Handle future end of iteration.
1.71 +
1.72 + try:
1.73 + self._add_next(next, first_token)
1.74 + except StopIteration:
1.75 + self.iters = []
1.76 +
1.77 + return values
1.78 +
1.79 + # Try to get a first iterator with the right position.
1.80 +
1.81 else:
1.82 del self.iters[0]
1.83 -
1.84 - # Handle future end of iteration.
1.85 -
1.86 - try:
1.87 - self._add_next(next, first_token)
1.88 - except StopIteration:
1.89 - self.iters = []
1.90 -
1.91 - return values
1.92 -
1.93 - del self.iters[0]
1.94 - self._add_next(next, first_token)
1.95 + self._add_next(next, first_token)
1.96 else:
1.97 raise StopIteration
1.98
1.99 - def is_phrase_position(self, last, last_token, current, current_token):
1.100 - if current_token <= last_token:
1.101 - return 0
1.102 + def sub_positions(self, a, b):
1.103
1.104 # NOTE: For position sequences, assume that the first value is the token
1.105 # NOTE: index/position.
1.106
1.107 - if isinstance(last, (list, tuple)):
1.108 - return current[0] - last[0] == 1
1.109 + if isinstance(a, (list, tuple)):
1.110 + return a[0] - b[0]
1.111 else:
1.112 - return current - last == 1
1.113 + return a - b
1.114
1.115 # vim: tabstop=4 expandtab shiftwidth=4