# HG changeset patch # User Paul Boddie # Date 1296174985 -3600 # Node ID b50ba4291c5c6e1e40590f1860a97b1b7f6dd8f3 # Parent 34f535fe8cb08a727d4edfded47eddc670cbd405 Made the read_sequence method simpler to follow and perhaps slightly more efficient. Fixed the PhraseFilter to handle out-of-sequence tokens properly as well as iterators for different tokens contributing identical positions. diff -r 34f535fe8cb0 -r b50ba4291c5c iixr/files.py --- a/iixr/files.py Tue Jan 25 01:55:43 2011 +0100 +++ b/iixr/files.py Fri Jan 28 01:36:25 2011 +0100 @@ -199,15 +199,31 @@ v_in = self.read_number() value.append(v + v_in) else: - expect_delta = 1 - for v in last: + i = 0 + n = len(last) + value = list(last) + + # Traverse a copy of the last value. + + while i < n: v_in = self.read_number() - if expect_delta: - value.append(v + v_in) - if v_in != 0: - expect_delta = 0 - else: - value.append(v_in - 1) + + # While zeros are read, retain the last value elements. + # Otherwise, add the delta... + + if v_in != 0: + value[i] += v_in + i += 1 + + # Then set absolute values for the remaining elements. + + while i < n: + value[i] = self.read_number() - 1 + i += 1 + break + + i += 1 + return tuple(value) else: return last + self.read_number() diff -r 34f535fe8cb0 -r b50ba4291c5c iixr/phrases.py --- a/iixr/phrases.py Tue Jan 25 01:55:43 2011 +0100 +++ b/iixr/phrases.py Fri Jan 28 01:36:25 2011 +0100 @@ -4,7 +4,7 @@ Phrase iterators providing navigation over common positions for a number of different terms. -Copyright (C) 2009, 2010 Paul Boddie +Copyright (C) 2009, 2010, 2011 Paul Boddie This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -140,45 +140,75 @@ if self.iters: while 1: current, first_token, next = self.iters[0] - values = [current] - last = current - last_token = first_token + + # Only examine the sequence if the first iterator is in the right + # position. + + if first_token == 0: + values = [current] + last = current + last_token = first_token + + # Find a sequence of positions providing a phrase. + + expected_token = 1 - # Find a sequence of positions providing a phrase. + for current, current_token, _next in self.iters[1:]: + + # If an iterator appears out of sequence, request a new + # result from it. Likewise, if an iterator produces a + # position equal to the last, another result is + # required. + + if current_token != expected_token or \ + self.sub_positions(current, last) == 0: + + del self.iters[expected_token] + self._add_next(_next, current_token) + break - for current, current_token, _next in self.iters[1:]: - if not self.is_phrase_position(last, last_token, current, current_token): - break - values.append(current) - last = current - last_token = current_token + # If the current position is more than one place after + # the last, reset the sequence. + + if self.sub_positions(current, last) > 1: + del self.iters[0] + self._add_next(next, first_token) + break + + values.append(current) + last = current + last_token = current_token + + expected_token += 1 + + else: + del self.iters[0] + + # Handle future end of iteration. + + try: + self._add_next(next, first_token) + except StopIteration: + self.iters = [] + + return values + + # Try to get a first iterator with the right position. + else: del self.iters[0] - - # Handle future end of iteration. - - try: - self._add_next(next, first_token) - except StopIteration: - self.iters = [] - - return values - - del self.iters[0] - self._add_next(next, first_token) + self._add_next(next, first_token) else: raise StopIteration - def is_phrase_position(self, last, last_token, current, current_token): - if current_token <= last_token: - return 0 + def sub_positions(self, a, b): # NOTE: For position sequences, assume that the first value is the token # NOTE: index/position. - if isinstance(last, (list, tuple)): - return current[0] - last[0] == 1 + if isinstance(a, (list, tuple)): + return a[0] - b[0] else: - return current - last == 1 + return a - b # vim: tabstop=4 expandtab shiftwidth=4