1.1 --- a/iixr/terms.py Mon Feb 14 01:53:57 2011 +0100
1.2 +++ b/iixr/terms.py Mon Feb 14 03:06:16 2011 +0100
1.3 @@ -323,22 +323,41 @@
1.4
1.5 "Common iterator support."
1.6
1.7 + def __init__(self):
1.8 +
1.9 + "Cache the last term and positions."
1.10 +
1.11 + self.last_term_returned = None
1.12 + self.last_positions_returned = None
1.13 +
1.14 def go_to_term(self, term):
1.15 + if term == self.last_term_returned:
1.16 + return self.last_term_returned, self.last_positions_returned
1.17 +
1.18 t, dp = self.next()
1.19 while t < term:
1.20 t, dp = self.next()
1.21 + self.last_term_returned, self.last_positions_returned = t, dp
1.22 return t, dp
1.23
1.24 def __iter__(self):
1.25 return self
1.26
1.27 + def next(self):
1.28 + self.last_term_returned, self.last_positions_returned = t = self._next()
1.29 + return t
1.30 +
1.31 # External reading classes.
1.32
1.33 class TermIterator(TermReader, Iterator):
1.34
1.35 "An iterator over terms and positions read from a file."
1.36
1.37 - def next(self):
1.38 + def __init__(self, f):
1.39 + TermReader.__init__(self, f)
1.40 + Iterator.__init__(self)
1.41 +
1.42 + def _next(self):
1.43 try:
1.44 self.begin_record()
1.45 return self.read_term()
1.46 @@ -349,10 +368,11 @@
1.47
1.48 "An iterator over terms and unprocessed document positions data."
1.49
1.50 - def __iter__(self):
1.51 - return self
1.52 + def __init__(self, f):
1.53 + TermReader.__init__(self, f)
1.54 + Iterator.__init__(self)
1.55
1.56 - def next(self):
1.57 + def _next(self):
1.58 try:
1.59 self.begin_record()
1.60 return self.read_term_plus_remaining()
1.61 @@ -373,29 +393,25 @@
1.62 except EOFError:
1.63 raise StopIteration
1.64
1.65 -class CombinedIterator:
1.66 +class CombinedIterator(Iterator):
1.67
1.68 "An iterator providing index and information file access."
1.69
1.70 def __init__(self, reader, index_reader):
1.71 + Iterator.__init__(self)
1.72 self.reader = reader
1.73 self.index_reader = index_reader
1.74 self.records = list(index_reader)
1.75 self.terms = [t for t, dp in self.records]
1.76
1.77 - # Cache the last term and positions.
1.78 -
1.79 - self.last_term = None
1.80 - self.last_positions = None
1.81 -
1.82 def go_to_term(self, term):
1.83
1.84 """
1.85 Return the 'term' and positions or nearest following term and positions.
1.86 """
1.87
1.88 - if self.last_term == term:
1.89 - return self.last_term, self.last_positions
1.90 + if self.last_term_returned == term:
1.91 + return self.last_term_returned, self.last_positions_returned
1.92
1.93 # Get the record providing a term less than or equal to the requested
1.94 # term, getting the first entry if no such records exist.
1.95 @@ -409,10 +425,11 @@
1.96 # Seek to the corresponding record in the information file.
1.97 # Only do this if the term is more quickly reached by seeking.
1.98
1.99 - if term <= t or self.last_term is None or term <= self.last_term or \
1.100 - self.last_term < t or terms_after and terms_after[0] <= self.last_term:
1.101 + if term <= t or self.last_term_returned is None or term <= self.last_term_returned or \
1.102 + self.last_term_returned < t or terms_after and terms_after[0] <= self.last_term_returned:
1.103
1.104 self.reader.seek(offset)
1.105 + self.reader.last_term = t
1.106
1.107 # Where the found term is equal or greater, just read the positions for
1.108 # the index entry.
1.109 @@ -426,7 +443,8 @@
1.110 self.reader.read_term_only()
1.111 self.reader.last_term = t
1.112
1.113 - return t, self.reader.read_positions()
1.114 + self.last_term_returned, self.last_positions_returned = t, self.reader.read_positions()
1.115 + return self.last_term_returned, self.last_positions_returned
1.116
1.117 # Where the found term is less, use the information file to find the
1.118 # term or the one after.
1.119 @@ -435,19 +453,15 @@
1.120
1.121 # Overwrite the reader's state, then scan for the term.
1.122
1.123 - self.reader.last_term = t
1.124 t, dp = self.reader.next()
1.125 while t < term:
1.126 t, dp = self.reader.next()
1.127
1.128 + self.last_term_returned, self.last_positions_returned = t, dp
1.129 return t, dp
1.130
1.131 - def __iter__(self):
1.132 - return self
1.133 -
1.134 - def next(self):
1.135 - self.last_term, self.last_positions = t = self.reader.next()
1.136 - return t
1.137 + def _next(self):
1.138 + return self.reader.next()
1.139
1.140 def close(self):
1.141 if self.reader is not None: