# HG changeset patch # User Paul Boddie # Date 1297649176 -3600 # Node ID a0cec1caf0f0bb214ac78bc48fc542389b403fde # Parent 52bfb08d5b2f539118a72a3bbc8a4eb20649f194 Introduced last term/positions caching for all iterators. Fixed reader state adjustments around the last term read. diff -r 52bfb08d5b2f -r a0cec1caf0f0 iixr/terms.py --- a/iixr/terms.py Mon Feb 14 01:53:57 2011 +0100 +++ b/iixr/terms.py Mon Feb 14 03:06:16 2011 +0100 @@ -323,22 +323,41 @@ "Common iterator support." + def __init__(self): + + "Cache the last term and positions." + + self.last_term_returned = None + self.last_positions_returned = None + def go_to_term(self, term): + if term == self.last_term_returned: + return self.last_term_returned, self.last_positions_returned + t, dp = self.next() while t < term: t, dp = self.next() + self.last_term_returned, self.last_positions_returned = t, dp return t, dp def __iter__(self): return self + def next(self): + self.last_term_returned, self.last_positions_returned = t = self._next() + return t + # External reading classes. class TermIterator(TermReader, Iterator): "An iterator over terms and positions read from a file." - def next(self): + def __init__(self, f): + TermReader.__init__(self, f) + Iterator.__init__(self) + + def _next(self): try: self.begin_record() return self.read_term() @@ -349,10 +368,11 @@ "An iterator over terms and unprocessed document positions data." - def __iter__(self): - return self + def __init__(self, f): + TermReader.__init__(self, f) + Iterator.__init__(self) - def next(self): + def _next(self): try: self.begin_record() return self.read_term_plus_remaining() @@ -373,29 +393,25 @@ except EOFError: raise StopIteration -class CombinedIterator: +class CombinedIterator(Iterator): "An iterator providing index and information file access." def __init__(self, reader, index_reader): + Iterator.__init__(self) self.reader = reader self.index_reader = index_reader self.records = list(index_reader) self.terms = [t for t, dp in self.records] - # Cache the last term and positions. - - self.last_term = None - self.last_positions = None - def go_to_term(self, term): """ Return the 'term' and positions or nearest following term and positions. """ - if self.last_term == term: - return self.last_term, self.last_positions + if self.last_term_returned == term: + return self.last_term_returned, self.last_positions_returned # Get the record providing a term less than or equal to the requested # term, getting the first entry if no such records exist. @@ -409,10 +425,11 @@ # Seek to the corresponding record in the information file. # Only do this if the term is more quickly reached by seeking. - if term <= t or self.last_term is None or term <= self.last_term or \ - self.last_term < t or terms_after and terms_after[0] <= self.last_term: + if term <= t or self.last_term_returned is None or term <= self.last_term_returned or \ + self.last_term_returned < t or terms_after and terms_after[0] <= self.last_term_returned: self.reader.seek(offset) + self.reader.last_term = t # Where the found term is equal or greater, just read the positions for # the index entry. @@ -426,7 +443,8 @@ self.reader.read_term_only() self.reader.last_term = t - return t, self.reader.read_positions() + self.last_term_returned, self.last_positions_returned = t, self.reader.read_positions() + return self.last_term_returned, self.last_positions_returned # Where the found term is less, use the information file to find the # term or the one after. @@ -435,19 +453,15 @@ # Overwrite the reader's state, then scan for the term. - self.reader.last_term = t t, dp = self.reader.next() while t < term: t, dp = self.reader.next() + self.last_term_returned, self.last_positions_returned = t, dp return t, dp - def __iter__(self): - return self - - def next(self): - self.last_term, self.last_positions = t = self.reader.next() - return t + def _next(self): + return self.reader.next() def close(self): if self.reader is not None: