# HG changeset patch # User Paul Boddie # Date 1290730050 -3600 # Node ID ea2944f51430299339fd9e4490773d1b447668ab # Parent e0bd00412dbc9ac14807de7a3c70b1d8ec0e0ea8 Introduced support for higher-level sequential access to indexes. diff -r e0bd00412dbc -r ea2944f51430 iixr/index.py --- a/iixr/index.py Fri Nov 26 01:06:44 2010 +0100 +++ b/iixr/index.py Fri Nov 26 01:07:30 2010 +0100 @@ -184,6 +184,16 @@ self.dict_reader = get_term_reader(pathname, "merged") self.field_dict_reader = get_field_reader(pathname, "merged") + # Sequential access. + + def read_term(self): + return self.dict_reader.read_term() + + def go_to_term(self, term): + return self.dict_reader._get_term_and_positions(*self.dict_reader.go_to_term(term)) + + # Query access. + def get_terms(self): return self.dict_reader.get_terms() diff -r e0bd00412dbc -r ea2944f51430 iixr/terms.py --- a/iixr/terms.py Fri Nov 26 01:06:44 2010 +0100 +++ b/iixr/terms.py Fri Nov 26 01:07:30 2010 +0100 @@ -301,6 +301,15 @@ else: return None + def _get_term_and_positions(self, term, offset, frequency, doc_frequency): + + """ + Return the term plus positions details using the given 'term', 'offset', + 'frequency' and 'doc_frequency'. + """ + + return term, frequency, doc_frequency, self._get_positions(offset, doc_frequency) + def _get_positions(self, offset, doc_frequency): """ @@ -335,7 +344,24 @@ """ term, offset, frequency, doc_frequency = self.info_reader.read_term() - return term, frequency, doc_frequency, self._get_positions(offset, doc_frequency) + return self._get_term_and_positions(term, offset, frequency, doc_frequency) + + def go_to_term(self, term): + + """ + Navigate to 'term' in the dictionary, returning the details from its + entry. The returned details can be augmented with position information + when presented to the _get_term_and_positions method. + """ + + found_term, offset, frequency, doc_frequency, info_offset = self._find_closest_term(term) + + # Position the reader, if necessary. + + if info_offset is not None: + self.info_reader.go_to_term(found_term, offset, info_offset) + + return found_term, offset, frequency, doc_frequency # Query methods. @@ -351,12 +377,7 @@ terms = [] - found_term, offset, frequency, doc_frequency, info_offset = self._find_closest_term(term) - - # Position the reader, if necessary. - - if info_offset is not None: - self.info_reader.go_to_term(found_term, offset, info_offset) + found_term, offset, frequency, doc_frequency = self.go_to_term(term) # Read and record terms.