1.1 --- a/iixr.py Mon Aug 31 20:24:41 2009 +0200
1.2 +++ b/iixr.py Mon Aug 31 21:02:30 2009 +0200
1.3 @@ -293,20 +293,44 @@
1.4 self.reset()
1.5 self.f.seek(offset)
1.6
1.7 + # Could duplicate the file handle using...
1.8 + # fdopen(dup(self.f.fileno()), "rb")
1.9 +
1.10 + return PositionIterator(self.f)
1.11 +
1.12 +class PositionIterator(PositionReader):
1.13 +
1.14 + "Iterating over document positions."
1.15 +
1.16 + def __init__(self, f):
1.17 + PositionReader.__init__(self, f)
1.18 +
1.19 # Read the number of documents.
1.20
1.21 - ndocuments = self.read_number()
1.22 + self.ndocuments = self.read_number()
1.23 + self.read_documents = 0
1.24 +
1.25 + def __len__(self):
1.26 + return self.ndocuments
1.27
1.28 - # Read all records.
1.29 + def sort(self):
1.30 +
1.31 + "Stored document positions are already sorted."
1.32 +
1.33 + pass
1.34
1.35 - i = 0
1.36 - doc_positions = []
1.37 + def __iter__(self):
1.38 + return self
1.39 +
1.40 + def next(self):
1.41
1.42 - while i < ndocuments:
1.43 - doc_positions.append(self.read_positions())
1.44 - i += 1
1.45 + "Read positions for a single document."
1.46
1.47 - return doc_positions
1.48 + if self.read_documents < self.ndocuments:
1.49 + self.read_documents += 1
1.50 + return self.read_positions()
1.51 + else:
1.52 + raise StopIteration
1.53
1.54 class TermWriter(FileWriter):
1.55
2.1 --- a/test.py Mon Aug 31 20:24:41 2009 +0200
2.2 +++ b/test.py Mon Aug 31 21:02:30 2009 +0200
2.3 @@ -1,6 +1,20 @@
2.4 #!/usr/bin/env python
2.5
2.6 import iixr
2.7 +import os
2.8 +
2.9 +# Remove old test files.
2.10 +
2.11 +for filename in ("test", "testF", "testFI", "testI", "testP"):
2.12 + try:
2.13 + os.remove(filename)
2.14 + except OSError:
2.15 + pass
2.16 +
2.17 +try:
2.18 + os.removedirs("test_index")
2.19 +except OSError:
2.20 + pass
2.21
2.22 # Test basic data types.
2.23
2.24 @@ -63,7 +77,7 @@
2.25 offsets.reverse()
2.26 all_doc_positions.reverse()
2.27 for offset, doc_positions in zip(offsets, all_doc_positions):
2.28 - dp = r.read_term_positions(offset)
2.29 + dp = list(r.read_term_positions(offset))
2.30 print doc_positions == dp, doc_positions, dp
2.31 r.close()
2.32
2.33 @@ -267,7 +281,7 @@
2.34 terms_reversed = terms_with_positions[:]
2.35 terms_reversed.reverse()
2.36 for term, doc_positions in terms_reversed:
2.37 - dp = rd.find_positions(term)
2.38 + dp = list(rd.find_positions(term))
2.39 print doc_positions == dp, doc_positions, dp
2.40 for term in ("dog", "dingo"):
2.41 dp = rd.find_positions(term)
2.42 @@ -278,6 +292,7 @@
2.43 rd.rewind()
2.44 for term, doc_positions in terms_with_positions:
2.45 t, fr, dp = rd.read_term()
2.46 + dp = list(dp)
2.47 print term == t, term, t
2.48 print doc_positions == dp, doc_positions, dp
2.49 rd.close()
2.50 @@ -310,7 +325,7 @@
2.51
2.52 rd = index.get_reader()
2.53 for term, frequency, doc_positions in doc_tests:
2.54 - dp = rd.find_positions(term)
2.55 + dp = list(rd.find_positions(term))
2.56 print doc_positions == dp, doc_positions, dp
2.57 fr = rd.get_frequency(term)
2.58 print frequency == fr, frequency, fr