Introduced position dictionary, file and index iterators which capture the relevant result data in caches for particular terms, wrapping the underlying shared file readers. Added section output to the test program in order to make troubleshooting easier. Added a seek method to the File class.

     1.1 --- a/docs/COPYING.txt	Fri Jan 08 00:44:59 2010 +0100
     1.2 +++ b/docs/COPYING.txt	Sun Jan 10 20:47:41 2010 +0100
     1.3 @@ -1,7 +1,7 @@
     1.4  Licence Agreement for iixr
     1.5  --------------------------
     1.6  
     1.7 -Copyright (C) 2009 Paul Boddie <paul@boddie.org.uk>
     1.8 +Copyright (C) 2009, 2010 Paul Boddie <paul@boddie.org.uk>
     1.9  
    1.10  This program is free software; you can redistribute it and/or modify it under
    1.11  the terms of the GNU General Public License as published by the Free Software

     2.1 --- a/iixr/fields.py	Fri Jan 08 00:44:59 2010 +0100
     2.2 +++ b/iixr/fields.py	Sun Jan 10 20:47:41 2010 +0100
     2.3 @@ -3,7 +3,7 @@
     2.4  """
     2.5  Specific classes for storing document information.
     2.6  
     2.7 -Copyright (C) 2009 Paul Boddie <paul@boddie.org.uk>
     2.8 +Copyright (C) 2009, 2010 Paul Boddie <paul@boddie.org.uk>
     2.9  
    2.10  This program is free software; you can redistribute it and/or modify it under
    2.11  the terms of the GNU General Public License as published by the Free Software
    2.12 @@ -96,7 +96,7 @@
    2.13          later documents.
    2.14          """
    2.15  
    2.16 -        self.f.seek(offset)
    2.17 +        self.seek(offset)
    2.18          bad_docnum, fields = self.read_fields()
    2.19          self.last_docnum = docnum
    2.20          return docnum, fields

     3.1 --- a/iixr/files.py	Fri Jan 08 00:44:59 2010 +0100
     3.2 +++ b/iixr/files.py	Sun Jan 10 20:47:41 2010 +0100
     3.3 @@ -3,7 +3,7 @@
     3.4  """
     3.5  Generic file access.
     3.6  
     3.7 -Copyright (C) 2009 Paul Boddie <paul@boddie.org.uk>
     3.8 +Copyright (C) 2009, 2010 Paul Boddie <paul@boddie.org.uk>
     3.9  
    3.10  This program is free software; you can redistribute it and/or modify it under
    3.11  the terms of the GNU General Public License as published by the Free Software
    3.12 @@ -38,6 +38,10 @@
    3.13  
    3.14          pass
    3.15  
    3.16 +    def seek(self, offset):
    3.17 +        self.f.seek(offset)
    3.18 +        self.reset()
    3.19 +
    3.20      def rewind(self):
    3.21          self.f.seek(0)
    3.22          self.reset()

     4.1 --- a/iixr/filesystem.py	Fri Jan 08 00:44:59 2010 +0100
     4.2 +++ b/iixr/filesystem.py	Sun Jan 10 20:47:41 2010 +0100
     4.3 @@ -3,7 +3,7 @@
     4.4  """
     4.5  File access.
     4.6  
     4.7 -Copyright (C) 2009 Paul Boddie <paul@boddie.org.uk>
     4.8 +Copyright (C) 2009, 2010 Paul Boddie <paul@boddie.org.uk>
     4.9  
    4.10  This program is free software; you can redistribute it and/or modify it under
    4.11  the terms of the GNU General Public License as published by the Free Software

     5.1 --- a/iixr/positions.py	Fri Jan 08 00:44:59 2010 +0100
     5.2 +++ b/iixr/positions.py	Sun Jan 10 20:47:41 2010 +0100
     5.3 @@ -3,7 +3,7 @@
     5.4  """
     5.5  Specific classes for storing position information.
     5.6  
     5.7 -Copyright (C) 2009 Paul Boddie <paul@boddie.org.uk>
     5.8 +Copyright (C) 2009, 2010 Paul Boddie <paul@boddie.org.uk>
     5.9  
    5.10  This program is free software; you can redistribute it and/or modify it under
    5.11  the terms of the GNU General Public License as published by the Free Software
    5.12 @@ -61,6 +61,39 @@
    5.13  
    5.14          self.last_docnum = docnum
    5.15  
    5.16 +class PositionReader(FileReader):
    5.17 +
    5.18 +    "Reading position information within term-specific regions of a file."
    5.19 +
    5.20 +    def reset(self):
    5.21 +        self.last_docnum = 0
    5.22 +
    5.23 +    def read_positions(self):
    5.24 +
    5.25 +        "Read positions, returning a document number and a list of positions."
    5.26 +
    5.27 +        # Read the document number delta and add it to the last number.
    5.28 +
    5.29 +        self.last_docnum += self.read_number()
    5.30 +
    5.31 +        # Read the number of positions.
    5.32 +
    5.33 +        npositions = self.read_number()
    5.34 +
    5.35 +        # Read the position deltas, adding each previous position to get the
    5.36 +        # appropriate collection of absolute positions.
    5.37 +
    5.38 +        i = 0
    5.39 +        last = 0
    5.40 +        positions = []
    5.41 +
    5.42 +        while i < npositions:
    5.43 +            last += self.read_number()
    5.44 +            positions.append(last)
    5.45 +            i += 1
    5.46 +
    5.47 +        return self.last_docnum, positions
    5.48 +
    5.49  class PositionIndexWriter(FileWriter):
    5.50  
    5.51      "Writing position index information to files."
    5.52 @@ -92,109 +125,13 @@
    5.53          self.last_pos_offset = pos_offset
    5.54          self.last_docnum = docnum
    5.55  
    5.56 -# Iterators for position-related files.
    5.57 -
    5.58 -class IteratorBase:
    5.59 -
    5.60 -    def __init__(self, count):
    5.61 -        self.replenish(count)
    5.62 -
    5.63 -    def replenish(self, count):
    5.64 -        self.count = count
    5.65 -        self.read_documents = 0
    5.66 -
    5.67 -    def __len__(self):
    5.68 -        return self.count
    5.69 -
    5.70 -    def sort(self):
    5.71 -        pass # Stored document positions are already sorted.
    5.72 -
    5.73 -    def __iter__(self):
    5.74 -        return self
    5.75 -
    5.76 -class PositionReader(FileReader, IteratorBase):
    5.77 -
    5.78 -    "Iterating over document positions."
    5.79 -
    5.80 -    def __init__(self, f):
    5.81 -        FileReader.__init__(self, f)
    5.82 -        IteratorBase.__init__(self, 0) # no iteration initially permitted
    5.83 -        self.reset()
    5.84 -
    5.85 -    def reset(self):
    5.86 -        self.last_docnum = 0
    5.87 -
    5.88 -    def seek(self, offset, count):
    5.89 -
    5.90 -        """
    5.91 -        Seek to 'offset' in the file, limiting the number of documents available
    5.92 -        for reading to 'count'.
    5.93 -        """
    5.94 -
    5.95 -        self.f.seek(offset)
    5.96 -        self.replenish(count)
    5.97 -        self.reset()
    5.98 +class PositionIndexReader(FileReader):
    5.99  
   5.100 -    def read_positions(self):
   5.101 -
   5.102 -        "Read positions, returning a document number and a list of positions."
   5.103 -
   5.104 -        # Read the document number delta and add it to the last number.
   5.105 -
   5.106 -        self.last_docnum += self.read_number()
   5.107 -
   5.108 -        # Read the number of positions.
   5.109 -
   5.110 -        npositions = self.read_number()
   5.111 -
   5.112 -        # Read the position deltas, adding each previous position to get the
   5.113 -        # appropriate collection of absolute positions.
   5.114 -
   5.115 -        i = 0
   5.116 -        last = 0
   5.117 -        positions = []
   5.118 -
   5.119 -        while i < npositions:
   5.120 -            last += self.read_number()
   5.121 -            positions.append(last)
   5.122 -            i += 1
   5.123 -
   5.124 -        return self.last_docnum, positions
   5.125 -
   5.126 -    def next(self):
   5.127 -
   5.128 -        "Read positions for a single document."
   5.129 -
   5.130 -        if self.read_documents < self.count:
   5.131 -            self.read_documents += 1
   5.132 -            return self.read_positions()
   5.133 -        else:
   5.134 -            raise StopIteration
   5.135 -
   5.136 -class PositionIndexReader(FileReader, IteratorBase):
   5.137 -
   5.138 -    "Iterating over document positions."
   5.139 -
   5.140 -    def __init__(self, f):
   5.141 -        FileReader.__init__(self, f)
   5.142 -        IteratorBase.__init__(self, 0) # no iteration initially permitted
   5.143 -        self.reset()
   5.144 +    "Reading position index information within term-specific regions of a file."
   5.145  
   5.146      def reset(self):
   5.147          self.last_docnum = 0
   5.148          self.last_pos_offset = 0
   5.149 -        self.section_count = 0
   5.150 -
   5.151 -    def seek(self, offset, doc_frequency):
   5.152 -
   5.153 -        """
   5.154 -        Seek to 'offset' in the file, limiting the number of documents available
   5.155 -        for reading to 'doc_frequency'.
   5.156 -        """
   5.157 -
   5.158 -        self.f.seek(offset)
   5.159 -        self.replenish(doc_frequency)
   5.160 -        self.reset()
   5.161  
   5.162      def read_positions(self):
   5.163  
   5.164 @@ -217,16 +154,112 @@
   5.165  
   5.166          return self.last_docnum, self.last_pos_offset, count
   5.167  
   5.168 +# Iterators for position-related files.
   5.169 +
   5.170 +class IteratorBase:
   5.171 +
   5.172 +    "Support for iterating over results."
   5.173 +
   5.174 +    def __init__(self, reader):
   5.175 +
   5.176 +        "Initialise the iterator using the given 'reader'."
   5.177 +
   5.178 +        self.reader = reader
   5.179 +        self.replenish(0) # no iteration initially permitted
   5.180 +
   5.181 +    def replenish(self, count):
   5.182 +
   5.183 +        "Replenish the iterator with 'count' results."
   5.184 +
   5.185 +        self.count = count
   5.186 +        self.read_documents = 0
   5.187 +
   5.188 +    def __len__(self):
   5.189 +
   5.190 +        "Return the total number of results."
   5.191 +
   5.192 +        return self.count
   5.193 +
   5.194 +    def sort(self):
   5.195 +        pass # Stored document positions are already sorted.
   5.196 +
   5.197 +    def __iter__(self):
   5.198 +        return self
   5.199 +
   5.200 +class PositionIterator(IteratorBase):
   5.201 +
   5.202 +    "Iterating over document positions."
   5.203 +
   5.204 +    def replenish(self, count):
   5.205 +        IteratorBase.replenish(self, count)
   5.206 +
   5.207 +        # Fill a cache of positions.
   5.208 +
   5.209 +        self.cache = []
   5.210 +        n = 0
   5.211 +
   5.212 +        while n < self.count:
   5.213 +            self.cache.append(self.reader.read_positions())
   5.214 +            n += 1
   5.215 +
   5.216 +    def seek(self, offset, count):
   5.217 +
   5.218 +        """
   5.219 +        Seek to 'offset' in the file, limiting the number of documents available
   5.220 +        for reading to 'count'.
   5.221 +        """
   5.222 +
   5.223 +        self.reader.seek(offset)
   5.224 +        self.replenish(count)
   5.225 +
   5.226      def next(self):
   5.227  
   5.228          "Read positions for a single document."
   5.229  
   5.230 -        self.read_documents += self.section_count
   5.231          if self.read_documents < self.count:
   5.232 -            docnum, pos_offset, self.section_count = t = self.read_positions()
   5.233 +            positions = self.cache[self.read_documents]
   5.234 +            self.read_documents += 1
   5.235 +            return positions
   5.236 +        else:
   5.237 +            raise StopIteration
   5.238 +
   5.239 +class PositionIndexIterator(IteratorBase):
   5.240 +
   5.241 +    "Iterating over position index entries."
   5.242 +
   5.243 +    def replenish(self, count):
   5.244 +        IteratorBase.replenish(self, count)
   5.245 +
   5.246 +        # Fill a cache of offsets.
   5.247 +
   5.248 +        self.cache = []
   5.249 +        self.current = 0
   5.250 +        n = 0
   5.251 +
   5.252 +        while n < self.count:
   5.253 +            docnum, pos_offset, section_count = t = self.reader.read_positions()
   5.254 +            self.cache.append(t)
   5.255 +            n += section_count
   5.256 +
   5.257 +    def seek(self, offset, doc_frequency):
   5.258 +
   5.259 +        """
   5.260 +        Seek to 'offset' in the file, limiting the number of documents available
   5.261 +        for reading to 'doc_frequency'.
   5.262 +        """
   5.263 +
   5.264 +        self.reader.seek(offset)
   5.265 +        self.replenish(doc_frequency)
   5.266 +
   5.267 +    def next(self):
   5.268 +
   5.269 +        "Return the next cached index entry as (docnum, pos_offset, count)."
   5.270 +
   5.271 +        if self.current < len(self.cache):
   5.272 +            docnum, pos_offset, self.section_count = t = self.cache[self.current]
   5.273 +            self.current += 1
   5.274              return t
   5.275          else:
   5.276 -            #assert self.read_documents == self.count # not upheld by from_document
   5.277              raise StopIteration
   5.278  
   5.279  class PositionDictionaryWriter:
   5.280 @@ -311,11 +344,31 @@
   5.281  
   5.282  class PositionDictionaryReader:
   5.283  
   5.284 -    "Iteration over position dictionary entries."
   5.285 +    "Access to position dictionary entries through iterators."
   5.286  
   5.287      def __init__(self, position_reader, position_index_reader):
   5.288          self.position_reader = position_reader
   5.289          self.position_index_reader = position_index_reader
   5.290 +
   5.291 +    def read_term_positions(self, offset, doc_frequency):
   5.292 +        iterator = PositionDictionaryIterator(
   5.293 +            PositionIterator(self.position_reader),
   5.294 +            PositionIndexIterator(self.position_index_reader)
   5.295 +            )
   5.296 +        iterator.seek(offset, doc_frequency)
   5.297 +        return iterator
   5.298 +
   5.299 +    def close(self):
   5.300 +        self.position_reader.close()
   5.301 +        self.position_index_reader.close()
   5.302 +
   5.303 +class PositionDictionaryIterator:
   5.304 +
   5.305 +    "Iteration over position dictionary entries."
   5.306 +
   5.307 +    def __init__(self, position_iterator, position_index_iterator):
   5.308 +        self.position_iterator = position_iterator
   5.309 +        self.position_index_iterator = position_index_iterator
   5.310          self.reset()
   5.311  
   5.312      def reset(self):
   5.313 @@ -339,9 +392,9 @@
   5.314  
   5.315          # Seek to the appropriate index entry.
   5.316  
   5.317 -        self.position_index_reader.seek(offset, doc_frequency)
   5.318 +        self.position_index_iterator.seek(offset, doc_frequency)
   5.319  
   5.320 -        # Initialise the current index entry and current position file reader.
   5.321 +        # Initialise the current index entry and current position file iterator.
   5.322  
   5.323          self._next_section()
   5.324          self._init_section()
   5.325 @@ -349,7 +402,7 @@
   5.326      # Sequence methods.
   5.327  
   5.328      def __len__(self):
   5.329 -        return len(self.position_index_reader)
   5.330 +        return len(self.position_index_iterator)
   5.331  
   5.332      def sort(self):
   5.333          pass
   5.334 @@ -380,23 +433,18 @@
   5.335              # Either return the next record.
   5.336  
   5.337              try:
   5.338 -                return self.position_reader.next()
   5.339 +                return self.position_iterator.next()
   5.340  
   5.341              # Or, where a section is finished, get the next section and try again.
   5.342  
   5.343              except StopIteration:
   5.344  
   5.345 -                # Where a section follows, update the index reader, but keep
   5.346 -                # reading using the same file reader (since the data should just
   5.347 -                # follow on from the last section).
   5.348 +                # Where a single iterator is in use, the file reader would
   5.349 +                # already be positioned appropriately, but this is not
   5.350 +                # guaranteed when multiple iterators share the reader.
   5.351  
   5.352                  self._next_section()
   5.353 -                self.position_reader.replenish(self.section_count)
   5.354 -
   5.355 -                # Reset the state of the reader to make sure that document
   5.356 -                # numbers are correct.
   5.357 -
   5.358 -                self.position_reader.reset()
   5.359 +                self._init_section()
   5.360  
   5.361      def from_document(self, docnum):
   5.362  
   5.363 @@ -415,7 +463,7 @@
   5.364  
   5.365          try:
   5.366              if self.next_docnum is None:
   5.367 -                self.next_docnum, self.next_pos_offset, self.next_section_count = self.position_index_reader.next()
   5.368 +                self.next_docnum, self.next_pos_offset, self.next_section_count = self.position_index_iterator.next()
   5.369  
   5.370              # Read until the next entry is after the desired document number,
   5.371              # or until the end of the results.
   5.372 @@ -423,7 +471,7 @@
   5.373              while self.next_docnum <= docnum:
   5.374                  self._next_read_section()
   5.375                  if self.docnum < docnum:
   5.376 -                    self.next_docnum, self.next_pos_offset, self.next_section_count = self.position_index_reader.next()
   5.377 +                    self.next_docnum, self.next_pos_offset, self.next_section_count = self.position_index_iterator.next()
   5.378                  else:
   5.379                      break
   5.380  
   5.381 @@ -436,7 +484,7 @@
   5.382  
   5.383          try:
   5.384              while 1:
   5.385 -                found_docnum, found_positions = self.position_reader.next()
   5.386 +                found_docnum, found_positions = self.position_iterator.next()
   5.387  
   5.388                  # Return the desired document positions or None (retaining the
   5.389                  # positions for the document immediately after).
   5.390 @@ -457,7 +505,7 @@
   5.391          "Attempt to get the next section in the index."
   5.392  
   5.393          if self.next_docnum is None:
   5.394 -            self.docnum, self.pos_offset, self.section_count = self.position_index_reader.next()
   5.395 +            self.docnum, self.pos_offset, self.section_count = self.position_index_iterator.next()
   5.396          else:
   5.397              self._next_read_section()
   5.398  
   5.399 @@ -473,14 +521,10 @@
   5.400  
   5.401      def _init_section(self):
   5.402  
   5.403 -        "Initialise the reader for the section in the position file."
   5.404 +        "Initialise the iterator for the section in the position file."
   5.405  
   5.406          # Seek to the position entry.
   5.407  
   5.408 -        self.position_reader.seek(self.pos_offset, self.section_count)
   5.409 -
   5.410 -    def close(self):
   5.411 -        self.position_reader.close()
   5.412 -        self.position_index_reader.close()
   5.413 +        self.position_iterator.seek(self.pos_offset, self.section_count)
   5.414  
   5.415  # vim: tabstop=4 expandtab shiftwidth=4

     6.1 --- a/iixr/terms.py	Fri Jan 08 00:44:59 2010 +0100
     6.2 +++ b/iixr/terms.py	Sun Jan 10 20:47:41 2010 +0100
     6.3 @@ -3,7 +3,7 @@
     6.4  """
     6.5  Specific classes for storing term information.
     6.6  
     6.7 -Copyright (C) 2009 Paul Boddie <paul@boddie.org.uk>
     6.8 +Copyright (C) 2009, 2010 Paul Boddie <paul@boddie.org.uk>
     6.9  
    6.10  This program is free software; you can redistribute it and/or modify it under
    6.11  the terms of the GNU General Public License as published by the Free Software
    6.12 @@ -106,7 +106,7 @@
    6.13          permits the scanning for later terms from the specified term.
    6.14          """
    6.15  
    6.16 -        self.f.seek(info_offset)
    6.17 +        self.seek(info_offset)
    6.18          self.last_term = term
    6.19          self.last_offset = offset
    6.20  
    6.21 @@ -301,8 +301,7 @@
    6.22          documents equal to the given 'doc_frequency'.
    6.23          """
    6.24  
    6.25 -        self.position_dict_reader.seek(offset, doc_frequency)
    6.26 -        return self.position_dict_reader
    6.27 +        return self.position_dict_reader.read_term_positions(offset, doc_frequency)
    6.28  
    6.29      # Iterator convenience methods.
    6.30  
    6.31 @@ -329,9 +328,7 @@
    6.32          """
    6.33  
    6.34          term, offset, frequency, doc_frequency = self.info_reader.read_term()
    6.35 -
    6.36 -        self.position_dict_reader.seek(offset, doc_frequency)
    6.37 -        return term, frequency, doc_frequency, self.position_dict_reader
    6.38 +        return term, frequency, doc_frequency, self._get_positions(offset, doc_frequency)
    6.39  
    6.40      # Query methods.
    6.41  

     7.1 --- a/test.py	Fri Jan 08 00:44:59 2010 +0100
     7.2 +++ b/test.py	Sun Jan 10 20:47:41 2010 +0100
     7.3 @@ -26,7 +26,7 @@
     7.4  if "clean" in sys.argv:
     7.5      sys.exit(0)
     7.6  
     7.7 -# Test basic data types.
     7.8 +print "- Test basic data types."
     7.9  
    7.10  numbers = [12345678, 0, 1, 127, 128, 255, 256]
    7.11  
    7.12 @@ -43,7 +43,7 @@
    7.13      print number == n, number, n
    7.14  r.close()
    7.15  
    7.16 -# Test positions.
    7.17 +print "- Test positions."
    7.18  
    7.19  all_doc_positions = [
    7.20      [
    7.21 @@ -77,7 +77,7 @@
    7.22      r.reset()
    7.23  r.close()
    7.24  
    7.25 -# Test position index files.
    7.26 +print "- Test position index files."
    7.27  
    7.28  indexed_positions = [
    7.29      [
    7.30 @@ -105,7 +105,7 @@
    7.31      offsets.append((offset, doc_frequency))
    7.32  w.close()
    7.33  
    7.34 -r = PositionIndexReader(open("testPI", "rb"))
    7.35 +r = PositionIndexIterator(PositionIndexReader(open("testPI", "rb")))
    7.36  offsets.reverse()
    7.37  indexed_positions.reverse()
    7.38  for (offset, doc_frequency), term_positions in zip(offsets, indexed_positions):
    7.39 @@ -114,9 +114,9 @@
    7.40          print docnum == dn, docnum, dn
    7.41          print pos_offset == po, pos_offset, po
    7.42          print count == c, count, c
    7.43 -r.close()
    7.44 +r.reader.close()
    7.45  
    7.46 -# Test position dictionaries.
    7.47 +print "- Test position dictionaries."
    7.48  
    7.49  f = open("testP", "wb")
    7.50  w = PositionWriter(f)
    7.51 @@ -135,12 +135,12 @@
    7.52  offsets.reverse()
    7.53  all_doc_positions.reverse()
    7.54  for (offset, doc_frequency), doc_positions in zip(offsets, all_doc_positions):
    7.55 -    rd.seek(offset, doc_frequency)
    7.56 -    dp = list(rd)
    7.57 +    it = rd.read_term_positions(offset, doc_frequency)
    7.58 +    dp = list(it)
    7.59      print doc_positions == dp, doc_positions, dp
    7.60  rd.close()
    7.61  
    7.62 -# Test fields.
    7.63 +print "- Test fields."
    7.64  
    7.65  doc_fields = [
    7.66      (123, ["testing", "fields", "stored", "compressed"]),
    7.67 @@ -166,7 +166,7 @@
    7.68      print list(enumerate(fields)) == df, list(enumerate(fields)), df
    7.69  r.close()
    7.70  
    7.71 -# Test field index files.
    7.72 +print "- Test field index files."
    7.73  
    7.74  indexed_docs = [
    7.75      (123, 100000987),
    7.76 @@ -188,7 +188,7 @@
    7.77      print offset == o, offset, o
    7.78  r.close()
    7.79  
    7.80 -# Test field dictionaries.
    7.81 +print "- Test field dictionaries."
    7.82  
    7.83  f = open("testF", "wb")
    7.84  w = FieldWriter(f)
    7.85 @@ -213,7 +213,7 @@
    7.86      df = rd.get_fields(docnum)
    7.87      print df is None, df
    7.88  
    7.89 -# (Test sequential access.)
    7.90 +print "- (Test sequential access.)"
    7.91  
    7.92  rd.rewind()
    7.93  for docnum, fields in doc_fields:
    7.94 @@ -222,7 +222,7 @@
    7.95      print list(enumerate(fields)) == df, list(enumerate(fields)), df
    7.96  rd.close()
    7.97  
    7.98 -# Test terms.
    7.99 +print "- Test terms."
   7.100  
   7.101  terms = [
   7.102      # term       offset      frequency  doc_frequency
   7.103 @@ -250,7 +250,7 @@
   7.104      print doc_frequency == df, doc_frequency, df
   7.105  r.close()
   7.106  
   7.107 -# Test terms in index files.
   7.108 +print "- Test terms in index files."
   7.109  
   7.110  indexed_terms = [
   7.111      # term       offset      frequency  doc_frequency   info_offset
   7.112 @@ -279,7 +279,7 @@
   7.113      print info_offset == i, info_offset, i
   7.114  r.close()
   7.115  
   7.116 -# Test dictionaries with only term data.
   7.117 +print "- Test dictionaries with only term data."
   7.118  
   7.119  f = open("test", "wb")
   7.120  w = TermWriter(f)
   7.121 @@ -314,7 +314,7 @@
   7.122      t = rd._find_term(term)
   7.123      print t is None, t
   7.124  
   7.125 -# (Test term prefix searching.)
   7.126 +print "- (Test term prefix searching.)"
   7.127  
   7.128  print rd.find_terms("a") == ["aardvark", "anteater"], rd.find_terms("a"), ["aardvark", "anteater"]
   7.129  print rd.find_terms("bu") == ["bull", "bulldog"], rd.find_terms("bu"), ["bull", "bulldog"]
   7.130 @@ -322,7 +322,7 @@
   7.131  print rd.find_terms("d") == [], rd.find_terms("d"), []
   7.132  rd.close()
   7.133  
   7.134 -# Test dictionaries with term and position data.
   7.135 +print "- Test dictionaries with term and position data."
   7.136  
   7.137  terms_with_positions = [
   7.138      ("aardvark",  [(1, [2, 45, 96]), (20, [13])]),
   7.139 @@ -374,14 +374,14 @@
   7.140      dp = rd.find_positions(term)
   7.141      print dp == [], dp
   7.142  
   7.143 -# (Test iterators.)
   7.144 +print "- (Test iterators.)"
   7.145  
   7.146  for term, docnum, positions in position_dict_tests:
   7.147      dp = rd.find_positions(term)
   7.148      pos = dp.from_document(docnum)
   7.149      print positions is None and pos is None or pos is not None and positions == list(pos), positions, pos
   7.150  
   7.151 -# (Test sequential access.)
   7.152 +print "- (Test sequential access.)"
   7.153  
   7.154  rd.rewind()
   7.155  for term, doc_positions in terms_with_positions:
   7.156 @@ -391,7 +391,7 @@
   7.157      print doc_positions == dp, doc_positions, dp
   7.158  rd.close()
   7.159  
   7.160 -# Test high-level index operations (including merging).
   7.161 +print "- Test high-level index operations (including merging)."
   7.162  
   7.163  docs = [
   7.164      (1, "The cat sat on the mat"),
   7.165 @@ -434,7 +434,7 @@
   7.166  
   7.167  rd = index.get_reader()
   7.168  
   7.169 -# (Test searching.)
   7.170 +print "- (Test searching.)"
   7.171  
   7.172  for term, frequency, doc_positions in doc_tests:
   7.173      dp = list(rd.find_positions(term))
   7.174 @@ -442,20 +442,20 @@
   7.175      fr = rd.get_frequency(term)
   7.176      print frequency == fr, frequency, fr
   7.177  
   7.178 -# (Test fields.)
   7.179 +print "- (Test fields.)"
   7.180  
   7.181  for docnum, text in docs:
   7.182      df = dict(rd.get_fields(docnum))
   7.183      print df[123] == text, text, df[123]
   7.184  
   7.185 -# (Test navigation.)
   7.186 +print "- (Test navigation.)"
   7.187  
   7.188  for term, docnum, positions in position_tests:
   7.189      dp = rd.find_positions(term)
   7.190      pos = dp.from_document(docnum)
   7.191      print positions is None and pos is None or pos is not None and positions == list(pos), positions, pos
   7.192  
   7.193 -# (Test phrases.)
   7.194 +print "- (Test phrases.)"
   7.195  
   7.196  for terms, results in phrase_tests:
   7.197      res = list(rd.find_common_positions(terms))
   7.198 @@ -463,7 +463,7 @@
   7.199  
   7.200  index.close()
   7.201  
   7.202 -# Test index updates.
   7.203 +print "- Test index updates."
   7.204  
   7.205  index = Index("test_index")
   7.206  index2 = Index("test_index2", 3, 2, 3, 6)
   7.207 @@ -500,7 +500,7 @@
   7.208      print frequency == fr, frequency, fr
   7.209  index2.close()
   7.210  
   7.211 -# (Test update of an empty index.)
   7.212 +print "- (Test update of an empty index.)"
   7.213  
   7.214  index = Index("test_index")
   7.215  index3 = Index("test_index3")
2010-01-10	Paul Boddie	raw files shortlog changelog graph	Introduced position dictionary, file and index iterators which capture the relevant result data in caches for particular terms, wrapping the underlying shared file readers. Added section output to the test program in order to make troubleshooting easier. Added a seek method to the File class.
			docs/COPYING.txt (file) iixr/fields.py (file) iixr/files.py (file) iixr/filesystem.py (file) iixr/positions.py (file) iixr/terms.py (file) test.py (file)