1.1 --- a/iixr/positions.py Sat Nov 20 23:56:16 2010 +0100
1.2 +++ b/iixr/positions.py Sun Nov 21 20:44:43 2010 +0100
1.3 @@ -27,7 +27,8 @@
1.4 "Writing position information to files."
1.5
1.6 def reset(self):
1.7 - self.last_docnum = 0
1.8 + self.last_docnum = None
1.9 + self.docnum_size = None
1.10
1.11 def write_positions(self, docnum, positions):
1.12
1.13 @@ -35,6 +36,12 @@
1.14 Write for the document 'docnum' the given 'positions'.
1.15 """
1.16
1.17 + # Find the size of document number values.
1.18 +
1.19 + if self.docnum_size is None:
1.20 + self.docnum_size = self.get_value_size(docnum)
1.21 + self.last_docnum = self.get_initial_value(self.docnum_size)
1.22 +
1.23 if docnum < self.last_docnum:
1.24 raise ValueError, "Document number %r is less than previous number %r." % (docnum, self.last_docnum)
1.25
1.26 @@ -42,20 +49,27 @@
1.27
1.28 positions.sort()
1.29
1.30 + # Find the size of position values.
1.31 +
1.32 + size = self.get_value_size(positions[0])
1.33 +
1.34 + # Write the number of values per document number.
1.35 # Write the document number delta.
1.36 # Write the number of positions.
1.37 + # Write the number of values per position.
1.38
1.39 output = array('B')
1.40 - vint_to_array(docnum - self.last_docnum, output)
1.41 + vint_to_array(self.docnum_size, output)
1.42 + self.last_docnum = self.write_sequence(output, docnum, self.last_docnum, self.docnum_size, monotonic=0)
1.43 vint_to_array(len(positions), output)
1.44 + vint_to_array(size, output)
1.45
1.46 # Write the position deltas.
1.47
1.48 - last = 0
1.49 + last = self.get_initial_value(size)
1.50
1.51 for position in positions:
1.52 - vint_to_array(position - last, output)
1.53 - last = position
1.54 + last = self.write_sequence(output, position, last, size)
1.55
1.56 output.tofile(self.f)
1.57
1.58 @@ -66,29 +80,44 @@
1.59 "Reading position information within term-specific regions of a file."
1.60
1.61 def reset(self):
1.62 - self.last_docnum = 0
1.63 + self.last_docnum = None
1.64
1.65 def read_positions(self):
1.66
1.67 - "Read positions, returning a document number and a list of positions."
1.68 + """
1.69 + Read positions, returning a document number and a list of positions.
1.70 + """
1.71 +
1.72 + # Read the number of values per document number.
1.73 +
1.74 + docnum_size = self.read_number()
1.75 +
1.76 + if self.last_docnum is None:
1.77 + self.last_docnum = self.get_initial_value(docnum_size)
1.78
1.79 # Read the document number delta and add it to the last number.
1.80
1.81 - self.last_docnum += self.read_number()
1.82 + self.last_docnum = self.read_sequence(self.last_docnum, docnum_size, monotonic=0)
1.83
1.84 # Read the number of positions.
1.85
1.86 npositions = self.read_number()
1.87
1.88 + # Read the number of values per position.
1.89 +
1.90 + size = self.read_number()
1.91 +
1.92 # Read the position deltas, adding each previous position to get the
1.93 # appropriate collection of absolute positions.
1.94
1.95 i = 0
1.96 - last = 0
1.97 +
1.98 + last = self.get_initial_value(size)
1.99 +
1.100 positions = []
1.101
1.102 while i < npositions:
1.103 - last += self.read_number()
1.104 + last = self.read_sequence(last, size)
1.105 positions.append(last)
1.106 i += 1
1.107
1.108 @@ -99,7 +128,8 @@
1.109 "Writing position index information to files."
1.110
1.111 def reset(self):
1.112 - self.last_docnum = 0
1.113 + self.last_docnum = None
1.114 + self.docnum_size = None
1.115 self.last_pos_offset = 0
1.116
1.117 def write_positions(self, docnum, pos_offset, count):
1.118 @@ -109,12 +139,20 @@
1.119 position index file.
1.120 """
1.121
1.122 + # Find the size of document number values.
1.123 +
1.124 + if self.docnum_size is None:
1.125 + self.docnum_size = self.get_value_size(docnum)
1.126 + self.last_docnum = self.get_initial_value(self.docnum_size)
1.127 +
1.128 + # Write the number of values per document number.
1.129 # Write the document number delta.
1.130 # Write the position file offset delta.
1.131 # Write the document count.
1.132
1.133 output = array('B')
1.134 - vint_to_array(docnum - self.last_docnum, output)
1.135 + vint_to_array(self.docnum_size, output)
1.136 + self.last_docnum = self.write_sequence(output, docnum, self.last_docnum, self.docnum_size, monotonic=0)
1.137 vint_to_array(pos_offset - self.last_pos_offset, output)
1.138 vint_to_array(count, output)
1.139
1.140 @@ -123,14 +161,13 @@
1.141 output.tofile(self.f)
1.142
1.143 self.last_pos_offset = pos_offset
1.144 - self.last_docnum = docnum
1.145
1.146 class PositionIndexReader(FileReader):
1.147
1.148 "Reading position index information within term-specific regions of a file."
1.149
1.150 def reset(self):
1.151 - self.last_docnum = 0
1.152 + self.last_docnum = None
1.153 self.last_pos_offset = 0
1.154
1.155 def read_positions(self):
1.156 @@ -140,9 +177,16 @@
1.157 file, and the number of documents in a section of that file.
1.158 """
1.159
1.160 - # Read the document number delta.
1.161 + # Read the number of values per document number.
1.162 +
1.163 + docnum_size = self.read_number()
1.164
1.165 - self.last_docnum += self.read_number()
1.166 + if self.last_docnum is None:
1.167 + self.last_docnum = self.get_initial_value(docnum_size)
1.168 +
1.169 + # Read the document number delta and add it to the last number.
1.170 +
1.171 + self.last_docnum = self.read_sequence(self.last_docnum, docnum_size, monotonic=0)
1.172
1.173 # Read the offset delta.
1.174