1.1 --- a/iixr/data.py Tue Jan 25 00:36:31 2011 +0100
1.2 +++ b/iixr/data.py Tue Jan 25 01:55:43 2011 +0100
1.3 @@ -3,7 +3,7 @@
1.4 """
1.5 Variable-length integer functions.
1.6
1.7 -Copyright (C) 2009, 2010 Paul Boddie <paul@boddie.org.uk>
1.8 +Copyright (C) 2009, 2010, 2011 Paul Boddie <paul@boddie.org.uk>
1.9
1.10 This program is free software; you can redistribute it and/or modify it under
1.11 the terms of the GNU General Public License as published by the Free Software
1.12 @@ -65,6 +65,16 @@
1.13 else:
1.14 bytes.append(number)
1.15
1.16 +def vint_from_array(bytes):
1.17 +
1.18 + "Decode a variable-length integer from 'bytes' (least significant 7 bits first), emptying 'bytes', and return the number."
1.19 +
1.20 + number = 0
1.21 + while bytes:
1.22 + number <<= 7  # make room for the next, less significant, 7-bit group
1.23 + number += bytes.pop() & 127  # elements are taken from the end; only the low 7 bits carry value
1.24 + return number
1.25 +
1.26 def string_to_array(s, bytes):
1.27
1.28 "Write the given string 's' to 'bytes'."
2.1 --- a/iixr/files.py Tue Jan 25 00:36:31 2011 +0100
2.2 +++ b/iixr/files.py Tue Jan 25 01:55:43 2011 +0100
2.3 @@ -3,7 +3,7 @@
2.4 """
2.5 Generic file access.
2.6
2.7 -Copyright (C) 2009, 2010 Paul Boddie <paul@boddie.org.uk>
2.8 +Copyright (C) 2009, 2010, 2011 Paul Boddie <paul@boddie.org.uk>
2.9
2.10 This program is free software; you can redistribute it and/or modify it under
2.11 the terms of the GNU General Public License as published by the Free Software
2.12 @@ -18,7 +18,7 @@
2.13 with this program. If not, see <http://www.gnu.org/licenses/>.
2.14 """
2.15
2.16 -from iixr.data import vint, vint_to_array
2.17 +from iixr.data import vint, vint_to_array, vint_from_array
2.18 from array import array
2.19 import zlib
2.20
2.21 @@ -76,6 +76,15 @@
2.22
2.23 self.f.write(vint(number))
2.24
2.25 + def write_numbers(self, numbers):
2.26 +
2.27 + "Write each of 'numbers' to the file using a variable length encoding, buffering the encoded bytes and writing them with a single tofile call."
2.28 +
2.29 + output = array('B')  # byte buffer collecting the encodings of all the numbers
2.30 + for number in numbers:
2.31 + vint_to_array(number, output)  # expected to append the encoding of 'number' to 'output'
2.32 + output.tofile(self.f)
2.33 +
2.34 def write_string(self, s, compress=0):
2.35
2.36 """
2.37 @@ -152,7 +161,7 @@
2.38 while csd & 128:
2.39 fromfile(f, 1)
2.40 csd = a[-1]
2.41 - return sum([((csd & 127) << (number * 7)) for (number, csd) in enumerate(a)])
2.42 + return vint_from_array(a)
2.43
2.44 def read_string(self, decompress=0):
2.45
2.46 @@ -184,16 +193,21 @@
2.47
2.48 def read_sequence(self, last, size, monotonic=1):
2.49 if size:
2.50 - expect_delta = 1
2.51 value = []
2.52 - for v in last:
2.53 - v_in = self.read_number()
2.54 - if monotonic or expect_delta:
2.55 + if monotonic:
2.56 + for v in last:
2.57 + v_in = self.read_number()
2.58 value.append(v + v_in)
2.59 - if expect_delta and v_in != 0:
2.60 - expect_delta = 0
2.61 - else:
2.62 - value.append(v_in - 1)
2.63 + else:
2.64 + expect_delta = 1
2.65 + for v in last:
2.66 + v_in = self.read_number()
2.67 + if expect_delta:
2.68 + value.append(v + v_in)
2.69 + if v_in != 0:
2.70 + expect_delta = 0
2.71 + else:
2.72 + value.append(v_in - 1)
2.73 return tuple(value)
2.74 else:
2.75 return last + self.read_number()
3.1 --- a/iixr/terms.py Tue Jan 25 00:36:31 2011 +0100
3.2 +++ b/iixr/terms.py Tue Jan 25 01:55:43 2011 +0100
3.3 @@ -52,16 +52,14 @@
3.4 self.write_string(suffix)
3.5
3.6 # Write the offset delta.
3.7 -
3.8 - self.write_number(offset - self.last_offset)
3.9 -
3.10 # Write the frequency.
3.11 -
3.12 - self.write_number(frequency)
3.13 -
3.14 # Write the document frequency.
3.15
3.16 - self.write_number(doc_frequency)
3.17 + self.write_numbers((
3.18 + offset - self.last_offset,
3.19 + frequency,
3.20 + doc_frequency
3.21 + ))
3.22
3.23 self.last_term = term
3.24 self.last_offset = offset