# HG changeset patch # User Paul Boddie # Date 1295916943 -3600 # Node ID 34f535fe8cb08a727d4edfded47eddc670cbd405 # Parent c4da9505f73e98eb5cf54d1a23433a395f95aa2d Introduced various optimisation attempts. diff -r c4da9505f73e -r 34f535fe8cb0 iixr/data.py --- a/iixr/data.py Tue Jan 25 00:36:31 2011 +0100 +++ b/iixr/data.py Tue Jan 25 01:55:43 2011 +0100 @@ -3,7 +3,7 @@ """ Variable-length integer functions. -Copyright (C) 2009, 2010 Paul Boddie +Copyright (C) 2009, 2010, 2011 Paul Boddie This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -65,6 +65,16 @@ else: bytes.append(number) +def vint_from_array(bytes): + + "Read a variable-length integer from 'bytes', returning a number." + + number = 0 + while bytes: + number <<= 7 + number += bytes.pop() & 127 + return number + def string_to_array(s, bytes): "Write the given string 's' to 'bytes'." diff -r c4da9505f73e -r 34f535fe8cb0 iixr/files.py --- a/iixr/files.py Tue Jan 25 00:36:31 2011 +0100 +++ b/iixr/files.py Tue Jan 25 01:55:43 2011 +0100 @@ -3,7 +3,7 @@ """ Generic file access. -Copyright (C) 2009, 2010 Paul Boddie +Copyright (C) 2009, 2010, 2011 Paul Boddie This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -18,7 +18,7 @@ with this program. If not, see <http://www.gnu.org/licenses/>. """ -from iixr.data import vint, vint_to_array +from iixr.data import vint, vint_to_array, vint_from_array from array import array import zlib @@ -76,6 +76,15 @@ self.f.write(vint(number)) + def write_numbers(self, numbers): + + "Write 'numbers' to the file using a variable length encoding." 
+ + output = array('B') + for number in numbers: + vint_to_array(number, output) + output.tofile(self.f) + def write_string(self, s, compress=0): """ @@ -152,7 +161,7 @@ while csd & 128: fromfile(f, 1) csd = a[-1] - return sum([((csd & 127) << (number * 7)) for (number, csd) in enumerate(a)]) + return vint_from_array(a) def read_string(self, decompress=0): @@ -184,16 +193,21 @@ def read_sequence(self, last, size, monotonic=1): if size: - expect_delta = 1 value = [] - for v in last: - v_in = self.read_number() - if monotonic or expect_delta: + if monotonic: + for v in last: + v_in = self.read_number() value.append(v + v_in) - if expect_delta and v_in != 0: - expect_delta = 0 - else: - value.append(v_in - 1) + else: + expect_delta = 1 + for v in last: + v_in = self.read_number() + if expect_delta: + value.append(v + v_in) + if v_in != 0: + expect_delta = 0 + else: + value.append(v_in - 1) return tuple(value) else: return last + self.read_number() diff -r c4da9505f73e -r 34f535fe8cb0 iixr/terms.py --- a/iixr/terms.py Tue Jan 25 00:36:31 2011 +0100 +++ b/iixr/terms.py Tue Jan 25 01:55:43 2011 +0100 @@ -52,16 +52,14 @@ self.write_string(suffix) # Write the offset delta. - - self.write_number(offset - self.last_offset) - # Write the frequency. - - self.write_number(frequency) - # Write the document frequency. - self.write_number(doc_frequency) + self.write_numbers(( + offset - self.last_offset, + frequency, + doc_frequency + )) self.last_term = term self.last_offset = offset