# HG changeset patch
# User Paul Boddie
# Date 1253305285 -7200
# Node ID e0fbe13922c1bc90674132a9916cf714b0ab1448
# Parent 7189d6ef001f984dd34703904850fa6e221252f6
Introduced separate vint functions for strings and byte arrays.
Modified position writing to use arrays for potentially improved performance.

diff -r 7189d6ef001f -r e0fbe13922c1 iixr/data.py
--- a/iixr/data.py	Fri Sep 18 21:07:35 2009 +0200
+++ b/iixr/data.py	Fri Sep 18 22:21:25 2009 +0200
@@ -21,6 +21,7 @@
 from array import array
 
 vint_cache = {}
+vint_bytes_cache = {}
 
 def vint(number):
 
@@ -30,17 +31,8 @@
         return vint_cache[number]
     except KeyError:
         if number >= 0:
-
-            # Write the number from least to most significant digits.
-
             bytes = array('B')
-
-            while number > 127:
-                bytes.append(number & 127 | 128)
-                number = number >> 7
-            else:
-                bytes.append(number)
-
+            _vint_to_array(number, bytes)
             return bytes.tostring()
 
         # Negative numbers are not supported.
@@ -48,7 +40,35 @@
         else:
             raise ValueError, "Number %r is negative." % number
 
+def vint_to_array(number, bytes):
+
+    "Write 'number' as a variable-length integer to 'bytes'."
+
+    try:
+        bytes += vint_bytes_cache[number]
+    except KeyError:
+        if number >= 0:
+            _vint_to_array(number, bytes)
+
+        # Negative numbers are not supported.
+
+        else:
+            raise ValueError, "Number %r is negative." % number
+
+def _vint_to_array(number, bytes):
+
+    "Write the 'number' to 'bytes' from least to most significant digits."
+
+    while number > 127:
+        bytes.append(number & 127 | 128)
+        number = number >> 7
+    else:
+        bytes.append(number)
+
 for i in xrange(0, 65536):
-    vint_cache[i] = vint(i)
+    bytes = array('B')
+    _vint_to_array(i, bytes)
+    vint_bytes_cache[i] = bytes
+    vint_cache[i] = bytes.tostring()
 
 # vim: tabstop=4 expandtab shiftwidth=4
diff -r 7189d6ef001f -r e0fbe13922c1 iixr/positions.py
--- a/iixr/positions.py	Fri Sep 18 21:07:35 2009 +0200
+++ b/iixr/positions.py	Fri Sep 18 22:21:25 2009 +0200
@@ -19,7 +19,8 @@
 """
 
 from iixr.files import *
-from iixr.data import vint
+from iixr.data import vint, vint_to_array
+from array import array
 
 class PositionWriter(FileWriter):
 
@@ -44,18 +45,19 @@
         # Write the document number delta.
         # Write the number of positions.
 
-        output = [docnum - self.last_docnum, len(positions)]
+        output = array('B')
+        vint_to_array(docnum - self.last_docnum, output)
+        vint_to_array(len(positions), output)
 
         # Write the position deltas.
 
-        append = output.append
         last = 0
 
         for position in positions:
-            append(position - last)
+            vint_to_array(position - last, output)
             last = position
 
-        self.f.write("".join([vint(x) for x in output]))
+        output.tofile(self.f)
 
         self.last_docnum = docnum
 