# HG changeset patch # User Paul Boddie # Date 1252708291 -7200 # Node ID f0f1799c9f4cb262d1bcf9fd4895f138214c8750 # Parent 1293ffb9e43b1d38a6ad7686fe8cd728312d39f7 Attempted to add batch writing to the FileWriter class for supposedly improved performance. diff -r 1293ffb9e43b -r f0f1799c9f4c iixr.py --- a/iixr.py Fri Sep 11 01:27:18 2009 +0200 +++ b/iixr.py Sat Sep 12 00:31:31 2009 +0200 @@ -99,6 +99,8 @@ def __init__(self, f): self.f = f self.reset() + self.cache = [] + self.cache_length = 0 def reset(self): @@ -110,8 +112,23 @@ self.f.seek(0) self.reset() + def write(self, s): + self.cache.append(s) + self.cache_length += len(s) + if len(self.cache) >= 1000: + self.flush() + + def tell(self): + return self.f.tell() + self.cache_length + + def flush(self): + self.f.write("".join(self.cache)) + self.cache = [] + self.cache_length = 0 + def close(self): if self.f is not None: + self.flush() self.f.close() self.f = None @@ -123,7 +140,7 @@ "Write 'number' to the file using a variable length encoding." - self.f.write(vint(number)) + self.write(vint(number)) def write_string(self, s, compress=0): @@ -157,7 +174,7 @@ # Write the length of the data before the data itself. length = len(s) - self.f.write(flag + vint(length) + s) + self.write(flag + vint(length) + s) class FileReader(File): @@ -247,7 +264,7 @@ # Record the offset of this record. - offset = self.f.tell() + offset = self.tell() # Make sure that the positions are sorted. @@ -266,7 +283,7 @@ # Write the number of positions. # Then write the positions. - self.f.write(vint(docnum - self.last_docnum) + vint(len(positions)) + "".join(output)) + self.write(vint(docnum - self.last_docnum) + vint(len(positions)) + "".join(output)) self.last_docnum = docnum return offset @@ -306,7 +323,7 @@ # Record the offset of this record. - offset = self.f.tell() + offset = self.tell() output = [] # Write the document number delta. @@ -325,7 +342,7 @@ # Actually write the data. - self.f.write("".join(output)) + self.write("".join(output)) return offset @@ -769,7 +786,7 @@ self.last_term = term self.last_offset = offset - return self.f.tell() + return self.tell() class TermReader(FileReader): @@ -1116,7 +1133,7 @@ Return the offset at which the fields are stored. """ - offset = self.f.tell() + offset = self.tell() # Write the document number delta.