# HG changeset patch # User Paul Boddie # Date 1297385168 -3600 # Node ID 74e2e30aabea30e0877bea18d74439c7ff4d8459 # Parent 158d0a4400c1bee4b1c8b5caedac20766332ea98 Introduced read and write caches in order to investigate performance changes. diff -r 158d0a4400c1 -r 74e2e30aabea iixr/files.py --- a/iixr/files.py Fri Feb 11 00:03:22 2011 +0100 +++ b/iixr/files.py Fri Feb 11 01:46:08 2011 +0100 @@ -21,10 +21,13 @@ from iixr.data import * from array import array import zlib -import sys # Constants. +CACHE_SIZE = 1000 + +# Classes. + class File: "A basic file abstraction." @@ -32,6 +35,7 @@ def __init__(self, f): self.f = f self.record = array('B') # record buffer + self.cache = array('B') self.data_start = 0 def reset(self): @@ -45,8 +49,7 @@ self.reset() def rewind(self): - self.f.seek(self.data_start) - self.reset() + self.seek(self.data_start) def close(self): if self.f is not None: @@ -71,11 +74,13 @@ def end_record(self): if self.record: length = len(self.record) - size = vint(length) - self.f.write(size) - self.record.tofile(self.f) - self.written += len(size) + length + before = len(self.cache) + vint_to_array(length, self.cache) + length_size = len(self.cache) - before + self.cache += self.record + self.written += length_size + length self.record = array('B') + self.flush() def write_number(self, number): @@ -139,12 +144,15 @@ convert_sequence(values, get_monotonic_subtractor(values[0])) self.write_sequence_values(values, size) - def flush(self): + def flush(self, force=0): if self.f is not None: self.end_record() + if force or len(self.cache) > CACHE_SIZE: + self.cache.tofile(self.f) + self.cache = array('B') def close(self): - self.flush() + self.flush(1) File.close(self) class FileReader(File): @@ -153,12 +161,11 @@ def __init__(self, f): File.__init__(self, f) + self.record_start = 0 + self.record_end = 0 + self.cache_start = 0 self.begin() - def tell(self): - # NOTE: Will not be accurate within the current record. - return self.f.tell() - def begin(self): "Initialise file-wide parameters." @@ -166,34 +173,61 @@ pass def begin_record(self): - self.record = array('B') self.start = 0 try: size = self.read_number_from_file() - self.record.fromfile(self.f, size) + self.record = self.from_cache(size) except EOFError: pass def end_record(self): pass + def seek(self, offset): + if self.cache_start <= offset < self.cache_start + len(self.cache): + self.cache = self.cache[offset - self.cache_start:] + else: + self.f.seek(offset) + self.cache = array('B') + self.cache_start = offset + self.record_start = 0 + self.record_end = 0 + self.reset() + + def tell(self): + return self.cache_start + self.record_start + self.start + + def ensure_cache(self, size): + if size > len(self.cache) - self.record_end: + self.cache = self.cache[self.record_end:] + self.cache_start += self.record_end + s = self.f.read(CACHE_SIZE) + self.cache.fromstring(s) + self.record_start = 0 + if not s: + raise EOFError + else: + self.record_start = self.record_end + self.record_end = self.record_start + size + + def from_cache(self, size): + self.ensure_cache(size) + return self.cache[self.record_start:self.record_end] + def read_number_from_file(self): "Read a number from the file." # Read each byte, adding it to the number. - f = self.f a = array('B') - fromfile = a.fromfile - - fromfile(f, 1) + a += self.from_cache(1) csd = a[-1] if csd < 128: return csd else: while csd & 128: - fromfile(f, 1) + a += self.from_cache(1) csd = a[-1] return vint_from_array(a)