1.1 --- a/iixr/files.py Sat Feb 12 01:23:58 2011 +0100
1.2 +++ b/iixr/files.py Sun Feb 13 02:49:55 2011 +0100
1.3 @@ -22,10 +22,6 @@
1.4 from array import array
1.5 import zlib
1.6
1.7 -# Constants.
1.8 -
1.9 -CACHE_SIZE = 100000
1.10 -
1.11 # Classes.
1.12
1.13 class File:
1.14 @@ -35,14 +31,21 @@
1.15 def __init__(self, f):
1.16 self.f = f
1.17 self.record = array('B') # record buffer
1.18 - self.cache = array('B')
1.19 + self.data_start = None
1.20 +
1.21 + def begin(self):
1.22 +
1.23 + """
1.24 + Initialise file-wide parameters. In writers, this method may require
1.25 + parameters to be specified. In readers, the parameters may be read from
1.26 + the file.
1.27 + """
1.28 +
1.29 self.data_start = 0
1.30
1.31 - def reset(self):
1.32 -
1.33 - "To be used to reset the state of the reader or writer between records."
1.34 -
1.35 - pass
1.36 + def tell(self):
1.37 + # NOTE: Will not be accurate within the current record.
1.38 + return self.f.tell()
1.39
1.40 def seek(self, offset):
1.41 self.f.seek(offset)
1.42 @@ -60,27 +63,26 @@
1.43
1.44 "Writing basic data types to files."
1.45
1.46 - def __init__(self, f):
1.47 - File.__init__(self, f)
1.48 - self.written = 0
1.49 -
1.50 - def tell(self):
1.51 - # NOTE: Will not be accurate within the current record.
1.52 - return self.written
1.53 -
1.54 def begin_record(self):
1.55 pass
1.56
1.57 def end_record(self):
1.58 if self.record:
1.59 - length = len(self.record)
1.60 - before = len(self.cache)
1.61 - vint_to_array(length, self.cache)
1.62 - length_size = len(self.cache) - before
1.63 - self.cache += self.record
1.64 - self.written += length_size + length
1.65 + self.f.write(vint(len(self.record)))
1.66 + self.record.tofile(self.f)
1.67 self.record = array('B')
1.68 - self.flush_cache()
1.69 +
1.70 + def write_remaining(self, a):
1.71 +
1.72 + "Write remaining data from the raw array 'a'."
1.73 +
1.74 + self.record += a
1.75 +
1.76 + def write_byte(self, b):
1.77 +
1.78 + "Write the given byte 'b'."
1.79 +
1.80 + self.record.append(b)
1.81
1.82 def write_number(self, number):
1.83
1.84 @@ -137,25 +139,17 @@
1.85 self.write_sequence_value(value, size)
1.86
1.87 def write_delta_sequence(self, values, size):
1.88 - convert_sequence(values, get_subtractor(values[0]))
1.89 - self.write_sequence_values(values, size)
1.90 + self.write_sequence_values(
1.91 + convert_sequence(values, get_subtractor(size), 1),
1.92 + size)
1.93
1.94 def write_monotonic_sequence(self, values, size):
1.95 - convert_sequence(values, get_monotonic_subtractor(values[0]))
1.96 - self.write_sequence_values(values, size)
1.97 -
1.98 - def flush(self, force=0):
1.99 - self.end_record()
1.100 - self.flush_cache(force)
1.101 -
1.102 - def flush_cache(self, force=0):
1.103 - if self.f is not None:
1.104 - if force or len(self.cache) > CACHE_SIZE:
1.105 - self.cache.tofile(self.f)
1.106 - self.cache = array('B')
1.107 + self.write_sequence_values(
1.108 + convert_sequence(values, get_monotonic_subtractor(size), 1),
1.109 + size)
1.110
1.111 def close(self):
1.112 - self.flush(1)
1.113 + self.end_record()
1.114 File.close(self)
1.115
1.116 class FileReader(File):
1.117 @@ -164,58 +158,33 @@
1.118
1.119 def __init__(self, f):
1.120 File.__init__(self, f)
1.121 - self.record_start = 0
1.122 - self.record_end = 0
1.123 - self.cache_start = 0
1.124 self.begin()
1.125
1.126 - def begin(self):
1.127 -
1.128 - "Initialise file-wide parameters."
1.129 -
1.130 - pass
1.131 -
1.132 def begin_record(self):
1.133 self.start = 0
1.134 + self.record = array('B')
1.135 try:
1.136 size = self.read_number_from_file()
1.137 - self.record = self.from_cache(size)
1.138 + self.record.fromfile(self.f, size)
1.139 except EOFError:
1.140 pass
1.141
1.142 def end_record(self):
1.143 pass
1.144
1.145 - def seek(self, offset):
1.146 - from_cache_start = offset - self.cache_start
1.147 - if 0 <= from_cache_start < len(self.cache):
1.148 - self.record_start = self.record_end = from_cache_start
1.149 - else:
1.150 - self.f.seek(offset)
1.151 - self.cache = array('B')
1.152 - self.cache_start = offset
1.153 - self.record_start = self.record_end = 0
1.154 - self.reset()
1.155 + def read_remaining(self):
1.156
1.157 - def tell(self):
1.158 - return self.cache_start + self.record_start + self.start
1.159 + "Read remaining data as a raw array."
1.160 +
1.161 + return self.record[self.start:]
1.162
1.163 - def ensure_cache(self, size):
1.164 - if size > len(self.cache) - self.record_end:
1.165 - self.cache = self.cache[self.record_end:]
1.166 - self.cache_start += self.record_end
1.167 - s = self.f.read(CACHE_SIZE)
1.168 - self.cache.fromstring(s)
1.169 - self.record_start = 0
1.170 - if not s:
1.171 - raise EOFError
1.172 - else:
1.173 - self.record_start = self.record_end
1.174 - self.record_end = self.record_start + size
1.175 + def read_byte(self):
1.176 +
1.177 + "Read a byte from the record."
1.178
1.179 - def from_cache(self, size):
1.180 - self.ensure_cache(size)
1.181 - return self.cache[self.record_start:self.record_end]
1.182 + b = self.record[self.start]
1.183 + self.start += 1
1.184 + return b
1.185
1.186 def read_number_from_file(self):
1.187
1.188 @@ -224,13 +193,13 @@
1.189 # Read each byte, adding it to the number.
1.190
1.191 a = array('B')
1.192 - a += self.from_cache(1)
1.193 + a.fromfile(self.f, 1)
1.194 csd = a[-1]
1.195 if csd < 128:
1.196 return csd
1.197 else:
1.198 while csd & 128:
1.199 - a += self.from_cache(1)
1.200 + a.fromfile(self.f, 1)
1.201 csd = a[-1]
1.202 return vint_from_array(a)
1.203
1.204 @@ -292,13 +261,9 @@
1.205 return values
1.206
1.207 def read_delta_sequence(self, size):
1.208 - values = self.read_sequences(size)
1.209 - convert_sequence(values, get_adder(values[0]))
1.210 - return values
1.211 + return convert_sequence(self.read_sequences(size), get_adder(size), 0)
1.212
1.213 def read_monotonic_sequence(self, size):
1.214 - values = self.read_sequences(size)
1.215 - convert_sequence(values, get_monotonic_adder(values[0]))
1.216 - return values
1.217 + return convert_sequence(self.read_sequences(size), get_monotonic_adder(size), 0)
1.218
1.219 # vim: tabstop=4 expandtab shiftwidth=4