#!/usr/bin/env python

"""
Generic file access.

Copyright (C) 2009, 2010, 2011 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along
with this program. If not, see <http://www.gnu.org/licenses/>.
"""

from iixr.data import *
from array import array
import zlib

# Constants.

# Number of bytes read from a file at a time by readers, and the amount of
# pending output beyond which writers write their cache to the file.

CACHE_SIZE = 1000

# Classes.

class File:

    "A basic file abstraction."

    def __init__(self, f):

        "Initialise the abstraction with the file object 'f'."

        self.f = f
        self.record = array('B') # record buffer
        self.cache = array('B')  # bytes pending output or read ahead
        self.data_start = 0      # offset used by 'rewind'

    def reset(self):

        "To be used to reset the state of the reader or writer between records."

        pass

    def seek(self, offset):

        "Seek to 'offset' in the underlying file and reset the state."

        self.f.seek(offset)
        self.reset()

    def rewind(self):

        "Seek to the start of the data as given by the 'data_start' attribute."

        self.seek(self.data_start)

    def close(self):

        "Close the underlying file if this has not already been done."

        if self.f is not None:
            self.f.close()
            self.f = None

class FileWriter(File):

    "Writing basic data types to files."

    def __init__(self, f):

        "Initialise the writer with the file object 'f'."

        File.__init__(self, f)
        self.written = 0

    def tell(self):

        "Return the number of bytes produced by all completed records."

        # NOTE: Will not be accurate within the current record.
        return self.written

    def begin_record(self):

        "Begin a record. Nothing needs to be done when writing."

        pass

    def end_record(self):

        """
        End the current record: if any data has been written to it, append its
        length and then its contents to the cache, update the number of bytes
        written, start a new empty record, and flush the cache if appropriate.
        """

        if self.record:
            length = len(self.record)

            # Measure the encoded length by the growth of the cache.

            before = len(self.cache)
            vint_to_array(length, self.cache)
            length_size = len(self.cache) - before

            self.cache += self.record
            self.written += length_size + length
            self.record = array('B')

            # The flush method calls this method again, but the record is now
            # empty and so the mutual invocation terminates.

            self.flush()

    def write_number(self, number):

        "Write 'number' to the file using a variable length encoding."

        vint_to_array(number, self.record)

    def write_numbers(self, numbers):

        "Write 'numbers' to the file using a variable length encoding."

        for number in numbers:
            vint_to_array(number, self.record)

    def write_string(self, s, compress=0):

        """
        Write 's' to the file, recording its length and compressing the string
        if 'compress' is set to a true value.
        """

        # Convert Unicode objects to strings.

        if isinstance(s, unicode):
            s = s.encode("utf-8")

        # Compress the string if requested.

        if compress:
            cs = zlib.compress(s)

            # Take any shorter than the original.

            if len(cs) < len(s):
                flag = "z"
                s = cs
            else:
                flag = "-"

        # Without compression no flag is written at all: readers must use a
        # matching 'decompress' setting.

        else:
            flag = ""

        # Write the length of the data before the data itself.

        length = len(s)
        self.record.fromstring("".join([flag, vint(length), s]))

    def write_sequence_value(self, value, size):

        "Write the sequence 'value' having the given 'size' to the record."

        sequence_to_array(value, size, self.record)

    def write_sequence_values(self, values, size):

        """
        Write the number of 'values' to the record followed by each value, each
        being a sequence of the given 'size'.
        """

        vint_to_array(len(values), self.record)
        for value in values:
            self.write_sequence_value(value, size)

    def write_delta_sequence(self, values, size):

        """
        Write 'values', each of the given 'size', after converting them using a
        subtractor obtained for the first value. The conversion is applied to
        'values' itself by 'convert_sequence'. An empty collection is written
        as a zero count without any conversion.
        """

        if values:
            convert_sequence(values, get_subtractor(values[0]))
        self.write_sequence_values(values, size)

    def write_monotonic_sequence(self, values, size):

        """
        Write 'values', each of the given 'size', after converting them using a
        monotonic subtractor obtained for the first value. The conversion is
        applied to 'values' itself by 'convert_sequence'. An empty collection
        is written as a zero count without any conversion.
        """

        if values:
            convert_sequence(values, get_monotonic_subtractor(values[0]))
        self.write_sequence_values(values, size)

    def flush(self, force=0):

        """
        Complete any current record and write the cache to the file if 'force'
        is set to a true value or if the cache holds more than CACHE_SIZE bytes.
        Nothing is done if the file has been closed.
        """

        if self.f is not None:
            self.end_record()
            if force or len(self.cache) > CACHE_SIZE:
                self.cache.tofile(self.f)
                self.cache = array('B')

    def close(self):

        "Write any pending data to the file and close it."

        self.flush(1)
        File.close(self)

class FileReader(File):

    "Reading basic data types from files."

    def __init__(self, f):

        "Initialise the reader with the file object 'f'."

        File.__init__(self, f)
        self.record_start = 0   # start of the current item within the cache
        self.record_end = 0     # end of the current item within the cache
        self.cache_start = 0    # file offset of the first byte in the cache

        # Position within the current record: defined here so that the tell
        # method can be used before any record has been read.

        self.start = 0
        self.begin()

    def begin(self):

        "Initialise file-wide parameters."

        pass

    def begin_record(self):

        """
        Begin reading a record: read the record length from the file and then
        obtain the record contents. At the end of the file, the existing record
        is left unchanged.
        """

        self.start = 0
        try:
            size = self.read_number_from_file()
            self.record = self.from_cache(size)

        # NOTE: The end of file condition is not reported to the caller.

        except EOFError:
            pass

    def end_record(self):

        "End a record. Nothing needs to be done when reading."

        pass

    def seek(self, offset):

        """
        Seek to the file 'offset', retaining cached data from that offset
        onwards if the offset lies within the cache, or otherwise discarding
        the cache and seeking within the underlying file.
        """

        if self.cache_start <= offset < self.cache_start + len(self.cache):
            self.cache = self.cache[offset - self.cache_start:]
        else:
            self.f.seek(offset)
            self.cache = array('B')

        # In both cases the cache now starts at the given offset.

        self.cache_start = offset
        self.record_start = 0
        self.record_end = 0
        self.reset()

    def tell(self):

        "Return the file offset of the current position in the current record."

        return self.cache_start + self.record_start + self.start

    def ensure_cache(self, size):

        """
        Ensure that 'size' bytes following the most recently read item are
        available in the cache, reading from the file if necessary, and update
        the record start and end to refer to those bytes. Raise EOFError if
        more data is needed but none can be read from the file.
        """

        if size > len(self.cache) - self.record_end:

            # Discard the data already consumed.

            self.cache = self.cache[self.record_end:]
            self.cache_start += self.record_end

            # Read at least enough data to satisfy the request: items larger
            # than CACHE_SIZE would otherwise be truncated.

            s = self.f.read(max(CACHE_SIZE, size - len(self.cache)))
            self.cache.fromstring(s)
            self.record_start = 0
            if not s:
                raise EOFError
        else:
            self.record_start = self.record_end

        self.record_end = self.record_start + size

    def from_cache(self, size):

        "Return the next 'size' bytes from the cache as an array."

        self.ensure_cache(size)
        return self.cache[self.record_start:self.record_end]

    def read_number_from_file(self):

        "Read a number from the file."

        # Read each byte, adding it to the number.

        a = array('B')
        a += self.from_cache(1)
        csd = a[-1]

        # Values below 128 occupy a single byte and need no decoding.

        if csd < 128:
            return csd

        # Otherwise, collect bytes until one without the top bit set is found.

        else:
            while csd & 128:
                a += self.from_cache(1)
                csd = a[-1]
            return vint_from_array(a)

    def read_number(self):

        "Read a number from the current record."

        n, self.start = vint_from_array_start(self.record, self.start)
        return n

    def read_numbers(self, n):

        "Read 'n' numbers from the current record, returning them in a list."

        return [self.read_number() for i in range(n)]

    def read_string(self, decompress=0):

        """
        Read a string from the current record, decompressing the stored data if
        'decompress' is set to a true value.
        """

        # Decompress the data if requested.

        if decompress:
            flag = chr(self.record[self.start])
            self.start += 1
        else:
            flag = "-"

        length = self.read_number()
        start = self.start
        self.start += length
        s = self.record[start:self.start].tostring()

        # Perform decompression if applicable.

        if flag == "z":
            s = zlib.decompress(s)

        # Convert strings to Unicode objects.

        return unicode(s, "utf-8")

    def read_sequence_value(self, size):

        "Read a sequence value of the given 'size' from the current record."

        value, self.start = sequence_from_array(self.record, size, self.start)
        return value

    def read_sequences(self, size):

        """
        Read a count from the current record followed by that number of
        sequence values, each of the given 'size', returning them in a list.
        """

        length = self.read_number()
        return [self.read_sequence_value(size) for i in range(length)]

    def read_delta_sequence(self, size):

        """
        Read sequence values of the given 'size' and convert them using an
        adder obtained for the first value. An empty list is returned as it is.
        """

        values = self.read_sequences(size)
        if values:
            convert_sequence(values, get_adder(values[0]))
        return values

    def read_monotonic_sequence(self, size):

        """
        Read sequence values of the given 'size' and convert them using a
        monotonic adder obtained for the first value. An empty list is returned
        as it is.
        """

        values = self.read_sequences(size)
        if values:
            convert_sequence(values, get_monotonic_adder(values[0]))
        return values

# vim: tabstop=4 expandtab shiftwidth=4