#!/usr/bin/env python

"""
Generic file access.

Copyright (C) 2009, 2010, 2011 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE.  See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along
with this program.  If not, see <http://www.gnu.org/licenses/>.
"""

from iixr.data import *
from array import array
import zlib

# Classes.

class File:

    "A basic file abstraction."

    def __init__(self, f):

        "Initialise the abstraction around the open file object 'f'."

        self.f = f
        self.record = array('B') # record buffer
        self.data_start = None

    def begin(self):

        """
        Initialise file-wide parameters. In writers, this method may require
        parameters to be specified. In readers, the parameters may be read from
        the file.
        """

        self.data_start = 0

    def reset(self):

        """
        Reset any state that depends on the file position. This is invoked by
        'seek' after the underlying file has been repositioned. The default
        implementation does nothing; subclasses may override it.
        """

        # NOTE: Previously absent, which made 'seek' and 'rewind' raise
        # NOTE: AttributeError unless a subclass happened to define it.

        pass

    def tell(self):

        "Return the position reported by the underlying file."

        # NOTE: Will not be accurate within the current record.
        return self.f.tell()

    def seek(self, offset):

        "Move the underlying file to 'offset' and reset position-based state."

        self.f.seek(offset)
        self.reset()

    def rewind(self):

        "Seek to the start of the data as recorded by 'begin'."

        self.seek(self.data_start)

    def close(self):

        "Close the underlying file if it has not already been closed."

        if self.f is not None:
            self.f.close()
            self.f = None

class FileWriter(File):

    "Writing basic data types to files."

    def begin_record(self):

        "Start a new record. Nothing needs to be done when writing."

        pass

    def end_record(self):

        """
        Write the buffered record to the file, preceded by its length, and
        start a new empty buffer. An empty buffer is not written at all.
        """

        if self.record:
            self.f.write(vint(len(self.record)))
            self.record.tofile(self.f)
            self.record = array('B')

    def write_remaining(self, a):

        "Write remaining data from the raw array 'a'."

        self.record += a

    def write_byte(self, b):

        "Write the given byte 'b'."

        self.record.append(b)

    def write_number(self, number):

        "Write 'number' to the file using a variable length encoding."

        vint_to_array(number, self.record)

    def write_numbers(self, numbers):

        "Write 'numbers' to the file using a variable length encoding."

        for number in numbers:
            vint_to_array(number, self.record)

    def write_string(self, s, compress=0):

        """
        Write 's' to the file, recording its length and compressing the string
        if 'compress' is set to a true value.
        """

        # Convert Unicode objects to strings.

        if isinstance(s, unicode):
            s = s.encode("utf-8")

        # Compress the string if requested. A one character flag is only
        # written in this mode, so readers must use a matching 'decompress'
        # setting.

        if compress:
            cs = zlib.compress(s)

            # Take any shorter than the original.

            if len(cs) < len(s):
                flag = "z"
                s = cs
            else:
                flag = "-"

        else:
            flag = ""

        # Write the length of the data before the data itself.

        length = len(s)
        self.record.fromstring("".join([flag, vint(length), s]))

    def write_sequence_value(self, value, size):

        "Append the sequence 'value' of the given 'size' to the record."

        sequence_to_array(value, size, self.record)

    def write_sequence_values(self, values, size):

        """
        Write the number of 'values' followed by each value, where each value
        is a sequence of the given 'size'.
        """

        vint_to_array(len(values), self.record)
        for value in values:
            self.write_sequence_value(value, size)

    def write_delta_sequence(self, values, size):

        """
        Write 'values' after converting them using the subtractor for the
        given 'size'.
        """

        self.write_sequence_values(
            convert_sequence(values, get_subtractor(size), 1),
            size)

    def write_monotonic_sequence(self, values, size):

        """
        Write 'values' after converting them using the monotonic subtractor
        for the given 'size'.
        """

        self.write_sequence_values(
            convert_sequence(values, get_monotonic_subtractor(size), 1),
            size)

    def close(self):

        "Write out any pending record before closing the file."

        self.end_record()
        File.close(self)

class FileReader(File):

    "Reading basic data types from files."

    def __init__(self, f):

        "Initialise the reader for the open file 'f' and read its parameters."

        File.__init__(self, f)

        # Define the read position up front so that reading before the first
        # record has begun does not fail on a missing attribute.

        self.start = 0
        self.begin()

    def begin_record(self):

        """
        Read the next record from the file into the record buffer and move the
        read position to its start.
        """

        self.start = 0
        self.record = array('B')

        # At the end of the file, the record is left with whatever data could
        # be read, which will be nothing if no length could be obtained.

        try:
            size = self.read_number_from_file()
            self.record.fromfile(self.f, size)
        except EOFError:
            pass

    def end_record(self):

        "Finish the current record. Nothing needs to be done when reading."

        pass

    def read_remaining(self):

        "Read remaining data as a raw array."

        return self.record[self.start:]

    def read_byte(self):

        "Read a byte from the record."

        b = self.record[self.start]
        self.start += 1
        return b

    def read_number_from_file(self):

        "Read a number from the file."

        # Read each byte, adding it to the number.

        a = array('B')
        a.fromfile(self.f, 1)
        csd = a[-1]

        # A single byte without the top bit set is the number itself.

        if csd < 128:
            return csd

        # Otherwise, the top bit indicates that more bytes follow.

        else:
            while csd & 128:
                a.fromfile(self.f, 1)
                csd = a[-1]
            return vint_from_array(a)

    def read_number(self):

        "Read a number from the current record."

        n, self.start = vint_from_array_start(self.record, self.start)
        return n

    def read_numbers(self, n):

        "Read 'n' numbers from the current record, returning them as a list."

        return [self.read_number() for _ in range(n)]

    def read_string(self, decompress=0):

        """
        Read a string from the current record, decompressing the stored data if
        'decompress' is set to a true value.
        """

        # Decompress the data if requested. The flag is only present if the
        # string was written with compression enabled.

        if decompress:
            flag = chr(self.record[self.start])
            self.start += 1
        else:
            flag = "-"

        length = self.read_number()
        start = self.start
        self.start += length
        s = self.record[start:self.start].tostring()

        # Perform decompression if applicable.

        if flag == "z":
            s = zlib.decompress(s)

        # Convert strings to Unicode objects.

        return unicode(s, "utf-8")

    def read_sequence_value(self, size):

        "Read a sequence of the given 'size' from the current record."

        value, self.start = sequence_from_array(self.record, size, self.start)
        return value

    def read_sequences(self, size):

        """
        Read a count from the current record followed by that many sequences
        of the given 'size', returning the sequences as a list.
        """

        length = self.read_number()
        return [self.read_sequence_value(size) for _ in range(length)]

    def read_delta_sequence(self, size):

        """
        Read sequences of the given 'size' and convert them using the adder
        for that size.
        """

        return convert_sequence(self.read_sequences(size), get_adder(size), 0)

    def read_monotonic_sequence(self, size):

        """
        Read sequences of the given 'size' and convert them using the
        monotonic adder for that size.
        """

        return convert_sequence(self.read_sequences(size), get_monotonic_adder(size), 0)

# vim: tabstop=4 expandtab shiftwidth=4