#!/usr/bin/env python

"""
Generic file access.

Copyright (C) 2009, 2010, 2011 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along
with this program. If not, see <http://www.gnu.org/licenses/>.
"""

from iixr.data import *
from array import array
import zlib

# Constants.

class File:

    """
    A basic file abstraction.

    Wraps an already opened file-like object 'f' and holds two byte arrays:
    'data' (the master buffer) and 'record' (the buffer for the record
    currently being written or read).
    """

    def __init__(self, f):

        "Initialise the abstraction around the open file-like object 'f'."

        self.f = f
        self.data = array('B')      # master buffer
        self.record = array('B')    # record buffer
        self.data_start = 0         # offset used by 'rewind'

    def reset(self):

        "To be used to reset the state of the reader or writer between records."

        pass

    def seek(self, offset):

        "Move to 'offset' in the underlying file and reset the record state."

        self.f.seek(offset)
        self.reset()

    def rewind(self):

        "Move back to 'data_start' in the underlying file and reset the state."

        self.f.seek(self.data_start)
        self.reset()

    def close(self):

        "Close the underlying file, if still open. Safe to call repeatedly."

        if self.f is not None:
            self.f.close()
            self.f = None

class FileWriter(File):

    """
    Writing basic data types to files.

    Values are accumulated in the record buffer by the 'write_*' methods and
    are only sent to the file by 'end_record', which prefixes the record with
    its length as produced by 'vint'.
    """

    def __init__(self, f):

        "Initialise the writer around the open file-like object 'f'."

        File.__init__(self, f)
        self.written = 0            # bytes emitted by 'end_record' so far

    def tell(self):

        "Return the number of bytes written by completed records."

        # NOTE: Will not be accurate within the current record.
        return self.written

    def begin_record(self):

        "Start a record. Nothing needs to be done when writing."

        pass

    def end_record(self):

        """
        Write the buffered record, preceded by its encoded length, and start a
        fresh record buffer. An empty record buffer causes nothing to be
        written.
        """

        if self.record:
            length = len(self.record)
            size = vint(length)
            self.f.write(size)
            self.record.tofile(self.f)
            self.written += len(size) + length
            self.record = array('B')

    def write_number(self, number):

        "Write 'number' to the file using a variable length encoding."

        vint_to_array(number, self.record)

    def write_numbers(self, numbers):

        "Write 'numbers' to the file using a variable length encoding."

        for number in numbers:
            vint_to_array(number, self.record)

    def write_string(self, s, compress=0):

        """
        Write 's' to the file, recording its length and compressing the string
        if 'compress' is set to a true value.

        When 'compress' is true, a one character flag precedes the length: "z"
        if the compressed form was shorter and has been stored, "-" if the
        original data has been stored. When 'compress' is false, no flag is
        written at all, so the reader must be given a matching 'decompress'
        setting.
        """

        # Convert Unicode objects to strings.

        if isinstance(s, unicode):
            s = s.encode("utf-8")

        # Compress the string if requested.

        if compress:
            cs = zlib.compress(s)

            # Take any shorter than the original.

            if len(cs) < len(s):
                flag = "z"
                s = cs
            else:
                flag = "-"

        else:
            flag = ""

        # Write the length of the data before the data itself.

        length = len(s)
        self.record.fromstring("".join([flag, vint(length), s]))

    def write_sequence_value(self, value, size):

        "Add 'value', a sequence element of the given 'size', to the record."

        sequence_to_array(value, size, self.record)

    def write_sequence_values(self, values, size):

        """
        Write the number of 'values' followed by each value, where each value
        has the given 'size'.
        """

        vint_to_array(len(values), self.record)
        for value in values:
            self.write_sequence_value(value, size)

    def write_delta_sequence(self, values, size):

        """
        Convert 'values' using the subtractor chosen for the first value and
        write the result, where each value has the given 'size'.

        An empty sequence is written as a zero length sequence; there is no
        first element from which a subtractor could be chosen.

        NOTE: The result of 'convert_sequence' is not used here, so 'values' is
        NOTE: presumably converted in place - confirm against iixr.data.
        """

        if values:
            convert_sequence(values, get_subtractor(values[0]))
        self.write_sequence_values(values, size)

    def write_monotonic_sequence(self, values, size):

        """
        Convert 'values' using the monotonic subtractor chosen for the first
        value and write the result, where each value has the given 'size'.

        An empty sequence is written as a zero length sequence; there is no
        first element from which a subtractor could be chosen.

        NOTE: The result of 'convert_sequence' is not used here, so 'values' is
        NOTE: presumably converted in place - confirm against iixr.data.
        """

        if values:
            convert_sequence(values, get_monotonic_subtractor(values[0]))
        self.write_sequence_values(values, size)

    def flush(self):

        """
        Finish the current record and write out the master buffer, leaving a
        fresh master buffer. Does nothing if the file has been closed.
        """

        if self.f is not None:
            self.end_record()
            self.data.tofile(self.f)
            self.data = array('B')

    def close(self):

        "Flush any pending data and close the underlying file."

        self.flush()
        File.close(self)

class FileReader(File):

    """
    Reading basic data types from files.

    A record is loaded into the record buffer by 'begin_record'; the 'read_*'
    methods then consume it, tracking the current position in 'self.start'.
    """

    def __init__(self, f):

        "Initialise the reader around the open file-like object 'f'."

        File.__init__(self, f)
        self.begin()

    def tell(self):

        "Return the position reported by the underlying file."

        # NOTE: Will not be accurate within the current record.
        return self.f.tell()

    def begin(self):

        "Initialise file-wide parameters."

        pass

    def begin_record(self):

        """
        Load the next record: read its length from the file and then that many
        bytes into a fresh record buffer, resetting the read position.
        """

        self.record = array('B')
        self.start = 0
        try:
            size = self.read_number_from_file()
            self.record.fromfile(self.f, size)

        # At the end of the file, the record buffer is left holding whatever
        # could be read, which may be nothing at all.

        except EOFError:
            pass

    def end_record(self):

        "Finish a record. Nothing needs to be done when reading."

        pass

    def read_number_from_file(self):

        """
        Read a number from the file.

        Raises EOFError (from the array's 'fromfile' method) if the file ends
        before the number is complete.
        """

        # Read each byte, adding it to the number.

        f = self.f
        a = array('B')
        fromfile = a.fromfile

        fromfile(f, 1)
        csd = a[-1]

        # A single byte without the top bit set is the number itself.

        if csd < 128:
            return csd

        # Otherwise, keep reading for as long as the top bit is set and decode
        # the collected bytes.

        else:
            while csd & 128:
                fromfile(f, 1)
                csd = a[-1]
            return vint_from_array(a)

    def read_number(self):

        "Read a number from the current record."

        n, self.start = vint_from_array_start(self.record, self.start)
        return n

    def read_numbers(self, n):

        "Read 'n' numbers from the current record, returning them as a list."

        return [self.read_number() for _ in range(n)]

    def read_string(self, decompress=0):

        """
        Read a string from the current record, decompressing the stored data if
        'decompress' is set to a true value.

        A flag character is only expected in the record if 'decompress' is
        true. The result is returned as a Unicode object decoded from UTF-8.
        """

        # Decompress the data if requested.

        if decompress:
            flag = chr(self.record[self.start])
            self.start += 1
        else:
            flag = "-"

        length = self.read_number()
        start = self.start
        self.start += length
        s = self.record[start:self.start].tostring()

        # Perform decompression if applicable.

        if flag == "z":
            s = zlib.decompress(s)

        # Convert strings to Unicode objects.

        return unicode(s, "utf-8")

    def read_sequence_value(self, size):

        "Read a sequence element of the given 'size' from the current record."

        value, self.start = sequence_from_array(self.record, size, self.start)
        return value

    def read_sequences(self, size):

        """
        Read a count followed by that many values, each of the given 'size',
        returning the values as a list.
        """

        length = self.read_number()
        return [self.read_sequence_value(size) for _ in range(length)]

    def read_delta_sequence(self, size):

        """
        Read a sequence of values, each of the given 'size', and convert it
        using the adder chosen for the first value.

        An empty stored sequence is returned as an empty list; there is no
        first element from which an adder could be chosen.

        NOTE: The result of 'convert_sequence' is not used here, so 'values' is
        NOTE: presumably converted in place - confirm against iixr.data.
        """

        values = self.read_sequences(size)
        if values:
            convert_sequence(values, get_adder(values[0]))
        return values

    def read_monotonic_sequence(self, size):

        """
        Read a sequence of values, each of the given 'size', and convert it
        using the monotonic adder chosen for the first value.

        An empty stored sequence is returned as an empty list; there is no
        first element from which an adder could be chosen.

        NOTE: The result of 'convert_sequence' is not used here, so 'values' is
        NOTE: presumably converted in place - confirm against iixr.data.
        """

        values = self.read_sequences(size)
        if values:
            convert_sequence(values, get_monotonic_adder(values[0]))
        return values

# vim: tabstop=4 expandtab shiftwidth=4