#!/usr/bin/env python

"""
Generic file access.

Copyright (C) 2009 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along
with this program. If not, see <http://www.gnu.org/licenses/>.
"""

from iixr.data import vint
import bz2, zlib

# Constants.

# Number of pending characters at which a writer flushes its cache.
WRITE_CACHE_SIZE = 100000

# Minimum number of characters requested from the file when a reader's cache
# needs refilling.
READ_CACHE_SIZE = 10000

# Cache position beyond which a reader discards the consumed part of its cache.
READ_CACHE_RESIZE = 5000

# Compression functions in the order they are tried, each paired with the
# one-character flag written before the data it produced.
compressors = [("b", bz2.compress), ("z", zlib.compress)]

# Decompression functions keyed by the flag found before the stored data.
decompressors = {"b" : bz2.decompress, "z" : zlib.decompress}

class File:

    "A basic file abstraction."

    def __init__(self, f):

        "Initialise the abstraction with the file-like object 'f'."

        self.f = f
        self.reset()

    def reset(self):

        "To be used to reset the state of the reader or writer between records."

        pass

    def rewind(self):

        "Seek to the start of the file and reset the record state."

        self.seek(0)
        self.reset()

    def seek(self, offset):

        "To be defined by readers."

        pass

    def flush(self):

        "To be defined by writers."

        pass

    def close(self):

        """
        Flush any pending data and close the underlying file. Calling this
        method again once the file has been closed has no effect.
        """

        if self.f is not None:
            try:
                self.flush()
            finally:

                # Release the underlying file even if flushing failed, so that
                # the handle is never left open or referenced.

                f, self.f = self.f, None
                f.close()

class FileWriter(File):

    "Writing basic data types to files."

    def __init__(self, f):

        "Initialise the writer with the file-like object 'f'."

        File.__init__(self, f)

        # Pending strings and their combined length.

        self.cache = []
        self.cache_length = 0

    def write_number(self, number):

        "Write 'number' to the file using a variable length encoding."

        self.write(vint(number))

    def write_string(self, s, compress=0):

        """
        Write 's' to the file, recording its length and compressing the string
        if 'compress' is set to a true value.

        With compression requested, a one-character flag precedes the length:
        the flag of the compressor used, or "-" if no compressor produced a
        shorter string. Without compression, no flag is written.
        """

        # Convert Unicode objects to strings.

        if isinstance(s, unicode):
            s = s.encode("utf-8")

        # Compress the string if requested.

        if compress:
            for flag, fn in compressors:
                cs = fn(s)

                # Take the first string shorter than the original.

                if len(cs) < len(s):
                    s = cs
                    break

            # No compressor helped: store the data as it is.

            else:
                flag = "-"

        else:
            flag = ""

        # Write the length of the data before the data itself.

        length = len(s)
        self.write(flag + vint(length) + s)

    # Cache-affected methods.

    def write(self, s):

        """
        Add the string 's' to the pending data, flushing the pending data once
        it reaches WRITE_CACHE_SIZE characters.
        """

        self.cache.append(s)
        self.cache_length += len(s)
        if self.cache_length >= WRITE_CACHE_SIZE:
            self.flush()

    def tell(self):

        "Return the position in the file, counting data not yet flushed."

        return self.f.tell() + self.cache_length

    def flush(self):

        "Write all pending data to the underlying file and empty the cache."

        self.f.write("".join(self.cache))
        self.cache = []
        self.cache_length = 0

class FileReader(File):

    "Reading basic data types from files."

    def __init__(self, f):

        "Initialise the reader with the file-like object 'f'."

        File.__init__(self, f)
        self.reset_cache(0)

    def reset_cache(self, offset):

        """
        Discard the cached data and position the underlying file at the given
        'offset', which becomes the file position of the start of the cache.
        """

        self.cache = ""
        self.cache_length = 0
        self.cache_start = 0
        self.cache_offset = offset
        self.f.seek(offset)

    def resize_cache(self, next_start):

        """
        Discard the cached data before the cache position 'next_start', making
        that position the new start of the cache.
        """

        self.cache = self.cache[next_start:]
        self.cache_length = len(self.cache)
        self.cache_start = 0
        self.cache_offset += next_start

    def read_number(self):

        """
        Read a number from the file, raising EOFError if the data runs out
        before the number is complete.
        """

        # Read each byte, adding it to the number.

        shift = 0
        number = 0
        read = self.read

        try:
            csd = ord(read(1))

            # A set top bit indicates that more bytes follow; the remaining
            # seven bits are the next more significant part of the number.

            while csd & 128:
                number += ((csd & 127) << shift)
                shift += 7
                csd = ord(read(1))

            # The final byte has its top bit clear.

            number += (csd << shift)

        # An empty string is obtained at the end of the file, and ord rejects
        # it with a TypeError.

        except TypeError:
            raise EOFError

        return number

    def read_string(self, decompress=0):

        """
        Read a string from the file, decompressing the stored data if
        'decompress' is set to a true value. The result is a Unicode object
        decoded from UTF-8.
        """

        # Decompress the data if requested.

        if decompress:
            flag = self.read(1)
        else:
            flag = "-"

        length = self.read_number()
        s = self.read(length)

        # Perform decompression if applicable.

        if flag != "-":
            fn = decompressors[flag]
            s = fn(s)

        # Convert strings to Unicode objects.

        return unicode(s, "utf-8")

    # Cache-affected methods.

    def read(self, n):

        """
        Return up to 'n' characters from the current position, advancing the
        position by the number of characters returned. Fewer characters are
        returned if the file does not provide enough data.
        """

        needed = n - (self.cache_length - self.cache_start)

        # Read the needed number of characters, if possible.

        if needed > 0:
            s = self.f.read(max(needed, READ_CACHE_SIZE))
            self.cache += s
            self.cache_length += len(s)

        # Get the end of the requested block.

        next_start = self.cache_start + n
        s = self.cache[self.cache_start:next_start]

        # Reposition the pointer to the cache.

        self._seek_cache(len(s))
        return s

    def tell(self):

        "Return the position of the reader in the file."

        return self.cache_offset + self.cache_start

    def seek(self, offset):

        "Move the reader to the given 'offset' in the file."

        current = self.tell()

        # If seeking forward, attempt to navigate the cache.

        if offset >= current:
            self._seek_cache(offset - current)

        # Otherwise, discard the cache and start again at the offset.

        else:
            self.reset_cache(offset)

    def _seek_cache(self, delta):

        "Advance the position in the cache by 'delta' characters."

        next_start = self.cache_start + delta

        # If the new position is at or beyond the end of the cache, discard
        # the cache and position the file there instead. Position zero of an
        # empty cache is not treated in this way.

        if next_start > 0 and next_start >= len(self.cache):
            self.reset_cache(self.cache_offset + next_start)

        # If the cache is too big, resize it.

        elif next_start > READ_CACHE_RESIZE:
            self.resize_cache(next_start)

        # Otherwise, just reference the next part of the cache.

        else:
            self.cache_start = next_start

class FileOpener:

    "Opening files using their filenames."

    def __init__(self, filename):

        "Initialise the opener with the given 'filename'."

        self.filename = filename

    def open(self, mode):

        "Open the file using the given 'mode', returning the file object."

        return open(self.filename, mode)

    def close(self):

        "Do nothing: the opener holds no open file of its own."

        pass

# vim: tabstop=4 expandtab shiftwidth=4