#!/usr/bin/env python

"""
Generic file access.

Copyright (C) 2009 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along
with this program. If not, see <http://www.gnu.org/licenses/>.
"""

import bz2
import zlib

from iixr.data import vint

# NOTE: This module relies on the 'unicode' built-in in write_string and
# read_string.

# Constants.

# Amount of pending data at which a writer flushes its cache to the file.
WRITE_CACHE_SIZE = 100000

# Minimum amount of data requested from the file when a reader refills its
# cache.
READ_CACHE_SIZE = 10000

# Size of the consumed part of a reader's cache beyond which that part is
# discarded.
READ_CACHE_RESIZE = 5000

# Compression functions are tried in the order given here; the flag is stored
# in front of the data so that the matching decompressor can be found.
compressors = [("b", bz2.compress), ("z", zlib.compress)]
decompressors = {"b" : bz2.decompress, "z" : zlib.decompress}

class File:

    "A basic file abstraction."

    def __init__(self, f):

        "Initialise the abstraction with the underlying file object 'f'."

        self.f = f
        self.reset()

    def reset(self):

        "To be used to reset the state of the reader or writer between records."

        pass

    def rewind(self):

        "Seek to the start of the file and reset the record state."

        self.seek(0)
        self.reset()

    def seek(self, offset):

        "To be defined by readers."

        pass

    def flush(self):

        "To be defined by writers."

        pass

    def close(self):

        """
        Flush any pending data and close the underlying file. Closing more than
        once has no further effect.
        """

        if self.f is not None:
            self.flush()
            self.f.close()
            self.f = None

class FileWriter(File):

    "Writing basic data types to files."

    def __init__(self, f):

        "Initialise the writer with the underlying file object 'f'."

        File.__init__(self, f)

        # Pending strings and their combined length.

        self.cache = []
        self.cache_length = 0

    def write_number(self, number):

        "Write 'number' to the file using a variable length encoding."

        self.write(vint(number))

    def write_string(self, s, compress=0):

        """
        Write 's' to the file, recording its length and compressing the string
        if 'compress' is set to a true value.

        When 'compress' is set, a one character flag precedes the length: the
        flag of the compressor used, or "-" if no compressor produced a shorter
        string. When 'compress' is not set, no flag is written.
        """

        # Convert Unicode objects to strings.

        if isinstance(s, unicode):
            s = s.encode("utf-8")

        # Compress the string if requested.

        if compress:
            for flag, fn in compressors:
                cs = fn(s)

                # Take the first string shorter than the original.

                if len(cs) < len(s):
                    s = cs
                    break

            # No compressor helped: mark the data as stored uncompressed.

            else:
                flag = "-"

        else:
            flag = ""

        # Write the length of the data before the data itself.

        length = len(s)
        self.write(flag + vint(length) + s)

    # Cache-affected methods.

    def write(self, s):

        """
        Add 's' to the pending data, flushing to the file once the pending data
        reaches WRITE_CACHE_SIZE.
        """

        self.cache.append(s)
        self.cache_length += len(s)
        if self.cache_length >= WRITE_CACHE_SIZE:
            self.flush()

    def tell(self):

        "Return the file position, taking pending data into account."

        return self.f.tell() + self.cache_length

    def flush(self):

        "Write all pending data to the file and empty the cache."

        self.f.write("".join(self.cache))
        self.cache = []
        self.cache_length = 0

class FileReader(File):

    "Reading basic data types from files."

    def __init__(self, f):

        "Initialise the reader with the underlying file object 'f'."

        File.__init__(self, f)
        self.reset_cache()

    def reset_cache(self):

        "Discard all cached data."

        # The cached data, its length, and the index of the next unread
        # character within it.

        self.cache = ""
        self.cache_length = 0
        self.cache_start = 0

    def read_number(self):

        """
        Read a number from the file, raising EOFError if the data ends before
        the number is complete.
        """

        # Read each byte, adding it to the number. Each byte contributes seven
        # bits, least significant group first; a set top bit indicates that
        # another byte follows.

        shift = 0
        number = 0
        read = self.read

        try:
            csd = ord(read(1))
            while csd & 128:
                number += ((csd & 127) << shift)
                shift += 7
                csd = ord(read(1))

        # At the end of the data, read returns an empty string which ord
        # rejects.

        except TypeError:
            raise EOFError

        # The final byte has no top bit set and is used in full.

        number += (csd << shift)
        return number

    def read_string(self, decompress=0):

        """
        Read a string from the file, decompressing the stored data if
        'decompress' is set to a true value. The result is returned as a
        Unicode object decoded from UTF-8.
        """

        # Read the compression flag only if one was written.

        if decompress:
            flag = self.read(1)
        else:
            flag = "-"

        length = self.read_number()
        s = self.read(length)

        # Perform decompression if applicable.

        if flag != "-":
            fn = decompressors[flag]
            s = fn(s)

        # Convert strings to Unicode objects.

        return unicode(s, "utf-8")

    # Cache-affected methods.

    def read(self, n):

        """
        Read and return up to 'n' characters, refilling the cache from the file
        when it does not hold enough unread data. Fewer characters are returned
        if the file provides no more data.
        """

        needed = n - (self.cache_length - self.cache_start)

        # Read the needed number of characters, if possible.

        if needed > 0:
            s = self.f.read(max(needed, READ_CACHE_SIZE))
            self.cache += s
            self.cache_length += len(s)

        # Get the end of the requested block.

        next_start = self.cache_start + n
        s = self.cache[self.cache_start:next_start]

        # Reposition the pointer to the cache.

        self._seek_cache(len(s))
        return s

    def tell(self):

        "Return the read position, taking unread cached data into account."

        return self.f.tell() - self.cache_length + self.cache_start

    def seek(self, offset):

        """
        Move the read position to the absolute 'offset'.

        If the position lies within the unread cached data, only the cache
        pointer is moved and the underlying file is left untouched, since the
        file position must continue to indicate the end of the cached data.
        Otherwise, the underlying file is repositioned and the cache is
        discarded.
        """

        delta = offset - self.tell()
        unread = self.cache_length - self.cache_start

        # If seeking forward, attempt to navigate the cache.

        if 0 <= delta < unread:
            self._seek_cache(delta)

        # Seeking backward or beyond the cache requires the file itself.

        else:
            self.f.seek(offset)
            self.reset_cache()

    def _seek_cache(self, delta):

        "Advance the cache pointer by 'delta' characters."

        next_start = self.cache_start + delta

        # If all of the cache has been consumed, discard it.

        if next_start > 0 and next_start >= len(self.cache):
            self.reset_cache()

        # If the cache is too big, resize it.

        elif next_start > READ_CACHE_RESIZE:
            self.cache = self.cache[next_start:]
            self.cache_length = len(self.cache)
            self.cache_start = 0

        # Otherwise, just reference the next part of the cache.

        else:
            self.cache_start = next_start

class FileOpener:

    "Opening files using their filenames."

    def __init__(self, filename):

        "Initialise the opener with the given 'filename'."

        self.filename = filename

    def open(self, mode):

        "Open the file using the given 'mode', returning a file object."

        return open(self.filename, mode)

    def close(self):

        "Do nothing: the opener itself holds no open file."

        pass

# vim: tabstop=4 expandtab shiftwidth=4