1.1 --- a/iixr/fields.py Fri Oct 02 00:22:10 2009 +0200
1.2 +++ b/iixr/fields.py Sat Oct 03 03:03:32 2009 +0200
1.3 @@ -21,6 +21,8 @@
1.4 from iixr.files import *
1.5 from bisect import bisect_right # to find terms in the dictionary index
1.6
1.7 +DOCUMENT_CACHE_LIMIT = 10000
1.8 +
1.9 class FieldWriter(FileWriter):
1.10
1.11 "Writing field data to files."
1.12 @@ -176,6 +178,7 @@
1.13 self.field_reader = field_reader
1.14 self.field_index_reader = field_index_reader
1.15
1.16 + self.cache = {}
1.17 self.docs = []
1.18 try:
1.19 while 1:
1.20 @@ -219,6 +222,9 @@
1.21
1.22 "Read the fields of the document with the given 'docnum'."
1.23
1.24 + if self.cache.has_key(docnum):
1.25 + return self.cache[docnum]
1.26 +
1.27 i = bisect_right(self.docs, (docnum, self.max_offset)) - 1
1.28
1.29 # Get the entry position providing the term or one preceding it.
1.30 @@ -243,6 +249,16 @@
1.31 # If the document is found, return the fields.
1.32
1.33 if docnum == found_docnum:
1.34 +
1.35 + # Store the fields in the cache, first evicting one arbitrary entry if
1.36 + # the limit has been reached.
1.37 +
1.38 + keys = self.cache.keys()
1.39 +
1.40 + if len(keys) == DOCUMENT_CACHE_LIMIT:
1.41 + del self.cache[keys[0]]
1.42 +
1.43 + self.cache[docnum] = fields
1.44 return fields
1.45 else:
1.46 return None
2.1 --- a/iixr/files.py Fri Oct 02 00:22:10 2009 +0200
2.2 +++ b/iixr/files.py Sat Oct 03 03:03:32 2009 +0200
2.3 @@ -19,6 +19,7 @@
2.4 """
2.5
2.6 from iixr.data import vint
2.7 +from array import array
2.8 import zlib
2.9
2.10 # Constants.
2.11 @@ -105,24 +106,19 @@
2.12
2.13 # Read each byte, adding it to the number.
2.14
2.15 - read = self.f.read
2.16 + a = array('B')
2.17 + fromfile = a.fromfile
2.18 + f = self.f
2.19
2.20 - c = read(1)
2.21 - if c:
2.22 - csd = ord(c)
2.23 - if csd < 128:
2.24 - return csd
2.25 - else:
2.26 - shift = 0
2.27 - number = 0
2.28 - while csd & 128:
2.29 - number += ((csd & 127) << shift)
2.30 - shift += 7
2.31 - csd = ord(read(1))
2.32 - else:
2.33 - return number + (csd << shift)
2.34 + fromfile(f, 1)
2.35 + csd = a[-1]
2.36 + if csd < 128:
2.37 + return csd
2.38 else:
2.39 - raise EOFError
2.40 + while csd & 128:
2.41 + fromfile(f, 1)
2.42 + csd = a[-1]
2.43 + return sum([((csd & 127) << (number * 7)) for (number, csd) in enumerate(a)])
2.44
2.45 def read_string(self, decompress=0):
2.46