1.1 --- a/iixr.py Sat Sep 12 18:48:23 2009 +0200
1.2 +++ b/iixr.py Mon Sep 14 21:23:32 2009 +0200
1.3 @@ -39,6 +39,10 @@
1.4 FIELD_INTERVAL = 100
1.5 FLUSH_INTERVAL = 10000
1.6
1.7 +WRITE_CACHE_SIZE = 100000
1.8 +READ_CACHE_SIZE = 10000
1.9 +READ_CACHE_RESIZE = 5000
1.10 +
1.11 TERM_FILENAMES = "terms", "terms_index", "positions", "positions_index"
1.12 FIELD_FILENAMES = "fields", "fields_index"
1.13
1.14 @@ -182,7 +186,7 @@
1.15 def write(self, s):
1.16 self.cache.append(s)
1.17 self.cache_length += len(s)
1.18 - if self.cache_length >= 1000:
1.19 + if self.cache_length >= WRITE_CACHE_SIZE:
1.20 self.flush()
1.21
1.22 def tell(self):
1.23 @@ -264,7 +268,7 @@
1.24 # Read the needed number of characters, if possible.
1.25
1.26 if needed > 0:
1.27 - s = self.f.read(max(needed, 1000))
1.28 + s = self.f.read(max(needed, READ_CACHE_SIZE))
1.29 self.cache += s
1.30 self.cache_length += len(s)
1.31
1.32 @@ -295,12 +299,12 @@
1.33 def _seek_cache(self, delta):
1.34 next_start = self.cache_start + delta
1.35
1.36 - if next_start >= len(self.cache):
1.37 + if next_start > 0 and next_start >= len(self.cache):
1.38 self.reset_cache()
1.39
1.40 # If the cache is too big, resize it.
1.41
1.42 - elif next_start > 1000:
1.43 + elif next_start > READ_CACHE_RESIZE:
1.44 self.cache = self.cache[next_start:]
1.45 self.cache_length = len(self.cache)
1.46 self.cache_start = 0