# HG changeset patch # User Paul Boddie # Date 1252956212 -7200 # Node ID a0f37b0ef3502b8fc3b5715ce1f11f4242036516 # Parent 76bd0bcfa998cf54b8de031b82ff1db309699442 Added constants for various measures. Prevented unnecessary read cache resets where the cache offset is zero. diff -r 76bd0bcfa998 -r a0f37b0ef350 iixr.py --- a/iixr.py Sat Sep 12 18:48:23 2009 +0200 +++ b/iixr.py Mon Sep 14 21:23:32 2009 +0200 @@ -39,6 +39,10 @@ FIELD_INTERVAL = 100 FLUSH_INTERVAL = 10000 +WRITE_CACHE_SIZE = 100000 +READ_CACHE_SIZE = 10000 +READ_CACHE_RESIZE = 5000 + TERM_FILENAMES = "terms", "terms_index", "positions", "positions_index" FIELD_FILENAMES = "fields", "fields_index" @@ -182,7 +186,7 @@ def write(self, s): self.cache.append(s) self.cache_length += len(s) - if self.cache_length >= 1000: + if self.cache_length >= WRITE_CACHE_SIZE: self.flush() def tell(self): @@ -264,7 +268,7 @@ # Read the needed number of characters, if possible. if needed > 0: - s = self.f.read(max(needed, 1000)) + s = self.f.read(max(needed, READ_CACHE_SIZE)) self.cache += s self.cache_length += len(s) @@ -295,12 +299,12 @@ def _seek_cache(self, delta): next_start = self.cache_start + delta - if next_start >= len(self.cache): + if next_start > 0 and next_start >= len(self.cache): self.reset_cache() # If the cache is too big, resize it. - elif next_start > 1000: + elif next_start > READ_CACHE_RESIZE: self.cache = self.cache[next_start:] self.cache_length = len(self.cache) self.cache_start = 0