# HG changeset patch
# User Paul Boddie
# Date 1254349355 -7200
# Node ID 197d3326e5284c797af9e76bc80d0e8bd9b5501f
# Parent  ff3800a700d52bde0b8e3f07d15b4521f2f352d9
Changed indexing interval configuration to use the Index initialiser.

diff -r ff3800a700d5 -r 197d3326e528 iixr/index.py
--- a/iixr/index.py	Wed Sep 30 22:02:51 2009 +0200
+++ b/iixr/index.py	Thu Oct 01 00:22:35 2009 +0200
@@ -215,20 +215,23 @@
 
     "An inverted index solution encapsulating the various components."
 
-    def __init__(self, pathname):
+    def __init__(self, pathname, interval=TERM_INTERVAL, doc_interval=DOCUMENT_INTERVAL, field_interval=FIELD_INTERVAL,
+        flush_interval=FLUSH_INTERVAL):
+
         self.pathname = pathname
+        self.interval = interval
+        self.doc_interval = doc_interval
+        self.field_interval = field_interval
+        self.flush_interval = flush_interval
         self.reader = None
         self.writer = None
 
-    def get_writer(self, interval=TERM_INTERVAL, doc_interval=DOCUMENT_INTERVAL, flush_interval=FLUSH_INTERVAL):
+    def get_writer(self):
 
-        """
-        Return a writer, optionally using the given indexing 'interval',
-        'doc_interval' and 'flush_interval'.
-        """
+        "Return a writer."
 
         self._ensure_directory()
-        self.writer = IndexWriter(self.pathname, interval, doc_interval, flush_interval)
+        self.writer = IndexWriter(self.pathname, self.interval, self.doc_interval, self.flush_interval)
         return self.writer
 
     def _ensure_directory(self):
@@ -288,12 +291,9 @@
         self._merge_terms()
         self._merge_fields()
 
-    def _merge_terms(self, interval=TERM_INTERVAL, doc_interval=DOCUMENT_INTERVAL):
+    def _merge_terms(self):
 
-        """
-        Merge term dictionaries using the given indexing 'interval' and
-        'doc_interval'.
-        """
+        "Merge term dictionaries."
 
         readers = []
         partitions = self.get_term_partitions()
@@ -309,7 +309,7 @@
                 partitions.remove("merged")
                 partitions.add("old-merged")
 
-            writer = get_term_writer(self.pathname, "merged", interval, doc_interval)
+            writer = get_term_writer(self.pathname, "merged", self.interval, self.doc_interval)
             merger = TermDictionaryMerger(writer, readers)
             merger.merge()
             merger.close()
@@ -324,9 +324,9 @@
             if partition != "merged":
                 rename_term_files(self.pathname, partition, "merged")
 
-    def _merge_fields(self, interval=FIELD_INTERVAL):
+    def _merge_fields(self):
 
-        "Merge field dictionaries using the given indexing 'interval'."
+        "Merge field dictionaries."
 
         readers = []
         partitions = self.get_field_partitions()
@@ -342,7 +342,7 @@
                 partitions.remove("merged")
                 partitions.add("old-merged")
 
-            writer = get_field_writer(self.pathname, "merged", interval)
+            writer = get_field_writer(self.pathname, "merged", self.field_interval)
             merger = FieldDictionaryMerger(writer, readers)
             merger.merge()
             merger.close()
diff -r ff3800a700d5 -r 197d3326e528 test.py
--- a/test.py	Wed Sep 30 22:02:51 2009 +0200
+++ b/test.py	Thu Oct 01 00:22:35 2009 +0200
@@ -421,8 +421,8 @@
     (["sea", "shore"], [(36, [6, 7])])
     ]
 
-index = Index("test_index")
-wi = index.get_writer(3, 2, 6)
+index = Index("test_index", 3, 2, 3, 6)
+wi = index.get_writer()
 for docnum, text in docs:
     doc = Document(docnum)
     for position, term in enumerate(text.split()):
@@ -465,8 +465,8 @@
 # Test index updates.
 
 index = Index("test_index")
-index2 = Index("test_index2")
-wi = index2.get_writer(3, 2, 6)
+index2 = Index("test_index2", 3, 2, 3, 6)
+wi = index2.get_writer()
 for docnum, text in docs:
 
     # Add the same documents but with different numbers.