1.1 --- a/iixr.py Sun Sep 06 02:12:16 2009 +0200
1.2 +++ b/iixr.py Tue Sep 08 00:13:23 2009 +0200
1.3 @@ -37,7 +37,7 @@
1.4 TERM_INTERVAL = 100
1.5 DOCUMENT_INTERVAL = 100
1.6 FIELD_INTERVAL = 100
1.7 -FLUSH_INTERVAL = 1000000
1.8 +FLUSH_INTERVAL = 100000
1.9
1.10 TERM_FILENAMES = "terms", "terms_index", "positions", "positions_index"
1.11 FIELD_FILENAMES = "fields", "fields_index"
1.12 @@ -83,10 +83,10 @@
1.13 if number < 0:
1.14 raise ValueError, "Number %r is negative." % number
1.15
1.16 - # Special case: one byte containing zero.
1.17 -
1.18 - elif number == 0:
1.19 - self.f.write(chr(0))
1.20 + # Special case: one byte containing a 7-bit number.
1.21 +
1.22 + elif number < 128:
1.23 + self.f.write(chr(number))
1.24 return
1.25
1.26 # Write the number from least to most significant digits.
1.27 @@ -1527,8 +1527,7 @@
1.28 self.terms = {}
1.29 self.docs = {}
1.30
1.31 - self.position_counter = 0
1.32 - self.field_counter = 0
1.33 + self.doc_counter = 0
1.34
1.35 def add_position(self, term, docnum, position):
1.36
1.37 @@ -1549,11 +1548,6 @@
1.38
1.39 doc.append(position)
1.40
1.41 - self.position_counter += 1
1.42 - if self.flush_interval and self.position_counter >= self.flush_interval:
1.43 - self.flush_terms()
1.44 - self.position_counter = 0
1.45 -
1.46 def add_field(self, docnum, identifier, value):
1.47
1.48 """
1.49 @@ -1568,11 +1562,6 @@
1.50
1.51 doc_fields.append((identifier, unicode(value))) # convert to string
1.52
1.53 - self.field_counter += 1
1.54 - if self.flush_interval and self.field_counter >= self.flush_interval:
1.55 - self.flush_fields()
1.56 - self.field_counter = 0
1.57 -
1.58 def set_fields(self, docnum, fields):
1.59
1.60 """
1.61 @@ -1582,10 +1571,15 @@
1.62
1.63 self.docs[docnum] = fields
1.64
1.65 - self.field_counter += len(fields)
1.66 - if self.flush_interval and self.field_counter >= self.flush_interval:
1.67 + def commit_document(self):
1.68 +
1.69 + "Update the document counter, flushing terms and fields if appropriate."
1.70 +
1.71 + self.doc_counter += 1
1.72 + if self.flush_interval and self.doc_counter >= self.flush_interval:
1.73 + self.flush_terms()
1.74 self.flush_fields()
1.75 - self.field_counter = 0
1.76 + self.doc_counter = 0
1.77
1.78 def get_term_writer(self):
1.79