1.1 --- a/iixr/fields.py Sat Nov 20 23:56:16 2010 +0100
1.2 +++ b/iixr/fields.py Sun Nov 21 20:44:43 2010 +0100
1.3 @@ -28,7 +28,8 @@
1.4 "Writing field data to files."
1.5
1.6 def reset(self):
1.7 - self.last_docnum = 0
1.8 + self.last_docnum = None
1.9 + self.docnum_size = None
1.10
1.11 def write_fields(self, docnum, fields):
1.12
1.13 @@ -37,13 +38,23 @@
1.14 representing field identifiers and values respectively).
1.15 """
1.16
1.17 + # Find the size of document number values.
1.18 +
1.19 + if self.docnum_size is None:
1.20 + self.docnum_size = self.get_value_size(docnum)
1.21 + self.last_docnum = self.get_initial_value(self.docnum_size)
1.22 +
1.23 + # Write the number of values per document number.
1.24 # Write the document number delta.
1.25
1.26 - self.write_number(docnum - self.last_docnum)
1.27 + output = array('B')
1.28 + vint_to_array(self.docnum_size, output)
1.29 + self.last_docnum = self.write_sequence(output, docnum, self.last_docnum, self.docnum_size, monotonic=0)
1.30
1.31 # Write the number of fields.
1.32
1.33 - self.write_number(len(fields))
1.34 + vint_to_array(len(fields), output)
1.35 + output.tofile(self.f)
1.36
1.37 # Write the fields themselves.
1.38
1.39 @@ -51,14 +62,12 @@
1.40 self.write_number(i)
1.41 self.write_string(field, 1) # compress
1.42
1.43 - self.last_docnum = docnum
1.44 -
1.45 class FieldReader(FileReader):
1.46
1.47 "Reading field data from files."
1.48
1.49 def reset(self):
1.50 - self.last_docnum = 0
1.51 + self.last_docnum = None
1.52
1.53 def read_fields(self):
1.54
1.55 @@ -67,9 +76,16 @@
1.56 number and a list of field (identifier, value) pairs.
1.57 """
1.58
1.59 - # Read the document number.
1.60 + # Read the number of values per document number.
1.61 +
1.62 + docnum_size = self.read_number()
1.63
1.64 - self.last_docnum += self.read_number()
1.65 + if self.last_docnum is None:
1.66 + self.last_docnum = self.get_initial_value(docnum_size)
1.67 +
1.68 + # Read the document number delta and add it to the last number.
1.69 +
1.70 + self.last_docnum = self.read_sequence(self.last_docnum, docnum_size, monotonic=0)
1.71
1.72 # Read the number of fields.
1.73
1.74 @@ -106,7 +122,8 @@
1.75 "Writing field index details to files."
1.76
1.77 def reset(self):
1.78 - self.last_docnum = 0
1.79 + self.last_docnum = None
1.80 + self.docnum_size = None
1.81 self.last_offset = 0
1.82
1.83 def write_document(self, docnum, offset):
1.84 @@ -116,12 +133,23 @@
1.85 document are stored in the fields file.
1.86 """
1.87
1.88 - # Write the document number and offset deltas.
1.89 + # Find the size of document number values.
1.90 +
1.91 + if self.docnum_size is None:
1.92 + self.docnum_size = self.get_value_size(docnum)
1.93 + self.last_docnum = self.get_initial_value(self.docnum_size)
1.94 +
1.95 + # Write the number of values per document number.
1.96 + # Write the document number delta.
1.97
1.98 - self.write_number(docnum - self.last_docnum)
1.99 + output = array('B')
1.100 + vint_to_array(self.docnum_size, output)
1.101 + self.last_docnum = self.write_sequence(output, docnum, self.last_docnum, self.docnum_size, monotonic=0)
1.102 + output.tofile(self.f)
1.103 +
1.104 + # Write the offset delta.
1.105 +
1.106 self.write_number(offset - self.last_offset)
1.107 -
1.108 - self.last_docnum = docnum
1.109 self.last_offset = offset
1.110
1.111 class FieldIndexReader(FileReader):
1.112 @@ -129,16 +157,26 @@
1.113 "Reading field index details from files."
1.114
1.115 def reset(self):
1.116 - self.last_docnum = 0
1.117 + self.last_docnum = None
1.118 self.last_offset = 0
1.119
1.120 def read_document(self):
1.121
1.122 "Read a document number and field file offset."
1.123
1.124 - # Read the document number delta and offset.
1.125 + # Read the number of values per document number.
1.126 +
1.127 + docnum_size = self.read_number()
1.128 +
1.129 + if self.last_docnum is None:
1.130 + self.last_docnum = self.get_initial_value(docnum_size)
1.131
1.132 - self.last_docnum += self.read_number()
1.133 + # Read the document number delta and add it to the last number.
1.134 +
1.135 + self.last_docnum = self.read_sequence(self.last_docnum, docnum_size, monotonic=0)
1.136 +
1.137 + # Read the offset.
1.138 +
1.139 self.last_offset += self.read_number()
1.140
1.141 return self.last_docnum, self.last_offset