1.1 --- a/iixr/data.py Tue Feb 08 00:08:27 2011 +0100
1.2 +++ b/iixr/data.py Thu Feb 10 01:19:13 2011 +0100
1.3 @@ -35,14 +35,23 @@
1.4 last = current
1.5 i += 1
1.6
1.7 +def op_seq_monotonic(x, y, op):
1.8 + return tuple([op(a, b) for a, b in zip(x, y)])
1.9 +
1.10 def add_seq_monotonic(x, y):
1.11 return op_seq_monotonic(x, y, operator.add)
1.12
1.13 def sub_seq_monotonic(x, y):
1.14 return op_seq_monotonic(x, y, operator.sub)
1.15
1.16 -def op_seq_monotonic(x, y, op):
1.17 - return tuple([op(a, b) for a, b in zip(x, y)])
1.18 +def op_first_monotonic(x, y, op):
1.19 + return (op(x[0], y[0]),) + tuple(zip(x[1:], y[1:]))
1.20 +
1.21 +def add_first_monotonic(x, y):
1.22 + return op_first_monotonic(x, y, operator.add)
1.23 +
1.24 +def sub_first_monotonic(x, y):
1.25 + return op_first_monotonic(x, y, operator.sub)
1.26
1.27 def add_seq(x, y):
1.28 length = min(len(x), len(y))
1.29 @@ -72,6 +81,9 @@
1.30 def is_sequence(value):
1.31 return isinstance(value, (list, tuple))
1.32
1.33 +def sizeof(value):
1.34 + return is_sequence(value) and len(value) or 0
1.35 +
1.36 def get_monotonic_adder(value):
1.37 return is_sequence(value) and add_seq_monotonic or operator.add
1.38
1.39 @@ -176,26 +188,28 @@
1.40
1.41 # Sequence serialisation.
1.42
1.43 -def sequence_to_array(value, bytes):
1.44 +def sequence_to_array(value, size, bytes):
1.45
1.46 - "Write the given sequence 'value' to 'bytes'."
1.47 + "Write the given sequence 'value' with the given 'size' to 'bytes'."
1.48
1.49 - size = is_sequence(value) and len(value) or 0
1.50 - vint_to_array(size, bytes)
1.51 if size:
1.52 - for a in value:
1.53 - vint_to_array(a, bytes)
1.54 + i = 0
1.55 + limit = min(len(value), size)
1.56 + while i < limit:
1.57 + vint_to_array(value[i], bytes)
1.58 + i += 1
1.59 + for i in range(limit, size): # zero-pad to 'size'; bounded, unlike a 'while' that never advances i
1.60 + vint_to_array(0, bytes)
1.61 else:
1.62 vint_to_array(value, bytes)
1.63
1.64 -def sequence_from_array(bytes, start=0):
1.65 +def sequence_from_array(bytes, size, start=0):
1.66
1.67 """
1.68 - Read a sequence from 'bytes', returning the sequence and the first position
1.69 - after the sequence.
1.70 + Read a sequence from 'bytes' having the given 'size', returning the sequence
1.71 + and the first position after the sequence.
1.72 """
1.73
1.74 - size, start = vint_from_array_start(bytes, start)
1.75 if size:
1.76 j = 0
1.77 value = []
2.1 --- a/iixr/fields.py Tue Feb 08 00:08:27 2011 +0100
2.2 +++ b/iixr/fields.py Thu Feb 10 01:19:13 2011 +0100
2.3 @@ -28,6 +28,12 @@
2.4
2.5 "Writing field data to files."
2.6
2.7 + def begin(self, docnum_size):
2.8 + self.write_number(docnum_size)
2.9 + self.end_record()
2.10 + self.docnum_size = docnum_size
2.11 + self.data_start = self.tell()
2.12 +
2.13 def reset(self):
2.14 self.end_record()
2.15 self.last_docnum = None
2.16 @@ -50,7 +56,7 @@
2.17
2.18 # Write the document number.
2.19
2.20 - self.write_sequence_value(docnum_seq)
2.21 + self.write_sequence_value(docnum_seq, self.docnum_size)
2.22
2.23 # Write the number of fields.
2.24
2.25 @@ -68,6 +74,14 @@
2.26
2.27 "Reading field data from files."
2.28
2.29 + def begin(self):
2.30 + self.begin_record()
2.31 + try:
2.32 + self.docnum_size = self.read_number()
2.33 + except EOFError:
2.34 + self.docnum_size = 0 # NOTE: No fields!
2.35 + self.data_start = self.tell()
2.36 +
2.37 def reset(self):
2.38 self.last_docnum = None
2.39 self.adder = None
2.40 @@ -82,7 +96,7 @@
2.41
2.42 # Read the document number.
2.43
2.44 - docnum = self.read_sequence_value()
2.45 + docnum = self.read_sequence_value(self.docnum_size)
2.46
2.47 if self.last_docnum is not None:
2.48 self.last_docnum = self.adder(docnum, self.last_docnum)
2.49 @@ -120,14 +134,12 @@
2.50 self.last_docnum = docnum
2.51 return docnum, fields
2.52
2.53 -class FieldIndexWriter(FileWriter):
2.54 +class FieldIndexWriter(FieldWriter):
2.55
2.56 "Writing field index details to files."
2.57
2.58 def reset(self):
2.59 - self.end_record()
2.60 - self.last_docnum = None
2.61 - self.subtractor = None
2.62 + FieldWriter.reset(self)
2.63 self.last_offset = 0
2.64
2.65 def write_document(self, docnum, offset):
2.66 @@ -147,7 +159,7 @@
2.67
2.68 # Write the document number.
2.69
2.70 - self.write_sequence_value(docnum_seq)
2.71 + self.write_sequence_value(docnum_seq, self.docnum_size)
2.72
2.73 # Write the offset delta.
2.74
2.75 @@ -156,15 +168,13 @@
2.76 self.last_docnum = docnum
2.77 self.last_offset = offset
2.78
2.79 -class FieldIndexReader(FileReader):
2.80 +class FieldIndexReader(FieldReader):
2.81
2.82 "Reading field index details from files."
2.83
2.84 def reset(self):
2.85 - self.last_docnum = None
2.86 - self.adder = None
2.87 + FieldReader.reset(self)
2.88 self.last_offset = 0
2.89 - self.begin_record()
2.90
2.91 def read_document(self):
2.92
2.93 @@ -172,7 +182,7 @@
2.94
2.95 # Read the document number.
2.96
2.97 - docnum = self.read_sequence_value()
2.98 + docnum = self.read_sequence_value(self.docnum_size)
2.99
2.100 if self.last_docnum is not None:
2.101 self.last_docnum = self.adder(docnum, self.last_docnum)
2.102 @@ -198,7 +208,13 @@
2.103
2.104 def write_fields(self, docnum, fields):
2.105
2.106 - "Write details of the document with the given 'docnum' and 'fields'."
2.107 + "Write details of the given 'docnum' and 'fields'."
2.108 +
2.109 + if self.entry == 0:
2.110 + docnum_size = sizeof(docnum)
2.111 + self.field_writer.begin(docnum_size)
2.112 + self.field_index_writer.begin(docnum_size)
2.113 + self.field_index_writer.reset()
2.114
2.115 if self.entry % self.interval == 0:
2.116 self.field_writer.reset()
2.117 @@ -221,9 +237,13 @@
2.118 def __init__(self, field_reader, field_index_reader):
2.119 self.field_reader = field_reader
2.120 self.field_index_reader = field_index_reader
2.121 - self.entry = 0
2.122 +
2.123 + self.field_reader.reset()
2.124 + self.field_index_reader.reset()
2.125
2.126 self.cache = {}
2.127 +
2.128 + self.entry = 0
2.129 self.docs = []
2.130 try:
2.131 while 1:
3.1 --- a/iixr/files.py Tue Feb 08 00:08:27 2011 +0100
3.2 +++ b/iixr/files.py Thu Feb 10 01:19:13 2011 +0100
3.3 @@ -32,7 +32,7 @@
3.4 self.f = f
3.5 self.data = array('B') # master buffer
3.6 self.record = array('B') # record buffer
3.7 - self.reset()
3.8 + self.data_start = 0
3.9
3.10 def reset(self):
3.11
3.12 @@ -45,7 +45,7 @@
3.13 self.reset()
3.14
3.15 def rewind(self):
3.16 - self.f.seek(0)
3.17 + self.f.seek(self.data_start)
3.18 self.reset()
3.19
3.20 def close(self):
3.21 @@ -57,17 +57,24 @@
3.22
3.23 "Writing basic data types to files."
3.24
3.25 + def __init__(self, f):
3.26 + File.__init__(self, f)
3.27 + self.written = 0
3.28 +
3.29 def tell(self):
3.30 # NOTE: Will not be accurate within the current record.
3.31 - return self.f.tell() + len(self.data)
3.32 + return self.written
3.33
3.34 def begin_record(self):
3.35 pass
3.36
3.37 def end_record(self):
3.38 if self.record:
3.39 - vint_to_array(len(self.record), self.data)
3.40 - self.data += self.record
3.41 + length = len(self.record)
3.42 + size = vint(length)
3.43 + self.f.write(size)
3.44 + self.record.tofile(self.f)
3.45 + self.written += len(size) + length
3.46 self.record = array('B')
3.47
3.48 def write_number(self, number):
3.49 @@ -116,21 +123,21 @@
3.50 length = len(s)
3.51 self.record.fromstring("".join([flag, vint(length), s]))
3.52
3.53 - def write_sequence_value(self, value):
3.54 - sequence_to_array(value, self.record)
3.55 + def write_sequence_value(self, value, size):
3.56 + sequence_to_array(value, size, self.record)
3.57
3.58 - def write_sequence_values(self, values):
3.59 + def write_sequence_values(self, values, size):
3.60 vint_to_array(len(values), self.record)
3.61 for value in values:
3.62 - self.write_sequence_value(value)
3.63 + self.write_sequence_value(value, size)
3.64
3.65 - def write_delta_sequence(self, values):
3.66 + def write_delta_sequence(self, values, size):
3.67 convert_sequence(values, get_subtractor(values[0]))
3.68 - self.write_sequence_values(values)
3.69 + self.write_sequence_values(values, size)
3.70
3.71 - def write_monotonic_sequence(self, values):
3.72 + def write_monotonic_sequence(self, values, size):
3.73 convert_sequence(values, get_monotonic_subtractor(values[0]))
3.74 - self.write_sequence_values(values)
3.75 + self.write_sequence_values(values, size)
3.76
3.77 def flush(self):
3.78 if self.f is not None:
3.79 @@ -146,6 +153,20 @@
3.80
3.81 "Reading basic data types from files."
3.82
3.83 + def __init__(self, f):
3.84 + File.__init__(self, f)
3.85 + self.begin()
3.86 +
3.87 + def tell(self):
3.88 + # NOTE: Will not be accurate within the current record.
3.89 + return self.f.tell()
3.90 +
3.91 + def begin(self):
3.92 +
3.93 + "Initialise file-wide parameters."
3.94 +
3.95 + pass
3.96 +
3.97 def begin_record(self):
3.98 self.record = array('B')
3.99 self.start = 0
3.100 @@ -185,6 +206,14 @@
3.101 n, self.start = vint_from_array_start(self.record, self.start)
3.102 return n
3.103
3.104 + def read_numbers(self, n):
3.105 + l = []
3.106 + i = 0
3.107 + while i < n:
3.108 + l.append(self.read_number())
3.109 + i += 1
3.110 + return l
3.111 +
3.112 def read_string(self, decompress=0):
3.113
3.114 """
3.115 @@ -214,26 +243,26 @@
3.116
3.117 return unicode(s, "utf-8")
3.118
3.119 - def read_sequence_value(self):
3.120 - value, self.start = sequence_from_array(self.record, self.start)
3.121 + def read_sequence_value(self, size):
3.122 + value, self.start = sequence_from_array(self.record, size, self.start)
3.123 return value
3.124
3.125 - def read_sequences(self):
3.126 + def read_sequences(self, size):
3.127 values = []
3.128 length = self.read_number()
3.129 i = 0
3.130 while i < length:
3.131 - values.append(self.read_sequence_value())
3.132 + values.append(self.read_sequence_value(size))
3.133 i += 1
3.134 return values
3.135
3.136 - def read_delta_sequence(self):
3.137 - values = self.read_sequences()
3.138 + def read_delta_sequence(self, size):
3.139 + values = self.read_sequences(size)
3.140 convert_sequence(values, get_adder(values[0]))
3.141 return values
3.142
3.143 - def read_monotonic_sequence(self):
3.144 - values = self.read_sequences()
3.145 + def read_monotonic_sequence(self, size):
3.146 + values = self.read_sequences(size)
3.147 convert_sequence(values, get_monotonic_adder(values[0]))
3.148 return values
3.149
4.1 --- a/iixr/index.py Tue Feb 08 00:08:27 2011 +0100
4.2 +++ b/iixr/index.py Thu Feb 10 01:19:13 2011 +0100
4.3 @@ -171,10 +171,8 @@
4.4 self.docs.sort()
4.5
4.6 field_dict_writer = self.get_field_writer()
4.7 -
4.8 for docnum, fields in self.docs:
4.9 field_dict_writer.write_fields(docnum, fields)
4.10 -
4.11 field_dict_writer.close()
4.12
4.13 self.docs = []
5.1 --- a/iixr/positions.py Tue Feb 08 00:08:27 2011 +0100
5.2 +++ b/iixr/positions.py Thu Feb 10 01:19:13 2011 +0100
5.3 @@ -25,6 +25,13 @@
5.4
5.5 "Writing position information to files."
5.6
5.7 + def begin(self, docnum_size, position_size):
5.8 + self.write_numbers((docnum_size, position_size))
5.9 + self.end_record()
5.10 + self.data_start = self.tell()
5.11 + self.docnum_size = docnum_size
5.12 + self.position_size = position_size
5.13 +
5.14 def reset(self):
5.15 self.end_record()
5.16 self.last_docnum = None
5.17 @@ -57,8 +64,8 @@
5.18 self.subtractor = get_subtractor(docnum)
5.19 docnum_seq = docnum
5.20
5.21 - self.write_sequence_value(docnum_seq)
5.22 - self.write_monotonic_sequence(positions)
5.23 + self.write_sequence_value(docnum_seq, self.docnum_size)
5.24 + self.write_monotonic_sequence(positions, self.position_size)
5.25
5.26 self.last_docnum = docnum
5.27
5.28 @@ -66,6 +73,14 @@
5.29
5.30 "Reading position information within term-specific regions of a file."
5.31
5.32 + def begin(self):
5.33 + self.begin_record()
5.34 + try:
5.35 + self.docnum_size, self.position_size = self.read_numbers(2)
5.36 + except EOFError:
5.37 + self.docnum_size, self.position_size = 0, 0 # NOTE: No positions!
5.38 + self.data_start = self.tell()
5.39 +
5.40 def reset(self):
5.41 self.last_docnum = None
5.42 self.adder = None
5.43 @@ -79,7 +94,7 @@
5.44
5.45 # Read the document number.
5.46
5.47 - docnum = self.read_sequence_value()
5.48 + docnum = self.read_sequence_value(self.docnum_size)
5.49
5.50 # Calculate an ongoing delta.
5.51
5.52 @@ -92,18 +107,19 @@
5.53 self.adder = get_adder(docnum)
5.54 self.last_docnum = docnum
5.55
5.56 - positions = self.read_monotonic_sequence()
5.57 + positions = self.read_monotonic_sequence(self.position_size)
5.58
5.59 return self.last_docnum, positions
5.60
5.61 -class PositionIndexWriter(FileWriter):
5.62 +class PositionIndexWriter(PositionWriter):
5.63
5.64 "Writing position index information to files."
5.65
5.66 + def begin(self, docnum_size):
5.67 + PositionWriter.begin(self, docnum_size, 0)
5.68 +
5.69 def reset(self):
5.70 - self.end_record()
5.71 - self.last_docnum = None
5.72 - self.subtractor = None
5.73 + PositionWriter.reset(self)
5.74 self.last_pos_offset = 0
5.75
5.76 def write_positions(self, docnum, pos_offset, count):
5.77 @@ -121,22 +137,20 @@
5.78 self.subtractor = get_subtractor(docnum)
5.79 docnum_seq = docnum
5.80
5.81 - self.write_sequence_value(docnum_seq)
5.82 + self.write_sequence_value(docnum_seq, self.docnum_size)
5.83 self.write_number(pos_offset - self.last_pos_offset)
5.84 self.write_number(count)
5.85
5.86 self.last_docnum = docnum
5.87 self.last_pos_offset = pos_offset
5.88
5.89 -class PositionIndexReader(FileReader):
5.90 +class PositionIndexReader(PositionReader):
5.91
5.92 "Reading position index information within term-specific regions of a file."
5.93
5.94 def reset(self):
5.95 - self.last_docnum = None
5.96 - self.adder = None
5.97 + PositionReader.reset(self)
5.98 self.last_pos_offset = 0
5.99 - self.begin_record()
5.100
5.101 def read_positions(self):
5.102
5.103 @@ -147,7 +161,7 @@
5.104
5.105 # Read the document number.
5.106
5.107 - docnum = self.read_sequence_value()
5.108 + docnum = self.read_sequence_value(self.docnum_size)
5.109
5.110 if self.last_docnum is not None:
5.111 self.last_docnum = self.adder(docnum, self.last_docnum)
5.112 @@ -295,28 +309,38 @@
5.113 the term involved.
5.114 """
5.115
5.116 - # Reset the writers.
5.117 -
5.118 - self.position_writer.reset()
5.119 - self.position_index_writer.reset()
5.120 -
5.121 - # Remember the first index entry offset.
5.122 -
5.123 - index_offset = self.position_index_writer.tell()
5.124 -
5.125 # Write the positions.
5.126
5.127 frequency = 0
5.128 count = 0
5.129
5.130 if doc_positions:
5.131 + doc_positions.sort()
5.132 +
5.133 + # Look ahead at the first document record.
5.134 + # NOTE: Any iterator would need to support this.
5.135 +
5.136 + first_docnum, first_positions = doc_positions[0]
5.137 + first_position = first_positions[0]
5.138 +
5.139 + # Write out size details.
5.140 +
5.141 + docnum_size, position_size = sizeof(first_docnum), sizeof(first_position)
5.142 + self.position_writer.begin(docnum_size, position_size)
5.143 + self.position_index_writer.begin(docnum_size)
5.144 +
5.145 + # Reset the writers.
5.146 +
5.147 + self.position_writer.reset()
5.148 + self.position_index_writer.reset()
5.149 +
5.150 + # Remember the first index entry offset.
5.151 +
5.152 + index_offset = self.position_index_writer.tell()
5.153
5.154 # Retain the first record offset for a subsequent index entry.
5.155
5.156 first_offset = self.position_writer.tell()
5.157 - first_docnum = None
5.158 -
5.159 - doc_positions.sort()
5.160
5.161 for docnum, positions in doc_positions:
5.162 if first_docnum is None:
6.1 --- a/iixr/terms.py Tue Feb 08 00:08:27 2011 +0100
6.2 +++ b/iixr/terms.py Thu Feb 10 01:19:13 2011 +0100
6.3 @@ -172,6 +172,8 @@
6.4 self.interval = interval
6.5 self.entry = 0
6.6
6.7 + self.index_writer.reset()
6.8 +
6.9 def _write_term(self, term, offset, frequency, doc_frequency):
6.10
6.11 """
6.12 @@ -216,8 +218,11 @@
6.13 self.info_reader = info_reader
6.14 self.index_reader = index_reader
6.15 self.position_dict_reader = position_dict_reader
6.16 +
6.17 + self.info_reader.reset()
6.18 + self.index_reader.reset()
6.19 +
6.20 self.entry = 0
6.21 -
6.22 self.terms = []
6.23 try:
6.24 while 1:
7.1 --- a/itermerge.py Tue Feb 08 00:08:27 2011 +0100
7.2 +++ b/itermerge.py Thu Feb 10 01:19:13 2011 +0100
7.3 @@ -29,6 +29,7 @@
7.4
7.5 def __init__(self, sequences):
7.6 self.iters = []
7.7 + self.first = None
7.8
7.9 # Prepare the underlying iterators.
7.10
7.11 @@ -43,6 +44,15 @@
7.12 next = iterator.next
7.13 self._add_next(next)
7.14
7.15 + def __getitem__(self, i):
7.16 + if i == 0:
7.17 + if self.first is None:
7.18 + value, next = self.iters[0]
7.19 + self.first = value
7.20 + return self.first
7.21 + else:
7.22 + raise IndexError, "Index %d cannot be accessed in this iterator." % i
7.23 +
7.24 def sort(self):
7.25 pass # The output should be sorted.
7.26
8.1 --- a/test.py Tue Feb 08 00:08:27 2011 +0100
8.2 +++ b/test.py Thu Feb 10 01:19:13 2011 +0100
8.3 @@ -52,14 +52,14 @@
8.4 f = open("testMS", "wb")
8.5 w = FileWriter(f)
8.6 w.begin_record()
8.7 -w.write_monotonic_sequence(tuples)
8.8 +w.write_monotonic_sequence(tuples, 2)
8.9 w.end_record()
8.10 w.close()
8.11
8.12 f = open("testMS", "rb")
8.13 r = FileReader(f)
8.14 r.begin_record()
8.15 -for t, t2 in zip(r.read_monotonic_sequence(), tuples):
8.16 +for t, t2 in zip(r.read_monotonic_sequence(2), tuples):
8.17 print t == t2, t, t2
8.18 r.end_record()
8.19 r.close()
8.20 @@ -69,14 +69,14 @@
8.21 f = open("testNMS", "wb")
8.22 w = FileWriter(f)
8.23 w.begin_record()
8.24 -w.write_delta_sequence(tuples2)
8.25 +w.write_delta_sequence(tuples2, 2)
8.26 w.end_record()
8.27 w.close()
8.28
8.29 f = open("testNMS", "rb")
8.30 r = FileReader(f)
8.31 r.begin_record()
8.32 -for t, t2 in zip(r.read_delta_sequence(), tuples2):
8.33 +for t, t2 in zip(r.read_delta_sequence(2), tuples2):
8.34 print t == t2, t, t2
8.35 r.end_record()
8.36 r.close()
8.37 @@ -99,20 +99,21 @@
8.38
8.39 f = open("testP", "wb")
8.40 w = PositionWriter(f)
8.41 +w.begin(0, 0)
8.42 for doc_positions in all_doc_positions:
8.43 + w.reset()
8.44 for docnum, positions in doc_positions:
8.45 w.write_positions(docnum, positions)
8.46 - w.reset()
8.47 w.close()
8.48
8.49 f = open("testP", "rb")
8.50 r = PositionReader(f)
8.51 for doc_positions in all_doc_positions:
8.52 + r.reset()
8.53 for docnum, positions in doc_positions:
8.54 d, p = r.read_positions()
8.55 print docnum == d, docnum, d
8.56 print positions == p, positions, p
8.57 - r.reset()
8.58 r.close()
8.59
8.60 all_doc_positions_seq = [
8.61 @@ -131,20 +132,21 @@
8.62
8.63 f = open("testP2", "wb")
8.64 w = PositionWriter(f)
8.65 +w.begin(2, 2)
8.66 for doc_positions in all_doc_positions_seq:
8.67 + w.reset()
8.68 for docnum, positions in doc_positions:
8.69 w.write_positions(docnum, positions)
8.70 - w.reset()
8.71 w.close()
8.72
8.73 f = open("testP2", "rb")
8.74 r = PositionReader(f)
8.75 for doc_positions in all_doc_positions_seq:
8.76 + r.reset()
8.77 for docnum, positions in doc_positions:
8.78 d, p = r.read_positions()
8.79 print docnum == d, docnum, d
8.80 print positions == p, positions, p
8.81 - r.reset()
8.82 r.close()
8.83
8.84 print "- Test position index files."
8.85 @@ -163,6 +165,7 @@
8.86 offsets = []
8.87 f = open("testPI", "wb")
8.88 w = PositionIndexWriter(f)
8.89 +w.begin(0)
8.90 for term_positions in indexed_positions:
8.91 offset = None
8.92 doc_frequency = 0
8.93 @@ -224,12 +227,15 @@
8.94
8.95 f = open("testF", "wb")
8.96 w = FieldWriter(f)
8.97 +w.begin(0)
8.98 +w.reset()
8.99 for docnum, fields in doc_fields:
8.100 w.write_fields(docnum, list(enumerate(fields)))
8.101 w.close()
8.102
8.103 f = open("testF", "rb")
8.104 r = FieldReader(f)
8.105 +r.reset()
8.106 for docnum, fields in doc_fields:
8.107 dn, df = r.read_fields()
8.108 print docnum == dn, docnum, dn
8.109 @@ -246,12 +252,15 @@
8.110
8.111 f = open("testFI", "wb")
8.112 w = FieldIndexWriter(f)
8.113 +w.begin(0)
8.114 +w.reset()
8.115 for docnum, offset in indexed_docs:
8.116 w.write_document(docnum, offset)
8.117 w.close()
8.118
8.119 f = open("testFI", "rb")
8.120 r = FieldIndexReader(f)
8.121 +r.reset()
8.122 for docnum, offset in indexed_docs:
8.123 dn, o = r.read_document()
8.124 print docnum == dn, docnum, dn
8.125 @@ -306,12 +315,14 @@
8.126
8.127 f = open("test", "wb")
8.128 w = TermWriter(f)
8.129 +w.reset()
8.130 for term, offset, frequency, doc_frequency in terms:
8.131 w.write_term(term, offset, frequency, doc_frequency)
8.132 w.close()
8.133
8.134 f = open("test", "rb")
8.135 r = TermReader(f)
8.136 +r.reset()
8.137 for term, offset, frequency, doc_frequency in terms:
8.138 t, o, fr, df = r.read_term()
8.139 print term == t, term, t
8.140 @@ -334,12 +345,14 @@
8.141
8.142 f = open("test", "wb")
8.143 w = TermIndexWriter(f)
8.144 +w.reset()
8.145 for term, offset, frequency, doc_frequency, info_offset in indexed_terms:
8.146 w.write_term(term, offset, frequency, doc_frequency, info_offset)
8.147 w.close()
8.148
8.149 f = open("test", "rb")
8.150 r = TermIndexReader(f)
8.151 +r.reset()
8.152 for term, offset, frequency, doc_frequency, info_offset in indexed_terms:
8.153 t, o, fr, df, i = r.read_term()
8.154 print term == t, term, t