1.1 --- a/iixr.py Sun Sep 06 02:12:16 2009 +0200
1.2 +++ b/iixr.py Sun Sep 06 20:53:40 2009 +0200
1.3 @@ -203,7 +203,7 @@
1.4 def reset(self):
1.5 self.last_docnum = 0
1.6
1.7 - def write_positions(self, docnum, positions):
1.8 + def write_sorted_positions(self, docnum, positions):
1.9
1.10 """
1.11 Write for the document 'docnum' the given 'positions'.
1.12 @@ -225,10 +225,6 @@
1.13
1.14 self.write_number(len(positions))
1.15
1.16 - # Make sure that the positions are sorted.
1.17 -
1.18 - positions.sort()
1.19 -
1.20 # Write the position deltas.
1.21
1.22 last = 0
1.23 @@ -241,6 +237,18 @@
1.24
1.25 return offset
1.26
1.27 + def write_positions(self, docnum, positions):
1.28 +
1.29 + """
1.30 + Write for the document 'docnum' the given 'positions'.
1.31 + Return the offset of the written record.
1.32 + """
1.33 +
1.34 + # Make sure that the positions are sorted.
1.35 +
1.36 + positions.sort()
1.37 + return self.write_sorted_positions(docnum, positions)
1.38 +
1.39 class PositionReader(FileReader):
1.40
1.41 "Reading position information from files."
1.42 @@ -433,7 +441,7 @@
1.43 self.position_index_writer = position_index_writer
1.44 self.interval = interval
1.45
1.46 - def write_term_positions(self, doc_positions):
1.47 + def write_sorted_term_positions(self, doc_positions):
1.48
1.49 """
1.50 Write all 'doc_positions' - a collection of tuples of the form (document
1.51 @@ -460,8 +468,6 @@
1.52 first_offset = None
1.53 count = 0
1.54
1.55 - doc_positions.sort()
1.56 -
1.57 for docnum, positions in doc_positions:
1.58 pos_offset = self.position_writer.write_positions(docnum, positions)
1.59
1.60 @@ -506,6 +512,22 @@
1.61
1.62 return index_offset, frequency, len(doc_positions)
1.63
1.64 + def write_term_positions(self, doc_positions):
1.65 +
1.66 + """
1.67 + Write all 'doc_positions' - a collection of tuples of the form (document
1.68 + number, position list) - to the file.
1.69 +
1.70 + Add some records to the index, making dictionary entries.
1.71 +
1.72 + Return a tuple containing the offset of the written data, the frequency
1.73 + (number of positions), and document frequency (number of documents) for
1.74 + the term involved.
1.75 + """
1.76 +
1.77 + doc_positions.sort()
1.78 + return self.write_sorted_term_positions(doc_positions)
1.79 +
1.80 def close(self):
1.81 self.position_writer.close()
1.82 self.position_index_writer.close()