1.1 --- a/iixr.py Tue Sep 08 20:29:47 2009 +0200
1.2 +++ b/iixr.py Wed Sep 09 00:29:30 2009 +0200
1.3 @@ -1311,9 +1311,11 @@
1.4 other_term, other_doc_positions, other_partition = entries[i]
1.5
1.6 # For such entries, merge the positions.
1.7 + # Since document positions should only appear in a single
1.8 + # partition, a simple update should be sufficient.
1.9
1.10 if other_term == term:
1.11 - doc_positions = self.merge_positions(doc_positions, other_doc_positions)
1.12 + doc_positions.update(other_doc_positions)
1.13 to_update.append(other_partition)
1.14 i += 1
1.15 else:
1.16 @@ -1334,23 +1336,6 @@
1.17 except EOFError:
1.18 pass
1.19
1.20 - def merge_positions(self, doc_positions, other_doc_positions):
1.21 -
1.22 - """
1.23 - Merge 'doc_positions' with 'other_doc_positions' so that common document
1.24 - records contain positions from both collections.
1.25 - """
1.26 -
1.27 - doc_position_dict = dict(doc_positions)
1.28 -
1.29 - for docnum, positions in other_doc_positions:
1.30 - if doc_position_dict.has_key(docnum):
1.31 - doc_position_dict[docnum] += positions
1.32 - else:
1.33 - doc_position_dict[docnum] = positions
1.34 -
1.35 - return doc_position_dict.items()
1.36 -
1.37 class FieldDictionaryMerger(Merger):
1.38
1.39 "Merge field files."
1.40 @@ -1375,28 +1360,11 @@
1.41 pass
1.42
1.43 # While entries are available, write them out in order, merging where
1.44 - # appropriate.
1.45 + # appropriate. Since fields from one document should only appear in a
1.46 + # single partition, only one partition will be updated at a time.
1.47
1.48 while entries:
1.49 docnum, fields, partition = entries[0]
1.50 - to_update = [partition]
1.51 -
1.52 - nentries = len(entries)
1.53 - i = 1
1.54 -
1.55 - # Find other entries for the term.
1.56 -
1.57 - while i < nentries:
1.58 - other_docnum, other_fields, other_partition = entries[i]
1.59 -
1.60 - # For such entries, merge the positions.
1.61 -
1.62 - if other_docnum == docnum:
1.63 - fields += other_fields
1.64 - to_update.append(other_partition)
1.65 - i += 1
1.66 - else:
1.67 - break
1.68
1.69 # Write the combined term details.
1.70
1.71 @@ -1404,14 +1372,13 @@
1.72
1.73 # Update the entries from the affected readers.
1.74
1.75 - del entries[:i]
1.76 -
1.77 - for partition in to_update:
1.78 - try:
1.79 - docnum, fields = self.readers[partition].read_fields()
1.80 - insort_right(entries, (docnum, fields, partition))
1.81 - except EOFError:
1.82 - pass
1.83 + del entries[0]
1.84 +
1.85 + try:
1.86 + docnum, fields = self.readers[partition].read_fields()
1.87 + insort_right(entries, (docnum, fields, partition))
1.88 + except EOFError:
1.89 + pass
1.90
1.91 # Utility functions.
1.92
1.93 @@ -1604,13 +1571,14 @@
1.94
1.95 # Get the terms in order.
1.96
1.97 - terms = self.terms.items()
1.98 + all_terms = self.terms
1.99 + terms = all_terms.keys()
1.100 terms.sort()
1.101
1.102 dict_writer = self.get_term_writer()
1.103
1.104 - for term, doc_positions in terms:
1.105 - doc_positions = doc_positions.items()
1.106 + for term in terms:
1.107 + doc_positions = all_terms[term].items()
1.108 dict_writer.write_term_positions(term, doc_positions)
1.109
1.110 dict_writer.close()