1.1 --- a/iixr/index.py Sun Sep 27 23:03:19 2009 +0200
1.2 +++ b/iixr/index.py Wed Sep 30 22:02:51 2009 +0200
1.3 @@ -41,10 +41,11 @@
1.4
1.5 "A container of document information."
1.6
1.7 - def __init__(self, docnum):
1.8 + def __init__(self, docnum, fields=None):
1.9 self.docnum = docnum
1.10 - self.fields = []
1.11 + self.fields = fields or []
1.12 self.terms = {}
1.13 + self.field_dict = None
1.14
1.15 def add_position(self, term, position):
1.16
1.17 @@ -70,6 +71,18 @@
1.18
1.19 self.fields = fields
1.20
1.21 + def _ensure_dict(self):
1.22 + if self.field_dict is None:
1.23 + self.field_dict = dict(self.fields)
1.24 +
1.25 + def keys(self):
1.26 + self._ensure_dict()
1.27 + return self.field_dict.keys()
1.28 +
1.29 + def __getitem__(self, key):
1.30 + self._ensure_dict()
1.31 + return self.field_dict[key]
1.32 +
1.33 class IndexWriter:
1.34
1.35 """
1.36 @@ -86,7 +99,7 @@
1.37 self.field_dict_partition = 0
1.38
1.39 self.terms = {}
1.40 - self.docs = {}
1.41 + self.docs = []
1.42
1.43 self.doc_counter = 0
1.44
1.45 @@ -100,7 +113,7 @@
1.46 for term, positions in doc.terms.items():
1.47 self.terms.setdefault(term, {})[doc.docnum] = positions
1.48
1.49 - self.docs[doc.docnum] = doc.fields
1.50 + self.docs.append((doc.docnum, doc.fields))
1.51
1.52 self.doc_counter += 1
1.53 if self.flush_interval and self.doc_counter >= self.flush_interval:
1.54 @@ -147,17 +160,16 @@
1.55
1.56 # Get the documents in order.
1.57
1.58 - docs = self.docs.items()
1.59 - docs.sort()
1.60 + self.docs.sort()
1.61
1.62 field_dict_writer = self.get_field_writer()
1.63
1.64 - for docnum, fields in docs:
1.65 + for docnum, fields in self.docs:
1.66 field_dict_writer.write_fields(docnum, fields)
1.67
1.68 field_dict_writer.close()
1.69
1.70 - self.docs = {}
1.71 + self.docs = []
1.72 self.field_dict_partition += 1
1.73
1.74 def close(self):
1.75 @@ -192,6 +204,9 @@
1.76 def get_fields(self, docnum):
1.77 return self.field_dict_reader.get_fields(docnum)
1.78
1.79 + def get_document(self, docnum):
1.80 + return Document(docnum, self.get_fields(docnum))
1.81 +
1.82 def close(self):
1.83 self.dict_reader.close()
1.84 self.field_dict_reader.close()
2.1 --- a/iixr/positions.py Sun Sep 27 23:03:19 2009 +0200
2.2 +++ b/iixr/positions.py Wed Sep 30 22:02:51 2009 +0200
2.3 @@ -73,8 +73,6 @@
2.4 to 'count'.
2.5 """
2.6
2.7 - # Duplicate the file handle.
2.8 -
2.9 f = self.open("rb")
2.10 return PositionIterator(f, offset, count)
2.11
2.12 @@ -121,8 +119,6 @@
2.13 to 'doc_frequency'.
2.14 """
2.15
2.16 - # Duplicate the file handle.
2.17 -
2.18 f = self.open("rb")
2.19 return PositionIndexIterator(f, offset, doc_frequency)
2.20