1.1 --- a/iixr/fields.py Thu Feb 03 01:26:35 2011 +0100
1.2 +++ b/iixr/fields.py Mon Feb 07 02:05:38 2011 +0100
1.3 @@ -3,7 +3,7 @@
1.4 """
1.5 Specific classes for storing document information.
1.6
1.7 -Copyright (C) 2009, 2010 Paul Boddie <paul@boddie.org.uk>
1.8 +Copyright (C) 2009, 2010, 2011 Paul Boddie <paul@boddie.org.uk>
1.9
1.10 This program is free software; you can redistribute it and/or modify it under
1.11 the terms of the GNU General Public License as published by the Free Software
1.12 @@ -18,6 +18,7 @@
1.13 with this program. If not, see <http://www.gnu.org/licenses/>.
1.14 """
1.15
1.16 +from iixr.data import *
1.17 from iixr.files import *
1.18 from bisect import bisect_right # to find terms in the dictionary index
1.19
1.20 @@ -29,7 +30,7 @@
1.21
1.22 def reset(self):
1.23 self.last_docnum = None
1.24 - self.docnum_size = None
1.25 + self.subtractor = None
1.26
1.27 def write_fields(self, docnum, fields):
1.28
1.29 @@ -40,15 +41,17 @@
1.30
1.31 # Find the size of document number values.
1.32
1.33 - if self.docnum_size is None:
1.34 - self.docnum_size = self.get_value_size(docnum)
1.35 - self.last_docnum = self.get_initial_value(self.docnum_size)
1.36 + if self.last_docnum is not None:
1.37 + docnum_seq = self.subtractor(docnum, self.last_docnum)
1.38 + else:
1.39 + self.subtractor = get_subtractor(docnum)
1.40 + docnum_seq = docnum
1.41
1.42 - # Write the number of values per document number.
1.43 - # Write the document number delta.
1.44 + self.begin_record()
1.45
1.46 - self.write_number(self.docnum_size)
1.47 - self.last_docnum = self.write_sequence(docnum, self.last_docnum, self.docnum_size, monotonic=0)
1.48 + # Write the document number.
1.49 +
1.50 + self.write_sequence_value(docnum_seq)
1.51
1.52 # Write the number of fields.
1.53
1.54 @@ -60,12 +63,17 @@
1.55 self.write_number(i)
1.56 self.write_string(field, 1) # compress
1.57
1.58 + self.end_record()
1.59 +
1.60 + self.last_docnum = docnum
1.61 +
1.62 class FieldReader(FileReader):
1.63
1.64 "Reading field data from files."
1.65
1.66 def reset(self):
1.67 self.last_docnum = None
1.68 + self.adder = None
1.69
1.70 def read_fields(self):
1.71
1.72 @@ -74,16 +82,17 @@
1.73 number and a list of field (identifier, value) pairs.
1.74 """
1.75
1.76 - # Read the number of values per document number.
1.77 + self.begin_record()
1.78
1.79 - docnum_size = self.read_number()
1.80 + # Read the document number.
1.81 +
1.82 + docnum = self.read_sequence_value()
1.83
1.84 - if self.last_docnum is None:
1.85 - self.last_docnum = self.get_initial_value(docnum_size)
1.86 -
1.87 - # Read the document number delta and add it to the last number.
1.88 -
1.89 - self.last_docnum = self.read_sequence(self.last_docnum, docnum_size, monotonic=0)
1.90 + if self.last_docnum is not None:
1.91 + self.last_docnum = self.adder(docnum, self.last_docnum)
1.92 + else:
1.93 + self.adder = get_adder(docnum)
1.94 + self.last_docnum = docnum
1.95
1.96 # Read the number of fields.
1.97
1.98 @@ -100,6 +109,8 @@
1.99 fields.append((identifier, value))
1.100 i += 1
1.101
1.102 + self.end_record()
1.103 +
1.104 return self.last_docnum, fields
1.105
1.106 def read_document_fields(self, docnum, offset):
1.107 @@ -121,7 +132,7 @@
1.108
1.109 def reset(self):
1.110 self.last_docnum = None
1.111 - self.docnum_size = None
1.112 + self.subtractor = None
1.113 self.last_offset = 0
1.114
1.115 def write_document(self, docnum, offset):
1.116 @@ -133,19 +144,24 @@
1.117
1.118 # Find the size of document number values.
1.119
1.120 - if self.docnum_size is None:
1.121 - self.docnum_size = self.get_value_size(docnum)
1.122 - self.last_docnum = self.get_initial_value(self.docnum_size)
1.123 + if self.last_docnum is not None:
1.124 + docnum_seq = self.subtractor(docnum, self.last_docnum)
1.125 + else:
1.126 + self.subtractor = get_subtractor(docnum)
1.127 + docnum_seq = docnum
1.128
1.129 - # Write the number of values per document number.
1.130 - # Write the document number delta.
1.131 + self.begin_record()
1.132
1.133 - self.write_number(self.docnum_size)
1.134 - self.last_docnum = self.write_sequence(docnum, self.last_docnum, self.docnum_size, monotonic=0)
1.135 + # Write the document number.
1.136 +
1.137 + self.write_sequence_value(docnum_seq)
1.138
1.139 # Write the offset delta.
1.140
1.141 self.write_number(offset - self.last_offset)
1.142 + self.end_record()
1.143 +
1.144 + self.last_docnum = docnum
1.145 self.last_offset = offset
1.146
1.147 class FieldIndexReader(FileReader):
1.148 @@ -154,26 +170,29 @@
1.149
1.150 def reset(self):
1.151 self.last_docnum = None
1.152 + self.adder = None
1.153 self.last_offset = 0
1.154
1.155 def read_document(self):
1.156
1.157 "Read a document number and field file offset."
1.158
1.159 - # Read the number of values per document number.
1.160 + self.begin_record()
1.161
1.162 - docnum_size = self.read_number()
1.163 + # Read the document number.
1.164 +
1.165 + docnum = self.read_sequence_value()
1.166
1.167 - if self.last_docnum is None:
1.168 - self.last_docnum = self.get_initial_value(docnum_size)
1.169 -
1.170 - # Read the document number delta and add it to the last number.
1.171 -
1.172 - self.last_docnum = self.read_sequence(self.last_docnum, docnum_size, monotonic=0)
1.173 + if self.last_docnum is not None:
1.174 + self.last_docnum = self.adder(docnum, self.last_docnum)
1.175 + else:
1.176 + self.adder = get_adder(docnum)
1.177 + self.last_docnum = docnum
1.178
1.179 # Read the offset.
1.180
1.181 self.last_offset += self.read_number()
1.182 + self.end_record()
1.183
1.184 return self.last_docnum, self.last_offset
1.185