# HG changeset patch # User Paul Boddie # Date 1253228815 -7200 # Node ID 571be37961d84e41c67e0412c114d5241ed18893 # Parent b97809e3386721adb355ae52f04561fb7ec88858 Use file methods directly. Made minor optimisations. diff -r b97809e33867 -r 571be37961d8 iixr/fields.py --- a/iixr/fields.py Thu Sep 17 21:35:30 2009 +0200 +++ b/iixr/fields.py Fri Sep 18 01:06:55 2009 +0200 @@ -36,7 +36,7 @@ Return the offset at which the fields are stored. """ - offset = self.tell() + offset = self.f.tell() # Write the document number delta. @@ -98,7 +98,7 @@ later documents. """ - self.seek(offset) + self.f.seek(offset) bad_docnum, fields = self.read_fields() self.last_docnum = docnum return docnum, fields diff -r b97809e33867 -r 571be37961d8 iixr/files.py --- a/iixr/files.py Thu Sep 17 21:35:30 2009 +0200 +++ b/iixr/files.py Fri Sep 18 01:06:55 2009 +0200 @@ -38,24 +38,11 @@ pass def rewind(self): - self.seek(0) + self.f.seek(0) self.reset() - def seek(self, offset): - - "To be defined by readers." - - pass - - def flush(self): - - "To be defined by writers." - - pass - def close(self): if self.f is not None: - self.flush() self.f.close() self.f = None @@ -70,7 +57,7 @@ "Write 'number' to the file using a variable length encoding." - self.write(vint(number)) + self.f.write(vint(number)) def write_string(self, s, compress=0): @@ -103,15 +90,7 @@ # Write the length of the data before the data itself. length = len(s) - self.write(flag + vint(length) + s) - - # Cache-affected methods. - - def write(self, s): - self.f.write(s) - - def tell(self): - return self.f.tell() + self.f.write("".join([flag, vint(length), s])) class FileReader(File): @@ -128,7 +107,7 @@ shift = 0 number = 0 - read = self.read + read = self.f.read try: csd = ord(read(1)) @@ -150,15 +129,17 @@ 'decompress' is set to a true value. """ + read = self.f.read + # Decompress the data if requested. if decompress: - flag = self.read(1) + flag = read(1) else: flag = "-" length = self.read_number() - s = self.read(length) + s = read(length) # Perform decompression if applicable. @@ -169,17 +150,6 @@ return unicode(s, "utf-8") - # Cache-affected methods. - - def read(self, n): - return self.f.read(n) - - def tell(self): - return self.f.tell() - - def seek(self, offset): - self.f.seek(offset) - class FileOpener: "Opening files using their filenames." diff -r b97809e33867 -r 571be37961d8 iixr/positions.py --- a/iixr/positions.py Thu Sep 17 21:35:30 2009 +0200 +++ b/iixr/positions.py Fri Sep 18 01:06:55 2009 +0200 @@ -40,26 +40,27 @@ # Record the offset of this record. - offset = self.tell() + offset = self.f.tell() # Make sure that the positions are sorted. positions.sort() + # Write the document number delta. + # Write the number of positions. + + output = [docnum - self.last_docnum, len(positions)] + # Write the position deltas. - output = [] + append = output.append last = 0 for position in positions: - output.append(vint(position - last)) + append(position - last) last = position - # Write the document number delta. - # Write the number of positions. - # Then write the positions. - - self.write(vint(docnum - self.last_docnum) + vint(len(positions)) + "".join(output)) + self.f.write("".join([vint(x) for x in output])) self.last_docnum = docnum return offset @@ -98,27 +99,20 @@ # Record the offset of this record. - offset = self.tell() - output = [] + offset = self.f.tell() # Write the document number delta. - - output.append(vint(docnum - self.last_docnum)) - self.last_docnum = docnum - # Write the position file offset delta. - - output.append(vint(pos_offset - self.last_pos_offset)) - self.last_pos_offset = pos_offset - # Write the document count. - output.append(vint(count)) + output = [docnum - self.last_docnum, pos_offset - self.last_pos_offset, count] # Actually write the data. - self.write("".join(output)) + self.f.write("".join([vint(x) for x in output])) + self.last_pos_offset = pos_offset + self.last_docnum = docnum return offset class PositionIndexOpener(FileOpener): @@ -165,7 +159,7 @@ def __init__(self, f, offset, count): FileReader.__init__(self, f) IteratorBase.__init__(self, count) - self.seek(offset) + self.f.seek(offset) def reset(self): self.last_docnum = 0 @@ -213,7 +207,7 @@ def __init__(self, f, offset, count): FileReader.__init__(self, f) IteratorBase.__init__(self, count) - self.seek(offset) + self.f.seek(offset) def reset(self): self.last_docnum = 0 @@ -384,9 +378,10 @@ # Attempt to reuse the index iterator. if self.index_iterator is not None: - self.index_iterator.replenish(doc_frequency) - self.index_iterator.seek(offset) - self.index_iterator.reset() + ii = self.index_iterator + ii.replenish(doc_frequency) + ii.f.seek(offset) + ii.reset() # Or make a new index iterator. @@ -534,9 +529,10 @@ # Attempt to reuse any correctly positioned iterator. if self.iterator is not None: - self.iterator.replenish(self.section_count) - self.iterator.seek(self.pos_offset) - self.iterator.reset() + i = self.iterator + i.replenish(self.section_count) + i.f.seek(self.pos_offset) + i.reset() # Otherwise, obtain a new iterator. diff -r b97809e33867 -r 571be37961d8 iixr/terms.py --- a/iixr/terms.py Thu Sep 17 21:35:30 2009 +0200 +++ b/iixr/terms.py Fri Sep 18 01:06:55 2009 +0200 @@ -63,7 +63,7 @@ self.last_term = term self.last_offset = offset - return self.tell() + return self.f.tell() class TermReader(FileReader): @@ -108,7 +108,7 @@ permits the scanning for later terms from the specified term. """ - self.seek(info_offset) + self.f.seek(info_offset) self.last_term = term self.last_offset = offset