1.1 --- a/iixr/fields.py Thu Sep 17 21:35:30 2009 +0200
1.2 +++ b/iixr/fields.py Fri Sep 18 01:06:55 2009 +0200
1.3 @@ -36,7 +36,7 @@
1.4 Return the offset at which the fields are stored.
1.5 """
1.6
1.7 - offset = self.tell()
1.8 + offset = self.f.tell()
1.9
1.10 # Write the document number delta.
1.11
1.12 @@ -98,7 +98,7 @@
1.13 later documents.
1.14 """
1.15
1.16 - self.seek(offset)
1.17 + self.f.seek(offset)
1.18 bad_docnum, fields = self.read_fields()
1.19 self.last_docnum = docnum
1.20 return docnum, fields
2.1 --- a/iixr/files.py Thu Sep 17 21:35:30 2009 +0200
2.2 +++ b/iixr/files.py Fri Sep 18 01:06:55 2009 +0200
2.3 @@ -38,24 +38,11 @@
2.4 pass
2.5
2.6 def rewind(self):
2.7 - self.seek(0)
2.8 + self.f.seek(0)
2.9 self.reset()
2.10
2.11 - def seek(self, offset):
2.12 -
2.13 - "To be defined by readers."
2.14 -
2.15 - pass
2.16 -
2.17 - def flush(self):
2.18 -
2.19 - "To be defined by writers."
2.20 -
2.21 - pass
2.22 -
2.23 def close(self):
2.24 if self.f is not None:
2.25 - self.flush()
2.26 self.f.close()
2.27 self.f = None
2.28
2.29 @@ -70,7 +57,7 @@
2.30
2.31 "Write 'number' to the file using a variable length encoding."
2.32
2.33 - self.write(vint(number))
2.34 + self.f.write(vint(number))
2.35
2.36 def write_string(self, s, compress=0):
2.37
2.38 @@ -103,15 +90,7 @@
2.39 # Write the length of the data before the data itself.
2.40
2.41 length = len(s)
2.42 - self.write(flag + vint(length) + s)
2.43 -
2.44 - # Cache-affected methods.
2.45 -
2.46 - def write(self, s):
2.47 - self.f.write(s)
2.48 -
2.49 - def tell(self):
2.50 - return self.f.tell()
2.51 + self.f.write("".join([flag, vint(length), s]))
2.52
2.53 class FileReader(File):
2.54
2.55 @@ -128,7 +107,7 @@
2.56
2.57 shift = 0
2.58 number = 0
2.59 - read = self.read
2.60 + read = self.f.read
2.61
2.62 try:
2.63 csd = ord(read(1))
2.64 @@ -150,15 +129,17 @@
2.65 'decompress' is set to a true value.
2.66 """
2.67
2.68 + read = self.f.read
2.69 +
2.70 # Decompress the data if requested.
2.71
2.72 if decompress:
2.73 - flag = self.read(1)
2.74 + flag = read(1)
2.75 else:
2.76 flag = "-"
2.77
2.78 length = self.read_number()
2.79 - s = self.read(length)
2.80 + s = read(length)
2.81
2.82 # Perform decompression if applicable.
2.83
2.84 @@ -169,17 +150,6 @@
2.85
2.86 return unicode(s, "utf-8")
2.87
2.88 - # Cache-affected methods.
2.89 -
2.90 - def read(self, n):
2.91 - return self.f.read(n)
2.92 -
2.93 - def tell(self):
2.94 - return self.f.tell()
2.95 -
2.96 - def seek(self, offset):
2.97 - self.f.seek(offset)
2.98 -
2.99 class FileOpener:
2.100
2.101 "Opening files using their filenames."
3.1 --- a/iixr/positions.py Thu Sep 17 21:35:30 2009 +0200
3.2 +++ b/iixr/positions.py Fri Sep 18 01:06:55 2009 +0200
3.3 @@ -40,26 +40,27 @@
3.4
3.5 # Record the offset of this record.
3.6
3.7 - offset = self.tell()
3.8 + offset = self.f.tell()
3.9
3.10 # Make sure that the positions are sorted.
3.11
3.12 positions.sort()
3.13
3.14 + # Write the document number delta.
3.15 + # Write the number of positions.
3.16 +
3.17 + output = [docnum - self.last_docnum, len(positions)]
3.18 +
3.19 # Write the position deltas.
3.20
3.21 - output = []
3.22 + append = output.append
3.23 last = 0
3.24
3.25 for position in positions:
3.26 - output.append(vint(position - last))
3.27 + append(position - last)
3.28 last = position
3.29
3.30 - # Write the document number delta.
3.31 - # Write the number of positions.
3.32 - # Then write the positions.
3.33 -
3.34 - self.write(vint(docnum - self.last_docnum) + vint(len(positions)) + "".join(output))
3.35 + self.f.write("".join([vint(x) for x in output]))
3.36
3.37 self.last_docnum = docnum
3.38 return offset
3.39 @@ -98,27 +99,20 @@
3.40
3.41 # Record the offset of this record.
3.42
3.43 - offset = self.tell()
3.44 - output = []
3.45 + offset = self.f.tell()
3.46
3.47 # Write the document number delta.
3.48 -
3.49 - output.append(vint(docnum - self.last_docnum))
3.50 - self.last_docnum = docnum
3.51 -
3.52 # Write the position file offset delta.
3.53 -
3.54 - output.append(vint(pos_offset - self.last_pos_offset))
3.55 - self.last_pos_offset = pos_offset
3.56 -
3.57 # Write the document count.
3.58
3.59 - output.append(vint(count))
3.60 + output = [docnum - self.last_docnum, pos_offset - self.last_pos_offset, count]
3.61
3.62 # Actually write the data.
3.63
3.64 - self.write("".join(output))
3.65 + self.f.write("".join([vint(x) for x in output]))
3.66
3.67 + self.last_pos_offset = pos_offset
3.68 + self.last_docnum = docnum
3.69 return offset
3.70
3.71 class PositionIndexOpener(FileOpener):
3.72 @@ -165,7 +159,7 @@
3.73 def __init__(self, f, offset, count):
3.74 FileReader.__init__(self, f)
3.75 IteratorBase.__init__(self, count)
3.76 - self.seek(offset)
3.77 + self.f.seek(offset)
3.78
3.79 def reset(self):
3.80 self.last_docnum = 0
3.81 @@ -213,7 +207,7 @@
3.82 def __init__(self, f, offset, count):
3.83 FileReader.__init__(self, f)
3.84 IteratorBase.__init__(self, count)
3.85 - self.seek(offset)
3.86 + self.f.seek(offset)
3.87
3.88 def reset(self):
3.89 self.last_docnum = 0
3.90 @@ -384,9 +378,10 @@
3.91 # Attempt to reuse the index iterator.
3.92
3.93 if self.index_iterator is not None:
3.94 - self.index_iterator.replenish(doc_frequency)
3.95 - self.index_iterator.seek(offset)
3.96 - self.index_iterator.reset()
3.97 + ii = self.index_iterator
3.98 + ii.replenish(doc_frequency)
3.99 + ii.f.seek(offset)
3.100 + ii.reset()
3.101
3.102 # Or make a new index iterator.
3.103
3.104 @@ -534,9 +529,10 @@
3.105 # Attempt to reuse any correctly positioned iterator.
3.106
3.107 if self.iterator is not None:
3.108 - self.iterator.replenish(self.section_count)
3.109 - self.iterator.seek(self.pos_offset)
3.110 - self.iterator.reset()
3.111 + i = self.iterator
3.112 + i.replenish(self.section_count)
3.113 + i.f.seek(self.pos_offset)
3.114 + i.reset()
3.115
3.116 # Otherwise, obtain a new iterator.
3.117
4.1 --- a/iixr/terms.py Thu Sep 17 21:35:30 2009 +0200
4.2 +++ b/iixr/terms.py Fri Sep 18 01:06:55 2009 +0200
4.3 @@ -63,7 +63,7 @@
4.4 self.last_term = term
4.5 self.last_offset = offset
4.6
4.7 - return self.tell()
4.8 + return self.f.tell()
4.9
4.10 class TermReader(FileReader):
4.11
4.12 @@ -108,7 +108,7 @@
4.13 permits the scanning for later terms from the specified term.
4.14 """
4.15
4.16 - self.seek(info_offset)
4.17 + self.f.seek(info_offset)
4.18 self.last_term = term
4.19 self.last_offset = offset
4.20