1.1 --- a/iixr.py Wed Aug 26 22:51:29 2009 +0200
1.2 +++ b/iixr.py Wed Aug 26 23:36:02 2009 +0200
1.3 @@ -356,9 +356,10 @@
1.4
1.5 "Writing term dictionaries."
1.6
1.7 - def __init__(self, info_writer, index_writer, interval):
1.8 + def __init__(self, info_writer, index_writer, position_writer, interval):
1.9 self.info_writer = info_writer
1.10 self.index_writer = index_writer
1.11 + self.position_writer = position_writer
1.12 self.interval = interval
1.13 self.entry = 0
1.14
1.15 @@ -376,17 +377,29 @@
1.16
1.17 self.entry += 1
1.18
1.19 + def write_term_positions(self, term, doc_positions):
1.20 +
1.21 + """
1.22 + Write the given 'term' and the 'doc_positions' recording the documents
1.23 + and positions at which the term is found.
1.24 + """
1.25 +
1.26 + offset = self.position_writer.write_all_positions(doc_positions)
1.27 + self.write_term(term, offset)
1.28 +
1.29 def close(self):
1.30 self.info_writer.close()
1.31 self.index_writer.close()
1.32 + self.position_writer.close()
1.33
1.34 class TermDictionaryReader:
1.35
1.36 "Reading term dictionaries."
1.37
1.38 - def __init__(self, info_reader, index_reader):
1.39 + def __init__(self, info_reader, index_reader, position_reader):
1.40 self.info_reader = info_reader
1.41 self.index_reader = index_reader
1.42 + self.position_reader = position_reader
1.43
1.44 self.terms = []
1.45 try:
1.46 @@ -436,8 +449,19 @@
1.47 else:
1.48 return None
1.49
1.50 + def find_positions(self, term):
1.51 +
1.52 + "Return the documents and positions at which the given 'term' is found."
1.53 +
1.54 + offset = self.find(term)
1.55 + if offset is None:
1.56 + return None
1.57 + else:
1.58 + return self.position_reader.read_all_positions(offset)
1.59 +
1.60 def close(self):
1.61 self.info_reader.close()
1.62 self.index_reader.close()
1.63 + self.position_reader.close()
1.64
1.65 # vim: tabstop=4 expandtab shiftwidth=4
2.1 --- a/test.py Wed Aug 26 22:51:29 2009 +0200
2.2 +++ b/test.py Wed Aug 26 23:36:02 2009 +0200
2.3 @@ -2,7 +2,7 @@
2.4
2.5 import iixr
2.6
2.7 -numbers = [12345678, 0, 1]
2.8 +numbers = [12345678, 0, 1, 127, 128, 255, 256]
2.9
2.10 f = open("test", "wb")
2.11 w = iixr.FileWriter(f)
2.12 @@ -115,24 +115,65 @@
2.13 w = iixr.TermWriter(f)
2.14 f2 = open("testI", "wb")
2.15 w2 = iixr.TermIndexWriter(f2)
2.16 -w3 = iixr.TermDictionaryWriter(w, w2, 3)
2.17 +f3 = open("testP", "wb")
2.18 +w3 = iixr.PositionWriter(f3)
2.19 +wd = iixr.TermDictionaryWriter(w, w2, w3, 3)
2.20 for term, offset in terms:
2.21 - w3.write_term(term, offset)
2.22 -w3.close()
2.23 + wd.write_term(term, offset)
2.24 +wd.close()
2.25
2.26 f = open("test", "rb")
2.27 r = iixr.TermReader(f)
2.28 f2 = open("testI", "rb")
2.29 r2 = iixr.TermIndexReader(f2)
2.30 -r3 = iixr.TermDictionaryReader(r, r2)
2.31 +f3 = open("testP", "rb")
2.32 +r3 = iixr.PositionReader(f3)
2.33 +rd = iixr.TermDictionaryReader(r, r2, r3)
2.34 terms_reversed = terms[:]
2.35 terms_reversed.reverse()
2.36 for term, offset in terms_reversed:
2.37 - o = r3.find(term)
2.38 + o = rd.find(term)
2.39 print offset == o, offset, o
2.40 for term in ("dog", "dingo"):
2.41 - o = r3.find(term)
2.42 + o = rd.find(term)
2.43 print o is None, o
2.44 -r3.close()
2.45 +rd.close()
2.46 +
2.47 +terms_with_positions = [
2.48 + ("aardvark", [(1, [2, 45, 96]), (20, [13])]),
2.49 + ("anteater", [(1, [43, 44])]),
2.50 + ("badger", [(7, [2, 22, 196]), (19, [55, 1333]), (21, [0])]),
2.51 + ("bull", [(6, [128]), (16, [12])]),
2.52 + ("bulldog", [(43, [17, 19, 256, 512])]),
2.53 + ("cat", [(123, [12, 145, 196]), (1200, [113])])
2.54 + ]
2.55 +
2.56 +f = open("test", "wb")
2.57 +w = iixr.TermWriter(f)
2.58 +f2 = open("testI", "wb")
2.59 +w2 = iixr.TermIndexWriter(f2)
2.60 +f3 = open("testP", "wb")
2.61 +w3 = iixr.PositionWriter(f3)
2.62 +wd = iixr.TermDictionaryWriter(w, w2, w3, 3)
2.63 +for term, doc_positions in terms_with_positions:
2.64 + wd.write_term_positions(term, doc_positions)
2.65 +wd.close()
2.66 +
2.67 +f = open("test", "rb")
2.68 +r = iixr.TermReader(f)
2.69 +f2 = open("testI", "rb")
2.70 +r2 = iixr.TermIndexReader(f2)
2.71 +f3 = open("testP", "rb")
2.72 +r3 = iixr.PositionReader(f3)
2.73 +rd = iixr.TermDictionaryReader(r, r2, r3)
2.74 +terms_reversed = terms_with_positions[:]
2.75 +terms_reversed.reverse()
2.76 +for term, doc_positions in terms_reversed:
2.77 + dp = rd.find_positions(term)
2.78 + print doc_positions == dp, doc_positions, dp
2.79 +for term in ("dog", "dingo"):
2.80 + dp = rd.find_positions(term)
2.81 + print dp is None, dp
2.82 +rd.close()
2.83
2.84 # vim: tabstop=4 expandtab shiftwidth=4