1.1 --- a/test.py Sat Feb 12 01:23:58 2011 +0100
1.2 +++ b/test.py Sun Feb 13 02:49:55 2011 +0100
1.3 @@ -1,22 +1,21 @@
1.4 #!/usr/bin/env python
1.5 +# encoding: iso-8859-1
1.6
1.7 from iixr.files import *
1.8 -from iixr.fields import *
1.9 from iixr.terms import *
1.10 -from iixr.positions import *
1.11 from iixr.index import *
1.12 import os, sys
1.13
1.14 # Remove old test files.
1.15
1.16 -for filename in ("test", "testMS", "testNMS", "testF", "testFI", "testI", "testP", "testP2", "testPI"):
1.17 +for filename in ("test", "testMS", "testNMS", "testP", "testP2"):
1.18 try:
1.19 os.remove(filename)
1.20 except OSError:
1.21 pass
1.22
1.23 try:
1.24 - for dirname in ("test_index", "test_index2", "test_index3", "test_indexT"):
1.25 + for dirname in ("test_index",):
1.26 for filename in os.listdir(dirname):
1.27 os.remove(os.path.join(dirname, filename))
1.28 os.rmdir(dirname)
1.29 @@ -98,22 +97,20 @@
1.30 ]
1.31
1.32 f = open("testP", "wb")
1.33 -w = PositionWriter(f)
1.34 +w = TermWriter(f)
1.35 w.begin(0, 0)
1.36 for doc_positions in all_doc_positions:
1.37 - w.reset()
1.38 - for docnum, positions in doc_positions:
1.39 - w.write_positions(docnum, positions)
1.40 + w.write_positions(doc_positions)
1.41 + w.end_record()
1.42 w.close()
1.43
1.44 f = open("testP", "rb")
1.45 -r = PositionReader(f)
1.46 +r = TermReader(f)
1.47 for doc_positions in all_doc_positions:
1.48 - r.reset()
1.49 - for docnum, positions in doc_positions:
1.50 - d, p = r.read_positions()
1.51 - print docnum == d, docnum, d
1.52 - print positions == p, positions, p
1.53 + r.begin_record()
1.54 + dp = r.read_positions()
1.55 + print doc_positions == dp, doc_positions
1.56 + print " ", dp
1.57 r.close()
1.58
1.59 all_doc_positions_seq = [
1.60 @@ -131,350 +128,56 @@
1.61 ]
1.62
1.63 f = open("testP2", "wb")
1.64 -w = PositionWriter(f)
1.65 +w = TermWriter(f)
1.66 w.begin(2, 2)
1.67 for doc_positions in all_doc_positions_seq:
1.68 - w.reset()
1.69 - for docnum, positions in doc_positions:
1.70 - w.write_positions(docnum, positions)
1.71 + w.write_positions(doc_positions)
1.72 + w.end_record()
1.73 w.close()
1.74
1.75 f = open("testP2", "rb")
1.76 -r = PositionReader(f)
1.77 +r = TermReader(f)
1.78 for doc_positions in all_doc_positions_seq:
1.79 - r.reset()
1.80 - for docnum, positions in doc_positions:
1.81 - d, p = r.read_positions()
1.82 - print docnum == d, docnum, d
1.83 - print positions == p, positions, p
1.84 -r.close()
1.85 -
1.86 -print "- Test position index files."
1.87 -
1.88 -indexed_positions = [
1.89 - [
1.90 - (1234, 0, 100),
1.91 - (2345, 700, 100),
1.92 - (3456, 1900, 50)
1.93 - ],
1.94 - [
1.95 - (4567, 2800, 20)
1.96 - ]
1.97 - ]
1.98 -
1.99 -offsets = []
1.100 -f = open("testPI", "wb")
1.101 -w = PositionIndexWriter(f)
1.102 -w.begin(0)
1.103 -for term_positions in indexed_positions:
1.104 - offset = None
1.105 - doc_frequency = 0
1.106 - w.reset()
1.107 - for docnum, pos_offset, count in term_positions:
1.108 - if offset is None:
1.109 - offset = w.tell()
1.110 - w.write_positions(docnum, pos_offset, count)
1.111 - doc_frequency += count
1.112 - offsets.append((offset, doc_frequency))
1.113 -w.close()
1.114 -
1.115 -r = PositionIndexIterator(PositionIndexReader(open("testPI", "rb")))
1.116 -offsets.reverse()
1.117 -indexed_positions.reverse()
1.118 -for (offset, doc_frequency), term_positions in zip(offsets, indexed_positions):
1.119 - r.seek(offset, doc_frequency)
1.120 - for (docnum, pos_offset, count), (dn, po, c) in zip(term_positions, r):
1.121 - print docnum == dn, docnum, dn
1.122 - print pos_offset == po, pos_offset, po
1.123 - print count == c, count, c
1.124 -r.reader.close()
1.125 -
1.126 -print "- Test position dictionaries."
1.127 -
1.128 -f = open("testP", "wb")
1.129 -w = PositionWriter(f)
1.130 -f2 = open("testPI", "wb")
1.131 -w2 = PositionIndexWriter(f2)
1.132 -wd = PositionDictionaryWriter(w, w2, 2)
1.133 -offsets = []
1.134 -for doc_positions in all_doc_positions:
1.135 - offset, frequency, doc_frequency = wd.write_term_positions(doc_positions)
1.136 - offsets.append((offset, doc_frequency))
1.137 -wd.close()
1.138 -
1.139 -r = PositionReader(open("testP", "rb"))
1.140 -r2 = PositionIndexReader(open("testPI", "rb"))
1.141 -rd = PositionDictionaryReader(r, r2)
1.142 -offsets.reverse()
1.143 -all_doc_positions.reverse()
1.144 -for (offset, doc_frequency), doc_positions in zip(offsets, all_doc_positions):
1.145 - it = rd.read_term_positions(offset, doc_frequency)
1.146 - dp = list(it)
1.147 - print doc_positions == dp, doc_positions, dp
1.148 -rd.close()
1.149 -
1.150 -print "- Test fields."
1.151 -
1.152 -doc_fields = [
1.153 - (123, ["testing", "fields", "stored", "compressed"]),
1.154 - (456, ["fields", "for a second", "document"]),
1.155 - (789, ["field value"]),
1.156 - (1234, []),
1.157 - (2345, ["abc", "def"]),
1.158 - (3456, ["apple", "banana", "cherry"]),
1.159 - (4567, ["drue", "eple"])
1.160 - ]
1.161 -
1.162 -f = open("testF", "wb")
1.163 -w = FieldWriter(f)
1.164 -w.begin(0)
1.165 -w.reset()
1.166 -for docnum, fields in doc_fields:
1.167 - w.write_fields(docnum, list(enumerate(fields)))
1.168 -w.close()
1.169 -
1.170 -f = open("testF", "rb")
1.171 -r = FieldReader(f)
1.172 -r.reset()
1.173 -for docnum, fields in doc_fields:
1.174 - dn, df = r.read_fields()
1.175 - print docnum == dn, docnum, dn
1.176 - print list(enumerate(fields)) == df, list(enumerate(fields)), df
1.177 -r.close()
1.178 -
1.179 -print "- Test field index files."
1.180 -
1.181 -indexed_docs = [
1.182 - (123, 100000987),
1.183 - (456, 100004321),
1.184 - (789, 100008765)
1.185 - ]
1.186 -
1.187 -f = open("testFI", "wb")
1.188 -w = FieldIndexWriter(f)
1.189 -w.begin(0)
1.190 -w.reset()
1.191 -for docnum, offset in indexed_docs:
1.192 - w.write_document(docnum, offset)
1.193 -w.close()
1.194 -
1.195 -f = open("testFI", "rb")
1.196 -r = FieldIndexReader(f)
1.197 -r.reset()
1.198 -for docnum, offset in indexed_docs:
1.199 - dn, o = r.read_document()
1.200 - print docnum == dn, docnum, dn
1.201 - print offset == o, offset, o
1.202 + r.begin_record()
1.203 + dp = r.read_positions()
1.204 + print doc_positions == dp, doc_positions
1.205 + print " ", dp
1.206 r.close()
1.207
1.208 -print "- Test field dictionaries."
1.209 -
1.210 -f = open("testF", "wb")
1.211 -w = FieldWriter(f)
1.212 -f2 = open("testFI", "wb")
1.213 -w2 = FieldIndexWriter(f2)
1.214 -wd = FieldDictionaryWriter(w, w2, 3)
1.215 -for docnum, fields in doc_fields:
1.216 - wd.write_fields(docnum, list(enumerate(fields)))
1.217 -wd.close()
1.218 -
1.219 -f = open("testF", "rb")
1.220 -r = FieldReader(f)
1.221 -f2 = open("testFI", "rb")
1.222 -r2 = FieldIndexReader(f2)
1.223 -rd = FieldDictionaryReader(r, r2)
1.224 -doc_fields_reversed = doc_fields[:]
1.225 -doc_fields_reversed.reverse()
1.226 -for docnum, fields in doc_fields_reversed:
1.227 - df = dict(rd.get_fields(docnum))
1.228 - print dict(enumerate(fields)) == df, dict(enumerate(fields)), df
1.229 -for docnum in (13579, 246810):
1.230 - df = rd.get_fields(docnum)
1.231 - print df is None, df
1.232 -
1.233 -print "- (Test sequential access.)"
1.234 -
1.235 -rd.rewind()
1.236 -for docnum, fields in doc_fields:
1.237 - dn, df = rd.read_fields()
1.238 - print docnum == dn, docnum, dn
1.239 - print list(enumerate(fields)) == df, list(enumerate(fields)), df
1.240 -rd.close()
1.241 -
1.242 -print "- Test terms."
1.243 -
1.244 -terms = [
1.245 - # term offset frequency doc_frequency
1.246 - ("aardvark", 100000123, 1, 1),
1.247 - ("anteater", 100000456, 2, 1),
1.248 - ("badger", 100000789, 13, 7),
1.249 - ("bull", 1000001234, 59, 17),
1.250 - ("bulldog", 1000002345, 99, 80),
1.251 - ("cat", 1000003456, 89, 28)
1.252 - ]
1.253 -
1.254 -f = open("test", "wb")
1.255 -w = TermWriter(f)
1.256 -w.reset()
1.257 -for term, offset, frequency, doc_frequency in terms:
1.258 - w.write_term(term, offset, frequency, doc_frequency)
1.259 -w.close()
1.260 -
1.261 -f = open("test", "rb")
1.262 -r = TermReader(f)
1.263 -r.reset()
1.264 -for term, offset, frequency, doc_frequency in terms:
1.265 - t, o, fr, df = r.read_term()
1.266 - print term == t, term, t
1.267 - print offset == o, offset, o
1.268 - print frequency == fr, frequency, fr
1.269 - print doc_frequency == df, doc_frequency, df
1.270 -r.close()
1.271 -
1.272 -print "- Test terms in index files."
1.273 -
1.274 -indexed_terms = [
1.275 - # term offset frequency doc_frequency info_offset
1.276 - ("aardvark", 100000123, 1, 1, 200000321),
1.277 - ("anteater", 100000456, 2, 1, 200000654),
1.278 - ("badger", 100000789, 13, 7, 200000987),
1.279 - ("bull", 1000001234, 59, 17, 200004321),
1.280 - ("bulldog", 1000002345, 99, 80, 200005432),
1.281 - ("cat", 1000003456, 89, 28, 200006543)
1.282 - ]
1.283 -
1.284 -f = open("test", "wb")
1.285 -w = TermIndexWriter(f)
1.286 -w.reset()
1.287 -for term, offset, frequency, doc_frequency, info_offset in indexed_terms:
1.288 - w.write_term(term, offset, frequency, doc_frequency, info_offset)
1.289 -w.close()
1.290 -
1.291 -f = open("test", "rb")
1.292 -r = TermIndexReader(f)
1.293 -r.reset()
1.294 -for term, offset, frequency, doc_frequency, info_offset in indexed_terms:
1.295 - t, o, fr, df, i = r.read_term()
1.296 - print term == t, term, t
1.297 - print offset == o, offset, o
1.298 - print frequency == fr, frequency, fr
1.299 - print doc_frequency == df, doc_frequency, df
1.300 - print info_offset == i, info_offset, i
1.301 -r.close()
1.302 -
1.303 -print "- Test dictionaries with only term data."
1.304 -
1.305 -f = open("test", "wb")
1.306 -w = TermWriter(f)
1.307 -f2 = open("testI", "wb")
1.308 -w2 = TermIndexWriter(f2)
1.309 -f3 = open("testP", "wb")
1.310 -w3 = PositionWriter(f3)
1.311 -f4 = open("testPI", "wb")
1.312 -w4 = PositionIndexWriter(f4)
1.313 -wp = PositionDictionaryWriter(w3, w4, 2)
1.314 -wd = TermDictionaryWriter(w, w2, wp, 3)
1.315 -for term, offset, frequency, doc_frequency in terms:
1.316 - wd._write_term(term, offset, frequency, doc_frequency)
1.317 -wd.close()
1.318 -
1.319 -f = open("test", "rb")
1.320 -r = TermReader(f)
1.321 -f2 = open("testI", "rb")
1.322 -r2 = TermIndexReader(f2)
1.323 -r3 = PositionReader(open("testP", "rb"))
1.324 -r4 = PositionIndexReader(open("testPI", "rb"))
1.325 -rp = PositionDictionaryReader(r3, r4)
1.326 -rd = TermDictionaryReader(r, r2, rp)
1.327 -terms_reversed = terms[:]
1.328 -terms_reversed.reverse()
1.329 -for term, offset, frequency, doc_frequency in terms_reversed:
1.330 - o, fr, df = rd._find_term(term)
1.331 - print offset == o, offset, o
1.332 - print frequency == fr, frequency, fr
1.333 - print doc_frequency == df, doc_frequency, df
1.334 -for term in ("dog", "dingo"):
1.335 - t = rd._find_term(term)
1.336 - print t is None, t
1.337 -
1.338 -print "- (Test term prefix searching.)"
1.339 -
1.340 -print rd.find_terms("a") == ["aardvark", "anteater"], rd.find_terms("a"), ["aardvark", "anteater"]
1.341 -print rd.find_terms("bu") == ["bull", "bulldog"], rd.find_terms("bu"), ["bull", "bulldog"]
1.342 -print rd.find_terms("c") == ["cat"], rd.find_terms("c"), ["cat"]
1.343 -print rd.find_terms("d") == [], rd.find_terms("d"), []
1.344 -rd.close()
1.345 -
1.346 print "- Test dictionaries with term and position data."
1.347
1.348 terms_with_positions = [
1.349 ("aardvark", [(1, [2, 45, 96]), (20, [13])]),
1.350 ("anteater", [(1, [43, 44])]),
1.351 ("badger", [(7, [2, 22, 196]), (19, [55, 1333]), (21, [0])]),
1.352 + (u"bjørn", [(11, [19, 54])]),
1.353 ("bull", [(6, [128]), (16, [12]), (26, [1, 3, 5, 7, 9]), (36, [2, 4, 6, 8, 10])]),
1.354 ("bulldog", [(43, [17, 19, 256, 512])]),
1.355 - ("cat", [(123, [12, 145, 196]), (1200, [113])])
1.356 - ]
1.357 -
1.358 -position_dict_tests = [
1.359 - ("badger", 19, [55, 1333]),
1.360 - ("badger", 20, None),
1.361 - ("bull", 6, [128]),
1.362 - ("bull", 26, [1, 3, 5, 7, 9]),
1.363 - ("cat", 111, None),
1.364 - ("cat", 123, [12, 145, 196]),
1.365 - ("cat", 1234, None)
1.366 + ("cat", [(123, [12, 145, 196]), (1200, [113])]),
1.367 + (u"å", [(15, [384])]),
1.368 ]
1.369
1.370 f = open("test", "wb")
1.371 w = TermWriter(f)
1.372 -f2 = open("testI", "wb")
1.373 -w2 = TermIndexWriter(f2)
1.374 -f3 = open("testP", "wb")
1.375 -w3 = PositionWriter(f3)
1.376 -f4 = open("testPI", "wb")
1.377 -w4 = PositionIndexWriter(f4)
1.378 -wp = PositionDictionaryWriter(w3, w4, 2)
1.379 -wd = TermDictionaryWriter(w, w2, wp, 3)
1.380 -for term, doc_positions in terms_with_positions:
1.381 - wd.write_term_positions(term, doc_positions)
1.382 -wd.close()
1.383 +w.begin(0, 0)
1.384 +w.write_terms(terms_with_positions)
1.385 +w.close()
1.386
1.387 f = open("test", "rb")
1.388 -r = TermReader(f)
1.389 -f2 = open("testI", "rb")
1.390 -r2 = TermIndexReader(f2)
1.391 -r3 = PositionReader(open("testP", "rb"))
1.392 -r4 = PositionIndexReader(open("testPI", "rb"))
1.393 -rp = PositionDictionaryReader(r3, r4)
1.394 -rd = TermDictionaryReader(r, r2, rp)
1.395 -terms_reversed = terms_with_positions[:]
1.396 -terms_reversed.reverse()
1.397 -for term, doc_positions in terms_reversed:
1.398 - dp = list(rd.find_positions(term))
1.399 - print doc_positions == dp, doc_positions, dp
1.400 -for term in ("aaa", "dog", "dingo"):
1.401 - dp = rd.find_positions(term)
1.402 - print dp == [], dp
1.403 +r = TermIterator(f)
1.404 +for (term, doc_positions), (t, dp) in zip(terms_with_positions, r):
1.405 + print term == t, term, t
1.406 + print doc_positions == dp, doc_positions
1.407 + print " ", dp
1.408 +r.close()
1.409
1.410 -print "- (Test iterators.)"
1.411 -
1.412 -for term, docnum, positions in position_dict_tests:
1.413 - dp = rd.find_positions(term)
1.414 - pos = dp.from_document(docnum)
1.415 - print positions is None and pos is None or pos is not None and positions == list(pos), positions, pos
1.416 -
1.417 -print "- (Test sequential access.)"
1.418 +f = open("test", "rb")
1.419 +r = TermDataIterator(f)
1.420 +for (term, doc_positions), (t, data) in zip(terms_with_positions, r):
1.421 + print term == t, term, t, data
1.422 +r.close()
1.423
1.424 -rd.rewind()
1.425 -for term, doc_positions in terms_with_positions:
1.426 - t, fr, df, dp = rd.read_term()
1.427 - dp = list(dp)
1.428 - print term == t, term, t
1.429 - print doc_positions == dp, doc_positions, dp
1.430 -rd.close()
1.431 -
1.432 -print "- Test high-level index operations (including merging)."
1.433 +print "- Test high-level index operations."
1.434
1.435 docs = [
1.436 (1, "The cat sat on the mat"),
1.437 @@ -485,189 +188,26 @@
1.438 (36, "She sells sea shells on the sea shore")
1.439 ]
1.440
1.441 -doc_tests = [
1.442 - ("Every", 2, [(2, [0]), (14, [0])]),
1.443 - ("good", 2, [(2, [1]), (13, [1])]),
1.444 - ("deserves", 2, [(2, [3]), (13, [3])]),
1.445 - ("sea", 2, [(36, [2, 6])])
1.446 - ]
1.447 -
1.448 -position_tests = [
1.449 - ("Every", 14, [0]),
1.450 - ("sea", 36, [2, 6]),
1.451 - ("shells", 1, None),
1.452 - ("shells", 37, None)
1.453 - ]
1.454 -
1.455 -phrase_tests = [
1.456 - (["good", "boy"], [(2, [1, 2])]),
1.457 - (["on", "the"], [(1, [3, 4]), (36, [4, 5])]),
1.458 - (["sea", "shore"], [(36, [6, 7])])
1.459 - ]
1.460 -
1.461 -index = Index("test_index", 3, 2, 3, 6)
1.462 +index = Index("test_index", 3)
1.463 wi = index.get_writer()
1.464 for docnum, text in docs:
1.465 doc = Document(docnum)
1.466 for position, term in enumerate(text.split()):
1.467 doc.add_position(term, position)
1.468 - doc.add_field(123, text)
1.469 - wi.add_document(doc)
1.470 -wi.close()
1.471 -
1.472 -rd = index.get_reader()
1.473 -
1.474 -print "- (Test searching.)"
1.475 -
1.476 -for term, frequency, doc_positions in doc_tests:
1.477 - dp = list(rd.find_positions(term))
1.478 - print doc_positions == dp, doc_positions, dp
1.479 - fr = rd.get_frequency(term)
1.480 - print frequency == fr, frequency, fr
1.481 -
1.482 -print "- (Test fields.)"
1.483 -
1.484 -for docnum, text in docs:
1.485 - df = dict(rd.get_fields(docnum))
1.486 - print df[123] == text, text, df[123]
1.487 -
1.488 -print "- (Test navigation.)"
1.489 -
1.490 -for term, docnum, positions in position_tests:
1.491 - dp = rd.find_positions(term)
1.492 - pos = dp.from_document(docnum)
1.493 - print positions is None and pos is None or pos is not None and positions == list(pos), positions, pos
1.494 -
1.495 -print "- (Test phrases.)"
1.496 -
1.497 -for terms, results in phrase_tests:
1.498 - res = list(rd.find_common_positions(terms))
1.499 - print results == res, results, res
1.500 -
1.501 -index.close()
1.502 -
1.503 -docs2 = [
1.504 - ((1, 0), "The cat sat on the mat"),
1.505 - ((1, 2), "Every good boy deserves football"),
1.506 - ((13, 1), "One good turn deserves another"),
1.507 - ((14, 0), "Every man for himself"),
1.508 - ((14, 25), "Red sky at night shepherd's delight"),
1.509 - ((36, 12), "She sells sea shells on the sea shore")
1.510 - ]
1.511 -
1.512 -doc_tests2 = [
1.513 - ("Every", 2, [((1, 2), [(0, 0)]), ((14, 0), [(0, 0)])]),
1.514 - ("good", 2, [((1, 2), [(1, 6)]), ((13, 1), [(1, 4)])]),
1.515 - ("deserves", 2, [((1, 2), [(3, 15)]), ((13, 1), [(3, 14)])]),
1.516 - ("sea", 2, [((36, 12), [(2, 10), (6, 28)])])
1.517 - ]
1.518 -
1.519 -position_tests2 = [
1.520 - ("Every", (14, 0), [(0, 0)]),
1.521 - ("sea", (36, 12), [(2, 10), (6, 28)]),
1.522 - ("shells", (1, 0), None),
1.523 - ("shells", (37, 0), None)
1.524 - ]
1.525 -
1.526 -phrase_tests2 = [
1.527 - (["good", "boy"], [((1, 2), [(1, 6), (2, 11)])]),
1.528 - (["on", "the"], [((1, 0), [(3, 12), (4, 15)]), ((36, 12), [(4, 21), (5, 24)])]),
1.529 - (["sea", "shore"], [((36, 12), [(6, 28), (7, 32)])])
1.530 - ]
1.531 -
1.532 -index = Index("test_indexT", 3, 2, 3, 6)
1.533 -wi = index.get_writer()
1.534 -for docnum, text in docs2:
1.535 - doc = Document(docnum)
1.536 - offset = 0
1.537 - for position, term in enumerate(text.split()):
1.538 - doc.add_position(term, (position, offset))
1.539 - offset += len(term) + 1 # assume one space after the term
1.540 - doc.add_field(123, text)
1.541 wi.add_document(doc)
1.542 wi.close()
1.543
1.544 -rd = index.get_reader()
1.545 -
1.546 -print "- (Test searching.)"
1.547 -
1.548 -for term, frequency, doc_positions in doc_tests2:
1.549 - dp = list(rd.find_positions(term))
1.550 - print doc_positions == dp, doc_positions, dp
1.551 - fr = rd.get_frequency(term)
1.552 - print frequency == fr, frequency, fr
1.553 -
1.554 -print "- (Test fields.)"
1.555 +print "- Test merge."
1.556
1.557 -for docnum, text in docs2:
1.558 - df = dict(rd.get_fields(docnum))
1.559 - print df[123] == text, text, df[123]
1.560 -
1.561 -print "- (Test navigation.)"
1.562 +l1 = list(index.get_reader())
1.563 +index.merge()
1.564 +l2 = list(index.get_reader(1))
1.565
1.566 -for term, docnum, positions in position_tests2:
1.567 - dp = rd.find_positions(term)
1.568 - pos = dp.from_document(docnum)
1.569 - print positions is None and pos is None or pos is not None and positions == list(pos), positions, pos
1.570 -
1.571 -print "- (Test phrases.)"
1.572 -
1.573 -for terms, results in phrase_tests2:
1.574 - res = list(rd.find_common_positions(terms))
1.575 - print results == res, results, res
1.576 +for (t1, dp1), (t2, dp2) in zip(l1, l2):
1.577 + print t1 == t2, t1, t2
1.578 + print dp1 == dp2, dp1 # compare pre-merge positions against post-merge positions
1.579 + print " ", dp2
1.580
1.581 index.close()
1.582
1.583 -print "- Test index updates."
1.584 -
1.585 -index = Index("test_index")
1.586 -index2 = Index("test_index2", 3, 2, 3, 6)
1.587 -wi = index2.get_writer()
1.588 -for docnum, text in docs:
1.589 -
1.590 - # Add the same documents but with different numbers.
1.591 -
1.592 - doc = Document(docnum + 100)
1.593 - for position, term in enumerate(text.split()):
1.594 - doc.add_position(term, position)
1.595 - doc.add_field(123, text)
1.596 - wi.add_document(doc)
1.597 -wi.close()
1.598 -
1.599 -index2.update([index])
1.600 -index.close()
1.601 -
1.602 -rd = index2.get_reader()
1.603 -for term, frequency, doc_positions in doc_tests:
1.604 -
1.605 - # Add the extra documents to the expected result.
1.606 -
1.607 - orig_doc_positions = doc_positions
1.608 - doc_positions = doc_positions[:]
1.609 -
1.610 - for docnum, positions in orig_doc_positions:
1.611 - doc_positions.append((docnum + 100, positions))
1.612 - frequency *= 2
1.613 -
1.614 - dp = list(rd.find_positions(term))
1.615 - print doc_positions == dp, doc_positions, dp
1.616 - fr = rd.get_frequency(term)
1.617 - print frequency == fr, frequency, fr
1.618 -index2.close()
1.619 -
1.620 -print "- (Test update of an empty index.)"
1.621 -
1.622 -index = Index("test_index")
1.623 -index3 = Index("test_index3")
1.624 -index3.update([index])
1.625 -index.close()
1.626 -
1.627 -rd = index3.get_reader()
1.628 -for term, frequency, doc_positions in doc_tests:
1.629 - dp = list(rd.find_positions(term))
1.630 - print doc_positions == dp, doc_positions, dp
1.631 - fr = rd.get_frequency(term)
1.632 - print frequency == fr, frequency, fr
1.633 -index3.close()
1.634 -
1.635 # vim: tabstop=4 expandtab shiftwidth=4