1.1 --- a/test.py	Sat Feb 12 01:23:58 2011 +0100
     1.2 +++ b/test.py	Sun Feb 13 02:49:55 2011 +0100
     1.3 @@ -1,22 +1,21 @@
     1.4  #!/usr/bin/env python
     1.5 +# encoding: iso-8859-1
     1.6  
     1.7  from iixr.files import *
     1.8 -from iixr.fields import *
     1.9  from iixr.terms import *
    1.10 -from iixr.positions import *
    1.11  from iixr.index import *
    1.12  import os, sys
    1.13  
    1.14  # Remove old test files.
    1.15  
    1.16 -for filename in ("test", "testMS", "testNMS", "testF", "testFI", "testI", "testP", "testP2", "testPI"):
    1.17 +for filename in ("test", "testMS", "testNMS", "testP", "testP2"):
    1.18      try:
    1.19          os.remove(filename)
    1.20      except OSError:
    1.21          pass
    1.22  
    1.23  try:
    1.24 -    for dirname in ("test_index", "test_index2", "test_index3", "test_indexT"):
    1.25 +    for dirname in ("test_index",):
    1.26          for filename in os.listdir(dirname):
    1.27              os.remove(os.path.join(dirname, filename))
    1.28          os.rmdir(dirname)
    1.29 @@ -98,22 +97,20 @@
    1.30      ]
    1.31  
    1.32  f = open("testP", "wb")
    1.33 -w = PositionWriter(f)
    1.34 +w = TermWriter(f)
    1.35  w.begin(0, 0)
    1.36  for doc_positions in all_doc_positions:
    1.37 -    w.reset()
    1.38 -    for docnum, positions in doc_positions:
    1.39 -        w.write_positions(docnum, positions)
    1.40 +    w.write_positions(doc_positions)
    1.41 +    w.end_record()
    1.42  w.close()
    1.43  
    1.44  f = open("testP", "rb")
    1.45 -r = PositionReader(f)
    1.46 +r = TermReader(f)
    1.47  for doc_positions in all_doc_positions:
    1.48 -    r.reset()
    1.49 -    for docnum, positions in doc_positions:
    1.50 -        d, p = r.read_positions()
    1.51 -        print docnum == d, docnum, d
    1.52 -        print positions == p, positions, p
    1.53 +    r.begin_record()
    1.54 +    dp = r.read_positions()
    1.55 +    print doc_positions == dp, doc_positions
    1.56 +    print "     ", dp
    1.57  r.close()
    1.58  
    1.59  all_doc_positions_seq = [
    1.60 @@ -131,350 +128,56 @@
    1.61      ]
    1.62  
    1.63  f = open("testP2", "wb")
    1.64 -w = PositionWriter(f)
    1.65 +w = TermWriter(f)
    1.66  w.begin(2, 2)
    1.67  for doc_positions in all_doc_positions_seq:
    1.68 -    w.reset()
    1.69 -    for docnum, positions in doc_positions:
    1.70 -        w.write_positions(docnum, positions)
    1.71 +    w.write_positions(doc_positions)
    1.72 +    w.end_record()
    1.73  w.close()
    1.74  
    1.75  f = open("testP2", "rb")
    1.76 -r = PositionReader(f)
    1.77 +r = TermReader(f)
    1.78  for doc_positions in all_doc_positions_seq:
    1.79 -    r.reset()
    1.80 -    for docnum, positions in doc_positions:
    1.81 -        d, p = r.read_positions()
    1.82 -        print docnum == d, docnum, d
    1.83 -        print positions == p, positions, p
    1.84 -r.close()
    1.85 -
    1.86 -print "- Test position index files."
    1.87 -
    1.88 -indexed_positions = [
    1.89 -    [
    1.90 -        (1234, 0, 100),
    1.91 -        (2345, 700, 100),
    1.92 -        (3456, 1900, 50)
    1.93 -    ],
    1.94 -    [
    1.95 -        (4567, 2800, 20)
    1.96 -    ]
    1.97 -    ]
    1.98 -
    1.99 -offsets = []
   1.100 -f = open("testPI", "wb")
   1.101 -w = PositionIndexWriter(f)
   1.102 -w.begin(0)
   1.103 -for term_positions in indexed_positions:
   1.104 -    offset = None
   1.105 -    doc_frequency = 0
   1.106 -    w.reset()
   1.107 -    for docnum, pos_offset, count in term_positions:
   1.108 -        if offset is None:
   1.109 -            offset = w.tell()
   1.110 -        w.write_positions(docnum, pos_offset, count)
   1.111 -        doc_frequency += count
   1.112 -    offsets.append((offset, doc_frequency))
   1.113 -w.close()
   1.114 -
   1.115 -r = PositionIndexIterator(PositionIndexReader(open("testPI", "rb")))
   1.116 -offsets.reverse()
   1.117 -indexed_positions.reverse()
   1.118 -for (offset, doc_frequency), term_positions in zip(offsets, indexed_positions):
   1.119 -    r.seek(offset, doc_frequency)
   1.120 -    for (docnum, pos_offset, count), (dn, po, c) in zip(term_positions, r):
   1.121 -        print docnum == dn, docnum, dn
   1.122 -        print pos_offset == po, pos_offset, po
   1.123 -        print count == c, count, c
   1.124 -r.reader.close()
   1.125 -
   1.126 -print "- Test position dictionaries."
   1.127 -
   1.128 -f = open("testP", "wb")
   1.129 -w = PositionWriter(f)
   1.130 -f2 = open("testPI", "wb")
   1.131 -w2 = PositionIndexWriter(f2)
   1.132 -wd = PositionDictionaryWriter(w, w2, 2)
   1.133 -offsets = []
   1.134 -for doc_positions in all_doc_positions:
   1.135 -    offset, frequency, doc_frequency = wd.write_term_positions(doc_positions)
   1.136 -    offsets.append((offset, doc_frequency))
   1.137 -wd.close()
   1.138 -
   1.139 -r = PositionReader(open("testP", "rb"))
   1.140 -r2 = PositionIndexReader(open("testPI", "rb"))
   1.141 -rd = PositionDictionaryReader(r, r2)
   1.142 -offsets.reverse()
   1.143 -all_doc_positions.reverse()
   1.144 -for (offset, doc_frequency), doc_positions in zip(offsets, all_doc_positions):
   1.145 -    it = rd.read_term_positions(offset, doc_frequency)
   1.146 -    dp = list(it)
   1.147 -    print doc_positions == dp, doc_positions, dp
   1.148 -rd.close()
   1.149 -
   1.150 -print "- Test fields."
   1.151 -
   1.152 -doc_fields = [
   1.153 -    (123, ["testing", "fields", "stored", "compressed"]),
   1.154 -    (456, ["fields", "for a second", "document"]),
   1.155 -    (789, ["field value"]),
   1.156 -    (1234, []),
   1.157 -    (2345, ["abc", "def"]),
   1.158 -    (3456, ["apple", "banana", "cherry"]),
   1.159 -    (4567, ["drue", "eple"])
   1.160 -    ]
   1.161 -
   1.162 -f = open("testF", "wb")
   1.163 -w = FieldWriter(f)
   1.164 -w.begin(0)
   1.165 -w.reset()
   1.166 -for docnum, fields in doc_fields:
   1.167 -    w.write_fields(docnum, list(enumerate(fields)))
   1.168 -w.close()
   1.169 -
   1.170 -f = open("testF", "rb")
   1.171 -r = FieldReader(f)
   1.172 -r.reset()
   1.173 -for docnum, fields in doc_fields:
   1.174 -    dn, df = r.read_fields()
   1.175 -    print docnum == dn, docnum, dn
   1.176 -    print list(enumerate(fields)) == df, list(enumerate(fields)), df
   1.177 -r.close()
   1.178 -
   1.179 -print "- Test field index files."
   1.180 -
   1.181 -indexed_docs = [
   1.182 -    (123, 100000987),
   1.183 -    (456, 100004321),
   1.184 -    (789, 100008765)
   1.185 -    ]
   1.186 -
   1.187 -f = open("testFI", "wb")
   1.188 -w = FieldIndexWriter(f)
   1.189 -w.begin(0)
   1.190 -w.reset()
   1.191 -for docnum, offset in indexed_docs:
   1.192 -    w.write_document(docnum, offset)
   1.193 -w.close()
   1.194 -
   1.195 -f = open("testFI", "rb")
   1.196 -r = FieldIndexReader(f)
   1.197 -r.reset()
   1.198 -for docnum, offset in indexed_docs:
   1.199 -    dn, o = r.read_document()
   1.200 -    print docnum == dn, docnum, dn
   1.201 -    print offset == o, offset, o
   1.202 +    r.begin_record()
   1.203 +    dp = r.read_positions()
   1.204 +    print doc_positions == dp, doc_positions
   1.205 +    print "     ", dp
   1.206  r.close()
   1.207  
   1.208 -print "- Test field dictionaries."
   1.209 -
   1.210 -f = open("testF", "wb")
   1.211 -w = FieldWriter(f)
   1.212 -f2 = open("testFI", "wb")
   1.213 -w2 = FieldIndexWriter(f2)
   1.214 -wd = FieldDictionaryWriter(w, w2, 3)
   1.215 -for docnum, fields in doc_fields:
   1.216 -    wd.write_fields(docnum, list(enumerate(fields)))
   1.217 -wd.close()
   1.218 -
   1.219 -f = open("testF", "rb")
   1.220 -r = FieldReader(f)
   1.221 -f2 = open("testFI", "rb")
   1.222 -r2 = FieldIndexReader(f2)
   1.223 -rd = FieldDictionaryReader(r, r2)
   1.224 -doc_fields_reversed = doc_fields[:]
   1.225 -doc_fields_reversed.reverse()
   1.226 -for docnum, fields in doc_fields_reversed:
   1.227 -    df = dict(rd.get_fields(docnum))
   1.228 -    print dict(enumerate(fields)) == df, dict(enumerate(fields)), df
   1.229 -for docnum in (13579, 246810):
   1.230 -    df = rd.get_fields(docnum)
   1.231 -    print df is None, df
   1.232 -
   1.233 -print "- (Test sequential access.)"
   1.234 -
   1.235 -rd.rewind()
   1.236 -for docnum, fields in doc_fields:
   1.237 -    dn, df = rd.read_fields()
   1.238 -    print docnum == dn, docnum, dn
   1.239 -    print list(enumerate(fields)) == df, list(enumerate(fields)), df
   1.240 -rd.close()
   1.241 -
   1.242 -print "- Test terms."
   1.243 -
   1.244 -terms = [
   1.245 -    # term       offset      frequency  doc_frequency
   1.246 -    ("aardvark",  100000123,  1,         1),
   1.247 -    ("anteater",  100000456,  2,         1),
   1.248 -    ("badger",    100000789, 13,         7),
   1.249 -    ("bull",     1000001234, 59,        17),
   1.250 -    ("bulldog",  1000002345, 99,        80),
   1.251 -    ("cat",      1000003456, 89,        28)
   1.252 -    ]
   1.253 -
   1.254 -f = open("test", "wb")
   1.255 -w = TermWriter(f)
   1.256 -w.reset()
   1.257 -for term, offset, frequency, doc_frequency in terms:
   1.258 -    w.write_term(term, offset, frequency, doc_frequency)
   1.259 -w.close()
   1.260 -
   1.261 -f = open("test", "rb")
   1.262 -r = TermReader(f)
   1.263 -r.reset()
   1.264 -for term, offset, frequency, doc_frequency in terms:
   1.265 -    t, o, fr, df = r.read_term()
   1.266 -    print term == t, term, t
   1.267 -    print offset == o, offset, o
   1.268 -    print frequency == fr, frequency, fr
   1.269 -    print doc_frequency == df, doc_frequency, df
   1.270 -r.close()
   1.271 -
   1.272 -print "- Test terms in index files."
   1.273 -
   1.274 -indexed_terms = [
   1.275 -    # term       offset      frequency  doc_frequency   info_offset
   1.276 -    ("aardvark",  100000123,  1,         1,             200000321),
   1.277 -    ("anteater",  100000456,  2,         1,             200000654),
   1.278 -    ("badger",    100000789, 13,         7,             200000987),
   1.279 -    ("bull",     1000001234, 59,        17,             200004321),
   1.280 -    ("bulldog",  1000002345, 99,        80,             200005432),
   1.281 -    ("cat",      1000003456, 89,        28,             200006543)
   1.282 -    ]
   1.283 -
   1.284 -f = open("test", "wb")
   1.285 -w = TermIndexWriter(f)
   1.286 -w.reset()
   1.287 -for term, offset, frequency, doc_frequency, info_offset in indexed_terms:
   1.288 -    w.write_term(term, offset, frequency, doc_frequency, info_offset)
   1.289 -w.close()
   1.290 -
   1.291 -f = open("test", "rb")
   1.292 -r = TermIndexReader(f)
   1.293 -r.reset()
   1.294 -for term, offset, frequency, doc_frequency, info_offset in indexed_terms:
   1.295 -    t, o, fr, df, i = r.read_term()
   1.296 -    print term == t, term, t
   1.297 -    print offset == o, offset, o
   1.298 -    print frequency == fr, frequency, fr
   1.299 -    print doc_frequency == df, doc_frequency, df
   1.300 -    print info_offset == i, info_offset, i
   1.301 -r.close()
   1.302 -
   1.303 -print "- Test dictionaries with only term data."
   1.304 -
   1.305 -f = open("test", "wb")
   1.306 -w = TermWriter(f)
   1.307 -f2 = open("testI", "wb")
   1.308 -w2 = TermIndexWriter(f2)
   1.309 -f3 = open("testP", "wb")
   1.310 -w3 = PositionWriter(f3)
   1.311 -f4 = open("testPI", "wb")
   1.312 -w4 = PositionIndexWriter(f4)
   1.313 -wp = PositionDictionaryWriter(w3, w4, 2)
   1.314 -wd = TermDictionaryWriter(w, w2, wp, 3)
   1.315 -for term, offset, frequency, doc_frequency in terms:
   1.316 -    wd._write_term(term, offset, frequency, doc_frequency)
   1.317 -wd.close()
   1.318 -
   1.319 -f = open("test", "rb")
   1.320 -r = TermReader(f)
   1.321 -f2 = open("testI", "rb")
   1.322 -r2 = TermIndexReader(f2)
   1.323 -r3 = PositionReader(open("testP", "rb"))
   1.324 -r4 = PositionIndexReader(open("testPI", "rb"))
   1.325 -rp = PositionDictionaryReader(r3, r4)
   1.326 -rd = TermDictionaryReader(r, r2, rp)
   1.327 -terms_reversed = terms[:]
   1.328 -terms_reversed.reverse()
   1.329 -for term, offset, frequency, doc_frequency in terms_reversed:
   1.330 -    o, fr, df = rd._find_term(term)
   1.331 -    print offset == o, offset, o
   1.332 -    print frequency == fr, frequency, fr
   1.333 -    print doc_frequency == df, doc_frequency, df
   1.334 -for term in ("dog", "dingo"):
   1.335 -    t = rd._find_term(term)
   1.336 -    print t is None, t
   1.337 -
   1.338 -print "- (Test term prefix searching.)"
   1.339 -
   1.340 -print rd.find_terms("a") == ["aardvark", "anteater"], rd.find_terms("a"), ["aardvark", "anteater"]
   1.341 -print rd.find_terms("bu") == ["bull", "bulldog"], rd.find_terms("bu"), ["bull", "bulldog"]
   1.342 -print rd.find_terms("c") == ["cat"], rd.find_terms("c"), ["cat"]
   1.343 -print rd.find_terms("d") == [], rd.find_terms("d"), []
   1.344 -rd.close()
   1.345 -
   1.346  print "- Test dictionaries with term and position data."
   1.347  
   1.348  terms_with_positions = [
   1.349      ("aardvark",  [(1, [2, 45, 96]), (20, [13])]),
   1.350      ("anteater",  [(1, [43, 44])]),
   1.351      ("badger",    [(7, [2, 22, 196]), (19, [55, 1333]), (21, [0])]),
   1.352 +    (u"bjørn",    [(11, [19, 54])]),
   1.353      ("bull",      [(6, [128]), (16, [12]), (26, [1, 3, 5, 7, 9]), (36, [2, 4, 6, 8, 10])]),
   1.354      ("bulldog",   [(43, [17, 19, 256, 512])]),
   1.355 -    ("cat",       [(123, [12, 145, 196]), (1200, [113])])
   1.356 -    ]
   1.357 -
   1.358 -position_dict_tests = [
   1.359 -    ("badger", 19, [55, 1333]),
   1.360 -    ("badger", 20, None),
   1.361 -    ("bull", 6, [128]),
   1.362 -    ("bull", 26, [1, 3, 5, 7, 9]),
   1.363 -    ("cat", 111, None),
   1.364 -    ("cat", 123, [12, 145, 196]),
   1.365 -    ("cat", 1234, None)
   1.366 +    ("cat",       [(123, [12, 145, 196]), (1200, [113])]),
   1.367 +    (u"�",        [(15, [384])]),
   1.368      ]
   1.369  
   1.370  f = open("test", "wb")
   1.371  w = TermWriter(f)
   1.372 -f2 = open("testI", "wb")
   1.373 -w2 = TermIndexWriter(f2)
   1.374 -f3 = open("testP", "wb")
   1.375 -w3 = PositionWriter(f3)
   1.376 -f4 = open("testPI", "wb")
   1.377 -w4 = PositionIndexWriter(f4)
   1.378 -wp = PositionDictionaryWriter(w3, w4, 2)
   1.379 -wd = TermDictionaryWriter(w, w2, wp, 3)
   1.380 -for term, doc_positions in terms_with_positions:
   1.381 -    wd.write_term_positions(term, doc_positions)
   1.382 -wd.close()
   1.383 +w.begin(0, 0)
   1.384 +w.write_terms(terms_with_positions)
   1.385 +w.close()
   1.386  
   1.387  f = open("test", "rb")
   1.388 -r = TermReader(f)
   1.389 -f2 = open("testI", "rb")
   1.390 -r2 = TermIndexReader(f2)
   1.391 -r3 = PositionReader(open("testP", "rb"))
   1.392 -r4 = PositionIndexReader(open("testPI", "rb"))
   1.393 -rp = PositionDictionaryReader(r3, r4)
   1.394 -rd = TermDictionaryReader(r, r2, rp)
   1.395 -terms_reversed = terms_with_positions[:]
   1.396 -terms_reversed.reverse()
   1.397 -for term, doc_positions in terms_reversed:
   1.398 -    dp = list(rd.find_positions(term))
   1.399 -    print doc_positions == dp, doc_positions, dp
   1.400 -for term in ("aaa", "dog", "dingo"):
   1.401 -    dp = rd.find_positions(term)
   1.402 -    print dp == [], dp
   1.403 +r = TermIterator(f)
   1.404 +for (term, doc_positions), (t, dp) in zip(terms_with_positions, r):
   1.405 +    print term == t, term, t
   1.406 +    print doc_positions == dp, doc_positions
   1.407 +    print "     ", dp
   1.408 +r.close()
   1.409  
   1.410 -print "- (Test iterators.)"
   1.411 -
   1.412 -for term, docnum, positions in position_dict_tests:
   1.413 -    dp = rd.find_positions(term)
   1.414 -    pos = dp.from_document(docnum)
   1.415 -    print positions is None and pos is None or pos is not None and positions == list(pos), positions, pos
   1.416 -
   1.417 -print "- (Test sequential access.)"
   1.418 +f = open("test", "rb")
   1.419 +r = TermDataIterator(f)
   1.420 +for (term, doc_positions), (t, data) in zip(terms_with_positions, r):
   1.421 +    print term == t, term, t, data
   1.422 +r.close()
   1.423  
   1.424 -rd.rewind()
   1.425 -for term, doc_positions in terms_with_positions:
   1.426 -    t, fr, df, dp = rd.read_term()
   1.427 -    dp = list(dp)
   1.428 -    print term == t, term, t
   1.429 -    print doc_positions == dp, doc_positions, dp
   1.430 -rd.close()
   1.431 -
   1.432 -print "- Test high-level index operations (including merging)."
   1.433 +print "- Test high-level index operations."
   1.434  
   1.435  docs = [
   1.436      (1, "The cat sat on the mat"),
   1.437 @@ -485,189 +188,26 @@
   1.438      (36, "She sells sea shells on the sea shore")
   1.439      ]
   1.440  
   1.441 -doc_tests = [
   1.442 -    ("Every", 2, [(2, [0]), (14, [0])]),
   1.443 -    ("good", 2, [(2, [1]), (13, [1])]),
   1.444 -    ("deserves", 2, [(2, [3]), (13, [3])]),
   1.445 -    ("sea", 2, [(36, [2, 6])])
   1.446 -    ]
   1.447 -
   1.448 -position_tests = [
   1.449 -    ("Every", 14, [0]),
   1.450 -    ("sea", 36, [2, 6]),
   1.451 -    ("shells", 1, None),
   1.452 -    ("shells", 37, None)
   1.453 -    ]
   1.454 -
   1.455 -phrase_tests = [
   1.456 -    (["good", "boy"], [(2, [1, 2])]),
   1.457 -    (["on", "the"], [(1, [3, 4]), (36, [4, 5])]),
   1.458 -    (["sea", "shore"], [(36, [6, 7])])
   1.459 -    ]
   1.460 -
   1.461 -index = Index("test_index", 3, 2, 3, 6)
   1.462 +index = Index("test_index", 3)
   1.463  wi = index.get_writer()
   1.464  for docnum, text in docs:
   1.465      doc = Document(docnum)
   1.466      for position, term in enumerate(text.split()):
   1.467          doc.add_position(term, position)
   1.468 -    doc.add_field(123, text)
   1.469 -    wi.add_document(doc)
   1.470 -wi.close()
   1.471 -
   1.472 -rd = index.get_reader()
   1.473 -
   1.474 -print "- (Test searching.)"
   1.475 -
   1.476 -for term, frequency, doc_positions in doc_tests:
   1.477 -    dp = list(rd.find_positions(term))
   1.478 -    print doc_positions == dp, doc_positions, dp
   1.479 -    fr = rd.get_frequency(term)
   1.480 -    print frequency == fr, frequency, fr
   1.481 -
   1.482 -print "- (Test fields.)"
   1.483 -
   1.484 -for docnum, text in docs:
   1.485 -    df = dict(rd.get_fields(docnum))
   1.486 -    print df[123] == text, text, df[123]
   1.487 -
   1.488 -print "- (Test navigation.)"
   1.489 -
   1.490 -for term, docnum, positions in position_tests:
   1.491 -    dp = rd.find_positions(term)
   1.492 -    pos = dp.from_document(docnum)
   1.493 -    print positions is None and pos is None or pos is not None and positions == list(pos), positions, pos
   1.494 -
   1.495 -print "- (Test phrases.)"
   1.496 -
   1.497 -for terms, results in phrase_tests:
   1.498 -    res = list(rd.find_common_positions(terms))
   1.499 -    print results == res, results, res
   1.500 -
   1.501 -index.close()
   1.502 -
   1.503 -docs2 = [
   1.504 -    ((1, 0), "The cat sat on the mat"),
   1.505 -    ((1, 2), "Every good boy deserves football"),
   1.506 -    ((13, 1), "One good turn deserves another"),
   1.507 -    ((14, 0), "Every man for himself"),
   1.508 -    ((14, 25), "Red sky at night shepherd's delight"),
   1.509 -    ((36, 12), "She sells sea shells on the sea shore")
   1.510 -    ]
   1.511 -
   1.512 -doc_tests2 = [
   1.513 -    ("Every", 2, [((1, 2), [(0, 0)]), ((14, 0), [(0, 0)])]),
   1.514 -    ("good", 2, [((1, 2), [(1, 6)]), ((13, 1), [(1, 4)])]),
   1.515 -    ("deserves", 2, [((1, 2), [(3, 15)]), ((13, 1), [(3, 14)])]),
   1.516 -    ("sea", 2, [((36, 12), [(2, 10), (6, 28)])])
   1.517 -    ]
   1.518 -
   1.519 -position_tests2 = [
   1.520 -    ("Every", (14, 0), [(0, 0)]),
   1.521 -    ("sea", (36, 12), [(2, 10), (6, 28)]),
   1.522 -    ("shells", (1, 0), None),
   1.523 -    ("shells", (37, 0), None)
   1.524 -    ]
   1.525 -
   1.526 -phrase_tests2 = [
   1.527 -    (["good", "boy"], [((1, 2), [(1, 6), (2, 11)])]),
   1.528 -    (["on", "the"], [((1, 0), [(3, 12), (4, 15)]), ((36, 12), [(4, 21), (5, 24)])]),
   1.529 -    (["sea", "shore"], [((36, 12), [(6, 28), (7, 32)])])
   1.530 -    ]
   1.531 -
   1.532 -index = Index("test_indexT", 3, 2, 3, 6)
   1.533 -wi = index.get_writer()
   1.534 -for docnum, text in docs2:
   1.535 -    doc = Document(docnum)
   1.536 -    offset = 0
   1.537 -    for position, term in enumerate(text.split()):
   1.538 -        doc.add_position(term, (position, offset))
   1.539 -        offset += len(term) + 1 # assume one space after the term
   1.540 -    doc.add_field(123, text)
   1.541      wi.add_document(doc)
   1.542  wi.close()
   1.543  
   1.544 -rd = index.get_reader()
   1.545 -
   1.546 -print "- (Test searching.)"
   1.547 -
   1.548 -for term, frequency, doc_positions in doc_tests2:
   1.549 -    dp = list(rd.find_positions(term))
   1.550 -    print doc_positions == dp, doc_positions, dp
   1.551 -    fr = rd.get_frequency(term)
   1.552 -    print frequency == fr, frequency, fr
   1.553 -
   1.554 -print "- (Test fields.)"
   1.555 +print "- Test merge."
   1.556  
   1.557 -for docnum, text in docs2:
   1.558 -    df = dict(rd.get_fields(docnum))
   1.559 -    print df[123] == text, text, df[123]
   1.560 -
   1.561 -print "- (Test navigation.)"
   1.562 +l1 = list(index.get_reader())
   1.563 +index.merge()
   1.564 +l2 = list(index.get_reader(1))
   1.565  
   1.566 -for term, docnum, positions in position_tests2:
   1.567 -    dp = rd.find_positions(term)
   1.568 -    pos = dp.from_document(docnum)
   1.569 -    print positions is None and pos is None or pos is not None and positions == list(pos), positions, pos
   1.570 -
   1.571 -print "- (Test phrases.)"
   1.572 -
   1.573 -for terms, results in phrase_tests2:
   1.574 -    res = list(rd.find_common_positions(terms))
   1.575 -    print results == res, results, res
   1.576 +for (t1, dp1), (t2, dp2) in zip(l1, l2):
   1.577 +    print t1 == t2, t1, t2
   1.578 +    print dp1 == dp2, dp1
   1.579 +    print "     ", dp2
   1.580  
   1.581  index.close()
   1.582  
   1.583 -print "- Test index updates."
   1.584 -
   1.585 -index = Index("test_index")
   1.586 -index2 = Index("test_index2", 3, 2, 3, 6)
   1.587 -wi = index2.get_writer()
   1.588 -for docnum, text in docs:
   1.589 -
   1.590 -    # Add the same documents but with different numbers.
   1.591 -
   1.592 -    doc = Document(docnum + 100)
   1.593 -    for position, term in enumerate(text.split()):
   1.594 -        doc.add_position(term, position)
   1.595 -    doc.add_field(123, text)
   1.596 -    wi.add_document(doc)
   1.597 -wi.close()
   1.598 -
   1.599 -index2.update([index])
   1.600 -index.close()
   1.601 -
   1.602 -rd = index2.get_reader()
   1.603 -for term, frequency, doc_positions in doc_tests:
   1.604 -
   1.605 -    # Add the extra documents to the expected result.
   1.606 -
   1.607 -    orig_doc_positions = doc_positions
   1.608 -    doc_positions = doc_positions[:]
   1.609 -
   1.610 -    for docnum, positions in orig_doc_positions:
   1.611 -        doc_positions.append((docnum + 100, positions))
   1.612 -    frequency *= 2
   1.613 -
   1.614 -    dp = list(rd.find_positions(term))
   1.615 -    print doc_positions == dp, doc_positions, dp
   1.616 -    fr = rd.get_frequency(term)
   1.617 -    print frequency == fr, frequency, fr
   1.618 -index2.close()
   1.619 -
   1.620 -print "- (Test update of an empty index.)"
   1.621 -
   1.622 -index = Index("test_index")
   1.623 -index3 = Index("test_index3")
   1.624 -index3.update([index])
   1.625 -index.close()
   1.626 -
   1.627 -rd = index3.get_reader()
   1.628 -for term, frequency, doc_positions in doc_tests:
   1.629 -    dp = list(rd.find_positions(term))
   1.630 -    print doc_positions == dp, doc_positions, dp
   1.631 -    fr = rd.get_frequency(term)
   1.632 -    print frequency == fr, frequency, fr
   1.633 -index3.close()
   1.634 -
   1.635  # vim: tabstop=4 expandtab shiftwidth=4