# HG changeset patch
# User Paul Boddie
# Date 1290468172 -3600
# Node ID 7e79dd580a629c70c12fac3f39304098745ccfe0
# Parent  f1cbbf5ef885fac3039b6076cb7eb52ee3626d4d
Added support for phrase searching where document positions are specified
using sequences of values, with the first value in each sequence being the
token index/position.
Added more tests of document numbers and position values being specified
using sequences.

diff -r f1cbbf5ef885 -r 7e79dd580a62 iixr/phrases.py
--- a/iixr/phrases.py	Mon Nov 22 23:50:03 2010 +0100
+++ b/iixr/phrases.py	Tue Nov 23 00:22:52 2010 +0100
@@ -168,6 +168,15 @@
         raise StopIteration

     def is_phrase_position(self, last, last_token, current, current_token):
-        return current - last <= 1 and current_token > last_token
+        if current_token <= last_token:
+            return 0
+
+        # NOTE: For position sequences, assume that the first value is the token
+        # NOTE: index/position.
+
+        if isinstance(last, (list, tuple)):
+            return current[0] - last[0] <= 1
+        else:
+            return current - last <= 1

 # vim: tabstop=4 expandtab shiftwidth=4
diff -r f1cbbf5ef885 -r 7e79dd580a62 test.py
--- a/test.py	Mon Nov 22 23:50:03 2010 +0100
+++ b/test.py	Tue Nov 23 00:22:52 2010 +0100
@@ -17,7 +17,7 @@
     pass

 try:
-    for dirname in ("test_index", "test_index2", "test_index3"):
+    for dirname in ("test_index", "test_index2", "test_index3", "test_indexT"):
         for filename in os.listdir(dirname):
             os.remove(os.path.join(dirname, filename))
         os.rmdir(dirname)
@@ -534,6 +534,78 @@

 index.close()

+docs2 = [
+    ((1, 0), "The cat sat on the mat"),
+    ((1, 2), "Every good boy deserves football"),
+    ((13, 1), "One good turn deserves another"),
+    ((14, 0), "Every man for himself"),
+    ((14, 25), "Red sky at night shepherd's delight"),
+    ((36, 12), "She sells sea shells on the sea shore")
+    ]
+
+doc_tests2 = [
+    ("Every", 2, [((1, 2), [(0, 0)]), ((14, 0), [(0, 0)])]),
+    ("good", 2, [((1, 2), [(1, 6)]), ((13, 1), [(1, 4)])]),
+    ("deserves", 2, [((1, 2), [(3, 15)]), ((13, 1), [(3, 14)])]),
+    ("sea", 2, [((36, 12), [(2, 10), (6, 28)])])
+    ]
+
+position_tests2 = [
+    ("Every", (14, 0), [(0, 0)]),
+    ("sea", (36, 12), [(2, 10), (6, 28)]),
+    ("shells", (1, 0), None),
+    ("shells", (37, 0), None)
+    ]
+
+phrase_tests2 = [
+    (["good", "boy"], [((1, 2), [(1, 6), (2, 11)])]),
+    (["on", "the"], [((1, 0), [(3, 12), (4, 15)]), ((36, 12), [(4, 21), (5, 24)])]),
+    (["sea", "shore"], [((36, 12), [(6, 28), (7, 32)])])
+    ]
+
+index = Index("test_indexT", 3, 2, 3, 6)
+wi = index.get_writer()
+for docnum, text in docs2:
+    doc = Document(docnum)
+    offset = 0
+    for position, term in enumerate(text.split()):
+        doc.add_position(term, (position, offset))
+        offset += len(term) + 1 # assume one space after the term
+    doc.add_field(123, text)
+    wi.add_document(doc)
+wi.close()
+
+rd = index.get_reader()
+
+print "- (Test searching.)"
+
+for term, frequency, doc_positions in doc_tests2:
+    dp = list(rd.find_positions(term))
+    print doc_positions == dp, doc_positions, dp
+    fr = rd.get_frequency(term)
+    print frequency == fr, frequency, fr
+
+print "- (Test fields.)"
+
+for docnum, text in docs2:
+    df = dict(rd.get_fields(docnum))
+    print df[123] == text, text, df[123]
+
+print "- (Test navigation.)"
+
+for term, docnum, positions in position_tests2:
+    dp = rd.find_positions(term)
+    pos = dp.from_document(docnum)
+    print positions is None and pos is None or pos is not None and positions == list(pos), positions, pos
+
+print "- (Test phrases.)"
+
+for terms, results in phrase_tests2:
+    res = list(rd.find_common_positions(terms))
+    print results == res, results, res
+
+index.close()
+
 print "- Test index updates."

 index = Index("test_index")
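
For reference, here is a minimal standalone sketch of the adjacency check that
the patch introduces in is_phrase_position. It is not part of the patch: the
logic and the example values are taken from the diff and the new test data
above, but it is framed as a free function (and returns False rather than 0)
purely for illustration. Positions may be plain integers or sequences whose
first value is the token index/position; two positions continue a phrase when
the query tokens appear in increasing order and the token positions are at
most one apart.

# Standalone illustration of the adjacency test sketched above; it only
# mirrors the logic shown in the diff and is not the iixr implementation.

def is_phrase_position(last, last_token, current, current_token):
    # Query tokens must appear in strictly increasing order within the phrase.
    if current_token <= last_token:
        return False

    # For position sequences, the first value is taken to be the token
    # index/position; otherwise the position is a plain integer.
    if isinstance(last, (list, tuple)):
        return current[0] - last[0] <= 1
    else:
        return current - last <= 1

# Plain integer positions: token at position 2 directly follows position 1.
assert is_phrase_position(1, 0, 2, 1)

# Sequence positions of the form (token index, offset), as in the new tests:
# "sea" at (6, 28) followed by "shore" at (7, 32).
assert is_phrase_position((6, 28), 0, (7, 32), 1)

# Out-of-order query tokens are rejected regardless of the position values.
assert not is_phrase_position(5, 1, 6, 0)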