# HG changeset patch # User Paul Boddie # Date 1290207674 -3600 # Node ID 00995a70f535af56298371b86b68bb59efb4642c # Parent 4614ef99dbe102f234f6c50ebf2cd05f1e7aed42 An experiment adding preceding text to position records. diff -r 4614ef99dbe1 -r 00995a70f535 iixr/phrases.py --- a/iixr/phrases.py Sat Nov 20 00:00:01 2010 +0100 +++ b/iixr/phrases.py Sat Nov 20 00:01:14 2010 +0100 @@ -4,7 +4,7 @@ Phrase iterators providing navigation over common positions for a number of different terms. -Copyright (C) 2009 Paul Boddie +Copyright (C) 2009, 2010 Paul Boddie This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -168,6 +168,8 @@ raise StopIteration def is_phrase_position(self, last, last_token, current, current_token): - return current - last <= 1 and current_token > last_token + last_position, last_preceding = last + position, preceding = current + return position - last_position <= 1 and current_token > last_token # vim: tabstop=4 expandtab shiftwidth=4 diff -r 4614ef99dbe1 -r 00995a70f535 iixr/positions.py --- a/iixr/positions.py Sat Nov 20 00:00:01 2010 +0100 +++ b/iixr/positions.py Sat Nov 20 00:01:14 2010 +0100 @@ -19,7 +19,7 @@ """ from iixr.files import * -from iixr.data import vint, vint_to_array +from iixr.data import vint, vint_to_array, string_to_array from array import array class PositionWriter(FileWriter): @@ -53,8 +53,11 @@ last = 0 - for position in positions: + # Handle tuples incorporating preceding text. + + for position, preceding in positions: vint_to_array(position - last, output) + string_to_array(preceding, output) last = position output.tofile(self.f) @@ -89,7 +92,8 @@ while i < npositions: last += self.read_number() - positions.append(last) + preceding = self.read_string() + positions.append((last, preceding)) i += 1 return self.last_docnum, positions