1.1 --- a/simplex/__init__.py Sun Oct 02 19:52:30 2011 +0200
1.2 +++ b/simplex/__init__.py Sun Oct 02 20:43:03 2011 +0200
1.3 @@ -29,42 +29,9 @@
1.4 """
1.5
1.6 from simplex.readers import *
1.7 +from simplex.indexers import *
1.8 import bisect
1.9
1.10 -def make_index(reader, get_key, interval):
1.11 -
1.12 - """
1.13 - Index a resource whose 'reader' provides records, using a 'get_key'
1.14 - operation to yield the key for such records, creating an index entry for a
1.15 - record after a given number of records, defined by 'interval', have been
1.16 - read since the last entry was produced.
1.17 - """
1.18 -
1.19 - l = []
1.20 - pos = 0
1.21 -
1.22 - current_key = None
1.23 - start_pos = 0
1.24 -
1.25 - for i, record in enumerate(reader):
1.26 - key = get_key(record)
1.27 -
1.28 - # Where duplicate keys are permitted, the first record employing the key
1.29 - # must be available as an index entry. Otherwise, records preceding the
1.30 - # one referenced by the entry may have the same key and be missed when
1.31 - # seeking using the index.
1.32 -
1.33 - if key != current_key:
1.34 - current_key = key
1.35 - start_pos = pos
1.36 -
1.37 - if i % interval == 0:
1.38 - l.append((current_key, start_pos))
1.39 -
1.40 - pos += len(record)
1.41 -
1.42 - return l
1.43 -
1.44 def find_with_index(reader, get_key, l, term):
1.45
1.46 """
2.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
2.2 +++ b/simplex/indexers.py Sun Oct 02 20:43:03 2011 +0200
2.3 @@ -0,0 +1,86 @@
2.4 +#!/usr/bin/env python
2.5 +
2.6 +"""
2.7 +Indexing classes.
2.8 +
2.9 +Copyright (C) 2011 Paul Boddie <paul@boddie.org.uk>
2.10 +
2.11 +This program is free software; you can redistribute it and/or modify it under
2.12 +the terms of the GNU General Public License as published by the Free Software
2.13 +Foundation; either version 3 of the License, or (at your option) any later
2.14 +version.
2.15 +
2.16 +This program is distributed in the hope that it will be useful, but WITHOUT ANY
2.17 +WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
2.18 +PARTICULAR PURPOSE. See the GNU General Public License for more details.
2.19 +
2.20 +You should have received a copy of the GNU General Public License along
2.21 +with this program. If not, see <http://www.gnu.org/licenses/>.
2.22 +"""
2.23 +
2.24 +class Indexer:
2.25 +
2.26 + "An indexer which records an entry periodically."
2.27 +
2.28 + def __init__(self, output, get_key, interval):
2.29 +
2.30 + """
2.31 + Initialise an indexer recording entries in the given 'output' sequence,
2.32 + using a 'get_key' operation to yield the key for each record, creating
2.33 + an index entry for a record after a given number of records, defined by
2.34 + 'interval', have been appended since the last entry was produced.
2.35 + """
2.36 +
2.37 + self.output = output
2.38 + self.interval = interval
2.39 + self.get_key = get_key
2.40 +
2.41 + self.count = 0
2.42 + self.pos = 0
2.43 +
2.44 + # Information about the current group.
2.45 +
2.46 + self.start_pos = 0
2.47 + self.current_key = None
2.48 +
2.49 + def append(self, record):
2.50 +
2.51 + """
2.52 + Present 'record' to the indexer, adding an index entry if appropriate.
2.53 + """
2.54 +
2.55 + key = self.get_key(record)
2.56 +
2.57 + # Where duplicate keys are permitted, the first record employing the key
2.58 + # must be available as an index entry. Otherwise, records preceding the
2.59 + # one referenced by the entry may have the same key and be missed when
2.60 + # seeking using the index.
2.61 +
2.62 + if key != self.current_key:
2.63 + self.current_key = key
2.64 + self.start_pos = self.pos
2.65 +
2.66 + if self.count % self.interval == 0:
2.67 + self.output.append((self.current_key, self.start_pos))
2.68 +
2.69 + self.count += 1
2.70 + self.pos += len(record)
2.71 +
2.72 +def make_index(reader, get_key, interval):
2.73 +
2.74 + """
2.75 + Index a resource whose 'reader' provides records, using a 'get_key'
2.76 + operation to yield the key for such records, creating an index entry for a
2.77 + record after a given number of records, defined by 'interval', have been
2.78 + read since the last entry was produced.
2.79 + """
2.80 +
2.81 + l = []
2.82 + indexer = Indexer(l, get_key, interval)
2.83 +
2.84 + for record in reader:
2.85 + indexer.append(record)
2.86 +
2.87 + return l
2.88 +
2.89 +# vim: tabstop=4 expandtab shiftwidth=4