# HG changeset patch # User Paul Boddie # Date 1317569131 -7200 # Node ID 294a658846b63a015c1cc44b5a3e810ae24cd0b4 # Parent d3776f8ba4326b76ff7556af7212c868174a051c Changed the index-related functions to use a get_key callable instead of an object providing a get_key method. diff -r d3776f8ba432 -r 294a658846b6 simplex/__init__.py --- a/simplex/__init__.py Sun Oct 02 16:32:41 2011 +0200 +++ b/simplex/__init__.py Sun Oct 02 17:25:31 2011 +0200 @@ -31,13 +31,13 @@ from simplex.readers import * import bisect -def make_index(reader, accessor, interval): +def make_index(reader, get_key, interval): """ - Index a resource whose 'reader' provides records and whose 'accessor' can - yield the key for such records, creating an index entry for a record after a - given number of records, defined by 'interval', have been read since the - last entry was produced. + Index a resource whose 'reader' provides records, using a 'get_key' + operation to yield the key for such records, creating an index entry for a + record after a given number of records, defined by 'interval', have been + read since the last entry was produced. """ l = [] @@ -47,7 +47,7 @@ start_pos = 0 for i, record in enumerate(reader): - key = accessor.get_key(record) + key = get_key(record) # Where duplicate keys are permitted, the first record employing the key # must be available as an index entry. Otherwise, records preceding the @@ -65,12 +65,12 @@ return l -def find_with_index(reader, accessor, l, term): +def find_with_index(reader, get_key, l, term): """ - Find in the resource whose 'reader' provides records and whose 'accessor' - can yield the key for such records, using the given index list 'l', the - given 'term', returning a record employing the term or None if no such + In the resource whose 'reader' provides records, using a 'get_key' operation + to yield the key for such records, and using the given index list 'l', find + the given 'term', returning a record employing the term or None if no such record was found. """ @@ -90,18 +90,18 @@ found, pos = l[i] reader.seek(pos) - return find_in_file(reader, accessor, term) + return find_in_file(reader, get_key, term) -def find_in_file(reader, accessor, term): +def find_in_file(reader, get_key, term): """ - Find in the resource whose 'reader' provides records and whose 'accessor' - can yield the key for such records, the given 'term', returning a record + In the resource whose 'reader' provides records, using a 'get_key' operation + to yield the key for such records, find the given 'term', returning a record employing the term or None if no such record was found. """ for record in reader: - key = accessor.get_key(record) + key = get_key(record) if term == key: return record diff -r d3776f8ba432 -r 294a658846b6 test_indexed.py --- a/test_indexed.py Sun Oct 02 16:32:41 2011 +0200 +++ b/test_indexed.py Sun Oct 02 17:25:31 2011 +0200 @@ -17,14 +17,14 @@ accessor = DelimitedRecord(keys, numeric=(numeric == "true")) try: t = time.time() - l = make_index(reader, accessor, int(interval)) + l = make_index(reader, accessor.get_key, int(interval)) print "Indexed in %s seconds." % (time.time() - t) # Now use the index. for term in terms: t = time.time() - line = find_with_index(reader, accessor, l, accessor.convert(term)) + line = find_with_index(reader, accessor.get_key, l, accessor.convert(term)) if line: print "Found (at %s seconds)...\n%s" % (time.time() - t, line) diff -r d3776f8ba432 -r 294a658846b6 test_scan.py --- a/test_scan.py Sun Oct 02 16:32:41 2011 +0200 +++ b/test_scan.py Sun Oct 02 17:25:31 2011 +0200 @@ -20,7 +20,7 @@ reader.seek(0) t = time.time() - line = find_in_file(reader, accessor, accessor.convert(term)) + line = find_in_file(reader, accessor.get_key, accessor.convert(term)) if line: print "Found (at %s seconds)...\n%s" % (time.time() - t, line) finally: