paul@21 | 1 | #!/usr/bin/env python |
paul@21 | 2 | |
paul@21 | 3 | from simplex import * |
paul@21 | 4 | import sys, time |
paul@21 | 5 | |
def from_index_record(convert, record):

    """
    Split the tab-separated index 'record' into a key and a position.

    All values except the last are passed, as a list of strings, to the
    'convert' callable to produce the key. The last value is parsed as an
    integer position (int() tolerates the trailing newline left on lines read
    from a file).

    Return a (key, position) tuple. A ValueError is raised if the last value is
    not an integer.
    """

    parts = record.split("\t")
    key_values, pos_text = parts[:-1], parts[-1]

    # The key is converted before the position is parsed, as a tuple's
    # elements are evaluated left to right.

    return convert(key_values), int(pos_text)
paul@21 | 11 | |
# Interpret the command line:
#
#   <filename> <numeric> <index> <field>... -- <term value>...
#
# Everything between the index filename and "--" is an integer field number;
# everything after "--" is a term value. Any malformed invocation produces the
# usage message and exit status 1.

try:
    separator = sys.argv.index("--")
    filename, numeric, index_filename = sys.argv[1:4]

    # A list is built explicitly because its length is needed below.

    fields = [int(arg) for arg in sys.argv[4:separator]]

    # Without any field numbers the group size passed to groups() would be
    # zero, so reject the invocation here with the usual usage message.

    if not fields:
        raise ValueError("no fields given")

    # NOTE(review): groups comes from simplex; presumably it collects the term
    # values into groups of len(fields) values each - confirm.

    terms = groups(sys.argv[separator+1:], len(fields))

    # Only the exact text "true" enables numeric mode.

    numeric = numeric == "true"

except (IndexError, ValueError):
    sys.stderr.write("Usage: %s <filename> <numeric> <index> <field>... -- <term value>...\n" % sys.argv[0])
    sys.exit(1)
paul@21 | 21 | |
# Open the data file first. Everything after this runs inside a handler that
# closes it again, so a failure to open the index file or to set up the
# accessor cannot leave this handle open.

f = open(filename)

try:
    fi = open(index_filename)

    try:
        accessor = DelimitedRecord(fields, numeric=numeric)

        # Read the whole index into memory as (key, position) pairs.

        t = time.time()
        entries = [from_index_record(accessor.convert, record) for record in fi]

        # A single parenthesised argument prints identically when this is
        # treated as a Python 2 print statement.

        print("Read index (at %s seconds, with %d entries)." % (time.time() - t, len(entries)))

        # Now use the index.

        index = IndexedFile(f, entries, accessor.get_key)

        for term in terms:

            # Convert the term to the appropriate type.

            term = accessor.convert(term)

            # Perform the search, timing each lookup separately.

            t = time.time()
            line = index.find(term)

            # Nothing is reported for a term whose lookup gives a false value.

            if line:
                print("Found (at %s seconds)...\n%s" % (time.time() - t, line))

    finally:
        fi.close()

finally:
    f.close()
paul@21 | 51 | |
paul@21 | 52 | # vim: tabstop=4 expandtab shiftwidth=4 |