#!/usr/bin/env python

"""
Find records in a delimited text file with the help of an index file.

Usage: <script> <filename> <numeric> <index> <field>... -- <term value>...

Each line of the index file is a tab-separated record: one or more key field
values followed by an integer position. The whole index is read into memory
and each group of term values given after "--" is then looked up using
find_with_index, with any line found being written to standard output together
with the time taken.

DelimitedRecord, groups and find_with_index are not defined here; they are
expected to be provided by the "from simplex import *" line.
"""

from simplex import *
import sys
import time

def from_index_record(convert, record):

    """
    Split the tab-separated index 'record' into a key and a position.

    All fields except the last are passed, as a list of strings, to 'convert'
    in order to produce the key. The last field is converted to an integer
    position. A (key, position) tuple is returned.

    ValueError is raised by int() if the last field is not an integer.
    """

    values = record.split("\t")
    key = convert(values[:-1])
    # int() ignores surrounding whitespace, so the newline left on the last
    # field of a line read from a file does not need to be stripped.
    pos = int(values[-1])
    return key, pos

try:
    separator = sys.argv.index("--")

    # At least one field must appear between the three leading arguments and
    # "--". Otherwise the slice below would absorb "--" as a filename, or
    # groups() would be asked for groups of length zero.
    if separator < 5:
        raise ValueError("no fields given before --")

    filename, numeric, index_filename = sys.argv[1:4]

    # A real list is needed because its length is taken immediately below.
    fields = [int(arg) for arg in sys.argv[4:separator]]
    terms = groups(sys.argv[separator+1:], len(fields))
    numeric = numeric == "true"
except (IndexError, ValueError):
    sys.stderr.write("Usage: %s <filename> <numeric> <index> <field>... -- <term value>...\n" % sys.argv[0])
    sys.exit(1)

# The try blocks are nested so that the data file is still closed if opening
# the index file or creating the accessor fails.

f = open(filename)
try:
    fi = open(index_filename)
    try:
        accessor = DelimitedRecord(fields, numeric=numeric)

        t = time.time()
        index_entries = [from_index_record(accessor.convert, record) for record in fi]
        sys.stdout.write("Read index (at %s seconds, with %d entries).\n" % (time.time() - t, len(index_entries)))

        # Now use the index.

        for term in terms:
            t = time.time()
            line = find_with_index(f, accessor.get_key, index_entries, accessor.convert(term))

            # Nothing is reported for terms where no line was found.
            if line:
                sys.stdout.write("Found (at %s seconds)...\n%s\n" % (time.time() - t, line))

    finally:
        fi.close()
finally:
    f.close()

# vim: tabstop=4 expandtab shiftwidth=4