1.1 --- a/simplex.py Sat Oct 01 00:01:04 2011 +0200
1.2 +++ b/simplex.py Sat Oct 01 00:40:38 2011 +0200
1.3 @@ -41,9 +41,9 @@
1.4
1.5 "A wrapper around text files."
1.6
1.7 - def __init__(self, f, key=0, delimiter=None):
1.8 + def __init__(self, f, keys=None, delimiter=None):
1.9 self.f = f
1.10 - self.key = key
1.11 + self.keys = keys or [0]
1.12 self.delimiter = delimiter
1.13
1.14 def seek(self, pos):
1.15 @@ -53,7 +53,8 @@
1.16 return self.f.xreadlines()
1.17
1.18 def get_key(self, record):
1.19 - return record.split(self.delimiter)[self.key]
1.20 + values = record.split(self.delimiter)
1.21 + return [values[key] for key in self.keys]
1.22
1.23 def index_file(f, interval):
1.24
1.25 @@ -95,7 +96,11 @@
1.26 """
1.27
1.28 i = bisect.bisect_left(l, (term, None))
1.29 - found, pos = l[i]
1.30 +
1.31 + try:
1.32 + found, pos = l[i]
1.33 + except IndexError:
1.34 + return None
1.35
1.36 # Since the index is more coarse than the underlying file, the bisect left
1.37 # operation will most likely point to an index entry for later records than
1.38 @@ -132,4 +137,20 @@
1.39 def find(self, term):
1.40 return find_with_index(self.f, self.entries, term)
1.41
1.42 +def groups(l, length):
1.43 +
1.44 +    """
1.45 +    Split 'l' into consecutive groups of the given 'length', returning a list
1.46 +    of slices of 'l'. The final group is shorter when len(l) is not a multiple
1.47 +    of 'length'; an empty 'l' gives an empty list.
1.48 +
1.49 +    Raise ValueError if 'length' is not greater than zero.
1.50 +    """
1.51 +
1.52 +    if length <= 0:
1.53 +        raise ValueError("Groups must be greater than zero.")
1.54 +
1.55 +    # Slicing past the end of 'l' truncates, giving the short final group.
1.56 +    return [l[i:i+length] for i in range(0, len(l), length)]
1.57 +
1.58 # vim: tabstop=4 expandtab shiftwidth=4
2.1 --- a/test_indexed.py Sat Oct 01 00:01:04 2011 +0200
2.2 +++ b/test_indexed.py Sat Oct 01 00:40:38 2011 +0200
2.3 @@ -3,14 +3,20 @@
2.4 from simplex import *
2.5 import sys, time
2.6
2.7 -filename, step = sys.argv[1:3]
2.8 -terms = sys.argv[3:]
2.9 +try:
2.10 + separator = sys.argv.index("--")
2.11 + filename, interval = sys.argv[1:3]
2.12 + keys = map(int, sys.argv[3:separator])
2.13 + terms = groups(sys.argv[separator+1:], len(keys))
2.14 +except (IndexError, ValueError):
2.15 + print >>sys.stderr, "Usage: %s <filename> <interval> <key>... -- <term value>..." % sys.argv[0]
2.16 + sys.exit(1)
2.17
2.18 f = open(filename)
2.19 -tf = TextFile(f)
2.20 +tf = TextFile(f, keys)
2.21 try:
2.22 t = time.time()
2.23 - l = index_file(tf, int(step))
2.24 + l = index_file(tf, int(interval))
2.25 print "Indexed in %s seconds." % (time.time() - t)
2.26
2.27 # Now use the index.
3.1 --- a/test_scan.py Sat Oct 01 00:01:04 2011 +0200
3.2 +++ b/test_scan.py Sat Oct 01 00:40:38 2011 +0200
3.3 @@ -3,11 +3,17 @@
3.4 from simplex import *
3.5 import sys, time
3.6
3.7 -filename = sys.argv[1]
3.8 -terms = sys.argv[2:]
3.9 +try:
3.10 + separator = sys.argv.index("--")
3.11 + filename = sys.argv[1]
3.12 + keys = map(int, sys.argv[2:separator])
3.13 + terms = groups(sys.argv[separator+1:], len(keys))
3.14 +except (IndexError, ValueError):
3.15 + print >>sys.stderr, "Usage: %s <filename> <key>... -- <term value>..." % sys.argv[0]
3.16 + sys.exit(1)
3.17
3.18 f = open(filename)
3.19 -tf = TextFile(f)
3.20 +tf = TextFile(f, keys)
3.21 try:
3.22 for term in terms:
3.23 tf.seek(0)