1.1 --- a/simplex.py Sat Oct 01 00:56:16 2011 +0200
1.2 +++ b/simplex.py Sat Oct 01 01:14:13 2011 +0200
1.3 @@ -34,10 +34,8 @@
1.4
1.5 "A wrapper around text files."
1.6
1.7 - def __init__(self, f, keys=None, delimiter=None):
1.8 + def __init__(self, f):
1.9 self.f = f
1.10 - self.keys = keys or [0]
1.11 - self.delimiter = delimiter
1.12
1.13 def seek(self, pos):
1.14 self.f.seek(pos)
1.15 @@ -45,6 +43,14 @@
1.16 def get_records(self):
1.17 return self.f.xreadlines()
1.18
1.19 +class DelimitedRecord:
1.20 +
1.21 + "An accessor using a delimiter to split a record."
1.22 +
1.23 + def __init__(self, keys=None, delimiter=None):
1.24 + self.keys = keys or [0]
1.25 + self.delimiter = delimiter
1.26 +
1.27 def get_key(self, record):
1.28 values = record.split(self.delimiter)
1.29 return [values[key] for key in self.keys]
2.1 --- a/test_indexed.py Sat Oct 01 00:56:16 2011 +0200
2.2 +++ b/test_indexed.py Sat Oct 01 01:14:13 2011 +0200
2.3 @@ -13,17 +13,18 @@
2.4 sys.exit(1)
2.5
2.6 f = open(filename)
2.7 -reader = TextFile(f, keys)
2.8 +reader = TextFile(f)
2.9 +accessor = DelimitedRecord(keys)
2.10 try:
2.11 t = time.time()
2.12 - l = make_index(reader, reader, int(interval))
2.13 + l = make_index(reader, accessor, int(interval))
2.14 print "Indexed in %s seconds." % (time.time() - t)
2.15
2.16 # Now use the index.
2.17
2.18 for term in terms:
2.19 t = time.time()
2.20 - line = find_with_index(reader, reader, l, term)
2.21 + line = find_with_index(reader, accessor, l, term)
2.22 if line:
2.23 print "Found (at %s seconds)...\n%s" % (time.time() - t, line)
2.24
3.1 --- a/test_scan.py Sat Oct 01 00:56:16 2011 +0200
3.2 +++ b/test_scan.py Sat Oct 01 01:14:13 2011 +0200
3.3 @@ -13,13 +13,14 @@
3.4 sys.exit(1)
3.5
3.6 f = open(filename)
3.7 -reader = TextFile(f, keys)
3.8 +reader = TextFile(f)
3.9 +accessor = DelimitedRecord(keys)
3.10 try:
3.11 for term in terms:
3.12 reader.seek(0)
3.13
3.14 t = time.time()
3.15 - line = find_in_file(reader, reader, term)
3.16 + line = find_in_file(reader, accessor, term)
3.17 if line:
3.18 print "Found (at %s seconds)...\n%s" % (time.time() - t, line)
3.19 finally: