1.1 --- a/simplex/accessors.py Sat Oct 01 20:49:15 2011 +0200
1.2 +++ b/simplex/accessors.py Sat Oct 01 22:06:03 2011 +0200
1.3 @@ -18,50 +18,59 @@
1.4 with this program. If not, see <http://www.gnu.org/licenses/>.
1.5 """
1.6
1.7 -class DelimitedRecord:
1.8 +class Accessor:
1.9 +
1.10 + "An abstract accessor."
1.11 +
1.12 + def reset(self):
1.13 + pass
1.14 +
1.15 + def convert(self, term):
1.16 + return term
1.17 +
1.18 + def get_key(self, record):
1.19 + return record
1.20 +
1.21 +class DelimitedRecord(Accessor):
1.22
1.23 "An accessor using a delimiter to split a record."
1.24
1.25 - def __init__(self, keys=None, delimiter=None, converter=None):
1.26 + def __init__(self, fields=None, delimiter=None):
1.27
1.28 """
1.29 - Initialise the accessor using a sequence of 'keys' indicating the
1.30 + Initialise the accessor using a sequence of 'fields' indicating the
1.31 columns in each record that provide the values in the eventual compound
1.32 key provided by each record, along with a 'delimiter' indicating how
1.33 - such columns are identified. If 'converter' is specified, this will be
1.34 - used to convert the retrieved data.
1.35 + such columns are identified.
1.36 """
1.37
1.38 - self.keys = keys or [0]
1.39 + self.fields = fields or [0]
1.40 self.delimiter = delimiter
1.41 - self.converter = converter
1.42 - self.convert = converter and converter.convert or (lambda x: x)
1.43
1.44 def get_key(self, record):
1.45 values = record.split(self.delimiter)
1.46 - return self.convert([values[key] for key in self.keys])
1.47 + return [values[field] for field in self.fields]
1.48
1.49 - def get_sort_command(self):
1.50 +class Converted(Accessor):
1.51
1.52 - """
1.53 - Return the Unix sort command invocation required to produce the ordering
1.54 - described by this instance.
1.55 - """
1.56 + "Conversion of keys."
1.57 +
1.58 + def __init__(self, accessor, converters=None):
1.59
1.60 - return "sort%s%s%s" % (
1.61 - self.delimiter and (" -t $'%s'" % repr(self.delimiter)[1:-1]) or "",
1.62 - self.converter and self.converter.get_sort_options() or "",
1.63 - "".join([(" -k %d,%d" % (key + 1, key + 1)) for key in self.keys])
1.64 - )
1.65 + "Wrap the given 'accessor' with the given 'converters'."
1.66
1.67 -class ConvertNumeric:
1.68 + self.accessor = accessor
1.69 + self.converters = converters
1.70
1.71 - "Convert numeric values to integers."
1.72 + def get_converter(self, converter):
1.73 + return converter or (lambda x: x)
1.74
1.75 def convert(self, term):
1.76 - return map(int, term)
1.77 + converters = map(self.get_converter, self.converters)
1.78 + return [converter(value) for converter, value in zip(converters, term)]
1.79
1.80 - def get_sort_options(self):
1.81 - return " -n"
1.82 + def get_key(self, record):
1.83 + key = self.accessor.get_key(record)
1.84 + return self.convert(key)
1.85
1.86 # vim: tabstop=4 expandtab shiftwidth=4
2.1 --- a/simplex/iterators.py Sat Oct 01 20:49:15 2011 +0200
2.2 +++ b/simplex/iterators.py Sat Oct 01 22:06:03 2011 +0200
2.3 @@ -32,6 +32,7 @@
2.4
2.5 def __iter__(self):
2.6 self.iterator = iter(self.records)
2.7 + self.accessor.reset()
2.8 return self
2.9
2.10 def next(self):
2.11 @@ -40,22 +41,4 @@
2.12 record = self.iterator.next()
2.13 return self.accessor.get_key(record), record
2.14
2.15 -class StatefulIterator(Iterator):
2.16 -
2.17 - "An iterator over records maintaining state."
2.18 -
2.19 - def __init__(self, accessor, state):
2.20 - Iterator.__init__(self, accessor)
2.21 - self.state = state
2.22 -
2.23 - def __iter__(self):
2.24 - Iterator.__iter__(self)
2.25 - self.state.reset()
2.26 - return self
2.27 -
2.28 - def next(self):
2.29 - key, record = Iterator.next(self)
2.30 - self.key = self.state.update(key)
2.31 - return self.key, record
2.32 -
2.33 # vim: tabstop=4 expandtab shiftwidth=4
3.1 --- a/test_indexed.py Sat Oct 01 20:49:15 2011 +0200
3.2 +++ b/test_indexed.py Sat Oct 01 22:06:03 2011 +0200
3.3 @@ -6,18 +6,17 @@
3.4 try:
3.5 separator = sys.argv.index("--")
3.6 filename, numeric, interval = sys.argv[1:4]
3.7 - keys = map(int, sys.argv[4:separator])
3.8 - terms = groups(sys.argv[separator+1:], len(keys))
3.9 + fields = map(int, sys.argv[4:separator])
3.10 + terms = groups(sys.argv[separator+1:], len(fields))
3.11 except (IndexError, ValueError):
3.12 - print >>sys.stderr, "Usage: %s <filename> <interval> <key>... -- <term value>..." % sys.argv[0]
3.13 + print >>sys.stderr, "Usage: %s <filename> <interval> <field>... -- <term value>..." % sys.argv[0]
3.14 sys.exit(1)
3.15
3.16 f = open(filename)
3.17 -accessor = DelimitedRecord(keys, converter=(numeric == "true" and ConvertNumeric() or None))
3.18 +converters = [(numeric == "true" and int or None) for field in fields]
3.19 +accessor = Converted(DelimitedRecord(fields), converters)
3.20 reader = TextFile(f, Iterator(accessor))
3.21
3.22 -print "Sort command:", accessor.get_sort_command()
3.23 -
3.24 try:
3.25 t = time.time()
3.26 l = make_index(reader, int(interval))
4.1 --- a/test_scan.py Sat Oct 01 20:49:15 2011 +0200
4.2 +++ b/test_scan.py Sat Oct 01 22:06:03 2011 +0200
4.3 @@ -6,18 +6,17 @@
4.4 try:
4.5 separator = sys.argv.index("--")
4.6 filename, numeric = sys.argv[1:3]
4.7 - keys = map(int, sys.argv[3:separator])
4.8 - terms = groups(sys.argv[separator+1:], len(keys))
4.9 + fields = map(int, sys.argv[3:separator])
4.10 + terms = groups(sys.argv[separator+1:], len(fields))
4.11 except (IndexError, ValueError):
4.12 - print >>sys.stderr, "Usage: %s <filename> <key>... -- <term value>..." % sys.argv[0]
4.13 + print >>sys.stderr, "Usage: %s <filename> <field>... -- <term value>..." % sys.argv[0]
4.14 sys.exit(1)
4.15
4.16 f = open(filename)
4.17 -accessor = DelimitedRecord(keys, converter=(numeric == "true" and ConvertNumeric() or None))
4.18 +converters = [(numeric == "true" and int or None) for field in fields]
4.19 +accessor = Converted(DelimitedRecord(fields), converters)
4.20 reader = TextFile(f, Iterator(accessor))
4.21
4.22 -print "Sort command:", accessor.get_sort_command()
4.23 -
4.24 try:
4.25 for term in terms:
4.26 reader.seek(0)