# HG changeset patch # User Paul Boddie # Date 1317499563 -7200 # Node ID d254badcda0ab39f390a5c2f8c9daece6254e620 # Parent 8795a00cfb265c33316a994ed63128e1dff959a3 Introduced a mechanism for wrapping accessors with converters, removing the simple mechanism supporting numeric conversion. Renamed keys to fields in the context of delimited input data. Merged the stateful iterator support into the basic iterator and accessor. diff -r 8795a00cfb26 -r d254badcda0a simplex/accessors.py --- a/simplex/accessors.py Sat Oct 01 20:49:15 2011 +0200 +++ b/simplex/accessors.py Sat Oct 01 22:06:03 2011 +0200 @@ -18,50 +18,59 @@ with this program. If not, see <http://www.gnu.org/licenses/>. """ -class DelimitedRecord: +class Accessor: + + "An abstract accessor." + + def reset(self): + pass + + def convert(self, term): + return term + + def get_key(self, record): + return record + +class DelimitedRecord(Accessor): "An accessor using a delimiter to split a record." - def __init__(self, keys=None, delimiter=None, converter=None): + def __init__(self, fields=None, delimiter=None): """ - Initialise the accessor using a sequence of 'keys' indicating the + Initialise the accessor using a sequence of 'fields' indicating the columns in each record that provide the values in the eventual compound key provided by each record, along with a 'delimiter' indicating how - such columns are identified. If 'converter' is specified, this will be - used to convert the retrieved data. + such columns are identified. """ - self.keys = keys or [0] + self.fields = fields or [0] self.delimiter = delimiter - self.converter = converter - self.convert = converter and converter.convert or (lambda x: x) def get_key(self, record): values = record.split(self.delimiter) - return self.convert([values[key] for key in self.keys]) + return [values[field] for field in self.fields] - def get_sort_command(self): +class Converted(Accessor): - """ - Return the Unix sort command invocation required to produce the ordering - described by this instance. 
- """ + "Conversion of keys." + + def __init__(self, accessor, converters=None): - return "sort%s%s%s" % ( - self.delimiter and (" -t $'%s'" % repr(self.delimiter)[1:-1]) or "", - self.converter and self.converter.get_sort_options() or "", - "".join([(" -k %d,%d" % (key + 1, key + 1)) for key in self.keys]) - ) + "Wrap the given 'accessor' with the given 'converters'." -class ConvertNumeric: + self.accessor = accessor + self.converters = converters - "Convert numeric values to integers." + def get_converter(self, converter): + return converter or (lambda x: x) def convert(self, term): - return map(int, term) + converters = map(self.get_converter, self.converters) + return [converter(value) for converter, value in zip(converters, term)] - def get_sort_options(self): - return " -n" + def get_key(self, record): + key = self.accessor.get_key(record) + return self.convert(key) # vim: tabstop=4 expandtab shiftwidth=4 diff -r 8795a00cfb26 -r d254badcda0a simplex/iterators.py --- a/simplex/iterators.py Sat Oct 01 20:49:15 2011 +0200 +++ b/simplex/iterators.py Sat Oct 01 22:06:03 2011 +0200 @@ -32,6 +32,7 @@ def __iter__(self): self.iterator = iter(self.records) + self.accessor.reset() return self def next(self): @@ -40,22 +41,4 @@ record = self.iterator.next() return self.accessor.get_key(record), record -class StatefulIterator(Iterator): - - "An iterator over records maintaining state." 
- - def __init__(self, accessor, state): - Iterator.__init__(self, accessor) - self.state = state - - def __iter__(self): - Iterator.__iter__(self) - self.state.reset() - return self - - def next(self): - key, record = Iterator.next(self) - self.key = self.state.update(key) - return self.key, record - # vim: tabstop=4 expandtab shiftwidth=4 diff -r 8795a00cfb26 -r d254badcda0a test_indexed.py --- a/test_indexed.py Sat Oct 01 20:49:15 2011 +0200 +++ b/test_indexed.py Sat Oct 01 22:06:03 2011 +0200 @@ -6,18 +6,17 @@ try: separator = sys.argv.index("--") filename, numeric, interval = sys.argv[1:4] - keys = map(int, sys.argv[4:separator]) - terms = groups(sys.argv[separator+1:], len(keys)) + fields = map(int, sys.argv[4:separator]) + terms = groups(sys.argv[separator+1:], len(fields)) except (IndexError, ValueError): - print >>sys.stderr, "Usage: %s ... -- ..." % sys.argv[0] + print >>sys.stderr, "Usage: %s ... -- ..." % sys.argv[0] sys.exit(1) f = open(filename) -accessor = DelimitedRecord(keys, converter=(numeric == "true" and ConvertNumeric() or None)) +converters = [(numeric == "true" and int or None) for field in fields] +accessor = Converted(DelimitedRecord(fields), converters) reader = TextFile(f, Iterator(accessor)) -print "Sort command:", accessor.get_sort_command() - try: t = time.time() l = make_index(reader, int(interval)) diff -r 8795a00cfb26 -r d254badcda0a test_scan.py --- a/test_scan.py Sat Oct 01 20:49:15 2011 +0200 +++ b/test_scan.py Sat Oct 01 22:06:03 2011 +0200 @@ -6,18 +6,17 @@ try: separator = sys.argv.index("--") filename, numeric = sys.argv[1:3] - keys = map(int, sys.argv[3:separator]) - terms = groups(sys.argv[separator+1:], len(keys)) + fields = map(int, sys.argv[3:separator]) + terms = groups(sys.argv[separator+1:], len(fields)) except (IndexError, ValueError): - print >>sys.stderr, "Usage: %s ... -- ..." % sys.argv[0] + print >>sys.stderr, "Usage: %s ... -- ..." 
% sys.argv[0] sys.exit(1) f = open(filename) -accessor = DelimitedRecord(keys, converter=(numeric == "true" and ConvertNumeric() or None)) +converters = [(numeric == "true" and int or None) for field in fields] +accessor = Converted(DelimitedRecord(fields), converters) reader = TextFile(f, Iterator(accessor)) -print "Sort command:", accessor.get_sort_command() - try: for term in terms: reader.seek(0)