1.1 --- a/imiptools/text.py Thu May 12 17:51:10 2016 +0200
1.2 +++ b/imiptools/text.py Thu May 12 22:58:57 2016 +0200
1.3 @@ -3,7 +3,7 @@
1.4 """
1.5 Parsing of textual content.
1.6
1.7 -Copyright (C) 2016 Paul Boddie <paul@boddie.org.uk>
1.8 +Copyright (C) 2014, 2015, 2016 Paul Boddie <paul@boddie.org.uk>
1.9
1.10 This program is free software; you can redistribute it and/or modify it under
1.11 the terms of the GNU General Public License as published by the Free Software
1.12 @@ -19,6 +19,7 @@
1.13 this program. If not, see <http://www.gnu.org/licenses/>.
1.14 """
1.15
1.16 +import codecs
1.17 import re
1.18
1.19 # Parsing of lines to obtain functions and arguments.
1.20 @@ -53,4 +54,72 @@
1.21
1.22 return parts
1.23
1.24 +# Parsing of tabular files.
1.25 +
def set_defaults(t, empty_defaults):

    """
    In the list 't', replace values that are empty or absent with defaults
    provided by the 'empty_defaults' collection whose entries are of the form
    (index, value).

    The list 't' is modified in place and is also returned. Where an index lies
    beyond the end of 't', the list is first extended with None entries up to
    and including that index. Any false value (such as an empty string or None)
    found at an index is regarded as empty and is replaced by the default.

    If 'empty_defaults' is None or empty, 't' is returned unchanged.
    """

    # Tolerate absent defaults: None is the default value of the corresponding
    # parameter in the table-reading functions.

    for i, default in empty_defaults or ():

        # Pad the list so that position 'i' exists before it is inspected.

        missing = i + 1 - len(t)
        if missing > 0:
            t.extend([None] * missing)

        if not t[i]:
            t[i] = default

    return t
1.40 +
def get_table(filename, empty_defaults=None, tab_separated=True):

    """
    Open the file having the given 'filename', decoding it as UTF-8, and return
    a list of tuples representing the file's contents, as produced by
    get_table_from_stream. The file is closed before returning, even if reading
    or parsing raises an exception.

    The 'empty_defaults' is a list of (index, value) tuples indicating the
    default value where a column either does not exist or provides an empty
    value.

    If 'tab_separated' is specified and is a false value, line parsing using
    the imiptools.text.parse_line function will be performed instead of
    splitting each line of the file using tab characters as separators.
    """

    # The context manager closes the stream on both normal and exceptional exit.

    with codecs.open(filename, "rb", encoding="utf-8") as stream:
        return get_table_from_stream(stream, empty_defaults, tab_separated)
1.61 +
def get_table_from_stream(f, empty_defaults=None, tab_separated=True):

    """
    Return a list of tuples representing the contents of the stream 'f', with
    one tuple for each line in the order in which the lines are read. The
    stream 'f' may be any object yielding lines of text when iterated over.

    Spaces and line ending characters are removed from both ends of each line
    before it is split. Tab characters are not removed, so empty leading and
    trailing tab-separated columns are preserved. A blank line is not skipped:
    when splitting on tabs, it yields a tuple containing a single empty string
    (before any defaults are applied).

    The 'empty_defaults' is a list of (index, value) tuples indicating the
    default value where a column either does not exist or provides an empty
    value.

    If 'tab_separated' is specified and is a false value, line parsing using
    the imiptools.text.parse_line function will be performed instead of
    splitting each line of the file using tab characters as separators.
    """

    rows = []

    # Iterate over the stream directly instead of calling readlines, which
    # would build an intermediate list of every line before processing starts.

    for line in f:
        line = line.strip(" \r\n")

        if tab_separated:
            t = line.split("\t")
        else:
            t = parse_line(line)

        # Defaults are only applied when some have actually been provided.

        if empty_defaults:
            t = set_defaults(t, empty_defaults)

        rows.append(tuple(t))

    return rows
1.91 +
1.92 # vim: tabstop=4 expandtab shiftwidth=4