1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/imiptools/text.py Sun Jan 31 00:45:26 2016 +0100
1.3 @@ -0,0 +1,56 @@
1.4 +#!/usr/bin/env python
1.5 +
1.6 +"""
1.7 +Parsing of textual content.
1.8 +
1.9 +Copyright (C) 2016 Paul Boddie <paul@boddie.org.uk>
1.10 +
1.11 +This program is free software; you can redistribute it and/or modify it under
1.12 +the terms of the GNU General Public License as published by the Free Software
1.13 +Foundation; either version 3 of the License, or (at your option) any later
1.14 +version.
1.15 +
1.16 +This program is distributed in the hope that it will be useful, but WITHOUT
1.17 +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
1.18 +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
1.19 +details.
1.20 +
1.21 +You should have received a copy of the GNU General Public License along with
1.22 +this program. If not, see <http://www.gnu.org/licenses/>.
1.23 +"""
1.24 +
1.25 +import re
1.26 +
1.27 +# Parsing of lines to obtain functions and arguments.
1.28 +
# A single component of a word: single-quoted text, double-quoted text, or a
# run of non-whitespace characters. Exactly one group participates per match.

component_pattern_str = \
    r"(?:'(.*?)')" \
    r"|" \
    r'(?:"(.*?)")' \
    r"|" \
    r"([^\s]+)"
component_pattern = re.compile(component_pattern_str)

# A word: one or more adjacent components followed by whitespace or the end of
# the text. Because the component groups sit inside a repetition, a match of
# this pattern only retains the last capture of each group, so it is used to
# delimit words and not to extract their content.

line_pattern_str = r"(?:" + component_pattern_str + r")+" + r"(?:\s+|$)"
line_pattern = re.compile(line_pattern_str)

def parse_line(text):

    """
    Parse the given 'text', returning a list of words separated by whitespace in
    the input, where whitespace may occur inside words if quoted using single or
    double quotes.

    Adjacent components are concatenated in the order in which they appear, so
    that "a"'b' yields the single word ab. The surrounding quotes are removed
    from quoted components. A quote is only recognised at the start of a
    component: inside a run of non-whitespace characters (as in don't) it is
    kept as a literal character.

    An empty or all-whitespace 'text' yields an empty list.
    """

    parts = []

    # Find each whitespace-delimited word.

    for match in line_pattern.finditer(text):

        # Traverse the word's components in textual order. The groups of the
        # word-level match cannot be used for this since a repeated group only
        # remembers its final capture. Trailing whitespace in the matched text
        # is skipped since no component can start with whitespace.

        components = component_pattern.finditer(match.group())

        # Exactly one group participates in each component match, making
        # lastindex the number of that group (even for an empty quoted string).

        parts.append("".join(c.group(c.lastindex) for c in components))

    return parts
1.58 +
1.59 +# vim: tabstop=4 expandtab shiftwidth=4