#!/usr/bin/env python

"""
Parsing of textual content.

Copyright (C) 2016 Paul Boddie <paul@boddie.org.uk>

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 3 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
details.

You should have received a copy of the GNU General Public License along with
this program. If not, see <http://www.gnu.org/licenses/>.
"""

import re

# Parsing of lines to obtain functions and arguments.

# A single component of a word: a single-quoted section, a double-quoted
# section, or a run of non-whitespace characters. Exactly one of the three
# groups participates in any match of this pattern.

component_pattern_str = r"(?:'(.*?)')" \
                        r"|" \
                        r'(?:"(.*?)")' \
                        r"|" \
                        r"([^\s]+)"
component_pattern = re.compile(component_pattern_str)

# A word: one or more adjacent components followed by whitespace or the end of
# the text. The resulting pattern text is the same as before the component
# pattern was factored out.

line_pattern_str = r"(?:" + \
                   component_pattern_str + \
                   r")+" \
                   r"(?:\s+|$)"
line_pattern = re.compile(line_pattern_str)

def parse_line(text):

    """
    Parse the given 'text', returning a list of words separated by whitespace in
    the input, where whitespace may occur inside words if quoted using single or
    double quotes.

    Adjacent components of a word (quoted sections and unquoted text) are joined
    in the order in which they appear in the text, with the enclosing quotes of
    quoted sections removed. An unquoted component extends to the next
    whitespace character and retains any quote characters it contains.

    An empty list is returned if 'text' contains no words.
    """

    parts = []

    # Match each word in the text.

    for match in line_pattern.finditer(text):

        # Revisit the components within the extent of the word. The groups of
        # the word match itself cannot be used for this: a repeated group only
        # retains its last capture, and the groups are ordered by group number
        # and not by position in the text.

        components = component_pattern.finditer(text, match.start(), match.end())

        # Only one group participates per component, identified by lastindex.

        parts.append("".join([c.group(c.lastindex) for c in components]))

    return parts

# vim: tabstop=4 expandtab shiftwidth=4