#!/usr/bin/env python

"""
A regular expression implementation of SQL statement tokenisation.

Copyright (C) 2007 Paul Boddie <paul@boddie.org.uk>

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
"""

import re

# Matches either a run of characters containing no single quote (group
# 'non_literal') or a complete single-quoted literal in which a doubled quote
# ('') stands for an embedded quote character (group 'literal'). A quote which
# does not start a complete literal matches neither alternative.

region_expr = re.compile("(?P<non_literal>[^']+)|(?P<literal>'(?:[^']|(?:''))*')")

def parseString(s):

    """
    Parse the string 's' and return a list of regions, with the first element
    being a non-literal region, the next element being a literal region, and
    with subsequent elements repeating this pattern. Note that the first region
    may be an empty string, and that an empty list is returned for an empty
    string.

    Literal regions include their enclosing quote characters. Quote characters
    which do not belong to a complete literal (such as the opening quote of an
    unterminated literal) are retained as part of the surrounding non-literal
    region, so that the regions always alternate as described and joining them
    together always reproduces 's' exactly.
    """

    regions = []

    # Pieces of the non-literal region currently being accumulated.

    pending = []

    # The end of the text consumed so far; any gap between this position and
    # the start of the next match is text skipped by the expression.

    pos = 0

    for match in region_expr.finditer(s):
        start = match.start()
        if start > pos:
            pending.append(s[pos:start])
        pos = match.end()

        literal = match.group("literal")

        if literal is None:
            pending.append(match.group("non_literal"))
        else:
            # Always emit a non-literal region (possibly empty) before a
            # literal in order to maintain the alternating pattern.

            regions.append("".join(pending))
            regions.append(literal)
            pending = []

    # Retain any skipped text following the final match.

    if pos < len(s):
        pending.append(s[pos:])

    if pending:
        regions.append("".join(pending))

    return regions

# vim: tabstop=4 expandtab shiftwidth=4