# -*- coding: iso-8859-1 -*-
"""
    MoinMoin - TokenSupport library

    @copyright: 2013 by Paul Boddie <paul@boddie.org.uk>
    @license: GNU GPL (v2 or later), see COPYING.txt for details.
"""

import re

# Tokeniser for identifier strings. Each match is exactly one of: a run of
# characters that are neither quotes nor spaces, a run of spaces, a
# single-quoted region, or a double-quoted region. Quoted regions must be
# terminated; a quote character without a partner matches no alternative.

identifier_expr = re.compile(
    """(?P<non_literal>[^'" ]+)"""
    "|"
    "(?P<spaces> +)"
    "|"
    "(?P<literal1>'[^']*')"
    "|"
    '(?P<literal2>"[^"]*")'
    )

def getIdentifiers(s, doubling=False):

    """
    Return a list of the identifiers found in 's', a string containing
    space-separated, optionally quoted identifiers. The optional 'doubling'
    argument can be used to support convenient quote doubling to reproduce
    single quote characters.

    Quoting of identifiers can be done using the single-quote and double-quote
    characters in order to include spaces within identifiers. For example:

    'contains space'
    -> contains space           (a single identifier)

    Where one kind of quote (or apostrophe) is to be included in an identifier,
    the other quoting character can be used to delimit the identifier. For
    example:

    "Python's syntax"
    -> Python's syntax          (a single identifier)

    Where the 'doubling' argument is set to a true value, an empty quoted
    region (a doubled quote character) yields the quote character itself. For
    example:

    Python''s
    -> Python's                 (a single identifier)

    Doubling does not affect unquoted spaces, which still separate
    identifiers, and it is not recognised inside a quoted region: there, the
    doubled quote ends one region and starts the next. For example:

    Python''s syntax
    -> Python's, syntax         (two identifiers)

    'Python''s'
    -> 'Python'                 (region #1)
     + 's'                      (region #2)
    -> Pythons                  (a single identifier)

    Where a mixture of quotes is required in a single identifier, adjacent
    quoted regions can be used. For example:

    "Python's "'"intuitive" syntax'
    -> "Python's "              (region #1)
     + '"intuitive" syntax'     (region #2)
    -> Python's "intuitive" syntax (a single identifier)

    Where unquoted regions are adjacent to quoted regions, the regions are
    combined. For example:

    "Python's "intuitive" syntax"
    -> "Python's "              (region #1)
     + intuitive                (region #2)
     + " syntax"                (region #3)
    -> Python's intuitive syntax (a single identifier)

    Without 'doubling', an empty quoted region contributes nothing. A quote
    character that has no matching partner is skipped, so that the text on
    either side of it is joined.
    """

    regions = []

    # Whether the next region found continues the last identifier in
    # 'regions' instead of starting a new one.

    in_literal = False

    for match in identifier_expr.finditer(s):
        non_literal, spaces, literal1, literal2 = match.groups()

        identifier = None

        # Spaces prevent continuation of identifier regions.

        if spaces:
            in_literal = False

        # Unquoted regions contribute to the current identifier. Only the
        # space character is excluded by the pattern, so other surrounding
        # whitespace is removed here.

        if non_literal and non_literal.strip():
            identifier = non_literal.strip()

        # Quoted regions also contribute to the current identifier. At most
        # one of the two groups is set for any given match.

        for literal in (literal1, literal2):
            if literal is not None:

                # Either strip the quoting or, for empty regions when doubling
                # is enabled, adopt the quote character.

                if not doubling or len(literal) > 2:
                    identifier = literal[1:-1]
                else:
                    identifier = literal[0]

        # Either continue or add an identifier, and indicate possible
        # continuation. Empty contributions leave the state untouched.

        if identifier:
            if in_literal:
                regions[-1] += identifier
            else:
                regions.append(identifier)
            in_literal = True

    return regions

# vim: tabstop=4 expandtab shiftwidth=4