1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/TokenSupport.py Fri Dec 20 23:25:44 2013 +0100
1.3 @@ -0,0 +1,109 @@
1.4 +# -*- coding: iso-8859-1 -*-
1.5 +"""
1.6 + MoinMoin - TokenSupport library
1.7 +
1.8 + @copyright: 2013 by Paul Boddie <paul@boddie.org.uk>
1.9 + @license: GNU GPL (v2 or later), see COPYING.txt for details.
1.10 +"""
1.11 +
1.12 +import re
1.13 +
# Tokeniser for identifier strings. Each match is exactly one of: a run of
# unquoted text, a run of spaces, a single-quoted region or a double-quoted
# region. Characters matching none of the alternatives (an unbalanced quote)
# are skipped by finditer.

identifier_expr = re.compile(
    """(?P<non_literal>[^'" ]+)"""  # unquoted text: no quotes, no spaces
    "|"
    "(?P<spaces> +)"                # separator between identifiers
    "|"
    "(?P<literal1>'[^']*')"         # single-quoted region
    "|"
    '(?P<literal2>"[^"]*")'         # double-quoted region
    )

def getIdentifiers(s, doubling=False):

    """
    Return a list of the identifiers found in 's', a string containing
    space-separated, optionally quoted identifiers. The optional 'doubling'
    argument can be used to support convenient quote doubling to reproduce
    single quote characters.

    Quoting of identifiers can be done using the single-quote and double-quote
    characters in order to include spaces within identifiers. For example:

      'contains space'
      -> contains space                 (a single identifier)

    Where one kind of quote (or apostrophe) is to be included in an identifier,
    the other quoting character can be used to delimit the identifier. For
    example:

      "Python's syntax"
      -> Python's syntax                (a single identifier)

    Where the 'doubling' argument is set to a true value, an empty quoted
    region (a doubled quote character) contributes that quote character to the
    current identifier. For example:

      Python''s
      -> Python's                       (a single identifier)

    Doubling is not recognised inside a quoted region: 'Python''s' is read as
    the two adjacent regions 'Python' and 's', giving Pythons.

    Where a mixture of quotes is required in a single identifier, adjacent
    quoted regions can be used. For example:

      "Python's "'"intuitive" syntax'
      -> "Python's "                    (region #1)
       + '"intuitive" syntax'           (region #2)
      -> Python's "intuitive" syntax    (a single identifier)

    Where unquoted regions are adjacent to quoted regions, the regions are
    combined. For example:

      "Python's "intuitive" syntax"
      -> "Python's "                    (region #1)
       + intuitive                      (region #2)
       + " syntax"                      (region #3)
      -> Python's intuitive syntax      (a single identifier)

    Regions that contribute no text are ignored, meaning that an empty quoted
    region without 'doubling' neither produces an empty identifier nor
    interrupts the identifier being built. Quote characters lacking a matching
    partner are not matched by the tokeniser and are silently skipped.
    """

    regions = []

    # Indicates that the previous region contributed text and that no spaces
    # have been seen since, so that the next region extends regions[-1].

    in_literal = False

    for match in identifier_expr.finditer(s):
        identifier = None

        # Spaces prevent continuation of identifier regions.

        if match.group("spaces"):
            in_literal = False

        # Unquoted regions contribute to the current identifier. Since only
        # the space character is excluded by the pattern, other surrounding
        # whitespace is stripped here.

        non_literal = match.group("non_literal")

        if non_literal and non_literal.strip():
            identifier = non_literal.strip()

        # Quoted regions also contribute to the current identifier. At most
        # one of the two groups is set for any given match.

        for literal in (match.group("literal1"), match.group("literal2")):
            if literal is not None:

                # Either adopt the quote character for empty regions when
                # doubling, or strip the quoting.

                if doubling and len(literal) <= 2:
                    identifier = literal[0]
                else:
                    identifier = literal[1:-1]

        # Either continue or add an identifier, and indicate possible
        # continuation.

        if identifier:
            if in_literal:
                regions[-1] += identifier
            else:
                regions.append(identifier)
            in_literal = True

    return regions
1.111 +
1.112 +# vim: tabstop=4 expandtab shiftwidth=4