paul@103 | 1 | # -*- coding: iso-8859-1 -*- |
paul@103 | 2 | """ |
paul@103 | 3 | MoinMoin - TokenSupport library |
paul@103 | 4 | |
paul@103 | 5 | @copyright: 2013 by Paul Boddie <paul@boddie.org.uk> |
paul@103 | 6 | @license: GNU GPL (v2 or later), see COPYING.txt for details. |
paul@103 | 7 | """ |
paul@103 | 8 | |
paul@103 | 9 | import re |
paul@103 | 10 | |
# Tokenising expression for identifier strings. The alternatives are tried in
# the order given and exactly one named group is set for any given match:
#
#   non_literal - a run of characters containing no quote and no space
#   spaces      - a run of one or more space characters
#   literal1    - a region opened and closed by single quotes
#   literal2    - a region opened and closed by double quotes
#
# A quote character that has no matching closing quote satisfies none of the
# alternatives.

identifier_expr = re.compile("|".join((
    """(?P<non_literal>[^'" ]+)""",
    "(?P<spaces> +)",
    "(?P<literal1>'[^']*')",
    '(?P<literal2>"[^"]*")',
    )))
paul@103 | 20 | |
def getIdentifiers(s, doubling=False):

    """
    Parse 's', a string of space-separated and optionally quoted identifiers,
    and return the identifiers found as a list of strings.

    The string is scanned as a sequence of regions: unquoted text, runs of
    spaces, single-quoted text and double-quoted text. Runs of spaces outside
    quotes separate identifiers; all other regions that directly follow one
    another are concatenated to form a single identifier.

    Quoting lets an identifier contain spaces. For example:

    'contains space'
    -> contains space (a single identifier)

    Either kind of quote can delimit a region containing the other kind. For
    example:

    "Python's syntax"
    -> Python's syntax (a single identifier)

    Adjacent quoted regions are joined, permitting both kinds of quote in one
    identifier. For example:

    "Python's "'"intuitive" syntax'
    -> "Python's " (region #1)
    + '"intuitive" syntax' (region #2)
    -> Python's "intuitive" syntax (a single identifier)

    Unquoted text adjacent to quoted regions is joined in the same way. For
    example:

    "Python's "intuitive" syntax"
    -> "Python's " (region #1)
    + intuitive (region #2)
    + " syntax" (region #3)
    -> Python's intuitive syntax (a single identifier)

    An empty quoted region (two adjacent quote characters) normally
    contributes nothing. Where 'doubling' is set to a true value, such a
    region instead contributes the quote character itself. For example, with
    'doubling' set:

    Python''s
    -> Python (region #1)
    + ' (region #2)
    + s (region #3)
    -> Python's (a single identifier)
    """

    identifiers = []

    # Whether the next region extends the most recent identifier instead of
    # starting a new one.

    continuing = False

    for match in identifier_expr.finditer(s):
        kind = match.lastgroup
        text = match.group(kind)

        # Spaces end the current identifier and contribute nothing themselves.

        if kind == "spaces":
            continuing = False
            continue

        # Unquoted text is used with any surrounding whitespace removed.

        if kind == "non_literal":
            region = text.strip()

        # With doubling, an empty quoted region stands for its quote character.

        elif doubling and len(text) <= 2:
            region = text[0]

        # Otherwise, a quoted region contributes the text between the quotes.

        else:
            region = text[1:-1]

        # Regions yielding no text neither start nor extend an identifier.

        if not region:
            continue

        if continuing:
            identifiers[-1] += region
        else:
            identifiers.append(region)
            continuing = True

    return identifiers
paul@103 | 108 | |
paul@103 | 109 | # vim: tabstop=4 expandtab shiftwidth=4 |