Lichen

Annotated lib/sre_constants.py

6:f551873980e5
2016-08-30 Paul Boddie Added PythonLight alternative libraries.
paul@6 1
#
paul@6 2
# Secret Labs' Regular Expression Engine
paul@6 3
#
paul@6 4
# various symbols used by the regular expression engine.
paul@6 5
#
paul@6 6
# Copyright (c) 1998-2001 by Secret Labs AB.  All rights reserved.
paul@6 7
#
paul@6 8
# See the sre.py file for information on usage and redistribution.
paul@6 9
#
paul@6 10
paul@6 11
"""Internal support module for sre"""
paul@6 12
paul@6 13
# update when constants are added or removed
paul@6 14
paul@6 15
MAGIC = 20031017  # integer version stamp for this set of constants
paul@6 16
paul@6 17
# max code word in this release
paul@6 18
paul@6 19
MAXREPEAT = 65535  # equals 2**16 - 1
paul@6 20
paul@6 21
# SRE standard exception (access as sre.error)
paul@6 22
# should this really be here?
paul@6 23
paul@6 24
# Exception type of the regular expression engine: a plain Exception
# subclass that adds no attributes or methods of its own.
class error(Exception):
paul@6 25
    pass
paul@6 26
paul@6 27
# operators
paul@6 28
paul@6 29
# Operator names. Every constant here is a string equal to the lowercase
# spelling of its own identifier.
FAILURE = "failure"
paul@6 30
SUCCESS = "success"
paul@6 31
paul@6 32
ANY = "any"
paul@6 33
ANY_ALL = "any_all"
paul@6 34
ASSERT = "assert"
paul@6 35
ASSERT_NOT = "assert_not"
paul@6 36
AT = "at"
paul@6 37
BIGCHARSET = "bigcharset"
paul@6 38
BRANCH = "branch"
paul@6 39
CALL = "call"
paul@6 40
CATEGORY = "category"
paul@6 41
CHARSET = "charset"
paul@6 42
GROUPREF = "groupref"
paul@6 43
GROUPREF_IGNORE = "groupref_ignore"
paul@6 44
GROUPREF_EXISTS = "groupref_exists"
paul@6 45
IN = "in"
paul@6 46
IN_IGNORE = "in_ignore"
paul@6 47
INFO = "info"
paul@6 48
JUMP = "jump"
paul@6 49
LITERAL = "literal"
paul@6 50
LITERAL_IGNORE = "literal_ignore"
paul@6 51
MARK = "mark"
paul@6 52
MAX_REPEAT = "max_repeat"  # not included in the OPCODES table
paul@6 53
MAX_UNTIL = "max_until"
paul@6 54
MIN_REPEAT = "min_repeat"  # not included in the OPCODES table
paul@6 55
MIN_UNTIL = "min_until"
paul@6 56
NEGATE = "negate"
paul@6 57
NOT_LITERAL = "not_literal"
paul@6 58
NOT_LITERAL_IGNORE = "not_literal_ignore"
paul@6 59
RANGE = "range"
paul@6 60
REPEAT = "repeat"
paul@6 61
REPEAT_ONE = "repeat_one"
paul@6 62
SUBPATTERN = "subpattern"
paul@6 63
MIN_REPEAT_ONE = "min_repeat_one"
paul@6 64
paul@6 65
# positions
paul@6 66
# Position names; all twelve are listed in ATCODES. The *_LINE, AT_LOC_* and
# AT_UNI_* names are the replacement values used by the AT_MULTILINE,
# AT_LOCALE and AT_UNICODE mappings respectively.
AT_BEGINNING = "at_beginning"
paul@6 67
AT_BEGINNING_LINE = "at_beginning_line"
paul@6 68
AT_BEGINNING_STRING = "at_beginning_string"
paul@6 69
AT_BOUNDARY = "at_boundary"
paul@6 70
AT_NON_BOUNDARY = "at_non_boundary"
paul@6 71
AT_END = "at_end"
paul@6 72
AT_END_LINE = "at_end_line"
paul@6 73
AT_END_STRING = "at_end_string"
paul@6 74
AT_LOC_BOUNDARY = "at_loc_boundary"
paul@6 75
AT_LOC_NON_BOUNDARY = "at_loc_non_boundary"
paul@6 76
AT_UNI_BOUNDARY = "at_uni_boundary"
paul@6 77
AT_UNI_NON_BOUNDARY = "at_uni_non_boundary"
paul@6 78
paul@6 79
# categories
paul@6 80
# Character category names; all eighteen are listed in CHCODES. The
# CATEGORY_LOC_* and CATEGORY_UNI_* names are the replacement values used by
# the CH_LOCALE and CH_UNICODE mappings respectively.
CATEGORY_DIGIT = "category_digit"
paul@6 81
CATEGORY_NOT_DIGIT = "category_not_digit"
paul@6 82
CATEGORY_SPACE = "category_space"
paul@6 83
CATEGORY_NOT_SPACE = "category_not_space"
paul@6 84
CATEGORY_WORD = "category_word"
paul@6 85
CATEGORY_NOT_WORD = "category_not_word"
paul@6 86
CATEGORY_LINEBREAK = "category_linebreak"
paul@6 87
CATEGORY_NOT_LINEBREAK = "category_not_linebreak"
paul@6 88
CATEGORY_LOC_WORD = "category_loc_word"
paul@6 89
CATEGORY_LOC_NOT_WORD = "category_loc_not_word"
paul@6 90
CATEGORY_UNI_DIGIT = "category_uni_digit"
paul@6 91
CATEGORY_UNI_NOT_DIGIT = "category_uni_not_digit"
paul@6 92
CATEGORY_UNI_SPACE = "category_uni_space"
paul@6 93
CATEGORY_UNI_NOT_SPACE = "category_uni_not_space"
paul@6 94
CATEGORY_UNI_WORD = "category_uni_word"
paul@6 95
CATEGORY_UNI_NOT_WORD = "category_uni_not_word"
paul@6 96
CATEGORY_UNI_LINEBREAK = "category_uni_linebreak"
paul@6 97
CATEGORY_UNI_NOT_LINEBREAK = "category_uni_not_linebreak"
paul@6 98
paul@6 99
# Ordered list of operator names. The order matters: once makedict() is
# applied to this list further down, each name's position here becomes the
# integer it maps to.
OPCODES = [
paul@6 100
paul@6 101
    # failure=0 success=1 (just because it looks better that way :-)
paul@6 102
    FAILURE, SUCCESS,
paul@6 103
paul@6 104
    ANY, ANY_ALL,
paul@6 105
    ASSERT, ASSERT_NOT,
paul@6 106
    AT,
paul@6 107
    BRANCH,
paul@6 108
    CALL,
paul@6 109
    CATEGORY,
paul@6 110
    CHARSET, BIGCHARSET,
paul@6 111
    GROUPREF, GROUPREF_EXISTS, GROUPREF_IGNORE,
paul@6 112
    IN, IN_IGNORE,
paul@6 113
    INFO,
paul@6 114
    JUMP,
paul@6 115
    LITERAL, LITERAL_IGNORE,
paul@6 116
    MARK,
paul@6 117
    MAX_UNTIL,
paul@6 118
    MIN_UNTIL,
paul@6 119
    NOT_LITERAL, NOT_LITERAL_IGNORE,
paul@6 120
    NEGATE,
paul@6 121
    RANGE,
paul@6 122
    REPEAT,
paul@6 123
    REPEAT_ONE,
paul@6 124
    SUBPATTERN,
paul@6 125
    MIN_REPEAT_ONE
paul@6 126
paul@6 127
]
paul@6 128
paul@6 129
# Ordered list of position names; a name's index in this list becomes its
# integer value when makedict() is applied to the list further down.
ATCODES = [
paul@6 130
    AT_BEGINNING, AT_BEGINNING_LINE, AT_BEGINNING_STRING, AT_BOUNDARY,
paul@6 131
    AT_NON_BOUNDARY, AT_END, AT_END_LINE, AT_END_STRING,
paul@6 132
    AT_LOC_BOUNDARY, AT_LOC_NON_BOUNDARY, AT_UNI_BOUNDARY,
paul@6 133
    AT_UNI_NON_BOUNDARY
paul@6 134
]
paul@6 135
paul@6 136
# Ordered list of character category names; a name's index in this list
# becomes its integer value when makedict() is applied to the list further
# down.
CHCODES = [
paul@6 137
    CATEGORY_DIGIT, CATEGORY_NOT_DIGIT, CATEGORY_SPACE,
paul@6 138
    CATEGORY_NOT_SPACE, CATEGORY_WORD, CATEGORY_NOT_WORD,
paul@6 139
    CATEGORY_LINEBREAK, CATEGORY_NOT_LINEBREAK, CATEGORY_LOC_WORD,
paul@6 140
    CATEGORY_LOC_NOT_WORD, CATEGORY_UNI_DIGIT, CATEGORY_UNI_NOT_DIGIT,
paul@6 141
    CATEGORY_UNI_SPACE, CATEGORY_UNI_NOT_SPACE, CATEGORY_UNI_WORD,
paul@6 142
    CATEGORY_UNI_NOT_WORD, CATEGORY_UNI_LINEBREAK,
paul@6 143
    CATEGORY_UNI_NOT_LINEBREAK
paul@6 144
]
paul@6 145
paul@6 146
def makedict(list):
    """Map each item of a sequence to its position in that sequence.

    ``list`` is any iterable of hashable items (the parameter name is kept
    for compatibility with existing callers, even though it shadows the
    builtin).  Returns a new dict ``{item: index}`` with indices counted
    from 0; if an item occurs more than once, its last position wins.
    """
    d = {}
    # enumerate() supplies the running index, replacing a hand-kept counter.
    for i, item in enumerate(list):
        d[item] = i
    return d
paul@6 153
paul@6 154
# Rebind each table from a list of names to a dict mapping name -> list index.
# After this point OPCODES, ATCODES and CHCODES are dicts, not lists.
OPCODES = makedict(OPCODES)
paul@6 155
ATCODES = makedict(ATCODES)
paul@6 156
CHCODES = makedict(CHCODES)
paul@6 157
paul@6 158
# replacement operations for "ignore case" mode
paul@6 159
# Maps an operator name to its *_IGNORE counterpart. Only these four
# operators have an entry; a lookup for any other operator raises KeyError.
OP_IGNORE = {
paul@6 160
    GROUPREF: GROUPREF_IGNORE,
paul@6 161
    IN: IN_IGNORE,
paul@6 162
    LITERAL: LITERAL_IGNORE,
paul@6 163
    NOT_LITERAL: NOT_LITERAL_IGNORE
paul@6 164
}
paul@6 165
paul@6 166
# Maps the plain beginning/end positions to their per-line variants.
# NOTE(review): presumably consulted when SRE_FLAG_MULTILINE is set -- the
# code that uses this table is not in this file; confirm against it.
AT_MULTILINE = {
paul@6 167
    AT_BEGINNING: AT_BEGINNING_LINE,
paul@6 168
    AT_END: AT_END_LINE
paul@6 169
}
paul@6 170
paul@6 171
# Maps the boundary / non-boundary positions to their AT_LOC_* variants.
# NOTE(review): presumably consulted when SRE_FLAG_LOCALE is set -- confirm
# against the code that uses this table.
AT_LOCALE = {
paul@6 172
    AT_BOUNDARY: AT_LOC_BOUNDARY,
paul@6 173
    AT_NON_BOUNDARY: AT_LOC_NON_BOUNDARY
paul@6 174
}
paul@6 175
paul@6 176
# Maps the boundary / non-boundary positions to their AT_UNI_* variants.
# NOTE(review): presumably consulted when SRE_FLAG_UNICODE is set -- confirm
# against the code that uses this table.
AT_UNICODE = {
paul@6 177
    AT_BOUNDARY: AT_UNI_BOUNDARY,
paul@6 178
    AT_NON_BOUNDARY: AT_UNI_NON_BOUNDARY
paul@6 179
}
paul@6 180
paul@6 181
# Category substitution table for the CATEGORY_LOC_* variants. Only the
# WORD / NOT_WORD categories are replaced; the digit, space and linebreak
# categories map to themselves.
CH_LOCALE = {
paul@6 182
    CATEGORY_DIGIT: CATEGORY_DIGIT,
paul@6 183
    CATEGORY_NOT_DIGIT: CATEGORY_NOT_DIGIT,
paul@6 184
    CATEGORY_SPACE: CATEGORY_SPACE,
paul@6 185
    CATEGORY_NOT_SPACE: CATEGORY_NOT_SPACE,
paul@6 186
    CATEGORY_WORD: CATEGORY_LOC_WORD,
paul@6 187
    CATEGORY_NOT_WORD: CATEGORY_LOC_NOT_WORD,
paul@6 188
    CATEGORY_LINEBREAK: CATEGORY_LINEBREAK,
paul@6 189
    CATEGORY_NOT_LINEBREAK: CATEGORY_NOT_LINEBREAK
paul@6 190
}
paul@6 191
paul@6 192
# Category substitution table for the CATEGORY_UNI_* variants. Unlike
# CH_LOCALE, every one of the eight base categories is replaced by its
# CATEGORY_UNI_* counterpart.
CH_UNICODE = {
paul@6 193
    CATEGORY_DIGIT: CATEGORY_UNI_DIGIT,
paul@6 194
    CATEGORY_NOT_DIGIT: CATEGORY_UNI_NOT_DIGIT,
paul@6 195
    CATEGORY_SPACE: CATEGORY_UNI_SPACE,
paul@6 196
    CATEGORY_NOT_SPACE: CATEGORY_UNI_NOT_SPACE,
paul@6 197
    CATEGORY_WORD: CATEGORY_UNI_WORD,
paul@6 198
    CATEGORY_NOT_WORD: CATEGORY_UNI_NOT_WORD,
paul@6 199
    CATEGORY_LINEBREAK: CATEGORY_UNI_LINEBREAK,
paul@6 200
    CATEGORY_NOT_LINEBREAK: CATEGORY_UNI_NOT_LINEBREAK
paul@6 201
}
paul@6 202
paul@6 203
# flags
paul@6 204
# Each flag value is a distinct power of two (1 through 128).
# NOTE(review): presumably so that flags can be combined with bitwise OR --
# confirm against the code that tests them.
SRE_FLAG_TEMPLATE = 1 # template mode (disable backtracking)
paul@6 205
SRE_FLAG_IGNORECASE = 2 # case insensitive
paul@6 206
SRE_FLAG_LOCALE = 4 # honour system locale
paul@6 207
SRE_FLAG_MULTILINE = 8 # treat target as multiline string
paul@6 208
SRE_FLAG_DOTALL = 16 # treat target as a single string
paul@6 209
SRE_FLAG_UNICODE = 32 # use unicode locale
paul@6 210
SRE_FLAG_VERBOSE = 64 # ignore whitespace and comments
paul@6 211
SRE_FLAG_DEBUG = 128 # debugging
paul@6 212
paul@6 213
# flags for INFO primitive
paul@6 214
# Each value is a distinct power of two (1, 2, 4).
# NOTE(review): presumably combined as a bit mask inside an INFO block --
# confirm against the code that emits and reads INFO.
SRE_INFO_PREFIX = 1 # has prefix
paul@6 215
SRE_INFO_LITERAL = 2 # entire pattern is literal (given by prefix)
paul@6 216
SRE_INFO_CHARSET = 4 # pattern starts with character from given set