Lichen

lib/sre_constants.py

90:c7ddfc4525da
2016-10-08 Paul Boddie Added some support for eliminating accessor class types where the provided attributes are invoked and are unbound methods. This uses a more sophisticated method involving usage observations that incorporate invocation information, permitting classes as accessors if paths through the code support them, even if other paths require instances as accessors to invoke methods.
     1 #     2 # Secret Labs' Regular Expression Engine     3 #     4 # various symbols used by the regular expression engine.     5 #     6 # Copyright (c) 1998-2001 by Secret Labs AB.  All rights reserved.     7 #     8 # See the sre.py file for information on usage and redistribution.     9 #    10     11 """Internal support module for sre"""    12     13 # update when constants are added or removed    14     15 MAGIC = 20031017    16     17 # max code word in this release    18     19 MAXREPEAT = 65535    20     21 # SRE standard exception (access as sre.error)    22 # should this really be here?    23     24 class error(Exception):    25     pass    26     27 # operators    28     29 FAILURE = "failure"    30 SUCCESS = "success"    31     32 ANY = "any"    33 ANY_ALL = "any_all"    34 ASSERT = "assert"    35 ASSERT_NOT = "assert_not"    36 AT = "at"    37 BIGCHARSET = "bigcharset"    38 BRANCH = "branch"    39 CALL = "call"    40 CATEGORY = "category"    41 CHARSET = "charset"    42 GROUPREF = "groupref"    43 GROUPREF_IGNORE = "groupref_ignore"    44 GROUPREF_EXISTS = "groupref_exists"    45 IN = "in"    46 IN_IGNORE = "in_ignore"    47 INFO = "info"    48 JUMP = "jump"    49 LITERAL = "literal"    50 LITERAL_IGNORE = "literal_ignore"    51 MARK = "mark"    52 MAX_REPEAT = "max_repeat"    53 MAX_UNTIL = "max_until"    54 MIN_REPEAT = "min_repeat"    55 MIN_UNTIL = "min_until"    56 NEGATE = "negate"    57 NOT_LITERAL = "not_literal"    58 NOT_LITERAL_IGNORE = "not_literal_ignore"    59 RANGE = "range"    60 REPEAT = "repeat"    61 REPEAT_ONE = "repeat_one"    62 SUBPATTERN = "subpattern"    63 MIN_REPEAT_ONE = "min_repeat_one"    64     65 # positions    66 AT_BEGINNING = "at_beginning"    67 AT_BEGINNING_LINE = "at_beginning_line"    68 AT_BEGINNING_STRING = "at_beginning_string"    69 AT_BOUNDARY = "at_boundary"    70 AT_NON_BOUNDARY = "at_non_boundary"    71 AT_END = "at_end"    72 AT_END_LINE = "at_end_line"    73 AT_END_STRING = "at_end_string"    74 AT_LOC_BOUNDARY = "at_loc_boundary"    75 AT_LOC_NON_BOUNDARY = "at_loc_non_boundary"    76 AT_UNI_BOUNDARY = "at_uni_boundary"    77 AT_UNI_NON_BOUNDARY = "at_uni_non_boundary"    78     79 # categories    80 CATEGORY_DIGIT = "category_digit"    81 CATEGORY_NOT_DIGIT = "category_not_digit"    82 CATEGORY_SPACE = "category_space"    83 CATEGORY_NOT_SPACE = "category_not_space"    84 CATEGORY_WORD = "category_word"    85 CATEGORY_NOT_WORD = "category_not_word"    86 CATEGORY_LINEBREAK = "category_linebreak"    87 CATEGORY_NOT_LINEBREAK = "category_not_linebreak"    88 CATEGORY_LOC_WORD = "category_loc_word"    89 CATEGORY_LOC_NOT_WORD = "category_loc_not_word"    90 CATEGORY_UNI_DIGIT = "category_uni_digit"    91 CATEGORY_UNI_NOT_DIGIT = "category_uni_not_digit"    92 CATEGORY_UNI_SPACE = "category_uni_space"    93 CATEGORY_UNI_NOT_SPACE = "category_uni_not_space"    94 CATEGORY_UNI_WORD = "category_uni_word"    95 CATEGORY_UNI_NOT_WORD = "category_uni_not_word"    96 CATEGORY_UNI_LINEBREAK = "category_uni_linebreak"    97 CATEGORY_UNI_NOT_LINEBREAK = "category_uni_not_linebreak"    98     99 OPCODES = [   100    101     # failure=0 success=1 (just because it looks better that way :-)   102     FAILURE, SUCCESS,   103    104     ANY, ANY_ALL,   105     ASSERT, ASSERT_NOT,   106     AT,   107     BRANCH,   108     CALL,   109     CATEGORY,   110     CHARSET, BIGCHARSET,   111     GROUPREF, GROUPREF_EXISTS, GROUPREF_IGNORE,   112     IN, IN_IGNORE,   113     INFO,   114     JUMP,   115     LITERAL, LITERAL_IGNORE,   116     MARK,   117     MAX_UNTIL,   118     MIN_UNTIL,   119     NOT_LITERAL, NOT_LITERAL_IGNORE,   120     NEGATE,   121     RANGE,   122     REPEAT,   123     REPEAT_ONE,   124     SUBPATTERN,   125     MIN_REPEAT_ONE   126    127 ]   128    129 ATCODES = [   130     AT_BEGINNING, AT_BEGINNING_LINE, AT_BEGINNING_STRING, AT_BOUNDARY,   131     AT_NON_BOUNDARY, AT_END, AT_END_LINE, AT_END_STRING,   132     AT_LOC_BOUNDARY, AT_LOC_NON_BOUNDARY, AT_UNI_BOUNDARY,   133     AT_UNI_NON_BOUNDARY   134 ]   135    136 CHCODES = [   137     CATEGORY_DIGIT, CATEGORY_NOT_DIGIT, CATEGORY_SPACE,   138     CATEGORY_NOT_SPACE, CATEGORY_WORD, CATEGORY_NOT_WORD,   139     CATEGORY_LINEBREAK, CATEGORY_NOT_LINEBREAK, CATEGORY_LOC_WORD,   140     CATEGORY_LOC_NOT_WORD, CATEGORY_UNI_DIGIT, CATEGORY_UNI_NOT_DIGIT,   141     CATEGORY_UNI_SPACE, CATEGORY_UNI_NOT_SPACE, CATEGORY_UNI_WORD,   142     CATEGORY_UNI_NOT_WORD, CATEGORY_UNI_LINEBREAK,   143     CATEGORY_UNI_NOT_LINEBREAK   144 ]   145    146 def makedict(list):   147     d = {}   148     i = 0   149     for item in list:   150         d[item] = i   151         i = i + 1   152     return d   153    154 OPCODES = makedict(OPCODES)   155 ATCODES = makedict(ATCODES)   156 CHCODES = makedict(CHCODES)   157    158 # replacement operations for "ignore case" mode   159 OP_IGNORE = {   160     GROUPREF: GROUPREF_IGNORE,   161     IN: IN_IGNORE,   162     LITERAL: LITERAL_IGNORE,   163     NOT_LITERAL: NOT_LITERAL_IGNORE   164 }   165    166 AT_MULTILINE = {   167     AT_BEGINNING: AT_BEGINNING_LINE,   168     AT_END: AT_END_LINE   169 }   170    171 AT_LOCALE = {   172     AT_BOUNDARY: AT_LOC_BOUNDARY,   173     AT_NON_BOUNDARY: AT_LOC_NON_BOUNDARY   174 }   175    176 AT_UNICODE = {   177     AT_BOUNDARY: AT_UNI_BOUNDARY,   178     AT_NON_BOUNDARY: AT_UNI_NON_BOUNDARY   179 }   180    181 CH_LOCALE = {   182     CATEGORY_DIGIT: CATEGORY_DIGIT,   183     CATEGORY_NOT_DIGIT: CATEGORY_NOT_DIGIT,   184     CATEGORY_SPACE: CATEGORY_SPACE,   185     CATEGORY_NOT_SPACE: CATEGORY_NOT_SPACE,   186     CATEGORY_WORD: CATEGORY_LOC_WORD,   187     CATEGORY_NOT_WORD: CATEGORY_LOC_NOT_WORD,   188     CATEGORY_LINEBREAK: CATEGORY_LINEBREAK,   189     CATEGORY_NOT_LINEBREAK: CATEGORY_NOT_LINEBREAK   190 }   191    192 CH_UNICODE = {   193     CATEGORY_DIGIT: CATEGORY_UNI_DIGIT,   194     CATEGORY_NOT_DIGIT: CATEGORY_UNI_NOT_DIGIT,   195     CATEGORY_SPACE: CATEGORY_UNI_SPACE,   196     CATEGORY_NOT_SPACE: CATEGORY_UNI_NOT_SPACE,   197     CATEGORY_WORD: CATEGORY_UNI_WORD,   198     CATEGORY_NOT_WORD: CATEGORY_UNI_NOT_WORD,   199     CATEGORY_LINEBREAK: CATEGORY_UNI_LINEBREAK,   200     CATEGORY_NOT_LINEBREAK: CATEGORY_UNI_NOT_LINEBREAK   201 }   202    203 # flags   204 SRE_FLAG_TEMPLATE = 1 # template mode (disable backtracking)   205 SRE_FLAG_IGNORECASE = 2 # case insensitive   206 SRE_FLAG_LOCALE = 4 # honour system locale   207 SRE_FLAG_MULTILINE = 8 # treat target as multiline string   208 SRE_FLAG_DOTALL = 16 # treat target as a single string   209 SRE_FLAG_UNICODE = 32 # use unicode locale   210 SRE_FLAG_VERBOSE = 64 # ignore whitespace and comments   211 SRE_FLAG_DEBUG = 128 # debugging   212    213 # flags for INFO primitive   214 SRE_INFO_PREFIX = 1 # has prefix   215 SRE_INFO_LITERAL = 2 # entire pattern is literal (given by prefix)   216 SRE_INFO_CHARSET = 4 # pattern starts with character from given set