[279] | 1 | /**********************************************************************
|
---|
| 2 | regposix.c - Onigmo (Oniguruma-mod) (regular expression library)
|
---|
| 3 | **********************************************************************/
|
---|
| 4 | /*-
|
---|
| 5 | * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
---|
[331] | 6 | * Copyright (c) 2011-2016 K.Takata <kentkt AT csc DOT jp>
|
---|
[279] | 7 | * All rights reserved.
|
---|
| 8 | *
|
---|
| 9 | * Redistribution and use in source and binary forms, with or without
|
---|
| 10 | * modification, are permitted provided that the following conditions
|
---|
| 11 | * are met:
|
---|
| 12 | * 1. Redistributions of source code must retain the above copyright
|
---|
| 13 | * notice, this list of conditions and the following disclaimer.
|
---|
| 14 | * 2. Redistributions in binary form must reproduce the above copyright
|
---|
| 15 | * notice, this list of conditions and the following disclaimer in the
|
---|
| 16 | * documentation and/or other materials provided with the distribution.
|
---|
| 17 | *
|
---|
| 18 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
---|
| 19 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
---|
| 20 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
---|
| 21 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
---|
| 22 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
---|
| 23 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
---|
| 24 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
---|
| 25 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
---|
| 26 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
---|
| 27 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
---|
| 28 | * SUCH DAMAGE.
|
---|
| 29 | */
|
---|
| 30 |
|
---|
| 31 | #define regex_t onig_regex_t
|
---|
| 32 | #include "regint.h"
|
---|
| 33 | #undef regex_t
|
---|
[331] | 34 | #include "onigmoposix.h"
|
---|
[279] | 35 |
|
---|
/* Read the `onig` member of a POSIX-layer regex_t as a native
 * onig_regex_t pointer. */
#define ONIG_C(reg)   ((onig_regex_t *)((reg)->onig))

/* Address of the `onig` member, typed as onig_regex_t** so that a
 * native handle can be stored through it (regcomp passes this to
 * onig_new). */
#define PONIG_C(reg)  ((onig_regex_t **)(&(reg)->onig))
|
---|
| 38 |
|
---|
/*
 * ENC_STRING_LEN(enc, s, len)
 *
 * Store in `len` the byte length of the NUL-terminated string `s`,
 * not counting the terminator.
 *
 *  - When the minimum character length of `enc` is one byte, the string
 *    is scanned byte by byte up to the first zero byte.
 *  - Otherwise the computation is delegated to
 *    onigenc_str_bytelen_null().
 *
 * A plain `len = strlen(s)` was the earlier, now disabled, definition;
 * presumably it was dropped because it cannot handle encodings whose
 * characters are wider than one byte.
 *
 * `s` and `len` are fully parenthesized in every branch so that
 * arbitrary expressions can be passed safely.  `s` is evaluated more
 * than once, so it must not have side effects.  The result is narrowed
 * to int.
 */
#define ENC_STRING_LEN(enc,s,len) do { \
  if (ONIGENC_MBC_MINLEN(enc) == 1) { \
    UChar* tmps = (UChar* )(s); \
    while (*tmps != 0) tmps++; \
    (len) = (int )(tmps - (UChar* )(s)); \
  } \
  else { \
    (len) = onigenc_str_bytelen_null((enc), (UChar* )(s)); \
  } \
} while(0)
|
---|
| 50 |
|
---|
/* One row of the error translation table used by
 * onig2posix_error_code(). */
typedef struct {
  int onig_err;   /* native Onigmo status code (lookup key)   */
  int posix_err;  /* POSIX-layer REG_* code reported in place */
} O2PERR;
|
---|
| 55 |
|
---|
| 56 | static int
|
---|
| 57 | onig2posix_error_code(int code)
|
---|
| 58 | {
|
---|
| 59 | static const O2PERR o2p[] = {
|
---|
| 60 | { ONIG_MISMATCH, REG_NOMATCH },
|
---|
| 61 | { ONIG_NO_SUPPORT_CONFIG, REG_EONIG_INTERNAL },
|
---|
| 62 | { ONIGERR_MEMORY, REG_ESPACE },
|
---|
| 63 | { ONIGERR_MATCH_STACK_LIMIT_OVER, REG_EONIG_INTERNAL },
|
---|
| 64 | { ONIGERR_TYPE_BUG, REG_EONIG_INTERNAL },
|
---|
| 65 | { ONIGERR_PARSER_BUG, REG_EONIG_INTERNAL },
|
---|
| 66 | { ONIGERR_STACK_BUG, REG_EONIG_INTERNAL },
|
---|
| 67 | { ONIGERR_UNDEFINED_BYTECODE, REG_EONIG_INTERNAL },
|
---|
| 68 | { ONIGERR_UNEXPECTED_BYTECODE, REG_EONIG_INTERNAL },
|
---|
| 69 | { ONIGERR_DEFAULT_ENCODING_IS_NOT_SET, REG_EONIG_BADARG },
|
---|
| 70 | { ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR, REG_EONIG_BADARG },
|
---|
| 71 | { ONIGERR_INVALID_ARGUMENT, REG_EONIG_BADARG },
|
---|
| 72 | { ONIGERR_END_PATTERN_AT_LEFT_BRACE, REG_EBRACE },
|
---|
| 73 | { ONIGERR_END_PATTERN_AT_LEFT_BRACKET, REG_EBRACK },
|
---|
| 74 | { ONIGERR_EMPTY_CHAR_CLASS, REG_ECTYPE },
|
---|
| 75 | { ONIGERR_PREMATURE_END_OF_CHAR_CLASS, REG_ECTYPE },
|
---|
| 76 | { ONIGERR_END_PATTERN_AT_ESCAPE, REG_EESCAPE },
|
---|
| 77 | { ONIGERR_END_PATTERN_AT_META, REG_EESCAPE },
|
---|
| 78 | { ONIGERR_END_PATTERN_AT_CONTROL, REG_EESCAPE },
|
---|
| 79 | { ONIGERR_META_CODE_SYNTAX, REG_BADPAT },
|
---|
| 80 | { ONIGERR_CONTROL_CODE_SYNTAX, REG_BADPAT },
|
---|
| 81 | { ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE, REG_ECTYPE },
|
---|
| 82 | { ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE, REG_ECTYPE },
|
---|
| 83 | { ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS, REG_ECTYPE },
|
---|
| 84 | { ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED, REG_BADRPT },
|
---|
| 85 | { ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID, REG_BADRPT },
|
---|
| 86 | { ONIGERR_NESTED_REPEAT_OPERATOR, REG_BADRPT },
|
---|
| 87 | { ONIGERR_UNMATCHED_CLOSE_PARENTHESIS, REG_EPAREN },
|
---|
| 88 | { ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS, REG_EPAREN },
|
---|
| 89 | { ONIGERR_END_PATTERN_IN_GROUP, REG_BADPAT },
|
---|
| 90 | { ONIGERR_UNDEFINED_GROUP_OPTION, REG_BADPAT },
|
---|
| 91 | { ONIGERR_INVALID_POSIX_BRACKET_TYPE, REG_BADPAT },
|
---|
| 92 | { ONIGERR_INVALID_LOOK_BEHIND_PATTERN, REG_BADPAT },
|
---|
| 93 | { ONIGERR_INVALID_REPEAT_RANGE_PATTERN, REG_BADPAT },
|
---|
| 94 | { ONIGERR_TOO_BIG_NUMBER, REG_BADPAT },
|
---|
| 95 | { ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE, REG_BADBR },
|
---|
| 96 | { ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE, REG_BADBR },
|
---|
| 97 | { ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS, REG_ECTYPE },
|
---|
| 98 | { ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE, REG_ECTYPE },
|
---|
| 99 | { ONIGERR_TOO_MANY_MULTI_BYTE_RANGES, REG_ECTYPE },
|
---|
| 100 | { ONIGERR_TOO_SHORT_MULTI_BYTE_STRING, REG_BADPAT },
|
---|
| 101 | { ONIGERR_TOO_BIG_BACKREF_NUMBER, REG_ESUBREG },
|
---|
| 102 | { ONIGERR_INVALID_BACKREF, REG_ESUBREG },
|
---|
| 103 | { ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED, REG_BADPAT },
|
---|
| 104 | { ONIGERR_TOO_BIG_WIDE_CHAR_VALUE, REG_EONIG_BADWC },
|
---|
| 105 | { ONIGERR_TOO_LONG_WIDE_CHAR_VALUE, REG_EONIG_BADWC },
|
---|
| 106 | { ONIGERR_INVALID_CODE_POINT_VALUE, REG_EONIG_BADWC },
|
---|
| 107 | { ONIGERR_EMPTY_GROUP_NAME, REG_BADPAT },
|
---|
| 108 | { ONIGERR_INVALID_GROUP_NAME, REG_BADPAT },
|
---|
| 109 | { ONIGERR_INVALID_CHAR_IN_GROUP_NAME, REG_BADPAT },
|
---|
| 110 | { ONIGERR_UNDEFINED_NAME_REFERENCE, REG_BADPAT },
|
---|
| 111 | { ONIGERR_UNDEFINED_GROUP_REFERENCE, REG_BADPAT },
|
---|
| 112 | { ONIGERR_MULTIPLEX_DEFINED_NAME, REG_BADPAT },
|
---|
| 113 | { ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, REG_BADPAT },
|
---|
| 114 | { ONIGERR_NEVER_ENDING_RECURSION, REG_BADPAT },
|
---|
| 115 | { ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY, REG_BADPAT },
|
---|
| 116 | { ONIGERR_INVALID_CHAR_PROPERTY_NAME, REG_BADPAT },
|
---|
| 117 | { ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION, REG_EONIG_BADARG },
|
---|
| 118 |
|
---|
| 119 | };
|
---|
| 120 |
|
---|
| 121 | int i;
|
---|
| 122 |
|
---|
| 123 | if (code >= 0) return 0;
|
---|
| 124 |
|
---|
| 125 | for (i = 0; i < numberof(o2p); i++) {
|
---|
| 126 | if (code == o2p[i].onig_err)
|
---|
| 127 | return o2p[i].posix_err;
|
---|
| 128 | }
|
---|
| 129 |
|
---|
| 130 | return REG_EONIG_INTERNAL; /* but, unknown error code */
|
---|
| 131 | }
|
---|
| 132 |
|
---|
| 133 | extern int
|
---|
| 134 | regcomp(regex_t* reg, const char* pattern, int posix_options)
|
---|
| 135 | {
|
---|
| 136 | int r, len;
|
---|
[331] | 137 | const OnigSyntaxType* syntax = OnigDefaultSyntax;
|
---|
[279] | 138 | OnigOptionType options;
|
---|
| 139 |
|
---|
| 140 | if ((posix_options & REG_EXTENDED) == 0)
|
---|
| 141 | syntax = ONIG_SYNTAX_POSIX_BASIC;
|
---|
| 142 |
|
---|
| 143 | options = syntax->options;
|
---|
| 144 | if ((posix_options & REG_ICASE) != 0)
|
---|
| 145 | ONIG_OPTION_ON(options, ONIG_OPTION_IGNORECASE);
|
---|
| 146 | if ((posix_options & REG_NEWLINE) != 0) {
|
---|
| 147 | ONIG_OPTION_ON( options, ONIG_OPTION_NEGATE_SINGLELINE);
|
---|
| 148 | ONIG_OPTION_OFF(options, ONIG_OPTION_SINGLELINE);
|
---|
| 149 | }
|
---|
| 150 |
|
---|
| 151 | reg->comp_options = posix_options;
|
---|
| 152 |
|
---|
| 153 | ENC_STRING_LEN(OnigEncDefaultCharEncoding, pattern, len);
|
---|
| 154 | r = onig_new(PONIG_C(reg), (UChar* )pattern, (UChar* )(pattern + len),
|
---|
| 155 | options, OnigEncDefaultCharEncoding, syntax,
|
---|
| 156 | (OnigErrorInfo* )NULL);
|
---|
| 157 | if (r != ONIG_NORMAL) {
|
---|
| 158 | return onig2posix_error_code(r);
|
---|
| 159 | }
|
---|
| 160 |
|
---|
| 161 | reg->re_nsub = ONIG_C(reg)->num_mem;
|
---|
| 162 | return 0;
|
---|
| 163 | }
|
---|
| 164 |
|
---|
| 165 | extern int
|
---|
| 166 | regexec(regex_t* reg, const char* str, size_t nmatch,
|
---|
| 167 | regmatch_t pmatch[], int posix_options)
|
---|
| 168 | {
|
---|
| 169 | int r, i, len;
|
---|
| 170 | UChar* end;
|
---|
[331] | 171 | OnigRegion* region = NULL;
|
---|
[279] | 172 | OnigOptionType options;
|
---|
| 173 |
|
---|
[331] | 174 | options = ONIG_OPTION_NONE;
|
---|
[279] | 175 | if ((posix_options & REG_NOTBOL) != 0) options |= ONIG_OPTION_NOTBOL;
|
---|
| 176 | if ((posix_options & REG_NOTEOL) != 0) options |= ONIG_OPTION_NOTEOL;
|
---|
| 177 |
|
---|
[331] | 178 | if ((reg->comp_options & REG_NOSUB) != 0) {
|
---|
[279] | 179 | nmatch = 0;
|
---|
| 180 | }
|
---|
[331] | 181 | else if (nmatch != 0) {
|
---|
| 182 | region = onig_region_new();
|
---|
| 183 | if (region == NULL)
|
---|
[279] | 184 | return REG_ESPACE;
|
---|
| 185 | }
|
---|
| 186 |
|
---|
| 187 | ENC_STRING_LEN(ONIG_C(reg)->enc, str, len);
|
---|
| 188 | end = (UChar* )(str + len);
|
---|
| 189 | r = (int )onig_search(ONIG_C(reg), (UChar* )str, end, (UChar* )str, end,
|
---|
[331] | 190 | region, options);
|
---|
[279] | 191 |
|
---|
| 192 | if (r >= 0) {
|
---|
| 193 | r = 0; /* Match */
|
---|
[331] | 194 | for (i = 0; i < (int )nmatch; i++) {
|
---|
| 195 | pmatch[i].rm_so = (regoff_t )region->beg[i];
|
---|
| 196 | pmatch[i].rm_eo = (regoff_t )region->end[i];
|
---|
[279] | 197 | }
|
---|
| 198 | }
|
---|
| 199 | else if (r == ONIG_MISMATCH) {
|
---|
| 200 | r = REG_NOMATCH;
|
---|
| 201 | for (i = 0; i < (int )nmatch; i++)
|
---|
| 202 | pmatch[i].rm_so = pmatch[i].rm_eo = ONIG_REGION_NOTPOS;
|
---|
| 203 | }
|
---|
| 204 | else {
|
---|
| 205 | r = onig2posix_error_code(r);
|
---|
| 206 | }
|
---|
| 207 |
|
---|
[331] | 208 | if (region != NULL)
|
---|
| 209 | onig_region_free(region, 1);
|
---|
[279] | 210 |
|
---|
| 211 | #if 0
|
---|
| 212 | if (reg->re_nsub > nmatch - 1)
|
---|
| 213 | reg->re_nsub = (nmatch <= 1 ? 0 : nmatch - 1);
|
---|
| 214 | #endif
|
---|
| 215 |
|
---|
| 216 | return r;
|
---|
| 217 | }
|
---|
| 218 |
|
---|
| 219 | extern void
|
---|
| 220 | regfree(regex_t* reg)
|
---|
| 221 | {
|
---|
| 222 | onig_free(ONIG_C(reg));
|
---|
| 223 | }
|
---|
| 224 |
|
---|
| 225 |
|
---|
| 226 | extern void
|
---|
| 227 | reg_set_encoding(int mb_code)
|
---|
| 228 | {
|
---|
| 229 | OnigEncoding enc;
|
---|
| 230 |
|
---|
| 231 | switch (mb_code) {
|
---|
| 232 | case REG_POSIX_ENCODING_ASCII:
|
---|
| 233 | enc = ONIG_ENCODING_ASCII;
|
---|
| 234 | break;
|
---|
| 235 | case REG_POSIX_ENCODING_EUC_JP:
|
---|
| 236 | enc = ONIG_ENCODING_EUC_JP;
|
---|
| 237 | break;
|
---|
| 238 | case REG_POSIX_ENCODING_SJIS:
|
---|
| 239 | enc = ONIG_ENCODING_SJIS;
|
---|
| 240 | break;
|
---|
| 241 | case REG_POSIX_ENCODING_UTF8:
|
---|
| 242 | enc = ONIG_ENCODING_UTF8;
|
---|
| 243 | break;
|
---|
| 244 | case REG_POSIX_ENCODING_UTF16_BE:
|
---|
| 245 | enc = ONIG_ENCODING_UTF16_BE;
|
---|
| 246 | break;
|
---|
| 247 | case REG_POSIX_ENCODING_UTF16_LE:
|
---|
| 248 | enc = ONIG_ENCODING_UTF16_LE;
|
---|
| 249 | break;
|
---|
[331] | 250 |
|
---|
[279] | 251 | default:
|
---|
| 252 | return ;
|
---|
| 253 | break;
|
---|
| 254 | }
|
---|
| 255 |
|
---|
| 256 | onigenc_set_default_encoding(enc);
|
---|
| 257 | }
|
---|
| 258 |
|
---|
| 259 | extern int
|
---|
| 260 | reg_name_to_group_numbers(regex_t* reg,
|
---|
| 261 | const unsigned char* name, const unsigned char* name_end, int** nums)
|
---|
| 262 | {
|
---|
| 263 | return onig_name_to_group_numbers(ONIG_C(reg), name, name_end, nums);
|
---|
| 264 | }
|
---|
| 265 |
|
---|
| 266 | typedef struct {
|
---|
| 267 | int (*func)(const unsigned char*, const unsigned char*,int,int*,regex_t*,void*);
|
---|
| 268 | regex_t* reg;
|
---|
| 269 | void* arg;
|
---|
| 270 | } i_wrap;
|
---|
| 271 |
|
---|
| 272 | static int
|
---|
| 273 | i_wrapper(const UChar* name, const UChar* name_end, int ng, int* gs,
|
---|
| 274 | onig_regex_t* reg ARG_UNUSED, void* arg)
|
---|
| 275 | {
|
---|
| 276 | i_wrap* warg = (i_wrap* )arg;
|
---|
| 277 |
|
---|
| 278 | return (*warg->func)(name, name_end, ng, gs, warg->reg, warg->arg);
|
---|
| 279 | }
|
---|
| 280 |
|
---|
| 281 | extern int
|
---|
| 282 | reg_foreach_name(regex_t* reg,
|
---|
| 283 | int (*func)(const unsigned char*, const unsigned char*,int,int*,regex_t*,void*),
|
---|
| 284 | void* arg)
|
---|
| 285 | {
|
---|
| 286 | i_wrap warg;
|
---|
| 287 |
|
---|
| 288 | warg.func = func;
|
---|
| 289 | warg.reg = reg;
|
---|
| 290 | warg.arg = arg;
|
---|
| 291 |
|
---|
| 292 | return onig_foreach_name(ONIG_C(reg), i_wrapper, &warg);
|
---|
| 293 | }
|
---|
| 294 |
|
---|
| 295 | extern int
|
---|
| 296 | reg_number_of_names(regex_t* reg)
|
---|
| 297 | {
|
---|
| 298 | return onig_number_of_names(ONIG_C(reg));
|
---|
| 299 | }
|
---|