/**********************************************************************
  regposix.c -  Onigmo (Oniguruma-mod) (regular expression library)
**********************************************************************/
/*-
 * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
 * Copyright (c) 2011-2016 K.Takata <kentkt AT csc DOT jp>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
|
---|
30 |
|
---|
31 | #define regex_t onig_regex_t
|
---|
32 | #include "regint.h"
|
---|
33 | #undef regex_t
|
---|
34 | #include "onigmoposix.h"
|
---|
35 |
|
---|
/* View the opaque `onig` member of a POSIX regex_t as the Onigmo regex. */
#define ONIG_C(reg)    ((onig_regex_t* )(reg)->onig)
/* Address of that member, typed so it can receive a newly created regex. */
#define PONIG_C(reg)   ((onig_regex_t** )&((reg)->onig))
|
---|
38 |
|
---|
/*
 * ENC_STRING_LEN(enc, s, len)
 *
 * Store in `len` the byte length of the zero-terminated string `s`.
 *
 *  - When the encoding's minimum character length is 1, the string is
 *    scanned byte by byte up to the first zero byte (the same count that
 *    strlen(s) would give, narrowed to int).
 *  - Otherwise the length is obtained from onigenc_str_bytelen_null().
 *
 * `s` is evaluated more than once, so it must not have side effects.
 * Every argument is parenthesized in the expansion so that compound
 * expressions can be passed safely.
 */
#define ENC_STRING_LEN(enc,s,len) do { \
  if (ONIGENC_MBC_MINLEN(enc) == 1) { \
    UChar* tmps = (UChar* )(s); \
    while (*tmps != 0) tmps++; \
    (len) = (int )(tmps - (UChar* )(s)); \
  } \
  else { \
    (len) = onigenc_str_bytelen_null((enc), (UChar* )(s)); \
  } \
} while(0)
|
---|
50 |
|
---|
/* One row of the Onigmo -> POSIX error translation table. */
typedef struct {
  int onig_err;    /* code as reported by Onigmo */
  int posix_err;   /* REG_* code handed back to the POSIX caller */
} O2PERR;
|
---|
55 |
|
---|
56 | static int
|
---|
57 | onig2posix_error_code(int code)
|
---|
58 | {
|
---|
59 | static const O2PERR o2p[] = {
|
---|
60 | { ONIG_MISMATCH, REG_NOMATCH },
|
---|
61 | { ONIG_NO_SUPPORT_CONFIG, REG_EONIG_INTERNAL },
|
---|
62 | { ONIGERR_MEMORY, REG_ESPACE },
|
---|
63 | { ONIGERR_MATCH_STACK_LIMIT_OVER, REG_EONIG_INTERNAL },
|
---|
64 | { ONIGERR_TYPE_BUG, REG_EONIG_INTERNAL },
|
---|
65 | { ONIGERR_PARSER_BUG, REG_EONIG_INTERNAL },
|
---|
66 | { ONIGERR_STACK_BUG, REG_EONIG_INTERNAL },
|
---|
67 | { ONIGERR_UNDEFINED_BYTECODE, REG_EONIG_INTERNAL },
|
---|
68 | { ONIGERR_UNEXPECTED_BYTECODE, REG_EONIG_INTERNAL },
|
---|
69 | { ONIGERR_DEFAULT_ENCODING_IS_NOT_SET, REG_EONIG_BADARG },
|
---|
70 | { ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR, REG_EONIG_BADARG },
|
---|
71 | { ONIGERR_INVALID_ARGUMENT, REG_EONIG_BADARG },
|
---|
72 | { ONIGERR_END_PATTERN_AT_LEFT_BRACE, REG_EBRACE },
|
---|
73 | { ONIGERR_END_PATTERN_AT_LEFT_BRACKET, REG_EBRACK },
|
---|
74 | { ONIGERR_EMPTY_CHAR_CLASS, REG_ECTYPE },
|
---|
75 | { ONIGERR_PREMATURE_END_OF_CHAR_CLASS, REG_ECTYPE },
|
---|
76 | { ONIGERR_END_PATTERN_AT_ESCAPE, REG_EESCAPE },
|
---|
77 | { ONIGERR_END_PATTERN_AT_META, REG_EESCAPE },
|
---|
78 | { ONIGERR_END_PATTERN_AT_CONTROL, REG_EESCAPE },
|
---|
79 | { ONIGERR_META_CODE_SYNTAX, REG_BADPAT },
|
---|
80 | { ONIGERR_CONTROL_CODE_SYNTAX, REG_BADPAT },
|
---|
81 | { ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE, REG_ECTYPE },
|
---|
82 | { ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE, REG_ECTYPE },
|
---|
83 | { ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS, REG_ECTYPE },
|
---|
84 | { ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED, REG_BADRPT },
|
---|
85 | { ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID, REG_BADRPT },
|
---|
86 | { ONIGERR_NESTED_REPEAT_OPERATOR, REG_BADRPT },
|
---|
87 | { ONIGERR_UNMATCHED_CLOSE_PARENTHESIS, REG_EPAREN },
|
---|
88 | { ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS, REG_EPAREN },
|
---|
89 | { ONIGERR_END_PATTERN_IN_GROUP, REG_BADPAT },
|
---|
90 | { ONIGERR_UNDEFINED_GROUP_OPTION, REG_BADPAT },
|
---|
91 | { ONIGERR_INVALID_POSIX_BRACKET_TYPE, REG_BADPAT },
|
---|
92 | { ONIGERR_INVALID_LOOK_BEHIND_PATTERN, REG_BADPAT },
|
---|
93 | { ONIGERR_INVALID_REPEAT_RANGE_PATTERN, REG_BADPAT },
|
---|
94 | { ONIGERR_TOO_BIG_NUMBER, REG_BADPAT },
|
---|
95 | { ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE, REG_BADBR },
|
---|
96 | { ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE, REG_BADBR },
|
---|
97 | { ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS, REG_ECTYPE },
|
---|
98 | { ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE, REG_ECTYPE },
|
---|
99 | { ONIGERR_TOO_MANY_MULTI_BYTE_RANGES, REG_ECTYPE },
|
---|
100 | { ONIGERR_TOO_SHORT_MULTI_BYTE_STRING, REG_BADPAT },
|
---|
101 | { ONIGERR_TOO_BIG_BACKREF_NUMBER, REG_ESUBREG },
|
---|
102 | { ONIGERR_INVALID_BACKREF, REG_ESUBREG },
|
---|
103 | { ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED, REG_BADPAT },
|
---|
104 | { ONIGERR_TOO_BIG_WIDE_CHAR_VALUE, REG_EONIG_BADWC },
|
---|
105 | { ONIGERR_TOO_LONG_WIDE_CHAR_VALUE, REG_EONIG_BADWC },
|
---|
106 | { ONIGERR_INVALID_CODE_POINT_VALUE, REG_EONIG_BADWC },
|
---|
107 | { ONIGERR_EMPTY_GROUP_NAME, REG_BADPAT },
|
---|
108 | { ONIGERR_INVALID_GROUP_NAME, REG_BADPAT },
|
---|
109 | { ONIGERR_INVALID_CHAR_IN_GROUP_NAME, REG_BADPAT },
|
---|
110 | { ONIGERR_UNDEFINED_NAME_REFERENCE, REG_BADPAT },
|
---|
111 | { ONIGERR_UNDEFINED_GROUP_REFERENCE, REG_BADPAT },
|
---|
112 | { ONIGERR_MULTIPLEX_DEFINED_NAME, REG_BADPAT },
|
---|
113 | { ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, REG_BADPAT },
|
---|
114 | { ONIGERR_NEVER_ENDING_RECURSION, REG_BADPAT },
|
---|
115 | { ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY, REG_BADPAT },
|
---|
116 | { ONIGERR_INVALID_CHAR_PROPERTY_NAME, REG_BADPAT },
|
---|
117 | { ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION, REG_EONIG_BADARG },
|
---|
118 |
|
---|
119 | };
|
---|
120 |
|
---|
121 | int i;
|
---|
122 |
|
---|
123 | if (code >= 0) return 0;
|
---|
124 |
|
---|
125 | for (i = 0; i < numberof(o2p); i++) {
|
---|
126 | if (code == o2p[i].onig_err)
|
---|
127 | return o2p[i].posix_err;
|
---|
128 | }
|
---|
129 |
|
---|
130 | return REG_EONIG_INTERNAL; /* but, unknown error code */
|
---|
131 | }
|
---|
132 |
|
---|
133 | extern int
|
---|
134 | regcomp(regex_t* reg, const char* pattern, int posix_options)
|
---|
135 | {
|
---|
136 | int r, len;
|
---|
137 | const OnigSyntaxType* syntax = OnigDefaultSyntax;
|
---|
138 | OnigOptionType options;
|
---|
139 |
|
---|
140 | if ((posix_options & REG_EXTENDED) == 0)
|
---|
141 | syntax = ONIG_SYNTAX_POSIX_BASIC;
|
---|
142 |
|
---|
143 | options = syntax->options;
|
---|
144 | if ((posix_options & REG_ICASE) != 0)
|
---|
145 | ONIG_OPTION_ON(options, ONIG_OPTION_IGNORECASE);
|
---|
146 | if ((posix_options & REG_NEWLINE) != 0) {
|
---|
147 | ONIG_OPTION_ON( options, ONIG_OPTION_NEGATE_SINGLELINE);
|
---|
148 | ONIG_OPTION_OFF(options, ONIG_OPTION_SINGLELINE);
|
---|
149 | }
|
---|
150 |
|
---|
151 | reg->comp_options = posix_options;
|
---|
152 |
|
---|
153 | ENC_STRING_LEN(OnigEncDefaultCharEncoding, pattern, len);
|
---|
154 | r = onig_new(PONIG_C(reg), (UChar* )pattern, (UChar* )(pattern + len),
|
---|
155 | options, OnigEncDefaultCharEncoding, syntax,
|
---|
156 | (OnigErrorInfo* )NULL);
|
---|
157 | if (r != ONIG_NORMAL) {
|
---|
158 | return onig2posix_error_code(r);
|
---|
159 | }
|
---|
160 |
|
---|
161 | reg->re_nsub = ONIG_C(reg)->num_mem;
|
---|
162 | return 0;
|
---|
163 | }
|
---|
164 |
|
---|
165 | extern int
|
---|
166 | regexec(regex_t* reg, const char* str, size_t nmatch,
|
---|
167 | regmatch_t pmatch[], int posix_options)
|
---|
168 | {
|
---|
169 | int r, i, len;
|
---|
170 | UChar* end;
|
---|
171 | OnigRegion* region = NULL;
|
---|
172 | OnigOptionType options;
|
---|
173 |
|
---|
174 | options = ONIG_OPTION_NONE;
|
---|
175 | if ((posix_options & REG_NOTBOL) != 0) options |= ONIG_OPTION_NOTBOL;
|
---|
176 | if ((posix_options & REG_NOTEOL) != 0) options |= ONIG_OPTION_NOTEOL;
|
---|
177 |
|
---|
178 | if ((reg->comp_options & REG_NOSUB) != 0) {
|
---|
179 | nmatch = 0;
|
---|
180 | }
|
---|
181 | else if (nmatch != 0) {
|
---|
182 | region = onig_region_new();
|
---|
183 | if (region == NULL)
|
---|
184 | return REG_ESPACE;
|
---|
185 | }
|
---|
186 |
|
---|
187 | ENC_STRING_LEN(ONIG_C(reg)->enc, str, len);
|
---|
188 | end = (UChar* )(str + len);
|
---|
189 | r = (int )onig_search(ONIG_C(reg), (UChar* )str, end, (UChar* )str, end,
|
---|
190 | region, options);
|
---|
191 |
|
---|
192 | if (r >= 0) {
|
---|
193 | r = 0; /* Match */
|
---|
194 | for (i = 0; i < (int )nmatch; i++) {
|
---|
195 | pmatch[i].rm_so = (regoff_t )region->beg[i];
|
---|
196 | pmatch[i].rm_eo = (regoff_t )region->end[i];
|
---|
197 | }
|
---|
198 | }
|
---|
199 | else if (r == ONIG_MISMATCH) {
|
---|
200 | r = REG_NOMATCH;
|
---|
201 | for (i = 0; i < (int )nmatch; i++)
|
---|
202 | pmatch[i].rm_so = pmatch[i].rm_eo = ONIG_REGION_NOTPOS;
|
---|
203 | }
|
---|
204 | else {
|
---|
205 | r = onig2posix_error_code(r);
|
---|
206 | }
|
---|
207 |
|
---|
208 | if (region != NULL)
|
---|
209 | onig_region_free(region, 1);
|
---|
210 |
|
---|
211 | #if 0
|
---|
212 | if (reg->re_nsub > nmatch - 1)
|
---|
213 | reg->re_nsub = (nmatch <= 1 ? 0 : nmatch - 1);
|
---|
214 | #endif
|
---|
215 |
|
---|
216 | return r;
|
---|
217 | }
|
---|
218 |
|
---|
219 | extern void
|
---|
220 | regfree(regex_t* reg)
|
---|
221 | {
|
---|
222 | onig_free(ONIG_C(reg));
|
---|
223 | }
|
---|
224 |
|
---|
225 |
|
---|
226 | extern void
|
---|
227 | reg_set_encoding(int mb_code)
|
---|
228 | {
|
---|
229 | OnigEncoding enc;
|
---|
230 |
|
---|
231 | switch (mb_code) {
|
---|
232 | case REG_POSIX_ENCODING_ASCII:
|
---|
233 | enc = ONIG_ENCODING_ASCII;
|
---|
234 | break;
|
---|
235 | case REG_POSIX_ENCODING_EUC_JP:
|
---|
236 | enc = ONIG_ENCODING_EUC_JP;
|
---|
237 | break;
|
---|
238 | case REG_POSIX_ENCODING_SJIS:
|
---|
239 | enc = ONIG_ENCODING_SJIS;
|
---|
240 | break;
|
---|
241 | case REG_POSIX_ENCODING_UTF8:
|
---|
242 | enc = ONIG_ENCODING_UTF8;
|
---|
243 | break;
|
---|
244 | case REG_POSIX_ENCODING_UTF16_BE:
|
---|
245 | enc = ONIG_ENCODING_UTF16_BE;
|
---|
246 | break;
|
---|
247 | case REG_POSIX_ENCODING_UTF16_LE:
|
---|
248 | enc = ONIG_ENCODING_UTF16_LE;
|
---|
249 | break;
|
---|
250 |
|
---|
251 | default:
|
---|
252 | return ;
|
---|
253 | break;
|
---|
254 | }
|
---|
255 |
|
---|
256 | onigenc_set_default_encoding(enc);
|
---|
257 | }
|
---|
258 |
|
---|
259 | extern int
|
---|
260 | reg_name_to_group_numbers(regex_t* reg,
|
---|
261 | const unsigned char* name, const unsigned char* name_end, int** nums)
|
---|
262 | {
|
---|
263 | return onig_name_to_group_numbers(ONIG_C(reg), name, name_end, nums);
|
---|
264 | }
|
---|
265 |
|
---|
266 | typedef struct {
|
---|
267 | int (*func)(const unsigned char*, const unsigned char*,int,int*,regex_t*,void*);
|
---|
268 | regex_t* reg;
|
---|
269 | void* arg;
|
---|
270 | } i_wrap;
|
---|
271 |
|
---|
272 | static int
|
---|
273 | i_wrapper(const UChar* name, const UChar* name_end, int ng, int* gs,
|
---|
274 | onig_regex_t* reg ARG_UNUSED, void* arg)
|
---|
275 | {
|
---|
276 | i_wrap* warg = (i_wrap* )arg;
|
---|
277 |
|
---|
278 | return (*warg->func)(name, name_end, ng, gs, warg->reg, warg->arg);
|
---|
279 | }
|
---|
280 |
|
---|
281 | extern int
|
---|
282 | reg_foreach_name(regex_t* reg,
|
---|
283 | int (*func)(const unsigned char*, const unsigned char*,int,int*,regex_t*,void*),
|
---|
284 | void* arg)
|
---|
285 | {
|
---|
286 | i_wrap warg;
|
---|
287 |
|
---|
288 | warg.func = func;
|
---|
289 | warg.reg = reg;
|
---|
290 | warg.arg = arg;
|
---|
291 |
|
---|
292 | return onig_foreach_name(ONIG_C(reg), i_wrapper, &warg);
|
---|
293 | }
|
---|
294 |
|
---|
295 | extern int
|
---|
296 | reg_number_of_names(regex_t* reg)
|
---|
297 | {
|
---|
298 | return onig_number_of_names(ONIG_C(reg));
|
---|
299 | }
|
---|