source: EcnlProtoTool/trunk/onigmo-6.1.3/src/regposix.c@ 331

Last change on this file since 331 was 331, checked in by coas-nagasima, 6 years ago

prototoolに関連するプロジェクトをnewlibからmuslを使うよう変更・更新
ntshellをnewlibの下位の実装から、muslのsyscallの実装に変更・更新
以下のOSSをアップデート
・mruby-1.3.0
・musl-1.1.18
・onigmo-6.1.3
・tcc-0.9.27
以下のOSSを追加
・openssl-1.1.0e
・curl-7.57.0
・zlib-1.2.11
以下のmrbgemsを追加
・iij/mruby-digest
・iij/mruby-env
・iij/mruby-errno
・iij/mruby-iijson
・iij/mruby-ipaddr
・iij/mruby-mock
・iij/mruby-require
・iij/mruby-tls-openssl

  • Property svn:eol-style set to native
  • Property svn:mime-type set to text/x-csrc;charset=UTF-8
File size: 10.5 KB
Line 
1/**********************************************************************
2 regposix.c - Onigmo (Oniguruma-mod) (regular expression library)
3**********************************************************************/
4/*-
5 * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6 * Copyright (c) 2011-2016 K.Takata <kentkt AT csc DOT jp>
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31#define regex_t onig_regex_t
32#include "regint.h"
33#undef regex_t
34#include "onigmoposix.h"
35
/* Read the handle stored in a POSIX regex_t as a pointer to the native
 * Onigmo regex object. */
#define ONIG_C(reg)   ((onig_regex_t* )((reg)->onig))

/* Address of that same handle slot, typed so that a native regex pointer
 * can be stored through it. */
#define PONIG_C(reg)  ((onig_regex_t** )(&(reg)->onig))
38
/*
 * ENC_STRING_LEN(enc, s, len)
 *
 * Store in `len` the length in bytes of the NUL-terminated string `s`,
 * not counting the terminator.
 *
 * When ONIGENC_MBC_MINLEN(enc) is 1 the string is scanned for the first
 * zero byte; for every other encoding the measurement is delegated to
 * onigenc_str_bytelen_null().
 *
 * `s` and `len` may be arbitrary expressions: each use is parenthesized so
 * that a cast or assignment applies to the whole argument.  `s` is
 * evaluated more than once, so it must not have side effects.
 */
#define ENC_STRING_LEN(enc,s,len) do { \
  if (ONIGENC_MBC_MINLEN(enc) == 1) { \
    const UChar* tmps = (const UChar* )(s); \
    while (*tmps != 0) tmps++; \
    (len) = (int )(tmps - (const UChar* )(s)); \
  } \
  else { \
    (len) = onigenc_str_bytelen_null((enc), (UChar* )(s)); \
  } \
} while(0)
50
/*
 * One row of the Onigmo -> POSIX error translation table.
 * The field order is relied upon by positional initializers.
 */
typedef struct {
  int onig_err;   /* code the incoming Onigmo status is compared against */
  int posix_err;  /* code returned to the caller when onig_err matches */
} O2PERR;
55
56static int
57onig2posix_error_code(int code)
58{
59 static const O2PERR o2p[] = {
60 { ONIG_MISMATCH, REG_NOMATCH },
61 { ONIG_NO_SUPPORT_CONFIG, REG_EONIG_INTERNAL },
62 { ONIGERR_MEMORY, REG_ESPACE },
63 { ONIGERR_MATCH_STACK_LIMIT_OVER, REG_EONIG_INTERNAL },
64 { ONIGERR_TYPE_BUG, REG_EONIG_INTERNAL },
65 { ONIGERR_PARSER_BUG, REG_EONIG_INTERNAL },
66 { ONIGERR_STACK_BUG, REG_EONIG_INTERNAL },
67 { ONIGERR_UNDEFINED_BYTECODE, REG_EONIG_INTERNAL },
68 { ONIGERR_UNEXPECTED_BYTECODE, REG_EONIG_INTERNAL },
69 { ONIGERR_DEFAULT_ENCODING_IS_NOT_SET, REG_EONIG_BADARG },
70 { ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR, REG_EONIG_BADARG },
71 { ONIGERR_INVALID_ARGUMENT, REG_EONIG_BADARG },
72 { ONIGERR_END_PATTERN_AT_LEFT_BRACE, REG_EBRACE },
73 { ONIGERR_END_PATTERN_AT_LEFT_BRACKET, REG_EBRACK },
74 { ONIGERR_EMPTY_CHAR_CLASS, REG_ECTYPE },
75 { ONIGERR_PREMATURE_END_OF_CHAR_CLASS, REG_ECTYPE },
76 { ONIGERR_END_PATTERN_AT_ESCAPE, REG_EESCAPE },
77 { ONIGERR_END_PATTERN_AT_META, REG_EESCAPE },
78 { ONIGERR_END_PATTERN_AT_CONTROL, REG_EESCAPE },
79 { ONIGERR_META_CODE_SYNTAX, REG_BADPAT },
80 { ONIGERR_CONTROL_CODE_SYNTAX, REG_BADPAT },
81 { ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE, REG_ECTYPE },
82 { ONIGERR_CHAR_CLASS_VALUE_AT_START_OF_RANGE, REG_ECTYPE },
83 { ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS, REG_ECTYPE },
84 { ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED, REG_BADRPT },
85 { ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID, REG_BADRPT },
86 { ONIGERR_NESTED_REPEAT_OPERATOR, REG_BADRPT },
87 { ONIGERR_UNMATCHED_CLOSE_PARENTHESIS, REG_EPAREN },
88 { ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS, REG_EPAREN },
89 { ONIGERR_END_PATTERN_IN_GROUP, REG_BADPAT },
90 { ONIGERR_UNDEFINED_GROUP_OPTION, REG_BADPAT },
91 { ONIGERR_INVALID_POSIX_BRACKET_TYPE, REG_BADPAT },
92 { ONIGERR_INVALID_LOOK_BEHIND_PATTERN, REG_BADPAT },
93 { ONIGERR_INVALID_REPEAT_RANGE_PATTERN, REG_BADPAT },
94 { ONIGERR_TOO_BIG_NUMBER, REG_BADPAT },
95 { ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE, REG_BADBR },
96 { ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE, REG_BADBR },
97 { ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS, REG_ECTYPE },
98 { ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE, REG_ECTYPE },
99 { ONIGERR_TOO_MANY_MULTI_BYTE_RANGES, REG_ECTYPE },
100 { ONIGERR_TOO_SHORT_MULTI_BYTE_STRING, REG_BADPAT },
101 { ONIGERR_TOO_BIG_BACKREF_NUMBER, REG_ESUBREG },
102 { ONIGERR_INVALID_BACKREF, REG_ESUBREG },
103 { ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED, REG_BADPAT },
104 { ONIGERR_TOO_BIG_WIDE_CHAR_VALUE, REG_EONIG_BADWC },
105 { ONIGERR_TOO_LONG_WIDE_CHAR_VALUE, REG_EONIG_BADWC },
106 { ONIGERR_INVALID_CODE_POINT_VALUE, REG_EONIG_BADWC },
107 { ONIGERR_EMPTY_GROUP_NAME, REG_BADPAT },
108 { ONIGERR_INVALID_GROUP_NAME, REG_BADPAT },
109 { ONIGERR_INVALID_CHAR_IN_GROUP_NAME, REG_BADPAT },
110 { ONIGERR_UNDEFINED_NAME_REFERENCE, REG_BADPAT },
111 { ONIGERR_UNDEFINED_GROUP_REFERENCE, REG_BADPAT },
112 { ONIGERR_MULTIPLEX_DEFINED_NAME, REG_BADPAT },
113 { ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, REG_BADPAT },
114 { ONIGERR_NEVER_ENDING_RECURSION, REG_BADPAT },
115 { ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY, REG_BADPAT },
116 { ONIGERR_INVALID_CHAR_PROPERTY_NAME, REG_BADPAT },
117 { ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION, REG_EONIG_BADARG },
118
119 };
120
121 int i;
122
123 if (code >= 0) return 0;
124
125 for (i = 0; i < numberof(o2p); i++) {
126 if (code == o2p[i].onig_err)
127 return o2p[i].posix_err;
128 }
129
130 return REG_EONIG_INTERNAL; /* but, unknown error code */
131}
132
133extern int
134regcomp(regex_t* reg, const char* pattern, int posix_options)
135{
136 int r, len;
137 const OnigSyntaxType* syntax = OnigDefaultSyntax;
138 OnigOptionType options;
139
140 if ((posix_options & REG_EXTENDED) == 0)
141 syntax = ONIG_SYNTAX_POSIX_BASIC;
142
143 options = syntax->options;
144 if ((posix_options & REG_ICASE) != 0)
145 ONIG_OPTION_ON(options, ONIG_OPTION_IGNORECASE);
146 if ((posix_options & REG_NEWLINE) != 0) {
147 ONIG_OPTION_ON( options, ONIG_OPTION_NEGATE_SINGLELINE);
148 ONIG_OPTION_OFF(options, ONIG_OPTION_SINGLELINE);
149 }
150
151 reg->comp_options = posix_options;
152
153 ENC_STRING_LEN(OnigEncDefaultCharEncoding, pattern, len);
154 r = onig_new(PONIG_C(reg), (UChar* )pattern, (UChar* )(pattern + len),
155 options, OnigEncDefaultCharEncoding, syntax,
156 (OnigErrorInfo* )NULL);
157 if (r != ONIG_NORMAL) {
158 return onig2posix_error_code(r);
159 }
160
161 reg->re_nsub = ONIG_C(reg)->num_mem;
162 return 0;
163}
164
165extern int
166regexec(regex_t* reg, const char* str, size_t nmatch,
167 regmatch_t pmatch[], int posix_options)
168{
169 int r, i, len;
170 UChar* end;
171 OnigRegion* region = NULL;
172 OnigOptionType options;
173
174 options = ONIG_OPTION_NONE;
175 if ((posix_options & REG_NOTBOL) != 0) options |= ONIG_OPTION_NOTBOL;
176 if ((posix_options & REG_NOTEOL) != 0) options |= ONIG_OPTION_NOTEOL;
177
178 if ((reg->comp_options & REG_NOSUB) != 0) {
179 nmatch = 0;
180 }
181 else if (nmatch != 0) {
182 region = onig_region_new();
183 if (region == NULL)
184 return REG_ESPACE;
185 }
186
187 ENC_STRING_LEN(ONIG_C(reg)->enc, str, len);
188 end = (UChar* )(str + len);
189 r = (int )onig_search(ONIG_C(reg), (UChar* )str, end, (UChar* )str, end,
190 region, options);
191
192 if (r >= 0) {
193 r = 0; /* Match */
194 for (i = 0; i < (int )nmatch; i++) {
195 pmatch[i].rm_so = (regoff_t )region->beg[i];
196 pmatch[i].rm_eo = (regoff_t )region->end[i];
197 }
198 }
199 else if (r == ONIG_MISMATCH) {
200 r = REG_NOMATCH;
201 for (i = 0; i < (int )nmatch; i++)
202 pmatch[i].rm_so = pmatch[i].rm_eo = ONIG_REGION_NOTPOS;
203 }
204 else {
205 r = onig2posix_error_code(r);
206 }
207
208 if (region != NULL)
209 onig_region_free(region, 1);
210
211#if 0
212 if (reg->re_nsub > nmatch - 1)
213 reg->re_nsub = (nmatch <= 1 ? 0 : nmatch - 1);
214#endif
215
216 return r;
217}
218
219extern void
220regfree(regex_t* reg)
221{
222 onig_free(ONIG_C(reg));
223}
224
225
226extern void
227reg_set_encoding(int mb_code)
228{
229 OnigEncoding enc;
230
231 switch (mb_code) {
232 case REG_POSIX_ENCODING_ASCII:
233 enc = ONIG_ENCODING_ASCII;
234 break;
235 case REG_POSIX_ENCODING_EUC_JP:
236 enc = ONIG_ENCODING_EUC_JP;
237 break;
238 case REG_POSIX_ENCODING_SJIS:
239 enc = ONIG_ENCODING_SJIS;
240 break;
241 case REG_POSIX_ENCODING_UTF8:
242 enc = ONIG_ENCODING_UTF8;
243 break;
244 case REG_POSIX_ENCODING_UTF16_BE:
245 enc = ONIG_ENCODING_UTF16_BE;
246 break;
247 case REG_POSIX_ENCODING_UTF16_LE:
248 enc = ONIG_ENCODING_UTF16_LE;
249 break;
250
251 default:
252 return ;
253 break;
254 }
255
256 onigenc_set_default_encoding(enc);
257}
258
259extern int
260reg_name_to_group_numbers(regex_t* reg,
261 const unsigned char* name, const unsigned char* name_end, int** nums)
262{
263 return onig_name_to_group_numbers(ONIG_C(reg), name, name_end, nums);
264}
265
266typedef struct {
267 int (*func)(const unsigned char*, const unsigned char*,int,int*,regex_t*,void*);
268 regex_t* reg;
269 void* arg;
270} i_wrap;
271
272static int
273i_wrapper(const UChar* name, const UChar* name_end, int ng, int* gs,
274 onig_regex_t* reg ARG_UNUSED, void* arg)
275{
276 i_wrap* warg = (i_wrap* )arg;
277
278 return (*warg->func)(name, name_end, ng, gs, warg->reg, warg->arg);
279}
280
281extern int
282reg_foreach_name(regex_t* reg,
283 int (*func)(const unsigned char*, const unsigned char*,int,int*,regex_t*,void*),
284 void* arg)
285{
286 i_wrap warg;
287
288 warg.func = func;
289 warg.reg = reg;
290 warg.arg = arg;
291
292 return onig_foreach_name(ONIG_C(reg), i_wrapper, &warg);
293}
294
295extern int
296reg_number_of_names(regex_t* reg)
297{
298 return onig_number_of_names(ONIG_C(reg));
299}
Note: See TracBrowser for help on using the repository browser.