1 | /*
|
---|
2 | The MIT License (MIT)
|
---|
3 |
|
---|
4 | Copyright (c) 2015 mattn.
|
---|
5 |
|
---|
6 | Permission is hereby granted, free of charge, to any person obtaining a copy
|
---|
7 | of this software and associated documentation files (the "Software"), to deal
|
---|
8 | in the Software without restriction, including without limitation the rights
|
---|
9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
---|
10 | copies of the Software, and to permit persons to whom the Software is
|
---|
11 | furnished to do so, subject to the following conditions:
|
---|
12 |
|
---|
13 | The above copyright notice and this permission notice shall be included in
|
---|
14 | all copies or substantial portions of the Software.
|
---|
15 |
|
---|
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
---|
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
---|
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
---|
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
---|
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
---|
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
---|
22 | THE SOFTWARE.
|
---|
23 | */
|
---|
24 | #include <stdio.h>
|
---|
25 | #include <string.h>
|
---|
26 | #include <ctype.h>
|
---|
27 | #include <stdlib.h>
|
---|
28 | #include <mruby.h>
|
---|
29 | #include <mruby/class.h>
|
---|
30 | #include <mruby/variable.h>
|
---|
31 | #include <mruby/array.h>
|
---|
32 | #include <mruby/string.h>
|
---|
33 | #include <mruby/data.h>
|
---|
34 | #include <mruby/variable.h>
|
---|
35 | #ifdef _MSC_VER
|
---|
36 | #define ONIG_EXTERN extern
|
---|
37 | #endif
|
---|
38 | #include "onigmo.h"
|
---|
39 |
|
---|
/* mrb_args_int: expands to mrb_int when MRUBY_VERSION is defined and to
 * plain int otherwise.
 * NOTE(review): presumably the integer type mrb_get_args stores on each
 * mruby generation -- confirm against the mruby headers in use. */
#if defined(MRUBY_VERSION)
#  define mrb_args_int mrb_int
#else
#  define mrb_args_int int
#endif
45 |
|
---|
46 | static void
|
---|
47 | onig_regexp_free(mrb_state *mrb, void *p) {
|
---|
48 | onig_free((OnigRegex) p);
|
---|
49 | }
|
---|
50 |
|
---|
51 | static struct mrb_data_type mrb_onig_regexp_type = {
|
---|
52 | "PosixRegexp", onig_regexp_free
|
---|
53 | };
|
---|
54 |
|
---|
55 | static void
|
---|
56 | match_data_free(mrb_state* mrb, void* p) {
|
---|
57 | (void)mrb;
|
---|
58 | onig_region_free((OnigRegion*)p, 1);
|
---|
59 | }
|
---|
60 |
|
---|
61 | static struct mrb_data_type mrb_onig_region_type = {
|
---|
62 | "OnigRegion", match_data_free
|
---|
63 | };
|
---|
64 |
|
---|
65 | static mrb_value
|
---|
66 | onig_regexp_initialize(mrb_state *mrb, mrb_value self) {
|
---|
67 | mrb_value str, flag = mrb_nil_value(), code = mrb_nil_value();
|
---|
68 | mrb_get_args(mrb, "S|oo", &str, &flag, &code);
|
---|
69 |
|
---|
70 | int cflag = 0;
|
---|
71 | OnigSyntaxType* syntax = ONIG_SYNTAX_RUBY;
|
---|
72 | OnigEncoding enc = ONIG_ENCODING_UTF8;
|
---|
73 | if(mrb_string_p(code)) {
|
---|
74 | char const* str_code = mrb_string_value_ptr(mrb, code);
|
---|
75 | if(strchr(str_code, 'n') || strchr(str_code, 'N')) {
|
---|
76 | enc = ONIG_ENCODING_ASCII;
|
---|
77 | }
|
---|
78 | }
|
---|
79 | if(mrb_nil_p(flag)) {
|
---|
80 | } else if(mrb_type(flag) == MRB_TT_TRUE) {
|
---|
81 | cflag |= ONIG_OPTION_IGNORECASE;
|
---|
82 | } else if(mrb_fixnum_p(flag)) {
|
---|
83 | int int_flags = mrb_fixnum(flag);
|
---|
84 | if(int_flags & 0x1) { cflag |= ONIG_OPTION_IGNORECASE; }
|
---|
85 | if(int_flags & 0x2) { cflag |= ONIG_OPTION_EXTEND; }
|
---|
86 | if(int_flags & 0x4) { cflag |= ONIG_OPTION_MULTILINE; }
|
---|
87 | } else if(mrb_string_p(flag)) {
|
---|
88 | char const* str_flags = mrb_string_value_ptr(mrb, flag);
|
---|
89 | if(strchr(str_flags, 'i')) { cflag |= ONIG_OPTION_IGNORECASE; }
|
---|
90 | if(strchr(str_flags, 'x')) { cflag |= ONIG_OPTION_EXTEND; }
|
---|
91 | if(strchr(str_flags, 'm')) { cflag |= ONIG_OPTION_MULTILINE; }
|
---|
92 | } else {
|
---|
93 | mrb_raisef(mrb, E_ARGUMENT_ERROR, "unknown regexp flag: %S", flag);
|
---|
94 | }
|
---|
95 |
|
---|
96 | OnigErrorInfo einfo;
|
---|
97 | OnigRegex reg;
|
---|
98 | int result = onig_new(®, (OnigUChar*)RSTRING_PTR(str), (OnigUChar*) RSTRING_PTR(str) + RSTRING_LEN(str),
|
---|
99 | cflag, enc, syntax, &einfo);
|
---|
100 | if (result != ONIG_NORMAL) {
|
---|
101 | char err[ONIG_MAX_ERROR_MESSAGE_LEN] = "";
|
---|
102 | onig_error_code_to_str((OnigUChar*)err, result);
|
---|
103 | mrb_raisef(mrb, E_ARGUMENT_ERROR, "'%S' is an invalid regular expression because %S.",
|
---|
104 | str, mrb_str_new_cstr(mrb, err));
|
---|
105 | }
|
---|
106 | mrb_iv_set(mrb, self, mrb_intern_lit(mrb, "@source"), str);
|
---|
107 |
|
---|
108 | DATA_PTR(self) = reg;
|
---|
109 | DATA_TYPE(self) = &mrb_onig_regexp_type;
|
---|
110 |
|
---|
111 | return self;
|
---|
112 | }
|
---|
113 |
|
---|
114 | static mrb_value
|
---|
115 | create_onig_region(mrb_state* mrb, mrb_value const str, mrb_value rex) {
|
---|
116 | mrb_assert(mrb_string_p(str));
|
---|
117 | mrb_assert(mrb_type(rex) == MRB_TT_DATA && DATA_TYPE(rex) == &mrb_onig_regexp_type);
|
---|
118 | mrb_value const c = mrb_obj_value(mrb_data_object_alloc(
|
---|
119 | mrb, mrb_class_get(mrb, "OnigMatchData"), onig_region_new(), &mrb_onig_region_type));
|
---|
120 | mrb_iv_set(mrb, c, mrb_intern_lit(mrb, "string"), mrb_str_dup(mrb, str));
|
---|
121 | mrb_iv_set(mrb, c, mrb_intern_lit(mrb, "regexp"), rex);
|
---|
122 | return c;
|
---|
123 | }
|
---|
124 |
|
---|
125 | static int
|
---|
126 | onig_match_common(mrb_state* mrb, OnigRegex reg, mrb_value match_value, mrb_value str, int pos) {
|
---|
127 | mrb_assert(mrb_string_p(str));
|
---|
128 | mrb_assert(DATA_TYPE(match_value) == &mrb_onig_region_type);
|
---|
129 | OnigRegion* const match = (OnigRegion*)DATA_PTR(match_value);
|
---|
130 | OnigUChar const* str_ptr = (OnigUChar const*)RSTRING_PTR(str);
|
---|
131 | int const result = onig_search(reg, str_ptr, str_ptr + RSTRING_LEN(str),
|
---|
132 | str_ptr + pos, str_ptr + RSTRING_LEN(str), match, 0);
|
---|
133 | if (result != ONIG_MISMATCH && result < 0) {
|
---|
134 | char err[ONIG_MAX_ERROR_MESSAGE_LEN] = "";
|
---|
135 | onig_error_code_to_str((OnigUChar*)err, result);
|
---|
136 | mrb_raise(mrb, E_REGEXP_ERROR, err);
|
---|
137 | }
|
---|
138 |
|
---|
139 | struct RObject* const cls = (struct RObject*)mrb_class_get(mrb, "OnigRegexp");
|
---|
140 | mrb_obj_iv_set(mrb, cls, mrb_intern_lit(mrb, "@last_match"), match_value);
|
---|
141 |
|
---|
142 | if (result != ONIG_MISMATCH &&
|
---|
143 | mrb_class_get(mrb, "Regexp") == (struct RClass*)cls &&
|
---|
144 | mrb_bool(mrb_obj_iv_get(mrb, (struct RObject*)cls, mrb_intern_lit(mrb, "@set_global_variables"))))
|
---|
145 | {
|
---|
146 | mrb_gv_set(mrb, mrb_intern_lit(mrb, "$~"), match_value);
|
---|
147 | mrb_gv_set(mrb, mrb_intern_lit(mrb, "$&"),
|
---|
148 | mrb_funcall(mrb, match_value, "[]", 1, mrb_fixnum_value(0)));
|
---|
149 | mrb_gv_set(mrb, mrb_intern_lit(mrb, "$`"), mrb_funcall(mrb, match_value, "pre_match", 0));
|
---|
150 | mrb_gv_set(mrb, mrb_intern_lit(mrb, "$'"), mrb_funcall(mrb, match_value, "post_match", 0));
|
---|
151 | mrb_gv_set(mrb, mrb_intern_lit(mrb, "$+"),
|
---|
152 | mrb_funcall(mrb, match_value, "[]", 1, mrb_fixnum_value(match->num_regs - 1)));
|
---|
153 |
|
---|
154 | // $1 to $9
|
---|
155 | int idx = 1;
|
---|
156 | int const idx_max = match->num_regs > 10? 10 : match->num_regs;
|
---|
157 | for(; idx < idx_max; ++idx) {
|
---|
158 | char const n[] = { '$', '0' + idx };
|
---|
159 | mrb_gv_set(mrb, mrb_intern(mrb, n, 2),
|
---|
160 | mrb_funcall(mrb, match_value, "[]", 1, mrb_fixnum_value(idx)));
|
---|
161 | }
|
---|
162 |
|
---|
163 | for(; idx < 10; ++idx) {
|
---|
164 | char const n[] = { '$', '0' + idx };
|
---|
165 | mrb_gv_remove(mrb, mrb_intern(mrb, n, 2));
|
---|
166 | }
|
---|
167 | }
|
---|
168 |
|
---|
169 | return result;
|
---|
170 | }
|
---|
171 |
|
---|
172 | static mrb_value
|
---|
173 | onig_regexp_match(mrb_state *mrb, mrb_value self) {
|
---|
174 | mrb_value str = mrb_nil_value();
|
---|
175 | OnigRegex reg;
|
---|
176 | mrb_int pos = 0;
|
---|
177 |
|
---|
178 | mrb_get_args(mrb, "o|i", &str, &pos);
|
---|
179 | if (pos < 0 || (pos > 0 && pos >= RSTRING_LEN(str))) {
|
---|
180 | return mrb_nil_value();
|
---|
181 | }
|
---|
182 |
|
---|
183 | if (mrb_nil_p(str)) {
|
---|
184 | return mrb_nil_value();
|
---|
185 | }
|
---|
186 | str = mrb_string_type(mrb, str);
|
---|
187 |
|
---|
188 | Data_Get_Struct(mrb, self, &mrb_onig_regexp_type, reg);
|
---|
189 |
|
---|
190 | mrb_value const ret = create_onig_region(mrb, str, self);
|
---|
191 | return (onig_match_common(mrb, reg, ret, str, pos) == ONIG_MISMATCH)
|
---|
192 | ? mrb_nil_value() : ret;
|
---|
193 | }
|
---|
194 |
|
---|
195 | static mrb_value
|
---|
196 | onig_regexp_equal(mrb_state *mrb, mrb_value self) {
|
---|
197 | mrb_value other;
|
---|
198 | OnigRegex self_reg, other_reg;
|
---|
199 |
|
---|
200 | mrb_get_args(mrb, "o", &other);
|
---|
201 | if (mrb_obj_equal(mrb, self, other)){
|
---|
202 | return mrb_true_value();
|
---|
203 | }
|
---|
204 | if (mrb_nil_p(other)) {
|
---|
205 | return mrb_false_value();
|
---|
206 | }
|
---|
207 | if (!mrb_obj_is_kind_of(mrb, other, mrb_class_get(mrb, "OnigRegexp"))) {
|
---|
208 | return mrb_false_value();
|
---|
209 | }
|
---|
210 | Data_Get_Struct(mrb, self, &mrb_onig_regexp_type, self_reg);
|
---|
211 | Data_Get_Struct(mrb, other, &mrb_onig_regexp_type, other_reg);
|
---|
212 |
|
---|
213 | if (!self_reg || !other_reg){
|
---|
214 | mrb_raise(mrb, E_RUNTIME_ERROR, "Invalid OnigRegexp");
|
---|
215 | }
|
---|
216 | if (onig_get_options(self_reg) != onig_get_options(other_reg)){
|
---|
217 | return mrb_false_value();
|
---|
218 | }
|
---|
219 | return mrb_str_equal(mrb, mrb_iv_get(mrb, self, mrb_intern_lit(mrb, "@source")), mrb_iv_get(mrb, other, mrb_intern_lit(mrb, "@source"))) ?
|
---|
220 | mrb_true_value() : mrb_false_value();
|
---|
221 | }
|
---|
222 |
|
---|
223 | static mrb_value
|
---|
224 | onig_regexp_casefold_p(mrb_state *mrb, mrb_value self) {
|
---|
225 | OnigRegex reg;
|
---|
226 |
|
---|
227 | Data_Get_Struct(mrb, self, &mrb_onig_regexp_type, reg);
|
---|
228 | return (onig_get_options(reg) & ONIG_OPTION_IGNORECASE) ? mrb_true_value() : mrb_false_value();
|
---|
229 | }
|
---|
230 |
|
---|
231 | static mrb_value
|
---|
232 | onig_regexp_options(mrb_state *mrb, mrb_value self) {
|
---|
233 | OnigRegex reg;
|
---|
234 | Data_Get_Struct(mrb, self, &mrb_onig_regexp_type, reg);
|
---|
235 | return mrb_fixnum_value(onig_get_options(reg));
|
---|
236 | }
|
---|
237 |
|
---|
238 | static char *
|
---|
239 | option_to_str(char str[4], int options) {
|
---|
240 | char *p = str;
|
---|
241 | if (options & ONIG_OPTION_MULTILINE) *p++ = 'm';
|
---|
242 | if (options & ONIG_OPTION_IGNORECASE) *p++ = 'i';
|
---|
243 | if (options & ONIG_OPTION_EXTEND) *p++ = 'x';
|
---|
244 | *p = 0;
|
---|
245 | return str;
|
---|
246 | }
|
---|
247 |
|
---|
248 | static mrb_value
|
---|
249 | regexp_expr_str(mrb_state *mrb, mrb_value str, const char *p, int len) {
|
---|
250 | const char *pend;
|
---|
251 | char buf[5];
|
---|
252 |
|
---|
253 | pend = (const char *) p + len;
|
---|
254 | for (;p < pend; p++) {
|
---|
255 | unsigned char c, cc;
|
---|
256 |
|
---|
257 | c = *p;
|
---|
258 | if (c == '/'|| c == '\\') {
|
---|
259 | buf[0] = '\\'; buf[1] = c;
|
---|
260 | mrb_str_cat(mrb, str, buf, 2);
|
---|
261 | continue;
|
---|
262 | }
|
---|
263 | if (ISPRINT(c)) {
|
---|
264 | buf[0] = c;
|
---|
265 | mrb_str_cat(mrb, str, buf, 1);
|
---|
266 | continue;
|
---|
267 | }
|
---|
268 | switch (c) {
|
---|
269 | case '\n': cc = 'n'; break;
|
---|
270 | case '\r': cc = 'r'; break;
|
---|
271 | case '\t': cc = 't'; break;
|
---|
272 | default: cc = 0; break;
|
---|
273 | }
|
---|
274 | if (cc) {
|
---|
275 | buf[0] = '\\';
|
---|
276 | buf[1] = (char)cc;
|
---|
277 | mrb_str_cat(mrb, str, buf, 2);
|
---|
278 | continue;
|
---|
279 | }
|
---|
280 | else {
|
---|
281 | buf[0] = '\\';
|
---|
282 | buf[3] = '0' + c % 8; c /= 8;
|
---|
283 | buf[2] = '0' + c % 8; c /= 8;
|
---|
284 | buf[1] = '0' + c % 8;
|
---|
285 | mrb_str_cat(mrb, str, buf, 4);
|
---|
286 | continue;
|
---|
287 | }
|
---|
288 | }
|
---|
289 | return str;
|
---|
290 | }
|
---|
291 |
|
---|
292 | static mrb_value
|
---|
293 | onig_regexp_inspect(mrb_state *mrb, mrb_value self) {
|
---|
294 | OnigRegex reg;
|
---|
295 | Data_Get_Struct(mrb, self, &mrb_onig_regexp_type, reg);
|
---|
296 | mrb_value str = mrb_str_new_lit(mrb, "/");
|
---|
297 | mrb_value src = mrb_iv_get(mrb, self, mrb_intern_lit(mrb, "@source"));
|
---|
298 | regexp_expr_str(mrb, str, (const char *)RSTRING_PTR(src), RSTRING_LEN(src));
|
---|
299 | mrb_str_cat_lit(mrb, str, "/");
|
---|
300 | char opts[4];
|
---|
301 | if (*option_to_str(opts, onig_get_options(reg))) {
|
---|
302 | mrb_str_cat_cstr(mrb, str, opts);
|
---|
303 | }
|
---|
304 | if (onig_get_encoding(reg) == ONIG_ENCODING_ASCII) {
|
---|
305 | mrb_str_cat_lit(mrb, str, "n");
|
---|
306 | }
|
---|
307 | return str;
|
---|
308 | }
|
---|
309 |
|
---|
310 | static mrb_value
|
---|
311 | onig_regexp_to_s(mrb_state *mrb, mrb_value self) {
|
---|
312 | int options;
|
---|
313 | const int embeddable = ONIG_OPTION_MULTILINE|ONIG_OPTION_IGNORECASE|ONIG_OPTION_EXTEND;
|
---|
314 | long len;
|
---|
315 | const char* ptr;
|
---|
316 | mrb_value str = mrb_str_new_lit(mrb, "(?");
|
---|
317 | char optbuf[5];
|
---|
318 |
|
---|
319 | OnigRegex reg;
|
---|
320 | Data_Get_Struct(mrb, self, &mrb_onig_regexp_type, reg);
|
---|
321 | options = onig_get_options(reg);
|
---|
322 | mrb_value src = mrb_iv_get(mrb, self, mrb_intern_lit(mrb, "@source"));
|
---|
323 | ptr = RSTRING_PTR(src);
|
---|
324 | len = RSTRING_LEN(src);
|
---|
325 |
|
---|
326 | again:
|
---|
327 | if (len >= 4 && ptr[0] == '(' && ptr[1] == '?') {
|
---|
328 | int err = 1;
|
---|
329 | ptr += 2;
|
---|
330 | if ((len -= 2) > 0) {
|
---|
331 | do {
|
---|
332 | if(strchr(ptr, 'i')) { options |= ONIG_OPTION_IGNORECASE; }
|
---|
333 | if(strchr(ptr, 'x')) { options |= ONIG_OPTION_EXTEND; }
|
---|
334 | if(strchr(ptr, 'm')) { options |= ONIG_OPTION_MULTILINE; }
|
---|
335 | ++ptr;
|
---|
336 | } while (--len > 0);
|
---|
337 | }
|
---|
338 | if (len > 1 && *ptr == '-') {
|
---|
339 | ++ptr;
|
---|
340 | --len;
|
---|
341 | do {
|
---|
342 | if(strchr(ptr, 'i')) { options &= ~ONIG_OPTION_IGNORECASE; }
|
---|
343 | if(strchr(ptr, 'x')) { options &= ~ONIG_OPTION_EXTEND; }
|
---|
344 | if(strchr(ptr, 'm')) { options &= ~ONIG_OPTION_MULTILINE; }
|
---|
345 | ++ptr;
|
---|
346 | } while (--len > 0);
|
---|
347 | }
|
---|
348 | if (*ptr == ')') {
|
---|
349 | --len;
|
---|
350 | ++ptr;
|
---|
351 | goto again;
|
---|
352 | }
|
---|
353 | if (*ptr == ':' && ptr[len-1] == ')') {
|
---|
354 | OnigRegex rp;
|
---|
355 | ++ptr;
|
---|
356 | len -= 2;
|
---|
357 | err = onig_new(&rp, (OnigUChar*)ptr, (OnigUChar*)ptr + len, ONIG_OPTION_DEFAULT,
|
---|
358 | ONIG_ENCODING_UTF8, OnigDefaultSyntax, NULL);
|
---|
359 | onig_free(rp);
|
---|
360 | }
|
---|
361 | if (err) {
|
---|
362 | options = onig_get_options(reg);
|
---|
363 | ptr = RSTRING_PTR(src);
|
---|
364 | len = RSTRING_LEN(src);
|
---|
365 | }
|
---|
366 | }
|
---|
367 |
|
---|
368 | if (*option_to_str(optbuf, options)) mrb_str_cat_cstr(mrb, str, optbuf);
|
---|
369 |
|
---|
370 | if ((options & embeddable) != embeddable) {
|
---|
371 | optbuf[0] = '-';
|
---|
372 | option_to_str(optbuf + 1, ~options);
|
---|
373 | mrb_str_cat_cstr(mrb, str, optbuf);
|
---|
374 | }
|
---|
375 |
|
---|
376 | mrb_str_cat_cstr(mrb, str, ":");
|
---|
377 | regexp_expr_str(mrb, str, ptr, len);
|
---|
378 | mrb_str_cat_cstr(mrb, str, ")");
|
---|
379 | return str;
|
---|
380 | }
|
---|
381 |
|
---|
382 |
|
---|
383 | static mrb_value
|
---|
384 | onig_regexp_version(mrb_state* mrb, mrb_value self) {
|
---|
385 | (void)self;
|
---|
386 | return mrb_str_new_cstr(mrb, onig_version());
|
---|
387 | }
|
---|
388 |
|
---|
389 | static mrb_value
|
---|
390 | match_data_to_a(mrb_state* mrb, mrb_value self);
|
---|
391 |
|
---|
392 | static mrb_int
|
---|
393 | match_data_actual_index(mrb_state* mrb, mrb_value self, mrb_value idx_value) {
|
---|
394 | if(mrb_fixnum_p(idx_value)) { return mrb_fixnum(idx_value); }
|
---|
395 |
|
---|
396 | char const* name = NULL;
|
---|
397 | char const* name_end = NULL;
|
---|
398 | if(mrb_symbol_p(idx_value)) {
|
---|
399 | mrb_int sym_len;
|
---|
400 | name = mrb_sym2name_len(mrb, mrb_symbol(idx_value), &sym_len);
|
---|
401 | name_end = name + sym_len;
|
---|
402 | } else if(mrb_string_p(idx_value)) {
|
---|
403 | name = mrb_string_value_ptr(mrb, idx_value);
|
---|
404 | name_end = name + strlen(name);
|
---|
405 | } else { mrb_assert(FALSE); }
|
---|
406 | mrb_assert(name && name_end);
|
---|
407 |
|
---|
408 | mrb_value const regexp = mrb_iv_get(mrb, self, mrb_intern_lit(mrb, "regexp"));
|
---|
409 | mrb_assert(!mrb_nil_p(regexp));
|
---|
410 | mrb_assert(DATA_TYPE(regexp) == &mrb_onig_regexp_type);
|
---|
411 | mrb_assert(DATA_TYPE(self) == &mrb_onig_region_type);
|
---|
412 | int const idx = onig_name_to_backref_number(
|
---|
413 | (OnigRegex)DATA_PTR(regexp), (OnigUChar const*)name, (OnigUChar const*)name_end,
|
---|
414 | (OnigRegion*)DATA_PTR(self));
|
---|
415 | if (idx < 0) {
|
---|
416 | mrb_raisef(mrb, E_INDEX_ERROR, "undefined group name reference: %S", idx_value);
|
---|
417 | }
|
---|
418 | return idx;
|
---|
419 | }
|
---|
420 |
|
---|
421 | // ISO 15.2.16.3.1
|
---|
422 | static mrb_value
|
---|
423 | match_data_index(mrb_state* mrb, mrb_value self) {
|
---|
424 | mrb_value src;
|
---|
425 | mrb_int argc; mrb_value *argv;
|
---|
426 |
|
---|
427 | mrb_get_args(mrb, "*", &argv, &argc);
|
---|
428 |
|
---|
429 | src = match_data_to_a(mrb, self);
|
---|
430 |
|
---|
431 | if (argc == 1) {
|
---|
432 | switch (mrb_type(argv[0])) {
|
---|
433 | case MRB_TT_FIXNUM:
|
---|
434 | case MRB_TT_SYMBOL:
|
---|
435 | case MRB_TT_STRING:
|
---|
436 | return mrb_ary_entry(src, match_data_actual_index(mrb, self, argv[0]));
|
---|
437 | default: break;
|
---|
438 | }
|
---|
439 | }
|
---|
440 |
|
---|
441 | return mrb_funcall_argv(mrb, src, mrb_intern_lit(mrb, "[]"), argc, argv);
|
---|
442 | }
|
---|
443 |
|
---|
/*
 * Raises IndexError unless 0 <= idx < reg->num_regs.
 * Expects `mrb` (mrb_state*) and `reg` (OnigRegion*) to be in scope at the
 * call site.  Wrapped in do { } while (0) so it behaves as one statement.
 */
#define match_data_check_index(idx) \
  do { \
    if ((idx) < 0 || reg->num_regs <= (idx)) { \
      mrb_raisef(mrb, E_INDEX_ERROR, "index %S out of matches", mrb_fixnum_value(idx)); \
    } \
  } while (0)

448 | // ISO 15.2.16.3.2
|
---|
449 | static mrb_value
|
---|
450 | match_data_begin(mrb_state* mrb, mrb_value self) {
|
---|
451 | mrb_value idx_value;
|
---|
452 | mrb_get_args(mrb, "o", &idx_value);
|
---|
453 | OnigRegion* reg;
|
---|
454 | Data_Get_Struct(mrb, self, &mrb_onig_region_type, reg);
|
---|
455 | mrb_int const idx = match_data_actual_index(mrb, self, idx_value);
|
---|
456 | match_data_check_index(idx);
|
---|
457 | return mrb_fixnum_value(reg->beg[idx]);
|
---|
458 | }
|
---|
459 |
|
---|
460 | // ISO 15.2.16.3.3
|
---|
461 | static mrb_value
|
---|
462 | match_data_captures(mrb_state* mrb, mrb_value self) {
|
---|
463 | mrb_value ary = match_data_to_a(mrb, self);
|
---|
464 | return mrb_ary_new_from_values(mrb, RARRAY_LEN(ary) - 1, RARRAY_PTR(ary) + 1);
|
---|
465 | }
|
---|
466 |
|
---|
467 | // ISO 15.2.16.3.4
|
---|
468 | static mrb_value
|
---|
469 | match_data_end(mrb_state* mrb, mrb_value self) {
|
---|
470 | mrb_value idx_value;
|
---|
471 | mrb_get_args(mrb, "o", &idx_value);
|
---|
472 | OnigRegion* reg;
|
---|
473 | Data_Get_Struct(mrb, self, &mrb_onig_region_type, reg);
|
---|
474 | mrb_int const idx = match_data_actual_index(mrb, self, idx_value);
|
---|
475 | match_data_check_index(idx);
|
---|
476 | return mrb_fixnum_value(reg->end[idx]);
|
---|
477 | }
|
---|
478 |
|
---|
479 | // ISO 15.2.16.3.5
|
---|
480 | static mrb_value
|
---|
481 | match_data_copy(mrb_state* mrb, mrb_value self) {
|
---|
482 | mrb_value src_val;
|
---|
483 | mrb_get_args(mrb, "o", &src_val);
|
---|
484 |
|
---|
485 | OnigRegion* src;
|
---|
486 | Data_Get_Struct(mrb, src_val, &mrb_onig_region_type, src);
|
---|
487 |
|
---|
488 | OnigRegion* dst = onig_region_new();
|
---|
489 | onig_region_copy(dst, src);
|
---|
490 |
|
---|
491 | DATA_PTR(self) = dst;
|
---|
492 | DATA_TYPE(self) = &mrb_onig_region_type;
|
---|
493 | mrb_iv_set(mrb, self, mrb_intern_lit(mrb, "string"), mrb_iv_get(mrb, src_val, mrb_intern_lit(mrb, "string")));
|
---|
494 | mrb_iv_set(mrb, self, mrb_intern_lit(mrb, "regexp"), mrb_iv_get(mrb, src_val, mrb_intern_lit(mrb, "regexp")));
|
---|
495 | return self;
|
---|
496 | }
|
---|
497 |
|
---|
498 | // ISO 15.2.16.3.6
|
---|
499 | // ISO 15.2.16.3.10
|
---|
500 | static mrb_value
|
---|
501 | match_data_length(mrb_state* mrb, mrb_value self) {
|
---|
502 | OnigRegion* reg;
|
---|
503 | Data_Get_Struct(mrb, self, &mrb_onig_region_type, reg);
|
---|
504 | return mrb_fixnum_value(reg->num_regs);
|
---|
505 | }
|
---|
506 |
|
---|
507 | // ISO 15.2.16.3.7
|
---|
508 | static mrb_value
|
---|
509 | match_data_offset(mrb_state* mrb, mrb_value self) {
|
---|
510 | mrb_value idx_value;
|
---|
511 | mrb_get_args(mrb, "o", &idx_value);
|
---|
512 | OnigRegion* reg;
|
---|
513 | Data_Get_Struct(mrb, self, &mrb_onig_region_type, reg);
|
---|
514 | mrb_int const idx = match_data_actual_index(mrb, self, idx_value);
|
---|
515 | match_data_check_index(idx);
|
---|
516 | mrb_value ret = mrb_ary_new_capa(mrb, 2);
|
---|
517 | mrb_ary_push(mrb, ret, mrb_fixnum_value(reg->beg[idx]));
|
---|
518 | mrb_ary_push(mrb, ret, mrb_fixnum_value(reg->end[idx]));
|
---|
519 | return ret;
|
---|
520 | }
|
---|
521 |
|
---|
522 | // ISO 15.2.16.3.8
|
---|
523 | static mrb_value
|
---|
524 | match_data_post_match(mrb_state* mrb, mrb_value self) {
|
---|
525 | OnigRegion* reg;
|
---|
526 | Data_Get_Struct(mrb, self, &mrb_onig_region_type, reg);
|
---|
527 | mrb_value str = mrb_iv_get(mrb, self, mrb_intern_lit(mrb, "string"));
|
---|
528 | return mrb_str_substr(mrb, str, reg->end[0], RSTRING_LEN(str) - reg->end[0]);
|
---|
529 | }
|
---|
530 |
|
---|
531 | // ISO 15.2.16.3.9
|
---|
532 | static mrb_value
|
---|
533 | match_data_pre_match(mrb_state* mrb, mrb_value self) {
|
---|
534 | OnigRegion* reg;
|
---|
535 | Data_Get_Struct(mrb, self, &mrb_onig_region_type, reg);
|
---|
536 | mrb_value str = mrb_iv_get(mrb, self, mrb_intern_lit(mrb, "string"));
|
---|
537 | return mrb_str_substr(mrb, str, 0, reg->beg[0]);
|
---|
538 | }
|
---|
539 |
|
---|
540 | // ISO 15.2.16.3.11
|
---|
541 | static mrb_value
|
---|
542 | match_data_string(mrb_state* mrb, mrb_value self) {
|
---|
543 | return mrb_iv_get(mrb, self, mrb_intern_lit(mrb, "string"));
|
---|
544 | }
|
---|
545 |
|
---|
546 | static mrb_value
|
---|
547 | match_data_regexp(mrb_state* mrb, mrb_value self) {
|
---|
548 | return mrb_iv_get(mrb, self, mrb_intern_lit(mrb, "regexp"));
|
---|
549 | }
|
---|
550 |
|
---|
551 | // ISO 15.2.16.3.12
|
---|
552 | static mrb_value
|
---|
553 | match_data_to_a(mrb_state* mrb, mrb_value self) {
|
---|
554 | mrb_value cache = mrb_iv_get(mrb, self, mrb_intern_lit(mrb, "cache"));
|
---|
555 | if(!mrb_nil_p(cache)) {
|
---|
556 | return cache;
|
---|
557 | }
|
---|
558 |
|
---|
559 | mrb_value str = mrb_iv_get(mrb, self, mrb_intern_lit(mrb, "string"));
|
---|
560 | OnigRegion* reg;
|
---|
561 | Data_Get_Struct(mrb, self, &mrb_onig_region_type, reg);
|
---|
562 |
|
---|
563 | mrb_value ret = mrb_ary_new_capa(mrb, reg->num_regs);
|
---|
564 | int i, ai = mrb_gc_arena_save(mrb);
|
---|
565 | for(i = 0; i < reg->num_regs; ++i) {
|
---|
566 | if(reg->beg[i] == ONIG_REGION_NOTPOS) {
|
---|
567 | mrb_ary_push(mrb, ret, mrb_nil_value());
|
---|
568 | } else {
|
---|
569 | mrb_ary_push(mrb, ret, mrb_str_substr(mrb, str, reg->beg[i], reg->end[i] - reg->beg[i]));
|
---|
570 | }
|
---|
571 | mrb_gc_arena_restore(mrb, ai);
|
---|
572 | }
|
---|
573 | return ret;
|
---|
574 | }
|
---|
575 |
|
---|
576 | // ISO 15.2.16.3.13
|
---|
577 | static mrb_value
|
---|
578 | match_data_to_s(mrb_state* mrb, mrb_value self) {
|
---|
579 | mrb_value str = mrb_iv_get(mrb, self, mrb_intern_lit(mrb, "string"));
|
---|
580 | OnigRegion* reg;
|
---|
581 | Data_Get_Struct(mrb, self, &mrb_onig_region_type, reg);
|
---|
582 | return mrb_str_substr(mrb, str, reg->beg[0], reg->end[0] - reg->beg[0]);
|
---|
583 | }
|
---|
584 |
|
---|
585 | static void
|
---|
586 | append_replace_str(mrb_state* mrb, mrb_value result, mrb_value replace,
|
---|
587 | mrb_value src, OnigRegex reg, OnigRegion* match)
|
---|
588 | {
|
---|
589 | mrb_assert(mrb_string_p(replace));
|
---|
590 | char const* ch;
|
---|
591 | char const* const end = RSTRING_PTR(replace) + RSTRING_LEN(replace);
|
---|
592 | for(ch = RSTRING_PTR(replace); ch < end; ++ch) {
|
---|
593 | if (*ch != '\\' || (ch + 1) >= end) {
|
---|
594 | mrb_str_cat(mrb, result, ch, 1);
|
---|
595 | continue;
|
---|
596 | }
|
---|
597 |
|
---|
598 | switch(*(++ch)) { // skip back slash and get next char
|
---|
599 | case 'k': { // group name
|
---|
600 | if ((ch + 2) >= end || ch[1] != '<') { goto replace_expr_error; }
|
---|
601 | char const* name_beg = ch += 2;
|
---|
602 | while (*ch != '>') { if(++ch == end) { goto replace_expr_error; } }
|
---|
603 | mrb_assert(ch < end);
|
---|
604 | mrb_assert(*ch == '>');
|
---|
605 | int const idx = onig_name_to_backref_number(
|
---|
606 | reg, (OnigUChar const*)name_beg, (OnigUChar const*)ch, match);
|
---|
607 | if (idx < 0) {
|
---|
608 | mrb_raisef(mrb, E_INDEX_ERROR, "undefined group name reference: %S",
|
---|
609 | mrb_str_substr(mrb, replace, name_beg - RSTRING_PTR(replace), ch - name_beg));
|
---|
610 | }
|
---|
611 | mrb_str_cat(mrb, result, RSTRING_PTR(src) + match->beg[idx], match->end[idx] - match->beg[idx]);
|
---|
612 | } break;
|
---|
613 |
|
---|
614 | case '\\': // escaped back slash
|
---|
615 | mrb_str_cat(mrb, result, ch, 1);
|
---|
616 | break;
|
---|
617 |
|
---|
618 | default:
|
---|
619 | if (isdigit(*ch)) { // group number 0-9
|
---|
620 | int const idx = *ch - '0';
|
---|
621 | if (idx >= match->num_regs) {
|
---|
622 | mrb_raisef(mrb, E_INDEX_ERROR, "undefined group number reference: %S (max: %S)",
|
---|
623 | mrb_fixnum_value(idx), mrb_fixnum_value(match->num_regs));
|
---|
624 | }
|
---|
625 | mrb_str_cat(mrb, result, RSTRING_PTR(src) + match->beg[idx], match->end[idx] - match->beg[idx]);
|
---|
626 | } else {
|
---|
627 | char const str[] = { '\\', *ch };
|
---|
628 | mrb_str_cat(mrb, result, str, 2);
|
---|
629 | }
|
---|
630 | break;
|
---|
631 | }
|
---|
632 | }
|
---|
633 |
|
---|
634 | if(ch == end) { return; }
|
---|
635 |
|
---|
636 | replace_expr_error:
|
---|
637 | mrb_raisef(mrb, E_REGEXP_ERROR, "invalid replace expression: %S", replace);
|
---|
638 | }
|
---|
639 |
|
---|
640 | // ISO 15.2.10.5.18
|
---|
641 | static mrb_value
|
---|
642 | string_gsub(mrb_state* mrb, mrb_value self) {
|
---|
643 | mrb_value blk, match_expr, replace_expr = mrb_nil_value();
|
---|
644 | int const argc = mrb_get_args(mrb, "&o|S", &blk, &match_expr, &replace_expr);
|
---|
645 |
|
---|
646 | if(mrb_string_p(match_expr)) {
|
---|
647 | mrb_value argv[] = { match_expr, replace_expr };
|
---|
648 | return mrb_funcall_with_block(mrb, self, mrb_intern_lit(mrb, "string_gsub"), argc, argv, blk);
|
---|
649 | }
|
---|
650 |
|
---|
651 | if(!mrb_nil_p(blk) && !mrb_nil_p(replace_expr)) {
|
---|
652 | mrb_raise(mrb, E_ARGUMENT_ERROR, "both block and replace expression must not be passed");
|
---|
653 | }
|
---|
654 |
|
---|
655 | OnigRegex reg;
|
---|
656 | Data_Get_Struct(mrb, match_expr, &mrb_onig_regexp_type, reg);
|
---|
657 | mrb_value const result = mrb_str_new(mrb, NULL, 0);
|
---|
658 | mrb_value const match_value = create_onig_region(mrb, self, match_expr);
|
---|
659 | OnigRegion* const match = (OnigRegion*)DATA_PTR(match_value);
|
---|
660 | int last_end_pos = 0;
|
---|
661 |
|
---|
662 | while(1) {
|
---|
663 | if(onig_match_common(mrb, reg, match_value, self, last_end_pos) == ONIG_MISMATCH) { break; }
|
---|
664 |
|
---|
665 | mrb_str_cat(mrb, result, RSTRING_PTR(self) + last_end_pos, match->beg[0] - last_end_pos);
|
---|
666 |
|
---|
667 | if(mrb_nil_p(blk)) {
|
---|
668 | append_replace_str(mrb, result, replace_expr, self, reg, match);
|
---|
669 | } else {
|
---|
670 | mrb_value const tmp_str = mrb_str_to_str(mrb, mrb_yield(mrb, blk, mrb_str_substr(
|
---|
671 | mrb, self, match->beg[0], match->end[0] - match->beg[0])));
|
---|
672 | mrb_assert(mrb_string_p(tmp_str));
|
---|
673 | mrb_str_concat(mrb, result, tmp_str);
|
---|
674 | }
|
---|
675 |
|
---|
676 | last_end_pos = match->end[0];
|
---|
677 | }
|
---|
678 |
|
---|
679 | mrb_str_cat(mrb, result, RSTRING_PTR(self) + last_end_pos, RSTRING_LEN(self) - last_end_pos);
|
---|
680 | return result;
|
---|
681 | }
|
---|
682 |
|
---|
683 | // ISO 15.2.10.5.32
|
---|
684 | static mrb_value
|
---|
685 | string_scan(mrb_state* mrb, mrb_value self) {
|
---|
686 | mrb_value blk, match_expr;
|
---|
687 | mrb_get_args(mrb, "&o", &blk, &match_expr);
|
---|
688 |
|
---|
689 | if(mrb_string_p(match_expr)) {
|
---|
690 | return mrb_funcall_with_block(mrb, self, mrb_intern_lit(mrb, "string_scan"),
|
---|
691 | 1, &match_expr, blk);
|
---|
692 | }
|
---|
693 |
|
---|
694 | OnigRegex reg;
|
---|
695 | Data_Get_Struct(mrb, match_expr, &mrb_onig_regexp_type, reg);
|
---|
696 | mrb_value const result = mrb_nil_p(blk)? mrb_ary_new(mrb) : self;
|
---|
697 | mrb_value m_value = create_onig_region(mrb, self, match_expr);
|
---|
698 | OnigRegion* const m = (OnigRegion*)DATA_PTR(m_value);
|
---|
699 | int last_end_pos = 0;
|
---|
700 | int i;
|
---|
701 |
|
---|
702 | while (1) {
|
---|
703 | if(onig_match_common(mrb, reg, m_value, self, last_end_pos) == ONIG_MISMATCH) { break; }
|
---|
704 |
|
---|
705 | if(mrb_nil_p(blk)) {
|
---|
706 | mrb_assert(mrb_array_p(result));
|
---|
707 | if(m->num_regs == 1) {
|
---|
708 | mrb_ary_push(mrb, result, mrb_str_substr(mrb, self, m->beg[0], m->end[0] - m->beg[0]));
|
---|
709 | } else {
|
---|
710 | mrb_value const elem = mrb_ary_new_capa(mrb, m->num_regs - 1);
|
---|
711 | for(i = 1; i < m->num_regs; ++i) {
|
---|
712 | mrb_ary_push(mrb, elem, mrb_str_substr(mrb, self, m->beg[i], m->end[i] - m->beg[i]));
|
---|
713 | }
|
---|
714 | mrb_ary_push(mrb, result, elem);
|
---|
715 | }
|
---|
716 | } else { // call block
|
---|
717 | mrb_assert(mrb_string_p(result));
|
---|
718 | if(m->num_regs == 1) {
|
---|
719 | mrb_yield(mrb, blk, mrb_str_substr(mrb, self, m->beg[0], m->end[0] - m->beg[0]));
|
---|
720 | } else {
|
---|
721 | mrb_value argv = mrb_ary_new_capa(mrb, m->num_regs - 1);
|
---|
722 | for(i = 1; i < m->num_regs; ++i) {
|
---|
723 | mrb_ary_push(mrb, argv, mrb_str_substr(mrb, self, m->beg[i], m->end[i] - m->beg[i]));
|
---|
724 | }
|
---|
725 | mrb_yield(mrb, blk, argv);
|
---|
726 | }
|
---|
727 | }
|
---|
728 |
|
---|
729 | last_end_pos = m->end[0];
|
---|
730 | }
|
---|
731 |
|
---|
732 | return result;
|
---|
733 | }
|
---|
734 |
|
---|
735 | // ISO 15.2.10.5.35
|
---|
736 | static mrb_value
|
---|
737 | string_split(mrb_state* mrb, mrb_value self) {
|
---|
738 | mrb_value pattern = mrb_nil_value(); mrb_int limit = 0;
|
---|
739 | int argc = mrb_get_args(mrb, "|oi", &pattern, &limit);
|
---|
740 |
|
---|
741 | if(argc == 0) { // check $; global variable
|
---|
742 | pattern = mrb_gv_get(mrb, mrb_intern_lit(mrb, "$;"));
|
---|
743 | if(!mrb_nil_p(pattern)) { argc = 1; }
|
---|
744 | }
|
---|
745 |
|
---|
746 | if(mrb_nil_p(pattern) || mrb_string_p(pattern)) {
|
---|
747 | return mrb_funcall(mrb, self, "string_split", argc, pattern, mrb_fixnum_value(limit));
|
---|
748 | }
|
---|
749 |
|
---|
750 | mrb_value const result = mrb_ary_new(mrb);
|
---|
751 | if(RSTRING_LEN(self) == 0) { return result; }
|
---|
752 |
|
---|
753 | OnigRegex reg;
|
---|
754 | Data_Get_Struct(mrb, pattern, &mrb_onig_regexp_type, reg);
|
---|
755 | mrb_value const match_value = create_onig_region(mrb, self, pattern);
|
---|
756 | OnigRegion* const match = (OnigRegion*)DATA_PTR(match_value);
|
---|
757 | int last_end_pos = 0, next_match_pos = 0;
|
---|
758 | mrb_int num_matches = 0;
|
---|
759 |
|
---|
760 | while (limit <= 0 || (limit - 1) > num_matches) {
|
---|
761 | int i;
|
---|
762 | if(next_match_pos >= RSTRING_LEN(self) ||
|
---|
763 | onig_match_common(mrb, reg, match_value, self, next_match_pos) == ONIG_MISMATCH) { break; }
|
---|
764 |
|
---|
765 | if (last_end_pos == match->end[0]) {
|
---|
766 | ++next_match_pos;
|
---|
767 | // Remove this loop if not using UTF-8
|
---|
768 | for (; next_match_pos < RSTRING_LEN(self) && (RSTRING_PTR(self)[next_match_pos] & 0xC0) == 0x80;
|
---|
769 | ++next_match_pos) {}
|
---|
770 | } else {
|
---|
771 | mrb_ary_push(mrb, result, mrb_str_substr(
|
---|
772 | mrb, self, last_end_pos, match->beg[0] - last_end_pos));
|
---|
773 | // If there are captures, add them to the array
|
---|
774 | for (i = 1; i < match->num_regs; ++i) {
|
---|
775 | mrb_ary_push(mrb, result, mrb_str_substr(
|
---|
776 | mrb, self, match->beg[i], match->end[i] - match->beg[i]));
|
---|
777 | }
|
---|
778 | last_end_pos = match->end[0];
|
---|
779 | next_match_pos = last_end_pos;
|
---|
780 | ++num_matches;
|
---|
781 | }
|
---|
782 | }
|
---|
783 | if (last_end_pos <= RSTRING_LEN(self)) {
|
---|
784 | mrb_ary_push(mrb, result, mrb_str_substr(
|
---|
785 | mrb, self, last_end_pos, RSTRING_LEN(self) - last_end_pos));
|
---|
786 | }
|
---|
787 |
|
---|
788 | if (limit == 0) { // remove empty trailing elements
|
---|
789 | int count = 0, i;
|
---|
790 | for (i = RARRAY_LEN(result); i > 0; --i) {
|
---|
791 | mrb_assert(mrb_string_p(RARRAY_PTR(result)[i - 1]));
|
---|
792 | if (RSTRING_LEN(RARRAY_PTR(result)[i - 1]) != 0) { break; }
|
---|
793 | else { ++count; }
|
---|
794 | }
|
---|
795 | if(count > 0) {
|
---|
796 | return mrb_ary_new_from_values(mrb, RARRAY_LEN(result) - count, RARRAY_PTR(result));
|
---|
797 | }
|
---|
798 | }
|
---|
799 |
|
---|
800 | return result;
|
---|
801 | }
|
---|
802 |
|
---|
803 | // ISO 15.2.10.5.36
|
---|
804 | static mrb_value
|
---|
805 | string_sub(mrb_state* mrb, mrb_value self) {
|
---|
806 | mrb_value blk, match_expr, replace_expr = mrb_nil_value();
|
---|
807 | int const argc = mrb_get_args(mrb, "&o|S", &blk, &match_expr, &replace_expr);
|
---|
808 |
|
---|
809 | if(mrb_string_p(match_expr)) {
|
---|
810 | mrb_value argv[] = { match_expr, replace_expr };
|
---|
811 | return mrb_funcall_with_block(mrb, self, mrb_intern_lit(mrb, "string_sub"), argc, argv, blk);
|
---|
812 | }
|
---|
813 |
|
---|
814 | if(!mrb_nil_p(blk) && !mrb_nil_p(replace_expr)) {
|
---|
815 | mrb_raise(mrb, E_ARGUMENT_ERROR, "both block and replace expression must not be passed");
|
---|
816 | }
|
---|
817 |
|
---|
818 | OnigRegex reg;
|
---|
819 | Data_Get_Struct(mrb, match_expr, &mrb_onig_regexp_type, reg);
|
---|
820 | mrb_value const result = mrb_str_new(mrb, NULL, 0);
|
---|
821 | mrb_value const match_value = create_onig_region(mrb, self, match_expr);
|
---|
822 | OnigRegion* const match = (OnigRegion*)DATA_PTR(match_value);
|
---|
823 |
|
---|
824 | int const onig_result = onig_match_common(mrb, reg, match_value, self, 0);
|
---|
825 | if(onig_result == ONIG_MISMATCH) { return self; }
|
---|
826 |
|
---|
827 | mrb_str_cat(mrb, result, RSTRING_PTR(self), match->beg[0]);
|
---|
828 |
|
---|
829 | if(mrb_nil_p(blk)) {
|
---|
830 | append_replace_str(mrb, result, replace_expr, self, reg, match);
|
---|
831 | } else {
|
---|
832 | mrb_value const tmp_str = mrb_str_to_str(mrb, mrb_yield(mrb, blk, mrb_str_substr(
|
---|
833 | mrb, self, match->beg[0], match->end[0] - match->beg[0])));
|
---|
834 | mrb_assert(mrb_string_p(tmp_str));
|
---|
835 | mrb_str_concat(mrb, result, tmp_str);
|
---|
836 | }
|
---|
837 |
|
---|
838 | int const last_end_pos = match->end[0];
|
---|
839 | mrb_str_cat(mrb, result, RSTRING_PTR(self) + last_end_pos, RSTRING_LEN(self) - last_end_pos);
|
---|
840 |
|
---|
841 | return result;
|
---|
842 | }
|
---|
843 |
|
---|
844 | static mrb_value
|
---|
845 | onig_regexp_clear_global_variables(mrb_state* mrb, mrb_value self) {
|
---|
846 | mrb_gv_remove(mrb, mrb_intern_lit(mrb, "$~"));
|
---|
847 | mrb_gv_remove(mrb, mrb_intern_lit(mrb, "$&"));
|
---|
848 | mrb_gv_remove(mrb, mrb_intern_lit(mrb, "$`"));
|
---|
849 | mrb_gv_remove(mrb, mrb_intern_lit(mrb, "$'"));
|
---|
850 | mrb_gv_remove(mrb, mrb_intern_lit(mrb, "$+"));
|
---|
851 |
|
---|
852 | int idx;
|
---|
853 | for(idx = 1; idx < 10; ++idx) {
|
---|
854 | char const n[] = { '$', '0' + idx };
|
---|
855 | mrb_gv_remove(mrb, mrb_intern(mrb, n, 2));
|
---|
856 | }
|
---|
857 |
|
---|
858 | return self;
|
---|
859 | }
|
---|
860 |
|
---|
861 | static mrb_value
|
---|
862 | onig_regexp_does_set_global_variables(mrb_state* mrb, mrb_value self) {
|
---|
863 | (void)self;
|
---|
864 | return mrb_obj_iv_get(mrb, (struct RObject*)mrb_class_get(mrb, "OnigRegexp"),
|
---|
865 | mrb_intern_lit(mrb, "@set_global_variables"));
|
---|
866 | }
|
---|
867 | static mrb_value
|
---|
868 | onig_regexp_set_set_global_variables(mrb_state* mrb, mrb_value self) {
|
---|
869 | mrb_value arg;
|
---|
870 | mrb_get_args(mrb, "o", &arg);
|
---|
871 | mrb_value const ret = mrb_bool_value(mrb_bool(arg));
|
---|
872 | mrb_obj_iv_set(mrb, (struct RObject*)mrb_class_get(mrb, "OnigRegexp"),
|
---|
873 | mrb_intern_lit(mrb, "@set_global_variables"), ret);
|
---|
874 | onig_regexp_clear_global_variables(mrb, self);
|
---|
875 | return ret;
|
---|
876 | }
|
---|
877 |
|
---|
878 | // ISO 15.2.15.6.2
|
---|
879 | static mrb_value
|
---|
880 | onig_regexp_escape(mrb_state* mrb, mrb_value self) {
|
---|
881 | char* str_begin; mrb_args_int str_len;
|
---|
882 | mrb_get_args(mrb, "s", &str_begin, &str_len);
|
---|
883 |
|
---|
884 | mrb_value const ret = mrb_str_new(mrb, NULL, 0);
|
---|
885 | char escaped_char = 0;
|
---|
886 | int substr_count = 0;
|
---|
887 | char const* str = str_begin;
|
---|
888 |
|
---|
889 | for(; str < (str_begin + str_len); ++str) {
|
---|
890 | switch(*str) {
|
---|
891 | case '\n': escaped_char = 'n'; break;
|
---|
892 | case '\t': escaped_char = 't'; break;
|
---|
893 | case '\r': escaped_char = 'r'; break;
|
---|
894 | case '\f': escaped_char = 'f'; break;
|
---|
895 |
|
---|
896 | case ' ':
|
---|
897 | case '#':
|
---|
898 | case '$':
|
---|
899 | case '(':
|
---|
900 | case ')':
|
---|
901 | case '*':
|
---|
902 | case '+':
|
---|
903 | case '-':
|
---|
904 | case '.':
|
---|
905 | case '?':
|
---|
906 | case '[':
|
---|
907 | case '\\':
|
---|
908 | case ']':
|
---|
909 | case '^':
|
---|
910 | case '{':
|
---|
911 | case '|':
|
---|
912 | case '}':
|
---|
913 | escaped_char = *str; break;
|
---|
914 |
|
---|
915 | default: ++substr_count; continue;
|
---|
916 | }
|
---|
917 |
|
---|
918 | mrb_str_cat(mrb, ret, str - substr_count, substr_count);
|
---|
919 | substr_count = 0;
|
---|
920 |
|
---|
921 | char const c[] = { '\\', escaped_char };
|
---|
922 | mrb_str_cat(mrb, ret, c, 2);
|
---|
923 | }
|
---|
924 | mrb_str_cat(mrb, ret, str - substr_count, substr_count);
|
---|
925 | return ret;
|
---|
926 | }
|
---|
927 |
|
---|
928 | void
|
---|
929 | mrb_mruby_onig_regexp_gem_init(mrb_state* mrb) {
|
---|
930 | struct RClass *clazz;
|
---|
931 |
|
---|
932 | clazz = mrb_define_class(mrb, "OnigRegexp", mrb->object_class);
|
---|
933 | MRB_SET_INSTANCE_TT(clazz, MRB_TT_DATA);
|
---|
934 |
|
---|
935 | // enable global variables setting in onig_match_common by default
|
---|
936 | mrb_obj_iv_set(mrb, (struct RObject*)clazz, mrb_intern_lit(mrb, "@set_global_variables"), mrb_true_value());
|
---|
937 |
|
---|
938 | mrb_define_const(mrb, clazz, "IGNORECASE", mrb_fixnum_value(ONIG_OPTION_IGNORECASE));
|
---|
939 | mrb_define_const(mrb, clazz, "EXTENDED", mrb_fixnum_value(ONIG_OPTION_EXTEND));
|
---|
940 | mrb_define_const(mrb, clazz, "MULTILINE", mrb_fixnum_value(ONIG_OPTION_MULTILINE));
|
---|
941 | mrb_define_const(mrb, clazz, "SINGLELINE", mrb_fixnum_value(ONIG_OPTION_SINGLELINE));
|
---|
942 | mrb_define_const(mrb, clazz, "FIND_LONGEST", mrb_fixnum_value(ONIG_OPTION_FIND_LONGEST));
|
---|
943 | mrb_define_const(mrb, clazz, "FIND_NOT_EMPTY", mrb_fixnum_value(ONIG_OPTION_FIND_NOT_EMPTY));
|
---|
944 | mrb_define_const(mrb, clazz, "NEGATE_SINGLELINE", mrb_fixnum_value(ONIG_OPTION_NEGATE_SINGLELINE));
|
---|
945 | mrb_define_const(mrb, clazz, "DONT_CAPTURE_GROUP", mrb_fixnum_value(ONIG_OPTION_DONT_CAPTURE_GROUP));
|
---|
946 | mrb_define_const(mrb, clazz, "CAPTURE_GROUP", mrb_fixnum_value(ONIG_OPTION_CAPTURE_GROUP));
|
---|
947 | mrb_define_const(mrb, clazz, "NOTBOL", mrb_fixnum_value(ONIG_OPTION_NOTBOL));
|
---|
948 | mrb_define_const(mrb, clazz, "NOTEOL", mrb_fixnum_value(ONIG_OPTION_NOTEOL));
|
---|
949 | #ifdef ONIG_OPTION_ASCII_RANGE
|
---|
950 | mrb_define_const(mrb, clazz, "ASCII_RANGE", mrb_fixnum_value(ONIG_OPTION_ASCII_RANGE));
|
---|
951 | #endif
|
---|
952 | #ifdef ONIG_OPTION_POSIX_BRACKET_ALL_RANGE
|
---|
953 | mrb_define_const(mrb, clazz, "POSIX_BRACKET_ALL_RANGE", mrb_fixnum_value(ONIG_OPTION_POSIX_BRACKET_ALL_RANGE));
|
---|
954 | #endif
|
---|
955 | #ifdef ONIG_OPTION_WORD_BOUND_ALL_RANGE
|
---|
956 | mrb_define_const(mrb, clazz, "WORD_BOUND_ALL_RANGE", mrb_fixnum_value(ONIG_OPTION_WORD_BOUND_ALL_RANGE));
|
---|
957 | #endif
|
---|
958 | #ifdef ONIG_OPTION_NEWLINE_CRLF
|
---|
959 | mrb_define_const(mrb, clazz, "NEWLINE_CRLF", mrb_fixnum_value(ONIG_OPTION_NEWLINE_CRLF));
|
---|
960 | #endif
|
---|
961 | #ifdef ONIG_OPTION_NOTBOS
|
---|
962 | mrb_define_const(mrb, clazz, "NOTBOS", mrb_fixnum_value(ONIG_OPTION_NOTBOS));
|
---|
963 | #endif
|
---|
964 | #ifdef ONIG_OPTION_NOTEOS
|
---|
965 | mrb_define_const(mrb, clazz, "NOTEOS", mrb_fixnum_value(ONIG_OPTION_NOTEOS));
|
---|
966 | #endif
|
---|
967 |
|
---|
968 | mrb_define_method(mrb, clazz, "initialize", onig_regexp_initialize, MRB_ARGS_REQ(1) | MRB_ARGS_OPT(2));
|
---|
969 | mrb_define_method(mrb, clazz, "==", onig_regexp_equal, MRB_ARGS_REQ(1));
|
---|
970 | mrb_define_method(mrb, clazz, "match", onig_regexp_match, MRB_ARGS_REQ(1) | MRB_ARGS_OPT(1));
|
---|
971 | mrb_define_method(mrb, clazz, "casefold?", onig_regexp_casefold_p, MRB_ARGS_NONE());
|
---|
972 |
|
---|
973 | mrb_define_method(mrb, clazz, "options", onig_regexp_options, MRB_ARGS_NONE());
|
---|
974 | mrb_define_method(mrb, clazz, "inspect", onig_regexp_inspect, MRB_ARGS_NONE());
|
---|
975 | mrb_define_method(mrb, clazz, "to_s", onig_regexp_to_s, MRB_ARGS_NONE());
|
---|
976 |
|
---|
977 | mrb_define_module_function(mrb, clazz, "escape", onig_regexp_escape, MRB_ARGS_REQ(1));
|
---|
978 | mrb_define_module_function(mrb, clazz, "quote", onig_regexp_escape, MRB_ARGS_REQ(1));
|
---|
979 | mrb_define_module_function(mrb, clazz, "version", onig_regexp_version, MRB_ARGS_NONE());
|
---|
980 | mrb_define_module_function(mrb, clazz, "set_global_variables?", onig_regexp_does_set_global_variables, MRB_ARGS_NONE());
|
---|
981 | mrb_define_module_function(mrb, clazz, "set_global_variables=", onig_regexp_set_set_global_variables, MRB_ARGS_REQ(1));
|
---|
982 | mrb_define_module_function(mrb, clazz, "clear_global_variables", onig_regexp_clear_global_variables, MRB_ARGS_NONE());
|
---|
983 |
|
---|
984 | struct RClass* match_data = mrb_define_class(mrb, "OnigMatchData", mrb->object_class);
|
---|
985 | MRB_SET_INSTANCE_TT(clazz, MRB_TT_DATA);
|
---|
986 | mrb_undef_class_method(mrb, match_data, "new");
|
---|
987 |
|
---|
988 | // mrb_define_method(mrb, match_data, "==", &match_data_eq);
|
---|
989 | mrb_define_method(mrb, match_data, "[]", &match_data_index, MRB_ARGS_REQ(1));
|
---|
990 | mrb_define_method(mrb, match_data, "begin", &match_data_begin, MRB_ARGS_REQ(1));
|
---|
991 | mrb_define_method(mrb, match_data, "captures", &match_data_captures, MRB_ARGS_NONE());
|
---|
992 | mrb_define_method(mrb, match_data, "end", &match_data_end, MRB_ARGS_REQ(1));
|
---|
993 | // mrb_define_method(mrb, match_data, "eql?", &match_data_eq);
|
---|
994 | // mrb_define_method(mrb, match_data, "hash", &match_data_hash);
|
---|
995 | mrb_define_method(mrb, match_data, "initialize_copy", &match_data_copy, MRB_ARGS_REQ(1));
|
---|
996 | // mrb_define_method(mrb, match_data, "inspect", &match_data_inspect);
|
---|
997 | mrb_define_method(mrb, match_data, "length", &match_data_length, MRB_ARGS_NONE());
|
---|
998 | // mrb_define_method(mrb, match_data, "names", &match_data_names);
|
---|
999 | mrb_define_method(mrb, match_data, "offset", &match_data_offset, MRB_ARGS_REQ(1));
|
---|
1000 | mrb_define_method(mrb, match_data, "post_match", &match_data_post_match, MRB_ARGS_NONE());
|
---|
1001 | mrb_define_method(mrb, match_data, "pre_match", &match_data_pre_match, MRB_ARGS_NONE());
|
---|
1002 | mrb_define_method(mrb, match_data, "regexp", &match_data_regexp, MRB_ARGS_NONE());
|
---|
1003 | mrb_define_method(mrb, match_data, "size", &match_data_length, MRB_ARGS_NONE());
|
---|
1004 | mrb_define_method(mrb, match_data, "string", &match_data_string, MRB_ARGS_NONE());
|
---|
1005 | mrb_define_method(mrb, match_data, "to_a", &match_data_to_a, MRB_ARGS_NONE());
|
---|
1006 | mrb_define_method(mrb, match_data, "to_s", &match_data_to_s, MRB_ARGS_NONE());
|
---|
1007 | // mrb_define_method(mrb, match_data, "values_at", &match_data_values_at);
|
---|
1008 |
|
---|
1009 | mrb_define_method(mrb, mrb->string_class, "onig_regexp_gsub", &string_gsub, MRB_ARGS_REQ(1) | MRB_ARGS_OPT(1) | MRB_ARGS_BLOCK());
|
---|
1010 | mrb_define_method(mrb, mrb->string_class, "onig_regexp_sub", &string_sub, MRB_ARGS_REQ(1) | MRB_ARGS_OPT(1) | MRB_ARGS_BLOCK());
|
---|
1011 | mrb_define_method(mrb, mrb->string_class, "onig_regexp_split", &string_split, MRB_ARGS_REQ(1));
|
---|
1012 | mrb_define_method(mrb, mrb->string_class, "onig_regexp_scan", &string_scan, MRB_ARGS_REQ(1) | MRB_ARGS_BLOCK());
|
---|
1013 | }
|
---|
1014 |
|
---|
1015 | void
|
---|
1016 | mrb_mruby_onig_regexp_gem_final(mrb_state* mrb) {
|
---|
1017 | (void)mrb;
|
---|
1018 | }
|
---|
1019 |
|
---|
1020 | // vim:set et:
|
---|