source: EcnlProtoTool/trunk/mrbgems/mruby-onig-regexp/src/mruby_onig_regexp.c@ 439

Last change on this file since 439 was 439, checked in by coas-nagasima, 4 years ago

mrubyを2.1.1に更新

  • Property svn:eol-style set to native
  • Property svn:mime-type set to text/x-csrc;charset=UTF-8
File size: 40.3 KB
Line 
1/*
2The MIT License (MIT)
3
4Copyright (c) 2015 mattn.
5
6Permission is hereby granted, free of charge, to any person obtaining a copy
7of this software and associated documentation files (the "Software"), to deal
8in the Software without restriction, including without limitation the rights
9to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10copies of the Software, and to permit persons to whom the Software is
11furnished to do so, subject to the following conditions:
12
13The above copyright notice and this permission notice shall be included in
14all copies or substantial portions of the Software.
15
16THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22THE SOFTWARE.
23*/
24#include <stdio.h>
25#include <string.h>
26#include <ctype.h>
27#include <memory.h>
28#include <mruby.h>
29#include <mruby/class.h>
30#include <mruby/variable.h>
31#include <mruby/array.h>
32#include <mruby/hash.h>
33#include <mruby/string.h>
34#include <mruby/data.h>
35#include <mruby/variable.h>
36#ifdef _MSC_VER
37#define ONIG_EXTERN extern
38#endif
39#ifdef HAVE_ONIGMO_H
40#include <onigmo.h>
41#elif defined(HAVE_ONIGURUMA_H)
42#include <oniguruma.h>
43#else
44#include "oniguruma.h"
45#endif
46
/*
 * mrb_args_int: integer type used for argument values.
 * It is mrb_int when MRUBY_VERSION is defined and plain int otherwise.
 */
#if defined(MRUBY_VERSION)
# define mrb_args_int mrb_int
#else
# define mrb_args_int int
#endif
52
/*
 * Lookup table indexed by a byte value (0..255) giving the length, in bytes,
 * that utf8len() expects for a UTF-8 sequence starting with that byte.
 * Bytes that cannot start a multi-byte sequence map to 1.
 */
static const char utf8len_codepage[256] =
{
  /* 0x00 - 0xBF: single byte */
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0xC0 - 0xDF: two-byte lead */
  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  /* 0xE0 - 0xEF: three-byte lead */
  3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
  /* 0xF0 - 0xF4: four-byte lead; 0xF5 - 0xFF: single byte */
  4,4,4,4,4,1,1,1,1,1,1,1,1,1,1,1,
};
64
65static mrb_int
66utf8len(const char* p, const char* e)
67{
68 mrb_int len;
69 mrb_int i;
70
71 len = utf8len_codepage[(unsigned char)*p];
72 if (p + len > e) return 1;
73 for (i = 1; i < len; ++i)
74 if ((p[i] & 0xc0) != 0x80)
75 return 1;
76 return len;
77}
78
79static void
80onig_regexp_free(mrb_state *mrb, void *p) {
81 onig_free((OnigRegex) p);
82}
83
84static struct mrb_data_type mrb_onig_regexp_type = {
85 "PosixRegexp", onig_regexp_free
86};
87
/*
 * True when obj is a data object tagged with mrb_onig_regexp_type.
 * The value-type test comes first so DATA_TYPE() is only applied to data
 * objects. Note that obj is evaluated twice.
 */
#define ONIG_REGEXP_P(obj) \
  ((MRB_TT_DATA == mrb_type(obj)) && (&mrb_onig_regexp_type == DATA_TYPE(obj)))
90
91static void
92match_data_free(mrb_state* mrb, void* p) {
93 (void)mrb;
94 onig_region_free((OnigRegion*)p, 1);
95}
96
97static struct mrb_data_type mrb_onig_region_type = {
98 "OnigRegion", match_data_free
99};
100
101static mrb_value
102str_substr(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
103{
104#ifdef MRB_UTF8_STRING
105 return mrb_str_new(mrb, RSTRING_PTR(str) + beg, len);
106#else
107 return mrb_str_substr(mrb, str, beg, len);
108#endif
109}
110
111static mrb_value
112onig_regexp_initialize(mrb_state *mrb, mrb_value self) {
113 mrb_value str, flag = mrb_nil_value(), code = mrb_nil_value();
114 mrb_get_args(mrb, "S|oo", &str, &flag, &code);
115
116 int cflag = 0;
117 OnigEncoding enc = ONIG_ENCODING_UTF8;
118 if(mrb_string_p(code)) {
119 char const* str_code = mrb_string_value_ptr(mrb, code);
120 if(strchr(str_code, 'n') || strchr(str_code, 'N')) {
121 enc = ONIG_ENCODING_ASCII;
122 }
123 }
124 if(mrb_nil_p(flag)) {
125 } else if(mrb_type(flag) == MRB_TT_TRUE) {
126 cflag |= ONIG_OPTION_IGNORECASE;
127 } else if(mrb_fixnum_p(flag)) {
128 int int_flags = mrb_fixnum(flag);
129 if(int_flags & 0x1) { cflag |= ONIG_OPTION_IGNORECASE; }
130 if(int_flags & 0x2) { cflag |= ONIG_OPTION_EXTEND; }
131 if(int_flags & 0x4) { cflag |= ONIG_OPTION_MULTILINE; }
132 } else if(mrb_string_p(flag)) {
133 char const* str_flags = mrb_string_value_ptr(mrb, flag);
134 if(strchr(str_flags, 'i')) { cflag |= ONIG_OPTION_IGNORECASE; }
135 if(strchr(str_flags, 'x')) { cflag |= ONIG_OPTION_EXTEND; }
136 if(strchr(str_flags, 'm')) { cflag |= ONIG_OPTION_MULTILINE; }
137 } else {
138 mrb_raisef(mrb, E_ARGUMENT_ERROR, "unknown regexp flag: %S", flag);
139 }
140
141 OnigErrorInfo einfo;
142 OnigRegex reg;
143 int result = onig_new(&reg, (OnigUChar*)RSTRING_PTR(str), (OnigUChar*) RSTRING_PTR(str) + RSTRING_LEN(str),
144 cflag, enc, ONIG_SYNTAX_RUBY, &einfo);
145 if (result != ONIG_NORMAL) {
146 char err[ONIG_MAX_ERROR_MESSAGE_LEN] = "";
147 onig_error_code_to_str((OnigUChar*)err, result, &einfo);
148 mrb_raisef(mrb, E_REGEXP_ERROR, "'%S' is an invalid regular expression because %S.",
149 str, mrb_str_new_cstr(mrb, err));
150 }
151 mrb_iv_set(mrb, self, mrb_intern_lit(mrb, "@source"), str);
152
153 DATA_PTR(self) = reg;
154 DATA_TYPE(self) = &mrb_onig_regexp_type;
155
156 return self;
157}
158
159static mrb_value
160create_onig_region(mrb_state* mrb, mrb_value const str, mrb_value rex) {
161 mrb_assert(mrb_string_p(str));
162 mrb_assert(mrb_type(rex) == MRB_TT_DATA && DATA_TYPE(rex) == &mrb_onig_regexp_type);
163 mrb_value const c = mrb_obj_value(mrb_data_object_alloc(
164 mrb, mrb_class_get(mrb, "OnigMatchData"), onig_region_new(), &mrb_onig_region_type));
165 mrb_iv_set(mrb, c, mrb_intern_lit(mrb, "string"), mrb_str_dup(mrb, str));
166 mrb_iv_set(mrb, c, mrb_intern_lit(mrb, "regexp"), rex);
167 return c;
168}
169
/*
 * Yields nil when the enclosing scope's `result` equals ONIG_MISMATCH and
 * evaluates v otherwise (v is not evaluated on a mismatch).
 */
#define MISMATCH_NIL_OR(v) (result != ONIG_MISMATCH ? (v) : mrb_nil_value())
171
172static int
173onig_match_common(mrb_state* mrb, OnigRegex reg, mrb_value match_value, mrb_value str, int pos) {
174 mrb_assert(mrb_string_p(str));
175 mrb_assert(DATA_TYPE(match_value) == &mrb_onig_region_type);
176 OnigRegion* const match = (OnigRegion*)DATA_PTR(match_value);
177 OnigUChar const* str_ptr = (OnigUChar const*)RSTRING_PTR(str);
178 int const result = onig_search(reg, str_ptr, str_ptr + RSTRING_LEN(str),
179 str_ptr + pos, str_ptr + RSTRING_LEN(str), match, 0);
180 if (result != ONIG_MISMATCH && result < 0) {
181 char err[ONIG_MAX_ERROR_MESSAGE_LEN] = "";
182 onig_error_code_to_str((OnigUChar*)err, result);
183 mrb_raise(mrb, E_REGEXP_ERROR, err);
184 }
185
186 struct RObject* const cls = (struct RObject*)mrb_class_get(mrb, "OnigRegexp");
187 mrb_obj_iv_set(mrb, cls, mrb_intern_lit(mrb, "@last_match"), MISMATCH_NIL_OR(match_value));
188
189 if (mrb_class_get(mrb, "Regexp") == (struct RClass*)cls &&
190 mrb_bool(mrb_obj_iv_get(mrb, (struct RObject*)cls, mrb_intern_lit(mrb, "@set_global_variables"))))
191 {
192 mrb_gv_set(mrb, mrb_intern_lit(mrb, "$~"),
193 MISMATCH_NIL_OR(match_value));
194 mrb_gv_set(mrb, mrb_intern_lit(mrb, "$&"),
195 MISMATCH_NIL_OR(mrb_funcall(mrb, match_value, "[]", 1, mrb_fixnum_value(0))));
196 mrb_gv_set(mrb, mrb_intern_lit(mrb, "$`"),
197 MISMATCH_NIL_OR(mrb_funcall(mrb, match_value, "pre_match", 0)));
198 mrb_gv_set(mrb, mrb_intern_lit(mrb, "$'"),
199 MISMATCH_NIL_OR(mrb_funcall(mrb, match_value, "post_match", 0)));
200 mrb_gv_set(mrb, mrb_intern_lit(mrb, "$+"),
201 MISMATCH_NIL_OR(mrb_funcall(mrb, match_value, "[]", 1, mrb_fixnum_value(match->num_regs - 1))));
202
203 // $1 to $9
204 int idx = 1;
205 int const idx_max = match->num_regs > 10? 10 : match->num_regs;
206 for(; idx < idx_max; ++idx) {
207 char const n[] = { '$', '0' + idx };
208 mrb_gv_set(mrb, mrb_intern(mrb, n, 2),
209 mrb_funcall(mrb, match_value, "[]", 1, mrb_fixnum_value(idx)));
210 }
211
212 for(; idx < 10; ++idx) {
213 char const n[] = { '$', '0' + idx };
214 mrb_gv_remove(mrb, mrb_intern(mrb, n, 2));
215 }
216 }
217
218 return result;
219}
220
221static mrb_value
222reg_operand(mrb_state *mrb, mrb_value obj) {
223 mrb_value ret;
224
225 if (mrb_symbol_p(obj)) {
226 ret = mrb_sym2str(mrb, mrb_symbol(obj));
227 if (mrb_undef_p(ret)) {
228 mrb_bug(mrb, "can not intern %S", obj);
229 }
230 }
231 else {
232 ret = mrb_string_type(mrb, obj);
233 }
234 return ret;
235}
236
237static mrb_value
238onig_regexp_match(mrb_state *mrb, mrb_value self) {
239 mrb_value str = mrb_nil_value();
240 OnigRegex reg;
241 mrb_int pos = 0;
242 mrb_value block = mrb_nil_value();
243
244 mrb_get_args(mrb, "o|i&", &str, &pos, &block);
245 if (mrb_nil_p(str)) {
246 return mrb_nil_value();
247 }
248 str = reg_operand(mrb, str);
249 if (pos < 0 || (pos > 0 && pos >= RSTRING_LEN(str))) {
250 return mrb_nil_value();
251 }
252
253 Data_Get_Struct(mrb, self, &mrb_onig_regexp_type, reg);
254
255 mrb_value const ret = create_onig_region(mrb, str, self);
256 if (onig_match_common(mrb, reg, ret, str, pos) == ONIG_MISMATCH) {
257 return mrb_nil_value();
258 }
259
260 if (mrb_nil_p(block)) {
261 return ret;
262 } else {
263 return mrb_yield(mrb, block, ret);
264 }
265}
266
267static mrb_value
268onig_regexp_match_p(mrb_state *mrb, mrb_value self) {
269 mrb_value str = mrb_nil_value();
270 mrb_int pos = 0;
271 OnigRegex reg;
272 OnigUChar const* str_ptr;
273
274 mrb_get_args(mrb, "o|i", &str, &pos);
275 if (mrb_nil_p(str)) {
276 return mrb_nil_value();
277 }
278 str = reg_operand(mrb, str);
279 if (pos < 0 || (pos > 0 && pos >= RSTRING_LEN(str))) {
280 return mrb_nil_value();
281 }
282
283 Data_Get_Struct(mrb, self, &mrb_onig_regexp_type, reg);
284 str_ptr = (OnigUChar const*)RSTRING_PTR(str);
285 return mrb_bool_value(onig_search(
286 reg, str_ptr, str_ptr + RSTRING_LEN(str),
287 str_ptr + pos, str_ptr + RSTRING_LEN(str), NULL, 0) != ONIG_MISMATCH);
288}
289
290static mrb_value
291string_match_p(mrb_state *mrb, mrb_value self) {
292 mrb_value str = self;
293 mrb_int pos = 0;
294 OnigRegex reg;
295 OnigUChar const* str_ptr;
296
297 mrb_get_args(mrb, "d|i", &reg, &mrb_onig_regexp_type, &pos);
298 if (pos < 0 || (pos > 0 && pos >= RSTRING_LEN(str))) {
299 return mrb_nil_value();
300 }
301
302 if (mrb_nil_p(str)) {
303 return mrb_nil_value();
304 }
305 str = mrb_string_type(mrb, str);
306
307 str_ptr = (OnigUChar const*)RSTRING_PTR(str);
308 return mrb_bool_value(onig_search(
309 reg, str_ptr, str_ptr + RSTRING_LEN(str),
310 str_ptr + pos, str_ptr + RSTRING_LEN(str), NULL, 0) != ONIG_MISMATCH);
311}
312
313static mrb_value
314onig_regexp_equal(mrb_state *mrb, mrb_value self) {
315 mrb_value other;
316 OnigRegex self_reg, other_reg;
317
318 mrb_get_args(mrb, "o", &other);
319 if (mrb_obj_equal(mrb, self, other)){
320 return mrb_true_value();
321 }
322 if (mrb_nil_p(other)) {
323 return mrb_false_value();
324 }
325 if (!mrb_obj_is_kind_of(mrb, other, mrb_class_get(mrb, "OnigRegexp"))) {
326 return mrb_false_value();
327 }
328 Data_Get_Struct(mrb, self, &mrb_onig_regexp_type, self_reg);
329 Data_Get_Struct(mrb, other, &mrb_onig_regexp_type, other_reg);
330
331 if (!self_reg || !other_reg){
332 mrb_raise(mrb, E_RUNTIME_ERROR, "Invalid OnigRegexp");
333 }
334 if (onig_get_options(self_reg) != onig_get_options(other_reg)){
335 return mrb_false_value();
336 }
337 return mrb_str_equal(mrb, mrb_iv_get(mrb, self, mrb_intern_lit(mrb, "@source")), mrb_iv_get(mrb, other, mrb_intern_lit(mrb, "@source"))) ?
338 mrb_true_value() : mrb_false_value();
339}
340
341static mrb_value
342onig_regexp_casefold_p(mrb_state *mrb, mrb_value self) {
343 OnigRegex reg;
344
345 Data_Get_Struct(mrb, self, &mrb_onig_regexp_type, reg);
346 return (onig_get_options(reg) & ONIG_OPTION_IGNORECASE) ? mrb_true_value() : mrb_false_value();
347}
348
349static mrb_value
350onig_regexp_options(mrb_state *mrb, mrb_value self) {
351 OnigRegex reg;
352 Data_Get_Struct(mrb, self, &mrb_onig_regexp_type, reg);
353 return mrb_fixnum_value(onig_get_options(reg));
354}
355
356static char *
357option_to_str(char str[4], int options) {
358 char *p = str;
359 if (options & ONIG_OPTION_MULTILINE) *p++ = 'm';
360 if (options & ONIG_OPTION_IGNORECASE) *p++ = 'i';
361 if (options & ONIG_OPTION_EXTEND) *p++ = 'x';
362 *p = 0;
363 return str;
364}
365
366static mrb_value
367regexp_expr_str(mrb_state *mrb, mrb_value str, const char *p, int len) {
368 const char *pend;
369 char buf[5];
370
371 pend = (const char *) p + len;
372 for (;p < pend; p++) {
373 unsigned char c, cc;
374
375 c = *p;
376 if (c == '/') {
377 buf[0] = '\\'; buf[1] = c;
378 mrb_str_cat(mrb, str, buf, 2);
379 continue;
380 }
381 if (ISPRINT(c)) {
382 buf[0] = c;
383 mrb_str_cat(mrb, str, buf, 1);
384 continue;
385 }
386 switch (c) {
387 case '\n': cc = 'n'; break;
388 case '\r': cc = 'r'; break;
389 case '\t': cc = 't'; break;
390 default: cc = 0; break;
391 }
392 if (cc) {
393 buf[0] = '\\';
394 buf[1] = (char)cc;
395 mrb_str_cat(mrb, str, buf, 2);
396 continue;
397 }
398 else {
399 buf[0] = '\\';
400 buf[3] = '0' + c % 8; c /= 8;
401 buf[2] = '0' + c % 8; c /= 8;
402 buf[1] = '0' + c % 8;
403 mrb_str_cat(mrb, str, buf, 4);
404 continue;
405 }
406 }
407 return str;
408}
409
410static mrb_value
411onig_regexp_inspect(mrb_state *mrb, mrb_value self) {
412 OnigRegex reg;
413 Data_Get_Struct(mrb, self, &mrb_onig_regexp_type, reg);
414 mrb_value str = mrb_str_new_lit(mrb, "/");
415 mrb_value src = mrb_iv_get(mrb, self, mrb_intern_lit(mrb, "@source"));
416 regexp_expr_str(mrb, str, (const char *)RSTRING_PTR(src), RSTRING_LEN(src));
417 mrb_str_cat_lit(mrb, str, "/");
418 char opts[4];
419 if (*option_to_str(opts, onig_get_options(reg))) {
420 mrb_str_cat_cstr(mrb, str, opts);
421 }
422 if (onig_get_encoding(reg) == ONIG_ENCODING_ASCII) {
423 mrb_str_cat_lit(mrb, str, "n");
424 }
425 return str;
426}
427
428static mrb_value
429onig_regexp_to_s(mrb_state *mrb, mrb_value self) {
430 int options;
431 const int embeddable = ONIG_OPTION_MULTILINE|ONIG_OPTION_IGNORECASE|ONIG_OPTION_EXTEND;
432 long len;
433 const char* ptr;
434 mrb_value str = mrb_str_new_lit(mrb, "(?");
435 char optbuf[5];
436
437 OnigRegex reg;
438 Data_Get_Struct(mrb, self, &mrb_onig_regexp_type, reg);
439 options = onig_get_options(reg);
440 mrb_value src = mrb_iv_get(mrb, self, mrb_intern_lit(mrb, "@source"));
441 ptr = RSTRING_PTR(src);
442 len = RSTRING_LEN(src);
443
444 again:
445 if (len >= 4 && ptr[0] == '(' && ptr[1] == '?') {
446 int err = 1;
447 ptr += 2;
448 if ((len -= 2) > 0) {
449 do {
450 if(strchr(ptr, 'i')) { options |= ONIG_OPTION_IGNORECASE; }
451 if(strchr(ptr, 'x')) { options |= ONIG_OPTION_EXTEND; }
452 if(strchr(ptr, 'm')) { options |= ONIG_OPTION_MULTILINE; }
453 ++ptr;
454 } while (--len > 0);
455 }
456 if (len > 1 && *ptr == '-') {
457 ++ptr;
458 --len;
459 do {
460 if(strchr(ptr, 'i')) { options &= ~ONIG_OPTION_IGNORECASE; }
461 if(strchr(ptr, 'x')) { options &= ~ONIG_OPTION_EXTEND; }
462 if(strchr(ptr, 'm')) { options &= ~ONIG_OPTION_MULTILINE; }
463 ++ptr;
464 } while (--len > 0);
465 }
466 if (*ptr == ')') {
467 --len;
468 ++ptr;
469 goto again;
470 }
471 if (*ptr == ':' && ptr[len-1] == ')') {
472 OnigRegex rp;
473 ++ptr;
474 len -= 2;
475 err = onig_new(&rp, (OnigUChar*)ptr, (OnigUChar*)ptr + len, ONIG_OPTION_DEFAULT,
476 ONIG_ENCODING_UTF8, OnigDefaultSyntax, NULL);
477 onig_free(rp);
478 }
479 if (err) {
480 options = onig_get_options(reg);
481 ptr = RSTRING_PTR(src);
482 len = RSTRING_LEN(src);
483 }
484 }
485
486 if (*option_to_str(optbuf, options)) mrb_str_cat_cstr(mrb, str, optbuf);
487
488 if ((options & embeddable) != embeddable) {
489 optbuf[0] = '-';
490 option_to_str(optbuf + 1, ~options);
491 mrb_str_cat_cstr(mrb, str, optbuf);
492 }
493
494 mrb_str_cat_cstr(mrb, str, ":");
495 regexp_expr_str(mrb, str, ptr, len);
496 mrb_str_cat_cstr(mrb, str, ")");
497 return str;
498}
499
500
501static mrb_value
502onig_regexp_version(mrb_state* mrb, mrb_value self) {
503 (void)self;
504 return mrb_str_new_cstr(mrb, onig_version());
505}
506
507static mrb_value
508match_data_to_a(mrb_state* mrb, mrb_value self);
509
510static mrb_int
511match_data_actual_index(mrb_state* mrb, mrb_value self, mrb_value idx_value) {
512 if(mrb_fixnum_p(idx_value)) { return mrb_fixnum(idx_value); }
513
514 char const* name = NULL;
515 char const* name_end = NULL;
516 if(mrb_symbol_p(idx_value)) {
517 mrb_int sym_len;
518 name = mrb_sym2name_len(mrb, mrb_symbol(idx_value), &sym_len);
519 name_end = name + sym_len;
520 } else if(mrb_string_p(idx_value)) {
521 name = mrb_string_value_ptr(mrb, idx_value);
522 name_end = name + strlen(name);
523 } else { mrb_assert(FALSE); }
524 mrb_assert(name && name_end);
525
526 mrb_value const regexp = mrb_iv_get(mrb, self, mrb_intern_lit(mrb, "regexp"));
527 mrb_assert(!mrb_nil_p(regexp));
528 mrb_assert(DATA_TYPE(regexp) == &mrb_onig_regexp_type);
529 mrb_assert(DATA_TYPE(self) == &mrb_onig_region_type);
530 int const idx = onig_name_to_backref_number(
531 (OnigRegex)DATA_PTR(regexp), (OnigUChar const*)name, (OnigUChar const*)name_end,
532 (OnigRegion*)DATA_PTR(self));
533 if (idx < 0) {
534 mrb_raisef(mrb, E_INDEX_ERROR, "undefined group name reference: %S", idx_value);
535 }
536 return idx;
537}
538
539// ISO 15.2.16.3.1
540static mrb_value
541match_data_index(mrb_state* mrb, mrb_value self) {
542 mrb_value src;
543 mrb_int argc; mrb_value *argv;
544
545 mrb_get_args(mrb, "*", &argv, &argc);
546
547 src = match_data_to_a(mrb, self);
548
549 if (argc == 1) {
550 switch (mrb_type(argv[0])) {
551 case MRB_TT_FIXNUM:
552 case MRB_TT_SYMBOL:
553 case MRB_TT_STRING:
554 return mrb_ary_entry(src, match_data_actual_index(mrb, self, argv[0]));
555 default: break;
556 }
557 }
558
559 return mrb_funcall_argv(mrb, src, mrb_intern_lit(mrb, "[]"), argc, argv);
560}
561
/*
 * Raise IndexError unless 0 <= idx < reg->num_regs.
 * Relies on `mrb` and `reg` (an OnigRegion*) being in scope at the call site.
 * Wrapped in do/while(0) so it behaves as a single statement, including
 * inside an unbraced if/else.
 */
#define match_data_check_index(idx) \
  do { \
    if ((idx) < 0 || reg->num_regs <= (idx)) { \
      mrb_raisef(mrb, E_INDEX_ERROR, "index %S out of matches", mrb_fixnum_value(idx)); \
    } \
  } while (0)
565
566// ISO 15.2.16.3.2
567static mrb_value
568match_data_begin(mrb_state* mrb, mrb_value self) {
569 mrb_value idx_value;
570 mrb_get_args(mrb, "o", &idx_value);
571 OnigRegion* reg;
572 Data_Get_Struct(mrb, self, &mrb_onig_region_type, reg);
573 mrb_int const idx = match_data_actual_index(mrb, self, idx_value);
574 match_data_check_index(idx);
575 return mrb_fixnum_value(reg->beg[idx]);
576}
577
578// ISO 15.2.16.3.3
579static mrb_value
580match_data_captures(mrb_state* mrb, mrb_value self) {
581 mrb_value ary = match_data_to_a(mrb, self);
582 return mrb_ary_new_from_values(mrb, RARRAY_LEN(ary) - 1, RARRAY_PTR(ary) + 1);
583}
584
585// ISO 15.2.16.3.4
586static mrb_value
587match_data_end(mrb_state* mrb, mrb_value self) {
588 mrb_value idx_value;
589 mrb_get_args(mrb, "o", &idx_value);
590 OnigRegion* reg;
591 Data_Get_Struct(mrb, self, &mrb_onig_region_type, reg);
592 mrb_int const idx = match_data_actual_index(mrb, self, idx_value);
593 match_data_check_index(idx);
594 return mrb_fixnum_value(reg->end[idx]);
595}
596
597// ISO 15.2.16.3.5
598static mrb_value
599match_data_copy(mrb_state* mrb, mrb_value self) {
600 mrb_value src_val;
601 mrb_get_args(mrb, "o", &src_val);
602
603 OnigRegion* src;
604 Data_Get_Struct(mrb, src_val, &mrb_onig_region_type, src);
605
606 OnigRegion* dst = onig_region_new();
607 onig_region_copy(dst, src);
608
609 DATA_PTR(self) = dst;
610 DATA_TYPE(self) = &mrb_onig_region_type;
611 mrb_iv_set(mrb, self, mrb_intern_lit(mrb, "string"), mrb_iv_get(mrb, src_val, mrb_intern_lit(mrb, "string")));
612 mrb_iv_set(mrb, self, mrb_intern_lit(mrb, "regexp"), mrb_iv_get(mrb, src_val, mrb_intern_lit(mrb, "regexp")));
613 return self;
614}
615
616// ISO 15.2.16.3.6
617// ISO 15.2.16.3.10
618static mrb_value
619match_data_length(mrb_state* mrb, mrb_value self) {
620 OnigRegion* reg;
621 Data_Get_Struct(mrb, self, &mrb_onig_region_type, reg);
622 return mrb_fixnum_value(reg->num_regs);
623}
624
625// ISO 15.2.16.3.7
626static mrb_value
627match_data_offset(mrb_state* mrb, mrb_value self) {
628 mrb_value idx_value;
629 mrb_get_args(mrb, "o", &idx_value);
630 OnigRegion* reg;
631 Data_Get_Struct(mrb, self, &mrb_onig_region_type, reg);
632 mrb_int const idx = match_data_actual_index(mrb, self, idx_value);
633 match_data_check_index(idx);
634 mrb_value ret = mrb_ary_new_capa(mrb, 2);
635 mrb_ary_push(mrb, ret, mrb_fixnum_value(reg->beg[idx]));
636 mrb_ary_push(mrb, ret, mrb_fixnum_value(reg->end[idx]));
637 return ret;
638}
639
640// ISO 15.2.16.3.8
641static mrb_value
642match_data_post_match(mrb_state* mrb, mrb_value self) {
643 OnigRegion* reg;
644 Data_Get_Struct(mrb, self, &mrb_onig_region_type, reg);
645 mrb_value str = mrb_iv_get(mrb, self, mrb_intern_lit(mrb, "string"));
646 return str_substr(mrb, str, reg->end[0], RSTRING_LEN(str) - reg->end[0]);
647}
648
649// ISO 15.2.16.3.9
650static mrb_value
651match_data_pre_match(mrb_state* mrb, mrb_value self) {
652 OnigRegion* reg;
653 Data_Get_Struct(mrb, self, &mrb_onig_region_type, reg);
654 mrb_value str = mrb_iv_get(mrb, self, mrb_intern_lit(mrb, "string"));
655 return str_substr(mrb, str, 0, reg->beg[0]);
656}
657
658// ISO 15.2.16.3.11
659static mrb_value
660match_data_string(mrb_state* mrb, mrb_value self) {
661 return mrb_iv_get(mrb, self, mrb_intern_lit(mrb, "string"));
662}
663
664static mrb_value
665match_data_regexp(mrb_state* mrb, mrb_value self) {
666 return mrb_iv_get(mrb, self, mrb_intern_lit(mrb, "regexp"));
667}
668
669// ISO 15.2.16.3.12
670static mrb_value
671match_data_to_a(mrb_state* mrb, mrb_value self) {
672 mrb_value cache = mrb_iv_get(mrb, self, mrb_intern_lit(mrb, "cache"));
673 if(!mrb_nil_p(cache)) {
674 return cache;
675 }
676
677 mrb_value str = mrb_iv_get(mrb, self, mrb_intern_lit(mrb, "string"));
678 OnigRegion* reg;
679 Data_Get_Struct(mrb, self, &mrb_onig_region_type, reg);
680
681 mrb_value ret = mrb_ary_new_capa(mrb, reg->num_regs);
682 int i, ai = mrb_gc_arena_save(mrb);
683 for(i = 0; i < reg->num_regs; ++i) {
684 if(reg->beg[i] == ONIG_REGION_NOTPOS) {
685 mrb_ary_push(mrb, ret, mrb_nil_value());
686 } else {
687 mrb_ary_push(mrb, ret, str_substr(mrb, str, reg->beg[i], reg->end[i] - reg->beg[i]));
688 }
689 mrb_gc_arena_restore(mrb, ai);
690 }
691 return ret;
692}
693
694// ISO 15.2.16.3.13
695static mrb_value
696match_data_to_s(mrb_state* mrb, mrb_value self) {
697 mrb_value str = mrb_iv_get(mrb, self, mrb_intern_lit(mrb, "string"));
698 OnigRegion* reg;
699 Data_Get_Struct(mrb, self, &mrb_onig_region_type, reg);
700 return str_substr(mrb, str, reg->beg[0], reg->end[0] - reg->beg[0]);
701}
702
703static void
704append_replace_str(mrb_state* mrb, mrb_value result, mrb_value replace,
705 mrb_value src, OnigRegex reg, OnigRegion* match)
706{
707 if (mrb_hash_p(replace)) {
708 mrb_value v = mrb_hash_get(mrb, replace, mrb_str_substr(mrb, src, match->beg[0], match->end[0] - match->beg[0]));
709 v = mrb_str_to_str(mrb, v);
710 mrb_str_cat_str(mrb, result, v);
711 return;
712 }
713
714 mrb_assert(mrb_string_p(replace));
715 char const* ch;
716 char const* const end = RSTRING_PTR(replace) + RSTRING_LEN(replace);
717 for(ch = RSTRING_PTR(replace); ch < end; ++ch) {
718 if (*ch != '\\' || (ch + 1) >= end) {
719 mrb_str_cat(mrb, result, ch, 1);
720 continue;
721 }
722
723 switch(*(++ch)) { // skip back slash and get next char
724 case 'k': { // group name
725 if ((ch + 2) >= end || ch[1] != '<') { goto replace_expr_error; }
726 char const* name_beg = ch += 2;
727 while (*ch != '>') { if(++ch == end) { goto replace_expr_error; } }
728 mrb_assert(ch < end);
729 mrb_assert(*ch == '>');
730 int const idx = onig_name_to_backref_number(
731 reg, (OnigUChar const*)name_beg, (OnigUChar const*)ch, match);
732 if (idx < 0) {
733 mrb_raisef(mrb, E_INDEX_ERROR, "undefined group name reference: %S",
734 str_substr(mrb, replace, name_beg - RSTRING_PTR(replace), ch - name_beg));
735 }
736 mrb_str_cat(mrb, result, RSTRING_PTR(src) + match->beg[idx], match->end[idx] - match->beg[idx]);
737 } break;
738
739 case '\\': // escaped back slash
740 mrb_str_cat(mrb, result, ch, 1);
741 break;
742
743 default:
744 if (isdigit(*ch)) { // group number 0-9
745 int const idx = *ch - '0';
746 if (idx < match->num_regs) {
747 mrb_str_cat(mrb, result, RSTRING_PTR(src) + match->beg[idx], match->end[idx] - match->beg[idx]);
748 }
749 } else {
750 char const str[] = { '\\', *ch };
751 mrb_str_cat(mrb, result, str, 2);
752 }
753 break;
754 }
755 }
756
757 if(ch == end) { return; }
758
759replace_expr_error:
760 mrb_raisef(mrb, E_REGEXP_ERROR, "invalid replace expression: %S", replace);
761}
762
763// ISO 15.2.10.5.18
764static mrb_value
765string_gsub(mrb_state* mrb, mrb_value self) {
766 mrb_value blk, match_expr, replace_expr = mrb_nil_value();
767 int const argc = mrb_get_args(mrb, "&o|o", &blk, &match_expr, &replace_expr);
768
769 if(!ONIG_REGEXP_P(match_expr)) {
770 mrb_value argv[] = { match_expr, replace_expr };
771 return mrb_funcall_with_block(mrb, self, mrb_intern_lit(mrb, "string_gsub"), argc, argv, blk);
772 }
773
774 if(argc == 1 && mrb_nil_p(blk)) {
775 return mrb_funcall(mrb, self, "to_enum", 2, mrb_symbol_value(mrb_intern_lit(mrb, "onig_regexp_gsub")), match_expr);
776 }
777
778 if(!mrb_nil_p(blk) && !mrb_nil_p(replace_expr)) {
779 blk = mrb_nil_value();
780 }
781
782 if (mrb_nil_p(blk) && !mrb_hash_p(replace_expr)) {
783 replace_expr = mrb_string_type(mrb, replace_expr);
784 }
785
786 OnigRegex reg;
787 Data_Get_Struct(mrb, match_expr, &mrb_onig_regexp_type, reg);
788 mrb_value const result = mrb_str_new(mrb, NULL, 0);
789 mrb_value const match_value = create_onig_region(mrb, self, match_expr);
790 OnigRegion* const match = (OnigRegion*)DATA_PTR(match_value);
791 int last_end_pos = 0;
792
793 while(1) {
794 if(onig_match_common(mrb, reg, match_value, self, last_end_pos) == ONIG_MISMATCH) { break; }
795
796 mrb_str_cat(mrb, result, RSTRING_PTR(self) + last_end_pos, match->beg[0] - last_end_pos);
797
798 if(mrb_nil_p(blk)) {
799 append_replace_str(mrb, result, replace_expr, self, reg, match);
800 } else {
801 mrb_value const tmp_str = mrb_str_to_str(mrb, mrb_yield(mrb, blk, str_substr(
802 mrb, self, match->beg[0], match->end[0] - match->beg[0])));
803 mrb_assert(mrb_string_p(tmp_str));
804 mrb_str_concat(mrb, result, tmp_str);
805 }
806
807 last_end_pos = match->end[0];
808 if (match->beg[0] == match->end[0]) {
809 /*
810 * Always consume at least one character of the input string
811 * in order to prevent infinite loops.
812 */
813 char* p = RSTRING_PTR(self) + last_end_pos;
814 char* e = p + RSTRING_LEN(self);
815 int len = utf8len(p, e);
816 if (RSTRING_LEN(self) < last_end_pos + len) break;
817 mrb_str_cat(mrb, result, p, len);
818 last_end_pos += len;
819 }
820 }
821
822 if (RSTRING_LEN(self) < last_end_pos) {
823 mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid byte sequence in UTF-8");
824 }
825 mrb_str_cat(mrb, result, RSTRING_PTR(self) + last_end_pos, RSTRING_LEN(self) - last_end_pos);
826 return result;
827}
828
829// ISO 15.2.10.5.32
830static mrb_value
831string_scan(mrb_state* mrb, mrb_value self) {
832 mrb_value blk, match_expr;
833 mrb_get_args(mrb, "&o", &blk, &match_expr);
834
835 if(!ONIG_REGEXP_P(match_expr)) {
836 return mrb_funcall_with_block(mrb, self, mrb_intern_lit(mrb, "string_scan"),
837 1, &match_expr, blk);
838 }
839
840 OnigRegex reg;
841 Data_Get_Struct(mrb, match_expr, &mrb_onig_regexp_type, reg);
842 mrb_value const result = mrb_nil_p(blk)? mrb_ary_new(mrb) : self;
843 mrb_value m_value = create_onig_region(mrb, self, match_expr);
844 OnigRegion* const m = (OnigRegion*)DATA_PTR(m_value);
845 int last_end_pos = 0;
846 int i;
847
848 while (1) {
849 if(onig_match_common(mrb, reg, m_value, self, last_end_pos) == ONIG_MISMATCH) { break; }
850
851 if(mrb_nil_p(blk)) {
852 mrb_assert(mrb_array_p(result));
853 if(m->num_regs == 1) {
854 mrb_ary_push(mrb, result, str_substr(mrb, self, m->beg[0], m->end[0] - m->beg[0]));
855 } else {
856 mrb_value const elem = mrb_ary_new_capa(mrb, m->num_regs - 1);
857 for(i = 1; i < m->num_regs; ++i) {
858 mrb_ary_push(mrb, elem, str_substr(mrb, self, m->beg[i], m->end[i] - m->beg[i]));
859 }
860 mrb_ary_push(mrb, result, elem);
861 }
862 } else { // call block
863 mrb_assert(mrb_string_p(result));
864 if(m->num_regs == 1) {
865 mrb_yield(mrb, blk, str_substr(mrb, self, m->beg[0], m->end[0] - m->beg[0]));
866 } else {
867 mrb_value argv = mrb_ary_new_capa(mrb, m->num_regs - 1);
868 for(i = 1; i < m->num_regs; ++i) {
869 mrb_ary_push(mrb, argv, str_substr(mrb, self, m->beg[i], m->end[i] - m->beg[i]));
870 }
871 mrb_yield(mrb, blk, argv);
872 }
873 }
874
875 if (m->beg[0] == m->end[0]) {
876 /*
877 * Always consume at least one character of the input string
878 */
879 if (RSTRING_LEN(self) > m->end[0]) {
880 char* p = RSTRING_PTR(self) + last_end_pos;
881 char* e = p + RSTRING_LEN(self);
882 int len = utf8len(p, e);
883 last_end_pos = m->end[0] + len;
884 } else {
885 last_end_pos = m->end[0] + 1;
886 }
887 } else {
888 last_end_pos = m->end[0];
889 }
890 }
891
892 return result;
893}
894
895// ISO 15.2.10.5.35
896static mrb_value
897string_split(mrb_state* mrb, mrb_value self) {
898 mrb_value pattern = mrb_nil_value(); mrb_int limit = 0;
899 int argc = mrb_get_args(mrb, "|oi", &pattern, &limit);
900 mrb_value result, tmp;
901 mrb_bool lim_p = !(argc == 2 && 0 < limit);
902
903 if(mrb_nil_p(pattern)) { // check $; global variable
904 pattern = mrb_gv_get(mrb, mrb_intern_lit(mrb, "$;"));
905 if (mrb_nil_p(pattern)) {
906 pattern = mrb_str_new_lit(mrb, " ");
907 } else if (!mrb_string_p(pattern) && !ONIG_REGEXP_P(pattern)) {
908 mrb_raise(mrb, E_TYPE_ERROR, "value of $; must be String or Regexp");
909 }
910 if (argc == 0) { argc = 1; }
911 }
912
913 if (!ONIG_REGEXP_P(pattern)) {
914 if(!mrb_nil_p(pattern)) { pattern = mrb_string_type(mrb, pattern); }
915 if(mrb_string_p(pattern) && RSTRING_LEN(pattern) == 0) {
916 /* Special case - split into chars */
917 pattern = mrb_funcall(mrb, mrb_obj_value(mrb_class_get(mrb, "OnigRegexp")), "new", 1, pattern);
918 } else {
919 return mrb_funcall(mrb, self, "string_split", argc, pattern, mrb_fixnum_value(limit));
920 }
921 }
922
923 if(RSTRING_LEN(self) == 0) { return mrb_ary_new(mrb); }
924 if(limit == 1) { return mrb_ary_new_from_values(mrb, 1, &self); }
925
926 result = mrb_ary_new(mrb);
927
928 OnigRegex reg;
929 Data_Get_Struct(mrb, pattern, &mrb_onig_regexp_type, reg);
930 mrb_value const match_value = create_onig_region(mrb, self, pattern);
931 OnigRegion* const match = (OnigRegion*)DATA_PTR(match_value);
932 char *ptr = mrb_str_to_cstr(mrb, self);
933 mrb_int len = RSTRING_LEN(self);
934 mrb_int start = 0, beg = 0, end = 0;
935 mrb_int idx = 0, i = 0;
936 mrb_int last_null = 0;
937
938 if (argc == 2) { i = 1; }
939 while ((end = onig_match_common(mrb, reg, match_value, self, start)) >= 0) {
940 if (start == end && match->beg[0] == match->end[0]) {
941 if (!ptr) {
942 mrb_ary_push(mrb, result, mrb_str_new_lit(mrb, ""));
943 break;
944 }
945 else if (last_null == 1) {
946 mrb_ary_push(mrb, result, str_substr(mrb, self, beg, utf8len(ptr+beg, ptr+len)));
947 beg = start;
948 }
949 else {
950 if (start == len)
951 start++;
952 else
953 start += utf8len(ptr+start, ptr+len);
954 last_null = 1;
955 continue;
956 }
957 }
958 else {
959 mrb_ary_push(mrb, result, str_substr(mrb, self, beg, end-beg));
960 beg = start = match->end[0];
961 }
962 last_null = 0;
963
964 for (idx=1; idx < match->num_regs; idx++) {
965 if (match->beg[idx] == -1) continue;
966 if (match->beg[idx] == match->end[idx])
967 tmp = mrb_str_new_lit(mrb, "");
968 else
969 tmp = str_substr(mrb, self, match->beg[idx], match->end[idx]-match->beg[idx]);
970 mrb_ary_push(mrb, result, tmp);
971 }
972 if (!lim_p && limit <= ++i) break;
973 }
974
975 if (RSTRING_LEN(self) > 0 && (!lim_p || RSTRING_LEN(self) > beg || limit < 0)) {
976 if (RSTRING_LEN(self) == beg)
977 tmp = mrb_str_new_lit(mrb, "");
978 else
979 tmp = str_substr(mrb, self, beg, RSTRING_LEN(self)-beg);
980 mrb_ary_push(mrb, result, tmp);
981 }
982 if (lim_p && limit == 0) {
983 while ((len = RARRAY_LEN(result)) > 0 &&
984 (tmp = mrb_ary_ref(mrb, result, len-1), RSTRING_LEN(tmp) == 0))
985 mrb_ary_pop(mrb, result);
986 }
987
988 return result;
989}
990
991// ISO 15.2.10.5.36
992static mrb_value
993string_sub(mrb_state* mrb, mrb_value self) {
994 mrb_value blk, match_expr, replace_expr = mrb_nil_value();
995 int const argc = mrb_get_args(mrb, "&o|o", &blk, &match_expr, &replace_expr);
996
997 if(!ONIG_REGEXP_P(match_expr)) {
998 mrb_value argv[] = { match_expr, replace_expr };
999 return mrb_funcall_with_block(mrb, self, mrb_intern_lit(mrb, "string_sub"), argc, argv, blk);
1000 }
1001
1002 if(argc == 1 && mrb_nil_p(blk)) {
1003 mrb_raise(mrb, E_ARGUMENT_ERROR, "wrong number of arguments (given 1, expected 2)");
1004 }
1005
1006 if(!mrb_nil_p(blk) && !mrb_nil_p(replace_expr)) {
1007 blk = mrb_nil_value();
1008 }
1009
1010 if (mrb_nil_p(blk) && !mrb_hash_p(replace_expr)) {
1011 replace_expr = mrb_string_type(mrb, replace_expr);
1012 }
1013
1014 OnigRegex reg;
1015 Data_Get_Struct(mrb, match_expr, &mrb_onig_regexp_type, reg);
1016 mrb_value const result = mrb_str_new(mrb, NULL, 0);
1017 mrb_value const match_value = create_onig_region(mrb, self, match_expr);
1018 OnigRegion* const match = (OnigRegion*)DATA_PTR(match_value);
1019
1020 int const onig_result = onig_match_common(mrb, reg, match_value, self, 0);
1021 if(onig_result == ONIG_MISMATCH) { return self; }
1022
1023 mrb_str_cat(mrb, result, RSTRING_PTR(self), match->beg[0]);
1024
1025 if(mrb_nil_p(blk)) {
1026 append_replace_str(mrb, result, replace_expr, self, reg, match);
1027 } else {
1028 mrb_value const tmp_str = mrb_str_to_str(mrb, mrb_yield(mrb, blk, str_substr(
1029 mrb, self, match->beg[0], match->end[0] - match->beg[0])));
1030 mrb_assert(mrb_string_p(tmp_str));
1031 mrb_str_concat(mrb, result, tmp_str);
1032 }
1033
1034 int const last_end_pos = match->end[0];
1035 mrb_str_cat(mrb, result, RSTRING_PTR(self) + last_end_pos, RSTRING_LEN(self) - last_end_pos);
1036
1037 return result;
1038}
1039
1040static mrb_value
1041onig_regexp_clear_global_variables(mrb_state* mrb, mrb_value self) {
1042 mrb_gv_remove(mrb, mrb_intern_lit(mrb, "$~"));
1043 mrb_gv_remove(mrb, mrb_intern_lit(mrb, "$&"));
1044 mrb_gv_remove(mrb, mrb_intern_lit(mrb, "$`"));
1045 mrb_gv_remove(mrb, mrb_intern_lit(mrb, "$'"));
1046 mrb_gv_remove(mrb, mrb_intern_lit(mrb, "$+"));
1047
1048 int idx;
1049 for(idx = 1; idx < 10; ++idx) {
1050 char const n[] = { '$', '0' + idx };
1051 mrb_gv_remove(mrb, mrb_intern(mrb, n, 2));
1052 }
1053
1054 return self;
1055}
1056
1057static mrb_value
1058onig_regexp_does_set_global_variables(mrb_state* mrb, mrb_value self) {
1059 (void)self;
1060 return mrb_obj_iv_get(mrb, (struct RObject*)mrb_class_get(mrb, "OnigRegexp"),
1061 mrb_intern_lit(mrb, "@set_global_variables"));
1062}
1063static mrb_value
1064onig_regexp_set_set_global_variables(mrb_state* mrb, mrb_value self) {
1065 mrb_value arg;
1066 mrb_get_args(mrb, "o", &arg);
1067 mrb_value const ret = mrb_bool_value(mrb_bool(arg));
1068 mrb_obj_iv_set(mrb, (struct RObject*)mrb_class_get(mrb, "OnigRegexp"),
1069 mrb_intern_lit(mrb, "@set_global_variables"), ret);
1070 onig_regexp_clear_global_variables(mrb, self);
1071 return ret;
1072}
1073
1074// ISO 15.2.15.6.2
1075static mrb_value
1076onig_regexp_escape(mrb_state* mrb, mrb_value self) {
1077 char* str_begin; mrb_args_int str_len;
1078 mrb_get_args(mrb, "s", &str_begin, &str_len);
1079
1080 mrb_value const ret = mrb_str_new(mrb, NULL, 0);
1081 char escaped_char = 0;
1082 int substr_count = 0;
1083 char const* str = str_begin;
1084
1085 for(; str < (str_begin + str_len); ++str) {
1086 switch(*str) {
1087 case '\n': escaped_char = 'n'; break;
1088 case '\t': escaped_char = 't'; break;
1089 case '\r': escaped_char = 'r'; break;
1090 case '\f': escaped_char = 'f'; break;
1091
1092 case ' ':
1093 case '#':
1094 case '$':
1095 case '(':
1096 case ')':
1097 case '*':
1098 case '+':
1099 case '-':
1100 case '.':
1101 case '?':
1102 case '[':
1103 case '\\':
1104 case ']':
1105 case '^':
1106 case '{':
1107 case '|':
1108 case '}':
1109 escaped_char = *str; break;
1110
1111 default: ++substr_count; continue;
1112 }
1113
1114 mrb_str_cat(mrb, ret, str - substr_count, substr_count);
1115 substr_count = 0;
1116
1117 char const c[] = { '\\', escaped_char };
1118 mrb_str_cat(mrb, ret, c, 2);
1119 }
1120 mrb_str_cat(mrb, ret, str - substr_count, substr_count);
1121 return ret;
1122}
1123
1124void
1125mrb_mruby_onig_regexp_gem_init(mrb_state* mrb) {
1126 struct RClass *clazz;
1127
1128 clazz = mrb_define_class(mrb, "OnigRegexp", mrb->object_class);
1129 MRB_SET_INSTANCE_TT(clazz, MRB_TT_DATA);
1130
1131 // enable global variables setting in onig_match_common by default
1132 mrb_obj_iv_set(mrb, (struct RObject*)clazz, mrb_intern_lit(mrb, "@set_global_variables"), mrb_true_value());
1133
1134 mrb_define_const(mrb, clazz, "IGNORECASE", mrb_fixnum_value(ONIG_OPTION_IGNORECASE));
1135 mrb_define_const(mrb, clazz, "EXTENDED", mrb_fixnum_value(ONIG_OPTION_EXTEND));
1136 mrb_define_const(mrb, clazz, "MULTILINE", mrb_fixnum_value(ONIG_OPTION_MULTILINE));
1137 mrb_define_const(mrb, clazz, "SINGLELINE", mrb_fixnum_value(ONIG_OPTION_SINGLELINE));
1138 mrb_define_const(mrb, clazz, "FIND_LONGEST", mrb_fixnum_value(ONIG_OPTION_FIND_LONGEST));
1139 mrb_define_const(mrb, clazz, "FIND_NOT_EMPTY", mrb_fixnum_value(ONIG_OPTION_FIND_NOT_EMPTY));
1140 mrb_define_const(mrb, clazz, "NEGATE_SINGLELINE", mrb_fixnum_value(ONIG_OPTION_NEGATE_SINGLELINE));
1141 mrb_define_const(mrb, clazz, "DONT_CAPTURE_GROUP", mrb_fixnum_value(ONIG_OPTION_DONT_CAPTURE_GROUP));
1142 mrb_define_const(mrb, clazz, "CAPTURE_GROUP", mrb_fixnum_value(ONIG_OPTION_CAPTURE_GROUP));
1143 mrb_define_const(mrb, clazz, "NOTBOL", mrb_fixnum_value(ONIG_OPTION_NOTBOL));
1144 mrb_define_const(mrb, clazz, "NOTEOL", mrb_fixnum_value(ONIG_OPTION_NOTEOL));
1145#ifdef ONIG_OPTION_POSIX_REGION
1146 mrb_define_const(mrb, clazz, "POSIX_REGION", mrb_fixnum_value(ONIG_OPTION_POSIX_REGION));
1147#endif
1148#ifdef ONIG_OPTION_ASCII_RANGE
1149 mrb_define_const(mrb, clazz, "ASCII_RANGE", mrb_fixnum_value(ONIG_OPTION_ASCII_RANGE));
1150#endif
1151#ifdef ONIG_OPTION_POSIX_BRACKET_ALL_RANGE
1152 mrb_define_const(mrb, clazz, "POSIX_BRACKET_ALL_RANGE", mrb_fixnum_value(ONIG_OPTION_POSIX_BRACKET_ALL_RANGE));
1153#endif
1154#ifdef ONIG_OPTION_WORD_BOUND_ALL_RANGE
1155 mrb_define_const(mrb, clazz, "WORD_BOUND_ALL_RANGE", mrb_fixnum_value(ONIG_OPTION_WORD_BOUND_ALL_RANGE));
1156#endif
1157#ifdef ONIG_OPTION_NEWLINE_CRLF
1158 mrb_define_const(mrb, clazz, "NEWLINE_CRLF", mrb_fixnum_value(ONIG_OPTION_NEWLINE_CRLF));
1159#endif
1160#ifdef ONIG_OPTION_NOTBOS
1161 mrb_define_const(mrb, clazz, "NOTBOS", mrb_fixnum_value(ONIG_OPTION_NOTBOS));
1162#endif
1163#ifdef ONIG_OPTION_NOTEOS
1164 mrb_define_const(mrb, clazz, "NOTEOS", mrb_fixnum_value(ONIG_OPTION_NOTEOS));
1165#endif
1166
1167 mrb_define_method(mrb, clazz, "initialize", onig_regexp_initialize, MRB_ARGS_REQ(1) | MRB_ARGS_OPT(2));
1168 mrb_define_method(mrb, clazz, "==", onig_regexp_equal, MRB_ARGS_REQ(1));
1169 mrb_define_method(mrb, clazz, "match", onig_regexp_match, MRB_ARGS_REQ(1) | MRB_ARGS_OPT(1));
1170 mrb_define_method(mrb, clazz, "match?", onig_regexp_match_p, MRB_ARGS_REQ(1) | MRB_ARGS_OPT(1));
1171 mrb_define_method(mrb, clazz, "casefold?", onig_regexp_casefold_p, MRB_ARGS_NONE());
1172
1173 mrb_define_method(mrb, clazz, "options", onig_regexp_options, MRB_ARGS_NONE());
1174 mrb_define_method(mrb, clazz, "inspect", onig_regexp_inspect, MRB_ARGS_NONE());
1175 mrb_define_method(mrb, clazz, "to_s", onig_regexp_to_s, MRB_ARGS_NONE());
1176
1177 mrb_define_module_function(mrb, clazz, "escape", onig_regexp_escape, MRB_ARGS_REQ(1));
1178 mrb_define_module_function(mrb, clazz, "quote", onig_regexp_escape, MRB_ARGS_REQ(1));
1179 mrb_define_module_function(mrb, clazz, "version", onig_regexp_version, MRB_ARGS_NONE());
1180 mrb_define_module_function(mrb, clazz, "set_global_variables?", onig_regexp_does_set_global_variables, MRB_ARGS_NONE());
1181 mrb_define_module_function(mrb, clazz, "set_global_variables=", onig_regexp_set_set_global_variables, MRB_ARGS_REQ(1));
1182 mrb_define_module_function(mrb, clazz, "clear_global_variables", onig_regexp_clear_global_variables, MRB_ARGS_NONE());
1183
1184 struct RClass* match_data = mrb_define_class(mrb, "OnigMatchData", mrb->object_class);
1185 MRB_SET_INSTANCE_TT(clazz, MRB_TT_DATA);
1186 mrb_undef_class_method(mrb, match_data, "new");
1187
1188 // mrb_define_method(mrb, match_data, "==", &match_data_eq);
1189 mrb_define_method(mrb, match_data, "[]", &match_data_index, MRB_ARGS_REQ(1));
1190 mrb_define_method(mrb, match_data, "begin", &match_data_begin, MRB_ARGS_REQ(1));
1191 mrb_define_method(mrb, match_data, "captures", &match_data_captures, MRB_ARGS_NONE());
1192 mrb_define_method(mrb, match_data, "end", &match_data_end, MRB_ARGS_REQ(1));
1193 // mrb_define_method(mrb, match_data, "eql?", &match_data_eq);
1194 // mrb_define_method(mrb, match_data, "hash", &match_data_hash);
1195 mrb_define_method(mrb, match_data, "initialize_copy", &match_data_copy, MRB_ARGS_REQ(1));
1196 // mrb_define_method(mrb, match_data, "inspect", &match_data_inspect);
1197 mrb_define_method(mrb, match_data, "length", &match_data_length, MRB_ARGS_NONE());
1198 // mrb_define_method(mrb, match_data, "names", &match_data_names);
1199 mrb_define_method(mrb, match_data, "offset", &match_data_offset, MRB_ARGS_REQ(1));
1200 mrb_define_method(mrb, match_data, "post_match", &match_data_post_match, MRB_ARGS_NONE());
1201 mrb_define_method(mrb, match_data, "pre_match", &match_data_pre_match, MRB_ARGS_NONE());
1202 mrb_define_method(mrb, match_data, "regexp", &match_data_regexp, MRB_ARGS_NONE());
1203 mrb_define_method(mrb, match_data, "size", &match_data_length, MRB_ARGS_NONE());
1204 mrb_define_method(mrb, match_data, "string", &match_data_string, MRB_ARGS_NONE());
1205 mrb_define_method(mrb, match_data, "to_a", &match_data_to_a, MRB_ARGS_NONE());
1206 mrb_define_method(mrb, match_data, "to_s", &match_data_to_s, MRB_ARGS_NONE());
1207 // mrb_define_method(mrb, match_data, "values_at", &match_data_values_at);
1208
1209 mrb_define_method(mrb, mrb->string_class, "onig_regexp_gsub", &string_gsub, MRB_ARGS_REQ(1) | MRB_ARGS_OPT(1) | MRB_ARGS_BLOCK());
1210 mrb_define_method(mrb, mrb->string_class, "onig_regexp_sub", &string_sub, MRB_ARGS_REQ(1) | MRB_ARGS_OPT(1) | MRB_ARGS_BLOCK());
1211 mrb_define_method(mrb, mrb->string_class, "onig_regexp_split", &string_split, MRB_ARGS_REQ(1));
1212 mrb_define_method(mrb, mrb->string_class, "onig_regexp_scan", &string_scan, MRB_ARGS_REQ(1) | MRB_ARGS_BLOCK());
1213 mrb_define_method(mrb, mrb->string_class, "onig_regexp_match?", &string_match_p, MRB_ARGS_REQ(1) | MRB_ARGS_OPT(1));
1214}
1215
1216void
1217mrb_mruby_onig_regexp_gem_final(mrb_state* mrb) {
1218 (void)mrb;
1219}
1220
1221// vim:set et:
Note: See TracBrowser for help on using the repository browser.