[279] | 1 | /*
|
---|
| 2 | The MIT License (MIT)
|
---|
| 3 |
|
---|
| 4 | Copyright (c) 2015 mattn.
|
---|
| 5 |
|
---|
| 6 | Permission is hereby granted, free of charge, to any person obtaining a copy
|
---|
| 7 | of this software and associated documentation files (the "Software"), to deal
|
---|
| 8 | in the Software without restriction, including without limitation the rights
|
---|
| 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
---|
| 10 | copies of the Software, and to permit persons to whom the Software is
|
---|
| 11 | furnished to do so, subject to the following conditions:
|
---|
| 12 |
|
---|
| 13 | The above copyright notice and this permission notice shall be included in
|
---|
| 14 | all copies or substantial portions of the Software.
|
---|
| 15 |
|
---|
| 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
---|
| 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
---|
| 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
---|
| 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
---|
| 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
---|
| 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
---|
| 22 | THE SOFTWARE.
|
---|
| 23 | */
|
---|
| 24 | #include <stdio.h>
|
---|
| 25 | #include <string.h>
|
---|
| 26 | #include <ctype.h>
|
---|
[439] | 27 | #include <memory.h>
|
---|
[279] | 28 | #include <mruby.h>
|
---|
| 29 | #include <mruby/class.h>
|
---|
| 30 | #include <mruby/variable.h>
|
---|
| 31 | #include <mruby/array.h>
|
---|
[439] | 32 | #include <mruby/hash.h>
|
---|
[279] | 33 | #include <mruby/string.h>
|
---|
| 34 | #include <mruby/data.h>
|
---|
| 35 | #include <mruby/variable.h>
|
---|
| 36 | #ifdef _MSC_VER
|
---|
| 37 | #define ONIG_EXTERN extern
|
---|
| 38 | #endif
|
---|
[439] | 39 | #ifdef HAVE_ONIGMO_H
|
---|
| 40 | #include <onigmo.h>
|
---|
| 41 | #elif defined(HAVE_ONIGURUMA_H)
|
---|
| 42 | #include <oniguruma.h>
|
---|
| 43 | #else
|
---|
| 44 | #include "oniguruma.h"
|
---|
| 45 | #endif
|
---|
[279] | 46 |
|
---|
/*
 * mrb_args_int: integer type selected by the presence of MRUBY_VERSION.
 * It is mrb_int when MRUBY_VERSION is defined and plain int otherwise.
 */
#if defined(MRUBY_VERSION)
#  define mrb_args_int mrb_int
#else
#  define mrb_args_int int
#endif
| 52 |
|
---|
[439] | 53 | static const char utf8len_codepage[256] =
|
---|
| 54 | {
|
---|
| 55 | 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
---|
| 56 | 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
---|
| 57 | 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
---|
| 58 | 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
---|
| 59 | 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
---|
| 60 | 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
---|
| 61 | 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
|
---|
| 62 | 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,1,1,1,1,1,1,1,1,1,1,1,
|
---|
| 63 | };
|
---|
| 64 |
|
---|
| 65 | static mrb_int
|
---|
| 66 | utf8len(const char* p, const char* e)
|
---|
| 67 | {
|
---|
| 68 | mrb_int len;
|
---|
| 69 | mrb_int i;
|
---|
| 70 |
|
---|
| 71 | len = utf8len_codepage[(unsigned char)*p];
|
---|
| 72 | if (p + len > e) return 1;
|
---|
| 73 | for (i = 1; i < len; ++i)
|
---|
| 74 | if ((p[i] & 0xc0) != 0x80)
|
---|
| 75 | return 1;
|
---|
| 76 | return len;
|
---|
| 77 | }
|
---|
| 78 |
|
---|
[279] | 79 | static void
|
---|
| 80 | onig_regexp_free(mrb_state *mrb, void *p) {
|
---|
| 81 | onig_free((OnigRegex) p);
|
---|
| 82 | }
|
---|
| 83 |
|
---|
| 84 | static struct mrb_data_type mrb_onig_regexp_type = {
|
---|
| 85 | "PosixRegexp", onig_regexp_free
|
---|
| 86 | };
|
---|
| 87 |
|
---|
/*
 * Non-zero when obj is an MRB_TT_DATA object tagged with mrb_onig_regexp_type.
 * Note: obj is evaluated twice.
 */
#define ONIG_REGEXP_P(obj) \
  (mrb_type(obj) == MRB_TT_DATA && DATA_TYPE(obj) == &mrb_onig_regexp_type)
| 90 |
|
---|
[279] | 91 | static void
|
---|
| 92 | match_data_free(mrb_state* mrb, void* p) {
|
---|
| 93 | (void)mrb;
|
---|
| 94 | onig_region_free((OnigRegion*)p, 1);
|
---|
| 95 | }
|
---|
| 96 |
|
---|
| 97 | static struct mrb_data_type mrb_onig_region_type = {
|
---|
| 98 | "OnigRegion", match_data_free
|
---|
| 99 | };
|
---|
| 100 |
|
---|
| 101 | static mrb_value
|
---|
[439] | 102 | str_substr(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
|
---|
| 103 | {
|
---|
| 104 | #ifdef MRB_UTF8_STRING
|
---|
| 105 | return mrb_str_new(mrb, RSTRING_PTR(str) + beg, len);
|
---|
| 106 | #else
|
---|
| 107 | return mrb_str_substr(mrb, str, beg, len);
|
---|
| 108 | #endif
|
---|
| 109 | }
|
---|
| 110 |
|
---|
| 111 | static mrb_value
|
---|
[279] | 112 | onig_regexp_initialize(mrb_state *mrb, mrb_value self) {
|
---|
| 113 | mrb_value str, flag = mrb_nil_value(), code = mrb_nil_value();
|
---|
| 114 | mrb_get_args(mrb, "S|oo", &str, &flag, &code);
|
---|
| 115 |
|
---|
| 116 | int cflag = 0;
|
---|
| 117 | OnigEncoding enc = ONIG_ENCODING_UTF8;
|
---|
| 118 | if(mrb_string_p(code)) {
|
---|
| 119 | char const* str_code = mrb_string_value_ptr(mrb, code);
|
---|
| 120 | if(strchr(str_code, 'n') || strchr(str_code, 'N')) {
|
---|
| 121 | enc = ONIG_ENCODING_ASCII;
|
---|
| 122 | }
|
---|
| 123 | }
|
---|
| 124 | if(mrb_nil_p(flag)) {
|
---|
| 125 | } else if(mrb_type(flag) == MRB_TT_TRUE) {
|
---|
| 126 | cflag |= ONIG_OPTION_IGNORECASE;
|
---|
| 127 | } else if(mrb_fixnum_p(flag)) {
|
---|
| 128 | int int_flags = mrb_fixnum(flag);
|
---|
| 129 | if(int_flags & 0x1) { cflag |= ONIG_OPTION_IGNORECASE; }
|
---|
| 130 | if(int_flags & 0x2) { cflag |= ONIG_OPTION_EXTEND; }
|
---|
| 131 | if(int_flags & 0x4) { cflag |= ONIG_OPTION_MULTILINE; }
|
---|
| 132 | } else if(mrb_string_p(flag)) {
|
---|
| 133 | char const* str_flags = mrb_string_value_ptr(mrb, flag);
|
---|
| 134 | if(strchr(str_flags, 'i')) { cflag |= ONIG_OPTION_IGNORECASE; }
|
---|
| 135 | if(strchr(str_flags, 'x')) { cflag |= ONIG_OPTION_EXTEND; }
|
---|
| 136 | if(strchr(str_flags, 'm')) { cflag |= ONIG_OPTION_MULTILINE; }
|
---|
| 137 | } else {
|
---|
| 138 | mrb_raisef(mrb, E_ARGUMENT_ERROR, "unknown regexp flag: %S", flag);
|
---|
| 139 | }
|
---|
| 140 |
|
---|
| 141 | OnigErrorInfo einfo;
|
---|
| 142 | OnigRegex reg;
|
---|
| 143 | int result = onig_new(®, (OnigUChar*)RSTRING_PTR(str), (OnigUChar*) RSTRING_PTR(str) + RSTRING_LEN(str),
|
---|
[439] | 144 | cflag, enc, ONIG_SYNTAX_RUBY, &einfo);
|
---|
[279] | 145 | if (result != ONIG_NORMAL) {
|
---|
| 146 | char err[ONIG_MAX_ERROR_MESSAGE_LEN] = "";
|
---|
[439] | 147 | onig_error_code_to_str((OnigUChar*)err, result, &einfo);
|
---|
| 148 | mrb_raisef(mrb, E_REGEXP_ERROR, "'%S' is an invalid regular expression because %S.",
|
---|
[279] | 149 | str, mrb_str_new_cstr(mrb, err));
|
---|
| 150 | }
|
---|
| 151 | mrb_iv_set(mrb, self, mrb_intern_lit(mrb, "@source"), str);
|
---|
| 152 |
|
---|
| 153 | DATA_PTR(self) = reg;
|
---|
| 154 | DATA_TYPE(self) = &mrb_onig_regexp_type;
|
---|
| 155 |
|
---|
| 156 | return self;
|
---|
| 157 | }
|
---|
| 158 |
|
---|
| 159 | static mrb_value
|
---|
| 160 | create_onig_region(mrb_state* mrb, mrb_value const str, mrb_value rex) {
|
---|
| 161 | mrb_assert(mrb_string_p(str));
|
---|
| 162 | mrb_assert(mrb_type(rex) == MRB_TT_DATA && DATA_TYPE(rex) == &mrb_onig_regexp_type);
|
---|
| 163 | mrb_value const c = mrb_obj_value(mrb_data_object_alloc(
|
---|
| 164 | mrb, mrb_class_get(mrb, "OnigMatchData"), onig_region_new(), &mrb_onig_region_type));
|
---|
| 165 | mrb_iv_set(mrb, c, mrb_intern_lit(mrb, "string"), mrb_str_dup(mrb, str));
|
---|
| 166 | mrb_iv_set(mrb, c, mrb_intern_lit(mrb, "regexp"), rex);
|
---|
| 167 | return c;
|
---|
| 168 | }
|
---|
| 169 |
|
---|
/*
 * Evaluates to nil when the `result` variable of the enclosing scope equals
 * ONIG_MISMATCH; otherwise evaluates (and yields) v. v is not evaluated on
 * a mismatch.
 */
#define MISMATCH_NIL_OR(v) \
  ((result == ONIG_MISMATCH) ? mrb_nil_value() : (v))
| 171 |
|
---|
[279] | 172 | static int
|
---|
| 173 | onig_match_common(mrb_state* mrb, OnigRegex reg, mrb_value match_value, mrb_value str, int pos) {
|
---|
| 174 | mrb_assert(mrb_string_p(str));
|
---|
| 175 | mrb_assert(DATA_TYPE(match_value) == &mrb_onig_region_type);
|
---|
| 176 | OnigRegion* const match = (OnigRegion*)DATA_PTR(match_value);
|
---|
| 177 | OnigUChar const* str_ptr = (OnigUChar const*)RSTRING_PTR(str);
|
---|
| 178 | int const result = onig_search(reg, str_ptr, str_ptr + RSTRING_LEN(str),
|
---|
| 179 | str_ptr + pos, str_ptr + RSTRING_LEN(str), match, 0);
|
---|
| 180 | if (result != ONIG_MISMATCH && result < 0) {
|
---|
| 181 | char err[ONIG_MAX_ERROR_MESSAGE_LEN] = "";
|
---|
| 182 | onig_error_code_to_str((OnigUChar*)err, result);
|
---|
| 183 | mrb_raise(mrb, E_REGEXP_ERROR, err);
|
---|
| 184 | }
|
---|
| 185 |
|
---|
| 186 | struct RObject* const cls = (struct RObject*)mrb_class_get(mrb, "OnigRegexp");
|
---|
[439] | 187 | mrb_obj_iv_set(mrb, cls, mrb_intern_lit(mrb, "@last_match"), MISMATCH_NIL_OR(match_value));
|
---|
[279] | 188 |
|
---|
[439] | 189 | if (mrb_class_get(mrb, "Regexp") == (struct RClass*)cls &&
|
---|
| 190 | mrb_bool(mrb_obj_iv_get(mrb, (struct RObject*)cls, mrb_intern_lit(mrb, "@set_global_variables"))))
|
---|
[279] | 191 | {
|
---|
[439] | 192 | mrb_gv_set(mrb, mrb_intern_lit(mrb, "$~"),
|
---|
| 193 | MISMATCH_NIL_OR(match_value));
|
---|
[279] | 194 | mrb_gv_set(mrb, mrb_intern_lit(mrb, "$&"),
|
---|
[439] | 195 | MISMATCH_NIL_OR(mrb_funcall(mrb, match_value, "[]", 1, mrb_fixnum_value(0))));
|
---|
| 196 | mrb_gv_set(mrb, mrb_intern_lit(mrb, "$`"),
|
---|
| 197 | MISMATCH_NIL_OR(mrb_funcall(mrb, match_value, "pre_match", 0)));
|
---|
| 198 | mrb_gv_set(mrb, mrb_intern_lit(mrb, "$'"),
|
---|
| 199 | MISMATCH_NIL_OR(mrb_funcall(mrb, match_value, "post_match", 0)));
|
---|
[279] | 200 | mrb_gv_set(mrb, mrb_intern_lit(mrb, "$+"),
|
---|
[439] | 201 | MISMATCH_NIL_OR(mrb_funcall(mrb, match_value, "[]", 1, mrb_fixnum_value(match->num_regs - 1))));
|
---|
[279] | 202 |
|
---|
| 203 | // $1 to $9
|
---|
| 204 | int idx = 1;
|
---|
| 205 | int const idx_max = match->num_regs > 10? 10 : match->num_regs;
|
---|
| 206 | for(; idx < idx_max; ++idx) {
|
---|
| 207 | char const n[] = { '$', '0' + idx };
|
---|
| 208 | mrb_gv_set(mrb, mrb_intern(mrb, n, 2),
|
---|
| 209 | mrb_funcall(mrb, match_value, "[]", 1, mrb_fixnum_value(idx)));
|
---|
| 210 | }
|
---|
| 211 |
|
---|
| 212 | for(; idx < 10; ++idx) {
|
---|
| 213 | char const n[] = { '$', '0' + idx };
|
---|
| 214 | mrb_gv_remove(mrb, mrb_intern(mrb, n, 2));
|
---|
| 215 | }
|
---|
| 216 | }
|
---|
| 217 |
|
---|
| 218 | return result;
|
---|
| 219 | }
|
---|
| 220 |
|
---|
| 221 | static mrb_value
|
---|
[439] | 222 | reg_operand(mrb_state *mrb, mrb_value obj) {
|
---|
| 223 | mrb_value ret;
|
---|
| 224 |
|
---|
| 225 | if (mrb_symbol_p(obj)) {
|
---|
| 226 | ret = mrb_sym2str(mrb, mrb_symbol(obj));
|
---|
| 227 | if (mrb_undef_p(ret)) {
|
---|
| 228 | mrb_bug(mrb, "can not intern %S", obj);
|
---|
| 229 | }
|
---|
| 230 | }
|
---|
| 231 | else {
|
---|
| 232 | ret = mrb_string_type(mrb, obj);
|
---|
| 233 | }
|
---|
| 234 | return ret;
|
---|
| 235 | }
|
---|
| 236 |
|
---|
| 237 | static mrb_value
|
---|
[279] | 238 | onig_regexp_match(mrb_state *mrb, mrb_value self) {
|
---|
| 239 | mrb_value str = mrb_nil_value();
|
---|
| 240 | OnigRegex reg;
|
---|
| 241 | mrb_int pos = 0;
|
---|
[439] | 242 | mrb_value block = mrb_nil_value();
|
---|
[279] | 243 |
|
---|
[439] | 244 | mrb_get_args(mrb, "o|i&", &str, &pos, &block);
|
---|
| 245 | if (mrb_nil_p(str)) {
|
---|
| 246 | return mrb_nil_value();
|
---|
| 247 | }
|
---|
| 248 | str = reg_operand(mrb, str);
|
---|
| 249 | if (pos < 0 || (pos > 0 && pos >= RSTRING_LEN(str))) {
|
---|
| 250 | return mrb_nil_value();
|
---|
| 251 | }
|
---|
| 252 |
|
---|
| 253 | Data_Get_Struct(mrb, self, &mrb_onig_regexp_type, reg);
|
---|
| 254 |
|
---|
| 255 | mrb_value const ret = create_onig_region(mrb, str, self);
|
---|
| 256 | if (onig_match_common(mrb, reg, ret, str, pos) == ONIG_MISMATCH) {
|
---|
| 257 | return mrb_nil_value();
|
---|
| 258 | }
|
---|
| 259 |
|
---|
| 260 | if (mrb_nil_p(block)) {
|
---|
| 261 | return ret;
|
---|
| 262 | } else {
|
---|
| 263 | return mrb_yield(mrb, block, ret);
|
---|
| 264 | }
|
---|
| 265 | }
|
---|
| 266 |
|
---|
| 267 | static mrb_value
|
---|
| 268 | onig_regexp_match_p(mrb_state *mrb, mrb_value self) {
|
---|
| 269 | mrb_value str = mrb_nil_value();
|
---|
| 270 | mrb_int pos = 0;
|
---|
| 271 | OnigRegex reg;
|
---|
| 272 | OnigUChar const* str_ptr;
|
---|
| 273 |
|
---|
[279] | 274 | mrb_get_args(mrb, "o|i", &str, &pos);
|
---|
[439] | 275 | if (mrb_nil_p(str)) {
|
---|
| 276 | return mrb_nil_value();
|
---|
| 277 | }
|
---|
| 278 | str = reg_operand(mrb, str);
|
---|
[279] | 279 | if (pos < 0 || (pos > 0 && pos >= RSTRING_LEN(str))) {
|
---|
| 280 | return mrb_nil_value();
|
---|
| 281 | }
|
---|
| 282 |
|
---|
[439] | 283 | Data_Get_Struct(mrb, self, &mrb_onig_regexp_type, reg);
|
---|
| 284 | str_ptr = (OnigUChar const*)RSTRING_PTR(str);
|
---|
| 285 | return mrb_bool_value(onig_search(
|
---|
| 286 | reg, str_ptr, str_ptr + RSTRING_LEN(str),
|
---|
| 287 | str_ptr + pos, str_ptr + RSTRING_LEN(str), NULL, 0) != ONIG_MISMATCH);
|
---|
| 288 | }
|
---|
| 289 |
|
---|
| 290 | static mrb_value
|
---|
| 291 | string_match_p(mrb_state *mrb, mrb_value self) {
|
---|
| 292 | mrb_value str = self;
|
---|
| 293 | mrb_int pos = 0;
|
---|
| 294 | OnigRegex reg;
|
---|
| 295 | OnigUChar const* str_ptr;
|
---|
| 296 |
|
---|
| 297 | mrb_get_args(mrb, "d|i", ®, &mrb_onig_regexp_type, &pos);
|
---|
| 298 | if (pos < 0 || (pos > 0 && pos >= RSTRING_LEN(str))) {
|
---|
| 299 | return mrb_nil_value();
|
---|
| 300 | }
|
---|
| 301 |
|
---|
[279] | 302 | if (mrb_nil_p(str)) {
|
---|
| 303 | return mrb_nil_value();
|
---|
| 304 | }
|
---|
| 305 | str = mrb_string_type(mrb, str);
|
---|
| 306 |
|
---|
[439] | 307 | str_ptr = (OnigUChar const*)RSTRING_PTR(str);
|
---|
| 308 | return mrb_bool_value(onig_search(
|
---|
| 309 | reg, str_ptr, str_ptr + RSTRING_LEN(str),
|
---|
| 310 | str_ptr + pos, str_ptr + RSTRING_LEN(str), NULL, 0) != ONIG_MISMATCH);
|
---|
[279] | 311 | }
|
---|
| 312 |
|
---|
| 313 | static mrb_value
|
---|
| 314 | onig_regexp_equal(mrb_state *mrb, mrb_value self) {
|
---|
| 315 | mrb_value other;
|
---|
| 316 | OnigRegex self_reg, other_reg;
|
---|
| 317 |
|
---|
| 318 | mrb_get_args(mrb, "o", &other);
|
---|
| 319 | if (mrb_obj_equal(mrb, self, other)){
|
---|
| 320 | return mrb_true_value();
|
---|
| 321 | }
|
---|
| 322 | if (mrb_nil_p(other)) {
|
---|
| 323 | return mrb_false_value();
|
---|
| 324 | }
|
---|
| 325 | if (!mrb_obj_is_kind_of(mrb, other, mrb_class_get(mrb, "OnigRegexp"))) {
|
---|
| 326 | return mrb_false_value();
|
---|
| 327 | }
|
---|
| 328 | Data_Get_Struct(mrb, self, &mrb_onig_regexp_type, self_reg);
|
---|
| 329 | Data_Get_Struct(mrb, other, &mrb_onig_regexp_type, other_reg);
|
---|
| 330 |
|
---|
| 331 | if (!self_reg || !other_reg){
|
---|
| 332 | mrb_raise(mrb, E_RUNTIME_ERROR, "Invalid OnigRegexp");
|
---|
| 333 | }
|
---|
| 334 | if (onig_get_options(self_reg) != onig_get_options(other_reg)){
|
---|
| 335 | return mrb_false_value();
|
---|
| 336 | }
|
---|
| 337 | return mrb_str_equal(mrb, mrb_iv_get(mrb, self, mrb_intern_lit(mrb, "@source")), mrb_iv_get(mrb, other, mrb_intern_lit(mrb, "@source"))) ?
|
---|
| 338 | mrb_true_value() : mrb_false_value();
|
---|
| 339 | }
|
---|
| 340 |
|
---|
| 341 | static mrb_value
|
---|
| 342 | onig_regexp_casefold_p(mrb_state *mrb, mrb_value self) {
|
---|
| 343 | OnigRegex reg;
|
---|
| 344 |
|
---|
| 345 | Data_Get_Struct(mrb, self, &mrb_onig_regexp_type, reg);
|
---|
| 346 | return (onig_get_options(reg) & ONIG_OPTION_IGNORECASE) ? mrb_true_value() : mrb_false_value();
|
---|
| 347 | }
|
---|
| 348 |
|
---|
| 349 | static mrb_value
|
---|
| 350 | onig_regexp_options(mrb_state *mrb, mrb_value self) {
|
---|
| 351 | OnigRegex reg;
|
---|
| 352 | Data_Get_Struct(mrb, self, &mrb_onig_regexp_type, reg);
|
---|
| 353 | return mrb_fixnum_value(onig_get_options(reg));
|
---|
| 354 | }
|
---|
| 355 |
|
---|
| 356 | static char *
|
---|
| 357 | option_to_str(char str[4], int options) {
|
---|
| 358 | char *p = str;
|
---|
| 359 | if (options & ONIG_OPTION_MULTILINE) *p++ = 'm';
|
---|
| 360 | if (options & ONIG_OPTION_IGNORECASE) *p++ = 'i';
|
---|
| 361 | if (options & ONIG_OPTION_EXTEND) *p++ = 'x';
|
---|
| 362 | *p = 0;
|
---|
| 363 | return str;
|
---|
| 364 | }
|
---|
| 365 |
|
---|
| 366 | static mrb_value
|
---|
| 367 | regexp_expr_str(mrb_state *mrb, mrb_value str, const char *p, int len) {
|
---|
| 368 | const char *pend;
|
---|
| 369 | char buf[5];
|
---|
| 370 |
|
---|
| 371 | pend = (const char *) p + len;
|
---|
| 372 | for (;p < pend; p++) {
|
---|
| 373 | unsigned char c, cc;
|
---|
| 374 |
|
---|
| 375 | c = *p;
|
---|
[439] | 376 | if (c == '/') {
|
---|
[279] | 377 | buf[0] = '\\'; buf[1] = c;
|
---|
| 378 | mrb_str_cat(mrb, str, buf, 2);
|
---|
| 379 | continue;
|
---|
| 380 | }
|
---|
| 381 | if (ISPRINT(c)) {
|
---|
| 382 | buf[0] = c;
|
---|
| 383 | mrb_str_cat(mrb, str, buf, 1);
|
---|
| 384 | continue;
|
---|
| 385 | }
|
---|
| 386 | switch (c) {
|
---|
| 387 | case '\n': cc = 'n'; break;
|
---|
| 388 | case '\r': cc = 'r'; break;
|
---|
| 389 | case '\t': cc = 't'; break;
|
---|
| 390 | default: cc = 0; break;
|
---|
| 391 | }
|
---|
| 392 | if (cc) {
|
---|
| 393 | buf[0] = '\\';
|
---|
| 394 | buf[1] = (char)cc;
|
---|
| 395 | mrb_str_cat(mrb, str, buf, 2);
|
---|
| 396 | continue;
|
---|
| 397 | }
|
---|
| 398 | else {
|
---|
| 399 | buf[0] = '\\';
|
---|
| 400 | buf[3] = '0' + c % 8; c /= 8;
|
---|
| 401 | buf[2] = '0' + c % 8; c /= 8;
|
---|
| 402 | buf[1] = '0' + c % 8;
|
---|
| 403 | mrb_str_cat(mrb, str, buf, 4);
|
---|
| 404 | continue;
|
---|
| 405 | }
|
---|
| 406 | }
|
---|
| 407 | return str;
|
---|
| 408 | }
|
---|
| 409 |
|
---|
| 410 | static mrb_value
|
---|
| 411 | onig_regexp_inspect(mrb_state *mrb, mrb_value self) {
|
---|
| 412 | OnigRegex reg;
|
---|
| 413 | Data_Get_Struct(mrb, self, &mrb_onig_regexp_type, reg);
|
---|
| 414 | mrb_value str = mrb_str_new_lit(mrb, "/");
|
---|
| 415 | mrb_value src = mrb_iv_get(mrb, self, mrb_intern_lit(mrb, "@source"));
|
---|
| 416 | regexp_expr_str(mrb, str, (const char *)RSTRING_PTR(src), RSTRING_LEN(src));
|
---|
| 417 | mrb_str_cat_lit(mrb, str, "/");
|
---|
| 418 | char opts[4];
|
---|
| 419 | if (*option_to_str(opts, onig_get_options(reg))) {
|
---|
| 420 | mrb_str_cat_cstr(mrb, str, opts);
|
---|
| 421 | }
|
---|
| 422 | if (onig_get_encoding(reg) == ONIG_ENCODING_ASCII) {
|
---|
| 423 | mrb_str_cat_lit(mrb, str, "n");
|
---|
| 424 | }
|
---|
| 425 | return str;
|
---|
| 426 | }
|
---|
| 427 |
|
---|
| 428 | static mrb_value
|
---|
| 429 | onig_regexp_to_s(mrb_state *mrb, mrb_value self) {
|
---|
| 430 | int options;
|
---|
| 431 | const int embeddable = ONIG_OPTION_MULTILINE|ONIG_OPTION_IGNORECASE|ONIG_OPTION_EXTEND;
|
---|
| 432 | long len;
|
---|
| 433 | const char* ptr;
|
---|
| 434 | mrb_value str = mrb_str_new_lit(mrb, "(?");
|
---|
| 435 | char optbuf[5];
|
---|
| 436 |
|
---|
| 437 | OnigRegex reg;
|
---|
| 438 | Data_Get_Struct(mrb, self, &mrb_onig_regexp_type, reg);
|
---|
| 439 | options = onig_get_options(reg);
|
---|
| 440 | mrb_value src = mrb_iv_get(mrb, self, mrb_intern_lit(mrb, "@source"));
|
---|
| 441 | ptr = RSTRING_PTR(src);
|
---|
| 442 | len = RSTRING_LEN(src);
|
---|
| 443 |
|
---|
| 444 | again:
|
---|
| 445 | if (len >= 4 && ptr[0] == '(' && ptr[1] == '?') {
|
---|
[439] | 446 | int err = 1;
|
---|
| 447 | ptr += 2;
|
---|
| 448 | if ((len -= 2) > 0) {
|
---|
[279] | 449 | do {
|
---|
| 450 | if(strchr(ptr, 'i')) { options |= ONIG_OPTION_IGNORECASE; }
|
---|
| 451 | if(strchr(ptr, 'x')) { options |= ONIG_OPTION_EXTEND; }
|
---|
| 452 | if(strchr(ptr, 'm')) { options |= ONIG_OPTION_MULTILINE; }
|
---|
[439] | 453 | ++ptr;
|
---|
[279] | 454 | } while (--len > 0);
|
---|
[439] | 455 | }
|
---|
| 456 | if (len > 1 && *ptr == '-') {
|
---|
[279] | 457 | ++ptr;
|
---|
| 458 | --len;
|
---|
| 459 | do {
|
---|
| 460 | if(strchr(ptr, 'i')) { options &= ~ONIG_OPTION_IGNORECASE; }
|
---|
| 461 | if(strchr(ptr, 'x')) { options &= ~ONIG_OPTION_EXTEND; }
|
---|
| 462 | if(strchr(ptr, 'm')) { options &= ~ONIG_OPTION_MULTILINE; }
|
---|
[439] | 463 | ++ptr;
|
---|
[279] | 464 | } while (--len > 0);
|
---|
[439] | 465 | }
|
---|
| 466 | if (*ptr == ')') {
|
---|
[279] | 467 | --len;
|
---|
| 468 | ++ptr;
|
---|
| 469 | goto again;
|
---|
[439] | 470 | }
|
---|
| 471 | if (*ptr == ':' && ptr[len-1] == ')') {
|
---|
[279] | 472 | OnigRegex rp;
|
---|
| 473 | ++ptr;
|
---|
| 474 | len -= 2;
|
---|
| 475 | err = onig_new(&rp, (OnigUChar*)ptr, (OnigUChar*)ptr + len, ONIG_OPTION_DEFAULT,
|
---|
| 476 | ONIG_ENCODING_UTF8, OnigDefaultSyntax, NULL);
|
---|
| 477 | onig_free(rp);
|
---|
[439] | 478 | }
|
---|
| 479 | if (err) {
|
---|
[279] | 480 | options = onig_get_options(reg);
|
---|
| 481 | ptr = RSTRING_PTR(src);
|
---|
| 482 | len = RSTRING_LEN(src);
|
---|
[439] | 483 | }
|
---|
[279] | 484 | }
|
---|
| 485 |
|
---|
| 486 | if (*option_to_str(optbuf, options)) mrb_str_cat_cstr(mrb, str, optbuf);
|
---|
| 487 |
|
---|
| 488 | if ((options & embeddable) != embeddable) {
|
---|
[439] | 489 | optbuf[0] = '-';
|
---|
| 490 | option_to_str(optbuf + 1, ~options);
|
---|
| 491 | mrb_str_cat_cstr(mrb, str, optbuf);
|
---|
[279] | 492 | }
|
---|
| 493 |
|
---|
| 494 | mrb_str_cat_cstr(mrb, str, ":");
|
---|
| 495 | regexp_expr_str(mrb, str, ptr, len);
|
---|
| 496 | mrb_str_cat_cstr(mrb, str, ")");
|
---|
| 497 | return str;
|
---|
| 498 | }
|
---|
| 499 |
|
---|
| 500 |
|
---|
| 501 | static mrb_value
|
---|
| 502 | onig_regexp_version(mrb_state* mrb, mrb_value self) {
|
---|
| 503 | (void)self;
|
---|
| 504 | return mrb_str_new_cstr(mrb, onig_version());
|
---|
| 505 | }
|
---|
| 506 |
|
---|
| 507 | static mrb_value
|
---|
| 508 | match_data_to_a(mrb_state* mrb, mrb_value self);
|
---|
| 509 |
|
---|
| 510 | static mrb_int
|
---|
| 511 | match_data_actual_index(mrb_state* mrb, mrb_value self, mrb_value idx_value) {
|
---|
| 512 | if(mrb_fixnum_p(idx_value)) { return mrb_fixnum(idx_value); }
|
---|
| 513 |
|
---|
| 514 | char const* name = NULL;
|
---|
| 515 | char const* name_end = NULL;
|
---|
| 516 | if(mrb_symbol_p(idx_value)) {
|
---|
| 517 | mrb_int sym_len;
|
---|
| 518 | name = mrb_sym2name_len(mrb, mrb_symbol(idx_value), &sym_len);
|
---|
| 519 | name_end = name + sym_len;
|
---|
| 520 | } else if(mrb_string_p(idx_value)) {
|
---|
| 521 | name = mrb_string_value_ptr(mrb, idx_value);
|
---|
| 522 | name_end = name + strlen(name);
|
---|
| 523 | } else { mrb_assert(FALSE); }
|
---|
| 524 | mrb_assert(name && name_end);
|
---|
| 525 |
|
---|
| 526 | mrb_value const regexp = mrb_iv_get(mrb, self, mrb_intern_lit(mrb, "regexp"));
|
---|
| 527 | mrb_assert(!mrb_nil_p(regexp));
|
---|
| 528 | mrb_assert(DATA_TYPE(regexp) == &mrb_onig_regexp_type);
|
---|
| 529 | mrb_assert(DATA_TYPE(self) == &mrb_onig_region_type);
|
---|
| 530 | int const idx = onig_name_to_backref_number(
|
---|
| 531 | (OnigRegex)DATA_PTR(regexp), (OnigUChar const*)name, (OnigUChar const*)name_end,
|
---|
| 532 | (OnigRegion*)DATA_PTR(self));
|
---|
| 533 | if (idx < 0) {
|
---|
| 534 | mrb_raisef(mrb, E_INDEX_ERROR, "undefined group name reference: %S", idx_value);
|
---|
| 535 | }
|
---|
| 536 | return idx;
|
---|
| 537 | }
|
---|
| 538 |
|
---|
| 539 | // ISO 15.2.16.3.1
|
---|
| 540 | static mrb_value
|
---|
| 541 | match_data_index(mrb_state* mrb, mrb_value self) {
|
---|
| 542 | mrb_value src;
|
---|
| 543 | mrb_int argc; mrb_value *argv;
|
---|
| 544 |
|
---|
| 545 | mrb_get_args(mrb, "*", &argv, &argc);
|
---|
| 546 |
|
---|
| 547 | src = match_data_to_a(mrb, self);
|
---|
| 548 |
|
---|
| 549 | if (argc == 1) {
|
---|
| 550 | switch (mrb_type(argv[0])) {
|
---|
| 551 | case MRB_TT_FIXNUM:
|
---|
| 552 | case MRB_TT_SYMBOL:
|
---|
| 553 | case MRB_TT_STRING:
|
---|
| 554 | return mrb_ary_entry(src, match_data_actual_index(mrb, self, argv[0]));
|
---|
| 555 | default: break;
|
---|
| 556 | }
|
---|
| 557 | }
|
---|
| 558 |
|
---|
| 559 | return mrb_funcall_argv(mrb, src, mrb_intern_lit(mrb, "[]"), argc, argv);
|
---|
| 560 | }
|
---|
| 561 |
|
---|
/*
 * Raises IndexError unless 0 <= idx < reg->num_regs.
 * Expects `mrb` and `reg` (an OnigRegion*) in the enclosing scope.
 * Wrapped in do { } while (0) so it behaves as a single statement, and the
 * argument is parenthesized so expressions can be passed safely.
 */
#define match_data_check_index(idx) \
  do { \
    if ((idx) < 0 || reg->num_regs <= (idx)) { \
      mrb_raisef(mrb, E_INDEX_ERROR, "index %S out of matches", mrb_fixnum_value(idx)); \
    } \
  } while (0)
| 565 |
|
---|
| 566 | // ISO 15.2.16.3.2
|
---|
| 567 | static mrb_value
|
---|
| 568 | match_data_begin(mrb_state* mrb, mrb_value self) {
|
---|
| 569 | mrb_value idx_value;
|
---|
| 570 | mrb_get_args(mrb, "o", &idx_value);
|
---|
| 571 | OnigRegion* reg;
|
---|
| 572 | Data_Get_Struct(mrb, self, &mrb_onig_region_type, reg);
|
---|
| 573 | mrb_int const idx = match_data_actual_index(mrb, self, idx_value);
|
---|
| 574 | match_data_check_index(idx);
|
---|
| 575 | return mrb_fixnum_value(reg->beg[idx]);
|
---|
| 576 | }
|
---|
| 577 |
|
---|
| 578 | // ISO 15.2.16.3.3
|
---|
| 579 | static mrb_value
|
---|
| 580 | match_data_captures(mrb_state* mrb, mrb_value self) {
|
---|
| 581 | mrb_value ary = match_data_to_a(mrb, self);
|
---|
| 582 | return mrb_ary_new_from_values(mrb, RARRAY_LEN(ary) - 1, RARRAY_PTR(ary) + 1);
|
---|
| 583 | }
|
---|
| 584 |
|
---|
| 585 | // ISO 15.2.16.3.4
|
---|
| 586 | static mrb_value
|
---|
| 587 | match_data_end(mrb_state* mrb, mrb_value self) {
|
---|
| 588 | mrb_value idx_value;
|
---|
| 589 | mrb_get_args(mrb, "o", &idx_value);
|
---|
| 590 | OnigRegion* reg;
|
---|
| 591 | Data_Get_Struct(mrb, self, &mrb_onig_region_type, reg);
|
---|
| 592 | mrb_int const idx = match_data_actual_index(mrb, self, idx_value);
|
---|
| 593 | match_data_check_index(idx);
|
---|
| 594 | return mrb_fixnum_value(reg->end[idx]);
|
---|
| 595 | }
|
---|
| 596 |
|
---|
| 597 | // ISO 15.2.16.3.5
|
---|
| 598 | static mrb_value
|
---|
| 599 | match_data_copy(mrb_state* mrb, mrb_value self) {
|
---|
| 600 | mrb_value src_val;
|
---|
| 601 | mrb_get_args(mrb, "o", &src_val);
|
---|
| 602 |
|
---|
| 603 | OnigRegion* src;
|
---|
| 604 | Data_Get_Struct(mrb, src_val, &mrb_onig_region_type, src);
|
---|
| 605 |
|
---|
| 606 | OnigRegion* dst = onig_region_new();
|
---|
| 607 | onig_region_copy(dst, src);
|
---|
| 608 |
|
---|
| 609 | DATA_PTR(self) = dst;
|
---|
| 610 | DATA_TYPE(self) = &mrb_onig_region_type;
|
---|
| 611 | mrb_iv_set(mrb, self, mrb_intern_lit(mrb, "string"), mrb_iv_get(mrb, src_val, mrb_intern_lit(mrb, "string")));
|
---|
| 612 | mrb_iv_set(mrb, self, mrb_intern_lit(mrb, "regexp"), mrb_iv_get(mrb, src_val, mrb_intern_lit(mrb, "regexp")));
|
---|
| 613 | return self;
|
---|
| 614 | }
|
---|
| 615 |
|
---|
| 616 | // ISO 15.2.16.3.6
|
---|
| 617 | // ISO 15.2.16.3.10
|
---|
| 618 | static mrb_value
|
---|
| 619 | match_data_length(mrb_state* mrb, mrb_value self) {
|
---|
| 620 | OnigRegion* reg;
|
---|
| 621 | Data_Get_Struct(mrb, self, &mrb_onig_region_type, reg);
|
---|
| 622 | return mrb_fixnum_value(reg->num_regs);
|
---|
| 623 | }
|
---|
| 624 |
|
---|
| 625 | // ISO 15.2.16.3.7
|
---|
| 626 | static mrb_value
|
---|
| 627 | match_data_offset(mrb_state* mrb, mrb_value self) {
|
---|
| 628 | mrb_value idx_value;
|
---|
| 629 | mrb_get_args(mrb, "o", &idx_value);
|
---|
| 630 | OnigRegion* reg;
|
---|
| 631 | Data_Get_Struct(mrb, self, &mrb_onig_region_type, reg);
|
---|
| 632 | mrb_int const idx = match_data_actual_index(mrb, self, idx_value);
|
---|
| 633 | match_data_check_index(idx);
|
---|
| 634 | mrb_value ret = mrb_ary_new_capa(mrb, 2);
|
---|
| 635 | mrb_ary_push(mrb, ret, mrb_fixnum_value(reg->beg[idx]));
|
---|
| 636 | mrb_ary_push(mrb, ret, mrb_fixnum_value(reg->end[idx]));
|
---|
| 637 | return ret;
|
---|
| 638 | }
|
---|
| 639 |
|
---|
| 640 | // ISO 15.2.16.3.8
|
---|
| 641 | static mrb_value
|
---|
| 642 | match_data_post_match(mrb_state* mrb, mrb_value self) {
|
---|
| 643 | OnigRegion* reg;
|
---|
| 644 | Data_Get_Struct(mrb, self, &mrb_onig_region_type, reg);
|
---|
| 645 | mrb_value str = mrb_iv_get(mrb, self, mrb_intern_lit(mrb, "string"));
|
---|
[439] | 646 | return str_substr(mrb, str, reg->end[0], RSTRING_LEN(str) - reg->end[0]);
|
---|
[279] | 647 | }
|
---|
| 648 |
|
---|
| 649 | // ISO 15.2.16.3.9
|
---|
| 650 | static mrb_value
|
---|
| 651 | match_data_pre_match(mrb_state* mrb, mrb_value self) {
|
---|
| 652 | OnigRegion* reg;
|
---|
| 653 | Data_Get_Struct(mrb, self, &mrb_onig_region_type, reg);
|
---|
| 654 | mrb_value str = mrb_iv_get(mrb, self, mrb_intern_lit(mrb, "string"));
|
---|
[439] | 655 | return str_substr(mrb, str, 0, reg->beg[0]);
|
---|
[279] | 656 | }
|
---|
| 657 |
|
---|
| 658 | // ISO 15.2.16.3.11
|
---|
| 659 | static mrb_value
|
---|
| 660 | match_data_string(mrb_state* mrb, mrb_value self) {
|
---|
| 661 | return mrb_iv_get(mrb, self, mrb_intern_lit(mrb, "string"));
|
---|
| 662 | }
|
---|
| 663 |
|
---|
| 664 | static mrb_value
|
---|
| 665 | match_data_regexp(mrb_state* mrb, mrb_value self) {
|
---|
| 666 | return mrb_iv_get(mrb, self, mrb_intern_lit(mrb, "regexp"));
|
---|
| 667 | }
|
---|
| 668 |
|
---|
| 669 | // ISO 15.2.16.3.12
|
---|
| 670 | static mrb_value
|
---|
| 671 | match_data_to_a(mrb_state* mrb, mrb_value self) {
|
---|
| 672 | mrb_value cache = mrb_iv_get(mrb, self, mrb_intern_lit(mrb, "cache"));
|
---|
| 673 | if(!mrb_nil_p(cache)) {
|
---|
| 674 | return cache;
|
---|
| 675 | }
|
---|
| 676 |
|
---|
| 677 | mrb_value str = mrb_iv_get(mrb, self, mrb_intern_lit(mrb, "string"));
|
---|
| 678 | OnigRegion* reg;
|
---|
| 679 | Data_Get_Struct(mrb, self, &mrb_onig_region_type, reg);
|
---|
| 680 |
|
---|
| 681 | mrb_value ret = mrb_ary_new_capa(mrb, reg->num_regs);
|
---|
| 682 | int i, ai = mrb_gc_arena_save(mrb);
|
---|
| 683 | for(i = 0; i < reg->num_regs; ++i) {
|
---|
| 684 | if(reg->beg[i] == ONIG_REGION_NOTPOS) {
|
---|
| 685 | mrb_ary_push(mrb, ret, mrb_nil_value());
|
---|
| 686 | } else {
|
---|
[439] | 687 | mrb_ary_push(mrb, ret, str_substr(mrb, str, reg->beg[i], reg->end[i] - reg->beg[i]));
|
---|
[279] | 688 | }
|
---|
| 689 | mrb_gc_arena_restore(mrb, ai);
|
---|
| 690 | }
|
---|
| 691 | return ret;
|
---|
| 692 | }
|
---|
| 693 |
|
---|
| 694 | // ISO 15.2.16.3.13
|
---|
| 695 | static mrb_value
|
---|
| 696 | match_data_to_s(mrb_state* mrb, mrb_value self) {
|
---|
| 697 | mrb_value str = mrb_iv_get(mrb, self, mrb_intern_lit(mrb, "string"));
|
---|
| 698 | OnigRegion* reg;
|
---|
| 699 | Data_Get_Struct(mrb, self, &mrb_onig_region_type, reg);
|
---|
[439] | 700 | return str_substr(mrb, str, reg->beg[0], reg->end[0] - reg->beg[0]);
|
---|
[279] | 701 | }
|
---|
| 702 |
|
---|
| 703 | static void
|
---|
| 704 | append_replace_str(mrb_state* mrb, mrb_value result, mrb_value replace,
|
---|
| 705 | mrb_value src, OnigRegex reg, OnigRegion* match)
|
---|
| 706 | {
|
---|
[439] | 707 | if (mrb_hash_p(replace)) {
|
---|
| 708 | mrb_value v = mrb_hash_get(mrb, replace, mrb_str_substr(mrb, src, match->beg[0], match->end[0] - match->beg[0]));
|
---|
| 709 | v = mrb_str_to_str(mrb, v);
|
---|
| 710 | mrb_str_cat_str(mrb, result, v);
|
---|
| 711 | return;
|
---|
| 712 | }
|
---|
| 713 |
|
---|
[279] | 714 | mrb_assert(mrb_string_p(replace));
|
---|
| 715 | char const* ch;
|
---|
| 716 | char const* const end = RSTRING_PTR(replace) + RSTRING_LEN(replace);
|
---|
| 717 | for(ch = RSTRING_PTR(replace); ch < end; ++ch) {
|
---|
| 718 | if (*ch != '\\' || (ch + 1) >= end) {
|
---|
| 719 | mrb_str_cat(mrb, result, ch, 1);
|
---|
| 720 | continue;
|
---|
| 721 | }
|
---|
| 722 |
|
---|
| 723 | switch(*(++ch)) { // skip back slash and get next char
|
---|
| 724 | case 'k': { // group name
|
---|
| 725 | if ((ch + 2) >= end || ch[1] != '<') { goto replace_expr_error; }
|
---|
| 726 | char const* name_beg = ch += 2;
|
---|
| 727 | while (*ch != '>') { if(++ch == end) { goto replace_expr_error; } }
|
---|
| 728 | mrb_assert(ch < end);
|
---|
| 729 | mrb_assert(*ch == '>');
|
---|
| 730 | int const idx = onig_name_to_backref_number(
|
---|
| 731 | reg, (OnigUChar const*)name_beg, (OnigUChar const*)ch, match);
|
---|
| 732 | if (idx < 0) {
|
---|
| 733 | mrb_raisef(mrb, E_INDEX_ERROR, "undefined group name reference: %S",
|
---|
[439] | 734 | str_substr(mrb, replace, name_beg - RSTRING_PTR(replace), ch - name_beg));
|
---|
[279] | 735 | }
|
---|
| 736 | mrb_str_cat(mrb, result, RSTRING_PTR(src) + match->beg[idx], match->end[idx] - match->beg[idx]);
|
---|
| 737 | } break;
|
---|
| 738 |
|
---|
| 739 | case '\\': // escaped back slash
|
---|
| 740 | mrb_str_cat(mrb, result, ch, 1);
|
---|
| 741 | break;
|
---|
| 742 |
|
---|
| 743 | default:
|
---|
| 744 | if (isdigit(*ch)) { // group number 0-9
|
---|
| 745 | int const idx = *ch - '0';
|
---|
[439] | 746 | if (idx < match->num_regs) {
|
---|
| 747 | mrb_str_cat(mrb, result, RSTRING_PTR(src) + match->beg[idx], match->end[idx] - match->beg[idx]);
|
---|
[279] | 748 | }
|
---|
| 749 | } else {
|
---|
| 750 | char const str[] = { '\\', *ch };
|
---|
| 751 | mrb_str_cat(mrb, result, str, 2);
|
---|
| 752 | }
|
---|
| 753 | break;
|
---|
| 754 | }
|
---|
| 755 | }
|
---|
| 756 |
|
---|
| 757 | if(ch == end) { return; }
|
---|
| 758 |
|
---|
| 759 | replace_expr_error:
|
---|
| 760 | mrb_raisef(mrb, E_REGEXP_ERROR, "invalid replace expression: %S", replace);
|
---|
| 761 | }
|
---|
| 762 |
|
---|
| 763 | // ISO 15.2.10.5.18
|
---|
| 764 | static mrb_value
|
---|
| 765 | string_gsub(mrb_state* mrb, mrb_value self) {
|
---|
| 766 | mrb_value blk, match_expr, replace_expr = mrb_nil_value();
|
---|
[439] | 767 | int const argc = mrb_get_args(mrb, "&o|o", &blk, &match_expr, &replace_expr);
|
---|
[279] | 768 |
|
---|
[439] | 769 | if(!ONIG_REGEXP_P(match_expr)) {
|
---|
[279] | 770 | mrb_value argv[] = { match_expr, replace_expr };
|
---|
| 771 | return mrb_funcall_with_block(mrb, self, mrb_intern_lit(mrb, "string_gsub"), argc, argv, blk);
|
---|
| 772 | }
|
---|
| 773 |
|
---|
[439] | 774 | if(argc == 1 && mrb_nil_p(blk)) {
|
---|
| 775 | return mrb_funcall(mrb, self, "to_enum", 2, mrb_symbol_value(mrb_intern_lit(mrb, "onig_regexp_gsub")), match_expr);
|
---|
| 776 | }
|
---|
| 777 |
|
---|
[279] | 778 | if(!mrb_nil_p(blk) && !mrb_nil_p(replace_expr)) {
|
---|
[439] | 779 | blk = mrb_nil_value();
|
---|
[279] | 780 | }
|
---|
| 781 |
|
---|
[439] | 782 | if (mrb_nil_p(blk) && !mrb_hash_p(replace_expr)) {
|
---|
| 783 | replace_expr = mrb_string_type(mrb, replace_expr);
|
---|
| 784 | }
|
---|
| 785 |
|
---|
[279] | 786 | OnigRegex reg;
|
---|
| 787 | Data_Get_Struct(mrb, match_expr, &mrb_onig_regexp_type, reg);
|
---|
| 788 | mrb_value const result = mrb_str_new(mrb, NULL, 0);
|
---|
| 789 | mrb_value const match_value = create_onig_region(mrb, self, match_expr);
|
---|
| 790 | OnigRegion* const match = (OnigRegion*)DATA_PTR(match_value);
|
---|
| 791 | int last_end_pos = 0;
|
---|
| 792 |
|
---|
| 793 | while(1) {
|
---|
| 794 | if(onig_match_common(mrb, reg, match_value, self, last_end_pos) == ONIG_MISMATCH) { break; }
|
---|
| 795 |
|
---|
| 796 | mrb_str_cat(mrb, result, RSTRING_PTR(self) + last_end_pos, match->beg[0] - last_end_pos);
|
---|
| 797 |
|
---|
| 798 | if(mrb_nil_p(blk)) {
|
---|
| 799 | append_replace_str(mrb, result, replace_expr, self, reg, match);
|
---|
| 800 | } else {
|
---|
[439] | 801 | mrb_value const tmp_str = mrb_str_to_str(mrb, mrb_yield(mrb, blk, str_substr(
|
---|
[279] | 802 | mrb, self, match->beg[0], match->end[0] - match->beg[0])));
|
---|
| 803 | mrb_assert(mrb_string_p(tmp_str));
|
---|
| 804 | mrb_str_concat(mrb, result, tmp_str);
|
---|
| 805 | }
|
---|
| 806 |
|
---|
| 807 | last_end_pos = match->end[0];
|
---|
[439] | 808 | if (match->beg[0] == match->end[0]) {
|
---|
| 809 | /*
|
---|
| 810 | * Always consume at least one character of the input string
|
---|
| 811 | * in order to prevent infinite loops.
|
---|
| 812 | */
|
---|
| 813 | char* p = RSTRING_PTR(self) + last_end_pos;
|
---|
| 814 | char* e = p + RSTRING_LEN(self);
|
---|
| 815 | int len = utf8len(p, e);
|
---|
| 816 | if (RSTRING_LEN(self) < last_end_pos + len) break;
|
---|
| 817 | mrb_str_cat(mrb, result, p, len);
|
---|
| 818 | last_end_pos += len;
|
---|
| 819 | }
|
---|
[279] | 820 | }
|
---|
| 821 |
|
---|
[439] | 822 | if (RSTRING_LEN(self) < last_end_pos) {
|
---|
| 823 | mrb_raise(mrb, E_ARGUMENT_ERROR, "invalid byte sequence in UTF-8");
|
---|
| 824 | }
|
---|
[279] | 825 | mrb_str_cat(mrb, result, RSTRING_PTR(self) + last_end_pos, RSTRING_LEN(self) - last_end_pos);
|
---|
| 826 | return result;
|
---|
| 827 | }
|
---|
| 828 |
|
---|
| 829 | // ISO 15.2.10.5.32
|
---|
| 830 | static mrb_value
|
---|
| 831 | string_scan(mrb_state* mrb, mrb_value self) {
|
---|
| 832 | mrb_value blk, match_expr;
|
---|
| 833 | mrb_get_args(mrb, "&o", &blk, &match_expr);
|
---|
| 834 |
|
---|
[439] | 835 | if(!ONIG_REGEXP_P(match_expr)) {
|
---|
[279] | 836 | return mrb_funcall_with_block(mrb, self, mrb_intern_lit(mrb, "string_scan"),
|
---|
| 837 | 1, &match_expr, blk);
|
---|
| 838 | }
|
---|
| 839 |
|
---|
| 840 | OnigRegex reg;
|
---|
| 841 | Data_Get_Struct(mrb, match_expr, &mrb_onig_regexp_type, reg);
|
---|
| 842 | mrb_value const result = mrb_nil_p(blk)? mrb_ary_new(mrb) : self;
|
---|
| 843 | mrb_value m_value = create_onig_region(mrb, self, match_expr);
|
---|
| 844 | OnigRegion* const m = (OnigRegion*)DATA_PTR(m_value);
|
---|
| 845 | int last_end_pos = 0;
|
---|
| 846 | int i;
|
---|
| 847 |
|
---|
| 848 | while (1) {
|
---|
| 849 | if(onig_match_common(mrb, reg, m_value, self, last_end_pos) == ONIG_MISMATCH) { break; }
|
---|
| 850 |
|
---|
| 851 | if(mrb_nil_p(blk)) {
|
---|
| 852 | mrb_assert(mrb_array_p(result));
|
---|
| 853 | if(m->num_regs == 1) {
|
---|
[439] | 854 | mrb_ary_push(mrb, result, str_substr(mrb, self, m->beg[0], m->end[0] - m->beg[0]));
|
---|
[279] | 855 | } else {
|
---|
| 856 | mrb_value const elem = mrb_ary_new_capa(mrb, m->num_regs - 1);
|
---|
| 857 | for(i = 1; i < m->num_regs; ++i) {
|
---|
[439] | 858 | mrb_ary_push(mrb, elem, str_substr(mrb, self, m->beg[i], m->end[i] - m->beg[i]));
|
---|
[279] | 859 | }
|
---|
| 860 | mrb_ary_push(mrb, result, elem);
|
---|
| 861 | }
|
---|
| 862 | } else { // call block
|
---|
| 863 | mrb_assert(mrb_string_p(result));
|
---|
| 864 | if(m->num_regs == 1) {
|
---|
[439] | 865 | mrb_yield(mrb, blk, str_substr(mrb, self, m->beg[0], m->end[0] - m->beg[0]));
|
---|
[279] | 866 | } else {
|
---|
| 867 | mrb_value argv = mrb_ary_new_capa(mrb, m->num_regs - 1);
|
---|
| 868 | for(i = 1; i < m->num_regs; ++i) {
|
---|
[439] | 869 | mrb_ary_push(mrb, argv, str_substr(mrb, self, m->beg[i], m->end[i] - m->beg[i]));
|
---|
[279] | 870 | }
|
---|
| 871 | mrb_yield(mrb, blk, argv);
|
---|
| 872 | }
|
---|
| 873 | }
|
---|
| 874 |
|
---|
[439] | 875 | if (m->beg[0] == m->end[0]) {
|
---|
| 876 | /*
|
---|
| 877 | * Always consume at least one character of the input string
|
---|
| 878 | */
|
---|
| 879 | if (RSTRING_LEN(self) > m->end[0]) {
|
---|
| 880 | char* p = RSTRING_PTR(self) + last_end_pos;
|
---|
| 881 | char* e = p + RSTRING_LEN(self);
|
---|
| 882 | int len = utf8len(p, e);
|
---|
| 883 | last_end_pos = m->end[0] + len;
|
---|
| 884 | } else {
|
---|
| 885 | last_end_pos = m->end[0] + 1;
|
---|
| 886 | }
|
---|
| 887 | } else {
|
---|
| 888 | last_end_pos = m->end[0];
|
---|
| 889 | }
|
---|
[279] | 890 | }
|
---|
| 891 |
|
---|
| 892 | return result;
|
---|
| 893 | }
|
---|
| 894 |
|
---|
// ISO 15.2.10.5.35
/*
 * String#onig_regexp_split
 *
 * Ruby-level arguments: ([pattern [, limit]]).
 *
 * Splits the receiver on matches of an OnigRegexp and returns an Array of
 * Strings. Text captured by groups of the pattern is inserted into the
 * result after the piece that precedes the match. When the effective pattern
 * is a non-empty String (or any other non-regexp value) the call is forwarded
 * to the method named "string_split"; an empty String pattern is turned into
 * an OnigRegexp so that the receiver is split into characters.
 *
 * Raises E_TYPE_ERROR when $; is set to something other than a String or an
 * OnigRegexp.
 */
static mrb_value
string_split(mrb_state* mrb, mrb_value self) {
  mrb_value pattern = mrb_nil_value(); mrb_int limit = 0;
  int argc = mrb_get_args(mrb, "|oi", &pattern, &limit);
  mrb_value result, tmp;
  /* NOTE: despite its name, lim_p is TRUE when NO positive limit was given
   * (i.e. the number of pieces is not capped) and FALSE when one was. */
  mrb_bool lim_p = !(argc == 2 && 0 < limit);

  if(mrb_nil_p(pattern)) { // check $; global variable
    pattern = mrb_gv_get(mrb, mrb_intern_lit(mrb, "$;"));
    if (mrb_nil_p(pattern)) {
      /* neither an argument nor $; : fall back to a single space */
      pattern = mrb_str_new_lit(mrb, " ");
    } else if (!mrb_string_p(pattern) && !ONIG_REGEXP_P(pattern)) {
      mrb_raise(mrb, E_TYPE_ERROR, "value of $; must be String or Regexp");
    }
    /* the substituted pattern now counts as an argument when forwarding */
    if (argc == 0) { argc = 1; }
  }

  if (!ONIG_REGEXP_P(pattern)) {
    if(!mrb_nil_p(pattern)) { pattern = mrb_string_type(mrb, pattern); }
    if(mrb_string_p(pattern) && RSTRING_LEN(pattern) == 0) {
      /* Special case - split into chars */
      pattern = mrb_funcall(mrb, mrb_obj_value(mrb_class_get(mrb, "OnigRegexp")), "new", 1, pattern);
    } else {
      /* argc decides whether the limit is actually passed along */
      return mrb_funcall(mrb, self, "string_split", argc, pattern, mrb_fixnum_value(limit));
    }
  }

  /* trivial results: empty receiver, or a limit of exactly one piece */
  if(RSTRING_LEN(self) == 0) { return mrb_ary_new(mrb); }
  if(limit == 1) { return mrb_ary_new_from_values(mrb, 1, &self); }

  result = mrb_ary_new(mrb);

  OnigRegex reg;
  Data_Get_Struct(mrb, pattern, &mrb_onig_regexp_type, reg);
  mrb_value const match_value = create_onig_region(mrb, self, pattern);
  OnigRegion* const match = (OnigRegion*)DATA_PTR(match_value);
  char *ptr = mrb_str_to_cstr(mrb, self);
  mrb_int len = RSTRING_LEN(self);
  /* start: offset the next search begins at
   * beg:   offset where the piece currently being collected begins
   * end:   value returned by onig_match_common for the current match
   *        (presumably the offset where the match starts - confirm) */
  mrb_int start = 0, beg = 0, end = 0;
  /* idx: capture group index; i: counter compared against limit */
  mrb_int idx = 0, i = 0;
  /* set to 1 after an empty match made the search advance by one character */
  mrb_int last_null = 0;

  if (argc == 2) { i = 1; }
  while ((end = onig_match_common(mrb, reg, match_value, self, start)) >= 0) {
    if (start == end && match->beg[0] == match->end[0]) {
      /* empty match exactly at the search position */
      if (!ptr) {
        mrb_ary_push(mrb, result, mrb_str_new_lit(mrb, ""));
        break;
      }
      else if (last_null == 1) {
        /* second empty match in a row: emit the single character at beg */
        mrb_ary_push(mrb, result, str_substr(mrb, self, beg, utf8len(ptr+beg, ptr+len)));
        beg = start;
      }
      else {
        /* first empty match: step over one character (or past the end)
         * and search again before emitting anything */
        if (start == len)
          start++;
        else
          start += utf8len(ptr+start, ptr+len);
        last_null = 1;
        continue;
      }
    }
    else {
      /* regular match: emit the text before it and resume after it */
      mrb_ary_push(mrb, result, str_substr(mrb, self, beg, end-beg));
      beg = start = match->end[0];
    }
    last_null = 0;

    /* append the text of every capture group that has a start offset */
    for (idx=1; idx < match->num_regs; idx++) {
      if (match->beg[idx] == -1) continue;
      if (match->beg[idx] == match->end[idx])
        tmp = mrb_str_new_lit(mrb, "");
      else
        tmp = str_substr(mrb, self, match->beg[idx], match->end[idx]-match->beg[idx]);
      mrb_ary_push(mrb, result, tmp);
    }
    /* with a positive limit, stop once the counter reaches it */
    if (!lim_p && limit <= ++i) break;
  }

  /* emit the remainder after the last match: always when a positive limit is
   * in effect or limit is negative, otherwise only if text is left over */
  if (RSTRING_LEN(self) > 0 && (!lim_p || RSTRING_LEN(self) > beg || limit < 0)) {
    if (RSTRING_LEN(self) == beg)
      tmp = mrb_str_new_lit(mrb, "");
    else
      tmp = str_substr(mrb, self, beg, RSTRING_LEN(self)-beg);
    mrb_ary_push(mrb, result, tmp);
  }
  /* limit == 0: drop empty strings from the end of the result */
  if (lim_p && limit == 0) {
    while ((len = RARRAY_LEN(result)) > 0 &&
           (tmp = mrb_ary_ref(mrb, result, len-1), RSTRING_LEN(tmp) == 0))
      mrb_ary_pop(mrb, result);
  }

  return result;
}
| 990 |
|
---|
| 991 | // ISO 15.2.10.5.36
|
---|
| 992 | static mrb_value
|
---|
| 993 | string_sub(mrb_state* mrb, mrb_value self) {
|
---|
| 994 | mrb_value blk, match_expr, replace_expr = mrb_nil_value();
|
---|
[439] | 995 | int const argc = mrb_get_args(mrb, "&o|o", &blk, &match_expr, &replace_expr);
|
---|
[279] | 996 |
|
---|
[439] | 997 | if(!ONIG_REGEXP_P(match_expr)) {
|
---|
[279] | 998 | mrb_value argv[] = { match_expr, replace_expr };
|
---|
| 999 | return mrb_funcall_with_block(mrb, self, mrb_intern_lit(mrb, "string_sub"), argc, argv, blk);
|
---|
| 1000 | }
|
---|
| 1001 |
|
---|
[439] | 1002 | if(argc == 1 && mrb_nil_p(blk)) {
|
---|
| 1003 | mrb_raise(mrb, E_ARGUMENT_ERROR, "wrong number of arguments (given 1, expected 2)");
|
---|
| 1004 | }
|
---|
| 1005 |
|
---|
[279] | 1006 | if(!mrb_nil_p(blk) && !mrb_nil_p(replace_expr)) {
|
---|
[439] | 1007 | blk = mrb_nil_value();
|
---|
[279] | 1008 | }
|
---|
| 1009 |
|
---|
[439] | 1010 | if (mrb_nil_p(blk) && !mrb_hash_p(replace_expr)) {
|
---|
| 1011 | replace_expr = mrb_string_type(mrb, replace_expr);
|
---|
| 1012 | }
|
---|
| 1013 |
|
---|
[279] | 1014 | OnigRegex reg;
|
---|
| 1015 | Data_Get_Struct(mrb, match_expr, &mrb_onig_regexp_type, reg);
|
---|
| 1016 | mrb_value const result = mrb_str_new(mrb, NULL, 0);
|
---|
| 1017 | mrb_value const match_value = create_onig_region(mrb, self, match_expr);
|
---|
| 1018 | OnigRegion* const match = (OnigRegion*)DATA_PTR(match_value);
|
---|
| 1019 |
|
---|
| 1020 | int const onig_result = onig_match_common(mrb, reg, match_value, self, 0);
|
---|
| 1021 | if(onig_result == ONIG_MISMATCH) { return self; }
|
---|
| 1022 |
|
---|
| 1023 | mrb_str_cat(mrb, result, RSTRING_PTR(self), match->beg[0]);
|
---|
| 1024 |
|
---|
| 1025 | if(mrb_nil_p(blk)) {
|
---|
| 1026 | append_replace_str(mrb, result, replace_expr, self, reg, match);
|
---|
| 1027 | } else {
|
---|
[439] | 1028 | mrb_value const tmp_str = mrb_str_to_str(mrb, mrb_yield(mrb, blk, str_substr(
|
---|
[279] | 1029 | mrb, self, match->beg[0], match->end[0] - match->beg[0])));
|
---|
| 1030 | mrb_assert(mrb_string_p(tmp_str));
|
---|
| 1031 | mrb_str_concat(mrb, result, tmp_str);
|
---|
| 1032 | }
|
---|
| 1033 |
|
---|
| 1034 | int const last_end_pos = match->end[0];
|
---|
| 1035 | mrb_str_cat(mrb, result, RSTRING_PTR(self) + last_end_pos, RSTRING_LEN(self) - last_end_pos);
|
---|
| 1036 |
|
---|
| 1037 | return result;
|
---|
| 1038 | }
|
---|
| 1039 |
|
---|
| 1040 | static mrb_value
|
---|
| 1041 | onig_regexp_clear_global_variables(mrb_state* mrb, mrb_value self) {
|
---|
| 1042 | mrb_gv_remove(mrb, mrb_intern_lit(mrb, "$~"));
|
---|
| 1043 | mrb_gv_remove(mrb, mrb_intern_lit(mrb, "$&"));
|
---|
| 1044 | mrb_gv_remove(mrb, mrb_intern_lit(mrb, "$`"));
|
---|
| 1045 | mrb_gv_remove(mrb, mrb_intern_lit(mrb, "$'"));
|
---|
| 1046 | mrb_gv_remove(mrb, mrb_intern_lit(mrb, "$+"));
|
---|
| 1047 |
|
---|
| 1048 | int idx;
|
---|
| 1049 | for(idx = 1; idx < 10; ++idx) {
|
---|
| 1050 | char const n[] = { '$', '0' + idx };
|
---|
| 1051 | mrb_gv_remove(mrb, mrb_intern(mrb, n, 2));
|
---|
| 1052 | }
|
---|
| 1053 |
|
---|
| 1054 | return self;
|
---|
| 1055 | }
|
---|
| 1056 |
|
---|
| 1057 | static mrb_value
|
---|
| 1058 | onig_regexp_does_set_global_variables(mrb_state* mrb, mrb_value self) {
|
---|
| 1059 | (void)self;
|
---|
| 1060 | return mrb_obj_iv_get(mrb, (struct RObject*)mrb_class_get(mrb, "OnigRegexp"),
|
---|
| 1061 | mrb_intern_lit(mrb, "@set_global_variables"));
|
---|
| 1062 | }
|
---|
| 1063 | static mrb_value
|
---|
| 1064 | onig_regexp_set_set_global_variables(mrb_state* mrb, mrb_value self) {
|
---|
| 1065 | mrb_value arg;
|
---|
| 1066 | mrb_get_args(mrb, "o", &arg);
|
---|
| 1067 | mrb_value const ret = mrb_bool_value(mrb_bool(arg));
|
---|
| 1068 | mrb_obj_iv_set(mrb, (struct RObject*)mrb_class_get(mrb, "OnigRegexp"),
|
---|
| 1069 | mrb_intern_lit(mrb, "@set_global_variables"), ret);
|
---|
| 1070 | onig_regexp_clear_global_variables(mrb, self);
|
---|
| 1071 | return ret;
|
---|
| 1072 | }
|
---|
| 1073 |
|
---|
| 1074 | // ISO 15.2.15.6.2
|
---|
| 1075 | static mrb_value
|
---|
| 1076 | onig_regexp_escape(mrb_state* mrb, mrb_value self) {
|
---|
| 1077 | char* str_begin; mrb_args_int str_len;
|
---|
| 1078 | mrb_get_args(mrb, "s", &str_begin, &str_len);
|
---|
| 1079 |
|
---|
| 1080 | mrb_value const ret = mrb_str_new(mrb, NULL, 0);
|
---|
| 1081 | char escaped_char = 0;
|
---|
| 1082 | int substr_count = 0;
|
---|
| 1083 | char const* str = str_begin;
|
---|
| 1084 |
|
---|
| 1085 | for(; str < (str_begin + str_len); ++str) {
|
---|
| 1086 | switch(*str) {
|
---|
| 1087 | case '\n': escaped_char = 'n'; break;
|
---|
| 1088 | case '\t': escaped_char = 't'; break;
|
---|
| 1089 | case '\r': escaped_char = 'r'; break;
|
---|
| 1090 | case '\f': escaped_char = 'f'; break;
|
---|
| 1091 |
|
---|
| 1092 | case ' ':
|
---|
| 1093 | case '#':
|
---|
| 1094 | case '$':
|
---|
| 1095 | case '(':
|
---|
| 1096 | case ')':
|
---|
| 1097 | case '*':
|
---|
| 1098 | case '+':
|
---|
| 1099 | case '-':
|
---|
| 1100 | case '.':
|
---|
| 1101 | case '?':
|
---|
| 1102 | case '[':
|
---|
| 1103 | case '\\':
|
---|
| 1104 | case ']':
|
---|
| 1105 | case '^':
|
---|
| 1106 | case '{':
|
---|
| 1107 | case '|':
|
---|
| 1108 | case '}':
|
---|
| 1109 | escaped_char = *str; break;
|
---|
| 1110 |
|
---|
| 1111 | default: ++substr_count; continue;
|
---|
| 1112 | }
|
---|
| 1113 |
|
---|
| 1114 | mrb_str_cat(mrb, ret, str - substr_count, substr_count);
|
---|
| 1115 | substr_count = 0;
|
---|
| 1116 |
|
---|
| 1117 | char const c[] = { '\\', escaped_char };
|
---|
| 1118 | mrb_str_cat(mrb, ret, c, 2);
|
---|
| 1119 | }
|
---|
| 1120 | mrb_str_cat(mrb, ret, str - substr_count, substr_count);
|
---|
| 1121 | return ret;
|
---|
| 1122 | }
|
---|
| 1123 |
|
---|
| 1124 | void
|
---|
| 1125 | mrb_mruby_onig_regexp_gem_init(mrb_state* mrb) {
|
---|
| 1126 | struct RClass *clazz;
|
---|
| 1127 |
|
---|
| 1128 | clazz = mrb_define_class(mrb, "OnigRegexp", mrb->object_class);
|
---|
| 1129 | MRB_SET_INSTANCE_TT(clazz, MRB_TT_DATA);
|
---|
| 1130 |
|
---|
| 1131 | // enable global variables setting in onig_match_common by default
|
---|
| 1132 | mrb_obj_iv_set(mrb, (struct RObject*)clazz, mrb_intern_lit(mrb, "@set_global_variables"), mrb_true_value());
|
---|
| 1133 |
|
---|
| 1134 | mrb_define_const(mrb, clazz, "IGNORECASE", mrb_fixnum_value(ONIG_OPTION_IGNORECASE));
|
---|
| 1135 | mrb_define_const(mrb, clazz, "EXTENDED", mrb_fixnum_value(ONIG_OPTION_EXTEND));
|
---|
| 1136 | mrb_define_const(mrb, clazz, "MULTILINE", mrb_fixnum_value(ONIG_OPTION_MULTILINE));
|
---|
| 1137 | mrb_define_const(mrb, clazz, "SINGLELINE", mrb_fixnum_value(ONIG_OPTION_SINGLELINE));
|
---|
| 1138 | mrb_define_const(mrb, clazz, "FIND_LONGEST", mrb_fixnum_value(ONIG_OPTION_FIND_LONGEST));
|
---|
| 1139 | mrb_define_const(mrb, clazz, "FIND_NOT_EMPTY", mrb_fixnum_value(ONIG_OPTION_FIND_NOT_EMPTY));
|
---|
| 1140 | mrb_define_const(mrb, clazz, "NEGATE_SINGLELINE", mrb_fixnum_value(ONIG_OPTION_NEGATE_SINGLELINE));
|
---|
| 1141 | mrb_define_const(mrb, clazz, "DONT_CAPTURE_GROUP", mrb_fixnum_value(ONIG_OPTION_DONT_CAPTURE_GROUP));
|
---|
| 1142 | mrb_define_const(mrb, clazz, "CAPTURE_GROUP", mrb_fixnum_value(ONIG_OPTION_CAPTURE_GROUP));
|
---|
| 1143 | mrb_define_const(mrb, clazz, "NOTBOL", mrb_fixnum_value(ONIG_OPTION_NOTBOL));
|
---|
| 1144 | mrb_define_const(mrb, clazz, "NOTEOL", mrb_fixnum_value(ONIG_OPTION_NOTEOL));
|
---|
[439] | 1145 | #ifdef ONIG_OPTION_POSIX_REGION
|
---|
| 1146 | mrb_define_const(mrb, clazz, "POSIX_REGION", mrb_fixnum_value(ONIG_OPTION_POSIX_REGION));
|
---|
| 1147 | #endif
|
---|
[279] | 1148 | #ifdef ONIG_OPTION_ASCII_RANGE
|
---|
| 1149 | mrb_define_const(mrb, clazz, "ASCII_RANGE", mrb_fixnum_value(ONIG_OPTION_ASCII_RANGE));
|
---|
| 1150 | #endif
|
---|
| 1151 | #ifdef ONIG_OPTION_POSIX_BRACKET_ALL_RANGE
|
---|
| 1152 | mrb_define_const(mrb, clazz, "POSIX_BRACKET_ALL_RANGE", mrb_fixnum_value(ONIG_OPTION_POSIX_BRACKET_ALL_RANGE));
|
---|
| 1153 | #endif
|
---|
| 1154 | #ifdef ONIG_OPTION_WORD_BOUND_ALL_RANGE
|
---|
| 1155 | mrb_define_const(mrb, clazz, "WORD_BOUND_ALL_RANGE", mrb_fixnum_value(ONIG_OPTION_WORD_BOUND_ALL_RANGE));
|
---|
| 1156 | #endif
|
---|
| 1157 | #ifdef ONIG_OPTION_NEWLINE_CRLF
|
---|
| 1158 | mrb_define_const(mrb, clazz, "NEWLINE_CRLF", mrb_fixnum_value(ONIG_OPTION_NEWLINE_CRLF));
|
---|
| 1159 | #endif
|
---|
| 1160 | #ifdef ONIG_OPTION_NOTBOS
|
---|
| 1161 | mrb_define_const(mrb, clazz, "NOTBOS", mrb_fixnum_value(ONIG_OPTION_NOTBOS));
|
---|
| 1162 | #endif
|
---|
| 1163 | #ifdef ONIG_OPTION_NOTEOS
|
---|
| 1164 | mrb_define_const(mrb, clazz, "NOTEOS", mrb_fixnum_value(ONIG_OPTION_NOTEOS));
|
---|
| 1165 | #endif
|
---|
| 1166 |
|
---|
| 1167 | mrb_define_method(mrb, clazz, "initialize", onig_regexp_initialize, MRB_ARGS_REQ(1) | MRB_ARGS_OPT(2));
|
---|
| 1168 | mrb_define_method(mrb, clazz, "==", onig_regexp_equal, MRB_ARGS_REQ(1));
|
---|
| 1169 | mrb_define_method(mrb, clazz, "match", onig_regexp_match, MRB_ARGS_REQ(1) | MRB_ARGS_OPT(1));
|
---|
[439] | 1170 | mrb_define_method(mrb, clazz, "match?", onig_regexp_match_p, MRB_ARGS_REQ(1) | MRB_ARGS_OPT(1));
|
---|
[279] | 1171 | mrb_define_method(mrb, clazz, "casefold?", onig_regexp_casefold_p, MRB_ARGS_NONE());
|
---|
| 1172 |
|
---|
| 1173 | mrb_define_method(mrb, clazz, "options", onig_regexp_options, MRB_ARGS_NONE());
|
---|
| 1174 | mrb_define_method(mrb, clazz, "inspect", onig_regexp_inspect, MRB_ARGS_NONE());
|
---|
| 1175 | mrb_define_method(mrb, clazz, "to_s", onig_regexp_to_s, MRB_ARGS_NONE());
|
---|
| 1176 |
|
---|
| 1177 | mrb_define_module_function(mrb, clazz, "escape", onig_regexp_escape, MRB_ARGS_REQ(1));
|
---|
| 1178 | mrb_define_module_function(mrb, clazz, "quote", onig_regexp_escape, MRB_ARGS_REQ(1));
|
---|
| 1179 | mrb_define_module_function(mrb, clazz, "version", onig_regexp_version, MRB_ARGS_NONE());
|
---|
| 1180 | mrb_define_module_function(mrb, clazz, "set_global_variables?", onig_regexp_does_set_global_variables, MRB_ARGS_NONE());
|
---|
| 1181 | mrb_define_module_function(mrb, clazz, "set_global_variables=", onig_regexp_set_set_global_variables, MRB_ARGS_REQ(1));
|
---|
| 1182 | mrb_define_module_function(mrb, clazz, "clear_global_variables", onig_regexp_clear_global_variables, MRB_ARGS_NONE());
|
---|
| 1183 |
|
---|
| 1184 | struct RClass* match_data = mrb_define_class(mrb, "OnigMatchData", mrb->object_class);
|
---|
| 1185 | MRB_SET_INSTANCE_TT(clazz, MRB_TT_DATA);
|
---|
| 1186 | mrb_undef_class_method(mrb, match_data, "new");
|
---|
| 1187 |
|
---|
| 1188 | // mrb_define_method(mrb, match_data, "==", &match_data_eq);
|
---|
| 1189 | mrb_define_method(mrb, match_data, "[]", &match_data_index, MRB_ARGS_REQ(1));
|
---|
| 1190 | mrb_define_method(mrb, match_data, "begin", &match_data_begin, MRB_ARGS_REQ(1));
|
---|
| 1191 | mrb_define_method(mrb, match_data, "captures", &match_data_captures, MRB_ARGS_NONE());
|
---|
| 1192 | mrb_define_method(mrb, match_data, "end", &match_data_end, MRB_ARGS_REQ(1));
|
---|
| 1193 | // mrb_define_method(mrb, match_data, "eql?", &match_data_eq);
|
---|
| 1194 | // mrb_define_method(mrb, match_data, "hash", &match_data_hash);
|
---|
| 1195 | mrb_define_method(mrb, match_data, "initialize_copy", &match_data_copy, MRB_ARGS_REQ(1));
|
---|
| 1196 | // mrb_define_method(mrb, match_data, "inspect", &match_data_inspect);
|
---|
| 1197 | mrb_define_method(mrb, match_data, "length", &match_data_length, MRB_ARGS_NONE());
|
---|
| 1198 | // mrb_define_method(mrb, match_data, "names", &match_data_names);
|
---|
| 1199 | mrb_define_method(mrb, match_data, "offset", &match_data_offset, MRB_ARGS_REQ(1));
|
---|
| 1200 | mrb_define_method(mrb, match_data, "post_match", &match_data_post_match, MRB_ARGS_NONE());
|
---|
| 1201 | mrb_define_method(mrb, match_data, "pre_match", &match_data_pre_match, MRB_ARGS_NONE());
|
---|
| 1202 | mrb_define_method(mrb, match_data, "regexp", &match_data_regexp, MRB_ARGS_NONE());
|
---|
| 1203 | mrb_define_method(mrb, match_data, "size", &match_data_length, MRB_ARGS_NONE());
|
---|
| 1204 | mrb_define_method(mrb, match_data, "string", &match_data_string, MRB_ARGS_NONE());
|
---|
| 1205 | mrb_define_method(mrb, match_data, "to_a", &match_data_to_a, MRB_ARGS_NONE());
|
---|
| 1206 | mrb_define_method(mrb, match_data, "to_s", &match_data_to_s, MRB_ARGS_NONE());
|
---|
| 1207 | // mrb_define_method(mrb, match_data, "values_at", &match_data_values_at);
|
---|
| 1208 |
|
---|
| 1209 | mrb_define_method(mrb, mrb->string_class, "onig_regexp_gsub", &string_gsub, MRB_ARGS_REQ(1) | MRB_ARGS_OPT(1) | MRB_ARGS_BLOCK());
|
---|
| 1210 | mrb_define_method(mrb, mrb->string_class, "onig_regexp_sub", &string_sub, MRB_ARGS_REQ(1) | MRB_ARGS_OPT(1) | MRB_ARGS_BLOCK());
|
---|
| 1211 | mrb_define_method(mrb, mrb->string_class, "onig_regexp_split", &string_split, MRB_ARGS_REQ(1));
|
---|
| 1212 | mrb_define_method(mrb, mrb->string_class, "onig_regexp_scan", &string_scan, MRB_ARGS_REQ(1) | MRB_ARGS_BLOCK());
|
---|
[439] | 1213 | mrb_define_method(mrb, mrb->string_class, "onig_regexp_match?", &string_match_p, MRB_ARGS_REQ(1) | MRB_ARGS_OPT(1));
|
---|
[279] | 1214 | }
|
---|
| 1215 |
|
---|
| 1216 | void
|
---|
| 1217 | mrb_mruby_onig_regexp_gem_final(mrb_state* mrb) {
|
---|
| 1218 | (void)mrb;
|
---|
| 1219 | }
|
---|
| 1220 |
|
---|
| 1221 | // vim:set et:
|
---|