[270] | 1 | #include <string.h>
|
---|
[331] | 2 | #include <mruby.h>
|
---|
| 3 | #include <mruby/array.h>
|
---|
| 4 | #include <mruby/class.h>
|
---|
| 5 | #include <mruby/string.h>
|
---|
| 6 | #include <mruby/range.h>
|
---|
[270] | 7 |
|
---|
[439] | 8 | #define ENC_ASCII_8BIT "ASCII-8BIT"
|
---|
| 9 | #define ENC_BINARY "BINARY"
|
---|
| 10 | #define ENC_UTF8 "UTF-8"
|
---|
| 11 |
|
---|
| 12 | #define ENC_COMP_P(enc, enc_lit) \
|
---|
| 13 | str_casecmp_p(RSTRING_PTR(enc), RSTRING_LEN(enc), enc_lit, sizeof(enc_lit"")-1)
|
---|
| 14 |
|
---|
| 15 | #ifdef MRB_WITHOUT_FLOAT
|
---|
| 16 | # define mrb_float_p(o) FALSE
|
---|
| 17 | #endif
|
---|
| 18 |
|
---|
| 19 | static mrb_bool
|
---|
| 20 | str_casecmp_p(const char *s1, mrb_int len1, const char *s2, mrb_int len2)
|
---|
[270] | 21 | {
|
---|
[439] | 22 | const char *e1, *e2;
|
---|
[270] | 23 |
|
---|
[439] | 24 | if (len1 != len2) return FALSE;
|
---|
| 25 | e1 = s1 + len1;
|
---|
| 26 | e2 = s2 + len2;
|
---|
| 27 | while (s1 < e1 && s2 < e2) {
|
---|
| 28 | if (*s1 != *s2 && TOUPPER(*s1) != TOUPPER(*s2)) return FALSE;
|
---|
| 29 | ++s1;
|
---|
| 30 | ++s2;
|
---|
| 31 | }
|
---|
| 32 | return TRUE;
|
---|
[270] | 33 | }
|
---|
| 34 |
|
---|
| 35 | static mrb_value
|
---|
[439] | 36 | int_chr_binary(mrb_state *mrb, mrb_value num)
|
---|
[270] | 37 | {
|
---|
[439] | 38 | mrb_int cp = mrb_int(mrb, num);
|
---|
| 39 | char c;
|
---|
| 40 | mrb_value str;
|
---|
[270] | 41 |
|
---|
[439] | 42 | if (cp < 0 || 0xff < cp) {
|
---|
| 43 | mrb_raisef(mrb, E_RANGE_ERROR, "%v out of char range", num);
|
---|
| 44 | }
|
---|
| 45 | c = (char)cp;
|
---|
| 46 | str = mrb_str_new(mrb, &c, 1);
|
---|
| 47 | RSTR_SET_ASCII_FLAG(mrb_str_ptr(str));
|
---|
| 48 | return str;
|
---|
[270] | 49 | }
|
---|
| 50 |
|
---|
[439] | 51 | #ifdef MRB_UTF8_STRING
|
---|
[270] | 52 | static mrb_value
|
---|
[439] | 53 | int_chr_utf8(mrb_state *mrb, mrb_value num)
|
---|
[270] | 54 | {
|
---|
[439] | 55 | mrb_int cp = mrb_int(mrb, num);
|
---|
| 56 | char utf8[4];
|
---|
[270] | 57 | mrb_int len;
|
---|
[439] | 58 | mrb_value str;
|
---|
| 59 | uint32_t ascii_flag = 0;
|
---|
[270] | 60 |
|
---|
[439] | 61 | if (cp < 0 || 0x10FFFF < cp) {
|
---|
| 62 | mrb_raisef(mrb, E_RANGE_ERROR, "%v out of char range", num);
|
---|
[270] | 63 | }
|
---|
[439] | 64 | if (cp < 0x80) {
|
---|
| 65 | utf8[0] = (char)cp;
|
---|
| 66 | len = 1;
|
---|
| 67 | ascii_flag = MRB_STR_ASCII;
|
---|
[270] | 68 | }
|
---|
[439] | 69 | else if (cp < 0x800) {
|
---|
| 70 | utf8[0] = (char)(0xC0 | (cp >> 6));
|
---|
| 71 | utf8[1] = (char)(0x80 | (cp & 0x3F));
|
---|
| 72 | len = 2;
|
---|
| 73 | }
|
---|
| 74 | else if (cp < 0x10000) {
|
---|
| 75 | utf8[0] = (char)(0xE0 | (cp >> 12));
|
---|
| 76 | utf8[1] = (char)(0x80 | ((cp >> 6) & 0x3F));
|
---|
| 77 | utf8[2] = (char)(0x80 | ( cp & 0x3F));
|
---|
| 78 | len = 3;
|
---|
| 79 | }
|
---|
| 80 | else {
|
---|
| 81 | utf8[0] = (char)(0xF0 | (cp >> 18));
|
---|
| 82 | utf8[1] = (char)(0x80 | ((cp >> 12) & 0x3F));
|
---|
| 83 | utf8[2] = (char)(0x80 | ((cp >> 6) & 0x3F));
|
---|
| 84 | utf8[3] = (char)(0x80 | ( cp & 0x3F));
|
---|
| 85 | len = 4;
|
---|
| 86 | }
|
---|
| 87 | str = mrb_str_new(mrb, utf8, len);
|
---|
| 88 | mrb_str_ptr(str)->flags |= ascii_flag;
|
---|
| 89 | return str;
|
---|
[270] | 90 | }
|
---|
[439] | 91 | #endif
|
---|
[270] | 92 |
|
---|
| 93 | /*
|
---|
| 94 | * call-seq:
|
---|
| 95 | * str.swapcase! -> str or nil
|
---|
| 96 | *
|
---|
| 97 | * Equivalent to <code>String#swapcase</code>, but modifies the receiver in
|
---|
| 98 | * place, returning <i>str</i>, or <code>nil</code> if no changes were made.
|
---|
| 99 | * Note: case conversion is effective only in ASCII region.
|
---|
| 100 | */
|
---|
| 101 | static mrb_value
|
---|
| 102 | mrb_str_swapcase_bang(mrb_state *mrb, mrb_value str)
|
---|
| 103 | {
|
---|
| 104 | char *p, *pend;
|
---|
| 105 | int modify = 0;
|
---|
| 106 | struct RString *s = mrb_str_ptr(str);
|
---|
| 107 |
|
---|
| 108 | mrb_str_modify(mrb, s);
|
---|
| 109 | p = RSTRING_PTR(str);
|
---|
| 110 | pend = p + RSTRING_LEN(str);
|
---|
| 111 | while (p < pend) {
|
---|
| 112 | if (ISUPPER(*p)) {
|
---|
| 113 | *p = TOLOWER(*p);
|
---|
| 114 | modify = 1;
|
---|
| 115 | }
|
---|
| 116 | else if (ISLOWER(*p)) {
|
---|
| 117 | *p = TOUPPER(*p);
|
---|
| 118 | modify = 1;
|
---|
| 119 | }
|
---|
| 120 | p++;
|
---|
| 121 | }
|
---|
| 122 |
|
---|
| 123 | if (modify) return str;
|
---|
| 124 | return mrb_nil_value();
|
---|
| 125 | }
|
---|
| 126 |
|
---|
| 127 | /*
|
---|
| 128 | * call-seq:
|
---|
| 129 | * str.swapcase -> new_str
|
---|
| 130 | *
|
---|
| 131 | * Returns a copy of <i>str</i> with uppercase alphabetic characters converted
|
---|
| 132 | * to lowercase and lowercase characters converted to uppercase.
|
---|
| 133 | * Note: case conversion is effective only in ASCII region.
|
---|
| 134 | *
|
---|
| 135 | * "Hello".swapcase #=> "hELLO"
|
---|
| 136 | * "cYbEr_PuNk11".swapcase #=> "CyBeR_pUnK11"
|
---|
| 137 | */
|
---|
| 138 | static mrb_value
|
---|
| 139 | mrb_str_swapcase(mrb_state *mrb, mrb_value self)
|
---|
| 140 | {
|
---|
| 141 | mrb_value str;
|
---|
| 142 |
|
---|
| 143 | str = mrb_str_dup(mrb, self);
|
---|
| 144 | mrb_str_swapcase_bang(mrb, str);
|
---|
| 145 | return str;
|
---|
| 146 | }
|
---|
| 147 |
|
---|
| 148 | /*
|
---|
| 149 | * call-seq:
|
---|
| 150 | * str << integer -> str
|
---|
| 151 | * str.concat(integer) -> str
|
---|
| 152 | * str << obj -> str
|
---|
| 153 | * str.concat(obj) -> str
|
---|
| 154 | *
|
---|
| 155 | * Append---Concatenates the given object to <i>str</i>. If the object is a
|
---|
| 156 | * <code>Integer</code>, it is considered as a codepoint, and is converted
|
---|
[439] | 157 | * to a character before concatenation
|
---|
| 158 | * (equivalent to <code>str.concat(integer.chr(__ENCODING__))</code>).
|
---|
[270] | 159 | *
|
---|
| 160 | * a = "hello "
|
---|
| 161 | * a << "world" #=> "hello world"
|
---|
| 162 | * a.concat(33) #=> "hello world!"
|
---|
| 163 | */
|
---|
| 164 | static mrb_value
|
---|
[439] | 165 | mrb_str_concat_m(mrb_state *mrb, mrb_value self)
|
---|
[270] | 166 | {
|
---|
| 167 | mrb_value str;
|
---|
[331] | 168 |
|
---|
| 169 | mrb_get_args(mrb, "o", &str);
|
---|
[439] | 170 | if (mrb_fixnum_p(str) || mrb_float_p(str))
|
---|
| 171 | #ifdef MRB_UTF8_STRING
|
---|
| 172 | str = int_chr_utf8(mrb, str);
|
---|
| 173 | #else
|
---|
| 174 | str = int_chr_binary(mrb, str);
|
---|
| 175 | #endif
|
---|
[331] | 176 | else
|
---|
[439] | 177 | mrb_ensure_string_type(mrb, str);
|
---|
| 178 | mrb_str_cat_str(mrb, self, str);
|
---|
[270] | 179 | return self;
|
---|
| 180 | }
|
---|
| 181 |
|
---|
| 182 | /*
|
---|
| 183 | * call-seq:
|
---|
| 184 | * str.start_with?([prefixes]+) -> true or false
|
---|
| 185 | *
|
---|
| 186 | * Returns true if +str+ starts with one of the +prefixes+ given.
|
---|
| 187 | *
|
---|
| 188 | * "hello".start_with?("hell") #=> true
|
---|
| 189 | *
|
---|
| 190 | * # returns true if one of the prefixes matches.
|
---|
| 191 | * "hello".start_with?("heaven", "hell") #=> true
|
---|
| 192 | * "hello".start_with?("heaven", "paradise") #=> false
|
---|
| 193 | * "h".start_with?("heaven", "hell") #=> false
|
---|
| 194 | */
|
---|
| 195 | static mrb_value
|
---|
| 196 | mrb_str_start_with(mrb_state *mrb, mrb_value self)
|
---|
| 197 | {
|
---|
| 198 | mrb_value *argv, sub;
|
---|
| 199 | mrb_int argc, i;
|
---|
| 200 | mrb_get_args(mrb, "*", &argv, &argc);
|
---|
| 201 |
|
---|
| 202 | for (i = 0; i < argc; i++) {
|
---|
| 203 | size_t len_l, len_r;
|
---|
| 204 | int ai = mrb_gc_arena_save(mrb);
|
---|
[439] | 205 | sub = mrb_ensure_string_type(mrb, argv[i]);
|
---|
[270] | 206 | mrb_gc_arena_restore(mrb, ai);
|
---|
| 207 | len_l = RSTRING_LEN(self);
|
---|
| 208 | len_r = RSTRING_LEN(sub);
|
---|
| 209 | if (len_l >= len_r) {
|
---|
| 210 | if (memcmp(RSTRING_PTR(self), RSTRING_PTR(sub), len_r) == 0) {
|
---|
| 211 | return mrb_true_value();
|
---|
| 212 | }
|
---|
| 213 | }
|
---|
| 214 | }
|
---|
| 215 | return mrb_false_value();
|
---|
| 216 | }
|
---|
| 217 |
|
---|
| 218 | /*
|
---|
| 219 | * call-seq:
|
---|
| 220 | * str.end_with?([suffixes]+) -> true or false
|
---|
| 221 | *
|
---|
| 222 | * Returns true if +str+ ends with one of the +suffixes+ given.
|
---|
| 223 | */
|
---|
| 224 | static mrb_value
|
---|
| 225 | mrb_str_end_with(mrb_state *mrb, mrb_value self)
|
---|
| 226 | {
|
---|
| 227 | mrb_value *argv, sub;
|
---|
| 228 | mrb_int argc, i;
|
---|
| 229 | mrb_get_args(mrb, "*", &argv, &argc);
|
---|
| 230 |
|
---|
| 231 | for (i = 0; i < argc; i++) {
|
---|
| 232 | size_t len_l, len_r;
|
---|
| 233 | int ai = mrb_gc_arena_save(mrb);
|
---|
[439] | 234 | sub = mrb_ensure_string_type(mrb, argv[i]);
|
---|
[270] | 235 | mrb_gc_arena_restore(mrb, ai);
|
---|
| 236 | len_l = RSTRING_LEN(self);
|
---|
| 237 | len_r = RSTRING_LEN(sub);
|
---|
| 238 | if (len_l >= len_r) {
|
---|
| 239 | if (memcmp(RSTRING_PTR(self) + (len_l - len_r),
|
---|
| 240 | RSTRING_PTR(sub),
|
---|
| 241 | len_r) == 0) {
|
---|
| 242 | return mrb_true_value();
|
---|
| 243 | }
|
---|
| 244 | }
|
---|
| 245 | }
|
---|
| 246 | return mrb_false_value();
|
---|
| 247 | }
|
---|
| 248 |
|
---|
/* Kind of a parsed tr pattern node (stored in struct tr_pattern's type). */
enum tr_pattern_type {
  TR_UNINITIALIZED = 0,  /* node not filled in yet (matches a { 0 } initializer) */
  TR_IN_ORDER,           /* 1: run of literal characters */
  TR_RANGE,              /* 2: <ch> '-' <ch> range */
};
|
---|
| 254 |
|
---|
| 255 | /*
|
---|
| 256 | #tr Pattern syntax
|
---|
| 257 |
|
---|
| 258 | <syntax> ::= (<pattern>)* | '^' (<pattern>)*
|
---|
| 259 | <pattern> ::= <in order> | <range>
|
---|
| 260 | <in order> ::= (<ch>)+
|
---|
| 261 | <range> ::= <ch> '-' <ch>
|
---|
| 262 | */
|
---|
| 263 | struct tr_pattern {
|
---|
| 264 | uint8_t type; // 1:in-order, 2:range
|
---|
| 265 | mrb_bool flag_reverse : 1;
|
---|
| 266 | mrb_bool flag_on_heap : 1;
|
---|
| 267 | uint16_t n;
|
---|
| 268 | union {
|
---|
| 269 | uint16_t start_pos;
|
---|
| 270 | char ch[2];
|
---|
| 271 | } val;
|
---|
| 272 | struct tr_pattern *next;
|
---|
| 273 | };
|
---|
| 274 |
|
---|
| 275 | #define STATIC_TR_PATTERN { 0 }
|
---|
| 276 |
|
---|
| 277 | static inline void
|
---|
| 278 | tr_free_pattern(mrb_state *mrb, struct tr_pattern *pat)
|
---|
| 279 | {
|
---|
| 280 | while (pat) {
|
---|
| 281 | struct tr_pattern *p = pat->next;
|
---|
| 282 | if (pat->flag_on_heap) {
|
---|
| 283 | mrb_free(mrb, pat);
|
---|
| 284 | }
|
---|
| 285 | pat = p;
|
---|
| 286 | }
|
---|
| 287 | }
|
---|
| 288 |
|
---|
| 289 | static struct tr_pattern*
|
---|
| 290 | tr_parse_pattern(mrb_state *mrb, struct tr_pattern *ret, const mrb_value v_pattern, mrb_bool flag_reverse_enable)
|
---|
| 291 | {
|
---|
| 292 | const char *pattern = RSTRING_PTR(v_pattern);
|
---|
| 293 | mrb_int pattern_length = RSTRING_LEN(v_pattern);
|
---|
| 294 | mrb_bool flag_reverse = FALSE;
|
---|
| 295 | struct tr_pattern *pat1;
|
---|
| 296 | mrb_int i = 0;
|
---|
| 297 |
|
---|
| 298 | if(flag_reverse_enable && pattern_length >= 2 && pattern[0] == '^') {
|
---|
| 299 | flag_reverse = TRUE;
|
---|
| 300 | i++;
|
---|
| 301 | }
|
---|
| 302 |
|
---|
| 303 | while (i < pattern_length) {
|
---|
| 304 | /* is range pattern ? */
|
---|
| 305 | mrb_bool const ret_uninit = (ret->type == TR_UNINITIALIZED);
|
---|
| 306 | pat1 = ret_uninit
|
---|
| 307 | ? ret
|
---|
| 308 | : (struct tr_pattern*)mrb_malloc_simple(mrb, sizeof(struct tr_pattern));
|
---|
| 309 | if ((i+2) < pattern_length && pattern[i] != '\\' && pattern[i+1] == '-') {
|
---|
| 310 | if (pat1 == NULL && ret) {
|
---|
| 311 | nomem:
|
---|
| 312 | tr_free_pattern(mrb, ret);
|
---|
| 313 | mrb_exc_raise(mrb, mrb_obj_value(mrb->nomem_err));
|
---|
| 314 | return NULL; /* not reached */
|
---|
| 315 | }
|
---|
| 316 | pat1->type = TR_RANGE;
|
---|
| 317 | pat1->flag_reverse = flag_reverse;
|
---|
| 318 | pat1->flag_on_heap = !ret_uninit;
|
---|
| 319 | pat1->n = pattern[i+2] - pattern[i] + 1;
|
---|
| 320 | pat1->next = NULL;
|
---|
| 321 | pat1->val.ch[0] = pattern[i];
|
---|
| 322 | pat1->val.ch[1] = pattern[i+2];
|
---|
| 323 | i += 3;
|
---|
| 324 | }
|
---|
| 325 | else {
|
---|
| 326 | /* in order pattern. */
|
---|
| 327 | mrb_int start_pos = i++;
|
---|
| 328 | mrb_int len;
|
---|
| 329 |
|
---|
| 330 | while (i < pattern_length) {
|
---|
| 331 | if ((i+2) < pattern_length && pattern[i] != '\\' && pattern[i+1] == '-')
|
---|
| 332 | break;
|
---|
| 333 | i++;
|
---|
| 334 | }
|
---|
| 335 |
|
---|
| 336 | len = i - start_pos;
|
---|
| 337 | if (len > UINT16_MAX) {
|
---|
| 338 | mrb_raise(mrb, E_ARGUMENT_ERROR, "tr pattern too long (max 65536)");
|
---|
| 339 | }
|
---|
| 340 | if (pat1 == NULL && ret) {
|
---|
| 341 | goto nomem;
|
---|
| 342 | }
|
---|
| 343 | pat1->type = TR_IN_ORDER;
|
---|
| 344 | pat1->flag_reverse = flag_reverse;
|
---|
| 345 | pat1->flag_on_heap = !ret_uninit;
|
---|
| 346 | pat1->n = len;
|
---|
| 347 | pat1->next = NULL;
|
---|
| 348 | pat1->val.start_pos = start_pos;
|
---|
| 349 | }
|
---|
| 350 |
|
---|
| 351 | if (ret == NULL || ret_uninit) {
|
---|
| 352 | ret = pat1;
|
---|
| 353 | }
|
---|
| 354 | else {
|
---|
| 355 | struct tr_pattern *p = ret;
|
---|
| 356 | while (p->next != NULL) {
|
---|
| 357 | p = p->next;
|
---|
| 358 | }
|
---|
| 359 | p->next = pat1;
|
---|
| 360 | }
|
---|
| 361 | }
|
---|
| 362 |
|
---|
| 363 | return ret;
|
---|
| 364 | }
|
---|
| 365 |
|
---|
| 366 | static inline mrb_int
|
---|
| 367 | tr_find_character(const struct tr_pattern *pat, const char *pat_str, int ch)
|
---|
| 368 | {
|
---|
| 369 | mrb_int ret = -1;
|
---|
| 370 | mrb_int n_sum = 0;
|
---|
| 371 | mrb_int flag_reverse = pat ? pat->flag_reverse : 0;
|
---|
| 372 |
|
---|
| 373 | while (pat != NULL) {
|
---|
| 374 | if (pat->type == TR_IN_ORDER) {
|
---|
| 375 | int i;
|
---|
| 376 | for (i = 0; i < pat->n; i++) {
|
---|
| 377 | if (pat_str[pat->val.start_pos + i] == ch) ret = n_sum + i;
|
---|
| 378 | }
|
---|
| 379 | }
|
---|
| 380 | else if (pat->type == TR_RANGE) {
|
---|
| 381 | if (pat->val.ch[0] <= ch && ch <= pat->val.ch[1])
|
---|
| 382 | ret = n_sum + ch - pat->val.ch[0];
|
---|
| 383 | }
|
---|
| 384 | else {
|
---|
| 385 | mrb_assert(pat->type == TR_UNINITIALIZED);
|
---|
| 386 | }
|
---|
| 387 | n_sum += pat->n;
|
---|
| 388 | pat = pat->next;
|
---|
| 389 | }
|
---|
| 390 |
|
---|
| 391 | if (flag_reverse) {
|
---|
| 392 | return (ret < 0) ? MRB_INT_MAX : -1;
|
---|
| 393 | }
|
---|
| 394 | return ret;
|
---|
| 395 | }
|
---|
| 396 |
|
---|
| 397 | static inline mrb_int
|
---|
| 398 | tr_get_character(const struct tr_pattern *pat, const char *pat_str, mrb_int n_th)
|
---|
| 399 | {
|
---|
| 400 | mrb_int n_sum = 0;
|
---|
| 401 |
|
---|
| 402 | while (pat != NULL) {
|
---|
| 403 | if (n_th < (n_sum + pat->n)) {
|
---|
| 404 | mrb_int i = (n_th - n_sum);
|
---|
| 405 |
|
---|
| 406 | switch (pat->type) {
|
---|
| 407 | case TR_IN_ORDER:
|
---|
| 408 | return pat_str[pat->val.start_pos + i];
|
---|
| 409 | case TR_RANGE:
|
---|
| 410 | return pat->val.ch[0]+i;
|
---|
| 411 | case TR_UNINITIALIZED:
|
---|
| 412 | return -1;
|
---|
| 413 | }
|
---|
| 414 | }
|
---|
| 415 | if (pat->next == NULL) {
|
---|
| 416 | switch (pat->type) {
|
---|
| 417 | case TR_IN_ORDER:
|
---|
| 418 | return pat_str[pat->val.start_pos + pat->n - 1];
|
---|
| 419 | case TR_RANGE:
|
---|
| 420 | return pat->val.ch[1];
|
---|
| 421 | case TR_UNINITIALIZED:
|
---|
| 422 | return -1;
|
---|
| 423 | }
|
---|
| 424 | }
|
---|
| 425 | n_sum += pat->n;
|
---|
| 426 | pat = pat->next;
|
---|
| 427 | }
|
---|
| 428 |
|
---|
| 429 | return -1;
|
---|
| 430 | }
|
---|
| 431 |
|
---|
/*
 * Marks byte value ch as present in the 256-bit set bitmap
 * (byte index ch / 8, bit index ch % 8).
 */
static inline void
tr_bitmap_set(uint8_t bitmap[32], uint8_t ch)
{
  bitmap[ch >> 3] |= (uint8_t)(1u << (ch & 7));
}
|
---|
| 439 |
|
---|
| 440 | static inline mrb_bool
|
---|
| 441 | tr_bitmap_detect(uint8_t bitmap[32], uint8_t ch)
|
---|
| 442 | {
|
---|
| 443 | uint8_t idx1 = ch / 8;
|
---|
| 444 | uint8_t idx2 = ch % 8;
|
---|
| 445 | if (bitmap[idx1] & (1<<idx2))
|
---|
| 446 | return TRUE;
|
---|
| 447 | return FALSE;
|
---|
| 448 | }
|
---|
| 449 |
|
---|
| 450 | /* compile patter to bitmap */
|
---|
| 451 | static void
|
---|
| 452 | tr_compile_pattern(const struct tr_pattern *pat, mrb_value pstr, uint8_t bitmap[32])
|
---|
| 453 | {
|
---|
| 454 | const char *pattern = RSTRING_PTR(pstr);
|
---|
| 455 | mrb_int flag_reverse = pat ? pat->flag_reverse : 0;
|
---|
| 456 | int i;
|
---|
| 457 |
|
---|
| 458 | for (i=0; i<32; i++) {
|
---|
| 459 | bitmap[i] = 0;
|
---|
| 460 | }
|
---|
| 461 | while (pat != NULL) {
|
---|
| 462 | if (pat->type == TR_IN_ORDER) {
|
---|
| 463 | for (i = 0; i < pat->n; i++) {
|
---|
| 464 | tr_bitmap_set(bitmap, pattern[pat->val.start_pos + i]);
|
---|
| 465 | }
|
---|
| 466 | }
|
---|
| 467 | else if (pat->type == TR_RANGE) {
|
---|
| 468 | for (i = pat->val.ch[0]; i < pat->val.ch[1]; i++) {
|
---|
| 469 | tr_bitmap_set(bitmap, i);
|
---|
| 470 | }
|
---|
| 471 | }
|
---|
| 472 | else {
|
---|
| 473 | mrb_assert(pat->type == TR_UNINITIALIZED);
|
---|
| 474 | }
|
---|
| 475 | pat = pat->next;
|
---|
| 476 | }
|
---|
| 477 |
|
---|
| 478 | if (flag_reverse) {
|
---|
| 479 | for (i=0; i<32; i++) {
|
---|
| 480 | bitmap[i] ^= 0xff;
|
---|
| 481 | }
|
---|
| 482 | }
|
---|
| 483 | }
|
---|
| 484 |
|
---|
| 485 | static mrb_bool
|
---|
| 486 | str_tr(mrb_state *mrb, mrb_value str, mrb_value p1, mrb_value p2, mrb_bool squeeze)
|
---|
| 487 | {
|
---|
| 488 | struct tr_pattern pat = STATIC_TR_PATTERN;
|
---|
| 489 | struct tr_pattern rep_storage = STATIC_TR_PATTERN;
|
---|
| 490 | char *s;
|
---|
| 491 | mrb_int len;
|
---|
| 492 | mrb_int i;
|
---|
| 493 | mrb_int j;
|
---|
| 494 | mrb_bool flag_changed = FALSE;
|
---|
| 495 | mrb_int lastch = -1;
|
---|
| 496 | struct tr_pattern *rep;
|
---|
| 497 |
|
---|
| 498 | mrb_str_modify(mrb, mrb_str_ptr(str));
|
---|
| 499 | tr_parse_pattern(mrb, &pat, p1, TRUE);
|
---|
| 500 | rep = tr_parse_pattern(mrb, &rep_storage, p2, FALSE);
|
---|
| 501 | s = RSTRING_PTR(str);
|
---|
| 502 | len = RSTRING_LEN(str);
|
---|
| 503 |
|
---|
| 504 | for (i=j=0; i<len; i++,j++) {
|
---|
| 505 | mrb_int n = tr_find_character(&pat, RSTRING_PTR(p1), s[i]);
|
---|
| 506 |
|
---|
| 507 | if (i>j) s[j] = s[i];
|
---|
| 508 | if (n >= 0) {
|
---|
| 509 | flag_changed = TRUE;
|
---|
| 510 | if (rep == NULL) {
|
---|
| 511 | j--;
|
---|
| 512 | }
|
---|
| 513 | else {
|
---|
| 514 | mrb_int c = tr_get_character(rep, RSTRING_PTR(p2), n);
|
---|
| 515 |
|
---|
| 516 | if (c < 0 || (squeeze && c == lastch)) {
|
---|
| 517 | j--;
|
---|
| 518 | continue;
|
---|
| 519 | }
|
---|
| 520 | if (c > 0x80) {
|
---|
| 521 | mrb_raisef(mrb, E_ARGUMENT_ERROR, "character (%i) out of range", c);
|
---|
| 522 | }
|
---|
| 523 | lastch = c;
|
---|
| 524 | s[i] = (char)c;
|
---|
| 525 | }
|
---|
| 526 | }
|
---|
| 527 | }
|
---|
| 528 |
|
---|
| 529 | tr_free_pattern(mrb, &pat);
|
---|
| 530 | tr_free_pattern(mrb, rep);
|
---|
| 531 |
|
---|
| 532 | if (flag_changed) {
|
---|
| 533 | RSTR_SET_LEN(RSTRING(str), j);
|
---|
| 534 | RSTRING_PTR(str)[j] = 0;
|
---|
| 535 | }
|
---|
| 536 | return flag_changed;
|
---|
| 537 | }
|
---|
| 538 |
|
---|
| 539 | /*
|
---|
| 540 | * call-seq:
|
---|
| 541 | * str.tr(from_str, to_str) => new_str
|
---|
| 542 | *
|
---|
| 543 | * Returns a copy of str with the characters in from_str replaced by the
|
---|
| 544 | * corresponding characters in to_str. If to_str is shorter than from_str,
|
---|
| 545 | * it is padded with its last character in order to maintain the
|
---|
| 546 | * correspondence.
|
---|
| 547 | *
|
---|
| 548 | * "hello".tr('el', 'ip') #=> "hippo"
|
---|
| 549 | * "hello".tr('aeiou', '*') #=> "h*ll*"
|
---|
| 550 | * "hello".tr('aeiou', 'AA*') #=> "hAll*"
|
---|
| 551 | *
|
---|
| 552 | * Both strings may use the c1-c2 notation to denote ranges of characters,
|
---|
| 553 | * and from_str may start with a ^, which denotes all characters except
|
---|
| 554 | * those listed.
|
---|
| 555 | *
|
---|
| 556 | * "hello".tr('a-y', 'b-z') #=> "ifmmp"
|
---|
| 557 | * "hello".tr('^aeiou', '*') #=> "*e**o"
|
---|
| 558 | *
|
---|
| 559 | * The backslash character \ can be used to escape ^ or - and is otherwise
|
---|
| 560 | * ignored unless it appears at the end of a range or the end of the
|
---|
| 561 | * from_str or to_str:
|
---|
| 562 | *
|
---|
| 563 | *
|
---|
| 564 | * "hello^world".tr("\\^aeiou", "*") #=> "h*ll**w*rld"
|
---|
| 565 | * "hello-world".tr("a\\-eo", "*") #=> "h*ll**w*rld"
|
---|
| 566 | *
|
---|
| 567 | * "hello\r\nworld".tr("\r", "") #=> "hello\nworld"
|
---|
| 568 | * "hello\r\nworld".tr("\\r", "") #=> "hello\r\nwold"
|
---|
| 569 | * "hello\r\nworld".tr("\\\r", "") #=> "hello\nworld"
|
---|
| 570 | *
|
---|
| 571 | * "X['\\b']".tr("X\\", "") #=> "['b']"
|
---|
| 572 | * "X['\\b']".tr("X-\\]", "") #=> "'b'"
|
---|
| 573 | *
|
---|
| 574 | * Note: conversion is effective only in ASCII region.
|
---|
| 575 | */
|
---|
[270] | 576 | static mrb_value
|
---|
[439] | 577 | mrb_str_tr(mrb_state *mrb, mrb_value str)
|
---|
[270] | 578 | {
|
---|
[439] | 579 | mrb_value dup;
|
---|
| 580 | mrb_value p1, p2;
|
---|
| 581 |
|
---|
| 582 | mrb_get_args(mrb, "SS", &p1, &p2);
|
---|
| 583 | dup = mrb_str_dup(mrb, str);
|
---|
| 584 | str_tr(mrb, dup, p1, p2, FALSE);
|
---|
| 585 | return dup;
|
---|
[270] | 586 | }
|
---|
| 587 |
|
---|
[439] | 588 | /*
|
---|
| 589 | * call-seq:
|
---|
| 590 | * str.tr!(from_str, to_str) -> str or nil
|
---|
| 591 | *
|
---|
| 592 | * Translates str in place, using the same rules as String#tr.
|
---|
| 593 | * Returns str, or nil if no changes were made.
|
---|
| 594 | */
|
---|
[270] | 595 | static mrb_value
|
---|
[439] | 596 | mrb_str_tr_bang(mrb_state *mrb, mrb_value str)
|
---|
[270] | 597 | {
|
---|
[439] | 598 | mrb_value p1, p2;
|
---|
| 599 |
|
---|
| 600 | mrb_get_args(mrb, "SS", &p1, &p2);
|
---|
| 601 | if (str_tr(mrb, str, p1, p2, FALSE)) {
|
---|
| 602 | return str;
|
---|
| 603 | }
|
---|
| 604 | return mrb_nil_value();
|
---|
[270] | 605 | }
|
---|
| 606 |
|
---|
| 607 | /*
|
---|
[439] | 608 | * call-seq:
|
---|
| 609 | * str.tr_s(from_str, to_str) -> new_str
|
---|
[270] | 610 | *
|
---|
[439] | 611 | * Processes a copy of str as described under String#tr, then removes
|
---|
| 612 | * duplicate characters in regions that were affected by the translation.
|
---|
[270] | 613 | *
|
---|
[439] | 614 | * "hello".tr_s('l', 'r') #=> "hero"
|
---|
| 615 | * "hello".tr_s('el', '*') #=> "h*o"
|
---|
| 616 | * "hello".tr_s('el', 'hx') #=> "hhxo"
|
---|
[270] | 617 | */
|
---|
| 618 | static mrb_value
|
---|
[439] | 619 | mrb_str_tr_s(mrb_state *mrb, mrb_value str)
|
---|
[270] | 620 | {
|
---|
[439] | 621 | mrb_value dup;
|
---|
| 622 | mrb_value p1, p2;
|
---|
| 623 |
|
---|
| 624 | mrb_get_args(mrb, "SS", &p1, &p2);
|
---|
| 625 | dup = mrb_str_dup(mrb, str);
|
---|
| 626 | str_tr(mrb, dup, p1, p2, TRUE);
|
---|
| 627 | return dup;
|
---|
[270] | 628 | }
|
---|
| 629 |
|
---|
[439] | 630 | /*
|
---|
| 631 | * call-seq:
|
---|
| 632 | * str.tr_s!(from_str, to_str) -> str or nil
|
---|
| 633 | *
|
---|
| 634 | * Performs String#tr_s processing on str in place, returning
|
---|
| 635 | * str, or nil if no changes were made.
|
---|
| 636 | */
|
---|
[270] | 637 | static mrb_value
|
---|
[439] | 638 | mrb_str_tr_s_bang(mrb_state *mrb, mrb_value str)
|
---|
[270] | 639 | {
|
---|
[439] | 640 | mrb_value p1, p2;
|
---|
| 641 |
|
---|
| 642 | mrb_get_args(mrb, "SS", &p1, &p2);
|
---|
| 643 | if (str_tr(mrb, str, p1, p2, TRUE)) {
|
---|
| 644 | return str;
|
---|
| 645 | }
|
---|
| 646 | return mrb_nil_value();
|
---|
| 647 | }
|
---|
| 648 |
|
---|
| 649 | static mrb_bool
|
---|
| 650 | str_squeeze(mrb_state *mrb, mrb_value str, mrb_value v_pat)
|
---|
| 651 | {
|
---|
| 652 | struct tr_pattern pat_storage = STATIC_TR_PATTERN;
|
---|
| 653 | struct tr_pattern *pat = NULL;
|
---|
| 654 | mrb_int i, j;
|
---|
| 655 | char *s;
|
---|
[270] | 656 | mrb_int len;
|
---|
[439] | 657 | mrb_bool flag_changed = FALSE;
|
---|
| 658 | mrb_int lastch = -1;
|
---|
| 659 | uint8_t bitmap[32];
|
---|
[270] | 660 |
|
---|
[439] | 661 | mrb_str_modify(mrb, mrb_str_ptr(str));
|
---|
| 662 | if (!mrb_nil_p(v_pat)) {
|
---|
| 663 | pat = tr_parse_pattern(mrb, &pat_storage, v_pat, TRUE);
|
---|
| 664 | tr_compile_pattern(pat, v_pat, bitmap);
|
---|
| 665 | tr_free_pattern(mrb, pat);
|
---|
[270] | 666 | }
|
---|
[439] | 667 | s = RSTRING_PTR(str);
|
---|
| 668 | len = RSTRING_LEN(str);
|
---|
| 669 |
|
---|
| 670 | if (pat) {
|
---|
| 671 | for (i=j=0; i<len; i++,j++) {
|
---|
| 672 | if (i>j) s[j] = s[i];
|
---|
| 673 | if (tr_bitmap_detect(bitmap, s[i]) && s[i] == lastch) {
|
---|
| 674 | flag_changed = TRUE;
|
---|
| 675 | j--;
|
---|
| 676 | }
|
---|
| 677 | lastch = s[i];
|
---|
| 678 | }
|
---|
[270] | 679 | }
|
---|
| 680 | else {
|
---|
[439] | 681 | for (i=j=0; i<len; i++,j++) {
|
---|
| 682 | if (i>j) s[j] = s[i];
|
---|
| 683 | if (s[i] >= 0 && s[i] == lastch) {
|
---|
| 684 | flag_changed = TRUE;
|
---|
| 685 | j--;
|
---|
| 686 | }
|
---|
| 687 | lastch = s[i];
|
---|
| 688 | }
|
---|
[270] | 689 | }
|
---|
| 690 |
|
---|
[439] | 691 | if (flag_changed) {
|
---|
| 692 | RSTR_SET_LEN(RSTRING(str), j);
|
---|
| 693 | RSTRING_PTR(str)[j] = 0;
|
---|
[270] | 694 | }
|
---|
[439] | 695 | return flag_changed;
|
---|
[270] | 696 | }
|
---|
| 697 |
|
---|
| 698 | /*
|
---|
[439] | 699 | * call-seq:
|
---|
| 700 | * str.squeeze([other_str]) -> new_str
|
---|
[270] | 701 | *
|
---|
[439] | 702 | * Builds a set of characters from the other_str
|
---|
| 703 | * parameter(s) using the procedure described for String#count. Returns a
|
---|
| 704 | * new string where runs of the same character that occur in this set are
|
---|
| 705 | * replaced by a single character. If no arguments are given, all runs of
|
---|
| 706 | * identical characters are replaced by a single character.
|
---|
[270] | 707 | *
|
---|
[439] | 708 | * "yellow moon".squeeze #=> "yelow mon"
|
---|
| 709 | * " now is the".squeeze(" ") #=> " now is the"
|
---|
| 710 | * "putters shoot balls".squeeze("m-z") #=> "puters shot balls"
|
---|
[270] | 711 | */
|
---|
| 712 | static mrb_value
|
---|
[439] | 713 | mrb_str_squeeze(mrb_state *mrb, mrb_value str)
|
---|
[270] | 714 | {
|
---|
[439] | 715 | mrb_value pat = mrb_nil_value();
|
---|
| 716 | mrb_value dup;
|
---|
| 717 |
|
---|
| 718 | mrb_get_args(mrb, "|S", &pat);
|
---|
| 719 | dup = mrb_str_dup(mrb, str);
|
---|
| 720 | str_squeeze(mrb, dup, pat);
|
---|
| 721 | return dup;
|
---|
| 722 | }
|
---|
| 723 |
|
---|
| 724 | /*
|
---|
| 725 | * call-seq:
|
---|
| 726 | * str.squeeze!([other_str]) -> str or nil
|
---|
| 727 | *
|
---|
| 728 | * Squeezes str in place, returning either str, or nil if no
|
---|
| 729 | * changes were made.
|
---|
| 730 | */
|
---|
| 731 | static mrb_value
|
---|
| 732 | mrb_str_squeeze_bang(mrb_state *mrb, mrb_value str)
|
---|
| 733 | {
|
---|
| 734 | mrb_value pat = mrb_nil_value();
|
---|
| 735 |
|
---|
| 736 | mrb_get_args(mrb, "|S", &pat);
|
---|
| 737 | if (str_squeeze(mrb, str, pat)) {
|
---|
| 738 | return str;
|
---|
| 739 | }
|
---|
| 740 | return mrb_nil_value();
|
---|
| 741 | }
|
---|
| 742 |
|
---|
| 743 | static mrb_bool
|
---|
| 744 | str_delete(mrb_state *mrb, mrb_value str, mrb_value v_pat)
|
---|
| 745 | {
|
---|
| 746 | struct tr_pattern pat = STATIC_TR_PATTERN;
|
---|
| 747 | mrb_int i, j;
|
---|
| 748 | char *s;
|
---|
[270] | 749 | mrb_int len;
|
---|
[439] | 750 | mrb_bool flag_changed = FALSE;
|
---|
| 751 | uint8_t bitmap[32];
|
---|
[270] | 752 |
|
---|
[439] | 753 | mrb_str_modify(mrb, mrb_str_ptr(str));
|
---|
| 754 | tr_parse_pattern(mrb, &pat, v_pat, TRUE);
|
---|
| 755 | tr_compile_pattern(&pat, v_pat, bitmap);
|
---|
| 756 | tr_free_pattern(mrb, &pat);
|
---|
[270] | 757 |
|
---|
[439] | 758 | s = RSTRING_PTR(str);
|
---|
| 759 | len = RSTRING_LEN(str);
|
---|
| 760 |
|
---|
| 761 | for (i=j=0; i<len; i++,j++) {
|
---|
| 762 | if (i>j) s[j] = s[i];
|
---|
| 763 | if (tr_bitmap_detect(bitmap, s[i])) {
|
---|
| 764 | flag_changed = TRUE;
|
---|
| 765 | j--;
|
---|
[270] | 766 | }
|
---|
| 767 | }
|
---|
[439] | 768 | if (flag_changed) {
|
---|
| 769 | RSTR_SET_LEN(RSTRING(str), j);
|
---|
| 770 | RSTRING_PTR(str)[j] = 0;
|
---|
[270] | 771 | }
|
---|
[439] | 772 | return flag_changed;
|
---|
[270] | 773 | }
|
---|
| 774 |
|
---|
[439] | 775 | static mrb_value
|
---|
| 776 | mrb_str_delete(mrb_state *mrb, mrb_value str)
|
---|
| 777 | {
|
---|
| 778 | mrb_value pat;
|
---|
| 779 | mrb_value dup;
|
---|
| 780 |
|
---|
| 781 | mrb_get_args(mrb, "S", &pat);
|
---|
| 782 | dup = mrb_str_dup(mrb, str);
|
---|
| 783 | str_delete(mrb, dup, pat);
|
---|
| 784 | return dup;
|
---|
| 785 | }
|
---|
| 786 |
|
---|
| 787 | static mrb_value
|
---|
| 788 | mrb_str_delete_bang(mrb_state *mrb, mrb_value str)
|
---|
| 789 | {
|
---|
| 790 | mrb_value pat;
|
---|
| 791 |
|
---|
| 792 | mrb_get_args(mrb, "S", &pat);
|
---|
| 793 | if (str_delete(mrb, str, pat)) {
|
---|
| 794 | return str;
|
---|
| 795 | }
|
---|
| 796 | return mrb_nil_value();
|
---|
| 797 | }
|
---|
| 798 |
|
---|
[270] | 799 | /*
|
---|
[439] | 800 | * call-seq:
|
---|
| 801 | * str.count([other_str]) -> integer
|
---|
| 802 | *
|
---|
| 803 | * Each other_str parameter defines a set of characters to count. The
|
---|
| 804 | * intersection of these sets defines the characters to count in str. Any
|
---|
| 805 | * other_str that starts with a caret ^ is negated. The sequence c1-c2
|
---|
| 806 | * means all characters between c1 and c2. The backslash character \ can
|
---|
| 807 | * be used to escape ^ or - and is otherwise ignored unless it appears at
|
---|
| 808 | * the end of a sequence or the end of a other_str.
|
---|
| 809 | */
|
---|
| 810 | static mrb_value
|
---|
| 811 | mrb_str_count(mrb_state *mrb, mrb_value str)
|
---|
| 812 | {
|
---|
| 813 | mrb_value v_pat = mrb_nil_value();
|
---|
| 814 | mrb_int i;
|
---|
| 815 | char *s;
|
---|
| 816 | mrb_int len;
|
---|
| 817 | mrb_int count = 0;
|
---|
| 818 | struct tr_pattern pat = STATIC_TR_PATTERN;
|
---|
| 819 | uint8_t bitmap[32];
|
---|
| 820 |
|
---|
| 821 | mrb_get_args(mrb, "S", &v_pat);
|
---|
| 822 | tr_parse_pattern(mrb, &pat, v_pat, TRUE);
|
---|
| 823 | tr_compile_pattern(&pat, v_pat, bitmap);
|
---|
| 824 | tr_free_pattern(mrb, &pat);
|
---|
| 825 |
|
---|
| 826 | s = RSTRING_PTR(str);
|
---|
| 827 | len = RSTRING_LEN(str);
|
---|
| 828 | for (i = 0; i < len; i++) {
|
---|
| 829 | if (tr_bitmap_detect(bitmap, s[i])) count++;
|
---|
| 830 | }
|
---|
| 831 | return mrb_fixnum_value(count);
|
---|
| 832 | }
|
---|
| 833 |
|
---|
| 834 | static mrb_value
|
---|
| 835 | mrb_str_hex(mrb_state *mrb, mrb_value self)
|
---|
| 836 | {
|
---|
| 837 | return mrb_str_to_inum(mrb, self, 16, FALSE);
|
---|
| 838 | }
|
---|
| 839 |
|
---|
| 840 | static mrb_value
|
---|
| 841 | mrb_str_oct(mrb_state *mrb, mrb_value self)
|
---|
| 842 | {
|
---|
| 843 | return mrb_str_to_inum(mrb, self, 8, FALSE);
|
---|
| 844 | }
|
---|
| 845 |
|
---|
| 846 | /*
|
---|
[270] | 847 | * call-seq:
|
---|
[439] | 848 | * string.chr -> string
|
---|
| 849 | *
|
---|
| 850 | * Returns a one-character string at the beginning of the string.
|
---|
| 851 | *
|
---|
| 852 | * a = "abcde"
|
---|
| 853 | * a.chr #=> "a"
|
---|
| 854 | */
|
---|
| 855 | static mrb_value
|
---|
| 856 | mrb_str_chr(mrb_state *mrb, mrb_value self)
|
---|
| 857 | {
|
---|
| 858 | return mrb_str_substr(mrb, self, 0, 1);
|
---|
| 859 | }
|
---|
| 860 |
|
---|
| 861 | /*
|
---|
| 862 | * call-seq:
|
---|
| 863 | * int.chr([encoding]) -> string
|
---|
| 864 | *
|
---|
| 865 | * Returns a string containing the character represented by the +int+'s value
|
---|
| 866 | * according to +encoding+. +"ASCII-8BIT"+ (+"BINARY"+) and +"UTF-8"+ (only
|
---|
| 867 | * with +MRB_UTF8_STRING+) can be specified as +encoding+ (default is
|
---|
| 868 | * +"ASCII-8BIT"+).
|
---|
| 869 | *
|
---|
| 870 | * 65.chr #=> "A"
|
---|
| 871 | * 230.chr #=> "\xE6"
|
---|
| 872 | * 230.chr("ASCII-8BIT") #=> "\xE6"
|
---|
| 873 | * 230.chr("UTF-8") #=> "\u00E6"
|
---|
| 874 | */
|
---|
| 875 | static mrb_value
|
---|
| 876 | mrb_int_chr(mrb_state *mrb, mrb_value num)
|
---|
| 877 | {
|
---|
| 878 | mrb_value enc;
|
---|
| 879 | mrb_bool enc_given;
|
---|
| 880 |
|
---|
| 881 | mrb_get_args(mrb, "|S?", &enc, &enc_given);
|
---|
| 882 | if (!enc_given ||
|
---|
| 883 | ENC_COMP_P(enc, ENC_ASCII_8BIT) ||
|
---|
| 884 | ENC_COMP_P(enc, ENC_BINARY)) {
|
---|
| 885 | return int_chr_binary(mrb, num);
|
---|
| 886 | }
|
---|
| 887 | #ifdef MRB_UTF8_STRING
|
---|
| 888 | else if (ENC_COMP_P(enc, ENC_UTF8)) {
|
---|
| 889 | return int_chr_utf8(mrb, num);
|
---|
| 890 | }
|
---|
| 891 | #endif
|
---|
| 892 | else {
|
---|
| 893 | mrb_raisef(mrb, E_ARGUMENT_ERROR, "unknown encoding name - %v", enc);
|
---|
| 894 | }
|
---|
| 895 | /* not reached */
|
---|
| 896 | return mrb_nil_value();
|
---|
| 897 | }
|
---|
| 898 |
|
---|
| 899 | /*
|
---|
| 900 | * call-seq:
|
---|
[270] | 901 | * string.succ -> string
|
---|
| 902 | *
|
---|
| 903 | * Returns next sequence of the string;
|
---|
| 904 | *
|
---|
| 905 | * a = "abc"
|
---|
| 906 | * a.succ #=> "abd"
|
---|
| 907 | */
|
---|
| 908 | static mrb_value
|
---|
| 909 | mrb_str_succ_bang(mrb_state *mrb, mrb_value self)
|
---|
| 910 | {
|
---|
| 911 | mrb_value result;
|
---|
| 912 | unsigned char *p, *e, *b, *t;
|
---|
| 913 | const char *prepend;
|
---|
| 914 | struct RString *s = mrb_str_ptr(self);
|
---|
[331] | 915 | mrb_int l;
|
---|
[270] | 916 |
|
---|
| 917 | if (RSTRING_LEN(self) == 0)
|
---|
| 918 | return self;
|
---|
| 919 |
|
---|
| 920 | mrb_str_modify(mrb, s);
|
---|
| 921 | l = RSTRING_LEN(self);
|
---|
| 922 | b = p = (unsigned char*) RSTRING_PTR(self);
|
---|
| 923 | t = e = p + l;
|
---|
| 924 | *(e--) = 0;
|
---|
| 925 |
|
---|
| 926 | // find trailing ascii/number
|
---|
| 927 | while (e >= b) {
|
---|
| 928 | if (ISALNUM(*e))
|
---|
| 929 | break;
|
---|
| 930 | e--;
|
---|
| 931 | }
|
---|
| 932 | if (e < b) {
|
---|
| 933 | e = p + l - 1;
|
---|
| 934 | result = mrb_str_new_lit(mrb, "");
|
---|
[331] | 935 | }
|
---|
| 936 | else {
|
---|
[270] | 937 | // find leading letter of the ascii/number
|
---|
| 938 | b = e;
|
---|
| 939 | while (b > p) {
|
---|
| 940 | if (!ISALNUM(*b) || (ISALNUM(*b) && *b != '9' && *b != 'z' && *b != 'Z'))
|
---|
| 941 | break;
|
---|
| 942 | b--;
|
---|
| 943 | }
|
---|
| 944 | if (!ISALNUM(*b))
|
---|
| 945 | b++;
|
---|
| 946 | result = mrb_str_new(mrb, (char*) p, b - p);
|
---|
| 947 | }
|
---|
| 948 |
|
---|
| 949 | while (e >= b) {
|
---|
| 950 | if (!ISALNUM(*e)) {
|
---|
| 951 | if (*e == 0xff) {
|
---|
| 952 | mrb_str_cat_lit(mrb, result, "\x01");
|
---|
| 953 | (*e) = 0;
|
---|
[331] | 954 | }
|
---|
| 955 | else
|
---|
[270] | 956 | (*e)++;
|
---|
| 957 | break;
|
---|
| 958 | }
|
---|
| 959 | prepend = NULL;
|
---|
| 960 | if (*e == '9') {
|
---|
| 961 | if (e == b) prepend = "1";
|
---|
| 962 | *e = '0';
|
---|
[331] | 963 | }
|
---|
| 964 | else if (*e == 'z') {
|
---|
[270] | 965 | if (e == b) prepend = "a";
|
---|
| 966 | *e = 'a';
|
---|
[331] | 967 | }
|
---|
| 968 | else if (*e == 'Z') {
|
---|
[270] | 969 | if (e == b) prepend = "A";
|
---|
| 970 | *e = 'A';
|
---|
[331] | 971 | }
|
---|
| 972 | else {
|
---|
[270] | 973 | (*e)++;
|
---|
| 974 | break;
|
---|
| 975 | }
|
---|
| 976 | if (prepend) mrb_str_cat_cstr(mrb, result, prepend);
|
---|
| 977 | e--;
|
---|
| 978 | }
|
---|
| 979 | result = mrb_str_cat(mrb, result, (char*) b, t - b);
|
---|
| 980 | l = RSTRING_LEN(result);
|
---|
| 981 | mrb_str_resize(mrb, self, l);
|
---|
| 982 | memcpy(RSTRING_PTR(self), RSTRING_PTR(result), l);
|
---|
| 983 | return self;
|
---|
| 984 | }
|
---|
| 985 |
|
---|
| 986 | static mrb_value
|
---|
| 987 | mrb_str_succ(mrb_state *mrb, mrb_value self)
|
---|
| 988 | {
|
---|
| 989 | mrb_value str;
|
---|
| 990 |
|
---|
| 991 | str = mrb_str_dup(mrb, self);
|
---|
| 992 | mrb_str_succ_bang(mrb, str);
|
---|
| 993 | return str;
|
---|
| 994 | }
|
---|
| 995 |
|
---|
| 996 | #ifdef MRB_UTF8_STRING
|
---|
/*
 * Lookup table indexed by the first byte of a sequence; utf8code() reads
 * the sequence length from it. An entry of 0 marks a byte for which no
 * length is defined here.
 */
static const char utf8len_codepage_zero[256] =
{
  /* 0x00 - 0x7f: length 1 */
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0x80 - 0xbf: length 0 */
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
  /* 0xc0 - 0xdf: length 2 */
  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  /* 0xe0 - 0xef: length 3 */
  3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
  /* 0xf0 - 0xf4: length 4; 0xf5 - 0xff: length 0 */
  4,4,4,4,4,0,0,0,0,0,0,0,0,0,0,0,
};
|
---|
| 1008 |
|
---|
| 1009 | static mrb_int
|
---|
| 1010 | utf8code(unsigned char* p)
|
---|
| 1011 | {
|
---|
| 1012 | mrb_int len;
|
---|
| 1013 |
|
---|
| 1014 | if (p[0] < 0x80)
|
---|
| 1015 | return p[0];
|
---|
| 1016 |
|
---|
| 1017 | len = utf8len_codepage_zero[p[0]];
|
---|
| 1018 | if (len > 1 && (p[1] & 0xc0) == 0x80) {
|
---|
| 1019 | if (len == 2)
|
---|
| 1020 | return ((p[0] & 0x1f) << 6) + (p[1] & 0x3f);
|
---|
| 1021 | if ((p[2] & 0xc0) == 0x80) {
|
---|
| 1022 | if (len == 3)
|
---|
| 1023 | return ((p[0] & 0x0f) << 12) + ((p[1] & 0x3f) << 6)
|
---|
| 1024 | + (p[2] & 0x3f);
|
---|
| 1025 | if ((p[3] & 0xc0) == 0x80) {
|
---|
| 1026 | if (len == 4)
|
---|
| 1027 | return ((p[0] & 0x07) << 18) + ((p[1] & 0x3f) << 12)
|
---|
| 1028 | + ((p[2] & 0x3f) << 6) + (p[3] & 0x3f);
|
---|
| 1029 | if ((p[4] & 0xc0) == 0x80) {
|
---|
| 1030 | if (len == 5)
|
---|
| 1031 | return ((p[0] & 0x03) << 24) + ((p[1] & 0x3f) << 18)
|
---|
| 1032 | + ((p[2] & 0x3f) << 12) + ((p[3] & 0x3f) << 6)
|
---|
| 1033 | + (p[4] & 0x3f);
|
---|
| 1034 | if ((p[5] & 0xc0) == 0x80 && len == 6)
|
---|
| 1035 | return ((p[0] & 0x01) << 30) + ((p[1] & 0x3f) << 24)
|
---|
| 1036 | + ((p[2] & 0x3f) << 18) + ((p[3] & 0x3f) << 12)
|
---|
| 1037 | + ((p[4] & 0x3f) << 6) + (p[5] & 0x3f);
|
---|
| 1038 | }
|
---|
| 1039 | }
|
---|
| 1040 | }
|
---|
| 1041 | }
|
---|
| 1042 | return p[0];
|
---|
| 1043 | }
|
---|
| 1044 |
|
---|
| 1045 | static mrb_value
|
---|
| 1046 | mrb_str_ord(mrb_state* mrb, mrb_value str)
|
---|
| 1047 | {
|
---|
| 1048 | if (RSTRING_LEN(str) == 0)
|
---|
| 1049 | mrb_raise(mrb, E_ARGUMENT_ERROR, "empty string");
|
---|
| 1050 | return mrb_fixnum_value(utf8code((unsigned char*) RSTRING_PTR(str)));
|
---|
| 1051 | }
|
---|
| 1052 | #else
|
---|
| 1053 | static mrb_value
|
---|
| 1054 | mrb_str_ord(mrb_state* mrb, mrb_value str)
|
---|
| 1055 | {
|
---|
| 1056 | if (RSTRING_LEN(str) == 0)
|
---|
| 1057 | mrb_raise(mrb, E_ARGUMENT_ERROR, "empty string");
|
---|
[331] | 1058 | return mrb_fixnum_value((unsigned char)RSTRING_PTR(str)[0]);
|
---|
[270] | 1059 | }
|
---|
| 1060 | #endif
|
---|
| 1061 |
|
---|
[439] | 1062 | /*
|
---|
| 1063 | * call-seq:
|
---|
| 1064 | * str.delete_prefix!(prefix) -> self or nil
|
---|
| 1065 | *
|
---|
| 1066 | * Deletes leading <code>prefix</code> from <i>str</i>, returning
|
---|
| 1067 | * <code>nil</code> if no change was made.
|
---|
| 1068 | *
|
---|
| 1069 | * "hello".delete_prefix!("hel") #=> "lo"
|
---|
| 1070 | * "hello".delete_prefix!("llo") #=> nil
|
---|
| 1071 | */
|
---|
| 1072 | static mrb_value
|
---|
| 1073 | mrb_str_del_prefix_bang(mrb_state *mrb, mrb_value self)
|
---|
[331] | 1074 | {
|
---|
[439] | 1075 | mrb_int plen, slen;
|
---|
| 1076 | char *ptr, *s;
|
---|
| 1077 | struct RString *str = RSTRING(self);
|
---|
| 1078 |
|
---|
| 1079 | mrb_get_args(mrb, "s", &ptr, &plen);
|
---|
| 1080 | slen = RSTR_LEN(str);
|
---|
| 1081 | if (plen > slen) return mrb_nil_value();
|
---|
| 1082 | s = RSTR_PTR(str);
|
---|
| 1083 | if (memcmp(s, ptr, plen) != 0) return mrb_nil_value();
|
---|
| 1084 | if (!mrb_frozen_p(str) && (RSTR_SHARED_P(str) || RSTR_FSHARED_P(str))) {
|
---|
| 1085 | str->as.heap.ptr += plen;
|
---|
[331] | 1086 | }
|
---|
[439] | 1087 | else {
|
---|
| 1088 | mrb_str_modify(mrb, str);
|
---|
| 1089 | s = RSTR_PTR(str);
|
---|
| 1090 | memmove(s, s+plen, slen-plen);
|
---|
| 1091 | }
|
---|
| 1092 | RSTR_SET_LEN(str, slen-plen);
|
---|
| 1093 | return self;
|
---|
[331] | 1094 | }
|
---|
| 1095 |
|
---|
| 1096 | /*
|
---|
| 1097 | * call-seq:
|
---|
[439] | 1098 | * str.delete_prefix(prefix) -> new_str
|
---|
[331] | 1099 | *
|
---|
[439] | 1100 | * Returns a copy of <i>str</i> with leading <code>prefix</code> deleted.
|
---|
[331] | 1101 | *
|
---|
[439] | 1102 | * "hello".delete_prefix("hel") #=> "lo"
|
---|
| 1103 | * "hello".delete_prefix("llo") #=> "hello"
|
---|
| 1104 | */
|
---|
| 1105 | static mrb_value
|
---|
| 1106 | mrb_str_del_prefix(mrb_state *mrb, mrb_value self)
|
---|
| 1107 | {
|
---|
| 1108 | mrb_int plen, slen;
|
---|
| 1109 | char *ptr;
|
---|
| 1110 |
|
---|
| 1111 | mrb_get_args(mrb, "s", &ptr, &plen);
|
---|
| 1112 | slen = RSTRING_LEN(self);
|
---|
| 1113 | if (plen > slen) return mrb_str_dup(mrb, self);
|
---|
| 1114 | if (memcmp(RSTRING_PTR(self), ptr, plen) != 0)
|
---|
| 1115 | return mrb_str_dup(mrb, self);
|
---|
| 1116 | return mrb_str_substr(mrb, self, plen, slen-plen);
|
---|
| 1117 | }
|
---|
| 1118 |
|
---|
| 1119 | /*
|
---|
| 1120 | * call-seq:
|
---|
| 1121 | * str.delete_suffix!(suffix) -> self or nil
|
---|
[331] | 1122 | *
|
---|
[439] | 1123 | * Deletes trailing <code>suffix</code> from <i>str</i>, returning
|
---|
| 1124 | * <code>nil</code> if no change was made.
|
---|
[331] | 1125 | *
|
---|
[439] | 1126 | * "hello".delete_suffix!("llo") #=> "he"
|
---|
| 1127 | * "hello".delete_suffix!("hel") #=> nil
|
---|
[331] | 1128 | */
|
---|
| 1129 | static mrb_value
|
---|
[439] | 1130 | mrb_str_del_suffix_bang(mrb_state *mrb, mrb_value self)
|
---|
[331] | 1131 | {
|
---|
[439] | 1132 | mrb_int plen, slen;
|
---|
| 1133 | char *ptr, *s;
|
---|
| 1134 | struct RString *str = RSTRING(self);
|
---|
[331] | 1135 |
|
---|
[439] | 1136 | mrb_get_args(mrb, "s", &ptr, &plen);
|
---|
| 1137 | slen = RSTR_LEN(str);
|
---|
| 1138 | if (plen > slen) return mrb_nil_value();
|
---|
| 1139 | s = RSTR_PTR(str);
|
---|
| 1140 | if (memcmp(s+slen-plen, ptr, plen) != 0) return mrb_nil_value();
|
---|
| 1141 | if (!mrb_frozen_p(str) && (RSTR_SHARED_P(str) || RSTR_FSHARED_P(str))) {
|
---|
| 1142 | /* no need to modify string */
|
---|
[331] | 1143 | }
|
---|
[439] | 1144 | else {
|
---|
| 1145 | mrb_str_modify(mrb, str);
|
---|
| 1146 | }
|
---|
| 1147 | RSTR_SET_LEN(str, slen-plen);
|
---|
| 1148 | return self;
|
---|
| 1149 | }
|
---|
[331] | 1150 |
|
---|
[439] | 1151 | /*
|
---|
| 1152 | * call-seq:
|
---|
| 1153 | * str.delete_suffix(suffix) -> new_str
|
---|
| 1154 | *
|
---|
| 1155 | * Returns a copy of <i>str</i> with leading <code>suffix</code> deleted.
|
---|
| 1156 | *
|
---|
| 1157 | * "hello".delete_suffix("hel") #=> "lo"
|
---|
| 1158 | * "hello".delete_suffix("llo") #=> "hello"
|
---|
| 1159 | */
|
---|
| 1160 | static mrb_value
|
---|
| 1161 | mrb_str_del_suffix(mrb_state *mrb, mrb_value self)
|
---|
| 1162 | {
|
---|
| 1163 | mrb_int plen, slen;
|
---|
| 1164 | char *ptr;
|
---|
[331] | 1165 |
|
---|
[439] | 1166 | mrb_get_args(mrb, "s", &ptr, &plen);
|
---|
| 1167 | slen = RSTRING_LEN(self);
|
---|
| 1168 | if (plen > slen) return mrb_str_dup(mrb, self);
|
---|
| 1169 | if (memcmp(RSTRING_PTR(self)+slen-plen, ptr, plen) != 0)
|
---|
| 1170 | return mrb_str_dup(mrb, self);
|
---|
| 1171 | return mrb_str_substr(mrb, self, 0, slen-plen);
|
---|
| 1172 | }
|
---|
[331] | 1173 |
|
---|
[439] | 1174 | static mrb_value
|
---|
| 1175 | mrb_str_lines(mrb_state *mrb, mrb_value self)
|
---|
| 1176 | {
|
---|
| 1177 | mrb_value result;
|
---|
| 1178 | int ai;
|
---|
| 1179 | mrb_int len;
|
---|
| 1180 | char *b = RSTRING_PTR(self);
|
---|
| 1181 | char *p = b, *t;
|
---|
| 1182 | char *e = b + RSTRING_LEN(self);
|
---|
[331] | 1183 |
|
---|
[439] | 1184 | result = mrb_ary_new(mrb);
|
---|
| 1185 | ai = mrb_gc_arena_save(mrb);
|
---|
| 1186 | while (p < e) {
|
---|
| 1187 | t = p;
|
---|
| 1188 | while (p < e && *p != '\n') p++;
|
---|
| 1189 | if (*p == '\n') p++;
|
---|
| 1190 | len = (mrb_int) (p - t);
|
---|
| 1191 | mrb_ary_push(mrb, result, mrb_str_new(mrb, t, len));
|
---|
[331] | 1192 | mrb_gc_arena_restore(mrb, ai);
|
---|
| 1193 | }
|
---|
[439] | 1194 | return result;
|
---|
[331] | 1195 | }
|
---|
| 1196 |
|
---|
[270] | 1197 | void
|
---|
| 1198 | mrb_mruby_string_ext_gem_init(mrb_state* mrb)
|
---|
| 1199 | {
|
---|
| 1200 | struct RClass * s = mrb->string_class;
|
---|
| 1201 |
|
---|
| 1202 | mrb_define_method(mrb, s, "dump", mrb_str_dump, MRB_ARGS_NONE());
|
---|
| 1203 | mrb_define_method(mrb, s, "swapcase!", mrb_str_swapcase_bang, MRB_ARGS_NONE());
|
---|
| 1204 | mrb_define_method(mrb, s, "swapcase", mrb_str_swapcase, MRB_ARGS_NONE());
|
---|
[439] | 1205 | mrb_define_method(mrb, s, "concat", mrb_str_concat_m, MRB_ARGS_REQ(1));
|
---|
| 1206 | mrb_define_method(mrb, s, "<<", mrb_str_concat_m, MRB_ARGS_REQ(1));
|
---|
| 1207 | mrb_define_method(mrb, s, "count", mrb_str_count, MRB_ARGS_REQ(1));
|
---|
| 1208 | mrb_define_method(mrb, s, "tr", mrb_str_tr, MRB_ARGS_REQ(2));
|
---|
| 1209 | mrb_define_method(mrb, s, "tr!", mrb_str_tr_bang, MRB_ARGS_REQ(2));
|
---|
| 1210 | mrb_define_method(mrb, s, "tr_s", mrb_str_tr_s, MRB_ARGS_REQ(2));
|
---|
| 1211 | mrb_define_method(mrb, s, "tr_s!", mrb_str_tr_s_bang, MRB_ARGS_REQ(2));
|
---|
| 1212 | mrb_define_method(mrb, s, "squeeze", mrb_str_squeeze, MRB_ARGS_OPT(1));
|
---|
| 1213 | mrb_define_method(mrb, s, "squeeze!", mrb_str_squeeze_bang, MRB_ARGS_OPT(1));
|
---|
| 1214 | mrb_define_method(mrb, s, "delete", mrb_str_delete, MRB_ARGS_REQ(1));
|
---|
| 1215 | mrb_define_method(mrb, s, "delete!", mrb_str_delete_bang, MRB_ARGS_REQ(1));
|
---|
[270] | 1216 | mrb_define_method(mrb, s, "start_with?", mrb_str_start_with, MRB_ARGS_REST());
|
---|
| 1217 | mrb_define_method(mrb, s, "end_with?", mrb_str_end_with, MRB_ARGS_REST());
|
---|
| 1218 | mrb_define_method(mrb, s, "hex", mrb_str_hex, MRB_ARGS_NONE());
|
---|
| 1219 | mrb_define_method(mrb, s, "oct", mrb_str_oct, MRB_ARGS_NONE());
|
---|
| 1220 | mrb_define_method(mrb, s, "chr", mrb_str_chr, MRB_ARGS_NONE());
|
---|
| 1221 | mrb_define_method(mrb, s, "succ", mrb_str_succ, MRB_ARGS_NONE());
|
---|
| 1222 | mrb_define_method(mrb, s, "succ!", mrb_str_succ_bang, MRB_ARGS_NONE());
|
---|
[439] | 1223 | mrb_define_method(mrb, s, "next", mrb_str_succ, MRB_ARGS_NONE());
|
---|
| 1224 | mrb_define_method(mrb, s, "next!", mrb_str_succ_bang, MRB_ARGS_NONE());
|
---|
| 1225 | mrb_define_method(mrb, s, "ord", mrb_str_ord, MRB_ARGS_NONE());
|
---|
| 1226 | mrb_define_method(mrb, s, "delete_prefix!", mrb_str_del_prefix_bang, MRB_ARGS_REQ(1));
|
---|
| 1227 | mrb_define_method(mrb, s, "delete_prefix", mrb_str_del_prefix, MRB_ARGS_REQ(1));
|
---|
| 1228 | mrb_define_method(mrb, s, "delete_suffix!", mrb_str_del_suffix_bang, MRB_ARGS_REQ(1));
|
---|
| 1229 | mrb_define_method(mrb, s, "delete_suffix", mrb_str_del_suffix, MRB_ARGS_REQ(1));
|
---|
[270] | 1230 |
|
---|
[439] | 1231 | mrb_define_method(mrb, s, "__lines", mrb_str_lines, MRB_ARGS_NONE());
|
---|
| 1232 |
|
---|
| 1233 | mrb_define_method(mrb, mrb_module_get(mrb, "Integral"), "chr", mrb_int_chr, MRB_ARGS_OPT(1));
|
---|
[270] | 1234 | }
|
---|
| 1235 |
|
---|
| 1236 | void
|
---|
| 1237 | mrb_mruby_string_ext_gem_final(mrb_state* mrb)
|
---|
| 1238 | {
|
---|
| 1239 | }
|
---|