- Timestamp:
- Jul 9, 2020, 8:51:43 AM (4 years ago)
- Location:
- EcnlProtoTool/trunk/mruby-2.1.1
- Files:
-
- 1 edited
- 1 moved
Legend:
- Unmodified
- Added
- Removed
-
EcnlProtoTool/trunk/mruby-2.1.1/mrbgems/mruby-string-ext/src/string.c
r331 r439 6 6 #include <mruby/range.h> 7 7 8 static mrb_value 9 mrb_str_getbyte(mrb_state *mrb, mrb_value str) 10 { 11 mrb_int pos; 12 mrb_get_args(mrb, "i", &pos); 13 14 if (pos < 0) 15 pos += RSTRING_LEN(str); 16 if (pos < 0 || RSTRING_LEN(str) <= pos) 17 return mrb_nil_value(); 18 19 return mrb_fixnum_value((unsigned char)RSTRING_PTR(str)[pos]); 20 } 21 22 static mrb_value 23 mrb_str_setbyte(mrb_state *mrb, mrb_value str) 24 { 25 mrb_int pos, byte; 26 long len; 27 28 mrb_get_args(mrb, "ii", &pos, &byte); 29 30 len = RSTRING_LEN(str); 31 if (pos < -len || len <= pos) 32 mrb_raisef(mrb, E_INDEX_ERROR, "index %S is out of array", mrb_fixnum_value(pos)); 33 if (pos < 0) 34 pos += len; 35 36 mrb_str_modify(mrb, mrb_str_ptr(str)); 37 byte &= 0xff; 38 RSTRING_PTR(str)[pos] = byte; 39 return mrb_fixnum_value((unsigned char)byte); 40 } 41 42 static mrb_value 43 mrb_str_byteslice(mrb_state *mrb, mrb_value str) 44 { 45 mrb_value a1; 8 #define ENC_ASCII_8BIT "ASCII-8BIT" 9 #define ENC_BINARY "BINARY" 10 #define ENC_UTF8 "UTF-8" 11 12 #define ENC_COMP_P(enc, enc_lit) \ 13 str_casecmp_p(RSTRING_PTR(enc), RSTRING_LEN(enc), enc_lit, sizeof(enc_lit"")-1) 14 15 #ifdef MRB_WITHOUT_FLOAT 16 # define mrb_float_p(o) FALSE 17 #endif 18 19 static mrb_bool 20 str_casecmp_p(const char *s1, mrb_int len1, const char *s2, mrb_int len2) 21 { 22 const char *e1, *e2; 23 24 if (len1 != len2) return FALSE; 25 e1 = s1 + len1; 26 e2 = s2 + len2; 27 while (s1 < e1 && s2 < e2) { 28 if (*s1 != *s2 && TOUPPER(*s1) != TOUPPER(*s2)) return FALSE; 29 ++s1; 30 ++s2; 31 } 32 return TRUE; 33 } 34 35 static mrb_value 36 int_chr_binary(mrb_state *mrb, mrb_value num) 37 { 38 mrb_int cp = mrb_int(mrb, num); 39 char c; 40 mrb_value str; 41 42 if (cp < 0 || 0xff < cp) { 43 mrb_raisef(mrb, E_RANGE_ERROR, "%v out of char range", num); 44 } 45 c = (char)cp; 46 str = mrb_str_new(mrb, &c, 1); 47 RSTR_SET_ASCII_FLAG(mrb_str_ptr(str)); 48 return str; 49 } 50 51 #ifdef MRB_UTF8_STRING 52 static mrb_value 53 
int_chr_utf8(mrb_state *mrb, mrb_value num) 54 { 55 mrb_int cp = mrb_int(mrb, num); 56 char utf8[4]; 46 57 mrb_int len; 47 int argc;48 49 argc = mrb_get_args(mrb, "o|i", &a1, &len); 50 if ( argc == 2) {51 return mrb_str_substr(mrb, str, mrb_fixnum(a1), len);52 } 53 switch (mrb_type(a1)) {54 case MRB_TT_RANGE:55 {56 mrb_int beg;57 58 len = RSTRING_LEN(str);59 switch (mrb_range_beg_len(mrb, a1, &beg, &len, len, TRUE)) {60 case 0: /* not range */61 break;62 case 1: /* range */63 return mrb_str_substr(mrb, str, beg, len);64 case 2: /* out of range */65 mrb_raisef(mrb, E_RANGE_ERROR, "%S out of range", a1);66 break;67 }68 return mrb_nil_value();69 }70 case MRB_TT_FLOAT:71 a1 = mrb_fixnum_value((mrb_int)mrb_float(a1));72 /* fall through */73 case MRB_TT_FIXNUM:74 return mrb_str_substr(mrb, str, mrb_fixnum(a1), 1);75 default:76 mrb_raise(mrb, E_TYPE_ERROR, "wrong type of argument");77 }78 /* not reached */79 return mrb_nil_value(); 80 } 58 mrb_value str; 59 uint32_t ascii_flag = 0; 60 61 if (cp < 0 || 0x10FFFF < cp) { 62 mrb_raisef(mrb, E_RANGE_ERROR, "%v out of char range", num); 63 } 64 if (cp < 0x80) { 65 utf8[0] = (char)cp; 66 len = 1; 67 ascii_flag = MRB_STR_ASCII; 68 } 69 else if (cp < 0x800) { 70 utf8[0] = (char)(0xC0 | (cp >> 6)); 71 utf8[1] = (char)(0x80 | (cp & 0x3F)); 72 len = 2; 73 } 74 else if (cp < 0x10000) { 75 utf8[0] = (char)(0xE0 | (cp >> 12)); 76 utf8[1] = (char)(0x80 | ((cp >> 6) & 0x3F)); 77 utf8[2] = (char)(0x80 | ( cp & 0x3F)); 78 len = 3; 79 } 80 else { 81 utf8[0] = (char)(0xF0 | (cp >> 18)); 82 utf8[1] = (char)(0x80 | ((cp >> 12) & 0x3F)); 83 utf8[2] = (char)(0x80 | ((cp >> 6) & 0x3F)); 84 utf8[3] = (char)(0x80 | ( cp & 0x3F)); 85 len = 4; 86 } 87 str = mrb_str_new(mrb, utf8, len); 88 mrb_str_ptr(str)->flags |= ascii_flag; 89 return str; 90 } 91 #endif 81 92 82 93 /* … … 135 146 } 136 147 137 static mrb_value mrb_fixnum_chr(mrb_state *mrb, mrb_value num);138 139 148 /* 140 149 * call-seq: … … 146 155 * Append---Concatenates the given object to 
<i>str</i>. If the object is a 147 156 * <code>Integer</code>, it is considered as a codepoint, and is converted 148 * to a character before concatenation. 157 * to a character before concatenation 158 * (equivalent to <code>str.concat(integer.chr(__ENCODING__))</code>). 149 159 * 150 160 * a = "hello " … … 153 163 */ 154 164 static mrb_value 155 mrb_str_concat 2(mrb_state *mrb, mrb_value self)165 mrb_str_concat_m(mrb_state *mrb, mrb_value self) 156 166 { 157 167 mrb_value str; 158 168 159 169 mrb_get_args(mrb, "o", &str); 160 if (mrb_fixnum_p(str)) 161 str = mrb_fixnum_chr(mrb, str); 170 if (mrb_fixnum_p(str) || mrb_float_p(str)) 171 #ifdef MRB_UTF8_STRING 172 str = int_chr_utf8(mrb, str); 173 #else 174 str = int_chr_binary(mrb, str); 175 #endif 162 176 else 163 str = mrb_string_type(mrb, str);164 mrb_str_c oncat(mrb, self, str);177 mrb_ensure_string_type(mrb, str); 178 mrb_str_cat_str(mrb, self, str); 165 179 return self; 166 180 } … … 189 203 size_t len_l, len_r; 190 204 int ai = mrb_gc_arena_save(mrb); 191 sub = mrb_ string_type(mrb, argv[i]);205 sub = mrb_ensure_string_type(mrb, argv[i]); 192 206 mrb_gc_arena_restore(mrb, ai); 193 207 len_l = RSTRING_LEN(self); … … 218 232 size_t len_l, len_r; 219 233 int ai = mrb_gc_arena_save(mrb); 220 sub = mrb_ string_type(mrb, argv[i]);234 sub = mrb_ensure_string_type(mrb, argv[i]); 221 235 mrb_gc_arena_restore(mrb, ai); 222 236 len_l = RSTRING_LEN(self); … … 233 247 } 234 248 249 enum tr_pattern_type { 250 TR_UNINITIALIZED = 0, 251 TR_IN_ORDER = 1, 252 TR_RANGE = 2, 253 }; 254 255 /* 256 #tr Pattern syntax 257 258 <syntax> ::= (<pattern>)* | '^' (<pattern>)* 259 <pattern> ::= <in order> | <range> 260 <in order> ::= (<ch>)+ 261 <range> ::= <ch> '-' <ch> 262 */ 263 struct tr_pattern { 264 uint8_t type; // 1:in-order, 2:range 265 mrb_bool flag_reverse : 1; 266 mrb_bool flag_on_heap : 1; 267 uint16_t n; 268 union { 269 uint16_t start_pos; 270 char ch[2]; 271 } val; 272 struct tr_pattern *next; 273 }; 274 275 #define 
STATIC_TR_PATTERN { 0 } 276 277 static inline void 278 tr_free_pattern(mrb_state *mrb, struct tr_pattern *pat) 279 { 280 while (pat) { 281 struct tr_pattern *p = pat->next; 282 if (pat->flag_on_heap) { 283 mrb_free(mrb, pat); 284 } 285 pat = p; 286 } 287 } 288 289 static struct tr_pattern* 290 tr_parse_pattern(mrb_state *mrb, struct tr_pattern *ret, const mrb_value v_pattern, mrb_bool flag_reverse_enable) 291 { 292 const char *pattern = RSTRING_PTR(v_pattern); 293 mrb_int pattern_length = RSTRING_LEN(v_pattern); 294 mrb_bool flag_reverse = FALSE; 295 struct tr_pattern *pat1; 296 mrb_int i = 0; 297 298 if(flag_reverse_enable && pattern_length >= 2 && pattern[0] == '^') { 299 flag_reverse = TRUE; 300 i++; 301 } 302 303 while (i < pattern_length) { 304 /* is range pattern ? */ 305 mrb_bool const ret_uninit = (ret->type == TR_UNINITIALIZED); 306 pat1 = ret_uninit 307 ? ret 308 : (struct tr_pattern*)mrb_malloc_simple(mrb, sizeof(struct tr_pattern)); 309 if ((i+2) < pattern_length && pattern[i] != '\\' && pattern[i+1] == '-') { 310 if (pat1 == NULL && ret) { 311 nomem: 312 tr_free_pattern(mrb, ret); 313 mrb_exc_raise(mrb, mrb_obj_value(mrb->nomem_err)); 314 return NULL; /* not reached */ 315 } 316 pat1->type = TR_RANGE; 317 pat1->flag_reverse = flag_reverse; 318 pat1->flag_on_heap = !ret_uninit; 319 pat1->n = pattern[i+2] - pattern[i] + 1; 320 pat1->next = NULL; 321 pat1->val.ch[0] = pattern[i]; 322 pat1->val.ch[1] = pattern[i+2]; 323 i += 3; 324 } 325 else { 326 /* in order pattern. 
*/ 327 mrb_int start_pos = i++; 328 mrb_int len; 329 330 while (i < pattern_length) { 331 if ((i+2) < pattern_length && pattern[i] != '\\' && pattern[i+1] == '-') 332 break; 333 i++; 334 } 335 336 len = i - start_pos; 337 if (len > UINT16_MAX) { 338 mrb_raise(mrb, E_ARGUMENT_ERROR, "tr pattern too long (max 65536)"); 339 } 340 if (pat1 == NULL && ret) { 341 goto nomem; 342 } 343 pat1->type = TR_IN_ORDER; 344 pat1->flag_reverse = flag_reverse; 345 pat1->flag_on_heap = !ret_uninit; 346 pat1->n = len; 347 pat1->next = NULL; 348 pat1->val.start_pos = start_pos; 349 } 350 351 if (ret == NULL || ret_uninit) { 352 ret = pat1; 353 } 354 else { 355 struct tr_pattern *p = ret; 356 while (p->next != NULL) { 357 p = p->next; 358 } 359 p->next = pat1; 360 } 361 } 362 363 return ret; 364 } 365 366 static inline mrb_int 367 tr_find_character(const struct tr_pattern *pat, const char *pat_str, int ch) 368 { 369 mrb_int ret = -1; 370 mrb_int n_sum = 0; 371 mrb_int flag_reverse = pat ? pat->flag_reverse : 0; 372 373 while (pat != NULL) { 374 if (pat->type == TR_IN_ORDER) { 375 int i; 376 for (i = 0; i < pat->n; i++) { 377 if (pat_str[pat->val.start_pos + i] == ch) ret = n_sum + i; 378 } 379 } 380 else if (pat->type == TR_RANGE) { 381 if (pat->val.ch[0] <= ch && ch <= pat->val.ch[1]) 382 ret = n_sum + ch - pat->val.ch[0]; 383 } 384 else { 385 mrb_assert(pat->type == TR_UNINITIALIZED); 386 } 387 n_sum += pat->n; 388 pat = pat->next; 389 } 390 391 if (flag_reverse) { 392 return (ret < 0) ? 
MRB_INT_MAX : -1; 393 } 394 return ret; 395 } 396 397 static inline mrb_int 398 tr_get_character(const struct tr_pattern *pat, const char *pat_str, mrb_int n_th) 399 { 400 mrb_int n_sum = 0; 401 402 while (pat != NULL) { 403 if (n_th < (n_sum + pat->n)) { 404 mrb_int i = (n_th - n_sum); 405 406 switch (pat->type) { 407 case TR_IN_ORDER: 408 return pat_str[pat->val.start_pos + i]; 409 case TR_RANGE: 410 return pat->val.ch[0]+i; 411 case TR_UNINITIALIZED: 412 return -1; 413 } 414 } 415 if (pat->next == NULL) { 416 switch (pat->type) { 417 case TR_IN_ORDER: 418 return pat_str[pat->val.start_pos + pat->n - 1]; 419 case TR_RANGE: 420 return pat->val.ch[1]; 421 case TR_UNINITIALIZED: 422 return -1; 423 } 424 } 425 n_sum += pat->n; 426 pat = pat->next; 427 } 428 429 return -1; 430 } 431 432 static inline void 433 tr_bitmap_set(uint8_t bitmap[32], uint8_t ch) 434 { 435 uint8_t idx1 = ch / 8; 436 uint8_t idx2 = ch % 8; 437 bitmap[idx1] |= (1<<idx2); 438 } 439 440 static inline mrb_bool 441 tr_bitmap_detect(uint8_t bitmap[32], uint8_t ch) 442 { 443 uint8_t idx1 = ch / 8; 444 uint8_t idx2 = ch % 8; 445 if (bitmap[idx1] & (1<<idx2)) 446 return TRUE; 447 return FALSE; 448 } 449 450 /* compile pattern to bitmap */ 451 static void 452 tr_compile_pattern(const struct tr_pattern *pat, mrb_value pstr, uint8_t bitmap[32]) 453 { 454 const char *pattern = RSTRING_PTR(pstr); 455 mrb_int flag_reverse = pat ? 
pat->flag_reverse : 0; 456 int i; 457 458 for (i=0; i<32; i++) { 459 bitmap[i] = 0; 460 } 461 while (pat != NULL) { 462 if (pat->type == TR_IN_ORDER) { 463 for (i = 0; i < pat->n; i++) { 464 tr_bitmap_set(bitmap, pattern[pat->val.start_pos + i]); 465 } 466 } 467 else if (pat->type == TR_RANGE) { 468 for (i = pat->val.ch[0]; i < pat->val.ch[1]; i++) { 469 tr_bitmap_set(bitmap, i); 470 } 471 } 472 else { 473 mrb_assert(pat->type == TR_UNINITIALIZED); 474 } 475 pat = pat->next; 476 } 477 478 if (flag_reverse) { 479 for (i=0; i<32; i++) { 480 bitmap[i] ^= 0xff; 481 } 482 } 483 } 484 485 static mrb_bool 486 str_tr(mrb_state *mrb, mrb_value str, mrb_value p1, mrb_value p2, mrb_bool squeeze) 487 { 488 struct tr_pattern pat = STATIC_TR_PATTERN; 489 struct tr_pattern rep_storage = STATIC_TR_PATTERN; 490 char *s; 491 mrb_int len; 492 mrb_int i; 493 mrb_int j; 494 mrb_bool flag_changed = FALSE; 495 mrb_int lastch = -1; 496 struct tr_pattern *rep; 497 498 mrb_str_modify(mrb, mrb_str_ptr(str)); 499 tr_parse_pattern(mrb, &pat, p1, TRUE); 500 rep = tr_parse_pattern(mrb, &rep_storage, p2, FALSE); 501 s = RSTRING_PTR(str); 502 len = RSTRING_LEN(str); 503 504 for (i=j=0; i<len; i++,j++) { 505 mrb_int n = tr_find_character(&pat, RSTRING_PTR(p1), s[i]); 506 507 if (i>j) s[j] = s[i]; 508 if (n >= 0) { 509 flag_changed = TRUE; 510 if (rep == NULL) { 511 j--; 512 } 513 else { 514 mrb_int c = tr_get_character(rep, RSTRING_PTR(p2), n); 515 516 if (c < 0 || (squeeze && c == lastch)) { 517 j--; 518 continue; 519 } 520 if (c > 0x80) { 521 mrb_raisef(mrb, E_ARGUMENT_ERROR, "character (%i) out of range", c); 522 } 523 lastch = c; 524 s[i] = (char)c; 525 } 526 } 527 } 528 529 tr_free_pattern(mrb, &pat); 530 tr_free_pattern(mrb, rep); 531 532 if (flag_changed) { 533 RSTR_SET_LEN(RSTRING(str), j); 534 RSTRING_PTR(str)[j] = 0; 535 } 536 return flag_changed; 537 } 538 539 /* 540 * call-seq: 541 * str.tr(from_str, to_str) => new_str 542 * 543 * Returns a copy of str with the characters in from_str 
replaced by the 544 * corresponding characters in to_str. If to_str is shorter than from_str, 545 * it is padded with its last character in order to maintain the 546 * correspondence. 547 * 548 * "hello".tr('el', 'ip') #=> "hippo" 549 * "hello".tr('aeiou', '*') #=> "h*ll*" 550 * "hello".tr('aeiou', 'AA*') #=> "hAll*" 551 * 552 * Both strings may use the c1-c2 notation to denote ranges of characters, 553 * and from_str may start with a ^, which denotes all characters except 554 * those listed. 555 * 556 * "hello".tr('a-y', 'b-z') #=> "ifmmp" 557 * "hello".tr('^aeiou', '*') #=> "*e**o" 558 * 559 * The backslash character \ can be used to escape ^ or - and is otherwise 560 * ignored unless it appears at the end of a range or the end of the 561 * from_str or to_str: 562 * 563 * 564 * "hello^world".tr("\\^aeiou", "*") #=> "h*ll**w*rld" 565 * "hello-world".tr("a\\-eo", "*") #=> "h*ll**w*rld" 566 * 567 * "hello\r\nworld".tr("\r", "") #=> "hello\nworld" 568 * "hello\r\nworld".tr("\\r", "") #=> "hello\r\nwold" 569 * "hello\r\nworld".tr("\\\r", "") #=> "hello\nworld" 570 * 571 * "X['\\b']".tr("X\\", "") #=> "['b']" 572 * "X['\\b']".tr("X-\\]", "") #=> "'b'" 573 * 574 * Note: conversion is effective only in ASCII region. 575 */ 576 static mrb_value 577 mrb_str_tr(mrb_state *mrb, mrb_value str) 578 { 579 mrb_value dup; 580 mrb_value p1, p2; 581 582 mrb_get_args(mrb, "SS", &p1, &p2); 583 dup = mrb_str_dup(mrb, str); 584 str_tr(mrb, dup, p1, p2, FALSE); 585 return dup; 586 } 587 588 /* 589 * call-seq: 590 * str.tr!(from_str, to_str) -> str or nil 591 * 592 * Translates str in place, using the same rules as String#tr. 593 * Returns str, or nil if no changes were made. 
594 */ 595 static mrb_value 596 mrb_str_tr_bang(mrb_state *mrb, mrb_value str) 597 { 598 mrb_value p1, p2; 599 600 mrb_get_args(mrb, "SS", &p1, &p2); 601 if (str_tr(mrb, str, p1, p2, FALSE)) { 602 return str; 603 } 604 return mrb_nil_value(); 605 } 606 607 /* 608 * call-seq: 609 * str.tr_s(from_str, to_str) -> new_str 610 * 611 * Processes a copy of str as described under String#tr, then removes 612 * duplicate characters in regions that were affected by the translation. 613 * 614 * "hello".tr_s('l', 'r') #=> "hero" 615 * "hello".tr_s('el', '*') #=> "h*o" 616 * "hello".tr_s('el', 'hx') #=> "hhxo" 617 */ 618 static mrb_value 619 mrb_str_tr_s(mrb_state *mrb, mrb_value str) 620 { 621 mrb_value dup; 622 mrb_value p1, p2; 623 624 mrb_get_args(mrb, "SS", &p1, &p2); 625 dup = mrb_str_dup(mrb, str); 626 str_tr(mrb, dup, p1, p2, TRUE); 627 return dup; 628 } 629 630 /* 631 * call-seq: 632 * str.tr_s!(from_str, to_str) -> str or nil 633 * 634 * Performs String#tr_s processing on str in place, returning 635 * str, or nil if no changes were made. 
636 */ 637 static mrb_value 638 mrb_str_tr_s_bang(mrb_state *mrb, mrb_value str) 639 { 640 mrb_value p1, p2; 641 642 mrb_get_args(mrb, "SS", &p1, &p2); 643 if (str_tr(mrb, str, p1, p2, TRUE)) { 644 return str; 645 } 646 return mrb_nil_value(); 647 } 648 649 static mrb_bool 650 str_squeeze(mrb_state *mrb, mrb_value str, mrb_value v_pat) 651 { 652 struct tr_pattern pat_storage = STATIC_TR_PATTERN; 653 struct tr_pattern *pat = NULL; 654 mrb_int i, j; 655 char *s; 656 mrb_int len; 657 mrb_bool flag_changed = FALSE; 658 mrb_int lastch = -1; 659 uint8_t bitmap[32]; 660 661 mrb_str_modify(mrb, mrb_str_ptr(str)); 662 if (!mrb_nil_p(v_pat)) { 663 pat = tr_parse_pattern(mrb, &pat_storage, v_pat, TRUE); 664 tr_compile_pattern(pat, v_pat, bitmap); 665 tr_free_pattern(mrb, pat); 666 } 667 s = RSTRING_PTR(str); 668 len = RSTRING_LEN(str); 669 670 if (pat) { 671 for (i=j=0; i<len; i++,j++) { 672 if (i>j) s[j] = s[i]; 673 if (tr_bitmap_detect(bitmap, s[i]) && s[i] == lastch) { 674 flag_changed = TRUE; 675 j--; 676 } 677 lastch = s[i]; 678 } 679 } 680 else { 681 for (i=j=0; i<len; i++,j++) { 682 if (i>j) s[j] = s[i]; 683 if (s[i] >= 0 && s[i] == lastch) { 684 flag_changed = TRUE; 685 j--; 686 } 687 lastch = s[i]; 688 } 689 } 690 691 if (flag_changed) { 692 RSTR_SET_LEN(RSTRING(str), j); 693 RSTRING_PTR(str)[j] = 0; 694 } 695 return flag_changed; 696 } 697 698 /* 699 * call-seq: 700 * str.squeeze([other_str]) -> new_str 701 * 702 * Builds a set of characters from the other_str 703 * parameter(s) using the procedure described for String#count. Returns a 704 * new string where runs of the same character that occur in this set are 705 * replaced by a single character. If no arguments are given, all runs of 706 * identical characters are replaced by a single character. 
707 * 708 * "yellow moon".squeeze #=> "yelow mon" 709 * " now is the".squeeze(" ") #=> " now is the" 710 * "putters shoot balls".squeeze("m-z") #=> "puters shot balls" 711 */ 712 static mrb_value 713 mrb_str_squeeze(mrb_state *mrb, mrb_value str) 714 { 715 mrb_value pat = mrb_nil_value(); 716 mrb_value dup; 717 718 mrb_get_args(mrb, "|S", &pat); 719 dup = mrb_str_dup(mrb, str); 720 str_squeeze(mrb, dup, pat); 721 return dup; 722 } 723 724 /* 725 * call-seq: 726 * str.squeeze!([other_str]) -> str or nil 727 * 728 * Squeezes str in place, returning either str, or nil if no 729 * changes were made. 730 */ 731 static mrb_value 732 mrb_str_squeeze_bang(mrb_state *mrb, mrb_value str) 733 { 734 mrb_value pat = mrb_nil_value(); 735 736 mrb_get_args(mrb, "|S", &pat); 737 if (str_squeeze(mrb, str, pat)) { 738 return str; 739 } 740 return mrb_nil_value(); 741 } 742 743 static mrb_bool 744 str_delete(mrb_state *mrb, mrb_value str, mrb_value v_pat) 745 { 746 struct tr_pattern pat = STATIC_TR_PATTERN; 747 mrb_int i, j; 748 char *s; 749 mrb_int len; 750 mrb_bool flag_changed = FALSE; 751 uint8_t bitmap[32]; 752 753 mrb_str_modify(mrb, mrb_str_ptr(str)); 754 tr_parse_pattern(mrb, &pat, v_pat, TRUE); 755 tr_compile_pattern(&pat, v_pat, bitmap); 756 tr_free_pattern(mrb, &pat); 757 758 s = RSTRING_PTR(str); 759 len = RSTRING_LEN(str); 760 761 for (i=j=0; i<len; i++,j++) { 762 if (i>j) s[j] = s[i]; 763 if (tr_bitmap_detect(bitmap, s[i])) { 764 flag_changed = TRUE; 765 j--; 766 } 767 } 768 if (flag_changed) { 769 RSTR_SET_LEN(RSTRING(str), j); 770 RSTRING_PTR(str)[j] = 0; 771 } 772 return flag_changed; 773 } 774 775 static mrb_value 776 mrb_str_delete(mrb_state *mrb, mrb_value str) 777 { 778 mrb_value pat; 779 mrb_value dup; 780 781 mrb_get_args(mrb, "S", &pat); 782 dup = mrb_str_dup(mrb, str); 783 str_delete(mrb, dup, pat); 784 return dup; 785 } 786 787 static mrb_value 788 mrb_str_delete_bang(mrb_state *mrb, mrb_value str) 789 { 790 mrb_value pat; 791 792 mrb_get_args(mrb, "S", 
&pat); 793 if (str_delete(mrb, str, pat)) { 794 return str; 795 } 796 return mrb_nil_value(); 797 } 798 799 /* 800 * call-seq: 801 * str.count(other_str) -> integer 802 * 803 * Each other_str parameter defines a set of characters to count. The 804 * intersection of these sets defines the characters to count in str. Any 805 * other_str that starts with a caret ^ is negated. The sequence c1-c2 806 * means all characters between c1 and c2. The backslash character \ can 807 * be used to escape ^ or - and is otherwise ignored unless it appears at 808 * the end of a sequence or the end of an other_str. 809 */ 810 static mrb_value 811 mrb_str_count(mrb_state *mrb, mrb_value str) 812 { 813 mrb_value v_pat = mrb_nil_value(); 814 mrb_int i; 815 char *s; 816 mrb_int len; 817 mrb_int count = 0; 818 struct tr_pattern pat = STATIC_TR_PATTERN; 819 uint8_t bitmap[32]; 820 821 mrb_get_args(mrb, "S", &v_pat); 822 tr_parse_pattern(mrb, &pat, v_pat, TRUE); 823 tr_compile_pattern(&pat, v_pat, bitmap); 824 tr_free_pattern(mrb, &pat); 825 826 s = RSTRING_PTR(str); 827 len = RSTRING_LEN(str); 828 for (i = 0; i < len; i++) { 829 if (tr_bitmap_detect(bitmap, s[i])) count++; 830 } 831 return mrb_fixnum_value(count); 832 } 833 235 834 static mrb_value 236 835 mrb_str_hex(mrb_state *mrb, mrb_value self) … … 260 859 } 261 860 262 static mrb_value 263 mrb_fixnum_chr(mrb_state *mrb, mrb_value num) 264 { 265 mrb_int cp = mrb_fixnum(num); 861 /* 862 * call-seq: 863 * int.chr([encoding]) -> string 864 * 865 * Returns a string containing the character represented by the +int+'s value 866 * according to +encoding+. +"ASCII-8BIT"+ (+"BINARY"+) and +"UTF-8"+ (only 867 * with +MRB_UTF8_STRING+) can be specified as +encoding+ (default is 868 * +"ASCII-8BIT"+). 
869 * 870 * 65.chr #=> "A" 871 * 230.chr #=> "\xE6" 872 * 230.chr("ASCII-8BIT") #=> "\xE6" 873 * 230.chr("UTF-8") #=> "\u00E6" 874 */ 875 static mrb_value 876 mrb_int_chr(mrb_state *mrb, mrb_value num) 877 { 878 mrb_value enc; 879 mrb_bool enc_given; 880 881 mrb_get_args(mrb, "|S?", &enc, &enc_given); 882 if (!enc_given || 883 ENC_COMP_P(enc, ENC_ASCII_8BIT) || 884 ENC_COMP_P(enc, ENC_BINARY)) { 885 return int_chr_binary(mrb, num); 886 } 266 887 #ifdef MRB_UTF8_STRING 267 char utf8[4]; 268 mrb_int len; 269 270 if (cp < 0 || 0x10FFFF < cp) { 271 mrb_raisef(mrb, E_RANGE_ERROR, "%S out of char range", num); 272 } 273 if (cp < 0x80) { 274 utf8[0] = (char)cp; 275 len = 1; 276 } 277 else if (cp < 0x800) { 278 utf8[0] = (char)(0xC0 | (cp >> 6)); 279 utf8[1] = (char)(0x80 | (cp & 0x3F)); 280 len = 2; 281 } 282 else if (cp < 0x10000) { 283 utf8[0] = (char)(0xE0 | (cp >> 12)); 284 utf8[1] = (char)(0x80 | ((cp >> 6) & 0x3F)); 285 utf8[2] = (char)(0x80 | ( cp & 0x3F)); 286 len = 3; 287 } 888 else if (ENC_COMP_P(enc, ENC_UTF8)) { 889 return int_chr_utf8(mrb, num); 890 } 891 #endif 288 892 else { 289 utf8[0] = (char)(0xF0 | (cp >> 18)); 290 utf8[1] = (char)(0x80 | ((cp >> 12) & 0x3F)); 291 utf8[2] = (char)(0x80 | ((cp >> 6) & 0x3F)); 292 utf8[3] = (char)(0x80 | ( cp & 0x3F)); 293 len = 4; 294 } 295 return mrb_str_new(mrb, utf8, len); 296 #else 297 char c; 298 299 if (cp < 0 || 0xff < cp) { 300 mrb_raisef(mrb, E_RANGE_ERROR, "%S out of char range", num); 301 } 302 c = (char)cp; 303 return mrb_str_new(mrb, &c, 1); 304 #endif 305 } 306 307 /* 308 * call-seq: 309 * string.lines -> array of string 310 * 311 * Returns strings per line; 312 * 313 * a = "abc\ndef" 314 * a.lines #=> ["abc\n", "def"] 315 */ 316 static mrb_value 317 mrb_str_lines(mrb_state *mrb, mrb_value self) 318 { 319 mrb_value result; 320 mrb_value blk; 321 int ai; 322 mrb_int len; 323 mrb_value arg; 324 char *b = RSTRING_PTR(self); 325 char *p = b, *t; 326 char *e = b + RSTRING_LEN(self); 327 328 mrb_get_args(mrb, 
"&", &blk); 329 330 result = mrb_ary_new(mrb); 331 ai = mrb_gc_arena_save(mrb); 332 if (!mrb_nil_p(blk)) { 333 while (p < e) { 334 t = p; 335 while (p < e && *p != '\n') p++; 336 if (*p == '\n') p++; 337 len = (mrb_int) (p - t); 338 arg = mrb_str_new(mrb, t, len); 339 mrb_yield_argv(mrb, blk, 1, &arg); 340 mrb_gc_arena_restore(mrb, ai); 341 if (b != RSTRING_PTR(self)) { 342 ptrdiff_t diff = p - b; 343 b = RSTRING_PTR(self); 344 p = b + diff; 345 } 346 e = b + RSTRING_LEN(self); 347 } 348 return self; 349 } 350 while (p < e) { 351 t = p; 352 while (p < e && *p != '\n') p++; 353 if (*p == '\n') p++; 354 len = (mrb_int) (p - t); 355 mrb_ary_push(mrb, result, mrb_str_new(mrb, t, len)); 356 mrb_gc_arena_restore(mrb, ai); 357 } 358 return result; 893 mrb_raisef(mrb, E_ARGUMENT_ERROR, "unknown encoding name - %v", enc); 894 } 895 /* not reached */ 896 return mrb_nil_value(); 359 897 } 360 898 … … 522 1060 #endif 523 1061 524 static mrb_bool525 all_digits_p(const char *s, mrb_int len)526 {527 while (len-- > 0) {528 if (!ISDIGIT(*s)) return FALSE;529 s++;530 }531 return TRUE;532 }533 534 1062 /* 535 1063 * call-seq: 536 * str.upto(other_str, exclusive=false) {|s| block } -> str 537 * str.upto(other_str, exclusive=false) -> an_enumerator 538 * 539 * Iterates through successive values, starting at <i>str</i> and 540 * ending at <i>other_str</i> inclusive, passing each value in turn to 541 * the block. The <code>String#succ</code> method is used to generate 542 * each value. If optional second argument exclusive is omitted or is false, 543 * the last value will be included; otherwise it will be excluded. 544 * 545 * If no block is given, an enumerator is returned instead. 
546 * 547 * "a8".upto("b6") {|s| print s, ' ' } 548 * for s in "a8".."b6" 549 * print s, ' ' 550 * end 551 * 552 * <em>produces:</em> 553 * 554 * a8 a9 b0 b1 b2 b3 b4 b5 b6 555 * a8 a9 b0 b1 b2 b3 b4 b5 b6 556 * 557 * If <i>str</i> and <i>other_str</i> contains only ascii numeric characters, 558 * both are recognized as decimal numbers. In addition, the width of 559 * string (e.g. leading zeros) is handled appropriately. 560 * 561 * "9".upto("11").to_a #=> ["9", "10", "11"] 562 * "25".upto("5").to_a #=> [] 563 * "07".upto("11").to_a #=> ["07", "08", "09", "10", "11"] 564 */ 565 static mrb_value 566 mrb_str_upto(mrb_state *mrb, mrb_value beg) 567 { 568 mrb_value end; 569 mrb_value exclusive = mrb_false_value(); 570 mrb_value block = mrb_nil_value(); 571 mrb_value current, after_end; 572 mrb_int n; 573 mrb_bool excl; 574 575 mrb_get_args(mrb, "o|o&", &end, &exclusive, &block); 576 577 if (mrb_nil_p(block)) { 578 return mrb_funcall(mrb, beg, "to_enum", 3, mrb_symbol_value(mrb_intern_lit(mrb, "upto")), end, exclusive); 579 } 580 end = mrb_string_type(mrb, end); 581 excl = mrb_test(exclusive); 582 583 /* single character */ 584 if (RSTRING_LEN(beg) == 1 && RSTRING_LEN(end) == 1 && 585 ISASCII(RSTRING_PTR(beg)[0]) && ISASCII(RSTRING_PTR(end)[0])) { 586 char c = RSTRING_PTR(beg)[0]; 587 char e = RSTRING_PTR(end)[0]; 588 int ai = mrb_gc_arena_save(mrb); 589 590 if (c > e || (excl && c == e)) return beg; 591 for (;;) { 592 mrb_yield(mrb, block, mrb_str_new(mrb, &c, 1)); 593 mrb_gc_arena_restore(mrb, ai); 594 if (!excl && c == e) break; 595 c++; 596 if (excl && c == e) break; 597 } 598 return beg; 599 } 600 /* both edges are all digits */ 601 if (ISDIGIT(RSTRING_PTR(beg)[0]) && ISDIGIT(RSTRING_PTR(end)[0]) && 602 all_digits_p(RSTRING_PTR(beg), RSTRING_LEN(beg)) && 603 all_digits_p(RSTRING_PTR(end), RSTRING_LEN(end))) { 604 int ai = mrb_gc_arena_save(mrb); 605 mrb_int min_width = RSTRING_LEN(beg); 606 mrb_int max_width = RSTRING_LEN(end); 607 mrb_int bi = mrb_int(mrb, 
mrb_str_to_inum(mrb, beg, 10, FALSE)); 608 mrb_int ei = mrb_int(mrb, mrb_str_to_inum(mrb, end, 10, FALSE)); 609 mrb_value str = mrb_str_new(mrb, NULL, max_width); 610 char *buf = RSTRING_PTR(str); 611 612 while (bi <= ei) { 613 if (excl && bi == ei) break; 614 snprintf(buf, max_width+1, "%.*" MRB_PRId, (int)min_width, bi); 615 mrb_yield(mrb, block, mrb_str_new(mrb, buf, strlen(buf))); 616 mrb_gc_arena_restore(mrb, ai); 617 bi++; 618 } 619 620 return beg; 621 } 622 /* normal case */ 623 n = mrb_int(mrb, mrb_funcall(mrb, beg, "<=>", 1, end)); 624 if (n > 0 || (excl && n == 0)) return beg; 625 626 after_end = mrb_funcall(mrb, end, "succ", 0); 627 current = mrb_str_dup(mrb, beg); 628 while (!mrb_str_equal(mrb, current, after_end)) { 629 int ai = mrb_gc_arena_save(mrb); 630 mrb_value next = mrb_nil_value(); 631 if (excl || !mrb_str_equal(mrb, current, end)) 632 next = mrb_funcall(mrb, current, "succ", 0); 633 mrb_yield(mrb, block, current); 634 if (mrb_nil_p(next)) break; 635 current = mrb_str_to_str(mrb, next); 636 if (excl && mrb_str_equal(mrb, current, end)) break; 637 if (RSTRING_LEN(current) > RSTRING_LEN(end) || RSTRING_LEN(current) == 0) 638 break; 1064 * str.delete_prefix!(prefix) -> self or nil 1065 * 1066 * Deletes leading <code>prefix</code> from <i>str</i>, returning 1067 * <code>nil</code> if no change was made. 
1068 * 1069 * "hello".delete_prefix!("hel") #=> "lo" 1070 * "hello".delete_prefix!("llo") #=> nil 1071 */ 1072 static mrb_value 1073 mrb_str_del_prefix_bang(mrb_state *mrb, mrb_value self) 1074 { 1075 mrb_int plen, slen; 1076 char *ptr, *s; 1077 struct RString *str = RSTRING(self); 1078 1079 mrb_get_args(mrb, "s", &ptr, &plen); 1080 slen = RSTR_LEN(str); 1081 if (plen > slen) return mrb_nil_value(); 1082 s = RSTR_PTR(str); 1083 if (memcmp(s, ptr, plen) != 0) return mrb_nil_value(); 1084 if (!mrb_frozen_p(str) && (RSTR_SHARED_P(str) || RSTR_FSHARED_P(str))) { 1085 str->as.heap.ptr += plen; 1086 } 1087 else { 1088 mrb_str_modify(mrb, str); 1089 s = RSTR_PTR(str); 1090 memmove(s, s+plen, slen-plen); 1091 } 1092 RSTR_SET_LEN(str, slen-plen); 1093 return self; 1094 } 1095 1096 /* 1097 * call-seq: 1098 * str.delete_prefix(prefix) -> new_str 1099 * 1100 * Returns a copy of <i>str</i> with leading <code>prefix</code> deleted. 1101 * 1102 * "hello".delete_prefix("hel") #=> "lo" 1103 * "hello".delete_prefix("llo") #=> "hello" 1104 */ 1105 static mrb_value 1106 mrb_str_del_prefix(mrb_state *mrb, mrb_value self) 1107 { 1108 mrb_int plen, slen; 1109 char *ptr; 1110 1111 mrb_get_args(mrb, "s", &ptr, &plen); 1112 slen = RSTRING_LEN(self); 1113 if (plen > slen) return mrb_str_dup(mrb, self); 1114 if (memcmp(RSTRING_PTR(self), ptr, plen) != 0) 1115 return mrb_str_dup(mrb, self); 1116 return mrb_str_substr(mrb, self, plen, slen-plen); 1117 } 1118 1119 /* 1120 * call-seq: 1121 * str.delete_suffix!(suffix) -> self or nil 1122 * 1123 * Deletes trailing <code>suffix</code> from <i>str</i>, returning 1124 * <code>nil</code> if no change was made. 
1125 * 1126 * "hello".delete_suffix!("llo") #=> "he" 1127 * "hello".delete_suffix!("hel") #=> nil 1128 */ 1129 static mrb_value 1130 mrb_str_del_suffix_bang(mrb_state *mrb, mrb_value self) 1131 { 1132 mrb_int plen, slen; 1133 char *ptr, *s; 1134 struct RString *str = RSTRING(self); 1135 1136 mrb_get_args(mrb, "s", &ptr, &plen); 1137 slen = RSTR_LEN(str); 1138 if (plen > slen) return mrb_nil_value(); 1139 s = RSTR_PTR(str); 1140 if (memcmp(s+slen-plen, ptr, plen) != 0) return mrb_nil_value(); 1141 if (!mrb_frozen_p(str) && (RSTR_SHARED_P(str) || RSTR_FSHARED_P(str))) { 1142 /* no need to modify string */ 1143 } 1144 else { 1145 mrb_str_modify(mrb, str); 1146 } 1147 RSTR_SET_LEN(str, slen-plen); 1148 return self; 1149 } 1150 1151 /* 1152 * call-seq: 1153 * str.delete_suffix(suffix) -> new_str 1154 * 1155 * Returns a copy of <i>str</i> with trailing <code>suffix</code> deleted. 1156 * 1157 * "hello".delete_suffix("llo") #=> "he" 1158 * "hello".delete_suffix("hel") #=> "hello" 1159 */ 1160 static mrb_value 1161 mrb_str_del_suffix(mrb_state *mrb, mrb_value self) 1162 { 1163 mrb_int plen, slen; 1164 char *ptr; 1165 1166 mrb_get_args(mrb, "s", &ptr, &plen); 1167 slen = RSTRING_LEN(self); 1168 if (plen > slen) return mrb_str_dup(mrb, self); 1169 if (memcmp(RSTRING_PTR(self)+slen-plen, ptr, plen) != 0) 1170 return mrb_str_dup(mrb, self); 1171 return mrb_str_substr(mrb, self, 0, slen-plen); 1172 } 1173 1174 static mrb_value 1175 mrb_str_lines(mrb_state *mrb, mrb_value self) 1176 { 1177 mrb_value result; 1178 int ai; 1179 mrb_int len; 1180 char *b = RSTRING_PTR(self); 1181 char *p = b, *t; 1182 char *e = b + RSTRING_LEN(self); 1183 1184 result = mrb_ary_new(mrb); 1185 ai = mrb_gc_arena_save(mrb); 1186 while (p < e) { 1187 t = p; 1188 while (p < e && *p != '\n') p++; 1189 if (*p == '\n') p++; 1190 len = (mrb_int) (p - t); 1191 mrb_ary_push(mrb, result, mrb_str_new(mrb, t, len)); 639 1192 mrb_gc_arena_restore(mrb, ai); 640 1193 } 641 642 return beg; 1194 return result; 643 1195 
} 644 1196 … … 649 1201 650 1202 mrb_define_method(mrb, s, "dump", mrb_str_dump, MRB_ARGS_NONE()); 651 mrb_define_method(mrb, s, "getbyte", mrb_str_getbyte, MRB_ARGS_REQ(1));652 mrb_define_method(mrb, s, "setbyte", mrb_str_setbyte, MRB_ARGS_REQ(2));653 mrb_define_method(mrb, s, "byteslice", mrb_str_byteslice, MRB_ARGS_REQ(1)|MRB_ARGS_OPT(1));654 1203 mrb_define_method(mrb, s, "swapcase!", mrb_str_swapcase_bang, MRB_ARGS_NONE()); 655 1204 mrb_define_method(mrb, s, "swapcase", mrb_str_swapcase, MRB_ARGS_NONE()); 656 mrb_define_method(mrb, s, "concat", mrb_str_concat2, MRB_ARGS_REQ(1)); 657 mrb_define_method(mrb, s, "<<", mrb_str_concat2, MRB_ARGS_REQ(1)); 1205 mrb_define_method(mrb, s, "concat", mrb_str_concat_m, MRB_ARGS_REQ(1)); 1206 mrb_define_method(mrb, s, "<<", mrb_str_concat_m, MRB_ARGS_REQ(1)); 1207 mrb_define_method(mrb, s, "count", mrb_str_count, MRB_ARGS_REQ(1)); 1208 mrb_define_method(mrb, s, "tr", mrb_str_tr, MRB_ARGS_REQ(2)); 1209 mrb_define_method(mrb, s, "tr!", mrb_str_tr_bang, MRB_ARGS_REQ(2)); 1210 mrb_define_method(mrb, s, "tr_s", mrb_str_tr_s, MRB_ARGS_REQ(2)); 1211 mrb_define_method(mrb, s, "tr_s!", mrb_str_tr_s_bang, MRB_ARGS_REQ(2)); 1212 mrb_define_method(mrb, s, "squeeze", mrb_str_squeeze, MRB_ARGS_OPT(1)); 1213 mrb_define_method(mrb, s, "squeeze!", mrb_str_squeeze_bang, MRB_ARGS_OPT(1)); 1214 mrb_define_method(mrb, s, "delete", mrb_str_delete, MRB_ARGS_REQ(1)); 1215 mrb_define_method(mrb, s, "delete!", mrb_str_delete_bang, MRB_ARGS_REQ(1)); 658 1216 mrb_define_method(mrb, s, "start_with?", mrb_str_start_with, MRB_ARGS_REST()); 659 1217 mrb_define_method(mrb, s, "end_with?", mrb_str_end_with, MRB_ARGS_REST()); … … 661 1219 mrb_define_method(mrb, s, "oct", mrb_str_oct, MRB_ARGS_NONE()); 662 1220 mrb_define_method(mrb, s, "chr", mrb_str_chr, MRB_ARGS_NONE()); 663 mrb_define_method(mrb, s, "lines", mrb_str_lines, MRB_ARGS_NONE());664 1221 mrb_define_method(mrb, s, "succ", mrb_str_succ, MRB_ARGS_NONE()); 665 1222 mrb_define_method(mrb, s, 
"succ!", mrb_str_succ_bang, MRB_ARGS_NONE()); 666 mrb_alias_method(mrb, s, mrb_intern_lit(mrb, "next"), mrb_intern_lit(mrb, "succ")); 667 mrb_alias_method(mrb, s, mrb_intern_lit(mrb, "next!"), mrb_intern_lit(mrb, "succ!")); 668 mrb_define_method(mrb, s, "ord", mrb_str_ord, MRB_ARGS_NONE()); 669 mrb_define_method(mrb, s, "upto", mrb_str_upto, MRB_ARGS_ANY()); 670 671 mrb_define_method(mrb, mrb->fixnum_class, "chr", mrb_fixnum_chr, MRB_ARGS_NONE()); 1223 mrb_define_method(mrb, s, "next", mrb_str_succ, MRB_ARGS_NONE()); 1224 mrb_define_method(mrb, s, "next!", mrb_str_succ_bang, MRB_ARGS_NONE()); 1225 mrb_define_method(mrb, s, "ord", mrb_str_ord, MRB_ARGS_NONE()); 1226 mrb_define_method(mrb, s, "delete_prefix!", mrb_str_del_prefix_bang, MRB_ARGS_REQ(1)); 1227 mrb_define_method(mrb, s, "delete_prefix", mrb_str_del_prefix, MRB_ARGS_REQ(1)); 1228 mrb_define_method(mrb, s, "delete_suffix!", mrb_str_del_suffix_bang, MRB_ARGS_REQ(1)); 1229 mrb_define_method(mrb, s, "delete_suffix", mrb_str_del_suffix, MRB_ARGS_REQ(1)); 1230 1231 mrb_define_method(mrb, s, "__lines", mrb_str_lines, MRB_ARGS_NONE()); 1232 1233 mrb_define_method(mrb, mrb_module_get(mrb, "Integral"), "chr", mrb_int_chr, MRB_ARGS_OPT(1)); 672 1234 } 673 1235
Note:
See TracChangeset
for help on using the changeset viewer.