Changeset 439 for EcnlProtoTool/trunk/mruby-2.1.1/mrbgems/mruby-string-ext
- Timestamp:
- Jul 9, 2020, 8:51:43 AM (4 years ago)
- Location:
- EcnlProtoTool/trunk/mruby-2.1.1
- Files:
-
- 2 added
- 4 edited
- 1 moved
Legend:
- Unmodified
- Added
- Removed
-
EcnlProtoTool/trunk/mruby-2.1.1/mrbgems/mruby-string-ext/mrbgem.rake
r331 r439 3 3 spec.author = 'mruby developers' 4 4 spec.summary = 'String class extension' 5 spec.add_test_dependency 'mruby-enumerator', core: 'mruby-enumerator'6 5 end -
EcnlProtoTool/trunk/mruby-2.1.1/mrbgems/mruby-string-ext/mrblib/string.rb
r331 r439 1 1 class String 2 3 ##4 # call-seq:5 # String.try_convert(obj) -> string or nil6 #7 # Try to convert <i>obj</i> into a String, using to_str method.8 # Returns converted string or nil if <i>obj</i> cannot be converted9 # for any reason.10 #11 # String.try_convert("str") #=> "str"12 # String.try_convert(/re/) #=> nil13 #14 def self.try_convert(obj)15 if obj.respond_to?(:to_str)16 obj.to_str17 else18 nil19 end20 end21 2 22 3 ## … … 96 77 # 97 78 def lstrip! 98 raise RuntimeError, "can't modify frozen String" if frozen?79 raise FrozenError, "can't modify frozen String" if frozen? 99 80 s = self.lstrip 100 81 (s == self) ? nil : self.replace(s) … … 113 94 # 114 95 def rstrip! 115 raise RuntimeError, "can't modify frozen String" if frozen?96 raise FrozenError, "can't modify frozen String" if frozen? 116 97 s = self.rstrip 117 98 (s == self) ? nil : self.replace(s) … … 126 107 # 127 108 def strip! 128 raise RuntimeError, "can't modify frozen String" if frozen?109 raise FrozenError, "can't modify frozen String" if frozen? 129 110 s = self.strip 130 111 (s == self) ? nil : self.replace(s) … … 143 124 # 144 125 def casecmp(str) 145 self.downcase <=> str. to_str.downcase126 self.downcase <=> str.__to_str.downcase 146 127 rescue NoMethodError 147 raise TypeError, "no implicit conversion of #{str.class} into String" 128 nil 129 end 130 131 ## 132 # call-seq: 133 # str.casecmp?(other) -> true, false, or nil 134 # 135 # Returns true if str and other_str are equal after case folding, 136 # false if they are not equal, and nil if other_str is not a string. 137 138 def casecmp?(str) 139 c = self.casecmp(str) 140 return nil if c.nil? 141 return c == 0 148 142 end 149 143 … … 187 181 # 188 182 def slice!(arg1, arg2=nil) 189 raise RuntimeError, "can't modify frozen String" if frozen?183 raise FrozenError, "can't modify frozen String" if frozen? 190 184 raise "wrong number of arguments (for 1..2)" if arg1.nil? && arg2.nil? 
191 185 … … 317 311 end 318 312 313 ## 314 # Call the given block for each character of 315 # +self+. 319 316 def each_char(&block) 320 317 return to_enum :each_char unless block 321 322 split('').each do |i| 323 block.call(i) 318 pos = 0 319 while pos < self.size 320 block.call(self[pos]) 321 pos += 1 324 322 end 325 323 self … … 353 351 self 354 352 end 353 354 ## 355 # call-seq: 356 # string.lines -> array of string 357 # string.lines {|s| block} -> array of string 358 # 359 # Returns strings per line; 360 # 361 # a = "abc\ndef" 362 # a.lines #=> ["abc\n", "def"] 363 # 364 # If a block is given, it works the same as <code>each_line</code>. 365 def lines(&blk) 366 lines = self.__lines 367 if blk 368 lines.each do |line| 369 blk.call(line) 370 end 371 end 372 lines 373 end 374 375 ## 376 # call-seq: 377 # str.upto(other_str, exclusive=false) {|s| block } -> str 378 # str.upto(other_str, exclusive=false) -> an_enumerator 379 # 380 # Iterates through successive values, starting at <i>str</i> and 381 # ending at <i>other_str</i> inclusive, passing each value in turn to 382 # the block. The <code>String#succ</code> method is used to generate 383 # each value. If optional second argument exclusive is omitted or is false, 384 # the last value will be included; otherwise it will be excluded. 385 # 386 # If no block is given, an enumerator is returned instead. 387 # 388 # "a8".upto("b6") {|s| print s, ' ' } 389 # for s in "a8".."b6" 390 # print s, ' ' 391 # end 392 # 393 # <em>produces:</em> 394 # 395 # a8 a9 b0 b1 b2 b3 b4 b5 b6 396 # a8 a9 b0 b1 b2 b3 b4 b5 b6 397 # 398 # If <i>str</i> and <i>other_str</i> contains only ascii numeric characters, 399 # both are recognized as decimal numbers. In addition, the width of 400 # string (e.g. leading zeros) is handled appropriately. 
401 # 402 # "9".upto("11").to_a #=> ["9", "10", "11"] 403 # "25".upto("5").to_a #=> [] 404 # "07".upto("11").to_a #=> ["07", "08", "09", "10", "11"] 405 def upto(max, exclusive=false, &block) 406 return to_enum(:upto, max, exclusive) unless block 407 raise TypeError, "no implicit conversion of #{max.class} into String" unless max.kind_of? String 408 409 len = self.length 410 maxlen = max.length 411 # single character 412 if len == 1 and maxlen == 1 413 c = self.ord 414 e = max.ord 415 while c <= e 416 break if exclusive and c == e 417 yield c.chr(__ENCODING__) 418 c += 1 419 end 420 return self 421 end 422 # both edges are all digits 423 bi = self.to_i(10) 424 ei = max.to_i(10) 425 len = self.length 426 if (bi > 0 or bi == "0"*len) and (ei > 0 or ei == "0"*maxlen) 427 while bi <= ei 428 break if exclusive and bi == ei 429 s = bi.to_s 430 s = s.rjust(len, "0") if s.length < len 431 yield s 432 bi += 1 433 end 434 return self 435 end 436 bs = self 437 while true 438 n = (bs <=> max) 439 break if n > 0 440 break if exclusive and n == 0 441 yield bs 442 break if n == 0 443 bs = bs.succ 444 end 445 self 446 end 355 447 end -
EcnlProtoTool/trunk/mruby-2.1.1/mrbgems/mruby-string-ext/src/string.c
r331 r439 6 6 #include <mruby/range.h> 7 7 8 static mrb_value 9 mrb_str_getbyte(mrb_state *mrb, mrb_value str) 10 { 11 mrb_int pos; 12 mrb_get_args(mrb, "i", &pos); 13 14 if (pos < 0) 15 pos += RSTRING_LEN(str); 16 if (pos < 0 || RSTRING_LEN(str) <= pos) 17 return mrb_nil_value(); 18 19 return mrb_fixnum_value((unsigned char)RSTRING_PTR(str)[pos]); 20 } 21 22 static mrb_value 23 mrb_str_setbyte(mrb_state *mrb, mrb_value str) 24 { 25 mrb_int pos, byte; 26 long len; 27 28 mrb_get_args(mrb, "ii", &pos, &byte); 29 30 len = RSTRING_LEN(str); 31 if (pos < -len || len <= pos) 32 mrb_raisef(mrb, E_INDEX_ERROR, "index %S is out of array", mrb_fixnum_value(pos)); 33 if (pos < 0) 34 pos += len; 35 36 mrb_str_modify(mrb, mrb_str_ptr(str)); 37 byte &= 0xff; 38 RSTRING_PTR(str)[pos] = byte; 39 return mrb_fixnum_value((unsigned char)byte); 40 } 41 42 static mrb_value 43 mrb_str_byteslice(mrb_state *mrb, mrb_value str) 44 { 45 mrb_value a1; 8 #define ENC_ASCII_8BIT "ASCII-8BIT" 9 #define ENC_BINARY "BINARY" 10 #define ENC_UTF8 "UTF-8" 11 12 #define ENC_COMP_P(enc, enc_lit) \ 13 str_casecmp_p(RSTRING_PTR(enc), RSTRING_LEN(enc), enc_lit, sizeof(enc_lit"")-1) 14 15 #ifdef MRB_WITHOUT_FLOAT 16 # define mrb_float_p(o) FALSE 17 #endif 18 19 static mrb_bool 20 str_casecmp_p(const char *s1, mrb_int len1, const char *s2, mrb_int len2) 21 { 22 const char *e1, *e2; 23 24 if (len1 != len2) return FALSE; 25 e1 = s1 + len1; 26 e2 = s2 + len2; 27 while (s1 < e1 && s2 < e2) { 28 if (*s1 != *s2 && TOUPPER(*s1) != TOUPPER(*s2)) return FALSE; 29 ++s1; 30 ++s2; 31 } 32 return TRUE; 33 } 34 35 static mrb_value 36 int_chr_binary(mrb_state *mrb, mrb_value num) 37 { 38 mrb_int cp = mrb_int(mrb, num); 39 char c; 40 mrb_value str; 41 42 if (cp < 0 || 0xff < cp) { 43 mrb_raisef(mrb, E_RANGE_ERROR, "%v out of char range", num); 44 } 45 c = (char)cp; 46 str = mrb_str_new(mrb, &c, 1); 47 RSTR_SET_ASCII_FLAG(mrb_str_ptr(str)); 48 return str; 49 } 50 51 #ifdef MRB_UTF8_STRING 52 static mrb_value 53 
int_chr_utf8(mrb_state *mrb, mrb_value num) 54 { 55 mrb_int cp = mrb_int(mrb, num); 56 char utf8[4]; 46 57 mrb_int len; 47 int argc;48 49 argc = mrb_get_args(mrb, "o|i", &a1, &len); 50 if ( argc == 2) {51 return mrb_str_substr(mrb, str, mrb_fixnum(a1), len);52 } 53 switch (mrb_type(a1)) {54 case MRB_TT_RANGE:55 {56 mrb_int beg;57 58 len = RSTRING_LEN(str);59 switch (mrb_range_beg_len(mrb, a1, &beg, &len, len, TRUE)) {60 case 0: /* not range */61 break;62 case 1: /* range */63 return mrb_str_substr(mrb, str, beg, len);64 case 2: /* out of range */65 mrb_raisef(mrb, E_RANGE_ERROR, "%S out of range", a1);66 break;67 }68 return mrb_nil_value();69 }70 case MRB_TT_FLOAT:71 a1 = mrb_fixnum_value((mrb_int)mrb_float(a1));72 /* fall through */73 case MRB_TT_FIXNUM:74 return mrb_str_substr(mrb, str, mrb_fixnum(a1), 1);75 default:76 mrb_raise(mrb, E_TYPE_ERROR, "wrong type of argument");77 }78 /* not reached */79 return mrb_nil_value(); 80 } 58 mrb_value str; 59 uint32_t ascii_flag = 0; 60 61 if (cp < 0 || 0x10FFFF < cp) { 62 mrb_raisef(mrb, E_RANGE_ERROR, "%v out of char range", num); 63 } 64 if (cp < 0x80) { 65 utf8[0] = (char)cp; 66 len = 1; 67 ascii_flag = MRB_STR_ASCII; 68 } 69 else if (cp < 0x800) { 70 utf8[0] = (char)(0xC0 | (cp >> 6)); 71 utf8[1] = (char)(0x80 | (cp & 0x3F)); 72 len = 2; 73 } 74 else if (cp < 0x10000) { 75 utf8[0] = (char)(0xE0 | (cp >> 12)); 76 utf8[1] = (char)(0x80 | ((cp >> 6) & 0x3F)); 77 utf8[2] = (char)(0x80 | ( cp & 0x3F)); 78 len = 3; 79 } 80 else { 81 utf8[0] = (char)(0xF0 | (cp >> 18)); 82 utf8[1] = (char)(0x80 | ((cp >> 12) & 0x3F)); 83 utf8[2] = (char)(0x80 | ((cp >> 6) & 0x3F)); 84 utf8[3] = (char)(0x80 | ( cp & 0x3F)); 85 len = 4; 86 } 87 str = mrb_str_new(mrb, utf8, len); 88 mrb_str_ptr(str)->flags |= ascii_flag; 89 return str; 90 } 91 #endif 81 92 82 93 /* … … 135 146 } 136 147 137 static mrb_value mrb_fixnum_chr(mrb_state *mrb, mrb_value num);138 139 148 /* 140 149 * call-seq: … … 146 155 * Append---Concatenates the given object to 
<i>str</i>. If the object is a 147 156 * <code>Integer</code>, it is considered as a codepoint, and is converted 148 * to a character before concatenation. 157 * to a character before concatenation 158 * (equivalent to <code>str.concat(integer.chr(__ENCODING__))</code>). 149 159 * 150 160 * a = "hello " … … 153 163 */ 154 164 static mrb_value 155 mrb_str_concat 2(mrb_state *mrb, mrb_value self)165 mrb_str_concat_m(mrb_state *mrb, mrb_value self) 156 166 { 157 167 mrb_value str; 158 168 159 169 mrb_get_args(mrb, "o", &str); 160 if (mrb_fixnum_p(str)) 161 str = mrb_fixnum_chr(mrb, str); 170 if (mrb_fixnum_p(str) || mrb_float_p(str)) 171 #ifdef MRB_UTF8_STRING 172 str = int_chr_utf8(mrb, str); 173 #else 174 str = int_chr_binary(mrb, str); 175 #endif 162 176 else 163 str = mrb_string_type(mrb, str);164 mrb_str_c oncat(mrb, self, str);177 mrb_ensure_string_type(mrb, str); 178 mrb_str_cat_str(mrb, self, str); 165 179 return self; 166 180 } … … 189 203 size_t len_l, len_r; 190 204 int ai = mrb_gc_arena_save(mrb); 191 sub = mrb_ string_type(mrb, argv[i]);205 sub = mrb_ensure_string_type(mrb, argv[i]); 192 206 mrb_gc_arena_restore(mrb, ai); 193 207 len_l = RSTRING_LEN(self); … … 218 232 size_t len_l, len_r; 219 233 int ai = mrb_gc_arena_save(mrb); 220 sub = mrb_ string_type(mrb, argv[i]);234 sub = mrb_ensure_string_type(mrb, argv[i]); 221 235 mrb_gc_arena_restore(mrb, ai); 222 236 len_l = RSTRING_LEN(self); … … 233 247 } 234 248 249 enum tr_pattern_type { 250 TR_UNINITIALIZED = 0, 251 TR_IN_ORDER = 1, 252 TR_RANGE = 2, 253 }; 254 255 /* 256 #tr Pattern syntax 257 258 <syntax> ::= (<pattern>)* | '^' (<pattern>)* 259 <pattern> ::= <in order> | <range> 260 <in order> ::= (<ch>)+ 261 <range> ::= <ch> '-' <ch> 262 */ 263 struct tr_pattern { 264 uint8_t type; // 1:in-order, 2:range 265 mrb_bool flag_reverse : 1; 266 mrb_bool flag_on_heap : 1; 267 uint16_t n; 268 union { 269 uint16_t start_pos; 270 char ch[2]; 271 } val; 272 struct tr_pattern *next; 273 }; 274 275 #define 
STATIC_TR_PATTERN { 0 } 276 277 static inline void 278 tr_free_pattern(mrb_state *mrb, struct tr_pattern *pat) 279 { 280 while (pat) { 281 struct tr_pattern *p = pat->next; 282 if (pat->flag_on_heap) { 283 mrb_free(mrb, pat); 284 } 285 pat = p; 286 } 287 } 288 289 static struct tr_pattern* 290 tr_parse_pattern(mrb_state *mrb, struct tr_pattern *ret, const mrb_value v_pattern, mrb_bool flag_reverse_enable) 291 { 292 const char *pattern = RSTRING_PTR(v_pattern); 293 mrb_int pattern_length = RSTRING_LEN(v_pattern); 294 mrb_bool flag_reverse = FALSE; 295 struct tr_pattern *pat1; 296 mrb_int i = 0; 297 298 if(flag_reverse_enable && pattern_length >= 2 && pattern[0] == '^') { 299 flag_reverse = TRUE; 300 i++; 301 } 302 303 while (i < pattern_length) { 304 /* is range pattern ? */ 305 mrb_bool const ret_uninit = (ret->type == TR_UNINITIALIZED); 306 pat1 = ret_uninit 307 ? ret 308 : (struct tr_pattern*)mrb_malloc_simple(mrb, sizeof(struct tr_pattern)); 309 if ((i+2) < pattern_length && pattern[i] != '\\' && pattern[i+1] == '-') { 310 if (pat1 == NULL && ret) { 311 nomem: 312 tr_free_pattern(mrb, ret); 313 mrb_exc_raise(mrb, mrb_obj_value(mrb->nomem_err)); 314 return NULL; /* not reached */ 315 } 316 pat1->type = TR_RANGE; 317 pat1->flag_reverse = flag_reverse; 318 pat1->flag_on_heap = !ret_uninit; 319 pat1->n = pattern[i+2] - pattern[i] + 1; 320 pat1->next = NULL; 321 pat1->val.ch[0] = pattern[i]; 322 pat1->val.ch[1] = pattern[i+2]; 323 i += 3; 324 } 325 else { 326 /* in order pattern. 
*/ 327 mrb_int start_pos = i++; 328 mrb_int len; 329 330 while (i < pattern_length) { 331 if ((i+2) < pattern_length && pattern[i] != '\\' && pattern[i+1] == '-') 332 break; 333 i++; 334 } 335 336 len = i - start_pos; 337 if (len > UINT16_MAX) { 338 mrb_raise(mrb, E_ARGUMENT_ERROR, "tr pattern too long (max 65536)"); 339 } 340 if (pat1 == NULL && ret) { 341 goto nomem; 342 } 343 pat1->type = TR_IN_ORDER; 344 pat1->flag_reverse = flag_reverse; 345 pat1->flag_on_heap = !ret_uninit; 346 pat1->n = len; 347 pat1->next = NULL; 348 pat1->val.start_pos = start_pos; 349 } 350 351 if (ret == NULL || ret_uninit) { 352 ret = pat1; 353 } 354 else { 355 struct tr_pattern *p = ret; 356 while (p->next != NULL) { 357 p = p->next; 358 } 359 p->next = pat1; 360 } 361 } 362 363 return ret; 364 } 365 366 static inline mrb_int 367 tr_find_character(const struct tr_pattern *pat, const char *pat_str, int ch) 368 { 369 mrb_int ret = -1; 370 mrb_int n_sum = 0; 371 mrb_int flag_reverse = pat ? pat->flag_reverse : 0; 372 373 while (pat != NULL) { 374 if (pat->type == TR_IN_ORDER) { 375 int i; 376 for (i = 0; i < pat->n; i++) { 377 if (pat_str[pat->val.start_pos + i] == ch) ret = n_sum + i; 378 } 379 } 380 else if (pat->type == TR_RANGE) { 381 if (pat->val.ch[0] <= ch && ch <= pat->val.ch[1]) 382 ret = n_sum + ch - pat->val.ch[0]; 383 } 384 else { 385 mrb_assert(pat->type == TR_UNINITIALIZED); 386 } 387 n_sum += pat->n; 388 pat = pat->next; 389 } 390 391 if (flag_reverse) { 392 return (ret < 0) ? 
MRB_INT_MAX : -1; 393 } 394 return ret; 395 } 396 397 static inline mrb_int 398 tr_get_character(const struct tr_pattern *pat, const char *pat_str, mrb_int n_th) 399 { 400 mrb_int n_sum = 0; 401 402 while (pat != NULL) { 403 if (n_th < (n_sum + pat->n)) { 404 mrb_int i = (n_th - n_sum); 405 406 switch (pat->type) { 407 case TR_IN_ORDER: 408 return pat_str[pat->val.start_pos + i]; 409 case TR_RANGE: 410 return pat->val.ch[0]+i; 411 case TR_UNINITIALIZED: 412 return -1; 413 } 414 } 415 if (pat->next == NULL) { 416 switch (pat->type) { 417 case TR_IN_ORDER: 418 return pat_str[pat->val.start_pos + pat->n - 1]; 419 case TR_RANGE: 420 return pat->val.ch[1]; 421 case TR_UNINITIALIZED: 422 return -1; 423 } 424 } 425 n_sum += pat->n; 426 pat = pat->next; 427 } 428 429 return -1; 430 } 431 432 static inline void 433 tr_bitmap_set(uint8_t bitmap[32], uint8_t ch) 434 { 435 uint8_t idx1 = ch / 8; 436 uint8_t idx2 = ch % 8; 437 bitmap[idx1] |= (1<<idx2); 438 } 439 440 static inline mrb_bool 441 tr_bitmap_detect(uint8_t bitmap[32], uint8_t ch) 442 { 443 uint8_t idx1 = ch / 8; 444 uint8_t idx2 = ch % 8; 445 if (bitmap[idx1] & (1<<idx2)) 446 return TRUE; 447 return FALSE; 448 } 449 450 /* compile patter to bitmap */ 451 static void 452 tr_compile_pattern(const struct tr_pattern *pat, mrb_value pstr, uint8_t bitmap[32]) 453 { 454 const char *pattern = RSTRING_PTR(pstr); 455 mrb_int flag_reverse = pat ? 
pat->flag_reverse : 0; 456 int i; 457 458 for (i=0; i<32; i++) { 459 bitmap[i] = 0; 460 } 461 while (pat != NULL) { 462 if (pat->type == TR_IN_ORDER) { 463 for (i = 0; i < pat->n; i++) { 464 tr_bitmap_set(bitmap, pattern[pat->val.start_pos + i]); 465 } 466 } 467 else if (pat->type == TR_RANGE) { 468 for (i = pat->val.ch[0]; i < pat->val.ch[1]; i++) { 469 tr_bitmap_set(bitmap, i); 470 } 471 } 472 else { 473 mrb_assert(pat->type == TR_UNINITIALIZED); 474 } 475 pat = pat->next; 476 } 477 478 if (flag_reverse) { 479 for (i=0; i<32; i++) { 480 bitmap[i] ^= 0xff; 481 } 482 } 483 } 484 485 static mrb_bool 486 str_tr(mrb_state *mrb, mrb_value str, mrb_value p1, mrb_value p2, mrb_bool squeeze) 487 { 488 struct tr_pattern pat = STATIC_TR_PATTERN; 489 struct tr_pattern rep_storage = STATIC_TR_PATTERN; 490 char *s; 491 mrb_int len; 492 mrb_int i; 493 mrb_int j; 494 mrb_bool flag_changed = FALSE; 495 mrb_int lastch = -1; 496 struct tr_pattern *rep; 497 498 mrb_str_modify(mrb, mrb_str_ptr(str)); 499 tr_parse_pattern(mrb, &pat, p1, TRUE); 500 rep = tr_parse_pattern(mrb, &rep_storage, p2, FALSE); 501 s = RSTRING_PTR(str); 502 len = RSTRING_LEN(str); 503 504 for (i=j=0; i<len; i++,j++) { 505 mrb_int n = tr_find_character(&pat, RSTRING_PTR(p1), s[i]); 506 507 if (i>j) s[j] = s[i]; 508 if (n >= 0) { 509 flag_changed = TRUE; 510 if (rep == NULL) { 511 j--; 512 } 513 else { 514 mrb_int c = tr_get_character(rep, RSTRING_PTR(p2), n); 515 516 if (c < 0 || (squeeze && c == lastch)) { 517 j--; 518 continue; 519 } 520 if (c > 0x80) { 521 mrb_raisef(mrb, E_ARGUMENT_ERROR, "character (%i) out of range", c); 522 } 523 lastch = c; 524 s[i] = (char)c; 525 } 526 } 527 } 528 529 tr_free_pattern(mrb, &pat); 530 tr_free_pattern(mrb, rep); 531 532 if (flag_changed) { 533 RSTR_SET_LEN(RSTRING(str), j); 534 RSTRING_PTR(str)[j] = 0; 535 } 536 return flag_changed; 537 } 538 539 /* 540 * call-seq: 541 * str.tr(from_str, to_str) => new_str 542 * 543 * Returns a copy of str with the characters in from_str 
replaced by the 544 * corresponding characters in to_str. If to_str is shorter than from_str, 545 * it is padded with its last character in order to maintain the 546 * correspondence. 547 * 548 * "hello".tr('el', 'ip') #=> "hippo" 549 * "hello".tr('aeiou', '*') #=> "h*ll*" 550 * "hello".tr('aeiou', 'AA*') #=> "hAll*" 551 * 552 * Both strings may use the c1-c2 notation to denote ranges of characters, 553 * and from_str may start with a ^, which denotes all characters except 554 * those listed. 555 * 556 * "hello".tr('a-y', 'b-z') #=> "ifmmp" 557 * "hello".tr('^aeiou', '*') #=> "*e**o" 558 * 559 * The backslash character \ can be used to escape ^ or - and is otherwise 560 * ignored unless it appears at the end of a range or the end of the 561 * from_str or to_str: 562 * 563 * 564 * "hello^world".tr("\\^aeiou", "*") #=> "h*ll**w*rld" 565 * "hello-world".tr("a\\-eo", "*") #=> "h*ll**w*rld" 566 * 567 * "hello\r\nworld".tr("\r", "") #=> "hello\nworld" 568 * "hello\r\nworld".tr("\\r", "") #=> "hello\r\nwold" 569 * "hello\r\nworld".tr("\\\r", "") #=> "hello\nworld" 570 * 571 * "X['\\b']".tr("X\\", "") #=> "['b']" 572 * "X['\\b']".tr("X-\\]", "") #=> "'b'" 573 * 574 * Note: conversion is effective only in ASCII region. 575 */ 576 static mrb_value 577 mrb_str_tr(mrb_state *mrb, mrb_value str) 578 { 579 mrb_value dup; 580 mrb_value p1, p2; 581 582 mrb_get_args(mrb, "SS", &p1, &p2); 583 dup = mrb_str_dup(mrb, str); 584 str_tr(mrb, dup, p1, p2, FALSE); 585 return dup; 586 } 587 588 /* 589 * call-seq: 590 * str.tr!(from_str, to_str) -> str or nil 591 * 592 * Translates str in place, using the same rules as String#tr. 593 * Returns str, or nil if no changes were made. 
594 */ 595 static mrb_value 596 mrb_str_tr_bang(mrb_state *mrb, mrb_value str) 597 { 598 mrb_value p1, p2; 599 600 mrb_get_args(mrb, "SS", &p1, &p2); 601 if (str_tr(mrb, str, p1, p2, FALSE)) { 602 return str; 603 } 604 return mrb_nil_value(); 605 } 606 607 /* 608 * call-seq: 609 * str.tr_s(from_str, to_str) -> new_str 610 * 611 * Processes a copy of str as described under String#tr, then removes 612 * duplicate characters in regions that were affected by the translation. 613 * 614 * "hello".tr_s('l', 'r') #=> "hero" 615 * "hello".tr_s('el', '*') #=> "h*o" 616 * "hello".tr_s('el', 'hx') #=> "hhxo" 617 */ 618 static mrb_value 619 mrb_str_tr_s(mrb_state *mrb, mrb_value str) 620 { 621 mrb_value dup; 622 mrb_value p1, p2; 623 624 mrb_get_args(mrb, "SS", &p1, &p2); 625 dup = mrb_str_dup(mrb, str); 626 str_tr(mrb, dup, p1, p2, TRUE); 627 return dup; 628 } 629 630 /* 631 * call-seq: 632 * str.tr_s!(from_str, to_str) -> str or nil 633 * 634 * Performs String#tr_s processing on str in place, returning 635 * str, or nil if no changes were made. 
636 */ 637 static mrb_value 638 mrb_str_tr_s_bang(mrb_state *mrb, mrb_value str) 639 { 640 mrb_value p1, p2; 641 642 mrb_get_args(mrb, "SS", &p1, &p2); 643 if (str_tr(mrb, str, p1, p2, TRUE)) { 644 return str; 645 } 646 return mrb_nil_value(); 647 } 648 649 static mrb_bool 650 str_squeeze(mrb_state *mrb, mrb_value str, mrb_value v_pat) 651 { 652 struct tr_pattern pat_storage = STATIC_TR_PATTERN; 653 struct tr_pattern *pat = NULL; 654 mrb_int i, j; 655 char *s; 656 mrb_int len; 657 mrb_bool flag_changed = FALSE; 658 mrb_int lastch = -1; 659 uint8_t bitmap[32]; 660 661 mrb_str_modify(mrb, mrb_str_ptr(str)); 662 if (!mrb_nil_p(v_pat)) { 663 pat = tr_parse_pattern(mrb, &pat_storage, v_pat, TRUE); 664 tr_compile_pattern(pat, v_pat, bitmap); 665 tr_free_pattern(mrb, pat); 666 } 667 s = RSTRING_PTR(str); 668 len = RSTRING_LEN(str); 669 670 if (pat) { 671 for (i=j=0; i<len; i++,j++) { 672 if (i>j) s[j] = s[i]; 673 if (tr_bitmap_detect(bitmap, s[i]) && s[i] == lastch) { 674 flag_changed = TRUE; 675 j--; 676 } 677 lastch = s[i]; 678 } 679 } 680 else { 681 for (i=j=0; i<len; i++,j++) { 682 if (i>j) s[j] = s[i]; 683 if (s[i] >= 0 && s[i] == lastch) { 684 flag_changed = TRUE; 685 j--; 686 } 687 lastch = s[i]; 688 } 689 } 690 691 if (flag_changed) { 692 RSTR_SET_LEN(RSTRING(str), j); 693 RSTRING_PTR(str)[j] = 0; 694 } 695 return flag_changed; 696 } 697 698 /* 699 * call-seq: 700 * str.squeeze([other_str]) -> new_str 701 * 702 * Builds a set of characters from the other_str 703 * parameter(s) using the procedure described for String#count. Returns a 704 * new string where runs of the same character that occur in this set are 705 * replaced by a single character. If no arguments are given, all runs of 706 * identical characters are replaced by a single character. 
707 * 708 * "yellow moon".squeeze #=> "yelow mon" 709 * " now is the".squeeze(" ") #=> " now is the" 710 * "putters shoot balls".squeeze("m-z") #=> "puters shot balls" 711 */ 712 static mrb_value 713 mrb_str_squeeze(mrb_state *mrb, mrb_value str) 714 { 715 mrb_value pat = mrb_nil_value(); 716 mrb_value dup; 717 718 mrb_get_args(mrb, "|S", &pat); 719 dup = mrb_str_dup(mrb, str); 720 str_squeeze(mrb, dup, pat); 721 return dup; 722 } 723 724 /* 725 * call-seq: 726 * str.squeeze!([other_str]) -> str or nil 727 * 728 * Squeezes str in place, returning either str, or nil if no 729 * changes were made. 730 */ 731 static mrb_value 732 mrb_str_squeeze_bang(mrb_state *mrb, mrb_value str) 733 { 734 mrb_value pat = mrb_nil_value(); 735 736 mrb_get_args(mrb, "|S", &pat); 737 if (str_squeeze(mrb, str, pat)) { 738 return str; 739 } 740 return mrb_nil_value(); 741 } 742 743 static mrb_bool 744 str_delete(mrb_state *mrb, mrb_value str, mrb_value v_pat) 745 { 746 struct tr_pattern pat = STATIC_TR_PATTERN; 747 mrb_int i, j; 748 char *s; 749 mrb_int len; 750 mrb_bool flag_changed = FALSE; 751 uint8_t bitmap[32]; 752 753 mrb_str_modify(mrb, mrb_str_ptr(str)); 754 tr_parse_pattern(mrb, &pat, v_pat, TRUE); 755 tr_compile_pattern(&pat, v_pat, bitmap); 756 tr_free_pattern(mrb, &pat); 757 758 s = RSTRING_PTR(str); 759 len = RSTRING_LEN(str); 760 761 for (i=j=0; i<len; i++,j++) { 762 if (i>j) s[j] = s[i]; 763 if (tr_bitmap_detect(bitmap, s[i])) { 764 flag_changed = TRUE; 765 j--; 766 } 767 } 768 if (flag_changed) { 769 RSTR_SET_LEN(RSTRING(str), j); 770 RSTRING_PTR(str)[j] = 0; 771 } 772 return flag_changed; 773 } 774 775 static mrb_value 776 mrb_str_delete(mrb_state *mrb, mrb_value str) 777 { 778 mrb_value pat; 779 mrb_value dup; 780 781 mrb_get_args(mrb, "S", &pat); 782 dup = mrb_str_dup(mrb, str); 783 str_delete(mrb, dup, pat); 784 return dup; 785 } 786 787 static mrb_value 788 mrb_str_delete_bang(mrb_state *mrb, mrb_value str) 789 { 790 mrb_value pat; 791 792 mrb_get_args(mrb, "S", 
&pat); 793 if (str_delete(mrb, str, pat)) { 794 return str; 795 } 796 return mrb_nil_value(); 797 } 798 799 /* 800 * call_seq: 801 * str.count([other_str]) -> integer 802 * 803 * Each other_str parameter defines a set of characters to count. The 804 * intersection of these sets defines the characters to count in str. Any 805 * other_str that starts with a caret ^ is negated. The sequence c1-c2 806 * means all characters between c1 and c2. The backslash character \ can 807 * be used to escape ^ or - and is otherwise ignored unless it appears at 808 * the end of a sequence or the end of a other_str. 809 */ 810 static mrb_value 811 mrb_str_count(mrb_state *mrb, mrb_value str) 812 { 813 mrb_value v_pat = mrb_nil_value(); 814 mrb_int i; 815 char *s; 816 mrb_int len; 817 mrb_int count = 0; 818 struct tr_pattern pat = STATIC_TR_PATTERN; 819 uint8_t bitmap[32]; 820 821 mrb_get_args(mrb, "S", &v_pat); 822 tr_parse_pattern(mrb, &pat, v_pat, TRUE); 823 tr_compile_pattern(&pat, v_pat, bitmap); 824 tr_free_pattern(mrb, &pat); 825 826 s = RSTRING_PTR(str); 827 len = RSTRING_LEN(str); 828 for (i = 0; i < len; i++) { 829 if (tr_bitmap_detect(bitmap, s[i])) count++; 830 } 831 return mrb_fixnum_value(count); 832 } 833 235 834 static mrb_value 236 835 mrb_str_hex(mrb_state *mrb, mrb_value self) … … 260 859 } 261 860 262 static mrb_value 263 mrb_fixnum_chr(mrb_state *mrb, mrb_value num) 264 { 265 mrb_int cp = mrb_fixnum(num); 861 /* 862 * call-seq: 863 * int.chr([encoding]) -> string 864 * 865 * Returns a string containing the character represented by the +int+'s value 866 * according to +encoding+. +"ASCII-8BIT"+ (+"BINARY"+) and +"UTF-8"+ (only 867 * with +MRB_UTF8_STRING+) can be specified as +encoding+ (default is 868 * +"ASCII-8BIT"+). 
869 * 870 * 65.chr #=> "A" 871 * 230.chr #=> "\xE6" 872 * 230.chr("ASCII-8BIT") #=> "\xE6" 873 * 230.chr("UTF-8") #=> "\u00E6" 874 */ 875 static mrb_value 876 mrb_int_chr(mrb_state *mrb, mrb_value num) 877 { 878 mrb_value enc; 879 mrb_bool enc_given; 880 881 mrb_get_args(mrb, "|S?", &enc, &enc_given); 882 if (!enc_given || 883 ENC_COMP_P(enc, ENC_ASCII_8BIT) || 884 ENC_COMP_P(enc, ENC_BINARY)) { 885 return int_chr_binary(mrb, num); 886 } 266 887 #ifdef MRB_UTF8_STRING 267 char utf8[4]; 268 mrb_int len; 269 270 if (cp < 0 || 0x10FFFF < cp) { 271 mrb_raisef(mrb, E_RANGE_ERROR, "%S out of char range", num); 272 } 273 if (cp < 0x80) { 274 utf8[0] = (char)cp; 275 len = 1; 276 } 277 else if (cp < 0x800) { 278 utf8[0] = (char)(0xC0 | (cp >> 6)); 279 utf8[1] = (char)(0x80 | (cp & 0x3F)); 280 len = 2; 281 } 282 else if (cp < 0x10000) { 283 utf8[0] = (char)(0xE0 | (cp >> 12)); 284 utf8[1] = (char)(0x80 | ((cp >> 6) & 0x3F)); 285 utf8[2] = (char)(0x80 | ( cp & 0x3F)); 286 len = 3; 287 } 888 else if (ENC_COMP_P(enc, ENC_UTF8)) { 889 return int_chr_utf8(mrb, num); 890 } 891 #endif 288 892 else { 289 utf8[0] = (char)(0xF0 | (cp >> 18)); 290 utf8[1] = (char)(0x80 | ((cp >> 12) & 0x3F)); 291 utf8[2] = (char)(0x80 | ((cp >> 6) & 0x3F)); 292 utf8[3] = (char)(0x80 | ( cp & 0x3F)); 293 len = 4; 294 } 295 return mrb_str_new(mrb, utf8, len); 296 #else 297 char c; 298 299 if (cp < 0 || 0xff < cp) { 300 mrb_raisef(mrb, E_RANGE_ERROR, "%S out of char range", num); 301 } 302 c = (char)cp; 303 return mrb_str_new(mrb, &c, 1); 304 #endif 305 } 306 307 /* 308 * call-seq: 309 * string.lines -> array of string 310 * 311 * Returns strings per line; 312 * 313 * a = "abc\ndef" 314 * a.lines #=> ["abc\n", "def"] 315 */ 316 static mrb_value 317 mrb_str_lines(mrb_state *mrb, mrb_value self) 318 { 319 mrb_value result; 320 mrb_value blk; 321 int ai; 322 mrb_int len; 323 mrb_value arg; 324 char *b = RSTRING_PTR(self); 325 char *p = b, *t; 326 char *e = b + RSTRING_LEN(self); 327 328 mrb_get_args(mrb, 
"&", &blk); 329 330 result = mrb_ary_new(mrb); 331 ai = mrb_gc_arena_save(mrb); 332 if (!mrb_nil_p(blk)) { 333 while (p < e) { 334 t = p; 335 while (p < e && *p != '\n') p++; 336 if (*p == '\n') p++; 337 len = (mrb_int) (p - t); 338 arg = mrb_str_new(mrb, t, len); 339 mrb_yield_argv(mrb, blk, 1, &arg); 340 mrb_gc_arena_restore(mrb, ai); 341 if (b != RSTRING_PTR(self)) { 342 ptrdiff_t diff = p - b; 343 b = RSTRING_PTR(self); 344 p = b + diff; 345 } 346 e = b + RSTRING_LEN(self); 347 } 348 return self; 349 } 350 while (p < e) { 351 t = p; 352 while (p < e && *p != '\n') p++; 353 if (*p == '\n') p++; 354 len = (mrb_int) (p - t); 355 mrb_ary_push(mrb, result, mrb_str_new(mrb, t, len)); 356 mrb_gc_arena_restore(mrb, ai); 357 } 358 return result; 893 mrb_raisef(mrb, E_ARGUMENT_ERROR, "unknown encoding name - %v", enc); 894 } 895 /* not reached */ 896 return mrb_nil_value(); 359 897 } 360 898 … … 522 1060 #endif 523 1061 524 static mrb_bool525 all_digits_p(const char *s, mrb_int len)526 {527 while (len-- > 0) {528 if (!ISDIGIT(*s)) return FALSE;529 s++;530 }531 return TRUE;532 }533 534 1062 /* 535 1063 * call-seq: 536 * str.upto(other_str, exclusive=false) {|s| block } -> str 537 * str.upto(other_str, exclusive=false) -> an_enumerator 538 * 539 * Iterates through successive values, starting at <i>str</i> and 540 * ending at <i>other_str</i> inclusive, passing each value in turn to 541 * the block. The <code>String#succ</code> method is used to generate 542 * each value. If optional second argument exclusive is omitted or is false, 543 * the last value will be included; otherwise it will be excluded. 544 * 545 * If no block is given, an enumerator is returned instead. 
546 * 547 * "a8".upto("b6") {|s| print s, ' ' } 548 * for s in "a8".."b6" 549 * print s, ' ' 550 * end 551 * 552 * <em>produces:</em> 553 * 554 * a8 a9 b0 b1 b2 b3 b4 b5 b6 555 * a8 a9 b0 b1 b2 b3 b4 b5 b6 556 * 557 * If <i>str</i> and <i>other_str</i> contains only ascii numeric characters, 558 * both are recognized as decimal numbers. In addition, the width of 559 * string (e.g. leading zeros) is handled appropriately. 560 * 561 * "9".upto("11").to_a #=> ["9", "10", "11"] 562 * "25".upto("5").to_a #=> [] 563 * "07".upto("11").to_a #=> ["07", "08", "09", "10", "11"] 564 */ 565 static mrb_value 566 mrb_str_upto(mrb_state *mrb, mrb_value beg) 567 { 568 mrb_value end; 569 mrb_value exclusive = mrb_false_value(); 570 mrb_value block = mrb_nil_value(); 571 mrb_value current, after_end; 572 mrb_int n; 573 mrb_bool excl; 574 575 mrb_get_args(mrb, "o|o&", &end, &exclusive, &block); 576 577 if (mrb_nil_p(block)) { 578 return mrb_funcall(mrb, beg, "to_enum", 3, mrb_symbol_value(mrb_intern_lit(mrb, "upto")), end, exclusive); 579 } 580 end = mrb_string_type(mrb, end); 581 excl = mrb_test(exclusive); 582 583 /* single character */ 584 if (RSTRING_LEN(beg) == 1 && RSTRING_LEN(end) == 1 && 585 ISASCII(RSTRING_PTR(beg)[0]) && ISASCII(RSTRING_PTR(end)[0])) { 586 char c = RSTRING_PTR(beg)[0]; 587 char e = RSTRING_PTR(end)[0]; 588 int ai = mrb_gc_arena_save(mrb); 589 590 if (c > e || (excl && c == e)) return beg; 591 for (;;) { 592 mrb_yield(mrb, block, mrb_str_new(mrb, &c, 1)); 593 mrb_gc_arena_restore(mrb, ai); 594 if (!excl && c == e) break; 595 c++; 596 if (excl && c == e) break; 597 } 598 return beg; 599 } 600 /* both edges are all digits */ 601 if (ISDIGIT(RSTRING_PTR(beg)[0]) && ISDIGIT(RSTRING_PTR(end)[0]) && 602 all_digits_p(RSTRING_PTR(beg), RSTRING_LEN(beg)) && 603 all_digits_p(RSTRING_PTR(end), RSTRING_LEN(end))) { 604 int ai = mrb_gc_arena_save(mrb); 605 mrb_int min_width = RSTRING_LEN(beg); 606 mrb_int max_width = RSTRING_LEN(end); 607 mrb_int bi = mrb_int(mrb, 
mrb_str_to_inum(mrb, beg, 10, FALSE)); 608 mrb_int ei = mrb_int(mrb, mrb_str_to_inum(mrb, end, 10, FALSE)); 609 mrb_value str = mrb_str_new(mrb, NULL, max_width); 610 char *buf = RSTRING_PTR(str); 611 612 while (bi <= ei) { 613 if (excl && bi == ei) break; 614 snprintf(buf, max_width+1, "%.*" MRB_PRId, (int)min_width, bi); 615 mrb_yield(mrb, block, mrb_str_new(mrb, buf, strlen(buf))); 616 mrb_gc_arena_restore(mrb, ai); 617 bi++; 618 } 619 620 return beg; 621 } 622 /* normal case */ 623 n = mrb_int(mrb, mrb_funcall(mrb, beg, "<=>", 1, end)); 624 if (n > 0 || (excl && n == 0)) return beg; 625 626 after_end = mrb_funcall(mrb, end, "succ", 0); 627 current = mrb_str_dup(mrb, beg); 628 while (!mrb_str_equal(mrb, current, after_end)) { 629 int ai = mrb_gc_arena_save(mrb); 630 mrb_value next = mrb_nil_value(); 631 if (excl || !mrb_str_equal(mrb, current, end)) 632 next = mrb_funcall(mrb, current, "succ", 0); 633 mrb_yield(mrb, block, current); 634 if (mrb_nil_p(next)) break; 635 current = mrb_str_to_str(mrb, next); 636 if (excl && mrb_str_equal(mrb, current, end)) break; 637 if (RSTRING_LEN(current) > RSTRING_LEN(end) || RSTRING_LEN(current) == 0) 638 break; 1064 * str.delete_prefix!(prefix) -> self or nil 1065 * 1066 * Deletes leading <code>prefix</code> from <i>str</i>, returning 1067 * <code>nil</code> if no change was made. 
1068 * 1069 * "hello".delete_prefix!("hel") #=> "lo" 1070 * "hello".delete_prefix!("llo") #=> nil 1071 */ 1072 static mrb_value 1073 mrb_str_del_prefix_bang(mrb_state *mrb, mrb_value self) 1074 { 1075 mrb_int plen, slen; 1076 char *ptr, *s; 1077 struct RString *str = RSTRING(self); 1078 1079 mrb_get_args(mrb, "s", &ptr, &plen); 1080 slen = RSTR_LEN(str); 1081 if (plen > slen) return mrb_nil_value(); 1082 s = RSTR_PTR(str); 1083 if (memcmp(s, ptr, plen) != 0) return mrb_nil_value(); 1084 if (!mrb_frozen_p(str) && (RSTR_SHARED_P(str) || RSTR_FSHARED_P(str))) { 1085 str->as.heap.ptr += plen; 1086 } 1087 else { 1088 mrb_str_modify(mrb, str); 1089 s = RSTR_PTR(str); 1090 memmove(s, s+plen, slen-plen); 1091 } 1092 RSTR_SET_LEN(str, slen-plen); 1093 return self; 1094 } 1095 1096 /* 1097 * call-seq: 1098 * str.delete_prefix(prefix) -> new_str 1099 * 1100 * Returns a copy of <i>str</i> with leading <code>prefix</code> deleted. 1101 * 1102 * "hello".delete_prefix("hel") #=> "lo" 1103 * "hello".delete_prefix("llo") #=> "hello" 1104 */ 1105 static mrb_value 1106 mrb_str_del_prefix(mrb_state *mrb, mrb_value self) 1107 { 1108 mrb_int plen, slen; 1109 char *ptr; 1110 1111 mrb_get_args(mrb, "s", &ptr, &plen); 1112 slen = RSTRING_LEN(self); 1113 if (plen > slen) return mrb_str_dup(mrb, self); 1114 if (memcmp(RSTRING_PTR(self), ptr, plen) != 0) 1115 return mrb_str_dup(mrb, self); 1116 return mrb_str_substr(mrb, self, plen, slen-plen); 1117 } 1118 1119 /* 1120 * call-seq: 1121 * str.delete_suffix!(suffix) -> self or nil 1122 * 1123 * Deletes trailing <code>suffix</code> from <i>str</i>, returning 1124 * <code>nil</code> if no change was made. 
1125 * 1126 * "hello".delete_suffix!("llo") #=> "he" 1127 * "hello".delete_suffix!("hel") #=> nil 1128 */ 1129 static mrb_value 1130 mrb_str_del_suffix_bang(mrb_state *mrb, mrb_value self) 1131 { 1132 mrb_int plen, slen; 1133 char *ptr, *s; 1134 struct RString *str = RSTRING(self); 1135 1136 mrb_get_args(mrb, "s", &ptr, &plen); 1137 slen = RSTR_LEN(str); 1138 if (plen > slen) return mrb_nil_value(); 1139 s = RSTR_PTR(str); 1140 if (memcmp(s+slen-plen, ptr, plen) != 0) return mrb_nil_value(); 1141 if (!mrb_frozen_p(str) && (RSTR_SHARED_P(str) || RSTR_FSHARED_P(str))) { 1142 /* no need to modify string */ 1143 } 1144 else { 1145 mrb_str_modify(mrb, str); 1146 } 1147 RSTR_SET_LEN(str, slen-plen); 1148 return self; 1149 } 1150 1151 /* 1152 * call-seq: 1153 * str.delete_suffix(suffix) -> new_str 1154 * 1155 * Returns a copy of <i>str</i> with trailing <code>suffix</code> deleted. 1156 * 1157 * "hello".delete_suffix("llo") #=> "he" 1158 * "hello".delete_suffix("hel") #=> "hello" 1159 */ 1160 static mrb_value 1161 mrb_str_del_suffix(mrb_state *mrb, mrb_value self) 1162 { 1163 mrb_int plen, slen; 1164 char *ptr; 1165 1166 mrb_get_args(mrb, "s", &ptr, &plen); 1167 slen = RSTRING_LEN(self); 1168 if (plen > slen) return mrb_str_dup(mrb, self); 1169 if (memcmp(RSTRING_PTR(self)+slen-plen, ptr, plen) != 0) 1170 return mrb_str_dup(mrb, self); 1171 return mrb_str_substr(mrb, self, 0, slen-plen); 1172 } 1173 1174 static mrb_value 1175 mrb_str_lines(mrb_state *mrb, mrb_value self) 1176 { 1177 mrb_value result; 1178 int ai; 1179 mrb_int len; 1180 char *b = RSTRING_PTR(self); 1181 char *p = b, *t; 1182 char *e = b + RSTRING_LEN(self); 1183 1184 result = mrb_ary_new(mrb); 1185 ai = mrb_gc_arena_save(mrb); 1186 while (p < e) { 1187 t = p; 1188 while (p < e && *p != '\n') p++; 1189 if (*p == '\n') p++; 1190 len = (mrb_int) (p - t); 1191 mrb_ary_push(mrb, result, mrb_str_new(mrb, t, len)); 639 1192 mrb_gc_arena_restore(mrb, ai); 640 1193 } 641 642 return beg; 1194 return result; 643 1195 
} 644 1196 … … 649 1201 650 1202 mrb_define_method(mrb, s, "dump", mrb_str_dump, MRB_ARGS_NONE()); 651 mrb_define_method(mrb, s, "getbyte", mrb_str_getbyte, MRB_ARGS_REQ(1));652 mrb_define_method(mrb, s, "setbyte", mrb_str_setbyte, MRB_ARGS_REQ(2));653 mrb_define_method(mrb, s, "byteslice", mrb_str_byteslice, MRB_ARGS_REQ(1)|MRB_ARGS_OPT(1));654 1203 mrb_define_method(mrb, s, "swapcase!", mrb_str_swapcase_bang, MRB_ARGS_NONE()); 655 1204 mrb_define_method(mrb, s, "swapcase", mrb_str_swapcase, MRB_ARGS_NONE()); 656 mrb_define_method(mrb, s, "concat", mrb_str_concat2, MRB_ARGS_REQ(1)); 657 mrb_define_method(mrb, s, "<<", mrb_str_concat2, MRB_ARGS_REQ(1)); 1205 mrb_define_method(mrb, s, "concat", mrb_str_concat_m, MRB_ARGS_REQ(1)); 1206 mrb_define_method(mrb, s, "<<", mrb_str_concat_m, MRB_ARGS_REQ(1)); 1207 mrb_define_method(mrb, s, "count", mrb_str_count, MRB_ARGS_REQ(1)); 1208 mrb_define_method(mrb, s, "tr", mrb_str_tr, MRB_ARGS_REQ(2)); 1209 mrb_define_method(mrb, s, "tr!", mrb_str_tr_bang, MRB_ARGS_REQ(2)); 1210 mrb_define_method(mrb, s, "tr_s", mrb_str_tr_s, MRB_ARGS_REQ(2)); 1211 mrb_define_method(mrb, s, "tr_s!", mrb_str_tr_s_bang, MRB_ARGS_REQ(2)); 1212 mrb_define_method(mrb, s, "squeeze", mrb_str_squeeze, MRB_ARGS_OPT(1)); 1213 mrb_define_method(mrb, s, "squeeze!", mrb_str_squeeze_bang, MRB_ARGS_OPT(1)); 1214 mrb_define_method(mrb, s, "delete", mrb_str_delete, MRB_ARGS_REQ(1)); 1215 mrb_define_method(mrb, s, "delete!", mrb_str_delete_bang, MRB_ARGS_REQ(1)); 658 1216 mrb_define_method(mrb, s, "start_with?", mrb_str_start_with, MRB_ARGS_REST()); 659 1217 mrb_define_method(mrb, s, "end_with?", mrb_str_end_with, MRB_ARGS_REST()); … … 661 1219 mrb_define_method(mrb, s, "oct", mrb_str_oct, MRB_ARGS_NONE()); 662 1220 mrb_define_method(mrb, s, "chr", mrb_str_chr, MRB_ARGS_NONE()); 663 mrb_define_method(mrb, s, "lines", mrb_str_lines, MRB_ARGS_NONE());664 1221 mrb_define_method(mrb, s, "succ", mrb_str_succ, MRB_ARGS_NONE()); 665 1222 mrb_define_method(mrb, s, 
"succ!", mrb_str_succ_bang, MRB_ARGS_NONE()); 666 mrb_alias_method(mrb, s, mrb_intern_lit(mrb, "next"), mrb_intern_lit(mrb, "succ")); 667 mrb_alias_method(mrb, s, mrb_intern_lit(mrb, "next!"), mrb_intern_lit(mrb, "succ!")); 668 mrb_define_method(mrb, s, "ord", mrb_str_ord, MRB_ARGS_NONE()); 669 mrb_define_method(mrb, s, "upto", mrb_str_upto, MRB_ARGS_ANY()); 670 671 mrb_define_method(mrb, mrb->fixnum_class, "chr", mrb_fixnum_chr, MRB_ARGS_NONE()); 1223 mrb_define_method(mrb, s, "next", mrb_str_succ, MRB_ARGS_NONE()); 1224 mrb_define_method(mrb, s, "next!", mrb_str_succ_bang, MRB_ARGS_NONE()); 1225 mrb_define_method(mrb, s, "ord", mrb_str_ord, MRB_ARGS_NONE()); 1226 mrb_define_method(mrb, s, "delete_prefix!", mrb_str_del_prefix_bang, MRB_ARGS_REQ(1)); 1227 mrb_define_method(mrb, s, "delete_prefix", mrb_str_del_prefix, MRB_ARGS_REQ(1)); 1228 mrb_define_method(mrb, s, "delete_suffix!", mrb_str_del_suffix_bang, MRB_ARGS_REQ(1)); 1229 mrb_define_method(mrb, s, "delete_suffix", mrb_str_del_suffix, MRB_ARGS_REQ(1)); 1230 1231 mrb_define_method(mrb, s, "__lines", mrb_str_lines, MRB_ARGS_NONE()); 1232 1233 mrb_define_method(mrb, mrb_module_get(mrb, "Integral"), "chr", mrb_int_chr, MRB_ARGS_OPT(1)); 672 1234 } 673 1235 -
EcnlProtoTool/trunk/mruby-2.1.1/mrbgems/mruby-string-ext/test/string.rb
r331 r439 1 # coding: utf-8 1 2 ## 2 3 # String(Ext) Test 3 4 4 UTF8STRING = ("\343\201\202".size == 1) 5 6 assert('String.try_convert') do 7 assert_nil String.try_convert(nil) 8 assert_nil String.try_convert(:foo) 9 assert_equal "", String.try_convert("") 10 assert_equal "1,2,3", String.try_convert("1,2,3") 11 end 12 13 assert('String#getbyte') do 14 str1 = "hello" 15 bytes1 = [104, 101, 108, 108, 111] 16 assert_equal bytes1[0], str1.getbyte(0) 17 assert_equal bytes1[-1], str1.getbyte(-1) 18 assert_equal bytes1[6], str1.getbyte(6) 19 20 str2 = "\xFF" 21 bytes2 = [0xFF] 22 assert_equal bytes2[0], str2.getbyte(0) 23 end 24 25 assert('String#setbyte') do 26 str1 = "hello" 27 h = "H".getbyte(0) 28 str1.setbyte(0, h) 29 assert_equal(h, str1.getbyte(0)) 30 assert_equal("Hello", str1) 31 end 32 33 assert("String#setbyte raises IndexError if arg conversion resizes String") do 34 $s = "01234\n" 35 class Tmp 36 def to_i 37 $s.chomp! '' 38 95 39 end 40 end 41 tmp = Tmp.new 42 assert_raise(IndexError) { $s.setbyte(5, tmp) } 43 end 44 45 assert('String#byteslice') do 46 str1 = "hello" 47 assert_equal("e", str1.byteslice(1)) 48 assert_equal("o", str1.byteslice(-1)) 49 assert_equal("ell", str1.byteslice(1..3)) 50 assert_equal("el", str1.byteslice(1...3)) 5 UTF8STRING = __ENCODING__ == "UTF-8" 6 7 def assert_upto(exp, receiver, *args) 8 act = [] 9 receiver.upto(*args) { |v| act << v } 10 assert_equal exp, act 51 11 end 52 12 53 13 assert('String#dump') do 54 ("\1" * 100).dump # should not raise an exception - regress #1210 55 "\0".inspect == "\"\\000\"" and 56 "foo".dump == "\"foo\"" 14 assert_equal("\"\\x00\"", "\0".dump) 15 assert_equal("\"foo\"", "foo".dump) 16 assert_equal('"\xe3\x82\x8b"', "る".dump) 17 assert_nothing_raised { ("\1" * 100).dump } # regress #1210 57 18 end 58 19 59 20 assert('String#strip') do 60 21 s = " abc " 61 "".strip == "" and " \t\r\n\f\v".strip == "" and 62 "\0a\0".strip == "\0a" and 63 "abc".strip == "abc" and 64 " abc".strip == "abc" and 65 "abc 
".strip == "abc" and 66 " abc ".strip == "abc" and 67 s == " abc " 22 assert_equal("abc", s.strip) 23 assert_equal(" abc ", s) 24 assert_equal("", "".strip) 25 assert_equal("", " \t\r\n\f\v".strip) 26 assert_equal("\0a", "\0a\0".strip) 27 assert_equal("abc", "abc".strip) 28 assert_equal("abc", " abc".strip) 29 assert_equal("abc", "abc ".strip) 68 30 end 69 31 70 32 assert('String#lstrip') do 71 33 s = " abc " 72 s.lstrip73 "".lstrip == "" and " \t\r\n\f\v".lstrip == "" and74 "\0a\0".lstrip == "\0a\0" and75 "abc".lstrip == "abc" and76 " abc".lstrip == "abc" and77 "abc ".lstrip == "abc " and78 " abc ".lstrip == "abc " and79 s == " abc "34 assert_equal("abc ", s.lstrip) 35 assert_equal(" abc ", s) 36 assert_equal("", "".lstrip) 37 assert_equal("", " \t\r\n\f\v".lstrip) 38 assert_equal("\0a\0", "\0a\0".lstrip) 39 assert_equal("abc", "abc".lstrip) 40 assert_equal("abc", " abc".lstrip) 41 assert_equal("abc ", "abc ".lstrip) 80 42 end 81 43 82 44 assert('String#rstrip') do 83 45 s = " abc " 84 s.rstrip85 "".rstrip == "" and " \t\r\n\f\v".rstrip == "" and86 "\0a\0".rstrip == "\0a" and87 "abc".rstrip == "abc" and88 " abc".rstrip == " abc" and89 "abc ".rstrip == "abc" and90 " abc ".rstrip == " abc" and91 s == " abc "46 assert_equal(" abc", s.rstrip) 47 assert_equal(" abc ", s) 48 assert_equal("", "".rstrip) 49 assert_equal("", " \t\r\n\f\v".rstrip) 50 assert_equal("\0a", "\0a\0".rstrip) 51 assert_equal("abc", "abc".rstrip) 52 assert_equal(" abc", " abc".rstrip) 53 assert_equal("abc", "abc ".rstrip) 92 54 end 93 55 … … 95 57 s = " abc " 96 58 t = "abc" 97 s.strip! == "abc" and s == "abc" and t.strip! == nil 59 assert_equal("abc", s.strip!) 60 assert_equal("abc", s) 61 assert_nil(t.strip!) 62 assert_equal("abc", t) 98 63 end 99 64 … … 101 66 s = " abc " 102 67 t = "abc " 103 s.lstrip! == "abc " and s == "abc " and t.lstrip! == nil 68 assert_equal("abc ", s.lstrip!) 69 assert_equal("abc ", s) 70 assert_nil(t.lstrip!) 
71 assert_equal("abc ", t) 104 72 end 105 73 … … 107 75 s = " abc " 108 76 t = " abc" 109 s.rstrip! == " abc" and s == " abc" and t.rstrip! == nil 77 assert_equal(" abc", s.rstrip!) 78 assert_equal(" abc", s) 79 assert_nil(t.rstrip!) 80 assert_equal(" abc", t) 110 81 end 111 82 … … 125 96 assert_equal "Hello World!", "Hello " << "World" << 33 126 97 assert_equal "Hello World!", "Hello ".concat("World").concat(33) 127 128 o = Object.new129 def o.to_str130 "to_str"131 end132 assert_equal "hi to_str", "hi " << o133 134 98 assert_raise(TypeError) { "".concat(Object.new) } 99 100 if UTF8STRING 101 assert_equal "H«", "H" << 0xab 102 assert_equal "Hは", "H" << 12399 103 else 104 assert_equal "H\xab", "H" << 0xab 105 assert_raise(RangeError) { "H" << 12399 } 106 end 135 107 end 136 108 … … 140 112 assert_equal(-1, "abcdef".casecmp("abcdefg")) 141 113 assert_equal 0, "abcdef".casecmp("ABCDEF") 142 o = Object.new 143 def o.to_str 144 "ABCDEF" 145 end 146 assert_equal 0, "abcdef".casecmp(o) 114 end 115 116 assert('String#count') do 117 s = "abccdeff123" 118 assert_equal 0, s.count("") 119 assert_equal 1, s.count("a") 120 assert_equal 2, s.count("ab") 121 assert_equal 9, s.count("^c") 122 assert_equal 8, s.count("a-z") 123 assert_equal 4, s.count("a0-9") 124 end 125 126 assert('String#tr') do 127 assert_equal "ABC", "abc".tr('a-z', 'A-Z') 128 assert_equal "hippo", "hello".tr('el', 'ip') 129 assert_equal "Ruby", "Lisp".tr("Lisp", "Ruby") 130 assert_equal "*e**o", "hello".tr('^aeiou', '*') 131 assert_equal "heo", "hello".tr('l', '') 132 end 133 134 assert('String#tr!') do 135 s = "abcdefghijklmnopqR" 136 assert_equal "ab12222hijklmnopqR", s.tr!("cdefg", "12") 137 assert_equal "ab12222hijklmnopqR", s 138 end 139 140 assert('String#tr_s') do 141 assert_equal "hero", "hello".tr_s('l', 'r') 142 assert_equal "h*o", "hello".tr_s('el', '*') 143 assert_equal "hhxo", "hello".tr_s('el', 'hx') 144 end 145 146 assert('String#tr_s!') do 147 s = "hello" 148 assert_equal "hero", s.tr_s!('l', 
'r') 149 assert_equal "hero", s 150 assert_nil s.tr_s!('l', 'r') 151 end 152 153 assert('String#squeeze') do 154 assert_equal "yelow mon", "yellow moon".squeeze 155 assert_equal " now is the", " now is the".squeeze(" ") 156 assert_equal "puters shot balls", "putters shoot balls".squeeze("m-z") 157 end 158 159 assert('String#squeeze!') do 160 s = " now is the" 161 assert_equal " now is the", s.squeeze!(" ") 162 assert_equal " now is the", s 163 end 164 165 assert('String#delete') do 166 assert_equal "he", "hello".delete("lo") 167 assert_equal "hll", "hello".delete("aeiou") 168 assert_equal "ll", "hello".delete("^l") 169 assert_equal "ho", "hello".delete("ej-m") 170 end 171 172 assert('String#delete!') do 173 s = "hello" 174 assert_equal "he", s.delete!("lo") 175 assert_equal "he", s 176 assert_nil s.delete!("lz") 147 177 end 148 178 … … 202 232 assert_equal 8, "010".oct 203 233 assert_equal (-8), "-10".oct 204 end205 206 assert('String#chr') do207 assert_equal "a", "abcde".chr208 # test Fixnum#chr as well209 assert_equal "a", 97.chr210 234 end 211 235 … … 496 520 497 521 assert('String#upto') do 498 assert_equal %w(a8 a9 b0 b1 b2 b3 b4 b5 b6), "a8".upto("b6").to_a 499 assert_equal ["9", "10", "11"], "9".upto("11").to_a 500 assert_equal [], "25".upto("5").to_a 501 assert_equal ["07", "08", "09", "10", "11"], "07".upto("11").to_a 502 503 if UTF8STRING 504 assert_equal ["あ", "ぃ", "い", "ぅ", "う", "ぇ", "え", "ぉ", "お"], "あ".upto("お").to_a 505 end 506 507 assert_equal ["9", ":", ";", "<", "=", ">", "?", "@", "A"], "9".upto("A").to_a 522 assert_upto %w(a8 a9 b0 b1 b2 b3 b4 b5 b6), "a8", "b6" 523 assert_upto ["9", "10", "11"], "9", "11" 524 assert_upto [], "25", "5" 525 assert_upto ["07", "08", "09", "10", "11"], "07", "11" 526 assert_upto ["9", ":", ";", "<", "=", ">", "?", "@", "A"], "9", "A" 527 528 if UTF8STRING 529 assert_upto %w(あ ぃ い ぅ う ぇ え ぉ お), "あ", "お" 530 end 508 531 509 532 a = "aa" … … 587 610 588 611 assert('String#chr') do 612 assert_equal "a", "abcde".chr 589 
613 assert_equal "h", "hello!".chr 590 end 614 assert_equal "", "".chr 615 end 616 591 617 assert('String#chr(UTF-8)') do 592 618 assert_equal "こ", "こんにちは世界!".chr … … 614 640 615 641 assert('String#each_char') do 616 s = ""642 chars = [] 617 643 "hello!".each_char do |x| 618 s +=x619 end 620 assert_equal "hello!",s644 chars << x 645 end 646 assert_equal ["h", "e", "l", "l", "o", "!"], chars 621 647 end 622 648 623 649 assert('String#each_char(UTF-8)') do 624 s = ""650 chars = [] 625 651 "こんにちは世界!".each_char do |x| 626 s +=x627 end 628 assert_equal "こんにちは世界!",s652 chars << x 653 end 654 assert_equal ["こ", "ん", "に", "ち", "は", "世", "界", "!"], chars 629 655 end if UTF8STRING 630 656 … … 666 692 assert_equal expect, cp 667 693 end if UTF8STRING 694 695 assert('String#delete_prefix') do 696 assert_equal "llo", "hello".delete_prefix("he") 697 assert_equal "hello", "hello".delete_prefix("llo") 698 assert_equal "llo", "hello".delete_prefix!("he") 699 assert_nil "hello".delete_prefix!("llo") 700 end 701 702 assert('String#delete_suffix') do 703 assert_equal "he", "hello".delete_suffix("llo") 704 assert_equal "hello", "hello".delete_suffix("he") 705 assert_equal "he", "hello".delete_suffix!("llo") 706 assert_nil "hello".delete_suffix!("he") 707 end
Note:
See TracChangeset
for help on using the changeset viewer.