Changeset 439 for EcnlProtoTool/trunk/mruby-2.1.1/src/string.c
- Timestamp:
- Jul 9, 2020, 8:51:43 AM (4 years ago)
- Location:
- EcnlProtoTool/trunk/mruby-2.1.1
- Files:
-
- 1 edited
- 1 moved
Legend:
- Unmodified
- Added
- Removed
-
EcnlProtoTool/trunk/mruby-2.1.1/src/string.c
r331 r439 9 9 #endif 10 10 11 #ifndef MRB_WITHOUT_FLOAT 11 12 #include <float.h> 13 #include <math.h> 14 #endif 12 15 #include <limits.h> 13 16 #include <stddef.h> … … 19 22 #include <mruby/range.h> 20 23 #include <mruby/string.h> 21 #include <mruby/ re.h>24 #include <mruby/numeric.h> 22 25 23 26 typedef struct mrb_shared_string { 24 mrb_bool nofree : 1;25 27 int refcnt; 28 mrb_ssize capa; 26 29 char *ptr; 27 mrb_int len;28 30 } mrb_shared_string; 29 31 … … 31 33 32 34 #define mrb_obj_alloc_string(mrb) ((struct RString*)mrb_obj_alloc((mrb), MRB_TT_STRING, (mrb)->string_class)) 35 36 static struct RString* 37 str_init_normal_capa(mrb_state *mrb, struct RString *s, 38 const char *p, size_t len, size_t capa) 39 { 40 char *dst = (char *)mrb_malloc(mrb, capa + 1); 41 if (p) memcpy(dst, p, len); 42 dst[len] = '\0'; 43 s->as.heap.ptr = dst; 44 s->as.heap.len = (mrb_ssize)len; 45 s->as.heap.aux.capa = (mrb_ssize)capa; 46 RSTR_UNSET_TYPE_FLAG(s); 47 return s; 48 } 49 50 static struct RString* 51 str_init_normal(mrb_state *mrb, struct RString *s, const char *p, size_t len) 52 { 53 return str_init_normal_capa(mrb, s, p, len, len); 54 } 55 56 static struct RString* 57 str_init_embed(struct RString *s, const char *p, size_t len) 58 { 59 if (p) memcpy(RSTR_EMBED_PTR(s), p, len); 60 RSTR_EMBED_PTR(s)[len] = '\0'; 61 RSTR_SET_TYPE_FLAG(s, EMBED); 62 RSTR_SET_EMBED_LEN(s, len); 63 return s; 64 } 65 66 static struct RString* 67 str_init_nofree(struct RString *s, const char *p, size_t len) 68 { 69 s->as.heap.ptr = (char *)p; 70 s->as.heap.len = (mrb_ssize)len; 71 s->as.heap.aux.capa = 0; /* nofree */ 72 RSTR_SET_TYPE_FLAG(s, NOFREE); 73 return s; 74 } 75 76 static struct RString* 77 str_init_shared(mrb_state *mrb, const struct RString *orig, struct RString *s, mrb_shared_string *shared) 78 { 79 if (shared) { 80 shared->refcnt++; 81 } 82 else { 83 shared = (mrb_shared_string *)mrb_malloc(mrb, sizeof(mrb_shared_string)); 84 shared->refcnt = 1; 85 shared->ptr = orig->as.heap.ptr; 86 
shared->capa = orig->as.heap.aux.capa; 87 } 88 s->as.heap.ptr = orig->as.heap.ptr; 89 s->as.heap.len = orig->as.heap.len; 90 s->as.heap.aux.shared = shared; 91 RSTR_SET_TYPE_FLAG(s, SHARED); 92 return s; 93 } 94 95 static struct RString* 96 str_init_fshared(const struct RString *orig, struct RString *s, struct RString *fshared) 97 { 98 s->as.heap.ptr = orig->as.heap.ptr; 99 s->as.heap.len = orig->as.heap.len; 100 s->as.heap.aux.fshared = fshared; 101 RSTR_SET_TYPE_FLAG(s, FSHARED); 102 return s; 103 } 104 105 static struct RString* 106 str_init_modifiable(mrb_state *mrb, struct RString *s, const char *p, size_t len) 107 { 108 if (RSTR_EMBEDDABLE_P(len)) { 109 return str_init_embed(s, p, len); 110 } 111 else { 112 return str_init_normal(mrb, s, p, len); 113 } 114 } 33 115 34 116 static struct RString* 35 117 str_new_static(mrb_state *mrb, const char *p, size_t len) 36 118 { 37 struct RString *s; 38 39 if (len >= MRB_INT_MAX) { 119 if (RSTR_EMBEDDABLE_P(len)) { 120 return str_init_embed(mrb_obj_alloc_string(mrb), p, len); 121 } 122 if (len >= MRB_SSIZE_MAX) { 40 123 mrb_raise(mrb, E_ARGUMENT_ERROR, "string size too big"); 41 124 } 42 s = mrb_obj_alloc_string(mrb); 43 s->as.heap.len = len; 44 s->as.heap.aux.capa = 0; /* nofree */ 45 s->as.heap.ptr = (char *)p; 46 s->flags = MRB_STR_NOFREE; 47 48 return s; 125 return str_init_nofree(mrb_obj_alloc_string(mrb), p, len); 49 126 } 50 127 … … 52 129 str_new(mrb_state *mrb, const char *p, size_t len) 53 130 { 131 if (RSTR_EMBEDDABLE_P(len)) { 132 return str_init_embed(mrb_obj_alloc_string(mrb), p, len); 133 } 134 if (len >= MRB_SSIZE_MAX) { 135 mrb_raise(mrb, E_ARGUMENT_ERROR, "string size too big"); 136 } 137 if (p && mrb_ro_data_p(p)) { 138 return str_init_nofree(mrb_obj_alloc_string(mrb), p, len); 139 } 140 return str_init_normal(mrb, mrb_obj_alloc_string(mrb), p, len); 141 } 142 143 static inline void 144 str_with_class(struct RString *s, mrb_value obj) 145 { 146 s->c = mrb_str_ptr(obj)->c; 147 } 148 149 static mrb_value 
150 mrb_str_new_empty(mrb_state *mrb, mrb_value str) 151 { 152 struct RString *s = str_new(mrb, 0, 0); 153 154 str_with_class(s, str); 155 return mrb_obj_value(s); 156 } 157 158 MRB_API mrb_value 159 mrb_str_new_capa(mrb_state *mrb, size_t capa) 160 { 54 161 struct RString *s; 55 162 56 if (p && mrb_ro_data_p(p)) { 57 return str_new_static(mrb, p, len); 58 } 59 s = mrb_obj_alloc_string(mrb); 60 if (len < RSTRING_EMBED_LEN_MAX) { 61 RSTR_SET_EMBED_FLAG(s); 62 RSTR_SET_EMBED_LEN(s, len); 63 if (p) { 64 memcpy(s->as.ary, p, len); 65 } 163 if (RSTR_EMBEDDABLE_P(capa)) { 164 s = str_init_embed(mrb_obj_alloc_string(mrb), NULL, 0); 165 } 166 else if (capa >= MRB_SSIZE_MAX) { 167 mrb_raise(mrb, E_ARGUMENT_ERROR, "string capacity size too big"); 168 /* not reached */ 169 s = NULL; 66 170 } 67 171 else { 68 if (len >= MRB_INT_MAX) { 69 mrb_raise(mrb, E_ARGUMENT_ERROR, "string size too big"); 70 } 71 s->as.heap.len = len; 72 s->as.heap.aux.capa = len; 73 s->as.heap.ptr = (char *)mrb_malloc(mrb, len+1); 74 if (p) { 75 memcpy(s->as.heap.ptr, p, len); 76 } 77 } 78 RSTR_PTR(s)[len] = '\0'; 79 return s; 80 } 81 82 static inline void 83 str_with_class(mrb_state *mrb, struct RString *s, mrb_value obj) 84 { 85 s->c = mrb_str_ptr(obj)->c; 86 } 87 88 static mrb_value 89 mrb_str_new_empty(mrb_state *mrb, mrb_value str) 90 { 91 struct RString *s = str_new(mrb, 0, 0); 92 93 str_with_class(mrb, s, str); 172 s = str_init_normal_capa(mrb, mrb_obj_alloc_string(mrb), NULL, 0, capa); 173 } 174 94 175 return mrb_obj_value(s); 95 176 } … … 102 183 mrb_str_buf_new(mrb_state *mrb, size_t capa) 103 184 { 104 struct RString *s;105 106 s = mrb_obj_alloc_string(mrb);107 108 if (capa >= MRB_INT_MAX) {109 mrb_raise(mrb, E_ARGUMENT_ERROR, "string capacity size too big");110 }111 185 if (capa < MRB_STR_BUF_MIN_SIZE) { 112 186 capa = MRB_STR_BUF_MIN_SIZE; 113 187 } 114 s->as.heap.len = 0; 115 s->as.heap.aux.capa = capa; 116 s->as.heap.ptr = (char *)mrb_malloc(mrb, capa+1); 117 RSTR_PTR(s)[0] = '\0'; 118 119 
return mrb_obj_value(s); 188 return mrb_str_new_capa(mrb, capa); 120 189 } 121 190 … … 123 192 resize_capa(mrb_state *mrb, struct RString *s, size_t capacity) 124 193 { 125 #if SIZE_MAX > MRB_ INT_MAX126 mrb_assert(capacity < MRB_ INT_MAX);194 #if SIZE_MAX > MRB_SSIZE_MAX 195 mrb_assert(capacity < MRB_SSIZE_MAX); 127 196 #endif 128 197 if (RSTR_EMBED_P(s)) { 129 if (RSTRING_EMBED_LEN_MAX < capacity) { 130 char *const tmp = (char *)mrb_malloc(mrb, capacity+1); 131 const mrb_int len = RSTR_EMBED_LEN(s); 132 memcpy(tmp, s->as.ary, len); 133 RSTR_UNSET_EMBED_FLAG(s); 134 s->as.heap.ptr = tmp; 135 s->as.heap.len = len; 136 s->as.heap.aux.capa = (mrb_int)capacity; 198 if (!RSTR_EMBEDDABLE_P(capacity)) { 199 str_init_normal_capa(mrb, s, RSTR_EMBED_PTR(s), RSTR_EMBED_LEN(s), capacity); 137 200 } 138 201 } 139 202 else { 140 203 s->as.heap.ptr = (char*)mrb_realloc(mrb, RSTR_PTR(s), capacity+1); 141 s->as.heap.aux.capa = (mrb_int)capacity; 142 } 143 } 144 145 static void 146 str_buf_cat(mrb_state *mrb, struct RString *s, const char *ptr, size_t len) 147 { 148 size_t capa; 149 size_t total; 150 ptrdiff_t off = -1; 151 152 if (len == 0) return; 153 mrb_str_modify(mrb, s); 154 if (ptr >= RSTR_PTR(s) && ptr <= RSTR_PTR(s) + (size_t)RSTR_LEN(s)) { 155 off = ptr - RSTR_PTR(s); 156 } 157 158 capa = RSTR_CAPA(s); 159 if (capa <= RSTRING_EMBED_LEN_MAX) 160 capa = RSTRING_EMBED_LEN_MAX+1; 161 162 total = RSTR_LEN(s)+len; 163 if (total >= MRB_INT_MAX) { 164 size_error: 165 mrb_raise(mrb, E_ARGUMENT_ERROR, "string size too big"); 166 } 167 if (capa <= total) { 168 while (total > capa) { 169 if (capa <= MRB_INT_MAX / 2) { 170 capa *= 2; 171 } 172 else { 173 capa = total; 174 } 175 } 176 if (capa < total || capa > MRB_INT_MAX) { 177 goto size_error; 178 } 179 resize_capa(mrb, s, capa); 180 } 181 if (off != -1) { 182 ptr = RSTR_PTR(s) + off; 183 } 184 memcpy(RSTR_PTR(s) + RSTR_LEN(s), ptr, len); 185 mrb_assert_int_fit(size_t, total, mrb_int, MRB_INT_MAX); 186 RSTR_SET_LEN(s, total); 187 
RSTR_PTR(s)[total] = '\0'; /* sentinel */ 204 s->as.heap.aux.capa = (mrb_ssize)capacity; 205 } 188 206 } 189 207 … … 194 212 } 195 213 196 /*197 * call-seq: (Caution! NULL string)198 * String.new(str="") => new_str199 *200 * Returns a new string object containing a copy of <i>str</i>.201 */202 203 214 MRB_API mrb_value 204 215 mrb_str_new_cstr(mrb_state *mrb, const char *p) … … 231 242 shared->refcnt--; 232 243 if (shared->refcnt == 0) { 233 if (!shared->nofree) { 234 mrb_free(mrb, shared->ptr); 235 } 244 mrb_free(mrb, shared->ptr); 236 245 mrb_free(mrb, shared); 246 } 247 } 248 249 static void 250 str_modify_keep_ascii(mrb_state *mrb, struct RString *s) 251 { 252 if (RSTR_SHARED_P(s)) { 253 mrb_shared_string *shared = s->as.heap.aux.shared; 254 255 if (shared->refcnt == 1 && s->as.heap.ptr == shared->ptr) { 256 s->as.heap.aux.capa = shared->capa; 257 s->as.heap.ptr[s->as.heap.len] = '\0'; 258 RSTR_UNSET_SHARED_FLAG(s); 259 mrb_free(mrb, shared); 260 } 261 else { 262 str_init_modifiable(mrb, s, s->as.heap.ptr, (size_t)s->as.heap.len); 263 str_decref(mrb, shared); 264 } 265 } 266 else if (RSTR_NOFREE_P(s) || RSTR_FSHARED_P(s)) { 267 str_init_modifiable(mrb, s, s->as.heap.ptr, (size_t)s->as.heap.len); 268 } 269 } 270 271 static void 272 check_null_byte(mrb_state *mrb, mrb_value str) 273 { 274 mrb_to_str(mrb, str); 275 if (memchr(RSTRING_PTR(str), '\0', RSTRING_LEN(str))) { 276 mrb_raise(mrb, E_ARGUMENT_ERROR, "string contains null byte"); 237 277 } 238 278 } … … 245 285 else if (RSTR_SHARED_P(str)) 246 286 str_decref(mrb, str->as.heap.aux.shared); 247 else if (!RSTR_NOFREE_P(str) )287 else if (!RSTR_NOFREE_P(str) && !RSTR_FSHARED_P(str)) 248 288 mrb_free(mrb, str->as.heap.ptr); 249 289 } … … 262 302 }; 263 303 264 staticmrb_int265 utf8len(const char* p, const char* e)304 mrb_int 305 mrb_utf8len(const char* p, const char* e) 266 306 { 267 307 mrb_int len; 268 308 mrb_int i; 269 309 310 if ((unsigned char)*p < 0x80) return 1; 270 311 len = utf8len_codepage[(unsigned 
char)*p]; 271 if (p + len > e) return 1; 312 if (len == 1) return 1; 313 if (len > e - p) return 1; 272 314 for (i = 1; i < len; ++i) 273 315 if ((p[i] & 0xc0) != 0x80) … … 276 318 } 277 319 320 mrb_int 321 mrb_utf8_strlen(const char *str, mrb_int byte_len) 322 { 323 mrb_int total = 0; 324 const char *p = str; 325 const char *e = p + byte_len; 326 327 while (p < e) { 328 p += mrb_utf8len(p, e); 329 total++; 330 } 331 return total; 332 } 333 278 334 static mrb_int 279 utf8_strlen(mrb_value str, mrb_int len) 280 { 281 mrb_int total = 0; 282 char* p = RSTRING_PTR(str); 283 char* e = p; 284 if (RSTRING(str)->flags & MRB_STR_NO_UTF) { 285 return RSTRING_LEN(str); 286 } 287 e += len < 0 ? RSTRING_LEN(str) : len; 288 while (p<e) { 289 p += utf8len(p, e); 290 total++; 291 } 292 if (RSTRING_LEN(str) == total) { 293 RSTRING(str)->flags |= MRB_STR_NO_UTF; 294 } 295 return total; 296 } 297 298 #define RSTRING_CHAR_LEN(s) utf8_strlen(s, -1) 335 utf8_strlen(mrb_value str) 336 { 337 struct RString *s = mrb_str_ptr(str); 338 mrb_int byte_len = RSTR_LEN(s); 339 340 if (RSTR_ASCII_P(s)) { 341 return byte_len; 342 } 343 else { 344 mrb_int utf8_len = mrb_utf8_strlen(RSTR_PTR(s), byte_len); 345 if (byte_len == utf8_len) RSTR_SET_ASCII_FLAG(s); 346 return utf8_len; 347 } 348 } 349 350 #define RSTRING_CHAR_LEN(s) utf8_strlen(s) 299 351 300 352 /* map character index to byte offset index */ … … 302 354 chars2bytes(mrb_value s, mrb_int off, mrb_int idx) 303 355 { 304 mrb_int i, b, n; 305 const char *p = RSTRING_PTR(s) + off; 306 const char *e = RSTRING_END(s); 307 308 for (b=i=0; p<e && i<idx; i++) { 309 n = utf8len(p, e); 310 b += n; 311 p += n; 312 } 313 return b; 356 if (RSTR_ASCII_P(mrb_str_ptr(s))) { 357 return idx; 358 } 359 else { 360 mrb_int i, b, n; 361 const char *p = RSTRING_PTR(s) + off; 362 const char *e = RSTRING_END(s); 363 364 for (b=i=0; p<e && i<idx; i++) { 365 n = mrb_utf8len(p, e); 366 b += n; 367 p += n; 368 } 369 return b; 370 } 314 371 } 315 372 316 373 /* map byte 
offset to character index */ 317 374 static mrb_int 318 bytes2chars(char *p, mrb_int bi)319 { 320 mrb_int i, b,n;321 322 for (b=i=0; b<bi; i++) {323 n = utf8len_codepage[(unsigned char)*p]; 324 b += n;325 p += n;326 } 327 if ( b != bi) return -1;375 bytes2chars(char *p, mrb_int len, mrb_int bi) 376 { 377 const char *e = p + (size_t)len; 378 const char *pivot = p + bi; 379 mrb_int i; 380 381 for (i = 0; p < pivot; i ++) { 382 p += mrb_utf8len(p, e); 383 } 384 if (p != pivot) return -1; 328 385 return i; 386 } 387 388 static const char * 389 char_adjust(const char *beg, const char *end, const char *ptr) 390 { 391 if ((ptr > beg || ptr < end) && (*ptr & 0xc0) == 0x80) { 392 const int utf8_adjust_max = 3; 393 const char *p; 394 395 if (ptr - beg > utf8_adjust_max) { 396 beg = ptr - utf8_adjust_max; 397 } 398 399 p = ptr; 400 while (p > beg) { 401 p --; 402 if ((*p & 0xc0) != 0x80) { 403 int clen = mrb_utf8len(p, end); 404 if (clen > ptr - p) return p; 405 break; 406 } 407 } 408 } 409 410 return ptr; 411 } 412 413 static const char * 414 char_backtrack(const char *ptr, const char *end) 415 { 416 if (ptr < end) { 417 const int utf8_bytelen_max = 4; 418 const char *p; 419 420 if (end - ptr > utf8_bytelen_max) { 421 ptr = end - utf8_bytelen_max; 422 } 423 424 p = end; 425 while (p > ptr) { 426 p --; 427 if ((*p & 0xc0) != 0x80) { 428 int clen = utf8len_codepage[(unsigned char)*p]; 429 if (clen == end - p) { return p; } 430 break; 431 } 432 } 433 } 434 435 return end - 1; 436 } 437 438 static mrb_int 439 str_index_str_by_char_search(mrb_state *mrb, const char *p, const char *pend, const char *s, const mrb_int slen, mrb_int off) 440 { 441 /* Based on Quick Search algorithm (Boyer-Moore-Horspool algorithm) */ 442 443 ptrdiff_t qstable[1 << CHAR_BIT]; 444 445 /* Preprocessing */ 446 { 447 mrb_int i; 448 449 for (i = 0; i < 1 << CHAR_BIT; i ++) { 450 qstable[i] = slen; 451 } 452 for (i = 0; i < slen; i ++) { 453 qstable[(unsigned char)s[i]] = slen - (i + 1); 454 } 455 } 456 457 
/* Searching */ 458 while (p < pend && pend - p >= slen) { 459 const char *pivot; 460 461 if (memcmp(p, s, slen) == 0) { 462 return off; 463 } 464 465 pivot = p + qstable[(unsigned char)p[slen - 1]]; 466 if (pivot >= pend || pivot < p /* overflowed */) { return -1; } 467 468 do { 469 p += mrb_utf8len(p, pend); 470 off ++; 471 } while (p < pivot); 472 } 473 474 return -1; 475 } 476 477 static mrb_int 478 str_index_str_by_char(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int pos) 479 { 480 const char *p = RSTRING_PTR(str); 481 const char *pend = p + RSTRING_LEN(str); 482 const char *s = RSTRING_PTR(sub); 483 const mrb_int slen = RSTRING_LEN(sub); 484 mrb_int off = pos; 485 486 for (; pos > 0; pos --) { 487 if (pend - p < 1) { return -1; } 488 p += mrb_utf8len(p, pend); 489 } 490 491 if (slen < 1) { return off; } 492 493 return str_index_str_by_char_search(mrb, p, pend, s, slen, off); 329 494 } 330 495 … … 333 498 #define RSTRING_CHAR_LEN(s) RSTRING_LEN(s) 334 499 #define chars2bytes(p, off, ci) (ci) 335 #define bytes2chars(p, bi) (bi) 500 #define bytes2chars(p, end, bi) (bi) 501 #define char_adjust(beg, end, ptr) (ptr) 502 #define char_backtrack(ptr, end) ((end) - 1) 336 503 #define BYTES_ALIGN_CHECK(pos) 504 #define str_index_str_by_char(mrb, str, sub, pos) str_index_str(mrb, str, sub, pos) 505 #endif 506 507 #ifndef MRB_QS_SHORT_STRING_LENGTH 508 #define MRB_QS_SHORT_STRING_LENGTH 2048 337 509 #endif 338 510 … … 340 512 mrb_memsearch_qs(const unsigned char *xs, mrb_int m, const unsigned char *ys, mrb_int n) 341 513 { 342 const unsigned char *x = xs, *xe = xs + m; 343 const unsigned char *y = ys; 344 int i, qstable[256]; 345 346 /* Preprocessing */ 347 for (i = 0; i < 256; ++i) 348 qstable[i] = m + 1; 349 for (; x < xe; ++x) 350 qstable[*x] = xe - x; 351 /* Searching */ 352 for (; y + m <= ys + n; y += *(qstable + y[m])) { 353 if (*xs == *y && memcmp(xs, y, m) == 0) 354 return y - ys; 355 } 356 return -1; 514 if (n + m < MRB_QS_SHORT_STRING_LENGTH) { 515 const 
unsigned char *y = ys; 516 const unsigned char *ye = ys+n-m+1; 517 518 for (;;) { 519 y = (const unsigned char*)memchr(y, xs[0], (size_t)(ye-y)); 520 if (y == NULL) return -1; 521 if (memcmp(xs, y, m) == 0) { 522 return (mrb_int)(y - ys); 523 } 524 y++; 525 } 526 return -1; 527 } 528 else { 529 const unsigned char *x = xs, *xe = xs + m; 530 const unsigned char *y = ys; 531 int i; 532 ptrdiff_t qstable[256]; 533 534 /* Preprocessing */ 535 for (i = 0; i < 256; ++i) 536 qstable[i] = m + 1; 537 for (; x < xe; ++x) 538 qstable[*x] = xe - x; 539 /* Searching */ 540 for (; y + m <= ys + n; y += *(qstable + y[m])) { 541 if (*xs == *y && memcmp(xs, y, m) == 0) 542 return (mrb_int)(y - ys); 543 } 544 return -1; 545 } 357 546 } 358 547 … … 373 562 374 563 if (ys) 375 return ys - y;564 return (mrb_int)(ys - y); 376 565 else 377 566 return -1; … … 381 570 382 571 static void 383 str_make_shared(mrb_state *mrb, struct RString *s) 384 { 385 if (!RSTR_SHARED_P(s)) { 386 mrb_shared_string *shared = (mrb_shared_string *)mrb_malloc(mrb, sizeof(mrb_shared_string)); 387 388 shared->refcnt = 1; 389 if (RSTR_EMBED_P(s)) { 390 const mrb_int len = RSTR_EMBED_LEN(s); 391 char *const tmp = (char *)mrb_malloc(mrb, len+1); 392 memcpy(tmp, s->as.ary, len); 393 tmp[len] = '\0'; 394 RSTR_UNSET_EMBED_FLAG(s); 395 s->as.heap.ptr = tmp; 396 s->as.heap.len = len; 397 shared->nofree = FALSE; 398 shared->ptr = s->as.heap.ptr; 399 } 400 else if (RSTR_NOFREE_P(s)) { 401 shared->nofree = TRUE; 402 shared->ptr = s->as.heap.ptr; 403 RSTR_UNSET_NOFREE_FLAG(s); 404 } 405 else { 406 shared->nofree = FALSE; 407 if (s->as.heap.aux.capa > s->as.heap.len) { 408 s->as.heap.ptr = shared->ptr = (char *)mrb_realloc(mrb, s->as.heap.ptr, s->as.heap.len+1); 409 } 410 else { 411 shared->ptr = s->as.heap.ptr; 412 } 413 } 414 shared->len = s->as.heap.len; 415 s->as.heap.aux.shared = shared; 416 RSTR_SET_SHARED_FLAG(s); 417 } 418 } 419 420 static mrb_value 421 byte_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int 
len) 572 str_share(mrb_state *mrb, struct RString *orig, struct RString *s) 573 { 574 size_t len = (size_t)orig->as.heap.len; 575 576 mrb_assert(!RSTR_EMBED_P(orig)); 577 if (RSTR_NOFREE_P(orig)) { 578 str_init_nofree(s, orig->as.heap.ptr, len); 579 } 580 else if (RSTR_SHARED_P(orig)) { 581 str_init_shared(mrb, orig, s, orig->as.heap.aux.shared); 582 } 583 else if (RSTR_FSHARED_P(orig)) { 584 str_init_fshared(orig, s, orig->as.heap.aux.fshared); 585 } 586 else if (mrb_frozen_p(orig) && !RSTR_POOL_P(orig)) { 587 str_init_fshared(orig, s, orig); 588 } 589 else { 590 if (orig->as.heap.aux.capa > orig->as.heap.len) { 591 orig->as.heap.ptr = (char *)mrb_realloc(mrb, orig->as.heap.ptr, len+1); 592 orig->as.heap.aux.capa = (mrb_ssize)len; 593 } 594 str_init_shared(mrb, orig, s, NULL); 595 str_init_shared(mrb, orig, orig, s->as.heap.aux.shared); 596 } 597 } 598 599 mrb_value 600 mrb_str_pool(mrb_state *mrb, const char *p, mrb_int len, mrb_bool nofree) 601 { 602 struct RString *s = (struct RString *)mrb_malloc(mrb, sizeof(struct RString)); 603 604 s->tt = MRB_TT_STRING; 605 s->c = mrb->string_class; 606 s->flags = 0; 607 608 if (RSTR_EMBEDDABLE_P(len)) { 609 str_init_embed(s, p, len); 610 } 611 else if (nofree) { 612 str_init_nofree(s, p, len); 613 } 614 else { 615 str_init_normal(mrb, s, p, len); 616 } 617 RSTR_SET_POOL_FLAG(s); 618 MRB_SET_FROZEN_FLAG(s); 619 return mrb_obj_value(s); 620 } 621 622 mrb_value 623 mrb_str_byte_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len) 422 624 { 423 625 struct RString *orig, *s; 424 mrb_shared_string *shared;425 626 426 627 orig = mrb_str_ptr(str); 427 if (RSTR_EMBED_P(orig) || RSTR_LEN(orig) == 0) { 428 s = str_new(mrb, orig->as.ary+beg, len); 628 s = mrb_obj_alloc_string(mrb); 629 if (RSTR_EMBEDDABLE_P(len)) { 630 str_init_embed(s, RSTR_PTR(orig)+beg, len); 429 631 } 430 632 else { 431 str_make_shared(mrb, orig); 432 shared = orig->as.heap.aux.shared; 433 s = mrb_obj_alloc_string(mrb); 434 s->as.heap.ptr = 
orig->as.heap.ptr + beg; 435 s->as.heap.len = len; 436 s->as.heap.aux.shared = shared; 437 RSTR_SET_SHARED_FLAG(s); 438 shared->refcnt++; 439 } 440 633 str_share(mrb, orig, s); 634 s->as.heap.ptr += (mrb_ssize)beg; 635 s->as.heap.len = (mrb_ssize)len; 636 } 637 RSTR_COPY_ASCII_FLAG(s, orig); 441 638 return mrb_obj_value(s); 639 } 640 641 static void 642 str_range_to_bytes(mrb_value str, mrb_int *pos, mrb_int *len) 643 { 644 *pos = chars2bytes(str, 0, *pos); 645 *len = chars2bytes(str, *pos, *len); 442 646 } 443 647 #ifdef MRB_UTF8_STRING … … 445 649 str_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len) 446 650 { 447 beg = chars2bytes(str, 0, beg); 448 len = chars2bytes(str, beg, len); 449 450 return byte_subseq(mrb, str, beg, len); 651 str_range_to_bytes(str, &beg, &len); 652 return mrb_str_byte_subseq(mrb, str, beg, len); 451 653 } 452 654 #else 453 #define str_subseq(mrb, str, beg, len) byte_subseq(mrb, str, beg, len)655 #define str_subseq(mrb, str, beg, len) mrb_str_byte_subseq(mrb, str, beg, len) 454 656 #endif 455 657 658 mrb_bool 659 mrb_str_beg_len(mrb_int str_len, mrb_int *begp, mrb_int *lenp) 660 { 661 if (str_len < *begp || *lenp < 0) return FALSE; 662 if (*begp < 0) { 663 *begp += str_len; 664 if (*begp < 0) return FALSE; 665 } 666 if (*lenp > str_len - *begp) 667 *lenp = str_len - *begp; 668 if (*lenp <= 0) { 669 *lenp = 0; 670 } 671 return TRUE; 672 } 673 456 674 static mrb_value 457 675 str_substr(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len) 458 676 { 459 mrb_int clen = RSTRING_CHAR_LEN(str); 460 461 if (len < 0) return mrb_nil_value(); 462 if (clen == 0) { 463 len = 0; 464 } 465 else if (beg < 0) { 466 beg = clen + beg; 467 } 468 if (beg > clen) return mrb_nil_value(); 469 if (beg < 0) { 470 beg += clen; 471 if (beg < 0) return mrb_nil_value(); 472 } 473 if (len > clen - beg) 474 len = clen - beg; 475 if (len <= 0) { 476 len = 0; 477 } 478 return str_subseq(mrb, str, beg, len); 479 } 480 481 static mrb_int 482 
str_index(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int offset) 677 return mrb_str_beg_len(RSTRING_CHAR_LEN(str), &beg, &len) ? 678 str_subseq(mrb, str, beg, len) : mrb_nil_value(); 679 } 680 681 MRB_API mrb_int 682 mrb_str_index(mrb_state *mrb, mrb_value str, const char *sptr, mrb_int slen, mrb_int offset) 483 683 { 484 684 mrb_int pos; 485 char *s , *sptr;486 mrb_int len , slen;685 char *s; 686 mrb_int len; 487 687 488 688 len = RSTRING_LEN(str); 489 slen = RSTRING_LEN(sub);490 689 if (offset < 0) { 491 690 offset += len; … … 499 698 if (slen == 0) return offset; 500 699 /* need proceed one character at a time */ 501 sptr = RSTRING_PTR(sub);502 slen = RSTRING_LEN(sub);503 700 len = RSTRING_LEN(str) - offset; 504 701 pos = mrb_memsearch(sptr, slen, s, len); … … 507 704 } 508 705 509 static void 510 check_frozen(mrb_state *mrb, struct RString *s) 511 { 512 if (MRB_FROZEN_P(s)) { 513 mrb_raise(mrb, E_RUNTIME_ERROR, "can't modify frozen string"); 514 } 706 static mrb_int 707 str_index_str(mrb_state *mrb, mrb_value str, mrb_value str2, mrb_int offset) 708 { 709 const char *ptr; 710 mrb_int len; 711 712 ptr = RSTRING_PTR(str2); 713 len = RSTRING_LEN(str2); 714 715 return mrb_str_index(mrb, str, ptr, len, offset); 515 716 } 516 717 … … 518 719 str_replace(mrb_state *mrb, struct RString *s1, struct RString *s2) 519 720 { 520 longlen;521 522 check_frozen(mrb, s1);721 size_t len; 722 723 mrb_check_frozen(mrb, s1); 523 724 if (s1 == s2) return mrb_obj_value(s1); 524 s1->flags &= ~MRB_STR_NO_UTF; 525 s1->flags |= s2->flags&MRB_STR_NO_UTF; 526 len = RSTR_LEN(s2); 725 RSTR_COPY_ASCII_FLAG(s1, s2); 527 726 if (RSTR_SHARED_P(s1)) { 528 727 str_decref(mrb, s1->as.heap.aux.shared); 529 728 } 530 else if (!RSTR_EMBED_P(s1) && !RSTR_NOFREE_P(s1)) { 729 else if (!RSTR_EMBED_P(s1) && !RSTR_NOFREE_P(s1) && !RSTR_FSHARED_P(s1) 730 && s1->as.heap.ptr) { 531 731 mrb_free(mrb, s1->as.heap.ptr); 532 732 } 533 733 534 RSTR_UNSET_NOFREE_FLAG(s1); 535 536 if (RSTR_SHARED_P(s2)) { 537 
L_SHARE: 538 RSTR_UNSET_EMBED_FLAG(s1); 539 s1->as.heap.ptr = s2->as.heap.ptr; 540 s1->as.heap.len = len; 541 s1->as.heap.aux.shared = s2->as.heap.aux.shared; 542 RSTR_SET_SHARED_FLAG(s1); 543 s1->as.heap.aux.shared->refcnt++; 734 len = (size_t)RSTR_LEN(s2); 735 if (RSTR_EMBEDDABLE_P(len)) { 736 str_init_embed(s1, RSTR_PTR(s2), len); 544 737 } 545 738 else { 546 if (len <= RSTRING_EMBED_LEN_MAX) { 547 RSTR_UNSET_SHARED_FLAG(s1); 548 RSTR_SET_EMBED_FLAG(s1); 549 memcpy(s1->as.ary, RSTR_PTR(s2), len); 550 RSTR_SET_EMBED_LEN(s1, len); 551 } 552 else { 553 str_make_shared(mrb, s2); 554 goto L_SHARE; 555 } 739 str_share(mrb, s2, s1); 556 740 } 557 741 … … 562 746 str_rindex(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int pos) 563 747 { 564 c har *s, *sbeg, *t;748 const char *s, *sbeg, *t; 565 749 struct RString *ps = mrb_str_ptr(str); 566 750 mrb_int len = RSTRING_LEN(sub); … … 575 759 t = RSTRING_PTR(sub); 576 760 if (len) { 761 s = char_adjust(sbeg, sbeg + RSTR_LEN(ps), s); 577 762 while (sbeg <= s) { 578 763 if (memcmp(s, t, len) == 0) { 579 return s - RSTR_PTR(ps);580 } 581 s --;764 return (mrb_int)(s - RSTR_PTR(ps)); 765 } 766 s = char_backtrack(sbeg, s); 582 767 } 583 768 return -1; … … 607 792 608 793 char* 609 mrb_utf8_from_locale(const char *str, size_t len)794 mrb_utf8_from_locale(const char *str, int len) 610 795 { 611 796 wchar_t* wcsp; 612 797 char* mbsp; 613 size_t mbssize, wcssize;798 int mbssize, wcssize; 614 799 615 800 if (len == 0) 616 801 return strdup(""); 617 802 if (len == -1) 618 len = strlen(str);803 len = (int)strlen(str); 619 804 wcssize = MultiByteToWideChar(GetACP(), 0, str, len, NULL, 0); 620 805 wcsp = (wchar_t*) malloc((wcssize + 1) * sizeof(wchar_t)); … … 637 822 638 823 char* 639 mrb_locale_from_utf8(const char *utf8, size_t len)824 mrb_locale_from_utf8(const char *utf8, int len) 640 825 { 641 826 wchar_t* wcsp; 642 827 char* mbsp; 643 size_t mbssize, wcssize;828 int mbssize, wcssize; 644 829 645 830 if (len == 0) 646 831 return 
strdup(""); 647 832 if (len == -1) 648 len = strlen(utf8);833 len = (int)strlen(utf8); 649 834 wcssize = MultiByteToWideChar(CP_UTF8, 0, utf8, len, NULL, 0); 650 835 wcsp = (wchar_t*) malloc((wcssize + 1) * sizeof(wchar_t)); … … 667 852 668 853 MRB_API void 854 mrb_str_modify_keep_ascii(mrb_state *mrb, struct RString *s) 855 { 856 mrb_check_frozen(mrb, s); 857 str_modify_keep_ascii(mrb, s); 858 } 859 860 MRB_API void 669 861 mrb_str_modify(mrb_state *mrb, struct RString *s) 670 862 { 671 check_frozen(mrb, s); 672 s->flags &= ~MRB_STR_NO_UTF; 673 if (RSTR_SHARED_P(s)) { 674 mrb_shared_string *shared = s->as.heap.aux.shared; 675 676 if (shared->nofree == 0 && shared->refcnt == 1 && s->as.heap.ptr == shared->ptr) { 677 s->as.heap.ptr = shared->ptr; 678 s->as.heap.aux.capa = shared->len; 679 RSTR_PTR(s)[s->as.heap.len] = '\0'; 680 mrb_free(mrb, shared); 681 } 682 else { 683 char *ptr, *p; 684 mrb_int len; 685 686 p = RSTR_PTR(s); 687 len = s->as.heap.len; 688 if (len < RSTRING_EMBED_LEN_MAX) { 689 RSTR_SET_EMBED_FLAG(s); 690 RSTR_SET_EMBED_LEN(s, len); 691 ptr = RSTR_PTR(s); 692 } 693 else { 694 ptr = (char *)mrb_malloc(mrb, (size_t)len + 1); 695 s->as.heap.ptr = ptr; 696 s->as.heap.aux.capa = len; 697 } 698 if (p) { 699 memcpy(ptr, p, len); 700 } 701 ptr[len] = '\0'; 702 str_decref(mrb, shared); 703 } 704 RSTR_UNSET_SHARED_FLAG(s); 705 return; 706 } 707 if (RSTR_NOFREE_P(s)) { 708 char *p = s->as.heap.ptr; 709 mrb_int len = s->as.heap.len; 710 711 RSTR_UNSET_NOFREE_FLAG(s); 712 if (len < RSTRING_EMBED_LEN_MAX) { 713 RSTR_SET_EMBED_FLAG(s); 714 RSTR_SET_EMBED_LEN(s, len); 715 } 716 else { 717 s->as.heap.ptr = (char *)mrb_malloc(mrb, (size_t)len+1); 718 s->as.heap.aux.capa = len; 719 } 720 if (p) { 721 memcpy(RSTR_PTR(s), p, len); 722 } 723 RSTR_PTR(s)[len] = '\0'; 724 return; 725 } 863 mrb_str_modify_keep_ascii(mrb, s); 864 RSTR_UNSET_ASCII_FLAG(s); 726 865 } 727 866 … … 732 871 struct RString *s = mrb_str_ptr(str); 733 872 873 if (len < 0) { 874 mrb_raise(mrb, 
E_ARGUMENT_ERROR, "negative (or overflowed) string size"); 875 } 734 876 mrb_str_modify(mrb, s); 735 877 slen = RSTR_LEN(s); … … 749 891 struct RString *s; 750 892 751 if (!mrb_string_p(str0)) { 752 mrb_raise(mrb, E_TYPE_ERROR, "expected String"); 753 } 754 893 check_null_byte(mrb, str0); 755 894 s = str_new(mrb, RSTRING_PTR(str0), RSTRING_LEN(str0)); 756 if ((strlen(RSTR_PTR(s)) ^ RSTR_LEN(s)) != 0) {757 mrb_raise(mrb, E_ARGUMENT_ERROR, "string contains null byte");758 }759 895 return RSTR_PTR(s); 760 896 } 761 897 762 /*763 * call-seq: (Caution! String("abcd") change)764 * String("abcdefg") = String("abcd") + String("efg")765 *766 * Returns a new string object containing a copy of <i>str</i>.767 */768 898 MRB_API void 769 899 mrb_str_concat(mrb_state *mrb, mrb_value self, mrb_value other) 770 900 { 771 struct RString *s1 = mrb_str_ptr(self), *s2; 772 mrb_int len; 773 774 mrb_str_modify(mrb, s1); 775 if (!mrb_string_p(other)) { 776 other = mrb_str_to_str(mrb, other); 777 } 778 s2 = mrb_str_ptr(other); 779 if (RSTR_LEN(s2) == 0) { 780 return; 781 } 782 len = RSTR_LEN(s1) + RSTR_LEN(s2); 783 784 if (len < 0 || len >= MRB_INT_MAX) { 785 mrb_raise(mrb, E_ARGUMENT_ERROR, "string size too big"); 786 } 787 if (RSTRING_CAPA(self) < len) { 788 resize_capa(mrb, s1, len); 789 } 790 memcpy(RSTR_PTR(s1)+RSTR_LEN(s1), RSTR_PTR(s2), RSTR_LEN(s2)); 791 RSTR_SET_LEN(s1, len); 792 RSTR_PTR(s1)[len] = '\0'; 793 } 794 795 /* 796 * call-seq: (Caution! String("abcd") remain) 797 * String("abcdefg") = String("abcd") + String("efg") 798 * 799 * Returns a new string object containing a copy of <i>str</i>. 800 */ 901 other = mrb_str_to_str(mrb, other); 902 mrb_str_cat_str(mrb, self, other); 903 } 904 801 905 MRB_API mrb_value 802 906 mrb_str_plus(mrb_state *mrb, mrb_value a, mrb_value b) … … 816 920 817 921 /* 818 * call-seq: (Caution! 
String("abcd") remain) for stack_argument 819 * String("abcdefg") = String("abcd") + String("efg") 820 * 821 * Returns a new string object containing a copy of <i>str</i>. 922 * call-seq: 923 * str + other_str -> new_str 924 * 925 * Concatenation---Returns a new <code>String</code> containing 926 * <i>other_str</i> concatenated to <i>str</i>. 927 * 928 * "Hello from " + self.to_s #=> "Hello from main" 822 929 */ 823 930 static mrb_value … … 873 980 mrb_raise(mrb, E_ARGUMENT_ERROR, "negative argument"); 874 981 } 875 if (times && MRB_ INT_MAX / times < RSTRING_LEN(self)) {982 if (times && MRB_SSIZE_MAX / times < RSTRING_LEN(self)) { 876 983 mrb_raise(mrb, E_ARGUMENT_ERROR, "argument too big"); 877 984 } … … 879 986 len = RSTRING_LEN(self)*times; 880 987 str2 = str_new(mrb, 0, len); 881 str_with_class( mrb,str2, self);988 str_with_class(str2, self); 882 989 p = RSTR_PTR(str2); 883 990 if (len > 0) { … … 891 998 } 892 999 p[RSTR_LEN(str2)] = '\0'; 1000 RSTR_COPY_ASCII_FLAG(str2, mrb_str_ptr(self)); 893 1001 894 1002 return mrb_obj_value(str2); … … 959 1067 mrb_get_args(mrb, "o", &str2); 960 1068 if (!mrb_string_p(str2)) { 961 if (!mrb_respond_to(mrb, str2, mrb_intern_lit(mrb, "to_s"))) { 962 return mrb_nil_value(); 963 } 964 else if (!mrb_respond_to(mrb, str2, mrb_intern_lit(mrb, "<=>"))) { 965 return mrb_nil_value(); 966 } 967 else { 968 mrb_value tmp = mrb_funcall(mrb, str2, "<=>", 1, str1); 969 970 if (!mrb_nil_p(tmp)) return mrb_nil_value(); 971 if (!mrb_fixnum_p(tmp)) { 972 return mrb_funcall(mrb, mrb_fixnum_value(0), "-", 1, tmp); 973 } 974 result = -mrb_fixnum(tmp); 975 } 1069 return mrb_nil_value(); 976 1070 } 977 1071 else { … … 995 1089 mrb_str_equal(mrb_state *mrb, mrb_value str1, mrb_value str2) 996 1090 { 997 if (mrb_immediate_p(str2)) return FALSE; 998 if (!mrb_string_p(str2)) { 999 if (mrb_nil_p(str2)) return FALSE; 1000 if (!mrb_respond_to(mrb, str2, mrb_intern_lit(mrb, "to_str"))) { 1001 return FALSE; 1002 } 1003 str2 = mrb_funcall(mrb, str2, 
"to_str", 0); 1004 return mrb_equal(mrb, str2, str1); 1005 } 1091 if (!mrb_string_p(str2)) return FALSE; 1006 1092 return str_eql(mrb, str1, str2); 1007 1093 } … … 1028 1114 } 1029 1115 /* ---------------------------------- */ 1116 1030 1117 MRB_API mrb_value 1031 1118 mrb_str_to_str(mrb_state *mrb, mrb_value str) 1032 1119 { 1033 mrb_value s; 1034 1035 if (!mrb_string_p(str)) { 1036 s = mrb_check_convert_type(mrb, str, MRB_TT_STRING, "String", "to_str"); 1037 if (mrb_nil_p(s)) { 1038 s = mrb_convert_type(mrb, str, MRB_TT_STRING, "String", "to_s"); 1039 } 1040 return s; 1041 } 1042 return str; 1043 } 1044 1120 switch (mrb_type(str)) { 1121 case MRB_TT_STRING: 1122 return str; 1123 case MRB_TT_SYMBOL: 1124 return mrb_sym_str(mrb, mrb_symbol(str)); 1125 case MRB_TT_FIXNUM: 1126 return mrb_fixnum_to_str(mrb, str, 10); 1127 case MRB_TT_CLASS: 1128 case MRB_TT_MODULE: 1129 return mrb_mod_to_s(mrb, str); 1130 default: 1131 return mrb_convert_type(mrb, str, MRB_TT_STRING, "String", "to_s"); 1132 } 1133 } 1134 1135 /* obsolete: use RSTRING_PTR() */ 1045 1136 MRB_API const char* 1046 mrb_string_value_ptr(mrb_state *mrb, mrb_value ptr)1047 { 1048 mrb_value str = mrb_str_to_str(mrb, ptr);1137 mrb_string_value_ptr(mrb_state *mrb, mrb_value str) 1138 { 1139 str = mrb_str_to_str(mrb, str); 1049 1140 return RSTRING_PTR(str); 1050 1141 } 1051 1142 1143 /* obsolete: use RSTRING_LEN() */ 1052 1144 MRB_API mrb_int 1053 1145 mrb_string_value_len(mrb_state *mrb, mrb_value ptr) 1054 1146 { 1055 mrb_value str = mrb_str_to_str(mrb, ptr); 1056 return RSTRING_LEN(str); 1057 } 1058 1059 void 1060 mrb_noregexp(mrb_state *mrb, mrb_value self) 1061 { 1062 mrb_raise(mrb, E_NOTIMP_ERROR, "Regexp class not implemented"); 1063 } 1064 1065 void 1066 mrb_regexp_check(mrb_state *mrb, mrb_value obj) 1067 { 1068 if (mrb_regexp_p(mrb, obj)) { 1069 mrb_noregexp(mrb, obj); 1070 } 1147 mrb_to_str(mrb, ptr); 1148 return RSTRING_LEN(ptr); 1071 1149 } 1072 1150 … … 1077 1155 struct RString *dup = str_new(mrb, 0, 
0); 1078 1156 1079 str_with_class( mrb,dup, str);1157 str_with_class(dup, str); 1080 1158 return str_replace(mrb, dup, s); 1081 1159 } 1082 1160 1083 static mrb_value 1084 mrb_str_aref(mrb_state *mrb, mrb_value str, mrb_value indx) 1085 { 1086 mrb_int idx; 1087 1088 mrb_regexp_check(mrb, indx); 1089 switch (mrb_type(indx)) { 1090 case MRB_TT_FIXNUM: 1091 idx = mrb_fixnum(indx); 1092 1093 num_index: 1094 str = str_substr(mrb, str, idx, 1); 1095 if (!mrb_nil_p(str) && RSTRING_LEN(str) == 0) return mrb_nil_value(); 1096 return str; 1097 1098 case MRB_TT_STRING: 1099 if (str_index(mrb, str, indx, 0) != -1) 1100 return mrb_str_dup(mrb, indx); 1101 return mrb_nil_value(); 1102 1103 case MRB_TT_RANGE: 1104 goto range_arg; 1105 1106 default: 1107 indx = mrb_Integer(mrb, indx); 1108 if (mrb_nil_p(indx)) { 1109 range_arg: 1110 { 1111 mrb_int beg, len; 1112 1113 len = RSTRING_CHAR_LEN(str); 1114 switch (mrb_range_beg_len(mrb, indx, &beg, &len, len, TRUE)) { 1115 case 1: 1116 return str_subseq(mrb, str, beg, len); 1117 case 2: 1118 return mrb_nil_value(); 1161 enum str_convert_range { 1162 /* `beg` and `len` are byte unit in `0 ... str.bytesize` */ 1163 STR_BYTE_RANGE_CORRECTED = 1, 1164 1165 /* `beg` and `len` are char unit in any range */ 1166 STR_CHAR_RANGE = 2, 1167 1168 /* `beg` and `len` are char unit in `0 ... 
str.size` */ 1169 STR_CHAR_RANGE_CORRECTED = 3, 1170 1171 /* `beg` is out of range */ 1172 STR_OUT_OF_RANGE = -1 1173 }; 1174 1175 static enum str_convert_range 1176 str_convert_range(mrb_state *mrb, mrb_value str, mrb_value indx, mrb_value alen, mrb_int *beg, mrb_int *len) 1177 { 1178 if (!mrb_undef_p(alen)) { 1179 *beg = mrb_int(mrb, indx); 1180 *len = mrb_int(mrb, alen); 1181 return STR_CHAR_RANGE; 1182 } 1183 else { 1184 switch (mrb_type(indx)) { 1185 case MRB_TT_FIXNUM: 1186 *beg = mrb_fixnum(indx); 1187 *len = 1; 1188 return STR_CHAR_RANGE; 1189 1190 case MRB_TT_STRING: 1191 *beg = str_index_str(mrb, str, indx, 0); 1192 if (*beg < 0) { break; } 1193 *len = RSTRING_LEN(indx); 1194 return STR_BYTE_RANGE_CORRECTED; 1195 1196 case MRB_TT_RANGE: 1197 goto range_arg; 1198 1199 default: 1200 indx = mrb_to_int(mrb, indx); 1201 if (mrb_fixnum_p(indx)) { 1202 *beg = mrb_fixnum(indx); 1203 *len = 1; 1204 return STR_CHAR_RANGE; 1205 } 1206 range_arg: 1207 *len = RSTRING_CHAR_LEN(str); 1208 switch (mrb_range_beg_len(mrb, indx, beg, len, *len, TRUE)) { 1209 case MRB_RANGE_OK: 1210 return STR_CHAR_RANGE_CORRECTED; 1211 case MRB_RANGE_OUT: 1212 return STR_OUT_OF_RANGE; 1119 1213 default: 1120 1214 break; 1121 }1122 1215 } 1216 1123 1217 mrb_raise(mrb, E_TYPE_ERROR, "can't convert to Fixnum"); 1124 } 1125 idx = mrb_fixnum(indx); 1126 goto num_index; 1127 } 1128 return mrb_nil_value(); /* not reached */ 1218 } 1219 } 1220 return STR_OUT_OF_RANGE; 1221 } 1222 1223 static mrb_value 1224 mrb_str_aref(mrb_state *mrb, mrb_value str, mrb_value indx, mrb_value alen) 1225 { 1226 mrb_int beg, len; 1227 1228 switch (str_convert_range(mrb, str, indx, alen, &beg, &len)) { 1229 case STR_CHAR_RANGE_CORRECTED: 1230 return str_subseq(mrb, str, beg, len); 1231 case STR_CHAR_RANGE: 1232 str = str_substr(mrb, str, beg, len); 1233 if (mrb_undef_p(alen) && !mrb_nil_p(str) && RSTRING_LEN(str) == 0) return mrb_nil_value(); 1234 return str; 1235 case STR_BYTE_RANGE_CORRECTED: 1236 if 
(mrb_string_p(indx)) { 1237 return mrb_str_dup(mrb, indx); 1238 } 1239 else { 1240 return mrb_str_byte_subseq(mrb, str, beg, len); 1241 } 1242 case STR_OUT_OF_RANGE: 1243 default: 1244 return mrb_nil_value(); 1245 } 1129 1246 } 1130 1247 … … 1136 1253 * str[fixnum, fixnum] => new_str or nil 1137 1254 * str[range] => new_str or nil 1138 * str[regexp] => new_str or nil1139 * str[regexp, fixnum] => new_str or nil1140 1255 * str[other_str] => new_str or nil 1141 1256 * str.slice(fixnum) => fixnum or nil … … 1173 1288 { 1174 1289 mrb_value a1, a2; 1175 int argc; 1176 1177 argc = mrb_get_args(mrb, "o|o", &a1, &a2); 1178 if (argc == 2) { 1179 mrb_int n1, n2; 1180 1181 mrb_regexp_check(mrb, a1); 1182 mrb_get_args(mrb, "ii", &n1, &n2); 1183 return str_substr(mrb, str, n1, n2); 1184 } 1185 if (argc != 1) { 1186 mrb_raisef(mrb, E_ARGUMENT_ERROR, "wrong number of arguments (%S for 1)", mrb_fixnum_value(argc)); 1187 } 1188 return mrb_str_aref(mrb, str, a1); 1290 1291 if (mrb_get_args(mrb, "o|o", &a1, &a2) == 1) { 1292 a2 = mrb_undef_value(); 1293 } 1294 1295 return mrb_str_aref(mrb, str, a1, a2); 1296 } 1297 1298 static mrb_noreturn void 1299 str_out_of_index(mrb_state *mrb, mrb_value index) 1300 { 1301 mrb_raisef(mrb, E_INDEX_ERROR, "index %v out of string", index); 1302 } 1303 1304 static mrb_value 1305 str_replace_partial(mrb_state *mrb, mrb_value src, mrb_int pos, mrb_int end, mrb_value rep) 1306 { 1307 const mrb_int shrink_threshold = 256; 1308 struct RString *str = mrb_str_ptr(src); 1309 mrb_int len = RSTR_LEN(str); 1310 mrb_int replen, newlen; 1311 char *strp; 1312 1313 if (end > len) { end = len; } 1314 1315 if (pos < 0 || pos > len) { 1316 str_out_of_index(mrb, mrb_fixnum_value(pos)); 1317 } 1318 1319 replen = (mrb_nil_p(rep) ? 
0 : RSTRING_LEN(rep)); 1320 newlen = replen + len - (end - pos); 1321 1322 if (newlen >= MRB_SSIZE_MAX || newlen < replen /* overflowed */) { 1323 mrb_raise(mrb, E_RUNTIME_ERROR, "string size too big"); 1324 } 1325 1326 mrb_str_modify(mrb, str); 1327 1328 if (len < newlen) { 1329 resize_capa(mrb, str, newlen); 1330 } 1331 1332 strp = RSTR_PTR(str); 1333 1334 memmove(strp + newlen - (len - end), strp + end, len - end); 1335 if (!mrb_nil_p(rep)) { 1336 memmove(strp + pos, RSTRING_PTR(rep), replen); 1337 } 1338 RSTR_SET_LEN(str, newlen); 1339 strp[newlen] = '\0'; 1340 1341 if (len - newlen >= shrink_threshold) { 1342 resize_capa(mrb, str, newlen); 1343 } 1344 1345 return src; 1346 } 1347 1348 #define IS_EVSTR(p,e) ((p) < (e) && (*(p) == '$' || *(p) == '@' || *(p) == '{')) 1349 1350 static mrb_value 1351 str_escape(mrb_state *mrb, mrb_value str, mrb_bool inspect) 1352 { 1353 const char *p, *pend; 1354 char buf[4]; /* `\x??` or UTF-8 character */ 1355 mrb_value result = mrb_str_new_lit(mrb, "\""); 1356 #ifdef MRB_UTF8_STRING 1357 uint32_t ascii_flag = MRB_STR_ASCII; 1358 #endif 1359 1360 p = RSTRING_PTR(str); pend = RSTRING_END(str); 1361 for (;p < pend; p++) { 1362 unsigned char c, cc; 1363 #ifdef MRB_UTF8_STRING 1364 if (inspect) { 1365 mrb_int clen = mrb_utf8len(p, pend); 1366 if (clen > 1) { 1367 mrb_int i; 1368 1369 for (i=0; i<clen; i++) { 1370 buf[i] = p[i]; 1371 } 1372 mrb_str_cat(mrb, result, buf, clen); 1373 p += clen-1; 1374 ascii_flag = 0; 1375 continue; 1376 } 1377 } 1378 #endif 1379 c = *p; 1380 if (c == '"'|| c == '\\' || (c == '#' && IS_EVSTR(p+1, pend))) { 1381 buf[0] = '\\'; buf[1] = c; 1382 mrb_str_cat(mrb, result, buf, 2); 1383 continue; 1384 } 1385 if (ISPRINT(c)) { 1386 buf[0] = c; 1387 mrb_str_cat(mrb, result, buf, 1); 1388 continue; 1389 } 1390 switch (c) { 1391 case '\n': cc = 'n'; break; 1392 case '\r': cc = 'r'; break; 1393 case '\t': cc = 't'; break; 1394 case '\f': cc = 'f'; break; 1395 case '\013': cc = 'v'; break; 1396 case '\010': cc = 
'b'; break; 1397 case '\007': cc = 'a'; break; 1398 case 033: cc = 'e'; break; 1399 default: cc = 0; break; 1400 } 1401 if (cc) { 1402 buf[0] = '\\'; 1403 buf[1] = (char)cc; 1404 mrb_str_cat(mrb, result, buf, 2); 1405 continue; 1406 } 1407 else { 1408 buf[0] = '\\'; 1409 buf[1] = 'x'; 1410 buf[3] = mrb_digitmap[c % 16]; c /= 16; 1411 buf[2] = mrb_digitmap[c % 16]; 1412 mrb_str_cat(mrb, result, buf, 4); 1413 continue; 1414 } 1415 } 1416 mrb_str_cat_lit(mrb, result, "\""); 1417 #ifdef MRB_UTF8_STRING 1418 if (inspect) { 1419 mrb_str_ptr(str)->flags |= ascii_flag; 1420 mrb_str_ptr(result)->flags |= ascii_flag; 1421 } 1422 else { 1423 RSTR_SET_ASCII_FLAG(mrb_str_ptr(result)); 1424 } 1425 #endif 1426 1427 return result; 1428 } 1429 1430 static void 1431 mrb_str_aset(mrb_state *mrb, mrb_value str, mrb_value indx, mrb_value alen, mrb_value replace) 1432 { 1433 mrb_int beg, len, charlen; 1434 1435 mrb_to_str(mrb, replace); 1436 1437 switch (str_convert_range(mrb, str, indx, alen, &beg, &len)) { 1438 case STR_OUT_OF_RANGE: 1439 default: 1440 mrb_raise(mrb, E_INDEX_ERROR, "string not matched"); 1441 case STR_CHAR_RANGE: 1442 if (len < 0) { 1443 mrb_raisef(mrb, E_INDEX_ERROR, "negative length %v", alen); 1444 } 1445 charlen = RSTRING_CHAR_LEN(str); 1446 if (beg < 0) { beg += charlen; } 1447 if (beg < 0 || beg > charlen) { str_out_of_index(mrb, indx); } 1448 /* fall through */ 1449 case STR_CHAR_RANGE_CORRECTED: 1450 str_range_to_bytes(str, &beg, &len); 1451 /* fall through */ 1452 case STR_BYTE_RANGE_CORRECTED: 1453 str_replace_partial(mrb, str, beg, beg + len, replace); 1454 } 1455 } 1456 1457 /* 1458 * call-seq: 1459 * str[fixnum] = replace 1460 * str[fixnum, fixnum] = replace 1461 * str[range] = replace 1462 * str[other_str] = replace 1463 * 1464 * Modify +self+ by replacing the content of +self+. 1465 * The portion of the string affected is determined using the same criteria as +String#[]+. 
1466 */ 1467 static mrb_value 1468 mrb_str_aset_m(mrb_state *mrb, mrb_value str) 1469 { 1470 mrb_value indx, alen, replace; 1471 1472 switch (mrb_get_args(mrb, "oo|S!", &indx, &alen, &replace)) { 1473 case 2: 1474 replace = alen; 1475 alen = mrb_undef_value(); 1476 break; 1477 case 3: 1478 break; 1479 } 1480 mrb_str_aset(mrb, str, indx, alen, replace); 1481 return str; 1189 1482 } 1190 1483 … … 1209 1502 struct RString *s = mrb_str_ptr(str); 1210 1503 1211 mrb_str_modify (mrb, s);1504 mrb_str_modify_keep_ascii(mrb, s); 1212 1505 if (RSTR_LEN(s) == 0 || !RSTR_PTR(s)) return mrb_nil_value(); 1213 1506 p = RSTR_PTR(s); pend = RSTR_PTR(s) + RSTR_LEN(s); … … 1267 1560 struct RString *s = mrb_str_ptr(str); 1268 1561 1269 mrb_str_modify(mrb, s);1270 1562 argc = mrb_get_args(mrb, "|S", &rs); 1563 mrb_str_modify_keep_ascii(mrb, s); 1271 1564 len = RSTR_LEN(s); 1272 1565 if (argc == 0) { … … 1330 1623 * 1331 1624 * Returns a new <code>String</code> with the given record separator removed 1332 * from the end of <i>str</i> (if present). If <code>$/</code> has not been 1333 * changed from the default Ruby record separator, then <code>chomp</code> also 1334 * removes carriage return characters (that is it will remove <code>\n</code>, 1625 * from the end of <i>str</i> (if present). <code>chomp</code> also removes 1626 * carriage return characters (that is it will remove <code>\n</code>, 1335 1627 * <code>\r</code>, and <code>\r\n</code>). 
1336 1628 * … … 1367 1659 struct RString *s = mrb_str_ptr(str); 1368 1660 1369 mrb_str_modify (mrb, s);1661 mrb_str_modify_keep_ascii(mrb, s); 1370 1662 if (RSTR_LEN(s) > 0) { 1371 1663 mrb_int len; … … 1374 1666 const char* e = p + RSTR_LEN(s); 1375 1667 while (p<e) { 1376 mrb_int clen = utf8len(p, e);1668 mrb_int clen = mrb_utf8len(p, e); 1377 1669 if (p + clen>=e) break; 1378 1670 p += clen; … … 1436 1728 struct RString *s = mrb_str_ptr(str); 1437 1729 1438 mrb_str_modify (mrb, s);1730 mrb_str_modify_keep_ascii(mrb, s); 1439 1731 p = RSTR_PTR(s); 1440 1732 pend = RSTR_PTR(s) + RSTR_LEN(s); … … 1504 1796 1505 1797 mrb_get_args(mrb, "o", &str2); 1506 eql_p = (mrb_ type(str2) == MRB_TT_STRING) && str_eql(mrb, self, str2);1798 eql_p = (mrb_string_p(str2)) && str_eql(mrb, self, str2); 1507 1799 1508 1800 return mrb_bool_value(eql_p); … … 1515 1807 } 1516 1808 1517 mrb_int1809 uint32_t 1518 1810 mrb_str_hash(mrb_state *mrb, mrb_value str) 1519 1811 { … … 1522 1814 mrb_int len = RSTR_LEN(s); 1523 1815 char *p = RSTR_PTR(s); 1524 mrb_int key = 0;1816 uint64_t key = 0; 1525 1817 1526 1818 while (len--) { … … 1528 1820 p++; 1529 1821 } 1530 return key + (key>>5);1822 return (uint32_t)(key + (key>>5)); 1531 1823 } 1532 1824 … … 1564 1856 1565 1857 mrb_get_args(mrb, "S", &str2); 1566 if (str_index (mrb, self, str2, 0) < 0)1858 if (str_index_str(mrb, self, str2, 0) < 0) 1567 1859 return mrb_bool_value(FALSE); 1568 1860 return mrb_bool_value(TRUE); … … 1573 1865 * call-seq: 1574 1866 * str.index(substring [, offset]) => fixnum or nil 1575 * str.index(fixnum [, offset]) => fixnum or nil1576 * str.index(regexp [, offset]) => fixnum or nil1577 1867 * 1578 1868 * Returns the index of the first occurrence of the given 1579 * <i>substring</i>, 1580 * character (<i>fixnum</i>), or pattern (<i>regexp</i>) in <i>str</i>. 1581 * Returns 1582 * <code>nil</code> if not found. 1869 * <i>substring</i>. Returns <code>nil</code> if not found. 
1583 1870 * If the second parameter is present, it 1584 1871 * specifies the position in the string to begin the search. 1585 1872 * 1586 * "hello".index(' e') #=> 11873 * "hello".index('l') #=> 2 1587 1874 * "hello".index('lo') #=> 3 1588 1875 * "hello".index('a') #=> nil 1589 * "hello".index(101) #=> 1(101=0x65='e') 1590 * "hello".index(/[aeiou]/, -3) #=> 4 1591 */ 1592 static mrb_value 1593 mrb_str_index(mrb_state *mrb, mrb_value str) 1594 { 1595 mrb_value *argv; 1596 mrb_int argc; 1876 * "hello".index('l', -2) #=> 3 1877 */ 1878 static mrb_value 1879 mrb_str_index_m(mrb_state *mrb, mrb_value str) 1880 { 1597 1881 mrb_value sub; 1598 mrb_int pos, clen; 1599 1600 mrb_get_args(mrb, "*", &argv, &argc); 1601 if (argc == 2) { 1602 mrb_get_args(mrb, "oi", &sub, &pos); 1603 } 1604 else { 1882 mrb_int pos; 1883 1884 if (mrb_get_args(mrb, "S|i", &sub, &pos) == 1) { 1605 1885 pos = 0; 1606 if (argc > 0) 1607 sub = argv[0]; 1608 else 1609 sub = mrb_nil_value(); 1610 } 1611 mrb_regexp_check(mrb, sub); 1612 clen = RSTRING_CHAR_LEN(str); 1613 if (pos < 0) { 1886 } 1887 else if (pos < 0) { 1888 mrb_int clen = RSTRING_CHAR_LEN(str); 1614 1889 pos += clen; 1615 1890 if (pos < 0) { … … 1617 1892 } 1618 1893 } 1619 if (pos > clen) return mrb_nil_value(); 1620 pos = chars2bytes(str, 0, pos); 1621 1622 switch (mrb_type(sub)) { 1623 default: { 1624 mrb_value tmp; 1625 1626 tmp = mrb_check_string_type(mrb, sub); 1627 if (mrb_nil_p(tmp)) { 1628 mrb_raisef(mrb, E_TYPE_ERROR, "type mismatch: %S given", sub); 1629 } 1630 sub = tmp; 1631 } 1632 /* fall through */ 1633 case MRB_TT_STRING: 1634 pos = str_index(mrb, str, sub, pos); 1635 break; 1636 } 1894 pos = str_index_str_by_char(mrb, str, sub, pos); 1637 1895 1638 1896 if (pos == -1) return mrb_nil_value(); 1639 pos = bytes2chars(RSTRING_PTR(str), pos);1640 1897 BYTES_ALIGN_CHECK(pos); 1641 1898 return mrb_fixnum_value(pos); 1642 1899 } 1643 1644 #define STR_REPLACE_SHARED_MIN 101645 1900 1646 1901 /* 15.2.10.5.24 */ … … 1712 1967 
mrb_obj_as_string(mrb_state *mrb, mrb_value obj) 1713 1968 { 1714 mrb_value str;1715 1716 1969 if (mrb_string_p(obj)) { 1717 1970 return obj; 1718 1971 } 1719 str = mrb_funcall(mrb, obj, "to_s", 0); 1720 if (!mrb_string_p(str)) 1721 return mrb_any_to_s(mrb, obj); 1722 return str; 1972 return mrb_str_to_str(mrb, obj); 1723 1973 } 1724 1974 … … 1753 2003 } 1754 2004 1755 MRB_API mrb_value 1756 mrb_string_type(mrb_state *mrb, mrb_value str)1757 { 1758 return mrb_convert_type(mrb, str, MRB_TT_STRING, "String", "to_str");1759 } 1760 1761 MRB_API mrb_value 1762 mrb_check_string_type(mrb_state *mrb, mrb_value str) 1763 { 1764 return mrb_check_convert_type(mrb, str, MRB_TT_STRING, "String", "to_str");2005 static inline void 2006 str_reverse(char *p, char *e) 2007 { 2008 char c; 2009 2010 while (p < e) { 2011 c = *p; 2012 *p++ = *e; 2013 *e-- = c; 2014 } 1765 2015 } 1766 2016 … … 1775 2025 mrb_str_reverse_bang(mrb_state *mrb, mrb_value str) 1776 2026 { 2027 struct RString *s = mrb_str_ptr(str); 2028 char *p, *e; 2029 1777 2030 #ifdef MRB_UTF8_STRING 1778 2031 mrb_int utf8_len = RSTRING_CHAR_LEN(str); 1779 mrb_int len = RSTRING_LEN(str); 1780 1781 if (utf8_len == len) goto bytes; 1782 if (utf8_len > 1) { 1783 char *buf; 1784 char *p, *e, *r; 1785 1786 mrb_str_modify(mrb, mrb_str_ptr(str)); 1787 len = RSTRING_LEN(str); 1788 buf = (char*)mrb_malloc(mrb, (size_t)len); 1789 p = buf; 1790 e = buf + len; 1791 1792 memcpy(buf, RSTRING_PTR(str), len); 1793 r = RSTRING_PTR(str) + len; 1794 2032 mrb_int len = RSTR_LEN(s); 2033 2034 if (utf8_len < 2) return str; 2035 if (utf8_len < len) { 2036 mrb_str_modify(mrb, s); 2037 p = RSTR_PTR(s); 2038 e = p + RSTR_LEN(s); 1795 2039 while (p<e) { 1796 mrb_int clen = utf8len(p, e); 1797 r -= clen; 1798 memcpy(r, p, clen); 2040 mrb_int clen = mrb_utf8len(p, e); 2041 str_reverse(p, p + clen - 1); 1799 2042 p += clen; 1800 2043 } 1801 mrb_free(mrb, buf); 2044 goto bytes; 2045 } 2046 #endif 2047 2048 if (RSTR_LEN(s) > 1) { 2049 mrb_str_modify(mrb, 
s); 2050 goto bytes; 1802 2051 } 1803 2052 return str; 1804 2053 1805 2054 bytes: 1806 #endif 1807 { 1808 struct RString *s = mrb_str_ptr(str); 1809 char *p, *e; 1810 char c; 1811 1812 mrb_str_modify(mrb, s); 1813 if (RSTR_LEN(s) > 1) { 1814 p = RSTR_PTR(s); 1815 e = p + RSTR_LEN(s) - 1; 1816 while (p < e) { 1817 c = *p; 1818 *p++ = *e; 1819 *e-- = c; 1820 } 1821 } 1822 return str; 1823 } 2055 p = RSTR_PTR(s); 2056 e = p + RSTR_LEN(s) - 1; 2057 str_reverse(p, e); 2058 return str; 1824 2059 } 1825 2060 … … 1845 2080 /* 1846 2081 * call-seq: 1847 * str.rindex(substring [, fixnum]) => fixnum or nil 1848 * str.rindex(fixnum [, fixnum]) => fixnum or nil 1849 * str.rindex(regexp [, fixnum]) => fixnum or nil 1850 * 1851 * Returns the index of the last occurrence of the given <i>substring</i>, 1852 * character (<i>fixnum</i>), or pattern (<i>regexp</i>) in <i>str</i>. Returns 1853 * <code>nil</code> if not found. If the second parameter is present, it 1854 * specifies the position in the string to end the search---characters beyond 1855 * this point will not be considered. 2082 * str.rindex(substring [, offset]) => fixnum or nil 2083 * 2084 * Returns the index of the last occurrence of the given <i>substring</i>. 2085 * Returns <code>nil</code> if not found. If the second parameter is 2086 * present, it specifies the position in the string to end the 2087 * search---characters beyond this point will not be considered. 
1856 2088 * 1857 2089 * "hello".rindex('e') #=> 1 1858 2090 * "hello".rindex('l') #=> 3 1859 2091 * "hello".rindex('a') #=> nil 1860 * "hello".rindex(101) #=> 1 1861 * "hello".rindex(/[aeiou]/, -2) #=> 1 2092 * "hello".rindex('l', 2) #=> 2 1862 2093 */ 1863 2094 static mrb_value 1864 2095 mrb_str_rindex(mrb_state *mrb, mrb_value str) 1865 2096 { 1866 mrb_value *argv;1867 mrb_int argc;1868 2097 mrb_value sub; 1869 2098 mrb_int pos, len = RSTRING_CHAR_LEN(str); 1870 2099 1871 mrb_get_args(mrb, "*", &argv, &argc); 1872 if (argc == 2) { 1873 mrb_get_args(mrb, "oi", &sub, &pos); 2100 if (mrb_get_args(mrb, "S|i", &sub, &pos) == 1) { 2101 pos = len; 2102 } 2103 else { 1874 2104 if (pos < 0) { 1875 2105 pos += len; 1876 2106 if (pos < 0) { 1877 mrb_regexp_check(mrb, sub);1878 2107 return mrb_nil_value(); 1879 2108 } … … 1881 2110 if (pos > len) pos = len; 1882 2111 } 1883 else {1884 pos = len;1885 if (argc > 0)1886 sub = argv[0];1887 else1888 sub = mrb_nil_value();1889 }1890 2112 pos = chars2bytes(str, 0, pos); 1891 mrb_regexp_check(mrb, sub); 1892 1893 switch (mrb_type(sub)) { 1894 default: { 1895 mrb_value tmp; 1896 1897 tmp = mrb_check_string_type(mrb, sub); 1898 if (mrb_nil_p(tmp)) { 1899 mrb_raisef(mrb, E_TYPE_ERROR, "type mismatch: %S given", sub); 1900 } 1901 sub = tmp; 1902 } 1903 /* fall through */ 1904 case MRB_TT_STRING: 1905 pos = str_rindex(mrb, str, sub, pos); 1906 if (pos >= 0) { 1907 pos = bytes2chars(RSTRING_PTR(str), pos); 1908 BYTES_ALIGN_CHECK(pos); 1909 return mrb_fixnum_value(pos); 1910 } 1911 break; 1912 1913 } /* end of switch (TYPE(sub)) */ 2113 pos = str_rindex(mrb, str, sub, pos); 2114 if (pos >= 0) { 2115 pos = bytes2chars(RSTRING_PTR(str), RSTRING_LEN(str), pos); 2116 BYTES_ALIGN_CHECK(pos); 2117 return mrb_fixnum_value(pos); 2118 } 1914 2119 return mrb_nil_value(); 1915 2120 } … … 1919 2124 /* 1920 2125 * call-seq: 1921 * str.split( pattern="\n", [limit]) => anArray2126 * str.split(separator=nil, [limit]) => anArray 1922 2127 * 1923 2128 * 
Divides <i>str</i> into substrings based on a delimiter, returning an array 1924 2129 * of these substrings. 1925 2130 * 1926 * If <i> pattern</i> is a <code>String</code>, then its contents are used as1927 * the delimiter when splitting <i>str</i>. If <i> pattern</i> is a single2131 * If <i>separator</i> is a <code>String</code>, then its contents are used as 2132 * the delimiter when splitting <i>str</i>. If <i>separator</i> is a single 1928 2133 * space, <i>str</i> is split on whitespace, with leading whitespace and runs 1929 2134 * of contiguous whitespace characters ignored. 1930 2135 * 1931 * If <i>pattern</i> is a <code>Regexp</code>, <i>str</i> is divided where the 1932 * pattern matches. Whenever the pattern matches a zero-length string, 1933 * <i>str</i> is split into individual characters. 1934 * 1935 * If <i>pattern</i> is omitted, the value of <code>$;</code> is used. If 1936 * <code>$;</code> is <code>nil</code> (which is the default), <i>str</i> is 1937 * split on whitespace as if ' ' were specified. 2136 * If <i>separator</i> is omitted or <code>nil</code> (which is the default), 2137 * <i>str</i> is split on whitespace as if ' ' were specified. 
1938 2138 * 1939 2139 * If the <i>limit</i> parameter is omitted, trailing null fields are … … 1946 2146 * " now's the time".split #=> ["now's", "the", "time"] 1947 2147 * " now's the time".split(' ') #=> ["now's", "the", "time"] 1948 * " now's the time".split(/ /) #=> ["", "now's", "", "the", "time"]1949 * "hello".split(//) #=> ["h", "e", "l", "l", "o"]1950 * "hello".split(//, 3) #=> ["h", "e", "llo"]1951 2148 * 1952 2149 * "mellow yellow".split("ello") #=> ["m", "w y", "w"] … … 1959 2156 mrb_str_split_m(mrb_state *mrb, mrb_value str) 1960 2157 { 1961 int argc;2158 mrb_int argc; 1962 2159 mrb_value spat = mrb_nil_value(); 1963 enum {awk, string , regexp} split_type = string;2160 enum {awk, string} split_type = string; 1964 2161 mrb_int i = 0; 1965 2162 mrb_int beg; … … 1983 2180 split_type = awk; 1984 2181 } 1985 else { 1986 if (mrb_string_p(spat)) { 1987 split_type = string; 1988 if (RSTRING_LEN(spat) == 1 && RSTRING_PTR(spat)[0] == ' ') { 1989 split_type = awk; 1990 } 1991 } 1992 else { 1993 mrb_noregexp(mrb, str); 1994 } 2182 else if (!mrb_string_p(spat)) { 2183 mrb_raise(mrb, E_TYPE_ERROR, "expected String"); 2184 } 2185 else if (RSTRING_LEN(spat) == 1 && RSTRING_PTR(spat)[0] == ' ') { 2186 split_type = awk; 1995 2187 } 1996 2188 … … 2018 2210 } 2019 2211 else if (ISSPACE(c)) { 2020 mrb_ary_push(mrb, result, byte_subseq(mrb, str, beg, end-beg));2212 mrb_ary_push(mrb, result, mrb_str_byte_subseq(mrb, str, beg, end-beg)); 2021 2213 mrb_gc_arena_restore(mrb, ai); 2022 2214 skip = TRUE; … … 2029 2221 } 2030 2222 } 2031 else if (split_type == string) {2223 else { /* split_type == string */ 2032 2224 mrb_int str_len = RSTRING_LEN(str); 2033 2225 mrb_int pat_len = RSTRING_LEN(spat); … … 2043 2235 end = chars2bytes(str, idx, 1); 2044 2236 } 2045 mrb_ary_push(mrb, result, byte_subseq(mrb, str, idx, end));2237 mrb_ary_push(mrb, result, mrb_str_byte_subseq(mrb, str, idx, end)); 2046 2238 mrb_gc_arena_restore(mrb, ai); 2047 2239 idx += end + pat_len; … … 2050 2242 beg = 
idx; 2051 2243 } 2052 else {2053 mrb_noregexp(mrb, str);2054 }2055 2244 if (RSTRING_LEN(str) > 0 && (lim_p || RSTRING_LEN(str) > beg || lim < 0)) { 2056 2245 if (RSTRING_LEN(str) == beg) { … … 2058 2247 } 2059 2248 else { 2060 tmp = byte_subseq(mrb, str, beg, RSTRING_LEN(str)-beg);2249 tmp = mrb_str_byte_subseq(mrb, str, beg, RSTRING_LEN(str)-beg); 2061 2250 } 2062 2251 mrb_ary_push(mrb, result, tmp); … … 2072 2261 } 2073 2262 2074 MRB_APImrb_value2075 mrb_str_len_to_inum(mrb_state *mrb, const char *str, size_t len,int base, int badcheck)2263 mrb_value 2264 mrb_str_len_to_inum(mrb_state *mrb, const char *str, mrb_int len, mrb_int base, int badcheck) 2076 2265 { 2077 2266 const char *p = str; … … 2156 2345 default: 2157 2346 if (base < 2 || 36 < base) { 2158 mrb_raisef(mrb, E_ARGUMENT_ERROR, "illegal radix % S", mrb_fixnum_value(base));2347 mrb_raisef(mrb, E_ARGUMENT_ERROR, "illegal radix %i", base); 2159 2348 } 2160 2349 break; … … 2183 2372 p--; 2184 2373 } 2185 if (p == pend ) {2374 if (p == pend || *p == '_') { 2186 2375 if (badcheck) goto bad; 2187 2376 return mrb_fixnum_value(0); … … 2209 2398 n += c; 2210 2399 if (n > (uint64_t)MRB_INT_MAX + (sign ? 
0 : 1)) { 2211 mrb_raisef(mrb, E_ARGUMENT_ERROR, "string (%S) too big for integer", 2212 mrb_str_new(mrb, str, pend-str)); 2400 #ifndef MRB_WITHOUT_FLOAT 2401 if (base == 10) { 2402 return mrb_float_value(mrb, mrb_str_to_dbl(mrb, mrb_str_new(mrb, str, len), badcheck)); 2403 } 2404 else 2405 #endif 2406 { 2407 mrb_raisef(mrb, E_RANGE_ERROR, "string (%l) too big for integer", str, pend-str); 2408 } 2213 2409 } 2214 2410 } 2215 2411 val = (mrb_int)n; 2216 2412 if (badcheck) { 2217 if (p == str) goto bad; /* no number */ 2413 if (p == str) goto bad; /* no number */ 2414 if (*(p - 1) == '_') goto bad; /* trailing '_' */ 2218 2415 while (p<pend && ISSPACE(*p)) p++; 2219 if (p<pend) goto bad; /* trailing garbage */2416 if (p<pend) goto bad; /* trailing garbage */ 2220 2417 } 2221 2418 … … 2225 2422 /* not reached */ 2226 2423 bad: 2227 mrb_raisef(mrb, E_ARGUMENT_ERROR, "invalid string for number(%S)", 2228 mrb_inspect(mrb, mrb_str_new(mrb, str, pend-str))); 2424 mrb_raisef(mrb, E_ARGUMENT_ERROR, "invalid string for number(%!l)", str, pend-str); 2229 2425 /* not reached */ 2230 2426 return mrb_fixnum_value(0); … … 2232 2428 2233 2429 MRB_API mrb_value 2234 mrb_cstr_to_inum(mrb_state *mrb, const char *str, int base, intbadcheck)2430 mrb_cstr_to_inum(mrb_state *mrb, const char *str, mrb_int base, mrb_bool badcheck) 2235 2431 { 2236 2432 return mrb_str_len_to_inum(mrb, str, strlen(str), base, badcheck); 2237 2433 } 2238 2434 2435 /* obsolete: use RSTRING_CSTR() or mrb_string_cstr() */ 2239 2436 MRB_API const char* 2240 2437 mrb_string_value_cstr(mrb_state *mrb, mrb_value *ptr) 2241 2438 { 2242 mrb_value str = mrb_str_to_str(mrb, *ptr); 2243 struct RString *ps = mrb_str_ptr(str); 2244 mrb_int len = mrb_str_strlen(mrb, ps); 2245 char *p = RSTR_PTR(ps); 2246 2247 if (!p || p[len] != '\0') { 2248 if (MRB_FROZEN_P(ps)) { 2249 *ptr = str = mrb_str_dup(mrb, str); 2250 ps = mrb_str_ptr(str); 2251 } 2252 mrb_str_modify(mrb, ps); 2253 return RSTR_PTR(ps); 2254 } 2255 return p; 2439 
struct RString *ps; 2440 const char *p; 2441 mrb_int len; 2442 2443 check_null_byte(mrb, *ptr); 2444 ps = mrb_str_ptr(*ptr); 2445 p = RSTR_PTR(ps); 2446 len = RSTR_LEN(ps); 2447 if (p[len] == '\0') { 2448 return p; 2449 } 2450 2451 /* 2452 * Even after str_modify_keep_ascii(), NULL termination is not ensured if 2453 * RSTR_SET_LEN() is used explicitly (e.g. String#delete_suffix!). 2454 */ 2455 str_modify_keep_ascii(mrb, ps); 2456 RSTR_PTR(ps)[len] = '\0'; 2457 return RSTR_PTR(ps); 2458 } 2459 2460 MRB_API const char* 2461 mrb_string_cstr(mrb_state *mrb, mrb_value str) 2462 { 2463 return mrb_string_value_cstr(mrb, &str); 2256 2464 } 2257 2465 … … 2262 2470 mrb_int len; 2263 2471 2264 s = mrb_string_value_ptr(mrb, str); 2472 mrb_to_str(mrb, str); 2473 s = RSTRING_PTR(str); 2265 2474 len = RSTRING_LEN(str); 2266 2475 return mrb_str_len_to_inum(mrb, s, len, base, badcheck); … … 2295 2504 mrb_get_args(mrb, "|i", &base); 2296 2505 if (base < 0) { 2297 mrb_raisef(mrb, E_ARGUMENT_ERROR, "illegal radix % S", mrb_fixnum_value(base));2506 mrb_raisef(mrb, E_ARGUMENT_ERROR, "illegal radix %i", base); 2298 2507 } 2299 2508 return mrb_str_to_inum(mrb, self, base, FALSE); 2300 2509 } 2301 2510 2302 MRB_API double 2303 mrb_cstr_to_dbl(mrb_state *mrb, const char * p, mrb_bool badcheck) 2304 { 2511 #ifndef MRB_WITHOUT_FLOAT 2512 double 2513 mrb_str_len_to_dbl(mrb_state *mrb, const char *s, size_t len, mrb_bool badcheck) 2514 { 2515 char buf[DBL_DIG * 4 + 20]; 2516 const char *p = s, *p2; 2517 const char *pend = p + len; 2305 2518 char *end; 2306 char buf[DBL_DIG * 4 + 10]; 2519 char *n; 2520 char prev = 0; 2307 2521 double d; 2308 2309 enum {max_width = 20}; 2522 mrb_bool dot = FALSE; 2310 2523 2311 2524 if (!p) return 0.0; 2312 while (ISSPACE(*p)) p++; 2313 2314 if (!badcheck && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) { 2315 return 0.0; 2316 } 2525 while (p<pend && ISSPACE(*p)) p++; 2526 p2 = p; 2527 2528 if (pend - p > 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) { 2529 
mrb_value x; 2530 2531 if (!badcheck) return 0.0; 2532 x = mrb_str_len_to_inum(mrb, p, pend-p, 0, badcheck); 2533 if (mrb_fixnum_p(x)) 2534 d = (double)mrb_fixnum(x); 2535 else /* if (mrb_float_p(x)) */ 2536 d = mrb_float(x); 2537 return d; 2538 } 2539 while (p < pend) { 2540 if (!*p) { 2541 if (badcheck) { 2542 mrb_raise(mrb, E_ARGUMENT_ERROR, "string for Float contains null byte"); 2543 /* not reached */ 2544 } 2545 pend = p; 2546 p = p2; 2547 goto nocopy; 2548 } 2549 if (!badcheck && *p == ' ') { 2550 pend = p; 2551 p = p2; 2552 goto nocopy; 2553 } 2554 if (*p == '_') break; 2555 p++; 2556 } 2557 p = p2; 2558 n = buf; 2559 while (p < pend) { 2560 char c = *p++; 2561 if (c == '.') dot = TRUE; 2562 if (c == '_') { 2563 /* remove an underscore between digits */ 2564 if (n == buf || !ISDIGIT(prev) || p == pend) { 2565 if (badcheck) goto bad; 2566 break; 2567 } 2568 } 2569 else if (badcheck && prev == '_' && !ISDIGIT(c)) goto bad; 2570 else { 2571 const char *bend = buf+sizeof(buf)-1; 2572 if (n==bend) { /* buffer overflow */ 2573 if (dot) break; /* cut off remaining fractions */ 2574 return INFINITY; 2575 } 2576 *n++ = c; 2577 } 2578 prev = c; 2579 } 2580 *n = '\0'; 2581 p = buf; 2582 pend = n; 2583 nocopy: 2317 2584 d = mrb_float_read(p, &end); 2318 2585 if (p == end) { 2319 2586 if (badcheck) { 2320 2587 bad: 2321 mrb_raisef(mrb, E_ARGUMENT_ERROR, "invalid string for float(% S)", mrb_str_new_cstr(mrb, p));2588 mrb_raisef(mrb, E_ARGUMENT_ERROR, "invalid string for float(%!s)", s); 2322 2589 /* not reached */ 2323 2590 } 2324 2591 return d; 2325 2592 } 2326 if (*end) { 2327 char *n = buf; 2328 char *e = buf + sizeof(buf) - 1; 2329 char prev = 0; 2330 2331 while (p < end && n < e) prev = *n++ = *p++; 2332 while (*p) { 2333 if (*p == '_') { 2334 /* remove underscores between digits */ 2335 if (badcheck) { 2336 if (n == buf || !ISDIGIT(prev)) goto bad; 2337 ++p; 2338 if (!ISDIGIT(*p)) goto bad; 2339 } 2340 else { 2341 while (*++p == '_'); 2342 continue; 2343 } 2344 } 
2345 prev = *p++; 2346 if (n < e) *n++ = prev; 2347 } 2348 *n = '\0'; 2349 p = buf; 2350 2351 if (!badcheck && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) { 2352 return 0.0; 2353 } 2354 2355 d = mrb_float_read(p, &end); 2356 if (badcheck) { 2357 if (!end || p == end) goto bad; 2358 while (*end && ISSPACE(*end)) end++; 2359 if (*end) goto bad; 2360 } 2593 if (badcheck) { 2594 if (!end || p == end) goto bad; 2595 while (end<pend && ISSPACE(*end)) end++; 2596 if (end<pend) goto bad; 2361 2597 } 2362 2598 return d; 2599 } 2600 2601 MRB_API double 2602 mrb_cstr_to_dbl(mrb_state *mrb, const char *s, mrb_bool badcheck) 2603 { 2604 return mrb_str_len_to_dbl(mrb, s, strlen(s), badcheck); 2363 2605 } 2364 2606 … … 2366 2608 mrb_str_to_dbl(mrb_state *mrb, mrb_value str, mrb_bool badcheck) 2367 2609 { 2368 char *s; 2369 mrb_int len; 2370 2371 str = mrb_str_to_str(mrb, str); 2372 s = RSTRING_PTR(str); 2373 len = RSTRING_LEN(str); 2374 if (s) { 2375 if (badcheck && memchr(s, '\0', len)) { 2376 mrb_raise(mrb, E_ARGUMENT_ERROR, "string for Float contains null byte"); 2377 } 2378 if (s[len]) { /* no sentinel somehow */ 2379 struct RString *temp_str = str_new(mrb, s, len); 2380 s = RSTR_PTR(temp_str); 2381 } 2382 } 2383 return mrb_cstr_to_dbl(mrb, s, badcheck); 2610 return mrb_str_len_to_dbl(mrb, RSTRING_PTR(str), RSTRING_LEN(str), badcheck); 2384 2611 } 2385 2612 … … 2403 2630 return mrb_float_value(mrb, mrb_str_to_dbl(mrb, self, FALSE)); 2404 2631 } 2632 #endif 2405 2633 2406 2634 /* 15.2.10.5.40 */ … … 2408 2636 * call-seq: 2409 2637 * str.to_s => str 2410 * str.to_str => str2411 2638 * 2412 2639 * Returns the receiver. 
… … 2436 2663 mrb_bool modify = FALSE; 2437 2664 2438 mrb_str_modify (mrb, s);2665 mrb_str_modify_keep_ascii(mrb, s); 2439 2666 p = RSTRING_PTR(str); 2440 2667 pend = RSTRING_END(str); … … 2472 2699 } 2473 2700 2474 #define IS_EVSTR(p,e) ((p) < (e) && (*(p) == '$' || *(p) == '@' || *(p) == '{'))2475 2476 2701 /* 2477 2702 * call-seq: … … 2484 2709 mrb_str_dump(mrb_state *mrb, mrb_value str) 2485 2710 { 2486 mrb_int len; 2487 const char *p, *pend; 2488 char *q; 2489 struct RString *result; 2490 2491 len = 2; /* "" */ 2492 p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str); 2493 while (p < pend) { 2494 unsigned char c = *p++; 2495 switch (c) { 2496 case '"': case '\\': 2497 case '\n': case '\r': 2498 case '\t': case '\f': 2499 case '\013': case '\010': case '\007': case '\033': 2500 len += 2; 2501 break; 2502 2503 case '#': 2504 len += IS_EVSTR(p, pend) ? 2 : 1; 2505 break; 2506 2507 default: 2508 if (ISPRINT(c)) { 2509 len++; 2510 } 2511 else { 2512 len += 4; /* \NNN */ 2513 } 2514 break; 2515 } 2516 } 2517 2518 result = str_new(mrb, 0, len); 2519 str_with_class(mrb, result, str); 2520 p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str); 2521 q = RSTR_PTR(result); 2522 *q++ = '"'; 2523 while (p < pend) { 2524 unsigned char c = *p++; 2525 2526 switch (c) { 2527 case '"': 2528 case '\\': 2529 *q++ = '\\'; 2530 *q++ = c; 2531 break; 2532 2533 case '\n': 2534 *q++ = '\\'; 2535 *q++ = 'n'; 2536 break; 2537 2538 case '\r': 2539 *q++ = '\\'; 2540 *q++ = 'r'; 2541 break; 2542 2543 case '\t': 2544 *q++ = '\\'; 2545 *q++ = 't'; 2546 break; 2547 2548 case '\f': 2549 *q++ = '\\'; 2550 *q++ = 'f'; 2551 break; 2552 2553 case '\013': 2554 *q++ = '\\'; 2555 *q++ = 'v'; 2556 break; 2557 2558 case '\010': 2559 *q++ = '\\'; 2560 *q++ = 'b'; 2561 break; 2562 2563 case '\007': 2564 *q++ = '\\'; 2565 *q++ = 'a'; 2566 break; 2567 2568 case '\033': 2569 *q++ = '\\'; 2570 *q++ = 'e'; 2571 break; 2572 2573 case '#': 2574 if (IS_EVSTR(p, pend)) *q++ = '\\'; 2575 *q++ = '#'; 2576 break; 2577 2578 
default: 2579 if (ISPRINT(c)) { 2580 *q++ = c; 2581 } 2582 else { 2583 *q++ = '\\'; 2584 q[2] = '0' + c % 8; c /= 8; 2585 q[1] = '0' + c % 8; c /= 8; 2586 q[0] = '0' + c % 8; 2587 q += 3; 2588 } 2589 } 2590 } 2591 *q = '"'; 2592 return mrb_obj_value(result); 2711 return str_escape(mrb, str, FALSE); 2593 2712 } 2594 2713 … … 2596 2715 mrb_str_cat(mrb_state *mrb, mrb_value str, const char *ptr, size_t len) 2597 2716 { 2598 str_buf_cat(mrb, mrb_str_ptr(str), ptr, len); 2717 struct RString *s = mrb_str_ptr(str); 2718 size_t capa; 2719 size_t total; 2720 ptrdiff_t off = -1; 2721 2722 if (len == 0) return str; 2723 mrb_str_modify(mrb, s); 2724 if (ptr >= RSTR_PTR(s) && ptr <= RSTR_PTR(s) + (size_t)RSTR_LEN(s)) { 2725 off = ptr - RSTR_PTR(s); 2726 } 2727 2728 capa = RSTR_CAPA(s); 2729 total = RSTR_LEN(s)+len; 2730 if (total >= MRB_SSIZE_MAX) { 2731 size_error: 2732 mrb_raise(mrb, E_ARGUMENT_ERROR, "string size too big"); 2733 } 2734 if (capa <= total) { 2735 if (capa == 0) capa = 1; 2736 while (capa <= total) { 2737 if (capa <= MRB_SSIZE_MAX / 2) { 2738 capa *= 2; 2739 } 2740 else { 2741 capa = total+1; 2742 } 2743 } 2744 if (capa <= total || capa > MRB_SSIZE_MAX) { 2745 goto size_error; 2746 } 2747 resize_capa(mrb, s, capa); 2748 } 2749 if (off != -1) { 2750 ptr = RSTR_PTR(s) + off; 2751 } 2752 memcpy(RSTR_PTR(s) + RSTR_LEN(s), ptr, len); 2753 mrb_assert_int_fit(size_t, total, mrb_ssize, MRB_SSIZE_MAX); 2754 RSTR_SET_LEN(s, total); 2755 RSTR_PTR(s)[total] = '\0'; /* sentinel */ 2599 2756 return str; 2600 2757 } … … 2609 2766 mrb_str_cat_str(mrb_state *mrb, mrb_value str, mrb_value str2) 2610 2767 { 2768 if (mrb_str_ptr(str) == mrb_str_ptr(str2)) { 2769 mrb_str_modify(mrb, mrb_str_ptr(str)); 2770 } 2611 2771 return mrb_str_cat(mrb, str, RSTRING_PTR(str2), RSTRING_LEN(str2)); 2612 2772 } … … 2615 2775 mrb_str_append(mrb_state *mrb, mrb_value str1, mrb_value str2) 2616 2776 { 2617 str2 = mrb_str_to_str(mrb, str2);2777 mrb_to_str(mrb, str2); 2618 2778 return 
mrb_str_cat_str(mrb, str1, str2); 2619 2779 } 2620 2621 #define CHAR_ESC_LEN 13 /* sizeof(\x{ hex of 32bit unsigned int } \0) */2622 2780 2623 2781 /* … … 2635 2793 mrb_str_inspect(mrb_state *mrb, mrb_value str) 2636 2794 { 2637 const char *p, *pend; 2638 char buf[CHAR_ESC_LEN + 1]; 2639 mrb_value result = mrb_str_new_lit(mrb, "\""); 2640 2641 p = RSTRING_PTR(str); pend = RSTRING_END(str); 2642 for (;p < pend; p++) { 2643 unsigned char c, cc; 2644 #ifdef MRB_UTF8_STRING 2645 mrb_int clen; 2646 2647 clen = utf8len(p, pend); 2648 if (clen > 1) { 2649 mrb_int i; 2650 2651 for (i=0; i<clen; i++) { 2652 buf[i] = p[i]; 2653 } 2654 mrb_str_cat(mrb, result, buf, clen); 2655 p += clen-1; 2656 continue; 2657 } 2658 #endif 2659 c = *p; 2660 if (c == '"'|| c == '\\' || (c == '#' && IS_EVSTR(p+1, pend))) { 2661 buf[0] = '\\'; buf[1] = c; 2662 mrb_str_cat(mrb, result, buf, 2); 2663 continue; 2664 } 2665 if (ISPRINT(c)) { 2666 buf[0] = c; 2667 mrb_str_cat(mrb, result, buf, 1); 2668 continue; 2669 } 2670 switch (c) { 2671 case '\n': cc = 'n'; break; 2672 case '\r': cc = 'r'; break; 2673 case '\t': cc = 't'; break; 2674 case '\f': cc = 'f'; break; 2675 case '\013': cc = 'v'; break; 2676 case '\010': cc = 'b'; break; 2677 case '\007': cc = 'a'; break; 2678 case 033: cc = 'e'; break; 2679 default: cc = 0; break; 2680 } 2681 if (cc) { 2682 buf[0] = '\\'; 2683 buf[1] = (char)cc; 2684 mrb_str_cat(mrb, result, buf, 2); 2685 continue; 2686 } 2687 else { 2688 buf[0] = '\\'; 2689 buf[3] = '0' + c % 8; c /= 8; 2690 buf[2] = '0' + c % 8; c /= 8; 2691 buf[1] = '0' + c % 8; 2692 mrb_str_cat(mrb, result, buf, 4); 2693 continue; 2694 } 2695 } 2696 mrb_str_cat_lit(mrb, result, "\""); 2697 2698 return result; 2795 return str_escape(mrb, str, TRUE); 2699 2796 } 2700 2797 … … 2722 2819 } 2723 2820 2821 /* 2822 * call-seq: 2823 * str.getbyte(index) -> 0 .. 255 2824 * 2825 * returns the <i>index</i>th byte as an integer. 
2826 */ 2827 static mrb_value 2828 mrb_str_getbyte(mrb_state *mrb, mrb_value str) 2829 { 2830 mrb_int pos; 2831 mrb_get_args(mrb, "i", &pos); 2832 2833 if (pos < 0) 2834 pos += RSTRING_LEN(str); 2835 if (pos < 0 || RSTRING_LEN(str) <= pos) 2836 return mrb_nil_value(); 2837 2838 return mrb_fixnum_value((unsigned char)RSTRING_PTR(str)[pos]); 2839 } 2840 2841 /* 2842 * call-seq: 2843 * str.setbyte(index, integer) -> integer 2844 * 2845 * modifies the <i>index</i>th byte as <i>integer</i>. 2846 */ 2847 static mrb_value 2848 mrb_str_setbyte(mrb_state *mrb, mrb_value str) 2849 { 2850 mrb_int pos, byte; 2851 mrb_int len; 2852 2853 mrb_get_args(mrb, "ii", &pos, &byte); 2854 2855 len = RSTRING_LEN(str); 2856 if (pos < -len || len <= pos) 2857 mrb_raisef(mrb, E_INDEX_ERROR, "index %i out of string", pos); 2858 if (pos < 0) 2859 pos += len; 2860 2861 mrb_str_modify(mrb, mrb_str_ptr(str)); 2862 byte &= 0xff; 2863 RSTRING_PTR(str)[pos] = (unsigned char)byte; 2864 return mrb_fixnum_value((unsigned char)byte); 2865 } 2866 2867 /* 2868 * call-seq: 2869 * str.byteslice(integer) -> new_str or nil 2870 * str.byteslice(integer, integer) -> new_str or nil 2871 * str.byteslice(range) -> new_str or nil 2872 * 2873 * Byte Reference---If passed a single Integer, returns a 2874 * substring of one byte at that position. If passed two Integer 2875 * objects, returns a substring starting at the offset given by the first, and 2876 * a length given by the second. If given a Range, a substring containing 2877 * bytes at offsets given by the range is returned. In all three cases, if 2878 * an offset is negative, it is counted from the end of <i>str</i>. Returns 2879 * <code>nil</code> if the initial offset falls outside the string, the length 2880 * is negative, or the beginning of the range is greater than the end. 2881 * The encoding of the resulted string keeps original encoding. 
2882 * 2883 * "hello".byteslice(1) #=> "e" 2884 * "hello".byteslice(-1) #=> "o" 2885 * "hello".byteslice(1, 2) #=> "el" 2886 * "\x80\u3042".byteslice(1, 3) #=> "\u3042" 2887 * "\x03\u3042\xff".byteslice(1..3) #=> "\u3042" 2888 */ 2889 static mrb_value 2890 mrb_str_byteslice(mrb_state *mrb, mrb_value str) 2891 { 2892 mrb_value a1, a2; 2893 mrb_int str_len = RSTRING_LEN(str), beg, len; 2894 mrb_bool empty = TRUE; 2895 2896 if (mrb_get_args(mrb, "o|o", &a1, &a2) == 2) { 2897 beg = mrb_fixnum(mrb_to_int(mrb, a1)); 2898 len = mrb_fixnum(mrb_to_int(mrb, a2)); 2899 } 2900 else if (mrb_range_p(a1)) { 2901 if (mrb_range_beg_len(mrb, a1, &beg, &len, str_len, TRUE) != MRB_RANGE_OK) { 2902 return mrb_nil_value(); 2903 } 2904 } 2905 else { 2906 beg = mrb_fixnum(mrb_to_int(mrb, a1)); 2907 len = 1; 2908 empty = FALSE; 2909 } 2910 2911 if (mrb_str_beg_len(str_len, &beg, &len) && (empty || len != 0)) { 2912 return mrb_str_byte_subseq(mrb, str, beg, len); 2913 } 2914 else { 2915 return mrb_nil_value(); 2916 } 2917 } 2918 2724 2919 /* ---------------------------*/ 2725 2920 void … … 2728 2923 struct RClass *s; 2729 2924 2730 mrb_static_assert(RSTRING_EMBED_LEN_MAX < (1 << 5), "pointer size too big for embedded string"); 2925 mrb_static_assert(RSTRING_EMBED_LEN_MAX < (1 << MRB_STR_EMBED_LEN_BIT), 2926 "pointer size too big for embedded string"); 2731 2927 2732 2928 mrb->string_class = s = mrb_define_class(mrb, "String", mrb->object_class); /* 15.2.10 */ … … 2740 2936 mrb_define_method(mrb, s, "*", mrb_str_times, MRB_ARGS_REQ(1)); /* 15.2.10.5.5 */ 2741 2937 mrb_define_method(mrb, s, "[]", mrb_str_aref_m, MRB_ARGS_ANY()); /* 15.2.10.5.6 */ 2938 mrb_define_method(mrb, s, "[]=", mrb_str_aset_m, MRB_ARGS_ANY()); 2742 2939 mrb_define_method(mrb, s, "capitalize", mrb_str_capitalize, MRB_ARGS_NONE()); /* 15.2.10.5.7 */ 2743 2940 mrb_define_method(mrb, s, "capitalize!", mrb_str_capitalize_bang, MRB_ARGS_NONE()); /* 15.2.10.5.8 */ … … 2753 2950 mrb_define_method(mrb, s, "hash", mrb_str_hash_m, 
MRB_ARGS_NONE()); /* 15.2.10.5.20 */ 2754 2951 mrb_define_method(mrb, s, "include?", mrb_str_include, MRB_ARGS_REQ(1)); /* 15.2.10.5.21 */ 2755 mrb_define_method(mrb, s, "index", mrb_str_index , MRB_ARGS_ANY()); /* 15.2.10.5.22 */2952 mrb_define_method(mrb, s, "index", mrb_str_index_m, MRB_ARGS_ARG(1,1)); /* 15.2.10.5.22 */ 2756 2953 mrb_define_method(mrb, s, "initialize", mrb_str_init, MRB_ARGS_REQ(1)); /* 15.2.10.5.23 */ 2757 2954 mrb_define_method(mrb, s, "initialize_copy", mrb_str_replace, MRB_ARGS_REQ(1)); /* 15.2.10.5.24 */ … … 2766 2963 mrb_define_method(mrb, s, "split", mrb_str_split_m, MRB_ARGS_ANY()); /* 15.2.10.5.35 */ 2767 2964 2965 #ifndef MRB_WITHOUT_FLOAT 2768 2966 mrb_define_method(mrb, s, "to_f", mrb_str_to_f, MRB_ARGS_NONE()); /* 15.2.10.5.38 */ 2967 #endif 2769 2968 mrb_define_method(mrb, s, "to_i", mrb_str_to_i, MRB_ARGS_ANY()); /* 15.2.10.5.39 */ 2770 2969 mrb_define_method(mrb, s, "to_s", mrb_str_to_s, MRB_ARGS_NONE()); /* 15.2.10.5.40 */ … … 2775 2974 mrb_define_method(mrb, s, "inspect", mrb_str_inspect, MRB_ARGS_NONE()); /* 15.2.10.5.46(x) */ 2776 2975 mrb_define_method(mrb, s, "bytes", mrb_str_bytes, MRB_ARGS_NONE()); 2777 } 2778 2779 /* 2780 * Source code for the "strtod" library procedure. 2976 2977 mrb_define_method(mrb, s, "getbyte", mrb_str_getbyte, MRB_ARGS_REQ(1)); 2978 mrb_define_method(mrb, s, "setbyte", mrb_str_setbyte, MRB_ARGS_REQ(2)); 2979 mrb_define_method(mrb, s, "byteslice", mrb_str_byteslice, MRB_ARGS_ARG(1,1)); 2980 } 2981 2982 #ifndef MRB_WITHOUT_FLOAT 2983 /* 2984 * Source code for the "strtod" library procedure. 2781 2985 * 2782 2986 * Copyright (c) 1988-1993 The Regents of the University of California. … … 2804 3008 static const double powersOf10[] = {/* Table giving binary powers of 10. Entry */ 2805 3009 10., /* is 10^2^i. Used to convert decimal */ 2806 100., 3010 100., /* exponents into floating-point numbers. 
*/ 2807 3011 1.0e4, 2808 3012 1.0e8, … … 2816 3020 MRB_API double 2817 3021 mrb_float_read(const char *string, char **endPtr) 2818 /* const char *string; 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 /* char **endPtr; 2831 3022 /* const char *string; A decimal ASCII floating-point number, 3023 * optionally preceded by white space. 3024 * Must have form "-I.FE-X", where I is the 3025 * integer part of the mantissa, F is the 3026 * fractional part of the mantissa, and X 3027 * is the exponent. Either of the signs 3028 * may be "+", "-", or omitted. Either I 3029 * or F may be omitted, or both. The decimal 3030 * point isn't necessary unless F is present. 3031 * The "E" may actually be an "e". E and X 3032 * may both be omitted (but not just one). 3033 */ 3034 /* char **endPtr; If non-NULL, store terminating character's 3035 * address here. */ 2832 3036 { 2833 3037 int sign, expSign = FALSE; 2834 3038 double fraction, dblExp; 2835 3039 const double *d; 2836 registerconst char *p;2837 registerint c;2838 int exp = 0; 2839 int fracExp = 0; 2840 2841 2842 2843 2844 2845 2846 2847 2848 int mantSize; 2849 int decPt; 2850 2851 const char *pExp; 2852 3040 const char *p; 3041 int c; 3042 int exp = 0; /* Exponent read from "EX" field. */ 3043 int fracExp = 0; /* Exponent that derives from the fractional 3044 * part. Under normal circumstatnces, it is 3045 * the negative of the number of digits in F. 3046 * However, if I is very long, the last digits 3047 * of I get dropped (otherwise a long I with a 3048 * large negative exponent could cause an 3049 * unnecessary overflow on I alone). In this 3050 * case, fracExp is incremented one for each 3051 * dropped digit. */ 3052 int mantSize; /* Number of digits in mantissa. */ 3053 int decPt; /* Number of mantissa digits BEFORE decimal 3054 * point. */ 3055 const char *pExp; /* Temporarily holds location of exponent 3056 * in string. 
*/ 2853 3057 2854 3058 /* … … 2857 3061 2858 3062 p = string; 2859 while ( isspace(*p)) {2860 3063 while (ISSPACE(*p)) { 3064 p += 1; 2861 3065 } 2862 3066 if (*p == '-') { 2863 2864 3067 sign = TRUE; 3068 p += 1; 2865 3069 } 2866 3070 else { 2867 2868 2869 2870 3071 if (*p == '+') { 3072 p += 1; 3073 } 3074 sign = FALSE; 2871 3075 } 2872 3076 … … 2879 3083 for (mantSize = 0; ; mantSize += 1) 2880 3084 { 2881 2882 if (!isdigit(c)) {2883 2884 2885 2886 2887 2888 3085 c = *p; 3086 if (!ISDIGIT(c)) { 3087 if ((c != '.') || (decPt >= 0)) { 3088 break; 3089 } 3090 decPt = mantSize; 3091 } 3092 p += 1; 2889 3093 } 2890 3094 … … 2899 3103 p -= mantSize; 2900 3104 if (decPt < 0) { 2901 3105 decPt = mantSize; 2902 3106 } 2903 3107 else { 2904 mantSize -= 1;/* One of the digits was the point. */3108 mantSize -= 1; /* One of the digits was the point. */ 2905 3109 } 2906 3110 if (mantSize > 18) { 2907 2908 2909 2910 2911 2912 2913 3111 if (decPt - 18 > 29999) { 3112 fracExp = 29999; 3113 } 3114 else { 3115 fracExp = decPt - 18; 3116 } 3117 mantSize = 18; 2914 3118 } 2915 3119 else { 2916 3120 fracExp = decPt - mantSize; 2917 3121 } 2918 3122 if (mantSize == 0) { 2919 2920 2921 3123 fraction = 0.0; 3124 p = string; 3125 goto done; 2922 3126 } 2923 3127 else { 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 3128 int frac1, frac2; 3129 frac1 = 0; 3130 for ( ; mantSize > 9; mantSize -= 1) 3131 { 3132 c = *p; 3133 p += 1; 3134 if (c == '.') { 3135 c = *p; 3136 p += 1; 3137 } 3138 frac1 = 10*frac1 + (c - '0'); 3139 } 3140 frac2 = 0; 3141 for (; mantSize > 0; mantSize -= 1) 3142 { 3143 c = *p; 3144 p += 1; 3145 if (c == '.') { 3146 c = *p; 3147 p += 1; 3148 } 3149 frac2 = 10*frac2 + (c - '0'); 3150 } 3151 fraction = (1.0e9 * frac1) + frac2; 2948 3152 } 2949 3153 … … 2954 3158 p = pExp; 2955 3159 if ((*p == 'E') || (*p == 'e')) { 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 while (isdigit(*p)) {2968 
2969 2970 2971 2972 2973 3160 p += 1; 3161 if (*p == '-') { 3162 expSign = TRUE; 3163 p += 1; 3164 } 3165 else { 3166 if (*p == '+') { 3167 p += 1; 3168 } 3169 expSign = FALSE; 3170 } 3171 while (ISDIGIT(*p)) { 3172 exp = exp * 10 + (*p - '0'); 3173 if (exp > 19999) { 3174 exp = 19999; 3175 } 3176 p += 1; 3177 } 2974 3178 } 2975 3179 if (expSign) { 2976 3180 exp = fracExp - exp; 2977 3181 } 2978 3182 else { 2979 3183 exp = fracExp + exp; 2980 3184 } 2981 3185 … … 2988 3192 2989 3193 if (exp < 0) { 2990 2991 3194 expSign = TRUE; 3195 exp = -exp; 2992 3196 } 2993 3197 else { 2994 3198 expSign = FALSE; 2995 3199 } 2996 3200 if (exp > maxExponent) { 2997 2998 3201 exp = maxExponent; 3202 errno = ERANGE; 2999 3203 } 3000 3204 dblExp = 1.0; 3001 3205 for (d = powersOf10; exp != 0; exp >>= 1, d += 1) { 3002 3003 3004 3206 if (exp & 01) { 3207 dblExp *= *d; 3208 } 3005 3209 } 3006 3210 if (expSign) { 3007 3211 fraction /= dblExp; 3008 3212 } 3009 3213 else { 3010 3214 fraction *= dblExp; 3011 3215 } 3012 3216 3013 3217 done: 3014 3218 if (endPtr != NULL) { 3015 3219 *endPtr = (char *) p; 3016 3220 } 3017 3221 3018 3222 if (sign) { 3019 3223 return -fraction; 3020 3224 } 3021 3225 return fraction; 3022 3226 } 3227 #endif
Note:
See TracChangeset
for help on using the changeset viewer.