source: EcnlProtoTool/trunk/mruby-1.3.0/src/string.c@ 331

Last change on this file since 331 was 331, checked in by coas-nagasima, 6 years ago

prototoolに関連するプロジェクトをnewlibからmuslを使うよう変更・更新
ntshellをnewlibの下位の実装から、muslのsyscallの実装に変更・更新
以下のOSSをアップデート
・mruby-1.3.0
・musl-1.1.18
・onigmo-6.1.3
・tcc-0.9.27
以下のOSSを追加
・openssl-1.1.0e
・curl-7.57.0
・zlib-1.2.11
以下のmrbgemsを追加
・iij/mruby-digest
・iij/mruby-env
・iij/mruby-errno
・iij/mruby-iijson
・iij/mruby-ipaddr
・iij/mruby-mock
・iij/mruby-require
・iij/mruby-tls-openssl

  • Property svn:eol-style set to native
  • Property svn:mime-type set to text/x-csrc;charset=UTF-8
File size: 73.4 KB
Line 
1/*
2** string.c - String class
3**
4** See Copyright Notice in mruby.h
5*/
6
7#ifdef _MSC_VER
8# define _CRT_NONSTDC_NO_DEPRECATE
9#endif
10
11#include <float.h>
12#include <limits.h>
13#include <stddef.h>
14#include <stdlib.h>
15#include <string.h>
16#include <mruby.h>
17#include <mruby/array.h>
18#include <mruby/class.h>
19#include <mruby/range.h>
20#include <mruby/string.h>
21#include <mruby/re.h>
22
23typedef struct mrb_shared_string {
24 mrb_bool nofree : 1;
25 int refcnt;
26 char *ptr;
27 mrb_int len;
28} mrb_shared_string;
29
/* Digit characters '0'-'9' followed by 'a'-'z'. */
const char mrb_digitmap[] = "0123456789abcdefghijklmnopqrstuvwxyz";

/* Allocate an uninitialised MRB_TT_STRING object of the core String class. */
#define mrb_obj_alloc_string(mrb) \
  ((struct RString*)mrb_obj_alloc((mrb), MRB_TT_STRING, (mrb)->string_class))
33
34static struct RString*
35str_new_static(mrb_state *mrb, const char *p, size_t len)
36{
37 struct RString *s;
38
39 if (len >= MRB_INT_MAX) {
40 mrb_raise(mrb, E_ARGUMENT_ERROR, "string size too big");
41 }
42 s = mrb_obj_alloc_string(mrb);
43 s->as.heap.len = len;
44 s->as.heap.aux.capa = 0; /* nofree */
45 s->as.heap.ptr = (char *)p;
46 s->flags = MRB_STR_NOFREE;
47
48 return s;
49}
50
51static struct RString*
52str_new(mrb_state *mrb, const char *p, size_t len)
53{
54 struct RString *s;
55
56 if (p && mrb_ro_data_p(p)) {
57 return str_new_static(mrb, p, len);
58 }
59 s = mrb_obj_alloc_string(mrb);
60 if (len < RSTRING_EMBED_LEN_MAX) {
61 RSTR_SET_EMBED_FLAG(s);
62 RSTR_SET_EMBED_LEN(s, len);
63 if (p) {
64 memcpy(s->as.ary, p, len);
65 }
66 }
67 else {
68 if (len >= MRB_INT_MAX) {
69 mrb_raise(mrb, E_ARGUMENT_ERROR, "string size too big");
70 }
71 s->as.heap.len = len;
72 s->as.heap.aux.capa = len;
73 s->as.heap.ptr = (char *)mrb_malloc(mrb, len+1);
74 if (p) {
75 memcpy(s->as.heap.ptr, p, len);
76 }
77 }
78 RSTR_PTR(s)[len] = '\0';
79 return s;
80}
81
82static inline void
83str_with_class(mrb_state *mrb, struct RString *s, mrb_value obj)
84{
85 s->c = mrb_str_ptr(obj)->c;
86}
87
88static mrb_value
89mrb_str_new_empty(mrb_state *mrb, mrb_value str)
90{
91 struct RString *s = str_new(mrb, 0, 0);
92
93 str_with_class(mrb, s, str);
94 return mrb_obj_value(s);
95}
96
97#ifndef MRB_STR_BUF_MIN_SIZE
98# define MRB_STR_BUF_MIN_SIZE 128
99#endif
100
101MRB_API mrb_value
102mrb_str_buf_new(mrb_state *mrb, size_t capa)
103{
104 struct RString *s;
105
106 s = mrb_obj_alloc_string(mrb);
107
108 if (capa >= MRB_INT_MAX) {
109 mrb_raise(mrb, E_ARGUMENT_ERROR, "string capacity size too big");
110 }
111 if (capa < MRB_STR_BUF_MIN_SIZE) {
112 capa = MRB_STR_BUF_MIN_SIZE;
113 }
114 s->as.heap.len = 0;
115 s->as.heap.aux.capa = capa;
116 s->as.heap.ptr = (char *)mrb_malloc(mrb, capa+1);
117 RSTR_PTR(s)[0] = '\0';
118
119 return mrb_obj_value(s);
120}
121
122static void
123resize_capa(mrb_state *mrb, struct RString *s, size_t capacity)
124{
125#if SIZE_MAX > MRB_INT_MAX
126 mrb_assert(capacity < MRB_INT_MAX);
127#endif
128 if (RSTR_EMBED_P(s)) {
129 if (RSTRING_EMBED_LEN_MAX < capacity) {
130 char *const tmp = (char *)mrb_malloc(mrb, capacity+1);
131 const mrb_int len = RSTR_EMBED_LEN(s);
132 memcpy(tmp, s->as.ary, len);
133 RSTR_UNSET_EMBED_FLAG(s);
134 s->as.heap.ptr = tmp;
135 s->as.heap.len = len;
136 s->as.heap.aux.capa = (mrb_int)capacity;
137 }
138 }
139 else {
140 s->as.heap.ptr = (char*)mrb_realloc(mrb, RSTR_PTR(s), capacity+1);
141 s->as.heap.aux.capa = (mrb_int)capacity;
142 }
143}
144
145static void
146str_buf_cat(mrb_state *mrb, struct RString *s, const char *ptr, size_t len)
147{
148 size_t capa;
149 size_t total;
150 ptrdiff_t off = -1;
151
152 if (len == 0) return;
153 mrb_str_modify(mrb, s);
154 if (ptr >= RSTR_PTR(s) && ptr <= RSTR_PTR(s) + (size_t)RSTR_LEN(s)) {
155 off = ptr - RSTR_PTR(s);
156 }
157
158 capa = RSTR_CAPA(s);
159 if (capa <= RSTRING_EMBED_LEN_MAX)
160 capa = RSTRING_EMBED_LEN_MAX+1;
161
162 total = RSTR_LEN(s)+len;
163 if (total >= MRB_INT_MAX) {
164 size_error:
165 mrb_raise(mrb, E_ARGUMENT_ERROR, "string size too big");
166 }
167 if (capa <= total) {
168 while (total > capa) {
169 if (capa <= MRB_INT_MAX / 2) {
170 capa *= 2;
171 }
172 else {
173 capa = total;
174 }
175 }
176 if (capa < total || capa > MRB_INT_MAX) {
177 goto size_error;
178 }
179 resize_capa(mrb, s, capa);
180 }
181 if (off != -1) {
182 ptr = RSTR_PTR(s) + off;
183 }
184 memcpy(RSTR_PTR(s) + RSTR_LEN(s), ptr, len);
185 mrb_assert_int_fit(size_t, total, mrb_int, MRB_INT_MAX);
186 RSTR_SET_LEN(s, total);
187 RSTR_PTR(s)[total] = '\0'; /* sentinel */
188}
189
190MRB_API mrb_value
191mrb_str_new(mrb_state *mrb, const char *p, size_t len)
192{
193 return mrb_obj_value(str_new(mrb, p, len));
194}
195
196/*
197 * call-seq: (Caution! NULL string)
198 * String.new(str="") => new_str
199 *
200 * Returns a new string object containing a copy of <i>str</i>.
201 */
202
203MRB_API mrb_value
204mrb_str_new_cstr(mrb_state *mrb, const char *p)
205{
206 struct RString *s;
207 size_t len;
208
209 if (p) {
210 len = strlen(p);
211 }
212 else {
213 len = 0;
214 }
215
216 s = str_new(mrb, p, len);
217
218 return mrb_obj_value(s);
219}
220
221MRB_API mrb_value
222mrb_str_new_static(mrb_state *mrb, const char *p, size_t len)
223{
224 struct RString *s = str_new_static(mrb, p, len);
225 return mrb_obj_value(s);
226}
227
228static void
229str_decref(mrb_state *mrb, mrb_shared_string *shared)
230{
231 shared->refcnt--;
232 if (shared->refcnt == 0) {
233 if (!shared->nofree) {
234 mrb_free(mrb, shared->ptr);
235 }
236 mrb_free(mrb, shared);
237 }
238}
239
240void
241mrb_gc_free_str(mrb_state *mrb, struct RString *str)
242{
243 if (RSTR_EMBED_P(str))
244 /* no code */;
245 else if (RSTR_SHARED_P(str))
246 str_decref(mrb, str->as.heap.aux.shared);
247 else if (!RSTR_NOFREE_P(str))
248 mrb_free(mrb, str->as.heap.ptr);
249}
250
251#ifdef MRB_UTF8_STRING
/*
 * Sequence length indexed by first byte, 16 entries per row.
 * 0x00-0xBF and 0xF5-0xFF map to 1, 0xC0-0xDF to 2, 0xE0-0xEF to 3 and
 * 0xF0-0xF4 to 4.
 */
static const char utf8len_codepage[256] =
{
  /* 0x00 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0x10 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0x20 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0x30 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0x40 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0x50 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0x60 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0x70 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0x80 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0x90 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0xA0 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0xB0 */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  /* 0xC0 */ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  /* 0xD0 */ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  /* 0xE0 */ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
  /* 0xF0 */ 4,4,4,4,4,1,1,1,1,1,1,1,1,1,1,1,
};
263
264static mrb_int
265utf8len(const char* p, const char* e)
266{
267 mrb_int len;
268 mrb_int i;
269
270 len = utf8len_codepage[(unsigned char)*p];
271 if (p + len > e) return 1;
272 for (i = 1; i < len; ++i)
273 if ((p[i] & 0xc0) != 0x80)
274 return 1;
275 return len;
276}
277
278static mrb_int
279utf8_strlen(mrb_value str, mrb_int len)
280{
281 mrb_int total = 0;
282 char* p = RSTRING_PTR(str);
283 char* e = p;
284 if (RSTRING(str)->flags & MRB_STR_NO_UTF) {
285 return RSTRING_LEN(str);
286 }
287 e += len < 0 ? RSTRING_LEN(str) : len;
288 while (p<e) {
289 p += utf8len(p, e);
290 total++;
291 }
292 if (RSTRING_LEN(str) == total) {
293 RSTRING(str)->flags |= MRB_STR_NO_UTF;
294 }
295 return total;
296}
297
298#define RSTRING_CHAR_LEN(s) utf8_strlen(s, -1)
299
300/* map character index to byte offset index */
301static mrb_int
302chars2bytes(mrb_value s, mrb_int off, mrb_int idx)
303{
304 mrb_int i, b, n;
305 const char *p = RSTRING_PTR(s) + off;
306 const char *e = RSTRING_END(s);
307
308 for (b=i=0; p<e && i<idx; i++) {
309 n = utf8len(p, e);
310 b += n;
311 p += n;
312 }
313 return b;
314}
315
316/* map byte offset to character index */
317static mrb_int
318bytes2chars(char *p, mrb_int bi)
319{
320 mrb_int i, b, n;
321
322 for (b=i=0; b<bi; i++) {
323 n = utf8len_codepage[(unsigned char)*p];
324 b += n;
325 p += n;
326 }
327 if (b != bi) return -1;
328 return i;
329}
330
331#define BYTES_ALIGN_CHECK(pos) if (pos < 0) return mrb_nil_value();
332#else
333#define RSTRING_CHAR_LEN(s) RSTRING_LEN(s)
334#define chars2bytes(p, off, ci) (ci)
335#define bytes2chars(p, bi) (bi)
336#define BYTES_ALIGN_CHECK(pos)
337#endif
338
339static inline mrb_int
340mrb_memsearch_qs(const unsigned char *xs, mrb_int m, const unsigned char *ys, mrb_int n)
341{
342 const unsigned char *x = xs, *xe = xs + m;
343 const unsigned char *y = ys;
344 int i, qstable[256];
345
346 /* Preprocessing */
347 for (i = 0; i < 256; ++i)
348 qstable[i] = m + 1;
349 for (; x < xe; ++x)
350 qstable[*x] = xe - x;
351 /* Searching */
352 for (; y + m <= ys + n; y += *(qstable + y[m])) {
353 if (*xs == *y && memcmp(xs, y, m) == 0)
354 return y - ys;
355 }
356 return -1;
357}
358
359static mrb_int
360mrb_memsearch(const void *x0, mrb_int m, const void *y0, mrb_int n)
361{
362 const unsigned char *x = (const unsigned char *)x0, *y = (const unsigned char *)y0;
363
364 if (m > n) return -1;
365 else if (m == n) {
366 return memcmp(x0, y0, m) == 0 ? 0 : -1;
367 }
368 else if (m < 1) {
369 return 0;
370 }
371 else if (m == 1) {
372 const unsigned char *ys = (const unsigned char *)memchr(y, *x, n);
373
374 if (ys)
375 return ys - y;
376 else
377 return -1;
378 }
379 return mrb_memsearch_qs((const unsigned char *)x0, m, (const unsigned char *)y0, n);
380}
381
382static void
383str_make_shared(mrb_state *mrb, struct RString *s)
384{
385 if (!RSTR_SHARED_P(s)) {
386 mrb_shared_string *shared = (mrb_shared_string *)mrb_malloc(mrb, sizeof(mrb_shared_string));
387
388 shared->refcnt = 1;
389 if (RSTR_EMBED_P(s)) {
390 const mrb_int len = RSTR_EMBED_LEN(s);
391 char *const tmp = (char *)mrb_malloc(mrb, len+1);
392 memcpy(tmp, s->as.ary, len);
393 tmp[len] = '\0';
394 RSTR_UNSET_EMBED_FLAG(s);
395 s->as.heap.ptr = tmp;
396 s->as.heap.len = len;
397 shared->nofree = FALSE;
398 shared->ptr = s->as.heap.ptr;
399 }
400 else if (RSTR_NOFREE_P(s)) {
401 shared->nofree = TRUE;
402 shared->ptr = s->as.heap.ptr;
403 RSTR_UNSET_NOFREE_FLAG(s);
404 }
405 else {
406 shared->nofree = FALSE;
407 if (s->as.heap.aux.capa > s->as.heap.len) {
408 s->as.heap.ptr = shared->ptr = (char *)mrb_realloc(mrb, s->as.heap.ptr, s->as.heap.len+1);
409 }
410 else {
411 shared->ptr = s->as.heap.ptr;
412 }
413 }
414 shared->len = s->as.heap.len;
415 s->as.heap.aux.shared = shared;
416 RSTR_SET_SHARED_FLAG(s);
417 }
418}
419
420static mrb_value
421byte_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
422{
423 struct RString *orig, *s;
424 mrb_shared_string *shared;
425
426 orig = mrb_str_ptr(str);
427 if (RSTR_EMBED_P(orig) || RSTR_LEN(orig) == 0) {
428 s = str_new(mrb, orig->as.ary+beg, len);
429 }
430 else {
431 str_make_shared(mrb, orig);
432 shared = orig->as.heap.aux.shared;
433 s = mrb_obj_alloc_string(mrb);
434 s->as.heap.ptr = orig->as.heap.ptr + beg;
435 s->as.heap.len = len;
436 s->as.heap.aux.shared = shared;
437 RSTR_SET_SHARED_FLAG(s);
438 shared->refcnt++;
439 }
440
441 return mrb_obj_value(s);
442}
443#ifdef MRB_UTF8_STRING
444static inline mrb_value
445str_subseq(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
446{
447 beg = chars2bytes(str, 0, beg);
448 len = chars2bytes(str, beg, len);
449
450 return byte_subseq(mrb, str, beg, len);
451}
452#else
453#define str_subseq(mrb, str, beg, len) byte_subseq(mrb, str, beg, len)
454#endif
455
456static mrb_value
457str_substr(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
458{
459 mrb_int clen = RSTRING_CHAR_LEN(str);
460
461 if (len < 0) return mrb_nil_value();
462 if (clen == 0) {
463 len = 0;
464 }
465 else if (beg < 0) {
466 beg = clen + beg;
467 }
468 if (beg > clen) return mrb_nil_value();
469 if (beg < 0) {
470 beg += clen;
471 if (beg < 0) return mrb_nil_value();
472 }
473 if (len > clen - beg)
474 len = clen - beg;
475 if (len <= 0) {
476 len = 0;
477 }
478 return str_subseq(mrb, str, beg, len);
479}
480
481static mrb_int
482str_index(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int offset)
483{
484 mrb_int pos;
485 char *s, *sptr;
486 mrb_int len, slen;
487
488 len = RSTRING_LEN(str);
489 slen = RSTRING_LEN(sub);
490 if (offset < 0) {
491 offset += len;
492 if (offset < 0) return -1;
493 }
494 if (len - offset < slen) return -1;
495 s = RSTRING_PTR(str);
496 if (offset) {
497 s += offset;
498 }
499 if (slen == 0) return offset;
500 /* need proceed one character at a time */
501 sptr = RSTRING_PTR(sub);
502 slen = RSTRING_LEN(sub);
503 len = RSTRING_LEN(str) - offset;
504 pos = mrb_memsearch(sptr, slen, s, len);
505 if (pos < 0) return pos;
506 return pos + offset;
507}
508
509static void
510check_frozen(mrb_state *mrb, struct RString *s)
511{
512 if (MRB_FROZEN_P(s)) {
513 mrb_raise(mrb, E_RUNTIME_ERROR, "can't modify frozen string");
514 }
515}
516
517static mrb_value
518str_replace(mrb_state *mrb, struct RString *s1, struct RString *s2)
519{
520 long len;
521
522 check_frozen(mrb, s1);
523 if (s1 == s2) return mrb_obj_value(s1);
524 s1->flags &= ~MRB_STR_NO_UTF;
525 s1->flags |= s2->flags&MRB_STR_NO_UTF;
526 len = RSTR_LEN(s2);
527 if (RSTR_SHARED_P(s1)) {
528 str_decref(mrb, s1->as.heap.aux.shared);
529 }
530 else if (!RSTR_EMBED_P(s1) && !RSTR_NOFREE_P(s1)) {
531 mrb_free(mrb, s1->as.heap.ptr);
532 }
533
534 RSTR_UNSET_NOFREE_FLAG(s1);
535
536 if (RSTR_SHARED_P(s2)) {
537L_SHARE:
538 RSTR_UNSET_EMBED_FLAG(s1);
539 s1->as.heap.ptr = s2->as.heap.ptr;
540 s1->as.heap.len = len;
541 s1->as.heap.aux.shared = s2->as.heap.aux.shared;
542 RSTR_SET_SHARED_FLAG(s1);
543 s1->as.heap.aux.shared->refcnt++;
544 }
545 else {
546 if (len <= RSTRING_EMBED_LEN_MAX) {
547 RSTR_UNSET_SHARED_FLAG(s1);
548 RSTR_SET_EMBED_FLAG(s1);
549 memcpy(s1->as.ary, RSTR_PTR(s2), len);
550 RSTR_SET_EMBED_LEN(s1, len);
551 }
552 else {
553 str_make_shared(mrb, s2);
554 goto L_SHARE;
555 }
556 }
557
558 return mrb_obj_value(s1);
559}
560
561static mrb_int
562str_rindex(mrb_state *mrb, mrb_value str, mrb_value sub, mrb_int pos)
563{
564 char *s, *sbeg, *t;
565 struct RString *ps = mrb_str_ptr(str);
566 mrb_int len = RSTRING_LEN(sub);
567
568 /* substring longer than string */
569 if (RSTR_LEN(ps) < len) return -1;
570 if (RSTR_LEN(ps) - pos < len) {
571 pos = RSTR_LEN(ps) - len;
572 }
573 sbeg = RSTR_PTR(ps);
574 s = RSTR_PTR(ps) + pos;
575 t = RSTRING_PTR(sub);
576 if (len) {
577 while (sbeg <= s) {
578 if (memcmp(s, t, len) == 0) {
579 return s - RSTR_PTR(ps);
580 }
581 s--;
582 }
583 return -1;
584 }
585 else {
586 return pos;
587 }
588}
589
590MRB_API mrb_int
591mrb_str_strlen(mrb_state *mrb, struct RString *s)
592{
593 mrb_int i, max = RSTR_LEN(s);
594 char *p = RSTR_PTR(s);
595
596 if (!p) return 0;
597 for (i=0; i<max; i++) {
598 if (p[i] == '\0') {
599 mrb_raise(mrb, E_ARGUMENT_ERROR, "string contains null byte");
600 }
601 }
602 return max;
603}
604
605#ifdef _WIN32
606#include <windows.h>
607
608char*
609mrb_utf8_from_locale(const char *str, size_t len)
610{
611 wchar_t* wcsp;
612 char* mbsp;
613 size_t mbssize, wcssize;
614
615 if (len == 0)
616 return strdup("");
617 if (len == -1)
618 len = strlen(str);
619 wcssize = MultiByteToWideChar(GetACP(), 0, str, len, NULL, 0);
620 wcsp = (wchar_t*) malloc((wcssize + 1) * sizeof(wchar_t));
621 if (!wcsp)
622 return NULL;
623 wcssize = MultiByteToWideChar(GetACP(), 0, str, len, wcsp, wcssize + 1);
624 wcsp[wcssize] = 0;
625
626 mbssize = WideCharToMultiByte(CP_UTF8, 0, (LPCWSTR) wcsp, -1, NULL, 0, NULL, NULL);
627 mbsp = (char*) malloc((mbssize + 1));
628 if (!mbsp) {
629 free(wcsp);
630 return NULL;
631 }
632 mbssize = WideCharToMultiByte(CP_UTF8, 0, (LPCWSTR) wcsp, -1, mbsp, mbssize, NULL, NULL);
633 mbsp[mbssize] = 0;
634 free(wcsp);
635 return mbsp;
636}
637
638char*
639mrb_locale_from_utf8(const char *utf8, size_t len)
640{
641 wchar_t* wcsp;
642 char* mbsp;
643 size_t mbssize, wcssize;
644
645 if (len == 0)
646 return strdup("");
647 if (len == -1)
648 len = strlen(utf8);
649 wcssize = MultiByteToWideChar(CP_UTF8, 0, utf8, len, NULL, 0);
650 wcsp = (wchar_t*) malloc((wcssize + 1) * sizeof(wchar_t));
651 if (!wcsp)
652 return NULL;
653 wcssize = MultiByteToWideChar(CP_UTF8, 0, utf8, len, wcsp, wcssize + 1);
654 wcsp[wcssize] = 0;
655 mbssize = WideCharToMultiByte(GetACP(), 0, (LPCWSTR) wcsp, -1, NULL, 0, NULL, NULL);
656 mbsp = (char*) malloc((mbssize + 1));
657 if (!mbsp) {
658 free(wcsp);
659 return NULL;
660 }
661 mbssize = WideCharToMultiByte(GetACP(), 0, (LPCWSTR) wcsp, -1, mbsp, mbssize, NULL, NULL);
662 mbsp[mbssize] = 0;
663 free(wcsp);
664 return mbsp;
665}
666#endif
667
668MRB_API void
669mrb_str_modify(mrb_state *mrb, struct RString *s)
670{
671 check_frozen(mrb, s);
672 s->flags &= ~MRB_STR_NO_UTF;
673 if (RSTR_SHARED_P(s)) {
674 mrb_shared_string *shared = s->as.heap.aux.shared;
675
676 if (shared->nofree == 0 && shared->refcnt == 1 && s->as.heap.ptr == shared->ptr) {
677 s->as.heap.ptr = shared->ptr;
678 s->as.heap.aux.capa = shared->len;
679 RSTR_PTR(s)[s->as.heap.len] = '\0';
680 mrb_free(mrb, shared);
681 }
682 else {
683 char *ptr, *p;
684 mrb_int len;
685
686 p = RSTR_PTR(s);
687 len = s->as.heap.len;
688 if (len < RSTRING_EMBED_LEN_MAX) {
689 RSTR_SET_EMBED_FLAG(s);
690 RSTR_SET_EMBED_LEN(s, len);
691 ptr = RSTR_PTR(s);
692 }
693 else {
694 ptr = (char *)mrb_malloc(mrb, (size_t)len + 1);
695 s->as.heap.ptr = ptr;
696 s->as.heap.aux.capa = len;
697 }
698 if (p) {
699 memcpy(ptr, p, len);
700 }
701 ptr[len] = '\0';
702 str_decref(mrb, shared);
703 }
704 RSTR_UNSET_SHARED_FLAG(s);
705 return;
706 }
707 if (RSTR_NOFREE_P(s)) {
708 char *p = s->as.heap.ptr;
709 mrb_int len = s->as.heap.len;
710
711 RSTR_UNSET_NOFREE_FLAG(s);
712 if (len < RSTRING_EMBED_LEN_MAX) {
713 RSTR_SET_EMBED_FLAG(s);
714 RSTR_SET_EMBED_LEN(s, len);
715 }
716 else {
717 s->as.heap.ptr = (char *)mrb_malloc(mrb, (size_t)len+1);
718 s->as.heap.aux.capa = len;
719 }
720 if (p) {
721 memcpy(RSTR_PTR(s), p, len);
722 }
723 RSTR_PTR(s)[len] = '\0';
724 return;
725 }
726}
727
728MRB_API mrb_value
729mrb_str_resize(mrb_state *mrb, mrb_value str, mrb_int len)
730{
731 mrb_int slen;
732 struct RString *s = mrb_str_ptr(str);
733
734 mrb_str_modify(mrb, s);
735 slen = RSTR_LEN(s);
736 if (len != slen) {
737 if (slen < len || slen - len > 256) {
738 resize_capa(mrb, s, len);
739 }
740 RSTR_SET_LEN(s, len);
741 RSTR_PTR(s)[len] = '\0'; /* sentinel */
742 }
743 return str;
744}
745
746MRB_API char*
747mrb_str_to_cstr(mrb_state *mrb, mrb_value str0)
748{
749 struct RString *s;
750
751 if (!mrb_string_p(str0)) {
752 mrb_raise(mrb, E_TYPE_ERROR, "expected String");
753 }
754
755 s = str_new(mrb, RSTRING_PTR(str0), RSTRING_LEN(str0));
756 if ((strlen(RSTR_PTR(s)) ^ RSTR_LEN(s)) != 0) {
757 mrb_raise(mrb, E_ARGUMENT_ERROR, "string contains null byte");
758 }
759 return RSTR_PTR(s);
760}
761
762/*
763 * call-seq: (Caution! String("abcd") change)
764 * String("abcdefg") = String("abcd") + String("efg")
765 *
766 * Returns a new string object containing a copy of <i>str</i>.
767 */
768MRB_API void
769mrb_str_concat(mrb_state *mrb, mrb_value self, mrb_value other)
770{
771 struct RString *s1 = mrb_str_ptr(self), *s2;
772 mrb_int len;
773
774 mrb_str_modify(mrb, s1);
775 if (!mrb_string_p(other)) {
776 other = mrb_str_to_str(mrb, other);
777 }
778 s2 = mrb_str_ptr(other);
779 if (RSTR_LEN(s2) == 0) {
780 return;
781 }
782 len = RSTR_LEN(s1) + RSTR_LEN(s2);
783
784 if (len < 0 || len >= MRB_INT_MAX) {
785 mrb_raise(mrb, E_ARGUMENT_ERROR, "string size too big");
786 }
787 if (RSTRING_CAPA(self) < len) {
788 resize_capa(mrb, s1, len);
789 }
790 memcpy(RSTR_PTR(s1)+RSTR_LEN(s1), RSTR_PTR(s2), RSTR_LEN(s2));
791 RSTR_SET_LEN(s1, len);
792 RSTR_PTR(s1)[len] = '\0';
793}
794
795/*
796 * call-seq: (Caution! String("abcd") remain)
797 * String("abcdefg") = String("abcd") + String("efg")
798 *
799 * Returns a new string object containing a copy of <i>str</i>.
800 */
801MRB_API mrb_value
802mrb_str_plus(mrb_state *mrb, mrb_value a, mrb_value b)
803{
804 struct RString *s = mrb_str_ptr(a);
805 struct RString *s2 = mrb_str_ptr(b);
806 struct RString *t;
807
808 t = str_new(mrb, 0, RSTR_LEN(s) + RSTR_LEN(s2));
809 memcpy(RSTR_PTR(t), RSTR_PTR(s), RSTR_LEN(s));
810 memcpy(RSTR_PTR(t) + RSTR_LEN(s), RSTR_PTR(s2), RSTR_LEN(s2));
811
812 return mrb_obj_value(t);
813}
814
815/* 15.2.10.5.2 */
816
817/*
818 * call-seq: (Caution! String("abcd") remain) for stack_argument
819 * String("abcdefg") = String("abcd") + String("efg")
820 *
821 * Returns a new string object containing a copy of <i>str</i>.
822 */
823static mrb_value
824mrb_str_plus_m(mrb_state *mrb, mrb_value self)
825{
826 mrb_value str;
827
828 mrb_get_args(mrb, "S", &str);
829 return mrb_str_plus(mrb, self, str);
830}
831
832/* 15.2.10.5.26 */
833/* 15.2.10.5.33 */
834/*
835 * call-seq:
836 * "abcd".size => int
837 *
838 * Returns the length of string.
839 */
840static mrb_value
841mrb_str_size(mrb_state *mrb, mrb_value self)
842{
843 mrb_int len = RSTRING_CHAR_LEN(self);
844 return mrb_fixnum_value(len);
845}
846
847static mrb_value
848mrb_str_bytesize(mrb_state *mrb, mrb_value self)
849{
850 mrb_int len = RSTRING_LEN(self);
851 return mrb_fixnum_value(len);
852}
853
854/* 15.2.10.5.1 */
855/*
856 * call-seq:
857 * str * integer => new_str
858 *
859 * Copy---Returns a new <code>String</code> containing <i>integer</i> copies of
860 * the receiver.
861 *
862 * "Ho! " * 3 #=> "Ho! Ho! Ho! "
863 */
864static mrb_value
865mrb_str_times(mrb_state *mrb, mrb_value self)
866{
867 mrb_int n,len,times;
868 struct RString *str2;
869 char *p;
870
871 mrb_get_args(mrb, "i", &times);
872 if (times < 0) {
873 mrb_raise(mrb, E_ARGUMENT_ERROR, "negative argument");
874 }
875 if (times && MRB_INT_MAX / times < RSTRING_LEN(self)) {
876 mrb_raise(mrb, E_ARGUMENT_ERROR, "argument too big");
877 }
878
879 len = RSTRING_LEN(self)*times;
880 str2 = str_new(mrb, 0, len);
881 str_with_class(mrb, str2, self);
882 p = RSTR_PTR(str2);
883 if (len > 0) {
884 n = RSTRING_LEN(self);
885 memcpy(p, RSTRING_PTR(self), n);
886 while (n <= len/2) {
887 memcpy(p + n, p, n);
888 n *= 2;
889 }
890 memcpy(p + n, p, len-n);
891 }
892 p[RSTR_LEN(str2)] = '\0';
893
894 return mrb_obj_value(str2);
895}
896/* -------------------------------------------------------------- */
897
898#define lesser(a,b) (((a)>(b))?(b):(a))
899
900/* ---------------------------*/
901/*
902 * call-seq:
903 * mrb_value str1 <=> mrb_value str2 => int
904 * > 1
905 * = 0
906 * < -1
907 */
908MRB_API int
909mrb_str_cmp(mrb_state *mrb, mrb_value str1, mrb_value str2)
910{
911 mrb_int len;
912 mrb_int retval;
913 struct RString *s1 = mrb_str_ptr(str1);
914 struct RString *s2 = mrb_str_ptr(str2);
915
916 len = lesser(RSTR_LEN(s1), RSTR_LEN(s2));
917 retval = memcmp(RSTR_PTR(s1), RSTR_PTR(s2), len);
918 if (retval == 0) {
919 if (RSTR_LEN(s1) == RSTR_LEN(s2)) return 0;
920 if (RSTR_LEN(s1) > RSTR_LEN(s2)) return 1;
921 return -1;
922 }
923 if (retval > 0) return 1;
924 return -1;
925}
926
927/* 15.2.10.5.3 */
928
929/*
930 * call-seq:
931 * str <=> other_str => -1, 0, +1
932 *
933 * Comparison---Returns -1 if <i>other_str</i> is less than, 0 if
934 * <i>other_str</i> is equal to, and +1 if <i>other_str</i> is greater than
935 * <i>str</i>. If the strings are of different lengths, and the strings are
936 * equal when compared up to the shortest length, then the longer string is
937 * considered greater than the shorter one. If the variable <code>$=</code> is
938 * <code>false</code>, the comparison is based on comparing the binary values
939 * of each character in the string. In older versions of Ruby, setting
940 * <code>$=</code> allowed case-insensitive comparisons; this is now deprecated
941 * in favor of using <code>String#casecmp</code>.
942 *
943 * <code><=></code> is the basis for the methods <code><</code>,
944 * <code><=</code>, <code>></code>, <code>>=</code>, and <code>between?</code>,
945 * included from module <code>Comparable</code>. The method
946 * <code>String#==</code> does not use <code>Comparable#==</code>.
947 *
948 * "abcdef" <=> "abcde" #=> 1
949 * "abcdef" <=> "abcdef" #=> 0
950 * "abcdef" <=> "abcdefg" #=> -1
951 * "abcdef" <=> "ABCDEF" #=> 1
952 */
953static mrb_value
954mrb_str_cmp_m(mrb_state *mrb, mrb_value str1)
955{
956 mrb_value str2;
957 mrb_int result;
958
959 mrb_get_args(mrb, "o", &str2);
960 if (!mrb_string_p(str2)) {
961 if (!mrb_respond_to(mrb, str2, mrb_intern_lit(mrb, "to_s"))) {
962 return mrb_nil_value();
963 }
964 else if (!mrb_respond_to(mrb, str2, mrb_intern_lit(mrb, "<=>"))) {
965 return mrb_nil_value();
966 }
967 else {
968 mrb_value tmp = mrb_funcall(mrb, str2, "<=>", 1, str1);
969
970 if (!mrb_nil_p(tmp)) return mrb_nil_value();
971 if (!mrb_fixnum_p(tmp)) {
972 return mrb_funcall(mrb, mrb_fixnum_value(0), "-", 1, tmp);
973 }
974 result = -mrb_fixnum(tmp);
975 }
976 }
977 else {
978 result = mrb_str_cmp(mrb, str1, str2);
979 }
980 return mrb_fixnum_value(result);
981}
982
983static mrb_bool
984str_eql(mrb_state *mrb, const mrb_value str1, const mrb_value str2)
985{
986 const mrb_int len = RSTRING_LEN(str1);
987
988 if (len != RSTRING_LEN(str2)) return FALSE;
989 if (memcmp(RSTRING_PTR(str1), RSTRING_PTR(str2), (size_t)len) == 0)
990 return TRUE;
991 return FALSE;
992}
993
994MRB_API mrb_bool
995mrb_str_equal(mrb_state *mrb, mrb_value str1, mrb_value str2)
996{
997 if (mrb_immediate_p(str2)) return FALSE;
998 if (!mrb_string_p(str2)) {
999 if (mrb_nil_p(str2)) return FALSE;
1000 if (!mrb_respond_to(mrb, str2, mrb_intern_lit(mrb, "to_str"))) {
1001 return FALSE;
1002 }
1003 str2 = mrb_funcall(mrb, str2, "to_str", 0);
1004 return mrb_equal(mrb, str2, str1);
1005 }
1006 return str_eql(mrb, str1, str2);
1007}
1008
1009/* 15.2.10.5.4 */
1010/*
1011 * call-seq:
1012 * str == obj => true or false
1013 *
1014 * Equality---
1015 * If <i>obj</i> is not a <code>String</code>, returns <code>false</code>.
1016 * Otherwise, returns <code>false</code> or <code>true</code>
1017 *
1018 * caution:if <i>str</i> <code><=></code> <i>obj</i> returns zero.
1019 */
1020static mrb_value
1021mrb_str_equal_m(mrb_state *mrb, mrb_value str1)
1022{
1023 mrb_value str2;
1024
1025 mrb_get_args(mrb, "o", &str2);
1026
1027 return mrb_bool_value(mrb_str_equal(mrb, str1, str2));
1028}
1029/* ---------------------------------- */
1030MRB_API mrb_value
1031mrb_str_to_str(mrb_state *mrb, mrb_value str)
1032{
1033 mrb_value s;
1034
1035 if (!mrb_string_p(str)) {
1036 s = mrb_check_convert_type(mrb, str, MRB_TT_STRING, "String", "to_str");
1037 if (mrb_nil_p(s)) {
1038 s = mrb_convert_type(mrb, str, MRB_TT_STRING, "String", "to_s");
1039 }
1040 return s;
1041 }
1042 return str;
1043}
1044
1045MRB_API const char*
1046mrb_string_value_ptr(mrb_state *mrb, mrb_value ptr)
1047{
1048 mrb_value str = mrb_str_to_str(mrb, ptr);
1049 return RSTRING_PTR(str);
1050}
1051
1052MRB_API mrb_int
1053mrb_string_value_len(mrb_state *mrb, mrb_value ptr)
1054{
1055 mrb_value str = mrb_str_to_str(mrb, ptr);
1056 return RSTRING_LEN(str);
1057}
1058
1059void
1060mrb_noregexp(mrb_state *mrb, mrb_value self)
1061{
1062 mrb_raise(mrb, E_NOTIMP_ERROR, "Regexp class not implemented");
1063}
1064
1065void
1066mrb_regexp_check(mrb_state *mrb, mrb_value obj)
1067{
1068 if (mrb_regexp_p(mrb, obj)) {
1069 mrb_noregexp(mrb, obj);
1070 }
1071}
1072
1073MRB_API mrb_value
1074mrb_str_dup(mrb_state *mrb, mrb_value str)
1075{
1076 struct RString *s = mrb_str_ptr(str);
1077 struct RString *dup = str_new(mrb, 0, 0);
1078
1079 str_with_class(mrb, dup, str);
1080 return str_replace(mrb, dup, s);
1081}
1082
1083static mrb_value
1084mrb_str_aref(mrb_state *mrb, mrb_value str, mrb_value indx)
1085{
1086 mrb_int idx;
1087
1088 mrb_regexp_check(mrb, indx);
1089 switch (mrb_type(indx)) {
1090 case MRB_TT_FIXNUM:
1091 idx = mrb_fixnum(indx);
1092
1093num_index:
1094 str = str_substr(mrb, str, idx, 1);
1095 if (!mrb_nil_p(str) && RSTRING_LEN(str) == 0) return mrb_nil_value();
1096 return str;
1097
1098 case MRB_TT_STRING:
1099 if (str_index(mrb, str, indx, 0) != -1)
1100 return mrb_str_dup(mrb, indx);
1101 return mrb_nil_value();
1102
1103 case MRB_TT_RANGE:
1104 goto range_arg;
1105
1106 default:
1107 indx = mrb_Integer(mrb, indx);
1108 if (mrb_nil_p(indx)) {
1109 range_arg:
1110 {
1111 mrb_int beg, len;
1112
1113 len = RSTRING_CHAR_LEN(str);
1114 switch (mrb_range_beg_len(mrb, indx, &beg, &len, len, TRUE)) {
1115 case 1:
1116 return str_subseq(mrb, str, beg, len);
1117 case 2:
1118 return mrb_nil_value();
1119 default:
1120 break;
1121 }
1122 }
1123 mrb_raise(mrb, E_TYPE_ERROR, "can't convert to Fixnum");
1124 }
1125 idx = mrb_fixnum(indx);
1126 goto num_index;
1127 }
1128 return mrb_nil_value(); /* not reached */
1129}
1130
1131/* 15.2.10.5.6 */
1132/* 15.2.10.5.34 */
1133/*
1134 * call-seq:
1135 * str[fixnum] => fixnum or nil
1136 * str[fixnum, fixnum] => new_str or nil
1137 * str[range] => new_str or nil
1138 * str[regexp] => new_str or nil
1139 * str[regexp, fixnum] => new_str or nil
1140 * str[other_str] => new_str or nil
1141 * str.slice(fixnum) => fixnum or nil
1142 * str.slice(fixnum, fixnum) => new_str or nil
1143 * str.slice(range) => new_str or nil
1144 * str.slice(other_str) => new_str or nil
1145 *
1146 * Element Reference---If passed a single <code>Fixnum</code>, returns the code
1147 * of the character at that position. If passed two <code>Fixnum</code>
1148 * objects, returns a substring starting at the offset given by the first, and
1149 * a length given by the second. If given a range, a substring containing
1150 * characters at offsets given by the range is returned. In all three cases, if
1151 * an offset is negative, it is counted from the end of <i>str</i>. Returns
1152 * <code>nil</code> if the initial offset falls outside the string, the length
1153 * is negative, or the beginning of the range is greater than the end.
1154 *
1155 * If a <code>String</code> is given, that string is returned if it occurs in
1156 * <i>str</i>. In both cases, <code>nil</code> is returned if there is no
1157 * match.
1158 *
1159 * a = "hello there"
1160 * a[1] #=> 101(1.8.7) "e"(1.9.2)
1161 * a[1.1] #=> "e"(1.9.2)
1162 * a[1,3] #=> "ell"
1163 * a[1..3] #=> "ell"
1164 * a[-3,2] #=> "er"
1165 * a[-4..-2] #=> "her"
1166 * a[12..-1] #=> nil
1167 * a[-2..-4] #=> ""
1168 * a["lo"] #=> "lo"
1169 * a["bye"] #=> nil
1170 */
1171static mrb_value
1172mrb_str_aref_m(mrb_state *mrb, mrb_value str)
1173{
1174 mrb_value a1, a2;
1175 int argc;
1176
1177 argc = mrb_get_args(mrb, "o|o", &a1, &a2);
1178 if (argc == 2) {
1179 mrb_int n1, n2;
1180
1181 mrb_regexp_check(mrb, a1);
1182 mrb_get_args(mrb, "ii", &n1, &n2);
1183 return str_substr(mrb, str, n1, n2);
1184 }
1185 if (argc != 1) {
1186 mrb_raisef(mrb, E_ARGUMENT_ERROR, "wrong number of arguments (%S for 1)", mrb_fixnum_value(argc));
1187 }
1188 return mrb_str_aref(mrb, str, a1);
1189}
1190
1191/* 15.2.10.5.8 */
1192/*
1193 * call-seq:
1194 * str.capitalize! => str or nil
1195 *
1196 * Modifies <i>str</i> by converting the first character to uppercase and the
1197 * remainder to lowercase. Returns <code>nil</code> if no changes are made.
1198 *
1199 * a = "hello"
1200 * a.capitalize! #=> "Hello"
1201 * a #=> "Hello"
1202 * a.capitalize! #=> nil
1203 */
1204static mrb_value
1205mrb_str_capitalize_bang(mrb_state *mrb, mrb_value str)
1206{
1207 char *p, *pend;
1208 mrb_bool modify = FALSE;
1209 struct RString *s = mrb_str_ptr(str);
1210
1211 mrb_str_modify(mrb, s);
1212 if (RSTR_LEN(s) == 0 || !RSTR_PTR(s)) return mrb_nil_value();
1213 p = RSTR_PTR(s); pend = RSTR_PTR(s) + RSTR_LEN(s);
1214 if (ISLOWER(*p)) {
1215 *p = TOUPPER(*p);
1216 modify = TRUE;
1217 }
1218 while (++p < pend) {
1219 if (ISUPPER(*p)) {
1220 *p = TOLOWER(*p);
1221 modify = TRUE;
1222 }
1223 }
1224 if (modify) return str;
1225 return mrb_nil_value();
1226}
1227
1228/* 15.2.10.5.7 */
1229/*
1230 * call-seq:
1231 * str.capitalize => new_str
1232 *
1233 * Returns a copy of <i>str</i> with the first character converted to uppercase
1234 * and the remainder to lowercase.
1235 *
1236 * "hello".capitalize #=> "Hello"
1237 * "HELLO".capitalize #=> "Hello"
1238 * "123ABC".capitalize #=> "123abc"
1239 */
1240static mrb_value
1241mrb_str_capitalize(mrb_state *mrb, mrb_value self)
1242{
1243 mrb_value str;
1244
1245 str = mrb_str_dup(mrb, self);
1246 mrb_str_capitalize_bang(mrb, str);
1247 return str;
1248}
1249
1250/* 15.2.10.5.10 */
1251/*
1252 * call-seq:
1253 * str.chomp!(separator="\n") => str or nil
1254 *
1255 * Modifies <i>str</i> in place as described for <code>String#chomp</code>,
1256 * returning <i>str</i>, or <code>nil</code> if no modifications were made.
1257 */
1258static mrb_value
1259mrb_str_chomp_bang(mrb_state *mrb, mrb_value str)
1260{
1261 mrb_value rs;
1262 mrb_int newline;
1263 char *p, *pp;
1264 mrb_int rslen;
1265 mrb_int len;
1266 mrb_int argc;
1267 struct RString *s = mrb_str_ptr(str);
1268
1269 mrb_str_modify(mrb, s);
1270 argc = mrb_get_args(mrb, "|S", &rs);
1271 len = RSTR_LEN(s);
1272 if (argc == 0) {
1273 if (len == 0) return mrb_nil_value();
1274 smart_chomp:
1275 if (RSTR_PTR(s)[len-1] == '\n') {
1276 RSTR_SET_LEN(s, RSTR_LEN(s) - 1);
1277 if (RSTR_LEN(s) > 0 &&
1278 RSTR_PTR(s)[RSTR_LEN(s)-1] == '\r') {
1279 RSTR_SET_LEN(s, RSTR_LEN(s) - 1);
1280 }
1281 }
1282 else if (RSTR_PTR(s)[len-1] == '\r') {
1283 RSTR_SET_LEN(s, RSTR_LEN(s) - 1);
1284 }
1285 else {
1286 return mrb_nil_value();
1287 }
1288 RSTR_PTR(s)[RSTR_LEN(s)] = '\0';
1289 return str;
1290 }
1291
1292 if (len == 0 || mrb_nil_p(rs)) return mrb_nil_value();
1293 p = RSTR_PTR(s);
1294 rslen = RSTRING_LEN(rs);
1295 if (rslen == 0) {
1296 while (len>0 && p[len-1] == '\n') {
1297 len--;
1298 if (len>0 && p[len-1] == '\r')
1299 len--;
1300 }
1301 if (len < RSTR_LEN(s)) {
1302 RSTR_SET_LEN(s, len);
1303 p[len] = '\0';
1304 return str;
1305 }
1306 return mrb_nil_value();
1307 }
1308 if (rslen > len) return mrb_nil_value();
1309 newline = RSTRING_PTR(rs)[rslen-1];
1310 if (rslen == 1 && newline == '\n')
1311 newline = RSTRING_PTR(rs)[rslen-1];
1312 if (rslen == 1 && newline == '\n')
1313 goto smart_chomp;
1314
1315 pp = p + len - rslen;
1316 if (p[len-1] == newline &&
1317 (rslen <= 1 ||
1318 memcmp(RSTRING_PTR(rs), pp, rslen) == 0)) {
1319 RSTR_SET_LEN(s, len - rslen);
1320 p[RSTR_LEN(s)] = '\0';
1321 return str;
1322 }
1323 return mrb_nil_value();
1324}
1325
1326/* 15.2.10.5.9 */
1327/*
1328 * call-seq:
1329 * str.chomp(separator="\n") => new_str
1330 *
1331 * Returns a new <code>String</code> with the given record separator removed
1332 * from the end of <i>str</i> (if present). If <code>$/</code> has not been
1333 * changed from the default Ruby record separator, then <code>chomp</code> also
1334 * removes carriage return characters (that is it will remove <code>\n</code>,
1335 * <code>\r</code>, and <code>\r\n</code>).
1336 *
1337 * "hello".chomp #=> "hello"
1338 * "hello\n".chomp #=> "hello"
1339 * "hello\r\n".chomp #=> "hello"
1340 * "hello\n\r".chomp #=> "hello\n"
1341 * "hello\r".chomp #=> "hello"
1342 * "hello \n there".chomp #=> "hello \n there"
1343 * "hello".chomp("llo") #=> "he"
1344 */
1345static mrb_value
1346mrb_str_chomp(mrb_state *mrb, mrb_value self)
1347{
1348 mrb_value str;
1349
1350 str = mrb_str_dup(mrb, self);
1351 mrb_str_chomp_bang(mrb, str);
1352 return str;
1353}
1354
1355/* 15.2.10.5.12 */
1356/*
1357 * call-seq:
1358 * str.chop! => str or nil
1359 *
1360 * Processes <i>str</i> as for <code>String#chop</code>, returning <i>str</i>,
1361 * or <code>nil</code> if <i>str</i> is the empty string. See also
1362 * <code>String#chomp!</code>.
1363 */
1364static mrb_value
1365mrb_str_chop_bang(mrb_state *mrb, mrb_value str)
1366{
1367 struct RString *s = mrb_str_ptr(str);
1368
1369 mrb_str_modify(mrb, s);
1370 if (RSTR_LEN(s) > 0) {
1371 mrb_int len;
1372#ifdef MRB_UTF8_STRING
1373 const char* t = RSTR_PTR(s), *p = t;
1374 const char* e = p + RSTR_LEN(s);
1375 while (p<e) {
1376 mrb_int clen = utf8len(p, e);
1377 if (p + clen>=e) break;
1378 p += clen;
1379 }
1380 len = p - t;
1381#else
1382 len = RSTR_LEN(s) - 1;
1383#endif
1384 if (RSTR_PTR(s)[len] == '\n') {
1385 if (len > 0 &&
1386 RSTR_PTR(s)[len-1] == '\r') {
1387 len--;
1388 }
1389 }
1390 RSTR_SET_LEN(s, len);
1391 RSTR_PTR(s)[len] = '\0';
1392 return str;
1393 }
1394 return mrb_nil_value();
1395}
1396
1397/* 15.2.10.5.11 */
1398/*
1399 * call-seq:
1400 * str.chop => new_str
1401 *
1402 * Returns a new <code>String</code> with the last character removed. If the
1403 * string ends with <code>\r\n</code>, both characters are removed. Applying
1404 * <code>chop</code> to an empty string returns an empty
1405 * string. <code>String#chomp</code> is often a safer alternative, as it leaves
1406 * the string unchanged if it doesn't end in a record separator.
1407 *
1408 * "string\r\n".chop #=> "string"
1409 * "string\n\r".chop #=> "string\n"
1410 * "string\n".chop #=> "string"
1411 * "string".chop #=> "strin"
1412 * "x".chop #=> ""
1413 */
1414static mrb_value
1415mrb_str_chop(mrb_state *mrb, mrb_value self)
1416{
1417 mrb_value str;
1418 str = mrb_str_dup(mrb, self);
1419 mrb_str_chop_bang(mrb, str);
1420 return str;
1421}
1422
1423/* 15.2.10.5.14 */
1424/*
1425 * call-seq:
1426 * str.downcase! => str or nil
1427 *
1428 * Downcases the contents of <i>str</i>, returning <code>nil</code> if no
1429 * changes were made.
1430 */
1431static mrb_value
1432mrb_str_downcase_bang(mrb_state *mrb, mrb_value str)
1433{
1434 char *p, *pend;
1435 mrb_bool modify = FALSE;
1436 struct RString *s = mrb_str_ptr(str);
1437
1438 mrb_str_modify(mrb, s);
1439 p = RSTR_PTR(s);
1440 pend = RSTR_PTR(s) + RSTR_LEN(s);
1441 while (p < pend) {
1442 if (ISUPPER(*p)) {
1443 *p = TOLOWER(*p);
1444 modify = TRUE;
1445 }
1446 p++;
1447 }
1448
1449 if (modify) return str;
1450 return mrb_nil_value();
1451}
1452
1453/* 15.2.10.5.13 */
1454/*
1455 * call-seq:
1456 * str.downcase => new_str
1457 *
1458 * Returns a copy of <i>str</i> with all uppercase letters replaced with their
1459 * lowercase counterparts. The operation is locale insensitive---only
1460 * characters 'A' to 'Z' are affected.
1461 *
1462 * "hEllO".downcase #=> "hello"
1463 */
1464static mrb_value
1465mrb_str_downcase(mrb_state *mrb, mrb_value self)
1466{
1467 mrb_value str;
1468
1469 str = mrb_str_dup(mrb, self);
1470 mrb_str_downcase_bang(mrb, str);
1471 return str;
1472}
1473
1474/* 15.2.10.5.16 */
1475/*
1476 * call-seq:
1477 * str.empty? => true or false
1478 *
1479 * Returns <code>true</code> if <i>str</i> has a length of zero.
1480 *
1481 * "hello".empty? #=> false
1482 * "".empty? #=> true
1483 */
1484static mrb_value
1485mrb_str_empty_p(mrb_state *mrb, mrb_value self)
1486{
1487 struct RString *s = mrb_str_ptr(self);
1488
1489 return mrb_bool_value(RSTR_LEN(s) == 0);
1490}
1491
1492/* 15.2.10.5.17 */
1493/*
1494 * call-seq:
1495 * str.eql?(other) => true or false
1496 *
 *  Two strings are equal if they have the same length and content.
1498 */
1499static mrb_value
1500mrb_str_eql(mrb_state *mrb, mrb_value self)
1501{
1502 mrb_value str2;
1503 mrb_bool eql_p;
1504
1505 mrb_get_args(mrb, "o", &str2);
1506 eql_p = (mrb_type(str2) == MRB_TT_STRING) && str_eql(mrb, self, str2);
1507
1508 return mrb_bool_value(eql_p);
1509}
1510
1511MRB_API mrb_value
1512mrb_str_substr(mrb_state *mrb, mrb_value str, mrb_int beg, mrb_int len)
1513{
1514 return str_substr(mrb, str, beg, len);
1515}
1516
1517mrb_int
1518mrb_str_hash(mrb_state *mrb, mrb_value str)
1519{
1520 /* 1-8-7 */
1521 struct RString *s = mrb_str_ptr(str);
1522 mrb_int len = RSTR_LEN(s);
1523 char *p = RSTR_PTR(s);
1524 mrb_int key = 0;
1525
1526 while (len--) {
1527 key = key*65599 + *p;
1528 p++;
1529 }
1530 return key + (key>>5);
1531}
1532
1533/* 15.2.10.5.20 */
1534/*
1535 * call-seq:
1536 * str.hash => fixnum
1537 *
1538 * Return a hash based on the string's length and content.
1539 */
1540static mrb_value
1541mrb_str_hash_m(mrb_state *mrb, mrb_value self)
1542{
1543 mrb_int key = mrb_str_hash(mrb, self);
1544 return mrb_fixnum_value(key);
1545}
1546
1547/* 15.2.10.5.21 */
1548/*
1549 * call-seq:
1550 * str.include? other_str => true or false
1551 * str.include? fixnum => true or false
1552 *
1553 * Returns <code>true</code> if <i>str</i> contains the given string or
1554 * character.
1555 *
1556 * "hello".include? "lo" #=> true
1557 * "hello".include? "ol" #=> false
1558 * "hello".include? ?h #=> true
1559 */
1560static mrb_value
1561mrb_str_include(mrb_state *mrb, mrb_value self)
1562{
1563 mrb_value str2;
1564
1565 mrb_get_args(mrb, "S", &str2);
1566 if (str_index(mrb, self, str2, 0) < 0)
1567 return mrb_bool_value(FALSE);
1568 return mrb_bool_value(TRUE);
1569}
1570
1571/* 15.2.10.5.22 */
1572/*
1573 * call-seq:
1574 * str.index(substring [, offset]) => fixnum or nil
1575 * str.index(fixnum [, offset]) => fixnum or nil
1576 * str.index(regexp [, offset]) => fixnum or nil
1577 *
1578 * Returns the index of the first occurrence of the given
1579 * <i>substring</i>,
1580 * character (<i>fixnum</i>), or pattern (<i>regexp</i>) in <i>str</i>.
1581 * Returns
1582 * <code>nil</code> if not found.
1583 * If the second parameter is present, it
1584 * specifies the position in the string to begin the search.
1585 *
1586 * "hello".index('e') #=> 1
1587 * "hello".index('lo') #=> 3
1588 * "hello".index('a') #=> nil
1589 * "hello".index(101) #=> 1(101=0x65='e')
1590 * "hello".index(/[aeiou]/, -3) #=> 4
1591 */
1592static mrb_value
1593mrb_str_index(mrb_state *mrb, mrb_value str)
1594{
1595 mrb_value *argv;
1596 mrb_int argc;
1597 mrb_value sub;
1598 mrb_int pos, clen;
1599
1600 mrb_get_args(mrb, "*", &argv, &argc);
1601 if (argc == 2) {
1602 mrb_get_args(mrb, "oi", &sub, &pos);
1603 }
1604 else {
1605 pos = 0;
1606 if (argc > 0)
1607 sub = argv[0];
1608 else
1609 sub = mrb_nil_value();
1610 }
1611 mrb_regexp_check(mrb, sub);
1612 clen = RSTRING_CHAR_LEN(str);
1613 if (pos < 0) {
1614 pos += clen;
1615 if (pos < 0) {
1616 return mrb_nil_value();
1617 }
1618 }
1619 if (pos > clen) return mrb_nil_value();
1620 pos = chars2bytes(str, 0, pos);
1621
1622 switch (mrb_type(sub)) {
1623 default: {
1624 mrb_value tmp;
1625
1626 tmp = mrb_check_string_type(mrb, sub);
1627 if (mrb_nil_p(tmp)) {
1628 mrb_raisef(mrb, E_TYPE_ERROR, "type mismatch: %S given", sub);
1629 }
1630 sub = tmp;
1631 }
1632 /* fall through */
1633 case MRB_TT_STRING:
1634 pos = str_index(mrb, str, sub, pos);
1635 break;
1636 }
1637
1638 if (pos == -1) return mrb_nil_value();
1639 pos = bytes2chars(RSTRING_PTR(str), pos);
1640 BYTES_ALIGN_CHECK(pos);
1641 return mrb_fixnum_value(pos);
1642}
1643
/* NOTE(review): not referenced by the code visible around this definition;
 * presumably a minimum length consulted by str_replace() -- confirm before
 * changing or removing. */
#define STR_REPLACE_SHARED_MIN 10
1645
1646/* 15.2.10.5.24 */
1647/* 15.2.10.5.28 */
1648/*
1649 * call-seq:
1650 * str.replace(other_str) => str
1651 *
1652 * s = "hello" #=> "hello"
1653 * s.replace "world" #=> "world"
1654 */
1655static mrb_value
1656mrb_str_replace(mrb_state *mrb, mrb_value str)
1657{
1658 mrb_value str2;
1659
1660 mrb_get_args(mrb, "S", &str2);
1661 return str_replace(mrb, mrb_str_ptr(str), mrb_str_ptr(str2));
1662}
1663
1664/* 15.2.10.5.23 */
1665/*
1666 * call-seq:
1667 * String.new(str="") => new_str
1668 *
1669 * Returns a new string object containing a copy of <i>str</i>.
1670 */
1671static mrb_value
1672mrb_str_init(mrb_state *mrb, mrb_value self)
1673{
1674 mrb_value str2;
1675
1676 if (mrb_get_args(mrb, "|S", &str2) == 0) {
1677 struct RString *s = str_new(mrb, 0, 0);
1678 str2 = mrb_obj_value(s);
1679 }
1680 str_replace(mrb, mrb_str_ptr(self), mrb_str_ptr(str2));
1681 return self;
1682}
1683
1684/* 15.2.10.5.25 */
1685/* 15.2.10.5.41 */
1686/*
1687 * call-seq:
1688 * str.intern => symbol
1689 * str.to_sym => symbol
1690 *
1691 * Returns the <code>Symbol</code> corresponding to <i>str</i>, creating the
1692 * symbol if it did not previously exist. See <code>Symbol#id2name</code>.
1693 *
1694 * "Koala".intern #=> :Koala
1695 * s = 'cat'.to_sym #=> :cat
1696 * s == :cat #=> true
1697 * s = '@cat'.to_sym #=> :@cat
1698 * s == :@cat #=> true
1699 *
1700 * This can also be used to create symbols that cannot be represented using the
1701 * <code>:xxx</code> notation.
1702 *
1703 * 'cat and dog'.to_sym #=> :"cat and dog"
1704 */
1705MRB_API mrb_value
1706mrb_str_intern(mrb_state *mrb, mrb_value self)
1707{
1708 return mrb_symbol_value(mrb_intern_str(mrb, self));
1709}
1710/* ---------------------------------- */
1711MRB_API mrb_value
1712mrb_obj_as_string(mrb_state *mrb, mrb_value obj)
1713{
1714 mrb_value str;
1715
1716 if (mrb_string_p(obj)) {
1717 return obj;
1718 }
1719 str = mrb_funcall(mrb, obj, "to_s", 0);
1720 if (!mrb_string_p(str))
1721 return mrb_any_to_s(mrb, obj);
1722 return str;
1723}
1724
1725MRB_API mrb_value
1726mrb_ptr_to_str(mrb_state *mrb, void *p)
1727{
1728 struct RString *p_str;
1729 char *p1;
1730 char *p2;
1731 uintptr_t n = (uintptr_t)p;
1732
1733 p_str = str_new(mrb, NULL, 2 + sizeof(uintptr_t) * CHAR_BIT / 4);
1734 p1 = RSTR_PTR(p_str);
1735 *p1++ = '0';
1736 *p1++ = 'x';
1737 p2 = p1;
1738
1739 do {
1740 *p2++ = mrb_digitmap[n % 16];
1741 n /= 16;
1742 } while (n > 0);
1743 *p2 = '\0';
1744 RSTR_SET_LEN(p_str, (mrb_int)(p2 - RSTR_PTR(p_str)));
1745
1746 while (p1 < p2) {
1747 const char c = *p1;
1748 *p1++ = *--p2;
1749 *p2 = c;
1750 }
1751
1752 return mrb_obj_value(p_str);
1753}
1754
1755MRB_API mrb_value
1756mrb_string_type(mrb_state *mrb, mrb_value str)
1757{
1758 return mrb_convert_type(mrb, str, MRB_TT_STRING, "String", "to_str");
1759}
1760
1761MRB_API mrb_value
1762mrb_check_string_type(mrb_state *mrb, mrb_value str)
1763{
1764 return mrb_check_convert_type(mrb, str, MRB_TT_STRING, "String", "to_str");
1765}
1766
1767/* 15.2.10.5.30 */
1768/*
1769 * call-seq:
1770 * str.reverse! => str
1771 *
1772 * Reverses <i>str</i> in place.
1773 */
1774static mrb_value
1775mrb_str_reverse_bang(mrb_state *mrb, mrb_value str)
1776{
1777#ifdef MRB_UTF8_STRING
1778 mrb_int utf8_len = RSTRING_CHAR_LEN(str);
1779 mrb_int len = RSTRING_LEN(str);
1780
1781 if (utf8_len == len) goto bytes;
1782 if (utf8_len > 1) {
1783 char *buf;
1784 char *p, *e, *r;
1785
1786 mrb_str_modify(mrb, mrb_str_ptr(str));
1787 len = RSTRING_LEN(str);
1788 buf = (char*)mrb_malloc(mrb, (size_t)len);
1789 p = buf;
1790 e = buf + len;
1791
1792 memcpy(buf, RSTRING_PTR(str), len);
1793 r = RSTRING_PTR(str) + len;
1794
1795 while (p<e) {
1796 mrb_int clen = utf8len(p, e);
1797 r -= clen;
1798 memcpy(r, p, clen);
1799 p += clen;
1800 }
1801 mrb_free(mrb, buf);
1802 }
1803 return str;
1804
1805 bytes:
1806#endif
1807 {
1808 struct RString *s = mrb_str_ptr(str);
1809 char *p, *e;
1810 char c;
1811
1812 mrb_str_modify(mrb, s);
1813 if (RSTR_LEN(s) > 1) {
1814 p = RSTR_PTR(s);
1815 e = p + RSTR_LEN(s) - 1;
1816 while (p < e) {
1817 c = *p;
1818 *p++ = *e;
1819 *e-- = c;
1820 }
1821 }
1822 return str;
1823 }
1824}
1825
1826/* ---------------------------------- */
1827/* 15.2.10.5.29 */
1828/*
1829 * call-seq:
1830 * str.reverse => new_str
1831 *
1832 * Returns a new string with the characters from <i>str</i> in reverse order.
1833 *
1834 * "stressed".reverse #=> "desserts"
1835 */
1836static mrb_value
1837mrb_str_reverse(mrb_state *mrb, mrb_value str)
1838{
1839 mrb_value str2 = mrb_str_dup(mrb, str);
1840 mrb_str_reverse_bang(mrb, str2);
1841 return str2;
1842}
1843
1844/* 15.2.10.5.31 */
1845/*
1846 * call-seq:
1847 * str.rindex(substring [, fixnum]) => fixnum or nil
1848 * str.rindex(fixnum [, fixnum]) => fixnum or nil
1849 * str.rindex(regexp [, fixnum]) => fixnum or nil
1850 *
1851 * Returns the index of the last occurrence of the given <i>substring</i>,
1852 * character (<i>fixnum</i>), or pattern (<i>regexp</i>) in <i>str</i>. Returns
1853 * <code>nil</code> if not found. If the second parameter is present, it
1854 * specifies the position in the string to end the search---characters beyond
1855 * this point will not be considered.
1856 *
1857 * "hello".rindex('e') #=> 1
1858 * "hello".rindex('l') #=> 3
1859 * "hello".rindex('a') #=> nil
1860 * "hello".rindex(101) #=> 1
1861 * "hello".rindex(/[aeiou]/, -2) #=> 1
1862 */
1863static mrb_value
1864mrb_str_rindex(mrb_state *mrb, mrb_value str)
1865{
1866 mrb_value *argv;
1867 mrb_int argc;
1868 mrb_value sub;
1869 mrb_int pos, len = RSTRING_CHAR_LEN(str);
1870
1871 mrb_get_args(mrb, "*", &argv, &argc);
1872 if (argc == 2) {
1873 mrb_get_args(mrb, "oi", &sub, &pos);
1874 if (pos < 0) {
1875 pos += len;
1876 if (pos < 0) {
1877 mrb_regexp_check(mrb, sub);
1878 return mrb_nil_value();
1879 }
1880 }
1881 if (pos > len) pos = len;
1882 }
1883 else {
1884 pos = len;
1885 if (argc > 0)
1886 sub = argv[0];
1887 else
1888 sub = mrb_nil_value();
1889 }
1890 pos = chars2bytes(str, 0, pos);
1891 mrb_regexp_check(mrb, sub);
1892
1893 switch (mrb_type(sub)) {
1894 default: {
1895 mrb_value tmp;
1896
1897 tmp = mrb_check_string_type(mrb, sub);
1898 if (mrb_nil_p(tmp)) {
1899 mrb_raisef(mrb, E_TYPE_ERROR, "type mismatch: %S given", sub);
1900 }
1901 sub = tmp;
1902 }
1903 /* fall through */
1904 case MRB_TT_STRING:
1905 pos = str_rindex(mrb, str, sub, pos);
1906 if (pos >= 0) {
1907 pos = bytes2chars(RSTRING_PTR(str), pos);
1908 BYTES_ALIGN_CHECK(pos);
1909 return mrb_fixnum_value(pos);
1910 }
1911 break;
1912
1913 } /* end of switch (TYPE(sub)) */
1914 return mrb_nil_value();
1915}
1916
1917/* 15.2.10.5.35 */
1918
1919/*
1920 * call-seq:
1921 * str.split(pattern="\n", [limit]) => anArray
1922 *
1923 * Divides <i>str</i> into substrings based on a delimiter, returning an array
1924 * of these substrings.
1925 *
1926 * If <i>pattern</i> is a <code>String</code>, then its contents are used as
1927 * the delimiter when splitting <i>str</i>. If <i>pattern</i> is a single
1928 * space, <i>str</i> is split on whitespace, with leading whitespace and runs
1929 * of contiguous whitespace characters ignored.
1930 *
1931 * If <i>pattern</i> is a <code>Regexp</code>, <i>str</i> is divided where the
1932 * pattern matches. Whenever the pattern matches a zero-length string,
1933 * <i>str</i> is split into individual characters.
1934 *
1935 * If <i>pattern</i> is omitted, the value of <code>$;</code> is used. If
1936 * <code>$;</code> is <code>nil</code> (which is the default), <i>str</i> is
1937 * split on whitespace as if ' ' were specified.
1938 *
1939 * If the <i>limit</i> parameter is omitted, trailing null fields are
1940 * suppressed. If <i>limit</i> is a positive number, at most that number of
1941 * fields will be returned (if <i>limit</i> is <code>1</code>, the entire
1942 * string is returned as the only entry in an array). If negative, there is no
1943 * limit to the number of fields returned, and trailing null fields are not
1944 * suppressed.
1945 *
1946 * " now's the time".split #=> ["now's", "the", "time"]
1947 * " now's the time".split(' ') #=> ["now's", "the", "time"]
1948 * " now's the time".split(/ /) #=> ["", "now's", "", "the", "time"]
1949 * "hello".split(//) #=> ["h", "e", "l", "l", "o"]
1950 * "hello".split(//, 3) #=> ["h", "e", "llo"]
1951 *
1952 * "mellow yellow".split("ello") #=> ["m", "w y", "w"]
1953 * "1,2,,3,4,,".split(',') #=> ["1", "2", "", "3", "4"]
1954 * "1,2,,3,4,,".split(',', 4) #=> ["1", "2", "", "3,4,,"]
1955 * "1,2,,3,4,,".split(',', -4) #=> ["1", "2", "", "3", "4", "", ""]
1956 */
1957
/*
 * Implements String#split(pattern = nil, limit = 0) and returns a new Array
 * of substrings of str.
 *
 * Two strategies are implemented here:
 *   awk    - no pattern, nil, or a single space: split on runs of whitespace
 *   string - any other String pattern: split on exact occurrences
 * A pattern that is not a String is handed to mrb_noregexp()
 * (NOTE(review): presumably raises because regexp splitting is not provided
 * in this file -- confirm).
 */
static mrb_value
mrb_str_split_m(mrb_state *mrb, mrb_value str)
{
  int argc;
  mrb_value spat = mrb_nil_value();
  enum {awk, string, regexp} split_type = string;
  mrb_int i = 0;      /* field counter, only compared against lim */
  mrb_int beg;        /* byte offset where the not-yet-pushed remainder starts */
  mrb_int end;
  mrb_int lim = 0;
  mrb_bool lim_p;     /* TRUE only for an explicit, positive limit */
  mrb_value result, tmp;

  argc = mrb_get_args(mrb, "|oi", &spat, &lim);
  lim_p = (lim > 0 && argc == 2);
  if (argc == 2) {
    if (lim == 1) {
      /* limit 1: the whole string is the only field (none if it is empty) */
      if (RSTRING_LEN(str) == 0)
        return mrb_ary_new_capa(mrb, 0);
      return mrb_ary_new_from_values(mrb, 1, &str);
    }
    /* counting starts at 1 so the loops stop one field short of lim; the
       remainder pushed after the loops becomes the last field */
    i = 1;
  }

  if (argc == 0 || mrb_nil_p(spat)) {
    split_type = awk;
  }
  else {
    if (mrb_string_p(spat)) {
      split_type = string;
      if (RSTRING_LEN(spat) == 1 && RSTRING_PTR(spat)[0] == ' ') {
        split_type = awk;
      }
    }
    else {
      mrb_noregexp(mrb, str);
    }
  }

  result = mrb_ary_new(mrb);
  beg = 0;
  if (split_type == awk) {
    mrb_bool skip = TRUE;   /* TRUE while inside a run of whitespace */
    mrb_int idx = 0;
    mrb_int str_len = RSTRING_LEN(str);
    unsigned int c;
    int ai = mrb_gc_arena_save(mrb);

    idx = end = beg;
    while (idx < str_len) {
      c = (unsigned char)RSTRING_PTR(str)[idx++];
      if (skip) {
        if (ISSPACE(c)) {
          /* still in leading/separating whitespace: move the field start */
          beg = idx;
        }
        else {
          /* first byte of a new field */
          end = idx;
          skip = FALSE;
          /* limit reached: leave the rest for the remainder push below */
          if (lim_p && lim <= i) break;
        }
      }
      else if (ISSPACE(c)) {
        /* whitespace ends the current field [beg, end) */
        mrb_ary_push(mrb, result, byte_subseq(mrb, str, beg, end-beg));
        mrb_gc_arena_restore(mrb, ai);
        skip = TRUE;
        beg = idx;
        if (lim_p) ++i;
      }
      else {
        end = idx;
      }
    }
  }
  else if (split_type == string) {
    mrb_int str_len = RSTRING_LEN(str);
    mrb_int pat_len = RSTRING_LEN(spat);
    mrb_int idx = 0;
    int ai = mrb_gc_arena_save(mrb);

    while (idx < str_len) {
      if (pat_len > 0) {
        /* end is the match offset relative to idx, negative if no match */
        end = mrb_memsearch(RSTRING_PTR(spat), pat_len, RSTRING_PTR(str)+idx, str_len - idx);
        if (end < 0) break;
      }
      else {
        /* empty pattern: each field is one character wide */
        end = chars2bytes(str, idx, 1);
      }
      mrb_ary_push(mrb, result, byte_subseq(mrb, str, idx, end));
      mrb_gc_arena_restore(mrb, ai);
      /* continue after the field and the separator that followed it */
      idx += end + pat_len;
      if (lim_p && lim <= ++i) break;
    }
    beg = idx;
  }
  else {
    mrb_noregexp(mrb, str);
  }
  /* push whatever is left after the last separator; it is an empty string
     when the input ended exactly on a separator and a limit or a negative
     limit asks for trailing fields to be kept */
  if (RSTRING_LEN(str) > 0 && (lim_p || RSTRING_LEN(str) > beg || lim < 0)) {
    if (RSTRING_LEN(str) == beg) {
      tmp = mrb_str_new_empty(mrb, str);
    }
    else {
      tmp = byte_subseq(mrb, str, beg, RSTRING_LEN(str)-beg);
    }
    mrb_ary_push(mrb, result, tmp);
  }
  /* without a limit, trailing empty fields are dropped */
  if (!lim_p && lim == 0) {
    mrb_int len;
    while ((len = RARRAY_LEN(result)) > 0 &&
           (tmp = RARRAY_PTR(result)[len-1], RSTRING_LEN(tmp) == 0))
      mrb_ary_pop(mrb, result);
  }

  return result;
}
2073
2074MRB_API mrb_value
2075mrb_str_len_to_inum(mrb_state *mrb, const char *str, size_t len, int base, int badcheck)
2076{
2077 const char *p = str;
2078 const char *pend = str + len;
2079 char sign = 1;
2080 int c;
2081 uint64_t n = 0;
2082 mrb_int val;
2083
2084#define conv_digit(c) \
2085 (ISDIGIT(c) ? ((c) - '0') : \
2086 ISLOWER(c) ? ((c) - 'a' + 10) : \
2087 ISUPPER(c) ? ((c) - 'A' + 10) : \
2088 -1)
2089
2090 if (!p) {
2091 if (badcheck) goto bad;
2092 return mrb_fixnum_value(0);
2093 }
2094 while (p<pend && ISSPACE(*p))
2095 p++;
2096
2097 if (p[0] == '+') {
2098 p++;
2099 }
2100 else if (p[0] == '-') {
2101 p++;
2102 sign = 0;
2103 }
2104 if (base <= 0) {
2105 if (p[0] == '0') {
2106 switch (p[1]) {
2107 case 'x': case 'X':
2108 base = 16;
2109 break;
2110 case 'b': case 'B':
2111 base = 2;
2112 break;
2113 case 'o': case 'O':
2114 base = 8;
2115 break;
2116 case 'd': case 'D':
2117 base = 10;
2118 break;
2119 default:
2120 base = 8;
2121 break;
2122 }
2123 }
2124 else if (base < -1) {
2125 base = -base;
2126 }
2127 else {
2128 base = 10;
2129 }
2130 }
2131 switch (base) {
2132 case 2:
2133 if (p[0] == '0' && (p[1] == 'b'||p[1] == 'B')) {
2134 p += 2;
2135 }
2136 break;
2137 case 3:
2138 break;
2139 case 8:
2140 if (p[0] == '0' && (p[1] == 'o'||p[1] == 'O')) {
2141 p += 2;
2142 }
2143 case 4: case 5: case 6: case 7:
2144 break;
2145 case 10:
2146 if (p[0] == '0' && (p[1] == 'd'||p[1] == 'D')) {
2147 p += 2;
2148 }
2149 case 9: case 11: case 12: case 13: case 14: case 15:
2150 break;
2151 case 16:
2152 if (p[0] == '0' && (p[1] == 'x'||p[1] == 'X')) {
2153 p += 2;
2154 }
2155 break;
2156 default:
2157 if (base < 2 || 36 < base) {
2158 mrb_raisef(mrb, E_ARGUMENT_ERROR, "illegal radix %S", mrb_fixnum_value(base));
2159 }
2160 break;
2161 } /* end of switch (base) { */
2162 if (p>=pend) {
2163 if (badcheck) goto bad;
2164 return mrb_fixnum_value(0);
2165 }
2166 if (*p == '0') { /* squeeze preceding 0s */
2167 p++;
2168 while (p<pend) {
2169 c = *p++;
2170 if (c == '_') {
2171 if (p<pend && *p == '_') {
2172 if (badcheck) goto bad;
2173 break;
2174 }
2175 continue;
2176 }
2177 if (c != '0') {
2178 p--;
2179 break;
2180 }
2181 }
2182 if (*(p - 1) == '0')
2183 p--;
2184 }
2185 if (p == pend) {
2186 if (badcheck) goto bad;
2187 return mrb_fixnum_value(0);
2188 }
2189 for ( ;p<pend;p++) {
2190 if (*p == '_') {
2191 p++;
2192 if (p==pend) {
2193 if (badcheck) goto bad;
2194 continue;
2195 }
2196 if (*p == '_') {
2197 if (badcheck) goto bad;
2198 break;
2199 }
2200 }
2201 if (badcheck && *p == '\0') {
2202 goto nullbyte;
2203 }
2204 c = conv_digit(*p);
2205 if (c < 0 || c >= base) {
2206 break;
2207 }
2208 n *= base;
2209 n += c;
2210 if (n > (uint64_t)MRB_INT_MAX + (sign ? 0 : 1)) {
2211 mrb_raisef(mrb, E_ARGUMENT_ERROR, "string (%S) too big for integer",
2212 mrb_str_new(mrb, str, pend-str));
2213 }
2214 }
2215 val = (mrb_int)n;
2216 if (badcheck) {
2217 if (p == str) goto bad; /* no number */
2218 while (p<pend && ISSPACE(*p)) p++;
2219 if (p<pend) goto bad; /* trailing garbage */
2220 }
2221
2222 return mrb_fixnum_value(sign ? val : -val);
2223 nullbyte:
2224 mrb_raise(mrb, E_ARGUMENT_ERROR, "string contains null byte");
2225 /* not reached */
2226 bad:
2227 mrb_raisef(mrb, E_ARGUMENT_ERROR, "invalid string for number(%S)",
2228 mrb_inspect(mrb, mrb_str_new(mrb, str, pend-str)));
2229 /* not reached */
2230 return mrb_fixnum_value(0);
2231}
2232
2233MRB_API mrb_value
2234mrb_cstr_to_inum(mrb_state *mrb, const char *str, int base, int badcheck)
2235{
2236 return mrb_str_len_to_inum(mrb, str, strlen(str), base, badcheck);
2237}
2238
2239MRB_API const char*
2240mrb_string_value_cstr(mrb_state *mrb, mrb_value *ptr)
2241{
2242 mrb_value str = mrb_str_to_str(mrb, *ptr);
2243 struct RString *ps = mrb_str_ptr(str);
2244 mrb_int len = mrb_str_strlen(mrb, ps);
2245 char *p = RSTR_PTR(ps);
2246
2247 if (!p || p[len] != '\0') {
2248 if (MRB_FROZEN_P(ps)) {
2249 *ptr = str = mrb_str_dup(mrb, str);
2250 ps = mrb_str_ptr(str);
2251 }
2252 mrb_str_modify(mrb, ps);
2253 return RSTR_PTR(ps);
2254 }
2255 return p;
2256}
2257
2258MRB_API mrb_value
2259mrb_str_to_inum(mrb_state *mrb, mrb_value str, mrb_int base, mrb_bool badcheck)
2260{
2261 const char *s;
2262 mrb_int len;
2263
2264 s = mrb_string_value_ptr(mrb, str);
2265 len = RSTRING_LEN(str);
2266 return mrb_str_len_to_inum(mrb, s, len, base, badcheck);
2267}
2268
2269/* 15.2.10.5.38 */
2270/*
2271 * call-seq:
2272 * str.to_i(base=10) => integer
2273 *
2274 * Returns the result of interpreting leading characters in <i>str</i> as an
2275 * integer base <i>base</i> (between 2 and 36). Extraneous characters past the
2276 * end of a valid number are ignored. If there is not a valid number at the
2277 * start of <i>str</i>, <code>0</code> is returned. This method never raises an
2278 * exception.
2279 *
2280 * "12345".to_i #=> 12345
2281 * "99 red balloons".to_i #=> 99
2282 * "0a".to_i #=> 0
2283 * "0a".to_i(16) #=> 10
2284 * "hello".to_i #=> 0
2285 * "1100101".to_i(2) #=> 101
2286 * "1100101".to_i(8) #=> 294977
2287 * "1100101".to_i(10) #=> 1100101
2288 * "1100101".to_i(16) #=> 17826049
2289 */
2290static mrb_value
2291mrb_str_to_i(mrb_state *mrb, mrb_value self)
2292{
2293 mrb_int base = 10;
2294
2295 mrb_get_args(mrb, "|i", &base);
2296 if (base < 0) {
2297 mrb_raisef(mrb, E_ARGUMENT_ERROR, "illegal radix %S", mrb_fixnum_value(base));
2298 }
2299 return mrb_str_to_inum(mrb, self, base, FALSE);
2300}
2301
/*
 * Converts the NUL-terminated string p to a double using mrb_float_read().
 *
 *   badcheck  TRUE: raise ArgumentError when no number can be read, when an
 *             underscore is not placed between two digits, or when anything
 *             other than whitespace follows the number.
 *             FALSE: return what could be read; a "0x"/"0X" prefix yields 0.0.
 *
 * A NULL p yields 0.0. When the first read stops before the end of the
 * string, the text is copied into a local buffer with underscores removed
 * and read a second time.
 */
MRB_API double
mrb_cstr_to_dbl(mrb_state *mrb, const char * p, mrb_bool badcheck)
{
  char *end;
  char buf[DBL_DIG * 4 + 10];
  double d;

  /* NOTE(review): max_width is not referenced anywhere in this function */
  enum {max_width = 20};

  if (!p) return 0.0;
  while (ISSPACE(*p)) p++;

  if (!badcheck && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
    return 0.0;
  }
  d = mrb_float_read(p, &end);
  if (p == end) {
    /* nothing was consumed */
    if (badcheck) {
bad:
      /* also the jump target for every later validation failure */
      mrb_raisef(mrb, E_ARGUMENT_ERROR, "invalid string for float(%S)", mrb_str_new_cstr(mrb, p));
      /* not reached */
    }
    return d;
  }
  if (*end) {
    /* the read stopped early: rebuild the text without underscores */
    char *n = buf;
    char *e = buf + sizeof(buf) - 1;   /* last slot is kept for the NUL */
    char prev = 0;

    /* copy the part that was already accepted */
    while (p < end && n < e) prev = *n++ = *p++;
    while (*p) {
      if (*p == '_') {
        /* remove underscores between digits */
        if (badcheck) {
          if (n == buf || !ISDIGIT(prev)) goto bad;
          ++p;
          if (!ISDIGIT(*p)) goto bad;
        }
        else {
          while (*++p == '_');
          continue;
        }
      }
      prev = *p++;
      /* input longer than the buffer is silently truncated */
      if (n < e) *n++ = prev;
    }
    *n = '\0';
    p = buf;

    if (!badcheck && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
      return 0.0;
    }

    d = mrb_float_read(p, &end);
    if (badcheck) {
      if (!end || p == end) goto bad;
      /* only whitespace may follow the number */
      while (*end && ISSPACE(*end)) end++;
      if (*end) goto bad;
    }
  }
  return d;
}
2364
2365MRB_API double
2366mrb_str_to_dbl(mrb_state *mrb, mrb_value str, mrb_bool badcheck)
2367{
2368 char *s;
2369 mrb_int len;
2370
2371 str = mrb_str_to_str(mrb, str);
2372 s = RSTRING_PTR(str);
2373 len = RSTRING_LEN(str);
2374 if (s) {
2375 if (badcheck && memchr(s, '\0', len)) {
2376 mrb_raise(mrb, E_ARGUMENT_ERROR, "string for Float contains null byte");
2377 }
2378 if (s[len]) { /* no sentinel somehow */
2379 struct RString *temp_str = str_new(mrb, s, len);
2380 s = RSTR_PTR(temp_str);
2381 }
2382 }
2383 return mrb_cstr_to_dbl(mrb, s, badcheck);
2384}
2385
2386/* 15.2.10.5.39 */
2387/*
2388 * call-seq:
2389 * str.to_f => float
2390 *
2391 * Returns the result of interpreting leading characters in <i>str</i> as a
2392 * floating point number. Extraneous characters past the end of a valid number
2393 * are ignored. If there is not a valid number at the start of <i>str</i>,
2394 * <code>0.0</code> is returned. This method never raises an exception.
2395 *
2396 * "123.45e1".to_f #=> 1234.5
2397 * "45.67 degrees".to_f #=> 45.67
2398 * "thx1138".to_f #=> 0.0
2399 */
2400static mrb_value
2401mrb_str_to_f(mrb_state *mrb, mrb_value self)
2402{
2403 return mrb_float_value(mrb, mrb_str_to_dbl(mrb, self, FALSE));
2404}
2405
2406/* 15.2.10.5.40 */
2407/*
2408 * call-seq:
2409 * str.to_s => str
2410 * str.to_str => str
2411 *
2412 * Returns the receiver.
2413 */
2414static mrb_value
2415mrb_str_to_s(mrb_state *mrb, mrb_value self)
2416{
2417 if (mrb_obj_class(mrb, self) != mrb->string_class) {
2418 return mrb_str_dup(mrb, self);
2419 }
2420 return self;
2421}
2422
2423/* 15.2.10.5.43 */
2424/*
2425 * call-seq:
2426 * str.upcase! => str or nil
2427 *
2428 * Upcases the contents of <i>str</i>, returning <code>nil</code> if no changes
2429 * were made.
2430 */
2431static mrb_value
2432mrb_str_upcase_bang(mrb_state *mrb, mrb_value str)
2433{
2434 struct RString *s = mrb_str_ptr(str);
2435 char *p, *pend;
2436 mrb_bool modify = FALSE;
2437
2438 mrb_str_modify(mrb, s);
2439 p = RSTRING_PTR(str);
2440 pend = RSTRING_END(str);
2441 while (p < pend) {
2442 if (ISLOWER(*p)) {
2443 *p = TOUPPER(*p);
2444 modify = TRUE;
2445 }
2446 p++;
2447 }
2448
2449 if (modify) return str;
2450 return mrb_nil_value();
2451}
2452
2453/* 15.2.10.5.42 */
2454/*
2455 * call-seq:
2456 * str.upcase => new_str
2457 *
2458 * Returns a copy of <i>str</i> with all lowercase letters replaced with their
2459 * uppercase counterparts. The operation is locale insensitive---only
2460 * characters 'a' to 'z' are affected.
2461 *
2462 * "hEllO".upcase #=> "HELLO"
2463 */
2464static mrb_value
2465mrb_str_upcase(mrb_state *mrb, mrb_value self)
2466{
2467 mrb_value str;
2468
2469 str = mrb_str_dup(mrb, self);
2470 mrb_str_upcase_bang(mrb, str);
2471 return str;
2472}
2473
2474#define IS_EVSTR(p,e) ((p) < (e) && (*(p) == '$' || *(p) == '@' || *(p) == '{'))
2475
2476/*
2477 * call-seq:
2478 * str.dump -> new_str
2479 *
2480 * Produces a version of <i>str</i> with all nonprinting characters replaced by
2481 * <code>\nnn</code> notation and all special characters escaped.
2482 */
2483mrb_value
2484mrb_str_dump(mrb_state *mrb, mrb_value str)
2485{
2486 mrb_int len;
2487 const char *p, *pend;
2488 char *q;
2489 struct RString *result;
2490
2491 len = 2; /* "" */
2492 p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
2493 while (p < pend) {
2494 unsigned char c = *p++;
2495 switch (c) {
2496 case '"': case '\\':
2497 case '\n': case '\r':
2498 case '\t': case '\f':
2499 case '\013': case '\010': case '\007': case '\033':
2500 len += 2;
2501 break;
2502
2503 case '#':
2504 len += IS_EVSTR(p, pend) ? 2 : 1;
2505 break;
2506
2507 default:
2508 if (ISPRINT(c)) {
2509 len++;
2510 }
2511 else {
2512 len += 4; /* \NNN */
2513 }
2514 break;
2515 }
2516 }
2517
2518 result = str_new(mrb, 0, len);
2519 str_with_class(mrb, result, str);
2520 p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
2521 q = RSTR_PTR(result);
2522 *q++ = '"';
2523 while (p < pend) {
2524 unsigned char c = *p++;
2525
2526 switch (c) {
2527 case '"':
2528 case '\\':
2529 *q++ = '\\';
2530 *q++ = c;
2531 break;
2532
2533 case '\n':
2534 *q++ = '\\';
2535 *q++ = 'n';
2536 break;
2537
2538 case '\r':
2539 *q++ = '\\';
2540 *q++ = 'r';
2541 break;
2542
2543 case '\t':
2544 *q++ = '\\';
2545 *q++ = 't';
2546 break;
2547
2548 case '\f':
2549 *q++ = '\\';
2550 *q++ = 'f';
2551 break;
2552
2553 case '\013':
2554 *q++ = '\\';
2555 *q++ = 'v';
2556 break;
2557
2558 case '\010':
2559 *q++ = '\\';
2560 *q++ = 'b';
2561 break;
2562
2563 case '\007':
2564 *q++ = '\\';
2565 *q++ = 'a';
2566 break;
2567
2568 case '\033':
2569 *q++ = '\\';
2570 *q++ = 'e';
2571 break;
2572
2573 case '#':
2574 if (IS_EVSTR(p, pend)) *q++ = '\\';
2575 *q++ = '#';
2576 break;
2577
2578 default:
2579 if (ISPRINT(c)) {
2580 *q++ = c;
2581 }
2582 else {
2583 *q++ = '\\';
2584 q[2] = '0' + c % 8; c /= 8;
2585 q[1] = '0' + c % 8; c /= 8;
2586 q[0] = '0' + c % 8;
2587 q += 3;
2588 }
2589 }
2590 }
2591 *q = '"';
2592 return mrb_obj_value(result);
2593}
2594
2595MRB_API mrb_value
2596mrb_str_cat(mrb_state *mrb, mrb_value str, const char *ptr, size_t len)
2597{
2598 str_buf_cat(mrb, mrb_str_ptr(str), ptr, len);
2599 return str;
2600}
2601
2602MRB_API mrb_value
2603mrb_str_cat_cstr(mrb_state *mrb, mrb_value str, const char *ptr)
2604{
2605 return mrb_str_cat(mrb, str, ptr, strlen(ptr));
2606}
2607
2608MRB_API mrb_value
2609mrb_str_cat_str(mrb_state *mrb, mrb_value str, mrb_value str2)
2610{
2611 return mrb_str_cat(mrb, str, RSTRING_PTR(str2), RSTRING_LEN(str2));
2612}
2613
2614MRB_API mrb_value
2615mrb_str_append(mrb_state *mrb, mrb_value str1, mrb_value str2)
2616{
2617 str2 = mrb_str_to_str(mrb, str2);
2618 return mrb_str_cat_str(mrb, str1, str2);
2619}
2620
2621#define CHAR_ESC_LEN 13 /* sizeof(\x{ hex of 32bit unsigned int } \0) */
2622
2623/*
2624 * call-seq:
2625 * str.inspect -> string
2626 *
2627 * Returns a printable version of _str_, surrounded by quote marks,
2628 * with special characters escaped.
2629 *
2630 * str = "hello"
2631 * str[3] = "\b"
2632 * str.inspect #=> "\"hel\\bo\""
2633 */
2634mrb_value
2635mrb_str_inspect(mrb_state *mrb, mrb_value str)
2636{
2637 const char *p, *pend;
2638 char buf[CHAR_ESC_LEN + 1];
2639 mrb_value result = mrb_str_new_lit(mrb, "\"");
2640
2641 p = RSTRING_PTR(str); pend = RSTRING_END(str);
2642 for (;p < pend; p++) {
2643 unsigned char c, cc;
2644#ifdef MRB_UTF8_STRING
2645 mrb_int clen;
2646
2647 clen = utf8len(p, pend);
2648 if (clen > 1) {
2649 mrb_int i;
2650
2651 for (i=0; i<clen; i++) {
2652 buf[i] = p[i];
2653 }
2654 mrb_str_cat(mrb, result, buf, clen);
2655 p += clen-1;
2656 continue;
2657 }
2658#endif
2659 c = *p;
2660 if (c == '"'|| c == '\\' || (c == '#' && IS_EVSTR(p+1, pend))) {
2661 buf[0] = '\\'; buf[1] = c;
2662 mrb_str_cat(mrb, result, buf, 2);
2663 continue;
2664 }
2665 if (ISPRINT(c)) {
2666 buf[0] = c;
2667 mrb_str_cat(mrb, result, buf, 1);
2668 continue;
2669 }
2670 switch (c) {
2671 case '\n': cc = 'n'; break;
2672 case '\r': cc = 'r'; break;
2673 case '\t': cc = 't'; break;
2674 case '\f': cc = 'f'; break;
2675 case '\013': cc = 'v'; break;
2676 case '\010': cc = 'b'; break;
2677 case '\007': cc = 'a'; break;
2678 case 033: cc = 'e'; break;
2679 default: cc = 0; break;
2680 }
2681 if (cc) {
2682 buf[0] = '\\';
2683 buf[1] = (char)cc;
2684 mrb_str_cat(mrb, result, buf, 2);
2685 continue;
2686 }
2687 else {
2688 buf[0] = '\\';
2689 buf[3] = '0' + c % 8; c /= 8;
2690 buf[2] = '0' + c % 8; c /= 8;
2691 buf[1] = '0' + c % 8;
2692 mrb_str_cat(mrb, result, buf, 4);
2693 continue;
2694 }
2695 }
2696 mrb_str_cat_lit(mrb, result, "\"");
2697
2698 return result;
2699}
2700
2701/*
2702 * call-seq:
2703 * str.bytes -> array of fixnums
2704 *
2705 * Returns an array of bytes in _str_.
2706 *
2707 * str = "hello"
2708 * str.bytes #=> [104, 101, 108, 108, 111]
2709 */
2710static mrb_value
2711mrb_str_bytes(mrb_state *mrb, mrb_value str)
2712{
2713 struct RString *s = mrb_str_ptr(str);
2714 mrb_value a = mrb_ary_new_capa(mrb, RSTR_LEN(s));
2715 unsigned char *p = (unsigned char *)(RSTR_PTR(s)), *pend = p + RSTR_LEN(s);
2716
2717 while (p < pend) {
2718 mrb_ary_push(mrb, a, mrb_fixnum_value(p[0]));
2719 p++;
2720 }
2721 return a;
2722}
2723
2724/* ---------------------------*/
2725void
2726mrb_init_string(mrb_state *mrb)
2727{
2728 struct RClass *s;
2729
2730 mrb_static_assert(RSTRING_EMBED_LEN_MAX < (1 << 5), "pointer size too big for embedded string");
2731
2732 mrb->string_class = s = mrb_define_class(mrb, "String", mrb->object_class); /* 15.2.10 */
2733 MRB_SET_INSTANCE_TT(s, MRB_TT_STRING);
2734
2735 mrb_define_method(mrb, s, "bytesize", mrb_str_bytesize, MRB_ARGS_NONE());
2736
2737 mrb_define_method(mrb, s, "<=>", mrb_str_cmp_m, MRB_ARGS_REQ(1)); /* 15.2.10.5.1 */
2738 mrb_define_method(mrb, s, "==", mrb_str_equal_m, MRB_ARGS_REQ(1)); /* 15.2.10.5.2 */
2739 mrb_define_method(mrb, s, "+", mrb_str_plus_m, MRB_ARGS_REQ(1)); /* 15.2.10.5.4 */
2740 mrb_define_method(mrb, s, "*", mrb_str_times, MRB_ARGS_REQ(1)); /* 15.2.10.5.5 */
2741 mrb_define_method(mrb, s, "[]", mrb_str_aref_m, MRB_ARGS_ANY()); /* 15.2.10.5.6 */
2742 mrb_define_method(mrb, s, "capitalize", mrb_str_capitalize, MRB_ARGS_NONE()); /* 15.2.10.5.7 */
2743 mrb_define_method(mrb, s, "capitalize!", mrb_str_capitalize_bang, MRB_ARGS_NONE()); /* 15.2.10.5.8 */
2744 mrb_define_method(mrb, s, "chomp", mrb_str_chomp, MRB_ARGS_ANY()); /* 15.2.10.5.9 */
2745 mrb_define_method(mrb, s, "chomp!", mrb_str_chomp_bang, MRB_ARGS_ANY()); /* 15.2.10.5.10 */
2746 mrb_define_method(mrb, s, "chop", mrb_str_chop, MRB_ARGS_NONE()); /* 15.2.10.5.11 */
2747 mrb_define_method(mrb, s, "chop!", mrb_str_chop_bang, MRB_ARGS_NONE()); /* 15.2.10.5.12 */
2748 mrb_define_method(mrb, s, "downcase", mrb_str_downcase, MRB_ARGS_NONE()); /* 15.2.10.5.13 */
2749 mrb_define_method(mrb, s, "downcase!", mrb_str_downcase_bang, MRB_ARGS_NONE()); /* 15.2.10.5.14 */
2750 mrb_define_method(mrb, s, "empty?", mrb_str_empty_p, MRB_ARGS_NONE()); /* 15.2.10.5.16 */
2751 mrb_define_method(mrb, s, "eql?", mrb_str_eql, MRB_ARGS_REQ(1)); /* 15.2.10.5.17 */
2752
2753 mrb_define_method(mrb, s, "hash", mrb_str_hash_m, MRB_ARGS_NONE()); /* 15.2.10.5.20 */
2754 mrb_define_method(mrb, s, "include?", mrb_str_include, MRB_ARGS_REQ(1)); /* 15.2.10.5.21 */
2755 mrb_define_method(mrb, s, "index", mrb_str_index, MRB_ARGS_ANY()); /* 15.2.10.5.22 */
2756 mrb_define_method(mrb, s, "initialize", mrb_str_init, MRB_ARGS_REQ(1)); /* 15.2.10.5.23 */
2757 mrb_define_method(mrb, s, "initialize_copy", mrb_str_replace, MRB_ARGS_REQ(1)); /* 15.2.10.5.24 */
2758 mrb_define_method(mrb, s, "intern", mrb_str_intern, MRB_ARGS_NONE()); /* 15.2.10.5.25 */
2759 mrb_define_method(mrb, s, "length", mrb_str_size, MRB_ARGS_NONE()); /* 15.2.10.5.26 */
2760 mrb_define_method(mrb, s, "replace", mrb_str_replace, MRB_ARGS_REQ(1)); /* 15.2.10.5.28 */
2761 mrb_define_method(mrb, s, "reverse", mrb_str_reverse, MRB_ARGS_NONE()); /* 15.2.10.5.29 */
2762 mrb_define_method(mrb, s, "reverse!", mrb_str_reverse_bang, MRB_ARGS_NONE()); /* 15.2.10.5.30 */
2763 mrb_define_method(mrb, s, "rindex", mrb_str_rindex, MRB_ARGS_ANY()); /* 15.2.10.5.31 */
2764 mrb_define_method(mrb, s, "size", mrb_str_size, MRB_ARGS_NONE()); /* 15.2.10.5.33 */
2765 mrb_define_method(mrb, s, "slice", mrb_str_aref_m, MRB_ARGS_ANY()); /* 15.2.10.5.34 */
2766 mrb_define_method(mrb, s, "split", mrb_str_split_m, MRB_ARGS_ANY()); /* 15.2.10.5.35 */
2767
2768 mrb_define_method(mrb, s, "to_f", mrb_str_to_f, MRB_ARGS_NONE()); /* 15.2.10.5.38 */
2769 mrb_define_method(mrb, s, "to_i", mrb_str_to_i, MRB_ARGS_ANY()); /* 15.2.10.5.39 */
2770 mrb_define_method(mrb, s, "to_s", mrb_str_to_s, MRB_ARGS_NONE()); /* 15.2.10.5.40 */
2771 mrb_define_method(mrb, s, "to_str", mrb_str_to_s, MRB_ARGS_NONE());
2772 mrb_define_method(mrb, s, "to_sym", mrb_str_intern, MRB_ARGS_NONE()); /* 15.2.10.5.41 */
2773 mrb_define_method(mrb, s, "upcase", mrb_str_upcase, MRB_ARGS_NONE()); /* 15.2.10.5.42 */
2774 mrb_define_method(mrb, s, "upcase!", mrb_str_upcase_bang, MRB_ARGS_NONE()); /* 15.2.10.5.43 */
2775 mrb_define_method(mrb, s, "inspect", mrb_str_inspect, MRB_ARGS_NONE()); /* 15.2.10.5.46(x) */
2776 mrb_define_method(mrb, s, "bytes", mrb_str_bytes, MRB_ARGS_NONE());
2777}
2778
2779/*
2780 * Source code for the "strtod" library procedure.
2781 *
2782 * Copyright (c) 1988-1993 The Regents of the University of California.
2783 * Copyright (c) 1994 Sun Microsystems, Inc.
2784 *
2785 * Permission to use, copy, modify, and distribute this
2786 * software and its documentation for any purpose and without
2787 * fee is hereby granted, provided that the above copyright
2788 * notice appear in all copies. The University of California
2789 * makes no representations about the suitability of this
2790 * software for any purpose. It is provided "as is" without
2791 * express or implied warranty.
2792 *
2793 * RCS: @(#) $Id$
2794 */
2795
2796#include <ctype.h>
2797#include <errno.h>
2798
/* Largest base-10 exponent magnitude that is ever applied.  Any exponent
 * larger than this already produces underflow or overflow, so there is
 * no need to worry about additional digits. */
static const int maxExponent = 511;

/* Table of binary powers of 10: entry i is 10^(2^i).  Used to convert a
 * decimal exponent into a floating-point scale factor one bit at a time.
 * Nine entries cover every exponent up to maxExponent (511 < 2^9). */
static const double powersOf10[] = {
  10.,
  100.,
  1.0e4,
  1.0e8,
  1.0e16,
  1.0e32,
  1.0e64,
  1.0e128,
  1.0e256
};

/*
 * Parses a decimal ASCII floating-point number (a strtod work-alike).
 *
 * string  NUL-terminated input, optionally preceded by white space.
 *         It must have the form "-I.FE-X", where I is the integer part
 *         of the mantissa, F is the fractional part of the mantissa and
 *         X is the exponent.  Either of the signs may be "+", "-", or
 *         omitted.  Either I or F may be omitted, or both.  The decimal
 *         point isn't necessary unless F is present.  The "E" may
 *         actually be an "e".  E and X may both be omitted (but not just
 *         one): an "E" that is not followed by at least one digit is
 *         not part of the number and is left unconsumed.
 * endPtr  If non-NULL, receives the address of the first character that
 *         was not consumed.  When no mantissa digits are found this is
 *         `string` itself.
 *
 * Returns the parsed value, or 0.0 when there are no mantissa digits.
 * errno is set to ERANGE when the effective exponent of a non-zero
 * mantissa exceeds maxExponent in magnitude; a zero mantissa yields a
 * (signed) zero whatever the exponent and never touches errno.
 */
MRB_API double
mrb_float_read(const char *string, char **endPtr)
{
  int sign, expSign = FALSE;
  double fraction, dblExp;
  const double *d;
  const char *p;
  int c;
  int exp = 0;          /* Exponent read from "EX" field. */
  int fracExp = 0;      /* Exponent that derives from the fractional
                         * part.  Under normal circumstances, it is
                         * the negative of the number of digits in F.
                         * However, if I is very long, the last digits
                         * of I get dropped (otherwise a long I with a
                         * large negative exponent could cause an
                         * unnecessary overflow on I alone).  In this
                         * case, fracExp is incremented one for each
                         * dropped digit. */
  int mantSize;         /* Number of digits in mantissa. */
  int decPt;            /* Number of mantissa digits BEFORE decimal
                         * point. */
  const char *pExp;     /* Temporarily holds location of exponent
                         * in string. */

  /*
   * Strip off leading blanks and check for a sign.  Every byte handed to
   * a <ctype.h> function is converted to unsigned char first: passing a
   * negative char value is undefined behaviour.
   */

  p = string;
  while (isspace((unsigned char)*p)) {
    p += 1;
  }
  if (*p == '-') {
    sign = TRUE;
    p += 1;
  }
  else {
    if (*p == '+') {
      p += 1;
    }
    sign = FALSE;
  }

  /*
   * Count the number of digits in the mantissa (including the decimal
   * point), and also locate the decimal point.
   */

  decPt = -1;
  for (mantSize = 0; ; mantSize += 1) {
    c = (unsigned char)*p;
    if (!isdigit(c)) {
      if ((c != '.') || (decPt >= 0)) {
        break;
      }
      decPt = mantSize;
    }
    p += 1;
  }

  /*
   * Now suck up the digits in the mantissa.  Use two integers to
   * collect 9 digits each (this is faster than using floating-point).
   * If the mantissa has more than 18 digits, ignore the extras, since
   * they can't affect the value anyway.
   */

  pExp = p;
  p -= mantSize;
  if (decPt < 0) {
    decPt = mantSize;
  }
  else {
    mantSize -= 1;      /* One of the digits was the point. */
  }
  if (mantSize > 18) {
    if (decPt - 18 > 29999) {
      fracExp = 29999;
    }
    else {
      fracExp = decPt - 18;
    }
    mantSize = 18;
  }
  else {
    fracExp = decPt - mantSize;
  }
  if (mantSize == 0) {
    /* No digits at all: nothing is consumed. */
    fraction = 0.0;
    p = string;
    goto done;
  }
  else {
    int frac1, frac2;

    frac1 = 0;
    for ( ; mantSize > 9; mantSize -= 1) {
      c = *p;
      p += 1;
      if (c == '.') {
        c = *p;
        p += 1;
      }
      frac1 = 10*frac1 + (c - '0');
    }
    frac2 = 0;
    for (; mantSize > 0; mantSize -= 1) {
      c = *p;
      p += 1;
      if (c == '.') {
        c = *p;
        p += 1;
      }
      frac2 = 10*frac2 + (c - '0');
    }
    fraction = (1.0e9 * frac1) + frac2;
  }

  /*
   * Skim off the exponent.
   */

  p = pExp;
  if ((*p == 'E') || (*p == 'e')) {
    p += 1;
    if (*p == '-') {
      expSign = TRUE;
      p += 1;
    }
    else {
      if (*p == '+') {
        p += 1;
      }
      expSign = FALSE;
    }
    if (!isdigit((unsigned char)*p)) {
      /* "E" (and an optional sign) without digits is not an exponent:
       * back up so that the number ends right after the mantissa. */
      p = pExp;
      expSign = FALSE;
    }
    else {
      while (isdigit((unsigned char)*p)) {
        exp = exp * 10 + (*p - '0');
        if (exp > 19999) {
          exp = 19999;  /* clamp to keep the accumulator from overflowing */
        }
        p += 1;
      }
    }
  }

  /*
   * A zero mantissa is zero whatever the exponent.  Scaling it anyway
   * would compute 0.0 * inf (a NaN) once the power of ten overflows.
   */

  if (fraction == 0.0) {
    goto done;
  }

  if (expSign) {
    exp = fracExp - exp;
  }
  else {
    exp = fracExp + exp;
  }

  /*
   * Generate a floating-point number that represents the exponent.
   * Do this by processing the exponent one bit at a time to combine
   * many powers of 2 of 10. Then combine the exponent with the
   * fraction.
   */

  if (exp < 0) {
    expSign = TRUE;
    exp = -exp;
  }
  else {
    expSign = FALSE;
  }
  if (exp > maxExponent) {
    exp = maxExponent;
    errno = ERANGE;
  }
  dblExp = 1.0;
  for (d = powersOf10; exp != 0; exp >>= 1, d += 1) {
    if (exp & 01) {
      dblExp *= *d;
    }
  }
  if (expSign) {
    fraction /= dblExp;
  }
  else {
    fraction *= dblExp;
  }

done:
  if (endPtr != NULL) {
    *endPtr = (char *) p;
  }

  if (sign) {
    return -fraction;
  }
  return fraction;
}
Note: See TracBrowser for help on using the repository browser.