source: EcnlProtoTool/trunk/mruby-2.1.1/src/symbol.c@ 439

Last change on this file since 439 was 439, checked in by coas-nagasima, 4 years ago

mrubyを2.1.1に更新

  • Property svn:eol-style set to native
  • Property svn:mime-type set to text/x-csrc;charset=UTF-8
File size: 14.1 KB
Line 
1/*
2** symbol.c - Symbol class
3**
4** See Copyright Notice in mruby.h
5*/
6
7#include <limits.h>
8#include <string.h>
9#include <mruby.h>
10#include <mruby/khash.h>
11#include <mruby/string.h>
12#include <mruby/dump.h>
13#include <mruby/class.h>
14
15/* ------------------------------------------------------ */
16typedef struct symbol_name {
17 mrb_bool lit : 1;
18 uint8_t prev;
19 uint16_t len;
20 const char *name;
21} symbol_name;
22
/*
 * Tag bits kept in the low end of an mrb_sym:
 *   bit 0 (SYMBOL_INLINE)       - the name is packed into the symbol value
 *   bit 1 (SYMBOL_INLINE_LOWER) - packed with 5 bits per character
 * Table-backed symbols are shifted left by SYMBOL_NORMAL_SHIFT; packed
 * characters start at bit SYMBOL_INLINE_SHIFT.
 */
#define SYMBOL_INLINE_BIT_POS       1
#define SYMBOL_INLINE_LOWER_BIT_POS 2
#define SYMBOL_INLINE               (1 << (SYMBOL_INLINE_BIT_POS - 1))
#define SYMBOL_INLINE_LOWER         (1 << (SYMBOL_INLINE_LOWER_BIT_POS - 1))
#define SYMBOL_NORMAL_SHIFT         SYMBOL_INLINE_BIT_POS
#define SYMBOL_INLINE_SHIFT         SYMBOL_INLINE_LOWER_BIT_POS

#ifdef MRB_ENABLE_ALL_SYMBOLS
/* Inline packing disabled: every symbol goes through the table. */
# define SYMBOL_INLINE_P(sym) FALSE
# define SYMBOL_INLINE_LOWER_P(sym) FALSE
# define sym_inline_pack(name, len) 0
# define sym_inline_unpack(sym, buf, lenp) NULL
#else
/* Non-zero when the corresponding tag bit is set in `sym`. */
# define SYMBOL_INLINE_P(sym) ((sym) & SYMBOL_INLINE)
# define SYMBOL_INLINE_LOWER_P(sym) ((sym) & SYMBOL_INLINE_LOWER)
#endif
38
39static void
40sym_validate_len(mrb_state *mrb, size_t len)
41{
42 if (len >= RITE_LV_NULL_MARK) {
43 mrb_raise(mrb, E_ARGUMENT_ERROR, "symbol length too long");
44 }
45}
46
47#ifndef MRB_ENABLE_ALL_SYMBOLS
48static const char pack_table[] = "_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
49
50static mrb_sym
51sym_inline_pack(const char *name, size_t len)
52{
53 const size_t lower_length_max = (MRB_SYMBOL_BIT - 2) / 5;
54 const size_t mix_length_max = (MRB_SYMBOL_BIT - 2) / 6;
55
56 char c;
57 const char *p;
58 size_t i;
59 mrb_sym sym = 0;
60 mrb_bool lower = TRUE;
61
62 if (len > lower_length_max) return 0; /* too long */
63 for (i=0; i<len; i++) {
64 uint32_t bits;
65
66 c = name[i];
67 if (c == 0) return 0; /* NUL in name */
68 p = strchr(pack_table, (int)c);
69 if (p == 0) return 0; /* non alnum char */
70 bits = (uint32_t)(p - pack_table)+1;
71 if (bits > 27) lower = FALSE;
72 if (i >= mix_length_max) break;
73 sym |= bits<<(i*6+SYMBOL_INLINE_SHIFT);
74 }
75 if (lower) {
76 sym = 0;
77 for (i=0; i<len; i++) {
78 uint32_t bits;
79
80 c = name[i];
81 p = strchr(pack_table, (int)c);
82 bits = (uint32_t)(p - pack_table)+1;
83 sym |= bits<<(i*5+SYMBOL_INLINE_SHIFT);
84 }
85 return sym | SYMBOL_INLINE | SYMBOL_INLINE_LOWER;
86 }
87 if (len > mix_length_max) return 0;
88 return sym | SYMBOL_INLINE;
89}
90
91static const char*
92sym_inline_unpack(mrb_sym sym, char *buf, mrb_int *lenp)
93{
94 int bit_per_char = SYMBOL_INLINE_LOWER_P(sym) ? 5 : 6;
95 int i;
96
97 mrb_assert(SYMBOL_INLINE_P(sym));
98
99 for (i=0; i<30/bit_per_char; i++) {
100 uint32_t bits = sym>>(i*bit_per_char+SYMBOL_INLINE_SHIFT) & ((1<<bit_per_char)-1);
101 if (bits == 0) break;
102 buf[i] = pack_table[bits-1];;
103 }
104 buf[i] = '\0';
105 if (lenp) *lenp = i;
106 return buf;
107}
108#endif
109
/*
 * Hashes `len` bytes of `key` (add / shift-add / shift-xor per byte,
 * followed by a final mixing step) and returns the low 8 bits of the
 * 32-bit result.
 */
static uint8_t
symhash(const char *key, size_t len)
{
  uint32_t acc = 0;
  const char *p = key;
  size_t remaining = len;

  while (remaining > 0) {
    acc += *p++;
    acc += acc << 10;
    acc ^= acc >> 6;
    remaining--;
  }
  /* final avalanche */
  acc += acc << 3;
  acc ^= acc >> 11;
  acc += acc << 15;
  return (uint8_t)(acc & 0xff);
}
125
126static mrb_sym
127find_symbol(mrb_state *mrb, const char *name, size_t len, uint8_t *hashp)
128{
129 mrb_sym i;
130 symbol_name *sname;
131 uint8_t hash;
132
133 /* inline symbol */
134 i = sym_inline_pack(name, len);
135 if (i > 0) return i;
136
137 hash = symhash(name, len);
138 if (hashp) *hashp = hash;
139
140 i = mrb->symhash[hash];
141 if (i == 0) return 0;
142 do {
143 sname = &mrb->symtbl[i];
144 if (sname->len == len && memcmp(sname->name, name, len) == 0) {
145 return i<<SYMBOL_NORMAL_SHIFT;
146 }
147 if (sname->prev == 0xff) {
148 i -= 0xff;
149 sname = &mrb->symtbl[i];
150 while (mrb->symtbl < sname) {
151 if (sname->len == len && memcmp(sname->name, name, len) == 0) {
152 return (mrb_sym)(sname - mrb->symtbl)<<SYMBOL_NORMAL_SHIFT;
153 }
154 sname--;
155 }
156 return 0;
157 }
158 i -= sname->prev;
159 } while (sname->prev > 0);
160 return 0;
161}
162
163static mrb_sym
164sym_intern(mrb_state *mrb, const char *name, size_t len, mrb_bool lit)
165{
166 mrb_sym sym;
167 symbol_name *sname;
168 uint8_t hash;
169
170 sym_validate_len(mrb, len);
171 sym = find_symbol(mrb, name, len, &hash);
172 if (sym > 0) return sym;
173
174 /* registering a new symbol */
175 sym = ++mrb->symidx;
176 if (mrb->symcapa < sym) {
177 if (mrb->symcapa == 0) mrb->symcapa = 100;
178 else mrb->symcapa = (size_t)(mrb->symcapa * 6 / 5);
179 mrb->symtbl = (symbol_name*)mrb_realloc(mrb, mrb->symtbl, sizeof(symbol_name)*(mrb->symcapa+1));
180 }
181 sname = &mrb->symtbl[sym];
182 sname->len = (uint16_t)len;
183 if (lit || mrb_ro_data_p(name)) {
184 sname->name = name;
185 sname->lit = TRUE;
186 }
187 else {
188 char *p = (char *)mrb_malloc(mrb, len+1);
189 memcpy(p, name, len);
190 p[len] = 0;
191 sname->name = (const char*)p;
192 sname->lit = FALSE;
193 }
194 if (mrb->symhash[hash]) {
195 mrb_sym i = sym - mrb->symhash[hash];
196 if (i > 0xff)
197 sname->prev = 0xff;
198 else
199 sname->prev = i;
200 }
201 else {
202 sname->prev = 0;
203 }
204 mrb->symhash[hash] = sym;
205
206 return sym<<SYMBOL_NORMAL_SHIFT;
207}
208
209MRB_API mrb_sym
210mrb_intern(mrb_state *mrb, const char *name, size_t len)
211{
212 return sym_intern(mrb, name, len, FALSE);
213}
214
215MRB_API mrb_sym
216mrb_intern_static(mrb_state *mrb, const char *name, size_t len)
217{
218 return sym_intern(mrb, name, len, TRUE);
219}
220
221MRB_API mrb_sym
222mrb_intern_cstr(mrb_state *mrb, const char *name)
223{
224 return mrb_intern(mrb, name, strlen(name));
225}
226
227MRB_API mrb_sym
228mrb_intern_str(mrb_state *mrb, mrb_value str)
229{
230 return mrb_intern(mrb, RSTRING_PTR(str), RSTRING_LEN(str));
231}
232
233MRB_API mrb_value
234mrb_check_intern(mrb_state *mrb, const char *name, size_t len)
235{
236 mrb_sym sym;
237
238 sym_validate_len(mrb, len);
239 sym = find_symbol(mrb, name, len, NULL);
240 if (sym > 0) return mrb_symbol_value(sym);
241 return mrb_nil_value();
242}
243
244MRB_API mrb_value
245mrb_check_intern_cstr(mrb_state *mrb, const char *name)
246{
247 return mrb_check_intern(mrb, name, strlen(name));
248}
249
250MRB_API mrb_value
251mrb_check_intern_str(mrb_state *mrb, mrb_value str)
252{
253 return mrb_check_intern(mrb, RSTRING_PTR(str), RSTRING_LEN(str));
254}
255
256static const char*
257sym2name_len(mrb_state *mrb, mrb_sym sym, char *buf, mrb_int *lenp)
258{
259 if (SYMBOL_INLINE_P(sym)) return sym_inline_unpack(sym, buf, lenp);
260
261 sym >>= SYMBOL_NORMAL_SHIFT;
262 if (sym == 0 || mrb->symidx < sym) {
263 if (lenp) *lenp = 0;
264 return NULL;
265 }
266
267 if (lenp) *lenp = mrb->symtbl[sym].len;
268 return mrb->symtbl[sym].name;
269}
270
271MRB_API const char*
272mrb_sym_name_len(mrb_state *mrb, mrb_sym sym, mrb_int *lenp)
273{
274 return sym2name_len(mrb, sym, mrb->symbuf, lenp);
275}
276
277void
278mrb_free_symtbl(mrb_state *mrb)
279{
280 mrb_sym i, lim;
281
282 for (i=1, lim=mrb->symidx+1; i<lim; i++) {
283 if (!mrb->symtbl[i].lit) {
284 mrb_free(mrb, (char*)mrb->symtbl[i].name);
285 }
286 }
287 mrb_free(mrb, mrb->symtbl);
288}
289
290void
291mrb_init_symtbl(mrb_state *mrb)
292{
293}
294
295/**********************************************************************
296 * Document-class: Symbol
297 *
298 * <code>Symbol</code> objects represent names and some strings
299 * inside the Ruby
300 * interpreter. They are generated using the <code>:name</code> and
301 * <code>:"string"</code> literals
302 * syntax, and by the various <code>to_sym</code> methods. The same
303 * <code>Symbol</code> object will be created for a given name or string
304 * for the duration of a program's execution, regardless of the context
305 * or meaning of that name. Thus if <code>Fred</code> is a constant in
306 * one context, a method in another, and a class in a third, the
307 * <code>Symbol</code> <code>:Fred</code> will be the same object in
308 * all three contexts.
309 *
310 * module One
311 * class Fred
312 * end
313 * $f1 = :Fred
314 * end
315 * module Two
316 * Fred = 1
317 * $f2 = :Fred
318 * end
319 * def Fred()
320 * end
321 * $f3 = :Fred
322 * $f1.object_id #=> 2514190
323 * $f2.object_id #=> 2514190
324 * $f3.object_id #=> 2514190
325 *
326 */
327
328/* 15.2.11.3.2 */
329/* 15.2.11.3.3 */
330/*
331 * call-seq:
332 * sym.id2name -> string
333 * sym.to_s -> string
334 *
335 * Returns the name or string corresponding to <i>sym</i>.
336 *
337 * :fred.id2name #=> "fred"
338 */
339static mrb_value
340sym_to_s(mrb_state *mrb, mrb_value sym)
341{
342 return mrb_sym_str(mrb, mrb_symbol(sym));
343}
344
345/* 15.2.11.3.4 */
346/*
347 * call-seq:
348 * sym.to_sym -> sym
349 * sym.intern -> sym
350 *
351 * In general, <code>to_sym</code> returns the <code>Symbol</code> corresponding
352 * to an object. As <i>sym</i> is already a symbol, <code>self</code> is returned
353 * in this case.
354 */
355
356static mrb_value
357sym_to_sym(mrb_state *mrb, mrb_value sym)
358{
359 return sym;
360}
361
362/* 15.2.11.3.5(x) */
363/*
364 * call-seq:
365 * sym.inspect -> string
366 *
367 * Returns the representation of <i>sym</i> as a symbol literal.
368 *
369 * :fred.inspect #=> ":fred"
370 */
371
/* SIGN_EXTEND_CHAR(c): value of `c` reinterpreted as a signed 8-bit
   quantity (so 0xff becomes -1). */
#if __STDC__
# define SIGN_EXTEND_CHAR(c) ((signed char)(c))
#else  /* not __STDC__ */
/* As in Harbison and Steele. */
# define SIGN_EXTEND_CHAR(c) ((((unsigned char)(c)) ^ 128) - 128)
#endif

/* True for characters allowed inside an identifier: alphanumerics and
   '_', excluding the byte that sign-extends to -1. */
#define is_identchar(c) (SIGN_EXTEND_CHAR(c) != -1 && (ISALNUM(c) || (c) == '_'))
379
380static mrb_bool
381is_special_global_name(const char* m)
382{
383 switch (*m) {
384 case '~': case '*': case '$': case '?': case '!': case '@':
385 case '/': case '\\': case ';': case ',': case '.': case '=':
386 case ':': case '<': case '>': case '\"':
387 case '&': case '`': case '\'': case '+':
388 case '0':
389 ++m;
390 break;
391 case '-':
392 ++m;
393 if (is_identchar(*m)) m += 1;
394 break;
395 default:
396 if (!ISDIGIT(*m)) return FALSE;
397 do ++m; while (ISDIGIT(*m));
398 break;
399 }
400 return !*m;
401}
402
403static mrb_bool
404symname_p(const char *name)
405{
406 const char *m = name;
407 mrb_bool localid = FALSE;
408
409 if (!m) return FALSE;
410 switch (*m) {
411 case '\0':
412 return FALSE;
413
414 case '$':
415 if (is_special_global_name(++m)) return TRUE;
416 goto id;
417
418 case '@':
419 if (*++m == '@') ++m;
420 goto id;
421
422 case '<':
423 switch (*++m) {
424 case '<': ++m; break;
425 case '=': if (*++m == '>') ++m; break;
426 default: break;
427 }
428 break;
429
430 case '>':
431 switch (*++m) {
432 case '>': case '=': ++m; break;
433 default: break;
434 }
435 break;
436
437 case '=':
438 switch (*++m) {
439 case '~': ++m; break;
440 case '=': if (*++m == '=') ++m; break;
441 default: return FALSE;
442 }
443 break;
444
445 case '*':
446 if (*++m == '*') ++m;
447 break;
448 case '!':
449 switch (*++m) {
450 case '=': case '~': ++m;
451 }
452 break;
453 case '+': case '-':
454 if (*++m == '@') ++m;
455 break;
456 case '|':
457 if (*++m == '|') ++m;
458 break;
459 case '&':
460 if (*++m == '&') ++m;
461 break;
462
463 case '^': case '/': case '%': case '~': case '`':
464 ++m;
465 break;
466
467 case '[':
468 if (*++m != ']') return FALSE;
469 if (*++m == '=') ++m;
470 break;
471
472 default:
473 localid = !ISUPPER(*m);
474id:
475 if (*m != '_' && !ISALPHA(*m)) return FALSE;
476 while (is_identchar(*m)) m += 1;
477 if (localid) {
478 switch (*m) {
479 case '!': case '?': case '=': ++m;
480 default: break;
481 }
482 }
483 break;
484 }
485 return *m ? FALSE : TRUE;
486}
487
488static mrb_value
489sym_inspect(mrb_state *mrb, mrb_value sym)
490{
491 mrb_value str;
492 const char *name;
493 mrb_int len;
494 mrb_sym id = mrb_symbol(sym);
495 char *sp;
496
497 name = mrb_sym_name_len(mrb, id, &len);
498 str = mrb_str_new(mrb, 0, len+1);
499 sp = RSTRING_PTR(str);
500 sp[0] = ':';
501 memcpy(sp+1, name, len);
502 mrb_assert_int_fit(mrb_int, len, size_t, SIZE_MAX);
503 if (!symname_p(name) || strlen(name) != (size_t)len) {
504 str = mrb_str_inspect(mrb, str);
505 sp = RSTRING_PTR(str);
506 sp[0] = ':';
507 sp[1] = '"';
508 }
509#ifdef MRB_UTF8_STRING
510 if (SYMBOL_INLINE_P(id)) RSTR_SET_ASCII_FLAG(mrb_str_ptr(str));
511#endif
512 return str;
513}
514
515MRB_API mrb_value
516mrb_sym_str(mrb_state *mrb, mrb_sym sym)
517{
518 mrb_int len;
519 const char *name = mrb_sym_name_len(mrb, sym, &len);
520
521 if (!name) return mrb_undef_value(); /* can't happen */
522 if (SYMBOL_INLINE_P(sym)) {
523 mrb_value str = mrb_str_new(mrb, name, len);
524 RSTR_SET_ASCII_FLAG(mrb_str_ptr(str));
525 return str;
526 }
527 return mrb_str_new_static(mrb, name, len);
528}
529
530static const char*
531sym_name(mrb_state *mrb, mrb_sym sym, mrb_bool dump)
532{
533 mrb_int len;
534 const char *name = mrb_sym_name_len(mrb, sym, &len);
535
536 if (!name) return NULL;
537 if (strlen(name) == (size_t)len && (!dump || symname_p(name))) {
538 return name;
539 }
540 else {
541 mrb_value str = SYMBOL_INLINE_P(sym) ?
542 mrb_str_new(mrb, name, len) : mrb_str_new_static(mrb, name, len);
543 str = mrb_str_dump(mrb, str);
544 return RSTRING_PTR(str);
545 }
546}
547
548MRB_API const char*
549mrb_sym_name(mrb_state *mrb, mrb_sym sym)
550{
551 return sym_name(mrb, sym, FALSE);
552}
553
554MRB_API const char*
555mrb_sym_dump(mrb_state *mrb, mrb_sym sym)
556{
557 return sym_name(mrb, sym, TRUE);
558}
559
560#define lesser(a,b) (((a)>(b))?(b):(a))
561
562static mrb_value
563sym_cmp(mrb_state *mrb, mrb_value s1)
564{
565 mrb_value s2;
566 mrb_sym sym1, sym2;
567
568 mrb_get_args(mrb, "o", &s2);
569 if (!mrb_symbol_p(s2)) return mrb_nil_value();
570 sym1 = mrb_symbol(s1);
571 sym2 = mrb_symbol(s2);
572 if (sym1 == sym2) return mrb_fixnum_value(0);
573 else {
574 const char *p1, *p2;
575 int retval;
576 mrb_int len, len1, len2;
577 char buf1[8], buf2[8];
578
579 p1 = sym2name_len(mrb, sym1, buf1, &len1);
580 p2 = sym2name_len(mrb, sym2, buf2, &len2);
581 len = lesser(len1, len2);
582 retval = memcmp(p1, p2, len);
583 if (retval == 0) {
584 if (len1 == len2) return mrb_fixnum_value(0);
585 if (len1 > len2) return mrb_fixnum_value(1);
586 return mrb_fixnum_value(-1);
587 }
588 if (retval > 0) return mrb_fixnum_value(1);
589 return mrb_fixnum_value(-1);
590 }
591}
592
593void
594mrb_init_symbol(mrb_state *mrb)
595{
596 struct RClass *sym;
597
598 mrb->symbol_class = sym = mrb_define_class(mrb, "Symbol", mrb->object_class); /* 15.2.11 */
599 MRB_SET_INSTANCE_TT(sym, MRB_TT_SYMBOL);
600 mrb_undef_class_method(mrb, sym, "new");
601
602 mrb_define_method(mrb, sym, "id2name", sym_to_s, MRB_ARGS_NONE()); /* 15.2.11.3.2 */
603 mrb_define_method(mrb, sym, "to_s", sym_to_s, MRB_ARGS_NONE()); /* 15.2.11.3.3 */
604 mrb_define_method(mrb, sym, "to_sym", sym_to_sym, MRB_ARGS_NONE()); /* 15.2.11.3.4 */
605 mrb_define_method(mrb, sym, "inspect", sym_inspect, MRB_ARGS_NONE()); /* 15.2.11.3.5(x) */
606 mrb_define_method(mrb, sym, "<=>", sym_cmp, MRB_ARGS_REQ(1));
607}
Note: See TracBrowser for help on using the repository browser.