1 | /*
|
---|
2 | ** symbol.c - Symbol class
|
---|
3 | **
|
---|
4 | ** See Copyright Notice in mruby.h
|
---|
5 | */
|
---|
6 |
|
---|
7 | #include <limits.h>
|
---|
8 | #include <string.h>
|
---|
9 | #include <mruby.h>
|
---|
10 | #include <mruby/khash.h>
|
---|
11 | #include <mruby/string.h>
|
---|
12 | #include <mruby/dump.h>
|
---|
13 | #include <mruby/class.h>
|
---|
14 |
|
---|
15 | /* ------------------------------------------------------ */
|
---|
16 | typedef struct symbol_name {
|
---|
17 | mrb_bool lit : 1;
|
---|
18 | uint16_t len;
|
---|
19 | const char *name;
|
---|
20 | } symbol_name;
|
---|
21 |
|
---|
22 | static inline khint_t
|
---|
23 | sym_hash_func(mrb_state *mrb, mrb_sym s)
|
---|
24 | {
|
---|
25 | khint_t h = 0;
|
---|
26 | size_t i, len = mrb->symtbl[s].len;
|
---|
27 | const char *p = mrb->symtbl[s].name;
|
---|
28 |
|
---|
29 | for (i=0; i<len; i++) {
|
---|
30 | h = (h << 5) - h + *p++;
|
---|
31 | }
|
---|
32 | return h;
|
---|
33 | }
|
---|
34 | #define sym_hash_equal(mrb,a, b) (mrb->symtbl[a].len == mrb->symtbl[b].len && memcmp(mrb->symtbl[a].name, mrb->symtbl[b].name, mrb->symtbl[a].len) == 0)
|
---|
35 |
|
---|
36 | KHASH_DECLARE(n2s, mrb_sym, mrb_sym, FALSE)
|
---|
37 | KHASH_DEFINE (n2s, mrb_sym, mrb_sym, FALSE, sym_hash_func, sym_hash_equal)
|
---|
38 | /* ------------------------------------------------------ */
|
---|
39 |
|
---|
40 | static void
|
---|
41 | sym_validate_len(mrb_state *mrb, size_t len)
|
---|
42 | {
|
---|
43 | if (len >= RITE_LV_NULL_MARK) {
|
---|
44 | mrb_raise(mrb, E_ARGUMENT_ERROR, "symbol length too long");
|
---|
45 | }
|
---|
46 | }
|
---|
47 |
|
---|
48 | static mrb_sym
|
---|
49 | sym_intern(mrb_state *mrb, const char *name, size_t len, mrb_bool lit)
|
---|
50 | {
|
---|
51 | khash_t(n2s) *h = mrb->name2sym;
|
---|
52 | symbol_name *sname = mrb->symtbl; /* symtbl[0] for working memory */
|
---|
53 | khiter_t k;
|
---|
54 | mrb_sym sym;
|
---|
55 | char *p;
|
---|
56 |
|
---|
57 | sym_validate_len(mrb, len);
|
---|
58 | if (sname) {
|
---|
59 | sname->lit = lit;
|
---|
60 | sname->len = (uint16_t)len;
|
---|
61 | sname->name = name;
|
---|
62 | k = kh_get(n2s, mrb, h, 0);
|
---|
63 | if (k != kh_end(h))
|
---|
64 | return kh_key(h, k);
|
---|
65 | }
|
---|
66 |
|
---|
67 | /* registering a new symbol */
|
---|
68 | sym = ++mrb->symidx;
|
---|
69 | if (mrb->symcapa < sym) {
|
---|
70 | if (mrb->symcapa == 0) mrb->symcapa = 100;
|
---|
71 | else mrb->symcapa = (size_t)(mrb->symcapa * 1.2);
|
---|
72 | mrb->symtbl = (symbol_name*)mrb_realloc(mrb, mrb->symtbl, sizeof(symbol_name)*(mrb->symcapa+1));
|
---|
73 | }
|
---|
74 | sname = &mrb->symtbl[sym];
|
---|
75 | sname->len = (uint16_t)len;
|
---|
76 | if (lit || mrb_ro_data_p(name)) {
|
---|
77 | sname->name = name;
|
---|
78 | sname->lit = TRUE;
|
---|
79 | }
|
---|
80 | else {
|
---|
81 | p = (char *)mrb_malloc(mrb, len+1);
|
---|
82 | memcpy(p, name, len);
|
---|
83 | p[len] = 0;
|
---|
84 | sname->name = (const char*)p;
|
---|
85 | sname->lit = FALSE;
|
---|
86 | }
|
---|
87 | kh_put(n2s, mrb, h, sym);
|
---|
88 |
|
---|
89 | return sym;
|
---|
90 | }
|
---|
91 |
|
---|
92 | MRB_API mrb_sym
|
---|
93 | mrb_intern(mrb_state *mrb, const char *name, size_t len)
|
---|
94 | {
|
---|
95 | return sym_intern(mrb, name, len, FALSE);
|
---|
96 | }
|
---|
97 |
|
---|
98 | MRB_API mrb_sym
|
---|
99 | mrb_intern_static(mrb_state *mrb, const char *name, size_t len)
|
---|
100 | {
|
---|
101 | return sym_intern(mrb, name, len, TRUE);
|
---|
102 | }
|
---|
103 |
|
---|
104 | MRB_API mrb_sym
|
---|
105 | mrb_intern_cstr(mrb_state *mrb, const char *name)
|
---|
106 | {
|
---|
107 | return mrb_intern(mrb, name, strlen(name));
|
---|
108 | }
|
---|
109 |
|
---|
110 | MRB_API mrb_sym
|
---|
111 | mrb_intern_str(mrb_state *mrb, mrb_value str)
|
---|
112 | {
|
---|
113 | return mrb_intern(mrb, RSTRING_PTR(str), RSTRING_LEN(str));
|
---|
114 | }
|
---|
115 |
|
---|
116 | MRB_API mrb_value
|
---|
117 | mrb_check_intern(mrb_state *mrb, const char *name, size_t len)
|
---|
118 | {
|
---|
119 | khash_t(n2s) *h = mrb->name2sym;
|
---|
120 | symbol_name *sname = mrb->symtbl;
|
---|
121 | khiter_t k;
|
---|
122 |
|
---|
123 | sym_validate_len(mrb, len);
|
---|
124 | sname->len = (uint16_t)len;
|
---|
125 | sname->name = name;
|
---|
126 |
|
---|
127 | k = kh_get(n2s, mrb, h, 0);
|
---|
128 | if (k != kh_end(h)) {
|
---|
129 | return mrb_symbol_value(kh_key(h, k));
|
---|
130 | }
|
---|
131 | return mrb_nil_value();
|
---|
132 | }
|
---|
133 |
|
---|
134 | MRB_API mrb_value
|
---|
135 | mrb_check_intern_cstr(mrb_state *mrb, const char *name)
|
---|
136 | {
|
---|
137 | return mrb_check_intern(mrb, name, (mrb_int)strlen(name));
|
---|
138 | }
|
---|
139 |
|
---|
140 | MRB_API mrb_value
|
---|
141 | mrb_check_intern_str(mrb_state *mrb, mrb_value str)
|
---|
142 | {
|
---|
143 | return mrb_check_intern(mrb, RSTRING_PTR(str), RSTRING_LEN(str));
|
---|
144 | }
|
---|
145 |
|
---|
146 | /* lenp must be a pointer to a size_t variable */
|
---|
147 | MRB_API const char*
|
---|
148 | mrb_sym2name_len(mrb_state *mrb, mrb_sym sym, mrb_int *lenp)
|
---|
149 | {
|
---|
150 | if (sym == 0 || mrb->symidx < sym) {
|
---|
151 | if (lenp) *lenp = 0;
|
---|
152 | return NULL;
|
---|
153 | }
|
---|
154 |
|
---|
155 | if (lenp) *lenp = mrb->symtbl[sym].len;
|
---|
156 | return mrb->symtbl[sym].name;
|
---|
157 | }
|
---|
158 |
|
---|
159 | void
|
---|
160 | mrb_free_symtbl(mrb_state *mrb)
|
---|
161 | {
|
---|
162 | mrb_sym i, lim;
|
---|
163 |
|
---|
164 | for (i=1, lim=mrb->symidx+1; i<lim; i++) {
|
---|
165 | if (!mrb->symtbl[i].lit) {
|
---|
166 | mrb_free(mrb, (char*)mrb->symtbl[i].name);
|
---|
167 | }
|
---|
168 | }
|
---|
169 | mrb_free(mrb, mrb->symtbl);
|
---|
170 | kh_destroy(n2s, mrb, mrb->name2sym);
|
---|
171 | }
|
---|
172 |
|
---|
173 | void
|
---|
174 | mrb_init_symtbl(mrb_state *mrb)
|
---|
175 | {
|
---|
176 | mrb->name2sym = kh_init(n2s, mrb);
|
---|
177 | }
|
---|
178 |
|
---|
179 | /**********************************************************************
|
---|
180 | * Document-class: Symbol
|
---|
181 | *
|
---|
182 | * <code>Symbol</code> objects represent names and some strings
|
---|
183 | * inside the Ruby
|
---|
184 | * interpreter. They are generated using the <code>:name</code> and
|
---|
185 | * <code>:"string"</code> literals
|
---|
186 | * syntax, and by the various <code>to_sym</code> methods. The same
|
---|
187 | * <code>Symbol</code> object will be created for a given name or string
|
---|
188 | * for the duration of a program's execution, regardless of the context
|
---|
189 | * or meaning of that name. Thus if <code>Fred</code> is a constant in
|
---|
190 | * one context, a method in another, and a class in a third, the
|
---|
191 | * <code>Symbol</code> <code>:Fred</code> will be the same object in
|
---|
192 | * all three contexts.
|
---|
193 | *
|
---|
194 | * module One
|
---|
195 | * class Fred
|
---|
196 | * end
|
---|
197 | * $f1 = :Fred
|
---|
198 | * end
|
---|
199 | * module Two
|
---|
200 | * Fred = 1
|
---|
201 | * $f2 = :Fred
|
---|
202 | * end
|
---|
203 | * def Fred()
|
---|
204 | * end
|
---|
205 | * $f3 = :Fred
|
---|
206 | * $f1.object_id #=> 2514190
|
---|
207 | * $f2.object_id #=> 2514190
|
---|
208 | * $f3.object_id #=> 2514190
|
---|
209 | *
|
---|
210 | */
|
---|
211 |
|
---|
212 |
|
---|
213 | /* 15.2.11.3.1 */
|
---|
214 | /*
|
---|
215 | * call-seq:
|
---|
216 | * sym == obj -> true or false
|
---|
217 | *
|
---|
218 | * Equality---If <i>sym</i> and <i>obj</i> are exactly the same
|
---|
219 | * symbol, returns <code>true</code>.
|
---|
220 | */
|
---|
221 |
|
---|
222 | static mrb_value
|
---|
223 | sym_equal(mrb_state *mrb, mrb_value sym1)
|
---|
224 | {
|
---|
225 | mrb_value sym2;
|
---|
226 |
|
---|
227 | mrb_get_args(mrb, "o", &sym2);
|
---|
228 |
|
---|
229 | return mrb_bool_value(mrb_obj_equal(mrb, sym1, sym2));
|
---|
230 | }
|
---|
231 |
|
---|
232 | /* 15.2.11.3.2 */
|
---|
233 | /* 15.2.11.3.3 */
|
---|
234 | /*
|
---|
235 | * call-seq:
|
---|
236 | * sym.id2name -> string
|
---|
237 | * sym.to_s -> string
|
---|
238 | *
|
---|
239 | * Returns the name or string corresponding to <i>sym</i>.
|
---|
240 | *
|
---|
241 | * :fred.id2name #=> "fred"
|
---|
242 | */
|
---|
243 | static mrb_value
|
---|
244 | mrb_sym_to_s(mrb_state *mrb, mrb_value sym)
|
---|
245 | {
|
---|
246 | mrb_sym id = mrb_symbol(sym);
|
---|
247 | const char *p;
|
---|
248 | mrb_int len;
|
---|
249 |
|
---|
250 | p = mrb_sym2name_len(mrb, id, &len);
|
---|
251 | return mrb_str_new_static(mrb, p, len);
|
---|
252 | }
|
---|
253 |
|
---|
254 | /* 15.2.11.3.4 */
|
---|
255 | /*
|
---|
256 | * call-seq:
|
---|
257 | * sym.to_sym -> sym
|
---|
258 | * sym.intern -> sym
|
---|
259 | *
|
---|
260 | * In general, <code>to_sym</code> returns the <code>Symbol</code> corresponding
|
---|
261 | * to an object. As <i>sym</i> is already a symbol, <code>self</code> is returned
|
---|
262 | * in this case.
|
---|
263 | */
|
---|
264 |
|
---|
265 | static mrb_value
|
---|
266 | sym_to_sym(mrb_state *mrb, mrb_value sym)
|
---|
267 | {
|
---|
268 | return sym;
|
---|
269 | }
|
---|
270 |
|
---|
271 | /* 15.2.11.3.5(x) */
|
---|
272 | /*
|
---|
273 | * call-seq:
|
---|
274 | * sym.inspect -> string
|
---|
275 | *
|
---|
276 | * Returns the representation of <i>sym</i> as a symbol literal.
|
---|
277 | *
|
---|
278 | * :fred.inspect #=> ":fred"
|
---|
279 | */
|
---|
280 |
|
---|
281 | #if __STDC__
|
---|
282 | # define SIGN_EXTEND_CHAR(c) ((signed char)(c))
|
---|
283 | #else /* not __STDC__ */
|
---|
284 | /* As in Harbison and Steele. */
|
---|
285 | # define SIGN_EXTEND_CHAR(c) ((((unsigned char)(c)) ^ 128) - 128)
|
---|
286 | #endif
|
---|
287 | #define is_identchar(c) (SIGN_EXTEND_CHAR(c)!=-1&&(ISALNUM(c) || (c) == '_'))
|
---|
288 |
|
---|
289 | static mrb_bool
|
---|
290 | is_special_global_name(const char* m)
|
---|
291 | {
|
---|
292 | switch (*m) {
|
---|
293 | case '~': case '*': case '$': case '?': case '!': case '@':
|
---|
294 | case '/': case '\\': case ';': case ',': case '.': case '=':
|
---|
295 | case ':': case '<': case '>': case '\"':
|
---|
296 | case '&': case '`': case '\'': case '+':
|
---|
297 | case '0':
|
---|
298 | ++m;
|
---|
299 | break;
|
---|
300 | case '-':
|
---|
301 | ++m;
|
---|
302 | if (is_identchar(*m)) m += 1;
|
---|
303 | break;
|
---|
304 | default:
|
---|
305 | if (!ISDIGIT(*m)) return FALSE;
|
---|
306 | do ++m; while (ISDIGIT(*m));
|
---|
307 | break;
|
---|
308 | }
|
---|
309 | return !*m;
|
---|
310 | }
|
---|
311 |
|
---|
312 | static mrb_bool
|
---|
313 | symname_p(const char *name)
|
---|
314 | {
|
---|
315 | const char *m = name;
|
---|
316 | mrb_bool localid = FALSE;
|
---|
317 |
|
---|
318 | if (!m) return FALSE;
|
---|
319 | switch (*m) {
|
---|
320 | case '\0':
|
---|
321 | return FALSE;
|
---|
322 |
|
---|
323 | case '$':
|
---|
324 | if (is_special_global_name(++m)) return TRUE;
|
---|
325 | goto id;
|
---|
326 |
|
---|
327 | case '@':
|
---|
328 | if (*++m == '@') ++m;
|
---|
329 | goto id;
|
---|
330 |
|
---|
331 | case '<':
|
---|
332 | switch (*++m) {
|
---|
333 | case '<': ++m; break;
|
---|
334 | case '=': if (*++m == '>') ++m; break;
|
---|
335 | default: break;
|
---|
336 | }
|
---|
337 | break;
|
---|
338 |
|
---|
339 | case '>':
|
---|
340 | switch (*++m) {
|
---|
341 | case '>': case '=': ++m; break;
|
---|
342 | default: break;
|
---|
343 | }
|
---|
344 | break;
|
---|
345 |
|
---|
346 | case '=':
|
---|
347 | switch (*++m) {
|
---|
348 | case '~': ++m; break;
|
---|
349 | case '=': if (*++m == '=') ++m; break;
|
---|
350 | default: return FALSE;
|
---|
351 | }
|
---|
352 | break;
|
---|
353 |
|
---|
354 | case '*':
|
---|
355 | if (*++m == '*') ++m;
|
---|
356 | break;
|
---|
357 | case '!':
|
---|
358 | switch (*++m) {
|
---|
359 | case '=': case '~': ++m;
|
---|
360 | }
|
---|
361 | break;
|
---|
362 | case '+': case '-':
|
---|
363 | if (*++m == '@') ++m;
|
---|
364 | break;
|
---|
365 | case '|':
|
---|
366 | if (*++m == '|') ++m;
|
---|
367 | break;
|
---|
368 | case '&':
|
---|
369 | if (*++m == '&') ++m;
|
---|
370 | break;
|
---|
371 |
|
---|
372 | case '^': case '/': case '%': case '~': case '`':
|
---|
373 | ++m;
|
---|
374 | break;
|
---|
375 |
|
---|
376 | case '[':
|
---|
377 | if (*++m != ']') return FALSE;
|
---|
378 | if (*++m == '=') ++m;
|
---|
379 | break;
|
---|
380 |
|
---|
381 | default:
|
---|
382 | localid = !ISUPPER(*m);
|
---|
383 | id:
|
---|
384 | if (*m != '_' && !ISALPHA(*m)) return FALSE;
|
---|
385 | while (is_identchar(*m)) m += 1;
|
---|
386 | if (localid) {
|
---|
387 | switch (*m) {
|
---|
388 | case '!': case '?': case '=': ++m;
|
---|
389 | default: break;
|
---|
390 | }
|
---|
391 | }
|
---|
392 | break;
|
---|
393 | }
|
---|
394 | return *m ? FALSE : TRUE;
|
---|
395 | }
|
---|
396 |
|
---|
397 | static mrb_value
|
---|
398 | sym_inspect(mrb_state *mrb, mrb_value sym)
|
---|
399 | {
|
---|
400 | mrb_value str;
|
---|
401 | const char *name;
|
---|
402 | mrb_int len;
|
---|
403 | mrb_sym id = mrb_symbol(sym);
|
---|
404 | char *sp;
|
---|
405 |
|
---|
406 | name = mrb_sym2name_len(mrb, id, &len);
|
---|
407 | str = mrb_str_new(mrb, 0, len+1);
|
---|
408 | sp = RSTRING_PTR(str);
|
---|
409 | RSTRING_PTR(str)[0] = ':';
|
---|
410 | memcpy(sp+1, name, len);
|
---|
411 | mrb_assert_int_fit(mrb_int, len, size_t, SIZE_MAX);
|
---|
412 | if (!symname_p(name) || strlen(name) != (size_t)len) {
|
---|
413 | str = mrb_str_dump(mrb, str);
|
---|
414 | sp = RSTRING_PTR(str);
|
---|
415 | sp[0] = ':';
|
---|
416 | sp[1] = '"';
|
---|
417 | }
|
---|
418 | return str;
|
---|
419 | }
|
---|
420 |
|
---|
421 | MRB_API mrb_value
|
---|
422 | mrb_sym2str(mrb_state *mrb, mrb_sym sym)
|
---|
423 | {
|
---|
424 | mrb_int len;
|
---|
425 | const char *name = mrb_sym2name_len(mrb, sym, &len);
|
---|
426 |
|
---|
427 | if (!name) return mrb_undef_value(); /* can't happen */
|
---|
428 | return mrb_str_new_static(mrb, name, len);
|
---|
429 | }
|
---|
430 |
|
---|
431 | MRB_API const char*
|
---|
432 | mrb_sym2name(mrb_state *mrb, mrb_sym sym)
|
---|
433 | {
|
---|
434 | mrb_int len;
|
---|
435 | const char *name = mrb_sym2name_len(mrb, sym, &len);
|
---|
436 |
|
---|
437 | if (!name) return NULL;
|
---|
438 | if (symname_p(name) && strlen(name) == (size_t)len) {
|
---|
439 | return name;
|
---|
440 | }
|
---|
441 | else {
|
---|
442 | mrb_value str = mrb_str_dump(mrb, mrb_str_new_static(mrb, name, len));
|
---|
443 | return RSTRING_PTR(str);
|
---|
444 | }
|
---|
445 | }
|
---|
446 |
|
---|
447 | #define lesser(a,b) (((a)>(b))?(b):(a))
|
---|
448 |
|
---|
449 | static mrb_value
|
---|
450 | sym_cmp(mrb_state *mrb, mrb_value s1)
|
---|
451 | {
|
---|
452 | mrb_value s2;
|
---|
453 | mrb_sym sym1, sym2;
|
---|
454 |
|
---|
455 | mrb_get_args(mrb, "o", &s2);
|
---|
456 | if (mrb_type(s2) != MRB_TT_SYMBOL) return mrb_nil_value();
|
---|
457 | sym1 = mrb_symbol(s1);
|
---|
458 | sym2 = mrb_symbol(s2);
|
---|
459 | if (sym1 == sym2) return mrb_fixnum_value(0);
|
---|
460 | else {
|
---|
461 | const char *p1, *p2;
|
---|
462 | int retval;
|
---|
463 | mrb_int len, len1, len2;
|
---|
464 |
|
---|
465 | p1 = mrb_sym2name_len(mrb, sym1, &len1);
|
---|
466 | p2 = mrb_sym2name_len(mrb, sym2, &len2);
|
---|
467 | len = lesser(len1, len2);
|
---|
468 | retval = memcmp(p1, p2, len);
|
---|
469 | if (retval == 0) {
|
---|
470 | if (len1 == len2) return mrb_fixnum_value(0);
|
---|
471 | if (len1 > len2) return mrb_fixnum_value(1);
|
---|
472 | return mrb_fixnum_value(-1);
|
---|
473 | }
|
---|
474 | if (retval > 0) return mrb_fixnum_value(1);
|
---|
475 | return mrb_fixnum_value(-1);
|
---|
476 | }
|
---|
477 | }
|
---|
478 |
|
---|
479 | void
|
---|
480 | mrb_init_symbol(mrb_state *mrb)
|
---|
481 | {
|
---|
482 | struct RClass *sym;
|
---|
483 |
|
---|
484 | mrb->symbol_class = sym = mrb_define_class(mrb, "Symbol", mrb->object_class); /* 15.2.11 */
|
---|
485 | MRB_SET_INSTANCE_TT(sym, MRB_TT_SYMBOL);
|
---|
486 | mrb_undef_class_method(mrb, sym, "new");
|
---|
487 |
|
---|
488 | mrb_define_method(mrb, sym, "===", sym_equal, MRB_ARGS_REQ(1)); /* 15.2.11.3.1 */
|
---|
489 | mrb_define_method(mrb, sym, "id2name", mrb_sym_to_s, MRB_ARGS_NONE()); /* 15.2.11.3.2 */
|
---|
490 | mrb_define_method(mrb, sym, "to_s", mrb_sym_to_s, MRB_ARGS_NONE()); /* 15.2.11.3.3 */
|
---|
491 | mrb_define_method(mrb, sym, "to_sym", sym_to_sym, MRB_ARGS_NONE()); /* 15.2.11.3.4 */
|
---|
492 | mrb_define_method(mrb, sym, "inspect", sym_inspect, MRB_ARGS_NONE()); /* 15.2.11.3.5(x) */
|
---|
493 | mrb_define_method(mrb, sym, "<=>", sym_cmp, MRB_ARGS_REQ(1));
|
---|
494 | }
|
---|