1 | /*
|
---|
2 | ** symbol.c - Symbol class
|
---|
3 | **
|
---|
4 | ** See Copyright Notice in mruby.h
|
---|
5 | */
|
---|
6 |
|
---|
7 | #include <limits.h>
|
---|
8 | #include <string.h>
|
---|
9 | #include "mruby.h"
|
---|
10 | #include "mruby/khash.h"
|
---|
11 | #include "mruby/string.h"
|
---|
12 | #include "mruby/dump.h"
|
---|
13 |
|
---|
14 | /* ------------------------------------------------------ */
|
---|
15 | typedef struct symbol_name {
|
---|
16 | mrb_bool lit : 1;
|
---|
17 | uint16_t len;
|
---|
18 | const char *name;
|
---|
19 | } symbol_name;
|
---|
20 |
|
---|
21 | static inline khint_t
|
---|
22 | sym_hash_func(mrb_state *mrb, mrb_sym s)
|
---|
23 | {
|
---|
24 | khint_t h = 0;
|
---|
25 | size_t i, len = mrb->symtbl[s].len;
|
---|
26 | const char *p = mrb->symtbl[s].name;
|
---|
27 |
|
---|
28 | for (i=0; i<len; i++) {
|
---|
29 | h = (h << 5) - h + *p++;
|
---|
30 | }
|
---|
31 | return h;
|
---|
32 | }
|
---|
/*
 * Equality callback for the n2s table: symbol ids a and b are equal when
 * their table entries hold names of the same length and identical bytes.
 * Every macro parameter is parenthesized so that expression arguments
 * (for example `&state` or `i + 1`) expand as intended.
 */
#define sym_hash_equal(mrb, a, b) \
  ((mrb)->symtbl[(a)].len == (mrb)->symtbl[(b)].len && \
   memcmp((mrb)->symtbl[(a)].name, (mrb)->symtbl[(b)].name, (mrb)->symtbl[(a)].len) == 0)
|
---|
34 |
|
---|
35 | KHASH_DECLARE(n2s, mrb_sym, mrb_sym, FALSE)
|
---|
36 | KHASH_DEFINE (n2s, mrb_sym, mrb_sym, FALSE, sym_hash_func, sym_hash_equal)
|
---|
37 | /* ------------------------------------------------------ */
|
---|
38 |
|
---|
39 | static void
|
---|
40 | sym_validate_len(mrb_state *mrb, size_t len)
|
---|
41 | {
|
---|
42 | if (len >= RITE_LV_NULL_MARK) {
|
---|
43 | mrb_raise(mrb, E_ARGUMENT_ERROR, "symbol length too long");
|
---|
44 | }
|
---|
45 | }
|
---|
46 |
|
---|
47 | static mrb_sym
|
---|
48 | sym_intern(mrb_state *mrb, const char *name, size_t len, mrb_bool lit)
|
---|
49 | {
|
---|
50 | khash_t(n2s) *h = mrb->name2sym;
|
---|
51 | symbol_name *sname = mrb->symtbl; /* symtbl[0] for working memory */
|
---|
52 | khiter_t k;
|
---|
53 | mrb_sym sym;
|
---|
54 | char *p;
|
---|
55 |
|
---|
56 | sym_validate_len(mrb, len);
|
---|
57 | if (sname) {
|
---|
58 | sname->lit = lit;
|
---|
59 | sname->len = (uint16_t)len;
|
---|
60 | sname->name = name;
|
---|
61 | k = kh_get(n2s, mrb, h, 0);
|
---|
62 | if (k != kh_end(h))
|
---|
63 | return kh_key(h, k);
|
---|
64 | }
|
---|
65 |
|
---|
66 | /* registering a new symbol */
|
---|
67 | sym = ++mrb->symidx;
|
---|
68 | if (mrb->symcapa < sym) {
|
---|
69 | if (mrb->symcapa == 0) mrb->symcapa = 100;
|
---|
70 | else mrb->symcapa = (size_t)(mrb->symcapa * 1.2);
|
---|
71 | mrb->symtbl = (symbol_name*)mrb_realloc(mrb, mrb->symtbl, sizeof(symbol_name)*(mrb->symcapa+1));
|
---|
72 | }
|
---|
73 | sname = &mrb->symtbl[sym];
|
---|
74 | sname->len = (uint16_t)len;
|
---|
75 | if (lit || mrb_ro_data_p(name)) {
|
---|
76 | sname->name = name;
|
---|
77 | sname->lit = TRUE;
|
---|
78 | }
|
---|
79 | else {
|
---|
80 | p = (char *)mrb_malloc(mrb, len+1);
|
---|
81 | memcpy(p, name, len);
|
---|
82 | p[len] = 0;
|
---|
83 | sname->name = (const char*)p;
|
---|
84 | sname->lit = FALSE;
|
---|
85 | }
|
---|
86 | kh_put(n2s, mrb, h, sym);
|
---|
87 |
|
---|
88 | return sym;
|
---|
89 | }
|
---|
90 |
|
---|
91 | MRB_API mrb_sym
|
---|
92 | mrb_intern(mrb_state *mrb, const char *name, size_t len)
|
---|
93 | {
|
---|
94 | return sym_intern(mrb, name, len, FALSE);
|
---|
95 | }
|
---|
96 |
|
---|
97 | MRB_API mrb_sym
|
---|
98 | mrb_intern_static(mrb_state *mrb, const char *name, size_t len)
|
---|
99 | {
|
---|
100 | return sym_intern(mrb, name, len, TRUE);
|
---|
101 | }
|
---|
102 |
|
---|
103 | MRB_API mrb_sym
|
---|
104 | mrb_intern_cstr(mrb_state *mrb, const char *name)
|
---|
105 | {
|
---|
106 | return mrb_intern(mrb, name, strlen(name));
|
---|
107 | }
|
---|
108 |
|
---|
109 | MRB_API mrb_sym
|
---|
110 | mrb_intern_str(mrb_state *mrb, mrb_value str)
|
---|
111 | {
|
---|
112 | return mrb_intern(mrb, RSTRING_PTR(str), RSTRING_LEN(str));
|
---|
113 | }
|
---|
114 |
|
---|
115 | MRB_API mrb_value
|
---|
116 | mrb_check_intern(mrb_state *mrb, const char *name, size_t len)
|
---|
117 | {
|
---|
118 | khash_t(n2s) *h = mrb->name2sym;
|
---|
119 | symbol_name *sname = mrb->symtbl;
|
---|
120 | khiter_t k;
|
---|
121 |
|
---|
122 | sym_validate_len(mrb, len);
|
---|
123 | sname->len = (uint16_t)len;
|
---|
124 | sname->name = name;
|
---|
125 |
|
---|
126 | k = kh_get(n2s, mrb, h, 0);
|
---|
127 | if (k != kh_end(h)) {
|
---|
128 | return mrb_symbol_value(kh_key(h, k));
|
---|
129 | }
|
---|
130 | return mrb_nil_value();
|
---|
131 | }
|
---|
132 |
|
---|
133 | MRB_API mrb_value
|
---|
134 | mrb_check_intern_cstr(mrb_state *mrb, const char *name)
|
---|
135 | {
|
---|
136 | return mrb_check_intern(mrb, name, (mrb_int)strlen(name));
|
---|
137 | }
|
---|
138 |
|
---|
139 | MRB_API mrb_value
|
---|
140 | mrb_check_intern_str(mrb_state *mrb, mrb_value str)
|
---|
141 | {
|
---|
142 | return mrb_check_intern(mrb, RSTRING_PTR(str), RSTRING_LEN(str));
|
---|
143 | }
|
---|
144 |
|
---|
145 | /* lenp must be a pointer to a size_t variable */
|
---|
146 | MRB_API const char*
|
---|
147 | mrb_sym2name_len(mrb_state *mrb, mrb_sym sym, mrb_int *lenp)
|
---|
148 | {
|
---|
149 | if (sym == 0 || mrb->symidx < sym) {
|
---|
150 | if (lenp) *lenp = 0;
|
---|
151 | return NULL;
|
---|
152 | }
|
---|
153 |
|
---|
154 | if (lenp) *lenp = mrb->symtbl[sym].len;
|
---|
155 | return mrb->symtbl[sym].name;
|
---|
156 | }
|
---|
157 |
|
---|
158 | void
|
---|
159 | mrb_free_symtbl(mrb_state *mrb)
|
---|
160 | {
|
---|
161 | mrb_sym i, lim;
|
---|
162 |
|
---|
163 | for (i=1, lim=mrb->symidx+1; i<lim; i++) {
|
---|
164 | if (!mrb->symtbl[i].lit) {
|
---|
165 | mrb_free(mrb, (char*)mrb->symtbl[i].name);
|
---|
166 | }
|
---|
167 | }
|
---|
168 | mrb_free(mrb, mrb->symtbl);
|
---|
169 | kh_destroy(n2s, mrb, mrb->name2sym);
|
---|
170 | }
|
---|
171 |
|
---|
172 | void
|
---|
173 | mrb_init_symtbl(mrb_state *mrb)
|
---|
174 | {
|
---|
175 | mrb->name2sym = kh_init(n2s, mrb);
|
---|
176 | }
|
---|
177 |
|
---|
178 | /**********************************************************************
|
---|
179 | * Document-class: Symbol
|
---|
180 | *
|
---|
181 | * <code>Symbol</code> objects represent names and some strings
|
---|
182 | * inside the Ruby
|
---|
183 | * interpreter. They are generated using the <code>:name</code> and
|
---|
184 | * <code>:"string"</code> literals
|
---|
185 | * syntax, and by the various <code>to_sym</code> methods. The same
|
---|
186 | * <code>Symbol</code> object will be created for a given name or string
|
---|
187 | * for the duration of a program's execution, regardless of the context
|
---|
188 | * or meaning of that name. Thus if <code>Fred</code> is a constant in
|
---|
189 | * one context, a method in another, and a class in a third, the
|
---|
190 | * <code>Symbol</code> <code>:Fred</code> will be the same object in
|
---|
191 | * all three contexts.
|
---|
192 | *
|
---|
193 | * module One
|
---|
194 | * class Fred
|
---|
195 | * end
|
---|
196 | * $f1 = :Fred
|
---|
197 | * end
|
---|
198 | * module Two
|
---|
199 | * Fred = 1
|
---|
200 | * $f2 = :Fred
|
---|
201 | * end
|
---|
202 | * def Fred()
|
---|
203 | * end
|
---|
204 | * $f3 = :Fred
|
---|
205 | * $f1.object_id #=> 2514190
|
---|
206 | * $f2.object_id #=> 2514190
|
---|
207 | * $f3.object_id #=> 2514190
|
---|
208 | *
|
---|
209 | */
|
---|
210 |
|
---|
211 |
|
---|
212 | /* 15.2.11.3.1 */
|
---|
213 | /*
|
---|
214 | * call-seq:
|
---|
 *     sym === obj   -> true or false
|
---|
216 | *
|
---|
217 | * Equality---If <i>sym</i> and <i>obj</i> are exactly the same
|
---|
218 | * symbol, returns <code>true</code>.
|
---|
219 | */
|
---|
220 |
|
---|
221 | static mrb_value
|
---|
222 | sym_equal(mrb_state *mrb, mrb_value sym1)
|
---|
223 | {
|
---|
224 | mrb_value sym2;
|
---|
225 |
|
---|
226 | mrb_get_args(mrb, "o", &sym2);
|
---|
227 |
|
---|
228 | return mrb_bool_value(mrb_obj_equal(mrb, sym1, sym2));
|
---|
229 | }
|
---|
230 |
|
---|
231 | /* 15.2.11.3.2 */
|
---|
232 | /* 15.2.11.3.3 */
|
---|
233 | /*
|
---|
234 | * call-seq:
|
---|
235 | * sym.id2name -> string
|
---|
236 | * sym.to_s -> string
|
---|
237 | *
|
---|
238 | * Returns the name or string corresponding to <i>sym</i>.
|
---|
239 | *
|
---|
240 | * :fred.id2name #=> "fred"
|
---|
241 | */
|
---|
242 | static mrb_value
|
---|
243 | mrb_sym_to_s(mrb_state *mrb, mrb_value sym)
|
---|
244 | {
|
---|
245 | mrb_sym id = mrb_symbol(sym);
|
---|
246 | const char *p;
|
---|
247 | mrb_int len;
|
---|
248 |
|
---|
249 | p = mrb_sym2name_len(mrb, id, &len);
|
---|
250 | return mrb_str_new_static(mrb, p, len);
|
---|
251 | }
|
---|
252 |
|
---|
253 | /* 15.2.11.3.4 */
|
---|
254 | /*
|
---|
255 | * call-seq:
|
---|
256 | * sym.to_sym -> sym
|
---|
257 | * sym.intern -> sym
|
---|
258 | *
|
---|
259 | * In general, <code>to_sym</code> returns the <code>Symbol</code> corresponding
|
---|
260 | * to an object. As <i>sym</i> is already a symbol, <code>self</code> is returned
|
---|
261 | * in this case.
|
---|
262 | */
|
---|
263 |
|
---|
264 | static mrb_value
|
---|
265 | sym_to_sym(mrb_state *mrb, mrb_value sym)
|
---|
266 | {
|
---|
267 | return sym;
|
---|
268 | }
|
---|
269 |
|
---|
270 | /* 15.2.11.3.5(x) */
|
---|
271 | /*
|
---|
272 | * call-seq:
|
---|
273 | * sym.inspect -> string
|
---|
274 | *
|
---|
275 | * Returns the representation of <i>sym</i> as a symbol literal.
|
---|
276 | *
|
---|
277 | * :fred.inspect #=> ":fred"
|
---|
278 | */
|
---|
279 |
|
---|
/* SIGN_EXTEND_CHAR(c): the value of c taken as a signed 8-bit quantity. */
#if !__STDC__
/* As in Harbison and Steele. */
# define SIGN_EXTEND_CHAR(c) ((((unsigned char)(c)) ^ 128) - 128)
#else /* __STDC__ */
# define SIGN_EXTEND_CHAR(c) ((signed char)(c))
#endif
/* is_identchar(c): c does not sign-extend to -1 and is alphanumeric or '_'.
   Note that c is evaluated more than once. */
#define is_identchar(c) (SIGN_EXTEND_CHAR(c)!=-1&&(ISALNUM(c) || (c) == '_'))
|
---|
287 |
|
---|
288 | static mrb_bool
|
---|
289 | is_special_global_name(const char* m)
|
---|
290 | {
|
---|
291 | switch (*m) {
|
---|
292 | case '~': case '*': case '$': case '?': case '!': case '@':
|
---|
293 | case '/': case '\\': case ';': case ',': case '.': case '=':
|
---|
294 | case ':': case '<': case '>': case '\"':
|
---|
295 | case '&': case '`': case '\'': case '+':
|
---|
296 | case '0':
|
---|
297 | ++m;
|
---|
298 | break;
|
---|
299 | case '-':
|
---|
300 | ++m;
|
---|
301 | if (is_identchar(*m)) m += 1;
|
---|
302 | break;
|
---|
303 | default:
|
---|
304 | if (!ISDIGIT(*m)) return FALSE;
|
---|
305 | do ++m; while (ISDIGIT(*m));
|
---|
306 | break;
|
---|
307 | }
|
---|
308 | return !*m;
|
---|
309 | }
|
---|
310 |
|
---|
311 | static mrb_bool
|
---|
312 | symname_p(const char *name)
|
---|
313 | {
|
---|
314 | const char *m = name;
|
---|
315 | mrb_bool localid = FALSE;
|
---|
316 |
|
---|
317 | if (!m) return FALSE;
|
---|
318 | switch (*m) {
|
---|
319 | case '\0':
|
---|
320 | return FALSE;
|
---|
321 |
|
---|
322 | case '$':
|
---|
323 | if (is_special_global_name(++m)) return TRUE;
|
---|
324 | goto id;
|
---|
325 |
|
---|
326 | case '@':
|
---|
327 | if (*++m == '@') ++m;
|
---|
328 | goto id;
|
---|
329 |
|
---|
330 | case '<':
|
---|
331 | switch (*++m) {
|
---|
332 | case '<': ++m; break;
|
---|
333 | case '=': if (*++m == '>') ++m; break;
|
---|
334 | default: break;
|
---|
335 | }
|
---|
336 | break;
|
---|
337 |
|
---|
338 | case '>':
|
---|
339 | switch (*++m) {
|
---|
340 | case '>': case '=': ++m; break;
|
---|
341 | default: break;
|
---|
342 | }
|
---|
343 | break;
|
---|
344 |
|
---|
345 | case '=':
|
---|
346 | switch (*++m) {
|
---|
347 | case '~': ++m; break;
|
---|
348 | case '=': if (*++m == '=') ++m; break;
|
---|
349 | default: return FALSE;
|
---|
350 | }
|
---|
351 | break;
|
---|
352 |
|
---|
353 | case '*':
|
---|
354 | if (*++m == '*') ++m;
|
---|
355 | break;
|
---|
356 | case '!':
|
---|
357 | if (*++m == '=') ++m;
|
---|
358 | break;
|
---|
359 | case '+': case '-':
|
---|
360 | if (*++m == '@') ++m;
|
---|
361 | break;
|
---|
362 | case '|':
|
---|
363 | if (*++m == '|') ++m;
|
---|
364 | break;
|
---|
365 | case '&':
|
---|
366 | if (*++m == '&') ++m;
|
---|
367 | break;
|
---|
368 |
|
---|
369 | case '^': case '/': case '%': case '~': case '`':
|
---|
370 | ++m;
|
---|
371 | break;
|
---|
372 |
|
---|
373 | case '[':
|
---|
374 | if (*++m != ']') return FALSE;
|
---|
375 | if (*++m == '=') ++m;
|
---|
376 | break;
|
---|
377 |
|
---|
378 | default:
|
---|
379 | localid = !ISUPPER(*m);
|
---|
380 | id:
|
---|
381 | if (*m != '_' && !ISALPHA(*m)) return FALSE;
|
---|
382 | while (is_identchar(*m)) m += 1;
|
---|
383 | if (localid) {
|
---|
384 | switch (*m) {
|
---|
385 | case '!': case '?': case '=': ++m;
|
---|
386 | default: break;
|
---|
387 | }
|
---|
388 | }
|
---|
389 | break;
|
---|
390 | }
|
---|
391 | return *m ? FALSE : TRUE;
|
---|
392 | }
|
---|
393 |
|
---|
394 | static mrb_value
|
---|
395 | sym_inspect(mrb_state *mrb, mrb_value sym)
|
---|
396 | {
|
---|
397 | mrb_value str;
|
---|
398 | const char *name;
|
---|
399 | mrb_int len;
|
---|
400 | mrb_sym id = mrb_symbol(sym);
|
---|
401 | char *sp;
|
---|
402 |
|
---|
403 | name = mrb_sym2name_len(mrb, id, &len);
|
---|
404 | str = mrb_str_new(mrb, 0, len+1);
|
---|
405 | sp = RSTRING_PTR(str);
|
---|
406 | RSTRING_PTR(str)[0] = ':';
|
---|
407 | memcpy(sp+1, name, len);
|
---|
408 | mrb_assert_int_fit(mrb_int, len, size_t, SIZE_MAX);
|
---|
409 | if (!symname_p(name) || strlen(name) != (size_t)len) {
|
---|
410 | str = mrb_str_dump(mrb, str);
|
---|
411 | sp = RSTRING_PTR(str);
|
---|
412 | sp[0] = ':';
|
---|
413 | sp[1] = '"';
|
---|
414 | }
|
---|
415 | return str;
|
---|
416 | }
|
---|
417 |
|
---|
418 | MRB_API mrb_value
|
---|
419 | mrb_sym2str(mrb_state *mrb, mrb_sym sym)
|
---|
420 | {
|
---|
421 | mrb_int len;
|
---|
422 | const char *name = mrb_sym2name_len(mrb, sym, &len);
|
---|
423 |
|
---|
424 | if (!name) return mrb_undef_value(); /* can't happen */
|
---|
425 | return mrb_str_new_static(mrb, name, len);
|
---|
426 | }
|
---|
427 |
|
---|
428 | MRB_API const char*
|
---|
429 | mrb_sym2name(mrb_state *mrb, mrb_sym sym)
|
---|
430 | {
|
---|
431 | mrb_int len;
|
---|
432 | const char *name = mrb_sym2name_len(mrb, sym, &len);
|
---|
433 |
|
---|
434 | if (!name) return NULL;
|
---|
435 | if (symname_p(name) && strlen(name) == (size_t)len) {
|
---|
436 | return name;
|
---|
437 | }
|
---|
438 | else {
|
---|
439 | mrb_value str = mrb_str_dump(mrb, mrb_str_new_static(mrb, name, len));
|
---|
440 | return RSTRING_PTR(str);
|
---|
441 | }
|
---|
442 | }
|
---|
443 |
|
---|
444 | #define lesser(a,b) (((a)>(b))?(b):(a))
|
---|
445 |
|
---|
446 | static mrb_value
|
---|
447 | sym_cmp(mrb_state *mrb, mrb_value s1)
|
---|
448 | {
|
---|
449 | mrb_value s2;
|
---|
450 | mrb_sym sym1, sym2;
|
---|
451 |
|
---|
452 | mrb_get_args(mrb, "o", &s2);
|
---|
453 | if (mrb_type(s2) != MRB_TT_SYMBOL) return mrb_nil_value();
|
---|
454 | sym1 = mrb_symbol(s1);
|
---|
455 | sym2 = mrb_symbol(s2);
|
---|
456 | if (sym1 == sym2) return mrb_fixnum_value(0);
|
---|
457 | else {
|
---|
458 | const char *p1, *p2;
|
---|
459 | int retval;
|
---|
460 | mrb_int len, len1, len2;
|
---|
461 |
|
---|
462 | p1 = mrb_sym2name_len(mrb, sym1, &len1);
|
---|
463 | p2 = mrb_sym2name_len(mrb, sym2, &len2);
|
---|
464 | len = lesser(len1, len2);
|
---|
465 | retval = memcmp(p1, p2, len);
|
---|
466 | if (retval == 0) {
|
---|
467 | if (len1 == len2) return mrb_fixnum_value(0);
|
---|
468 | if (len1 > len2) return mrb_fixnum_value(1);
|
---|
469 | return mrb_fixnum_value(-1);
|
---|
470 | }
|
---|
471 | if (retval > 0) return mrb_fixnum_value(1);
|
---|
472 | return mrb_fixnum_value(-1);
|
---|
473 | }
|
---|
474 | }
|
---|
475 |
|
---|
476 | void
|
---|
477 | mrb_init_symbol(mrb_state *mrb)
|
---|
478 | {
|
---|
479 | struct RClass *sym;
|
---|
480 |
|
---|
481 | mrb->symbol_class = sym = mrb_define_class(mrb, "Symbol", mrb->object_class); /* 15.2.11 */
|
---|
482 |
|
---|
483 | mrb_define_method(mrb, sym, "===", sym_equal, MRB_ARGS_REQ(1)); /* 15.2.11.3.1 */
|
---|
484 | mrb_define_method(mrb, sym, "id2name", mrb_sym_to_s, MRB_ARGS_NONE()); /* 15.2.11.3.2 */
|
---|
485 | mrb_define_method(mrb, sym, "to_s", mrb_sym_to_s, MRB_ARGS_NONE()); /* 15.2.11.3.3 */
|
---|
486 | mrb_define_method(mrb, sym, "to_sym", sym_to_sym, MRB_ARGS_NONE()); /* 15.2.11.3.4 */
|
---|
487 | mrb_define_method(mrb, sym, "inspect", sym_inspect, MRB_ARGS_NONE()); /* 15.2.11.3.5(x) */
|
---|
488 | mrb_define_method(mrb, sym, "<=>", sym_cmp, MRB_ARGS_REQ(1));
|
---|
489 | }
|
---|