[ruby/etc] bump up to 1.3.1
[ruby-80x24.org.git] / symbol.c
blob5ce95f5b07afd4f7d07e8760f2198d7557bd97cc
1 /**********************************************************************
3 symbol.c -
5 $Author$
6 created at: Tue Jul 8 15:49:54 JST 2014
8 Copyright (C) 2014 Yukihiro Matsumoto
10 **********************************************************************/
12 #include "gc.h"
13 #include "internal.h"
14 #include "internal/error.h"
15 #include "internal/gc.h"
16 #include "internal/hash.h"
17 #include "internal/object.h"
18 #include "internal/symbol.h"
19 #include "internal/vm.h"
20 #include "probes.h"
21 #include "ruby/encoding.h"
22 #include "ruby/st.h"
23 #include "symbol.h"
24 #include "vm_sync.h"
26 #ifndef USE_SYMBOL_GC
27 # define USE_SYMBOL_GC 1
28 #endif
29 #ifndef SYMBOL_DEBUG
30 # define SYMBOL_DEBUG 0
31 #endif
32 #ifndef CHECK_ID_SERIAL
33 # define CHECK_ID_SERIAL SYMBOL_DEBUG
34 #endif
36 #define SYMBOL_PINNED_P(sym) (RSYMBOL(sym)->id&~ID_SCOPE_MASK)
38 #define STATIC_SYM2ID(sym) RSHIFT((VALUE)(sym), RUBY_SPECIAL_SHIFT)
40 static ID register_static_symid(ID, const char *, long, rb_encoding *);
41 static ID register_static_symid_str(ID, VALUE);
42 #define REGISTER_SYMID(id, name) register_static_symid((id), (name), strlen(name), enc)
43 #include "id.c"
45 #define is_identchar(p,e,enc) (ISALNUM((unsigned char)*(p)) || (*(p)) == '_' || !ISASCII(*(p)))
47 #define op_tbl_count numberof(op_tbl)
48 STATIC_ASSERT(op_tbl_name_size, sizeof(op_tbl[0].name) == 3);
49 #define op_tbl_len(i) (!op_tbl[i].name[1] ? 1 : !op_tbl[i].name[2] ? 2 : 3)
51 static void
52 Init_op_tbl(void)
54 int i;
55 rb_encoding *const enc = rb_usascii_encoding();
57 for (i = '!'; i <= '~'; ++i) {
58 if (!ISALNUM(i) && i != '_') {
59 char c = (char)i;
60 register_static_symid(i, &c, 1, enc);
63 for (i = 0; i < op_tbl_count; ++i) {
64 register_static_symid(op_tbl[i].token, op_tbl[i].name, op_tbl_len(i), enc);
/* Number of ID serials grouped into one sub-array of symbols->ids
 * (set_id_entry/get_id_serial_entry index with num / ID_ENTRY_UNIT). */
68 static const int ID_ENTRY_UNIT = 512;
/* Slots kept per serial inside a sub-array: the name string and the
 * symbol; ID_ENTRY_SIZE is the number of slots per serial. */
70 enum id_entry_type {
71 ID_ENTRY_STR,
72 ID_ENTRY_SYM,
73 ID_ENTRY_SIZE
/* The global symbol table; its first member is initialized to tNEXT_ID-1. */
76 rb_symbols_t ruby_global_symbols = {tNEXT_ID-1};
/* Hash type used for the str_sym table: keys are compared and hashed
 * with rb_str_hash_cmp / rb_str_hash. */
78 static const struct st_hash_type symhash = {
79 rb_str_hash_cmp,
80 rb_str_hash,
83 void
84 Init_sym(void)
86 rb_symbols_t *symbols = &ruby_global_symbols;
88 VALUE dsym_fstrs = rb_ident_hash_new();
89 symbols->dsymbol_fstr_hash = dsym_fstrs;
90 rb_gc_register_mark_object(dsym_fstrs);
91 rb_obj_hide(dsym_fstrs);
93 symbols->str_sym = st_init_table_with_size(&symhash, 1000);
94 symbols->ids = rb_ary_tmp_new(0);
95 rb_gc_register_mark_object(symbols->ids);
97 Init_op_tbl();
98 Init_id();
101 WARN_UNUSED_RESULT(static VALUE dsymbol_alloc(rb_symbols_t *symbols, const VALUE klass, const VALUE str, rb_encoding *const enc, const ID type));
102 WARN_UNUSED_RESULT(static VALUE dsymbol_check(rb_symbols_t *symbols, const VALUE sym));
103 WARN_UNUSED_RESULT(static ID lookup_str_id(VALUE str));
104 WARN_UNUSED_RESULT(static VALUE lookup_str_sym_with_lock(rb_symbols_t *symbols, const VALUE str));
105 WARN_UNUSED_RESULT(static VALUE lookup_str_sym(const VALUE str));
106 WARN_UNUSED_RESULT(static VALUE lookup_id_str(ID id));
107 WARN_UNUSED_RESULT(static ID intern_str(VALUE str, int mutable));
109 #define GLOBAL_SYMBOLS_ENTER(symbols) rb_symbols_t *symbols = &ruby_global_symbols; RB_VM_LOCK_ENTER()
110 #define GLOBAL_SYMBOLS_LEAVE() RB_VM_LOCK_LEAVE()
113 rb_id_attrset(ID id)
115 VALUE str, sym;
116 int scope;
118 if (!is_notop_id(id)) {
119 switch (id) {
120 case tAREF: case tASET:
121 return tASET; /* only exception */
123 rb_name_error(id, "cannot make operator ID :%"PRIsVALUE" attrset",
124 rb_id2str(id));
126 else {
127 scope = id_type(id);
128 switch (scope) {
129 case ID_LOCAL: case ID_INSTANCE: case ID_GLOBAL:
130 case ID_CONST: case ID_CLASS: case ID_JUNK:
131 break;
132 case ID_ATTRSET:
133 return id;
134 default:
136 if ((str = lookup_id_str(id)) != 0) {
137 rb_name_error(id, "cannot make unknown type ID %d:%"PRIsVALUE" attrset",
138 scope, str);
140 else {
141 rb_name_error_str(Qnil, "cannot make unknown type anonymous ID %d:%"PRIxVALUE" attrset",
142 scope, (VALUE)id);
148 /* make new symbol and ID */
149 if (!(str = lookup_id_str(id))) {
150 static const char id_types[][8] = {
151 "local",
152 "instance",
153 "invalid",
154 "global",
155 "attrset",
156 "const",
157 "class",
158 "junk",
160 rb_name_error(id, "cannot make anonymous %.*s ID %"PRIxVALUE" attrset",
161 (int)sizeof(id_types[0]), id_types[scope], (VALUE)id);
163 str = rb_str_dup(str);
164 rb_str_cat(str, "=", 1);
165 sym = lookup_str_sym(str);
166 id = sym ? rb_sym2id(sym) : intern_str(str, 1);
167 return id;
170 static int
171 is_special_global_name(const char *m, const char *e, rb_encoding *enc)
173 int mb = 0;
175 if (m >= e) return 0;
176 if (is_global_name_punct(*m)) {
177 ++m;
179 else if (*m == '-') {
180 if (++m >= e) return 0;
181 if (is_identchar(m, e, enc)) {
182 if (!ISASCII(*m)) mb = 1;
183 m += rb_enc_mbclen(m, e, enc);
186 else {
187 if (!ISDIGIT(*m)) return 0;
188 do {
189 if (!ISASCII(*m)) mb = 1;
190 ++m;
191 } while (m < e && ISDIGIT(*m));
193 return m == e ? mb + 1 : 0;
197 rb_symname_p(const char *name)
199 return rb_enc_symname_p(name, rb_ascii8bit_encoding());
203 rb_enc_symname_p(const char *name, rb_encoding *enc)
205 return rb_enc_symname2_p(name, strlen(name), enc);
/*
 * Tells whether the first character of name (nlen bytes in encoding
 * enc) is accepted as the leading character of a constant name.
 * Yields FALSE for an empty name or an invalid multibyte sequence.
 */
208 static int
209 rb_sym_constant_char_p(const char *name, long nlen, rb_encoding *enc)
211 int c, len;
212 const char *end = name + nlen;
214 if (nlen < 1) return FALSE;
/* ASCII lead byte: the answer is simply ISUPPER() of that byte. */
215 if (ISASCII(*name)) return ISUPPER(*name);
216 c = rb_enc_precise_mbclen(name, end, enc);
217 if (!MBCLEN_CHARFOUND_P(c)) return FALSE;
218 len = MBCLEN_CHARFOUND_LEN(c);
/* c is reused: from here on it holds the code point, not the mbclen result. */
219 c = rb_enc_mbc_to_codepoint(name, end, enc);
220 if (ONIGENC_IS_UNICODE(enc)) {
/* ctype for "titlecaseletter", looked up on first use and kept in a static. */
221 static int ctype_titlecase = 0;
222 if (rb_enc_isupper(c, enc)) return TRUE;
223 if (rb_enc_islower(c, enc)) return FALSE;
224 if (!ctype_titlecase) {
225 static const UChar cname[] = "titlecaseletter";
/* This inner `end` shadows the outer one; it points at cname's terminating NUL. */
226 static const UChar *const end = cname + sizeof(cname) - 1;
227 ctype_titlecase = ONIGENC_PROPERTY_NAME_TO_CTYPE(enc, cname, end);
229 if (rb_enc_isctype(c, ctype_titlecase, enc)) return TRUE;
231 else {
232 /* fallback to case-folding */
233 OnigUChar fold[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
234 const OnigUChar *beg = (const OnigUChar *)name;
235 int r = enc->mbc_case_fold(ONIGENC_CASE_FOLD,
236 &beg, (const OnigUChar *)end,
237 fold, enc);
/* TRUE when folding produced bytes that differ from the original character. */
238 if (r > 0 && (r != len || memcmp(fold, name, r)))
239 return TRUE;
241 return FALSE;
244 #define IDSET_ATTRSET_FOR_SYNTAX ((1U<<ID_LOCAL)|(1U<<ID_CONST))
245 #define IDSET_ATTRSET_FOR_INTERN (~(~0U<<(1<<ID_SCOPE_SHIFT)) & ~(1U<<ID_ATTRSET))
/*
 * Result of classifying the leading characters of a candidate symbol name.
 *   kind  - invalid: reject; stophere: nothing more may follow the
 *           recognized prefix; needmore: identifier characters must be
 *           scanned next (this is how rb_enc_symname_type treats them)
 *   type  - the ruby_id_types value decided from the prefix
 *   nread - offset the caller adds to the name pointer to continue
 */
247 struct enc_synmane_type_leading_chars_tag {
248 const enum { invalid, stophere, needmore, } kind;
249 const enum ruby_id_types type;
250 const long nread;
/* Local shorthand so the compound literals below stay short; undefined again after the function. */
253 #define t struct enc_synmane_type_leading_chars_tag
/*
 * Inspects the first characters of name (len bytes, encoding enc) and
 * returns the classification described above.  Names in a
 * non-ASCII-compatible encoding, NULL names, non-positive lengths and a
 * leading NUL byte are all reported as invalid.
 * The switches look at *++m without comparing against e.
 * NOTE(review): presumably callers pass NUL-terminated buffers - confirm.
 */
255 static struct enc_synmane_type_leading_chars_tag
256 enc_synmane_type_leading_chars(const char *name, long len, rb_encoding *enc, int allowed_attrset)
258 const char *m = name;
259 const char *e = m + len;
261 if (! rb_enc_asciicompat(enc)) {
262 return (t) { invalid, 0, 0, };
264 else if (! m) {
265 return (t) { invalid, 0, 0, };
267 else if ( len <= 0 ) {
268 return (t) { invalid, 0, 0, };
270 switch (*m) {
271 case '\0':
272 return (t) { invalid, 0, 0, };
/* '$': a special global name consumes the whole input; otherwise an identifier must follow the '$'. */
274 case '$':
275 if (is_special_global_name(++m, e, enc)) {
276 return (t) { stophere, ID_GLOBAL, len, };
278 else {
279 return (t) { needmore, ID_GLOBAL, 1, };
/* '@' -> instance variable prefix, '@@' -> class variable prefix. */
282 case '@':
283 switch (*++m) {
284 default: return (t) { needmore, ID_INSTANCE, 1, };
285 case '@': return (t) { needmore, ID_CLASS, 2, };
/* Operators: "<", "<<", "<=", "<=>". */
288 case '<':
289 switch (*++m) {
290 default: return (t) { stophere, ID_JUNK, 1, };
291 case '<': return (t) { stophere, ID_JUNK, 2, };
292 case '=':
293 switch (*++m) {
294 default: return (t) { stophere, ID_JUNK, 2, };
295 case '>': return (t) { stophere, ID_JUNK, 3, };
/* Operators: ">", ">>", ">=". */
299 case '>':
300 switch (*++m) {
301 default: return (t) { stophere, ID_JUNK, 1, };
302 case '>': case '=': return (t) { stophere, ID_JUNK, 2, };
/* "=~", "==", "==="; a lone '=' (or '=' plus anything else) is invalid. */
305 case '=':
306 switch (*++m) {
307 default: return (t) { invalid, 0, 1, };
308 case '~': return (t) { stophere, ID_JUNK, 2, };
309 case '=':
310 switch (*++m) {
311 default: return (t) { stophere, ID_JUNK, 2, };
312 case '=': return (t) { stophere, ID_JUNK, 3, };
/* "*", "**". */
316 case '*':
317 switch (*++m) {
318 default: return (t) { stophere, ID_JUNK, 1, };
319 case '*': return (t) { stophere, ID_JUNK, 2, };
/* "+", "-", "+@", "-@". */
322 case '+': case '-':
323 switch (*++m) {
324 default: return (t) { stophere, ID_JUNK, 1, };
325 case '@': return (t) { stophere, ID_JUNK, 2, };
/* Single-character operators. */
328 case '|': case '^': case '&': case '/': case '%': case '~': case '`':
329 return (t) { stophere, ID_JUNK, 1, };
/* "[]" and "[]="; any other '[' is handed back with nread 0. */
331 case '[':
332 switch (*++m) {
333 default: return (t) { needmore, ID_JUNK, 0, };
334 case ']':
335 switch (*++m) {
336 default: return (t) { stophere, ID_JUNK, 2, };
337 case '=': return (t) { stophere, ID_JUNK, 3, };
/* "!=", "!~", or "!"; after a plain '!' scanning continues only when allowed_attrset has the ID_JUNK bit. */
341 case '!':
342 switch (*++m) {
343 case '=': case '~': return (t) { stophere, ID_JUNK, 2, };
344 default:
345 if (allowed_attrset & (1U << ID_JUNK)) {
346 return (t) { needmore, ID_JUNK, 1, };
348 else {
349 return (t) { stophere, ID_JUNK, 1, };
/* Anything else: constant or local, decided by the first character; nothing is consumed. */
353 default:
354 if (rb_sym_constant_char_p(name, len, enc)) {
355 return (t) { needmore, ID_CONST, 0, };
357 else {
358 return (t) { needmore, ID_LOCAL, 0, };
362 #undef t
/*
 * Classifies name (len bytes, encoding enc) as a symbol name.
 * Returns the ID type (ID_LOCAL, ID_CONST, ID_JUNK, ID_ATTRSET, ...) or
 * -1 when the bytes do not form a plain symbol name.
 * allowed_attrset is a bit set indexed by ID type (1U << type) naming
 * the types that may carry a trailing '='.
 */
365 rb_enc_symname_type(const char *name, long len, rb_encoding *enc, unsigned int allowed_attrset)
367 const struct enc_synmane_type_leading_chars_tag f =
368 enc_synmane_type_leading_chars(name, len, enc, allowed_attrset);
/* m: scan position after the recognized prefix; e: end of the name. */
369 const char *m = name + f.nread;
370 const char *e = name + len;
371 int type = (int)f.type;
373 switch (f.kind) {
374 case invalid: return -1;
375 case stophere: break;
376 case needmore:
/* No identifier start here ('_', ASCII letter or non-ASCII byte). */
378 if (m >= e || (*m != '_' && !ISALPHA(*m) && ISASCII(*m))) {
379 if (len > 1 && *(e-1) == '=') {
380 type = rb_enc_symname_type(name, len-1, enc, allowed_attrset);
/* NOTE(review): a recursive result of -1 also satisfies this test, so
 * ID_ATTRSET is returned even when the shortened name is itself rejected -
 * confirm this is intended. */
381 if (type != ID_ATTRSET) return ID_ATTRSET;
383 return -1;
/* Consume the run of identifier characters. */
385 while (m < e && is_identchar(m, e, enc)) m += rb_enc_mbclen(m, e, enc);
386 if (m >= e) break;
387 switch (*m) {
/* A '!' or '?' suffix turns the name into ID_JUNK; it is rejected on global, class and instance variable names. */
388 case '!': case '?':
389 if (type == ID_GLOBAL || type == ID_CLASS || type == ID_INSTANCE) return -1;
390 type = ID_JUNK;
391 ++m;
/* Continue into the '=' case only when exactly one '=' remains. */
392 if (m + 1 < e || *m != '=') break;
393 /* fall through */
394 case '=':
395 if (!(allowed_attrset & (1U << type))) return -1;
396 type = ID_ATTRSET;
397 ++m;
398 break;
/* Valid only if the whole name was consumed. */
402 return m == e ? type : -1;
406 rb_enc_symname2_p(const char *name, long len, rb_encoding *enc)
408 return rb_enc_symname_type(name, len, enc, IDSET_ATTRSET_FOR_SYNTAX) != -1;
411 static int
412 rb_str_symname_type(VALUE name, unsigned int allowed_attrset)
414 const char *ptr = StringValuePtr(name);
415 long len = RSTRING_LEN(name);
416 int type = rb_enc_symname_type(ptr, len, rb_enc_get(name), allowed_attrset);
417 RB_GC_GUARD(name);
418 return type;
421 static void
422 set_id_entry(rb_symbols_t *symbols, rb_id_serial_t num, VALUE str, VALUE sym)
424 ASSERT_vm_locking();
425 size_t idx = num / ID_ENTRY_UNIT;
427 VALUE ary, ids = symbols->ids;
428 if (idx >= (size_t)RARRAY_LEN(ids) || NIL_P(ary = rb_ary_entry(ids, (long)idx))) {
429 ary = rb_ary_tmp_new(ID_ENTRY_UNIT * ID_ENTRY_SIZE);
430 rb_ary_store(ids, (long)idx, ary);
432 idx = (num % ID_ENTRY_UNIT) * ID_ENTRY_SIZE;
433 rb_ary_store(ary, (long)idx + ID_ENTRY_STR, str);
434 rb_ary_store(ary, (long)idx + ID_ENTRY_SYM, sym);
/*
 * Fetches slot t (ID_ENTRY_STR or ID_ENTRY_SYM) recorded for serial num
 * in the global ids table, under the global symbol lock.
 * Returns 0 when num is 0, is greater than last_id, or has no entry.
 * With CHECK_ID_SERIAL enabled and a non-zero id, the stored symbol must
 * also carry exactly that id, otherwise 0 is returned.
 */
437 static VALUE
438 get_id_serial_entry(rb_id_serial_t num, ID id, const enum id_entry_type t)
440 VALUE result = 0;
442 GLOBAL_SYMBOLS_ENTER(symbols);
444 if (num && num <= symbols->last_id) {
/* idx selects the sub-array, pos the first slot of this serial inside it. */
445 size_t idx = num / ID_ENTRY_UNIT;
446 VALUE ids = symbols->ids;
447 VALUE ary;
448 if (idx < (size_t)RARRAY_LEN(ids) && !NIL_P(ary = rb_ary_entry(ids, (long)idx))) {
449 long pos = (long)(num % ID_ENTRY_UNIT) * ID_ENTRY_SIZE;
450 result = rb_ary_entry(ary, pos + t);
/* An unset slot reads back as nil; report it as 0. */
452 if (NIL_P(result)) {
453 result = 0;
455 else {
456 #if CHECK_ID_SERIAL
457 if (id) {
458 VALUE sym = result;
459 if (t != ID_ENTRY_SYM)
460 sym = rb_ary_entry(ary, pos + ID_ENTRY_SYM);
461 if (STATIC_SYM_P(sym)) {
462 if (STATIC_SYM2ID(sym) != id) result = 0;
464 else {
465 if (RSYMBOL(sym)->id != id) result = 0;
468 #endif
473 GLOBAL_SYMBOLS_LEAVE();
475 return result;
478 static VALUE
479 get_id_entry(ID id, const enum id_entry_type t)
481 return get_id_serial_entry(rb_id_to_serial(id), id, t);
484 static inline ID
485 rb_id_serial_to_id(rb_id_serial_t num)
487 if (is_notop_id((ID)num)) {
488 VALUE sym = get_id_serial_entry(num, 0, ID_ENTRY_SYM);
489 if (sym) return SYM2ID(sym);
490 return ((ID)num << ID_SCOPE_SHIFT) | ID_INTERNAL | ID_STATIC_SYM;
492 else {
493 return (ID)num;
#if SYMBOL_DEBUG
/*
 * st_update callback used by register_sym() in debug builds: aborts via
 * rb_fatal when the key is already present, otherwise stores arg as the
 * value.
 */
static int
register_sym_update_callback(st_data_t *key, st_data_t *value, st_data_t arg, int existing)
{
    if (existing) {
        const VALUE dup_key = (VALUE)*key;
        const VALUE old_sym = (VALUE)*value;

        rb_fatal("symbol :% "PRIsVALUE" is already registered with %"PRIxVALUE,
                 dup_key, old_sym);
    }
    *value = arg;
    return ST_CONTINUE;
}
#endif
510 static void
511 register_sym(rb_symbols_t *symbols, VALUE str, VALUE sym)
513 ASSERT_vm_locking();
515 #if SYMBOL_DEBUG
516 st_update(symbols->str_sym, (st_data_t)str,
517 register_sym_update_callback, (st_data_t)sym);
518 #else
519 st_add_direct(symbols->str_sym, (st_data_t)str, (st_data_t)sym);
520 #endif
523 static void
524 unregister_sym(rb_symbols_t *symbols, VALUE str, VALUE sym)
526 ASSERT_vm_locking();
528 st_data_t str_data = (st_data_t)str;
529 if (!st_delete(symbols->str_sym, &str_data, NULL)) {
530 rb_bug("%p can't remove str from str_id (%s)", (void *)sym, RSTRING_PTR(str));
534 static ID
535 register_static_symid(ID id, const char *name, long len, rb_encoding *enc)
537 VALUE str = rb_enc_str_new(name, len, enc);
538 return register_static_symid_str(id, str);
541 static ID
542 register_static_symid_str(ID id, VALUE str)
544 rb_id_serial_t num = rb_id_to_serial(id);
545 VALUE sym = STATIC_ID2SYM(id);
547 OBJ_FREEZE(str);
548 str = rb_fstring(str);
550 RUBY_DTRACE_CREATE_HOOK(SYMBOL, RSTRING_PTR(str));
552 GLOBAL_SYMBOLS_ENTER(symbols)
554 register_sym(symbols, str, sym);
555 set_id_entry(symbols, num, str, sym);
557 GLOBAL_SYMBOLS_LEAVE();
559 return id;
562 static int
563 sym_check_asciionly(VALUE str)
565 if (!rb_enc_asciicompat(rb_enc_get(str))) return FALSE;
566 switch (rb_enc_str_coderange(str)) {
567 case ENC_CODERANGE_BROKEN:
568 rb_raise(rb_eEncodingError, "invalid symbol in encoding %s :%+"PRIsVALUE,
569 rb_enc_name(rb_enc_get(str)), str);
570 case ENC_CODERANGE_7BIT:
571 return TRUE;
573 return FALSE;
576 #if 0
578 * _str_ itself will be registered at the global symbol table. _str_
579 * can be modified before the registration, since the encoding will be
580 * set to ASCII-8BIT if it is a special global name.
/*
 * Disabled (#if 0) sanity check: reports through rb_bug when x is not a
 * dynamic symbol, naming the static symbol or the offending class.
 */
583 static inline void
584 must_be_dynamic_symbol(VALUE x)
586 if (UNLIKELY(!DYNAMIC_SYM_P(x))) {
587 if (STATIC_SYM_P(x)) {
588 VALUE str = lookup_id_str(RSHIFT((unsigned long)(x),RUBY_SPECIAL_SHIFT));
590 if (str) {
591 rb_bug("wrong argument: %s (inappropriate Symbol)", RSTRING_PTR(str));
593 else {
594 rb_bug("wrong argument: inappropriate Symbol (%p)", (void *)x);
597 else {
598 rb_bug("wrong argument type %s (expected Symbol)", rb_builtin_class_name(x));
602 #endif
604 static VALUE
605 dsymbol_alloc(rb_symbols_t *symbols, const VALUE klass, const VALUE str, rb_encoding * const enc, const ID type)
607 ASSERT_vm_locking();
609 const VALUE dsym = rb_newobj_of(klass, T_SYMBOL | FL_WB_PROTECTED);
610 long hashval;
612 rb_enc_set_index(dsym, rb_enc_to_index(enc));
613 OBJ_FREEZE(dsym);
614 RB_OBJ_WRITE(dsym, &RSYMBOL(dsym)->fstr, str);
615 RSYMBOL(dsym)->id = type;
617 /* we want hashval to be in Fixnum range [ruby-core:15713] r15672 */
618 hashval = (long)rb_str_hash(str);
619 RSYMBOL(dsym)->hashval = RSHIFT((long)hashval, 1);
620 register_sym(symbols, str, dsym);
621 rb_hash_aset(symbols->dsymbol_fstr_hash, str, Qtrue);
622 RUBY_DTRACE_CREATE_HOOK(SYMBOL, RSTRING_PTR(RSYMBOL(dsym)->fstr));
624 return dsym;
627 static inline VALUE
628 dsymbol_check(rb_symbols_t *symbols, const VALUE sym)
630 ASSERT_vm_locking();
632 if (UNLIKELY(rb_objspace_garbage_object_p(sym))) {
633 const VALUE fstr = RSYMBOL(sym)->fstr;
634 const ID type = RSYMBOL(sym)->id & ID_SCOPE_MASK;
635 RSYMBOL(sym)->fstr = 0;
636 unregister_sym(symbols, fstr, sym);
637 return dsymbol_alloc(symbols, rb_cSymbol, fstr, rb_enc_get(fstr), type);
639 else {
640 return sym;
644 static ID
645 lookup_str_id(VALUE str)
647 st_data_t sym_data;
648 int found;
650 GLOBAL_SYMBOLS_ENTER(symbols);
652 found = st_lookup(symbols->str_sym, (st_data_t)str, &sym_data);
654 GLOBAL_SYMBOLS_LEAVE();
656 if (found) {
657 const VALUE sym = (VALUE)sym_data;
659 if (STATIC_SYM_P(sym)) {
660 return STATIC_SYM2ID(sym);
662 else if (DYNAMIC_SYM_P(sym)) {
663 ID id = RSYMBOL(sym)->id;
664 if (id & ~ID_SCOPE_MASK) return id;
666 else {
667 rb_bug("non-symbol object %s:%"PRIxVALUE" for %"PRIsVALUE" in symbol table",
668 rb_builtin_class_name(sym), sym, str);
671 return (ID)0;
674 static VALUE
675 lookup_str_sym_with_lock(rb_symbols_t *symbols, const VALUE str)
677 st_data_t sym_data;
678 if (st_lookup(symbols->str_sym, (st_data_t)str, &sym_data)) {
679 VALUE sym = (VALUE)sym_data;
680 if (DYNAMIC_SYM_P(sym)) {
681 sym = dsymbol_check(symbols, sym);
683 return sym;
685 else {
686 return Qfalse;
690 static VALUE
691 lookup_str_sym(const VALUE str)
693 VALUE sym;
695 GLOBAL_SYMBOLS_ENTER(symbols);
697 sym = lookup_str_sym_with_lock(symbols, str);
699 GLOBAL_SYMBOLS_LEAVE();
701 return sym;
704 static VALUE
705 lookup_id_str(ID id)
707 return get_id_entry(id, ID_ENTRY_STR);
711 rb_intern3(const char *name, long len, rb_encoding *enc)
713 VALUE sym;
714 struct RString fake_str;
715 VALUE str = rb_setup_fake_str(&fake_str, name, len, enc);
716 OBJ_FREEZE(str);
717 sym = lookup_str_sym(str);
718 if (sym) return rb_sym2id(sym);
719 str = rb_enc_str_new(name, len, enc); /* make true string */
720 return intern_str(str, 1);
723 static ID
724 next_id_base_with_lock(rb_symbols_t *symbols)
726 ID id;
727 rb_id_serial_t next_serial = symbols->last_id + 1;
729 if (next_serial == 0) {
730 id = (ID)-1;
732 else {
733 const size_t num = ++symbols->last_id;
734 id = num << ID_SCOPE_SHIFT;
737 return id;
740 static ID
741 next_id_base(void)
743 ID id;
744 GLOBAL_SYMBOLS_ENTER(symbols);
746 id = next_id_base_with_lock(symbols);
748 GLOBAL_SYMBOLS_LEAVE();
749 return id;
752 static ID
753 intern_str(VALUE str, int mutable)
755 ID id;
756 ID nid;
758 id = rb_str_symname_type(str, IDSET_ATTRSET_FOR_INTERN);
759 if (id == (ID)-1) id = ID_JUNK;
760 if (sym_check_asciionly(str)) {
761 if (!mutable) str = rb_str_dup(str);
762 rb_enc_associate(str, rb_usascii_encoding());
764 if ((nid = next_id_base()) == (ID)-1) {
765 str = rb_str_ellipsize(str, 20);
766 rb_raise(rb_eRuntimeError, "symbol table overflow (symbol %"PRIsVALUE")",
767 str);
769 id |= nid;
770 id |= ID_STATIC_SYM;
771 return register_static_symid_str(id, str);
775 rb_intern2(const char *name, long len)
777 return rb_intern3(name, len, rb_usascii_encoding());
780 #undef rb_intern
782 rb_intern(const char *name)
784 return rb_intern2(name, strlen(name));
788 rb_intern_str(VALUE str)
790 VALUE sym = lookup_str_sym(str);
792 if (sym) {
793 return SYM2ID(sym);
796 return intern_str(str, 0);
799 void
800 rb_gc_free_dsymbol(VALUE sym)
802 VALUE str = RSYMBOL(sym)->fstr;
804 if (str) {
805 RSYMBOL(sym)->fstr = 0;
807 GLOBAL_SYMBOLS_ENTER(symbols);
809 unregister_sym(symbols, str, sym);
810 rb_hash_delete_entry(symbols->dsymbol_fstr_hash, str);
812 GLOBAL_SYMBOLS_LEAVE();
817 * call-seq:
818 * str.intern -> symbol
819 * str.to_sym -> symbol
821 * Returns the Symbol corresponding to <i>str</i>, creating the
822 * symbol if it did not previously exist. See Symbol#id2name.
824 * "Koala".intern #=> :Koala
825 * s = 'cat'.to_sym #=> :cat
826 * s == :cat #=> true
827 * s = '@cat'.to_sym #=> :@cat
828 * s == :@cat #=> true
830 * This can also be used to create symbols that cannot be represented using the
831 * <code>:xxx</code> notation.
833 * 'cat and dog'.to_sym #=> :"cat and dog"
836 VALUE
837 rb_str_intern(VALUE str)
839 VALUE sym;
840 #if USE_SYMBOL_GC
841 rb_encoding *enc, *ascii;
842 int type;
843 #else
844 ID id;
845 #endif
846 GLOBAL_SYMBOLS_ENTER(symbols);
848 sym = lookup_str_sym_with_lock(symbols, str);
850 if (sym) {
851 // ok
853 else {
854 #if USE_SYMBOL_GC
855 enc = rb_enc_get(str);
856 ascii = rb_usascii_encoding();
857 if (enc != ascii && sym_check_asciionly(str)) {
858 str = rb_str_dup(str);
859 rb_enc_associate(str, ascii);
860 OBJ_FREEZE(str);
861 enc = ascii;
863 else {
864 str = rb_str_dup(str);
865 OBJ_FREEZE(str);
867 str = rb_fstring(str);
868 type = rb_str_symname_type(str, IDSET_ATTRSET_FOR_INTERN);
869 if (type < 0) type = ID_JUNK;
870 sym = dsymbol_alloc(symbols, rb_cSymbol, str, enc, type);
871 #else
872 id = intern_str(str, 0);
873 sym = ID2SYM(id);
874 #endif
877 GLOBAL_SYMBOLS_LEAVE();
878 return sym;
882 rb_sym2id(VALUE sym)
884 ID id;
885 if (STATIC_SYM_P(sym)) {
886 id = STATIC_SYM2ID(sym);
888 else if (DYNAMIC_SYM_P(sym)) {
889 GLOBAL_SYMBOLS_ENTER(symbols);
891 sym = dsymbol_check(symbols, sym);
892 id = RSYMBOL(sym)->id;
894 if (UNLIKELY(!(id & ~ID_SCOPE_MASK))) {
895 VALUE fstr = RSYMBOL(sym)->fstr;
896 ID num = next_id_base_with_lock(symbols);
898 RSYMBOL(sym)->id = id |= num;
899 /* make it permanent object */
901 set_id_entry(symbols, rb_id_to_serial(num), fstr, sym);
902 rb_hash_delete_entry(symbols->dsymbol_fstr_hash, fstr);
905 GLOBAL_SYMBOLS_LEAVE();
907 else {
908 rb_raise(rb_eTypeError, "wrong argument type %s (expected Symbol)",
909 rb_builtin_class_name(sym));
911 return id;
914 #undef rb_id2sym
915 VALUE
916 rb_id2sym(ID x)
918 if (!DYNAMIC_ID_P(x)) return STATIC_ID2SYM(x);
919 return get_id_entry(x, ID_ENTRY_SYM);
923 VALUE
924 rb_sym2str(VALUE sym)
926 if (DYNAMIC_SYM_P(sym)) {
927 return RSYMBOL(sym)->fstr;
929 else {
930 return rb_id2str(STATIC_SYM2ID(sym));
934 VALUE
935 rb_id2str(ID id)
937 return lookup_id_str(id);
940 const char *
941 rb_id2name(ID id)
943 VALUE str = rb_id2str(id);
945 if (!str) return 0;
946 return RSTRING_PTR(str);
950 rb_make_internal_id(void)
952 return next_id_base() | ID_INTERNAL | ID_STATIC_SYM;
956 rb_make_temporary_id(size_t n)
958 const ID max_id = RB_ID_SERIAL_MAX & ~0xffff;
959 const ID id = max_id - (ID)n;
960 if (id <= ruby_global_symbols.last_id) {
961 rb_raise(rb_eRuntimeError, "too big to make temporary ID: %" PRIdSIZE, n);
963 return (id << ID_SCOPE_SHIFT) | ID_STATIC_SYM | ID_INTERNAL;
966 static int
967 symbols_i(st_data_t key, st_data_t value, st_data_t arg)
969 VALUE ary = (VALUE)arg;
970 VALUE sym = (VALUE)value;
972 if (STATIC_SYM_P(sym)) {
973 rb_ary_push(ary, sym);
974 return ST_CONTINUE;
976 else if (!DYNAMIC_SYM_P(sym)) {
977 rb_bug("invalid symbol: %s", RSTRING_PTR((VALUE)key));
979 else if (!SYMBOL_PINNED_P(sym) && rb_objspace_garbage_object_p(sym)) {
980 RSYMBOL(sym)->fstr = 0;
981 return ST_DELETE;
983 else {
984 rb_ary_push(ary, sym);
985 return ST_CONTINUE;
990 VALUE
991 rb_sym_all_symbols(void)
993 VALUE ary;
995 GLOBAL_SYMBOLS_ENTER(symbols);
997 ary = rb_ary_new2(symbols->str_sym->num_entries);
998 st_foreach(symbols->str_sym, symbols_i, ary);
1000 GLOBAL_SYMBOLS_LEAVE();
1002 return ary;
1005 size_t
1006 rb_sym_immortal_count(void)
1008 return (size_t)ruby_global_symbols.last_id;
1012 rb_is_const_id(ID id)
1014 return is_const_id(id);
1018 rb_is_class_id(ID id)
1020 return is_class_id(id);
1024 rb_is_global_id(ID id)
1026 return is_global_id(id);
1030 rb_is_instance_id(ID id)
1032 return is_instance_id(id);
1036 rb_is_attrset_id(ID id)
1038 return is_attrset_id(id);
1042 rb_is_local_id(ID id)
1044 return is_local_id(id);
1048 rb_is_junk_id(ID id)
1050 return is_junk_id(id);
1054 rb_is_const_sym(VALUE sym)
1056 return is_const_sym(sym);
1060 rb_is_attrset_sym(VALUE sym)
1062 return is_attrset_sym(sym);
1066 rb_check_id(volatile VALUE *namep)
1068 VALUE tmp;
1069 VALUE name = *namep;
1071 if (STATIC_SYM_P(name)) {
1072 return STATIC_SYM2ID(name);
1074 else if (DYNAMIC_SYM_P(name)) {
1075 if (SYMBOL_PINNED_P(name)) {
1076 return RSYMBOL(name)->id;
1078 else {
1079 *namep = RSYMBOL(name)->fstr;
1080 return 0;
1083 else if (!RB_TYPE_P(name, T_STRING)) {
1084 tmp = rb_check_string_type(name);
1085 if (NIL_P(tmp)) {
1086 rb_raise(rb_eTypeError, "%+"PRIsVALUE" is not a symbol nor a string",
1087 name);
1089 name = tmp;
1090 *namep = name;
1093 sym_check_asciionly(name);
1095 return lookup_str_id(name);
1098 VALUE
1099 rb_check_symbol(volatile VALUE *namep)
1101 VALUE sym;
1102 VALUE tmp;
1103 VALUE name = *namep;
1105 if (STATIC_SYM_P(name)) {
1106 return name;
1108 else if (DYNAMIC_SYM_P(name)) {
1109 if (!SYMBOL_PINNED_P(name)) {
1110 GLOBAL_SYMBOLS_ENTER(symbols);
1112 name = dsymbol_check(symbols, name);
1114 GLOBAL_SYMBOLS_LEAVE();
1116 *namep = name;
1118 return name;
1120 else if (!RB_TYPE_P(name, T_STRING)) {
1121 tmp = rb_check_string_type(name);
1122 if (NIL_P(tmp)) {
1123 rb_raise(rb_eTypeError, "%+"PRIsVALUE" is not a symbol nor a string",
1124 name);
1126 name = tmp;
1127 *namep = name;
1130 sym_check_asciionly(name);
1132 if ((sym = lookup_str_sym(name)) != 0) {
1133 return sym;
1136 return Qnil;
1140 rb_check_id_cstr(const char *ptr, long len, rb_encoding *enc)
1142 struct RString fake_str;
1143 const VALUE name = rb_setup_fake_str(&fake_str, ptr, len, enc);
1145 sym_check_asciionly(name);
1147 return lookup_str_id(name);
1150 VALUE
1151 rb_check_symbol_cstr(const char *ptr, long len, rb_encoding *enc)
1153 VALUE sym;
1154 struct RString fake_str;
1155 const VALUE name = rb_setup_fake_str(&fake_str, ptr, len, enc);
1157 sym_check_asciionly(name);
1159 if ((sym = lookup_str_sym(name)) != 0) {
1160 return sym;
1163 return Qnil;
1166 #undef rb_sym_intern_ascii_cstr
1167 #ifdef __clang__
1168 NOINLINE(VALUE rb_sym_intern(const char *ptr, long len, rb_encoding *enc));
1169 #else
1170 FUNC_MINIMIZED(VALUE rb_sym_intern(const char *ptr, long len, rb_encoding *enc));
1171 FUNC_MINIMIZED(VALUE rb_sym_intern_ascii(const char *ptr, long len));
1172 FUNC_MINIMIZED(VALUE rb_sym_intern_ascii_cstr(const char *ptr));
1173 #endif
1175 VALUE
1176 rb_sym_intern(const char *ptr, long len, rb_encoding *enc)
1178 struct RString fake_str;
1179 const VALUE name = rb_setup_fake_str(&fake_str, ptr, len, enc);
1180 return rb_str_intern(name);
1183 VALUE
1184 rb_sym_intern_ascii(const char *ptr, long len)
1186 return rb_sym_intern(ptr, len, rb_usascii_encoding());
1189 VALUE
1190 rb_sym_intern_ascii_cstr(const char *ptr)
1192 return rb_sym_intern_ascii(ptr, strlen(ptr));
1195 VALUE
1196 rb_to_symbol_type(VALUE obj)
1198 return rb_convert_type_with_id(obj, T_SYMBOL, "Symbol", idTo_sym);
1202 rb_is_const_name(VALUE name)
1204 return rb_str_symname_type(name, 0) == ID_CONST;
1208 rb_is_class_name(VALUE name)
1210 return rb_str_symname_type(name, 0) == ID_CLASS;
1214 rb_is_instance_name(VALUE name)
1216 return rb_str_symname_type(name, 0) == ID_INSTANCE;
1220 rb_is_local_name(VALUE name)
1222 return rb_str_symname_type(name, 0) == ID_LOCAL;
1225 #include "id_table.c"