[rubygems/rubygems] Use a constant empty tar header to avoid extra allocations
[ruby.git] / symbol.c
blob7126154bf8e16d495d7e649b5dc9b7ccdbf54625
1 /**********************************************************************
3 symbol.c -
5 $Author$
6 created at: Tue Jul 8 15:49:54 JST 2014
8 Copyright (C) 2014 Yukihiro Matsumoto
10 **********************************************************************/
12 #include "internal.h"
13 #include "internal/error.h"
14 #include "internal/gc.h"
15 #include "internal/hash.h"
16 #include "internal/object.h"
17 #include "internal/symbol.h"
18 #include "internal/vm.h"
19 #include "probes.h"
20 #include "ruby/encoding.h"
21 #include "ruby/st.h"
22 #include "symbol.h"
23 #include "vm_sync.h"
24 #include "builtin.h"
/* Normalize USE_SYMBOL_GC to exactly 0 or 1: it becomes 0 only when it was
 * predefined with a zero (or empty) value, and 1 in every other case. */
26 #if defined(USE_SYMBOL_GC) && !(USE_SYMBOL_GC+0)
27 # undef USE_SYMBOL_GC
28 # define USE_SYMBOL_GC 0
29 #else
30 # undef USE_SYMBOL_GC
31 # define USE_SYMBOL_GC 1
32 #endif
/* Normalize SYMBOL_DEBUG to exactly 0 or 1: it becomes 1 only when it was
 * predefined with a non-zero value, and 0 in every other case. */
33 #if defined(SYMBOL_DEBUG) && (SYMBOL_DEBUG+0)
34 # undef SYMBOL_DEBUG
35 # define SYMBOL_DEBUG 1
36 #else
37 # undef SYMBOL_DEBUG
38 # define SYMBOL_DEBUG 0
39 #endif
/* Unless overridden, the ID consistency check in get_id_serial_entry()
 * follows the SYMBOL_DEBUG setting. */
40 #ifndef CHECK_ID_SERIAL
41 # define CHECK_ID_SERIAL SYMBOL_DEBUG
42 #endif
/* Non-zero when the dynamic symbol's id has any bit set outside ID_SCOPE_MASK. */
44 #define SYMBOL_PINNED_P(sym) (RSYMBOL(sym)->id&~ID_SCOPE_MASK)
/* Converts a static symbol VALUE to its ID by shifting right by RUBY_SPECIAL_SHIFT. */
46 #define STATIC_SYM2ID(sym) RSHIFT((VALUE)(sym), RUBY_SPECIAL_SHIFT)
48 static ID register_static_symid(ID, const char *, long, rb_encoding *);
49 static ID register_static_symid_str(ID, VALUE);
/* Registers a NUL-terminated name under the given id; relies on a variable
 * named `enc` being in scope at the point of expansion (presumably inside
 * the included id.c -- confirm). */
50 #define REGISTER_SYMID(id, name) register_static_symid((id), (name), strlen(name), enc)
51 #include "id.c"
/* True for an ASCII alphanumeric, '_' or any non-ASCII byte at *p; the e and
 * enc arguments are accepted but not used. */
53 #define is_identchar(p,e,enc) (ISALNUM((unsigned char)*(p)) || (*(p)) == '_' || !ISASCII(*(p)))
/* Number of entries in op_tbl. */
55 #define op_tbl_count numberof(op_tbl)
56 STATIC_ASSERT(op_tbl_name_size, sizeof(op_tbl[0].name) == 3);
/* Length (1, 2 or 3) of the name held in the 3-byte op_tbl[i].name field,
 * found by looking for the first zero byte after position 0. */
57 #define op_tbl_len(i) (!op_tbl[i].name[1] ? 1 : !op_tbl[i].name[2] ? 2 : 3)
59 static void
60 Init_op_tbl(void)
62 int i;
63 rb_encoding *const enc = rb_usascii_encoding();
65 for (i = '!'; i <= '~'; ++i) {
66 if (!ISALNUM(i) && i != '_') {
67 char c = (char)i;
68 register_static_symid(i, &c, 1, enc);
71 for (i = 0; i < op_tbl_count; ++i) {
72 register_static_symid(op_tbl[i].token, op_tbl[i].name, op_tbl_len(i), enc);
/* Number of ID entries grouped into each second-level array of symbols->ids
 * (see set_id_entry(), which divides the serial number by this value). */
76 static const int ID_ENTRY_UNIT = 512;
/* Slot offsets inside one ID entry: the name string, then the symbol.
 * ID_ENTRY_SIZE is the number of slots each entry occupies. */
78 enum id_entry_type {
79 ID_ENTRY_STR,
80 ID_ENTRY_SYM,
81 ID_ENTRY_SIZE
/* The process-wide symbol table; its first member starts at tNEXT_ID-1. */
84 rb_symbols_t ruby_global_symbols = {tNEXT_ID-1};
/* Hash type for the str_sym table, built from the string compare and string
 * hash functions. */
86 static const struct st_hash_type symhash = {
87 rb_str_hash_cmp,
88 rb_str_hash,
91 void
92 Init_sym(void)
94 rb_symbols_t *symbols = &ruby_global_symbols;
96 VALUE dsym_fstrs = rb_ident_hash_new();
97 symbols->dsymbol_fstr_hash = dsym_fstrs;
98 rb_vm_register_global_object(dsym_fstrs);
99 rb_obj_hide(dsym_fstrs);
101 symbols->str_sym = st_init_table_with_size(&symhash, 1000);
102 symbols->ids = rb_ary_hidden_new(0);
103 rb_vm_register_global_object(symbols->ids);
105 Init_op_tbl();
106 Init_id();
/* Forward declarations of file-local helpers.  Each is wrapped in
 * WARN_UNUSED_RESULT so that discarding its return value is diagnosed. */
109 WARN_UNUSED_RESULT(static VALUE dsymbol_alloc(rb_symbols_t *symbols, const VALUE klass, const VALUE str, rb_encoding *const enc, const ID type));
110 WARN_UNUSED_RESULT(static VALUE dsymbol_check(rb_symbols_t *symbols, const VALUE sym));
111 WARN_UNUSED_RESULT(static ID lookup_str_id(VALUE str));
112 WARN_UNUSED_RESULT(static VALUE lookup_str_sym_with_lock(rb_symbols_t *symbols, const VALUE str));
113 WARN_UNUSED_RESULT(static VALUE lookup_str_sym(const VALUE str));
114 WARN_UNUSED_RESULT(static VALUE lookup_id_str(ID id));
115 WARN_UNUSED_RESULT(static ID intern_str(VALUE str, int mutable));
/* Declares a local pointer named by the argument that refers to
 * ruby_global_symbols, then invokes RB_VM_LOCK_ENTER().  Every use must be
 * paired with GLOBAL_SYMBOLS_LEAVE() in the same function.
 * NOTE(review): the lock macros presumably open/close a brace scope, so do
 * not return between the pair -- confirm against vm_sync.h. */
117 #define GLOBAL_SYMBOLS_ENTER(symbols) rb_symbols_t *symbols = &ruby_global_symbols; RB_VM_LOCK_ENTER()
118 #define GLOBAL_SYMBOLS_LEAVE() RB_VM_LOCK_LEAVE()
121 rb_id_attrset(ID id)
123 VALUE str, sym;
124 int scope;
126 if (!is_notop_id(id)) {
127 switch (id) {
128 case tAREF: case tASET:
129 return tASET; /* only exception */
131 rb_name_error(id, "cannot make operator ID :%"PRIsVALUE" attrset",
132 rb_id2str(id));
134 else {
135 scope = id_type(id);
136 switch (scope) {
137 case ID_LOCAL: case ID_INSTANCE: case ID_GLOBAL:
138 case ID_CONST: case ID_CLASS: case ID_JUNK:
139 break;
140 case ID_ATTRSET:
141 return id;
142 default:
144 if ((str = lookup_id_str(id)) != 0) {
145 rb_name_error(id, "cannot make unknown type ID %d:%"PRIsVALUE" attrset",
146 scope, str);
148 else {
149 rb_name_error_str(Qnil, "cannot make unknown type anonymous ID %d:%"PRIxVALUE" attrset",
150 scope, (VALUE)id);
156 /* make new symbol and ID */
157 if (!(str = lookup_id_str(id))) {
158 static const char id_types[][8] = {
159 "local",
160 "instance",
161 "invalid",
162 "global",
163 "attrset",
164 "const",
165 "class",
166 "junk",
168 rb_name_error(id, "cannot make anonymous %.*s ID %"PRIxVALUE" attrset",
169 (int)sizeof(id_types[0]), id_types[scope], (VALUE)id);
171 str = rb_str_dup(str);
172 rb_str_cat(str, "=", 1);
173 sym = lookup_str_sym(str);
174 id = sym ? rb_sym2id(sym) : intern_str(str, 1);
175 return id;
178 static int
179 is_special_global_name(const char *m, const char *e, rb_encoding *enc)
181 int mb = 0;
183 if (m >= e) return 0;
184 if (is_global_name_punct(*m)) {
185 ++m;
187 else if (*m == '-') {
188 if (++m >= e) return 0;
189 if (is_identchar(m, e, enc)) {
190 if (!ISASCII(*m)) mb = 1;
191 m += rb_enc_mbclen(m, e, enc);
194 else {
195 if (!ISDIGIT(*m)) return 0;
196 do {
197 if (!ISASCII(*m)) mb = 1;
198 ++m;
199 } while (m < e && ISDIGIT(*m));
201 return m == e ? mb + 1 : 0;
205 rb_symname_p(const char *name)
207 return rb_enc_symname_p(name, rb_ascii8bit_encoding());
211 rb_enc_symname_p(const char *name, rb_encoding *enc)
213 return rb_enc_symname2_p(name, strlen(name), enc);
216 static int
217 rb_sym_constant_char_p(const char *name, long nlen, rb_encoding *enc)
219 int c, len;
220 const char *end = name + nlen;
222 if (nlen < 1) return FALSE;
223 if (ISASCII(*name)) return ISUPPER(*name);
224 c = rb_enc_precise_mbclen(name, end, enc);
225 if (!MBCLEN_CHARFOUND_P(c)) return FALSE;
226 len = MBCLEN_CHARFOUND_LEN(c);
227 c = rb_enc_mbc_to_codepoint(name, end, enc);
228 if (rb_enc_isupper(c, enc)) return TRUE;
229 if (rb_enc_islower(c, enc)) return FALSE;
230 if (ONIGENC_IS_UNICODE(enc)) {
231 static int ctype_titlecase = 0;
232 if (!ctype_titlecase) {
233 static const UChar cname[] = "titlecaseletter";
234 static const UChar *const end = cname + sizeof(cname) - 1;
235 ctype_titlecase = ONIGENC_PROPERTY_NAME_TO_CTYPE(enc, cname, end);
237 if (rb_enc_isctype(c, ctype_titlecase, enc)) return TRUE;
239 else {
240 /* fallback to case-folding */
241 OnigUChar fold[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
242 const OnigUChar *beg = (const OnigUChar *)name;
243 int r = enc->mbc_case_fold(ONIGENC_CASE_FOLD,
244 &beg, (const OnigUChar *)end,
245 fold, enc);
246 if (r > 0 && (r != len || memcmp(fold, name, r)))
247 return TRUE;
249 return FALSE;
/* Bit sets of ID types (bit N stands for type N) for which a trailing '='
 * may form an attrset name.  The SYNTAX set holds only local and constant
 * names; the INTERN set holds every type below 1<<ID_SCOPE_SHIFT except
 * ID_ATTRSET itself. */
252 #define IDSET_ATTRSET_FOR_SYNTAX ((1U<<ID_LOCAL)|(1U<<ID_CONST))
253 #define IDSET_ATTRSET_FOR_INTERN (~(~0U<<(1<<ID_SCOPE_SHIFT)) & ~(1U<<ID_ATTRSET))
/* Result of scanning the leading characters of a symbol name:
 *   kind  - invalid: not a symbol name; stophere: the name must end after
 *           nread bytes; needmore: identifier characters are still expected
 *   type  - the ID type decided so far
 *   nread - number of leading bytes consumed */
255 struct enc_synmane_type_leading_chars_tag {
256 const enum { invalid, stophere, needmore, } kind;
257 const enum ruby_id_types type;
258 const long nread;
/* Short alias for the struct type, used for the compound literals below;
 * it is removed again with #undef after the function. */
261 #define t struct enc_synmane_type_leading_chars_tag
/*
 * Classifies the leading character(s) of name and returns a
 * {kind, type, nread} triple (see the struct above).
 *
 * Returns invalid for a non-ASCII-compatible encoding, a null name, a
 * non-positive len, or a leading NUL byte.
 *
 * NOTE(review): the `*++m` look-aheads are not bounded by len; this
 * presumably relies on name being NUL-terminated -- confirm with callers.
 */
263 static struct enc_synmane_type_leading_chars_tag
264 enc_synmane_type_leading_chars(const char *name, long len, rb_encoding *enc, int allowed_attrset)
266 const char *m = name;
267 const char *e = m + len;
269 if (! rb_enc_asciicompat(enc)) {
270 return (t) { invalid, 0, 0, };
272 else if (! m) {
273 return (t) { invalid, 0, 0, };
275 else if ( len <= 0 ) {
276 return (t) { invalid, 0, 0, };
278 switch (*m) {
279 case '\0':
280 return (t) { invalid, 0, 0, };
/* '$': a special global name consumes the whole input; otherwise only the
 * '$' itself is consumed and an identifier must follow. */
282 case '$':
283 if (is_special_global_name(++m, e, enc)) {
284 return (t) { stophere, ID_GLOBAL, len, };
286 else {
287 return (t) { needmore, ID_GLOBAL, 1, };
/* '@' starts an instance variable name, '@@' a class variable name. */
290 case '@':
291 switch (*++m) {
292 default: return (t) { needmore, ID_INSTANCE, 1, };
293 case '@': return (t) { needmore, ID_CLASS, 2, };
/* operators "<", "<<", "<=", "<=>" */
296 case '<':
297 switch (*++m) {
298 default: return (t) { stophere, ID_JUNK, 1, };
299 case '<': return (t) { stophere, ID_JUNK, 2, };
300 case '=':
301 switch (*++m) {
302 default: return (t) { stophere, ID_JUNK, 2, };
303 case '>': return (t) { stophere, ID_JUNK, 3, };
/* operators ">", ">>", ">=" */
307 case '>':
308 switch (*++m) {
309 default: return (t) { stophere, ID_JUNK, 1, };
310 case '>': case '=': return (t) { stophere, ID_JUNK, 2, };
/* operators "=~", "==", "==="; a lone '=' is not a symbol name */
313 case '=':
314 switch (*++m) {
315 default: return (t) { invalid, 0, 1, };
316 case '~': return (t) { stophere, ID_JUNK, 2, };
317 case '=':
318 switch (*++m) {
319 default: return (t) { stophere, ID_JUNK, 2, };
320 case '=': return (t) { stophere, ID_JUNK, 3, };
/* operators "*", "**" */
324 case '*':
325 switch (*++m) {
326 default: return (t) { stophere, ID_JUNK, 1, };
327 case '*': return (t) { stophere, ID_JUNK, 2, };
/* operators "+", "-" and their "+@" / "-@" forms */
330 case '+': case '-':
331 switch (*++m) {
332 default: return (t) { stophere, ID_JUNK, 1, };
333 case '@': return (t) { stophere, ID_JUNK, 2, };
/* single-character operators */
336 case '|': case '^': case '&': case '/': case '%': case '~': case '`':
337 return (t) { stophere, ID_JUNK, 1, };
/* "[]" and "[]="; a '[' not followed by ']' consumes nothing and is left
 * for the caller to reject */
339 case '[':
340 switch (*++m) {
341 default: return (t) { needmore, ID_JUNK, 0, };
342 case ']':
343 switch (*++m) {
344 default: return (t) { stophere, ID_JUNK, 2, };
345 case '=': return (t) { stophere, ID_JUNK, 3, };
/* "!=", "!~", or a bare '!'; after a bare '!' more characters are allowed
 * only when allowed_attrset has the ID_JUNK bit set */
349 case '!':
350 switch (*++m) {
351 case '=': case '~': return (t) { stophere, ID_JUNK, 2, };
352 default:
353 if (allowed_attrset & (1U << ID_JUNK)) {
354 return (t) { needmore, ID_JUNK, 1, };
356 else {
357 return (t) { stophere, ID_JUNK, 1, };
/* anything else: constant or local name, decided by the first character;
 * nothing is consumed here */
361 default:
362 if (rb_sym_constant_char_p(name, len, enc)) {
363 return (t) { needmore, ID_CONST, 0, };
365 else {
366 return (t) { needmore, ID_LOCAL, 0, };
370 #undef t
373 rb_enc_symname_type(const char *name, long len, rb_encoding *enc, unsigned int allowed_attrset)
375 const struct enc_synmane_type_leading_chars_tag f =
376 enc_synmane_type_leading_chars(name, len, enc, allowed_attrset);
377 const char *m = name + f.nread;
378 const char *e = name + len;
379 int type = (int)f.type;
381 switch (f.kind) {
382 case invalid: return -1;
383 case stophere: break;
384 case needmore:
386 if (m >= e || (*m != '_' && !ISALPHA(*m) && ISASCII(*m))) {
387 if (len > 1 && *(e-1) == '=') {
388 type = rb_enc_symname_type(name, len-1, enc, allowed_attrset);
389 if (allowed_attrset & (1U << type)) return ID_ATTRSET;
391 return -1;
393 while (m < e && is_identchar(m, e, enc)) m += rb_enc_mbclen(m, e, enc);
394 if (m >= e) break;
395 switch (*m) {
396 case '!': case '?':
397 if (type == ID_GLOBAL || type == ID_CLASS || type == ID_INSTANCE) return -1;
398 type = ID_JUNK;
399 ++m;
400 if (m + 1 < e || *m != '=') break;
401 /* fall through */
402 case '=':
403 if (!(allowed_attrset & (1U << type))) return -1;
404 type = ID_ATTRSET;
405 ++m;
406 break;
410 return m == e ? type : -1;
414 rb_enc_symname2_p(const char *name, long len, rb_encoding *enc)
416 return rb_enc_symname_type(name, len, enc, IDSET_ATTRSET_FOR_SYNTAX) != -1;
419 static int
420 rb_str_symname_type(VALUE name, unsigned int allowed_attrset)
422 const char *ptr = StringValuePtr(name);
423 long len = RSTRING_LEN(name);
424 int type = rb_enc_symname_type(ptr, len, rb_enc_get(name), allowed_attrset);
425 RB_GC_GUARD(name);
426 return type;
429 static void
430 set_id_entry(rb_symbols_t *symbols, rb_id_serial_t num, VALUE str, VALUE sym)
432 ASSERT_vm_locking();
433 RUBY_ASSERT_BUILTIN_TYPE(str, T_STRING);
434 RUBY_ASSERT_BUILTIN_TYPE(sym, T_SYMBOL);
436 size_t idx = num / ID_ENTRY_UNIT;
438 VALUE ary, ids = symbols->ids;
439 if (idx >= (size_t)RARRAY_LEN(ids) || NIL_P(ary = rb_ary_entry(ids, (long)idx))) {
440 ary = rb_ary_hidden_new(ID_ENTRY_UNIT * ID_ENTRY_SIZE);
441 rb_ary_store(ids, (long)idx, ary);
443 idx = (num % ID_ENTRY_UNIT) * ID_ENTRY_SIZE;
444 rb_ary_store(ary, (long)idx + ID_ENTRY_STR, str);
445 rb_ary_store(ary, (long)idx + ID_ENTRY_SYM, sym);
448 static VALUE
449 get_id_serial_entry(rb_id_serial_t num, ID id, const enum id_entry_type t)
451 VALUE result = 0;
453 GLOBAL_SYMBOLS_ENTER(symbols);
455 if (num && num <= symbols->last_id) {
456 size_t idx = num / ID_ENTRY_UNIT;
457 VALUE ids = symbols->ids;
458 VALUE ary;
459 if (idx < (size_t)RARRAY_LEN(ids) && !NIL_P(ary = rb_ary_entry(ids, (long)idx))) {
460 long pos = (long)(num % ID_ENTRY_UNIT) * ID_ENTRY_SIZE;
461 result = rb_ary_entry(ary, pos + t);
463 if (NIL_P(result)) {
464 result = 0;
466 else if (CHECK_ID_SERIAL) {
467 if (id) {
468 VALUE sym = result;
469 if (t != ID_ENTRY_SYM)
470 sym = rb_ary_entry(ary, pos + ID_ENTRY_SYM);
471 if (STATIC_SYM_P(sym)) {
472 if (STATIC_SYM2ID(sym) != id) result = 0;
474 else {
475 if (RSYMBOL(sym)->id != id) result = 0;
482 GLOBAL_SYMBOLS_LEAVE();
484 if (result) {
485 switch (t) {
486 case ID_ENTRY_STR:
487 RUBY_ASSERT_BUILTIN_TYPE(result, T_STRING);
488 break;
489 case ID_ENTRY_SYM:
490 RUBY_ASSERT_BUILTIN_TYPE(result, T_SYMBOL);
491 break;
492 default:
493 break;
497 return result;
500 static VALUE
501 get_id_entry(ID id, const enum id_entry_type t)
503 return get_id_serial_entry(rb_id_to_serial(id), id, t);
507 rb_static_id_valid_p(ID id)
509 return STATIC_ID2SYM(id) == get_id_entry(id, ID_ENTRY_SYM);
512 static inline ID
513 rb_id_serial_to_id(rb_id_serial_t num)
515 if (is_notop_id((ID)num)) {
516 VALUE sym = get_id_serial_entry(num, 0, ID_ENTRY_SYM);
517 if (sym) return SYM2ID(sym);
518 return ((ID)num << ID_SCOPE_SHIFT) | ID_INTERNAL | ID_STATIC_SYM;
520 else {
521 return (ID)num;
525 static int
526 register_sym_update_callback(st_data_t *key, st_data_t *value, st_data_t arg, int existing)
528 if (existing) {
529 rb_fatal("symbol :% "PRIsVALUE" is already registered with %"PRIxVALUE,
530 (VALUE)*key, (VALUE)*value);
532 *value = arg;
533 return ST_CONTINUE;
536 static void
537 register_sym(rb_symbols_t *symbols, VALUE str, VALUE sym)
539 ASSERT_vm_locking();
541 if (SYMBOL_DEBUG) {
542 st_update(symbols->str_sym, (st_data_t)str,
543 register_sym_update_callback, (st_data_t)sym);
545 else {
546 st_add_direct(symbols->str_sym, (st_data_t)str, (st_data_t)sym);
550 void
551 rb_free_static_symid_str(void)
553 GLOBAL_SYMBOLS_ENTER(symbols)
555 st_free_table(symbols->str_sym);
557 GLOBAL_SYMBOLS_LEAVE();
560 static void
561 unregister_sym(rb_symbols_t *symbols, VALUE str, VALUE sym)
563 ASSERT_vm_locking();
565 st_data_t str_data = (st_data_t)str;
566 if (!st_delete(symbols->str_sym, &str_data, NULL)) {
567 rb_bug("%p can't remove str from str_id (%s)", (void *)sym, RSTRING_PTR(str));
571 static ID
572 register_static_symid(ID id, const char *name, long len, rb_encoding *enc)
574 VALUE str = rb_enc_str_new(name, len, enc);
575 return register_static_symid_str(id, str);
578 static ID
579 register_static_symid_str(ID id, VALUE str)
581 rb_id_serial_t num = rb_id_to_serial(id);
582 VALUE sym = STATIC_ID2SYM(id);
584 OBJ_FREEZE(str);
585 str = rb_fstring(str);
587 RUBY_DTRACE_CREATE_HOOK(SYMBOL, RSTRING_PTR(str));
589 GLOBAL_SYMBOLS_ENTER(symbols)
591 register_sym(symbols, str, sym);
592 set_id_entry(symbols, num, str, sym);
594 GLOBAL_SYMBOLS_LEAVE();
596 return id;
599 static int
600 sym_check_asciionly(VALUE str, bool fake_str)
602 if (!rb_enc_asciicompat(rb_enc_get(str))) return FALSE;
603 switch (rb_enc_str_coderange(str)) {
604 case ENC_CODERANGE_BROKEN:
605 if (fake_str) {
606 str = rb_enc_str_new(RSTRING_PTR(str), RSTRING_LEN(str), rb_enc_get(str));
608 rb_raise(rb_eEncodingError, "invalid symbol in encoding %s :%+"PRIsVALUE,
609 rb_enc_name(rb_enc_get(str)), str);
610 case ENC_CODERANGE_7BIT:
611 return TRUE;
613 return FALSE;
616 #if 0
618 * _str_ itself will be registered at the global symbol table. _str_
619 * can be modified before the registration, since the encoding will be
620 * set to ASCII-8BIT if it is a special global name.
623 static inline void
624 must_be_dynamic_symbol(VALUE x)
626 if (UNLIKELY(!DYNAMIC_SYM_P(x))) {
627 if (STATIC_SYM_P(x)) {
628 VALUE str = lookup_id_str(RSHIFT((unsigned long)(x),RUBY_SPECIAL_SHIFT));
630 if (str) {
631 rb_bug("wrong argument: %s (inappropriate Symbol)", RSTRING_PTR(str));
633 else {
634 rb_bug("wrong argument: inappropriate Symbol (%p)", (void *)x);
637 else {
638 rb_bug("wrong argument type %s (expected Symbol)", rb_builtin_class_name(x));
642 #endif
644 static VALUE
645 dsymbol_alloc(rb_symbols_t *symbols, const VALUE klass, const VALUE str, rb_encoding * const enc, const ID type)
647 ASSERT_vm_locking();
649 NEWOBJ_OF(obj, struct RSymbol, klass, T_SYMBOL | FL_WB_PROTECTED, sizeof(struct RSymbol), 0);
651 long hashval;
653 rb_enc_set_index((VALUE)obj, rb_enc_to_index(enc));
654 OBJ_FREEZE((VALUE)obj);
655 RB_OBJ_WRITE((VALUE)obj, &obj->fstr, str);
656 obj->id = type;
658 /* we want hashval to be in Fixnum range [ruby-core:15713] r15672 */
659 hashval = (long)rb_str_hash(str);
660 obj->hashval = RSHIFT((long)hashval, 1);
661 register_sym(symbols, str, (VALUE)obj);
662 rb_hash_aset(symbols->dsymbol_fstr_hash, str, Qtrue);
663 RUBY_DTRACE_CREATE_HOOK(SYMBOL, RSTRING_PTR(obj->fstr));
665 return (VALUE)obj;
668 static inline VALUE
669 dsymbol_check(rb_symbols_t *symbols, const VALUE sym)
671 ASSERT_vm_locking();
673 if (UNLIKELY(rb_objspace_garbage_object_p(sym))) {
674 const VALUE fstr = RSYMBOL(sym)->fstr;
675 const ID type = RSYMBOL(sym)->id & ID_SCOPE_MASK;
676 RSYMBOL(sym)->fstr = 0;
677 unregister_sym(symbols, fstr, sym);
678 return dsymbol_alloc(symbols, rb_cSymbol, fstr, rb_enc_get(fstr), type);
680 else {
681 return sym;
685 static ID
686 lookup_str_id(VALUE str)
688 st_data_t sym_data;
689 int found;
691 GLOBAL_SYMBOLS_ENTER(symbols);
693 found = st_lookup(symbols->str_sym, (st_data_t)str, &sym_data);
695 GLOBAL_SYMBOLS_LEAVE();
697 if (found) {
698 const VALUE sym = (VALUE)sym_data;
700 if (STATIC_SYM_P(sym)) {
701 return STATIC_SYM2ID(sym);
703 else if (DYNAMIC_SYM_P(sym)) {
704 ID id = RSYMBOL(sym)->id;
705 if (id & ~ID_SCOPE_MASK) return id;
707 else {
708 rb_bug("non-symbol object %s:%"PRIxVALUE" for %"PRIsVALUE" in symbol table",
709 rb_builtin_class_name(sym), sym, str);
712 return (ID)0;
715 static VALUE
716 lookup_str_sym_with_lock(rb_symbols_t *symbols, const VALUE str)
718 st_data_t sym_data;
719 if (st_lookup(symbols->str_sym, (st_data_t)str, &sym_data)) {
720 VALUE sym = (VALUE)sym_data;
721 if (DYNAMIC_SYM_P(sym)) {
722 sym = dsymbol_check(symbols, sym);
724 return sym;
726 else {
727 return Qfalse;
731 static VALUE
732 lookup_str_sym(const VALUE str)
734 VALUE sym;
736 GLOBAL_SYMBOLS_ENTER(symbols);
738 sym = lookup_str_sym_with_lock(symbols, str);
740 GLOBAL_SYMBOLS_LEAVE();
742 return sym;
745 static VALUE
746 lookup_id_str(ID id)
748 return get_id_entry(id, ID_ENTRY_STR);
752 rb_intern3(const char *name, long len, rb_encoding *enc)
754 VALUE sym;
755 struct RString fake_str;
756 VALUE str = rb_setup_fake_str(&fake_str, name, len, enc);
757 OBJ_FREEZE(str);
758 sym = lookup_str_sym(str);
759 if (sym) return rb_sym2id(sym);
760 str = rb_enc_str_new(name, len, enc); /* make true string */
761 return intern_str(str, 1);
764 static ID
765 next_id_base_with_lock(rb_symbols_t *symbols)
767 ID id;
768 rb_id_serial_t next_serial = symbols->last_id + 1;
770 if (next_serial == 0) {
771 id = (ID)-1;
773 else {
774 const size_t num = ++symbols->last_id;
775 id = num << ID_SCOPE_SHIFT;
778 return id;
781 static ID
782 next_id_base(void)
784 ID id;
785 GLOBAL_SYMBOLS_ENTER(symbols);
787 id = next_id_base_with_lock(symbols);
789 GLOBAL_SYMBOLS_LEAVE();
790 return id;
793 static ID
794 intern_str(VALUE str, int mutable)
796 ID id;
797 ID nid;
799 id = rb_str_symname_type(str, IDSET_ATTRSET_FOR_INTERN);
800 if (id == (ID)-1) id = ID_JUNK;
801 if (sym_check_asciionly(str, false)) {
802 if (!mutable) str = rb_str_dup(str);
803 rb_enc_associate(str, rb_usascii_encoding());
805 if ((nid = next_id_base()) == (ID)-1) {
806 str = rb_str_ellipsize(str, 20);
807 rb_raise(rb_eRuntimeError, "symbol table overflow (symbol %"PRIsVALUE")",
808 str);
810 id |= nid;
811 id |= ID_STATIC_SYM;
812 return register_static_symid_str(id, str);
816 rb_intern2(const char *name, long len)
818 return rb_intern3(name, len, rb_usascii_encoding());
821 #undef rb_intern
823 rb_intern(const char *name)
825 return rb_intern2(name, strlen(name));
829 rb_intern_str(VALUE str)
831 VALUE sym = lookup_str_sym(str);
833 if (sym) {
834 return SYM2ID(sym);
837 return intern_str(str, 0);
840 void
841 rb_gc_free_dsymbol(VALUE sym)
843 VALUE str = RSYMBOL(sym)->fstr;
845 if (str) {
846 RSYMBOL(sym)->fstr = 0;
848 GLOBAL_SYMBOLS_ENTER(symbols);
850 unregister_sym(symbols, str, sym);
851 rb_hash_delete_entry(symbols->dsymbol_fstr_hash, str);
853 GLOBAL_SYMBOLS_LEAVE();
858 * call-seq:
859 * str.intern -> symbol
860 * str.to_sym -> symbol
862 * Returns the +Symbol+ corresponding to <i>str</i>, creating the
863 * symbol if it did not previously exist. See Symbol#id2name.
865 * "Koala".intern #=> :Koala
866 * s = 'cat'.to_sym #=> :cat
867 * s == :cat #=> true
868 * s = '@cat'.to_sym #=> :@cat
869 * s == :@cat #=> true
871 * This can also be used to create symbols that cannot be represented using the
872 * <code>:xxx</code> notation.
874 * 'cat and dog'.to_sym #=> :"cat and dog"
877 VALUE
878 rb_str_intern(VALUE str)
880 VALUE sym;
882 GLOBAL_SYMBOLS_ENTER(symbols);
884 sym = lookup_str_sym_with_lock(symbols, str);
886 if (sym) {
887 // ok
889 else if (USE_SYMBOL_GC) {
890 rb_encoding *enc = rb_enc_get(str);
891 rb_encoding *ascii = rb_usascii_encoding();
892 if (enc != ascii && sym_check_asciionly(str, false)) {
893 str = rb_str_dup(str);
894 rb_enc_associate(str, ascii);
895 OBJ_FREEZE(str);
896 enc = ascii;
898 else {
899 str = rb_str_dup(str);
900 OBJ_FREEZE(str);
902 str = rb_fstring(str);
903 int type = rb_str_symname_type(str, IDSET_ATTRSET_FOR_INTERN);
904 if (type < 0) type = ID_JUNK;
905 sym = dsymbol_alloc(symbols, rb_cSymbol, str, enc, type);
907 else {
908 ID id = intern_str(str, 0);
909 sym = ID2SYM(id);
912 GLOBAL_SYMBOLS_LEAVE();
913 return sym;
917 rb_sym2id(VALUE sym)
919 ID id;
920 if (STATIC_SYM_P(sym)) {
921 id = STATIC_SYM2ID(sym);
923 else if (DYNAMIC_SYM_P(sym)) {
924 GLOBAL_SYMBOLS_ENTER(symbols);
926 sym = dsymbol_check(symbols, sym);
927 id = RSYMBOL(sym)->id;
929 if (UNLIKELY(!(id & ~ID_SCOPE_MASK))) {
930 VALUE fstr = RSYMBOL(sym)->fstr;
931 ID num = next_id_base_with_lock(symbols);
933 RSYMBOL(sym)->id = id |= num;
934 /* make it permanent object */
936 set_id_entry(symbols, rb_id_to_serial(num), fstr, sym);
937 rb_hash_delete_entry(symbols->dsymbol_fstr_hash, fstr);
940 GLOBAL_SYMBOLS_LEAVE();
942 else {
943 rb_raise(rb_eTypeError, "wrong argument type %s (expected Symbol)",
944 rb_builtin_class_name(sym));
946 return id;
949 #undef rb_id2sym
950 VALUE
951 rb_id2sym(ID x)
953 if (!DYNAMIC_ID_P(x)) return STATIC_ID2SYM(x);
954 return get_id_entry(x, ID_ENTRY_SYM);
958 * call-seq:
959 * name -> string
961 * Returns a frozen string representation of +self+ (not including the leading colon):
963 * :foo.name # => "foo"
964 * :foo.name.frozen? # => true
966 * Related: Symbol#to_s, Symbol#inspect.
969 VALUE
970 rb_sym2str(VALUE sym)
972 VALUE str;
973 if (DYNAMIC_SYM_P(sym)) {
974 str = RSYMBOL(sym)->fstr;
975 RUBY_ASSERT_BUILTIN_TYPE(str, T_STRING);
977 else {
978 str = rb_id2str(STATIC_SYM2ID(sym));
979 if (str) RUBY_ASSERT_BUILTIN_TYPE(str, T_STRING);
982 return str;
985 VALUE
986 rb_id2str(ID id)
988 return lookup_id_str(id);
991 const char *
992 rb_id2name(ID id)
994 VALUE str = rb_id2str(id);
996 if (!str) return 0;
997 return RSTRING_PTR(str);
1001 rb_make_internal_id(void)
1003 return next_id_base() | ID_INTERNAL | ID_STATIC_SYM;
1007 rb_make_temporary_id(size_t n)
1009 const ID max_id = RB_ID_SERIAL_MAX & ~0xffff;
1010 const ID id = max_id - (ID)n;
1011 if (id <= ruby_global_symbols.last_id) {
1012 rb_raise(rb_eRuntimeError, "too big to make temporary ID: %" PRIdSIZE, n);
1014 return (id << ID_SCOPE_SHIFT) | ID_STATIC_SYM | ID_INTERNAL;
1017 static int
1018 symbols_i(st_data_t key, st_data_t value, st_data_t arg)
1020 VALUE ary = (VALUE)arg;
1021 VALUE sym = (VALUE)value;
1023 if (STATIC_SYM_P(sym)) {
1024 rb_ary_push(ary, sym);
1025 return ST_CONTINUE;
1027 else if (!DYNAMIC_SYM_P(sym)) {
1028 rb_bug("invalid symbol: %s", RSTRING_PTR((VALUE)key));
1030 else if (!SYMBOL_PINNED_P(sym) && rb_objspace_garbage_object_p(sym)) {
1031 RSYMBOL(sym)->fstr = 0;
1032 return ST_DELETE;
1034 else {
1035 rb_ary_push(ary, sym);
1036 return ST_CONTINUE;
1041 VALUE
1042 rb_sym_all_symbols(void)
1044 VALUE ary;
1046 GLOBAL_SYMBOLS_ENTER(symbols);
1048 ary = rb_ary_new2(symbols->str_sym->num_entries);
1049 st_foreach(symbols->str_sym, symbols_i, ary);
1051 GLOBAL_SYMBOLS_LEAVE();
1053 return ary;
1056 size_t
1057 rb_sym_immortal_count(void)
1059 return (size_t)ruby_global_symbols.last_id;
1063 rb_is_const_id(ID id)
1065 return is_const_id(id);
1069 rb_is_class_id(ID id)
1071 return is_class_id(id);
1075 rb_is_global_id(ID id)
1077 return is_global_id(id);
1081 rb_is_instance_id(ID id)
1083 return is_instance_id(id);
1087 rb_is_attrset_id(ID id)
1089 return is_attrset_id(id);
1093 rb_is_local_id(ID id)
1095 return is_local_id(id);
1099 rb_is_junk_id(ID id)
1101 return is_junk_id(id);
1105 rb_is_const_sym(VALUE sym)
1107 return is_const_sym(sym);
1111 rb_is_attrset_sym(VALUE sym)
1113 return is_attrset_sym(sym);
1117 rb_check_id(volatile VALUE *namep)
1119 VALUE tmp;
1120 VALUE name = *namep;
1122 if (STATIC_SYM_P(name)) {
1123 return STATIC_SYM2ID(name);
1125 else if (DYNAMIC_SYM_P(name)) {
1126 if (SYMBOL_PINNED_P(name)) {
1127 return RSYMBOL(name)->id;
1129 else {
1130 *namep = RSYMBOL(name)->fstr;
1131 return 0;
1134 else if (!RB_TYPE_P(name, T_STRING)) {
1135 tmp = rb_check_string_type(name);
1136 if (NIL_P(tmp)) {
1137 rb_raise(rb_eTypeError, "%+"PRIsVALUE" is not a symbol nor a string",
1138 name);
1140 name = tmp;
1141 *namep = name;
1144 sym_check_asciionly(name, false);
1146 return lookup_str_id(name);
1149 // Used by yjit for handling .send without throwing exceptions
1151 rb_get_symbol_id(VALUE name)
1153 if (STATIC_SYM_P(name)) {
1154 return STATIC_SYM2ID(name);
1156 else if (DYNAMIC_SYM_P(name)) {
1157 if (SYMBOL_PINNED_P(name)) {
1158 return RSYMBOL(name)->id;
1160 else {
1161 return 0;
1164 else if (RB_TYPE_P(name, T_STRING)) {
1165 return lookup_str_id(name);
1167 else {
1168 return 0;
1173 VALUE
1174 rb_check_symbol(volatile VALUE *namep)
1176 VALUE sym;
1177 VALUE tmp;
1178 VALUE name = *namep;
1180 if (STATIC_SYM_P(name)) {
1181 return name;
1183 else if (DYNAMIC_SYM_P(name)) {
1184 if (!SYMBOL_PINNED_P(name)) {
1185 GLOBAL_SYMBOLS_ENTER(symbols);
1187 name = dsymbol_check(symbols, name);
1189 GLOBAL_SYMBOLS_LEAVE();
1191 *namep = name;
1193 return name;
1195 else if (!RB_TYPE_P(name, T_STRING)) {
1196 tmp = rb_check_string_type(name);
1197 if (NIL_P(tmp)) {
1198 rb_raise(rb_eTypeError, "%+"PRIsVALUE" is not a symbol nor a string",
1199 name);
1201 name = tmp;
1202 *namep = name;
1205 sym_check_asciionly(name, false);
1207 if ((sym = lookup_str_sym(name)) != 0) {
1208 return sym;
1211 return Qnil;
1215 rb_check_id_cstr(const char *ptr, long len, rb_encoding *enc)
1217 struct RString fake_str;
1218 const VALUE name = rb_setup_fake_str(&fake_str, ptr, len, enc);
1220 sym_check_asciionly(name, true);
1222 return lookup_str_id(name);
1225 VALUE
1226 rb_check_symbol_cstr(const char *ptr, long len, rb_encoding *enc)
1228 VALUE sym;
1229 struct RString fake_str;
1230 const VALUE name = rb_setup_fake_str(&fake_str, ptr, len, enc);
1232 sym_check_asciionly(name, true);
1234 if ((sym = lookup_str_sym(name)) != 0) {
1235 return sym;
1238 return Qnil;
/* Drop any macro form of rb_sym_intern_ascii_cstr so that the real function
 * can be defined below, then declare the rb_sym_intern family with
 * compiler-specific attributes: NOINLINE for rb_sym_intern under clang,
 * FUNC_MINIMIZED for all three functions otherwise. */
1241 #undef rb_sym_intern_ascii_cstr
1242 #ifdef __clang__
1243 NOINLINE(VALUE rb_sym_intern(const char *ptr, long len, rb_encoding *enc));
1244 #else
1245 FUNC_MINIMIZED(VALUE rb_sym_intern(const char *ptr, long len, rb_encoding *enc));
1246 FUNC_MINIMIZED(VALUE rb_sym_intern_ascii(const char *ptr, long len));
1247 FUNC_MINIMIZED(VALUE rb_sym_intern_ascii_cstr(const char *ptr));
1248 #endif
1250 VALUE
1251 rb_sym_intern(const char *ptr, long len, rb_encoding *enc)
1253 struct RString fake_str;
1254 const VALUE name = rb_setup_fake_str(&fake_str, ptr, len, enc);
1255 return rb_str_intern(name);
1258 VALUE
1259 rb_sym_intern_ascii(const char *ptr, long len)
1261 return rb_sym_intern(ptr, len, rb_usascii_encoding());
1264 VALUE
1265 rb_sym_intern_ascii_cstr(const char *ptr)
1267 return rb_sym_intern_ascii(ptr, strlen(ptr));
1270 VALUE
1271 rb_to_symbol_type(VALUE obj)
1273 return rb_convert_type_with_id(obj, T_SYMBOL, "Symbol", idTo_sym);
1277 rb_is_const_name(VALUE name)
1279 return rb_str_symname_type(name, 0) == ID_CONST;
1283 rb_is_class_name(VALUE name)
1285 return rb_str_symname_type(name, 0) == ID_CLASS;
1289 rb_is_instance_name(VALUE name)
1291 return rb_str_symname_type(name, 0) == ID_INSTANCE;
1295 rb_is_local_name(VALUE name)
1297 return rb_str_symname_type(name, 0) == ID_LOCAL;
1300 #include "id_table.c"
1301 #include "symbol.rbinc"