1 /**********************************************************************
6 created at: Tue Jul 8 15:49:54 JST 2014
8 Copyright (C) 2014 Yukihiro Matsumoto
10 **********************************************************************/
13 #include "internal/error.h"
14 #include "internal/gc.h"
15 #include "internal/hash.h"
16 #include "internal/object.h"
17 #include "internal/symbol.h"
18 #include "internal/vm.h"
20 #include "ruby/encoding.h"
26 #if defined(USE_SYMBOL_GC) && !(USE_SYMBOL_GC+0)
28 # define USE_SYMBOL_GC 0
31 # define USE_SYMBOL_GC 1
33 #if defined(SYMBOL_DEBUG) && (SYMBOL_DEBUG+0)
35 # define SYMBOL_DEBUG 1
38 # define SYMBOL_DEBUG 0
40 #ifndef CHECK_ID_SERIAL
41 # define CHECK_ID_SERIAL SYMBOL_DEBUG
44 #define SYMBOL_PINNED_P(sym) (RSYMBOL(sym)->id&~ID_SCOPE_MASK)
46 #define STATIC_SYM2ID(sym) RSHIFT((VALUE)(sym), RUBY_SPECIAL_SHIFT)
48 static ID
register_static_symid(ID
, const char *, long, rb_encoding
*);
49 static ID
register_static_symid_str(ID
, VALUE
);
50 #define REGISTER_SYMID(id, name) register_static_symid((id), (name), strlen(name), enc)
53 #define is_identchar(p,e,enc) (ISALNUM((unsigned char)*(p)) || (*(p)) == '_' || !ISASCII(*(p)))
55 #define op_tbl_count numberof(op_tbl)
56 STATIC_ASSERT(op_tbl_name_size
, sizeof(op_tbl
[0].name
) == 3);
57 #define op_tbl_len(i) (!op_tbl[i].name[1] ? 1 : !op_tbl[i].name[2] ? 2 : 3)
63 rb_encoding
*const enc
= rb_usascii_encoding();
65 for (i
= '!'; i
<= '~'; ++i
) {
66 if (!ISALNUM(i
) && i
!= '_') {
68 register_static_symid(i
, &c
, 1, enc
);
71 for (i
= 0; i
< op_tbl_count
; ++i
) {
72 register_static_symid(op_tbl
[i
].token
, op_tbl
[i
].name
, op_tbl_len(i
), enc
);
76 static const int ID_ENTRY_UNIT
= 512;
84 rb_symbols_t ruby_global_symbols
= {tNEXT_ID
-1};
86 static const struct st_hash_type symhash
= {
94 rb_symbols_t
*symbols
= &ruby_global_symbols
;
96 VALUE dsym_fstrs
= rb_ident_hash_new();
97 symbols
->dsymbol_fstr_hash
= dsym_fstrs
;
98 rb_vm_register_global_object(dsym_fstrs
);
99 rb_obj_hide(dsym_fstrs
);
101 symbols
->str_sym
= st_init_table_with_size(&symhash
, 1000);
102 symbols
->ids
= rb_ary_hidden_new(0);
103 rb_vm_register_global_object(symbols
->ids
);
109 WARN_UNUSED_RESULT(static VALUE
dsymbol_alloc(rb_symbols_t
*symbols
, const VALUE klass
, const VALUE str
, rb_encoding
*const enc
, const ID type
));
110 WARN_UNUSED_RESULT(static VALUE
dsymbol_check(rb_symbols_t
*symbols
, const VALUE sym
));
111 WARN_UNUSED_RESULT(static ID
lookup_str_id(VALUE str
));
112 WARN_UNUSED_RESULT(static VALUE
lookup_str_sym_with_lock(rb_symbols_t
*symbols
, const VALUE str
));
113 WARN_UNUSED_RESULT(static VALUE
lookup_str_sym(const VALUE str
));
114 WARN_UNUSED_RESULT(static VALUE
lookup_id_str(ID id
));
115 WARN_UNUSED_RESULT(static ID
intern_str(VALUE str
, int mutable));
117 #define GLOBAL_SYMBOLS_ENTER(symbols) rb_symbols_t *symbols = &ruby_global_symbols; RB_VM_LOCK_ENTER()
118 #define GLOBAL_SYMBOLS_LEAVE() RB_VM_LOCK_LEAVE()
126 if (!is_notop_id(id
)) {
128 case tAREF
: case tASET
:
129 return tASET
; /* only exception */
131 rb_name_error(id
, "cannot make operator ID :%"PRIsVALUE
" attrset",
137 case ID_LOCAL
: case ID_INSTANCE
: case ID_GLOBAL
:
138 case ID_CONST
: case ID_CLASS
: case ID_JUNK
:
144 if ((str
= lookup_id_str(id
)) != 0) {
145 rb_name_error(id
, "cannot make unknown type ID %d:%"PRIsVALUE
" attrset",
149 rb_name_error_str(Qnil
, "cannot make unknown type anonymous ID %d:%"PRIxVALUE
" attrset",
156 /* make new symbol and ID */
157 if (!(str
= lookup_id_str(id
))) {
158 static const char id_types
[][8] = {
168 rb_name_error(id
, "cannot make anonymous %.*s ID %"PRIxVALUE
" attrset",
169 (int)sizeof(id_types
[0]), id_types
[scope
], (VALUE
)id
);
171 str
= rb_str_dup(str
);
172 rb_str_cat(str
, "=", 1);
173 sym
= lookup_str_sym(str
);
174 id
= sym
? rb_sym2id(sym
) : intern_str(str
, 1);
179 is_special_global_name(const char *m
, const char *e
, rb_encoding
*enc
)
183 if (m
>= e
) return 0;
184 if (is_global_name_punct(*m
)) {
187 else if (*m
== '-') {
188 if (++m
>= e
) return 0;
189 if (is_identchar(m
, e
, enc
)) {
190 if (!ISASCII(*m
)) mb
= 1;
191 m
+= rb_enc_mbclen(m
, e
, enc
);
195 if (!ISDIGIT(*m
)) return 0;
197 if (!ISASCII(*m
)) mb
= 1;
199 } while (m
< e
&& ISDIGIT(*m
));
201 return m
== e
? mb
+ 1 : 0;
205 rb_symname_p(const char *name
)
207 return rb_enc_symname_p(name
, rb_ascii8bit_encoding());
211 rb_enc_symname_p(const char *name
, rb_encoding
*enc
)
213 return rb_enc_symname2_p(name
, strlen(name
), enc
);
217 rb_sym_constant_char_p(const char *name
, long nlen
, rb_encoding
*enc
)
220 const char *end
= name
+ nlen
;
222 if (nlen
< 1) return FALSE
;
223 if (ISASCII(*name
)) return ISUPPER(*name
);
224 c
= rb_enc_precise_mbclen(name
, end
, enc
);
225 if (!MBCLEN_CHARFOUND_P(c
)) return FALSE
;
226 len
= MBCLEN_CHARFOUND_LEN(c
);
227 c
= rb_enc_mbc_to_codepoint(name
, end
, enc
);
228 if (rb_enc_isupper(c
, enc
)) return TRUE
;
229 if (rb_enc_islower(c
, enc
)) return FALSE
;
230 if (ONIGENC_IS_UNICODE(enc
)) {
231 static int ctype_titlecase
= 0;
232 if (!ctype_titlecase
) {
233 static const UChar cname
[] = "titlecaseletter";
234 static const UChar
*const end
= cname
+ sizeof(cname
) - 1;
235 ctype_titlecase
= ONIGENC_PROPERTY_NAME_TO_CTYPE(enc
, cname
, end
);
237 if (rb_enc_isctype(c
, ctype_titlecase
, enc
)) return TRUE
;
240 /* fallback to case-folding */
241 OnigUChar fold
[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM
];
242 const OnigUChar
*beg
= (const OnigUChar
*)name
;
243 int r
= enc
->mbc_case_fold(ONIGENC_CASE_FOLD
,
244 &beg
, (const OnigUChar
*)end
,
246 if (r
> 0 && (r
!= len
|| memcmp(fold
, name
, r
)))
252 #define IDSET_ATTRSET_FOR_SYNTAX ((1U<<ID_LOCAL)|(1U<<ID_CONST))
253 #define IDSET_ATTRSET_FOR_INTERN (~(~0U<<(1<<ID_SCOPE_SHIFT)) & ~(1U<<ID_ATTRSET))
255 struct enc_synmane_type_leading_chars_tag
{
256 const enum { invalid
, stophere
, needmore
, } kind
;
257 const enum ruby_id_types type
;
261 #define t struct enc_synmane_type_leading_chars_tag
263 static struct enc_synmane_type_leading_chars_tag
264 enc_synmane_type_leading_chars(const char *name
, long len
, rb_encoding
*enc
, int allowed_attrset
)
266 const char *m
= name
;
267 const char *e
= m
+ len
;
269 if (! rb_enc_asciicompat(enc
)) {
270 return (t
) { invalid
, 0, 0, };
273 return (t
) { invalid
, 0, 0, };
275 else if ( len
<= 0 ) {
276 return (t
) { invalid
, 0, 0, };
280 return (t
) { invalid
, 0, 0, };
283 if (is_special_global_name(++m
, e
, enc
)) {
284 return (t
) { stophere
, ID_GLOBAL
, len
, };
287 return (t
) { needmore
, ID_GLOBAL
, 1, };
292 default: return (t
) { needmore
, ID_INSTANCE
, 1, };
293 case '@': return (t
) { needmore
, ID_CLASS
, 2, };
298 default: return (t
) { stophere
, ID_JUNK
, 1, };
299 case '<': return (t
) { stophere
, ID_JUNK
, 2, };
302 default: return (t
) { stophere
, ID_JUNK
, 2, };
303 case '>': return (t
) { stophere
, ID_JUNK
, 3, };
309 default: return (t
) { stophere
, ID_JUNK
, 1, };
310 case '>': case '=': return (t
) { stophere
, ID_JUNK
, 2, };
315 default: return (t
) { invalid
, 0, 1, };
316 case '~': return (t
) { stophere
, ID_JUNK
, 2, };
319 default: return (t
) { stophere
, ID_JUNK
, 2, };
320 case '=': return (t
) { stophere
, ID_JUNK
, 3, };
326 default: return (t
) { stophere
, ID_JUNK
, 1, };
327 case '*': return (t
) { stophere
, ID_JUNK
, 2, };
332 default: return (t
) { stophere
, ID_JUNK
, 1, };
333 case '@': return (t
) { stophere
, ID_JUNK
, 2, };
336 case '|': case '^': case '&': case '/': case '%': case '~': case '`':
337 return (t
) { stophere
, ID_JUNK
, 1, };
341 default: return (t
) { needmore
, ID_JUNK
, 0, };
344 default: return (t
) { stophere
, ID_JUNK
, 2, };
345 case '=': return (t
) { stophere
, ID_JUNK
, 3, };
351 case '=': case '~': return (t
) { stophere
, ID_JUNK
, 2, };
353 if (allowed_attrset
& (1U << ID_JUNK
)) {
354 return (t
) { needmore
, ID_JUNK
, 1, };
357 return (t
) { stophere
, ID_JUNK
, 1, };
362 if (rb_sym_constant_char_p(name
, len
, enc
)) {
363 return (t
) { needmore
, ID_CONST
, 0, };
366 return (t
) { needmore
, ID_LOCAL
, 0, };
373 rb_enc_symname_type(const char *name
, long len
, rb_encoding
*enc
, unsigned int allowed_attrset
)
375 const struct enc_synmane_type_leading_chars_tag f
=
376 enc_synmane_type_leading_chars(name
, len
, enc
, allowed_attrset
);
377 const char *m
= name
+ f
.nread
;
378 const char *e
= name
+ len
;
379 int type
= (int)f
.type
;
382 case invalid
: return -1;
383 case stophere
: break;
386 if (m
>= e
|| (*m
!= '_' && !ISALPHA(*m
) && ISASCII(*m
))) {
387 if (len
> 1 && *(e
-1) == '=') {
388 type
= rb_enc_symname_type(name
, len
-1, enc
, allowed_attrset
);
389 if (allowed_attrset
& (1U << type
)) return ID_ATTRSET
;
393 while (m
< e
&& is_identchar(m
, e
, enc
)) m
+= rb_enc_mbclen(m
, e
, enc
);
397 if (type
== ID_GLOBAL
|| type
== ID_CLASS
|| type
== ID_INSTANCE
) return -1;
400 if (m
+ 1 < e
|| *m
!= '=') break;
403 if (!(allowed_attrset
& (1U << type
))) return -1;
410 return m
== e
? type
: -1;
414 rb_enc_symname2_p(const char *name
, long len
, rb_encoding
*enc
)
416 return rb_enc_symname_type(name
, len
, enc
, IDSET_ATTRSET_FOR_SYNTAX
) != -1;
420 rb_str_symname_type(VALUE name
, unsigned int allowed_attrset
)
422 const char *ptr
= StringValuePtr(name
);
423 long len
= RSTRING_LEN(name
);
424 int type
= rb_enc_symname_type(ptr
, len
, rb_enc_get(name
), allowed_attrset
);
430 set_id_entry(rb_symbols_t
*symbols
, rb_id_serial_t num
, VALUE str
, VALUE sym
)
433 RUBY_ASSERT_BUILTIN_TYPE(str
, T_STRING
);
434 RUBY_ASSERT_BUILTIN_TYPE(sym
, T_SYMBOL
);
436 size_t idx
= num
/ ID_ENTRY_UNIT
;
438 VALUE ary
, ids
= symbols
->ids
;
439 if (idx
>= (size_t)RARRAY_LEN(ids
) || NIL_P(ary
= rb_ary_entry(ids
, (long)idx
))) {
440 ary
= rb_ary_hidden_new(ID_ENTRY_UNIT
* ID_ENTRY_SIZE
);
441 rb_ary_store(ids
, (long)idx
, ary
);
443 idx
= (num
% ID_ENTRY_UNIT
) * ID_ENTRY_SIZE
;
444 rb_ary_store(ary
, (long)idx
+ ID_ENTRY_STR
, str
);
445 rb_ary_store(ary
, (long)idx
+ ID_ENTRY_SYM
, sym
);
449 get_id_serial_entry(rb_id_serial_t num
, ID id
, const enum id_entry_type t
)
453 GLOBAL_SYMBOLS_ENTER(symbols
);
455 if (num
&& num
<= symbols
->last_id
) {
456 size_t idx
= num
/ ID_ENTRY_UNIT
;
457 VALUE ids
= symbols
->ids
;
459 if (idx
< (size_t)RARRAY_LEN(ids
) && !NIL_P(ary
= rb_ary_entry(ids
, (long)idx
))) {
460 long pos
= (long)(num
% ID_ENTRY_UNIT
) * ID_ENTRY_SIZE
;
461 result
= rb_ary_entry(ary
, pos
+ t
);
466 else if (CHECK_ID_SERIAL
) {
469 if (t
!= ID_ENTRY_SYM
)
470 sym
= rb_ary_entry(ary
, pos
+ ID_ENTRY_SYM
);
471 if (STATIC_SYM_P(sym
)) {
472 if (STATIC_SYM2ID(sym
) != id
) result
= 0;
475 if (RSYMBOL(sym
)->id
!= id
) result
= 0;
482 GLOBAL_SYMBOLS_LEAVE();
487 RUBY_ASSERT_BUILTIN_TYPE(result
, T_STRING
);
490 RUBY_ASSERT_BUILTIN_TYPE(result
, T_SYMBOL
);
501 get_id_entry(ID id
, const enum id_entry_type t
)
503 return get_id_serial_entry(rb_id_to_serial(id
), id
, t
);
507 rb_static_id_valid_p(ID id
)
509 return STATIC_ID2SYM(id
) == get_id_entry(id
, ID_ENTRY_SYM
);
513 rb_id_serial_to_id(rb_id_serial_t num
)
515 if (is_notop_id((ID
)num
)) {
516 VALUE sym
= get_id_serial_entry(num
, 0, ID_ENTRY_SYM
);
517 if (sym
) return SYM2ID(sym
);
518 return ((ID
)num
<< ID_SCOPE_SHIFT
) | ID_INTERNAL
| ID_STATIC_SYM
;
526 register_sym_update_callback(st_data_t
*key
, st_data_t
*value
, st_data_t arg
, int existing
)
529 rb_fatal("symbol :% "PRIsVALUE
" is already registered with %"PRIxVALUE
,
530 (VALUE
)*key
, (VALUE
)*value
);
537 register_sym(rb_symbols_t
*symbols
, VALUE str
, VALUE sym
)
542 st_update(symbols
->str_sym
, (st_data_t
)str
,
543 register_sym_update_callback
, (st_data_t
)sym
);
546 st_add_direct(symbols
->str_sym
, (st_data_t
)str
, (st_data_t
)sym
);
551 rb_free_static_symid_str(void)
553 GLOBAL_SYMBOLS_ENTER(symbols
)
555 st_free_table(symbols
->str_sym
);
557 GLOBAL_SYMBOLS_LEAVE();
561 unregister_sym(rb_symbols_t
*symbols
, VALUE str
, VALUE sym
)
565 st_data_t str_data
= (st_data_t
)str
;
566 if (!st_delete(symbols
->str_sym
, &str_data
, NULL
)) {
567 rb_bug("%p can't remove str from str_id (%s)", (void *)sym
, RSTRING_PTR(str
));
572 register_static_symid(ID id
, const char *name
, long len
, rb_encoding
*enc
)
574 VALUE str
= rb_enc_str_new(name
, len
, enc
);
575 return register_static_symid_str(id
, str
);
579 register_static_symid_str(ID id
, VALUE str
)
581 rb_id_serial_t num
= rb_id_to_serial(id
);
582 VALUE sym
= STATIC_ID2SYM(id
);
585 str
= rb_fstring(str
);
587 RUBY_DTRACE_CREATE_HOOK(SYMBOL
, RSTRING_PTR(str
));
589 GLOBAL_SYMBOLS_ENTER(symbols
)
591 register_sym(symbols
, str
, sym
);
592 set_id_entry(symbols
, num
, str
, sym
);
594 GLOBAL_SYMBOLS_LEAVE();
600 sym_check_asciionly(VALUE str
, bool fake_str
)
602 if (!rb_enc_asciicompat(rb_enc_get(str
))) return FALSE
;
603 switch (rb_enc_str_coderange(str
)) {
604 case ENC_CODERANGE_BROKEN
:
606 str
= rb_enc_str_new(RSTRING_PTR(str
), RSTRING_LEN(str
), rb_enc_get(str
));
608 rb_raise(rb_eEncodingError
, "invalid symbol in encoding %s :%+"PRIsVALUE
,
609 rb_enc_name(rb_enc_get(str
)), str
);
610 case ENC_CODERANGE_7BIT
:
618 * _str_ itself will be registered at the global symbol table. _str_
619 * can be modified before the registration, since the encoding will be
620 * set to ASCII-8BIT if it is a special global name.
624 must_be_dynamic_symbol(VALUE x
)
626 if (UNLIKELY(!DYNAMIC_SYM_P(x
))) {
627 if (STATIC_SYM_P(x
)) {
628 VALUE str
= lookup_id_str(RSHIFT((unsigned long)(x
),RUBY_SPECIAL_SHIFT
));
631 rb_bug("wrong argument: %s (inappropriate Symbol)", RSTRING_PTR(str
));
634 rb_bug("wrong argument: inappropriate Symbol (%p)", (void *)x
);
638 rb_bug("wrong argument type %s (expected Symbol)", rb_builtin_class_name(x
));
645 dsymbol_alloc(rb_symbols_t
*symbols
, const VALUE klass
, const VALUE str
, rb_encoding
* const enc
, const ID type
)
649 NEWOBJ_OF(obj
, struct RSymbol
, klass
, T_SYMBOL
| FL_WB_PROTECTED
, sizeof(struct RSymbol
), 0);
653 rb_enc_set_index((VALUE
)obj
, rb_enc_to_index(enc
));
654 OBJ_FREEZE((VALUE
)obj
);
655 RB_OBJ_WRITE((VALUE
)obj
, &obj
->fstr
, str
);
658 /* we want hashval to be in Fixnum range [ruby-core:15713] r15672 */
659 hashval
= (long)rb_str_hash(str
);
660 obj
->hashval
= RSHIFT((long)hashval
, 1);
661 register_sym(symbols
, str
, (VALUE
)obj
);
662 rb_hash_aset(symbols
->dsymbol_fstr_hash
, str
, Qtrue
);
663 RUBY_DTRACE_CREATE_HOOK(SYMBOL
, RSTRING_PTR(obj
->fstr
));
669 dsymbol_check(rb_symbols_t
*symbols
, const VALUE sym
)
673 if (UNLIKELY(rb_objspace_garbage_object_p(sym
))) {
674 const VALUE fstr
= RSYMBOL(sym
)->fstr
;
675 const ID type
= RSYMBOL(sym
)->id
& ID_SCOPE_MASK
;
676 RSYMBOL(sym
)->fstr
= 0;
677 unregister_sym(symbols
, fstr
, sym
);
678 return dsymbol_alloc(symbols
, rb_cSymbol
, fstr
, rb_enc_get(fstr
), type
);
686 lookup_str_id(VALUE str
)
691 GLOBAL_SYMBOLS_ENTER(symbols
);
693 found
= st_lookup(symbols
->str_sym
, (st_data_t
)str
, &sym_data
);
695 GLOBAL_SYMBOLS_LEAVE();
698 const VALUE sym
= (VALUE
)sym_data
;
700 if (STATIC_SYM_P(sym
)) {
701 return STATIC_SYM2ID(sym
);
703 else if (DYNAMIC_SYM_P(sym
)) {
704 ID id
= RSYMBOL(sym
)->id
;
705 if (id
& ~ID_SCOPE_MASK
) return id
;
708 rb_bug("non-symbol object %s:%"PRIxVALUE
" for %"PRIsVALUE
" in symbol table",
709 rb_builtin_class_name(sym
), sym
, str
);
716 lookup_str_sym_with_lock(rb_symbols_t
*symbols
, const VALUE str
)
719 if (st_lookup(symbols
->str_sym
, (st_data_t
)str
, &sym_data
)) {
720 VALUE sym
= (VALUE
)sym_data
;
721 if (DYNAMIC_SYM_P(sym
)) {
722 sym
= dsymbol_check(symbols
, sym
);
732 lookup_str_sym(const VALUE str
)
736 GLOBAL_SYMBOLS_ENTER(symbols
);
738 sym
= lookup_str_sym_with_lock(symbols
, str
);
740 GLOBAL_SYMBOLS_LEAVE();
748 return get_id_entry(id
, ID_ENTRY_STR
);
752 rb_intern3(const char *name
, long len
, rb_encoding
*enc
)
755 struct RString fake_str
;
756 VALUE str
= rb_setup_fake_str(&fake_str
, name
, len
, enc
);
758 sym
= lookup_str_sym(str
);
759 if (sym
) return rb_sym2id(sym
);
760 str
= rb_enc_str_new(name
, len
, enc
); /* make true string */
761 return intern_str(str
, 1);
765 next_id_base_with_lock(rb_symbols_t
*symbols
)
768 rb_id_serial_t next_serial
= symbols
->last_id
+ 1;
770 if (next_serial
== 0) {
774 const size_t num
= ++symbols
->last_id
;
775 id
= num
<< ID_SCOPE_SHIFT
;
785 GLOBAL_SYMBOLS_ENTER(symbols
);
787 id
= next_id_base_with_lock(symbols
);
789 GLOBAL_SYMBOLS_LEAVE();
794 intern_str(VALUE str
, int mutable)
799 id
= rb_str_symname_type(str
, IDSET_ATTRSET_FOR_INTERN
);
800 if (id
== (ID
)-1) id
= ID_JUNK
;
801 if (sym_check_asciionly(str
, false)) {
802 if (!mutable) str
= rb_str_dup(str
);
803 rb_enc_associate(str
, rb_usascii_encoding());
805 if ((nid
= next_id_base()) == (ID
)-1) {
806 str
= rb_str_ellipsize(str
, 20);
807 rb_raise(rb_eRuntimeError
, "symbol table overflow (symbol %"PRIsVALUE
")",
812 return register_static_symid_str(id
, str
);
816 rb_intern2(const char *name
, long len
)
818 return rb_intern3(name
, len
, rb_usascii_encoding());
823 rb_intern(const char *name
)
825 return rb_intern2(name
, strlen(name
));
829 rb_intern_str(VALUE str
)
831 VALUE sym
= lookup_str_sym(str
);
837 return intern_str(str
, 0);
841 rb_gc_free_dsymbol(VALUE sym
)
843 VALUE str
= RSYMBOL(sym
)->fstr
;
846 RSYMBOL(sym
)->fstr
= 0;
848 GLOBAL_SYMBOLS_ENTER(symbols
);
850 unregister_sym(symbols
, str
, sym
);
851 rb_hash_delete_entry(symbols
->dsymbol_fstr_hash
, str
);
853 GLOBAL_SYMBOLS_LEAVE();
859 * str.intern -> symbol
860 * str.to_sym -> symbol
862 * Returns the +Symbol+ corresponding to <i>str</i>, creating the
863 * symbol if it did not previously exist. See Symbol#id2name.
865 * "Koala".intern #=> :Koala
866 * s = 'cat'.to_sym #=> :cat
868 * s = '@cat'.to_sym #=> :@cat
869 * s == :@cat #=> true
871 * This can also be used to create symbols that cannot be represented using the
872 * <code>:xxx</code> notation.
874 * 'cat and dog'.to_sym #=> :"cat and dog"
878 rb_str_intern(VALUE str
)
882 GLOBAL_SYMBOLS_ENTER(symbols
);
884 sym
= lookup_str_sym_with_lock(symbols
, str
);
889 else if (USE_SYMBOL_GC
) {
890 rb_encoding
*enc
= rb_enc_get(str
);
891 rb_encoding
*ascii
= rb_usascii_encoding();
892 if (enc
!= ascii
&& sym_check_asciionly(str
, false)) {
893 str
= rb_str_dup(str
);
894 rb_enc_associate(str
, ascii
);
899 str
= rb_str_dup(str
);
902 str
= rb_fstring(str
);
903 int type
= rb_str_symname_type(str
, IDSET_ATTRSET_FOR_INTERN
);
904 if (type
< 0) type
= ID_JUNK
;
905 sym
= dsymbol_alloc(symbols
, rb_cSymbol
, str
, enc
, type
);
908 ID id
= intern_str(str
, 0);
912 GLOBAL_SYMBOLS_LEAVE();
920 if (STATIC_SYM_P(sym
)) {
921 id
= STATIC_SYM2ID(sym
);
923 else if (DYNAMIC_SYM_P(sym
)) {
924 GLOBAL_SYMBOLS_ENTER(symbols
);
926 sym
= dsymbol_check(symbols
, sym
);
927 id
= RSYMBOL(sym
)->id
;
929 if (UNLIKELY(!(id
& ~ID_SCOPE_MASK
))) {
930 VALUE fstr
= RSYMBOL(sym
)->fstr
;
931 ID num
= next_id_base_with_lock(symbols
);
933 RSYMBOL(sym
)->id
= id
|= num
;
934 /* make it permanent object */
936 set_id_entry(symbols
, rb_id_to_serial(num
), fstr
, sym
);
937 rb_hash_delete_entry(symbols
->dsymbol_fstr_hash
, fstr
);
940 GLOBAL_SYMBOLS_LEAVE();
943 rb_raise(rb_eTypeError
, "wrong argument type %s (expected Symbol)",
944 rb_builtin_class_name(sym
));
953 if (!DYNAMIC_ID_P(x
)) return STATIC_ID2SYM(x
);
954 return get_id_entry(x
, ID_ENTRY_SYM
);
961 * Returns a frozen string representation of +self+ (not including the leading colon):
963 * :foo.name # => "foo"
964 * :foo.name.frozen? # => true
966 * Related: Symbol#to_s, Symbol#inspect.
970 rb_sym2str(VALUE sym
)
973 if (DYNAMIC_SYM_P(sym
)) {
974 str
= RSYMBOL(sym
)->fstr
;
975 RUBY_ASSERT_BUILTIN_TYPE(str
, T_STRING
);
978 str
= rb_id2str(STATIC_SYM2ID(sym
));
979 if (str
) RUBY_ASSERT_BUILTIN_TYPE(str
, T_STRING
);
988 return lookup_id_str(id
);
994 VALUE str
= rb_id2str(id
);
997 return RSTRING_PTR(str
);
1001 rb_make_internal_id(void)
1003 return next_id_base() | ID_INTERNAL
| ID_STATIC_SYM
;
1007 rb_make_temporary_id(size_t n
)
1009 const ID max_id
= RB_ID_SERIAL_MAX
& ~0xffff;
1010 const ID id
= max_id
- (ID
)n
;
1011 if (id
<= ruby_global_symbols
.last_id
) {
1012 rb_raise(rb_eRuntimeError
, "too big to make temporary ID: %" PRIdSIZE
, n
);
1014 return (id
<< ID_SCOPE_SHIFT
) | ID_STATIC_SYM
| ID_INTERNAL
;
1018 symbols_i(st_data_t key
, st_data_t value
, st_data_t arg
)
1020 VALUE ary
= (VALUE
)arg
;
1021 VALUE sym
= (VALUE
)value
;
1023 if (STATIC_SYM_P(sym
)) {
1024 rb_ary_push(ary
, sym
);
1027 else if (!DYNAMIC_SYM_P(sym
)) {
1028 rb_bug("invalid symbol: %s", RSTRING_PTR((VALUE
)key
));
1030 else if (!SYMBOL_PINNED_P(sym
) && rb_objspace_garbage_object_p(sym
)) {
1031 RSYMBOL(sym
)->fstr
= 0;
1035 rb_ary_push(ary
, sym
);
1042 rb_sym_all_symbols(void)
1046 GLOBAL_SYMBOLS_ENTER(symbols
);
1048 ary
= rb_ary_new2(symbols
->str_sym
->num_entries
);
1049 st_foreach(symbols
->str_sym
, symbols_i
, ary
);
1051 GLOBAL_SYMBOLS_LEAVE();
1057 rb_sym_immortal_count(void)
1059 return (size_t)ruby_global_symbols
.last_id
;
1063 rb_is_const_id(ID id
)
1065 return is_const_id(id
);
1069 rb_is_class_id(ID id
)
1071 return is_class_id(id
);
1075 rb_is_global_id(ID id
)
1077 return is_global_id(id
);
1081 rb_is_instance_id(ID id
)
1083 return is_instance_id(id
);
1087 rb_is_attrset_id(ID id
)
1089 return is_attrset_id(id
);
1093 rb_is_local_id(ID id
)
1095 return is_local_id(id
);
1099 rb_is_junk_id(ID id
)
1101 return is_junk_id(id
);
1105 rb_is_const_sym(VALUE sym
)
1107 return is_const_sym(sym
);
1111 rb_is_attrset_sym(VALUE sym
)
1113 return is_attrset_sym(sym
);
1117 rb_check_id(volatile VALUE
*namep
)
1120 VALUE name
= *namep
;
1122 if (STATIC_SYM_P(name
)) {
1123 return STATIC_SYM2ID(name
);
1125 else if (DYNAMIC_SYM_P(name
)) {
1126 if (SYMBOL_PINNED_P(name
)) {
1127 return RSYMBOL(name
)->id
;
1130 *namep
= RSYMBOL(name
)->fstr
;
1134 else if (!RB_TYPE_P(name
, T_STRING
)) {
1135 tmp
= rb_check_string_type(name
);
1137 rb_raise(rb_eTypeError
, "%+"PRIsVALUE
" is not a symbol nor a string",
1144 sym_check_asciionly(name
, false);
1146 return lookup_str_id(name
);
1149 // Used by yjit for handling .send without throwing exceptions
1151 rb_get_symbol_id(VALUE name
)
1153 if (STATIC_SYM_P(name
)) {
1154 return STATIC_SYM2ID(name
);
1156 else if (DYNAMIC_SYM_P(name
)) {
1157 if (SYMBOL_PINNED_P(name
)) {
1158 return RSYMBOL(name
)->id
;
1164 else if (RB_TYPE_P(name
, T_STRING
)) {
1165 return lookup_str_id(name
);
1174 rb_check_symbol(volatile VALUE
*namep
)
1178 VALUE name
= *namep
;
1180 if (STATIC_SYM_P(name
)) {
1183 else if (DYNAMIC_SYM_P(name
)) {
1184 if (!SYMBOL_PINNED_P(name
)) {
1185 GLOBAL_SYMBOLS_ENTER(symbols
);
1187 name
= dsymbol_check(symbols
, name
);
1189 GLOBAL_SYMBOLS_LEAVE();
1195 else if (!RB_TYPE_P(name
, T_STRING
)) {
1196 tmp
= rb_check_string_type(name
);
1198 rb_raise(rb_eTypeError
, "%+"PRIsVALUE
" is not a symbol nor a string",
1205 sym_check_asciionly(name
, false);
1207 if ((sym
= lookup_str_sym(name
)) != 0) {
1215 rb_check_id_cstr(const char *ptr
, long len
, rb_encoding
*enc
)
1217 struct RString fake_str
;
1218 const VALUE name
= rb_setup_fake_str(&fake_str
, ptr
, len
, enc
);
1220 sym_check_asciionly(name
, true);
1222 return lookup_str_id(name
);
1226 rb_check_symbol_cstr(const char *ptr
, long len
, rb_encoding
*enc
)
1229 struct RString fake_str
;
1230 const VALUE name
= rb_setup_fake_str(&fake_str
, ptr
, len
, enc
);
1232 sym_check_asciionly(name
, true);
1234 if ((sym
= lookup_str_sym(name
)) != 0) {
1241 #undef rb_sym_intern_ascii_cstr
1243 NOINLINE(VALUE
rb_sym_intern(const char *ptr
, long len
, rb_encoding
*enc
));
1245 FUNC_MINIMIZED(VALUE
rb_sym_intern(const char *ptr
, long len
, rb_encoding
*enc
));
1246 FUNC_MINIMIZED(VALUE
rb_sym_intern_ascii(const char *ptr
, long len
));
1247 FUNC_MINIMIZED(VALUE
rb_sym_intern_ascii_cstr(const char *ptr
));
1251 rb_sym_intern(const char *ptr
, long len
, rb_encoding
*enc
)
1253 struct RString fake_str
;
1254 const VALUE name
= rb_setup_fake_str(&fake_str
, ptr
, len
, enc
);
1255 return rb_str_intern(name
);
1259 rb_sym_intern_ascii(const char *ptr
, long len
)
1261 return rb_sym_intern(ptr
, len
, rb_usascii_encoding());
1265 rb_sym_intern_ascii_cstr(const char *ptr
)
1267 return rb_sym_intern_ascii(ptr
, strlen(ptr
));
1271 rb_to_symbol_type(VALUE obj
)
1273 return rb_convert_type_with_id(obj
, T_SYMBOL
, "Symbol", idTo_sym
);
1277 rb_is_const_name(VALUE name
)
1279 return rb_str_symname_type(name
, 0) == ID_CONST
;
1283 rb_is_class_name(VALUE name
)
1285 return rb_str_symname_type(name
, 0) == ID_CLASS
;
1289 rb_is_instance_name(VALUE name
)
1291 return rb_str_symname_type(name
, 0) == ID_INSTANCE
;
1295 rb_is_local_name(VALUE name
)
1297 return rb_str_symname_type(name
, 0) == ID_LOCAL
;
1300 #include "id_table.c"
1301 #include "symbol.rbinc"