Ruby 3.2.2p53 (2023-03-30 revision e51014f9c05aa65cbf203442d37fef7c12390015)
symbol.c
1/**********************************************************************
2
3 symbol.c -
4
5 $Author$
6 created at: Tue Jul 8 15:49:54 JST 2014
7
8 Copyright (C) 2014 Yukihiro Matsumoto
9
10**********************************************************************/
11
12#include "gc.h"
13#include "internal.h"
14#include "internal/error.h"
15#include "internal/gc.h"
16#include "internal/hash.h"
17#include "internal/object.h"
18#include "internal/symbol.h"
19#include "internal/vm.h"
20#include "probes.h"
21#include "ruby/encoding.h"
22#include "ruby/st.h"
23#include "symbol.h"
24#include "vm_sync.h"
25#include "builtin.h"
26
27#ifndef USE_SYMBOL_GC
28# define USE_SYMBOL_GC 1
29#endif
30#ifndef SYMBOL_DEBUG
31# define SYMBOL_DEBUG 0
32#endif
33#ifndef CHECK_ID_SERIAL
34# define CHECK_ID_SERIAL SYMBOL_DEBUG
35#endif
36
37#define SYMBOL_PINNED_P(sym) (RSYMBOL(sym)->id&~ID_SCOPE_MASK)
38
39#define STATIC_SYM2ID(sym) RSHIFT((VALUE)(sym), RUBY_SPECIAL_SHIFT)
40
41static ID register_static_symid(ID, const char *, long, rb_encoding *);
42static ID register_static_symid_str(ID, VALUE);
43#define REGISTER_SYMID(id, name) register_static_symid((id), (name), strlen(name), enc)
44#include "id.c"
45
46#define is_identchar(p,e,enc) (ISALNUM((unsigned char)*(p)) || (*(p)) == '_' || !ISASCII(*(p)))
47
48#define op_tbl_count numberof(op_tbl)
49STATIC_ASSERT(op_tbl_name_size, sizeof(op_tbl[0].name) == 3);
50#define op_tbl_len(i) (!op_tbl[i].name[1] ? 1 : !op_tbl[i].name[2] ? 2 : 3)
51
52static void
53Init_op_tbl(void)
54{
55 int i;
56 rb_encoding *const enc = rb_usascii_encoding();
57
58 for (i = '!'; i <= '~'; ++i) {
59 if (!ISALNUM(i) && i != '_') {
60 char c = (char)i;
61 register_static_symid(i, &c, 1, enc);
62 }
63 }
64 for (i = 0; i < op_tbl_count; ++i) {
65 register_static_symid(op_tbl[i].token, op_tbl[i].name, op_tbl_len(i), enc);
66 }
67}
68
/* Number of IDs whose entries are stored together in one sub-array of
 * the global `ids` table (see set_id_entry). */
69static const int ID_ENTRY_UNIT = 512;
70
/* Slots kept for each ID inside a sub-array of the `ids` table. */
71enum id_entry_type {
72 ID_ENTRY_STR, /* slot holding the name string */
73 ID_ENTRY_SYM, /* slot holding the symbol */
74 ID_ENTRY_SIZE /* number of slots per ID */
75};
76
/* The process-wide symbol table.  Only the first member is initialized
 * explicitly (to tNEXT_ID-1).
 * NOTE(review): presumably that member is last_id, so that the first
 * serial handed out at run time is tNEXT_ID -- confirm against the
 * rb_symbols_t declaration. */
77rb_symbols_t ruby_global_symbols = {tNEXT_ID-1};
78
79static const struct st_hash_type symhash = {
82};
83
84void
85Init_sym(void)
86{
87 rb_symbols_t *symbols = &ruby_global_symbols;
88
89 VALUE dsym_fstrs = rb_ident_hash_new();
90 symbols->dsymbol_fstr_hash = dsym_fstrs;
91 rb_gc_register_mark_object(dsym_fstrs);
92 rb_obj_hide(dsym_fstrs);
93
94 symbols->str_sym = st_init_table_with_size(&symhash, 1000);
95 symbols->ids = rb_ary_hidden_new(0);
96 rb_gc_register_mark_object(symbols->ids);
97
98 Init_op_tbl();
99 Init_id();
100}
101
102WARN_UNUSED_RESULT(static VALUE dsymbol_alloc(rb_symbols_t *symbols, const VALUE klass, const VALUE str, rb_encoding *const enc, const ID type));
103WARN_UNUSED_RESULT(static VALUE dsymbol_check(rb_symbols_t *symbols, const VALUE sym));
104WARN_UNUSED_RESULT(static ID lookup_str_id(VALUE str));
105WARN_UNUSED_RESULT(static VALUE lookup_str_sym_with_lock(rb_symbols_t *symbols, const VALUE str));
106WARN_UNUSED_RESULT(static VALUE lookup_str_sym(const VALUE str));
107WARN_UNUSED_RESULT(static VALUE lookup_id_str(ID id));
108WARN_UNUSED_RESULT(static ID intern_str(VALUE str, int mutable));
109
110#define GLOBAL_SYMBOLS_ENTER(symbols) rb_symbols_t *symbols = &ruby_global_symbols; RB_VM_LOCK_ENTER()
111#define GLOBAL_SYMBOLS_LEAVE() RB_VM_LOCK_LEAVE()
112
113ID
115{
116 VALUE str, sym;
117 int scope;
118
119 if (!is_notop_id(id)) {
120 switch (id) {
121 case tAREF: case tASET:
122 return tASET; /* only exception */
123 }
124 rb_name_error(id, "cannot make operator ID :%"PRIsVALUE" attrset",
125 rb_id2str(id));
126 }
127 else {
128 scope = id_type(id);
129 switch (scope) {
130 case ID_LOCAL: case ID_INSTANCE: case ID_GLOBAL:
131 case ID_CONST: case ID_CLASS: case ID_JUNK:
132 break;
133 case ID_ATTRSET:
134 return id;
135 default:
136 {
137 if ((str = lookup_id_str(id)) != 0) {
138 rb_name_error(id, "cannot make unknown type ID %d:%"PRIsVALUE" attrset",
139 scope, str);
140 }
141 else {
142 rb_name_error_str(Qnil, "cannot make unknown type anonymous ID %d:%"PRIxVALUE" attrset",
143 scope, (VALUE)id);
144 }
145 }
146 }
147 }
148
149 /* make new symbol and ID */
150 if (!(str = lookup_id_str(id))) {
151 static const char id_types[][8] = {
152 "local",
153 "instance",
154 "invalid",
155 "global",
156 "attrset",
157 "const",
158 "class",
159 "junk",
160 };
161 rb_name_error(id, "cannot make anonymous %.*s ID %"PRIxVALUE" attrset",
162 (int)sizeof(id_types[0]), id_types[scope], (VALUE)id);
163 }
164 str = rb_str_dup(str);
165 rb_str_cat(str, "=", 1);
166 sym = lookup_str_sym(str);
167 id = sym ? rb_sym2id(sym) : intern_str(str, 1);
168 return id;
169}
170
171static int
172is_special_global_name(const char *m, const char *e, rb_encoding *enc)
173{
174 int mb = 0;
175
176 if (m >= e) return 0;
177 if (is_global_name_punct(*m)) {
178 ++m;
179 }
180 else if (*m == '-') {
181 if (++m >= e) return 0;
182 if (is_identchar(m, e, enc)) {
183 if (!ISASCII(*m)) mb = 1;
184 m += rb_enc_mbclen(m, e, enc);
185 }
186 }
187 else {
188 if (!ISDIGIT(*m)) return 0;
189 do {
190 if (!ISASCII(*m)) mb = 1;
191 ++m;
192 } while (m < e && ISDIGIT(*m));
193 }
194 return m == e ? mb + 1 : 0;
195}
196
197int
198rb_symname_p(const char *name)
199{
200 return rb_enc_symname_p(name, rb_ascii8bit_encoding());
201}
202
203int
204rb_enc_symname_p(const char *name, rb_encoding *enc)
205{
206 return rb_enc_symname2_p(name, strlen(name), enc);
207}
208
/*
 * Tells whether the first character of +name+ (+nlen+ bytes, encoded in
 * +enc+) is one that starts a constant name.
 *
 * Returns FALSE for an empty name or when no complete character is found
 * at the start.  An ASCII first byte is decided by ISUPPER() alone.
 * Otherwise, for Unicode encodings the character must be uppercase or
 * have the "titlecaseletter" property; for other encodings it must be
 * changed by case folding.
 */
209static int
210rb_sym_constant_char_p(const char *name, long nlen, rb_encoding *enc)
211{
212 int c, len;
213 const char *end = name + nlen;
214
215 if (nlen < 1) return FALSE;
216 if (ISASCII(*name)) return ISUPPER(*name);
 /* c first carries the mbclen result, then is reused for the codepoint */
217 c = rb_enc_precise_mbclen(name, end, enc);
218 if (!MBCLEN_CHARFOUND_P(c)) return FALSE;
219 len = MBCLEN_CHARFOUND_LEN(c);
220 c = rb_enc_mbc_to_codepoint(name, end, enc);
221 if (ONIGENC_IS_UNICODE(enc)) {
 /* ctype for the titlecase property, looked up on first use and cached */
222 static int ctype_titlecase = 0;
223 if (rb_enc_isupper(c, enc)) return TRUE;
224 if (rb_enc_islower(c, enc)) return FALSE;
225 if (!ctype_titlecase) {
226 static const UChar cname[] = "titlecaseletter";
 /* note: this `end` shadows the outer one and points just past the name text */
227 static const UChar *const end = cname + sizeof(cname) - 1;
228 ctype_titlecase = ONIGENC_PROPERTY_NAME_TO_CTYPE(enc, cname, end);
229 }
230 if (rb_enc_isctype(c, ctype_titlecase, enc)) return TRUE;
231 }
232 else {
233 /* fall back to case folding: a character that folds to something other than itself counts as a constant starter */
234 OnigUChar fold[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
235 const OnigUChar *beg = (const OnigUChar *)name;
236 int r = enc->mbc_case_fold(ONIGENC_CASE_FOLD,
237 &beg, (const OnigUChar *)end,
238 fold, enc);
 /* folded result differs in length or in bytes from the original character */
239 if (r > 0 && (r != len || memcmp(fold, name, r)))
240 return TRUE;
241 }
242 return FALSE;
243}
244
245#define IDSET_ATTRSET_FOR_SYNTAX ((1U<<ID_LOCAL)|(1U<<ID_CONST))
246#define IDSET_ATTRSET_FOR_INTERN (~(~0U<<(1<<ID_SCOPE_SHIFT)) & ~(1U<<ID_ATTRSET))
247
249 const enum { invalid, stophere, needmore, } kind;
250 const enum ruby_id_types type;
251 const long nread;
252};
253
254#define t struct enc_synmane_type_leading_chars_tag
255
257enc_synmane_type_leading_chars(const char *name, long len, rb_encoding *enc, int allowed_attrset)
258{
259 const char *m = name;
260 const char *e = m + len;
261
262 if (! rb_enc_asciicompat(enc)) {
263 return (t) { invalid, 0, 0, };
264 }
265 else if (! m) {
266 return (t) { invalid, 0, 0, };
267 }
268 else if ( len <= 0 ) {
269 return (t) { invalid, 0, 0, };
270 }
271 switch (*m) {
272 case '\0':
273 return (t) { invalid, 0, 0, };
274
275 case '$':
276 if (is_special_global_name(++m, e, enc)) {
277 return (t) { stophere, ID_GLOBAL, len, };
278 }
279 else {
280 return (t) { needmore, ID_GLOBAL, 1, };
281 }
282
283 case '@':
284 switch (*++m) {
285 default: return (t) { needmore, ID_INSTANCE, 1, };
286 case '@': return (t) { needmore, ID_CLASS, 2, };
287 }
288
289 case '<':
290 switch (*++m) {
291 default: return (t) { stophere, ID_JUNK, 1, };
292 case '<': return (t) { stophere, ID_JUNK, 2, };
293 case '=':
294 switch (*++m) {
295 default: return (t) { stophere, ID_JUNK, 2, };
296 case '>': return (t) { stophere, ID_JUNK, 3, };
297 }
298 }
299
300 case '>':
301 switch (*++m) {
302 default: return (t) { stophere, ID_JUNK, 1, };
303 case '>': case '=': return (t) { stophere, ID_JUNK, 2, };
304 }
305
306 case '=':
307 switch (*++m) {
308 default: return (t) { invalid, 0, 1, };
309 case '~': return (t) { stophere, ID_JUNK, 2, };
310 case '=':
311 switch (*++m) {
312 default: return (t) { stophere, ID_JUNK, 2, };
313 case '=': return (t) { stophere, ID_JUNK, 3, };
314 }
315 }
316
317 case '*':
318 switch (*++m) {
319 default: return (t) { stophere, ID_JUNK, 1, };
320 case '*': return (t) { stophere, ID_JUNK, 2, };
321 }
322
323 case '+': case '-':
324 switch (*++m) {
325 default: return (t) { stophere, ID_JUNK, 1, };
326 case '@': return (t) { stophere, ID_JUNK, 2, };
327 }
328
329 case '|': case '^': case '&': case '/': case '%': case '~': case '`':
330 return (t) { stophere, ID_JUNK, 1, };
331
332 case '[':
333 switch (*++m) {
334 default: return (t) { needmore, ID_JUNK, 0, };
335 case ']':
336 switch (*++m) {
337 default: return (t) { stophere, ID_JUNK, 2, };
338 case '=': return (t) { stophere, ID_JUNK, 3, };
339 }
340 }
341
342 case '!':
343 switch (*++m) {
344 case '=': case '~': return (t) { stophere, ID_JUNK, 2, };
345 default:
346 if (allowed_attrset & (1U << ID_JUNK)) {
347 return (t) { needmore, ID_JUNK, 1, };
348 }
349 else {
350 return (t) { stophere, ID_JUNK, 1, };
351 }
352 }
353
354 default:
355 if (rb_sym_constant_char_p(name, len, enc)) {
356 return (t) { needmore, ID_CONST, 0, };
357 }
358 else {
359 return (t) { needmore, ID_LOCAL, 0, };
360 }
361 }
362}
363#undef t
364
365int
366rb_enc_symname_type(const char *name, long len, rb_encoding *enc, unsigned int allowed_attrset)
367{
369 enc_synmane_type_leading_chars(name, len, enc, allowed_attrset);
370 const char *m = name + f.nread;
371 const char *e = name + len;
372 int type = (int)f.type;
373
374 switch (f.kind) {
375 case invalid: return -1;
376 case stophere: break;
377 case needmore:
378
379 if (m >= e || (*m != '_' && !ISALPHA(*m) && ISASCII(*m))) {
380 if (len > 1 && *(e-1) == '=') {
381 type = rb_enc_symname_type(name, len-1, enc, allowed_attrset);
382 if (allowed_attrset & (1U << type)) return ID_ATTRSET;
383 }
384 return -1;
385 }
386 while (m < e && is_identchar(m, e, enc)) m += rb_enc_mbclen(m, e, enc);
387 if (m >= e) break;
388 switch (*m) {
389 case '!': case '?':
390 if (type == ID_GLOBAL || type == ID_CLASS || type == ID_INSTANCE) return -1;
391 type = ID_JUNK;
392 ++m;
393 if (m + 1 < e || *m != '=') break;
394 /* fall through */
395 case '=':
396 if (!(allowed_attrset & (1U << type))) return -1;
397 type = ID_ATTRSET;
398 ++m;
399 break;
400 }
401 }
402
403 return m == e ? type : -1;
404}
405
406int
407rb_enc_symname2_p(const char *name, long len, rb_encoding *enc)
408{
409 return rb_enc_symname_type(name, len, enc, IDSET_ATTRSET_FOR_SYNTAX) != -1;
410}
411
412static int
413rb_str_symname_type(VALUE name, unsigned int allowed_attrset)
414{
415 const char *ptr = StringValuePtr(name);
416 long len = RSTRING_LEN(name);
417 int type = rb_enc_symname_type(ptr, len, rb_enc_get(name), allowed_attrset);
418 RB_GC_GUARD(name);
419 return type;
420}
421
422static void
423set_id_entry(rb_symbols_t *symbols, rb_id_serial_t num, VALUE str, VALUE sym)
424{
425 ASSERT_vm_locking();
426 size_t idx = num / ID_ENTRY_UNIT;
427
428 VALUE ary, ids = symbols->ids;
429 if (idx >= (size_t)RARRAY_LEN(ids) || NIL_P(ary = rb_ary_entry(ids, (long)idx))) {
430 ary = rb_ary_hidden_new(ID_ENTRY_UNIT * ID_ENTRY_SIZE);
431 rb_ary_store(ids, (long)idx, ary);
432 }
433 idx = (num % ID_ENTRY_UNIT) * ID_ENTRY_SIZE;
434 rb_ary_store(ary, (long)idx + ID_ENTRY_STR, str);
435 rb_ary_store(ary, (long)idx + ID_ENTRY_SYM, sym);
436}
437
/*
 * Fetches one slot (+t+: name string or symbol) of the entry for ID
 * serial +num+ from the `ids` table, under the global symbols lock.
 *
 * Returns 0 when +num+ is zero or beyond the last assigned serial, when
 * the sub-array does not exist, or when the slot is nil.  When
 * CHECK_ID_SERIAL is enabled and +id+ is non-zero, the result is also
 * discarded (0) if the symbol stored for the serial does not carry +id+.
 */
438static VALUE
439get_id_serial_entry(rb_id_serial_t num, ID id, const enum id_entry_type t)
440{
441 VALUE result = 0;
442
443 GLOBAL_SYMBOLS_ENTER(symbols);
444 {
445 if (num && num <= symbols->last_id) {
 /* index of the sub-array that covers this serial */
446 size_t idx = num / ID_ENTRY_UNIT;
447 VALUE ids = symbols->ids;
448 VALUE ary;
449 if (idx < (size_t)RARRAY_LEN(ids) && !NIL_P(ary = rb_ary_entry(ids, (long)idx))) {
 /* first slot of this serial inside the sub-array */
450 long pos = (long)(num % ID_ENTRY_UNIT) * ID_ENTRY_SIZE;
451 result = rb_ary_entry(ary, pos + t);
452
453 if (NIL_P(result)) {
 /* callers test the result against 0, not nil */
454 result = 0;
455 }
456 else {
457#if CHECK_ID_SERIAL
 /* debug check: the symbol registered for this serial must own `id` */
458 if (id) {
459 VALUE sym = result;
460 if (t != ID_ENTRY_SYM)
461 sym = rb_ary_entry(ary, pos + ID_ENTRY_SYM);
462 if (STATIC_SYM_P(sym)) {
463 if (STATIC_SYM2ID(sym) != id) result = 0;
464 }
465 else {
466 if (RSYMBOL(sym)->id != id) result = 0;
467 }
468 }
469#endif
470 }
471 }
472 }
473 }
474 GLOBAL_SYMBOLS_LEAVE();
475
476 return result;
477}
478
479static VALUE
480get_id_entry(ID id, const enum id_entry_type t)
481{
482 return get_id_serial_entry(rb_id_to_serial(id), id, t);
483}
484
485int
486rb_static_id_valid_p(ID id)
487{
488 return STATIC_ID2SYM(id) == get_id_entry(id, ID_ENTRY_SYM);
489}
490
491static inline ID
492rb_id_serial_to_id(rb_id_serial_t num)
493{
494 if (is_notop_id((ID)num)) {
495 VALUE sym = get_id_serial_entry(num, 0, ID_ENTRY_SYM);
496 if (sym) return SYM2ID(sym);
497 return ((ID)num << ID_SCOPE_SHIFT) | ID_INTERNAL | ID_STATIC_SYM;
498 }
499 else {
500 return (ID)num;
501 }
502}
503
504#if SYMBOL_DEBUG
/*
 * st_update() callback used by register_sym() in SYMBOL_DEBUG builds.
 * Reports a fatal error if the key is already present; otherwise stores
 * +arg+ (the symbol) as the value for the key.
 */
505static int
506register_sym_update_callback(st_data_t *key, st_data_t *value, st_data_t arg, int existing)
507{
508 if (existing) {
509 rb_fatal("symbol :% "PRIsVALUE" is already registered with %"PRIxVALUE,
510 (VALUE)*key, (VALUE)*value);
511 }
512 *value = arg;
513 return ST_CONTINUE;
514}
515#endif
516
/*
 * Adds the mapping str -> sym to the str_sym table.  Asserts that the
 * VM lock is held.
 *
 * SYMBOL_DEBUG builds go through st_update() so that a duplicate key is
 * caught by register_sym_update_callback(); otherwise the pair is added
 * with st_add_direct().
 */
517static void
518register_sym(rb_symbols_t *symbols, VALUE str, VALUE sym)
519{
520 ASSERT_vm_locking();
521
522#if SYMBOL_DEBUG
523 st_update(symbols->str_sym, (st_data_t)str,
524 register_sym_update_callback, (st_data_t)sym);
525#else
526 st_add_direct(symbols->str_sym, (st_data_t)str, (st_data_t)sym);
527#endif
528}
529
530static void
531unregister_sym(rb_symbols_t *symbols, VALUE str, VALUE sym)
532{
533 ASSERT_vm_locking();
534
535 st_data_t str_data = (st_data_t)str;
536 if (!st_delete(symbols->str_sym, &str_data, NULL)) {
537 rb_bug("%p can't remove str from str_id (%s)", (void *)sym, RSTRING_PTR(str));
538 }
539}
540
541static ID
542register_static_symid(ID id, const char *name, long len, rb_encoding *enc)
543{
544 VALUE str = rb_enc_str_new(name, len, enc);
545 return register_static_symid_str(id, str);
546}
547
/*
 * Registers +id+ as a static symbol named by +str+.
 *
 * +str+ is frozen and replaced by the result of rb_fstring(), the
 * symbol-creation DTrace hook is fired, and then, under the global
 * symbols lock, the string is mapped to the static symbol and the `ids`
 * table entry for the ID's serial is filled in.  Returns +id+.
 */
548static ID
549register_static_symid_str(ID id, VALUE str)
550{
551 rb_id_serial_t num = rb_id_to_serial(id);
552 VALUE sym = STATIC_ID2SYM(id);
553
554 OBJ_FREEZE(str);
555 str = rb_fstring(str);
556
557 RUBY_DTRACE_CREATE_HOOK(SYMBOL, RSTRING_PTR(str));
558
559 GLOBAL_SYMBOLS_ENTER(symbols)
560 {
 /* both tables are updated inside one critical section */
561 register_sym(symbols, str, sym);
562 set_id_entry(symbols, num, str, sym);
563 }
564 GLOBAL_SYMBOLS_LEAVE();
565
566 return id;
567}
568
569static int
570sym_check_asciionly(VALUE str)
571{
572 if (!rb_enc_asciicompat(rb_enc_get(str))) return FALSE;
573 switch (rb_enc_str_coderange(str)) {
575 rb_raise(rb_eEncodingError, "invalid symbol in encoding %s :%+"PRIsVALUE,
576 rb_enc_name(rb_enc_get(str)), str);
578 return TRUE;
579 }
580 return FALSE;
581}
582
583#if 0
584/*
585 * _str_ itself will be registered at the global symbol table. _str_
586 * can be modified before the registration, since the encoding will be
587 * set to ASCII-8BIT if it is a special global name.
588 */
589
/*
 * Debug helper, currently compiled out by the enclosing `#if 0`.
 * Calls rb_bug() unless +x+ is a dynamic symbol; the message depends on
 * whether +x+ is a static symbol (with or without a registered name) or
 * not a symbol at all.
 */
590static inline void
591must_be_dynamic_symbol(VALUE x)
592{
593 if (UNLIKELY(!DYNAMIC_SYM_P(x))) {
594 if (STATIC_SYM_P(x)) {
 /* same shift as STATIC_SYM2ID: recover the ID to look up its name */
595 VALUE str = lookup_id_str(RSHIFT((unsigned long)(x),RUBY_SPECIAL_SHIFT));
596
597 if (str) {
598 rb_bug("wrong argument: %s (inappropriate Symbol)", RSTRING_PTR(str));
599 }
600 else {
601 rb_bug("wrong argument: inappropriate Symbol (%p)", (void *)x);
602 }
603 }
604 else {
605 rb_bug("wrong argument type %s (expected Symbol)", rb_builtin_class_name(x));
606 }
607 }
608}
609#endif
610
/*
 * Allocates a new dynamic symbol object of class +klass+ named by +str+.
 *
 * The new object gets encoding +enc+, is frozen, refers to +str+ as its
 * fstr and stores +type+ in its id field.  It is registered in the
 * str_sym table and +str+ is recorded in dsymbol_fstr_hash.  Asserts
 * that the VM lock is held.  Returns the new symbol.
 */
611static VALUE
612dsymbol_alloc(rb_symbols_t *symbols, const VALUE klass, const VALUE str, rb_encoding * const enc, const ID type)
613{
614 ASSERT_vm_locking();
615
616 const VALUE dsym = rb_newobj_of(klass, T_SYMBOL | FL_WB_PROTECTED);
617 long hashval;
618
619 rb_enc_set_index(dsym, rb_enc_to_index(enc));
620 OBJ_FREEZE(dsym);
621 RB_OBJ_WRITE(dsym, &RSYMBOL(dsym)->fstr, str);
 /* only the type is stored here; rb_check_id() treats such an id as "not pinned" */
622 RSYMBOL(dsym)->id = type;
623
624 /* we want hashval to be in Fixnum range [ruby-core:15713] r15672 */
625 hashval = (long)rb_str_hash(str);
626 RSYMBOL(dsym)->hashval = RSHIFT((long)hashval, 1);
627 register_sym(symbols, str, dsym);
628 rb_hash_aset(symbols->dsymbol_fstr_hash, str, Qtrue);
629 RUBY_DTRACE_CREATE_HOOK(SYMBOL, RSTRING_PTR(RSYMBOL(dsym)->fstr));
630
631 return dsym;
632}
633
634static inline VALUE
635dsymbol_check(rb_symbols_t *symbols, const VALUE sym)
636{
637 ASSERT_vm_locking();
638
639 if (UNLIKELY(rb_objspace_garbage_object_p(sym))) {
640 const VALUE fstr = RSYMBOL(sym)->fstr;
641 const ID type = RSYMBOL(sym)->id & ID_SCOPE_MASK;
642 RSYMBOL(sym)->fstr = 0;
643 unregister_sym(symbols, fstr, sym);
644 return dsymbol_alloc(symbols, rb_cSymbol, fstr, rb_enc_get(fstr), type);
645 }
646 else {
647 return sym;
648 }
649}
650
651static ID
652lookup_str_id(VALUE str)
653{
654 st_data_t sym_data;
655 int found;
656
657 GLOBAL_SYMBOLS_ENTER(symbols);
658 {
659 found = st_lookup(symbols->str_sym, (st_data_t)str, &sym_data);
660 }
661 GLOBAL_SYMBOLS_LEAVE();
662
663 if (found) {
664 const VALUE sym = (VALUE)sym_data;
665
666 if (STATIC_SYM_P(sym)) {
667 return STATIC_SYM2ID(sym);
668 }
669 else if (DYNAMIC_SYM_P(sym)) {
670 ID id = RSYMBOL(sym)->id;
671 if (id & ~ID_SCOPE_MASK) return id;
672 }
673 else {
674 rb_bug("non-symbol object %s:%"PRIxVALUE" for %"PRIsVALUE" in symbol table",
675 rb_builtin_class_name(sym), sym, str);
676 }
677 }
678 return (ID)0;
679}
680
681static VALUE
682lookup_str_sym_with_lock(rb_symbols_t *symbols, const VALUE str)
683{
684 st_data_t sym_data;
685 if (st_lookup(symbols->str_sym, (st_data_t)str, &sym_data)) {
686 VALUE sym = (VALUE)sym_data;
687 if (DYNAMIC_SYM_P(sym)) {
688 sym = dsymbol_check(symbols, sym);
689 }
690 return sym;
691 }
692 else {
693 return Qfalse;
694 }
695}
696
/*
 * Locking wrapper around lookup_str_sym_with_lock(): takes the global
 * symbols lock, looks up the symbol registered for +str+ and returns it
 * (Qfalse when there is none).
 */
697static VALUE
698lookup_str_sym(const VALUE str)
699{
700 VALUE sym;
701
702 GLOBAL_SYMBOLS_ENTER(symbols);
703 {
704 sym = lookup_str_sym_with_lock(symbols, str);
705 }
706 GLOBAL_SYMBOLS_LEAVE();
707
708 return sym;
709}
710
711static VALUE
712lookup_id_str(ID id)
713{
714 return get_id_entry(id, ID_ENTRY_STR);
715}
716
717ID
718rb_intern3(const char *name, long len, rb_encoding *enc)
719{
720 VALUE sym;
721 struct RString fake_str;
722 VALUE str = rb_setup_fake_str(&fake_str, name, len, enc);
723 OBJ_FREEZE(str);
724 sym = lookup_str_sym(str);
725 if (sym) return rb_sym2id(sym);
726 str = rb_enc_str_new(name, len, enc); /* make true string */
727 return intern_str(str, 1);
728}
729
730static ID
731next_id_base_with_lock(rb_symbols_t *symbols)
732{
733 ID id;
734 rb_id_serial_t next_serial = symbols->last_id + 1;
735
736 if (next_serial == 0) {
737 id = (ID)-1;
738 }
739 else {
740 const size_t num = ++symbols->last_id;
741 id = num << ID_SCOPE_SHIFT;
742 }
743
744 return id;
745}
746
/*
 * Locking wrapper around next_id_base_with_lock(): takes the global
 * symbols lock and returns the next ID base, or (ID)-1 when the serial
 * counter is exhausted.
 */
747static ID
748next_id_base(void)
749{
750 ID id;
751 GLOBAL_SYMBOLS_ENTER(symbols);
752 {
753 id = next_id_base_with_lock(symbols);
754 }
755 GLOBAL_SYMBOLS_LEAVE();
756 return id;
757}
758
/*
 * Creates a new static symbol for +str+ and returns its ID.
 *
 * The scope bits come from classifying the name (ID_JUNK when it does
 * not classify), combined with a fresh serial and ID_STATIC_SYM.
 * When sym_check_asciionly() accepts the string it is re-associated with
 * US-ASCII; a non-zero +mutable+ lets that happen in place, otherwise a
 * copy is made first.
 *
 * Raises RuntimeError when no more serials are available.
 */
759static ID
760intern_str(VALUE str, int mutable)
761{
762 ID id;
763 ID nid;
764
765 id = rb_str_symname_type(str, IDSET_ATTRSET_FOR_INTERN);
 /* the int result -1 ("no type") converted to ID compares equal to (ID)-1 */
766 if (id == (ID)-1) id = ID_JUNK;
767 if (sym_check_asciionly(str)) {
768 if (!mutable) str = rb_str_dup(str);
769 rb_enc_associate(str, rb_usascii_encoding());
770 }
771 if ((nid = next_id_base()) == (ID)-1) {
 /* shorten the name to 20 characters for the error message */
772 str = rb_str_ellipsize(str, 20);
773 rb_raise(rb_eRuntimeError, "symbol table overflow (symbol %"PRIsVALUE")",
774 str);
775 }
776 id |= nid;
777 id |= ID_STATIC_SYM;
778 return register_static_symid_str(id, str);
779}
780
781ID
782rb_intern2(const char *name, long len)
783{
784 return rb_intern3(name, len, rb_usascii_encoding());
785}
786
787#undef rb_intern
788ID
789rb_intern(const char *name)
790{
791 return rb_intern2(name, strlen(name));
792}
793
794ID
796{
797 VALUE sym = lookup_str_sym(str);
798
799 if (sym) {
800 return SYM2ID(sym);
801 }
802
803 return intern_str(str, 0);
804}
805
806void
807rb_gc_free_dsymbol(VALUE sym)
808{
809 VALUE str = RSYMBOL(sym)->fstr;
810
811 if (str) {
812 RSYMBOL(sym)->fstr = 0;
813
814 GLOBAL_SYMBOLS_ENTER(symbols);
815 {
816 unregister_sym(symbols, str, sym);
817 rb_hash_delete_entry(symbols->dsymbol_fstr_hash, str);
818 }
819 GLOBAL_SYMBOLS_LEAVE();
820 }
821}
822
823/*
824 * call-seq:
825 * str.intern -> symbol
826 * str.to_sym -> symbol
827 *
828 * Returns the Symbol corresponding to <i>str</i>, creating the
829 * symbol if it did not previously exist. See Symbol#id2name.
830 *
831 * "Koala".intern #=> :Koala
832 * s = 'cat'.to_sym #=> :cat
833 * s == :cat #=> true
834 * s = '@cat'.to_sym #=> :@cat
835 * s == :@cat #=> true
836 *
837 * This can also be used to create symbols that cannot be represented using the
838 * <code>:xxx</code> notation.
839 *
840 * 'cat and dog'.to_sym #=> :"cat and dog"
841 */
842
843VALUE
845{
846 VALUE sym;
847#if USE_SYMBOL_GC
848 rb_encoding *enc, *ascii;
849 int type;
850#else
851 ID id;
852#endif
853 GLOBAL_SYMBOLS_ENTER(symbols);
854 {
855 sym = lookup_str_sym_with_lock(symbols, str);
856
857 if (sym) {
858 // ok
859 }
860 else {
861#if USE_SYMBOL_GC
862 enc = rb_enc_get(str);
863 ascii = rb_usascii_encoding();
864 if (enc != ascii && sym_check_asciionly(str)) {
865 str = rb_str_dup(str);
866 rb_enc_associate(str, ascii);
867 OBJ_FREEZE(str);
868 enc = ascii;
869 }
870 else {
871 str = rb_str_dup(str);
872 OBJ_FREEZE(str);
873 }
874 str = rb_fstring(str);
875 type = rb_str_symname_type(str, IDSET_ATTRSET_FOR_INTERN);
876 if (type < 0) type = ID_JUNK;
877 sym = dsymbol_alloc(symbols, rb_cSymbol, str, enc, type);
878#else
879 id = intern_str(str, 0);
880 sym = ID2SYM(id);
881#endif
882 }
883 }
884 GLOBAL_SYMBOLS_LEAVE();
885 return sym;
886}
887
888ID
890{
891 ID id;
892 if (STATIC_SYM_P(sym)) {
893 id = STATIC_SYM2ID(sym);
894 }
895 else if (DYNAMIC_SYM_P(sym)) {
896 GLOBAL_SYMBOLS_ENTER(symbols);
897 {
898 sym = dsymbol_check(symbols, sym);
899 id = RSYMBOL(sym)->id;
900
901 if (UNLIKELY(!(id & ~ID_SCOPE_MASK))) {
902 VALUE fstr = RSYMBOL(sym)->fstr;
903 ID num = next_id_base_with_lock(symbols);
904
905 RSYMBOL(sym)->id = id |= num;
906 /* make it permanent object */
907
908 set_id_entry(symbols, rb_id_to_serial(num), fstr, sym);
909 rb_hash_delete_entry(symbols->dsymbol_fstr_hash, fstr);
910 }
911 }
912 GLOBAL_SYMBOLS_LEAVE();
913 }
914 else {
915 rb_raise(rb_eTypeError, "wrong argument type %s (expected Symbol)",
916 rb_builtin_class_name(sym));
917 }
918 return id;
919}
920
921#undef rb_id2sym
922VALUE
924{
925 if (!DYNAMIC_ID_P(x)) return STATIC_ID2SYM(x);
926 return get_id_entry(x, ID_ENTRY_SYM);
927}
928
929/*
930 * call-seq:
931 * name -> string
932 *
933 * Returns a frozen string representation of +self+ (not including the leading colon):
934 *
935 * :foo.name # => "foo"
936 * :foo.name.frozen? # => true
937 *
938 * Related: Symbol#to_s, Symbol#inspect.
939 */
940
941VALUE
943{
944 if (DYNAMIC_SYM_P(sym)) {
945 return RSYMBOL(sym)->fstr;
946 }
947 else {
948 return rb_id2str(STATIC_SYM2ID(sym));
949 }
950}
951
952VALUE
953rb_id2str(ID id)
954{
955 return lookup_id_str(id);
956}
957
958const char *
960{
961 VALUE str = rb_id2str(id);
962
963 if (!str) return 0;
964 return RSTRING_PTR(str);
965}
966
967ID
968rb_make_internal_id(void)
969{
970 return next_id_base() | ID_INTERNAL | ID_STATIC_SYM;
971}
972
973ID
974rb_make_temporary_id(size_t n)
975{
976 const ID max_id = RB_ID_SERIAL_MAX & ~0xffff;
977 const ID id = max_id - (ID)n;
978 if (id <= ruby_global_symbols.last_id) {
979 rb_raise(rb_eRuntimeError, "too big to make temporary ID: %" PRIdSIZE, n);
980 }
981 return (id << ID_SCOPE_SHIFT) | ID_STATIC_SYM | ID_INTERNAL;
982}
983
984static int
985symbols_i(st_data_t key, st_data_t value, st_data_t arg)
986{
987 VALUE ary = (VALUE)arg;
988 VALUE sym = (VALUE)value;
989
990 if (STATIC_SYM_P(sym)) {
991 rb_ary_push(ary, sym);
992 return ST_CONTINUE;
993 }
994 else if (!DYNAMIC_SYM_P(sym)) {
995 rb_bug("invalid symbol: %s", RSTRING_PTR((VALUE)key));
996 }
997 else if (!SYMBOL_PINNED_P(sym) && rb_objspace_garbage_object_p(sym)) {
998 RSYMBOL(sym)->fstr = 0;
999 return ST_DELETE;
1000 }
1001 else {
1002 rb_ary_push(ary, sym);
1003 return ST_CONTINUE;
1004 }
1005
1006}
1007
1008VALUE
1010{
1011 VALUE ary;
1012
1013 GLOBAL_SYMBOLS_ENTER(symbols);
1014 {
1015 ary = rb_ary_new2(symbols->str_sym->num_entries);
1016 st_foreach(symbols->str_sym, symbols_i, ary);
1017 }
1018 GLOBAL_SYMBOLS_LEAVE();
1019
1020 return ary;
1021}
1022
1023size_t
1024rb_sym_immortal_count(void)
1025{
1026 return (size_t)ruby_global_symbols.last_id;
1027}
1028
1029int
1031{
1032 return is_const_id(id);
1033}
1034
1035int
1037{
1038 return is_class_id(id);
1039}
1040
1041int
1043{
1044 return is_global_id(id);
1045}
1046
1047int
1049{
1050 return is_instance_id(id);
1051}
1052
1053int
1055{
1056 return is_attrset_id(id);
1057}
1058
1059int
1061{
1062 return is_local_id(id);
1063}
1064
1065int
1067{
1068 return is_junk_id(id);
1069}
1070
1071int
1072rb_is_const_sym(VALUE sym)
1073{
1074 return is_const_sym(sym);
1075}
1076
1077int
1078rb_is_attrset_sym(VALUE sym)
1079{
1080 return is_attrset_sym(sym);
1081}
1082
1083ID
1084rb_check_id(volatile VALUE *namep)
1085{
1086 VALUE tmp;
1087 VALUE name = *namep;
1088
1089 if (STATIC_SYM_P(name)) {
1090 return STATIC_SYM2ID(name);
1091 }
1092 else if (DYNAMIC_SYM_P(name)) {
1093 if (SYMBOL_PINNED_P(name)) {
1094 return RSYMBOL(name)->id;
1095 }
1096 else {
1097 *namep = RSYMBOL(name)->fstr;
1098 return 0;
1099 }
1100 }
1101 else if (!RB_TYPE_P(name, T_STRING)) {
1102 tmp = rb_check_string_type(name);
1103 if (NIL_P(tmp)) {
1104 rb_raise(rb_eTypeError, "%+"PRIsVALUE" is not a symbol nor a string",
1105 name);
1106 }
1107 name = tmp;
1108 *namep = name;
1109 }
1110
1111 sym_check_asciionly(name);
1112
1113 return lookup_str_id(name);
1114}
1115
1116// Used by yjit for handling .send without throwing exceptions
1117ID
1118rb_get_symbol_id(VALUE name)
1119{
1120 if (STATIC_SYM_P(name)) {
1121 return STATIC_SYM2ID(name);
1122 }
1123 else if (DYNAMIC_SYM_P(name)) {
1124 if (SYMBOL_PINNED_P(name)) {
1125 return RSYMBOL(name)->id;
1126 }
1127 else {
1128 return 0;
1129 }
1130 }
1131 else {
1132 RUBY_ASSERT_ALWAYS(RB_TYPE_P(name, T_STRING));
1133 return lookup_str_id(name);
1134 }
1135}
1136
1137
1138VALUE
1139rb_check_symbol(volatile VALUE *namep)
1140{
1141 VALUE sym;
1142 VALUE tmp;
1143 VALUE name = *namep;
1144
1145 if (STATIC_SYM_P(name)) {
1146 return name;
1147 }
1148 else if (DYNAMIC_SYM_P(name)) {
1149 if (!SYMBOL_PINNED_P(name)) {
1150 GLOBAL_SYMBOLS_ENTER(symbols);
1151 {
1152 name = dsymbol_check(symbols, name);
1153 }
1154 GLOBAL_SYMBOLS_LEAVE();
1155
1156 *namep = name;
1157 }
1158 return name;
1159 }
1160 else if (!RB_TYPE_P(name, T_STRING)) {
1161 tmp = rb_check_string_type(name);
1162 if (NIL_P(tmp)) {
1163 rb_raise(rb_eTypeError, "%+"PRIsVALUE" is not a symbol nor a string",
1164 name);
1165 }
1166 name = tmp;
1167 *namep = name;
1168 }
1169
1170 sym_check_asciionly(name);
1171
1172 if ((sym = lookup_str_sym(name)) != 0) {
1173 return sym;
1174 }
1175
1176 return Qnil;
1177}
1178
1179ID
1180rb_check_id_cstr(const char *ptr, long len, rb_encoding *enc)
1181{
1182 struct RString fake_str;
1183 const VALUE name = rb_setup_fake_str(&fake_str, ptr, len, enc);
1184
1185 sym_check_asciionly(name);
1186
1187 return lookup_str_id(name);
1188}
1189
1190VALUE
1191rb_check_symbol_cstr(const char *ptr, long len, rb_encoding *enc)
1192{
1193 VALUE sym;
1194 struct RString fake_str;
1195 const VALUE name = rb_setup_fake_str(&fake_str, ptr, len, enc);
1196
1197 sym_check_asciionly(name);
1198
1199 if ((sym = lookup_str_sym(name)) != 0) {
1200 return sym;
1201 }
1202
1203 return Qnil;
1204}
1205
1206#undef rb_sym_intern_ascii_cstr
1207#ifdef __clang__
1208NOINLINE(VALUE rb_sym_intern(const char *ptr, long len, rb_encoding *enc));
1209#else
1210FUNC_MINIMIZED(VALUE rb_sym_intern(const char *ptr, long len, rb_encoding *enc));
1211FUNC_MINIMIZED(VALUE rb_sym_intern_ascii(const char *ptr, long len));
1212FUNC_MINIMIZED(VALUE rb_sym_intern_ascii_cstr(const char *ptr));
1213#endif
1214
1215VALUE
1216rb_sym_intern(const char *ptr, long len, rb_encoding *enc)
1217{
1218 struct RString fake_str;
1219 const VALUE name = rb_setup_fake_str(&fake_str, ptr, len, enc);
1220 return rb_str_intern(name);
1221}
1222
1223VALUE
1224rb_sym_intern_ascii(const char *ptr, long len)
1225{
1226 return rb_sym_intern(ptr, len, rb_usascii_encoding());
1227}
1228
1229VALUE
1230rb_sym_intern_ascii_cstr(const char *ptr)
1231{
1232 return rb_sym_intern_ascii(ptr, strlen(ptr));
1233}
1234
1235VALUE
1236rb_to_symbol_type(VALUE obj)
1237{
1238 return rb_convert_type_with_id(obj, T_SYMBOL, "Symbol", idTo_sym);
1239}
1240
1241int
1242rb_is_const_name(VALUE name)
1243{
1244 return rb_str_symname_type(name, 0) == ID_CONST;
1245}
1246
1247int
1248rb_is_class_name(VALUE name)
1249{
1250 return rb_str_symname_type(name, 0) == ID_CLASS;
1251}
1252
1253int
1254rb_is_instance_name(VALUE name)
1255{
1256 return rb_str_symname_type(name, 0) == ID_INSTANCE;
1257}
1258
1259int
1260rb_is_local_name(VALUE name)
1261{
1262 return rb_str_symname_type(name, 0) == ID_LOCAL;
1263}
1264
1265#include "id_table.c"
1266#include "symbol.rbinc"
#define RUBY_ASSERT_ALWAYS(expr)
A variant of RUBY_ASSERT that does not interface with RUBY_DEBUG.
Definition assert.h:167
#define ENC_CODERANGE_7BIT
Old name of RUBY_ENC_CODERANGE_7BIT.
Definition coderange.h:180
#define T_STRING
Old name of RUBY_T_STRING.
Definition value_type.h:78
#define ISUPPER
Old name of rb_isupper.
Definition ctype.h:89
#define ID2SYM
Old name of RB_ID2SYM.
Definition symbol.h:44
#define OBJ_FREEZE
Old name of RB_OBJ_FREEZE.
Definition fl_type.h:143
#define SYM2ID
Old name of RB_SYM2ID.
Definition symbol.h:45
#define ISDIGIT
Old name of rb_isdigit.
Definition ctype.h:93
#define STATIC_SYM_P
Old name of RB_STATIC_SYM_P.
#define MBCLEN_CHARFOUND_LEN(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_LEN.
Definition encoding.h:533
#define ISALPHA
Old name of rb_isalpha.
Definition ctype.h:92
#define ISASCII
Old name of rb_isascii.
Definition ctype.h:85
#define Qtrue
Old name of RUBY_Qtrue.
#define DYNAMIC_SYM_P
Old name of RB_DYNAMIC_SYM_P.
Definition value_type.h:86
#define Qnil
Old name of RUBY_Qnil.
#define Qfalse
Old name of RUBY_Qfalse.
#define ENC_CODERANGE_BROKEN
Old name of RUBY_ENC_CODERANGE_BROKEN.
Definition coderange.h:182
#define NIL_P
Old name of RB_NIL_P.
#define MBCLEN_CHARFOUND_P(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_P.
Definition encoding.h:532
#define FL_WB_PROTECTED
Old name of RUBY_FL_WB_PROTECTED.
Definition fl_type.h:59
#define T_SYMBOL
Old name of RUBY_T_SYMBOL.
Definition value_type.h:80
#define rb_ary_new2
Old name of rb_ary_new_capa.
Definition array.h:651
#define ISALNUM
Old name of rb_isalnum.
Definition ctype.h:91
void rb_raise(VALUE exc, const char *fmt,...)
Exception entry point.
Definition error.c:3148
void rb_bug(const char *fmt,...)
Interpreter panic switch.
Definition error.c:794
void rb_name_error(ID id, const char *fmt,...)
Raises an instance of rb_eNameError.
Definition error.c:1784
VALUE rb_eTypeError
TypeError exception.
Definition error.c:1091
void rb_name_error_str(VALUE str, const char *fmt,...)
Identical to rb_name_error(), except it takes a VALUE instead of ID.
Definition error.c:1799
void rb_fatal(const char *fmt,...)
Raises the unsung "fatal" exception.
Definition error.c:3199
VALUE rb_eRuntimeError
RuntimeError exception.
Definition error.c:1089
VALUE rb_eEncodingError
EncodingError exception.
Definition error.c:1097
VALUE rb_obj_hide(VALUE obj)
Make the object invisible from Ruby code.
Definition object.c:84
VALUE rb_cSymbol
Symbol class.
Definition string.c:80
#define RB_OBJ_WRITE(old, slot, young)
Declaration of a "back" pointer.
Definition rgengc.h:220
Encoding related APIs.
int rb_enc_str_coderange(VALUE str)
Scans the passed string to collect its code range.
Definition string.c:821
int rb_enc_symname_p(const char *str, rb_encoding *enc)
Identical to rb_symname_p(), except it additionally takes an encoding.
Definition symbol.c:204
ID rb_intern3(const char *name, long len, rb_encoding *enc)
Identical to rb_intern2(), except it additionally takes an encoding.
Definition symbol.c:718
VALUE rb_check_symbol_cstr(const char *ptr, long len, rb_encoding *enc)
Identical to rb_check_id_cstr(), except for the return type.
Definition symbol.c:1191
int rb_enc_symname2_p(const char *name, long len, rb_encoding *enc)
Identical to rb_enc_symname_p(), except it additionally takes the passed string's length.
Definition symbol.c:407
ID rb_check_id_cstr(const char *ptr, long len, rb_encoding *enc)
Identical to rb_check_id(), except it takes a pointer to a memory region instead of Ruby's string.
Definition symbol.c:1180
Defines RBIMPL_HAS_BUILTIN.
VALUE rb_sym_all_symbols(void)
Collects every single symbol that has ever been interned in the entire history of the current process.
Definition symbol.c:1009
int rb_is_global_id(ID id)
Classifies the given ID, then sees if it is a global variable.
Definition symbol.c:1042
int rb_is_instance_id(ID id)
Classifies the given ID, then sees if it is an instance variable.
Definition symbol.c:1048
int rb_is_const_id(ID id)
Classifies the given ID, then sees if it is a constant.
Definition symbol.c:1030
int rb_is_junk_id(ID)
Classifies the given ID, then sees if it is a junk ID.
Definition symbol.c:1066
int rb_symname_p(const char *str)
Sees if the passed C string constructs a valid syntactic symbol.
Definition symbol.c:198
ID rb_id_attrset(ID id)
Calculates an ID of attribute writer.
Definition symbol.c:114
int rb_is_class_id(ID id)
Classifies the given ID, then sees if it is a class variable.
Definition symbol.c:1036
int rb_is_attrset_id(ID id)
Classifies the given ID, then sees if it is an attribute writer.
Definition symbol.c:1054
int rb_is_local_id(ID id)
Classifies the given ID, then sees if it is a local variable.
Definition symbol.c:1060
int rb_str_hash_cmp(VALUE str1, VALUE str2)
Compares two strings.
Definition string.c:3548
VALUE rb_str_ellipsize(VALUE str, long len)
Shortens str and adds three dots, an ellipsis, if it is longer than len characters.
Definition string.c:10826
VALUE rb_str_dup(VALUE str)
Duplicates a string.
Definition string.c:1834
st_index_t rb_str_hash(VALUE str)
Calculates a hash value of a string.
Definition string.c:3538
VALUE rb_str_cat(VALUE dst, const char *src, long srclen)
Destructively appends the passed contents to the string.
Definition string.c:3150
VALUE rb_check_string_type(VALUE obj)
Try converting an object to its stringised representation using its to_str method, if any.
Definition string.c:2640
VALUE rb_str_intern(VALUE str)
Identical to rb_to_symbol(), except it assumes the receiver is an instance of RString.
Definition symbol.c:844
ID rb_intern2(const char *name, long len)
Identical to rb_intern(), except it additionally takes the length of the string.
Definition symbol.c:782
VALUE rb_check_symbol(volatile VALUE *namep)
Identical to rb_check_id(), except it returns an instance of rb_cSymbol instead.
Definition symbol.c:1139
VALUE rb_id2sym(ID id)
Allocates an instance of rb_cSymbol that has the given id.
Definition symbol.c:923
ID rb_check_id(volatile VALUE *namep)
Detects if the given name is already interned or not.
Definition symbol.c:1084
VALUE rb_sym2str(VALUE id)
Identical to rb_id2str(), except it takes an instance of rb_cSymbol rather than an ID.
Definition symbol.c:942
ID rb_sym2id(VALUE obj)
Converts an instance of rb_cSymbol into an ID.
Definition symbol.c:889
const char * rb_id2name(ID id)
Retrieves the name mapped to the given id.
Definition symbol.c:959
ID rb_intern_str(VALUE str)
Identical to rb_intern(), except it takes an instance of rb_cString.
Definition symbol.c:795
#define RB_GC_GUARD(v)
Prevents premature destruction of local objects.
Definition memory.h:161
VALUE type(ANYARGS)
ANYARGS-ed function type.
#define RARRAY_LEN
Just another name of rb_array_len.
Definition rarray.h:68
#define StringValuePtr(v)
Identical to StringValue, except it returns a char*.
Definition rstring.h:82
@ RUBY_SPECIAL_SHIFT
Least significant 8 bits are reserved.
Ruby's String.
Definition rstring.h:231
char ary[RSTRING_EMBED_LEN_MAX+1]
When a string is short enough, it uses this area to store the contents themselves.
Definition rstring.h:298
long len
Length of the string, not including terminating NUL character.
Definition rstring.h:250
char * ptr
Pointer to the contents of the string.
Definition rstring.h:258
uintptr_t VALUE
Type that represents a Ruby object.
Definition value.h:40
uintptr_t ID
Type that represents a Ruby identifier such as a variable name.
Definition value.h:52