[rubygems/rubygems] Use a constant empty tar header to avoid extra allocations
[ruby.git] / ruby_parser.c
blob1dcdfd8e79ecd84a6417e7e134a220b21b149e6d
1 /* This is a wrapper for parse.y */
3 #include "internal/parse.h"
4 #include "internal/re.h"
5 #include "internal/ruby_parser.h"
7 #include "node.h"
8 #include "rubyparser.h"
9 #include "internal/error.h"
11 #ifdef UNIVERSAL_PARSER
13 #include "internal.h"
14 #include "internal/array.h"
15 #include "internal/bignum.h"
16 #include "internal/compile.h"
17 #include "internal/complex.h"
18 #include "internal/encoding.h"
19 #include "internal/gc.h"
20 #include "internal/hash.h"
21 #include "internal/io.h"
22 #include "internal/rational.h"
23 #include "internal/re.h"
24 #include "internal/string.h"
25 #include "internal/symbol.h"
26 #include "internal/thread.h"
28 #include "ruby/ractor.h"
29 #include "ruby/ruby.h"
30 #include "ruby/util.h"
31 #include "internal.h"
32 #include "vm_core.h"
33 #include "symbol.h"
35 #define parser_encoding const void
37 static int
38 is_ascii_string2(VALUE str)
40 return is_ascii_string(str);
43 RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 6, 0)
44 static VALUE
45 syntax_error_append(VALUE exc, VALUE file, int line, int column,
46 parser_encoding *enc, const char *fmt, va_list args)
48 return rb_syntax_error_append(exc, file, line, column, enc, fmt, args);
51 static int
52 local_defined(ID id, const void *p)
54 return rb_local_defined(id, (const rb_iseq_t *)p);
57 static int
58 dvar_defined(ID id, const void *p)
60 return rb_dvar_defined(id, (const rb_iseq_t *)p);
63 static int
64 is_usascii_enc(parser_encoding *enc)
66 return rb_is_usascii_enc(enc);
69 static int
70 is_local_id2(ID id)
72 return is_local_id(id);
75 static int
76 is_attrset_id2(ID id)
78 return is_attrset_id(id);
81 static int
82 is_notop_id2(ID id)
84 return is_notop_id(id);
87 static VALUE
88 enc_str_new(const char *ptr, long len, parser_encoding *enc)
90 return rb_enc_str_new(ptr, len, enc);
93 static int
94 enc_isalnum(OnigCodePoint c, parser_encoding *enc)
96 return rb_enc_isalnum(c, enc);
99 static int
100 enc_precise_mbclen(const char *p, const char *e, parser_encoding *enc)
102 return rb_enc_precise_mbclen(p, e, enc);
/* Parser-config adapter: exposes the MBCLEN_CHARFOUND_P() macro as a function. */
static int
mbclen_charfound_p(int len)
{
    const int found = MBCLEN_CHARFOUND_P(len);
    return found;
}
/* Parser-config adapter: exposes the MBCLEN_CHARFOUND_LEN() macro as a function. */
static int
mbclen_charfound_len(int len)
{
    const int charlen = MBCLEN_CHARFOUND_LEN(len);
    return charlen;
}
117 static const char *
118 enc_name(parser_encoding *enc)
120 return rb_enc_name(enc);
123 static char *
124 enc_prev_char(const char *s, const char *p, const char *e, parser_encoding *enc)
126 return rb_enc_prev_char(s, p, e, enc);
129 static parser_encoding *
130 enc_get(VALUE obj)
132 return rb_enc_get(obj);
135 static int
136 enc_asciicompat(parser_encoding *enc)
138 return rb_enc_asciicompat(enc);
141 static parser_encoding *
142 utf8_encoding(void)
144 return rb_utf8_encoding();
147 static VALUE
148 enc_associate(VALUE obj, parser_encoding *enc)
150 return rb_enc_associate(obj, enc);
153 static parser_encoding *
154 ascii8bit_encoding(void)
156 return rb_ascii8bit_encoding();
159 static int
160 enc_codelen(int c, parser_encoding *enc)
162 return rb_enc_codelen(c, enc);
165 static int
166 enc_mbcput(unsigned int c, void *buf, parser_encoding *enc)
168 return rb_enc_mbcput(c, buf, enc);
171 static int
172 enc_mbclen(const char *p, const char *e, parser_encoding *enc)
174 return rb_enc_mbclen(p, e, enc);
177 static parser_encoding *
178 enc_from_index(int idx)
180 return rb_enc_from_index(idx);
183 static int
184 enc_isspace(OnigCodePoint c, parser_encoding *enc)
186 return rb_enc_isspace(c, enc);
189 static ID
190 intern3(const char *name, long len, parser_encoding *enc)
192 return rb_intern3(name, len, enc);
195 static parser_encoding *
196 usascii_encoding(void)
198 return rb_usascii_encoding();
201 static int
202 enc_symname_type(const char *name, long len, parser_encoding *enc, unsigned int allowed_attrset)
204 return rb_enc_symname_type(name, len, enc, allowed_attrset);
207 typedef struct {
208 struct parser_params *parser;
209 rb_encoding *enc;
210 NODE *succ_block;
211 const rb_code_location_t *loc;
212 } reg_named_capture_assign_t;
214 static int
215 reg_named_capture_assign_iter(const OnigUChar *name, const OnigUChar *name_end,
216 int back_num, int *back_refs, OnigRegex regex, void *arg0)
218 reg_named_capture_assign_t *arg = (reg_named_capture_assign_t*)arg0;
219 struct parser_params* p = arg->parser;
220 rb_encoding *enc = arg->enc;
221 const rb_code_location_t *loc = arg->loc;
222 long len = name_end - name;
223 const char *s = (const char *)name;
225 return rb_reg_named_capture_assign_iter_impl(p, s, len, enc, &arg->succ_block, loc);
228 static NODE *
229 reg_named_capture_assign(struct parser_params* p, VALUE regexp, const rb_code_location_t *loc)
231 reg_named_capture_assign_t arg;
233 arg.parser = p;
234 arg.enc = rb_enc_get(regexp);
235 arg.succ_block = 0;
236 arg.loc = loc;
237 onig_foreach_name(RREGEXP_PTR(regexp), reg_named_capture_assign_iter, &arg);
239 if (!arg.succ_block) return 0;
240 return RNODE_BLOCK(arg.succ_block)->nd_next;
243 static int
244 rtest(VALUE obj)
246 return (int)RB_TEST(obj);
249 static int
250 nil_p(VALUE obj)
252 return (int)NIL_P(obj);
255 static VALUE
256 syntax_error_new(void)
258 return rb_class_new_instance(0, 0, rb_eSyntaxError);
/* memmove() of t * n bytes; the product is computed by rbimpl_size_mul_or_raise(). */
static void *
memmove2(void *dest, const void *src, size_t t, size_t n)
{
    const size_t nbytes = rbimpl_size_mul_or_raise(t, n);
    return memmove(dest, src, nbytes);
}
/* ruby_nonempty_memcpy() of t * n bytes; the product is computed by rbimpl_size_mul_or_raise(). */
static void *
nonempty_memcpy(void *dest, const void *src, size_t t, size_t n)
{
    const size_t nbytes = rbimpl_size_mul_or_raise(t, n);
    return ruby_nonempty_memcpy(dest, src, nbytes);
}
273 static VALUE
274 ruby_verbose2(void)
276 return ruby_verbose;
/* Parser-config adapter: forwards to rb_errno_ptr(). */
static int *
rb_errno_ptr2(void)
{
    int *errno_slot = rb_errno_ptr();
    return errno_slot;
}
/* Allocates a single element of elemsiz bytes through ruby_xcalloc(). */
static void *
zalloc(size_t elemsiz)
{
    void *mem = ruby_xcalloc(1, elemsiz);
    return mem;
}
291 static void
292 gc_guard(VALUE obj)
294 RB_GC_GUARD(obj);
297 static VALUE
298 arg_error(void)
300 return rb_eArgError;
303 static VALUE
304 static_id2sym(ID id)
306 return (((VALUE)(id)<<RUBY_SPECIAL_SHIFT)|SYMBOL_FLAG);
309 static long
310 str_coderange_scan_restartable(const char *s, const char *e, parser_encoding *enc, int *cr)
312 return rb_str_coderange_scan_restartable(s, e, enc, cr);
315 static int
316 enc_mbminlen(parser_encoding *enc)
318 return rb_enc_mbminlen(enc);
321 static bool
322 enc_isascii(OnigCodePoint c, parser_encoding *enc)
324 return rb_enc_isascii(c, enc);
327 static OnigCodePoint
328 enc_mbc_to_codepoint(const char *p, const char *e, parser_encoding *enc)
330 const OnigUChar *up = RBIMPL_CAST((const OnigUChar *)p);
331 const OnigUChar *ue = RBIMPL_CAST((const OnigUChar *)e);
333 return ONIGENC_MBC_TO_CODE((rb_encoding *)enc, up, ue);
336 extern VALUE rb_eArgError;
338 static const rb_parser_config_t rb_global_parser_config = {
339 .malloc = ruby_xmalloc,
340 .calloc = ruby_xcalloc,
341 .realloc = ruby_xrealloc,
342 .free = ruby_xfree,
343 .alloc_n = ruby_xmalloc2,
344 .alloc = ruby_xmalloc,
345 .realloc_n = ruby_xrealloc2,
346 .zalloc = zalloc,
347 .rb_memmove = memmove2,
348 .nonempty_memcpy = nonempty_memcpy,
349 .xmalloc_mul_add = rb_xmalloc_mul_add,
351 .compile_callback = rb_suppress_tracing,
352 .reg_named_capture_assign = reg_named_capture_assign,
354 .attr_get = rb_attr_get,
356 .ary_new = rb_ary_new,
357 .ary_push = rb_ary_push,
358 .ary_new_from_args = rb_ary_new_from_args,
359 .ary_unshift = rb_ary_unshift,
361 .make_temporary_id = rb_make_temporary_id,
362 .is_local_id = is_local_id2,
363 .is_attrset_id = is_attrset_id2,
364 .is_global_name_punct = is_global_name_punct,
365 .id_type = id_type,
366 .id_attrset = rb_id_attrset,
367 .intern = rb_intern,
368 .intern2 = rb_intern2,
369 .intern3 = intern3,
370 .intern_str = rb_intern_str,
371 .is_notop_id = is_notop_id2,
372 .enc_symname_type = enc_symname_type,
373 .id2name = rb_id2name,
374 .id2str = rb_id2str,
375 .id2sym = rb_id2sym,
376 .sym2id = rb_sym2id,
378 .str_catf = rb_str_catf,
379 .str_cat_cstr = rb_str_cat_cstr,
380 .str_modify = rb_str_modify,
381 .str_set_len = rb_str_set_len,
382 .str_cat = rb_str_cat,
383 .str_resize = rb_str_resize,
384 .str_new = rb_str_new,
385 .str_new_cstr = rb_str_new_cstr,
386 .str_to_interned_str = rb_str_to_interned_str,
387 .is_ascii_string = is_ascii_string2,
388 .enc_str_new = enc_str_new,
389 .str_vcatf = rb_str_vcatf,
390 .string_value_cstr = rb_string_value_cstr,
391 .rb_sprintf = rb_sprintf,
392 .rstring_ptr = RSTRING_PTR,
393 .rstring_end = RSTRING_END,
394 .rstring_len = RSTRING_LEN,
395 .obj_as_string = rb_obj_as_string,
397 .int2num = rb_int2num_inline,
399 .stderr_tty_p = rb_stderr_tty_p,
400 .write_error_str = rb_write_error_str,
401 .io_write = rb_io_write,
402 .io_flush = rb_io_flush,
403 .io_puts = rb_io_puts,
405 .debug_output_stdout = rb_ractor_stdout,
406 .debug_output_stderr = rb_ractor_stderr,
408 .is_usascii_enc = is_usascii_enc,
409 .enc_isalnum = enc_isalnum,
410 .enc_precise_mbclen = enc_precise_mbclen,
411 .mbclen_charfound_p = mbclen_charfound_p,
412 .mbclen_charfound_len = mbclen_charfound_len,
413 .enc_name = enc_name,
414 .enc_prev_char = enc_prev_char,
415 .enc_get = enc_get,
416 .enc_asciicompat = enc_asciicompat,
417 .utf8_encoding = utf8_encoding,
418 .enc_associate = enc_associate,
419 .ascii8bit_encoding = ascii8bit_encoding,
420 .enc_codelen = enc_codelen,
421 .enc_mbcput = enc_mbcput,
422 .enc_mbclen = enc_mbclen,
423 .enc_find_index = rb_enc_find_index,
424 .enc_from_index = enc_from_index,
425 .enc_isspace = enc_isspace,
426 .enc_coderange_7bit = ENC_CODERANGE_7BIT,
427 .enc_coderange_unknown = ENC_CODERANGE_UNKNOWN,
428 .usascii_encoding = usascii_encoding,
429 .enc_coderange_broken = ENC_CODERANGE_BROKEN,
430 .enc_mbminlen = enc_mbminlen,
431 .enc_isascii = enc_isascii,
432 .enc_mbc_to_codepoint = enc_mbc_to_codepoint,
434 .local_defined = local_defined,
435 .dvar_defined = dvar_defined,
437 .syntax_error_append = syntax_error_append,
438 .raise = rb_raise,
439 .syntax_error_new = syntax_error_new,
441 .errinfo = rb_errinfo,
442 .set_errinfo = rb_set_errinfo,
443 .exc_raise = rb_exc_raise,
444 .make_exception = rb_make_exception,
446 .sized_xfree = ruby_sized_xfree,
447 .sized_realloc_n = ruby_sized_realloc_n,
448 .gc_guard = gc_guard,
449 .gc_mark = rb_gc_mark,
451 .reg_compile = rb_reg_compile,
452 .reg_check_preprocess = rb_reg_check_preprocess,
453 .memcicmp = rb_memcicmp,
455 .compile_warn = rb_compile_warn,
456 .compile_warning = rb_compile_warning,
457 .bug = rb_bug,
458 .fatal = rb_fatal,
459 .verbose = ruby_verbose2,
460 .errno_ptr = rb_errno_ptr2,
462 .make_backtrace = rb_make_backtrace,
464 .scan_hex = ruby_scan_hex,
465 .scan_oct = ruby_scan_oct,
466 .scan_digits = ruby_scan_digits,
467 .strtod = ruby_strtod,
469 .rtest = rtest,
470 .nil_p = nil_p,
471 .qnil = Qnil,
472 .qfalse = Qfalse,
473 .eArgError = arg_error,
474 .long2int = rb_long2int,
476 /* For Ripper */
477 .static_id2sym = static_id2sym,
478 .str_coderange_scan_restartable = str_coderange_scan_restartable,
480 #endif
/* Kind of input source a struct ruby_parser is currently reading from. */
enum lex_type {
    lex_type_str,     /* set by parser_compile_string0() */
    lex_type_io,      /* set by parser_compile_file_path() */
    lex_type_array,   /* set by parser_compile_array() */
    lex_type_generic, /* set by parser_compile_generic() */
};
489 struct ruby_parser {
490 rb_parser_t *parser_params;
491 enum lex_type type;
492 union {
493 struct lex_pointer_string lex_str;
494 struct {
495 VALUE file;
496 } lex_io;
497 struct {
498 VALUE ary;
499 } lex_array;
500 } data;
503 static void
504 parser_mark(void *ptr)
506 struct ruby_parser *parser = (struct ruby_parser*)ptr;
507 rb_ruby_parser_mark(parser->parser_params);
509 switch (parser->type) {
510 case lex_type_str:
511 rb_gc_mark(parser->data.lex_str.str);
512 break;
513 case lex_type_io:
514 rb_gc_mark(parser->data.lex_io.file);
515 break;
516 case lex_type_array:
517 rb_gc_mark(parser->data.lex_array.ary);
518 break;
519 case lex_type_generic:
520 /* noop. Caller of rb_parser_compile_generic should mark the objects. */
521 break;
525 static void
526 parser_free(void *ptr)
528 struct ruby_parser *parser = (struct ruby_parser*)ptr;
529 rb_ruby_parser_free(parser->parser_params);
530 xfree(parser);
533 static size_t
534 parser_memsize(const void *ptr)
536 struct ruby_parser *parser = (struct ruby_parser*)ptr;
537 return rb_ruby_parser_memsize(parser->parser_params);
540 static const rb_data_type_t ruby_parser_data_type = {
541 "parser",
543 parser_mark,
544 parser_free,
545 parser_memsize,
547 0, 0, RUBY_TYPED_FREE_IMMEDIATELY
550 #ifdef UNIVERSAL_PARSER
551 const rb_parser_config_t *
552 rb_ruby_parser_config(void)
554 return &rb_global_parser_config;
557 rb_parser_t *
558 rb_parser_params_new(void)
560 return rb_ruby_parser_new(&rb_global_parser_config);
562 #else
563 rb_parser_t *
564 rb_parser_params_new(void)
566 return rb_ruby_parser_new();
568 #endif /* UNIVERSAL_PARSER */
570 VALUE
571 rb_parser_new(void)
573 struct ruby_parser *parser;
574 rb_parser_t *parser_params;
577 * Create parser_params ahead of vparser because
578 * rb_ruby_parser_new can run GC so if create vparser
579 * first, parser_mark tries to mark not initialized parser_params.
581 parser_params = rb_parser_params_new();
582 VALUE vparser = TypedData_Make_Struct(0, struct ruby_parser,
583 &ruby_parser_data_type, parser);
584 parser->parser_params = parser_params;
586 return vparser;
589 void
590 rb_parser_set_options(VALUE vparser, int print, int loop, int chomp, int split)
592 struct ruby_parser *parser;
594 TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
595 rb_ruby_parser_set_options(parser->parser_params, print, loop, chomp, split);
598 VALUE
599 rb_parser_set_context(VALUE vparser, const struct rb_iseq_struct *base, int main)
601 struct ruby_parser *parser;
603 TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
604 rb_ruby_parser_set_context(parser->parser_params, base, main);
605 return vparser;
608 void
609 rb_parser_set_script_lines(VALUE vparser)
611 struct ruby_parser *parser;
613 TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
614 rb_ruby_parser_set_script_lines(parser->parser_params);
617 void
618 rb_parser_error_tolerant(VALUE vparser)
620 struct ruby_parser *parser;
622 TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
623 rb_ruby_parser_error_tolerant(parser->parser_params);
626 void
627 rb_parser_keep_tokens(VALUE vparser)
629 struct ruby_parser *parser;
631 TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
632 rb_ruby_parser_keep_tokens(parser->parser_params);
635 rb_parser_string_t *
636 rb_parser_lex_get_str(struct parser_params *p, struct lex_pointer_string *ptr_str)
638 char *beg, *end, *start;
639 long len;
640 VALUE s = ptr_str->str;
642 beg = RSTRING_PTR(s);
643 len = RSTRING_LEN(s);
644 start = beg;
645 if (ptr_str->ptr) {
646 if (len == ptr_str->ptr) return 0;
647 beg += ptr_str->ptr;
648 len -= ptr_str->ptr;
650 end = memchr(beg, '\n', len);
651 if (end) len = ++end - beg;
652 ptr_str->ptr += len;
653 return rb_str_to_parser_string(p, rb_str_subseq(s, beg - start, len));
656 static rb_parser_string_t *
657 lex_get_str(struct parser_params *p, rb_parser_input_data input, int line_count)
659 return rb_parser_lex_get_str(p, (struct lex_pointer_string *)input);
662 static void parser_aset_script_lines_for(VALUE path, rb_parser_ary_t *lines);
664 static rb_ast_t*
665 parser_compile(rb_parser_t *p, rb_parser_lex_gets_func *gets, VALUE fname, rb_parser_input_data input, int line)
667 rb_ast_t *ast;
668 const char *ptr = 0;
669 long len = 0;
670 rb_encoding *enc = 0;
672 if (!NIL_P(fname)) {
673 StringValueCStr(fname);
674 ptr = RSTRING_PTR(fname);
675 len = RSTRING_LEN(fname);
676 enc = rb_enc_get(fname);
679 ast = rb_parser_compile(p, gets, ptr, len, enc, input, line);
680 parser_aset_script_lines_for(fname, ast->body.script_lines);
681 return ast;
684 static rb_ast_t*
685 parser_compile_string0(struct ruby_parser *parser, VALUE fname, VALUE s, int line)
687 VALUE str = rb_str_new_frozen(s);
689 parser->type = lex_type_str;
690 parser->data.lex_str.str = str;
691 parser->data.lex_str.ptr = 0;
693 return parser_compile(parser->parser_params, lex_get_str, fname, (rb_parser_input_data)&parser->data, line);
696 static rb_encoding *
697 must_be_ascii_compatible(VALUE s)
699 rb_encoding *enc = rb_enc_get(s);
700 if (!rb_enc_asciicompat(enc)) {
701 rb_raise(rb_eArgError, "invalid source encoding");
703 return enc;
706 static rb_ast_t*
707 parser_compile_string_path(struct ruby_parser *parser, VALUE f, VALUE s, int line)
709 must_be_ascii_compatible(s);
710 return parser_compile_string0(parser, f, s, line);
713 static rb_ast_t*
714 parser_compile_string(struct ruby_parser *parser, const char *f, VALUE s, int line)
716 return parser_compile_string_path(parser, rb_filesystem_str_new_cstr(f), s, line);
719 VALUE rb_io_gets_internal(VALUE io);
721 static rb_parser_string_t *
722 lex_io_gets(struct parser_params *p, rb_parser_input_data input, int line_count)
724 VALUE io = (VALUE)input;
725 VALUE line = rb_io_gets_internal(io);
726 if (NIL_P(line)) return 0;
727 return rb_str_to_parser_string(p, line);
730 static rb_parser_string_t *
731 lex_gets_array(struct parser_params *p, rb_parser_input_data data, int index)
733 VALUE array = (VALUE)data;
734 VALUE str = rb_ary_entry(array, index);
735 if (!NIL_P(str)) {
736 StringValue(str);
737 if (!rb_enc_asciicompat(rb_enc_get(str))) {
738 rb_raise(rb_eArgError, "invalid source encoding");
740 return rb_str_to_parser_string(p, str);
742 else {
743 return 0;
747 static rb_ast_t*
748 parser_compile_file_path(struct ruby_parser *parser, VALUE fname, VALUE file, int start)
750 parser->type = lex_type_io;
751 parser->data.lex_io.file = file;
753 return parser_compile(parser->parser_params, lex_io_gets, fname, (rb_parser_input_data)file, start);
756 static rb_ast_t*
757 parser_compile_array(struct ruby_parser *parser, VALUE fname, VALUE array, int start)
759 parser->type = lex_type_array;
760 parser->data.lex_array.ary = array;
762 return parser_compile(parser->parser_params, lex_gets_array, fname, (rb_parser_input_data)array, start);
765 static rb_ast_t*
766 parser_compile_generic(struct ruby_parser *parser, rb_parser_lex_gets_func *lex_gets, VALUE fname, VALUE input, int start)
768 parser->type = lex_type_generic;
770 return parser_compile(parser->parser_params, lex_gets, fname, (rb_parser_input_data)input, start);
773 static void
774 ast_free(void *ptr)
776 rb_ast_t *ast = (rb_ast_t *)ptr;
777 rb_ast_free(ast);
780 static const rb_data_type_t ast_data_type = {
781 "AST",
783 NULL,
784 ast_free,
785 NULL, // No dsize() because this object does not appear in ObjectSpace.
787 0, 0, RUBY_TYPED_FREE_IMMEDIATELY
790 static VALUE
791 ast_alloc(void)
793 return TypedData_Wrap_Struct(0, &ast_data_type, NULL);
796 VALUE
797 rb_parser_compile_file_path(VALUE vparser, VALUE fname, VALUE file, int start)
799 struct ruby_parser *parser;
800 VALUE ast_value = ast_alloc();
802 TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
803 DATA_PTR(ast_value) = parser_compile_file_path(parser, fname, file, start);
804 RB_GC_GUARD(vparser);
806 return ast_value;
809 VALUE
810 rb_parser_compile_array(VALUE vparser, VALUE fname, VALUE array, int start)
812 struct ruby_parser *parser;
813 VALUE ast_value = ast_alloc();
815 TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
816 DATA_PTR(ast_value) = parser_compile_array(parser, fname, array, start);
817 RB_GC_GUARD(vparser);
819 return ast_value;
822 VALUE
823 rb_parser_compile_generic(VALUE vparser, rb_parser_lex_gets_func *lex_gets, VALUE fname, VALUE input, int start)
825 struct ruby_parser *parser;
826 VALUE ast_value = ast_alloc();
828 TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
829 DATA_PTR(ast_value) = parser_compile_generic(parser, lex_gets, fname, input, start);
830 RB_GC_GUARD(vparser);
832 return ast_value;
835 VALUE
836 rb_parser_compile_string(VALUE vparser, const char *f, VALUE s, int line)
838 struct ruby_parser *parser;
839 VALUE ast_value = ast_alloc();
841 TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
842 DATA_PTR(ast_value) = parser_compile_string(parser, f, s, line);
843 RB_GC_GUARD(vparser);
845 return ast_value;
848 VALUE
849 rb_parser_compile_string_path(VALUE vparser, VALUE f, VALUE s, int line)
851 struct ruby_parser *parser;
852 VALUE ast_value = ast_alloc();
854 TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
855 DATA_PTR(ast_value) = parser_compile_string_path(parser, f, s, line);
856 RB_GC_GUARD(vparser);
858 return ast_value;
861 VALUE
862 rb_parser_encoding(VALUE vparser)
864 struct ruby_parser *parser;
866 TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
867 return rb_enc_from_encoding(rb_ruby_parser_encoding(parser->parser_params));
870 VALUE
871 rb_parser_end_seen_p(VALUE vparser)
873 struct ruby_parser *parser;
875 TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
876 return RBOOL(rb_ruby_parser_end_seen_p(parser->parser_params));
879 VALUE
880 rb_parser_set_yydebug(VALUE vparser, VALUE flag)
882 struct ruby_parser *parser;
884 TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
885 rb_ruby_parser_set_yydebug(parser->parser_params, RTEST(flag));
886 return flag;
889 void
890 rb_set_script_lines_for(VALUE vparser, VALUE path)
892 struct ruby_parser *parser;
893 VALUE hash;
894 ID script_lines;
895 CONST_ID(script_lines, "SCRIPT_LINES__");
896 if (!rb_const_defined_at(rb_cObject, script_lines)) return;
897 hash = rb_const_get_at(rb_cObject, script_lines);
898 if (RB_TYPE_P(hash, T_HASH)) {
899 rb_hash_aset(hash, path, Qtrue);
900 TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
901 rb_ruby_parser_set_script_lines(parser->parser_params);
905 VALUE
906 rb_parser_build_script_lines_from(rb_parser_ary_t *lines)
908 int i;
909 if (!lines) return Qnil;
910 if (lines->data_type != PARSER_ARY_DATA_SCRIPT_LINE) {
911 rb_bug("unexpected rb_parser_ary_data_type (%d) for script lines", lines->data_type);
913 VALUE script_lines = rb_ary_new_capa(lines->len);
914 for (i = 0; i < lines->len; i++) {
915 rb_parser_string_t *str = (rb_parser_string_t *)lines->data[i];
916 rb_ary_push(script_lines, rb_enc_str_new(str->ptr, str->len, str->enc));
918 return script_lines;
921 VALUE
922 rb_str_new_parser_string(rb_parser_string_t *str)
924 VALUE string = rb_enc_interned_str(str->ptr, str->len, str->enc);
925 rb_enc_str_coderange(string);
926 return string;
929 VALUE
930 rb_str_new_mutable_parser_string(rb_parser_string_t *str)
932 return rb_enc_str_new(str->ptr, str->len, str->enc);
935 static VALUE
936 negative_numeric(VALUE val)
938 if (FIXNUM_P(val)) {
939 return LONG2FIX(-FIX2LONG(val));
941 if (SPECIAL_CONST_P(val)) {
942 #if USE_FLONUM
943 if (FLONUM_P(val)) {
944 return DBL2NUM(-RFLOAT_VALUE(val));
946 #endif
947 goto unknown;
949 switch (BUILTIN_TYPE(val)) {
950 case T_BIGNUM:
951 BIGNUM_NEGATE(val);
952 val = rb_big_norm(val);
953 break;
954 case T_RATIONAL:
955 RATIONAL_SET_NUM(val, negative_numeric(RRATIONAL(val)->num));
956 break;
957 case T_COMPLEX:
958 RCOMPLEX_SET_REAL(val, negative_numeric(RCOMPLEX(val)->real));
959 RCOMPLEX_SET_IMAG(val, negative_numeric(RCOMPLEX(val)->imag));
960 break;
961 case T_FLOAT:
962 val = DBL2NUM(-RFLOAT_VALUE(val));
963 break;
964 unknown:
965 default:
966 rb_bug("unknown literal type (%s) passed to negative_numeric",
967 rb_builtin_class_name(val));
968 break;
970 return val;
973 static VALUE
974 integer_value(const char *val, int base)
976 return rb_cstr_to_inum(val, base, FALSE);
979 static VALUE
980 rational_value(const char *node_val, int base, int seen_point)
982 VALUE lit;
983 char* val = strdup(node_val);
984 if (seen_point > 0) {
985 int len = (int)(strlen(val));
986 char *point = &val[seen_point];
987 size_t fraclen = len-seen_point-1;
988 memmove(point, point+1, fraclen+1);
990 lit = rb_rational_new(integer_value(val, base), rb_int_positive_pow(10, fraclen));
992 else {
993 lit = rb_rational_raw1(integer_value(val, base));
996 free(val);
998 return lit;
1001 VALUE
1002 rb_node_integer_literal_val(const NODE *n)
1004 const rb_node_integer_t *node = RNODE_INTEGER(n);
1005 VALUE val = integer_value(node->val, node->base);
1006 if (node->minus) {
1007 val = negative_numeric(val);
1009 return val;
1012 VALUE
1013 rb_node_float_literal_val(const NODE *n)
1015 const rb_node_float_t *node = RNODE_FLOAT(n);
1016 double d = strtod(node->val, 0);
1017 if (node->minus) {
1018 d = -d;
1020 VALUE val = DBL2NUM(d);
1021 return val;
1024 VALUE
1025 rb_node_rational_literal_val(const NODE *n)
1027 VALUE lit;
1028 const rb_node_rational_t *node = RNODE_RATIONAL(n);
1030 lit = rational_value(node->val, node->base, node->seen_point);
1032 if (node->minus) {
1033 lit = negative_numeric(lit);
1036 return lit;
1039 VALUE
1040 rb_node_imaginary_literal_val(const NODE *n)
1042 VALUE lit;
1043 const rb_node_imaginary_t *node = RNODE_IMAGINARY(n);
1045 enum rb_numeric_type type = node->type;
1047 switch (type) {
1048 case integer_literal:
1049 lit = integer_value(node->val, node->base);
1050 break;
1051 case float_literal:{
1052 double d = strtod(node->val, 0);
1053 lit = DBL2NUM(d);
1054 break;
1056 case rational_literal:
1057 lit = rational_value(node->val, node->base, node->seen_point);
1058 break;
1059 default:
1060 rb_bug("unreachable");
1063 lit = rb_complex_raw(INT2FIX(0), lit);
1065 if (node->minus) {
1066 lit = negative_numeric(lit);
1068 return lit;
1071 VALUE
1072 rb_node_str_string_val(const NODE *node)
1074 rb_parser_string_t *str = RNODE_STR(node)->string;
1075 return rb_str_new_parser_string(str);
1078 VALUE
1079 rb_node_sym_string_val(const NODE *node)
1081 rb_parser_string_t *str = RNODE_SYM(node)->string;
1082 return ID2SYM(rb_intern3(str->ptr, str->len, str->enc));
1085 VALUE
1086 rb_node_dstr_string_val(const NODE *node)
1088 rb_parser_string_t *str = RNODE_DSTR(node)->string;
1089 return str ? rb_str_new_parser_string(str) : Qnil;
1092 VALUE
1093 rb_node_dregx_string_val(const NODE *node)
1095 rb_parser_string_t *str = RNODE_DREGX(node)->string;
1096 return rb_str_new_parser_string(str);
1099 VALUE
1100 rb_node_regx_string_val(const NODE *node)
1102 rb_node_regx_t *node_reg = RNODE_REGX(node);
1103 rb_parser_string_t *string = node_reg->string;
1104 VALUE str = rb_enc_str_new(string->ptr, string->len, string->enc);
1106 return rb_reg_compile(str, node_reg->options, NULL, 0);
1109 VALUE
1110 rb_node_line_lineno_val(const NODE *node)
1112 return INT2FIX(node->nd_loc.beg_pos.lineno);
1115 VALUE
1116 rb_node_file_path_val(const NODE *node)
1118 return rb_str_new_parser_string(RNODE_FILE(node)->path);
1121 VALUE
1122 rb_node_encoding_val(const NODE *node)
1124 return rb_enc_from_encoding(RNODE_ENCODING(node)->enc);
1127 static void
1128 parser_aset_script_lines_for(VALUE path, rb_parser_ary_t *lines)
1130 VALUE hash, script_lines;
1131 ID script_lines_id;
1132 if (NIL_P(path) || !lines) return;
1133 CONST_ID(script_lines_id, "SCRIPT_LINES__");
1134 if (!rb_const_defined_at(rb_cObject, script_lines_id)) return;
1135 hash = rb_const_get_at(rb_cObject, script_lines_id);
1136 if (!RB_TYPE_P(hash, T_HASH)) return;
1137 if (rb_hash_lookup(hash, path) == Qnil) return;
1138 script_lines = rb_parser_build_script_lines_from(lines);
1139 rb_hash_aset(hash, path, script_lines);
1142 VALUE
1143 rb_ruby_ast_new(const NODE *const root)
1145 rb_ast_t *ast;
1146 VALUE ast_value = TypedData_Make_Struct(0, rb_ast_t, &ast_data_type, ast);
1147 #ifdef UNIVERSAL_PARSER
1148 ast->config = &rb_global_parser_config;
1149 #endif
1150 ast->body = (rb_ast_body_t){
1151 .root = root,
1152 .frozen_string_literal = -1,
1153 .coverage_enabled = -1,
1154 .script_lines = NULL,
1155 .line_count = 0,
1157 return ast_value;
1160 rb_ast_t *
1161 rb_ruby_ast_data_get(VALUE ast_value)
1163 rb_ast_t *ast;
1164 if (NIL_P(ast_value)) return NULL;
1165 TypedData_Get_Struct(ast_value, rb_ast_t, &ast_data_type, ast);
1166 return ast;