1 /**********************************************************************
6 created at: Thu Apr 27 16:30:01 JST 1995
8 Copyright (C) 1993-2007 Yukihiro Matsumoto
10 **********************************************************************/
12 #include "ruby/internal/config.h"
25 #include "internal/array.h"
26 #include "internal/bignum.h"
27 #include "internal/class.h"
28 #include "internal/encoding.h"
29 #include "internal/error.h"
30 #include "internal/hash.h"
31 #include "internal/object.h"
32 #include "internal/struct.h"
33 #include "internal/symbol.h"
34 #include "internal/util.h"
35 #include "internal/vm.h"
37 #include "ruby/ruby.h"
39 #include "ruby/util.h"
42 #define BITSPERSHORT (2*CHAR_BIT)
43 #define SHORTMASK ((1<<BITSPERSHORT)-1)
44 #define SHORTDN(x) RSHIFT((x),BITSPERSHORT)
46 #if SIZEOF_SHORT == SIZEOF_BDIGIT
47 #define SHORTLEN(x) (x)
50 shortlen(size_t len
, BDIGIT
*ds
)
60 return (len
- 1)*SIZEOF_BDIGIT
/2 + offset
;
62 #define SHORTLEN(x) shortlen((x),d)
65 #define MARSHAL_MAJOR 4
66 #define MARSHAL_MINOR 8
70 #define TYPE_FALSE 'F'
71 #define TYPE_FIXNUM 'i'
73 #define TYPE_EXTENDED 'e'
74 #define TYPE_UCLASS 'C'
75 #define TYPE_OBJECT 'o'
77 #define TYPE_USERDEF 'u'
78 #define TYPE_USRMARSHAL 'U'
79 #define TYPE_FLOAT 'f'
80 #define TYPE_BIGNUM 'l'
81 #define TYPE_STRING '"'
82 #define TYPE_REGEXP '/'
83 #define TYPE_ARRAY '['
85 #define TYPE_HASH_DEF '}'
86 #define TYPE_STRUCT 'S'
87 #define TYPE_MODULE_OLD 'M'
88 #define TYPE_CLASS 'c'
89 #define TYPE_MODULE 'm'
91 #define TYPE_SYMBOL ':'
92 #define TYPE_SYMLINK ';'
97 static ID s_dump
, s_load
, s_mdump
, s_mload
;
98 static ID s_dump_data
, s_load_data
, s_alloc
, s_call
;
99 static ID s_getbyte
, s_read
, s_write
, s_binmode
;
100 static ID s_encoding_short
, s_ruby2_keywords_flag
;
102 #define name_s_dump "_dump"
103 #define name_s_load "_load"
104 #define name_s_mdump "marshal_dump"
105 #define name_s_mload "marshal_load"
106 #define name_s_dump_data "_dump_data"
107 #define name_s_load_data "_load_data"
108 #define name_s_alloc "_alloc"
109 #define name_s_call "call"
110 #define name_s_getbyte "getbyte"
111 #define name_s_read "read"
112 #define name_s_write "write"
113 #define name_s_binmode "binmode"
114 #define name_s_encoding_short "E"
115 #define name_s_ruby2_keywords_flag "K"
120 VALUE (*dumper
)(VALUE
);
121 VALUE (*loader
)(VALUE
, VALUE
);
124 static st_table
*compat_allocator_tbl
;
125 static VALUE compat_allocator_tbl_wrapper
;
126 static VALUE
rb_marshal_dump_limited(VALUE obj
, VALUE port
, int limit
);
127 static VALUE
rb_marshal_load_with_proc(VALUE port
, VALUE proc
, bool freeze
);
130 mark_marshal_compat_i(st_data_t key
, st_data_t value
, st_data_t _
)
132 marshal_compat_t
*p
= (marshal_compat_t
*)value
;
133 rb_gc_mark(p
->newclass
);
134 rb_gc_mark(p
->oldclass
);
139 mark_marshal_compat_t(void *tbl
)
142 st_foreach(tbl
, mark_marshal_compat_i
, 0);
145 static st_table
*compat_allocator_table(void);
148 rb_marshal_define_compat(VALUE newclass
, VALUE oldclass
, VALUE (*dumper
)(VALUE
), VALUE (*loader
)(VALUE
, VALUE
))
150 marshal_compat_t
*compat
;
151 rb_alloc_func_t allocator
= rb_get_alloc_func(newclass
);
154 rb_raise(rb_eTypeError
, "no allocator");
157 compat
= ALLOC(marshal_compat_t
);
158 compat
->newclass
= Qnil
;
159 compat
->oldclass
= Qnil
;
160 compat
->newclass
= newclass
;
161 compat
->oldclass
= oldclass
;
162 compat
->dumper
= dumper
;
163 compat
->loader
= loader
;
165 st_insert(compat_allocator_table(), (st_data_t
)allocator
, (st_data_t
)compat
);
172 st_table
*compat_tbl
;
176 struct dump_call_arg
{
178 struct dump_arg
*arg
;
183 check_dump_arg(VALUE ret
, struct dump_arg
*arg
, const char *name
)
186 rb_raise(rb_eRuntimeError
, "Marshal.dump reentered at %s",
193 check_userdump_arg(VALUE obj
, ID sym
, int argc
, const VALUE
*argv
,
194 struct dump_arg
*arg
, const char *name
)
196 VALUE ret
= rb_funcallv(obj
, sym
, argc
, argv
);
197 VALUE klass
= CLASS_OF(obj
);
198 if (CLASS_OF(ret
) == klass
) {
199 rb_raise(rb_eRuntimeError
, "%"PRIsVALUE
"#%s returned same class instance",
202 return check_dump_arg(ret
, arg
, name
);
205 #define dump_funcall(arg, obj, sym, argc, argv) \
206 check_userdump_arg(obj, sym, argc, argv, arg, name_##sym)
207 #define dump_check_funcall(arg, obj, sym, argc, argv) \
208 check_dump_arg(rb_check_funcall(obj, sym, argc, argv), arg, name_##sym)
210 static void clear_dump_arg(struct dump_arg
*arg
);
213 mark_dump_arg(void *ptr
)
215 struct dump_arg
*p
= ptr
;
218 rb_mark_set(p
->symbols
);
219 rb_mark_set(p
->data
);
220 rb_mark_hash(p
->compat_tbl
);
225 free_dump_arg(void *ptr
)
232 memsize_dump_arg(const void *ptr
)
234 return sizeof(struct dump_arg
);
237 static const rb_data_type_t dump_arg_data
= {
239 {mark_dump_arg
, free_dump_arg
, memsize_dump_arg
,},
240 0, 0, RUBY_TYPED_FREE_IMMEDIATELY
244 must_not_be_anonymous(const char *type
, VALUE path
)
246 char *n
= RSTRING_PTR(path
);
248 if (!rb_enc_asciicompat(rb_enc_get(path
))) {
250 rb_raise(rb_eTypeError
, "can't dump non-ascii %s name % "PRIsVALUE
,
254 rb_raise(rb_eTypeError
, "can't dump anonymous %s % "PRIsVALUE
,
261 class2path(VALUE klass
)
263 VALUE path
= rb_class_path(klass
);
265 must_not_be_anonymous((RB_TYPE_P(klass
, T_CLASS
) ? "class" : "module"), path
);
266 if (rb_path_to_class(path
) != rb_class_real(klass
)) {
267 rb_raise(rb_eTypeError
, "% "PRIsVALUE
" can't be referred to", path
);
272 int ruby_marshal_write_long(long x
, char *buf
);
273 static void w_long(long, struct dump_arg
*);
274 static int w_encoding(VALUE encname
, struct dump_call_arg
*arg
);
275 static VALUE
encoding_name(VALUE obj
, struct dump_arg
*arg
);
278 w_nbyte(const char *s
, long n
, struct dump_arg
*arg
)
280 VALUE buf
= arg
->str
;
281 rb_str_buf_cat(buf
, s
, n
);
282 if (arg
->dest
&& RSTRING_LEN(buf
) >= BUFSIZ
) {
283 rb_io_write(arg
->dest
, buf
);
284 rb_str_resize(buf
, 0);
289 w_byte(char c
, struct dump_arg
*arg
)
295 w_bytes(const char *s
, long n
, struct dump_arg
*arg
)
301 #define w_cstr(s, arg) w_bytes((s), strlen(s), (arg))
/*
 * Emits the low 16 bits of x as two bytes through w_byte(),
 * least significant byte first.
 */
static void
w_short(int x, struct dump_arg *arg)
{
    const char low_byte = (char)((x >> 0) & 0xff);
    const char high_byte = (char)((x >> 8) & 0xff);

    w_byte(low_byte, arg);
    w_byte(high_byte, arg);
}
311 w_long(long x
, struct dump_arg
*arg
)
313 char buf
[sizeof(long)+1];
314 int i
= ruby_marshal_write_long(x
, buf
);
316 rb_raise(rb_eTypeError
, "long too big to dump");
318 w_nbyte(buf
, i
, arg
);
322 ruby_marshal_write_long(long x
, char *buf
)
327 if (!(RSHIFT(x
, 31) == 0 || RSHIFT(x
, 31) == -1)) {
328 /* big long does not fit in 4 bytes */
337 if (0 < x
&& x
< 123) {
338 buf
[0] = (char)(x
+ 5);
341 if (-124 < x
&& x
< 0) {
342 buf
[0] = (char)((x
- 5)&0xff);
345 for (i
=1;i
<(int)sizeof(long)+1;i
++) {
346 buf
[i
] = (char)(x
& 0xff);
361 #define DECIMAL_MANT (53-16) /* from IEEE754 double precision */
363 #if DBL_MANT_DIG > 32
365 #elif DBL_MANT_DIG > 24
367 #elif DBL_MANT_DIG > 16
374 load_mantissa(double d
, const char *buf
, long len
)
377 if (--len
> 0 && !*buf
++) { /* binary mantissa mark */
378 int e
, s
= d
< 0, dig
= 0;
381 modf(ldexp(frexp(fabs(d
), &e
), DECIMAL_MANT
), &d
);
385 default: m
= *buf
++ & 0xff; /* fall through */
387 case 3: m
= (m
<< 8) | (*buf
++ & 0xff); /* fall through */
390 case 2: m
= (m
<< 8) | (*buf
++ & 0xff); /* fall through */
393 case 1: m
= (m
<< 8) | (*buf
++ & 0xff);
396 dig
-= len
< MANT_BITS
/ 8 ? 8 * (unsigned)len
: MANT_BITS
;
397 d
+= ldexp((double)m
, dig
);
398 } while ((len
-= MANT_BITS
/ 8) > 0);
399 d
= ldexp(d
, e
- DECIMAL_MANT
);
405 #define load_mantissa(d, buf, len) (d)
409 #define FLOAT_DIG (DBL_DIG+2)
415 w_float(double d
, struct dump_arg
*arg
)
417 char buf
[FLOAT_DIG
+ (DECIMAL_MANT
+ 7) / 8 + 10];
420 if (d
< 0) w_cstr("-inf", arg
);
421 else w_cstr("inf", arg
);
427 if (signbit(d
)) w_cstr("-0", arg
);
428 else w_cstr("0", arg
);
431 int decpt
, sign
, digs
, len
= 0;
432 char *e
, *p
= ruby_dtoa(d
, 0, 0, &decpt
, &sign
, &e
);
433 if (sign
) buf
[len
++] = '-';
435 if (decpt
< -3 || decpt
> digs
) {
437 if (--digs
> 0) buf
[len
++] = '.';
438 memcpy(buf
+ len
, p
+ 1, digs
);
440 len
+= snprintf(buf
+ len
, sizeof(buf
) - len
, "e%d", decpt
- 1);
442 else if (decpt
> 0) {
443 memcpy(buf
+ len
, p
, decpt
);
445 if ((digs
-= decpt
) > 0) {
447 memcpy(buf
+ len
, p
+ decpt
, digs
);
455 memset(buf
+ len
, '0', -decpt
);
458 memcpy(buf
+ len
, p
, digs
);
462 w_bytes(buf
, len
, arg
);
467 w_symbol(VALUE sym
, struct dump_arg
*arg
)
472 if (st_lookup(arg
->symbols
, sym
, &num
)) {
473 w_byte(TYPE_SYMLINK
, arg
);
474 w_long((long)num
, arg
);
477 const VALUE orig_sym
= sym
;
478 sym
= rb_sym2str(sym
);
480 rb_raise(rb_eTypeError
, "can't dump anonymous ID %"PRIdVALUE
, sym
);
482 encname
= encoding_name(sym
, arg
);
483 if (NIL_P(encname
) ||
484 rb_enc_str_coderange(sym
) == ENC_CODERANGE_7BIT
) {
488 w_byte(TYPE_IVAR
, arg
);
490 w_byte(TYPE_SYMBOL
, arg
);
491 w_bytes(RSTRING_PTR(sym
), RSTRING_LEN(sym
), arg
);
492 st_add_direct(arg
->symbols
, orig_sym
, arg
->symbols
->num_entries
);
493 if (!NIL_P(encname
)) {
494 struct dump_call_arg c_arg
;
498 w_encoding(encname
, &c_arg
);
504 w_unique(VALUE s
, struct dump_arg
*arg
)
506 must_not_be_anonymous("class", s
);
507 w_symbol(rb_str_intern(s
), arg
);
510 static void w_object(VALUE
,struct dump_arg
*,int);
513 hash_each(VALUE key
, VALUE value
, VALUE v
)
515 struct dump_call_arg
*arg
= (void *)v
;
516 w_object(key
, arg
->arg
, arg
->limit
);
517 w_object(value
, arg
->arg
, arg
->limit
);
521 #define SINGLETON_DUMP_UNABLE_P(klass) \
522 (rb_id_table_size(RCLASS_M_TBL(klass)) > 0 || \
523 (RCLASS_IV_TBL(klass) && RCLASS_IV_TBL(klass)->num_entries > 1))
526 w_extended(VALUE klass
, struct dump_arg
*arg
, int check
)
528 if (check
&& FL_TEST(klass
, FL_SINGLETON
)) {
529 VALUE origin
= RCLASS_ORIGIN(klass
);
530 if (SINGLETON_DUMP_UNABLE_P(klass
) ||
531 (origin
!= klass
&& SINGLETON_DUMP_UNABLE_P(origin
))) {
532 rb_raise(rb_eTypeError
, "singleton can't be dumped");
534 klass
= RCLASS_SUPER(klass
);
536 while (BUILTIN_TYPE(klass
) == T_ICLASS
) {
537 if (!FL_TEST(klass
, RICLASS_IS_ORIGIN
) ||
538 BUILTIN_TYPE(RBASIC(klass
)->klass
) != T_MODULE
) {
539 VALUE path
= rb_class_name(RBASIC(klass
)->klass
);
540 w_byte(TYPE_EXTENDED
, arg
);
543 klass
= RCLASS_SUPER(klass
);
548 w_class(char type
, VALUE obj
, struct dump_arg
*arg
, int check
)
554 if (arg
->compat_tbl
&&
555 st_lookup(arg
->compat_tbl
, (st_data_t
)obj
, &real_obj
)) {
556 obj
= (VALUE
)real_obj
;
558 klass
= CLASS_OF(obj
);
559 w_extended(klass
, arg
, check
);
561 path
= class2path(rb_class_real(klass
));
566 w_uclass(VALUE obj
, VALUE super
, struct dump_arg
*arg
)
568 VALUE klass
= CLASS_OF(obj
);
570 w_extended(klass
, arg
, TRUE
);
571 klass
= rb_class_real(klass
);
572 if (klass
!= super
) {
573 w_byte(TYPE_UCLASS
, arg
);
574 w_unique(class2path(klass
), arg
);
579 rb_hash_ruby2_keywords_p(VALUE obj
)
581 return (RHASH(obj
)->basic
.flags
& RHASH_PASS_AS_KEYWORDS
) != 0;
585 rb_hash_ruby2_keywords(VALUE obj
)
587 RHASH(obj
)->basic
.flags
|= RHASH_PASS_AS_KEYWORDS
;
591 to_be_skipped_id(const ID id
)
593 if (id
== s_encoding_short
) return true;
594 if (id
== s_ruby2_keywords_flag
) return true;
595 if (id
== rb_id_encoding()) return true;
596 return !rb_id2str(id
);
600 struct dump_call_arg
*dump
;
605 w_obj_each(st_data_t key
, st_data_t val
, st_data_t a
)
608 VALUE value
= (VALUE
)val
;
609 struct w_ivar_arg
*ivarg
= (struct w_ivar_arg
*)a
;
610 struct dump_call_arg
*arg
= ivarg
->dump
;
612 if (to_be_skipped_id(id
)) {
613 if (id
== s_encoding_short
) {
614 rb_warn("instance variable `"name_s_encoding_short
"' on class %"PRIsVALUE
" is not dumped",
617 if (id
== s_ruby2_keywords_flag
) {
618 rb_warn("instance variable `"name_s_ruby2_keywords_flag
"' on class %"PRIsVALUE
" is not dumped",
623 if (!ivarg
->num_ivar
) {
624 rb_raise(rb_eRuntimeError
, "instance variable added to %"PRIsVALUE
" instance",
628 w_symbol(ID2SYM(id
), arg
->arg
);
629 w_object(value
, arg
->arg
, arg
->limit
);
634 obj_count_ivars(st_data_t key
, st_data_t val
, st_data_t a
)
637 if (!to_be_skipped_id(id
) && UNLIKELY(!++*(st_index_t
*)a
)) {
638 rb_raise(rb_eRuntimeError
, "too many instance variables");
644 encoding_name(VALUE obj
, struct dump_arg
*arg
)
646 if (rb_enc_capable(obj
)) {
647 int encidx
= rb_enc_get_index(obj
);
648 rb_encoding
*enc
= 0;
651 if (encidx
<= 0 || !(enc
= rb_enc_from_index(encidx
))) {
655 /* special treatment for US-ASCII and UTF-8 */
656 if (encidx
== rb_usascii_encindex()) {
659 else if (encidx
== rb_utf8_encindex()) {
664 !st_lookup(arg
->encodings
, (st_data_t
)rb_enc_name(enc
), &name
) :
665 (arg
->encodings
= st_init_strcasetable(), 1)) {
666 name
= (st_data_t
)rb_str_new_cstr(rb_enc_name(enc
));
667 st_insert(arg
->encodings
, (st_data_t
)rb_enc_name(enc
), name
);
677 w_encoding(VALUE encname
, struct dump_call_arg
*arg
)
679 int limit
= arg
->limit
;
680 if (limit
>= 0) ++limit
;
684 w_symbol(ID2SYM(s_encoding_short
), arg
->arg
);
685 w_object(encname
, arg
->arg
, limit
);
690 w_symbol(ID2SYM(rb_id_encoding()), arg
->arg
);
691 w_object(encname
, arg
->arg
, limit
);
696 has_ivars(VALUE obj
, VALUE encname
, VALUE
*ivobj
)
698 st_index_t num
= !NIL_P(encname
);
700 if (SPECIAL_CONST_P(obj
)) goto generic
;
701 switch (BUILTIN_TYPE(obj
)) {
705 break; /* counted elsewhere */
707 if (rb_hash_ruby2_keywords_p(obj
)) ++num
;
711 rb_ivar_foreach(obj
, obj_count_ivars
, (st_data_t
)&num
);
712 if (num
) *ivobj
= obj
;
719 w_ivar_each(VALUE obj
, st_index_t num
, struct dump_call_arg
*arg
)
721 struct w_ivar_arg ivarg
= {arg
, num
};
723 rb_ivar_foreach(obj
, w_obj_each
, (st_data_t
)&ivarg
);
724 if (ivarg
.num_ivar
) {
725 rb_raise(rb_eRuntimeError
, "instance variable removed from %"PRIsVALUE
" instance",
731 w_ivar(st_index_t num
, VALUE ivobj
, VALUE encname
, struct dump_call_arg
*arg
)
733 w_long(num
, arg
->arg
);
734 num
-= w_encoding(encname
, arg
);
735 if (RB_TYPE_P(ivobj
, T_HASH
) && rb_hash_ruby2_keywords_p(ivobj
)) {
736 int limit
= arg
->limit
;
737 if (limit
>= 0) ++limit
;
738 w_symbol(ID2SYM(s_ruby2_keywords_flag
), arg
->arg
);
739 w_object(Qtrue
, arg
->arg
, limit
);
742 if (ivobj
!= Qundef
&& num
) {
743 w_ivar_each(ivobj
, num
, arg
);
748 w_objivar(VALUE obj
, struct dump_call_arg
*arg
)
752 rb_ivar_foreach(obj
, obj_count_ivars
, (st_data_t
)&num
);
753 w_long(num
, arg
->arg
);
754 w_ivar_each(obj
, num
, arg
);
758 w_object(VALUE obj
, struct dump_arg
*arg
, int limit
)
760 struct dump_call_arg c_arg
;
761 VALUE ivobj
= Qundef
;
763 st_index_t hasiv
= 0;
764 VALUE encname
= Qnil
;
767 rb_raise(rb_eArgError
, "exceed depth limit");
770 if (limit
> 0) limit
--;
775 if (st_lookup(arg
->data
, obj
, &num
)) {
776 w_byte(TYPE_LINK
, arg
);
777 w_long((long)num
, arg
);
782 w_byte(TYPE_NIL
, arg
);
784 else if (obj
== Qtrue
) {
785 w_byte(TYPE_TRUE
, arg
);
787 else if (obj
== Qfalse
) {
788 w_byte(TYPE_FALSE
, arg
);
790 else if (FIXNUM_P(obj
)) {
792 w_byte(TYPE_FIXNUM
, arg
);
793 w_long(FIX2INT(obj
), arg
);
795 if (RSHIFT((long)obj
, 31) == 0 || RSHIFT((long)obj
, 31) == -1) {
796 w_byte(TYPE_FIXNUM
, arg
);
797 w_long(FIX2LONG(obj
), arg
);
800 w_object(rb_int2big(FIX2LONG(obj
)), arg
, limit
);
804 else if (SYMBOL_P(obj
)) {
807 else if (FLONUM_P(obj
)) {
808 st_add_direct(arg
->data
, obj
, arg
->data
->num_entries
);
809 w_byte(TYPE_FLOAT
, arg
);
810 w_float(RFLOAT_VALUE(obj
), arg
);
815 if (!RBASIC_CLASS(obj
)) {
816 rb_raise(rb_eTypeError
, "can't dump internal %s",
817 rb_builtin_type_name(BUILTIN_TYPE(obj
)));
820 if (rb_obj_respond_to(obj
, s_mdump
, TRUE
)) {
821 st_add_direct(arg
->data
, obj
, arg
->data
->num_entries
);
823 v
= dump_funcall(arg
, obj
, s_mdump
, 0, 0);
824 w_class(TYPE_USRMARSHAL
, obj
, arg
, FALSE
);
825 w_object(v
, arg
, limit
);
828 if (rb_obj_respond_to(obj
, s_dump
, TRUE
)) {
829 VALUE ivobj2
= Qundef
;
834 v
= dump_funcall(arg
, obj
, s_dump
, 1, &v
);
835 if (!RB_TYPE_P(v
, T_STRING
)) {
836 rb_raise(rb_eTypeError
, "_dump() must return string");
838 hasiv
= has_ivars(obj
, (encname
= encoding_name(obj
, arg
)), &ivobj
);
839 hasiv2
= has_ivars(v
, (encname2
= encoding_name(v
, arg
)), &ivobj2
);
845 if (hasiv
) w_byte(TYPE_IVAR
, arg
);
846 w_class(TYPE_USERDEF
, obj
, arg
, FALSE
);
847 w_bytes(RSTRING_PTR(v
), RSTRING_LEN(v
), arg
);
849 w_ivar(hasiv
, ivobj
, encname
, &c_arg
);
851 st_add_direct(arg
->data
, obj
, arg
->data
->num_entries
);
855 st_add_direct(arg
->data
, obj
, arg
->data
->num_entries
);
857 hasiv
= has_ivars(obj
, (encname
= encoding_name(obj
, arg
)), &ivobj
);
859 st_data_t compat_data
;
860 rb_alloc_func_t allocator
= rb_get_alloc_func(RBASIC(obj
)->klass
);
861 if (st_lookup(compat_allocator_tbl
,
862 (st_data_t
)allocator
,
864 marshal_compat_t
*compat
= (marshal_compat_t
*)compat_data
;
865 VALUE real_obj
= obj
;
866 obj
= compat
->dumper(real_obj
);
867 if (!arg
->compat_tbl
) {
868 arg
->compat_tbl
= rb_init_identtable();
870 st_insert(arg
->compat_tbl
, (st_data_t
)obj
, (st_data_t
)real_obj
);
871 if (obj
!= real_obj
&& ivobj
== Qundef
) hasiv
= 0;
874 if (hasiv
) w_byte(TYPE_IVAR
, arg
);
876 switch (BUILTIN_TYPE(obj
)) {
878 if (FL_TEST(obj
, FL_SINGLETON
)) {
879 rb_raise(rb_eTypeError
, "singleton class can't be dumped");
881 w_byte(TYPE_CLASS
, arg
);
883 VALUE path
= class2path(obj
);
884 w_bytes(RSTRING_PTR(path
), RSTRING_LEN(path
), arg
);
890 w_byte(TYPE_MODULE
, arg
);
892 VALUE path
= class2path(obj
);
893 w_bytes(RSTRING_PTR(path
), RSTRING_LEN(path
), arg
);
899 w_byte(TYPE_FLOAT
, arg
);
900 w_float(RFLOAT_VALUE(obj
), arg
);
904 w_byte(TYPE_BIGNUM
, arg
);
906 char sign
= BIGNUM_SIGN(obj
) ? '+' : '-';
907 size_t len
= BIGNUM_LEN(obj
);
910 BDIGIT
*d
= BIGNUM_DIGITS(obj
);
912 slen
= SHORTLEN(len
);
913 if (LONG_MAX
< slen
) {
914 rb_raise(rb_eTypeError
, "too big Bignum can't be dumped");
918 w_long((long)slen
, arg
);
919 for (j
= 0; j
< len
; j
++) {
920 #if SIZEOF_BDIGIT > SIZEOF_SHORT
924 for (i
=0; i
<SIZEOF_BDIGIT
; i
+=SIZEOF_SHORT
) {
925 w_short(num
& SHORTMASK
, arg
);
927 if (j
== len
- 1 && num
== 0) break;
938 w_uclass(obj
, rb_cString
, arg
);
939 w_byte(TYPE_STRING
, arg
);
940 w_bytes(RSTRING_PTR(obj
), RSTRING_LEN(obj
), arg
);
944 w_uclass(obj
, rb_cRegexp
, arg
);
945 w_byte(TYPE_REGEXP
, arg
);
947 int opts
= rb_reg_options(obj
);
948 w_bytes(RREGEXP_SRC_PTR(obj
), RREGEXP_SRC_LEN(obj
), arg
);
949 w_byte((char)opts
, arg
);
954 w_uclass(obj
, rb_cArray
, arg
);
955 w_byte(TYPE_ARRAY
, arg
);
957 long i
, len
= RARRAY_LEN(obj
);
960 for (i
=0; i
<RARRAY_LEN(obj
); i
++) {
961 w_object(RARRAY_AREF(obj
, i
), arg
, limit
);
962 if (len
!= RARRAY_LEN(obj
)) {
963 rb_raise(rb_eRuntimeError
, "array modified during dump");
970 w_uclass(obj
, rb_cHash
, arg
);
971 if (rb_hash_compare_by_id_p(obj
)) {
972 w_byte(TYPE_UCLASS
, arg
);
973 w_symbol(rb_sym_intern_ascii_cstr("Hash"), arg
);
975 if (NIL_P(RHASH_IFNONE(obj
))) {
976 w_byte(TYPE_HASH
, arg
);
978 else if (FL_TEST(obj
, RHASH_PROC_DEFAULT
)) {
979 rb_raise(rb_eTypeError
, "can't dump hash with default proc");
982 w_byte(TYPE_HASH_DEF
, arg
);
984 w_long(rb_hash_size_num(obj
), arg
);
985 rb_hash_foreach(obj
, hash_each
, (st_data_t
)&c_arg
);
986 if (!NIL_P(RHASH_IFNONE(obj
))) {
987 w_object(RHASH_IFNONE(obj
), arg
, limit
);
992 w_class(TYPE_STRUCT
, obj
, arg
, TRUE
);
994 long len
= RSTRUCT_LEN(obj
);
999 mem
= rb_struct_members(obj
);
1000 for (i
=0; i
<len
; i
++) {
1001 w_symbol(RARRAY_AREF(mem
, i
), arg
);
1002 w_object(RSTRUCT_GET(obj
, i
), arg
, limit
);
1008 w_class(TYPE_OBJECT
, obj
, arg
, TRUE
);
1009 w_objivar(obj
, &c_arg
);
1016 if (!rb_obj_respond_to(obj
, s_dump_data
, TRUE
)) {
1017 rb_raise(rb_eTypeError
,
1018 "no _dump_data is defined for class %"PRIsVALUE
,
1021 v
= dump_funcall(arg
, obj
, s_dump_data
, 0, 0);
1022 w_class(TYPE_DATA
, obj
, arg
, TRUE
);
1023 w_object(v
, arg
, limit
);
1028 rb_raise(rb_eTypeError
, "can't dump %"PRIsVALUE
,
1035 w_ivar(hasiv
, ivobj
, encname
, &c_arg
);
1040 clear_dump_arg(struct dump_arg
*arg
)
1042 if (!arg
->symbols
) return;
1043 st_free_table(arg
->symbols
);
1045 st_free_table(arg
->data
);
1047 if (arg
->compat_tbl
) {
1048 st_free_table(arg
->compat_tbl
);
1049 arg
->compat_tbl
= 0;
1051 if (arg
->encodings
) {
1052 st_free_table(arg
->encodings
);
1057 NORETURN(static inline void io_needed(void));
1061 rb_raise(rb_eTypeError
, "instance of IO needed");
1066 * dump( obj [, anIO] , limit=-1 ) -> anIO
1068 * Serializes obj and all descendant objects. If anIO is
1069 * specified, the serialized data will be written to it, otherwise the
1070 * data will be returned as a String. If limit is specified, the
1071 * traversal of subobjects will be limited to that depth. If limit is
1072 * negative, no checking of depth will be performed.
1075 * def initialize(str)
1083 * (produces no output)
1085 * o = Klass.new("hello\n")
1086 * data = Marshal.dump(o)
1087 * obj = Marshal.load(data)
1088 * obj.say_hello #=> "hello\n"
1090 * Marshal can't dump the following objects:
1091 * * anonymous Class/Module.
1092 * * objects which are related to system (e.g. Dir, File::Stat, IO, File, Socket
1094 * * an instance of MatchData, Data, Method, UnboundMethod, Proc, Thread,
1095 * ThreadGroup, Continuation
1096 * * objects which define singleton methods
1099 marshal_dump(int argc
, VALUE
*argv
, VALUE _
)
1101 VALUE obj
, port
, a1
, a2
;
1105 rb_scan_args(argc
, argv
, "12", &obj
, &a1
, &a2
);
1107 if (!NIL_P(a2
)) limit
= NUM2INT(a2
);
1108 if (NIL_P(a1
)) io_needed();
1111 else if (argc
== 2) {
1112 if (FIXNUM_P(a1
)) limit
= FIX2INT(a1
);
1113 else if (NIL_P(a1
)) io_needed();
1116 return rb_marshal_dump_limited(obj
, port
, limit
);
1120 rb_marshal_dump_limited(VALUE obj
, VALUE port
, int limit
)
1122 struct dump_arg
*arg
;
1123 VALUE wrapper
; /* used to avoid memory leak in case of exception */
1125 wrapper
= TypedData_Make_Struct(0, struct dump_arg
, &dump_arg_data
, arg
);
1127 arg
->symbols
= st_init_numtable();
1128 arg
->data
= rb_init_identtable();
1129 arg
->compat_tbl
= 0;
1131 arg
->str
= rb_str_buf_new(0);
1133 if (!rb_respond_to(port
, s_write
)) {
1137 dump_check_funcall(arg
, port
, s_binmode
, 0, 0);
1143 w_byte(MARSHAL_MAJOR
, arg
);
1144 w_byte(MARSHAL_MINOR
, arg
);
1146 w_object(obj
, arg
, limit
);
1148 rb_io_write(arg
->dest
, arg
->str
);
1149 rb_str_resize(arg
->str
, 0);
1151 clear_dump_arg(arg
);
1152 RB_GC_GUARD(wrapper
);
1165 st_table
*partial_objects
;
1167 st_table
*compat_tbl
;
1172 check_load_arg(VALUE ret
, struct load_arg
*arg
, const char *name
)
1174 if (!arg
->symbols
) {
1175 rb_raise(rb_eRuntimeError
, "Marshal.load reentered at %s",
1180 #define load_funcall(arg, obj, sym, argc, argv) \
1181 check_load_arg(rb_funcallv(obj, sym, argc, argv), arg, name_##sym)
1183 static void clear_load_arg(struct load_arg
*arg
);
1186 mark_load_arg(void *ptr
)
1188 struct load_arg
*p
= ptr
;
1191 rb_mark_tbl(p
->symbols
);
1192 rb_mark_tbl(p
->data
);
1193 rb_mark_tbl(p
->partial_objects
);
1194 rb_mark_hash(p
->compat_tbl
);
1198 free_load_arg(void *ptr
)
1200 clear_load_arg(ptr
);
1205 memsize_load_arg(const void *ptr
)
1207 return sizeof(struct load_arg
);
1210 static const rb_data_type_t load_arg_data
= {
1212 {mark_load_arg
, free_load_arg
, memsize_load_arg
,},
1213 0, 0, RUBY_TYPED_FREE_IMMEDIATELY
1216 #define r_entry(v, arg) r_entry0((v), (arg)->data->num_entries, (arg))
1217 static VALUE
r_object(struct load_arg
*arg
);
1218 static VALUE
r_symbol(struct load_arg
*arg
);
1220 NORETURN(static void too_short(void));
1224 rb_raise(rb_eArgError
, "marshal data too short");
1228 r_prepare(struct load_arg
*arg
)
1230 st_index_t idx
= arg
->data
->num_entries
;
1232 st_insert(arg
->data
, (st_data_t
)idx
, (st_data_t
)Qundef
);
1236 static unsigned char
1237 r_byte1_buffered(struct load_arg
*arg
)
1239 if (arg
->buflen
== 0) {
1240 long readable
= arg
->readable
< BUFSIZ
? arg
->readable
: BUFSIZ
;
1241 VALUE str
, n
= LONG2NUM(readable
);
1243 str
= load_funcall(arg
, arg
->src
, s_read
, 1, &n
);
1244 if (NIL_P(str
)) too_short();
1246 memcpy(arg
->buf
, RSTRING_PTR(str
), RSTRING_LEN(str
));
1248 arg
->buflen
= RSTRING_LEN(str
);
1251 return arg
->buf
[arg
->offset
++];
1255 r_byte(struct load_arg
*arg
)
1259 if (RB_TYPE_P(arg
->src
, T_STRING
)) {
1260 if (RSTRING_LEN(arg
->src
) > arg
->offset
) {
1261 c
= (unsigned char)RSTRING_PTR(arg
->src
)[arg
->offset
++];
1268 if (arg
->readable
>0 || arg
->buflen
> 0) {
1269 c
= r_byte1_buffered(arg
);
1272 VALUE v
= load_funcall(arg
, arg
->src
, s_getbyte
, 0, 0);
1273 if (NIL_P(v
)) rb_eof_error();
1274 c
= (unsigned char)NUM2CHR(v
);
1280 NORETURN(static void long_toobig(int size
));
1283 long_toobig(int size
)
1285 rb_raise(rb_eTypeError
, "long too big for this architecture (size "
1286 STRINGIZE(SIZEOF_LONG
)", given %d)", size
);
1290 r_long(struct load_arg
*arg
)
1293 int c
= (signed char)r_byte(arg
);
1296 if (c
== 0) return 0;
1298 if (4 < c
&& c
< 128) {
1301 if (c
> (int)sizeof(long)) long_toobig(c
);
1304 x
|= (long)r_byte(arg
) << (8*i
);
1308 if (-129 < c
&& c
< -4) {
1312 if (c
> (int)sizeof(long)) long_toobig(c
);
1315 x
&= ~((long)0xff << (8*i
));
1316 x
|= (long)r_byte(arg
) << (8*i
);
1323 ruby_marshal_read_long(const char **buf
, long len
)
1327 struct load_arg arg
;
1328 memset(&arg
, 0, sizeof(arg
));
1329 arg
.src
= rb_setup_fake_str(&src
, *buf
, len
, 0);
1336 r_bytes1(long len
, struct load_arg
*arg
)
1338 VALUE str
, n
= LONG2NUM(len
);
1340 str
= load_funcall(arg
, arg
->src
, s_read
, 1, &n
);
1341 if (NIL_P(str
)) too_short();
1343 if (RSTRING_LEN(str
) != len
) too_short();
1349 r_bytes1_buffered(long len
, struct load_arg
*arg
)
1353 if (len
<= arg
->buflen
) {
1354 str
= rb_str_new(arg
->buf
+arg
->offset
, len
);
1359 long buflen
= arg
->buflen
;
1360 long readable
= arg
->readable
+ 1;
1361 long tmp_len
, read_len
, need_len
= len
- buflen
;
1364 readable
= readable
< BUFSIZ
? readable
: BUFSIZ
;
1365 read_len
= need_len
> readable
? need_len
: readable
;
1366 n
= LONG2NUM(read_len
);
1367 tmp
= load_funcall(arg
, arg
->src
, s_read
, 1, &n
);
1368 if (NIL_P(tmp
)) too_short();
1371 tmp_len
= RSTRING_LEN(tmp
);
1373 if (tmp_len
< need_len
) too_short();
1375 str
= rb_str_new(arg
->buf
+arg
->offset
, buflen
);
1376 rb_str_cat(str
, RSTRING_PTR(tmp
), need_len
);
1378 if (tmp_len
> need_len
) {
1379 buflen
= tmp_len
- need_len
;
1380 memcpy(arg
->buf
, RSTRING_PTR(tmp
)+need_len
, buflen
);
1381 arg
->buflen
= buflen
;
1392 #define r_bytes(arg) r_bytes0(r_long(arg), (arg))
1395 r_bytes0(long len
, struct load_arg
*arg
)
1399 if (len
== 0) return rb_str_new(0, 0);
1400 if (RB_TYPE_P(arg
->src
, T_STRING
)) {
1401 if (RSTRING_LEN(arg
->src
) - arg
->offset
>= len
) {
1402 str
= rb_str_new(RSTRING_PTR(arg
->src
)+arg
->offset
, len
);
1410 if (arg
->readable
> 0 || arg
->buflen
> 0) {
1411 str
= r_bytes1_buffered(len
, arg
);
1414 str
= r_bytes1(len
, arg
);
/*
 * Compares a byte sequence against an expected name.
 *
 *   name, nlen - expected name and its length in bytes
 *   p, l       - candidate bytes and their length
 *
 * Returns 1 when both lengths match and the first nlen bytes are
 * identical, 0 otherwise.  A negative l never matches.
 *
 * Two empty sequences compare equal without touching either pointer;
 * computing nlen-1 for an empty name would wrap around to SIZE_MAX and
 * make memcmp() read far out of bounds.
 */
static int
name_equal(const char *name, size_t nlen, const char *p, long l)
{
    if (l < 0 || (size_t)l != nlen) return 0;
    if (nlen == 0) return 1;
    return memcmp(p, name, nlen) == 0;
}
1428 sym2encidx(VALUE sym
, VALUE val
)
1430 static const char name_encoding
[8] = "encoding";
1433 if (rb_enc_get_index(sym
) != ENCINDEX_US_ASCII
) return -1;
1434 RSTRING_GETMEM(sym
, p
, l
);
1435 if (l
<= 0) return -1;
1436 if (name_equal(name_encoding
, sizeof(name_encoding
), p
, l
)) {
1437 int idx
= rb_enc_find_index(StringValueCStr(val
));
1440 if (name_equal(name_s_encoding_short
, rb_strlen_lit(name_s_encoding_short
), p
, l
)) {
1441 if (val
== Qfalse
) return rb_usascii_encindex();
1442 else if (val
== Qtrue
) return rb_utf8_encindex();
1449 symname_equal(VALUE sym
, const char *name
, size_t nlen
)
1453 if (rb_enc_get_index(sym
) != ENCINDEX_US_ASCII
) return 0;
1454 RSTRING_GETMEM(sym
, p
, l
);
1455 return name_equal(name
, nlen
, p
, l
);
1458 #define BUILD_ASSERT_POSITIVE(n) \
1459 /* make 0 negative to workaround the "zero size array" GCC extension, */ \
1460 ((sizeof(char [2*(ssize_t)(n)-1])+1)/2) /* assuming no overflow */
1461 #define symname_equal_lit(sym, sym_name) \
1462 symname_equal(sym, sym_name, BUILD_ASSERT_POSITIVE(rb_strlen_lit(sym_name)))
1465 r_symlink(struct load_arg
*arg
)
1468 long num
= r_long(arg
);
1470 if (!st_lookup(arg
->symbols
, num
, &sym
)) {
1471 rb_raise(rb_eArgError
, "bad symbol");
1477 r_symreal(struct load_arg
*arg
, int ivar
)
1479 VALUE s
= r_bytes(arg
);
1482 st_index_t n
= arg
->symbols
->num_entries
;
1484 if (rb_enc_str_asciionly_p(s
)) rb_enc_associate_index(s
, ENCINDEX_US_ASCII
);
1485 st_insert(arg
->symbols
, (st_data_t
)n
, (st_data_t
)s
);
1487 long num
= r_long(arg
);
1489 sym
= r_symbol(arg
);
1490 idx
= sym2encidx(sym
, r_object(arg
));
1494 rb_enc_associate_index(s
, idx
);
1495 if (rb_enc_str_coderange(s
) == ENC_CODERANGE_BROKEN
) {
1496 rb_raise(rb_eArgError
, "invalid byte sequence in %s: %+"PRIsVALUE
,
1497 rb_enc_name(rb_enc_from_index(idx
)), s
);
1505 r_symbol(struct load_arg
*arg
)
1510 switch ((type
= r_byte(arg
))) {
1512 rb_raise(rb_eArgError
, "dump format error for symbol(0x%x)", type
);
1517 return r_symreal(arg
, ivar
);
1520 rb_raise(rb_eArgError
, "dump format error (symlink with encoding)");
1522 return r_symlink(arg
);
1527 r_unique(struct load_arg
*arg
)
1529 return r_symbol(arg
);
1533 r_string(struct load_arg
*arg
)
1535 return r_bytes(arg
);
1539 r_entry0(VALUE v
, st_index_t num
, struct load_arg
*arg
)
1541 st_data_t real_obj
= (st_data_t
)v
;
1542 if (arg
->compat_tbl
) {
1543 /* real_obj is kept if not found */
1544 st_lookup(arg
->compat_tbl
, v
, &real_obj
);
1546 st_insert(arg
->data
, num
, real_obj
);
1547 st_insert(arg
->partial_objects
, (st_data_t
)real_obj
, Qtrue
);
1552 r_fixup_compat(VALUE v
, struct load_arg
*arg
)
1555 st_data_t key
= (st_data_t
)v
;
1556 if (arg
->compat_tbl
&& st_delete(arg
->compat_tbl
, &key
, &data
)) {
1557 VALUE real_obj
= (VALUE
)data
;
1558 rb_alloc_func_t allocator
= rb_get_alloc_func(CLASS_OF(real_obj
));
1559 if (st_lookup(compat_allocator_tbl
, (st_data_t
)allocator
, &data
)) {
1560 marshal_compat_t
*compat
= (marshal_compat_t
*)data
;
1561 compat
->loader(real_obj
, v
);
1569 r_post_proc(VALUE v
, struct load_arg
*arg
)
1572 v
= load_funcall(arg
, arg
->proc
, s_call
, 1, &v
);
1578 r_leave(VALUE v
, struct load_arg
*arg
, bool partial
)
1580 v
= r_fixup_compat(v
, arg
);
1583 st_data_t key
= (st_data_t
)v
;
1584 st_delete(arg
->partial_objects
, &key
, &data
);
1586 if (RB_TYPE_P(v
, T_MODULE
) || RB_TYPE_P(v
, T_CLASS
)) {
1589 else if (RB_TYPE_P(v
, T_STRING
)) {
1590 v
= rb_str_to_interned_str(v
);
1596 v
= r_post_proc(v
, arg
);
1602 copy_ivar_i(st_data_t key
, st_data_t val
, st_data_t arg
)
1604 VALUE obj
= (VALUE
)arg
, value
= (VALUE
)val
;
1607 if (!rb_ivar_defined(obj
, vid
))
1608 rb_ivar_set(obj
, vid
, value
);
1613 r_copy_ivar(VALUE v
, VALUE data
)
1615 rb_ivar_foreach(data
, copy_ivar_i
, (st_data_t
)v
);
1620 r_ivar(VALUE obj
, int *has_encoding
, struct load_arg
*arg
)
1627 VALUE sym
= r_symbol(arg
);
1628 VALUE val
= r_object(arg
);
1629 int idx
= sym2encidx(sym
, val
);
1631 if (rb_enc_capable(obj
)) {
1632 rb_enc_associate_index(obj
, idx
);
1635 rb_raise(rb_eArgError
, "%"PRIsVALUE
" is not enc_capable", obj
);
1637 if (has_encoding
) *has_encoding
= TRUE
;
1639 else if (symname_equal_lit(sym
, name_s_ruby2_keywords_flag
)) {
1640 if (RB_TYPE_P(obj
, T_HASH
)) {
1641 rb_hash_ruby2_keywords(obj
);
1644 rb_raise(rb_eArgError
, "ruby2_keywords flag is given but %"PRIsVALUE
" is not a Hash", obj
);
1648 rb_ivar_set(obj
, rb_intern_str(sym
), val
);
1650 } while (--len
> 0);
1655 path2class(VALUE path
)
1657 VALUE v
= rb_path_to_class(path
);
1659 if (!RB_TYPE_P(v
, T_CLASS
)) {
1660 rb_raise(rb_eArgError
, "%"PRIsVALUE
" does not refer to class", path
);
1665 #define path2module(path) must_be_module(rb_path_to_class(path), path)
1668 must_be_module(VALUE v
, VALUE path
)
1670 if (!RB_TYPE_P(v
, T_MODULE
)) {
1671 rb_raise(rb_eArgError
, "%"PRIsVALUE
" does not refer to module", path
);
1677 obj_alloc_by_klass(VALUE klass
, struct load_arg
*arg
, VALUE
*oldclass
)
1680 rb_alloc_func_t allocator
;
1682 allocator
= rb_get_alloc_func(klass
);
1683 if (st_lookup(compat_allocator_tbl
, (st_data_t
)allocator
, &data
)) {
1684 marshal_compat_t
*compat
= (marshal_compat_t
*)data
;
1685 VALUE real_obj
= rb_obj_alloc(klass
);
1686 VALUE obj
= rb_obj_alloc(compat
->oldclass
);
1687 if (oldclass
) *oldclass
= compat
->oldclass
;
1689 if (!arg
->compat_tbl
) {
1690 arg
->compat_tbl
= rb_init_identtable();
1692 st_insert(arg
->compat_tbl
, (st_data_t
)obj
, (st_data_t
)real_obj
);
1696 return rb_obj_alloc(klass
);
1700 obj_alloc_by_path(VALUE path
, struct load_arg
*arg
)
1702 return obj_alloc_by_klass(path2class(path
), arg
, 0);
1706 append_extmod(VALUE obj
, VALUE extmod
)
1708 long i
= RARRAY_LEN(extmod
);
1710 VALUE m
= RARRAY_AREF(extmod
, --i
);
1711 rb_extend_object(obj
, m
);
1716 #define prohibit_ivar(type, str) do { \
1717 if (!ivp || !*ivp) break; \
1718 rb_raise(rb_eTypeError, \
1719 "can't override instance variable of "type" `%"PRIsVALUE"'", \
1723 static VALUE
r_object_for(struct load_arg
*arg
, bool partial
, int *ivp
, VALUE extmod
, int type
);
1726 r_object0(struct load_arg
*arg
, bool partial
, int *ivp
, VALUE extmod
)
1728 int type
= r_byte(arg
);
1729 return r_object_for(arg
, partial
, ivp
, extmod
, type
);
1733 r_object_for(struct load_arg
*arg
, bool partial
, int *ivp
, VALUE extmod
, int type
)
1735 VALUE (*hash_new_with_size
)(st_index_t
) = rb_hash_new_with_size
;
1743 if (!st_lookup(arg
->data
, (st_data_t
)id
, &link
)) {
1744 rb_raise(rb_eArgError
, "dump format error (unlinked)");
1747 if (!st_lookup(arg
->partial_objects
, (st_data_t
)v
, &link
)) {
1748 v
= r_post_proc(v
, arg
);
1756 v
= r_object0(arg
, true, &ivar
, extmod
);
1757 if (ivar
) r_ivar(v
, NULL
, arg
);
1758 v
= r_leave(v
, arg
, partial
);
1764 VALUE path
= r_unique(arg
);
1765 VALUE m
= rb_path_to_class(path
);
1766 if (NIL_P(extmod
)) extmod
= rb_ary_tmp_new(0);
1768 if (RB_TYPE_P(m
, T_CLASS
)) { /* prepended */
1771 v
= r_object0(arg
, true, 0, Qnil
);
1773 if (c
!= m
|| FL_TEST(c
, FL_SINGLETON
)) {
1774 rb_raise(rb_eArgError
,
1775 "prepended class %"PRIsVALUE
" differs from class %"PRIsVALUE
,
1776 path
, rb_class_name(c
));
1778 c
= rb_singleton_class(v
);
1779 while (RARRAY_LEN(extmod
) > 0) {
1780 m
= rb_ary_pop(extmod
);
1781 rb_prepend_module(c
, m
);
1785 must_be_module(m
, path
);
1786 rb_ary_push(extmod
, m
);
1788 v
= r_object0(arg
, true, 0, extmod
);
1789 while (RARRAY_LEN(extmod
) > 0) {
1790 m
= rb_ary_pop(extmod
);
1791 rb_extend_object(v
, m
);
1799 VALUE c
= path2class(r_unique(arg
));
1801 if (FL_TEST(c
, FL_SINGLETON
)) {
1802 rb_raise(rb_eTypeError
, "singleton can't be loaded");
1805 if ((c
== rb_cHash
) &&
1806 /* Hack for compare_by_identity */
1807 (type
== TYPE_HASH
|| type
== TYPE_HASH_DEF
)) {
1808 hash_new_with_size
= rb_ident_hash_new_with_size
;
1811 v
= r_object_for(arg
, partial
, 0, extmod
, type
);
1812 if (rb_special_const_p(v
) || RB_TYPE_P(v
, T_OBJECT
) || RB_TYPE_P(v
, T_CLASS
)) {
1815 if (RB_TYPE_P(v
, T_MODULE
) || !RTEST(rb_class_inherited_p(c
, RBASIC(v
)->klass
))) {
1816 VALUE tmp
= rb_obj_alloc(c
);
1818 if (TYPE(v
) != TYPE(tmp
)) goto format_error
;
1820 RBASIC_SET_CLASS(v
, c
);
1825 rb_raise(rb_eArgError
, "dump format error (user class)");
1829 v
= r_leave(v
, arg
, false);
1834 v
= r_leave(v
, arg
, false);
1839 v
= r_leave(v
, arg
, false);
1844 long i
= r_long(arg
);
1847 v
= r_leave(v
, arg
, false);
1853 VALUE str
= r_bytes(arg
);
1854 const char *ptr
= RSTRING_PTR(str
);
1856 if (strcmp(ptr
, "nan") == 0) {
1859 else if (strcmp(ptr
, "inf") == 0) {
1862 else if (strcmp(ptr
, "-inf") == 0) {
1867 d
= strtod(ptr
, &e
);
1868 d
= load_mantissa(d
, e
, RSTRING_LEN(str
) - (e
- ptr
));
1871 v
= r_entry(v
, arg
);
1872 v
= r_leave(v
, arg
, false);
1884 data
= r_bytes0(len
* 2, arg
);
1885 v
= rb_integer_unpack(RSTRING_PTR(data
), len
, 2, 0,
1886 INTEGER_PACK_LITTLE_ENDIAN
| (sign
== '-' ? INTEGER_PACK_NEGATIVE
: 0));
1887 rb_str_resize(data
, 0L);
1888 v
= r_entry(v
, arg
);
1889 v
= r_leave(v
, arg
, false);
1894 v
= r_entry(r_string(arg
), arg
);
1895 v
= r_leave(v
, arg
, partial
);
1900 VALUE str
= r_bytes(arg
);
1901 int options
= r_byte(arg
);
1902 int has_encoding
= FALSE
;
1903 st_index_t idx
= r_prepare(arg
);
1906 r_ivar(str
, &has_encoding
, arg
);
1909 if (!has_encoding
) {
1910 /* 1.8 compatibility; remove escapes undefined in 1.8 */
1911 char *ptr
= RSTRING_PTR(str
), *dst
= ptr
, *src
= ptr
;
1912 long len
= RSTRING_LEN(str
);
1914 for (; len
-- > 0; *dst
++ = *src
++) {
1916 case '\\': bs
++; break;
1917 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1918 case 'm': case 'o': case 'p': case 'q': case 'u': case 'y':
1919 case 'E': case 'F': case 'H': case 'I': case 'J': case 'K':
1920 case 'L': case 'N': case 'O': case 'P': case 'Q': case 'R':
1921 case 'S': case 'T': case 'U': case 'V': case 'X': case 'Y':
1924 default: bs
= 0; break;
1927 rb_str_set_len(str
, dst
- ptr
);
1929 v
= r_entry0(rb_reg_new_str(str
, options
), idx
, arg
);
1930 v
= r_leave(v
, arg
, partial
);
1936 long len
= r_long(arg
);
1938 v
= rb_ary_new2(len
);
1939 v
= r_entry(v
, arg
);
1940 arg
->readable
+= len
- 1;
1942 rb_ary_push(v
, r_object(arg
));
1945 v
= r_leave(v
, arg
, partial
);
1954 long len
= r_long(arg
);
1956 v
= hash_new_with_size(len
);
1957 v
= r_entry(v
, arg
);
1958 arg
->readable
+= (len
- 1) * 2;
1960 VALUE key
= r_object(arg
);
1961 VALUE value
= r_object(arg
);
1962 rb_hash_aset(v
, key
, value
);
1966 if (type
== TYPE_HASH_DEF
) {
1967 RHASH_SET_IFNONE(v
, r_object(arg
));
1969 v
= r_leave(v
, arg
, partial
);
1978 st_index_t idx
= r_prepare(arg
);
1979 VALUE klass
= path2class(r_unique(arg
));
1980 long len
= r_long(arg
);
1982 v
= rb_obj_alloc(klass
);
1983 if (!RB_TYPE_P(v
, T_STRUCT
)) {
1984 rb_raise(rb_eTypeError
, "class %"PRIsVALUE
" not a struct", rb_class_name(klass
));
1986 mem
= rb_struct_s_members(klass
);
1987 if (RARRAY_LEN(mem
) != len
) {
1988 rb_raise(rb_eTypeError
, "struct %"PRIsVALUE
" not compatible (struct size differs)",
1989 rb_class_name(klass
));
1992 arg
->readable
+= (len
- 1) * 2;
1993 v
= r_entry0(v
, idx
, arg
);
1994 values
= rb_ary_new2(len
);
1996 VALUE keywords
= Qfalse
;
1997 if (RTEST(rb_struct_s_keyword_init(klass
))) {
1998 keywords
= rb_hash_new();
1999 rb_ary_push(values
, keywords
);
2002 for (i
=0; i
<len
; i
++) {
2003 VALUE n
= rb_sym2str(RARRAY_AREF(mem
, i
));
2004 slot
= r_symbol(arg
);
2006 if (!rb_str_equal(n
, slot
)) {
2007 rb_raise(rb_eTypeError
, "struct %"PRIsVALUE
" not compatible (:%"PRIsVALUE
" for :%"PRIsVALUE
")",
2008 rb_class_name(klass
),
2012 rb_hash_aset(keywords
, RARRAY_AREF(mem
, i
), r_object(arg
));
2015 rb_ary_push(values
, r_object(arg
));
2020 rb_struct_initialize(v
, values
);
2021 v
= r_leave(v
, arg
, partial
);
2028 VALUE name
= r_unique(arg
);
2029 VALUE klass
= path2class(name
);
2033 if (!rb_obj_respond_to(klass
, s_load
, TRUE
)) {
2034 rb_raise(rb_eTypeError
, "class %"PRIsVALUE
" needs to have method `_load'",
2037 data
= r_string(arg
);
2039 r_ivar(data
, NULL
, arg
);
2042 v
= load_funcall(arg
, klass
, s_load
, 1, &data
);
2043 v
= r_entry(v
, arg
);
2044 if (st_lookup(compat_allocator_tbl
, (st_data_t
)rb_get_alloc_func(klass
), &d
)) {
2045 marshal_compat_t
*compat
= (marshal_compat_t
*)d
;
2046 v
= compat
->loader(klass
, v
);
2048 if (!partial
) v
= r_post_proc(v
, arg
);
2052 case TYPE_USRMARSHAL
:
2054 VALUE name
= r_unique(arg
);
2055 VALUE klass
= path2class(name
);
2059 v
= obj_alloc_by_klass(klass
, arg
, &oldclass
);
2060 if (!NIL_P(extmod
)) {
2061 /* for the case marshal_load is overridden */
2062 append_extmod(v
, extmod
);
2064 if (!rb_obj_respond_to(v
, s_mload
, TRUE
)) {
2065 rb_raise(rb_eTypeError
, "instance of %"PRIsVALUE
" needs to have method `marshal_load'",
2068 v
= r_entry(v
, arg
);
2069 data
= r_object(arg
);
2070 load_funcall(arg
, v
, s_mload
, 1, &data
);
2071 v
= r_fixup_compat(v
, arg
);
2072 v
= r_copy_ivar(v
, data
);
2073 v
= r_post_proc(v
, arg
);
2074 if (!NIL_P(extmod
)) {
2075 if (oldclass
) append_extmod(v
, extmod
);
2076 rb_ary_clear(extmod
);
2083 st_index_t idx
= r_prepare(arg
);
2084 v
= obj_alloc_by_path(r_unique(arg
), arg
);
2085 if (!RB_TYPE_P(v
, T_OBJECT
)) {
2086 rb_raise(rb_eArgError
, "dump format error");
2088 v
= r_entry0(v
, idx
, arg
);
2089 r_ivar(v
, NULL
, arg
);
2090 v
= r_leave(v
, arg
, partial
);
2096 VALUE name
= r_unique(arg
);
2097 VALUE klass
= path2class(name
);
2101 v
= obj_alloc_by_klass(klass
, arg
, &oldclass
);
2102 if (!RB_TYPE_P(v
, T_DATA
)) {
2103 rb_raise(rb_eArgError
, "dump format error");
2105 v
= r_entry(v
, arg
);
2106 if (!rb_obj_respond_to(v
, s_load_data
, TRUE
)) {
2107 rb_raise(rb_eTypeError
,
2108 "class %"PRIsVALUE
" needs to have instance method `_load_data'",
2111 r
= r_object0(arg
, partial
, 0, extmod
);
2112 load_funcall(arg
, v
, s_load_data
, 1, &r
);
2113 v
= r_leave(v
, arg
, partial
);
2117 case TYPE_MODULE_OLD
:
2119 VALUE str
= r_bytes(arg
);
2121 v
= rb_path_to_class(str
);
2122 prohibit_ivar("class/module", str
);
2123 v
= r_entry(v
, arg
);
2124 v
= r_leave(v
, arg
, partial
);
2130 VALUE str
= r_bytes(arg
);
2132 v
= path2class(str
);
2133 prohibit_ivar("class", str
);
2134 v
= r_entry(v
, arg
);
2135 v
= r_leave(v
, arg
, partial
);
2141 VALUE str
= r_bytes(arg
);
2143 v
= path2module(str
);
2144 prohibit_ivar("module", str
);
2145 v
= r_entry(v
, arg
);
2146 v
= r_leave(v
, arg
, partial
);
2152 v
= r_symreal(arg
, *ivp
);
2156 v
= r_symreal(arg
, 0);
2158 v
= rb_str_intern(v
);
2159 v
= r_leave(v
, arg
, partial
);
2163 v
= rb_str_intern(r_symlink(arg
));
2167 rb_raise(rb_eArgError
, "dump format error(0x%x)", type
);
2172 rb_raise(rb_eArgError
, "dump format error (bad link)");
2179 r_object(struct load_arg
*arg
)
2181 return r_object0(arg
, false, 0, Qnil
);
2185 clear_load_arg(struct load_arg
*arg
)
2194 if (!arg
->symbols
) return;
2195 st_free_table(arg
->symbols
);
2197 st_free_table(arg
->data
);
2199 st_free_table(arg
->partial_objects
);
2200 arg
->partial_objects
= 0;
2201 if (arg
->compat_tbl
) {
2202 st_free_table(arg
->compat_tbl
);
2203 arg
->compat_tbl
= 0;
2208 rb_marshal_load_with_proc(VALUE port
, VALUE proc
, bool freeze
)
2212 VALUE wrapper
; /* used to avoid memory leak in case of exception */
2213 struct load_arg
*arg
;
2215 v
= rb_check_string_type(port
);
2219 else if (rb_respond_to(port
, s_getbyte
) && rb_respond_to(port
, s_read
)) {
2220 rb_check_funcall(port
, s_binmode
, 0, 0);
2225 wrapper
= TypedData_Make_Struct(0, struct load_arg
, &load_arg_data
, arg
);
2228 arg
->symbols
= st_init_numtable();
2229 arg
->data
= rb_init_identtable();
2230 arg
->partial_objects
= rb_init_identtable();
2231 arg
->compat_tbl
= 0;
2234 arg
->freeze
= freeze
;
2237 arg
->buf
= xmalloc(BUFSIZ
);
2241 major
= r_byte(arg
);
2242 minor
= r_byte(arg
);
2243 if (major
!= MARSHAL_MAJOR
|| minor
> MARSHAL_MINOR
) {
2244 clear_load_arg(arg
);
2245 rb_raise(rb_eTypeError
, "incompatible marshal file format (can't be read)\n\
2246 \tformat version %d.%d required; %d.%d given",
2247 MARSHAL_MAJOR
, MARSHAL_MINOR
, major
, minor
);
2249 if (RTEST(ruby_verbose
) && minor
!= MARSHAL_MINOR
) {
2250 rb_warn("incompatible marshal file format (can be read)\n\
2251 \tformat version %d.%d required; %d.%d given",
2252 MARSHAL_MAJOR
, MARSHAL_MINOR
, major
, minor
);
2255 if (!NIL_P(proc
)) arg
->proc
= proc
;
2257 clear_load_arg(arg
);
2258 RB_GC_GUARD(wrapper
);
2263 static VALUE
marshal_load(rb_execution_context_t
*ec
, VALUE mod
, VALUE source
, VALUE proc
, VALUE freeze
)
2265 return rb_marshal_load_with_proc(source
, proc
, RTEST(freeze
));
2268 #include "marshal.rbinc"
2271 * The marshaling library converts collections of Ruby objects into a
2272 * byte stream, allowing them to be stored outside the currently
2273 * active script. This data may subsequently be read and the original
2274 * objects reconstituted.
2276 * Marshaled data has major and minor version numbers stored along
2277 * with the object information. In normal use, marshaling can only
2278 * load data written with the same major version number and an equal
2279 * or lower minor version number. If Ruby's ``verbose'' flag is set
2280 * (normally using -d, -v, -w, or --verbose), a warning is issued when
2281 * the minor number does not match exactly. Marshal versioning is independent of
2282 * Ruby's version numbers. You can extract the version by reading the
2283 * first two bytes of marshaled data.
2285 * str = Marshal.dump("thing")
2286 * RUBY_VERSION #=> "1.9.0"
2290 * Some objects cannot be dumped: if the objects to be dumped include
2291 * bindings, procedure or method objects, instances of class IO, or
2292 * singleton objects, a TypeError will be raised.
2294 * If your class has special serialization needs (for example, if you
2295 * want to serialize in some specific format), or if it contains
2296 * objects that would otherwise not be serializable, you can implement
2297 * your own serialization strategy.
2299 * There are two ways of doing this: your object can define either
2300 * marshal_dump and marshal_load or _dump and _load. marshal_dump will take
2301 * precedence over _dump if both are defined. marshal_dump may result in
2302 * smaller Marshal strings.
2304 * == Security considerations
2306 * By design, Marshal.load can deserialize almost any class loaded into the
2307 * Ruby process. In many cases this can lead to remote code execution if the
2308 * Marshal data is loaded from an untrusted source.
2310 * As a result, Marshal.load is not suitable as a general purpose serialization
2311 * format and you should never unmarshal user supplied input or other untrusted
2314 * If you need to deserialize untrusted data, use JSON or another serialization
2315 * format that is only able to load simple, 'primitive' types such as String,
2316 * Array, Hash, etc. Never allow user input to specify arbitrary types to
2319 * == marshal_dump and marshal_load
2321 * When dumping an object the method marshal_dump will be called.
2322 * marshal_dump must return a result containing the information necessary for
2323 * marshal_load to reconstitute the object. The result can be any object.
2325 * When loading an object dumped using marshal_dump the object is first
2326 * allocated then marshal_load is called with the result from marshal_dump.
2327 * marshal_load must recreate the object from the information in the result.
2332 * def initialize name, version, data
2334 * @version = version
2342 * def marshal_load array
2343 * @name, @version = array
2347 * == _dump and _load
2349 * Use _dump and _load when you need to allocate the object you're restoring
2352 * When dumping an object the instance method _dump is called with an Integer
2353 * which indicates the maximum depth of objects to dump (a value of -1 implies
2354 * that you should disable depth checking). _dump must return a String
2355 * containing the information necessary to reconstitute the object.
2357 * The class method _load should take a String and use it to return an object
2358 * of the same class.
2363 * def initialize name, version, data
2365 * @version = version
2370 * [@name, @version].join ':'
2373 * def self._load args
2374 * new(*args.split(':'))
2378 * Since Marshal.dump outputs a string, you can have _dump return a Marshal
2379 * string that _load then passes to Marshal.load, for complex objects.
2384 VALUE rb_mMarshal
= rb_define_module("Marshal");
2385 #define set_id(sym) sym = rb_intern_const(name_##sym)
2390 set_id(s_dump_data
);
2391 set_id(s_load_data
);
2398 set_id(s_encoding_short
);
2399 set_id(s_ruby2_keywords_flag
);
2401 rb_define_module_function(rb_mMarshal
, "dump", marshal_dump
, -1);
2404 rb_define_const(rb_mMarshal
, "MAJOR_VERSION", INT2FIX(MARSHAL_MAJOR
));
2406 rb_define_const(rb_mMarshal
, "MINOR_VERSION", INT2FIX(MARSHAL_MINOR
));
2410 compat_allocator_table(void)
2412 if (compat_allocator_tbl
) return compat_allocator_tbl
;
2413 compat_allocator_tbl
= st_init_numtable();
2414 #undef RUBY_UNTYPED_DATA_WARNING
2415 #define RUBY_UNTYPED_DATA_WARNING 0
2416 compat_allocator_tbl_wrapper
=
2417 Data_Wrap_Struct(0, mark_marshal_compat_t
, 0, compat_allocator_tbl
);
2418 rb_gc_register_mark_object(compat_allocator_tbl_wrapper
);
2419 return compat_allocator_tbl
;
2423 rb_marshal_dump(VALUE obj
, VALUE port
)
2425 return rb_marshal_dump_limited(obj
, port
, -1);
2429 rb_marshal_load(VALUE port
)
2431 return rb_marshal_load_with_proc(port
, Qnil
, false);