1 /**********************************************************************
6 created at: Thu Apr 27 16:30:01 JST 1995
8 Copyright (C) 1993-2007 Yukihiro Matsumoto
10 **********************************************************************/
12 #include "ruby/internal/config.h"
25 #include "internal/array.h"
26 #include "internal/bignum.h"
27 #include "internal/class.h"
28 #include "internal/encoding.h"
29 #include "internal/error.h"
30 #include "internal/hash.h"
31 #include "internal/numeric.h"
32 #include "internal/object.h"
33 #include "internal/struct.h"
34 #include "internal/symbol.h"
35 #include "internal/util.h"
36 #include "internal/vm.h"
38 #include "ruby/ruby.h"
40 #include "ruby/util.h"
44 #define BITSPERSHORT (2*CHAR_BIT)
45 #define SHORTMASK ((1<<BITSPERSHORT)-1)
46 #define SHORTDN(x) RSHIFT((x),BITSPERSHORT)
48 #if SIZEOF_SHORT == SIZEOF_BDIGIT
49 #define SHORTLEN(x) (x)
52 shortlen(size_t len
, BDIGIT
*ds
)
62 return (len
- 1)*SIZEOF_BDIGIT
/2 + offset
;
64 #define SHORTLEN(x) shortlen((x),d)
67 #define MARSHAL_MAJOR 4
68 #define MARSHAL_MINOR 8
72 #define TYPE_FALSE 'F'
73 #define TYPE_FIXNUM 'i'
75 #define TYPE_EXTENDED 'e'
76 #define TYPE_UCLASS 'C'
77 #define TYPE_OBJECT 'o'
79 #define TYPE_USERDEF 'u'
80 #define TYPE_USRMARSHAL 'U'
81 #define TYPE_FLOAT 'f'
82 #define TYPE_BIGNUM 'l'
83 #define TYPE_STRING '"'
84 #define TYPE_REGEXP '/'
85 #define TYPE_ARRAY '['
87 #define TYPE_HASH_DEF '}'
88 #define TYPE_STRUCT 'S'
89 #define TYPE_MODULE_OLD 'M'
90 #define TYPE_CLASS 'c'
91 #define TYPE_MODULE 'm'
93 #define TYPE_SYMBOL ':'
94 #define TYPE_SYMLINK ';'
99 static ID s_dump
, s_load
, s_mdump
, s_mload
;
100 static ID s_dump_data
, s_load_data
, s_alloc
, s_call
;
101 static ID s_getbyte
, s_read
, s_write
, s_binmode
;
102 static ID s_encoding_short
, s_ruby2_keywords_flag
;
104 #define name_s_dump "_dump"
105 #define name_s_load "_load"
106 #define name_s_mdump "marshal_dump"
107 #define name_s_mload "marshal_load"
108 #define name_s_dump_data "_dump_data"
109 #define name_s_load_data "_load_data"
110 #define name_s_alloc "_alloc"
111 #define name_s_call "call"
112 #define name_s_getbyte "getbyte"
113 #define name_s_read "read"
114 #define name_s_write "write"
115 #define name_s_binmode "binmode"
116 #define name_s_encoding_short "E"
117 #define name_s_ruby2_keywords_flag "K"
122 VALUE (*dumper
)(VALUE
);
123 VALUE (*loader
)(VALUE
, VALUE
);
126 static st_table
*compat_allocator_tbl
;
127 static VALUE compat_allocator_tbl_wrapper
;
128 static VALUE
rb_marshal_dump_limited(VALUE obj
, VALUE port
, int limit
);
129 static VALUE
rb_marshal_load_with_proc(VALUE port
, VALUE proc
, bool freeze
);
132 mark_marshal_compat_i(st_data_t key
, st_data_t value
, st_data_t _
)
134 marshal_compat_t
*p
= (marshal_compat_t
*)value
;
135 rb_gc_mark(p
->newclass
);
136 rb_gc_mark(p
->oldclass
);
141 mark_marshal_compat_t(void *tbl
)
144 st_foreach(tbl
, mark_marshal_compat_i
, 0);
147 static st_table
*compat_allocator_table(void);
150 rb_marshal_define_compat(VALUE newclass
, VALUE oldclass
, VALUE (*dumper
)(VALUE
), VALUE (*loader
)(VALUE
, VALUE
))
152 marshal_compat_t
*compat
;
153 rb_alloc_func_t allocator
= rb_get_alloc_func(newclass
);
156 rb_raise(rb_eTypeError
, "no allocator");
159 compat
= ALLOC(marshal_compat_t
);
160 compat
->newclass
= Qnil
;
161 compat
->oldclass
= Qnil
;
162 compat
->newclass
= newclass
;
163 compat
->oldclass
= oldclass
;
164 compat
->dumper
= dumper
;
165 compat
->loader
= loader
;
167 st_insert(compat_allocator_table(), (st_data_t
)allocator
, (st_data_t
)compat
);
174 st_table
*compat_tbl
;
176 st_index_t num_entries
;
179 struct dump_call_arg
{
181 struct dump_arg
*arg
;
186 check_dump_arg(VALUE ret
, struct dump_arg
*arg
, const char *name
)
189 rb_raise(rb_eRuntimeError
, "Marshal.dump reentered at %s",
196 check_userdump_arg(VALUE obj
, ID sym
, int argc
, const VALUE
*argv
,
197 struct dump_arg
*arg
, const char *name
)
199 VALUE ret
= rb_funcallv(obj
, sym
, argc
, argv
);
200 VALUE klass
= CLASS_OF(obj
);
201 if (CLASS_OF(ret
) == klass
) {
202 rb_raise(rb_eRuntimeError
, "%"PRIsVALUE
"#%s returned same class instance",
205 return check_dump_arg(ret
, arg
, name
);
208 #define dump_funcall(arg, obj, sym, argc, argv) \
209 check_userdump_arg(obj, sym, argc, argv, arg, name_##sym)
210 #define dump_check_funcall(arg, obj, sym, argc, argv) \
211 check_dump_arg(rb_check_funcall(obj, sym, argc, argv), arg, name_##sym)
213 static void clear_dump_arg(struct dump_arg
*arg
);
216 mark_dump_arg(void *ptr
)
218 struct dump_arg
*p
= ptr
;
221 rb_mark_set(p
->symbols
);
222 rb_mark_set(p
->data
);
223 rb_mark_hash(p
->compat_tbl
);
228 free_dump_arg(void *ptr
)
234 memsize_dump_arg(const void *ptr
)
236 const struct dump_arg
*p
= (struct dump_arg
*)ptr
;
238 if (p
->symbols
) memsize
+= rb_st_memsize(p
->symbols
);
239 if (p
->data
) memsize
+= rb_st_memsize(p
->data
);
240 if (p
->compat_tbl
) memsize
+= rb_st_memsize(p
->compat_tbl
);
241 if (p
->encodings
) memsize
+= rb_st_memsize(p
->encodings
);
245 static const rb_data_type_t dump_arg_data
= {
247 {mark_dump_arg
, free_dump_arg
, memsize_dump_arg
,},
248 0, 0, RUBY_TYPED_FREE_IMMEDIATELY
| RUBY_TYPED_EMBEDDABLE
252 must_not_be_anonymous(const char *type
, VALUE path
)
254 char *n
= RSTRING_PTR(path
);
256 if (!rb_enc_asciicompat(rb_enc_get(path
))) {
258 rb_raise(rb_eTypeError
, "can't dump non-ascii %s name % "PRIsVALUE
,
262 rb_raise(rb_eTypeError
, "can't dump anonymous %s % "PRIsVALUE
,
269 class2path(VALUE klass
)
271 VALUE path
= rb_class_path(klass
);
273 must_not_be_anonymous((RB_TYPE_P(klass
, T_CLASS
) ? "class" : "module"), path
);
274 if (rb_path_to_class(path
) != rb_class_real(klass
)) {
275 rb_raise(rb_eTypeError
, "% "PRIsVALUE
" can't be referred to", path
);
280 int ruby_marshal_write_long(long x
, char *buf
);
281 static void w_long(long, struct dump_arg
*);
282 static int w_encoding(VALUE encname
, struct dump_call_arg
*arg
);
283 static VALUE
encoding_name(VALUE obj
, struct dump_arg
*arg
);
286 w_nbyte(const char *s
, long n
, struct dump_arg
*arg
)
288 VALUE buf
= arg
->str
;
289 rb_str_buf_cat(buf
, s
, n
);
290 if (arg
->dest
&& RSTRING_LEN(buf
) >= BUFSIZ
) {
291 rb_io_write(arg
->dest
, buf
);
292 rb_str_resize(buf
, 0);
297 w_byte(char c
, struct dump_arg
*arg
)
303 w_bytes(const char *s
, long n
, struct dump_arg
*arg
)
309 #define w_cstr(s, arg) w_bytes((s), strlen(s), (arg))
312 w_short(int x
, struct dump_arg
*arg
)
314 w_byte((char)((x
>> 0) & 0xff), arg
);
315 w_byte((char)((x
>> 8) & 0xff), arg
);
319 w_long(long x
, struct dump_arg
*arg
)
321 char buf
[sizeof(long)+1];
322 int i
= ruby_marshal_write_long(x
, buf
);
324 rb_raise(rb_eTypeError
, "long too big to dump");
326 w_nbyte(buf
, i
, arg
);
330 ruby_marshal_write_long(long x
, char *buf
)
335 if (!(RSHIFT(x
, 31) == 0 || RSHIFT(x
, 31) == -1)) {
336 /* big long does not fit in 4 bytes */
345 if (0 < x
&& x
< 123) {
346 buf
[0] = (char)(x
+ 5);
349 if (-124 < x
&& x
< 0) {
350 buf
[0] = (char)((x
- 5)&0xff);
353 for (i
=1;i
<(int)sizeof(long)+1;i
++) {
354 buf
[i
] = (char)(x
& 0xff);
369 #define DECIMAL_MANT (53-16) /* from IEEE754 double precision */
371 #if DBL_MANT_DIG > 32
373 #elif DBL_MANT_DIG > 24
375 #elif DBL_MANT_DIG > 16
382 load_mantissa(double d
, const char *buf
, long len
)
385 if (--len
> 0 && !*buf
++) { /* binary mantissa mark */
386 int e
, s
= d
< 0, dig
= 0;
389 modf(ldexp(frexp(fabs(d
), &e
), DECIMAL_MANT
), &d
);
393 default: m
= *buf
++ & 0xff; /* fall through */
395 case 3: m
= (m
<< 8) | (*buf
++ & 0xff); /* fall through */
398 case 2: m
= (m
<< 8) | (*buf
++ & 0xff); /* fall through */
401 case 1: m
= (m
<< 8) | (*buf
++ & 0xff);
404 dig
-= len
< MANT_BITS
/ 8 ? 8 * (unsigned)len
: MANT_BITS
;
405 d
+= ldexp((double)m
, dig
);
406 } while ((len
-= MANT_BITS
/ 8) > 0);
407 d
= ldexp(d
, e
- DECIMAL_MANT
);
413 #define load_mantissa(d, buf, len) (d)
417 #define FLOAT_DIG (DBL_DIG+2)
423 w_float(double d
, struct dump_arg
*arg
)
425 char buf
[FLOAT_DIG
+ (DECIMAL_MANT
+ 7) / 8 + 10];
428 if (d
< 0) w_cstr("-inf", arg
);
429 else w_cstr("inf", arg
);
435 if (signbit(d
)) w_cstr("-0", arg
);
436 else w_cstr("0", arg
);
439 int decpt
, sign
, digs
, len
= 0;
440 char *e
, *p
= ruby_dtoa(d
, 0, 0, &decpt
, &sign
, &e
);
441 if (sign
) buf
[len
++] = '-';
443 if (decpt
< -3 || decpt
> digs
) {
445 if (--digs
> 0) buf
[len
++] = '.';
446 memcpy(buf
+ len
, p
+ 1, digs
);
448 len
+= snprintf(buf
+ len
, sizeof(buf
) - len
, "e%d", decpt
- 1);
450 else if (decpt
> 0) {
451 memcpy(buf
+ len
, p
, decpt
);
453 if ((digs
-= decpt
) > 0) {
455 memcpy(buf
+ len
, p
+ decpt
, digs
);
463 memset(buf
+ len
, '0', -decpt
);
466 memcpy(buf
+ len
, p
, digs
);
470 w_bytes(buf
, len
, arg
);
475 w_symbol(VALUE sym
, struct dump_arg
*arg
)
480 if (st_lookup(arg
->symbols
, sym
, &num
)) {
481 w_byte(TYPE_SYMLINK
, arg
);
482 w_long((long)num
, arg
);
485 const VALUE orig_sym
= sym
;
486 sym
= rb_sym2str(sym
);
488 rb_raise(rb_eTypeError
, "can't dump anonymous ID %"PRIdVALUE
, sym
);
490 encname
= encoding_name(sym
, arg
);
491 if (NIL_P(encname
) ||
492 is_ascii_string(sym
)) {
496 w_byte(TYPE_IVAR
, arg
);
498 w_byte(TYPE_SYMBOL
, arg
);
499 w_bytes(RSTRING_PTR(sym
), RSTRING_LEN(sym
), arg
);
500 st_add_direct(arg
->symbols
, orig_sym
, arg
->symbols
->num_entries
);
501 if (!NIL_P(encname
)) {
502 struct dump_call_arg c_arg
;
506 w_encoding(encname
, &c_arg
);
512 w_unique(VALUE s
, struct dump_arg
*arg
)
514 must_not_be_anonymous("class", s
);
515 w_symbol(rb_str_intern(s
), arg
);
518 static void w_object(VALUE
,struct dump_arg
*,int);
521 hash_each(VALUE key
, VALUE value
, VALUE v
)
523 struct dump_call_arg
*arg
= (void *)v
;
524 w_object(key
, arg
->arg
, arg
->limit
);
525 w_object(value
, arg
->arg
, arg
->limit
);
529 #define SINGLETON_DUMP_UNABLE_P(klass) \
530 (rb_id_table_size(RCLASS_M_TBL(klass)) > 0 || \
531 rb_ivar_count(klass) > 0)
534 w_extended(VALUE klass
, struct dump_arg
*arg
, int check
)
536 if (check
&& RCLASS_SINGLETON_P(klass
)) {
537 VALUE origin
= RCLASS_ORIGIN(klass
);
538 if (SINGLETON_DUMP_UNABLE_P(klass
) ||
539 (origin
!= klass
&& SINGLETON_DUMP_UNABLE_P(origin
))) {
540 rb_raise(rb_eTypeError
, "singleton can't be dumped");
542 klass
= RCLASS_SUPER(klass
);
544 while (BUILTIN_TYPE(klass
) == T_ICLASS
) {
545 if (!FL_TEST(klass
, RICLASS_IS_ORIGIN
) ||
546 BUILTIN_TYPE(RBASIC(klass
)->klass
) != T_MODULE
) {
547 VALUE path
= rb_class_name(RBASIC(klass
)->klass
);
548 w_byte(TYPE_EXTENDED
, arg
);
551 klass
= RCLASS_SUPER(klass
);
556 w_class(char type
, VALUE obj
, struct dump_arg
*arg
, int check
)
562 if (arg
->compat_tbl
&&
563 st_lookup(arg
->compat_tbl
, (st_data_t
)obj
, &real_obj
)) {
564 obj
= (VALUE
)real_obj
;
566 klass
= CLASS_OF(obj
);
567 w_extended(klass
, arg
, check
);
569 path
= class2path(rb_class_real(klass
));
574 w_uclass(VALUE obj
, VALUE super
, struct dump_arg
*arg
)
576 VALUE klass
= CLASS_OF(obj
);
578 w_extended(klass
, arg
, TRUE
);
579 klass
= rb_class_real(klass
);
580 if (klass
!= super
) {
581 w_byte(TYPE_UCLASS
, arg
);
582 w_unique(class2path(klass
), arg
);
587 rb_hash_ruby2_keywords_p(VALUE obj
)
589 return (RHASH(obj
)->basic
.flags
& RHASH_PASS_AS_KEYWORDS
) != 0;
593 rb_hash_ruby2_keywords(VALUE obj
)
595 RHASH(obj
)->basic
.flags
|= RHASH_PASS_AS_KEYWORDS
;
599 to_be_skipped_id(const ID id
)
601 if (id
== s_encoding_short
) return true;
602 if (id
== s_ruby2_keywords_flag
) return true;
603 if (id
== rb_id_encoding()) return true;
604 return !rb_id2str(id
);
608 struct dump_call_arg
*dump
;
613 w_obj_each(ID id
, VALUE value
, st_data_t a
)
615 struct w_ivar_arg
*ivarg
= (struct w_ivar_arg
*)a
;
616 struct dump_call_arg
*arg
= ivarg
->dump
;
618 if (to_be_skipped_id(id
)) {
619 if (id
== s_encoding_short
) {
620 rb_warn("instance variable '"name_s_encoding_short
"' on class %"PRIsVALUE
" is not dumped",
623 if (id
== s_ruby2_keywords_flag
) {
624 rb_warn("instance variable '"name_s_ruby2_keywords_flag
"' on class %"PRIsVALUE
" is not dumped",
630 w_symbol(ID2SYM(id
), arg
->arg
);
631 w_object(value
, arg
->arg
, arg
->limit
);
636 obj_count_ivars(ID id
, VALUE val
, st_data_t a
)
638 if (!to_be_skipped_id(id
) && UNLIKELY(!++*(st_index_t
*)a
)) {
639 rb_raise(rb_eRuntimeError
, "too many instance variables");
645 encoding_name(VALUE obj
, struct dump_arg
*arg
)
647 if (rb_enc_capable(obj
)) {
648 int encidx
= rb_enc_get_index(obj
);
649 rb_encoding
*enc
= 0;
652 if (encidx
<= 0 || !(enc
= rb_enc_from_index(encidx
))) {
656 /* special treatment for US-ASCII and UTF-8 */
657 if (encidx
== rb_usascii_encindex()) {
660 else if (encidx
== rb_utf8_encindex()) {
665 !st_lookup(arg
->encodings
, (st_data_t
)rb_enc_name(enc
), &name
) :
666 (arg
->encodings
= st_init_strcasetable(), 1)) {
667 name
= (st_data_t
)rb_str_new_cstr(rb_enc_name(enc
));
668 st_insert(arg
->encodings
, (st_data_t
)rb_enc_name(enc
), name
);
678 w_encoding(VALUE encname
, struct dump_call_arg
*arg
)
680 int limit
= arg
->limit
;
681 if (limit
>= 0) ++limit
;
685 w_symbol(ID2SYM(s_encoding_short
), arg
->arg
);
686 w_object(encname
, arg
->arg
, limit
);
691 w_symbol(ID2SYM(rb_id_encoding()), arg
->arg
);
692 w_object(encname
, arg
->arg
, limit
);
697 has_ivars(VALUE obj
, VALUE encname
, VALUE
*ivobj
)
699 st_index_t num
= !NIL_P(encname
);
701 if (SPECIAL_CONST_P(obj
)) goto generic
;
702 switch (BUILTIN_TYPE(obj
)) {
706 break; /* counted elsewhere */
708 if (rb_hash_ruby2_keywords_p(obj
)) ++num
;
712 rb_ivar_foreach(obj
, obj_count_ivars
, (st_data_t
)&num
);
713 if (num
) *ivobj
= obj
;
720 w_ivar_each(VALUE obj
, st_index_t num
, struct dump_call_arg
*arg
)
722 shape_id_t shape_id
= rb_shape_get_shape_id(arg
->obj
);
723 struct w_ivar_arg ivarg
= {arg
, num
};
725 rb_ivar_foreach(obj
, w_obj_each
, (st_data_t
)&ivarg
);
727 if (shape_id
!= rb_shape_get_shape_id(arg
->obj
)) {
728 rb_shape_t
* expected_shape
= rb_shape_get_shape_by_id(shape_id
);
729 rb_shape_t
* actual_shape
= rb_shape_get_shape(arg
->obj
);
731 // If the shape tree got _shorter_ then we probably removed an IV
732 // If the shape tree got longer, then we probably added an IV.
733 // The exception message might not be accurate when someone adds and
734 // removes the same number of IVs, but they will still get an exception
735 if (rb_shape_depth(expected_shape
) > rb_shape_depth(actual_shape
)) {
736 rb_raise(rb_eRuntimeError
, "instance variable removed from %"PRIsVALUE
" instance",
740 rb_raise(rb_eRuntimeError
, "instance variable added to %"PRIsVALUE
" instance",
747 w_ivar(st_index_t num
, VALUE ivobj
, VALUE encname
, struct dump_call_arg
*arg
)
749 w_long(num
, arg
->arg
);
750 num
-= w_encoding(encname
, arg
);
751 if (RB_TYPE_P(ivobj
, T_HASH
) && rb_hash_ruby2_keywords_p(ivobj
)) {
752 int limit
= arg
->limit
;
753 if (limit
>= 0) ++limit
;
754 w_symbol(ID2SYM(s_ruby2_keywords_flag
), arg
->arg
);
755 w_object(Qtrue
, arg
->arg
, limit
);
758 if (!UNDEF_P(ivobj
) && num
) {
759 w_ivar_each(ivobj
, num
, arg
);
764 w_objivar(VALUE obj
, struct dump_call_arg
*arg
)
768 rb_ivar_foreach(obj
, obj_count_ivars
, (st_data_t
)&num
);
769 w_long(num
, arg
->arg
);
770 w_ivar_each(obj
, num
, arg
);
774 // Optimized dump for fixnum larger than 31-bits
776 w_bigfixnum(VALUE obj
, struct dump_arg
*arg
)
778 RUBY_ASSERT(FIXNUM_P(obj
));
780 w_byte(TYPE_BIGNUM
, arg
);
782 #if SIZEOF_LONG == SIZEOF_VALUE
786 long long num
, slen_num
;
790 char sign
= num
< 0 ? '-' : '+';
793 // Guaranteed not to overflow, as FIXNUM is 1-bit less than long
794 if (num
< 0) num
= -num
;
796 // calculate the size in shorts
802 slen_num
= SHORTDN(slen_num
);
806 RUBY_ASSERT(slen
> 0 && slen
<= SIZEOF_LONG
/ 2);
808 w_long((long)slen
, arg
);
810 for (int i
= 0; i
< slen
; i
++) {
811 w_short(num
& SHORTMASK
, arg
);
815 // We aren't adding this object to the link table, but we need to increment
819 RUBY_ASSERT(num
== 0);
824 w_remember(VALUE obj
, struct dump_arg
*arg
)
826 st_add_direct(arg
->data
, obj
, arg
->num_entries
++);
830 w_object(VALUE obj
, struct dump_arg
*arg
, int limit
)
832 struct dump_call_arg c_arg
;
833 VALUE ivobj
= Qundef
;
835 st_index_t hasiv
= 0;
836 VALUE encname
= Qnil
;
839 rb_raise(rb_eArgError
, "exceed depth limit");
843 w_byte(TYPE_NIL
, arg
);
845 else if (obj
== Qtrue
) {
846 w_byte(TYPE_TRUE
, arg
);
848 else if (obj
== Qfalse
) {
849 w_byte(TYPE_FALSE
, arg
);
851 else if (FIXNUM_P(obj
)) {
853 w_byte(TYPE_FIXNUM
, arg
);
854 w_long(FIX2INT(obj
), arg
);
856 if (RSHIFT((long)obj
, 31) == 0 || RSHIFT((long)obj
, 31) == -1) {
857 w_byte(TYPE_FIXNUM
, arg
);
858 w_long(FIX2LONG(obj
), arg
);
861 w_bigfixnum(obj
, arg
);
865 else if (SYMBOL_P(obj
)) {
869 if (st_lookup(arg
->data
, obj
, &num
)) {
870 w_byte(TYPE_LINK
, arg
);
871 w_long((long)num
, arg
);
875 if (limit
> 0) limit
--;
881 w_remember(obj
, arg
);
882 w_byte(TYPE_FLOAT
, arg
);
883 w_float(RFLOAT_VALUE(obj
), arg
);
889 if (!RBASIC_CLASS(obj
)) {
890 rb_raise(rb_eTypeError
, "can't dump internal %s",
891 rb_builtin_type_name(BUILTIN_TYPE(obj
)));
894 if (rb_obj_respond_to(obj
, s_mdump
, TRUE
)) {
895 w_remember(obj
, arg
);
897 v
= dump_funcall(arg
, obj
, s_mdump
, 0, 0);
898 w_class(TYPE_USRMARSHAL
, obj
, arg
, FALSE
);
899 w_object(v
, arg
, limit
);
902 if (rb_obj_respond_to(obj
, s_dump
, TRUE
)) {
903 VALUE ivobj2
= Qundef
;
908 v
= dump_funcall(arg
, obj
, s_dump
, 1, &v
);
909 if (!RB_TYPE_P(v
, T_STRING
)) {
910 rb_raise(rb_eTypeError
, "_dump() must return string");
912 hasiv
= has_ivars(obj
, (encname
= encoding_name(obj
, arg
)), &ivobj
);
913 hasiv2
= has_ivars(v
, (encname2
= encoding_name(v
, arg
)), &ivobj2
);
919 if (hasiv
) w_byte(TYPE_IVAR
, arg
);
920 w_class(TYPE_USERDEF
, obj
, arg
, FALSE
);
921 w_bytes(RSTRING_PTR(v
), RSTRING_LEN(v
), arg
);
923 w_ivar(hasiv
, ivobj
, encname
, &c_arg
);
925 w_remember(obj
, arg
);
929 w_remember(obj
, arg
);
931 hasiv
= has_ivars(obj
, (encname
= encoding_name(obj
, arg
)), &ivobj
);
933 st_data_t compat_data
;
934 rb_alloc_func_t allocator
= rb_get_alloc_func(RBASIC(obj
)->klass
);
935 if (st_lookup(compat_allocator_tbl
,
936 (st_data_t
)allocator
,
938 marshal_compat_t
*compat
= (marshal_compat_t
*)compat_data
;
939 VALUE real_obj
= obj
;
940 obj
= compat
->dumper(real_obj
);
941 if (!arg
->compat_tbl
) {
942 arg
->compat_tbl
= rb_init_identtable();
944 st_insert(arg
->compat_tbl
, (st_data_t
)obj
, (st_data_t
)real_obj
);
945 if (obj
!= real_obj
&& UNDEF_P(ivobj
)) hasiv
= 0;
948 if (hasiv
) w_byte(TYPE_IVAR
, arg
);
950 switch (BUILTIN_TYPE(obj
)) {
952 if (FL_TEST(obj
, FL_SINGLETON
)) {
953 rb_raise(rb_eTypeError
, "singleton class can't be dumped");
955 w_byte(TYPE_CLASS
, arg
);
957 VALUE path
= class2path(obj
);
958 w_bytes(RSTRING_PTR(path
), RSTRING_LEN(path
), arg
);
964 w_byte(TYPE_MODULE
, arg
);
966 VALUE path
= class2path(obj
);
967 w_bytes(RSTRING_PTR(path
), RSTRING_LEN(path
), arg
);
973 w_byte(TYPE_FLOAT
, arg
);
974 w_float(RFLOAT_VALUE(obj
), arg
);
978 w_byte(TYPE_BIGNUM
, arg
);
980 char sign
= BIGNUM_SIGN(obj
) ? '+' : '-';
981 size_t len
= BIGNUM_LEN(obj
);
984 BDIGIT
*d
= BIGNUM_DIGITS(obj
);
986 slen
= SHORTLEN(len
);
987 if (LONG_MAX
< slen
) {
988 rb_raise(rb_eTypeError
, "too big Bignum can't be dumped");
992 w_long((long)slen
, arg
);
993 for (j
= 0; j
< len
; j
++) {
994 #if SIZEOF_BDIGIT > SIZEOF_SHORT
998 for (i
=0; i
<SIZEOF_BDIGIT
; i
+=SIZEOF_SHORT
) {
999 w_short(num
& SHORTMASK
, arg
);
1001 if (j
== len
- 1 && num
== 0) break;
1012 w_uclass(obj
, rb_cString
, arg
);
1013 w_byte(TYPE_STRING
, arg
);
1014 w_bytes(RSTRING_PTR(obj
), RSTRING_LEN(obj
), arg
);
1018 w_uclass(obj
, rb_cRegexp
, arg
);
1019 w_byte(TYPE_REGEXP
, arg
);
1021 int opts
= rb_reg_options(obj
);
1022 w_bytes(RREGEXP_SRC_PTR(obj
), RREGEXP_SRC_LEN(obj
), arg
);
1023 w_byte((char)opts
, arg
);
1028 w_uclass(obj
, rb_cArray
, arg
);
1029 w_byte(TYPE_ARRAY
, arg
);
1031 long i
, len
= RARRAY_LEN(obj
);
1034 for (i
=0; i
<RARRAY_LEN(obj
); i
++) {
1035 w_object(RARRAY_AREF(obj
, i
), arg
, limit
);
1036 if (len
!= RARRAY_LEN(obj
)) {
1037 rb_raise(rb_eRuntimeError
, "array modified during dump");
1044 w_uclass(obj
, rb_cHash
, arg
);
1045 if (rb_hash_compare_by_id_p(obj
)) {
1046 w_byte(TYPE_UCLASS
, arg
);
1047 w_symbol(rb_sym_intern_ascii_cstr("Hash"), arg
);
1049 if (NIL_P(RHASH_IFNONE(obj
))) {
1050 w_byte(TYPE_HASH
, arg
);
1052 else if (FL_TEST(obj
, RHASH_PROC_DEFAULT
)) {
1053 rb_raise(rb_eTypeError
, "can't dump hash with default proc");
1056 w_byte(TYPE_HASH_DEF
, arg
);
1058 w_long(rb_hash_size_num(obj
), arg
);
1059 rb_hash_foreach(obj
, hash_each
, (st_data_t
)&c_arg
);
1060 if (!NIL_P(RHASH_IFNONE(obj
))) {
1061 w_object(RHASH_IFNONE(obj
), arg
, limit
);
1066 w_class(TYPE_STRUCT
, obj
, arg
, TRUE
);
1068 long len
= RSTRUCT_LEN(obj
);
1073 mem
= rb_struct_members(obj
);
1074 for (i
=0; i
<len
; i
++) {
1075 w_symbol(RARRAY_AREF(mem
, i
), arg
);
1076 w_object(RSTRUCT_GET(obj
, i
), arg
, limit
);
1082 w_class(TYPE_OBJECT
, obj
, arg
, TRUE
);
1083 w_objivar(obj
, &c_arg
);
1090 if (!rb_obj_respond_to(obj
, s_dump_data
, TRUE
)) {
1091 rb_raise(rb_eTypeError
,
1092 "no _dump_data is defined for class %"PRIsVALUE
,
1095 v
= dump_funcall(arg
, obj
, s_dump_data
, 0, 0);
1096 w_class(TYPE_DATA
, obj
, arg
, TRUE
);
1097 w_object(v
, arg
, limit
);
1102 rb_raise(rb_eTypeError
, "can't dump %"PRIsVALUE
,
1109 w_ivar(hasiv
, ivobj
, encname
, &c_arg
);
1114 clear_dump_arg(struct dump_arg
*arg
)
1116 if (!arg
->symbols
) return;
1117 st_free_table(arg
->symbols
);
1119 st_free_table(arg
->data
);
1121 arg
->num_entries
= 0;
1122 if (arg
->compat_tbl
) {
1123 st_free_table(arg
->compat_tbl
);
1124 arg
->compat_tbl
= 0;
1126 if (arg
->encodings
) {
1127 st_free_table(arg
->encodings
);
1132 NORETURN(static inline void io_needed(void));
1136 rb_raise(rb_eTypeError
, "instance of IO needed");
1141 * dump( obj [, anIO] , limit=-1 ) -> anIO
1143 * Serializes obj and all descendant objects. If anIO is
1144 * specified, the serialized data will be written to it, otherwise the
1145 * data will be returned as a String. If limit is specified, the
1146 * traversal of subobjects will be limited to that depth. If limit is
1147 * negative, no checking of depth will be performed.
1150 * def initialize(str)
1158 * (produces no output)
1160 * o = Klass.new("hello\n")
1161 * data = Marshal.dump(o)
1162 * obj = Marshal.load(data)
1163 * obj.say_hello #=> "hello\n"
1165 * Marshal can't dump following objects:
1166 * * anonymous Class/Module.
1167 * * objects which are related to system (ex: Dir, File::Stat, IO, File, Socket
1169 * * an instance of MatchData, Data, Method, UnboundMethod, Proc, Thread,
1170 * ThreadGroup, Continuation
1171 * * objects which define singleton methods
1174 marshal_dump(int argc
, VALUE
*argv
, VALUE _
)
1176 VALUE obj
, port
, a1
, a2
;
1180 rb_scan_args(argc
, argv
, "12", &obj
, &a1
, &a2
);
1182 if (!NIL_P(a2
)) limit
= NUM2INT(a2
);
1183 if (NIL_P(a1
)) io_needed();
1186 else if (argc
== 2) {
1187 if (FIXNUM_P(a1
)) limit
= FIX2INT(a1
);
1188 else if (NIL_P(a1
)) io_needed();
1191 return rb_marshal_dump_limited(obj
, port
, limit
);
1195 rb_marshal_dump_limited(VALUE obj
, VALUE port
, int limit
)
1197 struct dump_arg
*arg
;
1198 VALUE wrapper
; /* used to avoid memory leak in case of exception */
1200 wrapper
= TypedData_Make_Struct(0, struct dump_arg
, &dump_arg_data
, arg
);
1202 arg
->symbols
= st_init_numtable();
1203 arg
->data
= rb_init_identtable();
1204 arg
->num_entries
= 0;
1205 arg
->compat_tbl
= 0;
1207 arg
->str
= rb_str_buf_new(0);
1209 if (!rb_respond_to(port
, s_write
)) {
1213 dump_check_funcall(arg
, port
, s_binmode
, 0, 0);
1219 w_byte(MARSHAL_MAJOR
, arg
);
1220 w_byte(MARSHAL_MINOR
, arg
);
1222 w_object(obj
, arg
, limit
);
1224 rb_io_write(arg
->dest
, arg
->str
);
1225 rb_str_resize(arg
->str
, 0);
1227 clear_dump_arg(arg
);
1228 RB_GC_GUARD(wrapper
);
1241 st_table
*partial_objects
;
1243 st_table
*compat_tbl
;
1248 check_load_arg(VALUE ret
, struct load_arg
*arg
, const char *name
)
1250 if (!arg
->symbols
) {
1251 rb_raise(rb_eRuntimeError
, "Marshal.load reentered at %s",
1256 #define load_funcall(arg, obj, sym, argc, argv) \
1257 check_load_arg(rb_funcallv(obj, sym, argc, argv), arg, name_##sym)
1259 static void clear_load_arg(struct load_arg
*arg
);
1262 mark_load_arg(void *ptr
)
1264 struct load_arg
*p
= ptr
;
1267 rb_mark_tbl(p
->symbols
);
1268 rb_mark_tbl(p
->data
);
1269 rb_mark_tbl(p
->partial_objects
);
1270 rb_mark_hash(p
->compat_tbl
);
1274 free_load_arg(void *ptr
)
1276 clear_load_arg(ptr
);
1280 memsize_load_arg(const void *ptr
)
1282 const struct load_arg
*p
= (struct load_arg
*)ptr
;
1284 if (p
->symbols
) memsize
+= rb_st_memsize(p
->symbols
);
1285 if (p
->data
) memsize
+= rb_st_memsize(p
->data
);
1286 if (p
->partial_objects
) memsize
+= rb_st_memsize(p
->partial_objects
);
1287 if (p
->compat_tbl
) memsize
+= rb_st_memsize(p
->compat_tbl
);
1291 static const rb_data_type_t load_arg_data
= {
1293 {mark_load_arg
, free_load_arg
, memsize_load_arg
,},
1294 0, 0, RUBY_TYPED_FREE_IMMEDIATELY
| RUBY_TYPED_EMBEDDABLE
1297 #define r_entry(v, arg) r_entry0((v), (arg)->data->num_entries, (arg))
1298 static VALUE
r_object(struct load_arg
*arg
);
1299 static VALUE
r_symbol(struct load_arg
*arg
);
1301 NORETURN(static void too_short(void));
1305 rb_raise(rb_eArgError
, "marshal data too short");
1309 r_prepare(struct load_arg
*arg
)
1311 st_index_t idx
= arg
->data
->num_entries
;
1313 st_insert(arg
->data
, (st_data_t
)idx
, (st_data_t
)Qundef
);
1317 static unsigned char
1318 r_byte1_buffered(struct load_arg
*arg
)
1320 if (arg
->buflen
== 0) {
1321 long readable
= arg
->readable
< BUFSIZ
? arg
->readable
: BUFSIZ
;
1322 VALUE str
, n
= LONG2NUM(readable
);
1324 str
= load_funcall(arg
, arg
->src
, s_read
, 1, &n
);
1325 if (NIL_P(str
)) too_short();
1327 memcpy(arg
->buf
, RSTRING_PTR(str
), RSTRING_LEN(str
));
1329 arg
->buflen
= RSTRING_LEN(str
);
1332 return arg
->buf
[arg
->offset
++];
1336 r_byte(struct load_arg
*arg
)
1340 if (RB_TYPE_P(arg
->src
, T_STRING
)) {
1341 if (RSTRING_LEN(arg
->src
) > arg
->offset
) {
1342 c
= (unsigned char)RSTRING_PTR(arg
->src
)[arg
->offset
++];
1349 if (arg
->readable
>0 || arg
->buflen
> 0) {
1350 c
= r_byte1_buffered(arg
);
1353 VALUE v
= load_funcall(arg
, arg
->src
, s_getbyte
, 0, 0);
1354 if (NIL_P(v
)) rb_eof_error();
1355 c
= (unsigned char)NUM2CHR(v
);
1361 NORETURN(static void long_toobig(int size
));
1364 long_toobig(int size
)
1366 rb_raise(rb_eTypeError
, "long too big for this architecture (size "
1367 STRINGIZE(SIZEOF_LONG
)", given %d)", size
);
1371 r_long(struct load_arg
*arg
)
1374 int c
= (signed char)r_byte(arg
);
1377 if (c
== 0) return 0;
1379 if (4 < c
&& c
< 128) {
1382 if (c
> (int)sizeof(long)) long_toobig(c
);
1385 x
|= (long)r_byte(arg
) << (8*i
);
1389 if (-129 < c
&& c
< -4) {
1393 if (c
> (int)sizeof(long)) long_toobig(c
);
1396 x
&= ~((long)0xff << (8*i
));
1397 x
|= (long)r_byte(arg
) << (8*i
);
1404 ruby_marshal_read_long(const char **buf
, long len
)
1408 struct load_arg arg
;
1409 memset(&arg
, 0, sizeof(arg
));
1410 arg
.src
= rb_setup_fake_str(&src
, *buf
, len
, 0);
1417 r_bytes1(long len
, struct load_arg
*arg
)
1419 VALUE str
, n
= LONG2NUM(len
);
1421 str
= load_funcall(arg
, arg
->src
, s_read
, 1, &n
);
1422 if (NIL_P(str
)) too_short();
1424 if (RSTRING_LEN(str
) != len
) too_short();
1430 r_bytes1_buffered(long len
, struct load_arg
*arg
)
1434 if (len
<= arg
->buflen
) {
1435 str
= rb_str_new(arg
->buf
+arg
->offset
, len
);
1440 long buflen
= arg
->buflen
;
1441 long readable
= arg
->readable
+ 1;
1442 long tmp_len
, read_len
, need_len
= len
- buflen
;
1445 readable
= readable
< BUFSIZ
? readable
: BUFSIZ
;
1446 read_len
= need_len
> readable
? need_len
: readable
;
1447 n
= LONG2NUM(read_len
);
1448 tmp
= load_funcall(arg
, arg
->src
, s_read
, 1, &n
);
1449 if (NIL_P(tmp
)) too_short();
1452 tmp_len
= RSTRING_LEN(tmp
);
1454 if (tmp_len
< need_len
) too_short();
1456 str
= rb_str_new(arg
->buf
+arg
->offset
, buflen
);
1457 rb_str_cat(str
, RSTRING_PTR(tmp
), need_len
);
1459 if (tmp_len
> need_len
) {
1460 buflen
= tmp_len
- need_len
;
1461 memcpy(arg
->buf
, RSTRING_PTR(tmp
)+need_len
, buflen
);
1462 arg
->buflen
= buflen
;
1473 #define r_bytes(arg) r_bytes0(r_long(arg), (arg))
1476 r_bytes0(long len
, struct load_arg
*arg
)
1480 if (len
== 0) return rb_str_new(0, 0);
1481 if (RB_TYPE_P(arg
->src
, T_STRING
)) {
1482 if (RSTRING_LEN(arg
->src
) - arg
->offset
>= len
) {
1483 str
= rb_str_new(RSTRING_PTR(arg
->src
)+arg
->offset
, len
);
1491 if (arg
->readable
> 0 || arg
->buflen
> 0) {
1492 str
= r_bytes1_buffered(len
, arg
);
1495 str
= r_bytes1(len
, arg
);
1502 name_equal(const char *name
, size_t nlen
, const char *p
, long l
)
1504 if ((size_t)l
!= nlen
|| *p
!= *name
) return 0;
1505 return nlen
== 1 || memcmp(p
+1, name
+1, nlen
-1) == 0;
1509 sym2encidx(VALUE sym
, VALUE val
)
1511 static const char name_encoding
[8] = "encoding";
1514 if (rb_enc_get_index(sym
) != ENCINDEX_US_ASCII
) return -1;
1515 RSTRING_GETMEM(sym
, p
, l
);
1516 if (l
<= 0) return -1;
1517 if (name_equal(name_encoding
, sizeof(name_encoding
), p
, l
)) {
1518 int idx
= rb_enc_find_index(StringValueCStr(val
));
1521 if (name_equal(name_s_encoding_short
, rb_strlen_lit(name_s_encoding_short
), p
, l
)) {
1522 if (val
== Qfalse
) return rb_usascii_encindex();
1523 else if (val
== Qtrue
) return rb_utf8_encindex();
1530 symname_equal(VALUE sym
, const char *name
, size_t nlen
)
1534 if (rb_enc_get_index(sym
) != ENCINDEX_US_ASCII
) return 0;
1535 RSTRING_GETMEM(sym
, p
, l
);
1536 return name_equal(name
, nlen
, p
, l
);
1539 #define BUILD_ASSERT_POSITIVE(n) \
1540 /* make 0 negative to workaround the "zero size array" GCC extension, */ \
1541 ((sizeof(char [2*(ssize_t)(n)-1])+1)/2) /* assuming no overflow */
1542 #define symname_equal_lit(sym, sym_name) \
1543 symname_equal(sym, sym_name, BUILD_ASSERT_POSITIVE(rb_strlen_lit(sym_name)))
1546 r_symlink(struct load_arg
*arg
)
1549 long num
= r_long(arg
);
1551 if (!st_lookup(arg
->symbols
, num
, &sym
)) {
1552 rb_raise(rb_eArgError
, "bad symbol");
1558 r_symreal(struct load_arg
*arg
, int ivar
)
1560 VALUE s
= r_bytes(arg
);
1563 st_index_t n
= arg
->symbols
->num_entries
;
1565 if (rb_enc_str_asciionly_p(s
)) rb_enc_associate_index(s
, ENCINDEX_US_ASCII
);
1566 st_insert(arg
->symbols
, (st_data_t
)n
, (st_data_t
)s
);
1568 long num
= r_long(arg
);
1570 sym
= r_symbol(arg
);
1571 idx
= sym2encidx(sym
, r_object(arg
));
1575 rb_enc_associate_index(s
, idx
);
1576 if (is_broken_string(s
)) {
1577 rb_raise(rb_eArgError
, "invalid byte sequence in %s: %+"PRIsVALUE
,
1578 rb_enc_name(rb_enc_from_index(idx
)), s
);
1586 r_symbol(struct load_arg
*arg
)
1591 switch ((type
= r_byte(arg
))) {
1593 rb_raise(rb_eArgError
, "dump format error for symbol(0x%x)", type
);
1598 return r_symreal(arg
, ivar
);
1601 rb_raise(rb_eArgError
, "dump format error (symlink with encoding)");
1603 return r_symlink(arg
);
1608 r_unique(struct load_arg
*arg
)
1610 return r_symbol(arg
);
1614 r_string(struct load_arg
*arg
)
1616 return r_bytes(arg
);
1620 r_entry0(VALUE v
, st_index_t num
, struct load_arg
*arg
)
1622 st_data_t real_obj
= (st_data_t
)v
;
1623 if (arg
->compat_tbl
) {
1624 /* real_obj is kept if not found */
1625 st_lookup(arg
->compat_tbl
, v
, &real_obj
);
1627 st_insert(arg
->data
, num
, real_obj
);
1628 st_insert(arg
->partial_objects
, (st_data_t
)real_obj
, Qtrue
);
1633 r_fixup_compat(VALUE v
, struct load_arg
*arg
)
1636 st_data_t key
= (st_data_t
)v
;
1637 if (arg
->compat_tbl
&& st_delete(arg
->compat_tbl
, &key
, &data
)) {
1638 VALUE real_obj
= (VALUE
)data
;
1639 rb_alloc_func_t allocator
= rb_get_alloc_func(CLASS_OF(real_obj
));
1640 if (st_lookup(compat_allocator_tbl
, (st_data_t
)allocator
, &data
)) {
1641 marshal_compat_t
*compat
= (marshal_compat_t
*)data
;
1642 compat
->loader(real_obj
, v
);
/*
 * Pass a loaded object through the user-supplied proc.
 *
 * Calls arg->proc with `v` as its single argument via load_funcall() and
 * replaces `v` with the result.
 * NOTE(review): the guard that skips the call when no proc was given is not
 * visible in this listing -- confirm.
 */
1650 r_post_proc(VALUE v
, struct load_arg
*arg
)
1653 v
= load_funcall(arg
, arg
->proc
, s_call
, 1, &v
);
/*
 * Common epilogue run once an object has been read.
 *
 * Visible steps: apply the compat fix-up, remove the object from
 * arg->partial_objects, replace a String by its interned form, and pass the
 * object to r_post_proc().
 * NOTE(review): the conditions under which each step runs (presumably
 * involving `partial` and arg->freeze) are not visible in this listing.
 */
1659 r_leave(VALUE v
, struct load_arg
*arg
, bool partial
)
1661 v
= r_fixup_compat(v
, arg
);
1664 st_data_t key
= (st_data_t
)v
;
1665 st_delete(arg
->partial_objects
, &key
, &data
);
/* Modules and classes take a separate branch from strings. */
1667 if (RB_TYPE_P(v
, T_MODULE
) || RB_TYPE_P(v
, T_CLASS
)) {
1670 else if (RB_TYPE_P(v
, T_STRING
)) {
1671 v
= rb_str_to_interned_str(v
);
1677 v
= r_post_proc(v
, arg
);
/*
 * rb_ivar_foreach() callback used by r_copy_ivar().
 *
 * `arg` carries the destination object.  The instance variable `vid` is set
 * on it only when the destination does not define it yet, so existing
 * values are never overwritten.
 */
1683 copy_ivar_i(ID vid
, VALUE value
, st_data_t arg
)
1685 VALUE obj
= (VALUE
)arg
;
1687 if (!rb_ivar_defined(obj
, vid
))
1688 rb_ivar_set(obj
, vid
, value
);
/*
 * Copy the instance variables of `data` onto `v` by iterating them with
 * copy_ivar_i(); variables `v` already defines are left untouched.
 */
1693 r_copy_ivar(VALUE v
, VALUE data
)
1695 rb_ivar_foreach(data
, copy_ivar_i
, (st_data_t
)v
);
/*
 * Raise TypeError reporting an attempt to override an instance variable.
 * `type` must be a string literal: it is pasted into the format string.
 * NOTE(review): the final line of the macro, which supplies the argument
 * for the %PRIsVALUE conversion, is not visible in this listing.
 */
1699 #define override_ivar_error(type, str) \
1700 rb_raise(rb_eTypeError, \
1701 "can't override instance variable of "type" '%"PRIsVALUE"'", \
/*
 * Read instance variables from the stream and apply them to `obj`.
 *
 * Modules and classes are rejected with a TypeError.  For every other
 * object each entry is a symbol followed by a value:
 *   - when sym2encidx() yields an encoding index, the encoding is
 *     associated with `obj` (ArgumentError if `obj` is not encoding
 *     capable) and *has_encoding is set to TRUE when the pointer is given;
 *   - the ruby2_keywords flag name marks a Hash via rb_hash_ruby2_keywords()
 *     (ArgumentError for any other object);
 *   - otherwise the value is stored as an ordinary instance variable.
 * NOTE(review): the code that reads the entry count `len` and the test on
 * `idx` are not visible in this listing.
 */
1705 r_ivar(VALUE obj
, int *has_encoding
, struct load_arg
*arg
)
1711 if (RB_TYPE_P(obj
, T_MODULE
)) {
1712 override_ivar_error("module", rb_mod_name(obj
));
1714 else if (RB_TYPE_P(obj
, T_CLASS
)) {
1715 override_ivar_error("class", rb_class_name(obj
));
1718 VALUE sym
= r_symbol(arg
);
1719 VALUE val
= r_object(arg
);
1720 int idx
= sym2encidx(sym
, val
);
1722 if (rb_enc_capable(obj
)) {
1723 rb_enc_associate_index(obj
, idx
);
1726 rb_raise(rb_eArgError
, "%"PRIsVALUE
" is not enc_capable", obj
);
/* has_encoding is optional; callers may pass NULL. */
1728 if (has_encoding
) *has_encoding
= TRUE
;
1730 else if (symname_equal_lit(sym
, name_s_ruby2_keywords_flag
)) {
1731 if (RB_TYPE_P(obj
, T_HASH
)) {
1732 rb_hash_ruby2_keywords(obj
);
1735 rb_raise(rb_eArgError
, "ruby2_keywords flag is given but %"PRIsVALUE
" is not a Hash", obj
);
1739 rb_ivar_set(obj
, rb_intern_str(sym
), val
);
1741 } while (--len
> 0);
/*
 * Resolve `path` with rb_path_to_class() and require the result to be a
 * class.  Raises ArgumentError ("... does not refer to class") otherwise.
 */
1746 path2class(VALUE path
)
1748 VALUE v
= rb_path_to_class(path
);
1750 if (!RB_TYPE_P(v
, T_CLASS
)) {
1751 rb_raise(rb_eArgError
, "%"PRIsVALUE
" does not refer to class", path
);
/* Resolve `path` and require a module; note that `path` is evaluated twice. */
1756 #define path2module(path) must_be_module(rb_path_to_class(path), path)
/*
 * Check that `v` is a module.  `path` is only used in the ArgumentError
 * message ("... does not refer to module") raised when it is not.
 */
1759 must_be_module(VALUE v
, VALUE path
)
1761 if (!RB_TYPE_P(v
, T_MODULE
)) {
1762 rb_raise(rb_eArgError
, "%"PRIsVALUE
" does not refer to module", path
);
/*
 * Allocate an instance for `klass`, honouring registered compat classes.
 *
 * When the allocator of `klass` has an entry in compat_allocator_tbl, two
 * objects are allocated: the real one (of `klass`) and a stand-in of the
 * compat record's oldclass.  The stand-in -> real mapping is stored in
 * arg->compat_tbl, which is created on first use, and *oldclass is set
 * when the pointer is non-NULL.  Without a compat entry a plain
 * rb_obj_alloc(klass) is returned.
 * NOTE(review): the return statement of the compat branch is not visible in
 * this listing.
 */
1768 obj_alloc_by_klass(VALUE klass
, struct load_arg
*arg
, VALUE
*oldclass
)
1771 rb_alloc_func_t allocator
;
1773 allocator
= rb_get_alloc_func(klass
);
1774 if (st_lookup(compat_allocator_tbl
, (st_data_t
)allocator
, &data
)) {
1775 marshal_compat_t
*compat
= (marshal_compat_t
*)data
;
1776 VALUE real_obj
= rb_obj_alloc(klass
);
1777 VALUE obj
= rb_obj_alloc(compat
->oldclass
);
1778 if (oldclass
) *oldclass
= compat
->oldclass
;
/* The compat table is created lazily, on the first compat allocation. */
1780 if (!arg
->compat_tbl
) {
1781 arg
->compat_tbl
= rb_init_identtable();
1783 st_insert(arg
->compat_tbl
, (st_data_t
)obj
, (st_data_t
)real_obj
);
1787 return rb_obj_alloc(klass
);
/*
 * Resolve `path` to a class with path2class() and allocate an instance via
 * obj_alloc_by_klass(), without asking for the compat oldclass.
 */
1791 obj_alloc_by_path(VALUE path
, struct load_arg
*arg
)
1793 return obj_alloc_by_klass(path2class(path
), arg
, 0);
/*
 * Extend `obj` with modules taken from the array `extmod`.
 *
 * The index starts at the array length and is pre-decremented for each
 * access, so elements are visited from the last one downward.
 * NOTE(review): the loop condition is not visible in this listing.
 */
1797 append_extmod(VALUE obj
, VALUE extmod
)
1799 long i
= RARRAY_LEN(extmod
);
1801 VALUE m
= RARRAY_AREF(extmod
, --i
);
1802 rb_extend_object(obj
, m
);
/*
 * Raise via override_ivar_error() when the enclosing function's `ivp`
 * pointer is non-NULL and points at a non-zero flag; otherwise do nothing.
 * Relies on a variable named `ivp` being in scope at the expansion site.
 */
1807 #define prohibit_ivar(type, str) do { \
1808 if (!ivp || !*ivp) break; \
1809 override_ivar_error(type, str); \
/* Forward declaration: r_object0() calls r_object_for() before its definition. */
1812 static VALUE
r_object_for(struct load_arg
*arg
, bool partial
, int *ivp
, VALUE extmod
, int type
);
/*
 * Read the next type byte from the stream and load the object it
 * introduces by delegating to r_object_for() with the same arguments.
 */
1815 r_object0(struct load_arg
*arg
, bool partial
, int *ivp
, VALUE extmod
)
1817 int type
= r_byte(arg
);
1818 return r_object_for(arg
, partial
, ivp
, extmod
, type
);
/*
 * Load one object whose type byte `type` has already been read.
 *
 * arg     - loader state (stream, symbol/object tables, proc, flags)
 * partial - forwarded to r_leave() by most branches
 * ivp     - optional flag pointer; the class/module branches refuse to load
 *           when it is set (see prohibit_ivar), the symbol branch forwards
 *           its value to r_symreal()
 * extmod  - array of modules collected for extending the object, or nil
 *
 * NOTE(review): most `case` labels, local declarations and `break`/return
 * statements are not visible in this listing.  The section comments below
 * describe only what the visible statements do.
 */
1822 r_object_for(struct load_arg
*arg
, bool partial
, int *ivp
, VALUE extmod
, int type
)
/* Hash constructor; switched to the identity-hash variant further down. */
1824 VALUE (*hash_new_with_size
)(st_index_t
) = rb_hash_new_with_size
;
/* Object back-reference: the id must already be present in arg->data. */
1832 if (!st_lookup(arg
->data
, (st_data_t
)id
, &link
)) {
1833 rb_raise(rb_eArgError
, "dump format error (unlinked)");
/* The proc is applied only to objects not registered in partial_objects. */
1836 if (!st_lookup(arg
->partial_objects
, (st_data_t
)v
, &link
)) {
1837 v
= r_post_proc(v
, arg
);
/* Load the inner object as partial, then read its instance variables if `ivar` is set. */
1844 v
= r_object0(arg
, true, &ivar
, extmod
);
1845 if (ivar
) r_ivar(v
, NULL
, arg
);
1846 v
= r_leave(v
, arg
, partial
);
/* Object extended by a module: resolve the path and collect it in extmod. */
1852 VALUE path
= r_unique(arg
);
1853 VALUE m
= rb_path_to_class(path
);
1854 if (NIL_P(extmod
)) extmod
= rb_ary_hidden_new(0);
1856 if (RB_TYPE_P(m
, T_CLASS
)) { /* prepended */
1859 v
= r_object0(arg
, true, 0, Qnil
);
1861 if (c
!= m
|| FL_TEST(c
, FL_SINGLETON
)) {
1862 rb_raise(rb_eArgError
,
1863 "prepended class %"PRIsVALUE
" differs from class %"PRIsVALUE
,
1864 path
, rb_class_name(c
));
/* Prepend the collected modules to the object's singleton class. */
1866 c
= rb_singleton_class(v
);
1867 while (RARRAY_LEN(extmod
) > 0) {
1868 m
= rb_ary_pop(extmod
);
1869 rb_prepend_module(c
, m
);
1873 must_be_module(m
, path
);
1874 rb_ary_push(extmod
, m
);
1876 v
= r_object0(arg
, true, 0, extmod
);
/* Pop and apply whatever is still left in extmod. */
1877 while (RARRAY_LEN(extmod
) > 0) {
1878 m
= rb_ary_pop(extmod
);
1879 rb_extend_object(v
, m
);
1882 v
= r_leave(v
, arg
, partial
);
/* Instance of a user-defined subclass of a built-in class: read the class first. */
1888 VALUE c
= path2class(r_unique(arg
));
1890 if (FL_TEST(c
, FL_SINGLETON
)) {
1891 rb_raise(rb_eTypeError
, "singleton can't be loaded");
1894 if ((c
== rb_cHash
) &&
1895 /* Hack for compare_by_identity */
1896 (type
== TYPE_HASH
|| type
== TYPE_HASH_DEF
)) {
1897 hash_new_with_size
= rb_ident_hash_new_with_size
;
1900 v
= r_object_for(arg
, partial
, 0, extmod
, type
);
1901 if (RB_SPECIAL_CONST_P(v
) || RB_TYPE_P(v
, T_OBJECT
) || RB_TYPE_P(v
, T_CLASS
)) {
1904 if (RB_TYPE_P(v
, T_MODULE
) || !RTEST(rb_class_inherited_p(c
, RBASIC(v
)->klass
))) {
/* A throw-away instance of `c` is allocated just to compare internal types. */
1905 VALUE tmp
= rb_obj_alloc(c
);
1907 if (TYPE(v
) != TYPE(tmp
)) goto format_error
;
1909 RBASIC_SET_CLASS(v
, c
);
1914 rb_raise(rb_eArgError
, "dump format error (user class)");
1918 v
= r_leave(v
, arg
, false);
1923 v
= r_leave(v
, arg
, false);
1928 v
= r_leave(v
, arg
, false);
1933 long i
= r_long(arg
);
1936 v
= r_leave(v
, arg
, false);
/* Float payload: the literals "nan", "inf" and "-inf" are special-cased; anything else goes through strtod(). */
1942 VALUE str
= r_bytes(arg
);
1943 const char *ptr
= RSTRING_PTR(str
);
1945 if (strcmp(ptr
, "nan") == 0) {
1948 else if (strcmp(ptr
, "inf") == 0) {
1951 else if (strcmp(ptr
, "-inf") == 0) {
1956 d
= strtod(ptr
, &e
);
/* Bytes remaining after the part strtod() consumed are handed to load_mantissa(). */
1957 d
= load_mantissa(d
, e
, RSTRING_LEN(str
) - (e
- ptr
));
1960 v
= r_entry(v
, arg
);
1961 v
= r_leave(v
, arg
, false);
/* Big integer payload: `len` counts 16-bit units. */
1974 if (SIZEOF_VALUE
>= 8 && len
<= 4) {
1975 // Representable within uintptr, likely FIXNUM
/* Each unit is read as two bytes, low byte first. */
1977 for (int i
= 0; i
< len
; i
++) {
1978 num
|= (VALUE
)r_byte(arg
) << (i
* 16);
1979 num
|= (VALUE
)r_byte(arg
) << (i
* 16 + 8);
1981 #if SIZEOF_VALUE == SIZEOF_LONG
1987 v
= rb_int_uminus(v
);
/* General path: read len*2 bytes and unpack them as little-endian 2-byte words. */
1991 data
= r_bytes0(len
* 2, arg
);
1992 v
= rb_integer_unpack(RSTRING_PTR(data
), len
, 2, 0,
1993 INTEGER_PACK_LITTLE_ENDIAN
| (sign
== '-' ? INTEGER_PACK_NEGATIVE
: 0));
/* The temporary byte string is shrunk to zero length once unpacked. */
1994 rb_str_resize(data
, 0L);
1996 v
= r_entry(v
, arg
);
1997 v
= r_leave(v
, arg
, false);
/* String payload. */
2002 v
= r_entry(r_string(arg
), arg
);
2003 v
= r_leave(v
, arg
, partial
);
/* Regexp payload: source bytes followed by one options byte. */
2008 VALUE str
= r_bytes(arg
);
2009 int options
= r_byte(arg
);
2010 int has_encoding
= FALSE
;
/* The link id is reserved before the Regexp object exists; it is bound by r_entry0() below. */
2011 st_index_t idx
= r_prepare(arg
);
2014 r_ivar(str
, &has_encoding
, arg
);
2017 if (!has_encoding
) {
2018 /* 1.8 compatibility; remove escapes undefined in 1.8 */
/* The source is compacted in place: `src` reads, `dst` writes, both start at `ptr`. */
2019 char *ptr
= RSTRING_PTR(str
), *dst
= ptr
, *src
= ptr
;
2020 long len
= RSTRING_LEN(str
);
2022 for (; len
-- > 0; *dst
++ = *src
++) {
2024 case '\\': bs
++; break;
2025 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
2026 case 'm': case 'o': case 'p': case 'q': case 'u': case 'y':
2027 case 'E': case 'F': case 'H': case 'I': case 'J': case 'K':
2028 case 'L': case 'N': case 'O': case 'P': case 'Q': case 'R':
2029 case 'S': case 'T': case 'U': case 'V': case 'X': case 'Y':
2032 default: bs
= 0; break;
2035 rb_str_set_len(str
, dst
- ptr
);
/* Instance variables were read onto the source string; copy them to the Regexp. */
2037 VALUE regexp
= rb_reg_new_str(str
, options
);
2038 r_copy_ivar(regexp
, str
);
2040 v
= r_entry0(regexp
, idx
, arg
);
2041 v
= r_leave(v
, arg
, partial
);
/* Array payload: element count followed by the elements. */
2047 long len
= r_long(arg
);
2049 v
= rb_ary_new2(len
);
/* The array is registered before its elements are read. */
2050 v
= r_entry(v
, arg
);
/* NOTE(review): the meaning of arg->readable is not visible here; presumably an input-size budget -- confirm. */
2051 arg
->readable
+= len
- 1;
2053 rb_ary_push(v
, r_object(arg
));
2056 v
= r_leave(v
, arg
, partial
);
/* Hash payload: pair count followed by key/value pairs. */
2065 long len
= r_long(arg
);
2067 v
= hash_new_with_size(len
);
2068 v
= r_entry(v
, arg
);
2069 arg
->readable
+= (len
- 1) * 2;
/* The key is read before its value. */
2071 VALUE key
= r_object(arg
);
2072 VALUE value
= r_object(arg
);
2073 rb_hash_aset(v
, key
, value
);
/* A hash with a default value carries one extra trailing object. */
2077 if (type
== TYPE_HASH_DEF
) {
2078 RHASH_SET_IFNONE(v
, r_object(arg
));
2080 v
= r_leave(v
, arg
, partial
);
/* Struct payload: class name, member count, then name/value pairs. */
2089 st_index_t idx
= r_prepare(arg
);
2090 VALUE klass
= path2class(r_unique(arg
));
2091 long len
= r_long(arg
);
2093 v
= rb_obj_alloc(klass
);
2094 if (!RB_TYPE_P(v
, T_STRUCT
)) {
2095 rb_raise(rb_eTypeError
, "class %"PRIsVALUE
" not a struct", rb_class_name(klass
));
/* The dumped member count must equal the current definition's member count. */
2097 mem
= rb_struct_s_members(klass
);
2098 if (RARRAY_LEN(mem
) != len
) {
2099 rb_raise(rb_eTypeError
, "struct %"PRIsVALUE
" not compatible (struct size differs)",
2100 rb_class_name(klass
));
2103 arg
->readable
+= (len
- 1) * 2;
2104 v
= r_entry0(v
, idx
, arg
);
2105 values
= rb_ary_new2(len
);
/* keyword_init structs get their members in a single hash, which is the only entry of `values`. */
2107 VALUE keywords
= Qfalse
;
2108 if (RTEST(rb_struct_s_keyword_init(klass
))) {
2109 keywords
= rb_hash_new();
2110 rb_ary_push(values
, keywords
);
2113 for (i
=0; i
<len
; i
++) {
/* Each dumped member name must match the current definition at the same position. */
2114 VALUE n
= rb_sym2str(RARRAY_AREF(mem
, i
));
2115 slot
= r_symbol(arg
);
2117 if (!rb_str_equal(n
, slot
)) {
2118 rb_raise(rb_eTypeError
, "struct %"PRIsVALUE
" not compatible (:%"PRIsVALUE
" for :%"PRIsVALUE
")",
2119 rb_class_name(klass
),
2123 rb_hash_aset(keywords
, RARRAY_AREF(mem
, i
), r_object(arg
));
2126 rb_ary_push(values
, r_object(arg
));
2131 rb_struct_initialize(v
, values
);
2132 v
= r_leave(v
, arg
, partial
);
/* Object restored through the class method _load. */
2139 VALUE name
= r_unique(arg
);
2140 VALUE klass
= path2class(name
);
2144 if (!rb_obj_respond_to(klass
, s_load
, TRUE
)) {
2145 rb_raise(rb_eTypeError
, "class %"PRIsVALUE
" needs to have method '_load'",
2148 data
= r_string(arg
);
2150 r_ivar(data
, NULL
, arg
);
2153 v
= load_funcall(arg
, klass
, s_load
, 1, &data
);
2154 v
= r_entry(v
, arg
);
/* If a compat record is registered for the class's allocator, its loader is applied to the result. */
2155 if (st_lookup(compat_allocator_tbl
, (st_data_t
)rb_get_alloc_func(klass
), &d
)) {
2156 marshal_compat_t
*compat
= (marshal_compat_t
*)d
;
2157 v
= compat
->loader(klass
, v
);
2163 v
= r_post_proc(v
, arg
);
/* Object restored through the instance method marshal_load. */
2168 case TYPE_USRMARSHAL
:
2170 VALUE name
= r_unique(arg
);
2171 VALUE klass
= path2class(name
);
2175 v
= obj_alloc_by_klass(klass
, arg
, &oldclass
);
2176 if (!NIL_P(extmod
)) {
2177 /* for the case marshal_load is overridden */
2178 append_extmod(v
, extmod
);
2180 if (!rb_obj_respond_to(v
, s_mload
, TRUE
)) {
2181 rb_raise(rb_eTypeError
, "instance of %"PRIsVALUE
" needs to have method 'marshal_load'",
/* The object is registered before its payload is read. */
2184 v
= r_entry(v
, arg
);
2185 data
= r_object(arg
);
2186 load_funcall(arg
, v
, s_mload
, 1, &data
);
2187 v
= r_fixup_compat(v
, arg
);
2188 v
= r_copy_ivar(v
, data
);
2192 v
= r_post_proc(v
, arg
);
/* extmod is re-applied only when a compat oldclass was involved, and is always emptied. */
2193 if (!NIL_P(extmod
)) {
2194 if (oldclass
) append_extmod(v
, extmod
);
2195 rb_ary_clear(extmod
);
/* Plain object: class name followed by instance variables. */
2202 st_index_t idx
= r_prepare(arg
);
2203 v
= obj_alloc_by_path(r_unique(arg
), arg
);
2204 if (!RB_TYPE_P(v
, T_OBJECT
)) {
2205 rb_raise(rb_eArgError
, "dump format error");
2207 v
= r_entry0(v
, idx
, arg
);
2208 r_ivar(v
, NULL
, arg
);
2209 v
= r_leave(v
, arg
, partial
);
/* T_DATA object restored through the instance method _load_data. */
2215 VALUE name
= r_unique(arg
);
2216 VALUE klass
= path2class(name
);
2220 v
= obj_alloc_by_klass(klass
, arg
, &oldclass
);
2221 if (!RB_TYPE_P(v
, T_DATA
)) {
2222 rb_raise(rb_eArgError
, "dump format error");
2224 v
= r_entry(v
, arg
);
2225 if (!rb_obj_respond_to(v
, s_load_data
, TRUE
)) {
2226 rb_raise(rb_eTypeError
,
2227 "class %"PRIsVALUE
" needs to have instance method '_load_data'",
2230 r
= r_object0(arg
, partial
, 0, extmod
);
2231 load_funcall(arg
, v
, s_load_data
, 1, &r
);
2232 v
= r_leave(v
, arg
, partial
);
/* Old-format reference that may name either a class or a module. */
2236 case TYPE_MODULE_OLD
:
2238 VALUE str
= r_bytes(arg
);
2240 v
= rb_path_to_class(str
);
2241 prohibit_ivar("class/module", str
);
2242 v
= r_entry(v
, arg
);
2243 v
= r_leave(v
, arg
, partial
);
/* Class reference by path; must resolve to a class. */
2249 VALUE str
= r_bytes(arg
);
2251 v
= path2class(str
);
2252 prohibit_ivar("class", str
);
2253 v
= r_entry(v
, arg
);
2254 v
= r_leave(v
, arg
, partial
);
/* Module reference by path; must resolve to a module. */
2260 VALUE str
= r_bytes(arg
);
2262 v
= path2module(str
);
2263 prohibit_ivar("module", str
);
2264 v
= r_entry(v
, arg
);
2265 v
= r_leave(v
, arg
, partial
);
/* Symbol: the ivar flag is taken from *ivp in one branch and is 0 in the other. */
2271 v
= r_symreal(arg
, *ivp
);
2275 v
= r_symreal(arg
, 0);
2277 v
= rb_str_intern(v
);
2278 v
= r_leave(v
, arg
, partial
);
/* Symbol back-reference. */
2282 v
= rb_str_intern(r_symlink(arg
));
/* Unknown type byte. */
2286 rb_raise(rb_eArgError
, "dump format error(0x%x)", type
);
2291 rb_raise(rb_eArgError
, "dump format error (bad link)");
/*
 * Read one complete object from the stream: not partial, no ivar flag
 * pointer and no pending extension modules.
 */
2298 r_object(struct load_arg
*arg
)
2300 return r_object0(arg
, false, 0, Qnil
);
/*
 * Release the tables owned by a load_arg.
 *
 * Returns immediately when arg->symbols is NULL.  Otherwise frees the
 * symbols, data and partial_objects tables and, when present, the compat
 * table.
 * NOTE(review): the statements that reset arg->symbols and arg->data, and
 * anything that releases arg->buf, are not visible in this listing.
 */
2304 clear_load_arg(struct load_arg
*arg
)
2311 if (!arg
->symbols
) return;
2312 st_free_table(arg
->symbols
);
2314 st_free_table(arg
->data
);
2316 st_free_table(arg
->partial_objects
);
2317 arg
->partial_objects
= 0;
/* compat_tbl is created lazily, so it may never have been allocated. */
2318 if (arg
->compat_tbl
) {
2319 st_free_table(arg
->compat_tbl
);
2320 arg
->compat_tbl
= 0;
/*
 * Load one marshaled object from `port`.
 *
 * port   - a String (checked with rb_check_string_type), or an object that
 *          responds to both getbyte and read; binmode is invoked on the
 *          latter when it is available
 * proc   - stored in arg->proc when not nil
 * freeze - stored in arg->freeze
 *
 * The first two bytes of the stream are the major and minor format version.
 * A different major version, or a minor version newer than MARSHAL_MINOR,
 * raises TypeError.  A differing minor version only produces a warning,
 * and only when ruby_verbose is set.
 * NOTE(review): the call that reads the top-level object and the return
 * statement are not visible in this listing.
 */
2325 rb_marshal_load_with_proc(VALUE port
, VALUE proc
, bool freeze
)
2329 VALUE wrapper
; /* used to avoid memory leak in case of exception */
2330 struct load_arg
*arg
;
2332 v
= rb_check_string_type(port
);
2336 else if (rb_respond_to(port
, s_getbyte
) && rb_respond_to(port
, s_read
)) {
/* rb_check_funcall: binmode is called only if the port implements it. */
2337 rb_check_funcall(port
, s_binmode
, 0, 0);
2342 wrapper
= TypedData_Make_Struct(0, struct load_arg
, &load_arg_data
, arg
);
2345 arg
->symbols
= st_init_numtable();
2346 arg
->data
= rb_init_identtable();
2347 arg
->partial_objects
= rb_init_identtable();
2348 arg
->compat_tbl
= 0;
2351 arg
->freeze
= freeze
;
2354 arg
->buf
= xmalloc(BUFSIZ
);
2358 major
= r_byte(arg
);
2359 minor
= r_byte(arg
);
2360 if (major
!= MARSHAL_MAJOR
|| minor
> MARSHAL_MINOR
) {
/* The tables are released before raising. */
2361 clear_load_arg(arg
);
2362 rb_raise(rb_eTypeError
, "incompatible marshal file format (can't be read)\n\
2363 \tformat version %d.%d required; %d.%d given",
2364 MARSHAL_MAJOR
, MARSHAL_MINOR
, major
, minor
);
2366 if (RTEST(ruby_verbose
) && minor
!= MARSHAL_MINOR
) {
2367 rb_warn("incompatible marshal file format (can be read)\n\
2368 \tformat version %d.%d required; %d.%d given",
2369 MARSHAL_MAJOR
, MARSHAL_MINOR
, major
, minor
);
2372 if (!NIL_P(proc
)) arg
->proc
= proc
;
2374 clear_load_arg(arg
);
/* Keep `wrapper` referenced up to this point so the GC cannot reclaim it earlier. */
2375 RB_GC_GUARD(wrapper
);
/*
 * Entry point taking an execution context and receiver; both `ec` and `mod`
 * are unused.  Converts `freeze` to a C boolean with RTEST and delegates to
 * rb_marshal_load_with_proc().
 */
2381 marshal_load(rb_execution_context_t
*ec
, VALUE mod
, VALUE source
, VALUE proc
, VALUE freeze
)
2383 return rb_marshal_load_with_proc(source
, proc
, RTEST(freeze
));
2386 #include "marshal.rbinc"
2389 * The marshaling library converts collections of Ruby objects into a
2390 * byte stream, allowing them to be stored outside the currently
2391 * active script. This data may subsequently be read and the original
2392 * objects reconstituted.
2394 * Marshaled data has major and minor version numbers stored along
2395 * with the object information. In normal use, marshaling can only
2396 * load data written with the same major version number and an equal
2397 * or lower minor version number. If Ruby's ``verbose'' flag is set
2398 * (normally using -d, -v, -w, or --verbose), a warning is issued when the
2399 * minor numbers do not match exactly. Marshal versioning is independent of
2400 * Ruby's version numbers. You can extract the version by reading the
2401 * first two bytes of marshaled data.
2403 * str = Marshal.dump("thing")
2404 * RUBY_VERSION #=> "1.9.0"
2408 * Some objects cannot be dumped: if the objects to be dumped include
2409 * bindings, procedure or method objects, instances of class IO, or
2410 * singleton objects, a TypeError will be raised.
2412 * If your class has special serialization needs (for example, if you
2413 * want to serialize in some specific format), or if it contains
2414 * objects that would otherwise not be serializable, you can implement
2415 * your own serialization strategy.
2417 * There are two ways of doing this: your object can define either
2418 * marshal_dump and marshal_load or _dump and _load. marshal_dump will take
2419 * precedence over _dump if both are defined. marshal_dump may result in
2420 * smaller Marshal strings.
2422 * == Security considerations
2424 * By design, Marshal.load can deserialize almost any class loaded into the
2425 * Ruby process. In many cases this can lead to remote code execution if the
2426 * Marshal data is loaded from an untrusted source.
2428 * As a result, Marshal.load is not suitable as a general purpose serialization
2429 * format and you should never unmarshal user supplied input or other untrusted
2432 * If you need to deserialize untrusted data, use JSON or another serialization
2433 * format that is only able to load simple, 'primitive' types such as String,
2434 * Array, Hash, etc. Never allow user input to specify arbitrary types to
2437 * == marshal_dump and marshal_load
2439 * When dumping an object the method marshal_dump will be called.
2440 * marshal_dump must return a result containing the information necessary for
2441 * marshal_load to reconstitute the object. The result can be any object.
2443 * When loading an object dumped using marshal_dump the object is first
2444 * allocated then marshal_load is called with the result from marshal_dump.
2445 * marshal_load must recreate the object from the information in the result.
2450 * def initialize name, version, data
2452 * @version = version
2460 * def marshal_load array
2461 * @name, @version = array
2465 * == _dump and _load
2467 * Use _dump and _load when you need to allocate the object you're restoring
2470 * When dumping an object the instance method _dump is called with an Integer
2471 * which indicates the maximum depth of objects to dump (a value of -1 implies
2472 * that you should disable depth checking). _dump must return a String
2473 * containing the information necessary to reconstitute the object.
2475 * The class method _load should take a String and use it to return an object
2476 * of the same class.
2481 * def initialize name, version, data
2483 * @version = version
2488 * [@name, @version].join ':'
2491 * def self._load args
2492 * new(*args.split(':'))
2496 * Since Marshal.dump outputs a string you can have _dump return a Marshal
2497 * string which is Marshal.loaded in _load for complex objects.
/*
 * Module set-up for Marshal.
 * NOTE(review): the header of the enclosing function and several set_id()
 * lines are not visible in this listing.
 *
 * Defines the Marshal module, interns the method-name IDs used by this
 * file, registers Marshal.dump and publishes the format version constants.
 */
2502 VALUE rb_mMarshal
= rb_define_module("Marshal");
/* set_id(s_foo) assigns s_foo = rb_intern_const(name_s_foo). */
2503 #define set_id(sym) sym = rb_intern_const(name_##sym)
2508 set_id(s_dump_data
);
2509 set_id(s_load_data
);
2516 set_id(s_encoding_short
);
2517 set_id(s_ruby2_keywords_flag
);
/* Arity -1: marshal_dump receives its arguments as argc/argv. */
2519 rb_define_module_function(rb_mMarshal
, "dump", marshal_dump
, -1);
2522 rb_define_const(rb_mMarshal
, "MAJOR_VERSION", INT2FIX(MARSHAL_MAJOR
));
2524 rb_define_const(rb_mMarshal
, "MINOR_VERSION", INT2FIX(MARSHAL_MINOR
));
/*
 * st_foreach() callback: frees the marshal_compat_t stored as the entry's
 * value.  The key and the extra argument are unused.
 */
2528 free_compat_i(st_data_t key
, st_data_t value
, st_data_t _
)
2530 xfree((marshal_compat_t
*)value
);
/*
 * Free function for the compat allocator table wrapper: releases every
 * compat record via free_compat_i(), then the table itself.
 */
2535 free_compat_allocator_table(void *data
)
2537 st_foreach(data
, free_compat_i
, 0);
2538 st_free_table(data
);
/*
 * Return the compat allocator table, creating it on first use.
 *
 * On creation the table is wrapped in a Data object with
 * mark_marshal_compat_t and free_compat_allocator_table as its mark and
 * free functions, and the wrapper is registered with
 * rb_vm_register_global_object().
 */
2542 compat_allocator_table(void)
2544 if (compat_allocator_tbl
) return compat_allocator_tbl
;
2545 compat_allocator_tbl
= st_init_numtable();
/* Data_Wrap_Struct is the untyped-data API; its deprecation warning is switched off here. */
2546 #undef RUBY_UNTYPED_DATA_WARNING
2547 #define RUBY_UNTYPED_DATA_WARNING 0
2548 compat_allocator_tbl_wrapper
=
2549 Data_Wrap_Struct(0, mark_marshal_compat_t
, free_compat_allocator_table
, compat_allocator_tbl
);
2550 rb_vm_register_global_object(compat_allocator_tbl_wrapper
);
2551 return compat_allocator_tbl
;
/* Dump `obj` to `port` by calling rb_marshal_dump_limited() with a limit of -1. */
2555 rb_marshal_dump(VALUE obj
, VALUE port
)
2557 return rb_marshal_dump_limited(obj
, port
, -1);
/* Load one object from `port` with no proc and without freezing. */
2561 rb_marshal_load(VALUE port
)
2563 return rb_marshal_load_with_proc(port
, Qnil
, false);