2 /* Write Python objects to files and read them back.
3 This is intended for writing and reading compiled Python code only;
4 a true persistent storage facility would be much harder, since
5 it would have to take circular links and sharing into account. */
7 #define PY_SSIZE_T_CLEAN
10 #include "longintrepr.h"
14 /* High water mark to determine when the marshalled object is dangerously deep
15 * and risks coring the interpreter. When the object stack gets this deep,
16 * raise an exception instead of continuing.
18 #define MAX_MARSHAL_STACK_DEPTH 5000
22 #define TYPE_FALSE 'F'
24 #define TYPE_STOPITER 'S'
25 #define TYPE_ELLIPSIS '.'
27 #define TYPE_INT64 'I'
28 #define TYPE_FLOAT 'f'
29 #define TYPE_BINARY_FLOAT 'g'
30 #define TYPE_COMPLEX 'x'
31 #define TYPE_BINARY_COMPLEX 'y'
33 #define TYPE_STRING 's'
34 #define TYPE_INTERNED 't'
35 #define TYPE_STRINGREF 'R'
36 #define TYPE_TUPLE '('
40 #define TYPE_UNICODE 'u'
41 #define TYPE_UNKNOWN '?'
43 #define TYPE_FROZENSET '>'
49 /* If fp == NULL, the following are valid: */
53 PyObject
*strings
; /* dict on marshal, list on unmarshal */
57 #define w_byte(c, p) if (((p)->fp)) putc((c), (p)->fp); \
58 else if ((p)->ptr != (p)->end) *(p)->ptr++ = (c); \
62 w_more(int c
, WFILE
*p
)
64 Py_ssize_t size
, newsize
;
66 return; /* An error already occurred */
67 size
= PyString_Size(p
->str
);
68 newsize
= size
+ 1024;
69 if (_PyString_Resize(&p
->str
, newsize
) != 0) {
70 p
->ptr
= p
->end
= NULL
;
73 p
->ptr
= PyString_AS_STRING((PyStringObject
*)p
->str
) + size
;
75 PyString_AS_STRING((PyStringObject
*)p
->str
) + newsize
;
76 *p
->ptr
++ = Py_SAFE_DOWNCAST(c
, int, char);
81 w_string(char *s
, int n
, WFILE
*p
)
84 fwrite(s
, 1, n
, p
->fp
);
95 w_short(int x
, WFILE
*p
)
97 w_byte((char)( x
& 0xff), p
);
98 w_byte((char)((x
>> 8) & 0xff), p
);
102 w_long(long x
, WFILE
*p
)
104 w_byte((char)( x
& 0xff), p
);
105 w_byte((char)((x
>> 8) & 0xff), p
);
106 w_byte((char)((x
>>16) & 0xff), p
);
107 w_byte((char)((x
>>24) & 0xff), p
);
112 w_long64(long x
, WFILE
*p
)
120 w_object(PyObject
*v
, WFILE
*p
)
126 if (p
->depth
> MAX_MARSHAL_STACK_DEPTH
) {
129 else if (v
== NULL
) {
130 w_byte(TYPE_NULL
, p
);
132 else if (v
== Py_None
) {
133 w_byte(TYPE_NONE
, p
);
135 else if (v
== PyExc_StopIteration
) {
136 w_byte(TYPE_STOPITER
, p
);
138 else if (v
== Py_Ellipsis
) {
139 w_byte(TYPE_ELLIPSIS
, p
);
141 else if (v
== Py_False
) {
142 w_byte(TYPE_FALSE
, p
);
144 else if (v
== Py_True
) {
145 w_byte(TYPE_TRUE
, p
);
147 else if (PyInt_Check(v
)) {
148 long x
= PyInt_AS_LONG((PyIntObject
*)v
);
150 long y
= Py_ARITHMETIC_RIGHT_SHIFT(long, x
, 31);
152 w_byte(TYPE_INT64
, p
);
162 else if (PyLong_Check(v
)) {
163 PyLongObject
*ob
= (PyLongObject
*)v
;
164 w_byte(TYPE_LONG
, p
);
169 for (i
= 0; i
< n
; i
++)
170 w_short(ob
->ob_digit
[i
], p
);
172 else if (PyFloat_Check(v
)) {
173 if (p
->version
> 1) {
174 unsigned char buf
[8];
175 if (_PyFloat_Pack8(PyFloat_AsDouble(v
),
180 w_byte(TYPE_BINARY_FLOAT
, p
);
181 w_string((char*)buf
, 8, p
);
184 char buf
[256]; /* Plenty to format any double */
185 PyFloat_AsReprString(buf
, (PyFloatObject
*)v
);
187 w_byte(TYPE_FLOAT
, p
);
192 #ifndef WITHOUT_COMPLEX
193 else if (PyComplex_Check(v
)) {
194 if (p
->version
> 1) {
195 unsigned char buf
[8];
196 if (_PyFloat_Pack8(PyComplex_RealAsDouble(v
),
201 w_byte(TYPE_BINARY_COMPLEX
, p
);
202 w_string((char*)buf
, 8, p
);
203 if (_PyFloat_Pack8(PyComplex_ImagAsDouble(v
),
208 w_string((char*)buf
, 8, p
);
211 char buf
[256]; /* Plenty to format any double */
213 w_byte(TYPE_COMPLEX
, p
);
214 temp
= (PyFloatObject
*)PyFloat_FromDouble(
215 PyComplex_RealAsDouble(v
));
216 PyFloat_AsReprString(buf
, temp
);
218 n
= (int)strlen(buf
);
221 temp
= (PyFloatObject
*)PyFloat_FromDouble(
222 PyComplex_ImagAsDouble(v
));
223 PyFloat_AsReprString(buf
, temp
);
225 n
= (int)strlen(buf
);
231 else if (PyString_Check(v
)) {
232 if (p
->strings
&& PyString_CHECK_INTERNED(v
)) {
233 PyObject
*o
= PyDict_GetItem(p
->strings
, v
);
235 long w
= PyInt_AsLong(o
);
236 w_byte(TYPE_STRINGREF
, p
);
241 o
= PyInt_FromSsize_t(PyDict_Size(p
->strings
));
242 PyDict_SetItem(p
->strings
, v
, o
);
244 w_byte(TYPE_INTERNED
, p
);
248 w_byte(TYPE_STRING
, p
);
250 n
= PyString_GET_SIZE(v
);
252 w_string(PyString_AS_STRING(v
), n
, p
);
254 #ifdef Py_USING_UNICODE
255 else if (PyUnicode_Check(v
)) {
257 utf8
= PyUnicode_AsUTF8String(v
);
263 w_byte(TYPE_UNICODE
, p
);
264 n
= PyString_GET_SIZE(utf8
);
266 w_string(PyString_AS_STRING(utf8
), n
, p
);
270 else if (PyTuple_Check(v
)) {
271 w_byte(TYPE_TUPLE
, p
);
274 for (i
= 0; i
< n
; i
++) {
275 w_object(PyTuple_GET_ITEM(v
, i
), p
);
278 else if (PyList_Check(v
)) {
279 w_byte(TYPE_LIST
, p
);
280 n
= PyList_GET_SIZE(v
);
282 for (i
= 0; i
< n
; i
++) {
283 w_object(PyList_GET_ITEM(v
, i
), p
);
286 else if (PyDict_Check(v
)) {
288 PyObject
*key
, *value
;
289 w_byte(TYPE_DICT
, p
);
290 /* This one is NULL object terminated! */
292 while (PyDict_Next(v
, &pos
, &key
, &value
)) {
296 w_object((PyObject
*)NULL
, p
);
298 else if (PyAnySet_Check(v
)) {
299 PyObject
*value
, *it
;
301 if (PyObject_TypeCheck(v
, &PySet_Type
))
304 w_byte(TYPE_FROZENSET
, p
);
305 n
= PyObject_Size(v
);
312 it
= PyObject_GetIter(v
);
318 while ((value
= PyIter_Next(it
)) != NULL
) {
323 if (PyErr_Occurred()) {
329 else if (PyCode_Check(v
)) {
330 PyCodeObject
*co
= (PyCodeObject
*)v
;
331 w_byte(TYPE_CODE
, p
);
332 w_long(co
->co_argcount
, p
);
333 w_long(co
->co_nlocals
, p
);
334 w_long(co
->co_stacksize
, p
);
335 w_long(co
->co_flags
, p
);
336 w_object(co
->co_code
, p
);
337 w_object(co
->co_consts
, p
);
338 w_object(co
->co_names
, p
);
339 w_object(co
->co_varnames
, p
);
340 w_object(co
->co_freevars
, p
);
341 w_object(co
->co_cellvars
, p
);
342 w_object(co
->co_filename
, p
);
343 w_object(co
->co_name
, p
);
344 w_long(co
->co_firstlineno
, p
);
345 w_object(co
->co_lnotab
, p
);
347 else if (PyObject_CheckReadBuffer(v
)) {
348 /* Write unknown buffer-style objects as a string */
350 PyBufferProcs
*pb
= v
->ob_type
->tp_as_buffer
;
351 w_byte(TYPE_STRING
, p
);
352 n
= (*pb
->bf_getreadbuffer
)(v
, 0, (void **)&s
);
357 w_byte(TYPE_UNKNOWN
, p
);
364 /* version currently has no effect for writing longs. */
366 PyMarshal_WriteLongToFile(long x
, FILE *fp
, int version
)
373 wf
.version
= version
;
378 PyMarshal_WriteObjectToFile(PyObject
*x
, FILE *fp
, int version
)
384 wf
.strings
= (version
> 0) ? PyDict_New() : NULL
;
385 wf
.version
= version
;
387 Py_XDECREF(wf
.strings
);
390 typedef WFILE RFILE
; /* Same struct with different invariants */
392 #define rs_byte(p) (((p)->ptr != (p)->end) ? (unsigned char)*(p)->ptr++ : EOF)
394 #define r_byte(p) ((p)->fp ? getc((p)->fp) : rs_byte(p))
397 r_string(char *s
, int n
, RFILE
*p
)
400 /* The result fits into int because it must be <=n. */
401 return (int)fread(s
, 1, n
, p
->fp
);
402 if (p
->end
- p
->ptr
< n
)
403 n
= (int)(p
->end
- p
->ptr
);
404 memcpy(s
, p
->ptr
, n
);
415 /* Sign-extension, in case short is wider than 16 bits */
424 register FILE *fp
= p
->fp
;
427 x
|= (long)getc(fp
) << 8;
428 x
|= (long)getc(fp
) << 16;
429 x
|= (long)getc(fp
) << 24;
433 x
|= (long)rs_byte(p
) << 8;
434 x
|= (long)rs_byte(p
) << 16;
435 x
|= (long)rs_byte(p
) << 24;
438 /* Sign extension for 64-bit machines */
439 x
|= -(x
& 0x80000000L
);
444 /* r_long64 deals with the TYPE_INT64 code. On a machine with
445 sizeof(long) > 4, it returns a Python int object, else a Python long
446 object. Note that w_long64 writes out TYPE_INT if 32 bits is enough,
447 so there's no inefficiency here in returning a PyLong on 32-bit boxes
448 for everything written via TYPE_INT64 (i.e., if an int is written via
449 TYPE_INT64, it *needs* more than 32 bits).
454 long lo4
= r_long(p
);
455 long hi4
= r_long(p
);
457 long x
= (hi4
<< 32) | (lo4
& 0xFFFFFFFFL
);
458 return PyInt_FromLong(x
);
460 unsigned char buf
[8];
462 int is_little_endian
= (int)*(char*)&one
;
463 if (is_little_endian
) {
464 memcpy(buf
, &lo4
, 4);
465 memcpy(buf
+4, &hi4
, 4);
468 memcpy(buf
, &hi4
, 4);
469 memcpy(buf
+4, &lo4
, 4);
471 return _PyLong_FromByteArray(buf
, 8, is_little_endian
, 1);
478 /* NULL is a valid return value; it does not necessarily mean that
479 an exception is set. */
480 PyObject
*v
, *v2
, *v3
;
482 int type
= r_byte(p
);
487 PyErr_SetString(PyExc_EOFError
,
488 "EOF read where object expected");
499 Py_INCREF(PyExc_StopIteration
);
500 return PyExc_StopIteration
;
503 Py_INCREF(Py_Ellipsis
);
515 return PyInt_FromLong(r_long(p
));
526 ob
= _PyLong_New(size
);
530 for (i
= 0; i
< size
; i
++) {
531 int digit
= r_short(p
);
534 PyErr_SetString(PyExc_ValueError
,
538 ob
->ob_digit
[i
] = digit
;
540 return (PyObject
*)ob
;
548 if (n
== EOF
|| r_string(buf
, (int)n
, p
) != n
) {
549 PyErr_SetString(PyExc_EOFError
,
550 "EOF read where object expected");
554 PyFPE_START_PROTECT("atof", return 0)
555 dx
= PyOS_ascii_atof(buf
);
556 PyFPE_END_PROTECT(dx
)
557 return PyFloat_FromDouble(dx
);
560 case TYPE_BINARY_FLOAT
:
562 unsigned char buf
[8];
564 if (r_string((char*)buf
, 8, p
) != 8) {
565 PyErr_SetString(PyExc_EOFError
,
566 "EOF read where object expected");
569 x
= _PyFloat_Unpack8(buf
, 1);
570 if (x
== -1.0 && PyErr_Occurred()) {
573 return PyFloat_FromDouble(x
);
576 #ifndef WITHOUT_COMPLEX
582 if (n
== EOF
|| r_string(buf
, (int)n
, p
) != n
) {
583 PyErr_SetString(PyExc_EOFError
,
584 "EOF read where object expected");
588 PyFPE_START_PROTECT("atof", return 0)
589 c
.real
= PyOS_ascii_atof(buf
);
592 if (n
== EOF
|| r_string(buf
, (int)n
, p
) != n
) {
593 PyErr_SetString(PyExc_EOFError
,
594 "EOF read where object expected");
598 PyFPE_START_PROTECT("atof", return 0)
599 c
.imag
= PyOS_ascii_atof(buf
);
601 return PyComplex_FromCComplex(c
);
604 case TYPE_BINARY_COMPLEX
:
606 unsigned char buf
[8];
608 if (r_string((char*)buf
, 8, p
) != 8) {
609 PyErr_SetString(PyExc_EOFError
,
610 "EOF read where object expected");
613 c
.real
= _PyFloat_Unpack8(buf
, 1);
614 if (c
.real
== -1.0 && PyErr_Occurred()) {
617 if (r_string((char*)buf
, 8, p
) != 8) {
618 PyErr_SetString(PyExc_EOFError
,
619 "EOF read where object expected");
622 c
.imag
= _PyFloat_Unpack8(buf
, 1);
623 if (c
.imag
== -1.0 && PyErr_Occurred()) {
626 return PyComplex_FromCComplex(c
);
634 PyErr_SetString(PyExc_ValueError
, "bad marshal data");
637 v
= PyString_FromStringAndSize((char *)NULL
, n
);
640 if (r_string(PyString_AS_STRING(v
), (int)n
, p
) != n
) {
642 PyErr_SetString(PyExc_EOFError
,
643 "EOF read where object expected");
646 if (type
== TYPE_INTERNED
) {
647 PyString_InternInPlace(&v
);
648 PyList_Append(p
->strings
, v
);
654 if (n
< 0 || n
>= PyList_GET_SIZE(p
->strings
)) {
655 PyErr_SetString(PyExc_ValueError
, "bad marshal data");
658 v
= PyList_GET_ITEM(p
->strings
, n
);
662 #ifdef Py_USING_UNICODE
669 PyErr_SetString(PyExc_ValueError
, "bad marshal data");
672 buffer
= PyMem_NEW(char, n
);
674 return PyErr_NoMemory();
675 if (r_string(buffer
, (int)n
, p
) != n
) {
677 PyErr_SetString(PyExc_EOFError
,
678 "EOF read where object expected");
681 v
= PyUnicode_DecodeUTF8(buffer
, n
, NULL
);
690 PyErr_SetString(PyExc_ValueError
, "bad marshal data");
693 v
= PyTuple_New((int)n
);
696 for (i
= 0; i
< n
; i
++) {
699 if (!PyErr_Occurred())
700 PyErr_SetString(PyExc_TypeError
,
701 "NULL object in marshal data");
706 PyTuple_SET_ITEM(v
, (int)i
, v2
);
713 PyErr_SetString(PyExc_ValueError
, "bad marshal data");
716 v
= PyList_New((int)n
);
719 for (i
= 0; i
< n
; i
++) {
722 if (!PyErr_Occurred())
723 PyErr_SetString(PyExc_TypeError
,
724 "NULL object in marshal data");
729 PyList_SetItem(v
, (int)i
, v2
);
744 PyDict_SetItem(v
, key
, val
);
748 if (PyErr_Occurred()) {
758 PyErr_SetString(PyExc_ValueError
, "bad marshal data");
761 v
= PyTuple_New((int)n
);
764 for (i
= 0; i
< n
; i
++) {
767 if (!PyErr_Occurred())
768 PyErr_SetString(PyExc_TypeError
,
769 "NULL object in marshal data");
774 PyTuple_SET_ITEM(v
, (int)i
, v2
);
778 if (type
== TYPE_SET
)
781 v3
= PyFrozenSet_New(v
);
786 if (PyEval_GetRestricted()) {
787 PyErr_SetString(PyExc_RuntimeError
,
788 "cannot unmarshal code objects in "
789 "restricted execution mode");
797 PyObject
*code
= NULL
;
798 PyObject
*consts
= NULL
;
799 PyObject
*names
= NULL
;
800 PyObject
*varnames
= NULL
;
801 PyObject
*freevars
= NULL
;
802 PyObject
*cellvars
= NULL
;
803 PyObject
*filename
= NULL
;
804 PyObject
*name
= NULL
;
806 PyObject
*lnotab
= NULL
;
810 argcount
= r_long(p
);
812 stacksize
= r_long(p
);
817 consts
= r_object(p
);
823 varnames
= r_object(p
);
824 if (varnames
== NULL
)
826 freevars
= r_object(p
);
827 if (freevars
== NULL
)
829 cellvars
= r_object(p
);
830 if (cellvars
== NULL
)
832 filename
= r_object(p
);
833 if (filename
== NULL
)
838 firstlineno
= r_long(p
);
839 lnotab
= r_object(p
);
843 v
= (PyObject
*) PyCode_New(
844 argcount
, nlocals
, stacksize
, flags
,
845 code
, consts
, names
, varnames
,
846 freevars
, cellvars
, filename
, name
,
847 firstlineno
, lnotab
);
853 Py_XDECREF(varnames
);
854 Py_XDECREF(freevars
);
855 Py_XDECREF(cellvars
);
856 Py_XDECREF(filename
);
864 /* Bogus data got written, which isn't ideal.
865 This will let you keep working and recover. */
866 PyErr_SetString(PyExc_ValueError
, "bad marshal data");
873 read_object(RFILE
*p
)
876 if (PyErr_Occurred()) {
877 fprintf(stderr
, "XXX readobject called with exception set\n");
881 if (v
== NULL
&& !PyErr_Occurred())
882 PyErr_SetString(PyExc_TypeError
, "NULL object in marshal data");
887 PyMarshal_ReadShortFromFile(FILE *fp
)
893 rf
.end
= rf
.ptr
= NULL
;
898 PyMarshal_ReadLongFromFile(FILE *fp
)
907 /* Return size of file in bytes; < 0 if unknown. */
909 getfilesize(FILE *fp
)
912 if (fstat(fileno(fp
), &st
) != 0)
919 /* If we can get the size of the file up-front, and it's reasonably small,
920 * read it in one gulp and delegate to ...FromString() instead. Much quicker
921 * than reading a byte at a time from file; speeds .pyc imports.
922 * CAUTION: since this may read the entire remainder of the file, don't
923 * call it unless you know you're done with the file.
926 PyMarshal_ReadLastObjectFromFile(FILE *fp
)
928 /* 75% of 2.1's .pyc files can exploit SMALL_FILE_LIMIT.
929 * REASONABLE_FILE_LIMIT is by defn something big enough for Tkinter.pyc.
931 #define SMALL_FILE_LIMIT (1L << 14)
932 #define REASONABLE_FILE_LIMIT (1L << 18)
937 filesize
= getfilesize(fp
);
939 char buf
[SMALL_FILE_LIMIT
];
941 if (filesize
<= SMALL_FILE_LIMIT
)
943 else if (filesize
<= REASONABLE_FILE_LIMIT
)
944 pBuf
= (char *)PyMem_MALLOC(filesize
);
948 /* filesize must fit into an int, because it
949 is smaller than REASONABLE_FILE_LIMIT */
950 n
= fread(pBuf
, 1, (int)filesize
, fp
);
951 v
= PyMarshal_ReadObjectFromString(pBuf
, n
);
959 /* We don't have fstat, or we do but the file is larger than
960 * REASONABLE_FILE_LIMIT or malloc failed -- read a byte at a time.
962 return PyMarshal_ReadObjectFromFile(fp
);
964 #undef SMALL_FILE_LIMIT
965 #undef REASONABLE_FILE_LIMIT
969 PyMarshal_ReadObjectFromFile(FILE *fp
)
974 rf
.strings
= PyList_New(0);
975 result
= r_object(&rf
);
976 Py_DECREF(rf
.strings
);
981 PyMarshal_ReadObjectFromString(char *str
, Py_ssize_t len
)
988 rf
.strings
= PyList_New(0);
989 result
= r_object(&rf
);
990 Py_DECREF(rf
.strings
);
995 PyMarshal_WriteObjectToString(PyObject
*x
, int version
)
999 wf
.str
= PyString_FromStringAndSize((char *)NULL
, 50);
1002 wf
.ptr
= PyString_AS_STRING((PyStringObject
*)wf
.str
);
1003 wf
.end
= wf
.ptr
+ PyString_Size(wf
.str
);
1006 wf
.version
= version
;
1007 wf
.strings
= (version
> 0) ? PyDict_New() : NULL
;
1009 Py_XDECREF(wf
.strings
);
1011 _PyString_Resize(&wf
.str
,
1013 PyString_AS_STRING((PyStringObject
*)wf
.str
)));
1016 PyErr_SetString(PyExc_ValueError
,
1017 (wf
.error
==1)?"unmarshallable object"
1018 :"object too deeply nested to marshal");
1024 /* And an interface for Python programs... */
1027 marshal_dump(PyObject
*self
, PyObject
*args
)
1032 int version
= Py_MARSHAL_VERSION
;
1033 if (!PyArg_ParseTuple(args
, "OO|i:dump", &x
, &f
, &version
))
1035 if (!PyFile_Check(f
)) {
1036 PyErr_SetString(PyExc_TypeError
,
1037 "marshal.dump() 2nd arg must be file");
1040 wf
.fp
= PyFile_AsFile(f
);
1042 wf
.ptr
= wf
.end
= NULL
;
1045 wf
.strings
= (version
> 0) ? PyDict_New() : 0;
1046 wf
.version
= version
;
1048 Py_XDECREF(wf
.strings
);
1050 PyErr_SetString(PyExc_ValueError
,
1051 (wf
.error
==1)?"unmarshallable object"
1052 :"object too deeply nested to marshal");
1060 marshal_load(PyObject
*self
, PyObject
*args
)
1063 PyObject
*f
, *result
;
1064 if (!PyArg_ParseTuple(args
, "O:load", &f
))
1066 if (!PyFile_Check(f
)) {
1067 PyErr_SetString(PyExc_TypeError
,
1068 "marshal.load() arg must be file");
1071 rf
.fp
= PyFile_AsFile(f
);
1072 rf
.strings
= PyList_New(0);
1073 result
= read_object(&rf
);
1074 Py_DECREF(rf
.strings
);
1079 marshal_dumps(PyObject
*self
, PyObject
*args
)
1082 int version
= Py_MARSHAL_VERSION
;
1083 if (!PyArg_ParseTuple(args
, "O|i:dumps", &x
, &version
))
1085 return PyMarshal_WriteObjectToString(x
, version
);
1089 marshal_loads(PyObject
*self
, PyObject
*args
)
1095 if (!PyArg_ParseTuple(args
, "s#:loads", &s
, &n
))
1100 rf
.strings
= PyList_New(0);
1101 result
= read_object(&rf
);
1102 Py_DECREF(rf
.strings
);
1106 static PyMethodDef marshal_methods
[] = {
1107 {"dump", marshal_dump
, METH_VARARGS
},
1108 {"load", marshal_load
, METH_VARARGS
},
1109 {"dumps", marshal_dumps
, METH_VARARGS
},
1110 {"loads", marshal_loads
, METH_VARARGS
},
1111 {NULL
, NULL
} /* sentinel */
1115 PyMarshal_Init(void)
1117 PyObject
*mod
= Py_InitModule("marshal", marshal_methods
);
1120 PyModule_AddIntConstant(mod
, "version", Py_MARSHAL_VERSION
);