Merged revisions 75246 via svnmerge from
[python/dscho.git] / Python / marshal.c
blob256285b4268c8162de18c8fb8069c1963f667ab1
/* Write Python objects to files and read them back.
   This is intended for writing and reading compiled Python code only;
   a true persistent storage facility would be much harder, since
   it would have to take circular links and sharing into account. */
7 #define PY_SSIZE_T_CLEAN
9 #include "Python.h"
10 #include "longintrepr.h"
11 #include "code.h"
12 #include "marshal.h"
/* Absolute value of x.  The argument is evaluated more than once, so it
   must not have side effects. */
#define ABS(x) ((x) < 0 ? -(x) : (x))
/* High water mark to determine when the marshalled object is dangerously deep
 * and risks coring the interpreter.  When the object stack gets this deep,
 * raise an exception instead of continuing.
 * On Windows debug builds, reduce this value.
 */
#if defined(MS_WINDOWS) && defined(_DEBUG)
#define MAX_MARSHAL_STACK_DEPTH 1500
#else
#define MAX_MARSHAL_STACK_DEPTH 2000
#endif
/* One-byte type codes that prefix every marshalled object. */
#define TYPE_NULL               '0'
#define TYPE_NONE               'N'
#define TYPE_FALSE              'F'
#define TYPE_TRUE               'T'
#define TYPE_STOPITER           'S'
#define TYPE_ELLIPSIS           '.'
#define TYPE_INT                'i'
#define TYPE_INT64              'I'
#define TYPE_FLOAT              'f'
#define TYPE_BINARY_FLOAT       'g'
#define TYPE_COMPLEX            'x'
#define TYPE_BINARY_COMPLEX     'y'
#define TYPE_LONG               'l'
#define TYPE_STRING             's'
#define TYPE_TUPLE              '('
#define TYPE_LIST               '['
#define TYPE_DICT               '{'
#define TYPE_CODE               'c'
#define TYPE_UNICODE            'u'
#define TYPE_UNKNOWN            '?'
#define TYPE_SET                '<'
#define TYPE_FROZENSET          '>'

/* Values stored in WFILE.error by the writer functions. */
#define WFERR_OK 0
#define WFERR_UNMARSHALLABLE 1
#define WFERR_NESTEDTOODEEP 2
#define WFERR_NOMEMORY 3
55 typedef struct {
56 FILE *fp;
57 int error; /* see WFERR_* values */
58 int depth;
59 /* If fp == NULL, the following are valid: */
60 PyObject *str;
61 char *ptr;
62 char *end;
63 PyObject *strings; /* dict on marshal, list on unmarshal */
64 int version;
65 } WFILE;
/* Write one byte c to p: to the stream when p->fp is set, otherwise into the
   in-memory buffer, calling w_more() when the buffer is full.
   Wrapped in do { } while (0) so the macro behaves as a single statement
   and cannot capture a following `else`.  p is evaluated more than once. */
#define w_byte(c, p) \
    do { \
        if (((p)->fp)) \
            putc((c), (p)->fp); \
        else if ((p)->ptr != (p)->end) \
            *(p)->ptr++ = (c); \
        else \
            w_more((c), (p)); \
    } while (0)
71 static void
72 w_more(int c, WFILE *p)
74 Py_ssize_t size, newsize;
75 if (p->str == NULL)
76 return; /* An error already occurred */
77 size = PyBytes_Size(p->str);
78 newsize = size + size + 1024;
79 if (newsize > 32*1024*1024) {
80 newsize = size + (size >> 3); /* 12.5% overallocation */
82 if (_PyBytes_Resize(&p->str, newsize) != 0) {
83 p->ptr = p->end = NULL;
85 else {
86 p->ptr = PyBytes_AS_STRING((PyBytesObject *)p->str) + size;
87 p->end =
88 PyBytes_AS_STRING((PyBytesObject *)p->str) + newsize;
89 *p->ptr++ = Py_SAFE_DOWNCAST(c, int, char);
93 static void
94 w_string(char *s, int n, WFILE *p)
96 if (p->fp != NULL) {
97 fwrite(s, 1, n, p->fp);
99 else {
100 while (--n >= 0) {
101 w_byte(*s, p);
102 s++;
107 static void
108 w_short(int x, WFILE *p)
110 w_byte((char)( x & 0xff), p);
111 w_byte((char)((x>> 8) & 0xff), p);
114 static void
115 w_long(long x, WFILE *p)
117 w_byte((char)( x & 0xff), p);
118 w_byte((char)((x>> 8) & 0xff), p);
119 w_byte((char)((x>>16) & 0xff), p);
120 w_byte((char)((x>>24) & 0xff), p);
#if SIZEOF_LONG > 4
/* Write x as two 32-bit little-endian words, low word first.  Only
   compiled when long is wider than 32 bits. */
static void
w_long64(long x, WFILE *p)
{
    w_long(x, p);
    w_long(x>>32, p);
}
#endif
/* We assume that Python longs are stored internally in base some power of
   2**15; for the sake of portability we'll always read and write them in base
   exactly 2**15. */

#define PyLong_MARSHAL_SHIFT 15
#define PyLong_MARSHAL_BASE ((short)1 << PyLong_MARSHAL_SHIFT)
#define PyLong_MARSHAL_MASK (PyLong_MARSHAL_BASE - 1)
#if PyLong_SHIFT % PyLong_MARSHAL_SHIFT != 0
#error "PyLong_SHIFT must be a multiple of PyLong_MARSHAL_SHIFT"
#endif
/* Number of 15-bit marshal digits packed into one internal digit. */
#define PyLong_MARSHAL_RATIO (PyLong_SHIFT / PyLong_MARSHAL_SHIFT)
144 static void
145 w_PyLong(const PyLongObject *ob, WFILE *p)
147 Py_ssize_t i, j, n, l;
148 digit d;
150 w_byte(TYPE_LONG, p);
151 if (Py_SIZE(ob) == 0) {
152 w_long((long)0, p);
153 return;
156 /* set l to number of base PyLong_MARSHAL_BASE digits */
157 n = ABS(Py_SIZE(ob));
158 l = (n-1) * PyLong_MARSHAL_RATIO;
159 d = ob->ob_digit[n-1];
160 assert(d != 0); /* a PyLong is always normalized */
161 do {
162 d >>= PyLong_MARSHAL_SHIFT;
163 l++;
164 } while (d != 0);
165 w_long((long)(Py_SIZE(ob) > 0 ? l : -l), p);
167 for (i=0; i < n-1; i++) {
168 d = ob->ob_digit[i];
169 for (j=0; j < PyLong_MARSHAL_RATIO; j++) {
170 w_short(d & PyLong_MARSHAL_MASK, p);
171 d >>= PyLong_MARSHAL_SHIFT;
173 assert (d == 0);
175 d = ob->ob_digit[n-1];
176 do {
177 w_short(d & PyLong_MARSHAL_MASK, p);
178 d >>= PyLong_MARSHAL_SHIFT;
179 } while (d != 0);
182 static void
183 w_object(PyObject *v, WFILE *p)
185 Py_ssize_t i, n;
187 p->depth++;
189 if (p->depth > MAX_MARSHAL_STACK_DEPTH) {
190 p->error = WFERR_NESTEDTOODEEP;
192 else if (v == NULL) {
193 w_byte(TYPE_NULL, p);
195 else if (v == Py_None) {
196 w_byte(TYPE_NONE, p);
198 else if (v == PyExc_StopIteration) {
199 w_byte(TYPE_STOPITER, p);
201 else if (v == Py_Ellipsis) {
202 w_byte(TYPE_ELLIPSIS, p);
204 else if (v == Py_False) {
205 w_byte(TYPE_FALSE, p);
207 else if (v == Py_True) {
208 w_byte(TYPE_TRUE, p);
210 else if (PyLong_CheckExact(v)) {
211 long x = PyLong_AsLong(v);
212 if ((x == -1) && PyErr_Occurred()) {
213 PyLongObject *ob = (PyLongObject *)v;
214 PyErr_Clear();
215 w_PyLong(ob, p);
217 else {
218 #if SIZEOF_LONG > 4
219 long y = Py_ARITHMETIC_RIGHT_SHIFT(long, x, 31);
220 if (y && y != -1) {
221 w_byte(TYPE_INT64, p);
222 w_long64(x, p);
224 else
225 #endif
227 w_byte(TYPE_INT, p);
228 w_long(x, p);
232 else if (PyFloat_CheckExact(v)) {
233 if (p->version > 1) {
234 unsigned char buf[8];
235 if (_PyFloat_Pack8(PyFloat_AsDouble(v),
236 buf, 1) < 0) {
237 p->error = WFERR_UNMARSHALLABLE;
238 return;
240 w_byte(TYPE_BINARY_FLOAT, p);
241 w_string((char*)buf, 8, p);
243 else {
244 char *buf = PyOS_double_to_string(PyFloat_AS_DOUBLE(v),
245 'g', 17, 0, NULL);
246 if (!buf) {
247 p->error = WFERR_NOMEMORY;
248 return;
250 n = strlen(buf);
251 w_byte(TYPE_FLOAT, p);
252 w_byte((int)n, p);
253 w_string(buf, (int)n, p);
254 PyMem_Free(buf);
257 #ifndef WITHOUT_COMPLEX
258 else if (PyComplex_CheckExact(v)) {
259 if (p->version > 1) {
260 unsigned char buf[8];
261 if (_PyFloat_Pack8(PyComplex_RealAsDouble(v),
262 buf, 1) < 0) {
263 p->error = WFERR_UNMARSHALLABLE;
264 return;
266 w_byte(TYPE_BINARY_COMPLEX, p);
267 w_string((char*)buf, 8, p);
268 if (_PyFloat_Pack8(PyComplex_ImagAsDouble(v),
269 buf, 1) < 0) {
270 p->error = WFERR_UNMARSHALLABLE;
271 return;
273 w_string((char*)buf, 8, p);
275 else {
276 char *buf;
277 w_byte(TYPE_COMPLEX, p);
278 buf = PyOS_double_to_string(PyComplex_RealAsDouble(v),
279 'g', 17, 0, NULL);
280 if (!buf) {
281 p->error = WFERR_NOMEMORY;
282 return;
284 n = strlen(buf);
285 w_byte((int)n, p);
286 w_string(buf, (int)n, p);
287 PyMem_Free(buf);
288 buf = PyOS_double_to_string(PyComplex_ImagAsDouble(v),
289 'g', 17, 0, NULL);
290 if (!buf) {
291 p->error = WFERR_NOMEMORY;
292 return;
294 n = strlen(buf);
295 w_byte((int)n, p);
296 w_string(buf, (int)n, p);
297 PyMem_Free(buf);
300 #endif
301 else if (PyBytes_CheckExact(v)) {
302 w_byte(TYPE_STRING, p);
303 n = PyBytes_GET_SIZE(v);
304 if (n > INT_MAX) {
305 /* huge strings are not supported */
306 p->depth--;
307 p->error = WFERR_UNMARSHALLABLE;
308 return;
310 w_long((long)n, p);
311 w_string(PyBytes_AS_STRING(v), (int)n, p);
313 else if (PyUnicode_CheckExact(v)) {
314 PyObject *utf8;
315 utf8 = PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(v),
316 PyUnicode_GET_SIZE(v),
317 "surrogatepass");
318 if (utf8 == NULL) {
319 p->depth--;
320 p->error = WFERR_UNMARSHALLABLE;
321 return;
323 w_byte(TYPE_UNICODE, p);
324 n = PyBytes_GET_SIZE(utf8);
325 if (n > INT_MAX) {
326 p->depth--;
327 p->error = WFERR_UNMARSHALLABLE;
328 return;
330 w_long((long)n, p);
331 w_string(PyBytes_AS_STRING(utf8), (int)n, p);
332 Py_DECREF(utf8);
334 else if (PyTuple_CheckExact(v)) {
335 w_byte(TYPE_TUPLE, p);
336 n = PyTuple_Size(v);
337 w_long((long)n, p);
338 for (i = 0; i < n; i++) {
339 w_object(PyTuple_GET_ITEM(v, i), p);
342 else if (PyList_CheckExact(v)) {
343 w_byte(TYPE_LIST, p);
344 n = PyList_GET_SIZE(v);
345 w_long((long)n, p);
346 for (i = 0; i < n; i++) {
347 w_object(PyList_GET_ITEM(v, i), p);
350 else if (PyDict_CheckExact(v)) {
351 Py_ssize_t pos;
352 PyObject *key, *value;
353 w_byte(TYPE_DICT, p);
354 /* This one is NULL object terminated! */
355 pos = 0;
356 while (PyDict_Next(v, &pos, &key, &value)) {
357 w_object(key, p);
358 w_object(value, p);
360 w_object((PyObject *)NULL, p);
362 else if (PyAnySet_CheckExact(v)) {
363 PyObject *value, *it;
365 if (PyObject_TypeCheck(v, &PySet_Type))
366 w_byte(TYPE_SET, p);
367 else
368 w_byte(TYPE_FROZENSET, p);
369 n = PyObject_Size(v);
370 if (n == -1) {
371 p->depth--;
372 p->error = WFERR_UNMARSHALLABLE;
373 return;
375 w_long((long)n, p);
376 it = PyObject_GetIter(v);
377 if (it == NULL) {
378 p->depth--;
379 p->error = WFERR_UNMARSHALLABLE;
380 return;
382 while ((value = PyIter_Next(it)) != NULL) {
383 w_object(value, p);
384 Py_DECREF(value);
386 Py_DECREF(it);
387 if (PyErr_Occurred()) {
388 p->depth--;
389 p->error = WFERR_UNMARSHALLABLE;
390 return;
393 else if (PyCode_Check(v)) {
394 PyCodeObject *co = (PyCodeObject *)v;
395 w_byte(TYPE_CODE, p);
396 w_long(co->co_argcount, p);
397 w_long(co->co_kwonlyargcount, p);
398 w_long(co->co_nlocals, p);
399 w_long(co->co_stacksize, p);
400 w_long(co->co_flags, p);
401 w_object(co->co_code, p);
402 w_object(co->co_consts, p);
403 w_object(co->co_names, p);
404 w_object(co->co_varnames, p);
405 w_object(co->co_freevars, p);
406 w_object(co->co_cellvars, p);
407 w_object(co->co_filename, p);
408 w_object(co->co_name, p);
409 w_long(co->co_firstlineno, p);
410 w_object(co->co_lnotab, p);
412 else if (PyObject_CheckBuffer(v)) {
413 /* Write unknown buffer-style objects as a string */
414 char *s;
415 PyBufferProcs *pb = v->ob_type->tp_as_buffer;
416 Py_buffer view;
417 if ((*pb->bf_getbuffer)(v, &view, PyBUF_SIMPLE) != 0) {
418 w_byte(TYPE_UNKNOWN, p);
419 p->error = WFERR_UNMARSHALLABLE;
421 w_byte(TYPE_STRING, p);
422 n = view.len;
423 s = view.buf;
424 if (n > INT_MAX) {
425 p->depth--;
426 p->error = WFERR_UNMARSHALLABLE;
427 return;
429 w_long((long)n, p);
430 w_string(s, (int)n, p);
431 if (pb->bf_releasebuffer != NULL)
432 (*pb->bf_releasebuffer)(v, &view);
434 else {
435 w_byte(TYPE_UNKNOWN, p);
436 p->error = WFERR_UNMARSHALLABLE;
438 p->depth--;
441 /* version currently has no effect for writing longs. */
442 void
443 PyMarshal_WriteLongToFile(long x, FILE *fp, int version)
445 WFILE wf;
446 wf.fp = fp;
447 wf.error = WFERR_OK;
448 wf.depth = 0;
449 wf.strings = NULL;
450 wf.version = version;
451 w_long(x, &wf);
454 void
455 PyMarshal_WriteObjectToFile(PyObject *x, FILE *fp, int version)
457 WFILE wf;
458 wf.fp = fp;
459 wf.error = WFERR_OK;
460 wf.depth = 0;
461 wf.strings = (version > 0) ? PyDict_New() : NULL;
462 wf.version = version;
463 w_object(x, &wf);
464 Py_XDECREF(wf.strings);
467 typedef WFILE RFILE; /* Same struct with different invariants */
469 #define rs_byte(p) (((p)->ptr < (p)->end) ? (unsigned char)*(p)->ptr++ : EOF)
471 #define r_byte(p) ((p)->fp ? getc((p)->fp) : rs_byte(p))
473 static int
474 r_string(char *s, int n, RFILE *p)
476 if (p->fp != NULL)
477 /* The result fits into int because it must be <=n. */
478 return (int)fread(s, 1, n, p->fp);
479 if (p->end - p->ptr < n)
480 n = (int)(p->end - p->ptr);
481 memcpy(s, p->ptr, n);
482 p->ptr += n;
483 return n;
486 static int
487 r_short(RFILE *p)
489 register short x;
490 x = r_byte(p);
491 x |= r_byte(p) << 8;
492 /* Sign-extension, in case short greater than 16 bits */
493 x |= -(x & 0x8000);
494 return x;
497 static long
498 r_long(RFILE *p)
500 register long x;
501 register FILE *fp = p->fp;
502 if (fp) {
503 x = getc(fp);
504 x |= (long)getc(fp) << 8;
505 x |= (long)getc(fp) << 16;
506 x |= (long)getc(fp) << 24;
508 else {
509 x = rs_byte(p);
510 x |= (long)rs_byte(p) << 8;
511 x |= (long)rs_byte(p) << 16;
512 x |= (long)rs_byte(p) << 24;
514 #if SIZEOF_LONG > 4
515 /* Sign extension for 64-bit machines */
516 x |= -(x & 0x80000000L);
517 #endif
518 return x;
521 /* r_long64 deals with the TYPE_INT64 code. On a machine with
522 sizeof(long) > 4, it returns a Python int object, else a Python long
523 object. Note that w_long64 writes out TYPE_INT if 32 bits is enough,
524 so there's no inefficiency here in returning a PyLong on 32-bit boxes
525 for everything written via TYPE_INT64 (i.e., if an int is written via
526 TYPE_INT64, it *needs* more than 32 bits).
528 static PyObject *
529 r_long64(RFILE *p)
531 long lo4 = r_long(p);
532 long hi4 = r_long(p);
533 #if SIZEOF_LONG > 4
534 long x = (hi4 << 32) | (lo4 & 0xFFFFFFFFL);
535 return PyLong_FromLong(x);
536 #else
537 unsigned char buf[8];
538 int one = 1;
539 int is_little_endian = (int)*(char*)&one;
540 if (is_little_endian) {
541 memcpy(buf, &lo4, 4);
542 memcpy(buf+4, &hi4, 4);
544 else {
545 memcpy(buf, &hi4, 4);
546 memcpy(buf+4, &lo4, 4);
548 return _PyLong_FromByteArray(buf, 8, is_little_endian, 1);
549 #endif
552 static PyObject *
553 r_PyLong(RFILE *p)
555 PyLongObject *ob;
556 int size, i, j, md, shorts_in_top_digit;
557 long n;
558 digit d;
560 n = r_long(p);
561 if (n == 0)
562 return (PyObject *)_PyLong_New(0);
563 if (n < -INT_MAX || n > INT_MAX) {
564 PyErr_SetString(PyExc_ValueError,
565 "bad marshal data (long size out of range)");
566 return NULL;
569 size = 1 + (ABS(n) - 1) / PyLong_MARSHAL_RATIO;
570 shorts_in_top_digit = 1 + (ABS(n) - 1) % PyLong_MARSHAL_RATIO;
571 ob = _PyLong_New(size);
572 if (ob == NULL)
573 return NULL;
574 Py_SIZE(ob) = n > 0 ? size : -size;
576 for (i = 0; i < size-1; i++) {
577 d = 0;
578 for (j=0; j < PyLong_MARSHAL_RATIO; j++) {
579 md = r_short(p);
580 if (md < 0 || md > PyLong_MARSHAL_BASE)
581 goto bad_digit;
582 d += (digit)md << j*PyLong_MARSHAL_SHIFT;
584 ob->ob_digit[i] = d;
586 d = 0;
587 for (j=0; j < shorts_in_top_digit; j++) {
588 md = r_short(p);
589 if (md < 0 || md > PyLong_MARSHAL_BASE)
590 goto bad_digit;
591 /* topmost marshal digit should be nonzero */
592 if (md == 0 && j == shorts_in_top_digit - 1) {
593 Py_DECREF(ob);
594 PyErr_SetString(PyExc_ValueError,
595 "bad marshal data (unnormalized long data)");
596 return NULL;
598 d += (digit)md << j*PyLong_MARSHAL_SHIFT;
600 /* top digit should be nonzero, else the resulting PyLong won't be
601 normalized */
602 ob->ob_digit[size-1] = d;
603 return (PyObject *)ob;
604 bad_digit:
605 Py_DECREF(ob);
606 PyErr_SetString(PyExc_ValueError,
607 "bad marshal data (digit out of range in long)");
608 return NULL;
612 static PyObject *
613 r_object(RFILE *p)
615 /* NULL is a valid return value, it does not necessarily means that
616 an exception is set. */
617 PyObject *v, *v2;
618 long i, n;
619 int type = r_byte(p);
620 PyObject *retval;
622 p->depth++;
624 if (p->depth > MAX_MARSHAL_STACK_DEPTH) {
625 p->depth--;
626 PyErr_SetString(PyExc_ValueError, "recursion limit exceeded");
627 return NULL;
630 switch (type) {
632 case EOF:
633 PyErr_SetString(PyExc_EOFError,
634 "EOF read where object expected");
635 retval = NULL;
636 break;
638 case TYPE_NULL:
639 retval = NULL;
640 break;
642 case TYPE_NONE:
643 Py_INCREF(Py_None);
644 retval = Py_None;
645 break;
647 case TYPE_STOPITER:
648 Py_INCREF(PyExc_StopIteration);
649 retval = PyExc_StopIteration;
650 break;
652 case TYPE_ELLIPSIS:
653 Py_INCREF(Py_Ellipsis);
654 retval = Py_Ellipsis;
655 break;
657 case TYPE_FALSE:
658 Py_INCREF(Py_False);
659 retval = Py_False;
660 break;
662 case TYPE_TRUE:
663 Py_INCREF(Py_True);
664 retval = Py_True;
665 break;
667 case TYPE_INT:
668 retval = PyLong_FromLong(r_long(p));
669 break;
671 case TYPE_INT64:
672 retval = r_long64(p);
673 break;
675 case TYPE_LONG:
676 retval = r_PyLong(p);
677 break;
679 case TYPE_FLOAT:
681 char buf[256];
682 double dx;
683 retval = NULL;
684 n = r_byte(p);
685 if (n == EOF || r_string(buf, (int)n, p) != n) {
686 PyErr_SetString(PyExc_EOFError,
687 "EOF read where object expected");
688 break;
690 buf[n] = '\0';
691 dx = PyOS_string_to_double(buf, NULL, NULL);
692 if (dx == -1.0 && PyErr_Occurred())
693 break;
694 retval = PyFloat_FromDouble(dx);
695 break;
698 case TYPE_BINARY_FLOAT:
700 unsigned char buf[8];
701 double x;
702 if (r_string((char*)buf, 8, p) != 8) {
703 PyErr_SetString(PyExc_EOFError,
704 "EOF read where object expected");
705 retval = NULL;
706 break;
708 x = _PyFloat_Unpack8(buf, 1);
709 if (x == -1.0 && PyErr_Occurred()) {
710 retval = NULL;
711 break;
713 retval = PyFloat_FromDouble(x);
714 break;
717 #ifndef WITHOUT_COMPLEX
718 case TYPE_COMPLEX:
720 char buf[256];
721 Py_complex c;
722 retval = NULL;
723 n = r_byte(p);
724 if (n == EOF || r_string(buf, (int)n, p) != n) {
725 PyErr_SetString(PyExc_EOFError,
726 "EOF read where object expected");
727 break;
729 buf[n] = '\0';
730 c.real = PyOS_string_to_double(buf, NULL, NULL);
731 if (c.real == -1.0 && PyErr_Occurred())
732 break;
733 n = r_byte(p);
734 if (n == EOF || r_string(buf, (int)n, p) != n) {
735 PyErr_SetString(PyExc_EOFError,
736 "EOF read where object expected");
737 break;
739 buf[n] = '\0';
740 c.imag = PyOS_string_to_double(buf, NULL, NULL);
741 if (c.imag == -1.0 && PyErr_Occurred())
742 break;
743 retval = PyComplex_FromCComplex(c);
744 break;
747 case TYPE_BINARY_COMPLEX:
749 unsigned char buf[8];
750 Py_complex c;
751 if (r_string((char*)buf, 8, p) != 8) {
752 PyErr_SetString(PyExc_EOFError,
753 "EOF read where object expected");
754 retval = NULL;
755 break;
757 c.real = _PyFloat_Unpack8(buf, 1);
758 if (c.real == -1.0 && PyErr_Occurred()) {
759 retval = NULL;
760 break;
762 if (r_string((char*)buf, 8, p) != 8) {
763 PyErr_SetString(PyExc_EOFError,
764 "EOF read where object expected");
765 retval = NULL;
766 break;
768 c.imag = _PyFloat_Unpack8(buf, 1);
769 if (c.imag == -1.0 && PyErr_Occurred()) {
770 retval = NULL;
771 break;
773 retval = PyComplex_FromCComplex(c);
774 break;
776 #endif
778 case TYPE_STRING:
779 n = r_long(p);
780 if (n < 0 || n > INT_MAX) {
781 PyErr_SetString(PyExc_ValueError, "bad marshal data (string size out of range)");
782 retval = NULL;
783 break;
785 v = PyBytes_FromStringAndSize((char *)NULL, n);
786 if (v == NULL) {
787 retval = NULL;
788 break;
790 if (r_string(PyBytes_AS_STRING(v), (int)n, p) != n) {
791 Py_DECREF(v);
792 PyErr_SetString(PyExc_EOFError,
793 "EOF read where object expected");
794 retval = NULL;
795 break;
797 retval = v;
798 break;
800 case TYPE_UNICODE:
802 char *buffer;
804 n = r_long(p);
805 if (n < 0 || n > INT_MAX) {
806 PyErr_SetString(PyExc_ValueError, "bad marshal data (unicode size out of range)");
807 retval = NULL;
808 break;
810 buffer = PyMem_NEW(char, n);
811 if (buffer == NULL) {
812 retval = PyErr_NoMemory();
813 break;
815 if (r_string(buffer, (int)n, p) != n) {
816 PyMem_DEL(buffer);
817 PyErr_SetString(PyExc_EOFError,
818 "EOF read where object expected");
819 retval = NULL;
820 break;
822 v = PyUnicode_DecodeUTF8(buffer, n, "surrogatepass");
823 PyMem_DEL(buffer);
824 retval = v;
825 break;
828 case TYPE_TUPLE:
829 n = r_long(p);
830 if (n < 0 || n > INT_MAX) {
831 PyErr_SetString(PyExc_ValueError, "bad marshal data (tuple size out of range)");
832 retval = NULL;
833 break;
835 v = PyTuple_New((int)n);
836 if (v == NULL) {
837 retval = NULL;
838 break;
840 for (i = 0; i < n; i++) {
841 v2 = r_object(p);
842 if ( v2 == NULL ) {
843 if (!PyErr_Occurred())
844 PyErr_SetString(PyExc_TypeError,
845 "NULL object in marshal data for tuple");
846 Py_DECREF(v);
847 v = NULL;
848 break;
850 PyTuple_SET_ITEM(v, (int)i, v2);
852 retval = v;
853 break;
855 case TYPE_LIST:
856 n = r_long(p);
857 if (n < 0 || n > INT_MAX) {
858 PyErr_SetString(PyExc_ValueError, "bad marshal data (list size out of range)");
859 retval = NULL;
860 break;
862 v = PyList_New((int)n);
863 if (v == NULL) {
864 retval = NULL;
865 break;
867 for (i = 0; i < n; i++) {
868 v2 = r_object(p);
869 if ( v2 == NULL ) {
870 if (!PyErr_Occurred())
871 PyErr_SetString(PyExc_TypeError,
872 "NULL object in marshal data for list");
873 Py_DECREF(v);
874 v = NULL;
875 break;
877 PyList_SET_ITEM(v, (int)i, v2);
879 retval = v;
880 break;
882 case TYPE_DICT:
883 v = PyDict_New();
884 if (v == NULL) {
885 retval = NULL;
886 break;
888 for (;;) {
889 PyObject *key, *val;
890 key = r_object(p);
891 if (key == NULL)
892 break;
893 val = r_object(p);
894 if (val != NULL)
895 PyDict_SetItem(v, key, val);
896 Py_DECREF(key);
897 Py_XDECREF(val);
899 if (PyErr_Occurred()) {
900 Py_DECREF(v);
901 v = NULL;
903 retval = v;
904 break;
906 case TYPE_SET:
907 case TYPE_FROZENSET:
908 n = r_long(p);
909 if (n < 0 || n > INT_MAX) {
910 PyErr_SetString(PyExc_ValueError, "bad marshal data (set size out of range)");
911 retval = NULL;
912 break;
914 v = (type == TYPE_SET) ? PySet_New(NULL) : PyFrozenSet_New(NULL);
915 if (v == NULL) {
916 retval = NULL;
917 break;
919 for (i = 0; i < n; i++) {
920 v2 = r_object(p);
921 if ( v2 == NULL ) {
922 if (!PyErr_Occurred())
923 PyErr_SetString(PyExc_TypeError,
924 "NULL object in marshal data for set");
925 Py_DECREF(v);
926 v = NULL;
927 break;
929 if (PySet_Add(v, v2) == -1) {
930 Py_DECREF(v);
931 Py_DECREF(v2);
932 v = NULL;
933 break;
935 Py_DECREF(v2);
937 retval = v;
938 break;
940 case TYPE_CODE:
942 int argcount;
943 int kwonlyargcount;
944 int nlocals;
945 int stacksize;
946 int flags;
947 PyObject *code = NULL;
948 PyObject *consts = NULL;
949 PyObject *names = NULL;
950 PyObject *varnames = NULL;
951 PyObject *freevars = NULL;
952 PyObject *cellvars = NULL;
953 PyObject *filename = NULL;
954 PyObject *name = NULL;
955 int firstlineno;
956 PyObject *lnotab = NULL;
958 v = NULL;
960 /* XXX ignore long->int overflows for now */
961 argcount = (int)r_long(p);
962 kwonlyargcount = (int)r_long(p);
963 nlocals = (int)r_long(p);
964 stacksize = (int)r_long(p);
965 flags = (int)r_long(p);
966 code = r_object(p);
967 if (code == NULL)
968 goto code_error;
969 consts = r_object(p);
970 if (consts == NULL)
971 goto code_error;
972 names = r_object(p);
973 if (names == NULL)
974 goto code_error;
975 varnames = r_object(p);
976 if (varnames == NULL)
977 goto code_error;
978 freevars = r_object(p);
979 if (freevars == NULL)
980 goto code_error;
981 cellvars = r_object(p);
982 if (cellvars == NULL)
983 goto code_error;
984 filename = r_object(p);
985 if (filename == NULL)
986 goto code_error;
987 name = r_object(p);
988 if (name == NULL)
989 goto code_error;
990 firstlineno = (int)r_long(p);
991 lnotab = r_object(p);
992 if (lnotab == NULL)
993 goto code_error;
995 v = (PyObject *) PyCode_New(
996 argcount, kwonlyargcount,
997 nlocals, stacksize, flags,
998 code, consts, names, varnames,
999 freevars, cellvars, filename, name,
1000 firstlineno, lnotab);
1002 code_error:
1003 Py_XDECREF(code);
1004 Py_XDECREF(consts);
1005 Py_XDECREF(names);
1006 Py_XDECREF(varnames);
1007 Py_XDECREF(freevars);
1008 Py_XDECREF(cellvars);
1009 Py_XDECREF(filename);
1010 Py_XDECREF(name);
1011 Py_XDECREF(lnotab);
1013 retval = v;
1014 break;
1016 default:
1017 /* Bogus data got written, which isn't ideal.
1018 This will let you keep working and recover. */
1019 PyErr_SetString(PyExc_ValueError, "bad marshal data (unknown type code)");
1020 retval = NULL;
1021 break;
1024 p->depth--;
1025 return retval;
1028 static PyObject *
1029 read_object(RFILE *p)
1031 PyObject *v;
1032 if (PyErr_Occurred()) {
1033 fprintf(stderr, "XXX readobject called with exception set\n");
1034 return NULL;
1036 v = r_object(p);
1037 if (v == NULL && !PyErr_Occurred())
1038 PyErr_SetString(PyExc_TypeError, "NULL object in marshal data for object");
1039 return v;
1043 PyMarshal_ReadShortFromFile(FILE *fp)
1045 RFILE rf;
1046 assert(fp);
1047 rf.fp = fp;
1048 rf.strings = NULL;
1049 rf.end = rf.ptr = NULL;
1050 return r_short(&rf);
1053 long
1054 PyMarshal_ReadLongFromFile(FILE *fp)
1056 RFILE rf;
1057 rf.fp = fp;
1058 rf.strings = NULL;
1059 rf.ptr = rf.end = NULL;
1060 return r_long(&rf);
#ifdef HAVE_FSTAT
/* Return size of file in bytes; < 0 if unknown. */
static off_t
getfilesize(FILE *fp)
{
    struct stat st;
    if (fstat(fileno(fp), &st) != 0)
        return -1;
    else
        return st.st_size;
}
#endif
1076 /* If we can get the size of the file up-front, and it's reasonably small,
1077 * read it in one gulp and delegate to ...FromString() instead. Much quicker
1078 * than reading a byte at a time from file; speeds .pyc imports.
1079 * CAUTION: since this may read the entire remainder of the file, don't
1080 * call it unless you know you're done with the file.
1082 PyObject *
1083 PyMarshal_ReadLastObjectFromFile(FILE *fp)
1085 /* 75% of 2.1's .pyc files can exploit SMALL_FILE_LIMIT.
1086 * REASONABLE_FILE_LIMIT is by defn something big enough for Tkinter.pyc.
1088 #define SMALL_FILE_LIMIT (1L << 14)
1089 #define REASONABLE_FILE_LIMIT (1L << 18)
1090 #ifdef HAVE_FSTAT
1091 off_t filesize;
1092 #endif
1093 #ifdef HAVE_FSTAT
1094 filesize = getfilesize(fp);
1095 if (filesize > 0) {
1096 char buf[SMALL_FILE_LIMIT];
1097 char* pBuf = NULL;
1098 if (filesize <= SMALL_FILE_LIMIT)
1099 pBuf = buf;
1100 else if (filesize <= REASONABLE_FILE_LIMIT)
1101 pBuf = (char *)PyMem_MALLOC(filesize);
1102 if (pBuf != NULL) {
1103 PyObject* v;
1104 size_t n;
1105 /* filesize must fit into an int, because it
1106 is smaller than REASONABLE_FILE_LIMIT */
1107 n = fread(pBuf, 1, (int)filesize, fp);
1108 v = PyMarshal_ReadObjectFromString(pBuf, n);
1109 if (pBuf != buf)
1110 PyMem_FREE(pBuf);
1111 return v;
1115 #endif
1116 /* We don't have fstat, or we do but the file is larger than
1117 * REASONABLE_FILE_LIMIT or malloc failed -- read a byte at a time.
1119 return PyMarshal_ReadObjectFromFile(fp);
1121 #undef SMALL_FILE_LIMIT
1122 #undef REASONABLE_FILE_LIMIT
1125 PyObject *
1126 PyMarshal_ReadObjectFromFile(FILE *fp)
1128 RFILE rf;
1129 PyObject *result;
1130 rf.fp = fp;
1131 rf.strings = PyList_New(0);
1132 rf.depth = 0;
1133 rf.ptr = rf.end = NULL;
1134 result = r_object(&rf);
1135 Py_DECREF(rf.strings);
1136 return result;
1139 PyObject *
1140 PyMarshal_ReadObjectFromString(char *str, Py_ssize_t len)
1142 RFILE rf;
1143 PyObject *result;
1144 rf.fp = NULL;
1145 rf.ptr = str;
1146 rf.end = str + len;
1147 rf.strings = PyList_New(0);
1148 rf.depth = 0;
1149 result = r_object(&rf);
1150 Py_DECREF(rf.strings);
1151 return result;
1154 PyObject *
1155 PyMarshal_WriteObjectToString(PyObject *x, int version)
1157 WFILE wf;
1158 PyObject *res = NULL;
1160 wf.fp = NULL;
1161 wf.str = PyBytes_FromStringAndSize((char *)NULL, 50);
1162 if (wf.str == NULL)
1163 return NULL;
1164 wf.ptr = PyBytes_AS_STRING((PyBytesObject *)wf.str);
1165 wf.end = wf.ptr + PyBytes_Size(wf.str);
1166 wf.error = WFERR_OK;
1167 wf.depth = 0;
1168 wf.version = version;
1169 wf.strings = (version > 0) ? PyDict_New() : NULL;
1170 w_object(x, &wf);
1171 Py_XDECREF(wf.strings);
1172 if (wf.str != NULL) {
1173 char *base = PyBytes_AS_STRING((PyBytesObject *)wf.str);
1174 if (wf.ptr - base > PY_SSIZE_T_MAX) {
1175 Py_DECREF(wf.str);
1176 PyErr_SetString(PyExc_OverflowError,
1177 "too much marshal data for a string");
1178 return NULL;
1180 if (_PyBytes_Resize(&wf.str, (Py_ssize_t)(wf.ptr - base)) < 0)
1181 return NULL;
1183 if (wf.error != WFERR_OK) {
1184 Py_XDECREF(wf.str);
1185 if (wf.error == WFERR_NOMEMORY)
1186 PyErr_NoMemory();
1187 else
1188 PyErr_SetString(PyExc_ValueError,
1189 (wf.error==WFERR_UNMARSHALLABLE)?"unmarshallable object"
1190 :"object too deeply nested to marshal");
1191 return NULL;
1193 if (wf.str != NULL) {
1194 /* XXX Quick hack -- need to do this differently */
1195 res = PyBytes_FromObject(wf.str);
1196 Py_DECREF(wf.str);
1198 return res;
1201 /* And an interface for Python programs... */
1203 static PyObject *
1204 marshal_dump(PyObject *self, PyObject *args)
1206 /* XXX Quick hack -- need to do this differently */
1207 PyObject *x;
1208 PyObject *f;
1209 int version = Py_MARSHAL_VERSION;
1210 PyObject *s;
1211 PyObject *res;
1212 if (!PyArg_ParseTuple(args, "OO|i:dump", &x, &f, &version))
1213 return NULL;
1214 s = PyMarshal_WriteObjectToString(x, version);
1215 if (s == NULL)
1216 return NULL;
1217 res = PyObject_CallMethod(f, "write", "O", s);
1218 Py_DECREF(s);
1219 return res;
1222 PyDoc_STRVAR(dump_doc,
1223 "dump(value, file[, version])\n\
1225 Write the value on the open file. The value must be a supported type.\n\
1226 The file must be an open file object such as sys.stdout or returned by\n\
1227 open() or os.popen(). It must be opened in binary mode ('wb' or 'w+b').\n\
1229 If the value has (or contains an object that has) an unsupported type, a\n\
1230 ValueError exception is raised — but garbage data will also be written\n\
1231 to the file. The object will not be properly read back by load()\n\
1233 The version argument indicates the data format that dump should use.");
1235 static PyObject *
1236 marshal_load(PyObject *self, PyObject *f)
1238 /* XXX Quick hack -- need to do this differently */
1239 PyObject *data, *result;
1240 RFILE rf;
1241 data = PyObject_CallMethod(f, "read", "");
1242 if (data == NULL)
1243 return NULL;
1244 rf.fp = NULL;
1245 if (PyBytes_Check(data)) {
1246 rf.ptr = PyBytes_AS_STRING(data);
1247 rf.end = rf.ptr + PyBytes_GET_SIZE(data);
1249 else if (PyBytes_Check(data)) {
1250 rf.ptr = PyBytes_AS_STRING(data);
1251 rf.end = rf.ptr + PyBytes_GET_SIZE(data);
1253 else {
1254 PyErr_Format(PyExc_TypeError,
1255 "f.read() returned neither string "
1256 "nor bytes but %.100s",
1257 data->ob_type->tp_name);
1258 Py_DECREF(data);
1259 return NULL;
1261 rf.strings = PyList_New(0);
1262 rf.depth = 0;
1263 result = read_object(&rf);
1264 Py_DECREF(rf.strings);
1265 Py_DECREF(data);
1266 return result;
1269 PyDoc_STRVAR(load_doc,
1270 "load(file)\n\
1272 Read one value from the open file and return it. If no valid value is\n\
1273 read (e.g. because the data has a different Python version’s\n\
1274 incompatible marshal format), raise EOFError, ValueError or TypeError.\n\
1275 The file must be an open file object opened in binary mode ('rb' or\n\
1276 'r+b').\n\
1278 Note: If an object containing an unsupported type was marshalled with\n\
1279 dump(), load() will substitute None for the unmarshallable type.");
1282 static PyObject *
1283 marshal_dumps(PyObject *self, PyObject *args)
1285 PyObject *x;
1286 int version = Py_MARSHAL_VERSION;
1287 if (!PyArg_ParseTuple(args, "O|i:dumps", &x, &version))
1288 return NULL;
1289 return PyMarshal_WriteObjectToString(x, version);
1292 PyDoc_STRVAR(dumps_doc,
1293 "dumps(value[, version])\n\
1295 Return the string that would be written to a file by dump(value, file).\n\
1296 The value must be a supported type. Raise a ValueError exception if\n\
1297 value has (or contains an object that has) an unsupported type.\n\
1299 The version argument indicates the data format that dumps should use.");
1302 static PyObject *
1303 marshal_loads(PyObject *self, PyObject *args)
1305 RFILE rf;
1306 Py_buffer p;
1307 char *s;
1308 Py_ssize_t n;
1309 PyObject* result;
1310 if (!PyArg_ParseTuple(args, "s*:loads", &p))
1311 return NULL;
1312 s = p.buf;
1313 n = p.len;
1314 rf.fp = NULL;
1315 rf.ptr = s;
1316 rf.end = s + n;
1317 rf.strings = PyList_New(0);
1318 rf.depth = 0;
1319 result = read_object(&rf);
1320 Py_DECREF(rf.strings);
1321 PyBuffer_Release(&p);
1322 return result;
1325 PyDoc_STRVAR(loads_doc,
1326 "loads(string)\n\
1328 Convert the string to a value. If no valid value is found, raise\n\
1329 EOFError, ValueError or TypeError. Extra characters in the string are\n\
1330 ignored.");
1332 static PyMethodDef marshal_methods[] = {
1333 {"dump", marshal_dump, METH_VARARGS, dump_doc},
1334 {"load", marshal_load, METH_O, load_doc},
1335 {"dumps", marshal_dumps, METH_VARARGS, dumps_doc},
1336 {"loads", marshal_loads, METH_VARARGS, loads_doc},
1337 {NULL, NULL} /* sentinel */
1341 PyDoc_STRVAR(module_doc,
1342 "This module contains functions that can read and write Python values in\n\
1343 a binary format. The format is specific to Python, but independent of\n\
1344 machine architecture issues.\n\
1346 Not all Python object types are supported; in general, only objects\n\
1347 whose value is independent from a particular invocation of Python can be\n\
1348 written and read by this module. The following types are supported:\n\
1349 None, integers, floating point numbers, strings, bytes, bytearrays,\n\
1350 tuples, lists, sets, dictionaries, and code objects, where it\n\
1351 should be understood that tuples, lists and dictionaries are only\n\
1352 supported as long as the values contained therein are themselves\n\
1353 supported; and recursive lists and dictionaries should not be written\n\
1354 (they will cause infinite loops).\n\
1356 Variables:\n\
1358 version -- indicates the format that the module uses. Version 0 is the\n\
1359 historical format, version 1 shares interned strings and version 2\n\
1360 uses a binary format for floating point numbers.\n\
1362 Functions:\n\
1364 dump() -- write value to a file\n\
1365 load() -- read value from a file\n\
1366 dumps() -- write value to a string\n\
1367 loads() -- read value from a string");
1371 static struct PyModuleDef marshalmodule = {
1372 PyModuleDef_HEAD_INIT,
1373 "marshal",
1374 module_doc,
1376 marshal_methods,
1377 NULL,
1378 NULL,
1379 NULL,
1380 NULL
1383 PyMODINIT_FUNC
1384 PyMarshal_Init(void)
1386 PyObject *mod = PyModule_Create(&marshalmodule);
1387 if (mod == NULL)
1388 return NULL;
1389 PyModule_AddIntConstant(mod, "version", Py_MARSHAL_VERSION);
1390 return mod;