Fix 'primes 0 1'
[python.git] / Python / marshal.c
blob1ef41b081b7a1dfda231d86edd9c0adfbe220dfb
2 /* Write Python objects to files and read them back.
3 This is intended for writing and reading compiled Python code only;
4 a true persistent storage facility would be much harder, since
5 it would have to take circular links and sharing into account. */
7 #define PY_SSIZE_T_CLEAN
9 #include "Python.h"
10 #include "longintrepr.h"
11 #include "code.h"
12 #include "marshal.h"
14 #define ABS(x) ((x) < 0 ? -(x) : (x))
/* Upper bound on the nesting depth of a marshalled object.  Once the object
 * stack reaches this depth an exception is raised instead of recursing any
 * further and risking a crash of the interpreter.
 */
#define MAX_MARSHAL_STACK_DEPTH 2000

/* One-byte type codes; each marshalled object starts with one of these. */
#define TYPE_NULL               '0'
#define TYPE_NONE               'N'
#define TYPE_FALSE              'F'
#define TYPE_TRUE               'T'
#define TYPE_STOPITER           'S'
#define TYPE_ELLIPSIS           '.'
#define TYPE_INT                'i'
#define TYPE_INT64              'I'
#define TYPE_FLOAT              'f'
#define TYPE_BINARY_FLOAT       'g'
#define TYPE_COMPLEX            'x'
#define TYPE_BINARY_COMPLEX     'y'
#define TYPE_LONG               'l'
#define TYPE_STRING             's'
#define TYPE_INTERNED           't'
#define TYPE_STRINGREF          'R'
#define TYPE_TUPLE              '('
#define TYPE_LIST               '['
#define TYPE_DICT               '{'
#define TYPE_CODE               'c'
#define TYPE_UNICODE            'u'
#define TYPE_UNKNOWN            '?'
#define TYPE_SET                '<'
#define TYPE_FROZENSET          '>'
47 typedef struct {
48 FILE *fp;
49 int error;
50 int depth;
51 /* If fp == NULL, the following are valid: */
52 PyObject *str;
53 char *ptr;
54 char *end;
55 PyObject *strings; /* dict on marshal, list on unmarshal */
56 int version;
57 } WFILE;
/* Append the byte c to the output described by p: to the stdio stream when
   one is attached, else into the string buffer, calling w_more() to enlarge
   the buffer when it is full.  The expansion is wrapped in do/while(0) so
   that the macro behaves as one statement wherever it is used (for example
   as the unbraced body of an if/else). */
#define w_byte(c, p) \
    do { \
        if (((p)->fp)) \
            putc((c), (p)->fp); \
        else if ((p)->ptr != (p)->end) \
            *(p)->ptr++ = (c); \
        else \
            w_more((c), (p)); \
    } while (0)
63 static void
64 w_more(int c, WFILE *p)
66 Py_ssize_t size, newsize;
67 if (p->str == NULL)
68 return; /* An error already occurred */
69 size = PyString_Size(p->str);
70 newsize = size + size + 1024;
71 if (newsize > 32*1024*1024) {
72 newsize = size + (size >> 3); /* 12.5% overallocation */
74 if (_PyString_Resize(&p->str, newsize) != 0) {
75 p->ptr = p->end = NULL;
77 else {
78 p->ptr = PyString_AS_STRING((PyStringObject *)p->str) + size;
79 p->end =
80 PyString_AS_STRING((PyStringObject *)p->str) + newsize;
81 *p->ptr++ = Py_SAFE_DOWNCAST(c, int, char);
85 static void
86 w_string(char *s, int n, WFILE *p)
88 if (p->fp != NULL) {
89 fwrite(s, 1, n, p->fp);
91 else {
92 while (--n >= 0) {
93 w_byte(*s, p);
94 s++;
99 static void
100 w_short(int x, WFILE *p)
102 w_byte((char)( x & 0xff), p);
103 w_byte((char)((x>> 8) & 0xff), p);
106 static void
107 w_long(long x, WFILE *p)
109 w_byte((char)( x & 0xff), p);
110 w_byte((char)((x>> 8) & 0xff), p);
111 w_byte((char)((x>>16) & 0xff), p);
112 w_byte((char)((x>>24) & 0xff), p);
#if SIZEOF_LONG > 4
/* Write a 64-bit value as two 32-bit words, low word first.  Only compiled
   where a C long is wider than 4 bytes. */
static void
w_long64(long x, WFILE *p)
{
    long high_word = x >> 32;

    w_long(x, p);
    w_long(high_word, p);
}
#endif
/* Python longs are assumed to be stored internally in a base that is some
   power of 2**15.  For the sake of portability the marshalled form always
   uses base exactly 2**15, so one internal digit maps onto
   PyLong_MARSHAL_RATIO marshalled 15-bit digits. */

#define PyLong_MARSHAL_SHIFT 15
#define PyLong_MARSHAL_BASE ((short)1 << PyLong_MARSHAL_SHIFT)
#define PyLong_MARSHAL_MASK (PyLong_MARSHAL_BASE - 1)

#if PyLong_SHIFT % PyLong_MARSHAL_SHIFT != 0
#error "PyLong_SHIFT must be a multiple of PyLong_MARSHAL_SHIFT"
#endif

#define PyLong_MARSHAL_RATIO (PyLong_SHIFT / PyLong_MARSHAL_SHIFT)
136 static void
137 w_PyLong(const PyLongObject *ob, WFILE *p)
139 Py_ssize_t i, j, n, l;
140 digit d;
142 w_byte(TYPE_LONG, p);
143 if (Py_SIZE(ob) == 0) {
144 w_long((long)0, p);
145 return;
148 /* set l to number of base PyLong_MARSHAL_BASE digits */
149 n = ABS(Py_SIZE(ob));
150 l = (n-1) * PyLong_MARSHAL_RATIO;
151 d = ob->ob_digit[n-1];
152 assert(d != 0); /* a PyLong is always normalized */
153 do {
154 d >>= PyLong_MARSHAL_SHIFT;
155 l++;
156 } while (d != 0);
157 w_long((long)(Py_SIZE(ob) > 0 ? l : -l), p);
159 for (i=0; i < n-1; i++) {
160 d = ob->ob_digit[i];
161 for (j=0; j < PyLong_MARSHAL_RATIO; j++) {
162 w_short(d & PyLong_MARSHAL_MASK, p);
163 d >>= PyLong_MARSHAL_SHIFT;
165 assert (d == 0);
167 d = ob->ob_digit[n-1];
168 do {
169 w_short(d & PyLong_MARSHAL_MASK, p);
170 d >>= PyLong_MARSHAL_SHIFT;
171 } while (d != 0);
174 static void
175 w_object(PyObject *v, WFILE *p)
177 Py_ssize_t i, n;
179 p->depth++;
181 if (p->depth > MAX_MARSHAL_STACK_DEPTH) {
182 p->error = 2;
184 else if (v == NULL) {
185 w_byte(TYPE_NULL, p);
187 else if (v == Py_None) {
188 w_byte(TYPE_NONE, p);
190 else if (v == PyExc_StopIteration) {
191 w_byte(TYPE_STOPITER, p);
193 else if (v == Py_Ellipsis) {
194 w_byte(TYPE_ELLIPSIS, p);
196 else if (v == Py_False) {
197 w_byte(TYPE_FALSE, p);
199 else if (v == Py_True) {
200 w_byte(TYPE_TRUE, p);
202 else if (PyInt_CheckExact(v)) {
203 long x = PyInt_AS_LONG((PyIntObject *)v);
204 #if SIZEOF_LONG > 4
205 long y = Py_ARITHMETIC_RIGHT_SHIFT(long, x, 31);
206 if (y && y != -1) {
207 w_byte(TYPE_INT64, p);
208 w_long64(x, p);
210 else
211 #endif
213 w_byte(TYPE_INT, p);
214 w_long(x, p);
217 else if (PyLong_CheckExact(v)) {
218 PyLongObject *ob = (PyLongObject *)v;
219 w_PyLong(ob, p);
221 else if (PyFloat_CheckExact(v)) {
222 if (p->version > 1) {
223 unsigned char buf[8];
224 if (_PyFloat_Pack8(PyFloat_AsDouble(v),
225 buf, 1) < 0) {
226 p->error = 1;
227 return;
229 w_byte(TYPE_BINARY_FLOAT, p);
230 w_string((char*)buf, 8, p);
232 else {
233 char buf[256]; /* Plenty to format any double */
234 PyFloat_AsReprString(buf, (PyFloatObject *)v);
235 n = strlen(buf);
236 w_byte(TYPE_FLOAT, p);
237 w_byte((int)n, p);
238 w_string(buf, (int)n, p);
241 #ifndef WITHOUT_COMPLEX
242 else if (PyComplex_CheckExact(v)) {
243 if (p->version > 1) {
244 unsigned char buf[8];
245 if (_PyFloat_Pack8(PyComplex_RealAsDouble(v),
246 buf, 1) < 0) {
247 p->error = 1;
248 return;
250 w_byte(TYPE_BINARY_COMPLEX, p);
251 w_string((char*)buf, 8, p);
252 if (_PyFloat_Pack8(PyComplex_ImagAsDouble(v),
253 buf, 1) < 0) {
254 p->error = 1;
255 return;
257 w_string((char*)buf, 8, p);
259 else {
260 char buf[256]; /* Plenty to format any double */
261 PyFloatObject *temp;
262 w_byte(TYPE_COMPLEX, p);
263 temp = (PyFloatObject*)PyFloat_FromDouble(
264 PyComplex_RealAsDouble(v));
265 if (!temp) {
266 p->error = 1;
267 return;
269 PyFloat_AsReprString(buf, temp);
270 Py_DECREF(temp);
271 n = strlen(buf);
272 w_byte((int)n, p);
273 w_string(buf, (int)n, p);
274 temp = (PyFloatObject*)PyFloat_FromDouble(
275 PyComplex_ImagAsDouble(v));
276 if (!temp) {
277 p->error = 1;
278 return;
280 PyFloat_AsReprString(buf, temp);
281 Py_DECREF(temp);
282 n = strlen(buf);
283 w_byte((int)n, p);
284 w_string(buf, (int)n, p);
287 #endif
288 else if (PyString_CheckExact(v)) {
289 if (p->strings && PyString_CHECK_INTERNED(v)) {
290 PyObject *o = PyDict_GetItem(p->strings, v);
291 if (o) {
292 long w = PyInt_AsLong(o);
293 w_byte(TYPE_STRINGREF, p);
294 w_long(w, p);
295 goto exit;
297 else {
298 int ok;
299 o = PyInt_FromSsize_t(PyDict_Size(p->strings));
300 ok = o &&
301 PyDict_SetItem(p->strings, v, o) >= 0;
302 Py_XDECREF(o);
303 if (!ok) {
304 p->depth--;
305 p->error = 1;
306 return;
308 w_byte(TYPE_INTERNED, p);
311 else {
312 w_byte(TYPE_STRING, p);
314 n = PyString_GET_SIZE(v);
315 if (n > INT_MAX) {
316 /* huge strings are not supported */
317 p->depth--;
318 p->error = 1;
319 return;
321 w_long((long)n, p);
322 w_string(PyString_AS_STRING(v), (int)n, p);
324 #ifdef Py_USING_UNICODE
325 else if (PyUnicode_CheckExact(v)) {
326 PyObject *utf8;
327 utf8 = PyUnicode_AsUTF8String(v);
328 if (utf8 == NULL) {
329 p->depth--;
330 p->error = 1;
331 return;
333 w_byte(TYPE_UNICODE, p);
334 n = PyString_GET_SIZE(utf8);
335 if (n > INT_MAX) {
336 p->depth--;
337 p->error = 1;
338 return;
340 w_long((long)n, p);
341 w_string(PyString_AS_STRING(utf8), (int)n, p);
342 Py_DECREF(utf8);
344 #endif
345 else if (PyTuple_CheckExact(v)) {
346 w_byte(TYPE_TUPLE, p);
347 n = PyTuple_Size(v);
348 w_long((long)n, p);
349 for (i = 0; i < n; i++) {
350 w_object(PyTuple_GET_ITEM(v, i), p);
353 else if (PyList_CheckExact(v)) {
354 w_byte(TYPE_LIST, p);
355 n = PyList_GET_SIZE(v);
356 w_long((long)n, p);
357 for (i = 0; i < n; i++) {
358 w_object(PyList_GET_ITEM(v, i), p);
361 else if (PyDict_CheckExact(v)) {
362 Py_ssize_t pos;
363 PyObject *key, *value;
364 w_byte(TYPE_DICT, p);
365 /* This one is NULL object terminated! */
366 pos = 0;
367 while (PyDict_Next(v, &pos, &key, &value)) {
368 w_object(key, p);
369 w_object(value, p);
371 w_object((PyObject *)NULL, p);
373 else if (PyAnySet_CheckExact(v)) {
374 PyObject *value, *it;
376 if (PyObject_TypeCheck(v, &PySet_Type))
377 w_byte(TYPE_SET, p);
378 else
379 w_byte(TYPE_FROZENSET, p);
380 n = PyObject_Size(v);
381 if (n == -1) {
382 p->depth--;
383 p->error = 1;
384 return;
386 w_long((long)n, p);
387 it = PyObject_GetIter(v);
388 if (it == NULL) {
389 p->depth--;
390 p->error = 1;
391 return;
393 while ((value = PyIter_Next(it)) != NULL) {
394 w_object(value, p);
395 Py_DECREF(value);
397 Py_DECREF(it);
398 if (PyErr_Occurred()) {
399 p->depth--;
400 p->error = 1;
401 return;
404 else if (PyCode_Check(v)) {
405 PyCodeObject *co = (PyCodeObject *)v;
406 w_byte(TYPE_CODE, p);
407 w_long(co->co_argcount, p);
408 w_long(co->co_nlocals, p);
409 w_long(co->co_stacksize, p);
410 w_long(co->co_flags, p);
411 w_object(co->co_code, p);
412 w_object(co->co_consts, p);
413 w_object(co->co_names, p);
414 w_object(co->co_varnames, p);
415 w_object(co->co_freevars, p);
416 w_object(co->co_cellvars, p);
417 w_object(co->co_filename, p);
418 w_object(co->co_name, p);
419 w_long(co->co_firstlineno, p);
420 w_object(co->co_lnotab, p);
422 else if (PyObject_CheckReadBuffer(v)) {
423 /* Write unknown buffer-style objects as a string */
424 char *s;
425 PyBufferProcs *pb = v->ob_type->tp_as_buffer;
426 w_byte(TYPE_STRING, p);
427 n = (*pb->bf_getreadbuffer)(v, 0, (void **)&s);
428 if (n > INT_MAX) {
429 p->depth--;
430 p->error = 1;
431 return;
433 w_long((long)n, p);
434 w_string(s, (int)n, p);
436 else {
437 w_byte(TYPE_UNKNOWN, p);
438 p->error = 1;
440 exit:
441 p->depth--;
444 /* version currently has no effect for writing longs. */
445 void
446 PyMarshal_WriteLongToFile(long x, FILE *fp, int version)
448 WFILE wf;
449 wf.fp = fp;
450 wf.error = 0;
451 wf.depth = 0;
452 wf.strings = NULL;
453 wf.version = version;
454 w_long(x, &wf);
457 void
458 PyMarshal_WriteObjectToFile(PyObject *x, FILE *fp, int version)
460 WFILE wf;
461 wf.fp = fp;
462 wf.error = 0;
463 wf.depth = 0;
464 wf.strings = (version > 0) ? PyDict_New() : NULL;
465 wf.version = version;
466 w_object(x, &wf);
467 Py_XDECREF(wf.strings);
470 typedef WFILE RFILE; /* Same struct with different invariants */
472 #define rs_byte(p) (((p)->ptr < (p)->end) ? (unsigned char)*(p)->ptr++ : EOF)
474 #define r_byte(p) ((p)->fp ? getc((p)->fp) : rs_byte(p))
476 static int
477 r_string(char *s, int n, RFILE *p)
479 if (p->fp != NULL)
480 /* The result fits into int because it must be <=n. */
481 return (int)fread(s, 1, n, p->fp);
482 if (p->end - p->ptr < n)
483 n = (int)(p->end - p->ptr);
484 memcpy(s, p->ptr, n);
485 p->ptr += n;
486 return n;
489 static int
490 r_short(RFILE *p)
492 register short x;
493 x = r_byte(p);
494 x |= r_byte(p) << 8;
495 /* Sign-extension, in case short greater than 16 bits */
496 x |= -(x & 0x8000);
497 return x;
500 static long
501 r_long(RFILE *p)
503 register long x;
504 register FILE *fp = p->fp;
505 if (fp) {
506 x = getc(fp);
507 x |= (long)getc(fp) << 8;
508 x |= (long)getc(fp) << 16;
509 x |= (long)getc(fp) << 24;
511 else {
512 x = rs_byte(p);
513 x |= (long)rs_byte(p) << 8;
514 x |= (long)rs_byte(p) << 16;
515 x |= (long)rs_byte(p) << 24;
517 #if SIZEOF_LONG > 4
518 /* Sign extension for 64-bit machines */
519 x |= -(x & 0x80000000L);
520 #endif
521 return x;
524 /* r_long64 deals with the TYPE_INT64 code. On a machine with
525 sizeof(long) > 4, it returns a Python int object, else a Python long
526 object. Note that w_long64 writes out TYPE_INT if 32 bits is enough,
527 so there's no inefficiency here in returning a PyLong on 32-bit boxes
528 for everything written via TYPE_INT64 (i.e., if an int is written via
529 TYPE_INT64, it *needs* more than 32 bits).
531 static PyObject *
532 r_long64(RFILE *p)
534 long lo4 = r_long(p);
535 long hi4 = r_long(p);
536 #if SIZEOF_LONG > 4
537 long x = (hi4 << 32) | (lo4 & 0xFFFFFFFFL);
538 return PyInt_FromLong(x);
539 #else
540 unsigned char buf[8];
541 int one = 1;
542 int is_little_endian = (int)*(char*)&one;
543 if (is_little_endian) {
544 memcpy(buf, &lo4, 4);
545 memcpy(buf+4, &hi4, 4);
547 else {
548 memcpy(buf, &hi4, 4);
549 memcpy(buf+4, &lo4, 4);
551 return _PyLong_FromByteArray(buf, 8, is_little_endian, 1);
552 #endif
555 static PyObject *
556 r_PyLong(RFILE *p)
558 PyLongObject *ob;
559 int size, i, j, md, shorts_in_top_digit;
560 long n;
561 digit d;
563 n = r_long(p);
564 if (n == 0)
565 return (PyObject *)_PyLong_New(0);
566 if (n < -INT_MAX || n > INT_MAX) {
567 PyErr_SetString(PyExc_ValueError,
568 "bad marshal data (long size out of range)");
569 return NULL;
572 size = 1 + (ABS(n) - 1) / PyLong_MARSHAL_RATIO;
573 shorts_in_top_digit = 1 + (ABS(n) - 1) % PyLong_MARSHAL_RATIO;
574 ob = _PyLong_New(size);
575 if (ob == NULL)
576 return NULL;
577 Py_SIZE(ob) = n > 0 ? size : -size;
579 for (i = 0; i < size-1; i++) {
580 d = 0;
581 for (j=0; j < PyLong_MARSHAL_RATIO; j++) {
582 md = r_short(p);
583 if (md < 0 || md > PyLong_MARSHAL_BASE)
584 goto bad_digit;
585 d += (digit)md << j*PyLong_MARSHAL_SHIFT;
587 ob->ob_digit[i] = d;
589 d = 0;
590 for (j=0; j < shorts_in_top_digit; j++) {
591 md = r_short(p);
592 if (md < 0 || md > PyLong_MARSHAL_BASE)
593 goto bad_digit;
594 /* topmost marshal digit should be nonzero */
595 if (md == 0 && j == shorts_in_top_digit - 1) {
596 Py_DECREF(ob);
597 PyErr_SetString(PyExc_ValueError,
598 "bad marshal data (unnormalized long data)");
599 return NULL;
601 d += (digit)md << j*PyLong_MARSHAL_SHIFT;
603 /* top digit should be nonzero, else the resulting PyLong won't be
604 normalized */
605 ob->ob_digit[size-1] = d;
606 return (PyObject *)ob;
607 bad_digit:
608 Py_DECREF(ob);
609 PyErr_SetString(PyExc_ValueError,
610 "bad marshal data (digit out of range in long)");
611 return NULL;
615 static PyObject *
616 r_object(RFILE *p)
618 /* NULL is a valid return value, it does not necessarily means that
619 an exception is set. */
620 PyObject *v, *v2;
621 long i, n;
622 int type = r_byte(p);
623 PyObject *retval;
625 p->depth++;
627 if (p->depth > MAX_MARSHAL_STACK_DEPTH) {
628 p->depth--;
629 PyErr_SetString(PyExc_ValueError, "recursion limit exceeded");
630 return NULL;
633 switch (type) {
635 case EOF:
636 PyErr_SetString(PyExc_EOFError,
637 "EOF read where object expected");
638 retval = NULL;
639 break;
641 case TYPE_NULL:
642 retval = NULL;
643 break;
645 case TYPE_NONE:
646 Py_INCREF(Py_None);
647 retval = Py_None;
648 break;
650 case TYPE_STOPITER:
651 Py_INCREF(PyExc_StopIteration);
652 retval = PyExc_StopIteration;
653 break;
655 case TYPE_ELLIPSIS:
656 Py_INCREF(Py_Ellipsis);
657 retval = Py_Ellipsis;
658 break;
660 case TYPE_FALSE:
661 Py_INCREF(Py_False);
662 retval = Py_False;
663 break;
665 case TYPE_TRUE:
666 Py_INCREF(Py_True);
667 retval = Py_True;
668 break;
670 case TYPE_INT:
671 retval = PyInt_FromLong(r_long(p));
672 break;
674 case TYPE_INT64:
675 retval = r_long64(p);
676 break;
678 case TYPE_LONG:
679 retval = r_PyLong(p);
680 break;
682 case TYPE_FLOAT:
684 char buf[256];
685 double dx;
686 n = r_byte(p);
687 if (n == EOF || r_string(buf, (int)n, p) != n) {
688 PyErr_SetString(PyExc_EOFError,
689 "EOF read where object expected");
690 retval = NULL;
691 break;
693 buf[n] = '\0';
694 retval = NULL;
695 PyFPE_START_PROTECT("atof", break)
696 dx = PyOS_ascii_atof(buf);
697 PyFPE_END_PROTECT(dx)
698 retval = PyFloat_FromDouble(dx);
699 break;
702 case TYPE_BINARY_FLOAT:
704 unsigned char buf[8];
705 double x;
706 if (r_string((char*)buf, 8, p) != 8) {
707 PyErr_SetString(PyExc_EOFError,
708 "EOF read where object expected");
709 retval = NULL;
710 break;
712 x = _PyFloat_Unpack8(buf, 1);
713 if (x == -1.0 && PyErr_Occurred()) {
714 retval = NULL;
715 break;
717 retval = PyFloat_FromDouble(x);
718 break;
721 #ifndef WITHOUT_COMPLEX
722 case TYPE_COMPLEX:
724 char buf[256];
725 Py_complex c;
726 n = r_byte(p);
727 if (n == EOF || r_string(buf, (int)n, p) != n) {
728 PyErr_SetString(PyExc_EOFError,
729 "EOF read where object expected");
730 retval = NULL;
731 break;
733 buf[n] = '\0';
734 retval = NULL;
735 PyFPE_START_PROTECT("atof", break;)
736 c.real = PyOS_ascii_atof(buf);
737 PyFPE_END_PROTECT(c)
738 n = r_byte(p);
739 if (n == EOF || r_string(buf, (int)n, p) != n) {
740 PyErr_SetString(PyExc_EOFError,
741 "EOF read where object expected");
742 retval = NULL;
743 break;
745 buf[n] = '\0';
746 PyFPE_START_PROTECT("atof", break)
747 c.imag = PyOS_ascii_atof(buf);
748 PyFPE_END_PROTECT(c)
749 retval = PyComplex_FromCComplex(c);
750 break;
753 case TYPE_BINARY_COMPLEX:
755 unsigned char buf[8];
756 Py_complex c;
757 if (r_string((char*)buf, 8, p) != 8) {
758 PyErr_SetString(PyExc_EOFError,
759 "EOF read where object expected");
760 retval = NULL;
761 break;
763 c.real = _PyFloat_Unpack8(buf, 1);
764 if (c.real == -1.0 && PyErr_Occurred()) {
765 retval = NULL;
766 break;
768 if (r_string((char*)buf, 8, p) != 8) {
769 PyErr_SetString(PyExc_EOFError,
770 "EOF read where object expected");
771 retval = NULL;
772 break;
774 c.imag = _PyFloat_Unpack8(buf, 1);
775 if (c.imag == -1.0 && PyErr_Occurred()) {
776 retval = NULL;
777 break;
779 retval = PyComplex_FromCComplex(c);
780 break;
782 #endif
784 case TYPE_INTERNED:
785 case TYPE_STRING:
786 n = r_long(p);
787 if (n < 0 || n > INT_MAX) {
788 PyErr_SetString(PyExc_ValueError, "bad marshal data (string size out of range)");
789 retval = NULL;
790 break;
792 v = PyString_FromStringAndSize((char *)NULL, n);
793 if (v == NULL) {
794 retval = NULL;
795 break;
797 if (r_string(PyString_AS_STRING(v), (int)n, p) != n) {
798 Py_DECREF(v);
799 PyErr_SetString(PyExc_EOFError,
800 "EOF read where object expected");
801 retval = NULL;
802 break;
804 if (type == TYPE_INTERNED) {
805 PyString_InternInPlace(&v);
806 if (PyList_Append(p->strings, v) < 0) {
807 retval = NULL;
808 break;
811 retval = v;
812 break;
814 case TYPE_STRINGREF:
815 n = r_long(p);
816 if (n < 0 || n >= PyList_GET_SIZE(p->strings)) {
817 PyErr_SetString(PyExc_ValueError, "bad marshal data (string ref out of range)");
818 retval = NULL;
819 break;
821 v = PyList_GET_ITEM(p->strings, n);
822 Py_INCREF(v);
823 retval = v;
824 break;
826 #ifdef Py_USING_UNICODE
827 case TYPE_UNICODE:
829 char *buffer;
831 n = r_long(p);
832 if (n < 0 || n > INT_MAX) {
833 PyErr_SetString(PyExc_ValueError, "bad marshal data (unicode size out of range)");
834 retval = NULL;
835 break;
837 buffer = PyMem_NEW(char, n);
838 if (buffer == NULL) {
839 retval = PyErr_NoMemory();
840 break;
842 if (r_string(buffer, (int)n, p) != n) {
843 PyMem_DEL(buffer);
844 PyErr_SetString(PyExc_EOFError,
845 "EOF read where object expected");
846 retval = NULL;
847 break;
849 v = PyUnicode_DecodeUTF8(buffer, n, NULL);
850 PyMem_DEL(buffer);
851 retval = v;
852 break;
854 #endif
856 case TYPE_TUPLE:
857 n = r_long(p);
858 if (n < 0 || n > INT_MAX) {
859 PyErr_SetString(PyExc_ValueError, "bad marshal data (tuple size out of range)");
860 retval = NULL;
861 break;
863 v = PyTuple_New((int)n);
864 if (v == NULL) {
865 retval = NULL;
866 break;
868 for (i = 0; i < n; i++) {
869 v2 = r_object(p);
870 if ( v2 == NULL ) {
871 if (!PyErr_Occurred())
872 PyErr_SetString(PyExc_TypeError,
873 "NULL object in marshal data for tuple");
874 Py_DECREF(v);
875 v = NULL;
876 break;
878 PyTuple_SET_ITEM(v, (int)i, v2);
880 retval = v;
881 break;
883 case TYPE_LIST:
884 n = r_long(p);
885 if (n < 0 || n > INT_MAX) {
886 PyErr_SetString(PyExc_ValueError, "bad marshal data (list size out of range)");
887 retval = NULL;
888 break;
890 v = PyList_New((int)n);
891 if (v == NULL) {
892 retval = NULL;
893 break;
895 for (i = 0; i < n; i++) {
896 v2 = r_object(p);
897 if ( v2 == NULL ) {
898 if (!PyErr_Occurred())
899 PyErr_SetString(PyExc_TypeError,
900 "NULL object in marshal data for list");
901 Py_DECREF(v);
902 v = NULL;
903 break;
905 PyList_SET_ITEM(v, (int)i, v2);
907 retval = v;
908 break;
910 case TYPE_DICT:
911 v = PyDict_New();
912 if (v == NULL) {
913 retval = NULL;
914 break;
916 for (;;) {
917 PyObject *key, *val;
918 key = r_object(p);
919 if (key == NULL)
920 break;
921 val = r_object(p);
922 if (val != NULL)
923 PyDict_SetItem(v, key, val);
924 Py_DECREF(key);
925 Py_XDECREF(val);
927 if (PyErr_Occurred()) {
928 Py_DECREF(v);
929 v = NULL;
931 retval = v;
932 break;
934 case TYPE_SET:
935 case TYPE_FROZENSET:
936 n = r_long(p);
937 if (n < 0 || n > INT_MAX) {
938 PyErr_SetString(PyExc_ValueError, "bad marshal data (set size out of range)");
939 retval = NULL;
940 break;
942 v = (type == TYPE_SET) ? PySet_New(NULL) : PyFrozenSet_New(NULL);
943 if (v == NULL) {
944 retval = NULL;
945 break;
947 for (i = 0; i < n; i++) {
948 v2 = r_object(p);
949 if ( v2 == NULL ) {
950 if (!PyErr_Occurred())
951 PyErr_SetString(PyExc_TypeError,
952 "NULL object in marshal data for set");
953 Py_DECREF(v);
954 v = NULL;
955 break;
957 if (PySet_Add(v, v2) == -1) {
958 Py_DECREF(v);
959 Py_DECREF(v2);
960 v = NULL;
961 break;
963 Py_DECREF(v2);
965 retval = v;
966 break;
968 case TYPE_CODE:
969 if (PyEval_GetRestricted()) {
970 PyErr_SetString(PyExc_RuntimeError,
971 "cannot unmarshal code objects in "
972 "restricted execution mode");
973 retval = NULL;
974 break;
976 else {
977 int argcount;
978 int nlocals;
979 int stacksize;
980 int flags;
981 PyObject *code = NULL;
982 PyObject *consts = NULL;
983 PyObject *names = NULL;
984 PyObject *varnames = NULL;
985 PyObject *freevars = NULL;
986 PyObject *cellvars = NULL;
987 PyObject *filename = NULL;
988 PyObject *name = NULL;
989 int firstlineno;
990 PyObject *lnotab = NULL;
992 v = NULL;
994 /* XXX ignore long->int overflows for now */
995 argcount = (int)r_long(p);
996 nlocals = (int)r_long(p);
997 stacksize = (int)r_long(p);
998 flags = (int)r_long(p);
999 code = r_object(p);
1000 if (code == NULL)
1001 goto code_error;
1002 consts = r_object(p);
1003 if (consts == NULL)
1004 goto code_error;
1005 names = r_object(p);
1006 if (names == NULL)
1007 goto code_error;
1008 varnames = r_object(p);
1009 if (varnames == NULL)
1010 goto code_error;
1011 freevars = r_object(p);
1012 if (freevars == NULL)
1013 goto code_error;
1014 cellvars = r_object(p);
1015 if (cellvars == NULL)
1016 goto code_error;
1017 filename = r_object(p);
1018 if (filename == NULL)
1019 goto code_error;
1020 name = r_object(p);
1021 if (name == NULL)
1022 goto code_error;
1023 firstlineno = (int)r_long(p);
1024 lnotab = r_object(p);
1025 if (lnotab == NULL)
1026 goto code_error;
1028 v = (PyObject *) PyCode_New(
1029 argcount, nlocals, stacksize, flags,
1030 code, consts, names, varnames,
1031 freevars, cellvars, filename, name,
1032 firstlineno, lnotab);
1034 code_error:
1035 Py_XDECREF(code);
1036 Py_XDECREF(consts);
1037 Py_XDECREF(names);
1038 Py_XDECREF(varnames);
1039 Py_XDECREF(freevars);
1040 Py_XDECREF(cellvars);
1041 Py_XDECREF(filename);
1042 Py_XDECREF(name);
1043 Py_XDECREF(lnotab);
1046 retval = v;
1047 break;
1049 default:
1050 /* Bogus data got written, which isn't ideal.
1051 This will let you keep working and recover. */
1052 PyErr_SetString(PyExc_ValueError, "bad marshal data (unknown type code)");
1053 retval = NULL;
1054 break;
1057 p->depth--;
1058 return retval;
1061 static PyObject *
1062 read_object(RFILE *p)
1064 PyObject *v;
1065 if (PyErr_Occurred()) {
1066 fprintf(stderr, "XXX readobject called with exception set\n");
1067 return NULL;
1069 v = r_object(p);
1070 if (v == NULL && !PyErr_Occurred())
1071 PyErr_SetString(PyExc_TypeError, "NULL object in marshal data for object");
1072 return v;
1076 PyMarshal_ReadShortFromFile(FILE *fp)
1078 RFILE rf;
1079 assert(fp);
1080 rf.fp = fp;
1081 rf.strings = NULL;
1082 rf.end = rf.ptr = NULL;
1083 return r_short(&rf);
1086 long
1087 PyMarshal_ReadLongFromFile(FILE *fp)
1089 RFILE rf;
1090 rf.fp = fp;
1091 rf.strings = NULL;
1092 rf.ptr = rf.end = NULL;
1093 return r_long(&rf);
#ifdef HAVE_FSTAT
/* Return size of file in bytes; < 0 if unknown. */
static off_t
getfilesize(FILE *fp)
{
    struct stat info;

    return (fstat(fileno(fp), &info) == 0) ? info.st_size : -1;
}
#endif
1109 /* If we can get the size of the file up-front, and it's reasonably small,
1110 * read it in one gulp and delegate to ...FromString() instead. Much quicker
1111 * than reading a byte at a time from file; speeds .pyc imports.
1112 * CAUTION: since this may read the entire remainder of the file, don't
1113 * call it unless you know you're done with the file.
1115 PyObject *
1116 PyMarshal_ReadLastObjectFromFile(FILE *fp)
1118 /* 75% of 2.1's .pyc files can exploit SMALL_FILE_LIMIT.
1119 * REASONABLE_FILE_LIMIT is by defn something big enough for Tkinter.pyc.
1121 #define SMALL_FILE_LIMIT (1L << 14)
1122 #define REASONABLE_FILE_LIMIT (1L << 18)
1123 #ifdef HAVE_FSTAT
1124 off_t filesize;
1125 #endif
1126 #ifdef HAVE_FSTAT
1127 filesize = getfilesize(fp);
1128 if (filesize > 0) {
1129 char buf[SMALL_FILE_LIMIT];
1130 char* pBuf = NULL;
1131 if (filesize <= SMALL_FILE_LIMIT)
1132 pBuf = buf;
1133 else if (filesize <= REASONABLE_FILE_LIMIT)
1134 pBuf = (char *)PyMem_MALLOC(filesize);
1135 if (pBuf != NULL) {
1136 PyObject* v;
1137 size_t n;
1138 /* filesize must fit into an int, because it
1139 is smaller than REASONABLE_FILE_LIMIT */
1140 n = fread(pBuf, 1, (int)filesize, fp);
1141 v = PyMarshal_ReadObjectFromString(pBuf, n);
1142 if (pBuf != buf)
1143 PyMem_FREE(pBuf);
1144 return v;
1148 #endif
1149 /* We don't have fstat, or we do but the file is larger than
1150 * REASONABLE_FILE_LIMIT or malloc failed -- read a byte at a time.
1152 return PyMarshal_ReadObjectFromFile(fp);
1154 #undef SMALL_FILE_LIMIT
1155 #undef REASONABLE_FILE_LIMIT
1158 PyObject *
1159 PyMarshal_ReadObjectFromFile(FILE *fp)
1161 RFILE rf;
1162 PyObject *result;
1163 rf.fp = fp;
1164 rf.strings = PyList_New(0);
1165 rf.depth = 0;
1166 rf.ptr = rf.end = NULL;
1167 result = r_object(&rf);
1168 Py_DECREF(rf.strings);
1169 return result;
1172 PyObject *
1173 PyMarshal_ReadObjectFromString(char *str, Py_ssize_t len)
1175 RFILE rf;
1176 PyObject *result;
1177 rf.fp = NULL;
1178 rf.ptr = str;
1179 rf.end = str + len;
1180 rf.strings = PyList_New(0);
1181 rf.depth = 0;
1182 result = r_object(&rf);
1183 Py_DECREF(rf.strings);
1184 return result;
1187 PyObject *
1188 PyMarshal_WriteObjectToString(PyObject *x, int version)
1190 WFILE wf;
1191 wf.fp = NULL;
1192 wf.str = PyString_FromStringAndSize((char *)NULL, 50);
1193 if (wf.str == NULL)
1194 return NULL;
1195 wf.ptr = PyString_AS_STRING((PyStringObject *)wf.str);
1196 wf.end = wf.ptr + PyString_Size(wf.str);
1197 wf.error = 0;
1198 wf.depth = 0;
1199 wf.version = version;
1200 wf.strings = (version > 0) ? PyDict_New() : NULL;
1201 w_object(x, &wf);
1202 Py_XDECREF(wf.strings);
1203 if (wf.str != NULL) {
1204 char *base = PyString_AS_STRING((PyStringObject *)wf.str);
1205 if (wf.ptr - base > PY_SSIZE_T_MAX) {
1206 Py_DECREF(wf.str);
1207 PyErr_SetString(PyExc_OverflowError,
1208 "too much marshall data for a string");
1209 return NULL;
1211 _PyString_Resize(&wf.str, (Py_ssize_t)(wf.ptr - base));
1213 if (wf.error) {
1214 Py_XDECREF(wf.str);
1215 PyErr_SetString(PyExc_ValueError,
1216 (wf.error==1)?"unmarshallable object"
1217 :"object too deeply nested to marshal");
1218 return NULL;
1220 return wf.str;
1223 /* And an interface for Python programs... */
1225 static PyObject *
1226 marshal_dump(PyObject *self, PyObject *args)
1228 WFILE wf;
1229 PyObject *x;
1230 PyObject *f;
1231 int version = Py_MARSHAL_VERSION;
1232 if (!PyArg_ParseTuple(args, "OO|i:dump", &x, &f, &version))
1233 return NULL;
1234 if (!PyFile_Check(f)) {
1235 PyErr_SetString(PyExc_TypeError,
1236 "marshal.dump() 2nd arg must be file");
1237 return NULL;
1239 wf.fp = PyFile_AsFile(f);
1240 wf.str = NULL;
1241 wf.ptr = wf.end = NULL;
1242 wf.error = 0;
1243 wf.depth = 0;
1244 wf.strings = (version > 0) ? PyDict_New() : 0;
1245 wf.version = version;
1246 w_object(x, &wf);
1247 Py_XDECREF(wf.strings);
1248 if (wf.error) {
1249 PyErr_SetString(PyExc_ValueError,
1250 (wf.error==1)?"unmarshallable object"
1251 :"object too deeply nested to marshal");
1252 return NULL;
1254 Py_INCREF(Py_None);
1255 return Py_None;
1258 PyDoc_STRVAR(dump_doc,
1259 "dump(value, file[, version])\n\
1261 Write the value on the open file. The value must be a supported type.\n\
1262 The file must be an open file object such as sys.stdout or returned by\n\
1263 open() or os.popen(). It must be opened in binary mode ('wb' or 'w+b').\n\
1265 If the value has (or contains an object that has) an unsupported type, a\n\
1266 ValueError exception is raised — but garbage data will also be written\n\
1267 to the file. The object will not be properly read back by load()\n\
1269 New in version 2.4: The version argument indicates the data format that\n\
1270 dump should use.");
1272 static PyObject *
1273 marshal_load(PyObject *self, PyObject *f)
1275 RFILE rf;
1276 PyObject *result;
1277 if (!PyFile_Check(f)) {
1278 PyErr_SetString(PyExc_TypeError,
1279 "marshal.load() arg must be file");
1280 return NULL;
1282 rf.fp = PyFile_AsFile(f);
1283 rf.strings = PyList_New(0);
1284 rf.depth = 0;
1285 result = read_object(&rf);
1286 Py_DECREF(rf.strings);
1287 return result;
1290 PyDoc_STRVAR(load_doc,
1291 "load(file)\n\
1293 Read one value from the open file and return it. If no valid value is\n\
1294 read (e.g. because the data has a different Python version’s\n\
1295 incompatible marshal format), raise EOFError, ValueError or TypeError.\n\
1296 The file must be an open file object opened in binary mode ('rb' or\n\
1297 'r+b').\n\
1299 Note: If an object containing an unsupported type was marshalled with\n\
1300 dump(), load() will substitute None for the unmarshallable type.");
1303 static PyObject *
1304 marshal_dumps(PyObject *self, PyObject *args)
1306 PyObject *x;
1307 int version = Py_MARSHAL_VERSION;
1308 if (!PyArg_ParseTuple(args, "O|i:dumps", &x, &version))
1309 return NULL;
1310 return PyMarshal_WriteObjectToString(x, version);
1313 PyDoc_STRVAR(dumps_doc,
1314 "dumps(value[, version])\n\
1316 Return the string that would be written to a file by dump(value, file).\n\
1317 The value must be a supported type. Raise a ValueError exception if\n\
1318 value has (or contains an object that has) an unsupported type.\n\
1320 New in version 2.4: The version argument indicates the data format that\n\
1321 dumps should use.");
1324 static PyObject *
1325 marshal_loads(PyObject *self, PyObject *args)
1327 RFILE rf;
1328 char *s;
1329 Py_ssize_t n;
1330 PyObject* result;
1331 if (!PyArg_ParseTuple(args, "s#:loads", &s, &n))
1332 return NULL;
1333 rf.fp = NULL;
1334 rf.ptr = s;
1335 rf.end = s + n;
1336 rf.strings = PyList_New(0);
1337 rf.depth = 0;
1338 result = read_object(&rf);
1339 Py_DECREF(rf.strings);
1340 return result;
1343 PyDoc_STRVAR(loads_doc,
1344 "loads(string)\n\
1346 Convert the string to a value. If no valid value is found, raise\n\
1347 EOFError, ValueError or TypeError. Extra characters in the string are\n\
1348 ignored.");
1350 static PyMethodDef marshal_methods[] = {
1351 {"dump", marshal_dump, METH_VARARGS, dump_doc},
1352 {"load", marshal_load, METH_O, load_doc},
1353 {"dumps", marshal_dumps, METH_VARARGS, dumps_doc},
1354 {"loads", marshal_loads, METH_VARARGS, loads_doc},
1355 {NULL, NULL} /* sentinel */
1358 PyDoc_STRVAR(marshal_doc,
1359 "This module contains functions that can read and write Python values in\n\
1360 a binary format. The format is specific to Python, but independent of\n\
1361 machine architecture issues.\n\
1363 Not all Python object types are supported; in general, only objects\n\
1364 whose value is independent from a particular invocation of Python can be\n\
1365 written and read by this module. The following types are supported:\n\
1366 None, integers, long integers, floating point numbers, strings, Unicode\n\
1367 objects, tuples, lists, sets, dictionaries, and code objects, where it\n\
1368 should be understood that tuples, lists and dictionaries are only\n\
1369 supported as long as the values contained therein are themselves\n\
1370 supported; and recursive lists and dictionaries should not be written\n\
1371 (they will cause infinite loops).\n\
1373 Variables:\n\
1375 version -- indicates the format that the module uses. Version 0 is the\n\
1376 historical format, version 1 (added in Python 2.4) shares interned\n\
1377 strings and version 2 (added in Python 2.5) uses a binary format for\n\
1378 floating point numbers. (New in version 2.4)\n\
1380 Functions:\n\
1382 dump() -- write value to a file\n\
1383 load() -- read value from a file\n\
1384 dumps() -- write value to a string\n\
1385 loads() -- read value from a string");
1388 PyMODINIT_FUNC
1389 PyMarshal_Init(void)
1391 PyObject *mod = Py_InitModule3("marshal", marshal_methods,
1392 marshal_doc);
1393 if (mod == NULL)
1394 return;
1395 PyModule_AddIntConstant(mod, "version", Py_MARSHAL_VERSION);