1 // Copyright (c) 2006- Facebook
2 // Distributed under the Thrift Software License
4 // See accompanying file LICENSE or visit the Thrift site at:
5 // http://developers.facebook.com/thrift/
7 // NOTE: This code was contributed by an external developer.
8 // The internal Thrift team has reviewed and tested it,
9 // but we cannot guarantee that it is production-ready.
10 // Please feel free to report bugs and/or success stories
11 // to the public mailing list.
14 #include "cStringIO.h"
17 #include <netinet/in.h>
19 // TODO(dreiss): defval appears to be unused. Look into removing it.
20 // TODO(dreiss): Make parse_spec_args recursive, and cache the output
21 // permanently in the object. (Malloc and orphan.)
22 // TODO(dreiss): Why do we need cStringIO for reading, why not just char*?
23 // Can cStringIO let us work with a BufferedTransport?
24 // TODO(dreiss): Don't ignore the rv from cwrite (maybe).
26 /* ====== BEGIN UTILITIES ====== */
28 #define INIT_OUTBUF_SIZE 128
30 // Stolen out of TProtocol.h.
31 // It would be a huge pain to have both get this from one place.
53 // Same comment as the enum. Sorry.
54 #if __BYTE_ORDER == __BIG_ENDIAN
55 # define ntohll(n) (n)
56 # define htonll(n) (n)
57 #elif __BYTE_ORDER == __LITTLE_ENDIAN
58 # if defined(__GNUC__) && defined(__GLIBC__)
59 # include <byteswap.h>
60 # define ntohll(n) bswap_64(n)
61 # define htonll(n) bswap_64(n)
62 # else /* GNUC & GLIBC */
63 # define ntohll(n) ( (((unsigned long long)ntohl(n)) << 32) + ntohl(n >> 32) )
64 # define htonll(n) ( (((unsigned long long)htonl(n)) << 32) + htonl(n >> 32) )
65 # endif /* GNUC & GLIBC */
66 #else /* __BYTE_ORDER */
67 # error "Can't define htonll or ntohll!"
70 // Doing a benchmark shows that interning actually makes a difference, amazingly.
71 #define INTERN_STRING(value) _intern_ ## value
73 #define INT_CONV_ERROR_OCCURRED(v) ( ((v) == -1) && PyErr_Occurred() )
74 #define CHECK_RANGE(v, min, max) ( ((v) <= (max)) && ((v) >= (min)) )
76 // Py_ssize_t was not defined before Python 2.5
77 #if (PY_VERSION_HEX < 0x02050000)
78 typedef int Py_ssize_t
;
82 * A cache of the spec_args for a set or list,
83 * so we don't have to keep calling PyTuple_GET_ITEM.
91 * A cache of the spec_args for a map,
92 * so we don't have to keep calling PyTuple_GET_ITEM.
102 * A cache of the spec_args for a struct,
103 * so we don't have to keep calling PyTuple_GET_ITEM.
111 * A cache of the item spec from a struct specification,
112 * so we don't have to keep calling PyTuple_GET_ITEM.
123 * A cache of the two key attributes of a CReadableTransport,
124 * so we don't have to keep calling PyObject_GetAttr.
127 PyObject
* stringiobuf
;
128 PyObject
* refill_callable
;
131 /** Pointer to interned string to speed up attribute lookup. */
132 static PyObject
* INTERN_STRING(cstringio_buf
);
133 /** Pointer to interned string to speed up attribute lookup. */
134 static PyObject
* INTERN_STRING(cstringio_refill
);
// Validates a length that travels on the wire as a 32-bit integer.
// Two conditions are checked, in order:
//   1. a pending conversion error (len == -1 with a Python exception set);
//   2. len outside [0, INT32_MAX], which raises
//      OverflowError("string size out of range").
// The same check is applied to container lengths elsewhere in this file, so
// the "string size" wording of the message is broader than it sounds.
// NOTE(review): the return statements are not visible in this view; callers
// test the result with `!`, so a false value presumably means failure --
// confirm.
137 check_ssize_t_32(Py_ssize_t len
) {
138 // error from getting the int
139 if (INT_CONV_ERROR_OCCURRED(len
)) {
142 if (!CHECK_RANGE(len
, 0, INT32_MAX
)) {
143 PyErr_SetString(PyExc_OverflowError
, "string size out of range");
// Converts the Python object `o` to a C long with PyInt_AsLong, verifies that
// the value lies within [min, max], and stores it in *ret as an int32_t.
// A value outside the range raises OverflowError("int out of range").
// NOTE(review): the return statements are not visible in this view; callers
// test the result with `!`, so a false value presumably means failure --
// confirm.
150 parse_pyint(PyObject
* o
, int32_t* ret
, int32_t min
, int32_t max
) {
151 long val
= PyInt_AsLong(o
);
// -1 is only an error when a Python exception is also pending.
153 if (INT_CONV_ERROR_OCCURRED(val
)) {
156 if (!CHECK_RANGE(val
, min
, max
)) {
157 PyErr_SetString(PyExc_OverflowError
, "int out of range");
// The narrowing cast follows the range check against the int32_t bounds.
161 *ret
= (int32_t) val
;
166 /* --- FUNCTIONS TO PARSE STRUCT SPECIFICATIONS --- */
// Unpacks the type arguments of a list/set into `dest`.
// `typeargs` must be a tuple of size 2: item 0 is converted with PyInt_AsLong
// and stored as the element type, item 1 is stored as the element's own
// typeargs. A wrong tuple size raises TypeError.
// PyTuple_GET_ITEM yields borrowed references, so `dest` does not own the
// objects it points to.
// NOTE(review): return statements are not visible in this view; callers test
// the result with `!` -- confirm that false means failure.
169 parse_set_list_args(SetListTypeArgs
* dest
, PyObject
* typeargs
) {
170 if (PyTuple_Size(typeargs
) != 2) {
171 PyErr_SetString(PyExc_TypeError
, "expecting tuple of size 2 for list/set type args");
175 dest
->element_type
= PyInt_AsLong(PyTuple_GET_ITEM(typeargs
, 0));
176 if (INT_CONV_ERROR_OCCURRED(dest
->element_type
)) {
180 dest
->typeargs
= PyTuple_GET_ITEM(typeargs
, 1);
// Unpacks the type arguments of a map into `dest`.
// `typeargs` must be a tuple of size 4, laid out as
// (key type, key typeargs, value type, value typeargs): items 0 and 2 are
// converted with PyInt_AsLong, items 1 and 3 are stored as-is.
// A wrong tuple size raises TypeError.
// PyTuple_GET_ITEM yields borrowed references, so `dest` does not own the
// objects it points to.
// NOTE(review): return statements are not visible in this view; callers test
// the result with `!` -- confirm that false means failure.
186 parse_map_args(MapTypeArgs
* dest
, PyObject
* typeargs
) {
187 if (PyTuple_Size(typeargs
) != 4) {
188 PyErr_SetString(PyExc_TypeError
, "expecting 4 arguments for typeargs to map");
192 dest
->ktag
= PyInt_AsLong(PyTuple_GET_ITEM(typeargs
, 0));
193 if (INT_CONV_ERROR_OCCURRED(dest
->ktag
)) {
197 dest
->vtag
= PyInt_AsLong(PyTuple_GET_ITEM(typeargs
, 2));
198 if (INT_CONV_ERROR_OCCURRED(dest
->vtag
)) {
202 dest
->ktypeargs
= PyTuple_GET_ITEM(typeargs
, 1);
203 dest
->vtypeargs
= PyTuple_GET_ITEM(typeargs
, 3);
// Unpacks the type arguments of a struct into `dest`.
// `typeargs` must be a tuple of size 2: item 0 is stored as the class
// (`klass`) and item 1 as the field specification (`spec`).
// A wrong tuple size raises TypeError.
// Both pointers are borrowed references obtained with PyTuple_GET_ITEM.
// NOTE(review): return statements are not visible in this view; callers test
// the result with `!` -- confirm that false means failure.
209 parse_struct_args(StructTypeArgs
* dest
, PyObject
* typeargs
) {
210 if (PyTuple_Size(typeargs
) != 2) {
211 PyErr_SetString(PyExc_TypeError
, "expecting tuple of size 2 for struct args");
215 dest
->klass
= PyTuple_GET_ITEM(typeargs
, 0);
216 dest
->spec
= PyTuple_GET_ITEM(typeargs
, 1);
// Unpacks one entry of a struct's field specification into `dest`.
// `spec_tuple` must be a tuple of size 5, laid out as
// (tag, type, attribute name, typeargs, default value): items 0 and 1 are
// converted with PyInt_AsLong, items 2-4 are stored as borrowed references.
// A wrong tuple size raises TypeError.
// NOTE(review): return statements are not visible in this view; callers test
// the result with `!` -- confirm that false means failure.
222 parse_struct_item_spec(StructItemSpec
* dest
, PyObject
* spec_tuple
) {
224 // i'd like to use ParseArgs here, but it seems to be a bottleneck.
225 if (PyTuple_Size(spec_tuple
) != 5) {
226 PyErr_SetString(PyExc_TypeError
, "expecting 5 arguments for spec tuple");
230 dest
->tag
= PyInt_AsLong(PyTuple_GET_ITEM(spec_tuple
, 0));
231 if (INT_CONV_ERROR_OCCURRED(dest
->tag
)) {
235 dest
->type
= PyInt_AsLong(PyTuple_GET_ITEM(spec_tuple
, 1));
236 if (INT_CONV_ERROR_OCCURRED(dest
->type
)) {
240 dest
->attrname
= PyTuple_GET_ITEM(spec_tuple
, 2);
241 dest
->typeargs
= PyTuple_GET_ITEM(spec_tuple
, 3);
// The file-level TODO notes that defval appears to be unused.
242 dest
->defval
= PyTuple_GET_ITEM(spec_tuple
, 4);
246 /* ====== END UTILITIES ====== */
249 /* ====== BEGIN WRITING FUNCTIONS ====== */
251 /* --- LOW-LEVEL WRITING FUNCTIONS --- */
253 static void writeByte(PyObject
* outbuf
, int8_t val
) {
255 PycStringIO
->cwrite(outbuf
, (char*)&net
, sizeof(int8_t));
// Appends `val` to the cStringIO output buffer `outbuf` as 2 bytes in network
// byte order (converted with htons).
// The result of cwrite is not checked; see the file-level TODO about cwrite.
258 static void writeI16(PyObject
* outbuf
, int16_t val
) {
259 int16_t net
= (int16_t)htons(val
);
260 PycStringIO
->cwrite(outbuf
, (char*)&net
, sizeof(int16_t));
// Appends `val` to the cStringIO output buffer `outbuf` as 4 bytes in network
// byte order (converted with htonl).
// The result of cwrite is not checked; see the file-level TODO about cwrite.
263 static void writeI32(PyObject
* outbuf
, int32_t val
) {
264 int32_t net
= (int32_t)htonl(val
);
265 PycStringIO
->cwrite(outbuf
, (char*)&net
, sizeof(int32_t));
// Appends `val` to the cStringIO output buffer `outbuf` as 8 bytes in network
// byte order. The conversion uses the htonll macro defined near the top of
// this file, which is the identity on big-endian hosts and a 64-bit byte swap
// on little-endian hosts.
// The result of cwrite is not checked; see the file-level TODO about cwrite.
268 static void writeI64(PyObject
* outbuf
, int64_t val
) {
269 int64_t net
= (int64_t)htonll(val
);
270 PycStringIO
->cwrite(outbuf
, (char*)&net
, sizeof(int64_t));
273 static void writeDouble(PyObject
* outbuf
, double dub
) {
274 // Unfortunately, bitwise_cast doesn't work in C. Bad C!
280 writeI64(outbuf
, transfer
.t
);
284 /* --- MAIN RECURSIVE OUTPUT FUNCTION -- */
287 output_val(PyObject
* output
, PyObject
* value
, TType type
, PyObject
* typeargs
) {
289 * Refcounting Strategy:
291 * We assume that elements of the thrift_spec tuple are not going to be
292 * mutated, so we don't ref count those at all. Other than that, we try to
293 * keep a reference to all the user-created objects while we work with them.
294 * output_val assumes that a reference is already held. The *caller* is
295 * responsible for handling references
301 int v
= PyObject_IsTrue(value
);
306 writeByte(output
, (int8_t) v
);
312 if (!parse_pyint(value
, &val
, INT8_MIN
, INT8_MAX
)) {
316 writeByte(output
, (int8_t) val
);
322 if (!parse_pyint(value
, &val
, INT16_MIN
, INT16_MAX
)) {
326 writeI16(output
, (int16_t) val
);
332 if (!parse_pyint(value
, &val
, INT32_MIN
, INT32_MAX
)) {
336 writeI32(output
, val
);
340 int64_t nval
= PyLong_AsLongLong(value
);
342 if (INT_CONV_ERROR_OCCURRED(nval
)) {
346 if (!CHECK_RANGE(nval
, INT64_MIN
, INT64_MAX
)) {
347 PyErr_SetString(PyExc_OverflowError
, "int out of range");
351 writeI64(output
, nval
);
356 double nval
= PyFloat_AsDouble(value
);
357 if (nval
== -1.0 && PyErr_Occurred()) {
361 writeDouble(output
, nval
);
366 Py_ssize_t len
= PyString_Size(value
);
368 if (!check_ssize_t_32(len
)) {
372 writeI32(output
, (int32_t) len
);
373 PycStringIO
->cwrite(output
, PyString_AsString(value
), (int32_t) len
);
380 SetListTypeArgs parsedargs
;
384 if (!parse_set_list_args(&parsedargs
, typeargs
)) {
388 len
= PyObject_Length(value
);
390 if (!check_ssize_t_32(len
)) {
394 writeByte(output
, parsedargs
.element_type
);
395 writeI32(output
, (int32_t) len
);
397 iterator
= PyObject_GetIter(value
);
398 if (iterator
== NULL
) {
402 while ((item
= PyIter_Next(iterator
))) {
403 if (!output_val(output
, item
, parsedargs
.element_type
, parsedargs
.typeargs
)) {
413 if (PyErr_Occurred()) {
425 MapTypeArgs parsedargs
;
427 len
= PyDict_Size(value
);
428 if (!check_ssize_t_32(len
)) {
432 if (!parse_map_args(&parsedargs
, typeargs
)) {
436 writeByte(output
, parsedargs
.ktag
);
437 writeByte(output
, parsedargs
.vtag
);
438 writeI32(output
, len
);
440 // TODO(bmaurer): should support any mapping, not just dicts
441 while (PyDict_Next(value
, &pos
, &k
, &v
)) {
442 // TODO(dreiss): Think hard about whether these INCREFs actually
443 // turn any unsafe scenarios into safe scenarios.
447 if (!output_val(output
, k
, parsedargs
.ktag
, parsedargs
.ktypeargs
)
448 || !output_val(output
, v
, parsedargs
.vtag
, parsedargs
.vtypeargs
)) {
457 // TODO(dreiss): Consider breaking this out as a function
458 // the way we did for decode_struct.
460 StructTypeArgs parsedargs
;
464 if (!parse_struct_args(&parsedargs
, typeargs
)) {
468 nspec
= PyTuple_Size(parsedargs
.spec
);
474 for (i
= 0; i
< nspec
; i
++) {
475 StructItemSpec parsedspec
;
476 PyObject
* spec_tuple
;
477 PyObject
* instval
= NULL
;
479 spec_tuple
= PyTuple_GET_ITEM(parsedargs
.spec
, i
);
480 if (spec_tuple
== Py_None
) {
484 if (!parse_struct_item_spec (&parsedspec
, spec_tuple
)) {
488 instval
= PyObject_GetAttr(value
, parsedspec
.attrname
);
494 if (instval
== Py_None
) {
499 writeByte(output
, (int8_t) parsedspec
.type
);
500 writeI16(output
, parsedspec
.tag
);
502 if (!output_val(output
, instval
, parsedspec
.type
, parsedspec
.typeargs
)) {
510 writeByte(output
, (int8_t)T_STOP
);
520 PyErr_SetString(PyExc_TypeError
, "Unexpected TType");
529 /* --- TOP-LEVEL WRAPPER FOR OUTPUT -- */
// Python-callable entry point, registered as "encode_binary" in the method
// table. It takes two objects (format "OO"): the object to encode and its
// struct type arguments. The object is serialized as a T_STRUCT into a fresh
// cStringIO output buffer created with an initial size of INIT_OUTBUF_SIZE.
// When output_val succeeds, the buffer's contents are fetched with cgetvalue
// into `ret`; otherwise `ret` keeps its initial NULL.
// NOTE(review): the declarations of buf/enc_obj/type_args, the release of
// `buf`, and the final return are not visible in this view -- confirm.
532 encode_binary(PyObject
*self
, PyObject
*args
) {
536 PyObject
* ret
= NULL
;
538 if (!PyArg_ParseTuple(args
, "OO", &enc_obj
, &type_args
)) {
542 buf
= PycStringIO
->NewOutput(INIT_OUTBUF_SIZE
);
543 if (output_val(buf
, enc_obj
, T_STRUCT
, type_args
)) {
544 ret
= PycStringIO
->cgetvalue(buf
);
551 /* ====== END WRITING FUNCTIONS ====== */
554 /* ====== BEGIN READING FUNCTIONS ====== */
556 /* --- LOW-LEVEL READING FUNCTIONS --- */
// Drops the references held by a DecodeBuffer (the cStringIO input object and
// the refill callable). Py_XDECREF accepts NULL, so this may be called on a
// buffer whose fields are only partly populated, provided the unset fields
// are NULL.
559 free_decodebuf(DecodeBuffer
* d
) {
560 Py_XDECREF(d
->stringiobuf
);
561 Py_XDECREF(d
->refill_callable
);
// Populates `dest` from a transport-like Python object `obj` by reading two
// attributes through the interned names "cstringio_buf" and
// "cstringio_refill".
//   - cstringio_buf must pass PycStringIO_InputCheck, otherwise
//     TypeError("expecting stringio input") is raised.
//   - cstringio_refill must be callable, otherwise
//     TypeError("expecting callable") is raised.
// On every visible failure path after the first attribute lookup, `dest` is
// released with free_decodebuf.
// NOTE(review): return statements are not visible in this view; the caller
// tests the result with `!` -- confirm that false means failure.
565 decode_buffer_from_obj(DecodeBuffer
* dest
, PyObject
* obj
) {
566 dest
->stringiobuf
= PyObject_GetAttr(obj
, INTERN_STRING(cstringio_buf
));
567 if (!dest
->stringiobuf
) {
571 if (!PycStringIO_InputCheck(dest
->stringiobuf
)) {
// free_decodebuf also XDECREFs refill_callable, which has not been assigned
// yet at this point. decode_binary passes a zero-initialized DecodeBuffer,
// which keeps this safe; any other caller must do the same.
572 free_decodebuf(dest
);
573 PyErr_SetString(PyExc_TypeError
, "expecting stringio input");
577 dest
->refill_callable
= PyObject_GetAttr(obj
, INTERN_STRING(cstringio_refill
));
579 if(!dest
->refill_callable
) {
580 free_decodebuf(dest
);
584 if (!PyCallable_Check(dest
->refill_callable
)) {
585 free_decodebuf(dest
);
586 PyErr_SetString(PyExc_TypeError
, "expecting callable");
// Reads `len` bytes from the decode buffer, pointing *output at the data
// returned by cStringIO's cread.
// Visible flow:
//   1. cread is attempted on the current stringiobuf.
//   2. If that read is not satisfied and is not an error (-1), the transport's
//      refill callable is invoked with the partial data (*output, read) and
//      the requested length; it is expected to return a replacement cStringIO
//      input object.
//   3. The old stringiobuf is released, the new one installed, and cread is
//      retried.
//   4. If the retry still does not satisfy the request (and is not -1),
//      TypeError("refill claimed to have refilled the buffer, but didn't!!")
//      is raised.
// NOTE(review): the conditions comparing `read` with `len`, the declarations
// of `read`/`newiobuf`, and every return statement are not visible in this
// view; the summary above is inferred from the surviving else-if branches --
// confirm against the full source.
593 static bool readBytes(DecodeBuffer
* input
, char** output
, int len
) {
596 // TODO(dreiss): Don't fear the malloc. Think about taking a copy of
597 // the partial read instead of forcing the transport
598 // to prepend it to its buffer.
600 read
= PycStringIO
->cread(input
->stringiobuf
, output
, len
);
604 } else if (read
== -1) {
609 // using building functions as this is a rare codepath
610 newiobuf
= PyObject_CallFunction(
611 input
->refill_callable
, "s#i", *output
, read
, len
, NULL
);
612 if (newiobuf
== NULL
) {
616 // must do this *AFTER* the call so that we don't deref the io buffer
617 Py_CLEAR(input
->stringiobuf
);
618 input
->stringiobuf
= newiobuf
;
620 read
= PycStringIO
->cread(input
->stringiobuf
, output
, len
);
624 } else if (read
== -1) {
627 // TODO(dreiss): This could be a valid code path for big binary blobs.
628 PyErr_SetString(PyExc_TypeError
,
629 "refill claimed to have refilled the buffer, but didn't!!");
// Reads one byte from the decode buffer via readBytes and returns it as an
// int8_t.
// NOTE(review): the declaration of `buf` and the failure return are not
// visible in this view. Callers test the result with
// INT_CONV_ERROR_OCCURRED, so failure is presumably reported as -1 with a
// Python exception set -- confirm.
635 static int8_t readByte(DecodeBuffer
* input
) {
637 if (!readBytes(input
, &buf
, sizeof(int8_t))) {
641 return *(int8_t*) buf
;
// Reads 2 bytes from the decode buffer via readBytes and returns them
// converted from network byte order (ntohs).
// NOTE(review): the declaration of `buf` and the failure return are not
// visible in this view. Callers test the result with
// INT_CONV_ERROR_OCCURRED, so failure is presumably reported as -1 with a
// Python exception set -- confirm.
644 static int16_t readI16(DecodeBuffer
* input
) {
646 if (!readBytes(input
, &buf
, sizeof(int16_t))) {
650 return (int16_t) ntohs(*(int16_t*) buf
);
// Reads 4 bytes from the decode buffer via readBytes and returns them
// converted from network byte order (ntohl).
// NOTE(review): the declaration of `buf` and the failure return are not
// visible in this view. Callers test the result with
// INT_CONV_ERROR_OCCURRED or check_ssize_t_32, so failure is presumably
// reported as -1 with a Python exception set -- confirm.
653 static int32_t readI32(DecodeBuffer
* input
) {
655 if (!readBytes(input
, &buf
, sizeof(int32_t))) {
658 return (int32_t) ntohl(*(int32_t*) buf
);
// Reads 8 bytes from the decode buffer via readBytes and returns them
// converted from network byte order with the ntohll macro defined near the
// top of this file.
// NOTE(review): the declaration of `buf` and the failure return are not
// visible in this view. Callers test the result with
// INT_CONV_ERROR_OCCURRED, so failure is presumably reported as -1 with a
// Python exception set -- confirm.
662 static int64_t readI64(DecodeBuffer
* input
) {
664 if (!readBytes(input
, &buf
, sizeof(int64_t))) {
668 return (int64_t) ntohll(*(int64_t*) buf
);
671 static double readDouble(DecodeBuffer
* input
) {
677 transfer
.f
= readI64(input
);
678 if (transfer
.f
== -1) {
// Reads one type byte from the input and compares it with `expected`.
// A read error is detected through INT_CONV_ERROR_OCCURRED; a mismatch raises
// TypeError("got wrong ttype while reading field").
// NOTE(review): return statements are not visible in this view; callers test
// the result with `!` -- confirm that false means failure.
685 checkTypeByte(DecodeBuffer
* input
, TType expected
) {
686 TType got
= readByte(input
);
687 if (INT_CONV_ERROR_OCCURRED(got
)) {
691 if (expected
!= got
) {
692 PyErr_SetString(PyExc_TypeError
, "got wrong ttype while reading field");
699 skip(DecodeBuffer
* input
, TType type
) {
700 #define SKIPBYTES(n) \
702 if (!readBytes(input, &dummy_buf, (n))) { \
712 case T_I08
: SKIPBYTES(1); break;
713 case T_I16
: SKIPBYTES(2); break;
714 case T_I32
: SKIPBYTES(4); break;
716 case T_DOUBLE
: SKIPBYTES(8); break;
719 // TODO(dreiss): Find out if these check_ssize_t32s are really necessary.
720 int len
= readI32(input
);
721 if (!check_ssize_t_32(len
)) {
733 etype
= readByte(input
);
738 len
= readI32(input
);
739 if (!check_ssize_t_32(len
)) {
743 for (i
= 0; i
< len
; i
++) {
744 if (!skip(input
, etype
)) {
755 ktype
= readByte(input
);
760 vtype
= readByte(input
);
765 len
= readI32(input
);
766 if (!check_ssize_t_32(len
)) {
770 for (i
= 0; i
< len
; i
++) {
771 if (!(skip(input
, ktype
) && skip(input
, vtype
))) {
782 type
= readByte(input
);
791 if (!skip(input
, type
)) {
804 PyErr_SetString(PyExc_TypeError
, "Unexpected TType");
815 /* --- HELPER FUNCTION FOR DECODE_VAL --- */
818 decode_val(DecodeBuffer
* input
, TType type
, PyObject
* typeargs
);
821 decode_struct(DecodeBuffer
* input
, PyObject
* output
, PyObject
* spec_seq
) {
822 int spec_seq_len
= PyTuple_Size(spec_seq
);
823 if (spec_seq_len
== -1) {
831 PyObject
* fieldval
= NULL
;
832 StructItemSpec parsedspec
;
834 type
= readByte(input
);
838 if (type
== T_STOP
) {
841 tag
= readI16(input
);
842 if (INT_CONV_ERROR_OCCURRED(tag
)) {
845 if (tag
>= 0 && tag
< spec_seq_len
) {
846 item_spec
= PyTuple_GET_ITEM(spec_seq
, tag
);
851 if (item_spec
== Py_None
) {
852 if (!skip(input
, type
)) {
857 if (!parse_struct_item_spec(&parsedspec
, item_spec
)) {
860 if (parsedspec
.type
!= type
) {
861 PyErr_SetString(PyExc_TypeError
, "struct field had wrong type while reading");
865 fieldval
= decode_val(input
, parsedspec
.type
, parsedspec
.typeargs
);
866 if (fieldval
== NULL
) {
870 if (PyObject_SetAttr(output
, parsedspec
.attrname
, fieldval
) == -1) {
880 /* --- MAIN RECURSIVE INPUT FUNCTION --- */
882 // Returns a new reference.
884 decode_val(DecodeBuffer
* input
, TType type
, PyObject
* typeargs
) {
888 int8_t v
= readByte(input
);
889 if (INT_CONV_ERROR_OCCURRED(v
)) {
894 case 0: Py_RETURN_FALSE
;
895 case 1: Py_RETURN_TRUE
;
896 // Don't laugh. This is a potentially serious issue.
897 default: PyErr_SetString(PyExc_TypeError
, "boolean out of range"); return NULL
;
902 int8_t v
= readByte(input
);
903 if (INT_CONV_ERROR_OCCURRED(v
)) {
907 return PyInt_FromLong(v
);
910 int16_t v
= readI16(input
);
911 if (INT_CONV_ERROR_OCCURRED(v
)) {
914 return PyInt_FromLong(v
);
917 int32_t v
= readI32(input
);
918 if (INT_CONV_ERROR_OCCURRED(v
)) {
921 return PyInt_FromLong(v
);
925 int64_t v
= readI64(input
);
926 if (INT_CONV_ERROR_OCCURRED(v
)) {
929 // TODO(dreiss): Find out if we can take this fastpath always when
930 // sizeof(long) == sizeof(long long).
931 if (CHECK_RANGE(v
, LONG_MIN
, LONG_MAX
)) {
932 return PyInt_FromLong((long) v
);
935 return PyLong_FromLongLong(v
);
939 double v
= readDouble(input
);
940 if (v
== -1.0 && PyErr_Occurred()) {
943 return PyFloat_FromDouble(v
);
947 Py_ssize_t len
= readI32(input
);
949 if (!readBytes(input
, &buf
, len
)) {
953 return PyString_FromStringAndSize(buf
, len
);
958 SetListTypeArgs parsedargs
;
960 PyObject
* ret
= NULL
;
963 if (!parse_set_list_args(&parsedargs
, typeargs
)) {
967 if (!checkTypeByte(input
, parsedargs
.element_type
)) {
971 len
= readI32(input
);
972 if (!check_ssize_t_32(len
)) {
976 ret
= PyList_New(len
);
981 for (i
= 0; i
< len
; i
++) {
982 PyObject
* item
= decode_val(input
, parsedargs
.element_type
, parsedargs
.typeargs
);
987 PyList_SET_ITEM(ret
, i
, item
);
990 // TODO(dreiss): Consider biting the bullet and making two separate cases
991 // for list and set, avoiding this post facto conversion.
994 #if (PY_VERSION_HEX < 0x02050000)
995 // hack needed for older versions
996 setret
= PyObject_CallFunctionObjArgs((PyObject
*)&PySet_Type
, ret
, NULL
);
999 setret
= PySet_New(ret
);
1010 MapTypeArgs parsedargs
;
1011 PyObject
* ret
= NULL
;
1013 if (!parse_map_args(&parsedargs
, typeargs
)) {
1017 if (!checkTypeByte(input
, parsedargs
.ktag
)) {
1020 if (!checkTypeByte(input
, parsedargs
.vtag
)) {
1024 len
= readI32(input
);
1025 if (!check_ssize_t_32(len
)) {
1034 for (i
= 0; i
< len
; i
++) {
1037 k
= decode_val(input
, parsedargs
.ktag
, parsedargs
.ktypeargs
);
1041 v
= decode_val(input
, parsedargs
.vtag
, parsedargs
.vtypeargs
);
1045 if (PyDict_SetItem(ret
, k
, v
) == -1) {
1053 // Yuck! Destructors, anyone?
1068 StructTypeArgs parsedargs
;
1069 if (!parse_struct_args(&parsedargs
, typeargs
)) {
1073 PyObject
* ret
= PyObject_CallObject(parsedargs
.klass
, NULL
);
1078 if (!decode_struct(input
, ret
, parsedargs
.spec
)) {
1092 PyErr_SetString(PyExc_TypeError
, "Unexpected TType");
1098 /* --- TOP-LEVEL WRAPPER FOR INPUT -- */
// Python-callable entry point, registered as "decode_binary" in the method
// table. It takes three objects (format "OOO"): the object to populate, the
// transport to read from, and the struct type arguments.
// Steps visible here:
//   1. parse the struct type arguments;
//   2. build a DecodeBuffer from the transport;
//   3. decode the struct fields into output_obj using the parsed spec.
// The DecodeBuffer is released with free_decodebuf both when decode_struct
// fails and on the path after it.
// NOTE(review): the return statements are not visible in this view --
// confirm what is returned on success and failure.
1101 decode_binary(PyObject
*self
, PyObject
*args
) {
1102 PyObject
* output_obj
= NULL
;
1103 PyObject
* transport
= NULL
;
1104 PyObject
* typeargs
= NULL
;
1105 StructTypeArgs parsedargs
;
// Zero-initialized so free_decodebuf is safe even if decode_buffer_from_obj
// fails before filling in both fields.
1106 DecodeBuffer input
= {};
1108 if (!PyArg_ParseTuple(args
, "OOO", &output_obj
, &transport
, &typeargs
)) {
1112 if (!parse_struct_args(&parsedargs
, typeargs
)) {
1116 if (!decode_buffer_from_obj(&input
, transport
)) {
1120 if (!decode_struct(&input
, output_obj
, parsedargs
.spec
)) {
1121 free_decodebuf(&input
);
1125 free_decodebuf(&input
);
1130 /* ====== END READING FUNCTIONS ====== */
1133 /* -- PYTHON MODULE SETUP STUFF --- */
1135 static PyMethodDef ThriftFastBinaryMethods
[] = {
1137 {"encode_binary", encode_binary
, METH_VARARGS
, ""},
1138 {"decode_binary", decode_binary
, METH_VARARGS
, ""},
1140 {NULL
, NULL
, 0, NULL
} /* Sentinel */
// Module initialization function for "thrift.protocol.fastbinary".
// It interns the attribute-name strings "cstringio_buf" and
// "cstringio_refill" (returning early if interning fails), returns early if
// the cStringIO C API pointer PycStringIO is NULL, and finally registers the
// method table with Py_InitModule.
// NOTE(review): the statement that imports the cStringIO C API (and so sets
// PycStringIO) is not visible in this view -- confirm it precedes the NULL
// check.
1144 initfastbinary(void) {
1145 #define INIT_INTERN_STRING(value) \
1147 INTERN_STRING(value) = PyString_InternFromString(#value); \
1148 if(!INTERN_STRING(value)) return; \
1151 INIT_INTERN_STRING(cstringio_buf
);
1152 INIT_INTERN_STRING(cstringio_refill
);
1153 #undef INIT_INTERN_STRING
1156 if (PycStringIO
== NULL
) return;
1158 (void) Py_InitModule("thrift.protocol.fastbinary", ThriftFastBinaryMethods
);