2 +----------------------------------------------------------------------+
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 | Copyright (c) 1998-2010 Zend Technologies Ltd. (http://www.zend.com) |
7 +----------------------------------------------------------------------+
8 | This source file is subject to version 2.00 of the Zend license, |
9 | that is bundled with this package in the file LICENSE, and is |
10 | available through the world-wide-web at the following url: |
11 | http://www.zend.com/license/2_00.txt. |
12 | If you did not receive a copy of the Zend license and are unable to |
13 | obtain it through the world-wide-web, please send a note to |
14 | license@zend.com so we can mail you a copy immediately. |
15 +----------------------------------------------------------------------+
18 #include "hphp/runtime/base/zend-pack.h"
19 #include "hphp/runtime/base/builtin-functions.h"
20 #include "hphp/util/tiny-vector.h"
/*
 * Overflow guard used while computing the packed output size.
 * Raises "integer overflow in format string" when the repeat count `a` is
 * negative or exceeds (INT_MAX - outputpos) / b. Expects `outputpos` and
 * `code` to be in scope at the expansion site.
 * NOTE(review): `b` is cast as `(int)b` without its own parentheses; the
 * visible call sites only pass simple expressions (literals, sizeof(...)).
 * NOTE(review): the remainder of the macro body is not visible in this listing.
 */
26 #define INC_OUTPUTPOS(a,b) \
27 if ((a) < 0 || ((INT_MAX - outputpos)/((int)b)) < (a)) { \
28 throw_invalid_argument \
29 ("Type %c: integer overflow in format string", code); \
34 ///////////////////////////////////////////////////////////////////////////////
/*
 * Fills the byte-index tables used by pack() and unpack(): byte_map, int_map
 * and the machine/big/little-endian maps for short, int32 and int64 values.
 * The table contents depend on the host byte order detected at the top.
 * NOTE(review): this listing omits some lines (loop bodies, the `else`,
 * closing braces); the comments below describe only the visible code.
 */
36 ZendPack::ZendPack() {
/* Endianness probe: store 1 in an int and inspect its first byte. */
37 int machine_endian_check
= 1;
/* The first byte is non-zero only if the least significant byte is stored first. */
40 machine_little_endian
= ((char *)&machine_endian_check
)[0];
42 if (machine_little_endian
) {
43 /* Where to get lo to hi bytes from */
46 for (i
= 0; i
< (int)sizeof(int); i
++) {
/*
 * In the assignments that follow, the machine-endian and little-endian maps
 * are the identity (index k -> byte k) and the big-endian maps reverse the
 * byte order.
 */
50 machine_endian_short_map
[0] = 0;
51 machine_endian_short_map
[1] = 1;
52 big_endian_short_map
[0] = 1;
53 big_endian_short_map
[1] = 0;
54 little_endian_short_map
[0] = 0;
55 little_endian_short_map
[1] = 1;
57 machine_endian_int32_map
[0] = 0;
58 machine_endian_int32_map
[1] = 1;
59 machine_endian_int32_map
[2] = 2;
60 machine_endian_int32_map
[3] = 3;
61 big_endian_int32_map
[0] = 3;
62 big_endian_int32_map
[1] = 2;
63 big_endian_int32_map
[2] = 1;
64 big_endian_int32_map
[3] = 0;
65 little_endian_int32_map
[0] = 0;
66 little_endian_int32_map
[1] = 1;
67 little_endian_int32_map
[2] = 2;
68 little_endian_int32_map
[3] = 3;
70 machine_endian_int64_map
[0] = 0;
71 machine_endian_int64_map
[1] = 1;
72 machine_endian_int64_map
[2] = 2;
73 machine_endian_int64_map
[3] = 3;
74 machine_endian_int64_map
[4] = 4;
75 machine_endian_int64_map
[5] = 5;
76 machine_endian_int64_map
[6] = 6;
77 machine_endian_int64_map
[7] = 7;
78 big_endian_int64_map
[0] = 7;
79 big_endian_int64_map
[1] = 6;
80 big_endian_int64_map
[2] = 5;
81 big_endian_int64_map
[3] = 4;
82 big_endian_int64_map
[4] = 3;
83 big_endian_int64_map
[5] = 2;
84 big_endian_int64_map
[6] = 1;
85 big_endian_int64_map
[7] = 0;
86 little_endian_int64_map
[0] = 0;
87 little_endian_int64_map
[1] = 1;
88 little_endian_int64_map
[2] = 2;
89 little_endian_int64_map
[3] = 3;
90 little_endian_int64_map
[4] = 4;
91 little_endian_int64_map
[5] = 5;
92 little_endian_int64_map
[6] = 6;
93 little_endian_int64_map
[7] = 7;
/*
 * Second set of assignments: indices are expressed as offsets from the end
 * of an 8-byte value (size - k). Here the machine-endian and big-endian maps
 * ascend and the little-endian maps descend.
 * NOTE(review): the `else` introducing this path is not visible in this
 * listing; presumably this is the big-endian-host branch - confirm.
 */
95 int64_t size
= sizeof(int64_t);
97 /* Where to get hi to lo bytes from */
98 byte_map
[0] = size
- 1;
100 for (i
= 0; i
< (int)sizeof(int); i
++) {
/*
 * NOTE(review): size equals sizeof(int64_t), so this expression reduces to i.
 * machine_endian_int32_map on this same path starts at size - 4 instead;
 * confirm whether sizeof(int) was intended in place of sizeof(int64_t).
 */
101 int_map
[i
] = size
- (sizeof(int64_t) - i
);
104 machine_endian_short_map
[0] = size
- 2;
105 machine_endian_short_map
[1] = size
- 1;
106 big_endian_short_map
[0] = size
- 2;
107 big_endian_short_map
[1] = size
- 1;
108 little_endian_short_map
[0] = size
- 1;
109 little_endian_short_map
[1] = size
- 2;
111 machine_endian_int32_map
[0] = size
- 4;
112 machine_endian_int32_map
[1] = size
- 3;
113 machine_endian_int32_map
[2] = size
- 2;
114 machine_endian_int32_map
[3] = size
- 1;
115 big_endian_int32_map
[0] = size
- 4;
116 big_endian_int32_map
[1] = size
- 3;
117 big_endian_int32_map
[2] = size
- 2;
118 big_endian_int32_map
[3] = size
- 1;
119 little_endian_int32_map
[0] = size
- 1;
120 little_endian_int32_map
[1] = size
- 2;
121 little_endian_int32_map
[2] = size
- 3;
122 little_endian_int32_map
[3] = size
- 4;
124 machine_endian_int64_map
[0] = size
- 8;
125 machine_endian_int64_map
[1] = size
- 7;
126 machine_endian_int64_map
[2] = size
- 6;
127 machine_endian_int64_map
[3] = size
- 5;
128 machine_endian_int64_map
[4] = size
- 4;
129 machine_endian_int64_map
[5] = size
- 3;
130 machine_endian_int64_map
[6] = size
- 2;
131 machine_endian_int64_map
[7] = size
- 1;
132 big_endian_int64_map
[0] = size
- 8;
133 big_endian_int64_map
[1] = size
- 7;
134 big_endian_int64_map
[2] = size
- 6;
135 big_endian_int64_map
[3] = size
- 5;
136 big_endian_int64_map
[4] = size
- 4;
137 big_endian_int64_map
[5] = size
- 3;
138 big_endian_int64_map
[6] = size
- 2;
139 big_endian_int64_map
[7] = size
- 1;
140 little_endian_int64_map
[0] = size
- 1;
141 little_endian_int64_map
[1] = size
- 2;
142 little_endian_int64_map
[2] = size
- 3;
143 little_endian_int64_map
[3] = size
- 4;
144 little_endian_int64_map
[4] = size
- 5;
145 little_endian_int64_map
[5] = size
- 6;
146 little_endian_int64_map
[6] = size
- 7;
147 little_endian_int64_map
[7] = size
- 8;
/*
 * Low-level integer packer: converts `val` to a 64-bit integer and writes
 * `size` bytes to `output`, where the i-th output byte is source byte map[i].
 * NOTE(review): the end of the signature and the declaration of `v` are not
 * visible in this listing; `v` is presumably a byte view of `n` - confirm.
 */
151 void ZendPack::pack(const Variant
& val
, int64_t size
, int64_t *map
,
/* All integer formats start from the value's 64-bit integer conversion. */
153 int64_t n
= val
.toInt64();
/* Emit bytes in the order dictated by the map, advancing output as we go. */
155 for (int64_t i
= 0; i
< size
; i
++) {
156 *output
++ = v
[map
[i
]];
/*
 * Packs the values in `argv` into a binary string as directed by `fmt`.
 * Visible phases:
 *   1. split the format into parallel lists of codes and repeat arguments,
 *      validating that enough (and not too many) arguments were supplied;
 *   2. compute the required output size, with overflow checks;
 *   3. write each value into the buffer, then set the string size to the
 *      final output position.
 * Problems are reported through throw_invalid_argument.
 * NOTE(review): this listing omits many lines (case labels, closing braces,
 * some declarations); the comments describe only the visible code.
 */
160 Variant
ZendPack::pack(const String
& fmt
, const Array
& argv
) {
161 /* Preprocess format into formatcodes and formatargs */
162 req::TinyVector
<char, 64> formatcodes
; // up to 64 codes on the stack
163 req::TinyVector
<int, 64> formatargs
;
164 int argc
= argv
.size();
166 const char *format
= fmt
.c_str();
167 int formatlen
= fmt
.size();
/* Phase 1: one format code per iteration; i is advanced inside the body. */
169 for (int i
= 0; i
< formatlen
; ) {
170 char code
= format
[i
++];
173 /* Handle format arguments if any */
/* A digit starts an explicit repeat count, parsed with atoi. */
181 else if (c
>= '0' && c
<= '9') {
182 arg
= atoi(&format
[i
]);
/*
 * NOTE(review): format[i] is read before the i < formatlen test. This stays
 * in bounds only if the buffer has a non-digit terminator at index
 * formatlen - confirm that c_str() guarantees one.
 */
184 while (format
[i
] >= '0' && format
[i
] <= '9' && i
< formatlen
) {
190 /* Handle special arg '*' for all codes and check argv overflows */
191 switch ((int) code
) {
192 /* Never uses any args */
197 throw_invalid_argument("Type %c: '*' ignored", code
);
202 /* Always uses one arg */
208 if (currentarg
>= argc
) {
209 throw_invalid_argument("Type %c: not enough arguments", code
);
/* Repeat count taken from the length of the current argument's string form. */
214 arg
= argv
[currentarg
].toString().size();
215 //add one, because Z is always NUL-terminated
224 /* Use as many args as specified */
/* Claim every argument that has not been consumed yet. */
244 arg
= argc
- currentarg
;
249 if (currentarg
> argc
) {
250 throw_invalid_argument("Type %c: too few arguments", code
);
256 throw_invalid_argument("Type %c: unknown format code", code
);
/* Record the code and its resolved repeat argument at the same index. */
260 formatcodes
.push_back(code
);
261 formatargs
.push_back(arg
);
/* Every supplied argument must have been claimed by some format code. */
264 if (currentarg
< argc
) {
265 throw_invalid_argument("%d arguments unused", (argc
- currentarg
));
/* Phase 2: size the output. outputsize ends up as the furthest position reached. */
268 int outputpos
= 0, outputsize
= 0;
269 /* Calculate output length and upper bound while processing*/
270 for (int i
= 0; i
< (int)formatcodes
.size(); i
++) {
271 int code
= (int) formatcodes
[i
];
272 int arg
= formatargs
[i
];
274 switch ((int) code
) {
/* Two 4-bit items share one byte, so round the count up to whole bytes. */
277 INC_OUTPUTPOS((arg
+ (arg
% 2)) / 2,1); /* 4 bit per arg */
286 INC_OUTPUTPOS(arg
,1); /* 8 bit per arg */
293 INC_OUTPUTPOS(arg
,2); /* 16 bit per arg */
298 INC_OUTPUTPOS(arg
,sizeof(int));
305 INC_OUTPUTPOS(arg
,4); /* 32 bit per arg */
311 INC_OUTPUTPOS(arg
,8); /* 64 bit per arg */
315 INC_OUTPUTPOS(arg
,sizeof(float));
319 INC_OUTPUTPOS(arg
,sizeof(double));
326 throw_invalid_argument("Type %c: outside of string", code
);
/* Keep the high-water mark of outputpos; it is used as the buffer size below. */
336 if (outputsize
< outputpos
) {
337 outputsize
= outputpos
;
/* Reserve the buffer and obtain a writable pointer into it. */
341 String str
= String(outputsize
, ReserveString
);
342 char *output
= str
.mutableData();
346 /* Do actual packing */
/* Phase 3: replay the recorded codes and write the bytes. */
347 for (int i
= 0; i
< (int)formatcodes
.size(); i
++) {
348 int code
= (int) formatcodes
[i
];
349 int arg
= formatargs
[i
];
354 switch ((int) code
) {
/*
 * For 'Z' at most arg - 1 bytes are copied, so the last byte of the field
 * keeps the NUL written by the memset below.
 */
358 int arg_cp
= (code
!= 'Z') ? arg
: std::max(0, arg
- 1);
/* Pre-fill the whole field with padding: spaces for 'A', NUL otherwise. */
359 memset(&output
[outputpos
], (code
!= 'A') ? '\0' : ' ', arg
);
360 val
= argv
[currentarg
++].toString();
/* Copy the shorter of the source length and the allowed field length. */
363 memcpy(&output
[outputpos
], s
, (slen
< arg_cp
) ? slen
: arg_cp
);
/* 'h' fills the low nibble of each byte first; otherwise the high nibble comes first. */
370 int nibbleshift
= (code
== 'h') ? 0 : 4;
374 val
= argv
[currentarg
++].toString();
379 throw_invalid_argument
380 ("Type %c: not enough characters in string", code
);
/* Accept hex digits in the ranges 0-9, A-F and a-f; anything else is rejected. */
387 if (n
>= '0' && n
<= '9') {
389 } else if (n
>= 'A' && n
<= 'F') {
391 } else if (n
>= 'a' && n
<= 'f') {
394 throw_invalid_argument("Type %c: illegal hex digit %c", code
, n
);
/* Move to the next output byte and clear it before nibbles are OR-ed in. */
399 output
[++outputpos
] = 0;
404 output
[outputpos
] |= (n
<< nibbleshift
);
/* Alternates the shift between 0 and 4. */
405 nibbleshift
= (nibbleshift
+ 4) & 7;
/* Single byte, selected through byte_map. */
415 pack(argv
[currentarg
++], 1, byte_map
, &output
[outputpos
]);
/* 16-bit values: machine order by default, overridden for specific codes. */
424 int64_t *map
= machine_endian_short_map
;
427 map
= big_endian_short_map
;
428 } else if (code
== 'v') {
429 map
= little_endian_short_map
;
433 pack(argv
[currentarg
++], 2, map
, &output
[outputpos
]);
/* Machine-sized int: sizeof(int) bytes selected through int_map. */
442 pack(argv
[currentarg
++], sizeof(int), int_map
, &output
[outputpos
]);
443 outputpos
+= sizeof(int);
/* 32-bit values: machine order by default, overridden for specific codes. */
451 int64_t *map
= machine_endian_int32_map
;
454 map
= big_endian_int32_map
;
455 } else if (code
== 'V') {
456 map
= little_endian_int32_map
;
460 pack(argv
[currentarg
++], 4, map
, &output
[outputpos
]);
/* 64-bit values: machine order by default, overridden for specific codes. */
470 int64_t *map
= machine_endian_int64_map
;
472 map
= big_endian_int64_map
;
473 } else if (code
== 'P') {
474 map
= little_endian_int64_map
;
478 pack(argv
[currentarg
++], 8, map
, &output
[outputpos
]);
/*
 * Floating-point values: the argument is converted with toDouble() and the
 * in-memory bytes of `v` are copied as-is (no byte-order mapping).
 * NOTE(review): the declarations of `v` are not visible in this listing.
 */
488 v
= argv
[currentarg
++].toDouble();
489 memcpy(&output
[outputpos
], &v
, sizeof(v
));
490 outputpos
+= sizeof(v
);
499 v
= argv
[currentarg
++].toDouble();
500 memcpy(&output
[outputpos
], &v
, sizeof(v
));
501 outputpos
+= sizeof(v
);
/* Write arg NUL bytes at the current position. */
507 memset(&output
[outputpos
], '\0', arg
);
/* Zero-fill from the current position up to offset arg when arg lies ahead. */
520 if (arg
> outputpos
) {
521 memset(&output
[outputpos
], '\0', arg
- outputpos
);
/* The final string length is the last output position, not the reserved size. */
528 str
.setSize(outputpos
);
/*
 * Low-level integer unpacker: assembles an integer from `size` bytes of
 * `data`, storing the i-th input byte at byte position map[i] of `result`.
 * NOTE(review): the end of the signature, the declaration of `result` and
 * the return statement are not visible in this listing.
 */
532 int64_t ZendPack::unpack(const char *data
, int64_t size
, int issigned
,
/* Byte-wise view of result so individual bytes can be placed via the map. */
535 char *cresult
= (char *) &result
;
/*
 * Start from all one bits when issigned is non-zero, otherwise from zero;
 * bytes the loop does not overwrite keep this initial value.
 */
538 result
= issigned
? -1 : 0;
540 for (i
= 0; i
< size
; i
++) {
541 cresult
[map
[i
]] = *data
++;
/*
 * Unpacks the binary string `data` according to `fmt` into the array `ret`.
 * Each format element consists of a type character, an optional repeat
 * argument (digits or '*') and a name that runs up to the next '/'.
 * Decoded values are stored under a key built from that name, with the
 * 1-based element number appended when the repeat argument is not 1 or
 * the name is empty.
 * Problems are reported through throw_invalid_argument.
 * NOTE(review): this listing omits many lines (case labels, closing braces,
 * some declarations, the return); the comments describe only the visible code.
 */
547 Variant
ZendPack::unpack(const String
& fmt
, const String
& data
) {
548 const char *format
= fmt
.c_str();
549 int formatlen
= fmt
.size();
550 const char *input
= data
.c_str();
551 int inputlen
= data
.size();
554 Array ret
= Array::Create();
/* One format element per iteration; formatlen counts the characters left. */
555 while (formatlen
-- > 0) {
556 char type
= *(format
++);
562 /* Handle format arguments if any */
/* A digit starts an explicit repeat count; the digit run is then skipped. */
566 if (c
>= '0' && c
<= '9') {
569 while (formatlen
> 0 && *format
>= '0' && *format
<= '9') {
573 } else if (c
== '*') {
580 /* Get name of new value in array */
/* The name extends to the next '/' separator or the end of the format. */
584 while (formatlen
> 0 && *format
!= '/') {
589 namelen
= format
- name
;
594 switch ((int) type
) {
595 /* Never use any input */
599 throw_invalid_argument("Type %c: '*' ignored", type
);
/* Two 4-bit items per input byte: round a positive count up to whole bytes. */
617 size
= (arg
> 0) ? (arg
+ (arg
% 2)) / 2 : arg
;
621 /* Use 1 byte of input */
628 /* Use 2 bytes of input */
636 /* Use machine dependent bytes of input */
642 /* Use 4 bytes of input */
650 /* Use 8 bytes of input */
657 /* Use sizeof(float) bytes of input */
659 size
= sizeof(float);
662 /* Use sizeof(double) bytes of input */
664 size
= sizeof(double);
668 throw_invalid_argument("Invalid format type %c", type
);
672 /* Do actual unpacking */
/*
 * NOTE(review): the condition is `!=`, not `<`. A negative arg appears to
 * denote the '*' repeater (see the `arg < 0` branch near the end of the
 * loop), in which case the loop ends through explicit exits.
 */
673 for (int i
= 0; i
!= arg
; i
++ ) {
674 /* Space for name + number, safe as namelen is ensured <= 200 */
677 if (arg
!= 1 || namelen
== 0) {
678 /* Need to add element number to name */
679 snprintf(n
, sizeof(n
), "%.*s%d", namelen
, name
, i
+ 1);
681 /* Truncate name to next format code or end of string */
682 snprintf(n
, sizeof(n
), "%.*s", namelen
, name
);
/* Reject sizes for which inputpos + size would exceed INT_MAX. */
685 if (size
!= 0 && size
!= -1 && INT_MAX
- size
+ 1 < inputpos
) {
686 throw_invalid_argument("Type %c: integer overflow", type
);
/* Decode only when the element fits in the remaining input. */
690 if ((inputpos
+ size
) <= inputlen
) {
691 switch ((int) type
) {
695 int len
= inputlen
- inputpos
; /* Remaining string */
697 /* If size was given take minimum of len and size */
698 if ((size
>= 0) && (len
> size
)) {
704 /* A will strip any trailing whitespace */
/* Characters treated as padding: NUL, space, tab, carriage return, newline. */
707 char padn
= '\0'; char pads
= ' '; char padt
= '\t';
708 char padc
= '\r'; char padl
= '\n';
710 if (input
[inputpos
+ len
] != padn
711 && input
[inputpos
+ len
] != pads
712 && input
[inputpos
+ len
] != padt
713 && input
[inputpos
+ len
] != padc
714 && input
[inputpos
+ len
] != padl
719 /* Remove everything after the first null */
722 for (s
=0 ; s
< len
; s
++) {
723 if (input
[inputpos
+ s
] == '\0')
729 /*only A is \0 terminated*/
/* Store a copy of the selected byte range under the element's key. */
733 ret
.set(String(n
, CopyString
),
734 String(input
+ inputpos
, len
, CopyString
));
/* Each remaining input byte yields two output characters. */
740 int len
= (inputlen
- inputpos
) * 2; /* Remaining */
/* 'h' reads the low nibble of each byte first; otherwise the high nibble. */
741 int nibbleshift
= (type
== 'h') ? 0 : 4;
746 /* If size was given take minimum of len and size */
747 if (size
>= 0 && len
> (size
* 2)) {
755 String s
= String(len
, ReserveString
);
756 buf
= s
.mutableData();
758 for (ipos
= opos
= 0; opos
< len
; opos
++) {
/* Isolate the current 4-bit value from the input byte. */
759 char c
= (input
[inputpos
+ ipos
] >> nibbleshift
) & 0xf;
/* Alternates the shift between 0 and 4. */
768 nibbleshift
= (nibbleshift
+ 4) & 7;
777 ret
.set(String(n
, CopyString
), s
);
/* Only 'c' is sign-aware: its sign comes from the top bit of the byte. */
783 int issigned
= (type
== 'c') ? (input
[inputpos
] & 0x80) : 0;
784 ret
.set(String(n
, CopyString
),
785 unpack(&input
[inputpos
], 1, issigned
, byte_map
));
/* 16-bit values: machine order by default, overridden for 'n' and 'v'. */
794 int64_t *map
= machine_endian_short_map
;
/* The sign byte is the last byte on a little-endian host, else the first. */
797 issigned
= input
[inputpos
+ (machine_little_endian
? 1 : 0)] &
799 } else if (type
== 'n') {
800 map
= big_endian_short_map
;
801 } else if (type
== 'v') {
802 map
= little_endian_short_map
;
805 ret
.set(String(n
, CopyString
),
806 unpack(&input
[inputpos
], 2, issigned
, map
));
/* Machine-sized int: sign byte is at sizeof(int) - 1 on little-endian, else 0. */
816 issigned
= input
[inputpos
+ (machine_little_endian
?
817 (sizeof(int) - 1) : 0)] & 0x80;
/*
 * NOTE(review): with 8-bit bytes sizeof(int32_t) is exactly 4, so the
 * `sizeof(int32_t) > 4` test is constant-false and this branch cannot run.
 */
818 } else if (sizeof(int32_t) > 4 &&
819 (input
[inputpos
+ machine_endian_int32_map
[3]]
824 v
|= unpack(&input
[inputpos
], sizeof(int), issigned
, int_map
);
826 ret
.set(String(n
, CopyString
), v
);
/* Alternative store: keep only the low 32 bits, zero-extended to 64 bits. */
828 uint64_t u64
= uint32_t(v
);
829 ret
.set(String(n
, CopyString
), u64
);
/* 32-bit values: machine order by default, overridden for 'N' and 'V'. */
839 int64_t *map
= machine_endian_int32_map
;
842 if (type
== 'l' || type
== 'L') {
843 issigned
= input
[inputpos
+ (machine_little_endian
? 3 : 0)]
845 } else if (type
== 'N') {
846 issigned
= input
[inputpos
] & 0x80;
847 map
= big_endian_int32_map
;
848 } else if (type
== 'V') {
849 issigned
= input
[inputpos
+ 3] & 0x80;
850 map
= little_endian_int32_map
;
/* NOTE(review): `sizeof(int32_t) > 4` is constant-false with 8-bit bytes. */
853 if (sizeof(int32_t) > 4 && issigned
) {
857 v
|= unpack(&input
[inputpos
], 4, issigned
, map
);
859 ret
.set(String(n
, CopyString
), v
);
/* Alternative store: keep only the low 32 bits, zero-extended to 64 bits. */
861 uint64_t u64
= uint32_t(v
);
862 ret
.set(String(n
, CopyString
), u64
);
/* 64-bit values: machine order by default, overridden for 'J' and 'P'. */
872 int64_t *map
= machine_endian_int64_map
;
874 if (type
== 'q' || type
== 'Q') {
875 issigned
= input
[inputpos
+ (machine_little_endian
? 7 : 0)] & 0x80;
876 } else if (type
== 'J') {
877 issigned
= input
[inputpos
] & 0x80;
878 map
= big_endian_int64_map
;
879 } else if (type
== 'P') {
880 issigned
= input
[inputpos
+ 7] & 0x80;
881 map
= little_endian_int64_map
;
884 v
= unpack(&input
[inputpos
], 8, issigned
, map
);
887 ret
.set(String(n
, CopyString
), v
);
889 uint64_t u64
= uint64_t(v
);
890 ret
.set(String(n
, CopyString
), u64
);
/* Copy sizeof(float) raw bytes into v and store the value widened to double. */
899 memcpy(&v
, &input
[inputpos
], sizeof(float));
900 ret
.set(String(n
, CopyString
), (double)v
);
/* Copy sizeof(double) raw bytes into v and store it directly. */
907 memcpy(&v
, &input
[inputpos
], sizeof(double));
908 ret
.set(String(n
, CopyString
), v
);
913 /* Do nothing with input, just skip it */
917 if (inputpos
< size
) {
919 i
= arg
- 1; /* Break out of for loop */
922 throw_invalid_argument("Type %c: outside of string", type
);
928 if (arg
<= inputlen
) {
931 throw_invalid_argument("Type %c: outside of string", type
);
934 i
= arg
- 1; /* Done, break out of for loop */
940 if (size
!= -1) { /* only print warning if not working with * */
941 throw_invalid_argument("Type %c: outside of string", type
);
945 } else if (arg
< 0) {
946 /* Reached end of input for '*' repeater */
949 throw_invalid_argument
950 ("Type %c: not enough input, need %d, have %d",
951 type
, size
, inputlen
- inputpos
);
956 formatlen
--; /* Skip '/' separator, does no harm if inputlen == 0 */
963 ///////////////////////////////////////////////////////////////////////////////