Use folly::dynamic::object and folly::dynamic::string exclusively
[hiphop-php.git] / hphp / runtime / base / zend-pack.cpp
blob12dd301a78f10871a1868d550595112d8d20ce21
1 /*
2 +----------------------------------------------------------------------+
3 | HipHop for PHP |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-2014 Facebook, Inc. (http://www.facebook.com) |
6 | Copyright (c) 1998-2010 Zend Technologies Ltd. (http://www.zend.com) |
7 +----------------------------------------------------------------------+
8 | This source file is subject to version 2.00 of the Zend license, |
9 | that is bundled with this package in the file LICENSE, and is |
10 | available through the world-wide-web at the following url: |
11 | http://www.zend.com/license/2_00.txt. |
12 | If you did not receive a copy of the Zend license and are unable to |
13 | obtain it through the world-wide-web, please send a note to |
14 | license@zend.com so we can mail you a copy immediately. |
15 +----------------------------------------------------------------------+
18 #include "hphp/runtime/base/zend-pack.h"
19 #include "hphp/runtime/base/complex-types.h"
20 #include "hphp/runtime/base/type-conversions.h"
21 #include "hphp/runtime/base/builtin-functions.h"
23 namespace HPHP {
/*
 * Advances the caller's local `outputpos` by (a) * (b) bytes.
 *
 * If (a) is negative, or the product would push `outputpos` past INT_MAX,
 * the problem is reported through throw_invalid_argument() using the caller's
 * local `code`, and the enclosing function returns false.
 *
 * The macro expects `outputpos` and `code` to be in scope at the point of
 * use, and (b) to be a positive constant. It is wrapped in do/while(0) so it
 * behaves as a single statement (safe under an un-braced if/else), and (b) is
 * parenthesized so that expression arguments are cast as a whole.
 */
#define INC_OUTPUTPOS(a,b)                                             \
  do {                                                                 \
    if ((a) < 0 || ((INT_MAX - outputpos) / ((int)(b))) < (a)) {       \
      throw_invalid_argument                                           \
        ("Type %c: integer overflow in format string", code);          \
      return false;                                                    \
    }                                                                  \
    outputpos += (a) * ((int)(b));                                     \
  } while (0)
33 ///////////////////////////////////////////////////////////////////////////////
35 ZendPack::ZendPack() {
36 int machine_endian_check = 1;
37 int i;
39 machine_little_endian = ((char *)&machine_endian_check)[0];
41 if (machine_little_endian) {
42 /* Where to get lo to hi bytes from */
43 byte_map[0] = 0;
45 for (i = 0; i < (int)sizeof(int); i++) {
46 int_map[i] = i;
49 machine_endian_short_map[0] = 0;
50 machine_endian_short_map[1] = 1;
51 big_endian_short_map[0] = 1;
52 big_endian_short_map[1] = 0;
53 little_endian_short_map[0] = 0;
54 little_endian_short_map[1] = 1;
56 machine_endian_int32_map[0] = 0;
57 machine_endian_int32_map[1] = 1;
58 machine_endian_int32_map[2] = 2;
59 machine_endian_int32_map[3] = 3;
60 big_endian_int32_map[0] = 3;
61 big_endian_int32_map[1] = 2;
62 big_endian_int32_map[2] = 1;
63 big_endian_int32_map[3] = 0;
64 little_endian_int32_map[0] = 0;
65 little_endian_int32_map[1] = 1;
66 little_endian_int32_map[2] = 2;
67 little_endian_int32_map[3] = 3;
68 } else {
69 int size = sizeof(int32_t);
71 /* Where to get hi to lo bytes from */
72 byte_map[0] = size - 1;
74 for (i = 0; i < (int)sizeof(int); i++) {
75 int_map[i] = size - (sizeof(int) - i);
78 machine_endian_short_map[0] = size - 2;
79 machine_endian_short_map[1] = size - 1;
80 big_endian_short_map[0] = size - 2;
81 big_endian_short_map[1] = size - 1;
82 little_endian_short_map[0] = size - 1;
83 little_endian_short_map[1] = size - 2;
85 machine_endian_int32_map[0] = size - 4;
86 machine_endian_int32_map[1] = size - 3;
87 machine_endian_int32_map[2] = size - 2;
88 machine_endian_int32_map[3] = size - 1;
89 big_endian_int32_map[0] = size - 4;
90 big_endian_int32_map[1] = size - 3;
91 big_endian_int32_map[2] = size - 2;
92 big_endian_int32_map[3] = size - 1;
93 little_endian_int32_map[0] = size - 1;
94 little_endian_int32_map[1] = size - 2;
95 little_endian_int32_map[2] = size - 3;
96 little_endian_int32_map[3] = size - 4;
100 void ZendPack::pack(CVarRef val, int size, int *map, char *output) {
101 int32_t n = val.toInt32();
102 char *v = (char*)&n;
103 for (int i = 0; i < size; i++) {
104 *output++ = v[map[i]];
108 Variant ZendPack::pack(const String& fmt, CArrRef argv) {
109 /* Preprocess format into formatcodes and formatargs */
110 std::vector<char> formatcodes;
111 std::vector<int> formatargs;
112 int argc = argv.size();
114 const char *format = fmt.c_str();
115 int formatlen = fmt.size();
116 int currentarg = 0;
117 for (int i = 0; i < formatlen; ) {
118 char code = format[i++];
119 int arg = 1;
121 /* Handle format arguments if any */
122 if (i < formatlen) {
123 char c = format[i];
125 if (c == '*') {
126 arg = -1;
127 i++;
129 else if (c >= '0' && c <= '9') {
130 arg = atoi(&format[i]);
132 while (format[i] >= '0' && format[i] <= '9' && i < formatlen) {
133 i++;
138 /* Handle special arg '*' for all codes and check argv overflows */
139 switch ((int) code) {
140 /* Never uses any args */
141 case 'x':
142 case 'X':
143 case '@':
144 if (arg < 0) {
145 throw_invalid_argument("Type %c: '*' ignored", code);
146 arg = 1;
148 break;
150 /* Always uses one arg */
151 case 'a':
152 case 'A':
153 case 'h':
154 case 'H':
155 if (currentarg >= argc) {
156 throw_invalid_argument("Type %c: not enough arguments", code);
157 return false;
160 if (arg < 0) {
161 arg = argv[currentarg].toString().size();
164 currentarg++;
165 break;
167 /* Use as many args as specified */
168 case 'c':
169 case 'C':
170 case 's':
171 case 'S':
172 case 'i':
173 case 'I':
174 case 'l':
175 case 'L':
176 case 'n':
177 case 'N':
178 case 'v':
179 case 'V':
180 case 'f':
181 case 'd':
182 if (arg < 0) {
183 arg = argc - currentarg;
186 currentarg += arg;
188 if (currentarg > argc) {
189 throw_invalid_argument("Type %c: too few arguments", code);
190 return false;
192 break;
194 default:
195 throw_invalid_argument("Type %c: unknown format code", code);
196 return false;
199 formatcodes.push_back(code);
200 formatargs.push_back(arg);
203 if (currentarg < argc) {
204 throw_invalid_argument("%d arguments unused", (argc - currentarg));
207 int outputpos = 0, outputsize = 0;
208 /* Calculate output length and upper bound while processing*/
209 for (int i = 0; i < (int)formatcodes.size(); i++) {
210 int code = (int) formatcodes[i];
211 int arg = formatargs[i];
213 switch ((int) code) {
214 case 'h':
215 case 'H':
216 INC_OUTPUTPOS((arg + (arg % 2)) / 2,1); /* 4 bit per arg */
217 break;
219 case 'a':
220 case 'A':
221 case 'c':
222 case 'C':
223 case 'x':
224 INC_OUTPUTPOS(arg,1); /* 8 bit per arg */
225 break;
227 case 's':
228 case 'S':
229 case 'n':
230 case 'v':
231 INC_OUTPUTPOS(arg,2); /* 16 bit per arg */
232 break;
234 case 'i':
235 case 'I':
236 INC_OUTPUTPOS(arg,sizeof(int));
237 break;
239 case 'l':
240 case 'L':
241 case 'N':
242 case 'V':
243 INC_OUTPUTPOS(arg,4); /* 32 bit per arg */
244 break;
246 case 'f':
247 INC_OUTPUTPOS(arg,sizeof(float));
248 break;
250 case 'd':
251 INC_OUTPUTPOS(arg,sizeof(double));
252 break;
254 case 'X':
255 outputpos -= arg;
257 if (outputpos < 0) {
258 throw_invalid_argument("Type %c: outside of string", code);
259 outputpos = 0;
261 break;
263 case '@':
264 outputpos = arg;
265 break;
268 if (outputsize < outputpos) {
269 outputsize = outputpos;
273 String s = String(outputsize, ReserveString);
274 char *output = s.bufferSlice().ptr;
275 outputpos = 0;
276 currentarg = 0;
278 /* Do actual packing */
279 for (int i = 0; i < (int)formatcodes.size(); i++) {
280 int code = (int) formatcodes[i];
281 int arg = formatargs[i];
282 String val;
283 const char *s;
284 int slen;
286 switch ((int) code) {
287 case 'a':
288 case 'A':
289 memset(&output[outputpos], (code == 'a') ? '\0' : ' ', arg);
290 val = argv[currentarg++].toString();
291 s = val.c_str();
292 slen = val.size();
293 memcpy(&output[outputpos], s, (slen < arg) ? slen : arg);
294 outputpos += arg;
295 break;
297 case 'h':
298 case 'H': {
299 int nibbleshift = (code == 'h') ? 0 : 4;
300 int first = 1;
301 const char *v;
303 val = argv[currentarg++].toString();
304 v = val.data();
305 slen = val.size();
306 outputpos--;
307 if(arg > slen) {
308 throw_invalid_argument
309 ("Type %c: not enough characters in string", code);
310 arg = slen;
313 while (arg-- > 0) {
314 char n = *v++;
316 if (n >= '0' && n <= '9') {
317 n -= '0';
318 } else if (n >= 'A' && n <= 'F') {
319 n -= ('A' - 10);
320 } else if (n >= 'a' && n <= 'f') {
321 n -= ('a' - 10);
322 } else {
323 throw_invalid_argument("Type %c: illegal hex digit %c", code, n);
324 n = 0;
327 if (first--) {
328 output[++outputpos] = 0;
329 } else {
330 first = 1;
333 output[outputpos] |= (n << nibbleshift);
334 nibbleshift = (nibbleshift + 4) & 7;
337 outputpos++;
338 break;
341 case 'c':
342 case 'C':
343 while (arg-- > 0) {
344 pack(argv[currentarg++], 1, byte_map, &output[outputpos]);
345 outputpos++;
347 break;
349 case 's':
350 case 'S':
351 case 'n':
352 case 'v': {
353 int *map = machine_endian_short_map;
355 if (code == 'n') {
356 map = big_endian_short_map;
357 } else if (code == 'v') {
358 map = little_endian_short_map;
361 while (arg-- > 0) {
362 pack(argv[currentarg++], 2, map, &output[outputpos]);
363 outputpos += 2;
365 break;
368 case 'i':
369 case 'I':
370 while (arg-- > 0) {
371 pack(argv[currentarg++], sizeof(int), int_map, &output[outputpos]);
372 outputpos += sizeof(int);
374 break;
376 case 'l':
377 case 'L':
378 case 'N':
379 case 'V': {
380 int *map = machine_endian_int32_map;
382 if (code == 'N') {
383 map = big_endian_int32_map;
384 } else if (code == 'V') {
385 map = little_endian_int32_map;
388 while (arg-- > 0) {
389 pack(argv[currentarg++], 4, map, &output[outputpos]);
390 outputpos += 4;
392 break;
395 case 'f': {
396 float v;
398 while (arg-- > 0) {
399 v = argv[currentarg++].toDouble();
400 memcpy(&output[outputpos], &v, sizeof(v));
401 outputpos += sizeof(v);
403 break;
406 case 'd': {
407 double v;
409 while (arg-- > 0) {
410 v = argv[currentarg++].toDouble();
411 memcpy(&output[outputpos], &v, sizeof(v));
412 outputpos += sizeof(v);
414 break;
417 case 'x':
418 memset(&output[outputpos], '\0', arg);
419 outputpos += arg;
420 break;
422 case 'X':
423 outputpos -= arg;
425 if (outputpos < 0) {
426 outputpos = 0;
428 break;
430 case '@':
431 if (arg > outputpos) {
432 memset(&output[outputpos], '\0', arg - outputpos);
434 outputpos = arg;
435 break;
439 return s.setSize(outputpos);
442 int32_t ZendPack::unpack(const char *data, int size, int issigned, int *map) {
443 int32_t result;
444 char *cresult = (char *) &result;
445 int i;
447 result = issigned ? -1 : 0;
449 for (i = 0; i < size; i++) {
450 cresult[map[i]] = *data++;
453 return result;
456 Variant ZendPack::unpack(const String& fmt, const String& data) {
457 const char *format = fmt.c_str();
458 int formatlen = fmt.size();
459 const char *input = data.c_str();
460 int inputlen = data.size();
461 int inputpos = 0;
463 Array ret;
464 while (formatlen-- > 0) {
465 char type = *(format++);
466 char c;
467 int arg = 1, argb;
468 const char *name;
469 int namelen;
470 int size=0;
472 /* Handle format arguments if any */
473 if (formatlen > 0) {
474 c = *format;
476 if (c >= '0' && c <= '9') {
477 arg = atoi(format);
479 while (formatlen > 0 && *format >= '0' && *format <= '9') {
480 format++;
481 formatlen--;
483 } else if (c == '*') {
484 arg = -1;
485 format++;
486 formatlen--;
490 /* Get of new value in array */
491 name = format;
492 argb = arg;
494 while (formatlen > 0 && *format != '/') {
495 formatlen--;
496 format++;
499 namelen = format - name;
501 if (namelen > 200)
502 namelen = 200;
504 switch ((int) type) {
505 /* Never use any input */
506 case 'X':
507 size = -1;
508 break;
510 case '@':
511 size = 0;
512 break;
514 case 'a':
515 case 'A':
516 size = arg;
517 arg = 1;
518 break;
520 case 'h':
521 case 'H':
522 size = (arg > 0) ? (arg + (arg % 2)) / 2 : arg;
523 arg = 1;
524 break;
526 /* Use 1 byte of input */
527 case 'c':
528 case 'C':
529 case 'x':
530 size = 1;
531 break;
533 /* Use 2 bytes of input */
534 case 's':
535 case 'S':
536 case 'n':
537 case 'v':
538 size = 2;
539 break;
541 /* Use sizeof(int) bytes of input */
542 case 'i':
543 case 'I':
544 size = sizeof(int);
545 break;
547 /* Use 4 bytes of input */
548 case 'l':
549 case 'L':
550 case 'N':
551 case 'V':
552 size = 4;
553 break;
555 /* Use sizeof(float) bytes of input */
556 case 'f':
557 size = sizeof(float);
558 break;
560 /* Use sizeof(double) bytes of input */
561 case 'd':
562 size = sizeof(double);
563 break;
565 default:
566 throw_invalid_argument("Invalid format type %c", type);
567 return false;
570 /* Do actual unpacking */
571 for (int i = 0; i != arg; i++ ) {
572 /* Space for name + number, safe as namelen is ensured <= 200 */
573 char n[256];
575 if (arg != 1 || namelen == 0) {
576 /* Need to add element number to name */
577 snprintf(n, sizeof(n), "%.*s%d", namelen, name, i + 1);
578 } else {
579 /* Truncate name to next format code or end of string */
580 snprintf(n, sizeof(n), "%.*s", namelen, name);
583 if (size != 0 && size != -1 && INT_MAX - size + 1 < inputpos) {
584 throw_invalid_argument("Type %c: integer overflow", type);
585 inputpos = 0;
588 if ((inputpos + size) <= inputlen) {
589 switch ((int) type) {
590 case 'a':
591 case 'A': {
592 char pad = (type == 'a') ? '\0' : ' ';
593 int len = inputlen - inputpos; /* Remaining string */
595 /* If size was given take minimum of len and size */
596 if ((size >= 0) && (len > size)) {
597 len = size;
600 size = len;
602 /* Remove padding chars from unpacked data */
603 while (--len >= 0) {
604 if (input[inputpos + len] != pad)
605 break;
608 ret.set(String(n, CopyString),
609 String(input + inputpos, len + 1, CopyString));
610 break;
613 case 'h':
614 case 'H': {
615 int len = (inputlen - inputpos) * 2; /* Remaining */
616 int nibbleshift = (type == 'h') ? 0 : 4;
617 int first = 1;
618 char *buf;
619 int ipos, opos;
621 /* If size was given take minimum of len and size */
622 if (size >= 0 && len > (size * 2)) {
623 len = size * 2;
626 if (argb > 0) {
627 len -= argb % 2;
630 String s = String(len, ReserveString);
631 buf = s.bufferSlice().ptr;
633 for (ipos = opos = 0; opos < len; opos++) {
634 char c = (input[inputpos + ipos] >> nibbleshift) & 0xf;
636 if (c < 10) {
637 c += '0';
638 } else {
639 c += 'a' - 10;
642 buf[opos] = c;
643 nibbleshift = (nibbleshift + 4) & 7;
645 if (first-- == 0) {
646 ipos++;
647 first = 1;
651 s.setSize(len);
652 ret.set(String(n, CopyString), s);
653 break;
656 case 'c':
657 case 'C': {
658 int issigned = (type == 'c') ? (input[inputpos] & 0x80) : 0;
659 ret.set(String(n, CopyString),
660 unpack(&input[inputpos], 1, issigned, byte_map));
661 break;
664 case 's':
665 case 'S':
666 case 'n':
667 case 'v': {
668 int issigned = 0;
669 int *map = machine_endian_short_map;
671 if (type == 's') {
672 issigned = input[inputpos + (machine_little_endian ? 1 : 0)] &
673 0x80;
674 } else if (type == 'n') {
675 map = big_endian_short_map;
676 } else if (type == 'v') {
677 map = little_endian_short_map;
680 ret.set(String(n, CopyString),
681 unpack(&input[inputpos], 2, issigned, map));
682 break;
685 case 'i':
686 case 'I': {
687 int32_t v = 0;
688 int issigned = 0;
690 if (type == 'i') {
691 issigned = input[inputpos + (machine_little_endian ?
692 (sizeof(int) - 1) : 0)] & 0x80;
693 } else if (sizeof(int32_t) > 4 &&
694 (input[inputpos + machine_endian_int32_map[3]]
695 & 0x80) == 0x80) {
696 v = ~INT_MAX;
699 v |= unpack(&input[inputpos], sizeof(int), issigned, int_map);
700 if (type == 'i') {
701 ret.set(String(n, CopyString), v);
702 } else {
703 uint64_t u64 = uint32_t(v);
704 ret.set(String(n, CopyString), u64);
706 break;
709 case 'l':
710 case 'L':
711 case 'N':
712 case 'V': {
713 int issigned = 0;
714 int *map = machine_endian_int32_map;
715 int32_t v = 0;
717 if (type == 'l' || type == 'L') {
718 issigned = input[inputpos + (machine_little_endian ? 3 : 0)]
719 & 0x80;
720 } else if (type == 'N') {
721 issigned = input[inputpos] & 0x80;
722 map = big_endian_int32_map;
723 } else if (type == 'V') {
724 issigned = input[inputpos + 3] & 0x80;
725 map = little_endian_int32_map;
728 if (sizeof(int32_t) > 4 && issigned) {
729 v = ~INT_MAX;
732 v |= unpack(&input[inputpos], 4, issigned, map);
733 if (type == 'l') {
734 ret.set(String(n, CopyString), v);
735 } else {
736 uint64_t u64 = uint32_t(v);
737 ret.set(String(n, CopyString), u64);
739 break;
742 case 'f': {
743 float v;
745 memcpy(&v, &input[inputpos], sizeof(float));
746 ret.set(String(n, CopyString), (double)v);
747 break;
750 case 'd': {
751 double v;
753 memcpy(&v, &input[inputpos], sizeof(double));
754 ret.set(String(n, CopyString), v);
755 break;
758 case 'x':
759 /* Do nothing with input, just skip it */
760 break;
762 case 'X':
763 if (inputpos < size) {
764 inputpos = -size;
765 i = arg - 1; /* Break out of for loop */
767 if (arg >= 0) {
768 throw_invalid_argument("Type %c: outside of string", type);
771 break;
773 case '@':
774 if (arg <= inputlen) {
775 inputpos = arg;
776 } else {
777 throw_invalid_argument("Type %c: outside of string", type);
780 i = arg - 1; /* Done, break out of for loop */
781 break;
784 inputpos += size;
785 if (inputpos < 0) {
786 if (size != -1) { /* only print warning if not working with * */
787 throw_invalid_argument("Type %c: outside of string", type);
789 inputpos = 0;
791 } else if (arg < 0) {
792 /* Reached end of input for '*' repeater */
793 break;
794 } else {
795 throw_invalid_argument
796 ("Type %c: not enough input, need %d, have %d",
797 type, size, inputlen - inputpos);
798 return false;
802 formatlen--; /* Skip '/' separator, does no harm if inputlen == 0 */
803 format++;
806 return ret;
809 ///////////////////////////////////////////////////////////////////////////////