Add sub-controls for Hack array compat runtime checks
[hiphop-php.git] / hphp / runtime / base / zend-scanf.cpp
blob33e287b2194887df9634b00f7d3bfce9d256e4d6
1 /*
2 +----------------------------------------------------------------------+
3 | HipHop for PHP |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 | Copyright (c) 1998-2010 Zend Technologies Ltd. (http://www.zend.com) |
7 +----------------------------------------------------------------------+
8 | This source file is subject to version 2.00 of the Zend license, |
9 | that is bundled with this package in the file LICENSE, and is |
10 | available through the world-wide-web at the following url: |
11 | http://www.zend.com/license/2_00.txt. |
12 | If you did not receive a copy of the Zend license and are unable to |
13 | obtain it through the world-wide-web, please send a note to |
14 | license@zend.com so we can mail you a copy immediately. |
15 +----------------------------------------------------------------------+
18 #include "hphp/runtime/base/zend-scanf.h"
20 #include "hphp/runtime/base/builtin-functions.h"
22 ///////////////////////////////////////////////////////////////////////////////
24 scanf.c --
26 This file contains the base code which implements sscanf and by extension
27 fscanf. Original code is from TCL8.3.0 and bears the following copyright:
29 This software is copyrighted by the Regents of the University of
30 California, Sun Microsystems, Inc., Scriptics Corporation,
31 and other parties. The following terms apply to all files associated
32 with the software unless explicitly disclaimed in individual files.
34 The authors hereby grant permission to use, copy, modify, distribute,
35 and license this software and its documentation for any purpose, provided
36 that existing copyright notices are retained in all copies and that this
37 notice is included verbatim in any distributions. No written agreement,
38 license, or royalty fee is required for any of the authorized uses.
39 Modifications to this software may be copyrighted by their authors
40 and need not follow the licensing terms described here, provided that
41 the new terms are clearly indicated on the first page of each file where
42 they apply.
44 IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY
45 FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
46 ARISING OUT OF THE USE OF THIS SOFTWARE, ITS DOCUMENTATION, OR ANY
47 DERIVATIVES THEREOF, EVEN IF THE AUTHORS HAVE BEEN ADVISED OF THE
48 POSSIBILITY OF SUCH DAMAGE.
50 THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES,
51 INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY,
52 FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT. THIS SOFTWARE
53 IS PROVIDED ON AN "AS IS" BASIS, AND THE AUTHORS AND DISTRIBUTORS HAVE
54 NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR
55 MODIFICATIONS.
57 GOVERNMENT USE: If you are acquiring this software on behalf of the
58 U.S. government, the Government shall have only "Restricted Rights"
59 in the software and related documentation as defined in the Federal
60 Acquisition Regulations (FARs) in Clause 52.227.19 (c) (2). If you
61 are acquiring the software on behalf of the Department of Defense, the
62 software shall be classified as "Commercial Computer Software" and the
63 Government shall have only "Restricted Rights" as defined in Clause
64 252.227-7013 (c) (1) of DFARs. Notwithstanding the foregoing, the
65 authors grant the U.S. Government and others acting in its behalf
66 permission to use and distribute the software in accordance with the
67 terms specified in this license.
#define SCAN_MAX_ARGS 0xFF // Maximum number of variables which can be
                           // passed to (f|s)scanf. This is an artificial
                           // upper limit to keep resources in check and
                           // minimize the possibility of exploits

#define SCAN_ERROR_INTERNAL (SCAN_ERROR_WRONG_PARAM_COUNT - 1)

/*
 * Flag values used internally by [f|s]scanf.
 */
#define SCAN_NOSKIP     0x1     /* Don't skip blanks. */
#define SCAN_SUPPRESS   0x2     /* Suppress assignment. */
#define SCAN_UNSIGNED   0x4     /* Read an unsigned value. */
#define SCAN_WIDTH      0x8     /* A width value was supplied. */

#define SCAN_SIGNOK     0x10    /* A +/- character is allowed. */
#define SCAN_NODIGITS   0x20    /* No digits have been scanned. */
#define SCAN_NOZERO     0x40    /* No zero digits have been scanned. */
#define SCAN_XOK        0x80    /* An 'x' is allowed. */
#define SCAN_PTOK       0x100   /* Decimal point is allowed. */
#define SCAN_EXPOK      0x200   /* An exponent is allowed. */

/*
 * Convert a (possibly signed) char to unsigned char so it can be passed
 * safely to the <ctype.h> classification functions. The whole expansion is
 * parenthesised so the cast cannot bind to a neighbouring operator.
 */
#define UCHAR(x) ((unsigned char)(x))
94 ///////////////////////////////////////////////////////////////////////////////
/*
 * Description of a "%[...]" character set: a flat list of individual
 * characters plus a list of inclusive character ranges.
 */
struct Range {
  char start;     /* lowest character in the range (inclusive) */
  char end;       /* highest character in the range (inclusive) */
};

struct CharSet {
  int exclude;    /* 1 if this is an exclusion set ("[^...]") */
  int nchars;     /* number of entries used in chars */
  char *chars;    /* individual member characters */
  int nranges;    /* number of entries used in ranges */
  Range *ranges;  /* member ranges; may be null when there are none */
};
112 namespace HPHP {
114 *----------------------------------------------------------------------
116 * BuildCharSet --
118 * This function examines a character set format specification
119 * and builds a CharSet containing the individual characters and
120 * character ranges specified.
122 * Results:
123 * Returns the next format position.
125 * Side effects:
126 * Initializes the charset.
128 *----------------------------------------------------------------------
130 static const char *BuildCharSet(CharSet *cset, const char *format) {
131 const char *ch;
132 char start;
133 int nranges;
134 const char *end;
136 memset(cset, 0, sizeof(CharSet));
138 ch = format;
139 if (*ch == '^') {
140 cset->exclude = 1;
141 ch = ++format;
143 end = format + 1; /* verify this - cc */
146 * Find the close bracket so we can overallocate the set.
148 if (*ch == ']') {
149 ch = end++;
151 nranges = 0;
152 while (*ch != ']') {
153 if (*ch == '-') {
154 nranges++;
156 ch = end++;
159 cset->chars = (char *)req::malloc_noptrs(end - format - 1);
160 if (nranges > 0) {
161 cset->ranges = req::make_raw_array<::Range>(nranges);
162 } else {
163 cset->ranges = nullptr;
167 * Now build the character set.
169 cset->nchars = cset->nranges = 0;
170 ch = format++;
171 start = *ch;
172 if (*ch == ']' || *ch == '-') {
173 cset->chars[cset->nchars++] = *ch;
174 ch = format++;
176 while (*ch != ']') {
177 if (*format == '-') {
179 * This may be the first character of a range, so don't add
180 * it yet.
182 start = *ch;
183 } else if (*ch == '-') {
185 * Check to see if this is the last character in the set, in which
186 * case it is not a range and we should add the previous character
187 * as well as the dash.
189 if (*format == ']') {
190 cset->chars[cset->nchars++] = start;
191 cset->chars[cset->nchars++] = *ch;
192 } else {
193 ch = format++;
196 * Check to see if the range is in reverse order.
198 if (start < *ch) {
199 cset->ranges[cset->nranges].start = start;
200 cset->ranges[cset->nranges].end = *ch;
201 } else {
202 cset->ranges[cset->nranges].start = *ch;
203 cset->ranges[cset->nranges].end = start;
205 cset->nranges++;
207 } else {
208 cset->chars[cset->nchars++] = *ch;
210 ch = format++;
212 return format;
216 *----------------------------------------------------------------------
218 * CharInSet --
220 * Check to see if a character matches the given set.
222 * Results:
223 * Returns non-zero if the character matches the given set.
225 * Side effects:
226 * None.
228 *----------------------------------------------------------------------
230 static int CharInSet(CharSet *cset, int c) {
231 char ch = (char) c;
232 int i, match = 0;
234 for (i = 0; i < cset->nchars; i++) {
235 if (cset->chars[i] == ch) {
236 match = 1;
237 break;
240 if (!match) {
241 for (i = 0; i < cset->nranges; i++) {
242 if ((cset->ranges[i].start <= ch)
243 && (ch <= cset->ranges[i].end)) {
244 match = 1;
245 break;
249 return (cset->exclude ? !match : match);
253 *----------------------------------------------------------------------
255 * ReleaseCharSet --
257 * Free the storage associated with a character set.
259 * Results:
260 * None.
262 * Side effects:
263 * None.
265 *----------------------------------------------------------------------
267 static void ReleaseCharSet(CharSet *cset) {
268 req::free((char *)cset->chars);
269 if (cset->ranges) {
270 req::free((char *)cset->ranges);
274 static inline void scan_set_error_return(int numVars, Variant &return_value) {
275 if (numVars) {
276 return_value = SCAN_ERROR_EOF; /* EOF marker */
277 } else {
278 return_value = uninit_null();
283 *----------------------------------------------------------------------
285 * ValidateFormat --
287 * Parse the format string and verify that it is properly formed
288 * and that there are exactly enough variables on the command line.
290 * Parameters :
291 * format The format string.
292 * numVars The number of variables passed to the scan command.
293 * totalSubs The number of variables that will be required.
295 *----------------------------------------------------------------------
297 static int ValidateFormat(const char *format, int numVars, int *totalSubs) {
298 #define STATIC_LIST_SIZE 16
299 int gotXpg, gotSequential, value, i, flags;
300 const char *end, *ch = nullptr;
301 int staticAssign[STATIC_LIST_SIZE];
302 int *nassign = staticAssign;
303 int objIndex, xpgSize, nspace = STATIC_LIST_SIZE;
306 * Initialize an array that records the number of times a variable
307 * is assigned to by the format string. We use this to detect if
308 * a variable is multiply assigned or left unassigned.
310 if (numVars > nspace) {
311 nassign = (int*)req::malloc_noptrs(sizeof(int) * numVars);
312 nspace = numVars;
314 for (i = 0; i < nspace; i++) {
315 nassign[i] = 0;
318 xpgSize = objIndex = gotXpg = gotSequential = 0;
320 while (*format != '\0') {
321 ch = format++;
322 flags = 0;
324 if (*ch != '%') {
325 continue;
327 ch = format++;
328 if (*ch == '%') {
329 continue;
331 if (*ch == '*') {
332 flags |= SCAN_SUPPRESS;
333 ch = format++;
334 goto xpgCheckDone;
337 if ( isdigit( (int)*ch ) ) {
339 * Check for an XPG3-style %n$ specification. Note: there
340 * must not be a mixture of XPG3 specs and non-XPG3 specs
341 * in the same format string.
343 char *endptr;
344 value = strtoul(format-1, &endptr, 10);
345 end = endptr;
346 if (*end != '$') {
347 goto notXpg;
349 format = end+1;
350 ch = format++;
351 gotXpg = 1;
352 if (gotSequential) {
353 goto mixedXPG;
355 objIndex = value - 1;
356 if ((objIndex < 0) || (numVars && (objIndex >= numVars))) {
357 goto badIndex;
358 } else if (numVars == 0) {
360 * In the case where no vars are specified, the user can
361 * specify %9999$ legally, so we have to consider special
362 * rules for growing the assign array. 'value' is
363 * guaranteed to be > 0.
366 /* set a lower artificial limit on this
367 * in the interest of security and resource friendliness
368 * 255 arguments should be more than enough. - cc
370 if (value > SCAN_MAX_ARGS) {
371 goto badIndex;
374 xpgSize = (xpgSize > value) ? xpgSize : value;
376 goto xpgCheckDone;
379 notXpg:
380 gotSequential = 1;
381 if (gotXpg) {
382 mixedXPG:
383 if (nassign != staticAssign) req::free((char *)nassign);
384 throw_invalid_argument
385 ("format: cannot mix \"%%\" and \"%%n$\" conversion specifiers");
386 return SCAN_ERROR_INVALID_FORMAT;
389 xpgCheckDone:
391 * Parse any width specifier.
393 if (isdigit(UCHAR(*ch))) {
394 char *endptr;
395 value = strtoul(format-1, &endptr, 10);
396 format = endptr;
397 flags |= SCAN_WIDTH;
398 ch = format++;
402 * Ignore size specifier.
404 if ((*ch == 'l') || (*ch == 'L') || (*ch == 'h')) {
405 ch = format++;
408 if (!(flags & SCAN_SUPPRESS) && numVars && (objIndex >= numVars)) {
409 goto badIndex;
413 * Handle the various field types.
415 switch (*ch) {
416 case 'n':
417 case 'd':
418 case 'D':
419 case 'i':
420 case 'o':
421 case 'x':
422 case 'X':
423 case 'u':
424 case 'f':
425 case 'e':
426 case 'E':
427 case 'g':
428 case 's':
429 break;
431 case 'c':
432 /* we differ here with the TCL implementation in allowing for */
433 /* a character width specification, to be more consistent with */
434 /* ANSI. since Zend auto allocates space for vars, this is no */
435 /* problem - cc */
437 if (flags & SCAN_WIDTH) {
438 throw_invalid_argument
439 ("format: Field width may not be specified in %c conversion");
441 return SCAN_ERROR_INVALID_FORMAT;
443 break;
445 case '[':
446 if (*format == '\0') {
447 goto badSet;
449 ch = format++;
450 if (*ch == '^') {
451 if (*format == '\0') {
452 goto badSet;
454 ch = format++;
456 if (*ch == ']') {
457 if (*format == '\0') {
458 goto badSet;
460 ch = format++;
462 while (*ch != ']') {
463 if (*format == '\0') {
464 goto badSet;
466 ch = format++;
468 break;
469 badSet:
470 if (nassign != staticAssign) req::free((char *)nassign);
471 throw_invalid_argument("format: Unmatched [ in format string");
472 return SCAN_ERROR_INVALID_FORMAT;
474 default:
475 if (nassign != staticAssign) req::free((char *)nassign);
476 throw_invalid_argument("Bad scan conversion character \"%c\"", *ch);
477 return SCAN_ERROR_INVALID_FORMAT;
480 if (!(flags & SCAN_SUPPRESS)) {
481 if (objIndex >= nspace) {
483 * Expand the nassign buffer. If we are using XPG specifiers,
484 * make sure that we grow to a large enough size. xpgSize is
485 * guaranteed to be at least one larger than objIndex.
487 value = nspace;
488 if (xpgSize) {
489 nspace = xpgSize;
490 } else {
491 nspace += STATIC_LIST_SIZE;
493 if (nassign == staticAssign) {
494 nassign = (int*)req::malloc_noptrs(nspace * sizeof(int));
495 for (i = 0; i < STATIC_LIST_SIZE; ++i) {
496 nassign[i] = staticAssign[i];
498 } else {
499 nassign =
500 (int*)req::realloc_noptrs((void *)nassign, nspace * sizeof(int));
502 for (i = value; i < nspace; i++) {
503 nassign[i] = 0;
506 nassign[objIndex]++;
507 objIndex++;
509 } /* while (*format != '\0') */
512 * Verify that all of the variable were assigned exactly once.
514 if (numVars == 0) {
515 if (xpgSize) {
516 numVars = xpgSize;
517 } else {
518 numVars = objIndex;
521 if (totalSubs) {
522 *totalSubs = numVars;
524 for (i = 0; i < numVars; i++) {
525 if (nassign[i] > 1) {
526 if (nassign != staticAssign) req::free((char *)nassign);
527 throw_invalid_argument
528 ("format: Variable is assigned by multiple \"%%n$\" specifiers");
529 return SCAN_ERROR_INVALID_FORMAT;
530 } else if (!xpgSize && (nassign[i] == 0)) {
532 * If the space is empty, and xpgSize is 0 (means XPG wasn't
533 * used, and/or numVars != 0), then too many vars were given
535 if (nassign != staticAssign) req::free((char *)nassign);
536 throw_invalid_argument
537 ("format: Variable is not assigned by any conversion specifiers");
538 return SCAN_ERROR_INVALID_FORMAT;
542 if (nassign != staticAssign) req::free((char *)nassign);
543 return SCAN_SUCCESS;
545 badIndex:
546 if (nassign != staticAssign) req::free((char *)nassign);
547 if (gotXpg) {
548 throw_invalid_argument
549 ("format: \"%%n$\" argument index out of range");
550 } else {
551 throw_invalid_argument
552 ("format: Different numbers of variable names and field specifiers");
554 return SCAN_ERROR_INVALID_FORMAT;
555 #undef STATIC_LIST_SIZE
559 * This is the internal function which does processing on behalf of
560 * both sscanf() and fscanf()
562 * parameters :
563 * string literal string to be processed
564 * format format string
565 * return_value set with the results of the scan
567 int string_sscanf(const char *string, const char *format, int numVars,
568 Variant &return_value) {
569 int nconversions;
570 int totalVars = -1;
571 int64_t value;
572 char *end;
573 const char *baseString;
574 char op = 0;
575 int base = 0;
576 int underflow = 0;
577 size_t width;
578 long (*fn)(const char *, char **, int) = nullptr;
579 const char *ch;
580 char sch;
581 int flags;
582 char buf[64]; /* Temporary buffer to hold scanned number
583 * strings before they are passed to strtoul() */
585 Array returnArray;
588 * Check for errors in the format string.
590 if (ValidateFormat(format, numVars, &totalVars) != SCAN_SUCCESS) {
591 scan_set_error_return(numVars, return_value);
592 return SCAN_ERROR_INVALID_FORMAT;
595 baseString = string;
598 * Iterate over the format string filling in the result objects until
599 * we reach the end of input, the end of the format string, or there
600 * is a mismatch.
602 nconversions = 0;
604 while (*format != '\0') {
605 ch = format++;
606 flags = 0;
609 * If we see whitespace in the format, skip whitespace in the string.
611 if ( isspace( (int)*ch ) ) {
612 sch = *string;
613 while ( isspace( (int)sch ) ) {
614 if (*string == '\0') {
615 goto done;
617 string++;
618 sch = *string;
620 continue;
623 if (*ch != '%') {
624 literal:
625 if (*string == '\0') {
626 underflow = 1;
627 goto done;
629 sch = *string;
630 string++;
631 if (*ch != sch) {
632 goto done;
634 continue;
637 ch = format++;
638 if (*ch == '%') {
639 goto literal;
643 * Check for assignment suppression ('*') or an XPG3-style
644 * assignment ('%n$').
646 if (*ch == '*') {
647 flags |= SCAN_SUPPRESS;
648 ch = format++;
649 } else if ( isdigit(UCHAR(*ch))) {
650 value = strtoul(format-1, &end, 10);
651 if (*end == '$') {
652 format = end+1;
653 ch = format++;
658 * Parse any width specifier.
660 if ( isdigit(UCHAR(*ch))) {
661 char *endptr;
662 width = strtoul(format-1, &endptr, 10);
663 format = endptr;
664 ch = format++;
665 } else {
666 width = 0;
670 * Ignore size specifier.
672 if ((*ch == 'l') || (*ch == 'L') || (*ch == 'h')) {
673 ch = format++;
677 * Handle the various field types.
679 switch (*ch) {
680 case 'n':
681 if (!(flags & SCAN_SUPPRESS)) {
682 returnArray.append((int)(string - baseString));
684 nconversions++;
685 continue;
687 case 'd':
688 case 'D':
689 op = 'i';
690 base = 10;
691 fn = (long (*)(const char *, char **, int))strtol;
692 break;
693 case 'i':
694 op = 'i';
695 base = 0;
696 fn = (long (*)(const char *, char **, int))strtol;
697 break;
698 case 'o':
699 op = 'i';
700 base = 8;
701 fn = (long (*)(const char *, char **, int))strtol;
702 break;
703 case 'x':
704 case 'X':
705 op = 'i';
706 base = 16;
707 fn = (long (*)(const char *, char **, int))strtol;
708 break;
709 case 'u':
710 op = 'i';
711 base = 10;
712 flags |= SCAN_UNSIGNED;
713 fn = (long (*)(const char *, char **, int))strtoul;
714 break;
716 case 'f':
717 case 'e':
718 case 'E':
719 case 'g':
720 op = 'f';
721 break;
723 case 's':
724 op = 's';
725 break;
727 case 'c':
728 op = 's';
729 flags |= SCAN_NOSKIP;
730 /*-cc-*/
731 if (0 == width) {
732 width = 1;
734 /*-cc-*/
735 break;
736 case '[':
737 op = '[';
738 flags |= SCAN_NOSKIP;
739 break;
740 } /* switch */
743 * At this point, we will need additional characters from the
744 * string to proceed.
746 if (*string == '\0') {
747 underflow = 1;
748 goto done;
752 * Skip any leading whitespace at the beginning of a field unless
753 * the format suppresses this behavior.
755 if (!(flags & SCAN_NOSKIP)) {
756 while (*string != '\0') {
757 sch = *string;
758 if (! isspace((int)sch) ) {
759 break;
761 string++;
763 if (*string == '\0') {
764 underflow = 1;
765 goto done;
770 * Perform the requested scanning operation.
772 switch (op) {
773 case 'c':
774 case 's':
776 * Scan a string up to width characters or whitespace.
778 if (width == 0) {
779 width = (size_t) ~0;
781 end = (char*)string;
782 while (*end != '\0') {
783 sch = *end;
784 if ( isspace( (int)sch ) ) {
785 break;
787 end++;
788 if (--width == 0) {
789 break;
792 if (!(flags & SCAN_SUPPRESS)) {
793 returnArray.append(String(string, end-string, CopyString));
795 string = end;
796 break;
798 case '[': {
799 CharSet cset;
801 if (width == 0) {
802 width = (size_t) ~0;
804 end = (char*)string;
806 format = BuildCharSet(&cset, format);
807 while (*end != '\0') {
808 sch = *end;
809 if (!CharInSet(&cset, (int)sch)) {
810 break;
812 end++;
813 if (--width == 0) {
814 break;
817 ReleaseCharSet(&cset);
819 if (string == end) {
821 * Nothing matched the range, stop processing
823 goto done;
825 if (!(flags & SCAN_SUPPRESS)) {
826 returnArray.append(String(string, end-string, CopyString));
828 string = end;
829 break;
831 case 'i':
833 * Scan an unsigned or signed integer.
835 /*-cc-*/
836 buf[0] = '\0';
837 /*-cc-*/
838 if ((width == 0) || (width > sizeof(buf) - 1)) {
839 width = sizeof(buf) - 1;
842 flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO;
843 for (end = buf; width > 0; width--) {
844 switch (*string) {
846 * The 0 digit has special meaning at the beginning of
847 * a number. If we are unsure of the base, it
848 * indicates that we are in base 8 or base 16 (if it is
849 * followed by an 'x').
851 case '0':
852 /*-cc-*/
853 if (base == 16) {
854 flags |= SCAN_XOK;
856 /*-cc-*/
857 if (base == 0) {
858 base = 8;
859 flags |= SCAN_XOK;
861 if (flags & SCAN_NOZERO) {
862 flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO);
863 } else {
864 flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
866 goto addToInt;
868 case '1': case '2': case '3': case '4':
869 case '5': case '6': case '7':
870 if (base == 0) {
871 base = 10;
873 flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
874 goto addToInt;
876 case '8': case '9':
877 if (base == 0) {
878 base = 10;
880 if (base <= 8) {
881 break;
883 flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
884 goto addToInt;
886 case 'A': case 'B': case 'C':
887 case 'D': case 'E': case 'F':
888 case 'a': case 'b': case 'c':
889 case 'd': case 'e': case 'f':
890 if (base <= 10) {
891 break;
893 flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
894 goto addToInt;
896 case '+': case '-':
897 if (flags & SCAN_SIGNOK) {
898 flags &= ~SCAN_SIGNOK;
899 goto addToInt;
901 break;
903 case 'x': case 'X':
904 if ((flags & SCAN_XOK) && (end == buf+1)) {
905 base = 16;
906 flags &= ~SCAN_XOK;
907 goto addToInt;
909 break;
913 * We got an illegal character so we are done accumulating.
915 break;
917 addToInt:
919 * Add the character to the temporary buffer.
921 *end++ = *string++;
922 if (*string == '\0') {
923 break;
928 * Check to see if we need to back up because we only got a
929 * sign or a trailing x after a 0.
931 if (flags & SCAN_NODIGITS) {
932 if (*string == '\0') {
933 underflow = 1;
935 goto done;
936 } else if (end[-1] == 'x' || end[-1] == 'X') {
937 end--;
938 string--;
942 * Scan the value from the temporary buffer. If we are
943 * returning a large unsigned value, we have to convert it back
944 * to a string since PHP only supports signed values.
946 if (!(flags & SCAN_SUPPRESS)) {
947 *end = '\0';
948 value = (int64_t) (*fn)(buf, nullptr, base);
949 if ((flags & SCAN_UNSIGNED) && (value < 0)) {
950 snprintf(buf, sizeof(buf), "%lu", (long)value); /* INTL: ISO digit */
951 returnArray.append(String(buf, CopyString));
952 } else {
953 returnArray.append(value);
956 break;
958 case 'f':
960 * Scan a floating point number
962 buf[0] = '\0'; /* call me pedantic */
963 if ((width == 0) || (width > sizeof(buf) - 1)) {
964 width = sizeof(buf) - 1;
966 flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_PTOK | SCAN_EXPOK;
967 for (end = buf; width > 0; width--) {
968 switch (*string) {
969 case '0': case '1': case '2': case '3':
970 case '4': case '5': case '6': case '7':
971 case '8': case '9':
972 flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS);
973 goto addToFloat;
974 case '+':
975 case '-':
976 if (flags & SCAN_SIGNOK) {
977 flags &= ~SCAN_SIGNOK;
978 goto addToFloat;
980 break;
981 case '.':
982 if (flags & SCAN_PTOK) {
983 flags &= ~(SCAN_SIGNOK | SCAN_PTOK);
984 goto addToFloat;
986 break;
987 case 'e':
988 case 'E':
990 * An exponent is not allowed until there has
991 * been at least one digit.
993 if ((flags & (SCAN_NODIGITS | SCAN_EXPOK)) == SCAN_EXPOK) {
994 flags = (flags & ~(SCAN_EXPOK|SCAN_PTOK))
995 | SCAN_SIGNOK | SCAN_NODIGITS;
996 goto addToFloat;
998 break;
1002 * We got an illegal character so we are done accumulating.
1004 break;
1006 addToFloat:
1008 * Add the character to the temporary buffer.
1010 *end++ = *string++;
1011 if (*string == '\0') {
1012 break;
1017 * Check to see if we need to back up because we saw a
1018 * trailing 'e' or sign.
1020 if (flags & SCAN_NODIGITS) {
1021 if (flags & SCAN_EXPOK) {
1023 * There were no digits at all so scanning has
1024 * failed and we are done.
1026 if (*string == '\0') {
1027 underflow = 1;
1029 goto done;
1033 * We got a bad exponent ('e' and maybe a sign).
1035 end--;
1036 string--;
1037 if (*end != 'e' && *end != 'E') {
1038 end--;
1039 string--;
1044 * Scan the value from the temporary buffer.
1046 if (!(flags & SCAN_SUPPRESS)) {
1047 double dvalue;
1048 *end = '\0';
1049 dvalue = strtod(buf, nullptr);
1050 returnArray.append(dvalue);
1052 break;
1053 } /* switch (op) */
1054 nconversions++;
1055 } /* while (*format != '\0') */
1057 done:
1058 if (underflow && (0==nconversions)) {
1059 scan_set_error_return(numVars, return_value);
1060 return SCAN_ERROR_EOF;
1061 } else if (nconversions < totalVars) {
1062 /* TODO: not all elements converted. we need to prune the list - cc */
1064 return_value = returnArray;
1065 return SCAN_SUCCESS;
1068 ///////////////////////////////////////////////////////////////////////////////