* bits/types.h: Don't include stddef.h, don't define __need_size_t.
[glibc.git] / stdio-common / vfscanf.c
blob9e6daced5dc035e41bcaa647c064f99372c4e12d
1 /* Copyright (C) 1991-2006, 2007 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
4 The GNU C Library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Lesser General Public
6 License as published by the Free Software Foundation; either
7 version 2.1 of the License, or (at your option) any later version.
9 The GNU C Library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
14 You should have received a copy of the GNU Lesser General Public
15 License along with the GNU C Library; if not, write to the Free
16 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
17 02111-1307 USA. */
19 #include <assert.h>
20 #include <errno.h>
21 #include <limits.h>
22 #include <ctype.h>
23 #include <stdarg.h>
24 #include <stdio.h>
25 #include <stdint.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <wchar.h>
29 #include <wctype.h>
30 #include <bits/libc-lock.h>
31 #include <locale/localeinfo.h>
/* Select the widest integer type used by the conversions: `long long'
   when compiling with GCC (HAVE_LONGLONG is defined as well), plain
   `long' otherwise. */
33 #ifdef __GNUC__
34 # define HAVE_LONGLONG
35 # define LONGLONG long long
36 #else
37 # define LONGLONG long
38 #endif
40 /* Determine whether we have to handle `long long' at all.
   need_longlong is 0 when LONG_MAX equals LONG_LONG_MAX, i.e. `long'
   already covers the range of `long long', and 1 otherwise. */
41 #if LONG_MAX == LONG_LONG_MAX
42 # define need_longlong 0
43 #else
44 # define need_longlong 1
45 #endif
47 /* Determine whether we have to handle `long'.
   need_long is 0 when INT_MAX equals LONG_MAX, and 1 otherwise. */
48 #if INT_MAX == LONG_MAX
49 # define need_long 0
50 #else
51 # define need_long 1
52 #endif
54 /* These are flags in the conversion format. Each one is a distinct
   bit so that several can be OR-ed together into the scanner's `flags'
   variable; for example the `ll' and `L'/`q' modifiers set both
   LONGDBL and LONG. */
55 #define LONG 0x0001 /* l: long or double */
56 #define LONGDBL 0x0002 /* L: long long or long double */
57 #define SHORT 0x0004 /* h: short */
58 #define SUPPRESS 0x0008 /* *: suppress assignment */
59 #define POINTER 0x0010 /* weird %p pointer (`fake hex') */
60 #define NOSKIP 0x0020 /* do not skip blanks */
61 #define NUMBER_SIGNED 0x0040 /* signed integer */
62 #define GROUP 0x0080 /* ': group numbers */
63 #define MALLOC 0x0100 /* a: malloc strings */
64 #define CHAR 0x0200 /* hh: char */
65 #define I18N 0x0400 /* I: use locale's digits */
66 #define HEXA_FLOAT 0x0800 /* hexadecimal float */
67 #define READ_POINTER 0x1000 /* this is a pointer value */
70 #include <locale/localeinfo.h>
71 #include <libioP.h>
72 #include <libio.h>
/* From here on `va_list' in this file names libio's _IO_va_list. */
74 #undef va_list
75 #define va_list _IO_va_list
/* Character-type abstraction. With COMPILE_WSCANF defined the macros
   below work on wide characters (wchar_t / wint_t, WEOF); otherwise
   they work on bytes (char / int, EOF). The macros ungetc,
   ungetc_not_eof and inchar rely on variables of the code that uses
   them: `c' (last character read), `read_in' (count of characters
   read), `inchar_errno' and, for inchar, the stream `s'. */
77 #ifdef COMPILE_WSCANF
/* Push C back onto stream S and decrement read_in; does nothing when
   C is WEOF. */
78 # define ungetc(c, s) ((void) (c == WEOF \
79 || (--read_in, \
80 INTUSE(_IO_sputbackwc) (s, c))))
/* Like ungetc, but without the WEOF test. */
81 # define ungetc_not_eof(c, s) ((void) (--read_in, \
82 INTUSE(_IO_sputbackwc) (s, c)))
/* Read the next wide character into `c' and yield it. A successful
   read increments read_in; a read that returns WEOF saves errno in
   inchar_errno. If `c' is already WEOF nothing is read: errno is
   restored from inchar_errno and WEOF is yielded again. */
83 # define inchar() (c == WEOF ? ((errno = inchar_errno), WEOF) \
84 : ((c = _IO_getwc_unlocked (s)), \
85 (void) (c != WEOF \
86 ? ++read_in \
87 : (size_t) (inchar_errno = errno)), c))
/* Wide-character versions of the copy and classification helpers. */
89 # define MEMCPY(d, s, n) __wmemcpy (d, s, n)
90 # define ISSPACE(Ch) iswspace (Ch)
91 # define ISDIGIT(Ch) iswdigit (Ch)
92 # define ISXDIGIT(Ch) iswxdigit (Ch)
93 # define TOLOWER(Ch) towlower (Ch)
/* Return WEOF from the enclosing function unless _IO_fwide (s, 1)
   reports 1. */
94 # define ORIENT if (_IO_fwide (s, 1) != 1) return WEOF
/* Redirect the __strto*_internal names to their __wcsto*_internal
   counterparts. */
95 # define __strtoll_internal __wcstoll_internal
96 # define __strtoull_internal __wcstoull_internal
97 # define __strtol_internal __wcstol_internal
98 # define __strtoul_internal __wcstoul_internal
99 # define __strtold_internal __wcstold_internal
100 # define __strtod_internal __wcstod_internal
101 # define __strtof_internal __wcstof_internal
/* L_ turns a character or string literal into its wide form; CHAR_T,
   UCHAR_T and WINT_T are the character types used by the scanner. */
103 # define L_(Str) L##Str
104 # define CHAR_T wchar_t
105 # define UCHAR_T unsigned int
106 # define WINT_T wint_t
/* In the wide build EOF means WEOF for the rest of the file. */
107 # undef EOF
108 # define EOF WEOF
109 #else
/* Push C (converted to unsigned char) back onto stream S and decrement
   read_in; does nothing when C is EOF. */
110 # define ungetc(c, s) ((void) ((int) c == EOF \
111 || (--read_in, \
112 INTUSE(_IO_sputbackc) (s, (unsigned char) c))))
/* Like ungetc, but without the EOF test. */
113 # define ungetc_not_eof(c, s) ((void) (--read_in, \
114 INTUSE(_IO_sputbackc) (s, (unsigned char) c)))
/* Read the next byte into `c' and yield it. A successful read
   increments read_in; a read that returns EOF saves errno in
   inchar_errno. If `c' is already EOF nothing is read: errno is
   restored from inchar_errno and EOF is yielded again. */
115 # define inchar() (c == EOF ? ((errno = inchar_errno), EOF) \
116 : ((c = _IO_getc_unlocked (s)), \
117 (void) (c != EOF \
118 ? ++read_in \
119 : (size_t) (inchar_errno = errno)), c))
/* Byte versions of the copy and classification helpers; the
   classification macros use the locale object `loc' of the code that
   uses them. */
120 # define MEMCPY(d, s, n) memcpy (d, s, n)
121 # define ISSPACE(Ch) __isspace_l (Ch, loc)
122 # define ISDIGIT(Ch) __isdigit_l (Ch, loc)
123 # define ISXDIGIT(Ch) __isxdigit_l (Ch, loc)
124 # define TOLOWER(Ch) __tolower_l ((unsigned char) (Ch), loc)
/* Return EOF from the enclosing function when _IO_vtable_offset (s)
   is 0 and _IO_fwide (s, -1) does not report -1. */
125 # define ORIENT if (_IO_vtable_offset (s) == 0 \
126 && _IO_fwide (s, -1) != -1) \
127 return EOF
/* L_ leaves literals unchanged; CHAR_T, UCHAR_T and WINT_T are the
   character types used by the scanner. */
129 # define L_(Str) Str
130 # define CHAR_T char
131 # define UCHAR_T unsigned char
132 # define WINT_T int
133 #endif
/* Error exits. Each macro stores a reason code in `errval' and jumps
   to the `errout' label of the function that uses it.
   encode_error: errval 4, and errno is set to EILSEQ. */
135 #define encode_error() do { \
136 errval = 4; \
137 __set_errno (EILSEQ); \
138 goto errout; \
139 } while (0)
/* conv_error: errval 2. */
140 #define conv_error() do { \
141 errval = 2; \
142 goto errout; \
143 } while (0)
/* input_error: errval 1; if no assignment has been made yet
   (done == 0), `done' becomes EOF. */
144 #define input_error() do { \
145 errval = 1; \
146 if (done == 0) done = EOF; \
147 goto errout; \
148 } while (0)
/* Validate the arguments of the scanning function. Runs
   CHECK_FILE (s, EOF); then, if the stream has _IO_NO_READS set, sets
   errno to EBADF and returns EOF from the enclosing function; else,
   if FORMAT is NULL, invokes MAYBE_SET_EINVAL and returns EOF. */
149 #define ARGCHECK(s, format) \
150 do \
152 /* Check file argument for consistency. */ \
153 CHECK_FILE (s, EOF); \
154 if (s->_flags & _IO_NO_READS) \
156 __set_errno (EBADF); \
157 return EOF; \
159 else if (format == NULL) \
161 MAYBE_SET_EINVAL; \
162 return EOF; \
164 } while (0)
/* LOCK_STREAM starts a cleanup region that has _IO_funlockfile (S)
   as its handler and then locks stream S with _IO_flockfile. It
   expands to two statements and is deliberately not wrapped in
   do { } while (0).
   NOTE(review): presumably __libc_cleanup_region_start opens a scope
   that __libc_cleanup_region_end closes, so the two macros must be
   used as a pair in the same block -- confirm in bits/libc-lock.h. */
165 #define LOCK_STREAM(S) \
166 __libc_cleanup_region_start (1, (void (*) (void *)) &_IO_funlockfile, (S)); \
167 _IO_flockfile (S)
/* UNLOCK_STREAM unlocks stream S and ends the cleanup region,
   passing 0 to __libc_cleanup_region_end. */
168 #define UNLOCK_STREAM(S) \
169 _IO_funlockfile (S); \
170 __libc_cleanup_region_end (0)
173 /* Read formatted input from S according to the format string
174 FORMAT, using the argument list in ARG.
175 Return the number of assignments made, or -1 for an input error. */
176 #ifdef COMPILE_WSCANF
178 _IO_vfwscanf (_IO_FILE *s, const wchar_t *format, _IO_va_list argptr,
179 int *errp)
180 #else
182 _IO_vfscanf_internal (_IO_FILE *s, const char *format, _IO_va_list argptr,
183 int *errp)
184 #endif
186 va_list arg;
187 register const CHAR_T *f = format;
188 register UCHAR_T fc; /* Current character of the format. */
189 register WINT_T done = 0; /* Assignments done. */
190 register size_t read_in = 0; /* Chars read in. */
191 register WINT_T c = 0; /* Last char read. */
192 register int width; /* Maximum field width. */
193 register int flags; /* Modifiers for current format element. */
194 int errval = 0;
195 #ifndef COMPILE_WSCANF
196 __locale_t loc = _NL_CURRENT_LOCALE;
197 struct locale_data *const curctype = loc->__locales[LC_CTYPE];
198 #endif
200 /* Errno of last failed inchar call. */
201 int inchar_errno = 0;
202 /* Status for reading F-P nums. */
203 char got_dot, got_e, negative;
204 /* If a [...] is a [^...]. */
205 CHAR_T not_in;
206 #define exp_char not_in
207 /* Base for integral numbers. */
208 int base;
209 /* Decimal point character. */
210 #ifdef COMPILE_WSCANF
211 wint_t decimal;
212 #else
213 const char *decimal;
214 #endif
215 /* The thousands character of the current locale. */
216 #ifdef COMPILE_WSCANF
217 wint_t thousands;
218 #else
219 const char *thousands;
220 #endif
221 /* State for the conversions. */
222 mbstate_t state;
223 /* Integral holding variables. */
224 union
226 long long int q;
227 unsigned long long int uq;
228 long int l;
229 unsigned long int ul;
230 } num;
231 /* Character-buffer pointer. */
232 char *str = NULL;
233 wchar_t *wstr = NULL;
234 char **strptr = NULL;
235 ssize_t strsize = 0;
236 /* We must not react to white space immediately because it can
237 possibly be matched even if no character is available in the
238 input stream anymore. */
239 int skip_space = 0;
240 /* Workspace. */
241 CHAR_T *tw; /* Temporary pointer. */
242 CHAR_T *wp = NULL; /* Workspace. */
243 size_t wpmax = 0; /* Maximal size of workspace. */
244 size_t wpsize; /* Currently used bytes in workspace. */
245 #define ADDW(Ch) \
246 do \
248 if (wpsize == wpmax) \
250 CHAR_T *old = wp; \
251 wpmax = (UCHAR_MAX + 1 > 2 * wpmax ? UCHAR_MAX + 1 : 2 * wpmax); \
252 wp = (CHAR_T *) alloca (wpmax * sizeof (wchar_t)); \
253 if (old != NULL) \
254 MEMCPY (wp, old, wpsize); \
256 wp[wpsize++] = (Ch); \
258 while (0)
260 #ifdef __va_copy
261 __va_copy (arg, argptr);
262 #else
263 arg = (va_list) argptr;
264 #endif
266 #ifdef ORIENT
267 ORIENT;
268 #endif
270 ARGCHECK (s, format);
273 #ifndef COMPILE_WSCANF
274 struct locale_data *const curnumeric = loc->__locales[LC_NUMERIC];
275 #endif
277 /* Figure out the decimal point character. */
278 #ifdef COMPILE_WSCANF
279 decimal = _NL_CURRENT_WORD (LC_NUMERIC, _NL_NUMERIC_DECIMAL_POINT_WC);
280 #else
281 decimal = curnumeric->values[_NL_ITEM_INDEX (DECIMAL_POINT)].string;
282 #endif
283 /* Figure out the thousands separator character. */
284 #ifdef COMPILE_WSCANF
285 thousands = _NL_CURRENT_WORD (LC_NUMERIC, _NL_NUMERIC_THOUSANDS_SEP_WC);
286 #else
287 thousands = curnumeric->values[_NL_ITEM_INDEX (THOUSANDS_SEP)].string;
288 if (*thousands == '\0')
289 thousands = NULL;
290 #endif
293 /* Lock the stream. */
294 LOCK_STREAM (s);
297 #ifndef COMPILE_WSCANF
298 /* From now on we use `state' to convert the format string. */
299 memset (&state, '\0', sizeof (state));
300 #endif
302 /* Run through the format string. */
303 while (*f != '\0')
305 unsigned int argpos;
306 /* Extract the next argument, which is of type TYPE.
307 For a %N$... spec, this is the Nth argument from the beginning;
308 otherwise it is the next argument after the state now in ARG. */
309 #ifdef __va_copy
310 # define ARG(type) (argpos == 0 ? va_arg (arg, type) : \
311 ({ unsigned int pos = argpos; \
312 va_list arg; \
313 __va_copy (arg, argptr); \
314 while (--pos > 0) \
315 (void) va_arg (arg, void *); \
316 va_arg (arg, type); \
318 #else
319 # if 0
320 /* XXX Possible optimization. */
321 # define ARG(type) (argpos == 0 ? va_arg (arg, type) : \
322 ({ va_list arg = (va_list) argptr; \
323 arg = (va_list) ((char *) arg \
324 + (argpos - 1) \
325 * __va_rounded_size (void *)); \
326 va_arg (arg, type); \
328 # else
329 # define ARG(type) (argpos == 0 ? va_arg (arg, type) : \
330 ({ unsigned int pos = argpos; \
331 va_list arg = (va_list) argptr; \
332 while (--pos > 0) \
333 (void) va_arg (arg, void *); \
334 va_arg (arg, type); \
336 # endif
337 #endif
339 #ifndef COMPILE_WSCANF
340 if (!isascii ((unsigned char) *f))
342 /* Non-ASCII, may be a multibyte. */
343 int len = __mbrlen (f, strlen (f), &state);
344 if (len > 0)
348 c = inchar ();
349 if (__builtin_expect (c == EOF, 0))
350 input_error ();
351 else if (c != (unsigned char) *f++)
353 ungetc_not_eof (c, s);
354 conv_error ();
357 while (--len > 0);
358 continue;
361 #endif
363 fc = *f++;
364 if (fc != '%')
366 /* Remember to skip spaces. */
367 if (ISSPACE (fc))
369 skip_space = 1;
370 continue;
373 /* Read a character. */
374 c = inchar ();
376 /* Characters other than format specs must just match. */
377 if (__builtin_expect (c == EOF, 0))
378 input_error ();
380 /* We saw white space char as the last character in the format
381 string. Now it's time to skip all leading white space. */
382 if (skip_space)
384 while (ISSPACE (c))
385 if (__builtin_expect (inchar () == EOF, 0))
386 input_error ();
387 skip_space = 0;
390 if (__builtin_expect (c != fc, 0))
392 ungetc (c, s);
393 conv_error ();
396 continue;
399 /* This is the start of the conversion string. */
400 flags = 0;
402 /* Initialize state of modifiers. */
403 argpos = 0;
405 /* Prepare temporary buffer. */
406 wpsize = 0;
408 /* Check for a positional parameter specification. */
409 if (ISDIGIT ((UCHAR_T) *f))
411 argpos = (UCHAR_T) *f++ - L_('0');
412 while (ISDIGIT ((UCHAR_T) *f))
413 argpos = argpos * 10 + ((UCHAR_T) *f++ - L_('0'));
414 if (*f == L_('$'))
415 ++f;
416 else
418 /* Oops; that was actually the field width. */
419 width = argpos;
420 argpos = 0;
421 goto got_width;
425 /* Check for the assignment-suppressing, the number grouping flag,
426 and the signal to use the locale's digit representation. */
427 while (*f == L_('*') || *f == L_('\'') || *f == L_('I'))
428 switch (*f++)
430 case L_('*'):
431 flags |= SUPPRESS;
432 break;
433 case L_('\''):
434 #ifdef COMPILE_WSCANF
435 if (thousands != L'\0')
436 #else
437 if (thousands != NULL)
438 #endif
439 flags |= GROUP;
440 break;
441 case L_('I'):
442 flags |= I18N;
443 break;
446 /* Find the maximum field width. */
447 width = 0;
448 while (ISDIGIT ((UCHAR_T) *f))
450 width *= 10;
451 width += (UCHAR_T) *f++ - L_('0');
453 got_width:
454 if (width == 0)
455 width = -1;
457 /* Check for type modifiers. */
458 switch (*f++)
460 case L_('h'):
461 /* ints are short ints or chars. */
462 if (*f == L_('h'))
464 ++f;
465 flags |= CHAR;
467 else
468 flags |= SHORT;
469 break;
470 case L_('l'):
471 if (*f == L_('l'))
473 /* A double `l' is equivalent to an `L'. */
474 ++f;
475 flags |= LONGDBL | LONG;
477 else
478 /* ints are long ints. */
479 flags |= LONG;
480 break;
481 case L_('q'):
482 case L_('L'):
483 /* doubles are long doubles, and ints are long long ints. */
484 flags |= LONGDBL | LONG;
485 break;
486 case L_('a'):
487 /* The `a' is used as a flag only if followed by `s', `S' or
488 `['. */
489 if (*f != L_('s') && *f != L_('S') && *f != L_('['))
491 --f;
492 break;
494 /* String conversions (%s, %[) take a `char **'
495 arg and fill it in with a malloc'd pointer. */
496 flags |= MALLOC;
497 break;
498 case L_('z'):
499 if (need_longlong && sizeof (size_t) > sizeof (unsigned long int))
500 flags |= LONGDBL;
501 else if (sizeof (size_t) > sizeof (unsigned int))
502 flags |= LONG;
503 break;
504 case L_('j'):
505 if (need_longlong && sizeof (uintmax_t) > sizeof (unsigned long int))
506 flags |= LONGDBL;
507 else if (sizeof (uintmax_t) > sizeof (unsigned int))
508 flags |= LONG;
509 break;
510 case L_('t'):
511 if (need_longlong && sizeof (ptrdiff_t) > sizeof (long int))
512 flags |= LONGDBL;
513 else if (sizeof (ptrdiff_t) > sizeof (int))
514 flags |= LONG;
515 break;
516 default:
517 /* Not a recognized modifier. Backup. */
518 --f;
519 break;
522 /* End of the format string? */
523 if (__builtin_expect (*f == L_('\0'), 0))
524 conv_error ();
526 /* Find the conversion specifier. */
527 fc = *f++;
528 if (skip_space || (fc != L_('[') && fc != L_('c')
529 && fc != L_('C') && fc != L_('n')))
531 /* Eat whitespace. */
532 int save_errno = errno;
533 __set_errno (0);
535 /* We add the additional test for EOF here since otherwise
536 inchar will restore the old errno value which might be
537 EINTR but does not indicate an interrupt since nothing
538 was read at this time. */
539 if (__builtin_expect ((c == EOF || inchar () == EOF)
540 && errno == EINTR, 0))
541 input_error ();
542 while (ISSPACE (c));
543 __set_errno (save_errno);
544 ungetc (c, s);
545 skip_space = 0;
548 switch (fc)
550 case L_('%'): /* Must match a literal '%'. */
551 c = inchar ();
552 if (__builtin_expect (c == EOF, 0))
553 input_error ();
554 if (__builtin_expect (c != fc, 0))
556 ungetc_not_eof (c, s);
557 conv_error ();
559 break;
561 case L_('n'): /* Answer number of assignments done. */
562 /* Corrigendum 1 to ISO C 1990 describes the allowed flags
563 with the 'n' conversion specifier. */
564 if (!(flags & SUPPRESS))
566 /* Don't count the read-ahead. */
567 if (need_longlong && (flags & LONGDBL))
568 *ARG (long long int *) = read_in;
569 else if (need_long && (flags & LONG))
570 *ARG (long int *) = read_in;
571 else if (flags & SHORT)
572 *ARG (short int *) = read_in;
573 else if (!(flags & CHAR))
574 *ARG (int *) = read_in;
575 else
576 *ARG (char *) = read_in;
578 #ifdef NO_BUG_IN_ISO_C_CORRIGENDUM_1
579 /* We have a severe problem here. The ISO C standard
580 contradicts itself in explaining the effect of the %n
581 format in `scanf'. While in ISO C:1990 and the ISO C
582 Amendment 1:1995 the result is described as
584 Execution of a %n directive does not affect the
585 assignment count returned at the completion of
586 execution of the f(w)scanf function.
588 in ISO C Corrigendum 1:1994 the following was added:
590 Subclause 7.9.6.2
591 Add the following fourth example:
593 #include <stdio.h>
594 int d1, d2, n1, n2, i;
595 i = sscanf("123", "%d%n%n%d", &d1, &n1, &n2, &d2);
596 the value 123 is assigned to d1 and the value 3 to n1.
597 Because %n can never get an input failure the value
598 of 3 is also assigned to n2. The value of d2 is not
599 affected. The value 3 is assigned to i.
601 We go for now with the historically correct code from ISO C,
602 i.e., we don't count the %n assignments. If it ever
603 should prove to be wrong just remove the #ifdef above. */
604 ++done;
605 #endif
607 break;
609 case L_('c'): /* Match characters. */
610 if ((flags & LONG) == 0)
612 if (!(flags & SUPPRESS))
614 str = ARG (char *);
615 if (str == NULL)
616 conv_error ();
619 c = inchar ();
620 if (__builtin_expect (c == EOF, 0))
621 input_error ();
623 if (width == -1)
624 width = 1;
626 #ifdef COMPILE_WSCANF
627 /* We have to convert the wide character(s) into multibyte
628 characters and store the result. */
629 memset (&state, '\0', sizeof (state));
633 size_t n;
635 n = __wcrtomb (!(flags & SUPPRESS) ? str : NULL, c, &state);
636 if (__builtin_expect (n == (size_t) -1, 0))
637 /* No valid wide character. */
638 input_error ();
640 /* Increment the output pointer. Even if we don't
641 write anything. */
642 str += n;
644 while (--width > 0 && inchar () != EOF);
645 #else
646 if (!(flags & SUPPRESS))
649 *str++ = c;
650 while (--width > 0 && inchar () != EOF);
652 else
653 while (--width > 0 && inchar () != EOF);
654 #endif
656 if (!(flags & SUPPRESS))
657 ++done;
659 break;
661 /* FALLTHROUGH */
662 case L_('C'):
663 if (!(flags & SUPPRESS))
665 wstr = ARG (wchar_t *);
666 if (wstr == NULL)
667 conv_error ();
670 c = inchar ();
671 if (__builtin_expect (c == EOF, 0))
672 input_error ();
674 #ifdef COMPILE_WSCANF
675 /* Just store the incoming wide characters. */
676 if (!(flags & SUPPRESS))
679 *wstr++ = c;
680 while (--width > 0 && inchar () != EOF);
682 else
683 while (--width > 0 && inchar () != EOF);
684 #else
686 /* We have to convert the multibyte input sequence to wide
687 characters. */
688 char buf[1];
689 mbstate_t cstate;
691 memset (&cstate, '\0', sizeof (cstate));
695 /* This is what we present to the mbrtowc function first. */
696 buf[0] = c;
698 while (1)
700 size_t n;
702 n = __mbrtowc (!(flags & SUPPRESS) ? wstr : NULL,
703 buf, 1, &cstate);
705 if (n == (size_t) -2)
707 /* Possibly correct character, just not enough
708 input. */
709 if (__builtin_expect (inchar () == EOF, 0))
710 encode_error ();
712 buf[0] = c;
713 continue;
716 if (__builtin_expect (n != 1, 0))
717 encode_error ();
719 /* We have a match. */
720 break;
723 /* Advance the result pointer. */
724 ++wstr;
726 while (--width > 0 && inchar () != EOF);
728 #endif
730 if (!(flags & SUPPRESS))
731 ++done;
733 break;
735 case L_('s'): /* Read a string. */
736 if (!(flags & LONG))
738 #define STRING_ARG(Str, Type) \
739 do if (!(flags & SUPPRESS)) \
741 if (flags & MALLOC) \
743 /* The string is to be stored in a malloc'd buffer. */ \
744 strptr = ARG (char **); \
745 if (strptr == NULL) \
746 conv_error (); \
747 /* Allocate an initial buffer. */ \
748 strsize = 100; \
749 *strptr = (char *) malloc (strsize * sizeof (Type)); \
750 Str = (Type *) *strptr; \
752 else \
753 Str = ARG (Type *); \
754 if (Str == NULL) \
755 conv_error (); \
756 } while (0)
757 STRING_ARG (str, char);
759 c = inchar ();
760 if (__builtin_expect (c == EOF, 0))
761 input_error ();
763 #ifdef COMPILE_WSCANF
764 memset (&state, '\0', sizeof (state));
765 #endif
769 if (ISSPACE (c))
771 ungetc_not_eof (c, s);
772 break;
775 #ifdef COMPILE_WSCANF
776 /* This is quite complicated. We have to convert the
777 wide characters into multibyte characters and then
778 store them. */
780 size_t n;
782 if (!(flags & SUPPRESS) && (flags & MALLOC)
783 && str + MB_CUR_MAX >= *strptr + strsize)
785 /* We have to enlarge the buffer if the `a' flag
786 was given. */
787 size_t strleng = str - *strptr;
788 char *newstr;
790 newstr = (char *) realloc (*strptr, strsize * 2);
791 if (newstr == NULL)
793 /* Can't allocate that much. Last-ditch
794 effort. */
795 newstr = (char *) realloc (*strptr,
796 strleng + MB_CUR_MAX);
797 if (newstr == NULL)
799 /* We lose. Oh well. Terminate the
800 string and stop converting,
801 so at least we don't skip any input. */
802 ((char *) (*strptr))[strleng] = '\0';
803 ++done;
804 conv_error ();
806 else
808 *strptr = newstr;
809 str = newstr + strleng;
810 strsize = strleng + MB_CUR_MAX;
813 else
815 *strptr = newstr;
816 str = newstr + strleng;
817 strsize *= 2;
821 n = __wcrtomb (!(flags & SUPPRESS) ? str : NULL, c,
822 &state);
823 if (__builtin_expect (n == (size_t) -1, 0))
824 encode_error ();
826 assert (n <= MB_CUR_MAX);
827 str += n;
829 #else
830 /* This is easy. */
831 if (!(flags & SUPPRESS))
833 *str++ = c;
834 if ((flags & MALLOC)
835 && (char *) str == *strptr + strsize)
837 /* Enlarge the buffer. */
838 str = (char *) realloc (*strptr, 2 * strsize);
839 if (str == NULL)
841 /* Can't allocate that much. Last-ditch
842 effort. */
843 str = (char *) realloc (*strptr, strsize + 1);
844 if (str == NULL)
846 /* We lose. Oh well. Terminate the
847 string and stop converting,
848 so at least we don't skip any input. */
849 ((char *) (*strptr))[strsize - 1] = '\0';
850 ++done;
851 conv_error ();
853 else
855 *strptr = (char *) str;
856 str += strsize;
857 ++strsize;
860 else
862 *strptr = (char *) str;
863 str += strsize;
864 strsize *= 2;
868 #endif
870 while ((width <= 0 || --width > 0) && inchar () != EOF);
872 if (!(flags & SUPPRESS))
874 #ifdef COMPILE_WSCANF
875 /* We have to emit the code to get into the initial
876 state. */
877 char buf[MB_LEN_MAX];
878 size_t n = __wcrtomb (buf, L'\0', &state);
879 if (n > 0 && (flags & MALLOC)
880 && str + n >= *strptr + strsize)
882 /* Enlarge the buffer. */
883 size_t strleng = str - *strptr;
884 char *newstr;
886 newstr = (char *) realloc (*strptr, strleng + n + 1);
887 if (newstr == NULL)
889 /* We lose. Oh well. Terminate the string
890 and stop converting, so at least we don't
891 skip any input. */
892 ((char *) (*strptr))[strleng] = '\0';
893 ++done;
894 conv_error ();
896 else
898 *strptr = newstr;
899 str = newstr + strleng;
900 strsize = strleng + n + 1;
904 str = __mempcpy (str, buf, n);
905 #endif
906 *str++ = '\0';
908 if ((flags & MALLOC) && str - *strptr != strsize)
910 char *cp = (char *) realloc (*strptr, str - *strptr);
911 if (cp != NULL)
912 *strptr = cp;
915 ++done;
917 break;
919 /* FALLTHROUGH */
921 case L_('S'):
923 #ifndef COMPILE_WSCANF
924 mbstate_t cstate;
925 #endif
927 /* Wide character string. */
928 STRING_ARG (wstr, wchar_t);
930 c = inchar ();
931 if (__builtin_expect (c == EOF, 0))
932 input_error ();
934 #ifndef COMPILE_WSCANF
935 memset (&cstate, '\0', sizeof (cstate));
936 #endif
940 if (ISSPACE (c))
942 ungetc_not_eof (c, s);
943 break;
946 #ifdef COMPILE_WSCANF
947 /* This is easy. */
948 if (!(flags & SUPPRESS))
950 *wstr++ = c;
951 if ((flags & MALLOC)
952 && wstr == (wchar_t *) *strptr + strsize)
954 /* Enlarge the buffer. */
955 wstr = (wchar_t *) realloc (*strptr,
956 (2 * strsize)
957 * sizeof (wchar_t));
958 if (wstr == NULL)
960 /* Can't allocate that much. Last-ditch
961 effort. */
962 wstr = (wchar_t *) realloc (*strptr,
963 (strsize + 1)
964 * sizeof (wchar_t));
965 if (wstr == NULL)
967 /* We lose. Oh well. Terminate the string
968 and stop converting, so at least we don't
969 skip any input. */
970 ((wchar_t *) (*strptr))[strsize - 1] = L'\0';
971 ++done;
972 conv_error ();
974 else
976 *strptr = (char *) wstr;
977 wstr += strsize;
978 ++strsize;
981 else
983 *strptr = (char *) wstr;
984 wstr += strsize;
985 strsize *= 2;
989 #else
991 char buf[1];
993 buf[0] = c;
995 while (1)
997 size_t n;
999 n = __mbrtowc (!(flags & SUPPRESS) ? wstr : NULL,
1000 buf, 1, &cstate);
1002 if (n == (size_t) -2)
1004 /* Possibly correct character, just not enough
1005 input. */
1006 if (__builtin_expect (inchar () == EOF, 0))
1007 encode_error ();
1009 buf[0] = c;
1010 continue;
1013 if (__builtin_expect (n != 1, 0))
1014 encode_error ();
1016 /* We have a match. */
1017 ++wstr;
1018 break;
1021 if (!(flags & SUPPRESS) && (flags & MALLOC)
1022 && wstr == (wchar_t *) *strptr + strsize)
1024 /* Enlarge the buffer. */
1025 wstr = (wchar_t *) realloc (*strptr,
1026 (2 * strsize
1027 * sizeof (wchar_t)));
1028 if (wstr == NULL)
1030 /* Can't allocate that much. Last-ditch effort. */
1031 wstr = (wchar_t *) realloc (*strptr,
1032 ((strsize + 1)
1033 * sizeof (wchar_t)));
1034 if (wstr == NULL)
1036 /* We lose. Oh well. Terminate the
1037 string and stop converting, so at
1038 least we don't skip any input. */
1039 ((wchar_t *) (*strptr))[strsize - 1] = L'\0';
1040 ++done;
1041 conv_error ();
1043 else
1045 *strptr = (char *) wstr;
1046 wstr += strsize;
1047 ++strsize;
1050 else
1052 *strptr = (char *) wstr;
1053 wstr += strsize;
1054 strsize *= 2;
1058 #endif
1060 while ((width <= 0 || --width > 0) && inchar () != EOF);
1062 if (!(flags & SUPPRESS))
1064 *wstr++ = L'\0';
1066 if ((flags & MALLOC) && wstr - (wchar_t *) *strptr != strsize)
1068 wchar_t *cp = (wchar_t *) realloc (*strptr,
1069 ((wstr
1070 - (wchar_t *) *strptr)
1071 * sizeof(wchar_t)));
1072 if (cp != NULL)
1073 *strptr = (char *) cp;
1076 ++done;
1079 break;
1081 case L_('x'): /* Hexadecimal integer. */
1082 case L_('X'): /* Ditto. */
1083 base = 16;
1084 goto number;
1086 case L_('o'): /* Octal integer. */
1087 base = 8;
1088 goto number;
1090 case L_('u'): /* Unsigned decimal integer. */
1091 base = 10;
1092 goto number;
1094 case L_('d'): /* Signed decimal integer. */
1095 base = 10;
1096 flags |= NUMBER_SIGNED;
1097 goto number;
1099 case L_('i'): /* Generic number. */
1100 base = 0;
1101 flags |= NUMBER_SIGNED;
1103 number:
1104 c = inchar ();
1105 if (__builtin_expect (c == EOF, 0))
1106 input_error ();
1108 /* Check for a sign. */
1109 if (c == L_('-') || c == L_('+'))
1111 ADDW (c);
1112 if (width > 0)
1113 --width;
1114 c = inchar ();
1117 /* Look for a leading indication of base. */
1118 if (width != 0 && c == L_('0'))
1120 if (width > 0)
1121 --width;
1123 ADDW (c);
1124 c = inchar ();
1126 if (width != 0 && TOLOWER (c) == L_('x'))
1128 if (base == 0)
1129 base = 16;
1130 if (base == 16)
1132 if (width > 0)
1133 --width;
1134 c = inchar ();
1137 else if (base == 0)
1138 base = 8;
1141 if (base == 0)
1142 base = 10;
1144 if (base == 10 && __builtin_expect ((flags & I18N) != 0, 0))
1146 int from_level;
1147 int to_level;
1148 int level;
1149 #ifdef COMPILE_WSCANF
1150 const wchar_t *wcdigits[10];
1151 const wchar_t *wcdigits_extended[10];
1152 #else
1153 const char *mbdigits[10];
1154 const char *mbdigits_extended[10];
1155 #endif
1156 /* "to_inpunct" is a map from ASCII digits to their
1157 equivalent in locale. This is defined for locales
1158 which use an extra digits set. */
1159 wctrans_t map = __wctrans ("to_inpunct");
1160 int n;
1162 from_level = 0;
1163 #ifdef COMPILE_WSCANF
1164 to_level = _NL_CURRENT_WORD (LC_CTYPE,
1165 _NL_CTYPE_INDIGITS_WC_LEN) - 1;
1166 #else
1167 to_level = (uint32_t) curctype->values[_NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN)].word - 1;
1168 #endif
1170 /* Get the alternative digit forms if there are any. */
1171 if (__builtin_expect (map != NULL, 0))
1173 /* Adding new level for extra digits set in locale file. */
1174 ++to_level;
1176 for (n = 0; n < 10; ++n)
1178 #ifdef COMPILE_WSCANF
1179 wcdigits[n] = (const wchar_t *)
1180 _NL_CURRENT (LC_CTYPE, _NL_CTYPE_INDIGITS0_WC + n);
1182 wchar_t *wc_extended = (wchar_t *)
1183 alloca ((to_level + 2) * sizeof (wchar_t));
1184 __wmemcpy (wc_extended, wcdigits[n], to_level);
1185 wc_extended[to_level] = __towctrans (L'0' + n, map);
1186 wc_extended[to_level + 1] = '\0';
1187 wcdigits_extended[n] = wc_extended;
1188 #else
1189 mbdigits[n]
1190 = curctype->values[_NL_CTYPE_INDIGITS0_MB + n].string;
1192 /* Get the equivalent wide char in map. */
1193 wint_t extra_wcdigit = __towctrans (L'0' + n, map);
1195 /* Convert it to multibyte representation. */
1196 mbstate_t state;
1197 memset (&state, '\0', sizeof (state));
1199 char extra_mbdigit[MB_LEN_MAX];
1200 size_t mblen
1201 = __wcrtomb (extra_mbdigit, extra_wcdigit, &state);
1203 if (mblen == (size_t) -1)
1205 /* Ignore this new level. */
1206 map = NULL;
1207 break;
1210 /* Calculate the length of mbdigits[n]. */
1211 const char *last_char = mbdigits[n];
1212 for (level = 0; level < to_level; ++level)
1213 last_char = strchr (last_char, '\0') + 1;
1215 size_t mbdigits_len = last_char - mbdigits[n];
1217 /* Allocate memory for extended multibyte digit. */
1218 char *mb_extended;
1219 mb_extended = (char *) alloca (mbdigits_len + mblen + 1);
1221 /* And get the mbdigits + extra_digit string. */
1222 *(char *) __mempcpy (__mempcpy (mb_extended, mbdigits[n],
1223 mbdigits_len),
1224 extra_mbdigit, mblen) = '\0';
1225 mbdigits_extended[n] = mb_extended;
1226 #endif
1230 /* Read the number into workspace. */
1231 while (c != EOF && width != 0)
1233 /* In this round we get the pointer to the digit strings
1234 and also perform the first round of comparisons. */
1235 for (n = 0; n < 10; ++n)
1237 /* Get the string for the digits with value N. */
1238 #ifdef COMPILE_WSCANF
1239 if (__builtin_expect (map != NULL, 0))
1240 wcdigits[n] = wcdigits_extended[n];
1241 else
1242 wcdigits[n] = (const wchar_t *)
1243 _NL_CURRENT (LC_CTYPE, _NL_CTYPE_INDIGITS0_WC + n);
1244 wcdigits[n] += from_level;
1246 if (c == (wint_t) *wcdigits[n])
1248 to_level = from_level;
1249 break;
1252 /* Advance the pointer to the next string. */
1253 ++wcdigits[n];
1254 #else
1255 const char *cmpp;
1256 int avail = width > 0 ? width : INT_MAX;
1258 if (__builtin_expect (map != NULL, 0))
1259 mbdigits[n] = mbdigits_extended[n];
1260 else
1261 mbdigits[n]
1262 = curctype->values[_NL_CTYPE_INDIGITS0_MB + n].string;
1264 for (level = 0; level < from_level; level++)
1265 mbdigits[n] = strchr (mbdigits[n], '\0') + 1;
1267 cmpp = mbdigits[n];
1268 while ((unsigned char) *cmpp == c && avail >= 0)
1270 if (*++cmpp == '\0')
1271 break;
1272 else
1274 if (avail == 0 || inchar () == EOF)
1275 break;
1276 --avail;
1280 if (*cmpp == '\0')
1282 if (width > 0)
1283 width = avail;
1284 to_level = from_level;
1285 break;
1288 /* We are pushing all read characters back. */
1289 if (cmpp > mbdigits[n])
1291 ungetc (c, s);
1292 while (--cmpp > mbdigits[n])
1293 ungetc_not_eof ((unsigned char) *cmpp, s);
1294 c = (unsigned char) *cmpp;
1297 /* Advance the pointer to the next string. */
1298 mbdigits[n] = strchr (mbdigits[n], '\0') + 1;
1299 #endif
1302 if (n == 10)
1304 /* Have not yet found the digit. */
1305 for (level = from_level + 1; level <= to_level; ++level)
1307 /* Search all ten digits of this level. */
1308 for (n = 0; n < 10; ++n)
1310 #ifdef COMPILE_WSCANF
1311 if (c == (wint_t) *wcdigits[n])
1312 break;
1314 /* Advance the pointer to the next string. */
1315 ++wcdigits[n];
1316 #else
1317 const char *cmpp;
1318 int avail = width > 0 ? width : INT_MAX;
1320 cmpp = mbdigits[n];
1321 while ((unsigned char) *cmpp == c && avail >= 0)
1323 if (*++cmpp == '\0')
1324 break;
1325 else
1327 if (avail == 0 || inchar () == EOF)
1328 break;
1329 --avail;
1333 if (*cmpp == '\0')
1335 if (width > 0)
1336 width = avail;
1337 break;
1340 /* We are pushing all read characters back. */
1341 if (cmpp > mbdigits[n])
1343 ungetc (c, s);
1344 while (--cmpp > mbdigits[n])
1345 ungetc_not_eof ((unsigned char) *cmpp, s);
1346 c = (unsigned char) *cmpp;
1349 /* Advance the pointer to the next string. */
1350 mbdigits[n] = strchr (mbdigits[n], '\0') + 1;
1351 #endif
1354 if (n < 10)
1356 /* Found it. */
1357 from_level = level;
1358 to_level = level;
1359 break;
1364 if (n < 10)
1365 c = L_('0') + n;
1366 else if (flags & GROUP)
1368 /* Try matching against the thousands separator. */
1369 #ifdef COMPILE_WSCANF
1370 if (c != thousands)
1371 break;
1372 #else
1373 const char *cmpp = thousands;
1374 int avail = width > 0 ? width : INT_MAX;
1376 while ((unsigned char) *cmpp == c && avail >= 0)
1378 ADDW (c);
1379 if (*++cmpp == '\0')
1380 break;
1381 else
1383 if (avail == 0 || inchar () == EOF)
1384 break;
1385 --avail;
1389 if (*cmpp != '\0')
1391 /* We are pushing all read characters back. */
1392 if (cmpp > thousands)
1394 wpsize -= cmpp - thousands;
1395 ungetc (c, s);
1396 while (--cmpp > thousands)
1397 ungetc_not_eof ((unsigned char) *cmpp, s);
1398 c = (unsigned char) *cmpp;
1400 break;
1403 if (width > 0)
1404 width = avail;
1406 /* The last thousands character will be added back by
1407 the ADDW below. */
1408 --wpsize;
1409 #endif
1411 else
1412 break;
1414 ADDW (c);
1415 if (width > 0)
1416 --width;
1418 c = inchar ();
1421 else
1422 /* Read the number into workspace. */
1423 while (c != EOF && width != 0)
1425 if (base == 16)
1427 if (!ISXDIGIT (c))
1428 break;
1430 else if (!ISDIGIT (c) || (int) (c - L_('0')) >= base)
1432 if (base == 10 && (flags & GROUP))
1434 /* Try matching against the thousands separator. */
1435 #ifdef COMPILE_WSCANF
1436 if (c != thousands)
1437 break;
1438 #else
1439 const char *cmpp = thousands;
1440 int avail = width > 0 ? width : INT_MAX;
1442 while ((unsigned char) *cmpp == c && avail >= 0)
1444 ADDW (c);
1445 if (*++cmpp == '\0')
1446 break;
1447 else
1449 if (avail == 0 || inchar () == EOF)
1450 break;
1451 --avail;
1455 if (*cmpp != '\0')
1457 /* We are pushing all read characters back. */
1458 if (cmpp > thousands)
1460 wpsize -= cmpp - thousands;
1461 ungetc (c, s);
1462 while (--cmpp > thousands)
1463 ungetc_not_eof ((unsigned char) *cmpp, s);
1464 c = (unsigned char) *cmpp;
1466 break;
1469 if (width > 0)
1470 width = avail;
1472 /* The last thousands character will be added back by
1473 the ADDW below. */
1474 --wpsize;
1475 #endif
1477 else
1478 break;
1480 ADDW (c);
1481 if (width > 0)
1482 --width;
1484 c = inchar ();
1487 if (wpsize == 0
1488 || (wpsize == 1 && (wp[0] == L_('+') || wp[0] == L_('-'))))
1490 /* There was no number. If we are supposed to read a pointer
1491 we must recognize "(nil)" as well. */
1492 if (__builtin_expect (wpsize == 0
1493 && (flags & READ_POINTER)
1494 && (width < 0 || width >= 0)
1495 && c == '('
1496 && TOLOWER (inchar ()) == L_('n')
1497 && TOLOWER (inchar ()) == L_('i')
1498 && TOLOWER (inchar ()) == L_('l')
1499 && inchar () == L_(')'), 1))
1500 /* We must produce the value of a NULL pointer. A single
1501 '0' digit is enough. */
1502 ADDW (L_('0'));
1503 else
1505 /* The last read character is not part of the number
1506 anymore. */
1507 ungetc (c, s);
1509 conv_error ();
1512 else
1513 /* The just read character is not part of the number anymore. */
1514 ungetc (c, s);
1516 /* Convert the number. */
1517 ADDW (L_('\0'));
1518 if (need_longlong && (flags & LONGDBL))
1520 if (flags & NUMBER_SIGNED)
1521 num.q = __strtoll_internal (wp, &tw, base, flags & GROUP);
1522 else
1523 num.uq = __strtoull_internal (wp, &tw, base, flags & GROUP);
1525 else
1527 if (flags & NUMBER_SIGNED)
1528 num.l = __strtol_internal (wp, &tw, base, flags & GROUP);
1529 else
1530 num.ul = __strtoul_internal (wp, &tw, base, flags & GROUP);
1532 if (__builtin_expect (wp == tw, 0))
1533 conv_error ();
1535 if (!(flags & SUPPRESS))
1537 if (flags & NUMBER_SIGNED)
1539 if (need_longlong && (flags & LONGDBL))
1540 *ARG (LONGLONG int *) = num.q;
1541 else if (need_long && (flags & LONG))
1542 *ARG (long int *) = num.l;
1543 else if (flags & SHORT)
1544 *ARG (short int *) = (short int) num.l;
1545 else if (!(flags & CHAR))
1546 *ARG (int *) = (int) num.l;
1547 else
1548 *ARG (signed char *) = (signed char) num.ul;
1550 else
1552 if (need_longlong && (flags & LONGDBL))
1553 *ARG (unsigned LONGLONG int *) = num.uq;
1554 else if (need_long && (flags & LONG))
1555 *ARG (unsigned long int *) = num.ul;
1556 else if (flags & SHORT)
1557 *ARG (unsigned short int *)
1558 = (unsigned short int) num.ul;
1559 else if (!(flags & CHAR))
1560 *ARG (unsigned int *) = (unsigned int) num.ul;
1561 else
1562 *ARG (unsigned char *) = (unsigned char) num.ul;
1564 ++done;
1566 break;
1568 case L_('e'): /* Floating-point numbers. */
1569 case L_('E'):
1570 case L_('f'):
1571 case L_('F'):
1572 case L_('g'):
1573 case L_('G'):
1574 case L_('a'):
1575 case L_('A'):
1576 c = inchar ();
1577 if (width > 0)
1578 --width;
1579 if (__builtin_expect (c == EOF, 0))
1580 input_error ();
1582 got_dot = got_e = 0;
1584 /* Check for a sign. */
1585 if (c == L_('-') || c == L_('+'))
1587 negative = c == L_('-');
1588 if (__builtin_expect (width == 0 || inchar () == EOF, 0))
1589 /* EOF is only an input error before we read any chars. */
1590 conv_error ();
1591 if (width > 0)
1592 --width;
1594 else
1595 negative = 0;
1597 /* Take care for the special arguments "nan" and "inf". */
1598 if (TOLOWER (c) == L_('n'))
1600 /* Maybe "nan". */
1601 ADDW (c);
1602 if (__builtin_expect (width == 0
1603 || inchar () == EOF
1604 || TOLOWER (c) != L_('a'), 0))
1605 conv_error ();
1606 if (width > 0)
1607 --width;
1608 ADDW (c);
1609 if (__builtin_expect (width == 0
1610 || inchar () == EOF
1611 || TOLOWER (c) != L_('n'), 0))
1612 conv_error ();
1613 if (width > 0)
1614 --width;
1615 ADDW (c);
1616 /* It is "nan". */
1617 goto scan_float;
1619 else if (TOLOWER (c) == L_('i'))
1621 /* Maybe "inf" or "infinity". */
1622 ADDW (c);
1623 if (__builtin_expect (width == 0
1624 || inchar () == EOF
1625 || TOLOWER (c) != L_('n'), 0))
1626 conv_error ();
1627 if (width > 0)
1628 --width;
1629 ADDW (c);
1630 if (__builtin_expect (width == 0
1631 || inchar () == EOF
1632 || TOLOWER (c) != L_('f'), 0))
1633 conv_error ();
1634 if (width > 0)
1635 --width;
1636 ADDW (c);
1637 /* It is at least "inf". */
1638 if (width != 0 && inchar () != EOF)
1640 if (TOLOWER (c) == L_('i'))
1642 if (width > 0)
1643 --width;
1644 /* Now we have to read the rest as well. */
1645 ADDW (c);
1646 if (__builtin_expect (width == 0
1647 || inchar () == EOF
1648 || TOLOWER (c) != L_('n'), 0))
1649 conv_error ();
1650 if (width > 0)
1651 --width;
1652 ADDW (c);
1653 if (__builtin_expect (width == 0
1654 || inchar () == EOF
1655 || TOLOWER (c) != L_('i'), 0))
1656 conv_error ();
1657 if (width > 0)
1658 --width;
1659 ADDW (c);
1660 if (__builtin_expect (width == 0
1661 || inchar () == EOF
1662 || TOLOWER (c) != L_('t'), 0))
1663 conv_error ();
1664 if (width > 0)
1665 --width;
1666 ADDW (c);
1667 if (__builtin_expect (width == 0
1668 || inchar () == EOF
1669 || TOLOWER (c) != L_('y'), 0))
1670 conv_error ();
1671 if (width > 0)
1672 --width;
1673 ADDW (c);
1675 else
1676 /* Never mind. */
1677 ungetc (c, s);
1679 goto scan_float;
1682 exp_char = L_('e');
1683 if (width != 0 && c == L_('0'))
1685 ADDW (c);
1686 c = inchar ();
1687 if (width > 0)
1688 --width;
1689 if (width != 0 && TOLOWER (c) == L_('x'))
1691 /* It is a number in hexadecimal format. */
1692 ADDW (c);
1694 flags |= HEXA_FLOAT;
1695 exp_char = L_('p');
1697 /* Grouping is not allowed. */
1698 flags &= ~GROUP;
1699 c = inchar ();
1700 if (width > 0)
1701 --width;
1705 while (1)
1707 if (ISDIGIT (c))
1708 ADDW (c);
1709 else if (!got_e && (flags & HEXA_FLOAT) && ISXDIGIT (c))
1710 ADDW (c);
1711 else if (got_e && wp[wpsize - 1] == exp_char
1712 && (c == L_('-') || c == L_('+')))
1713 ADDW (c);
1714 else if (wpsize > 0 && !got_e
1715 && (CHAR_T) TOLOWER (c) == exp_char)
1717 ADDW (exp_char);
1718 got_e = got_dot = 1;
1720 else
1722 #ifdef COMPILE_WSCANF
1723 if (! got_dot && c == decimal)
1725 ADDW (c);
1726 got_dot = 1;
1728 else if ((flags & GROUP) != 0 && ! got_dot && c == thousands)
1729 ADDW (c);
1730 else
1732 /* The last read character is not part of the number
1733 anymore. */
1734 ungetc (c, s);
1735 break;
1737 #else
1738 const char *cmpp = decimal;
1739 int avail = width > 0 ? width : INT_MAX;
1741 if (! got_dot)
1743 while ((unsigned char) *cmpp == c && avail >= 0)
1744 if (*++cmpp == '\0')
1745 break;
1746 else
1748 if (avail == 0 || inchar () == EOF)
1749 break;
1750 --avail;
1754 if (*cmpp == '\0')
1756 /* Add all the characters. */
1757 for (cmpp = decimal; *cmpp != '\0'; ++cmpp)
1758 ADDW ((unsigned char) *cmpp);
1759 if (width > 0)
1760 width = avail;
1761 got_dot = 1;
1763 else
1765 /* Figure out whether it is a thousands separator.
1766 There is one problem: we possibly read more than
1767 one character. We cannot push them back but since
1768 we know that parts of the `decimal' string matched,
1769 we can compare against it. */
1770 const char *cmp2p = thousands;
1772 if ((flags & GROUP) != 0 && ! got_dot)
1774 while (cmp2p - thousands < cmpp - decimal
1775 && *cmp2p == decimal[cmp2p - thousands])
1776 ++cmp2p;
1777 if (cmp2p - thousands == cmpp - decimal)
1779 while ((unsigned char) *cmp2p == c && avail >= 0)
1780 if (*++cmp2p == '\0')
1781 break;
1782 else
1784 if (avail == 0 || inchar () == EOF)
1785 break;
1786 --avail;
1791 if (cmp2p != NULL && *cmp2p == '\0')
1793 /* Add all the characters. */
1794 for (cmpp = thousands; *cmpp != '\0'; ++cmpp)
1795 ADDW ((unsigned char) *cmpp);
1796 if (width > 0)
1797 width = avail;
1799 else
1801 /* The last read character is not part of the number
1802 anymore. */
1803 ungetc (c, s);
1804 break;
1807 #endif
1810 if (width == 0 || inchar () == EOF)
1811 break;
1813 if (width > 0)
1814 --width;
1817 wctrans_t map;
1818 if (__builtin_expect ((flags & I18N) != 0, 0)
1819 /* Hexadecimal floats make no sense here: they would mix
1820 localized digits with ASCII letters. */
1821 && !(flags & HEXA_FLOAT)
1822 /* Minimum requirement. */
1823 && (wpsize == 0 || got_dot)
1824 && (map = __wctrans ("to_inpunct")) != NULL)
1826 /* Reget the first character. */
1827 inchar ();
1829 /* Localized digits, decimal points, and thousands
1830 separator. */
1831 wint_t wcdigits[12];
1833 /* First get decimal equivalent to check if we read it
1834 or not. */
1835 wcdigits[11] = __towctrans (L'.', map);
1837 /* If we have not read any character or have just read
1838 locale decimal point which matches the decimal point
1839 for localized FP numbers, then we may have localized
1840 digits. Note, we test GOT_DOT above. */
1841 #ifdef COMPILE_WSCANF
1842 if (wpsize == 0 || (wpsize == 1 && wcdigits[11] == decimal))
1843 #else
1844 char mbdigits[12][MB_LEN_MAX + 1];
1846 mbstate_t state;
1847 memset (&state, '\0', sizeof (state));
1849 bool match_so_far = wpsize == 0;
1850 size_t mblen = __wcrtomb (mbdigits[11], wcdigits[11], &state);
1851 if (mblen != (size_t) -1)
1853 mbdigits[11][mblen] = '\0';
1854 match_so_far |= (wpsize == strlen (decimal)
1855 && strcmp (decimal, mbdigits[11]) == 0);
1857 else
1859 size_t decimal_len = strlen (decimal);
1860 /* This should always be the case but the data comes
1861 from a file. */
1862 if (decimal_len <= MB_LEN_MAX)
1864 match_so_far |= wpsize == decimal_len;
1865 memcpy (mbdigits[11], decimal, decimal_len + 1);
1867 else
1868 match_so_far = false;
1871 if (match_so_far)
1872 #endif
1874 bool have_locthousands = (flags & GROUP) != 0;
1876 /* Now get the digits and the thousands-sep equivalents. */
1877 for (int n = 0; n < 11; ++n)
1879 if (n < 10)
1880 wcdigits[n] = __towctrans (L'0' + n, map);
1881 else if (n == 10)
1883 wcdigits[10] = __towctrans (L',', map);
1884 have_locthousands &= wcdigits[10] != L'\0';
1887 #ifndef COMPILE_WSCANF
1888 memset (&state, '\0', sizeof (state));
1890 size_t mblen = __wcrtomb (mbdigits[n], wcdigits[n],
1891 &state);
1892 if (mblen == (size_t) -1)
1894 if (n == 10)
1896 if (have_locthousands)
1898 size_t thousands_len = strlen (thousands);
1899 if (thousands_len <= MB_LEN_MAX)
1900 memcpy (mbdigits[10], thousands,
1901 thousands_len + 1);
1902 else
1903 have_locthousands = false;
1906 else
1907 /* Ignore checking against localized digits. */
1908 goto no_i18nflt;
1910 else
1911 mbdigits[n][mblen] = '\0';
1912 #endif
1915 /* Start checking against localized digits, if
1916 conversion is done correctly. */
1917 while (1)
1919 if (got_e && wp[wpsize - 1] == exp_char
1920 && (c == L_('-') || c == L_('+')))
1921 ADDW (c);
1922 else if (wpsize > 0 && !got_e
1923 && (CHAR_T) TOLOWER (c) == exp_char)
1925 ADDW (exp_char);
1926 got_e = got_dot = 1;
1928 else
1930 /* Check against localized digits, decimal point,
1931 and thousands separator. */
1932 int n;
1933 for (n = 0; n < 12; ++n)
1935 #ifdef COMPILE_WSCANF
1936 if (c == wcdigits[n])
1938 if (n < 10)
1939 ADDW (L_('0') + n);
1940 else if (n == 11 && !got_dot)
1942 ADDW (decimal);
1943 got_dot = 1;
1945 else if (n == 10 && have_locthousands
1946 && ! got_dot)
1947 ADDW (thousands);
1948 else
1949 /* The last read character is not part
1950 of the number anymore. */
1951 n = 12;
1953 break;
1955 #else
1956 const char *cmpp = mbdigits[n];
1957 int avail = width > 0 ? width : INT_MAX;
1959 while ((unsigned char) *cmpp == c && avail >= 0)
1960 if (*++cmpp == '\0')
1961 break;
1962 else
1964 if (avail == 0 || inchar () == EOF)
1965 break;
1966 --avail;
1968 if (*cmpp == '\0')
1970 if (width > 0)
1971 width = avail;
1973 if (n < 10)
1974 ADDW (L_('0') + n);
1975 else if (n == 11 && !got_dot)
1977 /* Add all the characters. */
1978 for (cmpp = decimal; *cmpp != '\0';
1979 ++cmpp)
1980 ADDW ((unsigned char) *cmpp);
1982 got_dot = 1;
1984 else if (n == 10 && (flags & GROUP) != 0
1985 && ! got_dot)
1987 /* Add all the characters. */
1988 for (cmpp = thousands; *cmpp != '\0';
1989 ++cmpp)
1990 ADDW ((unsigned char) *cmpp);
1992 else
1993 /* The last read character is not part
1994 of the number anymore. */
1995 n = 12;
1997 break;
2000 /* We are pushing all read characters back. */
2001 if (cmpp > mbdigits[n])
2003 ungetc (c, s);
2004 while (--cmpp > mbdigits[n])
2005 ungetc_not_eof ((unsigned char) *cmpp, s);
2006 c = (unsigned char) *cmpp;
2008 #endif
2011 if (n >= 12)
2013 /* The last read character is not part
2014 of the number anymore. */
2015 ungetc (c, s);
2016 break;
2020 if (width == 0 || inchar () == EOF)
2021 break;
2023 if (width > 0)
2024 --width;
2028 #ifndef COMPILE_WSCANF
2029 no_i18nflt:
2031 #endif
2034 /* Have we read any character? If we try to read a number
2035 in hexadecimal notation and we have read only the `0x'
2036 prefix this is an error. */
2037 if (__builtin_expect (wpsize == 0
2038 || ((flags & HEXA_FLOAT) && wpsize == 2), 0))
2039 conv_error ();
2041 scan_float:
2042 /* Convert the number. */
2043 ADDW (L_('\0'));
2044 if ((flags & LONGDBL) && !__ldbl_is_dbl)
2046 long double d = __strtold_internal (wp, &tw, flags & GROUP);
2047 if (!(flags & SUPPRESS) && tw != wp)
2048 *ARG (long double *) = negative ? -d : d;
2050 else if (flags & (LONG | LONGDBL))
2052 double d = __strtod_internal (wp, &tw, flags & GROUP);
2053 if (!(flags & SUPPRESS) && tw != wp)
2054 *ARG (double *) = negative ? -d : d;
2056 else
2058 float d = __strtof_internal (wp, &tw, flags & GROUP);
2059 if (!(flags & SUPPRESS) && tw != wp)
2060 *ARG (float *) = negative ? -d : d;
2063 if (__builtin_expect (tw == wp, 0))
2064 conv_error ();
2066 if (!(flags & SUPPRESS))
2067 ++done;
2068 break;
2070 case L_('['): /* Character class. */
2071 if (flags & LONG)
2072 STRING_ARG (wstr, wchar_t);
2073 else
2074 STRING_ARG (str, char);
2076 if (*f == L_('^'))
2078 ++f;
2079 not_in = 1;
2081 else
2082 not_in = 0;
2084 if (width < 0)
2085 /* There is no width given so there is also no limit on the
2086 number of characters we read. Therefore we set width to
2087 a very high value to make the algorithm easier. */
2088 width = INT_MAX;
2090 #ifdef COMPILE_WSCANF
2091 /* Find the beginning and the end of the scanlist. We are not
2092 creating a lookup table since it would have to be too large.
2093 Instead we search each time through the string. This is not
2094 a constant lookup time but who uses this feature deserves to
2095 be punished. */
2096 tw = (wchar_t *) f; /* Marks the beginning. */
2098 if (*f == L']')
2099 ++f;
2101 while ((fc = *f++) != L'\0' && fc != L']');
2103 if (__builtin_expect (fc == L'\0', 0))
2104 conv_error ();
2105 wp = (wchar_t *) f - 1;
2106 #else
2107 /* Fill WP with byte flags indexed by character.
2108 We will use this flag map for matching input characters. */
2109 if (wpmax < UCHAR_MAX + 1)
2111 wpmax = UCHAR_MAX + 1;
2112 wp = (char *) alloca (wpmax);
2114 memset (wp, '\0', UCHAR_MAX + 1);
2116 fc = *f;
2117 if (fc == ']' || fc == '-')
2119 /* If ] or - appears before any char in the set, it is not
2120 the terminator or separator, but the first char in the
2121 set. */
2122 wp[fc] = 1;
2123 ++f;
2126 while ((fc = *f++) != '\0' && fc != ']')
2127 if (fc == '-' && *f != '\0' && *f != ']'
2128 && (unsigned char) f[-2] <= (unsigned char) *f)
2130 /* Add all characters from the one before the '-'
2131 up to (but not including) the next format char. */
2132 for (fc = (unsigned char) f[-2]; fc < (unsigned char) *f; ++fc)
2133 wp[fc] = 1;
2135 else
2136 /* Add the character to the flag map. */
2137 wp[fc] = 1;
2139 if (__builtin_expect (fc == '\0', 0))
2140 conv_error();
2141 #endif
2143 if (flags & LONG)
2145 size_t now = read_in;
2146 #ifdef COMPILE_WSCANF
2147 if (__builtin_expect (inchar () == WEOF, 0))
2148 input_error ();
2152 wchar_t *runp;
2154 /* Test whether it's in the scanlist. */
2155 runp = tw;
2156 while (runp < wp)
2158 if (runp[0] == L'-' && runp[1] != '\0' && runp + 1 != wp
2159 && runp != tw
2160 && (unsigned int) runp[-1] <= (unsigned int) runp[1])
2162 /* Match against all characters in between the
2163 first and last character of the sequence. */
2164 wchar_t wc;
2166 for (wc = runp[-1] + 1; wc <= runp[1]; ++wc)
2167 if ((wint_t) wc == c)
2168 break;
2170 if (wc <= runp[1] && !not_in)
2171 break;
2172 if (wc <= runp[1] && not_in)
2174 /* The current character is not in the
2175 scanset. */
2176 ungetc (c, s);
2177 goto out;
2180 runp += 2;
2182 else
2184 if ((wint_t) *runp == c && !not_in)
2185 break;
2186 if ((wint_t) *runp == c && not_in)
2188 ungetc (c, s);
2189 goto out;
2192 ++runp;
2196 if (runp == wp && !not_in)
2198 ungetc (c, s);
2199 goto out;
2202 if (!(flags & SUPPRESS))
2204 *wstr++ = c;
2206 if ((flags & MALLOC)
2207 && wstr == (wchar_t *) *strptr + strsize)
2209 /* Enlarge the buffer. */
2210 wstr = (wchar_t *) realloc (*strptr,
2211 (2 * strsize)
2212 * sizeof (wchar_t));
2213 if (wstr == NULL)
2215 /* Can't allocate that much. Last-ditch
2216 effort. */
2217 wstr = (wchar_t *)
2218 realloc (*strptr, (strsize + 1)
2219 * sizeof (wchar_t));
2220 if (wstr == NULL)
2222 /* We lose. Oh well. Terminate the string
2223 and stop converting, so at least we don't
2224 skip any input. */
2225 ((wchar_t *) (*strptr))[strsize - 1] = L'\0';
2226 ++done;
2227 conv_error ();
2229 else
2231 *strptr = (char *) wstr;
2232 wstr += strsize;
2233 ++strsize;
2236 else
2238 *strptr = (char *) wstr;
2239 wstr += strsize;
2240 strsize *= 2;
2245 while (--width > 0 && inchar () != WEOF);
2246 out:
2247 #else
2248 char buf[MB_LEN_MAX];
2249 size_t cnt = 0;
2250 mbstate_t cstate;
2252 if (__builtin_expect (inchar () == EOF, 0))
2253 input_error ();
2255 memset (&cstate, '\0', sizeof (cstate));
2259 if (wp[c] == not_in)
2261 ungetc_not_eof (c, s);
2262 break;
2265 /* This is easy. */
2266 if (!(flags & SUPPRESS))
2268 size_t n;
2270 /* Convert it into a wide character. */
2271 buf[0] = c;
2272 n = __mbrtowc (wstr, buf, 1, &cstate);
2274 if (n == (size_t) -2)
2276 /* Possibly correct character, just not enough
2277 input. */
2278 ++cnt;
2279 assert (cnt < MB_CUR_MAX);
2280 continue;
2282 cnt = 0;
2284 ++wstr;
2285 if ((flags & MALLOC)
2286 && wstr == (wchar_t *) *strptr + strsize)
2288 /* Enlarge the buffer. */
2289 wstr = (wchar_t *) realloc (*strptr,
2290 (2 * strsize
2291 * sizeof (wchar_t)));
2292 if (wstr == NULL)
2294 /* Can't allocate that much. Last-ditch
2295 effort. */
2296 wstr = (wchar_t *)
2297 realloc (*strptr, ((strsize + 1)
2298 * sizeof (wchar_t)));
2299 if (wstr == NULL)
2301 /* We lose. Oh well. Terminate the
2302 string and stop converting,
2303 so at least we don't skip any input. */
2304 ((wchar_t *) (*strptr))[strsize - 1] = L'\0';
2305 ++done;
2306 conv_error ();
2308 else
2310 *strptr = (char *) wstr;
2311 wstr += strsize;
2312 ++strsize;
2315 else
2317 *strptr = (char *) wstr;
2318 wstr += strsize;
2319 strsize *= 2;
2324 if (--width <= 0)
2325 break;
2327 while (inchar () != EOF);
2329 if (__builtin_expect (cnt != 0, 0))
2330 /* We stopped in the middle of recognizing another
2331 character. That's a problem. */
2332 encode_error ();
2333 #endif
2335 if (__builtin_expect (now == read_in, 0))
2336 /* We haven't successfully read any character. */
2337 conv_error ();
2339 if (!(flags & SUPPRESS))
2341 *wstr++ = L'\0';
2343 if ((flags & MALLOC)
2344 && wstr - (wchar_t *) *strptr != strsize)
2346 wchar_t *cp = (wchar_t *)
2347 realloc (*strptr, ((wstr - (wchar_t *) *strptr)
2348 * sizeof(wchar_t)));
2349 if (cp != NULL)
2350 *strptr = (char *) cp;
2353 ++done;
2356 else
2358 size_t now = read_in;
2360 if (__builtin_expect (inchar () == EOF, 0))
2361 input_error ();
2363 #ifdef COMPILE_WSCANF
2365 memset (&state, '\0', sizeof (state));
2369 wchar_t *runp;
2370 size_t n;
2372 /* Test whether it's in the scanlist. */
2373 runp = tw;
2374 while (runp < wp)
2376 if (runp[0] == L'-' && runp[1] != '\0' && runp + 1 != wp
2377 && runp != tw
2378 && (unsigned int) runp[-1] <= (unsigned int) runp[1])
2380 /* Match against all characters in between the
2381 first and last character of the sequence. */
2382 wchar_t wc;
2384 for (wc = runp[-1] + 1; wc <= runp[1]; ++wc)
2385 if ((wint_t) wc == c)
2386 break;
2388 if (wc <= runp[1] && !not_in)
2389 break;
2390 if (wc <= runp[1] && not_in)
2392 /* The current character is not in the
2393 scanset. */
2394 ungetc (c, s);
2395 goto out2;
2398 runp += 2;
2400 else
2402 if ((wint_t) *runp == c && !not_in)
2403 break;
2404 if ((wint_t) *runp == c && not_in)
2406 ungetc (c, s);
2407 goto out2;
2410 ++runp;
2414 if (runp == wp && !not_in)
2416 ungetc (c, s);
2417 goto out2;
2420 if (!(flags & SUPPRESS))
2422 if ((flags & MALLOC)
2423 && str + MB_CUR_MAX >= *strptr + strsize)
2425 /* Enlarge the buffer. */
2426 size_t strleng = str - *strptr;
2427 char *newstr;
2429 newstr = (char *) realloc (*strptr, 2 * strsize);
2430 if (newstr == NULL)
2432 /* Can't allocate that much. Last-ditch
2433 effort. */
2434 newstr = (char *) realloc (*strptr,
2435 strleng + MB_CUR_MAX);
2436 if (newstr == NULL)
2438 /* We lose. Oh well. Terminate the string
2439 and stop converting, so at least we don't
2440 skip any input. */
2441 ((char *) (*strptr))[strleng] = '\0';
2442 ++done;
2443 conv_error ();
2445 else
2447 *strptr = newstr;
2448 str = newstr + strleng;
2449 strsize = strleng + MB_CUR_MAX;
2452 else
2454 *strptr = newstr;
2455 str = newstr + strleng;
2456 strsize *= 2;
2461 n = __wcrtomb (!(flags & SUPPRESS) ? str : NULL, c, &state);
2462 if (__builtin_expect (n == (size_t) -1, 0))
2463 encode_error ();
2465 assert (n <= MB_CUR_MAX);
2466 str += n;
2468 while (--width > 0 && inchar () != WEOF);
2469 out2:
2470 #else
2473 if (wp[c] == not_in)
2475 ungetc_not_eof (c, s);
2476 break;
2479 /* This is easy. */
2480 if (!(flags & SUPPRESS))
2482 *str++ = c;
2483 if ((flags & MALLOC)
2484 && (char *) str == *strptr + strsize)
2486 /* Enlarge the buffer. */
2487 size_t newsize = 2 * strsize;
2489 allocagain:
2490 str = (char *) realloc (*strptr, newsize);
2491 if (str == NULL)
2493 /* Can't allocate that much. Last-ditch
2494 effort. */
2495 if (newsize > strsize + 1)
2497 newsize = strsize + 1;
2498 goto allocagain;
2500 /* We lose. Oh well. Terminate the
2501 string and stop converting,
2502 so at least we don't skip any input. */
2503 ((char *) (*strptr))[strsize - 1] = '\0';
2504 ++done;
2505 conv_error ();
2507 else
2509 *strptr = (char *) str;
2510 str += strsize;
2511 strsize = newsize;
2516 while (--width > 0 && inchar () != EOF);
2517 #endif
2519 if (__builtin_expect (now == read_in, 0))
2520 /* We haven't successfully read any character. */
2521 conv_error ();
2523 if (!(flags & SUPPRESS))
2525 #ifdef COMPILE_WSCANF
2526 /* We have to emit the code to get into the initial
2527 state. */
2528 char buf[MB_LEN_MAX];
2529 size_t n = __wcrtomb (buf, L'\0', &state);
2530 if (n > 0 && (flags & MALLOC)
2531 && str + n >= *strptr + strsize)
2533 /* Enlarge the buffer. */
2534 size_t strleng = str - *strptr;
2535 char *newstr;
2537 newstr = (char *) realloc (*strptr, strleng + n + 1);
2538 if (newstr == NULL)
2540 /* We lose. Oh well. Terminate the string
2541 and stop converting, so at least we don't
2542 skip any input. */
2543 ((char *) (*strptr))[strleng] = '\0';
2544 ++done;
2545 conv_error ();
2547 else
2549 *strptr = newstr;
2550 str = newstr + strleng;
2551 strsize = strleng + n + 1;
2555 str = __mempcpy (str, buf, n);
2556 #endif
2557 *str++ = '\0';
2559 if ((flags & MALLOC) && str - *strptr != strsize)
2561 char *cp = (char *) realloc (*strptr, str - *strptr);
2562 if (cp != NULL)
2563 *strptr = cp;
2566 ++done;
2569 break;
2571 case L_('p'): /* Generic pointer. */
2572 base = 16;
2573 /* A PTR must be the same size as a `long int'. */
2574 flags &= ~(SHORT|LONGDBL);
2575 if (need_long)
2576 flags |= LONG;
2577 flags |= READ_POINTER;
2578 goto number;
2580 default:
2581 /* If this is an unknown format character punt. */
2582 conv_error ();
2586 /* The last thing we saw in the format string was a white space.
2587 Consume the last white spaces. */
2588 if (skip_space)
2591 c = inchar ();
2592 while (ISSPACE (c));
2593 ungetc (c, s);
2596 errout:
2597 /* Unlock stream. */
2598 UNLOCK_STREAM (s);
2600 if (errp != NULL)
2601 *errp |= errval;
2603 return done;
2606 #ifdef COMPILE_WSCANF
2608 __vfwscanf (FILE *s, const wchar_t *format, va_list argptr)
2610 return _IO_vfwscanf (s, format, argptr, NULL);
2612 ldbl_weak_alias (__vfwscanf, vfwscanf)
2613 #else
2615 ___vfscanf (FILE *s, const char *format, va_list argptr)
2617 return _IO_vfscanf_internal (s, format, argptr, NULL);
2619 ldbl_strong_alias (_IO_vfscanf_internal, _IO_vfscanf)
2620 ldbl_strong_alias (___vfscanf, __vfscanf)
2621 ldbl_hidden_def (___vfscanf, __vfscanf)
2622 ldbl_weak_alias (___vfscanf, vfscanf)
2623 #endif