2005-01-17 Roland McGrath <roland@redhat.com>
[glibc.git] / stdio-common / vfscanf.c
blobc641d2d37104c89847de88fb4749db13407d56f7
1 /* Copyright (C) 1991-2002, 2003, 2004 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
4 The GNU C Library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Lesser General Public
6 License as published by the Free Software Foundation; either
7 version 2.1 of the License, or (at your option) any later version.
9 The GNU C Library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
14 You should have received a copy of the GNU Lesser General Public
15 License along with the GNU C Library; if not, write to the Free
16 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
17 02111-1307 USA. */
19 #include <assert.h>
20 #include <errno.h>
21 #include <limits.h>
22 #include <ctype.h>
23 #include <stdarg.h>
24 #include <stdio.h>
25 #include <stdint.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <wchar.h>
29 #include <wctype.h>
30 #include <bits/libc-lock.h>
31 #include <locale/localeinfo.h>
/* LONGLONG names the widest integer type this file asks for:
   `long long' when compiled by GNU C (which also defines
   HAVE_LONGLONG), plain `long' otherwise.  */
33 #ifdef __GNUC__
34 # define HAVE_LONGLONG
35 # define LONGLONG long long
36 #else
37 # define LONGLONG long
38 #endif
40 /* Determine whether we have to handle `long long' at all.
   need_longlong is 0 when `long' and `long long' have the same
   maximum value; tests written as `need_longlong && ...' are then
   constant false.  */
41 #if LONG_MAX == LONG_LONG_MAX
42 # define need_longlong 0
43 #else
44 # define need_longlong 1
45 #endif
47 /* Determine whether we have to handle `long'.
   need_long is 0 when `int' and `long' have the same maximum value.  */
48 #if INT_MAX == LONG_MAX
49 # define need_long 0
50 #else
51 # define need_long 1
52 #endif
54 /* These are bit flags describing the conversion specification that
   is currently being parsed; they are OR-ed into the local `flags'.
   Note that the `ll', `q' and `L' modifiers set LONGDBL and LONG
   together, while `z', `j' and `t' set at most one of the two
   depending on the size of the corresponding type.  */
55 #define LONG 0x001 /* l: long or double */
56 #define LONGDBL 0x002 /* L: long long or long double */
57 #define SHORT 0x004 /* h: short */
58 #define SUPPRESS 0x008 /* *: suppress assignment */
59 #define POINTER 0x010 /* weird %p pointer (`fake hex') */
60 #define NOSKIP 0x020 /* do not skip blanks */
61 #define WIDTH 0x040 /* width was given */
62 #define GROUP 0x080 /* ': group numbers */
63 #define MALLOC 0x100 /* a: malloc strings */
64 #define CHAR 0x200 /* hh: char */
65 #define I18N 0x400 /* I: use locale's digits */
68 #include <locale/localeinfo.h>
69 #include <libioP.h>
70 #include <libio.h>
72 #undef va_list
73 #define va_list _IO_va_list
/* Character-level abstraction layer.  The same function body is
   compiled either for wide streams (COMPILE_WSCANF defined) or for
   byte streams; the macros below hide the difference.  Several of
   them are not self-contained: `inchar' uses the enclosing function's
   locals `c', `s', `read_in' and `inchar_errno', and the `ungetc'
   variants modify `read_in'.  */
75 #ifdef COMPILE_WSCANF
/* Push C back onto stream S and decrement `read_in', unless C is
   WEOF, in which case nothing is done.  */
76 # define ungetc(c, s) ((void) (c == WEOF \
77 || (--read_in, \
78 INTUSE(_IO_sputbackwc) (s, c))))
/* Same as `ungetc' but without the WEOF test.  */
79 # define ungetc_not_eof(c, s) ((void) (--read_in, \
80 INTUSE(_IO_sputbackwc) (s, c)))
/* Read the next wide character into `c' and yield it.  If `c' already
   holds WEOF nothing is read; errno is restored from `inchar_errno'
   and WEOF is yielded again.  After a successful read `read_in' is
   incremented; when the read hits WEOF the current errno is saved in
   `inchar_errno'.  */
81 # define inchar() (c == WEOF ? ((errno = inchar_errno), WEOF) \
82 : ((c = _IO_getwc_unlocked (s)), \
83 (void) (c != WEOF \
84 ? ++read_in \
85 : (size_t) (inchar_errno = errno)), c))
/* Wide-character versions of the copy and classification helpers.  */
87 # define MEMCPY(d, s, n) __wmemcpy (d, s, n)
88 # define ISSPACE(Ch) iswspace (Ch)
89 # define ISDIGIT(Ch) iswdigit (Ch)
90 # define ISXDIGIT(Ch) iswxdigit (Ch)
91 # define TOLOWER(Ch) towlower (Ch)
/* Make the enclosing function return WEOF unless _IO_fwide (s, 1)
   reports 1 for the stream.  */
92 # define ORIENT if (_IO_fwide (s, 1) != 1) return WEOF
/* Redirect the narrow number-conversion routine names to their wide
   counterparts.  */
93 # define __strtoll_internal __wcstoll_internal
94 # define __strtoull_internal __wcstoull_internal
95 # define __strtol_internal __wcstol_internal
96 # define __strtoul_internal __wcstoul_internal
97 # define __strtold_internal __wcstold_internal
98 # define __strtod_internal __wcstod_internal
99 # define __strtof_internal __wcstof_internal
/* Literal prefix and character types for the wide build; EOF is
   redefined to WEOF so the shared code can compare against EOF.  */
101 # define L_(Str) L##Str
102 # define CHAR_T wchar_t
103 # define UCHAR_T unsigned int
104 # define WINT_T wint_t
105 # undef EOF
106 # define EOF WEOF
107 #else
/* Push C (converted to unsigned char) back onto stream S and
   decrement `read_in', unless C is EOF, in which case nothing is
   done.  */
108 # define ungetc(c, s) ((void) ((int) c == EOF \
109 || (--read_in, \
110 INTUSE(_IO_sputbackc) (s, (unsigned char) c))))
/* Same as `ungetc' but without the EOF test.  */
111 # define ungetc_not_eof(c, s) ((void) (--read_in, \
112 INTUSE(_IO_sputbackc) (s, (unsigned char) c)))
/* Read the next byte into `c' and yield it.  If `c' already holds EOF
   nothing is read; errno is restored from `inchar_errno' and EOF is
   yielded again.  After a successful read `read_in' is incremented;
   when the read hits EOF the current errno is saved in
   `inchar_errno'.  */
113 # define inchar() (c == EOF ? ((errno = inchar_errno), EOF) \
114 : ((c = _IO_getc_unlocked (s)), \
115 (void) (c != EOF \
116 ? ++read_in \
117 : (size_t) (inchar_errno = errno)), c))
/* Byte versions of the copy and classification helpers.  The
   classification macros use the enclosing function's local `loc'.  */
118 # define MEMCPY(d, s, n) memcpy (d, s, n)
119 # define ISSPACE(Ch) __isspace_l (Ch, loc)
120 # define ISDIGIT(Ch) __isdigit_l (Ch, loc)
121 # define ISXDIGIT(Ch) __isxdigit_l (Ch, loc)
122 # define TOLOWER(Ch) __tolower_l ((unsigned char) (Ch), loc)
/* Make the enclosing function return EOF when _IO_vtable_offset (s)
   is 0 and _IO_fwide (s, -1) does not report -1 for the stream.  */
123 # define ORIENT if (_IO_vtable_offset (s) == 0 \
124 && _IO_fwide (s, -1) != -1) \
125 return EOF
/* Literal prefix and character types for the byte build.  */
127 # define L_(Str) Str
128 # define CHAR_T char
129 # define UCHAR_T unsigned char
130 # define WINT_T int
131 #endif
/* Error exits for the scanning function.  Each macro records why
   scanning stopped and then jumps to the label `errout', which must
   exist in the function using the macro (it is not part of this
   excerpt).  They rely on the locals `errval' and `done'.  */
/* Invalid character encoding: errval = 4 and errno = EILSEQ.  */
133 #define encode_error() do { \
134 errval = 4; \
135 __set_errno (EILSEQ); \
136 goto errout; \
137 } while (0)
/* Input did not match the conversion: errval = 2; `done' is left
   unchanged.  */
138 #define conv_error() do { \
139 errval = 2; \
140 goto errout; \
141 } while (0)
/* Input failure: errval = 1, and `done' becomes EOF only when it is
   still 0.  */
142 #define input_error() do { \
143 errval = 1; \
144 if (done == 0) done = EOF; \
145 goto errout; \
146 } while (0)
/* Out of memory: errno = ENOMEM and `done' is forced to EOF.  Unlike
   the other three, this macro does not touch `errval'.  */
147 #define memory_error() do { \
148 __set_errno (ENOMEM); \
149 done = EOF; \
150 goto errout; \
151 } while (0)
/* Validate the arguments of the scanning function and return EOF
   from it when they are unusable:
     - CHECK_FILE (s, EOF) is applied to the stream first;
     - a stream whose flags contain _IO_NO_READS sets errno to EBADF;
     - a NULL FORMAT invokes MAYBE_SET_EINVAL.
   NOTE(review): this copy appears to have lost the brace-only lines
   of the macro (the embedded numbering skips 154, 158, 161, 163 and
   166) -- confirm against the upstream file before compiling.  */
152 #define ARGCHECK(s, format) \
153 do \
155 /* Check file argument for consistency. */ \
156 CHECK_FILE (s, EOF); \
157 if (s->_flags & _IO_NO_READS) \
159 __set_errno (EBADF); \
160 return EOF; \
162 else if (format == NULL) \
164 MAYBE_SET_EINVAL; \
165 return EOF; \
167 } while (0)
/* Lock stream S for the duration of the scan.  _IO_funlockfile is
   registered through __libc_cleanup_region_start before the lock is
   taken.  The macro expands to two separate statements and is not
   wrapped in do/while, so it must not be used as the unbraced body of
   an `if' or loop.  LOCK_STREAM and UNLOCK_STREAM open and close a
   cleanup region and are therefore meant to be used as a pair.  */
168 #define LOCK_STREAM(S) \
169 __libc_cleanup_region_start (1, (void (*) (void *)) &_IO_funlockfile, (S)); \
170 _IO_flockfile (S)
/* Unlock stream S and close the cleanup region opened by LOCK_STREAM.
   NOTE(review): the argument 0 presumably tells
   __libc_cleanup_region_end not to run the registered handler again --
   confirm against bits/libc-lock.h.  */
171 #define UNLOCK_STREAM(S) \
172 _IO_funlockfile (S); \
173 __libc_cleanup_region_end (0)
176 /* Read formatted input from S according to the format string
177 FORMAT, using the argument list in ARG.
178 Return the number of assignments made, or -1 for an input error. */
179 #ifdef COMPILE_WSCANF
181 _IO_vfwscanf (s, format, argptr, errp)
182 _IO_FILE *s;
183 const wchar_t *format;
184 _IO_va_list argptr;
185 int *errp;
186 #else
188 _IO_vfscanf (s, format, argptr, errp)
189 _IO_FILE *s;
190 const char *format;
191 _IO_va_list argptr;
192 int *errp;
193 #endif
195 va_list arg;
196 register const CHAR_T *f = format;
197 register UCHAR_T fc; /* Current character of the format. */
198 register WINT_T done = 0; /* Assignments done. */
199 register size_t read_in = 0; /* Chars read in. */
200 register WINT_T c = 0; /* Last char read. */
201 register int width; /* Maximum field width. */
202 register int flags; /* Modifiers for current format element. */
203 int errval = 0;
204 #ifndef COMPILE_WSCANF
205 __locale_t loc = _NL_CURRENT_LOCALE;
206 struct locale_data *const curctype = loc->__locales[LC_CTYPE];
207 #endif
209 /* Errno of last failed inchar call. */
210 int inchar_errno = 0;
211 /* Status for reading F-P nums. */
212 char got_dot, got_e, negative;
213 /* If a [...] is a [^...]. */
214 CHAR_T not_in;
215 #define exp_char not_in
216 /* Base for integral numbers. */
217 int base;
218 /* Signedness for integral numbers. */
219 int number_signed;
220 #define is_hexa number_signed
221 /* Decimal point character. */
222 #ifdef COMPILE_WSCANF
223 wint_t decimal;
224 #else
225 const char *decimal;
226 #endif
227 /* The thousands character of the current locale. */
228 #ifdef COMPILE_WSCANF
229 wint_t thousands;
230 #else
231 const char *thousands;
232 #endif
233 /* State for the conversions. */
234 mbstate_t state;
235 /* Integral holding variables. */
236 union
238 long long int q;
239 unsigned long long int uq;
240 long int l;
241 unsigned long int ul;
242 } num;
243 /* Character-buffer pointer. */
244 char *str = NULL;
245 wchar_t *wstr = NULL;
246 char **strptr = NULL;
247 ssize_t strsize = 0;
248 /* We must not react on white spaces immediately because they can
249 possibly be matched even if in the input stream no character is
250 available anymore. */
251 int skip_space = 0;
252 /* Nonzero if we are reading a pointer. */
253 int read_pointer;
254 /* Workspace. */
255 CHAR_T *tw; /* Temporary pointer. */
256 CHAR_T *wp = NULL; /* Workspace. */
257 size_t wpmax = 0; /* Maximal size of workspace. */
258 size_t wpsize; /* Currently used bytes in workspace. */
259 #define ADDW(Ch) \
260 do \
262 if (wpsize == wpmax) \
264 CHAR_T *old = wp; \
265 wpmax = (UCHAR_MAX + 1 > 2 * wpmax ? UCHAR_MAX + 1 : 2 * wpmax); \
266 wp = (CHAR_T *) alloca (wpmax * sizeof (wchar_t)); \
267 if (old != NULL) \
268 MEMCPY (wp, old, wpsize); \
270 wp[wpsize++] = (Ch); \
272 while (0)
274 #ifdef __va_copy
275 __va_copy (arg, argptr);
276 #else
277 arg = (va_list) argptr;
278 #endif
280 #ifdef ORIENT
281 ORIENT;
282 #endif
284 ARGCHECK (s, format);
287 #ifndef COMPILE_WSCANF
288 struct locale_data *const curnumeric = loc->__locales[LC_NUMERIC];
289 #endif
291 /* Figure out the decimal point character. */
292 #ifdef COMPILE_WSCANF
293 decimal = _NL_CURRENT_WORD (LC_NUMERIC, _NL_NUMERIC_DECIMAL_POINT_WC);
294 #else
295 decimal = curnumeric->values[_NL_ITEM_INDEX (DECIMAL_POINT)].string;
296 #endif
297 /* Figure out the thousands separator character. */
298 #ifdef COMPILE_WSCANF
299 thousands = _NL_CURRENT_WORD (LC_NUMERIC, _NL_NUMERIC_THOUSANDS_SEP_WC);
300 #else
301 thousands = curnumeric->values[_NL_ITEM_INDEX (THOUSANDS_SEP)].string;
302 if (*thousands == '\0')
303 thousands = NULL;
304 #endif
307 /* Lock the stream. */
308 LOCK_STREAM (s);
311 #ifndef COMPILE_WSCANF
312 /* From now on we use `state' to convert the format string. */
313 memset (&state, '\0', sizeof (state));
314 #endif
316 /* Run through the format string. */
317 while (*f != '\0')
319 unsigned int argpos;
320 /* Extract the next argument, which is of type TYPE.
321 For a %N$... spec, this is the Nth argument from the beginning;
322 otherwise it is the next argument after the state now in ARG. */
323 #ifdef __va_copy
324 # define ARG(type) (argpos == 0 ? va_arg (arg, type) : \
325 ({ unsigned int pos = argpos; \
326 va_list arg; \
327 __va_copy (arg, argptr); \
328 while (--pos > 0) \
329 (void) va_arg (arg, void *); \
330 va_arg (arg, type); \
332 #else
333 # if 0
334 /* XXX Possible optimization. */
335 # define ARG(type) (argpos == 0 ? va_arg (arg, type) : \
336 ({ va_list arg = (va_list) argptr; \
337 arg = (va_list) ((char *) arg \
338 + (argpos - 1) \
339 * __va_rounded_size (void *)); \
340 va_arg (arg, type); \
342 # else
343 # define ARG(type) (argpos == 0 ? va_arg (arg, type) : \
344 ({ unsigned int pos = argpos; \
345 va_list arg = (va_list) argptr; \
346 while (--pos > 0) \
347 (void) va_arg (arg, void *); \
348 va_arg (arg, type); \
350 # endif
351 #endif
353 #ifndef COMPILE_WSCANF
354 if (!isascii ((unsigned char) *f))
356 /* Non-ASCII, may be a multibyte. */
357 int len = __mbrlen (f, strlen (f), &state);
358 if (len > 0)
362 c = inchar ();
363 if (c == EOF)
364 input_error ();
365 else if (c != (unsigned char) *f++)
367 ungetc_not_eof (c, s);
368 conv_error ();
371 while (--len > 0);
372 continue;
375 #endif
377 fc = *f++;
378 if (fc != '%')
380 /* Remember to skip spaces. */
381 if (ISSPACE (fc))
383 skip_space = 1;
384 continue;
387 /* Read a character. */
388 c = inchar ();
390 /* Characters other than format specs must just match. */
391 if (c == EOF)
392 input_error ();
394 /* We saw white space char as the last character in the format
395 string. Now it's time to skip all leading white space. */
396 if (skip_space)
398 while (ISSPACE (c))
399 if (inchar () == EOF)
400 input_error ();
401 skip_space = 0;
404 if (c != fc)
406 ungetc (c, s);
407 conv_error ();
410 continue;
413 /* This is the start of the conversion string. */
414 flags = 0;
416 /* Not yet decided whether we read a pointer or not. */
417 read_pointer = 0;
419 /* Initialize state of modifiers. */
420 argpos = 0;
422 /* Prepare temporary buffer. */
423 wpsize = 0;
425 /* Check for a positional parameter specification. */
426 if (ISDIGIT ((UCHAR_T) *f))
428 argpos = (UCHAR_T) *f++ - L_('0');
429 while (ISDIGIT ((UCHAR_T) *f))
430 argpos = argpos * 10 + ((UCHAR_T) *f++ - L_('0'));
431 if (*f == L_('$'))
432 ++f;
433 else
435 /* Oops; that was actually the field width. */
436 width = argpos;
437 flags |= WIDTH;
438 argpos = 0;
439 goto got_width;
443 /* Check for the assignment-suppressing, the number grouping flag,
444 and the signal to use the locale's digit representation. */
445 while (*f == L_('*') || *f == L_('\'') || *f == L_('I'))
446 switch (*f++)
448 case L_('*'):
449 flags |= SUPPRESS;
450 break;
451 case L_('\''):
452 flags |= GROUP;
453 break;
454 case L_('I'):
455 flags |= I18N;
456 break;
459 /* We have seen width. */
460 if (ISDIGIT ((UCHAR_T) *f))
461 flags |= WIDTH;
463 /* Find the maximum field width. */
464 width = 0;
465 while (ISDIGIT ((UCHAR_T) *f))
467 width *= 10;
468 width += (UCHAR_T) *f++ - L_('0');
470 got_width:
471 if (width == 0)
472 width = -1;
474 /* Check for type modifiers. */
475 switch (*f++)
477 case L_('h'):
478 /* ints are short ints or chars. */
479 if (*f == L_('h'))
481 ++f;
482 flags |= CHAR;
484 else
485 flags |= SHORT;
486 break;
487 case L_('l'):
488 if (*f == L_('l'))
490 /* A double `l' is equivalent to an `L'. */
491 ++f;
492 flags |= LONGDBL | LONG;
494 else
495 /* ints are long ints. */
496 flags |= LONG;
497 break;
498 case L_('q'):
499 case L_('L'):
500 /* doubles are long doubles, and ints are long long ints. */
501 flags |= LONGDBL | LONG;
502 break;
503 case L_('a'):
504 /* The `a' is used as a flag only if followed by `s', `S' or
505 `['. */
506 if (*f != L_('s') && *f != L_('S') && *f != L_('['))
508 --f;
509 break;
511 /* String conversions (%s, %[) take a `char **'
512 arg and fill it in with a malloc'd pointer. */
513 flags |= MALLOC;
514 break;
515 case L_('z'):
516 if (need_longlong && sizeof (size_t) > sizeof (unsigned long int))
517 flags |= LONGDBL;
518 else if (sizeof (size_t) > sizeof (unsigned int))
519 flags |= LONG;
520 break;
521 case L_('j'):
522 if (need_longlong && sizeof (uintmax_t) > sizeof (unsigned long int))
523 flags |= LONGDBL;
524 else if (sizeof (uintmax_t) > sizeof (unsigned int))
525 flags |= LONG;
526 break;
527 case L_('t'):
528 if (need_longlong && sizeof (ptrdiff_t) > sizeof (long int))
529 flags |= LONGDBL;
530 else if (sizeof (ptrdiff_t) > sizeof (int))
531 flags |= LONG;
532 break;
533 default:
534 /* Not a recognized modifier. Backup. */
535 --f;
536 break;
539 /* End of the format string? */
540 if (*f == L_('\0'))
541 conv_error ();
543 /* Find the conversion specifier. */
544 fc = *f++;
545 if (skip_space || (fc != L_('[') && fc != L_('c')
546 && fc != L_('C') && fc != L_('n')))
548 /* Eat whitespace. */
549 int save_errno = errno;
550 errno = 0;
552 if (inchar () == EOF && errno == EINTR)
553 input_error ();
554 while (ISSPACE (c));
555 errno = save_errno;
556 ungetc (c, s);
557 skip_space = 0;
560 switch (fc)
562 case L_('%'): /* Must match a literal '%'. */
563 c = inchar ();
564 if (c == EOF)
565 input_error ();
566 if (c != fc)
568 ungetc_not_eof (c, s);
569 conv_error ();
571 break;
573 case L_('n'): /* Answer number of assignments done. */
574 /* Corrigendum 1 to ISO C 1990 describes the allowed flags
575 with the 'n' conversion specifier. */
576 if (!(flags & SUPPRESS))
578 /* Don't count the read-ahead. */
579 if (need_longlong && (flags & LONGDBL))
580 *ARG (long long int *) = read_in;
581 else if (need_long && (flags & LONG))
582 *ARG (long int *) = read_in;
583 else if (flags & SHORT)
584 *ARG (short int *) = read_in;
585 else if (!(flags & CHAR))
586 *ARG (int *) = read_in;
587 else
588 *ARG (char *) = read_in;
590 #ifdef NO_BUG_IN_ISO_C_CORRIGENDUM_1
591 /* We have a severe problem here. The ISO C standard
592 contradicts itself in explaining the effect of the %n
593 format in `scanf'. While in ISO C:1990 and the ISO C
594 Amendment 1:1995 the result is described as
596 Execution of a %n directive does not affect the
597 assignment count returned at the completion of
598 execution of the f(w)scanf function.
600 in ISO C Corrigendum 1:1994 the following was added:
602 Subclause 7.9.6.2
603 Add the following fourth example:
605 #include <stdio.h>
606 int d1, d2, n1, n2, i;
607 i = sscanf("123", "%d%n%n%d", &d1, &n1, &n2, &d2);
608 the value 123 is assigned to d1 and the value 3 to n1.
609 Because %n can never get an input failure the value
610 of 3 is also assigned to n2. The value of d2 is not
611 affected. The value 3 is assigned to i.
613 We go for now with the historically correct code from ISO C,
614 i.e., we don't count the %n assignments. When it ever
615 should prove to be wrong just remove the #ifdef above. */
616 ++done;
617 #endif
619 break;
621 case L_('c'): /* Match characters. */
622 if ((flags & LONG) == 0)
624 if (!(flags & SUPPRESS))
626 str = ARG (char *);
627 if (str == NULL)
628 conv_error ();
631 c = inchar ();
632 if (c == EOF)
633 input_error ();
635 if (width == -1)
636 width = 1;
638 #ifdef COMPILE_WSCANF
639 /* We have to convert the wide character(s) into multibyte
640 characters and store the result. */
641 memset (&state, '\0', sizeof (state));
645 size_t n;
647 n = __wcrtomb (!(flags & SUPPRESS) ? str : NULL, c, &state);
648 if (n == (size_t) -1)
649 /* No valid wide character. */
650 input_error ();
652 /* Increment the output pointer. Even if we don't
653 write anything. */
654 str += n;
656 while (--width > 0 && inchar () != EOF);
657 #else
658 if (!(flags & SUPPRESS))
661 *str++ = c;
662 while (--width > 0 && inchar () != EOF);
664 else
665 while (--width > 0 && inchar () != EOF);
666 #endif
668 if (!(flags & SUPPRESS))
669 ++done;
671 break;
673 /* FALLTHROUGH */
674 case L_('C'):
675 if (!(flags & SUPPRESS))
677 wstr = ARG (wchar_t *);
678 if (wstr == NULL)
679 conv_error ();
682 c = inchar ();
683 if (c == EOF)
684 input_error ();
686 #ifdef COMPILE_WSCANF
687 /* Just store the incoming wide characters. */
688 if (!(flags & SUPPRESS))
691 *wstr++ = c;
692 while (--width > 0 && inchar () != EOF);
694 else
695 while (--width > 0 && inchar () != EOF);
696 #else
698 /* We have to convert the multibyte input sequence to wide
699 characters. */
700 char buf[1];
701 mbstate_t cstate;
703 memset (&cstate, '\0', sizeof (cstate));
707 /* This is what we present the mbrtowc function first. */
708 buf[0] = c;
710 while (1)
712 size_t n;
714 n = __mbrtowc (!(flags & SUPPRESS) ? wstr : NULL,
715 buf, 1, &cstate);
717 if (n == (size_t) -2)
719 /* Possibly correct character, just not enough
720 input. */
721 if (inchar () == EOF)
722 encode_error ();
724 buf[0] = c;
725 continue;
728 if (n != 1)
729 encode_error ();
731 /* We have a match. */
732 break;
735 /* Advance the result pointer. */
736 ++wstr;
738 while (--width > 0 && inchar () != EOF);
740 #endif
742 if (!(flags & SUPPRESS))
743 ++done;
745 break;
747 case L_('s'): /* Read a string. */
748 if (!(flags & LONG))
750 #define STRING_ARG(Str, Type) \
751 do if (!(flags & SUPPRESS)) \
753 if (flags & MALLOC) \
755 /* The string is to be stored in a malloc'd buffer. */ \
756 strptr = ARG (char **); \
757 if (strptr == NULL) \
758 conv_error (); \
759 /* Allocate an initial buffer. */ \
760 strsize = 100; \
761 *strptr = (char *) malloc (strsize * sizeof (Type)); \
762 Str = (Type *) *strptr; \
764 else \
765 Str = ARG (Type *); \
766 if (Str == NULL) \
767 conv_error (); \
768 } while (0)
769 STRING_ARG (str, char);
771 c = inchar ();
772 if (c == EOF)
773 input_error ();
775 #ifdef COMPILE_WSCANF
776 memset (&state, '\0', sizeof (state));
777 #endif
781 if (ISSPACE (c))
783 ungetc_not_eof (c, s);
784 break;
787 #ifdef COMPILE_WSCANF
788 /* This is quite complicated. We have to convert the
789 wide characters into multibyte characters and then
790 store them. */
792 size_t n;
794 if (!(flags & SUPPRESS) && (flags & MALLOC)
795 && str + MB_CUR_MAX >= *strptr + strsize)
797 /* We have to enlarge the buffer if the `a' flag
798 was given. */
799 size_t strleng = str - *strptr;
800 char *newstr;
802 newstr = (char *) realloc (*strptr, strsize * 2);
803 if (newstr == NULL)
805 /* Can't allocate that much. Last-ditch
806 effort. */
807 newstr = (char *) realloc (*strptr,
808 strleng + MB_CUR_MAX);
809 if (newstr == NULL)
811 /* We lose. Oh well. Terminate the
812 string and stop converting,
813 so at least we don't skip any input. */
814 ((char *) (*strptr))[strleng] = '\0';
815 ++done;
816 conv_error ();
818 else
820 *strptr = newstr;
821 str = newstr + strleng;
822 strsize = strleng + MB_CUR_MAX;
825 else
827 *strptr = newstr;
828 str = newstr + strleng;
829 strsize *= 2;
833 n = __wcrtomb (!(flags & SUPPRESS) ? str : NULL, c,
834 &state);
835 if (n == (size_t) -1)
836 encode_error ();
838 assert (n <= MB_CUR_MAX);
839 str += n;
841 #else
842 /* This is easy. */
843 if (!(flags & SUPPRESS))
845 *str++ = c;
846 if ((flags & MALLOC)
847 && (char *) str == *strptr + strsize)
849 /* Enlarge the buffer. */
850 str = (char *) realloc (*strptr, 2 * strsize);
851 if (str == NULL)
853 /* Can't allocate that much. Last-ditch
854 effort. */
855 str = (char *) realloc (*strptr, strsize + 1);
856 if (str == NULL)
858 /* We lose. Oh well. Terminate the
859 string and stop converting,
860 so at least we don't skip any input. */
861 ((char *) (*strptr))[strsize - 1] = '\0';
862 ++done;
863 conv_error ();
865 else
867 *strptr = (char *) str;
868 str += strsize;
869 ++strsize;
872 else
874 *strptr = (char *) str;
875 str += strsize;
876 strsize *= 2;
880 #endif
882 while ((width <= 0 || --width > 0) && inchar () != EOF);
884 if (!(flags & SUPPRESS))
886 #ifdef COMPILE_WSCANF
887 /* We have to emit the code to get into the initial
888 state. */
889 char buf[MB_LEN_MAX];
890 size_t n = __wcrtomb (buf, L'\0', &state);
891 if (n > 0 && (flags & MALLOC)
892 && str + n >= *strptr + strsize)
894 /* Enlarge the buffer. */
895 size_t strleng = str - *strptr;
896 char *newstr;
898 newstr = (char *) realloc (*strptr, strleng + n + 1);
899 if (newstr == NULL)
901 /* We lose. Oh well. Terminate the string
902 and stop converting, so at least we don't
903 skip any input. */
904 ((char *) (*strptr))[strleng] = '\0';
905 ++done;
906 conv_error ();
908 else
910 *strptr = newstr;
911 str = newstr + strleng;
912 strsize = strleng + n + 1;
916 str = __mempcpy (str, buf, n);
917 #endif
918 *str++ = '\0';
920 if ((flags & MALLOC) && str - *strptr != strsize)
922 char *cp = (char *) realloc (*strptr, str - *strptr);
923 if (cp != NULL)
924 *strptr = cp;
927 ++done;
929 break;
931 /* FALLTHROUGH */
933 case L_('S'):
935 #ifndef COMPILE_WSCANF
936 mbstate_t cstate;
937 #endif
939 /* Wide character string. */
940 STRING_ARG (wstr, wchar_t);
942 c = inchar ();
943 if (c == EOF)
944 input_error ();
946 #ifndef COMPILE_WSCANF
947 memset (&cstate, '\0', sizeof (cstate));
948 #endif
952 if (ISSPACE (c))
954 ungetc_not_eof (c, s);
955 break;
958 #ifdef COMPILE_WSCANF
959 /* This is easy. */
960 if (!(flags & SUPPRESS))
962 *wstr++ = c;
963 if ((flags & MALLOC)
964 && wstr == (wchar_t *) *strptr + strsize)
966 /* Enlarge the buffer. */
967 wstr = (wchar_t *) realloc (*strptr,
968 (2 * strsize)
969 * sizeof (wchar_t));
970 if (wstr == NULL)
972 /* Can't allocate that much. Last-ditch
973 effort. */
974 wstr = (wchar_t *) realloc (*strptr,
975 (strsize + 1)
976 * sizeof (wchar_t));
977 if (wstr == NULL)
979 /* We lose. Oh well. Terminate the string
980 and stop converting, so at least we don't
981 skip any input. */
982 ((wchar_t *) (*strptr))[strsize - 1] = L'\0';
983 ++done;
984 conv_error ();
986 else
988 *strptr = (char *) wstr;
989 wstr += strsize;
990 ++strsize;
993 else
995 *strptr = (char *) wstr;
996 wstr += strsize;
997 strsize *= 2;
1001 #else
1003 char buf[1];
1005 buf[0] = c;
1007 while (1)
1009 size_t n;
1011 n = __mbrtowc (!(flags & SUPPRESS) ? wstr : NULL,
1012 buf, 1, &cstate);
1014 if (n == (size_t) -2)
1016 /* Possibly correct character, just not enough
1017 input. */
1018 if (inchar () == EOF)
1019 encode_error ();
1021 buf[0] = c;
1022 continue;
1025 if (n != 1)
1026 encode_error ();
1028 /* We have a match. */
1029 ++wstr;
1030 break;
1033 if (!(flags & SUPPRESS) && (flags & MALLOC)
1034 && wstr == (wchar_t *) *strptr + strsize)
1036 /* Enlarge the buffer. */
1037 wstr = (wchar_t *) realloc (*strptr,
1038 (2 * strsize
1039 * sizeof (wchar_t)));
1040 if (wstr == NULL)
1042 /* Can't allocate that much. Last-ditch effort. */
1043 wstr = (wchar_t *) realloc (*strptr,
1044 ((strsize + 1)
1045 * sizeof (wchar_t)));
1046 if (wstr == NULL)
1048 /* We lose. Oh well. Terminate the
1049 string and stop converting, so at
1050 least we don't skip any input. */
1051 ((wchar_t *) (*strptr))[strsize - 1] = L'\0';
1052 ++done;
1053 conv_error ();
1055 else
1057 *strptr = (char *) wstr;
1058 wstr += strsize;
1059 ++strsize;
1062 else
1064 *strptr = (char *) wstr;
1065 wstr += strsize;
1066 strsize *= 2;
1070 #endif
1072 while ((width <= 0 || --width > 0) && inchar () != EOF);
1074 if (!(flags & SUPPRESS))
1076 *wstr++ = L'\0';
1078 if ((flags & MALLOC) && wstr - (wchar_t *) *strptr != strsize)
1080 wchar_t *cp = (wchar_t *) realloc (*strptr,
1081 ((wstr
1082 - (wchar_t *) *strptr)
1083 * sizeof(wchar_t)));
1084 if (cp != NULL)
1085 *strptr = (char *) cp;
1088 ++done;
1091 break;
1093 case L_('x'): /* Hexadecimal integer. */
1094 case L_('X'): /* Ditto. */
1095 base = 16;
1096 number_signed = 0;
1097 goto number;
1099 case L_('o'): /* Octal integer. */
1100 base = 8;
1101 number_signed = 0;
1102 goto number;
1104 case L_('u'): /* Unsigned decimal integer. */
1105 base = 10;
1106 number_signed = 0;
1107 goto number;
1109 case L_('d'): /* Signed decimal integer. */
1110 base = 10;
1111 number_signed = 1;
1112 goto number;
1114 case L_('i'): /* Generic number. */
1115 base = 0;
1116 number_signed = 1;
1118 number:
1119 c = inchar ();
1120 if (c == EOF)
1121 input_error ();
1123 /* Check for a sign. */
1124 if (c == L_('-') || c == L_('+'))
1126 ADDW (c);
1127 if (width > 0)
1128 --width;
1129 c = inchar ();
1132 /* Look for a leading indication of base. */
1133 if (width != 0 && c == L_('0'))
1135 if (width > 0)
1136 --width;
1138 ADDW (c);
1139 c = inchar ();
1141 if (width != 0 && TOLOWER (c) == L_('x'))
1143 if (base == 0)
1144 base = 16;
1145 if (base == 16)
1147 if (width > 0)
1148 --width;
1149 c = inchar ();
1152 else if (base == 0)
1153 base = 8;
1156 if (base == 0)
1157 base = 10;
1159 if (base == 10 && (flags & I18N) != 0)
1161 int from_level;
1162 int to_level;
1163 int level;
1164 #ifdef COMPILE_WSCANF
1165 const wchar_t *wcdigits[10];
1166 const wchar_t *wcdigits_extended[10];
1167 #else
1168 const char *mbdigits[10];
1169 const char *mbdigits_extended[10];
1170 #endif
1171 /* "to_inpunct" is a map from ASCII digits to their
1172 equivalent in locale. This is defined for locales
1173 which use an extra digits set. */
1174 wctrans_t map = __wctrans ("to_inpunct");
1175 int n;
1177 from_level = 0;
1178 #ifdef COMPILE_WSCANF
1179 to_level = _NL_CURRENT_WORD (LC_CTYPE,
1180 _NL_CTYPE_INDIGITS_WC_LEN) - 1;
1181 #else
1182 to_level = (uint32_t) curctype->values[_NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN)].word - 1;
1183 #endif
1185 /* Get the alternative digit forms if there are any. */
1186 if (__builtin_expect (map != NULL, 0))
1188 /* Adding new level for extra digits set in locale file. */
1189 ++to_level;
1191 for (n = 0; n < 10; ++n)
1193 #ifdef COMPILE_WSCANF
1194 wcdigits[n] = (const wchar_t *)
1195 _NL_CURRENT (LC_CTYPE, _NL_CTYPE_INDIGITS0_WC + n);
1197 wchar_t *wc_extended = (wchar_t *)
1198 alloca ((to_level + 2) * sizeof (wchar_t));
1199 __wmemcpy (wc_extended, wcdigits[n], to_level);
1200 wc_extended[to_level] = __towctrans (L'0' + n, map);
1201 wc_extended[to_level + 1] = '\0';
1202 wcdigits_extended[n] = wc_extended;
1203 #else
1204 mbdigits[n]
1205 = curctype->values[_NL_CTYPE_INDIGITS0_MB + n].string;
1207 /* Get the equivalent wide char in map. */
1208 wint_t extra_wcdigit = __towctrans (L'0' + n, map);
1210 /* Convert it to multibyte representation. */
1211 mbstate_t state;
1212 memset (&state, '\0', sizeof (state));
1214 char extra_mbdigit[MB_LEN_MAX];
1215 size_t mblen
1216 = __wcrtomb (extra_mbdigit, extra_wcdigit, &state);
1218 if (mblen == (size_t) -1)
1220 /* Ignore this new level. */
1221 map = NULL;
1222 break;
1225 /* Calculate the length of mbdigits[n]. */
1226 const char *last_char = mbdigits[n];
1227 for (level = 0; level < to_level; ++level)
1228 last_char = strchr (last_char, '\0') + 1;
1230 size_t mbdigits_len = last_char - mbdigits[n];
1232 /* Allocate memory for extended multibyte digit. */
1233 char *mb_extended;
1234 mb_extended = (char *) alloca (mbdigits_len + mblen + 1);
1236 /* And get the mbdigits + extra_digit string. */
1237 *(char *) __mempcpy (__mempcpy (mb_extended, mbdigits[n],
1238 mbdigits_len),
1239 extra_mbdigit, mblen) = '\0';
1240 mbdigits_extended[n] = mb_extended;
1241 #endif
1245 /* Read the number into workspace. */
1246 while (c != EOF && width != 0)
1248 /* In this round we get the pointer to the digit strings
1249 and also perform the first round of comparisons. */
1250 for (n = 0; n < 10; ++n)
1252 /* Get the string for the digits with value N. */
1253 #ifdef COMPILE_WSCANF
1254 if (__builtin_expect (map != NULL, 0))
1255 wcdigits[n] = wcdigits_extended[n];
1256 else
1257 wcdigits[n] = (const wchar_t *)
1258 _NL_CURRENT (LC_CTYPE, _NL_CTYPE_INDIGITS0_WC + n);
1259 wcdigits[n] += from_level;
1261 if (c == (wint_t) *wcdigits[n])
1263 to_level = from_level;
1264 break;
1267 /* Advance the pointer to the next string. */
1268 ++wcdigits[n];
1269 #else
1270 const char *cmpp;
1271 int avail = width > 0 ? width : INT_MAX;
1273 if (__builtin_expect (map != NULL, 0))
1274 mbdigits[n] = mbdigits_extended[n];
1275 else
1276 mbdigits[n]
1277 = curctype->values[_NL_CTYPE_INDIGITS0_MB + n].string;
1279 for (level = 0; level < from_level; level++)
1280 mbdigits[n] = strchr (mbdigits[n], '\0') + 1;
1282 cmpp = mbdigits[n];
1283 while ((unsigned char) *cmpp == c && avail > 0)
1285 if (*++cmpp == '\0')
1286 break;
1287 else
1289 if ((c = inchar ()) == EOF)
1290 break;
1291 --avail;
1295 if (*cmpp == '\0')
1297 if (width > 0)
1298 width = avail;
1299 to_level = from_level;
1300 break;
1303 /* We are pushing all read characters back. */
1304 if (cmpp > mbdigits[n])
1306 ungetc (c, s);
1307 while (--cmpp > mbdigits[n])
1308 ungetc_not_eof ((unsigned char) *cmpp, s);
1309 c = (unsigned char) *cmpp;
1312 /* Advance the pointer to the next string. */
1313 mbdigits[n] = strchr (mbdigits[n], '\0') + 1;
1314 #endif
1317 if (n == 10)
1319 /* Have not yet found the digit. */
1320 for (level = from_level + 1; level <= to_level; ++level)
1322 /* Search all ten digits of this level. */
1323 for (n = 0; n < 10; ++n)
1325 #ifdef COMPILE_WSCANF
1326 if (c == (wint_t) *wcdigits[n])
1327 break;
1329 /* Advance the pointer to the next string. */
1330 ++wcdigits[n];
1331 #else
1332 const char *cmpp;
1333 int avail = width > 0 ? width : INT_MAX;
1335 cmpp = mbdigits[n];
1336 while ((unsigned char) *cmpp == c && avail > 0)
1338 if (*++cmpp == '\0')
1339 break;
1340 else
1342 if ((c = inchar ()) == EOF)
1343 break;
1344 --avail;
1348 if (*cmpp == '\0')
1350 if (width > 0)
1351 width = avail;
1352 break;
1355 /* We are pushing all read characters back. */
1356 if (cmpp > mbdigits[n])
1358 ungetc (c, s);
1359 while (--cmpp > mbdigits[n])
1360 ungetc_not_eof ((unsigned char) *cmpp, s);
1361 c = (unsigned char) *cmpp;
1364 /* Advance the pointer to the next string. */
1365 mbdigits[n] = strchr (mbdigits[n], '\0') + 1;
1366 #endif
1369 if (n < 10)
1371 /* Found it. */
1372 from_level = level;
1373 to_level = level;
1374 break;
1379 if (n < 10)
1380 c = L_('0') + n;
1381 else if ((flags & GROUP)
1382 #ifdef COMPILE_WSCANF
1383 && thousands != L'\0'
1384 #else
1385 && thousands != NULL
1386 #endif
1389 /* Try matching against the thousands separator. */
1390 #ifdef COMPILE_WSCANF
1391 if (c != thousands)
1392 break;
1393 #else
1394 const char *cmpp = thousands;
1395 int avail = width > 0 ? width : INT_MAX;
1397 while ((unsigned char) *cmpp == c && avail > 0)
1399 ADDW (c);
1400 if (*++cmpp == '\0')
1401 break;
1402 else
1404 if ((c = inchar ()) == EOF)
1405 break;
1406 --avail;
1410 if (*cmpp != '\0')
1412 /* We are pushing all read characters back. */
1413 if (cmpp > thousands)
1415 wpsize -= cmpp - thousands;
1416 ungetc (c, s);
1417 while (--cmpp > thousands)
1418 ungetc_not_eof ((unsigned char) *cmpp, s);
1419 c = (unsigned char) *cmpp;
1421 break;
1424 if (width > 0)
1425 width = avail;
1427 /* The last thousands character will be added back by
1428 the ADDW below. */
1429 --wpsize;
1430 #endif
1432 else
1433 break;
1435 ADDW (c);
1436 if (width > 0)
1437 --width;
1439 c = inchar ();
1442 else
1443 /* Read the number into workspace. */
1444 while (c != EOF && width != 0)
1446 if (base == 16)
1448 if (!ISXDIGIT (c))
1449 break;
1451 else if (!ISDIGIT (c) || (int) (c - L_('0')) >= base)
1453 if (base == 10 && (flags & GROUP)
1454 #ifdef COMPILE_WSCANF
1455 && thousands != L'\0'
1456 #else
1457 && thousands != NULL
1458 #endif
1461 /* Try matching against the thousands separator. */
1462 #ifdef COMPILE_WSCANF
1463 if (c != thousands)
1464 break;
1465 #else
1466 const char *cmpp = thousands;
1467 int avail = width > 0 ? width : INT_MAX;
1469 while ((unsigned char) *cmpp == c && avail > 0)
1471 ADDW (c);
1472 if (*++cmpp == '\0')
1473 break;
1474 else
1476 if ((c = inchar ()) == EOF)
1477 break;
1478 --avail;
1482 if (*cmpp != '\0')
1484 /* We are pushing all read characters back. */
1485 if (cmpp > thousands)
1487 wpsize -= cmpp - thousands;
1488 ungetc (c, s);
1489 while (--cmpp > thousands)
1490 ungetc_not_eof ((unsigned char) *cmpp, s);
1491 c = (unsigned char) *cmpp;
1493 break;
1496 if (width > 0)
1497 width = avail;
1499 /* The last thousands character will be added back by
1500 the ADDW below. */
1501 --wpsize;
1502 #endif
1504 else
1505 break;
1507 ADDW (c);
1508 if (width > 0)
1509 --width;
1511 c = inchar ();
1514 if (wpsize == 0
1515 || (wpsize == 1 && (wp[0] == L_('+') || wp[0] == L_('-'))))
1517 /* There was no number. If we are supposed to read a pointer
1518 we must recognize "(nil)" as well. */
1519 if (wpsize == 0 && read_pointer && (width < 0 || width >= 0)
1520 && c == '('
1521 && TOLOWER (inchar ()) == L_('n')
1522 && TOLOWER (inchar ()) == L_('i')
1523 && TOLOWER (inchar ()) == L_('l')
1524 && inchar () == L_(')'))
1525 /* We must produce the value of a NULL pointer. A single
1526 '0' digit is enough. */
1527 ADDW (L_('0'));
1528 else
1530 /* The last read character is not part of the number
1531 anymore. */
1532 ungetc (c, s);
1534 conv_error ();
1537 else
1538 /* The just read character is not part of the number anymore. */
1539 ungetc (c, s);
1541 /* Convert the number. */
1542 ADDW (L_('\0'));
1543 if (need_longlong && (flags & LONGDBL))
1545 if (number_signed)
1546 num.q = __strtoll_internal (wp, &tw, base, flags & GROUP);
1547 else
1548 num.uq = __strtoull_internal (wp, &tw, base, flags & GROUP);
1550 else
1552 if (number_signed)
1553 num.l = __strtol_internal (wp, &tw, base, flags & GROUP);
1554 else
1555 num.ul = __strtoul_internal (wp, &tw, base, flags & GROUP);
1557 if (wp == tw)
1558 conv_error ();
1560 if (!(flags & SUPPRESS))
1562 if (! number_signed)
1564 if (need_longlong && (flags & LONGDBL))
1565 *ARG (unsigned LONGLONG int *) = num.uq;
1566 else if (need_long && (flags & LONG))
1567 *ARG (unsigned long int *) = num.ul;
1568 else if (flags & SHORT)
1569 *ARG (unsigned short int *)
1570 = (unsigned short int) num.ul;
1571 else if (!(flags & CHAR))
1572 *ARG (unsigned int *) = (unsigned int) num.ul;
1573 else
1574 *ARG (unsigned char *) = (unsigned char) num.ul;
1576 else
1578 if (need_longlong && (flags & LONGDBL))
1579 *ARG (LONGLONG int *) = num.q;
1580 else if (need_long && (flags & LONG))
1581 *ARG (long int *) = num.l;
1582 else if (flags & SHORT)
1583 *ARG (short int *) = (short int) num.l;
1584 else if (!(flags & CHAR))
1585 *ARG (int *) = (int) num.l;
1586 else
1587 *ARG (signed char *) = (signed char) num.ul;
1589 ++done;
1591 break;
1593 case L_('e'): /* Floating-point numbers. */
1594 case L_('E'):
1595 case L_('f'):
1596 case L_('F'):
1597 case L_('g'):
1598 case L_('G'):
1599 case L_('a'):
1600 case L_('A'):
1601 c = inchar ();
1602 if (c == EOF)
1603 input_error ();
1605 /* Check for a sign. */
1606 if (c == L_('-') || c == L_('+'))
1608 negative = c == L_('-');
1609 if (width == 0 || inchar () == EOF)
1610 /* EOF is only an input error before we read any chars. */
1611 conv_error ();
1612 if (! ISDIGIT (c) && TOLOWER (c) != L_('i')
1613 && TOLOWER (c) != L_('n'))
1615 #ifdef COMPILE_WSCANF
1616 if (c != decimal)
1618 /* This is no valid number. */
1619 ungetc (c, s);
1620 conv_error ();
1622 #else
1623 /* Match against the decimal point. At this point
1624 we are taking advantage of the fact that we can
1625 push more than one character back. This is
1626 (almost) never necessary since the decimal point
1627 string hopefully never contains more than one
1628 byte. */
1629 const char *cmpp = decimal;
1630 int avail = width > 0 ? width : INT_MAX;
1632 while ((unsigned char) *cmpp == c && avail > 0)
1633 if (*++cmpp == '\0')
1634 break;
1635 else
1637 if (inchar () == EOF)
1638 break;
1639 --avail;
1642 if (*cmpp != '\0')
1644 /* This is no valid number. */
1645 while (1)
1647 ungetc (c, s);
1648 if (cmpp == decimal)
1649 break;
1650 c = (unsigned char) *--cmpp;
1653 conv_error ();
1655 if (width > 0)
1656 width = avail;
1657 #endif
1659 if (width > 0)
1660 --width;
1662 else
1663 negative = 0;
1665 /* Take care for the special arguments "nan" and "inf". */
1666 if (TOLOWER (c) == L_('n'))
1668 /* Maybe "nan". */
1669 ADDW (c);
1670 if (width == 0 || inchar () == EOF || TOLOWER (c) != L_('a'))
1671 conv_error ();
1672 if (width > 0)
1673 --width;
1674 ADDW (c);
1675 if (width == 0 || inchar () == EOF || TOLOWER (c) != L_('n'))
1676 conv_error ();
1677 if (width > 0)
1678 --width;
1679 ADDW (c);
1680 /* It is "nan". */
1681 goto scan_float;
1683 else if (TOLOWER (c) == L_('i'))
1685 /* Maybe "inf" or "infinity". */
1686 ADDW (c);
1687 if (width == 0 || inchar () == EOF || TOLOWER (c) != L_('n'))
1688 conv_error ();
1689 if (width > 0)
1690 --width;
1691 ADDW (c);
1692 if (width == 0 || inchar () == EOF || TOLOWER (c) != L_('f'))
1693 conv_error ();
1694 if (width > 0)
1695 --width;
1696 ADDW (c);
1697 /* It is at least "inf". */
1698 if (width != 0 && inchar () != EOF)
1700 if (TOLOWER (c) == L_('i'))
1702 if (width > 0)
1703 --width;
1704 /* Now we have to read the rest as well. */
1705 ADDW (c);
1706 if (width == 0 || inchar () == EOF
1707 || TOLOWER (c) != L_('n'))
1708 conv_error ();
1709 if (width > 0)
1710 --width;
1711 ADDW (c);
1712 if (width == 0 || inchar () == EOF
1713 || TOLOWER (c) != L_('i'))
1714 conv_error ();
1715 if (width > 0)
1716 --width;
1717 ADDW (c);
1718 if (width == 0 || inchar () == EOF
1719 || TOLOWER (c) != L_('t'))
1720 conv_error ();
1721 if (width > 0)
1722 --width;
1723 ADDW (c);
1724 if (width == 0 || inchar () == EOF
1725 || TOLOWER (c) != L_('y'))
1726 conv_error ();
1727 if (width > 0)
1728 --width;
1729 ADDW (c);
1731 else
1732 /* Never mind. */
1733 ungetc (c, s);
1735 goto scan_float;
1738 is_hexa = 0;
1739 exp_char = L_('e');
1740 if (width != 0 && c == L_('0'))
1742 ADDW (c);
1743 c = inchar ();
1744 if (width > 0)
1745 --width;
1746 if (width != 0 && TOLOWER (c) == L_('x'))
1748 /* It is a number in hexadecimal format. */
1749 ADDW (c);
1751 is_hexa = 1;
1752 exp_char = L_('p');
1754 /* Grouping is not allowed. */
1755 flags &= ~GROUP;
1756 c = inchar ();
1757 if (width > 0)
1758 --width;
1762 got_dot = got_e = 0;
1765 if (ISDIGIT (c))
1766 ADDW (c);
1767 else if (!got_e && is_hexa && ISXDIGIT (c))
1768 ADDW (c);
1769 else if (got_e && wp[wpsize - 1] == exp_char
1770 && (c == L_('-') || c == L_('+')))
1771 ADDW (c);
1772 else if (wpsize > 0 && !got_e
1773 && (CHAR_T) TOLOWER (c) == exp_char)
1775 ADDW (exp_char);
1776 got_e = got_dot = 1;
1778 else
1780 #ifdef COMPILE_WSCANF
1781 if (! got_dot && c == decimal)
1783 ADDW (c);
1784 got_dot = 1;
1786 else if ((flags & GROUP) != 0 && thousands != L'\0'
1787 && ! got_dot && c == thousands)
1788 ADDW (c);
1789 else
1791 /* The last read character is not part of the number
1792 anymore. */
1793 ungetc (c, s);
1794 break;
1796 #else
1797 const char *cmpp = decimal;
1798 int avail = width > 0 ? width : INT_MAX;
1800 if (! got_dot)
1802 while ((unsigned char) *cmpp == c && avail > 0)
1803 if (*++cmpp == '\0')
1804 break;
1805 else
1807 if (inchar () == EOF)
1808 break;
1809 --avail;
1813 if (*cmpp == '\0')
1815 /* Add all the characters. */
1816 for (cmpp = decimal; *cmpp != '\0'; ++cmpp)
1817 ADDW ((unsigned char) *cmpp);
1818 if (width > 0)
1819 width = avail;
1820 got_dot = 1;
1822 else
1824 /* Figure out whether it is a thousands separator.
1825 There is one problem: we possibly read more than
1826 one character. We cannot push them back but since
1827 we know that parts of the `decimal' string matched,
1828 we can compare against it. */
1829 const char *cmp2p = thousands;
1831 if ((flags & GROUP) != 0 && thousands != NULL
1832 && ! got_dot)
1834 while (cmp2p - thousands < cmpp - decimal
1835 && *cmp2p == decimal[cmp2p - thousands])
1836 ++cmp2p;
1837 if (cmp2p - thousands == cmpp - decimal)
1839 while ((unsigned char) *cmp2p == c && avail > 0)
1840 if (*++cmp2p == '\0')
1841 break;
1842 else
1844 if (inchar () == EOF)
1845 break;
1846 --avail;
1851 if (cmp2p != NULL && *cmp2p == '\0')
1853 /* Add all the characters. */
1854 for (cmpp = thousands; *cmpp != '\0'; ++cmpp)
1855 ADDW ((unsigned char) *cmpp);
1856 if (width > 0)
1857 width = avail;
1859 else
1861 /* The last read character is not part of the number
1862 anymore. */
1863 ungetc (c, s);
1864 break;
1867 #endif
1869 if (width > 0)
1870 --width;
1872 while (width != 0 && inchar () != EOF);
1874 /* Have we read any character? If we try to read a number
1875 in hexadecimal notation and we have read only the `0x'
1876 prefix or no exponent this is an error. */
1877 if (wpsize == 0 || (is_hexa && (wpsize == 2 || ! got_e)))
1878 conv_error ();
1880 scan_float:
1881 /* Convert the number. */
1882 ADDW (L_('\0'));
1883 if (flags & LONGDBL)
1885 long double d = __strtold_internal (wp, &tw, flags & GROUP);
1886 if (!(flags & SUPPRESS) && tw != wp)
1887 *ARG (long double *) = negative ? -d : d;
1889 else if (flags & LONG)
1891 double d = __strtod_internal (wp, &tw, flags & GROUP);
1892 if (!(flags & SUPPRESS) && tw != wp)
1893 *ARG (double *) = negative ? -d : d;
1895 else
1897 float d = __strtof_internal (wp, &tw, flags & GROUP);
1898 if (!(flags & SUPPRESS) && tw != wp)
1899 *ARG (float *) = negative ? -d : d;
1902 if (tw == wp)
1903 conv_error ();
1905 if (!(flags & SUPPRESS))
1906 ++done;
1907 break;
1909 case L_('['): /* Character class. */
1910 if (flags & LONG)
1911 STRING_ARG (wstr, wchar_t);
1912 else
1913 STRING_ARG (str, char);
1915 if (*f == L_('^'))
1917 ++f;
1918 not_in = 1;
1920 else
1921 not_in = 0;
1923 if (width < 0)
1924 /* There is no width given so there is also no limit on the
1925 number of characters we read. Therefore we set width to
1926 a very high value to make the algorithm easier. */
1927 width = INT_MAX;
1929 #ifdef COMPILE_WSCANF
1930 /* Find the beginning and the end of the scanlist. We are not
1931 creating a lookup table since it would have to be too large.
1932 Instead we search each time through the string. This is not
1933 a constant lookup time but who uses this feature deserves to
1934 be punished. */
1935 tw = (wchar_t *) f; /* Marks the beginning. */
1937 if (*f == L']')
1938 ++f;
1940 while ((fc = *f++) != L'\0' && fc != L']');
1942 if (fc == L'\0')
1943 conv_error ();
1944 wp = (wchar_t *) f - 1;
1945 #else
1946 /* Fill WP with byte flags indexed by character.
1947 We will use this flag map for matching input characters. */
1948 if (wpmax < UCHAR_MAX + 1)
1950 wpmax = UCHAR_MAX + 1;
1951 wp = (char *) alloca (wpmax);
1953 memset (wp, '\0', UCHAR_MAX + 1);
1955 fc = *f;
1956 if (fc == ']' || fc == '-')
1958 /* If ] or - appears before any char in the set, it is not
1959 the terminator or separator, but the first char in the
1960 set. */
1961 wp[fc] = 1;
1962 ++f;
1965 while ((fc = *f++) != '\0' && fc != ']')
1966 if (fc == '-' && *f != '\0' && *f != ']'
1967 && (unsigned char) f[-2] <= (unsigned char) *f)
1969 /* Add all characters from the one before the '-'
1970 up to (but not including) the next format char. */
1971 for (fc = (unsigned char) f[-2]; fc < (unsigned char) *f; ++fc)
1972 wp[fc] = 1;
1974 else
1975 /* Add the character to the flag map. */
1976 wp[fc] = 1;
1978 if (fc == '\0')
1979 conv_error();
1980 #endif
1982 if (flags & LONG)
1984 size_t now = read_in;
1985 #ifdef COMPILE_WSCANF
1986 if (inchar () == WEOF)
1987 input_error ();
1991 wchar_t *runp;
1993 /* Test whether it's in the scanlist. */
1994 runp = tw;
1995 while (runp < wp)
1997 if (runp[0] == L'-' && runp[1] != '\0' && runp + 1 != wp
1998 && runp != tw
1999 && (unsigned int) runp[-1] <= (unsigned int) runp[1])
2001 /* Match against all characters in between the
2002 first and last character of the sequence. */
2003 wchar_t wc;
2005 for (wc = runp[-1] + 1; wc <= runp[1]; ++wc)
2006 if ((wint_t) wc == c)
2007 break;
2009 if (wc <= runp[1] && !not_in)
2010 break;
2011 if (wc <= runp[1] && not_in)
2013 /* The current character is not in the
2014 scanset. */
2015 ungetc (c, s);
2016 goto out;
2019 runp += 2;
2021 else
2023 if ((wint_t) *runp == c && !not_in)
2024 break;
2025 if ((wint_t) *runp == c && not_in)
2027 ungetc (c, s);
2028 goto out;
2031 ++runp;
2035 if (runp == wp && !not_in)
2037 ungetc (c, s);
2038 goto out;
2041 if (!(flags & SUPPRESS))
2043 *wstr++ = c;
2045 if ((flags & MALLOC)
2046 && wstr == (wchar_t *) *strptr + strsize)
2048 /* Enlarge the buffer. */
2049 wstr = (wchar_t *) realloc (*strptr,
2050 (2 * strsize)
2051 * sizeof (wchar_t));
2052 if (wstr == NULL)
2054 /* Can't allocate that much. Last-ditch
2055 effort. */
2056 wstr = (wchar_t *)
2057 realloc (*strptr, (strsize + 1)
2058 * sizeof (wchar_t));
2059 if (wstr == NULL)
2061 /* We lose. Oh well. Terminate the string
2062 and stop converting, so at least we don't
2063 skip any input. */
2064 ((wchar_t *) (*strptr))[strsize - 1] = L'\0';
2065 ++done;
2066 conv_error ();
2068 else
2070 *strptr = (char *) wstr;
2071 wstr += strsize;
2072 ++strsize;
2075 else
2077 *strptr = (char *) wstr;
2078 wstr += strsize;
2079 strsize *= 2;
2084 while (--width > 0 && inchar () != WEOF);
2085 out:
2086 #else
2087 char buf[MB_LEN_MAX];
2088 size_t cnt = 0;
2089 mbstate_t cstate;
2091 if (inchar () == EOF)
2092 input_error ();
2094 memset (&cstate, '\0', sizeof (cstate));
2098 if (wp[c] == not_in)
2100 ungetc_not_eof (c, s);
2101 break;
2104 /* This is easy. */
2105 if (!(flags & SUPPRESS))
2107 size_t n;
2109 /* Convert it into a wide character. */
2110 buf[0] = c;
2111 n = __mbrtowc (wstr, buf, 1, &cstate);
2113 if (n == (size_t) -2)
2115 /* Possibly correct character, just not enough
2116 input. */
2117 ++cnt;
2118 assert (cnt < MB_CUR_MAX);
2119 continue;
2121 cnt = 0;
2123 ++wstr;
2124 if ((flags & MALLOC)
2125 && wstr == (wchar_t *) *strptr + strsize)
2127 /* Enlarge the buffer. */
2128 wstr = (wchar_t *) realloc (*strptr,
2129 (2 * strsize
2130 * sizeof (wchar_t)));
2131 if (wstr == NULL)
2133 /* Can't allocate that much. Last-ditch
2134 effort. */
2135 wstr = (wchar_t *)
2136 realloc (*strptr, ((strsize + 1)
2137 * sizeof (wchar_t)));
2138 if (wstr == NULL)
2140 /* We lose. Oh well. Terminate the
2141 string and stop converting,
2142 so at least we don't skip any input. */
2143 ((wchar_t *) (*strptr))[strsize - 1] = L'\0';
2144 ++done;
2145 conv_error ();
2147 else
2149 *strptr = (char *) wstr;
2150 wstr += strsize;
2151 ++strsize;
2154 else
2156 *strptr = (char *) wstr;
2157 wstr += strsize;
2158 strsize *= 2;
2163 if (--width <= 0)
2164 break;
2166 while (inchar () != EOF);
2168 if (cnt != 0)
2169 /* We stopped in the middle of recognizing another
2170 character. That's a problem. */
2171 encode_error ();
2172 #endif
2174 if (now == read_in)
2175 /* We haven't successfully read any character. */
2176 conv_error ();
2178 if (!(flags & SUPPRESS))
2180 *wstr++ = L'\0';
2182 if ((flags & MALLOC)
2183 && wstr - (wchar_t *) *strptr != strsize)
2185 wchar_t *cp = (wchar_t *)
2186 realloc (*strptr, ((wstr - (wchar_t *) *strptr)
2187 * sizeof(wchar_t)));
2188 if (cp != NULL)
2189 *strptr = (char *) cp;
2192 ++done;
2195 else
2197 size_t now = read_in;
2199 if (inchar () == EOF)
2200 input_error ();
2202 #ifdef COMPILE_WSCANF
2204 memset (&state, '\0', sizeof (state));
2208 wchar_t *runp;
2209 size_t n;
2211 /* Test whether it's in the scanlist. */
2212 runp = tw;
2213 while (runp < wp)
2215 if (runp[0] == L'-' && runp[1] != '\0' && runp + 1 != wp
2216 && runp != tw
2217 && (unsigned int) runp[-1] <= (unsigned int) runp[1])
2219 /* Match against all characters in between the
2220 first and last character of the sequence. */
2221 wchar_t wc;
2223 for (wc = runp[-1] + 1; wc <= runp[1]; ++wc)
2224 if ((wint_t) wc == c)
2225 break;
2227 if (wc <= runp[1] && !not_in)
2228 break;
2229 if (wc <= runp[1] && not_in)
2231 /* The current character is not in the
2232 scanset. */
2233 ungetc (c, s);
2234 goto out2;
2237 runp += 2;
2239 else
2241 if ((wint_t) *runp == c && !not_in)
2242 break;
2243 if ((wint_t) *runp == c && not_in)
2245 ungetc (c, s);
2246 goto out2;
2249 ++runp;
2253 if (runp == wp && !not_in)
2255 ungetc (c, s);
2256 goto out2;
2259 if (!(flags & SUPPRESS))
2261 if ((flags & MALLOC)
2262 && str + MB_CUR_MAX >= *strptr + strsize)
2264 /* Enlarge the buffer. */
2265 size_t strleng = str - *strptr;
2266 char *newstr;
2268 newstr = (char *) realloc (*strptr, 2 * strsize);
2269 if (newstr == NULL)
2271 /* Can't allocate that much. Last-ditch
2272 effort. */
2273 newstr = (char *) realloc (*strptr,
2274 strleng + MB_CUR_MAX);
2275 if (newstr == NULL)
2277 /* We lose. Oh well. Terminate the string
2278 and stop converting, so at least we don't
2279 skip any input. */
2280 ((char *) (*strptr))[strleng] = '\0';
2281 ++done;
2282 conv_error ();
2284 else
2286 *strptr = newstr;
2287 str = newstr + strleng;
2288 strsize = strleng + MB_CUR_MAX;
2291 else
2293 *strptr = newstr;
2294 str = newstr + strleng;
2295 strsize *= 2;
2300 n = __wcrtomb (!(flags & SUPPRESS) ? str : NULL, c, &state);
2301 if (n == (size_t) -1)
2302 encode_error ();
2304 assert (n <= MB_CUR_MAX);
2305 str += n;
2307 while (--width > 0 && inchar () != WEOF);
2308 out2:
2309 #else
2312 if (wp[c] == not_in)
2314 ungetc_not_eof (c, s);
2315 break;
2318 /* This is easy. */
2319 if (!(flags & SUPPRESS))
2321 *str++ = c;
2322 if ((flags & MALLOC)
2323 && (char *) str == *strptr + strsize)
2325 /* Enlarge the buffer. */
2326 size_t newsize = 2 * strsize;
2328 allocagain:
2329 str = (char *) realloc (*strptr, newsize);
2330 if (str == NULL)
2332 /* Can't allocate that much. Last-ditch
2333 effort. */
2334 if (newsize > strsize + 1)
2336 newsize = strsize + 1;
2337 goto allocagain;
2339 /* We lose. Oh well. Terminate the
2340 string and stop converting,
2341 so at least we don't skip any input. */
2342 ((char *) (*strptr))[strsize - 1] = '\0';
2343 ++done;
2344 conv_error ();
2346 else
2348 *strptr = (char *) str;
2349 str += strsize;
2350 strsize = newsize;
2355 while (--width > 0 && inchar () != EOF);
2356 #endif
2358 if (now == read_in)
2359 /* We haven't successfully read any character. */
2360 conv_error ();
2362 if (!(flags & SUPPRESS))
2364 #ifdef COMPILE_WSCANF
2365 /* We have to emit the code to get into the initial
2366 state. */
2367 char buf[MB_LEN_MAX];
2368 size_t n = __wcrtomb (buf, L'\0', &state);
2369 if (n > 0 && (flags & MALLOC)
2370 && str + n >= *strptr + strsize)
2372 /* Enlarge the buffer. */
2373 size_t strleng = str - *strptr;
2374 char *newstr;
2376 newstr = (char *) realloc (*strptr, strleng + n + 1);
2377 if (newstr == NULL)
2379 /* We lose. Oh well. Terminate the string
2380 and stop converting, so at least we don't
2381 skip any input. */
2382 ((char *) (*strptr))[strleng] = '\0';
2383 ++done;
2384 conv_error ();
2386 else
2388 *strptr = newstr;
2389 str = newstr + strleng;
2390 strsize = strleng + n + 1;
2394 str = __mempcpy (str, buf, n);
2395 #endif
2396 *str++ = '\0';
2398 if ((flags & MALLOC) && str - *strptr != strsize)
2400 char *cp = (char *) realloc (*strptr, str - *strptr);
2401 if (cp != NULL)
2402 *strptr = cp;
2405 ++done;
2408 break;
2410 case L_('p'): /* Generic pointer. */
2411 base = 16;
2412 /* A PTR must be the same size as a `long int'. */
2413 flags &= ~(SHORT|LONGDBL);
2414 if (need_long)
2415 flags |= LONG;
2416 number_signed = 0;
2417 read_pointer = 1;
2418 goto number;
2420 default:
2421 /* If this is an unknown format character punt. */
2422 conv_error ();
2426 /* The last thing we saw in the format string was a white space.
2427 Consume the last white spaces. */
2428 if (skip_space)
2431 c = inchar ();
2432 while (ISSPACE (c));
2433 ungetc (c, s);
2436 errout:
2437 /* Unlock stream. */
2438 UNLOCK_STREAM (s);
2440 if (errp != NULL)
2441 *errp |= errval;
2443 return done;
2446 #ifdef COMPILE_WSCANF
2448 __vfwscanf (FILE *s, const wchar_t *format, va_list argptr)
2450 return _IO_vfwscanf (s, format, argptr, NULL);
2452 #else
2454 __vfscanf (FILE *s, const char *format, va_list argptr)
2456 return INTUSE(_IO_vfscanf) (s, format, argptr, NULL);
2458 libc_hidden_def (__vfscanf)
2459 #endif
2461 #ifdef COMPILE_WSCANF
2462 weak_alias (__vfwscanf, vfwscanf)
2463 #else
2464 weak_alias (__vfscanf, vfscanf)
2465 INTDEF(_IO_vfscanf)
2466 #endif