Update.
[glibc.git] / stdio-common / vfscanf.c
blob97163de60da0d324b5a16aa1de668127077d3dfe
1 /* Copyright (C) 1991-1999, 2000, 2001, 2002 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
4 The GNU C Library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Lesser General Public
6 License as published by the Free Software Foundation; either
7 version 2.1 of the License, or (at your option) any later version.
9 The GNU C Library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
14 You should have received a copy of the GNU Lesser General Public
15 License along with the GNU C Library; if not, write to the Free
16 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
17 02111-1307 USA. */
19 #include <assert.h>
20 #include <errno.h>
21 #include <limits.h>
22 #include <ctype.h>
23 #include <stdarg.h>
24 #include <stdio.h>
25 #include <stdint.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <wchar.h>
29 #include <wctype.h>
30 #include <bits/libc-lock.h>
31 #include <locale/localeinfo.h>
/* Select the widest integer type used by this file: `long long' when
   compiled with GCC (HAVE_LONGLONG is defined as well), plain `long'
   for any other compiler. */
33 #ifdef __GNUC__
34 # define HAVE_LONGLONG
35 # define LONGLONG long long
36 #else
37 # define LONGLONG long
38 #endif
40 /* Determine whether we have to handle `long long' at all.
   need_longlong is 0 when LONG_MAX equals LONG_LONG_MAX and 1 otherwise. */
41 #if LONG_MAX == LONG_LONG_MAX
42 # define need_longlong 0
43 #else
44 # define need_longlong 1
45 #endif
47 /* Determine whether we have to handle `long'.
   need_long is 0 when INT_MAX equals LONG_MAX and 1 otherwise. */
48 #if INT_MAX == LONG_MAX
49 # define need_long 0
50 #else
51 # define need_long 1
52 #endif
54 /* These are the flags in the conversion format.  Each one is a distinct
   bit; the scanner ORs them into its local `flags' variable while it
   parses a single conversion specification and tests them with `&'.
   Note that the `ll', `q' and `L' modifiers set both LONGDBL and LONG. */
55 #define LONG 0x001 /* l: long or double */
56 #define LONGDBL 0x002 /* L: long long or long double */
57 #define SHORT 0x004 /* h: short */
58 #define SUPPRESS 0x008 /* *: suppress assignment */
59 #define POINTER 0x010 /* weird %p pointer (`fake hex') */
60 #define NOSKIP 0x020 /* do not skip blanks */
61 #define WIDTH 0x040 /* width was given */
62 #define GROUP 0x080 /* ': group numbers */
63 #define MALLOC 0x100 /* a: malloc strings */
64 #define CHAR 0x200 /* hh: char */
65 #define I18N 0x400 /* I: use locale's digits */
/* Character-access layer when built as part of libio.  The macros below
   are meant to be expanded inside the scanning function: they refer to
   its locals `c' (last character read), `read_in' (count of characters
   consumed), `inchar_errno' and the stream `s'. */
68 #ifdef USE_IN_LIBIO
69 # include <libioP.h>
70 # include <libio.h>
/* Make `va_list' name libio's own va_list type in this file. */
72 # undef va_list
73 # define va_list _IO_va_list
/* Wide-character build: the same source is compiled with wide
   characters, wide classification functions and the wcsto* converters. */
75 # ifdef COMPILE_WSCANF
/* Push C back onto S and decrement read_in, unless C is WEOF. */
76 # define ungetc(c, s) ((void) (c == WEOF \
77 || (--read_in, \
78 INTUSE(_IO_sputbackwc) (s, c))))
/* Same as ungetc, but without the WEOF test; the caller must know that
   C is a real character. */
79 # define ungetc_not_eof(c, s) ((void) (--read_in, \
80 INTUSE(_IO_sputbackwc) (s, c)))
/* Read the next wide character into `c' and yield it.  If `c' already
   holds WEOF, do not touch the stream: restore errno from inchar_errno
   and yield WEOF again.  On a successful read read_in is incremented;
   on failure the current errno is saved in inchar_errno. */
81 # define inchar() (c == WEOF ? ((errno = inchar_errno), WEOF) \
82 : ((c = _IO_getwc_unlocked (s)), \
83 (void) (c != WEOF \
84 ? ++read_in \
85 : (size_t) (inchar_errno = errno)), c))
/* Character-type neutral helpers, wide versions. */
87 # define MEMCPY(d, s, n) __wmemcpy (d, s, n)
88 # define ISSPACE(Ch) iswspace (Ch)
89 # define ISDIGIT(Ch) iswdigit (Ch)
90 # define ISXDIGIT(Ch) iswxdigit (Ch)
91 # define TOLOWER(Ch) towlower (Ch)
/* Return WEOF from the enclosing function unless _IO_fwide (s, 1)
   reports 1 (presumably: the stream is or can become wide-oriented). */
92 # define ORIENT if (_IO_fwide (s, 1) != 1) return WEOF
/* Redirect the numeric conversion routines to their wide counterparts. */
93 # define __strtoll_internal __wcstoll_internal
94 # define __strtoull_internal __wcstoull_internal
95 # define __strtol_internal __wcstol_internal
96 # define __strtoul_internal __wcstoul_internal
97 # define __strtold_internal __wcstold_internal
98 # define __strtod_internal __wcstod_internal
99 # define __strtof_internal __wcstof_internal
/* L_ turns a literal into a wide literal; CHAR_T/UCHAR_T/WINT_T are the
   character types the scanner works with in this build. */
101 # define L_(Str) L##Str
102 # define CHAR_T wchar_t
103 # define UCHAR_T unsigned int
104 # define WINT_T wint_t
/* From here on EOF means WEOF, so shared code can compare against EOF. */
105 # undef EOF
106 # define EOF WEOF
/* Narrow (byte) build. */
107 # else
/* Push C back onto S (as an unsigned char) and decrement read_in,
   unless C is EOF. */
108 # define ungetc(c, s) ((void) ((int) c == EOF \
109 || (--read_in, \
110 INTUSE(_IO_sputbackc) (s, (unsigned char) c))))
/* Same as ungetc, but without the EOF test. */
111 # define ungetc_not_eof(c, s) ((void) (--read_in, \
112 INTUSE(_IO_sputbackc) (s, (unsigned char) c)))
/* Read the next byte into `c' and yield it.  If `c' already holds EOF,
   restore errno from inchar_errno and yield EOF without reading.  On
   success read_in is incremented; on failure errno is saved in
   inchar_errno. */
113 # define inchar() (c == EOF ? ((errno = inchar_errno), EOF) \
114 : ((c = _IO_getc_unlocked (s)), \
115 (void) (c != EOF \
116 ? ++read_in \
117 : (size_t) (inchar_errno = errno)), c))
/* Character-type neutral helpers, byte versions. */
118 # define MEMCPY(d, s, n) memcpy (d, s, n)
119 # define ISSPACE(Ch) isspace (Ch)
120 # define ISDIGIT(Ch) isdigit (Ch)
121 # define ISXDIGIT(Ch) isxdigit (Ch)
122 # define TOLOWER(Ch) tolower (Ch)
/* Return EOF from the enclosing function when s->_vtable_offset is 0 and
   _IO_fwide (s, -1) does not report -1 (presumably: the stream cannot be
   byte-oriented). */
123 # define ORIENT if (s->_vtable_offset == 0 \
124 && _IO_fwide (s, -1) != -1) \
125 return EOF
/* L_ leaves literals unchanged; the scanner works on plain chars. */
127 # define L_(Str) Str
128 # define CHAR_T char
129 # define UCHAR_T unsigned char
130 # define WINT_T int
131 # endif
/* Early-exit macros for the libio build.  Each one unlocks the stream S,
   ends the cleanup region and returns from the enclosing function; they
   use its locals `errp', `done' and `s'.  When ERRP is non-NULL a bit
   describing the failure is ORed into *ERRP: 4 for an encoding error,
   2 for a conversion (matching) error, 1 for an input error. */

/* Encoding error: set errno to EILSEQ and return the number of
   assignments done so far. */
133 # define encode_error() do { \
134 if (errp != NULL) *errp |= 4; \
135 _IO_funlockfile (s); \
136 __libc_cleanup_end (0); \
137 __set_errno (EILSEQ); \
138 return done; \
139 } while (0)
/* Matching failure: return the number of assignments done so far. */
140 # define conv_error() do { \
141 if (errp != NULL) *errp |= 2; \
142 _IO_funlockfile (s); \
143 __libc_cleanup_end (0); \
144 return done; \
145 } while (0)
/* Input failure: return `done' if it is nonzero, otherwise EOF. */
146 # define input_error() do { \
147 _IO_funlockfile (s); \
148 if (errp != NULL) *errp |= 1; \
149 __libc_cleanup_end (0); \
150 return done ?: EOF; \
151 } while (0)
/* Allocation failure: set errno to ENOMEM and return EOF.  Unlike the
   other exits this one does not touch *ERRP. */
152 # define memory_error() do { \
153 _IO_funlockfile (s); \
154 __set_errno (ENOMEM); \
155 __libc_cleanup_end (0); \
156 return EOF; \
157 } while (0)
/* Validate the arguments of the scanning function and return EOF from it
   if they are unusable: EBADF when the stream has _IO_NO_READS set,
   MAYBE_SET_EINVAL when FORMAT is NULL. */
158 # define ARGCHECK(s, format) \
159 do \
161 /* Check file argument for consistency. */ \
162 CHECK_FILE (s, EOF); \
163 if (s->_flags & _IO_NO_READS) \
165 __set_errno (EBADF); \
166 return EOF; \
168 else if (format == NULL) \
170 MAYBE_SET_EINVAL; \
171 return EOF; \
173 } while (0)
/* Open a cleanup region whose handler is _IO_funlockfile on S, then lock
   the stream. */
174 # define LOCK_STREAM(S) \
175 __libc_cleanup_region_start (1, (void (*) (void *)) &_IO_funlockfile, (S)); \
176 _IO_flockfile (S)
/* Unlock the stream and close the cleanup region without running the
   handler (argument 0). */
177 # define UNLOCK_STREAM(S) \
178 _IO_funlockfile (S); \
179 __libc_cleanup_region_end (0)
/* Fallback definitions used when the file is NOT built as part of libio.
   They mirror the libio macros but work on plain stdio and do not report
   through an `errp' argument. */
180 #else
/* Decrement read_in when C is not EOF, then push C back with the real
   ungetc function (a macro's own name is not re-expanded inside its
   expansion). */
181 # define ungetc(c, s) ((void) (c != EOF && --read_in), ungetc (c, s))
/* Same without the EOF test; `(ungetc)' in parentheses bypasses the
   function-like macro above. */
182 # define ungetc_not_eof(c, s) (--read_in, (ungetc) (c, s))
/* Read the next byte into `c' and yield it, incrementing read_in on
   success; once `c' is EOF keep yielding EOF without reading. */
183 # define inchar() (c == EOF ? EOF \
184 : ((c = getc (s)), (void) (c != EOF && ++read_in), c))
/* Character-type neutral helpers, byte versions. */
185 # define MEMCPY(d, s, n) memcpy (d, s, n)
186 # define ISSPACE(Ch) isspace (Ch)
187 # define ISDIGIT(Ch) isdigit (Ch)
188 # define ISXDIGIT(Ch) isxdigit (Ch)
189 # define TOLOWER(Ch) tolower (Ch)
191 # define L_(Str) Str
192 # define CHAR_T char
193 # define UCHAR_T unsigned char
194 # define WINT_T int
/* Early-exit macros: unlock the stream and return from the enclosing
   function.  encode_error sets errno to EILSEQ and returns `done'. */
196 # define encode_error() do { \
197 funlockfile (s); \
198 __set_errno (EILSEQ); \
199 return done; \
200 } while (0)
/* Matching failure: return the number of assignments done so far. */
201 # define conv_error() do { \
202 funlockfile (s); \
203 return done; \
204 } while (0)
/* Input failure: return `done' if it is nonzero, otherwise EOF. */
205 # define input_error() do { \
206 funlockfile (s); \
207 return done ?: EOF; \
208 } while (0)
/* Allocation failure: set errno to ENOMEM and return EOF. */
209 # define memory_error() do { \
210 funlockfile (s); \
211 __set_errno (ENOMEM); \
212 return EOF; \
213 } while (0)
/* Validate the arguments of the scanning function and return EOF from it
   if they are unusable: EBADF for an invalid or non-readable stream,
   EINVAL when FORMAT is NULL. */
214 # define ARGCHECK(s, format) \
215 do \
217 /* Check file argument for consistency. */ \
218 if (!__validfp (s) || !s->__mode.__read) \
220 __set_errno (EBADF); \
221 return EOF; \
223 else if (format == NULL) \
225 __set_errno (EINVAL); \
226 return EOF; \
228 } while (0)
/* Stream locking is currently compiled out in this configuration: the
   `#if 1' branch turns every lock/unlock macro into nothing.  The `#else'
   branch holds the intended implementation using __flockfile and
   __funlockfile inside a cleanup region. */
229 #if 1
230 /* XXX For now !!! */
231 # define flockfile(S) /* nothing */
232 # define funlockfile(S) /* nothing */
233 # define LOCK_STREAM(S)
234 # define UNLOCK_STREAM(S)
235 #else
236 # define LOCK_STREAM(S) \
237 __libc_cleanup_region_start (&__funlockfile, (S)); \
238 __flockfile (S)
239 # define UNLOCK_STREAM(S) \
240 __funlockfile (S); \
241 __libc_cleanup_region_end (0)
242 #endif
243 #endif
246 /* Read formatted input from S according to the format string
247 FORMAT, using the argument list in ARG.
248 Return the number of assignments made, or -1 for an input error. */
249 #ifdef USE_IN_LIBIO
250 # ifdef COMPILE_WSCANF
252 _IO_vfwscanf (s, format, argptr, errp)
253 _IO_FILE *s;
254 const wchar_t *format;
255 _IO_va_list argptr;
256 int *errp;
257 # else
259 _IO_vfscanf (s, format, argptr, errp)
260 _IO_FILE *s;
261 const char *format;
262 _IO_va_list argptr;
263 int *errp;
264 # endif
265 #else
267 __vfscanf (FILE *s, const char *format, va_list argptr)
268 #endif
270 va_list arg;
271 register const CHAR_T *f = format;
272 register UCHAR_T fc; /* Current character of the format. */
273 register WINT_T done = 0; /* Assignments done. */
274 register size_t read_in = 0; /* Chars read in. */
275 register WINT_T c = 0; /* Last char read. */
276 register int width; /* Maximum field width. */
277 register int flags; /* Modifiers for current format element. */
279 /* Errno of last failed inchar call. */
280 int inchar_errno = 0;
281 /* Status for reading F-P nums. */
282 char got_dot, got_e, negative;
283 /* If a [...] is a [^...]. */
284 CHAR_T not_in;
285 #define exp_char not_in
286 /* Base for integral numbers. */
287 int base;
288 /* Signedness for integral numbers. */
289 int number_signed;
290 #define is_hexa number_signed
291 /* Decimal point character. */
292 #ifdef COMPILE_WSCANF
293 wchar_t decimal;
294 #else
295 const char *decimal;
296 #endif
297 /* The thousands character of the current locale. */
298 #ifdef COMPILE_WSCANF
299 wchar_t thousands;
300 #else
301 const char *thousands;
302 #endif
303 /* State for the conversions. */
304 mbstate_t state;
305 /* Integral holding variables. */
306 union
308 long long int q;
309 unsigned long long int uq;
310 long int l;
311 unsigned long int ul;
312 } num;
313 /* Character-buffer pointer. */
314 char *str = NULL;
315 wchar_t *wstr = NULL;
316 char **strptr = NULL;
317 ssize_t strsize = 0;
318 /* We must not react on white spaces immediately because they can
319 possibly be matched even if in the input stream no character is
320 available anymore. */
321 int skip_space = 0;
322 /* Nonzero if we are reading a pointer. */
323 int read_pointer;
324 /* Workspace. */
325 CHAR_T *tw; /* Temporary pointer. */
326 CHAR_T *wp = NULL; /* Workspace. */
327 size_t wpmax = 0; /* Maximal size of workspace. */
328 size_t wpsize; /* Currently used bytes in workspace. */
329 #define ADDW(Ch) \
330 do \
332 if (wpsize == wpmax) \
334 CHAR_T *old = wp; \
335 wpmax = (UCHAR_MAX + 1 > 2 * wpmax ? UCHAR_MAX + 1 : 2 * wpmax); \
336 wp = (CHAR_T *) alloca (wpmax * sizeof (wchar_t)); \
337 if (old != NULL) \
338 MEMCPY (wp, old, wpsize); \
340 wp[wpsize++] = (Ch); \
342 while (0)
344 #ifdef __va_copy
345 __va_copy (arg, argptr);
346 #else
347 arg = (va_list) argptr;
348 #endif
350 #ifdef ORIENT
351 ORIENT;
352 #endif
354 ARGCHECK (s, format);
356 /* Figure out the decimal point character. */
357 #ifdef COMPILE_WSCANF
358 decimal = _NL_CURRENT_WORD (LC_NUMERIC, _NL_NUMERIC_DECIMAL_POINT_WC);
359 #else
360 decimal = _NL_CURRENT (LC_NUMERIC, DECIMAL_POINT);
361 #endif
362 /* Figure out the thousands separator character. */
363 #ifdef COMPILE_WSCANF
364 thousands = _NL_CURRENT_WORD (LC_NUMERIC, _NL_NUMERIC_THOUSANDS_SEP_WC);
365 #else
366 thousands = _NL_CURRENT (LC_NUMERIC, THOUSANDS_SEP);
367 if (*thousands == '\0')
368 thousands = NULL;
369 #endif
371 /* Lock the stream. */
372 LOCK_STREAM (s);
375 #ifndef COMPILE_WSCANF
376 /* From now on we use `state' to convert the format string. */
377 memset (&state, '\0', sizeof (state));
378 #endif
380 /* Run through the format string. */
381 while (*f != '\0')
383 unsigned int argpos;
384 /* Extract the next argument, which is of type TYPE.
385 For a %N$... spec, this is the Nth argument from the beginning;
386 otherwise it is the next argument after the state now in ARG. */
387 #ifdef __va_copy
388 # define ARG(type) (argpos == 0 ? va_arg (arg, type) : \
389 ({ unsigned int pos = argpos; \
390 va_list arg; \
391 __va_copy (arg, argptr); \
392 while (--pos > 0) \
393 (void) va_arg (arg, void *); \
394 va_arg (arg, type); \
396 #else
397 # if 0
398 /* XXX Possible optimization. */
399 # define ARG(type) (argpos == 0 ? va_arg (arg, type) : \
400 ({ va_list arg = (va_list) argptr; \
401 arg = (va_list) ((char *) arg \
402 + (argpos - 1) \
403 * __va_rounded_size (void *)); \
404 va_arg (arg, type); \
406 # else
407 # define ARG(type) (argpos == 0 ? va_arg (arg, type) : \
408 ({ unsigned int pos = argpos; \
409 va_list arg = (va_list) argptr; \
410 while (--pos > 0) \
411 (void) va_arg (arg, void *); \
412 va_arg (arg, type); \
414 # endif
415 #endif
417 #ifndef COMPILE_WSCANF
418 if (!isascii ((unsigned char) *f))
420 /* Non-ASCII, may be a multibyte. */
421 int len = __mbrlen (f, strlen (f), &state);
422 if (len > 0)
426 c = inchar ();
427 if (c == EOF)
428 input_error ();
429 else if (c != (unsigned char) *f++)
431 ungetc_not_eof (c, s);
432 conv_error ();
435 while (--len > 0);
436 continue;
439 #endif
441 fc = *f++;
442 if (fc != '%')
444 /* Remember to skip spaces. */
445 if (ISSPACE (fc))
447 skip_space = 1;
448 continue;
451 /* Read a character. */
452 c = inchar ();
454 /* Characters other than format specs must just match. */
455 if (c == EOF)
456 input_error ();
458 /* We saw white space char as the last character in the format
459 string. Now it's time to skip all leading white space. */
460 if (skip_space)
462 while (ISSPACE (c))
463 if (inchar () == EOF && errno == EINTR)
464 conv_error ();
465 skip_space = 0;
468 if (c != fc)
470 ungetc (c, s);
471 conv_error ();
474 continue;
477 /* This is the start of the conversion string. */
478 flags = 0;
480 /* Not yet decided whether we read a pointer or not. */
481 read_pointer = 0;
483 /* Initialize state of modifiers. */
484 argpos = 0;
486 /* Prepare temporary buffer. */
487 wpsize = 0;
489 /* Check for a positional parameter specification. */
490 if (ISDIGIT ((UCHAR_T) *f))
492 argpos = (UCHAR_T) *f++ - L_('0');
493 while (ISDIGIT ((UCHAR_T) *f))
494 argpos = argpos * 10 + ((UCHAR_T) *f++ - L_('0'));
495 if (*f == L_('$'))
496 ++f;
497 else
499 /* Oops; that was actually the field width. */
500 width = argpos;
501 flags |= WIDTH;
502 argpos = 0;
503 goto got_width;
507 /* Check for the assignment-suppressing, the number grouping flag,
508 and the signal to use the locale's digit representation. */
509 while (*f == L_('*') || *f == L_('\'') || *f == L_('I'))
510 switch (*f++)
512 case L_('*'):
513 flags |= SUPPRESS;
514 break;
515 case L_('\''):
516 flags |= GROUP;
517 break;
518 case L_('I'):
519 flags |= I18N;
520 break;
523 /* We have seen width. */
524 if (ISDIGIT ((UCHAR_T) *f))
525 flags |= WIDTH;
527 /* Find the maximum field width. */
528 width = 0;
529 while (ISDIGIT ((UCHAR_T) *f))
531 width *= 10;
532 width += (UCHAR_T) *f++ - L_('0');
534 got_width:
535 if (width == 0)
536 width = -1;
538 /* Check for type modifiers. */
539 switch (*f++)
541 case L_('h'):
542 /* ints are short ints or chars. */
543 if (*f == L_('h'))
545 ++f;
546 flags |= CHAR;
548 else
549 flags |= SHORT;
550 break;
551 case L_('l'):
552 if (*f == L_('l'))
554 /* A double `l' is equivalent to an `L'. */
555 ++f;
556 flags |= LONGDBL | LONG;
558 else
559 /* ints are long ints. */
560 flags |= LONG;
561 break;
562 case L_('q'):
563 case L_('L'):
564 /* doubles are long doubles, and ints are long long ints. */
565 flags |= LONGDBL | LONG;
566 break;
567 case L_('a'):
568 /* The `a' is used as a flag only if followed by `s', `S' or
569 `['. */
570 if (*f != L_('s') && *f != L_('S') && *f != L_('['))
572 --f;
573 break;
575 /* String conversions (%s, %[) take a `char **'
576 arg and fill it in with a malloc'd pointer. */
577 flags |= MALLOC;
578 break;
579 case L_('z'):
580 if (need_longlong && sizeof (size_t) > sizeof (unsigned long int))
581 flags |= LONGDBL;
582 else if (sizeof (size_t) > sizeof (unsigned int))
583 flags |= LONG;
584 break;
585 case L_('j'):
586 if (need_longlong && sizeof (uintmax_t) > sizeof (unsigned long int))
587 flags |= LONGDBL;
588 else if (sizeof (uintmax_t) > sizeof (unsigned int))
589 flags |= LONG;
590 break;
591 case L_('t'):
592 if (need_longlong && sizeof (ptrdiff_t) > sizeof (long int))
593 flags |= LONGDBL;
594 else if (sizeof (ptrdiff_t) > sizeof (int))
595 flags |= LONG;
596 break;
597 default:
598 /* Not a recognized modifier. Backup. */
599 --f;
600 break;
603 /* End of the format string? */
604 if (*f == L_('\0'))
605 conv_error ();
607 /* Find the conversion specifier. */
608 fc = *f++;
609 if (skip_space || (fc != L_('[') && fc != L_('c')
610 && fc != L_('C') && fc != L_('n')))
612 /* Eat whitespace. */
613 int save_errno = errno;
614 errno = 0;
616 if (inchar () == EOF && errno == EINTR)
617 input_error ();
618 while (ISSPACE (c));
619 errno = save_errno;
620 ungetc (c, s);
621 skip_space = 0;
624 switch (fc)
626 case L_('%'): /* Must match a literal '%'. */
627 c = inchar ();
628 if (c == EOF)
629 input_error ();
630 if (c != fc)
632 ungetc_not_eof (c, s);
633 conv_error ();
635 break;
637 case L_('n'): /* Answer number of assignments done. */
638 /* Corrigendum 1 to ISO C 1990 describes the allowed flags
639 with the 'n' conversion specifier. */
640 if (!(flags & SUPPRESS))
642 /* Don't count the read-ahead. */
643 if (need_longlong && (flags & LONGDBL))
644 *ARG (long long int *) = read_in;
645 else if (need_long && (flags & LONG))
646 *ARG (long int *) = read_in;
647 else if (flags & SHORT)
648 *ARG (short int *) = read_in;
649 else if (!(flags & CHAR))
650 *ARG (int *) = read_in;
651 else
652 *ARG (char *) = read_in;
654 #ifdef NO_BUG_IN_ISO_C_CORRIGENDUM_1
655 /* We have a severe problem here. The ISO C standard
656 contradicts itself in explaining the effect of the %n
657 format in `scanf'. While in ISO C:1990 and the ISO C
658 Amendment 1:1995 the result is described as
660 Execution of a %n directive does not effect the
661 assignment count returned at the completion of
662 execution of the f(w)scanf function.
664 in ISO C Corrigendum 1:1994 the following was added:
666 Subclause 7.9.6.2
667 Add the following fourth example:
669 #include <stdio.h>
670 int d1, d2, n1, n2, i;
671 i = sscanf("123", "%d%n%n%d", &d1, &n1, &n2, &d2);
672 the value 123 is assigned to d1 and the value 3 to n1.
673 Because %n can never get an input failure the value
674 of 3 is also assigned to n2. The value of d2 is not
675 affected. The value 3 is assigned to i.
677 We go for now with the historically correct code from ISO C,
678 i.e., we don't count the %n assignments. When it ever
679 should prove to be wrong just remove the #ifdef above. */
680 ++done;
681 #endif
683 break;
685 case L_('c'): /* Match characters. */
686 if ((flags & LONG) == 0)
688 if (!(flags & SUPPRESS))
690 str = ARG (char *);
691 if (str == NULL)
692 conv_error ();
695 c = inchar ();
696 if (c == EOF)
697 input_error ();
699 if (width == -1)
700 width = 1;
702 #ifdef COMPILE_WSCANF
703 /* We have to convert the wide character(s) into multibyte
704 characters and store the result. */
705 memset (&state, '\0', sizeof (state));
709 size_t n;
711 n = __wcrtomb (!(flags & SUPPRESS) ? str : NULL, c, &state);
712 if (n == (size_t) -1)
713 /* No valid wide character. */
714 input_error ();
716 /* Increment the output pointer. Even if we don't
717 write anything. */
718 str += n;
720 while (--width > 0 && inchar () != EOF);
721 #else
722 if (!(flags & SUPPRESS))
725 *str++ = c;
726 while (--width > 0 && inchar () != EOF);
728 else
729 while (--width > 0 && inchar () != EOF);
730 #endif
732 if (!(flags & SUPPRESS))
733 ++done;
735 break;
737 /* FALLTHROUGH */
738 case L_('C'):
739 if (!(flags & SUPPRESS))
741 wstr = ARG (wchar_t *);
742 if (wstr == NULL)
743 conv_error ();
746 c = inchar ();
747 if (c == EOF)
748 input_error ();
750 #ifdef COMPILE_WSCANF
751 /* Just store the incoming wide characters. */
752 if (!(flags & SUPPRESS))
755 *wstr++ = c;
756 while (--width > 0 && inchar () != EOF);
758 else
759 while (--width > 0 && inchar () != EOF);
760 #else
762 /* We have to convert the multibyte input sequence to wide
763 characters. */
764 char buf[1];
765 mbstate_t cstate;
767 memset (&cstate, '\0', sizeof (cstate));
771 /* This is what we present the mbrtowc function first. */
772 buf[0] = c;
774 while (1)
776 size_t n;
778 n = __mbrtowc (!(flags & SUPPRESS) ? wstr : NULL,
779 buf, 1, &cstate);
781 if (n == (size_t) -2)
783 /* Possibly correct character, just not enough
784 input. */
785 if (inchar () == EOF)
786 encode_error ();
788 buf[0] = c;
789 continue;
792 if (n != 1)
793 encode_error ();
795 /* We have a match. */
796 break;
799 /* Advance the result pointer. */
800 ++wstr;
802 while (--width > 0 && inchar () != EOF);
804 #endif
806 if (!(flags & SUPPRESS))
807 ++done;
809 break;
811 case L_('s'): /* Read a string. */
812 if (!(flags & LONG))
814 #define STRING_ARG(Str, Type) \
815 do if (!(flags & SUPPRESS)) \
817 if (flags & MALLOC) \
819 /* The string is to be stored in a malloc'd buffer. */ \
820 strptr = ARG (char **); \
821 if (strptr == NULL) \
822 conv_error (); \
823 /* Allocate an initial buffer. */ \
824 strsize = 100; \
825 *strptr = (char *) malloc (strsize * sizeof (Type)); \
826 Str = (Type *) *strptr; \
828 else \
829 Str = ARG (Type *); \
830 if (Str == NULL) \
831 conv_error (); \
832 } while (0)
833 STRING_ARG (str, char);
835 c = inchar ();
836 if (c == EOF)
837 input_error ();
839 #ifdef COMPILE_WSCANF
840 memset (&state, '\0', sizeof (state));
841 #endif
845 if (ISSPACE (c))
847 ungetc_not_eof (c, s);
848 break;
851 #ifdef COMPILE_WSCANF
852 /* This is quite complicated. We have to convert the
853 wide characters into multibyte characters and then
854 store them. */
856 size_t n;
858 if (!(flags & SUPPRESS) && (flags & MALLOC)
859 && str + MB_CUR_MAX >= *strptr + strsize)
861 /* We have to enlarge the buffer if the `a' flag
862 was given. */
863 size_t strleng = str - *strptr;
864 char *newstr;
866 newstr = (char *) realloc (*strptr, strsize * 2);
867 if (newstr == NULL)
869 /* Can't allocate that much. Last-ditch
870 effort. */
871 newstr = (char *) realloc (*strptr,
872 strleng + MB_CUR_MAX);
873 if (newstr == NULL)
875 /* We lose. Oh well. Terminate the
876 string and stop converting,
877 so at least we don't skip any input. */
878 ((char *) (*strptr))[strleng] = '\0';
879 ++done;
880 conv_error ();
882 else
884 *strptr = newstr;
885 str = newstr + strleng;
886 strsize = strleng + MB_CUR_MAX;
889 else
891 *strptr = newstr;
892 str = newstr + strleng;
893 strsize *= 2;
897 n = __wcrtomb (!(flags & SUPPRESS) ? str : NULL, c,
898 &state);
899 if (n == (size_t) -1)
900 encode_error ();
902 assert (n <= MB_CUR_MAX);
903 str += n;
905 #else
906 /* This is easy. */
907 if (!(flags & SUPPRESS))
909 *str++ = c;
910 if ((flags & MALLOC)
911 && (char *) str == *strptr + strsize)
913 /* Enlarge the buffer. */
914 str = (char *) realloc (*strptr, 2 * strsize);
915 if (str == NULL)
917 /* Can't allocate that much. Last-ditch
918 effort. */
919 str = (char *) realloc (*strptr, strsize + 1);
920 if (str == NULL)
922 /* We lose. Oh well. Terminate the
923 string and stop converting,
924 so at least we don't skip any input. */
925 ((char *) (*strptr))[strsize - 1] = '\0';
926 ++done;
927 conv_error ();
929 else
931 *strptr = (char *) str;
932 str += strsize;
933 ++strsize;
936 else
938 *strptr = (char *) str;
939 str += strsize;
940 strsize *= 2;
944 #endif
946 while ((width <= 0 || --width > 0) && inchar () != EOF);
948 if (!(flags & SUPPRESS))
950 #ifdef COMPILE_WSCANF
951 /* We have to emit the code to get into the initial
952 state. */
953 char buf[MB_LEN_MAX];
954 size_t n = __wcrtomb (buf, L'\0', &state);
955 if (n > 0 && (flags & MALLOC)
956 && str + n >= *strptr + strsize)
958 /* Enlarge the buffer. */
959 size_t strleng = str - *strptr;
960 char *newstr;
962 newstr = (char *) realloc (*strptr, strleng + n + 1);
963 if (newstr == NULL)
965 /* We lose. Oh well. Terminate the string
966 and stop converting, so at least we don't
967 skip any input. */
968 ((char *) (*strptr))[strleng] = '\0';
969 ++done;
970 conv_error ();
972 else
974 *strptr = newstr;
975 str = newstr + strleng;
976 strsize = strleng + n + 1;
980 str = __mempcpy (str, buf, n);
981 #endif
982 *str++ = '\0';
984 if ((flags & MALLOC) && str - *strptr != strsize)
986 char *cp = (char *) realloc (*strptr, str - *strptr);
987 if (cp != NULL)
988 *strptr = cp;
991 ++done;
993 break;
995 /* FALLTHROUGH */
997 case L_('S'):
999 #ifndef COMPILE_WSCANF
1000 mbstate_t cstate;
1001 #endif
1003 /* Wide character string. */
1004 STRING_ARG (wstr, wchar_t);
1006 c = inchar ();
1007 if (c == EOF)
1008 input_error ();
1010 #ifndef COMPILE_WSCANF
1011 memset (&cstate, '\0', sizeof (cstate));
1012 #endif
1016 if (ISSPACE (c))
1018 ungetc_not_eof (c, s);
1019 break;
1022 #ifdef COMPILE_WSCANF
1023 /* This is easy. */
1024 if (!(flags & SUPPRESS))
1026 *wstr++ = c;
1027 if ((flags & MALLOC)
1028 && wstr == (wchar_t *) *strptr + strsize)
1030 /* Enlarge the buffer. */
1031 wstr = (wchar_t *) realloc (*strptr,
1032 (2 * strsize)
1033 * sizeof (wchar_t));
1034 if (wstr == NULL)
1036 /* Can't allocate that much. Last-ditch
1037 effort. */
1038 wstr = (wchar_t *) realloc (*strptr,
1039 (strsize + 1)
1040 * sizeof (wchar_t));
1041 if (wstr == NULL)
1043 /* We lose. Oh well. Terminate the string
1044 and stop converting, so at least we don't
1045 skip any input. */
1046 ((wchar_t *) (*strptr))[strsize - 1] = L'\0';
1047 ++done;
1048 conv_error ();
1050 else
1052 *strptr = (char *) wstr;
1053 wstr += strsize;
1054 ++strsize;
1057 else
1059 *strptr = (char *) wstr;
1060 wstr += strsize;
1061 strsize *= 2;
1065 #else
1067 char buf[1];
1069 buf[0] = c;
1071 while (1)
1073 size_t n;
1075 n = __mbrtowc (!(flags & SUPPRESS) ? wstr : NULL,
1076 buf, 1, &cstate);
1078 if (n == (size_t) -2)
1080 /* Possibly correct character, just not enough
1081 input. */
1082 if (inchar () == EOF)
1083 encode_error ();
1085 buf[0] = c;
1086 continue;
1089 if (n != 1)
1090 encode_error ();
1092 /* We have a match. */
1093 ++wstr;
1094 break;
1097 if (!(flags & SUPPRESS) && (flags & MALLOC)
1098 && wstr == (wchar_t *) *strptr + strsize)
1100 /* Enlarge the buffer. */
1101 wstr = (wchar_t *) realloc (*strptr,
1102 (2 * strsize
1103 * sizeof (wchar_t)));
1104 if (wstr == NULL)
1106 /* Can't allocate that much. Last-ditch effort. */
1107 wstr = (wchar_t *) realloc (*strptr,
1108 ((strsize + 1)
1109 * sizeof (wchar_t)));
1110 if (wstr == NULL)
1112 /* We lose. Oh well. Terminate the
1113 string and stop converting, so at
1114 least we don't skip any input. */
1115 ((wchar_t *) (*strptr))[strsize - 1] = L'\0';
1116 ++done;
1117 conv_error ();
1119 else
1121 *strptr = (char *) wstr;
1122 wstr += strsize;
1123 ++strsize;
1126 else
1128 *strptr = (char *) wstr;
1129 wstr += strsize;
1130 strsize *= 2;
1134 #endif
1136 while ((width <= 0 || --width > 0) && inchar () != EOF);
1138 if (!(flags & SUPPRESS))
1140 *wstr++ = L'\0';
1142 if ((flags & MALLOC) && wstr - (wchar_t *) *strptr != strsize)
1144 wchar_t *cp = (wchar_t *) realloc (*strptr,
1145 ((wstr
1146 - (wchar_t *) *strptr)
1147 * sizeof(wchar_t)));
1148 if (cp != NULL)
1149 *strptr = (char *) cp;
1152 ++done;
1155 break;
1157 case L_('x'): /* Hexadecimal integer. */
1158 case L_('X'): /* Ditto. */
1159 base = 16;
1160 number_signed = 0;
1161 goto number;
1163 case L_('o'): /* Octal integer. */
1164 base = 8;
1165 number_signed = 0;
1166 goto number;
1168 case L_('u'): /* Unsigned decimal integer. */
1169 base = 10;
1170 number_signed = 0;
1171 goto number;
1173 case L_('d'): /* Signed decimal integer. */
1174 base = 10;
1175 number_signed = 1;
1176 goto number;
1178 case L_('i'): /* Generic number. */
1179 base = 0;
1180 number_signed = 1;
1182 number:
1183 c = inchar ();
1184 if (c == EOF)
1185 input_error ();
1187 /* Check for a sign. */
1188 if (c == L_('-') || c == L_('+'))
1190 ADDW (c);
1191 if (width > 0)
1192 --width;
1193 c = inchar ();
1196 /* Look for a leading indication of base. */
1197 if (width != 0 && c == L_('0'))
1199 if (width > 0)
1200 --width;
1202 ADDW (c);
1203 c = inchar ();
1205 if (width != 0 && TOLOWER (c) == L_('x'))
1207 if (base == 0)
1208 base = 16;
1209 if (base == 16)
1211 if (width > 0)
1212 --width;
1213 c = inchar ();
1216 else if (base == 0)
1217 base = 8;
1220 if (base == 0)
1221 base = 10;
1223 if (base == 10 && (flags & I18N) != 0)
1225 int from_level;
1226 int to_level;
1227 int level;
1228 #ifdef COMPILE_WSCANF
1229 const wchar_t *wcdigits[10];
1230 #else
1231 const char *mbdigits[10];
1232 #endif
1233 int n;
1235 from_level = 0;
1236 #ifdef COMPILE_WSCANF
1237 to_level = _NL_CURRENT_WORD (LC_CTYPE,
1238 _NL_CTYPE_INDIGITS_WC_LEN) - 1;
1239 #else
1240 to_level = _NL_CURRENT_WORD (LC_CTYPE,
1241 _NL_CTYPE_INDIGITS_MB_LEN) - 1;
1242 #endif
1244 /* Read the number into workspace. */
1245 while (c != EOF && width != 0)
1247 /* In this round we get the pointer to the digit strings
1248 and also perform the first round of comparisons. */
1249 for (n = 0; n < 10; ++n)
1251 /* Get the string for the digits with value N. */
1252 #ifdef COMPILE_WSCANF
1253 wcdigits[n] = (const wchar_t *)
1254 _NL_CURRENT (LC_CTYPE, _NL_CTYPE_INDIGITS0_WC + n);
1255 wcdigits[n] += from_level;
1257 if (c == *wcdigits[n])
1259 to_level = from_level;
1260 break;
1263 /* Advance the pointer to the next string. */
1264 ++wcdigits[n];
1265 #else
1266 const char *cmpp;
1267 int avail = width > 0 ? width : INT_MAX;
1269 mbdigits[n] = _NL_CURRENT (LC_CTYPE,
1270 _NL_CTYPE_INDIGITS0_MB + n);
1272 for (level = 0; level < from_level; level++)
1273 mbdigits[n] = strchr (mbdigits[n], '\0') + 1;
1275 cmpp = mbdigits[n];
1276 while ((unsigned char) *cmpp == c && avail > 0)
1278 if (*++cmpp == '\0')
1279 break;
1280 else
1282 if ((c = inchar ()) == EOF)
1283 break;
1284 --avail;
1288 if (*cmpp == '\0')
1290 if (width > 0)
1291 width = avail;
1292 to_level = from_level;
1293 break;
1296 /* We are pushing all read characters back. */
1297 if (cmpp > mbdigits[n])
1299 ungetc (c, s);
1300 while (--cmpp > mbdigits[n])
1301 ungetc_not_eof ((unsigned char) *cmpp, s);
1302 c = (unsigned char) *cmpp;
1305 /* Advance the pointer to the next string. */
1306 mbdigits[n] = strchr (mbdigits[n], '\0') + 1;
1307 #endif
1310 if (n == 10)
1312 /* Have not yet found the digit. */
1313 for (level = from_level + 1; level <= to_level; ++level)
1315 /* Search all ten digits of this level. */
1316 for (n = 0; n < 10; ++n)
1318 #ifdef COMPILE_WSCANF
1319 if (c == *wcdigits[n])
1320 break;
1322 /* Advance the pointer to the next string. */
1323 ++wcdigits[n];
1324 #else
1325 const char *cmpp;
1326 int avail = width > 0 ? width : INT_MAX;
1328 cmpp = mbdigits[n];
1329 while ((unsigned char) *cmpp == c && avail > 0)
1331 if (*++cmpp == '\0')
1332 break;
1333 else
1335 if ((c = inchar ()) == EOF)
1336 break;
1337 --avail;
1341 if (*cmpp == '\0')
1343 if (width > 0)
1344 width = avail;
1345 break;
1348 /* We are pushing all read characters back. */
1349 if (cmpp > mbdigits[n])
1351 ungetc (c, s);
1352 while (--cmpp > mbdigits[n])
1353 ungetc_not_eof ((unsigned char) *cmpp, s);
1354 c = (unsigned char) *cmpp;
1357 /* Advance the pointer to the next string. */
1358 mbdigits[n] = strchr (mbdigits[n], '\0') + 1;
1359 #endif
1362 if (n < 10)
1364 /* Found it. */
1365 from_level = level;
1366 to_level = level;
1367 break;
1372 if (n < 10)
1373 c = L_('0') + n;
1374 else if ((flags & GROUP)
1375 #ifdef COMPILE_WSCANF
1376 && thousands != L'\0'
1377 #else
1378 && thousands != NULL
1379 #endif
1382 /* Try matching against the thousands separator. */
1383 #ifdef COMPILE_WSCANF
1384 if (c != thousands)
1385 break;
1386 #else
1387 const char *cmpp = thousands;
1388 int avail = width > 0 ? width : INT_MAX;
1390 while ((unsigned char) *cmpp == c && avail > 0)
1392 ADDW (c);
1393 if (*++cmpp == '\0')
1394 break;
1395 else
1397 if ((c = inchar ()) == EOF)
1398 break;
1399 --avail;
1403 if (*cmpp != '\0')
1405 /* We are pushing all read characters back. */
1406 if (cmpp > thousands)
1408 wpsize -= cmpp - thousands;
1409 ungetc (c, s);
1410 while (--cmpp > thousands)
1411 ungetc_not_eof ((unsigned char) *cmpp, s);
1412 c = (unsigned char) *cmpp;
1414 break;
1417 if (width > 0)
1418 width = avail;
1420 /* The last thousands character will be added back by
1421 the ADDW below. */
1422 --wpsize;
1423 #endif
1425 else
1426 break;
1428 ADDW (c);
1429 if (width > 0)
1430 --width;
1432 c = inchar ();
1435 else
1436 /* Read the number into workspace. */
1437 while (c != EOF && width != 0)
1439 if (base == 16)
1441 if (!ISXDIGIT (c))
1442 break;
1444 else if (!ISDIGIT (c) || c - L_('0') >= base)
1446 if (base == 10 && (flags & GROUP)
1447 #ifdef COMPILE_WSCANF
1448 && thousands != L'\0'
1449 #else
1450 && thousands != NULL
1451 #endif
1454 /* Try matching against the thousands separator. */
1455 #ifdef COMPILE_WSCANF
1456 if (c != thousands)
1457 break;
1458 #else
1459 const char *cmpp = thousands;
1460 int avail = width > 0 ? width : INT_MAX;
1462 while ((unsigned char) *cmpp == c && avail > 0)
1464 ADDW (c);
1465 if (*++cmpp == '\0')
1466 break;
1467 else
1469 if ((c = inchar ()) == EOF)
1470 break;
1471 --avail;
1475 if (*cmpp != '\0')
1477 /* We are pushing all read characters back. */
1478 if (cmpp > thousands)
1480 wpsize -= cmpp - thousands;
1481 ungetc (c, s);
1482 while (--cmpp > thousands)
1483 ungetc_not_eof ((unsigned char) *cmpp, s);
1484 c = (unsigned char) *cmpp;
1486 break;
1489 if (width > 0)
1490 width = avail;
1492 /* The last thousands character will be added back by
1493 the ADDW below. */
1494 --wpsize;
1495 #endif
1497 else
1498 break;
1500 ADDW (c);
1501 if (width > 0)
1502 --width;
1504 c = inchar ();
1507 if (wpsize == 0
1508 || (wpsize == 1 && (wp[0] == L_('+') || wp[0] == L_('-'))))
1510 /* There was no number. If we are supposed to read a pointer
1511 we must recognize "(nil)" as well. */
1512 if (wpsize == 0 && read_pointer && (width < 0 || width >= 0)
1513 && c == '('
1514 && TOLOWER (inchar ()) == L_('n')
1515 && TOLOWER (inchar ()) == L_('i')
1516 && TOLOWER (inchar ()) == L_('l')
1517 && inchar () == L_(')'))
1518 /* We must produce the value of a NULL pointer. A single
1519 '0' digit is enough. */
1520 ADDW (L_('0'));
1521 else
1523 /* The last read character is not part of the number
1524 anymore. */
1525 ungetc (c, s);
1527 conv_error ();
1530 else
1531 /* The just read character is not part of the number anymore. */
1532 ungetc (c, s);
1534 /* Convert the number. */
1535 ADDW (L_('\0'));
1536 if (need_longlong && (flags & LONGDBL))
1538 if (number_signed)
1539 num.q = __strtoll_internal (wp, &tw, base, flags & GROUP);
1540 else
1541 num.uq = __strtoull_internal (wp, &tw, base, flags & GROUP);
1543 else
1545 if (number_signed)
1546 num.l = __strtol_internal (wp, &tw, base, flags & GROUP);
1547 else
1548 num.ul = __strtoul_internal (wp, &tw, base, flags & GROUP);
1550 if (wp == tw)
1551 conv_error ();
1553 if (!(flags & SUPPRESS))
1555 if (! number_signed)
1557 if (need_longlong && (flags & LONGDBL))
1558 *ARG (unsigned LONGLONG int *) = num.uq;
1559 else if (need_long && (flags & LONG))
1560 *ARG (unsigned long int *) = num.ul;
1561 else if (flags & SHORT)
1562 *ARG (unsigned short int *)
1563 = (unsigned short int) num.ul;
1564 else if (!(flags & CHAR))
1565 *ARG (unsigned int *) = (unsigned int) num.ul;
1566 else
1567 *ARG (unsigned char *) = (unsigned char) num.ul;
1569 else
1571 if (need_longlong && (flags & LONGDBL))
1572 *ARG (LONGLONG int *) = num.q;
1573 else if (need_long && (flags & LONG))
1574 *ARG (long int *) = num.l;
1575 else if (flags & SHORT)
1576 *ARG (short int *) = (short int) num.l;
1577 else if (!(flags & CHAR))
1578 *ARG (int *) = (int) num.l;
1579 else
1580 *ARG (signed char *) = (signed char) num.ul;
1582 ++done;
1584 break;
1586 case L_('e'): /* Floating-point numbers. */
1587 case L_('E'):
1588 case L_('f'):
1589 case L_('F'):
1590 case L_('g'):
1591 case L_('G'):
1592 case L_('a'):
1593 case L_('A'):
1594 c = inchar ();
1595 if (c == EOF)
1596 input_error ();
1598 /* Check for a sign. */
1599 if (c == L_('-') || c == L_('+'))
1601 negative = c == L_('-');
1602 if (width == 0 || inchar () == EOF)
1603 /* EOF is only an input error before we read any chars. */
1604 conv_error ();
1605 if (! ISDIGIT (c) && TOLOWER (c) != L_('i')
1606 && TOLOWER (c) != L_('n'))
1608 #ifdef COMPILE_WSCANF
1609 if (c != decimal)
1611 /* This is no valid number. */
1612 ungetc (c, s);
1613 conv_error ();
1615 #else
1616 /* Match against the decimal point. At this point
1617 we are taking advantage of the fact that we can
1618 push more than one character back. This is
1619 (almost) never necessary since the decimal point
1620 string hopefully never contains more than one
1621 byte. */
1622 const char *cmpp = decimal;
1623 int avail = width > 0 ? width : INT_MAX;
1625 while ((unsigned char) *cmpp == c && avail > 0)
1626 if (*++cmpp == '\0')
1627 break;
1628 else
1630 if (inchar () == EOF)
1631 break;
1632 --avail;
1635 if (*cmpp != '\0')
1637 /* This is no valid number. */
1638 while (1)
1640 ungetc (c, s);
1641 if (cmpp == decimal)
1642 break;
1643 c = (unsigned char) *--cmpp;
1646 conv_error ();
1648 if (width > 0)
1649 width = avail;
1650 #endif
1652 if (width > 0)
1653 --width;
1655 else
1656 negative = 0;
1658 /* Take care of the special arguments "nan" and "inf". */
1659 if (TOLOWER (c) == L_('n'))
1661 /* Maybe "nan". */
1662 ADDW (c);
1663 if (width == 0 || inchar () == EOF || TOLOWER (c) != L_('a'))
1664 conv_error ();
1665 if (width > 0)
1666 --width;
1667 ADDW (c);
1668 if (width == 0 || inchar () == EOF || TOLOWER (c) != L_('n'))
1669 conv_error ();
1670 if (width > 0)
1671 --width;
1672 ADDW (c);
1673 /* It is "nan". */
1674 goto scan_float;
1676 else if (TOLOWER (c) == L_('i'))
1678 /* Maybe "inf" or "infinity". */
1679 ADDW (c);
1680 if (width == 0 || inchar () == EOF || TOLOWER (c) != L_('n'))
1681 conv_error ();
1682 if (width > 0)
1683 --width;
1684 ADDW (c);
1685 if (width == 0 || inchar () == EOF || TOLOWER (c) != L_('f'))
1686 conv_error ();
1687 if (width > 0)
1688 --width;
1689 ADDW (c);
1690 /* It is at least "inf". */
1691 if (width != 0 && inchar () != EOF)
1693 if (TOLOWER (c) == L_('i'))
1695 if (width > 0)
1696 --width;
1697 /* Now we have to read the rest as well. */
1698 ADDW (c);
1699 if (width == 0 || inchar () == EOF
1700 || TOLOWER (c) != L_('n'))
1701 conv_error ();
1702 if (width > 0)
1703 --width;
1704 ADDW (c);
1705 if (width == 0 || inchar () == EOF
1706 || TOLOWER (c) != L_('i'))
1707 conv_error ();
1708 if (width > 0)
1709 --width;
1710 ADDW (c);
1711 if (width == 0 || inchar () == EOF
1712 || TOLOWER (c) != L_('t'))
1713 conv_error ();
1714 if (width > 0)
1715 --width;
1716 ADDW (c);
1717 if (width == 0 || inchar () == EOF
1718 || TOLOWER (c) != L_('y'))
1719 conv_error ();
1720 if (width > 0)
1721 --width;
1722 ADDW (c);
1724 else
1725 /* Never mind. */
1726 ungetc (c, s);
1728 goto scan_float;
1731 is_hexa = 0;
1732 exp_char = L_('e');
1733 if (width != 0 && c == L_('0'))
1735 ADDW (c);
1736 c = inchar ();
1737 if (width > 0)
1738 --width;
1739 if (width != 0 && TOLOWER (c) == L_('x'))
1741 /* It is a number in hexadecimal format. */
1742 ADDW (c);
1744 is_hexa = 1;
1745 exp_char = L_('p');
1747 /* Grouping is not allowed. */
1748 flags &= ~GROUP;
1749 c = inchar ();
1750 if (width > 0)
1751 --width;
1755 got_dot = got_e = 0;
1758 if (ISDIGIT (c))
1759 ADDW (c);
1760 else if (!got_e && is_hexa && ISXDIGIT (c))
1761 ADDW (c);
1762 else if (got_e && wp[wpsize - 1] == exp_char
1763 && (c == L_('-') || c == L_('+')))
1764 ADDW (c);
1765 else if (wpsize > 0 && !got_e && TOLOWER (c) == exp_char)
1767 ADDW (exp_char);
1768 got_e = got_dot = 1;
1770 else
1772 #ifdef COMPILE_WSCANF
1773 if (! got_dot && c == decimal)
1775 ADDW (c);
1776 got_dot = 1;
1778 else if (thousands != L'\0' && ! got_dot && c == thousands)
1779 ADDW (c);
1780 else
1782 /* The last read character is not part of the number
1783 anymore. */
1784 ungetc (c, s);
1785 break;
1787 #else
1788 const char *cmpp = decimal;
1789 int avail = width > 0 ? width : INT_MAX;
1791 if (! got_dot)
1793 while ((unsigned char) *cmpp == c && avail > 0)
1794 if (*++cmpp == '\0')
1795 break;
1796 else
1798 if (inchar () == EOF)
1799 break;
1800 --avail;
1804 if (*cmpp == '\0')
1806 /* Add all the characters. */
1807 for (cmpp = decimal; *cmpp != '\0'; ++cmpp)
1808 ADDW ((unsigned char) *cmpp);
1809 if (width > 0)
1810 width = avail;
1811 got_dot = 1;
1813 else
1815 /* Figure out whether it is a thousands separator.
1816 There is one problem: we possibly read more than
1817 one character. We cannot push them back but since
1818 we know that parts of the `decimal' string matched,
1819 we can compare against it. */
1820 const char *cmp2p = thousands;
1822 if (thousands != NULL && ! got_dot)
1824 while (cmp2p < cmpp
1825 && *cmp2p == decimal[cmp2p - thousands])
1826 ++cmp2p;
1827 if (cmp2p == cmpp)
1829 while ((unsigned char) *cmp2p == c && avail > 0)
1830 if (*++cmp2p == '\0')
1831 break;
1832 else
1834 if (inchar () == EOF)
1835 break;
1836 --avail;
1841 if (cmp2p != NULL && *cmp2p == '\0')
1843 /* Add all the characters. */
1844 for (cmpp = thousands; *cmpp != '\0'; ++cmpp)
1845 ADDW ((unsigned char) *cmpp);
1846 if (width > 0)
1847 width = avail;
1849 else
1851 /* The last read character is not part of the number
1852 anymore. */
1853 ungetc (c, s);
1854 break;
1857 #endif
1859 if (width > 0)
1860 --width;
1862 while (width != 0 && inchar () != EOF);
1864 /* Have we read any character? If we try to read a number
1865 in hexadecimal notation and we have read only the `0x'
1866 prefix or no exponent this is an error. */
1867 if (wpsize == 0 || (is_hexa && (wpsize == 2 || ! got_e)))
1868 conv_error ();
1870 scan_float:
1871 /* Convert the number. */
1872 ADDW (L_('\0'));
1873 if (flags & LONGDBL)
1875 long double d = __strtold_internal (wp, &tw, flags & GROUP);
1876 if (!(flags & SUPPRESS) && tw != wp)
1877 *ARG (long double *) = negative ? -d : d;
1879 else if (flags & LONG)
1881 double d = __strtod_internal (wp, &tw, flags & GROUP);
1882 if (!(flags & SUPPRESS) && tw != wp)
1883 *ARG (double *) = negative ? -d : d;
1885 else
1887 float d = __strtof_internal (wp, &tw, flags & GROUP);
1888 if (!(flags & SUPPRESS) && tw != wp)
1889 *ARG (float *) = negative ? -d : d;
1892 if (tw == wp)
1893 conv_error ();
1895 if (!(flags & SUPPRESS))
1896 ++done;
1897 break;
1899 case L_('['): /* Character class. */
1900 if (flags & LONG)
1901 STRING_ARG (wstr, wchar_t);
1902 else
1903 STRING_ARG (str, char);
1905 if (*f == L_('^'))
1907 ++f;
1908 not_in = 1;
1910 else
1911 not_in = 0;
1913 if (width < 0)
1914 /* There is no width given so there is also no limit on the
1915 number of characters we read. Therefore we set width to
1916 a very high value to make the algorithm easier. */
1917 width = INT_MAX;
1919 #ifdef COMPILE_WSCANF
1920 /* Find the beginning and the end of the scanlist. We are not
1921 creating a lookup table since it would have to be too large.
1922 Instead we search each time through the string. This is not
1923 a constant lookup time but who uses this feature deserves to
1924 be punished. */
1925 tw = (wchar_t *) f; /* Marks the beginning. */
1927 if (*f == L']')
1928 ++f;
1930 while ((fc = *f++) != L'\0' && fc != L']');
1932 if (fc == L'\0')
1933 conv_error ();
1934 wp = (wchar_t *) f - 1;
1935 #else
1936 /* Fill WP with byte flags indexed by character.
1937 We will use this flag map for matching input characters. */
1938 if (wpmax < UCHAR_MAX + 1)
1940 wpmax = UCHAR_MAX + 1;
1941 wp = (char *) alloca (wpmax);
1943 memset (wp, '\0', UCHAR_MAX + 1);
1945 fc = *f;
1946 if (fc == ']' || fc == '-')
1948 /* If ] or - appears before any char in the set, it is not
1949 the terminator or separator, but the first char in the
1950 set. */
1951 wp[fc] = 1;
1952 ++f;
1955 while ((fc = *f++) != '\0' && fc != ']')
1956 if (fc == '-' && *f != '\0' && *f != ']'
1957 && (unsigned char) f[-2] <= (unsigned char) *f)
1959 /* Add all characters from the one before the '-'
1960 up to (but not including) the next format char. */
1961 for (fc = (unsigned char) f[-2]; fc < (unsigned char) *f; ++fc)
1962 wp[fc] = 1;
1964 else
1965 /* Add the character to the flag map. */
1966 wp[fc] = 1;
1968 if (fc == '\0')
1969 conv_error();
1970 #endif
1972 if (flags & LONG)
1974 size_t now = read_in;
1975 #ifdef COMPILE_WSCANF
1976 if (inchar () == WEOF)
1977 input_error ();
1981 wchar_t *runp;
1983 /* Test whether it's in the scanlist. */
1984 runp = tw;
1985 while (runp < wp)
1987 if (runp[0] == L'-' && runp[1] != '\0' && runp + 1 != wp
1988 && runp != tw
1989 && (unsigned int) runp[-1] <= (unsigned int) runp[1])
1991 /* Match against all characters in between the
1992 first and last character of the sequence. */
1993 wchar_t wc;
1995 for (wc = runp[-1] + 1; wc <= runp[1]; ++wc)
1996 if (wc == c)
1997 break;
1999 if (wc <= runp[1] && !not_in)
2000 break;
2001 if (wc <= runp[1] && not_in)
2003 /* The current character is not in the
2004 scanset. */
2005 ungetc (c, s);
2006 goto out;
2009 runp += 2;
2011 else
2013 if (*runp == c && !not_in)
2014 break;
2015 if (*runp == c && not_in)
2017 ungetc (c, s);
2018 goto out;
2021 ++runp;
2025 if (runp == wp && !not_in)
2027 ungetc (c, s);
2028 goto out;
2031 if (!(flags & SUPPRESS))
2033 *wstr++ = c;
2035 if ((flags & MALLOC)
2036 && wstr == (wchar_t *) *strptr + strsize)
2038 /* Enlarge the buffer. */
2039 wstr = (wchar_t *) realloc (*strptr,
2040 (2 * strsize)
2041 * sizeof (wchar_t));
2042 if (wstr == NULL)
2044 /* Can't allocate that much. Last-ditch
2045 effort. */
2046 wstr = (wchar_t *)
2047 realloc (*strptr, (strsize + 1)
2048 * sizeof (wchar_t));
2049 if (wstr == NULL)
2051 /* We lose. Oh well. Terminate the string
2052 and stop converting, so at least we don't
2053 skip any input. */
2054 ((wchar_t *) (*strptr))[strsize - 1] = L'\0';
2055 ++done;
2056 conv_error ();
2058 else
2060 *strptr = (char *) wstr;
2061 wstr += strsize;
2062 ++strsize;
2065 else
2067 *strptr = (char *) wstr;
2068 wstr += strsize;
2069 strsize *= 2;
2074 while (--width > 0 && inchar () != WEOF);
2075 out:
2076 #else
2077 char buf[MB_LEN_MAX];
2078 size_t cnt = 0;
2079 mbstate_t cstate;
2081 if (inchar () == EOF)
2082 input_error ();
2084 memset (&cstate, '\0', sizeof (cstate));
2088 if (wp[c] == not_in)
2090 ungetc_not_eof (c, s);
2091 break;
2094 /* This is easy. */
2095 if (!(flags & SUPPRESS))
2097 size_t n;
2099 /* Convert it into a wide character. */
2100 buf[0] = c;
2101 n = __mbrtowc (wstr, buf, 1, &cstate);
2103 if (n == (size_t) -2)
2105 /* Possibly correct character, just not enough
2106 input. */
2107 ++cnt;
2108 assert (cnt < MB_CUR_MAX);
2109 continue;
2112 ++wstr;
2113 if ((flags & MALLOC)
2114 && wstr == (wchar_t *) *strptr + strsize)
2116 /* Enlarge the buffer. */
2117 wstr = (wchar_t *) realloc (*strptr,
2118 (2 * strsize
2119 * sizeof (wchar_t)));
2120 if (wstr == NULL)
2122 /* Can't allocate that much. Last-ditch
2123 effort. */
2124 wstr = (wchar_t *)
2125 realloc (*strptr, ((strsize + 1)
2126 * sizeof (wchar_t)));
2127 if (wstr == NULL)
2129 /* We lose. Oh well. Terminate the
2130 string and stop converting,
2131 so at least we don't skip any input. */
2132 ((wchar_t *) (*strptr))[strsize - 1] = L'\0';
2133 ++done;
2134 conv_error ();
2136 else
2138 *strptr = (char *) wstr;
2139 wstr += strsize;
2140 ++strsize;
2143 else
2145 *strptr = (char *) wstr;
2146 wstr += strsize;
2147 strsize *= 2;
2152 if (--width <= 0)
2153 break;
2155 while (inchar () != EOF);
2157 if (cnt != 0)
2158 /* We stopped in the middle of recognizing another
2159 character. That's a problem. */
2160 encode_error ();
2161 #endif
2163 if (now == read_in)
2164 /* We haven't successfully read any character. */
2165 conv_error ();
2167 if (!(flags & SUPPRESS))
2169 *wstr++ = L'\0';
2171 if ((flags & MALLOC)
2172 && wstr - (wchar_t *) *strptr != strsize)
2174 wchar_t *cp = (wchar_t *)
2175 realloc (*strptr, ((wstr - (wchar_t *) *strptr)
2176 * sizeof(wchar_t)));
2177 if (cp != NULL)
2178 *strptr = (char *) cp;
2181 ++done;
2184 else
2186 size_t now = read_in;
2188 if (inchar () == EOF)
2189 input_error ();
2191 #ifdef COMPILE_WSCANF
2193 memset (&state, '\0', sizeof (state));
2197 wchar_t *runp;
2198 size_t n;
2200 /* Test whether it's in the scanlist. */
2201 runp = tw;
2202 while (runp < wp)
2204 if (runp[0] == L'-' && runp[1] != '\0' && runp + 1 != wp
2205 && runp != tw
2206 && (unsigned int) runp[-1] <= (unsigned int) runp[1])
2208 /* Match against all characters in between the
2209 first and last character of the sequence. */
2210 wchar_t wc;
2212 for (wc = runp[-1] + 1; wc <= runp[1]; ++wc)
2213 if (wc == c)
2214 break;
2216 if (wc <= runp[1] && !not_in)
2217 break;
2218 if (wc <= runp[1] && not_in)
2220 /* The current character is not in the
2221 scanset. */
2222 ungetc (c, s);
2223 goto out2;
2226 runp += 2;
2228 else
2230 if (*runp == c && !not_in)
2231 break;
2232 if (*runp == c && not_in)
2234 ungetc (c, s);
2235 goto out2;
2238 ++runp;
2242 if (runp == wp && !not_in)
2244 ungetc (c, s);
2245 goto out2;
2248 if (!(flags & SUPPRESS))
2250 if ((flags & MALLOC)
2251 && str + MB_CUR_MAX >= *strptr + strsize)
2253 /* Enlarge the buffer. */
2254 size_t strleng = str - *strptr;
2255 char *newstr;
2257 newstr = (char *) realloc (*strptr, 2 * strsize);
2258 if (newstr == NULL)
2260 /* Can't allocate that much. Last-ditch
2261 effort. */
2262 newstr = (char *) realloc (*strptr,
2263 strleng + MB_CUR_MAX);
2264 if (newstr == NULL)
2266 /* We lose. Oh well. Terminate the string
2267 and stop converting, so at least we don't
2268 skip any input. */
2269 ((char *) (*strptr))[strleng] = '\0';
2270 ++done;
2271 conv_error ();
2273 else
2275 *strptr = newstr;
2276 str = newstr + strleng;
2277 strsize = strleng + MB_CUR_MAX;
2280 else
2282 *strptr = newstr;
2283 str = newstr + strleng;
2284 strsize *= 2;
2289 n = __wcrtomb (!(flags & SUPPRESS) ? str : NULL, c, &state);
2290 if (n == (size_t) -1)
2291 encode_error ();
2293 assert (n <= MB_CUR_MAX);
2294 str += n;
2296 while (--width > 0 && inchar () != WEOF);
2297 out2:
2298 #else
2301 if (wp[c] == not_in)
2303 ungetc_not_eof (c, s);
2304 break;
2307 /* This is easy. */
2308 if (!(flags & SUPPRESS))
2310 *str++ = c;
2311 if ((flags & MALLOC)
2312 && (char *) str == *strptr + strsize)
2314 /* Enlarge the buffer. */
2315 str = (char *) realloc (*strptr, 2 * strsize);
2316 if (str == NULL)
2318 /* Can't allocate that much. Last-ditch
2319 effort. */
2320 str = (char *) realloc (*strptr, strsize + 1);
2321 if (str == NULL)
2323 /* We lose. Oh well. Terminate the
2324 string and stop converting,
2325 so at least we don't skip any input. */
2326 ((char *) (*strptr))[strsize - 1] = '\0';
2327 ++done;
2328 conv_error ();
2330 else
2332 *strptr = (char *) str;
2333 str += strsize;
2334 ++strsize;
2337 else
2339 *strptr = (char *) str;
2340 str += strsize;
2341 strsize *= 2;
2346 while (--width > 0 && inchar () != EOF);
2347 #endif
2349 if (now == read_in)
2350 /* We haven't successfully read any character. */
2351 conv_error ();
2353 if (!(flags & SUPPRESS))
2355 #ifdef COMPILE_WSCANF
2356 /* We have to emit the code to get into the initial
2357 state. */
2358 char buf[MB_LEN_MAX];
2359 size_t n = __wcrtomb (buf, L'\0', &state);
2360 if (n > 0 && (flags & MALLOC)
2361 && str + n >= *strptr + strsize)
2363 /* Enlarge the buffer. */
2364 size_t strleng = str - *strptr;
2365 char *newstr;
2367 newstr = (char *) realloc (*strptr, strleng + n + 1);
2368 if (newstr == NULL)
2370 /* We lose. Oh well. Terminate the string
2371 and stop converting, so at least we don't
2372 skip any input. */
2373 ((char *) (*strptr))[strleng] = '\0';
2374 ++done;
2375 conv_error ();
2377 else
2379 *strptr = newstr;
2380 str = newstr + strleng;
2381 strsize = strleng + n + 1;
2385 str = __mempcpy (str, buf, n);
2386 #endif
2387 *str++ = '\0';
2389 if ((flags & MALLOC) && str - *strptr != strsize)
2391 char *cp = (char *) realloc (*strptr, str - *strptr);
2392 if (cp != NULL)
2393 *strptr = cp;
2396 ++done;
2399 break;
2401 case L_('p'): /* Generic pointer. */
2402 base = 16;
2403 /* A PTR must be the same size as a `long int'. */
2404 flags &= ~(SHORT|LONGDBL);
2405 if (need_long)
2406 flags |= LONG;
2407 number_signed = 0;
2408 read_pointer = 1;
2409 goto number;
2411 default:
2412 /* If this is an unknown format character punt. */
2413 conv_error ();
2417 /* The last thing we saw in the format string was a white space.
2418 Consume the last white spaces. */
2419 if (skip_space)
2422 c = inchar ();
2423 while (ISSPACE (c));
2424 ungetc (c, s);
2427 /* Unlock stream. */
2428 UNLOCK_STREAM (s);
2430 return done;
#ifdef USE_IN_LIBIO
# ifdef COMPILE_WSCANF
/* Wide-character entry point used when building with libio.  It simply
   forwards S, FORMAT and ARGPTR to _IO_vfwscanf and returns its result.
   NOTE(review): the trailing NULL is presumably the optional error
   output pointer of _IO_vfwscanf -- confirm against its prototype.  */
int
__vfwscanf (FILE *s, const wchar_t *format, va_list argptr)
{
  return _IO_vfwscanf (s, format, argptr, NULL);
}
# else
/* Byte-oriented entry point used when building with libio.  It simply
   forwards S, FORMAT and ARGPTR to the internal _IO_vfscanf (reached
   through INTUSE) and returns its result.
   NOTE(review): the trailing NULL is presumably the optional error
   output pointer of _IO_vfscanf -- confirm against its prototype.  */
int
__vfscanf (FILE *s, const char *format, va_list argptr)
{
  return INTUSE(_IO_vfscanf) (s, format, argptr, NULL);
}
libc_hidden_def (__vfscanf)
# endif
#endif
2450 #ifdef COMPILE_WSCANF
2451 weak_alias (__vfwscanf, vfwscanf)
2452 #else
2453 weak_alias (__vfscanf, vfscanf)
2454 INTDEF(_IO_vfscanf)
2455 #endif