2003-05-16 Roland McGrath <roland@redhat.com>
[glibc.git] / stdio-common / vfscanf.c
blob8f8c99af8c4fd9f7319ff063120931cc67b81072
1 /* Copyright (C) 1991-2002, 2003 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
4 The GNU C Library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Lesser General Public
6 License as published by the Free Software Foundation; either
7 version 2.1 of the License, or (at your option) any later version.
9 The GNU C Library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
14 You should have received a copy of the GNU Lesser General Public
15 License along with the GNU C Library; if not, write to the Free
16 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
17 02111-1307 USA. */
19 #include <assert.h>
20 #include <errno.h>
21 #include <limits.h>
22 #include <ctype.h>
23 #include <stdarg.h>
24 #include <stdio.h>
25 #include <stdint.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <wchar.h>
29 #include <wctype.h>
30 #include <bits/libc-lock.h>
31 #include <locale/localeinfo.h>
/* Select the type named by LONGLONG: `long long' when compiling with GNU C
   (HAVE_LONGLONG is defined in that case as well), plain `long' otherwise. */
33 #ifdef __GNUC__
34 # define HAVE_LONGLONG
35 # define LONGLONG long long
36 #else
37 # define LONGLONG long
38 #endif
40 /* Determine whether we have to handle `long long' at all. */
/* need_longlong is 0 when LONG_MAX equals LONG_LONG_MAX, which makes every
   `need_longlong && ...' test in the conversion code a constant false. */
41 #if LONG_MAX == LONG_LONG_MAX
42 # define need_longlong 0
43 #else
44 # define need_longlong 1
45 #endif
47 /* Determine whether we have to handle `long'. */
/* need_long is 0 when INT_MAX equals LONG_MAX; the `need_long && ...' tests
   are then constant false and the plain `int' paths are used instead. */
48 #if INT_MAX == LONG_MAX
49 # define need_long 0
50 #else
51 # define need_long 1
52 #endif
54 /* Those are flags in the conversion format. */
/* Each flag is a distinct bit, so several of them can be OR'ed together in
   the per-conversion `flags' variable.  Note that the `ll', `q' and `L'
   modifiers set both LONGDBL and LONG. */
55 #define LONG 0x001 /* l: long or double */
56 #define LONGDBL 0x002 /* L: long long or long double */
57 #define SHORT 0x004 /* h: short */
58 #define SUPPRESS 0x008 /* *: suppress assignment */
59 #define POINTER 0x010 /* weird %p pointer (`fake hex') */
60 #define NOSKIP 0x020 /* do not skip blanks */
61 #define WIDTH 0x040 /* width was given */
62 #define GROUP 0x080 /* ': group numbers */
63 #define MALLOC 0x100 /* a: malloc strings */
64 #define CHAR 0x200 /* hh: char */
65 #define I18N 0x400 /* I: use locale's digits */
68 #ifdef USE_IN_LIBIO
69 # include <libioP.h>
70 # include <libio.h>
/* In the libio build every use of `va_list' in this file is redirected to
   libio's own _IO_va_list type. */
72 # undef va_list
73 # define va_list _IO_va_list
75 # ifdef COMPILE_WSCANF
/* Wide-character build.  The input macros below are meant to be expanded
   inside the scanning function: besides their arguments they use its local
   variables `c' (last character read), `read_in' (number of characters
   consumed so far), `inchar_errno' (errno saved at the last failed read)
   and the stream `s'. */
/* Push C back onto S and decrement read_in; does nothing if C is WEOF. */
76 # define ungetc(c, s) ((void) (c == WEOF \
77 || (--read_in, \
78 INTUSE(_IO_sputbackwc) (s, c))))
/* Like ungetc, but without the WEOF test: read_in is always decremented and
   C is always pushed back, so the caller must know C is a real character. */
79 # define ungetc_not_eof(c, s) ((void) (--read_in, \
80 INTUSE(_IO_sputbackwc) (s, c)))
/* Read the next wide character into `c' and yield it.  If `c' is already
   WEOF no read is attempted: errno is restored from inchar_errno and WEOF
   is yielded again.  Otherwise _IO_getwc_unlocked is called; on success
   read_in is incremented, on WEOF the current errno is stored in
   inchar_errno. */
81 # define inchar() (c == WEOF ? ((errno = inchar_errno), WEOF) \
82 : ((c = _IO_getwc_unlocked (s)), \
83 (void) (c != WEOF \
84 ? ++read_in \
85 : (size_t) (inchar_errno = errno)), c))
/* Character-type dependent helpers: the wide build maps them onto the
   wmemcpy/isw*/tow* functions. */
87 # define MEMCPY(d, s, n) __wmemcpy (d, s, n)
88 # define ISSPACE(Ch) iswspace (Ch)
89 # define ISDIGIT(Ch) iswdigit (Ch)
90 # define ISXDIGIT(Ch) iswxdigit (Ch)
91 # define TOLOWER(Ch) towlower (Ch)
/* Makes the enclosing function return WEOF unless _IO_fwide (s, 1) yields 1
   (presumably: unless the stream is or can be made wide-oriented -- confirm
   against the _IO_fwide documentation). */
92 # define ORIENT if (_IO_fwide (s, 1) != 1) return WEOF
/* Redirect the numeric conversion helpers to their wide-character
   counterparts, so the code can call the __strto* names in both builds. */
93 # define __strtoll_internal __wcstoll_internal
94 # define __strtoull_internal __wcstoull_internal
95 # define __strtol_internal __wcstol_internal
96 # define __strtoul_internal __wcstoul_internal
97 # define __strtold_internal __wcstold_internal
98 # define __strtod_internal __wcstod_internal
99 # define __strtof_internal __wcstof_internal
/* L_ turns a character or string literal into a wide literal; CHAR_T,
   UCHAR_T and WINT_T name the character types used for the format string,
   the workspace and the last character read.  EOF is redefined as WEOF so
   that comparisons against EOF work unchanged in the wide build. */
101 # define L_(Str) L##Str
102 # define CHAR_T wchar_t
103 # define UCHAR_T unsigned int
104 # define WINT_T wint_t
105 # undef EOF
106 # define EOF WEOF
107 # else
/* Narrow (multibyte) build.  Same contracts as the wide variants above, but
   based on _IO_sputbackc / _IO_getc_unlocked and the <ctype.h> functions. */
/* Push C back onto S (as an unsigned char) and decrement read_in; does
   nothing if C, converted to int, equals EOF. */
108 # define ungetc(c, s) ((void) ((int) c == EOF \
109 || (--read_in, \
110 INTUSE(_IO_sputbackc) (s, (unsigned char) c))))
/* Like ungetc, but without the EOF test. */
111 # define ungetc_not_eof(c, s) ((void) (--read_in, \
112 INTUSE(_IO_sputbackc) (s, (unsigned char) c)))
/* Read the next byte into `c' and yield it.  If `c' is already EOF, errno is
   restored from inchar_errno and EOF is yielded without reading.  Otherwise
   read_in is incremented on success, or errno is saved in inchar_errno when
   the read yields EOF. */
113 # define inchar() (c == EOF ? ((errno = inchar_errno), EOF) \
114 : ((c = _IO_getc_unlocked (s)), \
115 (void) (c != EOF \
116 ? ++read_in \
117 : (size_t) (inchar_errno = errno)), c))
118 # define MEMCPY(d, s, n) memcpy (d, s, n)
119 # define ISSPACE(Ch) isspace (Ch)
120 # define ISDIGIT(Ch) isdigit (Ch)
121 # define ISXDIGIT(Ch) isxdigit (Ch)
122 # define TOLOWER(Ch) tolower (Ch)
/* Only evaluated for streams whose _vtable_offset is 0: makes the enclosing
   function return EOF unless _IO_fwide (s, -1) yields -1. */
123 # define ORIENT if (s->_vtable_offset == 0 \
124 && _IO_fwide (s, -1) != -1) \
125 return EOF
/* Literals are used unchanged and the character types are the plain
   `char' family in the narrow build. */
127 # define L_(Str) Str
128 # define CHAR_T char
129 # define UCHAR_T unsigned char
130 # define WINT_T int
131 # endif
/* Error exits for the libio build.  Each macro returns from the enclosing
   function after unlocking the stream `s' and calling
   __libc_cleanup_end (0) (presumably closing the cleanup region opened by
   LOCK_STREAM -- confirm against bits/libc-lock.h).  They use the
   function's `errp' and `done' variables. */
/* Encoding error: set bit 4 in *errp if errp is non-NULL, set errno to
   EILSEQ and return the number of assignments done so far. */
133 # define encode_error() do { \
134 if (errp != NULL) *errp |= 4; \
135 _IO_funlockfile (s); \
136 __libc_cleanup_end (0); \
137 __set_errno (EILSEQ); \
138 return done; \
139 } while (0)
/* Conversion (matching) failure: set bit 2 in *errp if errp is non-NULL and
   return the number of assignments done so far; errno is not touched. */
140 # define conv_error() do { \
141 if (errp != NULL) *errp |= 2; \
142 _IO_funlockfile (s); \
143 __libc_cleanup_end (0); \
144 return done; \
145 } while (0)
/* Input failure: set bit 1 in *errp if errp is non-NULL and return `done'
   if it is nonzero, EOF otherwise (GNU `?:' extension). */
146 # define input_error() do { \
147 _IO_funlockfile (s); \
148 if (errp != NULL) *errp |= 1; \
149 __libc_cleanup_end (0); \
150 return done ?: EOF; \
151 } while (0)
/* Allocation failure: set errno to ENOMEM and return EOF regardless of how
   many assignments were made; *errp is not modified. */
152 # define memory_error() do { \
153 _IO_funlockfile (s); \
154 __set_errno (ENOMEM); \
155 __libc_cleanup_end (0); \
156 return EOF; \
157 } while (0)
/* Validate the stream and format arguments (libio build).  After
   CHECK_FILE (s, EOF), the enclosing function returns EOF with errno set to
   EBADF if the stream has _IO_NO_READS set in _flags, or returns EOF after
   MAYBE_SET_EINVAL if FORMAT is NULL.
   NOTE(review): the brace-only lines of this macro (embedded numbers 160,
   164, 167, 169 and 172) are absent from this copy, which looks like
   extraction damage -- confirm against a pristine source before building. */
158 # define ARGCHECK(s, format) \
159 do \
161 /* Check file argument for consistency. */ \
162 CHECK_FILE (s, EOF); \
163 if (s->_flags & _IO_NO_READS) \
165 __set_errno (EBADF); \
166 return EOF; \
168 else if (format == NULL) \
170 MAYBE_SET_EINVAL; \
171 return EOF; \
173 } while (0)
/* Stream locking for the libio build.  LOCK_STREAM registers
   _IO_funlockfile as the cleanup handler for S and then locks S;
   UNLOCK_STREAM unlocks S and ends the cleanup region.  Both macros expand
   to two statements without a do/while wrapper, so they must not be used as
   the sole body of an unbraced `if' or loop.
   NOTE(review): presumably __libc_cleanup_region_start opens a scope that
   __libc_cleanup_region_end closes, requiring the two macros to be paired
   in the same block -- confirm against bits/libc-lock.h. */
174 # define LOCK_STREAM(S) \
175 __libc_cleanup_region_start (1, (void (*) (void *)) &_IO_funlockfile, (S)); \
176 _IO_flockfile (S)
177 # define UNLOCK_STREAM(S) \
178 _IO_funlockfile (S); \
179 __libc_cleanup_region_end (0)
180 #else
/* Character I/O macros for the build without libio.  They use the scanning
   function's locals `c' (last character read) and `read_in' (count of
   characters consumed) and the stream `s'. */
/* Decrement read_in unless C is EOF, then call the real ungetc function
   (a macro name is not re-expanded inside its own expansion). */
181 # define ungetc(c, s) ((void) (c != EOF && --read_in), ungetc (c, s))
/* Unconditionally decrement read_in and push C back with the real ungetc. */
182 # define ungetc_not_eof(c, s) (--read_in, (ungetc) (c, s))
/* Read the next byte into `c' with getc and yield it, incrementing read_in
   on success.  Once `c' is EOF no further read is attempted.  Unlike the
   libio variants, errno is neither saved nor restored here. */
183 # define inchar() (c == EOF ? EOF \
184 : ((c = getc (s)), (void) (c != EOF && ++read_in), c))
/* Narrow-character helpers and types, same roles as in the libio build. */
185 # define MEMCPY(d, s, n) memcpy (d, s, n)
186 # define ISSPACE(Ch) isspace (Ch)
187 # define ISDIGIT(Ch) isdigit (Ch)
188 # define ISXDIGIT(Ch) isxdigit (Ch)
189 # define TOLOWER(Ch) tolower (Ch)
191 # define L_(Str) Str
192 # define CHAR_T char
193 # define UCHAR_T unsigned char
194 # define WINT_T int
/* Error exits for the build without libio.  Each macro calls funlockfile
   on `s' and returns from the enclosing function; there is no `errp' status
   word in this variant. */
/* Encoding error: set errno to EILSEQ, return the assignments done so far. */
196 # define encode_error() do { \
197 funlockfile (s); \
198 __set_errno (EILSEQ); \
199 return done; \
200 } while (0)
/* Conversion (matching) failure: return the assignments done so far. */
201 # define conv_error() do { \
202 funlockfile (s); \
203 return done; \
204 } while (0)
/* Input failure: return `done' if it is nonzero, EOF otherwise. */
205 # define input_error() do { \
206 funlockfile (s); \
207 return done ?: EOF; \
208 } while (0)
/* Allocation failure: set errno to ENOMEM and return EOF. */
209 # define memory_error() do { \
210 funlockfile (s); \
211 __set_errno (ENOMEM); \
212 return EOF; \
213 } while (0)
/* Validate the stream and format arguments (build without libio).  The
   enclosing function returns EOF with errno EBADF if __validfp (s) fails or
   the stream's __mode.__read is not set, and EOF with errno EINVAL if
   FORMAT is NULL.
   NOTE(review): the brace-only lines of this macro (embedded numbers 216,
   219, 222, 224 and 227) are absent from this copy, which looks like
   extraction damage -- confirm against a pristine source before building. */
214 # define ARGCHECK(s, format) \
215 do \
217 /* Check file argument for consistency. */ \
218 if (!__validfp (s) || !s->__mode.__read) \
220 __set_errno (EBADF); \
221 return EOF; \
223 else if (format == NULL) \
225 __set_errno (EINVAL); \
226 return EOF; \
228 } while (0)
/* Stream locking for the build without libio.  The `#if 1' below currently
   selects the first branch, in which flockfile, funlockfile, LOCK_STREAM
   and UNLOCK_STREAM all expand to nothing, i.e. no locking is performed.
   The `#else' branch holds the intended implementation and is inactive. */
229 #if 1
230 /* XXX For now !!! */
231 # define flockfile(S) /* nothing */
232 # define funlockfile(S) /* nothing */
233 # define LOCK_STREAM(S)
234 # define UNLOCK_STREAM(S)
235 #else
236 # define LOCK_STREAM(S) \
237 __libc_cleanup_region_start (&__funlockfile, (S)); \
238 __flockfile (S)
239 # define UNLOCK_STREAM(S) \
240 __funlockfile (S); \
241 __libc_cleanup_region_end (0)
242 #endif
243 #endif
246 /* Read formatted input from S according to the format string
247 FORMAT, using the argument list in ARG.
248 Return the number of assignments made, or -1 for an input error. */
249 #ifdef USE_IN_LIBIO
250 # ifdef COMPILE_WSCANF
252 _IO_vfwscanf (s, format, argptr, errp)
253 _IO_FILE *s;
254 const wchar_t *format;
255 _IO_va_list argptr;
256 int *errp;
257 # else
259 _IO_vfscanf (s, format, argptr, errp)
260 _IO_FILE *s;
261 const char *format;
262 _IO_va_list argptr;
263 int *errp;
264 # endif
265 #else
267 __vfscanf (FILE *s, const char *format, va_list argptr)
268 #endif
270 va_list arg;
271 register const CHAR_T *f = format;
272 register UCHAR_T fc; /* Current character of the format. */
273 register WINT_T done = 0; /* Assignments done. */
274 register size_t read_in = 0; /* Chars read in. */
275 register WINT_T c = 0; /* Last char read. */
276 register int width; /* Maximum field width. */
277 register int flags; /* Modifiers for current format element. */
279 /* Errno of last failed inchar call. */
280 int inchar_errno = 0;
281 /* Status for reading F-P nums. */
282 char got_dot, got_e, negative;
283 /* If a [...] is a [^...]. */
284 CHAR_T not_in;
285 #define exp_char not_in
286 /* Base for integral numbers. */
287 int base;
288 /* Signedness for integral numbers. */
289 int number_signed;
290 #define is_hexa number_signed
291 /* Decimal point character. */
292 #ifdef COMPILE_WSCANF
293 wint_t decimal;
294 #else
295 const char *decimal;
296 #endif
297 /* The thousands character of the current locale. */
298 #ifdef COMPILE_WSCANF
299 wint_t thousands;
300 #else
301 const char *thousands;
302 #endif
303 /* State for the conversions. */
304 mbstate_t state;
305 /* Integral holding variables. */
306 union
308 long long int q;
309 unsigned long long int uq;
310 long int l;
311 unsigned long int ul;
312 } num;
313 /* Character-buffer pointer. */
314 char *str = NULL;
315 wchar_t *wstr = NULL;
316 char **strptr = NULL;
317 ssize_t strsize = 0;
318 /* We must not react on white spaces immediately because they can
319 possibly be matched even if in the input stream no character is
320 available anymore. */
321 int skip_space = 0;
322 /* Nonzero if we are reading a pointer. */
323 int read_pointer;
324 /* Workspace. */
325 CHAR_T *tw; /* Temporary pointer. */
326 CHAR_T *wp = NULL; /* Workspace. */
327 size_t wpmax = 0; /* Maximal size of workspace. */
328 size_t wpsize; /* Currently used bytes in workspace. */
329 #define ADDW(Ch) \
330 do \
332 if (wpsize == wpmax) \
334 CHAR_T *old = wp; \
335 wpmax = (UCHAR_MAX + 1 > 2 * wpmax ? UCHAR_MAX + 1 : 2 * wpmax); \
336 wp = (CHAR_T *) alloca (wpmax * sizeof (wchar_t)); \
337 if (old != NULL) \
338 MEMCPY (wp, old, wpsize); \
340 wp[wpsize++] = (Ch); \
342 while (0)
344 #ifdef __va_copy
345 __va_copy (arg, argptr);
346 #else
347 arg = (va_list) argptr;
348 #endif
350 #ifdef ORIENT
351 ORIENT;
352 #endif
354 ARGCHECK (s, format);
356 /* Figure out the decimal point character. */
357 #ifdef COMPILE_WSCANF
358 decimal = _NL_CURRENT_WORD (LC_NUMERIC, _NL_NUMERIC_DECIMAL_POINT_WC);
359 #else
360 decimal = _NL_CURRENT (LC_NUMERIC, DECIMAL_POINT);
361 #endif
362 /* Figure out the thousands separator character. */
363 #ifdef COMPILE_WSCANF
364 thousands = _NL_CURRENT_WORD (LC_NUMERIC, _NL_NUMERIC_THOUSANDS_SEP_WC);
365 #else
366 thousands = _NL_CURRENT (LC_NUMERIC, THOUSANDS_SEP);
367 if (*thousands == '\0')
368 thousands = NULL;
369 #endif
371 /* Lock the stream. */
372 LOCK_STREAM (s);
375 #ifndef COMPILE_WSCANF
376 /* From now on we use `state' to convert the format string. */
377 memset (&state, '\0', sizeof (state));
378 #endif
380 /* Run through the format string. */
381 while (*f != '\0')
383 unsigned int argpos;
384 /* Extract the next argument, which is of type TYPE.
385 For a %N$... spec, this is the Nth argument from the beginning;
386 otherwise it is the next argument after the state now in ARG. */
387 #ifdef __va_copy
388 # define ARG(type) (argpos == 0 ? va_arg (arg, type) : \
389 ({ unsigned int pos = argpos; \
390 va_list arg; \
391 __va_copy (arg, argptr); \
392 while (--pos > 0) \
393 (void) va_arg (arg, void *); \
394 va_arg (arg, type); \
396 #else
397 # if 0
398 /* XXX Possible optimization. */
399 # define ARG(type) (argpos == 0 ? va_arg (arg, type) : \
400 ({ va_list arg = (va_list) argptr; \
401 arg = (va_list) ((char *) arg \
402 + (argpos - 1) \
403 * __va_rounded_size (void *)); \
404 va_arg (arg, type); \
406 # else
407 # define ARG(type) (argpos == 0 ? va_arg (arg, type) : \
408 ({ unsigned int pos = argpos; \
409 va_list arg = (va_list) argptr; \
410 while (--pos > 0) \
411 (void) va_arg (arg, void *); \
412 va_arg (arg, type); \
414 # endif
415 #endif
417 #ifndef COMPILE_WSCANF
418 if (!isascii ((unsigned char) *f))
420 /* Non-ASCII, may be a multibyte. */
421 int len = __mbrlen (f, strlen (f), &state);
422 if (len > 0)
426 c = inchar ();
427 if (c == EOF)
428 input_error ();
429 else if (c != (unsigned char) *f++)
431 ungetc_not_eof (c, s);
432 conv_error ();
435 while (--len > 0);
436 continue;
439 #endif
441 fc = *f++;
442 if (fc != '%')
444 /* Remember to skip spaces. */
445 if (ISSPACE (fc))
447 skip_space = 1;
448 continue;
451 /* Read a character. */
452 c = inchar ();
454 /* Characters other than format specs must just match. */
455 if (c == EOF)
456 input_error ();
458 /* We saw white space char as the last character in the format
459 string. Now it's time to skip all leading white space. */
460 if (skip_space)
462 while (ISSPACE (c))
463 if (inchar () == EOF && errno == EINTR)
464 conv_error ();
465 skip_space = 0;
468 if (c != fc)
470 ungetc (c, s);
471 conv_error ();
474 continue;
477 /* This is the start of the conversion string. */
478 flags = 0;
480 /* Not yet decided whether we read a pointer or not. */
481 read_pointer = 0;
483 /* Initialize state of modifiers. */
484 argpos = 0;
486 /* Prepare temporary buffer. */
487 wpsize = 0;
489 /* Check for a positional parameter specification. */
490 if (ISDIGIT ((UCHAR_T) *f))
492 argpos = (UCHAR_T) *f++ - L_('0');
493 while (ISDIGIT ((UCHAR_T) *f))
494 argpos = argpos * 10 + ((UCHAR_T) *f++ - L_('0'));
495 if (*f == L_('$'))
496 ++f;
497 else
499 /* Oops; that was actually the field width. */
500 width = argpos;
501 flags |= WIDTH;
502 argpos = 0;
503 goto got_width;
507 /* Check for the assignment-suppressing, the number grouping flag,
508 and the signal to use the locale's digit representation. */
509 while (*f == L_('*') || *f == L_('\'') || *f == L_('I'))
510 switch (*f++)
512 case L_('*'):
513 flags |= SUPPRESS;
514 break;
515 case L_('\''):
516 flags |= GROUP;
517 break;
518 case L_('I'):
519 flags |= I18N;
520 break;
523 /* We have seen width. */
524 if (ISDIGIT ((UCHAR_T) *f))
525 flags |= WIDTH;
527 /* Find the maximum field width. */
528 width = 0;
529 while (ISDIGIT ((UCHAR_T) *f))
531 width *= 10;
532 width += (UCHAR_T) *f++ - L_('0');
534 got_width:
535 if (width == 0)
536 width = -1;
538 /* Check for type modifiers. */
539 switch (*f++)
541 case L_('h'):
542 /* ints are short ints or chars. */
543 if (*f == L_('h'))
545 ++f;
546 flags |= CHAR;
548 else
549 flags |= SHORT;
550 break;
551 case L_('l'):
552 if (*f == L_('l'))
554 /* A double `l' is equivalent to an `L'. */
555 ++f;
556 flags |= LONGDBL | LONG;
558 else
559 /* ints are long ints. */
560 flags |= LONG;
561 break;
562 case L_('q'):
563 case L_('L'):
564 /* doubles are long doubles, and ints are long long ints. */
565 flags |= LONGDBL | LONG;
566 break;
567 case L_('a'):
568 /* The `a' is used as a flag only if followed by `s', `S' or
569 `['. */
570 if (*f != L_('s') && *f != L_('S') && *f != L_('['))
572 --f;
573 break;
575 /* String conversions (%s, %[) take a `char **'
576 arg and fill it in with a malloc'd pointer. */
577 flags |= MALLOC;
578 break;
579 case L_('z'):
580 if (need_longlong && sizeof (size_t) > sizeof (unsigned long int))
581 flags |= LONGDBL;
582 else if (sizeof (size_t) > sizeof (unsigned int))
583 flags |= LONG;
584 break;
585 case L_('j'):
586 if (need_longlong && sizeof (uintmax_t) > sizeof (unsigned long int))
587 flags |= LONGDBL;
588 else if (sizeof (uintmax_t) > sizeof (unsigned int))
589 flags |= LONG;
590 break;
591 case L_('t'):
592 if (need_longlong && sizeof (ptrdiff_t) > sizeof (long int))
593 flags |= LONGDBL;
594 else if (sizeof (ptrdiff_t) > sizeof (int))
595 flags |= LONG;
596 break;
597 default:
598 /* Not a recognized modifier. Backup. */
599 --f;
600 break;
603 /* End of the format string? */
604 if (*f == L_('\0'))
605 conv_error ();
607 /* Find the conversion specifier. */
608 fc = *f++;
609 if (skip_space || (fc != L_('[') && fc != L_('c')
610 && fc != L_('C') && fc != L_('n')))
612 /* Eat whitespace. */
613 int save_errno = errno;
614 errno = 0;
616 if (inchar () == EOF && errno == EINTR)
617 input_error ();
618 while (ISSPACE (c));
619 errno = save_errno;
620 ungetc (c, s);
621 skip_space = 0;
624 switch (fc)
626 case L_('%'): /* Must match a literal '%'. */
627 c = inchar ();
628 if (c == EOF)
629 input_error ();
630 if (c != fc)
632 ungetc_not_eof (c, s);
633 conv_error ();
635 break;
637 case L_('n'): /* Answer number of assignments done. */
638 /* Corrigendum 1 to ISO C 1990 describes the allowed flags
639 with the 'n' conversion specifier. */
640 if (!(flags & SUPPRESS))
642 /* Don't count the read-ahead. */
643 if (need_longlong && (flags & LONGDBL))
644 *ARG (long long int *) = read_in;
645 else if (need_long && (flags & LONG))
646 *ARG (long int *) = read_in;
647 else if (flags & SHORT)
648 *ARG (short int *) = read_in;
649 else if (!(flags & CHAR))
650 *ARG (int *) = read_in;
651 else
652 *ARG (char *) = read_in;
654 #ifdef NO_BUG_IN_ISO_C_CORRIGENDUM_1
655 /* We have a severe problem here. The ISO C standard
656 contradicts itself in explaining the effect of the %n
657 format in `scanf'. While in ISO C:1990 and the ISO C
658 Amendment 1:1995 the result is described as
660 Execution of a %n directive does not affect the
661 assignment count returned at the completion of
662 execution of the f(w)scanf function.
664 in ISO C Corrigendum 1:1994 the following was added:
666 Subclause 7.9.6.2
667 Add the following fourth example:
669 #include <stdio.h>
670 int d1, d2, n1, n2, i;
671 i = sscanf("123", "%d%n%n%d", &d1, &n1, &n2, &d2);
672 the value 123 is assigned to d1 and the value 3 to n1.
673 Because %n can never get an input failure the value
674 of 3 is also assigned to n2. The value of d2 is not
675 affected. The value 3 is assigned to i.
677 We go for now with the historically correct code from ISO C,
678 i.e., we don't count the %n assignments. When it ever
679 should prove to be wrong just remove the #ifdef above. */
680 ++done;
681 #endif
683 break;
685 case L_('c'): /* Match characters. */
686 if ((flags & LONG) == 0)
688 if (!(flags & SUPPRESS))
690 str = ARG (char *);
691 if (str == NULL)
692 conv_error ();
695 c = inchar ();
696 if (c == EOF)
697 input_error ();
699 if (width == -1)
700 width = 1;
702 #ifdef COMPILE_WSCANF
703 /* We have to convert the wide character(s) into multibyte
704 characters and store the result. */
705 memset (&state, '\0', sizeof (state));
709 size_t n;
711 n = __wcrtomb (!(flags & SUPPRESS) ? str : NULL, c, &state);
712 if (n == (size_t) -1)
713 /* No valid wide character. */
714 input_error ();
716 /* Increment the output pointer. Even if we don't
717 write anything. */
718 str += n;
720 while (--width > 0 && inchar () != EOF);
721 #else
722 if (!(flags & SUPPRESS))
725 *str++ = c;
726 while (--width > 0 && inchar () != EOF);
728 else
729 while (--width > 0 && inchar () != EOF);
730 #endif
732 if (!(flags & SUPPRESS))
733 ++done;
735 break;
737 /* FALLTHROUGH */
738 case L_('C'):
739 if (!(flags & SUPPRESS))
741 wstr = ARG (wchar_t *);
742 if (wstr == NULL)
743 conv_error ();
746 c = inchar ();
747 if (c == EOF)
748 input_error ();
750 #ifdef COMPILE_WSCANF
751 /* Just store the incoming wide characters. */
752 if (!(flags & SUPPRESS))
755 *wstr++ = c;
756 while (--width > 0 && inchar () != EOF);
758 else
759 while (--width > 0 && inchar () != EOF);
760 #else
762 /* We have to convert the multibyte input sequence to wide
763 characters. */
764 char buf[1];
765 mbstate_t cstate;
767 memset (&cstate, '\0', sizeof (cstate));
771 /* This is what we present the mbrtowc function first. */
772 buf[0] = c;
774 while (1)
776 size_t n;
778 n = __mbrtowc (!(flags & SUPPRESS) ? wstr : NULL,
779 buf, 1, &cstate);
781 if (n == (size_t) -2)
783 /* Possibly correct character, just not enough
784 input. */
785 if (inchar () == EOF)
786 encode_error ();
788 buf[0] = c;
789 continue;
792 if (n != 1)
793 encode_error ();
795 /* We have a match. */
796 break;
799 /* Advance the result pointer. */
800 ++wstr;
802 while (--width > 0 && inchar () != EOF);
804 #endif
806 if (!(flags & SUPPRESS))
807 ++done;
809 break;
811 case L_('s'): /* Read a string. */
812 if (!(flags & LONG))
814 #define STRING_ARG(Str, Type) \
815 do if (!(flags & SUPPRESS)) \
817 if (flags & MALLOC) \
819 /* The string is to be stored in a malloc'd buffer. */ \
820 strptr = ARG (char **); \
821 if (strptr == NULL) \
822 conv_error (); \
823 /* Allocate an initial buffer. */ \
824 strsize = 100; \
825 *strptr = (char *) malloc (strsize * sizeof (Type)); \
826 Str = (Type *) *strptr; \
828 else \
829 Str = ARG (Type *); \
830 if (Str == NULL) \
831 conv_error (); \
832 } while (0)
833 STRING_ARG (str, char);
835 c = inchar ();
836 if (c == EOF)
837 input_error ();
839 #ifdef COMPILE_WSCANF
840 memset (&state, '\0', sizeof (state));
841 #endif
845 if (ISSPACE (c))
847 ungetc_not_eof (c, s);
848 break;
851 #ifdef COMPILE_WSCANF
852 /* This is quite complicated. We have to convert the
853 wide characters into multibyte characters and then
854 store them. */
856 size_t n;
858 if (!(flags & SUPPRESS) && (flags & MALLOC)
859 && str + MB_CUR_MAX >= *strptr + strsize)
861 /* We have to enlarge the buffer if the `a' flag
862 was given. */
863 size_t strleng = str - *strptr;
864 char *newstr;
866 newstr = (char *) realloc (*strptr, strsize * 2);
867 if (newstr == NULL)
869 /* Can't allocate that much. Last-ditch
870 effort. */
871 newstr = (char *) realloc (*strptr,
872 strleng + MB_CUR_MAX);
873 if (newstr == NULL)
875 /* We lose. Oh well. Terminate the
876 string and stop converting,
877 so at least we don't skip any input. */
878 ((char *) (*strptr))[strleng] = '\0';
879 ++done;
880 conv_error ();
882 else
884 *strptr = newstr;
885 str = newstr + strleng;
886 strsize = strleng + MB_CUR_MAX;
889 else
891 *strptr = newstr;
892 str = newstr + strleng;
893 strsize *= 2;
897 n = __wcrtomb (!(flags & SUPPRESS) ? str : NULL, c,
898 &state);
899 if (n == (size_t) -1)
900 encode_error ();
902 assert (n <= MB_CUR_MAX);
903 str += n;
905 #else
906 /* This is easy. */
907 if (!(flags & SUPPRESS))
909 *str++ = c;
910 if ((flags & MALLOC)
911 && (char *) str == *strptr + strsize)
913 /* Enlarge the buffer. */
914 str = (char *) realloc (*strptr, 2 * strsize);
915 if (str == NULL)
917 /* Can't allocate that much. Last-ditch
918 effort. */
919 str = (char *) realloc (*strptr, strsize + 1);
920 if (str == NULL)
922 /* We lose. Oh well. Terminate the
923 string and stop converting,
924 so at least we don't skip any input. */
925 ((char *) (*strptr))[strsize - 1] = '\0';
926 ++done;
927 conv_error ();
929 else
931 *strptr = (char *) str;
932 str += strsize;
933 ++strsize;
936 else
938 *strptr = (char *) str;
939 str += strsize;
940 strsize *= 2;
944 #endif
946 while ((width <= 0 || --width > 0) && inchar () != EOF);
948 if (!(flags & SUPPRESS))
950 #ifdef COMPILE_WSCANF
951 /* We have to emit the code to get into the initial
952 state. */
953 char buf[MB_LEN_MAX];
954 size_t n = __wcrtomb (buf, L'\0', &state);
955 if (n > 0 && (flags & MALLOC)
956 && str + n >= *strptr + strsize)
958 /* Enlarge the buffer. */
959 size_t strleng = str - *strptr;
960 char *newstr;
962 newstr = (char *) realloc (*strptr, strleng + n + 1);
963 if (newstr == NULL)
965 /* We lose. Oh well. Terminate the string
966 and stop converting, so at least we don't
967 skip any input. */
968 ((char *) (*strptr))[strleng] = '\0';
969 ++done;
970 conv_error ();
972 else
974 *strptr = newstr;
975 str = newstr + strleng;
976 strsize = strleng + n + 1;
980 str = __mempcpy (str, buf, n);
981 #endif
982 *str++ = '\0';
984 if ((flags & MALLOC) && str - *strptr != strsize)
986 char *cp = (char *) realloc (*strptr, str - *strptr);
987 if (cp != NULL)
988 *strptr = cp;
991 ++done;
993 break;
995 /* FALLTHROUGH */
997 case L_('S'):
999 #ifndef COMPILE_WSCANF
1000 mbstate_t cstate;
1001 #endif
1003 /* Wide character string. */
1004 STRING_ARG (wstr, wchar_t);
1006 c = inchar ();
1007 if (c == EOF)
1008 input_error ();
1010 #ifndef COMPILE_WSCANF
1011 memset (&cstate, '\0', sizeof (cstate));
1012 #endif
1016 if (ISSPACE (c))
1018 ungetc_not_eof (c, s);
1019 break;
1022 #ifdef COMPILE_WSCANF
1023 /* This is easy. */
1024 if (!(flags & SUPPRESS))
1026 *wstr++ = c;
1027 if ((flags & MALLOC)
1028 && wstr == (wchar_t *) *strptr + strsize)
1030 /* Enlarge the buffer. */
1031 wstr = (wchar_t *) realloc (*strptr,
1032 (2 * strsize)
1033 * sizeof (wchar_t));
1034 if (wstr == NULL)
1036 /* Can't allocate that much. Last-ditch
1037 effort. */
1038 wstr = (wchar_t *) realloc (*strptr,
1039 (strsize + 1)
1040 * sizeof (wchar_t));
1041 if (wstr == NULL)
1043 /* We lose. Oh well. Terminate the string
1044 and stop converting, so at least we don't
1045 skip any input. */
1046 ((wchar_t *) (*strptr))[strsize - 1] = L'\0';
1047 ++done;
1048 conv_error ();
1050 else
1052 *strptr = (char *) wstr;
1053 wstr += strsize;
1054 ++strsize;
1057 else
1059 *strptr = (char *) wstr;
1060 wstr += strsize;
1061 strsize *= 2;
1065 #else
1067 char buf[1];
1069 buf[0] = c;
1071 while (1)
1073 size_t n;
1075 n = __mbrtowc (!(flags & SUPPRESS) ? wstr : NULL,
1076 buf, 1, &cstate);
1078 if (n == (size_t) -2)
1080 /* Possibly correct character, just not enough
1081 input. */
1082 if (inchar () == EOF)
1083 encode_error ();
1085 buf[0] = c;
1086 continue;
1089 if (n != 1)
1090 encode_error ();
1092 /* We have a match. */
1093 ++wstr;
1094 break;
1097 if (!(flags & SUPPRESS) && (flags & MALLOC)
1098 && wstr == (wchar_t *) *strptr + strsize)
1100 /* Enlarge the buffer. */
1101 wstr = (wchar_t *) realloc (*strptr,
1102 (2 * strsize
1103 * sizeof (wchar_t)));
1104 if (wstr == NULL)
1106 /* Can't allocate that much. Last-ditch effort. */
1107 wstr = (wchar_t *) realloc (*strptr,
1108 ((strsize + 1)
1109 * sizeof (wchar_t)));
1110 if (wstr == NULL)
1112 /* We lose. Oh well. Terminate the
1113 string and stop converting, so at
1114 least we don't skip any input. */
1115 ((wchar_t *) (*strptr))[strsize - 1] = L'\0';
1116 ++done;
1117 conv_error ();
1119 else
1121 *strptr = (char *) wstr;
1122 wstr += strsize;
1123 ++strsize;
1126 else
1128 *strptr = (char *) wstr;
1129 wstr += strsize;
1130 strsize *= 2;
1134 #endif
1136 while ((width <= 0 || --width > 0) && inchar () != EOF);
1138 if (!(flags & SUPPRESS))
1140 *wstr++ = L'\0';
1142 if ((flags & MALLOC) && wstr - (wchar_t *) *strptr != strsize)
1144 wchar_t *cp = (wchar_t *) realloc (*strptr,
1145 ((wstr
1146 - (wchar_t *) *strptr)
1147 * sizeof(wchar_t)));
1148 if (cp != NULL)
1149 *strptr = (char *) cp;
1152 ++done;
1155 break;
1157 case L_('x'): /* Hexadecimal integer. */
1158 case L_('X'): /* Ditto. */
1159 base = 16;
1160 number_signed = 0;
1161 goto number;
1163 case L_('o'): /* Octal integer. */
1164 base = 8;
1165 number_signed = 0;
1166 goto number;
1168 case L_('u'): /* Unsigned decimal integer. */
1169 base = 10;
1170 number_signed = 0;
1171 goto number;
1173 case L_('d'): /* Signed decimal integer. */
1174 base = 10;
1175 number_signed = 1;
1176 goto number;
1178 case L_('i'): /* Generic number. */
1179 base = 0;
1180 number_signed = 1;
1182 number:
1183 c = inchar ();
1184 if (c == EOF)
1185 input_error ();
1187 /* Check for a sign. */
1188 if (c == L_('-') || c == L_('+'))
1190 ADDW (c);
1191 if (width > 0)
1192 --width;
1193 c = inchar ();
1196 /* Look for a leading indication of base. */
1197 if (width != 0 && c == L_('0'))
1199 if (width > 0)
1200 --width;
1202 ADDW (c);
1203 c = inchar ();
1205 if (width != 0 && TOLOWER (c) == L_('x'))
1207 if (base == 0)
1208 base = 16;
1209 if (base == 16)
1211 if (width > 0)
1212 --width;
1213 c = inchar ();
1216 else if (base == 0)
1217 base = 8;
1220 if (base == 0)
1221 base = 10;
1223 if (base == 10 && (flags & I18N) != 0)
1225 int from_level;
1226 int to_level;
1227 int level;
1228 #ifdef COMPILE_WSCANF
1229 const wchar_t *wcdigits[10];
1230 #else
1231 const char *mbdigits[10];
1232 #endif
1233 int n;
1235 from_level = 0;
1236 #ifdef COMPILE_WSCANF
1237 to_level = _NL_CURRENT_WORD (LC_CTYPE,
1238 _NL_CTYPE_INDIGITS_WC_LEN) - 1;
1239 #else
1240 to_level = _NL_CURRENT_WORD (LC_CTYPE,
1241 _NL_CTYPE_INDIGITS_MB_LEN) - 1;
1242 #endif
1244 /* Read the number into workspace. */
1245 while (c != EOF && width != 0)
1247 /* In this round we get the pointer to the digit strings
1248 and also perform the first round of comparisons. */
1249 for (n = 0; n < 10; ++n)
1251 /* Get the string for the digits with value N. */
1252 #ifdef COMPILE_WSCANF
1253 wcdigits[n] = (const wchar_t *)
1254 _NL_CURRENT (LC_CTYPE, _NL_CTYPE_INDIGITS0_WC + n);
1255 wcdigits[n] += from_level;
1257 if (c == (wint_t) *wcdigits[n])
1259 to_level = from_level;
1260 break;
1263 /* Advance the pointer to the next string. */
1264 ++wcdigits[n];
1265 #else
1266 const char *cmpp;
1267 int avail = width > 0 ? width : INT_MAX;
1269 mbdigits[n] = _NL_CURRENT (LC_CTYPE,
1270 _NL_CTYPE_INDIGITS0_MB + n);
1272 for (level = 0; level < from_level; level++)
1273 mbdigits[n] = strchr (mbdigits[n], '\0') + 1;
1275 cmpp = mbdigits[n];
1276 while ((unsigned char) *cmpp == c && avail > 0)
1278 if (*++cmpp == '\0')
1279 break;
1280 else
1282 if ((c = inchar ()) == EOF)
1283 break;
1284 --avail;
1288 if (*cmpp == '\0')
1290 if (width > 0)
1291 width = avail;
1292 to_level = from_level;
1293 break;
1296 /* We are pushing all read characters back. */
1297 if (cmpp > mbdigits[n])
1299 ungetc (c, s);
1300 while (--cmpp > mbdigits[n])
1301 ungetc_not_eof ((unsigned char) *cmpp, s);
1302 c = (unsigned char) *cmpp;
1305 /* Advance the pointer to the next string. */
1306 mbdigits[n] = strchr (mbdigits[n], '\0') + 1;
1307 #endif
1310 if (n == 10)
1312 /* Have not yet found the digit. */
1313 for (level = from_level + 1; level <= to_level; ++level)
1315 /* Search all ten digits of this level. */
1316 for (n = 0; n < 10; ++n)
1318 #ifdef COMPILE_WSCANF
1319 if (c == (wint_t) *wcdigits[n])
1320 break;
1322 /* Advance the pointer to the next string. */
1323 ++wcdigits[n];
1324 #else
1325 const char *cmpp;
1326 int avail = width > 0 ? width : INT_MAX;
1328 cmpp = mbdigits[n];
1329 while ((unsigned char) *cmpp == c && avail > 0)
1331 if (*++cmpp == '\0')
1332 break;
1333 else
1335 if ((c = inchar ()) == EOF)
1336 break;
1337 --avail;
1341 if (*cmpp == '\0')
1343 if (width > 0)
1344 width = avail;
1345 break;
1348 /* We are pushing all read characters back. */
1349 if (cmpp > mbdigits[n])
1351 ungetc (c, s);
1352 while (--cmpp > mbdigits[n])
1353 ungetc_not_eof ((unsigned char) *cmpp, s);
1354 c = (unsigned char) *cmpp;
1357 /* Advance the pointer to the next string. */
1358 mbdigits[n] = strchr (mbdigits[n], '\0') + 1;
1359 #endif
1362 if (n < 10)
1364 /* Found it. */
1365 from_level = level;
1366 to_level = level;
1367 break;
1372 if (n < 10)
1373 c = L_('0') + n;
1374 else if ((flags & GROUP)
1375 #ifdef COMPILE_WSCANF
1376 && thousands != L'\0'
1377 #else
1378 && thousands != NULL
1379 #endif
1382 /* Try matching against the thousands separator. */
1383 #ifdef COMPILE_WSCANF
1384 if (c != thousands)
1385 break;
1386 #else
1387 const char *cmpp = thousands;
1388 int avail = width > 0 ? width : INT_MAX;
1390 while ((unsigned char) *cmpp == c && avail > 0)
1392 ADDW (c);
1393 if (*++cmpp == '\0')
1394 break;
1395 else
1397 if ((c = inchar ()) == EOF)
1398 break;
1399 --avail;
1403 if (*cmpp != '\0')
1405 /* We are pushing all read characters back. */
1406 if (cmpp > thousands)
1408 wpsize -= cmpp - thousands;
1409 ungetc (c, s);
1410 while (--cmpp > thousands)
1411 ungetc_not_eof ((unsigned char) *cmpp, s);
1412 c = (unsigned char) *cmpp;
1414 break;
1417 if (width > 0)
1418 width = avail;
1420 /* The last thousands character will be added back by
1421 the ADDW below. */
1422 --wpsize;
1423 #endif
1425 else
1426 break;
1428 ADDW (c);
1429 if (width > 0)
1430 --width;
1432 c = inchar ();
1435 else
1436 /* Read the number into workspace. */
1437 while (c != EOF && width != 0)
1439 if (base == 16)
1441 if (!ISXDIGIT (c))
1442 break;
1444 else if (!ISDIGIT (c) || (int) (c - L_('0')) >= base)
1446 if (base == 10 && (flags & GROUP)
1447 #ifdef COMPILE_WSCANF
1448 && thousands != L'\0'
1449 #else
1450 && thousands != NULL
1451 #endif
1454 /* Try matching against the thousands separator. */
1455 #ifdef COMPILE_WSCANF
1456 if (c != thousands)
1457 break;
1458 #else
1459 const char *cmpp = thousands;
1460 int avail = width > 0 ? width : INT_MAX;
1462 while ((unsigned char) *cmpp == c && avail > 0)
1464 ADDW (c);
1465 if (*++cmpp == '\0')
1466 break;
1467 else
1469 if ((c = inchar ()) == EOF)
1470 break;
1471 --avail;
1475 if (*cmpp != '\0')
1477 /* We are pushing all read characters back. */
1478 if (cmpp > thousands)
1480 wpsize -= cmpp - thousands;
1481 ungetc (c, s);
1482 while (--cmpp > thousands)
1483 ungetc_not_eof ((unsigned char) *cmpp, s);
1484 c = (unsigned char) *cmpp;
1486 break;
1489 if (width > 0)
1490 width = avail;
1492 /* The last thousands character will be added back by
1493 the ADDW below. */
1494 --wpsize;
1495 #endif
1497 else
1498 break;
1500 ADDW (c);
1501 if (width > 0)
1502 --width;
1504 c = inchar ();
1507 if (wpsize == 0
1508 || (wpsize == 1 && (wp[0] == L_('+') || wp[0] == L_('-'))))
1510 /* There was no number. If we are supposed to read a pointer
1511 we must recognize "(nil)" as well. */
1512 if (wpsize == 0 && read_pointer && (width < 0 || width >= 0)
1513 && c == '('
1514 && TOLOWER (inchar ()) == L_('n')
1515 && TOLOWER (inchar ()) == L_('i')
1516 && TOLOWER (inchar ()) == L_('l')
1517 && inchar () == L_(')'))
1518 /* We must produce the value of a NULL pointer. A single
1519 '0' digit is enough. */
1520 ADDW (L_('0'));
1521 else
1523 /* The last read character is not part of the number
1524 anymore. */
1525 ungetc (c, s);
1527 conv_error ();
1530 else
1531 /* The just read character is not part of the number anymore. */
1532 ungetc (c, s);
1534 /* Convert the number. */
1535 ADDW (L_('\0'));
1536 if (need_longlong && (flags & LONGDBL))
1538 if (number_signed)
1539 num.q = __strtoll_internal (wp, &tw, base, flags & GROUP);
1540 else
1541 num.uq = __strtoull_internal (wp, &tw, base, flags & GROUP);
1543 else
1545 if (number_signed)
1546 num.l = __strtol_internal (wp, &tw, base, flags & GROUP);
1547 else
1548 num.ul = __strtoul_internal (wp, &tw, base, flags & GROUP);
1550 if (wp == tw)
1551 conv_error ();
1553 if (!(flags & SUPPRESS))
1555 if (! number_signed)
1557 if (need_longlong && (flags & LONGDBL))
1558 *ARG (unsigned LONGLONG int *) = num.uq;
1559 else if (need_long && (flags & LONG))
1560 *ARG (unsigned long int *) = num.ul;
1561 else if (flags & SHORT)
1562 *ARG (unsigned short int *)
1563 = (unsigned short int) num.ul;
1564 else if (!(flags & CHAR))
1565 *ARG (unsigned int *) = (unsigned int) num.ul;
1566 else
1567 *ARG (unsigned char *) = (unsigned char) num.ul;
1569 else
1571 if (need_longlong && (flags & LONGDBL))
1572 *ARG (LONGLONG int *) = num.q;
1573 else if (need_long && (flags & LONG))
1574 *ARG (long int *) = num.l;
1575 else if (flags & SHORT)
1576 *ARG (short int *) = (short int) num.l;
1577 else if (!(flags & CHAR))
1578 *ARG (int *) = (int) num.l;
1579 else
1580 *ARG (signed char *) = (signed char) num.ul;
1582 ++done;
1584 break;
1586 case L_('e'): /* Floating-point numbers. */
1587 case L_('E'):
1588 case L_('f'):
1589 case L_('F'):
1590 case L_('g'):
1591 case L_('G'):
1592 case L_('a'):
1593 case L_('A'):
1594 c = inchar ();
1595 if (c == EOF)
1596 input_error ();
1598 /* Check for a sign. */
1599 if (c == L_('-') || c == L_('+'))
1601 negative = c == L_('-');
1602 if (width == 0 || inchar () == EOF)
1603 /* EOF is only an input error before we read any chars. */
1604 conv_error ();
1605 if (! ISDIGIT (c) && TOLOWER (c) != L_('i')
1606 && TOLOWER (c) != L_('n'))
1608 #ifdef COMPILE_WSCANF
1609 if (c != decimal)
1611 /* This is no valid number. */
1612 ungetc (c, s);
1613 conv_error ();
1615 #else
1616 /* Match against the decimal point. At this point
1617 we are taking advantage of the fact that we can
1618 push more than one character back. This is
1619 (almost) never necessary since the decimal point
1620 string hopefully never contains more than one
1621 byte. */
1622 const char *cmpp = decimal;
1623 int avail = width > 0 ? width : INT_MAX;
1625 while ((unsigned char) *cmpp == c && avail > 0)
1626 if (*++cmpp == '\0')
1627 break;
1628 else
1630 if (inchar () == EOF)
1631 break;
1632 --avail;
1635 if (*cmpp != '\0')
1637 /* This is no valid number. */
1638 while (1)
1640 ungetc (c, s);
1641 if (cmpp == decimal)
1642 break;
1643 c = (unsigned char) *--cmpp;
1646 conv_error ();
1648 if (width > 0)
1649 width = avail;
1650 #endif
1652 if (width > 0)
1653 --width;
1655 else
1656 negative = 0;
1658 /* Take care of the special arguments "nan" and "inf". */
1659 if (TOLOWER (c) == L_('n'))
1661 /* Maybe "nan". */
1662 ADDW (c);
1663 if (width == 0 || inchar () == EOF || TOLOWER (c) != L_('a'))
1664 conv_error ();
1665 if (width > 0)
1666 --width;
1667 ADDW (c);
1668 if (width == 0 || inchar () == EOF || TOLOWER (c) != L_('n'))
1669 conv_error ();
1670 if (width > 0)
1671 --width;
1672 ADDW (c);
1673 /* It is "nan". */
1674 goto scan_float;
1676 else if (TOLOWER (c) == L_('i'))
1678 /* Maybe "inf" or "infinity". */
1679 ADDW (c);
1680 if (width == 0 || inchar () == EOF || TOLOWER (c) != L_('n'))
1681 conv_error ();
1682 if (width > 0)
1683 --width;
1684 ADDW (c);
1685 if (width == 0 || inchar () == EOF || TOLOWER (c) != L_('f'))
1686 conv_error ();
1687 if (width > 0)
1688 --width;
1689 ADDW (c);
1690 /* It is at least "inf". */
1691 if (width != 0 && inchar () != EOF)
1693 if (TOLOWER (c) == L_('i'))
1695 if (width > 0)
1696 --width;
1697 /* Now we have to read the rest as well. */
1698 ADDW (c);
1699 if (width == 0 || inchar () == EOF
1700 || TOLOWER (c) != L_('n'))
1701 conv_error ();
1702 if (width > 0)
1703 --width;
1704 ADDW (c);
1705 if (width == 0 || inchar () == EOF
1706 || TOLOWER (c) != L_('i'))
1707 conv_error ();
1708 if (width > 0)
1709 --width;
1710 ADDW (c);
1711 if (width == 0 || inchar () == EOF
1712 || TOLOWER (c) != L_('t'))
1713 conv_error ();
1714 if (width > 0)
1715 --width;
1716 ADDW (c);
1717 if (width == 0 || inchar () == EOF
1718 || TOLOWER (c) != L_('y'))
1719 conv_error ();
1720 if (width > 0)
1721 --width;
1722 ADDW (c);
1724 else
1725 /* Never mind. */
1726 ungetc (c, s);
1728 goto scan_float;
1731 is_hexa = 0;
1732 exp_char = L_('e');
1733 if (width != 0 && c == L_('0'))
1735 ADDW (c);
1736 c = inchar ();
1737 if (width > 0)
1738 --width;
1739 if (width != 0 && TOLOWER (c) == L_('x'))
1741 /* It is a number in hexadecimal format. */
1742 ADDW (c);
1744 is_hexa = 1;
1745 exp_char = L_('p');
1747 /* Grouping is not allowed. */
1748 flags &= ~GROUP;
1749 c = inchar ();
1750 if (width > 0)
1751 --width;
1755 got_dot = got_e = 0;
1758 if (ISDIGIT (c))
1759 ADDW (c);
1760 else if (!got_e && is_hexa && ISXDIGIT (c))
1761 ADDW (c);
1762 else if (got_e && wp[wpsize - 1] == exp_char
1763 && (c == L_('-') || c == L_('+')))
1764 ADDW (c);
1765 else if (wpsize > 0 && !got_e
1766 && (CHAR_T) TOLOWER (c) == exp_char)
1768 ADDW (exp_char);
1769 got_e = got_dot = 1;
1771 else
1773 #ifdef COMPILE_WSCANF
1774 if (! got_dot && c == decimal)
1776 ADDW (c);
1777 got_dot = 1;
1779 else if (thousands != L'\0' && ! got_dot && c == thousands)
1780 ADDW (c);
1781 else
1783 /* The last read character is not part of the number
1784 anymore. */
1785 ungetc (c, s);
1786 break;
1788 #else
1789 const char *cmpp = decimal;
1790 int avail = width > 0 ? width : INT_MAX;
1792 if (! got_dot)
1794 while ((unsigned char) *cmpp == c && avail > 0)
1795 if (*++cmpp == '\0')
1796 break;
1797 else
1799 if (inchar () == EOF)
1800 break;
1801 --avail;
1805 if (*cmpp == '\0')
1807 /* Add all the characters. */
1808 for (cmpp = decimal; *cmpp != '\0'; ++cmpp)
1809 ADDW ((unsigned char) *cmpp);
1810 if (width > 0)
1811 width = avail;
1812 got_dot = 1;
1814 else
1816 /* Figure out whether it is a thousands separator.
1817 There is one problem: we possibly read more than
1818 one character. We cannot push them back but since
1819 we know that parts of the `decimal' string matched,
1820 we can compare against it. */
1821 const char *cmp2p = thousands;
1823 if (thousands != NULL && ! got_dot)
1825 while (cmp2p < cmpp
1826 && *cmp2p == decimal[cmp2p - thousands])
1827 ++cmp2p;
1828 if (cmp2p == cmpp)
1830 while ((unsigned char) *cmp2p == c && avail > 0)
1831 if (*++cmp2p == '\0')
1832 break;
1833 else
1835 if (inchar () == EOF)
1836 break;
1837 --avail;
1842 if (cmp2p != NULL && *cmp2p == '\0')
1844 /* Add all the characters. */
1845 for (cmpp = thousands; *cmpp != '\0'; ++cmpp)
1846 ADDW ((unsigned char) *cmpp);
1847 if (width > 0)
1848 width = avail;
1850 else
1852 /* The last read character is not part of the number
1853 anymore. */
1854 ungetc (c, s);
1855 break;
1858 #endif
1860 if (width > 0)
1861 --width;
1863 while (width != 0 && inchar () != EOF);
1865 /* Have we read any character? If we try to read a number
1866 in hexadecimal notation and we have read only the `0x'
1867 prefix or no exponent this is an error. */
1868 if (wpsize == 0 || (is_hexa && (wpsize == 2 || ! got_e)))
1869 conv_error ();
1871 scan_float:
1872 /* Convert the number. */
1873 ADDW (L_('\0'));
1874 if (flags & LONGDBL)
1876 long double d = __strtold_internal (wp, &tw, flags & GROUP);
1877 if (!(flags & SUPPRESS) && tw != wp)
1878 *ARG (long double *) = negative ? -d : d;
1880 else if (flags & LONG)
1882 double d = __strtod_internal (wp, &tw, flags & GROUP);
1883 if (!(flags & SUPPRESS) && tw != wp)
1884 *ARG (double *) = negative ? -d : d;
1886 else
1888 float d = __strtof_internal (wp, &tw, flags & GROUP);
1889 if (!(flags & SUPPRESS) && tw != wp)
1890 *ARG (float *) = negative ? -d : d;
1893 if (tw == wp)
1894 conv_error ();
1896 if (!(flags & SUPPRESS))
1897 ++done;
1898 break;
1900 case L_('['): /* Character class. */
1901 if (flags & LONG)
1902 STRING_ARG (wstr, wchar_t);
1903 else
1904 STRING_ARG (str, char);
1906 if (*f == L_('^'))
1908 ++f;
1909 not_in = 1;
1911 else
1912 not_in = 0;
1914 if (width < 0)
1915 /* There is no width given so there is also no limit on the
1916 number of characters we read. Therefore we set width to
1917 a very high value to make the algorithm easier. */
1918 width = INT_MAX;
1920 #ifdef COMPILE_WSCANF
1921 /* Find the beginning and the end of the scanlist. We are not
1922 creating a lookup table since it would have to be too large.
1923 Instead we search each time through the string. This is not
1924 a constant lookup time but who uses this feature deserves to
1925 be punished. */
1926 tw = (wchar_t *) f; /* Marks the beginning. */
1928 if (*f == L']')
1929 ++f;
1931 while ((fc = *f++) != L'\0' && fc != L']');
1933 if (fc == L'\0')
1934 conv_error ();
1935 wp = (wchar_t *) f - 1;
1936 #else
1937 /* Fill WP with byte flags indexed by character.
1938 We will use this flag map for matching input characters. */
1939 if (wpmax < UCHAR_MAX + 1)
1941 wpmax = UCHAR_MAX + 1;
1942 wp = (char *) alloca (wpmax);
1944 memset (wp, '\0', UCHAR_MAX + 1);
1946 fc = *f;
1947 if (fc == ']' || fc == '-')
1949 /* If ] or - appears before any char in the set, it is not
1950 the terminator or separator, but the first char in the
1951 set. */
1952 wp[fc] = 1;
1953 ++f;
1956 while ((fc = *f++) != '\0' && fc != ']')
1957 if (fc == '-' && *f != '\0' && *f != ']'
1958 && (unsigned char) f[-2] <= (unsigned char) *f)
1960 /* Add all characters from the one before the '-'
1961 up to (but not including) the next format char. */
1962 for (fc = (unsigned char) f[-2]; fc < (unsigned char) *f; ++fc)
1963 wp[fc] = 1;
1965 else
1966 /* Add the character to the flag map. */
1967 wp[fc] = 1;
1969 if (fc == '\0')
1970 conv_error();
1971 #endif
1973 if (flags & LONG)
1975 size_t now = read_in;
1976 #ifdef COMPILE_WSCANF
1977 if (inchar () == WEOF)
1978 input_error ();
1982 wchar_t *runp;
1984 /* Test whether it's in the scanlist. */
1985 runp = tw;
1986 while (runp < wp)
1988 if (runp[0] == L'-' && runp[1] != '\0' && runp + 1 != wp
1989 && runp != tw
1990 && (unsigned int) runp[-1] <= (unsigned int) runp[1])
1992 /* Match against all characters in between the
1993 first and last character of the sequence. */
1994 wchar_t wc;
1996 for (wc = runp[-1] + 1; wc <= runp[1]; ++wc)
1997 if ((wint_t) wc == c)
1998 break;
2000 if (wc <= runp[1] && !not_in)
2001 break;
2002 if (wc <= runp[1] && not_in)
2004 /* The current character is not in the
2005 scanset. */
2006 ungetc (c, s);
2007 goto out;
2010 runp += 2;
2012 else
2014 if ((wint_t) *runp == c && !not_in)
2015 break;
2016 if ((wint_t) *runp == c && not_in)
2018 ungetc (c, s);
2019 goto out;
2022 ++runp;
2026 if (runp == wp && !not_in)
2028 ungetc (c, s);
2029 goto out;
2032 if (!(flags & SUPPRESS))
2034 *wstr++ = c;
2036 if ((flags & MALLOC)
2037 && wstr == (wchar_t *) *strptr + strsize)
2039 /* Enlarge the buffer. */
2040 wstr = (wchar_t *) realloc (*strptr,
2041 (2 * strsize)
2042 * sizeof (wchar_t));
2043 if (wstr == NULL)
2045 /* Can't allocate that much. Last-ditch
2046 effort. */
2047 wstr = (wchar_t *)
2048 realloc (*strptr, (strsize + 1)
2049 * sizeof (wchar_t));
2050 if (wstr == NULL)
2052 /* We lose. Oh well. Terminate the string
2053 and stop converting, so at least we don't
2054 skip any input. */
2055 ((wchar_t *) (*strptr))[strsize - 1] = L'\0';
2056 ++done;
2057 conv_error ();
2059 else
2061 *strptr = (char *) wstr;
2062 wstr += strsize;
2063 ++strsize;
2066 else
2068 *strptr = (char *) wstr;
2069 wstr += strsize;
2070 strsize *= 2;
2075 while (--width > 0 && inchar () != WEOF);
2076 out:
2077 #else
2078 char buf[MB_LEN_MAX];
2079 size_t cnt = 0;
2080 mbstate_t cstate;
2082 if (inchar () == EOF)
2083 input_error ();
2085 memset (&cstate, '\0', sizeof (cstate));
2089 if (wp[c] == not_in)
2091 ungetc_not_eof (c, s);
2092 break;
2095 /* This is easy. */
2096 if (!(flags & SUPPRESS))
2098 size_t n;
2100 /* Convert it into a wide character. */
2101 buf[0] = c;
2102 n = __mbrtowc (wstr, buf, 1, &cstate);
2104 if (n == (size_t) -2)
2106 /* Possibly correct character, just not enough
2107 input. */
2108 ++cnt;
2109 assert (cnt < MB_CUR_MAX);
2110 continue;
2112 cnt = 0;
2114 ++wstr;
2115 if ((flags & MALLOC)
2116 && wstr == (wchar_t *) *strptr + strsize)
2118 /* Enlarge the buffer. */
2119 wstr = (wchar_t *) realloc (*strptr,
2120 (2 * strsize
2121 * sizeof (wchar_t)));
2122 if (wstr == NULL)
2124 /* Can't allocate that much. Last-ditch
2125 effort. */
2126 wstr = (wchar_t *)
2127 realloc (*strptr, ((strsize + 1)
2128 * sizeof (wchar_t)));
2129 if (wstr == NULL)
2131 /* We lose. Oh well. Terminate the
2132 string and stop converting,
2133 so at least we don't skip any input. */
2134 ((wchar_t *) (*strptr))[strsize - 1] = L'\0';
2135 ++done;
2136 conv_error ();
2138 else
2140 *strptr = (char *) wstr;
2141 wstr += strsize;
2142 ++strsize;
2145 else
2147 *strptr = (char *) wstr;
2148 wstr += strsize;
2149 strsize *= 2;
2154 if (--width <= 0)
2155 break;
2157 while (inchar () != EOF);
2159 if (cnt != 0)
2160 /* We stopped in the middle of recognizing another
2161 character. That's a problem. */
2162 encode_error ();
2163 #endif
2165 if (now == read_in)
2166 /* We haven't successfully read any character. */
2167 conv_error ();
2169 if (!(flags & SUPPRESS))
2171 *wstr++ = L'\0';
2173 if ((flags & MALLOC)
2174 && wstr - (wchar_t *) *strptr != strsize)
2176 wchar_t *cp = (wchar_t *)
2177 realloc (*strptr, ((wstr - (wchar_t *) *strptr)
2178 * sizeof(wchar_t)));
2179 if (cp != NULL)
2180 *strptr = (char *) cp;
2183 ++done;
2186 else
2188 size_t now = read_in;
2190 if (inchar () == EOF)
2191 input_error ();
2193 #ifdef COMPILE_WSCANF
2195 memset (&state, '\0', sizeof (state));
2199 wchar_t *runp;
2200 size_t n;
2202 /* Test whether it's in the scanlist. */
2203 runp = tw;
2204 while (runp < wp)
2206 if (runp[0] == L'-' && runp[1] != '\0' && runp + 1 != wp
2207 && runp != tw
2208 && (unsigned int) runp[-1] <= (unsigned int) runp[1])
2210 /* Match against all characters in between the
2211 first and last character of the sequence. */
2212 wchar_t wc;
2214 for (wc = runp[-1] + 1; wc <= runp[1]; ++wc)
2215 if ((wint_t) wc == c)
2216 break;
2218 if (wc <= runp[1] && !not_in)
2219 break;
2220 if (wc <= runp[1] && not_in)
2222 /* The current character is not in the
2223 scanset. */
2224 ungetc (c, s);
2225 goto out2;
2228 runp += 2;
2230 else
2232 if ((wint_t) *runp == c && !not_in)
2233 break;
2234 if ((wint_t) *runp == c && not_in)
2236 ungetc (c, s);
2237 goto out2;
2240 ++runp;
2244 if (runp == wp && !not_in)
2246 ungetc (c, s);
2247 goto out2;
2250 if (!(flags & SUPPRESS))
2252 if ((flags & MALLOC)
2253 && str + MB_CUR_MAX >= *strptr + strsize)
2255 /* Enlarge the buffer. */
2256 size_t strleng = str - *strptr;
2257 char *newstr;
2259 newstr = (char *) realloc (*strptr, 2 * strsize);
2260 if (newstr == NULL)
2262 /* Can't allocate that much. Last-ditch
2263 effort. */
2264 newstr = (char *) realloc (*strptr,
2265 strleng + MB_CUR_MAX);
2266 if (newstr == NULL)
2268 /* We lose. Oh well. Terminate the string
2269 and stop converting, so at least we don't
2270 skip any input. */
2271 ((char *) (*strptr))[strleng] = '\0';
2272 ++done;
2273 conv_error ();
2275 else
2277 *strptr = newstr;
2278 str = newstr + strleng;
2279 strsize = strleng + MB_CUR_MAX;
2282 else
2284 *strptr = newstr;
2285 str = newstr + strleng;
2286 strsize *= 2;
2291 n = __wcrtomb (!(flags & SUPPRESS) ? str : NULL, c, &state);
2292 if (n == (size_t) -1)
2293 encode_error ();
2295 assert (n <= MB_CUR_MAX);
2296 str += n;
2298 while (--width > 0 && inchar () != WEOF);
2299 out2:
2300 #else
2303 if (wp[c] == not_in)
2305 ungetc_not_eof (c, s);
2306 break;
2309 /* This is easy. */
2310 if (!(flags & SUPPRESS))
2312 *str++ = c;
2313 if ((flags & MALLOC)
2314 && (char *) str == *strptr + strsize)
2316 /* Enlarge the buffer. */
2317 str = (char *) realloc (*strptr, 2 * strsize);
2318 if (str == NULL)
2320 /* Can't allocate that much. Last-ditch
2321 effort. */
2322 str = (char *) realloc (*strptr, strsize + 1);
2323 if (str == NULL)
2325 /* We lose. Oh well. Terminate the
2326 string and stop converting,
2327 so at least we don't skip any input. */
2328 ((char *) (*strptr))[strsize - 1] = '\0';
2329 ++done;
2330 conv_error ();
2332 else
2334 *strptr = (char *) str;
2335 str += strsize;
2336 ++strsize;
2339 else
2341 *strptr = (char *) str;
2342 str += strsize;
2343 strsize *= 2;
2348 while (--width > 0 && inchar () != EOF);
2349 #endif
2351 if (now == read_in)
2352 /* We haven't successfully read any character. */
2353 conv_error ();
2355 if (!(flags & SUPPRESS))
2357 #ifdef COMPILE_WSCANF
2358 /* We have to emit the code to get into the initial
2359 state. */
2360 char buf[MB_LEN_MAX];
2361 size_t n = __wcrtomb (buf, L'\0', &state);
2362 if (n > 0 && (flags & MALLOC)
2363 && str + n >= *strptr + strsize)
2365 /* Enlarge the buffer. */
2366 size_t strleng = str - *strptr;
2367 char *newstr;
2369 newstr = (char *) realloc (*strptr, strleng + n + 1);
2370 if (newstr == NULL)
2372 /* We lose. Oh well. Terminate the string
2373 and stop converting, so at least we don't
2374 skip any input. */
2375 ((char *) (*strptr))[strleng] = '\0';
2376 ++done;
2377 conv_error ();
2379 else
2381 *strptr = newstr;
2382 str = newstr + strleng;
2383 strsize = strleng + n + 1;
2387 str = __mempcpy (str, buf, n);
2388 #endif
2389 *str++ = '\0';
2391 if ((flags & MALLOC) && str - *strptr != strsize)
2393 char *cp = (char *) realloc (*strptr, str - *strptr);
2394 if (cp != NULL)
2395 *strptr = cp;
2398 ++done;
2401 break;
2403 case L_('p'): /* Generic pointer. */
2404 base = 16;
2405 /* A PTR must be the same size as a `long int'. */
2406 flags &= ~(SHORT|LONGDBL);
2407 if (need_long)
2408 flags |= LONG;
2409 number_signed = 0;
2410 read_pointer = 1;
2411 goto number;
2413 default:
2414 /* If this is an unknown format character punt. */
2415 conv_error ();
2419 /* The last thing we saw in the format string was a white space.
2420 Consume the last white spaces. */
2421 if (skip_space)
2424 c = inchar ();
2425 while (ISSPACE (c));
2426 ungetc (c, s);
2429 /* Unlock stream. */
2430 UNLOCK_STREAM (s);
2432 return done;
2435 #ifdef USE_IN_LIBIO
2436 # ifdef COMPILE_WSCANF
/* __vfwscanf: va_list-taking scanf entry point for wide-character
   format strings (compiled only when both USE_IN_LIBIO and
   COMPILE_WSCANF are defined).
   S is the stream to read from, FORMAT the wide-character format
   string, ARGPTR the list of destination arguments.
   Returns the value produced by _IO_vfwscanf for the same arguments.  */
2438 __vfwscanf (FILE *s, const wchar_t *format, va_list argptr)
/* Pure forwarding call.  NOTE(review): the meaning of the trailing NULL
   (fourth parameter of _IO_vfwscanf) is not visible in this excerpt --
   confirm against the _IO_vfwscanf definition.  */
2440 return _IO_vfwscanf (s, format, argptr, NULL);
2442 # else
/* __vfscanf: va_list-taking scanf entry point for narrow (char) format
   strings (compiled when USE_IN_LIBIO is defined and COMPILE_WSCANF is
   not).
   S is the stream to read from, FORMAT the format string, ARGPTR the
   list of destination arguments.
   Returns the value produced by _IO_vfscanf for the same arguments.  */
2444 __vfscanf (FILE *s, const char *format, va_list argptr)
/* Pure forwarding call.  NOTE(review): INTUSE presumably selects the
   library-internal alias of _IO_vfscanf, and the meaning of the trailing
   NULL (fourth parameter) is not visible in this excerpt -- confirm both
   against their definitions.  */
2446 return INTUSE(_IO_vfscanf) (s, format, argptr, NULL);
2448 libc_hidden_def (__vfscanf)
2449 # endif
2450 #endif
/* Publish the unprefixed public names.  Which pair is emitted depends on
   whether this file is being compiled as the wide-character variant
   (COMPILE_WSCANF).
   NOTE(review): weak_alias and INTDEF are macros defined outside this
   excerpt; presumably weak_alias (A, B) makes B a weak alias of A and
   INTDEF defines the internal alias that INTUSE refers to -- confirm
   against their definitions.  */
2452 #ifdef COMPILE_WSCANF
2453 weak_alias (__vfwscanf, vfwscanf)
2454 #else
2455 weak_alias (__vfscanf, vfscanf)
2456 INTDEF(_IO_vfscanf)
2457 #endif
2457 #endif