1 /* Copyright (C) 1991,92,93,94,95,96,97,98 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
4 The GNU C Library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Library General Public License as
6 published by the Free Software Foundation; either version 2 of the
7 License, or (at your option) any later version.
9 The GNU C Library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Library General Public License for more details.
14 You should have received a copy of the GNU Library General Public
15 License along with the GNU C Library; see the file COPYING.LIB. If not,
16 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 Boston, MA 02111-1307, USA. */
27 #include <bits/libc-lock.h>
28 #include <locale/localeinfo.h>
31 # define HAVE_LONGLONG
32 # define LONGLONG long long
34 # define LONGLONG long
37 /* Those are flags in the conversion format. */
38 # define LONG 0x001 /* l: long or double */
39 # define LONGDBL 0x002 /* L: long long or long double */
40 # define SHORT 0x004 /* h: short */
41 # define SUPPRESS 0x008 /* *: suppress assignment */
42 # define POINTER 0x010 /* weird %p pointer (`fake hex') */
43 # define NOSKIP 0x020 /* do not skip blanks */
44 # define WIDTH 0x040 /* width was given */
45 # define GROUP 0x080 /* ': group numbers */
46 # define MALLOC 0x100 /* a: malloc strings */
47 # define CHAR 0x200 /* hh: char */
49 # define TYPEMOD (LONG|LONGDBL|SHORT|CHAR)
57 # define va_list _IO_va_list
58 # define ungetc(c, s) ((void) ((int) c != EOF && --read_in), \
60 # define inchar() (c == EOF ? EOF \
61 : ((c = _IO_getc_unlocked (s)), \
62 (void) (c != EOF && ++read_in), c))
63 # define encode_error() do { \
64 if (errp != NULL) *errp |= 4; \
65 _IO_funlockfile (s); \
66 __set_errno (EILSEQ); \
69 # define conv_error() do { \
70 if (errp != NULL) *errp |= 2; \
71 _IO_funlockfile (s); \
74 # define input_error() do { \
75 _IO_funlockfile (s); \
76 if (errp != NULL) *errp |= 1; \
79 # define memory_error() do { \
80 _IO_funlockfile (s); \
81 __set_errno (ENOMEM); \
84 # define ARGCHECK(s, format) \
87 /* Check file argument for consistency. */ \
88 CHECK_FILE (s, EOF); \
89 if (s->_flags & _IO_NO_READS) \
91 __set_errno (EBADF); \
94 else if (format == NULL) \
100 # define LOCK_STREAM(S) \
101 __libc_cleanup_region_start ((void (*) (void *)) &_IO_funlockfile, (S)); \
103 # define UNLOCK_STREAM __libc_cleanup_region_end (1)
105 # define ungetc(c, s) ((void) (c != EOF && --read_in), ungetc (c, s))
106 # define inchar() (c == EOF ? EOF \
107 : ((c = getc (s)), (void) (c != EOF && ++read_in), c))
108 # define encode_error() do { \
110 __set_errno (EILSEQ); \
113 # define conv_error() do { \
117 # define input_error() do { \
119 return done ?: EOF; \
121 # define memory_error() do { \
123 __set_errno (ENOMEM); \
126 # define ARGCHECK(s, format) \
129 /* Check file argument for consistency. */ \
130 if (!__validfp (s) || !s->__mode.__read) \
132 __set_errno (EBADF); \
135 else if (format == NULL) \
137 __set_errno (EINVAL); \
142 /* XXX For now !!! */
143 # define flockfile(S) /* nothing */
144 # define funlockfile(S) /* nothing */
145 # define LOCK_STREAM(S)
146 # define UNLOCK_STREAM
148 # define LOCK_STREAM(S) \
149 __libc_cleanup_region_start (&__funlockfile, (S)); \
151 # define UNLOCK_STREAM __libc_cleanup_region_end (1)
156 /* Read formatted input from S according to the format string
157 FORMAT, using the argument list in ARG.
158 Return the number of assignments made, or -1 for an input error. */
161 _IO_vfscanf (s
, format
, argptr
, errp
)
168 __vfscanf (FILE *s
, const char *format
, va_list argptr
)
172 register const char *f
= format
;
173 register unsigned char fc
; /* Current character of the format. */
174 register size_t done
= 0; /* Assignments done. */
175 register size_t read_in
= 0; /* Chars read in. */
176 register int c
= 0; /* Last char read. */
177 register int width
; /* Maximum field width. */
178 register int flags
; /* Modifiers for current format element. */
180 /* Status for reading F-P nums. */
181 char got_dot
, got_e
, negative
;
182 /* If a [...] is a [^...]. */
184 #define exp_char not_in
185 /* Base for integral numbers. */
187 /* Signedness for integral numbers. */
189 #define is_hexa number_signed
190 /* Decimal point character. */
192 /* The thousands character of the current locale. */
194 /* Integral holding variables. */
198 unsigned long long int uq
;
200 unsigned long int ul
;
202 /* Character-buffer pointer. */
204 wchar_t *wstr
= NULL
;
205 char **strptr
= NULL
;
207 /* We must not react to white space immediately because it can
208 possibly be matched even if no character is available in the
209 input stream anymore. */
212 char *tw
; /* Temporary pointer. */
213 char *wp
= NULL
; /* Workspace. */
214 size_t wpmax
= 0; /* Maximal size of workspace. */
215 size_t wpsize
; /* Currently used bytes in workspace. */
219 if (wpsize == wpmax) \
222 wpmax = UCHAR_MAX > 2 * wpmax ? UCHAR_MAX : 2 * wpmax; \
223 wp = (char *) alloca (wpmax); \
225 memcpy (wp, old, wpsize); \
227 wp[wpsize++] = (Ch); \
232 __va_copy (arg
, argptr
);
234 arg
= (va_list) argptr
;
237 ARGCHECK (s
, format
);
239 /* Figure out the decimal point character. */
240 if (mbtowc (&decimal
, _NL_CURRENT (LC_NUMERIC
, DECIMAL_POINT
),
241 strlen (_NL_CURRENT (LC_NUMERIC
, DECIMAL_POINT
))) <= 0)
242 decimal
= (wchar_t) *_NL_CURRENT (LC_NUMERIC
, DECIMAL_POINT
);
243 /* Figure out the thousands separator character. */
244 if (mbtowc (&thousands
, _NL_CURRENT (LC_NUMERIC
, THOUSANDS_SEP
),
245 strlen (_NL_CURRENT (LC_NUMERIC
, THOUSANDS_SEP
))) <= 0)
246 thousands
= (wchar_t) *_NL_CURRENT (LC_NUMERIC
, THOUSANDS_SEP
);
248 /* Lock the stream. */
251 /* Run through the format string. */
255 /* Extract the next argument, which is of type TYPE.
256 For a %N$... spec, this is the Nth argument from the beginning;
257 otherwise it is the next argument after the state now in ARG. */
259 # define ARG(type) (argpos == 0 ? va_arg (arg, type) : \
260 ({ unsigned int pos = argpos; \
262 __va_copy (arg, argptr); \
264 (void) va_arg (arg, void *); \
265 va_arg (arg, type); \
269 /* XXX Possible optimization. */
270 # define ARG(type) (argpos == 0 ? va_arg (arg, type) : \
271 ({ va_list arg = (va_list) argptr; \
272 arg = (va_list) ((char *) arg \
274 * __va_rounded_size (void *)); \
275 va_arg (arg, type); \
278 # define ARG(type) (argpos == 0 ? va_arg (arg, type) : \
279 ({ unsigned int pos = argpos; \
280 va_list arg = (va_list) argptr; \
282 (void) va_arg (arg, void *); \
283 va_arg (arg, type); \
290 /* Non-ASCII, may be a multibyte. */
291 int len
= mblen (f
, strlen (f
));
313 /* Remember to skip spaces. */
320 /* Read a character. */
323 /* Characters other than format specs must just match. */
327 /* We saw white space char as the last character in the format
328 string. Now it's time to skip all leading white space. */
332 if (inchar () == EOF
&& errno
== EINTR
)
346 /* This is the start of the conversion string. */
349 /* Initialize state of modifiers. */
352 /* Prepare temporary buffer. */
355 /* Check for a positional parameter specification. */
360 argpos
= argpos
* 10 + (*f
++ - '0');
365 /* Oops; that was actually the field width. */
373 /* Check for the assignment-suppressing and the number grouping flag. */
374 while (*f
== '*' || *f
== '\'')
385 /* We have seen width. */
389 /* Find the maximum field width. */
400 /* Check for type modifiers. */
401 while (*f
== 'h' || *f
== 'l' || *f
== 'L' || *f
== 'a' || *f
== 'q')
405 /* int's are short int's. */
406 if (flags
& (LONG
|LONGDBL
|CHAR
))
407 /* Signal illegal format element. */
418 if (flags
& (SHORT
|LONGDBL
|CHAR
))
420 else if (flags
& LONG
)
422 /* A double `l' is equivalent to an `L'. */
427 /* int's are long int's. */
432 /* double's are long double's, and int's are long long int's. */
434 /* Signal illegal format element. */
439 /* The `a' is used as a flag only if followed by `s', `S' or
441 if (*f
!= 's' && *f
!= 'S' && *f
!= '[')
447 /* Signal illegal format element. */
449 /* String conversions (%s, %[) take a `char **'
450 arg and fill it in with a malloc'd pointer. */
455 /* End of the format string? */
459 /* Find the conversion specifier. */
461 if (skip_space
|| (fc
!= '[' && fc
!= 'c' && fc
!= 'C' && fc
!= 'n'))
463 /* Eat whitespace. */
464 int save_errno
= errno
;
467 if (inchar () == EOF
&& errno
== EINTR
)
477 case '%': /* Must match a literal '%'. */
488 case 'n': /* Answer number of assignments done. */
489 /* Corrigendum 1 to ISO C 1990 describes the allowed flags
490 with the 'n' conversion specifier. */
491 if (!(flags
& SUPPRESS
))
493 /* Don't count the read-ahead. */
495 *ARG (long long int *) = read_in
;
496 else if (flags
& LONG
)
497 *ARG (long int *) = read_in
;
498 else if (flags
& SHORT
)
499 *ARG (short int *) = read_in
;
501 *ARG (int *) = read_in
;
503 #ifdef NO_BUG_IN_ISO_C_CORRIGENDUM_1
504 /* We have a severe problem here. The ISO C standard
505 contradicts itself in explaining the effect of the %n
506 format in `scanf'. While in ISO C:1990 and the ISO C
507 Amendment 1:1995 the result is described as
509 Execution of a %n directive does not effect the
510 assignment count returned at the completion of
511 execution of the f(w)scanf function.
513 in ISO C Corrigendum 1:1994 the following was added:
516 Add the following fourth example:
519 int d1, d2, n1, n2, i;
520 i = sscanf("123", "%d%n%n%d", &d1, &n1, &n2, &d2);
521 the value 123 is assigned to d1 and the value 3 to n1.
522 Because %n can never get an input failure the value
523 of 3 is also assigned to n2. The value of d2 is not
524 affected. The value 3 is assigned to i.
526 We go for now with the historically correct code from ISO C,
527 i.e., we don't count the %n assignments. Should this ever
528 prove to be wrong, just remove the #ifdef above. */
534 case 'c': /* Match characters. */
535 if ((flags
& LONG
) == 0)
537 if (!(flags
& SUPPRESS
))
551 if (!(flags
& SUPPRESS
))
555 while (--width
> 0 && inchar () != EOF
);
558 while (--width
> 0 && inchar () != EOF
);
560 if (!(flags
& SUPPRESS
))
567 /* Get UTF-8 encoded wide character. Here we assume (as in
568 other parts of the libc) that we only have to handle
575 if (!(flags
& SUPPRESS
))
577 wstr
= ARG (wchar_t *);
584 #define NEXT_WIDE_CHAR(First) \
587 /* EOF is only an error for the first character. */ \
595 if ((c & 0xc0) == 0x80 || (c & 0xfe) == 0xfe) \
597 if ((c & 0xe0) == 0xc0) \
599 /* We expect two bytes. */ \
603 else if ((c & 0xf0) == 0xe0) \
605 /* We expect three bytes. */ \
609 else if ((c & 0xf8) == 0xf0) \
611 /* We expect four bytes. */ \
615 else if ((c & 0xfc) == 0xf8) \
617 /* We expect five bytes. */ \
623 /* We expect six bytes. */ \
632 || (c & 0xc0) == 0x80 || (c & 0xfe) == 0xfe) \
640 if (!(flags & SUPPRESS)) \
644 NEXT_WIDE_CHAR (first
);
648 if (!(flags
& SUPPRESS
))
653 case 's': /* Read a string. */
655 /* We have to process a wide character string. */
656 goto wide_char_string
;
658 #define STRING_ARG(Str, Type) \
659 if (!(flags & SUPPRESS)) \
661 if (flags & MALLOC) \
663 /* The string is to be stored in a malloc'd buffer. */ \
664 strptr = ARG (char **); \
665 if (strptr == NULL) \
667 /* Allocate an initial buffer. */ \
669 *strptr = malloc (strsize * sizeof (Type)); \
670 Str = (Type *) *strptr; \
673 Str = ARG (Type *); \
677 STRING_ARG (str
, char);
690 #define STRING_ADD_CHAR(Str, c, Type) \
691 if (!(flags & SUPPRESS)) \
694 if ((flags & MALLOC) && (char *) Str == *strptr + strsize) \
696 /* Enlarge the buffer. */ \
697 Str = realloc (*strptr, strsize * 2 * sizeof (Type)); \
700 /* Can't allocate that much. Last-ditch effort. */\
701 Str = realloc (*strptr, \
702 (strsize + 1) * sizeof (Type)); \
705 /* We lose. Oh well. \
706 Terminate the string and stop converting, \
707 so at least we don't skip any input. */ \
708 ((Type *) (*strptr))[strsize] = '\0'; \
714 *strptr = (char *) Str; \
715 Str = ((Type *) *strptr) + strsize; \
721 *strptr = (char *) Str; \
722 Str = ((Type *) *strptr) + strsize; \
727 STRING_ADD_CHAR (str
, c
, char);
728 } while ((width
<= 0 || --width
> 0) && inchar () != EOF
);
730 if (!(flags
& SUPPRESS
))
738 /* Wide character string. */
743 STRING_ARG (wstr
, wchar_t);
748 NEXT_WIDE_CHAR (first
);
752 /* XXX We would have to push back the whole wide char
753 with possibly many bytes. But since scanf does
754 not distinguish between white space characters
755 we can simply push back a single <SP>, which is
756 guaranteed to be in the [:space:] class. */
761 STRING_ADD_CHAR (wstr
, val
, wchar_t);
764 while (width
<= 0 || --width
> 0);
766 if (!(flags
& SUPPRESS
))
774 case 'x': /* Hexadecimal integer. */
775 case 'X': /* Ditto. */
780 case 'o': /* Octal integer. */
785 case 'u': /* Unsigned decimal integer. */
790 case 'd': /* Signed decimal integer. */
795 case 'i': /* Generic number. */
804 /* Check for a sign. */
805 if (c
== '-' || c
== '+')
813 /* Look for a leading indication of base. */
814 if (width
!= 0 && c
== '0')
822 if (width
!= 0 && tolower (c
) == 'x')
840 /* Read the number into workspace. */
841 while (c
!= EOF
&& width
!= 0)
843 if (base
== 16 ? !isxdigit (c
) :
844 ((!isdigit (c
) || c
- '0' >= base
) &&
845 !((flags
& GROUP
) && base
== 10 && c
== thousands
)))
854 /* The just read character is not part of the number anymore. */
858 (wpsize
== 1 && (wp
[0] == '+' || wp
[0] == '-')))
859 /* There was no number. */
862 /* Convert the number. */
867 num
.q
= __strtoll_internal (wp
, &tw
, base
, flags
& GROUP
);
869 num
.uq
= __strtoull_internal (wp
, &tw
, base
, flags
& GROUP
);
874 num
.l
= __strtol_internal (wp
, &tw
, base
, flags
& GROUP
);
876 num
.ul
= __strtoul_internal (wp
, &tw
, base
, flags
& GROUP
);
881 if (!(flags
& SUPPRESS
))
886 *ARG (unsigned LONGLONG
int *) = num
.uq
;
887 else if (flags
& LONG
)
888 *ARG (unsigned long int *) = num
.ul
;
889 else if (flags
& SHORT
)
890 *ARG (unsigned short int *)
891 = (unsigned short int) num
.ul
;
892 else if (flags
& CHAR
)
893 *ARG (unsigned char *) = (unsigned char) num
.ul
;
895 *ARG (unsigned int *) = (unsigned int) num
.ul
;
900 *ARG (LONGLONG
int *) = num
.q
;
901 else if (flags
& LONG
)
902 *ARG (long int *) = num
.l
;
903 else if (flags
& SHORT
)
904 *ARG (short int *) = (short int) num
.l
;
905 else if (flags
& CHAR
)
906 *ARG (signed char *) = (signed char) num
.ul
;
908 *ARG (int *) = (int) num
.l
;
914 case 'e': /* Floating-point numbers. */
925 /* Check for a sign. */
926 if (c
== '-' || c
== '+')
929 if (inchar () == EOF
)
930 /* EOF is only an input error before we read any chars. */
944 if (tolower (c
) == 'x')
946 /* It is a number in hexadecimal format. */
952 /* Grouping is not allowed. */
963 else if (!got_e
&& is_hexa
&& isxdigit (c
))
965 else if (got_e
&& wp
[wpsize
- 1] == exp_char
966 && (c
== '-' || c
== '+'))
968 else if (wpsize
> 0 && !got_e
&& tolower (c
) == exp_char
)
973 else if (c
== decimal
&& !got_dot
)
978 else if ((flags
& GROUP
) && c
== thousands
&& !got_dot
)
982 /* The last read character is not part of the number
990 while (width
!= 0 && inchar () != EOF
);
992 /* Have we read any character? If we try to read a number
993 in hexadecimal notation and we have read only the `0x'
994 prefix this is an error. */
995 if (wpsize
== 0 || (is_hexa
&& wpsize
== 2))
998 /* Convert the number. */
1000 if (flags
& LONGDBL
)
1002 long double d
= __strtold_internal (wp
, &tw
, flags
& GROUP
);
1003 if (!(flags
& SUPPRESS
) && tw
!= wp
)
1004 *ARG (long double *) = negative
? -d
: d
;
1006 else if (flags
& LONG
)
1008 double d
= __strtod_internal (wp
, &tw
, flags
& GROUP
);
1009 if (!(flags
& SUPPRESS
) && tw
!= wp
)
1010 *ARG (double *) = negative
? -d
: d
;
1014 float d
= __strtof_internal (wp
, &tw
, flags
& GROUP
);
1015 if (!(flags
& SUPPRESS
) && tw
!= wp
)
1016 *ARG (float *) = negative
? -d
: d
;
1022 if (!(flags
& SUPPRESS
))
1026 case '[': /* Character class. */
1029 STRING_ARG (wstr
, wchar_t);
1030 c
= '\0'; /* This is to keep gcc quiet. */
1034 STRING_ARG (str
, char);
1049 /* Fill WP with byte flags indexed by character.
1050 We will use this flag map for matching input characters. */
1051 if (wpmax
< UCHAR_MAX
)
1054 wp
= (char *) alloca (wpmax
);
1056 memset (wp
, 0, UCHAR_MAX
);
1059 if (fc
== ']' || fc
== '-')
1061 /* If ] or - appears before any char in the set, it is not
1062 the terminator or separator, but the first char in the
1068 while ((fc
= *f
++) != '\0' && fc
!= ']')
1070 if (fc
== '-' && *f
!= '\0' && *f
!= ']' &&
1071 (unsigned char) f
[-2] <= (unsigned char) *f
)
1073 /* Add all characters from the one before the '-'
1074 up to (but not including) the next format char. */
1075 for (fc
= f
[-2]; fc
< *f
; ++fc
)
1079 /* Add the character to the flag map. */
1084 if (!(flags
& LONG
))
1097 NEXT_WIDE_CHAR (first
);
1098 if (val
> 255 || wp
[val
] == not_in
)
1100 /* XXX We have a problem here. We read a wide
1101 character and this possibly took several
1102 bytes. But we can only push back one single
1103 character. To be sure we don't create wrong
1104 input we push it back only in case it is
1105 representable within one byte. */
1110 STRING_ADD_CHAR (wstr
, val
, wchar_t);
1120 if (!(flags
& SUPPRESS
))
1128 num
.ul
= read_in
- 1; /* -1 because we already read one char. */
1131 if (wp
[c
] == not_in
)
1136 STRING_ADD_CHAR (str
, c
, char);
1140 while (width
!= 0 && inchar () != EOF
);
1142 if (read_in
== num
.ul
)
1145 if (!(flags
& SUPPRESS
))
1153 case 'p': /* Generic pointer. */
1155 /* A PTR must be the same size as a `long int'. */
1156 flags
&= ~(SHORT
|LONGDBL
);
1163 /* The last thing we saw in the format string was white space.
1164 Consume the remaining white space. */
1169 while (isspace (c
));
1173 /* Unlock stream. */
1181 __vfscanf (FILE *s
, const char *format
, va_list argptr
)
1183 return _IO_vfscanf (s
, format
, argptr
, NULL
);
1187 weak_alias (__vfscanf
, vfscanf
)