1 /* Copyright (C) 1991,92,93,94,95,96,97,98 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
4 The GNU C Library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Library General Public License as
6 published by the Free Software Foundation; either version 2 of the
7 License, or (at your option) any later version.
9 The GNU C Library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Library General Public License for more details.
14 You should have received a copy of the GNU Library General Public
15 License along with the GNU C Library; see the file COPYING.LIB. If not,
16 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 Boston, MA 02111-1307, USA. */
28 #include <bits/libc-lock.h>
29 #include <locale/localeinfo.h>
32 # define HAVE_LONGLONG
33 # define LONGLONG long long
35 # define LONGLONG long
38 /* Those are flags in the conversion format. */
39 # define LONG 0x001 /* l: long or double */
40 # define LONGDBL 0x002 /* L: long long or long double */
41 # define SHORT 0x004 /* h: short */
42 # define SUPPRESS 0x008 /* *: suppress assignment */
43 # define POINTER 0x010 /* weird %p pointer (`fake hex') */
44 # define NOSKIP 0x020 /* do not skip blanks */
45 # define WIDTH 0x040 /* width was given */
46 # define GROUP 0x080 /* ': group numbers */
47 # define MALLOC 0x100 /* a: malloc strings */
48 # define CHAR 0x200 /* hh: char */
56 # define va_list _IO_va_list
57 # define ungetc(c, s) ((void) ((int) c == EOF \
59 _IO_sputbackc (s, (unsigned char) c))))
60 # define inchar() (c == EOF ? EOF \
61 : ((c = _IO_getc_unlocked (s)), \
62 (void) (c != EOF && ++read_in), c))
63 # define encode_error() do { \
64 if (errp != NULL) *errp |= 4; \
65 _IO_funlockfile (s); \
66 __libc_cleanup_end (0); \
67 __set_errno (EILSEQ); \
70 # define conv_error() do { \
71 if (errp != NULL) *errp |= 2; \
72 _IO_funlockfile (s); \
73 __libc_cleanup_end (0); \
76 # define input_error() do { \
77 _IO_funlockfile (s); \
78 if (errp != NULL) *errp |= 1; \
79 __libc_cleanup_end (0); \
82 # define memory_error() do { \
83 _IO_funlockfile (s); \
84 __set_errno (ENOMEM); \
85 __libc_cleanup_end (0); \
88 # define ARGCHECK(s, format) \
91 /* Check file argument for consistency. */ \
92 CHECK_FILE (s, EOF); \
93 if (s->_flags & _IO_NO_READS) \
95 __set_errno (EBADF); \
98 else if (format == NULL) \
104 # define LOCK_STREAM(S) \
105 __libc_cleanup_region_start ((void (*) (void *)) &_IO_funlockfile, (S)); \
107 # define UNLOCK_STREAM(S) \
108 _IO_funlockfile (S); \
109 __libc_cleanup_region_end (0)
111 # define ungetc(c, s) ((void) (c != EOF && --read_in), ungetc (c, s))
112 # define inchar() (c == EOF ? EOF \
113 : ((c = getc (s)), (void) (c != EOF && ++read_in), c))
114 # define encode_error() do { \
116 __set_errno (EILSEQ); \
119 # define conv_error() do { \
123 # define input_error() do { \
125 return done ?: EOF; \
127 # define memory_error() do { \
129 __set_errno (ENOMEM); \
132 # define ARGCHECK(s, format) \
135 /* Check file argument for consistency. */ \
136 if (!__validfp (s) || !s->__mode.__read) \
138 __set_errno (EBADF); \
141 else if (format == NULL) \
143 __set_errno (EINVAL); \
148 /* XXX For now !!! */
149 # define flockfile(S) /* nothing */
150 # define funlockfile(S) /* nothing */
151 # define LOCK_STREAM(S)
152 # define UNLOCK_STREAM(S)
154 # define LOCK_STREAM(S) \
155 __libc_cleanup_region_start (&__funlockfile, (S)); \
157 # define UNLOCK_STREAM(S) \
159 __libc_cleanup_region_end (0)
164 /* Read formatted input from S according to the format string
165 FORMAT, using the argument list in ARG.
166 Return the number of assignments made, or -1 for an input error. */
169 _IO_vfscanf (s
, format
, argptr
, errp
)
176 __vfscanf (FILE *s
, const char *format
, va_list argptr
)
180 register const char *f
= format
;
181 register unsigned char fc
; /* Current character of the format. */
182 register size_t done
= 0; /* Assignments done. */
183 register size_t read_in
= 0; /* Chars read in. */
184 register int c
= 0; /* Last char read. */
185 register int width
; /* Maximum field width. */
186 register int flags
; /* Modifiers for current format element. */
188 /* Status for reading F-P nums. */
189 char got_dot
, got_e
, negative
;
190 /* If a [...] is a [^...]. */
192 #define exp_char not_in
193 /* Base for integral numbers. */
195 /* Signedness for integral numbers. */
197 #define is_hexa number_signed
198 /* Decimal point character. */
200 /* The thousands character of the current locale. */
202 /* Integral holding variables. */
206 unsigned long long int uq
;
208 unsigned long int ul
;
210 /* Character-buffer pointer. */
212 wchar_t *wstr
= NULL
;
213 char **strptr
= NULL
;
215 /* We must not react on white spaces immediately because they can
216 possibly be matched even if in the input stream no character is
217 available anymore. */
220 char *tw
; /* Temporary pointer. */
221 char *wp
= NULL
; /* Workspace. */
222 size_t wpmax
= 0; /* Maximal size of workspace. */
223 size_t wpsize
; /* Currently used bytes in workspace. */
227 if (wpsize == wpmax) \
230 wpmax = UCHAR_MAX > 2 * wpmax ? UCHAR_MAX : 2 * wpmax; \
231 wp = (char *) alloca (wpmax); \
233 memcpy (wp, old, wpsize); \
235 wp[wpsize++] = (Ch); \
240 __va_copy (arg
, argptr
);
242 arg
= (va_list) argptr
;
245 ARGCHECK (s
, format
);
247 /* Figure out the decimal point character. */
248 if (mbtowc (&decimal
, _NL_CURRENT (LC_NUMERIC
, DECIMAL_POINT
),
249 strlen (_NL_CURRENT (LC_NUMERIC
, DECIMAL_POINT
))) <= 0)
250 decimal
= (wchar_t) *_NL_CURRENT (LC_NUMERIC
, DECIMAL_POINT
);
251 /* Figure out the thousands separator character. */
252 if (mbtowc (&thousands
, _NL_CURRENT (LC_NUMERIC
, THOUSANDS_SEP
),
253 strlen (_NL_CURRENT (LC_NUMERIC
, THOUSANDS_SEP
))) <= 0)
254 thousands
= (wchar_t) *_NL_CURRENT (LC_NUMERIC
, THOUSANDS_SEP
);
256 /* Lock the stream. */
259 /* Run through the format string. */
263 /* Extract the next argument, which is of type TYPE.
264 For a %N$... spec, this is the Nth argument from the beginning;
265 otherwise it is the next argument after the state now in ARG. */
267 # define ARG(type) (argpos == 0 ? va_arg (arg, type) : \
268 ({ unsigned int pos = argpos; \
270 __va_copy (arg, argptr); \
272 (void) va_arg (arg, void *); \
273 va_arg (arg, type); \
277 /* XXX Possible optimization. */
278 # define ARG(type) (argpos == 0 ? va_arg (arg, type) : \
279 ({ va_list arg = (va_list) argptr; \
280 arg = (va_list) ((char *) arg \
282 * __va_rounded_size (void *)); \
283 va_arg (arg, type); \
286 # define ARG(type) (argpos == 0 ? va_arg (arg, type) : \
287 ({ unsigned int pos = argpos; \
288 va_list arg = (va_list) argptr; \
290 (void) va_arg (arg, void *); \
291 va_arg (arg, type); \
298 /* Non-ASCII, may be a multibyte. */
299 int len
= mblen (f
, strlen (f
));
321 /* Remember to skip spaces. */
328 /* Read a character. */
331 /* Characters other than format specs must just match. */
335 /* We saw white space char as the last character in the format
336 string. Now it's time to skip all leading white space. */
340 if (inchar () == EOF
&& errno
== EINTR
)
354 /* This is the start of the conversion string. */
357 /* Initialize state of modifiers. */
360 /* Prepare temporary buffer. */
363 /* Check for a positional parameter specification. */
368 argpos
= argpos
* 10 + (*f
++ - '0');
373 /* Oops; that was actually the field width. */
381 /* Check for the assignment-suppressing and the number grouping flag. */
382 while (*f
== '*' || *f
== '\'')
393 /* We have seen width. */
397 /* Find the maximum field width. */
408 /* Check for type modifiers. */
412 /* ints are short ints or chars. */
424 /* A double `l' is equivalent to an `L'. */
429 /* ints are long ints. */
434 /* doubles are long doubles, and ints are long long ints. */
438 /* The `a' is used as a flag only if followed by `s', `S' or
440 if (*f
!= 's' && *f
!= 'S' && *f
!= '[')
445 /* String conversions (%s, %[) take a `char **'
446 arg and fill it in with a malloc'd pointer. */
450 if (sizeof (size_t) > sizeof (unsigned long int))
452 else if (sizeof (size_t) > sizeof (unsigned int))
456 if (sizeof (uintmax_t) > sizeof (unsigned long int))
458 else if (sizeof (uintmax_t) > sizeof (unsigned int))
462 if (sizeof (ptrdiff_t) > sizeof (long int))
464 else if (sizeof (ptrdiff_t) > sizeof (int))
468 /* Not a recognized modifier. Backup. */
473 /* End of the format string? */
477 /* Find the conversion specifier. */
479 if (skip_space
|| (fc
!= '[' && fc
!= 'c' && fc
!= 'C' && fc
!= 'n'))
481 /* Eat whitespace. */
482 int save_errno
= errno
;
485 if (inchar () == EOF
&& errno
== EINTR
)
495 case '%': /* Must match a literal '%'. */
506 case 'n': /* Answer number of assignments done. */
507 /* Corrigendum 1 to ISO C 1990 describes the allowed flags
508 with the 'n' conversion specifier. */
509 if (!(flags
& SUPPRESS
))
511 /* Don't count the read-ahead. */
513 *ARG (long long int *) = read_in
;
514 else if (flags
& LONG
)
515 *ARG (long int *) = read_in
;
516 else if (flags
& SHORT
)
517 *ARG (short int *) = read_in
;
519 *ARG (int *) = read_in
;
521 #ifdef NO_BUG_IN_ISO_C_CORRIGENDUM_1
522 /* We have a severe problem here. The ISO C standard
523 contradicts itself in explaining the effect of the %n
524 format in `scanf'. While in ISO C:1990 and the ISO C
525 Amendment 1:1995 the result is described as
527 Execution of a %n directive does not affect the
528 assignment count returned at the completion of
529 execution of the f(w)scanf function.
531 in ISO C Corrigendum 1:1994 the following was added:
534 Add the following fourth example:
537 int d1, d2, n1, n2, i;
538 i = sscanf("123", "%d%n%n%d", &d1, &n1, &n2, &d2);
539 the value 123 is assigned to d1 and the value 3 to n1.
540 Because %n can never get an input failure the value
541 of 3 is also assigned to n2. The value of d2 is not
542 affected. The value 3 is assigned to i.
544 We go for now with the historically correct code from ISO C,
545 i.e., we don't count the %n assignments. If it ever
546 should prove to be wrong, just remove the #ifdef above. */
552 case 'c': /* Match characters. */
553 if ((flags
& LONG
) == 0)
555 if (!(flags
& SUPPRESS
))
569 if (!(flags
& SUPPRESS
))
573 while (--width
> 0 && inchar () != EOF
);
576 while (--width
> 0 && inchar () != EOF
);
578 if (!(flags
& SUPPRESS
))
585 /* Get UTF-8 encoded wide character. Here we assume (as in
586 other parts of the libc) that we only have to handle
593 if (!(flags
& SUPPRESS
))
595 wstr
= ARG (wchar_t *);
602 #define NEXT_WIDE_CHAR(First) \
606 /* EOF is only an error for the first character. */ \
615 if ((c & 0xc0) == 0x80 || (c & 0xfe) == 0xfe) \
617 if ((c & 0xe0) == 0xc0) \
619 /* We expect two bytes. */ \
623 else if ((c & 0xf0) == 0xe0) \
625 /* We expect three bytes. */ \
629 else if ((c & 0xf8) == 0xf0) \
631 /* We expect four bytes. */ \
635 else if ((c & 0xfc) == 0xf8) \
637 /* We expect five bytes. */ \
643 /* We expect six bytes. */ \
652 || (c & 0xc0) == 0x80 || (c & 0xfe) == 0xfe) \
660 if (!(flags & SUPPRESS)) \
664 NEXT_WIDE_CHAR (first
);
668 if (!(flags
& SUPPRESS
))
673 case 's': /* Read a string. */
675 /* We have to process a wide character string. */
676 goto wide_char_string
;
678 #define STRING_ARG(Str, Type) \
679 if (!(flags & SUPPRESS)) \
681 if (flags & MALLOC) \
683 /* The string is to be stored in a malloc'd buffer. */ \
684 strptr = ARG (char **); \
685 if (strptr == NULL) \
687 /* Allocate an initial buffer. */ \
689 *strptr = malloc (strsize * sizeof (Type)); \
690 Str = (Type *) *strptr; \
693 Str = ARG (Type *); \
697 STRING_ARG (str
, char);
710 #define STRING_ADD_CHAR(Str, c, Type) \
711 if (!(flags & SUPPRESS)) \
714 if ((flags & MALLOC) && (char *) Str == *strptr + strsize) \
716 /* Enlarge the buffer. */ \
717 Str = realloc (*strptr, strsize * 2 * sizeof (Type)); \
720 /* Can't allocate that much. Last-ditch effort. */\
721 Str = realloc (*strptr, \
722 (strsize + 1) * sizeof (Type)); \
725 /* We lose. Oh well. \
726 Terminate the string and stop converting, \
727 so at least we don't skip any input. */ \
728 ((Type *) (*strptr))[strsize] = '\0'; \
734 *strptr = (char *) Str; \
735 Str = ((Type *) *strptr) + strsize; \
741 *strptr = (char *) Str; \
742 Str = ((Type *) *strptr) + strsize; \
747 STRING_ADD_CHAR (str
, c
, char);
748 } while ((width
<= 0 || --width
> 0) && inchar () != EOF
);
750 if (!(flags
& SUPPRESS
))
758 /* Wide character string. */
763 STRING_ARG (wstr
, wchar_t);
768 NEXT_WIDE_CHAR (first
);
772 /* XXX We would have to push back the whole wide char
773 with possibly many bytes. But since scanf does
774 not make a difference for white space characters
775 we can simply push back a simple <SP> which is
776 guaranteed to be in the [:space:] class. */
781 STRING_ADD_CHAR (wstr
, val
, wchar_t);
784 while (width
<= 0 || --width
> 0);
786 if (!(flags
& SUPPRESS
))
794 case 'x': /* Hexadecimal integer. */
795 case 'X': /* Ditto. */
800 case 'o': /* Octal integer. */
805 case 'u': /* Unsigned decimal integer. */
810 case 'd': /* Signed decimal integer. */
815 case 'i': /* Generic number. */
824 /* Check for a sign. */
825 if (c
== '-' || c
== '+')
833 /* Look for a leading indication of base. */
834 if (width
!= 0 && c
== '0')
842 if (width
!= 0 && tolower (c
) == 'x')
860 /* Read the number into workspace. */
861 while (c
!= EOF
&& width
!= 0)
863 if (base
== 16 ? !isxdigit (c
) :
864 ((!isdigit (c
) || c
- '0' >= base
) &&
865 !((flags
& GROUP
) && base
== 10 && c
== thousands
)))
874 /* The just read character is not part of the number anymore. */
878 (wpsize
== 1 && (wp
[0] == '+' || wp
[0] == '-')))
879 /* There was no number. */
882 /* Convert the number. */
887 num
.q
= __strtoll_internal (wp
, &tw
, base
, flags
& GROUP
);
889 num
.uq
= __strtoull_internal (wp
, &tw
, base
, flags
& GROUP
);
894 num
.l
= __strtol_internal (wp
, &tw
, base
, flags
& GROUP
);
896 num
.ul
= __strtoul_internal (wp
, &tw
, base
, flags
& GROUP
);
901 if (!(flags
& SUPPRESS
))
906 *ARG (unsigned LONGLONG
int *) = num
.uq
;
907 else if (flags
& LONG
)
908 *ARG (unsigned long int *) = num
.ul
;
909 else if (flags
& SHORT
)
910 *ARG (unsigned short int *)
911 = (unsigned short int) num
.ul
;
912 else if (flags
& CHAR
)
913 *ARG (unsigned char *) = (unsigned char) num
.ul
;
915 *ARG (unsigned int *) = (unsigned int) num
.ul
;
920 *ARG (LONGLONG
int *) = num
.q
;
921 else if (flags
& LONG
)
922 *ARG (long int *) = num
.l
;
923 else if (flags
& SHORT
)
924 *ARG (short int *) = (short int) num
.l
;
925 else if (flags
& CHAR
)
926 *ARG (signed char *) = (signed char) num
.ul
;
928 *ARG (int *) = (int) num
.l
;
934 case 'e': /* Floating-point numbers. */
945 /* Check for a sign. */
946 if (c
== '-' || c
== '+')
949 if (inchar () == EOF
)
950 /* EOF is only an input error before we read any chars. */
958 /* Take care of the special arguments "nan" and "inf". */
959 if (tolower (c
) == 'n')
963 if (inchar () == EOF
|| tolower (c
) != 'a')
966 if (inchar () == EOF
|| tolower (c
) != 'n')
972 else if (tolower (c
) == 'i')
974 /* Maybe "inf" or "infinity". */
976 if (inchar () == EOF
|| tolower (c
) != 'n')
979 if (inchar () == EOF
|| tolower (c
) != 'f')
982 /* It is at least "inf". */
983 if (inchar () != EOF
)
985 if (tolower (c
) == 'i')
987 /* Now we have to read the rest as well. */
989 if (inchar () == EOF
|| tolower (c
) != 'n')
992 if (inchar () == EOF
|| tolower (c
) != 'i')
995 if (inchar () == EOF
|| tolower (c
) != 't')
998 if (inchar () == EOF
|| tolower (c
) != 'y')
1015 if (tolower (c
) == 'x')
1017 /* It is a number in hexadecimal format. */
1023 /* Grouping is not allowed. */
1029 got_dot
= got_e
= 0;
1034 else if (!got_e
&& is_hexa
&& isxdigit (c
))
1036 else if (got_e
&& wp
[wpsize
- 1] == exp_char
1037 && (c
== '-' || c
== '+'))
1039 else if (wpsize
> 0 && !got_e
&& tolower (c
) == exp_char
)
1042 got_e
= got_dot
= 1;
1044 else if (c
== decimal
&& !got_dot
)
1049 else if ((flags
& GROUP
) && c
== thousands
&& !got_dot
)
1053 /* The last read character is not part of the number
1061 while (width
!= 0 && inchar () != EOF
);
1063 /* Have we read any character? If we try to read a number
1064 in hexadecimal notation and we have read only the `0x'
1065 prefix this is an error. */
1066 if (wpsize
== 0 || (is_hexa
&& wpsize
== 2))
1070 /* Convert the number. */
1072 if (flags
& LONGDBL
)
1074 long double d
= __strtold_internal (wp
, &tw
, flags
& GROUP
);
1075 if (!(flags
& SUPPRESS
) && tw
!= wp
)
1076 *ARG (long double *) = negative
? -d
: d
;
1078 else if (flags
& LONG
)
1080 double d
= __strtod_internal (wp
, &tw
, flags
& GROUP
);
1081 if (!(flags
& SUPPRESS
) && tw
!= wp
)
1082 *ARG (double *) = negative
? -d
: d
;
1086 float d
= __strtof_internal (wp
, &tw
, flags
& GROUP
);
1087 if (!(flags
& SUPPRESS
) && tw
!= wp
)
1088 *ARG (float *) = negative
? -d
: d
;
1094 if (!(flags
& SUPPRESS
))
1098 case '[': /* Character class. */
1101 STRING_ARG (wstr
, wchar_t);
1102 c
= '\0'; /* This is to keep gcc quiet. */
1106 STRING_ARG (str
, char);
1121 /* Fill WP with byte flags indexed by character.
1122 We will use this flag map for matching input characters. */
1123 if (wpmax
< UCHAR_MAX
)
1126 wp
= (char *) alloca (wpmax
);
1128 memset (wp
, 0, UCHAR_MAX
);
1131 if (fc
== ']' || fc
== '-')
1133 /* If ] or - appears before any char in the set, it is not
1134 the terminator or separator, but the first char in the
1140 while ((fc
= *f
++) != '\0' && fc
!= ']')
1142 if (fc
== '-' && *f
!= '\0' && *f
!= ']' &&
1143 (unsigned char) f
[-2] <= (unsigned char) *f
)
1145 /* Add all characters from the one before the '-'
1146 up to (but not including) the next format char. */
1147 for (fc
= f
[-2]; fc
< *f
; ++fc
)
1151 /* Add the character to the flag map. */
1156 if (!(flags
& LONG
))
1169 NEXT_WIDE_CHAR (first
);
1170 if (val
<= 255 && wp
[val
] == not_in
)
1175 STRING_ADD_CHAR (wstr
, val
, wchar_t);
1185 if (!(flags
& SUPPRESS
))
1193 num
.ul
= read_in
- 1; /* -1 because we already read one char. */
1196 if (wp
[c
] == not_in
)
1201 STRING_ADD_CHAR (str
, c
, char);
1205 while (width
!= 0 && inchar () != EOF
);
1207 if (read_in
== num
.ul
)
1210 if (!(flags
& SUPPRESS
))
1218 case 'p': /* Generic pointer. */
1220 /* A PTR must be the same size as a `long int'. */
1221 flags
&= ~(SHORT
|LONGDBL
);
1228 /* The last thing we saw in the format string was a white space.
1229 Consume the last white spaces. */
1234 while (isspace (c
));
1238 /* Unlock stream. */
1246 __vfscanf (FILE *s
, const char *format
, va_list argptr
)
1248 return _IO_vfscanf (s
, format
, argptr
, NULL
);
1252 weak_alias (__vfscanf
, vfscanf
)