1 /* Copyright (C) 1991,92,93,94,95,96,97,98 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
4 The GNU C Library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Library General Public License as
6 published by the Free Software Foundation; either version 2 of the
7 License, or (at your option) any later version.
9 The GNU C Library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Library General Public License for more details.
14 You should have received a copy of the GNU Library General Public
15 License along with the GNU C Library; see the file COPYING.LIB. If not,
16 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 Boston, MA 02111-1307, USA. */
28 #include <bits/libc-lock.h>
29 #include <locale/localeinfo.h>
32 # define HAVE_LONGLONG
33 # define LONGLONG long long
35 # define LONGLONG long
38 /* These are the flags that record the modifiers seen in a conversion
   specification.  Each one is a distinct bit, so several of them can
   be OR-ed together.  */
39 # define LONG 0x001 /* l: long or double */
40 # define LONGDBL 0x002 /* L: long long or long double */
41 # define SHORT 0x004 /* h: short */
42 # define SUPPRESS 0x008 /* *: suppress assignment */
43 # define POINTER 0x010 /* weird %p pointer (`fake hex') */
44 # define NOSKIP 0x020 /* do not skip blanks */
45 # define WIDTH 0x040 /* width was given */
46 # define GROUP 0x080 /* ': group numbers */
47 # define MALLOC 0x100 /* a: malloc strings */
48 # define CHAR 0x200 /* hh: char */
/* Mask combining the four type-modifier flags (l, L, h and hh).  */
50 # define TYPEMOD (LONG|LONGDBL|SHORT|CHAR)
58 # define va_list _IO_va_list
59 # define ungetc(c, s) ((void) ((int) c == EOF \
61 _IO_sputbackc (s, (unsigned char) c))))
62 # define inchar() (c == EOF ? EOF \
63 : ((c = _IO_getc_unlocked (s)), \
64 (void) (c != EOF && ++read_in), c))
65 # define encode_error() do { \
66 if (errp != NULL) *errp |= 4; \
67 _IO_funlockfile (s); \
68 __libc_cleanup_end (0); \
69 __set_errno (EILSEQ); \
72 # define conv_error() do { \
73 if (errp != NULL) *errp |= 2; \
74 _IO_funlockfile (s); \
75 __libc_cleanup_end (0); \
78 # define input_error() do { \
79 _IO_funlockfile (s); \
80 if (errp != NULL) *errp |= 1; \
81 __libc_cleanup_end (0); \
84 # define memory_error() do { \
85 _IO_funlockfile (s); \
86 __set_errno (ENOMEM); \
87 __libc_cleanup_end (0); \
90 # define ARGCHECK(s, format) \
93 /* Check file argument for consistency. */ \
94 CHECK_FILE (s, EOF); \
95 if (s->_flags & _IO_NO_READS) \
97 __set_errno (EBADF); \
100 else if (format == NULL) \
106 # define LOCK_STREAM(S) \
107 __libc_cleanup_region_start ((void (*) (void *)) &_IO_funlockfile, (S)); \
109 # define UNLOCK_STREAM(S) \
110 _IO_funlockfile (S); \
111 __libc_cleanup_region_end (0)
113 # define ungetc(c, s) ((void) (c != EOF && --read_in), ungetc (c, s))
114 # define inchar() (c == EOF ? EOF \
115 : ((c = getc (s)), (void) (c != EOF && ++read_in), c))
116 # define encode_error() do { \
118 __set_errno (EILSEQ); \
121 # define conv_error() do { \
125 # define input_error() do { \
127 return done ?: EOF; \
129 # define memory_error() do { \
131 __set_errno (ENOMEM); \
134 # define ARGCHECK(s, format) \
137 /* Check file argument for consistency. */ \
138 if (!__validfp (s) || !s->__mode.__read) \
140 __set_errno (EBADF); \
143 else if (format == NULL) \
145 __set_errno (EINVAL); \
150 /* XXX For now !!! */
151 # define flockfile(S) /* nothing */
152 # define funlockfile(S) /* nothing */
153 # define LOCK_STREAM(S)
154 # define UNLOCK_STREAM(S)
156 # define LOCK_STREAM(S) \
157 __libc_cleanup_region_start (&__funlockfile, (S)); \
159 # define UNLOCK_STREAM(S) \
161 __libc_cleanup_region_end (0)
166 /* Read formatted input from S according to the format string
167 FORMAT, using the argument list in ARG.
168 Return the number of assignments made, or -1 for an input error. */
171 _IO_vfscanf (s
, format
, argptr
, errp
)
178 __vfscanf (FILE *s
, const char *format
, va_list argptr
)
182 register const char *f
= format
;
183 register unsigned char fc
; /* Current character of the format. */
184 register size_t done
= 0; /* Assignments done. */
185 register size_t read_in
= 0; /* Chars read in. */
186 register int c
= 0; /* Last char read. */
187 register int width
; /* Maximum field width. */
188 register int flags
; /* Modifiers for current format element. */
190 /* Status for reading F-P nums. */
191 char got_dot
, got_e
, negative
;
192 /* If a [...] is a [^...]. */
194 #define exp_char not_in
195 /* Base for integral numbers. */
197 /* Signedness for integral numbers. */
199 #define is_hexa number_signed
200 /* Decimal point character. */
202 /* The thousands character of the current locale. */
204 /* Integral holding variables. */
208 unsigned long long int uq
;
210 unsigned long int ul
;
212 /* Character-buffer pointer. */
214 wchar_t *wstr
= NULL
;
215 char **strptr
= NULL
;
217 /* We must not react to white space immediately because it can
218 possibly be matched even if no character is available in the
219 input stream anymore. */
222 char *tw
; /* Temporary pointer. */
223 char *wp
= NULL
; /* Workspace. */
224 size_t wpmax
= 0; /* Maximal size of workspace. */
225 size_t wpsize
; /* Currently used bytes in workspace. */
229 if (wpsize == wpmax) \
232 wpmax = UCHAR_MAX > 2 * wpmax ? UCHAR_MAX : 2 * wpmax; \
233 wp = (char *) alloca (wpmax); \
235 memcpy (wp, old, wpsize); \
237 wp[wpsize++] = (Ch); \
242 __va_copy (arg
, argptr
);
244 arg
= (va_list) argptr
;
247 ARGCHECK (s
, format
);
249 /* Figure out the decimal point character. */
250 if (mbtowc (&decimal
, _NL_CURRENT (LC_NUMERIC
, DECIMAL_POINT
),
251 strlen (_NL_CURRENT (LC_NUMERIC
, DECIMAL_POINT
))) <= 0)
252 decimal
= (wchar_t) *_NL_CURRENT (LC_NUMERIC
, DECIMAL_POINT
);
253 /* Figure out the thousands separator character. */
254 if (mbtowc (&thousands
, _NL_CURRENT (LC_NUMERIC
, THOUSANDS_SEP
),
255 strlen (_NL_CURRENT (LC_NUMERIC
, THOUSANDS_SEP
))) <= 0)
256 thousands
= (wchar_t) *_NL_CURRENT (LC_NUMERIC
, THOUSANDS_SEP
);
258 /* Lock the stream. */
261 /* Run through the format string. */
265 /* Extract the next argument, which is of type TYPE.
266 For a %N$... spec, this is the Nth argument from the beginning;
267 otherwise it is the next argument after the state now in ARG. */
269 # define ARG(type) (argpos == 0 ? va_arg (arg, type) : \
270 ({ unsigned int pos = argpos; \
272 __va_copy (arg, argptr); \
274 (void) va_arg (arg, void *); \
275 va_arg (arg, type); \
279 /* XXX Possible optimization. */
280 # define ARG(type) (argpos == 0 ? va_arg (arg, type) : \
281 ({ va_list arg = (va_list) argptr; \
282 arg = (va_list) ((char *) arg \
284 * __va_rounded_size (void *)); \
285 va_arg (arg, type); \
288 # define ARG(type) (argpos == 0 ? va_arg (arg, type) : \
289 ({ unsigned int pos = argpos; \
290 va_list arg = (va_list) argptr; \
292 (void) va_arg (arg, void *); \
293 va_arg (arg, type); \
300 /* Non-ASCII, may be a multibyte. */
301 int len
= mblen (f
, strlen (f
));
323 /* Remember to skip spaces. */
330 /* Read a character. */
333 /* Characters other than format specs must just match. */
337 /* We saw white space char as the last character in the format
338 string. Now it's time to skip all leading white space. */
342 if (inchar () == EOF
&& errno
== EINTR
)
356 /* This is the start of the conversion string. */
359 /* Initialize state of modifiers. */
362 /* Prepare temporary buffer. */
365 /* Check for a positional parameter specification. */
370 argpos
= argpos
* 10 + (*f
++ - '0');
375 /* Oops; that was actually the field width. */
383 /* Check for the assignment-suppressing and the number grouping flag. */
384 while (*f
== '*' || *f
== '\'')
395 /* We have seen width. */
399 /* Find the maximum field width. */
410 /* Check for type modifiers. */
411 while (*f
== 'h' || *f
== 'l' || *f
== 'L' || *f
== 'a' || *f
== 'q'
412 || *f
== 'z' || *f
== 't' || *f
== 'j')
416 /* int's are short int's. */
417 if (flags
& (LONG
|LONGDBL
|CHAR
))
418 /* Signal illegal format element. */
429 if (flags
& (SHORT
|LONGDBL
|CHAR
))
431 else if (flags
& LONG
)
433 /* A double `l' is equivalent to an `L'. */
438 /* int's are long int's. */
443 /* double's are long double's, and int's are long long int's. */
445 /* Signal illegal format element. */
450 /* The `a' is used as a flag only if followed by `s', `S' or
452 if (*f
!= 's' && *f
!= 'S' && *f
!= '[')
458 /* Signal illegal format element. */
460 /* String conversions (%s, %[) take a `char **'
461 arg and fill it in with a malloc'd pointer. */
465 if (flags
& (SHORT
|LONGDBL
|CHAR
))
467 if (sizeof (size_t) > sizeof (unsigned long int))
469 else if (sizeof (size_t) > sizeof (unsigned int))
473 if (flags
& (SHORT
|LONGDBL
|CHAR
))
475 if (sizeof (intmax_t) > sizeof (unsigned long int))
477 else if (sizeof (intmax_t) > sizeof (unsigned int))
481 if (flags
& (SHORT
|LONGDBL
|CHAR
))
483 if (sizeof (ptrdiff_t) > sizeof (unsigned long int))
485 else if (sizeof (ptrdiff_t) > sizeof (unsigned int))
490 /* End of the format string? */
494 /* Find the conversion specifier. */
496 if (skip_space
|| (fc
!= '[' && fc
!= 'c' && fc
!= 'C' && fc
!= 'n'))
498 /* Eat whitespace. */
499 int save_errno
= errno
;
502 if (inchar () == EOF
&& errno
== EINTR
)
512 case '%': /* Must match a literal '%'. */
523 case 'n': /* Answer number of assignments done. */
524 /* Corrigendum 1 to ISO C 1990 describes the allowed flags
525 with the 'n' conversion specifier. */
526 if (!(flags
& SUPPRESS
))
528 /* Don't count the read-ahead. */
530 *ARG (long long int *) = read_in
;
531 else if (flags
& LONG
)
532 *ARG (long int *) = read_in
;
533 else if (flags
& SHORT
)
534 *ARG (short int *) = read_in
;
536 *ARG (int *) = read_in
;
538 #ifdef NO_BUG_IN_ISO_C_CORRIGENDUM_1
539 /* We have a severe problem here. The ISO C standard
540 contradicts itself in explaining the effect of the %n
541 format in `scanf'. While in ISO C:1990 and the ISO C
542 Amendment 1:1995 the result is described as
544 Execution of a %n directive does not affect the
545 assignment count returned at the completion of
546 execution of the f(w)scanf function.
548 in ISO C Corrigendum 1:1994 the following was added:
551 Add the following fourth example:
554 int d1, d2, n1, n2, i;
555 i = sscanf("123", "%d%n%n%d", &d1, &n1, &n2, &d2);
556 the value 123 is assigned to d1 and the value 3 to n1.
557 Because %n can never get an input failure the value
558 of 3 is also assigned to n2. The value of d2 is not
559 affected. The value 3 is assigned to i.
561 For now we go with the historically correct code from ISO C,
562 i.e., we don't count the %n assignments. If this should ever
563 prove to be wrong, just remove the #ifdef above. */
569 case 'c': /* Match characters. */
570 if ((flags
& LONG
) == 0)
572 if (!(flags
& SUPPRESS
))
586 if (!(flags
& SUPPRESS
))
590 while (--width
> 0 && inchar () != EOF
);
593 while (--width
> 0 && inchar () != EOF
);
595 if (!(flags
& SUPPRESS
))
602 /* Get UTF-8 encoded wide character. Here we assume (as in
603 other parts of the libc) that we only have to handle
610 if (!(flags
& SUPPRESS
))
612 wstr
= ARG (wchar_t *);
619 #define NEXT_WIDE_CHAR(First) \
622 /* EOF is only an error for the first character. */ \
630 if ((c & 0xc0) == 0x80 || (c & 0xfe) == 0xfe) \
632 if ((c & 0xe0) == 0xc0) \
634 /* We expect two bytes. */ \
638 else if ((c & 0xf0) == 0xe0) \
640 /* We expect three bytes. */ \
644 else if ((c & 0xf8) == 0xf0) \
646 /* We expect four bytes. */ \
650 else if ((c & 0xfc) == 0xf8) \
652 /* We expect five bytes. */ \
658 /* We expect six bytes. */ \
667 || (c & 0xc0) == 0x80 || (c & 0xfe) == 0xfe) \
675 if (!(flags & SUPPRESS)) \
679 NEXT_WIDE_CHAR (first
);
683 if (!(flags
& SUPPRESS
))
688 case 's': /* Read a string. */
690 /* We have to process a wide character string. */
691 goto wide_char_string
;
693 #define STRING_ARG(Str, Type) \
694 if (!(flags & SUPPRESS)) \
696 if (flags & MALLOC) \
698 /* The string is to be stored in a malloc'd buffer. */ \
699 strptr = ARG (char **); \
700 if (strptr == NULL) \
702 /* Allocate an initial buffer. */ \
704 *strptr = malloc (strsize * sizeof (Type)); \
705 Str = (Type *) *strptr; \
708 Str = ARG (Type *); \
712 STRING_ARG (str
, char);
725 #define STRING_ADD_CHAR(Str, c, Type) \
726 if (!(flags & SUPPRESS)) \
729 if ((flags & MALLOC) && (char *) Str == *strptr + strsize) \
731 /* Enlarge the buffer. */ \
732 Str = realloc (*strptr, strsize * 2 * sizeof (Type)); \
735 /* Can't allocate that much. Last-ditch effort. */\
736 Str = realloc (*strptr, \
737 (strsize + 1) * sizeof (Type)); \
740 /* We lose. Oh well. \
741 Terminate the string and stop converting, \
742 so at least we don't skip any input. */ \
743 ((Type *) (*strptr))[strsize] = '\0'; \
749 *strptr = (char *) Str; \
750 Str = ((Type *) *strptr) + strsize; \
756 *strptr = (char *) Str; \
757 Str = ((Type *) *strptr) + strsize; \
762 STRING_ADD_CHAR (str
, c
, char);
763 } while ((width
<= 0 || --width
> 0) && inchar () != EOF
);
765 if (!(flags
& SUPPRESS
))
773 /* Wide character string. */
778 STRING_ARG (wstr
, wchar_t);
783 NEXT_WIDE_CHAR (first
);
787 /* XXX We would have to push back the whole wide char
788 with possibly many bytes. But since scanf does
789 not make a difference for white space characters
790 we can simply push back a simple <SP> which is
791 guaranteed to be in the [:space:] class. */
796 STRING_ADD_CHAR (wstr
, val
, wchar_t);
799 while (width
<= 0 || --width
> 0);
801 if (!(flags
& SUPPRESS
))
809 case 'x': /* Hexadecimal integer. */
810 case 'X': /* Ditto. */
815 case 'o': /* Octal integer. */
820 case 'u': /* Unsigned decimal integer. */
825 case 'd': /* Signed decimal integer. */
830 case 'i': /* Generic number. */
839 /* Check for a sign. */
840 if (c
== '-' || c
== '+')
848 /* Look for a leading indication of base. */
849 if (width
!= 0 && c
== '0')
857 if (width
!= 0 && tolower (c
) == 'x')
875 /* Read the number into workspace. */
876 while (c
!= EOF
&& width
!= 0)
878 if (base
== 16 ? !isxdigit (c
) :
879 ((!isdigit (c
) || c
- '0' >= base
) &&
880 !((flags
& GROUP
) && base
== 10 && c
== thousands
)))
889 /* The just read character is not part of the number anymore. */
893 (wpsize
== 1 && (wp
[0] == '+' || wp
[0] == '-')))
894 /* There was no number. */
897 /* Convert the number. */
902 num
.q
= __strtoll_internal (wp
, &tw
, base
, flags
& GROUP
);
904 num
.uq
= __strtoull_internal (wp
, &tw
, base
, flags
& GROUP
);
909 num
.l
= __strtol_internal (wp
, &tw
, base
, flags
& GROUP
);
911 num
.ul
= __strtoul_internal (wp
, &tw
, base
, flags
& GROUP
);
916 if (!(flags
& SUPPRESS
))
921 *ARG (unsigned LONGLONG
int *) = num
.uq
;
922 else if (flags
& LONG
)
923 *ARG (unsigned long int *) = num
.ul
;
924 else if (flags
& SHORT
)
925 *ARG (unsigned short int *)
926 = (unsigned short int) num
.ul
;
927 else if (flags
& CHAR
)
928 *ARG (unsigned char *) = (unsigned char) num
.ul
;
930 *ARG (unsigned int *) = (unsigned int) num
.ul
;
935 *ARG (LONGLONG
int *) = num
.q
;
936 else if (flags
& LONG
)
937 *ARG (long int *) = num
.l
;
938 else if (flags
& SHORT
)
939 *ARG (short int *) = (short int) num
.l
;
940 else if (flags
& CHAR
)
941 *ARG (signed char *) = (signed char) num
.ul
;
943 *ARG (int *) = (int) num
.l
;
949 case 'e': /* Floating-point numbers. */
960 /* Check for a sign. */
961 if (c
== '-' || c
== '+')
964 if (inchar () == EOF
)
965 /* EOF is only an input error before we read any chars. */
973 /* Take care of the special arguments "nan" and "inf". */
974 if (tolower (c
) == 'n')
978 if (inchar () == EOF
|| tolower (c
) != 'a')
981 if (inchar () == EOF
|| tolower (c
) != 'n')
987 else if (tolower (c
) == 'i')
989 /* Maybe "inf" or "infinity". */
991 if (inchar () == EOF
|| tolower (c
) != 'n')
994 if (inchar () == EOF
|| tolower (c
) != 'f')
997 /* It is at least "inf". */
998 if (inchar () != EOF
)
1000 if (tolower (c
) == 'i')
1002 /* Now we have to read the rest as well. */
1004 if (inchar () == EOF
|| tolower (c
) != 'n')
1007 if (inchar () == EOF
|| tolower (c
) != 'i')
1010 if (inchar () == EOF
|| tolower (c
) != 't')
1013 if (inchar () == EOF
|| tolower (c
) != 'y')
1030 if (tolower (c
) == 'x')
1032 /* It is a number in hexadecimal format. */
1038 /* Grouping is not allowed. */
1044 got_dot
= got_e
= 0;
1049 else if (!got_e
&& is_hexa
&& isxdigit (c
))
1051 else if (got_e
&& wp
[wpsize
- 1] == exp_char
1052 && (c
== '-' || c
== '+'))
1054 else if (wpsize
> 0 && !got_e
&& tolower (c
) == exp_char
)
1057 got_e
= got_dot
= 1;
1059 else if (c
== decimal
&& !got_dot
)
1064 else if ((flags
& GROUP
) && c
== thousands
&& !got_dot
)
1068 /* The last read character is not part of the number
1076 while (width
!= 0 && inchar () != EOF
);
1078 /* Have we read any character? If we try to read a number
1079 in hexadecimal notation and we have read only the `0x'
1080 prefix this is an error. */
1081 if (wpsize
== 0 || (is_hexa
&& wpsize
== 2))
1085 /* Convert the number. */
1087 if (flags
& LONGDBL
)
1089 long double d
= __strtold_internal (wp
, &tw
, flags
& GROUP
);
1090 if (!(flags
& SUPPRESS
) && tw
!= wp
)
1091 *ARG (long double *) = negative
? -d
: d
;
1093 else if (flags
& LONG
)
1095 double d
= __strtod_internal (wp
, &tw
, flags
& GROUP
);
1096 if (!(flags
& SUPPRESS
) && tw
!= wp
)
1097 *ARG (double *) = negative
? -d
: d
;
1101 float d
= __strtof_internal (wp
, &tw
, flags
& GROUP
);
1102 if (!(flags
& SUPPRESS
) && tw
!= wp
)
1103 *ARG (float *) = negative
? -d
: d
;
1109 if (!(flags
& SUPPRESS
))
1113 case '[': /* Character class. */
1116 STRING_ARG (wstr
, wchar_t);
1117 c
= '\0'; /* This is to keep gcc quiet. */
1121 STRING_ARG (str
, char);
1136 /* Fill WP with byte flags indexed by character.
1137 We will use this flag map for matching input characters. */
1138 if (wpmax
< UCHAR_MAX
)
1141 wp
= (char *) alloca (wpmax
);
1143 memset (wp
, 0, UCHAR_MAX
);
1146 if (fc
== ']' || fc
== '-')
1148 /* If ] or - appears before any char in the set, it is not
1149 the terminator or separator, but the first char in the
1155 while ((fc
= *f
++) != '\0' && fc
!= ']')
1157 if (fc
== '-' && *f
!= '\0' && *f
!= ']' &&
1158 (unsigned char) f
[-2] <= (unsigned char) *f
)
1160 /* Add all characters from the one before the '-'
1161 up to (but not including) the next format char. */
1162 for (fc
= f
[-2]; fc
< *f
; ++fc
)
1166 /* Add the character to the flag map. */
1171 if (!(flags
& LONG
))
1184 NEXT_WIDE_CHAR (first
);
1185 if (val
<= 255 && wp
[val
] == not_in
)
1190 STRING_ADD_CHAR (wstr
, val
, wchar_t);
1200 if (!(flags
& SUPPRESS
))
1208 num
.ul
= read_in
- 1; /* -1 because we already read one char. */
1211 if (wp
[c
] == not_in
)
1216 STRING_ADD_CHAR (str
, c
, char);
1220 while (width
!= 0 && inchar () != EOF
);
1222 if (read_in
== num
.ul
)
1225 if (!(flags
& SUPPRESS
))
1233 case 'p': /* Generic pointer. */
1235 /* A PTR must be the same size as a `long int'. */
1236 flags
&= ~(SHORT
|LONGDBL
);
1243 /* The last thing we saw in the format string was white space.
1244 Consume the remaining white space. */
1249 while (isspace (c
));
1253 /* Unlock stream. */
1261 __vfscanf (FILE *s
, const char *format
, va_list argptr
)
1263 return _IO_vfscanf (s
, format
, argptr
, NULL
);
1267 weak_alias (__vfscanf
, vfscanf
)