1 /* Copyright (C) 1991,92,93,94,95,96,97,98,99 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
4 The GNU C Library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Library General Public License as
6 published by the Free Software Foundation; either version 2 of the
7 License, or (at your option) any later version.
9 The GNU C Library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Library General Public License for more details.
14 You should have received a copy of the GNU Library General Public
15 License along with the GNU C Library; see the file COPYING.LIB. If not,
16 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 Boston, MA 02111-1307, USA. */
29 #include <bits/libc-lock.h>
30 #include <locale/localeinfo.h>
33 # define HAVE_LONGLONG
34 # define LONGLONG long long
36 # define LONGLONG long
39 /* Those are flags in the conversion format. */
40 # define LONG 0x001 /* l: long or double */
41 # define LONGDBL 0x002 /* L: long long or long double */
42 # define SHORT 0x004 /* h: short */
43 # define SUPPRESS 0x008 /* *: suppress assignment */
44 # define POINTER 0x010 /* weird %p pointer (`fake hex') */
45 # define NOSKIP 0x020 /* do not skip blanks */
46 # define WIDTH 0x040 /* width was given */
47 # define GROUP 0x080 /* ': group numbers */
48 # define MALLOC 0x100 /* a: malloc strings */
49 # define CHAR 0x200 /* hh: char */
57 # define va_list _IO_va_list
58 # define ungetc(c, s) ((void) ((int) c == EOF \
60 _IO_sputbackc (s, (unsigned char) c))))
61 # define inchar() (c == EOF ? EOF \
62 : ((c = _IO_getc_unlocked (s)), \
63 (void) (c != EOF && ++read_in), c))
64 # define encode_error() do { \
65 if (errp != NULL) *errp |= 4; \
66 _IO_funlockfile (s); \
67 __libc_cleanup_end (0); \
68 __set_errno (EILSEQ); \
71 # define conv_error() do { \
72 if (errp != NULL) *errp |= 2; \
73 _IO_funlockfile (s); \
74 __libc_cleanup_end (0); \
77 # define input_error() do { \
78 _IO_funlockfile (s); \
79 if (errp != NULL) *errp |= 1; \
80 __libc_cleanup_end (0); \
83 # define memory_error() do { \
84 _IO_funlockfile (s); \
85 __set_errno (ENOMEM); \
86 __libc_cleanup_end (0); \
89 # define ARGCHECK(s, format) \
92 /* Check file argument for consistence. */ \
93 CHECK_FILE (s, EOF); \
94 if (s->_flags & _IO_NO_READS) \
96 __set_errno (EBADF); \
99 else if (format == NULL) \
105 # define LOCK_STREAM(S) \
106 __libc_cleanup_region_start ((void (*) (void *)) &_IO_funlockfile, (S)); \
108 # define UNLOCK_STREAM(S) \
109 _IO_funlockfile (S); \
110 __libc_cleanup_region_end (0)
112 # define ungetc(c, s) ((void) (c != EOF && --read_in), ungetc (c, s))
113 # define inchar() (c == EOF ? EOF \
114 : ((c = getc (s)), (void) (c != EOF && ++read_in), c))
115 # define encode_error() do { \
117 __set_errno (EILSEQ); \
120 # define conv_error() do { \
124 # define input_error() do { \
126 return done ?: EOF; \
128 # define memory_error() do { \
130 __set_errno (ENOMEM); \
133 # define ARGCHECK(s, format) \
136 /* Check file argument for consistence. */ \
137 if (!__validfp (s) || !s->__mode.__read) \
139 __set_errno (EBADF); \
142 else if (format == NULL) \
144 __set_errno (EINVAL); \
149 /* XXX For now !!! */
150 # define flockfile(S) /* nothing */
151 # define funlockfile(S) /* nothing */
152 # define LOCK_STREAM(S)
153 # define UNLOCK_STREAM(S)
155 # define LOCK_STREAM(S) \
156 __libc_cleanup_region_start (&__funlockfile, (S)); \
158 # define UNLOCK_STREAM(S) \
160 __libc_cleanup_region_end (0)
165 /* Read formatted input from S according to the format string
166 FORMAT, using the argument list in ARG.
167 Return the number of assignments made, or -1 for an input error. */
170 _IO_vfscanf (s
, format
, argptr
, errp
)
177 __vfscanf (FILE *s
, const char *format
, va_list argptr
)
181 register const char *f
= format
;
182 register unsigned char fc
; /* Current character of the format. */
183 register size_t done
= 0; /* Assignments done. */
184 register size_t read_in
= 0; /* Chars read in. */
185 register int c
= 0; /* Last char read. */
186 register int width
; /* Maximum field width. */
187 register int flags
; /* Modifiers for current format element. */
189 /* Status for reading F-P nums. */
190 char got_dot
, got_e
, negative
;
191 /* If a [...] is a [^...]. */
193 #define exp_char not_in
194 /* Base for integral numbers. */
196 /* Signedness for integral numbers. */
198 #define is_hexa number_signed
199 /* Decimal point character. */
201 /* The thousands character of the current locale. */
203 /* State for the conversions. */
205 /* Integral holding variables. */
209 unsigned long long int uq
;
211 unsigned long int ul
;
213 /* Character-buffer pointer. */
215 wchar_t *wstr
= NULL
;
216 char **strptr
= NULL
;
218 /* We must not react to white spaces immediately because they can
219 possibly be matched even if in the input stream no character is
220 available anymore. */
222 /* Nonzero if we are reading a pointer. */
225 char *tw
; /* Temporary pointer. */
226 char *wp
= NULL
; /* Workspace. */
227 size_t wpmax
= 0; /* Maximal size of workspace. */
228 size_t wpsize
; /* Currently used bytes in workspace. */
232 if (wpsize == wpmax) \
235 wpmax = UCHAR_MAX > 2 * wpmax ? UCHAR_MAX : 2 * wpmax; \
236 wp = (char *) alloca (wpmax); \
238 memcpy (wp, old, wpsize); \
240 wp[wpsize++] = (Ch); \
245 __va_copy (arg
, argptr
);
247 arg
= (va_list) argptr
;
250 ARGCHECK (s
, format
);
252 /* Figure out the decimal point character. */
253 memset (&state
, '\0', sizeof (state
));
254 if (__mbrtowc (&decimal
, _NL_CURRENT (LC_NUMERIC
, DECIMAL_POINT
),
255 strlen (_NL_CURRENT (LC_NUMERIC
, DECIMAL_POINT
)), &state
)
257 decimal
= (wchar_t) *_NL_CURRENT (LC_NUMERIC
, DECIMAL_POINT
);
258 /* Figure out the thousands separator character. */
259 memset (&state
, '\0', sizeof (state
));
260 if (__mbrtowc (&thousands
, _NL_CURRENT (LC_NUMERIC
, THOUSANDS_SEP
),
261 strlen (_NL_CURRENT (LC_NUMERIC
, THOUSANDS_SEP
)),
263 thousands
= (wchar_t) *_NL_CURRENT (LC_NUMERIC
, THOUSANDS_SEP
);
265 /* Lock the stream. */
269 /* From now on we use `state' to convert the format string. */
270 memset (&state
, '\0', sizeof (state
));
272 /* Run through the format string. */
276 /* Extract the next argument, which is of type TYPE.
277 For a %N$... spec, this is the Nth argument from the beginning;
278 otherwise it is the next argument after the state now in ARG. */
280 # define ARG(type) (argpos == 0 ? va_arg (arg, type) : \
281 ({ unsigned int pos = argpos; \
283 __va_copy (arg, argptr); \
285 (void) va_arg (arg, void *); \
286 va_arg (arg, type); \
290 /* XXX Possible optimization. */
291 # define ARG(type) (argpos == 0 ? va_arg (arg, type) : \
292 ({ va_list arg = (va_list) argptr; \
293 arg = (va_list) ((char *) arg \
295 * __va_rounded_size (void *)); \
296 va_arg (arg, type); \
299 # define ARG(type) (argpos == 0 ? va_arg (arg, type) : \
300 ({ unsigned int pos = argpos; \
301 va_list arg = (va_list) argptr; \
303 (void) va_arg (arg, void *); \
304 va_arg (arg, type); \
311 /* Non-ASCII, may be a multibyte. */
312 int len
= __mbrlen (f
, strlen (f
), &state
);
334 /* Remember to skip spaces. */
341 /* Read a character. */
344 /* Characters other than format specs must just match. */
348 /* We saw white space char as the last character in the format
349 string. Now it's time to skip all leading white space. */
353 if (inchar () == EOF
&& errno
== EINTR
)
367 /* This is the start of the conversion string. */
370 /* Not yet decided whether we read a pointer or not. */
373 /* Initialize state of modifiers. */
376 /* Prepare temporary buffer. */
379 /* Check for a positional parameter specification. */
384 argpos
= argpos
* 10 + (*f
++ - '0');
389 /* Oops; that was actually the field width. */
397 /* Check for the assignment-suppressing and the number grouping flag. */
398 while (*f
== '*' || *f
== '\'')
409 /* We have seen width. */
413 /* Find the maximum field width. */
424 /* Check for type modifiers. */
428 /* ints are short ints or chars. */
440 /* A double `l' is equivalent to an `L'. */
445 /* ints are long ints. */
450 /* doubles are long doubles, and ints are long long ints. */
454 /* The `a' is used as a flag only if followed by `s', `S' or
456 if (*f
!= 's' && *f
!= 'S' && *f
!= '[')
461 /* String conversions (%s, %[) take a `char **'
462 arg and fill it in with a malloc'd pointer. */
466 if (sizeof (size_t) > sizeof (unsigned long int))
468 else if (sizeof (size_t) > sizeof (unsigned int))
472 if (sizeof (uintmax_t) > sizeof (unsigned long int))
474 else if (sizeof (uintmax_t) > sizeof (unsigned int))
478 if (sizeof (ptrdiff_t) > sizeof (long int))
480 else if (sizeof (ptrdiff_t) > sizeof (int))
484 /* Not a recognized modifier. Backup. */
489 /* End of the format string? */
493 /* Find the conversion specifier. */
495 if (skip_space
|| (fc
!= '[' && fc
!= 'c' && fc
!= 'C' && fc
!= 'n'))
497 /* Eat whitespace. */
498 int save_errno
= errno
;
501 if (inchar () == EOF
&& errno
== EINTR
)
511 case '%': /* Must match a literal '%'. */
522 case 'n': /* Answer number of assignments done. */
523 /* Corrigendum 1 to ISO C 1990 describes the allowed flags
524 with the 'n' conversion specifier. */
525 if (!(flags
& SUPPRESS
))
527 /* Don't count the read-ahead. */
529 *ARG (long long int *) = read_in
;
530 else if (flags
& LONG
)
531 *ARG (long int *) = read_in
;
532 else if (flags
& SHORT
)
533 *ARG (short int *) = read_in
;
535 *ARG (int *) = read_in
;
537 #ifdef NO_BUG_IN_ISO_C_CORRIGENDUM_1
538 /* We have a severe problem here. The ISO C standard
539 contradicts itself in explaining the effect of the %n
540 format in `scanf'. While in ISO C:1990 and the ISO C
541 Amendment 1:1995 the result is described as
543 Execution of a %n directive does not affect the
544 assignment count returned at the completion of
545 execution of the f(w)scanf function.
547 in ISO C Corrigendum 1:1994 the following was added:
550 Add the following fourth example:
553 int d1, d2, n1, n2, i;
554 i = sscanf("123", "%d%n%n%d", &d1, &n1, &n2, &d2);
555 the value 123 is assigned to d1 and the value 3 to n1.
556 Because %n can never get an input failure the value
557 of 3 is also assigned to n2. The value of d2 is not
558 affected. The value 3 is assigned to i.
560 We go for now with the historically correct code from ISO C,
561 i.e., we don't count the %n assignments. If this should ever
562 prove to be wrong, just remove the #ifdef above. */
568 case 'c': /* Match characters. */
569 if ((flags
& LONG
) == 0)
571 if (!(flags
& SUPPRESS
))
585 if (!(flags
& SUPPRESS
))
589 while (--width
> 0 && inchar () != EOF
);
592 while (--width
> 0 && inchar () != EOF
);
594 if (!(flags
& SUPPRESS
))
601 /* Get UTF-8 encoded wide character. Here we assume (as in
602 other parts of the libc) that we only have to handle
609 if (!(flags
& SUPPRESS
))
611 wstr
= ARG (wchar_t *);
618 #define NEXT_WIDE_CHAR(First) \
622 /* EOF is only an error for the first character. */ \
631 if ((c & 0xc0) == 0x80 || (c & 0xfe) == 0xfe) \
633 if ((c & 0xe0) == 0xc0) \
635 /* We expect two bytes. */ \
639 else if ((c & 0xf0) == 0xe0) \
641 /* We expect three bytes. */ \
645 else if ((c & 0xf8) == 0xf0) \
647 /* We expect four bytes. */ \
651 else if ((c & 0xfc) == 0xf8) \
653 /* We expect five bytes. */ \
659 /* We expect six bytes. */ \
668 || (c & 0xc0) == 0x80 || (c & 0xfe) == 0xfe) \
676 if (!(flags & SUPPRESS)) \
680 NEXT_WIDE_CHAR (first
);
684 if (!(flags
& SUPPRESS
))
689 case 's': /* Read a string. */
691 /* We have to process a wide character string. */
692 goto wide_char_string
;
694 #define STRING_ARG(Str, Type) \
695 if (!(flags & SUPPRESS)) \
697 if (flags & MALLOC) \
699 /* The string is to be stored in a malloc'd buffer. */ \
700 strptr = ARG (char **); \
701 if (strptr == NULL) \
703 /* Allocate an initial buffer. */ \
705 *strptr = malloc (strsize * sizeof (Type)); \
706 Str = (Type *) *strptr; \
709 Str = ARG (Type *); \
713 STRING_ARG (str
, char);
726 #define STRING_ADD_CHAR(Str, c, Type) \
727 if (!(flags & SUPPRESS)) \
730 if ((flags & MALLOC) && (char *) Str == *strptr + strsize) \
732 /* Enlarge the buffer. */ \
733 Str = realloc (*strptr, strsize * 2 * sizeof (Type)); \
736 /* Can't allocate that much. Last-ditch effort. */\
737 Str = realloc (*strptr, \
738 (strsize + 1) * sizeof (Type)); \
741 /* We lose. Oh well. \
742 Terminate the string and stop converting, \
743 so at least we don't skip any input. */ \
744 ((Type *) (*strptr))[strsize] = '\0'; \
750 *strptr = (char *) Str; \
751 Str = ((Type *) *strptr) + strsize; \
757 *strptr = (char *) Str; \
758 Str = ((Type *) *strptr) + strsize; \
763 STRING_ADD_CHAR (str
, c
, char);
764 } while ((width
<= 0 || --width
> 0) && inchar () != EOF
);
766 if (!(flags
& SUPPRESS
))
774 /* Wide character string. */
779 STRING_ARG (wstr
, wchar_t);
784 NEXT_WIDE_CHAR (first
);
786 if (__iswspace (val
))
788 /* XXX We would have to push back the whole wide char
789 with possibly many bytes. But since scanf does
790 not make a difference for white space characters
791 we can simply push back a simple <SP> which is
792 guaranteed to be in the [:space:] class. */
797 STRING_ADD_CHAR (wstr
, val
, wchar_t);
800 while (width
<= 0 || --width
> 0);
802 if (!(flags
& SUPPRESS
))
810 case 'x': /* Hexadecimal integer. */
811 case 'X': /* Ditto. */
816 case 'o': /* Octal integer. */
821 case 'u': /* Unsigned decimal integer. */
826 case 'd': /* Signed decimal integer. */
831 case 'i': /* Generic number. */
840 /* Check for a sign. */
841 if (c
== '-' || c
== '+')
849 /* Look for a leading indication of base. */
850 if (width
!= 0 && c
== '0')
858 if (width
!= 0 && _tolower (c
) == 'x')
876 /* Read the number into workspace. */
877 while (c
!= EOF
&& width
!= 0)
879 if (base
== 16 ? !isxdigit (c
) :
880 ((!isdigit (c
) || c
- '0' >= base
) &&
881 !((flags
& GROUP
) && base
== 10 && c
== thousands
)))
891 (wpsize
== 1 && (wp
[0] == '+' || wp
[0] == '-')))
893 /* There was no number. If we are supposed to read a pointer
894 we must recognize "(nil)" as well. */
895 if (wpsize
== 0 && read_pointer
&& (width
< 0 || width
>= 0)
897 && _tolower (inchar ()) == 'n'
898 && _tolower (inchar ()) == 'i'
899 && _tolower (inchar ()) == 'l'
901 /* We must produce the value of a NULL pointer. A single
902 '0' digit is enough. */
906 /* The last read character is not part of the number
914 /* The just read character is not part of the number anymore. */
917 /* Convert the number. */
922 num
.q
= __strtoll_internal (wp
, &tw
, base
, flags
& GROUP
);
924 num
.uq
= __strtoull_internal (wp
, &tw
, base
, flags
& GROUP
);
929 num
.l
= __strtol_internal (wp
, &tw
, base
, flags
& GROUP
);
931 num
.ul
= __strtoul_internal (wp
, &tw
, base
, flags
& GROUP
);
936 if (!(flags
& SUPPRESS
))
941 *ARG (unsigned LONGLONG
int *) = num
.uq
;
942 else if (flags
& LONG
)
943 *ARG (unsigned long int *) = num
.ul
;
944 else if (flags
& SHORT
)
945 *ARG (unsigned short int *)
946 = (unsigned short int) num
.ul
;
947 else if (flags
& CHAR
)
948 *ARG (unsigned char *) = (unsigned char) num
.ul
;
950 *ARG (unsigned int *) = (unsigned int) num
.ul
;
955 *ARG (LONGLONG
int *) = num
.q
;
956 else if (flags
& LONG
)
957 *ARG (long int *) = num
.l
;
958 else if (flags
& SHORT
)
959 *ARG (short int *) = (short int) num
.l
;
960 else if (flags
& CHAR
)
961 *ARG (signed char *) = (signed char) num
.ul
;
963 *ARG (int *) = (int) num
.l
;
969 case 'e': /* Floating-point numbers. */
980 /* Check for a sign. */
981 if (c
== '-' || c
== '+')
984 if (inchar () == EOF
)
985 /* EOF is only an input error before we read any chars. */
987 if (! isdigit (c
) && c
!= decimal
)
989 /* This is no valid number. */
999 /* Take care of the special arguments "nan" and "inf". */
1000 if (_tolower (c
) == 'n')
1004 if (inchar () == EOF
|| _tolower (c
) != 'a')
1007 if (inchar () == EOF
|| _tolower (c
) != 'n')
1013 else if (_tolower (c
) == 'i')
1015 /* Maybe "inf" or "infinity". */
1017 if (inchar () == EOF
|| _tolower (c
) != 'n')
1020 if (inchar () == EOF
|| _tolower (c
) != 'f')
1023 /* It is at least "inf". */
1024 if (inchar () != EOF
)
1026 if (_tolower (c
) == 'i')
1028 /* Now we have to read the rest as well. */
1030 if (inchar () == EOF
|| _tolower (c
) != 'n')
1033 if (inchar () == EOF
|| _tolower (c
) != 'i')
1036 if (inchar () == EOF
|| _tolower (c
) != 't')
1039 if (inchar () == EOF
|| _tolower (c
) != 'y')
1056 if (_tolower (c
) == 'x')
1058 /* It is a number in hexadecimal format. */
1064 /* Grouping is not allowed. */
1070 got_dot
= got_e
= 0;
1075 else if (!got_e
&& is_hexa
&& isxdigit (c
))
1077 else if (got_e
&& wp
[wpsize
- 1] == exp_char
1078 && (c
== '-' || c
== '+'))
1080 else if (wpsize
> 0 && !got_e
&& _tolower (c
) == exp_char
)
1083 got_e
= got_dot
= 1;
1085 else if (c
== decimal
&& !got_dot
)
1090 else if ((flags
& GROUP
) && c
== thousands
&& !got_dot
)
1094 /* The last read character is not part of the number
1102 while (width
!= 0 && inchar () != EOF
);
1104 /* Have we read any character? If we try to read a number
1105 in hexadecimal notation and we have read only the `0x'
1106 prefix this is an error. */
1107 if (wpsize
== 0 || (is_hexa
&& wpsize
== 2))
1111 /* Convert the number. */
1113 if (flags
& LONGDBL
)
1115 long double d
= __strtold_internal (wp
, &tw
, flags
& GROUP
);
1116 if (!(flags
& SUPPRESS
) && tw
!= wp
)
1117 *ARG (long double *) = negative
? -d
: d
;
1119 else if (flags
& LONG
)
1121 double d
= __strtod_internal (wp
, &tw
, flags
& GROUP
);
1122 if (!(flags
& SUPPRESS
) && tw
!= wp
)
1123 *ARG (double *) = negative
? -d
: d
;
1127 float d
= __strtof_internal (wp
, &tw
, flags
& GROUP
);
1128 if (!(flags
& SUPPRESS
) && tw
!= wp
)
1129 *ARG (float *) = negative
? -d
: d
;
1135 if (!(flags
& SUPPRESS
))
1139 case '[': /* Character class. */
1142 STRING_ARG (wstr
, wchar_t);
1143 c
= '\0'; /* This is to keep gcc quiet. */
1147 STRING_ARG (str
, char);
1162 /* Fill WP with byte flags indexed by character.
1163 We will use this flag map for matching input characters. */
1164 if (wpmax
< UCHAR_MAX
)
1167 wp
= (char *) alloca (wpmax
);
1169 memset (wp
, 0, UCHAR_MAX
);
1172 if (fc
== ']' || fc
== '-')
1174 /* If ] or - appears before any char in the set, it is not
1175 the terminator or separator, but the first char in the
1181 while ((fc
= *f
++) != '\0' && fc
!= ']')
1183 if (fc
== '-' && *f
!= '\0' && *f
!= ']' &&
1184 (unsigned char) f
[-2] <= (unsigned char) *f
)
1186 /* Add all characters from the one before the '-'
1187 up to (but not including) the next format char. */
1188 for (fc
= f
[-2]; fc
< *f
; ++fc
)
1192 /* Add the character to the flag map. */
1197 if (!(flags
& LONG
))
1210 NEXT_WIDE_CHAR (first
);
1211 if (val
<= 255 && wp
[val
] == not_in
)
1216 STRING_ADD_CHAR (wstr
, val
, wchar_t);
1226 if (!(flags
& SUPPRESS
))
1234 num
.ul
= read_in
- 1; /* -1 because we already read one char. */
1237 if (wp
[c
] == not_in
)
1242 STRING_ADD_CHAR (str
, c
, char);
1246 while (width
!= 0 && inchar () != EOF
);
1248 if (read_in
== num
.ul
)
1251 if (!(flags
& SUPPRESS
))
1259 case 'p': /* Generic pointer. */
1261 /* A PTR must be the same size as a `long int'. */
1262 flags
&= ~(SHORT
|LONGDBL
);
1269 /* If this is an unknown format character punt. */
1274 /* The last thing we saw in the format string was a white space.
1275 Consume the last white spaces. */
1280 while (isspace (c
));
1284 /* Unlock stream. */
1292 __vfscanf (FILE *s
, const char *format
, va_list argptr
)
1294 return _IO_vfscanf (s
, format
, argptr
, NULL
);
1298 weak_alias (__vfscanf
, vfscanf
)