1 /* Copyright (C) 1991, 92, 93, 94, 95, 96, 97 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
4 The GNU C Library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Library General Public License as
6 published by the Free Software Foundation; either version 2 of the
7 License, or (at your option) any later version.
9 The GNU C Library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Library General Public License for more details.
14 You should have received a copy of the GNU Library General Public
15 License along with the GNU C Library; see the file COPYING.LIB. If not,
16 write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
17 Boston, MA 02111-1307, USA. */
19 #include "../locale/localeinfo.h"
28 #include <libc-lock.h>
32 #define LONGLONG long long
37 /* Those are flags in the conversion format. */
38 # define LONG 0x001 /* l: long or double */
39 # define LONGDBL 0x002 /* L: long long or long double */
40 # define SHORT 0x004 /* h: short */
41 # define SUPPRESS 0x008 /* *: suppress assignment */
42 # define POINTER 0x010 /* weird %p pointer (`fake hex') */
43 # define NOSKIP 0x020 /* do not skip blanks */
44 # define WIDTH 0x040 /* width was given */
45 # define GROUP 0x080 /* ': group numbers */
46 # define MALLOC 0x100 /* a: malloc strings */
48 # define TYPEMOD (LONG|LONGDBL|SHORT)
56 # define va_list _IO_va_list
57 # define ungetc(c, s) (--read_in, _IO_ungetc (c, s))
58 # define inchar() ((c = _IO_getc_unlocked (s)), (void) ++read_in, c)
59 # define encode_error() do { \
60 if (errp != NULL) *errp |= 4; \
61 _IO_funlockfile (s); \
62 __set_errno (EILSEQ); \
65 # define conv_error() do { \
66 if (errp != NULL) *errp |= 2; \
67 _IO_funlockfile (s); \
70 # define input_error() do { \
71 _IO_funlockfile (s); \
72 if (errp != NULL) *errp |= 1; \
75 # define memory_error() do { \
76 _IO_funlockfile (s); \
77 __set_errno (ENOMEM); \
80 # define ARGCHECK(s, format) \
83 /* Check file argument for consistence. */ \
84 CHECK_FILE (s, EOF); \
85 if (s->_flags & _IO_NO_READS) \
87 __set_errno (EBADF); \
90 else if (format == NULL) \
96 # define LOCK_STREAM(S) \
97 __libc_cleanup_region_start ((void (*) (void *)) &_IO_funlockfile, (S)); \
99 # define UNLOCK_STREAM __libc_cleanup_region_end (1)
101 # define ungetc(c, s) (--read_in, ungetc (c, s))
102 # define inchar() ((c = getc (s)), (void) ++read_in, c)
103 # define encode_error() do { \
105 __set_errno (EILSEQ); \
108 # define conv_error() do { \
112 # define input_error() do { \
114 return done ?: EOF; \
116 # define memory_error() do { \
118 __set_errno (ENOMEM); \
121 # define ARGCHECK(s, format) \
124 /* Check file argument for consistence. */ \
125 if (!__validfp (s) || !s->__mode.__read) \
127 __set_errno (EBADF); \
130 else if (format == NULL) \
132 __set_errno (EINVAL); \
137 /* XXX For now !!! */
138 # define flockfile(S) /* nothing */
139 # define funlockfile(S) /* nothing */
140 # define LOCK_STREAM(S)
141 # define UNLOCK_STREAM
143 # define LOCK_STREAM(S) \
144 __libc_cleanup_region_start (&__funlockfile, (S)); \
146 # define UNLOCK_STREAM __libc_cleanup_region_end (1)
151 /* Read formatted input from S according to the format string
152 FORMAT, using the argument list in ARG.
153 Return the number of assignments made, or -1 for an input error. */
156 _IO_vfscanf (s
, format
, argptr
, errp
)
163 __vfscanf (FILE *s
, const char *format
, va_list argptr
)
167 register const char *f
= format
;
168 register unsigned char fc
; /* Current character of the format. */
169 register size_t done
= 0; /* Assignments done. */
170 register size_t read_in
= 0; /* Chars read in. */
171 register int c
= 0; /* Last char read. */
172 register int width
; /* Maximum field width. */
173 register int flags
; /* Modifiers for current format element. */
175 /* Status for reading F-P nums. */
176 char got_dot
, got_e
, negative
;
177 /* If a [...] is a [^...]. */
179 #define exp_char not_in
180 /* Base for integral numbers. */
182 /* Signedness for integral numbers. */
184 #define is_hexa number_signed
185 /* Decimal point character. */
187 /* The thousands character of the current locale. */
189 /* Integral holding variables. */
193 unsigned long long int uq
;
195 unsigned long int ul
;
197 /* Character-buffer pointer. */
199 wchar_t *wstr
= NULL
;
200 char **strptr
= NULL
;
202 /* We must not react to white space immediately, because it can
203 still be matched even when the input stream has no more
204 characters available. */
207 char *tw
; /* Temporary pointer. */
208 char *wp
= NULL
; /* Workspace. */
209 size_t wpmax
= 0; /* Maximal size of workspace. */
210 size_t wpsize
; /* Currently used bytes in workspace. */
214 if (wpsize == wpmax) \
217 wpmax = UCHAR_MAX > 2 * wpmax ? UCHAR_MAX : 2 * wpmax; \
218 wp = (char *) alloca (wpmax); \
220 memcpy (wp, old, wpsize); \
222 wp[wpsize++] = (Ch); \
227 __va_copy (arg
, argptr
);
229 arg
= (va_list) argptr
;
232 ARGCHECK (s
, format
);
234 /* Figure out the decimal point character. */
235 if (mbtowc (&decimal
, _NL_CURRENT (LC_NUMERIC
, DECIMAL_POINT
),
236 strlen (_NL_CURRENT (LC_NUMERIC
, DECIMAL_POINT
))) <= 0)
237 decimal
= (wchar_t) *_NL_CURRENT (LC_NUMERIC
, DECIMAL_POINT
);
238 /* Figure out the thousands separator character. */
239 if (mbtowc (&thousands
, _NL_CURRENT (LC_NUMERIC
, THOUSANDS_SEP
),
240 strlen (_NL_CURRENT (LC_NUMERIC
, THOUSANDS_SEP
))) <= 0)
241 thousands
= (wchar_t) *_NL_CURRENT (LC_NUMERIC
, THOUSANDS_SEP
);
243 /* Lock the stream. */
246 /* Run through the format string. */
250 /* Extract the next argument, which is of type TYPE.
251 For a %N$... spec, this is the Nth argument from the beginning;
252 otherwise it is the next argument after the state now in ARG. */
254 # define ARG(type) (argpos == 0 ? va_arg (arg, type) : \
255 ({ unsigned int pos = argpos; \
257 __va_copy (arg, argptr); \
259 (void) va_arg (arg, void *); \
260 va_arg (arg, type); \
264 /* XXX Possible optimization. */
265 # define ARG(type) (argpos == 0 ? va_arg (arg, type) : \
266 ({ va_list arg = (va_list) argptr; \
267 arg = (va_list) ((char *) arg \
269 * __va_rounded_size (void *)); \
270 va_arg (arg, type); \
273 # define ARG(type) (argpos == 0 ? va_arg (arg, type) : \
274 ({ unsigned int pos = argpos; \
275 va_list arg = (va_list) argptr; \
277 (void) va_arg (arg, void *); \
278 va_arg (arg, type); \
285 /* Non-ASCII, may be a multibyte. */
286 int len
= mblen (f
, strlen (f
));
308 /* Remember to skip spaces. */
315 /* Read a character. */
318 /* Characters other than format specs must just match. */
322 /* We saw white space char as the last character in the format
323 string. Now it's time to skip all leading white space. */
327 if (inchar () == EOF
&& errno
== EINTR
)
341 /* This is the start of the conversion string. */
344 /* Initialize state of modifiers. */
347 /* Prepare temporary buffer. */
350 /* Check for a positional parameter specification. */
355 argpos
= argpos
* 10 + (*f
++ - '0');
360 /* Oops; that was actually the field width. */
368 /* Check for the assignment-suppressing and the number grouping flag. */
369 while (*f
== '*' || *f
== '\'')
380 /* We have seen width. */
384 /* Find the maximum field width. */
395 /* Check for type modifiers. */
396 while (*f
== 'h' || *f
== 'l' || *f
== 'L' || *f
== 'a' || *f
== 'q')
400 /* int's are short int's. */
402 /* Signal illegal format element. */
407 if (flags
& (SHORT
|LONGDBL
))
409 else if (flags
& LONG
)
411 /* A double `l' is equivalent to an `L'. */
416 /* int's are long int's. */
421 /* double's are long double's, and int's are long long int's. */
423 /* Signal illegal format element. */
429 /* Signal illegal format element. */
431 /* String conversions (%s, %[) take a `char **'
432 arg and fill it in with a malloc'd pointer. */
437 /* End of the format string? */
441 /* We must take care of EINTR errors. */
442 if (c
== EOF
&& errno
== EINTR
)
445 /* Find the conversion specifier. */
447 if (skip_space
|| (fc
!= '[' && fc
!= 'c' && fc
!= 'C' && fc
!= 'n'))
449 /* Eat whitespace. */
451 if (inchar () == EOF
&& errno
== EINTR
)
460 case '%': /* Must match a literal '%'. */
469 case 'n': /* Answer number of assignments done. */
470 /* Corrigendum 1 to ISO C 1990 describes the allowed flags
471 with the 'n' conversion specifier. */
472 if (!(flags
& SUPPRESS
))
474 /* Don't count the read-ahead. */
476 *ARG (long long int *) = read_in
;
477 else if (flags
& LONG
)
478 *ARG (long int *) = read_in
;
479 else if (flags
& SHORT
)
480 *ARG (short int *) = read_in
;
482 *ARG (int *) = read_in
;
484 #ifdef NO_BUG_IN_ISO_C_CORRIGENDUM_1
485 /* We have a severe problem here. The ISO C standard
486 contradicts itself in explaining the effect of the %n
487 format in `scanf'. While in ISO C:1990 and the ISO C
488 Amendment 1:1995 the result is described as
490 Execution of a %n directive does not affect the
491 assignment count returned at the completion of
492 execution of the f(w)scanf function.
494 in ISO C Corrigendum 1:1994 the following was added:
497 Add the following fourth example:
500 int d1, d2, n1, n2, i;
501 i = sscanf("123", "%d%n%n%d", &d1, &n1, &n2, &d2);
502 the value 123 is assigned to d1 and the value 3 to n1.
503 Because %n can never get an input failure the value
504 of 3 is also assigned to n2. The value of d2 is not
505 affected. The value 3 is assigned to i.
507 We go for now with the historically correct code from ISO C,
508 i.e., we don't count the %n assignments. If this ever
509 proves to be wrong, just remove the #ifdef above. */
515 case 'c': /* Match characters. */
516 if ((flags
& LONG
) == 0)
518 if (!(flags
& SUPPRESS
))
532 if (!(flags
& SUPPRESS
))
536 while (--width
> 0 && inchar () != EOF
);
539 while (--width
> 0 && inchar () != EOF
);
542 /* I.e., EOF was read. */
545 if (!(flags
& SUPPRESS
))
552 /* Get UTF-8 encoded wide character. Here we assume (as in
553 other parts of the libc) that we only have to handle
560 if (!(flags
& SUPPRESS
))
562 wstr
= ARG (wchar_t *);
569 #define NEXT_WIDE_CHAR(First) \
572 /* EOF is only an error for the first character. */ \
583 if ((c & 0xc0) == 0x80 || (c & 0xfe) == 0xfe) \
585 if ((c & 0xe0) == 0xc0) \
587 /* We expect two bytes. */ \
591 else if ((c & 0xf0) == 0xe0) \
593 /* We expect three bytes. */ \
597 else if ((c & 0xf8) == 0xf0) \
599 /* We expect four bytes. */ \
603 else if ((c & 0xfc) == 0xf8) \
605 /* We expect five bytes. */ \
611 /* We expect six bytes. */ \
620 || (c & 0xc0) == 0x80 || (c & 0xfe) == 0xfe) \
628 if (!(flags & SUPPRESS)) \
632 NEXT_WIDE_CHAR (first
);
637 /* I.e., EOF was read. */
640 if (!(flags
& SUPPRESS
))
645 case 's': /* Read a string. */
647 /* We have to process a wide character string. */
648 goto wide_char_string
;
650 #define STRING_ARG(Str, Type) \
651 if (!(flags & SUPPRESS)) \
653 if (flags & MALLOC) \
655 /* The string is to be stored in a malloc'd buffer. */ \
656 strptr = ARG (char **); \
657 if (strptr == NULL) \
659 /* Allocate an initial buffer. */ \
661 *strptr = malloc (strsize * sizeof (Type)); \
662 Str = (Type *) *strptr; \
665 Str = ARG (Type *); \
669 STRING_ARG (str
, char);
682 #define STRING_ADD_CHAR(Str, c, Type) \
683 if (!(flags & SUPPRESS)) \
686 if ((flags & MALLOC) && (char *) Str == *strptr + strsize) \
688 /* Enlarge the buffer. */ \
689 Str = realloc (*strptr, strsize * 2 * sizeof (Type)); \
692 /* Can't allocate that much. Last-ditch effort. */\
693 Str = realloc (*strptr, \
694 (strsize + 1) * sizeof (Type)); \
697 /* We lose. Oh well. \
698 Terminate the string and stop converting, \
699 so at least we don't skip any input. */ \
700 ((Type *) (*strptr))[strsize] = '\0'; \
706 *strptr = (char *) Str; \
707 Str = ((Type *) *strptr) + strsize; \
713 *strptr = (char *) Str; \
714 Str = ((Type *) *strptr) + strsize; \
719 STRING_ADD_CHAR (str
, c
, char);
720 } while ((width
<= 0 || --width
> 0) && inchar () != EOF
);
725 if (!(flags
& SUPPRESS
))
733 /* Wide character string. */
738 STRING_ARG (wstr
, wchar_t);
743 NEXT_WIDE_CHAR (first
);
747 /* XXX We would have to push back the whole wide char
748 with possibly many bytes. But since scanf does
749 not make a difference for white space characters
750 we can simply push back a simple <SP> which is
751 guaranteed to be in the [:space:] class. */
756 STRING_ADD_CHAR (wstr
, val
, wchar_t);
759 while (width
<= 0 || --width
> 0);
761 if (!(flags
& SUPPRESS
))
769 case 'x': /* Hexadecimal integer. */
770 case 'X': /* Ditto. */
775 case 'o': /* Octal integer. */
780 case 'u': /* Unsigned decimal integer. */
785 case 'd': /* Signed decimal integer. */
790 case 'i': /* Generic number. */
799 /* Check for a sign. */
800 if (c
== '-' || c
== '+')
808 /* Look for a leading indication of base. */
809 if (width
!= 0 && c
== '0')
817 if (width
!= 0 && tolower (c
) == 'x')
835 /* Read the number into workspace. */
836 while (c
!= EOF
&& width
!= 0)
838 if (base
== 16 ? !isxdigit (c
) :
839 ((!isdigit (c
) || c
- '0' >= base
) &&
840 !((flags
& GROUP
) && base
== 10 && c
== thousands
)))
849 /* The just read character is not part of the number anymore. */
853 (wpsize
== 1 && (wp
[0] == '+' || wp
[0] == '-')))
854 /* There was no number. */
857 /* Convert the number. */
862 num
.q
= __strtoll_internal (wp
, &tw
, base
, flags
& GROUP
);
864 num
.uq
= __strtoull_internal (wp
, &tw
, base
, flags
& GROUP
);
869 num
.l
= __strtol_internal (wp
, &tw
, base
, flags
& GROUP
);
871 num
.ul
= __strtoul_internal (wp
, &tw
, base
, flags
& GROUP
);
876 if (!(flags
& SUPPRESS
))
881 *ARG (unsigned LONGLONG
int *) = num
.uq
;
882 else if (flags
& LONG
)
883 *ARG (unsigned long int *) = num
.ul
;
884 else if (flags
& SHORT
)
885 *ARG (unsigned short int *)
886 = (unsigned short int) num
.ul
;
888 *ARG (unsigned int *) = (unsigned int) num
.ul
;
893 *ARG (LONGLONG
int *) = num
.q
;
894 else if (flags
& LONG
)
895 *ARG (long int *) = num
.l
;
896 else if (flags
& SHORT
)
897 *ARG (short int *) = (short int) num
.l
;
899 *ARG (int *) = (int) num
.l
;
905 case 'e': /* Floating-point numbers. */
915 /* Check for a sign. */
916 if (c
== '-' || c
== '+')
919 if (inchar () == EOF
)
920 /* EOF is only an input error before we read any chars. */
934 if (tolower (c
) == 'x')
936 /* It is a number in hexadecimal format. */
942 /* Grouping is not allowed. */
953 else if (!got_e
&& is_hexa
&& isxdigit (c
))
955 else if (got_e
&& wp
[wpsize
- 1] == exp_char
956 && (c
== '-' || c
== '+'))
958 else if (wpsize
> 0 && !got_e
&& tolower (c
) == exp_char
)
963 else if (c
== decimal
&& !got_dot
)
968 else if ((flags
& GROUP
) && c
== thousands
&& !got_dot
)
972 /* The last read character is not part of the number
980 while (width
!= 0 && inchar () != EOF
);
982 /* Have we read any character? If we try to read a number
983 in hexadecimal notation and we have read only the `0x'
984 prefix this is an error. */
985 if (wpsize
== 0 || (is_hexa
&& wpsize
== 2))
988 /* Convert the number. */
992 long double d
= __strtold_internal (wp
, &tw
, flags
& GROUP
);
993 if (!(flags
& SUPPRESS
) && tw
!= wp
)
994 *ARG (long double *) = negative
? -d
: d
;
996 else if (flags
& LONG
)
998 double d
= __strtod_internal (wp
, &tw
, flags
& GROUP
);
999 if (!(flags
& SUPPRESS
) && tw
!= wp
)
1000 *ARG (double *) = negative
? -d
: d
;
1004 float d
= __strtof_internal (wp
, &tw
, flags
& GROUP
);
1005 if (!(flags
& SUPPRESS
) && tw
!= wp
)
1006 *ARG (float *) = negative
? -d
: d
;
1012 if (!(flags
& SUPPRESS
))
1016 case '[': /* Character class. */
1019 STRING_ARG (wstr
, wchar_t);
1020 c
= '\0'; /* This is to keep gcc quiet. */
1024 STRING_ARG (str
, char);
1039 /* Fill WP with byte flags indexed by character.
1040 We will use this flag map for matching input characters. */
1041 if (wpmax
< UCHAR_MAX
)
1044 wp
= (char *) alloca (wpmax
);
1046 memset (wp
, 0, UCHAR_MAX
);
1049 if (fc
== ']' || fc
== '-')
1051 /* If ] or - appears before any char in the set, it is not
1052 the terminator or separator, but the first char in the
1058 while ((fc
= *f
++) != '\0' && fc
!= ']')
1060 if (fc
== '-' && *f
!= '\0' && *f
!= ']' &&
1061 (unsigned char) f
[-2] <= (unsigned char) *f
)
1063 /* Add all characters from the one before the '-'
1064 up to (but not including) the next format char. */
1065 for (fc
= f
[-2]; fc
< *f
; ++fc
)
1069 /* Add the character to the flag map. */
1074 if (!(flags
& LONG
))
1087 NEXT_WIDE_CHAR (first
);
1088 if (val
> 255 || wp
[val
] == not_in
)
1090 /* XXX We have a problem here. We read a wide
1091 character and this possibly took several
1092 bytes. But we can only push back one single
1093 character. To be sure we don't create wrong
1094 input we push it back only in case it is
1095 representable within one byte. */
1100 STRING_ADD_CHAR (wstr
, val
, wchar_t);
1110 if (!(flags
& SUPPRESS
))
1118 num
.ul
= read_in
- 1; /* -1 because we already read one char. */
1121 if (wp
[c
] == not_in
)
1126 STRING_ADD_CHAR (str
, c
, char);
1130 while (width
!= 0 && inchar () != EOF
);
1132 if (read_in
== num
.ul
)
1135 if (!(flags
& SUPPRESS
))
1143 case 'p': /* Generic pointer. */
1145 /* A PTR must be the same size as a `long int'. */
1146 flags
&= ~(SHORT
|LONGDBL
);
1153 /* The last thing we saw in the format string was a white space.
1154 Consume the last white spaces. */
1159 while (isspace (c
));
1163 /* Unlock stream. */
1171 __vfscanf (FILE *s
, const char *format
, va_list argptr
)
1173 return _IO_vfscanf (s
, format
, argptr
, NULL
);
1177 weak_alias (__vfscanf
, vfscanf
)