update from main archive 961001
[glibc.git] / stdio-common / vfscanf.c
blob5564e2b0e68adbf8af73dbdda1568e705460a348
/* Copyright (C) 1991, 92, 93, 94, 95, 96 Free Software Foundation, Inc.
This file is part of the GNU C Library.

The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.

The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Library General Public License for more details.

You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB.  If
not, write to the Free Software Foundation, Inc., 675 Mass Ave,
Cambridge, MA 02139, USA.  */
19 #include "../locale/localeinfo.h"
20 #include <errno.h>
21 #include <limits.h>
22 #include <ctype.h>
23 #include <stdarg.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <libc-lock.h>
/* LONGLONG names the widest integer type used when storing `L'/`q'/`ll'
   integer conversions.  With GCC `long long' is available; other
   compilers fall back to plain `long'.  */
#ifdef __GNUC__
#define HAVE_LONGLONG
#define LONGLONG	long long
#else
#define LONGLONG	long
#endif
/* These are the flags collected while parsing one conversion
   specification.  Each is a distinct bit so that they can be OR-ed
   together in a single `flags' word.  */
#define LONG		0x001	/* l: long or double */
#define LONGDBL		0x002	/* L: long long or long double */
#define SHORT		0x004	/* h: short */
#define SUPPRESS	0x008	/* *: suppress assignment */
#define POINTER		0x010	/* weird %p pointer (`fake hex') */
#define NOSKIP		0x020	/* do not skip blanks */
#define WIDTH		0x040	/* width was given */
#define GROUP		0x080	/* ': group numbers */
#define MALLOC		0x100	/* a: malloc strings */

/* Mask of the mutually exclusive size modifiers.  */
#define TYPEMOD		(LONG|LONGDBL|SHORT)
/* Stream access helpers used by the scanner below.  They are macros
   rather than functions because they read and modify locals of the
   scanning function (`s', `c', `read_in', `done', and in the libio
   build `errp') and because the *_error() macros return from it.

     ungetc (c, s)	push C back and decrement the READ_IN count
     inchar ()		read the next character into C, count it, yield it
     conv_error ()	unlock the stream and return DONE
     input_error ()	unlock the stream and return DONE, or EOF if
			DONE is zero
     memory_error ()	unlock the stream, set errno to ENOMEM, return EOF
     ARGCHECK (s, f)	reject an unusable stream or a null format
     LOCK_STREAM (S) / UNLOCK_STREAM
			bracket the region in which the stream is used  */
#ifdef USE_IN_LIBIO
# include <libioP.h>
# include <libio.h>

# undef va_list
# define va_list	_IO_va_list
# define ungetc(c, s)	(--read_in, _IO_ungetc (c, s))
# define inchar()	((c = _IO_getc_unlocked (s)), (void) ++read_in, c)
/* In this build the error macros also record the kind of failure in
   *ERRP when the caller supplied one: bit 2 for a conversion error,
   bit 1 for an input error.  */
# define conv_error()	do {						      \
			  if (errp != NULL) *errp |= 2;			      \
			  _IO_funlockfile (s);				      \
			  return done;					      \
			} while (0)
# define input_error()	do {						      \
			  _IO_funlockfile (s);				      \
			  if (errp != NULL) *errp |= 1;			      \
			  return done ?: EOF;				      \
			} while (0)
# define memory_error()	do {						      \
			  _IO_funlockfile (s);				      \
			  __set_errno (ENOMEM);				      \
			  return EOF;					      \
			} while (0)
# define ARGCHECK(s, format)						      \
  do									      \
    {									      \
      /* Check file argument for consistency.  */			      \
      CHECK_FILE (s, EOF);						      \
      if (s->_flags & _IO_NO_READS || format == NULL)			      \
	{								      \
	  MAYBE_SET_EINVAL;						      \
	  return EOF;							      \
	}								      \
    } while (0)
# define LOCK_STREAM(S)							      \
  __libc_cleanup_region_start ((void (*) (void *)) &_IO_funlockfile, (S));    \
  _IO_flockfile (S)
# define UNLOCK_STREAM __libc_cleanup_region_end (1)
#else
# define ungetc(c, s)	(--read_in, ungetc (c, s))
# define inchar()	((c = getc (s)), (void) ++read_in, c)
# define conv_error()	do {						      \
			  funlockfile (s);				      \
			  return done;					      \
			} while (0)
# define input_error()	do {						      \
			  funlockfile (s);				      \
			  return done ?: EOF;				      \
			} while (0)
# define memory_error()	do {						      \
			  funlockfile (s);				      \
			  __set_errno (ENOMEM);				      \
			  return EOF;					      \
			} while (0)
# define ARGCHECK(s, format)						      \
  do									      \
    {									      \
      /* Check file argument for consistency.  */			      \
      if (!__validfp (s) || !s->__mode.__read || format == NULL)	      \
	{								      \
	  __set_errno (EINVAL);						      \
	  return EOF;							      \
	}								      \
    } while (0)
#if 1
      /* XXX For now !!!  Locking is compiled out in this build: the
	 lock/unlock macros expand to nothing.  */
# define flockfile(S) /* nothing */
# define funlockfile(S) /* nothing */
# define LOCK_STREAM(S)
# define UNLOCK_STREAM
#else
# define LOCK_STREAM(S)							      \
  __libc_cleanup_region_start (&__funlockfile, (S));			      \
  __flockfile (S)
# define UNLOCK_STREAM __libc_cleanup_region_end (1)
#endif
#endif
129 /* Read formatted input from S according to the format string
130 FORMAT, using the argument list in ARG.
131 Return the number of assignments made, or -1 for an input error. */
132 #ifdef USE_IN_LIBIO
134 _IO_vfscanf (s, format, argptr, errp)
135 _IO_FILE *s;
136 const char *format;
137 _IO_va_list argptr;
138 int *errp;
139 #else
141 __vfscanf (FILE *s, const char *format, va_list argptr)
142 #endif
144 va_list arg = (va_list) argptr;
146 register const char *f = format;
147 register unsigned char fc; /* Current character of the format. */
148 register size_t done = 0; /* Assignments done. */
149 register size_t read_in = 0; /* Chars read in. */
150 register int c; /* Last char read. */
151 register int width; /* Maximum field width. */
152 register int flags; /* Modifiers for current format element. */
154 /* Status for reading F-P nums. */
155 char got_dot, got_e, negative;
156 /* If a [...] is a [^...]. */
157 char not_in;
158 /* Base for integral numbers. */
159 int base;
160 /* Signedness for integral numbers. */
161 int number_signed;
162 /* Decimal point character. */
163 wchar_t decimal;
164 /* The thousands character of the current locale. */
165 wchar_t thousands;
166 /* Integral holding variables. */
167 union
169 long long int q;
170 unsigned long long int uq;
171 long int l;
172 unsigned long int ul;
173 } num;
174 /* Character-buffer pointer. */
175 register char *str, **strptr;
176 size_t strsize;
177 /* We must not react on white spaces immediately because they can
178 possibly be matched even if in the input stream no character is
179 available anymore. */
180 int skip_space = 0;
181 /* Workspace. */
182 char *tw; /* Temporary pointer. */
183 char *wp = NULL; /* Workspace. */
184 size_t wpmax = 0; /* Maximal size of workspace. */
185 size_t wpsize; /* Currently used bytes in workspace. */
186 #define ADDW(Ch) \
187 do \
189 if (wpsize == wpmax) \
191 char *old = wp; \
192 wpmax = UCHAR_MAX > 2 * wpmax ? UCHAR_MAX : 2 * wpmax; \
193 wp = (char *) alloca (wpmax); \
194 if (old != NULL) \
195 memcpy (wp, old, wpsize); \
197 wp[wpsize++] = (Ch); \
199 while (0)
201 ARGCHECK (s, format);
203 /* Figure out the decimal point character. */
204 if (mbtowc (&decimal, _NL_CURRENT (LC_NUMERIC, DECIMAL_POINT),
205 strlen (_NL_CURRENT (LC_NUMERIC, DECIMAL_POINT))) <= 0)
206 decimal = (wchar_t) *_NL_CURRENT (LC_NUMERIC, DECIMAL_POINT);
207 /* Figure out the thousands separator character. */
208 if (mbtowc (&thousands, _NL_CURRENT (LC_NUMERIC, THOUSANDS_SEP),
209 strlen (_NL_CURRENT (LC_NUMERIC, THOUSANDS_SEP))) <= 0)
210 thousands = (wchar_t) *_NL_CURRENT (LC_NUMERIC, THOUSANDS_SEP);
212 /* Lock the stream. */
213 LOCK_STREAM (s);
215 /* Run through the format string. */
216 while (*f != '\0')
218 unsigned int argpos;
219 /* Extract the next argument, which is of type TYPE.
220 For a %N$... spec, this is the Nth argument from the beginning;
221 otherwise it is the next argument after the state now in ARG. */
222 #if 0
223 /* XXX Possible optimization. */
224 # define ARG(type) (argpos == 0 ? va_arg (arg, type) : \
225 ({ va_list arg = (va_list) argptr; \
226 arg = (va_list) ((char *) arg \
227 + (argpos - 1) \
228 * __va_rounded_size (void *)); \
229 va_arg (arg, type); \
231 #else
232 # define ARG(type) (argpos == 0 ? va_arg (arg, type) : \
233 ({ unsigned int pos = argpos; \
234 va_list arg = (va_list) argptr; \
235 while (--pos > 0) \
236 (void) va_arg (arg, void *); \
237 va_arg (arg, type); \
239 #endif
241 if (!isascii (*f))
243 /* Non-ASCII, may be a multibyte. */
244 int len = mblen (f, strlen (f));
245 if (len > 0)
249 c = inchar ();
250 if (c == EOF)
251 input_error ();
252 else if (c != *f++)
254 ungetc (c, s);
255 conv_error ();
258 while (--len > 0);
259 continue;
263 fc = *f++;
264 if (fc != '%')
266 /* Remember to skip spaces. */
267 if (isspace (fc))
269 skip_space = 1;
270 continue;
273 /* Read a character. */
274 c = inchar ();
276 /* Characters other than format specs must just match. */
277 if (c == EOF)
278 input_error ();
280 /* We saw white space char as the last character in the format
281 string. Now it's time to skip all leading white space. */
282 if (skip_space)
284 while (isspace (c))
285 (void) inchar ();
286 skip_space = 0;
289 if (c != fc)
291 ungetc (c, s);
292 conv_error ();
295 continue;
298 /* This is the start of the conversion string. */
299 flags = 0;
301 /* Initialize state of modifiers. */
302 argpos = 0;
304 /* Prepare temporary buffer. */
305 wpsize = 0;
307 /* Check for a positional parameter specification. */
308 if (isdigit (*f))
310 argpos = *f++ - '0';
311 while (isdigit (*f))
312 argpos = argpos * 10 + (*f++ - '0');
313 if (*f == '$')
314 ++f;
315 else
317 /* Oops; that was actually the field width. */
318 width = argpos;
319 flags |= WIDTH;
320 argpos = 0;
321 goto got_width;
325 /* Check for the assignment-suppressant and the number grouping flag. */
326 while (*f == '*' || *f == '\'')
327 switch (*f++)
329 case '*':
330 flags |= SUPPRESS;
331 break;
332 case '\'':
333 flags |= GROUP;
334 break;
337 /* We have seen width. */
338 if (isdigit (*f))
339 flags |= WIDTH;
341 /* Find the maximum field width. */
342 width = 0;
343 while (isdigit (*f))
345 width *= 10;
346 width += *f++ - '0';
348 got_width:
349 if (width == 0)
350 width = -1;
352 /* Check for type modifiers. */
353 while (*f == 'h' || *f == 'l' || *f == 'L' || *f == 'a' || *f == 'q')
354 switch (*f++)
356 case 'h':
357 /* int's are short int's. */
358 if (flags & TYPEMOD)
359 /* Signal illegal format element. */
360 conv_error ();
361 flags |= SHORT;
362 break;
363 case 'l':
364 if (flags & (SHORT|LONGDBL))
365 conv_error ();
366 else if (flags & LONG)
368 /* A double `l' is equivalent to an `L'. */
369 flags &= ~LONG;
370 flags |= LONGDBL;
372 else
373 /* int's are long int's. */
374 flags |= LONG;
375 break;
376 case 'q':
377 case 'L':
378 /* double's are long double's, and int's are long long int's. */
379 if (flags & TYPEMOD)
380 /* Signal illegal format element. */
381 conv_error ();
382 flags |= LONGDBL;
383 break;
384 case 'a':
385 if (flags & TYPEMOD)
386 /* Signal illegal format element. */
387 conv_error ();
388 /* String conversions (%s, %[) take a `char **'
389 arg and fill it in with a malloc'd pointer. */
390 flags |= MALLOC;
391 break;
394 /* End of the format string? */
395 if (*f == '\0')
396 conv_error ();
398 /* Find the conversion specifier. */
399 fc = *f++;
400 if (skip_space || (fc != '[' && fc != 'c' && fc != 'n'))
402 /* Eat whitespace. */
404 (void) inchar ();
405 while (isspace (c));
406 ungetc (c, s);
407 skip_space = 0;
410 switch (fc)
412 case '%': /* Must match a literal '%'. */
413 c = inchar ();
414 if (c != fc)
416 ungetc (c, s);
417 conv_error ();
419 break;
421 case 'n': /* Answer number of assignments done. */
422 /* Corrigendum 1 to ISO C 1990 describes the allowed flags
423 with the 'n' conversion specifier. */
424 if (!(flags & SUPPRESS))
425 /* Don't count the read-ahead. */
426 if (flags & LONGDBL)
427 *ARG (long long int *) = read_in;
428 else if (flags & LONG)
429 *ARG (long int *) = read_in;
430 else if (flags & SHORT)
431 *ARG (short int *) = read_in;
432 else
433 *ARG (int *) = read_in;
434 break;
436 case 'c': /* Match characters. */
437 if (!(flags & SUPPRESS))
439 str = ARG (char *);
440 if (str == NULL)
441 conv_error ();
444 c = inchar ();
445 if (c == EOF)
446 input_error ();
448 if (width == -1)
449 width = 1;
451 if (!(flags & SUPPRESS))
454 *str++ = c;
455 while (--width > 0 && inchar () != EOF);
457 else
458 while (--width > 0 && inchar () != EOF);
460 if (width > 0)
461 /* I.e., EOF was read. */
462 --read_in;
464 if (!(flags & SUPPRESS))
465 ++done;
467 break;
469 case 's': /* Read a string. */
470 #define STRING_ARG \
471 if (!(flags & SUPPRESS)) \
473 if (flags & MALLOC) \
475 /* The string is to be stored in a malloc'd buffer. */ \
476 strptr = ARG (char **); \
477 if (strptr == NULL) \
478 conv_error (); \
479 /* Allocate an initial buffer. */ \
480 strsize = 100; \
481 *strptr = str = malloc (strsize); \
483 else \
484 str = ARG (char *); \
485 if (str == NULL) \
486 conv_error (); \
488 STRING_ARG;
490 c = inchar ();
491 if (c == EOF)
492 input_error ();
496 if (isspace (c))
498 ungetc (c, s);
499 break;
501 #define STRING_ADD_CHAR(c) \
502 if (!(flags & SUPPRESS)) \
504 *str++ = c; \
505 if ((flags & MALLOC) && str == *strptr + strsize) \
507 /* Enlarge the buffer. */ \
508 str = realloc (*strptr, strsize * 2); \
509 if (str == NULL) \
511 /* Can't allocate that much. Last-ditch effort. */\
512 str = realloc (*strptr, strsize + 1); \
513 if (str == NULL) \
515 /* We lose. Oh well. \
516 Terminate the string and stop converting, \
517 so at least we don't skip any input. */ \
518 (*strptr)[strsize] = '\0'; \
519 ++done; \
520 conv_error (); \
522 else \
524 *strptr = str; \
525 str += strsize; \
526 ++strsize; \
529 else \
531 *strptr = str; \
532 str += strsize; \
533 strsize *= 2; \
537 STRING_ADD_CHAR (c);
538 } while ((width <= 0 || --width > 0) && inchar () != EOF);
540 if (!(flags & SUPPRESS))
542 *str = '\0';
543 ++done;
545 break;
547 case 'x': /* Hexadecimal integer. */
548 case 'X': /* Ditto. */
549 base = 16;
550 number_signed = 0;
551 goto number;
553 case 'o': /* Octal integer. */
554 base = 8;
555 number_signed = 0;
556 goto number;
558 case 'u': /* Unsigned decimal integer. */
559 base = 10;
560 number_signed = 0;
561 goto number;
563 case 'd': /* Signed decimal integer. */
564 base = 10;
565 number_signed = 1;
566 goto number;
568 case 'i': /* Generic number. */
569 base = 0;
570 number_signed = 1;
572 number:
573 c = inchar ();
574 if (c == EOF)
575 input_error ();
577 /* Check for a sign. */
578 if (c == '-' || c == '+')
580 ADDW (c);
581 if (width > 0)
582 --width;
583 c = inchar ();
586 /* Look for a leading indication of base. */
587 if (width != 0 && c == '0')
589 if (width > 0)
590 --width;
592 ADDW (c);
593 c = inchar ();
595 if (width != 0 && tolower (c) == 'x')
597 if (base == 0)
598 base = 16;
599 if (base == 16)
601 if (width > 0)
602 --width;
603 c = inchar ();
606 else if (base == 0)
607 base = 8;
610 if (base == 0)
611 base = 10;
613 /* Read the number into workspace. */
614 while (c != EOF && width != 0)
616 if (base == 16 ? !isxdigit (c) :
617 ((!isdigit (c) || c - '0' >= base) &&
618 !((flags & GROUP) && base == 10 && c == thousands)))
619 break;
620 ADDW (c);
621 if (width > 0)
622 --width;
624 c = inchar ();
627 /* The just read character is not part of the number anymore. */
628 ungetc (c, s);
630 if (wpsize == 0 ||
631 (wpsize == 1 && (wp[0] == '+' || wp[0] == '-')))
632 /* There was no number. */
633 conv_error ();
635 /* Convert the number. */
636 ADDW ('\0');
637 if (flags & LONGDBL)
639 if (number_signed)
640 num.q = __strtoq_internal (wp, &tw, base, flags & GROUP);
641 else
642 num.uq = __strtouq_internal (wp, &tw, base, flags & GROUP);
644 else
646 if (number_signed)
647 num.l = __strtol_internal (wp, &tw, base, flags & GROUP);
648 else
649 num.ul = __strtoul_internal (wp, &tw, base, flags & GROUP);
651 if (wp == tw)
652 conv_error ();
654 if (!(flags & SUPPRESS))
656 if (! number_signed)
658 if (flags & LONGDBL)
659 *ARG (unsigned LONGLONG int *) = num.uq;
660 else if (flags & LONG)
661 *ARG (unsigned long int *) = num.ul;
662 else if (flags & SHORT)
663 *ARG (unsigned short int *)
664 = (unsigned short int) num.ul;
665 else
666 *ARG (unsigned int *) = (unsigned int) num.ul;
668 else
670 if (flags & LONGDBL)
671 *ARG (LONGLONG int *) = num.q;
672 else if (flags & LONG)
673 *ARG (long int *) = num.l;
674 else if (flags & SHORT)
675 *ARG (short int *) = (short int) num.l;
676 else
677 *ARG (int *) = (int) num.l;
679 ++done;
681 break;
683 case 'e': /* Floating-point numbers. */
684 case 'E':
685 case 'f':
686 case 'g':
687 case 'G':
688 c = inchar ();
689 if (c == EOF)
690 input_error ();
692 /* Check for a sign. */
693 if (c == '-' || c == '+')
695 negative = c == '-';
696 if (inchar () == EOF)
697 /* EOF is only an input error before we read any chars. */
698 conv_error ();
699 if (width > 0)
700 --width;
702 else
703 negative = 0;
705 got_dot = got_e = 0;
708 if (isdigit (c))
709 ADDW (c);
710 else if (got_e && wp[wpsize - 1] == 'e'
711 && (c == '-' || c == '+'))
712 ADDW (c);
713 else if (wpsize > 0 && !got_e && tolower (c) == 'e')
715 ADDW ('e');
716 got_e = got_dot = 1;
718 else if (c == decimal && !got_dot)
720 ADDW (c);
721 got_dot = 1;
723 else if ((flags & GROUP) && c == thousands && !got_dot)
724 ADDW (c);
725 else
726 break;
727 if (width > 0)
728 --width;
730 while (inchar () != EOF && width != 0);
732 /* The last read character is not part of the number anymore. */
733 ungetc (c, s);
735 if (wpsize == 0)
736 conv_error ();
738 /* Convert the number. */
739 ADDW ('\0');
740 if (flags & LONGDBL)
742 long double d = __strtold_internal (wp, &tw, flags & GROUP);
743 if (!(flags & SUPPRESS) && tw != wp)
744 *ARG (long double *) = negative ? -d : d;
746 else if (flags & LONG)
748 double d = __strtod_internal (wp, &tw, flags & GROUP);
749 if (!(flags & SUPPRESS) && tw != wp)
750 *ARG (double *) = negative ? -d : d;
752 else
754 float d = __strtof_internal (wp, &tw, flags & GROUP);
755 if (!(flags & SUPPRESS) && tw != wp)
756 *ARG (float *) = negative ? -d : d;
759 if (tw == wp)
760 conv_error ();
762 if (!(flags & SUPPRESS))
763 ++done;
764 break;
766 case '[': /* Character class. */
767 STRING_ARG;
769 c = inchar ();
770 if (c == EOF)
771 input_error ();
773 if (*f == '^')
775 ++f;
776 not_in = 1;
778 else
779 not_in = 0;
781 /* Fill WP with byte flags indexed by character.
782 We will use this flag map for matching input characters. */
783 if (wpmax < UCHAR_MAX)
785 wpmax = UCHAR_MAX;
786 wp = (char *) alloca (wpmax);
788 memset (wp, 0, UCHAR_MAX);
790 fc = *f;
791 if (fc == ']' || fc == '-')
793 /* If ] or - appears before any char in the set, it is not
794 the terminator or separator, but the first char in the
795 set. */
796 wp[fc] = 1;
797 ++f;
800 while ((fc = *f++) != '\0' && fc != ']')
802 if (fc == '-' && *f != '\0' && *f != ']' &&
803 (unsigned char) f[-2] <= (unsigned char) *f)
805 /* Add all characters from the one before the '-'
806 up to (but not including) the next format char. */
807 for (fc = f[-2]; fc < *f; ++fc)
808 wp[fc] = 1;
810 else
811 /* Add the character to the flag map. */
812 wp[fc] = 1;
814 if (fc == '\0')
816 ungetc (c, s);
817 conv_error();
820 num.ul = read_in - 1; /* -1 because we already read one char. */
823 if (wp[c] == not_in)
825 ungetc (c, s);
826 break;
828 STRING_ADD_CHAR (c);
829 if (width > 0)
830 --width;
832 while (width != 0 && inchar () != EOF);
834 if (read_in == num.ul)
835 conv_error ();
837 if (!(flags & SUPPRESS))
839 *str = '\0';
840 ++done;
842 break;
844 case 'p': /* Generic pointer. */
845 base = 16;
846 /* A PTR must be the same size as a `long int'. */
847 flags &= ~(SHORT|LONGDBL);
848 flags |= LONG;
849 number_signed = 0;
850 goto number;
854 /* The last thing we saw int the format string was a white space.
855 Consume the last white spaces. */
856 if (skip_space)
859 c = inchar ();
860 while (isspace (c));
861 ungetc (c, s);
864 /* Unlock stream. */
865 UNLOCK_STREAM;
867 return done;
870 #ifdef USE_IN_LIBIO
872 __vfscanf (FILE *s, const char *format, va_list argptr)
874 return _IO_vfscanf (s, format, argptr, NULL);
876 #endif
878 weak_alias (__vfscanf, vfscanf)