update from main archive 960911
[glibc.git] / stdio-common / vfscanf.c
blob903f5849cc8258db978028e6c4eee769e2441d81
1 /* Copyright (C) 1991, 92, 93, 94, 95, 96 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
4 The GNU C Library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Library General Public License as
6 published by the Free Software Foundation; either version 2 of the
7 License, or (at your option) any later version.
9 The GNU C Library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Library General Public License for more details.
14 You should have received a copy of the GNU Library General Public
15 License along with the GNU C Library; see the file COPYING.LIB. If
16 not, write to the Free Software Foundation, Inc., 675 Mass Ave,
17 Cambridge, MA 02139, USA. */
19 #include "../locale/localeinfo.h"
20 #include <errno.h>
21 #include <limits.h>
22 #include <ctype.h>
23 #include <stdarg.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
/* Select the widest integer type used for the `q', `L' and `ll'
   modifiers.  GCC has always provided `long long'; any C99 (or later)
   compiler provides it as well.  Everything else falls back to `long'.  */
#if defined __GNUC__ \
    || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
# define HAVE_LONGLONG
# define LONGLONG	long long
#else
# define LONGLONG	long
#endif
/* These are the flags collected while parsing one conversion
   specification.  Each flag occupies its own bit so they can be or'ed
   together in a single `int'.  */
# define LONG		0x001	/* l: long or double */
# define LONGDBL	0x002	/* L: long long or long double */
# define SHORT		0x004	/* h: short */
# define SUPPRESS	0x008	/* *: suppress assignment */
# define POINTER	0x010	/* weird %p pointer (`fake hex') */
# define NOSKIP		0x020	/* do not skip blanks */
# define WIDTH		0x040	/* width was given */
# define GROUP		0x080	/* ': group numbers */
# define MALLOC		0x100	/* a: malloc strings */

/* All flags that modify the size of the converted object.  */
# define TYPEMOD	(LONG|LONGDBL|SHORT)
/* Helper macros used by the scanning function below.  They expand in
   the context of that function and rely on its locals: S (the stream),
   C (last character read), READ_IN (characters consumed so far), DONE
   (assignments made) and, in the libio variant, ERRP.

     ungetc (c, s)    push C back and undo the READ_IN accounting
     inchar ()        read the next character into C and count it
     conv_error ()    matching failure: unlock and return DONE
     input_error ()   input failure: unlock and return DONE, or EOF if
		      nothing has been assigned yet
     memory_error ()  unlock, set errno to ENOMEM and return EOF
     ARGCHECK         reject an unusable stream or a NULL format
     LOCK_STREAM / UNLOCK_STREAM
		      bracket the region in which the stream is locked  */
#ifdef USE_IN_LIBIO
# include <libioP.h>
# include <libio.h>

# define va_list	_IO_va_list
# define ungetc(c, s)	(--read_in, _IO_ungetc (c, s))
# define inchar()	((c = _IO_getc_unlocked (s)), (void) ++read_in, c)
/* Bit 2 of *ERRP reports a conversion (matching) error.  */
# define conv_error()	do {						      \
			  if (errp != NULL) *errp |= 2;			      \
			  _IO_funlockfile (s);				      \
			  return done;					      \
			} while (0)
/* Bit 1 of *ERRP reports an input error.  */
# define input_error()	do {						      \
			  _IO_funlockfile (s);				      \
			  if (errp != NULL) *errp |= 1;			      \
			  return done ?: EOF;				      \
			} while (0)
# define memory_error()	do {						      \
			  _IO_funlockfile (s);				      \
			  errno = ENOMEM;				      \
			  return EOF;					      \
			} while (0)
# define ARGCHECK(s, format)						      \
  do									      \
    {									      \
      /* Check file argument for consistency.  */			      \
      CHECK_FILE (s, EOF);						      \
      if (((s)->_flags & _IO_NO_READS) || (format) == NULL)		      \
	{								      \
	  MAYBE_SET_EINVAL;						      \
	  return EOF;							      \
	}								      \
    } while (0)
# define LOCK_STREAM(S)							      \
  __libc_cleanup_region_start ((void (*) (void *)) &_IO_funlockfile, (S));   \
  _IO_flockfile (S)
# define UNLOCK_STREAM __libc_cleanup_region_end (1)
#else
# define ungetc(c, s)	(--read_in, ungetc (c, s))
# define inchar()	((c = getc (s)), (void) ++read_in, c)
# define conv_error()	do {						      \
			  funlockfile (s);				      \
			  return done;					      \
			} while (0)
# define input_error()	do {						      \
			  funlockfile (s);				      \
			  return done ?: EOF;				      \
			} while (0)
# define memory_error()	do {						      \
			  funlockfile (s);				      \
			  errno = ENOMEM;				      \
			  return EOF;					      \
			} while (0)
# define ARGCHECK(s, format)						      \
  do									      \
    {									      \
      /* Check file argument for consistency.  */			      \
      if (!__validfp (s) || !(s)->__mode.__read || (format) == NULL)	      \
	{								      \
	  errno = EINVAL;						      \
	  return EOF;							      \
	}								      \
    } while (0)
#if 1
/* XXX For now !!!  Locking is compiled out in this configuration.  */
# define flockfile(S) /* nothing */
# define funlockfile(S) /* nothing */
# define LOCK_STREAM(S)
# define UNLOCK_STREAM
#else
# define LOCK_STREAM(S)							      \
  __libc_cleanup_region_start (&__funlockfile, (S));			      \
  __flockfile (S)
/* The region opened by LOCK_STREAM must be closed with the matching
   `_end' call, exactly as in the libio variant above.  */
# define UNLOCK_STREAM __libc_cleanup_region_end (1)
#endif
#endif
127 /* Read formatted input from S according to the format string
128 FORMAT, using the argument list in ARG.
129 Return the number of assignments made, or -1 for an input error. */
130 #ifdef USE_IN_LIBIO
132 _IO_vfscanf (s, format, argptr, errp)
133 _IO_FILE *s;
134 const char *format;
135 _IO_va_list argptr;
136 int *errp;
137 #else
139 __vfscanf (FILE *s, const char *format, va_list argptr)
140 #endif
142 va_list arg = (va_list) argptr;
144 register const char *f = format;
145 register unsigned char fc; /* Current character of the format. */
146 register size_t done = 0; /* Assignments done. */
147 register size_t read_in = 0; /* Chars read in. */
148 register int c; /* Last char read. */
149 register int width; /* Maximum field width. */
150 register int flags; /* Modifiers for current format element. */
152 /* Status for reading F-P nums. */
153 char got_dot, got_e, negative;
154 /* If a [...] is a [^...]. */
155 char not_in;
156 /* Base for integral numbers. */
157 int base;
158 /* Signedness for integral numbers. */
159 int number_signed;
160 /* Decimal point character. */
161 wchar_t decimal;
162 /* The thousands character of the current locale. */
163 wchar_t thousands;
164 /* Integral holding variables. */
165 union
167 long long int q;
168 unsigned long long int uq;
169 long int l;
170 unsigned long int ul;
171 } num;
172 /* Character-buffer pointer. */
173 register char *str, **strptr;
174 size_t strsize;
175 /* We must not react on white spaces immediately because they can
176 possibly be matched even if in the input stream no character is
177 available anymore. */
178 int skip_space = 0;
179 /* Workspace. */
180 char *tw; /* Temporary pointer. */
181 char *wp = NULL; /* Workspace. */
182 size_t wpmax = 0; /* Maximal size of workspace. */
183 size_t wpsize; /* Currently used bytes in workspace. */
184 #define ADDW(Ch) \
185 do \
187 if (wpsize == wpmax) \
189 char *old = wp; \
190 wpmax = UCHAR_MAX > 2 * wpmax ? UCHAR_MAX : 2 * wpmax; \
191 wp = (char *) alloca (wpmax); \
192 if (old != NULL) \
193 memcpy (wp, old, wpsize); \
195 wp[wpsize++] = (Ch); \
197 while (0)
199 ARGCHECK (s, format);
201 /* Figure out the decimal point character. */
202 if (mbtowc (&decimal, _NL_CURRENT (LC_NUMERIC, DECIMAL_POINT),
203 strlen (_NL_CURRENT (LC_NUMERIC, DECIMAL_POINT))) <= 0)
204 decimal = (wchar_t) *_NL_CURRENT (LC_NUMERIC, DECIMAL_POINT);
205 /* Figure out the thousands separator character. */
206 if (mbtowc (&thousands, _NL_CURRENT (LC_NUMERIC, THOUSANDS_SEP),
207 strlen (_NL_CURRENT (LC_NUMERIC, THOUSANDS_SEP))) <= 0)
208 thousands = (wchar_t) *_NL_CURRENT (LC_NUMERIC, THOUSANDS_SEP);
210 /* Lock the stream. */
211 LOCK_STREAM (s);
213 /* Run through the format string. */
214 while (*f != '\0')
216 unsigned int argpos;
217 /* Extract the next argument, which is of type TYPE.
218 For a %N$... spec, this is the Nth argument from the beginning;
219 otherwise it is the next argument after the state now in ARG. */
220 #if 0
221 /* XXX Possible optimization. */
222 # define ARG(type) (argpos == 0 ? va_arg (arg, type) : \
223 ({ va_list arg = (va_list) argptr; \
224 arg = (va_list) ((char *) arg \
225 + (argpos - 1) \
226 * __va_rounded_size (void *)); \
227 va_arg (arg, type); \
229 #else
230 # define ARG(type) (argpos == 0 ? va_arg (arg, type) : \
231 ({ unsigned int pos = argpos; \
232 va_list arg = (va_list) argptr; \
233 while (--pos > 0) \
234 (void) va_arg (arg, void *); \
235 va_arg (arg, type); \
237 #endif
239 if (!isascii (*f))
241 /* Non-ASCII, may be a multibyte. */
242 int len = mblen (f, strlen (f));
243 if (len > 0)
247 c = inchar ();
248 if (c == EOF)
249 input_error ();
250 else if (c != *f++)
252 ungetc (c, s);
253 conv_error ();
256 while (--len > 0);
257 continue;
261 fc = *f++;
262 if (fc != '%')
264 /* Remember to skip spaces. */
265 if (isspace (fc))
267 skip_space = 1;
268 continue;
271 /* Read a character. */
272 c = inchar ();
274 /* Characters other than format specs must just match. */
275 if (c == EOF)
276 input_error ();
278 /* We saw white space char as the last character in the format
279 string. Now it's time to skip all leading white space. */
280 if (skip_space)
282 while (isspace (c))
283 (void) inchar ();
284 skip_space = 0;
287 if (c != fc)
289 ungetc (c, s);
290 conv_error ();
293 continue;
296 /* This is the start of the conversion string. */
297 flags = 0;
299 /* Initialize state of modifiers. */
300 argpos = 0;
302 /* Prepare temporary buffer. */
303 wpsize = 0;
305 /* Check for a positional parameter specification. */
306 if (isdigit (*f))
308 argpos = *f++ - '0';
309 while (isdigit (*f))
310 argpos = argpos * 10 + (*f++ - '0');
311 if (*f == '$')
312 ++f;
313 else
315 /* Oops; that was actually the field width. */
316 width = argpos;
317 flags |= WIDTH;
318 argpos = 0;
319 goto got_width;
323 /* Check for the assignment-suppressant and the number grouping flag. */
324 while (*f == '*' || *f == '\'')
325 switch (*f++)
327 case '*':
328 flags |= SUPPRESS;
329 break;
330 case '\'':
331 flags |= GROUP;
332 break;
335 /* We have seen width. */
336 if (isdigit (*f))
337 flags |= WIDTH;
339 /* Find the maximum field width. */
340 width = 0;
341 while (isdigit (*f))
343 width *= 10;
344 width += *f++ - '0';
346 got_width:
347 if (width == 0)
348 width = -1;
350 /* Check for type modifiers. */
351 while (*f == 'h' || *f == 'l' || *f == 'L' || *f == 'a' || *f == 'q')
352 switch (*f++)
354 case 'h':
355 /* int's are short int's. */
356 if (flags & TYPEMOD)
357 /* Signal illegal format element. */
358 conv_error ();
359 flags |= SHORT;
360 break;
361 case 'l':
362 if (flags & (SHORT|LONGDBL))
363 conv_error ();
364 else if (flags & LONG)
366 /* A double `l' is equivalent to an `L'. */
367 flags &= ~LONG;
368 flags |= LONGDBL;
370 else
371 /* int's are long int's. */
372 flags |= LONG;
373 break;
374 case 'q':
375 case 'L':
376 /* double's are long double's, and int's are long long int's. */
377 if (flags & TYPEMOD)
378 /* Signal illegal format element. */
379 conv_error ();
380 flags |= LONGDBL;
381 break;
382 case 'a':
383 if (flags & TYPEMOD)
384 /* Signal illegal format element. */
385 conv_error ();
386 /* String conversions (%s, %[) take a `char **'
387 arg and fill it in with a malloc'd pointer. */
388 flags |= MALLOC;
389 break;
392 /* End of the format string? */
393 if (*f == '\0')
394 conv_error ();
396 /* Find the conversion specifier. */
397 fc = *f++;
398 if (skip_space || (fc != '[' && fc != 'c' && fc != 'n'))
400 /* Eat whitespace. */
402 (void) inchar ();
403 while (isspace (c));
404 ungetc (c, s);
405 skip_space = 0;
408 switch (fc)
410 case '%': /* Must match a literal '%'. */
411 c = inchar ();
412 if (c != fc)
414 ungetc (c, s);
415 conv_error ();
417 break;
419 case 'n': /* Answer number of assignments done. */
420 /* Corrigendum 1 to ISO C 1990 describes the allowed flags
421 with the 'n' conversion specifier. */
422 if (!(flags & SUPPRESS))
423 /* Don't count the read-ahead. */
424 if (flags & LONGDBL)
425 *ARG (long long int *) = read_in;
426 else if (flags & LONG)
427 *ARG (long int *) = read_in;
428 else if (flags & SHORT)
429 *ARG (short int *) = read_in;
430 else
431 *ARG (int *) = read_in;
432 break;
434 case 'c': /* Match characters. */
435 if (!(flags & SUPPRESS))
437 str = ARG (char *);
438 if (str == NULL)
439 conv_error ();
442 c = inchar ();
443 if (c == EOF)
444 input_error ();
446 if (width == -1)
447 width = 1;
449 if (!(flags & SUPPRESS))
452 *str++ = c;
453 while (--width > 0 && inchar () != EOF);
455 else
456 while (--width > 0 && inchar () != EOF);
458 if (width > 0)
459 /* I.e., EOF was read. */
460 --read_in;
462 if (!(flags & SUPPRESS))
463 ++done;
465 break;
467 case 's': /* Read a string. */
468 #define STRING_ARG \
469 if (!(flags & SUPPRESS)) \
471 if (flags & MALLOC) \
473 /* The string is to be stored in a malloc'd buffer. */ \
474 strptr = ARG (char **); \
475 if (strptr == NULL) \
476 conv_error (); \
477 /* Allocate an initial buffer. */ \
478 strsize = 100; \
479 *strptr = str = malloc (strsize); \
481 else \
482 str = ARG (char *); \
483 if (str == NULL) \
484 conv_error (); \
486 STRING_ARG;
488 c = inchar ();
489 if (c == EOF)
490 input_error ();
494 if (isspace (c))
496 ungetc (c, s);
497 break;
499 #define STRING_ADD_CHAR(c) \
500 if (!(flags & SUPPRESS)) \
502 *str++ = c; \
503 if ((flags & MALLOC) && str == *strptr + strsize) \
505 /* Enlarge the buffer. */ \
506 str = realloc (*strptr, strsize * 2); \
507 if (str == NULL) \
509 /* Can't allocate that much. Last-ditch effort. */\
510 str = realloc (*strptr, strsize + 1); \
511 if (str == NULL) \
513 /* We lose. Oh well. \
514 Terminate the string and stop converting, \
515 so at least we don't skip any input. */ \
516 (*strptr)[strsize] = '\0'; \
517 ++done; \
518 conv_error (); \
520 else \
522 *strptr = str; \
523 str += strsize; \
524 ++strsize; \
527 else \
529 *strptr = str; \
530 str += strsize; \
531 strsize *= 2; \
535 STRING_ADD_CHAR (c);
536 } while ((width <= 0 || --width > 0) && inchar () != EOF);
538 if (!(flags & SUPPRESS))
540 *str = '\0';
541 ++done;
543 break;
545 case 'x': /* Hexadecimal integer. */
546 case 'X': /* Ditto. */
547 base = 16;
548 number_signed = 0;
549 goto number;
551 case 'o': /* Octal integer. */
552 base = 8;
553 number_signed = 0;
554 goto number;
556 case 'u': /* Unsigned decimal integer. */
557 base = 10;
558 number_signed = 0;
559 goto number;
561 case 'd': /* Signed decimal integer. */
562 base = 10;
563 number_signed = 1;
564 goto number;
566 case 'i': /* Generic number. */
567 base = 0;
568 number_signed = 1;
570 number:
571 c = inchar ();
572 if (c == EOF)
573 input_error ();
575 /* Check for a sign. */
576 if (c == '-' || c == '+')
578 ADDW (c);
579 if (width > 0)
580 --width;
581 c = inchar ();
584 /* Look for a leading indication of base. */
585 if (width != 0 && c == '0')
587 if (width > 0)
588 --width;
590 ADDW (c);
591 c = inchar ();
593 if (width != 0 && tolower (c) == 'x')
595 if (base == 0)
596 base = 16;
597 if (base == 16)
599 if (width > 0)
600 --width;
601 c = inchar ();
604 else if (base == 0)
605 base = 8;
608 if (base == 0)
609 base = 10;
611 /* Read the number into workspace. */
612 while (c != EOF && width != 0)
614 if (base == 16 ? !isxdigit (c) :
615 ((!isdigit (c) || c - '0' >= base) &&
616 !((flags & GROUP) && base == 10 && c == thousands)))
617 break;
618 ADDW (c);
619 if (width > 0)
620 --width;
622 c = inchar ();
625 /* The just read character is not part of the number anymore. */
626 ungetc (c, s);
628 if (wpsize == 0 ||
629 (wpsize == 1 && (wp[0] == '+' || wp[0] == '-')))
630 /* There was no number. */
631 conv_error ();
633 /* Convert the number. */
634 ADDW ('\0');
635 if (flags & LONGDBL)
637 if (number_signed)
638 num.q = __strtoq_internal (wp, &tw, base, flags & GROUP);
639 else
640 num.uq = __strtouq_internal (wp, &tw, base, flags & GROUP);
642 else
644 if (number_signed)
645 num.l = __strtol_internal (wp, &tw, base, flags & GROUP);
646 else
647 num.ul = __strtoul_internal (wp, &tw, base, flags & GROUP);
649 if (wp == tw)
650 conv_error ();
652 if (!(flags & SUPPRESS))
654 if (! number_signed)
656 if (flags & LONGDBL)
657 *ARG (unsigned LONGLONG int *) = num.uq;
658 else if (flags & LONG)
659 *ARG (unsigned long int *) = num.ul;
660 else if (flags & SHORT)
661 *ARG (unsigned short int *)
662 = (unsigned short int) num.ul;
663 else
664 *ARG (unsigned int *) = (unsigned int) num.ul;
666 else
668 if (flags & LONGDBL)
669 *ARG (LONGLONG int *) = num.q;
670 else if (flags & LONG)
671 *ARG (long int *) = num.l;
672 else if (flags & SHORT)
673 *ARG (short int *) = (short int) num.l;
674 else
675 *ARG (int *) = (int) num.l;
677 ++done;
679 break;
681 case 'e': /* Floating-point numbers. */
682 case 'E':
683 case 'f':
684 case 'g':
685 case 'G':
686 c = inchar ();
687 if (c == EOF)
688 input_error ();
690 /* Check for a sign. */
691 if (c == '-' || c == '+')
693 negative = c == '-';
694 if (inchar () == EOF)
695 /* EOF is only an input error before we read any chars. */
696 conv_error ();
697 if (width > 0)
698 --width;
700 else
701 negative = 0;
703 got_dot = got_e = 0;
706 if (isdigit (c))
707 ADDW (c);
708 else if (got_e && wp[wpsize - 1] == 'e'
709 && (c == '-' || c == '+'))
710 ADDW (c);
711 else if (wpsize > 0 && !got_e && tolower (c) == 'e')
713 ADDW ('e');
714 got_e = got_dot = 1;
716 else if (c == decimal && !got_dot)
718 ADDW (c);
719 got_dot = 1;
721 else if ((flags & GROUP) && c == thousands && !got_dot)
722 ADDW (c);
723 else
724 break;
725 if (width > 0)
726 --width;
728 while (inchar () != EOF && width != 0);
730 /* The last read character is not part of the number anymore. */
731 ungetc (c, s);
733 if (wpsize == 0)
734 conv_error ();
736 /* Convert the number. */
737 ADDW ('\0');
738 if (flags & LONGDBL)
740 long double d = __strtold_internal (wp, &tw, flags & GROUP);
741 if (!(flags & SUPPRESS) && tw != wp)
742 *ARG (long double *) = negative ? -d : d;
744 else if (flags & LONG)
746 double d = __strtod_internal (wp, &tw, flags & GROUP);
747 if (!(flags & SUPPRESS) && tw != wp)
748 *ARG (double *) = negative ? -d : d;
750 else
752 float d = __strtof_internal (wp, &tw, flags & GROUP);
753 if (!(flags & SUPPRESS) && tw != wp)
754 *ARG (float *) = negative ? -d : d;
757 if (tw == wp)
758 conv_error ();
760 if (!(flags & SUPPRESS))
761 ++done;
762 break;
764 case '[': /* Character class. */
765 STRING_ARG;
767 c = inchar ();
768 if (c == EOF)
769 input_error ();
771 if (*f == '^')
773 ++f;
774 not_in = 1;
776 else
777 not_in = 0;
779 /* Fill WP with byte flags indexed by character.
780 We will use this flag map for matching input characters. */
781 if (wpmax < UCHAR_MAX)
783 wpmax = UCHAR_MAX;
784 wp = (char *) alloca (wpmax);
786 memset (wp, 0, UCHAR_MAX);
788 fc = *f;
789 if (fc == ']' || fc == '-')
791 /* If ] or - appears before any char in the set, it is not
792 the terminator or separator, but the first char in the
793 set. */
794 wp[fc] = 1;
795 ++f;
798 while ((fc = *f++) != '\0' && fc != ']')
800 if (fc == '-' && *f != '\0' && *f != ']' &&
801 (unsigned char) f[-2] <= (unsigned char) *f)
803 /* Add all characters from the one before the '-'
804 up to (but not including) the next format char. */
805 for (fc = f[-2]; fc < *f; ++fc)
806 wp[fc] = 1;
808 else
809 /* Add the character to the flag map. */
810 wp[fc] = 1;
812 if (fc == '\0')
814 ungetc (c, s);
815 conv_error();
818 num.ul = read_in - 1; /* -1 because we already read one char. */
821 if (wp[c] == not_in)
823 ungetc (c, s);
824 break;
826 STRING_ADD_CHAR (c);
827 if (width > 0)
828 --width;
830 while (width != 0 && inchar () != EOF);
832 if (read_in == num.ul)
833 conv_error ();
835 if (!(flags & SUPPRESS))
837 *str = '\0';
838 ++done;
840 break;
842 case 'p': /* Generic pointer. */
843 base = 16;
844 /* A PTR must be the same size as a `long int'. */
845 flags &= ~(SHORT|LONGDBL);
846 flags |= LONG;
847 number_signed = 0;
848 goto number;
852 /* The last thing we saw int the format string was a white space.
853 Consume the last white spaces. */
854 if (skip_space)
857 c = inchar ();
858 while (isspace (c));
859 ungetc (c, s);
862 /* Unlock stream. */
863 UNLOCK_STREAM;
865 return done;
868 #ifdef USE_IN_LIBIO
870 __vfscanf (FILE *s, const char *format, va_list argptr)
872 return _IO_vfscanf (s, format, argptr, NULL);
874 #endif
876 weak_alias (__vfscanf, vfscanf)