update from main archive 961030
[glibc.git] / stdio-common / vfscanf.c
blob28f8ceb820a481995adb3c7aeb3e5ccdadbf0479
/* Copyright (C) 1991, 92, 93, 94, 95, 96 Free Software Foundation, Inc.
This file is part of the GNU C Library.

The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.

The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Library General Public License for more details.

You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB.  If
not, write to the Free Software Foundation, Inc., 675 Mass Ave,
Cambridge, MA 02139, USA.  */
19 #include "../locale/localeinfo.h"
20 #include <errno.h>
21 #include <limits.h>
22 #include <ctype.h>
23 #include <stdarg.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <libc-lock.h>
/* Widest integer type used for the `L'/`q'/`ll' integer conversions.
   GCC provides `long long'; other compilers fall back to `long'.  */
#ifdef __GNUC__
#define HAVE_LONGLONG
#define LONGLONG long long
#else
#define LONGLONG long
#endif

/* Those are flags in the conversion format.  Each one is a distinct bit
   so that they can be combined in the per-conversion FLAGS word.  */
# define LONG 0x001 /* l: long or double */
# define LONGDBL 0x002 /* L: long long or long double */
# define SHORT 0x004 /* h: short */
# define SUPPRESS 0x008 /* *: suppress assignment */
# define POINTER 0x010 /* weird %p pointer (`fake hex') */
# define NOSKIP 0x020 /* do not skip blanks */
# define WIDTH 0x040 /* width was given */
# define GROUP 0x080 /* ': group numbers */
# define MALLOC 0x100 /* a: malloc strings */

/* Mask of the mutually exclusive size modifiers.  */
# define TYPEMOD (LONG|LONGDBL|SHORT)
/* Stream access helpers used by the scanner below.

   All of these macros expand inside the scanning function and rely on
   its locals: S (the stream), C (last character read), READ_IN (count
   of characters consumed), DONE (assignments made) and, in the libio
   build, ERRP (optional error-flag output).

   inchar ()       read one character into C and count it.
   ungetc (c, s)   push a character back and uncount it.
   conv_error ()   matching failure: unlock and return DONE.
   input_error ()  input failure: unlock and return DONE, or EOF if
                   nothing was assigned yet.
   memory_error () out of memory: unlock, set ENOMEM, return EOF.
   ARGCHECK        reject an unreadable stream or a NULL format.  */
#ifdef USE_IN_LIBIO
# include <libioP.h>
# include <libio.h>

# undef va_list
# define va_list _IO_va_list
# define ungetc(c, s) (--read_in, _IO_ungetc (c, s))
# define inchar() ((c = _IO_getc_unlocked (s)), (void) ++read_in, c)
/* Bit 2 in *ERRP flags a conversion error, bit 1 an input error.  */
# define conv_error() do { \
    if (errp != NULL) *errp |= 2; \
    _IO_funlockfile (s); \
    return done; \
  } while (0)
# define input_error() do { \
    _IO_funlockfile (s); \
    if (errp != NULL) *errp |= 1; \
    return done ?: EOF; \
  } while (0)
# define memory_error() do { \
    _IO_funlockfile (s); \
    __set_errno (ENOMEM); \
    return EOF; \
  } while (0)
# define ARGCHECK(s, format) \
  do \
    { \
      /* Check file argument for consistence.  */ \
      CHECK_FILE (s, EOF); \
      if (s->_flags & _IO_NO_READS || format == NULL) \
        { \
          MAYBE_SET_EINVAL; \
          return EOF; \
        } \
    } while (0)
# define LOCK_STREAM(S) \
  __libc_cleanup_region_start ((void (*) (void *)) &_IO_funlockfile, (S)); \
  _IO_flockfile (S)
# define UNLOCK_STREAM __libc_cleanup_region_end (1)
#else
# define ungetc(c, s) (--read_in, ungetc (c, s))
# define inchar() ((c = getc (s)), (void) ++read_in, c)
# define conv_error() do { \
    funlockfile (s); \
    return done; \
  } while (0)
# define input_error() do { \
    funlockfile (s); \
    return done ?: EOF; \
  } while (0)
# define memory_error() do { \
    funlockfile (s); \
    __set_errno (ENOMEM); \
    return EOF; \
  } while (0)
/* The braces around the error branch are essential: without them the
   `return EOF' would execute unconditionally.  */
# define ARGCHECK(s, format) \
  do \
    { \
      /* Check file argument for consistence.  */ \
      if (!__validfp (s) || !s->__mode.__read || format == NULL) \
        { \
          __set_errno (EINVAL); \
          return EOF; \
        } \
    } while (0)
#if 1
/* XXX For now !!!  Stream locking is compiled out in this build.  */
# define flockfile(S) /* nothing */
# define funlockfile(S) /* nothing */
# define LOCK_STREAM(S)
# define UNLOCK_STREAM
#else
# define LOCK_STREAM(S) \
  __libc_cleanup_region_start (&__funlockfile, (S)); \
  __flockfile (S)
# define UNLOCK_STREAM __libc_cleanup_region_end (1)
#endif
#endif
129 /* Read formatted input from S according to the format string
130 FORMAT, using the argument list in ARG.
131 Return the number of assignments made, or -1 for an input error. */
132 #ifdef USE_IN_LIBIO
134 _IO_vfscanf (s, format, argptr, errp)
135 _IO_FILE *s;
136 const char *format;
137 _IO_va_list argptr;
138 int *errp;
139 #else
141 __vfscanf (FILE *s, const char *format, va_list argptr)
142 #endif
144 va_list arg = (va_list) argptr;
146 register const char *f = format;
147 register unsigned char fc; /* Current character of the format. */
148 register size_t done = 0; /* Assignments done. */
149 register size_t read_in = 0; /* Chars read in. */
150 register int c; /* Last char read. */
151 register int width; /* Maximum field width. */
152 register int flags; /* Modifiers for current format element. */
154 /* Status for reading F-P nums. */
155 char got_dot, got_e, negative;
156 /* If a [...] is a [^...]. */
157 char not_in;
158 /* Base for integral numbers. */
159 int base;
160 /* Signedness for integral numbers. */
161 int number_signed;
162 /* Decimal point character. */
163 wchar_t decimal;
164 /* The thousands character of the current locale. */
165 wchar_t thousands;
166 /* Integral holding variables. */
167 union
169 long long int q;
170 unsigned long long int uq;
171 long int l;
172 unsigned long int ul;
173 } num;
174 /* Character-buffer pointer. */
175 char *str = NULL;
176 char **strptr = NULL;
177 size_t strsize = 0;
178 /* We must not react on white spaces immediately because they can
179 possibly be matched even if in the input stream no character is
180 available anymore. */
181 int skip_space = 0;
182 /* Workspace. */
183 char *tw; /* Temporary pointer. */
184 char *wp = NULL; /* Workspace. */
185 size_t wpmax = 0; /* Maximal size of workspace. */
186 size_t wpsize; /* Currently used bytes in workspace. */
187 #define ADDW(Ch) \
188 do \
190 if (wpsize == wpmax) \
192 char *old = wp; \
193 wpmax = UCHAR_MAX > 2 * wpmax ? UCHAR_MAX : 2 * wpmax; \
194 wp = (char *) alloca (wpmax); \
195 if (old != NULL) \
196 memcpy (wp, old, wpsize); \
198 wp[wpsize++] = (Ch); \
200 while (0)
202 ARGCHECK (s, format);
204 /* Figure out the decimal point character. */
205 if (mbtowc (&decimal, _NL_CURRENT (LC_NUMERIC, DECIMAL_POINT),
206 strlen (_NL_CURRENT (LC_NUMERIC, DECIMAL_POINT))) <= 0)
207 decimal = (wchar_t) *_NL_CURRENT (LC_NUMERIC, DECIMAL_POINT);
208 /* Figure out the thousands separator character. */
209 if (mbtowc (&thousands, _NL_CURRENT (LC_NUMERIC, THOUSANDS_SEP),
210 strlen (_NL_CURRENT (LC_NUMERIC, THOUSANDS_SEP))) <= 0)
211 thousands = (wchar_t) *_NL_CURRENT (LC_NUMERIC, THOUSANDS_SEP);
213 /* Lock the stream. */
214 LOCK_STREAM (s);
216 /* Run through the format string. */
217 while (*f != '\0')
219 unsigned int argpos;
220 /* Extract the next argument, which is of type TYPE.
221 For a %N$... spec, this is the Nth argument from the beginning;
222 otherwise it is the next argument after the state now in ARG. */
223 #if 0
224 /* XXX Possible optimization. */
225 # define ARG(type) (argpos == 0 ? va_arg (arg, type) : \
226 ({ va_list arg = (va_list) argptr; \
227 arg = (va_list) ((char *) arg \
228 + (argpos - 1) \
229 * __va_rounded_size (void *)); \
230 va_arg (arg, type); \
232 #else
233 # define ARG(type) (argpos == 0 ? va_arg (arg, type) : \
234 ({ unsigned int pos = argpos; \
235 va_list arg = (va_list) argptr; \
236 while (--pos > 0) \
237 (void) va_arg (arg, void *); \
238 va_arg (arg, type); \
240 #endif
242 if (!isascii (*f))
244 /* Non-ASCII, may be a multibyte. */
245 int len = mblen (f, strlen (f));
246 if (len > 0)
250 c = inchar ();
251 if (c == EOF)
252 input_error ();
253 else if (c != *f++)
255 ungetc (c, s);
256 conv_error ();
259 while (--len > 0);
260 continue;
264 fc = *f++;
265 if (fc != '%')
267 /* Remember to skip spaces. */
268 if (isspace (fc))
270 skip_space = 1;
271 continue;
274 /* Read a character. */
275 c = inchar ();
277 /* Characters other than format specs must just match. */
278 if (c == EOF)
279 input_error ();
281 /* We saw white space char as the last character in the format
282 string. Now it's time to skip all leading white space. */
283 if (skip_space)
285 while (isspace (c))
286 (void) inchar ();
287 skip_space = 0;
290 if (c != fc)
292 ungetc (c, s);
293 conv_error ();
296 continue;
299 /* This is the start of the conversion string. */
300 flags = 0;
302 /* Initialize state of modifiers. */
303 argpos = 0;
305 /* Prepare temporary buffer. */
306 wpsize = 0;
308 /* Check for a positional parameter specification. */
309 if (isdigit (*f))
311 argpos = *f++ - '0';
312 while (isdigit (*f))
313 argpos = argpos * 10 + (*f++ - '0');
314 if (*f == '$')
315 ++f;
316 else
318 /* Oops; that was actually the field width. */
319 width = argpos;
320 flags |= WIDTH;
321 argpos = 0;
322 goto got_width;
326 /* Check for the assignment-suppressant and the number grouping flag. */
327 while (*f == '*' || *f == '\'')
328 switch (*f++)
330 case '*':
331 flags |= SUPPRESS;
332 break;
333 case '\'':
334 flags |= GROUP;
335 break;
338 /* We have seen width. */
339 if (isdigit (*f))
340 flags |= WIDTH;
342 /* Find the maximum field width. */
343 width = 0;
344 while (isdigit (*f))
346 width *= 10;
347 width += *f++ - '0';
349 got_width:
350 if (width == 0)
351 width = -1;
353 /* Check for type modifiers. */
354 while (*f == 'h' || *f == 'l' || *f == 'L' || *f == 'a' || *f == 'q')
355 switch (*f++)
357 case 'h':
358 /* int's are short int's. */
359 if (flags & TYPEMOD)
360 /* Signal illegal format element. */
361 conv_error ();
362 flags |= SHORT;
363 break;
364 case 'l':
365 if (flags & (SHORT|LONGDBL))
366 conv_error ();
367 else if (flags & LONG)
369 /* A double `l' is equivalent to an `L'. */
370 flags &= ~LONG;
371 flags |= LONGDBL;
373 else
374 /* int's are long int's. */
375 flags |= LONG;
376 break;
377 case 'q':
378 case 'L':
379 /* double's are long double's, and int's are long long int's. */
380 if (flags & TYPEMOD)
381 /* Signal illegal format element. */
382 conv_error ();
383 flags |= LONGDBL;
384 break;
385 case 'a':
386 if (flags & TYPEMOD)
387 /* Signal illegal format element. */
388 conv_error ();
389 /* String conversions (%s, %[) take a `char **'
390 arg and fill it in with a malloc'd pointer. */
391 flags |= MALLOC;
392 break;
395 /* End of the format string? */
396 if (*f == '\0')
397 conv_error ();
399 /* Find the conversion specifier. */
400 fc = *f++;
401 if (skip_space || (fc != '[' && fc != 'c' && fc != 'n'))
403 /* Eat whitespace. */
405 (void) inchar ();
406 while (isspace (c));
407 ungetc (c, s);
408 skip_space = 0;
411 switch (fc)
413 case '%': /* Must match a literal '%'. */
414 c = inchar ();
415 if (c != fc)
417 ungetc (c, s);
418 conv_error ();
420 break;
422 case 'n': /* Answer number of assignments done. */
423 /* Corrigendum 1 to ISO C 1990 describes the allowed flags
424 with the 'n' conversion specifier. */
425 if (!(flags & SUPPRESS))
426 /* Don't count the read-ahead. */
427 if (flags & LONGDBL)
428 *ARG (long long int *) = read_in;
429 else if (flags & LONG)
430 *ARG (long int *) = read_in;
431 else if (flags & SHORT)
432 *ARG (short int *) = read_in;
433 else
434 *ARG (int *) = read_in;
435 break;
437 case 'c': /* Match characters. */
438 if (!(flags & SUPPRESS))
440 str = ARG (char *);
441 if (str == NULL)
442 conv_error ();
445 c = inchar ();
446 if (c == EOF)
447 input_error ();
449 if (width == -1)
450 width = 1;
452 if (!(flags & SUPPRESS))
455 *str++ = c;
456 while (--width > 0 && inchar () != EOF);
458 else
459 while (--width > 0 && inchar () != EOF);
461 if (width > 0)
462 /* I.e., EOF was read. */
463 --read_in;
465 if (!(flags & SUPPRESS))
466 ++done;
468 break;
470 case 's': /* Read a string. */
471 #define STRING_ARG \
472 if (!(flags & SUPPRESS)) \
474 if (flags & MALLOC) \
476 /* The string is to be stored in a malloc'd buffer. */ \
477 strptr = ARG (char **); \
478 if (strptr == NULL) \
479 conv_error (); \
480 /* Allocate an initial buffer. */ \
481 strsize = 100; \
482 *strptr = str = malloc (strsize); \
484 else \
485 str = ARG (char *); \
486 if (str == NULL) \
487 conv_error (); \
489 STRING_ARG;
491 c = inchar ();
492 if (c == EOF)
493 input_error ();
497 if (isspace (c))
499 ungetc (c, s);
500 break;
502 #define STRING_ADD_CHAR(c) \
503 if (!(flags & SUPPRESS)) \
505 *str++ = c; \
506 if ((flags & MALLOC) && str == *strptr + strsize) \
508 /* Enlarge the buffer. */ \
509 str = realloc (*strptr, strsize * 2); \
510 if (str == NULL) \
512 /* Can't allocate that much. Last-ditch effort. */\
513 str = realloc (*strptr, strsize + 1); \
514 if (str == NULL) \
516 /* We lose. Oh well. \
517 Terminate the string and stop converting, \
518 so at least we don't skip any input. */ \
519 (*strptr)[strsize] = '\0'; \
520 ++done; \
521 conv_error (); \
523 else \
525 *strptr = str; \
526 str += strsize; \
527 ++strsize; \
530 else \
532 *strptr = str; \
533 str += strsize; \
534 strsize *= 2; \
538 STRING_ADD_CHAR (c);
539 } while ((width <= 0 || --width > 0) && inchar () != EOF);
541 if (!(flags & SUPPRESS))
543 *str = '\0';
544 ++done;
546 break;
548 case 'x': /* Hexadecimal integer. */
549 case 'X': /* Ditto. */
550 base = 16;
551 number_signed = 0;
552 goto number;
554 case 'o': /* Octal integer. */
555 base = 8;
556 number_signed = 0;
557 goto number;
559 case 'u': /* Unsigned decimal integer. */
560 base = 10;
561 number_signed = 0;
562 goto number;
564 case 'd': /* Signed decimal integer. */
565 base = 10;
566 number_signed = 1;
567 goto number;
569 case 'i': /* Generic number. */
570 base = 0;
571 number_signed = 1;
573 number:
574 c = inchar ();
575 if (c == EOF)
576 input_error ();
578 /* Check for a sign. */
579 if (c == '-' || c == '+')
581 ADDW (c);
582 if (width > 0)
583 --width;
584 c = inchar ();
587 /* Look for a leading indication of base. */
588 if (width != 0 && c == '0')
590 if (width > 0)
591 --width;
593 ADDW (c);
594 c = inchar ();
596 if (width != 0 && tolower (c) == 'x')
598 if (base == 0)
599 base = 16;
600 if (base == 16)
602 if (width > 0)
603 --width;
604 c = inchar ();
607 else if (base == 0)
608 base = 8;
611 if (base == 0)
612 base = 10;
614 /* Read the number into workspace. */
615 while (c != EOF && width != 0)
617 if (base == 16 ? !isxdigit (c) :
618 ((!isdigit (c) || c - '0' >= base) &&
619 !((flags & GROUP) && base == 10 && c == thousands)))
620 break;
621 ADDW (c);
622 if (width > 0)
623 --width;
625 c = inchar ();
628 /* The just read character is not part of the number anymore. */
629 ungetc (c, s);
631 if (wpsize == 0 ||
632 (wpsize == 1 && (wp[0] == '+' || wp[0] == '-')))
633 /* There was no number. */
634 conv_error ();
636 /* Convert the number. */
637 ADDW ('\0');
638 if (flags & LONGDBL)
640 if (number_signed)
641 num.q = __strtoq_internal (wp, &tw, base, flags & GROUP);
642 else
643 num.uq = __strtouq_internal (wp, &tw, base, flags & GROUP);
645 else
647 if (number_signed)
648 num.l = __strtol_internal (wp, &tw, base, flags & GROUP);
649 else
650 num.ul = __strtoul_internal (wp, &tw, base, flags & GROUP);
652 if (wp == tw)
653 conv_error ();
655 if (!(flags & SUPPRESS))
657 if (! number_signed)
659 if (flags & LONGDBL)
660 *ARG (unsigned LONGLONG int *) = num.uq;
661 else if (flags & LONG)
662 *ARG (unsigned long int *) = num.ul;
663 else if (flags & SHORT)
664 *ARG (unsigned short int *)
665 = (unsigned short int) num.ul;
666 else
667 *ARG (unsigned int *) = (unsigned int) num.ul;
669 else
671 if (flags & LONGDBL)
672 *ARG (LONGLONG int *) = num.q;
673 else if (flags & LONG)
674 *ARG (long int *) = num.l;
675 else if (flags & SHORT)
676 *ARG (short int *) = (short int) num.l;
677 else
678 *ARG (int *) = (int) num.l;
680 ++done;
682 break;
684 case 'e': /* Floating-point numbers. */
685 case 'E':
686 case 'f':
687 case 'g':
688 case 'G':
689 c = inchar ();
690 if (c == EOF)
691 input_error ();
693 /* Check for a sign. */
694 if (c == '-' || c == '+')
696 negative = c == '-';
697 if (inchar () == EOF)
698 /* EOF is only an input error before we read any chars. */
699 conv_error ();
700 if (width > 0)
701 --width;
703 else
704 negative = 0;
706 got_dot = got_e = 0;
709 if (isdigit (c))
710 ADDW (c);
711 else if (got_e && wp[wpsize - 1] == 'e'
712 && (c == '-' || c == '+'))
713 ADDW (c);
714 else if (wpsize > 0 && !got_e && tolower (c) == 'e')
716 ADDW ('e');
717 got_e = got_dot = 1;
719 else if (c == decimal && !got_dot)
721 ADDW (c);
722 got_dot = 1;
724 else if ((flags & GROUP) && c == thousands && !got_dot)
725 ADDW (c);
726 else
727 break;
728 if (width > 0)
729 --width;
731 while (inchar () != EOF && width != 0);
733 /* The last read character is not part of the number anymore. */
734 ungetc (c, s);
736 if (wpsize == 0)
737 conv_error ();
739 /* Convert the number. */
740 ADDW ('\0');
741 if (flags & LONGDBL)
743 long double d = __strtold_internal (wp, &tw, flags & GROUP);
744 if (!(flags & SUPPRESS) && tw != wp)
745 *ARG (long double *) = negative ? -d : d;
747 else if (flags & LONG)
749 double d = __strtod_internal (wp, &tw, flags & GROUP);
750 if (!(flags & SUPPRESS) && tw != wp)
751 *ARG (double *) = negative ? -d : d;
753 else
755 float d = __strtof_internal (wp, &tw, flags & GROUP);
756 if (!(flags & SUPPRESS) && tw != wp)
757 *ARG (float *) = negative ? -d : d;
760 if (tw == wp)
761 conv_error ();
763 if (!(flags & SUPPRESS))
764 ++done;
765 break;
767 case '[': /* Character class. */
768 STRING_ARG;
770 c = inchar ();
771 if (c == EOF)
772 input_error ();
774 if (*f == '^')
776 ++f;
777 not_in = 1;
779 else
780 not_in = 0;
782 /* Fill WP with byte flags indexed by character.
783 We will use this flag map for matching input characters. */
784 if (wpmax < UCHAR_MAX)
786 wpmax = UCHAR_MAX;
787 wp = (char *) alloca (wpmax);
789 memset (wp, 0, UCHAR_MAX);
791 fc = *f;
792 if (fc == ']' || fc == '-')
794 /* If ] or - appears before any char in the set, it is not
795 the terminator or separator, but the first char in the
796 set. */
797 wp[fc] = 1;
798 ++f;
801 while ((fc = *f++) != '\0' && fc != ']')
803 if (fc == '-' && *f != '\0' && *f != ']' &&
804 (unsigned char) f[-2] <= (unsigned char) *f)
806 /* Add all characters from the one before the '-'
807 up to (but not including) the next format char. */
808 for (fc = f[-2]; fc < *f; ++fc)
809 wp[fc] = 1;
811 else
812 /* Add the character to the flag map. */
813 wp[fc] = 1;
815 if (fc == '\0')
817 ungetc (c, s);
818 conv_error();
821 num.ul = read_in - 1; /* -1 because we already read one char. */
824 if (wp[c] == not_in)
826 ungetc (c, s);
827 break;
829 STRING_ADD_CHAR (c);
830 if (width > 0)
831 --width;
833 while (width != 0 && inchar () != EOF);
835 if (read_in == num.ul)
836 conv_error ();
838 if (!(flags & SUPPRESS))
840 *str = '\0';
841 ++done;
843 break;
845 case 'p': /* Generic pointer. */
846 base = 16;
847 /* A PTR must be the same size as a `long int'. */
848 flags &= ~(SHORT|LONGDBL);
849 flags |= LONG;
850 number_signed = 0;
851 goto number;
855 /* The last thing we saw int the format string was a white space.
856 Consume the last white spaces. */
857 if (skip_space)
860 c = inchar ();
861 while (isspace (c));
862 ungetc (c, s);
865 /* Unlock stream. */
866 UNLOCK_STREAM;
868 return done;
871 #ifdef USE_IN_LIBIO
873 __vfscanf (FILE *s, const char *format, va_list argptr)
875 return _IO_vfscanf (s, format, argptr, NULL);
877 #endif
879 weak_alias (__vfscanf, vfscanf)