/* Sun May 12 11:16:58 1996  Roland McGrath  <roland@delasyd.gnu.ai.mit.edu>
   [glibc.git] / stdio-common / vfscanf.c
   blob 9cdad7bb8b47ad0fe0afc5fdeeab45329ddcee53  */
/* Copyright (C) 1991, 92, 93, 94, 95, 96 Free Software Foundation, Inc.
This file is part of the GNU C Library.

The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.

The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Library General Public License for more details.

You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB.  If
not, write to the Free Software Foundation, Inc., 675 Mass Ave,
Cambridge, MA 02139, USA.  */
19 #include <ansidecl.h>
20 #include "../locale/localeinfo.h"
21 #include <errno.h>
22 #include <limits.h>
23 #include <ctype.h>
24 #include <stdarg.h>
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
/* Widest integer type used for the `L', `ll' and `q' modifiers.
   GNU C provides `long long'; other compilers fall back to `long'.  */
#ifdef	__GNUC__
# define HAVE_LONGLONG
# define LONGLONG	long long
#else
# define LONGLONG	long
#endif

/* Those are flags in the conversion format.  Each one is a distinct
   bit so that they can be or'ed together in a single `flags' word.  */
# define LONG		0x001	/* l: long or double */
# define LONGDBL	0x002	/* L: long long or long double */
# define SHORT		0x004	/* h: short */
# define SUPPRESS	0x008	/* *: suppress assignment */
# define POINTER	0x010	/* weird %p pointer (`fake hex') */
# define NOSKIP		0x020	/* do not skip blanks */
# define WIDTH		0x040	/* width was given */
# define GROUP		0x080	/* ': group numbers */
# define MALLOC		0x100	/* a: malloc strings */

/* All size modifiers; at most one of them may be in effect for a
   single conversion.  */
# define TYPEMOD	(LONG|LONGDBL|SHORT)
/* Stream access and error-exit helpers.

   All of these macros expand inside the scanning function and refer to
   its locals by name: S (the stream), C (the read-ahead character),
   READ_IN (count of characters fetched), DONE (assignments made) and,
   in the libio build, ERRP (optional error-flag output).

   inchar ()       fetch the next character into C, count it, yield it.
   conv_error ()   matching failure: push back the read-ahead character,
		   unlock the stream and return DONE.
   input_error ()  input failure: unlock the stream and return DONE, or
		   EOF if nothing was assigned yet.
   memory_error () unlock the stream, set errno to ENOMEM, return EOF.
   ARGCHECK ()     reject an unusable stream or a null FORMAT.  */
#ifdef USE_IN_LIBIO
# include <libioP.h>
# include <libio.h>

# define va_list	_IO_va_list
# define ungetc(c, s)	_IO_ungetc (c, s)
# define inchar()	((c = _IO_getc_unlocked (s)), (void) ++read_in, c)
# define conv_error()	do {						      \
			  /* Bit 2 of *ERRP flags a conversion error.  */    \
			  if (errp != NULL) *errp |= 2;			      \
			  if (c != EOF) _IO_ungetc (c, s);		      \
			  _IO_funlockfile (s);				      \
			  return done;					      \
			} while (0)
# define input_error()	do {						      \
			  _IO_funlockfile (s);				      \
			  /* Bit 1 of *ERRP flags an input error.  */	      \
			  if (errp != NULL) *errp |= 1;			      \
			  return done != 0 ? (int) done : EOF;		      \
			} while (0)
# define memory_error()	do {						      \
			  _IO_funlockfile (s);				      \
			  errno = ENOMEM;				      \
			  return EOF;					      \
			} while (0)
# define ARGCHECK(s, format)						      \
  do									      \
    {									      \
      /* Check file argument for consistence.  */			      \
      CHECK_FILE (s, EOF);						      \
      if (((s)->_flags & _IO_NO_READS) || (format) == NULL)		      \
	{								      \
	  MAYBE_SET_EINVAL;						      \
	  return EOF;							      \
	}								      \
    } while (0)
# define flockfile(S)	_IO_flockfile (S)
# define funlockfile(S)	_IO_funlockfile (S)
#else
# define inchar()	((c = getc (s)), (void) ++read_in, c)
# define conv_error()	do {						      \
			  /* Push back before unlocking, and never push     \
			     back EOF, exactly like the libio variant.  */   \
			  if (c != EOF) ungetc (c, s);			      \
			  funlockfile (s);				      \
			  return done;					      \
			} while (0)
# define input_error()	do {						      \
			  funlockfile (s);				      \
			  return done != 0 ? (int) done : EOF;		      \
			} while (0)
# define memory_error()	do {						      \
			  funlockfile (s);				      \
			  errno = ENOMEM;				      \
			  return EOF;					      \
			} while (0)
# define ARGCHECK(s, format)						      \
  do									      \
    {									      \
      /* Check file argument for consistence.  */			      \
      if (!__validfp (s) || !(s)->__mode.__read || (format) == NULL)	      \
	{								      \
	  errno = EINVAL;						      \
	  return EOF;							      \
	}								      \
    } while (0)
# define flockfile(S) /* nothing */
# define funlockfile(S) /* nothing */
#endif
117 /* Read formatted input from S according to the format string
118 FORMAT, using the argument list in ARG.
119 Return the number of assignments made, or -1 for an input error. */
120 #ifdef USE_IN_LIBIO
122 _IO_vfscanf (s, format, argptr, errp)
123 _IO_FILE *s;
124 const char *format;
125 _IO_va_list argptr;
126 int *errp;
127 #else
129 __vfscanf (FILE *s, const char *format, va_list argptr)
130 #endif
132 va_list arg = (va_list) argptr;
134 register const char *f = format;
135 register unsigned char fc; /* Current character of the format. */
136 register size_t done = 0; /* Assignments done. */
137 register size_t read_in = 0; /* Chars read in. */
138 register int c; /* Last char read. */
139 register int width; /* Maximum field width. */
140 register int flags; /* Modifiers for current format element. */
142 /* Status for reading F-P nums. */
143 char got_dot, got_e, negative;
144 /* If a [...] is a [^...]. */
145 char not_in;
146 /* Base for integral numbers. */
147 int base;
148 /* Signedness for integral numbers. */
149 int number_signed;
150 /* Decimal point character. */
151 wchar_t decimal;
152 /* The thousands character of the current locale. */
153 wchar_t thousands;
154 /* Integral holding variables. */
155 union
157 long long int q;
158 unsigned long long int uq;
159 long int l;
160 unsigned long int ul;
161 } num;
162 /* Character-buffer pointer. */
163 register char *str, **strptr;
164 size_t strsize;
165 /* We must not react on white spaces immediately because they can
166 possibly be matched even if in the input stream no character is
167 available anymore. */
168 int skip_space = 0;
169 /* Workspace. */
170 char *tw; /* Temporary pointer. */
171 char *wp = NULL; /* Workspace. */
172 size_t wpmax = 0; /* Maximal size of workspace. */
173 size_t wpsize; /* Currently used bytes in workspace. */
174 #define ADDW(Ch) \
175 do \
177 if (wpsize == wpmax) \
179 char *old = wp; \
180 wpmax = UCHAR_MAX > 2 * wpmax ? UCHAR_MAX : 2 * wpmax; \
181 wp = (char *) alloca (wpmax); \
182 if (old != NULL) \
183 memcpy (wp, old, wpsize); \
185 wp[wpsize++] = (Ch); \
187 while (0)
189 ARGCHECK (s, format);
191 /* Figure out the decimal point character. */
192 if (mbtowc (&decimal, _NL_CURRENT (LC_NUMERIC, DECIMAL_POINT),
193 strlen (_NL_CURRENT (LC_NUMERIC, DECIMAL_POINT))) <= 0)
194 decimal = (wchar_t) *_NL_CURRENT (LC_NUMERIC, DECIMAL_POINT);
195 /* Figure out the thousands separator character. */
196 if (mbtowc (&thousands, _NL_CURRENT (LC_NUMERIC, THOUSANDS_SEP),
197 strlen (_NL_CURRENT (LC_NUMERIC, THOUSANDS_SEP))) <= 0)
198 thousands = (wchar_t) *_NL_CURRENT (LC_NUMERIC, THOUSANDS_SEP);
200 /* Lock the stream. */
201 flockfile (s);
203 c = inchar ();
205 /* Run through the format string. */
206 while (*f != '\0')
208 unsigned int argpos;
209 /* Extract the next argument, which is of type TYPE.
210 For a %N$... spec, this is the Nth argument from the beginning;
211 otherwise it is the next argument after the state now in ARG. */
212 #if 0
213 /* XXX Possible optimization. */
214 # define ARG(type) (argpos == 0 ? va_arg (arg, type) : \
215 ({ va_list arg = (va_list) argptr; \
216 arg = (va_list) ((char *) arg \
217 + (argpos - 1) \
218 * __va_rounded_size (void *)); \
219 va_arg (arg, type); \
221 #else
222 # define ARG(type) (argpos == 0 ? va_arg (arg, type) : \
223 ({ unsigned int pos = argpos; \
224 va_list arg = (va_list) argptr; \
225 while (--pos > 0) \
226 (void) va_arg (arg, void *); \
227 va_arg (arg, type); \
229 #endif
231 if (!isascii (*f))
233 /* Non-ASCII, may be a multibyte. */
234 int len = mblen (f, strlen (f));
235 if (len > 0)
237 while (len-- > 0)
238 if (c == EOF)
239 input_error ();
240 else if (c == *f++)
241 (void) inchar ();
242 else
243 conv_error ();
244 continue;
248 fc = *f++;
249 if (fc != '%')
251 /* Remember to skip spaces. */
252 if (isspace (fc))
254 skip_space = 1;
255 continue;
258 /* Characters other than format specs must just match. */
259 if (c == EOF)
260 input_error ();
262 /* We saw white space char as the last character in the format
263 string. Now it's time to skip all leading white space. */
264 if (skip_space)
266 while (isspace (c))
267 (void) inchar ();
268 skip_space = 0;
271 if (c == fc)
272 (void) inchar ();
273 else
274 conv_error ();
276 continue;
279 /* This is the start of the conversion string. */
280 flags = 0;
282 /* Initialize state of modifiers. */
283 argpos = 0;
285 /* Prepare temporary buffer. */
286 wpsize = 0;
288 /* Check for a positional parameter specification. */
289 if (isdigit (*f))
291 argpos = *f++ - '0';
292 while (isdigit (*f))
293 argpos = argpos * 10 + (*f++ - '0');
294 if (*f == '$')
295 ++f;
296 else
298 /* Oops; that was actually the field width. */
299 width = argpos;
300 flags |= WIDTH;
301 argpos = 0;
302 goto got_width;
306 /* Check for the assignment-suppressant and the number grouping flag. */
307 while (*f == '*' || *f == '\'')
308 switch (*f++)
310 case '*':
311 flags |= SUPPRESS;
312 break;
313 case '\'':
314 flags |= GROUP;
315 break;
318 /* We have seen width. */
319 if (isdigit (*f))
320 flags |= WIDTH;
322 /* Find the maximum field width. */
323 width = 0;
324 while (isdigit (*f))
326 width *= 10;
327 width += *f++ - '0';
329 got_width:
330 if (width == 0)
331 width = -1;
333 /* Check for type modifiers. */
334 while (*f == 'h' || *f == 'l' || *f == 'L' || *f == 'a' || *f == 'q')
335 switch (*f++)
337 case 'h':
338 /* int's are short int's. */
339 if (flags & TYPEMOD)
340 /* Signal illegal format element. */
341 conv_error ();
342 flags |= SHORT;
343 break;
344 case 'l':
345 if (flags & (SHORT|LONGDBL))
346 conv_error ();
347 else if (flags & LONG)
349 /* A double `l' is equivalent to an `L'. */
350 flags &= ~LONG;
351 flags |= LONGDBL;
353 else
354 /* int's are long int's. */
355 flags |= LONG;
356 break;
357 case 'q':
358 case 'L':
359 /* double's are long double's, and int's are long long int's. */
360 if (flags & TYPEMOD)
361 /* Signal illegal format element. */
362 conv_error ();
363 flags |= LONGDBL;
364 break;
365 case 'a':
366 if (flags & TYPEMOD)
367 /* Signal illegal format element. */
368 conv_error ();
369 /* String conversions (%s, %[) take a `char **'
370 arg and fill it in with a malloc'd pointer. */
371 flags |= MALLOC;
372 break;
375 /* End of the format string? */
376 if (*f == '\0')
377 conv_error ();
379 /* Find the conversion specifier. */
380 fc = *f++;
381 if (skip_space || (fc != '[' && fc != 'c' && fc != 'n'))
383 /* Eat whitespace. */
384 while (isspace (c))
385 (void) inchar ();
386 skip_space = 0;
389 switch (fc)
391 case '%': /* Must match a literal '%'. */
392 if (c != fc)
393 conv_error ();
394 inchar ();
395 break;
397 case 'n': /* Answer number of assignments done. */
398 /* Corrigendum 1 to ISO C 1990 describes the allowed flags
399 with the 'n' conversion specifier. */
400 if (!(flags & SUPPRESS))
401 /* Don't count the read-ahead. */
402 if (flags & LONGDBL)
403 *ARG (long long int *) = read_in - 1;
404 else if (flags & LONG)
405 *ARG (long int *) = read_in - 1;
406 else if (flags & SHORT)
407 *ARG (short int *) = read_in - 1;
408 else
409 *ARG (int *) = read_in - 1;
410 break;
412 case 'c': /* Match characters. */
413 if (!(flags & SUPPRESS))
415 str = ARG (char *);
416 if (str == NULL)
417 conv_error ();
420 if (c == EOF)
421 input_error ();
423 if (width == -1)
424 width = 1;
426 if (!(flags & SUPPRESS))
429 *str++ = c;
430 while (inchar () != EOF && --width > 0);
432 else
433 while (inchar () != EOF && --width > 0);
435 if (!(flags & SUPPRESS))
436 ++done;
438 break;
440 case 's': /* Read a string. */
441 #define STRING_ARG \
442 if (!(flags & SUPPRESS)) \
444 if (flags & MALLOC) \
446 /* The string is to be stored in a malloc'd buffer. */ \
447 strptr = ARG (char **); \
448 if (strptr == NULL) \
449 conv_error (); \
450 /* Allocate an initial buffer. */ \
451 strsize = 100; \
452 *strptr = str = malloc (strsize); \
454 else \
455 str = ARG (char *); \
456 if (str == NULL) \
457 conv_error (); \
459 STRING_ARG;
461 if (c == EOF)
462 input_error ();
466 if (isspace (c))
467 break;
468 #define STRING_ADD_CHAR(c) \
469 if (!(flags & SUPPRESS)) \
471 *str++ = c; \
472 if ((flags & MALLOC) && str == *strptr + strsize) \
474 /* Enlarge the buffer. */ \
475 str = realloc (*strptr, strsize * 2); \
476 if (str == NULL) \
478 /* Can't allocate that much. Last-ditch effort. */\
479 str = realloc (*strptr, strsize + 1); \
480 if (str == NULL) \
482 /* We lose. Oh well. \
483 Terminate the string and stop converting, \
484 so at least we don't skip any input. */ \
485 (*strptr)[strsize] = '\0'; \
486 ++done; \
487 conv_error (); \
489 else \
491 *strptr = str; \
492 str += strsize; \
493 ++strsize; \
496 else \
498 *strptr = str; \
499 str += strsize; \
500 strsize *= 2; \
504 STRING_ADD_CHAR (c);
505 } while (inchar () != EOF && (width <= 0 || --width > 0));
507 if (!(flags & SUPPRESS))
509 *str = '\0';
510 ++done;
512 break;
514 case 'x': /* Hexadecimal integer. */
515 case 'X': /* Ditto. */
516 base = 16;
517 number_signed = 0;
518 goto number;
520 case 'o': /* Octal integer. */
521 base = 8;
522 number_signed = 0;
523 goto number;
525 case 'u': /* Unsigned decimal integer. */
526 base = 10;
527 number_signed = 0;
528 goto number;
530 case 'd': /* Signed decimal integer. */
531 base = 10;
532 number_signed = 1;
533 goto number;
535 case 'i': /* Generic number. */
536 base = 0;
537 number_signed = 1;
539 number:
540 if (c == EOF)
541 input_error ();
543 /* Check for a sign. */
544 if (c == '-' || c == '+')
546 ADDW (c);
547 if (width > 0)
548 --width;
549 (void) inchar ();
552 /* Look for a leading indication of base. */
553 if (width != 0 && c == '0')
555 if (width > 0)
556 --width;
558 ADDW (c);
559 (void) inchar ();
561 if (width != 0 && tolower (c) == 'x')
563 if (base == 0)
564 base = 16;
565 if (base == 16)
567 if (width > 0)
568 --width;
569 (void) inchar ();
572 else if (base == 0)
573 base = 8;
576 if (base == 0)
577 base = 10;
579 /* Read the number into workspace. */
580 while (c != EOF && width != 0)
582 if (base == 16 ? !isxdigit (c) :
583 ((!isdigit (c) || c - '0' >= base) &&
584 !((flags & GROUP) && base == 10 && c == thousands)))
585 break;
586 ADDW (c);
587 if (width > 0)
588 --width;
590 (void) inchar ();
593 if (wpsize == 0 ||
594 (wpsize == 1 && (wp[0] == '+' || wp[0] == '-')))
595 /* There was no number. */
596 conv_error ();
598 /* Convert the number. */
599 ADDW ('\0');
600 if (flags & LONGDBL)
602 if (number_signed)
603 num.q = __strtoq_internal (wp, &tw, base, flags & GROUP);
604 else
605 num.uq = __strtouq_internal (wp, &tw, base, flags & GROUP);
607 else
609 if (number_signed)
610 num.l = __strtol_internal (wp, &tw, base, flags & GROUP);
611 else
612 num.ul = __strtoul_internal (wp, &tw, base, flags & GROUP);
614 if (wp == tw)
615 conv_error ();
617 if (!(flags & SUPPRESS))
619 if (! number_signed)
621 if (flags & LONGDBL)
622 *ARG (unsigned LONGLONG int *) = num.uq;
623 else if (flags & LONG)
624 *ARG (unsigned long int *) = num.ul;
625 else if (flags & SHORT)
626 *ARG (unsigned short int *)
627 = (unsigned short int) num.ul;
628 else
629 *ARG (unsigned int *) = (unsigned int) num.ul;
631 else
633 if (flags & LONGDBL)
634 *ARG (LONGLONG int *) = num.q;
635 else if (flags & LONG)
636 *ARG (long int *) = num.l;
637 else if (flags & SHORT)
638 *ARG (short int *) = (short int) num.l;
639 else
640 *ARG (int *) = (int) num.l;
642 ++done;
644 break;
646 case 'e': /* Floating-point numbers. */
647 case 'E':
648 case 'f':
649 case 'g':
650 case 'G':
651 if (c == EOF)
652 input_error ();
654 /* Check for a sign. */
655 if (c == '-' || c == '+')
657 negative = c == '-';
658 if (inchar () == EOF)
659 /* EOF is only an input error before we read any chars. */
660 conv_error ();
661 if (width > 0)
662 --width;
664 else
665 negative = 0;
667 got_dot = got_e = 0;
670 if (isdigit (c))
671 ADDW (c);
672 else if (got_e && wp[wpsize - 1] == 'e'
673 && (c == '-' || c == '+'))
674 ADDW (c);
675 else if (wpsize > 0 && !got_e && tolower (c) == 'e')
677 ADDW ('e');
678 got_e = got_dot = 1;
680 else if (c == decimal && !got_dot)
682 ADDW (c);
683 got_dot = 1;
685 else if ((flags & GROUP) && c == thousands && !got_dot)
686 ADDW (c);
687 else
688 break;
689 if (width > 0)
690 --width;
692 while (inchar () != EOF && width != 0);
694 if (wpsize == 0)
695 conv_error ();
697 /* Convert the number. */
698 ADDW ('\0');
699 if (flags & LONGDBL)
701 long double d = __strtold_internal (wp, &tw, flags & GROUP);
702 if (!(flags & SUPPRESS) && tw != wp)
703 *ARG (long double *) = negative ? -d : d;
705 else if (flags & LONG)
707 double d = __strtod_internal (wp, &tw, flags & GROUP);
708 if (!(flags & SUPPRESS) && tw != wp)
709 *ARG (double *) = negative ? -d : d;
711 else
713 float d = __strtof_internal (wp, &tw, flags & GROUP);
714 if (!(flags & SUPPRESS) && tw != wp)
715 *ARG (float *) = negative ? -d : d;
718 if (tw == wp)
719 conv_error ();
721 if (!(flags & SUPPRESS))
722 ++done;
723 break;
725 case '[': /* Character class. */
726 STRING_ARG;
728 if (c == EOF)
729 input_error();
731 if (*f == '^')
733 ++f;
734 not_in = 1;
736 else
737 not_in = 0;
739 /* Fill WP with byte flags indexed by character.
740 We will use this flag map for matching input characters. */
741 if (wpmax < UCHAR_MAX)
743 wpmax = UCHAR_MAX;
744 wp = (char *) alloca (wpmax);
746 memset (wp, 0, UCHAR_MAX);
748 fc = *f;
749 if (fc == ']' || fc == '-')
751 /* If ] or - appears before any char in the set, it is not
752 the terminator or separator, but the first char in the
753 set. */
754 wp[fc] = 1;
755 ++f;
758 while ((fc = *f++) != '\0' && fc != ']')
760 if (fc == '-' && *f != '\0' && *f != ']' &&
761 (unsigned char) f[-2] <= (unsigned char) *f)
763 /* Add all characters from the one before the '-'
764 up to (but not including) the next format char. */
765 for (fc = f[-2]; fc < *f; ++fc)
766 wp[fc] = 1;
768 else
769 /* Add the character to the flag map. */
770 wp[fc] = 1;
772 if (fc == '\0')
773 conv_error();
775 num.ul = read_in;
778 if (wp[c] == not_in)
779 break;
780 STRING_ADD_CHAR (c);
781 if (width > 0)
782 --width;
784 while (inchar () != EOF && width != 0);
785 if (read_in == num.ul)
786 conv_error ();
788 if (!(flags & SUPPRESS))
790 *str = '\0';
791 ++done;
793 break;
795 case 'p': /* Generic pointer. */
796 base = 16;
797 /* A PTR must be the same size as a `long int'. */
798 flags &= ~(SHORT|LONGDBL);
799 flags |= LONG;
800 number_signed = 0;
801 goto number;
805 /* The last thing we saw int the format string was a white space.
806 Consume the last white spaces. */
807 if (skip_space)
808 while (isspace (c))
809 (void) inchar ();
811 /* Unlock stream. */
812 funlockfile (s);
814 return ((void) (c == EOF || ungetc (c, s)), done);
817 #ifdef USE_IN_LIBIO
819 __vfscanf (FILE *s, const char *format, va_list argptr)
821 return _IO_vfscanf (s, format, argptr, NULL);
823 #endif
825 weak_alias (__vfscanf, vfscanf)