Wed Jan 24 04:18:36 1996 Paul Eggert <eggert@twinsun.com>
[glibc.git] / stdio-common / vfscanf.c
blob8a799acf78ae9ce6162e67b4796c7e1a4be8ffec
/* Copyright (C) 1991, 92, 93, 94, 95, 96 Free Software Foundation, Inc.
This file is part of the GNU C Library.

The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.

The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Library General Public License for more details.

You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB.  If
not, write to the Free Software Foundation, Inc., 675 Mass Ave,
Cambridge, MA 02139, USA.  */
19 #include <ansidecl.h>
20 #include "../locale/localeinfo.h"
21 #include <errno.h>
22 #include <limits.h>
23 #include <ctype.h>
24 #include <stdarg.h>
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
/* Widest integer type the integer conversions may store through.  */
#ifdef __GNUC__
#define HAVE_LONGLONG
#define LONGLONG long long
#else
#define LONGLONG long
#endif

/* These are the flag bits collected while parsing one conversion
   specification of the format string.  */
# define LONG     0x001 /* l: long or double */
# define LONGDBL  0x002 /* L: long long or long double */
# define SHORT    0x004 /* h: short */
# define SUPPRESS 0x008 /* *: suppress assignment */
# define POINTER  0x010 /* weird %p pointer (`fake hex') */
# define NOSKIP   0x020 /* do not skip blanks */
# define WIDTH    0x040 /* width was given */
# define GROUP    0x080 /* ': group numbers */
# define MALLOC   0x100 /* a: malloc strings */

/* The helper macros below are meant to be expanded inside the scanning
   function only: they refer to its locals `c' (read-ahead character),
   `s' (the stream), `read_in' (characters consumed), `done'
   (assignments made) and, in the libio build, `errp'.  The *_error
   macros expand to a `return' statement.  */
#ifdef USE_IN_LIBIO
# include <libioP.h>
# include <libio.h>

# define va_list _IO_va_list
# define ungetc(c, s) _IO_ungetc (c, s)
/* Read the next character into C and count it.  */
# define inchar() ((c = _IO_getc (s)), (void) ++read_in, c)
/* Matching failure: flag it in *ERRP, push the read-ahead back unless
   it is EOF, and return the number of assignments made so far.  */
# define conv_error() return ((void) (errp != NULL && (*errp |= 2)), \
                              (void) (c == EOF || _IO_ungetc (c, s)), done)

/* Input failure: flag it in *ERRP and return EOF if nothing has been
   assigned yet, otherwise the number of assignments.  */
# define input_error() return ((void) (errp != NULL && (*errp |= 1)), \
                               done == 0 ? EOF : done)
# define memory_error() return ((void) (errno = ENOMEM), EOF)
# define ARGCHECK(s, format) \
  do \
    { \
      /* Check file argument for consistency.  */ \
      CHECK_FILE (s, -1); \
      if (s->_flags & _IO_NO_READS || format == NULL) \
        { \
          MAYBE_SET_EINVAL; \
          return -1; \
        } \
    } while (0)
#else
/* Read the next character into C and count it.  */
# define inchar() ((c = getc (s)), (void) ++read_in, c)
/* Matching failure: push the read-ahead back unless it is EOF (same
   guard as the libio variant) and return the assignments made so far.  */
# define conv_error() return ((void) (c == EOF || ungetc (c, s)), done)
/* Input failure: EOF if nothing has been assigned yet.  */
# define input_error() return (done == 0 ? EOF : done)
# define memory_error() return ((void) (errno = ENOMEM), EOF)
# define ARGCHECK(s, format) \
  do \
    { \
      /* Check file argument for consistency.  */ \
      if (!__validfp (s) || !s->__mode.__read || format == NULL) \
        { \
          errno = EINVAL; \
          return -1; \
        } \
    } while (0)
#endif
91 /* Read formatted input from S according to the format string
92 FORMAT, using the argument list in ARG.
93 Return the number of assignments made, or -1 for an input error. */
94 #ifdef USE_IN_LIBIO
95 int
96 _IO_vfscanf (s, format, argptr, errp)
97 _IO_FILE *s;
98 const char *format;
99 _IO_va_list argptr;
100 int *errp;
101 #else
103 __vfscanf (FILE *s, const char *format, va_list argptr)
104 #endif
106 va_list arg = (va_list) argptr;
108 register const char *f = format;
109 register unsigned char fc; /* Current character of the format. */
110 register size_t done = 0; /* Assignments done. */
111 register size_t read_in = 0; /* Chars read in. */
112 register int c; /* Last char read. */
113 register int width; /* Maximum field width. */
114 register int flags; /* Modifiers for current format element. */
116 /* Status for reading F-P nums. */
117 char got_dot, got_e;
118 /* If a [...] is a [^...]. */
119 char not_in;
120 /* Base for integral numbers. */
121 int base;
122 /* Signedness for integral numbers. */
123 int number_signed;
124 /* Decimal point character. */
125 wchar_t decimal;
126 /* The thousands character of the current locale. */
127 wchar_t thousands;
128 /* Integral holding variables. */
129 union
131 long long int q;
132 unsigned long long int uq;
133 long int l;
134 unsigned long int ul;
135 } num;
136 /* Character-buffer pointer. */
137 register char *str, **strptr;
138 size_t strsize;
139 /* We must not react on white spaces immediately because they can
140 possibly be matched even if in the input stream no character is
141 available anymore. */
142 int skip_space = 0;
143 /* Workspace. */
144 char *tw; /* Temporary pointer. */
145 char *wp = NULL; /* Workspace. */
146 size_t wpmax = 0; /* Maximal size of workspace. */
147 size_t wpsize; /* Currently used bytes in workspace. */
148 #define ADDW(Ch) \
149 do \
151 if (wpsize == wpmax) \
153 char *old = wp; \
154 wpmax = UCHAR_MAX > 2 * wpmax ? UCHAR_MAX : 2 * wpmax; \
155 wp = (char *) alloca (wpmax); \
156 if (old != NULL) \
157 memcpy (wp, old, wpsize); \
159 wp[wpsize++] = (Ch); \
161 while (0)
163 ARGCHECK (s, format);
165 /* Figure out the decimal point character. */
166 if (mbtowc (&decimal, _NL_CURRENT (LC_NUMERIC, DECIMAL_POINT),
167 strlen (_NL_CURRENT (LC_NUMERIC, DECIMAL_POINT))) <= 0)
168 decimal = (wchar_t) *_NL_CURRENT (LC_NUMERIC, DECIMAL_POINT);
169 /* Figure out the thousands separator character. */
170 if (mbtowc (&thousands, _NL_CURRENT (LC_NUMERIC, THOUSANDS_SEP),
171 strlen (_NL_CURRENT (LC_NUMERIC, THOUSANDS_SEP))) <= 0)
172 thousands = (wchar_t) *_NL_CURRENT (LC_NUMERIC, THOUSANDS_SEP);
174 c = inchar ();
176 /* Run through the format string. */
177 while (*f != '\0')
179 unsigned int argpos;
180 /* Extract the next argument, which is of type TYPE.
181 For a %N$... spec, this is the Nth argument from the beginning;
182 otherwise it is the next argument after the state now in ARG. */
183 #if 0
184 /* XXX Possible optimization. */
185 # define ARG(type) (argpos == 0 ? va_arg (arg, type) : \
186 ({ va_list arg = (va_list) argptr; \
187 arg = (va_list) ((char *) arg \
188 + (argpos - 1) \
189 * __va_rounded_size (void *)); \
190 va_arg (arg, type); \
192 #else
193 # define ARG(type) (argpos == 0 ? va_arg (arg, type) : \
194 ({ unsigned int pos = argpos; \
195 va_list arg = (va_list) argptr; \
196 while (--pos > 0) \
197 (void) va_arg (arg, void *); \
198 va_arg (arg, type); \
200 #endif
202 if (!isascii (*f))
204 /* Non-ASCII, may be a multibyte. */
205 int len = mblen (f, strlen (f));
206 if (len > 0)
208 while (len-- > 0)
209 if (c == EOF)
210 input_error ();
211 else if (c == *f++)
212 (void) inchar ();
213 else
214 conv_error ();
215 continue;
219 fc = *f++;
220 if (fc != '%')
222 /* Remember to skip spaces. */
223 if (isspace (fc))
225 skip_space = 1;
226 continue;
229 /* Characters other than format specs must just match. */
230 if (c == EOF)
231 input_error ();
233 /* We saw white space char as the last character in the format
234 string. Now it's time to skip all leading white space. */
235 if (skip_space)
237 while (isspace (c))
238 (void) inchar ();
239 skip_space = 0;
242 if (c == fc)
243 (void) inchar ();
244 else
245 conv_error ();
247 continue;
250 /* This is the start of the conversion string. */
251 flags = 0;
253 /* Initialize state of modifiers. */
254 argpos = 0;
256 /* Prepare temporary buffer. */
257 wpsize = 0;
259 /* Check for a positional parameter specification. */
260 if (isdigit (*f))
262 argpos = *f++ - '0';
263 while (isdigit (*f))
264 argpos = argpos * 10 + (*f++ - '0');
265 if (*f == '$')
266 ++f;
267 else
269 /* Oops; that was actually the field width. */
270 width = argpos;
271 flags |= WIDTH;
272 argpos = 0;
273 goto got_width;
277 /* Check for the assignment-suppressant and the number grouping flag. */
278 while (*f == '*' || *f == '\'')
279 switch (*f++)
281 case '*':
282 flags |= SUPPRESS;
283 break;
284 case '\'':
285 flags |= GROUP;
286 break;
289 /* We have seen width. */
290 if (isdigit (*f))
291 flags |= WIDTH;
293 /* Find the maximum field width. */
294 width = 0;
295 while (isdigit (*f))
297 width *= 10;
298 width += *f++ - '0';
300 got_width:
301 if (width == 0)
302 width = -1;
304 /* Check for type modifiers. */
305 while (*f == 'h' || *f == 'l' || *f == 'L' || *f == 'a' || *f == 'q')
306 switch (*f++)
308 case 'h':
309 /* int's are short int's. */
310 if (flags & (LONG|LONGDBL))
311 /* Signal illegal format element. */
312 conv_error ();
313 flags |= SHORT;
314 break;
315 case 'l':
316 if (flags & SHORT)
317 conv_error ();
318 else if (flags & LONG)
320 /* A double `l' is equivalent to an `L'. */
321 flags &= ~LONG;
322 flags |= LONGDBL;
324 else
325 /* int's are long int's. */
326 flags |= LONG;
327 break;
328 case 'q':
329 case 'L':
330 /* double's are long double's, and int's are long long int's. */
331 if (flags & (LONG|SHORT))
332 /* Signal illegal format element. */
333 conv_error ();
334 flags |= LONGDBL;
335 break;
336 case 'a':
337 /* String conversions (%s, %[) take a `char **'
338 arg and fill it in with a malloc'd pointer. */
339 flags |= MALLOC;
340 break;
343 /* End of the format string? */
344 if (*f == '\0')
345 conv_error ();
347 /* Find the conversion specifier. */
348 fc = *f++;
349 if (skip_space || (fc != '[' && fc != 'c' && fc != 'n'))
351 /* Eat whitespace. */
352 while (isspace (c))
353 (void) inchar ();
354 skip_space = 0;
357 switch (fc)
359 case '%': /* Must match a literal '%'. */
360 if (c != fc)
361 conv_error ();
362 inchar ();
363 break;
365 case 'n': /* Answer number of assignments done. */
366 if (!(flags & SUPPRESS))
367 *ARG (int *) = read_in - 1; /* Don't count the read-ahead. */
368 break;
370 case 'c': /* Match characters. */
371 if (!(flags & SUPPRESS))
373 str = ARG (char *);
374 if (str == NULL)
375 conv_error ();
378 if (c == EOF)
379 input_error ();
381 if (width == -1)
382 width = 1;
384 if (!(flags & SUPPRESS))
387 *str++ = c;
388 while (inchar () != EOF && --width > 0);
390 else
391 while (inchar () != EOF && --width > 0);
393 if (!(flags & SUPPRESS))
394 ++done;
396 break;
398 case 's': /* Read a string. */
399 #define STRING_ARG \
400 if (!(flags & SUPPRESS)) \
402 if (flags & MALLOC) \
404 /* The string is to be stored in a malloc'd buffer. */ \
405 strptr = ARG (char **); \
406 if (strptr == NULL) \
407 conv_error (); \
408 /* Allocate an initial buffer. */ \
409 strsize = 100; \
410 *strptr = str = malloc (strsize); \
412 else \
413 str = ARG (char *); \
414 if (str == NULL) \
415 conv_error (); \
417 STRING_ARG;
419 if (c == EOF)
420 input_error ();
424 if (isspace (c))
425 break;
426 #define STRING_ADD_CHAR(c) \
427 if (!(flags & SUPPRESS)) \
429 *str++ = c; \
430 if ((flags & MALLOC) && str == *strptr + strsize) \
432 /* Enlarge the buffer. */ \
433 str = realloc (*strptr, strsize * 2); \
434 if (str == NULL) \
436 /* Can't allocate that much. Last-ditch effort. */\
437 str = realloc (*strptr, strsize + 1); \
438 if (str == NULL) \
440 /* We lose. Oh well. \
441 Terminate the string and stop converting, \
442 so at least we don't skip any input. */ \
443 (*strptr)[strsize] = '\0'; \
444 ++done; \
445 conv_error (); \
447 else \
449 *strptr = str; \
450 str += strsize; \
451 ++strsize; \
454 else \
456 *strptr = str; \
457 str += strsize; \
458 strsize *= 2; \
462 STRING_ADD_CHAR (c);
463 } while (inchar () != EOF && (width <= 0 || --width > 0));
465 if (!(flags & SUPPRESS))
467 *str = '\0';
468 ++done;
470 break;
472 case 'x': /* Hexadecimal integer. */
473 case 'X': /* Ditto. */
474 base = 16;
475 number_signed = 0;
476 goto number;
478 case 'o': /* Octal integer. */
479 base = 8;
480 number_signed = 0;
481 goto number;
483 case 'u': /* Unsigned decimal integer. */
484 base = 10;
485 number_signed = 0;
486 goto number;
488 case 'd': /* Signed decimal integer. */
489 base = 10;
490 number_signed = 1;
491 goto number;
493 case 'i': /* Generic number. */
494 base = 0;
495 number_signed = 1;
497 number:
498 if (c == EOF)
499 input_error ();
501 /* Check for a sign. */
502 if (c == '-' || c == '+')
504 ADDW (c);
505 if (width > 0)
506 --width;
507 (void) inchar ();
510 /* Look for a leading indication of base. */
511 if (width != 0 && c == '0')
513 if (width > 0)
514 --width;
515 ADDW ('0');
517 (void) inchar ();
519 if (width != 0 && tolower (c) == 'x')
521 if (base == 0)
522 base = 16;
523 if (base == 16)
525 if (width > 0)
526 --width;
527 (void) inchar ();
530 else if (base == 0)
531 base = 8;
534 if (base == 0)
535 base = 10;
537 /* Read the number into workspace. */
538 while (c != EOF && width != 0)
540 if (base == 16 ? !isxdigit (c) :
541 ((!isdigit (c) || c - '0' >= base) &&
542 !((flags & GROUP) && base == 10 && c == thousands)))
543 break;
544 ADDW (c);
545 if (width > 0)
546 --width;
548 (void) inchar ();
551 if (wpsize == 0 ||
552 (wpsize == 1 && (wp[0] == '+' || wp[0] == '-')))
553 /* There was no number. */
554 conv_error ();
556 /* Convert the number. */
557 ADDW ('\0');
558 if (flags & LONGDBL)
560 if (number_signed)
561 num.q = __strtoq_internal (wp, &tw, base, flags & GROUP);
562 else
563 num.uq = __strtouq_internal (wp, &tw, base, flags & GROUP);
565 else
567 if (number_signed)
568 num.l = __strtol_internal (wp, &tw, base, flags & GROUP);
569 else
570 num.ul = __strtoul_internal (wp, &tw, base, flags & GROUP);
572 if (wp == tw)
573 conv_error ();
575 if (!(flags & SUPPRESS))
577 if (! number_signed)
579 if (flags & LONGDBL)
580 *ARG (unsigned LONGLONG int *) = num.uq;
581 else if (flags & LONG)
582 *ARG (unsigned long int *) = num.ul;
583 else if (flags & SHORT)
584 *ARG (unsigned short int *)
585 = (unsigned short int) num.ul;
586 else
587 *ARG (unsigned int *) = (unsigned int) num.ul;
589 else
591 if (flags & LONGDBL)
592 *ARG (LONGLONG int *) = num.q;
593 else if (flags & LONG)
594 *ARG (long int *) = num.l;
595 else if (flags & SHORT)
596 *ARG (short int *) = (short int) num.l;
597 else
598 *ARG (int *) = (int) num.l;
600 ++done;
602 break;
604 case 'e': /* Floating-point numbers. */
605 case 'E':
606 case 'f':
607 case 'g':
608 case 'G':
609 if (c == EOF)
610 input_error ();
612 /* Check for a sign. */
613 if (c == '-' || c == '+')
615 ADDW (c);
616 if (inchar () == EOF)
617 /* EOF is only an input error before we read any chars. */
618 conv_error ();
619 if (width > 0)
620 --width;
623 got_dot = got_e = 0;
626 if (isdigit (c))
627 ADDW (c);
628 else if (got_e && wp[wpsize - 1] == 'e'
629 && (c == '-' || c == '+'))
630 ADDW (c);
631 else if (!got_e && tolower (c) == 'e')
633 ADDW ('e');
634 got_e = got_dot = 1;
636 else if (c == decimal && !got_dot)
638 ADDW (c);
639 got_dot = 1;
641 else if ((flags & GROUP) && c == thousands && !got_dot)
642 ADDW (c);
643 else
644 break;
645 if (width > 0)
646 --width;
647 } while (inchar () != EOF && width != 0);
649 if (wpsize == 0)
650 conv_error();
651 if (wp[wpsize - 1] == '-' || wp[wpsize - 1] == '+'
652 || wp[wpsize - 1] == 'e')
653 conv_error ();
655 /* Convert the number. */
656 ADDW ('\0');
657 if (flags & LONGDBL)
659 long double d = __strtold_internal (wp, &tw, flags & GROUP);
660 if (!(flags & SUPPRESS) && tw != wp)
661 *ARG (long double *) = d;
663 else if (flags & LONG)
665 double d = __strtod_internal (wp, &tw, flags & GROUP);
666 if (!(flags & SUPPRESS) && tw != wp)
667 *ARG (double *) = d;
669 else
671 float d = __strtof_internal (wp, &tw, flags & GROUP);
672 if (!(flags & SUPPRESS) && tw != wp)
673 *ARG (float *) = d;
676 if (tw == wp)
677 conv_error ();
679 if (!(flags & SUPPRESS))
680 ++done;
681 break;
683 case '[': /* Character class. */
684 STRING_ARG;
686 if (c == EOF)
687 input_error();
689 if (*f == '^')
691 ++f;
692 not_in = 1;
694 else
695 not_in = 0;
697 /* Fill WP with byte flags indexed by character.
698 We will use this flag map for matching input characters. */
699 if (wpmax < UCHAR_MAX)
701 wpmax = UCHAR_MAX;
702 wp = (char *) alloca (wpmax);
704 memset (wp, 0, UCHAR_MAX);
706 fc = *f;
707 if (fc == ']' || fc == '-')
709 /* If ] or - appears before any char in the set, it is not
710 the terminator or separator, but the first char in the
711 set. */
712 wp[fc] = 1;
713 ++f;
716 while ((fc = *f++) != '\0' && fc != ']')
718 if (fc == '-' && *f != '\0' && *f != ']' &&
719 (unsigned char) f[-2] <= (unsigned char) *f)
721 /* Add all characters from the one before the '-'
722 up to (but not including) the next format char. */
723 for (fc = f[-2]; fc < *f; ++fc)
724 wp[fc] = 1;
726 else
727 /* Add the character to the flag map. */
728 wp[fc] = 1;
730 if (fc == '\0')
731 conv_error();
733 num.ul = read_in;
736 if (wp[c] == not_in)
737 break;
738 STRING_ADD_CHAR (c);
739 if (width > 0)
740 --width;
741 } while (inchar () != EOF && width != 0);
742 if (read_in == num.ul)
743 conv_error ();
745 if (!(flags & SUPPRESS))
747 *str = '\0';
748 ++done;
750 break;
752 case 'p': /* Generic pointer. */
753 base = 16;
754 /* A PTR must be the same size as a `long int'. */
755 flags &= ~(SHORT|LONGDBL);
756 flags |= LONG;
757 number_signed = 0;
758 goto number;
762 /* The last thing we saw int the format string was a white space.
763 Consume the last white spaces. */
764 if (skip_space)
765 while (isspace (c))
766 (void) inchar ();
768 return ((void) (c == EOF || ungetc (c, s)), done);
771 #ifdef USE_IN_LIBIO
773 __vfscanf (FILE *s, const char *format, va_list argptr)
775 return _IO_vfscanf (s, format, argptr, NULL);
777 #endif
779 weak_alias (__vfscanf, vfscanf)