unistr/u{8,16,32}-uctomb: Avoid possible trouble with huge strings.
[gnulib.git] / lib / printf-parse.c
blobbfe85b1ad79d40aed2880fe290c50a66296021a0
/* Formatted output to strings.
   Copyright (C) 1999-2000, 2002-2003, 2006-2020 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License along
   with this program; if not, see <https://www.gnu.org/licenses/>.  */

/* This file can be parametrized with the following macros:
     CHAR_T             The element type of the format string.
     CHAR_T_ONLY_ASCII  Set to 1 to enable verification that all characters
                        in the format string are ASCII.
     DIRECTIVE          Structure denoting a format directive.
                        Depends on CHAR_T.
     DIRECTIVES         Structure denoting the set of format directives of a
                        format string.  Depends on CHAR_T.
     PRINTF_PARSE       Function that parses a format string.
                        Depends on CHAR_T.
     STATIC             Set to 'static' to declare the function static.
     ENABLE_UNISTDIO    Set to 1 to enable the unistdio extensions.  */
30 #ifndef PRINTF_PARSE
31 # include <config.h>
32 #endif
34 /* Specification. */
35 #ifndef PRINTF_PARSE
36 # include "printf-parse.h"
37 #endif
39 /* Default parameters. */
40 #ifndef PRINTF_PARSE
41 # define PRINTF_PARSE printf_parse
42 # define CHAR_T char
43 # define DIRECTIVE char_directive
44 # define DIRECTIVES char_directives
45 #endif
47 /* Get size_t, NULL. */
48 #include <stddef.h>
50 /* Get intmax_t. */
51 #if defined IN_LIBINTL || defined IN_LIBASPRINTF
52 # if HAVE_STDINT_H_WITH_UINTMAX
53 # include <stdint.h>
54 # endif
55 # if HAVE_INTTYPES_H_WITH_UINTMAX
56 # include <inttypes.h>
57 # endif
58 #else
59 # include <stdint.h>
60 #endif
62 /* malloc(), realloc(), free(). */
63 #include <stdlib.h>
65 /* memcpy(). */
66 #include <string.h>
68 /* errno. */
69 #include <errno.h>
71 /* Checked size_t computations. */
72 #include "xsize.h"
74 #if CHAR_T_ONLY_ASCII
75 /* c_isascii(). */
76 # include "c-ctype.h"
77 #endif
79 #ifdef STATIC
80 STATIC
81 #endif
82 int
83 PRINTF_PARSE (const CHAR_T *format, DIRECTIVES *d, arguments *a)
85 const CHAR_T *cp = format; /* pointer into format */
86 size_t arg_posn = 0; /* number of regular arguments consumed */
87 size_t d_allocated; /* allocated elements of d->dir */
88 size_t a_allocated; /* allocated elements of a->arg */
89 size_t max_width_length = 0;
90 size_t max_precision_length = 0;
92 d->count = 0;
93 d_allocated = N_DIRECT_ALLOC_DIRECTIVES;
94 d->dir = d->direct_alloc_dir;
96 a->count = 0;
97 a_allocated = N_DIRECT_ALLOC_ARGUMENTS;
98 a->arg = a->direct_alloc_arg;
100 #define REGISTER_ARG(_index_,_type_) \
102 size_t n = (_index_); \
103 if (n >= a_allocated) \
105 size_t memory_size; \
106 argument *memory; \
108 a_allocated = xtimes (a_allocated, 2); \
109 if (a_allocated <= n) \
110 a_allocated = xsum (n, 1); \
111 memory_size = xtimes (a_allocated, sizeof (argument)); \
112 if (size_overflow_p (memory_size)) \
113 /* Overflow, would lead to out of memory. */ \
114 goto out_of_memory; \
115 memory = (argument *) (a->arg != a->direct_alloc_arg \
116 ? realloc (a->arg, memory_size) \
117 : malloc (memory_size)); \
118 if (memory == NULL) \
119 /* Out of memory. */ \
120 goto out_of_memory; \
121 if (a->arg == a->direct_alloc_arg) \
122 memcpy (memory, a->arg, a->count * sizeof (argument)); \
123 a->arg = memory; \
125 while (a->count <= n) \
126 a->arg[a->count++].type = TYPE_NONE; \
127 if (a->arg[n].type == TYPE_NONE) \
128 a->arg[n].type = (_type_); \
129 else if (a->arg[n].type != (_type_)) \
130 /* Ambiguous type for positional argument. */ \
131 goto error; \
134 while (*cp != '\0')
136 CHAR_T c = *cp++;
137 if (c == '%')
139 size_t arg_index = ARG_NONE;
140 DIRECTIVE *dp = &d->dir[d->count]; /* pointer to next directive */
142 /* Initialize the next directive. */
143 dp->dir_start = cp - 1;
144 dp->flags = 0;
145 dp->width_start = NULL;
146 dp->width_end = NULL;
147 dp->width_arg_index = ARG_NONE;
148 dp->precision_start = NULL;
149 dp->precision_end = NULL;
150 dp->precision_arg_index = ARG_NONE;
151 dp->arg_index = ARG_NONE;
153 /* Test for positional argument. */
154 if (*cp >= '0' && *cp <= '9')
156 const CHAR_T *np;
158 for (np = cp; *np >= '0' && *np <= '9'; np++)
160 if (*np == '$')
162 size_t n = 0;
164 for (np = cp; *np >= '0' && *np <= '9'; np++)
165 n = xsum (xtimes (n, 10), *np - '0');
166 if (n == 0)
167 /* Positional argument 0. */
168 goto error;
169 if (size_overflow_p (n))
170 /* n too large, would lead to out of memory later. */
171 goto error;
172 arg_index = n - 1;
173 cp = np + 1;
177 /* Read the flags. */
178 for (;;)
180 if (*cp == '\'')
182 dp->flags |= FLAG_GROUP;
183 cp++;
185 else if (*cp == '-')
187 dp->flags |= FLAG_LEFT;
188 cp++;
190 else if (*cp == '+')
192 dp->flags |= FLAG_SHOWSIGN;
193 cp++;
195 else if (*cp == ' ')
197 dp->flags |= FLAG_SPACE;
198 cp++;
200 else if (*cp == '#')
202 dp->flags |= FLAG_ALT;
203 cp++;
205 else if (*cp == '0')
207 dp->flags |= FLAG_ZERO;
208 cp++;
210 #if __GLIBC__ >= 2 && !defined __UCLIBC__
211 else if (*cp == 'I')
213 dp->flags |= FLAG_LOCALIZED;
214 cp++;
216 #endif
217 else
218 break;
221 /* Parse the field width. */
222 if (*cp == '*')
224 dp->width_start = cp;
225 cp++;
226 dp->width_end = cp;
227 if (max_width_length < 1)
228 max_width_length = 1;
230 /* Test for positional argument. */
231 if (*cp >= '0' && *cp <= '9')
233 const CHAR_T *np;
235 for (np = cp; *np >= '0' && *np <= '9'; np++)
237 if (*np == '$')
239 size_t n = 0;
241 for (np = cp; *np >= '0' && *np <= '9'; np++)
242 n = xsum (xtimes (n, 10), *np - '0');
243 if (n == 0)
244 /* Positional argument 0. */
245 goto error;
246 if (size_overflow_p (n))
247 /* n too large, would lead to out of memory later. */
248 goto error;
249 dp->width_arg_index = n - 1;
250 cp = np + 1;
253 if (dp->width_arg_index == ARG_NONE)
255 dp->width_arg_index = arg_posn++;
256 if (dp->width_arg_index == ARG_NONE)
257 /* arg_posn wrapped around. */
258 goto error;
260 REGISTER_ARG (dp->width_arg_index, TYPE_INT);
262 else if (*cp >= '0' && *cp <= '9')
264 size_t width_length;
266 dp->width_start = cp;
267 for (; *cp >= '0' && *cp <= '9'; cp++)
269 dp->width_end = cp;
270 width_length = dp->width_end - dp->width_start;
271 if (max_width_length < width_length)
272 max_width_length = width_length;
275 /* Parse the precision. */
276 if (*cp == '.')
278 cp++;
279 if (*cp == '*')
281 dp->precision_start = cp - 1;
282 cp++;
283 dp->precision_end = cp;
284 if (max_precision_length < 2)
285 max_precision_length = 2;
287 /* Test for positional argument. */
288 if (*cp >= '0' && *cp <= '9')
290 const CHAR_T *np;
292 for (np = cp; *np >= '0' && *np <= '9'; np++)
294 if (*np == '$')
296 size_t n = 0;
298 for (np = cp; *np >= '0' && *np <= '9'; np++)
299 n = xsum (xtimes (n, 10), *np - '0');
300 if (n == 0)
301 /* Positional argument 0. */
302 goto error;
303 if (size_overflow_p (n))
304 /* n too large, would lead to out of memory
305 later. */
306 goto error;
307 dp->precision_arg_index = n - 1;
308 cp = np + 1;
311 if (dp->precision_arg_index == ARG_NONE)
313 dp->precision_arg_index = arg_posn++;
314 if (dp->precision_arg_index == ARG_NONE)
315 /* arg_posn wrapped around. */
316 goto error;
318 REGISTER_ARG (dp->precision_arg_index, TYPE_INT);
320 else
322 size_t precision_length;
324 dp->precision_start = cp - 1;
325 for (; *cp >= '0' && *cp <= '9'; cp++)
327 dp->precision_end = cp;
328 precision_length = dp->precision_end - dp->precision_start;
329 if (max_precision_length < precision_length)
330 max_precision_length = precision_length;
335 arg_type type;
337 /* Parse argument type/size specifiers. */
339 int flags = 0;
341 for (;;)
343 if (*cp == 'h')
345 flags |= (1 << (flags & 1));
346 cp++;
348 else if (*cp == 'L')
350 flags |= 4;
351 cp++;
353 else if (*cp == 'l')
355 flags += 8;
356 cp++;
358 else if (*cp == 'j')
360 if (sizeof (intmax_t) > sizeof (long))
362 /* intmax_t = long long */
363 flags += 16;
365 else if (sizeof (intmax_t) > sizeof (int))
367 /* intmax_t = long */
368 flags += 8;
370 cp++;
372 else if (*cp == 'z' || *cp == 'Z')
374 /* 'z' is standardized in ISO C 99, but glibc uses 'Z'
375 because the warning facility in gcc-2.95.2 understands
376 only 'Z' (see gcc-2.95.2/gcc/c-common.c:1784). */
377 if (sizeof (size_t) > sizeof (long))
379 /* size_t = long long */
380 flags += 16;
382 else if (sizeof (size_t) > sizeof (int))
384 /* size_t = long */
385 flags += 8;
387 cp++;
389 else if (*cp == 't')
391 if (sizeof (ptrdiff_t) > sizeof (long))
393 /* ptrdiff_t = long long */
394 flags += 16;
396 else if (sizeof (ptrdiff_t) > sizeof (int))
398 /* ptrdiff_t = long */
399 flags += 8;
401 cp++;
403 #if defined __APPLE__ && defined __MACH__
404 /* On Mac OS X 10.3, PRIdMAX is defined as "qd".
405 We cannot change it to "lld" because PRIdMAX must also
406 be understood by the system's printf routines. */
407 else if (*cp == 'q')
409 if (64 / 8 > sizeof (long))
411 /* int64_t = long long */
412 flags += 16;
414 else
416 /* int64_t = long */
417 flags += 8;
419 cp++;
421 #endif
422 #if defined _WIN32 && ! defined __CYGWIN__
423 /* On native Windows, PRIdMAX is defined as "I64d".
424 We cannot change it to "lld" because PRIdMAX must also
425 be understood by the system's printf routines. */
426 else if (*cp == 'I' && cp[1] == '6' && cp[2] == '4')
428 if (64 / 8 > sizeof (long))
430 /* __int64 = long long */
431 flags += 16;
433 else
435 /* __int64 = long */
436 flags += 8;
438 cp += 3;
440 #endif
441 else
442 break;
445 /* Read the conversion character. */
446 c = *cp++;
447 switch (c)
449 case 'd': case 'i':
450 /* If 'long long' is larger than 'long': */
451 if (flags >= 16 || (flags & 4))
452 type = TYPE_LONGLONGINT;
453 else
454 /* If 'long long' is the same as 'long', we parse "lld" into
455 TYPE_LONGINT. */
456 if (flags >= 8)
457 type = TYPE_LONGINT;
458 else if (flags & 2)
459 type = TYPE_SCHAR;
460 else if (flags & 1)
461 type = TYPE_SHORT;
462 else
463 type = TYPE_INT;
464 break;
465 case 'o': case 'u': case 'x': case 'X':
466 /* If 'unsigned long long' is larger than 'unsigned long': */
467 if (flags >= 16 || (flags & 4))
468 type = TYPE_ULONGLONGINT;
469 else
470 /* If 'unsigned long long' is the same as 'unsigned long', we
471 parse "llu" into TYPE_ULONGINT. */
472 if (flags >= 8)
473 type = TYPE_ULONGINT;
474 else if (flags & 2)
475 type = TYPE_UCHAR;
476 else if (flags & 1)
477 type = TYPE_USHORT;
478 else
479 type = TYPE_UINT;
480 break;
481 case 'f': case 'F': case 'e': case 'E': case 'g': case 'G':
482 case 'a': case 'A':
483 if (flags >= 16 || (flags & 4))
484 type = TYPE_LONGDOUBLE;
485 else
486 type = TYPE_DOUBLE;
487 break;
488 case 'c':
489 if (flags >= 8)
490 #if HAVE_WINT_T
491 type = TYPE_WIDE_CHAR;
492 #else
493 goto error;
494 #endif
495 else
496 type = TYPE_CHAR;
497 break;
498 #if HAVE_WINT_T
499 case 'C':
500 type = TYPE_WIDE_CHAR;
501 c = 'c';
502 break;
503 #endif
504 case 's':
505 if (flags >= 8)
506 #if HAVE_WCHAR_T
507 type = TYPE_WIDE_STRING;
508 #else
509 goto error;
510 #endif
511 else
512 type = TYPE_STRING;
513 break;
514 #if HAVE_WCHAR_T
515 case 'S':
516 type = TYPE_WIDE_STRING;
517 c = 's';
518 break;
519 #endif
520 case 'p':
521 type = TYPE_POINTER;
522 break;
523 case 'n':
524 /* If 'long long' is larger than 'long': */
525 if (flags >= 16 || (flags & 4))
526 type = TYPE_COUNT_LONGLONGINT_POINTER;
527 else
528 /* If 'long long' is the same as 'long', we parse "lln" into
529 TYPE_COUNT_LONGINT_POINTER. */
530 if (flags >= 8)
531 type = TYPE_COUNT_LONGINT_POINTER;
532 else if (flags & 2)
533 type = TYPE_COUNT_SCHAR_POINTER;
534 else if (flags & 1)
535 type = TYPE_COUNT_SHORT_POINTER;
536 else
537 type = TYPE_COUNT_INT_POINTER;
538 break;
539 #if ENABLE_UNISTDIO
540 /* The unistdio extensions. */
541 case 'U':
542 if (flags >= 16)
543 type = TYPE_U32_STRING;
544 else if (flags >= 8)
545 type = TYPE_U16_STRING;
546 else
547 type = TYPE_U8_STRING;
548 break;
549 #endif
550 case '%':
551 type = TYPE_NONE;
552 break;
553 default:
554 /* Unknown conversion character. */
555 goto error;
559 if (type != TYPE_NONE)
561 dp->arg_index = arg_index;
562 if (dp->arg_index == ARG_NONE)
564 dp->arg_index = arg_posn++;
565 if (dp->arg_index == ARG_NONE)
566 /* arg_posn wrapped around. */
567 goto error;
569 REGISTER_ARG (dp->arg_index, type);
571 dp->conversion = c;
572 dp->dir_end = cp;
575 d->count++;
576 if (d->count >= d_allocated)
578 size_t memory_size;
579 DIRECTIVE *memory;
581 d_allocated = xtimes (d_allocated, 2);
582 memory_size = xtimes (d_allocated, sizeof (DIRECTIVE));
583 if (size_overflow_p (memory_size))
584 /* Overflow, would lead to out of memory. */
585 goto out_of_memory;
586 memory = (DIRECTIVE *) (d->dir != d->direct_alloc_dir
587 ? realloc (d->dir, memory_size)
588 : malloc (memory_size));
589 if (memory == NULL)
590 /* Out of memory. */
591 goto out_of_memory;
592 if (d->dir == d->direct_alloc_dir)
593 memcpy (memory, d->dir, d->count * sizeof (DIRECTIVE));
594 d->dir = memory;
597 #if CHAR_T_ONLY_ASCII
598 else if (!c_isascii (c))
600 /* Non-ASCII character. Not supported. */
601 goto error;
603 #endif
605 d->dir[d->count].dir_start = cp;
607 d->max_width_length = max_width_length;
608 d->max_precision_length = max_precision_length;
609 return 0;
611 error:
612 if (a->arg != a->direct_alloc_arg)
613 free (a->arg);
614 if (d->dir != d->direct_alloc_dir)
615 free (d->dir);
616 errno = EINVAL;
617 return -1;
619 out_of_memory:
620 if (a->arg != a->direct_alloc_arg)
621 free (a->arg);
622 if (d->dir != d->direct_alloc_dir)
623 free (d->dir);
624 errno = ENOMEM;
625 return -1;

/* Undefine the parametrization macros PRINTF_PARSE, DIRECTIVES, DIRECTIVE,
   CHAR_T_ONLY_ASCII and CHAR_T now that the function has been defined.  */
#undef PRINTF_PARSE
#undef DIRECTIVES
#undef DIRECTIVE
#undef CHAR_T_ONLY_ASCII
#undef CHAR_T