RISC-V: Fix more splitters accidentally calling gen_reg_rtx.
[official-gcc.git] / gcc / gimple-ssa-sprintf.c
blobb11d7989d5e866cd8cb8f62d41c95657bc91ad79
1 /* Copyright (C) 2016-2019 Free Software Foundation, Inc.
2 Contributed by Martin Sebor <msebor@redhat.com>.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* This file implements the printf-return-value pass. The pass does
21 two things: 1) it analyzes calls to formatted output functions like
22 sprintf looking for possible buffer overflows and calls to bounded
23 functions like snprintf for early truncation (and under the control
24 of the -Wformat-length option issues warnings), and 2) under the
25 control of the -fprintf-return-value option it folds the return
26 value of safe calls into constants, making it possible to eliminate
27 code that depends on the value of those constants.
29 For all functions (bounded or not) the pass uses the size of the
30 destination object. That means that it will diagnose calls to
31 snprintf not on the basis of the size specified by the function's
32 second argument but rather on the basis of the size the first
33 argument points to (if possible). For bound-checking built-ins
34 like __builtin___snprintf_chk the pass uses the size typically
35 determined by __builtin_object_size and passed to the built-in
36 by the Glibc inline wrapper.
38 The pass handles all forms of standard sprintf format directives,
39 including character, integer, floating point, pointer, and strings,
40 with the standard C flags, widths, and precisions. For integers
41 and strings it computes the length of output itself. For floating
42 point it uses MPFR to format known constants with up and down
43 rounding and uses the resulting range of output lengths. For
44 strings it uses the length of string literals and the sizes of
45 character arrays that a character pointer may point to as a bound
46 on the longest string. */
48 #include "config.h"
49 #include "system.h"
50 #include "coretypes.h"
51 #include "backend.h"
52 #include "tree.h"
53 #include "gimple.h"
54 #include "tree-pass.h"
55 #include "ssa.h"
56 #include "gimple-fold.h"
57 #include "gimple-pretty-print.h"
58 #include "diagnostic-core.h"
59 #include "fold-const.h"
60 #include "gimple-iterator.h"
61 #include "tree-ssa.h"
62 #include "tree-object-size.h"
63 #include "params.h"
64 #include "tree-cfg.h"
65 #include "tree-ssa-propagate.h"
66 #include "calls.h"
67 #include "cfgloop.h"
68 #include "tree-scalar-evolution.h"
69 #include "tree-ssa-loop.h"
70 #include "intl.h"
71 #include "langhooks.h"
73 #include "attribs.h"
74 #include "builtins.h"
75 #include "stor-layout.h"
77 #include "realmpfr.h"
78 #include "target.h"
80 #include "cpplib.h"
81 #include "input.h"
82 #include "toplev.h"
83 #include "substring-locations.h"
84 #include "diagnostic.h"
85 #include "domwalk.h"
86 #include "alloc-pool.h"
87 #include "vr-values.h"
88 #include "tree-ssa-strlen.h"
/* A likely worst-case value of MB_LEN_MAX for the target; six bytes is
   large enough for UTF-8.  If this were ever used for optimization it
   should come from a target hook, but for warnings it is good enough.  */
#define target_mb_len_max()   6

/* Upper bound on the number of bytes a single non-string directive can
   produce.  It corresponds to printf("%.*Lf", INT_MAX, -LDBL_MAX) with
   LDBL_MAX_10_EXP equal to 4932.  */
#define IEEE_MAX_10_EXP  4932
#define target_dir_max()   (target_int_max () + IEEE_MAX_10_EXP + 2)
101 namespace {
/* The warning level in effect for the function being processed: equal
   to warn_format_trunc for bounded functions and to warn_format_overflow
   for all others.  */

static int warn_level;

struct call_info;
struct format_result;
112 /* The minimum, maximum, likely, and unlikely maximum number of bytes
113 of output either a formatting function or an individual directive
114 can result in. */
116 struct result_range
118 /* The absolute minimum number of bytes. The result of a successful
119 conversion is guaranteed to be no less than this. (An erroneous
120 conversion can be indicated by MIN > HOST_WIDE_INT_MAX.) */
121 unsigned HOST_WIDE_INT min;
122 /* The likely maximum result that is used in diagnostics. In most
123 cases MAX is the same as the worst case UNLIKELY result. */
124 unsigned HOST_WIDE_INT max;
125 /* The likely result used to trigger diagnostics. For conversions
126 that result in a range of bytes [MIN, MAX], LIKELY is somewhere
127 in that range. */
128 unsigned HOST_WIDE_INT likely;
129 /* In rare cases (e.g., for nultibyte characters) UNLIKELY gives
130 the worst cases maximum result of a directive. In most cases
131 UNLIKELY == MAX. UNLIKELY is used to control the return value
132 optimization but not in diagnostics. */
133 unsigned HOST_WIDE_INT unlikely;
136 /* The result of a call to a formatted function. */
138 struct format_result
140 /* Range of characters written by the formatted function.
141 Setting the minimum to HOST_WIDE_INT_MAX disables all
142 length tracking for the remainder of the format string. */
143 result_range range;
145 /* True when the range above is obtained from known values of
146 directive arguments, or bounds on the amount of output such
147 as width and precision, and not the result of heuristics that
148 depend on warning levels. It's used to issue stricter diagnostics
149 in cases where strings of unknown lengths are bounded by the arrays
150 they are determined to refer to. KNOWNRANGE must not be used for
151 the return value optimization. */
152 bool knownrange;
154 /* True if no individual directive could fail or result in more than
155 4095 bytes of output (the total NUMBER_CHARS_{MIN,MAX} might be
156 greater). Implementations are not required to handle directives
157 that produce more than 4K bytes (leading to undefined behavior)
158 and so when one is found it disables the return value optimization.
159 Similarly, directives that can fail (such as wide character
160 directives) disable the optimization. */
161 bool posunder4k;
163 /* True when a floating point directive has been seen in the format
164 string. */
165 bool floating;
167 /* True when an intermediate result has caused a warning. Used to
168 avoid issuing duplicate warnings while finishing the processing
169 of a call. WARNED also disables the return value optimization. */
170 bool warned;
172 /* Preincrement the number of output characters by 1. */
173 format_result& operator++ ()
175 return *this += 1;
178 /* Postincrement the number of output characters by 1. */
179 format_result operator++ (int)
181 format_result prev (*this);
182 *this += 1;
183 return prev;
186 /* Increment the number of output characters by N. */
187 format_result& operator+= (unsigned HOST_WIDE_INT);
190 format_result&
191 format_result::operator+= (unsigned HOST_WIDE_INT n)
193 gcc_assert (n < HOST_WIDE_INT_MAX);
195 if (range.min < HOST_WIDE_INT_MAX)
196 range.min += n;
198 if (range.max < HOST_WIDE_INT_MAX)
199 range.max += n;
201 if (range.likely < HOST_WIDE_INT_MAX)
202 range.likely += n;
204 if (range.unlikely < HOST_WIDE_INT_MAX)
205 range.unlikely += n;
207 return *this;
210 /* Return the value of INT_MIN for the target. */
212 static inline HOST_WIDE_INT
213 target_int_min ()
215 return tree_to_shwi (TYPE_MIN_VALUE (integer_type_node));
218 /* Return the value of INT_MAX for the target. */
220 static inline unsigned HOST_WIDE_INT
221 target_int_max ()
223 return tree_to_uhwi (TYPE_MAX_VALUE (integer_type_node));
226 /* Return the value of SIZE_MAX for the target. */
228 static inline unsigned HOST_WIDE_INT
229 target_size_max ()
231 return tree_to_uhwi (TYPE_MAX_VALUE (size_type_node));
/* Table mapping each character of the execution character set (used
   as the index) to the corresponding host character.  */

static char target_to_host_charmap[256];
239 /* Initialize a mapping from the execution character set to the host
240 character set. */
242 static bool
243 init_target_to_host_charmap ()
245 /* If the percent sign is non-zero the mapping has already been
246 initialized. */
247 if (target_to_host_charmap['%'])
248 return true;
250 /* Initialize the target_percent character (done elsewhere). */
251 if (!init_target_chars ())
252 return false;
254 /* The subset of the source character set used by printf conversion
255 specifications (strictly speaking, not all letters are used but
256 they are included here for the sake of simplicity). The dollar
257 sign must be included even though it's not in the basic source
258 character set. */
259 const char srcset[] = " 0123456789!\"#%&'()*+,-./:;<=>?[\\]^_{|}~$"
260 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
262 /* Set the mapping for all characters to some ordinary value (i,e.,
263 not none used in printf conversion specifications) and overwrite
264 those that are used by conversion specifications with their
265 corresponding values. */
266 memset (target_to_host_charmap + 1, '?', sizeof target_to_host_charmap - 1);
268 /* Are the two sets of characters the same? */
269 bool all_same_p = true;
271 for (const char *pc = srcset; *pc; ++pc)
273 /* Slice off the high end bits in case target characters are
274 signed. All values are expected to be non-nul, otherwise
275 there's a problem. */
276 if (unsigned char tc = lang_hooks.to_target_charset (*pc))
278 target_to_host_charmap[tc] = *pc;
279 if (tc != *pc)
280 all_same_p = false;
282 else
283 return false;
287 /* Set the first element to a non-zero value if the mapping
288 is 1-to-1, otherwise leave it clear (NUL is assumed to be
289 the same in both character sets). */
290 target_to_host_charmap[0] = all_same_p;
292 return true;
295 /* Return the host source character corresponding to the character
296 CH in the execution character set if one exists, or some innocuous
297 (non-special, non-nul) source character otherwise. */
299 static inline unsigned char
300 target_to_host (unsigned char ch)
302 return target_to_host_charmap[ch];
305 /* Convert an initial substring of the string TARGSTR consisting of
306 characters in the execution character set into a string in the
307 source character set on the host and store up to HOSTSZ characters
308 in the buffer pointed to by HOSTR. Return HOSTR. */
310 static const char*
311 target_to_host (char *hostr, size_t hostsz, const char *targstr)
313 /* Make sure the buffer is reasonably big. */
314 gcc_assert (hostsz > 4);
316 /* The interesting subset of source and execution characters are
317 the same so no conversion is necessary. However, truncate
318 overlong strings just like the translated strings are. */
319 if (target_to_host_charmap['\0'] == 1)
321 size_t len = strlen (targstr);
322 if (len >= hostsz)
324 memcpy (hostr, targstr, hostsz - 4);
325 strcpy (hostr + hostsz - 4, "...");
327 else
328 memcpy (hostr, targstr, len + 1);
329 return hostr;
332 /* Convert the initial substring of TARGSTR to the corresponding
333 characters in the host set, appending "..." if TARGSTR is too
334 long to fit. Using the static buffer assumes the function is
335 not called in between sequence points (which it isn't). */
336 for (char *ph = hostr; ; ++targstr)
338 *ph++ = target_to_host (*targstr);
339 if (!*targstr)
340 break;
342 if (size_t (ph - hostr) == hostsz)
344 strcpy (ph - 4, "...");
345 break;
349 return hostr;
352 /* Convert the sequence of decimal digits in the execution character
353 starting at *PS to a HOST_WIDE_INT, analogously to strtol. Return
354 the result and set *PS to one past the last converted character.
355 On range error set ERANGE to the digit that caused it. */
357 static inline HOST_WIDE_INT
358 target_strtowi (const char **ps, const char **erange)
360 unsigned HOST_WIDE_INT val = 0;
361 for ( ; ; ++*ps)
363 unsigned char c = target_to_host (**ps);
364 if (ISDIGIT (c))
366 c -= '0';
368 /* Check for overflow. */
369 if (val > ((unsigned HOST_WIDE_INT) HOST_WIDE_INT_MAX - c) / 10LU)
371 val = HOST_WIDE_INT_MAX;
372 *erange = *ps;
374 /* Skip the remaining digits. */
376 c = target_to_host (*++*ps);
377 while (ISDIGIT (c));
378 break;
380 else
381 val = val * 10 + c;
383 else
384 break;
387 return val;
390 /* Given FORMAT, set *PLOC to the source location of the format string
391 and return the format string if it is known or null otherwise. */
393 static const char*
394 get_format_string (tree format, location_t *ploc)
396 *ploc = EXPR_LOC_OR_LOC (format, input_location);
398 return c_getstr (format);
401 /* For convenience and brevity, shorter named entrypoints of
402 format_string_diagnostic_t::emit_warning_va and
403 format_string_diagnostic_t::emit_warning_n_va.
404 These have to be functions with the attribute so that exgettext
405 works properly. */
407 static bool
408 ATTRIBUTE_GCC_DIAG (5, 6)
409 fmtwarn (const substring_loc &fmt_loc, location_t param_loc,
410 const char *corrected_substring, int opt, const char *gmsgid, ...)
412 format_string_diagnostic_t diag (fmt_loc, NULL, param_loc, NULL,
413 corrected_substring);
414 va_list ap;
415 va_start (ap, gmsgid);
416 bool warned = diag.emit_warning_va (opt, gmsgid, &ap);
417 va_end (ap);
419 return warned;
422 static bool
423 ATTRIBUTE_GCC_DIAG (6, 8) ATTRIBUTE_GCC_DIAG (7, 8)
424 fmtwarn_n (const substring_loc &fmt_loc, location_t param_loc,
425 const char *corrected_substring, int opt, unsigned HOST_WIDE_INT n,
426 const char *singular_gmsgid, const char *plural_gmsgid, ...)
428 format_string_diagnostic_t diag (fmt_loc, NULL, param_loc, NULL,
429 corrected_substring);
430 va_list ap;
431 va_start (ap, plural_gmsgid);
432 bool warned = diag.emit_warning_n_va (opt, n, singular_gmsgid, plural_gmsgid,
433 &ap);
434 va_end (ap);
436 return warned;
/* The length modifiers a conversion specification may carry.  */

enum format_lengths
{
  FMT_LEN_none,
  FMT_LEN_hh,    /* char argument */
  FMT_LEN_h,     /* short */
  FMT_LEN_l,     /* long */
  FMT_LEN_ll,    /* long long */
  FMT_LEN_L,     /* long double (and GNU long long) */
  FMT_LEN_z,     /* size_t */
  FMT_LEN_t,     /* ptrdiff_t */
  FMT_LEN_j      /* intmax_t */
};
455 /* Description of the result of conversion either of a single directive
456 or the whole format string. */
458 class fmtresult
460 public:
461 /* Construct a FMTRESULT object with all counters initialized
462 to MIN. KNOWNRANGE is set when MIN is valid. */
463 fmtresult (unsigned HOST_WIDE_INT min = HOST_WIDE_INT_MAX)
464 : argmin (), argmax (), nonstr (),
465 knownrange (min < HOST_WIDE_INT_MAX),
466 mayfail (), nullp ()
468 range.min = min;
469 range.max = min;
470 range.likely = min;
471 range.unlikely = min;
474 /* Construct a FMTRESULT object with MIN, MAX, and LIKELY counters.
475 KNOWNRANGE is set when both MIN and MAX are valid. */
476 fmtresult (unsigned HOST_WIDE_INT min, unsigned HOST_WIDE_INT max,
477 unsigned HOST_WIDE_INT likely = HOST_WIDE_INT_MAX)
478 : argmin (), argmax (), nonstr (),
479 knownrange (min < HOST_WIDE_INT_MAX && max < HOST_WIDE_INT_MAX),
480 mayfail (), nullp ()
482 range.min = min;
483 range.max = max;
484 range.likely = max < likely ? min : likely;
485 range.unlikely = max;
488 /* Adjust result upward to reflect the RANGE of values the specified
489 width or precision is known to be in. */
490 fmtresult& adjust_for_width_or_precision (const HOST_WIDE_INT[2],
491 tree = NULL_TREE,
492 unsigned = 0, unsigned = 0);
494 /* Return the maximum number of decimal digits a value of TYPE
495 formats as on output. */
496 static unsigned type_max_digits (tree, int);
498 /* The range a directive's argument is in. */
499 tree argmin, argmax;
501 /* The minimum and maximum number of bytes that a directive
502 results in on output for an argument in the range above. */
503 result_range range;
505 /* Non-nul when the argument of a string directive is not a nul
506 terminated string. */
507 tree nonstr;
509 /* True when the range above is obtained from a known value of
510 a directive's argument or its bounds and not the result of
511 heuristics that depend on warning levels. */
512 bool knownrange;
514 /* True for a directive that may fail (such as wide character
515 directives). */
516 bool mayfail;
518 /* True when the argument is a null pointer. */
519 bool nullp;
522 /* Adjust result upward to reflect the range ADJUST of values the
523 specified width or precision is known to be in. When non-null,
524 TYPE denotes the type of the directive whose result is being
525 adjusted, BASE gives the base of the directive (octal, decimal,
526 or hex), and ADJ denotes the additional adjustment to the LIKELY
527 counter that may need to be added when ADJUST is a range. */
529 fmtresult&
530 fmtresult::adjust_for_width_or_precision (const HOST_WIDE_INT adjust[2],
531 tree type /* = NULL_TREE */,
532 unsigned base /* = 0 */,
533 unsigned adj /* = 0 */)
535 bool minadjusted = false;
537 /* Adjust the minimum and likely counters. */
538 if (adjust[0] >= 0)
540 if (range.min < (unsigned HOST_WIDE_INT)adjust[0])
542 range.min = adjust[0];
543 minadjusted = true;
546 /* Adjust the likely counter. */
547 if (range.likely < range.min)
548 range.likely = range.min;
550 else if (adjust[0] == target_int_min ()
551 && (unsigned HOST_WIDE_INT)adjust[1] == target_int_max ())
552 knownrange = false;
554 /* Adjust the maximum counter. */
555 if (adjust[1] > 0)
557 if (range.max < (unsigned HOST_WIDE_INT)adjust[1])
559 range.max = adjust[1];
561 /* Set KNOWNRANGE if both the minimum and maximum have been
562 adjusted. Otherwise leave it at what it was before. */
563 knownrange = minadjusted;
567 if (warn_level > 1 && type)
569 /* For large non-constant width or precision whose range spans
570 the maximum number of digits produced by the directive for
571 any argument, set the likely number of bytes to be at most
572 the number digits plus other adjustment determined by the
573 caller (one for sign or two for the hexadecimal "0x"
574 prefix). */
575 unsigned dirdigs = type_max_digits (type, base);
576 if (adjust[0] < dirdigs && dirdigs < adjust[1]
577 && range.likely < dirdigs)
578 range.likely = dirdigs + adj;
580 else if (range.likely < (range.min ? range.min : 1))
582 /* Conservatively, set LIKELY to at least MIN but no less than
583 1 unless MAX is zero. */
584 range.likely = (range.min
585 ? range.min
586 : range.max && (range.max < HOST_WIDE_INT_MAX
587 || warn_level > 1) ? 1 : 0);
590 /* Finally adjust the unlikely counter to be at least as large as
591 the maximum. */
592 if (range.unlikely < range.max)
593 range.unlikely = range.max;
595 return *this;
598 /* Return the maximum number of digits a value of TYPE formats in
599 BASE on output, not counting base prefix . */
601 unsigned
602 fmtresult::type_max_digits (tree type, int base)
604 unsigned prec = TYPE_PRECISION (type);
605 switch (base)
607 case 8:
608 return (prec + 2) / 3;
609 case 10:
610 /* Decimal approximation: yields 3, 5, 10, and 20 for precision
611 of 8, 16, 32, and 64 bits. */
612 return prec * 301 / 1000 + 1;
613 case 16:
614 return prec / 4;
617 gcc_unreachable ();
620 static bool
621 get_int_range (tree, HOST_WIDE_INT *, HOST_WIDE_INT *, bool, HOST_WIDE_INT,
622 const vr_values *);
624 /* Description of a format directive. A directive is either a plain
625 string or a conversion specification that starts with '%'. */
627 struct directive
629 /* The 1-based directive number (for debugging). */
630 unsigned dirno;
632 /* The first character of the directive and its length. */
633 const char *beg;
634 size_t len;
636 /* A bitmap of flags, one for each character. */
637 unsigned flags[256 / sizeof (int)];
639 /* The range of values of the specified width, or -1 if not specified. */
640 HOST_WIDE_INT width[2];
641 /* The range of values of the specified precision, or -1 if not
642 specified. */
643 HOST_WIDE_INT prec[2];
645 /* Length modifier. */
646 format_lengths modifier;
648 /* Format specifier character. */
649 char specifier;
651 /* The argument of the directive or null when the directive doesn't
652 take one or when none is available (such as for vararg functions). */
653 tree arg;
655 /* Format conversion function that given a directive and an argument
656 returns the formatting result. */
657 fmtresult (*fmtfunc) (const directive &, tree, const vr_values *);
659 /* Return True when a the format flag CHR has been used. */
660 bool get_flag (char chr) const
662 unsigned char c = chr & 0xff;
663 return (flags[c / (CHAR_BIT * sizeof *flags)]
664 & (1U << (c % (CHAR_BIT * sizeof *flags))));
667 /* Make a record of the format flag CHR having been used. */
668 void set_flag (char chr)
670 unsigned char c = chr & 0xff;
671 flags[c / (CHAR_BIT * sizeof *flags)]
672 |= (1U << (c % (CHAR_BIT * sizeof *flags)));
675 /* Reset the format flag CHR. */
676 void clear_flag (char chr)
678 unsigned char c = chr & 0xff;
679 flags[c / (CHAR_BIT * sizeof *flags)]
680 &= ~(1U << (c % (CHAR_BIT * sizeof *flags)));
683 /* Set both bounds of the width range to VAL. */
684 void set_width (HOST_WIDE_INT val)
686 width[0] = width[1] = val;
689 /* Set the width range according to ARG, with both bounds being
690 no less than 0. For a constant ARG set both bounds to its value
691 or 0, whichever is greater. For a non-constant ARG in some range
692 set width to its range adjusting each bound to -1 if it's less.
693 For an indeterminate ARG set width to [0, INT_MAX]. */
694 void set_width (tree arg, const vr_values *vr)
696 get_int_range (arg, width, width + 1, true, 0, vr);
699 /* Set both bounds of the precision range to VAL. */
700 void set_precision (HOST_WIDE_INT val)
702 prec[0] = prec[1] = val;
705 /* Set the precision range according to ARG, with both bounds being
706 no less than -1. For a constant ARG set both bounds to its value
707 or -1 whichever is greater. For a non-constant ARG in some range
708 set precision to its range adjusting each bound to -1 if it's less.
709 For an indeterminate ARG set precision to [-1, INT_MAX]. */
710 void set_precision (tree arg, const vr_values *vr)
712 get_int_range (arg, prec, prec + 1, false, -1, vr);
715 /* Return true if both width and precision are known to be
716 either constant or in some range, false otherwise. */
717 bool known_width_and_precision () const
719 return ((width[1] < 0
720 || (unsigned HOST_WIDE_INT)width[1] <= target_int_max ())
721 && (prec[1] < 0
722 || (unsigned HOST_WIDE_INT)prec[1] < target_int_max ()));
726 /* Return the logarithm of X in BASE. */
728 static int
729 ilog (unsigned HOST_WIDE_INT x, int base)
731 int res = 0;
734 ++res;
735 x /= base;
736 } while (x);
737 return res;
740 /* Return the number of bytes resulting from converting into a string
741 the INTEGER_CST tree node X in BASE with a minimum of PREC digits.
742 PLUS indicates whether 1 for a plus sign should be added for positive
743 numbers, and PREFIX whether the length of an octal ('O') or hexadecimal
744 ('0x') prefix should be added for nonzero numbers. Return -1 if X cannot
745 be represented. */
747 static HOST_WIDE_INT
748 tree_digits (tree x, int base, HOST_WIDE_INT prec, bool plus, bool prefix)
750 unsigned HOST_WIDE_INT absval;
752 HOST_WIDE_INT res;
754 if (TYPE_UNSIGNED (TREE_TYPE (x)))
756 if (tree_fits_uhwi_p (x))
758 absval = tree_to_uhwi (x);
759 res = plus;
761 else
762 return -1;
764 else
766 if (tree_fits_shwi_p (x))
768 HOST_WIDE_INT i = tree_to_shwi (x);
769 if (HOST_WIDE_INT_MIN == i)
771 /* Avoid undefined behavior due to negating a minimum. */
772 absval = HOST_WIDE_INT_MAX;
773 res = 1;
775 else if (i < 0)
777 absval = -i;
778 res = 1;
780 else
782 absval = i;
783 res = plus;
786 else
787 return -1;
790 int ndigs = ilog (absval, base);
792 res += prec < ndigs ? ndigs : prec;
794 /* Adjust a non-zero value for the base prefix, either hexadecimal,
795 or, unless precision has resulted in a leading zero, also octal. */
796 if (prefix && absval && (base == 16 || prec <= ndigs))
798 if (base == 8)
799 res += 1;
800 else if (base == 16)
801 res += 2;
804 return res;
807 /* Given the formatting result described by RES and NAVAIL, the number
808 of available in the destination, return the range of bytes remaining
809 in the destination. */
811 static inline result_range
812 bytes_remaining (unsigned HOST_WIDE_INT navail, const format_result &res)
814 result_range range;
816 if (HOST_WIDE_INT_MAX <= navail)
818 range.min = range.max = range.likely = range.unlikely = navail;
819 return range;
822 /* The lower bound of the available range is the available size
823 minus the maximum output size, and the upper bound is the size
824 minus the minimum. */
825 range.max = res.range.min < navail ? navail - res.range.min : 0;
827 range.likely = res.range.likely < navail ? navail - res.range.likely : 0;
829 if (res.range.max < HOST_WIDE_INT_MAX)
830 range.min = res.range.max < navail ? navail - res.range.max : 0;
831 else
832 range.min = range.likely;
834 range.unlikely = (res.range.unlikely < navail
835 ? navail - res.range.unlikely : 0);
837 return range;
840 /* Description of a call to a formatted function. */
842 struct call_info
844 /* Function call statement. */
845 gimple *callstmt;
847 /* Function called. */
848 tree func;
850 /* Called built-in function code. */
851 built_in_function fncode;
853 /* Format argument and format string extracted from it. */
854 tree format;
855 const char *fmtstr;
857 /* The location of the format argument. */
858 location_t fmtloc;
860 /* The destination object size for __builtin___xxx_chk functions
861 typically determined by __builtin_object_size, or -1 if unknown. */
862 unsigned HOST_WIDE_INT objsize;
864 /* Number of the first variable argument. */
865 unsigned HOST_WIDE_INT argidx;
867 /* True for functions like snprintf that specify the size of
868 the destination, false for others like sprintf that don't. */
869 bool bounded;
871 /* True for bounded functions like snprintf that specify a zero-size
872 buffer as a request to compute the size of output without actually
873 writing any. NOWRITE is cleared in response to the %n directive
874 which has side-effects similar to writing output. */
875 bool nowrite;
877 /* Return true if the called function's return value is used. */
878 bool retval_used () const
880 return gimple_get_lhs (callstmt);
883 /* Return the warning option corresponding to the called function. */
884 int warnopt () const
886 return bounded ? OPT_Wformat_truncation_ : OPT_Wformat_overflow_;
889 /* Return true for calls to file formatted functions. */
890 bool is_file_func () const
892 return (fncode == BUILT_IN_FPRINTF
893 || fncode == BUILT_IN_FPRINTF_CHK
894 || fncode == BUILT_IN_FPRINTF_UNLOCKED
895 || fncode == BUILT_IN_VFPRINTF
896 || fncode == BUILT_IN_VFPRINTF_CHK);
899 /* Return true for calls to string formatted functions. */
900 bool is_string_func () const
902 return (fncode == BUILT_IN_SPRINTF
903 || fncode == BUILT_IN_SPRINTF_CHK
904 || fncode == BUILT_IN_SNPRINTF
905 || fncode == BUILT_IN_SNPRINTF_CHK
906 || fncode == BUILT_IN_VSPRINTF
907 || fncode == BUILT_IN_VSPRINTF_CHK
908 || fncode == BUILT_IN_VSNPRINTF
909 || fncode == BUILT_IN_VSNPRINTF_CHK);
913 /* Return the result of formatting a no-op directive (such as '%n'). */
915 static fmtresult
916 format_none (const directive &, tree, const vr_values *)
918 fmtresult res (0);
919 return res;
922 /* Return the result of formatting the '%%' directive. */
924 static fmtresult
925 format_percent (const directive &, tree, const vr_values *)
927 fmtresult res (1);
928 return res;
932 /* Compute intmax_type_node and uintmax_type_node similarly to how
933 tree.c builds size_type_node. */
935 static void
936 build_intmax_type_nodes (tree *pintmax, tree *puintmax)
938 if (strcmp (UINTMAX_TYPE, "unsigned int") == 0)
940 *pintmax = integer_type_node;
941 *puintmax = unsigned_type_node;
943 else if (strcmp (UINTMAX_TYPE, "long unsigned int") == 0)
945 *pintmax = long_integer_type_node;
946 *puintmax = long_unsigned_type_node;
948 else if (strcmp (UINTMAX_TYPE, "long long unsigned int") == 0)
950 *pintmax = long_long_integer_type_node;
951 *puintmax = long_long_unsigned_type_node;
953 else
955 for (int i = 0; i < NUM_INT_N_ENTS; i++)
956 if (int_n_enabled_p[i])
958 char name[50], altname[50];
959 sprintf (name, "__int%d unsigned", int_n_data[i].bitsize);
960 sprintf (altname, "__int%d__ unsigned", int_n_data[i].bitsize);
962 if (strcmp (name, UINTMAX_TYPE) == 0
963 || strcmp (altname, UINTMAX_TYPE) == 0)
965 *pintmax = int_n_trees[i].signed_type;
966 *puintmax = int_n_trees[i].unsigned_type;
967 return;
970 gcc_unreachable ();
974 /* Determine the range [*PMIN, *PMAX] that the expression ARG is
975 in and that is representable in type int.
976 Return true when the range is a subrange of that of int.
977 When ARG is null it is as if it had the full range of int.
978 When ABSOLUTE is true the range reflects the absolute value of
979 the argument. When ABSOLUTE is false, negative bounds of
980 the determined range are replaced with NEGBOUND. */
982 static bool
983 get_int_range (tree arg, HOST_WIDE_INT *pmin, HOST_WIDE_INT *pmax,
984 bool absolute, HOST_WIDE_INT negbound,
985 const class vr_values *vr_values)
987 /* The type of the result. */
988 const_tree type = integer_type_node;
990 bool knownrange = false;
992 if (!arg)
994 *pmin = tree_to_shwi (TYPE_MIN_VALUE (type));
995 *pmax = tree_to_shwi (TYPE_MAX_VALUE (type));
997 else if (TREE_CODE (arg) == INTEGER_CST
998 && TYPE_PRECISION (TREE_TYPE (arg)) <= TYPE_PRECISION (type))
1000 /* For a constant argument return its value adjusted as specified
1001 by NEGATIVE and NEGBOUND and return true to indicate that the
1002 result is known. */
1003 *pmin = tree_fits_shwi_p (arg) ? tree_to_shwi (arg) : tree_to_uhwi (arg);
1004 *pmax = *pmin;
1005 knownrange = true;
1007 else
1009 /* True if the argument's range cannot be determined. */
1010 bool unknown = true;
1012 tree argtype = TREE_TYPE (arg);
1014 /* Ignore invalid arguments with greater precision that that
1015 of the expected type (e.g., in sprintf("%*i", 12LL, i)).
1016 They will have been detected and diagnosed by -Wformat and
1017 so it's not important to complicate this code to try to deal
1018 with them again. */
1019 if (TREE_CODE (arg) == SSA_NAME
1020 && INTEGRAL_TYPE_P (argtype)
1021 && TYPE_PRECISION (argtype) <= TYPE_PRECISION (type))
1023 /* Try to determine the range of values of the integer argument. */
1024 const value_range *vr
1025 = CONST_CAST (class vr_values *, vr_values)->get_value_range (arg);
1027 if (range_int_cst_p (vr))
1029 HOST_WIDE_INT type_min
1030 = (TYPE_UNSIGNED (argtype)
1031 ? tree_to_uhwi (TYPE_MIN_VALUE (argtype))
1032 : tree_to_shwi (TYPE_MIN_VALUE (argtype)));
1034 HOST_WIDE_INT type_max = tree_to_uhwi (TYPE_MAX_VALUE (argtype));
1036 *pmin = TREE_INT_CST_LOW (vr->min ());
1037 *pmax = TREE_INT_CST_LOW (vr->max ());
1039 if (*pmin < *pmax)
1041 /* Return true if the adjusted range is a subrange of
1042 the full range of the argument's type. *PMAX may
1043 be less than *PMIN when the argument is unsigned
1044 and its upper bound is in excess of TYPE_MAX. In
1045 that (invalid) case disregard the range and use that
1046 of the expected type instead. */
1047 knownrange = type_min < *pmin || *pmax < type_max;
1049 unknown = false;
1054 /* Handle an argument with an unknown range as if none had been
1055 provided. */
1056 if (unknown)
1057 return get_int_range (NULL_TREE, pmin, pmax, absolute,
1058 negbound, vr_values);
1061 /* Adjust each bound as specified by ABSOLUTE and NEGBOUND. */
1062 if (absolute)
1064 if (*pmin < 0)
1066 if (*pmin == *pmax)
1067 *pmin = *pmax = -*pmin;
1068 else
1070 /* Make sure signed overflow is avoided. */
1071 gcc_assert (*pmin != HOST_WIDE_INT_MIN);
1073 HOST_WIDE_INT tmp = -*pmin;
1074 *pmin = 0;
1075 if (*pmax < tmp)
1076 *pmax = tmp;
1080 else if (*pmin < negbound)
1081 *pmin = negbound;
1083 return knownrange;
1086 /* With the range [*ARGMIN, *ARGMAX] of an integer directive's actual
1087 argument, due to the conversion from either *ARGMIN or *ARGMAX to
1088 the type of the directive's formal argument it's possible for both
1089 to result in the same number of bytes or a range of bytes that's
1090 less than the number of bytes that would result from formatting
1091 some other value in the range [*ARGMIN, *ARGMAX]. This can be
1092 determined by checking for the actual argument being in the range
1093 of the type of the directive. If it isn't it must be assumed to
1094 take on the full range of the directive's type.
1095 Return true when the range has been adjusted to the full range
1096 of DIRTYPE, and false otherwise. */
1098 static bool
1099 adjust_range_for_overflow (tree dirtype, tree *argmin, tree *argmax)
1101 tree argtype = TREE_TYPE (*argmin);
1102 unsigned argprec = TYPE_PRECISION (argtype);
1103 unsigned dirprec = TYPE_PRECISION (dirtype);
1105 /* If the actual argument and the directive's argument have the same
1106 precision and sign there can be no overflow and so there is nothing
1107 to adjust. */
1108 if (argprec == dirprec && TYPE_SIGN (argtype) == TYPE_SIGN (dirtype))
1109 return false;
1111 /* The logic below was inspired/lifted from the CONVERT_EXPR_CODE_P
1112 branch in the extract_range_from_unary_expr function in tree-vrp.c. */
1114 if (TREE_CODE (*argmin) == INTEGER_CST
1115 && TREE_CODE (*argmax) == INTEGER_CST
1116 && (dirprec >= argprec
1117 || integer_zerop (int_const_binop (RSHIFT_EXPR,
1118 int_const_binop (MINUS_EXPR,
1119 *argmax,
1120 *argmin),
1121 size_int (dirprec)))))
1123 *argmin = force_fit_type (dirtype, wi::to_widest (*argmin), 0, false);
1124 *argmax = force_fit_type (dirtype, wi::to_widest (*argmax), 0, false);
1126 /* If *ARGMIN is still less than *ARGMAX the conversion above
1127 is safe. Otherwise, it has overflowed and would be unsafe. */
1128 if (tree_int_cst_le (*argmin, *argmax))
1129 return false;
1132 *argmin = TYPE_MIN_VALUE (dirtype);
1133 *argmax = TYPE_MAX_VALUE (dirtype);
1134 return true;
1137 /* Return a range representing the minimum and maximum number of bytes
1138 that the format directive DIR will output for any argument given
1139 the WIDTH and PRECISION (extracted from DIR). This function is
1140 used when the directive argument or its value isn't known. */
1142 static fmtresult
1143 format_integer (const directive &dir, tree arg, const vr_values *vr_values)
1145 tree intmax_type_node;
1146 tree uintmax_type_node;
1148 /* Base to format the number in. */
1149 int base;
1151 /* True when a conversion is preceded by a prefix indicating the base
1152 of the argument (octal or hexadecimal). */
1153 bool maybebase = dir.get_flag ('#');
1155 /* True when a signed conversion is preceded by a sign or space. */
1156 bool maybesign = false;
1158 /* True for signed conversions (i.e., 'd' and 'i'). */
1159 bool sign = false;
1161 switch (dir.specifier)
1163 case 'd':
1164 case 'i':
1165 /* Space and '+' are only meaningful for signed conversions. */
1166 maybesign = dir.get_flag (' ') | dir.get_flag ('+');
1167 sign = true;
1168 base = 10;
1169 break;
1170 case 'u':
1171 base = 10;
1172 break;
1173 case 'o':
1174 base = 8;
1175 break;
1176 case 'X':
1177 case 'x':
1178 base = 16;
1179 break;
1180 default:
1181 gcc_unreachable ();
1184 /* The type of the "formal" argument expected by the directive. */
1185 tree dirtype = NULL_TREE;
1187 /* Determine the expected type of the argument from the length
1188 modifier. */
1189 switch (dir.modifier)
1191 case FMT_LEN_none:
1192 if (dir.specifier == 'p')
1193 dirtype = ptr_type_node;
1194 else
1195 dirtype = sign ? integer_type_node : unsigned_type_node;
1196 break;
1198 case FMT_LEN_h:
1199 dirtype = sign ? short_integer_type_node : short_unsigned_type_node;
1200 break;
1202 case FMT_LEN_hh:
1203 dirtype = sign ? signed_char_type_node : unsigned_char_type_node;
1204 break;
1206 case FMT_LEN_l:
1207 dirtype = sign ? long_integer_type_node : long_unsigned_type_node;
1208 break;
1210 case FMT_LEN_L:
1211 case FMT_LEN_ll:
1212 dirtype = (sign
1213 ? long_long_integer_type_node
1214 : long_long_unsigned_type_node);
1215 break;
1217 case FMT_LEN_z:
1218 dirtype = signed_or_unsigned_type_for (!sign, size_type_node);
1219 break;
1221 case FMT_LEN_t:
1222 dirtype = signed_or_unsigned_type_for (!sign, ptrdiff_type_node);
1223 break;
1225 case FMT_LEN_j:
1226 build_intmax_type_nodes (&intmax_type_node, &uintmax_type_node);
1227 dirtype = sign ? intmax_type_node : uintmax_type_node;
1228 break;
1230 default:
1231 return fmtresult ();
1234 /* The type of the argument to the directive, either deduced from
1235 the actual non-constant argument if one is known, or from
1236 the directive itself when none has been provided because it's
1237 a va_list. */
1238 tree argtype = NULL_TREE;
1240 if (!arg)
1242 /* When the argument has not been provided, use the type of
1243 the directive's argument as an approximation. This will
1244 result in false positives for directives like %i with
1245 arguments with smaller precision (such as short or char). */
1246 argtype = dirtype;
1248 else if (TREE_CODE (arg) == INTEGER_CST)
1250 /* When a constant argument has been provided use its value
1251 rather than type to determine the length of the output. */
1252 fmtresult res;
1254 if ((dir.prec[0] <= 0 && dir.prec[1] >= 0) && integer_zerop (arg))
1256 /* As a special case, a precision of zero with a zero argument
1257 results in zero bytes except in base 8 when the '#' flag is
1258 specified, and for signed conversions in base 8 and 10 when
1259 either the space or '+' flag has been specified and it results
1260 in just one byte (with width having the normal effect). This
1261 must extend to the case of a specified precision with
1262 an unknown value because it can be zero. */
1263 res.range.min = ((base == 8 && dir.get_flag ('#')) || maybesign);
1264 if (res.range.min == 0 && dir.prec[0] != dir.prec[1])
1266 res.range.max = 1;
1267 res.range.likely = 1;
1269 else
1271 res.range.max = res.range.min;
1272 res.range.likely = res.range.min;
1275 else
1277 /* Convert the argument to the type of the directive. */
1278 arg = fold_convert (dirtype, arg);
1280 res.range.min = tree_digits (arg, base, dir.prec[0],
1281 maybesign, maybebase);
1282 if (dir.prec[0] == dir.prec[1])
1283 res.range.max = res.range.min;
1284 else
1285 res.range.max = tree_digits (arg, base, dir.prec[1],
1286 maybesign, maybebase);
1287 res.range.likely = res.range.min;
1288 res.knownrange = true;
1291 res.range.unlikely = res.range.max;
1293 /* Bump up the counters if WIDTH is greater than LEN. */
1294 res.adjust_for_width_or_precision (dir.width, dirtype, base,
1295 (sign | maybebase) + (base == 16));
1296 /* Bump up the counters again if PRECision is greater still. */
1297 res.adjust_for_width_or_precision (dir.prec, dirtype, base,
1298 (sign | maybebase) + (base == 16));
1300 return res;
1302 else if (INTEGRAL_TYPE_P (TREE_TYPE (arg))
1303 || TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE)
1304 /* Determine the type of the provided non-constant argument. */
1305 argtype = TREE_TYPE (arg);
1306 else
1307 /* Don't bother with invalid arguments since they likely would
1308 have already been diagnosed, and disable any further checking
1309 of the format string by returning [-1, -1]. */
1310 return fmtresult ();
1312 fmtresult res;
1314 /* Using either the range the non-constant argument is in, or its
1315 type (either "formal" or actual), create a range of values that
1316 constrain the length of output given the warning level. */
1317 tree argmin = NULL_TREE;
1318 tree argmax = NULL_TREE;
1320 if (arg
1321 && TREE_CODE (arg) == SSA_NAME
1322 && INTEGRAL_TYPE_P (argtype))
1324 /* Try to determine the range of values of the integer argument
1325 (range information is not available for pointers). */
1326 const value_range *vr
1327 = CONST_CAST (class vr_values *, vr_values)->get_value_range (arg);
1329 if (range_int_cst_p (vr))
1331 argmin = vr->min ();
1332 argmax = vr->max ();
1334 /* Set KNOWNRANGE if the argument is in a known subrange
1335 of the directive's type and neither width nor precision
1336 is unknown. (KNOWNRANGE may be reset below). */
1337 res.knownrange
1338 = ((!tree_int_cst_equal (TYPE_MIN_VALUE (dirtype), argmin)
1339 || !tree_int_cst_equal (TYPE_MAX_VALUE (dirtype), argmax))
1340 && dir.known_width_and_precision ());
1342 res.argmin = argmin;
1343 res.argmax = argmax;
1345 else if (vr->kind () == VR_ANTI_RANGE)
1347 /* Handle anti-ranges if/when bug 71690 is resolved. */
1349 else if (vr->varying_p () || vr->undefined_p ())
1351 /* The argument here may be the result of promoting the actual
1352 argument to int. Try to determine the type of the actual
1353 argument before promotion and narrow down its range that
1354 way. */
1355 gimple *def = SSA_NAME_DEF_STMT (arg);
1356 if (is_gimple_assign (def))
1358 tree_code code = gimple_assign_rhs_code (def);
1359 if (code == INTEGER_CST)
1361 arg = gimple_assign_rhs1 (def);
1362 return format_integer (dir, arg, vr_values);
1365 if (code == NOP_EXPR)
1367 tree type = TREE_TYPE (gimple_assign_rhs1 (def));
1368 if (INTEGRAL_TYPE_P (type)
1369 || TREE_CODE (type) == POINTER_TYPE)
1370 argtype = type;
1376 if (!argmin)
1378 if (TREE_CODE (argtype) == POINTER_TYPE)
1380 argmin = build_int_cst (pointer_sized_int_node, 0);
1381 argmax = build_all_ones_cst (pointer_sized_int_node);
1383 else
1385 argmin = TYPE_MIN_VALUE (argtype);
1386 argmax = TYPE_MAX_VALUE (argtype);
1390 /* Clear KNOWNRANGE if the range has been adjusted to the maximum
1391 of the directive. If it has been cleared then since ARGMIN and/or
1392 ARGMAX have been adjusted also adjust the corresponding ARGMIN and
1393 ARGMAX in the result to include in diagnostics. */
1394 if (adjust_range_for_overflow (dirtype, &argmin, &argmax))
1396 res.knownrange = false;
1397 res.argmin = argmin;
1398 res.argmax = argmax;
1401 /* Recursively compute the minimum and maximum from the known range. */
1402 if (TYPE_UNSIGNED (dirtype) || tree_int_cst_sgn (argmin) >= 0)
1404 /* For unsigned conversions/directives or signed when
1405 the minimum is positive, use the minimum and maximum to compute
1406 the shortest and longest output, respectively. */
1407 res.range.min = format_integer (dir, argmin, vr_values).range.min;
1408 res.range.max = format_integer (dir, argmax, vr_values).range.max;
1410 else if (tree_int_cst_sgn (argmax) < 0)
1412 /* For signed conversions/directives if maximum is negative,
1413 use the minimum as the longest output and maximum as the
1414 shortest output. */
1415 res.range.min = format_integer (dir, argmax, vr_values).range.min;
1416 res.range.max = format_integer (dir, argmin, vr_values).range.max;
1418 else
1420 /* Otherwise, 0 is inside of the range and minimum negative. Use 0
1421 as the shortest output and for the longest output compute the
1422 length of the output of both minimum and maximum and pick the
1423 longer. */
1424 unsigned HOST_WIDE_INT max1
1425 = format_integer (dir, argmin, vr_values).range.max;
1426 unsigned HOST_WIDE_INT max2
1427 = format_integer (dir, argmax, vr_values).range.max;
1428 res.range.min
1429 = format_integer (dir, integer_zero_node, vr_values).range.min;
1430 res.range.max = MAX (max1, max2);
1433 /* If the range is known, use the maximum as the likely length. */
1434 if (res.knownrange)
1435 res.range.likely = res.range.max;
1436 else
1438 /* Otherwise, use the minimum. Except for the case where for %#x or
1439 %#o the minimum is just for a single value in the range (0) and
1440 for all other values it is something longer, like 0x1 or 01.
1441 Use the length for value 1 in that case instead as the likely
1442 length. */
1443 res.range.likely = res.range.min;
1444 if (maybebase
1445 && base != 10
1446 && (tree_int_cst_sgn (argmin) < 0 || tree_int_cst_sgn (argmax) > 0))
1448 if (res.range.min == 1)
1449 res.range.likely += base == 8 ? 1 : 2;
1450 else if (res.range.min == 2
1451 && base == 16
1452 && (dir.width[0] == 2 || dir.prec[0] == 2))
1453 ++res.range.likely;
1457 res.range.unlikely = res.range.max;
1458 res.adjust_for_width_or_precision (dir.width, dirtype, base,
1459 (sign | maybebase) + (base == 16));
1460 res.adjust_for_width_or_precision (dir.prec, dirtype, base,
1461 (sign | maybebase) + (base == 16));
1463 return res;
1466 /* Return the number of bytes that a format directive consisting of FLAGS,
1467 PRECision, format SPECification, and MPFR rounding specifier RNDSPEC,
1468 would result for argument X under ideal conditions (i.e., if PREC
1469 weren't excessive). MPFR 3.1 allocates large amounts of memory for
1470 values of PREC with large magnitude and can fail (see MPFR bug #21056).
1471 This function works around those problems. */
1473 static unsigned HOST_WIDE_INT
1474 get_mpfr_format_length (mpfr_ptr x, const char *flags, HOST_WIDE_INT prec,
1475 char spec, char rndspec)
1477 char fmtstr[40];
1479 HOST_WIDE_INT len = strlen (flags);
1481 fmtstr[0] = '%';
1482 memcpy (fmtstr + 1, flags, len);
1483 memcpy (fmtstr + 1 + len, ".*R", 3);
1484 fmtstr[len + 4] = rndspec;
1485 fmtstr[len + 5] = spec;
1486 fmtstr[len + 6] = '\0';
1488 spec = TOUPPER (spec);
1489 if (spec == 'E' || spec == 'F')
1491 /* For %e, specify the precision explicitly since mpfr_sprintf
1492 does its own thing just to be different (see MPFR bug 21088). */
1493 if (prec < 0)
1494 prec = 6;
1496 else
1498 /* Avoid passing negative precisions with larger magnitude to MPFR
1499 to avoid exposing its bugs. (A negative precision is supposed
1500 to be ignored.) */
1501 if (prec < 0)
1502 prec = -1;
1505 HOST_WIDE_INT p = prec;
1507 if (spec == 'G' && !strchr (flags, '#'))
1509 /* For G/g without the pound flag, precision gives the maximum number
1510 of significant digits which is bounded by LDBL_MAX_10_EXP, or, for
1511 a 128 bit IEEE extended precision, 4932. Using twice as much here
1512 should be more than sufficient for any real format. */
1513 if ((IEEE_MAX_10_EXP * 2) < prec)
1514 prec = IEEE_MAX_10_EXP * 2;
1515 p = prec;
1517 else
1519 /* Cap precision arbitrarily at 1KB and add the difference
1520 (if any) to the MPFR result. */
1521 if (prec > 1024)
1522 p = 1024;
1525 len = mpfr_snprintf (NULL, 0, fmtstr, (int)p, x);
1527 /* Handle the unlikely (impossible?) error by returning more than
1528 the maximum dictated by the function's return type. */
1529 if (len < 0)
1530 return target_dir_max () + 1;
1532 /* Adjust the return value by the difference. */
1533 if (p < prec)
1534 len += prec - p;
1536 return len;
1539 /* Return the number of bytes to format using the format specifier
1540 SPEC and the precision PREC the largest value in the real floating
1541 TYPE. */
1543 static unsigned HOST_WIDE_INT
1544 format_floating_max (tree type, char spec, HOST_WIDE_INT prec)
1546 machine_mode mode = TYPE_MODE (type);
1548 /* IBM Extended mode. */
1549 if (MODE_COMPOSITE_P (mode))
1550 mode = DFmode;
1552 /* Get the real type format desription for the target. */
1553 const real_format *rfmt = REAL_MODE_FORMAT (mode);
1554 REAL_VALUE_TYPE rv;
1556 real_maxval (&rv, 0, mode);
1558 /* Convert the GCC real value representation with the precision
1559 of the real type to the mpfr_t format with the GCC default
1560 round-to-nearest mode. */
1561 mpfr_t x;
1562 mpfr_init2 (x, rfmt->p);
1563 mpfr_from_real (x, &rv, GMP_RNDN);
1565 /* Return a value one greater to account for the leading minus sign. */
1566 unsigned HOST_WIDE_INT r
1567 = 1 + get_mpfr_format_length (x, "", prec, spec, 'D');
1568 mpfr_clear (x);
1569 return r;
1572 /* Return a range representing the minimum and maximum number of bytes
1573 that the directive DIR will output for any argument. PREC gives
1574 the adjusted precision range to account for negative precisions
1575 meaning the default 6. This function is used when the directive
1576 argument or its value isn't known. */
1578 static fmtresult
1579 format_floating (const directive &dir, const HOST_WIDE_INT prec[2])
1581 tree type;
1583 switch (dir.modifier)
1585 case FMT_LEN_l:
1586 case FMT_LEN_none:
1587 type = double_type_node;
1588 break;
1590 case FMT_LEN_L:
1591 type = long_double_type_node;
1592 break;
1594 case FMT_LEN_ll:
1595 type = long_double_type_node;
1596 break;
1598 default:
1599 return fmtresult ();
1602 /* The minimum and maximum number of bytes produced by the directive. */
1603 fmtresult res;
1605 /* The minimum output as determined by flags. It's always at least 1.
1606 When plus or space are set the output is preceded by either a sign
1607 or a space. */
1608 unsigned flagmin = (1 /* for the first digit */
1609 + (dir.get_flag ('+') | dir.get_flag (' ')));
1611 /* The minimum is 3 for "inf" and "nan" for all specifiers, plus 1
1612 for the plus sign/space with the '+' and ' ' flags, respectively,
1613 unless reduced below. */
1614 res.range.min = 2 + flagmin;
1616 /* When the pound flag is set the decimal point is included in output
1617 regardless of precision. Whether or not a decimal point is included
1618 otherwise depends on the specification and precision. */
1619 bool radix = dir.get_flag ('#');
1621 switch (dir.specifier)
1623 case 'A':
1624 case 'a':
1626 HOST_WIDE_INT minprec = 6 + !radix /* decimal point */;
1627 if (dir.prec[0] <= 0)
1628 minprec = 0;
1629 else if (dir.prec[0] > 0)
1630 minprec = dir.prec[0] + !radix /* decimal point */;
1632 res.range.likely = (2 /* 0x */
1633 + flagmin
1634 + radix
1635 + minprec
1636 + 3 /* p+0 */);
1638 res.range.max = format_floating_max (type, 'a', prec[1]);
1640 /* The unlikely maximum accounts for the longest multibyte
1641 decimal point character. */
1642 res.range.unlikely = res.range.max;
1643 if (dir.prec[1] > 0)
1644 res.range.unlikely += target_mb_len_max () - 1;
1646 break;
1649 case 'E':
1650 case 'e':
1652 /* Minimum output attributable to precision and, when it's
1653 non-zero, decimal point. */
1654 HOST_WIDE_INT minprec = prec[0] ? prec[0] + !radix : 0;
1656 /* The likely minimum output is "[-+]1.234567e+00" regardless
1657 of the value of the actual argument. */
1658 res.range.likely = (flagmin
1659 + radix
1660 + minprec
1661 + 2 /* e+ */ + 2);
1663 res.range.max = format_floating_max (type, 'e', prec[1]);
1665 /* The unlikely maximum accounts for the longest multibyte
1666 decimal point character. */
1667 if (dir.prec[0] != dir.prec[1]
1668 || dir.prec[0] == -1 || dir.prec[0] > 0)
1669 res.range.unlikely = res.range.max + target_mb_len_max () -1;
1670 else
1671 res.range.unlikely = res.range.max;
1672 break;
1675 case 'F':
1676 case 'f':
1678 /* Minimum output attributable to precision and, when it's non-zero,
1679 decimal point. */
1680 HOST_WIDE_INT minprec = prec[0] ? prec[0] + !radix : 0;
1682 /* For finite numbers (i.e., not infinity or NaN) the lower bound
1683 when precision isn't specified is 8 bytes ("1.23456" since
1684 precision is taken to be 6). When precision is zero, the lower
1685 bound is 1 byte (e.g., "1"). Otherwise, when precision is greater
1686 than zero, then the lower bound is 2 plus precision (plus flags).
1687 But in all cases, the lower bound is no greater than 3. */
1688 unsigned HOST_WIDE_INT min = flagmin + radix + minprec;
1689 if (min < res.range.min)
1690 res.range.min = min;
1692 /* Compute the upper bound for -TYPE_MAX. */
1693 res.range.max = format_floating_max (type, 'f', prec[1]);
1695 /* The minimum output with unknown precision is a single byte
1696 (e.g., "0") but the more likely output is 3 bytes ("0.0"). */
1697 if (dir.prec[0] < 0 && dir.prec[1] > 0)
1698 res.range.likely = 3;
1699 else
1700 res.range.likely = min;
1702 /* The unlikely maximum accounts for the longest multibyte
1703 decimal point character. */
1704 if (dir.prec[0] != dir.prec[1]
1705 || dir.prec[0] == -1 || dir.prec[0] > 0)
1706 res.range.unlikely = res.range.max + target_mb_len_max () - 1;
1707 break;
1710 case 'G':
1711 case 'g':
1713 /* The %g output depends on precision and the exponent of
1714 the argument. Since the value of the argument isn't known
1715 the lower bound on the range of bytes (not counting flags
1716 or width) is 1 plus radix (i.e., either "0" or "0." for
1717 "%g" and "%#g", respectively, with a zero argument). */
1718 unsigned HOST_WIDE_INT min = flagmin + radix;
1719 if (min < res.range.min)
1720 res.range.min = min;
1722 char spec = 'g';
1723 HOST_WIDE_INT maxprec = dir.prec[1];
1724 if (radix && maxprec)
1726 /* When the pound flag (radix) is set, trailing zeros aren't
1727 trimmed and so the longest output is the same as for %e,
1728 except with precision minus 1 (as specified in C11). */
1729 spec = 'e';
1730 if (maxprec > 0)
1731 --maxprec;
1732 else if (maxprec < 0)
1733 maxprec = 5;
1735 else
1736 maxprec = prec[1];
1738 res.range.max = format_floating_max (type, spec, maxprec);
1740 /* The likely output is either the maximum computed above
1741 minus 1 (assuming the maximum is positive) when precision
1742 is known (or unspecified), or the same minimum as for %e
1743 (which is computed for a non-negative argument). Unlike
1744 for the other specifiers above the likely output isn't
1745 the minimum because for %g that's 1 which is unlikely. */
1746 if (dir.prec[1] < 0
1747 || (unsigned HOST_WIDE_INT)dir.prec[1] < target_int_max ())
1748 res.range.likely = res.range.max - 1;
1749 else
1751 HOST_WIDE_INT minprec = 6 + !radix /* decimal point */;
1752 res.range.likely = (flagmin
1753 + radix
1754 + minprec
1755 + 2 /* e+ */ + 2);
1758 /* The unlikely maximum accounts for the longest multibyte
1759 decimal point character. */
1760 res.range.unlikely = res.range.max + target_mb_len_max () - 1;
1761 break;
1764 default:
1765 return fmtresult ();
1768 /* Bump up the byte counters if WIDTH is greater. */
1769 res.adjust_for_width_or_precision (dir.width);
1770 return res;
1773 /* Return a range representing the minimum and maximum number of bytes
1774 that the directive DIR will write on output for the floating argument
1775 ARG. */
1777 static fmtresult
1778 format_floating (const directive &dir, tree arg, const vr_values *)
1780 HOST_WIDE_INT prec[] = { dir.prec[0], dir.prec[1] };
1781 tree type = (dir.modifier == FMT_LEN_L || dir.modifier == FMT_LEN_ll
1782 ? long_double_type_node : double_type_node);
1784 /* For an indeterminate precision the lower bound must be assumed
1785 to be zero. */
1786 if (TOUPPER (dir.specifier) == 'A')
1788 /* Get the number of fractional decimal digits needed to represent
1789 the argument without a loss of accuracy. */
1790 unsigned fmtprec
1791 = REAL_MODE_FORMAT (TYPE_MODE (type))->p;
1793 /* The precision of the IEEE 754 double format is 53.
1794 The precision of all other GCC binary double formats
1795 is 56 or less. */
1796 unsigned maxprec = fmtprec <= 56 ? 13 : 15;
1798 /* For %a, leave the minimum precision unspecified to let
1799 MFPR trim trailing zeros (as it and many other systems
1800 including Glibc happen to do) and set the maximum
1801 precision to reflect what it would be with trailing zeros
1802 present (as Solaris and derived systems do). */
1803 if (dir.prec[1] < 0)
1805 /* Both bounds are negative implies that precision has
1806 not been specified. */
1807 prec[0] = maxprec;
1808 prec[1] = -1;
1810 else if (dir.prec[0] < 0)
1812 /* With a negative lower bound and a non-negative upper
1813 bound set the minimum precision to zero and the maximum
1814 to the greater of the maximum precision (i.e., with
1815 trailing zeros present) and the specified upper bound. */
1816 prec[0] = 0;
1817 prec[1] = dir.prec[1] < maxprec ? maxprec : dir.prec[1];
1820 else if (dir.prec[0] < 0)
1822 if (dir.prec[1] < 0)
1824 /* A precision in a strictly negative range is ignored and
1825 the default of 6 is used instead. */
1826 prec[0] = prec[1] = 6;
1828 else
1830 /* For a precision in a partly negative range, the lower bound
1831 must be assumed to be zero and the new upper bound is the
1832 greater of 6 (the default precision used when the specified
1833 precision is negative) and the upper bound of the specified
1834 range. */
1835 prec[0] = 0;
1836 prec[1] = dir.prec[1] < 6 ? 6 : dir.prec[1];
1840 if (!arg
1841 || TREE_CODE (arg) != REAL_CST
1842 || !useless_type_conversion_p (type, TREE_TYPE (arg)))
1843 return format_floating (dir, prec);
1845 /* The minimum and maximum number of bytes produced by the directive. */
1846 fmtresult res;
1848 /* Get the real type format desription for the target. */
1849 const REAL_VALUE_TYPE *rvp = TREE_REAL_CST_PTR (arg);
1850 const real_format *rfmt = REAL_MODE_FORMAT (TYPE_MODE (TREE_TYPE (arg)));
1852 if (!real_isfinite (rvp))
1854 /* The format for Infinity and NaN is "[-]inf"/"[-]infinity"
1855 and "[-]nan" with the choice being implementation-defined
1856 but not locale dependent. */
1857 bool sign = dir.get_flag ('+') || real_isneg (rvp);
1858 res.range.min = 3 + sign;
1860 res.range.likely = res.range.min;
1861 res.range.max = res.range.min;
1862 /* The unlikely maximum is "[-/+]infinity" or "[-/+][qs]nan".
1863 For NaN, the C/POSIX standards specify two formats:
1864 "[-/+]nan"
1866 "[-/+]nan(n-char-sequence)"
1867 No known printf implementation outputs the latter format but AIX
1868 outputs QNaN and SNaN for quiet and signalling NaN, respectively,
1869 so the unlikely maximum reflects that. */
1870 res.range.unlikely = sign + (real_isinf (rvp) ? 8 : 4);
1872 /* The range for infinity and NaN is known unless either width
1873 or precision is unknown. Width has the same effect regardless
1874 of whether the argument is finite. Precision is either ignored
1875 (e.g., Glibc) or can have an effect on the short vs long format
1876 such as inf/infinity (e.g., Solaris). */
1877 res.knownrange = dir.known_width_and_precision ();
1879 /* Adjust the range for width but ignore precision. */
1880 res.adjust_for_width_or_precision (dir.width);
1882 return res;
1885 char fmtstr [40];
1886 char *pfmt = fmtstr;
1888 /* Append flags. */
1889 for (const char *pf = "-+ #0"; *pf; ++pf)
1890 if (dir.get_flag (*pf))
1891 *pfmt++ = *pf;
1893 *pfmt = '\0';
1896 /* Set up an array to easily iterate over. */
1897 unsigned HOST_WIDE_INT* const minmax[] = {
1898 &res.range.min, &res.range.max
1901 for (int i = 0; i != sizeof minmax / sizeof *minmax; ++i)
1903 /* Convert the GCC real value representation with the precision
1904 of the real type to the mpfr_t format rounding down in the
1905 first iteration that computes the minimm and up in the second
1906 that computes the maximum. This order is arbibtrary because
1907 rounding in either direction can result in longer output. */
1908 mpfr_t mpfrval;
1909 mpfr_init2 (mpfrval, rfmt->p);
1910 mpfr_from_real (mpfrval, rvp, i ? GMP_RNDU : GMP_RNDD);
1912 /* Use the MPFR rounding specifier to round down in the first
1913 iteration and then up. In most but not all cases this will
1914 result in the same number of bytes. */
1915 char rndspec = "DU"[i];
1917 /* Format it and store the result in the corresponding member
1918 of the result struct. */
1919 *minmax[i] = get_mpfr_format_length (mpfrval, fmtstr, prec[i],
1920 dir.specifier, rndspec);
1921 mpfr_clear (mpfrval);
1925 /* Make sure the minimum is less than the maximum (MPFR rounding
1926 in the call to mpfr_snprintf can result in the reverse. */
1927 if (res.range.max < res.range.min)
1929 unsigned HOST_WIDE_INT tmp = res.range.min;
1930 res.range.min = res.range.max;
1931 res.range.max = tmp;
1934 /* The range is known unless either width or precision is unknown. */
1935 res.knownrange = dir.known_width_and_precision ();
1937 /* For the same floating point constant, unless width or precision
1938 is unknown, use the longer output as the likely maximum since
1939 with round to nearest either is equally likely. Otheriwse, when
1940 precision is unknown, use the greater of the minimum and 3 as
1941 the likely output (for "0.0" since zero precision is unlikely). */
1942 if (res.knownrange)
1943 res.range.likely = res.range.max;
1944 else if (res.range.min < 3
1945 && dir.prec[0] < 0
1946 && (unsigned HOST_WIDE_INT)dir.prec[1] == target_int_max ())
1947 res.range.likely = 3;
1948 else
1949 res.range.likely = res.range.min;
1951 res.range.unlikely = res.range.max;
1953 if (res.range.max > 2 && (prec[0] != 0 || prec[1] != 0))
1955 /* Unless the precision is zero output longer than 2 bytes may
1956 include the decimal point which must be a single character
1957 up to MB_LEN_MAX in length. This is overly conservative
1958 since in some conversions some constants result in no decimal
1959 point (e.g., in %g). */
1960 res.range.unlikely += target_mb_len_max () - 1;
1963 res.adjust_for_width_or_precision (dir.width);
1964 return res;
1967 /* Return a FMTRESULT struct set to the lengths of the shortest and longest
1968 strings referenced by the expression STR, or (-1, -1) when not known.
1969 Used by the format_string function below. */
1971 static fmtresult
1972 get_string_length (tree str, unsigned eltsize, const vr_values *vr)
1974 if (!str)
1975 return fmtresult ();
1977 /* Try to determine the dynamic string length first. */
1978 c_strlen_data lendata = { };
1979 if (eltsize == 1)
1980 get_range_strlen_dynamic (str, &lendata, vr);
1981 else
1983 /* Determine the length of the shortest and longest string referenced
1984 by STR. Strings of unknown lengths are bounded by the sizes of
1985 arrays that subexpressions of STR may refer to. Pointers that
1986 aren't known to point any such arrays result in LENDATA.MAXLEN
1987 set to SIZE_MAX. */
1988 get_range_strlen (str, &lendata, eltsize);
1991 /* LENDATA.MAXBOUND is null when LENDATA.MIN corresponds to the shortest
1992 string referenced by STR. Otherwise, if it's not equal to .MINLEN it
1993 corresponds to the bound of the largest array STR refers to, if known,
1994 or it's SIZE_MAX otherwise. */
1996 /* Return the default result when nothing is known about the string. */
1997 if (lendata.maxbound)
1999 if (integer_all_onesp (lendata.maxbound)
2000 && integer_all_onesp (lendata.maxlen))
2001 return fmtresult ();
2003 if (!tree_fits_uhwi_p (lendata.maxbound)
2004 || !tree_fits_uhwi_p (lendata.maxlen))
2005 return fmtresult ();
2007 unsigned HOST_WIDE_INT lenmax = tree_to_uhwi (max_object_size ()) - 2;
2008 if (lenmax <= tree_to_uhwi (lendata.maxbound)
2009 && lenmax <= tree_to_uhwi (lendata.maxlen))
2010 return fmtresult ();
2013 HOST_WIDE_INT min
2014 = (tree_fits_uhwi_p (lendata.minlen)
2015 ? tree_to_uhwi (lendata.minlen)
2016 : 0);
2018 HOST_WIDE_INT max
2019 = (lendata.maxbound && tree_fits_uhwi_p (lendata.maxbound)
2020 ? tree_to_uhwi (lendata.maxbound)
2021 : HOST_WIDE_INT_M1U);
2023 const bool unbounded = integer_all_onesp (lendata.maxlen);
2025 /* Set the max/likely counters to unbounded when a minimum is known
2026 but the maximum length isn't bounded. This implies that STR is
2027 a conditional expression involving a string of known length and
2028 and an expression of unknown/unbounded length. */
2029 if (min
2030 && (unsigned HOST_WIDE_INT)min < HOST_WIDE_INT_M1U
2031 && unbounded)
2032 max = HOST_WIDE_INT_M1U;
2034 /* get_range_strlen() returns the target value of SIZE_MAX for
2035 strings of unknown length. Bump it up to HOST_WIDE_INT_M1U
2036 which may be bigger. */
2037 if ((unsigned HOST_WIDE_INT)min == target_size_max ())
2038 min = HOST_WIDE_INT_M1U;
2039 if ((unsigned HOST_WIDE_INT)max == target_size_max ())
2040 max = HOST_WIDE_INT_M1U;
2042 fmtresult res (min, max);
2043 res.nonstr = lendata.decl;
2045 /* Set RES.KNOWNRANGE to true if and only if all strings referenced
2046 by STR are known to be bounded (though not necessarily by their
2047 actual length but perhaps by their maximum possible length). */
2048 if (res.range.max < target_int_max ())
2050 res.knownrange = true;
2051 /* When the the length of the longest string is known and not
2052 excessive use it as the likely length of the string(s). */
2053 res.range.likely = res.range.max;
2055 else
2057 /* When the upper bound is unknown (it can be zero or excessive)
2058 set the likely length to the greater of 1. If MAXBOUND is
2059 set, also reset the length of the lower bound to zero. */
2060 res.range.likely = res.range.min ? res.range.min : warn_level > 1;
2061 if (lendata.maxbound)
2062 res.range.min = 0;
2065 res.range.unlikely = unbounded ? HOST_WIDE_INT_MAX : res.range.max;
2067 return res;
2070 /* Return the minimum and maximum number of characters formatted
2071 by the '%c' format directives and its wide character form for
2072 the argument ARG. ARG can be null (for functions such as
2073 vsprinf). */
2075 static fmtresult
2076 format_character (const directive &dir, tree arg, const vr_values *vr_values)
2078 fmtresult res;
2080 res.knownrange = true;
2082 if (dir.specifier == 'C'
2083 || dir.modifier == FMT_LEN_l)
2085 /* A wide character can result in as few as zero bytes. */
2086 res.range.min = 0;
2088 HOST_WIDE_INT min, max;
2089 if (get_int_range (arg, &min, &max, false, 0, vr_values))
2091 if (min == 0 && max == 0)
2093 /* The NUL wide character results in no bytes. */
2094 res.range.max = 0;
2095 res.range.likely = 0;
2096 res.range.unlikely = 0;
2098 else if (min >= 0 && min < 128)
2100 /* Be conservative if the target execution character set
2101 is not a 1-to-1 mapping to the source character set or
2102 if the source set is not ASCII. */
2103 bool one_2_one_ascii
2104 = (target_to_host_charmap[0] == 1 && target_to_host ('a') == 97);
2106 /* A wide character in the ASCII range most likely results
2107 in a single byte, and only unlikely in up to MB_LEN_MAX. */
2108 res.range.max = one_2_one_ascii ? 1 : target_mb_len_max ();;
2109 res.range.likely = 1;
2110 res.range.unlikely = target_mb_len_max ();
2111 res.mayfail = !one_2_one_ascii;
2113 else
2115 /* A wide character outside the ASCII range likely results
2116 in up to two bytes, and only unlikely in up to MB_LEN_MAX. */
2117 res.range.max = target_mb_len_max ();
2118 res.range.likely = 2;
2119 res.range.unlikely = res.range.max;
2120 /* Converting such a character may fail. */
2121 res.mayfail = true;
2124 else
2126 /* An unknown wide character is treated the same as a wide
2127 character outside the ASCII range. */
2128 res.range.max = target_mb_len_max ();
2129 res.range.likely = 2;
2130 res.range.unlikely = res.range.max;
2131 res.mayfail = true;
2134 else
2136 /* A plain '%c' directive. Its ouput is exactly 1. */
2137 res.range.min = res.range.max = 1;
2138 res.range.likely = res.range.unlikely = 1;
2139 res.knownrange = true;
2142 /* Bump up the byte counters if WIDTH is greater. */
2143 return res.adjust_for_width_or_precision (dir.width);
2146 /* Return the minimum and maximum number of characters formatted
2147 by the '%s' format directive and its wide character form for
2148 the argument ARG. ARG can be null (for functions such as
2149 vsprinf). */
2151 static fmtresult
2152 format_string (const directive &dir, tree arg, const vr_values *vr_values)
2154 fmtresult res;
2156 /* Compute the range the argument's length can be in. */
2157 int count_by = 1;
2158 if (dir.specifier == 'S' || dir.modifier == FMT_LEN_l)
2160 /* Get a node for a C type that will be the same size
2161 as a wchar_t on the target. */
2162 tree node = get_typenode_from_name (MODIFIED_WCHAR_TYPE);
2164 /* Now that we have a suitable node, get the number of
2165 bytes it occupies. */
2166 count_by = int_size_in_bytes (node);
2167 gcc_checking_assert (count_by == 2 || count_by == 4);
2170 fmtresult slen = get_string_length (arg, count_by, vr_values);
2171 if (slen.range.min == slen.range.max
2172 && slen.range.min < HOST_WIDE_INT_MAX)
2174 /* The argument is either a string constant or it refers
2175 to one of a number of strings of the same length. */
2177 /* A '%s' directive with a string argument with constant length. */
2178 res.range = slen.range;
2180 if (dir.specifier == 'S'
2181 || dir.modifier == FMT_LEN_l)
2183 /* In the worst case the length of output of a wide string S
2184 is bounded by MB_LEN_MAX * wcslen (S). */
2185 res.range.max *= target_mb_len_max ();
2186 res.range.unlikely = res.range.max;
2187 /* It's likely that the the total length is not more that
2188 2 * wcslen (S).*/
2189 res.range.likely = res.range.min * 2;
2191 if (dir.prec[1] >= 0
2192 && (unsigned HOST_WIDE_INT)dir.prec[1] < res.range.max)
2194 res.range.max = dir.prec[1];
2195 res.range.likely = dir.prec[1];
2196 res.range.unlikely = dir.prec[1];
2199 if (dir.prec[0] < 0 && dir.prec[1] > -1)
2200 res.range.min = 0;
2201 else if (dir.prec[0] >= 0)
2202 res.range.likely = dir.prec[0];
2204 /* Even a non-empty wide character string need not convert into
2205 any bytes. */
2206 res.range.min = 0;
2208 /* A non-empty wide character conversion may fail. */
2209 if (slen.range.max > 0)
2210 res.mayfail = true;
2212 else
2214 res.knownrange = true;
2216 if (dir.prec[0] < 0 && dir.prec[1] > -1)
2217 res.range.min = 0;
2218 else if ((unsigned HOST_WIDE_INT)dir.prec[0] < res.range.min)
2219 res.range.min = dir.prec[0];
2221 if ((unsigned HOST_WIDE_INT)dir.prec[1] < res.range.max)
2223 res.range.max = dir.prec[1];
2224 res.range.likely = dir.prec[1];
2225 res.range.unlikely = dir.prec[1];
2229 else if (arg && integer_zerop (arg))
2231 /* Handle null pointer argument. */
2233 fmtresult res (0);
2234 res.nullp = true;
2235 return res;
2237 else
2239 /* For a '%s' and '%ls' directive with a non-constant string (either
2240 one of a number of strings of known length or an unknown string)
2241 the minimum number of characters is lesser of PRECISION[0] and
2242 the length of the shortest known string or zero, and the maximum
2243 is the lessser of the length of the longest known string or
2244 PTRDIFF_MAX and PRECISION[1]. The likely length is either
2245 the minimum at level 1 and the greater of the minimum and 1
2246 at level 2. This result is adjust upward for width (if it's
2247 specified). */
2249 if (dir.specifier == 'S'
2250 || dir.modifier == FMT_LEN_l)
2252 /* A wide character converts to as few as zero bytes. */
2253 slen.range.min = 0;
2254 if (slen.range.max < target_int_max ())
2255 slen.range.max *= target_mb_len_max ();
2257 if (slen.range.likely < target_int_max ())
2258 slen.range.likely *= 2;
2260 if (slen.range.likely < target_int_max ())
2261 slen.range.unlikely *= target_mb_len_max ();
2263 /* A non-empty wide character conversion may fail. */
2264 if (slen.range.max > 0)
2265 res.mayfail = true;
2268 res.range = slen.range;
2270 if (dir.prec[0] >= 0)
2272 /* Adjust the minimum to zero if the string length is unknown,
2273 or at most the lower bound of the precision otherwise. */
2274 if (slen.range.min >= target_int_max ())
2275 res.range.min = 0;
2276 else if ((unsigned HOST_WIDE_INT)dir.prec[0] < slen.range.min)
2277 res.range.min = dir.prec[0];
2279 /* Make both maxima no greater than the upper bound of precision. */
2280 if ((unsigned HOST_WIDE_INT)dir.prec[1] < slen.range.max
2281 || slen.range.max >= target_int_max ())
2283 res.range.max = dir.prec[1];
2284 res.range.unlikely = dir.prec[1];
2287 /* If precision is constant, set the likely counter to the lesser
2288 of it and the maximum string length. Otherwise, if the lower
2289 bound of precision is greater than zero, set the likely counter
2290 to the minimum. Otherwise set it to zero or one based on
2291 the warning level. */
2292 if (dir.prec[0] == dir.prec[1])
2293 res.range.likely
2294 = ((unsigned HOST_WIDE_INT)dir.prec[0] < slen.range.max
2295 ? dir.prec[0] : slen.range.max);
2296 else if (dir.prec[0] > 0)
2297 res.range.likely = res.range.min;
2298 else
2299 res.range.likely = warn_level > 1;
2301 else if (dir.prec[1] >= 0)
2303 res.range.min = 0;
2304 if ((unsigned HOST_WIDE_INT)dir.prec[1] < slen.range.max)
2305 res.range.max = dir.prec[1];
2306 res.range.likely = dir.prec[1] ? warn_level > 1 : 0;
2307 if ((unsigned HOST_WIDE_INT)dir.prec[1] < slen.range.unlikely)
2308 res.range.unlikely = dir.prec[1];
2310 else if (slen.range.min >= target_int_max ())
2312 res.range.min = 0;
2313 res.range.max = HOST_WIDE_INT_MAX;
2314 /* At level 1 strings of unknown length are assumed to be
2315 empty, while at level 1 they are assumed to be one byte
2316 long. */
2317 res.range.likely = warn_level > 1;
2318 res.range.unlikely = HOST_WIDE_INT_MAX;
2320 else
2322 /* A string of unknown length unconstrained by precision is
2323 assumed to be empty at level 1 and just one character long
2324 at higher levels. */
2325 if (res.range.likely >= target_int_max ())
2326 res.range.likely = warn_level > 1;
2330 /* If the argument isn't a nul-terminated string and the number
2331 of bytes on output isn't bounded by precision, set NONSTR. */
2332 if (slen.nonstr && slen.range.min < (unsigned HOST_WIDE_INT)dir.prec[0])
2333 res.nonstr = slen.nonstr;
2335 /* Bump up the byte counters if WIDTH is greater. */
2336 return res.adjust_for_width_or_precision (dir.width);
2339 /* Format plain string (part of the format string itself). */
2341 static fmtresult
2342 format_plain (const directive &dir, tree, const vr_values *)
2344 fmtresult res (dir.len);
2345 return res;
2348 /* Return true if the RESULT of a directive in a call describe by INFO
2349 should be diagnosed given the AVAILable space in the destination. */
2351 static bool
2352 should_warn_p (const call_info &info,
2353 const result_range &avail, const result_range &result)
2355 if (result.max <= avail.min)
2357 /* The least amount of space remaining in the destination is big
2358 enough for the longest output. */
2359 return false;
2362 if (info.bounded)
2364 if (warn_format_trunc == 1 && result.min <= avail.max
2365 && info.retval_used ())
2367 /* The likely amount of space remaining in the destination is big
2368 enough for the least output and the return value is used. */
2369 return false;
2372 if (warn_format_trunc == 1 && result.likely <= avail.likely
2373 && !info.retval_used ())
2375 /* The likely amount of space remaining in the destination is big
2376 enough for the likely output and the return value is unused. */
2377 return false;
2380 if (warn_format_trunc == 2
2381 && result.likely <= avail.min
2382 && (result.max <= avail.min
2383 || result.max > HOST_WIDE_INT_MAX))
2385 /* The minimum amount of space remaining in the destination is big
2386 enough for the longest output. */
2387 return false;
2390 else
2392 if (warn_level == 1 && result.likely <= avail.likely)
2394 /* The likely amount of space remaining in the destination is big
2395 enough for the likely output. */
2396 return false;
2399 if (warn_level == 2
2400 && result.likely <= avail.min
2401 && (result.max <= avail.min
2402 || result.max > HOST_WIDE_INT_MAX))
2404 /* The minimum amount of space remaining in the destination is big
2405 enough for the longest output. */
2406 return false;
2410 return true;
2413 /* At format string location describe by DIRLOC in a call described
2414 by INFO, issue a warning for a directive DIR whose output may be
2415 in excess of the available space AVAIL_RANGE in the destination
2416 given the formatting result FMTRES. This function does nothing
2417 except decide whether to issue a warning for a possible write
2418 past the end or truncation and, if so, format the warning.
2419 Return true if a warning has been issued. */
2421 static bool
2422 maybe_warn (substring_loc &dirloc, location_t argloc,
2423 const call_info &info,
2424 const result_range &avail_range, const result_range &res,
2425 const directive &dir)
2427 if (!should_warn_p (info, avail_range, res))
2428 return false;
2430 /* A warning will definitely be issued below. */
2432 /* The maximum byte count to reference in the warning. Larger counts
2433 imply that the upper bound is unknown (and could be anywhere between
2434 RES.MIN + 1 and SIZE_MAX / 2) are printed as "N or more bytes" rather
2435 than "between N and X" where X is some huge number. */
2436 unsigned HOST_WIDE_INT maxbytes = target_dir_max ();
2438 /* True when there is enough room in the destination for the least
2439 amount of a directive's output but not enough for its likely or
2440 maximum output. */
2441 bool maybe = (res.min <= avail_range.max
2442 && (avail_range.min < res.likely
2443 || (res.max < HOST_WIDE_INT_MAX
2444 && avail_range.min < res.max)));
2446 /* Buffer for the directive in the host character set (used when
2447 the source character set is different). */
2448 char hostdir[32];
2450 if (avail_range.min == avail_range.max)
2452 /* The size of the destination region is exact. */
2453 unsigned HOST_WIDE_INT navail = avail_range.max;
2455 if (target_to_host (*dir.beg) != '%')
2457 /* For plain character directives (i.e., the format string itself)
2458 but not others, point the caret at the first character that's
2459 past the end of the destination. */
2460 if (navail < dir.len)
2461 dirloc.set_caret_index (dirloc.get_caret_idx () + navail);
2464 if (*dir.beg == '\0')
2466 /* This is the terminating nul. */
2467 gcc_assert (res.min == 1 && res.min == res.max);
2469 return fmtwarn (dirloc, UNKNOWN_LOCATION, NULL, info.warnopt (),
2470 info.bounded
2471 ? (maybe
2472 ? G_("%qE output may be truncated before the "
2473 "last format character")
2474 : G_("%qE output truncated before the last "
2475 "format character"))
2476 : (maybe
2477 ? G_("%qE may write a terminating nul past the "
2478 "end of the destination")
2479 : G_("%qE writing a terminating nul past the "
2480 "end of the destination")),
2481 info.func);
2484 if (res.min == res.max)
2486 const char *d = target_to_host (hostdir, sizeof hostdir, dir.beg);
2487 if (!info.bounded)
2488 return fmtwarn_n (dirloc, argloc, NULL, info.warnopt (), res.min,
2489 "%<%.*s%> directive writing %wu byte into a "
2490 "region of size %wu",
2491 "%<%.*s%> directive writing %wu bytes into a "
2492 "region of size %wu",
2493 (int) dir.len, d, res.min, navail);
2494 else if (maybe)
2495 return fmtwarn_n (dirloc, argloc, NULL, info.warnopt (), res.min,
2496 "%<%.*s%> directive output may be truncated "
2497 "writing %wu byte into a region of size %wu",
2498 "%<%.*s%> directive output may be truncated "
2499 "writing %wu bytes into a region of size %wu",
2500 (int) dir.len, d, res.min, navail);
2501 else
2502 return fmtwarn_n (dirloc, argloc, NULL, info.warnopt (), res.min,
2503 "%<%.*s%> directive output truncated writing "
2504 "%wu byte into a region of size %wu",
2505 "%<%.*s%> directive output truncated writing "
2506 "%wu bytes into a region of size %wu",
2507 (int) dir.len, d, res.min, navail);
2509 if (res.min == 0 && res.max < maxbytes)
2510 return fmtwarn (dirloc, argloc, NULL,
2511 info.warnopt (),
2512 info.bounded
2513 ? (maybe
2514 ? G_("%<%.*s%> directive output may be truncated "
2515 "writing up to %wu bytes into a region of "
2516 "size %wu")
2517 : G_("%<%.*s%> directive output truncated writing "
2518 "up to %wu bytes into a region of size %wu"))
2519 : G_("%<%.*s%> directive writing up to %wu bytes "
2520 "into a region of size %wu"), (int) dir.len,
2521 target_to_host (hostdir, sizeof hostdir, dir.beg),
2522 res.max, navail);
2524 if (res.min == 0 && maxbytes <= res.max)
2525 /* This is a special case to avoid issuing the potentially
2526 confusing warning:
2527 writing 0 or more bytes into a region of size 0. */
2528 return fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2529 info.bounded
2530 ? (maybe
2531 ? G_("%<%.*s%> directive output may be truncated "
2532 "writing likely %wu or more bytes into a "
2533 "region of size %wu")
2534 : G_("%<%.*s%> directive output truncated writing "
2535 "likely %wu or more bytes into a region of "
2536 "size %wu"))
2537 : G_("%<%.*s%> directive writing likely %wu or more "
2538 "bytes into a region of size %wu"), (int) dir.len,
2539 target_to_host (hostdir, sizeof hostdir, dir.beg),
2540 res.likely, navail);
2542 if (res.max < maxbytes)
2543 return fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2544 info.bounded
2545 ? (maybe
2546 ? G_("%<%.*s%> directive output may be truncated "
2547 "writing between %wu and %wu bytes into a "
2548 "region of size %wu")
2549 : G_("%<%.*s%> directive output truncated "
2550 "writing between %wu and %wu bytes into a "
2551 "region of size %wu"))
2552 : G_("%<%.*s%> directive writing between %wu and "
2553 "%wu bytes into a region of size %wu"),
2554 (int) dir.len,
2555 target_to_host (hostdir, sizeof hostdir, dir.beg),
2556 res.min, res.max, navail);
2558 return fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2559 info.bounded
2560 ? (maybe
2561 ? G_("%<%.*s%> directive output may be truncated "
2562 "writing %wu or more bytes into a region of "
2563 "size %wu")
2564 : G_("%<%.*s%> directive output truncated writing "
2565 "%wu or more bytes into a region of size %wu"))
2566 : G_("%<%.*s%> directive writing %wu or more bytes "
2567 "into a region of size %wu"), (int) dir.len,
2568 target_to_host (hostdir, sizeof hostdir, dir.beg),
2569 res.min, navail);
2572 /* The size of the destination region is a range. */
2574 if (target_to_host (*dir.beg) != '%')
2576 unsigned HOST_WIDE_INT navail = avail_range.max;
2578 /* For plain character directives (i.e., the format string itself)
2579 but not others, point the caret at the first character that's
2580 past the end of the destination. */
2581 if (navail < dir.len)
2582 dirloc.set_caret_index (dirloc.get_caret_idx () + navail);
2585 if (*dir.beg == '\0')
2587 gcc_assert (res.min == 1 && res.min == res.max);
2589 return fmtwarn (dirloc, UNKNOWN_LOCATION, NULL, info.warnopt (),
2590 info.bounded
2591 ? (maybe
2592 ? G_("%qE output may be truncated before the last "
2593 "format character")
2594 : G_("%qE output truncated before the last format "
2595 "character"))
2596 : (maybe
2597 ? G_("%qE may write a terminating nul past the end "
2598 "of the destination")
2599 : G_("%qE writing a terminating nul past the end "
2600 "of the destination")), info.func);
2603 if (res.min == res.max)
2605 const char *d = target_to_host (hostdir, sizeof hostdir, dir.beg);
2606 if (!info.bounded)
2607 return fmtwarn_n (dirloc, argloc, NULL, info.warnopt (), res.min,
2608 "%<%.*s%> directive writing %wu byte into a region "
2609 "of size between %wu and %wu",
2610 "%<%.*s%> directive writing %wu bytes into a region "
2611 "of size between %wu and %wu", (int) dir.len, d,
2612 res.min, avail_range.min, avail_range.max);
2613 else if (maybe)
2614 return fmtwarn_n (dirloc, argloc, NULL, info.warnopt (), res.min,
2615 "%<%.*s%> directive output may be truncated writing "
2616 "%wu byte into a region of size between %wu and %wu",
2617 "%<%.*s%> directive output may be truncated writing "
2618 "%wu bytes into a region of size between %wu and "
2619 "%wu", (int) dir.len, d, res.min, avail_range.min,
2620 avail_range.max);
2621 else
2622 return fmtwarn_n (dirloc, argloc, NULL, info.warnopt (), res.min,
2623 "%<%.*s%> directive output truncated writing %wu "
2624 "byte into a region of size between %wu and %wu",
2625 "%<%.*s%> directive output truncated writing %wu "
2626 "bytes into a region of size between %wu and %wu",
2627 (int) dir.len, d, res.min, avail_range.min,
2628 avail_range.max);
2631 if (res.min == 0 && res.max < maxbytes)
2632 return fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2633 info.bounded
2634 ? (maybe
2635 ? G_("%<%.*s%> directive output may be truncated "
2636 "writing up to %wu bytes into a region of size "
2637 "between %wu and %wu")
2638 : G_("%<%.*s%> directive output truncated writing "
2639 "up to %wu bytes into a region of size between "
2640 "%wu and %wu"))
2641 : G_("%<%.*s%> directive writing up to %wu bytes "
2642 "into a region of size between %wu and %wu"),
2643 (int) dir.len,
2644 target_to_host (hostdir, sizeof hostdir, dir.beg),
2645 res.max, avail_range.min, avail_range.max);
2647 if (res.min == 0 && maxbytes <= res.max)
2648 /* This is a special case to avoid issuing the potentially confusing
2649 warning:
2650 writing 0 or more bytes into a region of size between 0 and N. */
2651 return fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2652 info.bounded
2653 ? (maybe
2654 ? G_("%<%.*s%> directive output may be truncated "
2655 "writing likely %wu or more bytes into a region "
2656 "of size between %wu and %wu")
2657 : G_("%<%.*s%> directive output truncated writing "
2658 "likely %wu or more bytes into a region of size "
2659 "between %wu and %wu"))
2660 : G_("%<%.*s%> directive writing likely %wu or more bytes "
2661 "into a region of size between %wu and %wu"),
2662 (int) dir.len,
2663 target_to_host (hostdir, sizeof hostdir, dir.beg),
2664 res.likely, avail_range.min, avail_range.max);
2666 if (res.max < maxbytes)
2667 return fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2668 info.bounded
2669 ? (maybe
2670 ? G_("%<%.*s%> directive output may be truncated "
2671 "writing between %wu and %wu bytes into a region "
2672 "of size between %wu and %wu")
2673 : G_("%<%.*s%> directive output truncated writing "
2674 "between %wu and %wu bytes into a region of size "
2675 "between %wu and %wu"))
2676 : G_("%<%.*s%> directive writing between %wu and "
2677 "%wu bytes into a region of size between %wu and "
2678 "%wu"), (int) dir.len,
2679 target_to_host (hostdir, sizeof hostdir, dir.beg),
2680 res.min, res.max, avail_range.min, avail_range.max);
2682 return fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2683 info.bounded
2684 ? (maybe
2685 ? G_("%<%.*s%> directive output may be truncated writing "
2686 "%wu or more bytes into a region of size between "
2687 "%wu and %wu")
2688 : G_("%<%.*s%> directive output truncated writing "
2689 "%wu or more bytes into a region of size between "
2690 "%wu and %wu"))
2691 : G_("%<%.*s%> directive writing %wu or more bytes "
2692 "into a region of size between %wu and %wu"),
2693 (int) dir.len,
2694 target_to_host (hostdir, sizeof hostdir, dir.beg),
2695 res.min, avail_range.min, avail_range.max);
2698 /* Compute the length of the output resulting from the directive DIR
2699 in a call described by INFO and update the overall result of the call
2700 in *RES. Return true if the directive has been handled. */
2702 static bool
2703 format_directive (const call_info &info,
2704 format_result *res, const directive &dir,
2705 const class vr_values *vr_values)
2707 /* Offset of the beginning of the directive from the beginning
2708 of the format string. */
2709 size_t offset = dir.beg - info.fmtstr;
2710 size_t start = offset;
2711 size_t length = offset + dir.len - !!dir.len;
2713 /* Create a location for the whole directive from the % to the format
2714 specifier. */
2715 substring_loc dirloc (info.fmtloc, TREE_TYPE (info.format),
2716 offset, start, length);
2718 /* Also get the location of the argument if possible.
2719 This doesn't work for integer literals or function calls. */
2720 location_t argloc = UNKNOWN_LOCATION;
2721 if (dir.arg)
2722 argloc = EXPR_LOCATION (dir.arg);
2724 /* Bail when there is no function to compute the output length,
2725 or when minimum length checking has been disabled. */
2726 if (!dir.fmtfunc || res->range.min >= HOST_WIDE_INT_MAX)
2727 return false;
2729 /* Compute the range of lengths of the formatted output. */
2730 fmtresult fmtres = dir.fmtfunc (dir, dir.arg, vr_values);
2732 /* Record whether the output of all directives is known to be
2733 bounded by some maximum, implying that their arguments are
2734 either known exactly or determined to be in a known range
2735 or, for strings, limited by the upper bounds of the arrays
2736 they refer to. */
2737 res->knownrange &= fmtres.knownrange;
2739 if (!fmtres.knownrange)
2741 /* Only when the range is known, check it against the host value
2742 of INT_MAX + (the number of bytes of the "%.*Lf" directive with
2743 INT_MAX precision, which is the longest possible output of any
2744 single directive). That's the largest valid byte count (though
2745 not valid call to a printf-like function because it can never
2746 return such a count). Otherwise, the range doesn't correspond
2747 to known values of the argument. */
2748 if (fmtres.range.max > target_dir_max ())
2750 /* Normalize the MAX counter to avoid having to deal with it
2751 later. The counter can be less than HOST_WIDE_INT_M1U
2752 when compiling for an ILP32 target on an LP64 host. */
2753 fmtres.range.max = HOST_WIDE_INT_M1U;
2754 /* Disable exact and maximum length checking after a failure
2755 to determine the maximum number of characters (for example
2756 for wide characters or wide character strings) but continue
2757 tracking the minimum number of characters. */
2758 res->range.max = HOST_WIDE_INT_M1U;
2761 if (fmtres.range.min > target_dir_max ())
2763 /* Disable exact length checking after a failure to determine
2764 even the minimum number of characters (it shouldn't happen
2765 except in an error) but keep tracking the minimum and maximum
2766 number of characters. */
2767 return true;
2771 /* Buffer for the directive in the host character set (used when
2772 the source character set is different). */
2773 char hostdir[32];
2775 int dirlen = dir.len;
2777 if (fmtres.nullp)
2779 fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2780 "%G%<%.*s%> directive argument is null",
2781 info.callstmt, dirlen,
2782 target_to_host (hostdir, sizeof hostdir, dir.beg));
2784 /* Don't bother processing the rest of the format string. */
2785 res->warned = true;
2786 res->range.min = HOST_WIDE_INT_M1U;
2787 res->range.max = HOST_WIDE_INT_M1U;
2788 return false;
2791 /* Compute the number of available bytes in the destination. There
2792 must always be at least one byte of space for the terminating
2793 NUL that's appended after the format string has been processed. */
2794 result_range avail_range = bytes_remaining (info.objsize, *res);
2796 bool warned = res->warned;
2798 if (!warned)
2799 warned = maybe_warn (dirloc, argloc, info, avail_range,
2800 fmtres.range, dir);
2802 /* Bump up the total maximum if it isn't too big. */
2803 if (res->range.max < HOST_WIDE_INT_MAX
2804 && fmtres.range.max < HOST_WIDE_INT_MAX)
2805 res->range.max += fmtres.range.max;
2807 /* Raise the total unlikely maximum by the larger of the maximum
2808 and the unlikely maximum. */
2809 unsigned HOST_WIDE_INT save = res->range.unlikely;
2810 if (fmtres.range.max < fmtres.range.unlikely)
2811 res->range.unlikely += fmtres.range.unlikely;
2812 else
2813 res->range.unlikely += fmtres.range.max;
2815 if (res->range.unlikely < save)
2816 res->range.unlikely = HOST_WIDE_INT_M1U;
2818 res->range.min += fmtres.range.min;
2819 res->range.likely += fmtres.range.likely;
2821 /* Has the minimum directive output length exceeded the maximum
2822 of 4095 bytes required to be supported? */
2823 bool minunder4k = fmtres.range.min < 4096;
2824 bool maxunder4k = fmtres.range.max < 4096;
2825 /* Clear POSUNDER4K in the overall result if the maximum has exceeded
2826 the 4k (this is necessary to avoid the return value optimization
2827 that may not be safe in the maximum case). */
2828 if (!maxunder4k)
2829 res->posunder4k = false;
2830 /* Also clear POSUNDER4K if the directive may fail. */
2831 if (fmtres.mayfail)
2832 res->posunder4k = false;
2834 if (!warned
2835 /* Only warn at level 2. */
2836 && warn_level > 1
2837 /* Only warn for string functions. */
2838 && info.is_string_func ()
2839 && (!minunder4k
2840 || (!maxunder4k && fmtres.range.max < HOST_WIDE_INT_MAX)))
2842 /* The directive output may be longer than the maximum required
2843 to be handled by an implementation according to 7.21.6.1, p15
2844 of C11. Warn on this only at level 2 but remember this and
2845 prevent folding the return value when done. This allows for
2846 the possibility of the actual libc call failing due to ENOMEM
2847 (like Glibc does with very large precision or width).
2848 Issue the "may exceed" warning only for string functions and
2849 not for fprintf or printf. */
2851 if (fmtres.range.min == fmtres.range.max)
2852 warned = fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2853 "%<%.*s%> directive output of %wu bytes exceeds "
2854 "minimum required size of 4095", dirlen,
2855 target_to_host (hostdir, sizeof hostdir, dir.beg),
2856 fmtres.range.min);
2857 else if (!minunder4k)
2858 warned = fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2859 "%<%.*s%> directive output between %wu and %wu "
2860 "bytes exceeds minimum required size of 4095",
2861 dirlen,
2862 target_to_host (hostdir, sizeof hostdir, dir.beg),
2863 fmtres.range.min, fmtres.range.max);
2864 else if (!info.retval_used () && info.is_string_func ())
2865 warned = fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2866 "%<%.*s%> directive output between %wu and %wu "
2867 "bytes may exceed minimum required size of "
2868 "4095",
2869 dirlen,
2870 target_to_host (hostdir, sizeof hostdir, dir.beg),
2871 fmtres.range.min, fmtres.range.max);
2874 /* Has the likely and maximum directive output exceeded INT_MAX? */
2875 bool likelyximax = *dir.beg && res->range.likely > target_int_max ();
2876 /* Don't consider the maximum to be in excess when it's the result
2877 of a string of unknown length (i.e., whose maximum has been set
2878 to be greater than or equal to HOST_WIDE_INT_MAX. */
2879 bool maxximax = (*dir.beg
2880 && res->range.max > target_int_max ()
2881 && res->range.max < HOST_WIDE_INT_MAX);
2883 if (!warned
2884 /* Warn for the likely output size at level 1. */
2885 && (likelyximax
2886 /* But only warn for the maximum at level 2. */
2887 || (warn_level > 1
2888 && maxximax
2889 && fmtres.range.max < HOST_WIDE_INT_MAX)))
2891 if (fmtres.range.min > target_int_max ())
2893 /* The directive output exceeds INT_MAX bytes. */
2894 if (fmtres.range.min == fmtres.range.max)
2895 warned = fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2896 "%<%.*s%> directive output of %wu bytes exceeds "
2897 "%<INT_MAX%>", dirlen,
2898 target_to_host (hostdir, sizeof hostdir, dir.beg),
2899 fmtres.range.min);
2900 else
2901 warned = fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2902 "%<%.*s%> directive output between %wu and "
2903 "%wu bytes exceeds %<INT_MAX%>", dirlen,
2904 target_to_host (hostdir, sizeof hostdir, dir.beg),
2905 fmtres.range.min, fmtres.range.max);
2907 else if (res->range.min > target_int_max ())
2909 /* The directive output is under INT_MAX but causes the result
2910 to exceed INT_MAX bytes. */
2911 if (fmtres.range.min == fmtres.range.max)
2912 warned = fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2913 "%<%.*s%> directive output of %wu bytes causes "
2914 "result to exceed %<INT_MAX%>", dirlen,
2915 target_to_host (hostdir, sizeof hostdir, dir.beg),
2916 fmtres.range.min);
2917 else
2918 warned = fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2919 "%<%.*s%> directive output between %wu and "
2920 "%wu bytes causes result to exceed %<INT_MAX%>",
2921 dirlen,
2922 target_to_host (hostdir, sizeof hostdir, dir.beg),
2923 fmtres.range.min, fmtres.range.max);
2925 else if ((!info.retval_used () || !info.bounded)
2926 && (info.is_string_func ()))
2927 /* Warn for calls to string functions that either aren't bounded
2928 (sprintf) or whose return value isn't used. */
2929 warned = fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2930 "%<%.*s%> directive output between %wu and "
2931 "%wu bytes may cause result to exceed "
2932 "%<INT_MAX%>", dirlen,
2933 target_to_host (hostdir, sizeof hostdir, dir.beg),
2934 fmtres.range.min, fmtres.range.max);
2937 if (!warned && fmtres.nonstr)
2939 warned = fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2940 "%<%.*s%> directive argument is not a nul-terminated "
2941 "string",
2942 dirlen,
2943 target_to_host (hostdir, sizeof hostdir, dir.beg));
2944 if (warned && DECL_P (fmtres.nonstr))
2945 inform (DECL_SOURCE_LOCATION (fmtres.nonstr),
2946 "referenced argument declared here");
2947 return false;
2950 if (warned && fmtres.range.min < fmtres.range.likely
2951 && fmtres.range.likely < fmtres.range.max)
2952 inform_n (info.fmtloc, fmtres.range.likely,
2953 "assuming directive output of %wu byte",
2954 "assuming directive output of %wu bytes",
2955 fmtres.range.likely);
2957 if (warned && fmtres.argmin)
2959 if (fmtres.argmin == fmtres.argmax)
2960 inform (info.fmtloc, "directive argument %qE", fmtres.argmin);
2961 else if (fmtres.knownrange)
2962 inform (info.fmtloc, "directive argument in the range [%E, %E]",
2963 fmtres.argmin, fmtres.argmax);
2964 else
2965 inform (info.fmtloc,
2966 "using the range [%E, %E] for directive argument",
2967 fmtres.argmin, fmtres.argmax);
2970 res->warned |= warned;
2972 if (!dir.beg[0] && res->warned)
2974 location_t callloc = gimple_location (info.callstmt);
2976 unsigned HOST_WIDE_INT min = res->range.min;
2977 unsigned HOST_WIDE_INT max = res->range.max;
2979 if (info.objsize < HOST_WIDE_INT_MAX)
2981 /* If a warning has been issued for buffer overflow or truncation
2982 help the user figure out how big a buffer they need. */
2984 if (min == max)
2985 inform_n (callloc, min,
2986 "%qE output %wu byte into a destination of size %wu",
2987 "%qE output %wu bytes into a destination of size %wu",
2988 info.func, min, info.objsize);
2989 else if (max < HOST_WIDE_INT_MAX)
2990 inform (callloc,
2991 "%qE output between %wu and %wu bytes into "
2992 "a destination of size %wu",
2993 info.func, min, max, info.objsize);
2994 else if (min < res->range.likely && res->range.likely < max)
2995 inform (callloc,
2996 "%qE output %wu or more bytes (assuming %wu) into "
2997 "a destination of size %wu",
2998 info.func, min, res->range.likely, info.objsize);
2999 else
3000 inform (callloc,
3001 "%qE output %wu or more bytes into a destination of size "
3002 "%wu",
3003 info.func, min, info.objsize);
3005 else if (!info.is_string_func ())
3007 /* If the warning is for a file function like fprintf
3008 or printf with no destination size just print the computed
3009 result. */
3010 if (min == max)
3011 inform_n (callloc, min,
3012 "%qE output %wu byte", "%qE output %wu bytes",
3013 info.func, min);
3014 else if (max < HOST_WIDE_INT_MAX)
3015 inform (callloc,
3016 "%qE output between %wu and %wu bytes",
3017 info.func, min, max);
3018 else if (min < res->range.likely && res->range.likely < max)
3019 inform (callloc,
3020 "%qE output %wu or more bytes (assuming %wu)",
3021 info.func, min, res->range.likely);
3022 else
3023 inform (callloc,
3024 "%qE output %wu or more bytes",
3025 info.func, min);
3029 if (dump_file && *dir.beg)
3031 fprintf (dump_file,
3032 " Result: "
3033 HOST_WIDE_INT_PRINT_DEC ", " HOST_WIDE_INT_PRINT_DEC ", "
3034 HOST_WIDE_INT_PRINT_DEC ", " HOST_WIDE_INT_PRINT_DEC " ("
3035 HOST_WIDE_INT_PRINT_DEC ", " HOST_WIDE_INT_PRINT_DEC ", "
3036 HOST_WIDE_INT_PRINT_DEC ", " HOST_WIDE_INT_PRINT_DEC ")\n",
3037 fmtres.range.min, fmtres.range.likely,
3038 fmtres.range.max, fmtres.range.unlikely,
3039 res->range.min, res->range.likely,
3040 res->range.max, res->range.unlikely);
3043 return true;
3046 /* Parse a format directive in function call described by INFO starting
3047 at STR and populate DIR structure. Bump up *ARGNO by the number of
3048 arguments extracted for the directive. Return the length of
3049 the directive. */
3051 static size_t
3052 parse_directive (call_info &info,
3053 directive &dir, format_result *res,
3054 const char *str, unsigned *argno,
3055 const vr_values *vr_values)
3057 const char *pcnt = strchr (str, target_percent);
3058 dir.beg = str;
3060 if (size_t len = pcnt ? pcnt - str : *str ? strlen (str) : 1)
3062 /* This directive is either a plain string or the terminating nul
3063 (which isn't really a directive but it simplifies things to
3064 handle it as if it were). */
3065 dir.len = len;
3066 dir.fmtfunc = format_plain;
3068 if (dump_file)
3070 fprintf (dump_file, " Directive %u at offset "
3071 HOST_WIDE_INT_PRINT_UNSIGNED ": \"%.*s\", "
3072 "length = " HOST_WIDE_INT_PRINT_UNSIGNED "\n",
3073 dir.dirno,
3074 (unsigned HOST_WIDE_INT)(size_t)(dir.beg - info.fmtstr),
3075 (int)dir.len, dir.beg, (unsigned HOST_WIDE_INT) dir.len);
3078 return len - !*str;
3081 const char *pf = pcnt + 1;
3083 /* POSIX numbered argument index or zero when none. */
3084 HOST_WIDE_INT dollar = 0;
3086 /* With and precision. -1 when not specified, HOST_WIDE_INT_MIN
3087 when given by a va_list argument, and a non-negative value
3088 when specified in the format string itself. */
3089 HOST_WIDE_INT width = -1;
3090 HOST_WIDE_INT precision = -1;
3092 /* Pointers to the beginning of the width and precision decimal
3093 string (if any) within the directive. */
3094 const char *pwidth = 0;
3095 const char *pprec = 0;
3097 /* When the value of the decimal string that specifies width or
3098 precision is out of range, points to the digit that causes
3099 the value to exceed the limit. */
3100 const char *werange = NULL;
3101 const char *perange = NULL;
3103 /* Width specified via the asterisk. Need not be INTEGER_CST.
3104 For vararg functions set to void_node. */
3105 tree star_width = NULL_TREE;
3107 /* Width specified via the asterisk. Need not be INTEGER_CST.
3108 For vararg functions set to void_node. */
3109 tree star_precision = NULL_TREE;
3111 if (ISDIGIT (target_to_host (*pf)))
3113 /* This could be either a POSIX positional argument, the '0'
3114 flag, or a width, depending on what follows. Store it as
3115 width and sort it out later after the next character has
3116 been seen. */
3117 pwidth = pf;
3118 width = target_strtowi (&pf, &werange);
3120 else if (target_to_host (*pf) == '*')
3122 /* Similarly to the block above, this could be either a POSIX
3123 positional argument or a width, depending on what follows. */
3124 if (*argno < gimple_call_num_args (info.callstmt))
3125 star_width = gimple_call_arg (info.callstmt, (*argno)++);
3126 else
3127 star_width = void_node;
3128 ++pf;
3131 if (target_to_host (*pf) == '$')
3133 /* Handle the POSIX dollar sign which references the 1-based
3134 positional argument number. */
3135 if (width != -1)
3136 dollar = width + info.argidx;
3137 else if (star_width
3138 && TREE_CODE (star_width) == INTEGER_CST
3139 && (TYPE_PRECISION (TREE_TYPE (star_width))
3140 <= TYPE_PRECISION (integer_type_node)))
3141 dollar = width + tree_to_shwi (star_width);
3143 /* Bail when the numbered argument is out of range (it will
3144 have already been diagnosed by -Wformat). */
3145 if (dollar == 0
3146 || dollar == (int)info.argidx
3147 || dollar > gimple_call_num_args (info.callstmt))
3148 return false;
3150 --dollar;
3152 star_width = NULL_TREE;
3153 width = -1;
3154 ++pf;
3157 if (dollar || !star_width)
3159 if (width != -1)
3161 if (width == 0)
3163 /* The '0' that has been interpreted as a width above is
3164 actually a flag. Reset HAVE_WIDTH, set the '0' flag,
3165 and continue processing other flags. */
3166 width = -1;
3167 dir.set_flag ('0');
3169 else if (!dollar)
3171 /* (Non-zero) width has been seen. The next character
3172 is either a period or a digit. */
3173 goto start_precision;
3176 /* When either '$' has been seen, or width has not been seen,
3177 the next field is the optional flags followed by an optional
3178 width. */
3179 for ( ; ; ) {
3180 switch (target_to_host (*pf))
3182 case ' ':
3183 case '0':
3184 case '+':
3185 case '-':
3186 case '#':
3187 dir.set_flag (target_to_host (*pf++));
3188 break;
3190 default:
3191 goto start_width;
3195 start_width:
3196 if (ISDIGIT (target_to_host (*pf)))
3198 werange = 0;
3199 pwidth = pf;
3200 width = target_strtowi (&pf, &werange);
3202 else if (target_to_host (*pf) == '*')
3204 if (*argno < gimple_call_num_args (info.callstmt))
3205 star_width = gimple_call_arg (info.callstmt, (*argno)++);
3206 else
3208 /* This is (likely) a va_list. It could also be an invalid
3209 call with insufficient arguments. */
3210 star_width = void_node;
3212 ++pf;
3214 else if (target_to_host (*pf) == '\'')
3216 /* The POSIX apostrophe indicating a numeric grouping
3217 in the current locale. Even though it's possible to
3218 estimate the upper bound on the size of the output
3219 based on the number of digits it probably isn't worth
3220 continuing. */
3221 return 0;
3225 start_precision:
3226 if (target_to_host (*pf) == '.')
3228 ++pf;
3230 if (ISDIGIT (target_to_host (*pf)))
3232 pprec = pf;
3233 precision = target_strtowi (&pf, &perange);
3235 else if (target_to_host (*pf) == '*')
3237 if (*argno < gimple_call_num_args (info.callstmt))
3238 star_precision = gimple_call_arg (info.callstmt, (*argno)++);
3239 else
3241 /* This is (likely) a va_list. It could also be an invalid
3242 call with insufficient arguments. */
3243 star_precision = void_node;
3245 ++pf;
3247 else
3249 /* The decimal precision or the asterisk are optional.
3250 When neither is dirified it's taken to be zero. */
3251 precision = 0;
3255 switch (target_to_host (*pf))
3257 case 'h':
3258 if (target_to_host (pf[1]) == 'h')
3260 ++pf;
3261 dir.modifier = FMT_LEN_hh;
3263 else
3264 dir.modifier = FMT_LEN_h;
3265 ++pf;
3266 break;
3268 case 'j':
3269 dir.modifier = FMT_LEN_j;
3270 ++pf;
3271 break;
3273 case 'L':
3274 dir.modifier = FMT_LEN_L;
3275 ++pf;
3276 break;
3278 case 'l':
3279 if (target_to_host (pf[1]) == 'l')
3281 ++pf;
3282 dir.modifier = FMT_LEN_ll;
3284 else
3285 dir.modifier = FMT_LEN_l;
3286 ++pf;
3287 break;
3289 case 't':
3290 dir.modifier = FMT_LEN_t;
3291 ++pf;
3292 break;
3294 case 'z':
3295 dir.modifier = FMT_LEN_z;
3296 ++pf;
3297 break;
3300 switch (target_to_host (*pf))
3302 /* Handle a sole '%' character the same as "%%" but since it's
3303 undefined prevent the result from being folded. */
3304 case '\0':
3305 --pf;
3306 res->range.min = res->range.max = HOST_WIDE_INT_M1U;
3307 /* FALLTHRU */
3308 case '%':
3309 dir.fmtfunc = format_percent;
3310 break;
3312 case 'a':
3313 case 'A':
3314 case 'e':
3315 case 'E':
3316 case 'f':
3317 case 'F':
3318 case 'g':
3319 case 'G':
3320 res->floating = true;
3321 dir.fmtfunc = format_floating;
3322 break;
3324 case 'd':
3325 case 'i':
3326 case 'o':
3327 case 'u':
3328 case 'x':
3329 case 'X':
3330 dir.fmtfunc = format_integer;
3331 break;
3333 case 'p':
3334 /* The %p output is implementation-defined. It's possible
3335 to determine this format but due to extensions (edirially
3336 those of the Linux kernel -- see bug 78512) the first %p
3337 in the format string disables any further processing. */
3338 return false;
3340 case 'n':
3341 /* %n has side-effects even when nothing is actually printed to
3342 any buffer. */
3343 info.nowrite = false;
3344 dir.fmtfunc = format_none;
3345 break;
3347 case 'C':
3348 case 'c':
3349 /* POSIX wide character and C/POSIX narrow character. */
3350 dir.fmtfunc = format_character;
3351 break;
3353 case 'S':
3354 case 's':
3355 /* POSIX wide string and C/POSIX narrow character string. */
3356 dir.fmtfunc = format_string;
3357 break;
3359 default:
3360 /* Unknown conversion specification. */
3361 return 0;
3364 dir.specifier = target_to_host (*pf++);
3366 /* Store the length of the format directive. */
3367 dir.len = pf - pcnt;
3369 /* Buffer for the directive in the host character set (used when
3370 the source character set is different). */
3371 char hostdir[32];
3373 if (star_width)
3375 if (INTEGRAL_TYPE_P (TREE_TYPE (star_width)))
3376 dir.set_width (star_width, vr_values);
3377 else
3379 /* Width specified by a va_list takes on the range [0, -INT_MIN]
3380 (width is the absolute value of that specified). */
3381 dir.width[0] = 0;
3382 dir.width[1] = target_int_max () + 1;
3385 else
3387 if (width == HOST_WIDE_INT_MAX && werange)
3389 size_t begin = dir.beg - info.fmtstr + (pwidth - pcnt);
3390 size_t caret = begin + (werange - pcnt);
3391 size_t end = pf - info.fmtstr - 1;
3393 /* Create a location for the width part of the directive,
3394 pointing the caret at the first out-of-range digit. */
3395 substring_loc dirloc (info.fmtloc, TREE_TYPE (info.format),
3396 caret, begin, end);
3398 fmtwarn (dirloc, UNKNOWN_LOCATION, NULL, info.warnopt (),
3399 "%<%.*s%> directive width out of range", (int) dir.len,
3400 target_to_host (hostdir, sizeof hostdir, dir.beg));
3403 dir.set_width (width);
3406 if (star_precision)
3408 if (INTEGRAL_TYPE_P (TREE_TYPE (star_precision)))
3409 dir.set_precision (star_precision, vr_values);
3410 else
3412 /* Precision specified by a va_list takes on the range [-1, INT_MAX]
3413 (unlike width, negative precision is ignored). */
3414 dir.prec[0] = -1;
3415 dir.prec[1] = target_int_max ();
3418 else
3420 if (precision == HOST_WIDE_INT_MAX && perange)
3422 size_t begin = dir.beg - info.fmtstr + (pprec - pcnt) - 1;
3423 size_t caret = dir.beg - info.fmtstr + (perange - pcnt) - 1;
3424 size_t end = pf - info.fmtstr - 2;
3426 /* Create a location for the precision part of the directive,
3427 including the leading period, pointing the caret at the first
3428 out-of-range digit . */
3429 substring_loc dirloc (info.fmtloc, TREE_TYPE (info.format),
3430 caret, begin, end);
3432 fmtwarn (dirloc, UNKNOWN_LOCATION, NULL, info.warnopt (),
3433 "%<%.*s%> directive precision out of range", (int) dir.len,
3434 target_to_host (hostdir, sizeof hostdir, dir.beg));
3437 dir.set_precision (precision);
3440 /* Extract the argument if the directive takes one and if it's
3441 available (e.g., the function doesn't take a va_list). Treat
3442 missing arguments the same as va_list, even though they will
3443 have likely already been diagnosed by -Wformat. */
3444 if (dir.specifier != '%'
3445 && *argno < gimple_call_num_args (info.callstmt))
3446 dir.arg = gimple_call_arg (info.callstmt, dollar ? dollar : (*argno)++);
3448 if (dump_file)
3450 fprintf (dump_file,
3451 " Directive %u at offset " HOST_WIDE_INT_PRINT_UNSIGNED
3452 ": \"%.*s\"",
3453 dir.dirno,
3454 (unsigned HOST_WIDE_INT)(size_t)(dir.beg - info.fmtstr),
3455 (int)dir.len, dir.beg);
3456 if (star_width)
3458 if (dir.width[0] == dir.width[1])
3459 fprintf (dump_file, ", width = " HOST_WIDE_INT_PRINT_DEC,
3460 dir.width[0]);
3461 else
3462 fprintf (dump_file,
3463 ", width in range [" HOST_WIDE_INT_PRINT_DEC
3464 ", " HOST_WIDE_INT_PRINT_DEC "]",
3465 dir.width[0], dir.width[1]);
3468 if (star_precision)
3470 if (dir.prec[0] == dir.prec[1])
3471 fprintf (dump_file, ", precision = " HOST_WIDE_INT_PRINT_DEC,
3472 dir.prec[0]);
3473 else
3474 fprintf (dump_file,
3475 ", precision in range [" HOST_WIDE_INT_PRINT_DEC
3476 HOST_WIDE_INT_PRINT_DEC "]",
3477 dir.prec[0], dir.prec[1]);
3479 fputc ('\n', dump_file);
3482 return dir.len;
3485 /* Compute the length of the output resulting from the call to a formatted
3486 output function described by INFO and store the result of the call in
3487 *RES. Issue warnings for detected past the end writes. Return true
3488 if the complete format string has been processed and *RES can be relied
3489 on, false otherwise (e.g., when a unknown or unhandled directive was seen
3490 that caused the processing to be terminated early). */
3492 static bool
3493 compute_format_length (call_info &info, format_result *res, const vr_values *vr)
3495 if (dump_file)
3497 location_t callloc = gimple_location (info.callstmt);
3498 fprintf (dump_file, "%s:%i: ",
3499 LOCATION_FILE (callloc), LOCATION_LINE (callloc));
3500 print_generic_expr (dump_file, info.func, dump_flags);
3502 fprintf (dump_file,
3503 ": objsize = " HOST_WIDE_INT_PRINT_UNSIGNED
3504 ", fmtstr = \"%s\"\n",
3505 info.objsize, info.fmtstr);
3508 /* Reset the minimum and maximum byte counters. */
3509 res->range.min = res->range.max = 0;
3511 /* No directive has been seen yet so the length of output is bounded
3512 by the known range [0, 0] (with no conversion resulting in a failure
3513 or producing more than 4K bytes) until determined otherwise. */
3514 res->knownrange = true;
3515 res->floating = false;
3516 res->warned = false;
3518 /* 1-based directive counter. */
3519 unsigned dirno = 1;
3521 /* The variadic argument counter. */
3522 unsigned argno = info.argidx;
3524 for (const char *pf = info.fmtstr; ; ++dirno)
3526 directive dir = directive ();
3527 dir.dirno = dirno;
3529 size_t n = parse_directive (info, dir, res, pf, &argno, vr);
3531 /* Return failure if the format function fails. */
3532 if (!format_directive (info, res, dir, vr))
3533 return false;
3535 /* Return success the directive is zero bytes long and it's
3536 the last think in the format string (i.e., it's the terminating
3537 nul, which isn't really a directive but handling it as one makes
3538 things simpler). */
3539 if (!n)
3540 return *pf == '\0';
3542 pf += n;
3545 /* The complete format string was processed (with or without warnings). */
3546 return true;
3549 /* Return the size of the object referenced by the expression DEST if
3550 available, or the maximum possible size otherwise. */
3552 static unsigned HOST_WIDE_INT
3553 get_destination_size (tree dest)
3555 /* When there is no destination return the maximum. */
3556 if (!dest)
3557 return HOST_WIDE_INT_MAX;
3559 /* Initialize object size info before trying to compute it. */
3560 init_object_sizes ();
3562 /* Use __builtin_object_size to determine the size of the destination
3563 object. When optimizing, determine the smallest object (such as
3564 a member array as opposed to the whole enclosing object), otherwise
3565 use type-zero object size to determine the size of the enclosing
3566 object (the function fails without optimization in this type). */
3567 int ost = optimize > 0;
3568 unsigned HOST_WIDE_INT size;
3569 if (compute_builtin_object_size (dest, ost, &size))
3570 return size;
3572 return HOST_WIDE_INT_MAX;
3575 /* Return true if the call described by INFO with result RES safe to
3576 optimize (i.e., no undefined behavior), and set RETVAL to the range
3577 of its return values. */
3579 static bool
3580 is_call_safe (const call_info &info,
3581 const format_result &res, bool under4k,
3582 unsigned HOST_WIDE_INT retval[2])
3584 if (under4k && !res.posunder4k)
3585 return false;
3587 /* The minimum return value. */
3588 retval[0] = res.range.min;
3590 /* The maximum return value is in most cases bounded by RES.RANGE.MAX
3591 but in cases involving multibyte characters could be as large as
3592 RES.RANGE.UNLIKELY. */
3593 retval[1]
3594 = res.range.unlikely < res.range.max ? res.range.max : res.range.unlikely;
3596 /* Adjust the number of bytes which includes the terminating nul
3597 to reflect the return value of the function which does not.
3598 Because the valid range of the function is [INT_MIN, INT_MAX],
3599 a valid range before the adjustment below is [0, INT_MAX + 1]
3600 (the functions only return negative values on error or undefined
3601 behavior). */
3602 if (retval[0] <= target_int_max () + 1)
3603 --retval[0];
3604 if (retval[1] <= target_int_max () + 1)
3605 --retval[1];
3607 /* Avoid the return value optimization when the behavior of the call
3608 is undefined either because any directive may have produced 4K or
3609 more of output, or the return value exceeds INT_MAX, or because
3610 the output overflows the destination object (but leave it enabled
3611 when the function is bounded because then the behavior is well-
3612 defined). */
3613 if (retval[0] == retval[1]
3614 && (info.bounded || retval[0] < info.objsize)
3615 && retval[0] <= target_int_max ())
3616 return true;
3618 if ((info.bounded || retval[1] < info.objsize)
3619 && (retval[0] < target_int_max ()
3620 && retval[1] < target_int_max ()))
3621 return true;
3623 if (!under4k && (info.bounded || retval[0] < info.objsize))
3624 return true;
3626 return false;
3629 /* Given a suitable result RES of a call to a formatted output function
3630 described by INFO, substitute the result for the return value of
3631 the call. The result is suitable if the number of bytes it represents
3632 is known and exact. A result that isn't suitable for substitution may
3633 have its range set to the range of return values, if that is known.
3634 Return true if the call is removed and gsi_next should not be performed
3635 in the caller. */
3637 static bool
3638 try_substitute_return_value (gimple_stmt_iterator *gsi,
3639 const call_info &info,
3640 const format_result &res)
3642 tree lhs = gimple_get_lhs (info.callstmt);
3644 /* Set to true when the entire call has been removed. */
3645 bool removed = false;
3647 /* The minimum and maximum return value. */
3648 unsigned HOST_WIDE_INT retval[2];
3649 bool safe = is_call_safe (info, res, true, retval);
3651 if (safe
3652 && retval[0] == retval[1]
3653 /* Not prepared to handle possibly throwing calls here; they shouldn't
3654 appear in non-artificial testcases, except when the __*_chk routines
3655 are badly declared. */
3656 && !stmt_ends_bb_p (info.callstmt))
3658 tree cst = build_int_cst (lhs ? TREE_TYPE (lhs) : integer_type_node,
3659 retval[0]);
3661 if (lhs == NULL_TREE && info.nowrite)
3663 /* Remove the call to the bounded function with a zero size
3664 (e.g., snprintf(0, 0, "%i", 123)) if there is no lhs. */
3665 unlink_stmt_vdef (info.callstmt);
3666 gsi_remove (gsi, true);
3667 removed = true;
3669 else if (info.nowrite)
3671 /* Replace the call to the bounded function with a zero size
3672 (e.g., snprintf(0, 0, "%i", 123) with the constant result
3673 of the function. */
3674 if (!update_call_from_tree (gsi, cst))
3675 gimplify_and_update_call_from_tree (gsi, cst);
3676 gimple *callstmt = gsi_stmt (*gsi);
3677 update_stmt (callstmt);
3679 else if (lhs)
3681 /* Replace the left-hand side of the call with the constant
3682 result of the formatted function. */
3683 gimple_call_set_lhs (info.callstmt, NULL_TREE);
3684 gimple *g = gimple_build_assign (lhs, cst);
3685 gsi_insert_after (gsi, g, GSI_NEW_STMT);
3686 update_stmt (info.callstmt);
3689 if (dump_file)
3691 if (removed)
3692 fprintf (dump_file, " Removing call statement.");
3693 else
3695 fprintf (dump_file, " Substituting ");
3696 print_generic_expr (dump_file, cst, dump_flags);
3697 fprintf (dump_file, " for %s.\n",
3698 info.nowrite ? "statement" : "return value");
3702 else if (lhs && types_compatible_p (TREE_TYPE (lhs), integer_type_node))
3704 bool setrange = false;
3706 if (safe
3707 && (info.bounded || retval[1] < info.objsize)
3708 && (retval[0] < target_int_max ()
3709 && retval[1] < target_int_max ()))
3711 /* If the result is in a valid range bounded by the size of
3712 the destination set it so that it can be used for subsequent
3713 optimizations. */
3714 int prec = TYPE_PRECISION (integer_type_node);
3716 wide_int min = wi::shwi (retval[0], prec);
3717 wide_int max = wi::shwi (retval[1], prec);
3718 set_range_info (lhs, VR_RANGE, min, max);
3720 setrange = true;
3723 if (dump_file)
3725 const char *inbounds
3726 = (retval[0] < info.objsize
3727 ? (retval[1] < info.objsize
3728 ? "in" : "potentially out-of")
3729 : "out-of");
3731 const char *what = setrange ? "Setting" : "Discarding";
3732 if (retval[0] != retval[1])
3733 fprintf (dump_file,
3734 " %s %s-bounds return value range ["
3735 HOST_WIDE_INT_PRINT_UNSIGNED ", "
3736 HOST_WIDE_INT_PRINT_UNSIGNED "].\n",
3737 what, inbounds, retval[0], retval[1]);
3738 else
3739 fprintf (dump_file, " %s %s-bounds return value "
3740 HOST_WIDE_INT_PRINT_UNSIGNED ".\n",
3741 what, inbounds, retval[0]);
3745 if (dump_file)
3746 fputc ('\n', dump_file);
3748 return removed;
3751 /* Try to simplify a s{,n}printf call described by INFO with result
3752 RES by replacing it with a simpler and presumably more efficient
3753 call (such as strcpy). */
3755 static bool
3756 try_simplify_call (gimple_stmt_iterator *gsi,
3757 const call_info &info,
3758 const format_result &res)
3760 unsigned HOST_WIDE_INT dummy[2];
3761 if (!is_call_safe (info, res, info.retval_used (), dummy))
3762 return false;
3764 switch (info.fncode)
3766 case BUILT_IN_SNPRINTF:
3767 return gimple_fold_builtin_snprintf (gsi);
3769 case BUILT_IN_SPRINTF:
3770 return gimple_fold_builtin_sprintf (gsi);
3772 default:
3776 return false;
3779 /* Return the zero-based index of the format string argument of a printf
3780 like function and set *IDX_ARGS to the first format argument. When
3781 no such index exists return UINT_MAX. */
3783 static unsigned
3784 get_user_idx_format (tree fndecl, unsigned *idx_args)
3786 tree attrs = lookup_attribute ("format", DECL_ATTRIBUTES (fndecl));
3787 if (!attrs)
3788 attrs = lookup_attribute ("format", TYPE_ATTRIBUTES (TREE_TYPE (fndecl)));
3790 if (!attrs)
3791 return UINT_MAX;
3793 attrs = TREE_VALUE (attrs);
3795 tree archetype = TREE_VALUE (attrs);
3796 if (strcmp ("printf", IDENTIFIER_POINTER (archetype)))
3797 return UINT_MAX;
3799 attrs = TREE_CHAIN (attrs);
3800 tree fmtarg = TREE_VALUE (attrs);
3802 attrs = TREE_CHAIN (attrs);
3803 tree elliparg = TREE_VALUE (attrs);
3805 /* Attribute argument indices are 1-based but we use zero-based. */
3806 *idx_args = tree_to_uhwi (elliparg) - 1;
3807 return tree_to_uhwi (fmtarg) - 1;
3810 } /* Unnamed namespace. */
3812 /* Determine if a GIMPLE call at *GSI is to one of the sprintf-like built-in
3813 functions and if so, handle it. Return true if the call is removed and
3814 gsi_next should not be performed in the caller. */
3816 bool
3817 handle_printf_call (gimple_stmt_iterator *gsi, const vr_values *vr_values)
3819 init_target_to_host_charmap ();
3821 call_info info = call_info ();
3823 info.callstmt = gsi_stmt (*gsi);
3824 info.func = gimple_call_fndecl (info.callstmt);
3825 if (!info.func)
3826 return false;
3828 /* Format string argument number (valid for all functions). */
3829 unsigned idx_format = UINT_MAX;
3830 if (gimple_call_builtin_p (info.callstmt, BUILT_IN_NORMAL))
3831 info.fncode = DECL_FUNCTION_CODE (info.func);
3832 else
3834 unsigned idx_args;
3835 idx_format = get_user_idx_format (info.func, &idx_args);
3836 if (idx_format == UINT_MAX
3837 || idx_format >= gimple_call_num_args (info.callstmt)
3838 || idx_args > gimple_call_num_args (info.callstmt)
3839 || !POINTER_TYPE_P (TREE_TYPE (gimple_call_arg (info.callstmt,
3840 idx_format))))
3841 return false;
3842 info.fncode = BUILT_IN_NONE;
3843 info.argidx = idx_args;
3846 /* The size of the destination as in snprintf(dest, size, ...). */
3847 unsigned HOST_WIDE_INT dstsize = HOST_WIDE_INT_M1U;
3849 /* The size of the destination determined by __builtin_object_size. */
3850 unsigned HOST_WIDE_INT objsize = HOST_WIDE_INT_M1U;
3852 /* Zero-based buffer size argument number (snprintf and vsnprintf). */
3853 unsigned idx_dstsize = UINT_MAX;
3855 /* Object size argument number (snprintf_chk and vsnprintf_chk). */
3856 unsigned idx_objsize = UINT_MAX;
3858 /* Destination argument number (valid for sprintf functions only). */
3859 unsigned idx_dstptr = 0;
3861 switch (info.fncode)
3863 case BUILT_IN_NONE:
3864 // User-defined function with attribute format (printf).
3865 idx_dstptr = -1;
3866 break;
3868 case BUILT_IN_FPRINTF:
3869 // Signature:
3870 // __builtin_fprintf (FILE*, format, ...)
3871 idx_format = 1;
3872 info.argidx = 2;
3873 idx_dstptr = -1;
3874 break;
3876 case BUILT_IN_FPRINTF_CHK:
3877 // Signature:
3878 // __builtin_fprintf_chk (FILE*, ost, format, ...)
3879 idx_format = 2;
3880 info.argidx = 3;
3881 idx_dstptr = -1;
3882 break;
3884 case BUILT_IN_FPRINTF_UNLOCKED:
3885 // Signature:
3886 // __builtin_fprintf_unlocked (FILE*, format, ...)
3887 idx_format = 1;
3888 info.argidx = 2;
3889 idx_dstptr = -1;
3890 break;
3892 case BUILT_IN_PRINTF:
3893 // Signature:
3894 // __builtin_printf (format, ...)
3895 idx_format = 0;
3896 info.argidx = 1;
3897 idx_dstptr = -1;
3898 break;
3900 case BUILT_IN_PRINTF_CHK:
3901 // Signature:
3902 // __builtin_printf_chk (ost, format, ...)
3903 idx_format = 1;
3904 info.argidx = 2;
3905 idx_dstptr = -1;
3906 break;
3908 case BUILT_IN_PRINTF_UNLOCKED:
3909 // Signature:
3910 // __builtin_printf_unlocked (format, ...)
3911 idx_format = 0;
3912 info.argidx = 1;
3913 idx_dstptr = -1;
3914 break;
3916 case BUILT_IN_SPRINTF:
3917 // Signature:
3918 // __builtin_sprintf (dst, format, ...)
3919 idx_format = 1;
3920 info.argidx = 2;
3921 break;
3923 case BUILT_IN_SPRINTF_CHK:
3924 // Signature:
3925 // __builtin___sprintf_chk (dst, ost, objsize, format, ...)
3926 idx_objsize = 2;
3927 idx_format = 3;
3928 info.argidx = 4;
3929 break;
3931 case BUILT_IN_SNPRINTF:
3932 // Signature:
3933 // __builtin_snprintf (dst, size, format, ...)
3934 idx_dstsize = 1;
3935 idx_format = 2;
3936 info.argidx = 3;
3937 info.bounded = true;
3938 break;
3940 case BUILT_IN_SNPRINTF_CHK:
3941 // Signature:
3942 // __builtin___snprintf_chk (dst, size, ost, objsize, format, ...)
3943 idx_dstsize = 1;
3944 idx_objsize = 3;
3945 idx_format = 4;
3946 info.argidx = 5;
3947 info.bounded = true;
3948 break;
3950 case BUILT_IN_VFPRINTF:
3951 // Signature:
3952 // __builtin_vfprintf (FILE*, format, va_list)
3953 idx_format = 1;
3954 info.argidx = -1;
3955 idx_dstptr = -1;
3956 break;
3958 case BUILT_IN_VFPRINTF_CHK:
3959 // Signature:
3960 // __builtin___vfprintf_chk (FILE*, ost, format, va_list)
3961 idx_format = 2;
3962 info.argidx = -1;
3963 idx_dstptr = -1;
3964 break;
3966 case BUILT_IN_VPRINTF:
3967 // Signature:
3968 // __builtin_vprintf (format, va_list)
3969 idx_format = 0;
3970 info.argidx = -1;
3971 idx_dstptr = -1;
3972 break;
3974 case BUILT_IN_VPRINTF_CHK:
3975 // Signature:
3976 // __builtin___vprintf_chk (ost, format, va_list)
3977 idx_format = 1;
3978 info.argidx = -1;
3979 idx_dstptr = -1;
3980 break;
3982 case BUILT_IN_VSNPRINTF:
3983 // Signature:
3984 // __builtin_vsnprintf (dst, size, format, va)
3985 idx_dstsize = 1;
3986 idx_format = 2;
3987 info.argidx = -1;
3988 info.bounded = true;
3989 break;
3991 case BUILT_IN_VSNPRINTF_CHK:
3992 // Signature:
3993 // __builtin___vsnprintf_chk (dst, size, ost, objsize, format, va)
3994 idx_dstsize = 1;
3995 idx_objsize = 3;
3996 idx_format = 4;
3997 info.argidx = -1;
3998 info.bounded = true;
3999 break;
4001 case BUILT_IN_VSPRINTF:
4002 // Signature:
4003 // __builtin_vsprintf (dst, format, va)
4004 idx_format = 1;
4005 info.argidx = -1;
4006 break;
4008 case BUILT_IN_VSPRINTF_CHK:
4009 // Signature:
4010 // __builtin___vsprintf_chk (dst, ost, objsize, format, va)
4011 idx_format = 3;
4012 idx_objsize = 2;
4013 info.argidx = -1;
4014 break;
4016 default:
4017 return false;
4020 /* Set the global warning level for this function. */
4021 warn_level = info.bounded ? warn_format_trunc : warn_format_overflow;
4023 /* For all string functions the first argument is a pointer to
4024 the destination. */
4025 tree dstptr = (idx_dstptr < gimple_call_num_args (info.callstmt)
4026 ? gimple_call_arg (info.callstmt, 0) : NULL_TREE);
4028 info.format = gimple_call_arg (info.callstmt, idx_format);
4030 /* True when the destination size is constant as opposed to the lower
4031 or upper bound of a range. */
4032 bool dstsize_cst_p = true;
4033 bool posunder4k = true;
4035 if (idx_dstsize == UINT_MAX)
4037 /* For non-bounded functions like sprintf, determine the size
4038 of the destination from the object or pointer passed to it
4039 as the first argument. */
4040 dstsize = get_destination_size (dstptr);
4042 else if (tree size = gimple_call_arg (info.callstmt, idx_dstsize))
4044 /* For bounded functions try to get the size argument. */
4046 if (TREE_CODE (size) == INTEGER_CST)
4048 dstsize = tree_to_uhwi (size);
4049 /* No object can be larger than SIZE_MAX / 2 bytes (half the address
4050 space) on the target.
4051 The functions are defined only for output of at most INT_MAX
4052 bytes. Specifying a bound in excess of that limit effectively
4053 defeats the bounds checking (and on some implementations such
4054 as Solaris cause the function to fail with EINVAL). */
4055 if (dstsize > target_size_max () / 2)
4057 /* Avoid warning if -Wstringop-overflow is specified since
4058 it also warns for the same thing though only for the
4059 checking built-ins. */
4060 if ((idx_objsize == UINT_MAX
4061 || !warn_stringop_overflow))
4062 warning_at (gimple_location (info.callstmt), info.warnopt (),
4063 "specified bound %wu exceeds maximum object size "
4064 "%wu",
4065 dstsize, target_size_max () / 2);
4066 /* POSIX requires snprintf to fail if DSTSIZE is greater
4067 than INT_MAX. Even though not all POSIX implementations
4068 conform to the requirement, avoid folding in this case. */
4069 posunder4k = false;
4071 else if (dstsize > target_int_max ())
4073 warning_at (gimple_location (info.callstmt), info.warnopt (),
4074 "specified bound %wu exceeds %<INT_MAX%>",
4075 dstsize);
4076 /* POSIX requires snprintf to fail if DSTSIZE is greater
4077 than INT_MAX. Avoid folding in that case. */
4078 posunder4k = false;
4081 else if (TREE_CODE (size) == SSA_NAME)
4083 /* Try to determine the range of values of the argument
4084 and use the greater of the two at level 1 and the smaller
4085 of them at level 2. */
4086 const value_range *vr
4087 = CONST_CAST (class vr_values *, vr_values)->get_value_range (size);
4089 if (range_int_cst_p (vr))
4091 unsigned HOST_WIDE_INT minsize = TREE_INT_CST_LOW (vr->min ());
4092 unsigned HOST_WIDE_INT maxsize = TREE_INT_CST_LOW (vr->max ());
4093 dstsize = warn_level < 2 ? maxsize : minsize;
4095 if (minsize > target_int_max ())
4096 warning_at (gimple_location (info.callstmt), info.warnopt (),
4097 "specified bound range [%wu, %wu] exceeds "
4098 "%<INT_MAX%>",
4099 minsize, maxsize);
4101 /* POSIX requires snprintf to fail if DSTSIZE is greater
4102 than INT_MAX. Avoid folding if that's possible. */
4103 if (maxsize > target_int_max ())
4104 posunder4k = false;
4106 else if (vr->varying_p ())
4108 /* POSIX requires snprintf to fail if DSTSIZE is greater
4109 than INT_MAX. Since SIZE's range is unknown, avoid
4110 folding. */
4111 posunder4k = false;
4114 /* The destination size is not constant. If the function is
4115 bounded (e.g., snprintf) a lower bound of zero doesn't
4116 necessarily imply it can be eliminated. */
4117 dstsize_cst_p = false;
4121 if (idx_objsize != UINT_MAX)
4122 if (tree size = gimple_call_arg (info.callstmt, idx_objsize))
4123 if (tree_fits_uhwi_p (size))
4124 objsize = tree_to_uhwi (size);
4126 if (info.bounded && !dstsize)
4128 /* As a special case, when the explicitly specified destination
4129 size argument (to a bounded function like snprintf) is zero
4130 it is a request to determine the number of bytes on output
4131 without actually producing any. Pretend the size is
4132 unlimited in this case. */
4133 info.objsize = HOST_WIDE_INT_MAX;
4134 info.nowrite = dstsize_cst_p;
4136 else
4138 /* For calls to non-bounded functions or to those of bounded
4139 functions with a non-zero size, warn if the destination
4140 pointer is null. */
4141 if (dstptr && integer_zerop (dstptr))
4143 /* This is diagnosed with -Wformat only when the null is a constant
4144 pointer. The warning here diagnoses instances where the pointer
4145 is not constant. */
4146 location_t loc = gimple_location (info.callstmt);
4147 warning_at (EXPR_LOC_OR_LOC (dstptr, loc),
4148 info.warnopt (), "%Gnull destination pointer",
4149 info.callstmt);
4150 return false;
4153 /* Set the object size to the smaller of the two arguments
4154 if both have been specified and they're not equal. */
4155 info.objsize = dstsize < objsize ? dstsize : objsize;
4157 if (info.bounded
4158 && dstsize < target_size_max () / 2 && objsize < dstsize
4159 /* Avoid warning if -Wstringop-overflow is specified since
4160 it also warns for the same thing though only for the
4161 checking built-ins. */
4162 && (idx_objsize == UINT_MAX
4163 || !warn_stringop_overflow))
4165 warning_at (gimple_location (info.callstmt), info.warnopt (),
4166 "specified bound %wu exceeds the size %wu "
4167 "of the destination object", dstsize, objsize);
4171 /* Determine if the format argument may be null and warn if not
4172 and if the argument is null. */
4173 if (integer_zerop (info.format)
4174 && gimple_call_builtin_p (info.callstmt, BUILT_IN_NORMAL))
4176 location_t loc = gimple_location (info.callstmt);
4177 warning_at (EXPR_LOC_OR_LOC (info.format, loc),
4178 info.warnopt (), "%Gnull format string",
4179 info.callstmt);
4180 return false;
4183 info.fmtstr = get_format_string (info.format, &info.fmtloc);
4184 if (!info.fmtstr)
4185 return false;
4187 /* The result is the number of bytes output by the formatted function,
4188 including the terminating NUL. */
4189 format_result res = format_result ();
4191 /* I/O functions with no destination argument (i.e., all forms of fprintf
4192 and printf) may fail under any conditions. Others (i.e., all forms of
4193 sprintf) may only fail under specific conditions determined for each
4194 directive. Clear POSUNDER4K for the former set of functions and set
4195 it to true for the latter (it can only be cleared later, but it is
4196 never set to true again). */
4197 res.posunder4k = posunder4k && dstptr;
4199 bool success = compute_format_length (info, &res, vr_values);
4200 if (res.warned)
4201 gimple_set_no_warning (info.callstmt, true);
4203 /* When optimizing and the printf return value optimization is enabled,
4204 attempt to substitute the computed result for the return value of
4205 the call. Avoid this optimization when -frounding-math is in effect
4206 and the format string contains a floating point directive. */
4207 bool call_removed = false;
4208 if (success && optimize > 0)
4210 /* Save a copy of the iterator pointing at the call. The iterator
4211 may change to point past the call in try_substitute_return_value
4212 but the original value is needed in try_simplify_call. */
4213 gimple_stmt_iterator gsi_call = *gsi;
4215 if (flag_printf_return_value
4216 && (!flag_rounding_math || !res.floating))
4217 call_removed = try_substitute_return_value (gsi, info, res);
4219 if (!call_removed)
4220 try_simplify_call (&gsi_call, info, res);
4223 return call_removed;