PR rtl-optimization/88018
[official-gcc.git] / gcc / gimple-ssa-sprintf.c
blob456a7d400115713a6600b1ce7bb303b6c971550e
1 /* Copyright (C) 2016-2018 Free Software Foundation, Inc.
2 Contributed by Martin Sebor <msebor@redhat.com>.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* This file implements the printf-return-value pass. The pass does
21 two things: 1) it analyzes calls to formatted output functions like
22 sprintf looking for possible buffer overflows and calls to bounded
23 functions like snprintf for early truncation (and under the control
24 of the -Wformat-overflow and -Wformat-truncation options issues warnings), and 2) under the
25 control of the -fprintf-return-value option it folds the return
26 value of safe calls into constants, making it possible to eliminate
27 code that depends on the value of those constants.
29 For all functions (bounded or not) the pass uses the size of the
30 destination object. That means that it will diagnose calls to
31 snprintf not on the basis of the size specified by the function's
32 second argument but rather on the basis of the size the first
33 argument points to (if possible). For bound-checking built-ins
34 like __builtin___snprintf_chk the pass uses the size typically
35 determined by __builtin_object_size and passed to the built-in
36 by the Glibc inline wrapper.
38 The pass handles all forms of standard sprintf format directives,
39 including character, integer, floating point, pointer, and strings,
40 with the standard C flags, widths, and precisions. For integers
41 and strings it computes the length of output itself. For floating
42 point it uses MPFR to format known constants with up and down
43 rounding and uses the resulting range of output lengths. For
44 strings it uses the length of string literals and the sizes of
45 character arrays that a character pointer may point to as a bound
46 on the longest string. */
48 #include "config.h"
49 #include "system.h"
50 #include "coretypes.h"
51 #include "backend.h"
52 #include "tree.h"
53 #include "gimple.h"
54 #include "tree-pass.h"
55 #include "ssa.h"
56 #include "gimple-fold.h"
57 #include "gimple-pretty-print.h"
58 #include "diagnostic-core.h"
59 #include "fold-const.h"
60 #include "gimple-iterator.h"
61 #include "tree-ssa.h"
62 #include "tree-object-size.h"
63 #include "params.h"
64 #include "tree-cfg.h"
65 #include "tree-ssa-propagate.h"
66 #include "calls.h"
67 #include "cfgloop.h"
68 #include "intl.h"
69 #include "langhooks.h"
71 #include "attribs.h"
72 #include "builtins.h"
73 #include "stor-layout.h"
75 #include "realmpfr.h"
76 #include "target.h"
78 #include "cpplib.h"
79 #include "input.h"
80 #include "toplev.h"
81 #include "substring-locations.h"
82 #include "diagnostic.h"
83 #include "domwalk.h"
84 #include "alloc-pool.h"
85 #include "vr-values.h"
86 #include "gimple-ssa-evrp-analyze.h"
/* Assumed worst-case value of MB_LEN_MAX on the target; six bytes is
   enough for UTF-8.  A target hook would be the right source if the
   value were used for optimization, but a fixed number is adequate
   for warnings.  */
#define target_mb_len_max()   6

/* Upper bound on the number of bytes one non-string directive can
   produce, i.e., the result of printf("%.*Lf", INT_MAX, -LDBL_MAX)
   when LDBL_MAX_10_EXP is 4932.  */
#define IEEE_MAX_10_EXP    4932
#define target_dir_max()   (target_int_max () + IEEE_MAX_10_EXP + 2)
99 namespace {
101 const pass_data pass_data_sprintf_length = {
102 GIMPLE_PASS, // pass type
103 "printf-return-value", // pass name
104 OPTGROUP_NONE, // optinfo_flags
105 TV_NONE, // tv_id
106 PROP_cfg, // properties_required
107 0, // properties_provided
108 0, // properties_destroyed
109 0, // properties_start
110 0, // properties_finish
/* Warning level in effect for the function currently being processed:
   the value of warn_format_trunc for bounded functions and that of
   warn_format_overflow for all others.  */

static int warn_level;

/* Defined below; needed by the walker's member declarations.  */
struct format_result;
121 class sprintf_dom_walker : public dom_walker
123 public:
124 sprintf_dom_walker () : dom_walker (CDI_DOMINATORS) {}
125 ~sprintf_dom_walker () {}
127 edge before_dom_children (basic_block) FINAL OVERRIDE;
128 void after_dom_children (basic_block) FINAL OVERRIDE;
129 bool handle_gimple_call (gimple_stmt_iterator *);
131 struct call_info;
132 bool compute_format_length (call_info &, format_result *);
133 class evrp_range_analyzer evrp_range_analyzer;
136 class pass_sprintf_length : public gimple_opt_pass
138 bool fold_return_value;
140 public:
141 pass_sprintf_length (gcc::context *ctxt)
142 : gimple_opt_pass (pass_data_sprintf_length, ctxt),
143 fold_return_value (false)
146 opt_pass * clone () { return new pass_sprintf_length (m_ctxt); }
148 virtual bool gate (function *);
150 virtual unsigned int execute (function *);
152 void set_pass_param (unsigned int n, bool param)
154 gcc_assert (n == 0);
155 fold_return_value = param;
160 bool
161 pass_sprintf_length::gate (function *)
163 /* Run the pass iff -Warn-format-overflow or -Warn-format-truncation
164 is specified and either not optimizing and the pass is being invoked
165 early, or when optimizing and the pass is being invoked during
166 optimization (i.e., "late"). */
167 return ((warn_format_overflow > 0
168 || warn_format_trunc > 0
169 || flag_printf_return_value)
170 && (optimize > 0) == fold_return_value);
173 /* The minimum, maximum, likely, and unlikely maximum number of bytes
174 of output either a formatting function or an individual directive
175 can result in. */
177 struct result_range
179 /* The absolute minimum number of bytes. The result of a successful
180 conversion is guaranteed to be no less than this. (An erroneous
181 conversion can be indicated by MIN > HOST_WIDE_INT_MAX.) */
182 unsigned HOST_WIDE_INT min;
183 /* The likely maximum result that is used in diagnostics. In most
184 cases MAX is the same as the worst case UNLIKELY result. */
185 unsigned HOST_WIDE_INT max;
186 /* The likely result used to trigger diagnostics. For conversions
187 that result in a range of bytes [MIN, MAX], LIKELY is somewhere
188 in that range. */
189 unsigned HOST_WIDE_INT likely;
190 /* In rare cases (e.g., for nultibyte characters) UNLIKELY gives
191 the worst cases maximum result of a directive. In most cases
192 UNLIKELY == MAX. UNLIKELY is used to control the return value
193 optimization but not in diagnostics. */
194 unsigned HOST_WIDE_INT unlikely;
197 /* The result of a call to a formatted function. */
199 struct format_result
201 /* Range of characters written by the formatted function.
202 Setting the minimum to HOST_WIDE_INT_MAX disables all
203 length tracking for the remainder of the format string. */
204 result_range range;
206 /* True when the range above is obtained from known values of
207 directive arguments, or bounds on the amount of output such
208 as width and precision, and not the result of heuristics that
209 depend on warning levels. It's used to issue stricter diagnostics
210 in cases where strings of unknown lengths are bounded by the arrays
211 they are determined to refer to. KNOWNRANGE must not be used for
212 the return value optimization. */
213 bool knownrange;
215 /* True if no individual directive could fail or result in more than
216 4095 bytes of output (the total NUMBER_CHARS_{MIN,MAX} might be
217 greater). Implementations are not required to handle directives
218 that produce more than 4K bytes (leading to undefined behavior)
219 and so when one is found it disables the return value optimization.
220 Similarly, directives that can fail (such as wide character
221 directives) disable the optimization. */
222 bool posunder4k;
224 /* True when a floating point directive has been seen in the format
225 string. */
226 bool floating;
228 /* True when an intermediate result has caused a warning. Used to
229 avoid issuing duplicate warnings while finishing the processing
230 of a call. WARNED also disables the return value optimization. */
231 bool warned;
233 /* Preincrement the number of output characters by 1. */
234 format_result& operator++ ()
236 return *this += 1;
239 /* Postincrement the number of output characters by 1. */
240 format_result operator++ (int)
242 format_result prev (*this);
243 *this += 1;
244 return prev;
247 /* Increment the number of output characters by N. */
248 format_result& operator+= (unsigned HOST_WIDE_INT);
251 format_result&
252 format_result::operator+= (unsigned HOST_WIDE_INT n)
254 gcc_assert (n < HOST_WIDE_INT_MAX);
256 if (range.min < HOST_WIDE_INT_MAX)
257 range.min += n;
259 if (range.max < HOST_WIDE_INT_MAX)
260 range.max += n;
262 if (range.likely < HOST_WIDE_INT_MAX)
263 range.likely += n;
265 if (range.unlikely < HOST_WIDE_INT_MAX)
266 range.unlikely += n;
268 return *this;
271 /* Return the value of INT_MIN for the target. */
273 static inline HOST_WIDE_INT
274 target_int_min ()
276 return tree_to_shwi (TYPE_MIN_VALUE (integer_type_node));
279 /* Return the value of INT_MAX for the target. */
281 static inline unsigned HOST_WIDE_INT
282 target_int_max ()
284 return tree_to_uhwi (TYPE_MAX_VALUE (integer_type_node));
287 /* Return the value of SIZE_MAX for the target. */
289 static inline unsigned HOST_WIDE_INT
290 target_size_max ()
292 return tree_to_uhwi (TYPE_MAX_VALUE (size_type_node));
/* Lookup table translating a character of the execution character set
   (the index) to the corresponding host character.  Element 0 doubles
   as a flag recording whether the mapping is the identity.  */

static char target_to_host_charmap[256];
300 /* Initialize a mapping from the execution character set to the host
301 character set. */
303 static bool
304 init_target_to_host_charmap ()
306 /* If the percent sign is non-zero the mapping has already been
307 initialized. */
308 if (target_to_host_charmap['%'])
309 return true;
311 /* Initialize the target_percent character (done elsewhere). */
312 if (!init_target_chars ())
313 return false;
315 /* The subset of the source character set used by printf conversion
316 specifications (strictly speaking, not all letters are used but
317 they are included here for the sake of simplicity). The dollar
318 sign must be included even though it's not in the basic source
319 character set. */
320 const char srcset[] = " 0123456789!\"#%&'()*+,-./:;<=>?[\\]^_{|}~$"
321 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
323 /* Set the mapping for all characters to some ordinary value (i,e.,
324 not none used in printf conversion specifications) and overwrite
325 those that are used by conversion specifications with their
326 corresponding values. */
327 memset (target_to_host_charmap + 1, '?', sizeof target_to_host_charmap - 1);
329 /* Are the two sets of characters the same? */
330 bool all_same_p = true;
332 for (const char *pc = srcset; *pc; ++pc)
334 /* Slice off the high end bits in case target characters are
335 signed. All values are expected to be non-nul, otherwise
336 there's a problem. */
337 if (unsigned char tc = lang_hooks.to_target_charset (*pc))
339 target_to_host_charmap[tc] = *pc;
340 if (tc != *pc)
341 all_same_p = false;
343 else
344 return false;
348 /* Set the first element to a non-zero value if the mapping
349 is 1-to-1, otherwise leave it clear (NUL is assumed to be
350 the same in both character sets). */
351 target_to_host_charmap[0] = all_same_p;
353 return true;
356 /* Return the host source character corresponding to the character
357 CH in the execution character set if one exists, or some innocuous
358 (non-special, non-nul) source character otherwise. */
360 static inline unsigned char
361 target_to_host (unsigned char ch)
363 return target_to_host_charmap[ch];
366 /* Convert an initial substring of the string TARGSTR consisting of
367 characters in the execution character set into a string in the
368 source character set on the host and store up to HOSTSZ characters
369 in the buffer pointed to by HOSTR. Return HOSTR. */
371 static const char*
372 target_to_host (char *hostr, size_t hostsz, const char *targstr)
374 /* Make sure the buffer is reasonably big. */
375 gcc_assert (hostsz > 4);
377 /* The interesting subset of source and execution characters are
378 the same so no conversion is necessary. However, truncate
379 overlong strings just like the translated strings are. */
380 if (target_to_host_charmap['\0'] == 1)
382 strncpy (hostr, targstr, hostsz - 4);
383 if (strlen (targstr) >= hostsz)
384 strcpy (hostr + hostsz - 4, "...");
385 return hostr;
388 /* Convert the initial substring of TARGSTR to the corresponding
389 characters in the host set, appending "..." if TARGSTR is too
390 long to fit. Using the static buffer assumes the function is
391 not called in between sequence points (which it isn't). */
392 for (char *ph = hostr; ; ++targstr)
394 *ph++ = target_to_host (*targstr);
395 if (!*targstr)
396 break;
398 if (size_t (ph - hostr) == hostsz - 4)
400 *ph = '\0';
401 strcat (ph, "...");
402 break;
406 return hostr;
409 /* Convert the sequence of decimal digits in the execution character
410 starting at S to a long, just like strtol does. Return the result
411 and set *END to one past the last converted character. On range
412 error set ERANGE to the digit that caused it. */
414 static inline long
415 target_strtol10 (const char **ps, const char **erange)
417 unsigned HOST_WIDE_INT val = 0;
418 for ( ; ; ++*ps)
420 unsigned char c = target_to_host (**ps);
421 if (ISDIGIT (c))
423 c -= '0';
425 /* Check for overflow. */
426 if (val > (LONG_MAX - c) / 10LU)
428 val = LONG_MAX;
429 *erange = *ps;
431 /* Skip the remaining digits. */
433 c = target_to_host (*++*ps);
434 while (ISDIGIT (c));
435 break;
437 else
438 val = val * 10 + c;
440 else
441 break;
444 return val;
447 /* Given FORMAT, set *PLOC to the source location of the format string
448 and return the format string if it is known or null otherwise. */
450 static const char*
451 get_format_string (tree format, location_t *ploc)
453 *ploc = EXPR_LOC_OR_LOC (format, input_location);
455 return c_getstr (format);
458 /* For convenience and brevity, shorter named entrypoints of
459 format_string_diagnostic_t::emit_warning_va and
460 format_string_diagnostic_t::emit_warning_n_va.
461 These have to be functions with the attribute so that exgettext
462 works properly. */
464 static bool
465 ATTRIBUTE_GCC_DIAG (5, 6)
466 fmtwarn (const substring_loc &fmt_loc, location_t param_loc,
467 const char *corrected_substring, int opt, const char *gmsgid, ...)
469 format_string_diagnostic_t diag (fmt_loc, NULL, param_loc, NULL,
470 corrected_substring);
471 va_list ap;
472 va_start (ap, gmsgid);
473 bool warned = diag.emit_warning_va (opt, gmsgid, &ap);
474 va_end (ap);
476 return warned;
479 static bool
480 ATTRIBUTE_GCC_DIAG (6, 8) ATTRIBUTE_GCC_DIAG (7, 8)
481 fmtwarn_n (const substring_loc &fmt_loc, location_t param_loc,
482 const char *corrected_substring, int opt, unsigned HOST_WIDE_INT n,
483 const char *singular_gmsgid, const char *plural_gmsgid, ...)
485 format_string_diagnostic_t diag (fmt_loc, NULL, param_loc, NULL,
486 corrected_substring);
487 va_list ap;
488 va_start (ap, plural_gmsgid);
489 bool warned = diag.emit_warning_n_va (opt, n, singular_gmsgid, plural_gmsgid,
490 &ap);
491 va_end (ap);
493 return warned;
/* Length modifiers of a conversion specification.  */

enum format_lengths
{
  FMT_LEN_none,   // no modifier
  FMT_LEN_hh,     // "hh": char argument
  FMT_LEN_h,      // "h": short
  FMT_LEN_l,      // "l": long
  FMT_LEN_ll,     // "ll": long long
  FMT_LEN_L,      // "L": long double (and GNU long long)
  FMT_LEN_z,      // "z": size_t
  FMT_LEN_t,      // "t": ptrdiff_t
  FMT_LEN_j       // "j": intmax_t
};
512 /* Description of the result of conversion either of a single directive
513 or the whole format string. */
515 struct fmtresult
517 /* Construct a FMTRESULT object with all counters initialized
518 to MIN. KNOWNRANGE is set when MIN is valid. */
519 fmtresult (unsigned HOST_WIDE_INT min = HOST_WIDE_INT_MAX)
520 : argmin (), argmax (), nonstr (),
521 knownrange (min < HOST_WIDE_INT_MAX),
522 mayfail (), nullp ()
524 range.min = min;
525 range.max = min;
526 range.likely = min;
527 range.unlikely = min;
530 /* Construct a FMTRESULT object with MIN, MAX, and LIKELY counters.
531 KNOWNRANGE is set when both MIN and MAX are valid. */
532 fmtresult (unsigned HOST_WIDE_INT min, unsigned HOST_WIDE_INT max,
533 unsigned HOST_WIDE_INT likely = HOST_WIDE_INT_MAX)
534 : argmin (), argmax (), nonstr (),
535 knownrange (min < HOST_WIDE_INT_MAX && max < HOST_WIDE_INT_MAX),
536 mayfail (), nullp ()
538 range.min = min;
539 range.max = max;
540 range.likely = max < likely ? min : likely;
541 range.unlikely = max;
544 /* Adjust result upward to reflect the RANGE of values the specified
545 width or precision is known to be in. */
546 fmtresult& adjust_for_width_or_precision (const HOST_WIDE_INT[2],
547 tree = NULL_TREE,
548 unsigned = 0, unsigned = 0);
550 /* Return the maximum number of decimal digits a value of TYPE
551 formats as on output. */
552 static unsigned type_max_digits (tree, int);
554 /* The range a directive's argument is in. */
555 tree argmin, argmax;
557 /* The minimum and maximum number of bytes that a directive
558 results in on output for an argument in the range above. */
559 result_range range;
561 /* Non-nul when the argument of a string directive is not a nul
562 terminated string. */
563 tree nonstr;
565 /* True when the range above is obtained from a known value of
566 a directive's argument or its bounds and not the result of
567 heuristics that depend on warning levels. */
568 bool knownrange;
570 /* True for a directive that may fail (such as wide character
571 directives). */
572 bool mayfail;
574 /* True when the argument is a null pointer. */
575 bool nullp;
578 /* Adjust result upward to reflect the range ADJUST of values the
579 specified width or precision is known to be in. When non-null,
580 TYPE denotes the type of the directive whose result is being
581 adjusted, BASE gives the base of the directive (octal, decimal,
582 or hex), and ADJ denotes the additional adjustment to the LIKELY
583 counter that may need to be added when ADJUST is a range. */
585 fmtresult&
586 fmtresult::adjust_for_width_or_precision (const HOST_WIDE_INT adjust[2],
587 tree type /* = NULL_TREE */,
588 unsigned base /* = 0 */,
589 unsigned adj /* = 0 */)
591 bool minadjusted = false;
593 /* Adjust the minimum and likely counters. */
594 if (adjust[0] >= 0)
596 if (range.min < (unsigned HOST_WIDE_INT)adjust[0])
598 range.min = adjust[0];
599 minadjusted = true;
602 /* Adjust the likely counter. */
603 if (range.likely < range.min)
604 range.likely = range.min;
606 else if (adjust[0] == target_int_min ()
607 && (unsigned HOST_WIDE_INT)adjust[1] == target_int_max ())
608 knownrange = false;
610 /* Adjust the maximum counter. */
611 if (adjust[1] > 0)
613 if (range.max < (unsigned HOST_WIDE_INT)adjust[1])
615 range.max = adjust[1];
617 /* Set KNOWNRANGE if both the minimum and maximum have been
618 adjusted. Otherwise leave it at what it was before. */
619 knownrange = minadjusted;
623 if (warn_level > 1 && type)
625 /* For large non-constant width or precision whose range spans
626 the maximum number of digits produced by the directive for
627 any argument, set the likely number of bytes to be at most
628 the number digits plus other adjustment determined by the
629 caller (one for sign or two for the hexadecimal "0x"
630 prefix). */
631 unsigned dirdigs = type_max_digits (type, base);
632 if (adjust[0] < dirdigs && dirdigs < adjust[1]
633 && range.likely < dirdigs)
634 range.likely = dirdigs + adj;
636 else if (range.likely < (range.min ? range.min : 1))
638 /* Conservatively, set LIKELY to at least MIN but no less than
639 1 unless MAX is zero. */
640 range.likely = (range.min
641 ? range.min
642 : range.max && (range.max < HOST_WIDE_INT_MAX
643 || warn_level > 1) ? 1 : 0);
646 /* Finally adjust the unlikely counter to be at least as large as
647 the maximum. */
648 if (range.unlikely < range.max)
649 range.unlikely = range.max;
651 return *this;
654 /* Return the maximum number of digits a value of TYPE formats in
655 BASE on output, not counting base prefix . */
657 unsigned
658 fmtresult::type_max_digits (tree type, int base)
660 unsigned prec = TYPE_PRECISION (type);
661 switch (base)
663 case 8:
664 return (prec + 2) / 3;
665 case 10:
666 /* Decimal approximation: yields 3, 5, 10, and 20 for precision
667 of 8, 16, 32, and 64 bits. */
668 return prec * 301 / 1000 + 1;
669 case 16:
670 return prec / 4;
673 gcc_unreachable ();
676 static bool
677 get_int_range (tree, HOST_WIDE_INT *, HOST_WIDE_INT *, bool, HOST_WIDE_INT,
678 class vr_values *vr_values);
680 /* Description of a format directive. A directive is either a plain
681 string or a conversion specification that starts with '%'. */
683 struct directive
685 /* The 1-based directive number (for debugging). */
686 unsigned dirno;
688 /* The first character of the directive and its length. */
689 const char *beg;
690 size_t len;
692 /* A bitmap of flags, one for each character. */
693 unsigned flags[256 / sizeof (int)];
695 /* The range of values of the specified width, or -1 if not specified. */
696 HOST_WIDE_INT width[2];
697 /* The range of values of the specified precision, or -1 if not
698 specified. */
699 HOST_WIDE_INT prec[2];
701 /* Length modifier. */
702 format_lengths modifier;
704 /* Format specifier character. */
705 char specifier;
707 /* The argument of the directive or null when the directive doesn't
708 take one or when none is available (such as for vararg functions). */
709 tree arg;
711 /* Format conversion function that given a directive and an argument
712 returns the formatting result. */
713 fmtresult (*fmtfunc) (const directive &, tree, vr_values *);
715 /* Return True when a the format flag CHR has been used. */
716 bool get_flag (char chr) const
718 unsigned char c = chr & 0xff;
719 return (flags[c / (CHAR_BIT * sizeof *flags)]
720 & (1U << (c % (CHAR_BIT * sizeof *flags))));
723 /* Make a record of the format flag CHR having been used. */
724 void set_flag (char chr)
726 unsigned char c = chr & 0xff;
727 flags[c / (CHAR_BIT * sizeof *flags)]
728 |= (1U << (c % (CHAR_BIT * sizeof *flags)));
731 /* Reset the format flag CHR. */
732 void clear_flag (char chr)
734 unsigned char c = chr & 0xff;
735 flags[c / (CHAR_BIT * sizeof *flags)]
736 &= ~(1U << (c % (CHAR_BIT * sizeof *flags)));
739 /* Set both bounds of the width range to VAL. */
740 void set_width (HOST_WIDE_INT val)
742 width[0] = width[1] = val;
745 /* Set the width range according to ARG, with both bounds being
746 no less than 0. For a constant ARG set both bounds to its value
747 or 0, whichever is greater. For a non-constant ARG in some range
748 set width to its range adjusting each bound to -1 if it's less.
749 For an indeterminate ARG set width to [0, INT_MAX]. */
750 void set_width (tree arg, vr_values *vr_values)
752 get_int_range (arg, width, width + 1, true, 0, vr_values);
755 /* Set both bounds of the precision range to VAL. */
756 void set_precision (HOST_WIDE_INT val)
758 prec[0] = prec[1] = val;
761 /* Set the precision range according to ARG, with both bounds being
762 no less than -1. For a constant ARG set both bounds to its value
763 or -1 whichever is greater. For a non-constant ARG in some range
764 set precision to its range adjusting each bound to -1 if it's less.
765 For an indeterminate ARG set precision to [-1, INT_MAX]. */
766 void set_precision (tree arg, vr_values *vr_values)
768 get_int_range (arg, prec, prec + 1, false, -1, vr_values);
771 /* Return true if both width and precision are known to be
772 either constant or in some range, false otherwise. */
773 bool known_width_and_precision () const
775 return ((width[1] < 0
776 || (unsigned HOST_WIDE_INT)width[1] <= target_int_max ())
777 && (prec[1] < 0
778 || (unsigned HOST_WIDE_INT)prec[1] < target_int_max ()));
782 /* Return the logarithm of X in BASE. */
784 static int
785 ilog (unsigned HOST_WIDE_INT x, int base)
787 int res = 0;
790 ++res;
791 x /= base;
792 } while (x);
793 return res;
796 /* Return the number of bytes resulting from converting into a string
797 the INTEGER_CST tree node X in BASE with a minimum of PREC digits.
798 PLUS indicates whether 1 for a plus sign should be added for positive
799 numbers, and PREFIX whether the length of an octal ('O') or hexadecimal
800 ('0x') prefix should be added for nonzero numbers. Return -1 if X cannot
801 be represented. */
803 static HOST_WIDE_INT
804 tree_digits (tree x, int base, HOST_WIDE_INT prec, bool plus, bool prefix)
806 unsigned HOST_WIDE_INT absval;
808 HOST_WIDE_INT res;
810 if (TYPE_UNSIGNED (TREE_TYPE (x)))
812 if (tree_fits_uhwi_p (x))
814 absval = tree_to_uhwi (x);
815 res = plus;
817 else
818 return -1;
820 else
822 if (tree_fits_shwi_p (x))
824 HOST_WIDE_INT i = tree_to_shwi (x);
825 if (HOST_WIDE_INT_MIN == i)
827 /* Avoid undefined behavior due to negating a minimum. */
828 absval = HOST_WIDE_INT_MAX;
829 res = 1;
831 else if (i < 0)
833 absval = -i;
834 res = 1;
836 else
838 absval = i;
839 res = plus;
842 else
843 return -1;
846 int ndigs = ilog (absval, base);
848 res += prec < ndigs ? ndigs : prec;
850 /* Adjust a non-zero value for the base prefix, either hexadecimal,
851 or, unless precision has resulted in a leading zero, also octal. */
852 if (prefix && absval && (base == 16 || prec <= ndigs))
854 if (base == 8)
855 res += 1;
856 else if (base == 16)
857 res += 2;
860 return res;
863 /* Given the formatting result described by RES and NAVAIL, the number
864 of available in the destination, return the range of bytes remaining
865 in the destination. */
867 static inline result_range
868 bytes_remaining (unsigned HOST_WIDE_INT navail, const format_result &res)
870 result_range range;
872 if (HOST_WIDE_INT_MAX <= navail)
874 range.min = range.max = range.likely = range.unlikely = navail;
875 return range;
878 /* The lower bound of the available range is the available size
879 minus the maximum output size, and the upper bound is the size
880 minus the minimum. */
881 range.max = res.range.min < navail ? navail - res.range.min : 0;
883 range.likely = res.range.likely < navail ? navail - res.range.likely : 0;
885 if (res.range.max < HOST_WIDE_INT_MAX)
886 range.min = res.range.max < navail ? navail - res.range.max : 0;
887 else
888 range.min = range.likely;
890 range.unlikely = (res.range.unlikely < navail
891 ? navail - res.range.unlikely : 0);
893 return range;
896 /* Description of a call to a formatted function. */
898 struct sprintf_dom_walker::call_info
900 /* Function call statement. */
901 gimple *callstmt;
903 /* Function called. */
904 tree func;
906 /* Called built-in function code. */
907 built_in_function fncode;
909 /* Format argument and format string extracted from it. */
910 tree format;
911 const char *fmtstr;
913 /* The location of the format argument. */
914 location_t fmtloc;
916 /* The destination object size for __builtin___xxx_chk functions
917 typically determined by __builtin_object_size, or -1 if unknown. */
918 unsigned HOST_WIDE_INT objsize;
920 /* Number of the first variable argument. */
921 unsigned HOST_WIDE_INT argidx;
923 /* True for functions like snprintf that specify the size of
924 the destination, false for others like sprintf that don't. */
925 bool bounded;
927 /* True for bounded functions like snprintf that specify a zero-size
928 buffer as a request to compute the size of output without actually
929 writing any. NOWRITE is cleared in response to the %n directive
930 which has side-effects similar to writing output. */
931 bool nowrite;
933 /* Return true if the called function's return value is used. */
934 bool retval_used () const
936 return gimple_get_lhs (callstmt);
939 /* Return the warning option corresponding to the called function. */
940 int warnopt () const
942 return bounded ? OPT_Wformat_truncation_ : OPT_Wformat_overflow_;
946 /* Return the result of formatting a no-op directive (such as '%n'). */
948 static fmtresult
949 format_none (const directive &, tree, vr_values *)
951 fmtresult res (0);
952 return res;
955 /* Return the result of formatting the '%%' directive. */
957 static fmtresult
958 format_percent (const directive &, tree, vr_values *)
960 fmtresult res (1);
961 return res;
965 /* Compute intmax_type_node and uintmax_type_node similarly to how
966 tree.c builds size_type_node. */
968 static void
969 build_intmax_type_nodes (tree *pintmax, tree *puintmax)
971 if (strcmp (UINTMAX_TYPE, "unsigned int") == 0)
973 *pintmax = integer_type_node;
974 *puintmax = unsigned_type_node;
976 else if (strcmp (UINTMAX_TYPE, "long unsigned int") == 0)
978 *pintmax = long_integer_type_node;
979 *puintmax = long_unsigned_type_node;
981 else if (strcmp (UINTMAX_TYPE, "long long unsigned int") == 0)
983 *pintmax = long_long_integer_type_node;
984 *puintmax = long_long_unsigned_type_node;
986 else
988 for (int i = 0; i < NUM_INT_N_ENTS; i++)
989 if (int_n_enabled_p[i])
991 char name[50];
992 sprintf (name, "__int%d unsigned", int_n_data[i].bitsize);
994 if (strcmp (name, UINTMAX_TYPE) == 0)
996 *pintmax = int_n_trees[i].signed_type;
997 *puintmax = int_n_trees[i].unsigned_type;
998 return;
1001 gcc_unreachable ();
1005 /* Determine the range [*PMIN, *PMAX] that the expression ARG is
1006 in and that is representable in type int.
1007 Return true when the range is a subrange of that of int.
1008 When ARG is null it is as if it had the full range of int.
1009 When ABSOLUTE is true the range reflects the absolute value of
1010 the argument. When ABSOLUTE is false, negative bounds of
1011 the determined range are replaced with NEGBOUND. */
1013 static bool
1014 get_int_range (tree arg, HOST_WIDE_INT *pmin, HOST_WIDE_INT *pmax,
1015 bool absolute, HOST_WIDE_INT negbound,
1016 class vr_values *vr_values)
1018 /* The type of the result (int). It supplies the default bounds used
when ARG is null and the precision limit checked against ARG below. */
1019 const_tree type = integer_type_node;
1021 bool knownrange = false;
1023 if (!arg)
1025 *pmin = tree_to_shwi (TYPE_MIN_VALUE (type));
1026 *pmax = tree_to_shwi (TYPE_MAX_VALUE (type));
1028 else if (TREE_CODE (arg) == INTEGER_CST
1029 && TYPE_PRECISION (TREE_TYPE (arg)) <= TYPE_PRECISION (type))
1031 /* For a constant argument return its value adjusted as specified
1032 by ABSOLUTE and NEGBOUND and return true to indicate that the
1033 result is known. */
1034 *pmin = tree_fits_shwi_p (arg) ? tree_to_shwi (arg) : tree_to_uhwi (arg);
1035 *pmax = *pmin;
1036 knownrange = true;
1038 else
1040 /* True if the argument's range cannot be determined. */
1041 bool unknown = true;
1043 tree argtype = TREE_TYPE (arg);
1045 /* Ignore invalid arguments with greater precision than that
1046 of the expected type (e.g., in sprintf("%*i", 12LL, i)).
1047 They will have been detected and diagnosed by -Wformat and
1048 so it's not important to complicate this code to try to deal
1049 with them again. */
1050 if (TREE_CODE (arg) == SSA_NAME
1051 && INTEGRAL_TYPE_P (argtype)
1052 && TYPE_PRECISION (argtype) <= TYPE_PRECISION (type))
1054 /* Try to determine the range of values of the integer argument. */
1055 value_range *vr = vr_values->get_value_range (arg);
1056 if (range_int_cst_p (vr))
1058 HOST_WIDE_INT type_min
1059 = (TYPE_UNSIGNED (argtype)
1060 ? tree_to_uhwi (TYPE_MIN_VALUE (argtype))
1061 : tree_to_shwi (TYPE_MIN_VALUE (argtype)));
1063 HOST_WIDE_INT type_max = tree_to_uhwi (TYPE_MAX_VALUE (argtype));
1065 *pmin = TREE_INT_CST_LOW (vr->min ());
1066 *pmax = TREE_INT_CST_LOW (vr->max ());
1068 if (*pmin < *pmax)
1070 /* Return true if the adjusted range is a subrange of
1071 the full range of the argument's type. *PMAX may
1072 be less than *PMIN when the argument is unsigned
1073 and its upper bound is in excess of TYPE_MAX. In
1074 that (invalid) case disregard the range and use that
1075 of the expected type instead. Note that the test is
strict, so a range with *PMIN equal to *PMAX is likewise
left marked as unknown. */
1076 knownrange = type_min < *pmin || *pmax < type_max;
1078 unknown = false;
1083 /* Handle an argument with an unknown range as if none had been
1084 provided. The recursive call also applies the ABSOLUTE and
NEGBOUND adjustment, so its result is returned directly. */
1085 if (unknown)
1086 return get_int_range (NULL_TREE, pmin, pmax, absolute,
1087 negbound, vr_values);
1090 /* Adjust each bound as specified by ABSOLUTE and NEGBOUND. */
1091 if (absolute)
1093 if (*pmin < 0)
1095 if (*pmin == *pmax)
1096 *pmin = *pmax = -*pmin;
1097 else
1099 /* Make sure signed overflow is avoided. */
1100 gcc_assert (*pmin != HOST_WIDE_INT_MIN);
1102 HOST_WIDE_INT tmp = -*pmin;
1103 *pmin = 0;
1104 if (*pmax < tmp)
1105 *pmax = tmp;
1109 else if (*pmin < negbound)
1110 *pmin = negbound;
1112 return knownrange;
1115 /* With the range [*ARGMIN, *ARGMAX] of an integer directive's actual
1116 argument, due to the conversion from either *ARGMIN or *ARGMAX to
1117 the type of the directive's formal argument it's possible for both
1118 to result in the same number of bytes or a range of bytes that's
1119 less than the number of bytes that would result from formatting
1120 some other value in the range [*ARGMIN, *ARGMAX]. This can be
1121 determined by checking for the actual argument being in the range
1122 of the type of the directive. If it isn't it must be assumed to
1123 take on the full range of the directive's type.
1124 Return true when the range has been adjusted to the full range
1125 of DIRTYPE, and false otherwise. */
1127 static bool
1128 adjust_range_for_overflow (tree dirtype, tree *argmin, tree *argmax)
1130 tree argtype = TREE_TYPE (*argmin);
1131 unsigned argprec = TYPE_PRECISION (argtype);
1132 unsigned dirprec = TYPE_PRECISION (dirtype);
1134 /* If the actual argument and the directive's argument have the same
1135 precision and sign there can be no overflow and so there is nothing
1136 to adjust. */
1137 if (argprec == dirprec && TYPE_SIGN (argtype) == TYPE_SIGN (dirtype))
1138 return false;
1140 /* The logic below was inspired/lifted from the CONVERT_EXPR_CODE_P
1141 branch in the extract_range_from_unary_expr function in tree-vrp.c. */
1143 if (TREE_CODE (*argmin) == INTEGER_CST
1144 && TREE_CODE (*argmax) == INTEGER_CST
1145 && (dirprec >= argprec
1146 || integer_zerop (int_const_binop (RSHIFT_EXPR,
1147 int_const_binop (MINUS_EXPR,
1148 *argmax,
1149 *argmin),
1150 size_int (dirprec)))))
1152 *argmin = force_fit_type (dirtype, wi::to_widest (*argmin), 0, false);
1153 *argmax = force_fit_type (dirtype, wi::to_widest (*argmax), 0, false);
1155 /* If *ARGMIN is still less than *ARGMAX the conversion above
1156 is safe. Otherwise, it has overflowed and would be unsafe. */
1157 if (tree_int_cst_le (*argmin, *argmax))
1158 return false;
1161 *argmin = TYPE_MIN_VALUE (dirtype);
1162 *argmax = TYPE_MAX_VALUE (dirtype);
1163 return true;
1166 /* Return a range representing the minimum and maximum number of bytes
1167 that the integer format directive DIR will output for ARG given the
1168 WIDTH and PRECISION (extracted from DIR). ARG may be null (unknown),
1169 an INTEGER_CST, or an expression whose range is queried in VR_VALUES. */
1171 static fmtresult
1172 format_integer (const directive &dir, tree arg, vr_values *vr_values)
1174 tree intmax_type_node;
1175 tree uintmax_type_node;
1177 /* Base to format the number in. */
1178 int base;
1180 /* True when a conversion is preceded by a prefix indicating the base
1181 of the argument (octal or hexadecimal). */
1182 bool maybebase = dir.get_flag ('#');
1184 /* True when a signed conversion is preceded by a sign or space. */
1185 bool maybesign = false;
1187 /* True for signed conversions (i.e., 'd' and 'i'). */
1188 bool sign = false;
1190 switch (dir.specifier)
1192 case 'd':
1193 case 'i':
1194 /* Space and '+' are only meaningful for signed conversions. */
1195 maybesign = dir.get_flag (' ') | dir.get_flag ('+');
1196 sign = true;
1197 base = 10;
1198 break;
1199 case 'u':
1200 base = 10;
1201 break;
1202 case 'o':
1203 base = 8;
1204 break;
1205 case 'X':
1206 case 'x':
1207 base = 16;
1208 break;
1209 default:
1210 gcc_unreachable ();
1213 /* The type of the "formal" argument expected by the directive. */
1214 tree dirtype = NULL_TREE;
1216 /* Determine the expected type of the argument from the length
1217 modifier. */
1218 switch (dir.modifier)
1220 case FMT_LEN_none:
/* NOTE(review): the specifier switch above only admits d, i, u, o, x
and X, so this 'p' test looks unreachable from here -- confirm. */
1221 if (dir.specifier == 'p')
1222 dirtype = ptr_type_node;
1223 else
1224 dirtype = sign ? integer_type_node : unsigned_type_node;
1225 break;
1227 case FMT_LEN_h:
1228 dirtype = sign ? short_integer_type_node : short_unsigned_type_node;
1229 break;
1231 case FMT_LEN_hh:
1232 dirtype = sign ? signed_char_type_node : unsigned_char_type_node;
1233 break;
/* The L modifier is given the same type as ll below. */
1235 case FMT_LEN_l:
1236 dirtype = sign ? long_integer_type_node : long_unsigned_type_node;
1237 break;
1239 case FMT_LEN_L:
1240 case FMT_LEN_ll:
1241 dirtype = (sign
1242 ? long_long_integer_type_node
1243 : long_long_unsigned_type_node);
1244 break;
/* For z and t pick the variant of size_t or ptrdiff_t whose signedness
matches the conversion (presumably the first argument of
signed_or_unsigned_type_for requests the unsigned variant). */
1246 case FMT_LEN_z:
1247 dirtype = signed_or_unsigned_type_for (!sign, size_type_node);
1248 break;
1250 case FMT_LEN_t:
1251 dirtype = signed_or_unsigned_type_for (!sign, ptrdiff_type_node);
1252 break;
1254 case FMT_LEN_j:
1255 build_intmax_type_nodes (&intmax_type_node, &uintmax_type_node);
1256 dirtype = sign ? intmax_type_node : uintmax_type_node;
1257 break;
1259 default:
1260 return fmtresult ();
1263 /* The type of the argument to the directive, either deduced from
1264 the actual non-constant argument if one is known, or from
1265 the directive itself when none has been provided because it's
1266 a va_list. */
1267 tree argtype = NULL_TREE;
1269 if (!arg)
1271 /* When the argument has not been provided, use the type of
1272 the directive's argument as an approximation. This will
1273 result in false positives for directives like %i with
1274 arguments with smaller precision (such as short or char). */
1275 argtype = dirtype;
1277 else if (TREE_CODE (arg) == INTEGER_CST)
1279 /* When a constant argument has been provided use its value
1280 rather than type to determine the length of the output. */
1281 fmtresult res;
/* The test below is true when ARG is zero and the precision is zero
or is a range that includes zero. */
1283 if ((dir.prec[0] <= 0 && dir.prec[1] >= 0) && integer_zerop (arg))
1285 /* As a special case, a precision of zero with a zero argument
1286 results in zero bytes except in base 8 when the '#' flag is
1287 specified, and for signed conversions in base 8 and 10 when
1288 either the space or '+' flag has been specified and it results
1289 in just one byte (with width having the normal effect). This
1290 must extend to the case of a specified precision with
1291 an unknown value because it can be zero. */
1292 res.range.min = ((base == 8 && dir.get_flag ('#')) || maybesign);
1293 if (res.range.min == 0 && dir.prec[0] != dir.prec[1])
1295 res.range.max = 1;
1296 res.range.likely = 1;
1298 else
1300 res.range.max = res.range.min;
1301 res.range.likely = res.range.min;
1304 else
1306 /* Convert the argument to the type of the directive. */
1307 arg = fold_convert (dirtype, arg);
1309 res.range.min = tree_digits (arg, base, dir.prec[0],
1310 maybesign, maybebase);
1311 if (dir.prec[0] == dir.prec[1])
1312 res.range.max = res.range.min;
1313 else
1314 res.range.max = tree_digits (arg, base, dir.prec[1],
1315 maybesign, maybebase);
1316 res.range.likely = res.range.min;
1317 res.knownrange = true;
1320 res.range.unlikely = res.range.max;
1322 /* Bump up the counters if WIDTH is greater than LEN. */
1323 res.adjust_for_width_or_precision (dir.width, dirtype, base,
1324 (sign | maybebase) + (base == 16));
1325 /* Bump up the counters again if PRECision is greater still. */
1326 res.adjust_for_width_or_precision (dir.prec, dirtype, base,
1327 (sign | maybebase) + (base == 16));
1329 return res;
1331 else if (INTEGRAL_TYPE_P (TREE_TYPE (arg))
1332 || TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE)
1333 /* Determine the type of the provided non-constant argument. */
1334 argtype = TREE_TYPE (arg);
1335 else
1336 /* Don't bother with invalid arguments since they likely would
1337 have already been diagnosed, and disable any further checking
1338 of the format string by returning [-1, -1]. */
1339 return fmtresult ();
1341 fmtresult res;
1343 /* Using either the range the non-constant argument is in, or its
1344 type (either "formal" or actual), create a range of values that
1345 constrain the length of output given the warning level. */
1346 tree argmin = NULL_TREE;
1347 tree argmax = NULL_TREE;
1349 if (arg
1350 && TREE_CODE (arg) == SSA_NAME
1351 && INTEGRAL_TYPE_P (argtype))
1353 /* Try to determine the range of values of the integer argument
1354 (range information is not available for pointers). */
1355 value_range *vr = vr_values->get_value_range (arg);
1356 if (range_int_cst_p (vr))
1358 argmin = vr->min ();
1359 argmax = vr->max ();
1361 /* Set KNOWNRANGE if the argument is in a known subrange
1362 of the directive's type and neither width nor precision
1363 is unknown. (KNOWNRANGE may be reset below). */
1364 res.knownrange
1365 = ((!tree_int_cst_equal (TYPE_MIN_VALUE (dirtype), argmin)
1366 || !tree_int_cst_equal (TYPE_MAX_VALUE (dirtype), argmax))
1367 && dir.known_width_and_precision ());
1369 res.argmin = argmin;
1370 res.argmax = argmax;
1372 else if (vr->kind () == VR_ANTI_RANGE)
1374 /* Handle anti-ranges if/when bug 71690 is resolved. */
1376 else if (vr->varying_p () || vr->undefined_p ())
1378 /* The argument here may be the result of promoting the actual
1379 argument to int. Try to determine the type of the actual
1380 argument before promotion and narrow down its range that
1381 way. */
1382 gimple *def = SSA_NAME_DEF_STMT (arg);
1383 if (is_gimple_assign (def))
1385 tree_code code = gimple_assign_rhs_code (def);
/* The defining statement assigns a constant: format that constant
instead of the SSA name. */
1386 if (code == INTEGER_CST)
1388 arg = gimple_assign_rhs1 (def);
1389 return format_integer (dir, arg, vr_values);
1392 if (code == NOP_EXPR)
1394 tree type = TREE_TYPE (gimple_assign_rhs1 (def));
1395 if (INTEGRAL_TYPE_P (type)
1396 || TREE_CODE (type) == POINTER_TYPE)
1397 argtype = type;
/* No usable range was found above: fall back to the full range of
ARGTYPE, or of a pointer-sized integer when ARGTYPE is a pointer. */
1403 if (!argmin)
1405 if (TREE_CODE (argtype) == POINTER_TYPE)
1407 argmin = build_int_cst (pointer_sized_int_node, 0);
1408 argmax = build_all_ones_cst (pointer_sized_int_node);
1410 else
1412 argmin = TYPE_MIN_VALUE (argtype);
1413 argmax = TYPE_MAX_VALUE (argtype);
1417 /* Clear KNOWNRANGE if the range has been adjusted to the maximum
1418 of the directive. If it has been cleared then since ARGMIN and/or
1419 ARGMAX have been adjusted also adjust the corresponding ARGMIN and
1420 ARGMAX in the result to include in diagnostics. */
1421 if (adjust_range_for_overflow (dirtype, &argmin, &argmax))
1423 res.knownrange = false;
1424 res.argmin = argmin;
1425 res.argmax = argmax;
1428 /* Recursively compute the minimum and maximum from the known range.
Each recursive call below passes a constant bound, so it takes the
INTEGER_CST path above and does not recurse further. */
1429 if (TYPE_UNSIGNED (dirtype) || tree_int_cst_sgn (argmin) >= 0)
1431 /* For unsigned conversions/directives or signed when
1432 the minimum is positive, use the minimum and maximum to compute
1433 the shortest and longest output, respectively. */
1434 res.range.min = format_integer (dir, argmin, vr_values).range.min;
1435 res.range.max = format_integer (dir, argmax, vr_values).range.max;
1437 else if (tree_int_cst_sgn (argmax) < 0)
1439 /* For signed conversions/directives if maximum is negative,
1440 use the minimum as the longest output and maximum as the
1441 shortest output. */
1442 res.range.min = format_integer (dir, argmax, vr_values).range.min;
1443 res.range.max = format_integer (dir, argmin, vr_values).range.max;
1445 else
1447 /* Otherwise, 0 is inside of the range and minimum negative. Use 0
1448 as the shortest output and for the longest output compute the
1449 length of the output of both minimum and maximum and pick the
1450 longer. */
1451 unsigned HOST_WIDE_INT max1
1452 = format_integer (dir, argmin, vr_values).range.max;
1453 unsigned HOST_WIDE_INT max2
1454 = format_integer (dir, argmax, vr_values).range.max;
1455 res.range.min
1456 = format_integer (dir, integer_zero_node, vr_values).range.min;
1457 res.range.max = MAX (max1, max2);
1460 /* If the range is known, use the maximum as the likely length. */
1461 if (res.knownrange)
1462 res.range.likely = res.range.max;
1463 else
1465 /* Otherwise, use the minimum. Except for the case where for %#x or
1466 %#o the minimum is just for a single value in the range (0) and
1467 for all other values it is something longer, like 0x1 or 01.
1468 Use the length for value 1 in that case instead as the likely
1469 length. */
1470 res.range.likely = res.range.min;
1471 if (maybebase
1472 && base != 10
1473 && (tree_int_cst_sgn (argmin) < 0 || tree_int_cst_sgn (argmax) > 0))
1475 if (res.range.min == 1)
1476 res.range.likely += base == 8 ? 1 : 2;
1477 else if (res.range.min == 2
1478 && base == 16
1479 && (dir.width[0] == 2 || dir.prec[0] == 2))
1480 ++res.range.likely;
1484 res.range.unlikely = res.range.max;
1485 res.adjust_for_width_or_precision (dir.width, dirtype, base,
1486 (sign | maybebase) + (base == 16));
1487 res.adjust_for_width_or_precision (dir.prec, dirtype, base,
1488 (sign | maybebase) + (base == 16));
1490 return res;
1493 /* Return the number of bytes that a format directive consisting of FLAGS,
1494 PRECision, format SPECification, and MPFR rounding specifier RNDSPEC,
1495 would result for argument X under ideal conditions (i.e., if PREC
1496 weren't excessive). MPFR 3.1 allocates large amounts of memory for
1497 values of PREC with large magnitude and can fail (see MPFR bug #21056).
1498 This function works around those problems. */
1500 static unsigned HOST_WIDE_INT
1501 get_mpfr_format_length (mpfr_ptr x, const char *flags, HOST_WIDE_INT prec,
1502 char spec, char rndspec)
1504 char fmtstr[40];
1506 HOST_WIDE_INT len = strlen (flags);
1508 fmtstr[0] = '%';
1509 memcpy (fmtstr + 1, flags, len);
1510 memcpy (fmtstr + 1 + len, ".*R", 3);
1511 fmtstr[len + 4] = rndspec;
1512 fmtstr[len + 5] = spec;
1513 fmtstr[len + 6] = '\0';
1515 spec = TOUPPER (spec);
1516 if (spec == 'E' || spec == 'F')
1518 /* For %e, specify the precision explicitly since mpfr_sprintf
1519 does its own thing just to be different (see MPFR bug 21088). */
1520 if (prec < 0)
1521 prec = 6;
1523 else
1525 /* Avoid passing negative precisions with larger magnitude to MPFR
1526 to avoid exposing its bugs. (A negative precision is supposed
1527 to be ignored.) */
1528 if (prec < 0)
1529 prec = -1;
1532 HOST_WIDE_INT p = prec;
1534 if (spec == 'G' && !strchr (flags, '#'))
1536 /* For G/g without the pound flag, precision gives the maximum number
1537 of significant digits which is bounded by LDBL_MAX_10_EXP, or, for
1538 a 128 bit IEEE extended precision, 4932. Using twice as much here
1539 should be more than sufficient for any real format. */
1540 if ((IEEE_MAX_10_EXP * 2) < prec)
1541 prec = IEEE_MAX_10_EXP * 2;
1542 p = prec;
1544 else
1546 /* Cap precision arbitrarily at 1KB and add the difference
1547 (if any) to the MPFR result. */
1548 if (prec > 1024)
1549 p = 1024;
1552 len = mpfr_snprintf (NULL, 0, fmtstr, (int)p, x);
1554 /* Handle the unlikely (impossible?) error by returning more than
1555 the maximum dictated by the function's return type. */
1556 if (len < 0)
1557 return target_dir_max () + 1;
1559 /* Adjust the return value by the difference. */
1560 if (p < prec)
1561 len += prec - p;
1563 return len;
1566 /* Return the number of bytes to format using the format specifier
1567 SPEC and the precision PREC the largest value in the real floating
1568 TYPE. */
1570 static unsigned HOST_WIDE_INT
1571 format_floating_max (tree type, char spec, HOST_WIDE_INT prec)
1573 machine_mode mode = TYPE_MODE (type);
1575 /* IBM Extended mode. */
1576 if (MODE_COMPOSITE_P (mode))
1577 mode = DFmode;
1579 /* Get the real type format desription for the target. */
1580 const real_format *rfmt = REAL_MODE_FORMAT (mode);
1581 REAL_VALUE_TYPE rv;
1583 real_maxval (&rv, 0, mode);
1585 /* Convert the GCC real value representation with the precision
1586 of the real type to the mpfr_t format with the GCC default
1587 round-to-nearest mode. */
1588 mpfr_t x;
1589 mpfr_init2 (x, rfmt->p);
1590 mpfr_from_real (x, &rv, GMP_RNDN);
1592 /* Return a value one greater to account for the leading minus sign. */
1593 unsigned HOST_WIDE_INT r
1594 = 1 + get_mpfr_format_length (x, "", prec, spec, 'D');
1595 mpfr_clear (x);
1596 return r;
1599 /* Return a range representing the minimum and maximum number of bytes
1600 that the directive DIR will output for any argument. PREC gives
1601 the adjusted precision range to account for negative precisions
1602 meaning the default 6. This function is used when the directive
1603 argument or its value isn't known. */
1605 static fmtresult
1606 format_floating (const directive &dir, const HOST_WIDE_INT prec[2])
1608 tree type;
1610 switch (dir.modifier)
1612 case FMT_LEN_l:
1613 case FMT_LEN_none:
1614 type = double_type_node;
1615 break;
1617 case FMT_LEN_L:
1618 type = long_double_type_node;
1619 break;
1621 case FMT_LEN_ll:
1622 type = long_double_type_node;
1623 break;
1625 default:
1626 return fmtresult ();
1629 /* The minimum and maximum number of bytes produced by the directive. */
1630 fmtresult res;
1632 /* The minimum output as determined by flags. It's always at least 1.
1633 When plus or space are set the output is preceded by either a sign
1634 or a space. */
1635 unsigned flagmin = (1 /* for the first digit */
1636 + (dir.get_flag ('+') | dir.get_flag (' ')));
1638 /* The minimum is 3 for "inf" and "nan" for all specifiers, plus 1
1639 for the plus sign/space with the '+' and ' ' flags, respectively,
1640 unless reduced below. */
1641 res.range.min = 2 + flagmin;
1643 /* When the pound flag is set the decimal point is included in output
1644 regardless of precision. Whether or not a decimal point is included
1645 otherwise depends on the specification and precision. */
1646 bool radix = dir.get_flag ('#');
1648 switch (dir.specifier)
1650 case 'A':
1651 case 'a':
1653 HOST_WIDE_INT minprec = 6 + !radix /* decimal point */;
1654 if (dir.prec[0] <= 0)
1655 minprec = 0;
1656 else if (dir.prec[0] > 0)
1657 minprec = dir.prec[0] + !radix /* decimal point */;
1659 res.range.likely = (2 /* 0x */
1660 + flagmin
1661 + radix
1662 + minprec
1663 + 3 /* p+0 */);
1665 res.range.max = format_floating_max (type, 'a', prec[1]);
1667 /* The unlikely maximum accounts for the longest multibyte
1668 decimal point character. */
1669 res.range.unlikely = res.range.max;
1670 if (dir.prec[1] > 0)
1671 res.range.unlikely += target_mb_len_max () - 1;
1673 break;
1676 case 'E':
1677 case 'e':
1679 /* Minimum output attributable to precision and, when it's
1680 non-zero, decimal point. */
1681 HOST_WIDE_INT minprec = prec[0] ? prec[0] + !radix : 0;
1683 /* The likely minimum output is "[-+]1.234567e+00" regardless
1684 of the value of the actual argument. */
1685 res.range.likely = (flagmin
1686 + radix
1687 + minprec
1688 + 2 /* e+ */ + 2);
1690 res.range.max = format_floating_max (type, 'e', prec[1]);
1692 /* The unlikely maximum accounts for the longest multibyte
1693 decimal point character. */
1694 if (dir.prec[0] != dir.prec[1]
1695 || dir.prec[0] == -1 || dir.prec[0] > 0)
1696 res.range.unlikely = res.range.max + target_mb_len_max () -1;
1697 else
1698 res.range.unlikely = res.range.max;
1699 break;
1702 case 'F':
1703 case 'f':
1705 /* Minimum output attributable to precision and, when it's non-zero,
1706 decimal point. */
1707 HOST_WIDE_INT minprec = prec[0] ? prec[0] + !radix : 0;
1709 /* For finite numbers (i.e., not infinity or NaN) the lower bound
1710 when precision isn't specified is 8 bytes ("1.23456" since
1711 precision is taken to be 6). When precision is zero, the lower
1712 bound is 1 byte (e.g., "1"). Otherwise, when precision is greater
1713 than zero, then the lower bound is 2 plus precision (plus flags).
1714 But in all cases, the lower bound is no greater than 3. */
1715 unsigned HOST_WIDE_INT min = flagmin + radix + minprec;
1716 if (min < res.range.min)
1717 res.range.min = min;
1719 /* Compute the upper bound for -TYPE_MAX. */
1720 res.range.max = format_floating_max (type, 'f', prec[1]);
1722 /* The minimum output with unknown precision is a single byte
1723 (e.g., "0") but the more likely output is 3 bytes ("0.0"). */
1724 if (dir.prec[0] < 0 && dir.prec[1] > 0)
1725 res.range.likely = 3;
1726 else
1727 res.range.likely = min;
1729 /* The unlikely maximum accounts for the longest multibyte
1730 decimal point character. */
1731 if (dir.prec[0] != dir.prec[1]
1732 || dir.prec[0] == -1 || dir.prec[0] > 0)
1733 res.range.unlikely = res.range.max + target_mb_len_max () - 1;
1734 break;
1737 case 'G':
1738 case 'g':
1740 /* The %g output depends on precision and the exponent of
1741 the argument. Since the value of the argument isn't known
1742 the lower bound on the range of bytes (not counting flags
1743 or width) is 1 plus radix (i.e., either "0" or "0." for
1744 "%g" and "%#g", respectively, with a zero argument). */
1745 unsigned HOST_WIDE_INT min = flagmin + radix;
1746 if (min < res.range.min)
1747 res.range.min = min;
1749 char spec = 'g';
1750 HOST_WIDE_INT maxprec = dir.prec[1];
1751 if (radix && maxprec)
1753 /* When the pound flag (radix) is set, trailing zeros aren't
1754 trimmed and so the longest output is the same as for %e,
1755 except with precision minus 1 (as specified in C11). */
1756 spec = 'e';
1757 if (maxprec > 0)
1758 --maxprec;
1759 else if (maxprec < 0)
1760 maxprec = 5;
1762 else
1763 maxprec = prec[1];
1765 res.range.max = format_floating_max (type, spec, maxprec);
1767 /* The likely output is either the maximum computed above
1768 minus 1 (assuming the maximum is positive) when precision
1769 is known (or unspecified), or the same minimum as for %e
1770 (which is computed for a non-negative argument). Unlike
1771 for the other specifiers above the likely output isn't
1772 the minimum because for %g that's 1 which is unlikely. */
1773 if (dir.prec[1] < 0
1774 || (unsigned HOST_WIDE_INT)dir.prec[1] < target_int_max ())
1775 res.range.likely = res.range.max - 1;
1776 else
1778 HOST_WIDE_INT minprec = 6 + !radix /* decimal point */;
1779 res.range.likely = (flagmin
1780 + radix
1781 + minprec
1782 + 2 /* e+ */ + 2);
1785 /* The unlikely maximum accounts for the longest multibyte
1786 decimal point character. */
1787 res.range.unlikely = res.range.max + target_mb_len_max () - 1;
1788 break;
1791 default:
1792 return fmtresult ();
1795 /* Bump up the byte counters if WIDTH is greater. */
1796 res.adjust_for_width_or_precision (dir.width);
1797 return res;
1800 /* Return a range representing the minimum and maximum number of bytes
1801 that the directive DIR will write on output for the floating argument
1802 ARG. When ARG is null, is not a REAL_CST, or its type does not match
the type implied by the length modifier, defer to the overload that
takes only the precision range. The last parameter is unused. */
1804 static fmtresult
1805 format_floating (const directive &dir, tree arg, vr_values *)
1807 HOST_WIDE_INT prec[] = { dir.prec[0], dir.prec[1] };
1808 tree type = (dir.modifier == FMT_LEN_L || dir.modifier == FMT_LEN_ll
1809 ? long_double_type_node : double_type_node);
1811 /* For an indeterminate precision the lower bound must be assumed
1812 to be zero. */
1813 if (TOUPPER (dir.specifier) == 'A')
1815 /* Get the number of fractional decimal digits needed to represent
1816 the argument without a loss of accuracy. */
1817 unsigned fmtprec
1818 = REAL_MODE_FORMAT (TYPE_MODE (type))->p;
1820 /* The precision of the IEEE 754 double format is 53.
1821 The precision of all other GCC binary double formats
1822 is 56 or less. */
1823 unsigned maxprec = fmtprec <= 56 ? 13 : 15;
1825 /* For %a, leave the minimum precision unspecified to let
1826 MPFR trim trailing zeros (as it and many other systems
1827 including Glibc happen to do) and set the maximum
1828 precision to reflect what it would be with trailing zeros
1829 present (as Solaris and derived systems do). */
1830 if (dir.prec[1] < 0)
1832 /* Both bounds are negative implies that precision has
1833 not been specified. */
1834 prec[0] = maxprec;
1835 prec[1] = -1;
1837 else if (dir.prec[0] < 0)
1839 /* With a negative lower bound and a non-negative upper
1840 bound set the minimum precision to zero and the maximum
1841 to the greater of the maximum precision (i.e., with
1842 trailing zeros present) and the specified upper bound. */
1843 prec[0] = 0;
1844 prec[1] = dir.prec[1] < maxprec ? maxprec : dir.prec[1];
1847 else if (dir.prec[0] < 0)
1849 if (dir.prec[1] < 0)
1851 /* A precision in a strictly negative range is ignored and
1852 the default of 6 is used instead. */
1853 prec[0] = prec[1] = 6;
1855 else
1857 /* For a precision in a partly negative range, the lower bound
1858 must be assumed to be zero and the new upper bound is the
1859 greater of 6 (the default precision used when the specified
1860 precision is negative) and the upper bound of the specified
1861 range. */
1862 prec[0] = 0;
1863 prec[1] = dir.prec[1] < 6 ? 6 : dir.prec[1];
/* Without a constant argument of the expected type only the range
computed from the directive itself is available. */
1867 if (!arg
1868 || TREE_CODE (arg) != REAL_CST
1869 || !useless_type_conversion_p (type, TREE_TYPE (arg)))
1870 return format_floating (dir, prec);
1872 /* The minimum and maximum number of bytes produced by the directive. */
1873 fmtresult res;
1875 /* Get the constant's value and the real type format description for
the target. */
1876 const REAL_VALUE_TYPE *rvp = TREE_REAL_CST_PTR (arg);
1877 const real_format *rfmt = REAL_MODE_FORMAT (TYPE_MODE (TREE_TYPE (arg)));
1879 if (!real_isfinite (rvp))
1881 /* The format for Infinity and NaN is "[-]inf"/"[-]infinity"
1882 and "[-]nan" with the choice being implementation-defined
1883 but not locale dependent. */
1884 bool sign = dir.get_flag ('+') || real_isneg (rvp);
1885 res.range.min = 3 + sign;
1887 res.range.likely = res.range.min;
1888 res.range.max = res.range.min;
1889 /* The unlikely maximum is "[-/+]infinity" or "[-/+][qs]nan".
1890 For NaN, the C/POSIX standards specify two formats:
1891 "[-/+]nan"
1893 "[-/+]nan(n-char-sequence)"
1894 No known printf implementation outputs the latter format but AIX
1895 outputs QNaN and SNaN for quiet and signalling NaN, respectively,
1896 so the unlikely maximum reflects that. */
1897 res.range.unlikely = sign + (real_isinf (rvp) ? 8 : 4);
1899 /* The range for infinity and NaN is known unless either width
1900 or precision is unknown. Width has the same effect regardless
1901 of whether the argument is finite. Precision is either ignored
1902 (e.g., Glibc) or can have an effect on the short vs long format
1903 such as inf/infinity (e.g., Solaris). */
1904 res.knownrange = dir.known_width_and_precision ();
1906 /* Adjust the range for width but ignore precision. */
1907 res.adjust_for_width_or_precision (dir.width);
1909 return res;
1912 char fmtstr [40];
1913 char *pfmt = fmtstr;
1915 /* Append flags. Only the flags set in DIR are copied; the rest of
the format string is built by get_mpfr_format_length. */
1916 for (const char *pf = "-+ #0"; *pf; ++pf)
1917 if (dir.get_flag (*pf))
1918 *pfmt++ = *pf;
1920 *pfmt = '\0';
1923 /* Set up an array to easily iterate over. */
1924 unsigned HOST_WIDE_INT* const minmax[] = {
1925 &res.range.min, &res.range.max
1928 for (int i = 0; i != sizeof minmax / sizeof *minmax; ++i)
1930 /* Convert the GCC real value representation with the precision
1931 of the real type to the mpfr_t format rounding down in the
1932 first iteration that computes the minimum and up in the second
1933 that computes the maximum. This order is arbitrary because
1934 rounding in either direction can result in longer output. */
1935 mpfr_t mpfrval;
1936 mpfr_init2 (mpfrval, rfmt->p);
1937 mpfr_from_real (mpfrval, rvp, i ? GMP_RNDU : GMP_RNDD);
1939 /* Use the MPFR rounding specifier to round down in the first
1940 iteration and then up. In most but not all cases this will
1941 result in the same number of bytes. */
1942 char rndspec = "DU"[i];
1944 /* Format it and store the result in the corresponding member
1945 of the result struct. */
1946 *minmax[i] = get_mpfr_format_length (mpfrval, fmtstr, prec[i],
1947 dir.specifier, rndspec);
1948 mpfr_clear (mpfrval);
1952 /* Make sure the minimum is less than the maximum (MPFR rounding
1953 in the call to mpfr_snprintf can result in the reverse). */
1954 if (res.range.max < res.range.min)
1956 unsigned HOST_WIDE_INT tmp = res.range.min;
1957 res.range.min = res.range.max;
1958 res.range.max = tmp;
1961 /* The range is known unless either width or precision is unknown. */
1962 res.knownrange = dir.known_width_and_precision ();
1964 /* For the same floating point constant, unless width or precision
1965 is unknown, use the longer output as the likely maximum since
1966 with round to nearest either is equally likely. Otherwise, when
1967 precision is unknown, use the greater of the minimum and 3 as
1968 the likely output (for "0.0" since zero precision is unlikely). */
1969 if (res.knownrange)
1970 res.range.likely = res.range.max;
1971 else if (res.range.min < 3
1972 && dir.prec[0] < 0
1973 && (unsigned HOST_WIDE_INT)dir.prec[1] == target_int_max ())
1974 res.range.likely = 3;
1975 else
1976 res.range.likely = res.range.min;
1978 res.range.unlikely = res.range.max;
1980 if (res.range.max > 2 && (prec[0] != 0 || prec[1] != 0))
1982 /* Unless the precision is zero output longer than 2 bytes may
1983 include the decimal point which must be a single character
1984 up to MB_LEN_MAX in length. This is overly conservative
1985 since in some conversions some constants result in no decimal
1986 point (e.g., in %g). */
1987 res.range.unlikely += target_mb_len_max () - 1;
1990 res.adjust_for_width_or_precision (dir.width);
1991 return res;
1994 /* Return a FMTRESULT struct set to the lengths of the shortest and longest
1995 strings referenced by the expression STR, or (-1, -1) when not known.
1996 Used by the format_string function below. */
1998 static fmtresult
1999 get_string_length (tree str, unsigned eltsize)
2001 if (!str)
2002 return fmtresult ();
2004 c_strlen_data data;
2005 memset (&data, 0, sizeof (c_strlen_data));
2006 tree slen = c_strlen (str, 1, &data, eltsize);
2007 if (slen && TREE_CODE (slen) == INTEGER_CST)
2009 /* The string is properly terminated and
2010 we know its length. */
2011 fmtresult res (tree_to_shwi (slen));
2012 res.nonstr = NULL_TREE;
2013 return res;
2015 else if (!slen
2016 && data.decl
2017 && data.len
2018 && TREE_CODE (data.len) == INTEGER_CST)
2020 /* STR was not properly NUL terminated, but we have
2021 length information about the unterminated string. */
2022 fmtresult res (tree_to_shwi (data.len));
2023 res.nonstr = data.decl;
2024 return res;
2027 /* Determine the length of the shortest and longest string referenced
2028 by STR. Strings of unknown lengths are bounded by the sizes of
2029 arrays that subexpressions of STR may refer to. Pointers that
2030 aren't known to point any such arrays result in LENRANGE[1] set
2031 to SIZE_MAX. NONSTR is set to the declaration of the constant
2032 array that is known not to be nul-terminated. */
2033 tree lenrange[2];
2034 tree nonstr;
2035 bool flexarray = get_range_strlen (str, lenrange, eltsize, false, &nonstr);
2037 if (lenrange [0] || lenrange [1])
2039 HOST_WIDE_INT min
2040 = (tree_fits_uhwi_p (lenrange[0])
2041 ? tree_to_uhwi (lenrange[0])
2042 : 0);
2044 HOST_WIDE_INT max
2045 = (tree_fits_uhwi_p (lenrange[1])
2046 ? tree_to_uhwi (lenrange[1])
2047 : HOST_WIDE_INT_M1U);
2049 /* get_range_strlen() returns the target value of SIZE_MAX for
2050 strings of unknown length. Bump it up to HOST_WIDE_INT_M1U
2051 which may be bigger. */
2052 if ((unsigned HOST_WIDE_INT)min == target_size_max ())
2053 min = HOST_WIDE_INT_M1U;
2054 if ((unsigned HOST_WIDE_INT)max == target_size_max ())
2055 max = HOST_WIDE_INT_M1U;
2057 fmtresult res (min, max);
2058 res.nonstr = nonstr;
2060 /* Set RES.KNOWNRANGE to true if and only if all strings referenced
2061 by STR are known to be bounded (though not necessarily by their
2062 actual length but perhaps by their maximum possible length). */
2063 if (res.range.max < target_int_max ())
2065 res.knownrange = true;
2066 /* When the the length of the longest string is known and not
2067 excessive use it as the likely length of the string(s). */
2068 res.range.likely = res.range.max;
2070 else
2072 /* When the upper bound is unknown (it can be zero or excessive)
2073 set the likely length to the greater of 1 and the length of
2074 the shortest string and reset the lower bound to zero. */
2075 res.range.likely = res.range.min ? res.range.min : warn_level > 1;
2076 res.range.min = 0;
2079 /* If the range of string length has been estimated from the size
2080 of an array at the end of a struct assume that it's longer than
2081 the array bound says it is in case it's used as a poor man's
2082 flexible array member, such as in struct S { char a[4]; }; */
2083 res.range.unlikely = flexarray ? HOST_WIDE_INT_MAX : res.range.max;
2085 return res;
2088 return fmtresult ();
2091 /* Return the minimum and maximum number of characters formatted
2092 by the '%c' format directives and its wide character form for
2093 the argument ARG. ARG can be null (for functions such as
2094 vsprinf). */
2096 static fmtresult
2097 format_character (const directive &dir, tree arg, vr_values *vr_values)
2099 fmtresult res;
2101 res.knownrange = true;
2103 if (dir.specifier == 'C'
2104 || dir.modifier == FMT_LEN_l)
2106 /* A wide character can result in as few as zero bytes. */
2107 res.range.min = 0;
2109 HOST_WIDE_INT min, max;
2110 if (get_int_range (arg, &min, &max, false, 0, vr_values))
2112 if (min == 0 && max == 0)
2114 /* The NUL wide character results in no bytes. */
2115 res.range.max = 0;
2116 res.range.likely = 0;
2117 res.range.unlikely = 0;
2119 else if (min >= 0 && min < 128)
2121 /* Be conservative if the target execution character set
2122 is not a 1-to-1 mapping to the source character set or
2123 if the source set is not ASCII. */
2124 bool one_2_one_ascii
2125 = (target_to_host_charmap[0] == 1 && target_to_host ('a') == 97);
2127 /* A wide character in the ASCII range most likely results
2128 in a single byte, and only unlikely in up to MB_LEN_MAX. */
2129 res.range.max = one_2_one_ascii ? 1 : target_mb_len_max ();;
2130 res.range.likely = 1;
2131 res.range.unlikely = target_mb_len_max ();
2132 res.mayfail = !one_2_one_ascii;
2134 else
2136 /* A wide character outside the ASCII range likely results
2137 in up to two bytes, and only unlikely in up to MB_LEN_MAX. */
2138 res.range.max = target_mb_len_max ();
2139 res.range.likely = 2;
2140 res.range.unlikely = res.range.max;
2141 /* Converting such a character may fail. */
2142 res.mayfail = true;
2145 else
2147 /* An unknown wide character is treated the same as a wide
2148 character outside the ASCII range. */
2149 res.range.max = target_mb_len_max ();
2150 res.range.likely = 2;
2151 res.range.unlikely = res.range.max;
2152 res.mayfail = true;
2155 else
2157 /* A plain '%c' directive. Its ouput is exactly 1. */
2158 res.range.min = res.range.max = 1;
2159 res.range.likely = res.range.unlikely = 1;
2160 res.knownrange = true;
2163 /* Bump up the byte counters if WIDTH is greater. */
2164 return res.adjust_for_width_or_precision (dir.width);
2167 /* Return the minimum and maximum number of characters formatted
2168 by the '%s' format directive and its wide character form for
2169 the argument ARG. ARG can be null (for functions such as
2170 vsprinf). */
2172 static fmtresult
2173 format_string (const directive &dir, tree arg, vr_values *)
2175 fmtresult res;
2177 /* Compute the range the argument's length can be in. */
2178 int count_by = 1;
2179 if (dir.specifier == 'S' || dir.modifier == FMT_LEN_l)
2181 /* Get a node for a C type that will be the same size
2182 as a wchar_t on the target. */
2183 tree node = get_typenode_from_name (MODIFIED_WCHAR_TYPE);
2185 /* Now that we have a suitable node, get the number of
2186 bytes it occupies. */
2187 count_by = int_size_in_bytes (node);
2188 gcc_checking_assert (count_by == 2 || count_by == 4);
2191 fmtresult slen = get_string_length (arg, count_by);
2192 if (slen.range.min == slen.range.max
2193 && slen.range.min < HOST_WIDE_INT_MAX)
2195 /* The argument is either a string constant or it refers
2196 to one of a number of strings of the same length. */
2198 /* A '%s' directive with a string argument with constant length. */
2199 res.range = slen.range;
2201 if (dir.specifier == 'S'
2202 || dir.modifier == FMT_LEN_l)
2204 /* In the worst case the length of output of a wide string S
2205 is bounded by MB_LEN_MAX * wcslen (S). */
2206 res.range.max *= target_mb_len_max ();
2207 res.range.unlikely = res.range.max;
2208 /* It's likely that the the total length is not more that
2209 2 * wcslen (S).*/
2210 res.range.likely = res.range.min * 2;
2212 if (dir.prec[1] >= 0
2213 && (unsigned HOST_WIDE_INT)dir.prec[1] < res.range.max)
2215 res.range.max = dir.prec[1];
2216 res.range.likely = dir.prec[1];
2217 res.range.unlikely = dir.prec[1];
2220 if (dir.prec[0] < 0 && dir.prec[1] > -1)
2221 res.range.min = 0;
2222 else if (dir.prec[0] >= 0)
2223 res.range.likely = dir.prec[0];
2225 /* Even a non-empty wide character string need not convert into
2226 any bytes. */
2227 res.range.min = 0;
2229 /* A non-empty wide character conversion may fail. */
2230 if (slen.range.max > 0)
2231 res.mayfail = true;
2233 else
2235 res.knownrange = true;
2237 if (dir.prec[0] < 0 && dir.prec[1] > -1)
2238 res.range.min = 0;
2239 else if ((unsigned HOST_WIDE_INT)dir.prec[0] < res.range.min)
2240 res.range.min = dir.prec[0];
2242 if ((unsigned HOST_WIDE_INT)dir.prec[1] < res.range.max)
2244 res.range.max = dir.prec[1];
2245 res.range.likely = dir.prec[1];
2246 res.range.unlikely = dir.prec[1];
2250 else if (arg && integer_zerop (arg))
2252 /* Handle null pointer argument. */
2254 fmtresult res (0);
2255 res.nullp = true;
2256 return res;
2258 else
2260 /* For a '%s' and '%ls' directive with a non-constant string (either
2261 one of a number of strings of known length or an unknown string)
2262 the minimum number of characters is lesser of PRECISION[0] and
2263 the length of the shortest known string or zero, and the maximum
2264 is the lessser of the length of the longest known string or
2265 PTRDIFF_MAX and PRECISION[1]. The likely length is either
2266 the minimum at level 1 and the greater of the minimum and 1
2267 at level 2. This result is adjust upward for width (if it's
2268 specified). */
2270 if (dir.specifier == 'S'
2271 || dir.modifier == FMT_LEN_l)
2273 /* A wide character converts to as few as zero bytes. */
2274 slen.range.min = 0;
2275 if (slen.range.max < target_int_max ())
2276 slen.range.max *= target_mb_len_max ();
2278 if (slen.range.likely < target_int_max ())
2279 slen.range.likely *= 2;
2281 if (slen.range.likely < target_int_max ())
2282 slen.range.unlikely *= target_mb_len_max ();
2284 /* A non-empty wide character conversion may fail. */
2285 if (slen.range.max > 0)
2286 res.mayfail = true;
2289 res.range = slen.range;
2291 if (dir.prec[0] >= 0)
2293 /* Adjust the minimum to zero if the string length is unknown,
2294 or at most the lower bound of the precision otherwise. */
2295 if (slen.range.min >= target_int_max ())
2296 res.range.min = 0;
2297 else if ((unsigned HOST_WIDE_INT)dir.prec[0] < slen.range.min)
2298 res.range.min = dir.prec[0];
2300 /* Make both maxima no greater than the upper bound of precision. */
2301 if ((unsigned HOST_WIDE_INT)dir.prec[1] < slen.range.max
2302 || slen.range.max >= target_int_max ())
2304 res.range.max = dir.prec[1];
2305 res.range.unlikely = dir.prec[1];
2308 /* If precision is constant, set the likely counter to the lesser
2309 of it and the maximum string length. Otherwise, if the lower
2310 bound of precision is greater than zero, set the likely counter
2311 to the minimum. Otherwise set it to zero or one based on
2312 the warning level. */
2313 if (dir.prec[0] == dir.prec[1])
2314 res.range.likely
2315 = ((unsigned HOST_WIDE_INT)dir.prec[0] < slen.range.max
2316 ? dir.prec[0] : slen.range.max);
2317 else if (dir.prec[0] > 0)
2318 res.range.likely = res.range.min;
2319 else
2320 res.range.likely = warn_level > 1;
2322 else if (dir.prec[1] >= 0)
2324 res.range.min = 0;
2325 if ((unsigned HOST_WIDE_INT)dir.prec[1] < slen.range.max)
2326 res.range.max = dir.prec[1];
2327 res.range.likely = dir.prec[1] ? warn_level > 1 : 0;
2329 else if (slen.range.min >= target_int_max ())
2331 res.range.min = 0;
2332 res.range.max = HOST_WIDE_INT_MAX;
2333 /* At level 1 strings of unknown length are assumed to be
2334 empty, while at level 1 they are assumed to be one byte
2335 long. */
2336 res.range.likely = warn_level > 1;
2338 else
2340 /* A string of unknown length unconstrained by precision is
2341 assumed to be empty at level 1 and just one character long
2342 at higher levels. */
2343 if (res.range.likely >= target_int_max ())
2344 res.range.likely = warn_level > 1;
2347 res.range.unlikely = res.range.max;
2350 /* If the argument isn't a nul-terminated string and the number
2351 of bytes on output isn't bounded by precision, set NONSTR. */
2352 if (slen.nonstr && slen.range.min < (unsigned HOST_WIDE_INT)dir.prec[0])
2353 res.nonstr = slen.nonstr;
2355 /* Bump up the byte counters if WIDTH is greater. */
2356 return res.adjust_for_width_or_precision (dir.width);
2359 /* Format plain string (part of the format string itself). */
2361 static fmtresult
2362 format_plain (const directive &dir, tree, vr_values *)
2364 fmtresult res (dir.len);
2365 return res;
2368 /* Return true if the RESULT of a directive in a call describe by INFO
2369 should be diagnosed given the AVAILable space in the destination. */
2371 static bool
2372 should_warn_p (const sprintf_dom_walker::call_info &info,
2373 const result_range &avail, const result_range &result)
2375 if (result.max <= avail.min)
2377 /* The least amount of space remaining in the destination is big
2378 enough for the longest output. */
2379 return false;
2382 if (info.bounded)
2384 if (warn_format_trunc == 1 && result.min <= avail.max
2385 && info.retval_used ())
2387 /* The likely amount of space remaining in the destination is big
2388 enough for the least output and the return value is used. */
2389 return false;
2392 if (warn_format_trunc == 1 && result.likely <= avail.likely
2393 && !info.retval_used ())
2395 /* The likely amount of space remaining in the destination is big
2396 enough for the likely output and the return value is unused. */
2397 return false;
2400 if (warn_format_trunc == 2
2401 && result.likely <= avail.min
2402 && (result.max <= avail.min
2403 || result.max > HOST_WIDE_INT_MAX))
2405 /* The minimum amount of space remaining in the destination is big
2406 enough for the longest output. */
2407 return false;
2410 else
2412 if (warn_level == 1 && result.likely <= avail.likely)
2414 /* The likely amount of space remaining in the destination is big
2415 enough for the likely output. */
2416 return false;
2419 if (warn_level == 2
2420 && result.likely <= avail.min
2421 && (result.max <= avail.min
2422 || result.max > HOST_WIDE_INT_MAX))
2424 /* The minimum amount of space remaining in the destination is big
2425 enough for the longest output. */
2426 return false;
2430 return true;
2433 /* At format string location describe by DIRLOC in a call described
2434 by INFO, issue a warning for a directive DIR whose output may be
2435 in excess of the available space AVAIL_RANGE in the destination
2436 given the formatting result FMTRES. This function does nothing
2437 except decide whether to issue a warning for a possible write
2438 past the end or truncation and, if so, format the warning.
2439 Return true if a warning has been issued. */
2441 static bool
2442 maybe_warn (substring_loc &dirloc, location_t argloc,
2443 const sprintf_dom_walker::call_info &info,
2444 const result_range &avail_range, const result_range &res,
2445 const directive &dir)
2447 if (!should_warn_p (info, avail_range, res))
2448 return false;
2450 /* A warning will definitely be issued below. */
2452 /* The maximum byte count to reference in the warning. Larger counts
2453 imply that the upper bound is unknown (and could be anywhere between
2454 RES.MIN + 1 and SIZE_MAX / 2) are printed as "N or more bytes" rather
2455 than "between N and X" where X is some huge number. */
2456 unsigned HOST_WIDE_INT maxbytes = target_dir_max ();
2458 /* True when there is enough room in the destination for the least
2459 amount of a directive's output but not enough for its likely or
2460 maximum output. */
2461 bool maybe = (res.min <= avail_range.max
2462 && (avail_range.min < res.likely
2463 || (res.max < HOST_WIDE_INT_MAX
2464 && avail_range.min < res.max)));
2466 /* Buffer for the directive in the host character set (used when
2467 the source character set is different). */
2468 char hostdir[32];
2470 if (avail_range.min == avail_range.max)
2472 /* The size of the destination region is exact. */
2473 unsigned HOST_WIDE_INT navail = avail_range.max;
2475 if (target_to_host (*dir.beg) != '%')
2477 /* For plain character directives (i.e., the format string itself)
2478 but not others, point the caret at the first character that's
2479 past the end of the destination. */
2480 if (navail < dir.len)
2481 dirloc.set_caret_index (dirloc.get_caret_idx () + navail);
2484 if (*dir.beg == '\0')
2486 /* This is the terminating nul. */
2487 gcc_assert (res.min == 1 && res.min == res.max);
2489 return fmtwarn (dirloc, UNKNOWN_LOCATION, NULL, info.warnopt (),
2490 info.bounded
2491 ? (maybe
2492 ? G_("%qE output may be truncated before the "
2493 "last format character")
2494 : G_("%qE output truncated before the last "
2495 "format character"))
2496 : (maybe
2497 ? G_("%qE may write a terminating nul past the "
2498 "end of the destination")
2499 : G_("%qE writing a terminating nul past the "
2500 "end of the destination")),
2501 info.func);
2504 if (res.min == res.max)
2506 const char *d = target_to_host (hostdir, sizeof hostdir, dir.beg);
2507 if (!info.bounded)
2508 return fmtwarn_n (dirloc, argloc, NULL, info.warnopt (), res.min,
2509 "%<%.*s%> directive writing %wu byte into a "
2510 "region of size %wu",
2511 "%<%.*s%> directive writing %wu bytes into a "
2512 "region of size %wu",
2513 (int) dir.len, d, res.min, navail);
2514 else if (maybe)
2515 return fmtwarn_n (dirloc, argloc, NULL, info.warnopt (), res.min,
2516 "%<%.*s%> directive output may be truncated "
2517 "writing %wu byte into a region of size %wu",
2518 "%<%.*s%> directive output may be truncated "
2519 "writing %wu bytes into a region of size %wu",
2520 (int) dir.len, d, res.min, navail);
2521 else
2522 return fmtwarn_n (dirloc, argloc, NULL, info.warnopt (), res.min,
2523 "%<%.*s%> directive output truncated writing "
2524 "%wu byte into a region of size %wu",
2525 "%<%.*s%> directive output truncated writing "
2526 "%wu bytes into a region of size %wu",
2527 (int) dir.len, d, res.min, navail);
2529 if (res.min == 0 && res.max < maxbytes)
2530 return fmtwarn (dirloc, argloc, NULL,
2531 info.warnopt (),
2532 info.bounded
2533 ? (maybe
2534 ? G_("%<%.*s%> directive output may be truncated "
2535 "writing up to %wu bytes into a region of "
2536 "size %wu")
2537 : G_("%<%.*s%> directive output truncated writing "
2538 "up to %wu bytes into a region of size %wu"))
2539 : G_("%<%.*s%> directive writing up to %wu bytes "
2540 "into a region of size %wu"), (int) dir.len,
2541 target_to_host (hostdir, sizeof hostdir, dir.beg),
2542 res.max, navail);
2544 if (res.min == 0 && maxbytes <= res.max)
2545 /* This is a special case to avoid issuing the potentially
2546 confusing warning:
2547 writing 0 or more bytes into a region of size 0. */
2548 return fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2549 info.bounded
2550 ? (maybe
2551 ? G_("%<%.*s%> directive output may be truncated "
2552 "writing likely %wu or more bytes into a "
2553 "region of size %wu")
2554 : G_("%<%.*s%> directive output truncated writing "
2555 "likely %wu or more bytes into a region of "
2556 "size %wu"))
2557 : G_("%<%.*s%> directive writing likely %wu or more "
2558 "bytes into a region of size %wu"), (int) dir.len,
2559 target_to_host (hostdir, sizeof hostdir, dir.beg),
2560 res.likely, navail);
2562 if (res.max < maxbytes)
2563 return fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2564 info.bounded
2565 ? (maybe
2566 ? G_("%<%.*s%> directive output may be truncated "
2567 "writing between %wu and %wu bytes into a "
2568 "region of size %wu")
2569 : G_("%<%.*s%> directive output truncated "
2570 "writing between %wu and %wu bytes into a "
2571 "region of size %wu"))
2572 : G_("%<%.*s%> directive writing between %wu and "
2573 "%wu bytes into a region of size %wu"),
2574 (int) dir.len,
2575 target_to_host (hostdir, sizeof hostdir, dir.beg),
2576 res.min, res.max, navail);
2578 return fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2579 info.bounded
2580 ? (maybe
2581 ? G_("%<%.*s%> directive output may be truncated "
2582 "writing %wu or more bytes into a region of "
2583 "size %wu")
2584 : G_("%<%.*s%> directive output truncated writing "
2585 "%wu or more bytes into a region of size %wu"))
2586 : G_("%<%.*s%> directive writing %wu or more bytes "
2587 "into a region of size %wu"), (int) dir.len,
2588 target_to_host (hostdir, sizeof hostdir, dir.beg),
2589 res.min, navail);
2592 /* The size of the destination region is a range. */
2594 if (target_to_host (*dir.beg) != '%')
2596 unsigned HOST_WIDE_INT navail = avail_range.max;
2598 /* For plain character directives (i.e., the format string itself)
2599 but not others, point the caret at the first character that's
2600 past the end of the destination. */
2601 if (navail < dir.len)
2602 dirloc.set_caret_index (dirloc.get_caret_idx () + navail);
2605 if (*dir.beg == '\0')
2607 gcc_assert (res.min == 1 && res.min == res.max);
2609 return fmtwarn (dirloc, UNKNOWN_LOCATION, NULL, info.warnopt (),
2610 info.bounded
2611 ? (maybe
2612 ? G_("%qE output may be truncated before the last "
2613 "format character")
2614 : G_("%qE output truncated before the last format "
2615 "character"))
2616 : (maybe
2617 ? G_("%qE may write a terminating nul past the end "
2618 "of the destination")
2619 : G_("%qE writing a terminating nul past the end "
2620 "of the destination")), info.func);
2623 if (res.min == res.max)
2625 const char *d = target_to_host (hostdir, sizeof hostdir, dir.beg);
2626 if (!info.bounded)
2627 return fmtwarn_n (dirloc, argloc, NULL, info.warnopt (), res.min,
2628 "%<%.*s%> directive writing %wu byte into a region "
2629 "of size between %wu and %wu",
2630 "%<%.*s%> directive writing %wu bytes into a region "
2631 "of size between %wu and %wu", (int) dir.len, d,
2632 res.min, avail_range.min, avail_range.max);
2633 else if (maybe)
2634 return fmtwarn_n (dirloc, argloc, NULL, info.warnopt (), res.min,
2635 "%<%.*s%> directive output may be truncated writing "
2636 "%wu byte into a region of size between %wu and %wu",
2637 "%<%.*s%> directive output may be truncated writing "
2638 "%wu bytes into a region of size between %wu and "
2639 "%wu", (int) dir.len, d, res.min, avail_range.min,
2640 avail_range.max);
2641 else
2642 return fmtwarn_n (dirloc, argloc, NULL, info.warnopt (), res.min,
2643 "%<%.*s%> directive output truncated writing %wu "
2644 "byte into a region of size between %wu and %wu",
2645 "%<%.*s%> directive output truncated writing %wu "
2646 "bytes into a region of size between %wu and %wu",
2647 (int) dir.len, d, res.min, avail_range.min,
2648 avail_range.max);
2651 if (res.min == 0 && res.max < maxbytes)
2652 return fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2653 info.bounded
2654 ? (maybe
2655 ? G_("%<%.*s%> directive output may be truncated "
2656 "writing up to %wu bytes into a region of size "
2657 "between %wu and %wu")
2658 : G_("%<%.*s%> directive output truncated writing "
2659 "up to %wu bytes into a region of size between "
2660 "%wu and %wu"))
2661 : G_("%<%.*s%> directive writing up to %wu bytes "
2662 "into a region of size between %wu and %wu"),
2663 (int) dir.len,
2664 target_to_host (hostdir, sizeof hostdir, dir.beg),
2665 res.max, avail_range.min, avail_range.max);
2667 if (res.min == 0 && maxbytes <= res.max)
2668 /* This is a special case to avoid issuing the potentially confusing
2669 warning:
2670 writing 0 or more bytes into a region of size between 0 and N. */
2671 return fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2672 info.bounded
2673 ? (maybe
2674 ? G_("%<%.*s%> directive output may be truncated "
2675 "writing likely %wu or more bytes into a region "
2676 "of size between %wu and %wu")
2677 : G_("%<%.*s%> directive output truncated writing "
2678 "likely %wu or more bytes into a region of size "
2679 "between %wu and %wu"))
2680 : G_("%<%.*s%> directive writing likely %wu or more bytes "
2681 "into a region of size between %wu and %wu"),
2682 (int) dir.len,
2683 target_to_host (hostdir, sizeof hostdir, dir.beg),
2684 res.likely, avail_range.min, avail_range.max);
2686 if (res.max < maxbytes)
2687 return fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2688 info.bounded
2689 ? (maybe
2690 ? G_("%<%.*s%> directive output may be truncated "
2691 "writing between %wu and %wu bytes into a region "
2692 "of size between %wu and %wu")
2693 : G_("%<%.*s%> directive output truncated writing "
2694 "between %wu and %wu bytes into a region of size "
2695 "between %wu and %wu"))
2696 : G_("%<%.*s%> directive writing between %wu and "
2697 "%wu bytes into a region of size between %wu and "
2698 "%wu"), (int) dir.len,
2699 target_to_host (hostdir, sizeof hostdir, dir.beg),
2700 res.min, res.max, avail_range.min, avail_range.max);
2702 return fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2703 info.bounded
2704 ? (maybe
2705 ? G_("%<%.*s%> directive output may be truncated writing "
2706 "%wu or more bytes into a region of size between "
2707 "%wu and %wu")
2708 : G_("%<%.*s%> directive output truncated writing "
2709 "%wu or more bytes into a region of size between "
2710 "%wu and %wu"))
2711 : G_("%<%.*s%> directive writing %wu or more bytes "
2712 "into a region of size between %wu and %wu"),
2713 (int) dir.len,
2714 target_to_host (hostdir, sizeof hostdir, dir.beg),
2715 res.min, avail_range.min, avail_range.max);
2718 /* Compute the length of the output resulting from the directive DIR
2719 in a call described by INFO and update the overall result of the call
2720 in *RES. Return true if the directive has been handled. */
2722 static bool
2723 format_directive (const sprintf_dom_walker::call_info &info,
2724 format_result *res, const directive &dir,
2725 class vr_values *vr_values)
2727 /* Offset of the beginning of the directive from the beginning
2728 of the format string. */
2729 size_t offset = dir.beg - info.fmtstr;
2730 size_t start = offset;
2731 size_t length = offset + dir.len - !!dir.len;
2733 /* Create a location for the whole directive from the % to the format
2734 specifier. */
2735 substring_loc dirloc (info.fmtloc, TREE_TYPE (info.format),
2736 offset, start, length);
2738 /* Also get the location of the argument if possible.
2739 This doesn't work for integer literals or function calls. */
2740 location_t argloc = UNKNOWN_LOCATION;
2741 if (dir.arg)
2742 argloc = EXPR_LOCATION (dir.arg);
2744 /* Bail when there is no function to compute the output length,
2745 or when minimum length checking has been disabled. */
2746 if (!dir.fmtfunc || res->range.min >= HOST_WIDE_INT_MAX)
2747 return false;
2749 /* Compute the range of lengths of the formatted output. */
2750 fmtresult fmtres = dir.fmtfunc (dir, dir.arg, vr_values);
2752 /* Record whether the output of all directives is known to be
2753 bounded by some maximum, implying that their arguments are
2754 either known exactly or determined to be in a known range
2755 or, for strings, limited by the upper bounds of the arrays
2756 they refer to. */
2757 res->knownrange &= fmtres.knownrange;
2759 if (!fmtres.knownrange)
2761 /* Only when the range is known, check it against the host value
2762 of INT_MAX + (the number of bytes of the "%.*Lf" directive with
2763 INT_MAX precision, which is the longest possible output of any
2764 single directive). That's the largest valid byte count (though
2765 not valid call to a printf-like function because it can never
2766 return such a count). Otherwise, the range doesn't correspond
2767 to known values of the argument. */
2768 if (fmtres.range.max > target_dir_max ())
2770 /* Normalize the MAX counter to avoid having to deal with it
2771 later. The counter can be less than HOST_WIDE_INT_M1U
2772 when compiling for an ILP32 target on an LP64 host. */
2773 fmtres.range.max = HOST_WIDE_INT_M1U;
2774 /* Disable exact and maximum length checking after a failure
2775 to determine the maximum number of characters (for example
2776 for wide characters or wide character strings) but continue
2777 tracking the minimum number of characters. */
2778 res->range.max = HOST_WIDE_INT_M1U;
2781 if (fmtres.range.min > target_dir_max ())
2783 /* Disable exact length checking after a failure to determine
2784 even the minimum number of characters (it shouldn't happen
2785 except in an error) but keep tracking the minimum and maximum
2786 number of characters. */
2787 return true;
2791 /* Buffer for the directive in the host character set (used when
2792 the source character set is different). */
2793 char hostdir[32];
2795 int dirlen = dir.len;
2797 if (fmtres.nullp)
2799 fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2800 "%G%<%.*s%> directive argument is null",
2801 info.callstmt, dirlen,
2802 target_to_host (hostdir, sizeof hostdir, dir.beg));
2804 /* Don't bother processing the rest of the format string. */
2805 res->warned = true;
2806 res->range.min = HOST_WIDE_INT_M1U;
2807 res->range.max = HOST_WIDE_INT_M1U;
2808 return false;
2811 /* Compute the number of available bytes in the destination. There
2812 must always be at least one byte of space for the terminating
2813 NUL that's appended after the format string has been processed. */
2814 result_range avail_range = bytes_remaining (info.objsize, *res);
2816 bool warned = res->warned;
2818 if (!warned)
2819 warned = maybe_warn (dirloc, argloc, info, avail_range,
2820 fmtres.range, dir);
2822 /* Bump up the total maximum if it isn't too big. */
2823 if (res->range.max < HOST_WIDE_INT_MAX
2824 && fmtres.range.max < HOST_WIDE_INT_MAX)
2825 res->range.max += fmtres.range.max;
2827 /* Raise the total unlikely maximum by the larger of the maximum
2828 and the unlikely maximum. */
2829 unsigned HOST_WIDE_INT save = res->range.unlikely;
2830 if (fmtres.range.max < fmtres.range.unlikely)
2831 res->range.unlikely += fmtres.range.unlikely;
2832 else
2833 res->range.unlikely += fmtres.range.max;
2835 if (res->range.unlikely < save)
2836 res->range.unlikely = HOST_WIDE_INT_M1U;
2838 res->range.min += fmtres.range.min;
2839 res->range.likely += fmtres.range.likely;
2841 /* Has the minimum directive output length exceeded the maximum
2842 of 4095 bytes required to be supported? */
2843 bool minunder4k = fmtres.range.min < 4096;
2844 bool maxunder4k = fmtres.range.max < 4096;
2845 /* Clear POSUNDER4K in the overall result if the maximum has exceeded
2846 the 4k (this is necessary to avoid the return value optimization
2847 that may not be safe in the maximum case). */
2848 if (!maxunder4k)
2849 res->posunder4k = false;
2850 /* Also clear POSUNDER4K if the directive may fail. */
2851 if (fmtres.mayfail)
2852 res->posunder4k = false;
2854 if (!warned
2855 /* Only warn at level 2. */
2856 && warn_level > 1
2857 && (!minunder4k
2858 || (!maxunder4k && fmtres.range.max < HOST_WIDE_INT_MAX)))
2860 /* The directive output may be longer than the maximum required
2861 to be handled by an implementation according to 7.21.6.1, p15
2862 of C11. Warn on this only at level 2 but remember this and
2863 prevent folding the return value when done. This allows for
2864 the possibility of the actual libc call failing due to ENOMEM
2865 (like Glibc does under some conditions). */
2867 if (fmtres.range.min == fmtres.range.max)
2868 warned = fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2869 "%<%.*s%> directive output of %wu bytes exceeds "
2870 "minimum required size of 4095", dirlen,
2871 target_to_host (hostdir, sizeof hostdir, dir.beg),
2872 fmtres.range.min);
2873 else
2874 warned = fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2875 minunder4k
2876 ? G_("%<%.*s%> directive output between %wu and %wu "
2877 "bytes may exceed minimum required size of "
2878 "4095")
2879 : G_("%<%.*s%> directive output between %wu and %wu "
2880 "bytes exceeds minimum required size of 4095"),
2881 dirlen,
2882 target_to_host (hostdir, sizeof hostdir, dir.beg),
2883 fmtres.range.min, fmtres.range.max);
2886 /* Has the likely and maximum directive output exceeded INT_MAX? */
2887 bool likelyximax = *dir.beg && res->range.likely > target_int_max ();
2888 /* Don't consider the maximum to be in excess when it's the result
2889 of a string of unknown length (i.e., whose maximum has been set
2890 to be greater than or equal to HOST_WIDE_INT_MAX. */
2891 bool maxximax = (*dir.beg
2892 && res->range.max > target_int_max ()
2893 && res->range.max < HOST_WIDE_INT_MAX);
2895 if (!warned
2896 /* Warn for the likely output size at level 1. */
2897 && (likelyximax
2898 /* But only warn for the maximum at level 2. */
2899 || (warn_level > 1
2900 && maxximax
2901 && fmtres.range.max < HOST_WIDE_INT_MAX)))
2903 /* The directive output causes the total length of output
2904 to exceed INT_MAX bytes. */
2906 if (fmtres.range.min == fmtres.range.max)
2907 warned = fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2908 "%<%.*s%> directive output of %wu bytes causes "
2909 "result to exceed %<INT_MAX%>", dirlen,
2910 target_to_host (hostdir, sizeof hostdir, dir.beg),
2911 fmtres.range.min);
2912 else
2913 warned = fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2914 fmtres.range.min > target_int_max ()
2915 ? G_("%<%.*s%> directive output between %wu and "
2916 "%wu bytes causes result to exceed "
2917 "%<INT_MAX%>")
2918 : G_("%<%.*s%> directive output between %wu and "
2919 "%wu bytes may cause result to exceed "
2920 "%<INT_MAX%>"), dirlen,
2921 target_to_host (hostdir, sizeof hostdir, dir.beg),
2922 fmtres.range.min, fmtres.range.max);
2925 if (!warned && fmtres.nonstr)
2927 warned = fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2928 "%<%.*s%> directive argument is not a nul-terminated "
2929 "string",
2930 dirlen,
2931 target_to_host (hostdir, sizeof hostdir, dir.beg));
2932 if (warned && DECL_P (fmtres.nonstr))
2933 inform (DECL_SOURCE_LOCATION (fmtres.nonstr),
2934 "referenced argument declared here");
2935 return false;
2938 if (warned && fmtres.range.min < fmtres.range.likely
2939 && fmtres.range.likely < fmtres.range.max)
2940 inform_n (info.fmtloc, fmtres.range.likely,
2941 "assuming directive output of %wu byte",
2942 "assuming directive output of %wu bytes",
2943 fmtres.range.likely);
2945 if (warned && fmtres.argmin)
2947 if (fmtres.argmin == fmtres.argmax)
2948 inform (info.fmtloc, "directive argument %qE", fmtres.argmin);
2949 else if (fmtres.knownrange)
2950 inform (info.fmtloc, "directive argument in the range [%E, %E]",
2951 fmtres.argmin, fmtres.argmax);
2952 else
2953 inform (info.fmtloc,
2954 "using the range [%E, %E] for directive argument",
2955 fmtres.argmin, fmtres.argmax);
2958 res->warned |= warned;
2960 if (!dir.beg[0] && res->warned && info.objsize < HOST_WIDE_INT_MAX)
2962 /* If a warning has been issued for buffer overflow or truncation
2963 (but not otherwise) help the user figure out how big a buffer
2964 they need. */
2966 location_t callloc = gimple_location (info.callstmt);
2968 unsigned HOST_WIDE_INT min = res->range.min;
2969 unsigned HOST_WIDE_INT max = res->range.max;
2971 if (min == max)
2972 inform (callloc,
2973 (min == 1
2974 ? G_("%qE output %wu byte into a destination of size %wu")
2975 : G_("%qE output %wu bytes into a destination of size %wu")),
2976 info.func, min, info.objsize);
2977 else if (max < HOST_WIDE_INT_MAX)
2978 inform (callloc,
2979 "%qE output between %wu and %wu bytes into "
2980 "a destination of size %wu",
2981 info.func, min, max, info.objsize);
2982 else if (min < res->range.likely && res->range.likely < max)
2983 inform (callloc,
2984 "%qE output %wu or more bytes (assuming %wu) into "
2985 "a destination of size %wu",
2986 info.func, min, res->range.likely, info.objsize);
2987 else
2988 inform (callloc,
2989 "%qE output %wu or more bytes into a destination of size %wu",
2990 info.func, min, info.objsize);
2993 if (dump_file && *dir.beg)
2995 fprintf (dump_file,
2996 " Result: "
2997 HOST_WIDE_INT_PRINT_DEC ", " HOST_WIDE_INT_PRINT_DEC ", "
2998 HOST_WIDE_INT_PRINT_DEC ", " HOST_WIDE_INT_PRINT_DEC " ("
2999 HOST_WIDE_INT_PRINT_DEC ", " HOST_WIDE_INT_PRINT_DEC ", "
3000 HOST_WIDE_INT_PRINT_DEC ", " HOST_WIDE_INT_PRINT_DEC ")\n",
3001 fmtres.range.min, fmtres.range.likely,
3002 fmtres.range.max, fmtres.range.unlikely,
3003 res->range.min, res->range.likely,
3004 res->range.max, res->range.unlikely);
3007 return true;
3010 /* Parse a format directive in function call described by INFO starting
3011 at STR and populate DIR structure. Bump up *ARGNO by the number of
3012 arguments extracted for the directive. Return the length of
3013 the directive. */
3015 static size_t
3016 parse_directive (sprintf_dom_walker::call_info &info,
3017 directive &dir, format_result *res,
3018 const char *str, unsigned *argno,
3019 vr_values *vr_values)
3021 const char *pcnt = strchr (str, target_percent);
3022 dir.beg = str;
3024 if (size_t len = pcnt ? pcnt - str : *str ? strlen (str) : 1)
3026 /* This directive is either a plain string or the terminating nul
3027 (which isn't really a directive but it simplifies things to
3028 handle it as if it were). */
3029 dir.len = len;
3030 dir.fmtfunc = format_plain;
3032 if (dump_file)
3034 fprintf (dump_file, " Directive %u at offset "
3035 HOST_WIDE_INT_PRINT_UNSIGNED ": \"%.*s\", "
3036 "length = " HOST_WIDE_INT_PRINT_UNSIGNED "\n",
3037 dir.dirno,
3038 (unsigned HOST_WIDE_INT)(size_t)(dir.beg - info.fmtstr),
3039 (int)dir.len, dir.beg, (unsigned HOST_WIDE_INT) dir.len);
3042 return len - !*str;
3045 const char *pf = pcnt + 1;
3047 /* POSIX numbered argument index or zero when none. */
3048 HOST_WIDE_INT dollar = 0;
3050 /* With and precision. -1 when not specified, HOST_WIDE_INT_MIN
3051 when given by a va_list argument, and a non-negative value
3052 when specified in the format string itself. */
3053 HOST_WIDE_INT width = -1;
3054 HOST_WIDE_INT precision = -1;
3056 /* Pointers to the beginning of the width and precision decimal
3057 string (if any) within the directive. */
3058 const char *pwidth = 0;
3059 const char *pprec = 0;
3061 /* When the value of the decimal string that specifies width or
3062 precision is out of range, points to the digit that causes
3063 the value to exceed the limit. */
3064 const char *werange = NULL;
3065 const char *perange = NULL;
3067 /* Width specified via the asterisk. Need not be INTEGER_CST.
3068 For vararg functions set to void_node. */
3069 tree star_width = NULL_TREE;
3071 /* Width specified via the asterisk. Need not be INTEGER_CST.
3072 For vararg functions set to void_node. */
3073 tree star_precision = NULL_TREE;
3075 if (ISDIGIT (target_to_host (*pf)))
3077 /* This could be either a POSIX positional argument, the '0'
3078 flag, or a width, depending on what follows. Store it as
3079 width and sort it out later after the next character has
3080 been seen. */
3081 pwidth = pf;
3082 width = target_strtol10 (&pf, &werange);
3084 else if (target_to_host (*pf) == '*')
3086 /* Similarly to the block above, this could be either a POSIX
3087 positional argument or a width, depending on what follows. */
3088 if (*argno < gimple_call_num_args (info.callstmt))
3089 star_width = gimple_call_arg (info.callstmt, (*argno)++);
3090 else
3091 star_width = void_node;
3092 ++pf;
3095 if (target_to_host (*pf) == '$')
3097 /* Handle the POSIX dollar sign which references the 1-based
3098 positional argument number. */
3099 if (width != -1)
3100 dollar = width + info.argidx;
3101 else if (star_width
3102 && TREE_CODE (star_width) == INTEGER_CST
3103 && (TYPE_PRECISION (TREE_TYPE (star_width))
3104 <= TYPE_PRECISION (integer_type_node)))
3105 dollar = width + tree_to_shwi (star_width);
3107 /* Bail when the numbered argument is out of range (it will
3108 have already been diagnosed by -Wformat). */
3109 if (dollar == 0
3110 || dollar == (int)info.argidx
3111 || dollar > gimple_call_num_args (info.callstmt))
3112 return false;
3114 --dollar;
3116 star_width = NULL_TREE;
3117 width = -1;
3118 ++pf;
3121 if (dollar || !star_width)
3123 if (width != -1)
3125 if (width == 0)
3127 /* The '0' that has been interpreted as a width above is
3128 actually a flag. Reset HAVE_WIDTH, set the '0' flag,
3129 and continue processing other flags. */
3130 width = -1;
3131 dir.set_flag ('0');
3133 else if (!dollar)
3135 /* (Non-zero) width has been seen. The next character
3136 is either a period or a digit. */
3137 goto start_precision;
3140 /* When either '$' has been seen, or width has not been seen,
3141 the next field is the optional flags followed by an optional
3142 width. */
3143 for ( ; ; ) {
3144 switch (target_to_host (*pf))
3146 case ' ':
3147 case '0':
3148 case '+':
3149 case '-':
3150 case '#':
3151 dir.set_flag (target_to_host (*pf++));
3152 break;
3154 default:
3155 goto start_width;
3159 start_width:
3160 if (ISDIGIT (target_to_host (*pf)))
3162 werange = 0;
3163 pwidth = pf;
3164 width = target_strtol10 (&pf, &werange);
3166 else if (target_to_host (*pf) == '*')
3168 if (*argno < gimple_call_num_args (info.callstmt))
3169 star_width = gimple_call_arg (info.callstmt, (*argno)++);
3170 else
3172 /* This is (likely) a va_list. It could also be an invalid
3173 call with insufficient arguments. */
3174 star_width = void_node;
3176 ++pf;
3178 else if (target_to_host (*pf) == '\'')
3180 /* The POSIX apostrophe indicating a numeric grouping
3181 in the current locale. Even though it's possible to
3182 estimate the upper bound on the size of the output
3183 based on the number of digits it probably isn't worth
3184 continuing. */
3185 return 0;
3189 start_precision:
3190 if (target_to_host (*pf) == '.')
3192 ++pf;
3194 if (ISDIGIT (target_to_host (*pf)))
3196 pprec = pf;
3197 precision = target_strtol10 (&pf, &perange);
3199 else if (target_to_host (*pf) == '*')
3201 if (*argno < gimple_call_num_args (info.callstmt))
3202 star_precision = gimple_call_arg (info.callstmt, (*argno)++);
3203 else
3205 /* This is (likely) a va_list. It could also be an invalid
3206 call with insufficient arguments. */
3207 star_precision = void_node;
3209 ++pf;
3211 else
3213 /* The decimal precision or the asterisk are optional.
3214 When neither is dirified it's taken to be zero. */
3215 precision = 0;
3219 switch (target_to_host (*pf))
3221 case 'h':
3222 if (target_to_host (pf[1]) == 'h')
3224 ++pf;
3225 dir.modifier = FMT_LEN_hh;
3227 else
3228 dir.modifier = FMT_LEN_h;
3229 ++pf;
3230 break;
3232 case 'j':
3233 dir.modifier = FMT_LEN_j;
3234 ++pf;
3235 break;
3237 case 'L':
3238 dir.modifier = FMT_LEN_L;
3239 ++pf;
3240 break;
3242 case 'l':
3243 if (target_to_host (pf[1]) == 'l')
3245 ++pf;
3246 dir.modifier = FMT_LEN_ll;
3248 else
3249 dir.modifier = FMT_LEN_l;
3250 ++pf;
3251 break;
3253 case 't':
3254 dir.modifier = FMT_LEN_t;
3255 ++pf;
3256 break;
3258 case 'z':
3259 dir.modifier = FMT_LEN_z;
3260 ++pf;
3261 break;
3264 switch (target_to_host (*pf))
3266 /* Handle a sole '%' character the same as "%%" but since it's
3267 undefined prevent the result from being folded. */
3268 case '\0':
3269 --pf;
3270 res->range.min = res->range.max = HOST_WIDE_INT_M1U;
3271 /* FALLTHRU */
3272 case '%':
3273 dir.fmtfunc = format_percent;
3274 break;
3276 case 'a':
3277 case 'A':
3278 case 'e':
3279 case 'E':
3280 case 'f':
3281 case 'F':
3282 case 'g':
3283 case 'G':
3284 res->floating = true;
3285 dir.fmtfunc = format_floating;
3286 break;
3288 case 'd':
3289 case 'i':
3290 case 'o':
3291 case 'u':
3292 case 'x':
3293 case 'X':
3294 dir.fmtfunc = format_integer;
3295 break;
3297 case 'p':
3298 /* The %p output is implementation-defined. It's possible
3299 to determine this format but due to extensions (edirially
3300 those of the Linux kernel -- see bug 78512) the first %p
3301 in the format string disables any further processing. */
3302 return false;
3304 case 'n':
3305 /* %n has side-effects even when nothing is actually printed to
3306 any buffer. */
3307 info.nowrite = false;
3308 dir.fmtfunc = format_none;
3309 break;
3311 case 'C':
3312 case 'c':
3313 /* POSIX wide character and C/POSIX narrow character. */
3314 dir.fmtfunc = format_character;
3315 break;
3317 case 'S':
3318 case 's':
3319 /* POSIX wide string and C/POSIX narrow character string. */
3320 dir.fmtfunc = format_string;
3321 break;
3323 default:
3324 /* Unknown conversion specification. */
3325 return 0;
3328 dir.specifier = target_to_host (*pf++);
3330 /* Store the length of the format directive. */
3331 dir.len = pf - pcnt;
3333 /* Buffer for the directive in the host character set (used when
3334 the source character set is different). */
3335 char hostdir[32];
3337 if (star_width)
3339 if (INTEGRAL_TYPE_P (TREE_TYPE (star_width)))
3340 dir.set_width (star_width, vr_values);
3341 else
3343 /* Width specified by a va_list takes on the range [0, -INT_MIN]
3344 (width is the absolute value of that specified). */
3345 dir.width[0] = 0;
3346 dir.width[1] = target_int_max () + 1;
3349 else
3351 if (width == LONG_MAX && werange)
3353 size_t begin = dir.beg - info.fmtstr + (pwidth - pcnt);
3354 size_t caret = begin + (werange - pcnt);
3355 size_t end = pf - info.fmtstr - 1;
3357 /* Create a location for the width part of the directive,
3358 pointing the caret at the first out-of-range digit. */
3359 substring_loc dirloc (info.fmtloc, TREE_TYPE (info.format),
3360 caret, begin, end);
3362 fmtwarn (dirloc, UNKNOWN_LOCATION, NULL, info.warnopt (),
3363 "%<%.*s%> directive width out of range", (int) dir.len,
3364 target_to_host (hostdir, sizeof hostdir, dir.beg));
3367 dir.set_width (width);
3370 if (star_precision)
3372 if (INTEGRAL_TYPE_P (TREE_TYPE (star_precision)))
3373 dir.set_precision (star_precision, vr_values);
3374 else
3376 /* Precision specified by a va_list takes on the range [-1, INT_MAX]
3377 (unlike width, negative precision is ignored). */
3378 dir.prec[0] = -1;
3379 dir.prec[1] = target_int_max ();
3382 else
3384 if (precision == LONG_MAX && perange)
3386 size_t begin = dir.beg - info.fmtstr + (pprec - pcnt) - 1;
3387 size_t caret = dir.beg - info.fmtstr + (perange - pcnt) - 1;
3388 size_t end = pf - info.fmtstr - 2;
3390 /* Create a location for the precision part of the directive,
3391 including the leading period, pointing the caret at the first
3392 out-of-range digit . */
3393 substring_loc dirloc (info.fmtloc, TREE_TYPE (info.format),
3394 caret, begin, end);
3396 fmtwarn (dirloc, UNKNOWN_LOCATION, NULL, info.warnopt (),
3397 "%<%.*s%> directive precision out of range", (int) dir.len,
3398 target_to_host (hostdir, sizeof hostdir, dir.beg));
3401 dir.set_precision (precision);
3404 /* Extract the argument if the directive takes one and if it's
3405 available (e.g., the function doesn't take a va_list). Treat
3406 missing arguments the same as va_list, even though they will
3407 have likely already been diagnosed by -Wformat. */
3408 if (dir.specifier != '%'
3409 && *argno < gimple_call_num_args (info.callstmt))
3410 dir.arg = gimple_call_arg (info.callstmt, dollar ? dollar : (*argno)++);
3412 if (dump_file)
3414 fprintf (dump_file,
3415 " Directive %u at offset " HOST_WIDE_INT_PRINT_UNSIGNED
3416 ": \"%.*s\"",
3417 dir.dirno,
3418 (unsigned HOST_WIDE_INT)(size_t)(dir.beg - info.fmtstr),
3419 (int)dir.len, dir.beg);
3420 if (star_width)
3422 if (dir.width[0] == dir.width[1])
3423 fprintf (dump_file, ", width = " HOST_WIDE_INT_PRINT_DEC,
3424 dir.width[0]);
3425 else
3426 fprintf (dump_file,
3427 ", width in range [" HOST_WIDE_INT_PRINT_DEC
3428 ", " HOST_WIDE_INT_PRINT_DEC "]",
3429 dir.width[0], dir.width[1]);
3432 if (star_precision)
3434 if (dir.prec[0] == dir.prec[1])
3435 fprintf (dump_file, ", precision = " HOST_WIDE_INT_PRINT_DEC,
3436 dir.prec[0]);
3437 else
3438 fprintf (dump_file,
3439 ", precision in range [" HOST_WIDE_INT_PRINT_DEC
3440 HOST_WIDE_INT_PRINT_DEC "]",
3441 dir.prec[0], dir.prec[1]);
3443 fputc ('\n', dump_file);
3446 return dir.len;
3449 /* Compute the length of the output resulting from the call to a formatted
3450 output function described by INFO and store the result of the call in
3451 *RES. Issue warnings for detected past the end writes. Return true
3452 if the complete format string has been processed and *RES can be relied
3453 on, false otherwise (e.g., when a unknown or unhandled directive was seen
3454 that caused the processing to be terminated early). */
3456 bool
3457 sprintf_dom_walker::compute_format_length (call_info &info,
3458 format_result *res)
3460 if (dump_file)
3462 location_t callloc = gimple_location (info.callstmt);
3463 fprintf (dump_file, "%s:%i: ",
3464 LOCATION_FILE (callloc), LOCATION_LINE (callloc));
3465 print_generic_expr (dump_file, info.func, dump_flags);
3467 fprintf (dump_file,
3468 ": objsize = " HOST_WIDE_INT_PRINT_UNSIGNED
3469 ", fmtstr = \"%s\"\n",
3470 info.objsize, info.fmtstr);
3473 /* Reset the minimum and maximum byte counters. */
3474 res->range.min = res->range.max = 0;
3476 /* No directive has been seen yet so the length of output is bounded
3477 by the known range [0, 0] (with no conversion resulting in a failure
3478 or producing more than 4K bytes) until determined otherwise. */
3479 res->knownrange = true;
3480 res->floating = false;
3481 res->warned = false;
3483 /* 1-based directive counter. */
3484 unsigned dirno = 1;
3486 /* The variadic argument counter. */
3487 unsigned argno = info.argidx;
3489 for (const char *pf = info.fmtstr; ; ++dirno)
3491 directive dir = directive ();
3492 dir.dirno = dirno;
3494 size_t n = parse_directive (info, dir, res, pf, &argno,
3495 evrp_range_analyzer.get_vr_values ());
3497 /* Return failure if the format function fails. */
3498 if (!format_directive (info, res, dir,
3499 evrp_range_analyzer.get_vr_values ()))
3500 return false;
3502 /* Return success the directive is zero bytes long and it's
3503 the last think in the format string (i.e., it's the terminating
3504 nul, which isn't really a directive but handling it as one makes
3505 things simpler). */
3506 if (!n)
3507 return *pf == '\0';
3509 pf += n;
3512 /* The complete format string was processed (with or without warnings). */
3513 return true;
3516 /* Return the size of the object referenced by the expression DEST if
3517 available, or -1 otherwise. */
3519 static unsigned HOST_WIDE_INT
3520 get_destination_size (tree dest)
3522 /* When there is no destination return -1. */
3523 if (!dest)
3524 return HOST_WIDE_INT_M1U;
3526 /* Initialize object size info before trying to compute it. */
3527 init_object_sizes ();
3529 /* Use __builtin_object_size to determine the size of the destination
3530 object. When optimizing, determine the smallest object (such as
3531 a member array as opposed to the whole enclosing object), otherwise
3532 use type-zero object size to determine the size of the enclosing
3533 object (the function fails without optimization in this type). */
3534 int ost = optimize > 0;
3535 unsigned HOST_WIDE_INT size;
3536 if (compute_builtin_object_size (dest, ost, &size))
3537 return size;
3539 return HOST_WIDE_INT_M1U;
3542 /* Return true if the call described by INFO with result RES safe to
3543 optimize (i.e., no undefined behavior), and set RETVAL to the range
3544 of its return values. */
3546 static bool
3547 is_call_safe (const sprintf_dom_walker::call_info &info,
3548 const format_result &res, bool under4k,
3549 unsigned HOST_WIDE_INT retval[2])
3551 if (under4k && !res.posunder4k)
3552 return false;
3554 /* The minimum return value. */
3555 retval[0] = res.range.min;
3557 /* The maximum return value is in most cases bounded by RES.RANGE.MAX
3558 but in cases involving multibyte characters could be as large as
3559 RES.RANGE.UNLIKELY. */
3560 retval[1]
3561 = res.range.unlikely < res.range.max ? res.range.max : res.range.unlikely;
3563 /* Adjust the number of bytes which includes the terminating nul
3564 to reflect the return value of the function which does not.
3565 Because the valid range of the function is [INT_MIN, INT_MAX],
3566 a valid range before the adjustment below is [0, INT_MAX + 1]
3567 (the functions only return negative values on error or undefined
3568 behavior). */
3569 if (retval[0] <= target_int_max () + 1)
3570 --retval[0];
3571 if (retval[1] <= target_int_max () + 1)
3572 --retval[1];
3574 /* Avoid the return value optimization when the behavior of the call
3575 is undefined either because any directive may have produced 4K or
3576 more of output, or the return value exceeds INT_MAX, or because
3577 the output overflows the destination object (but leave it enabled
3578 when the function is bounded because then the behavior is well-
3579 defined). */
3580 if (retval[0] == retval[1]
3581 && (info.bounded || retval[0] < info.objsize)
3582 && retval[0] <= target_int_max ())
3583 return true;
3585 if ((info.bounded || retval[1] < info.objsize)
3586 && (retval[0] < target_int_max ()
3587 && retval[1] < target_int_max ()))
3588 return true;
3590 if (!under4k && (info.bounded || retval[0] < info.objsize))
3591 return true;
3593 return false;
3596 /* Given a suitable result RES of a call to a formatted output function
3597 described by INFO, substitute the result for the return value of
3598 the call. The result is suitable if the number of bytes it represents
3599 is known and exact. A result that isn't suitable for substitution may
3600 have its range set to the range of return values, if that is known.
3601 Return true if the call is removed and gsi_next should not be performed
3602 in the caller. */
3604 static bool
3605 try_substitute_return_value (gimple_stmt_iterator *gsi,
3606 const sprintf_dom_walker::call_info &info,
3607 const format_result &res)
3609 tree lhs = gimple_get_lhs (info.callstmt);
3611 /* Set to true when the entire call has been removed. */
3612 bool removed = false;
3614 /* The minimum and maximum return value. */
3615 unsigned HOST_WIDE_INT retval[2];
3616 bool safe = is_call_safe (info, res, true, retval);
3618 if (safe
3619 && retval[0] == retval[1]
3620 /* Not prepared to handle possibly throwing calls here; they shouldn't
3621 appear in non-artificial testcases, except when the __*_chk routines
3622 are badly declared. */
3623 && !stmt_ends_bb_p (info.callstmt))
3625 tree cst = build_int_cst (integer_type_node, retval[0]);
3627 if (lhs == NULL_TREE
3628 && info.nowrite)
3630 /* Remove the call to the bounded function with a zero size
3631 (e.g., snprintf(0, 0, "%i", 123)) if there is no lhs. */
3632 unlink_stmt_vdef (info.callstmt);
3633 gsi_remove (gsi, true);
3634 removed = true;
3636 else if (info.nowrite)
3638 /* Replace the call to the bounded function with a zero size
3639 (e.g., snprintf(0, 0, "%i", 123) with the constant result
3640 of the function. */
3641 if (!update_call_from_tree (gsi, cst))
3642 gimplify_and_update_call_from_tree (gsi, cst);
3643 gimple *callstmt = gsi_stmt (*gsi);
3644 update_stmt (callstmt);
3646 else if (lhs)
3648 /* Replace the left-hand side of the call with the constant
3649 result of the formatted function. */
3650 gimple_call_set_lhs (info.callstmt, NULL_TREE);
3651 gimple *g = gimple_build_assign (lhs, cst);
3652 gsi_insert_after (gsi, g, GSI_NEW_STMT);
3653 update_stmt (info.callstmt);
3656 if (dump_file)
3658 if (removed)
3659 fprintf (dump_file, " Removing call statement.");
3660 else
3662 fprintf (dump_file, " Substituting ");
3663 print_generic_expr (dump_file, cst, dump_flags);
3664 fprintf (dump_file, " for %s.\n",
3665 info.nowrite ? "statement" : "return value");
3669 else if (lhs)
3671 bool setrange = false;
3673 if (safe
3674 && (info.bounded || retval[1] < info.objsize)
3675 && (retval[0] < target_int_max ()
3676 && retval[1] < target_int_max ()))
3678 /* If the result is in a valid range bounded by the size of
3679 the destination set it so that it can be used for subsequent
3680 optimizations. */
3681 int prec = TYPE_PRECISION (integer_type_node);
3683 wide_int min = wi::shwi (retval[0], prec);
3684 wide_int max = wi::shwi (retval[1], prec);
3685 set_range_info (lhs, VR_RANGE, min, max);
3687 setrange = true;
3690 if (dump_file)
3692 const char *inbounds
3693 = (retval[0] < info.objsize
3694 ? (retval[1] < info.objsize
3695 ? "in" : "potentially out-of")
3696 : "out-of");
3698 const char *what = setrange ? "Setting" : "Discarding";
3699 if (retval[0] != retval[1])
3700 fprintf (dump_file,
3701 " %s %s-bounds return value range ["
3702 HOST_WIDE_INT_PRINT_UNSIGNED ", "
3703 HOST_WIDE_INT_PRINT_UNSIGNED "].\n",
3704 what, inbounds, retval[0], retval[1]);
3705 else
3706 fprintf (dump_file, " %s %s-bounds return value "
3707 HOST_WIDE_INT_PRINT_UNSIGNED ".\n",
3708 what, inbounds, retval[0]);
3712 if (dump_file)
3713 fputc ('\n', dump_file);
3715 return removed;
3718 /* Try to simplify a s{,n}printf call described by INFO with result
3719 RES by replacing it with a simpler and presumably more efficient
3720 call (such as strcpy). */
3722 static bool
3723 try_simplify_call (gimple_stmt_iterator *gsi,
3724 const sprintf_dom_walker::call_info &info,
3725 const format_result &res)
3727 unsigned HOST_WIDE_INT dummy[2];
3728 if (!is_call_safe (info, res, info.retval_used (), dummy))
3729 return false;
3731 switch (info.fncode)
3733 case BUILT_IN_SNPRINTF:
3734 return gimple_fold_builtin_snprintf (gsi);
3736 case BUILT_IN_SPRINTF:
3737 return gimple_fold_builtin_sprintf (gsi);
3739 default:
3743 return false;
3746 /* Return the zero-based index of the format string argument of a printf
3747 like function and set *IDX_ARGS to the first format argument. When
3748 no such index exists return UINT_MAX. */
3750 static unsigned
3751 get_user_idx_format (tree fndecl, unsigned *idx_args)
3753 tree attrs = lookup_attribute ("format", DECL_ATTRIBUTES (fndecl));
3754 if (!attrs)
3755 attrs = lookup_attribute ("format", TYPE_ATTRIBUTES (TREE_TYPE (fndecl)));
3757 if (!attrs)
3758 return UINT_MAX;
3760 attrs = TREE_VALUE (attrs);
3762 tree archetype = TREE_VALUE (attrs);
3763 if (strcmp ("printf", IDENTIFIER_POINTER (archetype)))
3764 return UINT_MAX;
3766 attrs = TREE_CHAIN (attrs);
3767 tree fmtarg = TREE_VALUE (attrs);
3769 attrs = TREE_CHAIN (attrs);
3770 tree elliparg = TREE_VALUE (attrs);
3772 /* Attribute argument indices are 1-based but we use zero-based. */
3773 *idx_args = tree_to_uhwi (elliparg) - 1;
3774 return tree_to_uhwi (fmtarg) - 1;
3777 /* Determine if a GIMPLE CALL is to one of the sprintf-like built-in
3778 functions and if so, handle it. Return true if the call is removed
3779 and gsi_next should not be performed in the caller. */
3781 bool
3782 sprintf_dom_walker::handle_gimple_call (gimple_stmt_iterator *gsi)
3784 call_info info = call_info ();
3786 info.callstmt = gsi_stmt (*gsi);
3787 info.func = gimple_call_fndecl (info.callstmt);
3788 if (!info.func)
3789 return false;
3791 info.fncode = DECL_FUNCTION_CODE (info.func);
3793 /* Format string argument number (valid for all functions). */
3794 unsigned idx_format = UINT_MAX;
3795 if (!gimple_call_builtin_p (info.callstmt, BUILT_IN_NORMAL))
3797 unsigned idx_args;
3798 idx_format = get_user_idx_format (info.func, &idx_args);
3799 if (idx_format == UINT_MAX)
3800 return false;
3801 info.argidx = idx_args;
3804 /* The size of the destination as in snprintf(dest, size, ...). */
3805 unsigned HOST_WIDE_INT dstsize = HOST_WIDE_INT_M1U;
3807 /* The size of the destination determined by __builtin_object_size. */
3808 unsigned HOST_WIDE_INT objsize = HOST_WIDE_INT_M1U;
3810 /* Zero-based buffer size argument number (snprintf and vsnprintf). */
3811 unsigned idx_dstsize = UINT_MAX;
3813 /* Object size argument number (snprintf_chk and vsnprintf_chk). */
3814 unsigned idx_objsize = UINT_MAX;
3816 /* Destination argument number (valid for sprintf functions only). */
3817 unsigned idx_dstptr = 0;
3819 switch (info.fncode)
3821 case BUILT_IN_NONE:
3822 // User-defined function with attribute format (printf).
3823 idx_dstptr = -1;
3824 break;
3826 case BUILT_IN_FPRINTF:
3827 // Signature:
3828 // __builtin_fprintf (FILE*, format, ...)
3829 idx_format = 1;
3830 info.argidx = 2;
3831 idx_dstptr = -1;
3832 break;
3834 case BUILT_IN_FPRINTF_CHK:
3835 // Signature:
3836 // __builtin_fprintf_chk (FILE*, ost, format, ...)
3837 idx_format = 2;
3838 info.argidx = 3;
3839 idx_dstptr = -1;
3840 break;
3842 case BUILT_IN_FPRINTF_UNLOCKED:
3843 // Signature:
3844 // __builtin_fprintf_unlocked (FILE*, format, ...)
3845 idx_format = 1;
3846 info.argidx = 2;
3847 idx_dstptr = -1;
3848 break;
3850 case BUILT_IN_PRINTF:
3851 // Signature:
3852 // __builtin_printf (format, ...)
3853 idx_format = 0;
3854 info.argidx = 1;
3855 idx_dstptr = -1;
3856 break;
3858 case BUILT_IN_PRINTF_CHK:
3859 // Signature:
3860 // __builtin_printf_chk (ost, format, ...)
3861 idx_format = 1;
3862 info.argidx = 2;
3863 idx_dstptr = -1;
3864 break;
3866 case BUILT_IN_PRINTF_UNLOCKED:
3867 // Signature:
3868 // __builtin_printf_unlocked (format, ...)
3869 idx_format = 0;
3870 info.argidx = 1;
3871 idx_dstptr = -1;
3872 break;
3874 case BUILT_IN_SPRINTF:
3875 // Signature:
3876 // __builtin_sprintf (dst, format, ...)
3877 idx_format = 1;
3878 info.argidx = 2;
3879 break;
3881 case BUILT_IN_SPRINTF_CHK:
3882 // Signature:
3883 // __builtin___sprintf_chk (dst, ost, objsize, format, ...)
3884 idx_objsize = 2;
3885 idx_format = 3;
3886 info.argidx = 4;
3887 break;
3889 case BUILT_IN_SNPRINTF:
3890 // Signature:
3891 // __builtin_snprintf (dst, size, format, ...)
3892 idx_dstsize = 1;
3893 idx_format = 2;
3894 info.argidx = 3;
3895 info.bounded = true;
3896 break;
3898 case BUILT_IN_SNPRINTF_CHK:
3899 // Signature:
3900 // __builtin___snprintf_chk (dst, size, ost, objsize, format, ...)
3901 idx_dstsize = 1;
3902 idx_objsize = 3;
3903 idx_format = 4;
3904 info.argidx = 5;
3905 info.bounded = true;
3906 break;
3908 case BUILT_IN_VFPRINTF:
3909 // Signature:
3910 // __builtin_vfprintf (FILE*, format, va_list)
3911 idx_format = 1;
3912 info.argidx = -1;
3913 idx_dstptr = -1;
3914 break;
3916 case BUILT_IN_VFPRINTF_CHK:
3917 // Signature:
3918 // __builtin___vfprintf_chk (FILE*, ost, format, va_list)
3919 idx_format = 2;
3920 info.argidx = -1;
3921 idx_dstptr = -1;
3922 break;
3924 case BUILT_IN_VPRINTF:
3925 // Signature:
3926 // __builtin_vprintf (format, va_list)
3927 idx_format = 0;
3928 info.argidx = -1;
3929 idx_dstptr = -1;
3930 break;
3932 case BUILT_IN_VPRINTF_CHK:
3933 // Signature:
3934 // __builtin___vprintf_chk (ost, format, va_list)
3935 idx_format = 1;
3936 info.argidx = -1;
3937 idx_dstptr = -1;
3938 break;
3940 case BUILT_IN_VSNPRINTF:
3941 // Signature:
3942 // __builtin_vsnprintf (dst, size, format, va)
3943 idx_dstsize = 1;
3944 idx_format = 2;
3945 info.argidx = -1;
3946 info.bounded = true;
3947 break;
3949 case BUILT_IN_VSNPRINTF_CHK:
3950 // Signature:
3951 // __builtin___vsnprintf_chk (dst, size, ost, objsize, format, va)
3952 idx_dstsize = 1;
3953 idx_objsize = 3;
3954 idx_format = 4;
3955 info.argidx = -1;
3956 info.bounded = true;
3957 break;
3959 case BUILT_IN_VSPRINTF:
3960 // Signature:
3961 // __builtin_vsprintf (dst, format, va)
3962 idx_format = 1;
3963 info.argidx = -1;
3964 break;
3966 case BUILT_IN_VSPRINTF_CHK:
3967 // Signature:
3968 // __builtin___vsprintf_chk (dst, ost, objsize, format, va)
3969 idx_format = 3;
3970 idx_objsize = 2;
3971 info.argidx = -1;
3972 break;
3974 default:
3975 return false;
3978 /* Set the global warning level for this function. */
3979 warn_level = info.bounded ? warn_format_trunc : warn_format_overflow;
3981 /* For all string functions the first argument is a pointer to
3982 the destination. */
3983 tree dstptr = (idx_dstptr < gimple_call_num_args (info.callstmt)
3984 ? gimple_call_arg (info.callstmt, 0) : NULL_TREE);
3986 info.format = gimple_call_arg (info.callstmt, idx_format);
3988 /* True when the destination size is constant as opposed to the lower
3989 or upper bound of a range. */
3990 bool dstsize_cst_p = true;
3992 if (idx_dstsize == UINT_MAX)
3994 /* For non-bounded functions like sprintf, determine the size
3995 of the destination from the object or pointer passed to it
3996 as the first argument. */
3997 dstsize = get_destination_size (dstptr);
3999 else if (tree size = gimple_call_arg (info.callstmt, idx_dstsize))
4001 /* For bounded functions try to get the size argument. */
4003 if (TREE_CODE (size) == INTEGER_CST)
4005 dstsize = tree_to_uhwi (size);
4006 /* No object can be larger than SIZE_MAX bytes (half the address
4007 space) on the target.
4008 The functions are defined only for output of at most INT_MAX
4009 bytes. Specifying a bound in excess of that limit effectively
4010 defeats the bounds checking (and on some implementations such
4011 as Solaris cause the function to fail with EINVAL). */
4012 if (dstsize > target_size_max () / 2)
4014 /* Avoid warning if -Wstringop-overflow is specified since
4015 it also warns for the same thing though only for the
4016 checking built-ins. */
4017 if ((idx_objsize == UINT_MAX
4018 || !warn_stringop_overflow))
4019 warning_at (gimple_location (info.callstmt), info.warnopt (),
4020 "specified bound %wu exceeds maximum object size "
4021 "%wu",
4022 dstsize, target_size_max () / 2);
4024 else if (dstsize > target_int_max ())
4025 warning_at (gimple_location (info.callstmt), info.warnopt (),
4026 "specified bound %wu exceeds %<INT_MAX%>",
4027 dstsize);
4029 else if (TREE_CODE (size) == SSA_NAME)
4031 /* Try to determine the range of values of the argument
4032 and use the greater of the two at level 1 and the smaller
4033 of them at level 2. */
4034 value_range *vr = evrp_range_analyzer.get_value_range (size);
4035 if (range_int_cst_p (vr))
4036 dstsize = (warn_level < 2
4037 ? TREE_INT_CST_LOW (vr->max ())
4038 : TREE_INT_CST_LOW (vr->min ()));
4040 /* The destination size is not constant. If the function is
4041 bounded (e.g., snprintf) a lower bound of zero doesn't
4042 necessarily imply it can be eliminated. */
4043 dstsize_cst_p = false;
4047 if (idx_objsize != UINT_MAX)
4048 if (tree size = gimple_call_arg (info.callstmt, idx_objsize))
4049 if (tree_fits_uhwi_p (size))
4050 objsize = tree_to_uhwi (size);
4052 if (info.bounded && !dstsize)
4054 /* As a special case, when the explicitly specified destination
4055 size argument (to a bounded function like snprintf) is zero
4056 it is a request to determine the number of bytes on output
4057 without actually producing any. Pretend the size is
4058 unlimited in this case. */
4059 info.objsize = HOST_WIDE_INT_MAX;
4060 info.nowrite = dstsize_cst_p;
4062 else
4064 /* For calls to non-bounded functions or to those of bounded
4065 functions with a non-zero size, warn if the destination
4066 pointer is null. */
4067 if (dstptr && integer_zerop (dstptr))
4069 /* This is diagnosed with -Wformat only when the null is a constant
4070 pointer. The warning here diagnoses instances where the pointer
4071 is not constant. */
4072 location_t loc = gimple_location (info.callstmt);
4073 warning_at (EXPR_LOC_OR_LOC (dstptr, loc),
4074 info.warnopt (), "%Gnull destination pointer",
4075 info.callstmt);
4076 return false;
4079 /* Set the object size to the smaller of the two arguments
4080 if both have been specified and they're not equal. */
4081 info.objsize = dstsize < objsize ? dstsize : objsize;
4083 if (info.bounded
4084 && dstsize < target_size_max () / 2 && objsize < dstsize
4085 /* Avoid warning if -Wstringop-overflow is specified since
4086 it also warns for the same thing though only for the
4087 checking built-ins. */
4088 && (idx_objsize == UINT_MAX
4089 || !warn_stringop_overflow))
4091 warning_at (gimple_location (info.callstmt), info.warnopt (),
4092 "specified bound %wu exceeds the size %wu "
4093 "of the destination object", dstsize, objsize);
4097 /* Determine if the format argument may be null and warn if not
4098 and if the argument is null. */
4099 if (integer_zerop (info.format)
4100 && gimple_call_builtin_p (info.callstmt, BUILT_IN_NORMAL))
4102 location_t loc = gimple_location (info.callstmt);
4103 warning_at (EXPR_LOC_OR_LOC (info.format, loc),
4104 info.warnopt (), "%Gnull format string",
4105 info.callstmt);
4106 return false;
4109 info.fmtstr = get_format_string (info.format, &info.fmtloc);
4110 if (!info.fmtstr)
4111 return false;
4113 /* The result is the number of bytes output by the formatted function,
4114 including the terminating NUL. */
4115 format_result res = format_result ();
4117 /* I/O functions with no destination argument (i.e., all forms of fprintf
4118 and printf) may fail under any conditions. Others (i.e., all forms of
4119 sprintf) may only fail under specific conditions determined for each
4120 directive. Clear POSUNDER4K for the former set of functions and set
4121 it to true for the latter (it can only be cleared later, but it is
4122 never set to true again). */
4123 res.posunder4k = dstptr;
4125 bool success = compute_format_length (info, &res);
4126 if (res.warned)
4127 gimple_set_no_warning (info.callstmt, true);
4129 /* When optimizing and the printf return value optimization is enabled,
4130 attempt to substitute the computed result for the return value of
4131 the call. Avoid this optimization when -frounding-math is in effect
4132 and the format string contains a floating point directive. */
4133 bool call_removed = false;
4134 if (success && optimize > 0)
4136 /* Save a copy of the iterator pointing at the call. The iterator
4137 may change to point past the call in try_substitute_return_value
4138 but the original value is needed in try_simplify_call. */
4139 gimple_stmt_iterator gsi_call = *gsi;
4141 if (flag_printf_return_value
4142 && (!flag_rounding_math || !res.floating))
4143 call_removed = try_substitute_return_value (gsi, info, res);
4145 if (!call_removed)
4146 try_simplify_call (&gsi_call, info, res);
4149 return call_removed;
4152 edge
4153 sprintf_dom_walker::before_dom_children (basic_block bb)
4155 evrp_range_analyzer.enter (bb);
4156 for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si); )
4158 /* Iterate over statements, looking for function calls. */
4159 gimple *stmt = gsi_stmt (si);
4161 /* First record ranges generated by this statement. */
4162 evrp_range_analyzer.record_ranges_from_stmt (stmt, false);
4164 if (is_gimple_call (stmt) && handle_gimple_call (&si))
4165 /* If handle_gimple_call returns true, the iterator is
4166 already pointing to the next statement. */
4167 continue;
4169 gsi_next (&si);
4171 return NULL;
4174 void
4175 sprintf_dom_walker::after_dom_children (basic_block bb)
4177 evrp_range_analyzer.leave (bb);
4180 /* Execute the pass for function FUN. */
4182 unsigned int
4183 pass_sprintf_length::execute (function *fun)
4185 init_target_to_host_charmap ();
4187 calculate_dominance_info (CDI_DOMINATORS);
4189 sprintf_dom_walker sprintf_dom_walker;
4190 sprintf_dom_walker.walk (ENTRY_BLOCK_PTR_FOR_FN (fun));
4192 /* Clean up object size info. */
4193 fini_object_sizes ();
4194 return 0;
4197 } /* Unnamed namespace. */
4199 /* Return a pointer to a pass object newly constructed from the context
4200 CTXT. */
4202 gimple_opt_pass *
4203 make_pass_sprintf_length (gcc::context *ctxt)
4205 return new pass_sprintf_length (ctxt);