aix: Fix _STDC_FORMAT_MACROS in inttypes.h [PR97044]
[official-gcc.git] / gcc / gimple-ssa-sprintf.c
blob70b031fe7b94c656fef11a00f94598eb4fbe4554
1 /* Copyright (C) 2016-2020 Free Software Foundation, Inc.
2 Contributed by Martin Sebor <msebor@redhat.com>.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* This file implements the printf-return-value pass. The pass does
21 two things: 1) it analyzes calls to formatted output functions like
22 sprintf looking for possible buffer overflows and calls to bounded
23 functions like snprintf for early truncation (and under the control
24 of the -Wformat-length option issues warnings), and 2) under the
25 control of the -fprintf-return-value option it folds the return
26 value of safe calls into constants, making it possible to eliminate
27 code that depends on the value of those constants.
29 For all functions (bounded or not) the pass uses the size of the
30 destination object. That means that it will diagnose calls to
31 snprintf not on the basis of the size specified by the function's
32 second argument but rather on the basis of the size the first
33 argument points to (if possible). For bound-checking built-ins
34 like __builtin___snprintf_chk the pass uses the size typically
35 determined by __builtin_object_size and passed to the built-in
36 by the Glibc inline wrapper.
38 The pass handles all forms of standard sprintf format directives,
39 including character, integer, floating point, pointer, and strings,
40 with the standard C flags, widths, and precisions. For integers
41 and strings it computes the length of output itself. For floating
42 point it uses MPFR to format known constants with up and down
43 rounding and uses the resulting range of output lengths. For
44 strings it uses the length of string literals and the sizes of
45 character arrays that a character pointer may point to as a bound
46 on the longest string. */
48 #include "config.h"
49 #include "system.h"
50 #include "coretypes.h"
51 #include "backend.h"
52 #include "tree.h"
53 #include "gimple.h"
54 #include "tree-pass.h"
55 #include "ssa.h"
56 #include "gimple-fold.h"
57 #include "gimple-pretty-print.h"
58 #include "diagnostic-core.h"
59 #include "fold-const.h"
60 #include "gimple-iterator.h"
61 #include "tree-ssa.h"
62 #include "tree-object-size.h"
63 #include "tree-cfg.h"
64 #include "tree-ssa-propagate.h"
65 #include "calls.h"
66 #include "cfgloop.h"
67 #include "tree-scalar-evolution.h"
68 #include "tree-ssa-loop.h"
69 #include "intl.h"
70 #include "langhooks.h"
72 #include "attribs.h"
73 #include "builtins.h"
74 #include "stor-layout.h"
76 #include "realmpfr.h"
77 #include "target.h"
79 #include "cpplib.h"
80 #include "input.h"
81 #include "toplev.h"
82 #include "substring-locations.h"
83 #include "diagnostic.h"
84 #include "domwalk.h"
85 #include "alloc-pool.h"
86 #include "vr-values.h"
87 #include "tree-ssa-strlen.h"
88 #include "tree-dfa.h"
/* An estimate of the largest MB_LEN_MAX likely to be seen on a target;
   the value is big enough for UTF-8.  A target hook would be the proper
   source if this were ever used for optimization, but an estimate is
   good enough for issuing warnings.  */
#define target_mb_len_max()   6
/* Upper bound on the number of bytes a single non-string directive can
   produce: the result of printf("%.*Lf", INT_MAX, -LDBL_MAX) when
   LDBL_MAX_10_EXP is 4932.  */
#define IEEE_MAX_10_EXP    4932
#define target_dir_max()   (target_int_max () + IEEE_MAX_10_EXP + 2)
101 namespace {
/* The warning level in effect for the function being processed: that
   of warn_format_trunc for bounded functions and that of
   warn_format_overflow for all others.  */

static int warn_level;
109 /* The minimum, maximum, likely, and unlikely maximum number of bytes
110 of output either a formatting function or an individual directive
111 can result in. */
113 struct result_range
115 /* The absolute minimum number of bytes. The result of a successful
116 conversion is guaranteed to be no less than this. (An erroneous
117 conversion can be indicated by MIN > HOST_WIDE_INT_MAX.) */
118 unsigned HOST_WIDE_INT min;
119 /* The likely maximum result that is used in diagnostics. In most
120 cases MAX is the same as the worst case UNLIKELY result. */
121 unsigned HOST_WIDE_INT max;
122 /* The likely result used to trigger diagnostics. For conversions
123 that result in a range of bytes [MIN, MAX], LIKELY is somewhere
124 in that range. */
125 unsigned HOST_WIDE_INT likely;
126 /* In rare cases (e.g., for multibyte characters) UNLIKELY gives
127 the worst cases maximum result of a directive. In most cases
128 UNLIKELY == MAX. UNLIKELY is used to control the return value
129 optimization but not in diagnostics. */
130 unsigned HOST_WIDE_INT unlikely;
133 /* Return the value of INT_MIN for the target. */
135 static inline HOST_WIDE_INT
136 target_int_min ()
138 return tree_to_shwi (TYPE_MIN_VALUE (integer_type_node));
141 /* Return the value of INT_MAX for the target. */
143 static inline unsigned HOST_WIDE_INT
144 target_int_max ()
146 return tree_to_uhwi (TYPE_MAX_VALUE (integer_type_node));
149 /* Return the value of SIZE_MAX for the target. */
151 static inline unsigned HOST_WIDE_INT
152 target_size_max ()
154 return tree_to_uhwi (TYPE_MAX_VALUE (size_type_node));
/* Table translating execution character set values into host character
   set values.  It is indexed by the execution character.  */

static char target_to_host_charmap[256];
162 /* Initialize a mapping from the execution character set to the host
163 character set. */
165 static bool
166 init_target_to_host_charmap ()
168 /* If the percent sign is non-zero the mapping has already been
169 initialized. */
170 if (target_to_host_charmap['%'])
171 return true;
173 /* Initialize the target_percent character (done elsewhere). */
174 if (!init_target_chars ())
175 return false;
177 /* The subset of the source character set used by printf conversion
178 specifications (strictly speaking, not all letters are used but
179 they are included here for the sake of simplicity). The dollar
180 sign must be included even though it's not in the basic source
181 character set. */
182 const char srcset[] = " 0123456789!\"#%&'()*+,-./:;<=>?[\\]^_{|}~$"
183 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
185 /* Set the mapping for all characters to some ordinary value (i,e.,
186 not none used in printf conversion specifications) and overwrite
187 those that are used by conversion specifications with their
188 corresponding values. */
189 memset (target_to_host_charmap + 1, '?', sizeof target_to_host_charmap - 1);
191 /* Are the two sets of characters the same? */
192 bool all_same_p = true;
194 for (const char *pc = srcset; *pc; ++pc)
196 /* Slice off the high end bits in case target characters are
197 signed. All values are expected to be non-nul, otherwise
198 there's a problem. */
199 if (unsigned char tc = lang_hooks.to_target_charset (*pc))
201 target_to_host_charmap[tc] = *pc;
202 if (tc != *pc)
203 all_same_p = false;
205 else
206 return false;
210 /* Set the first element to a non-zero value if the mapping
211 is 1-to-1, otherwise leave it clear (NUL is assumed to be
212 the same in both character sets). */
213 target_to_host_charmap[0] = all_same_p;
215 return true;
218 /* Return the host source character corresponding to the character
219 CH in the execution character set if one exists, or some innocuous
220 (non-special, non-nul) source character otherwise. */
222 static inline unsigned char
223 target_to_host (unsigned char ch)
225 return target_to_host_charmap[ch];
228 /* Convert an initial substring of the string TARGSTR consisting of
229 characters in the execution character set into a string in the
230 source character set on the host and store up to HOSTSZ characters
231 in the buffer pointed to by HOSTR. Return HOSTR. */
233 static const char*
234 target_to_host (char *hostr, size_t hostsz, const char *targstr)
236 /* Make sure the buffer is reasonably big. */
237 gcc_assert (hostsz > 4);
239 /* The interesting subset of source and execution characters are
240 the same so no conversion is necessary. However, truncate
241 overlong strings just like the translated strings are. */
242 if (target_to_host_charmap['\0'] == 1)
244 size_t len = strlen (targstr);
245 if (len >= hostsz)
247 memcpy (hostr, targstr, hostsz - 4);
248 strcpy (hostr + hostsz - 4, "...");
250 else
251 memcpy (hostr, targstr, len + 1);
252 return hostr;
255 /* Convert the initial substring of TARGSTR to the corresponding
256 characters in the host set, appending "..." if TARGSTR is too
257 long to fit. Using the static buffer assumes the function is
258 not called in between sequence points (which it isn't). */
259 for (char *ph = hostr; ; ++targstr)
261 *ph++ = target_to_host (*targstr);
262 if (!*targstr)
263 break;
265 if (size_t (ph - hostr) == hostsz)
267 strcpy (ph - 4, "...");
268 break;
272 return hostr;
275 /* Convert the sequence of decimal digits in the execution character
276 starting at *PS to a HOST_WIDE_INT, analogously to strtol. Return
277 the result and set *PS to one past the last converted character.
278 On range error set ERANGE to the digit that caused it. */
280 static inline HOST_WIDE_INT
281 target_strtowi (const char **ps, const char **erange)
283 unsigned HOST_WIDE_INT val = 0;
284 for ( ; ; ++*ps)
286 unsigned char c = target_to_host (**ps);
287 if (ISDIGIT (c))
289 c -= '0';
291 /* Check for overflow. */
292 if (val > ((unsigned HOST_WIDE_INT) HOST_WIDE_INT_MAX - c) / 10LU)
294 val = HOST_WIDE_INT_MAX;
295 *erange = *ps;
297 /* Skip the remaining digits. */
299 c = target_to_host (*++*ps);
300 while (ISDIGIT (c));
301 break;
303 else
304 val = val * 10 + c;
306 else
307 break;
310 return val;
313 /* Given FORMAT, set *PLOC to the source location of the format string
314 and return the format string if it is known or null otherwise. */
316 static const char*
317 get_format_string (tree format, location_t *ploc)
319 *ploc = EXPR_LOC_OR_LOC (format, input_location);
321 return c_getstr (format);
324 /* For convenience and brevity, shorter named entrypoints of
325 format_string_diagnostic_t::emit_warning_va and
326 format_string_diagnostic_t::emit_warning_n_va.
327 These have to be functions with the attribute so that exgettext
328 works properly. */
330 static bool
331 ATTRIBUTE_GCC_DIAG (5, 6)
332 fmtwarn (const substring_loc &fmt_loc, location_t param_loc,
333 const char *corrected_substring, int opt, const char *gmsgid, ...)
335 format_string_diagnostic_t diag (fmt_loc, NULL, param_loc, NULL,
336 corrected_substring);
337 va_list ap;
338 va_start (ap, gmsgid);
339 bool warned = diag.emit_warning_va (opt, gmsgid, &ap);
340 va_end (ap);
342 return warned;
345 static bool
346 ATTRIBUTE_GCC_DIAG (6, 8) ATTRIBUTE_GCC_DIAG (7, 8)
347 fmtwarn_n (const substring_loc &fmt_loc, location_t param_loc,
348 const char *corrected_substring, int opt, unsigned HOST_WIDE_INT n,
349 const char *singular_gmsgid, const char *plural_gmsgid, ...)
351 format_string_diagnostic_t diag (fmt_loc, NULL, param_loc, NULL,
352 corrected_substring);
353 va_list ap;
354 va_start (ap, plural_gmsgid);
355 bool warned = diag.emit_warning_n_va (opt, n, singular_gmsgid, plural_gmsgid,
356 &ap);
357 va_end (ap);
359 return warned;
/* The length modifiers of a conversion specification.  */

enum format_lengths
{
  FMT_LEN_none,   /* No length modifier.  */
  FMT_LEN_hh,     /* char argument.  */
  FMT_LEN_h,      /* short argument.  */
  FMT_LEN_l,      /* long argument.  */
  FMT_LEN_ll,     /* long long argument.  */
  FMT_LEN_L,      /* long double argument (and GNU long long).  */
  FMT_LEN_z,      /* size_t argument.  */
  FMT_LEN_t,      /* ptrdiff_t argument.  */
  FMT_LEN_j       /* intmax_t argument.  */
};
378 /* Description of the result of conversion either of a single directive
379 or the whole format string. */
381 class fmtresult
383 public:
384 /* Construct a FMTRESULT object with all counters initialized
385 to MIN. KNOWNRANGE is set when MIN is valid. */
386 fmtresult (unsigned HOST_WIDE_INT min = HOST_WIDE_INT_MAX)
387 : argmin (), argmax (), dst_offset (HOST_WIDE_INT_MIN), nonstr (),
388 knownrange (min < HOST_WIDE_INT_MAX),
389 mayfail (), nullp ()
391 range.min = min;
392 range.max = min;
393 range.likely = min;
394 range.unlikely = min;
397 /* Construct a FMTRESULT object with MIN, MAX, and LIKELY counters.
398 KNOWNRANGE is set when both MIN and MAX are valid. */
399 fmtresult (unsigned HOST_WIDE_INT min, unsigned HOST_WIDE_INT max,
400 unsigned HOST_WIDE_INT likely = HOST_WIDE_INT_MAX)
401 : argmin (), argmax (), dst_offset (HOST_WIDE_INT_MIN), nonstr (),
402 knownrange (min < HOST_WIDE_INT_MAX && max < HOST_WIDE_INT_MAX),
403 mayfail (), nullp ()
405 range.min = min;
406 range.max = max;
407 range.likely = max < likely ? min : likely;
408 range.unlikely = max;
411 /* Adjust result upward to reflect the RANGE of values the specified
412 width or precision is known to be in. */
413 fmtresult& adjust_for_width_or_precision (const HOST_WIDE_INT[2],
414 tree = NULL_TREE,
415 unsigned = 0, unsigned = 0);
417 /* Return the maximum number of decimal digits a value of TYPE
418 formats as on output. */
419 static unsigned type_max_digits (tree, int);
421 /* The range a directive's argument is in. */
422 tree argmin, argmax;
424 /* The starting offset into the destination of the formatted function
425 call of the %s argument that points into (aliases with) the same
426 destination array. */
427 HOST_WIDE_INT dst_offset;
429 /* The minimum and maximum number of bytes that a directive
430 results in on output for an argument in the range above. */
431 result_range range;
433 /* Non-nul when the argument of a string directive is not a nul
434 terminated string. */
435 tree nonstr;
437 /* True when the range above is obtained from a known value of
438 a directive's argument or its bounds and not the result of
439 heuristics that depend on warning levels. */
440 bool knownrange;
442 /* True for a directive that may fail (such as wide character
443 directives). */
444 bool mayfail;
446 /* True when the argument is a null pointer. */
447 bool nullp;
450 /* Adjust result upward to reflect the range ADJUST of values the
451 specified width or precision is known to be in. When non-null,
452 TYPE denotes the type of the directive whose result is being
453 adjusted, BASE gives the base of the directive (octal, decimal,
454 or hex), and ADJ denotes the additional adjustment to the LIKELY
455 counter that may need to be added when ADJUST is a range. */
457 fmtresult&
458 fmtresult::adjust_for_width_or_precision (const HOST_WIDE_INT adjust[2],
459 tree type /* = NULL_TREE */,
460 unsigned base /* = 0 */,
461 unsigned adj /* = 0 */)
463 bool minadjusted = false;
465 /* Adjust the minimum and likely counters. */
466 if (adjust[0] >= 0)
468 if (range.min < (unsigned HOST_WIDE_INT)adjust[0])
470 range.min = adjust[0];
471 minadjusted = true;
474 /* Adjust the likely counter. */
475 if (range.likely < range.min)
476 range.likely = range.min;
478 else if (adjust[0] == target_int_min ()
479 && (unsigned HOST_WIDE_INT)adjust[1] == target_int_max ())
480 knownrange = false;
482 /* Adjust the maximum counter. */
483 if (adjust[1] > 0)
485 if (range.max < (unsigned HOST_WIDE_INT)adjust[1])
487 range.max = adjust[1];
489 /* Set KNOWNRANGE if both the minimum and maximum have been
490 adjusted. Otherwise leave it at what it was before. */
491 knownrange = minadjusted;
495 if (warn_level > 1 && type)
497 /* For large non-constant width or precision whose range spans
498 the maximum number of digits produced by the directive for
499 any argument, set the likely number of bytes to be at most
500 the number digits plus other adjustment determined by the
501 caller (one for sign or two for the hexadecimal "0x"
502 prefix). */
503 unsigned dirdigs = type_max_digits (type, base);
504 if (adjust[0] < dirdigs && dirdigs < adjust[1]
505 && range.likely < dirdigs)
506 range.likely = dirdigs + adj;
508 else if (range.likely < (range.min ? range.min : 1))
510 /* Conservatively, set LIKELY to at least MIN but no less than
511 1 unless MAX is zero. */
512 range.likely = (range.min
513 ? range.min
514 : range.max && (range.max < HOST_WIDE_INT_MAX
515 || warn_level > 1) ? 1 : 0);
518 /* Finally adjust the unlikely counter to be at least as large as
519 the maximum. */
520 if (range.unlikely < range.max)
521 range.unlikely = range.max;
523 return *this;
526 /* Return the maximum number of digits a value of TYPE formats in
527 BASE on output, not counting base prefix . */
529 unsigned
530 fmtresult::type_max_digits (tree type, int base)
532 unsigned prec = TYPE_PRECISION (type);
533 switch (base)
535 case 8:
536 return (prec + 2) / 3;
537 case 10:
538 /* Decimal approximation: yields 3, 5, 10, and 20 for precision
539 of 8, 16, 32, and 64 bits. */
540 return prec * 301 / 1000 + 1;
541 case 16:
542 return prec / 4;
545 gcc_unreachable ();
548 static bool
549 get_int_range (tree, HOST_WIDE_INT *, HOST_WIDE_INT *, bool, HOST_WIDE_INT,
550 const vr_values *);
552 struct call_info;
554 /* Description of a format directive. A directive is either a plain
555 string or a conversion specification that starts with '%'. */
557 struct directive
559 directive (const call_info *inf, unsigned dno)
560 : info (inf), dirno (dno), argno (), beg (), len (), flags (),
561 width (), prec (), modifier (), specifier (), arg (), fmtfunc ()
564 /* Reference to the info structure describing the call that this
565 directive is a part of. */
566 const call_info *info;
568 /* The 1-based directive number (for debugging). */
569 unsigned dirno;
571 /* The zero-based argument number of the directive's argument ARG in
572 the function's argument list. */
573 unsigned argno;
575 /* The first character of the directive and its length. */
576 const char *beg;
577 size_t len;
579 /* A bitmap of flags, one for each character. */
580 unsigned flags[256 / sizeof (int)];
582 /* The range of values of the specified width, or -1 if not specified. */
583 HOST_WIDE_INT width[2];
584 /* The range of values of the specified precision, or -1 if not
585 specified. */
586 HOST_WIDE_INT prec[2];
588 /* Length modifier. */
589 format_lengths modifier;
591 /* Format specifier character. */
592 char specifier;
594 /* The argument of the directive or null when the directive doesn't
595 take one or when none is available (such as for vararg functions). */
596 tree arg;
598 /* Format conversion function that given a directive and an argument
599 returns the formatting result. */
600 fmtresult (*fmtfunc) (const directive &, tree, const vr_values *);
602 /* Return True when the format flag CHR has been used. */
603 bool get_flag (char chr) const
605 unsigned char c = chr & 0xff;
606 return (flags[c / (CHAR_BIT * sizeof *flags)]
607 & (1U << (c % (CHAR_BIT * sizeof *flags))));
610 /* Make a record of the format flag CHR having been used. */
611 void set_flag (char chr)
613 unsigned char c = chr & 0xff;
614 flags[c / (CHAR_BIT * sizeof *flags)]
615 |= (1U << (c % (CHAR_BIT * sizeof *flags)));
618 /* Reset the format flag CHR. */
619 void clear_flag (char chr)
621 unsigned char c = chr & 0xff;
622 flags[c / (CHAR_BIT * sizeof *flags)]
623 &= ~(1U << (c % (CHAR_BIT * sizeof *flags)));
626 /* Set both bounds of the width range to VAL. */
627 void set_width (HOST_WIDE_INT val)
629 width[0] = width[1] = val;
632 /* Set the width range according to ARG, with both bounds being
633 no less than 0. For a constant ARG set both bounds to its value
634 or 0, whichever is greater. For a non-constant ARG in some range
635 set width to its range adjusting each bound to -1 if it's less.
636 For an indeterminate ARG set width to [0, INT_MAX]. */
637 void set_width (tree arg, const vr_values *vr)
639 get_int_range (arg, width, width + 1, true, 0, vr);
642 /* Set both bounds of the precision range to VAL. */
643 void set_precision (HOST_WIDE_INT val)
645 prec[0] = prec[1] = val;
648 /* Set the precision range according to ARG, with both bounds being
649 no less than -1. For a constant ARG set both bounds to its value
650 or -1 whichever is greater. For a non-constant ARG in some range
651 set precision to its range adjusting each bound to -1 if it's less.
652 For an indeterminate ARG set precision to [-1, INT_MAX]. */
653 void set_precision (tree arg, const vr_values *vr)
655 get_int_range (arg, prec, prec + 1, false, -1, vr);
658 /* Return true if both width and precision are known to be
659 either constant or in some range, false otherwise. */
660 bool known_width_and_precision () const
662 return ((width[1] < 0
663 || (unsigned HOST_WIDE_INT)width[1] <= target_int_max ())
664 && (prec[1] < 0
665 || (unsigned HOST_WIDE_INT)prec[1] < target_int_max ()));
669 /* The result of a call to a formatted function. */
671 struct format_result
673 format_result ()
674 : range (), aliases (), alias_count (), knownrange (), posunder4k (),
675 floating (), warned () { /* No-op. */ }
677 ~format_result ()
679 XDELETEVEC (aliases);
682 /* Range of characters written by the formatted function.
683 Setting the minimum to HOST_WIDE_INT_MAX disables all
684 length tracking for the remainder of the format string. */
685 result_range range;
687 struct alias_info
689 directive dir; /* The directive that aliases the destination. */
690 HOST_WIDE_INT offset; /* The offset at which it aliases it. */
691 result_range range; /* The raw result of the directive. */
694 /* An array of directives whose pointer argument aliases a part
695 of the destination object of the formatted function. */
696 alias_info *aliases;
697 unsigned alias_count;
699 /* True when the range above is obtained from known values of
700 directive arguments, or bounds on the amount of output such
701 as width and precision, and not the result of heuristics that
702 depend on warning levels. It's used to issue stricter diagnostics
703 in cases where strings of unknown lengths are bounded by the arrays
704 they are determined to refer to. KNOWNRANGE must not be used for
705 the return value optimization. */
706 bool knownrange;
708 /* True if no individual directive could fail or result in more than
709 4095 bytes of output (the total NUMBER_CHARS_{MIN,MAX} might be
710 greater). Implementations are not required to handle directives
711 that produce more than 4K bytes (leading to undefined behavior)
712 and so when one is found it disables the return value optimization.
713 Similarly, directives that can fail (such as wide character
714 directives) disable the optimization. */
715 bool posunder4k;
717 /* True when a floating point directive has been seen in the format
718 string. */
719 bool floating;
721 /* True when an intermediate result has caused a warning. Used to
722 avoid issuing duplicate warnings while finishing the processing
723 of a call. WARNED also disables the return value optimization. */
724 bool warned;
726 /* Preincrement the number of output characters by 1. */
727 format_result& operator++ ()
729 return *this += 1;
732 /* Postincrement the number of output characters by 1. */
733 format_result operator++ (int)
735 format_result prev (*this);
736 *this += 1;
737 return prev;
740 /* Increment the number of output characters by N. */
741 format_result& operator+= (unsigned HOST_WIDE_INT);
743 /* Add a directive to the sequence of those with potentially aliasing
744 arguments. */
745 void append_alias (const directive &, HOST_WIDE_INT, const result_range &);
747 private:
748 /* Not copyable or assignable. */
749 format_result (format_result&);
750 void operator= (format_result&);
753 format_result&
754 format_result::operator+= (unsigned HOST_WIDE_INT n)
756 gcc_assert (n < HOST_WIDE_INT_MAX);
758 if (range.min < HOST_WIDE_INT_MAX)
759 range.min += n;
761 if (range.max < HOST_WIDE_INT_MAX)
762 range.max += n;
764 if (range.likely < HOST_WIDE_INT_MAX)
765 range.likely += n;
767 if (range.unlikely < HOST_WIDE_INT_MAX)
768 range.unlikely += n;
770 return *this;
773 void
774 format_result::append_alias (const directive &d, HOST_WIDE_INT off,
775 const result_range &resrng)
777 unsigned cnt = alias_count + 1;
778 alias_info *ar = XNEWVEC (alias_info, cnt);
780 for (unsigned i = 0; i != alias_count; ++i)
781 ar[i] = aliases[i];
783 ar[alias_count].dir = d;
784 ar[alias_count].offset = off;
785 ar[alias_count].range = resrng;
787 XDELETEVEC (aliases);
789 alias_count = cnt;
790 aliases = ar;
793 /* Return the logarithm of X in BASE. */
795 static int
796 ilog (unsigned HOST_WIDE_INT x, int base)
798 int res = 0;
801 ++res;
802 x /= base;
803 } while (x);
804 return res;
807 /* Return the number of bytes resulting from converting into a string
808 the INTEGER_CST tree node X in BASE with a minimum of PREC digits.
809 PLUS indicates whether 1 for a plus sign should be added for positive
810 numbers, and PREFIX whether the length of an octal ('O') or hexadecimal
811 ('0x') prefix should be added for nonzero numbers. Return -1 if X cannot
812 be represented. */
814 static HOST_WIDE_INT
815 tree_digits (tree x, int base, HOST_WIDE_INT prec, bool plus, bool prefix)
817 unsigned HOST_WIDE_INT absval;
819 HOST_WIDE_INT res;
821 if (TYPE_UNSIGNED (TREE_TYPE (x)))
823 if (tree_fits_uhwi_p (x))
825 absval = tree_to_uhwi (x);
826 res = plus;
828 else
829 return -1;
831 else
833 if (tree_fits_shwi_p (x))
835 HOST_WIDE_INT i = tree_to_shwi (x);
836 if (HOST_WIDE_INT_MIN == i)
838 /* Avoid undefined behavior due to negating a minimum. */
839 absval = HOST_WIDE_INT_MAX;
840 res = 1;
842 else if (i < 0)
844 absval = -i;
845 res = 1;
847 else
849 absval = i;
850 res = plus;
853 else
854 return -1;
857 int ndigs = ilog (absval, base);
859 res += prec < ndigs ? ndigs : prec;
861 /* Adjust a non-zero value for the base prefix, either hexadecimal,
862 or, unless precision has resulted in a leading zero, also octal. */
863 if (prefix && absval && (base == 16 || prec <= ndigs))
865 if (base == 8)
866 res += 1;
867 else if (base == 16)
868 res += 2;
871 return res;
874 /* Description of a call to a formatted function. */
876 struct call_info
878 /* Function call statement. */
879 gimple *callstmt;
881 /* Function called. */
882 tree func;
884 /* Called built-in function code. */
885 built_in_function fncode;
887 /* The "origin" of the destination pointer argument, which is either
888 the DECL of the destination buffer being written into or a pointer
889 that points to it, plus some offset. */
890 tree dst_origin;
892 /* For a destination pointing to a struct array member, the offset of
893 the member. */
894 HOST_WIDE_INT dst_field;
896 /* The offset into the destination buffer. */
897 HOST_WIDE_INT dst_offset;
899 /* Format argument and format string extracted from it. */
900 tree format;
901 const char *fmtstr;
903 /* The location of the format argument. */
904 location_t fmtloc;
906 /* The destination object size for __builtin___xxx_chk functions
907 typically determined by __builtin_object_size, or -1 if unknown. */
908 unsigned HOST_WIDE_INT objsize;
910 /* Number of the first variable argument. */
911 unsigned HOST_WIDE_INT argidx;
913 /* True for functions like snprintf that specify the size of
914 the destination, false for others like sprintf that don't. */
915 bool bounded;
917 /* True for bounded functions like snprintf that specify a zero-size
918 buffer as a request to compute the size of output without actually
919 writing any. NOWRITE is cleared in response to the %n directive
920 which has side-effects similar to writing output. */
921 bool nowrite;
923 /* Return true if the called function's return value is used. */
924 bool retval_used () const
926 return gimple_get_lhs (callstmt);
929 /* Return the warning option corresponding to the called function. */
930 int warnopt () const
932 return bounded ? OPT_Wformat_truncation_ : OPT_Wformat_overflow_;
935 /* Return true for calls to file formatted functions. */
936 bool is_file_func () const
938 return (fncode == BUILT_IN_FPRINTF
939 || fncode == BUILT_IN_FPRINTF_CHK
940 || fncode == BUILT_IN_FPRINTF_UNLOCKED
941 || fncode == BUILT_IN_VFPRINTF
942 || fncode == BUILT_IN_VFPRINTF_CHK);
945 /* Return true for calls to string formatted functions. */
946 bool is_string_func () const
948 return (fncode == BUILT_IN_SPRINTF
949 || fncode == BUILT_IN_SPRINTF_CHK
950 || fncode == BUILT_IN_SNPRINTF
951 || fncode == BUILT_IN_SNPRINTF_CHK
952 || fncode == BUILT_IN_VSPRINTF
953 || fncode == BUILT_IN_VSPRINTF_CHK
954 || fncode == BUILT_IN_VSNPRINTF
955 || fncode == BUILT_IN_VSNPRINTF_CHK);
959 /* Return the result of formatting a no-op directive (such as '%n'). */
961 static fmtresult
962 format_none (const directive &, tree, const vr_values *)
964 fmtresult res (0);
965 return res;
968 /* Return the result of formatting the '%%' directive. */
970 static fmtresult
971 format_percent (const directive &, tree, const vr_values *)
973 fmtresult res (1);
974 return res;
978 /* Compute intmax_type_node and uintmax_type_node similarly to how
979 tree.c builds size_type_node. */
981 static void
982 build_intmax_type_nodes (tree *pintmax, tree *puintmax)
984 if (strcmp (UINTMAX_TYPE, "unsigned int") == 0)
986 *pintmax = integer_type_node;
987 *puintmax = unsigned_type_node;
989 else if (strcmp (UINTMAX_TYPE, "long unsigned int") == 0)
991 *pintmax = long_integer_type_node;
992 *puintmax = long_unsigned_type_node;
994 else if (strcmp (UINTMAX_TYPE, "long long unsigned int") == 0)
996 *pintmax = long_long_integer_type_node;
997 *puintmax = long_long_unsigned_type_node;
999 else
1001 for (int i = 0; i < NUM_INT_N_ENTS; i++)
1002 if (int_n_enabled_p[i])
1004 char name[50], altname[50];
1005 sprintf (name, "__int%d unsigned", int_n_data[i].bitsize);
1006 sprintf (altname, "__int%d__ unsigned", int_n_data[i].bitsize);
1008 if (strcmp (name, UINTMAX_TYPE) == 0
1009 || strcmp (altname, UINTMAX_TYPE) == 0)
1011 *pintmax = int_n_trees[i].signed_type;
1012 *puintmax = int_n_trees[i].unsigned_type;
1013 return;
1016 gcc_unreachable ();
1020 /* Determine the range [*PMIN, *PMAX] that the expression ARG is
1021 in and that is representable in type int.
1022 Return true when the range is a subrange of that of int.
1023 When ARG is null it is as if it had the full range of int.
1024 When ABSOLUTE is true the range reflects the absolute value of
1025 the argument. When ABSOLUTE is false, negative bounds of
1026 the determined range are replaced with NEGBOUND. */
1028 static bool
1029 get_int_range (tree arg, HOST_WIDE_INT *pmin, HOST_WIDE_INT *pmax,
1030 bool absolute, HOST_WIDE_INT negbound,
1031 const class vr_values *vr_values)
1033 /* The type of the result. */
1034 const_tree type = integer_type_node;
1036 bool knownrange = false;
1038 if (!arg)
1040 *pmin = tree_to_shwi (TYPE_MIN_VALUE (type));
1041 *pmax = tree_to_shwi (TYPE_MAX_VALUE (type));
1043 else if (TREE_CODE (arg) == INTEGER_CST
1044 && TYPE_PRECISION (TREE_TYPE (arg)) <= TYPE_PRECISION (type))
1046 /* For a constant argument return its value adjusted as specified
1047 by NEGATIVE and NEGBOUND and return true to indicate that the
1048 result is known. */
1049 *pmin = tree_fits_shwi_p (arg) ? tree_to_shwi (arg) : tree_to_uhwi (arg);
1050 *pmax = *pmin;
1051 knownrange = true;
1053 else
1055 /* True if the argument's range cannot be determined. */
1056 bool unknown = true;
1058 tree argtype = TREE_TYPE (arg);
1060 /* Ignore invalid arguments with greater precision that that
1061 of the expected type (e.g., in sprintf("%*i", 12LL, i)).
1062 They will have been detected and diagnosed by -Wformat and
1063 so it's not important to complicate this code to try to deal
1064 with them again. */
1065 if (TREE_CODE (arg) == SSA_NAME
1066 && INTEGRAL_TYPE_P (argtype)
1067 && TYPE_PRECISION (argtype) <= TYPE_PRECISION (type))
1069 /* Try to determine the range of values of the integer argument. */
1070 const value_range_equiv *vr
1071 = CONST_CAST (class vr_values *, vr_values)->get_value_range (arg);
1073 if (!vr->undefined_p () && !vr->varying_p () && !vr->symbolic_p ())
1075 HOST_WIDE_INT type_min
1076 = (TYPE_UNSIGNED (argtype)
1077 ? tree_to_uhwi (TYPE_MIN_VALUE (argtype))
1078 : tree_to_shwi (TYPE_MIN_VALUE (argtype)));
1080 HOST_WIDE_INT type_max = tree_to_uhwi (TYPE_MAX_VALUE (argtype));
1082 tree type = TREE_TYPE (arg);
1083 tree tmin = wide_int_to_tree (type, vr->lower_bound ());
1084 tree tmax = wide_int_to_tree (type, vr->upper_bound ());
1085 *pmin = TREE_INT_CST_LOW (tmin);
1086 *pmax = TREE_INT_CST_LOW (tmax);
1088 if (*pmin < *pmax)
1090 /* Return true if the adjusted range is a subrange of
1091 the full range of the argument's type. *PMAX may
1092 be less than *PMIN when the argument is unsigned
1093 and its upper bound is in excess of TYPE_MAX. In
1094 that (invalid) case disregard the range and use that
1095 of the expected type instead. */
1096 knownrange = type_min < *pmin || *pmax < type_max;
1098 unknown = false;
1103 /* Handle an argument with an unknown range as if none had been
1104 provided. */
1105 if (unknown)
1106 return get_int_range (NULL_TREE, pmin, pmax, absolute,
1107 negbound, vr_values);
1110 /* Adjust each bound as specified by ABSOLUTE and NEGBOUND. */
1111 if (absolute)
1113 if (*pmin < 0)
1115 if (*pmin == *pmax)
1116 *pmin = *pmax = -*pmin;
1117 else
1119 /* Make sure signed overlow is avoided. */
1120 gcc_assert (*pmin != HOST_WIDE_INT_MIN);
1122 HOST_WIDE_INT tmp = -*pmin;
1123 *pmin = 0;
1124 if (*pmax < tmp)
1125 *pmax = tmp;
1129 else if (*pmin < negbound)
1130 *pmin = negbound;
1132 return knownrange;
1135 /* With the range [*ARGMIN, *ARGMAX] of an integer directive's actual
1136 argument, due to the conversion from either *ARGMIN or *ARGMAX to
1137 the type of the directive's formal argument it's possible for both
1138 to result in the same number of bytes or a range of bytes that's
1139 less than the number of bytes that would result from formatting
1140 some other value in the range [*ARGMIN, *ARGMAX]. This can be
1141 determined by checking for the actual argument being in the range
1142 of the type of the directive. If it isn't it must be assumed to
1143 take on the full range of the directive's type.
1144 Return true when the range has been adjusted to the full range
1145 of DIRTYPE, and false otherwise. */
1147 static bool
1148 adjust_range_for_overflow (tree dirtype, tree *argmin, tree *argmax)
1150 tree argtype = TREE_TYPE (*argmin);
1151 unsigned argprec = TYPE_PRECISION (argtype);
1152 unsigned dirprec = TYPE_PRECISION (dirtype);
1154 /* If the actual argument and the directive's argument have the same
1155 precision and sign there can be no overflow and so there is nothing
1156 to adjust. */
1157 if (argprec == dirprec && TYPE_SIGN (argtype) == TYPE_SIGN (dirtype))
1158 return false;
1160 /* The logic below was inspired/lifted from the CONVERT_EXPR_CODE_P
1161 branch in the extract_range_from_unary_expr function in tree-vrp.c. */
1163 if (TREE_CODE (*argmin) == INTEGER_CST
1164 && TREE_CODE (*argmax) == INTEGER_CST
1165 && (dirprec >= argprec
1166 || integer_zerop (int_const_binop (RSHIFT_EXPR,
1167 int_const_binop (MINUS_EXPR,
1168 *argmax,
1169 *argmin),
1170 size_int (dirprec)))))
1172 *argmin = force_fit_type (dirtype, wi::to_widest (*argmin), 0, false);
1173 *argmax = force_fit_type (dirtype, wi::to_widest (*argmax), 0, false);
1175 /* If *ARGMIN is still less than *ARGMAX the conversion above
1176 is safe. Otherwise, it has overflowed and would be unsafe. */
1177 if (tree_int_cst_le (*argmin, *argmax))
1178 return false;
1181 *argmin = TYPE_MIN_VALUE (dirtype);
1182 *argmax = TYPE_MAX_VALUE (dirtype);
1183 return true;
1186 /* Return a range representing the minimum and maximum number of bytes
1187 that the format directive DIR will output for any argument given
1188 the WIDTH and PRECISION (extracted from DIR). This function is
1189 used when the directive argument or its value isn't known. */
1191 static fmtresult
1192 format_integer (const directive &dir, tree arg, const vr_values *vr_values)
1194 tree intmax_type_node;
1195 tree uintmax_type_node;
1197 /* Base to format the number in. */
1198 int base;
1200 /* True when a conversion is preceded by a prefix indicating the base
1201 of the argument (octal or hexadecimal). */
1202 bool maybebase = dir.get_flag ('#');
1204 /* True when a signed conversion is preceded by a sign or space. */
1205 bool maybesign = false;
1207 /* True for signed conversions (i.e., 'd' and 'i'). */
1208 bool sign = false;
1210 switch (dir.specifier)
1212 case 'd':
1213 case 'i':
1214 /* Space and '+' are only meaningful for signed conversions. */
1215 maybesign = dir.get_flag (' ') | dir.get_flag ('+');
1216 sign = true;
1217 base = 10;
1218 break;
1219 case 'u':
1220 base = 10;
1221 break;
1222 case 'o':
1223 base = 8;
1224 break;
1225 case 'X':
1226 case 'x':
1227 base = 16;
1228 break;
1229 default:
1230 gcc_unreachable ();
1233 /* The type of the "formal" argument expected by the directive. */
1234 tree dirtype = NULL_TREE;
1236 /* Determine the expected type of the argument from the length
1237 modifier. */
1238 switch (dir.modifier)
1240 case FMT_LEN_none:
1241 if (dir.specifier == 'p')
1242 dirtype = ptr_type_node;
1243 else
1244 dirtype = sign ? integer_type_node : unsigned_type_node;
1245 break;
1247 case FMT_LEN_h:
1248 dirtype = sign ? short_integer_type_node : short_unsigned_type_node;
1249 break;
1251 case FMT_LEN_hh:
1252 dirtype = sign ? signed_char_type_node : unsigned_char_type_node;
1253 break;
1255 case FMT_LEN_l:
1256 dirtype = sign ? long_integer_type_node : long_unsigned_type_node;
1257 break;
1259 case FMT_LEN_L:
1260 case FMT_LEN_ll:
1261 dirtype = (sign
1262 ? long_long_integer_type_node
1263 : long_long_unsigned_type_node);
1264 break;
1266 case FMT_LEN_z:
1267 dirtype = signed_or_unsigned_type_for (!sign, size_type_node);
1268 break;
1270 case FMT_LEN_t:
1271 dirtype = signed_or_unsigned_type_for (!sign, ptrdiff_type_node);
1272 break;
1274 case FMT_LEN_j:
1275 build_intmax_type_nodes (&intmax_type_node, &uintmax_type_node);
1276 dirtype = sign ? intmax_type_node : uintmax_type_node;
1277 break;
1279 default:
1280 return fmtresult ();
1283 /* The type of the argument to the directive, either deduced from
1284 the actual non-constant argument if one is known, or from
1285 the directive itself when none has been provided because it's
1286 a va_list. */
1287 tree argtype = NULL_TREE;
1289 if (!arg)
1291 /* When the argument has not been provided, use the type of
1292 the directive's argument as an approximation. This will
1293 result in false positives for directives like %i with
1294 arguments with smaller precision (such as short or char). */
1295 argtype = dirtype;
1297 else if (TREE_CODE (arg) == INTEGER_CST)
1299 /* When a constant argument has been provided use its value
1300 rather than type to determine the length of the output. */
1301 fmtresult res;
1303 if ((dir.prec[0] <= 0 && dir.prec[1] >= 0) && integer_zerop (arg))
1305 /* As a special case, a precision of zero with a zero argument
1306 results in zero bytes except in base 8 when the '#' flag is
1307 specified, and for signed conversions in base 8 and 10 when
1308 either the space or '+' flag has been specified and it results
1309 in just one byte (with width having the normal effect). This
1310 must extend to the case of a specified precision with
1311 an unknown value because it can be zero. */
1312 res.range.min = ((base == 8 && dir.get_flag ('#')) || maybesign);
1313 if (res.range.min == 0 && dir.prec[0] != dir.prec[1])
1315 res.range.max = 1;
1316 res.range.likely = 1;
1318 else
1320 res.range.max = res.range.min;
1321 res.range.likely = res.range.min;
1324 else
1326 /* Convert the argument to the type of the directive. */
1327 arg = fold_convert (dirtype, arg);
1329 res.range.min = tree_digits (arg, base, dir.prec[0],
1330 maybesign, maybebase);
1331 if (dir.prec[0] == dir.prec[1])
1332 res.range.max = res.range.min;
1333 else
1334 res.range.max = tree_digits (arg, base, dir.prec[1],
1335 maybesign, maybebase);
1336 res.range.likely = res.range.min;
1337 res.knownrange = true;
1340 res.range.unlikely = res.range.max;
1342 /* Bump up the counters if WIDTH is greater than LEN. */
1343 res.adjust_for_width_or_precision (dir.width, dirtype, base,
1344 (sign | maybebase) + (base == 16));
1345 /* Bump up the counters again if PRECision is greater still. */
1346 res.adjust_for_width_or_precision (dir.prec, dirtype, base,
1347 (sign | maybebase) + (base == 16));
1349 return res;
1351 else if (INTEGRAL_TYPE_P (TREE_TYPE (arg))
1352 || TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE)
1353 /* Determine the type of the provided non-constant argument. */
1354 argtype = TREE_TYPE (arg);
1355 else
1356 /* Don't bother with invalid arguments since they likely would
1357 have already been diagnosed, and disable any further checking
1358 of the format string by returning [-1, -1]. */
1359 return fmtresult ();
1361 fmtresult res;
1363 /* Using either the range the non-constant argument is in, or its
1364 type (either "formal" or actual), create a range of values that
1365 constrain the length of output given the warning level. */
1366 tree argmin = NULL_TREE;
1367 tree argmax = NULL_TREE;
1369 if (arg
1370 && TREE_CODE (arg) == SSA_NAME
1371 && INTEGRAL_TYPE_P (argtype))
1373 /* Try to determine the range of values of the integer argument
1374 (range information is not available for pointers). */
1375 const value_range_equiv *vr
1376 = CONST_CAST (class vr_values *, vr_values)->get_value_range (arg);
1378 if (!vr->varying_p () && !vr->undefined_p () && !vr->symbolic_p ())
1380 argmin = wide_int_to_tree (TREE_TYPE (arg), vr->lower_bound ());
1381 argmax = wide_int_to_tree (TREE_TYPE (arg), vr->upper_bound ());
1383 /* Set KNOWNRANGE if the argument is in a known subrange
1384 of the directive's type and neither width nor precision
1385 is unknown. (KNOWNRANGE may be reset below). */
1386 res.knownrange
1387 = ((!tree_int_cst_equal (TYPE_MIN_VALUE (dirtype), argmin)
1388 || !tree_int_cst_equal (TYPE_MAX_VALUE (dirtype), argmax))
1389 && dir.known_width_and_precision ());
1391 res.argmin = argmin;
1392 res.argmax = argmax;
1394 else
1396 /* The argument here may be the result of promoting the actual
1397 argument to int. Try to determine the type of the actual
1398 argument before promotion and narrow down its range that
1399 way. */
1400 gimple *def = SSA_NAME_DEF_STMT (arg);
1401 if (is_gimple_assign (def))
1403 tree_code code = gimple_assign_rhs_code (def);
1404 if (code == INTEGER_CST)
1406 arg = gimple_assign_rhs1 (def);
1407 return format_integer (dir, arg, vr_values);
1410 if (code == NOP_EXPR)
1412 tree type = TREE_TYPE (gimple_assign_rhs1 (def));
1413 if (INTEGRAL_TYPE_P (type)
1414 || TREE_CODE (type) == POINTER_TYPE)
1415 argtype = type;
1421 if (!argmin)
1423 if (TREE_CODE (argtype) == POINTER_TYPE)
1425 argmin = build_int_cst (pointer_sized_int_node, 0);
1426 argmax = build_all_ones_cst (pointer_sized_int_node);
1428 else
1430 argmin = TYPE_MIN_VALUE (argtype);
1431 argmax = TYPE_MAX_VALUE (argtype);
1435 /* Clear KNOWNRANGE if the range has been adjusted to the maximum
1436 of the directive. If it has been cleared then since ARGMIN and/or
1437 ARGMAX have been adjusted also adjust the corresponding ARGMIN and
1438 ARGMAX in the result to include in diagnostics. */
1439 if (adjust_range_for_overflow (dirtype, &argmin, &argmax))
1441 res.knownrange = false;
1442 res.argmin = argmin;
1443 res.argmax = argmax;
1446 /* Recursively compute the minimum and maximum from the known range. */
1447 if (TYPE_UNSIGNED (dirtype) || tree_int_cst_sgn (argmin) >= 0)
1449 /* For unsigned conversions/directives or signed when
1450 the minimum is positive, use the minimum and maximum to compute
1451 the shortest and longest output, respectively. */
1452 res.range.min = format_integer (dir, argmin, vr_values).range.min;
1453 res.range.max = format_integer (dir, argmax, vr_values).range.max;
1455 else if (tree_int_cst_sgn (argmax) < 0)
1457 /* For signed conversions/directives if maximum is negative,
1458 use the minimum as the longest output and maximum as the
1459 shortest output. */
1460 res.range.min = format_integer (dir, argmax, vr_values).range.min;
1461 res.range.max = format_integer (dir, argmin, vr_values).range.max;
1463 else
1465 /* Otherwise, 0 is inside of the range and minimum negative. Use 0
1466 as the shortest output and for the longest output compute the
1467 length of the output of both minimum and maximum and pick the
1468 longer. */
1469 unsigned HOST_WIDE_INT max1
1470 = format_integer (dir, argmin, vr_values).range.max;
1471 unsigned HOST_WIDE_INT max2
1472 = format_integer (dir, argmax, vr_values).range.max;
1473 res.range.min
1474 = format_integer (dir, integer_zero_node, vr_values).range.min;
1475 res.range.max = MAX (max1, max2);
1478 /* If the range is known, use the maximum as the likely length. */
1479 if (res.knownrange)
1480 res.range.likely = res.range.max;
1481 else
1483 /* Otherwise, use the minimum. Except for the case where for %#x or
1484 %#o the minimum is just for a single value in the range (0) and
1485 for all other values it is something longer, like 0x1 or 01.
1486 Use the length for value 1 in that case instead as the likely
1487 length. */
1488 res.range.likely = res.range.min;
1489 if (maybebase
1490 && base != 10
1491 && (tree_int_cst_sgn (argmin) < 0 || tree_int_cst_sgn (argmax) > 0))
1493 if (res.range.min == 1)
1494 res.range.likely += base == 8 ? 1 : 2;
1495 else if (res.range.min == 2
1496 && base == 16
1497 && (dir.width[0] == 2 || dir.prec[0] == 2))
1498 ++res.range.likely;
1502 res.range.unlikely = res.range.max;
1503 res.adjust_for_width_or_precision (dir.width, dirtype, base,
1504 (sign | maybebase) + (base == 16));
1505 res.adjust_for_width_or_precision (dir.prec, dirtype, base,
1506 (sign | maybebase) + (base == 16));
1508 return res;
1511 /* Return the number of bytes that a format directive consisting of FLAGS,
1512 PRECision, format SPECification, and MPFR rounding specifier RNDSPEC,
1513 would result for argument X under ideal conditions (i.e., if PREC
1514 weren't excessive). MPFR 3.1 allocates large amounts of memory for
1515 values of PREC with large magnitude and can fail (see MPFR bug #21056).
1516 This function works around those problems. */
1518 static unsigned HOST_WIDE_INT
1519 get_mpfr_format_length (mpfr_ptr x, const char *flags, HOST_WIDE_INT prec,
1520 char spec, char rndspec)
1522 char fmtstr[40];
1524 HOST_WIDE_INT len = strlen (flags);
1526 fmtstr[0] = '%';
1527 memcpy (fmtstr + 1, flags, len);
1528 memcpy (fmtstr + 1 + len, ".*R", 3);
1529 fmtstr[len + 4] = rndspec;
1530 fmtstr[len + 5] = spec;
1531 fmtstr[len + 6] = '\0';
1533 spec = TOUPPER (spec);
1534 if (spec == 'E' || spec == 'F')
1536 /* For %e, specify the precision explicitly since mpfr_sprintf
1537 does its own thing just to be different (see MPFR bug 21088). */
1538 if (prec < 0)
1539 prec = 6;
1541 else
1543 /* Avoid passing negative precisions with larger magnitude to MPFR
1544 to avoid exposing its bugs. (A negative precision is supposed
1545 to be ignored.) */
1546 if (prec < 0)
1547 prec = -1;
1550 HOST_WIDE_INT p = prec;
1552 if (spec == 'G' && !strchr (flags, '#'))
1554 /* For G/g without the pound flag, precision gives the maximum number
1555 of significant digits which is bounded by LDBL_MAX_10_EXP, or, for
1556 a 128 bit IEEE extended precision, 4932. Using twice as much here
1557 should be more than sufficient for any real format. */
1558 if ((IEEE_MAX_10_EXP * 2) < prec)
1559 prec = IEEE_MAX_10_EXP * 2;
1560 p = prec;
1562 else
1564 /* Cap precision arbitrarily at 1KB and add the difference
1565 (if any) to the MPFR result. */
1566 if (prec > 1024)
1567 p = 1024;
1570 len = mpfr_snprintf (NULL, 0, fmtstr, (int)p, x);
1572 /* Handle the unlikely (impossible?) error by returning more than
1573 the maximum dictated by the function's return type. */
1574 if (len < 0)
1575 return target_dir_max () + 1;
1577 /* Adjust the return value by the difference. */
1578 if (p < prec)
1579 len += prec - p;
1581 return len;
1584 /* Return the number of bytes to format using the format specifier
1585 SPEC and the precision PREC the largest value in the real floating
1586 TYPE. */
1588 static unsigned HOST_WIDE_INT
1589 format_floating_max (tree type, char spec, HOST_WIDE_INT prec)
1591 machine_mode mode = TYPE_MODE (type);
1593 /* IBM Extended mode. */
1594 if (MODE_COMPOSITE_P (mode))
1595 mode = DFmode;
1597 /* Get the real type format description for the target. */
1598 const real_format *rfmt = REAL_MODE_FORMAT (mode);
1599 REAL_VALUE_TYPE rv;
1601 real_maxval (&rv, 0, mode);
1603 /* Convert the GCC real value representation with the precision
1604 of the real type to the mpfr_t format with the GCC default
1605 round-to-nearest mode. */
1606 mpfr_t x;
1607 mpfr_init2 (x, rfmt->p);
1608 mpfr_from_real (x, &rv, MPFR_RNDN);
1610 /* Return a value one greater to account for the leading minus sign. */
1611 unsigned HOST_WIDE_INT r
1612 = 1 + get_mpfr_format_length (x, "", prec, spec, 'D');
1613 mpfr_clear (x);
1614 return r;
1617 /* Return a range representing the minimum and maximum number of bytes
1618 that the directive DIR will output for any argument. PREC gives
1619 the adjusted precision range to account for negative precisions
1620 meaning the default 6. This function is used when the directive
1621 argument or its value isn't known. */
1623 static fmtresult
1624 format_floating (const directive &dir, const HOST_WIDE_INT prec[2])
1626 tree type;
1628 switch (dir.modifier)
1630 case FMT_LEN_l:
1631 case FMT_LEN_none:
1632 type = double_type_node;
1633 break;
1635 case FMT_LEN_L:
1636 type = long_double_type_node;
1637 break;
1639 case FMT_LEN_ll:
1640 type = long_double_type_node;
1641 break;
1643 default:
1644 return fmtresult ();
1647 /* The minimum and maximum number of bytes produced by the directive. */
1648 fmtresult res;
1650 /* The minimum output as determined by flags. It's always at least 1.
1651 When plus or space are set the output is preceded by either a sign
1652 or a space. */
1653 unsigned flagmin = (1 /* for the first digit */
1654 + (dir.get_flag ('+') | dir.get_flag (' ')));
1656 /* The minimum is 3 for "inf" and "nan" for all specifiers, plus 1
1657 for the plus sign/space with the '+' and ' ' flags, respectively,
1658 unless reduced below. */
1659 res.range.min = 2 + flagmin;
1661 /* When the pound flag is set the decimal point is included in output
1662 regardless of precision. Whether or not a decimal point is included
1663 otherwise depends on the specification and precision. */
1664 bool radix = dir.get_flag ('#');
1666 switch (dir.specifier)
1668 case 'A':
1669 case 'a':
1671 HOST_WIDE_INT minprec = 6 + !radix /* decimal point */;
1672 if (dir.prec[0] <= 0)
1673 minprec = 0;
1674 else if (dir.prec[0] > 0)
1675 minprec = dir.prec[0] + !radix /* decimal point */;
1677 res.range.likely = (2 /* 0x */
1678 + flagmin
1679 + radix
1680 + minprec
1681 + 3 /* p+0 */);
1683 res.range.max = format_floating_max (type, 'a', prec[1]);
1685 /* The unlikely maximum accounts for the longest multibyte
1686 decimal point character. */
1687 res.range.unlikely = res.range.max;
1688 if (dir.prec[1] > 0)
1689 res.range.unlikely += target_mb_len_max () - 1;
1691 break;
1694 case 'E':
1695 case 'e':
1697 /* Minimum output attributable to precision and, when it's
1698 non-zero, decimal point. */
1699 HOST_WIDE_INT minprec = prec[0] ? prec[0] + !radix : 0;
1701 /* The likely minimum output is "[-+]1.234567e+00" regardless
1702 of the value of the actual argument. */
1703 res.range.likely = (flagmin
1704 + radix
1705 + minprec
1706 + 2 /* e+ */ + 2);
1708 res.range.max = format_floating_max (type, 'e', prec[1]);
1710 /* The unlikely maximum accounts for the longest multibyte
1711 decimal point character. */
1712 if (dir.prec[0] != dir.prec[1]
1713 || dir.prec[0] == -1 || dir.prec[0] > 0)
1714 res.range.unlikely = res.range.max + target_mb_len_max () -1;
1715 else
1716 res.range.unlikely = res.range.max;
1717 break;
1720 case 'F':
1721 case 'f':
1723 /* Minimum output attributable to precision and, when it's non-zero,
1724 decimal point. */
1725 HOST_WIDE_INT minprec = prec[0] ? prec[0] + !radix : 0;
1727 /* For finite numbers (i.e., not infinity or NaN) the lower bound
1728 when precision isn't specified is 8 bytes ("1.23456" since
1729 precision is taken to be 6). When precision is zero, the lower
1730 bound is 1 byte (e.g., "1"). Otherwise, when precision is greater
1731 than zero, then the lower bound is 2 plus precision (plus flags).
1732 But in all cases, the lower bound is no greater than 3. */
1733 unsigned HOST_WIDE_INT min = flagmin + radix + minprec;
1734 if (min < res.range.min)
1735 res.range.min = min;
1737 /* Compute the upper bound for -TYPE_MAX. */
1738 res.range.max = format_floating_max (type, 'f', prec[1]);
1740 /* The minimum output with unknown precision is a single byte
1741 (e.g., "0") but the more likely output is 3 bytes ("0.0"). */
1742 if (dir.prec[0] < 0 && dir.prec[1] > 0)
1743 res.range.likely = 3;
1744 else
1745 res.range.likely = min;
1747 /* The unlikely maximum accounts for the longest multibyte
1748 decimal point character. */
1749 if (dir.prec[0] != dir.prec[1]
1750 || dir.prec[0] == -1 || dir.prec[0] > 0)
1751 res.range.unlikely = res.range.max + target_mb_len_max () - 1;
1752 break;
1755 case 'G':
1756 case 'g':
1758 /* The %g output depends on precision and the exponent of
1759 the argument. Since the value of the argument isn't known
1760 the lower bound on the range of bytes (not counting flags
1761 or width) is 1 plus radix (i.e., either "0" or "0." for
1762 "%g" and "%#g", respectively, with a zero argument). */
1763 unsigned HOST_WIDE_INT min = flagmin + radix;
1764 if (min < res.range.min)
1765 res.range.min = min;
1767 char spec = 'g';
1768 HOST_WIDE_INT maxprec = dir.prec[1];
1769 if (radix && maxprec)
1771 /* When the pound flag (radix) is set, trailing zeros aren't
1772 trimmed and so the longest output is the same as for %e,
1773 except with precision minus 1 (as specified in C11). */
1774 spec = 'e';
1775 if (maxprec > 0)
1776 --maxprec;
1777 else if (maxprec < 0)
1778 maxprec = 5;
1780 else
1781 maxprec = prec[1];
1783 res.range.max = format_floating_max (type, spec, maxprec);
1785 /* The likely output is either the maximum computed above
1786 minus 1 (assuming the maximum is positive) when precision
1787 is known (or unspecified), or the same minimum as for %e
1788 (which is computed for a non-negative argument). Unlike
1789 for the other specifiers above the likely output isn't
1790 the minimum because for %g that's 1 which is unlikely. */
1791 if (dir.prec[1] < 0
1792 || (unsigned HOST_WIDE_INT)dir.prec[1] < target_int_max ())
1793 res.range.likely = res.range.max - 1;
1794 else
1796 HOST_WIDE_INT minprec = 6 + !radix /* decimal point */;
1797 res.range.likely = (flagmin
1798 + radix
1799 + minprec
1800 + 2 /* e+ */ + 2);
1803 /* The unlikely maximum accounts for the longest multibyte
1804 decimal point character. */
1805 res.range.unlikely = res.range.max + target_mb_len_max () - 1;
1806 break;
1809 default:
1810 return fmtresult ();
1813 /* Bump up the byte counters if WIDTH is greater. */
1814 res.adjust_for_width_or_precision (dir.width);
1815 return res;
1818 /* Return a range representing the minimum and maximum number of bytes
1819 that the directive DIR will write on output for the floating argument
1820 ARG. */
1822 static fmtresult
1823 format_floating (const directive &dir, tree arg, const vr_values *)
1825 HOST_WIDE_INT prec[] = { dir.prec[0], dir.prec[1] };
1826 tree type = (dir.modifier == FMT_LEN_L || dir.modifier == FMT_LEN_ll
1827 ? long_double_type_node : double_type_node);
1829 /* For an indeterminate precision the lower bound must be assumed
1830 to be zero. */
1831 if (TOUPPER (dir.specifier) == 'A')
1833 /* Get the number of fractional decimal digits needed to represent
1834 the argument without a loss of accuracy. */
1835 unsigned fmtprec
1836 = REAL_MODE_FORMAT (TYPE_MODE (type))->p;
1838 /* The precision of the IEEE 754 double format is 53.
1839 The precision of all other GCC binary double formats
1840 is 56 or less. */
1841 unsigned maxprec = fmtprec <= 56 ? 13 : 15;
1843 /* For %a, leave the minimum precision unspecified to let
1844 MFPR trim trailing zeros (as it and many other systems
1845 including Glibc happen to do) and set the maximum
1846 precision to reflect what it would be with trailing zeros
1847 present (as Solaris and derived systems do). */
1848 if (dir.prec[1] < 0)
1850 /* Both bounds are negative implies that precision has
1851 not been specified. */
1852 prec[0] = maxprec;
1853 prec[1] = -1;
1855 else if (dir.prec[0] < 0)
1857 /* With a negative lower bound and a non-negative upper
1858 bound set the minimum precision to zero and the maximum
1859 to the greater of the maximum precision (i.e., with
1860 trailing zeros present) and the specified upper bound. */
1861 prec[0] = 0;
1862 prec[1] = dir.prec[1] < maxprec ? maxprec : dir.prec[1];
1865 else if (dir.prec[0] < 0)
1867 if (dir.prec[1] < 0)
1869 /* A precision in a strictly negative range is ignored and
1870 the default of 6 is used instead. */
1871 prec[0] = prec[1] = 6;
1873 else
1875 /* For a precision in a partly negative range, the lower bound
1876 must be assumed to be zero and the new upper bound is the
1877 greater of 6 (the default precision used when the specified
1878 precision is negative) and the upper bound of the specified
1879 range. */
1880 prec[0] = 0;
1881 prec[1] = dir.prec[1] < 6 ? 6 : dir.prec[1];
1885 if (!arg
1886 || TREE_CODE (arg) != REAL_CST
1887 || !useless_type_conversion_p (type, TREE_TYPE (arg)))
1888 return format_floating (dir, prec);
1890 /* The minimum and maximum number of bytes produced by the directive. */
1891 fmtresult res;
1893 /* Get the real type format description for the target. */
1894 const REAL_VALUE_TYPE *rvp = TREE_REAL_CST_PTR (arg);
1895 const real_format *rfmt = REAL_MODE_FORMAT (TYPE_MODE (TREE_TYPE (arg)));
1897 if (!real_isfinite (rvp))
1899 /* The format for Infinity and NaN is "[-]inf"/"[-]infinity"
1900 and "[-]nan" with the choice being implementation-defined
1901 but not locale dependent. */
1902 bool sign = dir.get_flag ('+') || real_isneg (rvp);
1903 res.range.min = 3 + sign;
1905 res.range.likely = res.range.min;
1906 res.range.max = res.range.min;
1907 /* The unlikely maximum is "[-/+]infinity" or "[-/+][qs]nan".
1908 For NaN, the C/POSIX standards specify two formats:
1909 "[-/+]nan"
1911 "[-/+]nan(n-char-sequence)"
1912 No known printf implementation outputs the latter format but AIX
1913 outputs QNaN and SNaN for quiet and signalling NaN, respectively,
1914 so the unlikely maximum reflects that. */
1915 res.range.unlikely = sign + (real_isinf (rvp) ? 8 : 4);
1917 /* The range for infinity and NaN is known unless either width
1918 or precision is unknown. Width has the same effect regardless
1919 of whether the argument is finite. Precision is either ignored
1920 (e.g., Glibc) or can have an effect on the short vs long format
1921 such as inf/infinity (e.g., Solaris). */
1922 res.knownrange = dir.known_width_and_precision ();
1924 /* Adjust the range for width but ignore precision. */
1925 res.adjust_for_width_or_precision (dir.width);
1927 return res;
1930 char fmtstr [40];
1931 char *pfmt = fmtstr;
1933 /* Append flags. */
1934 for (const char *pf = "-+ #0"; *pf; ++pf)
1935 if (dir.get_flag (*pf))
1936 *pfmt++ = *pf;
1938 *pfmt = '\0';
1941 /* Set up an array to easily iterate over. */
1942 unsigned HOST_WIDE_INT* const minmax[] = {
1943 &res.range.min, &res.range.max
1946 for (int i = 0; i != sizeof minmax / sizeof *minmax; ++i)
1948 /* Convert the GCC real value representation with the precision
1949 of the real type to the mpfr_t format rounding down in the
1950 first iteration that computes the minimum and up in the second
1951 that computes the maximum. This order is arbitrary because
1952 rounding in either direction can result in longer output. */
1953 mpfr_t mpfrval;
1954 mpfr_init2 (mpfrval, rfmt->p);
1955 mpfr_from_real (mpfrval, rvp, i ? MPFR_RNDU : MPFR_RNDD);
1957 /* Use the MPFR rounding specifier to round down in the first
1958 iteration and then up. In most but not all cases this will
1959 result in the same number of bytes. */
1960 char rndspec = "DU"[i];
1962 /* Format it and store the result in the corresponding member
1963 of the result struct. */
1964 *minmax[i] = get_mpfr_format_length (mpfrval, fmtstr, prec[i],
1965 dir.specifier, rndspec);
1966 mpfr_clear (mpfrval);
1970 /* Make sure the minimum is less than the maximum (MPFR rounding
1971 in the call to mpfr_snprintf can result in the reverse. */
1972 if (res.range.max < res.range.min)
1974 unsigned HOST_WIDE_INT tmp = res.range.min;
1975 res.range.min = res.range.max;
1976 res.range.max = tmp;
1979 /* The range is known unless either width or precision is unknown. */
1980 res.knownrange = dir.known_width_and_precision ();
1982 /* For the same floating point constant, unless width or precision
1983 is unknown, use the longer output as the likely maximum since
1984 with round to nearest either is equally likely. Otherwise, when
1985 precision is unknown, use the greater of the minimum and 3 as
1986 the likely output (for "0.0" since zero precision is unlikely). */
1987 if (res.knownrange)
1988 res.range.likely = res.range.max;
1989 else if (res.range.min < 3
1990 && dir.prec[0] < 0
1991 && (unsigned HOST_WIDE_INT)dir.prec[1] == target_int_max ())
1992 res.range.likely = 3;
1993 else
1994 res.range.likely = res.range.min;
1996 res.range.unlikely = res.range.max;
1998 if (res.range.max > 2 && (prec[0] != 0 || prec[1] != 0))
2000 /* Unless the precision is zero output longer than 2 bytes may
2001 include the decimal point which must be a single character
2002 up to MB_LEN_MAX in length. This is overly conservative
2003 since in some conversions some constants result in no decimal
2004 point (e.g., in %g). */
2005 res.range.unlikely += target_mb_len_max () - 1;
2008 res.adjust_for_width_or_precision (dir.width);
2009 return res;
2012 /* Return a FMTRESULT struct set to the lengths of the shortest and longest
2013 strings referenced by the expression STR, or (-1, -1) when not known.
2014 Used by the format_string function below. */
2016 static fmtresult
2017 get_string_length (tree str, unsigned eltsize, const vr_values *vr)
2019 if (!str)
2020 return fmtresult ();
2022 /* Try to determine the dynamic string length first.
2023 Set MAXBOUND to an arbitrary non-null non-integer node as a request
2024 to have it set to the length of the longest string in a PHI. */
2025 c_strlen_data lendata = { };
2026 lendata.maxbound = str;
2027 if (eltsize == 1)
2028 get_range_strlen_dynamic (str, &lendata, vr);
2029 else
2031 /* Determine the length of the shortest and longest string referenced
2032 by STR. Strings of unknown lengths are bounded by the sizes of
2033 arrays that subexpressions of STR may refer to. Pointers that
2034 aren't known to point any such arrays result in LENDATA.MAXLEN
2035 set to SIZE_MAX. */
2036 get_range_strlen (str, &lendata, eltsize);
2039 /* If LENDATA.MAXBOUND is not equal to .MINLEN it corresponds to the bound
2040 of the largest array STR refers to, if known, or it's set to SIZE_MAX
2041 otherwise. */
2043 /* Return the default result when nothing is known about the string. */
2044 if ((lendata.maxbound && !tree_fits_uhwi_p (lendata.maxbound))
2045 || !tree_fits_uhwi_p (lendata.maxlen))
2047 fmtresult res;
2048 res.nonstr = lendata.decl;
2049 return res;
2052 unsigned HOST_WIDE_INT lenmax = tree_to_uhwi (max_object_size ()) - 2;
2053 if (integer_zerop (lendata.minlen)
2054 && (!lendata.maxbound || lenmax <= tree_to_uhwi (lendata.maxbound))
2055 && lenmax <= tree_to_uhwi (lendata.maxlen))
2057 fmtresult res;
2058 res.nonstr = lendata.decl;
2059 return res;
2062 HOST_WIDE_INT min
2063 = (tree_fits_uhwi_p (lendata.minlen)
2064 ? tree_to_uhwi (lendata.minlen)
2065 : 0);
2067 HOST_WIDE_INT max
2068 = (lendata.maxbound && tree_fits_uhwi_p (lendata.maxbound)
2069 ? tree_to_uhwi (lendata.maxbound)
2070 : HOST_WIDE_INT_M1U);
2072 const bool unbounded = integer_all_onesp (lendata.maxlen);
2074 /* Set the max/likely counters to unbounded when a minimum is known
2075 but the maximum length isn't bounded. This implies that STR is
2076 a conditional expression involving a string of known length and
2077 an expression of unknown/unbounded length. */
2078 if (min
2079 && (unsigned HOST_WIDE_INT)min < HOST_WIDE_INT_M1U
2080 && unbounded)
2081 max = HOST_WIDE_INT_M1U;
2083 /* get_range_strlen() returns the target value of SIZE_MAX for
2084 strings of unknown length. Bump it up to HOST_WIDE_INT_M1U
2085 which may be bigger. */
2086 if ((unsigned HOST_WIDE_INT)min == target_size_max ())
2087 min = HOST_WIDE_INT_M1U;
2088 if ((unsigned HOST_WIDE_INT)max == target_size_max ())
2089 max = HOST_WIDE_INT_M1U;
2091 fmtresult res (min, max);
2092 res.nonstr = lendata.decl;
2094 /* Set RES.KNOWNRANGE to true if and only if all strings referenced
2095 by STR are known to be bounded (though not necessarily by their
2096 actual length but perhaps by their maximum possible length). */
2097 if (res.range.max < target_int_max ())
2099 res.knownrange = true;
2100 /* When the length of the longest string is known and not
2101 excessive use it as the likely length of the string(s). */
2102 res.range.likely = res.range.max;
2104 else
2106 /* When the upper bound is unknown (it can be zero or excessive)
2107 set the likely length to the greater of 1. If MAXBOUND is
2108 known, also reset the length of the lower bound to zero. */
2109 res.range.likely = res.range.min ? res.range.min : warn_level > 1;
2110 if (lendata.maxbound && !integer_all_onesp (lendata.maxbound))
2111 res.range.min = 0;
2114 res.range.unlikely = unbounded ? HOST_WIDE_INT_MAX : res.range.max;
2116 return res;
2119 /* Return the minimum and maximum number of characters formatted
2120 by the '%c' format directives and its wide character form for
2121 the argument ARG. ARG can be null (for functions such as
2122 vsprinf). */
2124 static fmtresult
2125 format_character (const directive &dir, tree arg, const vr_values *vr_values)
2127 fmtresult res;
2129 res.knownrange = true;
2131 if (dir.specifier == 'C'
2132 || dir.modifier == FMT_LEN_l)
2134 /* A wide character can result in as few as zero bytes. */
2135 res.range.min = 0;
2137 HOST_WIDE_INT min, max;
2138 if (get_int_range (arg, &min, &max, false, 0, vr_values))
2140 if (min == 0 && max == 0)
2142 /* The NUL wide character results in no bytes. */
2143 res.range.max = 0;
2144 res.range.likely = 0;
2145 res.range.unlikely = 0;
2147 else if (min >= 0 && min < 128)
2149 /* Be conservative if the target execution character set
2150 is not a 1-to-1 mapping to the source character set or
2151 if the source set is not ASCII. */
2152 bool one_2_one_ascii
2153 = (target_to_host_charmap[0] == 1 && target_to_host ('a') == 97);
2155 /* A wide character in the ASCII range most likely results
2156 in a single byte, and only unlikely in up to MB_LEN_MAX. */
2157 res.range.max = one_2_one_ascii ? 1 : target_mb_len_max ();;
2158 res.range.likely = 1;
2159 res.range.unlikely = target_mb_len_max ();
2160 res.mayfail = !one_2_one_ascii;
2162 else
2164 /* A wide character outside the ASCII range likely results
2165 in up to two bytes, and only unlikely in up to MB_LEN_MAX. */
2166 res.range.max = target_mb_len_max ();
2167 res.range.likely = 2;
2168 res.range.unlikely = res.range.max;
2169 /* Converting such a character may fail. */
2170 res.mayfail = true;
2173 else
2175 /* An unknown wide character is treated the same as a wide
2176 character outside the ASCII range. */
2177 res.range.max = target_mb_len_max ();
2178 res.range.likely = 2;
2179 res.range.unlikely = res.range.max;
2180 res.mayfail = true;
2183 else
2185 /* A plain '%c' directive. Its output is exactly 1. */
2186 res.range.min = res.range.max = 1;
2187 res.range.likely = res.range.unlikely = 1;
2188 res.knownrange = true;
2191 /* Bump up the byte counters if WIDTH is greater. */
2192 return res.adjust_for_width_or_precision (dir.width);
2195 /* Determine the offset *INDEX of the first byte of an array element of
2196 TYPE (possibly recursively) into which the byte offset OFF points.
2197 On success set *INDEX to the offset of the first byte and return type.
2198 Otherwise, if no such element can be found, return null. */
2200 static tree
2201 array_elt_at_offset (tree type, HOST_WIDE_INT off, HOST_WIDE_INT *index)
2203 gcc_assert (TREE_CODE (type) == ARRAY_TYPE);
2205 tree eltype = type;
2206 while (TREE_CODE (TREE_TYPE (eltype)) == ARRAY_TYPE)
2207 eltype = TREE_TYPE (eltype);
2209 if (TYPE_MODE (TREE_TYPE (eltype)) != TYPE_MODE (char_type_node))
2210 eltype = TREE_TYPE (eltype);
2212 if (eltype == type)
2214 *index = 0;
2215 return type;
2218 HOST_WIDE_INT typsz = int_size_in_bytes (type);
2219 HOST_WIDE_INT eltsz = int_size_in_bytes (eltype);
2220 if (off < typsz * eltsz)
2222 *index = (off / eltsz) * eltsz;
2223 return TREE_CODE (eltype) == ARRAY_TYPE ? TREE_TYPE (eltype) : eltype;
2226 return NULL_TREE;
2229 /* Determine the offset *INDEX of the first byte of a struct member of TYPE
2230 (possibly recursively) into which the byte offset OFF points. On success
2231 set *INDEX to the offset of the first byte and return true. Otherwise,
2232 if no such member can be found, return false. */
2234 static bool
2235 field_at_offset (tree type, HOST_WIDE_INT off, HOST_WIDE_INT *index)
2237 gcc_assert (RECORD_OR_UNION_TYPE_P (type));
2239 for (tree fld = TYPE_FIELDS (type); fld; fld = TREE_CHAIN (fld))
2241 if (TREE_CODE (fld) != FIELD_DECL || DECL_ARTIFICIAL (fld))
2242 continue;
2244 tree fldtype = TREE_TYPE (fld);
2245 HOST_WIDE_INT fldoff = int_byte_position (fld);
2247 /* If the size is not available the field is a flexible array
2248 member. Treat this case as success. */
2249 tree typesize = TYPE_SIZE_UNIT (fldtype);
2250 HOST_WIDE_INT fldsize = (tree_fits_uhwi_p (typesize)
2251 ? tree_to_uhwi (typesize)
2252 : off);
2254 if (fldoff + fldsize < off)
2255 continue;
2257 if (TREE_CODE (fldtype) == ARRAY_TYPE)
2259 HOST_WIDE_INT idx = 0;
2260 if (tree ft = array_elt_at_offset (fldtype, off, &idx))
2261 fldtype = ft;
2262 else
2263 break;
2265 *index += idx;
2266 fldoff -= idx;
2267 off -= idx;
2270 if (RECORD_OR_UNION_TYPE_P (fldtype))
2272 *index += fldoff;
2273 return field_at_offset (fldtype, off - fldoff, index);
2276 *index += fldoff;
2277 return true;
2280 return false;
2283 /* For an expression X of pointer type, recursively look through
2284 ADDR_EXPR, ARRAY_REF, MEM_REF and COMPONENT_REF nodes and through
2285 the defining assignments of SSA_NAMEs to find the origin (object or
2286 pointer) X refers to, and return it. Return null when X is null.
When X refers to a struct member, add to *FLDOFF the offset of the
member from the beginning of the "most derived" object. When OFF is
nonnull, *OFF is set from the index of an ARRAY_REF with integer
elements or from the offset operand of a MEM_REF or POINTER_PLUS_EXPR,
using HOST_WIDE_INT_MAX when the operand does not fit in an unsigned
HOST_WIDE_INT; the recursion below such a node is done with a null
OFF. */
2288 static tree
2289 get_origin_and_offset (tree x, HOST_WIDE_INT *fldoff, HOST_WIDE_INT *off)
2291 if (!x)
2292 return NULL_TREE;
2294 switch (TREE_CODE (x))
2296 case ADDR_EXPR:
/* Look through the address-of to its operand. */
2297 x = TREE_OPERAND (x, 0);
2298 return get_origin_and_offset (x, fldoff, off);
2300 case ARRAY_REF:
/* IDX is the constant array index, or HOST_WIDE_INT_MAX if the
index isn't a constant that fits in an unsigned HOST_WIDE_INT. */
2302 tree offset = TREE_OPERAND (x, 1);
2303 HOST_WIDE_INT idx = (tree_fits_uhwi_p (offset)
2304 ? tree_to_uhwi (offset) : HOST_WIDE_INT_MAX);
/* For arrays of integers report the index in *OFF; for any other
element type fold it into *FLDOFF, scaled by the element size
when the index is known, and otherwise set *FLDOFF to
HOST_WIDE_INT_MAX. */
2306 tree eltype = TREE_TYPE (x);
2307 if (TREE_CODE (eltype) == INTEGER_TYPE)
2309 if (off)
2310 *off = idx;
2312 else if (idx < HOST_WIDE_INT_MAX)
2313 *fldoff += idx * int_size_in_bytes (eltype);
2314 else
2315 *fldoff = idx;
2317 x = TREE_OPERAND (x, 0);
2318 return get_origin_and_offset (x, fldoff, NULL);
2321 case MEM_REF:
/* Record the offset operand of the MEM_REF in *OFF. */
2322 if (off)
2324 tree offset = TREE_OPERAND (x, 1);
2325 *off = (tree_fits_uhwi_p (offset)
2326 ? tree_to_uhwi (offset) : HOST_WIDE_INT_MAX);
2329 x = TREE_OPERAND (x, 0);
2331 if (off)
/* XTYPE is the type of the object the MEM_REF operand refers to:
the type of the ADDR_EXPR operand, or the pointed-to type. */
2333 tree xtype
2334 = (TREE_CODE (x) == ADDR_EXPR
2335 ? TREE_TYPE (TREE_OPERAND (x, 0)) : TREE_TYPE (TREE_TYPE (x)));
2337 /* The byte offset of the most basic struct member the byte
2338 offset *OFF corresponds to, or for a (multidimensional)
2339 array member, the byte offset of the array element. */
2340 HOST_WIDE_INT index = 0;
/* Move the part of *OFF that identifies the member or element
from *OFF into *FLDOFF. */
2342 if ((RECORD_OR_UNION_TYPE_P (xtype)
2343 && field_at_offset (xtype, *off, &index))
2344 || (TREE_CODE (xtype) == ARRAY_TYPE
2345 && TREE_CODE (TREE_TYPE (xtype)) == ARRAY_TYPE
2346 && array_elt_at_offset (xtype, *off, &index)))
2348 *fldoff += index;
2349 *off -= index;
2353 return get_origin_and_offset (x, fldoff, NULL);
2355 case COMPONENT_REF:
/* Add the byte position of the referenced field to *FLDOFF. */
2357 tree fld = TREE_OPERAND (x, 1);
2358 *fldoff += int_byte_position (fld);
/* The result of the call on the field itself is ignored; only its
effect on *FLDOFF and *OFF, if any, is kept. */
2360 get_origin_and_offset (fld, fldoff, off);
2361 x = TREE_OPERAND (x, 0);
2362 return get_origin_and_offset (x, fldoff, off);
2365 case SSA_NAME:
/* Look through the statement that defines the SSA_NAME. */
2367 gimple *def = SSA_NAME_DEF_STMT (x);
2368 if (is_gimple_assign (def))
2370 tree_code code = gimple_assign_rhs_code (def);
2371 if (code == ADDR_EXPR)
2373 x = gimple_assign_rhs1 (def);
2374 return get_origin_and_offset (x, fldoff, off);
2377 if (code == POINTER_PLUS_EXPR)
/* Record the addend in *OFF and continue with the base pointer. */
2379 tree offset = gimple_assign_rhs2 (def);
2380 if (off)
2381 *off = (tree_fits_uhwi_p (offset)
2382 ? tree_to_uhwi (offset) : HOST_WIDE_INT_MAX);
2384 x = gimple_assign_rhs1 (def);
2385 return get_origin_and_offset (x, fldoff, NULL);
2387 else if (code == VAR_DECL)
2389 x = gimple_assign_rhs1 (def);
2390 return get_origin_and_offset (x, fldoff, off);
/* An SSA_NAME defined by a nop statement is replaced by its
underlying variable, if it has one. */
2393 else if (gimple_nop_p (def) && SSA_NAME_VAR (x))
2394 x = SSA_NAME_VAR (x);
/* No return above: control reaches the default case and X is
returned below. */
2397 default:
2398 break;
2401 return x;
2404 /* If ARG refers to the same (sub)object or array element as described
2405 by DST and DST_FLD, return the byte offset into the struct member or
2406 array element referenced by ARG. Otherwise return HOST_WIDE_INT_MIN
2407 to indicate that ARG and DST do not refer to the same object. */
2409 static HOST_WIDE_INT
2410 alias_offset (tree arg, tree dst, HOST_WIDE_INT dst_fld)
2412 /* See if the argument refers to the same base object as the destination
2413 of the formatted function call, and if so, try to determine if they
2414 can alias. */
2415 if (!arg || !dst || !ptr_derefs_may_alias_p (arg, dst))
2416 return HOST_WIDE_INT_MIN;
2418 /* The two arguments may refer to the same object. If they both refer
2419 to a struct member, see if the members are one and the same. */
2420 HOST_WIDE_INT arg_off = 0, arg_fld = 0;
2422 tree arg_orig = get_origin_and_offset (arg, &arg_fld, &arg_off);
2424 if (arg_orig == dst && arg_fld == dst_fld)
2425 return arg_off;
2427 return HOST_WIDE_INT_MIN;
2430 /* Return the minimum and maximum number of characters formatted
2431 by the '%s' format directive and its wide character form for
2432 the argument ARG. ARG can be null (for functions such as
2433 vsprinf). */
2435 static fmtresult
2436 format_string (const directive &dir, tree arg, const vr_values *vr_values)
2438 fmtresult res;
2440 if (warn_restrict)
2442 /* See if ARG might alias the destination of the call with
2443 DST_ORIGIN and DST_FIELD. If so, store the starting offset
2444 so that the overlap can be determined for certain later,
2445 when the amount of output of the call (including subsequent
2446 directives) has been computed. Otherwise, store HWI_MIN. */
2447 res.dst_offset = alias_offset (arg, dir.info->dst_origin,
2448 dir.info->dst_field);
2451 /* Compute the range the argument's length can be in. */
2452 int count_by = 1;
2453 if (dir.specifier == 'S' || dir.modifier == FMT_LEN_l)
2455 /* Get a node for a C type that will be the same size
2456 as a wchar_t on the target. */
2457 tree node = get_typenode_from_name (MODIFIED_WCHAR_TYPE);
2459 /* Now that we have a suitable node, get the number of
2460 bytes it occupies. */
2461 count_by = int_size_in_bytes (node);
2462 gcc_checking_assert (count_by == 2 || count_by == 4);
2465 fmtresult slen = get_string_length (arg, count_by, vr_values);
2466 if (slen.range.min == slen.range.max
2467 && slen.range.min < HOST_WIDE_INT_MAX)
2469 /* The argument is either a string constant or it refers
2470 to one of a number of strings of the same length. */
2472 /* A '%s' directive with a string argument with constant length. */
2473 res.range = slen.range;
2475 if (dir.specifier == 'S'
2476 || dir.modifier == FMT_LEN_l)
2478 /* In the worst case the length of output of a wide string S
2479 is bounded by MB_LEN_MAX * wcslen (S). */
2480 res.range.max *= target_mb_len_max ();
2481 res.range.unlikely = res.range.max;
2482 /* It's likely that the total length is not more that
2483 2 * wcslen (S).*/
2484 res.range.likely = res.range.min * 2;
2486 if (dir.prec[1] >= 0
2487 && (unsigned HOST_WIDE_INT)dir.prec[1] < res.range.max)
2489 res.range.max = dir.prec[1];
2490 res.range.likely = dir.prec[1];
2491 res.range.unlikely = dir.prec[1];
2494 if (dir.prec[0] < 0 && dir.prec[1] > -1)
2495 res.range.min = 0;
2496 else if (dir.prec[0] >= 0)
2497 res.range.likely = dir.prec[0];
2499 /* Even a non-empty wide character string need not convert into
2500 any bytes. */
2501 res.range.min = 0;
2503 /* A non-empty wide character conversion may fail. */
2504 if (slen.range.max > 0)
2505 res.mayfail = true;
2507 else
2509 res.knownrange = true;
2511 if (dir.prec[0] < 0 && dir.prec[1] > -1)
2512 res.range.min = 0;
2513 else if ((unsigned HOST_WIDE_INT)dir.prec[0] < res.range.min)
2514 res.range.min = dir.prec[0];
2516 if ((unsigned HOST_WIDE_INT)dir.prec[1] < res.range.max)
2518 res.range.max = dir.prec[1];
2519 res.range.likely = dir.prec[1];
2520 res.range.unlikely = dir.prec[1];
2524 else if (arg && integer_zerop (arg))
2526 /* Handle null pointer argument. */
2528 fmtresult res (0);
2529 res.nullp = true;
2530 return res;
2532 else
2534 /* For a '%s' and '%ls' directive with a non-constant string (either
2535 one of a number of strings of known length or an unknown string)
2536 the minimum number of characters is lesser of PRECISION[0] and
2537 the length of the shortest known string or zero, and the maximum
2538 is the lesser of the length of the longest known string or
2539 PTRDIFF_MAX and PRECISION[1]. The likely length is either
2540 the minimum at level 1 and the greater of the minimum and 1
2541 at level 2. This result is adjust upward for width (if it's
2542 specified). */
2544 if (dir.specifier == 'S'
2545 || dir.modifier == FMT_LEN_l)
2547 /* A wide character converts to as few as zero bytes. */
2548 slen.range.min = 0;
2549 if (slen.range.max < target_int_max ())
2550 slen.range.max *= target_mb_len_max ();
2552 if (slen.range.likely < target_int_max ())
2553 slen.range.likely *= 2;
2555 if (slen.range.likely < target_int_max ())
2556 slen.range.unlikely *= target_mb_len_max ();
2558 /* A non-empty wide character conversion may fail. */
2559 if (slen.range.max > 0)
2560 res.mayfail = true;
2563 res.range = slen.range;
2565 if (dir.prec[0] >= 0)
2567 /* Adjust the minimum to zero if the string length is unknown,
2568 or at most the lower bound of the precision otherwise. */
2569 if (slen.range.min >= target_int_max ())
2570 res.range.min = 0;
2571 else if ((unsigned HOST_WIDE_INT)dir.prec[0] < slen.range.min)
2572 res.range.min = dir.prec[0];
2574 /* Make both maxima no greater than the upper bound of precision. */
2575 if ((unsigned HOST_WIDE_INT)dir.prec[1] < slen.range.max
2576 || slen.range.max >= target_int_max ())
2578 res.range.max = dir.prec[1];
2579 res.range.unlikely = dir.prec[1];
2582 /* If precision is constant, set the likely counter to the lesser
2583 of it and the maximum string length. Otherwise, if the lower
2584 bound of precision is greater than zero, set the likely counter
2585 to the minimum. Otherwise set it to zero or one based on
2586 the warning level. */
2587 if (dir.prec[0] == dir.prec[1])
2588 res.range.likely
2589 = ((unsigned HOST_WIDE_INT)dir.prec[0] < slen.range.max
2590 ? dir.prec[0] : slen.range.max);
2591 else if (dir.prec[0] > 0)
2592 res.range.likely = res.range.min;
2593 else
2594 res.range.likely = warn_level > 1;
2596 else if (dir.prec[1] >= 0)
2598 res.range.min = 0;
2599 if ((unsigned HOST_WIDE_INT)dir.prec[1] < slen.range.max)
2600 res.range.max = dir.prec[1];
2601 res.range.likely = dir.prec[1] ? warn_level > 1 : 0;
2602 if ((unsigned HOST_WIDE_INT)dir.prec[1] < slen.range.unlikely)
2603 res.range.unlikely = dir.prec[1];
2605 else if (slen.range.min >= target_int_max ())
2607 res.range.min = 0;
2608 res.range.max = HOST_WIDE_INT_MAX;
2609 /* At level 1 strings of unknown length are assumed to be
2610 empty, while at level 1 they are assumed to be one byte
2611 long. */
2612 res.range.likely = warn_level > 1;
2613 res.range.unlikely = HOST_WIDE_INT_MAX;
2615 else
2617 /* A string of unknown length unconstrained by precision is
2618 assumed to be empty at level 1 and just one character long
2619 at higher levels. */
2620 if (res.range.likely >= target_int_max ())
2621 res.range.likely = warn_level > 1;
2625 /* If the argument isn't a nul-terminated string and the number
2626 of bytes on output isn't bounded by precision, set NONSTR. */
2627 if (slen.nonstr && slen.range.min < (unsigned HOST_WIDE_INT)dir.prec[0])
2628 res.nonstr = slen.nonstr;
2630 /* Bump up the byte counters if WIDTH is greater. */
2631 return res.adjust_for_width_or_precision (dir.width);
2634 /* Format plain string (part of the format string itself). */
2636 static fmtresult
2637 format_plain (const directive &dir, tree, const vr_values *)
2639 fmtresult res (dir.len);
2640 return res;
2643 /* Return true if the RESULT of a directive in a call describe by INFO
2644 should be diagnosed given the AVAILable space in the destination. */
2646 static bool
2647 should_warn_p (const call_info &info,
2648 const result_range &avail, const result_range &result)
2650 if (result.max <= avail.min)
2652 /* The least amount of space remaining in the destination is big
2653 enough for the longest output. */
2654 return false;
2657 if (info.bounded)
2659 if (warn_format_trunc == 1 && result.min <= avail.max
2660 && info.retval_used ())
2662 /* The likely amount of space remaining in the destination is big
2663 enough for the least output and the return value is used. */
2664 return false;
2667 if (warn_format_trunc == 1 && result.likely <= avail.likely
2668 && !info.retval_used ())
2670 /* The likely amount of space remaining in the destination is big
2671 enough for the likely output and the return value is unused. */
2672 return false;
2675 if (warn_format_trunc == 2
2676 && result.likely <= avail.min
2677 && (result.max <= avail.min
2678 || result.max > HOST_WIDE_INT_MAX))
2680 /* The minimum amount of space remaining in the destination is big
2681 enough for the longest output. */
2682 return false;
2685 else
2687 if (warn_level == 1 && result.likely <= avail.likely)
2689 /* The likely amount of space remaining in the destination is big
2690 enough for the likely output. */
2691 return false;
2694 if (warn_level == 2
2695 && result.likely <= avail.min
2696 && (result.max <= avail.min
2697 || result.max > HOST_WIDE_INT_MAX))
2699 /* The minimum amount of space remaining in the destination is big
2700 enough for the longest output. */
2701 return false;
2705 return true;
2708 /* At format string location described by DIRLOC in a call described
2709 by INFO, issue a warning for a directive DIR whose output may be
2710 in excess of the available space AVAIL_RANGE in the destination
2711 given the formatting result RES. ARGLOC is the location of the
directive's argument, passed on to the warning functions. This
function does nothing
2712 except decide whether to issue a warning for a possible write
2713 past the end or truncation and, if so, format the warning.
2714 Return true if a warning has been issued. */
2716 static bool
2717 maybe_warn (substring_loc &dirloc, location_t argloc,
2718 const call_info &info,
2719 const result_range &avail_range, const result_range &res,
2720 const directive &dir)
2722 if (!should_warn_p (info, avail_range, res))
2723 return false;
2725 /* A warning will definitely be issued below. */
2727 /* The maximum byte count to reference in the warning. Larger counts,
2728 which imply that the upper bound is unknown (and could be anywhere
2729 between RES.MIN + 1 and SIZE_MAX / 2), are printed as "N or more bytes"
2730 rather than "between N and X" where X is some huge number. */
2731 unsigned HOST_WIDE_INT maxbytes = target_dir_max ();
2733 /* True when there is enough room in the destination for the least
2734 amount of a directive's output but not enough for its likely or
2735 maximum output. Selects the "may ..." forms of the messages. */
2736 bool maybe = (res.min <= avail_range.max
2737 && (avail_range.min < res.likely
2738 || (res.max < HOST_WIDE_INT_MAX
2739 && avail_range.min < res.max)));
2741 /* Buffer for the directive in the host character set (used when
2742 the source character set is different). */
2743 char hostdir[32];
2745 if (avail_range.min == avail_range.max)
2747 /* The size of the destination region is exact. */
2748 unsigned HOST_WIDE_INT navail = avail_range.max;
2750 if (target_to_host (*dir.beg) != '%')
2752 /* For plain character directives (i.e., the format string itself)
2753 but not others, point the caret at the first character that's
2754 past the end of the destination. */
2755 if (navail < dir.len)
2756 dirloc.set_caret_index (dirloc.get_caret_idx () + navail);
2759 if (*dir.beg == '\0')
2761 /* This is the terminating nul. */
2762 gcc_assert (res.min == 1 && res.min == res.max);
2764 return fmtwarn (dirloc, UNKNOWN_LOCATION, NULL, info.warnopt (),
2765 info.bounded
2766 ? (maybe
2767 ? G_("%qE output may be truncated before the "
2768 "last format character")
2769 : G_("%qE output truncated before the last "
2770 "format character"))
2771 : (maybe
2772 ? G_("%qE may write a terminating nul past the "
2773 "end of the destination")
2774 : G_("%qE writing a terminating nul past the "
2775 "end of the destination")),
2776 info.func);
/* The directive produces an exact number of bytes. */
2779 if (res.min == res.max)
2781 const char *d = target_to_host (hostdir, sizeof hostdir, dir.beg);
2782 if (!info.bounded)
2783 return fmtwarn_n (dirloc, argloc, NULL, info.warnopt (), res.min,
2784 "%<%.*s%> directive writing %wu byte into a "
2785 "region of size %wu",
2786 "%<%.*s%> directive writing %wu bytes into a "
2787 "region of size %wu",
2788 (int) dir.len, d, res.min, navail);
2789 else if (maybe)
2790 return fmtwarn_n (dirloc, argloc, NULL, info.warnopt (), res.min,
2791 "%<%.*s%> directive output may be truncated "
2792 "writing %wu byte into a region of size %wu",
2793 "%<%.*s%> directive output may be truncated "
2794 "writing %wu bytes into a region of size %wu",
2795 (int) dir.len, d, res.min, navail);
2796 else
2797 return fmtwarn_n (dirloc, argloc, NULL, info.warnopt (), res.min,
2798 "%<%.*s%> directive output truncated writing "
2799 "%wu byte into a region of size %wu",
2800 "%<%.*s%> directive output truncated writing "
2801 "%wu bytes into a region of size %wu",
2802 (int) dir.len, d, res.min, navail);
/* The minimum is zero and the maximum is below MAXBYTES: report
"up to RES.MAX bytes". */
2804 if (res.min == 0 && res.max < maxbytes)
2805 return fmtwarn (dirloc, argloc, NULL,
2806 info.warnopt (),
2807 info.bounded
2808 ? (maybe
2809 ? G_("%<%.*s%> directive output may be truncated "
2810 "writing up to %wu bytes into a region of "
2811 "size %wu")
2812 : G_("%<%.*s%> directive output truncated writing "
2813 "up to %wu bytes into a region of size %wu"))
2814 : G_("%<%.*s%> directive writing up to %wu bytes "
2815 "into a region of size %wu"), (int) dir.len,
2816 target_to_host (hostdir, sizeof hostdir, dir.beg),
2817 res.max, navail);
2819 if (res.min == 0 && maxbytes <= res.max)
2820 /* This is a special case to avoid issuing the potentially
2821 confusing warning:
2822 writing 0 or more bytes into a region of size 0.
The likely count is reported instead of the minimum. */
2823 return fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2824 info.bounded
2825 ? (maybe
2826 ? G_("%<%.*s%> directive output may be truncated "
2827 "writing likely %wu or more bytes into a "
2828 "region of size %wu")
2829 : G_("%<%.*s%> directive output truncated writing "
2830 "likely %wu or more bytes into a region of "
2831 "size %wu"))
2832 : G_("%<%.*s%> directive writing likely %wu or more "
2833 "bytes into a region of size %wu"), (int) dir.len,
2834 target_to_host (hostdir, sizeof hostdir, dir.beg),
2835 res.likely, navail);
/* A nonzero minimum and a maximum below MAXBYTES: report
"between RES.MIN and RES.MAX bytes". */
2837 if (res.max < maxbytes)
2838 return fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2839 info.bounded
2840 ? (maybe
2841 ? G_("%<%.*s%> directive output may be truncated "
2842 "writing between %wu and %wu bytes into a "
2843 "region of size %wu")
2844 : G_("%<%.*s%> directive output truncated "
2845 "writing between %wu and %wu bytes into a "
2846 "region of size %wu"))
2847 : G_("%<%.*s%> directive writing between %wu and "
2848 "%wu bytes into a region of size %wu"),
2849 (int) dir.len,
2850 target_to_host (hostdir, sizeof hostdir, dir.beg),
2851 res.min, res.max, navail);
/* Otherwise report "RES.MIN or more bytes". */
2853 return fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2854 info.bounded
2855 ? (maybe
2856 ? G_("%<%.*s%> directive output may be truncated "
2857 "writing %wu or more bytes into a region of "
2858 "size %wu")
2859 : G_("%<%.*s%> directive output truncated writing "
2860 "%wu or more bytes into a region of size %wu"))
2861 : G_("%<%.*s%> directive writing %wu or more bytes "
2862 "into a region of size %wu"), (int) dir.len,
2863 target_to_host (hostdir, sizeof hostdir, dir.beg),
2864 res.min, navail);
2867 /* The size of the destination region is a range. The cases below
mirror those above except that the messages mention both bounds
of the region's size. */
2869 if (target_to_host (*dir.beg) != '%')
2871 unsigned HOST_WIDE_INT navail = avail_range.max;
2873 /* For plain character directives (i.e., the format string itself)
2874 but not others, point the caret at the first character that's
2875 past the end of the destination. */
2876 if (navail < dir.len)
2877 dirloc.set_caret_index (dirloc.get_caret_idx () + navail);
2880 if (*dir.beg == '\0')
/* This is the terminating nul. */
2882 gcc_assert (res.min == 1 && res.min == res.max);
2884 return fmtwarn (dirloc, UNKNOWN_LOCATION, NULL, info.warnopt (),
2885 info.bounded
2886 ? (maybe
2887 ? G_("%qE output may be truncated before the last "
2888 "format character")
2889 : G_("%qE output truncated before the last format "
2890 "character"))
2891 : (maybe
2892 ? G_("%qE may write a terminating nul past the end "
2893 "of the destination")
2894 : G_("%qE writing a terminating nul past the end "
2895 "of the destination")), info.func);
/* The directive produces an exact number of bytes. */
2898 if (res.min == res.max)
2900 const char *d = target_to_host (hostdir, sizeof hostdir, dir.beg);
2901 if (!info.bounded)
2902 return fmtwarn_n (dirloc, argloc, NULL, info.warnopt (), res.min,
2903 "%<%.*s%> directive writing %wu byte into a region "
2904 "of size between %wu and %wu",
2905 "%<%.*s%> directive writing %wu bytes into a region "
2906 "of size between %wu and %wu", (int) dir.len, d,
2907 res.min, avail_range.min, avail_range.max);
2908 else if (maybe)
2909 return fmtwarn_n (dirloc, argloc, NULL, info.warnopt (), res.min,
2910 "%<%.*s%> directive output may be truncated writing "
2911 "%wu byte into a region of size between %wu and %wu",
2912 "%<%.*s%> directive output may be truncated writing "
2913 "%wu bytes into a region of size between %wu and "
2914 "%wu", (int) dir.len, d, res.min, avail_range.min,
2915 avail_range.max);
2916 else
2917 return fmtwarn_n (dirloc, argloc, NULL, info.warnopt (), res.min,
2918 "%<%.*s%> directive output truncated writing %wu "
2919 "byte into a region of size between %wu and %wu",
2920 "%<%.*s%> directive output truncated writing %wu "
2921 "bytes into a region of size between %wu and %wu",
2922 (int) dir.len, d, res.min, avail_range.min,
2923 avail_range.max);
2926 if (res.min == 0 && res.max < maxbytes)
2927 return fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2928 info.bounded
2929 ? (maybe
2930 ? G_("%<%.*s%> directive output may be truncated "
2931 "writing up to %wu bytes into a region of size "
2932 "between %wu and %wu")
2933 : G_("%<%.*s%> directive output truncated writing "
2934 "up to %wu bytes into a region of size between "
2935 "%wu and %wu"))
2936 : G_("%<%.*s%> directive writing up to %wu bytes "
2937 "into a region of size between %wu and %wu"),
2938 (int) dir.len,
2939 target_to_host (hostdir, sizeof hostdir, dir.beg),
2940 res.max, avail_range.min, avail_range.max);
2942 if (res.min == 0 && maxbytes <= res.max)
2943 /* This is a special case to avoid issuing the potentially confusing
2944 warning:
2945 writing 0 or more bytes into a region of size between 0 and N. */
2946 return fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2947 info.bounded
2948 ? (maybe
2949 ? G_("%<%.*s%> directive output may be truncated "
2950 "writing likely %wu or more bytes into a region "
2951 "of size between %wu and %wu")
2952 : G_("%<%.*s%> directive output truncated writing "
2953 "likely %wu or more bytes into a region of size "
2954 "between %wu and %wu"))
2955 : G_("%<%.*s%> directive writing likely %wu or more bytes "
2956 "into a region of size between %wu and %wu"),
2957 (int) dir.len,
2958 target_to_host (hostdir, sizeof hostdir, dir.beg),
2959 res.likely, avail_range.min, avail_range.max);
2961 if (res.max < maxbytes)
2962 return fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2963 info.bounded
2964 ? (maybe
2965 ? G_("%<%.*s%> directive output may be truncated "
2966 "writing between %wu and %wu bytes into a region "
2967 "of size between %wu and %wu")
2968 : G_("%<%.*s%> directive output truncated writing "
2969 "between %wu and %wu bytes into a region of size "
2970 "between %wu and %wu"))
2971 : G_("%<%.*s%> directive writing between %wu and "
2972 "%wu bytes into a region of size between %wu and "
2973 "%wu"), (int) dir.len,
2974 target_to_host (hostdir, sizeof hostdir, dir.beg),
2975 res.min, res.max, avail_range.min, avail_range.max);
2977 return fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2978 info.bounded
2979 ? (maybe
2980 ? G_("%<%.*s%> directive output may be truncated writing "
2981 "%wu or more bytes into a region of size between "
2982 "%wu and %wu")
2983 : G_("%<%.*s%> directive output truncated writing "
2984 "%wu or more bytes into a region of size between "
2985 "%wu and %wu"))
2986 : G_("%<%.*s%> directive writing %wu or more bytes "
2987 "into a region of size between %wu and %wu"),
2988 (int) dir.len,
2989 target_to_host (hostdir, sizeof hostdir, dir.beg),
2990 res.min, avail_range.min, avail_range.max);
2993 /* Given the formatting result described by RES and NAVAIL, the number
2994 of available bytes in the destination, return the range of bytes
2995 remaining in the destination. */
2997 static inline result_range
2998 bytes_remaining (unsigned HOST_WIDE_INT navail, const format_result &res)
3000 result_range range;
3002 if (HOST_WIDE_INT_MAX <= navail)
3004 range.min = range.max = range.likely = range.unlikely = navail;
3005 return range;
3008 /* The lower bound of the available range is the available size
3009 minus the maximum output size, and the upper bound is the size
3010 minus the minimum. */
3011 range.max = res.range.min < navail ? navail - res.range.min : 0;
3013 range.likely = res.range.likely < navail ? navail - res.range.likely : 0;
3015 if (res.range.max < HOST_WIDE_INT_MAX)
3016 range.min = res.range.max < navail ? navail - res.range.max : 0;
3017 else
3018 range.min = range.likely;
3020 range.unlikely = (res.range.unlikely < navail
3021 ? navail - res.range.unlikely : 0);
3023 return range;
3026 /* Compute the length of the output resulting from the directive DIR
3027 in a call described by INFO and update the overall result of the call
3028 in *RES. Return true if the directive has been handled. */
3030 static bool
3031 format_directive (const call_info &info,
3032 format_result *res, const directive &dir,
3033 const class vr_values *vr_values)
3035 /* Offset of the beginning of the directive from the beginning
3036 of the format string. */
3037 size_t offset = dir.beg - info.fmtstr;
3038 size_t start = offset;
3039 size_t length = offset + dir.len - !!dir.len;
3041 /* Create a location for the whole directive from the % to the format
3042 specifier. */
3043 substring_loc dirloc (info.fmtloc, TREE_TYPE (info.format),
3044 offset, start, length);
3046 /* Also get the location of the argument if possible.
3047 This doesn't work for integer literals or function calls. */
3048 location_t argloc = UNKNOWN_LOCATION;
3049 if (dir.arg)
3050 argloc = EXPR_LOCATION (dir.arg);
3052 /* Bail when there is no function to compute the output length,
3053 or when minimum length checking has been disabled. */
3054 if (!dir.fmtfunc || res->range.min >= HOST_WIDE_INT_MAX)
3055 return false;
3057 /* Compute the range of lengths of the formatted output. */
3058 fmtresult fmtres = dir.fmtfunc (dir, dir.arg, vr_values);
3060 /* Record whether the output of all directives is known to be
3061 bounded by some maximum, implying that their arguments are
3062 either known exactly or determined to be in a known range
3063 or, for strings, limited by the upper bounds of the arrays
3064 they refer to. */
3065 res->knownrange &= fmtres.knownrange;
3067 if (!fmtres.knownrange)
3069 /* Only when the range is known, check it against the host value
3070 of INT_MAX + (the number of bytes of the "%.*Lf" directive with
3071 INT_MAX precision, which is the longest possible output of any
3072 single directive). That's the largest valid byte count (though
3073 not valid call to a printf-like function because it can never
3074 return such a count). Otherwise, the range doesn't correspond
3075 to known values of the argument. */
3076 if (fmtres.range.max > target_dir_max ())
3078 /* Normalize the MAX counter to avoid having to deal with it
3079 later. The counter can be less than HOST_WIDE_INT_M1U
3080 when compiling for an ILP32 target on an LP64 host. */
3081 fmtres.range.max = HOST_WIDE_INT_M1U;
3082 /* Disable exact and maximum length checking after a failure
3083 to determine the maximum number of characters (for example
3084 for wide characters or wide character strings) but continue
3085 tracking the minimum number of characters. */
3086 res->range.max = HOST_WIDE_INT_M1U;
3089 if (fmtres.range.min > target_dir_max ())
3091 /* Disable exact length checking after a failure to determine
3092 even the minimum number of characters (it shouldn't happen
3093 except in an error) but keep tracking the minimum and maximum
3094 number of characters. */
3095 return true;
3099 /* Buffer for the directive in the host character set (used when
3100 the source character set is different). */
3101 char hostdir[32];
3103 int dirlen = dir.len;
3105 if (fmtres.nullp)
3107 fmtwarn (dirloc, argloc, NULL, info.warnopt (),
3108 "%G%<%.*s%> directive argument is null",
3109 info.callstmt, dirlen,
3110 target_to_host (hostdir, sizeof hostdir, dir.beg));
3112 /* Don't bother processing the rest of the format string. */
3113 res->warned = true;
3114 res->range.min = HOST_WIDE_INT_M1U;
3115 res->range.max = HOST_WIDE_INT_M1U;
3116 return false;
3119 /* Compute the number of available bytes in the destination. There
3120 must always be at least one byte of space for the terminating
3121 NUL that's appended after the format string has been processed. */
3122 result_range avail_range = bytes_remaining (info.objsize, *res);
3124 /* If the argument aliases a part of the destination of the formatted
3125 call at offset FMTRES.DST_OFFSET append the directive and its result
3126 to the set of aliases for later processing. */
3127 if (fmtres.dst_offset != HOST_WIDE_INT_MIN)
3128 res->append_alias (dir, fmtres.dst_offset, fmtres.range);
3130 bool warned = res->warned;
3132 if (!warned)
3133 warned = maybe_warn (dirloc, argloc, info, avail_range,
3134 fmtres.range, dir);
3136 /* Bump up the total maximum if it isn't too big. */
3137 if (res->range.max < HOST_WIDE_INT_MAX
3138 && fmtres.range.max < HOST_WIDE_INT_MAX)
3139 res->range.max += fmtres.range.max;
3141 /* Raise the total unlikely maximum by the larger of the maximum
3142 and the unlikely maximum. */
3143 unsigned HOST_WIDE_INT save = res->range.unlikely;
3144 if (fmtres.range.max < fmtres.range.unlikely)
3145 res->range.unlikely += fmtres.range.unlikely;
3146 else
3147 res->range.unlikely += fmtres.range.max;
3149 if (res->range.unlikely < save)
3150 res->range.unlikely = HOST_WIDE_INT_M1U;
3152 res->range.min += fmtres.range.min;
3153 res->range.likely += fmtres.range.likely;
3155 /* Has the minimum directive output length exceeded the maximum
3156 of 4095 bytes required to be supported? */
3157 bool minunder4k = fmtres.range.min < 4096;
3158 bool maxunder4k = fmtres.range.max < 4096;
3159 /* Clear POSUNDER4K in the overall result if the maximum has exceeded
3160 the 4k (this is necessary to avoid the return value optimization
3161 that may not be safe in the maximum case). */
3162 if (!maxunder4k)
3163 res->posunder4k = false;
3164 /* Also clear POSUNDER4K if the directive may fail. */
3165 if (fmtres.mayfail)
3166 res->posunder4k = false;
3168 if (!warned
3169 /* Only warn at level 2. */
3170 && warn_level > 1
3171 /* Only warn for string functions. */
3172 && info.is_string_func ()
3173 && (!minunder4k
3174 || (!maxunder4k && fmtres.range.max < HOST_WIDE_INT_MAX)))
3176 /* The directive output may be longer than the maximum required
3177 to be handled by an implementation according to 7.21.6.1, p15
3178 of C11. Warn on this only at level 2 but remember this and
3179 prevent folding the return value when done. This allows for
3180 the possibility of the actual libc call failing due to ENOMEM
3181 (like Glibc does with very large precision or width).
3182 Issue the "may exceed" warning only for string functions and
3183 not for fprintf or printf. */
3185 if (fmtres.range.min == fmtres.range.max)
3186 warned = fmtwarn (dirloc, argloc, NULL, info.warnopt (),
3187 "%<%.*s%> directive output of %wu bytes exceeds "
3188 "minimum required size of 4095", dirlen,
3189 target_to_host (hostdir, sizeof hostdir, dir.beg),
3190 fmtres.range.min);
3191 else if (!minunder4k)
3192 warned = fmtwarn (dirloc, argloc, NULL, info.warnopt (),
3193 "%<%.*s%> directive output between %wu and %wu "
3194 "bytes exceeds minimum required size of 4095",
3195 dirlen,
3196 target_to_host (hostdir, sizeof hostdir, dir.beg),
3197 fmtres.range.min, fmtres.range.max);
3198 else if (!info.retval_used () && info.is_string_func ())
3199 warned = fmtwarn (dirloc, argloc, NULL, info.warnopt (),
3200 "%<%.*s%> directive output between %wu and %wu "
3201 "bytes may exceed minimum required size of "
3202 "4095",
3203 dirlen,
3204 target_to_host (hostdir, sizeof hostdir, dir.beg),
3205 fmtres.range.min, fmtres.range.max);
3208 /* Has the likely and maximum directive output exceeded INT_MAX? */
3209 bool likelyximax = *dir.beg && res->range.likely > target_int_max ();
3210 /* Don't consider the maximum to be in excess when it's the result
3211 of a string of unknown length (i.e., whose maximum has been set
3212 to be greater than or equal to HOST_WIDE_INT_MAX. */
3213 bool maxximax = (*dir.beg
3214 && res->range.max > target_int_max ()
3215 && res->range.max < HOST_WIDE_INT_MAX);
3217 if (!warned
3218 /* Warn for the likely output size at level 1. */
3219 && (likelyximax
3220 /* But only warn for the maximum at level 2. */
3221 || (warn_level > 1
3222 && maxximax
3223 && fmtres.range.max < HOST_WIDE_INT_MAX)))
3225 if (fmtres.range.min > target_int_max ())
3227 /* The directive output exceeds INT_MAX bytes. */
3228 if (fmtres.range.min == fmtres.range.max)
3229 warned = fmtwarn (dirloc, argloc, NULL, info.warnopt (),
3230 "%<%.*s%> directive output of %wu bytes exceeds "
3231 "%<INT_MAX%>", dirlen,
3232 target_to_host (hostdir, sizeof hostdir, dir.beg),
3233 fmtres.range.min);
3234 else
3235 warned = fmtwarn (dirloc, argloc, NULL, info.warnopt (),
3236 "%<%.*s%> directive output between %wu and "
3237 "%wu bytes exceeds %<INT_MAX%>", dirlen,
3238 target_to_host (hostdir, sizeof hostdir, dir.beg),
3239 fmtres.range.min, fmtres.range.max);
3241 else if (res->range.min > target_int_max ())
3243 /* The directive output is under INT_MAX but causes the result
3244 to exceed INT_MAX bytes. */
3245 if (fmtres.range.min == fmtres.range.max)
3246 warned = fmtwarn (dirloc, argloc, NULL, info.warnopt (),
3247 "%<%.*s%> directive output of %wu bytes causes "
3248 "result to exceed %<INT_MAX%>", dirlen,
3249 target_to_host (hostdir, sizeof hostdir, dir.beg),
3250 fmtres.range.min);
3251 else
3252 warned = fmtwarn (dirloc, argloc, NULL, info.warnopt (),
3253 "%<%.*s%> directive output between %wu and "
3254 "%wu bytes causes result to exceed %<INT_MAX%>",
3255 dirlen,
3256 target_to_host (hostdir, sizeof hostdir, dir.beg),
3257 fmtres.range.min, fmtres.range.max);
3259 else if ((!info.retval_used () || !info.bounded)
3260 && (info.is_string_func ()))
3261 /* Warn for calls to string functions that either aren't bounded
3262 (sprintf) or whose return value isn't used. */
3263 warned = fmtwarn (dirloc, argloc, NULL, info.warnopt (),
3264 "%<%.*s%> directive output between %wu and "
3265 "%wu bytes may cause result to exceed "
3266 "%<INT_MAX%>", dirlen,
3267 target_to_host (hostdir, sizeof hostdir, dir.beg),
3268 fmtres.range.min, fmtres.range.max);
3271 if (!warned && fmtres.nonstr)
3273 warned = fmtwarn (dirloc, argloc, NULL, info.warnopt (),
3274 "%<%.*s%> directive argument is not a nul-terminated "
3275 "string",
3276 dirlen,
3277 target_to_host (hostdir, sizeof hostdir, dir.beg));
3278 if (warned && DECL_P (fmtres.nonstr))
3279 inform (DECL_SOURCE_LOCATION (fmtres.nonstr),
3280 "referenced argument declared here");
3281 return false;
3284 if (warned && fmtres.range.min < fmtres.range.likely
3285 && fmtres.range.likely < fmtres.range.max)
3286 inform_n (info.fmtloc, fmtres.range.likely,
3287 "assuming directive output of %wu byte",
3288 "assuming directive output of %wu bytes",
3289 fmtres.range.likely);
3291 if (warned && fmtres.argmin)
3293 if (fmtres.argmin == fmtres.argmax)
3294 inform (info.fmtloc, "directive argument %qE", fmtres.argmin);
3295 else if (fmtres.knownrange)
3296 inform (info.fmtloc, "directive argument in the range [%E, %E]",
3297 fmtres.argmin, fmtres.argmax);
3298 else
3299 inform (info.fmtloc,
3300 "using the range [%E, %E] for directive argument",
3301 fmtres.argmin, fmtres.argmax);
3304 res->warned |= warned;
3306 if (!dir.beg[0] && res->warned)
3308 location_t callloc = gimple_location (info.callstmt);
3310 unsigned HOST_WIDE_INT min = res->range.min;
3311 unsigned HOST_WIDE_INT max = res->range.max;
3313 if (info.objsize < HOST_WIDE_INT_MAX)
3315 /* If a warning has been issued for buffer overflow or truncation
3316 help the user figure out how big a buffer they need. */
3318 if (min == max)
3319 inform_n (callloc, min,
3320 "%qE output %wu byte into a destination of size %wu",
3321 "%qE output %wu bytes into a destination of size %wu",
3322 info.func, min, info.objsize);
3323 else if (max < HOST_WIDE_INT_MAX)
3324 inform (callloc,
3325 "%qE output between %wu and %wu bytes into "
3326 "a destination of size %wu",
3327 info.func, min, max, info.objsize);
3328 else if (min < res->range.likely && res->range.likely < max)
3329 inform (callloc,
3330 "%qE output %wu or more bytes (assuming %wu) into "
3331 "a destination of size %wu",
3332 info.func, min, res->range.likely, info.objsize);
3333 else
3334 inform (callloc,
3335 "%qE output %wu or more bytes into a destination of size "
3336 "%wu",
3337 info.func, min, info.objsize);
3339 else if (!info.is_string_func ())
3341 /* If the warning is for a file function like fprintf
3342 of printf with no destination size just print the computed
3343 result. */
3344 if (min == max)
3345 inform_n (callloc, min,
3346 "%qE output %wu byte", "%qE output %wu bytes",
3347 info.func, min);
3348 else if (max < HOST_WIDE_INT_MAX)
3349 inform (callloc,
3350 "%qE output between %wu and %wu bytes",
3351 info.func, min, max);
3352 else if (min < res->range.likely && res->range.likely < max)
3353 inform (callloc,
3354 "%qE output %wu or more bytes (assuming %wu)",
3355 info.func, min, res->range.likely);
3356 else
3357 inform (callloc,
3358 "%qE output %wu or more bytes",
3359 info.func, min);
3363 if (dump_file && *dir.beg)
3365 fprintf (dump_file,
3366 " Result: "
3367 HOST_WIDE_INT_PRINT_DEC ", " HOST_WIDE_INT_PRINT_DEC ", "
3368 HOST_WIDE_INT_PRINT_DEC ", " HOST_WIDE_INT_PRINT_DEC " ("
3369 HOST_WIDE_INT_PRINT_DEC ", " HOST_WIDE_INT_PRINT_DEC ", "
3370 HOST_WIDE_INT_PRINT_DEC ", " HOST_WIDE_INT_PRINT_DEC ")\n",
3371 fmtres.range.min, fmtres.range.likely,
3372 fmtres.range.max, fmtres.range.unlikely,
3373 res->range.min, res->range.likely,
3374 res->range.max, res->range.unlikely);
3377 return true;
3380 /* Parse a format directive in function call described by INFO starting
3381 at STR and populate DIR structure. Bump up *ARGNO by the number of
3382 arguments extracted for the directive. Return the length of
3383 the directive. */
3385 static size_t
3386 parse_directive (call_info &info,
3387 directive &dir, format_result *res,
3388 const char *str, unsigned *argno,
3389 const vr_values *vr_values)
3391 const char *pcnt = strchr (str, target_percent);
3392 dir.beg = str;
3394 if (size_t len = pcnt ? pcnt - str : *str ? strlen (str) : 1)
3396 /* This directive is either a plain string or the terminating nul
3397 (which isn't really a directive but it simplifies things to
3398 handle it as if it were). */
3399 dir.len = len;
3400 dir.fmtfunc = format_plain;
3402 if (dump_file)
3404 fprintf (dump_file, " Directive %u at offset "
3405 HOST_WIDE_INT_PRINT_UNSIGNED ": \"%.*s\", "
3406 "length = " HOST_WIDE_INT_PRINT_UNSIGNED "\n",
3407 dir.dirno,
3408 (unsigned HOST_WIDE_INT)(size_t)(dir.beg - info.fmtstr),
3409 (int)dir.len, dir.beg, (unsigned HOST_WIDE_INT) dir.len);
3412 return len - !*str;
3415 /* Set the directive argument's number to correspond to its position
3416 in the formatted function call's argument list. */
3417 dir.argno = *argno;
3419 const char *pf = pcnt + 1;
3421 /* POSIX numbered argument index or zero when none. */
3422 HOST_WIDE_INT dollar = 0;
3424 /* With and precision. -1 when not specified, HOST_WIDE_INT_MIN
3425 when given by a va_list argument, and a non-negative value
3426 when specified in the format string itself. */
3427 HOST_WIDE_INT width = -1;
3428 HOST_WIDE_INT precision = -1;
3430 /* Pointers to the beginning of the width and precision decimal
3431 string (if any) within the directive. */
3432 const char *pwidth = 0;
3433 const char *pprec = 0;
3435 /* When the value of the decimal string that specifies width or
3436 precision is out of range, points to the digit that causes
3437 the value to exceed the limit. */
3438 const char *werange = NULL;
3439 const char *perange = NULL;
3441 /* Width specified via the asterisk. Need not be INTEGER_CST.
3442 For vararg functions set to void_node. */
3443 tree star_width = NULL_TREE;
3445 /* Width specified via the asterisk. Need not be INTEGER_CST.
3446 For vararg functions set to void_node. */
3447 tree star_precision = NULL_TREE;
3449 if (ISDIGIT (target_to_host (*pf)))
3451 /* This could be either a POSIX positional argument, the '0'
3452 flag, or a width, depending on what follows. Store it as
3453 width and sort it out later after the next character has
3454 been seen. */
3455 pwidth = pf;
3456 width = target_strtowi (&pf, &werange);
3458 else if (target_to_host (*pf) == '*')
3460 /* Similarly to the block above, this could be either a POSIX
3461 positional argument or a width, depending on what follows. */
3462 if (*argno < gimple_call_num_args (info.callstmt))
3463 star_width = gimple_call_arg (info.callstmt, (*argno)++);
3464 else
3465 star_width = void_node;
3466 ++pf;
3469 if (target_to_host (*pf) == '$')
3471 /* Handle the POSIX dollar sign which references the 1-based
3472 positional argument number. */
3473 if (width != -1)
3474 dollar = width + info.argidx;
3475 else if (star_width
3476 && TREE_CODE (star_width) == INTEGER_CST
3477 && (TYPE_PRECISION (TREE_TYPE (star_width))
3478 <= TYPE_PRECISION (integer_type_node)))
3479 dollar = width + tree_to_shwi (star_width);
3481 /* Bail when the numbered argument is out of range (it will
3482 have already been diagnosed by -Wformat). */
3483 if (dollar == 0
3484 || dollar == (int)info.argidx
3485 || dollar > gimple_call_num_args (info.callstmt))
3486 return false;
3488 --dollar;
3490 star_width = NULL_TREE;
3491 width = -1;
3492 ++pf;
3495 if (dollar || !star_width)
3497 if (width != -1)
3499 if (width == 0)
3501 /* The '0' that has been interpreted as a width above is
3502 actually a flag. Reset HAVE_WIDTH, set the '0' flag,
3503 and continue processing other flags. */
3504 width = -1;
3505 dir.set_flag ('0');
3507 else if (!dollar)
3509 /* (Non-zero) width has been seen. The next character
3510 is either a period or a digit. */
3511 goto start_precision;
3514 /* When either '$' has been seen, or width has not been seen,
3515 the next field is the optional flags followed by an optional
3516 width. */
3517 for ( ; ; ) {
3518 switch (target_to_host (*pf))
3520 case ' ':
3521 case '0':
3522 case '+':
3523 case '-':
3524 case '#':
3525 dir.set_flag (target_to_host (*pf++));
3526 break;
3528 default:
3529 goto start_width;
3533 start_width:
3534 if (ISDIGIT (target_to_host (*pf)))
3536 werange = 0;
3537 pwidth = pf;
3538 width = target_strtowi (&pf, &werange);
3540 else if (target_to_host (*pf) == '*')
3542 if (*argno < gimple_call_num_args (info.callstmt))
3543 star_width = gimple_call_arg (info.callstmt, (*argno)++);
3544 else
3546 /* This is (likely) a va_list. It could also be an invalid
3547 call with insufficient arguments. */
3548 star_width = void_node;
3550 ++pf;
3552 else if (target_to_host (*pf) == '\'')
3554 /* The POSIX apostrophe indicating a numeric grouping
3555 in the current locale. Even though it's possible to
3556 estimate the upper bound on the size of the output
3557 based on the number of digits it probably isn't worth
3558 continuing. */
3559 return 0;
3563 start_precision:
3564 if (target_to_host (*pf) == '.')
3566 ++pf;
3568 if (ISDIGIT (target_to_host (*pf)))
3570 pprec = pf;
3571 precision = target_strtowi (&pf, &perange);
3573 else if (target_to_host (*pf) == '*')
3575 if (*argno < gimple_call_num_args (info.callstmt))
3576 star_precision = gimple_call_arg (info.callstmt, (*argno)++);
3577 else
3579 /* This is (likely) a va_list. It could also be an invalid
3580 call with insufficient arguments. */
3581 star_precision = void_node;
3583 ++pf;
3585 else
3587 /* The decimal precision or the asterisk are optional.
3588 When neither is specified it's taken to be zero. */
3589 precision = 0;
3593 switch (target_to_host (*pf))
3595 case 'h':
3596 if (target_to_host (pf[1]) == 'h')
3598 ++pf;
3599 dir.modifier = FMT_LEN_hh;
3601 else
3602 dir.modifier = FMT_LEN_h;
3603 ++pf;
3604 break;
3606 case 'j':
3607 dir.modifier = FMT_LEN_j;
3608 ++pf;
3609 break;
3611 case 'L':
3612 dir.modifier = FMT_LEN_L;
3613 ++pf;
3614 break;
3616 case 'l':
3617 if (target_to_host (pf[1]) == 'l')
3619 ++pf;
3620 dir.modifier = FMT_LEN_ll;
3622 else
3623 dir.modifier = FMT_LEN_l;
3624 ++pf;
3625 break;
3627 case 't':
3628 dir.modifier = FMT_LEN_t;
3629 ++pf;
3630 break;
3632 case 'z':
3633 dir.modifier = FMT_LEN_z;
3634 ++pf;
3635 break;
3638 switch (target_to_host (*pf))
3640 /* Handle a sole '%' character the same as "%%" but since it's
3641 undefined prevent the result from being folded. */
3642 case '\0':
3643 --pf;
3644 res->range.min = res->range.max = HOST_WIDE_INT_M1U;
3645 /* FALLTHRU */
3646 case '%':
3647 dir.fmtfunc = format_percent;
3648 break;
3650 case 'a':
3651 case 'A':
3652 case 'e':
3653 case 'E':
3654 case 'f':
3655 case 'F':
3656 case 'g':
3657 case 'G':
3658 res->floating = true;
3659 dir.fmtfunc = format_floating;
3660 break;
3662 case 'd':
3663 case 'i':
3664 case 'o':
3665 case 'u':
3666 case 'x':
3667 case 'X':
3668 dir.fmtfunc = format_integer;
3669 break;
3671 case 'p':
3672 /* The %p output is implementation-defined. It's possible
3673 to determine this format but due to extensions (especially
3674 those of the Linux kernel -- see bug 78512) the first %p
3675 in the format string disables any further processing. */
3676 return false;
3678 case 'n':
3679 /* %n has side-effects even when nothing is actually printed to
3680 any buffer. */
3681 info.nowrite = false;
3682 dir.fmtfunc = format_none;
3683 break;
3685 case 'C':
3686 case 'c':
3687 /* POSIX wide character and C/POSIX narrow character. */
3688 dir.fmtfunc = format_character;
3689 break;
3691 case 'S':
3692 case 's':
3693 /* POSIX wide string and C/POSIX narrow character string. */
3694 dir.fmtfunc = format_string;
3695 break;
3697 default:
3698 /* Unknown conversion specification. */
3699 return 0;
3702 dir.specifier = target_to_host (*pf++);
3704 /* Store the length of the format directive. */
3705 dir.len = pf - pcnt;
3707 /* Buffer for the directive in the host character set (used when
3708 the source character set is different). */
3709 char hostdir[32];
3711 if (star_width)
3713 if (INTEGRAL_TYPE_P (TREE_TYPE (star_width)))
3714 dir.set_width (star_width, vr_values);
3715 else
3717 /* Width specified by a va_list takes on the range [0, -INT_MIN]
3718 (width is the absolute value of that specified). */
3719 dir.width[0] = 0;
3720 dir.width[1] = target_int_max () + 1;
3723 else
3725 if (width == HOST_WIDE_INT_MAX && werange)
3727 size_t begin = dir.beg - info.fmtstr + (pwidth - pcnt);
3728 size_t caret = begin + (werange - pcnt);
3729 size_t end = pf - info.fmtstr - 1;
3731 /* Create a location for the width part of the directive,
3732 pointing the caret at the first out-of-range digit. */
3733 substring_loc dirloc (info.fmtloc, TREE_TYPE (info.format),
3734 caret, begin, end);
3736 fmtwarn (dirloc, UNKNOWN_LOCATION, NULL, info.warnopt (),
3737 "%<%.*s%> directive width out of range", (int) dir.len,
3738 target_to_host (hostdir, sizeof hostdir, dir.beg));
3741 dir.set_width (width);
3744 if (star_precision)
3746 if (INTEGRAL_TYPE_P (TREE_TYPE (star_precision)))
3747 dir.set_precision (star_precision, vr_values);
3748 else
3750 /* Precision specified by a va_list takes on the range [-1, INT_MAX]
3751 (unlike width, negative precision is ignored). */
3752 dir.prec[0] = -1;
3753 dir.prec[1] = target_int_max ();
3756 else
3758 if (precision == HOST_WIDE_INT_MAX && perange)
3760 size_t begin = dir.beg - info.fmtstr + (pprec - pcnt) - 1;
3761 size_t caret = dir.beg - info.fmtstr + (perange - pcnt) - 1;
3762 size_t end = pf - info.fmtstr - 2;
3764 /* Create a location for the precision part of the directive,
3765 including the leading period, pointing the caret at the first
3766 out-of-range digit . */
3767 substring_loc dirloc (info.fmtloc, TREE_TYPE (info.format),
3768 caret, begin, end);
3770 fmtwarn (dirloc, UNKNOWN_LOCATION, NULL, info.warnopt (),
3771 "%<%.*s%> directive precision out of range", (int) dir.len,
3772 target_to_host (hostdir, sizeof hostdir, dir.beg));
3775 dir.set_precision (precision);
3778 /* Extract the argument if the directive takes one and if it's
3779 available (e.g., the function doesn't take a va_list). Treat
3780 missing arguments the same as va_list, even though they will
3781 have likely already been diagnosed by -Wformat. */
3782 if (dir.specifier != '%'
3783 && *argno < gimple_call_num_args (info.callstmt))
3784 dir.arg = gimple_call_arg (info.callstmt, dollar ? dollar : (*argno)++);
3786 if (dump_file)
3788 fprintf (dump_file,
3789 " Directive %u at offset " HOST_WIDE_INT_PRINT_UNSIGNED
3790 ": \"%.*s\"",
3791 dir.dirno,
3792 (unsigned HOST_WIDE_INT)(size_t)(dir.beg - info.fmtstr),
3793 (int)dir.len, dir.beg);
3794 if (star_width)
3796 if (dir.width[0] == dir.width[1])
3797 fprintf (dump_file, ", width = " HOST_WIDE_INT_PRINT_DEC,
3798 dir.width[0]);
3799 else
3800 fprintf (dump_file,
3801 ", width in range [" HOST_WIDE_INT_PRINT_DEC
3802 ", " HOST_WIDE_INT_PRINT_DEC "]",
3803 dir.width[0], dir.width[1]);
3806 if (star_precision)
3808 if (dir.prec[0] == dir.prec[1])
3809 fprintf (dump_file, ", precision = " HOST_WIDE_INT_PRINT_DEC,
3810 dir.prec[0]);
3811 else
3812 fprintf (dump_file,
3813 ", precision in range [" HOST_WIDE_INT_PRINT_DEC
3814 HOST_WIDE_INT_PRINT_DEC "]",
3815 dir.prec[0], dir.prec[1]);
3817 fputc ('\n', dump_file);
3820 return dir.len;
3823 /* Diagnose overlap between destination and %s directive arguments. */
3825 static void
3826 maybe_warn_overlap (call_info &info, format_result *res)
3828 /* Two vectors of 1-based indices corresponding to either certainly
3829 or possibly aliasing arguments. */
3830 auto_vec<int, 16> aliasarg[2];
3832 /* Go through the array of potentially aliasing directives and collect
3833 argument numbers of those that do or may overlap the destination
3834 object given the full result. */
3835 for (unsigned i = 0; i != res->alias_count; ++i)
3837 const format_result::alias_info &alias = res->aliases[i];
3839 enum { possible = -1, none = 0, certain = 1 } overlap = none;
3841 /* If the precision is zero there is no overlap. (This only
3842 considers %s directives and ignores %n.) */
3843 if (alias.dir.prec[0] == 0 && alias.dir.prec[1] == 0)
3844 continue;
3846 if (alias.offset == HOST_WIDE_INT_MAX
3847 || info.dst_offset == HOST_WIDE_INT_MAX)
3848 overlap = possible;
3849 else if (alias.offset == info.dst_offset)
3850 overlap = alias.dir.prec[0] == 0 ? possible : certain;
3851 else
3853 /* Determine overlap from the range of output and offsets
3854 into the same destination as the source, and rule out
3855 impossible overlap. */
3856 unsigned HOST_WIDE_INT albeg = alias.offset;
3857 unsigned HOST_WIDE_INT dstbeg = info.dst_offset;
3859 unsigned HOST_WIDE_INT alend = albeg + alias.range.min;
3860 unsigned HOST_WIDE_INT dstend = dstbeg + res->range.min - 1;
3862 if ((albeg <= dstbeg && alend > dstbeg)
3863 || (albeg >= dstbeg && albeg < dstend))
3864 overlap = certain;
3865 else
3867 alend = albeg + alias.range.max;
3868 if (alend < albeg)
3869 alend = HOST_WIDE_INT_M1U;
3871 dstend = dstbeg + res->range.max - 1;
3872 if (dstend < dstbeg)
3873 dstend = HOST_WIDE_INT_M1U;
3875 if ((albeg >= dstbeg && albeg <= dstend)
3876 || (alend >= dstbeg && alend <= dstend))
3877 overlap = possible;
3881 if (overlap == none)
3882 continue;
3884 /* Append the 1-based argument number. */
3885 aliasarg[overlap != certain].safe_push (alias.dir.argno + 1);
3887 /* Disable any kind of optimization. */
3888 res->range.unlikely = HOST_WIDE_INT_M1U;
3891 tree arg0 = gimple_call_arg (info.callstmt, 0);
3892 location_t loc = gimple_location (info.callstmt);
3894 bool aliaswarn = false;
3896 unsigned ncertain = aliasarg[0].length ();
3897 unsigned npossible = aliasarg[1].length ();
3898 if (ncertain && npossible)
3900 /* If there are multiple arguments that overlap, some certainly
3901 and some possibly, handle both sets in a single diagnostic. */
3902 aliaswarn
3903 = warning_at (loc, OPT_Wrestrict,
3904 "%qE arguments %Z and maybe %Z overlap destination "
3905 "object %qE",
3906 info.func, aliasarg[0].address (), ncertain,
3907 aliasarg[1].address (), npossible,
3908 info.dst_origin);
3910 else if (ncertain)
3912 /* There is only one set of two or more arguments and they all
3913 certainly overlap the destination. */
3914 aliaswarn
3915 = warning_n (loc, OPT_Wrestrict, ncertain,
3916 "%qE argument %Z overlaps destination object %qE",
3917 "%qE arguments %Z overlap destination object %qE",
3918 info.func, aliasarg[0].address (), ncertain,
3919 info.dst_origin);
3921 else if (npossible)
3923 /* There is only one set of two or more arguments and they all
3924 may overlap (but need not). */
3925 aliaswarn
3926 = warning_n (loc, OPT_Wrestrict, npossible,
3927 "%qE argument %Z may overlap destination object %qE",
3928 "%qE arguments %Z may overlap destination object %qE",
3929 info.func, aliasarg[1].address (), npossible,
3930 info.dst_origin);
3933 if (aliaswarn)
3935 res->warned = true;
3937 if (info.dst_origin != arg0)
3939 /* If its location is different from the first argument of the call
3940 point either at the destination object itself or at the expression
3941 that was used to determine the overlap. */
3942 loc = (DECL_P (info.dst_origin)
3943 ? DECL_SOURCE_LOCATION (info.dst_origin)
3944 : EXPR_LOCATION (info.dst_origin));
3945 if (loc != UNKNOWN_LOCATION)
3946 inform (loc,
3947 "destination object referenced by %<restrict%>-qualified "
3948 "argument 1 was declared here");
3953 /* Compute the length of the output resulting from the call to a formatted
3954 output function described by INFO and store the result of the call in
3955 *RES. Issue warnings for detected past the end writes. Return true
3956 if the complete format string has been processed and *RES can be relied
3957 on, false otherwise (e.g., when a unknown or unhandled directive was seen
3958 that caused the processing to be terminated early). */
3960 static bool
3961 compute_format_length (call_info &info, format_result *res, const vr_values *vr)
3963 if (dump_file)
3965 location_t callloc = gimple_location (info.callstmt);
3966 fprintf (dump_file, "%s:%i: ",
3967 LOCATION_FILE (callloc), LOCATION_LINE (callloc));
3968 print_generic_expr (dump_file, info.func, dump_flags);
3970 fprintf (dump_file,
3971 ": objsize = " HOST_WIDE_INT_PRINT_UNSIGNED
3972 ", fmtstr = \"%s\"\n",
3973 info.objsize, info.fmtstr);
3976 /* Reset the minimum and maximum byte counters. */
3977 res->range.min = res->range.max = 0;
3979 /* No directive has been seen yet so the length of output is bounded
3980 by the known range [0, 0] (with no conversion resulting in a failure
3981 or producing more than 4K bytes) until determined otherwise. */
3982 res->knownrange = true;
3983 res->floating = false;
3984 res->warned = false;
3986 /* 1-based directive counter. */
3987 unsigned dirno = 1;
3989 /* The variadic argument counter. */
3990 unsigned argno = info.argidx;
3992 bool success = true;
3994 for (const char *pf = info.fmtstr; ; ++dirno)
3996 directive dir (&info, dirno);
3998 size_t n = parse_directive (info, dir, res, pf, &argno, vr);
4000 /* Return failure if the format function fails. */
4001 if (!format_directive (info, res, dir, vr))
4002 return false;
4004 /* Return success when the directive is zero bytes long and it's
4005 the last thing in the format string (i.e., it's the terminating
4006 nul, which isn't really a directive but handling it as one makes
4007 things simpler). */
4008 if (!n)
4010 success = *pf == '\0';
4011 break;
4014 pf += n;
4017 maybe_warn_overlap (info, res);
4019 /* The complete format string was processed (with or without warnings). */
4020 return success;
4023 /* Return the size of the object referenced by the expression DEST if
4024 available, or the maximum possible size otherwise. */
4026 static unsigned HOST_WIDE_INT
4027 get_destination_size (tree dest)
4029 /* When there is no destination return the maximum. */
4030 if (!dest)
4031 return HOST_WIDE_INT_MAX;
4033 /* Initialize object size info before trying to compute it. */
4034 init_object_sizes ();
4036 /* Use __builtin_object_size to determine the size of the destination
4037 object. When optimizing, determine the smallest object (such as
4038 a member array as opposed to the whole enclosing object), otherwise
4039 use type-zero object size to determine the size of the enclosing
4040 object (the function fails without optimization in this type). */
4041 int ost = optimize > 0;
4042 unsigned HOST_WIDE_INT size;
4043 if (compute_builtin_object_size (dest, ost, &size))
4044 return size;
4046 return HOST_WIDE_INT_MAX;
4049 /* Return true if the call described by INFO with result RES safe to
4050 optimize (i.e., no undefined behavior), and set RETVAL to the range
4051 of its return values. */
4053 static bool
4054 is_call_safe (const call_info &info,
4055 const format_result &res, bool under4k,
4056 unsigned HOST_WIDE_INT retval[2])
4058 if (under4k && !res.posunder4k)
4059 return false;
4061 /* The minimum return value. */
4062 retval[0] = res.range.min;
4064 /* The maximum return value is in most cases bounded by RES.RANGE.MAX
4065 but in cases involving multibyte characters could be as large as
4066 RES.RANGE.UNLIKELY. */
4067 retval[1]
4068 = res.range.unlikely < res.range.max ? res.range.max : res.range.unlikely;
4070 /* Adjust the number of bytes which includes the terminating nul
4071 to reflect the return value of the function which does not.
4072 Because the valid range of the function is [INT_MIN, INT_MAX],
4073 a valid range before the adjustment below is [0, INT_MAX + 1]
4074 (the functions only return negative values on error or undefined
4075 behavior). */
4076 if (retval[0] <= target_int_max () + 1)
4077 --retval[0];
4078 if (retval[1] <= target_int_max () + 1)
4079 --retval[1];
4081 /* Avoid the return value optimization when the behavior of the call
4082 is undefined either because any directive may have produced 4K or
4083 more of output, or the return value exceeds INT_MAX, or because
4084 the output overflows the destination object (but leave it enabled
4085 when the function is bounded because then the behavior is well-
4086 defined). */
4087 if (retval[0] == retval[1]
4088 && (info.bounded || retval[0] < info.objsize)
4089 && retval[0] <= target_int_max ())
4090 return true;
4092 if ((info.bounded || retval[1] < info.objsize)
4093 && (retval[0] < target_int_max ()
4094 && retval[1] < target_int_max ()))
4095 return true;
4097 if (!under4k && (info.bounded || retval[0] < info.objsize))
4098 return true;
4100 return false;
4103 /* Given a suitable result RES of a call to a formatted output function
4104 described by INFO, substitute the result for the return value of
4105 the call. The result is suitable if the number of bytes it represents
4106 is known and exact. A result that isn't suitable for substitution may
4107 have its range set to the range of return values, if that is known.
4108 Return true if the call is removed and gsi_next should not be performed
4109 in the caller. */
4111 static bool
4112 try_substitute_return_value (gimple_stmt_iterator *gsi,
4113 const call_info &info,
4114 const format_result &res)
4116 tree lhs = gimple_get_lhs (info.callstmt);
4118 /* Set to true when the entire call has been removed. */
4119 bool removed = false;
4121 /* The minimum and maximum return value. */
4122 unsigned HOST_WIDE_INT retval[2] = {0};
4123 bool safe = is_call_safe (info, res, true, retval);
4125 if (safe
4126 && retval[0] == retval[1]
4127 /* Not prepared to handle possibly throwing calls here; they shouldn't
4128 appear in non-artificial testcases, except when the __*_chk routines
4129 are badly declared. */
4130 && !stmt_ends_bb_p (info.callstmt))
4132 tree cst = build_int_cst (lhs ? TREE_TYPE (lhs) : integer_type_node,
4133 retval[0]);
4135 if (lhs == NULL_TREE && info.nowrite)
4137 /* Remove the call to the bounded function with a zero size
4138 (e.g., snprintf(0, 0, "%i", 123)) if there is no lhs. */
4139 unlink_stmt_vdef (info.callstmt);
4140 gsi_remove (gsi, true);
4141 removed = true;
4143 else if (info.nowrite)
4145 /* Replace the call to the bounded function with a zero size
4146 (e.g., snprintf(0, 0, "%i", 123) with the constant result
4147 of the function. */
4148 if (!update_call_from_tree (gsi, cst))
4149 gimplify_and_update_call_from_tree (gsi, cst);
4150 gimple *callstmt = gsi_stmt (*gsi);
4151 update_stmt (callstmt);
4153 else if (lhs)
4155 /* Replace the left-hand side of the call with the constant
4156 result of the formatted function. */
4157 gimple_call_set_lhs (info.callstmt, NULL_TREE);
4158 gimple *g = gimple_build_assign (lhs, cst);
4159 gsi_insert_after (gsi, g, GSI_NEW_STMT);
4160 update_stmt (info.callstmt);
4163 if (dump_file)
4165 if (removed)
4166 fprintf (dump_file, " Removing call statement.");
4167 else
4169 fprintf (dump_file, " Substituting ");
4170 print_generic_expr (dump_file, cst, dump_flags);
4171 fprintf (dump_file, " for %s.\n",
4172 info.nowrite ? "statement" : "return value");
4176 else if (lhs && types_compatible_p (TREE_TYPE (lhs), integer_type_node))
4178 bool setrange = false;
4180 if (safe
4181 && (info.bounded || retval[1] < info.objsize)
4182 && (retval[0] < target_int_max ()
4183 && retval[1] < target_int_max ()))
4185 /* If the result is in a valid range bounded by the size of
4186 the destination set it so that it can be used for subsequent
4187 optimizations. */
4188 int prec = TYPE_PRECISION (integer_type_node);
4190 wide_int min = wi::shwi (retval[0], prec);
4191 wide_int max = wi::shwi (retval[1], prec);
4192 set_range_info (lhs, VR_RANGE, min, max);
4194 setrange = true;
4197 if (dump_file)
4199 const char *inbounds
4200 = (retval[0] < info.objsize
4201 ? (retval[1] < info.objsize
4202 ? "in" : "potentially out-of")
4203 : "out-of");
4205 const char *what = setrange ? "Setting" : "Discarding";
4206 if (retval[0] != retval[1])
4207 fprintf (dump_file,
4208 " %s %s-bounds return value range ["
4209 HOST_WIDE_INT_PRINT_UNSIGNED ", "
4210 HOST_WIDE_INT_PRINT_UNSIGNED "].\n",
4211 what, inbounds, retval[0], retval[1]);
4212 else
4213 fprintf (dump_file, " %s %s-bounds return value "
4214 HOST_WIDE_INT_PRINT_UNSIGNED ".\n",
4215 what, inbounds, retval[0]);
4219 if (dump_file)
4220 fputc ('\n', dump_file);
4222 return removed;
4225 /* Try to simplify a s{,n}printf call described by INFO with result
4226 RES by replacing it with a simpler and presumably more efficient
4227 call (such as strcpy). */
4229 static bool
4230 try_simplify_call (gimple_stmt_iterator *gsi,
4231 const call_info &info,
4232 const format_result &res)
4234 unsigned HOST_WIDE_INT dummy[2];
4235 if (!is_call_safe (info, res, info.retval_used (), dummy))
4236 return false;
4238 switch (info.fncode)
4240 case BUILT_IN_SNPRINTF:
4241 return gimple_fold_builtin_snprintf (gsi);
4243 case BUILT_IN_SPRINTF:
4244 return gimple_fold_builtin_sprintf (gsi);
4246 default:
4250 return false;
4253 /* Return the zero-based index of the format string argument of a printf
4254 like function and set *IDX_ARGS to the first format argument. When
4255 no such index exists return UINT_MAX. */
4257 static unsigned
4258 get_user_idx_format (tree fndecl, unsigned *idx_args)
4260 tree attrs = lookup_attribute ("format", DECL_ATTRIBUTES (fndecl));
4261 if (!attrs)
4262 attrs = lookup_attribute ("format", TYPE_ATTRIBUTES (TREE_TYPE (fndecl)));
4264 if (!attrs)
4265 return UINT_MAX;
4267 attrs = TREE_VALUE (attrs);
4269 tree archetype = TREE_VALUE (attrs);
4270 if (strcmp ("printf", IDENTIFIER_POINTER (archetype)))
4271 return UINT_MAX;
4273 attrs = TREE_CHAIN (attrs);
4274 tree fmtarg = TREE_VALUE (attrs);
4276 attrs = TREE_CHAIN (attrs);
4277 tree elliparg = TREE_VALUE (attrs);
4279 /* Attribute argument indices are 1-based but we use zero-based. */
4280 *idx_args = tree_to_uhwi (elliparg) - 1;
4281 return tree_to_uhwi (fmtarg) - 1;
4284 } /* Unnamed namespace. */
4286 /* Determine if a GIMPLE call at *GSI is to one of the sprintf-like built-in
4287 functions and if so, handle it. Return true if the call is removed and
4288 gsi_next should not be performed in the caller.
Calls to user-defined functions declared with attribute format (printf)
are handled as well. VR_VALUES is consulted for the value range of
a bounded function's size argument when that argument is an SSA_NAME. */
4290 bool
4291 handle_printf_call (gimple_stmt_iterator *gsi, const vr_values *vr_values)
4293 init_target_to_host_charmap ();
4295 call_info info = call_info ();
4297 info.callstmt = gsi_stmt (*gsi);
4298 info.func = gimple_call_fndecl (info.callstmt);
4299 if (!info.func)
4300 return false;
4302 /* Format string argument number (valid for all functions). */
4303 unsigned idx_format = UINT_MAX;
4304 if (gimple_call_builtin_p (info.callstmt, BUILT_IN_NORMAL))
4305 info.fncode = DECL_FUNCTION_CODE (info.func);
4306 else
/* Not a built-in: accept the call only if the function has a usable
attribute format (printf) whose indices are consistent with the call
and whose format argument is a pointer. */
4308 unsigned idx_args;
4309 idx_format = get_user_idx_format (info.func, &idx_args);
4310 if (idx_format == UINT_MAX
4311 || idx_format >= gimple_call_num_args (info.callstmt)
4312 || idx_args > gimple_call_num_args (info.callstmt)
4313 || !POINTER_TYPE_P (TREE_TYPE (gimple_call_arg (info.callstmt,
4314 idx_format))))
4315 return false;
4316 info.fncode = BUILT_IN_NONE;
4317 info.argidx = idx_args;
4320 /* The size of the destination as in snprintf(dest, size, ...). */
4321 unsigned HOST_WIDE_INT dstsize = HOST_WIDE_INT_M1U;
4323 /* The size of the destination determined by __builtin_object_size. */
4324 unsigned HOST_WIDE_INT objsize = HOST_WIDE_INT_M1U;
4326 /* Zero-based buffer size argument number (snprintf and vsnprintf). */
4327 unsigned idx_dstsize = UINT_MAX;
4329 /* Object size argument number (snprintf_chk and vsnprintf_chk). */
4330 unsigned idx_objsize = UINT_MAX;
4332 /* Destination argument number (valid for sprintf functions only). */
4333 unsigned idx_dstptr = 0;
/* For each handled function set the indices of the format string and
of the first format argument (INFO.ARGIDX, -1 for va_list functions),
and where applicable those of the size and object size arguments.
IDX_DSTPTR is set to -1 for functions without a destination pointer. */
4335 switch (info.fncode)
4337 case BUILT_IN_NONE:
4338 // User-defined function with attribute format (printf).
4339 idx_dstptr = -1;
4340 break;
4342 case BUILT_IN_FPRINTF:
4343 // Signature:
4344 // __builtin_fprintf (FILE*, format, ...)
4345 idx_format = 1;
4346 info.argidx = 2;
4347 idx_dstptr = -1;
4348 break;
4350 case BUILT_IN_FPRINTF_CHK:
4351 // Signature:
4352 // __builtin_fprintf_chk (FILE*, ost, format, ...)
4353 idx_format = 2;
4354 info.argidx = 3;
4355 idx_dstptr = -1;
4356 break;
4358 case BUILT_IN_FPRINTF_UNLOCKED:
4359 // Signature:
4360 // __builtin_fprintf_unlocked (FILE*, format, ...)
4361 idx_format = 1;
4362 info.argidx = 2;
4363 idx_dstptr = -1;
4364 break;
4366 case BUILT_IN_PRINTF:
4367 // Signature:
4368 // __builtin_printf (format, ...)
4369 idx_format = 0;
4370 info.argidx = 1;
4371 idx_dstptr = -1;
4372 break;
4374 case BUILT_IN_PRINTF_CHK:
4375 // Signature:
4376 // __builtin_printf_chk (ost, format, ...)
4377 idx_format = 1;
4378 info.argidx = 2;
4379 idx_dstptr = -1;
4380 break;
4382 case BUILT_IN_PRINTF_UNLOCKED:
4383 // Signature:
4384 // __builtin_printf_unlocked (format, ...)
4385 idx_format = 0;
4386 info.argidx = 1;
4387 idx_dstptr = -1;
4388 break;
4390 case BUILT_IN_SPRINTF:
4391 // Signature:
4392 // __builtin_sprintf (dst, format, ...)
4393 idx_format = 1;
4394 info.argidx = 2;
4395 break;
4397 case BUILT_IN_SPRINTF_CHK:
4398 // Signature:
4399 // __builtin___sprintf_chk (dst, ost, objsize, format, ...)
4400 idx_objsize = 2;
4401 idx_format = 3;
4402 info.argidx = 4;
4403 break;
4405 case BUILT_IN_SNPRINTF:
4406 // Signature:
4407 // __builtin_snprintf (dst, size, format, ...)
4408 idx_dstsize = 1;
4409 idx_format = 2;
4410 info.argidx = 3;
4411 info.bounded = true;
4412 break;
4414 case BUILT_IN_SNPRINTF_CHK:
4415 // Signature:
4416 // __builtin___snprintf_chk (dst, size, ost, objsize, format, ...)
4417 idx_dstsize = 1;
4418 idx_objsize = 3;
4419 idx_format = 4;
4420 info.argidx = 5;
4421 info.bounded = true;
4422 break;
4424 case BUILT_IN_VFPRINTF:
4425 // Signature:
4426 // __builtin_vfprintf (FILE*, format, va_list)
4427 idx_format = 1;
4428 info.argidx = -1;
4429 idx_dstptr = -1;
4430 break;
4432 case BUILT_IN_VFPRINTF_CHK:
4433 // Signature:
4434 // __builtin___vfprintf_chk (FILE*, ost, format, va_list)
4435 idx_format = 2;
4436 info.argidx = -1;
4437 idx_dstptr = -1;
4438 break;
4440 case BUILT_IN_VPRINTF:
4441 // Signature:
4442 // __builtin_vprintf (format, va_list)
4443 idx_format = 0;
4444 info.argidx = -1;
4445 idx_dstptr = -1;
4446 break;
4448 case BUILT_IN_VPRINTF_CHK:
4449 // Signature:
4450 // __builtin___vprintf_chk (ost, format, va_list)
4451 idx_format = 1;
4452 info.argidx = -1;
4453 idx_dstptr = -1;
4454 break;
4456 case BUILT_IN_VSNPRINTF:
4457 // Signature:
4458 // __builtin_vsnprintf (dst, size, format, va)
4459 idx_dstsize = 1;
4460 idx_format = 2;
4461 info.argidx = -1;
4462 info.bounded = true;
4463 break;
4465 case BUILT_IN_VSNPRINTF_CHK:
4466 // Signature:
4467 // __builtin___vsnprintf_chk (dst, size, ost, objsize, format, va)
4468 idx_dstsize = 1;
4469 idx_objsize = 3;
4470 idx_format = 4;
4471 info.argidx = -1;
4472 info.bounded = true;
4473 break;
4475 case BUILT_IN_VSPRINTF:
4476 // Signature:
4477 // __builtin_vsprintf (dst, format, va)
4478 idx_format = 1;
4479 info.argidx = -1;
4480 break;
4482 case BUILT_IN_VSPRINTF_CHK:
4483 // Signature:
4484 // __builtin___vsprintf_chk (dst, ost, objsize, format, va)
4485 idx_format = 3;
4486 idx_objsize = 2;
4487 info.argidx = -1;
4488 break;
4490 default:
4491 return false;
4494 /* Set the global warning level for this function. */
4495 warn_level = info.bounded ? warn_format_trunc : warn_format_overflow;
4497 /* For all string functions the first argument is a pointer to
4498 the destination. For functions without a destination IDX_DSTPTR
was set to -1 above, which as an unsigned value is never less than
the number of arguments, so DSTPTR is null for them. */
4499 tree dstptr = (idx_dstptr < gimple_call_num_args (info.callstmt)
4500 ? gimple_call_arg (info.callstmt, 0) : NULL_TREE);
4502 info.format = gimple_call_arg (info.callstmt, idx_format);
4504 /* True when the destination size is constant as opposed to the lower
4505 or upper bound of a range. */
4506 bool dstsize_cst_p = true;
/* Cleared below when the specified bound is, or may be, too large. */
4507 bool posunder4k = true;
4509 if (idx_dstsize == UINT_MAX)
4511 /* For non-bounded functions like sprintf, determine the size
4512 of the destination from the object or pointer passed to it
4513 as the first argument. */
4514 dstsize = get_destination_size (dstptr);
4516 else if (tree size = gimple_call_arg (info.callstmt, idx_dstsize))
4518 /* For bounded functions try to get the size argument. */
4520 if (TREE_CODE (size) == INTEGER_CST)
4522 dstsize = tree_to_uhwi (size);
4523 /* No object can be larger than SIZE_MAX / 2 bytes (half the address
4524 space) on the target.
4525 The functions are defined only for output of at most INT_MAX
4526 bytes. Specifying a bound in excess of that limit effectively
4527 defeats the bounds checking (and on some implementations such
4528 as Solaris cause the function to fail with EINVAL). */
4529 if (dstsize > target_size_max () / 2)
4531 /* Avoid warning if -Wstringop-overflow is specified since
4532 it also warns for the same thing though only for the
4533 checking built-ins. */
4534 if ((idx_objsize == UINT_MAX
4535 || !warn_stringop_overflow))
4536 warning_at (gimple_location (info.callstmt), info.warnopt (),
4537 "specified bound %wu exceeds maximum object size "
4538 "%wu",
4539 dstsize, target_size_max () / 2);
4540 /* POSIX requires snprintf to fail if DSTSIZE is greater
4541 than INT_MAX. Even though not all POSIX implementations
4542 conform to the requirement, avoid folding in this case. */
4543 posunder4k = false;
4545 else if (dstsize > target_int_max ())
4547 warning_at (gimple_location (info.callstmt), info.warnopt (),
4548 "specified bound %wu exceeds %<INT_MAX%>",
4549 dstsize);
4550 /* POSIX requires snprintf to fail if DSTSIZE is greater
4551 than INT_MAX. Avoid folding in that case. */
4552 posunder4k = false;
4555 else if (TREE_CODE (size) == SSA_NAME)
4557 /* Try to determine the range of values of the argument
4558 and use the greater of the two at level 1 and the smaller
4559 of them at level 2. */
4560 const value_range_equiv *vr
4561 = CONST_CAST (class vr_values *, vr_values)->get_value_range (size);
4563 if (!vr->undefined_p () && !vr->symbolic_p ())
4565 tree type = TREE_TYPE (size);
4566 tree tmin = wide_int_to_tree (type, vr->lower_bound ());
4567 tree tmax = wide_int_to_tree (type, vr->upper_bound ());
4568 unsigned HOST_WIDE_INT minsize = TREE_INT_CST_LOW (tmin);
4569 unsigned HOST_WIDE_INT maxsize = TREE_INT_CST_LOW (tmax);
4570 dstsize = warn_level < 2 ? maxsize : minsize;
4572 if (minsize > target_int_max ())
4573 warning_at (gimple_location (info.callstmt), info.warnopt (),
4574 "specified bound range [%wu, %wu] exceeds "
4575 "%<INT_MAX%>",
4576 minsize, maxsize);
4578 /* POSIX requires snprintf to fail if DSTSIZE is greater
4579 than INT_MAX. Avoid folding if that's possible. */
4580 if (maxsize > target_int_max ())
4581 posunder4k = false;
4584 /* The destination size is not constant. If the function is
4585 bounded (e.g., snprintf) a lower bound of zero doesn't
4586 necessarily imply it can be eliminated. */
4587 dstsize_cst_p = false;
/* For the _chk built-ins pick up the object size argument when it is
a constant that fits in an unsigned HOST_WIDE_INT. */
4591 if (idx_objsize != UINT_MAX)
4592 if (tree size = gimple_call_arg (info.callstmt, idx_objsize))
4593 if (tree_fits_uhwi_p (size))
4594 objsize = tree_to_uhwi (size);
4596 if (info.bounded && !dstsize)
4598 /* As a special case, when the explicitly specified destination
4599 size argument (to a bounded function like snprintf) is zero
4600 it is a request to determine the number of bytes on output
4601 without actually producing any. Pretend the size is
4602 unlimited in this case. */
4603 info.objsize = HOST_WIDE_INT_MAX;
4604 info.nowrite = dstsize_cst_p;
4606 else
4608 /* For calls to non-bounded functions or to those of bounded
4609 functions with a non-zero size, warn if the destination
4610 pointer is null. */
4611 if (dstptr && integer_zerop (dstptr))
4613 /* This is diagnosed with -Wformat only when the null is a constant
4614 pointer. The warning here diagnoses instances where the pointer
4615 is not constant. */
4616 location_t loc = gimple_location (info.callstmt);
4617 warning_at (EXPR_LOC_OR_LOC (dstptr, loc),
4618 info.warnopt (), "%Gnull destination pointer",
4619 info.callstmt);
4620 return false;
4623 /* Set the object size to the smaller of the two arguments
4624 if both have been specified and they're not equal. */
4625 info.objsize = dstsize < objsize ? dstsize : objsize;
4627 if (info.bounded
4628 && dstsize < target_size_max () / 2 && objsize < dstsize
4629 /* Avoid warning if -Wstringop-overflow is specified since
4630 it also warns for the same thing though only for the
4631 checking built-ins. */
4632 && (idx_objsize == UINT_MAX
4633 || !warn_stringop_overflow))
4635 warning_at (gimple_location (info.callstmt), info.warnopt (),
4636 "specified bound %wu exceeds the size %wu "
4637 "of the destination object", dstsize, objsize);
4641 /* Warn about a null format string and give up on the call, but
4642 only when the function is a built-in. */
4643 if (integer_zerop (info.format)
4644 && gimple_call_builtin_p (info.callstmt, BUILT_IN_NORMAL))
4646 location_t loc = gimple_location (info.callstmt);
4647 warning_at (EXPR_LOC_OR_LOC (info.format, loc),
4648 info.warnopt (), "%Gnull format string",
4649 info.callstmt);
4650 return false;
/* Nothing can be done unless the format string can be obtained. */
4653 info.fmtstr = get_format_string (info.format, &info.fmtloc);
4654 if (!info.fmtstr)
4655 return false;
4657 if (warn_restrict)
4659 /* Compute the origin of the destination pointer and its offset
4660 from the base object/pointer if possible. */
4661 info.dst_offset = 0;
4662 info.dst_origin = get_origin_and_offset (dstptr, &info.dst_field,
4663 &info.dst_offset);
4666 /* The result is the number of bytes output by the formatted function,
4667 including the terminating NUL. */
4668 format_result res;
4670 /* I/O functions with no destination argument (i.e., all forms of fprintf
4671 and printf) may fail under any conditions. Others (i.e., all forms of
4672 sprintf) may only fail under specific conditions determined for each
4673 directive. Clear POSUNDER4K for the former set of functions and, unless
4674 the bound checks above cleared it, set it to true for the latter (it can
4675 only be cleared later, but it is never set to true again). */
4676 res.posunder4k = posunder4k && dstptr;
4678 bool success = compute_format_length (info, &res, vr_values);
/* Suppress further warnings for the call once one has been issued. */
4679 if (res.warned)
4680 gimple_set_no_warning (info.callstmt, true);
4682 /* When optimizing and the printf return value optimization is enabled,
4683 attempt to substitute the computed result for the return value of
4684 the call. Avoid this optimization when -frounding-math is in effect
4685 and the format string contains a floating point directive. */
4686 bool call_removed = false;
4687 if (success && optimize > 0)
4689 /* Save a copy of the iterator pointing at the call. The iterator
4690 may change to point past the call in try_substitute_return_value
4691 but the original value is needed in try_simplify_call. */
4692 gimple_stmt_iterator gsi_call = *gsi;
4694 if (flag_printf_return_value
4695 && (!flag_rounding_math || !res.floating))
4696 call_removed = try_substitute_return_value (gsi, info, res);
4698 if (!call_removed)
4699 try_simplify_call (&gsi_call, info, res);
4702 return call_removed;