libgo: add misc/cgo files
[official-gcc.git] / gcc / gimple-ssa-sprintf.c
blobf43778bbcfcd6ddf51ec9f8fc6c54f4fefe27bf4
1 /* Copyright (C) 2016-2017 Free Software Foundation, Inc.
2 Contributed by Martin Sebor <msebor@redhat.com>.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* This file implements the printf-return-value pass. The pass does
21 two things: 1) it analyzes calls to formatted output functions like
22 sprintf looking for possible buffer overflows and calls to bounded
23 functions like snprintf for early truncation (and under the control
24 of the -Wformat-overflow and -Wformat-truncation options issues warnings), and 2) under the
25 control of the -fprintf-return-value option it folds the return
26 value of safe calls into constants, making it possible to eliminate
27 code that depends on the value of those constants.
29 For all functions (bounded or not) the pass uses the size of the
30 destination object. That means that it will diagnose calls to
31 snprintf not on the basis of the size specified by the function's
32 second argument but rather on the basis of the size of the object the first
33 argument points to (if possible). For bound-checking built-ins
34 like __builtin___snprintf_chk the pass uses the size typically
35 determined by __builtin_object_size and passed to the built-in
36 by the Glibc inline wrapper.
38 The pass handles all forms of standard sprintf format directives,
39 including character, integer, floating point, pointer, and strings,
40 with the standard C flags, widths, and precisions. For integers
41 and strings it computes the length of output itself. For floating
42 point it uses MPFR to format known constants with up and down
43 rounding and uses the resulting range of output lengths. For
44 strings it uses the length of string literals and the sizes of
45 character arrays that a character pointer may point to as a bound
46 on the longest string. */
48 #include "config.h"
49 #include "system.h"
50 #include "coretypes.h"
51 #include "backend.h"
52 #include "tree.h"
53 #include "gimple.h"
54 #include "tree-pass.h"
55 #include "ssa.h"
56 #include "gimple-fold.h"
57 #include "gimple-pretty-print.h"
58 #include "diagnostic-core.h"
59 #include "fold-const.h"
60 #include "gimple-iterator.h"
61 #include "tree-ssa.h"
62 #include "tree-object-size.h"
63 #include "params.h"
64 #include "tree-cfg.h"
65 #include "tree-ssa-propagate.h"
66 #include "calls.h"
67 #include "cfgloop.h"
68 #include "intl.h"
69 #include "langhooks.h"
71 #include "builtins.h"
72 #include "stor-layout.h"
74 #include "realmpfr.h"
75 #include "target.h"
77 #include "cpplib.h"
78 #include "input.h"
79 #include "toplev.h"
80 #include "substring-locations.h"
81 #include "diagnostic.h"
83 /* The likely worst case value of MB_LEN_MAX for the target, large enough
84 for UTF-8. Ideally, this would be obtained by a target hook if it were
85 to be used for optimization but it's good enough as is for warnings. */
86 #define target_mb_len_max() 6
88 /* The maximum number of bytes a single non-string directive can result
89 in. This is the result of printf("%.*Lf", INT_MAX, -LDBL_MAX) for
90 LDBL_MAX_10_EXP of 4932. */
91 #define IEEE_MAX_10_EXP 4932
92 #define target_dir_max() (target_int_max () + IEEE_MAX_10_EXP + 2)
94 namespace {
96 const pass_data pass_data_sprintf_length = {
97 GIMPLE_PASS, // pass type
98 "printf-return-value", // pass name
99 OPTGROUP_NONE, // optinfo_flags
100 TV_NONE, // tv_id
101 PROP_cfg, // properties_required
102 0, // properties_provided
103 0, // properties_destroyed
104 0, // properties_start
105 0, // properties_finish
108 /* Set to the warning level for the current function which is equal
109 either to warn_format_trunc for bounded functions or to
110 warn_format_overflow otherwise. */
112 static int warn_level;
114 struct format_result;
116 class pass_sprintf_length : public gimple_opt_pass
118 bool fold_return_value;
120 public:
121 pass_sprintf_length (gcc::context *ctxt)
122 : gimple_opt_pass (pass_data_sprintf_length, ctxt),
123 fold_return_value (false)
126 opt_pass * clone () { return new pass_sprintf_length (m_ctxt); }
128 virtual bool gate (function *);
130 virtual unsigned int execute (function *);
132 void set_pass_param (unsigned int n, bool param)
134 gcc_assert (n == 0);
135 fold_return_value = param;
138 bool handle_gimple_call (gimple_stmt_iterator *);
140 struct call_info;
141 bool compute_format_length (call_info &, format_result *);
144 bool
145 pass_sprintf_length::gate (function *)
147 /* Run the pass iff -Wformat-overflow, -Wformat-truncation, or
148 -fprintf-return-value is in effect and either not optimizing and the pass is being invoked
149 early, or when optimizing and the pass is being invoked during
150 optimization (i.e., "late"). */
151 return ((warn_format_overflow > 0
152 || warn_format_trunc > 0
153 || flag_printf_return_value)
154 && (optimize > 0) == fold_return_value);
157 /* The minimum, maximum, likely, and unlikely maximum number of bytes
158 of output either a formatting function or an individual directive
159 can result in. */
161 struct result_range
163 /* The absolute minimum number of bytes. The result of a successful
164 conversion is guaranteed to be no less than this. (An erroneous
165 conversion can be indicated by MIN > HOST_WIDE_INT_MAX.) */
166 unsigned HOST_WIDE_INT min;
167 /* The likely maximum result that is used in diagnostics. In most
168 cases MAX is the same as the worst case UNLIKELY result. */
169 unsigned HOST_WIDE_INT max;
170 /* The likely result used to trigger diagnostics. For conversions
171 that result in a range of bytes [MIN, MAX], LIKELY is somewhere
172 in that range. */
173 unsigned HOST_WIDE_INT likely;
174 /* In rare cases (e.g., for multibyte characters) UNLIKELY gives
175 the worst case maximum result of a directive. In most cases
176 UNLIKELY == MAX. UNLIKELY is used to control the return value
177 optimization but not in diagnostics. */
178 unsigned HOST_WIDE_INT unlikely;
181 /* The result of a call to a formatted function. */
183 struct format_result
185 /* Range of characters written by the formatted function.
186 Setting the minimum to HOST_WIDE_INT_MAX disables all
187 length tracking for the remainder of the format string. */
188 result_range range;
190 /* True when the range above is obtained from known values of
191 directive arguments, or bounds on the amount of output such
192 as width and precision, and not the result of heuristics that
193 depend on warning levels. It's used to issue stricter diagnostics
194 in cases where strings of unknown lengths are bounded by the arrays
195 they are determined to refer to. KNOWNRANGE must not be used for
196 the return value optimization. */
197 bool knownrange;
199 /* True if no individual directive resulted in more than 4095 bytes
200 of output (the total NUMBER_CHARS_{MIN,MAX} might be greater).
201 Implementations are not required to handle directives that produce
202 more than 4K bytes (leading to undefined behavior) and so when one
203 is found it disables the return value optimization. */
204 bool under4k;
206 /* True when a floating point directive has been seen in the format
207 string. */
208 bool floating;
210 /* True when an intermediate result has caused a warning. Used to
211 avoid issuing duplicate warnings while finishing the processing
212 of a call. WARNED also disables the return value optimization. */
213 bool warned;
215 /* Preincrement the number of output characters by 1. */
216 format_result& operator++ ()
218 return *this += 1;
221 /* Postincrement the number of output characters by 1. */
222 format_result operator++ (int)
224 format_result prev (*this);
225 *this += 1;
226 return prev;
229 /* Increment the number of output characters by N. */
230 format_result& operator+= (unsigned HOST_WIDE_INT);
233 format_result&
234 format_result::operator+= (unsigned HOST_WIDE_INT n)
236 gcc_assert (n < HOST_WIDE_INT_MAX);
238 if (range.min < HOST_WIDE_INT_MAX)
239 range.min += n;
241 if (range.max < HOST_WIDE_INT_MAX)
242 range.max += n;
244 if (range.likely < HOST_WIDE_INT_MAX)
245 range.likely += n;
247 if (range.unlikely < HOST_WIDE_INT_MAX)
248 range.unlikely += n;
250 return *this;
253 /* Return the value of INT_MIN for the target. */
255 static inline HOST_WIDE_INT
256 target_int_min ()
258 return tree_to_shwi (TYPE_MIN_VALUE (integer_type_node));
261 /* Return the value of INT_MAX for the target. */
263 static inline unsigned HOST_WIDE_INT
264 target_int_max ()
266 return tree_to_uhwi (TYPE_MAX_VALUE (integer_type_node));
269 /* Return the value of SIZE_MAX for the target. */
271 static inline unsigned HOST_WIDE_INT
272 target_size_max ()
274 return tree_to_uhwi (TYPE_MAX_VALUE (size_type_node));
277 /* A straightforward mapping from the execution character set to the host
278 character set indexed by execution character. */
280 static char target_to_host_charmap[256];
282 /* Initialize a mapping from the execution character set to the host
283 character set. */
285 static bool
286 init_target_to_host_charmap ()
288 /* If the percent sign is non-zero the mapping has already been
289 initialized. */
290 if (target_to_host_charmap['%'])
291 return true;
293 /* Initialize the target_percent character (done elsewhere). */
294 if (!init_target_chars ())
295 return false;
297 /* The subset of the source character set used by printf conversion
298 specifications (strictly speaking, not all letters are used but
299 they are included here for the sake of simplicity). The dollar
300 sign must be included even though it's not in the basic source
301 character set. */
302 const char srcset[] = " 0123456789!\"#%&'()*+,-./:;<=>?[\\]^_{|}~$"
303 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
305 /* Set the mapping for all characters to some ordinary value (i.e.,
306 not one used in printf conversion specifications) and overwrite
307 those that are used by conversion specifications with their
308 corresponding values. */
309 memset (target_to_host_charmap + 1, '?', sizeof target_to_host_charmap - 1);
311 /* Are the two sets of characters the same? */
312 bool all_same_p = true;
314 for (const char *pc = srcset; *pc; ++pc)
316 /* Slice off the high end bits in case target characters are
317 signed. All values are expected to be non-nul, otherwise
318 there's a problem. */
319 if (unsigned char tc = lang_hooks.to_target_charset (*pc))
321 target_to_host_charmap[tc] = *pc;
322 if (tc != *pc)
323 all_same_p = false;
325 else
326 return false;
330 /* Set the first element to a non-zero value if the mapping
331 is 1-to-1, otherwise leave it clear (NUL is assumed to be
332 the same in both character sets). */
333 target_to_host_charmap[0] = all_same_p;
335 return true;
338 /* Return the host source character corresponding to the character
339 CH in the execution character set if one exists, or some innocuous
340 (non-special, non-nul) source character otherwise. */
342 static inline unsigned char
343 target_to_host (unsigned char ch)
345 return target_to_host_charmap[ch];
348 /* Convert an initial substring of the string TARGSTR consisting of
349 characters in the execution character set into a string in the
350 source character set on the host and store up to HOSTSZ characters
351 in the buffer pointed to by HOSTR. Return HOSTR. */
353 static const char*
354 target_to_host (char *hostr, size_t hostsz, const char *targstr)
356 /* Make sure the buffer is reasonably big. */
357 gcc_assert (hostsz > 4);
359 /* The interesting subset of source and execution characters are
360 the same so no conversion is necessary. However, truncate
361 overlong strings just like the translated strings are. */
362 if (target_to_host_charmap['\0'] == 1)
364 strncpy (hostr, targstr, hostsz - 4);
365 if (strlen (targstr) >= hostsz)
366 strcpy (hostr + hostsz - 4, "...");
367 return hostr;
370 /* Convert the initial substring of TARGSTR to the corresponding
371 characters in the host set, appending "..." if TARGSTR is too
372 long to fit. Using the static buffer assumes the function is
373 not called in between sequence points (which it isn't). */
374 for (char *ph = hostr; ; ++targstr)
376 *ph++ = target_to_host (*targstr);
377 if (!*targstr)
378 break;
380 if (size_t (ph - hostr) == hostsz - 4)
382 *ph = '\0';
383 strcat (ph, "...");
384 break;
388 return hostr;
391 /* Convert the sequence of decimal digits in the execution character
392 set starting at *PS to a long, just like strtol does. Return the result
393 and set *PS to one past the last digit of the sequence. On range
394 error return LONG_MAX and set *ERANGE to the digit that caused it. */
396 static inline long
397 target_strtol10 (const char **ps, const char **erange)
399 unsigned HOST_WIDE_INT val = 0;
400 for ( ; ; ++*ps)
402 unsigned char c = target_to_host (**ps);
403 if (ISDIGIT (c))
405 c -= '0';
407 /* Check for overflow. */
408 if (val > (LONG_MAX - c) / 10LU)
410 val = LONG_MAX;
411 *erange = *ps;
413 /* Skip the remaining digits. */
415 c = target_to_host (*++*ps);
416 while (ISDIGIT (c));
417 break;
419 else
420 val = val * 10 + c;
422 else
423 break;
426 return val;
429 /* Return the constant initial value of DECL if available or DECL
430 otherwise. Same as the synonymous function in c/c-typeck.c. */
432 static tree
433 decl_constant_value (tree decl)
435 if (/* Don't change a variable array bound or initial value to a constant
436 in a place where a variable is invalid. Note that DECL_INITIAL
437 isn't valid for a PARM_DECL. */
438 current_function_decl != 0
439 && TREE_CODE (decl) != PARM_DECL
440 && !TREE_THIS_VOLATILE (decl)
441 && TREE_READONLY (decl)
442 && DECL_INITIAL (decl) != 0
443 && TREE_CODE (DECL_INITIAL (decl)) != ERROR_MARK
444 /* This is invalid if initial value is not constant.
445 If it has either a function call, a memory reference,
446 or a variable, then re-evaluating it could give different results. */
447 && TREE_CONSTANT (DECL_INITIAL (decl))
448 /* Check for cases where this is sub-optimal, even though valid. */
449 && TREE_CODE (DECL_INITIAL (decl)) != CONSTRUCTOR)
450 return DECL_INITIAL (decl);
451 return decl;
454 /* Given FORMAT, set *PLOC to the source location of the format string
455 and return the format string if it is known or null otherwise. */
457 static const char*
458 get_format_string (tree format, location_t *ploc)
460 if (VAR_P (format))
462 /* Pull out a constant value if the front end didn't. */
463 format = decl_constant_value (format);
464 STRIP_NOPS (format);
467 if (integer_zerop (format))
469 /* FIXME: Diagnose null format string if it hasn't been diagnosed
470 by -Wformat (the latter diagnoses only null pointer constants,
471 this pass can do better). */
472 return NULL;
475 HOST_WIDE_INT offset = 0;
477 if (TREE_CODE (format) == POINTER_PLUS_EXPR)
479 tree arg0 = TREE_OPERAND (format, 0);
480 tree arg1 = TREE_OPERAND (format, 1);
481 STRIP_NOPS (arg0);
482 STRIP_NOPS (arg1);
484 if (TREE_CODE (arg1) != INTEGER_CST)
485 return NULL;
487 format = arg0;
489 /* POINTER_PLUS_EXPR offsets are to be interpreted signed. */
490 if (!cst_and_fits_in_hwi (arg1))
491 return NULL;
493 offset = int_cst_value (arg1);
496 if (TREE_CODE (format) != ADDR_EXPR)
497 return NULL;
499 *ploc = EXPR_LOC_OR_LOC (format, input_location);
501 format = TREE_OPERAND (format, 0);
503 if (TREE_CODE (format) == ARRAY_REF
504 && tree_fits_shwi_p (TREE_OPERAND (format, 1))
505 && (offset += tree_to_shwi (TREE_OPERAND (format, 1))) >= 0)
506 format = TREE_OPERAND (format, 0);
508 if (offset < 0)
509 return NULL;
511 tree array_init;
512 tree array_size = NULL_TREE;
514 if (VAR_P (format)
515 && TREE_CODE (TREE_TYPE (format)) == ARRAY_TYPE
516 && (array_init = decl_constant_value (format)) != format
517 && TREE_CODE (array_init) == STRING_CST)
519 /* Extract the string constant initializer. Note that this may
520 include a trailing NUL character that is not in the array (e.g.
521 const char a[3] = "foo";). */
522 array_size = DECL_SIZE_UNIT (format);
523 format = array_init;
526 if (TREE_CODE (format) != STRING_CST)
527 return NULL;
529 tree type = TREE_TYPE (format);
531 if (GET_MODE_CLASS (TYPE_MODE (TREE_TYPE (type))) != MODE_INT
532 || GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (type))) != 1)
534 /* Wide format string. */
535 return NULL;
538 const char *fmtstr = TREE_STRING_POINTER (format);
539 unsigned fmtlen = TREE_STRING_LENGTH (format);
541 if (array_size)
543 /* Variable length arrays can't be initialized. */
544 gcc_assert (TREE_CODE (array_size) == INTEGER_CST);
546 if (tree_fits_shwi_p (array_size))
548 HOST_WIDE_INT array_size_value = tree_to_shwi (array_size);
549 if (array_size_value > 0
550 && array_size_value == (int) array_size_value
551 && fmtlen > array_size_value)
552 fmtlen = array_size_value;
555 if (offset)
557 if (offset >= fmtlen)
558 return NULL;
560 fmtstr += offset;
561 fmtlen -= offset;
564 if (fmtlen < 1 || fmtstr[--fmtlen] != 0)
566 /* FIXME: Diagnose an unterminated format string if it hasn't been
567 diagnosed by -Wformat. Similarly to a null format pointer,
568 -Wformat diagnoses only null pointer constants; this pass can
569 do better. */
570 return NULL;
573 return fmtstr;
576 /* The format_warning_at_substring function is not used here in a way
577 that makes using attribute format viable. Suppress the warning. */
579 #pragma GCC diagnostic push
580 #pragma GCC diagnostic ignored "-Wsuggest-attribute=format"
582 /* For convenience and brevity. */
584 static bool
585 (* const fmtwarn) (const substring_loc &, const source_range *,
586 const char *, int, const char *, ...)
587 = format_warning_at_substring;
589 /* Format length modifiers. */
591 enum format_lengths
593 FMT_LEN_none,
594 FMT_LEN_hh, // char argument
595 FMT_LEN_h, // short
596 FMT_LEN_l, // long
597 FMT_LEN_ll, // long long
598 FMT_LEN_L, // long double (and GNU long long)
599 FMT_LEN_z, // size_t
600 FMT_LEN_t, // ptrdiff_t
601 FMT_LEN_j // intmax_t
605 /* Description of the result of conversion either of a single directive
606 or the whole format string. */
608 struct fmtresult
610 /* Construct a FMTRESULT object with all counters initialized
611 to MIN. KNOWNRANGE is set when MIN is valid. */
612 fmtresult (unsigned HOST_WIDE_INT min = HOST_WIDE_INT_MAX)
613 : argmin (), argmax (),
614 knownrange (min < HOST_WIDE_INT_MAX),
615 nullp ()
617 range.min = min;
618 range.max = min;
619 range.likely = min;
620 range.unlikely = min;
623 /* Construct a FMTRESULT object with MIN, MAX, and LIKELY counters.
624 KNOWNRANGE is set when both MIN and MAX are valid. */
625 fmtresult (unsigned HOST_WIDE_INT min, unsigned HOST_WIDE_INT max,
626 unsigned HOST_WIDE_INT likely = HOST_WIDE_INT_MAX)
627 : argmin (), argmax (),
628 knownrange (min < HOST_WIDE_INT_MAX && max < HOST_WIDE_INT_MAX),
629 nullp ()
631 range.min = min;
632 range.max = max;
633 range.likely = max < likely ? min : likely;
634 range.unlikely = max;
637 /* Adjust result upward to reflect the RANGE of values the specified
638 width or precision is known to be in. */
639 fmtresult& adjust_for_width_or_precision (const HOST_WIDE_INT[2],
640 tree = NULL_TREE,
641 unsigned = 0, unsigned = 0);
643 /* Return the maximum number of decimal digits a value of TYPE
644 formats as on output. */
645 static unsigned type_max_digits (tree, int);
647 /* The range a directive's argument is in. */
648 tree argmin, argmax;
650 /* The minimum and maximum number of bytes that a directive
651 results in on output for an argument in the range above. */
652 result_range range;
654 /* True when the range above is obtained from a known value of
655 a directive's argument or its bounds and not the result of
656 heuristics that depend on warning levels. */
657 bool knownrange;
659 /* True when the argument is a null pointer. */
660 bool nullp;
663 /* Adjust result upward to reflect the range ADJUST of values the
664 specified width or precision is known to be in. When non-null,
665 TYPE denotes the type of the directive whose result is being
666 adjusted, BASE gives the base of the directive (octal, decimal,
667 or hex), and ADJ denotes the additional adjustment to the LIKELY
668 counter that may need to be added when ADJUST is a range. */
670 fmtresult&
671 fmtresult::adjust_for_width_or_precision (const HOST_WIDE_INT adjust[2],
672 tree type /* = NULL_TREE */,
673 unsigned base /* = 0 */,
674 unsigned adj /* = 0 */)
676 bool minadjusted = false;
678 /* Adjust the minimum and likely counters. */
679 if (adjust[0] >= 0)
681 if (range.min < (unsigned HOST_WIDE_INT)adjust[0])
683 range.min = adjust[0];
684 minadjusted = true;
687 /* Adjust the likely counter. */
688 if (range.likely < range.min)
689 range.likely = range.min;
691 else if (adjust[0] == target_int_min ()
692 && (unsigned HOST_WIDE_INT)adjust[1] == target_int_max ())
693 knownrange = false;
695 /* Adjust the maximum counter. */
696 if (adjust[1] > 0)
698 if (range.max < (unsigned HOST_WIDE_INT)adjust[1])
700 range.max = adjust[1];
702 /* Set KNOWNRANGE if both the minimum and maximum have been
703 adjusted. Otherwise leave it at what it was before. */
704 knownrange = minadjusted;
708 if (warn_level > 1 && type)
710 /* For large non-constant width or precision whose range spans
711 the maximum number of digits produced by the directive for
712 any argument, set the likely number of bytes to be at most
713 the number of digits plus any other adjustment determined by the
714 caller (one for sign or two for the hexadecimal "0x"
715 prefix). */
716 unsigned dirdigs = type_max_digits (type, base);
717 if (adjust[0] < dirdigs && dirdigs < adjust[1]
718 && range.likely < dirdigs)
719 range.likely = dirdigs + adj;
721 else if (range.likely < (range.min ? range.min : 1))
723 /* Conservatively, set LIKELY to at least MIN but no less than
724 1 unless MAX is zero. */
725 range.likely = (range.min
726 ? range.min
727 : range.max && (range.max < HOST_WIDE_INT_MAX
728 || warn_level > 1) ? 1 : 0);
731 /* Finally adjust the unlikely counter to be at least as large as
732 the maximum. */
733 if (range.unlikely < range.max)
734 range.unlikely = range.max;
736 return *this;
739 /* Return the maximum number of digits a value of TYPE formats in
740 BASE on output, not counting the base prefix. */
742 unsigned
743 fmtresult::type_max_digits (tree type, int base)
745 unsigned prec = TYPE_PRECISION (type);
746 if (base == 8)
747 return (prec + 2) / 3;
749 if (base == 16)
750 return prec / 4;
752 /* Decimal approximation: yields 3, 5, 10, and 20 for precision
753 of 8, 16, 32, and 64 bits. */
754 return prec * 301 / 1000 + 1;
757 static bool
758 get_int_range (tree, HOST_WIDE_INT *, HOST_WIDE_INT *, bool, HOST_WIDE_INT);
760 /* Description of a format directive. A directive is either a plain
761 string or a conversion specification that starts with '%'. */
763 struct directive
765 /* The 1-based directive number (for debugging). */
766 unsigned dirno;
768 /* The first character of the directive and its length. */
769 const char *beg;
770 size_t len;
772 /* A bitmap of flags, one for each character. */
773 unsigned flags[256 / sizeof (int)];
775 /* The range of values of the specified width, or -1 if not specified. */
776 HOST_WIDE_INT width[2];
777 /* The range of values of the specified precision, or -1 if not
778 specified. */
779 HOST_WIDE_INT prec[2];
781 /* Length modifier. */
782 format_lengths modifier;
784 /* Format specifier character. */
785 char specifier;
787 /* The argument of the directive or null when the directive doesn't
788 take one or when none is available (such as for vararg functions). */
789 tree arg;
791 /* Format conversion function that given a directive and an argument
792 returns the formatting result. */
793 fmtresult (*fmtfunc) (const directive &, tree);
795 /* Return true when the format flag CHR has been used. */
796 bool get_flag (char chr) const
798 unsigned char c = chr & 0xff;
799 return (flags[c / (CHAR_BIT * sizeof *flags)]
800 & (1U << (c % (CHAR_BIT * sizeof *flags))));
803 /* Make a record of the format flag CHR having been used. */
804 void set_flag (char chr)
806 unsigned char c = chr & 0xff;
807 flags[c / (CHAR_BIT * sizeof *flags)]
808 |= (1U << (c % (CHAR_BIT * sizeof *flags)));
811 /* Reset the format flag CHR. */
812 void clear_flag (char chr)
814 unsigned char c = chr & 0xff;
815 flags[c / (CHAR_BIT * sizeof *flags)]
816 &= ~(1U << (c % (CHAR_BIT * sizeof *flags)));
819 /* Set both bounds of the width range to VAL. */
820 void set_width (HOST_WIDE_INT val)
822 width[0] = width[1] = val;
825 /* Set the width range according to ARG, with both bounds being
826 no less than 0. For a constant ARG set both bounds to its value
827 or 0, whichever is greater. For a non-constant ARG in some range
828 set width to its range adjusting each bound to -1 if it's less.
829 For an indeterminate ARG set width to [0, INT_MAX]. */
830 void set_width (tree arg)
832 get_int_range (arg, width, width + 1, true, 0);
835 /* Set both bounds of the precision range to VAL. */
836 void set_precision (HOST_WIDE_INT val)
838 prec[0] = prec[1] = val;
841 /* Set the precision range according to ARG, with both bounds being
842 no less than -1. For a constant ARG set both bounds to its value
843 or -1 whichever is greater. For a non-constant ARG in some range
844 set precision to its range adjusting each bound to -1 if it's less.
845 For an indeterminate ARG set precision to [-1, INT_MAX]. */
846 void set_precision (tree arg)
848 get_int_range (arg, prec, prec + 1, false, -1);
851 /* Return true if both width and precision are known to be
852 either constant or in some range, false otherwise. */
853 bool known_width_and_precision () const
855 return ((width[1] < 0
856 || (unsigned HOST_WIDE_INT)width[1] <= target_int_max ())
857 && (prec[1] < 0
858 || (unsigned HOST_WIDE_INT)prec[1] < target_int_max ()));
862 /* Return the number of digits in the representation of X in BASE (the integer logarithm of X in BASE plus one, or 1 when X is zero). */
864 static int
865 ilog (unsigned HOST_WIDE_INT x, int base)
867 int res = 0;
870 ++res;
871 x /= base;
872 } while (x);
873 return res;
876 /* Return the number of bytes resulting from converting into a string
877 the INTEGER_CST tree node X in BASE with a minimum of PREC digits.
878 PLUS indicates whether 1 for a plus sign should be added for positive
879 numbers, and PREFIX whether the length of an octal ('0') or hexadecimal
880 ('0x') prefix should be added for nonzero numbers. Return -1 if X cannot
881 be represented. */
883 static HOST_WIDE_INT
884 tree_digits (tree x, int base, HOST_WIDE_INT prec, bool plus, bool prefix)
886 unsigned HOST_WIDE_INT absval;
888 HOST_WIDE_INT res;
890 if (TYPE_UNSIGNED (TREE_TYPE (x)))
892 if (tree_fits_uhwi_p (x))
894 absval = tree_to_uhwi (x);
895 res = plus;
897 else
898 return -1;
900 else
902 if (tree_fits_shwi_p (x))
904 HOST_WIDE_INT i = tree_to_shwi (x);
905 if (HOST_WIDE_INT_MIN == i)
907 /* Avoid undefined behavior due to negating a minimum. */
908 absval = HOST_WIDE_INT_MAX;
909 res = 1;
911 else if (i < 0)
913 absval = -i;
914 res = 1;
916 else
918 absval = i;
919 res = plus;
922 else
923 return -1;
926 int ndigs = ilog (absval, base);
928 res += prec < ndigs ? ndigs : prec;
930 /* Adjust a non-zero value for the base prefix, either hexadecimal,
931 or, unless precision has resulted in a leading zero, also octal. */
932 if (prefix && absval && (base == 16 || prec <= ndigs))
934 if (base == 8)
935 res += 1;
936 else if (base == 16)
937 res += 2;
940 return res;
943 /* Given the formatting result described by RES and NAVAIL, the number
944 of bytes available in the destination, return the range of bytes remaining
945 in the destination. */
947 static inline result_range
948 bytes_remaining (unsigned HOST_WIDE_INT navail, const format_result &res)
950 result_range range;
952 if (HOST_WIDE_INT_MAX <= navail)
954 range.min = range.max = range.likely = range.unlikely = navail;
955 return range;
958 /* The lower bound of the available range is the available size
959 minus the maximum output size, and the upper bound is the size
960 minus the minimum. */
961 range.max = res.range.min < navail ? navail - res.range.min : 0;
963 range.likely = res.range.likely < navail ? navail - res.range.likely : 0;
965 if (res.range.max < HOST_WIDE_INT_MAX)
966 range.min = res.range.max < navail ? navail - res.range.max : 0;
967 else
968 range.min = range.likely;
970 range.unlikely = (res.range.unlikely < navail
971 ? navail - res.range.unlikely : 0);
973 return range;
976 /* Description of a call to a formatted function. */
978 struct pass_sprintf_length::call_info
980 /* Function call statement. */
981 gimple *callstmt;
983 /* Function called. */
984 tree func;
986 /* Called built-in function code. */
987 built_in_function fncode;
989 /* Format argument and format string extracted from it. */
990 tree format;
991 const char *fmtstr;
993 /* The location of the format argument. */
994 location_t fmtloc;
996 /* The destination object size for __builtin___xxx_chk functions
997 typically determined by __builtin_object_size, or -1 if unknown. */
998 unsigned HOST_WIDE_INT objsize;
1000 /* Number of the first variable argument. */
1001 unsigned HOST_WIDE_INT argidx;
1003 /* True for functions like snprintf that specify the size of
1004 the destination, false for others like sprintf that don't. */
1005 bool bounded;
1007 /* True for bounded functions like snprintf that specify a zero-size
1008 buffer as a request to compute the size of output without actually
1009 writing any. NOWRITE is cleared in response to the %n directive
1010 which has side-effects similar to writing output. */
1011 bool nowrite;
1013 /* Return true if the called function's return value is used. */
1014 bool retval_used () const
1016 return gimple_get_lhs (callstmt);
1019 /* Return the warning option corresponding to the called function. */
1020 int warnopt () const
1022 return bounded ? OPT_Wformat_truncation_ : OPT_Wformat_overflow_;
1026 /* Return the result of formatting a no-op directive (such as '%n'). */
1028 static fmtresult
1029 format_none (const directive &, tree)
1031 fmtresult res (0);
1032 return res;
1035 /* Return the result of formatting the '%%' directive. */
1037 static fmtresult
1038 format_percent (const directive &, tree)
1040 fmtresult res (1);
1041 return res;
1045 /* Compute intmax_type_node and uintmax_type_node similarly to how
1046 tree.c builds size_type_node. */
1048 static void
1049 build_intmax_type_nodes (tree *pintmax, tree *puintmax)
1051 if (strcmp (UINTMAX_TYPE, "unsigned int") == 0)
1053 *pintmax = integer_type_node;
1054 *puintmax = unsigned_type_node;
1056 else if (strcmp (UINTMAX_TYPE, "long unsigned int") == 0)
1058 *pintmax = long_integer_type_node;
1059 *puintmax = long_unsigned_type_node;
1061 else if (strcmp (UINTMAX_TYPE, "long long unsigned int") == 0)
1063 *pintmax = long_long_integer_type_node;
1064 *puintmax = long_long_unsigned_type_node;
1066 else
1068 for (int i = 0; i < NUM_INT_N_ENTS; i++)
1069 if (int_n_enabled_p[i])
1071 char name[50];
1072 sprintf (name, "__int%d unsigned", int_n_data[i].bitsize);
1074 if (strcmp (name, UINTMAX_TYPE) == 0)
1076 *pintmax = int_n_trees[i].signed_type;
1077 *puintmax = int_n_trees[i].unsigned_type;
1078 return;
1081 gcc_unreachable ();
1085 /* Determine the range [*PMIN, *PMAX] that the expression ARG is
1086 in and that is representable in type int.
1087 Return true when the range is a subrange of that of int.
1088 When ARG is null it is as if it had the full range of int.
1089 When ABSOLUTE is true the range reflects the absolute value of
1090 the argument. When ABSOLUTE is false, negative bounds of
1091 the determined range are replaced with NEGBOUND. */
1093 static bool
1094 get_int_range (tree arg, HOST_WIDE_INT *pmin, HOST_WIDE_INT *pmax,
1095 bool absolute, HOST_WIDE_INT negbound)
1097 /* The type of the result. */
1098 const_tree type = integer_type_node;
1100 bool knownrange = false;
1102 if (!arg)
1104 *pmin = tree_to_shwi (TYPE_MIN_VALUE (type));
1105 *pmax = tree_to_shwi (TYPE_MAX_VALUE (type));
1107 else if (TREE_CODE (arg) == INTEGER_CST
1108 && TYPE_PRECISION (TREE_TYPE (arg)) <= TYPE_PRECISION (type))
1110 /* For a constant argument return its value adjusted as specified
1111 by NEGATIVE and NEGBOUND and return true to indicate that the
1112 result is known. */
1113 *pmin = tree_fits_shwi_p (arg) ? tree_to_shwi (arg) : tree_to_uhwi (arg);
1114 *pmax = *pmin;
1115 knownrange = true;
1117 else
1119 /* True if the argument's range cannot be determined. */
1120 bool unknown = true;
1122 tree argtype = TREE_TYPE (arg);
1124 /* Ignore invalid arguments with greater precision that that
1125 of the expected type (e.g., in sprintf("%*i", 12LL, i)).
1126 They will have been detected and diagnosed by -Wformat and
1127 so it's not important to complicate this code to try to deal
1128 with them again. */
1129 if (TREE_CODE (arg) == SSA_NAME
1130 && INTEGRAL_TYPE_P (argtype)
1131 && TYPE_PRECISION (argtype) <= TYPE_PRECISION (type))
1133 /* Try to determine the range of values of the integer argument. */
1134 wide_int min, max;
1135 enum value_range_type range_type = get_range_info (arg, &min, &max);
1136 if (range_type == VR_RANGE)
1138 HOST_WIDE_INT type_min
1139 = (TYPE_UNSIGNED (argtype)
1140 ? tree_to_uhwi (TYPE_MIN_VALUE (argtype))
1141 : tree_to_shwi (TYPE_MIN_VALUE (argtype)));
1143 HOST_WIDE_INT type_max = tree_to_uhwi (TYPE_MAX_VALUE (argtype));
1145 *pmin = min.to_shwi ();
1146 *pmax = max.to_shwi ();
1148 if (*pmin < *pmax)
1150 /* Return true if the adjusted range is a subrange of
1151 the full range of the argument's type. *PMAX may
1152 be less than *PMIN when the argument is unsigned
1153 and its upper bound is in excess of TYPE_MAX. In
1154 that (invalid) case disregard the range and use that
1155 of the expected type instead. */
1156 knownrange = type_min < *pmin || *pmax < type_max;
1158 unknown = false;
1163 /* Handle an argument with an unknown range as if none had been
1164 provided. */
1165 if (unknown)
1166 return get_int_range (NULL_TREE, pmin, pmax, absolute, negbound);
1169 /* Adjust each bound as specified by ABSOLUTE and NEGBOUND. */
1170 if (absolute)
1172 if (*pmin < 0)
1174 if (*pmin == *pmax)
1175 *pmin = *pmax = -*pmin;
1176 else
1178 /* Make sure signed overlow is avoided. */
1179 gcc_assert (*pmin != HOST_WIDE_INT_MIN);
1181 HOST_WIDE_INT tmp = -*pmin;
1182 *pmin = 0;
1183 if (*pmax < tmp)
1184 *pmax = tmp;
1188 else if (*pmin < negbound)
1189 *pmin = negbound;
1191 return knownrange;
1194 /* With the range [*ARGMIN, *ARGMAX] of an integer directive's actual
1195 argument, due to the conversion from either *ARGMIN or *ARGMAX to
1196 the type of the directive's formal argument it's possible for both
1197 to result in the same number of bytes or a range of bytes that's
1198 less than the number of bytes that would result from formatting
1199 some other value in the range [*ARGMIN, *ARGMAX]. This can be
1200 determined by checking for the actual argument being in the range
1201 of the type of the directive. If it isn't it must be assumed to
1202 take on the full range of the directive's type.
1203 Return true when the range has been adjusted to the full range
1204 of DIRTYPE, and false otherwise. */
1206 static bool
1207 adjust_range_for_overflow (tree dirtype, tree *argmin, tree *argmax)
1209 tree argtype = TREE_TYPE (*argmin);
1210 unsigned argprec = TYPE_PRECISION (argtype);
1211 unsigned dirprec = TYPE_PRECISION (dirtype);
1213 /* If the actual argument and the directive's argument have the same
1214 precision and sign there can be no overflow and so there is nothing
1215 to adjust. */
1216 if (argprec == dirprec && TYPE_SIGN (argtype) == TYPE_SIGN (dirtype))
1217 return false;
1219 /* The logic below was inspired/lifted from the CONVERT_EXPR_CODE_P
1220 branch in the extract_range_from_unary_expr function in tree-vrp.c. */
1222 if (TREE_CODE (*argmin) == INTEGER_CST
1223 && TREE_CODE (*argmax) == INTEGER_CST
1224 && (dirprec >= argprec
1225 || integer_zerop (int_const_binop (RSHIFT_EXPR,
1226 int_const_binop (MINUS_EXPR,
1227 *argmax,
1228 *argmin),
1229 size_int (dirprec)))))
1231 *argmin = force_fit_type (dirtype, wi::to_widest (*argmin), 0, false);
1232 *argmax = force_fit_type (dirtype, wi::to_widest (*argmax), 0, false);
1234 /* If *ARGMIN is still less than *ARGMAX the conversion above
1235 is safe. Otherwise, it has overflowed and would be unsafe. */
1236 if (tree_int_cst_le (*argmin, *argmax))
1237 return false;
1240 *argmin = TYPE_MIN_VALUE (dirtype);
1241 *argmax = TYPE_MAX_VALUE (dirtype);
1242 return true;
1245 /* Return a range representing the minimum and maximum number of bytes
1246 that the format directive DIR will output for any argument given
1247 the WIDTH and PRECISION (extracted from DIR). This function is
1248 used when the directive argument or its value isn't known. */
1250 static fmtresult
1251 format_integer (const directive &dir, tree arg)
1253 tree intmax_type_node;
1254 tree uintmax_type_node;
1256 /* Base to format the number in. */
1257 int base;
1259 /* True when a conversion is preceded by a prefix indicating the base
1260 of the argument (octal or hexadecimal). */
1261 bool maybebase = dir.get_flag ('#');
1263 /* True when a signed conversion is preceded by a sign or space. */
1264 bool maybesign = false;
1266 /* True for signed conversions (i.e., 'd' and 'i'). */
1267 bool sign = false;
1269 switch (dir.specifier)
1271 case 'd':
1272 case 'i':
1273 /* Space and '+' are only meaningful for signed conversions. */
1274 maybesign = dir.get_flag (' ') | dir.get_flag ('+');
1275 sign = true;
1276 base = 10;
1277 break;
1278 case 'u':
1279 base = 10;
1280 break;
1281 case 'o':
1282 base = 8;
1283 break;
1284 case 'X':
1285 case 'x':
1286 base = 16;
1287 break;
1288 default:
1289 gcc_unreachable ();
1292 /* The type of the "formal" argument expected by the directive. */
1293 tree dirtype = NULL_TREE;
1295 /* Determine the expected type of the argument from the length
1296 modifier. */
1297 switch (dir.modifier)
1299 case FMT_LEN_none:
1300 if (dir.specifier == 'p')
1301 dirtype = ptr_type_node;
1302 else
1303 dirtype = sign ? integer_type_node : unsigned_type_node;
1304 break;
1306 case FMT_LEN_h:
1307 dirtype = sign ? short_integer_type_node : short_unsigned_type_node;
1308 break;
1310 case FMT_LEN_hh:
1311 dirtype = sign ? signed_char_type_node : unsigned_char_type_node;
1312 break;
1314 case FMT_LEN_l:
1315 dirtype = sign ? long_integer_type_node : long_unsigned_type_node;
1316 break;
1318 case FMT_LEN_L:
1319 case FMT_LEN_ll:
1320 dirtype = (sign
1321 ? long_long_integer_type_node
1322 : long_long_unsigned_type_node);
1323 break;
1325 case FMT_LEN_z:
1326 dirtype = signed_or_unsigned_type_for (!sign, size_type_node);
1327 break;
1329 case FMT_LEN_t:
1330 dirtype = signed_or_unsigned_type_for (!sign, ptrdiff_type_node);
1331 break;
1333 case FMT_LEN_j:
1334 build_intmax_type_nodes (&intmax_type_node, &uintmax_type_node);
1335 dirtype = sign ? intmax_type_node : uintmax_type_node;
1336 break;
1338 default:
1339 return fmtresult ();
1342 /* The type of the argument to the directive, either deduced from
1343 the actual non-constant argument if one is known, or from
1344 the directive itself when none has been provided because it's
1345 a va_list. */
1346 tree argtype = NULL_TREE;
1348 if (!arg)
1350 /* When the argument has not been provided, use the type of
1351 the directive's argument as an approximation. This will
1352 result in false positives for directives like %i with
1353 arguments with smaller precision (such as short or char). */
1354 argtype = dirtype;
1356 else if (TREE_CODE (arg) == INTEGER_CST)
1358 /* When a constant argument has been provided use its value
1359 rather than type to determine the length of the output. */
1360 fmtresult res;
1362 if ((dir.prec[0] <= 0 && dir.prec[1] >= 0) && integer_zerop (arg))
1364 /* As a special case, a precision of zero with a zero argument
1365 results in zero bytes except in base 8 when the '#' flag is
1366 specified, and for signed conversions in base 8 and 10 when
1367 either the space or '+' flag has been specified and it results
1368 in just one byte (with width having the normal effect). This
1369 must extend to the case of a specified precision with
1370 an unknown value because it can be zero. */
1371 res.range.min = ((base == 8 && dir.get_flag ('#')) || maybesign);
1372 if (res.range.min == 0 && dir.prec[0] != dir.prec[1])
1374 res.range.max = 1;
1375 res.range.likely = 1;
1377 else
1379 res.range.max = res.range.min;
1380 res.range.likely = res.range.min;
1383 else
1385 /* Convert the argument to the type of the directive. */
1386 arg = fold_convert (dirtype, arg);
1388 res.range.min = tree_digits (arg, base, dir.prec[0],
1389 maybesign, maybebase);
1390 if (dir.prec[0] == dir.prec[1])
1391 res.range.max = res.range.min;
1392 else
1393 res.range.max = tree_digits (arg, base, dir.prec[1],
1394 maybesign, maybebase);
1395 res.range.likely = res.range.min;
1396 res.knownrange = true;
1399 res.range.unlikely = res.range.max;
1401 /* Bump up the counters if WIDTH is greater than LEN. */
1402 res.adjust_for_width_or_precision (dir.width, dirtype, base,
1403 (sign | maybebase) + (base == 16));
1404 /* Bump up the counters again if PRECision is greater still. */
1405 res.adjust_for_width_or_precision (dir.prec, dirtype, base,
1406 (sign | maybebase) + (base == 16));
1408 return res;
1410 else if (TREE_CODE (TREE_TYPE (arg)) == INTEGER_TYPE
1411 || TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE)
1412 /* Determine the type of the provided non-constant argument. */
1413 argtype = TREE_TYPE (arg);
1414 else
1415 /* Don't bother with invalid arguments since they likely would
1416 have already been diagnosed, and disable any further checking
1417 of the format string by returning [-1, -1]. */
1418 return fmtresult ();
1420 fmtresult res;
1422 /* Using either the range the non-constant argument is in, or its
1423 type (either "formal" or actual), create a range of values that
1424 constrain the length of output given the warning level. */
1425 tree argmin = NULL_TREE;
1426 tree argmax = NULL_TREE;
1428 if (arg
1429 && TREE_CODE (arg) == SSA_NAME
1430 && TREE_CODE (argtype) == INTEGER_TYPE)
1432 /* Try to determine the range of values of the integer argument
1433 (range information is not available for pointers). */
1434 wide_int min, max;
1435 enum value_range_type range_type = get_range_info (arg, &min, &max);
1436 if (range_type == VR_RANGE)
1438 argmin = wide_int_to_tree (argtype, min);
1439 argmax = wide_int_to_tree (argtype, max);
1441 /* Set KNOWNRANGE if the argument is in a known subrange
1442 of the directive's type and neither width nor precision
1443 is unknown. (KNOWNRANGE may be reset below). */
1444 res.knownrange
1445 = ((!tree_int_cst_equal (TYPE_MIN_VALUE (dirtype), argmin)
1446 || !tree_int_cst_equal (TYPE_MAX_VALUE (dirtype), argmax))
1447 && dir.known_width_and_precision ());
1449 res.argmin = argmin;
1450 res.argmax = argmax;
1452 else if (range_type == VR_ANTI_RANGE)
1454 /* Handle anti-ranges if/when bug 71690 is resolved. */
1456 else if (range_type == VR_VARYING)
1458 /* The argument here may be the result of promoting the actual
1459 argument to int. Try to determine the type of the actual
1460 argument before promotion and narrow down its range that
1461 way. */
1462 gimple *def = SSA_NAME_DEF_STMT (arg);
1463 if (is_gimple_assign (def))
1465 tree_code code = gimple_assign_rhs_code (def);
1466 if (code == INTEGER_CST)
1468 arg = gimple_assign_rhs1 (def);
1469 return format_integer (dir, arg);
1472 if (code == NOP_EXPR)
1474 tree type = TREE_TYPE (gimple_assign_rhs1 (def));
1475 if (TREE_CODE (type) == INTEGER_TYPE
1476 || TREE_CODE (type) == POINTER_TYPE)
1477 argtype = type;
1483 if (!argmin)
1485 if (TREE_CODE (argtype) == POINTER_TYPE)
1487 argmin = build_int_cst (pointer_sized_int_node, 0);
1488 argmax = build_all_ones_cst (pointer_sized_int_node);
1490 else
1492 argmin = TYPE_MIN_VALUE (argtype);
1493 argmax = TYPE_MAX_VALUE (argtype);
1497 /* Clear KNOWNRANGE if the range has been adjusted to the maximum
1498 of the directive. If it has been cleared then since ARGMIN and/or
1499 ARGMAX have been adjusted also adjust the corresponding ARGMIN and
1500 ARGMAX in the result to include in diagnostics. */
1501 if (adjust_range_for_overflow (dirtype, &argmin, &argmax))
1503 res.knownrange = false;
1504 res.argmin = argmin;
1505 res.argmax = argmax;
1508 /* Recursively compute the minimum and maximum from the known range. */
1509 if (TYPE_UNSIGNED (dirtype) || tree_int_cst_sgn (argmin) >= 0)
1511 /* For unsigned conversions/directives or signed when
1512 the minimum is positive, use the minimum and maximum to compute
1513 the shortest and longest output, respectively. */
1514 res.range.min = format_integer (dir, argmin).range.min;
1515 res.range.max = format_integer (dir, argmax).range.max;
1517 else if (tree_int_cst_sgn (argmax) < 0)
1519 /* For signed conversions/directives if maximum is negative,
1520 use the minimum as the longest output and maximum as the
1521 shortest output. */
1522 res.range.min = format_integer (dir, argmax).range.min;
1523 res.range.max = format_integer (dir, argmin).range.max;
1525 else
1527 /* Otherwise, 0 is inside of the range and minimum negative. Use 0
1528 as the shortest output and for the longest output compute the
1529 length of the output of both minimum and maximum and pick the
1530 longer. */
1531 unsigned HOST_WIDE_INT max1 = format_integer (dir, argmin).range.max;
1532 unsigned HOST_WIDE_INT max2 = format_integer (dir, argmax).range.max;
1533 res.range.min = format_integer (dir, integer_zero_node).range.min;
1534 res.range.max = MAX (max1, max2);
1537 /* If the range is known, use the maximum as the likely length. */
1538 if (res.knownrange)
1539 res.range.likely = res.range.max;
1540 else
1542 /* Otherwise, use the minimum. Except for the case where for %#x or
1543 %#o the minimum is just for a single value in the range (0) and
1544 for all other values it is something longer, like 0x1 or 01.
1545 Use the length for value 1 in that case instead as the likely
1546 length. */
1547 res.range.likely = res.range.min;
1548 if (maybebase
1549 && base != 10
1550 && (tree_int_cst_sgn (argmin) < 0 || tree_int_cst_sgn (argmax) > 0))
1552 if (res.range.min == 1)
1553 res.range.likely += base == 8 ? 1 : 2;
1554 else if (res.range.min == 2
1555 && base == 16
1556 && (dir.width[0] == 2 || dir.prec[0] == 2))
1557 ++res.range.likely;
1561 res.range.unlikely = res.range.max;
1562 res.adjust_for_width_or_precision (dir.width, dirtype, base,
1563 (sign | maybebase) + (base == 16));
1564 res.adjust_for_width_or_precision (dir.prec, dirtype, base,
1565 (sign | maybebase) + (base == 16));
1567 return res;
1570 /* Return the number of bytes that a format directive consisting of FLAGS,
1571 PRECision, format SPECification, and MPFR rounding specifier RNDSPEC,
1572 would result for argument X under ideal conditions (i.e., if PREC
1573 weren't excessive). MPFR 3.1 allocates large amounts of memory for
1574 values of PREC with large magnitude and can fail (see MPFR bug #21056).
1575 This function works around those problems. */
1577 static unsigned HOST_WIDE_INT
1578 get_mpfr_format_length (mpfr_ptr x, const char *flags, HOST_WIDE_INT prec,
1579 char spec, char rndspec)
1581 char fmtstr[40];
1583 HOST_WIDE_INT len = strlen (flags);
1585 fmtstr[0] = '%';
1586 memcpy (fmtstr + 1, flags, len);
1587 memcpy (fmtstr + 1 + len, ".*R", 3);
1588 fmtstr[len + 4] = rndspec;
1589 fmtstr[len + 5] = spec;
1590 fmtstr[len + 6] = '\0';
1592 spec = TOUPPER (spec);
1593 if (spec == 'E' || spec == 'F')
1595 /* For %e, specify the precision explicitly since mpfr_sprintf
1596 does its own thing just to be different (see MPFR bug 21088). */
1597 if (prec < 0)
1598 prec = 6;
1600 else
1602 /* Avoid passing negative precisions with larger magnitude to MPFR
1603 to avoid exposing its bugs. (A negative precision is supposed
1604 to be ignored.) */
1605 if (prec < 0)
1606 prec = -1;
1609 HOST_WIDE_INT p = prec;
1611 if (spec == 'G' && !strchr (flags, '#'))
1613 /* For G/g without the pound flag, precision gives the maximum number
1614 of significant digits which is bounded by LDBL_MAX_10_EXP, or, for
1615 a 128 bit IEEE extended precision, 4932. Using twice as much here
1616 should be more than sufficient for any real format. */
1617 if ((IEEE_MAX_10_EXP * 2) < prec)
1618 prec = IEEE_MAX_10_EXP * 2;
1619 p = prec;
1621 else
1623 /* Cap precision arbitrarily at 1KB and add the difference
1624 (if any) to the MPFR result. */
1625 if (prec > 1024)
1626 p = 1024;
1629 len = mpfr_snprintf (NULL, 0, fmtstr, (int)p, x);
1631 /* Handle the unlikely (impossible?) error by returning more than
1632 the maximum dictated by the function's return type. */
1633 if (len < 0)
1634 return target_dir_max () + 1;
1636 /* Adjust the return value by the difference. */
1637 if (p < prec)
1638 len += prec - p;
1640 return len;
1643 /* Return the number of bytes to format using the format specifier
1644 SPEC and the precision PREC the largest value in the real floating
1645 TYPE. */
1647 static unsigned HOST_WIDE_INT
1648 format_floating_max (tree type, char spec, HOST_WIDE_INT prec)
1650 machine_mode mode = TYPE_MODE (type);
1652 /* IBM Extended mode. */
1653 if (MODE_COMPOSITE_P (mode))
1654 mode = DFmode;
1656 /* Get the real type format desription for the target. */
1657 const real_format *rfmt = REAL_MODE_FORMAT (mode);
1658 REAL_VALUE_TYPE rv;
1660 real_maxval (&rv, 0, mode);
1662 /* Convert the GCC real value representation with the precision
1663 of the real type to the mpfr_t format with the GCC default
1664 round-to-nearest mode. */
1665 mpfr_t x;
1666 mpfr_init2 (x, rfmt->p);
1667 mpfr_from_real (x, &rv, GMP_RNDN);
1669 /* Return a value one greater to account for the leading minus sign. */
1670 unsigned HOST_WIDE_INT r
1671 = 1 + get_mpfr_format_length (x, "", prec, spec, 'D');
1672 mpfr_clear (x);
1673 return r;
1676 /* Return a range representing the minimum and maximum number of bytes
1677 that the directive DIR will output for any argument. PREC gives
1678 the adjusted precision range to account for negative precisions
1679 meaning the default 6. This function is used when the directive
1680 argument or its value isn't known. */
1682 static fmtresult
1683 format_floating (const directive &dir, const HOST_WIDE_INT prec[2])
1685 tree type;
1687 switch (dir.modifier)
1689 case FMT_LEN_l:
1690 case FMT_LEN_none:
1691 type = double_type_node;
1692 break;
1694 case FMT_LEN_L:
1695 type = long_double_type_node;
1696 break;
1698 case FMT_LEN_ll:
1699 type = long_double_type_node;
1700 break;
1702 default:
1703 return fmtresult ();
1706 /* The minimum and maximum number of bytes produced by the directive. */
1707 fmtresult res;
1709 /* The minimum output as determined by flags. It's always at least 1.
1710 When plus or space are set the output is preceded by either a sign
1711 or a space. */
1712 unsigned flagmin = (1 /* for the first digit */
1713 + (dir.get_flag ('+') | dir.get_flag (' ')));
1715 /* When the pound flag is set the decimal point is included in output
1716 regardless of precision. Whether or not a decimal point is included
1717 otherwise depends on the specification and precision. */
1718 bool radix = dir.get_flag ('#');
1720 switch (dir.specifier)
1722 case 'A':
1723 case 'a':
1725 HOST_WIDE_INT minprec = 6 + !radix /* decimal point */;
1726 if (dir.prec[0] <= 0)
1727 minprec = 0;
1728 else if (dir.prec[0] > 0)
1729 minprec = dir.prec[0] + !radix /* decimal point */;
1731 res.range.min = (2 /* 0x */
1732 + flagmin
1733 + radix
1734 + minprec
1735 + 3 /* p+0 */);
1737 res.range.max = format_floating_max (type, 'a', prec[1]);
1738 res.range.likely = res.range.min;
1740 /* The unlikely maximum accounts for the longest multibyte
1741 decimal point character. */
1742 res.range.unlikely = res.range.max;
1743 if (dir.prec[1] > 0)
1744 res.range.unlikely += target_mb_len_max () - 1;
1746 break;
1749 case 'E':
1750 case 'e':
1752 /* Minimum output attributable to precision and, when it's
1753 non-zero, decimal point. */
1754 HOST_WIDE_INT minprec = prec[0] ? prec[0] + !radix : 0;
1756 /* The minimum output is "[-+]1.234567e+00" regardless
1757 of the value of the actual argument. */
1758 res.range.min = (flagmin
1759 + radix
1760 + minprec
1761 + 2 /* e+ */ + 2);
1763 res.range.max = format_floating_max (type, 'e', prec[1]);
1764 res.range.likely = res.range.min;
1766 /* The unlikely maximum accounts for the longest multibyte
1767 decimal point character. */
1768 if (dir.prec[0] != dir.prec[1]
1769 || dir.prec[0] == -1 || dir.prec[0] > 0)
1770 res.range.unlikely = res.range.max + target_mb_len_max () -1;
1771 else
1772 res.range.unlikely = res.range.max;
1773 break;
1776 case 'F':
1777 case 'f':
1779 /* Minimum output attributable to precision and, when it's non-zero,
1780 decimal point. */
1781 HOST_WIDE_INT minprec = prec[0] ? prec[0] + !radix : 0;
1783 /* The lower bound when precision isn't specified is 8 bytes
1784 ("1.23456" since precision is taken to be 6). When precision
1785 is zero, the lower bound is 1 byte (e.g., "1"). Otherwise,
1786 when precision is greater than zero, then the lower bound
1787 is 2 plus precision (plus flags). */
1788 res.range.min = flagmin + radix + minprec;
1790 /* Compute the upper bound for -TYPE_MAX. */
1791 res.range.max = format_floating_max (type, 'f', prec[1]);
1793 /* The minimum output with unknown precision is a single byte
1794 (e.g., "0") but the more likely output is 3 bytes ("0.0"). */
1795 if (dir.prec[0] < 0 && dir.prec[1] > 0)
1796 res.range.likely = 3;
1797 else
1798 res.range.likely = res.range.min;
1800 /* The unlikely maximum accounts for the longest multibyte
1801 decimal point character. */
1802 if (dir.prec[0] != dir.prec[1]
1803 || dir.prec[0] == -1 || dir.prec[0] > 0)
1804 res.range.unlikely = res.range.max + target_mb_len_max () - 1;
1805 break;
1808 case 'G':
1809 case 'g':
1811 /* The %g output depends on precision and the exponent of
1812 the argument. Since the value of the argument isn't known
1813 the lower bound on the range of bytes (not counting flags
1814 or width) is 1 plus radix (i.e., either "0" or "0." for
1815 "%g" and "%#g", respectively, with a zero argument). */
1816 res.range.min = flagmin + radix;
1818 char spec = 'g';
1819 HOST_WIDE_INT maxprec = dir.prec[1];
1820 if (radix && maxprec)
1822 /* When the pound flag (radix) is set, trailing zeros aren't
1823 trimmed and so the longest output is the same as for %e,
1824 except with precision minus 1 (as specified in C11). */
1825 spec = 'e';
1826 if (maxprec > 0)
1827 --maxprec;
1828 else if (maxprec < 0)
1829 maxprec = 5;
1831 else
1832 maxprec = prec[1];
1834 res.range.max = format_floating_max (type, spec, maxprec);
1836 /* The likely output is either the maximum computed above
1837 minus 1 (assuming the maximum is positive) when precision
1838 is known (or unspecified), or the same minimum as for %e
1839 (which is computed for a non-negative argument). Unlike
1840 for the other specifiers above the likely output isn't
1841 the minimum because for %g that's 1 which is unlikely. */
1842 if (dir.prec[1] < 0
1843 || (unsigned HOST_WIDE_INT)dir.prec[1] < target_int_max ())
1844 res.range.likely = res.range.max - 1;
1845 else
1847 HOST_WIDE_INT minprec = 6 + !radix /* decimal point */;
1848 res.range.likely = (flagmin
1849 + radix
1850 + minprec
1851 + 2 /* e+ */ + 2);
1854 /* The unlikely maximum accounts for the longest multibyte
1855 decimal point character. */
1856 res.range.unlikely = res.range.max + target_mb_len_max () - 1;
1857 break;
1860 default:
1861 return fmtresult ();
1864 /* Bump up the byte counters if WIDTH is greater. */
1865 res.adjust_for_width_or_precision (dir.width);
1866 return res;
1869 /* Return a range representing the minimum and maximum number of bytes
1870 that the directive DIR will write on output for the floating argument
1871 ARG. */
1873 static fmtresult
1874 format_floating (const directive &dir, tree arg)
1876 HOST_WIDE_INT prec[] = { dir.prec[0], dir.prec[1] };
1878 /* For an indeterminate precision the lower bound must be assumed
1879 to be zero. */
1880 if (TOUPPER (dir.specifier) == 'A')
1882 /* Get the number of fractional decimal digits needed to represent
1883 the argument without a loss of accuracy. */
1884 tree type = arg ? TREE_TYPE (arg) :
1885 (dir.modifier == FMT_LEN_L || dir.modifier == FMT_LEN_ll
1886 ? long_double_type_node : double_type_node);
1888 unsigned fmtprec
1889 = REAL_MODE_FORMAT (TYPE_MODE (type))->p;
1891 /* The precision of the IEEE 754 double format is 53.
1892 The precision of all other GCC binary double formats
1893 is 56 or less. */
1894 unsigned maxprec = fmtprec <= 56 ? 13 : 15;
1896 /* For %a, leave the minimum precision unspecified to let
1897 MFPR trim trailing zeros (as it and many other systems
1898 including Glibc happen to do) and set the maximum
1899 precision to reflect what it would be with trailing zeros
1900 present (as Solaris and derived systems do). */
1901 if (dir.prec[1] < 0)
1903 /* Both bounds are negative implies that precision has
1904 not been specified. */
1905 prec[0] = maxprec;
1906 prec[1] = -1;
1908 else if (dir.prec[0] < 0)
1910 /* With a negative lower bound and a non-negative upper
1911 bound set the minimum precision to zero and the maximum
1912 to the greater of the maximum precision (i.e., with
1913 trailing zeros present) and the specified upper bound. */
1914 prec[0] = 0;
1915 prec[1] = dir.prec[1] < maxprec ? maxprec : dir.prec[1];
1918 else if (dir.prec[0] < 0)
1920 if (dir.prec[1] < 0)
1922 /* A precision in a strictly negative range is ignored and
1923 the default of 6 is used instead. */
1924 prec[0] = prec[1] = 6;
1926 else
1928 /* For a precision in a partly negative range, the lower bound
1929 must be assumed to be zero and the new upper bound is the
1930 greater of 6 (the default precision used when the specified
1931 precision is negative) and the upper bound of the specified
1932 range. */
1933 prec[0] = 0;
1934 prec[1] = dir.prec[1] < 6 ? 6 : dir.prec[1];
1938 if (!arg || TREE_CODE (arg) != REAL_CST)
1939 return format_floating (dir, prec);
1941 /* The minimum and maximum number of bytes produced by the directive. */
1942 fmtresult res;
1944 /* Get the real type format desription for the target. */
1945 const REAL_VALUE_TYPE *rvp = TREE_REAL_CST_PTR (arg);
1946 const real_format *rfmt = REAL_MODE_FORMAT (TYPE_MODE (TREE_TYPE (arg)));
1948 char fmtstr [40];
1949 char *pfmt = fmtstr;
1951 /* Append flags. */
1952 for (const char *pf = "-+ #0"; *pf; ++pf)
1953 if (dir.get_flag (*pf))
1954 *pfmt++ = *pf;
1956 *pfmt = '\0';
1959 /* Set up an array to easily iterate over. */
1960 unsigned HOST_WIDE_INT* const minmax[] = {
1961 &res.range.min, &res.range.max
1964 for (int i = 0; i != sizeof minmax / sizeof *minmax; ++i)
1966 /* Convert the GCC real value representation with the precision
1967 of the real type to the mpfr_t format rounding down in the
1968 first iteration that computes the minimm and up in the second
1969 that computes the maximum. This order is arbibtrary because
1970 rounding in either direction can result in longer output. */
1971 mpfr_t mpfrval;
1972 mpfr_init2 (mpfrval, rfmt->p);
1973 mpfr_from_real (mpfrval, rvp, i ? GMP_RNDU : GMP_RNDD);
1975 /* Use the MPFR rounding specifier to round down in the first
1976 iteration and then up. In most but not all cases this will
1977 result in the same number of bytes. */
1978 char rndspec = "DU"[i];
1980 /* Format it and store the result in the corresponding member
1981 of the result struct. */
1982 *minmax[i] = get_mpfr_format_length (mpfrval, fmtstr, prec[i],
1983 dir.specifier, rndspec);
1984 mpfr_clear (mpfrval);
1988 /* Make sure the minimum is less than the maximum (MPFR rounding
1989 in the call to mpfr_snprintf can result in the reverse. */
1990 if (res.range.max < res.range.min)
1992 unsigned HOST_WIDE_INT tmp = res.range.min;
1993 res.range.min = res.range.max;
1994 res.range.max = tmp;
1997 /* The range is known unless either width or precision is unknown. */
1998 res.knownrange = dir.known_width_and_precision ();
2000 /* For the same floating point constant, unless width or precision
2001 is unknown, use the longer output as the likely maximum since
2002 with round to nearest either is equally likely. Otheriwse, when
2003 precision is unknown, use the greater of the minimum and 3 as
2004 the likely output (for "0.0" since zero precision is unlikely). */
2005 if (res.knownrange)
2006 res.range.likely = res.range.max;
2007 else if (res.range.min < 3
2008 && dir.prec[0] < 0
2009 && (unsigned HOST_WIDE_INT)dir.prec[1] == target_int_max ())
2010 res.range.likely = 3;
2011 else
2012 res.range.likely = res.range.min;
2014 res.range.unlikely = res.range.max;
2016 if (res.range.max > 2 && (prec[0] != 0 || prec[1] != 0))
2018 /* Unless the precision is zero output longer than 2 bytes may
2019 include the decimal point which must be a single character
2020 up to MB_LEN_MAX in length. This is overly conservative
2021 since in some conversions some constants result in no decimal
2022 point (e.g., in %g). */
2023 res.range.unlikely += target_mb_len_max () - 1;
2026 res.adjust_for_width_or_precision (dir.width);
2027 return res;
2030 /* Return a FMTRESULT struct set to the lengths of the shortest and longest
2031 strings referenced by the expression STR, or (-1, -1) when not known.
2032 Used by the format_string function below. */
2034 static fmtresult
2035 get_string_length (tree str)
2037 if (!str)
2038 return fmtresult ();
2040 if (tree slen = c_strlen (str, 1))
2042 /* Simply return the length of the string. */
2043 fmtresult res (tree_to_shwi (slen));
2044 return res;
2047 /* Determine the length of the shortest and longest string referenced
2048 by STR. Strings of unknown lengths are bounded by the sizes of
2049 arrays that subexpressions of STR may refer to. Pointers that
2050 aren't known to point any such arrays result in LENRANGE[1] set
2051 to SIZE_MAX. */
2052 tree lenrange[2];
2053 bool flexarray = get_range_strlen (str, lenrange);
2055 if (lenrange [0] || lenrange [1])
2057 HOST_WIDE_INT min
2058 = (tree_fits_uhwi_p (lenrange[0])
2059 ? tree_to_uhwi (lenrange[0])
2060 : 0);
2062 HOST_WIDE_INT max
2063 = (tree_fits_uhwi_p (lenrange[1])
2064 ? tree_to_uhwi (lenrange[1])
2065 : HOST_WIDE_INT_M1U);
2067 /* get_range_strlen() returns the target value of SIZE_MAX for
2068 strings of unknown length. Bump it up to HOST_WIDE_INT_M1U
2069 which may be bigger. */
2070 if ((unsigned HOST_WIDE_INT)min == target_size_max ())
2071 min = HOST_WIDE_INT_M1U;
2072 if ((unsigned HOST_WIDE_INT)max == target_size_max ())
2073 max = HOST_WIDE_INT_M1U;
2075 fmtresult res (min, max);
2077 /* Set RES.KNOWNRANGE to true if and only if all strings referenced
2078 by STR are known to be bounded (though not necessarily by their
2079 actual length but perhaps by their maximum possible length). */
2080 if (res.range.max < target_int_max ())
2082 res.knownrange = true;
2083 /* When the the length of the longest string is known and not
2084 excessive use it as the likely length of the string(s). */
2085 res.range.likely = res.range.max;
2087 else
2089 /* When the upper bound is unknown (it can be zero or excessive)
2090 set the likely length to the greater of 1 and the length of
2091 the shortest string and reset the lower bound to zero. */
2092 res.range.likely = res.range.min ? res.range.min : warn_level > 1;
2093 res.range.min = 0;
2096 /* If the range of string length has been estimated from the size
2097 of an array at the end of a struct assume that it's longer than
2098 the array bound says it is in case it's used as a poor man's
2099 flexible array member, such as in struct S { char a[4]; }; */
2100 res.range.unlikely = flexarray ? HOST_WIDE_INT_MAX : res.range.max;
2102 return res;
2105 return get_string_length (NULL_TREE);
2108 /* Return the minimum and maximum number of characters formatted
2109 by the '%c' format directives and its wide character form for
2110 the argument ARG. ARG can be null (for functions such as
2111 vsprinf). */
2113 static fmtresult
2114 format_character (const directive &dir, tree arg)
2116 fmtresult res;
2118 res.knownrange = true;
2120 if (dir.modifier == FMT_LEN_l)
2122 /* A wide character can result in as few as zero bytes. */
2123 res.range.min = 0;
2125 HOST_WIDE_INT min, max;
2126 if (get_int_range (arg, &min, &max, false, 0))
2128 if (min == 0 && max == 0)
2130 /* The NUL wide character results in no bytes. */
2131 res.range.max = 0;
2132 res.range.likely = 0;
2133 res.range.unlikely = 0;
2135 else if (min > 0 && min < 128)
2137 /* A wide character in the ASCII range most likely results
2138 in a single byte, and only unlikely in up to MB_LEN_MAX. */
2139 res.range.max = 1;
2140 res.range.likely = 1;
2141 res.range.unlikely = target_mb_len_max ();
2143 else
2145 /* A wide character outside the ASCII range likely results
2146 in up to two bytes, and only unlikely in up to MB_LEN_MAX. */
2147 res.range.max = target_mb_len_max ();
2148 res.range.likely = 2;
2149 res.range.unlikely = res.range.max;
2152 else
2154 /* An unknown wide character is treated the same as a wide
2155 character outside the ASCII range. */
2156 res.range.max = target_mb_len_max ();
2157 res.range.likely = 2;
2158 res.range.unlikely = res.range.max;
2161 else
2163 /* A plain '%c' directive. Its ouput is exactly 1. */
2164 res.range.min = res.range.max = 1;
2165 res.range.likely = res.range.unlikely = 1;
2166 res.knownrange = true;
2169 /* Bump up the byte counters if WIDTH is greater. */
2170 return res.adjust_for_width_or_precision (dir.width);
2173 /* Return the minimum and maximum number of characters formatted
2174 by the '%s' format directive and its wide character form for
2175 the argument ARG. ARG can be null (for functions such as
2176 vsprinf). */
2178 static fmtresult
2179 format_string (const directive &dir, tree arg)
2181 fmtresult res;
2183 /* Compute the range the argument's length can be in. */
2184 fmtresult slen = get_string_length (arg);
2185 if (slen.range.min == slen.range.max
2186 && slen.range.min < HOST_WIDE_INT_MAX)
2188 /* The argument is either a string constant or it refers
2189 to one of a number of strings of the same length. */
2191 /* A '%s' directive with a string argument with constant length. */
2192 res.range = slen.range;
2194 if (dir.modifier == FMT_LEN_l)
2196 /* In the worst case the length of output of a wide string S
2197 is bounded by MB_LEN_MAX * wcslen (S). */
2198 res.range.max *= target_mb_len_max ();
2199 res.range.unlikely = res.range.max;
2200 /* It's likely that the the total length is not more that
2201 2 * wcslen (S).*/
2202 res.range.likely = res.range.min * 2;
2204 if (dir.prec[1] >= 0
2205 && (unsigned HOST_WIDE_INT)dir.prec[1] < res.range.max)
2207 res.range.max = dir.prec[1];
2208 res.range.likely = dir.prec[1];
2209 res.range.unlikely = dir.prec[1];
2212 if (dir.prec[0] < 0 && dir.prec[1] > -1)
2213 res.range.min = 0;
2214 else if (dir.prec[0] >= 0)
2215 res.range.likely = dir.prec[0];
2217 /* Even a non-empty wide character string need not convert into
2218 any bytes. */
2219 res.range.min = 0;
2221 else
2223 res.knownrange = true;
2225 if (dir.prec[0] < 0 && dir.prec[1] > -1)
2226 res.range.min = 0;
2227 else if ((unsigned HOST_WIDE_INT)dir.prec[0] < res.range.min)
2228 res.range.min = dir.prec[0];
2230 if ((unsigned HOST_WIDE_INT)dir.prec[1] < res.range.max)
2232 res.range.max = dir.prec[1];
2233 res.range.likely = dir.prec[1];
2234 res.range.unlikely = dir.prec[1];
2238 else if (arg && integer_zerop (arg))
2240 /* Handle null pointer argument. */
2242 fmtresult res (0);
2243 res.nullp = true;
2244 return res;
2246 else
2248 /* For a '%s' and '%ls' directive with a non-constant string (either
2249 one of a number of strings of known length or an unknown string)
2250 the minimum number of characters is lesser of PRECISION[0] and
2251 the length of the shortest known string or zero, and the maximum
2252 is the lessser of the length of the longest known string or
2253 PTRDIFF_MAX and PRECISION[1]. The likely length is either
2254 the minimum at level 1 and the greater of the minimum and 1
2255 at level 2. This result is adjust upward for width (if it's
2256 specified). */
2258 if (dir.modifier == FMT_LEN_l)
2260 /* A wide character converts to as few as zero bytes. */
2261 slen.range.min = 0;
2262 if (slen.range.max < target_int_max ())
2263 slen.range.max *= target_mb_len_max ();
2265 if (slen.range.likely < target_int_max ())
2266 slen.range.likely *= 2;
2268 if (slen.range.likely < target_int_max ())
2269 slen.range.unlikely *= target_mb_len_max ();
2272 res.range = slen.range;
2274 if (dir.prec[0] >= 0)
2276 /* Adjust the minimum to zero if the string length is unknown,
2277 or at most the lower bound of the precision otherwise. */
2278 if (slen.range.min >= target_int_max ())
2279 res.range.min = 0;
2280 else if ((unsigned HOST_WIDE_INT)dir.prec[0] < slen.range.min)
2281 res.range.min = dir.prec[0];
2283 /* Make both maxima no greater than the upper bound of precision. */
2284 if ((unsigned HOST_WIDE_INT)dir.prec[1] < slen.range.max
2285 || slen.range.max >= target_int_max ())
2287 res.range.max = dir.prec[1];
2288 res.range.unlikely = dir.prec[1];
2291 /* If precision is constant, set the likely counter to the lesser
2292 of it and the maximum string length. Otherwise, if the lower
2293 bound of precision is greater than zero, set the likely counter
2294 to the minimum. Otherwise set it to zero or one based on
2295 the warning level. */
2296 if (dir.prec[0] == dir.prec[1])
2297 res.range.likely
2298 = ((unsigned HOST_WIDE_INT)dir.prec[0] < slen.range.max
2299 ? dir.prec[0] : slen.range.max);
2300 else if (dir.prec[0] > 0)
2301 res.range.likely = res.range.min;
2302 else
2303 res.range.likely = warn_level > 1;
2305 else if (dir.prec[1] >= 0)
2307 res.range.min = 0;
2308 if ((unsigned HOST_WIDE_INT)dir.prec[1] < slen.range.max)
2309 res.range.max = dir.prec[1];
2310 res.range.likely = dir.prec[1] ? warn_level > 1 : 0;
2312 else if (slen.range.min >= target_int_max ())
2314 res.range.min = 0;
2315 res.range.max = HOST_WIDE_INT_MAX;
2316 /* At level 1 strings of unknown length are assumed to be
2317 empty, while at level 1 they are assumed to be one byte
2318 long. */
2319 res.range.likely = warn_level > 1;
2321 else
2323 /* A string of unknown length unconstrained by precision is
2324 assumed to be empty at level 1 and just one character long
2325 at higher levels. */
2326 if (res.range.likely >= target_int_max ())
2327 res.range.likely = warn_level > 1;
2330 res.range.unlikely = res.range.max;
2333 /* Bump up the byte counters if WIDTH is greater. */
2334 return res.adjust_for_width_or_precision (dir.width);
2337 /* Format plain string (part of the format string itself). */
2339 static fmtresult
2340 format_plain (const directive &dir, tree)
2342 fmtresult res (dir.len);
2343 return res;
2346 /* Return true if the RESULT of a directive in a call describe by INFO
2347 should be diagnosed given the AVAILable space in the destination. */
2349 static bool
2350 should_warn_p (const pass_sprintf_length::call_info &info,
2351 const result_range &avail, const result_range &result)
2353 if (result.max <= avail.min)
2355 /* The least amount of space remaining in the destination is big
2356 enough for the longest output. */
2357 return false;
2360 if (info.bounded)
2362 if (warn_format_trunc == 1 && result.min <= avail.max
2363 && info.retval_used ())
2365 /* The likely amount of space remaining in the destination is big
2366 enough for the least output and the return value is used. */
2367 return false;
2370 if (warn_format_trunc == 1 && result.likely <= avail.likely
2371 && !info.retval_used ())
2373 /* The likely amount of space remaining in the destination is big
2374 enough for the likely output and the return value is unused. */
2375 return false;
2378 if (warn_format_trunc == 2
2379 && result.likely <= avail.min
2380 && (result.max <= avail.min
2381 || result.max > HOST_WIDE_INT_MAX))
2383 /* The minimum amount of space remaining in the destination is big
2384 enough for the longest output. */
2385 return false;
2388 else
2390 if (warn_level == 1 && result.likely <= avail.likely)
2392 /* The likely amount of space remaining in the destination is big
2393 enough for the likely output. */
2394 return false;
2397 if (warn_level == 2
2398 && result.likely <= avail.min
2399 && (result.max <= avail.min
2400 || result.max > HOST_WIDE_INT_MAX))
2402 /* The minimum amount of space remaining in the destination is big
2403 enough for the longest output. */
2404 return false;
2408 return true;
2411 /* At format string location describe by DIRLOC in a call described
2412 by INFO, issue a warning for a directive DIR whose output may be
2413 in excess of the available space AVAIL_RANGE in the destination
2414 given the formatting result FMTRES. This function does nothing
2415 except decide whether to issue a warning for a possible write
2416 past the end or truncation and, if so, format the warning.
2417 Return true if a warning has been issued. */
2419 static bool
2420 maybe_warn (substring_loc &dirloc, source_range *pargrange,
2421 const pass_sprintf_length::call_info &info,
2422 const result_range &avail_range, const result_range &res,
2423 const directive &dir)
2425 if (!should_warn_p (info, avail_range, res))
2426 return false;
2428 /* A warning will definitely be issued below. */
2430 /* The maximum byte count to reference in the warning. Larger counts
2431 imply that the upper bound is unknown (and could be anywhere between
2432 RES.MIN + 1 and SIZE_MAX / 2) are printed as "N or more bytes" rather
2433 than "between N and X" where X is some huge number. */
2434 unsigned HOST_WIDE_INT maxbytes = target_dir_max ();
2436 /* True when there is enough room in the destination for the least
2437 amount of a directive's output but not enough for its likely or
2438 maximum output. */
2439 bool maybe = (res.min <= avail_range.max
2440 && (avail_range.min < res.likely
2441 || (res.max < HOST_WIDE_INT_MAX
2442 && avail_range.min < res.max)));
2444 /* Buffer for the directive in the host character set (used when
2445 the source character set is different). */
2446 char hostdir[32];
2448 if (avail_range.min == avail_range.max)
2450 /* The size of the destination region is exact. */
2451 unsigned HOST_WIDE_INT navail = avail_range.max;
2453 if (target_to_host (*dir.beg) != '%')
2455 /* For plain character directives (i.e., the format string itself)
2456 but not others, point the caret at the first character that's
2457 past the end of the destination. */
2458 dirloc.set_caret_index (dirloc.get_caret_idx () + navail);
2461 if (*dir.beg == '\0')
2463 /* This is the terminating nul. */
2464 gcc_assert (res.min == 1 && res.min == res.max);
2466 const char *fmtstr
2467 = (info.bounded
2468 ? (maybe
2469 ? G_("%qE output may be truncated before the last format "
2470 "character")
2471 : G_("%qE output truncated before the last format character"))
2472 : (maybe
2473 ? G_("%qE may write a terminating nul past the end "
2474 "of the destination")
2475 : G_("%qE writing a terminating nul past the end "
2476 "of the destination")));
2478 return fmtwarn (dirloc, NULL, NULL, info.warnopt (), fmtstr,
2479 info.func);
2482 if (res.min == res.max)
2484 const char* fmtstr
2485 = (res.min == 1
2486 ? (info.bounded
2487 ? (maybe
2488 ? G_("%<%.*s%> directive output may be truncated writing "
2489 "%wu byte into a region of size %wu")
2490 : G_("%<%.*s%> directive output truncated writing "
2491 "%wu byte into a region of size %wu"))
2492 : G_("%<%.*s%> directive writing %wu byte "
2493 "into a region of size %wu"))
2494 : (info.bounded
2495 ? (maybe
2496 ? G_("%<%.*s%> directive output may be truncated writing "
2497 "%wu bytes into a region of size %wu")
2498 : G_("%<%.*s%> directive output truncated writing "
2499 "%wu bytes into a region of size %wu"))
2500 : G_("%<%.*s%> directive writing %wu bytes "
2501 "into a region of size %wu")));
2502 return fmtwarn (dirloc, pargrange, NULL,
2503 info.warnopt (), fmtstr, dir.len,
2504 target_to_host (hostdir, sizeof hostdir, dir.beg),
2505 res.min, navail);
2508 if (res.min == 0 && res.max < maxbytes)
2510 const char* fmtstr
2511 = (info.bounded
2512 ? (maybe
2513 ? G_("%<%.*s%> directive output may be truncated writing "
2514 "up to %wu bytes into a region of size %wu")
2515 : G_("%<%.*s%> directive output truncated writing "
2516 "up to %wu bytes into a region of size %wu"))
2517 : G_("%<%.*s%> directive writing up to %wu bytes "
2518 "into a region of size %wu"));
2519 return fmtwarn (dirloc, pargrange, NULL,
2520 info.warnopt (), fmtstr, dir.len,
2521 target_to_host (hostdir, sizeof hostdir, dir.beg),
2522 res.max, navail);
2525 if (res.min == 0 && maxbytes <= res.max)
2527 /* This is a special case to avoid issuing the potentially
2528 confusing warning:
2529 writing 0 or more bytes into a region of size 0. */
2530 const char* fmtstr
2531 = (info.bounded
2532 ? (maybe
2533 ? G_("%<%.*s%> directive output may be truncated writing "
2534 "likely %wu or more bytes into a region of size %wu")
2535 : G_("%<%.*s%> directive output truncated writing "
2536 "likely %wu or more bytes into a region of size %wu"))
2537 : G_("%<%.*s%> directive writing likely %wu or more bytes "
2538 "into a region of size %wu"));
2539 return fmtwarn (dirloc, pargrange, NULL,
2540 info.warnopt (), fmtstr, dir.len,
2541 target_to_host (hostdir, sizeof hostdir, dir.beg),
2542 res.likely, navail);
2545 if (res.max < maxbytes)
2547 const char* fmtstr
2548 = (info.bounded
2549 ? (maybe
2550 ? G_("%<%.*s%> directive output may be truncated writing "
2551 "between %wu and %wu bytes into a region of size %wu")
2552 : G_("%<%.*s%> directive output truncated writing "
2553 "between %wu and %wu bytes into a region of size %wu"))
2554 : G_("%<%.*s%> directive writing between %wu and "
2555 "%wu bytes into a region of size %wu"));
2556 return fmtwarn (dirloc, pargrange, NULL,
2557 info.warnopt (), fmtstr, dir.len,
2558 target_to_host (hostdir, sizeof hostdir, dir.beg),
2559 res.min, res.max, navail);
2562 const char* fmtstr
2563 = (info.bounded
2564 ? (maybe
2565 ? G_("%<%.*s%> directive output may be truncated writing "
2566 "%wu or more bytes into a region of size %wu")
2567 : G_("%<%.*s%> directive output truncated writing "
2568 "%wu or more bytes into a region of size %wu"))
2569 : G_("%<%.*s%> directive writing %wu or more bytes "
2570 "into a region of size %wu"));
2571 return fmtwarn (dirloc, pargrange, NULL,
2572 info.warnopt (), fmtstr, dir.len,
2573 target_to_host (hostdir, sizeof hostdir, dir.beg),
2574 res.min, navail);
2577 /* The size of the destination region is a range. */
2579 if (target_to_host (*dir.beg) != '%')
2581 unsigned HOST_WIDE_INT navail = avail_range.max;
2583 /* For plain character directives (i.e., the format string itself)
2584 but not others, point the caret at the first character that's
2585 past the end of the destination. */
2586 dirloc.set_caret_index (dirloc.get_caret_idx () + navail);
2589 if (*dir.beg == '\0')
2591 gcc_assert (res.min == 1 && res.min == res.max);
2593 const char *fmtstr
2594 = (info.bounded
2595 ? (maybe
2596 ? G_("%qE output may be truncated before the last format "
2597 "character")
2598 : G_("%qE output truncated before the last format character"))
2599 : (maybe
2600 ? G_("%qE may write a terminating nul past the end "
2601 "of the destination")
2602 : G_("%qE writing a terminating nul past the end "
2603 "of the destination")));
2605 return fmtwarn (dirloc, NULL, NULL, info.warnopt (), fmtstr,
2606 info.func);
2609 if (res.min == res.max)
2611 const char* fmtstr
2612 = (res.min == 1
2613 ? (info.bounded
2614 ? (maybe
2615 ? G_("%<%.*s%> directive output may be truncated writing "
2616 "%wu byte into a region of size between %wu and %wu")
2617 : G_("%<%.*s%> directive output truncated writing "
2618 "%wu byte into a region of size between %wu and %wu"))
2619 : G_("%<%.*s%> directive writing %wu byte "
2620 "into a region of size between %wu and %wu"))
2621 : (info.bounded
2622 ? (maybe
2623 ? G_("%<%.*s%> directive output may be truncated writing "
2624 "%wu bytes into a region of size between %wu and %wu")
2625 : G_("%<%.*s%> directive output truncated writing "
2626 "%wu bytes into a region of size between %wu and %wu"))
2627 : G_("%<%.*s%> directive writing %wu bytes "
2628 "into a region of size between %wu and %wu")));
2630 return fmtwarn (dirloc, pargrange, NULL,
2631 info.warnopt (), fmtstr, dir.len,
2632 target_to_host (hostdir, sizeof hostdir, dir.beg),
2633 res.min, avail_range.min, avail_range.max);
2636 if (res.min == 0 && res.max < maxbytes)
2638 const char* fmtstr
2639 = (info.bounded
2640 ? (maybe
2641 ? G_("%<%.*s%> directive output may be truncated writing "
2642 "up to %wu bytes into a region of size between "
2643 "%wu and %wu")
2644 : G_("%<%.*s%> directive output truncated writing "
2645 "up to %wu bytes into a region of size between "
2646 "%wu and %wu"))
2647 : G_("%<%.*s%> directive writing up to %wu bytes "
2648 "into a region of size between %wu and %wu"));
2649 return fmtwarn (dirloc, pargrange, NULL,
2650 info.warnopt (), fmtstr, dir.len,
2651 target_to_host (hostdir, sizeof hostdir, dir.beg),
2652 res.max, avail_range.min, avail_range.max);
2655 if (res.min == 0 && maxbytes <= res.max)
2657 /* This is a special case to avoid issuing the potentially confusing
2658 warning:
2659 writing 0 or more bytes into a region of size between 0 and N. */
2660 const char* fmtstr
2661 = (info.bounded
2662 ? (maybe
2663 ? G_("%<%.*s%> directive output may be truncated writing "
2664 "likely %wu or more bytes into a region of size between "
2665 "%wu and %wu")
2666 : G_("%<%.*s%> directive output truncated writing likely "
2667 "%wu or more bytes into a region of size between "
2668 "%wu and %wu"))
2669 : G_("%<%.*s%> directive writing likely %wu or more bytes "
2670 "into a region of size between %wu and %wu"));
2671 return fmtwarn (dirloc, pargrange, NULL,
2672 info.warnopt (), fmtstr, dir.len,
2673 target_to_host (hostdir, sizeof hostdir, dir.beg),
2674 res.likely, avail_range.min, avail_range.max);
2677 if (res.max < maxbytes)
2679 const char* fmtstr
2680 = (info.bounded
2681 ? (maybe
2682 ? G_("%<%.*s%> directive output may be truncated writing "
2683 "between %wu and %wu bytes into a region of size "
2684 "between %wu and %wu")
2685 : G_("%<%.*s%> directive output truncated writing "
2686 "between %wu and %wu bytes into a region of size "
2687 "between %wu and %wu"))
2688 : G_("%<%.*s%> directive writing between %wu and "
2689 "%wu bytes into a region of size between %wu and %wu"));
2690 return fmtwarn (dirloc, pargrange, NULL,
2691 info.warnopt (), fmtstr, dir.len,
2692 target_to_host (hostdir, sizeof hostdir, dir.beg),
2693 res.min, res.max, avail_range.min, avail_range.max);
2696 const char* fmtstr
2697 = (info.bounded
2698 ? (maybe
2699 ? G_("%<%.*s%> directive output may be truncated writing "
2700 "%wu or more bytes into a region of size between "
2701 "%wu and %wu")
2702 : G_("%<%.*s%> directive output truncated writing "
2703 "%wu or more bytes into a region of size between "
2704 "%wu and %wu"))
2705 : G_("%<%.*s%> directive writing %wu or more bytes "
2706 "into a region of size between %wu and %wu"));
2707 return fmtwarn (dirloc, pargrange, NULL,
2708 info.warnopt (), fmtstr, dir.len,
2709 target_to_host (hostdir, sizeof hostdir, dir.beg),
2710 res.min, avail_range.min, avail_range.max);
2713 /* Compute the length of the output resulting from the directive DIR
2714 in a call described by INFO and update the overall result of the call
2715 in *RES. Return true if the directive has been handled. */
2717 static bool
2718 format_directive (const pass_sprintf_length::call_info &info,
2719 format_result *res, const directive &dir)
2721 /* Offset of the beginning of the directive from the beginning
2722 of the format string. */
2723 size_t offset = dir.beg - info.fmtstr;
2724 size_t start = offset;
2725 size_t length = offset + dir.len - !!dir.len;
2727 /* Create a location for the whole directive from the % to the format
2728 specifier. */
2729 substring_loc dirloc (info.fmtloc, TREE_TYPE (info.format),
2730 offset, start, length);
2732 /* Also create a location range for the argument if possible.
2733 This doesn't work for integer literals or function calls. */
2734 source_range argrange;
2735 source_range *pargrange;
2736 if (dir.arg && CAN_HAVE_LOCATION_P (dir.arg))
2738 argrange = EXPR_LOCATION_RANGE (dir.arg);
2739 pargrange = &argrange;
2741 else
2742 pargrange = NULL;
2744 /* Bail when there is no function to compute the output length,
2745 or when minimum length checking has been disabled. */
2746 if (!dir.fmtfunc || res->range.min >= HOST_WIDE_INT_MAX)
2747 return false;
2749 /* Compute the range of lengths of the formatted output. */
2750 fmtresult fmtres = dir.fmtfunc (dir, dir.arg);
2752 /* Record whether the output of all directives is known to be
2753 bounded by some maximum, implying that their arguments are
2754 either known exactly or determined to be in a known range
2755 or, for strings, limited by the upper bounds of the arrays
2756 they refer to. */
2757 res->knownrange &= fmtres.knownrange;
2759 if (!fmtres.knownrange)
2761 /* Only when the range is known, check it against the host value
2762 of INT_MAX + (the number of bytes of the "%.*Lf" directive with
2763 INT_MAX precision, which is the longest possible output of any
2764 single directive). That's the largest valid byte count (though
2765 not valid call to a printf-like function because it can never
2766 return such a count). Otherwise, the range doesn't correspond
2767 to known values of the argument. */
2768 if (fmtres.range.max > target_dir_max ())
2770 /* Normalize the MAX counter to avoid having to deal with it
2771 later. The counter can be less than HOST_WIDE_INT_M1U
2772 when compiling for an ILP32 target on an LP64 host. */
2773 fmtres.range.max = HOST_WIDE_INT_M1U;
2774 /* Disable exact and maximum length checking after a failure
2775 to determine the maximum number of characters (for example
2776 for wide characters or wide character strings) but continue
2777 tracking the minimum number of characters. */
2778 res->range.max = HOST_WIDE_INT_M1U;
2781 if (fmtres.range.min > target_dir_max ())
2783 /* Disable exact length checking after a failure to determine
2784 even the minimum number of characters (it shouldn't happen
2785 except in an error) but keep tracking the minimum and maximum
2786 number of characters. */
2787 return true;
2791 /* Buffer for the directive in the host character set (used when
2792 the source character set is different). */
2793 char hostdir[32];
2795 int dirlen = dir.len;
2797 if (fmtres.nullp)
2799 fmtwarn (dirloc, pargrange, NULL, info.warnopt (),
2800 "%<%.*s%> directive argument is null",
2801 dirlen, target_to_host (hostdir, sizeof hostdir, dir.beg));
2803 /* Don't bother processing the rest of the format string. */
2804 res->warned = true;
2805 res->range.min = HOST_WIDE_INT_M1U;
2806 res->range.max = HOST_WIDE_INT_M1U;
2807 return false;
2810 /* Compute the number of available bytes in the destination. There
2811 must always be at least one byte of space for the terminating
2812 NUL that's appended after the format string has been processed. */
2813 result_range avail_range = bytes_remaining (info.objsize, *res);
2815 bool warned = res->warned;
2817 if (!warned)
2818 warned = maybe_warn (dirloc, pargrange, info, avail_range,
2819 fmtres.range, dir);
2821 /* Bump up the total maximum if it isn't too big. */
2822 if (res->range.max < HOST_WIDE_INT_MAX
2823 && fmtres.range.max < HOST_WIDE_INT_MAX)
2824 res->range.max += fmtres.range.max;
2826 /* Raise the total unlikely maximum by the larger of the maximum
2827 and the unlikely maximum. */
2828 unsigned HOST_WIDE_INT save = res->range.unlikely;
2829 if (fmtres.range.max < fmtres.range.unlikely)
2830 res->range.unlikely += fmtres.range.unlikely;
2831 else
2832 res->range.unlikely += fmtres.range.max;
2834 if (res->range.unlikely < save)
2835 res->range.unlikely = HOST_WIDE_INT_M1U;
2837 res->range.min += fmtres.range.min;
2838 res->range.likely += fmtres.range.likely;
2840 /* Has the minimum directive output length exceeded the maximum
2841 of 4095 bytes required to be supported? */
2842 bool minunder4k = fmtres.range.min < 4096;
2843 bool maxunder4k = fmtres.range.max < 4096;
2844 /* Clear UNDER4K in the overall result if the maximum has exceeded
2845 the 4k (this is necessary to avoid the return valuye optimization
2846 that may not be safe in the maximum case). */
2847 if (!maxunder4k)
2848 res->under4k = false;
2850 if (!warned
2851 /* Only warn at level 2. */
2852 && 1 < warn_level
2853 && (!minunder4k
2854 || (!maxunder4k && fmtres.range.max < HOST_WIDE_INT_MAX)))
2856 /* The directive output may be longer than the maximum required
2857 to be handled by an implementation according to 7.21.6.1, p15
2858 of C11. Warn on this only at level 2 but remember this and
2859 prevent folding the return value when done. This allows for
2860 the possibility of the actual libc call failing due to ENOMEM
2861 (like Glibc does under some conditions). */
2863 if (fmtres.range.min == fmtres.range.max)
2864 warned = fmtwarn (dirloc, pargrange, NULL,
2865 info.warnopt (),
2866 "%<%.*s%> directive output of %wu bytes exceeds "
2867 "minimum required size of 4095",
2868 dirlen,
2869 target_to_host (hostdir, sizeof hostdir, dir.beg),
2870 fmtres.range.min);
2871 else
2873 const char *fmtstr
2874 = (minunder4k
2875 ? G_("%<%.*s%> directive output between %wu and %wu "
2876 "bytes may exceed minimum required size of 4095")
2877 : G_("%<%.*s%> directive output between %wu and %wu "
2878 "bytes exceeds minimum required size of 4095"));
2880 warned = fmtwarn (dirloc, pargrange, NULL,
2881 info.warnopt (), fmtstr, dirlen,
2882 target_to_host (hostdir, sizeof hostdir, dir.beg),
2883 fmtres.range.min, fmtres.range.max);
2887 /* Has the likely and maximum directive output exceeded INT_MAX? */
2888 bool likelyximax = *dir.beg && res->range.likely > target_int_max ();
2889 /* Don't consider the maximum to be in excess when it's the result
2890 of a string of unknown length (i.e., whose maximum has been set
2891 to be greater than or equal to HOST_WIDE_INT_MAX. */
2892 bool maxximax = (*dir.beg
2893 && res->range.max > target_int_max ()
2894 && res->range.max < HOST_WIDE_INT_MAX);
2896 if (!warned
2897 /* Warn for the likely output size at level 1. */
2898 && (likelyximax
2899 /* But only warn for the maximum at level 2. */
2900 || (1 < warn_level
2901 && maxximax
2902 && fmtres.range.max < HOST_WIDE_INT_MAX)))
2904 /* The directive output causes the total length of output
2905 to exceed INT_MAX bytes. */
2907 if (fmtres.range.min == fmtres.range.max)
2908 warned = fmtwarn (dirloc, pargrange, NULL, info.warnopt (),
2909 "%<%.*s%> directive output of %wu bytes causes "
2910 "result to exceed %<INT_MAX%>",
2911 dirlen,
2912 target_to_host (hostdir, sizeof hostdir, dir.beg),
2913 fmtres.range.min);
2914 else
2916 const char *fmtstr
2917 = (fmtres.range.min > target_int_max ()
2918 ? G_ ("%<%.*s%> directive output between %wu and %wu "
2919 "bytes causes result to exceed %<INT_MAX%>")
2920 : G_ ("%<%.*s%> directive output between %wu and %wu "
2921 "bytes may cause result to exceed %<INT_MAX%>"));
2922 warned = fmtwarn (dirloc, pargrange, NULL,
2923 info.warnopt (), fmtstr, dirlen,
2924 target_to_host (hostdir, sizeof hostdir, dir.beg),
2925 fmtres.range.min, fmtres.range.max);
2929 if (warned && fmtres.range.min < fmtres.range.likely
2930 && fmtres.range.likely < fmtres.range.max)
2932 inform (info.fmtloc,
2933 (1 == fmtres.range.likely
2934 ? G_("assuming directive output of %wu byte")
2935 : G_("assuming directive output of %wu bytes")),
2936 fmtres.range.likely);
2939 if (warned && fmtres.argmin)
2941 if (fmtres.argmin == fmtres.argmax)
2942 inform (info.fmtloc, "directive argument %qE", fmtres.argmin);
2943 else if (fmtres.knownrange)
2944 inform (info.fmtloc, "directive argument in the range [%E, %E]",
2945 fmtres.argmin, fmtres.argmax);
2946 else
2947 inform (info.fmtloc,
2948 "using the range [%E, %E] for directive argument",
2949 fmtres.argmin, fmtres.argmax);
2952 res->warned |= warned;
2954 if (!dir.beg[0] && res->warned && info.objsize < HOST_WIDE_INT_MAX)
2956 /* If a warning has been issued for buffer overflow or truncation
2957 (but not otherwise) help the user figure out how big a buffer
2958 they need. */
2960 location_t callloc = gimple_location (info.callstmt);
2962 unsigned HOST_WIDE_INT min = res->range.min;
2963 unsigned HOST_WIDE_INT max = res->range.max;
2965 if (min == max)
2966 inform (callloc,
2967 (min == 1
2968 ? G_("%qE output %wu byte into a destination of size %wu")
2969 : G_("%qE output %wu bytes into a destination of size %wu")),
2970 info.func, min, info.objsize);
2971 else if (max < HOST_WIDE_INT_MAX)
2972 inform (callloc,
2973 "%qE output between %wu and %wu bytes into "
2974 "a destination of size %wu",
2975 info.func, min, max, info.objsize);
2976 else if (min < res->range.likely && res->range.likely < max)
2977 inform (callloc,
2978 "%qE output %wu or more bytes (assuming %wu) into "
2979 "a destination of size %wu",
2980 info.func, min, res->range.likely, info.objsize);
2981 else
2982 inform (callloc,
2983 "%qE output %wu or more bytes into a destination of size %wu",
2984 info.func, min, info.objsize);
2987 if (dump_file && *dir.beg)
2989 fprintf (dump_file, " Result: %lli, %lli, %lli, %lli "
2990 "(%lli, %lli, %lli, %lli)\n",
2991 (long long)fmtres.range.min,
2992 (long long)fmtres.range.likely,
2993 (long long)fmtres.range.max,
2994 (long long)fmtres.range.unlikely,
2995 (long long)res->range.min,
2996 (long long)res->range.likely,
2997 (long long)res->range.max,
2998 (long long)res->range.unlikely);
3001 return true;
3004 #pragma GCC diagnostic pop
3006 /* Parse a format directive in function call described by INFO starting
3007 at STR and populate DIR structure. Bump up *ARGNO by the number of
3008 arguments extracted for the directive. Return the length of
3009 the directive. */
3011 static size_t
3012 parse_directive (pass_sprintf_length::call_info &info,
3013 directive &dir, format_result *res,
3014 const char *str, unsigned *argno)
3016 const char *pcnt = strchr (str, target_percent);
3017 dir.beg = str;
3019 if (size_t len = pcnt ? pcnt - str : *str ? strlen (str) : 1)
3021 /* This directive is either a plain string or the terminating nul
3022 (which isn't really a directive but it simplifies things to
3023 handle it as if it were). */
3024 dir.len = len;
3025 dir.fmtfunc = format_plain;
3027 if (dump_file)
3029 fprintf (dump_file, " Directive %u at offset %llu: \"%.*s\", "
3030 "length = %llu\n",
3031 dir.dirno,
3032 (unsigned long long)(size_t)(dir.beg - info.fmtstr),
3033 (int)dir.len, dir.beg, (unsigned long long)dir.len);
3036 return len - !*str;
3039 const char *pf = pcnt + 1;
3041 /* POSIX numbered argument index or zero when none. */
3042 HOST_WIDE_INT dollar = 0;
3044 /* Width and precision. -1 when not specified, HOST_WIDE_INT_MIN
3045 when given by a va_list argument, and a non-negative value
3046 when specified in the format string itself. */
3047 HOST_WIDE_INT width = -1;
3048 HOST_WIDE_INT precision = -1;
3050 /* Pointers to the beginning of the width and precision decimal
3051 string (if any) within the directive. */
3052 const char *pwidth = 0;
3053 const char *pprec = 0;
3055 /* When the value of the decimal string that specifies width or
3056 precision is out of range, points to the digit that causes
3057 the value to exceed the limit. */
3058 const char *werange = NULL;
3059 const char *perange = NULL;
3061 /* Width specified via the asterisk. Need not be INTEGER_CST.
3062 For vararg functions set to void_node. */
3063 tree star_width = NULL_TREE;
3065 /* Width specified via the asterisk. Need not be INTEGER_CST.
3066 For vararg functions set to void_node. */
3067 tree star_precision = NULL_TREE;
3069 if (ISDIGIT (target_to_host (*pf)))
3071 /* This could be either a POSIX positional argument, the '0'
3072 flag, or a width, depending on what follows. Store it as
3073 width and sort it out later after the next character has
3074 been seen. */
3075 pwidth = pf;
3076 width = target_strtol10 (&pf, &werange);
3078 else if (target_to_host (*pf) == '*')
3080 /* Similarly to the block above, this could be either a POSIX
3081 positional argument or a width, depending on what follows. */
3082 if (*argno < gimple_call_num_args (info.callstmt))
3083 star_width = gimple_call_arg (info.callstmt, (*argno)++);
3084 else
3085 star_width = void_node;
3086 ++pf;
3089 if (target_to_host (*pf) == '$')
3091 /* Handle the POSIX dollar sign which references the 1-based
3092 positional argument number. */
3093 if (width != -1)
3094 dollar = width + info.argidx;
3095 else if (star_width
3096 && TREE_CODE (star_width) == INTEGER_CST
3097 && (TYPE_PRECISION (TREE_TYPE (star_width))
3098 <= TYPE_PRECISION (integer_type_node)))
3099 dollar = width + tree_to_shwi (star_width);
3101 /* Bail when the numbered argument is out of range (it will
3102 have already been diagnosed by -Wformat). */
3103 if (dollar == 0
3104 || dollar == (int)info.argidx
3105 || dollar > gimple_call_num_args (info.callstmt))
3106 return false;
3108 --dollar;
3110 star_width = NULL_TREE;
3111 width = -1;
3112 ++pf;
3115 if (dollar || !star_width)
3117 if (width != -1)
3119 if (width == 0)
3121 /* The '0' that has been interpreted as a width above is
3122 actually a flag. Reset HAVE_WIDTH, set the '0' flag,
3123 and continue processing other flags. */
3124 width = -1;
3125 dir.set_flag ('0');
3127 else if (!dollar)
3129 /* (Non-zero) width has been seen. The next character
3130 is either a period or a digit. */
3131 goto start_precision;
3134 /* When either '$' has been seen, or width has not been seen,
3135 the next field is the optional flags followed by an optional
3136 width. */
3137 for ( ; ; ) {
3138 switch (target_to_host (*pf))
3140 case ' ':
3141 case '0':
3142 case '+':
3143 case '-':
3144 case '#':
3145 dir.set_flag (target_to_host (*pf++));
3146 break;
3148 default:
3149 goto start_width;
3153 start_width:
3154 if (ISDIGIT (target_to_host (*pf)))
3156 werange = 0;
3157 pwidth = pf;
3158 width = target_strtol10 (&pf, &werange);
3160 else if (target_to_host (*pf) == '*')
3162 if (*argno < gimple_call_num_args (info.callstmt))
3163 star_width = gimple_call_arg (info.callstmt, (*argno)++);
3164 else
3166 /* This is (likely) a va_list. It could also be an invalid
3167 call with insufficient arguments. */
3168 star_width = void_node;
3170 ++pf;
3172 else if (target_to_host (*pf) == '\'')
3174 /* The POSIX apostrophe indicating a numeric grouping
3175 in the current locale. Even though it's possible to
3176 estimate the upper bound on the size of the output
3177 based on the number of digits it probably isn't worth
3178 continuing. */
3179 return 0;
3183 start_precision:
3184 if (target_to_host (*pf) == '.')
3186 ++pf;
3188 if (ISDIGIT (target_to_host (*pf)))
3190 pprec = pf;
3191 precision = target_strtol10 (&pf, &perange);
3193 else if (target_to_host (*pf) == '*')
3195 if (*argno < gimple_call_num_args (info.callstmt))
3196 star_precision = gimple_call_arg (info.callstmt, (*argno)++);
3197 else
3199 /* This is (likely) a va_list. It could also be an invalid
3200 call with insufficient arguments. */
3201 star_precision = void_node;
3203 ++pf;
3205 else
3207 /* The decimal precision or the asterisk are optional.
3208 When neither is dirified it's taken to be zero. */
3209 precision = 0;
3213 switch (target_to_host (*pf))
3215 case 'h':
3216 if (target_to_host (pf[1]) == 'h')
3218 ++pf;
3219 dir.modifier = FMT_LEN_hh;
3221 else
3222 dir.modifier = FMT_LEN_h;
3223 ++pf;
3224 break;
3226 case 'j':
3227 dir.modifier = FMT_LEN_j;
3228 ++pf;
3229 break;
3231 case 'L':
3232 dir.modifier = FMT_LEN_L;
3233 ++pf;
3234 break;
3236 case 'l':
3237 if (target_to_host (pf[1]) == 'l')
3239 ++pf;
3240 dir.modifier = FMT_LEN_ll;
3242 else
3243 dir.modifier = FMT_LEN_l;
3244 ++pf;
3245 break;
3247 case 't':
3248 dir.modifier = FMT_LEN_t;
3249 ++pf;
3250 break;
3252 case 'z':
3253 dir.modifier = FMT_LEN_z;
3254 ++pf;
3255 break;
3258 switch (target_to_host (*pf))
3260 /* Handle a sole '%' character the same as "%%" but since it's
3261 undefined prevent the result from being folded. */
3262 case '\0':
3263 --pf;
3264 res->range.min = res->range.max = HOST_WIDE_INT_M1U;
3265 /* FALLTHRU */
3266 case '%':
3267 dir.fmtfunc = format_percent;
3268 break;
3270 case 'a':
3271 case 'A':
3272 case 'e':
3273 case 'E':
3274 case 'f':
3275 case 'F':
3276 case 'g':
3277 case 'G':
3278 res->floating = true;
3279 dir.fmtfunc = format_floating;
3280 break;
3282 case 'd':
3283 case 'i':
3284 case 'o':
3285 case 'u':
3286 case 'x':
3287 case 'X':
3288 dir.fmtfunc = format_integer;
3289 break;
3291 case 'p':
3292 /* The %p output is implementation-defined. It's possible
3293 to determine this format but due to extensions (edirially
3294 those of the Linux kernel -- see bug 78512) the first %p
3295 in the format string disables any further processing. */
3296 return false;
3298 case 'n':
3299 /* %n has side-effects even when nothing is actually printed to
3300 any buffer. */
3301 info.nowrite = false;
3302 dir.fmtfunc = format_none;
3303 break;
3305 case 'c':
3306 dir.fmtfunc = format_character;
3307 break;
3309 case 'S':
3310 case 's':
3311 dir.fmtfunc = format_string;
3312 break;
3314 default:
3315 /* Unknown conversion specification. */
3316 return 0;
3319 dir.specifier = target_to_host (*pf++);
3321 /* Store the length of the format directive. */
3322 dir.len = pf - pcnt;
3324 /* Buffer for the directive in the host character set (used when
3325 the source character set is different). */
3326 char hostdir[32];
3328 if (star_width)
3330 if (INTEGRAL_TYPE_P (TREE_TYPE (star_width)))
3331 dir.set_width (star_width);
3332 else
3334 /* Width specified by a va_list takes on the range [0, -INT_MIN]
3335 (width is the absolute value of that specified). */
3336 dir.width[0] = 0;
3337 dir.width[1] = target_int_max () + 1;
3340 else
3342 if (width == LONG_MAX && werange)
3344 size_t begin = dir.beg - info.fmtstr + (pwidth - pcnt);
3345 size_t caret = begin + (werange - pcnt);
3346 size_t end = pf - info.fmtstr - 1;
3348 /* Create a location for the width part of the directive,
3349 pointing the caret at the first out-of-range digit. */
3350 substring_loc dirloc (info.fmtloc, TREE_TYPE (info.format),
3351 caret, begin, end);
3353 fmtwarn (dirloc, NULL, NULL,
3354 info.warnopt (), "%<%.*s%> directive width out of range",
3355 dir.len, target_to_host (hostdir, sizeof hostdir, dir.beg));
3358 dir.set_width (width);
3361 if (star_precision)
3363 if (INTEGRAL_TYPE_P (TREE_TYPE (star_precision)))
3364 dir.set_precision (star_precision);
3365 else
3367 /* Precision specified by a va_list takes on the range [-1, INT_MAX]
3368 (unlike width, negative precision is ignored). */
3369 dir.prec[0] = -1;
3370 dir.prec[1] = target_int_max ();
3373 else
3375 if (precision == LONG_MAX && perange)
3377 size_t begin = dir.beg - info.fmtstr + (pprec - pcnt) - 1;
3378 size_t caret = dir.beg - info.fmtstr + (perange - pcnt) - 1;
3379 size_t end = pf - info.fmtstr - 2;
3381 /* Create a location for the precision part of the directive,
3382 including the leading period, pointing the caret at the first
3383 out-of-range digit . */
3384 substring_loc dirloc (info.fmtloc, TREE_TYPE (info.format),
3385 caret, begin, end);
3387 fmtwarn (dirloc, NULL, NULL,
3388 info.warnopt (), "%<%.*s%> directive precision out of range",
3389 dir.len, target_to_host (hostdir, sizeof hostdir, dir.beg));
3392 dir.set_precision (precision);
3395 /* Extract the argument if the directive takes one and if it's
3396 available (e.g., the function doesn't take a va_list). Treat
3397 missing arguments the same as va_list, even though they will
3398 have likely already been diagnosed by -Wformat. */
3399 if (dir.specifier != '%'
3400 && *argno < gimple_call_num_args (info.callstmt))
3401 dir.arg = gimple_call_arg (info.callstmt, dollar ? dollar : (*argno)++);
3403 if (dump_file)
3405 fprintf (dump_file, " Directive %u at offset %llu: \"%.*s\"",
3406 dir.dirno, (unsigned long long)(size_t)(dir.beg - info.fmtstr),
3407 (int)dir.len, dir.beg);
3408 if (star_width)
3410 if (dir.width[0] == dir.width[1])
3411 fprintf (dump_file, ", width = %lli", (long long)dir.width[0]);
3412 else
3413 fprintf (dump_file, ", width in range [%lli, %lli]",
3414 (long long)dir.width[0], (long long)dir.width[1]);
3417 if (star_precision)
3419 if (dir.prec[0] == dir.prec[1])
3420 fprintf (dump_file, ", precision = %lli", (long long)dir.prec[0]);
3421 else
3422 fprintf (dump_file, ", precision in range [%lli, %lli]",
3423 (long long)dir.prec[0], (long long)dir.prec[1]);
3425 fputc ('\n', dump_file);
3428 return dir.len;
3431 /* Compute the length of the output resulting from the call to a formatted
3432 output function described by INFO and store the result of the call in
3433 *RES. Issue warnings for detected past the end writes. Return true
3434 if the complete format string has been processed and *RES can be relied
3435 on, false otherwise (e.g., when a unknown or unhandled directive was seen
3436 that caused the processing to be terminated early). */
3438 bool
3439 pass_sprintf_length::compute_format_length (call_info &info,
3440 format_result *res)
3442 if (dump_file)
3444 location_t callloc = gimple_location (info.callstmt);
3445 fprintf (dump_file, "%s:%i: ",
3446 LOCATION_FILE (callloc), LOCATION_LINE (callloc));
3447 print_generic_expr (dump_file, info.func, dump_flags);
3449 fprintf (dump_file, ": objsize = %llu, fmtstr = \"%s\"\n",
3450 (unsigned long long)info.objsize, info.fmtstr);
3453 /* Reset the minimum and maximum byte counters. */
3454 res->range.min = res->range.max = 0;
3456 /* No directive has been seen yet so the length of output is bounded
3457 by the known range [0, 0] (with no conversion producing more than
3458 4K bytes) until determined otherwise. */
3459 res->knownrange = true;
3460 res->under4k = true;
3461 res->floating = false;
3462 res->warned = false;
3464 /* 1-based directive counter. */
3465 unsigned dirno = 1;
3467 /* The variadic argument counter. */
3468 unsigned argno = info.argidx;
3470 for (const char *pf = info.fmtstr; ; ++dirno)
3472 directive dir = directive ();
3473 dir.dirno = dirno;
3475 size_t n = parse_directive (info, dir, res, pf, &argno);
3477 /* Return failure if the format function fails. */
3478 if (!format_directive (info, res, dir))
3479 return false;
3481 /* Return success the directive is zero bytes long and it's
3482 the last think in the format string (i.e., it's the terminating
3483 nul, which isn't really a directive but handling it as one makes
3484 things simpler). */
3485 if (!n)
3486 return *pf == '\0';
3488 pf += n;
3491 /* The complete format string was processed (with or without warnings). */
3492 return true;
3495 /* Return the size of the object referenced by the expression DEST if
3496 available, or -1 otherwise. */
3498 static unsigned HOST_WIDE_INT
3499 get_destination_size (tree dest)
3501 /* Initialize object size info before trying to compute it. */
3502 init_object_sizes ();
3504 /* Use __builtin_object_size to determine the size of the destination
3505 object. When optimizing, determine the smallest object (such as
3506 a member array as opposed to the whole enclosing object), otherwise
3507 use type-zero object size to determine the size of the enclosing
3508 object (the function fails without optimization in this type). */
3509 int ost = optimize > 0;
3510 unsigned HOST_WIDE_INT size;
3511 if (compute_builtin_object_size (dest, ost, &size))
3512 return size;
3514 return HOST_WIDE_INT_M1U;
3517 /* Return true if the call described by INFO with result RES safe to
3518 optimize (i.e., no undefined behavior), and set RETVAL to the range
3519 of its return values. */
3521 static bool
3522 is_call_safe (const pass_sprintf_length::call_info &info,
3523 const format_result &res, bool under4k,
3524 unsigned HOST_WIDE_INT retval[2])
3526 if (under4k && !res.under4k)
3527 return false;
3529 /* The minimum return value. */
3530 retval[0] = res.range.min;
3532 /* The maximum return value is in most cases bounded by RES.RANGE.MAX
3533 but in cases involving multibyte characters could be as large as
3534 RES.RANGE.UNLIKELY. */
3535 retval[1]
3536 = res.range.unlikely < res.range.max ? res.range.max : res.range.unlikely;
3538 /* Adjust the number of bytes which includes the terminating nul
3539 to reflect the return value of the function which does not.
3540 Because the valid range of the function is [INT_MIN, INT_MAX],
3541 a valid range before the adjustment below is [0, INT_MAX + 1]
3542 (the functions only return negative values on error or undefined
3543 behavior). */
3544 if (retval[0] <= target_int_max () + 1)
3545 --retval[0];
3546 if (retval[1] <= target_int_max () + 1)
3547 --retval[1];
3549 /* Avoid the return value optimization when the behavior of the call
3550 is undefined either because any directive may have produced 4K or
3551 more of output, or the return value exceeds INT_MAX, or because
3552 the output overflows the destination object (but leave it enabled
3553 when the function is bounded because then the behavior is well-
3554 defined). */
3555 if (retval[0] == retval[1]
3556 && (info.bounded || retval[0] < info.objsize)
3557 && retval[0] <= target_int_max ())
3558 return true;
3560 if ((info.bounded || retval[1] < info.objsize)
3561 && (retval[0] < target_int_max ()
3562 && retval[1] < target_int_max ()))
3563 return true;
3565 if (!under4k && (info.bounded || retval[0] < info.objsize))
3566 return true;
3568 return false;
3571 /* Given a suitable result RES of a call to a formatted output function
3572 described by INFO, substitute the result for the return value of
3573 the call. The result is suitable if the number of bytes it represents
3574 is known and exact. A result that isn't suitable for substitution may
3575 have its range set to the range of return values, if that is known.
3576 Return true if the call is removed and gsi_next should not be performed
3577 in the caller. */
3579 static bool
3580 try_substitute_return_value (gimple_stmt_iterator *gsi,
3581 const pass_sprintf_length::call_info &info,
3582 const format_result &res)
3584 tree lhs = gimple_get_lhs (info.callstmt);
3586 /* Set to true when the entire call has been removed. */
3587 bool removed = false;
3589 /* The minimum and maximum return value. */
3590 unsigned HOST_WIDE_INT retval[2];
3591 bool safe = is_call_safe (info, res, true, retval);
3593 if (safe
3594 && retval[0] == retval[1]
3595 /* Not prepared to handle possibly throwing calls here; they shouldn't
3596 appear in non-artificial testcases, except when the __*_chk routines
3597 are badly declared. */
3598 && !stmt_ends_bb_p (info.callstmt))
3600 tree cst = build_int_cst (integer_type_node, retval[0]);
3602 if (lhs == NULL_TREE
3603 && info.nowrite)
3605 /* Remove the call to the bounded function with a zero size
3606 (e.g., snprintf(0, 0, "%i", 123)) if there is no lhs. */
3607 unlink_stmt_vdef (info.callstmt);
3608 gsi_remove (gsi, true);
3609 removed = true;
3611 else if (info.nowrite)
3613 /* Replace the call to the bounded function with a zero size
3614 (e.g., snprintf(0, 0, "%i", 123) with the constant result
3615 of the function. */
3616 if (!update_call_from_tree (gsi, cst))
3617 gimplify_and_update_call_from_tree (gsi, cst);
3618 gimple *callstmt = gsi_stmt (*gsi);
3619 update_stmt (callstmt);
3621 else if (lhs)
3623 /* Replace the left-hand side of the call with the constant
3624 result of the formatted function. */
3625 gimple_call_set_lhs (info.callstmt, NULL_TREE);
3626 gimple *g = gimple_build_assign (lhs, cst);
3627 gsi_insert_after (gsi, g, GSI_NEW_STMT);
3628 update_stmt (info.callstmt);
3631 if (dump_file)
3633 if (removed)
3634 fprintf (dump_file, " Removing call statement.");
3635 else
3637 fprintf (dump_file, " Substituting ");
3638 print_generic_expr (dump_file, cst, dump_flags);
3639 fprintf (dump_file, " for %s.\n",
3640 info.nowrite ? "statement" : "return value");
3644 else if (lhs)
3646 bool setrange = false;
3648 if (safe
3649 && (info.bounded || retval[1] < info.objsize)
3650 && (retval[0] < target_int_max ()
3651 && retval[1] < target_int_max ()))
3653 /* If the result is in a valid range bounded by the size of
3654 the destination set it so that it can be used for subsequent
3655 optimizations. */
3656 int prec = TYPE_PRECISION (integer_type_node);
3658 wide_int min = wi::shwi (retval[0], prec);
3659 wide_int max = wi::shwi (retval[1], prec);
3660 set_range_info (lhs, VR_RANGE, min, max);
3662 setrange = true;
3665 if (dump_file)
3667 const char *inbounds
3668 = (retval[0] < info.objsize
3669 ? (retval[1] < info.objsize
3670 ? "in" : "potentially out-of")
3671 : "out-of");
3673 const char *what = setrange ? "Setting" : "Discarding";
3674 if (retval[0] != retval[1])
3675 fprintf (dump_file,
3676 " %s %s-bounds return value range [%llu, %llu].\n",
3677 what, inbounds,
3678 (unsigned long long)retval[0],
3679 (unsigned long long)retval[1]);
3680 else
3681 fprintf (dump_file, " %s %s-bounds return value %llu.\n",
3682 what, inbounds, (unsigned long long)retval[0]);
3686 if (dump_file)
3687 fputc ('\n', dump_file);
3689 return removed;
3692 /* Try to simplify a s{,n}printf call described by INFO with result
3693 RES by replacing it with a simpler and presumably more efficient
3694 call (such as strcpy). */
3696 static bool
3697 try_simplify_call (gimple_stmt_iterator *gsi,
3698 const pass_sprintf_length::call_info &info,
3699 const format_result &res)
3701 unsigned HOST_WIDE_INT dummy[2];
3702 if (!is_call_safe (info, res, info.retval_used (), dummy))
3703 return false;
3705 switch (info.fncode)
3707 case BUILT_IN_SNPRINTF:
3708 return gimple_fold_builtin_snprintf (gsi);
3710 case BUILT_IN_SPRINTF:
3711 return gimple_fold_builtin_sprintf (gsi);
3713 default:
3717 return false;
3720 /* Determine if a GIMPLE CALL is to one of the sprintf-like built-in
3721 functions and if so, handle it. Return true if the call is removed
3722 and gsi_next should not be performed in the caller. */
3724 bool
3725 pass_sprintf_length::handle_gimple_call (gimple_stmt_iterator *gsi)
3727 call_info info = call_info ();
3729 info.callstmt = gsi_stmt (*gsi);
3730 if (!gimple_call_builtin_p (info.callstmt, BUILT_IN_NORMAL))
3731 return false;
3733 info.func = gimple_call_fndecl (info.callstmt);
3734 info.fncode = DECL_FUNCTION_CODE (info.func);
3736 /* The size of the destination as in snprintf(dest, size, ...). */
3737 unsigned HOST_WIDE_INT dstsize = HOST_WIDE_INT_M1U;
3739 /* The size of the destination determined by __builtin_object_size. */
3740 unsigned HOST_WIDE_INT objsize = HOST_WIDE_INT_M1U;
3742 /* Buffer size argument number (snprintf and vsnprintf). */
3743 unsigned HOST_WIDE_INT idx_dstsize = HOST_WIDE_INT_M1U;
3745 /* Object size argument number (snprintf_chk and vsnprintf_chk). */
3746 unsigned HOST_WIDE_INT idx_objsize = HOST_WIDE_INT_M1U;
3748 /* Format string argument number (valid for all functions). */
3749 unsigned idx_format;
3751 switch (info.fncode)
3753 case BUILT_IN_SPRINTF:
3754 // Signature:
3755 // __builtin_sprintf (dst, format, ...)
3756 idx_format = 1;
3757 info.argidx = 2;
3758 break;
3760 case BUILT_IN_SPRINTF_CHK:
3761 // Signature:
3762 // __builtin___sprintf_chk (dst, ost, objsize, format, ...)
3763 idx_objsize = 2;
3764 idx_format = 3;
3765 info.argidx = 4;
3766 break;
3768 case BUILT_IN_SNPRINTF:
3769 // Signature:
3770 // __builtin_snprintf (dst, size, format, ...)
3771 idx_dstsize = 1;
3772 idx_format = 2;
3773 info.argidx = 3;
3774 info.bounded = true;
3775 break;
3777 case BUILT_IN_SNPRINTF_CHK:
3778 // Signature:
3779 // __builtin___snprintf_chk (dst, size, ost, objsize, format, ...)
3780 idx_dstsize = 1;
3781 idx_objsize = 3;
3782 idx_format = 4;
3783 info.argidx = 5;
3784 info.bounded = true;
3785 break;
3787 case BUILT_IN_VSNPRINTF:
3788 // Signature:
3789 // __builtin_vsprintf (dst, size, format, va)
3790 idx_dstsize = 1;
3791 idx_format = 2;
3792 info.argidx = -1;
3793 info.bounded = true;
3794 break;
3796 case BUILT_IN_VSNPRINTF_CHK:
3797 // Signature:
3798 // __builtin___vsnprintf_chk (dst, size, ost, objsize, format, va)
3799 idx_dstsize = 1;
3800 idx_objsize = 3;
3801 idx_format = 4;
3802 info.argidx = -1;
3803 info.bounded = true;
3804 break;
3806 case BUILT_IN_VSPRINTF:
3807 // Signature:
3808 // __builtin_vsprintf (dst, format, va)
3809 idx_format = 1;
3810 info.argidx = -1;
3811 break;
3813 case BUILT_IN_VSPRINTF_CHK:
3814 // Signature:
3815 // __builtin___vsprintf_chk (dst, ost, objsize, format, va)
3816 idx_format = 3;
3817 idx_objsize = 2;
3818 info.argidx = -1;
3819 break;
3821 default:
3822 return false;
3825 /* Set the global warning level for this function. */
3826 warn_level = info.bounded ? warn_format_trunc : warn_format_overflow;
3828 /* The first argument is a pointer to the destination. */
3829 tree dstptr = gimple_call_arg (info.callstmt, 0);
3831 info.format = gimple_call_arg (info.callstmt, idx_format);
3833 /* True when the destination size is constant as opposed to the lower
3834 or upper bound of a range. */
3835 bool dstsize_cst_p = true;
3837 if (idx_dstsize == HOST_WIDE_INT_M1U)
3839 /* For non-bounded functions like sprintf, determine the size
3840 of the destination from the object or pointer passed to it
3841 as the first argument. */
3842 dstsize = get_destination_size (dstptr);
3844 else if (tree size = gimple_call_arg (info.callstmt, idx_dstsize))
3846 /* For bounded functions try to get the size argument. */
3848 if (TREE_CODE (size) == INTEGER_CST)
3850 dstsize = tree_to_uhwi (size);
3851 /* No object can be larger than SIZE_MAX bytes (half the address
3852 space) on the target.
3853 The functions are defined only for output of at most INT_MAX
3854 bytes. Specifying a bound in excess of that limit effectively
3855 defeats the bounds checking (and on some implementations such
3856 as Solaris cause the function to fail with EINVAL). */
3857 if (dstsize > target_size_max () / 2)
3859 /* Avoid warning if -Wstringop-overflow is specified since
3860 it also warns for the same thing though only for the
3861 checking built-ins. */
3862 if ((idx_objsize == HOST_WIDE_INT_M1U
3863 || !warn_stringop_overflow))
3864 warning_at (gimple_location (info.callstmt), info.warnopt (),
3865 "specified bound %wu exceeds maximum object size "
3866 "%wu",
3867 dstsize, target_size_max () / 2);
3869 else if (dstsize > target_int_max ())
3870 warning_at (gimple_location (info.callstmt), info.warnopt (),
3871 "specified bound %wu exceeds %<INT_MAX %>",
3872 dstsize);
3874 else if (TREE_CODE (size) == SSA_NAME)
3876 /* Try to determine the range of values of the argument
3877 and use the greater of the two at level 1 and the smaller
3878 of them at level 2. */
3879 wide_int min, max;
3880 enum value_range_type range_type
3881 = get_range_info (size, &min, &max);
3882 if (range_type == VR_RANGE)
3884 dstsize
3885 = (warn_level < 2
3886 ? wi::fits_uhwi_p (max) ? max.to_uhwi () : max.to_shwi ()
3887 : wi::fits_uhwi_p (min) ? min.to_uhwi () : min.to_shwi ());
3890 /* The destination size is not constant. If the function is
3891 bounded (e.g., snprintf) a lower bound of zero doesn't
3892 necessarily imply it can be eliminated. */
3893 dstsize_cst_p = false;
3897 if (idx_objsize != HOST_WIDE_INT_M1U)
3898 if (tree size = gimple_call_arg (info.callstmt, idx_objsize))
3899 if (tree_fits_uhwi_p (size))
3900 objsize = tree_to_uhwi (size);
3902 if (info.bounded && !dstsize)
3904 /* As a special case, when the explicitly specified destination
3905 size argument (to a bounded function like snprintf) is zero
3906 it is a request to determine the number of bytes on output
3907 without actually producing any. Pretend the size is
3908 unlimited in this case. */
3909 info.objsize = HOST_WIDE_INT_MAX;
3910 info.nowrite = dstsize_cst_p;
3912 else
3914 /* For calls to non-bounded functions or to those of bounded
3915 functions with a non-zero size, warn if the destination
3916 pointer is null. */
3917 if (integer_zerop (dstptr))
3919 /* This is diagnosed with -Wformat only when the null is a constant
3920 pointer. The warning here diagnoses instances where the pointer
3921 is not constant. */
3922 location_t loc = gimple_location (info.callstmt);
3923 warning_at (EXPR_LOC_OR_LOC (dstptr, loc),
3924 info.warnopt (), "null destination pointer");
3925 return false;
3928 /* Set the object size to the smaller of the two arguments
3929 of both have been specified and they're not equal. */
3930 info.objsize = dstsize < objsize ? dstsize : objsize;
3932 if (info.bounded
3933 && dstsize < target_size_max () / 2 && objsize < dstsize
3934 /* Avoid warning if -Wstringop-overflow is specified since
3935 it also warns for the same thing though only for the
3936 checking built-ins. */
3937 && (idx_objsize == HOST_WIDE_INT_M1U
3938 || !warn_stringop_overflow))
3940 warning_at (gimple_location (info.callstmt), info.warnopt (),
3941 "specified bound %wu exceeds the size %wu "
3942 "of the destination object", dstsize, objsize);
3946 if (integer_zerop (info.format))
3948 /* This is diagnosed with -Wformat only when the null is a constant
3949 pointer. The warning here diagnoses instances where the pointer
3950 is not constant. */
3951 location_t loc = gimple_location (info.callstmt);
3952 warning_at (EXPR_LOC_OR_LOC (info.format, loc),
3953 info.warnopt (), "null format string");
3954 return false;
3957 info.fmtstr = get_format_string (info.format, &info.fmtloc);
3958 if (!info.fmtstr)
3959 return false;
3961 /* The result is the number of bytes output by the formatted function,
3962 including the terminating NUL. */
3963 format_result res = format_result ();
3965 bool success = compute_format_length (info, &res);
3967 /* When optimizing and the printf return value optimization is enabled,
3968 attempt to substitute the computed result for the return value of
3969 the call. Avoid this optimization when -frounding-math is in effect
3970 and the format string contains a floating point directive. */
3971 bool call_removed = false;
3972 if (success && optimize > 0)
3974 /* Save a copy of the iterator pointing at the call. The iterator
3975 may change to point past the call in try_substitute_return_value
3976 but the original value is needed in try_simplify_call. */
3977 gimple_stmt_iterator gsi_call = *gsi;
3979 if (flag_printf_return_value
3980 && (!flag_rounding_math || !res.floating))
3981 call_removed = try_substitute_return_value (gsi, info, res);
3983 if (!call_removed)
3984 try_simplify_call (&gsi_call, info, res);
3987 return call_removed;
3990 /* Execute the pass for function FUN. */
3992 unsigned int
3993 pass_sprintf_length::execute (function *fun)
3995 init_target_to_host_charmap ();
3997 basic_block bb;
3998 FOR_EACH_BB_FN (bb, fun)
4000 for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si); )
4002 /* Iterate over statements, looking for function calls. */
4003 gimple *stmt = gsi_stmt (si);
4005 if (is_gimple_call (stmt) && handle_gimple_call (&si))
4006 /* If handle_gimple_call returns true, the iterator is
4007 already pointing to the next statement. */
4008 continue;
4010 gsi_next (&si);
4014 /* Clean up object size info. */
4015 fini_object_sizes ();
4017 return 0;
4020 } /* Unnamed namespace. */
4022 /* Return a pointer to a pass object newly constructed from the context
4023 CTXT. */
4025 gimple_opt_pass *
4026 make_pass_sprintf_length (gcc::context *ctxt)
4028 return new pass_sprintf_length (ctxt);