C++: -Wwrite-strings: use location of string constant
[official-gcc.git] / gcc / gimple-ssa-sprintf.c
blobb5e1a08a88b769b75c7a91479f234150117cec31
1 /* Copyright (C) 2016-2018 Free Software Foundation, Inc.
2 Contributed by Martin Sebor <msebor@redhat.com>.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* This file implements the printf-return-value pass. The pass does
21 two things: 1) it analyzes calls to formatted output functions like
22 sprintf looking for possible buffer overflows and calls to bounded
23 functions like snprintf for early truncation (and under the control
24 of the -Wformat-length option issues warnings), and 2) under the
25 control of the -fprintf-return-value option it folds the return
26 value of safe calls into constants, making it possible to eliminate
27 code that depends on the value of those constants.
29 For all functions (bounded or not) the pass uses the size of the
30 destination object. That means that it will diagnose calls to
31 snprintf not on the basis of the size specified by the function's
32 second argument but rather on the basis of the size the first
33 argument points to (if possible). For bound-checking built-ins
34 like __builtin___snprintf_chk the pass uses the size typically
35 determined by __builtin_object_size and passed to the built-in
36 by the Glibc inline wrapper.
38 The pass handles all forms of standard sprintf format directives,
39 including character, integer, floating point, pointer, and strings,
40 with the standard C flags, widths, and precisions. For integers
41 and strings it computes the length of output itself. For floating
42 point it uses MPFR to format known constants with up and down
43 rounding and uses the resulting range of output lengths. For
44 strings it uses the length of string literals and the sizes of
45 character arrays that a character pointer may point to as a bound
46 on the longest string. */
48 #include "config.h"
49 #include "system.h"
50 #include "coretypes.h"
51 #include "backend.h"
52 #include "tree.h"
53 #include "gimple.h"
54 #include "tree-pass.h"
55 #include "ssa.h"
56 #include "gimple-fold.h"
57 #include "gimple-pretty-print.h"
58 #include "diagnostic-core.h"
59 #include "fold-const.h"
60 #include "gimple-iterator.h"
61 #include "tree-ssa.h"
62 #include "tree-object-size.h"
63 #include "params.h"
64 #include "tree-cfg.h"
65 #include "tree-ssa-propagate.h"
66 #include "calls.h"
67 #include "cfgloop.h"
68 #include "intl.h"
69 #include "langhooks.h"
71 #include "builtins.h"
72 #include "stor-layout.h"
74 #include "realmpfr.h"
75 #include "target.h"
77 #include "cpplib.h"
78 #include "input.h"
79 #include "toplev.h"
80 #include "substring-locations.h"
81 #include "diagnostic.h"
82 #include "domwalk.h"
83 #include "alloc-pool.h"
84 #include "vr-values.h"
85 #include "gimple-ssa-evrp-analyze.h"
87 /* The likely worst case value of MB_LEN_MAX for the target, large enough
88 for UTF-8. Ideally, this would be obtained by a target hook if it were
89 to be used for optimization but it's good enough as is for warnings. */
90 #define target_mb_len_max() 6
92 /* The maximum number of bytes a single non-string directive can result
93 in. This is the result of printf("%.*Lf", INT_MAX, -LDBL_MAX) for
94 LDBL_MAX_10_EXP of 4932. */
95 #define IEEE_MAX_10_EXP 4932
96 #define target_dir_max() (target_int_max () + IEEE_MAX_10_EXP + 2)
98 namespace {
100 const pass_data pass_data_sprintf_length = {
101 GIMPLE_PASS, // pass type
102 "printf-return-value", // pass name
103 OPTGROUP_NONE, // optinfo_flags
104 TV_NONE, // tv_id
105 PROP_cfg, // properties_required
106 0, // properties_provided
107 0, // properties_destroyed
108 0, // properties_start
109 0, // properties_finish
112 /* Set to the warning level for the current function which is equal
113 either to warn_format_trunc for bounded functions or to
114 warn_format_overflow otherwise. */
116 static int warn_level;
118 struct format_result;
120 class sprintf_dom_walker : public dom_walker
122 public:
123 sprintf_dom_walker () : dom_walker (CDI_DOMINATORS) {}
124 ~sprintf_dom_walker () {}
126 edge before_dom_children (basic_block) FINAL OVERRIDE;
127 void after_dom_children (basic_block) FINAL OVERRIDE;
128 bool handle_gimple_call (gimple_stmt_iterator *);
130 struct call_info;
131 bool compute_format_length (call_info &, format_result *);
132 class evrp_range_analyzer evrp_range_analyzer;
135 class pass_sprintf_length : public gimple_opt_pass
137 bool fold_return_value;
139 public:
140 pass_sprintf_length (gcc::context *ctxt)
141 : gimple_opt_pass (pass_data_sprintf_length, ctxt),
142 fold_return_value (false)
145 opt_pass * clone () { return new pass_sprintf_length (m_ctxt); }
147 virtual bool gate (function *);
149 virtual unsigned int execute (function *);
151 void set_pass_param (unsigned int n, bool param)
153 gcc_assert (n == 0);
154 fold_return_value = param;
159 bool
160 pass_sprintf_length::gate (function *)
162 /* Run the pass iff -Warn-format-overflow or -Warn-format-truncation
163 is specified and either not optimizing and the pass is being invoked
164 early, or when optimizing and the pass is being invoked during
165 optimization (i.e., "late"). */
166 return ((warn_format_overflow > 0
167 || warn_format_trunc > 0
168 || flag_printf_return_value)
169 && (optimize > 0) == fold_return_value);
172 /* The minimum, maximum, likely, and unlikely maximum number of bytes
173 of output either a formatting function or an individual directive
174 can result in. */
176 struct result_range
178 /* The absolute minimum number of bytes. The result of a successful
179 conversion is guaranteed to be no less than this. (An erroneous
180 conversion can be indicated by MIN > HOST_WIDE_INT_MAX.) */
181 unsigned HOST_WIDE_INT min;
182 /* The likely maximum result that is used in diagnostics. In most
183 cases MAX is the same as the worst case UNLIKELY result. */
184 unsigned HOST_WIDE_INT max;
185 /* The likely result used to trigger diagnostics. For conversions
186 that result in a range of bytes [MIN, MAX], LIKELY is somewhere
187 in that range. */
188 unsigned HOST_WIDE_INT likely;
189 /* In rare cases (e.g., for multibyte characters) UNLIKELY gives
190 the worst case maximum result of a directive. In most cases
191 UNLIKELY == MAX. UNLIKELY is used to control the return value
192 optimization but not in diagnostics. */
193 unsigned HOST_WIDE_INT unlikely;
196 /* The result of a call to a formatted function. */
198 struct format_result
200 /* Range of characters written by the formatted function.
201 Setting the minimum to HOST_WIDE_INT_MAX disables all
202 length tracking for the remainder of the format string. */
203 result_range range;
205 /* True when the range above is obtained from known values of
206 directive arguments, or bounds on the amount of output such
207 as width and precision, and not the result of heuristics that
208 depend on warning levels. It's used to issue stricter diagnostics
209 in cases where strings of unknown lengths are bounded by the arrays
210 they are determined to refer to. KNOWNRANGE must not be used for
211 the return value optimization. */
212 bool knownrange;
214 /* True if no individual directive could fail or result in more than
215 4095 bytes of output (the total NUMBER_CHARS_{MIN,MAX} might be
216 greater). Implementations are not required to handle directives
217 that produce more than 4K bytes (leading to undefined behavior)
218 and so when one is found it disables the return value optimization.
219 Similarly, directives that can fail (such as wide character
220 directives) disable the optimization. */
221 bool posunder4k;
223 /* True when a floating point directive has been seen in the format
224 string. */
225 bool floating;
227 /* True when an intermediate result has caused a warning. Used to
228 avoid issuing duplicate warnings while finishing the processing
229 of a call. WARNED also disables the return value optimization. */
230 bool warned;
232 /* Preincrement the number of output characters by 1. */
233 format_result& operator++ ()
235 return *this += 1;
238 /* Postincrement the number of output characters by 1. */
239 format_result operator++ (int)
241 format_result prev (*this);
242 *this += 1;
243 return prev;
246 /* Increment the number of output characters by N. */
247 format_result& operator+= (unsigned HOST_WIDE_INT);
250 format_result&
251 format_result::operator+= (unsigned HOST_WIDE_INT n)
253 gcc_assert (n < HOST_WIDE_INT_MAX);
255 if (range.min < HOST_WIDE_INT_MAX)
256 range.min += n;
258 if (range.max < HOST_WIDE_INT_MAX)
259 range.max += n;
261 if (range.likely < HOST_WIDE_INT_MAX)
262 range.likely += n;
264 if (range.unlikely < HOST_WIDE_INT_MAX)
265 range.unlikely += n;
267 return *this;
270 /* Return the value of INT_MIN for the target. */
272 static inline HOST_WIDE_INT
273 target_int_min ()
275 return tree_to_shwi (TYPE_MIN_VALUE (integer_type_node));
278 /* Return the value of INT_MAX for the target. */
280 static inline unsigned HOST_WIDE_INT
281 target_int_max ()
283 return tree_to_uhwi (TYPE_MAX_VALUE (integer_type_node));
286 /* Return the value of SIZE_MAX for the target. */
288 static inline unsigned HOST_WIDE_INT
289 target_size_max ()
291 return tree_to_uhwi (TYPE_MAX_VALUE (size_type_node));
294 /* A straightforward mapping from the execution character set to the host
295 character set indexed by execution character. */
297 static char target_to_host_charmap[256];
299 /* Initialize a mapping from the execution character set to the host
300 character set. */
302 static bool
303 init_target_to_host_charmap ()
305 /* If the percent sign is non-zero the mapping has already been
306 initialized. */
307 if (target_to_host_charmap['%'])
308 return true;
310 /* Initialize the target_percent character (done elsewhere). */
311 if (!init_target_chars ())
312 return false;
314 /* The subset of the source character set used by printf conversion
315 specifications (strictly speaking, not all letters are used but
316 they are included here for the sake of simplicity). The dollar
317 sign must be included even though it's not in the basic source
318 character set. */
319 const char srcset[] = " 0123456789!\"#%&'()*+,-./:;<=>?[\\]^_{|}~$"
320 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
322 /* Set the mapping for all characters to some ordinary value (i,e.,
323 not none used in printf conversion specifications) and overwrite
324 those that are used by conversion specifications with their
325 corresponding values. */
326 memset (target_to_host_charmap + 1, '?', sizeof target_to_host_charmap - 1);
328 /* Are the two sets of characters the same? */
329 bool all_same_p = true;
331 for (const char *pc = srcset; *pc; ++pc)
333 /* Slice off the high end bits in case target characters are
334 signed. All values are expected to be non-nul, otherwise
335 there's a problem. */
336 if (unsigned char tc = lang_hooks.to_target_charset (*pc))
338 target_to_host_charmap[tc] = *pc;
339 if (tc != *pc)
340 all_same_p = false;
342 else
343 return false;
347 /* Set the first element to a non-zero value if the mapping
348 is 1-to-1, otherwise leave it clear (NUL is assumed to be
349 the same in both character sets). */
350 target_to_host_charmap[0] = all_same_p;
352 return true;
355 /* Return the host source character corresponding to the character
356 CH in the execution character set if one exists, or some innocuous
357 (non-special, non-nul) source character otherwise. */
359 static inline unsigned char
360 target_to_host (unsigned char ch)
362 return target_to_host_charmap[ch];
365 /* Convert an initial substring of the string TARGSTR consisting of
366 characters in the execution character set into a string in the
367 source character set on the host and store up to HOSTSZ characters
368 in the buffer pointed to by HOSTR. Return HOSTR. */
370 static const char*
371 target_to_host (char *hostr, size_t hostsz, const char *targstr)
373 /* Make sure the buffer is reasonably big. */
374 gcc_assert (hostsz > 4);
376 /* The interesting subset of source and execution characters are
377 the same so no conversion is necessary. However, truncate
378 overlong strings just like the translated strings are. */
379 if (target_to_host_charmap['\0'] == 1)
381 strncpy (hostr, targstr, hostsz - 4);
382 if (strlen (targstr) >= hostsz)
383 strcpy (hostr + hostsz - 4, "...");
384 return hostr;
387 /* Convert the initial substring of TARGSTR to the corresponding
388 characters in the host set, appending "..." if TARGSTR is too
389 long to fit. Using the static buffer assumes the function is
390 not called in between sequence points (which it isn't). */
391 for (char *ph = hostr; ; ++targstr)
393 *ph++ = target_to_host (*targstr);
394 if (!*targstr)
395 break;
397 if (size_t (ph - hostr) == hostsz - 4)
399 *ph = '\0';
400 strcat (ph, "...");
401 break;
405 return hostr;
408 /* Convert the sequence of decimal digits in the execution character
409 starting at S to a long, just like strtol does. Return the result
410 and set *END to one past the last converted character. On range
411 error set ERANGE to the digit that caused it. */
413 static inline long
414 target_strtol10 (const char **ps, const char **erange)
416 unsigned HOST_WIDE_INT val = 0;
417 for ( ; ; ++*ps)
419 unsigned char c = target_to_host (**ps);
420 if (ISDIGIT (c))
422 c -= '0';
424 /* Check for overflow. */
425 if (val > (LONG_MAX - c) / 10LU)
427 val = LONG_MAX;
428 *erange = *ps;
430 /* Skip the remaining digits. */
432 c = target_to_host (*++*ps);
433 while (ISDIGIT (c));
434 break;
436 else
437 val = val * 10 + c;
439 else
440 break;
443 return val;
446 /* Return the constant initial value of DECL if available or DECL
447 otherwise. Same as the synonymous function in c/c-typeck.c. */
449 static tree
450 decl_constant_value (tree decl)
452 if (/* Don't change a variable array bound or initial value to a constant
453 in a place where a variable is invalid. Note that DECL_INITIAL
454 isn't valid for a PARM_DECL. */
455 current_function_decl != 0
456 && TREE_CODE (decl) != PARM_DECL
457 && !TREE_THIS_VOLATILE (decl)
458 && TREE_READONLY (decl)
459 && DECL_INITIAL (decl) != 0
460 && TREE_CODE (DECL_INITIAL (decl)) != ERROR_MARK
461 /* This is invalid if initial value is not constant.
462 If it has either a function call, a memory reference,
463 or a variable, then re-evaluating it could give different results. */
464 && TREE_CONSTANT (DECL_INITIAL (decl))
465 /* Check for cases where this is sub-optimal, even though valid. */
466 && TREE_CODE (DECL_INITIAL (decl)) != CONSTRUCTOR)
467 return DECL_INITIAL (decl);
468 return decl;
471 /* Given FORMAT, set *PLOC to the source location of the format string
472 and return the format string if it is known or null otherwise. */
474 static const char*
475 get_format_string (tree format, location_t *ploc)
477 if (VAR_P (format))
479 /* Pull out a constant value if the front end didn't. */
480 format = decl_constant_value (format);
481 STRIP_NOPS (format);
484 if (integer_zerop (format))
486 /* FIXME: Diagnose null format string if it hasn't been diagnosed
487 by -Wformat (the latter diagnoses only nul pointer constants,
488 this pass can do better). */
489 return NULL;
492 HOST_WIDE_INT offset = 0;
494 if (TREE_CODE (format) == POINTER_PLUS_EXPR)
496 tree arg0 = TREE_OPERAND (format, 0);
497 tree arg1 = TREE_OPERAND (format, 1);
498 STRIP_NOPS (arg0);
499 STRIP_NOPS (arg1);
501 if (TREE_CODE (arg1) != INTEGER_CST)
502 return NULL;
504 format = arg0;
506 /* POINTER_PLUS_EXPR offsets are to be interpreted signed. */
507 if (!cst_and_fits_in_hwi (arg1))
508 return NULL;
510 offset = int_cst_value (arg1);
513 if (TREE_CODE (format) != ADDR_EXPR)
514 return NULL;
516 *ploc = EXPR_LOC_OR_LOC (format, input_location);
518 format = TREE_OPERAND (format, 0);
520 if (TREE_CODE (format) == ARRAY_REF
521 && tree_fits_shwi_p (TREE_OPERAND (format, 1))
522 && (offset += tree_to_shwi (TREE_OPERAND (format, 1))) >= 0)
523 format = TREE_OPERAND (format, 0);
525 if (offset < 0)
526 return NULL;
528 tree array_init;
529 tree array_size = NULL_TREE;
531 if (VAR_P (format)
532 && TREE_CODE (TREE_TYPE (format)) == ARRAY_TYPE
533 && (array_init = decl_constant_value (format)) != format
534 && TREE_CODE (array_init) == STRING_CST)
536 /* Extract the string constant initializer. Note that this may
537 include a trailing NUL character that is not in the array (e.g.
538 const char a[3] = "foo";). */
539 array_size = DECL_SIZE_UNIT (format);
540 format = array_init;
543 if (TREE_CODE (format) != STRING_CST)
544 return NULL;
546 tree type = TREE_TYPE (format);
548 scalar_int_mode char_mode;
549 if (!is_int_mode (TYPE_MODE (TREE_TYPE (type)), &char_mode)
550 || GET_MODE_SIZE (char_mode) != 1)
552 /* Wide format string. */
553 return NULL;
556 const char *fmtstr = TREE_STRING_POINTER (format);
557 unsigned fmtlen = TREE_STRING_LENGTH (format);
559 if (array_size)
561 /* Variable length arrays can't be initialized. */
562 gcc_assert (TREE_CODE (array_size) == INTEGER_CST);
564 if (tree_fits_shwi_p (array_size))
566 HOST_WIDE_INT array_size_value = tree_to_shwi (array_size);
567 if (array_size_value > 0
568 && array_size_value == (int) array_size_value
569 && fmtlen > array_size_value)
570 fmtlen = array_size_value;
573 if (offset)
575 if (offset >= fmtlen)
576 return NULL;
578 fmtstr += offset;
579 fmtlen -= offset;
582 if (fmtlen < 1 || fmtstr[--fmtlen] != 0)
584 /* FIXME: Diagnose an unterminated format string if it hasn't been
585 diagnosed by -Wformat. Similarly to a null format pointer,
586 -Wformat diagnoses only nul pointer constants, this pass can
587 do better). */
588 return NULL;
591 return fmtstr;
594 /* For convenience and brevity, shorter named entrypoints of
595 format_warning_at_substring and format_warning_at_substring_n.
596 These have to be functions with the attribute so that exgettext
597 works properly. */
599 static bool
600 ATTRIBUTE_GCC_DIAG (5, 6)
601 fmtwarn (const substring_loc &fmt_loc, location_t param_loc,
602 const char *corrected_substring, int opt, const char *gmsgid, ...)
604 va_list ap;
605 va_start (ap, gmsgid);
606 bool warned = format_warning_va (fmt_loc, NULL, param_loc, NULL,
607 corrected_substring, opt, gmsgid, &ap);
608 va_end (ap);
610 return warned;
613 static bool
614 ATTRIBUTE_GCC_DIAG (6, 8) ATTRIBUTE_GCC_DIAG (7, 8)
615 fmtwarn_n (const substring_loc &fmt_loc, location_t param_loc,
616 const char *corrected_substring, int opt, unsigned HOST_WIDE_INT n,
617 const char *singular_gmsgid, const char *plural_gmsgid, ...)
619 va_list ap;
620 va_start (ap, plural_gmsgid);
621 bool warned = format_warning_n_va (fmt_loc, NULL, param_loc, NULL,
622 corrected_substring,
623 opt, n, singular_gmsgid, plural_gmsgid,
624 &ap);
625 va_end (ap);
627 return warned;
630 /* Format length modifiers. */
632 enum format_lengths
634 FMT_LEN_none,
635 FMT_LEN_hh, // char argument
636 FMT_LEN_h, // short
637 FMT_LEN_l, // long
638 FMT_LEN_ll, // long long
639 FMT_LEN_L, // long double (and GNU long long)
640 FMT_LEN_z, // size_t
641 FMT_LEN_t, // ptrdiff_t
642 FMT_LEN_j // intmax_t
646 /* Description of the result of conversion either of a single directive
647 or the whole format string. */
649 struct fmtresult
651 /* Construct a FMTRESULT object with all counters initialized
652 to MIN. KNOWNRANGE is set when MIN is valid. */
653 fmtresult (unsigned HOST_WIDE_INT min = HOST_WIDE_INT_MAX)
654 : argmin (), argmax (),
655 knownrange (min < HOST_WIDE_INT_MAX),
656 mayfail (), nullp ()
658 range.min = min;
659 range.max = min;
660 range.likely = min;
661 range.unlikely = min;
664 /* Construct a FMTRESULT object with MIN, MAX, and LIKELY counters.
665 KNOWNRANGE is set when both MIN and MAX are valid. */
666 fmtresult (unsigned HOST_WIDE_INT min, unsigned HOST_WIDE_INT max,
667 unsigned HOST_WIDE_INT likely = HOST_WIDE_INT_MAX)
668 : argmin (), argmax (),
669 knownrange (min < HOST_WIDE_INT_MAX && max < HOST_WIDE_INT_MAX),
670 mayfail (), nullp ()
672 range.min = min;
673 range.max = max;
674 range.likely = max < likely ? min : likely;
675 range.unlikely = max;
678 /* Adjust result upward to reflect the RANGE of values the specified
679 width or precision is known to be in. */
680 fmtresult& adjust_for_width_or_precision (const HOST_WIDE_INT[2],
681 tree = NULL_TREE,
682 unsigned = 0, unsigned = 0);
684 /* Return the maximum number of decimal digits a value of TYPE
685 formats as on output. */
686 static unsigned type_max_digits (tree, int);
688 /* The range a directive's argument is in. */
689 tree argmin, argmax;
691 /* The minimum and maximum number of bytes that a directive
692 results in on output for an argument in the range above. */
693 result_range range;
695 /* True when the range above is obtained from a known value of
696 a directive's argument or its bounds and not the result of
697 heuristics that depend on warning levels. */
698 bool knownrange;
700 /* True for a directive that may fail (such as wide character
701 directives). */
702 bool mayfail;
704 /* True when the argument is a null pointer. */
705 bool nullp;
708 /* Adjust result upward to reflect the range ADJUST of values the
709 specified width or precision is known to be in. When non-null,
710 TYPE denotes the type of the directive whose result is being
711 adjusted, BASE gives the base of the directive (octal, decimal,
712 or hex), and ADJ denotes the additional adjustment to the LIKELY
713 counter that may need to be added when ADJUST is a range. */
715 fmtresult&
716 fmtresult::adjust_for_width_or_precision (const HOST_WIDE_INT adjust[2],
717 tree type /* = NULL_TREE */,
718 unsigned base /* = 0 */,
719 unsigned adj /* = 0 */)
721 bool minadjusted = false;
723 /* Adjust the minimum and likely counters. */
724 if (adjust[0] >= 0)
726 if (range.min < (unsigned HOST_WIDE_INT)adjust[0])
728 range.min = adjust[0];
729 minadjusted = true;
732 /* Adjust the likely counter. */
733 if (range.likely < range.min)
734 range.likely = range.min;
736 else if (adjust[0] == target_int_min ()
737 && (unsigned HOST_WIDE_INT)adjust[1] == target_int_max ())
738 knownrange = false;
740 /* Adjust the maximum counter. */
741 if (adjust[1] > 0)
743 if (range.max < (unsigned HOST_WIDE_INT)adjust[1])
745 range.max = adjust[1];
747 /* Set KNOWNRANGE if both the minimum and maximum have been
748 adjusted. Otherwise leave it at what it was before. */
749 knownrange = minadjusted;
753 if (warn_level > 1 && type)
755 /* For large non-constant width or precision whose range spans
756 the maximum number of digits produced by the directive for
757 any argument, set the likely number of bytes to be at most
758 the number digits plus other adjustment determined by the
759 caller (one for sign or two for the hexadecimal "0x"
760 prefix). */
761 unsigned dirdigs = type_max_digits (type, base);
762 if (adjust[0] < dirdigs && dirdigs < adjust[1]
763 && range.likely < dirdigs)
764 range.likely = dirdigs + adj;
766 else if (range.likely < (range.min ? range.min : 1))
768 /* Conservatively, set LIKELY to at least MIN but no less than
769 1 unless MAX is zero. */
770 range.likely = (range.min
771 ? range.min
772 : range.max && (range.max < HOST_WIDE_INT_MAX
773 || warn_level > 1) ? 1 : 0);
776 /* Finally adjust the unlikely counter to be at least as large as
777 the maximum. */
778 if (range.unlikely < range.max)
779 range.unlikely = range.max;
781 return *this;
784 /* Return the maximum number of digits a value of TYPE formats in
785 BASE on output, not counting base prefix . */
787 unsigned
788 fmtresult::type_max_digits (tree type, int base)
790 unsigned prec = TYPE_PRECISION (type);
791 switch (base)
793 case 8:
794 return (prec + 2) / 3;
795 case 10:
796 /* Decimal approximation: yields 3, 5, 10, and 20 for precision
797 of 8, 16, 32, and 64 bits. */
798 return prec * 301 / 1000 + 1;
799 case 16:
800 return prec / 4;
803 gcc_unreachable ();
806 static bool
807 get_int_range (tree, HOST_WIDE_INT *, HOST_WIDE_INT *, bool, HOST_WIDE_INT,
808 class vr_values *vr_values);
810 /* Description of a format directive. A directive is either a plain
811 string or a conversion specification that starts with '%'. */
813 struct directive
815 /* The 1-based directive number (for debugging). */
816 unsigned dirno;
818 /* The first character of the directive and its length. */
819 const char *beg;
820 size_t len;
822 /* A bitmap of flags, one for each character. */
823 unsigned flags[256 / sizeof (int)];
825 /* The range of values of the specified width, or -1 if not specified. */
826 HOST_WIDE_INT width[2];
827 /* The range of values of the specified precision, or -1 if not
828 specified. */
829 HOST_WIDE_INT prec[2];
831 /* Length modifier. */
832 format_lengths modifier;
834 /* Format specifier character. */
835 char specifier;
837 /* The argument of the directive or null when the directive doesn't
838 take one or when none is available (such as for vararg functions). */
839 tree arg;
841 /* Format conversion function that given a directive and an argument
842 returns the formatting result. */
843 fmtresult (*fmtfunc) (const directive &, tree, vr_values *);
845 /* Return true when the format flag CHR has been used. */
846 bool get_flag (char chr) const
848 unsigned char c = chr & 0xff;
849 return (flags[c / (CHAR_BIT * sizeof *flags)]
850 & (1U << (c % (CHAR_BIT * sizeof *flags))));
853 /* Make a record of the format flag CHR having been used. */
854 void set_flag (char chr)
856 unsigned char c = chr & 0xff;
857 flags[c / (CHAR_BIT * sizeof *flags)]
858 |= (1U << (c % (CHAR_BIT * sizeof *flags)));
861 /* Reset the format flag CHR. */
862 void clear_flag (char chr)
864 unsigned char c = chr & 0xff;
865 flags[c / (CHAR_BIT * sizeof *flags)]
866 &= ~(1U << (c % (CHAR_BIT * sizeof *flags)));
869 /* Set both bounds of the width range to VAL. */
870 void set_width (HOST_WIDE_INT val)
872 width[0] = width[1] = val;
875 /* Set the width range according to ARG, with both bounds being
876 no less than 0. For a constant ARG set both bounds to its value
877 or 0, whichever is greater. For a non-constant ARG in some range
878 set width to its range adjusting each bound to -1 if it's less.
879 For an indeterminate ARG set width to [0, INT_MAX]. */
880 void set_width (tree arg, vr_values *vr_values)
882 get_int_range (arg, width, width + 1, true, 0, vr_values);
885 /* Set both bounds of the precision range to VAL. */
886 void set_precision (HOST_WIDE_INT val)
888 prec[0] = prec[1] = val;
891 /* Set the precision range according to ARG, with both bounds being
892 no less than -1. For a constant ARG set both bounds to its value
893 or -1 whichever is greater. For a non-constant ARG in some range
894 set precision to its range adjusting each bound to -1 if it's less.
895 For an indeterminate ARG set precision to [-1, INT_MAX]. */
896 void set_precision (tree arg, vr_values *vr_values)
898 get_int_range (arg, prec, prec + 1, false, -1, vr_values);
901 /* Return true if both width and precision are known to be
902 either constant or in some range, false otherwise. */
903 bool known_width_and_precision () const
905 return ((width[1] < 0
906 || (unsigned HOST_WIDE_INT)width[1] <= target_int_max ())
907 && (prec[1] < 0
908 || (unsigned HOST_WIDE_INT)prec[1] < target_int_max ()));
912 /* Return the logarithm of X in BASE. */
914 static int
915 ilog (unsigned HOST_WIDE_INT x, int base)
917 int res = 0;
920 ++res;
921 x /= base;
922 } while (x);
923 return res;
926 /* Return the number of bytes resulting from converting into a string
927 the INTEGER_CST tree node X in BASE with a minimum of PREC digits.
928 PLUS indicates whether 1 for a plus sign should be added for positive
929 numbers, and PREFIX whether the length of an octal ('O') or hexadecimal
930 ('0x') prefix should be added for nonzero numbers. Return -1 if X cannot
931 be represented. */
933 static HOST_WIDE_INT
934 tree_digits (tree x, int base, HOST_WIDE_INT prec, bool plus, bool prefix)
936 unsigned HOST_WIDE_INT absval;
938 HOST_WIDE_INT res;
940 if (TYPE_UNSIGNED (TREE_TYPE (x)))
942 if (tree_fits_uhwi_p (x))
944 absval = tree_to_uhwi (x);
945 res = plus;
947 else
948 return -1;
950 else
952 if (tree_fits_shwi_p (x))
954 HOST_WIDE_INT i = tree_to_shwi (x);
955 if (HOST_WIDE_INT_MIN == i)
957 /* Avoid undefined behavior due to negating a minimum. */
958 absval = HOST_WIDE_INT_MAX;
959 res = 1;
961 else if (i < 0)
963 absval = -i;
964 res = 1;
966 else
968 absval = i;
969 res = plus;
972 else
973 return -1;
976 int ndigs = ilog (absval, base);
978 res += prec < ndigs ? ndigs : prec;
980 /* Adjust a non-zero value for the base prefix, either hexadecimal,
981 or, unless precision has resulted in a leading zero, also octal. */
982 if (prefix && absval && (base == 16 || prec <= ndigs))
984 if (base == 8)
985 res += 1;
986 else if (base == 16)
987 res += 2;
990 return res;
993 /* Given the formatting result described by RES and NAVAIL, the number
994 of available in the destination, return the range of bytes remaining
995 in the destination. */
997 static inline result_range
998 bytes_remaining (unsigned HOST_WIDE_INT navail, const format_result &res)
1000 result_range range;
1002 if (HOST_WIDE_INT_MAX <= navail)
1004 range.min = range.max = range.likely = range.unlikely = navail;
1005 return range;
1008 /* The lower bound of the available range is the available size
1009 minus the maximum output size, and the upper bound is the size
1010 minus the minimum. */
1011 range.max = res.range.min < navail ? navail - res.range.min : 0;
1013 range.likely = res.range.likely < navail ? navail - res.range.likely : 0;
1015 if (res.range.max < HOST_WIDE_INT_MAX)
1016 range.min = res.range.max < navail ? navail - res.range.max : 0;
1017 else
1018 range.min = range.likely;
1020 range.unlikely = (res.range.unlikely < navail
1021 ? navail - res.range.unlikely : 0);
1023 return range;
1026 /* Description of a call to a formatted function. */
1028 struct sprintf_dom_walker::call_info
1030 /* Function call statement. */
1031 gimple *callstmt;
1033 /* Function called. */
1034 tree func;
1036 /* Called built-in function code. */
1037 built_in_function fncode;
1039 /* Format argument and format string extracted from it. */
1040 tree format;
1041 const char *fmtstr;
1043 /* The location of the format argument. */
1044 location_t fmtloc;
1046 /* The destination object size for __builtin___xxx_chk functions
1047 typically determined by __builtin_object_size, or -1 if unknown. */
1048 unsigned HOST_WIDE_INT objsize;
1050 /* Number of the first variable argument. */
1051 unsigned HOST_WIDE_INT argidx;
1053 /* True for functions like snprintf that specify the size of
1054 the destination, false for others like sprintf that don't. */
1055 bool bounded;
1057 /* True for bounded functions like snprintf that specify a zero-size
1058 buffer as a request to compute the size of output without actually
1059 writing any. NOWRITE is cleared in response to the %n directive
1060 which has side-effects similar to writing output. */
1061 bool nowrite;
1063 /* Return true if the called function's return value is used. */
1064 bool retval_used () const
1066 return gimple_get_lhs (callstmt);
1069 /* Return the warning option corresponding to the called function. */
1070 int warnopt () const
1072 return bounded ? OPT_Wformat_truncation_ : OPT_Wformat_overflow_;
1076 /* Return the result of formatting a no-op directive (such as '%n'). */
1078 static fmtresult
1079 format_none (const directive &, tree, vr_values *)
1081 fmtresult res (0);
1082 return res;
1085 /* Return the result of formatting the '%%' directive. */
1087 static fmtresult
1088 format_percent (const directive &, tree, vr_values *)
1090 fmtresult res (1);
1091 return res;
1095 /* Compute intmax_type_node and uintmax_type_node similarly to how
1096 tree.c builds size_type_node. */
1098 static void
1099 build_intmax_type_nodes (tree *pintmax, tree *puintmax)
1101 if (strcmp (UINTMAX_TYPE, "unsigned int") == 0)
1103 *pintmax = integer_type_node;
1104 *puintmax = unsigned_type_node;
1106 else if (strcmp (UINTMAX_TYPE, "long unsigned int") == 0)
1108 *pintmax = long_integer_type_node;
1109 *puintmax = long_unsigned_type_node;
1111 else if (strcmp (UINTMAX_TYPE, "long long unsigned int") == 0)
1113 *pintmax = long_long_integer_type_node;
1114 *puintmax = long_long_unsigned_type_node;
1116 else
1118 for (int i = 0; i < NUM_INT_N_ENTS; i++)
1119 if (int_n_enabled_p[i])
1121 char name[50];
1122 sprintf (name, "__int%d unsigned", int_n_data[i].bitsize);
1124 if (strcmp (name, UINTMAX_TYPE) == 0)
1126 *pintmax = int_n_trees[i].signed_type;
1127 *puintmax = int_n_trees[i].unsigned_type;
1128 return;
1131 gcc_unreachable ();
1135 /* Determine the range [*PMIN, *PMAX] that the expression ARG is
1136 in and that is representable in type int.
1137 Return true when the range is a subrange of that of int.
1138 When ARG is null it is as if it had the full range of int.
1139 When ABSOLUTE is true the range reflects the absolute value of
1140 the argument. When ABSOLUTE is false, negative bounds of
1141 the determined range are replaced with NEGBOUND. */
1143 static bool
1144 get_int_range (tree arg, HOST_WIDE_INT *pmin, HOST_WIDE_INT *pmax,
1145 bool absolute, HOST_WIDE_INT negbound,
1146 class vr_values *vr_values)
1148 /* The type of the result. */
1149 const_tree type = integer_type_node;
1151 bool knownrange = false;
1153 if (!arg)
1155 *pmin = tree_to_shwi (TYPE_MIN_VALUE (type));
1156 *pmax = tree_to_shwi (TYPE_MAX_VALUE (type));
1158 else if (TREE_CODE (arg) == INTEGER_CST
1159 && TYPE_PRECISION (TREE_TYPE (arg)) <= TYPE_PRECISION (type))
1161 /* For a constant argument return its value adjusted as specified
1162 by NEGATIVE and NEGBOUND and return true to indicate that the
1163 result is known. */
1164 *pmin = tree_fits_shwi_p (arg) ? tree_to_shwi (arg) : tree_to_uhwi (arg);
1165 *pmax = *pmin;
1166 knownrange = true;
1168 else
1170 /* True if the argument's range cannot be determined. */
1171 bool unknown = true;
1173 tree argtype = TREE_TYPE (arg);
1175 /* Ignore invalid arguments with greater precision that that
1176 of the expected type (e.g., in sprintf("%*i", 12LL, i)).
1177 They will have been detected and diagnosed by -Wformat and
1178 so it's not important to complicate this code to try to deal
1179 with them again. */
1180 if (TREE_CODE (arg) == SSA_NAME
1181 && INTEGRAL_TYPE_P (argtype)
1182 && TYPE_PRECISION (argtype) <= TYPE_PRECISION (type))
1184 /* Try to determine the range of values of the integer argument. */
1185 value_range *vr = vr_values->get_value_range (arg);
1186 if (vr->type == VR_RANGE
1187 && TREE_CODE (vr->min) == INTEGER_CST
1188 && TREE_CODE (vr->max) == INTEGER_CST)
1190 HOST_WIDE_INT type_min
1191 = (TYPE_UNSIGNED (argtype)
1192 ? tree_to_uhwi (TYPE_MIN_VALUE (argtype))
1193 : tree_to_shwi (TYPE_MIN_VALUE (argtype)));
1195 HOST_WIDE_INT type_max = tree_to_uhwi (TYPE_MAX_VALUE (argtype));
1197 *pmin = TREE_INT_CST_LOW (vr->min);
1198 *pmax = TREE_INT_CST_LOW (vr->max);
1200 if (*pmin < *pmax)
1202 /* Return true if the adjusted range is a subrange of
1203 the full range of the argument's type. *PMAX may
1204 be less than *PMIN when the argument is unsigned
1205 and its upper bound is in excess of TYPE_MAX. In
1206 that (invalid) case disregard the range and use that
1207 of the expected type instead. */
1208 knownrange = type_min < *pmin || *pmax < type_max;
1210 unknown = false;
1215 /* Handle an argument with an unknown range as if none had been
1216 provided. */
1217 if (unknown)
1218 return get_int_range (NULL_TREE, pmin, pmax, absolute,
1219 negbound, vr_values);
1222 /* Adjust each bound as specified by ABSOLUTE and NEGBOUND. */
1223 if (absolute)
1225 if (*pmin < 0)
1227 if (*pmin == *pmax)
1228 *pmin = *pmax = -*pmin;
1229 else
1231 /* Make sure signed overlow is avoided. */
1232 gcc_assert (*pmin != HOST_WIDE_INT_MIN);
1234 HOST_WIDE_INT tmp = -*pmin;
1235 *pmin = 0;
1236 if (*pmax < tmp)
1237 *pmax = tmp;
1241 else if (*pmin < negbound)
1242 *pmin = negbound;
1244 return knownrange;
1247 /* With the range [*ARGMIN, *ARGMAX] of an integer directive's actual
1248 argument, due to the conversion from either *ARGMIN or *ARGMAX to
1249 the type of the directive's formal argument it's possible for both
1250 to result in the same number of bytes or a range of bytes that's
1251 less than the number of bytes that would result from formatting
1252 some other value in the range [*ARGMIN, *ARGMAX]. This can be
1253 determined by checking for the actual argument being in the range
1254 of the type of the directive. If it isn't it must be assumed to
1255 take on the full range of the directive's type.
1256 Return true when the range has been adjusted to the full range
1257 of DIRTYPE, and false otherwise. */
1259 static bool
1260 adjust_range_for_overflow (tree dirtype, tree *argmin, tree *argmax)
1262 tree argtype = TREE_TYPE (*argmin);
1263 unsigned argprec = TYPE_PRECISION (argtype);
1264 unsigned dirprec = TYPE_PRECISION (dirtype);
1266 /* If the actual argument and the directive's argument have the same
1267 precision and sign there can be no overflow and so there is nothing
1268 to adjust. */
1269 if (argprec == dirprec && TYPE_SIGN (argtype) == TYPE_SIGN (dirtype))
1270 return false;
1272 /* The logic below was inspired/lifted from the CONVERT_EXPR_CODE_P
1273 branch in the extract_range_from_unary_expr function in tree-vrp.c. */
1275 if (TREE_CODE (*argmin) == INTEGER_CST
1276 && TREE_CODE (*argmax) == INTEGER_CST
1277 && (dirprec >= argprec
1278 || integer_zerop (int_const_binop (RSHIFT_EXPR,
1279 int_const_binop (MINUS_EXPR,
1280 *argmax,
1281 *argmin),
1282 size_int (dirprec)))))
1284 *argmin = force_fit_type (dirtype, wi::to_widest (*argmin), 0, false);
1285 *argmax = force_fit_type (dirtype, wi::to_widest (*argmax), 0, false);
1287 /* If *ARGMIN is still less than *ARGMAX the conversion above
1288 is safe. Otherwise, it has overflowed and would be unsafe. */
1289 if (tree_int_cst_le (*argmin, *argmax))
1290 return false;
1293 *argmin = TYPE_MIN_VALUE (dirtype);
1294 *argmax = TYPE_MAX_VALUE (dirtype);
1295 return true;
1298 /* Return a range representing the minimum and maximum number of bytes
1299 that the integer format directive DIR will output for the argument
1300 ARG, given the width and precision extracted from DIR. ARG may be
1301 null (the type expected by DIR is used as an approximation), an
INTEGER_CST (its value is formatted), or another expression of
integer or pointer type, in which case the result is bounded by
the value range VR_VALUES has for it, if any, or else by the range
of its type. The function calls itself with the constant bounds
of the range to compute the lengths. A default-constructed
fmtresult is returned for an unhandled length modifier or for
an argument that is neither of integer nor of pointer type. */
1303 static fmtresult
1304 format_integer (const directive &dir, tree arg, vr_values *vr_values)
1306 tree intmax_type_node;
1307 tree uintmax_type_node;
1309 /* Base to format the number in. */
1310 int base;
1312 /* True when a conversion is preceded by a prefix indicating the base
1313 of the argument (octal or hexadecimal). */
1314 bool maybebase = dir.get_flag ('#');
1316 /* True when a signed conversion is preceded by a sign or space. */
1317 bool maybesign = false;
1319 /* True for signed conversions (i.e., 'd' and 'i'). */
1320 bool sign = false;
1322 switch (dir.specifier)
1324 case 'd':
1325 case 'i':
1326 /* Space and '+' are only meaningful for signed conversions. */
1327 maybesign = dir.get_flag (' ') | dir.get_flag ('+');
1328 sign = true;
1329 base = 10;
1330 break;
1331 case 'u':
1332 base = 10;
1333 break;
1334 case 'o':
1335 base = 8;
1336 break;
1337 case 'X':
1338 case 'x':
1339 base = 16;
1340 break;
1341 default:
1342 gcc_unreachable ();
1345 /* The type of the "formal" argument expected by the directive. */
1346 tree dirtype = NULL_TREE;
1348 /* Determine the expected type of the argument from the length
1349 modifier. */
1350 switch (dir.modifier)
1352 case FMT_LEN_none:
/* NOTE(review): the specifier switch above accepts only d, i, u, o,
x and X, so the 'p' test below looks unreachable -- confirm. */
1353 if (dir.specifier == 'p')
1354 dirtype = ptr_type_node;
1355 else
1356 dirtype = sign ? integer_type_node : unsigned_type_node;
1357 break;
1359 case FMT_LEN_h:
1360 dirtype = sign ? short_integer_type_node : short_unsigned_type_node;
1361 break;
1363 case FMT_LEN_hh:
1364 dirtype = sign ? signed_char_type_node : unsigned_char_type_node;
1365 break;
1367 case FMT_LEN_l:
1368 dirtype = sign ? long_integer_type_node : long_unsigned_type_node;
1369 break;
1371 case FMT_LEN_L:
1372 case FMT_LEN_ll:
1373 dirtype = (sign
1374 ? long_long_integer_type_node
1375 : long_long_unsigned_type_node);
1376 break;
1378 case FMT_LEN_z:
1379 dirtype = signed_or_unsigned_type_for (!sign, size_type_node);
1380 break;
1382 case FMT_LEN_t:
1383 dirtype = signed_or_unsigned_type_for (!sign, ptrdiff_type_node);
1384 break;
1386 case FMT_LEN_j:
1387 build_intmax_type_nodes (&intmax_type_node, &uintmax_type_node);
1388 dirtype = sign ? intmax_type_node : uintmax_type_node;
1389 break;
1391 default:
1392 return fmtresult ();
1395 /* The type of the argument to the directive, either deduced from
1396 the actual non-constant argument if one is known, or from
1397 the directive itself when none has been provided because it's
1398 a va_list. */
1399 tree argtype = NULL_TREE;
1401 if (!arg)
1403 /* When the argument has not been provided, use the type of
1404 the directive's argument as an approximation. This will
1405 result in false positives for directives like %i with
1406 arguments with smaller precision (such as short or char). */
1407 argtype = dirtype;
1409 else if (TREE_CODE (arg) == INTEGER_CST)
1411 /* When a constant argument has been provided use its value
1412 rather than type to determine the length of the output. */
1413 fmtresult res;
1415 if ((dir.prec[0] <= 0 && dir.prec[1] >= 0) && integer_zerop (arg))
1417 /* As a special case, a precision of zero with a zero argument
1418 results in zero bytes except in base 8 when the '#' flag is
1419 specified, and for signed conversions in base 8 and 10 when
1420 either the space or '+' flag has been specified and it results
1421 in just one byte (with width having the normal effect). This
1422 must extend to the case of a specified precision with
1423 an unknown value because it can be zero. */
1424 res.range.min = ((base == 8 && dir.get_flag ('#')) || maybesign);
1425 if (res.range.min == 0 && dir.prec[0] != dir.prec[1])
1427 res.range.max = 1;
1428 res.range.likely = 1;
1430 else
1432 res.range.max = res.range.min;
1433 res.range.likely = res.range.min;
1436 else
1438 /* Convert the argument to the type of the directive. */
1439 arg = fold_convert (dirtype, arg);
/* The minimum is computed with the lower bound of the precision
and the maximum with its upper bound. */
1441 res.range.min = tree_digits (arg, base, dir.prec[0],
1442 maybesign, maybebase);
1443 if (dir.prec[0] == dir.prec[1])
1444 res.range.max = res.range.min;
1445 else
1446 res.range.max = tree_digits (arg, base, dir.prec[1],
1447 maybesign, maybebase);
1448 res.range.likely = res.range.min;
1449 res.knownrange = true;
1452 res.range.unlikely = res.range.max;
1454 /* Bump up the counters if WIDTH is greater than LEN. */
1455 res.adjust_for_width_or_precision (dir.width, dirtype, base,
1456 (sign | maybebase) + (base == 16));
1457 /* Bump up the counters again if PRECision is greater still. */
1458 res.adjust_for_width_or_precision (dir.prec, dirtype, base,
1459 (sign | maybebase) + (base == 16));
1461 return res;
1463 else if (INTEGRAL_TYPE_P (TREE_TYPE (arg))
1464 || TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE)
1465 /* Determine the type of the provided non-constant argument. */
1466 argtype = TREE_TYPE (arg);
1467 else
1468 /* Don't bother with invalid arguments since they likely would
1469 have already been diagnosed, and disable any further checking
1470 of the format string by returning [-1, -1]. */
1471 return fmtresult ();
1473 fmtresult res;
1475 /* Using either the range the non-constant argument is in, or its
1476 type (either "formal" or actual), create a range of values that
1477 constrain the length of output given the warning level. */
1478 tree argmin = NULL_TREE;
1479 tree argmax = NULL_TREE;
1481 if (arg
1482 && TREE_CODE (arg) == SSA_NAME
1483 && INTEGRAL_TYPE_P (argtype))
1485 /* Try to determine the range of values of the integer argument
1486 (range information is not available for pointers). */
1487 value_range *vr = vr_values->get_value_range (arg);
1488 if (vr->type == VR_RANGE
1489 && TREE_CODE (vr->min) == INTEGER_CST
1490 && TREE_CODE (vr->max) == INTEGER_CST)
1492 argmin = vr->min;
1493 argmax = vr->max;
1495 /* Set KNOWNRANGE if the argument is in a known subrange
1496 of the directive's type and neither width nor precision
1497 is unknown. (KNOWNRANGE may be reset below). */
1498 res.knownrange
1499 = ((!tree_int_cst_equal (TYPE_MIN_VALUE (dirtype), argmin)
1500 || !tree_int_cst_equal (TYPE_MAX_VALUE (dirtype), argmax))
1501 && dir.known_width_and_precision ());
1503 res.argmin = argmin;
1504 res.argmax = argmax;
1506 else if (vr->type == VR_ANTI_RANGE)
1508 /* Handle anti-ranges if/when bug 71690 is resolved. */
1510 else if (vr->type == VR_VARYING
1511 || vr->type == VR_UNDEFINED)
1513 /* The argument here may be the result of promoting the actual
1514 argument to int. Try to determine the type of the actual
1515 argument before promotion and narrow down its range that
1516 way. */
1517 gimple *def = SSA_NAME_DEF_STMT (arg);
1518 if (is_gimple_assign (def))
1520 tree_code code = gimple_assign_rhs_code (def);
1521 if (code == INTEGER_CST)
/* The argument is assigned a constant: format that instead. */
1523 arg = gimple_assign_rhs1 (def);
1524 return format_integer (dir, arg, vr_values);
1527 if (code == NOP_EXPR)
1529 tree type = TREE_TYPE (gimple_assign_rhs1 (def));
1530 if (INTEGRAL_TYPE_P (type)
1531 || TREE_CODE (type) == POINTER_TYPE)
1532 argtype = type;
/* No range has been determined above: fall back on the full range
of ARGTYPE, or for pointers on that of pointer_sized_int_node. */
1538 if (!argmin)
1540 if (TREE_CODE (argtype) == POINTER_TYPE)
1542 argmin = build_int_cst (pointer_sized_int_node, 0);
1543 argmax = build_all_ones_cst (pointer_sized_int_node);
1545 else
1547 argmin = TYPE_MIN_VALUE (argtype);
1548 argmax = TYPE_MAX_VALUE (argtype);
1552 /* Clear KNOWNRANGE if the range has been adjusted to the maximum
1553 of the directive. If it has been cleared then since ARGMIN and/or
1554 ARGMAX have been adjusted also adjust the corresponding ARGMIN and
1555 ARGMAX in the result to include in diagnostics. */
1556 if (adjust_range_for_overflow (dirtype, &argmin, &argmax))
1558 res.knownrange = false;
1559 res.argmin = argmin;
1560 res.argmax = argmax;
1563 /* Recursively compute the minimum and maximum from the known range. */
1564 if (TYPE_UNSIGNED (dirtype) || tree_int_cst_sgn (argmin) >= 0)
1566 /* For unsigned conversions/directives or signed when
1567 the minimum is positive, use the minimum and maximum to compute
1568 the shortest and longest output, respectively. */
1569 res.range.min = format_integer (dir, argmin, vr_values).range.min;
1570 res.range.max = format_integer (dir, argmax, vr_values).range.max;
1572 else if (tree_int_cst_sgn (argmax) < 0)
1574 /* For signed conversions/directives if maximum is negative,
1575 use the minimum as the longest output and maximum as the
1576 shortest output. */
1577 res.range.min = format_integer (dir, argmax, vr_values).range.min;
1578 res.range.max = format_integer (dir, argmin, vr_values).range.max;
1580 else
1582 /* Otherwise, 0 is inside of the range and minimum negative. Use 0
1583 as the shortest output and for the longest output compute the
1584 length of the output of both minimum and maximum and pick the
1585 longer. */
1586 unsigned HOST_WIDE_INT max1
1587 = format_integer (dir, argmin, vr_values).range.max;
1588 unsigned HOST_WIDE_INT max2
1589 = format_integer (dir, argmax, vr_values).range.max;
1590 res.range.min
1591 = format_integer (dir, integer_zero_node, vr_values).range.min;
1592 res.range.max = MAX (max1, max2);
1595 /* If the range is known, use the maximum as the likely length. */
1596 if (res.knownrange)
1597 res.range.likely = res.range.max;
1598 else
1600 /* Otherwise, use the minimum. Except for the case where for %#x or
1601 %#o the minimum is just for a single value in the range (0) and
1602 for all other values it is something longer, like 0x1 or 01.
1603 Use the length for value 1 in that case instead as the likely
1604 length. */
1605 res.range.likely = res.range.min;
1606 if (maybebase
1607 && base != 10
1608 && (tree_int_cst_sgn (argmin) < 0 || tree_int_cst_sgn (argmax) > 0))
1610 if (res.range.min == 1)
1611 res.range.likely += base == 8 ? 1 : 2;
1612 else if (res.range.min == 2
1613 && base == 16
1614 && (dir.width[0] == 2 || dir.prec[0] == 2))
1615 ++res.range.likely;
/* As for constant arguments above, adjust the counters first for
width and then for precision. */
1619 res.range.unlikely = res.range.max;
1620 res.adjust_for_width_or_precision (dir.width, dirtype, base,
1621 (sign | maybebase) + (base == 16));
1622 res.adjust_for_width_or_precision (dir.prec, dirtype, base,
1623 (sign | maybebase) + (base == 16));
1625 return res;
1628 /* Return the number of bytes that a format directive consisting of FLAGS,
1629 PRECision, format SPECification, and MPFR rounding specifier RNDSPEC,
1630 would result for argument X under ideal conditions (i.e., if PREC
1631 weren't excessive). MPFR 3.1 allocates large amounts of memory for
1632 values of PREC with large magnitude and can fail (see MPFR bug #21056).
1633 This function works around those problems. */
1635 static unsigned HOST_WIDE_INT
1636 get_mpfr_format_length (mpfr_ptr x, const char *flags, HOST_WIDE_INT prec,
1637 char spec, char rndspec)
1639 char fmtstr[40];
1641 HOST_WIDE_INT len = strlen (flags);
1643 fmtstr[0] = '%';
1644 memcpy (fmtstr + 1, flags, len);
1645 memcpy (fmtstr + 1 + len, ".*R", 3);
1646 fmtstr[len + 4] = rndspec;
1647 fmtstr[len + 5] = spec;
1648 fmtstr[len + 6] = '\0';
1650 spec = TOUPPER (spec);
1651 if (spec == 'E' || spec == 'F')
1653 /* For %e, specify the precision explicitly since mpfr_sprintf
1654 does its own thing just to be different (see MPFR bug 21088). */
1655 if (prec < 0)
1656 prec = 6;
1658 else
1660 /* Avoid passing negative precisions with larger magnitude to MPFR
1661 to avoid exposing its bugs. (A negative precision is supposed
1662 to be ignored.) */
1663 if (prec < 0)
1664 prec = -1;
1667 HOST_WIDE_INT p = prec;
1669 if (spec == 'G' && !strchr (flags, '#'))
1671 /* For G/g without the pound flag, precision gives the maximum number
1672 of significant digits which is bounded by LDBL_MAX_10_EXP, or, for
1673 a 128 bit IEEE extended precision, 4932. Using twice as much here
1674 should be more than sufficient for any real format. */
1675 if ((IEEE_MAX_10_EXP * 2) < prec)
1676 prec = IEEE_MAX_10_EXP * 2;
1677 p = prec;
1679 else
1681 /* Cap precision arbitrarily at 1KB and add the difference
1682 (if any) to the MPFR result. */
1683 if (prec > 1024)
1684 p = 1024;
1687 len = mpfr_snprintf (NULL, 0, fmtstr, (int)p, x);
1689 /* Handle the unlikely (impossible?) error by returning more than
1690 the maximum dictated by the function's return type. */
1691 if (len < 0)
1692 return target_dir_max () + 1;
1694 /* Adjust the return value by the difference. */
1695 if (p < prec)
1696 len += prec - p;
1698 return len;
1701 /* Return the number of bytes to format using the format specifier
1702 SPEC and the precision PREC the largest value in the real floating
1703 TYPE. */
1705 static unsigned HOST_WIDE_INT
1706 format_floating_max (tree type, char spec, HOST_WIDE_INT prec)
1708 machine_mode mode = TYPE_MODE (type);
1710 /* IBM Extended mode. */
1711 if (MODE_COMPOSITE_P (mode))
1712 mode = DFmode;
1714 /* Get the real type format desription for the target. */
1715 const real_format *rfmt = REAL_MODE_FORMAT (mode);
1716 REAL_VALUE_TYPE rv;
1718 real_maxval (&rv, 0, mode);
1720 /* Convert the GCC real value representation with the precision
1721 of the real type to the mpfr_t format with the GCC default
1722 round-to-nearest mode. */
1723 mpfr_t x;
1724 mpfr_init2 (x, rfmt->p);
1725 mpfr_from_real (x, &rv, GMP_RNDN);
1727 /* Return a value one greater to account for the leading minus sign. */
1728 unsigned HOST_WIDE_INT r
1729 = 1 + get_mpfr_format_length (x, "", prec, spec, 'D');
1730 mpfr_clear (x);
1731 return r;
1734 /* Return a range representing the minimum and maximum number of bytes
1735 that the directive DIR will output for any argument. PREC gives
1736 the adjusted precision range to account for negative precisions
1737 meaning the default 6. This function is used when the directive
1738 argument or its value isn't known. A default-constructed fmtresult
is returned for length modifiers other than none, l, L, and ll,
and for specifiers other than a, e, f, and g (in either case).
Note that both DIR.PREC (the precision as specified) and PREC
(the adjusted one) are consulted below. */
1740 static fmtresult
1741 format_floating (const directive &dir, const HOST_WIDE_INT prec[2])
1743 tree type;
1745 switch (dir.modifier)
1747 case FMT_LEN_l:
1748 case FMT_LEN_none:
1749 type = double_type_node;
1750 break;
1752 case FMT_LEN_L:
1753 type = long_double_type_node;
1754 break;
1756 case FMT_LEN_ll:
1757 type = long_double_type_node;
1758 break;
1760 default:
1761 return fmtresult ();
1764 /* The minimum and maximum number of bytes produced by the directive. */
1765 fmtresult res;
1767 /* The minimum output as determined by flags. It's always at least 1.
1768 When plus or space are set the output is preceded by either a sign
1769 or a space. */
1770 unsigned flagmin = (1 /* for the first digit */
1771 + (dir.get_flag ('+') | dir.get_flag (' ')));
1773 /* The minimum is 3 for "inf" and "nan" for all specifiers, plus 1
1774 for the plus sign/space with the '+' and ' ' flags, respectively,
1775 unless reduced below. */
1776 res.range.min = 2 + flagmin;
1778 /* When the pound flag is set the decimal point is included in output
1779 regardless of precision. Whether or not a decimal point is included
1780 otherwise depends on the specification and precision. */
1781 bool radix = dir.get_flag ('#');
1783 switch (dir.specifier)
1785 case 'A':
1786 case 'a':
/* NOTE(review): the initial value of MINPREC is never used: the two
branches that follow cover every value of DIR.PREC[0] and each of
them overwrites it. */
1788 HOST_WIDE_INT minprec = 6 + !radix /* decimal point */;
1789 if (dir.prec[0] <= 0)
1790 minprec = 0;
1791 else if (dir.prec[0] > 0)
1792 minprec = dir.prec[0] + !radix /* decimal point */;
1794 res.range.likely = (2 /* 0x */
1795 + flagmin
1796 + radix
1797 + minprec
1798 + 3 /* p+0 */);
1800 res.range.max = format_floating_max (type, 'a', prec[1]);
1802 /* The unlikely maximum accounts for the longest multibyte
1803 decimal point character. */
1804 res.range.unlikely = res.range.max;
1805 if (dir.prec[1] > 0)
1806 res.range.unlikely += target_mb_len_max () - 1;
1808 break;
1811 case 'E':
1812 case 'e':
1814 /* Minimum output attributable to precision and, when it's
1815 non-zero, decimal point. */
1816 HOST_WIDE_INT minprec = prec[0] ? prec[0] + !radix : 0;
1818 /* The likely minimum output is "[-+]1.234567e+00" regardless
1819 of the value of the actual argument. */
1820 res.range.likely = (flagmin
1821 + radix
1822 + minprec
1823 + 2 /* e+ */ + 2);
1825 res.range.max = format_floating_max (type, 'e', prec[1]);
1827 /* The unlikely maximum accounts for the longest multibyte
1828 decimal point character. */
1829 if (dir.prec[0] != dir.prec[1]
1830 || dir.prec[0] == -1 || dir.prec[0] > 0)
1831 res.range.unlikely = res.range.max + target_mb_len_max () -1;
1832 else
1833 res.range.unlikely = res.range.max;
1834 break;
1837 case 'F':
1838 case 'f':
1840 /* Minimum output attributable to precision and, when it's non-zero,
1841 decimal point. */
1842 HOST_WIDE_INT minprec = prec[0] ? prec[0] + !radix : 0;
1844 /* For finite numbers (i.e., not infinity or NaN) the lower bound
1845 when precision isn't specified is 8 bytes ("1.23456" since
1846 precision is taken to be 6). When precision is zero, the lower
1847 bound is 1 byte (e.g., "1"). Otherwise, when precision is greater
1848 than zero, then the lower bound is 2 plus precision (plus flags).
1849 But in all cases, the lower bound is no greater than 3. */
1850 unsigned HOST_WIDE_INT min = flagmin + radix + minprec;
1851 if (min < res.range.min)
1852 res.range.min = min;
1854 /* Compute the upper bound for -TYPE_MAX. */
1855 res.range.max = format_floating_max (type, 'f', prec[1]);
1857 /* The minimum output with unknown precision is a single byte
1858 (e.g., "0") but the more likely output is 3 bytes ("0.0"). */
1859 if (dir.prec[0] < 0 && dir.prec[1] > 0)
1860 res.range.likely = 3;
1861 else
1862 res.range.likely = min;
1864 /* The unlikely maximum accounts for the longest multibyte
1865 decimal point character.
NOTE(review): unlike in the 'e' case above there is no else
here, so when the precision is known to be zero
RES.RANGE.UNLIKELY keeps whatever value the fmtresult
constructor gave it -- confirm that this is intended. */
1866 if (dir.prec[0] != dir.prec[1]
1867 || dir.prec[0] == -1 || dir.prec[0] > 0)
1868 res.range.unlikely = res.range.max + target_mb_len_max () - 1;
1869 break;
1872 case 'G':
1873 case 'g':
1875 /* The %g output depends on precision and the exponent of
1876 the argument. Since the value of the argument isn't known
1877 the lower bound on the range of bytes (not counting flags
1878 or width) is 1 plus radix (i.e., either "0" or "0." for
1879 "%g" and "%#g", respectively, with a zero argument). */
1880 unsigned HOST_WIDE_INT min = flagmin + radix;
1881 if (min < res.range.min)
1882 res.range.min = min;
1884 char spec = 'g';
1885 HOST_WIDE_INT maxprec = dir.prec[1];
1886 if (radix && maxprec)
1888 /* When the pound flag (radix) is set, trailing zeros aren't
1889 trimmed and so the longest output is the same as for %e,
1890 except with precision minus 1 (as specified in C11). */
1891 spec = 'e';
1892 if (maxprec > 0)
1893 --maxprec;
1894 else if (maxprec < 0)
1895 maxprec = 5;
1897 else
1898 maxprec = prec[1];
1900 res.range.max = format_floating_max (type, spec, maxprec);
1902 /* The likely output is either the maximum computed above
1903 minus 1 (assuming the maximum is positive) when precision
1904 is known (or unspecified), or the same minimum as for %e
1905 (which is computed for a non-negative argument). Unlike
1906 for the other specifiers above the likely output isn't
1907 the minimum because for %g that's 1 which is unlikely. */
1908 if (dir.prec[1] < 0
1909 || (unsigned HOST_WIDE_INT)dir.prec[1] < target_int_max ())
1910 res.range.likely = res.range.max - 1;
1911 else
1913 HOST_WIDE_INT minprec = 6 + !radix /* decimal point */;
1914 res.range.likely = (flagmin
1915 + radix
1916 + minprec
1917 + 2 /* e+ */ + 2);
1920 /* The unlikely maximum accounts for the longest multibyte
1921 decimal point character. */
1922 res.range.unlikely = res.range.max + target_mb_len_max () - 1;
1923 break;
1926 default:
1927 return fmtresult ();
1930 /* Bump up the byte counters if WIDTH is greater. */
1931 res.adjust_for_width_or_precision (dir.width);
1932 return res;
1935 /* Return a range representing the minimum and maximum number of bytes
1936 that the directive DIR will write on output for the floating argument
1937 ARG. */
1939 static fmtresult
1940 format_floating (const directive &dir, tree arg, vr_values *)
1942 HOST_WIDE_INT prec[] = { dir.prec[0], dir.prec[1] };
1943 tree type = (dir.modifier == FMT_LEN_L || dir.modifier == FMT_LEN_ll
1944 ? long_double_type_node : double_type_node);
1946 /* For an indeterminate precision the lower bound must be assumed
1947 to be zero. */
1948 if (TOUPPER (dir.specifier) == 'A')
1950 /* Get the number of fractional decimal digits needed to represent
1951 the argument without a loss of accuracy. */
1952 unsigned fmtprec
1953 = REAL_MODE_FORMAT (TYPE_MODE (type))->p;
1955 /* The precision of the IEEE 754 double format is 53.
1956 The precision of all other GCC binary double formats
1957 is 56 or less. */
1958 unsigned maxprec = fmtprec <= 56 ? 13 : 15;
1960 /* For %a, leave the minimum precision unspecified to let
1961 MFPR trim trailing zeros (as it and many other systems
1962 including Glibc happen to do) and set the maximum
1963 precision to reflect what it would be with trailing zeros
1964 present (as Solaris and derived systems do). */
1965 if (dir.prec[1] < 0)
1967 /* Both bounds are negative implies that precision has
1968 not been specified. */
1969 prec[0] = maxprec;
1970 prec[1] = -1;
1972 else if (dir.prec[0] < 0)
1974 /* With a negative lower bound and a non-negative upper
1975 bound set the minimum precision to zero and the maximum
1976 to the greater of the maximum precision (i.e., with
1977 trailing zeros present) and the specified upper bound. */
1978 prec[0] = 0;
1979 prec[1] = dir.prec[1] < maxprec ? maxprec : dir.prec[1];
1982 else if (dir.prec[0] < 0)
1984 if (dir.prec[1] < 0)
1986 /* A precision in a strictly negative range is ignored and
1987 the default of 6 is used instead. */
1988 prec[0] = prec[1] = 6;
1990 else
1992 /* For a precision in a partly negative range, the lower bound
1993 must be assumed to be zero and the new upper bound is the
1994 greater of 6 (the default precision used when the specified
1995 precision is negative) and the upper bound of the specified
1996 range. */
1997 prec[0] = 0;
1998 prec[1] = dir.prec[1] < 6 ? 6 : dir.prec[1];
2002 if (!arg
2003 || TREE_CODE (arg) != REAL_CST
2004 || !useless_type_conversion_p (type, TREE_TYPE (arg)))
2005 return format_floating (dir, prec);
2007 /* The minimum and maximum number of bytes produced by the directive. */
2008 fmtresult res;
2010 /* Get the real type format desription for the target. */
2011 const REAL_VALUE_TYPE *rvp = TREE_REAL_CST_PTR (arg);
2012 const real_format *rfmt = REAL_MODE_FORMAT (TYPE_MODE (TREE_TYPE (arg)));
2014 if (!real_isfinite (rvp))
2016 /* The format for Infinity and NaN is "[-]inf"/"[-]infinity"
2017 and "[-]nan" with the choice being implementation-defined
2018 but not locale dependent. */
2019 bool sign = dir.get_flag ('+') || real_isneg (rvp);
2020 res.range.min = 3 + sign;
2022 res.range.likely = res.range.min;
2023 res.range.max = res.range.min;
2024 /* The unlikely maximum is "[-/+]infinity" or "[-/+][qs]nan".
2025 For NaN, the C/POSIX standards specify two formats:
2026 "[-/+]nan"
2028 "[-/+]nan(n-char-sequence)"
2029 No known printf implementation outputs the latter format but AIX
2030 outputs QNaN and SNaN for quiet and signalling NaN, respectively,
2031 so the unlikely maximum reflects that. */
2032 res.range.unlikely = sign + (real_isinf (rvp) ? 8 : 4);
2034 /* The range for infinity and NaN is known unless either width
2035 or precision is unknown. Width has the same effect regardless
2036 of whether the argument is finite. Precision is either ignored
2037 (e.g., Glibc) or can have an effect on the short vs long format
2038 such as inf/infinity (e.g., Solaris). */
2039 res.knownrange = dir.known_width_and_precision ();
2041 /* Adjust the range for width but ignore precision. */
2042 res.adjust_for_width_or_precision (dir.width);
2044 return res;
2047 char fmtstr [40];
2048 char *pfmt = fmtstr;
2050 /* Append flags. */
2051 for (const char *pf = "-+ #0"; *pf; ++pf)
2052 if (dir.get_flag (*pf))
2053 *pfmt++ = *pf;
2055 *pfmt = '\0';
2058 /* Set up an array to easily iterate over. */
2059 unsigned HOST_WIDE_INT* const minmax[] = {
2060 &res.range.min, &res.range.max
2063 for (int i = 0; i != sizeof minmax / sizeof *minmax; ++i)
2065 /* Convert the GCC real value representation with the precision
2066 of the real type to the mpfr_t format rounding down in the
2067 first iteration that computes the minimm and up in the second
2068 that computes the maximum. This order is arbibtrary because
2069 rounding in either direction can result in longer output. */
2070 mpfr_t mpfrval;
2071 mpfr_init2 (mpfrval, rfmt->p);
2072 mpfr_from_real (mpfrval, rvp, i ? GMP_RNDU : GMP_RNDD);
2074 /* Use the MPFR rounding specifier to round down in the first
2075 iteration and then up. In most but not all cases this will
2076 result in the same number of bytes. */
2077 char rndspec = "DU"[i];
2079 /* Format it and store the result in the corresponding member
2080 of the result struct. */
2081 *minmax[i] = get_mpfr_format_length (mpfrval, fmtstr, prec[i],
2082 dir.specifier, rndspec);
2083 mpfr_clear (mpfrval);
2087 /* Make sure the minimum is less than the maximum (MPFR rounding
2088 in the call to mpfr_snprintf can result in the reverse. */
2089 if (res.range.max < res.range.min)
2091 unsigned HOST_WIDE_INT tmp = res.range.min;
2092 res.range.min = res.range.max;
2093 res.range.max = tmp;
2096 /* The range is known unless either width or precision is unknown. */
2097 res.knownrange = dir.known_width_and_precision ();
2099 /* For the same floating point constant, unless width or precision
2100 is unknown, use the longer output as the likely maximum since
2101 with round to nearest either is equally likely. Otherwise, when
2102 precision is unknown, use the greater of the minimum and 3 as
2103 the likely output (for "0.0" since zero precision is unlikely). */
2104 if (res.knownrange)
2105 res.range.likely = res.range.max;
2106 else if (res.range.min < 3
2107 && dir.prec[0] < 0
2108 && (unsigned HOST_WIDE_INT)dir.prec[1] == target_int_max ())
2109 res.range.likely = 3;
2110 else
2111 res.range.likely = res.range.min;
2113 res.range.unlikely = res.range.max;
2115 if (res.range.max > 2 && (prec[0] != 0 || prec[1] != 0))
2117 /* Unless the precision is zero output longer than 2 bytes may
2118 include the decimal point which must be a single character
2119 up to MB_LEN_MAX in length. This is overly conservative
2120 since in some conversions some constants result in no decimal
2121 point (e.g., in %g). */
2122 res.range.unlikely += target_mb_len_max () - 1;
2125 res.adjust_for_width_or_precision (dir.width);
2126 return res;
2129 /* Return a FMTRESULT struct set to the lengths of the shortest and longest
2130 strings referenced by the expression STR, or (-1, -1) when not known.
2131 Used by the format_string function below. */
2133 static fmtresult
2134 get_string_length (tree str, unsigned eltsize)
2136 if (!str)
2137 return fmtresult ();
2139 if (tree slen = c_strlen (str, 1, eltsize))
2141 /* Simply return the length of the string. */
2142 fmtresult res (tree_to_shwi (slen));
2143 return res;
2146 /* Determine the length of the shortest and longest string referenced
2147 by STR. Strings of unknown lengths are bounded by the sizes of
2148 arrays that subexpressions of STR may refer to. Pointers that
2149 aren't known to point any such arrays result in LENRANGE[1] set
2150 to SIZE_MAX. */
2151 tree lenrange[2];
2152 bool flexarray = get_range_strlen (str, lenrange, eltsize);
2154 if (lenrange [0] || lenrange [1])
2156 HOST_WIDE_INT min
2157 = (tree_fits_uhwi_p (lenrange[0])
2158 ? tree_to_uhwi (lenrange[0])
2159 : 0);
2161 HOST_WIDE_INT max
2162 = (tree_fits_uhwi_p (lenrange[1])
2163 ? tree_to_uhwi (lenrange[1])
2164 : HOST_WIDE_INT_M1U);
2166 /* get_range_strlen() returns the target value of SIZE_MAX for
2167 strings of unknown length. Bump it up to HOST_WIDE_INT_M1U
2168 which may be bigger. */
2169 if ((unsigned HOST_WIDE_INT)min == target_size_max ())
2170 min = HOST_WIDE_INT_M1U;
2171 if ((unsigned HOST_WIDE_INT)max == target_size_max ())
2172 max = HOST_WIDE_INT_M1U;
2174 fmtresult res (min, max);
2176 /* Set RES.KNOWNRANGE to true if and only if all strings referenced
2177 by STR are known to be bounded (though not necessarily by their
2178 actual length but perhaps by their maximum possible length). */
2179 if (res.range.max < target_int_max ())
2181 res.knownrange = true;
2182 /* When the the length of the longest string is known and not
2183 excessive use it as the likely length of the string(s). */
2184 res.range.likely = res.range.max;
2186 else
2188 /* When the upper bound is unknown (it can be zero or excessive)
2189 set the likely length to the greater of 1 and the length of
2190 the shortest string and reset the lower bound to zero. */
2191 res.range.likely = res.range.min ? res.range.min : warn_level > 1;
2192 res.range.min = 0;
2195 /* If the range of string length has been estimated from the size
2196 of an array at the end of a struct assume that it's longer than
2197 the array bound says it is in case it's used as a poor man's
2198 flexible array member, such as in struct S { char a[4]; }; */
2199 res.range.unlikely = flexarray ? HOST_WIDE_INT_MAX : res.range.max;
2201 return res;
2204 return fmtresult ();
2207 /* Return the minimum and maximum number of characters formatted
2208 by the '%c' format directives and its wide character form for
2209 the argument ARG. ARG can be null (for functions such as
2210 vsprinf). */
2212 static fmtresult
2213 format_character (const directive &dir, tree arg, vr_values *vr_values)
2215 fmtresult res;
2217 res.knownrange = true;
2219 if (dir.specifier == 'C'
2220 || dir.modifier == FMT_LEN_l)
2222 /* A wide character can result in as few as zero bytes. */
2223 res.range.min = 0;
2225 HOST_WIDE_INT min, max;
2226 if (get_int_range (arg, &min, &max, false, 0, vr_values))
2228 if (min == 0 && max == 0)
2230 /* The NUL wide character results in no bytes. */
2231 res.range.max = 0;
2232 res.range.likely = 0;
2233 res.range.unlikely = 0;
2235 else if (min >= 0 && min < 128)
2237 /* Be conservative if the target execution character set
2238 is not a 1-to-1 mapping to the source character set or
2239 if the source set is not ASCII. */
2240 bool one_2_one_ascii
2241 = (target_to_host_charmap[0] == 1 && target_to_host ('a') == 97);
2243 /* A wide character in the ASCII range most likely results
2244 in a single byte, and only unlikely in up to MB_LEN_MAX. */
2245 res.range.max = one_2_one_ascii ? 1 : target_mb_len_max ();;
2246 res.range.likely = 1;
2247 res.range.unlikely = target_mb_len_max ();
2248 res.mayfail = !one_2_one_ascii;
2250 else
2252 /* A wide character outside the ASCII range likely results
2253 in up to two bytes, and only unlikely in up to MB_LEN_MAX. */
2254 res.range.max = target_mb_len_max ();
2255 res.range.likely = 2;
2256 res.range.unlikely = res.range.max;
2257 /* Converting such a character may fail. */
2258 res.mayfail = true;
2261 else
2263 /* An unknown wide character is treated the same as a wide
2264 character outside the ASCII range. */
2265 res.range.max = target_mb_len_max ();
2266 res.range.likely = 2;
2267 res.range.unlikely = res.range.max;
2268 res.mayfail = true;
2271 else
2273 /* A plain '%c' directive. Its ouput is exactly 1. */
2274 res.range.min = res.range.max = 1;
2275 res.range.likely = res.range.unlikely = 1;
2276 res.knownrange = true;
2279 /* Bump up the byte counters if WIDTH is greater. */
2280 return res.adjust_for_width_or_precision (dir.width);
2283 /* Return the minimum and maximum number of characters formatted
2284 by the '%s' format directive and its wide character form for
2285 the argument ARG. ARG can be null (for functions such as
2286 vsprinf). */
2288 static fmtresult
2289 format_string (const directive &dir, tree arg, vr_values *)
2291 fmtresult res;
2293 /* Compute the range the argument's length can be in. */
2294 int count_by = dir.specifier == 'S' || dir.modifier == FMT_LEN_l ? 4 : 1;
2295 fmtresult slen = get_string_length (arg, count_by);
2296 if (slen.range.min == slen.range.max
2297 && slen.range.min < HOST_WIDE_INT_MAX)
2299 /* The argument is either a string constant or it refers
2300 to one of a number of strings of the same length. */
2302 /* A '%s' directive with a string argument with constant length. */
2303 res.range = slen.range;
2305 if (dir.specifier == 'S'
2306 || dir.modifier == FMT_LEN_l)
2308 /* In the worst case the length of output of a wide string S
2309 is bounded by MB_LEN_MAX * wcslen (S). */
2310 res.range.max *= target_mb_len_max ();
2311 res.range.unlikely = res.range.max;
2312 /* It's likely that the the total length is not more that
2313 2 * wcslen (S).*/
2314 res.range.likely = res.range.min * 2;
2316 if (dir.prec[1] >= 0
2317 && (unsigned HOST_WIDE_INT)dir.prec[1] < res.range.max)
2319 res.range.max = dir.prec[1];
2320 res.range.likely = dir.prec[1];
2321 res.range.unlikely = dir.prec[1];
2324 if (dir.prec[0] < 0 && dir.prec[1] > -1)
2325 res.range.min = 0;
2326 else if (dir.prec[0] >= 0)
2327 res.range.likely = dir.prec[0];
2329 /* Even a non-empty wide character string need not convert into
2330 any bytes. */
2331 res.range.min = 0;
2333 /* A non-empty wide character conversion may fail. */
2334 if (slen.range.max > 0)
2335 res.mayfail = true;
2337 else
2339 res.knownrange = true;
2341 if (dir.prec[0] < 0 && dir.prec[1] > -1)
2342 res.range.min = 0;
2343 else if ((unsigned HOST_WIDE_INT)dir.prec[0] < res.range.min)
2344 res.range.min = dir.prec[0];
2346 if ((unsigned HOST_WIDE_INT)dir.prec[1] < res.range.max)
2348 res.range.max = dir.prec[1];
2349 res.range.likely = dir.prec[1];
2350 res.range.unlikely = dir.prec[1];
2354 else if (arg && integer_zerop (arg))
2356 /* Handle null pointer argument. */
2358 fmtresult res (0);
2359 res.nullp = true;
2360 return res;
2362 else
2364 /* For a '%s' and '%ls' directive with a non-constant string (either
2365 one of a number of strings of known length or an unknown string)
2366 the minimum number of characters is lesser of PRECISION[0] and
2367 the length of the shortest known string or zero, and the maximum
2368 is the lessser of the length of the longest known string or
2369 PTRDIFF_MAX and PRECISION[1]. The likely length is either
2370 the minimum at level 1 and the greater of the minimum and 1
2371 at level 2. This result is adjust upward for width (if it's
2372 specified). */
2374 if (dir.specifier == 'S'
2375 || dir.modifier == FMT_LEN_l)
2377 /* A wide character converts to as few as zero bytes. */
2378 slen.range.min = 0;
2379 if (slen.range.max < target_int_max ())
2380 slen.range.max *= target_mb_len_max ();
2382 if (slen.range.likely < target_int_max ())
2383 slen.range.likely *= 2;
2385 if (slen.range.likely < target_int_max ())
2386 slen.range.unlikely *= target_mb_len_max ();
2388 /* A non-empty wide character conversion may fail. */
2389 if (slen.range.max > 0)
2390 res.mayfail = true;
2393 res.range = slen.range;
2395 if (dir.prec[0] >= 0)
2397 /* Adjust the minimum to zero if the string length is unknown,
2398 or at most the lower bound of the precision otherwise. */
2399 if (slen.range.min >= target_int_max ())
2400 res.range.min = 0;
2401 else if ((unsigned HOST_WIDE_INT)dir.prec[0] < slen.range.min)
2402 res.range.min = dir.prec[0];
2404 /* Make both maxima no greater than the upper bound of precision. */
2405 if ((unsigned HOST_WIDE_INT)dir.prec[1] < slen.range.max
2406 || slen.range.max >= target_int_max ())
2408 res.range.max = dir.prec[1];
2409 res.range.unlikely = dir.prec[1];
2412 /* If precision is constant, set the likely counter to the lesser
2413 of it and the maximum string length. Otherwise, if the lower
2414 bound of precision is greater than zero, set the likely counter
2415 to the minimum. Otherwise set it to zero or one based on
2416 the warning level. */
2417 if (dir.prec[0] == dir.prec[1])
2418 res.range.likely
2419 = ((unsigned HOST_WIDE_INT)dir.prec[0] < slen.range.max
2420 ? dir.prec[0] : slen.range.max);
2421 else if (dir.prec[0] > 0)
2422 res.range.likely = res.range.min;
2423 else
2424 res.range.likely = warn_level > 1;
2426 else if (dir.prec[1] >= 0)
2428 res.range.min = 0;
2429 if ((unsigned HOST_WIDE_INT)dir.prec[1] < slen.range.max)
2430 res.range.max = dir.prec[1];
2431 res.range.likely = dir.prec[1] ? warn_level > 1 : 0;
2433 else if (slen.range.min >= target_int_max ())
2435 res.range.min = 0;
2436 res.range.max = HOST_WIDE_INT_MAX;
2437 /* At level 1 strings of unknown length are assumed to be
2438 empty, while at level 1 they are assumed to be one byte
2439 long. */
2440 res.range.likely = warn_level > 1;
2442 else
2444 /* A string of unknown length unconstrained by precision is
2445 assumed to be empty at level 1 and just one character long
2446 at higher levels. */
2447 if (res.range.likely >= target_int_max ())
2448 res.range.likely = warn_level > 1;
2451 res.range.unlikely = res.range.max;
2454 /* Bump up the byte counters if WIDTH is greater. */
2455 return res.adjust_for_width_or_precision (dir.width);
2458 /* Format plain string (part of the format string itself). */
2460 static fmtresult
2461 format_plain (const directive &dir, tree, vr_values *)
2463 fmtresult res (dir.len);
2464 return res;
2467 /* Return true if the RESULT of a directive in a call describe by INFO
2468 should be diagnosed given the AVAILable space in the destination. */
2470 static bool
2471 should_warn_p (const sprintf_dom_walker::call_info &info,
2472 const result_range &avail, const result_range &result)
2474 if (result.max <= avail.min)
2476 /* The least amount of space remaining in the destination is big
2477 enough for the longest output. */
2478 return false;
2481 if (info.bounded)
2483 if (warn_format_trunc == 1 && result.min <= avail.max
2484 && info.retval_used ())
2486 /* The likely amount of space remaining in the destination is big
2487 enough for the least output and the return value is used. */
2488 return false;
2491 if (warn_format_trunc == 1 && result.likely <= avail.likely
2492 && !info.retval_used ())
2494 /* The likely amount of space remaining in the destination is big
2495 enough for the likely output and the return value is unused. */
2496 return false;
2499 if (warn_format_trunc == 2
2500 && result.likely <= avail.min
2501 && (result.max <= avail.min
2502 || result.max > HOST_WIDE_INT_MAX))
2504 /* The minimum amount of space remaining in the destination is big
2505 enough for the longest output. */
2506 return false;
2509 else
2511 if (warn_level == 1 && result.likely <= avail.likely)
2513 /* The likely amount of space remaining in the destination is big
2514 enough for the likely output. */
2515 return false;
2518 if (warn_level == 2
2519 && result.likely <= avail.min
2520 && (result.max <= avail.min
2521 || result.max > HOST_WIDE_INT_MAX))
2523 /* The minimum amount of space remaining in the destination is big
2524 enough for the longest output. */
2525 return false;
2529 return true;
2532 /* At format string location described by DIRLOC in a call described
2533 by INFO, issue a warning for a directive DIR whose output may be
2534 in excess of the available space AVAIL_RANGE in the destination
2535 given the formatting result RES. This function does nothing
2536 except decide whether to issue a warning for a possible write
2537 past the end or truncation and, if so, format the warning.
2538 Return true if a warning has been issued.
   ARGLOC is the location of the directive's argument; it is passed
   to the diagnostic in all cases except for the terminating nul, for
   which UNKNOWN_LOCATION is used instead. The wording of the warning
   depends on whether the call is bounded (INFO.BOUNDED: "truncated"
   versus "writing"), on whether the problem is only possible (MAYBE
   below), on whether the destination size is exact or a range, and
   on the shape of the range of the directive's output in RES. */
2540 static bool
2541 maybe_warn (substring_loc &dirloc, location_t argloc,
2542 const sprintf_dom_walker::call_info &info,
2543 const result_range &avail_range, const result_range &res,
2544 const directive &dir)
2546 if (!should_warn_p (info, avail_range, res))
2547 return false;
2549 /* A warning will definitely be issued below. */
2551 /* The maximum byte count to reference in the warning. Larger counts
2552 imply that the upper bound is unknown (and could be anywhere between
2553 RES.MIN + 1 and SIZE_MAX / 2) are printed as "N or more bytes" rather
2554 than "between N and X" where X is some huge number. */
2555 unsigned HOST_WIDE_INT maxbytes = target_dir_max ();
2557 /* True when there is enough room in the destination for the least
2558 amount of a directive's output but not enough for its likely or
2559 maximum output. */
2560 bool maybe = (res.min <= avail_range.max
2561 && (avail_range.min < res.likely
2562 || (res.max < HOST_WIDE_INT_MAX
2563 && avail_range.min < res.max)));
2565 /* Buffer for the directive in the host character set (used when
2566 the source character set is different). */
2567 char hostdir[32];
2569 if (avail_range.min == avail_range.max)
2571 /* The size of the destination region is exact. */
2572 unsigned HOST_WIDE_INT navail = avail_range.max;
2574 if (target_to_host (*dir.beg) != '%')
2576 /* For plain character directives (i.e., the format string itself)
2577 but not others, point the caret at the first character that's
2578 past the end of the destination. */
2579 if (navail < dir.len)
2580 dirloc.set_caret_index (dirloc.get_caret_idx () + navail);
2583 if (*dir.beg == '\0')
2585 /* This is the terminating nul. */
2586 gcc_assert (res.min == 1 && res.min == res.max);
2588 return fmtwarn (dirloc, UNKNOWN_LOCATION, NULL, info.warnopt (),
2589 info.bounded
2590 ? (maybe
2591 ? G_("%qE output may be truncated before the "
2592 "last format character")
2593 : G_("%qE output truncated before the last "
2594 "format character"))
2595 : (maybe
2596 ? G_("%qE may write a terminating nul past the "
2597 "end of the destination")
2598 : G_("%qE writing a terminating nul past the "
2599 "end of the destination")),
2600 info.func);
/* The directive's output has an exact length: RES.MIN is passed to
   fmtwarn_n as the count along with the singular and plural forms
   of the message. */
2603 if (res.min == res.max)
2605 const char *d = target_to_host (hostdir, sizeof hostdir, dir.beg);
2606 if (!info.bounded)
2607 return fmtwarn_n (dirloc, argloc, NULL, info.warnopt (), res.min,
2608 "%<%.*s%> directive writing %wu byte into a "
2609 "region of size %wu",
2610 "%<%.*s%> directive writing %wu bytes into a "
2611 "region of size %wu",
2612 (int) dir.len, d, res.min, navail);
2613 else if (maybe)
2614 return fmtwarn_n (dirloc, argloc, NULL, info.warnopt (), res.min,
2615 "%<%.*s%> directive output may be truncated "
2616 "writing %wu byte into a region of size %wu",
2617 "%<%.*s%> directive output may be truncated "
2618 "writing %wu bytes into a region of size %wu",
2619 (int) dir.len, d, res.min, navail);
2620 else
2621 return fmtwarn_n (dirloc, argloc, NULL, info.warnopt (), res.min,
2622 "%<%.*s%> directive output truncated writing "
2623 "%wu byte into a region of size %wu",
2624 "%<%.*s%> directive output truncated writing "
2625 "%wu bytes into a region of size %wu",
2626 (int) dir.len, d, res.min, navail);
/* Anywhere from zero to fewer than MAXBYTES bytes: report only the
   upper bound ("up to N bytes"). */
2628 if (res.min == 0 && res.max < maxbytes)
2629 return fmtwarn (dirloc, argloc, NULL,
2630 info.warnopt (),
2631 info.bounded
2632 ? (maybe
2633 ? G_("%<%.*s%> directive output may be truncated "
2634 "writing up to %wu bytes into a region of "
2635 "size %wu")
2636 : G_("%<%.*s%> directive output truncated writing "
2637 "up to %wu bytes into a region of size %wu"))
2638 : G_("%<%.*s%> directive writing up to %wu bytes "
2639 "into a region of size %wu"), (int) dir.len,
2640 target_to_host (hostdir, sizeof hostdir, dir.beg),
2641 res.max, navail);
2643 if (res.min == 0 && maxbytes <= res.max)
2644 /* This is a special case to avoid issuing the potentially
2645 confusing warning:
2646 writing 0 or more bytes into a region of size 0. */
2647 return fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2648 info.bounded
2649 ? (maybe
2650 ? G_("%<%.*s%> directive output may be truncated "
2651 "writing likely %wu or more bytes into a "
2652 "region of size %wu")
2653 : G_("%<%.*s%> directive output truncated writing "
2654 "likely %wu or more bytes into a region of "
2655 "size %wu"))
2656 : G_("%<%.*s%> directive writing likely %wu or more "
2657 "bytes into a region of size %wu"), (int) dir.len,
2658 target_to_host (hostdir, sizeof hostdir, dir.beg),
2659 res.likely, navail);
/* A nonzero minimum and a maximum below MAXBYTES: report both
   bounds ("between N and X bytes"). */
2661 if (res.max < maxbytes)
2662 return fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2663 info.bounded
2664 ? (maybe
2665 ? G_("%<%.*s%> directive output may be truncated "
2666 "writing between %wu and %wu bytes into a "
2667 "region of size %wu")
2668 : G_("%<%.*s%> directive output truncated "
2669 "writing between %wu and %wu bytes into a "
2670 "region of size %wu"))
2671 : G_("%<%.*s%> directive writing between %wu and "
2672 "%wu bytes into a region of size %wu"),
2673 (int) dir.len,
2674 target_to_host (hostdir, sizeof hostdir, dir.beg),
2675 res.min, res.max, navail);
/* The maximum is MAXBYTES or more: report only the lower bound
   ("N or more bytes"). */
2677 return fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2678 info.bounded
2679 ? (maybe
2680 ? G_("%<%.*s%> directive output may be truncated "
2681 "writing %wu or more bytes into a region of "
2682 "size %wu")
2683 : G_("%<%.*s%> directive output truncated writing "
2684 "%wu or more bytes into a region of size %wu"))
2685 : G_("%<%.*s%> directive writing %wu or more bytes "
2686 "into a region of size %wu"), (int) dir.len,
2687 target_to_host (hostdir, sizeof hostdir, dir.beg),
2688 res.min, navail);
2691 /* The size of the destination region is a range. The cases below
   mirror those for an exact size above but the messages mention both
   AVAIL_RANGE.MIN and AVAIL_RANGE.MAX. */
2693 if (target_to_host (*dir.beg) != '%')
2695 unsigned HOST_WIDE_INT navail = avail_range.max;
2697 /* For plain character directives (i.e., the format string itself)
2698 but not others, point the caret at the first character that's
2699 past the end of the destination. */
2700 if (navail < dir.len)
2701 dirloc.set_caret_index (dirloc.get_caret_idx () + navail);
2704 if (*dir.beg == '\0')
/* This is the terminating nul. */
2706 gcc_assert (res.min == 1 && res.min == res.max);
2708 return fmtwarn (dirloc, UNKNOWN_LOCATION, NULL, info.warnopt (),
2709 info.bounded
2710 ? (maybe
2711 ? G_("%qE output may be truncated before the last "
2712 "format character")
2713 : G_("%qE output truncated before the last format "
2714 "character"))
2715 : (maybe
2716 ? G_("%qE may write a terminating nul past the end "
2717 "of the destination")
2718 : G_("%qE writing a terminating nul past the end "
2719 "of the destination")), info.func);
/* The directive's output has an exact length. */
2722 if (res.min == res.max)
2724 const char *d = target_to_host (hostdir, sizeof hostdir, dir.beg);
2725 if (!info.bounded)
2726 return fmtwarn_n (dirloc, argloc, NULL, info.warnopt (), res.min,
2727 "%<%.*s%> directive writing %wu byte into a region "
2728 "of size between %wu and %wu",
2729 "%<%.*s%> directive writing %wu bytes into a region "
2730 "of size between %wu and %wu", (int) dir.len, d,
2731 res.min, avail_range.min, avail_range.max);
2732 else if (maybe)
2733 return fmtwarn_n (dirloc, argloc, NULL, info.warnopt (), res.min,
2734 "%<%.*s%> directive output may be truncated writing "
2735 "%wu byte into a region of size between %wu and %wu",
2736 "%<%.*s%> directive output may be truncated writing "
2737 "%wu bytes into a region of size between %wu and "
2738 "%wu", (int) dir.len, d, res.min, avail_range.min,
2739 avail_range.max);
2740 else
2741 return fmtwarn_n (dirloc, argloc, NULL, info.warnopt (), res.min,
2742 "%<%.*s%> directive output truncated writing %wu "
2743 "byte into a region of size between %wu and %wu",
2744 "%<%.*s%> directive output truncated writing %wu "
2745 "bytes into a region of size between %wu and %wu",
2746 (int) dir.len, d, res.min, avail_range.min,
2747 avail_range.max);
/* Anywhere from zero to fewer than MAXBYTES bytes: report only the
   upper bound. */
2750 if (res.min == 0 && res.max < maxbytes)
2751 return fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2752 info.bounded
2753 ? (maybe
2754 ? G_("%<%.*s%> directive output may be truncated "
2755 "writing up to %wu bytes into a region of size "
2756 "between %wu and %wu")
2757 : G_("%<%.*s%> directive output truncated writing "
2758 "up to %wu bytes into a region of size between "
2759 "%wu and %wu"))
2760 : G_("%<%.*s%> directive writing up to %wu bytes "
2761 "into a region of size between %wu and %wu"),
2762 (int) dir.len,
2763 target_to_host (hostdir, sizeof hostdir, dir.beg),
2764 res.max, avail_range.min, avail_range.max);
2766 if (res.min == 0 && maxbytes <= res.max)
2767 /* This is a special case to avoid issuing the potentially confusing
2768 warning:
2769 writing 0 or more bytes into a region of size between 0 and N. */
2770 return fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2771 info.bounded
2772 ? (maybe
2773 ? G_("%<%.*s%> directive output may be truncated "
2774 "writing likely %wu or more bytes into a region "
2775 "of size between %wu and %wu")
2776 : G_("%<%.*s%> directive output truncated writing "
2777 "likely %wu or more bytes into a region of size "
2778 "between %wu and %wu"))
2779 : G_("%<%.*s%> directive writing likely %wu or more bytes "
2780 "into a region of size between %wu and %wu"),
2781 (int) dir.len,
2782 target_to_host (hostdir, sizeof hostdir, dir.beg),
2783 res.likely, avail_range.min, avail_range.max);
/* A nonzero minimum and a maximum below MAXBYTES: report both
   bounds. */
2785 if (res.max < maxbytes)
2786 return fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2787 info.bounded
2788 ? (maybe
2789 ? G_("%<%.*s%> directive output may be truncated "
2790 "writing between %wu and %wu bytes into a region "
2791 "of size between %wu and %wu")
2792 : G_("%<%.*s%> directive output truncated writing "
2793 "between %wu and %wu bytes into a region of size "
2794 "between %wu and %wu"))
2795 : G_("%<%.*s%> directive writing between %wu and "
2796 "%wu bytes into a region of size between %wu and "
2797 "%wu"), (int) dir.len,
2798 target_to_host (hostdir, sizeof hostdir, dir.beg),
2799 res.min, res.max, avail_range.min, avail_range.max);
/* The maximum is MAXBYTES or more: report only the lower bound. */
2801 return fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2802 info.bounded
2803 ? (maybe
2804 ? G_("%<%.*s%> directive output may be truncated writing "
2805 "%wu or more bytes into a region of size between "
2806 "%wu and %wu")
2807 : G_("%<%.*s%> directive output truncated writing "
2808 "%wu or more bytes into a region of size between "
2809 "%wu and %wu"))
2810 : G_("%<%.*s%> directive writing %wu or more bytes "
2811 "into a region of size between %wu and %wu"),
2812 (int) dir.len,
2813 target_to_host (hostdir, sizeof hostdir, dir.beg),
2814 res.min, avail_range.min, avail_range.max);
2817 /* Compute the length of the output resulting from the directive DIR
2818 in a call described by INFO and update the overall result of the call
2819 in *RES. Return true if the directive has been handled. */
2821 static bool
2822 format_directive (const sprintf_dom_walker::call_info &info,
2823 format_result *res, const directive &dir,
2824 class vr_values *vr_values)
2826 /* Offset of the beginning of the directive from the beginning
2827 of the format string. */
2828 size_t offset = dir.beg - info.fmtstr;
2829 size_t start = offset;
2830 size_t length = offset + dir.len - !!dir.len;
2832 /* Create a location for the whole directive from the % to the format
2833 specifier. */
2834 substring_loc dirloc (info.fmtloc, TREE_TYPE (info.format),
2835 offset, start, length);
2837 /* Also get the location of the argument if possible.
2838 This doesn't work for integer literals or function calls. */
2839 location_t argloc = UNKNOWN_LOCATION;
2840 if (dir.arg)
2841 argloc = EXPR_LOCATION (dir.arg);
2843 /* Bail when there is no function to compute the output length,
2844 or when minimum length checking has been disabled. */
2845 if (!dir.fmtfunc || res->range.min >= HOST_WIDE_INT_MAX)
2846 return false;
2848 /* Compute the range of lengths of the formatted output. */
2849 fmtresult fmtres = dir.fmtfunc (dir, dir.arg, vr_values);
2851 /* Record whether the output of all directives is known to be
2852 bounded by some maximum, implying that their arguments are
2853 either known exactly or determined to be in a known range
2854 or, for strings, limited by the upper bounds of the arrays
2855 they refer to. */
2856 res->knownrange &= fmtres.knownrange;
2858 if (!fmtres.knownrange)
2860 /* Only when the range is known, check it against the host value
2861 of INT_MAX + (the number of bytes of the "%.*Lf" directive with
2862 INT_MAX precision, which is the longest possible output of any
2863 single directive). That's the largest valid byte count (though
2864 not valid call to a printf-like function because it can never
2865 return such a count). Otherwise, the range doesn't correspond
2866 to known values of the argument. */
2867 if (fmtres.range.max > target_dir_max ())
2869 /* Normalize the MAX counter to avoid having to deal with it
2870 later. The counter can be less than HOST_WIDE_INT_M1U
2871 when compiling for an ILP32 target on an LP64 host. */
2872 fmtres.range.max = HOST_WIDE_INT_M1U;
2873 /* Disable exact and maximum length checking after a failure
2874 to determine the maximum number of characters (for example
2875 for wide characters or wide character strings) but continue
2876 tracking the minimum number of characters. */
2877 res->range.max = HOST_WIDE_INT_M1U;
2880 if (fmtres.range.min > target_dir_max ())
2882 /* Disable exact length checking after a failure to determine
2883 even the minimum number of characters (it shouldn't happen
2884 except in an error) but keep tracking the minimum and maximum
2885 number of characters. */
2886 return true;
2890 /* Buffer for the directive in the host character set (used when
2891 the source character set is different). */
2892 char hostdir[32];
2894 int dirlen = dir.len;
2896 if (fmtres.nullp)
2898 fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2899 "%<%.*s%> directive argument is null",
2900 dirlen, target_to_host (hostdir, sizeof hostdir, dir.beg));
2902 /* Don't bother processing the rest of the format string. */
2903 res->warned = true;
2904 res->range.min = HOST_WIDE_INT_M1U;
2905 res->range.max = HOST_WIDE_INT_M1U;
2906 return false;
2909 /* Compute the number of available bytes in the destination. There
2910 must always be at least one byte of space for the terminating
2911 NUL that's appended after the format string has been processed. */
2912 result_range avail_range = bytes_remaining (info.objsize, *res);
2914 bool warned = res->warned;
2916 if (!warned)
2917 warned = maybe_warn (dirloc, argloc, info, avail_range,
2918 fmtres.range, dir);
2920 /* Bump up the total maximum if it isn't too big. */
2921 if (res->range.max < HOST_WIDE_INT_MAX
2922 && fmtres.range.max < HOST_WIDE_INT_MAX)
2923 res->range.max += fmtres.range.max;
2925 /* Raise the total unlikely maximum by the larger of the maximum
2926 and the unlikely maximum. */
2927 unsigned HOST_WIDE_INT save = res->range.unlikely;
2928 if (fmtres.range.max < fmtres.range.unlikely)
2929 res->range.unlikely += fmtres.range.unlikely;
2930 else
2931 res->range.unlikely += fmtres.range.max;
2933 if (res->range.unlikely < save)
2934 res->range.unlikely = HOST_WIDE_INT_M1U;
2936 res->range.min += fmtres.range.min;
2937 res->range.likely += fmtres.range.likely;
2939 /* Has the minimum directive output length exceeded the maximum
2940 of 4095 bytes required to be supported? */
2941 bool minunder4k = fmtres.range.min < 4096;
2942 bool maxunder4k = fmtres.range.max < 4096;
2943 /* Clear POSUNDER4K in the overall result if the maximum has exceeded
2944 the 4k (this is necessary to avoid the return value optimization
2945 that may not be safe in the maximum case). */
2946 if (!maxunder4k)
2947 res->posunder4k = false;
2948 /* Also clear POSUNDER4K if the directive may fail. */
2949 if (fmtres.mayfail)
2950 res->posunder4k = false;
2952 if (!warned
2953 /* Only warn at level 2. */
2954 && warn_level > 1
2955 && (!minunder4k
2956 || (!maxunder4k && fmtres.range.max < HOST_WIDE_INT_MAX)))
2958 /* The directive output may be longer than the maximum required
2959 to be handled by an implementation according to 7.21.6.1, p15
2960 of C11. Warn on this only at level 2 but remember this and
2961 prevent folding the return value when done. This allows for
2962 the possibility of the actual libc call failing due to ENOMEM
2963 (like Glibc does under some conditions). */
2965 if (fmtres.range.min == fmtres.range.max)
2966 warned = fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2967 "%<%.*s%> directive output of %wu bytes exceeds "
2968 "minimum required size of 4095", dirlen,
2969 target_to_host (hostdir, sizeof hostdir, dir.beg),
2970 fmtres.range.min);
2971 else
2972 warned = fmtwarn (dirloc, argloc, NULL, info.warnopt (),
2973 minunder4k
2974 ? G_("%<%.*s%> directive output between %wu and %wu "
2975 "bytes may exceed minimum required size of "
2976 "4095")
2977 : G_("%<%.*s%> directive output between %wu and %wu "
2978 "bytes exceeds minimum required size of 4095"),
2979 dirlen,
2980 target_to_host (hostdir, sizeof hostdir, dir.beg),
2981 fmtres.range.min, fmtres.range.max);
2984 /* Has the likely and maximum directive output exceeded INT_MAX? */
2985 bool likelyximax = *dir.beg && res->range.likely > target_int_max ();
2986 /* Don't consider the maximum to be in excess when it's the result
2987 of a string of unknown length (i.e., whose maximum has been set
2988 to be greater than or equal to HOST_WIDE_INT_MAX. */
2989 bool maxximax = (*dir.beg
2990 && res->range.max > target_int_max ()
2991 && res->range.max < HOST_WIDE_INT_MAX);
2993 if (!warned
2994 /* Warn for the likely output size at level 1. */
2995 && (likelyximax
2996 /* But only warn for the maximum at level 2. */
2997 || (warn_level > 1
2998 && maxximax
2999 && fmtres.range.max < HOST_WIDE_INT_MAX)))
3001 /* The directive output causes the total length of output
3002 to exceed INT_MAX bytes. */
3004 if (fmtres.range.min == fmtres.range.max)
3005 warned = fmtwarn (dirloc, argloc, NULL, info.warnopt (),
3006 "%<%.*s%> directive output of %wu bytes causes "
3007 "result to exceed %<INT_MAX%>", dirlen,
3008 target_to_host (hostdir, sizeof hostdir, dir.beg),
3009 fmtres.range.min);
3010 else
3011 warned = fmtwarn (dirloc, argloc, NULL, info.warnopt (),
3012 fmtres.range.min > target_int_max ()
3013 ? G_("%<%.*s%> directive output between %wu and "
3014 "%wu bytes causes result to exceed "
3015 "%<INT_MAX%>")
3016 : G_("%<%.*s%> directive output between %wu and "
3017 "%wu bytes may cause result to exceed "
3018 "%<INT_MAX%>"), dirlen,
3019 target_to_host (hostdir, sizeof hostdir, dir.beg),
3020 fmtres.range.min, fmtres.range.max);
3023 if (warned && fmtres.range.min < fmtres.range.likely
3024 && fmtres.range.likely < fmtres.range.max)
3025 inform_n (info.fmtloc, fmtres.range.likely,
3026 "assuming directive output of %wu byte",
3027 "assuming directive output of %wu bytes",
3028 fmtres.range.likely);
3030 if (warned && fmtres.argmin)
3032 if (fmtres.argmin == fmtres.argmax)
3033 inform (info.fmtloc, "directive argument %qE", fmtres.argmin);
3034 else if (fmtres.knownrange)
3035 inform (info.fmtloc, "directive argument in the range [%E, %E]",
3036 fmtres.argmin, fmtres.argmax);
3037 else
3038 inform (info.fmtloc,
3039 "using the range [%E, %E] for directive argument",
3040 fmtres.argmin, fmtres.argmax);
3043 res->warned |= warned;
3045 if (!dir.beg[0] && res->warned && info.objsize < HOST_WIDE_INT_MAX)
3047 /* If a warning has been issued for buffer overflow or truncation
3048 (but not otherwise) help the user figure out how big a buffer
3049 they need. */
3051 location_t callloc = gimple_location (info.callstmt);
3053 unsigned HOST_WIDE_INT min = res->range.min;
3054 unsigned HOST_WIDE_INT max = res->range.max;
3056 if (min == max)
3057 inform (callloc,
3058 (min == 1
3059 ? G_("%qE output %wu byte into a destination of size %wu")
3060 : G_("%qE output %wu bytes into a destination of size %wu")),
3061 info.func, min, info.objsize);
3062 else if (max < HOST_WIDE_INT_MAX)
3063 inform (callloc,
3064 "%qE output between %wu and %wu bytes into "
3065 "a destination of size %wu",
3066 info.func, min, max, info.objsize);
3067 else if (min < res->range.likely && res->range.likely < max)
3068 inform (callloc,
3069 "%qE output %wu or more bytes (assuming %wu) into "
3070 "a destination of size %wu",
3071 info.func, min, res->range.likely, info.objsize);
3072 else
3073 inform (callloc,
3074 "%qE output %wu or more bytes into a destination of size %wu",
3075 info.func, min, info.objsize);
3078 if (dump_file && *dir.beg)
3080 fprintf (dump_file,
3081 " Result: "
3082 HOST_WIDE_INT_PRINT_DEC ", " HOST_WIDE_INT_PRINT_DEC ", "
3083 HOST_WIDE_INT_PRINT_DEC ", " HOST_WIDE_INT_PRINT_DEC " ("
3084 HOST_WIDE_INT_PRINT_DEC ", " HOST_WIDE_INT_PRINT_DEC ", "
3085 HOST_WIDE_INT_PRINT_DEC ", " HOST_WIDE_INT_PRINT_DEC ")\n",
3086 fmtres.range.min, fmtres.range.likely,
3087 fmtres.range.max, fmtres.range.unlikely,
3088 res->range.min, res->range.likely,
3089 res->range.max, res->range.unlikely);
3092 return true;
3095 /* Parse a format directive in function call described by INFO starting
3096 at STR and populate DIR structure. Bump up *ARGNO by the number of
3097 arguments extracted for the directive. Return the length of
3098 the directive. */
3100 static size_t
3101 parse_directive (sprintf_dom_walker::call_info &info,
3102 directive &dir, format_result *res,
3103 const char *str, unsigned *argno,
3104 vr_values *vr_values)
3106 const char *pcnt = strchr (str, target_percent);
3107 dir.beg = str;
3109 if (size_t len = pcnt ? pcnt - str : *str ? strlen (str) : 1)
3111 /* This directive is either a plain string or the terminating nul
3112 (which isn't really a directive but it simplifies things to
3113 handle it as if it were). */
3114 dir.len = len;
3115 dir.fmtfunc = format_plain;
3117 if (dump_file)
3119 fprintf (dump_file, " Directive %u at offset "
3120 HOST_WIDE_INT_PRINT_UNSIGNED ": \"%.*s\", "
3121 "length = " HOST_WIDE_INT_PRINT_UNSIGNED "\n",
3122 dir.dirno,
3123 (unsigned HOST_WIDE_INT)(size_t)(dir.beg - info.fmtstr),
3124 (int)dir.len, dir.beg, (unsigned HOST_WIDE_INT) dir.len);
3127 return len - !*str;
3130 const char *pf = pcnt + 1;
3132 /* POSIX numbered argument index or zero when none. */
3133 HOST_WIDE_INT dollar = 0;
3135 /* With and precision. -1 when not specified, HOST_WIDE_INT_MIN
3136 when given by a va_list argument, and a non-negative value
3137 when specified in the format string itself. */
3138 HOST_WIDE_INT width = -1;
3139 HOST_WIDE_INT precision = -1;
3141 /* Pointers to the beginning of the width and precision decimal
3142 string (if any) within the directive. */
3143 const char *pwidth = 0;
3144 const char *pprec = 0;
3146 /* When the value of the decimal string that specifies width or
3147 precision is out of range, points to the digit that causes
3148 the value to exceed the limit. */
3149 const char *werange = NULL;
3150 const char *perange = NULL;
3152 /* Width specified via the asterisk. Need not be INTEGER_CST.
3153 For vararg functions set to void_node. */
3154 tree star_width = NULL_TREE;
3156 /* Width specified via the asterisk. Need not be INTEGER_CST.
3157 For vararg functions set to void_node. */
3158 tree star_precision = NULL_TREE;
3160 if (ISDIGIT (target_to_host (*pf)))
3162 /* This could be either a POSIX positional argument, the '0'
3163 flag, or a width, depending on what follows. Store it as
3164 width and sort it out later after the next character has
3165 been seen. */
3166 pwidth = pf;
3167 width = target_strtol10 (&pf, &werange);
3169 else if (target_to_host (*pf) == '*')
3171 /* Similarly to the block above, this could be either a POSIX
3172 positional argument or a width, depending on what follows. */
3173 if (*argno < gimple_call_num_args (info.callstmt))
3174 star_width = gimple_call_arg (info.callstmt, (*argno)++);
3175 else
3176 star_width = void_node;
3177 ++pf;
3180 if (target_to_host (*pf) == '$')
3182 /* Handle the POSIX dollar sign which references the 1-based
3183 positional argument number. */
3184 if (width != -1)
3185 dollar = width + info.argidx;
3186 else if (star_width
3187 && TREE_CODE (star_width) == INTEGER_CST
3188 && (TYPE_PRECISION (TREE_TYPE (star_width))
3189 <= TYPE_PRECISION (integer_type_node)))
3190 dollar = width + tree_to_shwi (star_width);
3192 /* Bail when the numbered argument is out of range (it will
3193 have already been diagnosed by -Wformat). */
3194 if (dollar == 0
3195 || dollar == (int)info.argidx
3196 || dollar > gimple_call_num_args (info.callstmt))
3197 return false;
3199 --dollar;
3201 star_width = NULL_TREE;
3202 width = -1;
3203 ++pf;
3206 if (dollar || !star_width)
3208 if (width != -1)
3210 if (width == 0)
3212 /* The '0' that has been interpreted as a width above is
3213 actually a flag. Reset HAVE_WIDTH, set the '0' flag,
3214 and continue processing other flags. */
3215 width = -1;
3216 dir.set_flag ('0');
3218 else if (!dollar)
3220 /* (Non-zero) width has been seen. The next character
3221 is either a period or a digit. */
3222 goto start_precision;
3225 /* When either '$' has been seen, or width has not been seen,
3226 the next field is the optional flags followed by an optional
3227 width. */
3228 for ( ; ; ) {
3229 switch (target_to_host (*pf))
3231 case ' ':
3232 case '0':
3233 case '+':
3234 case '-':
3235 case '#':
3236 dir.set_flag (target_to_host (*pf++));
3237 break;
3239 default:
3240 goto start_width;
3244 start_width:
3245 if (ISDIGIT (target_to_host (*pf)))
3247 werange = 0;
3248 pwidth = pf;
3249 width = target_strtol10 (&pf, &werange);
3251 else if (target_to_host (*pf) == '*')
3253 if (*argno < gimple_call_num_args (info.callstmt))
3254 star_width = gimple_call_arg (info.callstmt, (*argno)++);
3255 else
3257 /* This is (likely) a va_list. It could also be an invalid
3258 call with insufficient arguments. */
3259 star_width = void_node;
3261 ++pf;
3263 else if (target_to_host (*pf) == '\'')
3265 /* The POSIX apostrophe indicating a numeric grouping
3266 in the current locale. Even though it's possible to
3267 estimate the upper bound on the size of the output
3268 based on the number of digits it probably isn't worth
3269 continuing. */
3270 return 0;
3274 start_precision:
3275 if (target_to_host (*pf) == '.')
3277 ++pf;
3279 if (ISDIGIT (target_to_host (*pf)))
3281 pprec = pf;
3282 precision = target_strtol10 (&pf, &perange);
3284 else if (target_to_host (*pf) == '*')
3286 if (*argno < gimple_call_num_args (info.callstmt))
3287 star_precision = gimple_call_arg (info.callstmt, (*argno)++);
3288 else
3290 /* This is (likely) a va_list. It could also be an invalid
3291 call with insufficient arguments. */
3292 star_precision = void_node;
3294 ++pf;
3296 else
3298 /* The decimal precision or the asterisk are optional.
3299 When neither is dirified it's taken to be zero. */
3300 precision = 0;
3304 switch (target_to_host (*pf))
3306 case 'h':
3307 if (target_to_host (pf[1]) == 'h')
3309 ++pf;
3310 dir.modifier = FMT_LEN_hh;
3312 else
3313 dir.modifier = FMT_LEN_h;
3314 ++pf;
3315 break;
3317 case 'j':
3318 dir.modifier = FMT_LEN_j;
3319 ++pf;
3320 break;
3322 case 'L':
3323 dir.modifier = FMT_LEN_L;
3324 ++pf;
3325 break;
3327 case 'l':
3328 if (target_to_host (pf[1]) == 'l')
3330 ++pf;
3331 dir.modifier = FMT_LEN_ll;
3333 else
3334 dir.modifier = FMT_LEN_l;
3335 ++pf;
3336 break;
3338 case 't':
3339 dir.modifier = FMT_LEN_t;
3340 ++pf;
3341 break;
3343 case 'z':
3344 dir.modifier = FMT_LEN_z;
3345 ++pf;
3346 break;
3349 switch (target_to_host (*pf))
3351 /* Handle a sole '%' character the same as "%%" but since it's
3352 undefined prevent the result from being folded. */
3353 case '\0':
3354 --pf;
3355 res->range.min = res->range.max = HOST_WIDE_INT_M1U;
3356 /* FALLTHRU */
3357 case '%':
3358 dir.fmtfunc = format_percent;
3359 break;
3361 case 'a':
3362 case 'A':
3363 case 'e':
3364 case 'E':
3365 case 'f':
3366 case 'F':
3367 case 'g':
3368 case 'G':
3369 res->floating = true;
3370 dir.fmtfunc = format_floating;
3371 break;
3373 case 'd':
3374 case 'i':
3375 case 'o':
3376 case 'u':
3377 case 'x':
3378 case 'X':
3379 dir.fmtfunc = format_integer;
3380 break;
3382 case 'p':
3383 /* The %p output is implementation-defined. It's possible
3384 to determine this format but due to extensions (edirially
3385 those of the Linux kernel -- see bug 78512) the first %p
3386 in the format string disables any further processing. */
3387 return false;
3389 case 'n':
3390 /* %n has side-effects even when nothing is actually printed to
3391 any buffer. */
3392 info.nowrite = false;
3393 dir.fmtfunc = format_none;
3394 break;
3396 case 'C':
3397 case 'c':
3398 /* POSIX wide character and C/POSIX narrow character. */
3399 dir.fmtfunc = format_character;
3400 break;
3402 case 'S':
3403 case 's':
3404 /* POSIX wide string and C/POSIX narrow character string. */
3405 dir.fmtfunc = format_string;
3406 break;
3408 default:
3409 /* Unknown conversion specification. */
3410 return 0;
3413 dir.specifier = target_to_host (*pf++);
3415 /* Store the length of the format directive. */
3416 dir.len = pf - pcnt;
3418 /* Buffer for the directive in the host character set (used when
3419 the source character set is different). */
3420 char hostdir[32];
3422 if (star_width)
3424 if (INTEGRAL_TYPE_P (TREE_TYPE (star_width)))
3425 dir.set_width (star_width, vr_values);
3426 else
3428 /* Width specified by a va_list takes on the range [0, -INT_MIN]
3429 (width is the absolute value of that specified). */
3430 dir.width[0] = 0;
3431 dir.width[1] = target_int_max () + 1;
3434 else
3436 if (width == LONG_MAX && werange)
3438 size_t begin = dir.beg - info.fmtstr + (pwidth - pcnt);
3439 size_t caret = begin + (werange - pcnt);
3440 size_t end = pf - info.fmtstr - 1;
3442 /* Create a location for the width part of the directive,
3443 pointing the caret at the first out-of-range digit. */
3444 substring_loc dirloc (info.fmtloc, TREE_TYPE (info.format),
3445 caret, begin, end);
3447 fmtwarn (dirloc, UNKNOWN_LOCATION, NULL, info.warnopt (),
3448 "%<%.*s%> directive width out of range", (int) dir.len,
3449 target_to_host (hostdir, sizeof hostdir, dir.beg));
3452 dir.set_width (width);
3455 if (star_precision)
3457 if (INTEGRAL_TYPE_P (TREE_TYPE (star_precision)))
3458 dir.set_precision (star_precision, vr_values);
3459 else
3461 /* Precision specified by a va_list takes on the range [-1, INT_MAX]
3462 (unlike width, negative precision is ignored). */
3463 dir.prec[0] = -1;
3464 dir.prec[1] = target_int_max ();
3467 else
3469 if (precision == LONG_MAX && perange)
3471 size_t begin = dir.beg - info.fmtstr + (pprec - pcnt) - 1;
3472 size_t caret = dir.beg - info.fmtstr + (perange - pcnt) - 1;
3473 size_t end = pf - info.fmtstr - 2;
3475 /* Create a location for the precision part of the directive,
3476 including the leading period, pointing the caret at the first
3477 out-of-range digit . */
3478 substring_loc dirloc (info.fmtloc, TREE_TYPE (info.format),
3479 caret, begin, end);
3481 fmtwarn (dirloc, UNKNOWN_LOCATION, NULL, info.warnopt (),
3482 "%<%.*s%> directive precision out of range", (int) dir.len,
3483 target_to_host (hostdir, sizeof hostdir, dir.beg));
3486 dir.set_precision (precision);
3489 /* Extract the argument if the directive takes one and if it's
3490 available (e.g., the function doesn't take a va_list). Treat
3491 missing arguments the same as va_list, even though they will
3492 have likely already been diagnosed by -Wformat. */
3493 if (dir.specifier != '%'
3494 && *argno < gimple_call_num_args (info.callstmt))
3495 dir.arg = gimple_call_arg (info.callstmt, dollar ? dollar : (*argno)++);
3497 if (dump_file)
3499 fprintf (dump_file,
3500 " Directive %u at offset " HOST_WIDE_INT_PRINT_UNSIGNED
3501 ": \"%.*s\"",
3502 dir.dirno,
3503 (unsigned HOST_WIDE_INT)(size_t)(dir.beg - info.fmtstr),
3504 (int)dir.len, dir.beg);
3505 if (star_width)
3507 if (dir.width[0] == dir.width[1])
3508 fprintf (dump_file, ", width = " HOST_WIDE_INT_PRINT_DEC,
3509 dir.width[0]);
3510 else
3511 fprintf (dump_file,
3512 ", width in range [" HOST_WIDE_INT_PRINT_DEC
3513 ", " HOST_WIDE_INT_PRINT_DEC "]",
3514 dir.width[0], dir.width[1]);
3517 if (star_precision)
3519 if (dir.prec[0] == dir.prec[1])
3520 fprintf (dump_file, ", precision = " HOST_WIDE_INT_PRINT_DEC,
3521 dir.prec[0]);
3522 else
3523 fprintf (dump_file,
3524 ", precision in range [" HOST_WIDE_INT_PRINT_DEC
3525 HOST_WIDE_INT_PRINT_DEC "]",
3526 dir.prec[0], dir.prec[1]);
3528 fputc ('\n', dump_file);
3531 return dir.len;
3534 /* Compute the length of the output resulting from the call to a formatted
3535 output function described by INFO and store the result of the call in
3536 *RES. Issue warnings for detected past the end writes. Return true
3537 if the complete format string has been processed and *RES can be relied
3538 on, false otherwise (e.g., when a unknown or unhandled directive was seen
3539 that caused the processing to be terminated early). */
3541 bool
3542 sprintf_dom_walker::compute_format_length (call_info &info,
3543 format_result *res)
3545 if (dump_file)
3547 location_t callloc = gimple_location (info.callstmt);
3548 fprintf (dump_file, "%s:%i: ",
3549 LOCATION_FILE (callloc), LOCATION_LINE (callloc));
3550 print_generic_expr (dump_file, info.func, dump_flags);
3552 fprintf (dump_file,
3553 ": objsize = " HOST_WIDE_INT_PRINT_UNSIGNED
3554 ", fmtstr = \"%s\"\n",
3555 info.objsize, info.fmtstr);
3558 /* Reset the minimum and maximum byte counters. */
3559 res->range.min = res->range.max = 0;
3561 /* No directive has been seen yet so the length of output is bounded
3562 by the known range [0, 0] (with no conversion resulting in a failure
3563 or producing more than 4K bytes) until determined otherwise. */
3564 res->knownrange = true;
3565 res->posunder4k = true;
3566 res->floating = false;
3567 res->warned = false;
3569 /* 1-based directive counter. */
3570 unsigned dirno = 1;
3572 /* The variadic argument counter. */
3573 unsigned argno = info.argidx;
3575 for (const char *pf = info.fmtstr; ; ++dirno)
3577 directive dir = directive ();
3578 dir.dirno = dirno;
3580 size_t n = parse_directive (info, dir, res, pf, &argno,
3581 evrp_range_analyzer.get_vr_values ());
3583 /* Return failure if the format function fails. */
3584 if (!format_directive (info, res, dir,
3585 evrp_range_analyzer.get_vr_values ()))
3586 return false;
3588 /* Return success the directive is zero bytes long and it's
3589 the last think in the format string (i.e., it's the terminating
3590 nul, which isn't really a directive but handling it as one makes
3591 things simpler). */
3592 if (!n)
3593 return *pf == '\0';
3595 pf += n;
3598 /* The complete format string was processed (with or without warnings). */
3599 return true;
3602 /* Return the size of the object referenced by the expression DEST if
3603 available, or -1 otherwise. */
3605 static unsigned HOST_WIDE_INT
3606 get_destination_size (tree dest)
3608 /* Initialize object size info before trying to compute it. */
3609 init_object_sizes ();
3611 /* Use __builtin_object_size to determine the size of the destination
3612 object. When optimizing, determine the smallest object (such as
3613 a member array as opposed to the whole enclosing object), otherwise
3614 use type-zero object size to determine the size of the enclosing
3615 object (the function fails without optimization in this type). */
3616 int ost = optimize > 0;
3617 unsigned HOST_WIDE_INT size;
3618 if (compute_builtin_object_size (dest, ost, &size))
3619 return size;
3621 return HOST_WIDE_INT_M1U;
3624 /* Return true if the call described by INFO with result RES safe to
3625 optimize (i.e., no undefined behavior), and set RETVAL to the range
3626 of its return values. */
3628 static bool
3629 is_call_safe (const sprintf_dom_walker::call_info &info,
3630 const format_result &res, bool under4k,
3631 unsigned HOST_WIDE_INT retval[2])
3633 if (under4k && !res.posunder4k)
3634 return false;
3636 /* The minimum return value. */
3637 retval[0] = res.range.min;
3639 /* The maximum return value is in most cases bounded by RES.RANGE.MAX
3640 but in cases involving multibyte characters could be as large as
3641 RES.RANGE.UNLIKELY. */
3642 retval[1]
3643 = res.range.unlikely < res.range.max ? res.range.max : res.range.unlikely;
3645 /* Adjust the number of bytes which includes the terminating nul
3646 to reflect the return value of the function which does not.
3647 Because the valid range of the function is [INT_MIN, INT_MAX],
3648 a valid range before the adjustment below is [0, INT_MAX + 1]
3649 (the functions only return negative values on error or undefined
3650 behavior). */
3651 if (retval[0] <= target_int_max () + 1)
3652 --retval[0];
3653 if (retval[1] <= target_int_max () + 1)
3654 --retval[1];
3656 /* Avoid the return value optimization when the behavior of the call
3657 is undefined either because any directive may have produced 4K or
3658 more of output, or the return value exceeds INT_MAX, or because
3659 the output overflows the destination object (but leave it enabled
3660 when the function is bounded because then the behavior is well-
3661 defined). */
3662 if (retval[0] == retval[1]
3663 && (info.bounded || retval[0] < info.objsize)
3664 && retval[0] <= target_int_max ())
3665 return true;
3667 if ((info.bounded || retval[1] < info.objsize)
3668 && (retval[0] < target_int_max ()
3669 && retval[1] < target_int_max ()))
3670 return true;
3672 if (!under4k && (info.bounded || retval[0] < info.objsize))
3673 return true;
3675 return false;
3678 /* Given a suitable result RES of a call to a formatted output function
3679 described by INFO, substitute the result for the return value of
3680 the call. The result is suitable if the number of bytes it represents
3681 is known and exact. A result that isn't suitable for substitution may
3682 have its range set to the range of return values, if that is known.
3683 Return true if the call is removed and gsi_next should not be performed
3684 in the caller. */
3686 static bool
3687 try_substitute_return_value (gimple_stmt_iterator *gsi,
3688 const sprintf_dom_walker::call_info &info,
3689 const format_result &res)
3691 tree lhs = gimple_get_lhs (info.callstmt);
3693 /* Set to true when the entire call has been removed. */
3694 bool removed = false;
3696 /* The minimum and maximum return value. */
3697 unsigned HOST_WIDE_INT retval[2];
3698 bool safe = is_call_safe (info, res, true, retval);
3700 if (safe
3701 && retval[0] == retval[1]
3702 /* Not prepared to handle possibly throwing calls here; they shouldn't
3703 appear in non-artificial testcases, except when the __*_chk routines
3704 are badly declared. */
3705 && !stmt_ends_bb_p (info.callstmt))
3707 tree cst = build_int_cst (integer_type_node, retval[0]);
3709 if (lhs == NULL_TREE
3710 && info.nowrite)
3712 /* Remove the call to the bounded function with a zero size
3713 (e.g., snprintf(0, 0, "%i", 123)) if there is no lhs. */
3714 unlink_stmt_vdef (info.callstmt);
3715 gsi_remove (gsi, true);
3716 removed = true;
3718 else if (info.nowrite)
3720 /* Replace the call to the bounded function with a zero size
3721 (e.g., snprintf(0, 0, "%i", 123) with the constant result
3722 of the function. */
3723 if (!update_call_from_tree (gsi, cst))
3724 gimplify_and_update_call_from_tree (gsi, cst);
3725 gimple *callstmt = gsi_stmt (*gsi);
3726 update_stmt (callstmt);
3728 else if (lhs)
3730 /* Replace the left-hand side of the call with the constant
3731 result of the formatted function. */
3732 gimple_call_set_lhs (info.callstmt, NULL_TREE);
3733 gimple *g = gimple_build_assign (lhs, cst);
3734 gsi_insert_after (gsi, g, GSI_NEW_STMT);
3735 update_stmt (info.callstmt);
3738 if (dump_file)
3740 if (removed)
3741 fprintf (dump_file, " Removing call statement.");
3742 else
3744 fprintf (dump_file, " Substituting ");
3745 print_generic_expr (dump_file, cst, dump_flags);
3746 fprintf (dump_file, " for %s.\n",
3747 info.nowrite ? "statement" : "return value");
3751 else if (lhs)
3753 bool setrange = false;
3755 if (safe
3756 && (info.bounded || retval[1] < info.objsize)
3757 && (retval[0] < target_int_max ()
3758 && retval[1] < target_int_max ()))
3760 /* If the result is in a valid range bounded by the size of
3761 the destination set it so that it can be used for subsequent
3762 optimizations. */
3763 int prec = TYPE_PRECISION (integer_type_node);
3765 wide_int min = wi::shwi (retval[0], prec);
3766 wide_int max = wi::shwi (retval[1], prec);
3767 set_range_info (lhs, VR_RANGE, min, max);
3769 setrange = true;
3772 if (dump_file)
3774 const char *inbounds
3775 = (retval[0] < info.objsize
3776 ? (retval[1] < info.objsize
3777 ? "in" : "potentially out-of")
3778 : "out-of");
3780 const char *what = setrange ? "Setting" : "Discarding";
3781 if (retval[0] != retval[1])
3782 fprintf (dump_file,
3783 " %s %s-bounds return value range ["
3784 HOST_WIDE_INT_PRINT_UNSIGNED ", "
3785 HOST_WIDE_INT_PRINT_UNSIGNED "].\n",
3786 what, inbounds, retval[0], retval[1]);
3787 else
3788 fprintf (dump_file, " %s %s-bounds return value "
3789 HOST_WIDE_INT_PRINT_UNSIGNED ".\n",
3790 what, inbounds, retval[0]);
3794 if (dump_file)
3795 fputc ('\n', dump_file);
3797 return removed;
3800 /* Try to simplify a s{,n}printf call described by INFO with result
3801 RES by replacing it with a simpler and presumably more efficient
3802 call (such as strcpy). */
3804 static bool
3805 try_simplify_call (gimple_stmt_iterator *gsi,
3806 const sprintf_dom_walker::call_info &info,
3807 const format_result &res)
3809 unsigned HOST_WIDE_INT dummy[2];
3810 if (!is_call_safe (info, res, info.retval_used (), dummy))
3811 return false;
3813 switch (info.fncode)
3815 case BUILT_IN_SNPRINTF:
3816 return gimple_fold_builtin_snprintf (gsi);
3818 case BUILT_IN_SPRINTF:
3819 return gimple_fold_builtin_sprintf (gsi);
3821 default:
3825 return false;
3828 /* Determine if a GIMPLE CALL is to one of the sprintf-like built-in
3829 functions and if so, handle it. Return true if the call is removed
3830 and gsi_next should not be performed in the caller. */
3832 bool
3833 sprintf_dom_walker::handle_gimple_call (gimple_stmt_iterator *gsi)
3835 call_info info = call_info ();
3837 info.callstmt = gsi_stmt (*gsi);
3838 if (!gimple_call_builtin_p (info.callstmt, BUILT_IN_NORMAL))
3839 return false;
3841 info.func = gimple_call_fndecl (info.callstmt);
3842 info.fncode = DECL_FUNCTION_CODE (info.func);
3844 /* The size of the destination as in snprintf(dest, size, ...). */
3845 unsigned HOST_WIDE_INT dstsize = HOST_WIDE_INT_M1U;
3847 /* The size of the destination determined by __builtin_object_size. */
3848 unsigned HOST_WIDE_INT objsize = HOST_WIDE_INT_M1U;
3850 /* Buffer size argument number (snprintf and vsnprintf). */
3851 unsigned HOST_WIDE_INT idx_dstsize = HOST_WIDE_INT_M1U;
3853 /* Object size argument number (snprintf_chk and vsnprintf_chk). */
3854 unsigned HOST_WIDE_INT idx_objsize = HOST_WIDE_INT_M1U;
3856 /* Format string argument number (valid for all functions). */
3857 unsigned idx_format;
3859 switch (info.fncode)
3861 case BUILT_IN_SPRINTF:
3862 // Signature:
3863 // __builtin_sprintf (dst, format, ...)
3864 idx_format = 1;
3865 info.argidx = 2;
3866 break;
3868 case BUILT_IN_SPRINTF_CHK:
3869 // Signature:
3870 // __builtin___sprintf_chk (dst, ost, objsize, format, ...)
3871 idx_objsize = 2;
3872 idx_format = 3;
3873 info.argidx = 4;
3874 break;
3876 case BUILT_IN_SNPRINTF:
3877 // Signature:
3878 // __builtin_snprintf (dst, size, format, ...)
3879 idx_dstsize = 1;
3880 idx_format = 2;
3881 info.argidx = 3;
3882 info.bounded = true;
3883 break;
3885 case BUILT_IN_SNPRINTF_CHK:
3886 // Signature:
3887 // __builtin___snprintf_chk (dst, size, ost, objsize, format, ...)
3888 idx_dstsize = 1;
3889 idx_objsize = 3;
3890 idx_format = 4;
3891 info.argidx = 5;
3892 info.bounded = true;
3893 break;
3895 case BUILT_IN_VSNPRINTF:
3896 // Signature:
3897 // __builtin_vsprintf (dst, size, format, va)
3898 idx_dstsize = 1;
3899 idx_format = 2;
3900 info.argidx = -1;
3901 info.bounded = true;
3902 break;
3904 case BUILT_IN_VSNPRINTF_CHK:
3905 // Signature:
3906 // __builtin___vsnprintf_chk (dst, size, ost, objsize, format, va)
3907 idx_dstsize = 1;
3908 idx_objsize = 3;
3909 idx_format = 4;
3910 info.argidx = -1;
3911 info.bounded = true;
3912 break;
3914 case BUILT_IN_VSPRINTF:
3915 // Signature:
3916 // __builtin_vsprintf (dst, format, va)
3917 idx_format = 1;
3918 info.argidx = -1;
3919 break;
3921 case BUILT_IN_VSPRINTF_CHK:
3922 // Signature:
3923 // __builtin___vsprintf_chk (dst, ost, objsize, format, va)
3924 idx_format = 3;
3925 idx_objsize = 2;
3926 info.argidx = -1;
3927 break;
3929 default:
3930 return false;
3933 /* Set the global warning level for this function. */
3934 warn_level = info.bounded ? warn_format_trunc : warn_format_overflow;
3936 /* The first argument is a pointer to the destination. */
3937 tree dstptr = gimple_call_arg (info.callstmt, 0);
3939 info.format = gimple_call_arg (info.callstmt, idx_format);
3941 /* True when the destination size is constant as opposed to the lower
3942 or upper bound of a range. */
3943 bool dstsize_cst_p = true;
3945 if (idx_dstsize == HOST_WIDE_INT_M1U)
3947 /* For non-bounded functions like sprintf, determine the size
3948 of the destination from the object or pointer passed to it
3949 as the first argument. */
3950 dstsize = get_destination_size (dstptr);
3952 else if (tree size = gimple_call_arg (info.callstmt, idx_dstsize))
3954 /* For bounded functions try to get the size argument. */
3956 if (TREE_CODE (size) == INTEGER_CST)
3958 dstsize = tree_to_uhwi (size);
3959 /* No object can be larger than SIZE_MAX bytes (half the address
3960 space) on the target.
3961 The functions are defined only for output of at most INT_MAX
3962 bytes. Specifying a bound in excess of that limit effectively
3963 defeats the bounds checking (and on some implementations such
3964 as Solaris cause the function to fail with EINVAL). */
3965 if (dstsize > target_size_max () / 2)
3967 /* Avoid warning if -Wstringop-overflow is specified since
3968 it also warns for the same thing though only for the
3969 checking built-ins. */
3970 if ((idx_objsize == HOST_WIDE_INT_M1U
3971 || !warn_stringop_overflow))
3972 warning_at (gimple_location (info.callstmt), info.warnopt (),
3973 "specified bound %wu exceeds maximum object size "
3974 "%wu",
3975 dstsize, target_size_max () / 2);
3977 else if (dstsize > target_int_max ())
3978 warning_at (gimple_location (info.callstmt), info.warnopt (),
3979 "specified bound %wu exceeds %<INT_MAX%>",
3980 dstsize);
3982 else if (TREE_CODE (size) == SSA_NAME)
3984 /* Try to determine the range of values of the argument
3985 and use the greater of the two at level 1 and the smaller
3986 of them at level 2. */
3987 value_range *vr = evrp_range_analyzer.get_value_range (size);
3988 if (vr->type == VR_RANGE
3989 && TREE_CODE (vr->min) == INTEGER_CST
3990 && TREE_CODE (vr->max) == INTEGER_CST)
3991 dstsize = (warn_level < 2
3992 ? TREE_INT_CST_LOW (vr->max)
3993 : TREE_INT_CST_LOW (vr->min));
3995 /* The destination size is not constant. If the function is
3996 bounded (e.g., snprintf) a lower bound of zero doesn't
3997 necessarily imply it can be eliminated. */
3998 dstsize_cst_p = false;
4002 if (idx_objsize != HOST_WIDE_INT_M1U)
4003 if (tree size = gimple_call_arg (info.callstmt, idx_objsize))
4004 if (tree_fits_uhwi_p (size))
4005 objsize = tree_to_uhwi (size);
4007 if (info.bounded && !dstsize)
4009 /* As a special case, when the explicitly specified destination
4010 size argument (to a bounded function like snprintf) is zero
4011 it is a request to determine the number of bytes on output
4012 without actually producing any. Pretend the size is
4013 unlimited in this case. */
4014 info.objsize = HOST_WIDE_INT_MAX;
4015 info.nowrite = dstsize_cst_p;
4017 else
4019 /* For calls to non-bounded functions or to those of bounded
4020 functions with a non-zero size, warn if the destination
4021 pointer is null. */
4022 if (integer_zerop (dstptr))
4024 /* This is diagnosed with -Wformat only when the null is a constant
4025 pointer. The warning here diagnoses instances where the pointer
4026 is not constant. */
4027 location_t loc = gimple_location (info.callstmt);
4028 warning_at (EXPR_LOC_OR_LOC (dstptr, loc),
4029 info.warnopt (), "null destination pointer");
4030 return false;
4033 /* Set the object size to the smaller of the two arguments
4034 of both have been specified and they're not equal. */
4035 info.objsize = dstsize < objsize ? dstsize : objsize;
4037 if (info.bounded
4038 && dstsize < target_size_max () / 2 && objsize < dstsize
4039 /* Avoid warning if -Wstringop-overflow is specified since
4040 it also warns for the same thing though only for the
4041 checking built-ins. */
4042 && (idx_objsize == HOST_WIDE_INT_M1U
4043 || !warn_stringop_overflow))
4045 warning_at (gimple_location (info.callstmt), info.warnopt (),
4046 "specified bound %wu exceeds the size %wu "
4047 "of the destination object", dstsize, objsize);
4051 if (integer_zerop (info.format))
4053 /* This is diagnosed with -Wformat only when the null is a constant
4054 pointer. The warning here diagnoses instances where the pointer
4055 is not constant. */
4056 location_t loc = gimple_location (info.callstmt);
4057 warning_at (EXPR_LOC_OR_LOC (info.format, loc),
4058 info.warnopt (), "null format string");
4059 return false;
4062 info.fmtstr = get_format_string (info.format, &info.fmtloc);
4063 if (!info.fmtstr)
4064 return false;
4066 /* The result is the number of bytes output by the formatted function,
4067 including the terminating NUL. */
4068 format_result res = format_result ();
4070 bool success = compute_format_length (info, &res);
4072 /* When optimizing and the printf return value optimization is enabled,
4073 attempt to substitute the computed result for the return value of
4074 the call. Avoid this optimization when -frounding-math is in effect
4075 and the format string contains a floating point directive. */
4076 bool call_removed = false;
4077 if (success && optimize > 0)
4079 /* Save a copy of the iterator pointing at the call. The iterator
4080 may change to point past the call in try_substitute_return_value
4081 but the original value is needed in try_simplify_call. */
4082 gimple_stmt_iterator gsi_call = *gsi;
4084 if (flag_printf_return_value
4085 && (!flag_rounding_math || !res.floating))
4086 call_removed = try_substitute_return_value (gsi, info, res);
4088 if (!call_removed)
4089 try_simplify_call (&gsi_call, info, res);
4092 return call_removed;
4095 edge
4096 sprintf_dom_walker::before_dom_children (basic_block bb)
4098 evrp_range_analyzer.enter (bb);
4099 for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si); )
4101 /* Iterate over statements, looking for function calls. */
4102 gimple *stmt = gsi_stmt (si);
4104 /* First record ranges generated by this statement. */
4105 evrp_range_analyzer.record_ranges_from_stmt (stmt, false);
4107 if (is_gimple_call (stmt) && handle_gimple_call (&si))
4108 /* If handle_gimple_call returns true, the iterator is
4109 already pointing to the next statement. */
4110 continue;
4112 gsi_next (&si);
4114 return NULL;
4117 void
4118 sprintf_dom_walker::after_dom_children (basic_block bb)
4120 evrp_range_analyzer.leave (bb);
4123 /* Execute the pass for function FUN. */
4125 unsigned int
4126 pass_sprintf_length::execute (function *fun)
4128 init_target_to_host_charmap ();
4130 calculate_dominance_info (CDI_DOMINATORS);
4132 sprintf_dom_walker sprintf_dom_walker;
4133 sprintf_dom_walker.walk (ENTRY_BLOCK_PTR_FOR_FN (fun));
4135 /* Clean up object size info. */
4136 fini_object_sizes ();
4137 return 0;
4140 } /* Unnamed namespace. */
4142 /* Return a pointer to a pass object newly constructed from the context
4143 CTXT. */
4145 gimple_opt_pass *
4146 make_pass_sprintf_length (gcc::context *ctxt)
4148 return new pass_sprintf_length (ctxt);