Don't warn when alignment of global common data exceeds maximum alignment.
[official-gcc.git] / gcc / tree-ssa-strlen.c
blob15391da81048b148f312d87af2522910282a511b
1 /* String length optimization
2 Copyright (C) 2011-2021 Free Software Foundation, Inc.
3 Contributed by Jakub Jelinek <jakub@redhat.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "alloc-pool.h"
29 #include "tree-pass.h"
30 #include "ssa.h"
31 #include "cgraph.h"
32 #include "gimple-pretty-print.h"
33 #include "gimple-ssa-warn-access.h"
34 #include "gimple-ssa-warn-restrict.h"
35 #include "fold-const.h"
36 #include "stor-layout.h"
37 #include "gimple-fold.h"
38 #include "tree-eh.h"
39 #include "gimplify.h"
40 #include "gimple-iterator.h"
41 #include "gimplify-me.h"
42 #include "expr.h"
43 #include "tree-cfg.h"
44 #include "tree-dfa.h"
45 #include "domwalk.h"
46 #include "tree-ssa-alias.h"
47 #include "tree-ssa-propagate.h"
48 #include "tree-ssa-strlen.h"
49 #include "tree-hash-traits.h"
50 #include "builtins.h"
51 #include "pointer-query.h"
52 #include "target.h"
53 #include "diagnostic-core.h"
54 #include "diagnostic.h"
55 #include "intl.h"
56 #include "attribs.h"
57 #include "calls.h"
58 #include "cfgloop.h"
59 #include "tree-ssa-loop.h"
60 #include "tree-scalar-evolution.h"
61 #include "vr-values.h"
62 #include "gimple-ssa-evrp-analyze.h"
63 #include "tree-ssa.h"
65 /* A vector indexed by SSA_NAME_VERSION. 0 means unknown, positive value
66 is an index into strinfo vector, negative value stands for
67 string length of a string literal (~strlen). */
68 static vec<int> ssa_ver_to_stridx;
70 /* Number of currently active string indexes plus one. */
71 static int max_stridx;
73 /* Set to true to optimize, false when just checking. */
74 static bool strlen_optimize;
76 /* String information record. */
77 struct strinfo
79 /* Number of leading characters that are known to be nonzero. This is
80 also the length of the string if FULL_STRING_P.
82 The values in a list of related string pointers must be consistent;
83 that is, if strinfo B comes X bytes after strinfo A, it must be
84 the case that A->nonzero_chars == X + B->nonzero_chars. */
85 tree nonzero_chars;
86 /* Any of the corresponding pointers for querying alias oracle. */
87 tree ptr;
88 /* STMT is used for two things:
90 - To record the statement that should be used for delayed length
91 computations. We maintain the invariant that all related strinfos
92 have delayed lengths or none do.
94 - To record the malloc or calloc call that produced this result
95 to optimize away malloc/memset sequences. STMT is reset after
96 a calloc-allocated object has been stored a non-zero value into. */
97 gimple *stmt;
98 /* Set to the dynamic allocation statement for the object (alloca,
99 calloc, malloc, or VLA). Unlike STMT, once set for a strinfo
100 object, ALLOC doesn't change. */
101 gimple *alloc;
102 /* Pointer to '\0' if known, if NULL, it can be computed as
103 ptr + length. */
104 tree endptr;
105 /* Reference count. Any changes to strinfo entry possibly shared
106 with dominating basic blocks need unshare_strinfo first, except
107 for dont_invalidate which affects only the immediately next
108 maybe_invalidate. */
109 int refcount;
110 /* Copy of index. get_strinfo (si->idx) should return si; */
111 int idx;
112 /* These 3 fields are for chaining related string pointers together.
113 E.g. for
114 bl = strlen (b); dl = strlen (d); strcpy (a, b); c = a + bl;
115 strcpy (c, d); e = c + dl;
116 strinfo(a) -> strinfo(c) -> strinfo(e)
117 All have ->first field equal to strinfo(a)->idx and are doubly
118 chained through prev/next fields. The later strinfos are required
119 to point into the same string with zero or more bytes after
120 the previous pointer and all bytes in between the two pointers
121 must be non-zero. Functions like strcpy or memcpy are supposed
122 to adjust all previous strinfo lengths, but not following strinfo
123 lengths (those are uncertain, usually invalidated during
124 maybe_invalidate, except when the alias oracle knows better).
125 Functions like strcat on the other side adjust the whole
126 related strinfo chain.
127 They are updated lazily, so to use the chain the same first fields
128 and si->prev->next == si->idx needs to be verified. */
129 int first;
130 int next;
131 int prev;
132 /* A flag whether the string is known to be written in the current
133 function. */
134 bool writable;
135 /* A flag for the next maybe_invalidate that this strinfo shouldn't
136 be invalidated. Always cleared by maybe_invalidate. */
137 bool dont_invalidate;
138 /* True if the string is known to be nul-terminated after NONZERO_CHARS
139 characters. False is useful when detecting strings that are built
140 up via successive memcpys. */
141 bool full_string_p;
144 /* Pool for allocating strinfo_struct entries. */
145 static object_allocator<strinfo> strinfo_pool ("strinfo pool");
147 /* Vector mapping positive string indexes to strinfo, for the
148 current basic block. The first pointer in the vector is special,
149 it is either NULL, meaning the vector isn't shared, or it is
150 a basic block pointer to the owner basic_block if shared.
151 If some other bb wants to modify the vector, the vector needs
152 to be unshared first, and only the owner bb is supposed to free it. */
153 static vec<strinfo *, va_heap, vl_embed> *stridx_to_strinfo;
155 /* One OFFSET->IDX mapping. */
156 struct stridxlist
158 struct stridxlist *next;
159 HOST_WIDE_INT offset;
160 int idx;
163 /* Hash table entry, mapping a DECL to a chain of OFFSET->IDX mappings. */
164 struct decl_stridxlist_map
166 struct tree_map_base base;
167 struct stridxlist list;
170 /* Hash table for mapping decls to a chained list of offset -> idx
171 mappings. */
172 typedef hash_map<tree_decl_hash, stridxlist> decl_to_stridxlist_htab_t;
173 static decl_to_stridxlist_htab_t *decl_to_stridxlist_htab;
175 /* Hash table mapping strlen (or strnlen with constant bound and return
176 smaller than bound) calls to stridx instances describing
177 the calls' arguments. Non-null only when warn_stringop_truncation
178 is non-zero. */
179 typedef std::pair<int, location_t> stridx_strlenloc;
180 static hash_map<tree, stridx_strlenloc> *strlen_to_stridx;
182 /* Obstack for struct stridxlist and struct decl_stridxlist_map. */
183 static struct obstack stridx_obstack;
185 /* Last memcpy statement if it could be adjusted if the trailing
186 '\0' written is immediately overwritten, or
187 *x = '\0' store that could be removed if it is immediately overwritten. */
188 struct laststmt_struct
190 gimple *stmt;
191 tree len;
192 int stridx;
193 } laststmt;
195 static int get_stridx_plus_constant (strinfo *, unsigned HOST_WIDE_INT, tree);
196 static void handle_builtin_stxncpy_strncat (bool, gimple_stmt_iterator *);
197 static bool handle_assign (gimple_stmt_iterator *, tree, bool *,
198 pointer_query &);
200 /* Sets MINMAX to either the constant value or the range VAL is in
201 and returns either the constant value or VAL on success or null
202 when the range couldn't be determined. Uses RVALS when nonnull
203 to determine the range, otherwise uses global range info. */
205 tree
206 get_range (tree val, gimple *stmt, wide_int minmax[2],
207 range_query *rvals /* = NULL */)
209 if (TREE_CODE (val) == INTEGER_CST)
211 minmax[0] = minmax[1] = wi::to_wide (val);
212 return val;
215 if (TREE_CODE (val) != SSA_NAME)
216 return NULL_TREE;
218 value_range vr;
219 if (rvals && stmt)
221 if (!rvals->range_of_expr (vr, val, stmt))
222 return NULL_TREE;
223 value_range_kind rng = vr.kind ();
224 if (rng != VR_RANGE)
225 return NULL_TREE;
227 minmax[0] = wi::to_wide (vr.min ());
228 minmax[1] = wi::to_wide (vr.max ());
229 return val;
232 // ?? This entire function should use get_range_query or get_global_range_query (),
233 // instead of doing something different for RVALS and global ranges.
235 if (!get_global_range_query ()->range_of_expr (vr, val) || vr.undefined_p ())
236 return NULL_TREE;
238 minmax[0] = wi::to_wide (vr.min ());
239 minmax[1] = wi::to_wide (vr.max ());
240 value_range_kind rng = vr.kind ();
241 if (rng == VR_RANGE)
242 /* This may be an inverted range whose MINMAX[1] < MINMAX[0]. */
243 return val;
245 if (rng == VR_ANTI_RANGE)
247 /* An anti-range is the same as an ordinary range with inverted
248 bounds (where MINMAX[1] < MINMAX[0] is true) that may result
249 from the conversion of a signed anti-range to unsigned. */
250 wide_int tmp = minmax[0];
251 minmax[0] = minmax[1] + 1;
252 minmax[1] = wi::sub (tmp, 1);
253 return val;
256 /* Do not handle anti-ranges and instead make use of the on-demand
257 VRP if/when it becomes available (hopefully in GCC 11). */
258 return NULL_TREE;
261 /* Return:
263 * +1 if SI is known to start with more than OFF nonzero characters.
265 * 0 if SI is known to start with exactly OFF nonzero characters.
267 * -1 if SI either does not start with OFF nonzero characters
268 or the relationship between the number of leading nonzero
269 characters in SI and OFF is unknown. */
271 static inline int
272 compare_nonzero_chars (strinfo *si, unsigned HOST_WIDE_INT off)
274 if (si->nonzero_chars
275 && TREE_CODE (si->nonzero_chars) == INTEGER_CST)
276 return compare_tree_int (si->nonzero_chars, off);
277 else
278 return -1;
281 /* Same as above but suitable also for strings with non-constant lengths.
282 Uses RVALS to determine length range. */
284 static int
285 compare_nonzero_chars (strinfo *si, unsigned HOST_WIDE_INT off,
286 range_query *rvals)
288 if (!si->nonzero_chars)
289 return -1;
291 if (TREE_CODE (si->nonzero_chars) == INTEGER_CST)
292 return compare_tree_int (si->nonzero_chars, off);
294 if (!rvals || TREE_CODE (si->nonzero_chars) != SSA_NAME)
295 return -1;
297 value_range vr;
298 if (!rvals->range_of_expr (vr, si->nonzero_chars, si->stmt))
299 return -1;
300 value_range_kind rng = vr.kind ();
301 if (rng != VR_RANGE)
302 return -1;
304 /* If the offset is less than the minimum length or if the bounds
305 of the length range are equal return the result of the comparison
306 same as in the constant case. Otherwise return a conservative
307 result. */
308 int cmpmin = compare_tree_int (vr.min (), off);
309 if (cmpmin > 0 || tree_int_cst_equal (vr.min (), vr.max ()))
310 return cmpmin;
312 return -1;
315 /* Return true if SI is known to be a zero-length string. */
317 static inline bool
318 zero_length_string_p (strinfo *si)
320 return si->full_string_p && integer_zerop (si->nonzero_chars);
323 /* Return strinfo vector entry IDX. */
325 static inline strinfo *
326 get_strinfo (int idx)
328 if (vec_safe_length (stridx_to_strinfo) <= (unsigned int) idx)
329 return NULL;
330 return (*stridx_to_strinfo)[idx];
333 /* Get the next strinfo in the chain after SI, or null if none. */
335 static inline strinfo *
336 get_next_strinfo (strinfo *si)
338 if (si->next == 0)
339 return NULL;
340 strinfo *nextsi = get_strinfo (si->next);
341 if (nextsi == NULL || nextsi->first != si->first || nextsi->prev != si->idx)
342 return NULL;
343 return nextsi;
346 /* Helper function for get_stridx. Return the strinfo index of the address
347 of EXP, which is available in PTR if nonnull. If OFFSET_OUT, it is
348 OK to return the index for some X <= &EXP and store &EXP - X in
349 *OFFSET_OUT. When RVALS is nonnull uses it to determine range
350 information. */
352 static int
353 get_addr_stridx (tree exp, tree ptr, unsigned HOST_WIDE_INT *offset_out,
354 range_query *rvals = NULL)
356 HOST_WIDE_INT off;
357 struct stridxlist *list, *last = NULL;
358 tree base;
360 if (!decl_to_stridxlist_htab)
361 return 0;
363 poly_int64 poff;
364 base = get_addr_base_and_unit_offset (exp, &poff);
365 if (base == NULL || !DECL_P (base) || !poff.is_constant (&off))
366 return 0;
368 list = decl_to_stridxlist_htab->get (base);
369 if (list == NULL)
370 return 0;
374 if (list->offset == off)
376 if (offset_out)
377 *offset_out = 0;
378 return list->idx;
380 if (list->offset > off)
381 return 0;
382 last = list;
383 list = list->next;
385 while (list);
387 if ((offset_out || ptr) && last && last->idx > 0)
389 unsigned HOST_WIDE_INT rel_off
390 = (unsigned HOST_WIDE_INT) off - last->offset;
391 strinfo *si = get_strinfo (last->idx);
392 if (si && compare_nonzero_chars (si, rel_off, rvals) >= 0)
394 if (offset_out)
396 *offset_out = rel_off;
397 return last->idx;
399 else
400 return get_stridx_plus_constant (si, rel_off, ptr);
403 return 0;
406 /* Returns string index for EXP. When EXP is an SSA_NAME that refers
407 to a known strinfo with an offset and OFFRNG is non-null, sets
408 both elements of the OFFRNG array to the range of the offset and
409 returns the index of the known strinfo. In this case the result
410 must not be used in for functions that modify the string.
411 When nonnull, uses RVALS to determine range information. */
413 static int
414 get_stridx (tree exp, wide_int offrng[2] = NULL, range_query *rvals = NULL)
416 if (offrng)
417 offrng[0] = offrng[1] = wi::zero (TYPE_PRECISION (ptrdiff_type_node));
419 if (TREE_CODE (exp) == SSA_NAME)
421 if (ssa_ver_to_stridx[SSA_NAME_VERSION (exp)])
422 return ssa_ver_to_stridx[SSA_NAME_VERSION (exp)];
424 tree e = exp;
425 int last_idx = 0;
426 HOST_WIDE_INT offset = 0;
427 /* Follow a chain of at most 5 assignments. */
428 for (int i = 0; i < 5; i++)
430 gimple *def_stmt = SSA_NAME_DEF_STMT (e);
431 if (!is_gimple_assign (def_stmt))
432 return last_idx;
434 tree_code rhs_code = gimple_assign_rhs_code (def_stmt);
435 tree ptr, off;
437 if (rhs_code == ADDR_EXPR)
439 /* Handle indices/offsets into VLAs which are implemented
440 as pointers to arrays. */
441 ptr = gimple_assign_rhs1 (def_stmt);
442 ptr = TREE_OPERAND (ptr, 0);
444 /* Handle also VLAs of types larger than char. */
445 if (tree eltsize = TYPE_SIZE_UNIT (TREE_TYPE (ptr)))
447 if (TREE_CODE (ptr) == ARRAY_REF)
449 off = TREE_OPERAND (ptr, 1);
450 ptr = TREE_OPERAND (ptr, 0);
451 if (!integer_onep (eltsize))
453 /* Scale the array index by the size of the element
454 type in the rare case that it's greater than
455 the typical 1 for char, making sure both operands
456 have the same type. */
457 eltsize = fold_convert (ssizetype, eltsize);
458 off = fold_convert (ssizetype, off);
459 off = fold_build2 (MULT_EXPR, ssizetype, off, eltsize);
462 else
463 off = integer_zero_node;
465 else
466 return 0;
468 if (TREE_CODE (ptr) != MEM_REF)
469 return 0;
471 /* Add the MEM_REF byte offset. */
472 tree mem_off = TREE_OPERAND (ptr, 1);
473 off = fold_build2 (PLUS_EXPR, TREE_TYPE (off), off, mem_off);
474 ptr = TREE_OPERAND (ptr, 0);
476 else if (rhs_code == POINTER_PLUS_EXPR)
478 ptr = gimple_assign_rhs1 (def_stmt);
479 off = gimple_assign_rhs2 (def_stmt);
481 else
482 return 0;
484 if (TREE_CODE (ptr) != SSA_NAME)
485 return 0;
487 if (!tree_fits_shwi_p (off))
489 if (int idx = ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)])
490 if (offrng)
492 /* Only when requested by setting OFFRNG to non-null,
493 return the index corresponding to the SSA_NAME.
494 Do this irrespective of the whether the offset
495 is known. */
496 if (get_range (off, def_stmt, offrng, rvals))
498 /* When the offset range is known, increment it
499 it by the constant offset computed in prior
500 iterations and store it in the OFFRNG array. */
501 offrng[0] += offset;
502 offrng[1] += offset;
504 else
506 /* When the offset range cannot be determined
507 store [0, SIZE_MAX] and let the caller decide
508 if the offset matters. */
509 offrng[1] = wi::to_wide (TYPE_MAX_VALUE (sizetype));
510 offrng[0] = wi::zero (offrng[1].get_precision ());
512 return idx;
514 return 0;
517 HOST_WIDE_INT this_off = tree_to_shwi (off);
518 if (offrng)
520 offrng[0] += wi::shwi (this_off, offrng->get_precision ());
521 offrng[1] += offrng[0];
524 if (this_off < 0)
525 return last_idx;
527 offset = (unsigned HOST_WIDE_INT) offset + this_off;
528 if (offset < 0)
529 return last_idx;
531 if (int idx = ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)])
533 strinfo *si = get_strinfo (idx);
534 if (si)
536 if (compare_nonzero_chars (si, offset) >= 0)
537 return get_stridx_plus_constant (si, offset, exp);
539 if (offrng)
540 last_idx = idx;
543 e = ptr;
546 return last_idx;
549 if (TREE_CODE (exp) == ADDR_EXPR)
551 int idx = get_addr_stridx (TREE_OPERAND (exp, 0), exp, NULL);
552 if (idx != 0)
553 return idx;
556 const char *p = c_getstr (exp);
557 if (p)
558 return ~(int) strlen (p);
560 return 0;
563 /* Return true if strinfo vector is shared with the immediate dominator. */
565 static inline bool
566 strinfo_shared (void)
568 return vec_safe_length (stridx_to_strinfo)
569 && (*stridx_to_strinfo)[0] != NULL;
572 /* Unshare strinfo vector that is shared with the immediate dominator. */
574 static void
575 unshare_strinfo_vec (void)
577 strinfo *si;
578 unsigned int i = 0;
580 gcc_assert (strinfo_shared ());
581 stridx_to_strinfo = vec_safe_copy (stridx_to_strinfo);
582 for (i = 1; vec_safe_iterate (stridx_to_strinfo, i, &si); ++i)
583 if (si != NULL)
584 si->refcount++;
585 (*stridx_to_strinfo)[0] = NULL;
588 /* Attempt to create a string index for exp, ADDR_EXPR's operand.
589 Return a pointer to the location where the string index can
590 be stored (if 0) or is stored, or NULL if this can't be tracked. */
592 static int *
593 addr_stridxptr (tree exp)
595 HOST_WIDE_INT off;
597 poly_int64 poff;
598 tree base = get_addr_base_and_unit_offset (exp, &poff);
599 if (base == NULL_TREE || !DECL_P (base) || !poff.is_constant (&off))
600 return NULL;
602 if (!decl_to_stridxlist_htab)
604 decl_to_stridxlist_htab
605 = new hash_map<tree_decl_hash, stridxlist> (64);
606 gcc_obstack_init (&stridx_obstack);
609 bool existed;
610 stridxlist *list = &decl_to_stridxlist_htab->get_or_insert (base, &existed);
611 if (existed)
613 int i;
614 stridxlist *before = NULL;
615 for (i = 0; i < 32; i++)
617 if (list->offset == off)
618 return &list->idx;
619 if (list->offset > off && before == NULL)
620 before = list;
621 if (list->next == NULL)
622 break;
623 list = list->next;
625 if (i == 32)
626 return NULL;
627 if (before)
629 list = before;
630 before = XOBNEW (&stridx_obstack, struct stridxlist);
631 *before = *list;
632 list->next = before;
633 list->offset = off;
634 list->idx = 0;
635 return &list->idx;
637 list->next = XOBNEW (&stridx_obstack, struct stridxlist);
638 list = list->next;
641 list->next = NULL;
642 list->offset = off;
643 list->idx = 0;
644 return &list->idx;
647 /* Create a new string index, or return 0 if reached limit. */
649 static int
650 new_stridx (tree exp)
652 int idx;
653 if (max_stridx >= param_max_tracked_strlens)
654 return 0;
655 if (TREE_CODE (exp) == SSA_NAME)
657 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
658 return 0;
659 idx = max_stridx++;
660 ssa_ver_to_stridx[SSA_NAME_VERSION (exp)] = idx;
661 return idx;
663 if (TREE_CODE (exp) == ADDR_EXPR)
665 int *pidx = addr_stridxptr (TREE_OPERAND (exp, 0));
666 if (pidx != NULL)
668 gcc_assert (*pidx == 0);
669 *pidx = max_stridx++;
670 return *pidx;
673 return 0;
676 /* Like new_stridx, but for ADDR_EXPR's operand instead. */
678 static int
679 new_addr_stridx (tree exp)
681 int *pidx;
682 if (max_stridx >= param_max_tracked_strlens)
683 return 0;
684 pidx = addr_stridxptr (exp);
685 if (pidx != NULL)
687 gcc_assert (*pidx == 0);
688 *pidx = max_stridx++;
689 return *pidx;
691 return 0;
694 /* Create a new strinfo. */
696 static strinfo *
697 new_strinfo (tree ptr, int idx, tree nonzero_chars, bool full_string_p)
699 strinfo *si = strinfo_pool.allocate ();
700 si->nonzero_chars = nonzero_chars;
701 STRIP_USELESS_TYPE_CONVERSION (ptr);
702 si->ptr = ptr;
703 si->stmt = NULL;
704 si->alloc = NULL;
705 si->endptr = NULL_TREE;
706 si->refcount = 1;
707 si->idx = idx;
708 si->first = 0;
709 si->prev = 0;
710 si->next = 0;
711 si->writable = false;
712 si->dont_invalidate = false;
713 si->full_string_p = full_string_p;
714 return si;
717 /* Decrease strinfo refcount and free it if not referenced anymore. */
719 static inline void
720 free_strinfo (strinfo *si)
722 if (si && --si->refcount == 0)
723 strinfo_pool.remove (si);
726 /* Set strinfo in the vector entry IDX to SI. */
728 static inline void
729 set_strinfo (int idx, strinfo *si)
731 if (vec_safe_length (stridx_to_strinfo) && (*stridx_to_strinfo)[0])
732 unshare_strinfo_vec ();
733 if (vec_safe_length (stridx_to_strinfo) <= (unsigned int) idx)
734 vec_safe_grow_cleared (stridx_to_strinfo, idx + 1, true);
735 (*stridx_to_strinfo)[idx] = si;
738 /* Return the first strinfo in the related strinfo chain
739 if all strinfos in between belong to the chain, otherwise NULL. */
741 static strinfo *
742 verify_related_strinfos (strinfo *origsi)
744 strinfo *si = origsi, *psi;
746 if (origsi->first == 0)
747 return NULL;
748 for (; si->prev; si = psi)
750 if (si->first != origsi->first)
751 return NULL;
752 psi = get_strinfo (si->prev);
753 if (psi == NULL)
754 return NULL;
755 if (psi->next != si->idx)
756 return NULL;
758 if (si->idx != si->first)
759 return NULL;
760 return si;
763 /* Set SI's endptr to ENDPTR and compute its length based on SI->ptr.
764 Use LOC for folding. */
766 static void
767 set_endptr_and_length (location_t loc, strinfo *si, tree endptr)
769 si->endptr = endptr;
770 si->stmt = NULL;
771 tree start_as_size = fold_convert_loc (loc, size_type_node, si->ptr);
772 tree end_as_size = fold_convert_loc (loc, size_type_node, endptr);
773 si->nonzero_chars = fold_build2_loc (loc, MINUS_EXPR, size_type_node,
774 end_as_size, start_as_size);
775 si->full_string_p = true;
778 /* Return the string length, or NULL if it can't be computed.
779 The length may but need not be constant. Instead, it might be
780 the result of a strlen() call. */
782 static tree
783 get_string_length (strinfo *si)
785 /* If the length has already been computed return it if it's exact
786 (i.e., the string is nul-terminated at NONZERO_CHARS), or return
787 null if it isn't. */
788 if (si->nonzero_chars)
789 return si->full_string_p ? si->nonzero_chars : NULL;
791 /* If the string is the result of one of the built-in calls below
792 attempt to compute the length from the call statement. */
793 if (si->stmt)
795 gimple *stmt = si->stmt, *lenstmt;
796 tree callee, lhs, fn, tem;
797 location_t loc;
798 gimple_stmt_iterator gsi;
800 gcc_assert (is_gimple_call (stmt));
801 callee = gimple_call_fndecl (stmt);
802 gcc_assert (callee && fndecl_built_in_p (callee, BUILT_IN_NORMAL));
803 lhs = gimple_call_lhs (stmt);
804 /* unshare_strinfo is intentionally not called here. The (delayed)
805 transformation of strcpy or strcat into stpcpy is done at the place
806 of the former strcpy/strcat call and so can affect all the strinfos
807 with the same stmt. If they were unshared before and transformation
808 has been already done, the handling of BUILT_IN_STPCPY{,_CHK} should
809 just compute the right length. */
810 switch (DECL_FUNCTION_CODE (callee))
812 case BUILT_IN_STRCAT:
813 case BUILT_IN_STRCAT_CHK:
814 gsi = gsi_for_stmt (stmt);
815 fn = builtin_decl_implicit (BUILT_IN_STRLEN);
816 gcc_assert (lhs == NULL_TREE);
817 tem = unshare_expr (gimple_call_arg (stmt, 0));
818 lenstmt = gimple_build_call (fn, 1, tem);
819 lhs = make_ssa_name (TREE_TYPE (TREE_TYPE (fn)), lenstmt);
820 gimple_call_set_lhs (lenstmt, lhs);
821 gimple_set_vuse (lenstmt, gimple_vuse (stmt));
822 gsi_insert_before (&gsi, lenstmt, GSI_SAME_STMT);
823 tem = gimple_call_arg (stmt, 0);
824 if (!ptrofftype_p (TREE_TYPE (lhs)))
826 lhs = convert_to_ptrofftype (lhs);
827 lhs = force_gimple_operand_gsi (&gsi, lhs, true, NULL_TREE,
828 true, GSI_SAME_STMT);
830 lenstmt = gimple_build_assign
831 (make_ssa_name (TREE_TYPE (gimple_call_arg (stmt, 0))),
832 POINTER_PLUS_EXPR,tem, lhs);
833 gsi_insert_before (&gsi, lenstmt, GSI_SAME_STMT);
834 gimple_call_set_arg (stmt, 0, gimple_assign_lhs (lenstmt));
835 lhs = NULL_TREE;
836 /* FALLTHRU */
837 case BUILT_IN_STRCPY:
838 case BUILT_IN_STRCPY_CHK:
839 gcc_assert (builtin_decl_implicit_p (BUILT_IN_STPCPY));
840 if (gimple_call_num_args (stmt) == 2)
841 fn = builtin_decl_implicit (BUILT_IN_STPCPY);
842 else
843 fn = builtin_decl_explicit (BUILT_IN_STPCPY_CHK);
844 gcc_assert (lhs == NULL_TREE);
845 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
847 fprintf (dump_file, "Optimizing: ");
848 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
850 gimple_call_set_fndecl (stmt, fn);
851 lhs = make_ssa_name (TREE_TYPE (TREE_TYPE (fn)), stmt);
852 gimple_call_set_lhs (stmt, lhs);
853 update_stmt (stmt);
854 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
856 fprintf (dump_file, "into: ");
857 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
859 /* FALLTHRU */
860 case BUILT_IN_STPCPY:
861 case BUILT_IN_STPCPY_CHK:
862 gcc_assert (lhs != NULL_TREE);
863 loc = gimple_location (stmt);
864 set_endptr_and_length (loc, si, lhs);
865 for (strinfo *chainsi = verify_related_strinfos (si);
866 chainsi != NULL;
867 chainsi = get_next_strinfo (chainsi))
868 if (chainsi->nonzero_chars == NULL)
869 set_endptr_and_length (loc, chainsi, lhs);
870 break;
871 case BUILT_IN_ALLOCA:
872 case BUILT_IN_ALLOCA_WITH_ALIGN:
873 case BUILT_IN_MALLOC:
874 break;
875 /* BUILT_IN_CALLOC always has si->nonzero_chars set. */
876 default:
877 gcc_unreachable ();
878 break;
882 return si->nonzero_chars;
885 /* Dump strlen data to FP for statement STMT. When non-null, RVALS
886 points to the valuation engine used to calculate ranges, and is
887 used to dump strlen range for non-constant results. */
889 DEBUG_FUNCTION void
890 dump_strlen_info (FILE *fp, gimple *stmt, range_query *rvals)
892 if (stmt)
894 fprintf (fp, "\nDumping strlen pass data after ");
895 print_gimple_expr (fp, stmt, TDF_LINENO);
896 fputc ('\n', fp);
898 else
899 fprintf (fp, "\nDumping strlen pass data\n");
901 fprintf (fp, "max_stridx = %i\n", max_stridx);
902 fprintf (fp, "ssa_ver_to_stridx has %u elements\n",
903 ssa_ver_to_stridx.length ());
904 fprintf (fp, "stridx_to_strinfo");
905 if (stridx_to_strinfo)
907 fprintf (fp, " has %u elements\n", stridx_to_strinfo->length ());
908 for (unsigned i = 0; i != stridx_to_strinfo->length (); ++i)
910 if (strinfo *si = (*stridx_to_strinfo)[i])
912 if (!si->idx)
913 continue;
914 fprintf (fp, " idx = %i", si->idx);
915 if (si->ptr)
917 fprintf (fp, ", ptr = ");
918 print_generic_expr (fp, si->ptr);
921 if (si->nonzero_chars)
923 fprintf (fp, ", nonzero_chars = ");
924 print_generic_expr (fp, si->nonzero_chars);
925 if (TREE_CODE (si->nonzero_chars) == SSA_NAME)
927 value_range_kind rng = VR_UNDEFINED;
928 wide_int min, max;
929 if (rvals)
931 value_range vr;
932 rvals->range_of_expr (vr, si->nonzero_chars,
933 si->stmt);
934 rng = vr.kind ();
935 if (range_int_cst_p (&vr))
937 min = wi::to_wide (vr.min ());
938 max = wi::to_wide (vr.max ());
940 else
941 rng = VR_UNDEFINED;
943 else
945 value_range vr;
946 get_global_range_query ()->range_of_expr (vr,
947 si->nonzero_chars);
948 rng = vr.kind ();
949 if (!vr.undefined_p ())
951 min = wi::to_wide (vr.min ());
952 max = wi::to_wide (vr.max ());
956 if (rng == VR_RANGE || rng == VR_ANTI_RANGE)
958 fprintf (fp, " %s[%llu, %llu]",
959 rng == VR_RANGE ? "" : "~",
960 (long long) min.to_uhwi (),
961 (long long) max.to_uhwi ());
966 fprintf (fp, ", refcount = %i", si->refcount);
967 if (si->stmt)
969 fprintf (fp, ", stmt = ");
970 print_gimple_expr (fp, si->stmt, 0);
972 if (si->alloc)
974 fprintf (fp, ", alloc = ");
975 print_gimple_expr (fp, si->alloc, 0);
977 if (si->writable)
978 fprintf (fp, ", writable");
979 if (si->dont_invalidate)
980 fprintf (fp, ", dont_invalidate");
981 if (si->full_string_p)
982 fprintf (fp, ", full_string_p");
983 if (strinfo *next = get_next_strinfo (si))
985 fprintf (fp, ", {");
987 fprintf (fp, "%i%s", next->idx, next->first ? ", " : "");
988 while ((next = get_next_strinfo (next)));
989 fprintf (fp, "}");
991 fputs ("\n", fp);
995 else
996 fprintf (fp, " = null\n");
998 fprintf (fp, "decl_to_stridxlist_htab");
999 if (decl_to_stridxlist_htab)
1001 fputs ("\n", fp);
1002 typedef decl_to_stridxlist_htab_t::iterator iter_t;
1003 for (iter_t it = decl_to_stridxlist_htab->begin ();
1004 it != decl_to_stridxlist_htab->end (); ++it)
1006 tree decl = (*it).first;
1007 stridxlist *list = &(*it).second;
1008 fprintf (fp, " decl = ");
1009 print_generic_expr (fp, decl);
1010 if (list)
1012 fprintf (fp, ", offsets = {");
1013 for (; list; list = list->next)
1014 fprintf (fp, "%lli%s", (long long) list->offset,
1015 list->next ? ", " : "");
1016 fputs ("}", fp);
1018 fputs ("\n", fp);
1021 else
1022 fprintf (fp, " = null\n");
1024 if (laststmt.stmt)
1026 fprintf (fp, "laststmt = ");
1027 print_gimple_expr (fp, laststmt.stmt, 0);
1028 fprintf (fp, ", len = ");
1029 print_generic_expr (fp, laststmt.len);
1030 fprintf (fp, ", stridx = %i\n", laststmt.stridx);
1034 /* Attempt to determine the length of the string SRC. On success, store
1035 the length in *PDATA and return true. Otherwise, return false.
1036 VISITED is a bitmap of visited PHI nodes. RVALS points to the valuation
1037 engine used to calculate ranges. PSSA_DEF_MAX to an SSA_NAME
1038 assignment limit used to prevent runaway recursion. */
1040 static bool
1041 get_range_strlen_dynamic (tree src, gimple *stmt,
1042 c_strlen_data *pdata, bitmap *visited,
1043 range_query *rvals, unsigned *pssa_def_max)
1045 int idx = get_stridx (src);
1046 if (!idx)
1048 if (TREE_CODE (src) == SSA_NAME)
1050 gimple *def_stmt = SSA_NAME_DEF_STMT (src);
1051 if (gimple_code (def_stmt) == GIMPLE_PHI)
1053 if (!*visited)
1054 *visited = BITMAP_ALLOC (NULL);
1056 if (!bitmap_set_bit (*visited, SSA_NAME_VERSION (src)))
1057 return true;
1059 if (*pssa_def_max == 0)
1060 return false;
1062 --*pssa_def_max;
1064 /* Iterate over the PHI arguments and determine the minimum
1065 and maximum length/size of each and incorporate them into
1066 the overall result. */
1067 gphi *phi = as_a <gphi *> (def_stmt);
1068 for (unsigned i = 0; i != gimple_phi_num_args (phi); ++i)
1070 tree arg = gimple_phi_arg_def (phi, i);
1071 if (arg == gimple_phi_result (def_stmt))
1072 continue;
1074 c_strlen_data argdata = { };
1075 if (get_range_strlen_dynamic (arg, phi, &argdata, visited,
1076 rvals, pssa_def_max))
1078 /* Set the DECL of an unterminated array this argument
1079 refers to if one hasn't been found yet. */
1080 if (!pdata->decl && argdata.decl)
1081 pdata->decl = argdata.decl;
1083 if (!argdata.minlen
1084 || (integer_zerop (argdata.minlen)
1085 && (!argdata.maxbound
1086 || integer_all_onesp (argdata.maxbound))
1087 && integer_all_onesp (argdata.maxlen)))
1089 /* Set the upper bound of the length to unbounded. */
1090 pdata->maxlen = build_all_ones_cst (size_type_node);
1091 continue;
1094 /* Adjust the minimum and maximum length determined
1095 so far and the upper bound on the array size. */
1096 if (!pdata->minlen
1097 || tree_int_cst_lt (argdata.minlen, pdata->minlen))
1098 pdata->minlen = argdata.minlen;
1099 if (!pdata->maxlen
1100 || (argdata.maxlen
1101 && tree_int_cst_lt (pdata->maxlen, argdata.maxlen)))
1102 pdata->maxlen = argdata.maxlen;
1103 if (!pdata->maxbound
1104 || TREE_CODE (pdata->maxbound) != INTEGER_CST
1105 || (argdata.maxbound
1106 && tree_int_cst_lt (pdata->maxbound,
1107 argdata.maxbound)
1108 && !integer_all_onesp (argdata.maxbound)))
1109 pdata->maxbound = argdata.maxbound;
1111 else
1112 pdata->maxlen = build_all_ones_cst (size_type_node);
1115 return true;
1119 /* Return success regardless of the result and handle *PDATA
1120 in the caller. */
1121 get_range_strlen (src, pdata, 1);
1122 return true;
1125 if (idx < 0)
1127 /* SRC is a string of constant length. */
1128 pdata->minlen = build_int_cst (size_type_node, ~idx);
1129 pdata->maxlen = pdata->minlen;
1130 pdata->maxbound = pdata->maxlen;
1131 return true;
1134 if (strinfo *si = get_strinfo (idx))
1136 pdata->minlen = get_string_length (si);
1137 if (!pdata->minlen && si->nonzero_chars)
1139 if (TREE_CODE (si->nonzero_chars) == INTEGER_CST)
1140 pdata->minlen = si->nonzero_chars;
1141 else if (TREE_CODE (si->nonzero_chars) == SSA_NAME)
1143 value_range vr;
1144 rvals->range_of_expr (vr, si->nonzero_chars, si->stmt);
1145 if (range_int_cst_p (&vr))
1147 pdata->minlen = vr.min ();
1148 pdata->maxlen = vr.max ();
1150 else
1151 pdata->minlen = build_zero_cst (size_type_node);
1153 else
1154 pdata->minlen = build_zero_cst (size_type_node);
1156 tree base = si->ptr;
1157 if (TREE_CODE (base) == ADDR_EXPR)
1158 base = TREE_OPERAND (base, 0);
1160 HOST_WIDE_INT off;
1161 poly_int64 poff;
1162 base = get_addr_base_and_unit_offset (base, &poff);
1163 if (base
1164 && DECL_P (base)
1165 && TREE_CODE (TREE_TYPE (base)) == ARRAY_TYPE
1166 && TYPE_SIZE_UNIT (TREE_TYPE (base))
1167 && poff.is_constant (&off))
1169 tree basetype = TREE_TYPE (base);
1170 tree size = TYPE_SIZE_UNIT (basetype);
1171 if (TREE_CODE (size) == INTEGER_CST)
1173 ++off; /* Increment for the terminating nul. */
1174 tree toffset = build_int_cst (size_type_node, off);
1175 pdata->maxlen = fold_build2 (MINUS_EXPR, size_type_node, size,
1176 toffset);
1177 pdata->maxbound = pdata->maxlen;
1179 else
1180 pdata->maxlen = build_all_ones_cst (size_type_node);
1182 else
1183 pdata->maxlen = build_all_ones_cst (size_type_node);
1185 else if (pdata->minlen && TREE_CODE (pdata->minlen) == SSA_NAME)
1187 value_range vr;
1188 rvals->range_of_expr (vr, si->nonzero_chars, stmt);
1189 if (range_int_cst_p (&vr))
1191 pdata->minlen = vr.min ();
1192 pdata->maxlen = vr.max ();
1193 pdata->maxbound = pdata->maxlen;
1195 else
1197 pdata->minlen = build_zero_cst (size_type_node);
1198 pdata->maxlen = build_all_ones_cst (size_type_node);
1201 else if (pdata->minlen && TREE_CODE (pdata->minlen) == INTEGER_CST)
1203 pdata->maxlen = pdata->minlen;
1204 pdata->maxbound = pdata->minlen;
1206 else
1208 /* For PDATA->MINLEN that's a non-constant expression such
1209 as PLUS_EXPR whose value range is unknown, set the bounds
1210 to zero and SIZE_MAX. */
1211 pdata->minlen = build_zero_cst (size_type_node);
1212 pdata->maxlen = build_all_ones_cst (size_type_node);
1215 return true;
1218 return false;
1221 /* Analogous to get_range_strlen but for dynamically created strings,
1222 i.e., those created by calls to strcpy as opposed to just string
1223 constants.
1224 Try to obtain the range of the lengths of the string(s) referenced
1225 by SRC, or the size of the largest array SRC refers to if the range
1226 of lengths cannot be determined, and store all in *PDATA. RVALS
1227 points to the valuation engine used to calculate ranges. */
1229 void
1230 get_range_strlen_dynamic (tree src, gimple *stmt, c_strlen_data *pdata,
1231 range_query *rvals)
1233 bitmap visited = NULL;
1234 tree maxbound = pdata->maxbound;
1236 unsigned limit = param_ssa_name_def_chain_limit;
1237 if (!get_range_strlen_dynamic (src, stmt, pdata, &visited, rvals, &limit))
1239 /* On failure extend the length range to an impossible maximum
1240 (a valid MAXLEN must be less than PTRDIFF_MAX - 1). Other
1241 members can stay unchanged regardless. */
1242 pdata->minlen = ssize_int (0);
1243 pdata->maxlen = build_all_ones_cst (size_type_node);
1245 else if (!pdata->minlen)
1246 pdata->minlen = ssize_int (0);
1248 /* If it's unchanged from it initial non-null value, set the conservative
1249 MAXBOUND to SIZE_MAX. Otherwise leave it null (if it is null). */
1250 if (maxbound && pdata->maxbound == maxbound)
1251 pdata->maxbound = build_all_ones_cst (size_type_node);
1253 if (visited)
1254 BITMAP_FREE (visited);
1257 /* Invalidate string length information for strings whose length might
1258 change due to stores in STMT, except those marked DONT_INVALIDATE.
1259 For string-modifying statements, ZERO_WRITE is set when the statement
1260 wrote only zeros.
1261 Returns true if any STRIDX_TO_STRINFO entries were considered
1262 for invalidation. */
1264 static bool
1265 maybe_invalidate (gimple *stmt, bool zero_write = false)
1267 if (dump_file && (dump_flags & TDF_DETAILS))
1269 fprintf (dump_file, "%s called for ", __func__);
1270 print_gimple_stmt (dump_file, stmt, TDF_LINENO);
1273 strinfo *si;
1274 bool nonempty = false;
1276 for (unsigned i = 1; vec_safe_iterate (stridx_to_strinfo, i, &si); ++i)
1278 if (si == NULL || !POINTER_TYPE_P (TREE_TYPE (si->ptr)))
1279 continue;
1281 nonempty = true;
1283 /* Unconditionally reset DONT_INVALIDATE. */
1284 bool dont_invalidate = si->dont_invalidate;
1285 si->dont_invalidate = false;
1287 if (dont_invalidate)
1288 continue;
1290 ao_ref r;
1291 tree size = si->nonzero_chars;
1292 ao_ref_init_from_ptr_and_size (&r, si->ptr, size);
1293 /* Include the terminating nul in the size of the string
1294 to consider when determining possible clobber. But do not
1295 add it to 'size' since we don't know whether it would
1296 actually fit the allocated area. */
1297 if (known_size_p (r.size))
1299 if (known_le (r.size, HOST_WIDE_INT_MAX - BITS_PER_UNIT))
1300 r.max_size += BITS_PER_UNIT;
1301 else
1302 r.max_size = -1;
1304 if (stmt_may_clobber_ref_p_1 (stmt, &r))
1306 if (dump_file && (dump_flags & TDF_DETAILS))
1308 fputs (" statement may clobber object ", dump_file);
1309 print_generic_expr (dump_file, si->ptr);
1310 if (size && tree_fits_uhwi_p (size))
1311 fprintf (dump_file, " " HOST_WIDE_INT_PRINT_UNSIGNED
1312 " bytes in size", tree_to_uhwi (size));
1313 fputc ('\n', dump_file);
1316 set_strinfo (i, NULL);
1317 free_strinfo (si);
1318 continue;
1321 if (size
1322 && !zero_write
1323 && si->stmt
1324 && is_gimple_call (si->stmt)
1325 && (DECL_FUNCTION_CODE (gimple_call_fndecl (si->stmt))
1326 == BUILT_IN_CALLOC))
1328 /* If the clobber test above considered the length of
1329 the string (including the nul), then for (potentially)
1330 non-zero writes that might modify storage allocated by
1331 calloc consider the whole object and if it might be
1332 clobbered by the statement reset the statement. */
1333 ao_ref_init_from_ptr_and_size (&r, si->ptr, NULL_TREE);
1334 if (stmt_may_clobber_ref_p_1 (stmt, &r))
1335 si->stmt = NULL;
1339 if (dump_file && (dump_flags & TDF_DETAILS))
1340 fprintf (dump_file, "%s returns %i\n", __func__, nonempty);
1342 return nonempty;
1345 /* Unshare strinfo record SI, if it has refcount > 1 or
1346 if stridx_to_strinfo vector is shared with some other
1347 bbs. */
1349 static strinfo *
1350 unshare_strinfo (strinfo *si)
1352 strinfo *nsi;
1354 if (si->refcount == 1 && !strinfo_shared ())
1355 return si;
1357 nsi = new_strinfo (si->ptr, si->idx, si->nonzero_chars, si->full_string_p);
1358 nsi->stmt = si->stmt;
1359 nsi->alloc = si->alloc;
1360 nsi->endptr = si->endptr;
1361 nsi->first = si->first;
1362 nsi->prev = si->prev;
1363 nsi->next = si->next;
1364 nsi->writable = si->writable;
1365 set_strinfo (si->idx, nsi);
1366 free_strinfo (si);
1367 return nsi;
1370 /* Attempt to create a new strinfo for BASESI + OFF, or find existing
1371 strinfo if there is any. Return it's idx, or 0 if no strinfo has
1372 been created. */
1374 static int
1375 get_stridx_plus_constant (strinfo *basesi, unsigned HOST_WIDE_INT off,
1376 tree ptr)
1378 if (TREE_CODE (ptr) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (ptr))
1379 return 0;
1381 if (compare_nonzero_chars (basesi, off) < 0
1382 || !tree_fits_uhwi_p (basesi->nonzero_chars))
1383 return 0;
1385 unsigned HOST_WIDE_INT nonzero_chars
1386 = tree_to_uhwi (basesi->nonzero_chars) - off;
1387 strinfo *si = basesi, *chainsi;
1388 if (si->first || si->prev || si->next)
1389 si = verify_related_strinfos (basesi);
1390 if (si == NULL
1391 || si->nonzero_chars == NULL_TREE
1392 || TREE_CODE (si->nonzero_chars) != INTEGER_CST)
1393 return 0;
1395 if (TREE_CODE (ptr) == SSA_NAME
1396 && ssa_ver_to_stridx.length () <= SSA_NAME_VERSION (ptr))
1397 ssa_ver_to_stridx.safe_grow_cleared (num_ssa_names, true);
1399 gcc_checking_assert (compare_tree_int (si->nonzero_chars, off) != -1);
1400 for (chainsi = si; chainsi->next; chainsi = si)
1402 si = get_next_strinfo (chainsi);
1403 if (si == NULL
1404 || si->nonzero_chars == NULL_TREE
1405 || TREE_CODE (si->nonzero_chars) != INTEGER_CST)
1406 break;
1407 int r = compare_tree_int (si->nonzero_chars, nonzero_chars);
1408 if (r != 1)
1410 if (r == 0)
1412 if (TREE_CODE (ptr) == SSA_NAME)
1413 ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)] = si->idx;
1414 else
1416 int *pidx = addr_stridxptr (TREE_OPERAND (ptr, 0));
1417 if (pidx != NULL && *pidx == 0)
1418 *pidx = si->idx;
1420 return si->idx;
1422 break;
1426 int idx = new_stridx (ptr);
1427 if (idx == 0)
1428 return 0;
1429 si = new_strinfo (ptr, idx, build_int_cst (size_type_node, nonzero_chars),
1430 basesi->full_string_p);
1431 set_strinfo (idx, si);
1432 if (strinfo *nextsi = get_strinfo (chainsi->next))
1434 nextsi = unshare_strinfo (nextsi);
1435 si->next = nextsi->idx;
1436 nextsi->prev = idx;
1438 chainsi = unshare_strinfo (chainsi);
1439 if (chainsi->first == 0)
1440 chainsi->first = chainsi->idx;
1441 chainsi->next = idx;
1442 if (chainsi->endptr == NULL_TREE && zero_length_string_p (si))
1443 chainsi->endptr = ptr;
1444 si->endptr = chainsi->endptr;
1445 si->prev = chainsi->idx;
1446 si->first = chainsi->first;
1447 si->writable = chainsi->writable;
1448 return si->idx;
1451 /* Note that PTR, a pointer SSA_NAME initialized in the current stmt, points
1452 to a zero-length string and if possible chain it to a related strinfo
1453 chain whose part is or might be CHAINSI. */
1455 static strinfo *
1456 zero_length_string (tree ptr, strinfo *chainsi)
1458 strinfo *si;
1459 int idx;
1460 if (ssa_ver_to_stridx.length () <= SSA_NAME_VERSION (ptr))
1461 ssa_ver_to_stridx.safe_grow_cleared (num_ssa_names, true);
1462 gcc_checking_assert (TREE_CODE (ptr) == SSA_NAME
1463 && ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)] == 0);
1465 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (ptr))
1466 return NULL;
1467 if (chainsi != NULL)
1469 si = verify_related_strinfos (chainsi);
1470 if (si)
1474 /* We shouldn't mix delayed and non-delayed lengths. */
1475 gcc_assert (si->full_string_p);
1476 if (si->endptr == NULL_TREE)
1478 si = unshare_strinfo (si);
1479 si->endptr = ptr;
1481 chainsi = si;
1482 si = get_next_strinfo (si);
1484 while (si != NULL);
1485 if (zero_length_string_p (chainsi))
1487 if (chainsi->next)
1489 chainsi = unshare_strinfo (chainsi);
1490 chainsi->next = 0;
1492 ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)] = chainsi->idx;
1493 return chainsi;
1496 else
1498 /* We shouldn't mix delayed and non-delayed lengths. */
1499 gcc_assert (chainsi->full_string_p);
1500 if (chainsi->first || chainsi->prev || chainsi->next)
1502 chainsi = unshare_strinfo (chainsi);
1503 chainsi->first = 0;
1504 chainsi->prev = 0;
1505 chainsi->next = 0;
1509 idx = new_stridx (ptr);
1510 if (idx == 0)
1511 return NULL;
1512 si = new_strinfo (ptr, idx, build_int_cst (size_type_node, 0), true);
1513 set_strinfo (idx, si);
1514 si->endptr = ptr;
1515 if (chainsi != NULL)
1517 chainsi = unshare_strinfo (chainsi);
1518 if (chainsi->first == 0)
1519 chainsi->first = chainsi->idx;
1520 chainsi->next = idx;
1521 if (chainsi->endptr == NULL_TREE)
1522 chainsi->endptr = ptr;
1523 si->prev = chainsi->idx;
1524 si->first = chainsi->first;
1525 si->writable = chainsi->writable;
1527 return si;
1530 /* For strinfo ORIGSI whose length has been just updated, adjust other
1531 related strinfos so that they match the new ORIGSI. This involves:
1533 - adding ADJ to the nonzero_chars fields
1534 - copying full_string_p from the new ORIGSI. */
1536 static void
1537 adjust_related_strinfos (location_t loc, strinfo *origsi, tree adj)
1539 strinfo *si = verify_related_strinfos (origsi);
1541 if (si == NULL)
1542 return;
1544 while (1)
1546 strinfo *nsi;
1548 if (si != origsi)
1550 tree tem;
1552 si = unshare_strinfo (si);
1553 /* We shouldn't see delayed lengths here; the caller must
1554 have calculated the old length in order to calculate
1555 the adjustment. */
1556 gcc_assert (si->nonzero_chars);
1557 tem = fold_convert_loc (loc, TREE_TYPE (si->nonzero_chars), adj);
1558 si->nonzero_chars = fold_build2_loc (loc, PLUS_EXPR,
1559 TREE_TYPE (si->nonzero_chars),
1560 si->nonzero_chars, tem);
1561 si->full_string_p = origsi->full_string_p;
1563 si->endptr = NULL_TREE;
1564 si->dont_invalidate = true;
1566 nsi = get_next_strinfo (si);
1567 if (nsi == NULL)
1568 return;
1569 si = nsi;
1573 /* Find if there are other SSA_NAME pointers equal to PTR
1574 for which we don't track their string lengths yet. If so, use
1575 IDX for them. */
1577 static void
1578 find_equal_ptrs (tree ptr, int idx)
1580 if (TREE_CODE (ptr) != SSA_NAME)
1581 return;
1582 while (1)
1584 gimple *stmt = SSA_NAME_DEF_STMT (ptr);
1585 if (!is_gimple_assign (stmt))
1586 return;
1587 ptr = gimple_assign_rhs1 (stmt);
1588 switch (gimple_assign_rhs_code (stmt))
1590 case SSA_NAME:
1591 break;
1592 CASE_CONVERT:
1593 if (!POINTER_TYPE_P (TREE_TYPE (ptr)))
1594 return;
1595 if (TREE_CODE (ptr) == SSA_NAME)
1596 break;
1597 if (TREE_CODE (ptr) != ADDR_EXPR)
1598 return;
1599 /* FALLTHRU */
1600 case ADDR_EXPR:
1602 int *pidx = addr_stridxptr (TREE_OPERAND (ptr, 0));
1603 if (pidx != NULL && *pidx == 0)
1604 *pidx = idx;
1605 return;
1607 default:
1608 return;
1611 /* We might find an endptr created in this pass. Grow the
1612 vector in that case. */
1613 if (ssa_ver_to_stridx.length () <= SSA_NAME_VERSION (ptr))
1614 ssa_ver_to_stridx.safe_grow_cleared (num_ssa_names, true);
1616 if (ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)] != 0)
1617 return;
1618 ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)] = idx;
1622 /* Return true if STMT is a call to a builtin function with the right
1623 arguments and attributes that should be considered for optimization
1624 by this pass. */
1626 static bool
1627 valid_builtin_call (gimple *stmt)
1629 if (!gimple_call_builtin_p (stmt, BUILT_IN_NORMAL))
1630 return false;
1632 tree callee = gimple_call_fndecl (stmt);
1633 tree decl = builtin_decl_explicit (DECL_FUNCTION_CODE (callee));
1634 if (decl
1635 && decl != callee
1636 && !gimple_builtin_call_types_compatible_p (stmt, decl))
1637 return false;
1639 switch (DECL_FUNCTION_CODE (callee))
1641 case BUILT_IN_MEMCMP:
1642 case BUILT_IN_MEMCMP_EQ:
1643 case BUILT_IN_STRCMP:
1644 case BUILT_IN_STRNCMP:
1645 case BUILT_IN_STRCHR:
1646 case BUILT_IN_STRLEN:
1647 case BUILT_IN_STRNLEN:
1648 /* The above functions should be pure. Punt if they aren't. */
1649 if (gimple_vdef (stmt) || gimple_vuse (stmt) == NULL_TREE)
1650 return false;
1651 break;
1653 case BUILT_IN_ALLOCA:
1654 case BUILT_IN_ALLOCA_WITH_ALIGN:
1655 case BUILT_IN_CALLOC:
1656 case BUILT_IN_MALLOC:
1657 case BUILT_IN_MEMCPY:
1658 case BUILT_IN_MEMCPY_CHK:
1659 case BUILT_IN_MEMPCPY:
1660 case BUILT_IN_MEMPCPY_CHK:
1661 case BUILT_IN_MEMSET:
1662 case BUILT_IN_STPCPY:
1663 case BUILT_IN_STPCPY_CHK:
1664 case BUILT_IN_STPNCPY:
1665 case BUILT_IN_STPNCPY_CHK:
1666 case BUILT_IN_STRCAT:
1667 case BUILT_IN_STRCAT_CHK:
1668 case BUILT_IN_STRCPY:
1669 case BUILT_IN_STRCPY_CHK:
1670 case BUILT_IN_STRNCAT:
1671 case BUILT_IN_STRNCAT_CHK:
1672 case BUILT_IN_STRNCPY:
1673 case BUILT_IN_STRNCPY_CHK:
1674 /* The above functions should be neither const nor pure. Punt if they
1675 aren't. */
1676 if (gimple_vdef (stmt) == NULL_TREE || gimple_vuse (stmt) == NULL_TREE)
1677 return false;
1678 break;
1680 default:
1681 break;
1684 return true;
1687 /* If the last .MEM setter statement before STMT is
1688 memcpy (x, y, strlen (y) + 1), the only .MEM use of it is STMT
1689 and STMT is known to overwrite x[strlen (x)], adjust the last memcpy to
1690 just memcpy (x, y, strlen (y)). SI must be the zero length
1691 strinfo. */
1693 static void
1694 adjust_last_stmt (strinfo *si, gimple *stmt, bool is_strcat,
1695 pointer_query &ptr_qry)
1697 tree vuse, callee, len;
1698 struct laststmt_struct last = laststmt;
1699 strinfo *lastsi, *firstsi;
1700 unsigned len_arg_no = 2;
1702 laststmt.stmt = NULL;
1703 laststmt.len = NULL_TREE;
1704 laststmt.stridx = 0;
1706 if (last.stmt == NULL)
1707 return;
1709 vuse = gimple_vuse (stmt);
1710 if (vuse == NULL_TREE
1711 || SSA_NAME_DEF_STMT (vuse) != last.stmt
1712 || !has_single_use (vuse))
1713 return;
1715 gcc_assert (last.stridx > 0);
1716 lastsi = get_strinfo (last.stridx);
1717 if (lastsi == NULL)
1718 return;
1720 if (lastsi != si)
1722 if (lastsi->first == 0 || lastsi->first != si->first)
1723 return;
1725 firstsi = verify_related_strinfos (si);
1726 if (firstsi == NULL)
1727 return;
1728 while (firstsi != lastsi)
1730 firstsi = get_next_strinfo (firstsi);
1731 if (firstsi == NULL)
1732 return;
1736 if (!is_strcat && !zero_length_string_p (si))
1737 return;
1739 if (is_gimple_assign (last.stmt))
1741 gimple_stmt_iterator gsi;
1743 if (!integer_zerop (gimple_assign_rhs1 (last.stmt)))
1744 return;
1745 if (stmt_could_throw_p (cfun, last.stmt))
1746 return;
1747 gsi = gsi_for_stmt (last.stmt);
1748 unlink_stmt_vdef (last.stmt);
1749 release_defs (last.stmt);
1750 gsi_remove (&gsi, true);
1751 return;
1754 if (!valid_builtin_call (last.stmt))
1755 return;
1757 callee = gimple_call_fndecl (last.stmt);
1758 switch (DECL_FUNCTION_CODE (callee))
1760 case BUILT_IN_MEMCPY:
1761 case BUILT_IN_MEMCPY_CHK:
1762 break;
1763 default:
1764 return;
1767 len = gimple_call_arg (last.stmt, len_arg_no);
1768 if (tree_fits_uhwi_p (len))
1770 if (!tree_fits_uhwi_p (last.len)
1771 || integer_zerop (len)
1772 || tree_to_uhwi (len) != tree_to_uhwi (last.len) + 1)
1773 return;
1774 /* Don't adjust the length if it is divisible by 4, it is more efficient
1775 to store the extra '\0' in that case. */
1776 if ((tree_to_uhwi (len) & 3) == 0)
1777 return;
1779 /* Don't fold away an out of bounds access, as this defeats proper
1780 warnings. */
1781 tree dst = gimple_call_arg (last.stmt, 0);
1783 access_ref aref;
1784 tree size = compute_objsize (dst, 1, &aref, &ptr_qry);
1785 if (size && tree_int_cst_lt (size, len))
1786 return;
1788 else if (TREE_CODE (len) == SSA_NAME)
1790 gimple *def_stmt = SSA_NAME_DEF_STMT (len);
1791 if (!is_gimple_assign (def_stmt)
1792 || gimple_assign_rhs_code (def_stmt) != PLUS_EXPR
1793 || gimple_assign_rhs1 (def_stmt) != last.len
1794 || !integer_onep (gimple_assign_rhs2 (def_stmt)))
1795 return;
1797 else
1798 return;
1800 gimple_call_set_arg (last.stmt, len_arg_no, last.len);
1801 update_stmt (last.stmt);
1804 /* For an LHS that is an SSA_NAME that is the result of a strlen()
1805 call, or when BOUND is non-null, of a strnlen() call, set LHS
1806 range info to [0, min (MAX, BOUND)] when the range includes more
1807 than one value and return LHS. Otherwise, when the range
1808 [MIN, MAX] is such that MIN == MAX, return the tree representation
1809 of (MIN). The latter allows callers to fold suitable strnlen() calls
1810 to constants. */
1812 tree
1813 set_strlen_range (tree lhs, wide_int min, wide_int max,
1814 tree bound /* = NULL_TREE */)
1816 if (TREE_CODE (lhs) != SSA_NAME
1817 || !INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
1818 return NULL_TREE;
1820 if (bound)
1822 /* For strnlen, adjust MIN and MAX as necessary. If the bound
1823 is less than the size of the array set MAX to it. It it's
1824 greater than MAX and MAX is non-zero bump MAX down to account
1825 for the necessary terminating nul. Otherwise leave it alone. */
1826 if (TREE_CODE (bound) == INTEGER_CST)
1828 wide_int wibnd = wi::to_wide (bound);
1829 int cmp = wi::cmpu (wibnd, max);
1830 if (cmp < 0)
1831 max = wibnd;
1832 else if (cmp && wi::ne_p (max, min))
1833 --max;
1835 else if (TREE_CODE (bound) == SSA_NAME)
1837 value_range r;
1838 get_range_query (cfun)->range_of_expr (r, bound);
1839 if (!r.undefined_p ())
1841 /* For a bound in a known range, adjust the range determined
1842 above as necessary. For a bound in some anti-range or
1843 in an unknown range, use the range determined by callers. */
1844 if (wi::ltu_p (r.lower_bound (), min))
1845 min = r.lower_bound ();
1846 if (wi::ltu_p (r.upper_bound (), max))
1847 max = r.upper_bound ();
1852 if (min == max)
1853 return wide_int_to_tree (size_type_node, min);
1855 set_range_info (lhs, VR_RANGE, min, max);
1856 return lhs;
1859 /* For an LHS that is an SSA_NAME and for strlen() or strnlen() argument
1860 SRC, set LHS range info to [0, min (N, BOUND)] if SRC refers to
1861 a character array A[N] with unknown length bounded by N, and for
1862 strnlen(), by min (N, BOUND). */
1864 static tree
1865 maybe_set_strlen_range (tree lhs, tree src, tree bound)
1867 if (TREE_CODE (lhs) != SSA_NAME
1868 || !INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
1869 return NULL_TREE;
1871 if (TREE_CODE (src) == SSA_NAME)
1873 gimple *def = SSA_NAME_DEF_STMT (src);
1874 if (is_gimple_assign (def)
1875 && gimple_assign_rhs_code (def) == ADDR_EXPR)
1876 src = gimple_assign_rhs1 (def);
1879 /* The longest string is PTRDIFF_MAX - 1 bytes including the final
1880 NUL so that the difference between a pointer to just past it and
1881 one to its beginning is positive. */
1882 wide_int max = wi::to_wide (TYPE_MAX_VALUE (ptrdiff_type_node)) - 2;
1884 if (TREE_CODE (src) == ADDR_EXPR)
1886 /* The last array member of a struct can be bigger than its size
1887 suggests if it's treated as a poor-man's flexible array member. */
1888 src = TREE_OPERAND (src, 0);
1889 if (TREE_CODE (src) != MEM_REF
1890 && !array_at_struct_end_p (src))
1892 tree type = TREE_TYPE (src);
1893 tree size = TYPE_SIZE_UNIT (type);
1894 if (size
1895 && TREE_CODE (size) == INTEGER_CST
1896 && !integer_zerop (size))
1898 /* Even though such uses of strlen would be undefined,
1899 avoid relying on arrays of arrays in case some genius
1900 decides to call strlen on an unterminated array element
1901 that's followed by a terminated one. Likewise, avoid
1902 assuming that a struct array member is necessarily
1903 nul-terminated (the nul may be in the member that
1904 follows). In those cases, assume that the length
1905 of the string stored in such an array is bounded
1906 by the size of the enclosing object if one can be
1907 determined. */
1908 tree base = get_base_address (src);
1909 if (VAR_P (base))
1911 if (tree size = DECL_SIZE_UNIT (base))
1912 if (size
1913 && TREE_CODE (size) == INTEGER_CST
1914 && TREE_CODE (TREE_TYPE (base)) != POINTER_TYPE)
1915 max = wi::to_wide (size);
1919 /* For strlen() the upper bound above is equal to
1920 the longest string that can be stored in the array
1921 (i.e., it accounts for the terminating nul. For
1922 strnlen() bump up the maximum by one since the array
1923 need not be nul-terminated. */
1924 if (!bound && max != 0)
1925 --max;
1929 wide_int min = wi::zero (max.get_precision ());
1930 return set_strlen_range (lhs, min, max, bound);
1933 /* Diagnose buffer overflow by a STMT writing LEN + PLUS_ONE bytes,
1934 either into a region allocated for the object SI when non-null,
1935 or into an object designated by the LHS of STMT otherwise.
1936 For a call STMT, when CALL_LHS is set use its left hand side
1937 as the destination, otherwise use argument zero.
1938 When nonnull uses RVALS to determine range information.
1939 RAWMEM may be set by memcpy and other raw memory functions
1940 to allow accesses across subobject boundaries. */
1942 static void
1943 maybe_warn_overflow (gimple *stmt, bool call_lhs, tree len,
1944 pointer_query &ptr_qry,
1945 strinfo *si = NULL, bool plus_one = false,
1946 bool rawmem = false)
1948 if (!len || warning_suppressed_p (stmt, OPT_Wstringop_overflow_))
1949 return;
1951 /* The DECL of the function performing the write if it is done
1952 by one. */
1953 tree writefn = NULL_TREE;
1954 /* The destination expression involved in the store or call STMT. */
1955 tree dest = NULL_TREE;
1957 if (is_gimple_assign (stmt))
1958 dest = gimple_assign_lhs (stmt);
1959 else if (is_gimple_call (stmt))
1961 if (call_lhs)
1962 dest = gimple_call_lhs (stmt);
1963 else
1965 gcc_assert (gimple_call_builtin_p (stmt, BUILT_IN_NORMAL));
1966 dest = gimple_call_arg (stmt, 0);
1969 if (!dest)
1970 return;
1971 writefn = gimple_call_fndecl (stmt);
1973 else
1974 return;
1976 if (warning_suppressed_p (dest, OPT_Wstringop_overflow_))
1977 return;
1979 const int ostype = rawmem ? 0 : 1;
1981 /* Use maximum precision to avoid overflow in the addition below.
1982 Make sure all operands have the same precision to keep wide_int
1983 from ICE'ing. */
1985 access_ref aref;
1986 /* The size of the destination region (which is smaller than
1987 the destination object for stores at a non-zero offset). */
1988 tree destsize = compute_objsize (dest, ostype, &aref, &ptr_qry);
1990 if (!destsize)
1992 aref.sizrng[0] = 0;
1993 aref.sizrng[1] = wi::to_offset (max_object_size ());
1996 /* Return early if the DESTSIZE size expression is the same as LEN
1997 and the offset into the destination is zero. This might happen
1998 in the case of a pair of malloc and memset calls to allocate
1999 an object and clear it as if by calloc. */
2000 if (destsize == len && !plus_one
2001 && aref.offrng[0] == 0 && aref.offrng[0] == aref.offrng[1])
2002 return;
2004 wide_int rng[2];
2005 if (!get_range (len, stmt, rng, ptr_qry.rvals))
2006 return;
2008 widest_int lenrng[2] =
2009 { widest_int::from (rng[0], SIGNED), widest_int::from (rng[1], SIGNED) };
2011 if (plus_one)
2013 lenrng[0] += 1;
2014 lenrng[1] += 1;
2017 /* The size of the remaining space in the destination computed
2018 as the size of the latter minus the offset into it. */
2019 widest_int spcrng[2];
2021 offset_int remrng[2];
2022 remrng[1] = aref.size_remaining (remrng);
2023 spcrng[0] = remrng[0] == -1 ? 0 : widest_int::from (remrng[0], UNSIGNED);
2024 spcrng[1] = widest_int::from (remrng[1], UNSIGNED);
2027 if (wi::leu_p (lenrng[0], spcrng[0])
2028 && wi::leu_p (lenrng[1], spcrng[1]))
2029 return;
2031 location_t loc = gimple_or_expr_nonartificial_location (stmt, dest);
2032 bool warned = false;
2033 if (wi::leu_p (lenrng[0], spcrng[1]))
2035 if (len != destsize
2036 && (!si || rawmem || !is_strlen_related_p (si->ptr, len)))
2037 return;
2039 warned = (writefn
2040 ? warning_at (loc, OPT_Wstringop_overflow_,
2041 "%qD writing one too many bytes into a region "
2042 "of a size that depends on %<strlen%>",
2043 writefn)
2044 : warning_at (loc, OPT_Wstringop_overflow_,
2045 "writing one too many bytes into a region "
2046 "of a size that depends on %<strlen%>"));
2048 else if (lenrng[0] == lenrng[1])
2050 if (spcrng[0] == spcrng[1])
2051 warned = (writefn
2052 ? warning_n (loc, OPT_Wstringop_overflow_,
2053 lenrng[0].to_uhwi (),
2054 "%qD writing %wu byte into a region "
2055 "of size %wu",
2056 "%qD writing %wu bytes into a region "
2057 "of size %wu",
2058 writefn, lenrng[0].to_uhwi (),
2059 spcrng[0].to_uhwi ())
2060 : warning_n (loc, OPT_Wstringop_overflow_,
2061 lenrng[0].to_uhwi (),
2062 "writing %wu byte into a region "
2063 "of size %wu",
2064 "writing %wu bytes into a region "
2065 "of size %wu",
2066 lenrng[0].to_uhwi (),
2067 spcrng[0].to_uhwi ()));
2068 else
2069 warned = (writefn
2070 ? warning_n (loc, OPT_Wstringop_overflow_,
2071 lenrng[0].to_uhwi (),
2072 "%qD writing %wu byte into a region "
2073 "of size between %wu and %wu",
2074 "%qD writing %wu bytes into a region "
2075 "of size between %wu and %wu",
2076 writefn, lenrng[0].to_uhwi (),
2077 spcrng[0].to_uhwi (), spcrng[1].to_uhwi ())
2078 : warning_n (loc, OPT_Wstringop_overflow_,
2079 lenrng[0].to_uhwi (),
2080 "writing %wu byte into a region "
2081 "of size between %wu and %wu",
2082 "writing %wu bytes into a region "
2083 "of size between %wu and %wu",
2084 lenrng[0].to_uhwi (),
2085 spcrng[0].to_uhwi (), spcrng[1].to_uhwi ()));
2087 else if (spcrng[0] == spcrng[1])
2088 warned = (writefn
2089 ? warning_at (loc, OPT_Wstringop_overflow_,
2090 "%qD writing between %wu and %wu bytes "
2091 "into a region of size %wu",
2092 writefn, lenrng[0].to_uhwi (),
2093 lenrng[1].to_uhwi (),
2094 spcrng[0].to_uhwi ())
2095 : warning_at (loc, OPT_Wstringop_overflow_,
2096 "writing between %wu and %wu bytes "
2097 "into a region of size %wu",
2098 lenrng[0].to_uhwi (),
2099 lenrng[1].to_uhwi (),
2100 spcrng[0].to_uhwi ()));
2101 else
2102 warned = (writefn
2103 ? warning_at (loc, OPT_Wstringop_overflow_,
2104 "%qD writing between %wu and %wu bytes "
2105 "into a region of size between %wu and %wu",
2106 writefn, lenrng[0].to_uhwi (),
2107 lenrng[1].to_uhwi (),
2108 spcrng[0].to_uhwi (), spcrng[1].to_uhwi ())
2109 : warning_at (loc, OPT_Wstringop_overflow_,
2110 "writing between %wu and %wu bytes "
2111 "into a region of size between %wu and %wu",
2112 lenrng[0].to_uhwi (),
2113 lenrng[1].to_uhwi (),
2114 spcrng[0].to_uhwi (), spcrng[1].to_uhwi ()));
2116 if (!warned)
2117 return;
2119 suppress_warning (stmt, OPT_Wstringop_overflow_);
2121 aref.inform_access (access_write_only);
2124 /* Convenience wrapper for the above. */
2126 static inline void
2127 maybe_warn_overflow (gimple *stmt, bool call_lhs, unsigned HOST_WIDE_INT len,
2128 pointer_query &ptr_qry, strinfo *si = NULL,
2129 bool plus_one = false, bool rawmem = false)
2131 tree tlen = build_int_cst (size_type_node, len);
2132 maybe_warn_overflow (stmt, call_lhs, tlen, ptr_qry, si, plus_one, rawmem);
2135 /* Handle a strlen call. If strlen of the argument is known, replace
2136 the strlen call with the known value, otherwise remember that strlen
2137 of the argument is stored in the lhs SSA_NAME. */
2139 static void
2140 handle_builtin_strlen (gimple_stmt_iterator *gsi)
2142 gimple *stmt = gsi_stmt (*gsi);
2143 tree lhs = gimple_call_lhs (stmt);
2145 if (lhs == NULL_TREE)
2146 return;
2148 location_t loc = gimple_location (stmt);
2149 tree callee = gimple_call_fndecl (stmt);
2150 tree src = gimple_call_arg (stmt, 0);
2151 tree bound = (DECL_FUNCTION_CODE (callee) == BUILT_IN_STRNLEN
2152 ? gimple_call_arg (stmt, 1) : NULL_TREE);
2153 int idx = get_stridx (src);
2154 if (idx || (bound && integer_zerop (bound)))
2156 strinfo *si = NULL;
2157 tree rhs;
2159 if (idx < 0)
2160 rhs = build_int_cst (TREE_TYPE (lhs), ~idx);
2161 else if (idx == 0)
2162 rhs = bound;
2163 else
2165 rhs = NULL_TREE;
2166 si = get_strinfo (idx);
2167 if (si != NULL)
2169 rhs = get_string_length (si);
2170 /* For strnlen, if bound is constant, even if si is not known
2171 to be zero terminated, if we know at least bound bytes are
2172 not zero, the return value will be bound. */
2173 if (rhs == NULL_TREE
2174 && bound != NULL_TREE
2175 && TREE_CODE (bound) == INTEGER_CST
2176 && si->nonzero_chars != NULL_TREE
2177 && TREE_CODE (si->nonzero_chars) == INTEGER_CST
2178 && tree_int_cst_le (bound, si->nonzero_chars))
2179 rhs = bound;
2182 if (rhs != NULL_TREE)
2184 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2186 fprintf (dump_file, "Optimizing: ");
2187 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2189 rhs = unshare_expr (rhs);
2190 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2191 rhs = fold_convert_loc (loc, TREE_TYPE (lhs), rhs);
2193 if (bound)
2194 rhs = fold_build2_loc (loc, MIN_EXPR, TREE_TYPE (rhs), rhs, bound);
2196 gimplify_and_update_call_from_tree (gsi, rhs);
2197 stmt = gsi_stmt (*gsi);
2198 update_stmt (stmt);
2199 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2201 fprintf (dump_file, "into: ");
2202 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2205 if (si != NULL
2206 /* Don't update anything for strnlen. */
2207 && bound == NULL_TREE
2208 && TREE_CODE (si->nonzero_chars) != SSA_NAME
2209 && TREE_CODE (si->nonzero_chars) != INTEGER_CST
2210 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
2212 si = unshare_strinfo (si);
2213 si->nonzero_chars = lhs;
2214 gcc_assert (si->full_string_p);
2217 if (strlen_to_stridx
2218 && (bound == NULL_TREE
2219 /* For strnlen record this only if the call is proven
2220 to return the same value as strlen would. */
2221 || (TREE_CODE (bound) == INTEGER_CST
2222 && TREE_CODE (rhs) == INTEGER_CST
2223 && tree_int_cst_lt (rhs, bound))))
2224 strlen_to_stridx->put (lhs, stridx_strlenloc (idx, loc));
2226 return;
2229 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
2230 return;
2232 if (idx == 0)
2233 idx = new_stridx (src);
2234 else
2236 strinfo *si = get_strinfo (idx);
2237 if (si != NULL)
2239 if (!si->full_string_p && !si->stmt)
2241 /* Until now we only had a lower bound on the string length.
2242 Install LHS as the actual length. */
2243 si = unshare_strinfo (si);
2244 tree old = si->nonzero_chars;
2245 si->nonzero_chars = lhs;
2246 si->full_string_p = true;
2247 if (old && TREE_CODE (old) == INTEGER_CST)
2249 old = fold_convert_loc (loc, TREE_TYPE (lhs), old);
2250 tree adj = fold_build2_loc (loc, MINUS_EXPR,
2251 TREE_TYPE (lhs), lhs, old);
2252 adjust_related_strinfos (loc, si, adj);
2253 /* Use the constant minimum length as the lower bound
2254 of the non-constant length. */
2255 wide_int min = wi::to_wide (old);
2256 wide_int max
2257 = wi::to_wide (TYPE_MAX_VALUE (ptrdiff_type_node)) - 2;
2258 set_strlen_range (lhs, min, max);
2260 else
2262 si->first = 0;
2263 si->prev = 0;
2264 si->next = 0;
2267 return;
2270 if (idx)
2272 if (!bound)
2274 /* Only store the new length information for calls to strlen(),
2275 not for those to strnlen(). */
2276 strinfo *si = new_strinfo (src, idx, lhs, true);
2277 set_strinfo (idx, si);
2278 find_equal_ptrs (src, idx);
2281 /* For SRC that is an array of N elements, set LHS's range
2282 to [0, min (N, BOUND)]. A constant return value means
2283 the range would have consisted of a single value. In
2284 that case, fold the result into the returned constant. */
2285 if (tree ret = maybe_set_strlen_range (lhs, src, bound))
2286 if (TREE_CODE (ret) == INTEGER_CST)
2288 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2290 fprintf (dump_file, "Optimizing: ");
2291 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2293 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (ret)))
2294 ret = fold_convert_loc (loc, TREE_TYPE (lhs), ret);
2295 gimplify_and_update_call_from_tree (gsi, ret);
2296 stmt = gsi_stmt (*gsi);
2297 update_stmt (stmt);
2298 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2300 fprintf (dump_file, "into: ");
2301 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2305 if (strlen_to_stridx && !bound)
2306 strlen_to_stridx->put (lhs, stridx_strlenloc (idx, loc));
2310 /* Handle a strchr call. If strlen of the first argument is known, replace
2311 the strchr (x, 0) call with the endptr or x + strlen, otherwise remember
2312 that lhs of the call is endptr and strlen of the argument is endptr - x. */
2314 static void
2315 handle_builtin_strchr (gimple_stmt_iterator *gsi)
2317 gimple *stmt = gsi_stmt (*gsi);
2318 tree lhs = gimple_call_lhs (stmt);
2320 if (lhs == NULL_TREE)
2321 return;
2323 if (!integer_zerop (gimple_call_arg (stmt, 1)))
2324 return;
2326 tree src = gimple_call_arg (stmt, 0);
2328 /* Avoid folding if the first argument is not a nul-terminated array.
2329 Defer warning until later. */
2330 if (!check_nul_terminated_array (NULL_TREE, src))
2331 return;
2333 int idx = get_stridx (src);
2334 if (idx)
2336 strinfo *si = NULL;
2337 tree rhs;
2339 if (idx < 0)
2340 rhs = build_int_cst (size_type_node, ~idx);
2341 else
2343 rhs = NULL_TREE;
2344 si = get_strinfo (idx);
2345 if (si != NULL)
2346 rhs = get_string_length (si);
2348 if (rhs != NULL_TREE)
2350 location_t loc = gimple_location (stmt);
2352 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2354 fprintf (dump_file, "Optimizing: ");
2355 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2357 if (si != NULL && si->endptr != NULL_TREE)
2359 rhs = unshare_expr (si->endptr);
2360 if (!useless_type_conversion_p (TREE_TYPE (lhs),
2361 TREE_TYPE (rhs)))
2362 rhs = fold_convert_loc (loc, TREE_TYPE (lhs), rhs);
2364 else
2366 rhs = fold_convert_loc (loc, sizetype, unshare_expr (rhs));
2367 rhs = fold_build2_loc (loc, POINTER_PLUS_EXPR,
2368 TREE_TYPE (src), src, rhs);
2369 if (!useless_type_conversion_p (TREE_TYPE (lhs),
2370 TREE_TYPE (rhs)))
2371 rhs = fold_convert_loc (loc, TREE_TYPE (lhs), rhs);
2373 gimplify_and_update_call_from_tree (gsi, rhs);
2374 stmt = gsi_stmt (*gsi);
2375 update_stmt (stmt);
2376 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2378 fprintf (dump_file, "into: ");
2379 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2381 if (si != NULL
2382 && si->endptr == NULL_TREE
2383 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
2385 si = unshare_strinfo (si);
2386 si->endptr = lhs;
2388 zero_length_string (lhs, si);
2389 return;
2392 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
2393 return;
2394 if (TREE_CODE (src) != SSA_NAME || !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (src))
2396 if (idx == 0)
2397 idx = new_stridx (src);
2398 else if (get_strinfo (idx) != NULL)
2400 zero_length_string (lhs, NULL);
2401 return;
2403 if (idx)
2405 location_t loc = gimple_location (stmt);
2406 tree lhsu = fold_convert_loc (loc, size_type_node, lhs);
2407 tree srcu = fold_convert_loc (loc, size_type_node, src);
2408 tree length = fold_build2_loc (loc, MINUS_EXPR,
2409 size_type_node, lhsu, srcu);
2410 strinfo *si = new_strinfo (src, idx, length, true);
2411 si->endptr = lhs;
2412 set_strinfo (idx, si);
2413 find_equal_ptrs (src, idx);
2414 zero_length_string (lhs, si);
2417 else
2418 zero_length_string (lhs, NULL);
2421 /* Handle a strcpy-like ({st{r,p}cpy,__st{r,p}cpy_chk}) call.
2422 If strlen of the second argument is known, strlen of the first argument
2423 is the same after this call. Furthermore, attempt to convert it to
2424 memcpy. Uses RVALS to determine range information. */
2426 static void
2427 handle_builtin_strcpy (enum built_in_function bcode, gimple_stmt_iterator *gsi,
2428 pointer_query &ptr_qry)
2430 int idx, didx;
2431 tree src, dst, srclen, len, lhs, type, fn, oldlen;
2432 bool success;
2433 gimple *stmt = gsi_stmt (*gsi);
2434 strinfo *si, *dsi, *olddsi, *zsi;
2435 location_t loc;
2437 src = gimple_call_arg (stmt, 1);
2438 dst = gimple_call_arg (stmt, 0);
2439 lhs = gimple_call_lhs (stmt);
2440 idx = get_stridx (src);
2441 si = NULL;
2442 if (idx > 0)
2443 si = get_strinfo (idx);
2445 didx = get_stridx (dst);
2446 olddsi = NULL;
2447 oldlen = NULL_TREE;
2448 if (didx > 0)
2449 olddsi = get_strinfo (didx);
2450 else if (didx < 0)
2451 return;
2453 if (olddsi != NULL)
2454 adjust_last_stmt (olddsi, stmt, false, ptr_qry);
2456 srclen = NULL_TREE;
2457 if (si != NULL)
2458 srclen = get_string_length (si);
2459 else if (idx < 0)
2460 srclen = build_int_cst (size_type_node, ~idx);
2462 maybe_warn_overflow (stmt, false, srclen, ptr_qry, olddsi, true);
2464 if (olddsi != NULL)
2465 adjust_last_stmt (olddsi, stmt, false, ptr_qry);
2467 loc = gimple_location (stmt);
2468 if (srclen == NULL_TREE)
2469 switch (bcode)
2471 case BUILT_IN_STRCPY:
2472 case BUILT_IN_STRCPY_CHK:
2473 if (lhs != NULL_TREE || !builtin_decl_implicit_p (BUILT_IN_STPCPY))
2474 return;
2475 break;
2476 case BUILT_IN_STPCPY:
2477 case BUILT_IN_STPCPY_CHK:
2478 if (lhs == NULL_TREE)
2479 return;
2480 else
2482 tree lhsuint = fold_convert_loc (loc, size_type_node, lhs);
2483 srclen = fold_convert_loc (loc, size_type_node, dst);
2484 srclen = fold_build2_loc (loc, MINUS_EXPR, size_type_node,
2485 lhsuint, srclen);
2487 break;
2488 default:
2489 gcc_unreachable ();
2492 if (didx == 0)
2494 didx = new_stridx (dst);
2495 if (didx == 0)
2496 return;
2498 if (olddsi != NULL)
2500 oldlen = olddsi->nonzero_chars;
2501 dsi = unshare_strinfo (olddsi);
2502 dsi->nonzero_chars = srclen;
2503 dsi->full_string_p = (srclen != NULL_TREE);
2504 /* Break the chain, so adjust_related_strinfo on later pointers in
2505 the chain won't adjust this one anymore. */
2506 dsi->next = 0;
2507 dsi->stmt = NULL;
2508 dsi->endptr = NULL_TREE;
2510 else
2512 dsi = new_strinfo (dst, didx, srclen, srclen != NULL_TREE);
2513 set_strinfo (didx, dsi);
2514 find_equal_ptrs (dst, didx);
2516 dsi->writable = true;
2517 dsi->dont_invalidate = true;
2519 if (dsi->nonzero_chars == NULL_TREE)
2521 strinfo *chainsi;
2523 /* If string length of src is unknown, use delayed length
2524 computation. If string length of dst will be needed, it
2525 can be computed by transforming this strcpy call into
2526 stpcpy and subtracting dst from the return value. */
2528 /* Look for earlier strings whose length could be determined if
2529 this strcpy is turned into an stpcpy. */
2531 if (dsi->prev != 0 && (chainsi = verify_related_strinfos (dsi)) != NULL)
2533 for (; chainsi && chainsi != dsi; chainsi = get_strinfo (chainsi->next))
2535 /* When setting a stmt for delayed length computation
2536 prevent all strinfos through dsi from being
2537 invalidated. */
2538 chainsi = unshare_strinfo (chainsi);
2539 chainsi->stmt = stmt;
2540 chainsi->nonzero_chars = NULL_TREE;
2541 chainsi->full_string_p = false;
2542 chainsi->endptr = NULL_TREE;
2543 chainsi->dont_invalidate = true;
2546 dsi->stmt = stmt;
2548 /* Try to detect overlap before returning. This catches cases
2549 like strcpy (d, d + n) where n is non-constant whose range
2550 is such that (n <= strlen (d) holds).
2552 OLDDSI->NONZERO_chars may have been reset by this point with
2553 oldlen holding it original value. */
2554 if (olddsi && oldlen)
2556 /* Add 1 for the terminating NUL. */
2557 tree type = TREE_TYPE (oldlen);
2558 oldlen = fold_build2 (PLUS_EXPR, type, oldlen,
2559 build_int_cst (type, 1));
2560 check_bounds_or_overlap (stmt, olddsi->ptr, src, oldlen, NULL_TREE);
2563 return;
2566 if (olddsi != NULL)
2568 tree adj = NULL_TREE;
2569 if (oldlen == NULL_TREE)
2571 else if (integer_zerop (oldlen))
2572 adj = srclen;
2573 else if (TREE_CODE (oldlen) == INTEGER_CST
2574 || TREE_CODE (srclen) == INTEGER_CST)
2575 adj = fold_build2_loc (loc, MINUS_EXPR,
2576 TREE_TYPE (srclen), srclen,
2577 fold_convert_loc (loc, TREE_TYPE (srclen),
2578 oldlen));
2579 if (adj != NULL_TREE)
2580 adjust_related_strinfos (loc, dsi, adj);
2581 else
2582 dsi->prev = 0;
2584 /* strcpy src may not overlap dst, so src doesn't need to be
2585 invalidated either. */
2586 if (si != NULL)
2587 si->dont_invalidate = true;
2589 fn = NULL_TREE;
2590 zsi = NULL;
2591 switch (bcode)
2593 case BUILT_IN_STRCPY:
2594 fn = builtin_decl_implicit (BUILT_IN_MEMCPY);
2595 if (lhs)
2596 ssa_ver_to_stridx[SSA_NAME_VERSION (lhs)] = didx;
2597 break;
2598 case BUILT_IN_STRCPY_CHK:
2599 fn = builtin_decl_explicit (BUILT_IN_MEMCPY_CHK);
2600 if (lhs)
2601 ssa_ver_to_stridx[SSA_NAME_VERSION (lhs)] = didx;
2602 break;
2603 case BUILT_IN_STPCPY:
2604 /* This would need adjustment of the lhs (subtract one),
2605 or detection that the trailing '\0' doesn't need to be
2606 written, if it will be immediately overwritten.
2607 fn = builtin_decl_explicit (BUILT_IN_MEMPCPY); */
2608 if (lhs)
2610 dsi->endptr = lhs;
2611 zsi = zero_length_string (lhs, dsi);
2613 break;
2614 case BUILT_IN_STPCPY_CHK:
2615 /* This would need adjustment of the lhs (subtract one),
2616 or detection that the trailing '\0' doesn't need to be
2617 written, if it will be immediately overwritten.
2618 fn = builtin_decl_explicit (BUILT_IN_MEMPCPY_CHK); */
2619 if (lhs)
2621 dsi->endptr = lhs;
2622 zsi = zero_length_string (lhs, dsi);
2624 break;
2625 default:
2626 gcc_unreachable ();
2628 if (zsi != NULL)
2629 zsi->dont_invalidate = true;
2631 if (fn)
2633 tree args = TYPE_ARG_TYPES (TREE_TYPE (fn));
2634 type = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (args)));
2636 else
2637 type = size_type_node;
2639 len = fold_convert_loc (loc, type, unshare_expr (srclen));
2640 len = fold_build2_loc (loc, PLUS_EXPR, type, len, build_int_cst (type, 1));
2642 /* Disable warning for the transformed statement? */
2643 opt_code no_warning_opt = no_warning;
2645 if (const strinfo *chksi = si ? olddsi ? olddsi : dsi : NULL)
2647 no_warning_opt = check_bounds_or_overlap (stmt, chksi->ptr, si->ptr,
2648 NULL_TREE, len);
2649 if (no_warning_opt)
2650 suppress_warning (stmt, no_warning_opt);
2653 if (fn == NULL_TREE)
2654 return;
2656 len = force_gimple_operand_gsi (gsi, len, true, NULL_TREE, true,
2657 GSI_SAME_STMT);
2658 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2660 fprintf (dump_file, "Optimizing: ");
2661 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2663 if (gimple_call_num_args (stmt) == 2)
2664 success = update_gimple_call (gsi, fn, 3, dst, src, len);
2665 else
2666 success = update_gimple_call (gsi, fn, 4, dst, src, len,
2667 gimple_call_arg (stmt, 2));
2668 if (success)
2670 stmt = gsi_stmt (*gsi);
2671 update_stmt (stmt);
2672 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2674 fprintf (dump_file, "into: ");
2675 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2677 /* Allow adjust_last_stmt to decrease this memcpy's size. */
2678 laststmt.stmt = stmt;
2679 laststmt.len = srclen;
2680 laststmt.stridx = dsi->idx;
2682 else if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2683 fprintf (dump_file, "not possible.\n");
2685 if (no_warning_opt)
2686 suppress_warning (stmt, no_warning_opt);
2689 /* Check the size argument to the built-in forms of stpncpy and strncpy
2690 for out-of-bounds offsets or overlapping access, and to see if the
2691 size argument is derived from a call to strlen() on the source argument,
2692 and if so, issue an appropriate warning. */
2694 static void
2695 handle_builtin_strncat (built_in_function, gimple_stmt_iterator *gsi)
2697 /* Same as stxncpy(). */
2698 handle_builtin_stxncpy_strncat (true, gsi);
2701 /* Return true if LEN depends on a call to strlen(SRC) in an interesting
2702 way. LEN can either be an integer expression, or a pointer (to char).
2703 When it is the latter (such as in recursive calls to self) it is
2704 assumed to be the argument in some call to strlen() whose relationship
2705 to SRC is being ascertained. */
2707 bool
2708 is_strlen_related_p (tree src, tree len)
2710 if (TREE_CODE (TREE_TYPE (len)) == POINTER_TYPE
2711 && operand_equal_p (src, len, 0))
2712 return true;
2714 if (TREE_CODE (len) != SSA_NAME)
2715 return false;
2717 if (TREE_CODE (src) == SSA_NAME)
2719 gimple *srcdef = SSA_NAME_DEF_STMT (src);
2720 if (is_gimple_assign (srcdef))
2722 /* Handle bitwise AND used in conversions from wider size_t
2723 to narrower unsigned types. */
2724 tree_code code = gimple_assign_rhs_code (srcdef);
2725 if (code == BIT_AND_EXPR
2726 || code == NOP_EXPR)
2727 return is_strlen_related_p (gimple_assign_rhs1 (srcdef), len);
2729 return false;
2732 if (gimple_call_builtin_p (srcdef, BUILT_IN_NORMAL))
2734 /* If SRC is the result of a call to an allocation function
2735 or strlen, use the function's argument instead. */
2736 tree func = gimple_call_fndecl (srcdef);
2737 built_in_function code = DECL_FUNCTION_CODE (func);
2738 if (code == BUILT_IN_ALLOCA
2739 || code == BUILT_IN_ALLOCA_WITH_ALIGN
2740 || code == BUILT_IN_MALLOC
2741 || code == BUILT_IN_STRLEN)
2742 return is_strlen_related_p (gimple_call_arg (srcdef, 0), len);
2744 /* FIXME: Handle other functions with attribute alloc_size. */
2745 return false;
2749 gimple *lendef = SSA_NAME_DEF_STMT (len);
2750 if (!lendef)
2751 return false;
2753 if (is_gimple_call (lendef))
2755 tree func = gimple_call_fndecl (lendef);
2756 if (!valid_builtin_call (lendef)
2757 || DECL_FUNCTION_CODE (func) != BUILT_IN_STRLEN)
2758 return false;
2760 tree arg = gimple_call_arg (lendef, 0);
2761 return is_strlen_related_p (src, arg);
2764 if (!is_gimple_assign (lendef))
2765 return false;
2767 tree_code code = gimple_assign_rhs_code (lendef);
2768 tree rhs1 = gimple_assign_rhs1 (lendef);
2769 tree rhstype = TREE_TYPE (rhs1);
2771 if ((TREE_CODE (rhstype) == POINTER_TYPE && code == POINTER_PLUS_EXPR)
2772 || (INTEGRAL_TYPE_P (rhstype)
2773 && (code == BIT_AND_EXPR
2774 || code == NOP_EXPR)))
2776 /* Pointer plus (an integer), and truncation are considered among
2777 the (potentially) related expressions to strlen. */
2778 return is_strlen_related_p (src, rhs1);
2781 if (tree rhs2 = gimple_assign_rhs2 (lendef))
2783 /* Integer subtraction is considered strlen-related when both
2784 arguments are integers and second one is strlen-related. */
2785 rhstype = TREE_TYPE (rhs2);
2786 if (INTEGRAL_TYPE_P (rhstype) && code == MINUS_EXPR)
2787 return is_strlen_related_p (src, rhs2);
2790 return false;
2793 /* Called by handle_builtin_stxncpy_strncat and by
2794 gimple_fold_builtin_strncpy in gimple-fold.c.
2795 Check to see if the specified bound is a) equal to the size of
2796 the destination DST and if so, b) if it's immediately followed by
2797 DST[CNT - 1] = '\0'. If a) holds and b) does not, warn. Otherwise,
2798 do nothing. Return true if diagnostic has been issued.
2800 The purpose is to diagnose calls to strncpy and stpncpy that do
2801 not nul-terminate the copy while allowing for the idiom where
2802 such a call is immediately followed by setting the last element
2803 to nul, as in:
2804 char a[32];
2805 strncpy (a, s, sizeof a);
2806 a[sizeof a - 1] = '\0';
2809 bool
2810 maybe_diag_stxncpy_trunc (gimple_stmt_iterator gsi, tree src, tree cnt,
2811 pointer_query *ptr_qry /* = NULL */)
2813 gimple *stmt = gsi_stmt (gsi);
2814 if (warning_suppressed_p (stmt, OPT_Wstringop_truncation))
2815 return false;
2817 wide_int cntrange[2];
2818 value_range r;
2819 if (!get_range_query (cfun)->range_of_expr (r, cnt)
2820 || r.varying_p ()
2821 || r.undefined_p ())
2822 return false;
2824 cntrange[0] = wi::to_wide (r.min ());
2825 cntrange[1] = wi::to_wide (r.max ());
2826 if (r.kind () == VR_ANTI_RANGE)
2828 wide_int maxobjsize = wi::to_wide (TYPE_MAX_VALUE (ptrdiff_type_node));
2830 if (wi::ltu_p (cntrange[1], maxobjsize))
2832 cntrange[0] = cntrange[1] + 1;
2833 cntrange[1] = maxobjsize;
2835 else
2837 cntrange[1] = cntrange[0] - 1;
2838 cntrange[0] = wi::zero (TYPE_PRECISION (TREE_TYPE (cnt)));
2842 /* Negative value is the constant string length. If it's less than
2843 the lower bound there is no truncation. Avoid calling get_stridx()
2844 when ssa_ver_to_stridx is empty. That implies the caller isn't
2845 running under the control of this pass and ssa_ver_to_stridx hasn't
2846 been created yet. */
2847 int sidx = ssa_ver_to_stridx.length () ? get_stridx (src) : 0;
2848 if (sidx < 0 && wi::gtu_p (cntrange[0], ~sidx))
2849 return false;
2851 tree dst = gimple_call_arg (stmt, 0);
2852 tree dstdecl = dst;
2853 if (TREE_CODE (dstdecl) == ADDR_EXPR)
2854 dstdecl = TREE_OPERAND (dstdecl, 0);
2856 tree ref = NULL_TREE;
2858 if (!sidx)
2860 /* If the source is a non-string return early to avoid warning
2861 for possible truncation (if the truncation is certain SIDX
2862 is non-zero). */
2863 tree srcdecl = gimple_call_arg (stmt, 1);
2864 if (TREE_CODE (srcdecl) == ADDR_EXPR)
2865 srcdecl = TREE_OPERAND (srcdecl, 0);
2866 if (get_attr_nonstring_decl (srcdecl, &ref))
2867 return false;
2870 /* Likewise, if the destination refers to an array/pointer declared
2871 nonstring return early. */
2872 if (get_attr_nonstring_decl (dstdecl, &ref))
2873 return false;
2875 /* Look for dst[i] = '\0'; after the stxncpy() call and if found
2876 avoid the truncation warning. */
2877 gsi_next_nondebug (&gsi);
2878 gimple *next_stmt = gsi_stmt (gsi);
2879 if (!next_stmt)
2881 /* When there is no statement in the same basic block check
2882 the immediate successor block. */
2883 if (basic_block bb = gimple_bb (stmt))
2885 if (single_succ_p (bb))
2887 /* For simplicity, ignore blocks with multiple outgoing
2888 edges for now and only consider successor blocks along
2889 normal edges. */
2890 edge e = EDGE_SUCC (bb, 0);
2891 if (!(e->flags & EDGE_ABNORMAL))
2893 gsi = gsi_start_bb (e->dest);
2894 next_stmt = gsi_stmt (gsi);
2895 if (next_stmt && is_gimple_debug (next_stmt))
2897 gsi_next_nondebug (&gsi);
2898 next_stmt = gsi_stmt (gsi);
2905 if (next_stmt && is_gimple_assign (next_stmt))
2907 tree lhs = gimple_assign_lhs (next_stmt);
2908 tree_code code = TREE_CODE (lhs);
2909 if (code == ARRAY_REF || code == MEM_REF)
2910 lhs = TREE_OPERAND (lhs, 0);
2912 tree func = gimple_call_fndecl (stmt);
2913 if (DECL_FUNCTION_CODE (func) == BUILT_IN_STPNCPY)
2915 tree ret = gimple_call_lhs (stmt);
2916 if (ret && operand_equal_p (ret, lhs, 0))
2917 return false;
2920 /* Determine the base address and offset of the reference,
2921 ignoring the innermost array index. */
2922 if (TREE_CODE (ref) == ARRAY_REF)
2923 ref = TREE_OPERAND (ref, 0);
2925 poly_int64 dstoff;
2926 tree dstbase = get_addr_base_and_unit_offset (ref, &dstoff);
2928 poly_int64 lhsoff;
2929 tree lhsbase = get_addr_base_and_unit_offset (lhs, &lhsoff);
2930 if (lhsbase
2931 && dstbase
2932 && known_eq (dstoff, lhsoff)
2933 && operand_equal_p (dstbase, lhsbase, 0))
2934 return false;
2937 int prec = TYPE_PRECISION (TREE_TYPE (cnt));
2938 wide_int lenrange[2];
2939 if (strinfo *sisrc = sidx > 0 ? get_strinfo (sidx) : NULL)
2941 lenrange[0] = (sisrc->nonzero_chars
2942 && TREE_CODE (sisrc->nonzero_chars) == INTEGER_CST
2943 ? wi::to_wide (sisrc->nonzero_chars)
2944 : wi::zero (prec));
2945 lenrange[1] = lenrange[0];
2947 else if (sidx < 0)
2948 lenrange[0] = lenrange[1] = wi::shwi (~sidx, prec);
2949 else
2951 c_strlen_data lendata = { };
2952 /* Set MAXBOUND to an arbitrary non-null non-integer node as a request
2953 to have it set to the length of the longest string in a PHI. */
2954 lendata.maxbound = src;
2955 get_range_strlen (src, &lendata, /* eltsize = */1);
2956 if (TREE_CODE (lendata.minlen) == INTEGER_CST
2957 && TREE_CODE (lendata.maxbound) == INTEGER_CST)
2959 /* When LENDATA.MAXLEN is unknown, reset LENDATA.MINLEN
2960 which stores the length of the shortest known string. */
2961 if (integer_all_onesp (lendata.maxlen))
2962 lenrange[0] = wi::shwi (0, prec);
2963 else
2964 lenrange[0] = wi::to_wide (lendata.minlen, prec);
2965 lenrange[1] = wi::to_wide (lendata.maxbound, prec);
2967 else
2969 lenrange[0] = wi::shwi (0, prec);
2970 lenrange[1] = wi::shwi (-1, prec);
2974 location_t callloc = gimple_or_expr_nonartificial_location (stmt, dst);
2975 tree func = gimple_call_fndecl (stmt);
2977 if (lenrange[0] != 0 || !wi::neg_p (lenrange[1]))
2979 /* If the longest source string is shorter than the lower bound
2980 of the specified count the copy is definitely nul-terminated. */
2981 if (wi::ltu_p (lenrange[1], cntrange[0]))
2982 return false;
2984 if (wi::neg_p (lenrange[1]))
2986 /* The length of one of the strings is unknown but at least
2987 one has non-zero length and that length is stored in
2988 LENRANGE[1]. Swap the bounds to force a "may be truncated"
2989 warning below. */
2990 lenrange[1] = lenrange[0];
2991 lenrange[0] = wi::shwi (0, prec);
2994 /* Set to true for strncat whose bound is derived from the length
2995 of the destination (the expected usage pattern). */
2996 bool cat_dstlen_bounded = false;
2997 if (DECL_FUNCTION_CODE (func) == BUILT_IN_STRNCAT)
2998 cat_dstlen_bounded = is_strlen_related_p (dst, cnt);
3000 if (lenrange[0] == cntrange[1] && cntrange[0] == cntrange[1])
3001 return warning_n (callloc, OPT_Wstringop_truncation,
3002 cntrange[0].to_uhwi (),
3003 "%qD output truncated before terminating "
3004 "nul copying %E byte from a string of the "
3005 "same length",
3006 "%qD output truncated before terminating nul "
3007 "copying %E bytes from a string of the same "
3008 "length",
3009 func, cnt);
3010 else if (!cat_dstlen_bounded)
3012 if (wi::geu_p (lenrange[0], cntrange[1]))
3014 /* The shortest string is longer than the upper bound of
3015 the count so the truncation is certain. */
3016 if (cntrange[0] == cntrange[1])
3017 return warning_n (callloc, OPT_Wstringop_truncation,
3018 cntrange[0].to_uhwi (),
3019 "%qD output truncated copying %E byte "
3020 "from a string of length %wu",
3021 "%qD output truncated copying %E bytes "
3022 "from a string of length %wu",
3023 func, cnt, lenrange[0].to_uhwi ());
3025 return warning_at (callloc, OPT_Wstringop_truncation,
3026 "%qD output truncated copying between %wu "
3027 "and %wu bytes from a string of length %wu",
3028 func, cntrange[0].to_uhwi (),
3029 cntrange[1].to_uhwi (), lenrange[0].to_uhwi ());
3031 else if (wi::geu_p (lenrange[1], cntrange[1]))
3033 /* The longest string is longer than the upper bound of
3034 the count so the truncation is possible. */
3035 if (cntrange[0] == cntrange[1])
3036 return warning_n (callloc, OPT_Wstringop_truncation,
3037 cntrange[0].to_uhwi (),
3038 "%qD output may be truncated copying %E "
3039 "byte from a string of length %wu",
3040 "%qD output may be truncated copying %E "
3041 "bytes from a string of length %wu",
3042 func, cnt, lenrange[1].to_uhwi ());
3044 return warning_at (callloc, OPT_Wstringop_truncation,
3045 "%qD output may be truncated copying between "
3046 "%wu and %wu bytes from a string of length %wu",
3047 func, cntrange[0].to_uhwi (),
3048 cntrange[1].to_uhwi (), lenrange[1].to_uhwi ());
3052 if (!cat_dstlen_bounded
3053 && cntrange[0] != cntrange[1]
3054 && wi::leu_p (cntrange[0], lenrange[0])
3055 && wi::leu_p (cntrange[1], lenrange[0] + 1))
3057 /* If the source (including the terminating nul) is longer than
3058 the lower bound of the specified count but shorter than the
3059 upper bound the copy may (but need not) be truncated. */
3060 return warning_at (callloc, OPT_Wstringop_truncation,
3061 "%qD output may be truncated copying between "
3062 "%wu and %wu bytes from a string of length %wu",
3063 func, cntrange[0].to_uhwi (),
3064 cntrange[1].to_uhwi (), lenrange[0].to_uhwi ());
3068 access_ref aref;
3069 if (tree dstsize = compute_objsize (dst, 1, &aref, ptr_qry))
3071 /* The source length is unknown. Try to determine the destination
3072 size and see if it matches the specified bound. If not, bail.
3073 Otherwise go on to see if it should be diagnosed for possible
3074 truncation. */
3075 if (!dstsize)
3076 return false;
3078 if (wi::to_wide (dstsize) != cntrange[1])
3079 return false;
3081 /* Avoid warning for strncpy(a, b, N) calls where the following
3082 equalities hold:
3083 N == sizeof a && N == sizeof b */
3084 if (tree srcsize = compute_objsize (src, 1, &aref, ptr_qry))
3085 if (wi::to_wide (srcsize) == cntrange[1])
3086 return false;
3088 if (cntrange[0] == cntrange[1])
3089 return warning_at (callloc, OPT_Wstringop_truncation,
3090 "%qD specified bound %E equals destination size",
3091 func, cnt);
3094 return false;
3097 /* Check the arguments to the built-in forms of stpncpy, strncpy, and
3098 strncat, for out-of-bounds offsets or overlapping access, and to see
3099 if the size is derived from calling strlen() on the source argument,
3100 and if so, issue the appropriate warning.
3101 APPEND_P is true for strncat. */
3103 static void
3104 handle_builtin_stxncpy_strncat (bool append_p, gimple_stmt_iterator *gsi)
3106 if (!strlen_to_stridx)
3107 return;
3109 gimple *stmt = gsi_stmt (*gsi);
3111 tree dst = gimple_call_arg (stmt, 0);
3112 tree src = gimple_call_arg (stmt, 1);
3113 tree len = gimple_call_arg (stmt, 2);
3114 /* An upper bound of the size of the destination. */
3115 tree dstsize = NULL_TREE;
3116 /* The length of the destination and source strings (plus 1 for those
3117 whose FULL_STRING_P is set, i.e., whose length is exact rather than
3118 a lower bound). */
3119 tree dstlenp1 = NULL_TREE, srclenp1 = NULL_TREE;;
3121 int didx = get_stridx (dst);
3122 if (strinfo *sidst = didx > 0 ? get_strinfo (didx) : NULL)
3124 /* Compute the size of the destination string including the nul
3125 if it is known to be nul-terminated. */
3126 if (sidst->nonzero_chars)
3128 if (sidst->full_string_p)
3130 /* String is known to be nul-terminated. */
3131 tree type = TREE_TYPE (sidst->nonzero_chars);
3132 dstlenp1 = fold_build2 (PLUS_EXPR, type, sidst->nonzero_chars,
3133 build_int_cst (type, 1));
3135 else
3136 dstlenp1 = sidst->nonzero_chars;
3138 else if (TREE_CODE (sidst->ptr) == SSA_NAME)
3140 gimple *def_stmt = SSA_NAME_DEF_STMT (sidst->ptr);
3141 dstsize = gimple_call_alloc_size (def_stmt);
3144 dst = sidst->ptr;
3147 int sidx = get_stridx (src);
3148 strinfo *sisrc = sidx > 0 ? get_strinfo (sidx) : NULL;
3149 if (sisrc)
3151 /* strncat() and strncpy() can modify the source string by writing
3152 over the terminating nul so SISRC->DONT_INVALIDATE must be left
3153 clear. */
3155 /* Compute the size of the source string including the terminating
3156 nul if its known to be nul-terminated. */
3157 if (sisrc->nonzero_chars)
3159 if (sisrc->full_string_p)
3161 tree type = TREE_TYPE (sisrc->nonzero_chars);
3162 srclenp1 = fold_build2 (PLUS_EXPR, type, sisrc->nonzero_chars,
3163 build_int_cst (type, 1));
3165 else
3166 srclenp1 = sisrc->nonzero_chars;
3169 src = sisrc->ptr;
3171 else
3172 srclenp1 = NULL_TREE;
3174 opt_code opt = check_bounds_or_overlap (stmt, dst, src, dstlenp1, srclenp1);
3175 if (opt != no_warning)
3177 suppress_warning (stmt, opt);
3178 return;
3181 /* If the length argument was computed from strlen(S) for some string
3182 S retrieve the strinfo index for the string (PSS->FIRST) along with
3183 the location of the strlen() call (PSS->SECOND). */
3184 stridx_strlenloc *pss = strlen_to_stridx->get (len);
3185 if (!pss || pss->first <= 0)
3187 if (maybe_diag_stxncpy_trunc (*gsi, src, len))
3188 suppress_warning (stmt, OPT_Wstringop_truncation);
3190 return;
3193 /* Retrieve the strinfo data for the string S that LEN was computed
3194 from as some function F of strlen (S) (i.e., LEN need not be equal
3195 to strlen(S)). */
3196 strinfo *silen = get_strinfo (pss->first);
3198 location_t callloc = gimple_or_expr_nonartificial_location (stmt, dst);
3200 tree func = gimple_call_fndecl (stmt);
3202 bool warned = false;
3204 /* When -Wstringop-truncation is set, try to determine truncation
3205 before diagnosing possible overflow. Truncation is implied by
3206 the LEN argument being equal to strlen(SRC), regardless of
3207 whether its value is known. Otherwise, when appending, or
3208 when copying into a destination of known size, issue the more
3209 generic -Wstringop-overflow which triggers for LEN arguments
3210 that in any meaningful way depend on strlen(SRC). */
3211 if (!append_p
3212 && sisrc == silen
3213 && is_strlen_related_p (src, len)
3214 && warning_at (callloc, OPT_Wstringop_truncation,
3215 "%qD output truncated before terminating nul "
3216 "copying as many bytes from a string as its length",
3217 func))
3218 warned = true;
3219 else if ((append_p || !dstsize || len == dstlenp1)
3220 && silen && is_strlen_related_p (src, silen->ptr))
3222 /* Issue -Wstringop-overflow when appending or when writing into
3223 a destination of a known size. Otherwise, when copying into
3224 a destination of an unknown size, it's truncation. */
3225 opt_code opt = (append_p || dstsize
3226 ? OPT_Wstringop_overflow_ : OPT_Wstringop_truncation);
3227 warned = warning_at (callloc, opt,
3228 "%qD specified bound depends on the length "
3229 "of the source argument",
3230 func);
3232 if (warned)
3234 location_t strlenloc = pss->second;
3235 if (strlenloc != UNKNOWN_LOCATION && strlenloc != callloc)
3236 inform (strlenloc, "length computed here");
3240 /* Handle a memcpy-like ({mem{,p}cpy,__mem{,p}cpy_chk}) call.
3241 If strlen of the second argument is known and length of the third argument
3242 is that plus one, strlen of the first argument is the same after this
3243 call. Uses RVALS to determine range information. */
3245 static void
3246 handle_builtin_memcpy (enum built_in_function bcode, gimple_stmt_iterator *gsi,
3247 pointer_query &ptr_qry)
3249 tree lhs, oldlen, newlen;
3250 gimple *stmt = gsi_stmt (*gsi);
3251 strinfo *si, *dsi;
3253 tree len = gimple_call_arg (stmt, 2);
3254 tree src = gimple_call_arg (stmt, 1);
3255 tree dst = gimple_call_arg (stmt, 0);
3257 int didx = get_stridx (dst);
3258 strinfo *olddsi = NULL;
3259 if (didx > 0)
3260 olddsi = get_strinfo (didx);
3261 else if (didx < 0)
3262 return;
3264 if (olddsi != NULL
3265 && !integer_zerop (len))
3267 maybe_warn_overflow (stmt, false, len, ptr_qry, olddsi, false, true);
3268 adjust_last_stmt (olddsi, stmt, false, ptr_qry);
3271 int idx = get_stridx (src);
3272 if (idx == 0)
3273 return;
3275 bool full_string_p;
3276 if (idx > 0)
3278 gimple *def_stmt;
3280 /* Handle memcpy (x, y, l) where l's relationship with strlen (y)
3281 is known. */
3282 si = get_strinfo (idx);
3283 if (si == NULL || si->nonzero_chars == NULL_TREE)
3284 return;
3285 if (TREE_CODE (len) == INTEGER_CST
3286 && TREE_CODE (si->nonzero_chars) == INTEGER_CST)
3288 if (tree_int_cst_le (len, si->nonzero_chars))
3290 /* Copying LEN nonzero characters, where LEN is constant. */
3291 newlen = len;
3292 full_string_p = false;
3294 else
3296 /* Copying the whole of the analyzed part of SI. */
3297 newlen = si->nonzero_chars;
3298 full_string_p = si->full_string_p;
3301 else
3303 if (!si->full_string_p)
3304 return;
3305 if (TREE_CODE (len) != SSA_NAME)
3306 return;
3307 def_stmt = SSA_NAME_DEF_STMT (len);
3308 if (!is_gimple_assign (def_stmt)
3309 || gimple_assign_rhs_code (def_stmt) != PLUS_EXPR
3310 || gimple_assign_rhs1 (def_stmt) != si->nonzero_chars
3311 || !integer_onep (gimple_assign_rhs2 (def_stmt)))
3312 return;
3313 /* Copying variable-length string SI (and no more). */
3314 newlen = si->nonzero_chars;
3315 full_string_p = true;
3318 else
3320 si = NULL;
3321 /* Handle memcpy (x, "abcd", 5) or
3322 memcpy (x, "abc\0uvw", 7). */
3323 if (!tree_fits_uhwi_p (len))
3324 return;
3326 unsigned HOST_WIDE_INT clen = tree_to_uhwi (len);
3327 unsigned HOST_WIDE_INT nonzero_chars = ~idx;
3328 newlen = build_int_cst (size_type_node, MIN (nonzero_chars, clen));
3329 full_string_p = clen > nonzero_chars;
3332 if (!full_string_p
3333 && olddsi
3334 && olddsi->nonzero_chars
3335 && TREE_CODE (olddsi->nonzero_chars) == INTEGER_CST
3336 && tree_int_cst_le (newlen, olddsi->nonzero_chars))
3338 /* The SRC substring being written strictly overlaps
3339 a subsequence of the existing string OLDDSI. */
3340 newlen = olddsi->nonzero_chars;
3341 full_string_p = olddsi->full_string_p;
3344 if (olddsi != NULL && TREE_CODE (len) == SSA_NAME)
3345 adjust_last_stmt (olddsi, stmt, false, ptr_qry);
3347 if (didx == 0)
3349 didx = new_stridx (dst);
3350 if (didx == 0)
3351 return;
3353 oldlen = NULL_TREE;
3354 if (olddsi != NULL)
3356 dsi = unshare_strinfo (olddsi);
3357 oldlen = olddsi->nonzero_chars;
3358 dsi->nonzero_chars = newlen;
3359 dsi->full_string_p = full_string_p;
3360 /* Break the chain, so adjust_related_strinfo on later pointers in
3361 the chain won't adjust this one anymore. */
3362 dsi->next = 0;
3363 dsi->stmt = NULL;
3364 dsi->endptr = NULL_TREE;
3366 else
3368 dsi = new_strinfo (dst, didx, newlen, full_string_p);
3369 set_strinfo (didx, dsi);
3370 find_equal_ptrs (dst, didx);
3372 dsi->writable = true;
3373 dsi->dont_invalidate = true;
3374 if (olddsi != NULL)
3376 tree adj = NULL_TREE;
3377 location_t loc = gimple_location (stmt);
3378 if (oldlen == NULL_TREE)
3380 else if (integer_zerop (oldlen))
3381 adj = newlen;
3382 else if (TREE_CODE (oldlen) == INTEGER_CST
3383 || TREE_CODE (newlen) == INTEGER_CST)
3384 adj = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (newlen), newlen,
3385 fold_convert_loc (loc, TREE_TYPE (newlen),
3386 oldlen));
3387 if (adj != NULL_TREE)
3388 adjust_related_strinfos (loc, dsi, adj);
3389 else
3390 dsi->prev = 0;
3392 /* memcpy src may not overlap dst, so src doesn't need to be
3393 invalidated either. */
3394 if (si != NULL)
3395 si->dont_invalidate = true;
3397 if (full_string_p)
3399 lhs = gimple_call_lhs (stmt);
3400 switch (bcode)
3402 case BUILT_IN_MEMCPY:
3403 case BUILT_IN_MEMCPY_CHK:
3404 /* Allow adjust_last_stmt to decrease this memcpy's size. */
3405 laststmt.stmt = stmt;
3406 laststmt.len = dsi->nonzero_chars;
3407 laststmt.stridx = dsi->idx;
3408 if (lhs)
3409 ssa_ver_to_stridx[SSA_NAME_VERSION (lhs)] = didx;
3410 break;
3411 case BUILT_IN_MEMPCPY:
3412 case BUILT_IN_MEMPCPY_CHK:
3413 break;
3414 default:
3415 gcc_unreachable ();
3420 /* Handle a strcat-like ({strcat,__strcat_chk}) call.
3421 If strlen of the second argument is known, strlen of the first argument
3422 is increased by the length of the second argument. Furthermore, attempt
3423 to convert it to memcpy/strcpy if the length of the first argument
3424 is known. */
3426 static void
3427 handle_builtin_strcat (enum built_in_function bcode, gimple_stmt_iterator *gsi,
3428 pointer_query &ptr_qry)
3430 int idx, didx;
3431 tree srclen, args, type, fn, objsz, endptr;
3432 bool success;
3433 gimple *stmt = gsi_stmt (*gsi);
3434 strinfo *si, *dsi;
3435 location_t loc = gimple_location (stmt);
3437 tree src = gimple_call_arg (stmt, 1);
3438 tree dst = gimple_call_arg (stmt, 0);
3440 /* Bail if the source is the same as destination. It will be diagnosed
3441 elsewhere. */
3442 if (operand_equal_p (src, dst, 0))
3443 return;
3445 tree lhs = gimple_call_lhs (stmt);
3447 didx = get_stridx (dst);
3448 if (didx < 0)
3449 return;
3451 dsi = NULL;
3452 if (didx > 0)
3453 dsi = get_strinfo (didx);
3455 srclen = NULL_TREE;
3456 si = NULL;
3457 idx = get_stridx (src);
3458 if (idx < 0)
3459 srclen = build_int_cst (size_type_node, ~idx);
3460 else if (idx > 0)
3462 si = get_strinfo (idx);
3463 if (si != NULL)
3464 srclen = get_string_length (si);
3467 /* Disable warning for the transformed statement? */
3468 opt_code no_warning_opt = no_warning;
3470 if (dsi == NULL || get_string_length (dsi) == NULL_TREE)
3473 /* The concatenation always involves copying at least one byte
3474 (the terminating nul), even if the source string is empty.
3475 If the source is unknown assume it's one character long and
3476 used that as both sizes. */
3477 tree slen = srclen;
3478 if (slen)
3480 tree type = TREE_TYPE (slen);
3481 slen = fold_build2 (PLUS_EXPR, type, slen, build_int_cst (type, 1));
3484 tree sptr = si && si->ptr ? si->ptr : src;
3485 no_warning_opt = check_bounds_or_overlap (stmt, dst, sptr, NULL_TREE,
3486 slen);
3487 if (no_warning_opt)
3488 suppress_warning (stmt, no_warning_opt);
3491 /* strcat (p, q) can be transformed into
3492 tmp = p + strlen (p); endptr = stpcpy (tmp, q);
3493 with length endptr - p if we need to compute the length
3494 later on. Don't do this transformation if we don't need
3495 it. */
3496 if (builtin_decl_implicit_p (BUILT_IN_STPCPY) && lhs == NULL_TREE)
3498 if (didx == 0)
3500 didx = new_stridx (dst);
3501 if (didx == 0)
3502 return;
3504 if (dsi == NULL)
3506 dsi = new_strinfo (dst, didx, NULL_TREE, false);
3507 set_strinfo (didx, dsi);
3508 find_equal_ptrs (dst, didx);
3510 else
3512 dsi = unshare_strinfo (dsi);
3513 dsi->nonzero_chars = NULL_TREE;
3514 dsi->full_string_p = false;
3515 dsi->next = 0;
3516 dsi->endptr = NULL_TREE;
3518 dsi->writable = true;
3519 dsi->stmt = stmt;
3520 dsi->dont_invalidate = true;
3522 return;
3525 tree dstlen = dsi->nonzero_chars;
3526 endptr = dsi->endptr;
3528 dsi = unshare_strinfo (dsi);
3529 dsi->endptr = NULL_TREE;
3530 dsi->stmt = NULL;
3531 dsi->writable = true;
3533 if (srclen != NULL_TREE)
3535 dsi->nonzero_chars = fold_build2_loc (loc, PLUS_EXPR,
3536 TREE_TYPE (dsi->nonzero_chars),
3537 dsi->nonzero_chars, srclen);
3538 gcc_assert (dsi->full_string_p);
3539 adjust_related_strinfos (loc, dsi, srclen);
3540 dsi->dont_invalidate = true;
3542 else
3544 dsi->nonzero_chars = NULL;
3545 dsi->full_string_p = false;
3546 if (lhs == NULL_TREE && builtin_decl_implicit_p (BUILT_IN_STPCPY))
3547 dsi->dont_invalidate = true;
3550 if (si != NULL)
3551 /* strcat src may not overlap dst, so src doesn't need to be
3552 invalidated either. */
3553 si->dont_invalidate = true;
3555 /* For now. Could remove the lhs from the call and add
3556 lhs = dst; afterwards. */
3557 if (lhs)
3558 return;
3560 fn = NULL_TREE;
3561 objsz = NULL_TREE;
3562 switch (bcode)
3564 case BUILT_IN_STRCAT:
3565 if (srclen != NULL_TREE)
3566 fn = builtin_decl_implicit (BUILT_IN_MEMCPY);
3567 else
3568 fn = builtin_decl_implicit (BUILT_IN_STRCPY);
3569 break;
3570 case BUILT_IN_STRCAT_CHK:
3571 if (srclen != NULL_TREE)
3572 fn = builtin_decl_explicit (BUILT_IN_MEMCPY_CHK);
3573 else
3574 fn = builtin_decl_explicit (BUILT_IN_STRCPY_CHK);
3575 objsz = gimple_call_arg (stmt, 2);
3576 break;
3577 default:
3578 gcc_unreachable ();
3581 if (fn == NULL_TREE)
3582 return;
3584 if (dsi && dstlen)
3586 tree type = TREE_TYPE (dstlen);
3588 /* Compute the size of the source sequence, including the nul. */
3589 tree srcsize = srclen ? srclen : size_zero_node;
3590 tree one = build_int_cst (type, 1);
3591 srcsize = fold_build2 (PLUS_EXPR, type, srcsize, one);
3592 tree dstsize = fold_build2 (PLUS_EXPR, type, dstlen, one);
3593 tree sptr = si && si->ptr ? si->ptr : src;
3595 no_warning_opt = check_bounds_or_overlap (stmt, dst, sptr, dstsize,
3596 srcsize);
3597 if (no_warning_opt)
3598 suppress_warning (stmt, no_warning_opt);
3601 tree len = NULL_TREE;
3602 if (srclen != NULL_TREE)
3604 args = TYPE_ARG_TYPES (TREE_TYPE (fn));
3605 type = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (args)));
3607 len = fold_convert_loc (loc, type, unshare_expr (srclen));
3608 len = fold_build2_loc (loc, PLUS_EXPR, type, len,
3609 build_int_cst (type, 1));
3610 len = force_gimple_operand_gsi (gsi, len, true, NULL_TREE, true,
3611 GSI_SAME_STMT);
3613 if (endptr)
3614 dst = fold_convert_loc (loc, TREE_TYPE (dst), unshare_expr (endptr));
3615 else
3616 dst = fold_build2_loc (loc, POINTER_PLUS_EXPR, TREE_TYPE (dst), dst,
3617 fold_convert_loc (loc, sizetype,
3618 unshare_expr (dstlen)));
3619 dst = force_gimple_operand_gsi (gsi, dst, true, NULL_TREE, true,
3620 GSI_SAME_STMT);
3621 if (objsz)
3623 objsz = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (objsz), objsz,
3624 fold_convert_loc (loc, TREE_TYPE (objsz),
3625 unshare_expr (dstlen)));
3626 objsz = force_gimple_operand_gsi (gsi, objsz, true, NULL_TREE, true,
3627 GSI_SAME_STMT);
3629 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
3631 fprintf (dump_file, "Optimizing: ");
3632 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
3634 if (srclen != NULL_TREE)
3635 success = update_gimple_call (gsi, fn, 3 + (objsz != NULL_TREE),
3636 dst, src, len, objsz);
3637 else
3638 success = update_gimple_call (gsi, fn, 2 + (objsz != NULL_TREE),
3639 dst, src, objsz);
3640 if (success)
3642 stmt = gsi_stmt (*gsi);
3643 update_stmt (stmt);
3644 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
3646 fprintf (dump_file, "into: ");
3647 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
3649 /* If srclen == NULL, note that current string length can be
3650 computed by transforming this strcpy into stpcpy. */
3651 if (srclen == NULL_TREE && dsi->dont_invalidate)
3652 dsi->stmt = stmt;
3653 adjust_last_stmt (dsi, stmt, true, ptr_qry);
3654 if (srclen != NULL_TREE)
3656 laststmt.stmt = stmt;
3657 laststmt.len = srclen;
3658 laststmt.stridx = dsi->idx;
3661 else if (dump_file && (dump_flags & TDF_DETAILS) != 0)
3662 fprintf (dump_file, "not possible.\n");
3664 if (no_warning_opt)
3665 suppress_warning (stmt, no_warning_opt);
3668 /* Handle a call to an allocation function like alloca, malloc or calloc,
3669 or an ordinary allocation function declared with attribute alloc_size. */
3671 static void
3672 handle_alloc_call (enum built_in_function bcode, gimple_stmt_iterator *gsi)
3674 gimple *stmt = gsi_stmt (*gsi);
3675 tree lhs = gimple_call_lhs (stmt);
3676 if (lhs == NULL_TREE)
3677 return;
3679 gcc_assert (get_stridx (lhs) == 0);
3680 int idx = new_stridx (lhs);
3681 tree length = NULL_TREE;
3682 if (bcode == BUILT_IN_CALLOC)
3683 length = build_int_cst (size_type_node, 0);
3684 strinfo *si = new_strinfo (lhs, idx, length, length != NULL_TREE);
3685 if (bcode == BUILT_IN_CALLOC)
3687 /* Only set STMT for calloc and malloc. */
3688 si->stmt = stmt;
3689 /* Only set ENDPTR for calloc. */
3690 si->endptr = lhs;
3692 else if (bcode == BUILT_IN_MALLOC)
3693 si->stmt = stmt;
3695 /* Set ALLOC is set for all allocation functions. */
3696 si->alloc = stmt;
3697 set_strinfo (idx, si);
3698 si->writable = true;
3699 si->dont_invalidate = true;
3702 /* Handle a call to memset.
3703 After a call to calloc, memset(,0,) is unnecessary.
3704 memset(malloc(n),0,n) is calloc(n,1).
3705 return true when the call is transformed, false otherwise.
3706 When nonnull uses RVALS to determine range information. */
3708 static bool
3709 handle_builtin_memset (gimple_stmt_iterator *gsi, bool *zero_write,
3710 pointer_query &ptr_qry)
3712 gimple *memset_stmt = gsi_stmt (*gsi);
3713 tree ptr = gimple_call_arg (memset_stmt, 0);
3714 /* Set to the non-constant offset added to PTR. */
3715 wide_int offrng[2];
3716 int idx1 = get_stridx (ptr, offrng, ptr_qry.rvals);
3717 if (idx1 <= 0)
3718 return false;
3719 strinfo *si1 = get_strinfo (idx1);
3720 if (!si1)
3721 return false;
3722 gimple *alloc_stmt = si1->alloc;
3723 if (!alloc_stmt || !is_gimple_call (alloc_stmt))
3724 return false;
3725 tree callee1 = gimple_call_fndecl (alloc_stmt);
3726 if (!valid_builtin_call (alloc_stmt))
3727 return false;
3728 tree alloc_size = gimple_call_arg (alloc_stmt, 0);
3729 tree memset_size = gimple_call_arg (memset_stmt, 2);
3731 /* Check for overflow. */
3732 maybe_warn_overflow (memset_stmt, false, memset_size, ptr_qry, NULL,
3733 false, true);
3735 /* Bail when there is no statement associated with the destination
3736 (the statement may be null even when SI1->ALLOC is not). */
3737 if (!si1->stmt)
3738 return false;
3740 /* Avoid optimizing if store is at a variable offset from the beginning
3741 of the allocated object. */
3742 if (offrng[0] != 0 || offrng[0] != offrng[1])
3743 return false;
3745 /* Bail when the call writes a non-zero value. */
3746 if (!integer_zerop (gimple_call_arg (memset_stmt, 1)))
3747 return false;
3749 /* Let the caller know the memset call cleared the destination. */
3750 *zero_write = true;
3752 enum built_in_function code1 = DECL_FUNCTION_CODE (callee1);
3753 if (code1 == BUILT_IN_CALLOC)
3754 /* Not touching alloc_stmt */ ;
3755 else if (code1 == BUILT_IN_MALLOC
3756 && operand_equal_p (memset_size, alloc_size, 0))
3758 /* Replace the malloc + memset calls with calloc. */
3759 gimple_stmt_iterator gsi1 = gsi_for_stmt (si1->stmt);
3760 update_gimple_call (&gsi1, builtin_decl_implicit (BUILT_IN_CALLOC), 2,
3761 alloc_size, build_one_cst (size_type_node));
3762 si1->nonzero_chars = build_int_cst (size_type_node, 0);
3763 si1->full_string_p = true;
3764 si1->stmt = gsi_stmt (gsi1);
3766 else
3767 return false;
3768 tree lhs = gimple_call_lhs (memset_stmt);
3769 unlink_stmt_vdef (memset_stmt);
3770 if (lhs)
3772 gimple *assign = gimple_build_assign (lhs, ptr);
3773 gsi_replace (gsi, assign, false);
3775 else
3777 gsi_remove (gsi, true);
3778 release_defs (memset_stmt);
3781 return true;
3784 /* Return first such statement if RES is used in statements testing its
3785 equality to zero, and null otherwise. If EXCLUSIVE is true, return
3786 nonnull if and only RES is used in such expressions exclusively and
3787 in none other. */
3789 static gimple *
3790 use_in_zero_equality (tree res, bool exclusive = true)
3792 gimple *first_use = NULL;
3794 use_operand_p use_p;
3795 imm_use_iterator iter;
3797 FOR_EACH_IMM_USE_FAST (use_p, iter, res)
3799 gimple *use_stmt = USE_STMT (use_p);
3801 if (is_gimple_debug (use_stmt))
3802 continue;
3804 if (gimple_code (use_stmt) == GIMPLE_ASSIGN)
3806 tree_code code = gimple_assign_rhs_code (use_stmt);
3807 if (code == COND_EXPR)
3809 tree cond_expr = gimple_assign_rhs1 (use_stmt);
3810 if ((TREE_CODE (cond_expr) != EQ_EXPR
3811 && (TREE_CODE (cond_expr) != NE_EXPR))
3812 || !integer_zerop (TREE_OPERAND (cond_expr, 1)))
3814 if (exclusive)
3815 return NULL;
3816 continue;
3819 else if (code == EQ_EXPR || code == NE_EXPR)
3821 if (!integer_zerop (gimple_assign_rhs2 (use_stmt)))
3823 if (exclusive)
3824 return NULL;
3825 continue;
3828 else if (exclusive)
3829 return NULL;
3830 else
3831 continue;
3833 else if (gimple_code (use_stmt) == GIMPLE_COND)
3835 tree_code code = gimple_cond_code (use_stmt);
3836 if ((code != EQ_EXPR && code != NE_EXPR)
3837 || !integer_zerop (gimple_cond_rhs (use_stmt)))
3839 if (exclusive)
3840 return NULL;
3841 continue;
3844 else if (exclusive)
3845 return NULL;
3846 else
3847 continue;
3849 if (!first_use)
3850 first_use = use_stmt;
3853 return first_use;
3856 /* Handle a call to memcmp. We try to handle small comparisons by
3857 converting them to load and compare, and replacing the call to memcmp
3858 with a __builtin_memcmp_eq call where possible.
3859 return true when call is transformed, return false otherwise. */
3861 static bool
3862 handle_builtin_memcmp (gimple_stmt_iterator *gsi)
3864 gcall *stmt = as_a <gcall *> (gsi_stmt (*gsi));
3865 tree res = gimple_call_lhs (stmt);
3867 if (!res || !use_in_zero_equality (res))
3868 return false;
3870 tree arg1 = gimple_call_arg (stmt, 0);
3871 tree arg2 = gimple_call_arg (stmt, 1);
3872 tree len = gimple_call_arg (stmt, 2);
3873 unsigned HOST_WIDE_INT leni;
3875 if (tree_fits_uhwi_p (len)
3876 && (leni = tree_to_uhwi (len)) <= GET_MODE_SIZE (word_mode)
3877 && pow2p_hwi (leni))
3879 leni *= CHAR_TYPE_SIZE;
3880 unsigned align1 = get_pointer_alignment (arg1);
3881 unsigned align2 = get_pointer_alignment (arg2);
3882 unsigned align = MIN (align1, align2);
3883 scalar_int_mode mode;
3884 if (int_mode_for_size (leni, 1).exists (&mode)
3885 && (align >= leni || !targetm.slow_unaligned_access (mode, align)))
3887 location_t loc = gimple_location (stmt);
3888 tree type, off;
3889 type = build_nonstandard_integer_type (leni, 1);
3890 gcc_assert (known_eq (GET_MODE_BITSIZE (TYPE_MODE (type)), leni));
3891 tree ptrtype = build_pointer_type_for_mode (char_type_node,
3892 ptr_mode, true);
3893 off = build_int_cst (ptrtype, 0);
3894 arg1 = build2_loc (loc, MEM_REF, type, arg1, off);
3895 arg2 = build2_loc (loc, MEM_REF, type, arg2, off);
3896 tree tem1 = fold_const_aggregate_ref (arg1);
3897 if (tem1)
3898 arg1 = tem1;
3899 tree tem2 = fold_const_aggregate_ref (arg2);
3900 if (tem2)
3901 arg2 = tem2;
3902 res = fold_convert_loc (loc, TREE_TYPE (res),
3903 fold_build2_loc (loc, NE_EXPR,
3904 boolean_type_node,
3905 arg1, arg2));
3906 gimplify_and_update_call_from_tree (gsi, res);
3907 return true;
3911 gimple_call_set_fndecl (stmt, builtin_decl_explicit (BUILT_IN_MEMCMP_EQ));
3912 return true;
3915 /* Given strinfo IDX for ARG, sets LENRNG[] to the range of lengths
3916 of the string(s) referenced by ARG if it can be determined.
3917 If the length cannot be determined, sets *SIZE to the size of
3918 the array the string is stored in, if any. If no such array is
3919 known, sets *SIZE to -1. When the strings are nul-terminated sets
3920 *NULTERM to true, otherwise to false. When nonnull uses RVALS to
3921 determine range information. Returns true on success. */
3923 static bool
3924 get_len_or_size (gimple *stmt, tree arg, int idx,
3925 unsigned HOST_WIDE_INT lenrng[2],
3926 unsigned HOST_WIDE_INT *size, bool *nulterm,
3927 range_query *rvals)
3929 /* Invalidate. */
3930 *size = HOST_WIDE_INT_M1U;
3932 if (idx < 0)
3934 /* IDX is the inverted constant string length. */
3935 lenrng[0] = ~idx;
3936 lenrng[1] = lenrng[0];
3937 *nulterm = true;
3938 return true;
3941 /* Set so that both LEN and ~LEN are invalid lengths, i.e., maximum
3942 possible length + 1. */
3943 lenrng[0] = lenrng[1] = HOST_WIDE_INT_MAX;
3945 if (strinfo *si = idx ? get_strinfo (idx) : NULL)
3947 /* FIXME: Handle all this in_range_strlen_dynamic. */
3948 if (!si->nonzero_chars)
3950 else if (tree_fits_uhwi_p (si->nonzero_chars))
3952 lenrng[0] = tree_to_uhwi (si->nonzero_chars);
3953 *nulterm = si->full_string_p;
3954 /* Set the upper bound only if the string is known to be
3955 nul-terminated, otherwise leave it at maximum + 1. */
3956 if (*nulterm)
3957 lenrng[1] = lenrng[0];
3959 else if (TREE_CODE (si->nonzero_chars) == SSA_NAME)
3961 value_range r;
3962 get_range_query (cfun)->range_of_expr (r, si->nonzero_chars);
3963 if (r.kind () == VR_RANGE)
3965 lenrng[0] = r.lower_bound ().to_uhwi ();
3966 lenrng[1] = r.upper_bound ().to_uhwi ();
3967 *nulterm = si->full_string_p;
3972 if (lenrng[0] != HOST_WIDE_INT_MAX)
3973 return true;
3975 /* Compute the minimum and maximum real or possible lengths. */
3976 c_strlen_data lendata = { };
3977 /* Set MAXBOUND to an arbitrary non-null non-integer node as a request
3978 to have it set to the length of the longest string in a PHI. */
3979 lendata.maxbound = arg;
3980 get_range_strlen_dynamic (arg, stmt, &lendata, rvals);
3982 unsigned HOST_WIDE_INT maxbound = HOST_WIDE_INT_M1U;
3983 if (tree_fits_uhwi_p (lendata.maxbound)
3984 && !integer_all_onesp (lendata.maxbound))
3985 maxbound = tree_to_uhwi (lendata.maxbound);
3987 if (tree_fits_uhwi_p (lendata.minlen) && tree_fits_uhwi_p (lendata.maxlen))
3989 unsigned HOST_WIDE_INT minlen = tree_to_uhwi (lendata.minlen);
3990 unsigned HOST_WIDE_INT maxlen = tree_to_uhwi (lendata.maxlen);
3992 /* The longest string in this data model. */
3993 const unsigned HOST_WIDE_INT lenmax
3994 = tree_to_uhwi (max_object_size ()) - 2;
3996 if (maxbound == HOST_WIDE_INT_M1U)
3998 lenrng[0] = minlen;
3999 lenrng[1] = maxlen;
4000 *nulterm = minlen == maxlen;
4002 else if (maxlen < lenmax)
4004 *size = maxbound + 1;
4005 *nulterm = false;
4007 else
4008 return false;
4010 return true;
4013 if (maxbound != HOST_WIDE_INT_M1U
4014 && lendata.maxlen
4015 && !integer_all_onesp (lendata.maxlen))
4017 /* Set *SIZE to LENDATA.MAXBOUND which is a conservative estimate
4018 of the longest string based on the sizes of the arrays referenced
4019 by ARG. */
4020 *size = maxbound + 1;
4021 *nulterm = false;
4022 return true;
4025 return false;
4028 /* If IDX1 and IDX2 refer to strings A and B of unequal lengths, return
4029 the result of 0 == strncmp (A, B, BOUND) (which is the same as strcmp
4030 for a sufficiently large BOUND). If the result is based on the length
4031 of one string being greater than the longest string that would fit in
4032 the array pointer to by the argument, set *PLEN and *PSIZE to
4033 the corresponding length (or its complement when the string is known
4034 to be at least as long and need not be nul-terminated) and size.
4035 Otherwise return null. */
4037 static tree
4038 strxcmp_eqz_result (gimple *stmt, tree arg1, int idx1, tree arg2, int idx2,
4039 unsigned HOST_WIDE_INT bound, unsigned HOST_WIDE_INT len[2],
4040 unsigned HOST_WIDE_INT *psize, range_query *rvals)
4042 /* Determine the range the length of each string is in and whether it's
4043 known to be nul-terminated, or the size of the array it's stored in. */
4044 bool nul1, nul2;
4045 unsigned HOST_WIDE_INT siz1, siz2;
4046 unsigned HOST_WIDE_INT len1rng[2], len2rng[2];
4047 if (!get_len_or_size (stmt, arg1, idx1, len1rng, &siz1, &nul1, rvals)
4048 || !get_len_or_size (stmt, arg2, idx2, len2rng, &siz2, &nul2, rvals))
4049 return NULL_TREE;
4051 /* BOUND is set to HWI_M1U for strcmp and less to strncmp, and LENiRNG
4052 to HWI_MAX when invalid. Adjust the length of each string to consider
4053 to be no more than BOUND. */
4054 if (len1rng[0] < HOST_WIDE_INT_MAX && len1rng[0] > bound)
4055 len1rng[0] = bound;
4056 if (len1rng[1] < HOST_WIDE_INT_MAX && len1rng[1] > bound)
4057 len1rng[1] = bound;
4058 if (len2rng[0] < HOST_WIDE_INT_MAX && len2rng[0] > bound)
4059 len2rng[0] = bound;
4060 if (len2rng[1] < HOST_WIDE_INT_MAX && len2rng[1] > bound)
4061 len2rng[1] = bound;
4063 /* Two empty strings are equal. */
4064 if (len1rng[1] == 0 && len2rng[1] == 0)
4065 return integer_one_node;
4067 /* The strings are definitely unequal when the lower bound of the length
4068 of one of them is greater than the length of the longest string that
4069 would fit into the other array. */
4070 if (len1rng[0] == HOST_WIDE_INT_MAX
4071 && len2rng[0] != HOST_WIDE_INT_MAX
4072 && ((len2rng[0] < bound && len2rng[0] >= siz1)
4073 || len2rng[0] > siz1))
4075 *psize = siz1;
4076 len[0] = len1rng[0];
4077 /* Set LEN[0] to the lower bound of ARG1's length when it's
4078 nul-terminated or to the complement of its minimum length
4079 otherwise, */
4080 len[1] = nul2 ? len2rng[0] : ~len2rng[0];
4081 return integer_zero_node;
4084 if (len2rng[0] == HOST_WIDE_INT_MAX
4085 && len1rng[0] != HOST_WIDE_INT_MAX
4086 && ((len1rng[0] < bound && len1rng[0] >= siz2)
4087 || len1rng[0] > siz2))
4089 *psize = siz2;
4090 len[0] = nul1 ? len1rng[0] : ~len1rng[0];
4091 len[1] = len2rng[0];
4092 return integer_zero_node;
4095 /* The strings are also definitely unequal when their lengths are unequal
4096 and at least one is nul-terminated. */
4097 if (len1rng[0] != HOST_WIDE_INT_MAX
4098 && len2rng[0] != HOST_WIDE_INT_MAX
4099 && ((len1rng[1] < len2rng[0] && nul1)
4100 || (len2rng[1] < len1rng[0] && nul2)))
4102 if (bound <= len1rng[0] || bound <= len2rng[0])
4103 *psize = bound;
4104 else
4105 *psize = HOST_WIDE_INT_M1U;
4107 len[0] = len1rng[0];
4108 len[1] = len2rng[0];
4109 return integer_zero_node;
4112 /* The string lengths may be equal or unequal. Even when equal and
4113 both strings nul-terminated, without the string contents there's
4114 no way to determine whether they are equal. */
4115 return NULL_TREE;
4118 /* Diagnose pointless calls to strcmp or strncmp STMT with string
4119 arguments of lengths LEN or size SIZ and (for strncmp) BOUND,
4120 whose result is used in equality expressions that evaluate to
4121 a constant due to one argument being longer than the size of
4122 the other. */
4124 static void
4125 maybe_warn_pointless_strcmp (gimple *stmt, HOST_WIDE_INT bound,
4126 unsigned HOST_WIDE_INT len[2],
4127 unsigned HOST_WIDE_INT siz)
4129 tree lhs = gimple_call_lhs (stmt);
4130 gimple *use = use_in_zero_equality (lhs, /* exclusive = */ false);
4131 if (!use)
4132 return;
4134 bool at_least = false;
4136 /* Excessive LEN[i] indicates a lower bound. */
4137 if (len[0] > HOST_WIDE_INT_MAX)
4139 at_least = true;
4140 len[0] = ~len[0];
4143 if (len[1] > HOST_WIDE_INT_MAX)
4145 at_least = true;
4146 len[1] = ~len[1];
4149 unsigned HOST_WIDE_INT minlen = MIN (len[0], len[1]);
4151 /* FIXME: Include a note pointing to the declaration of the smaller
4152 array. */
4153 location_t stmt_loc = gimple_or_expr_nonartificial_location (stmt, lhs);
4155 tree callee = gimple_call_fndecl (stmt);
4156 bool warned = false;
4157 if (siz <= minlen && bound == -1)
4158 warned = warning_at (stmt_loc, OPT_Wstring_compare,
4159 (at_least
4160 ? G_("%qD of a string of length %wu or more and "
4161 "an array of size %wu evaluates to nonzero")
4162 : G_("%qD of a string of length %wu and an array "
4163 "of size %wu evaluates to nonzero")),
4164 callee, minlen, siz);
4165 else if (!at_least && siz <= HOST_WIDE_INT_MAX)
4167 if (len[0] != HOST_WIDE_INT_MAX && len[1] != HOST_WIDE_INT_MAX)
4168 warned = warning_at (stmt_loc, OPT_Wstring_compare,
4169 "%qD of strings of length %wu and %wu "
4170 "and bound of %wu evaluates to nonzero",
4171 callee, len[0], len[1], bound);
4172 else
4173 warned = warning_at (stmt_loc, OPT_Wstring_compare,
4174 "%qD of a string of length %wu, an array "
4175 "of size %wu and bound of %wu evaluates to "
4176 "nonzero",
4177 callee, minlen, siz, bound);
4180 if (!warned)
4181 return;
4183 location_t use_loc = gimple_location (use);
4184 if (LOCATION_LINE (stmt_loc) != LOCATION_LINE (use_loc))
4185 inform (use_loc, "in this expression");
4189 /* Optimize a call to strcmp or strncmp either by folding it to a constant
4190 when possible or by transforming the latter to the former. Warn about
4191 calls where the length of one argument is greater than the size of
4192 the array to which the other argument points if the latter's length
4193 is not known. Return true when the call has been transformed into
4194 another and false otherwise. */
4196 static bool
4197 handle_builtin_string_cmp (gimple_stmt_iterator *gsi, range_query *rvals)
4199 gcall *stmt = as_a <gcall *> (gsi_stmt (*gsi));
4200 tree lhs = gimple_call_lhs (stmt);
4202 if (!lhs)
4203 return false;
4205 tree arg1 = gimple_call_arg (stmt, 0);
4206 tree arg2 = gimple_call_arg (stmt, 1);
4207 int idx1 = get_stridx (arg1);
4208 int idx2 = get_stridx (arg2);
4210 /* For strncmp set to the value of the third argument if known. */
4211 HOST_WIDE_INT bound = -1;
4212 tree len = NULL_TREE;
4213 /* Extract the strncmp bound. */
4214 if (gimple_call_num_args (stmt) == 3)
4216 len = gimple_call_arg (stmt, 2);
4217 if (tree_fits_shwi_p (len))
4218 bound = tree_to_shwi (len);
4220 /* If the bound argument is NOT known, do nothing. */
4221 if (bound < 0)
4222 return false;
4225 /* Avoid folding if either argument is not a nul-terminated array.
4226 Defer warning until later. */
4227 if (!check_nul_terminated_array (NULL_TREE, arg1, len)
4228 || !check_nul_terminated_array (NULL_TREE, arg2, len))
4229 return false;
4232 /* Set to the length of one argument (or its complement if it's
4233 the lower bound of a range) and the size of the array storing
4234 the other if the result is based on the former being equal to
4235 or greater than the latter. */
4236 unsigned HOST_WIDE_INT len[2] = { HOST_WIDE_INT_MAX, HOST_WIDE_INT_MAX };
4237 unsigned HOST_WIDE_INT siz = HOST_WIDE_INT_M1U;
4239 /* Try to determine if the two strings are either definitely equal
4240 or definitely unequal and if so, either fold the result to zero
4241 (when equal) or set the range of the result to ~[0, 0] otherwise. */
4242 if (tree eqz = strxcmp_eqz_result (stmt, arg1, idx1, arg2, idx2, bound,
4243 len, &siz, rvals))
4245 if (integer_zerop (eqz))
4247 maybe_warn_pointless_strcmp (stmt, bound, len, siz);
4249 /* When the lengths of the first two string arguments are
4250 known to be unequal set the range of the result to non-zero.
4251 This allows the call to be eliminated if its result is only
4252 used in tests for equality to zero. */
4253 wide_int zero = wi::zero (TYPE_PRECISION (TREE_TYPE (lhs)));
4254 set_range_info (lhs, VR_ANTI_RANGE, zero, zero);
4255 return false;
4257 /* When the two strings are definitely equal (such as when they
4258 are both empty) fold the call to the constant result. */
4259 replace_call_with_value (gsi, integer_zero_node);
4260 return true;
4264 /* Return if nothing is known about the strings pointed to by ARG1
4265 and ARG2. */
4266 if (idx1 == 0 && idx2 == 0)
4267 return false;
4269 /* Determine either the length or the size of each of the strings,
4270 whichever is available. */
4271 HOST_WIDE_INT cstlen1 = -1, cstlen2 = -1;
4272 HOST_WIDE_INT arysiz1 = -1, arysiz2 = -1;
4275 unsigned HOST_WIDE_INT len1rng[2], len2rng[2];
4276 unsigned HOST_WIDE_INT arsz1, arsz2;
4277 bool nulterm[2];
4279 if (!get_len_or_size (stmt, arg1, idx1, len1rng, &arsz1, nulterm, rvals)
4280 || !get_len_or_size (stmt, arg2, idx2, len2rng, &arsz2, nulterm + 1,
4281 rvals))
4282 return false;
4284 if (len1rng[0] == len1rng[1] && len1rng[0] < HOST_WIDE_INT_MAX)
4285 cstlen1 = len1rng[0];
4286 else if (arsz1 < HOST_WIDE_INT_M1U)
4287 arysiz1 = arsz1;
4289 if (len2rng[0] == len2rng[1] && len2rng[0] < HOST_WIDE_INT_MAX)
4290 cstlen2 = len2rng[0];
4291 else if (arsz2 < HOST_WIDE_INT_M1U)
4292 arysiz2 = arsz2;
4295 /* Bail if neither the string length nor the size of the array
4296 it is stored in can be determined. */
4297 if ((cstlen1 < 0 && arysiz1 < 0)
4298 || (cstlen2 < 0 && arysiz2 < 0)
4299 || (cstlen1 < 0 && cstlen2 < 0))
4300 return false;
4302 if (cstlen1 >= 0)
4303 ++cstlen1;
4304 if (cstlen2 >= 0)
4305 ++cstlen2;
4307 /* The exact number of characters to compare. */
4308 HOST_WIDE_INT cmpsiz;
4309 if (cstlen1 >= 0 && cstlen2 >= 0)
4310 cmpsiz = MIN (cstlen1, cstlen2);
4311 else if (cstlen1 >= 0)
4312 cmpsiz = cstlen1;
4313 else
4314 cmpsiz = cstlen2;
4315 if (bound >= 0)
4316 cmpsiz = MIN (cmpsiz, bound);
4317 /* The size of the array in which the unknown string is stored. */
4318 HOST_WIDE_INT varsiz = arysiz1 < 0 ? arysiz2 : arysiz1;
4320 if ((varsiz < 0 || cmpsiz < varsiz) && use_in_zero_equality (lhs))
4322 /* If the known length is less than the size of the other array
4323 and the strcmp result is only used to test equality to zero,
4324 transform the call to the equivalent _eq call. */
4325 if (tree fn = builtin_decl_implicit (bound < 0 ? BUILT_IN_STRCMP_EQ
4326 : BUILT_IN_STRNCMP_EQ))
4328 tree n = build_int_cst (size_type_node, cmpsiz);
4329 update_gimple_call (gsi, fn, 3, arg1, arg2, n);
4330 return true;
4334 return false;
4337 /* Handle a POINTER_PLUS_EXPR statement.
4338 For p = "abcd" + 2; compute associated length, or if
4339 p = q + off is pointing to a '\0' character of a string, call
4340 zero_length_string on it. */
4342 static void
4343 handle_pointer_plus (gimple_stmt_iterator *gsi)
4345 gimple *stmt = gsi_stmt (*gsi);
4346 tree lhs = gimple_assign_lhs (stmt), off;
4347 int idx = get_stridx (gimple_assign_rhs1 (stmt));
4348 strinfo *si, *zsi;
4350 if (idx == 0)
4351 return;
4353 if (idx < 0)
4355 tree off = gimple_assign_rhs2 (stmt);
4356 if (tree_fits_uhwi_p (off)
4357 && tree_to_uhwi (off) <= (unsigned HOST_WIDE_INT) ~idx)
4358 ssa_ver_to_stridx[SSA_NAME_VERSION (lhs)]
4359 = ~(~idx - (int) tree_to_uhwi (off));
4360 return;
4363 si = get_strinfo (idx);
4364 if (si == NULL || si->nonzero_chars == NULL_TREE)
4365 return;
4367 off = gimple_assign_rhs2 (stmt);
4368 zsi = NULL;
4369 if (si->full_string_p && operand_equal_p (si->nonzero_chars, off, 0))
4370 zsi = zero_length_string (lhs, si);
4371 else if (TREE_CODE (off) == SSA_NAME)
4373 gimple *def_stmt = SSA_NAME_DEF_STMT (off);
4374 if (gimple_assign_single_p (def_stmt)
4375 && si->full_string_p
4376 && operand_equal_p (si->nonzero_chars,
4377 gimple_assign_rhs1 (def_stmt), 0))
4378 zsi = zero_length_string (lhs, si);
4380 if (zsi != NULL
4381 && si->endptr != NULL_TREE
4382 && si->endptr != lhs
4383 && TREE_CODE (si->endptr) == SSA_NAME)
4385 enum tree_code rhs_code
4386 = useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (si->endptr))
4387 ? SSA_NAME : NOP_EXPR;
4388 gimple_assign_set_rhs_with_ops (gsi, rhs_code, si->endptr);
4389 gcc_assert (gsi_stmt (*gsi) == stmt);
4390 update_stmt (stmt);
4394 /* Set LENRANGE to the number of nonzero bytes for a store of TYPE and
4395 clear all flags. Return true on success and false on failure. */
4397 static bool
4398 nonzero_bytes_for_type (tree type, unsigned lenrange[3],
4399 bool *nulterm, bool *allnul, bool *allnonnul)
4401 /* Use the size of the type of the expression as the size of the store,
4402 and set the upper bound of the length range to that of the size.
4403 Nothing is known about the contents so clear all flags. */
4404 tree typesize = TYPE_SIZE_UNIT (type);
4405 if (!type)
4406 return false;
4408 if (!tree_fits_uhwi_p (typesize))
4409 return false;
4411 unsigned HOST_WIDE_INT sz = tree_to_uhwi (typesize);
4412 if (sz > UINT_MAX)
4413 return false;
4415 lenrange[2] = sz;
4416 lenrange[1] = lenrange[2] ? lenrange[2] - 1 : 0;
4417 lenrange[0] = 0;
4418 *nulterm = false;
4419 *allnul = false;
4420 *allnonnul = false;
4421 return true;
4424 static bool
4425 count_nonzero_bytes_addr (tree, unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
4426 unsigned [3], bool *, bool *, bool *,
4427 range_query *, ssa_name_limit_t &);
4429 /* Recursively determine the minimum and maximum number of leading nonzero
4430 bytes in the representation of EXP and set LENRANGE[0] and LENRANGE[1]
4431 to each.
4432 Sets LENRANGE[2] to the total size of the access (which may be less
4433 than LENRANGE[1] when what's being referenced by EXP is a pointer
4434 rather than an array).
4435 Sets *NULTERM if the representation contains a zero byte, sets *ALLNUL
4436 if all the bytes are zero, and *ALLNONNUL is all are nonzero.
4437 OFFSET and NBYTES are the offset into the representation and
4438 the size of the access to it determined from an ADDR_EXPR (i.e.,
4439 a pointer) or MEM_REF or zero for other expressions.
4440 Uses RVALS to determine range information.
4441 Avoids recursing deeper than the limits in SNLIM allow.
4442 Returns true on success and false otherwise. */
4444 static bool
4445 count_nonzero_bytes (tree exp, unsigned HOST_WIDE_INT offset,
4446 unsigned HOST_WIDE_INT nbytes,
4447 unsigned lenrange[3], bool *nulterm,
4448 bool *allnul, bool *allnonnul, range_query *rvals,
4449 ssa_name_limit_t &snlim)
4451 if (TREE_CODE (exp) == SSA_NAME)
4453 /* Handle non-zero single-character stores specially. */
4454 tree type = TREE_TYPE (exp);
4455 if (TREE_CODE (type) == INTEGER_TYPE
4456 && TYPE_MODE (type) == TYPE_MODE (char_type_node)
4457 && TYPE_PRECISION (type) == TYPE_PRECISION (char_type_node)
4458 && tree_expr_nonzero_p (exp))
4460 /* If the character EXP is known to be non-zero (even if its
4461 exact value is not known) recurse once to set the range
4462 for an arbitrary constant. */
4463 exp = build_int_cst (type, 1);
4464 return count_nonzero_bytes (exp, offset, 1, lenrange,
4465 nulterm, allnul, allnonnul, rvals, snlim);
4468 gimple *stmt = SSA_NAME_DEF_STMT (exp);
4469 if (gimple_assign_single_p (stmt))
4471 exp = gimple_assign_rhs1 (stmt);
4472 if (!DECL_P (exp)
4473 && TREE_CODE (exp) != CONSTRUCTOR
4474 && TREE_CODE (exp) != MEM_REF)
4475 return false;
4476 /* Handle DECLs, CONSTRUCTOR and MEM_REF below. */
4478 else if (gimple_code (stmt) == GIMPLE_PHI)
4480 /* Avoid processing an SSA_NAME that has already been visited
4481 or if an SSA_NAME limit has been reached. Indicate success
4482 if the former and failure if the latter. */
4483 if (int res = snlim.next_phi (exp))
4484 return res > 0;
4486 /* Determine the minimum and maximum from the PHI arguments. */
4487 unsigned int n = gimple_phi_num_args (stmt);
4488 for (unsigned i = 0; i != n; i++)
4490 tree def = gimple_phi_arg_def (stmt, i);
4491 if (!count_nonzero_bytes (def, offset, nbytes, lenrange, nulterm,
4492 allnul, allnonnul, rvals, snlim))
4493 return false;
4496 return true;
4500 if (TREE_CODE (exp) == CONSTRUCTOR)
4502 if (nbytes)
4503 /* If NBYTES has already been determined by an outer MEM_REF
4504 fail rather than overwriting it (this shouldn't happen). */
4505 return false;
4507 tree type = TREE_TYPE (exp);
4508 tree size = TYPE_SIZE_UNIT (type);
4509 if (!size || !tree_fits_uhwi_p (size))
4510 return false;
4512 unsigned HOST_WIDE_INT byte_size = tree_to_uhwi (size);
4513 if (byte_size < offset)
4514 return false;
4516 nbytes = byte_size - offset;
4519 if (TREE_CODE (exp) == MEM_REF)
4521 if (nbytes)
4522 return false;
4524 tree arg = TREE_OPERAND (exp, 0);
4525 tree off = TREE_OPERAND (exp, 1);
4527 if (TREE_CODE (off) != INTEGER_CST || !tree_fits_uhwi_p (off))
4528 return false;
4530 unsigned HOST_WIDE_INT wioff = tree_to_uhwi (off);
4531 if (INT_MAX < wioff)
4532 return false;
4534 offset += wioff;
4535 if (INT_MAX < offset)
4536 return false;
4538 /* The size of the MEM_REF access determines the number of bytes. */
4539 tree type = TREE_TYPE (exp);
4540 tree typesize = TYPE_SIZE_UNIT (type);
4541 if (!typesize || !tree_fits_uhwi_p (typesize))
4542 return false;
4543 nbytes = tree_to_uhwi (typesize);
4544 if (!nbytes)
4545 return false;
4547 /* Handle MEM_REF = SSA_NAME types of assignments. */
4548 return count_nonzero_bytes_addr (arg, offset, nbytes, lenrange, nulterm,
4549 allnul, allnonnul, rvals, snlim);
4552 if (VAR_P (exp) || TREE_CODE (exp) == CONST_DECL)
4554 /* If EXP can be folded into a constant use the result. Otherwise
4555 proceed to use EXP to determine a range of the result. */
4556 if (tree fold_exp = ctor_for_folding (exp))
4557 if (fold_exp != error_mark_node)
4558 exp = fold_exp;
4561 const char *prep = NULL;
4562 if (TREE_CODE (exp) == STRING_CST)
4564 unsigned nchars = TREE_STRING_LENGTH (exp);
4565 if (nchars < offset)
4566 return false;
4568 if (!nbytes)
4569 /* If NBYTES hasn't been determined earlier, either from ADDR_EXPR
4570 (i.e., it's the size of a pointer), or from MEM_REF (as the size
4571 of the access), set it here to the size of the string, including
4572 all internal and trailing nuls if the string has any. */
4573 nbytes = nchars - offset;
4574 else if (nchars - offset < nbytes)
4575 return false;
4577 prep = TREE_STRING_POINTER (exp) + offset;
4580 unsigned char buf[256];
4581 if (!prep)
4583 if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
4584 return false;
4585 /* If the pointer to representation hasn't been set above
4586 for STRING_CST point it at the buffer. */
4587 prep = reinterpret_cast <char *>(buf);
4588 /* Try to extract the representation of the constant object
4589 or expression starting from the offset. */
4590 unsigned repsize = native_encode_expr (exp, buf, sizeof buf, offset);
4591 if (repsize < nbytes)
4593 /* This should only happen when REPSIZE is zero because EXP
4594 doesn't denote an object with a known initializer, except
4595 perhaps when the reference reads past its end. */
4596 lenrange[0] = 0;
4597 prep = NULL;
4599 else if (!nbytes)
4600 nbytes = repsize;
4601 else if (nbytes < repsize)
4602 return false;
4605 if (!nbytes)
4606 return nonzero_bytes_for_type (TREE_TYPE (exp), lenrange,
4607 nulterm, allnul, allnonnul);
4609 /* Compute the number of leading nonzero bytes in the representation
4610 and update the minimum and maximum. */
4611 unsigned n = prep ? strnlen (prep, nbytes) : nbytes;
4613 if (n < lenrange[0])
4614 lenrange[0] = n;
4615 if (lenrange[1] < n)
4616 lenrange[1] = n;
4618 /* Set the size of the representation. */
4619 if (lenrange[2] < nbytes)
4620 lenrange[2] = nbytes;
4622 /* Clear NULTERM if none of the bytes is zero. */
4623 if (n == nbytes)
4624 *nulterm = false;
4626 if (n)
4628 /* When the initial number of non-zero bytes N is non-zero, reset
4629 *ALLNUL; if N is less than that the size of the representation
4630 also clear *ALLNONNUL. */
4631 *allnul = false;
4632 if (n < nbytes)
4633 *allnonnul = false;
4635 else if (*allnul || *allnonnul)
4637 *allnonnul = false;
4639 if (*allnul)
4641 /* When either ALLNUL is set and N is zero, also determine
4642 whether all subsequent bytes after the first one (which
4643 is nul) are zero or nonzero and clear ALLNUL if not. */
4644 for (const char *p = prep; p != prep + nbytes; ++p)
4645 if (*p)
4647 *allnul = false;
4648 break;
4653 return true;
4656 /* Like count_nonzero_bytes, but instead of counting bytes in EXP, count
4657 bytes that are pointed to by EXP, which should be a pointer. */
4659 static bool
4660 count_nonzero_bytes_addr (tree exp, unsigned HOST_WIDE_INT offset,
4661 unsigned HOST_WIDE_INT nbytes,
4662 unsigned lenrange[3], bool *nulterm,
4663 bool *allnul, bool *allnonnul,
4664 range_query *rvals, ssa_name_limit_t &snlim)
4666 int idx = get_stridx (exp);
4667 if (idx > 0)
4669 strinfo *si = get_strinfo (idx);
4670 if (!si)
4671 return false;
4673 /* Handle both constant lengths as well non-constant lengths
4674 in some range. */
4675 unsigned HOST_WIDE_INT minlen, maxlen;
4676 if (tree_fits_shwi_p (si->nonzero_chars))
4677 minlen = maxlen = tree_to_shwi (si->nonzero_chars);
4678 else if (si->nonzero_chars
4679 && TREE_CODE (si->nonzero_chars) == SSA_NAME)
4681 value_range vr;
4682 rvals->range_of_expr (vr, si->nonzero_chars, si->stmt);
4683 if (vr.kind () != VR_RANGE)
4684 return false;
4686 minlen = tree_to_uhwi (vr.min ());
4687 maxlen = tree_to_uhwi (vr.max ());
4689 else
4690 return false;
4692 if (maxlen < offset)
4693 return false;
4695 minlen = minlen < offset ? 0 : minlen - offset;
4696 maxlen -= offset;
4697 if (maxlen + 1 < nbytes)
4698 return false;
4700 if (nbytes <= minlen)
4701 *nulterm = false;
4703 if (nbytes < minlen)
4705 minlen = nbytes;
4706 if (nbytes < maxlen)
4707 maxlen = nbytes;
4710 if (minlen < lenrange[0])
4711 lenrange[0] = minlen;
4712 if (lenrange[1] < maxlen)
4713 lenrange[1] = maxlen;
4715 if (lenrange[2] < nbytes)
4716 lenrange[2] = nbytes;
4718 /* Since only the length of the string are known and not its contents,
4719 clear ALLNUL and ALLNONNUL purely on the basis of the length. */
4720 *allnul = false;
4721 if (minlen < nbytes)
4722 *allnonnul = false;
4724 return true;
4727 if (TREE_CODE (exp) == ADDR_EXPR)
4728 return count_nonzero_bytes (TREE_OPERAND (exp, 0), offset, nbytes,
4729 lenrange, nulterm, allnul, allnonnul, rvals,
4730 snlim);
4732 if (TREE_CODE (exp) == SSA_NAME)
4734 gimple *stmt = SSA_NAME_DEF_STMT (exp);
4735 if (gimple_code (stmt) == GIMPLE_PHI)
4737 /* Avoid processing an SSA_NAME that has already been visited
4738 or if an SSA_NAME limit has been reached. Indicate success
4739 if the former and failure if the latter. */
4740 if (int res = snlim.next_phi (exp))
4741 return res > 0;
4743 /* Determine the minimum and maximum from the PHI arguments. */
4744 unsigned int n = gimple_phi_num_args (stmt);
4745 for (unsigned i = 0; i != n; i++)
4747 tree def = gimple_phi_arg_def (stmt, i);
4748 if (!count_nonzero_bytes_addr (def, offset, nbytes, lenrange,
4749 nulterm, allnul, allnonnul, rvals,
4750 snlim))
4751 return false;
4754 return true;
4758 /* Otherwise we don't know anything. */
4759 lenrange[0] = 0;
4760 if (lenrange[1] < nbytes)
4761 lenrange[1] = nbytes;
4762 if (lenrange[2] < nbytes)
4763 lenrange[2] = nbytes;
4764 *nulterm = false;
4765 *allnul = false;
4766 *allnonnul = false;
4767 return true;
4770 /* Same as above except with an implicit SSA_NAME limit. When EXPR_OR_TYPE
4771 is a type rather than an expression use its size to compute the range.
4772 RVALS is used to determine ranges of dynamically computed string lengths
4773 (the results of strlen). */
4775 static bool
4776 count_nonzero_bytes (tree expr_or_type, unsigned lenrange[3], bool *nulterm,
4777 bool *allnul, bool *allnonnul, range_query *rvals)
4779 if (TYPE_P (expr_or_type))
4780 return nonzero_bytes_for_type (expr_or_type, lenrange,
4781 nulterm, allnul, allnonnul);
4783 /* Set to optimistic values so the caller doesn't have to worry about
4784 initializing these and to what. On success, the function will clear
4785 these if it determines their values are different but being recursive
4786 it never sets either to true. On failure, their values are
4787 unspecified. */
4788 *nulterm = true;
4789 *allnul = true;
4790 *allnonnul = true;
4792 ssa_name_limit_t snlim;
4793 tree expr = expr_or_type;
4794 return count_nonzero_bytes (expr, 0, 0, lenrange, nulterm, allnul, allnonnul,
4795 rvals, snlim);
4798 /* Handle a single or multibyte store other than by a built-in function,
4799 either via a single character assignment or by multi-byte assignment
4800 either via MEM_REF or via a type other than char (such as in
4801 '*(int*)a = 12345'). Return true to let the caller advance *GSI to
4802 the next statement in the basic block and false otherwise. */
4804 static bool
4805 handle_store (gimple_stmt_iterator *gsi, bool *zero_write,
4806 pointer_query &ptr_qry)
4808 gimple *stmt = gsi_stmt (*gsi);
4809 /* The LHS and RHS of the store. The RHS is null if STMT is a function
4810 call. STORETYPE is the type of the store (determined from either
4811 the RHS of the assignment statement or the LHS of a function call. */
4812 tree lhs, rhs, storetype;
4813 if (is_gimple_assign (stmt))
4815 lhs = gimple_assign_lhs (stmt);
4816 rhs = gimple_assign_rhs1 (stmt);
4817 storetype = TREE_TYPE (rhs);
4819 else if (is_gimple_call (stmt))
4821 lhs = gimple_call_lhs (stmt);
4822 rhs = NULL_TREE;
4823 storetype = TREE_TYPE (lhs);
4825 else
4826 return true;
4828 tree ssaname = NULL_TREE;
4829 strinfo *si = NULL;
4830 int idx = -1;
4832 range_query *const rvals = ptr_qry.rvals;
4834 /* The offset of the first byte in LHS modified by the store. */
4835 unsigned HOST_WIDE_INT offset = 0;
4837 if (TREE_CODE (lhs) == MEM_REF
4838 && TREE_CODE (TREE_OPERAND (lhs, 0)) == SSA_NAME)
4840 tree mem_offset = TREE_OPERAND (lhs, 1);
4841 if (tree_fits_uhwi_p (mem_offset))
4843 /* Get the strinfo for the base, and use it if it starts with at
4844 least OFFSET nonzero characters. This is trivially true if
4845 OFFSET is zero. */
4846 offset = tree_to_uhwi (mem_offset);
4847 idx = get_stridx (TREE_OPERAND (lhs, 0));
4848 if (idx > 0)
4849 si = get_strinfo (idx);
4850 if (offset == 0)
4851 ssaname = TREE_OPERAND (lhs, 0);
4852 else if (si == NULL || compare_nonzero_chars (si, offset, rvals) < 0)
4854 *zero_write = rhs ? initializer_zerop (rhs) : false;
4856 bool dummy;
4857 unsigned lenrange[] = { UINT_MAX, 0, 0 };
4858 if (count_nonzero_bytes (rhs ? rhs : storetype, lenrange,
4859 &dummy, &dummy, &dummy, rvals))
4860 maybe_warn_overflow (stmt, true, lenrange[2], ptr_qry);
4862 return true;
4866 else
4868 idx = get_addr_stridx (lhs, NULL_TREE, &offset, rvals);
4869 if (idx > 0)
4870 si = get_strinfo (idx);
4873 /* Minimum and maximum leading non-zero bytes and the size of the store. */
4874 unsigned lenrange[] = { UINT_MAX, 0, 0 };
4876 /* Set to the minimum length of the string being assigned if known. */
4877 unsigned HOST_WIDE_INT rhs_minlen;
4879 /* STORING_NONZERO_P is true iff not all stored characters are zero.
4880 STORING_ALL_NONZERO_P is true if all stored characters are zero.
4881 STORING_ALL_ZEROS_P is true iff all stored characters are zero.
4882 Both are false when it's impossible to determine which is true. */
4883 bool storing_nonzero_p;
4884 bool storing_all_nonzero_p;
4885 bool storing_all_zeros_p;
4886 /* FULL_STRING_P is set when the stored sequence of characters form
4887 a nul-terminated string. */
4888 bool full_string_p;
4890 const bool ranges_valid
4891 = count_nonzero_bytes (rhs ? rhs : storetype, lenrange, &full_string_p,
4892 &storing_all_zeros_p, &storing_all_nonzero_p,
4893 rvals);
4895 if (ranges_valid)
4897 rhs_minlen = lenrange[0];
4898 storing_nonzero_p = lenrange[1] > 0;
4899 *zero_write = storing_all_zeros_p;
4901 maybe_warn_overflow (stmt, true, lenrange[2], ptr_qry);
4903 else
4905 rhs_minlen = HOST_WIDE_INT_M1U;
4906 full_string_p = false;
4907 storing_nonzero_p = false;
4908 storing_all_zeros_p = false;
4909 storing_all_nonzero_p = false;
4912 if (si != NULL)
4914 /* The corresponding element is set to 1 if the first and last
4915 element, respectively, of the sequence of characters being
4916 written over the string described by SI ends before
4917 the terminating nul (if it has one), to zero if the nul is
4918 being overwritten but not beyond, or negative otherwise. */
4919 int store_before_nul[2];
4920 if (ranges_valid)
4922 /* The offset of the last stored byte. */
4923 unsigned HOST_WIDE_INT endoff = offset + lenrange[2] - 1;
4924 store_before_nul[0] = compare_nonzero_chars (si, offset, rvals);
4925 if (endoff == offset)
4926 store_before_nul[1] = store_before_nul[0];
4927 else
4928 store_before_nul[1] = compare_nonzero_chars (si, endoff, rvals);
4930 else
4932 store_before_nul[0] = compare_nonzero_chars (si, offset, rvals);
4933 store_before_nul[1] = store_before_nul[0];
4934 gcc_assert (offset == 0 || store_before_nul[0] >= 0);
4937 if (storing_all_zeros_p
4938 && store_before_nul[0] == 0
4939 && store_before_nul[1] == 0
4940 && si->full_string_p)
4942 /* When overwriting a '\0' with a '\0', the store can be removed
4943 if we know it has been stored in the current function. */
4944 if (!stmt_could_throw_p (cfun, stmt) && si->writable)
4946 unlink_stmt_vdef (stmt);
4947 release_defs (stmt);
4948 gsi_remove (gsi, true);
4949 return false;
4951 else
4953 si->writable = true;
4954 gsi_next (gsi);
4955 return false;
4959 if (store_before_nul[1] > 0
4960 && storing_nonzero_p
4961 && lenrange[0] == lenrange[1]
4962 && lenrange[0] == lenrange[2]
4963 && TREE_CODE (storetype) == INTEGER_TYPE)
4965 /* Handle a store of one or more non-nul characters that ends
4966 before the terminating nul of the destination and so does
4967 not affect its length
4968 If si->nonzero_chars > OFFSET, we aren't overwriting '\0',
4969 and if we aren't storing '\0', we know that the length of
4970 the string and any other zero terminated string in memory
4971 remains the same. In that case we move to the next gimple
4972 statement and return to signal the caller that it shouldn't
4973 invalidate anything.
4975 This is beneficial for cases like:
4977 char p[20];
4978 void foo (char *q)
4980 strcpy (p, "foobar");
4981 size_t len = strlen (p); // can be folded to 6
4982 size_t len2 = strlen (q); // has to be computed
4983 p[0] = 'X';
4984 size_t len3 = strlen (p); // can be folded to 6
4985 size_t len4 = strlen (q); // can be folded to len2
4986 bar (len, len2, len3, len4);
4987 } */
4988 gsi_next (gsi);
4989 return false;
4992 if (storing_all_zeros_p
4993 || storing_nonzero_p
4994 || (offset != 0 && store_before_nul[1] > 0))
4996 /* When STORING_NONZERO_P, we know that the string will start
4997 with at least OFFSET + 1 nonzero characters. If storing
4998 a single character, set si->NONZERO_CHARS to the result.
4999 If storing multiple characters, try to determine the number
5000 of leading non-zero characters and set si->NONZERO_CHARS to
5001 the result instead.
5003 When STORING_ALL_ZEROS_P, we know that the string is now
5004 OFFSET characters long.
5006 Otherwise, we're storing an unknown value at offset OFFSET,
5007 so need to clip the nonzero_chars to OFFSET.
5008 Use the minimum length of the string (or individual character)
5009 being stored if it's known. Otherwise, STORING_NONZERO_P
5010 guarantees it's at least 1. */
5011 HOST_WIDE_INT len
5012 = storing_nonzero_p && ranges_valid ? lenrange[0] : 1;
5013 location_t loc = gimple_location (stmt);
5014 tree oldlen = si->nonzero_chars;
5015 if (store_before_nul[1] == 0 && si->full_string_p)
5016 /* We're overwriting the nul terminator with a nonzero or
5017 unknown character. If the previous stmt was a memcpy,
5018 its length may be decreased. */
5019 adjust_last_stmt (si, stmt, false, ptr_qry);
5020 si = unshare_strinfo (si);
5021 if (storing_nonzero_p)
5023 gcc_assert (len >= 0);
5024 si->nonzero_chars = build_int_cst (size_type_node, offset + len);
5026 else
5027 si->nonzero_chars = build_int_cst (size_type_node, offset);
5029 /* Set FULL_STRING_P only if the length of the strings being
5030 written is the same, and clear it if the strings have
5031 different lengths. In the latter case the length stored
5032 in si->NONZERO_CHARS becomes the lower bound.
5033 FIXME: Handle the upper bound of the length if possible. */
5034 si->full_string_p = full_string_p && lenrange[0] == lenrange[1];
5036 if (storing_all_zeros_p
5037 && ssaname
5038 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (ssaname))
5039 si->endptr = ssaname;
5040 else
5041 si->endptr = NULL;
5042 si->next = 0;
5043 si->stmt = NULL;
5044 si->writable = true;
5045 si->dont_invalidate = true;
5046 if (oldlen)
5048 tree adj = fold_build2_loc (loc, MINUS_EXPR, size_type_node,
5049 si->nonzero_chars, oldlen);
5050 adjust_related_strinfos (loc, si, adj);
5052 else
5053 si->prev = 0;
5056 else if (idx == 0 && (storing_all_zeros_p || storing_nonzero_p))
5058 if (ssaname)
5059 idx = new_stridx (ssaname);
5060 else
5061 idx = new_addr_stridx (lhs);
5062 if (idx != 0)
5064 tree ptr = (ssaname ? ssaname : build_fold_addr_expr (lhs));
5066 HOST_WIDE_INT slen;
5067 if (storing_all_zeros_p)
5068 slen = 0;
5069 else if (storing_nonzero_p && ranges_valid)
5071 /* FIXME: Handle the upper bound of the length when
5072 LENRANGE[0] != LENRANGE[1]. */
5073 slen = lenrange[0];
5074 if (lenrange[0] != lenrange[1])
5075 /* Set the minimum length but ignore the maximum
5076 for now. */
5077 full_string_p = false;
5079 else
5080 slen = -1;
5082 tree len = (slen <= 0
5083 ? size_zero_node
5084 : build_int_cst (size_type_node, slen));
5085 si = new_strinfo (ptr, idx, len, slen >= 0 && full_string_p);
5086 set_strinfo (idx, si);
5087 if (storing_all_zeros_p
5088 && ssaname
5089 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (ssaname))
5090 si->endptr = ssaname;
5091 si->dont_invalidate = true;
5092 si->writable = true;
5095 else if (idx == 0
5096 && rhs_minlen < HOST_WIDE_INT_M1U
5097 && ssaname == NULL_TREE
5098 && TREE_CODE (TREE_TYPE (lhs)) == ARRAY_TYPE)
5100 HOST_WIDE_INT a = int_size_in_bytes (TREE_TYPE (lhs));
5101 if (a > 0 && (unsigned HOST_WIDE_INT) a > rhs_minlen)
5103 int idx = new_addr_stridx (lhs);
5104 if (idx != 0)
5106 si = new_strinfo (build_fold_addr_expr (lhs), idx,
5107 build_int_cst (size_type_node, rhs_minlen),
5108 full_string_p);
5109 set_strinfo (idx, si);
5110 si->dont_invalidate = true;
5115 if (si != NULL && offset == 0 && storing_all_zeros_p && lenrange[2] == 1)
5117 /* For single-byte stores only, allow adjust_last_stmt to remove
5118 the statement if the stored '\0' is immediately overwritten. */
5119 laststmt.stmt = stmt;
5120 laststmt.len = build_int_cst (size_type_node, 1);
5121 laststmt.stridx = si->idx;
5123 return true;
5126 /* Try to fold strstr (s, t) eq/ne s to strncmp (s, t, strlen (t)) eq/ne 0. */
5128 static void
5129 fold_strstr_to_strncmp (tree rhs1, tree rhs2, gimple *stmt)
5131 if (TREE_CODE (rhs1) != SSA_NAME
5132 || TREE_CODE (rhs2) != SSA_NAME)
5133 return;
5135 gimple *call_stmt = NULL;
5136 for (int pass = 0; pass < 2; pass++)
5138 gimple *g = SSA_NAME_DEF_STMT (rhs1);
5139 if (gimple_call_builtin_p (g, BUILT_IN_STRSTR)
5140 && has_single_use (rhs1)
5141 && gimple_call_arg (g, 0) == rhs2)
5143 call_stmt = g;
5144 break;
5146 std::swap (rhs1, rhs2);
5149 if (call_stmt)
5151 tree arg0 = gimple_call_arg (call_stmt, 0);
5153 if (arg0 == rhs2)
5155 tree arg1 = gimple_call_arg (call_stmt, 1);
5156 tree arg1_len = NULL_TREE;
5157 int idx = get_stridx (arg1);
5159 if (idx)
5161 if (idx < 0)
5162 arg1_len = build_int_cst (size_type_node, ~idx);
5163 else
5165 strinfo *si = get_strinfo (idx);
5166 if (si)
5167 arg1_len = get_string_length (si);
5171 if (arg1_len != NULL_TREE)
5173 gimple_stmt_iterator gsi = gsi_for_stmt (call_stmt);
5174 tree strncmp_decl = builtin_decl_explicit (BUILT_IN_STRNCMP);
5176 if (!is_gimple_val (arg1_len))
5178 tree arg1_len_tmp = make_ssa_name (TREE_TYPE (arg1_len));
5179 gassign *arg1_stmt = gimple_build_assign (arg1_len_tmp,
5180 arg1_len);
5181 gsi_insert_before (&gsi, arg1_stmt, GSI_SAME_STMT);
5182 arg1_len = arg1_len_tmp;
5185 gcall *strncmp_call = gimple_build_call (strncmp_decl, 3,
5186 arg0, arg1, arg1_len);
5187 tree strncmp_lhs = make_ssa_name (integer_type_node);
5188 gimple_set_vuse (strncmp_call, gimple_vuse (call_stmt));
5189 gimple_call_set_lhs (strncmp_call, strncmp_lhs);
5190 gsi_remove (&gsi, true);
5191 gsi_insert_before (&gsi, strncmp_call, GSI_SAME_STMT);
5192 tree zero = build_zero_cst (TREE_TYPE (strncmp_lhs));
5194 if (is_gimple_assign (stmt))
5196 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
5198 tree cond = gimple_assign_rhs1 (stmt);
5199 TREE_OPERAND (cond, 0) = strncmp_lhs;
5200 TREE_OPERAND (cond, 1) = zero;
5202 else
5204 gimple_assign_set_rhs1 (stmt, strncmp_lhs);
5205 gimple_assign_set_rhs2 (stmt, zero);
5208 else
5210 gcond *cond = as_a<gcond *> (stmt);
5211 gimple_cond_set_lhs (cond, strncmp_lhs);
5212 gimple_cond_set_rhs (cond, zero);
5214 update_stmt (stmt);
5220 /* Return true if TYPE corresponds to a narrow character type. */
5222 static bool
5223 is_char_type (tree type)
5225 return (TREE_CODE (type) == INTEGER_TYPE
5226 && TYPE_MODE (type) == TYPE_MODE (char_type_node)
5227 && TYPE_PRECISION (type) == TYPE_PRECISION (char_type_node));
5230 /* Check the built-in call at GSI for validity and optimize it.
5231 Uses RVALS to determine range information.
5232 Return true to let the caller advance *GSI to the next statement
5233 in the basic block and false otherwise. */
5235 static bool
5236 strlen_check_and_optimize_call (gimple_stmt_iterator *gsi, bool *zero_write,
5237 pointer_query &ptr_qry)
5239 gimple *stmt = gsi_stmt (*gsi);
5241 if (!gimple_call_builtin_p (stmt, BUILT_IN_NORMAL))
5243 tree fntype = gimple_call_fntype (stmt);
5244 if (!fntype)
5245 return true;
5247 if (lookup_attribute ("alloc_size", TYPE_ATTRIBUTES (fntype)))
5249 handle_alloc_call (BUILT_IN_NONE, gsi);
5250 return true;
5253 if (tree lhs = gimple_call_lhs (stmt))
5254 handle_assign (gsi, lhs, zero_write, ptr_qry);
5256 /* Proceed to handle user-defined formatting functions. */
5259 /* When not optimizing we must be checking printf calls which
5260 we do even for user-defined functions when they are declared
5261 with attribute format. */
5262 if (!flag_optimize_strlen
5263 || !strlen_optimize
5264 || !valid_builtin_call (stmt))
5265 return !handle_printf_call (gsi, ptr_qry);
5267 tree callee = gimple_call_fndecl (stmt);
5268 switch (DECL_FUNCTION_CODE (callee))
5270 case BUILT_IN_STRLEN:
5271 case BUILT_IN_STRNLEN:
5272 handle_builtin_strlen (gsi);
5273 break;
5274 case BUILT_IN_STRCHR:
5275 handle_builtin_strchr (gsi);
5276 break;
5277 case BUILT_IN_STRCPY:
5278 case BUILT_IN_STRCPY_CHK:
5279 case BUILT_IN_STPCPY:
5280 case BUILT_IN_STPCPY_CHK:
5281 handle_builtin_strcpy (DECL_FUNCTION_CODE (callee), gsi, ptr_qry);
5282 break;
5284 case BUILT_IN_STRNCAT:
5285 case BUILT_IN_STRNCAT_CHK:
5286 handle_builtin_strncat (DECL_FUNCTION_CODE (callee), gsi);
5287 break;
5289 case BUILT_IN_STPNCPY:
5290 case BUILT_IN_STPNCPY_CHK:
5291 case BUILT_IN_STRNCPY:
5292 case BUILT_IN_STRNCPY_CHK:
5293 handle_builtin_stxncpy_strncat (false, gsi);
5294 break;
5296 case BUILT_IN_MEMCPY:
5297 case BUILT_IN_MEMCPY_CHK:
5298 case BUILT_IN_MEMPCPY:
5299 case BUILT_IN_MEMPCPY_CHK:
5300 handle_builtin_memcpy (DECL_FUNCTION_CODE (callee), gsi, ptr_qry);
5301 break;
5302 case BUILT_IN_STRCAT:
5303 case BUILT_IN_STRCAT_CHK:
5304 handle_builtin_strcat (DECL_FUNCTION_CODE (callee), gsi, ptr_qry);
5305 break;
5306 case BUILT_IN_ALLOCA:
5307 case BUILT_IN_ALLOCA_WITH_ALIGN:
5308 case BUILT_IN_MALLOC:
5309 case BUILT_IN_CALLOC:
5310 handle_alloc_call (DECL_FUNCTION_CODE (callee), gsi);
5311 break;
5312 case BUILT_IN_MEMSET:
5313 if (handle_builtin_memset (gsi, zero_write, ptr_qry))
5314 return false;
5315 break;
5316 case BUILT_IN_MEMCMP:
5317 if (handle_builtin_memcmp (gsi))
5318 return false;
5319 break;
5320 case BUILT_IN_STRCMP:
5321 case BUILT_IN_STRNCMP:
5322 if (handle_builtin_string_cmp (gsi, ptr_qry.rvals))
5323 return false;
5324 break;
5325 default:
5326 if (handle_printf_call (gsi, ptr_qry))
5327 return false;
5328 break;
5331 return true;
5334 /* Handle an assignment statement at *GSI to a LHS of integral type.
5335 If GSI's basic block needs clean-up of EH, set *CLEANUP_EH to true. */
5337 static void
5338 handle_integral_assign (gimple_stmt_iterator *gsi, bool *cleanup_eh,
5339 range_query *rvals)
5341 gimple *stmt = gsi_stmt (*gsi);
5342 tree lhs = gimple_assign_lhs (stmt);
5343 tree lhs_type = TREE_TYPE (lhs);
5345 enum tree_code code = gimple_assign_rhs_code (stmt);
5346 if (code == COND_EXPR)
5348 tree cond = gimple_assign_rhs1 (stmt);
5349 enum tree_code cond_code = TREE_CODE (cond);
5351 if (cond_code == EQ_EXPR || cond_code == NE_EXPR)
5352 fold_strstr_to_strncmp (TREE_OPERAND (cond, 0),
5353 TREE_OPERAND (cond, 1), stmt);
5355 else if (code == EQ_EXPR || code == NE_EXPR)
5356 fold_strstr_to_strncmp (gimple_assign_rhs1 (stmt),
5357 gimple_assign_rhs2 (stmt), stmt);
5358 else if (gimple_assign_load_p (stmt)
5359 && TREE_CODE (lhs_type) == INTEGER_TYPE
5360 && TYPE_MODE (lhs_type) == TYPE_MODE (char_type_node)
5361 && (TYPE_PRECISION (lhs_type)
5362 == TYPE_PRECISION (char_type_node))
5363 && !gimple_has_volatile_ops (stmt))
5365 tree off = integer_zero_node;
5366 unsigned HOST_WIDE_INT coff = 0;
5367 int idx = 0;
5368 tree rhs1 = gimple_assign_rhs1 (stmt);
5369 if (code == MEM_REF)
5371 idx = get_stridx (TREE_OPERAND (rhs1, 0));
5372 if (idx > 0)
5374 strinfo *si = get_strinfo (idx);
5375 if (si
5376 && si->nonzero_chars
5377 && TREE_CODE (si->nonzero_chars) == INTEGER_CST
5378 && (wi::to_widest (si->nonzero_chars)
5379 >= wi::to_widest (off)))
5380 off = TREE_OPERAND (rhs1, 1);
5381 else
5382 /* This case is not useful. See if get_addr_stridx
5383 returns something usable. */
5384 idx = 0;
5387 if (idx <= 0)
5388 idx = get_addr_stridx (rhs1, NULL_TREE, &coff);
5389 if (idx > 0)
5391 strinfo *si = get_strinfo (idx);
5392 if (si
5393 && si->nonzero_chars
5394 && TREE_CODE (si->nonzero_chars) == INTEGER_CST)
5396 widest_int w1 = wi::to_widest (si->nonzero_chars);
5397 widest_int w2 = wi::to_widest (off) + coff;
5398 if (w1 == w2
5399 && si->full_string_p)
5401 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
5403 fprintf (dump_file, "Optimizing: ");
5404 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
5407 /* Reading the final '\0' character. */
5408 tree zero = build_int_cst (lhs_type, 0);
5409 gimple_set_vuse (stmt, NULL_TREE);
5410 gimple_assign_set_rhs_from_tree (gsi, zero);
5411 *cleanup_eh
5412 |= maybe_clean_or_replace_eh_stmt (stmt,
5413 gsi_stmt (*gsi));
5414 stmt = gsi_stmt (*gsi);
5415 update_stmt (stmt);
5417 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
5419 fprintf (dump_file, "into: ");
5420 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
5423 else if (w1 > w2)
5425 /* Reading a character before the final '\0'
5426 character. Just set the value range to ~[0, 0]
5427 if we don't have anything better. */
5428 value_range r;
5429 if (!get_range_query (cfun)->range_of_expr (r, lhs)
5430 || r.varying_p ())
5432 r.set_nonzero (lhs_type);
5433 set_range_info (lhs, r);
5439 else if (code == MEM_REF && TREE_CODE (lhs) == SSA_NAME)
5441 if (int idx = new_stridx (lhs))
5443 /* Record multi-byte assignments from MEM_REFs. */
5444 bool storing_all_nonzero_p;
5445 bool storing_all_zeros_p;
5446 bool full_string_p;
5447 unsigned lenrange[] = { UINT_MAX, 0, 0 };
5448 tree rhs = gimple_assign_rhs1 (stmt);
5449 const bool ranges_valid
5450 = count_nonzero_bytes (rhs, lenrange, &full_string_p,
5451 &storing_all_zeros_p, &storing_all_nonzero_p,
5452 rvals);
5453 if (ranges_valid)
5455 tree length = build_int_cst (sizetype, lenrange[0]);
5456 strinfo *si = new_strinfo (lhs, idx, length, full_string_p);
5457 set_strinfo (idx, si);
5458 si->writable = true;
5459 si->dont_invalidate = true;
5464 if (strlen_to_stridx)
5466 tree rhs1 = gimple_assign_rhs1 (stmt);
5467 if (stridx_strlenloc *ps = strlen_to_stridx->get (rhs1))
5468 strlen_to_stridx->put (lhs, stridx_strlenloc (*ps));
5472 /* Handle assignment statement at *GSI to LHS. Set *ZERO_WRITE if
5473 the assignent stores all zero bytes.. */
5475 static bool
5476 handle_assign (gimple_stmt_iterator *gsi, tree lhs, bool *zero_write,
5477 pointer_query &ptr_qry)
5479 tree type = TREE_TYPE (lhs);
5480 if (TREE_CODE (type) == ARRAY_TYPE)
5481 type = TREE_TYPE (type);
5483 bool is_char_store = is_char_type (type);
5484 if (!is_char_store && TREE_CODE (lhs) == MEM_REF)
5486 /* To consider stores into char objects via integer types other
5487 than char but not those to non-character objects, determine
5488 the type of the destination rather than just the type of
5489 the access. */
5490 for (int i = 0; i != 2; ++i)
5492 tree ref = TREE_OPERAND (lhs, i);
5493 type = TREE_TYPE (ref);
5494 if (TREE_CODE (type) == POINTER_TYPE)
5495 type = TREE_TYPE (type);
5496 if (TREE_CODE (type) == ARRAY_TYPE)
5497 type = TREE_TYPE (type);
5498 if (is_char_type (type))
5500 is_char_store = true;
5501 break;
5506 /* Handle a single or multibyte assignment. */
5507 if (is_char_store && !handle_store (gsi, zero_write, ptr_qry))
5508 return false;
5510 return true;
5514 /* Attempt to check for validity of the performed access a single statement
5515 at *GSI using string length knowledge, and to optimize it.
5516 If the given basic block needs clean-up of EH, CLEANUP_EH is set to
5517 true. Return true to let the caller advance *GSI to the next statement
5518 in the basic block and false otherwise. */
5520 static bool
5521 check_and_optimize_stmt (gimple_stmt_iterator *gsi, bool *cleanup_eh,
5522 pointer_query &ptr_qry)
5524 gimple *stmt = gsi_stmt (*gsi);
5526 /* For statements that modify a string, set to true if the write
5527 is only zeros. */
5528 bool zero_write = false;
5530 if (is_gimple_call (stmt))
5532 if (!strlen_check_and_optimize_call (gsi, &zero_write, ptr_qry))
5533 return false;
5535 else if (!flag_optimize_strlen || !strlen_optimize)
5536 return true;
5537 else if (is_gimple_assign (stmt) && !gimple_clobber_p (stmt))
5539 /* Handle non-clobbering assignment. */
5540 tree lhs = gimple_assign_lhs (stmt);
5541 tree lhs_type = TREE_TYPE (lhs);
5543 if (TREE_CODE (lhs) == SSA_NAME && POINTER_TYPE_P (lhs_type))
5545 if (gimple_assign_single_p (stmt)
5546 || (gimple_assign_cast_p (stmt)
5547 && POINTER_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt)))))
5549 int idx = get_stridx (gimple_assign_rhs1 (stmt));
5550 ssa_ver_to_stridx[SSA_NAME_VERSION (lhs)] = idx;
5552 else if (gimple_assign_rhs_code (stmt) == POINTER_PLUS_EXPR)
5553 handle_pointer_plus (gsi);
5555 else if (TREE_CODE (lhs) == SSA_NAME && INTEGRAL_TYPE_P (lhs_type))
5556 /* Handle assignment to a character. */
5557 handle_integral_assign (gsi, cleanup_eh, ptr_qry.rvals);
5558 else if (TREE_CODE (lhs) != SSA_NAME && !TREE_SIDE_EFFECTS (lhs))
5559 if (!handle_assign (gsi, lhs, &zero_write, ptr_qry))
5560 return false;
5562 else if (gcond *cond = dyn_cast<gcond *> (stmt))
5564 enum tree_code code = gimple_cond_code (cond);
5565 if (code == EQ_EXPR || code == NE_EXPR)
5566 fold_strstr_to_strncmp (gimple_cond_lhs (stmt),
5567 gimple_cond_rhs (stmt), stmt);
5570 if (gimple_vdef (stmt))
5571 maybe_invalidate (stmt, zero_write);
5572 return true;
5575 /* Recursively call maybe_invalidate on stmts that might be executed
5576 in between dombb and current bb and that contain a vdef. Stop when
5577 *count stmts are inspected, or if the whole strinfo vector has
5578 been invalidated. */
5580 static void
5581 do_invalidate (basic_block dombb, gimple *phi, bitmap visited, int *count)
5583 unsigned int i, n = gimple_phi_num_args (phi);
5585 for (i = 0; i < n; i++)
5587 tree vuse = gimple_phi_arg_def (phi, i);
5588 gimple *stmt = SSA_NAME_DEF_STMT (vuse);
5589 basic_block bb = gimple_bb (stmt);
5590 if (bb == NULL
5591 || bb == dombb
5592 || !bitmap_set_bit (visited, bb->index)
5593 || !dominated_by_p (CDI_DOMINATORS, bb, dombb))
5594 continue;
5595 while (1)
5597 if (gimple_code (stmt) == GIMPLE_PHI)
5599 do_invalidate (dombb, stmt, visited, count);
5600 if (*count == 0)
5601 return;
5602 break;
5604 if (--*count == 0)
5605 return;
5606 if (!maybe_invalidate (stmt))
5608 *count = 0;
5609 return;
5611 vuse = gimple_vuse (stmt);
5612 stmt = SSA_NAME_DEF_STMT (vuse);
5613 if (gimple_bb (stmt) != bb)
5615 bb = gimple_bb (stmt);
5616 if (bb == NULL
5617 || bb == dombb
5618 || !bitmap_set_bit (visited, bb->index)
5619 || !dominated_by_p (CDI_DOMINATORS, bb, dombb))
5620 break;
5626 class strlen_dom_walker : public dom_walker
5628 public:
5629 strlen_dom_walker (cdi_direction direction)
5630 : dom_walker (direction),
5631 evrp (false),
5632 ptr_qry (&evrp, &var_cache),
5633 var_cache (),
5634 m_cleanup_cfg (false)
5637 ~strlen_dom_walker ();
5639 virtual edge before_dom_children (basic_block);
5640 virtual void after_dom_children (basic_block);
5642 /* EVRP analyzer used for printf argument range processing, and
5643 to track strlen results across integer variable assignments. */
5644 evrp_range_analyzer evrp;
5646 /* A pointer_query object and its cache to store information about
5647 pointers and their targets in. */
5648 pointer_query ptr_qry;
5649 pointer_query::cache_type var_cache;
5651 /* Flag that will trigger TODO_cleanup_cfg to be returned in strlen
5652 execute function. */
5653 bool m_cleanup_cfg;
5656 /* Release pointer_query cache. */
5658 strlen_dom_walker::~strlen_dom_walker ()
5660 ptr_qry.flush_cache ();
5663 /* Callback for walk_dominator_tree. Attempt to optimize various
5664 string ops by remembering string lengths pointed by pointer SSA_NAMEs. */
5666 edge
5667 strlen_dom_walker::before_dom_children (basic_block bb)
5669 evrp.enter (bb);
5671 basic_block dombb = get_immediate_dominator (CDI_DOMINATORS, bb);
5673 if (dombb == NULL)
5674 stridx_to_strinfo = NULL;
5675 else
5677 stridx_to_strinfo = ((vec<strinfo *, va_heap, vl_embed> *) dombb->aux);
5678 if (stridx_to_strinfo)
5680 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
5681 gsi_next (&gsi))
5683 gphi *phi = gsi.phi ();
5684 if (virtual_operand_p (gimple_phi_result (phi)))
5686 bitmap visited = BITMAP_ALLOC (NULL);
5687 int count_vdef = 100;
5688 do_invalidate (dombb, phi, visited, &count_vdef);
5689 BITMAP_FREE (visited);
5690 if (count_vdef == 0)
5692 /* If there were too many vdefs in between immediate
5693 dominator and current bb, invalidate everything.
5694 If stridx_to_strinfo has been unshared, we need
5695 to free it, otherwise just set it to NULL. */
5696 if (!strinfo_shared ())
5698 unsigned int i;
5699 strinfo *si;
5701 for (i = 1;
5702 vec_safe_iterate (stridx_to_strinfo, i, &si);
5703 ++i)
5705 free_strinfo (si);
5706 (*stridx_to_strinfo)[i] = NULL;
5709 else
5710 stridx_to_strinfo = NULL;
5712 break;
5718 /* If all PHI arguments have the same string index, the PHI result
5719 has it as well. */
5720 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
5721 gsi_next (&gsi))
5723 gphi *phi = gsi.phi ();
5724 tree result = gimple_phi_result (phi);
5725 if (!virtual_operand_p (result) && POINTER_TYPE_P (TREE_TYPE (result)))
5727 int idx = get_stridx (gimple_phi_arg_def (phi, 0));
5728 if (idx != 0)
5730 unsigned int i, n = gimple_phi_num_args (phi);
5731 for (i = 1; i < n; i++)
5732 if (idx != get_stridx (gimple_phi_arg_def (phi, i)))
5733 break;
5734 if (i == n)
5735 ssa_ver_to_stridx[SSA_NAME_VERSION (result)] = idx;
5740 bool cleanup_eh = false;
5742 /* Attempt to optimize individual statements. */
5743 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); )
5745 gimple *stmt = gsi_stmt (gsi);
5747 /* First record ranges generated by this statement so they
5748 can be used by printf argument processing. */
5749 evrp.record_ranges_from_stmt (stmt, false);
5751 /* Reset search depth preformance counter. */
5752 ptr_qry.depth = 0;
5754 if (check_and_optimize_stmt (&gsi, &cleanup_eh, ptr_qry))
5755 gsi_next (&gsi);
5758 if (cleanup_eh && gimple_purge_dead_eh_edges (bb))
5759 m_cleanup_cfg = true;
5761 bb->aux = stridx_to_strinfo;
5762 if (vec_safe_length (stridx_to_strinfo) && !strinfo_shared ())
5763 (*stridx_to_strinfo)[0] = (strinfo *) bb;
5764 return NULL;
5767 /* Callback for walk_dominator_tree. Free strinfo vector if it is
5768 owned by the current bb, clear bb->aux. */
5770 void
5771 strlen_dom_walker::after_dom_children (basic_block bb)
5773 evrp.leave (bb);
5775 if (bb->aux)
5777 stridx_to_strinfo = ((vec<strinfo *, va_heap, vl_embed> *) bb->aux);
5778 if (vec_safe_length (stridx_to_strinfo)
5779 && (*stridx_to_strinfo)[0] == (strinfo *) bb)
5781 unsigned int i;
5782 strinfo *si;
5784 for (i = 1; vec_safe_iterate (stridx_to_strinfo, i, &si); ++i)
5785 free_strinfo (si);
5786 vec_free (stridx_to_strinfo);
5788 bb->aux = NULL;
5792 namespace {
5794 static unsigned int
5795 printf_strlen_execute (function *fun, bool warn_only)
5797 strlen_optimize = !warn_only;
5799 calculate_dominance_info (CDI_DOMINATORS);
5801 bool use_scev = optimize > 0 && flag_printf_return_value;
5802 if (use_scev)
5804 loop_optimizer_init (LOOPS_NORMAL);
5805 scev_initialize ();
5808 gcc_assert (!strlen_to_stridx);
5809 if (warn_stringop_overflow || warn_stringop_truncation)
5810 strlen_to_stridx = new hash_map<tree, stridx_strlenloc> ();
5812 /* This has to happen after initializing the loop optimizer
5813 and initializing SCEV as they create new SSA_NAMEs. */
5814 ssa_ver_to_stridx.safe_grow_cleared (num_ssa_names, true);
5815 max_stridx = 1;
5817 /* String length optimization is implemented as a walk of the dominator
5818 tree and a forward walk of statements within each block. */
5819 strlen_dom_walker walker (CDI_DOMINATORS);
5820 walker.walk (ENTRY_BLOCK_PTR_FOR_FN (fun));
5822 if (dump_file && (dump_flags & TDF_DETAILS))
5824 unsigned nused = 0;
5825 unsigned nidxs = walker.ptr_qry.var_cache->indices.length ();
5826 for (unsigned i = 0; i != nidxs; ++i)
5827 if (walker.ptr_qry.var_cache->indices[i])
5828 ++nused;
5830 fprintf (dump_file, "pointer_query counters\n"
5831 " index cache size: %u\n"
5832 " utilization: %u%%\n"
5833 " access cache size: %u\n"
5834 " hits: %u\n"
5835 " misses: %u\n"
5836 " failures: %u\n"
5837 " max_depth: %u\n",
5838 nidxs,
5839 nidxs == 0 ? 0 : (nused * 100) / nidxs,
5840 walker.ptr_qry.var_cache->access_refs.length (),
5841 walker.ptr_qry.hits, walker.ptr_qry.misses,
5842 walker.ptr_qry.failures, walker.ptr_qry.max_depth);
5845 ssa_ver_to_stridx.release ();
5846 strinfo_pool.release ();
5847 if (decl_to_stridxlist_htab)
5849 obstack_free (&stridx_obstack, NULL);
5850 delete decl_to_stridxlist_htab;
5851 decl_to_stridxlist_htab = NULL;
5853 laststmt.stmt = NULL;
5854 laststmt.len = NULL_TREE;
5855 laststmt.stridx = 0;
5857 if (strlen_to_stridx)
5859 strlen_to_stridx->empty ();
5860 delete strlen_to_stridx;
5861 strlen_to_stridx = NULL;
5864 if (use_scev)
5866 scev_finalize ();
5867 loop_optimizer_finalize ();
5870 return walker.m_cleanup_cfg ? TODO_cleanup_cfg : 0;
5873 /* This file defines two passes: one for warnings that runs only when
5874 optimization is disabled, and another that implements optimizations
5875 and also issues warnings. */
5877 const pass_data pass_data_warn_printf =
5879 GIMPLE_PASS, /* type */
5880 "warn-printf", /* name */
5881 OPTGROUP_NONE, /* optinfo_flags */
5882 TV_NONE, /* tv_id */
5883 /* Normally an optimization pass would require PROP_ssa but because
5884 this pass runs early, with no optimization, to do sprintf format
5885 checking, it only requires PROP_cfg. */
5886 PROP_cfg, /* properties_required */
5887 0, /* properties_provided */
5888 0, /* properties_destroyed */
5889 0, /* todo_flags_start */
5890 0, /* todo_flags_finish */
5893 class pass_warn_printf : public gimple_opt_pass
5895 public:
5896 pass_warn_printf (gcc::context *ctxt)
5897 : gimple_opt_pass (pass_data_warn_printf, ctxt)
5900 virtual bool gate (function *);
5901 virtual unsigned int execute (function *fun)
5903 return printf_strlen_execute (fun, true);
5908 /* Return true to run the warning pass only when not optimizing and
5909 iff either -Wformat-overflow or -Wformat-truncation is specified. */
5911 bool
5912 pass_warn_printf::gate (function *)
5914 return !optimize && (warn_format_overflow > 0 || warn_format_trunc > 0);
5917 const pass_data pass_data_strlen =
5919 GIMPLE_PASS, /* type */
5920 "strlen", /* name */
5921 OPTGROUP_NONE, /* optinfo_flags */
5922 TV_TREE_STRLEN, /* tv_id */
5923 PROP_cfg | PROP_ssa, /* properties_required */
5924 0, /* properties_provided */
5925 0, /* properties_destroyed */
5926 0, /* todo_flags_start */
5927 0, /* todo_flags_finish */
5930 class pass_strlen : public gimple_opt_pass
5932 public:
5933 pass_strlen (gcc::context *ctxt)
5934 : gimple_opt_pass (pass_data_strlen, ctxt)
5937 opt_pass * clone () { return new pass_strlen (m_ctxt); }
5939 virtual bool gate (function *);
5940 virtual unsigned int execute (function *fun)
5942 return printf_strlen_execute (fun, false);
5946 /* Return true to run the pass only when the sprintf and/or strlen
5947 optimizations are enabled and -Wformat-overflow or -Wformat-truncation
5948 are specified. */
5950 bool
5951 pass_strlen::gate (function *)
5953 return ((warn_format_overflow > 0
5954 || warn_format_trunc > 0
5955 || warn_restrict > 0
5956 || flag_optimize_strlen > 0
5957 || flag_printf_return_value)
5958 && optimize > 0);
5961 } // anon namespace
5963 gimple_opt_pass *
5964 make_pass_warn_printf (gcc::context *ctxt)
5966 return new pass_warn_printf (ctxt);
5969 gimple_opt_pass *
5970 make_pass_strlen (gcc::context *ctxt)
5972 return new pass_strlen (ctxt);