Daily bump.
[official-gcc.git] / gcc / tree-ssa-strlen.c
blobc0ec7d20a6082b8264adfce8dba358ff989e00f3
1 /* String length optimization
2 Copyright (C) 2011-2021 Free Software Foundation, Inc.
3 Contributed by Jakub Jelinek <jakub@redhat.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "alloc-pool.h"
29 #include "tree-pass.h"
30 #include "ssa.h"
31 #include "cgraph.h"
32 #include "gimple-pretty-print.h"
33 #include "gimple-ssa-warn-access.h"
34 #include "gimple-ssa-warn-restrict.h"
35 #include "fold-const.h"
36 #include "stor-layout.h"
37 #include "gimple-fold.h"
38 #include "tree-eh.h"
39 #include "gimplify.h"
40 #include "gimple-iterator.h"
41 #include "gimplify-me.h"
42 #include "expr.h"
43 #include "tree-cfg.h"
44 #include "tree-dfa.h"
45 #include "domwalk.h"
46 #include "tree-ssa-alias.h"
47 #include "tree-ssa-propagate.h"
48 #include "tree-ssa-strlen.h"
49 #include "tree-hash-traits.h"
50 #include "builtins.h"
51 #include "pointer-query.h"
52 #include "target.h"
53 #include "diagnostic-core.h"
54 #include "diagnostic.h"
55 #include "intl.h"
56 #include "attribs.h"
57 #include "calls.h"
58 #include "cfgloop.h"
59 #include "tree-ssa-loop.h"
60 #include "tree-scalar-evolution.h"
61 #include "vr-values.h"
62 #include "gimple-range.h"
63 #include "tree-ssa.h"
65 /* A vector indexed by SSA_NAME_VERSION. 0 means unknown, positive value
66 is an index into strinfo vector, negative value stands for
67 string length of a string literal (~strlen). */
68 static vec<int> ssa_ver_to_stridx;
70 /* Number of currently active string indexes plus one. */
71 static int max_stridx;
73 /* Set to true to optimize, false when just checking. */
74 static bool strlen_optimize;
76 /* String information record. */
77 struct strinfo
79 /* Number of leading characters that are known to be nonzero. This is
80 also the length of the string if FULL_STRING_P.
82 The values in a list of related string pointers must be consistent;
83 that is, if strinfo B comes X bytes after strinfo A, it must be
84 the case that A->nonzero_chars == X + B->nonzero_chars. */
85 tree nonzero_chars;
86 /* Any of the corresponding pointers for querying alias oracle. */
87 tree ptr;
88 /* STMT is used for two things:
90 - To record the statement that should be used for delayed length
91 computations. We maintain the invariant that all related strinfos
92 have delayed lengths or none do.
94 - To record the malloc or calloc call that produced this result
95 to optimize away malloc/memset sequences. STMT is reset after
96 a calloc-allocated object has been stored a non-zero value into. */
97 gimple *stmt;
98 /* Set to the dynamic allocation statement for the object (alloca,
99 calloc, malloc, or VLA). Unlike STMT, once set for a strinfo
100 object, ALLOC doesn't change. */
101 gimple *alloc;
102 /* Pointer to '\0' if known, if NULL, it can be computed as
103 ptr + length. */
104 tree endptr;
105 /* Reference count. Any changes to strinfo entry possibly shared
106 with dominating basic blocks need unshare_strinfo first, except
107 for dont_invalidate which affects only the immediately next
108 maybe_invalidate. */
109 int refcount;
110 /* Copy of index. get_strinfo (si->idx) should return si; */
111 int idx;
112 /* These 3 fields are for chaining related string pointers together.
113 E.g. for
114 bl = strlen (b); dl = strlen (d); strcpy (a, b); c = a + bl;
115 strcpy (c, d); e = c + dl;
116 strinfo(a) -> strinfo(c) -> strinfo(e)
117 All have ->first field equal to strinfo(a)->idx and are doubly
118 chained through prev/next fields. The later strinfos are required
119 to point into the same string with zero or more bytes after
120 the previous pointer and all bytes in between the two pointers
121 must be non-zero. Functions like strcpy or memcpy are supposed
122 to adjust all previous strinfo lengths, but not following strinfo
123 lengths (those are uncertain, usually invalidated during
124 maybe_invalidate, except when the alias oracle knows better).
125 Functions like strcat on the other side adjust the whole
126 related strinfo chain.
127 They are updated lazily, so to use the chain the same first fields
128 and si->prev->next == si->idx needs to be verified. */
129 int first;
130 int next;
131 int prev;
132 /* A flag whether the string is known to be written in the current
133 function. */
134 bool writable;
135 /* A flag for the next maybe_invalidate that this strinfo shouldn't
136 be invalidated. Always cleared by maybe_invalidate. */
137 bool dont_invalidate;
138 /* True if the string is known to be nul-terminated after NONZERO_CHARS
139 characters. False is useful when detecting strings that are built
140 up via successive memcpys. */
141 bool full_string_p;
144 /* Pool for allocating strinfo_struct entries. */
145 static object_allocator<strinfo> strinfo_pool ("strinfo pool");
147 /* Vector mapping positive string indexes to strinfo, for the
148 current basic block. The first pointer in the vector is special,
149 it is either NULL, meaning the vector isn't shared, or it is
150 a basic block pointer to the owner basic_block if shared.
151 If some other bb wants to modify the vector, the vector needs
152 to be unshared first, and only the owner bb is supposed to free it. */
153 static vec<strinfo *, va_heap, vl_embed> *stridx_to_strinfo;
155 /* One OFFSET->IDX mapping. */
156 struct stridxlist
158 struct stridxlist *next;
159 HOST_WIDE_INT offset;
160 int idx;
163 /* Hash table entry, mapping a DECL to a chain of OFFSET->IDX mappings. */
164 struct decl_stridxlist_map
166 struct tree_map_base base;
167 struct stridxlist list;
170 /* Hash table for mapping decls to a chained list of offset -> idx
171 mappings. */
172 typedef hash_map<tree_decl_hash, stridxlist> decl_to_stridxlist_htab_t;
173 static decl_to_stridxlist_htab_t *decl_to_stridxlist_htab;
175 /* Hash table mapping strlen (or strnlen with constant bound and return
176 smaller than bound) calls to stridx instances describing
177 the calls' arguments. Non-null only when warn_stringop_truncation
178 is non-zero. */
179 typedef std::pair<int, location_t> stridx_strlenloc;
180 static hash_map<tree, stridx_strlenloc> *strlen_to_stridx;
182 /* Obstack for struct stridxlist and struct decl_stridxlist_map. */
183 static struct obstack stridx_obstack;
185 /* Last memcpy statement if it could be adjusted if the trailing
186 '\0' written is immediately overwritten, or
187 *x = '\0' store that could be removed if it is immediately overwritten. */
188 struct laststmt_struct
190 gimple *stmt;
191 tree len;
192 int stridx;
193 } laststmt;
195 static int get_stridx_plus_constant (strinfo *, unsigned HOST_WIDE_INT, tree);
197 /* Sets MINMAX to either the constant value or the range VAL is in
198 and returns either the constant value or VAL on success or null
199 when the range couldn't be determined. Uses RVALS or CFUN for
200 range info, whichever is nonnull. */
202 tree
203 get_range (tree val, gimple *stmt, wide_int minmax[2],
204 range_query *rvals /* = NULL */)
206 if (!rvals)
208 if (!cfun)
209 /* When called from front ends for global initializers CFUN
210 may be null. */
211 return NULL_TREE;
213 rvals = get_range_query (cfun);
216 value_range vr;
217 if (!rvals->range_of_expr (vr, val, stmt))
218 return NULL_TREE;
220 value_range_kind rng = vr.kind ();
221 if (rng == VR_RANGE)
223 /* Only handle straight ranges. */
224 minmax[0] = wi::to_wide (vr.min ());
225 minmax[1] = wi::to_wide (vr.max ());
226 return val;
229 return NULL_TREE;
232 class strlen_pass : public dom_walker
234 public:
235 strlen_pass (cdi_direction direction)
236 : dom_walker (direction),
237 ptr_qry (&m_ranger, &var_cache),
238 var_cache (),
239 m_cleanup_cfg (false)
243 ~strlen_pass ();
245 virtual edge before_dom_children (basic_block);
246 virtual void after_dom_children (basic_block);
248 bool check_and_optimize_stmt (bool *cleanup_eh);
249 bool check_and_optimize_call (bool *zero_write);
250 bool handle_assign (tree lhs, bool *zero_write);
251 bool handle_store (bool *zero_write);
252 void handle_pointer_plus ();
253 void handle_builtin_strlen ();
254 void handle_builtin_strchr ();
255 void handle_builtin_strcpy (built_in_function);
256 void handle_integral_assign (bool *cleanup_eh);
257 void handle_builtin_stxncpy_strncat (bool append_p);
258 void handle_builtin_memcpy (built_in_function bcode);
259 void handle_builtin_strcat (built_in_function bcode);
260 void handle_builtin_strncat (built_in_function);
261 bool handle_builtin_memset (bool *zero_write);
262 bool handle_builtin_memcmp ();
263 bool handle_builtin_string_cmp ();
264 void handle_alloc_call (built_in_function);
265 void maybe_warn_overflow (gimple *stmt, bool call_lhs, tree len,
266 strinfo *si = NULL, bool plus_one = false,
267 bool rawmem = false);
268 void maybe_warn_overflow (gimple *stmt, bool call_lhs,
269 unsigned HOST_WIDE_INT len,
270 strinfo *si = NULL,
271 bool plus_one = false, bool rawmem = false);
272 void adjust_last_stmt (strinfo *si, gimple *stmt, bool is_strcat);
273 tree strxcmp_eqz_result (gimple *stmt, tree arg1, int idx1,
274 tree arg2, int idx2,
275 unsigned HOST_WIDE_INT bound,
276 unsigned HOST_WIDE_INT len[2],
277 unsigned HOST_WIDE_INT *psize);
278 bool count_nonzero_bytes (tree expr_or_type,
279 gimple *stmt,
280 unsigned lenrange[3], bool *nulterm,
281 bool *allnul, bool *allnonnul);
282 bool count_nonzero_bytes (tree exp,
283 gimple *stmt,
284 unsigned HOST_WIDE_INT offset,
285 unsigned HOST_WIDE_INT nbytes,
286 unsigned lenrange[3], bool *nulterm,
287 bool *allnul, bool *allnonnul,
288 ssa_name_limit_t &snlim);
289 bool count_nonzero_bytes_addr (tree exp,
290 gimple *stmt,
291 unsigned HOST_WIDE_INT offset,
292 unsigned HOST_WIDE_INT nbytes,
293 unsigned lenrange[3], bool *nulterm,
294 bool *allnul, bool *allnonnul,
295 ssa_name_limit_t &snlim);
296 bool get_len_or_size (gimple *stmt, tree arg, int idx,
297 unsigned HOST_WIDE_INT lenrng[2],
298 unsigned HOST_WIDE_INT *size, bool *nulterm);
300 gimple_ranger m_ranger;
302 /* A pointer_query object and its cache to store information about
303 pointers and their targets in. */
304 pointer_query ptr_qry;
305 pointer_query::cache_type var_cache;
307 gimple_stmt_iterator m_gsi;
309 /* Flag that will trigger TODO_cleanup_cfg to be returned in strlen
310 execute function. */
311 bool m_cleanup_cfg;
314 /* Return:
316 * +1 if SI is known to start with more than OFF nonzero characters.
318 * 0 if SI is known to start with exactly OFF nonzero characters.
320 * -1 if SI either does not start with OFF nonzero characters
321 or the relationship between the number of leading nonzero
322 characters in SI and OFF is unknown. */
324 static int
325 compare_nonzero_chars (strinfo *si, unsigned HOST_WIDE_INT off)
327 if (si->nonzero_chars
328 && TREE_CODE (si->nonzero_chars) == INTEGER_CST)
329 return compare_tree_int (si->nonzero_chars, off);
330 else
331 return -1;
334 /* Same as above but suitable also for strings with non-constant lengths.
335 Uses RVALS to determine length range. */
337 static int
338 compare_nonzero_chars (strinfo *si, gimple *stmt,
339 unsigned HOST_WIDE_INT off,
340 range_query *rvals)
342 if (!si->nonzero_chars)
343 return -1;
345 if (TREE_CODE (si->nonzero_chars) == INTEGER_CST)
346 return compare_tree_int (si->nonzero_chars, off);
348 if (!rvals || TREE_CODE (si->nonzero_chars) != SSA_NAME)
349 return -1;
351 value_range vr;
352 if (!rvals->range_of_expr (vr, si->nonzero_chars, stmt))
353 return -1;
354 value_range_kind rng = vr.kind ();
355 if (rng != VR_RANGE)
356 return -1;
358 /* If the offset is less than the minimum length or if the bounds
359 of the length range are equal return the result of the comparison
360 same as in the constant case. Otherwise return a conservative
361 result. */
362 int cmpmin = compare_tree_int (vr.min (), off);
363 if (cmpmin > 0 || tree_int_cst_equal (vr.min (), vr.max ()))
364 return cmpmin;
366 return -1;
369 /* Return true if SI is known to be a zero-length string. */
371 static inline bool
372 zero_length_string_p (strinfo *si)
374 return si->full_string_p && integer_zerop (si->nonzero_chars);
377 /* Return strinfo vector entry IDX. */
379 static inline strinfo *
380 get_strinfo (int idx)
382 if (vec_safe_length (stridx_to_strinfo) <= (unsigned int) idx)
383 return NULL;
384 return (*stridx_to_strinfo)[idx];
387 /* Get the next strinfo in the chain after SI, or null if none. */
389 static inline strinfo *
390 get_next_strinfo (strinfo *si)
392 if (si->next == 0)
393 return NULL;
394 strinfo *nextsi = get_strinfo (si->next);
395 if (nextsi == NULL || nextsi->first != si->first || nextsi->prev != si->idx)
396 return NULL;
397 return nextsi;
400 /* Helper function for get_stridx. Return the strinfo index of the address
401 of EXP, which is available in PTR if nonnull. If OFFSET_OUT, it is
402 OK to return the index for some X <= &EXP and store &EXP - X in
403 *OFFSET_OUT. When RVALS is nonnull uses it to determine range
404 information. */
406 static int
407 get_addr_stridx (tree exp, gimple *stmt,
408 tree ptr, unsigned HOST_WIDE_INT *offset_out,
409 range_query *rvals = NULL)
411 HOST_WIDE_INT off;
412 struct stridxlist *list, *last = NULL;
413 tree base;
415 if (!decl_to_stridxlist_htab)
416 return 0;
418 poly_int64 poff;
419 base = get_addr_base_and_unit_offset (exp, &poff);
420 if (base == NULL || !DECL_P (base) || !poff.is_constant (&off))
421 return 0;
423 list = decl_to_stridxlist_htab->get (base);
424 if (list == NULL)
425 return 0;
429 if (list->offset == off)
431 if (offset_out)
432 *offset_out = 0;
433 return list->idx;
435 if (list->offset > off)
436 return 0;
437 last = list;
438 list = list->next;
440 while (list);
442 if ((offset_out || ptr) && last && last->idx > 0)
444 unsigned HOST_WIDE_INT rel_off
445 = (unsigned HOST_WIDE_INT) off - last->offset;
446 strinfo *si = get_strinfo (last->idx);
447 if (si && compare_nonzero_chars (si, stmt, rel_off, rvals) >= 0)
449 if (offset_out)
451 *offset_out = rel_off;
452 return last->idx;
454 else
455 return get_stridx_plus_constant (si, rel_off, ptr);
458 return 0;
461 /* Returns string index for EXP. When EXP is an SSA_NAME that refers
462 to a known strinfo with an offset and OFFRNG is non-null, sets
463 both elements of the OFFRNG array to the range of the offset and
464 returns the index of the known strinfo. In this case the result
465 must not be used in for functions that modify the string.
466 When nonnull, uses RVALS to determine range information. */
468 static int
469 get_stridx (tree exp, gimple *stmt,
470 wide_int offrng[2] = NULL, range_query *rvals = NULL)
472 if (offrng)
473 offrng[0] = offrng[1] = wi::zero (TYPE_PRECISION (ptrdiff_type_node));
475 if (TREE_CODE (exp) == SSA_NAME)
477 if (ssa_ver_to_stridx[SSA_NAME_VERSION (exp)])
478 return ssa_ver_to_stridx[SSA_NAME_VERSION (exp)];
480 tree e = exp;
481 int last_idx = 0;
482 HOST_WIDE_INT offset = 0;
483 /* Follow a chain of at most 5 assignments. */
484 for (int i = 0; i < 5; i++)
486 gimple *def_stmt = SSA_NAME_DEF_STMT (e);
487 if (!is_gimple_assign (def_stmt))
488 return last_idx;
490 tree_code rhs_code = gimple_assign_rhs_code (def_stmt);
491 tree ptr, off;
493 if (rhs_code == ADDR_EXPR)
495 /* Handle indices/offsets into VLAs which are implemented
496 as pointers to arrays. */
497 ptr = gimple_assign_rhs1 (def_stmt);
498 ptr = TREE_OPERAND (ptr, 0);
500 /* Handle also VLAs of types larger than char. */
501 if (tree eltsize = TYPE_SIZE_UNIT (TREE_TYPE (ptr)))
503 if (TREE_CODE (ptr) == ARRAY_REF)
505 off = TREE_OPERAND (ptr, 1);
506 ptr = TREE_OPERAND (ptr, 0);
507 if (!integer_onep (eltsize))
509 /* Scale the array index by the size of the element
510 type in the rare case that it's greater than
511 the typical 1 for char, making sure both operands
512 have the same type. */
513 eltsize = fold_convert (ssizetype, eltsize);
514 off = fold_convert (ssizetype, off);
515 off = fold_build2 (MULT_EXPR, ssizetype, off, eltsize);
518 else
519 off = integer_zero_node;
521 else
522 return 0;
524 if (TREE_CODE (ptr) != MEM_REF)
525 return 0;
527 /* Add the MEM_REF byte offset. */
528 tree mem_off = TREE_OPERAND (ptr, 1);
529 off = fold_build2 (PLUS_EXPR, TREE_TYPE (off), off, mem_off);
530 ptr = TREE_OPERAND (ptr, 0);
532 else if (rhs_code == POINTER_PLUS_EXPR)
534 ptr = gimple_assign_rhs1 (def_stmt);
535 off = gimple_assign_rhs2 (def_stmt);
537 else
538 return 0;
540 if (TREE_CODE (ptr) != SSA_NAME)
541 return 0;
543 if (!tree_fits_shwi_p (off))
545 if (int idx = ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)])
546 if (offrng)
548 /* Only when requested by setting OFFRNG to non-null,
549 return the index corresponding to the SSA_NAME.
550 Do this irrespective of the whether the offset
551 is known. */
552 if (get_range (off, def_stmt, offrng, rvals))
554 /* When the offset range is known, increment it
555 it by the constant offset computed in prior
556 iterations and store it in the OFFRNG array. */
557 offrng[0] += offset;
558 offrng[1] += offset;
560 else
562 /* When the offset range cannot be determined
563 store [0, SIZE_MAX] and let the caller decide
564 if the offset matters. */
565 offrng[1] = wi::to_wide (TYPE_MAX_VALUE (sizetype));
566 offrng[0] = wi::zero (offrng[1].get_precision ());
568 return idx;
570 return 0;
573 HOST_WIDE_INT this_off = tree_to_shwi (off);
574 if (offrng)
576 offrng[0] += wi::shwi (this_off, offrng->get_precision ());
577 offrng[1] += offrng[0];
580 if (this_off < 0)
581 return last_idx;
583 offset = (unsigned HOST_WIDE_INT) offset + this_off;
584 if (offset < 0)
585 return last_idx;
587 if (int idx = ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)])
589 strinfo *si = get_strinfo (idx);
590 if (si)
592 if (compare_nonzero_chars (si, offset) >= 0)
593 return get_stridx_plus_constant (si, offset, exp);
595 if (offrng)
596 last_idx = idx;
599 e = ptr;
602 return last_idx;
605 if (TREE_CODE (exp) == ADDR_EXPR)
607 int idx = get_addr_stridx (TREE_OPERAND (exp, 0), stmt, exp, NULL);
608 if (idx != 0)
609 return idx;
612 const char *p = c_getstr (exp);
613 if (p)
614 return ~(int) strlen (p);
616 return 0;
619 /* Return true if strinfo vector is shared with the immediate dominator. */
621 static inline bool
622 strinfo_shared (void)
624 return vec_safe_length (stridx_to_strinfo)
625 && (*stridx_to_strinfo)[0] != NULL;
628 /* Unshare strinfo vector that is shared with the immediate dominator. */
630 static void
631 unshare_strinfo_vec (void)
633 strinfo *si;
634 unsigned int i = 0;
636 gcc_assert (strinfo_shared ());
637 stridx_to_strinfo = vec_safe_copy (stridx_to_strinfo);
638 for (i = 1; vec_safe_iterate (stridx_to_strinfo, i, &si); ++i)
639 if (si != NULL)
640 si->refcount++;
641 (*stridx_to_strinfo)[0] = NULL;
644 /* Attempt to create a string index for exp, ADDR_EXPR's operand.
645 Return a pointer to the location where the string index can
646 be stored (if 0) or is stored, or NULL if this can't be tracked. */
648 static int *
649 addr_stridxptr (tree exp)
651 HOST_WIDE_INT off;
653 poly_int64 poff;
654 tree base = get_addr_base_and_unit_offset (exp, &poff);
655 if (base == NULL_TREE || !DECL_P (base) || !poff.is_constant (&off))
656 return NULL;
658 if (!decl_to_stridxlist_htab)
660 decl_to_stridxlist_htab
661 = new hash_map<tree_decl_hash, stridxlist> (64);
662 gcc_obstack_init (&stridx_obstack);
665 bool existed;
666 stridxlist *list = &decl_to_stridxlist_htab->get_or_insert (base, &existed);
667 if (existed)
669 int i;
670 stridxlist *before = NULL;
671 for (i = 0; i < 32; i++)
673 if (list->offset == off)
674 return &list->idx;
675 if (list->offset > off && before == NULL)
676 before = list;
677 if (list->next == NULL)
678 break;
679 list = list->next;
681 if (i == 32)
682 return NULL;
683 if (before)
685 list = before;
686 before = XOBNEW (&stridx_obstack, struct stridxlist);
687 *before = *list;
688 list->next = before;
689 list->offset = off;
690 list->idx = 0;
691 return &list->idx;
693 list->next = XOBNEW (&stridx_obstack, struct stridxlist);
694 list = list->next;
697 list->next = NULL;
698 list->offset = off;
699 list->idx = 0;
700 return &list->idx;
703 /* Create a new string index, or return 0 if reached limit. */
705 static int
706 new_stridx (tree exp)
708 int idx;
709 if (max_stridx >= param_max_tracked_strlens)
710 return 0;
711 if (TREE_CODE (exp) == SSA_NAME)
713 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
714 return 0;
715 idx = max_stridx++;
716 ssa_ver_to_stridx[SSA_NAME_VERSION (exp)] = idx;
717 return idx;
719 if (TREE_CODE (exp) == ADDR_EXPR)
721 int *pidx = addr_stridxptr (TREE_OPERAND (exp, 0));
722 if (pidx != NULL)
724 gcc_assert (*pidx == 0);
725 *pidx = max_stridx++;
726 return *pidx;
729 return 0;
732 /* Like new_stridx, but for ADDR_EXPR's operand instead. */
734 static int
735 new_addr_stridx (tree exp)
737 int *pidx;
738 if (max_stridx >= param_max_tracked_strlens)
739 return 0;
740 pidx = addr_stridxptr (exp);
741 if (pidx != NULL)
743 gcc_assert (*pidx == 0);
744 *pidx = max_stridx++;
745 return *pidx;
747 return 0;
750 /* Create a new strinfo. */
752 static strinfo *
753 new_strinfo (tree ptr, int idx, tree nonzero_chars, bool full_string_p)
755 strinfo *si = strinfo_pool.allocate ();
756 si->nonzero_chars = nonzero_chars;
757 STRIP_USELESS_TYPE_CONVERSION (ptr);
758 si->ptr = ptr;
759 si->stmt = NULL;
760 si->alloc = NULL;
761 si->endptr = NULL_TREE;
762 si->refcount = 1;
763 si->idx = idx;
764 si->first = 0;
765 si->prev = 0;
766 si->next = 0;
767 si->writable = false;
768 si->dont_invalidate = false;
769 si->full_string_p = full_string_p;
770 return si;
773 /* Decrease strinfo refcount and free it if not referenced anymore. */
775 static inline void
776 free_strinfo (strinfo *si)
778 if (si && --si->refcount == 0)
779 strinfo_pool.remove (si);
782 /* Set strinfo in the vector entry IDX to SI. */
784 static inline void
785 set_strinfo (int idx, strinfo *si)
787 if (vec_safe_length (stridx_to_strinfo) && (*stridx_to_strinfo)[0])
788 unshare_strinfo_vec ();
789 if (vec_safe_length (stridx_to_strinfo) <= (unsigned int) idx)
790 vec_safe_grow_cleared (stridx_to_strinfo, idx + 1, true);
791 (*stridx_to_strinfo)[idx] = si;
794 /* Return the first strinfo in the related strinfo chain
795 if all strinfos in between belong to the chain, otherwise NULL. */
797 static strinfo *
798 verify_related_strinfos (strinfo *origsi)
800 strinfo *si = origsi, *psi;
802 if (origsi->first == 0)
803 return NULL;
804 for (; si->prev; si = psi)
806 if (si->first != origsi->first)
807 return NULL;
808 psi = get_strinfo (si->prev);
809 if (psi == NULL)
810 return NULL;
811 if (psi->next != si->idx)
812 return NULL;
814 if (si->idx != si->first)
815 return NULL;
816 return si;
819 /* Set SI's endptr to ENDPTR and compute its length based on SI->ptr.
820 Use LOC for folding. */
822 static void
823 set_endptr_and_length (location_t loc, strinfo *si, tree endptr)
825 si->endptr = endptr;
826 si->stmt = NULL;
827 tree start_as_size = fold_convert_loc (loc, size_type_node, si->ptr);
828 tree end_as_size = fold_convert_loc (loc, size_type_node, endptr);
829 si->nonzero_chars = fold_build2_loc (loc, MINUS_EXPR, size_type_node,
830 end_as_size, start_as_size);
831 si->full_string_p = true;
834 /* Return the string length, or NULL if it can't be computed.
835 The length may but need not be constant. Instead, it might be
836 the result of a strlen() call. */
838 static tree
839 get_string_length (strinfo *si)
841 /* If the length has already been computed return it if it's exact
842 (i.e., the string is nul-terminated at NONZERO_CHARS), or return
843 null if it isn't. */
844 if (si->nonzero_chars)
845 return si->full_string_p ? si->nonzero_chars : NULL;
847 /* If the string is the result of one of the built-in calls below
848 attempt to compute the length from the call statement. */
849 if (si->stmt)
851 gimple *stmt = si->stmt, *lenstmt;
852 tree callee, lhs, fn, tem;
853 location_t loc;
854 gimple_stmt_iterator gsi;
856 gcc_assert (is_gimple_call (stmt));
857 callee = gimple_call_fndecl (stmt);
858 gcc_assert (callee && fndecl_built_in_p (callee, BUILT_IN_NORMAL));
859 lhs = gimple_call_lhs (stmt);
860 /* unshare_strinfo is intentionally not called here. The (delayed)
861 transformation of strcpy or strcat into stpcpy is done at the place
862 of the former strcpy/strcat call and so can affect all the strinfos
863 with the same stmt. If they were unshared before and transformation
864 has been already done, the handling of BUILT_IN_STPCPY{,_CHK} should
865 just compute the right length. */
866 switch (DECL_FUNCTION_CODE (callee))
868 case BUILT_IN_STRCAT:
869 case BUILT_IN_STRCAT_CHK:
870 gsi = gsi_for_stmt (stmt);
871 fn = builtin_decl_implicit (BUILT_IN_STRLEN);
872 gcc_assert (lhs == NULL_TREE);
873 tem = unshare_expr (gimple_call_arg (stmt, 0));
874 lenstmt = gimple_build_call (fn, 1, tem);
875 lhs = make_ssa_name (TREE_TYPE (TREE_TYPE (fn)), lenstmt);
876 gimple_call_set_lhs (lenstmt, lhs);
877 gimple_set_vuse (lenstmt, gimple_vuse (stmt));
878 gsi_insert_before (&gsi, lenstmt, GSI_SAME_STMT);
879 tem = gimple_call_arg (stmt, 0);
880 if (!ptrofftype_p (TREE_TYPE (lhs)))
882 lhs = convert_to_ptrofftype (lhs);
883 lhs = force_gimple_operand_gsi (&gsi, lhs, true, NULL_TREE,
884 true, GSI_SAME_STMT);
886 lenstmt = gimple_build_assign
887 (make_ssa_name (TREE_TYPE (gimple_call_arg (stmt, 0))),
888 POINTER_PLUS_EXPR,tem, lhs);
889 gsi_insert_before (&gsi, lenstmt, GSI_SAME_STMT);
890 gimple_call_set_arg (stmt, 0, gimple_assign_lhs (lenstmt));
891 lhs = NULL_TREE;
892 /* FALLTHRU */
893 case BUILT_IN_STRCPY:
894 case BUILT_IN_STRCPY_CHK:
895 gcc_assert (builtin_decl_implicit_p (BUILT_IN_STPCPY));
896 if (gimple_call_num_args (stmt) == 2)
897 fn = builtin_decl_implicit (BUILT_IN_STPCPY);
898 else
899 fn = builtin_decl_explicit (BUILT_IN_STPCPY_CHK);
900 gcc_assert (lhs == NULL_TREE);
901 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
903 fprintf (dump_file, "Optimizing: ");
904 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
906 gimple_call_set_fndecl (stmt, fn);
907 lhs = make_ssa_name (TREE_TYPE (TREE_TYPE (fn)), stmt);
908 gimple_call_set_lhs (stmt, lhs);
909 update_stmt (stmt);
910 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
912 fprintf (dump_file, "into: ");
913 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
915 /* FALLTHRU */
916 case BUILT_IN_STPCPY:
917 case BUILT_IN_STPCPY_CHK:
918 gcc_assert (lhs != NULL_TREE);
919 loc = gimple_location (stmt);
920 set_endptr_and_length (loc, si, lhs);
921 for (strinfo *chainsi = verify_related_strinfos (si);
922 chainsi != NULL;
923 chainsi = get_next_strinfo (chainsi))
924 if (chainsi->nonzero_chars == NULL)
925 set_endptr_and_length (loc, chainsi, lhs);
926 break;
927 case BUILT_IN_ALLOCA:
928 case BUILT_IN_ALLOCA_WITH_ALIGN:
929 case BUILT_IN_MALLOC:
930 break;
931 /* BUILT_IN_CALLOC always has si->nonzero_chars set. */
932 default:
933 gcc_unreachable ();
934 break;
938 return si->nonzero_chars;
941 /* Dump strlen data to FP for statement STMT. When non-null, RVALS
942 points to the valuation engine used to calculate ranges, and is
943 used to dump strlen range for non-constant results. */
945 DEBUG_FUNCTION void
946 dump_strlen_info (FILE *fp, gimple *stmt, range_query *rvals)
948 if (stmt)
950 fprintf (fp, "\nDumping strlen pass data after ");
951 print_gimple_expr (fp, stmt, TDF_LINENO);
952 fputc ('\n', fp);
954 else
955 fprintf (fp, "\nDumping strlen pass data\n");
957 fprintf (fp, "max_stridx = %i\n", max_stridx);
958 fprintf (fp, "ssa_ver_to_stridx has %u elements\n",
959 ssa_ver_to_stridx.length ());
960 fprintf (fp, "stridx_to_strinfo");
961 if (stridx_to_strinfo)
963 fprintf (fp, " has %u elements\n", stridx_to_strinfo->length ());
964 for (unsigned i = 0; i != stridx_to_strinfo->length (); ++i)
966 if (strinfo *si = (*stridx_to_strinfo)[i])
968 if (!si->idx)
969 continue;
970 fprintf (fp, " idx = %i", si->idx);
971 if (si->ptr)
973 fprintf (fp, ", ptr = ");
974 print_generic_expr (fp, si->ptr);
977 if (si->nonzero_chars)
979 fprintf (fp, ", nonzero_chars = ");
980 print_generic_expr (fp, si->nonzero_chars);
981 if (TREE_CODE (si->nonzero_chars) == SSA_NAME)
983 value_range_kind rng = VR_UNDEFINED;
984 wide_int min, max;
985 if (rvals)
987 value_range vr;
988 rvals->range_of_expr (vr, si->nonzero_chars,
989 si->stmt);
990 rng = vr.kind ();
991 if (range_int_cst_p (&vr))
993 min = wi::to_wide (vr.min ());
994 max = wi::to_wide (vr.max ());
996 else
997 rng = VR_UNDEFINED;
999 else
1001 value_range vr;
1002 get_range_query (cfun)
1003 ->range_of_expr (vr, si->nonzero_chars);
1004 rng = vr.kind ();
1005 if (!vr.undefined_p ())
1007 min = wi::to_wide (vr.min ());
1008 max = wi::to_wide (vr.max ());
1012 if (rng == VR_RANGE || rng == VR_ANTI_RANGE)
1014 fprintf (fp, " %s[%llu, %llu]",
1015 rng == VR_RANGE ? "" : "~",
1016 (long long) min.to_uhwi (),
1017 (long long) max.to_uhwi ());
1022 fprintf (fp, ", refcount = %i", si->refcount);
1023 if (si->stmt)
1025 fprintf (fp, ", stmt = ");
1026 print_gimple_expr (fp, si->stmt, 0);
1028 if (si->alloc)
1030 fprintf (fp, ", alloc = ");
1031 print_gimple_expr (fp, si->alloc, 0);
1033 if (si->writable)
1034 fprintf (fp, ", writable");
1035 if (si->dont_invalidate)
1036 fprintf (fp, ", dont_invalidate");
1037 if (si->full_string_p)
1038 fprintf (fp, ", full_string_p");
1039 if (strinfo *next = get_next_strinfo (si))
1041 fprintf (fp, ", {");
1043 fprintf (fp, "%i%s", next->idx, next->first ? ", " : "");
1044 while ((next = get_next_strinfo (next)));
1045 fprintf (fp, "}");
1047 fputs ("\n", fp);
1051 else
1052 fprintf (fp, " = null\n");
1054 fprintf (fp, "decl_to_stridxlist_htab");
1055 if (decl_to_stridxlist_htab)
1057 fputs ("\n", fp);
1058 typedef decl_to_stridxlist_htab_t::iterator iter_t;
1059 for (iter_t it = decl_to_stridxlist_htab->begin ();
1060 it != decl_to_stridxlist_htab->end (); ++it)
1062 tree decl = (*it).first;
1063 stridxlist *list = &(*it).second;
1064 fprintf (fp, " decl = ");
1065 print_generic_expr (fp, decl);
1066 if (list)
1068 fprintf (fp, ", offsets = {");
1069 for (; list; list = list->next)
1070 fprintf (fp, "%lli%s", (long long) list->offset,
1071 list->next ? ", " : "");
1072 fputs ("}", fp);
1074 fputs ("\n", fp);
1077 else
1078 fprintf (fp, " = null\n");
1080 if (laststmt.stmt)
1082 fprintf (fp, "laststmt = ");
1083 print_gimple_expr (fp, laststmt.stmt, 0);
1084 fprintf (fp, ", len = ");
1085 print_generic_expr (fp, laststmt.len);
1086 fprintf (fp, ", stridx = %i\n", laststmt.stridx);
1090 /* Attempt to determine the length of the string SRC. On success, store
1091 the length in *PDATA and return true. Otherwise, return false.
1092 VISITED is a bitmap of visited PHI nodes. RVALS points to the valuation
1093 engine used to calculate ranges. PSSA_DEF_MAX to an SSA_NAME
1094 assignment limit used to prevent runaway recursion. */
1096 static bool
1097 get_range_strlen_dynamic (tree src, gimple *stmt,
1098 c_strlen_data *pdata, bitmap *visited,
1099 range_query *rvals, unsigned *pssa_def_max)
1101 int idx = get_stridx (src, stmt);
1102 if (!idx)
1104 if (TREE_CODE (src) == SSA_NAME)
1106 gimple *def_stmt = SSA_NAME_DEF_STMT (src);
1107 if (gimple_code (def_stmt) == GIMPLE_PHI)
1109 if (!*visited)
1110 *visited = BITMAP_ALLOC (NULL);
1112 if (!bitmap_set_bit (*visited, SSA_NAME_VERSION (src)))
1113 return true;
1115 if (*pssa_def_max == 0)
1116 return false;
1118 --*pssa_def_max;
1120 /* Iterate over the PHI arguments and determine the minimum
1121 and maximum length/size of each and incorporate them into
1122 the overall result. */
1123 gphi *phi = as_a <gphi *> (def_stmt);
1124 for (unsigned i = 0; i != gimple_phi_num_args (phi); ++i)
1126 tree arg = gimple_phi_arg_def (phi, i);
1127 if (arg == gimple_phi_result (def_stmt))
1128 continue;
1130 c_strlen_data argdata = { };
1131 if (get_range_strlen_dynamic (arg, phi, &argdata, visited,
1132 rvals, pssa_def_max))
1134 /* Set the DECL of an unterminated array this argument
1135 refers to if one hasn't been found yet. */
1136 if (!pdata->decl && argdata.decl)
1137 pdata->decl = argdata.decl;
1139 if (!argdata.minlen
1140 || (integer_zerop (argdata.minlen)
1141 && (!argdata.maxbound
1142 || integer_all_onesp (argdata.maxbound))
1143 && integer_all_onesp (argdata.maxlen)))
1145 /* Set the upper bound of the length to unbounded. */
1146 pdata->maxlen = build_all_ones_cst (size_type_node);
1147 continue;
1150 /* Adjust the minimum and maximum length determined
1151 so far and the upper bound on the array size. */
1152 if (!pdata->minlen
1153 || tree_int_cst_lt (argdata.minlen, pdata->minlen))
1154 pdata->minlen = argdata.minlen;
1155 if (!pdata->maxlen
1156 || (argdata.maxlen
1157 && tree_int_cst_lt (pdata->maxlen, argdata.maxlen)))
1158 pdata->maxlen = argdata.maxlen;
1159 if (!pdata->maxbound
1160 || TREE_CODE (pdata->maxbound) != INTEGER_CST
1161 || (argdata.maxbound
1162 && tree_int_cst_lt (pdata->maxbound,
1163 argdata.maxbound)
1164 && !integer_all_onesp (argdata.maxbound)))
1165 pdata->maxbound = argdata.maxbound;
1167 else
1168 pdata->maxlen = build_all_ones_cst (size_type_node);
1171 return true;
1175 /* Return success regardless of the result and handle *PDATA
1176 in the caller. */
1177 get_range_strlen (src, pdata, 1);
1178 return true;
1181 if (idx < 0)
1183 /* SRC is a string of constant length. */
1184 pdata->minlen = build_int_cst (size_type_node, ~idx);
1185 pdata->maxlen = pdata->minlen;
1186 pdata->maxbound = pdata->maxlen;
1187 return true;
1190 if (strinfo *si = get_strinfo (idx))
1192 pdata->minlen = get_string_length (si);
1193 if (!pdata->minlen && si->nonzero_chars)
1195 if (TREE_CODE (si->nonzero_chars) == INTEGER_CST)
1196 pdata->minlen = si->nonzero_chars;
1197 else if (TREE_CODE (si->nonzero_chars) == SSA_NAME)
1199 value_range vr;
1200 rvals->range_of_expr (vr, si->nonzero_chars, si->stmt);
1201 if (range_int_cst_p (&vr))
1203 pdata->minlen = vr.min ();
1204 pdata->maxlen = vr.max ();
1206 else
1207 pdata->minlen = build_zero_cst (size_type_node);
1209 else
1210 pdata->minlen = build_zero_cst (size_type_node);
1212 tree base = si->ptr;
1213 if (TREE_CODE (base) == ADDR_EXPR)
1214 base = TREE_OPERAND (base, 0);
1216 HOST_WIDE_INT off;
1217 poly_int64 poff;
1218 base = get_addr_base_and_unit_offset (base, &poff);
1219 if (base
1220 && DECL_P (base)
1221 && TREE_CODE (TREE_TYPE (base)) == ARRAY_TYPE
1222 && TYPE_SIZE_UNIT (TREE_TYPE (base))
1223 && poff.is_constant (&off))
1225 tree basetype = TREE_TYPE (base);
1226 tree size = TYPE_SIZE_UNIT (basetype);
1227 if (TREE_CODE (size) == INTEGER_CST)
1229 ++off; /* Increment for the terminating nul. */
1230 tree toffset = build_int_cst (size_type_node, off);
1231 pdata->maxlen = fold_build2 (MINUS_EXPR, size_type_node, size,
1232 toffset);
1233 pdata->maxbound = pdata->maxlen;
1235 else
1236 pdata->maxlen = build_all_ones_cst (size_type_node);
1238 else
1239 pdata->maxlen = build_all_ones_cst (size_type_node);
1241 else if (pdata->minlen && TREE_CODE (pdata->minlen) == SSA_NAME)
1243 value_range vr;
1244 rvals->range_of_expr (vr, si->nonzero_chars, stmt);
1245 if (range_int_cst_p (&vr))
1247 pdata->minlen = vr.min ();
1248 pdata->maxlen = vr.max ();
1249 pdata->maxbound = pdata->maxlen;
1251 else
1253 pdata->minlen = build_zero_cst (size_type_node);
1254 pdata->maxlen = build_all_ones_cst (size_type_node);
1257 else if (pdata->minlen && TREE_CODE (pdata->minlen) == INTEGER_CST)
1259 pdata->maxlen = pdata->minlen;
1260 pdata->maxbound = pdata->minlen;
1262 else
1264 /* For PDATA->MINLEN that's a non-constant expression such
1265 as PLUS_EXPR whose value range is unknown, set the bounds
1266 to zero and SIZE_MAX. */
1267 pdata->minlen = build_zero_cst (size_type_node);
1268 pdata->maxlen = build_all_ones_cst (size_type_node);
1271 return true;
1274 return false;
1277 /* Analogous to get_range_strlen but for dynamically created strings,
1278 i.e., those created by calls to strcpy as opposed to just string
1279 constants.
1280 Try to obtain the range of the lengths of the string(s) referenced
1281 by SRC, or the size of the largest array SRC refers to if the range
1282 of lengths cannot be determined, and store all in *PDATA. RVALS
1283 points to the valuation engine used to calculate ranges. */
1285 void
1286 get_range_strlen_dynamic (tree src, gimple *stmt, c_strlen_data *pdata,
1287 range_query *rvals)
1289 bitmap visited = NULL;
1290 tree maxbound = pdata->maxbound;
1292 unsigned limit = param_ssa_name_def_chain_limit;
1293 if (!get_range_strlen_dynamic (src, stmt, pdata, &visited, rvals, &limit))
1295 /* On failure extend the length range to an impossible maximum
1296 (a valid MAXLEN must be less than PTRDIFF_MAX - 1). Other
1297 members can stay unchanged regardless. */
1298 pdata->minlen = ssize_int (0);
1299 pdata->maxlen = build_all_ones_cst (size_type_node);
1301 else if (!pdata->minlen)
1302 pdata->minlen = ssize_int (0);
1304 /* If it's unchanged from it initial non-null value, set the conservative
1305 MAXBOUND to SIZE_MAX. Otherwise leave it null (if it is null). */
1306 if (maxbound && pdata->maxbound == maxbound)
1307 pdata->maxbound = build_all_ones_cst (size_type_node);
1309 if (visited)
1310 BITMAP_FREE (visited);
1313 /* Invalidate string length information for strings whose length might
1314 change due to stores in STMT, except those marked DONT_INVALIDATE.
1315 For string-modifying statements, ZERO_WRITE is set when the statement
1316 wrote only zeros.
1317 Returns true if any STRIDX_TO_STRINFO entries were considered
1318 for invalidation. */
1320 static bool
1321 maybe_invalidate (gimple *stmt, bool zero_write = false)
1323 if (dump_file && (dump_flags & TDF_DETAILS))
1325 fprintf (dump_file, "%s called for ", __func__);
1326 print_gimple_stmt (dump_file, stmt, TDF_LINENO);
1329 strinfo *si;
1330 bool nonempty = false;
1332 for (unsigned i = 1; vec_safe_iterate (stridx_to_strinfo, i, &si); ++i)
1334 if (si == NULL || !POINTER_TYPE_P (TREE_TYPE (si->ptr)))
1335 continue;
1337 nonempty = true;
1339 /* Unconditionally reset DONT_INVALIDATE. */
1340 bool dont_invalidate = si->dont_invalidate;
1341 si->dont_invalidate = false;
1343 if (dont_invalidate)
1344 continue;
1346 ao_ref r;
1347 tree size = si->nonzero_chars;
1348 ao_ref_init_from_ptr_and_size (&r, si->ptr, size);
1349 /* Include the terminating nul in the size of the string
1350 to consider when determining possible clobber. But do not
1351 add it to 'size' since we don't know whether it would
1352 actually fit the allocated area. */
1353 if (known_size_p (r.size))
1355 if (known_le (r.size, HOST_WIDE_INT_MAX - BITS_PER_UNIT))
1356 r.max_size += BITS_PER_UNIT;
1357 else
1358 r.max_size = -1;
1360 if (stmt_may_clobber_ref_p_1 (stmt, &r))
1362 if (dump_file && (dump_flags & TDF_DETAILS))
1364 fputs (" statement may clobber object ", dump_file);
1365 print_generic_expr (dump_file, si->ptr);
1366 if (size && tree_fits_uhwi_p (size))
1367 fprintf (dump_file, " " HOST_WIDE_INT_PRINT_UNSIGNED
1368 " bytes in size", tree_to_uhwi (size));
1369 fputc ('\n', dump_file);
1372 set_strinfo (i, NULL);
1373 free_strinfo (si);
1374 continue;
1377 if (size
1378 && !zero_write
1379 && si->stmt
1380 && is_gimple_call (si->stmt)
1381 && (DECL_FUNCTION_CODE (gimple_call_fndecl (si->stmt))
1382 == BUILT_IN_CALLOC))
1384 /* If the clobber test above considered the length of
1385 the string (including the nul), then for (potentially)
1386 non-zero writes that might modify storage allocated by
1387 calloc consider the whole object and if it might be
1388 clobbered by the statement reset the statement. */
1389 ao_ref_init_from_ptr_and_size (&r, si->ptr, NULL_TREE);
1390 if (stmt_may_clobber_ref_p_1 (stmt, &r))
1391 si->stmt = NULL;
1395 if (dump_file && (dump_flags & TDF_DETAILS))
1396 fprintf (dump_file, "%s returns %i\n", __func__, nonempty);
1398 return nonempty;
1401 /* Unshare strinfo record SI, if it has refcount > 1 or
1402 if stridx_to_strinfo vector is shared with some other
1403 bbs. */
1405 static strinfo *
1406 unshare_strinfo (strinfo *si)
1408 strinfo *nsi;
1410 if (si->refcount == 1 && !strinfo_shared ())
1411 return si;
1413 nsi = new_strinfo (si->ptr, si->idx, si->nonzero_chars, si->full_string_p);
1414 nsi->stmt = si->stmt;
1415 nsi->alloc = si->alloc;
1416 nsi->endptr = si->endptr;
1417 nsi->first = si->first;
1418 nsi->prev = si->prev;
1419 nsi->next = si->next;
1420 nsi->writable = si->writable;
1421 set_strinfo (si->idx, nsi);
1422 free_strinfo (si);
1423 return nsi;
1426 /* Attempt to create a new strinfo for BASESI + OFF, or find existing
1427 strinfo if there is any. Return it's idx, or 0 if no strinfo has
1428 been created. */
1430 static int
1431 get_stridx_plus_constant (strinfo *basesi, unsigned HOST_WIDE_INT off,
1432 tree ptr)
1434 if (TREE_CODE (ptr) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (ptr))
1435 return 0;
1437 if (compare_nonzero_chars (basesi, off) < 0
1438 || !tree_fits_uhwi_p (basesi->nonzero_chars))
1439 return 0;
1441 unsigned HOST_WIDE_INT nonzero_chars
1442 = tree_to_uhwi (basesi->nonzero_chars) - off;
1443 strinfo *si = basesi, *chainsi;
1444 if (si->first || si->prev || si->next)
1445 si = verify_related_strinfos (basesi);
1446 if (si == NULL
1447 || si->nonzero_chars == NULL_TREE
1448 || TREE_CODE (si->nonzero_chars) != INTEGER_CST)
1449 return 0;
1451 if (TREE_CODE (ptr) == SSA_NAME
1452 && ssa_ver_to_stridx.length () <= SSA_NAME_VERSION (ptr))
1453 ssa_ver_to_stridx.safe_grow_cleared (num_ssa_names, true);
1455 gcc_checking_assert (compare_tree_int (si->nonzero_chars, off) != -1);
1456 for (chainsi = si; chainsi->next; chainsi = si)
1458 si = get_next_strinfo (chainsi);
1459 if (si == NULL
1460 || si->nonzero_chars == NULL_TREE
1461 || TREE_CODE (si->nonzero_chars) != INTEGER_CST)
1462 break;
1463 int r = compare_tree_int (si->nonzero_chars, nonzero_chars);
1464 if (r != 1)
1466 if (r == 0)
1468 if (TREE_CODE (ptr) == SSA_NAME)
1469 ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)] = si->idx;
1470 else
1472 int *pidx = addr_stridxptr (TREE_OPERAND (ptr, 0));
1473 if (pidx != NULL && *pidx == 0)
1474 *pidx = si->idx;
1476 return si->idx;
1478 break;
1482 int idx = new_stridx (ptr);
1483 if (idx == 0)
1484 return 0;
1485 si = new_strinfo (ptr, idx, build_int_cst (size_type_node, nonzero_chars),
1486 basesi->full_string_p);
1487 set_strinfo (idx, si);
1488 if (strinfo *nextsi = get_strinfo (chainsi->next))
1490 nextsi = unshare_strinfo (nextsi);
1491 si->next = nextsi->idx;
1492 nextsi->prev = idx;
1494 chainsi = unshare_strinfo (chainsi);
1495 if (chainsi->first == 0)
1496 chainsi->first = chainsi->idx;
1497 chainsi->next = idx;
1498 if (chainsi->endptr == NULL_TREE && zero_length_string_p (si))
1499 chainsi->endptr = ptr;
1500 si->endptr = chainsi->endptr;
1501 si->prev = chainsi->idx;
1502 si->first = chainsi->first;
1503 si->writable = chainsi->writable;
1504 return si->idx;
1507 /* Note that PTR, a pointer SSA_NAME initialized in the current stmt, points
1508 to a zero-length string and if possible chain it to a related strinfo
1509 chain whose part is or might be CHAINSI. */
1511 static strinfo *
1512 zero_length_string (tree ptr, strinfo *chainsi)
1514 strinfo *si;
1515 int idx;
1516 if (ssa_ver_to_stridx.length () <= SSA_NAME_VERSION (ptr))
1517 ssa_ver_to_stridx.safe_grow_cleared (num_ssa_names, true);
1518 gcc_checking_assert (TREE_CODE (ptr) == SSA_NAME
1519 && ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)] == 0);
1521 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (ptr))
1522 return NULL;
1523 if (chainsi != NULL)
1525 si = verify_related_strinfos (chainsi);
1526 if (si)
1530 /* We shouldn't mix delayed and non-delayed lengths. */
1531 gcc_assert (si->full_string_p);
1532 if (si->endptr == NULL_TREE)
1534 si = unshare_strinfo (si);
1535 si->endptr = ptr;
1537 chainsi = si;
1538 si = get_next_strinfo (si);
1540 while (si != NULL);
1541 if (zero_length_string_p (chainsi))
1543 if (chainsi->next)
1545 chainsi = unshare_strinfo (chainsi);
1546 chainsi->next = 0;
1548 ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)] = chainsi->idx;
1549 return chainsi;
1552 else
1554 /* We shouldn't mix delayed and non-delayed lengths. */
1555 gcc_assert (chainsi->full_string_p);
1556 if (chainsi->first || chainsi->prev || chainsi->next)
1558 chainsi = unshare_strinfo (chainsi);
1559 chainsi->first = 0;
1560 chainsi->prev = 0;
1561 chainsi->next = 0;
1565 idx = new_stridx (ptr);
1566 if (idx == 0)
1567 return NULL;
1568 si = new_strinfo (ptr, idx, build_int_cst (size_type_node, 0), true);
1569 set_strinfo (idx, si);
1570 si->endptr = ptr;
1571 if (chainsi != NULL)
1573 chainsi = unshare_strinfo (chainsi);
1574 if (chainsi->first == 0)
1575 chainsi->first = chainsi->idx;
1576 chainsi->next = idx;
1577 if (chainsi->endptr == NULL_TREE)
1578 chainsi->endptr = ptr;
1579 si->prev = chainsi->idx;
1580 si->first = chainsi->first;
1581 si->writable = chainsi->writable;
1583 return si;
1586 /* For strinfo ORIGSI whose length has been just updated, adjust other
1587 related strinfos so that they match the new ORIGSI. This involves:
1589 - adding ADJ to the nonzero_chars fields
1590 - copying full_string_p from the new ORIGSI. */
1592 static void
1593 adjust_related_strinfos (location_t loc, strinfo *origsi, tree adj)
1595 strinfo *si = verify_related_strinfos (origsi);
1597 if (si == NULL)
1598 return;
1600 while (1)
1602 strinfo *nsi;
1604 if (si != origsi)
1606 tree tem;
1608 si = unshare_strinfo (si);
1609 /* We shouldn't see delayed lengths here; the caller must
1610 have calculated the old length in order to calculate
1611 the adjustment. */
1612 gcc_assert (si->nonzero_chars);
1613 tem = fold_convert_loc (loc, TREE_TYPE (si->nonzero_chars), adj);
1614 si->nonzero_chars = fold_build2_loc (loc, PLUS_EXPR,
1615 TREE_TYPE (si->nonzero_chars),
1616 si->nonzero_chars, tem);
1617 si->full_string_p = origsi->full_string_p;
1619 si->endptr = NULL_TREE;
1620 si->dont_invalidate = true;
1622 nsi = get_next_strinfo (si);
1623 if (nsi == NULL)
1624 return;
1625 si = nsi;
1629 /* Find if there are other SSA_NAME pointers equal to PTR
1630 for which we don't track their string lengths yet. If so, use
1631 IDX for them. */
1633 static void
1634 find_equal_ptrs (tree ptr, int idx)
1636 if (TREE_CODE (ptr) != SSA_NAME)
1637 return;
1638 while (1)
1640 gimple *stmt = SSA_NAME_DEF_STMT (ptr);
1641 if (!is_gimple_assign (stmt))
1642 return;
1643 ptr = gimple_assign_rhs1 (stmt);
1644 switch (gimple_assign_rhs_code (stmt))
1646 case SSA_NAME:
1647 break;
1648 CASE_CONVERT:
1649 if (!POINTER_TYPE_P (TREE_TYPE (ptr)))
1650 return;
1651 if (TREE_CODE (ptr) == SSA_NAME)
1652 break;
1653 if (TREE_CODE (ptr) != ADDR_EXPR)
1654 return;
1655 /* FALLTHRU */
1656 case ADDR_EXPR:
1658 int *pidx = addr_stridxptr (TREE_OPERAND (ptr, 0));
1659 if (pidx != NULL && *pidx == 0)
1660 *pidx = idx;
1661 return;
1663 default:
1664 return;
1667 /* We might find an endptr created in this pass. Grow the
1668 vector in that case. */
1669 if (ssa_ver_to_stridx.length () <= SSA_NAME_VERSION (ptr))
1670 ssa_ver_to_stridx.safe_grow_cleared (num_ssa_names, true);
1672 if (ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)] != 0)
1673 return;
1674 ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)] = idx;
1678 /* Return true if STMT is a call to a builtin function with the right
1679 arguments and attributes that should be considered for optimization
1680 by this pass. */
1682 static bool
1683 valid_builtin_call (gimple *stmt)
1685 if (!gimple_call_builtin_p (stmt, BUILT_IN_NORMAL))
1686 return false;
1688 tree callee = gimple_call_fndecl (stmt);
1689 tree decl = builtin_decl_explicit (DECL_FUNCTION_CODE (callee));
1690 if (decl
1691 && decl != callee
1692 && !gimple_builtin_call_types_compatible_p (stmt, decl))
1693 return false;
1695 switch (DECL_FUNCTION_CODE (callee))
1697 case BUILT_IN_MEMCMP:
1698 case BUILT_IN_MEMCMP_EQ:
1699 case BUILT_IN_STRCMP:
1700 case BUILT_IN_STRNCMP:
1701 case BUILT_IN_STRCHR:
1702 case BUILT_IN_STRLEN:
1703 case BUILT_IN_STRNLEN:
1704 /* The above functions should be pure. Punt if they aren't. */
1705 if (gimple_vdef (stmt) || gimple_vuse (stmt) == NULL_TREE)
1706 return false;
1707 break;
1709 case BUILT_IN_ALLOCA:
1710 case BUILT_IN_ALLOCA_WITH_ALIGN:
1711 case BUILT_IN_CALLOC:
1712 case BUILT_IN_MALLOC:
1713 case BUILT_IN_MEMCPY:
1714 case BUILT_IN_MEMCPY_CHK:
1715 case BUILT_IN_MEMPCPY:
1716 case BUILT_IN_MEMPCPY_CHK:
1717 case BUILT_IN_MEMSET:
1718 case BUILT_IN_STPCPY:
1719 case BUILT_IN_STPCPY_CHK:
1720 case BUILT_IN_STPNCPY:
1721 case BUILT_IN_STPNCPY_CHK:
1722 case BUILT_IN_STRCAT:
1723 case BUILT_IN_STRCAT_CHK:
1724 case BUILT_IN_STRCPY:
1725 case BUILT_IN_STRCPY_CHK:
1726 case BUILT_IN_STRNCAT:
1727 case BUILT_IN_STRNCAT_CHK:
1728 case BUILT_IN_STRNCPY:
1729 case BUILT_IN_STRNCPY_CHK:
1730 /* The above functions should be neither const nor pure. Punt if they
1731 aren't. */
1732 if (gimple_vdef (stmt) == NULL_TREE || gimple_vuse (stmt) == NULL_TREE)
1733 return false;
1734 break;
1736 default:
1737 break;
1740 return true;
1743 /* If the last .MEM setter statement before STMT is
1744 memcpy (x, y, strlen (y) + 1), the only .MEM use of it is STMT
1745 and STMT is known to overwrite x[strlen (x)], adjust the last memcpy to
1746 just memcpy (x, y, strlen (y)). SI must be the zero length
1747 strinfo. */
1749 void
1750 strlen_pass::adjust_last_stmt (strinfo *si, gimple *stmt, bool is_strcat)
1752 tree vuse, callee, len;
1753 struct laststmt_struct last = laststmt;
1754 strinfo *lastsi, *firstsi;
1755 unsigned len_arg_no = 2;
1757 laststmt.stmt = NULL;
1758 laststmt.len = NULL_TREE;
1759 laststmt.stridx = 0;
1761 if (last.stmt == NULL)
1762 return;
1764 vuse = gimple_vuse (stmt);
1765 if (vuse == NULL_TREE
1766 || SSA_NAME_DEF_STMT (vuse) != last.stmt
1767 || !has_single_use (vuse))
1768 return;
1770 gcc_assert (last.stridx > 0);
1771 lastsi = get_strinfo (last.stridx);
1772 if (lastsi == NULL)
1773 return;
1775 if (lastsi != si)
1777 if (lastsi->first == 0 || lastsi->first != si->first)
1778 return;
1780 firstsi = verify_related_strinfos (si);
1781 if (firstsi == NULL)
1782 return;
1783 while (firstsi != lastsi)
1785 firstsi = get_next_strinfo (firstsi);
1786 if (firstsi == NULL)
1787 return;
1791 if (!is_strcat && !zero_length_string_p (si))
1792 return;
1794 if (is_gimple_assign (last.stmt))
1796 gimple_stmt_iterator gsi;
1798 if (!integer_zerop (gimple_assign_rhs1 (last.stmt)))
1799 return;
1800 if (stmt_could_throw_p (cfun, last.stmt))
1801 return;
1802 gsi = gsi_for_stmt (last.stmt);
1803 unlink_stmt_vdef (last.stmt);
1804 release_defs (last.stmt);
1805 gsi_remove (&gsi, true);
1806 return;
1809 if (!valid_builtin_call (last.stmt))
1810 return;
1812 callee = gimple_call_fndecl (last.stmt);
1813 switch (DECL_FUNCTION_CODE (callee))
1815 case BUILT_IN_MEMCPY:
1816 case BUILT_IN_MEMCPY_CHK:
1817 break;
1818 default:
1819 return;
1822 len = gimple_call_arg (last.stmt, len_arg_no);
1823 if (tree_fits_uhwi_p (len))
1825 if (!tree_fits_uhwi_p (last.len)
1826 || integer_zerop (len)
1827 || tree_to_uhwi (len) != tree_to_uhwi (last.len) + 1)
1828 return;
1829 /* Don't adjust the length if it is divisible by 4, it is more efficient
1830 to store the extra '\0' in that case. */
1831 if ((tree_to_uhwi (len) & 3) == 0)
1832 return;
1834 /* Don't fold away an out of bounds access, as this defeats proper
1835 warnings. */
1836 tree dst = gimple_call_arg (last.stmt, 0);
1838 access_ref aref;
1839 tree size = compute_objsize (dst, stmt, 1, &aref, &ptr_qry);
1840 if (size && tree_int_cst_lt (size, len))
1841 return;
1843 else if (TREE_CODE (len) == SSA_NAME)
1845 gimple *def_stmt = SSA_NAME_DEF_STMT (len);
1846 if (!is_gimple_assign (def_stmt)
1847 || gimple_assign_rhs_code (def_stmt) != PLUS_EXPR
1848 || gimple_assign_rhs1 (def_stmt) != last.len
1849 || !integer_onep (gimple_assign_rhs2 (def_stmt)))
1850 return;
1852 else
1853 return;
1855 gimple_call_set_arg (last.stmt, len_arg_no, last.len);
1856 update_stmt (last.stmt);
1859 /* For an LHS that is an SSA_NAME that is the result of a strlen()
1860 call, or when BOUND is non-null, of a strnlen() call, set LHS
1861 range info to [0, min (MAX, BOUND)] when the range includes more
1862 than one value and return LHS. Otherwise, when the range
1863 [MIN, MAX] is such that MIN == MAX, return the tree representation
1864 of (MIN). The latter allows callers to fold suitable strnlen() calls
1865 to constants. */
1867 tree
1868 set_strlen_range (tree lhs, wide_int min, wide_int max,
1869 tree bound /* = NULL_TREE */)
1871 if (TREE_CODE (lhs) != SSA_NAME
1872 || !INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
1873 return NULL_TREE;
1875 if (bound)
1877 /* For strnlen, adjust MIN and MAX as necessary. If the bound
1878 is less than the size of the array set MAX to it. It it's
1879 greater than MAX and MAX is non-zero bump MAX down to account
1880 for the necessary terminating nul. Otherwise leave it alone. */
1881 if (TREE_CODE (bound) == INTEGER_CST)
1883 wide_int wibnd = wi::to_wide (bound);
1884 int cmp = wi::cmpu (wibnd, max);
1885 if (cmp < 0)
1886 max = wibnd;
1887 else if (cmp && wi::ne_p (max, min))
1888 --max;
1890 else if (TREE_CODE (bound) == SSA_NAME)
1892 value_range r;
1893 get_range_query (cfun)->range_of_expr (r, bound);
1894 if (!r.undefined_p ())
1896 /* For a bound in a known range, adjust the range determined
1897 above as necessary. For a bound in some anti-range or
1898 in an unknown range, use the range determined by callers. */
1899 if (wi::ltu_p (r.lower_bound (), min))
1900 min = r.lower_bound ();
1901 if (wi::ltu_p (r.upper_bound (), max))
1902 max = r.upper_bound ();
1907 if (min == max)
1908 return wide_int_to_tree (size_type_node, min);
1910 set_range_info (lhs, VR_RANGE, min, max);
1911 return lhs;
1914 /* For an LHS that is an SSA_NAME and for strlen() or strnlen() argument
1915 SRC, set LHS range info to [0, min (N, BOUND)] if SRC refers to
1916 a character array A[N] with unknown length bounded by N, and for
1917 strnlen(), by min (N, BOUND). */
1919 static tree
1920 maybe_set_strlen_range (tree lhs, tree src, tree bound)
1922 if (TREE_CODE (lhs) != SSA_NAME
1923 || !INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
1924 return NULL_TREE;
1926 if (TREE_CODE (src) == SSA_NAME)
1928 gimple *def = SSA_NAME_DEF_STMT (src);
1929 if (is_gimple_assign (def)
1930 && gimple_assign_rhs_code (def) == ADDR_EXPR)
1931 src = gimple_assign_rhs1 (def);
1934 /* The longest string is PTRDIFF_MAX - 1 bytes including the final
1935 NUL so that the difference between a pointer to just past it and
1936 one to its beginning is positive. */
1937 wide_int max = wi::to_wide (TYPE_MAX_VALUE (ptrdiff_type_node)) - 2;
1939 if (TREE_CODE (src) == ADDR_EXPR)
1941 /* The last array member of a struct can be bigger than its size
1942 suggests if it's treated as a poor-man's flexible array member. */
1943 src = TREE_OPERAND (src, 0);
1944 if (TREE_CODE (src) != MEM_REF
1945 && !array_at_struct_end_p (src))
1947 tree type = TREE_TYPE (src);
1948 tree size = TYPE_SIZE_UNIT (type);
1949 if (size
1950 && TREE_CODE (size) == INTEGER_CST
1951 && !integer_zerop (size))
1953 /* Even though such uses of strlen would be undefined,
1954 avoid relying on arrays of arrays in case some genius
1955 decides to call strlen on an unterminated array element
1956 that's followed by a terminated one. Likewise, avoid
1957 assuming that a struct array member is necessarily
1958 nul-terminated (the nul may be in the member that
1959 follows). In those cases, assume that the length
1960 of the string stored in such an array is bounded
1961 by the size of the enclosing object if one can be
1962 determined. */
1963 tree base = get_base_address (src);
1964 if (VAR_P (base))
1966 if (tree size = DECL_SIZE_UNIT (base))
1967 if (size
1968 && TREE_CODE (size) == INTEGER_CST
1969 && TREE_CODE (TREE_TYPE (base)) != POINTER_TYPE)
1970 max = wi::to_wide (size);
1974 /* For strlen() the upper bound above is equal to
1975 the longest string that can be stored in the array
1976 (i.e., it accounts for the terminating nul. For
1977 strnlen() bump up the maximum by one since the array
1978 need not be nul-terminated. */
1979 if (!bound && max != 0)
1980 --max;
1984 wide_int min = wi::zero (max.get_precision ());
1985 return set_strlen_range (lhs, min, max, bound);
1988 /* Diagnose buffer overflow by a STMT writing LEN + PLUS_ONE bytes,
1989 either into a region allocated for the object SI when non-null,
1990 or into an object designated by the LHS of STMT otherwise.
1991 For a call STMT, when CALL_LHS is set use its left hand side
1992 as the destination, otherwise use argument zero.
1993 When nonnull uses RVALS to determine range information.
1994 RAWMEM may be set by memcpy and other raw memory functions
1995 to allow accesses across subobject boundaries. */
1997 void
1998 strlen_pass::maybe_warn_overflow (gimple *stmt, bool call_lhs, tree len,
1999 strinfo *si, bool plus_one, bool rawmem)
2001 if (!len || warning_suppressed_p (stmt, OPT_Wstringop_overflow_))
2002 return;
2004 /* The DECL of the function performing the write if it is done
2005 by one. */
2006 tree writefn = NULL_TREE;
2007 /* The destination expression involved in the store or call STMT. */
2008 tree dest = NULL_TREE;
2010 if (is_gimple_assign (stmt))
2011 dest = gimple_assign_lhs (stmt);
2012 else if (is_gimple_call (stmt))
2014 if (call_lhs)
2015 dest = gimple_call_lhs (stmt);
2016 else
2018 gcc_assert (gimple_call_builtin_p (stmt, BUILT_IN_NORMAL));
2019 dest = gimple_call_arg (stmt, 0);
2022 if (!dest)
2023 return;
2024 writefn = gimple_call_fndecl (stmt);
2026 else
2027 return;
2029 if (warning_suppressed_p (dest, OPT_Wstringop_overflow_))
2030 return;
2032 const int ostype = rawmem ? 0 : 1;
2034 /* Use maximum precision to avoid overflow in the addition below.
2035 Make sure all operands have the same precision to keep wide_int
2036 from ICE'ing. */
2038 access_ref aref;
2039 /* The size of the destination region (which is smaller than
2040 the destination object for stores at a non-zero offset). */
2041 tree destsize = compute_objsize (dest, stmt, ostype, &aref, &ptr_qry);
2043 if (!destsize)
2045 aref.sizrng[0] = 0;
2046 aref.sizrng[1] = wi::to_offset (max_object_size ());
2049 /* Return early if the DESTSIZE size expression is the same as LEN
2050 and the offset into the destination is zero. This might happen
2051 in the case of a pair of malloc and memset calls to allocate
2052 an object and clear it as if by calloc. */
2053 if (destsize == len && !plus_one
2054 && aref.offrng[0] == 0 && aref.offrng[0] == aref.offrng[1])
2055 return;
2057 wide_int rng[2];
2058 if (!get_range (len, stmt, rng, ptr_qry.rvals))
2059 return;
2061 widest_int lenrng[2] =
2062 { widest_int::from (rng[0], SIGNED), widest_int::from (rng[1], SIGNED) };
2064 if (plus_one)
2066 lenrng[0] += 1;
2067 lenrng[1] += 1;
2070 /* The size of the remaining space in the destination computed
2071 as the size of the latter minus the offset into it. */
2072 widest_int spcrng[2];
2074 offset_int remrng[2];
2075 remrng[1] = aref.size_remaining (remrng);
2076 spcrng[0] = remrng[0] == -1 ? 0 : widest_int::from (remrng[0], UNSIGNED);
2077 spcrng[1] = widest_int::from (remrng[1], UNSIGNED);
2080 if (wi::leu_p (lenrng[0], spcrng[0])
2081 && wi::leu_p (lenrng[1], spcrng[1]))
2082 return;
2084 location_t loc = gimple_or_expr_nonartificial_location (stmt, dest);
2085 bool warned = false;
2086 if (wi::leu_p (lenrng[0], spcrng[1]))
2088 if (len != destsize
2089 && (!si || rawmem || !is_strlen_related_p (si->ptr, len)))
2090 return;
2092 warned = (writefn
2093 ? warning_at (loc, OPT_Wstringop_overflow_,
2094 "%qD writing one too many bytes into a region "
2095 "of a size that depends on %<strlen%>",
2096 writefn)
2097 : warning_at (loc, OPT_Wstringop_overflow_,
2098 "writing one too many bytes into a region "
2099 "of a size that depends on %<strlen%>"));
2101 else if (lenrng[0] == lenrng[1])
2103 if (spcrng[0] == spcrng[1])
2104 warned = (writefn
2105 ? warning_n (loc, OPT_Wstringop_overflow_,
2106 lenrng[0].to_uhwi (),
2107 "%qD writing %wu byte into a region "
2108 "of size %wu",
2109 "%qD writing %wu bytes into a region "
2110 "of size %wu",
2111 writefn, lenrng[0].to_uhwi (),
2112 spcrng[0].to_uhwi ())
2113 : warning_n (loc, OPT_Wstringop_overflow_,
2114 lenrng[0].to_uhwi (),
2115 "writing %wu byte into a region "
2116 "of size %wu",
2117 "writing %wu bytes into a region "
2118 "of size %wu",
2119 lenrng[0].to_uhwi (),
2120 spcrng[0].to_uhwi ()));
2121 else
2122 warned = (writefn
2123 ? warning_n (loc, OPT_Wstringop_overflow_,
2124 lenrng[0].to_uhwi (),
2125 "%qD writing %wu byte into a region "
2126 "of size between %wu and %wu",
2127 "%qD writing %wu bytes into a region "
2128 "of size between %wu and %wu",
2129 writefn, lenrng[0].to_uhwi (),
2130 spcrng[0].to_uhwi (), spcrng[1].to_uhwi ())
2131 : warning_n (loc, OPT_Wstringop_overflow_,
2132 lenrng[0].to_uhwi (),
2133 "writing %wu byte into a region "
2134 "of size between %wu and %wu",
2135 "writing %wu bytes into a region "
2136 "of size between %wu and %wu",
2137 lenrng[0].to_uhwi (),
2138 spcrng[0].to_uhwi (), spcrng[1].to_uhwi ()));
2140 else if (spcrng[0] == spcrng[1])
2141 warned = (writefn
2142 ? warning_at (loc, OPT_Wstringop_overflow_,
2143 "%qD writing between %wu and %wu bytes "
2144 "into a region of size %wu",
2145 writefn, lenrng[0].to_uhwi (),
2146 lenrng[1].to_uhwi (),
2147 spcrng[0].to_uhwi ())
2148 : warning_at (loc, OPT_Wstringop_overflow_,
2149 "writing between %wu and %wu bytes "
2150 "into a region of size %wu",
2151 lenrng[0].to_uhwi (),
2152 lenrng[1].to_uhwi (),
2153 spcrng[0].to_uhwi ()));
2154 else
2155 warned = (writefn
2156 ? warning_at (loc, OPT_Wstringop_overflow_,
2157 "%qD writing between %wu and %wu bytes "
2158 "into a region of size between %wu and %wu",
2159 writefn, lenrng[0].to_uhwi (),
2160 lenrng[1].to_uhwi (),
2161 spcrng[0].to_uhwi (), spcrng[1].to_uhwi ())
2162 : warning_at (loc, OPT_Wstringop_overflow_,
2163 "writing between %wu and %wu bytes "
2164 "into a region of size between %wu and %wu",
2165 lenrng[0].to_uhwi (),
2166 lenrng[1].to_uhwi (),
2167 spcrng[0].to_uhwi (), spcrng[1].to_uhwi ()));
2169 if (!warned)
2170 return;
2172 suppress_warning (stmt, OPT_Wstringop_overflow_);
2174 aref.inform_access (access_write_only);
2177 /* Convenience wrapper for the above. */
2179 void
2180 strlen_pass::maybe_warn_overflow (gimple *stmt, bool call_lhs,
2181 unsigned HOST_WIDE_INT len,
2182 strinfo *si, bool plus_one, bool rawmem)
2184 tree tlen = build_int_cst (size_type_node, len);
2185 maybe_warn_overflow (stmt, call_lhs, tlen, si, plus_one, rawmem);
2188 /* Handle a strlen call. If strlen of the argument is known, replace
2189 the strlen call with the known value, otherwise remember that strlen
2190 of the argument is stored in the lhs SSA_NAME. */
2192 void
2193 strlen_pass::handle_builtin_strlen ()
2195 gimple *stmt = gsi_stmt (m_gsi);
2196 tree lhs = gimple_call_lhs (stmt);
2198 if (lhs == NULL_TREE)
2199 return;
2201 location_t loc = gimple_location (stmt);
2202 tree callee = gimple_call_fndecl (stmt);
2203 tree src = gimple_call_arg (stmt, 0);
2204 tree bound = (DECL_FUNCTION_CODE (callee) == BUILT_IN_STRNLEN
2205 ? gimple_call_arg (stmt, 1) : NULL_TREE);
2206 int idx = get_stridx (src, stmt);
2207 if (idx || (bound && integer_zerop (bound)))
2209 strinfo *si = NULL;
2210 tree rhs;
2212 if (idx < 0)
2213 rhs = build_int_cst (TREE_TYPE (lhs), ~idx);
2214 else if (idx == 0)
2215 rhs = bound;
2216 else
2218 rhs = NULL_TREE;
2219 si = get_strinfo (idx);
2220 if (si != NULL)
2222 rhs = get_string_length (si);
2223 /* For strnlen, if bound is constant, even if si is not known
2224 to be zero terminated, if we know at least bound bytes are
2225 not zero, the return value will be bound. */
2226 if (rhs == NULL_TREE
2227 && bound != NULL_TREE
2228 && TREE_CODE (bound) == INTEGER_CST
2229 && si->nonzero_chars != NULL_TREE
2230 && TREE_CODE (si->nonzero_chars) == INTEGER_CST
2231 && tree_int_cst_le (bound, si->nonzero_chars))
2232 rhs = bound;
2235 if (rhs != NULL_TREE)
2237 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2239 fprintf (dump_file, "Optimizing: ");
2240 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2242 rhs = unshare_expr (rhs);
2243 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2244 rhs = fold_convert_loc (loc, TREE_TYPE (lhs), rhs);
2246 if (bound)
2247 rhs = fold_build2_loc (loc, MIN_EXPR, TREE_TYPE (rhs), rhs, bound);
2249 gimplify_and_update_call_from_tree (&m_gsi, rhs);
2250 stmt = gsi_stmt (m_gsi);
2251 update_stmt (stmt);
2252 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2254 fprintf (dump_file, "into: ");
2255 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2258 if (si != NULL
2259 /* Don't update anything for strnlen. */
2260 && bound == NULL_TREE
2261 && TREE_CODE (si->nonzero_chars) != SSA_NAME
2262 && TREE_CODE (si->nonzero_chars) != INTEGER_CST
2263 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
2265 si = unshare_strinfo (si);
2266 si->nonzero_chars = lhs;
2267 gcc_assert (si->full_string_p);
2270 if (strlen_to_stridx
2271 && (bound == NULL_TREE
2272 /* For strnlen record this only if the call is proven
2273 to return the same value as strlen would. */
2274 || (TREE_CODE (bound) == INTEGER_CST
2275 && TREE_CODE (rhs) == INTEGER_CST
2276 && tree_int_cst_lt (rhs, bound))))
2277 strlen_to_stridx->put (lhs, stridx_strlenloc (idx, loc));
2279 return;
2282 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
2283 return;
2285 if (idx == 0)
2286 idx = new_stridx (src);
2287 else
2289 strinfo *si = get_strinfo (idx);
2290 if (si != NULL)
2292 if (!si->full_string_p && !si->stmt)
2294 /* Until now we only had a lower bound on the string length.
2295 Install LHS as the actual length. */
2296 si = unshare_strinfo (si);
2297 tree old = si->nonzero_chars;
2298 si->nonzero_chars = lhs;
2299 si->full_string_p = true;
2300 if (old && TREE_CODE (old) == INTEGER_CST)
2302 old = fold_convert_loc (loc, TREE_TYPE (lhs), old);
2303 tree adj = fold_build2_loc (loc, MINUS_EXPR,
2304 TREE_TYPE (lhs), lhs, old);
2305 adjust_related_strinfos (loc, si, adj);
2306 /* Use the constant minimum length as the lower bound
2307 of the non-constant length. */
2308 wide_int min = wi::to_wide (old);
2309 wide_int max
2310 = wi::to_wide (TYPE_MAX_VALUE (ptrdiff_type_node)) - 2;
2311 set_strlen_range (lhs, min, max);
2313 else
2315 si->first = 0;
2316 si->prev = 0;
2317 si->next = 0;
2320 return;
2323 if (idx)
2325 if (!bound)
2327 /* Only store the new length information for calls to strlen(),
2328 not for those to strnlen(). */
2329 strinfo *si = new_strinfo (src, idx, lhs, true);
2330 set_strinfo (idx, si);
2331 find_equal_ptrs (src, idx);
2334 /* For SRC that is an array of N elements, set LHS's range
2335 to [0, min (N, BOUND)]. A constant return value means
2336 the range would have consisted of a single value. In
2337 that case, fold the result into the returned constant. */
2338 if (tree ret = maybe_set_strlen_range (lhs, src, bound))
2339 if (TREE_CODE (ret) == INTEGER_CST)
2341 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2343 fprintf (dump_file, "Optimizing: ");
2344 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2346 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (ret)))
2347 ret = fold_convert_loc (loc, TREE_TYPE (lhs), ret);
2348 gimplify_and_update_call_from_tree (&m_gsi, ret);
2349 stmt = gsi_stmt (m_gsi);
2350 update_stmt (stmt);
2351 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2353 fprintf (dump_file, "into: ");
2354 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2358 if (strlen_to_stridx && !bound)
2359 strlen_to_stridx->put (lhs, stridx_strlenloc (idx, loc));
2363 /* Handle a strchr call. If strlen of the first argument is known, replace
2364 the strchr (x, 0) call with the endptr or x + strlen, otherwise remember
2365 that lhs of the call is endptr and strlen of the argument is endptr - x. */
2367 void
2368 strlen_pass::handle_builtin_strchr ()
2370 gimple *stmt = gsi_stmt (m_gsi);
2371 tree lhs = gimple_call_lhs (stmt);
2373 if (lhs == NULL_TREE)
2374 return;
2376 if (!integer_zerop (gimple_call_arg (stmt, 1)))
2377 return;
2379 tree src = gimple_call_arg (stmt, 0);
2381 /* Avoid folding if the first argument is not a nul-terminated array.
2382 Defer warning until later. */
2383 if (!check_nul_terminated_array (NULL_TREE, src))
2384 return;
2386 int idx = get_stridx (src, stmt);
2387 if (idx)
2389 strinfo *si = NULL;
2390 tree rhs;
2392 if (idx < 0)
2393 rhs = build_int_cst (size_type_node, ~idx);
2394 else
2396 rhs = NULL_TREE;
2397 si = get_strinfo (idx);
2398 if (si != NULL)
2399 rhs = get_string_length (si);
2401 if (rhs != NULL_TREE)
2403 location_t loc = gimple_location (stmt);
2405 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2407 fprintf (dump_file, "Optimizing: ");
2408 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2410 if (si != NULL && si->endptr != NULL_TREE)
2412 rhs = unshare_expr (si->endptr);
2413 if (!useless_type_conversion_p (TREE_TYPE (lhs),
2414 TREE_TYPE (rhs)))
2415 rhs = fold_convert_loc (loc, TREE_TYPE (lhs), rhs);
2417 else
2419 rhs = fold_convert_loc (loc, sizetype, unshare_expr (rhs));
2420 rhs = fold_build2_loc (loc, POINTER_PLUS_EXPR,
2421 TREE_TYPE (src), src, rhs);
2422 if (!useless_type_conversion_p (TREE_TYPE (lhs),
2423 TREE_TYPE (rhs)))
2424 rhs = fold_convert_loc (loc, TREE_TYPE (lhs), rhs);
2426 gimplify_and_update_call_from_tree (&m_gsi, rhs);
2427 stmt = gsi_stmt (m_gsi);
2428 update_stmt (stmt);
2429 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2431 fprintf (dump_file, "into: ");
2432 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2434 if (si != NULL
2435 && si->endptr == NULL_TREE
2436 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
2438 si = unshare_strinfo (si);
2439 si->endptr = lhs;
2441 zero_length_string (lhs, si);
2442 return;
2445 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
2446 return;
2447 if (TREE_CODE (src) != SSA_NAME || !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (src))
2449 if (idx == 0)
2450 idx = new_stridx (src);
2451 else if (get_strinfo (idx) != NULL)
2453 zero_length_string (lhs, NULL);
2454 return;
2456 if (idx)
2458 location_t loc = gimple_location (stmt);
2459 tree lhsu = fold_convert_loc (loc, size_type_node, lhs);
2460 tree srcu = fold_convert_loc (loc, size_type_node, src);
2461 tree length = fold_build2_loc (loc, MINUS_EXPR,
2462 size_type_node, lhsu, srcu);
2463 strinfo *si = new_strinfo (src, idx, length, true);
2464 si->endptr = lhs;
2465 set_strinfo (idx, si);
2466 find_equal_ptrs (src, idx);
2467 zero_length_string (lhs, si);
2470 else
2471 zero_length_string (lhs, NULL);
2474 /* Handle a strcpy-like ({st{r,p}cpy,__st{r,p}cpy_chk}) call.
2475 If strlen of the second argument is known, strlen of the first argument
2476 is the same after this call. Furthermore, attempt to convert it to
2477 memcpy. Uses RVALS to determine range information. */
2479 void
2480 strlen_pass::handle_builtin_strcpy (built_in_function bcode)
2482 int idx, didx;
2483 tree src, dst, srclen, len, lhs, type, fn, oldlen;
2484 bool success;
2485 gimple *stmt = gsi_stmt (m_gsi);
2486 strinfo *si, *dsi, *olddsi, *zsi;
2487 location_t loc;
2489 src = gimple_call_arg (stmt, 1);
2490 dst = gimple_call_arg (stmt, 0);
2491 lhs = gimple_call_lhs (stmt);
2492 idx = get_stridx (src, stmt);
2493 si = NULL;
2494 if (idx > 0)
2495 si = get_strinfo (idx);
2497 didx = get_stridx (dst, stmt);
2498 olddsi = NULL;
2499 oldlen = NULL_TREE;
2500 if (didx > 0)
2501 olddsi = get_strinfo (didx);
2502 else if (didx < 0)
2503 return;
2505 if (olddsi != NULL)
2506 adjust_last_stmt (olddsi, stmt, false);
2508 srclen = NULL_TREE;
2509 if (si != NULL)
2510 srclen = get_string_length (si);
2511 else if (idx < 0)
2512 srclen = build_int_cst (size_type_node, ~idx);
2514 maybe_warn_overflow (stmt, false, srclen, olddsi, true);
2516 if (olddsi != NULL)
2517 adjust_last_stmt (olddsi, stmt, false);
2519 loc = gimple_location (stmt);
2520 if (srclen == NULL_TREE)
2521 switch (bcode)
2523 case BUILT_IN_STRCPY:
2524 case BUILT_IN_STRCPY_CHK:
2525 if (lhs != NULL_TREE || !builtin_decl_implicit_p (BUILT_IN_STPCPY))
2526 return;
2527 break;
2528 case BUILT_IN_STPCPY:
2529 case BUILT_IN_STPCPY_CHK:
2530 if (lhs == NULL_TREE)
2531 return;
2532 else
2534 tree lhsuint = fold_convert_loc (loc, size_type_node, lhs);
2535 srclen = fold_convert_loc (loc, size_type_node, dst);
2536 srclen = fold_build2_loc (loc, MINUS_EXPR, size_type_node,
2537 lhsuint, srclen);
2539 break;
2540 default:
2541 gcc_unreachable ();
2544 if (didx == 0)
2546 didx = new_stridx (dst);
2547 if (didx == 0)
2548 return;
2550 if (olddsi != NULL)
2552 oldlen = olddsi->nonzero_chars;
2553 dsi = unshare_strinfo (olddsi);
2554 dsi->nonzero_chars = srclen;
2555 dsi->full_string_p = (srclen != NULL_TREE);
2556 /* Break the chain, so adjust_related_strinfo on later pointers in
2557 the chain won't adjust this one anymore. */
2558 dsi->next = 0;
2559 dsi->stmt = NULL;
2560 dsi->endptr = NULL_TREE;
2562 else
2564 dsi = new_strinfo (dst, didx, srclen, srclen != NULL_TREE);
2565 set_strinfo (didx, dsi);
2566 find_equal_ptrs (dst, didx);
2568 dsi->writable = true;
2569 dsi->dont_invalidate = true;
2571 if (dsi->nonzero_chars == NULL_TREE)
2573 strinfo *chainsi;
2575 /* If string length of src is unknown, use delayed length
2576 computation. If string length of dst will be needed, it
2577 can be computed by transforming this strcpy call into
2578 stpcpy and subtracting dst from the return value. */
2580 /* Look for earlier strings whose length could be determined if
2581 this strcpy is turned into an stpcpy. */
2583 if (dsi->prev != 0 && (chainsi = verify_related_strinfos (dsi)) != NULL)
2585 for (; chainsi && chainsi != dsi; chainsi = get_strinfo (chainsi->next))
2587 /* When setting a stmt for delayed length computation
2588 prevent all strinfos through dsi from being
2589 invalidated. */
2590 chainsi = unshare_strinfo (chainsi);
2591 chainsi->stmt = stmt;
2592 chainsi->nonzero_chars = NULL_TREE;
2593 chainsi->full_string_p = false;
2594 chainsi->endptr = NULL_TREE;
2595 chainsi->dont_invalidate = true;
2598 dsi->stmt = stmt;
2600 /* Try to detect overlap before returning. This catches cases
2601 like strcpy (d, d + n) where n is non-constant whose range
2602 is such that (n <= strlen (d) holds).
2604 OLDDSI->NONZERO_chars may have been reset by this point with
2605 oldlen holding it original value. */
2606 if (olddsi && oldlen)
2608 /* Add 1 for the terminating NUL. */
2609 tree type = TREE_TYPE (oldlen);
2610 oldlen = fold_build2 (PLUS_EXPR, type, oldlen,
2611 build_int_cst (type, 1));
2612 check_bounds_or_overlap (stmt, olddsi->ptr, src, oldlen, NULL_TREE);
2615 return;
2618 if (olddsi != NULL)
2620 tree adj = NULL_TREE;
2621 if (oldlen == NULL_TREE)
2623 else if (integer_zerop (oldlen))
2624 adj = srclen;
2625 else if (TREE_CODE (oldlen) == INTEGER_CST
2626 || TREE_CODE (srclen) == INTEGER_CST)
2627 adj = fold_build2_loc (loc, MINUS_EXPR,
2628 TREE_TYPE (srclen), srclen,
2629 fold_convert_loc (loc, TREE_TYPE (srclen),
2630 oldlen));
2631 if (adj != NULL_TREE)
2632 adjust_related_strinfos (loc, dsi, adj);
2633 else
2634 dsi->prev = 0;
2636 /* strcpy src may not overlap dst, so src doesn't need to be
2637 invalidated either. */
2638 if (si != NULL)
2639 si->dont_invalidate = true;
2641 fn = NULL_TREE;
2642 zsi = NULL;
2643 switch (bcode)
2645 case BUILT_IN_STRCPY:
2646 fn = builtin_decl_implicit (BUILT_IN_MEMCPY);
2647 if (lhs)
2648 ssa_ver_to_stridx[SSA_NAME_VERSION (lhs)] = didx;
2649 break;
2650 case BUILT_IN_STRCPY_CHK:
2651 fn = builtin_decl_explicit (BUILT_IN_MEMCPY_CHK);
2652 if (lhs)
2653 ssa_ver_to_stridx[SSA_NAME_VERSION (lhs)] = didx;
2654 break;
2655 case BUILT_IN_STPCPY:
2656 /* This would need adjustment of the lhs (subtract one),
2657 or detection that the trailing '\0' doesn't need to be
2658 written, if it will be immediately overwritten.
2659 fn = builtin_decl_explicit (BUILT_IN_MEMPCPY); */
2660 if (lhs)
2662 dsi->endptr = lhs;
2663 zsi = zero_length_string (lhs, dsi);
2665 break;
2666 case BUILT_IN_STPCPY_CHK:
2667 /* This would need adjustment of the lhs (subtract one),
2668 or detection that the trailing '\0' doesn't need to be
2669 written, if it will be immediately overwritten.
2670 fn = builtin_decl_explicit (BUILT_IN_MEMPCPY_CHK); */
2671 if (lhs)
2673 dsi->endptr = lhs;
2674 zsi = zero_length_string (lhs, dsi);
2676 break;
2677 default:
2678 gcc_unreachable ();
2680 if (zsi != NULL)
2681 zsi->dont_invalidate = true;
2683 if (fn)
2685 tree args = TYPE_ARG_TYPES (TREE_TYPE (fn));
2686 type = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (args)));
2688 else
2689 type = size_type_node;
2691 len = fold_convert_loc (loc, type, unshare_expr (srclen));
2692 len = fold_build2_loc (loc, PLUS_EXPR, type, len, build_int_cst (type, 1));
2694 /* Disable warning for the transformed statement? */
2695 opt_code no_warning_opt = no_warning;
2697 if (const strinfo *chksi = si ? olddsi ? olddsi : dsi : NULL)
2699 no_warning_opt = check_bounds_or_overlap (stmt, chksi->ptr, si->ptr,
2700 NULL_TREE, len);
2701 if (no_warning_opt)
2702 suppress_warning (stmt, no_warning_opt);
2705 if (fn == NULL_TREE)
2706 return;
2708 len = force_gimple_operand_gsi (&m_gsi, len, true, NULL_TREE, true,
2709 GSI_SAME_STMT);
2710 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2712 fprintf (dump_file, "Optimizing: ");
2713 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2715 if (gimple_call_num_args (stmt) == 2)
2716 success = update_gimple_call (&m_gsi, fn, 3, dst, src, len);
2717 else
2718 success = update_gimple_call (&m_gsi, fn, 4, dst, src, len,
2719 gimple_call_arg (stmt, 2));
2720 if (success)
2722 stmt = gsi_stmt (m_gsi);
2723 update_stmt (stmt);
2724 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2726 fprintf (dump_file, "into: ");
2727 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2729 /* Allow adjust_last_stmt to decrease this memcpy's size. */
2730 laststmt.stmt = stmt;
2731 laststmt.len = srclen;
2732 laststmt.stridx = dsi->idx;
2734 else if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2735 fprintf (dump_file, "not possible.\n");
2737 if (no_warning_opt)
2738 suppress_warning (stmt, no_warning_opt);
2741 /* Check the size argument to the built-in forms of stpncpy and strncpy
2742 for out-of-bounds offsets or overlapping access, and to see if the
2743 size argument is derived from a call to strlen() on the source argument,
2744 and if so, issue an appropriate warning. */
2746 void
2747 strlen_pass::handle_builtin_strncat (built_in_function)
2749 /* Same as stxncpy(). */
2750 handle_builtin_stxncpy_strncat (true);
2753 /* Return true if LEN depends on a call to strlen(SRC) in an interesting
2754 way. LEN can either be an integer expression, or a pointer (to char).
2755 When it is the latter (such as in recursive calls to self) it is
2756 assumed to be the argument in some call to strlen() whose relationship
2757 to SRC is being ascertained. */
2759 bool
2760 is_strlen_related_p (tree src, tree len)
2762 if (TREE_CODE (TREE_TYPE (len)) == POINTER_TYPE
2763 && operand_equal_p (src, len, 0))
2764 return true;
2766 if (TREE_CODE (len) != SSA_NAME)
2767 return false;
2769 if (TREE_CODE (src) == SSA_NAME)
2771 gimple *srcdef = SSA_NAME_DEF_STMT (src);
2772 if (is_gimple_assign (srcdef))
2774 /* Handle bitwise AND used in conversions from wider size_t
2775 to narrower unsigned types. */
2776 tree_code code = gimple_assign_rhs_code (srcdef);
2777 if (code == BIT_AND_EXPR
2778 || code == NOP_EXPR)
2779 return is_strlen_related_p (gimple_assign_rhs1 (srcdef), len);
2781 return false;
2784 if (gimple_call_builtin_p (srcdef, BUILT_IN_NORMAL))
2786 /* If SRC is the result of a call to an allocation function
2787 or strlen, use the function's argument instead. */
2788 tree func = gimple_call_fndecl (srcdef);
2789 built_in_function code = DECL_FUNCTION_CODE (func);
2790 if (code == BUILT_IN_ALLOCA
2791 || code == BUILT_IN_ALLOCA_WITH_ALIGN
2792 || code == BUILT_IN_MALLOC
2793 || code == BUILT_IN_STRLEN)
2794 return is_strlen_related_p (gimple_call_arg (srcdef, 0), len);
2796 /* FIXME: Handle other functions with attribute alloc_size. */
2797 return false;
2801 gimple *lendef = SSA_NAME_DEF_STMT (len);
2802 if (!lendef)
2803 return false;
2805 if (is_gimple_call (lendef))
2807 tree func = gimple_call_fndecl (lendef);
2808 if (!valid_builtin_call (lendef)
2809 || DECL_FUNCTION_CODE (func) != BUILT_IN_STRLEN)
2810 return false;
2812 tree arg = gimple_call_arg (lendef, 0);
2813 return is_strlen_related_p (src, arg);
2816 if (!is_gimple_assign (lendef))
2817 return false;
2819 tree_code code = gimple_assign_rhs_code (lendef);
2820 tree rhs1 = gimple_assign_rhs1 (lendef);
2821 tree rhstype = TREE_TYPE (rhs1);
2823 if ((TREE_CODE (rhstype) == POINTER_TYPE && code == POINTER_PLUS_EXPR)
2824 || (INTEGRAL_TYPE_P (rhstype)
2825 && (code == BIT_AND_EXPR
2826 || code == NOP_EXPR)))
2828 /* Pointer plus (an integer), and truncation are considered among
2829 the (potentially) related expressions to strlen. */
2830 return is_strlen_related_p (src, rhs1);
2833 if (tree rhs2 = gimple_assign_rhs2 (lendef))
2835 /* Integer subtraction is considered strlen-related when both
2836 arguments are integers and second one is strlen-related. */
2837 rhstype = TREE_TYPE (rhs2);
2838 if (INTEGRAL_TYPE_P (rhstype) && code == MINUS_EXPR)
2839 return is_strlen_related_p (src, rhs2);
2842 return false;
2845 /* Called by handle_builtin_stxncpy_strncat and by
2846 gimple_fold_builtin_strncpy in gimple-fold.c.
2847 Check to see if the specified bound is a) equal to the size of
2848 the destination DST and if so, b) if it's immediately followed by
2849 DST[CNT - 1] = '\0'. If a) holds and b) does not, warn. Otherwise,
2850 do nothing. Return true if diagnostic has been issued.
2852 The purpose is to diagnose calls to strncpy and stpncpy that do
2853 not nul-terminate the copy while allowing for the idiom where
2854 such a call is immediately followed by setting the last element
2855 to nul, as in:
2856 char a[32];
2857 strncpy (a, s, sizeof a);
2858 a[sizeof a - 1] = '\0';
2861 bool
2862 maybe_diag_stxncpy_trunc (gimple_stmt_iterator gsi, tree src, tree cnt,
2863 pointer_query *ptr_qry /* = NULL */)
2865 gimple *stmt = gsi_stmt (gsi);
2866 if (warning_suppressed_p (stmt, OPT_Wstringop_truncation))
2867 return false;
2869 wide_int cntrange[2];
2870 value_range r;
2871 if (!get_range_query (cfun)->range_of_expr (r, cnt)
2872 || r.varying_p ()
2873 || r.undefined_p ())
2874 return false;
2876 cntrange[0] = wi::to_wide (r.min ());
2877 cntrange[1] = wi::to_wide (r.max ());
2878 if (r.kind () == VR_ANTI_RANGE)
2880 wide_int maxobjsize = wi::to_wide (TYPE_MAX_VALUE (ptrdiff_type_node));
2882 if (wi::ltu_p (cntrange[1], maxobjsize))
2884 cntrange[0] = cntrange[1] + 1;
2885 cntrange[1] = maxobjsize;
2887 else
2889 cntrange[1] = cntrange[0] - 1;
2890 cntrange[0] = wi::zero (TYPE_PRECISION (TREE_TYPE (cnt)));
2894 /* Negative value is the constant string length. If it's less than
2895 the lower bound there is no truncation. Avoid calling get_stridx()
2896 when ssa_ver_to_stridx is empty. That implies the caller isn't
2897 running under the control of this pass and ssa_ver_to_stridx hasn't
2898 been created yet. */
2899 int sidx = ssa_ver_to_stridx.length () ? get_stridx (src, stmt) : 0;
2900 if (sidx < 0 && wi::gtu_p (cntrange[0], ~sidx))
2901 return false;
2903 tree dst = gimple_call_arg (stmt, 0);
2904 tree dstdecl = dst;
2905 if (TREE_CODE (dstdecl) == ADDR_EXPR)
2906 dstdecl = TREE_OPERAND (dstdecl, 0);
2908 tree ref = NULL_TREE;
2910 if (!sidx)
2912 /* If the source is a non-string return early to avoid warning
2913 for possible truncation (if the truncation is certain SIDX
2914 is non-zero). */
2915 tree srcdecl = gimple_call_arg (stmt, 1);
2916 if (TREE_CODE (srcdecl) == ADDR_EXPR)
2917 srcdecl = TREE_OPERAND (srcdecl, 0);
2918 if (get_attr_nonstring_decl (srcdecl, &ref))
2919 return false;
2922 /* Likewise, if the destination refers to an array/pointer declared
2923 nonstring return early. */
2924 if (get_attr_nonstring_decl (dstdecl, &ref))
2925 return false;
2927 /* Look for dst[i] = '\0'; after the stxncpy() call and if found
2928 avoid the truncation warning. */
2929 gsi_next_nondebug (&gsi);
2930 gimple *next_stmt = gsi_stmt (gsi);
2931 if (!next_stmt)
2933 /* When there is no statement in the same basic block check
2934 the immediate successor block. */
2935 if (basic_block bb = gimple_bb (stmt))
2937 if (single_succ_p (bb))
2939 /* For simplicity, ignore blocks with multiple outgoing
2940 edges for now and only consider successor blocks along
2941 normal edges. */
2942 edge e = EDGE_SUCC (bb, 0);
2943 if (!(e->flags & EDGE_ABNORMAL))
2945 gsi = gsi_start_bb (e->dest);
2946 next_stmt = gsi_stmt (gsi);
2947 if (next_stmt && is_gimple_debug (next_stmt))
2949 gsi_next_nondebug (&gsi);
2950 next_stmt = gsi_stmt (gsi);
2957 if (next_stmt && is_gimple_assign (next_stmt))
2959 tree lhs = gimple_assign_lhs (next_stmt);
2960 tree_code code = TREE_CODE (lhs);
2961 if (code == ARRAY_REF || code == MEM_REF)
2962 lhs = TREE_OPERAND (lhs, 0);
2964 tree func = gimple_call_fndecl (stmt);
2965 if (DECL_FUNCTION_CODE (func) == BUILT_IN_STPNCPY)
2967 tree ret = gimple_call_lhs (stmt);
2968 if (ret && operand_equal_p (ret, lhs, 0))
2969 return false;
2972 /* Determine the base address and offset of the reference,
2973 ignoring the innermost array index. */
2974 if (TREE_CODE (ref) == ARRAY_REF)
2975 ref = TREE_OPERAND (ref, 0);
2977 poly_int64 dstoff;
2978 tree dstbase = get_addr_base_and_unit_offset (ref, &dstoff);
2980 poly_int64 lhsoff;
2981 tree lhsbase = get_addr_base_and_unit_offset (lhs, &lhsoff);
2982 if (lhsbase
2983 && dstbase
2984 && known_eq (dstoff, lhsoff)
2985 && operand_equal_p (dstbase, lhsbase, 0))
2986 return false;
2989 int prec = TYPE_PRECISION (TREE_TYPE (cnt));
2990 wide_int lenrange[2];
2991 if (strinfo *sisrc = sidx > 0 ? get_strinfo (sidx) : NULL)
2993 lenrange[0] = (sisrc->nonzero_chars
2994 && TREE_CODE (sisrc->nonzero_chars) == INTEGER_CST
2995 ? wi::to_wide (sisrc->nonzero_chars)
2996 : wi::zero (prec));
2997 lenrange[1] = lenrange[0];
2999 else if (sidx < 0)
3000 lenrange[0] = lenrange[1] = wi::shwi (~sidx, prec);
3001 else
3003 c_strlen_data lendata = { };
3004 /* Set MAXBOUND to an arbitrary non-null non-integer node as a request
3005 to have it set to the length of the longest string in a PHI. */
3006 lendata.maxbound = src;
3007 get_range_strlen (src, &lendata, /* eltsize = */1);
3008 if (TREE_CODE (lendata.minlen) == INTEGER_CST
3009 && TREE_CODE (lendata.maxbound) == INTEGER_CST)
3011 /* When LENDATA.MAXLEN is unknown, reset LENDATA.MINLEN
3012 which stores the length of the shortest known string. */
3013 if (integer_all_onesp (lendata.maxlen))
3014 lenrange[0] = wi::shwi (0, prec);
3015 else
3016 lenrange[0] = wi::to_wide (lendata.minlen, prec);
3017 lenrange[1] = wi::to_wide (lendata.maxbound, prec);
3019 else
3021 lenrange[0] = wi::shwi (0, prec);
3022 lenrange[1] = wi::shwi (-1, prec);
3026 location_t callloc = gimple_or_expr_nonartificial_location (stmt, dst);
3027 tree func = gimple_call_fndecl (stmt);
3029 if (lenrange[0] != 0 || !wi::neg_p (lenrange[1]))
3031 /* If the longest source string is shorter than the lower bound
3032 of the specified count the copy is definitely nul-terminated. */
3033 if (wi::ltu_p (lenrange[1], cntrange[0]))
3034 return false;
3036 if (wi::neg_p (lenrange[1]))
3038 /* The length of one of the strings is unknown but at least
3039 one has non-zero length and that length is stored in
3040 LENRANGE[1]. Swap the bounds to force a "may be truncated"
3041 warning below. */
3042 lenrange[1] = lenrange[0];
3043 lenrange[0] = wi::shwi (0, prec);
3046 /* Set to true for strncat whose bound is derived from the length
3047 of the destination (the expected usage pattern). */
3048 bool cat_dstlen_bounded = false;
3049 if (DECL_FUNCTION_CODE (func) == BUILT_IN_STRNCAT)
3050 cat_dstlen_bounded = is_strlen_related_p (dst, cnt);
3052 if (lenrange[0] == cntrange[1] && cntrange[0] == cntrange[1])
3053 return warning_n (callloc, OPT_Wstringop_truncation,
3054 cntrange[0].to_uhwi (),
3055 "%qD output truncated before terminating "
3056 "nul copying %E byte from a string of the "
3057 "same length",
3058 "%qD output truncated before terminating nul "
3059 "copying %E bytes from a string of the same "
3060 "length",
3061 func, cnt);
3062 else if (!cat_dstlen_bounded)
3064 if (wi::geu_p (lenrange[0], cntrange[1]))
3066 /* The shortest string is longer than the upper bound of
3067 the count so the truncation is certain. */
3068 if (cntrange[0] == cntrange[1])
3069 return warning_n (callloc, OPT_Wstringop_truncation,
3070 cntrange[0].to_uhwi (),
3071 "%qD output truncated copying %E byte "
3072 "from a string of length %wu",
3073 "%qD output truncated copying %E bytes "
3074 "from a string of length %wu",
3075 func, cnt, lenrange[0].to_uhwi ());
3077 return warning_at (callloc, OPT_Wstringop_truncation,
3078 "%qD output truncated copying between %wu "
3079 "and %wu bytes from a string of length %wu",
3080 func, cntrange[0].to_uhwi (),
3081 cntrange[1].to_uhwi (), lenrange[0].to_uhwi ());
3083 else if (wi::geu_p (lenrange[1], cntrange[1]))
3085 /* The longest string is longer than the upper bound of
3086 the count so the truncation is possible. */
3087 if (cntrange[0] == cntrange[1])
3088 return warning_n (callloc, OPT_Wstringop_truncation,
3089 cntrange[0].to_uhwi (),
3090 "%qD output may be truncated copying %E "
3091 "byte from a string of length %wu",
3092 "%qD output may be truncated copying %E "
3093 "bytes from a string of length %wu",
3094 func, cnt, lenrange[1].to_uhwi ());
3096 return warning_at (callloc, OPT_Wstringop_truncation,
3097 "%qD output may be truncated copying between "
3098 "%wu and %wu bytes from a string of length %wu",
3099 func, cntrange[0].to_uhwi (),
3100 cntrange[1].to_uhwi (), lenrange[1].to_uhwi ());
3104 if (!cat_dstlen_bounded
3105 && cntrange[0] != cntrange[1]
3106 && wi::leu_p (cntrange[0], lenrange[0])
3107 && wi::leu_p (cntrange[1], lenrange[0] + 1))
3109 /* If the source (including the terminating nul) is longer than
3110 the lower bound of the specified count but shorter than the
3111 upper bound the copy may (but need not) be truncated. */
3112 return warning_at (callloc, OPT_Wstringop_truncation,
3113 "%qD output may be truncated copying between "
3114 "%wu and %wu bytes from a string of length %wu",
3115 func, cntrange[0].to_uhwi (),
3116 cntrange[1].to_uhwi (), lenrange[0].to_uhwi ());
3120 access_ref aref;
3121 if (tree dstsize = compute_objsize (dst, stmt, 1, &aref, ptr_qry))
3123 /* The source length is unknown. Try to determine the destination
3124 size and see if it matches the specified bound. If not, bail.
3125 Otherwise go on to see if it should be diagnosed for possible
3126 truncation. */
3127 if (!dstsize)
3128 return false;
3130 if (wi::to_wide (dstsize) != cntrange[1])
3131 return false;
3133 /* Avoid warning for strncpy(a, b, N) calls where the following
3134 equalities hold:
3135 N == sizeof a && N == sizeof b */
3136 if (tree srcsize = compute_objsize (src, stmt, 1, &aref, ptr_qry))
3137 if (wi::to_wide (srcsize) == cntrange[1])
3138 return false;
3140 if (cntrange[0] == cntrange[1])
3141 return warning_at (callloc, OPT_Wstringop_truncation,
3142 "%qD specified bound %E equals destination size",
3143 func, cnt);
3146 return false;
3149 /* Check the arguments to the built-in forms of stpncpy, strncpy, and
3150 strncat, for out-of-bounds offsets or overlapping access, and to see
3151 if the size is derived from calling strlen() on the source argument,
3152 and if so, issue the appropriate warning.
3153 APPEND_P is true for strncat. */
3155 void
3156 strlen_pass::handle_builtin_stxncpy_strncat (bool append_p)
3158 if (!strlen_to_stridx)
3159 return;
3161 gimple *stmt = gsi_stmt (m_gsi);
3163 tree dst = gimple_call_arg (stmt, 0);
3164 tree src = gimple_call_arg (stmt, 1);
3165 tree len = gimple_call_arg (stmt, 2);
3166 /* An upper bound of the size of the destination. */
3167 tree dstsize = NULL_TREE;
3168 /* The length of the destination and source strings (plus 1 for those
3169 whose FULL_STRING_P is set, i.e., whose length is exact rather than
3170 a lower bound). */
3171 tree dstlenp1 = NULL_TREE, srclenp1 = NULL_TREE;;
3173 int didx = get_stridx (dst, stmt);
3174 if (strinfo *sidst = didx > 0 ? get_strinfo (didx) : NULL)
3176 /* Compute the size of the destination string including the nul
3177 if it is known to be nul-terminated. */
3178 if (sidst->nonzero_chars)
3180 if (sidst->full_string_p)
3182 /* String is known to be nul-terminated. */
3183 tree type = TREE_TYPE (sidst->nonzero_chars);
3184 dstlenp1 = fold_build2 (PLUS_EXPR, type, sidst->nonzero_chars,
3185 build_int_cst (type, 1));
3187 else
3188 dstlenp1 = sidst->nonzero_chars;
3190 else if (TREE_CODE (sidst->ptr) == SSA_NAME)
3192 gimple *def_stmt = SSA_NAME_DEF_STMT (sidst->ptr);
3193 dstsize = gimple_call_alloc_size (def_stmt);
3196 dst = sidst->ptr;
3199 int sidx = get_stridx (src, stmt);
3200 strinfo *sisrc = sidx > 0 ? get_strinfo (sidx) : NULL;
3201 if (sisrc)
3203 /* strncat() and strncpy() can modify the source string by writing
3204 over the terminating nul so SISRC->DONT_INVALIDATE must be left
3205 clear. */
3207 /* Compute the size of the source string including the terminating
3208 nul if its known to be nul-terminated. */
3209 if (sisrc->nonzero_chars)
3211 if (sisrc->full_string_p)
3213 tree type = TREE_TYPE (sisrc->nonzero_chars);
3214 srclenp1 = fold_build2 (PLUS_EXPR, type, sisrc->nonzero_chars,
3215 build_int_cst (type, 1));
3217 else
3218 srclenp1 = sisrc->nonzero_chars;
3221 src = sisrc->ptr;
3223 else
3224 srclenp1 = NULL_TREE;
3226 opt_code opt = check_bounds_or_overlap (stmt, dst, src, dstlenp1, srclenp1);
3227 if (opt != no_warning)
3229 suppress_warning (stmt, opt);
3230 return;
3233 /* If the length argument was computed from strlen(S) for some string
3234 S retrieve the strinfo index for the string (PSS->FIRST) along with
3235 the location of the strlen() call (PSS->SECOND). */
3236 stridx_strlenloc *pss = strlen_to_stridx->get (len);
3237 if (!pss || pss->first <= 0)
3239 if (maybe_diag_stxncpy_trunc (m_gsi, src, len))
3240 suppress_warning (stmt, OPT_Wstringop_truncation);
3242 return;
3245 /* Retrieve the strinfo data for the string S that LEN was computed
3246 from as some function F of strlen (S) (i.e., LEN need not be equal
3247 to strlen(S)). */
3248 strinfo *silen = get_strinfo (pss->first);
3250 location_t callloc = gimple_or_expr_nonartificial_location (stmt, dst);
3252 tree func = gimple_call_fndecl (stmt);
3254 bool warned = false;
3256 /* When -Wstringop-truncation is set, try to determine truncation
3257 before diagnosing possible overflow. Truncation is implied by
3258 the LEN argument being equal to strlen(SRC), regardless of
3259 whether its value is known. Otherwise, when appending, or
3260 when copying into a destination of known size, issue the more
3261 generic -Wstringop-overflow which triggers for LEN arguments
3262 that in any meaningful way depend on strlen(SRC). */
3263 if (!append_p
3264 && sisrc == silen
3265 && is_strlen_related_p (src, len)
3266 && warning_at (callloc, OPT_Wstringop_truncation,
3267 "%qD output truncated before terminating nul "
3268 "copying as many bytes from a string as its length",
3269 func))
3270 warned = true;
3271 else if ((append_p || !dstsize || len == dstlenp1)
3272 && silen && is_strlen_related_p (src, silen->ptr))
3274 /* Issue -Wstringop-overflow when appending or when writing into
3275 a destination of a known size. Otherwise, when copying into
3276 a destination of an unknown size, it's truncation. */
3277 opt_code opt = (append_p || dstsize
3278 ? OPT_Wstringop_overflow_ : OPT_Wstringop_truncation);
3279 warned = warning_at (callloc, opt,
3280 "%qD specified bound depends on the length "
3281 "of the source argument",
3282 func);
3284 if (warned)
3286 location_t strlenloc = pss->second;
3287 if (strlenloc != UNKNOWN_LOCATION && strlenloc != callloc)
3288 inform (strlenloc, "length computed here");
3292 /* Handle a memcpy-like ({mem{,p}cpy,__mem{,p}cpy_chk}) call.
3293 If strlen of the second argument is known and length of the third argument
3294 is that plus one, strlen of the first argument is the same after this
3295 call. Uses RVALS to determine range information. */
3297 void
3298 strlen_pass::handle_builtin_memcpy (built_in_function bcode)
3300 tree lhs, oldlen, newlen;
3301 gimple *stmt = gsi_stmt (m_gsi);
3302 strinfo *si, *dsi;
3304 tree len = gimple_call_arg (stmt, 2);
3305 tree src = gimple_call_arg (stmt, 1);
3306 tree dst = gimple_call_arg (stmt, 0);
3308 int didx = get_stridx (dst, stmt);
3309 strinfo *olddsi = NULL;
3310 if (didx > 0)
3311 olddsi = get_strinfo (didx);
3312 else if (didx < 0)
3313 return;
3315 if (olddsi != NULL
3316 && !integer_zerop (len))
3318 maybe_warn_overflow (stmt, false, len, olddsi, false, true);
3319 adjust_last_stmt (olddsi, stmt, false);
3322 int idx = get_stridx (src, stmt);
3323 if (idx == 0)
3324 return;
3326 bool full_string_p;
3327 if (idx > 0)
3329 gimple *def_stmt;
3331 /* Handle memcpy (x, y, l) where l's relationship with strlen (y)
3332 is known. */
3333 si = get_strinfo (idx);
3334 if (si == NULL || si->nonzero_chars == NULL_TREE)
3335 return;
3336 if (TREE_CODE (len) == INTEGER_CST
3337 && TREE_CODE (si->nonzero_chars) == INTEGER_CST)
3339 if (tree_int_cst_le (len, si->nonzero_chars))
3341 /* Copying LEN nonzero characters, where LEN is constant. */
3342 newlen = len;
3343 full_string_p = false;
3345 else
3347 /* Copying the whole of the analyzed part of SI. */
3348 newlen = si->nonzero_chars;
3349 full_string_p = si->full_string_p;
3352 else
3354 if (!si->full_string_p)
3355 return;
3356 if (TREE_CODE (len) != SSA_NAME)
3357 return;
3358 def_stmt = SSA_NAME_DEF_STMT (len);
3359 if (!is_gimple_assign (def_stmt)
3360 || gimple_assign_rhs_code (def_stmt) != PLUS_EXPR
3361 || gimple_assign_rhs1 (def_stmt) != si->nonzero_chars
3362 || !integer_onep (gimple_assign_rhs2 (def_stmt)))
3363 return;
3364 /* Copying variable-length string SI (and no more). */
3365 newlen = si->nonzero_chars;
3366 full_string_p = true;
3369 else
3371 si = NULL;
3372 /* Handle memcpy (x, "abcd", 5) or
3373 memcpy (x, "abc\0uvw", 7). */
3374 if (!tree_fits_uhwi_p (len))
3375 return;
3377 unsigned HOST_WIDE_INT clen = tree_to_uhwi (len);
3378 unsigned HOST_WIDE_INT nonzero_chars = ~idx;
3379 newlen = build_int_cst (size_type_node, MIN (nonzero_chars, clen));
3380 full_string_p = clen > nonzero_chars;
3383 if (!full_string_p
3384 && olddsi
3385 && olddsi->nonzero_chars
3386 && TREE_CODE (olddsi->nonzero_chars) == INTEGER_CST
3387 && tree_int_cst_le (newlen, olddsi->nonzero_chars))
3389 /* The SRC substring being written strictly overlaps
3390 a subsequence of the existing string OLDDSI. */
3391 newlen = olddsi->nonzero_chars;
3392 full_string_p = olddsi->full_string_p;
3395 if (olddsi != NULL && TREE_CODE (len) == SSA_NAME)
3396 adjust_last_stmt (olddsi, stmt, false);
3398 if (didx == 0)
3400 didx = new_stridx (dst);
3401 if (didx == 0)
3402 return;
3404 oldlen = NULL_TREE;
3405 if (olddsi != NULL)
3407 dsi = unshare_strinfo (olddsi);
3408 oldlen = olddsi->nonzero_chars;
3409 dsi->nonzero_chars = newlen;
3410 dsi->full_string_p = full_string_p;
3411 /* Break the chain, so adjust_related_strinfo on later pointers in
3412 the chain won't adjust this one anymore. */
3413 dsi->next = 0;
3414 dsi->stmt = NULL;
3415 dsi->endptr = NULL_TREE;
3417 else
3419 dsi = new_strinfo (dst, didx, newlen, full_string_p);
3420 set_strinfo (didx, dsi);
3421 find_equal_ptrs (dst, didx);
3423 dsi->writable = true;
3424 dsi->dont_invalidate = true;
3425 if (olddsi != NULL)
3427 tree adj = NULL_TREE;
3428 location_t loc = gimple_location (stmt);
3429 if (oldlen == NULL_TREE)
3431 else if (integer_zerop (oldlen))
3432 adj = newlen;
3433 else if (TREE_CODE (oldlen) == INTEGER_CST
3434 || TREE_CODE (newlen) == INTEGER_CST)
3435 adj = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (newlen), newlen,
3436 fold_convert_loc (loc, TREE_TYPE (newlen),
3437 oldlen));
3438 if (adj != NULL_TREE)
3439 adjust_related_strinfos (loc, dsi, adj);
3440 else
3441 dsi->prev = 0;
3443 /* memcpy src may not overlap dst, so src doesn't need to be
3444 invalidated either. */
3445 if (si != NULL)
3446 si->dont_invalidate = true;
3448 if (full_string_p)
3450 lhs = gimple_call_lhs (stmt);
3451 switch (bcode)
3453 case BUILT_IN_MEMCPY:
3454 case BUILT_IN_MEMCPY_CHK:
3455 /* Allow adjust_last_stmt to decrease this memcpy's size. */
3456 laststmt.stmt = stmt;
3457 laststmt.len = dsi->nonzero_chars;
3458 laststmt.stridx = dsi->idx;
3459 if (lhs)
3460 ssa_ver_to_stridx[SSA_NAME_VERSION (lhs)] = didx;
3461 break;
3462 case BUILT_IN_MEMPCPY:
3463 case BUILT_IN_MEMPCPY_CHK:
3464 break;
3465 default:
3466 gcc_unreachable ();
3471 /* Handle a strcat-like ({strcat,__strcat_chk}) call.
3472 If strlen of the second argument is known, strlen of the first argument
3473 is increased by the length of the second argument. Furthermore, attempt
3474 to convert it to memcpy/strcpy if the length of the first argument
3475 is known. */
3477 void
3478 strlen_pass::handle_builtin_strcat (built_in_function bcode)
3480 int idx, didx;
3481 tree srclen, args, type, fn, objsz, endptr;
3482 bool success;
3483 gimple *stmt = gsi_stmt (m_gsi);
3484 strinfo *si, *dsi;
3485 location_t loc = gimple_location (stmt);
3487 tree src = gimple_call_arg (stmt, 1);
3488 tree dst = gimple_call_arg (stmt, 0);
3490 /* Bail if the source is the same as destination. It will be diagnosed
3491 elsewhere. */
3492 if (operand_equal_p (src, dst, 0))
3493 return;
3495 tree lhs = gimple_call_lhs (stmt);
3497 didx = get_stridx (dst, stmt);
3498 if (didx < 0)
3499 return;
3501 dsi = NULL;
3502 if (didx > 0)
3503 dsi = get_strinfo (didx);
3505 srclen = NULL_TREE;
3506 si = NULL;
3507 idx = get_stridx (src, stmt);
3508 if (idx < 0)
3509 srclen = build_int_cst (size_type_node, ~idx);
3510 else if (idx > 0)
3512 si = get_strinfo (idx);
3513 if (si != NULL)
3514 srclen = get_string_length (si);
3517 /* Disable warning for the transformed statement? */
3518 opt_code no_warning_opt = no_warning;
3520 if (dsi == NULL || get_string_length (dsi) == NULL_TREE)
3523 /* The concatenation always involves copying at least one byte
3524 (the terminating nul), even if the source string is empty.
3525 If the source is unknown assume it's one character long and
3526 used that as both sizes. */
3527 tree slen = srclen;
3528 if (slen)
3530 tree type = TREE_TYPE (slen);
3531 slen = fold_build2 (PLUS_EXPR, type, slen, build_int_cst (type, 1));
3534 tree sptr = si && si->ptr ? si->ptr : src;
3535 no_warning_opt = check_bounds_or_overlap (stmt, dst, sptr, NULL_TREE,
3536 slen);
3537 if (no_warning_opt)
3538 suppress_warning (stmt, no_warning_opt);
3541 /* strcat (p, q) can be transformed into
3542 tmp = p + strlen (p); endptr = stpcpy (tmp, q);
3543 with length endptr - p if we need to compute the length
3544 later on. Don't do this transformation if we don't need
3545 it. */
3546 if (builtin_decl_implicit_p (BUILT_IN_STPCPY) && lhs == NULL_TREE)
3548 if (didx == 0)
3550 didx = new_stridx (dst);
3551 if (didx == 0)
3552 return;
3554 if (dsi == NULL)
3556 dsi = new_strinfo (dst, didx, NULL_TREE, false);
3557 set_strinfo (didx, dsi);
3558 find_equal_ptrs (dst, didx);
3560 else
3562 dsi = unshare_strinfo (dsi);
3563 dsi->nonzero_chars = NULL_TREE;
3564 dsi->full_string_p = false;
3565 dsi->next = 0;
3566 dsi->endptr = NULL_TREE;
3568 dsi->writable = true;
3569 dsi->stmt = stmt;
3570 dsi->dont_invalidate = true;
3572 return;
3575 tree dstlen = dsi->nonzero_chars;
3576 endptr = dsi->endptr;
3578 dsi = unshare_strinfo (dsi);
3579 dsi->endptr = NULL_TREE;
3580 dsi->stmt = NULL;
3581 dsi->writable = true;
3583 if (srclen != NULL_TREE)
3585 dsi->nonzero_chars = fold_build2_loc (loc, PLUS_EXPR,
3586 TREE_TYPE (dsi->nonzero_chars),
3587 dsi->nonzero_chars, srclen);
3588 gcc_assert (dsi->full_string_p);
3589 adjust_related_strinfos (loc, dsi, srclen);
3590 dsi->dont_invalidate = true;
3592 else
3594 dsi->nonzero_chars = NULL;
3595 dsi->full_string_p = false;
3596 if (lhs == NULL_TREE && builtin_decl_implicit_p (BUILT_IN_STPCPY))
3597 dsi->dont_invalidate = true;
3600 if (si != NULL)
3601 /* strcat src may not overlap dst, so src doesn't need to be
3602 invalidated either. */
3603 si->dont_invalidate = true;
3605 /* For now. Could remove the lhs from the call and add
3606 lhs = dst; afterwards. */
3607 if (lhs)
3608 return;
3610 fn = NULL_TREE;
3611 objsz = NULL_TREE;
3612 switch (bcode)
3614 case BUILT_IN_STRCAT:
3615 if (srclen != NULL_TREE)
3616 fn = builtin_decl_implicit (BUILT_IN_MEMCPY);
3617 else
3618 fn = builtin_decl_implicit (BUILT_IN_STRCPY);
3619 break;
3620 case BUILT_IN_STRCAT_CHK:
3621 if (srclen != NULL_TREE)
3622 fn = builtin_decl_explicit (BUILT_IN_MEMCPY_CHK);
3623 else
3624 fn = builtin_decl_explicit (BUILT_IN_STRCPY_CHK);
3625 objsz = gimple_call_arg (stmt, 2);
3626 break;
3627 default:
3628 gcc_unreachable ();
3631 if (fn == NULL_TREE)
3632 return;
3634 if (dsi && dstlen)
3636 tree type = TREE_TYPE (dstlen);
3638 /* Compute the size of the source sequence, including the nul. */
3639 tree srcsize = srclen ? srclen : size_zero_node;
3640 tree one = build_int_cst (type, 1);
3641 srcsize = fold_build2 (PLUS_EXPR, type, srcsize, one);
3642 tree dstsize = fold_build2 (PLUS_EXPR, type, dstlen, one);
3643 tree sptr = si && si->ptr ? si->ptr : src;
3645 no_warning_opt = check_bounds_or_overlap (stmt, dst, sptr, dstsize,
3646 srcsize);
3647 if (no_warning_opt)
3648 suppress_warning (stmt, no_warning_opt);
3651 tree len = NULL_TREE;
3652 if (srclen != NULL_TREE)
3654 args = TYPE_ARG_TYPES (TREE_TYPE (fn));
3655 type = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (args)));
3657 len = fold_convert_loc (loc, type, unshare_expr (srclen));
3658 len = fold_build2_loc (loc, PLUS_EXPR, type, len,
3659 build_int_cst (type, 1));
3660 len = force_gimple_operand_gsi (&m_gsi, len, true, NULL_TREE, true,
3661 GSI_SAME_STMT);
3663 if (endptr)
3664 dst = fold_convert_loc (loc, TREE_TYPE (dst), unshare_expr (endptr));
3665 else
3666 dst = fold_build2_loc (loc, POINTER_PLUS_EXPR, TREE_TYPE (dst), dst,
3667 fold_convert_loc (loc, sizetype,
3668 unshare_expr (dstlen)));
3669 dst = force_gimple_operand_gsi (&m_gsi, dst, true, NULL_TREE, true,
3670 GSI_SAME_STMT);
3671 if (objsz)
3673 objsz = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (objsz), objsz,
3674 fold_convert_loc (loc, TREE_TYPE (objsz),
3675 unshare_expr (dstlen)));
3676 objsz = force_gimple_operand_gsi (&m_gsi, objsz, true, NULL_TREE, true,
3677 GSI_SAME_STMT);
3679 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
3681 fprintf (dump_file, "Optimizing: ");
3682 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
3684 if (srclen != NULL_TREE)
3685 success = update_gimple_call (&m_gsi, fn, 3 + (objsz != NULL_TREE),
3686 dst, src, len, objsz);
3687 else
3688 success = update_gimple_call (&m_gsi, fn, 2 + (objsz != NULL_TREE),
3689 dst, src, objsz);
3690 if (success)
3692 stmt = gsi_stmt (m_gsi);
3693 update_stmt (stmt);
3694 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
3696 fprintf (dump_file, "into: ");
3697 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
3699 /* If srclen == NULL, note that current string length can be
3700 computed by transforming this strcpy into stpcpy. */
3701 if (srclen == NULL_TREE && dsi->dont_invalidate)
3702 dsi->stmt = stmt;
3703 adjust_last_stmt (dsi, stmt, true);
3704 if (srclen != NULL_TREE)
3706 laststmt.stmt = stmt;
3707 laststmt.len = srclen;
3708 laststmt.stridx = dsi->idx;
3711 else if (dump_file && (dump_flags & TDF_DETAILS) != 0)
3712 fprintf (dump_file, "not possible.\n");
3714 if (no_warning_opt)
3715 suppress_warning (stmt, no_warning_opt);
3718 /* Handle a call to an allocation function like alloca, malloc or calloc,
3719 or an ordinary allocation function declared with attribute alloc_size. */
3721 void
3722 strlen_pass::handle_alloc_call (built_in_function bcode)
3724 gimple *stmt = gsi_stmt (m_gsi);
3725 tree lhs = gimple_call_lhs (stmt);
3726 if (lhs == NULL_TREE)
3727 return;
3729 gcc_assert (get_stridx (lhs, stmt) == 0);
3730 int idx = new_stridx (lhs);
3731 tree length = NULL_TREE;
3732 if (bcode == BUILT_IN_CALLOC)
3733 length = build_int_cst (size_type_node, 0);
3734 strinfo *si = new_strinfo (lhs, idx, length, length != NULL_TREE);
3735 if (bcode == BUILT_IN_CALLOC)
3737 /* Only set STMT for calloc and malloc. */
3738 si->stmt = stmt;
3739 /* Only set ENDPTR for calloc. */
3740 si->endptr = lhs;
3742 else if (bcode == BUILT_IN_MALLOC)
3743 si->stmt = stmt;
3745 /* Set ALLOC is set for all allocation functions. */
3746 si->alloc = stmt;
3747 set_strinfo (idx, si);
3748 si->writable = true;
3749 si->dont_invalidate = true;
3752 /* Handle a call to memset.
3753 After a call to calloc, memset(,0,) is unnecessary.
3754 memset(malloc(n),0,n) is calloc(n,1).
3755 return true when the call is transformed, false otherwise.
3756 When nonnull uses RVALS to determine range information. */
3758 bool
3759 strlen_pass::handle_builtin_memset (bool *zero_write)
3761 gimple *memset_stmt = gsi_stmt (m_gsi);
3762 tree ptr = gimple_call_arg (memset_stmt, 0);
3763 /* Set to the non-constant offset added to PTR. */
3764 wide_int offrng[2];
3765 int idx1 = get_stridx (ptr, memset_stmt, offrng, ptr_qry.rvals);
3766 if (idx1 <= 0)
3767 return false;
3768 strinfo *si1 = get_strinfo (idx1);
3769 if (!si1)
3770 return false;
3771 gimple *alloc_stmt = si1->alloc;
3772 if (!alloc_stmt || !is_gimple_call (alloc_stmt))
3773 return false;
3774 tree callee1 = gimple_call_fndecl (alloc_stmt);
3775 if (!valid_builtin_call (alloc_stmt))
3776 return false;
3777 tree alloc_size = gimple_call_arg (alloc_stmt, 0);
3778 tree memset_size = gimple_call_arg (memset_stmt, 2);
3780 /* Check for overflow. */
3781 maybe_warn_overflow (memset_stmt, false, memset_size, NULL, false, true);
3783 /* Bail when there is no statement associated with the destination
3784 (the statement may be null even when SI1->ALLOC is not). */
3785 if (!si1->stmt)
3786 return false;
3788 /* Avoid optimizing if store is at a variable offset from the beginning
3789 of the allocated object. */
3790 if (offrng[0] != 0 || offrng[0] != offrng[1])
3791 return false;
3793 /* Bail when the call writes a non-zero value. */
3794 if (!integer_zerop (gimple_call_arg (memset_stmt, 1)))
3795 return false;
3797 /* Let the caller know the memset call cleared the destination. */
3798 *zero_write = true;
3800 enum built_in_function code1 = DECL_FUNCTION_CODE (callee1);
3801 if (code1 == BUILT_IN_CALLOC)
3802 /* Not touching alloc_stmt */ ;
3803 else if (code1 == BUILT_IN_MALLOC
3804 && operand_equal_p (memset_size, alloc_size, 0))
3806 /* Replace the malloc + memset calls with calloc. */
3807 gimple_stmt_iterator gsi1 = gsi_for_stmt (si1->stmt);
3808 update_gimple_call (&gsi1, builtin_decl_implicit (BUILT_IN_CALLOC), 2,
3809 alloc_size, build_one_cst (size_type_node));
3810 si1->nonzero_chars = build_int_cst (size_type_node, 0);
3811 si1->full_string_p = true;
3812 si1->stmt = gsi_stmt (gsi1);
3814 else
3815 return false;
3816 tree lhs = gimple_call_lhs (memset_stmt);
3817 unlink_stmt_vdef (memset_stmt);
3818 if (lhs)
3820 gimple *assign = gimple_build_assign (lhs, ptr);
3821 gsi_replace (&m_gsi, assign, false);
3823 else
3825 gsi_remove (&m_gsi, true);
3826 release_defs (memset_stmt);
3829 return true;
3832 /* Return first such statement if RES is used in statements testing its
3833 equality to zero, and null otherwise. If EXCLUSIVE is true, return
3834 nonnull if and only RES is used in such expressions exclusively and
3835 in none other. */
3837 static gimple *
3838 use_in_zero_equality (tree res, bool exclusive = true)
3840 gimple *first_use = NULL;
3842 use_operand_p use_p;
3843 imm_use_iterator iter;
3845 FOR_EACH_IMM_USE_FAST (use_p, iter, res)
3847 gimple *use_stmt = USE_STMT (use_p);
3849 if (is_gimple_debug (use_stmt))
3850 continue;
3852 if (gimple_code (use_stmt) == GIMPLE_ASSIGN)
3854 tree_code code = gimple_assign_rhs_code (use_stmt);
3855 if (code == COND_EXPR)
3857 tree cond_expr = gimple_assign_rhs1 (use_stmt);
3858 if ((TREE_CODE (cond_expr) != EQ_EXPR
3859 && (TREE_CODE (cond_expr) != NE_EXPR))
3860 || !integer_zerop (TREE_OPERAND (cond_expr, 1)))
3862 if (exclusive)
3863 return NULL;
3864 continue;
3867 else if (code == EQ_EXPR || code == NE_EXPR)
3869 if (!integer_zerop (gimple_assign_rhs2 (use_stmt)))
3871 if (exclusive)
3872 return NULL;
3873 continue;
3876 else if (exclusive)
3877 return NULL;
3878 else
3879 continue;
3881 else if (gimple_code (use_stmt) == GIMPLE_COND)
3883 tree_code code = gimple_cond_code (use_stmt);
3884 if ((code != EQ_EXPR && code != NE_EXPR)
3885 || !integer_zerop (gimple_cond_rhs (use_stmt)))
3887 if (exclusive)
3888 return NULL;
3889 continue;
3892 else if (exclusive)
3893 return NULL;
3894 else
3895 continue;
3897 if (!first_use)
3898 first_use = use_stmt;
3901 return first_use;
3904 /* Handle a call to memcmp. We try to handle small comparisons by
3905 converting them to load and compare, and replacing the call to memcmp
3906 with a __builtin_memcmp_eq call where possible.
3907 return true when call is transformed, return false otherwise. */
3909 bool
3910 strlen_pass::handle_builtin_memcmp ()
3912 gcall *stmt = as_a <gcall *> (gsi_stmt (m_gsi));
3913 tree res = gimple_call_lhs (stmt);
3915 if (!res || !use_in_zero_equality (res))
3916 return false;
3918 tree arg1 = gimple_call_arg (stmt, 0);
3919 tree arg2 = gimple_call_arg (stmt, 1);
3920 tree len = gimple_call_arg (stmt, 2);
3921 unsigned HOST_WIDE_INT leni;
3923 if (tree_fits_uhwi_p (len)
3924 && (leni = tree_to_uhwi (len)) <= GET_MODE_SIZE (word_mode)
3925 && pow2p_hwi (leni))
3927 leni *= CHAR_TYPE_SIZE;
3928 unsigned align1 = get_pointer_alignment (arg1);
3929 unsigned align2 = get_pointer_alignment (arg2);
3930 unsigned align = MIN (align1, align2);
3931 scalar_int_mode mode;
3932 if (int_mode_for_size (leni, 1).exists (&mode)
3933 && (align >= leni || !targetm.slow_unaligned_access (mode, align)))
3935 location_t loc = gimple_location (stmt);
3936 tree type, off;
3937 type = build_nonstandard_integer_type (leni, 1);
3938 gcc_assert (known_eq (GET_MODE_BITSIZE (TYPE_MODE (type)), leni));
3939 tree ptrtype = build_pointer_type_for_mode (char_type_node,
3940 ptr_mode, true);
3941 off = build_int_cst (ptrtype, 0);
3942 arg1 = build2_loc (loc, MEM_REF, type, arg1, off);
3943 arg2 = build2_loc (loc, MEM_REF, type, arg2, off);
3944 tree tem1 = fold_const_aggregate_ref (arg1);
3945 if (tem1)
3946 arg1 = tem1;
3947 tree tem2 = fold_const_aggregate_ref (arg2);
3948 if (tem2)
3949 arg2 = tem2;
3950 res = fold_convert_loc (loc, TREE_TYPE (res),
3951 fold_build2_loc (loc, NE_EXPR,
3952 boolean_type_node,
3953 arg1, arg2));
3954 gimplify_and_update_call_from_tree (&m_gsi, res);
3955 return true;
3959 gimple_call_set_fndecl (stmt, builtin_decl_explicit (BUILT_IN_MEMCMP_EQ));
3960 return true;
3963 /* Given strinfo IDX for ARG, sets LENRNG[] to the range of lengths
3964 of the string(s) referenced by ARG if it can be determined.
3965 If the length cannot be determined, sets *SIZE to the size of
3966 the array the string is stored in, if any. If no such array is
3967 known, sets *SIZE to -1. When the strings are nul-terminated sets
3968 *NULTERM to true, otherwise to false. When nonnull uses RVALS to
3969 determine range information. Returns true on success. */
3971 bool
3972 strlen_pass::get_len_or_size (gimple *stmt, tree arg, int idx,
3973 unsigned HOST_WIDE_INT lenrng[2],
3974 unsigned HOST_WIDE_INT *size, bool *nulterm)
3976 /* Invalidate. */
3977 *size = HOST_WIDE_INT_M1U;
3979 if (idx < 0)
3981 /* IDX is the inverted constant string length. */
3982 lenrng[0] = ~idx;
3983 lenrng[1] = lenrng[0];
3984 *nulterm = true;
3985 return true;
3988 /* Set so that both LEN and ~LEN are invalid lengths, i.e., maximum
3989 possible length + 1. */
3990 lenrng[0] = lenrng[1] = HOST_WIDE_INT_MAX;
3992 if (strinfo *si = idx ? get_strinfo (idx) : NULL)
3994 /* FIXME: Handle all this in_range_strlen_dynamic. */
3995 if (!si->nonzero_chars)
3997 else if (tree_fits_uhwi_p (si->nonzero_chars))
3999 lenrng[0] = tree_to_uhwi (si->nonzero_chars);
4000 *nulterm = si->full_string_p;
4001 /* Set the upper bound only if the string is known to be
4002 nul-terminated, otherwise leave it at maximum + 1. */
4003 if (*nulterm)
4004 lenrng[1] = lenrng[0];
4006 else if (TREE_CODE (si->nonzero_chars) == SSA_NAME)
4008 value_range r;
4009 get_range_query (cfun)->range_of_expr (r, si->nonzero_chars);
4010 if (r.kind () == VR_RANGE)
4012 lenrng[0] = r.lower_bound ().to_uhwi ();
4013 lenrng[1] = r.upper_bound ().to_uhwi ();
4014 *nulterm = si->full_string_p;
4019 if (lenrng[0] != HOST_WIDE_INT_MAX)
4020 return true;
4022 /* Compute the minimum and maximum real or possible lengths. */
4023 c_strlen_data lendata = { };
4024 /* Set MAXBOUND to an arbitrary non-null non-integer node as a request
4025 to have it set to the length of the longest string in a PHI. */
4026 lendata.maxbound = arg;
4027 get_range_strlen_dynamic (arg, stmt, &lendata, ptr_qry.rvals);
4029 unsigned HOST_WIDE_INT maxbound = HOST_WIDE_INT_M1U;
4030 if (tree_fits_uhwi_p (lendata.maxbound)
4031 && !integer_all_onesp (lendata.maxbound))
4032 maxbound = tree_to_uhwi (lendata.maxbound);
4034 if (tree_fits_uhwi_p (lendata.minlen) && tree_fits_uhwi_p (lendata.maxlen))
4036 unsigned HOST_WIDE_INT minlen = tree_to_uhwi (lendata.minlen);
4037 unsigned HOST_WIDE_INT maxlen = tree_to_uhwi (lendata.maxlen);
4039 /* The longest string in this data model. */
4040 const unsigned HOST_WIDE_INT lenmax
4041 = tree_to_uhwi (max_object_size ()) - 2;
4043 if (maxbound == HOST_WIDE_INT_M1U)
4045 lenrng[0] = minlen;
4046 lenrng[1] = maxlen;
4047 *nulterm = minlen == maxlen;
4049 else if (maxlen < lenmax)
4051 *size = maxbound + 1;
4052 *nulterm = false;
4054 else
4055 return false;
4057 return true;
4060 if (maxbound != HOST_WIDE_INT_M1U
4061 && lendata.maxlen
4062 && !integer_all_onesp (lendata.maxlen))
4064 /* Set *SIZE to LENDATA.MAXBOUND which is a conservative estimate
4065 of the longest string based on the sizes of the arrays referenced
4066 by ARG. */
4067 *size = maxbound + 1;
4068 *nulterm = false;
4069 return true;
4072 return false;
4075 /* If IDX1 and IDX2 refer to strings A and B of unequal lengths, return
4076 the result of 0 == strncmp (A, B, BOUND) (which is the same as strcmp
4077 for a sufficiently large BOUND). If the result is based on the length
4078 of one string being greater than the longest string that would fit in
4079 the array pointer to by the argument, set *PLEN and *PSIZE to
4080 the corresponding length (or its complement when the string is known
4081 to be at least as long and need not be nul-terminated) and size.
4082 Otherwise return null. */
4084 tree
4085 strlen_pass::strxcmp_eqz_result (gimple *stmt, tree arg1, int idx1,
4086 tree arg2, int idx2,
4087 unsigned HOST_WIDE_INT bound,
4088 unsigned HOST_WIDE_INT len[2],
4089 unsigned HOST_WIDE_INT *psize)
4091 /* Determine the range the length of each string is in and whether it's
4092 known to be nul-terminated, or the size of the array it's stored in. */
4093 bool nul1, nul2;
4094 unsigned HOST_WIDE_INT siz1, siz2;
4095 unsigned HOST_WIDE_INT len1rng[2], len2rng[2];
4096 if (!get_len_or_size (stmt, arg1, idx1, len1rng, &siz1, &nul1)
4097 || !get_len_or_size (stmt, arg2, idx2, len2rng, &siz2, &nul2))
4098 return NULL_TREE;
4100 /* BOUND is set to HWI_M1U for strcmp and less to strncmp, and LENiRNG
4101 to HWI_MAX when invalid. Adjust the length of each string to consider
4102 to be no more than BOUND. */
4103 if (len1rng[0] < HOST_WIDE_INT_MAX && len1rng[0] > bound)
4104 len1rng[0] = bound;
4105 if (len1rng[1] < HOST_WIDE_INT_MAX && len1rng[1] > bound)
4106 len1rng[1] = bound;
4107 if (len2rng[0] < HOST_WIDE_INT_MAX && len2rng[0] > bound)
4108 len2rng[0] = bound;
4109 if (len2rng[1] < HOST_WIDE_INT_MAX && len2rng[1] > bound)
4110 len2rng[1] = bound;
4112 /* Two empty strings are equal. */
4113 if (len1rng[1] == 0 && len2rng[1] == 0)
4114 return integer_one_node;
4116 /* The strings are definitely unequal when the lower bound of the length
4117 of one of them is greater than the length of the longest string that
4118 would fit into the other array. */
4119 if (len1rng[0] == HOST_WIDE_INT_MAX
4120 && len2rng[0] != HOST_WIDE_INT_MAX
4121 && ((len2rng[0] < bound && len2rng[0] >= siz1)
4122 || len2rng[0] > siz1))
4124 *psize = siz1;
4125 len[0] = len1rng[0];
4126 /* Set LEN[0] to the lower bound of ARG1's length when it's
4127 nul-terminated or to the complement of its minimum length
4128 otherwise, */
4129 len[1] = nul2 ? len2rng[0] : ~len2rng[0];
4130 return integer_zero_node;
4133 if (len2rng[0] == HOST_WIDE_INT_MAX
4134 && len1rng[0] != HOST_WIDE_INT_MAX
4135 && ((len1rng[0] < bound && len1rng[0] >= siz2)
4136 || len1rng[0] > siz2))
4138 *psize = siz2;
4139 len[0] = nul1 ? len1rng[0] : ~len1rng[0];
4140 len[1] = len2rng[0];
4141 return integer_zero_node;
4144 /* The strings are also definitely unequal when their lengths are unequal
4145 and at least one is nul-terminated. */
4146 if (len1rng[0] != HOST_WIDE_INT_MAX
4147 && len2rng[0] != HOST_WIDE_INT_MAX
4148 && ((len1rng[1] < len2rng[0] && nul1)
4149 || (len2rng[1] < len1rng[0] && nul2)))
4151 if (bound <= len1rng[0] || bound <= len2rng[0])
4152 *psize = bound;
4153 else
4154 *psize = HOST_WIDE_INT_M1U;
4156 len[0] = len1rng[0];
4157 len[1] = len2rng[0];
4158 return integer_zero_node;
4161 /* The string lengths may be equal or unequal. Even when equal and
4162 both strings nul-terminated, without the string contents there's
4163 no way to determine whether they are equal. */
4164 return NULL_TREE;
4167 /* Diagnose pointless calls to strcmp or strncmp STMT with string
4168 arguments of lengths LEN or size SIZ and (for strncmp) BOUND,
4169 whose result is used in equality expressions that evaluate to
4170 a constant due to one argument being longer than the size of
4171 the other. */
4173 static void
4174 maybe_warn_pointless_strcmp (gimple *stmt, HOST_WIDE_INT bound,
4175 unsigned HOST_WIDE_INT len[2],
4176 unsigned HOST_WIDE_INT siz)
4178 tree lhs = gimple_call_lhs (stmt);
4179 gimple *use = use_in_zero_equality (lhs, /* exclusive = */ false);
4180 if (!use)
4181 return;
4183 bool at_least = false;
4185 /* Excessive LEN[i] indicates a lower bound. */
4186 if (len[0] > HOST_WIDE_INT_MAX)
4188 at_least = true;
4189 len[0] = ~len[0];
4192 if (len[1] > HOST_WIDE_INT_MAX)
4194 at_least = true;
4195 len[1] = ~len[1];
4198 unsigned HOST_WIDE_INT minlen = MIN (len[0], len[1]);
4200 /* FIXME: Include a note pointing to the declaration of the smaller
4201 array. */
4202 location_t stmt_loc = gimple_or_expr_nonartificial_location (stmt, lhs);
4204 tree callee = gimple_call_fndecl (stmt);
4205 bool warned = false;
4206 if (siz <= minlen && bound == -1)
4207 warned = warning_at (stmt_loc, OPT_Wstring_compare,
4208 (at_least
4209 ? G_("%qD of a string of length %wu or more and "
4210 "an array of size %wu evaluates to nonzero")
4211 : G_("%qD of a string of length %wu and an array "
4212 "of size %wu evaluates to nonzero")),
4213 callee, minlen, siz);
4214 else if (!at_least && siz <= HOST_WIDE_INT_MAX)
4216 if (len[0] != HOST_WIDE_INT_MAX && len[1] != HOST_WIDE_INT_MAX)
4217 warned = warning_at (stmt_loc, OPT_Wstring_compare,
4218 "%qD of strings of length %wu and %wu "
4219 "and bound of %wu evaluates to nonzero",
4220 callee, len[0], len[1], bound);
4221 else
4222 warned = warning_at (stmt_loc, OPT_Wstring_compare,
4223 "%qD of a string of length %wu, an array "
4224 "of size %wu and bound of %wu evaluates to "
4225 "nonzero",
4226 callee, minlen, siz, bound);
4229 if (!warned)
4230 return;
4232 location_t use_loc = gimple_location (use);
4233 if (LOCATION_LINE (stmt_loc) != LOCATION_LINE (use_loc))
4234 inform (use_loc, "in this expression");
4238 /* Optimize a call to strcmp or strncmp either by folding it to a constant
4239 when possible or by transforming the latter to the former. Warn about
4240 calls where the length of one argument is greater than the size of
4241 the array to which the other argument points if the latter's length
4242 is not known. Return true when the call has been transformed into
4243 another and false otherwise. */
4245 bool
4246 strlen_pass::handle_builtin_string_cmp ()
4248 gcall *stmt = as_a <gcall *> (gsi_stmt (m_gsi));
4249 tree lhs = gimple_call_lhs (stmt);
4251 if (!lhs)
4252 return false;
4254 tree arg1 = gimple_call_arg (stmt, 0);
4255 tree arg2 = gimple_call_arg (stmt, 1);
4256 int idx1 = get_stridx (arg1, stmt);
4257 int idx2 = get_stridx (arg2, stmt);
4259 /* For strncmp set to the value of the third argument if known. */
4260 HOST_WIDE_INT bound = -1;
4261 tree len = NULL_TREE;
4262 /* Extract the strncmp bound. */
4263 if (gimple_call_num_args (stmt) == 3)
4265 len = gimple_call_arg (stmt, 2);
4266 if (tree_fits_shwi_p (len))
4267 bound = tree_to_shwi (len);
4269 /* If the bound argument is NOT known, do nothing. */
4270 if (bound < 0)
4271 return false;
4274 /* Avoid folding if either argument is not a nul-terminated array.
4275 Defer warning until later. */
4276 if (!check_nul_terminated_array (NULL_TREE, arg1, len)
4277 || !check_nul_terminated_array (NULL_TREE, arg2, len))
4278 return false;
4281 /* Set to the length of one argument (or its complement if it's
4282 the lower bound of a range) and the size of the array storing
4283 the other if the result is based on the former being equal to
4284 or greater than the latter. */
4285 unsigned HOST_WIDE_INT len[2] = { HOST_WIDE_INT_MAX, HOST_WIDE_INT_MAX };
4286 unsigned HOST_WIDE_INT siz = HOST_WIDE_INT_M1U;
4288 /* Try to determine if the two strings are either definitely equal
4289 or definitely unequal and if so, either fold the result to zero
4290 (when equal) or set the range of the result to ~[0, 0] otherwise. */
4291 if (tree eqz = strxcmp_eqz_result (stmt, arg1, idx1, arg2, idx2, bound,
4292 len, &siz))
4294 if (integer_zerop (eqz))
4296 maybe_warn_pointless_strcmp (stmt, bound, len, siz);
4298 /* When the lengths of the first two string arguments are
4299 known to be unequal set the range of the result to non-zero.
4300 This allows the call to be eliminated if its result is only
4301 used in tests for equality to zero. */
4302 wide_int zero = wi::zero (TYPE_PRECISION (TREE_TYPE (lhs)));
4303 set_range_info (lhs, VR_ANTI_RANGE, zero, zero);
4304 return false;
4306 /* When the two strings are definitely equal (such as when they
4307 are both empty) fold the call to the constant result. */
4308 replace_call_with_value (&m_gsi, integer_zero_node);
4309 return true;
4313 /* Return if nothing is known about the strings pointed to by ARG1
4314 and ARG2. */
4315 if (idx1 == 0 && idx2 == 0)
4316 return false;
4318 /* Determine either the length or the size of each of the strings,
4319 whichever is available. */
4320 HOST_WIDE_INT cstlen1 = -1, cstlen2 = -1;
4321 HOST_WIDE_INT arysiz1 = -1, arysiz2 = -1;
4324 unsigned HOST_WIDE_INT len1rng[2], len2rng[2];
4325 unsigned HOST_WIDE_INT arsz1, arsz2;
4326 bool nulterm[2];
4328 if (!get_len_or_size (stmt, arg1, idx1, len1rng, &arsz1, nulterm)
4329 || !get_len_or_size (stmt, arg2, idx2, len2rng, &arsz2, nulterm + 1))
4330 return false;
4332 if (len1rng[0] == len1rng[1] && len1rng[0] < HOST_WIDE_INT_MAX)
4333 cstlen1 = len1rng[0];
4334 else if (arsz1 < HOST_WIDE_INT_M1U)
4335 arysiz1 = arsz1;
4337 if (len2rng[0] == len2rng[1] && len2rng[0] < HOST_WIDE_INT_MAX)
4338 cstlen2 = len2rng[0];
4339 else if (arsz2 < HOST_WIDE_INT_M1U)
4340 arysiz2 = arsz2;
4343 /* Bail if neither the string length nor the size of the array
4344 it is stored in can be determined. */
4345 if ((cstlen1 < 0 && arysiz1 < 0)
4346 || (cstlen2 < 0 && arysiz2 < 0)
4347 || (cstlen1 < 0 && cstlen2 < 0))
4348 return false;
4350 if (cstlen1 >= 0)
4351 ++cstlen1;
4352 if (cstlen2 >= 0)
4353 ++cstlen2;
4355 /* The exact number of characters to compare. */
4356 HOST_WIDE_INT cmpsiz;
4357 if (cstlen1 >= 0 && cstlen2 >= 0)
4358 cmpsiz = MIN (cstlen1, cstlen2);
4359 else if (cstlen1 >= 0)
4360 cmpsiz = cstlen1;
4361 else
4362 cmpsiz = cstlen2;
4363 if (bound >= 0)
4364 cmpsiz = MIN (cmpsiz, bound);
4365 /* The size of the array in which the unknown string is stored. */
4366 HOST_WIDE_INT varsiz = arysiz1 < 0 ? arysiz2 : arysiz1;
4368 if ((varsiz < 0 || cmpsiz < varsiz) && use_in_zero_equality (lhs))
4370 /* If the known length is less than the size of the other array
4371 and the strcmp result is only used to test equality to zero,
4372 transform the call to the equivalent _eq call. */
4373 if (tree fn = builtin_decl_implicit (bound < 0 ? BUILT_IN_STRCMP_EQ
4374 : BUILT_IN_STRNCMP_EQ))
4376 tree n = build_int_cst (size_type_node, cmpsiz);
4377 update_gimple_call (&m_gsi, fn, 3, arg1, arg2, n);
4378 return true;
4382 return false;
4385 /* Handle a POINTER_PLUS_EXPR statement.
4386 For p = "abcd" + 2; compute associated length, or if
4387 p = q + off is pointing to a '\0' character of a string, call
4388 zero_length_string on it. */
4390 void
4391 strlen_pass::handle_pointer_plus ()
4393 gimple *stmt = gsi_stmt (m_gsi);
4394 tree lhs = gimple_assign_lhs (stmt), off;
4395 int idx = get_stridx (gimple_assign_rhs1 (stmt), stmt);
4396 strinfo *si, *zsi;
4398 if (idx == 0)
4399 return;
4401 if (idx < 0)
4403 tree off = gimple_assign_rhs2 (stmt);
4404 if (tree_fits_uhwi_p (off)
4405 && tree_to_uhwi (off) <= (unsigned HOST_WIDE_INT) ~idx)
4406 ssa_ver_to_stridx[SSA_NAME_VERSION (lhs)]
4407 = ~(~idx - (int) tree_to_uhwi (off));
4408 return;
4411 si = get_strinfo (idx);
4412 if (si == NULL || si->nonzero_chars == NULL_TREE)
4413 return;
4415 off = gimple_assign_rhs2 (stmt);
4416 zsi = NULL;
4417 if (si->full_string_p && operand_equal_p (si->nonzero_chars, off, 0))
4418 zsi = zero_length_string (lhs, si);
4419 else if (TREE_CODE (off) == SSA_NAME)
4421 gimple *def_stmt = SSA_NAME_DEF_STMT (off);
4422 if (gimple_assign_single_p (def_stmt)
4423 && si->full_string_p
4424 && operand_equal_p (si->nonzero_chars,
4425 gimple_assign_rhs1 (def_stmt), 0))
4426 zsi = zero_length_string (lhs, si);
4428 if (zsi != NULL
4429 && si->endptr != NULL_TREE
4430 && si->endptr != lhs
4431 && TREE_CODE (si->endptr) == SSA_NAME)
4433 enum tree_code rhs_code
4434 = useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (si->endptr))
4435 ? SSA_NAME : NOP_EXPR;
4436 gimple_assign_set_rhs_with_ops (&m_gsi, rhs_code, si->endptr);
4437 gcc_assert (gsi_stmt (m_gsi) == stmt);
4438 update_stmt (stmt);
4442 /* Set LENRANGE to the number of nonzero bytes for a store of TYPE and
4443 clear all flags. Return true on success and false on failure. */
4445 static bool
4446 nonzero_bytes_for_type (tree type, unsigned lenrange[3],
4447 bool *nulterm, bool *allnul, bool *allnonnul)
4449 /* Use the size of the type of the expression as the size of the store,
4450 and set the upper bound of the length range to that of the size.
4451 Nothing is known about the contents so clear all flags. */
4452 tree typesize = TYPE_SIZE_UNIT (type);
4453 if (!type)
4454 return false;
4456 if (!tree_fits_uhwi_p (typesize))
4457 return false;
4459 unsigned HOST_WIDE_INT sz = tree_to_uhwi (typesize);
4460 if (sz > UINT_MAX)
4461 return false;
4463 lenrange[2] = sz;
4464 lenrange[1] = lenrange[2] ? lenrange[2] - 1 : 0;
4465 lenrange[0] = 0;
4466 *nulterm = false;
4467 *allnul = false;
4468 *allnonnul = false;
4469 return true;
4472 /* Recursively determine the minimum and maximum number of leading nonzero
4473 bytes in the representation of EXP and set LENRANGE[0] and LENRANGE[1]
4474 to each.
4475 Sets LENRANGE[2] to the total size of the access (which may be less
4476 than LENRANGE[1] when what's being referenced by EXP is a pointer
4477 rather than an array).
4478 Sets *NULTERM if the representation contains a zero byte, sets *ALLNUL
4479 if all the bytes are zero, and *ALLNONNUL is all are nonzero.
4480 OFFSET and NBYTES are the offset into the representation and
4481 the size of the access to it determined from an ADDR_EXPR (i.e.,
4482 a pointer) or MEM_REF or zero for other expressions.
4483 Uses RVALS to determine range information.
4484 Avoids recursing deeper than the limits in SNLIM allow.
4485 Returns true on success and false otherwise. */
4487 bool
4488 strlen_pass::count_nonzero_bytes (tree exp, gimple *stmt,
4489 unsigned HOST_WIDE_INT offset,
4490 unsigned HOST_WIDE_INT nbytes,
4491 unsigned lenrange[3], bool *nulterm,
4492 bool *allnul, bool *allnonnul,
4493 ssa_name_limit_t &snlim)
4495 if (TREE_CODE (exp) == SSA_NAME)
4497 /* Handle non-zero single-character stores specially. */
4498 tree type = TREE_TYPE (exp);
4499 if (TREE_CODE (type) == INTEGER_TYPE
4500 && TYPE_MODE (type) == TYPE_MODE (char_type_node)
4501 && TYPE_PRECISION (type) == TYPE_PRECISION (char_type_node)
4502 && tree_expr_nonzero_p (exp))
4504 /* If the character EXP is known to be non-zero (even if its
4505 exact value is not known) recurse once to set the range
4506 for an arbitrary constant. */
4507 exp = build_int_cst (type, 1);
4508 return count_nonzero_bytes (exp, stmt,
4509 offset, 1, lenrange,
4510 nulterm, allnul, allnonnul, snlim);
4513 gimple *stmt = SSA_NAME_DEF_STMT (exp);
4514 if (gimple_assign_single_p (stmt))
4516 exp = gimple_assign_rhs1 (stmt);
4517 if (!DECL_P (exp)
4518 && TREE_CODE (exp) != CONSTRUCTOR
4519 && TREE_CODE (exp) != MEM_REF)
4520 return false;
4521 /* Handle DECLs, CONSTRUCTOR and MEM_REF below. */
4523 else if (gimple_code (stmt) == GIMPLE_PHI)
4525 /* Avoid processing an SSA_NAME that has already been visited
4526 or if an SSA_NAME limit has been reached. Indicate success
4527 if the former and failure if the latter. */
4528 if (int res = snlim.next_phi (exp))
4529 return res > 0;
4531 /* Determine the minimum and maximum from the PHI arguments. */
4532 unsigned int n = gimple_phi_num_args (stmt);
4533 for (unsigned i = 0; i != n; i++)
4535 tree def = gimple_phi_arg_def (stmt, i);
4536 if (!count_nonzero_bytes (def, stmt,
4537 offset, nbytes, lenrange, nulterm,
4538 allnul, allnonnul, snlim))
4539 return false;
4542 return true;
4546 if (TREE_CODE (exp) == CONSTRUCTOR)
4548 if (nbytes)
4549 /* If NBYTES has already been determined by an outer MEM_REF
4550 fail rather than overwriting it (this shouldn't happen). */
4551 return false;
4553 tree type = TREE_TYPE (exp);
4554 tree size = TYPE_SIZE_UNIT (type);
4555 if (!size || !tree_fits_uhwi_p (size))
4556 return false;
4558 unsigned HOST_WIDE_INT byte_size = tree_to_uhwi (size);
4559 if (byte_size < offset)
4560 return false;
4562 nbytes = byte_size - offset;
4565 if (TREE_CODE (exp) == MEM_REF)
4567 if (nbytes)
4568 return false;
4570 tree arg = TREE_OPERAND (exp, 0);
4571 tree off = TREE_OPERAND (exp, 1);
4573 if (TREE_CODE (off) != INTEGER_CST || !tree_fits_uhwi_p (off))
4574 return false;
4576 unsigned HOST_WIDE_INT wioff = tree_to_uhwi (off);
4577 if (INT_MAX < wioff)
4578 return false;
4580 offset += wioff;
4581 if (INT_MAX < offset)
4582 return false;
4584 /* The size of the MEM_REF access determines the number of bytes. */
4585 tree type = TREE_TYPE (exp);
4586 tree typesize = TYPE_SIZE_UNIT (type);
4587 if (!typesize || !tree_fits_uhwi_p (typesize))
4588 return false;
4589 nbytes = tree_to_uhwi (typesize);
4590 if (!nbytes)
4591 return false;
4593 /* Handle MEM_REF = SSA_NAME types of assignments. */
4594 return count_nonzero_bytes_addr (arg, stmt,
4595 offset, nbytes, lenrange, nulterm,
4596 allnul, allnonnul, snlim);
4599 if (VAR_P (exp) || TREE_CODE (exp) == CONST_DECL)
4601 /* If EXP can be folded into a constant use the result. Otherwise
4602 proceed to use EXP to determine a range of the result. */
4603 if (tree fold_exp = ctor_for_folding (exp))
4604 if (fold_exp != error_mark_node)
4605 exp = fold_exp;
4608 const char *prep = NULL;
4609 if (TREE_CODE (exp) == STRING_CST)
4611 unsigned nchars = TREE_STRING_LENGTH (exp);
4612 if (nchars < offset)
4613 return false;
4615 if (!nbytes)
4616 /* If NBYTES hasn't been determined earlier, either from ADDR_EXPR
4617 (i.e., it's the size of a pointer), or from MEM_REF (as the size
4618 of the access), set it here to the size of the string, including
4619 all internal and trailing nuls if the string has any. */
4620 nbytes = nchars - offset;
4621 else if (nchars - offset < nbytes)
4622 return false;
4624 prep = TREE_STRING_POINTER (exp) + offset;
4627 unsigned char buf[256];
4628 if (!prep)
4630 if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
4631 return false;
4632 /* If the pointer to representation hasn't been set above
4633 for STRING_CST point it at the buffer. */
4634 prep = reinterpret_cast <char *>(buf);
4635 /* Try to extract the representation of the constant object
4636 or expression starting from the offset. */
4637 unsigned repsize = native_encode_expr (exp, buf, sizeof buf, offset);
4638 if (repsize < nbytes)
4640 /* This should only happen when REPSIZE is zero because EXP
4641 doesn't denote an object with a known initializer, except
4642 perhaps when the reference reads past its end. */
4643 lenrange[0] = 0;
4644 prep = NULL;
4646 else if (!nbytes)
4647 nbytes = repsize;
4648 else if (nbytes < repsize)
4649 return false;
4652 if (!nbytes)
4653 return nonzero_bytes_for_type (TREE_TYPE (exp), lenrange,
4654 nulterm, allnul, allnonnul);
4656 /* Compute the number of leading nonzero bytes in the representation
4657 and update the minimum and maximum. */
4658 unsigned n = prep ? strnlen (prep, nbytes) : nbytes;
4660 if (n < lenrange[0])
4661 lenrange[0] = n;
4662 if (lenrange[1] < n)
4663 lenrange[1] = n;
4665 /* Set the size of the representation. */
4666 if (lenrange[2] < nbytes)
4667 lenrange[2] = nbytes;
4669 /* Clear NULTERM if none of the bytes is zero. */
4670 if (n == nbytes)
4671 *nulterm = false;
4673 if (n)
4675 /* When the initial number of non-zero bytes N is non-zero, reset
4676 *ALLNUL; if N is less than that the size of the representation
4677 also clear *ALLNONNUL. */
4678 *allnul = false;
4679 if (n < nbytes)
4680 *allnonnul = false;
4682 else if (*allnul || *allnonnul)
4684 *allnonnul = false;
4686 if (*allnul)
4688 /* When either ALLNUL is set and N is zero, also determine
4689 whether all subsequent bytes after the first one (which
4690 is nul) are zero or nonzero and clear ALLNUL if not. */
4691 for (const char *p = prep; p != prep + nbytes; ++p)
4692 if (*p)
4694 *allnul = false;
4695 break;
4700 return true;
4703 /* Like count_nonzero_bytes, but instead of counting bytes in EXP, count
4704 bytes that are pointed to by EXP, which should be a pointer. */
4706 bool
4707 strlen_pass::count_nonzero_bytes_addr (tree exp, gimple *stmt,
4708 unsigned HOST_WIDE_INT offset,
4709 unsigned HOST_WIDE_INT nbytes,
4710 unsigned lenrange[3], bool *nulterm,
4711 bool *allnul, bool *allnonnul,
4712 ssa_name_limit_t &snlim)
4714 int idx = get_stridx (exp, stmt);
4715 if (idx > 0)
4717 strinfo *si = get_strinfo (idx);
4718 if (!si)
4719 return false;
4721 /* Handle both constant lengths as well non-constant lengths
4722 in some range. */
4723 unsigned HOST_WIDE_INT minlen, maxlen;
4724 if (tree_fits_shwi_p (si->nonzero_chars))
4725 minlen = maxlen = tree_to_shwi (si->nonzero_chars);
4726 else if (si->nonzero_chars
4727 && TREE_CODE (si->nonzero_chars) == SSA_NAME)
4729 value_range vr;
4730 ptr_qry.rvals->range_of_expr (vr, si->nonzero_chars, stmt);
4731 if (vr.kind () != VR_RANGE)
4732 return false;
4734 minlen = tree_to_uhwi (vr.min ());
4735 maxlen = tree_to_uhwi (vr.max ());
4737 else
4738 return false;
4740 if (maxlen < offset)
4741 return false;
4743 minlen = minlen < offset ? 0 : minlen - offset;
4744 maxlen -= offset;
4745 if (maxlen + 1 < nbytes)
4746 return false;
4748 if (nbytes <= minlen)
4749 *nulterm = false;
4751 if (nbytes < minlen)
4753 minlen = nbytes;
4754 if (nbytes < maxlen)
4755 maxlen = nbytes;
4758 if (minlen < lenrange[0])
4759 lenrange[0] = minlen;
4760 if (lenrange[1] < maxlen)
4761 lenrange[1] = maxlen;
4763 if (lenrange[2] < nbytes)
4764 lenrange[2] = nbytes;
4766 /* Since only the length of the string are known and not its contents,
4767 clear ALLNUL and ALLNONNUL purely on the basis of the length. */
4768 *allnul = false;
4769 if (minlen < nbytes)
4770 *allnonnul = false;
4772 return true;
4775 if (TREE_CODE (exp) == ADDR_EXPR)
4776 return count_nonzero_bytes (TREE_OPERAND (exp, 0), stmt,
4777 offset, nbytes,
4778 lenrange, nulterm, allnul, allnonnul, snlim);
4780 if (TREE_CODE (exp) == SSA_NAME)
4782 gimple *stmt = SSA_NAME_DEF_STMT (exp);
4783 if (gimple_code (stmt) == GIMPLE_PHI)
4785 /* Avoid processing an SSA_NAME that has already been visited
4786 or if an SSA_NAME limit has been reached. Indicate success
4787 if the former and failure if the latter. */
4788 if (int res = snlim.next_phi (exp))
4789 return res > 0;
4791 /* Determine the minimum and maximum from the PHI arguments. */
4792 unsigned int n = gimple_phi_num_args (stmt);
4793 for (unsigned i = 0; i != n; i++)
4795 tree def = gimple_phi_arg_def (stmt, i);
4796 if (!count_nonzero_bytes_addr (def, stmt,
4797 offset, nbytes, lenrange,
4798 nulterm, allnul, allnonnul,
4799 snlim))
4800 return false;
4803 return true;
4807 /* Otherwise we don't know anything. */
4808 lenrange[0] = 0;
4809 if (lenrange[1] < nbytes)
4810 lenrange[1] = nbytes;
4811 if (lenrange[2] < nbytes)
4812 lenrange[2] = nbytes;
4813 *nulterm = false;
4814 *allnul = false;
4815 *allnonnul = false;
4816 return true;
4819 /* Same as above except with an implicit SSA_NAME limit. When EXPR_OR_TYPE
4820 is a type rather than an expression use its size to compute the range.
4821 RVALS is used to determine ranges of dynamically computed string lengths
4822 (the results of strlen). */
4824 bool
4825 strlen_pass::count_nonzero_bytes (tree expr_or_type, gimple *stmt,
4826 unsigned lenrange[3], bool *nulterm,
4827 bool *allnul, bool *allnonnul)
4829 if (TYPE_P (expr_or_type))
4830 return nonzero_bytes_for_type (expr_or_type, lenrange,
4831 nulterm, allnul, allnonnul);
4833 /* Set to optimistic values so the caller doesn't have to worry about
4834 initializing these and to what. On success, the function will clear
4835 these if it determines their values are different but being recursive
4836 it never sets either to true. On failure, their values are
4837 unspecified. */
4838 *nulterm = true;
4839 *allnul = true;
4840 *allnonnul = true;
4842 ssa_name_limit_t snlim;
4843 tree expr = expr_or_type;
4844 return count_nonzero_bytes (expr, stmt,
4845 0, 0, lenrange, nulterm, allnul, allnonnul,
4846 snlim);
4849 /* Handle a single or multibyte store other than by a built-in function,
4850 either via a single character assignment or by multi-byte assignment
4851 either via MEM_REF or via a type other than char (such as in
4852 '*(int*)a = 12345'). Return true to let the caller advance *GSI to
4853 the next statement in the basic block and false otherwise. */
4855 bool
4856 strlen_pass::handle_store (bool *zero_write)
4858 gimple *stmt = gsi_stmt (m_gsi);
4859 /* The LHS and RHS of the store. The RHS is null if STMT is a function
4860 call. STORETYPE is the type of the store (determined from either
4861 the RHS of the assignment statement or the LHS of a function call. */
4862 tree lhs, rhs, storetype;
4863 if (is_gimple_assign (stmt))
4865 lhs = gimple_assign_lhs (stmt);
4866 rhs = gimple_assign_rhs1 (stmt);
4867 storetype = TREE_TYPE (rhs);
4869 else if (is_gimple_call (stmt))
4871 lhs = gimple_call_lhs (stmt);
4872 rhs = NULL_TREE;
4873 storetype = TREE_TYPE (lhs);
4875 else
4876 return true;
4878 tree ssaname = NULL_TREE;
4879 strinfo *si = NULL;
4880 int idx = -1;
4882 range_query *const rvals = ptr_qry.rvals;
4884 /* The offset of the first byte in LHS modified by the store. */
4885 unsigned HOST_WIDE_INT offset = 0;
4887 if (TREE_CODE (lhs) == MEM_REF
4888 && TREE_CODE (TREE_OPERAND (lhs, 0)) == SSA_NAME)
4890 tree mem_offset = TREE_OPERAND (lhs, 1);
4891 if (tree_fits_uhwi_p (mem_offset))
4893 /* Get the strinfo for the base, and use it if it starts with at
4894 least OFFSET nonzero characters. This is trivially true if
4895 OFFSET is zero. */
4896 offset = tree_to_uhwi (mem_offset);
4897 idx = get_stridx (TREE_OPERAND (lhs, 0), stmt);
4898 if (idx > 0)
4899 si = get_strinfo (idx);
4900 if (offset == 0)
4901 ssaname = TREE_OPERAND (lhs, 0);
4902 else if (si == NULL
4903 || compare_nonzero_chars (si, stmt, offset, rvals) < 0)
4905 *zero_write = rhs ? initializer_zerop (rhs) : false;
4907 bool dummy;
4908 unsigned lenrange[] = { UINT_MAX, 0, 0 };
4909 if (count_nonzero_bytes (rhs ? rhs : storetype, stmt, lenrange,
4910 &dummy, &dummy, &dummy))
4911 maybe_warn_overflow (stmt, true, lenrange[2]);
4913 return true;
4917 else
4919 idx = get_addr_stridx (lhs, stmt, NULL_TREE, &offset, rvals);
4920 if (idx > 0)
4921 si = get_strinfo (idx);
4924 /* Minimum and maximum leading non-zero bytes and the size of the store. */
4925 unsigned lenrange[] = { UINT_MAX, 0, 0 };
4927 /* Set to the minimum length of the string being assigned if known. */
4928 unsigned HOST_WIDE_INT rhs_minlen;
4930 /* STORING_NONZERO_P is true iff not all stored characters are zero.
4931 STORING_ALL_NONZERO_P is true if all stored characters are zero.
4932 STORING_ALL_ZEROS_P is true iff all stored characters are zero.
4933 Both are false when it's impossible to determine which is true. */
4934 bool storing_nonzero_p;
4935 bool storing_all_nonzero_p;
4936 bool storing_all_zeros_p;
4937 /* FULL_STRING_P is set when the stored sequence of characters form
4938 a nul-terminated string. */
4939 bool full_string_p;
4941 const bool ranges_valid
4942 = count_nonzero_bytes (rhs ? rhs : storetype, stmt,
4943 lenrange, &full_string_p,
4944 &storing_all_zeros_p, &storing_all_nonzero_p);
4946 if (ranges_valid)
4948 rhs_minlen = lenrange[0];
4949 storing_nonzero_p = lenrange[1] > 0;
4950 *zero_write = storing_all_zeros_p;
4952 maybe_warn_overflow (stmt, true, lenrange[2]);
4954 else
4956 rhs_minlen = HOST_WIDE_INT_M1U;
4957 full_string_p = false;
4958 storing_nonzero_p = false;
4959 storing_all_zeros_p = false;
4960 storing_all_nonzero_p = false;
4963 if (si != NULL)
4965 /* The corresponding element is set to 1 if the first and last
4966 element, respectively, of the sequence of characters being
4967 written over the string described by SI ends before
4968 the terminating nul (if it has one), to zero if the nul is
4969 being overwritten but not beyond, or negative otherwise. */
4970 int store_before_nul[2];
4971 if (ranges_valid)
4973 /* The offset of the last stored byte. */
4974 unsigned HOST_WIDE_INT endoff = offset + lenrange[2] - 1;
4975 store_before_nul[0]
4976 = compare_nonzero_chars (si, stmt, offset, rvals);
4977 if (endoff == offset)
4978 store_before_nul[1] = store_before_nul[0];
4979 else
4980 store_before_nul[1]
4981 = compare_nonzero_chars (si, stmt, endoff, rvals);
4983 else
4985 store_before_nul[0]
4986 = compare_nonzero_chars (si, stmt, offset, rvals);
4987 store_before_nul[1] = store_before_nul[0];
4988 gcc_assert (offset == 0 || store_before_nul[0] >= 0);
4991 if (storing_all_zeros_p
4992 && store_before_nul[0] == 0
4993 && store_before_nul[1] == 0
4994 && si->full_string_p)
4996 /* When overwriting a '\0' with a '\0', the store can be removed
4997 if we know it has been stored in the current function. */
4998 if (!stmt_could_throw_p (cfun, stmt) && si->writable)
5000 unlink_stmt_vdef (stmt);
5001 release_defs (stmt);
5002 gsi_remove (&m_gsi, true);
5003 return false;
5005 else
5007 si->writable = true;
5008 gsi_next (&m_gsi);
5009 return false;
5013 if (store_before_nul[1] > 0
5014 && storing_nonzero_p
5015 && lenrange[0] == lenrange[1]
5016 && lenrange[0] == lenrange[2]
5017 && TREE_CODE (storetype) == INTEGER_TYPE)
5019 /* Handle a store of one or more non-nul characters that ends
5020 before the terminating nul of the destination and so does
5021 not affect its length
5022 If si->nonzero_chars > OFFSET, we aren't overwriting '\0',
5023 and if we aren't storing '\0', we know that the length of
5024 the string and any other zero terminated string in memory
5025 remains the same. In that case we move to the next gimple
5026 statement and return to signal the caller that it shouldn't
5027 invalidate anything.
5029 This is beneficial for cases like:
5031 char p[20];
5032 void foo (char *q)
5034 strcpy (p, "foobar");
5035 size_t len = strlen (p); // can be folded to 6
5036 size_t len2 = strlen (q); // has to be computed
5037 p[0] = 'X';
5038 size_t len3 = strlen (p); // can be folded to 6
5039 size_t len4 = strlen (q); // can be folded to len2
5040 bar (len, len2, len3, len4);
5041 } */
5042 gsi_next (&m_gsi);
5043 return false;
5046 if (storing_all_zeros_p
5047 || storing_nonzero_p
5048 || (offset != 0 && store_before_nul[1] > 0))
5050 /* When STORING_NONZERO_P, we know that the string will start
5051 with at least OFFSET + 1 nonzero characters. If storing
5052 a single character, set si->NONZERO_CHARS to the result.
5053 If storing multiple characters, try to determine the number
5054 of leading non-zero characters and set si->NONZERO_CHARS to
5055 the result instead.
5057 When STORING_ALL_ZEROS_P, we know that the string is now
5058 OFFSET characters long.
5060 Otherwise, we're storing an unknown value at offset OFFSET,
5061 so need to clip the nonzero_chars to OFFSET.
5062 Use the minimum length of the string (or individual character)
5063 being stored if it's known. Otherwise, STORING_NONZERO_P
5064 guarantees it's at least 1. */
5065 HOST_WIDE_INT len
5066 = storing_nonzero_p && ranges_valid ? lenrange[0] : 1;
5067 location_t loc = gimple_location (stmt);
5068 tree oldlen = si->nonzero_chars;
5069 if (store_before_nul[1] == 0 && si->full_string_p)
5070 /* We're overwriting the nul terminator with a nonzero or
5071 unknown character. If the previous stmt was a memcpy,
5072 its length may be decreased. */
5073 adjust_last_stmt (si, stmt, false);
5074 si = unshare_strinfo (si);
5075 if (storing_nonzero_p)
5077 gcc_assert (len >= 0);
5078 si->nonzero_chars = build_int_cst (size_type_node, offset + len);
5080 else
5081 si->nonzero_chars = build_int_cst (size_type_node, offset);
5083 /* Set FULL_STRING_P only if the length of the strings being
5084 written is the same, and clear it if the strings have
5085 different lengths. In the latter case the length stored
5086 in si->NONZERO_CHARS becomes the lower bound.
5087 FIXME: Handle the upper bound of the length if possible. */
5088 si->full_string_p = full_string_p && lenrange[0] == lenrange[1];
5090 if (storing_all_zeros_p
5091 && ssaname
5092 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (ssaname))
5093 si->endptr = ssaname;
5094 else
5095 si->endptr = NULL;
5096 si->next = 0;
5097 si->stmt = NULL;
5098 si->writable = true;
5099 si->dont_invalidate = true;
5100 if (oldlen)
5102 tree adj = fold_build2_loc (loc, MINUS_EXPR, size_type_node,
5103 si->nonzero_chars, oldlen);
5104 adjust_related_strinfos (loc, si, adj);
5106 else
5107 si->prev = 0;
5110 else if (idx == 0 && (storing_all_zeros_p || storing_nonzero_p))
5112 if (ssaname)
5113 idx = new_stridx (ssaname);
5114 else
5115 idx = new_addr_stridx (lhs);
5116 if (idx != 0)
5118 tree ptr = (ssaname ? ssaname : build_fold_addr_expr (lhs));
5120 HOST_WIDE_INT slen;
5121 if (storing_all_zeros_p)
5122 slen = 0;
5123 else if (storing_nonzero_p && ranges_valid)
5125 /* FIXME: Handle the upper bound of the length when
5126 LENRANGE[0] != LENRANGE[1]. */
5127 slen = lenrange[0];
5128 if (lenrange[0] != lenrange[1])
5129 /* Set the minimum length but ignore the maximum
5130 for now. */
5131 full_string_p = false;
5133 else
5134 slen = -1;
5136 tree len = (slen <= 0
5137 ? size_zero_node
5138 : build_int_cst (size_type_node, slen));
5139 si = new_strinfo (ptr, idx, len, slen >= 0 && full_string_p);
5140 set_strinfo (idx, si);
5141 if (storing_all_zeros_p
5142 && ssaname
5143 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (ssaname))
5144 si->endptr = ssaname;
5145 si->dont_invalidate = true;
5146 si->writable = true;
5149 else if (idx == 0
5150 && rhs_minlen < HOST_WIDE_INT_M1U
5151 && ssaname == NULL_TREE
5152 && TREE_CODE (TREE_TYPE (lhs)) == ARRAY_TYPE)
5154 HOST_WIDE_INT a = int_size_in_bytes (TREE_TYPE (lhs));
5155 if (a > 0 && (unsigned HOST_WIDE_INT) a > rhs_minlen)
5157 int idx = new_addr_stridx (lhs);
5158 if (idx != 0)
5160 si = new_strinfo (build_fold_addr_expr (lhs), idx,
5161 build_int_cst (size_type_node, rhs_minlen),
5162 full_string_p);
5163 set_strinfo (idx, si);
5164 si->dont_invalidate = true;
5169 if (si != NULL && offset == 0 && storing_all_zeros_p && lenrange[2] == 1)
5171 /* For single-byte stores only, allow adjust_last_stmt to remove
5172 the statement if the stored '\0' is immediately overwritten. */
5173 laststmt.stmt = stmt;
5174 laststmt.len = build_int_cst (size_type_node, 1);
5175 laststmt.stridx = si->idx;
5177 return true;
5180 /* Try to fold strstr (s, t) eq/ne s to strncmp (s, t, strlen (t)) eq/ne 0. */
5182 static void
5183 fold_strstr_to_strncmp (tree rhs1, tree rhs2, gimple *stmt)
5185 if (TREE_CODE (rhs1) != SSA_NAME
5186 || TREE_CODE (rhs2) != SSA_NAME)
5187 return;
5189 gimple *call_stmt = NULL;
5190 for (int pass = 0; pass < 2; pass++)
5192 gimple *g = SSA_NAME_DEF_STMT (rhs1);
5193 if (gimple_call_builtin_p (g, BUILT_IN_STRSTR)
5194 && has_single_use (rhs1)
5195 && gimple_call_arg (g, 0) == rhs2)
5197 call_stmt = g;
5198 break;
5200 std::swap (rhs1, rhs2);
5203 if (call_stmt)
5205 tree arg0 = gimple_call_arg (call_stmt, 0);
5207 if (arg0 == rhs2)
5209 tree arg1 = gimple_call_arg (call_stmt, 1);
5210 tree arg1_len = NULL_TREE;
5211 int idx = get_stridx (arg1, call_stmt);
5213 if (idx)
5215 if (idx < 0)
5216 arg1_len = build_int_cst (size_type_node, ~idx);
5217 else
5219 strinfo *si = get_strinfo (idx);
5220 if (si)
5221 arg1_len = get_string_length (si);
5225 if (arg1_len != NULL_TREE)
5227 gimple_stmt_iterator gsi = gsi_for_stmt (call_stmt);
5228 tree strncmp_decl = builtin_decl_explicit (BUILT_IN_STRNCMP);
5230 if (!is_gimple_val (arg1_len))
5232 tree arg1_len_tmp = make_ssa_name (TREE_TYPE (arg1_len));
5233 gassign *arg1_stmt = gimple_build_assign (arg1_len_tmp,
5234 arg1_len);
5235 gsi_insert_before (&gsi, arg1_stmt, GSI_SAME_STMT);
5236 arg1_len = arg1_len_tmp;
5239 gcall *strncmp_call = gimple_build_call (strncmp_decl, 3,
5240 arg0, arg1, arg1_len);
5241 tree strncmp_lhs = make_ssa_name (integer_type_node);
5242 gimple_set_vuse (strncmp_call, gimple_vuse (call_stmt));
5243 gimple_call_set_lhs (strncmp_call, strncmp_lhs);
5244 gsi_remove (&gsi, true);
5245 gsi_insert_before (&gsi, strncmp_call, GSI_SAME_STMT);
5246 tree zero = build_zero_cst (TREE_TYPE (strncmp_lhs));
5248 if (is_gimple_assign (stmt))
5250 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
5252 tree cond = gimple_assign_rhs1 (stmt);
5253 TREE_OPERAND (cond, 0) = strncmp_lhs;
5254 TREE_OPERAND (cond, 1) = zero;
5256 else
5258 gimple_assign_set_rhs1 (stmt, strncmp_lhs);
5259 gimple_assign_set_rhs2 (stmt, zero);
5262 else
5264 gcond *cond = as_a<gcond *> (stmt);
5265 gimple_cond_set_lhs (cond, strncmp_lhs);
5266 gimple_cond_set_rhs (cond, zero);
5268 update_stmt (stmt);
5274 /* Return true if TYPE corresponds to a narrow character type. */
5276 static bool
5277 is_char_type (tree type)
5279 return (TREE_CODE (type) == INTEGER_TYPE
5280 && TYPE_MODE (type) == TYPE_MODE (char_type_node)
5281 && TYPE_PRECISION (type) == TYPE_PRECISION (char_type_node));
5284 /* Check the built-in call at GSI for validity and optimize it.
5285 Uses RVALS to determine range information.
5286 Return true to let the caller advance *GSI to the next statement
5287 in the basic block and false otherwise. */
5289 bool
5290 strlen_pass::check_and_optimize_call (bool *zero_write)
5292 gimple *stmt = gsi_stmt (m_gsi);
5294 if (!gimple_call_builtin_p (stmt, BUILT_IN_NORMAL))
5296 tree fntype = gimple_call_fntype (stmt);
5297 if (!fntype)
5298 return true;
5300 if (lookup_attribute ("alloc_size", TYPE_ATTRIBUTES (fntype)))
5302 handle_alloc_call (BUILT_IN_NONE);
5303 return true;
5306 if (tree lhs = gimple_call_lhs (stmt))
5307 handle_assign (lhs, zero_write);
5309 /* Proceed to handle user-defined formatting functions. */
5312 /* When not optimizing we must be checking printf calls which
5313 we do even for user-defined functions when they are declared
5314 with attribute format. */
5315 if (!flag_optimize_strlen
5316 || !strlen_optimize
5317 || !valid_builtin_call (stmt))
5318 return !handle_printf_call (&m_gsi, ptr_qry);
5320 tree callee = gimple_call_fndecl (stmt);
5321 switch (DECL_FUNCTION_CODE (callee))
5323 case BUILT_IN_STRLEN:
5324 case BUILT_IN_STRNLEN:
5325 handle_builtin_strlen ();
5326 break;
5327 case BUILT_IN_STRCHR:
5328 handle_builtin_strchr ();
5329 break;
5330 case BUILT_IN_STRCPY:
5331 case BUILT_IN_STRCPY_CHK:
5332 case BUILT_IN_STPCPY:
5333 case BUILT_IN_STPCPY_CHK:
5334 handle_builtin_strcpy (DECL_FUNCTION_CODE (callee));
5335 break;
5337 case BUILT_IN_STRNCAT:
5338 case BUILT_IN_STRNCAT_CHK:
5339 handle_builtin_strncat (DECL_FUNCTION_CODE (callee));
5340 break;
5342 case BUILT_IN_STPNCPY:
5343 case BUILT_IN_STPNCPY_CHK:
5344 case BUILT_IN_STRNCPY:
5345 case BUILT_IN_STRNCPY_CHK:
5346 handle_builtin_stxncpy_strncat (false);
5347 break;
5349 case BUILT_IN_MEMCPY:
5350 case BUILT_IN_MEMCPY_CHK:
5351 case BUILT_IN_MEMPCPY:
5352 case BUILT_IN_MEMPCPY_CHK:
5353 handle_builtin_memcpy (DECL_FUNCTION_CODE (callee));
5354 break;
5355 case BUILT_IN_STRCAT:
5356 case BUILT_IN_STRCAT_CHK:
5357 handle_builtin_strcat (DECL_FUNCTION_CODE (callee));
5358 break;
5359 case BUILT_IN_ALLOCA:
5360 case BUILT_IN_ALLOCA_WITH_ALIGN:
5361 case BUILT_IN_MALLOC:
5362 case BUILT_IN_CALLOC:
5363 handle_alloc_call (DECL_FUNCTION_CODE (callee));
5364 break;
5365 case BUILT_IN_MEMSET:
5366 if (handle_builtin_memset (zero_write))
5367 return false;
5368 break;
5369 case BUILT_IN_MEMCMP:
5370 if (handle_builtin_memcmp ())
5371 return false;
5372 break;
5373 case BUILT_IN_STRCMP:
5374 case BUILT_IN_STRNCMP:
5375 if (handle_builtin_string_cmp ())
5376 return false;
5377 break;
5378 default:
5379 if (handle_printf_call (&m_gsi, ptr_qry))
5380 return false;
5381 break;
5384 return true;
5387 /* Handle an assignment statement at *GSI to a LHS of integral type.
5388 If GSI's basic block needs clean-up of EH, set *CLEANUP_EH to true. */
5390 void
5391 strlen_pass::handle_integral_assign (bool *cleanup_eh)
5393 gimple *stmt = gsi_stmt (m_gsi);
5394 tree lhs = gimple_assign_lhs (stmt);
5395 tree lhs_type = TREE_TYPE (lhs);
5397 enum tree_code code = gimple_assign_rhs_code (stmt);
5398 if (code == COND_EXPR)
5400 tree cond = gimple_assign_rhs1 (stmt);
5401 enum tree_code cond_code = TREE_CODE (cond);
5403 if (cond_code == EQ_EXPR || cond_code == NE_EXPR)
5404 fold_strstr_to_strncmp (TREE_OPERAND (cond, 0),
5405 TREE_OPERAND (cond, 1), stmt);
5407 else if (code == EQ_EXPR || code == NE_EXPR)
5408 fold_strstr_to_strncmp (gimple_assign_rhs1 (stmt),
5409 gimple_assign_rhs2 (stmt), stmt);
5410 else if (gimple_assign_load_p (stmt)
5411 && TREE_CODE (lhs_type) == INTEGER_TYPE
5412 && TYPE_MODE (lhs_type) == TYPE_MODE (char_type_node)
5413 && (TYPE_PRECISION (lhs_type)
5414 == TYPE_PRECISION (char_type_node))
5415 && !gimple_has_volatile_ops (stmt))
5417 tree off = integer_zero_node;
5418 unsigned HOST_WIDE_INT coff = 0;
5419 int idx = 0;
5420 tree rhs1 = gimple_assign_rhs1 (stmt);
5421 if (code == MEM_REF)
5423 idx = get_stridx (TREE_OPERAND (rhs1, 0), stmt);
5424 if (idx > 0)
5426 strinfo *si = get_strinfo (idx);
5427 if (si
5428 && si->nonzero_chars
5429 && TREE_CODE (si->nonzero_chars) == INTEGER_CST
5430 && (wi::to_widest (si->nonzero_chars)
5431 >= wi::to_widest (off)))
5432 off = TREE_OPERAND (rhs1, 1);
5433 else
5434 /* This case is not useful. See if get_addr_stridx
5435 returns something usable. */
5436 idx = 0;
5439 if (idx <= 0)
5440 idx = get_addr_stridx (rhs1, stmt, NULL_TREE, &coff);
5441 if (idx > 0)
5443 strinfo *si = get_strinfo (idx);
5444 if (si
5445 && si->nonzero_chars
5446 && TREE_CODE (si->nonzero_chars) == INTEGER_CST)
5448 widest_int w1 = wi::to_widest (si->nonzero_chars);
5449 widest_int w2 = wi::to_widest (off) + coff;
5450 if (w1 == w2
5451 && si->full_string_p)
5453 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
5455 fprintf (dump_file, "Optimizing: ");
5456 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
5459 /* Reading the final '\0' character. */
5460 tree zero = build_int_cst (lhs_type, 0);
5461 gimple_set_vuse (stmt, NULL_TREE);
5462 gimple_assign_set_rhs_from_tree (&m_gsi, zero);
5463 *cleanup_eh
5464 |= maybe_clean_or_replace_eh_stmt (stmt,
5465 gsi_stmt (m_gsi));
5466 stmt = gsi_stmt (m_gsi);
5467 update_stmt (stmt);
5469 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
5471 fprintf (dump_file, "into: ");
5472 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
5475 else if (w1 > w2)
5477 /* Reading a character before the final '\0'
5478 character. Just set the value range to ~[0, 0]
5479 if we don't have anything better. */
5480 value_range r;
5481 if (!get_range_query (cfun)->range_of_expr (r, lhs)
5482 || r.varying_p ())
5484 r.set_nonzero (lhs_type);
5485 set_range_info (lhs, r);
5491 else if (code == MEM_REF && TREE_CODE (lhs) == SSA_NAME)
5493 if (int idx = new_stridx (lhs))
5495 /* Record multi-byte assignments from MEM_REFs. */
5496 bool storing_all_nonzero_p;
5497 bool storing_all_zeros_p;
5498 bool full_string_p;
5499 unsigned lenrange[] = { UINT_MAX, 0, 0 };
5500 tree rhs = gimple_assign_rhs1 (stmt);
5501 const bool ranges_valid
5502 = count_nonzero_bytes (rhs, stmt,
5503 lenrange, &full_string_p,
5504 &storing_all_zeros_p,
5505 &storing_all_nonzero_p);
5506 if (ranges_valid)
5508 tree length = build_int_cst (sizetype, lenrange[0]);
5509 strinfo *si = new_strinfo (lhs, idx, length, full_string_p);
5510 set_strinfo (idx, si);
5511 si->writable = true;
5512 si->dont_invalidate = true;
5517 if (strlen_to_stridx)
5519 tree rhs1 = gimple_assign_rhs1 (stmt);
5520 if (stridx_strlenloc *ps = strlen_to_stridx->get (rhs1))
5521 strlen_to_stridx->put (lhs, stridx_strlenloc (*ps));
5525 /* Handle assignment statement at *GSI to LHS. Set *ZERO_WRITE if
5526 the assignent stores all zero bytes.. */
5528 bool
5529 strlen_pass::handle_assign (tree lhs, bool *zero_write)
5531 tree type = TREE_TYPE (lhs);
5532 if (TREE_CODE (type) == ARRAY_TYPE)
5533 type = TREE_TYPE (type);
5535 bool is_char_store = is_char_type (type);
5536 if (!is_char_store && TREE_CODE (lhs) == MEM_REF)
5538 /* To consider stores into char objects via integer types other
5539 than char but not those to non-character objects, determine
5540 the type of the destination rather than just the type of
5541 the access. */
5542 for (int i = 0; i != 2; ++i)
5544 tree ref = TREE_OPERAND (lhs, i);
5545 type = TREE_TYPE (ref);
5546 if (TREE_CODE (type) == POINTER_TYPE)
5547 type = TREE_TYPE (type);
5548 if (TREE_CODE (type) == ARRAY_TYPE)
5549 type = TREE_TYPE (type);
5550 if (is_char_type (type))
5552 is_char_store = true;
5553 break;
5558 /* Handle a single or multibyte assignment. */
5559 if (is_char_store && !handle_store (zero_write))
5560 return false;
5562 return true;
5566 /* Attempt to check for validity of the performed access a single statement
5567 at *GSI using string length knowledge, and to optimize it.
5568 If the given basic block needs clean-up of EH, CLEANUP_EH is set to
5569 true. Return true to let the caller advance *GSI to the next statement
5570 in the basic block and false otherwise. */
5572 bool
5573 strlen_pass::check_and_optimize_stmt (bool *cleanup_eh)
5575 gimple *stmt = gsi_stmt (m_gsi);
5577 /* For statements that modify a string, set to true if the write
5578 is only zeros. */
5579 bool zero_write = false;
5581 if (is_gimple_call (stmt))
5583 if (!check_and_optimize_call (&zero_write))
5584 return false;
5586 else if (!flag_optimize_strlen || !strlen_optimize)
5587 return true;
5588 else if (is_gimple_assign (stmt) && !gimple_clobber_p (stmt))
5590 /* Handle non-clobbering assignment. */
5591 tree lhs = gimple_assign_lhs (stmt);
5592 tree lhs_type = TREE_TYPE (lhs);
5594 if (TREE_CODE (lhs) == SSA_NAME && POINTER_TYPE_P (lhs_type))
5596 if (gimple_assign_single_p (stmt)
5597 || (gimple_assign_cast_p (stmt)
5598 && POINTER_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt)))))
5600 int idx = get_stridx (gimple_assign_rhs1 (stmt), stmt);
5601 ssa_ver_to_stridx[SSA_NAME_VERSION (lhs)] = idx;
5603 else if (gimple_assign_rhs_code (stmt) == POINTER_PLUS_EXPR)
5604 handle_pointer_plus ();
5606 else if (TREE_CODE (lhs) == SSA_NAME && INTEGRAL_TYPE_P (lhs_type))
5607 /* Handle assignment to a character. */
5608 handle_integral_assign (cleanup_eh);
5609 else if (TREE_CODE (lhs) != SSA_NAME && !TREE_SIDE_EFFECTS (lhs))
5610 if (!handle_assign (lhs, &zero_write))
5611 return false;
5613 else if (gcond *cond = dyn_cast<gcond *> (stmt))
5615 enum tree_code code = gimple_cond_code (cond);
5616 if (code == EQ_EXPR || code == NE_EXPR)
5617 fold_strstr_to_strncmp (gimple_cond_lhs (stmt),
5618 gimple_cond_rhs (stmt), stmt);
5621 if (gimple_vdef (stmt))
5622 maybe_invalidate (stmt, zero_write);
5623 return true;
5626 /* Recursively call maybe_invalidate on stmts that might be executed
5627 in between dombb and current bb and that contain a vdef. Stop when
5628 *count stmts are inspected, or if the whole strinfo vector has
5629 been invalidated. */
5631 static void
5632 do_invalidate (basic_block dombb, gimple *phi, bitmap visited, int *count)
5634 unsigned int i, n = gimple_phi_num_args (phi);
5636 for (i = 0; i < n; i++)
5638 tree vuse = gimple_phi_arg_def (phi, i);
5639 gimple *stmt = SSA_NAME_DEF_STMT (vuse);
5640 basic_block bb = gimple_bb (stmt);
5641 if (bb == NULL
5642 || bb == dombb
5643 || !bitmap_set_bit (visited, bb->index)
5644 || !dominated_by_p (CDI_DOMINATORS, bb, dombb))
5645 continue;
5646 while (1)
5648 if (gimple_code (stmt) == GIMPLE_PHI)
5650 do_invalidate (dombb, stmt, visited, count);
5651 if (*count == 0)
5652 return;
5653 break;
5655 if (--*count == 0)
5656 return;
5657 if (!maybe_invalidate (stmt))
5659 *count = 0;
5660 return;
5662 vuse = gimple_vuse (stmt);
5663 stmt = SSA_NAME_DEF_STMT (vuse);
5664 if (gimple_bb (stmt) != bb)
5666 bb = gimple_bb (stmt);
5667 if (bb == NULL
5668 || bb == dombb
5669 || !bitmap_set_bit (visited, bb->index)
5670 || !dominated_by_p (CDI_DOMINATORS, bb, dombb))
5671 break;
5677 /* Release pointer_query cache. */
5679 strlen_pass::~strlen_pass ()
5681 ptr_qry.flush_cache ();
5684 /* Callback for walk_dominator_tree. Attempt to optimize various
5685 string ops by remembering string lengths pointed by pointer SSA_NAMEs. */
5687 edge
5688 strlen_pass::before_dom_children (basic_block bb)
5690 basic_block dombb = get_immediate_dominator (CDI_DOMINATORS, bb);
5692 if (dombb == NULL)
5693 stridx_to_strinfo = NULL;
5694 else
5696 stridx_to_strinfo = ((vec<strinfo *, va_heap, vl_embed> *) dombb->aux);
5697 if (stridx_to_strinfo)
5699 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
5700 gsi_next (&gsi))
5702 gphi *phi = gsi.phi ();
5703 if (virtual_operand_p (gimple_phi_result (phi)))
5705 bitmap visited = BITMAP_ALLOC (NULL);
5706 int count_vdef = 100;
5707 do_invalidate (dombb, phi, visited, &count_vdef);
5708 BITMAP_FREE (visited);
5709 if (count_vdef == 0)
5711 /* If there were too many vdefs in between immediate
5712 dominator and current bb, invalidate everything.
5713 If stridx_to_strinfo has been unshared, we need
5714 to free it, otherwise just set it to NULL. */
5715 if (!strinfo_shared ())
5717 unsigned int i;
5718 strinfo *si;
5720 for (i = 1;
5721 vec_safe_iterate (stridx_to_strinfo, i, &si);
5722 ++i)
5724 free_strinfo (si);
5725 (*stridx_to_strinfo)[i] = NULL;
5728 else
5729 stridx_to_strinfo = NULL;
5731 break;
5737 /* If all PHI arguments have the same string index, the PHI result
5738 has it as well. */
5739 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
5740 gsi_next (&gsi))
5742 gphi *phi = gsi.phi ();
5743 tree result = gimple_phi_result (phi);
5744 if (!virtual_operand_p (result) && POINTER_TYPE_P (TREE_TYPE (result)))
5746 int idx = get_stridx (gimple_phi_arg_def (phi, 0), phi);
5747 if (idx != 0)
5749 unsigned int i, n = gimple_phi_num_args (phi);
5750 for (i = 1; i < n; i++)
5751 if (idx != get_stridx (gimple_phi_arg_def (phi, i), phi))
5752 break;
5753 if (i == n)
5754 ssa_ver_to_stridx[SSA_NAME_VERSION (result)] = idx;
5759 bool cleanup_eh = false;
5761 /* Attempt to optimize individual statements. */
5762 for (m_gsi = gsi_start_bb (bb); !gsi_end_p (m_gsi); )
5764 /* Reset search depth preformance counter. */
5765 ptr_qry.depth = 0;
5767 if (check_and_optimize_stmt (&cleanup_eh))
5768 gsi_next (&m_gsi);
5771 if (cleanup_eh && gimple_purge_dead_eh_edges (bb))
5772 m_cleanup_cfg = true;
5774 bb->aux = stridx_to_strinfo;
5775 if (vec_safe_length (stridx_to_strinfo) && !strinfo_shared ())
5776 (*stridx_to_strinfo)[0] = (strinfo *) bb;
5777 return NULL;
5780 /* Callback for walk_dominator_tree. Free strinfo vector if it is
5781 owned by the current bb, clear bb->aux. */
5783 void
5784 strlen_pass::after_dom_children (basic_block bb)
5786 if (bb->aux)
5788 stridx_to_strinfo = ((vec<strinfo *, va_heap, vl_embed> *) bb->aux);
5789 if (vec_safe_length (stridx_to_strinfo)
5790 && (*stridx_to_strinfo)[0] == (strinfo *) bb)
5792 unsigned int i;
5793 strinfo *si;
5795 for (i = 1; vec_safe_iterate (stridx_to_strinfo, i, &si); ++i)
5796 free_strinfo (si);
5797 vec_free (stridx_to_strinfo);
5799 bb->aux = NULL;
5803 namespace {
5805 static unsigned int
5806 printf_strlen_execute (function *fun, bool warn_only)
5808 strlen_optimize = !warn_only;
5810 calculate_dominance_info (CDI_DOMINATORS);
5812 bool use_scev = optimize > 0 && flag_printf_return_value;
5813 if (use_scev)
5815 loop_optimizer_init (LOOPS_NORMAL);
5816 scev_initialize ();
5819 gcc_assert (!strlen_to_stridx);
5820 if (warn_stringop_overflow || warn_stringop_truncation)
5821 strlen_to_stridx = new hash_map<tree, stridx_strlenloc> ();
5823 /* This has to happen after initializing the loop optimizer
5824 and initializing SCEV as they create new SSA_NAMEs. */
5825 ssa_ver_to_stridx.safe_grow_cleared (num_ssa_names, true);
5826 max_stridx = 1;
5828 /* String length optimization is implemented as a walk of the dominator
5829 tree and a forward walk of statements within each block. */
5830 strlen_pass walker (CDI_DOMINATORS);
5831 walker.walk (ENTRY_BLOCK_PTR_FOR_FN (fun));
5833 if (dump_file && (dump_flags & TDF_DETAILS))
5834 walker.ptr_qry.dump (dump_file);
5836 ssa_ver_to_stridx.release ();
5837 strinfo_pool.release ();
5838 if (decl_to_stridxlist_htab)
5840 obstack_free (&stridx_obstack, NULL);
5841 delete decl_to_stridxlist_htab;
5842 decl_to_stridxlist_htab = NULL;
5844 laststmt.stmt = NULL;
5845 laststmt.len = NULL_TREE;
5846 laststmt.stridx = 0;
5848 if (strlen_to_stridx)
5850 strlen_to_stridx->empty ();
5851 delete strlen_to_stridx;
5852 strlen_to_stridx = NULL;
5855 if (use_scev)
5857 scev_finalize ();
5858 loop_optimizer_finalize ();
5861 return walker.m_cleanup_cfg ? TODO_cleanup_cfg : 0;
5864 /* This file defines two passes: one for warnings that runs only when
5865 optimization is disabled, and another that implements optimizations
5866 and also issues warnings. */
5868 const pass_data pass_data_warn_printf =
5870 GIMPLE_PASS, /* type */
5871 "warn-printf", /* name */
5872 OPTGROUP_NONE, /* optinfo_flags */
5873 TV_NONE, /* tv_id */
5874 /* Normally an optimization pass would require PROP_ssa but because
5875 this pass runs early, with no optimization, to do sprintf format
5876 checking, it only requires PROP_cfg. */
5877 PROP_cfg, /* properties_required */
5878 0, /* properties_provided */
5879 0, /* properties_destroyed */
5880 0, /* todo_flags_start */
5881 0, /* todo_flags_finish */
5884 class pass_warn_printf : public gimple_opt_pass
5886 public:
5887 pass_warn_printf (gcc::context *ctxt)
5888 : gimple_opt_pass (pass_data_warn_printf, ctxt)
5891 virtual bool gate (function *);
5892 virtual unsigned int execute (function *fun)
5894 return printf_strlen_execute (fun, true);
5899 /* Return true to run the warning pass only when not optimizing and
5900 iff either -Wformat-overflow or -Wformat-truncation is specified. */
5902 bool
5903 pass_warn_printf::gate (function *)
5905 return !optimize && (warn_format_overflow > 0 || warn_format_trunc > 0);
5908 const pass_data pass_data_strlen =
5910 GIMPLE_PASS, /* type */
5911 "strlen", /* name */
5912 OPTGROUP_NONE, /* optinfo_flags */
5913 TV_TREE_STRLEN, /* tv_id */
5914 PROP_cfg | PROP_ssa, /* properties_required */
5915 0, /* properties_provided */
5916 0, /* properties_destroyed */
5917 0, /* todo_flags_start */
5918 0, /* todo_flags_finish */
5921 class pass_strlen : public gimple_opt_pass
5923 public:
5924 pass_strlen (gcc::context *ctxt)
5925 : gimple_opt_pass (pass_data_strlen, ctxt)
5928 opt_pass * clone () { return new pass_strlen (m_ctxt); }
5930 virtual bool gate (function *);
5931 virtual unsigned int execute (function *fun)
5933 return printf_strlen_execute (fun, false);
5937 /* Return true to run the pass only when the sprintf and/or strlen
5938 optimizations are enabled and -Wformat-overflow or -Wformat-truncation
5939 are specified. */
5941 bool
5942 pass_strlen::gate (function *)
5944 return ((warn_format_overflow > 0
5945 || warn_format_trunc > 0
5946 || warn_restrict > 0
5947 || flag_optimize_strlen > 0
5948 || flag_printf_return_value)
5949 && optimize > 0);
5952 } // anon namespace
5954 gimple_opt_pass *
5955 make_pass_warn_printf (gcc::context *ctxt)
5957 return new pass_warn_printf (ctxt);
5960 gimple_opt_pass *
5961 make_pass_strlen (gcc::context *ctxt)
5963 return new pass_strlen (ctxt);