Objective-C, NeXT, v2: Correct a regression in code-gen.
[official-gcc.git] / gcc / tree-ssa-strlen.cc
blob61c3da22322cfed7bd1d8c1884f63c77af2620e1
1 /* String length optimization
2 Copyright (C) 2011-2024 Free Software Foundation, Inc.
3 Contributed by Jakub Jelinek <jakub@redhat.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "alloc-pool.h"
29 #include "tree-pass.h"
30 #include "ssa.h"
31 #include "cgraph.h"
32 #include "gimple-pretty-print.h"
33 #include "gimple-ssa-warn-access.h"
34 #include "gimple-ssa-warn-restrict.h"
35 #include "fold-const.h"
36 #include "stor-layout.h"
37 #include "gimple-iterator.h"
38 #include "gimple-fold.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimplify-me.h"
42 #include "expr.h"
43 #include "tree-cfg.h"
44 #include "tree-dfa.h"
45 #include "domwalk.h"
46 #include "tree-ssa-alias.h"
47 #include "tree-ssa-propagate.h"
48 #include "tree-ssa-strlen.h"
49 #include "tree-hash-traits.h"
50 #include "builtins.h"
51 #include "pointer-query.h"
52 #include "target.h"
53 #include "diagnostic-core.h"
54 #include "diagnostic.h"
55 #include "intl.h"
56 #include "attribs.h"
57 #include "calls.h"
58 #include "cfgloop.h"
59 #include "tree-ssa-loop.h"
60 #include "tree-scalar-evolution.h"
61 #include "vr-values.h"
62 #include "gimple-range.h"
63 #include "tree-ssa.h"
65 /* A vector indexed by SSA_NAME_VERSION. 0 means unknown, positive value
66 is an index into strinfo vector, negative value stands for
67 string length of a string literal (~strlen). */
68 static vec<int> ssa_ver_to_stridx;
70 /* Number of currently active string indexes plus one. */
71 static int max_stridx;
73 /* Set to true to optimize, false when just checking. */
74 static bool strlen_optimize;
76 /* String information record. */
77 struct strinfo
79 /* Number of leading characters that are known to be nonzero. This is
80 also the length of the string if FULL_STRING_P.
82 The values in a list of related string pointers must be consistent;
83 that is, if strinfo B comes X bytes after strinfo A, it must be
84 the case that A->nonzero_chars == X + B->nonzero_chars. */
85 tree nonzero_chars;
86 /* Any of the corresponding pointers for querying alias oracle. */
87 tree ptr;
88 /* STMT is used for two things:
90 - To record the statement that should be used for delayed length
91 computations. We maintain the invariant that all related strinfos
92 have delayed lengths or none do.
94 - To record the malloc or calloc call that produced this result
95 to optimize away malloc/memset sequences. STMT is reset after
96 a calloc-allocated object has been stored a non-zero value into. */
97 gimple *stmt;
98 /* Set to the dynamic allocation statement for the object (alloca,
99 calloc, malloc, or VLA). Unlike STMT, once set for a strinfo
100 object, ALLOC doesn't change. */
101 gimple *alloc;
102 /* Pointer to '\0' if known, if NULL, it can be computed as
103 ptr + length. */
104 tree endptr;
105 /* Reference count. Any changes to strinfo entry possibly shared
106 with dominating basic blocks need unshare_strinfo first, except
107 for dont_invalidate which affects only the immediately next
108 maybe_invalidate. */
109 int refcount;
110 /* Copy of index. get_strinfo (si->idx) should return si; */
111 int idx;
112 /* These 3 fields are for chaining related string pointers together.
113 E.g. for
114 bl = strlen (b); dl = strlen (d); strcpy (a, b); c = a + bl;
115 strcpy (c, d); e = c + dl;
116 strinfo(a) -> strinfo(c) -> strinfo(e)
117 All have ->first field equal to strinfo(a)->idx and are doubly
118 chained through prev/next fields. The later strinfos are required
119 to point into the same string with zero or more bytes after
120 the previous pointer and all bytes in between the two pointers
121 must be non-zero. Functions like strcpy or memcpy are supposed
122 to adjust all previous strinfo lengths, but not following strinfo
123 lengths (those are uncertain, usually invalidated during
124 maybe_invalidate, except when the alias oracle knows better).
125 Functions like strcat on the other side adjust the whole
126 related strinfo chain.
127 They are updated lazily, so to use the chain the same first fields
128 and si->prev->next == si->idx needs to be verified. */
129 int first;
130 int next;
131 int prev;
132 /* A flag whether the string is known to be written in the current
133 function. */
134 bool writable;
135 /* A flag for the next maybe_invalidate that this strinfo shouldn't
136 be invalidated. Always cleared by maybe_invalidate. */
137 bool dont_invalidate;
138 /* True if the string is known to be nul-terminated after NONZERO_CHARS
139 characters. False is useful when detecting strings that are built
140 up via successive memcpys. */
141 bool full_string_p;
144 /* Pool for allocating strinfo_struct entries. */
145 static object_allocator<strinfo> strinfo_pool ("strinfo pool");
147 /* Vector mapping positive string indexes to strinfo, for the
148 current basic block. The first pointer in the vector is special,
149 it is either NULL, meaning the vector isn't shared, or it is
150 a basic block pointer to the owner basic_block if shared.
151 If some other bb wants to modify the vector, the vector needs
152 to be unshared first, and only the owner bb is supposed to free it. */
153 static vec<strinfo *, va_heap, vl_embed> *stridx_to_strinfo;
155 /* One OFFSET->IDX mapping. */
156 struct stridxlist
158 struct stridxlist *next;
159 HOST_WIDE_INT offset;
160 int idx;
163 /* Hash table entry, mapping a DECL to a chain of OFFSET->IDX mappings. */
164 struct decl_stridxlist_map
166 struct tree_map_base base;
167 struct stridxlist list;
170 /* Hash table for mapping decls to a chained list of offset -> idx
171 mappings. */
172 typedef hash_map<tree_decl_hash, stridxlist> decl_to_stridxlist_htab_t;
173 static decl_to_stridxlist_htab_t *decl_to_stridxlist_htab;
175 /* Hash table mapping strlen (or strnlen with constant bound and return
176 smaller than bound) calls to stridx instances describing
177 the calls' arguments. Non-null only when warn_stringop_truncation
178 is non-zero. */
179 typedef std::pair<int, location_t> stridx_strlenloc;
180 static hash_map<tree, stridx_strlenloc> *strlen_to_stridx;
182 /* Obstack for struct stridxlist and struct decl_stridxlist_map. */
183 static struct obstack stridx_obstack;
185 /* Last memcpy statement if it could be adjusted if the trailing
186 '\0' written is immediately overwritten, or
187 *x = '\0' store that could be removed if it is immediately overwritten. */
188 struct laststmt_struct
190 gimple *stmt;
191 tree len;
192 int stridx;
193 } laststmt;
195 static int get_stridx_plus_constant (strinfo *, unsigned HOST_WIDE_INT, tree);
196 static bool get_range_strlen_dynamic (tree, gimple *, c_strlen_data *,
197 bitmap, pointer_query *, unsigned *);
199 /* Sets MINMAX to either the constant value or the range VAL is in
200 and returns either the constant value or VAL on success or null
201 when the range couldn't be determined. Uses RVALS or CFUN for
202 range info, whichever is nonnull. */
204 tree
205 get_range (tree val, gimple *stmt, wide_int minmax[2],
206 range_query *rvals /* = NULL */)
208 if (!rvals)
210 if (!cfun)
211 /* When called from front ends for global initializers CFUN
212 may be null. */
213 return NULL_TREE;
215 rvals = get_range_query (cfun);
218 Value_Range vr (TREE_TYPE (val));
219 if (!rvals->range_of_expr (vr, val, stmt))
220 return NULL_TREE;
222 tree vrmin, vrmax;
223 value_range_kind rng = get_legacy_range (vr, vrmin, vrmax);
224 if (rng == VR_RANGE)
226 /* Only handle straight ranges. */
227 minmax[0] = wi::to_wide (vrmin);
228 minmax[1] = wi::to_wide (vrmax);
229 return val;
232 return NULL_TREE;
235 class strlen_pass : public dom_walker
237 public:
238 strlen_pass (cdi_direction direction)
239 : dom_walker (direction),
240 ptr_qry (&m_ranger),
241 m_cleanup_cfg (false)
245 ~strlen_pass ();
247 edge before_dom_children (basic_block) final override;
248 void after_dom_children (basic_block) final override;
250 bool check_and_optimize_stmt (bool *cleanup_eh);
251 bool check_and_optimize_call (bool *zero_write);
252 bool handle_assign (tree lhs, bool *zero_write);
253 bool handle_store (bool *zero_write);
254 void handle_pointer_plus ();
255 void handle_builtin_strlen ();
256 void handle_builtin_strchr ();
257 void handle_builtin_strcpy (built_in_function);
258 void handle_integral_assign (bool *cleanup_eh);
259 void handle_builtin_stxncpy_strncat (bool append_p);
260 void handle_builtin_memcpy (built_in_function bcode);
261 void handle_builtin_strcat (built_in_function bcode);
262 void handle_builtin_strncat (built_in_function);
263 bool handle_builtin_memset (bool *zero_write);
264 bool handle_builtin_memcmp ();
265 bool handle_builtin_string_cmp ();
266 void handle_alloc_call (built_in_function);
267 void maybe_warn_overflow (gimple *stmt, bool call_lhs, tree len,
268 strinfo *si = NULL, bool plus_one = false,
269 bool rawmem = false);
270 void maybe_warn_overflow (gimple *stmt, bool call_lhs,
271 unsigned HOST_WIDE_INT len,
272 strinfo *si = NULL,
273 bool plus_one = false, bool rawmem = false);
274 void adjust_last_stmt (strinfo *si, gimple *stmt, bool is_strcat);
275 tree strxcmp_eqz_result (gimple *stmt, tree arg1, int idx1,
276 tree arg2, int idx2,
277 unsigned HOST_WIDE_INT bound,
278 unsigned HOST_WIDE_INT len[2],
279 unsigned HOST_WIDE_INT *psize);
280 bool count_nonzero_bytes (tree expr_or_type,
281 gimple *stmt,
282 unsigned lenrange[3], bool *nulterm,
283 bool *allnul, bool *allnonnul);
284 bool count_nonzero_bytes (tree exp, tree vuse,
285 gimple *stmt,
286 unsigned HOST_WIDE_INT offset,
287 unsigned HOST_WIDE_INT nbytes,
288 unsigned lenrange[3], bool *nulterm,
289 bool *allnul, bool *allnonnul,
290 ssa_name_limit_t &snlim);
291 bool count_nonzero_bytes_addr (tree exp, tree vuse,
292 gimple *stmt,
293 unsigned HOST_WIDE_INT offset,
294 unsigned HOST_WIDE_INT nbytes,
295 unsigned lenrange[3], bool *nulterm,
296 bool *allnul, bool *allnonnul,
297 ssa_name_limit_t &snlim);
298 bool get_len_or_size (gimple *stmt, tree arg, int idx,
299 unsigned HOST_WIDE_INT lenrng[2],
300 unsigned HOST_WIDE_INT *size, bool *nulterm);
302 gimple_ranger m_ranger;
304 /* A pointer_query object to store information about pointers and
305 their targets in. */
306 pointer_query ptr_qry;
308 gimple_stmt_iterator m_gsi;
310 /* Flag that will trigger TODO_cleanup_cfg to be returned in strlen
311 execute function. */
312 bool m_cleanup_cfg;
315 /* Return:
317 * +1 if SI is known to start with more than OFF nonzero characters.
319 * 0 if SI is known to start with exactly OFF nonzero characters.
321 * -1 if SI either does not start with OFF nonzero characters
322 or the relationship between the number of leading nonzero
323 characters in SI and OFF is unknown. */
325 static int
326 compare_nonzero_chars (strinfo *si, unsigned HOST_WIDE_INT off)
328 if (si->nonzero_chars
329 && TREE_CODE (si->nonzero_chars) == INTEGER_CST)
330 return compare_tree_int (si->nonzero_chars, off);
331 else
332 return -1;
335 /* Same as above but suitable also for strings with non-constant lengths.
336 Uses RVALS to determine length range. */
338 static int
339 compare_nonzero_chars (strinfo *si, gimple *stmt,
340 unsigned HOST_WIDE_INT off,
341 range_query *rvals)
343 if (!si->nonzero_chars)
344 return -1;
346 if (TREE_CODE (si->nonzero_chars) == INTEGER_CST)
347 return compare_tree_int (si->nonzero_chars, off);
349 if (!rvals || TREE_CODE (si->nonzero_chars) != SSA_NAME)
350 return -1;
352 value_range vr;
353 if (!rvals->range_of_expr (vr, si->nonzero_chars, stmt)
354 || vr.varying_p ()
355 || vr.undefined_p ())
356 return -1;
358 /* If the offset is less than the minimum length or if the bounds
359 of the length range are equal return the result of the comparison
360 same as in the constant case. Otherwise return a conservative
361 result. */
362 signop sign = TYPE_SIGN (vr.type ());
363 unsigned prec = TYPE_PRECISION (vr.type ());
364 int cmpmin = wi::cmp (vr.lower_bound (), wi::uhwi (off, prec), sign);
365 if (cmpmin > 0 || vr.singleton_p ())
366 return cmpmin;
368 return -1;
371 /* Return true if SI is known to be a zero-length string. */
373 static inline bool
374 zero_length_string_p (strinfo *si)
376 return si->full_string_p && integer_zerop (si->nonzero_chars);
379 /* Return strinfo vector entry IDX. */
381 static inline strinfo *
382 get_strinfo (int idx)
384 if (vec_safe_length (stridx_to_strinfo) <= (unsigned int) idx)
385 return NULL;
386 return (*stridx_to_strinfo)[idx];
389 /* Get the next strinfo in the chain after SI, or null if none. */
391 static inline strinfo *
392 get_next_strinfo (strinfo *si)
394 if (si->next == 0)
395 return NULL;
396 strinfo *nextsi = get_strinfo (si->next);
397 if (nextsi == NULL || nextsi->first != si->first || nextsi->prev != si->idx)
398 return NULL;
399 return nextsi;
402 /* Helper function for get_stridx. Return the strinfo index of the address
403 of EXP, which is available in PTR if nonnull. If OFFSET_OUT, it is
404 OK to return the index for some X <= &EXP and store &EXP - X in
405 *OFFSET_OUT. When RVALS is nonnull uses it to determine range
406 information. */
408 static int
409 get_addr_stridx (tree exp, gimple *stmt,
410 tree ptr, unsigned HOST_WIDE_INT *offset_out,
411 range_query *rvals = NULL)
413 HOST_WIDE_INT off;
414 struct stridxlist *list, *last = NULL;
415 tree base;
417 if (!decl_to_stridxlist_htab)
418 return 0;
420 poly_int64 poff;
421 base = get_addr_base_and_unit_offset (exp, &poff);
422 if (base == NULL || !DECL_P (base) || !poff.is_constant (&off))
423 return 0;
425 list = decl_to_stridxlist_htab->get (base);
426 if (list == NULL)
427 return 0;
431 if (list->offset == off)
433 if (offset_out)
434 *offset_out = 0;
435 return list->idx;
437 if (list->offset > off)
438 return 0;
439 last = list;
440 list = list->next;
442 while (list);
444 if ((offset_out || ptr) && last && last->idx > 0)
446 unsigned HOST_WIDE_INT rel_off
447 = (unsigned HOST_WIDE_INT) off - last->offset;
448 strinfo *si = get_strinfo (last->idx);
449 if (si && compare_nonzero_chars (si, stmt, rel_off, rvals) >= 0)
451 if (offset_out)
453 *offset_out = rel_off;
454 return last->idx;
456 else
457 return get_stridx_plus_constant (si, rel_off, ptr);
460 return 0;
463 /* Returns string index for EXP. When EXP is an SSA_NAME that refers
464 to a known strinfo with an offset and OFFRNG is non-null, sets
465 both elements of the OFFRNG array to the range of the offset and
466 returns the index of the known strinfo. In this case the result
467 must not be used in for functions that modify the string.
468 When nonnull, uses RVALS to determine range information. */
470 static int
471 get_stridx (tree exp, gimple *stmt,
472 wide_int offrng[2] = NULL, range_query *rvals = NULL)
474 if (offrng)
475 offrng[0] = offrng[1] = wi::zero (TYPE_PRECISION (ptrdiff_type_node));
477 if (TREE_CODE (exp) == SSA_NAME)
479 if (ssa_ver_to_stridx[SSA_NAME_VERSION (exp)])
480 return ssa_ver_to_stridx[SSA_NAME_VERSION (exp)];
482 tree e = exp;
483 int last_idx = 0;
484 HOST_WIDE_INT offset = 0;
485 /* Follow a chain of at most 5 assignments. */
486 for (int i = 0; i < 5; i++)
488 gimple *def_stmt = SSA_NAME_DEF_STMT (e);
489 if (!is_gimple_assign (def_stmt))
490 return last_idx;
492 tree_code rhs_code = gimple_assign_rhs_code (def_stmt);
493 tree ptr, off;
495 if (rhs_code == ADDR_EXPR)
497 /* Handle indices/offsets into VLAs which are implemented
498 as pointers to arrays. */
499 ptr = gimple_assign_rhs1 (def_stmt);
500 ptr = TREE_OPERAND (ptr, 0);
502 /* Handle also VLAs of types larger than char. */
503 if (tree eltsize = TYPE_SIZE_UNIT (TREE_TYPE (ptr)))
505 if (TREE_CODE (ptr) == ARRAY_REF)
507 off = TREE_OPERAND (ptr, 1);
508 ptr = TREE_OPERAND (ptr, 0);
509 if (!integer_onep (eltsize))
511 /* Scale the array index by the size of the element
512 type in the rare case that it's greater than
513 the typical 1 for char, making sure both operands
514 have the same type. */
515 eltsize = fold_convert (ssizetype, eltsize);
516 off = fold_convert (ssizetype, off);
517 off = fold_build2 (MULT_EXPR, ssizetype, off, eltsize);
520 else
521 off = integer_zero_node;
523 else
524 return 0;
526 if (TREE_CODE (ptr) != MEM_REF)
527 return 0;
529 /* Add the MEM_REF byte offset. */
530 tree mem_off = TREE_OPERAND (ptr, 1);
531 off = fold_build2 (PLUS_EXPR, TREE_TYPE (off), off, mem_off);
532 ptr = TREE_OPERAND (ptr, 0);
534 else if (rhs_code == POINTER_PLUS_EXPR)
536 ptr = gimple_assign_rhs1 (def_stmt);
537 off = gimple_assign_rhs2 (def_stmt);
539 else
540 return 0;
542 if (TREE_CODE (ptr) != SSA_NAME)
543 return 0;
545 if (!tree_fits_shwi_p (off))
547 if (int idx = ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)])
548 if (offrng)
550 /* Only when requested by setting OFFRNG to non-null,
551 return the index corresponding to the SSA_NAME.
552 Do this irrespective of the whether the offset
553 is known. */
554 if (get_range (off, def_stmt, offrng, rvals))
556 /* When the offset range is known, increment it
557 it by the constant offset computed in prior
558 iterations and store it in the OFFRNG array. */
559 offrng[0] += offset;
560 offrng[1] += offset;
562 else
564 /* When the offset range cannot be determined
565 store [0, SIZE_MAX] and let the caller decide
566 if the offset matters. */
567 offrng[1] = wi::to_wide (TYPE_MAX_VALUE (sizetype));
568 offrng[0] = wi::zero (offrng[1].get_precision ());
570 return idx;
572 return 0;
575 HOST_WIDE_INT this_off = tree_to_shwi (off);
576 if (offrng)
578 offrng[0] += wi::shwi (this_off, offrng->get_precision ());
579 offrng[1] += offrng[0];
582 if (this_off < 0)
583 return last_idx;
585 offset = (unsigned HOST_WIDE_INT) offset + this_off;
586 if (offset < 0)
587 return last_idx;
589 if (int idx = ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)])
591 strinfo *si = get_strinfo (idx);
592 if (si)
594 if (compare_nonzero_chars (si, offset) >= 0)
595 return get_stridx_plus_constant (si, offset, exp);
597 if (offrng)
598 last_idx = idx;
601 e = ptr;
604 return last_idx;
607 if (TREE_CODE (exp) == ADDR_EXPR)
609 int idx = get_addr_stridx (TREE_OPERAND (exp, 0), stmt, exp, NULL);
610 if (idx != 0)
611 return idx;
614 const char *p = c_getstr (exp);
615 if (p)
616 return ~(int) strlen (p);
618 return 0;
621 /* Return true if strinfo vector is shared with the immediate dominator. */
623 static inline bool
624 strinfo_shared (void)
626 return vec_safe_length (stridx_to_strinfo)
627 && (*stridx_to_strinfo)[0] != NULL;
630 /* Unshare strinfo vector that is shared with the immediate dominator. */
632 static void
633 unshare_strinfo_vec (void)
635 strinfo *si;
636 unsigned int i = 0;
638 gcc_assert (strinfo_shared ());
639 stridx_to_strinfo = vec_safe_copy (stridx_to_strinfo);
640 for (i = 1; vec_safe_iterate (stridx_to_strinfo, i, &si); ++i)
641 if (si != NULL)
642 si->refcount++;
643 (*stridx_to_strinfo)[0] = NULL;
646 /* Attempt to create a string index for exp, ADDR_EXPR's operand.
647 Return a pointer to the location where the string index can
648 be stored (if 0) or is stored, or NULL if this can't be tracked. */
650 static int *
651 addr_stridxptr (tree exp)
653 HOST_WIDE_INT off;
655 poly_int64 poff;
656 tree base = get_addr_base_and_unit_offset (exp, &poff);
657 if (base == NULL_TREE || !DECL_P (base) || !poff.is_constant (&off))
658 return NULL;
660 if (!decl_to_stridxlist_htab)
662 decl_to_stridxlist_htab
663 = new hash_map<tree_decl_hash, stridxlist> (64);
664 gcc_obstack_init (&stridx_obstack);
667 bool existed;
668 stridxlist *list = &decl_to_stridxlist_htab->get_or_insert (base, &existed);
669 if (existed)
671 int i;
672 stridxlist *before = NULL;
673 for (i = 0; i < 32; i++)
675 if (list->offset == off)
676 return &list->idx;
677 if (list->offset > off && before == NULL)
678 before = list;
679 if (list->next == NULL)
680 break;
681 list = list->next;
683 if (i == 32)
684 return NULL;
685 if (before)
687 list = before;
688 before = XOBNEW (&stridx_obstack, struct stridxlist);
689 *before = *list;
690 list->next = before;
691 list->offset = off;
692 list->idx = 0;
693 return &list->idx;
695 list->next = XOBNEW (&stridx_obstack, struct stridxlist);
696 list = list->next;
699 list->next = NULL;
700 list->offset = off;
701 list->idx = 0;
702 return &list->idx;
705 /* Create a new string index, or return 0 if reached limit. */
707 static int
708 new_stridx (tree exp)
710 int idx;
711 if (max_stridx >= param_max_tracked_strlens)
712 return 0;
713 if (TREE_CODE (exp) == SSA_NAME)
715 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
716 return 0;
717 idx = max_stridx++;
718 ssa_ver_to_stridx[SSA_NAME_VERSION (exp)] = idx;
719 return idx;
721 if (TREE_CODE (exp) == ADDR_EXPR)
723 int *pidx = addr_stridxptr (TREE_OPERAND (exp, 0));
724 if (pidx != NULL)
726 gcc_assert (*pidx == 0);
727 *pidx = max_stridx++;
728 return *pidx;
731 return 0;
734 /* Like new_stridx, but for ADDR_EXPR's operand instead. */
736 static int
737 new_addr_stridx (tree exp)
739 int *pidx;
740 if (max_stridx >= param_max_tracked_strlens)
741 return 0;
742 pidx = addr_stridxptr (exp);
743 if (pidx != NULL)
745 gcc_assert (*pidx == 0);
746 *pidx = max_stridx++;
747 return *pidx;
749 return 0;
752 /* Create a new strinfo. */
754 static strinfo *
755 new_strinfo (tree ptr, int idx, tree nonzero_chars, bool full_string_p)
757 strinfo *si = strinfo_pool.allocate ();
758 si->nonzero_chars = nonzero_chars;
759 STRIP_USELESS_TYPE_CONVERSION (ptr);
760 si->ptr = ptr;
761 si->stmt = NULL;
762 si->alloc = NULL;
763 si->endptr = NULL_TREE;
764 si->refcount = 1;
765 si->idx = idx;
766 si->first = 0;
767 si->prev = 0;
768 si->next = 0;
769 si->writable = false;
770 si->dont_invalidate = false;
771 si->full_string_p = full_string_p;
772 return si;
775 /* Decrease strinfo refcount and free it if not referenced anymore. */
777 static inline void
778 free_strinfo (strinfo *si)
780 if (si && --si->refcount == 0)
781 strinfo_pool.remove (si);
784 /* Set strinfo in the vector entry IDX to SI. */
786 static inline void
787 set_strinfo (int idx, strinfo *si)
789 if (vec_safe_length (stridx_to_strinfo) && (*stridx_to_strinfo)[0])
790 unshare_strinfo_vec ();
791 if (vec_safe_length (stridx_to_strinfo) <= (unsigned int) idx)
792 vec_safe_grow_cleared (stridx_to_strinfo, idx + 1, true);
793 (*stridx_to_strinfo)[idx] = si;
796 /* Return the first strinfo in the related strinfo chain
797 if all strinfos in between belong to the chain, otherwise NULL. */
799 static strinfo *
800 verify_related_strinfos (strinfo *origsi)
802 strinfo *si = origsi, *psi;
804 if (origsi->first == 0)
805 return NULL;
806 for (; si->prev; si = psi)
808 if (si->first != origsi->first)
809 return NULL;
810 psi = get_strinfo (si->prev);
811 if (psi == NULL)
812 return NULL;
813 if (psi->next != si->idx)
814 return NULL;
816 if (si->idx != si->first)
817 return NULL;
818 return si;
821 /* Set SI's endptr to ENDPTR and compute its length based on SI->ptr.
822 Use LOC for folding. */
824 static void
825 set_endptr_and_length (location_t loc, strinfo *si, tree endptr)
827 si->endptr = endptr;
828 si->stmt = NULL;
829 tree start_as_size = fold_convert_loc (loc, size_type_node, si->ptr);
830 tree end_as_size = fold_convert_loc (loc, size_type_node, endptr);
831 si->nonzero_chars = fold_build2_loc (loc, MINUS_EXPR, size_type_node,
832 end_as_size, start_as_size);
833 si->full_string_p = true;
836 /* Return the string length, or NULL if it can't be computed.
837 The length may but need not be constant. Instead, it might be
838 the result of a strlen() call. */
840 static tree
841 get_string_length (strinfo *si)
843 /* If the length has already been computed return it if it's exact
844 (i.e., the string is nul-terminated at NONZERO_CHARS), or return
845 null if it isn't. */
846 if (si->nonzero_chars)
847 return si->full_string_p ? si->nonzero_chars : NULL;
849 /* If the string is the result of one of the built-in calls below
850 attempt to compute the length from the call statement. */
851 if (si->stmt)
853 gimple *stmt = si->stmt, *lenstmt;
854 tree callee, lhs, fn, tem;
855 location_t loc;
856 gimple_stmt_iterator gsi;
858 gcc_assert (is_gimple_call (stmt));
859 callee = gimple_call_fndecl (stmt);
860 gcc_assert (callee && fndecl_built_in_p (callee, BUILT_IN_NORMAL));
861 lhs = gimple_call_lhs (stmt);
862 /* unshare_strinfo is intentionally not called here. The (delayed)
863 transformation of strcpy or strcat into stpcpy is done at the place
864 of the former strcpy/strcat call and so can affect all the strinfos
865 with the same stmt. If they were unshared before and transformation
866 has been already done, the handling of BUILT_IN_STPCPY{,_CHK} should
867 just compute the right length. */
868 switch (DECL_FUNCTION_CODE (callee))
870 case BUILT_IN_STRCAT:
871 case BUILT_IN_STRCAT_CHK:
872 gsi = gsi_for_stmt (stmt);
873 fn = builtin_decl_implicit (BUILT_IN_STRLEN);
874 gcc_assert (lhs == NULL_TREE);
875 tem = unshare_expr (gimple_call_arg (stmt, 0));
876 lenstmt = gimple_build_call (fn, 1, tem);
877 lhs = make_ssa_name (TREE_TYPE (TREE_TYPE (fn)), lenstmt);
878 gimple_call_set_lhs (lenstmt, lhs);
879 gimple_set_vuse (lenstmt, gimple_vuse (stmt));
880 gsi_insert_before (&gsi, lenstmt, GSI_SAME_STMT);
881 tem = gimple_call_arg (stmt, 0);
882 if (!ptrofftype_p (TREE_TYPE (lhs)))
884 lhs = convert_to_ptrofftype (lhs);
885 lhs = force_gimple_operand_gsi (&gsi, lhs, true, NULL_TREE,
886 true, GSI_SAME_STMT);
888 lenstmt = gimple_build_assign
889 (make_ssa_name (TREE_TYPE (gimple_call_arg (stmt, 0))),
890 POINTER_PLUS_EXPR,tem, lhs);
891 gsi_insert_before (&gsi, lenstmt, GSI_SAME_STMT);
892 gimple_call_set_arg (stmt, 0, gimple_assign_lhs (lenstmt));
893 lhs = NULL_TREE;
894 /* FALLTHRU */
895 case BUILT_IN_STRCPY:
896 case BUILT_IN_STRCPY_CHK:
897 gcc_assert (builtin_decl_implicit_p (BUILT_IN_STPCPY));
898 if (gimple_call_num_args (stmt) == 2)
899 fn = builtin_decl_implicit (BUILT_IN_STPCPY);
900 else
901 fn = builtin_decl_explicit (BUILT_IN_STPCPY_CHK);
902 gcc_assert (lhs == NULL_TREE);
903 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
905 fprintf (dump_file, "Optimizing: ");
906 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
908 gimple_call_set_fndecl (stmt, fn);
909 lhs = make_ssa_name (TREE_TYPE (TREE_TYPE (fn)), stmt);
910 gimple_call_set_lhs (stmt, lhs);
911 update_stmt (stmt);
912 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
914 fprintf (dump_file, "into: ");
915 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
917 /* FALLTHRU */
918 case BUILT_IN_STPCPY:
919 case BUILT_IN_STPCPY_CHK:
920 gcc_assert (lhs != NULL_TREE);
921 loc = gimple_location (stmt);
922 set_endptr_and_length (loc, si, lhs);
923 for (strinfo *chainsi = verify_related_strinfos (si);
924 chainsi != NULL;
925 chainsi = get_next_strinfo (chainsi))
926 if (chainsi->nonzero_chars == NULL)
927 set_endptr_and_length (loc, chainsi, lhs);
928 break;
929 case BUILT_IN_ALLOCA:
930 case BUILT_IN_ALLOCA_WITH_ALIGN:
931 case BUILT_IN_MALLOC:
932 break;
933 /* BUILT_IN_CALLOC always has si->nonzero_chars set. */
934 default:
935 gcc_unreachable ();
936 break;
940 return si->nonzero_chars;
943 /* Dump strlen data to FP for statement STMT. When non-null, RVALS
944 points to the valuation engine used to calculate ranges, and is
945 used to dump strlen range for non-constant results. */
947 DEBUG_FUNCTION void
948 dump_strlen_info (FILE *fp, gimple *stmt, range_query *rvals)
950 if (stmt)
952 fprintf (fp, "\nDumping strlen pass data after ");
953 print_gimple_expr (fp, stmt, TDF_LINENO);
954 fputc ('\n', fp);
956 else
957 fprintf (fp, "\nDumping strlen pass data\n");
959 fprintf (fp, "max_stridx = %i\n", max_stridx);
960 fprintf (fp, "ssa_ver_to_stridx has %u elements\n",
961 ssa_ver_to_stridx.length ());
962 fprintf (fp, "stridx_to_strinfo");
963 if (stridx_to_strinfo)
965 fprintf (fp, " has %u elements\n", stridx_to_strinfo->length ());
966 for (unsigned i = 0; i != stridx_to_strinfo->length (); ++i)
968 if (strinfo *si = (*stridx_to_strinfo)[i])
970 if (!si->idx)
971 continue;
972 fprintf (fp, " idx = %i", si->idx);
973 if (si->ptr)
975 fprintf (fp, ", ptr = ");
976 print_generic_expr (fp, si->ptr);
979 if (si->nonzero_chars)
981 fprintf (fp, ", nonzero_chars = ");
982 print_generic_expr (fp, si->nonzero_chars);
983 if (TREE_CODE (si->nonzero_chars) == SSA_NAME)
985 value_range vr;
986 if (rvals)
987 rvals->range_of_expr (vr, si->nonzero_chars,
988 si->stmt);
989 else
990 get_range_query (cfun)->range_of_expr (vr,
991 si->nonzero_chars);
992 vr.dump (fp);
996 fprintf (fp, ", refcount = %i", si->refcount);
997 if (si->stmt)
999 fprintf (fp, ", stmt = ");
1000 print_gimple_expr (fp, si->stmt, 0);
1002 if (si->alloc)
1004 fprintf (fp, ", alloc = ");
1005 print_gimple_expr (fp, si->alloc, 0);
1007 if (si->writable)
1008 fprintf (fp, ", writable");
1009 if (si->dont_invalidate)
1010 fprintf (fp, ", dont_invalidate");
1011 if (si->full_string_p)
1012 fprintf (fp, ", full_string_p");
1013 if (strinfo *next = get_next_strinfo (si))
1015 fprintf (fp, ", {");
1017 fprintf (fp, "%i%s", next->idx, next->first ? ", " : "");
1018 while ((next = get_next_strinfo (next)));
1019 fprintf (fp, "}");
1021 fputs ("\n", fp);
1025 else
1026 fprintf (fp, " = null\n");
1028 fprintf (fp, "decl_to_stridxlist_htab");
1029 if (decl_to_stridxlist_htab)
1031 fputs ("\n", fp);
1032 typedef decl_to_stridxlist_htab_t::iterator iter_t;
1033 for (iter_t it = decl_to_stridxlist_htab->begin ();
1034 it != decl_to_stridxlist_htab->end (); ++it)
1036 tree decl = (*it).first;
1037 stridxlist *list = &(*it).second;
1038 fprintf (fp, " decl = ");
1039 print_generic_expr (fp, decl);
1040 if (list)
1042 fprintf (fp, ", offsets = {");
1043 for (; list; list = list->next)
1044 fprintf (fp, "%lli%s", (long long) list->offset,
1045 list->next ? ", " : "");
1046 fputs ("}", fp);
1048 fputs ("\n", fp);
1051 else
1052 fprintf (fp, " = null\n");
1054 if (laststmt.stmt)
1056 fprintf (fp, "laststmt = ");
1057 print_gimple_expr (fp, laststmt.stmt, 0);
1058 fprintf (fp, ", len = ");
1059 print_generic_expr (fp, laststmt.len);
1060 fprintf (fp, ", stridx = %i\n", laststmt.stridx);
1064 /* Helper of get_range_strlen_dynamic(). See below. */
1066 static bool
1067 get_range_strlen_phi (tree src, gphi *phi,
1068 c_strlen_data *pdata, bitmap visited,
1069 pointer_query *ptr_qry, unsigned *pssa_def_max)
1071 if (!bitmap_set_bit (visited, SSA_NAME_VERSION (src)))
1072 return true;
1074 if (*pssa_def_max == 0)
1075 return false;
1077 --*pssa_def_max;
1079 /* Iterate over the PHI arguments and determine the minimum and maximum
1080 length/size of each and incorporate them into the overall result. */
1081 for (unsigned i = 0; i != gimple_phi_num_args (phi); ++i)
1083 tree arg = gimple_phi_arg_def (phi, i);
1084 if (arg == gimple_phi_result (phi))
1085 continue;
1087 c_strlen_data argdata = { };
1088 if (!get_range_strlen_dynamic (arg, phi, &argdata, visited, ptr_qry,
1089 pssa_def_max))
1091 pdata->maxlen = build_all_ones_cst (size_type_node);
1092 continue;
1095 /* Set the DECL of an unterminated array this argument refers to
1096 if one hasn't been found yet. */
1097 if (!pdata->decl && argdata.decl)
1098 pdata->decl = argdata.decl;
1100 if (!argdata.minlen
1101 || (integer_zerop (argdata.minlen)
1102 && (!argdata.maxbound
1103 || integer_all_onesp (argdata.maxbound))
1104 && integer_all_onesp (argdata.maxlen)))
1106 /* Set the upper bound of the length to unbounded. */
1107 pdata->maxlen = build_all_ones_cst (size_type_node);
1108 continue;
1111 /* Adjust the minimum and maximum length determined so far and
1112 the upper bound on the array size. */
1113 if (TREE_CODE (argdata.minlen) == INTEGER_CST
1114 && (!pdata->minlen
1115 || tree_int_cst_lt (argdata.minlen, pdata->minlen)))
1116 pdata->minlen = argdata.minlen;
1118 if (TREE_CODE (argdata.maxlen) == INTEGER_CST
1119 && (!pdata->maxlen
1120 || (argdata.maxlen
1121 && tree_int_cst_lt (pdata->maxlen, argdata.maxlen))))
1122 pdata->maxlen = argdata.maxlen;
1124 if (!pdata->maxbound
1125 || TREE_CODE (pdata->maxbound) != INTEGER_CST
1126 || (argdata.maxbound
1127 && tree_int_cst_lt (pdata->maxbound, argdata.maxbound)
1128 && !integer_all_onesp (argdata.maxbound)))
1129 pdata->maxbound = argdata.maxbound;
1132 return true;
1135 /* Return the maximum possible length of the string PTR that's less
1136 than MAXLEN given the size of the object of subobject it points
1137 to at the given STMT. MAXLEN is the maximum length of the string
1138 determined so far. Return null when no such maximum can be
1139 determined. */
1141 static tree
1142 get_maxbound (tree ptr, gimple *stmt, offset_int maxlen,
1143 pointer_query *ptr_qry)
1145 access_ref aref;
1146 if (!ptr_qry->get_ref (ptr, stmt, &aref))
1147 return NULL_TREE;
1149 offset_int sizrem = aref.size_remaining ();
1150 if (sizrem <= 0)
1151 return NULL_TREE;
1153 if (sizrem < maxlen)
1154 maxlen = sizrem - 1;
1156 /* Try to determine the maximum from the subobject at the offset.
1157 This handles MEM [&some-struct, member-offset] that's often
1158 the result of folding COMPONENT_REF [some-struct, member]. */
1159 tree reftype = TREE_TYPE (aref.ref);
1160 if (!RECORD_OR_UNION_TYPE_P (reftype)
1161 || aref.offrng[0] != aref.offrng[1]
1162 || !wi::fits_shwi_p (aref.offrng[0]))
1163 return wide_int_to_tree (size_type_node, maxlen);
1165 HOST_WIDE_INT off = aref.offrng[0].to_shwi ();
1166 tree fld = field_at_offset (reftype, NULL_TREE, off);
1167 if (!fld || !DECL_SIZE_UNIT (fld))
1168 return wide_int_to_tree (size_type_node, maxlen);
1170 offset_int size = wi::to_offset (DECL_SIZE_UNIT (fld));
1171 if (maxlen < size)
1172 return wide_int_to_tree (size_type_node, maxlen);
1174 return wide_int_to_tree (size_type_node, size - 1);
1177 /* Attempt to determine the length of the string SRC. On success, store
1178 the length in *PDATA and return true. Otherwise, return false.
1179 VISITED is a bitmap of visited PHI nodes. RVALS points to the valuation
1180 engine used to calculate ranges. PSSA_DEF_MAX to an SSA_NAME
1181 assignment limit used to prevent runaway recursion. */
1183 static bool
1184 get_range_strlen_dynamic (tree src, gimple *stmt,
1185 c_strlen_data *pdata, bitmap visited,
1186 pointer_query *ptr_qry, unsigned *pssa_def_max)
1188 int idx = get_stridx (src, stmt);
1189 if (!idx)
1191 if (TREE_CODE (src) == SSA_NAME)
1193 gimple *def_stmt = SSA_NAME_DEF_STMT (src);
1194 if (gphi *phi = dyn_cast<gphi *>(def_stmt))
1195 return get_range_strlen_phi (src, phi, pdata, visited, ptr_qry,
1196 pssa_def_max);
1199 /* Return success regardless of the result and handle *PDATA
1200 in the caller. */
1201 get_range_strlen (src, pdata, 1);
1202 return true;
1205 if (idx < 0)
1207 /* SRC is a string of constant length. */
1208 pdata->minlen = build_int_cst (size_type_node, ~idx);
1209 pdata->maxlen = pdata->minlen;
1210 pdata->maxbound = pdata->maxlen;
1211 return true;
1214 if (strinfo *si = get_strinfo (idx))
1216 pdata->minlen = get_string_length (si);
1217 if (!pdata->minlen && si->nonzero_chars)
1219 if (TREE_CODE (si->nonzero_chars) == INTEGER_CST)
1220 pdata->minlen = si->nonzero_chars;
1221 else if (TREE_CODE (si->nonzero_chars) == SSA_NAME)
1223 value_range vr;
1224 ptr_qry->rvals->range_of_expr (vr, si->nonzero_chars, si->stmt);
1225 if (vr.undefined_p () || vr.varying_p ())
1226 pdata->minlen = build_zero_cst (size_type_node);
1227 else
1229 tree type = vr.type ();
1230 pdata->minlen = wide_int_to_tree (type, vr.lower_bound ());
1233 else
1234 pdata->minlen = build_zero_cst (size_type_node);
1236 tree base = si->ptr;
1237 if (TREE_CODE (base) == ADDR_EXPR)
1238 base = TREE_OPERAND (base, 0);
1240 HOST_WIDE_INT off;
1241 poly_int64 poff;
1242 base = get_addr_base_and_unit_offset (base, &poff);
1243 if (base
1244 && DECL_P (base)
1245 && TREE_CODE (TREE_TYPE (base)) == ARRAY_TYPE
1246 && TYPE_SIZE_UNIT (TREE_TYPE (base))
1247 && poff.is_constant (&off))
1249 tree basetype = TREE_TYPE (base);
1250 tree size = TYPE_SIZE_UNIT (basetype);
1251 if (TREE_CODE (size) == INTEGER_CST)
1253 ++off; /* Increment for the terminating nul. */
1254 tree toffset = build_int_cst (size_type_node, off);
1255 pdata->maxlen = fold_build2 (MINUS_EXPR, size_type_node,
1256 size, toffset);
1257 if (tree_int_cst_lt (pdata->maxlen, pdata->minlen))
1258 /* This can happen when triggering UB, when base is an
1259 array which is known to be filled with at least size
1260 non-zero bytes. E.g. for
1261 char a[2]; memcpy (a, "12", sizeof a);
1262 We don't want to create an invalid range [2, 1]
1263 where 2 comes from the number of non-zero bytes and
1264 1 from longest valid zero-terminated string that can
1265 be stored in such an array, so pick just one of
1266 those, pdata->minlen. See PR110603. */
1267 pdata->maxlen = build_all_ones_cst (size_type_node);
1268 else
1269 pdata->maxbound = pdata->maxlen;
1271 else
1272 pdata->maxlen = build_all_ones_cst (size_type_node);
1274 else
1275 pdata->maxlen = build_all_ones_cst (size_type_node);
1277 else if (pdata->minlen && TREE_CODE (pdata->minlen) == SSA_NAME)
1279 value_range vr;
1280 ptr_qry->rvals->range_of_expr (vr, si->nonzero_chars, stmt);
1281 if (vr.varying_p () || vr.undefined_p ())
1283 pdata->minlen = build_zero_cst (size_type_node);
1284 pdata->maxlen = build_all_ones_cst (size_type_node);
1286 else
1288 tree type = vr.type ();
1289 pdata->minlen = wide_int_to_tree (type, vr.lower_bound ());
1290 pdata->maxlen = wide_int_to_tree (type, vr.upper_bound ());
1291 offset_int max = offset_int::from (vr.upper_bound (0), SIGNED);
1292 if (tree maxbound = get_maxbound (si->ptr, stmt, max, ptr_qry))
1293 pdata->maxbound = maxbound;
1294 else
1295 pdata->maxbound = pdata->maxlen;
1298 else if (pdata->minlen && TREE_CODE (pdata->minlen) == INTEGER_CST)
1300 pdata->maxlen = pdata->minlen;
1301 pdata->maxbound = pdata->minlen;
1303 else
1305 /* For PDATA->MINLEN that's a non-constant expression such
1306 as PLUS_EXPR whose value range is unknown, set the bounds
1307 to zero and SIZE_MAX. */
1308 pdata->minlen = build_zero_cst (size_type_node);
1309 pdata->maxlen = build_all_ones_cst (size_type_node);
1312 return true;
1315 return false;
1318 /* Analogous to get_range_strlen but for dynamically created strings,
1319 i.e., those created by calls to strcpy as opposed to just string
1320 constants.
1321 Try to obtain the range of the lengths of the string(s) referenced
1322 by SRC, or the size of the largest array SRC refers to if the range
1323 of lengths cannot be determined, and store all in *PDATA. RVALS
1324 points to the valuation engine used to calculate ranges. */
1326 void
1327 get_range_strlen_dynamic (tree src, gimple *stmt, c_strlen_data *pdata,
1328 pointer_query &ptr_qry)
1330 auto_bitmap visited;
1331 tree maxbound = pdata->maxbound;
1333 unsigned limit = param_ssa_name_def_chain_limit;
1334 if (!get_range_strlen_dynamic (src, stmt, pdata, visited, &ptr_qry, &limit))
1336 /* On failure extend the length range to an impossible maximum
1337 (a valid MAXLEN must be less than PTRDIFF_MAX - 1). Other
1338 members can stay unchanged regardless. */
1339 pdata->minlen = ssize_int (0);
1340 pdata->maxlen = build_all_ones_cst (size_type_node);
1342 else if (!pdata->minlen)
1343 pdata->minlen = ssize_int (0);
1345 /* If it's unchanged from it initial non-null value, set the conservative
1346 MAXBOUND to SIZE_MAX. Otherwise leave it null (if it is null). */
1347 if (maxbound && pdata->maxbound == maxbound)
1348 pdata->maxbound = build_all_ones_cst (size_type_node);
1351 /* Invalidate string length information for strings whose length might
1352 change due to stores in STMT, except those marked DONT_INVALIDATE.
1353 For string-modifying statements, ZERO_WRITE is set when the statement
1354 wrote only zeros.
1355 Returns true if any STRIDX_TO_STRINFO entries were considered
1356 for invalidation. */
1358 static bool
1359 maybe_invalidate (gimple *stmt, bool zero_write = false)
1361 if (dump_file && (dump_flags & TDF_DETAILS))
1363 fprintf (dump_file, "%s called for ", __func__);
1364 print_gimple_stmt (dump_file, stmt, TDF_LINENO);
1367 strinfo *si;
1368 bool nonempty = false;
1370 for (unsigned i = 1; vec_safe_iterate (stridx_to_strinfo, i, &si); ++i)
1372 if (si == NULL || !POINTER_TYPE_P (TREE_TYPE (si->ptr)))
1373 continue;
1375 nonempty = true;
1377 /* Unconditionally reset DONT_INVALIDATE. */
1378 bool dont_invalidate = si->dont_invalidate;
1379 si->dont_invalidate = false;
1381 if (dont_invalidate)
1382 continue;
1384 ao_ref r;
1385 tree size = si->nonzero_chars;
1386 ao_ref_init_from_ptr_and_size (&r, si->ptr, size);
1387 /* Include the terminating nul in the size of the string
1388 to consider when determining possible clobber. But do not
1389 add it to 'size' since we don't know whether it would
1390 actually fit the allocated area. */
1391 if (known_size_p (r.size))
1393 if (known_le (r.size, HOST_WIDE_INT_MAX - BITS_PER_UNIT))
1394 r.max_size += BITS_PER_UNIT;
1395 else
1396 r.max_size = -1;
1398 if (stmt_may_clobber_ref_p_1 (stmt, &r))
1400 if (dump_file && (dump_flags & TDF_DETAILS))
1402 fputs (" statement may clobber object ", dump_file);
1403 print_generic_expr (dump_file, si->ptr);
1404 if (size && tree_fits_uhwi_p (size))
1405 fprintf (dump_file, " " HOST_WIDE_INT_PRINT_UNSIGNED
1406 " bytes in size", tree_to_uhwi (size));
1407 fputc ('\n', dump_file);
1410 set_strinfo (i, NULL);
1411 free_strinfo (si);
1412 continue;
1415 if (size
1416 && !zero_write
1417 && si->stmt
1418 && is_gimple_call (si->stmt)
1419 && (DECL_FUNCTION_CODE (gimple_call_fndecl (si->stmt))
1420 == BUILT_IN_CALLOC))
1422 /* If the clobber test above considered the length of
1423 the string (including the nul), then for (potentially)
1424 non-zero writes that might modify storage allocated by
1425 calloc consider the whole object and if it might be
1426 clobbered by the statement reset the statement. */
1427 ao_ref_init_from_ptr_and_size (&r, si->ptr, NULL_TREE);
1428 if (stmt_may_clobber_ref_p_1 (stmt, &r))
1429 si->stmt = NULL;
1433 if (dump_file && (dump_flags & TDF_DETAILS))
1434 fprintf (dump_file, "%s returns %i\n", __func__, nonempty);
1436 return nonempty;
1439 /* Unshare strinfo record SI, if it has refcount > 1 or
1440 if stridx_to_strinfo vector is shared with some other
1441 bbs. */
1443 static strinfo *
1444 unshare_strinfo (strinfo *si)
1446 strinfo *nsi;
1448 if (si->refcount == 1 && !strinfo_shared ())
1449 return si;
1451 nsi = new_strinfo (si->ptr, si->idx, si->nonzero_chars, si->full_string_p);
1452 nsi->stmt = si->stmt;
1453 nsi->alloc = si->alloc;
1454 nsi->endptr = si->endptr;
1455 nsi->first = si->first;
1456 nsi->prev = si->prev;
1457 nsi->next = si->next;
1458 nsi->writable = si->writable;
1459 set_strinfo (si->idx, nsi);
1460 free_strinfo (si);
1461 return nsi;
1464 /* Attempt to create a new strinfo for BASESI + OFF, or find existing
1465 strinfo if there is any. Return it's idx, or 0 if no strinfo has
1466 been created. */
1468 static int
1469 get_stridx_plus_constant (strinfo *basesi, unsigned HOST_WIDE_INT off,
1470 tree ptr)
1472 if (TREE_CODE (ptr) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (ptr))
1473 return 0;
1475 if (compare_nonzero_chars (basesi, off) < 0
1476 || !tree_fits_uhwi_p (basesi->nonzero_chars))
1477 return 0;
1479 unsigned HOST_WIDE_INT nonzero_chars
1480 = tree_to_uhwi (basesi->nonzero_chars) - off;
1481 strinfo *si = basesi, *chainsi;
1482 if (si->first || si->prev || si->next)
1483 si = verify_related_strinfos (basesi);
1484 if (si == NULL
1485 || si->nonzero_chars == NULL_TREE
1486 || TREE_CODE (si->nonzero_chars) != INTEGER_CST)
1487 return 0;
1489 if (TREE_CODE (ptr) == SSA_NAME
1490 && ssa_ver_to_stridx.length () <= SSA_NAME_VERSION (ptr))
1491 ssa_ver_to_stridx.safe_grow_cleared (num_ssa_names, true);
1493 gcc_checking_assert (compare_tree_int (si->nonzero_chars, off) != -1);
1494 for (chainsi = si; chainsi->next; chainsi = si)
1496 si = get_next_strinfo (chainsi);
1497 if (si == NULL
1498 || si->nonzero_chars == NULL_TREE
1499 || TREE_CODE (si->nonzero_chars) != INTEGER_CST)
1500 break;
1501 int r = compare_tree_int (si->nonzero_chars, nonzero_chars);
1502 if (r != 1)
1504 if (r == 0)
1506 if (TREE_CODE (ptr) == SSA_NAME)
1507 ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)] = si->idx;
1508 else
1510 int *pidx = addr_stridxptr (TREE_OPERAND (ptr, 0));
1511 if (pidx != NULL && *pidx == 0)
1512 *pidx = si->idx;
1514 return si->idx;
1516 break;
1520 int idx = new_stridx (ptr);
1521 if (idx == 0)
1522 return 0;
1523 si = new_strinfo (ptr, idx, build_int_cst (size_type_node, nonzero_chars),
1524 basesi->full_string_p);
1525 set_strinfo (idx, si);
1526 if (strinfo *nextsi = get_strinfo (chainsi->next))
1528 nextsi = unshare_strinfo (nextsi);
1529 si->next = nextsi->idx;
1530 nextsi->prev = idx;
1532 chainsi = unshare_strinfo (chainsi);
1533 if (chainsi->first == 0)
1534 chainsi->first = chainsi->idx;
1535 chainsi->next = idx;
1536 if (chainsi->endptr == NULL_TREE && zero_length_string_p (si))
1537 chainsi->endptr = ptr;
1538 si->endptr = chainsi->endptr;
1539 si->prev = chainsi->idx;
1540 si->first = chainsi->first;
1541 si->writable = chainsi->writable;
1542 return si->idx;
1545 /* Note that PTR, a pointer SSA_NAME initialized in the current stmt, points
1546 to a zero-length string and if possible chain it to a related strinfo
1547 chain whose part is or might be CHAINSI. */
1549 static strinfo *
1550 zero_length_string (tree ptr, strinfo *chainsi)
1552 strinfo *si;
1553 int idx;
1554 if (ssa_ver_to_stridx.length () <= SSA_NAME_VERSION (ptr))
1555 ssa_ver_to_stridx.safe_grow_cleared (num_ssa_names, true);
1556 gcc_checking_assert (TREE_CODE (ptr) == SSA_NAME
1557 && ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)] == 0);
1559 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (ptr))
1560 return NULL;
1561 if (chainsi != NULL)
1563 si = verify_related_strinfos (chainsi);
1564 if (si)
1568 /* We shouldn't mix delayed and non-delayed lengths. */
1569 gcc_assert (si->full_string_p);
1570 if (si->endptr == NULL_TREE)
1572 si = unshare_strinfo (si);
1573 si->endptr = ptr;
1575 chainsi = si;
1576 si = get_next_strinfo (si);
1578 while (si != NULL);
1579 if (zero_length_string_p (chainsi))
1581 if (chainsi->next)
1583 chainsi = unshare_strinfo (chainsi);
1584 chainsi->next = 0;
1586 ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)] = chainsi->idx;
1587 return chainsi;
1590 else
1592 /* We shouldn't mix delayed and non-delayed lengths. */
1593 gcc_assert (chainsi->full_string_p);
1594 if (chainsi->first || chainsi->prev || chainsi->next)
1596 chainsi = unshare_strinfo (chainsi);
1597 chainsi->first = 0;
1598 chainsi->prev = 0;
1599 chainsi->next = 0;
1603 idx = new_stridx (ptr);
1604 if (idx == 0)
1605 return NULL;
1606 si = new_strinfo (ptr, idx, build_int_cst (size_type_node, 0), true);
1607 set_strinfo (idx, si);
1608 si->endptr = ptr;
1609 if (chainsi != NULL)
1611 chainsi = unshare_strinfo (chainsi);
1612 if (chainsi->first == 0)
1613 chainsi->first = chainsi->idx;
1614 chainsi->next = idx;
1615 if (chainsi->endptr == NULL_TREE)
1616 chainsi->endptr = ptr;
1617 si->prev = chainsi->idx;
1618 si->first = chainsi->first;
1619 si->writable = chainsi->writable;
1621 return si;
1624 /* For strinfo ORIGSI whose length has been just updated, adjust other
1625 related strinfos so that they match the new ORIGSI. This involves:
1627 - adding ADJ to the nonzero_chars fields
1628 - copying full_string_p from the new ORIGSI. */
1630 static void
1631 adjust_related_strinfos (location_t loc, strinfo *origsi, tree adj)
1633 strinfo *si = verify_related_strinfos (origsi);
1635 if (si == NULL)
1636 return;
1638 while (1)
1640 strinfo *nsi;
1642 if (si != origsi)
1644 tree tem;
1646 si = unshare_strinfo (si);
1647 /* We shouldn't see delayed lengths here; the caller must
1648 have calculated the old length in order to calculate
1649 the adjustment. */
1650 gcc_assert (si->nonzero_chars);
1651 tem = fold_convert_loc (loc, TREE_TYPE (si->nonzero_chars), adj);
1652 si->nonzero_chars = fold_build2_loc (loc, PLUS_EXPR,
1653 TREE_TYPE (si->nonzero_chars),
1654 si->nonzero_chars, tem);
1655 si->full_string_p = origsi->full_string_p;
1657 si->endptr = NULL_TREE;
1658 si->dont_invalidate = true;
1660 nsi = get_next_strinfo (si);
1661 if (nsi == NULL)
1662 return;
1663 si = nsi;
1667 /* Find if there are other SSA_NAME pointers equal to PTR
1668 for which we don't track their string lengths yet. If so, use
1669 IDX for them. */
1671 static void
1672 find_equal_ptrs (tree ptr, int idx)
1674 if (TREE_CODE (ptr) != SSA_NAME)
1675 return;
1676 while (1)
1678 gimple *stmt = SSA_NAME_DEF_STMT (ptr);
1679 if (!is_gimple_assign (stmt))
1680 return;
1681 ptr = gimple_assign_rhs1 (stmt);
1682 switch (gimple_assign_rhs_code (stmt))
1684 case SSA_NAME:
1685 break;
1686 CASE_CONVERT:
1687 if (!POINTER_TYPE_P (TREE_TYPE (ptr)))
1688 return;
1689 if (TREE_CODE (ptr) == SSA_NAME)
1690 break;
1691 if (TREE_CODE (ptr) != ADDR_EXPR)
1692 return;
1693 /* FALLTHRU */
1694 case ADDR_EXPR:
1696 int *pidx = addr_stridxptr (TREE_OPERAND (ptr, 0));
1697 if (pidx != NULL && *pidx == 0)
1698 *pidx = idx;
1699 return;
1701 default:
1702 return;
1705 /* We might find an endptr created in this pass. Grow the
1706 vector in that case. */
1707 if (ssa_ver_to_stridx.length () <= SSA_NAME_VERSION (ptr))
1708 ssa_ver_to_stridx.safe_grow_cleared (num_ssa_names, true);
1710 if (ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)] != 0)
1711 return;
1712 ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)] = idx;
1716 /* Return true if STMT is a call to a builtin function with the right
1717 arguments and attributes that should be considered for optimization
1718 by this pass. */
1720 static bool
1721 valid_builtin_call (gimple *stmt)
1723 if (!gimple_call_builtin_p (stmt, BUILT_IN_NORMAL))
1724 return false;
1726 tree callee = gimple_call_fndecl (stmt);
1727 switch (DECL_FUNCTION_CODE (callee))
1729 case BUILT_IN_MEMCMP:
1730 case BUILT_IN_MEMCMP_EQ:
1731 case BUILT_IN_STRCMP:
1732 case BUILT_IN_STRNCMP:
1733 case BUILT_IN_STRCHR:
1734 case BUILT_IN_STRLEN:
1735 case BUILT_IN_STRNLEN:
1736 /* The above functions should be pure. Punt if they aren't. */
1737 if (gimple_vdef (stmt) || gimple_vuse (stmt) == NULL_TREE)
1738 return false;
1739 break;
1741 case BUILT_IN_ALLOCA:
1742 case BUILT_IN_ALLOCA_WITH_ALIGN:
1743 case BUILT_IN_CALLOC:
1744 case BUILT_IN_MALLOC:
1745 case BUILT_IN_MEMCPY:
1746 case BUILT_IN_MEMCPY_CHK:
1747 case BUILT_IN_MEMPCPY:
1748 case BUILT_IN_MEMPCPY_CHK:
1749 case BUILT_IN_MEMSET:
1750 case BUILT_IN_STPCPY:
1751 case BUILT_IN_STPCPY_CHK:
1752 case BUILT_IN_STPNCPY:
1753 case BUILT_IN_STPNCPY_CHK:
1754 case BUILT_IN_STRCAT:
1755 case BUILT_IN_STRCAT_CHK:
1756 case BUILT_IN_STRCPY:
1757 case BUILT_IN_STRCPY_CHK:
1758 case BUILT_IN_STRNCAT:
1759 case BUILT_IN_STRNCAT_CHK:
1760 case BUILT_IN_STRNCPY:
1761 case BUILT_IN_STRNCPY_CHK:
1762 /* The above functions should be neither const nor pure. Punt if they
1763 aren't. */
1764 if (gimple_vdef (stmt) == NULL_TREE || gimple_vuse (stmt) == NULL_TREE)
1765 return false;
1766 break;
1768 default:
1769 break;
1772 return true;
1775 /* If the last .MEM setter statement before STMT is
1776 memcpy (x, y, strlen (y) + 1), the only .MEM use of it is STMT
1777 and STMT is known to overwrite x[strlen (x)], adjust the last memcpy to
1778 just memcpy (x, y, strlen (y)). SI must be the zero length
1779 strinfo. */
1781 void
1782 strlen_pass::adjust_last_stmt (strinfo *si, gimple *stmt, bool is_strcat)
1784 tree vuse, callee, len;
1785 struct laststmt_struct last = laststmt;
1786 strinfo *lastsi, *firstsi;
1787 unsigned len_arg_no = 2;
1789 laststmt.stmt = NULL;
1790 laststmt.len = NULL_TREE;
1791 laststmt.stridx = 0;
1793 if (last.stmt == NULL)
1794 return;
1796 vuse = gimple_vuse (stmt);
1797 if (vuse == NULL_TREE
1798 || SSA_NAME_DEF_STMT (vuse) != last.stmt
1799 || !has_single_use (vuse))
1800 return;
1802 gcc_assert (last.stridx > 0);
1803 lastsi = get_strinfo (last.stridx);
1804 if (lastsi == NULL)
1805 return;
1807 if (lastsi != si)
1809 if (lastsi->first == 0 || lastsi->first != si->first)
1810 return;
1812 firstsi = verify_related_strinfos (si);
1813 if (firstsi == NULL)
1814 return;
1815 while (firstsi != lastsi)
1817 firstsi = get_next_strinfo (firstsi);
1818 if (firstsi == NULL)
1819 return;
1823 if (!is_strcat && !zero_length_string_p (si))
1824 return;
1826 if (is_gimple_assign (last.stmt))
1828 gimple_stmt_iterator gsi;
1830 if (!integer_zerop (gimple_assign_rhs1 (last.stmt)))
1831 return;
1832 if (stmt_could_throw_p (cfun, last.stmt))
1833 return;
1834 gsi = gsi_for_stmt (last.stmt);
1835 unlink_stmt_vdef (last.stmt);
1836 release_defs (last.stmt);
1837 gsi_remove (&gsi, true);
1838 return;
1841 if (!valid_builtin_call (last.stmt))
1842 return;
1844 callee = gimple_call_fndecl (last.stmt);
1845 switch (DECL_FUNCTION_CODE (callee))
1847 case BUILT_IN_MEMCPY:
1848 case BUILT_IN_MEMCPY_CHK:
1849 break;
1850 default:
1851 return;
1854 len = gimple_call_arg (last.stmt, len_arg_no);
1855 if (tree_fits_uhwi_p (len))
1857 if (!tree_fits_uhwi_p (last.len)
1858 || integer_zerop (len)
1859 || tree_to_uhwi (len) != tree_to_uhwi (last.len) + 1)
1860 return;
1861 /* Don't adjust the length if it is divisible by 4, it is more efficient
1862 to store the extra '\0' in that case. */
1863 if ((tree_to_uhwi (len) & 3) == 0)
1864 return;
1866 /* Don't fold away an out of bounds access, as this defeats proper
1867 warnings. */
1868 tree dst = gimple_call_arg (last.stmt, 0);
1870 access_ref aref;
1871 tree size = compute_objsize (dst, stmt, 1, &aref, &ptr_qry);
1872 if (size && tree_int_cst_lt (size, len))
1873 return;
1875 else if (TREE_CODE (len) == SSA_NAME)
1877 gimple *def_stmt = SSA_NAME_DEF_STMT (len);
1878 if (!is_gimple_assign (def_stmt)
1879 || gimple_assign_rhs_code (def_stmt) != PLUS_EXPR
1880 || gimple_assign_rhs1 (def_stmt) != last.len
1881 || !integer_onep (gimple_assign_rhs2 (def_stmt)))
1882 return;
1884 else
1885 return;
1887 gimple_call_set_arg (last.stmt, len_arg_no, last.len);
1888 update_stmt (last.stmt);
1891 /* For an LHS that is an SSA_NAME that is the result of a strlen()
1892 call, or when BOUND is non-null, of a strnlen() call, set LHS
1893 range info to [0, min (MAX, BOUND)] when the range includes more
1894 than one value and return LHS. Otherwise, when the range
1895 [MIN, MAX] is such that MIN == MAX, return the tree representation
1896 of (MIN). The latter allows callers to fold suitable strnlen() calls
1897 to constants. */
1899 tree
1900 set_strlen_range (tree lhs, wide_int min, wide_int max,
1901 tree bound /* = NULL_TREE */)
1903 if (TREE_CODE (lhs) != SSA_NAME
1904 || !INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
1905 return NULL_TREE;
1907 if (bound)
1909 /* For strnlen, adjust MIN and MAX as necessary. If the bound
1910 is less than the size of the array set MAX to it. It it's
1911 greater than MAX and MAX is non-zero bump MAX down to account
1912 for the necessary terminating nul. Otherwise leave it alone. */
1913 if (TREE_CODE (bound) == INTEGER_CST)
1915 wide_int wibnd = wi::to_wide (bound);
1916 int cmp = wi::cmpu (wibnd, max);
1917 if (cmp < 0)
1918 max = wibnd;
1919 else if (cmp && wi::ne_p (max, min))
1920 --max;
1922 else if (TREE_CODE (bound) == SSA_NAME)
1924 value_range r;
1925 get_range_query (cfun)->range_of_expr (r, bound);
1926 if (!r.undefined_p ())
1928 /* For a bound in a known range, adjust the range determined
1929 above as necessary. For a bound in some anti-range or
1930 in an unknown range, use the range determined by callers. */
1931 if (wi::ltu_p (r.lower_bound (), min))
1932 min = r.lower_bound ();
1933 if (wi::ltu_p (r.upper_bound (), max))
1934 max = r.upper_bound ();
1939 if (min == max)
1940 return wide_int_to_tree (size_type_node, min);
1942 value_range vr (TREE_TYPE (lhs), min, max);
1943 set_range_info (lhs, vr);
1944 return lhs;
1947 /* For an LHS that is an SSA_NAME and for strlen() or strnlen() argument
1948 SRC, set LHS range info to [0, min (N, BOUND)] if SRC refers to
1949 a character array A[N] with unknown length bounded by N, and for
1950 strnlen(), by min (N, BOUND). */
1952 static tree
1953 maybe_set_strlen_range (tree lhs, tree src, tree bound)
1955 if (TREE_CODE (lhs) != SSA_NAME
1956 || !INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
1957 return NULL_TREE;
1959 if (TREE_CODE (src) == SSA_NAME)
1961 gimple *def = SSA_NAME_DEF_STMT (src);
1962 if (is_gimple_assign (def)
1963 && gimple_assign_rhs_code (def) == ADDR_EXPR)
1964 src = gimple_assign_rhs1 (def);
1967 /* The longest string is PTRDIFF_MAX - 1 bytes including the final
1968 NUL so that the difference between a pointer to just past it and
1969 one to its beginning is positive. */
1970 wide_int max = wi::to_wide (TYPE_MAX_VALUE (ptrdiff_type_node)) - 2;
1972 if (TREE_CODE (src) == ADDR_EXPR)
1974 /* The last array member of a struct can be bigger than its size
1975 suggests if it's treated as a poor-man's flexible array member. */
1976 src = TREE_OPERAND (src, 0);
1977 if (TREE_CODE (src) != MEM_REF
1978 && !array_ref_flexible_size_p (src))
1980 tree type = TREE_TYPE (src);
1981 tree size = TYPE_SIZE_UNIT (type);
1982 if (size
1983 && TREE_CODE (size) == INTEGER_CST
1984 && !integer_zerop (size))
1986 /* Even though such uses of strlen would be undefined,
1987 avoid relying on arrays of arrays in case some genius
1988 decides to call strlen on an unterminated array element
1989 that's followed by a terminated one. Likewise, avoid
1990 assuming that a struct array member is necessarily
1991 nul-terminated (the nul may be in the member that
1992 follows). In those cases, assume that the length
1993 of the string stored in such an array is bounded
1994 by the size of the enclosing object if one can be
1995 determined. */
1996 tree base = get_base_address (src);
1997 if (VAR_P (base))
1999 if (tree size = DECL_SIZE_UNIT (base))
2000 if (size
2001 && TREE_CODE (size) == INTEGER_CST
2002 && TREE_CODE (TREE_TYPE (base)) != POINTER_TYPE)
2003 max = wi::to_wide (size);
2007 /* For strlen() the upper bound above is equal to
2008 the longest string that can be stored in the array
2009 (i.e., it accounts for the terminating nul. For
2010 strnlen() bump up the maximum by one since the array
2011 need not be nul-terminated. */
2012 if (!bound && max != 0)
2013 --max;
2017 wide_int min = wi::zero (max.get_precision ());
2018 return set_strlen_range (lhs, min, max, bound);
2021 /* Diagnose buffer overflow by a STMT writing LEN + PLUS_ONE bytes,
2022 either into a region allocated for the object SI when non-null,
2023 or into an object designated by the LHS of STMT otherwise.
2024 For a call STMT, when CALL_LHS is set use its left hand side
2025 as the destination, otherwise use argument zero.
2026 When nonnull uses RVALS to determine range information.
2027 RAWMEM may be set by memcpy and other raw memory functions
2028 to allow accesses across subobject boundaries. */
2030 void
2031 strlen_pass::maybe_warn_overflow (gimple *stmt, bool call_lhs, tree len,
2032 strinfo *si, bool plus_one, bool rawmem)
2034 if (!len || warning_suppressed_p (stmt, OPT_Wstringop_overflow_))
2035 return;
2037 /* The DECL of the function performing the write if it is done
2038 by one. */
2039 tree writefn = NULL_TREE;
2040 /* The destination expression involved in the store or call STMT. */
2041 tree dest = NULL_TREE;
2043 if (is_gimple_assign (stmt))
2044 dest = gimple_assign_lhs (stmt);
2045 else if (is_gimple_call (stmt))
2047 if (call_lhs)
2048 dest = gimple_call_lhs (stmt);
2049 else
2051 gcc_assert (gimple_call_builtin_p (stmt, BUILT_IN_NORMAL));
2052 dest = gimple_call_arg (stmt, 0);
2055 if (!dest)
2056 return;
2057 writefn = gimple_call_fndecl (stmt);
2059 else
2060 return;
2062 if (warning_suppressed_p (dest, OPT_Wstringop_overflow_))
2063 return;
2065 const int ostype = rawmem ? 0 : 1;
2067 /* Use maximum precision to avoid overflow in the addition below.
2068 Make sure all operands have the same precision to keep wide_int
2069 from ICE'ing. */
2071 access_ref aref;
2072 /* The size of the destination region (which is smaller than
2073 the destination object for stores at a non-zero offset). */
2074 tree destsize = compute_objsize (dest, stmt, ostype, &aref, &ptr_qry);
2076 if (!destsize)
2078 aref.sizrng[0] = 0;
2079 aref.sizrng[1] = wi::to_offset (max_object_size ());
2082 /* Return early if the DESTSIZE size expression is the same as LEN
2083 and the offset into the destination is zero. This might happen
2084 in the case of a pair of malloc and memset calls to allocate
2085 an object and clear it as if by calloc. */
2086 if (destsize == len && !plus_one
2087 && aref.offrng[0] == 0 && aref.offrng[0] == aref.offrng[1])
2088 return;
2090 wide_int rng[2];
2091 if (!get_range (len, stmt, rng, ptr_qry.rvals))
2092 return;
2094 widest_int lenrng[2] =
2095 { widest_int::from (rng[0], SIGNED), widest_int::from (rng[1], SIGNED) };
2097 if (plus_one)
2099 lenrng[0] += 1;
2100 lenrng[1] += 1;
2103 /* The size of the remaining space in the destination computed
2104 as the size of the latter minus the offset into it. */
2105 widest_int spcrng[2];
2107 offset_int remrng[2];
2108 remrng[1] = aref.size_remaining (remrng);
2109 spcrng[0] = remrng[0] == -1 ? 0 : widest_int::from (remrng[0], UNSIGNED);
2110 spcrng[1] = widest_int::from (remrng[1], UNSIGNED);
2113 if (wi::leu_p (lenrng[0], spcrng[0])
2114 && wi::leu_p (lenrng[1], spcrng[1]))
2115 return;
2117 location_t loc = gimple_or_expr_nonartificial_location (stmt, dest);
2118 bool warned = false;
2119 if (wi::leu_p (lenrng[0], spcrng[1]))
2121 if (len != destsize
2122 && (!si || rawmem || !is_strlen_related_p (si->ptr, len)))
2123 return;
2125 warned = (writefn
2126 ? warning_at (loc, OPT_Wstringop_overflow_,
2127 "%qD writing one too many bytes into a region "
2128 "of a size that depends on %<strlen%>",
2129 writefn)
2130 : warning_at (loc, OPT_Wstringop_overflow_,
2131 "writing one too many bytes into a region "
2132 "of a size that depends on %<strlen%>"));
2134 else if (lenrng[0] == lenrng[1])
2136 if (spcrng[0] == spcrng[1])
2137 warned = (writefn
2138 ? warning_n (loc, OPT_Wstringop_overflow_,
2139 lenrng[0].to_uhwi (),
2140 "%qD writing %wu byte into a region "
2141 "of size %wu",
2142 "%qD writing %wu bytes into a region "
2143 "of size %wu",
2144 writefn, lenrng[0].to_uhwi (),
2145 spcrng[0].to_uhwi ())
2146 : warning_n (loc, OPT_Wstringop_overflow_,
2147 lenrng[0].to_uhwi (),
2148 "writing %wu byte into a region "
2149 "of size %wu",
2150 "writing %wu bytes into a region "
2151 "of size %wu",
2152 lenrng[0].to_uhwi (),
2153 spcrng[0].to_uhwi ()));
2154 else
2155 warned = (writefn
2156 ? warning_n (loc, OPT_Wstringop_overflow_,
2157 lenrng[0].to_uhwi (),
2158 "%qD writing %wu byte into a region "
2159 "of size between %wu and %wu",
2160 "%qD writing %wu bytes into a region "
2161 "of size between %wu and %wu",
2162 writefn, lenrng[0].to_uhwi (),
2163 spcrng[0].to_uhwi (), spcrng[1].to_uhwi ())
2164 : warning_n (loc, OPT_Wstringop_overflow_,
2165 lenrng[0].to_uhwi (),
2166 "writing %wu byte into a region "
2167 "of size between %wu and %wu",
2168 "writing %wu bytes into a region "
2169 "of size between %wu and %wu",
2170 lenrng[0].to_uhwi (),
2171 spcrng[0].to_uhwi (), spcrng[1].to_uhwi ()));
2173 else if (spcrng[0] == spcrng[1])
2174 warned = (writefn
2175 ? warning_at (loc, OPT_Wstringop_overflow_,
2176 "%qD writing between %wu and %wu bytes "
2177 "into a region of size %wu",
2178 writefn, lenrng[0].to_uhwi (),
2179 lenrng[1].to_uhwi (),
2180 spcrng[0].to_uhwi ())
2181 : warning_at (loc, OPT_Wstringop_overflow_,
2182 "writing between %wu and %wu bytes "
2183 "into a region of size %wu",
2184 lenrng[0].to_uhwi (),
2185 lenrng[1].to_uhwi (),
2186 spcrng[0].to_uhwi ()));
2187 else
2188 warned = (writefn
2189 ? warning_at (loc, OPT_Wstringop_overflow_,
2190 "%qD writing between %wu and %wu bytes "
2191 "into a region of size between %wu and %wu",
2192 writefn, lenrng[0].to_uhwi (),
2193 lenrng[1].to_uhwi (),
2194 spcrng[0].to_uhwi (), spcrng[1].to_uhwi ())
2195 : warning_at (loc, OPT_Wstringop_overflow_,
2196 "writing between %wu and %wu bytes "
2197 "into a region of size between %wu and %wu",
2198 lenrng[0].to_uhwi (),
2199 lenrng[1].to_uhwi (),
2200 spcrng[0].to_uhwi (), spcrng[1].to_uhwi ()));
2202 if (!warned)
2203 return;
2205 suppress_warning (stmt, OPT_Wstringop_overflow_);
2207 aref.inform_access (access_write_only);
2210 /* Convenience wrapper for the above. */
2212 void
2213 strlen_pass::maybe_warn_overflow (gimple *stmt, bool call_lhs,
2214 unsigned HOST_WIDE_INT len,
2215 strinfo *si, bool plus_one, bool rawmem)
2217 tree tlen = build_int_cst (size_type_node, len);
2218 maybe_warn_overflow (stmt, call_lhs, tlen, si, plus_one, rawmem);
2221 /* Handle a strlen call. If strlen of the argument is known, replace
2222 the strlen call with the known value, otherwise remember that strlen
2223 of the argument is stored in the lhs SSA_NAME. */
2225 void
2226 strlen_pass::handle_builtin_strlen ()
2228 gimple *stmt = gsi_stmt (m_gsi);
2229 tree lhs = gimple_call_lhs (stmt);
2231 if (lhs == NULL_TREE)
2232 return;
2234 location_t loc = gimple_location (stmt);
2235 tree callee = gimple_call_fndecl (stmt);
2236 tree src = gimple_call_arg (stmt, 0);
2237 tree bound = (DECL_FUNCTION_CODE (callee) == BUILT_IN_STRNLEN
2238 ? gimple_call_arg (stmt, 1) : NULL_TREE);
2239 int idx = get_stridx (src, stmt);
2240 if (idx || (bound && integer_zerop (bound)))
2242 strinfo *si = NULL;
2243 tree rhs;
2245 if (idx < 0)
2246 rhs = build_int_cst (TREE_TYPE (lhs), ~idx);
2247 else if (idx == 0)
2248 rhs = bound;
2249 else
2251 rhs = NULL_TREE;
2252 si = get_strinfo (idx);
2253 if (si != NULL)
2255 rhs = get_string_length (si);
2256 /* For strnlen, if bound is constant, even if si is not known
2257 to be zero terminated, if we know at least bound bytes are
2258 not zero, the return value will be bound. */
2259 if (rhs == NULL_TREE
2260 && bound != NULL_TREE
2261 && TREE_CODE (bound) == INTEGER_CST
2262 && si->nonzero_chars != NULL_TREE
2263 && TREE_CODE (si->nonzero_chars) == INTEGER_CST
2264 && tree_int_cst_le (bound, si->nonzero_chars))
2265 rhs = bound;
2268 if (rhs != NULL_TREE)
2270 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2272 fprintf (dump_file, "Optimizing: ");
2273 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2275 rhs = unshare_expr (rhs);
2276 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2277 rhs = fold_convert_loc (loc, TREE_TYPE (lhs), rhs);
2279 if (bound)
2280 rhs = fold_build2_loc (loc, MIN_EXPR, TREE_TYPE (rhs), rhs, bound);
2282 gimplify_and_update_call_from_tree (&m_gsi, rhs);
2283 stmt = gsi_stmt (m_gsi);
2284 update_stmt (stmt);
2285 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2287 fprintf (dump_file, "into: ");
2288 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2291 if (si != NULL
2292 /* Don't update anything for strnlen. */
2293 && bound == NULL_TREE
2294 && TREE_CODE (si->nonzero_chars) != SSA_NAME
2295 && TREE_CODE (si->nonzero_chars) != INTEGER_CST
2296 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
2298 si = unshare_strinfo (si);
2299 si->nonzero_chars = lhs;
2300 gcc_assert (si->full_string_p);
2303 if (strlen_to_stridx
2304 && (bound == NULL_TREE
2305 /* For strnlen record this only if the call is proven
2306 to return the same value as strlen would. */
2307 || (TREE_CODE (bound) == INTEGER_CST
2308 && TREE_CODE (rhs) == INTEGER_CST
2309 && tree_int_cst_lt (rhs, bound))))
2310 strlen_to_stridx->put (lhs, stridx_strlenloc (idx, loc));
2312 return;
2315 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
2316 return;
2318 if (idx == 0)
2319 idx = new_stridx (src);
2320 else
2322 strinfo *si = get_strinfo (idx);
2323 if (si != NULL)
2325 if (!si->full_string_p && !si->stmt)
2327 /* Until now we only had a lower bound on the string length.
2328 Install LHS as the actual length. */
2329 si = unshare_strinfo (si);
2330 tree old = si->nonzero_chars;
2331 si->nonzero_chars = lhs;
2332 si->full_string_p = true;
2333 if (old && TREE_CODE (old) == INTEGER_CST)
2335 old = fold_convert_loc (loc, TREE_TYPE (lhs), old);
2336 tree adj = fold_build2_loc (loc, MINUS_EXPR,
2337 TREE_TYPE (lhs), lhs, old);
2338 adjust_related_strinfos (loc, si, adj);
2339 /* Use the constant minimum length as the lower bound
2340 of the non-constant length. */
2341 wide_int min = wi::to_wide (old);
2342 wide_int max
2343 = wi::to_wide (TYPE_MAX_VALUE (ptrdiff_type_node)) - 2;
2344 if (wi::gtu_p (min, max))
2345 max = wi::to_wide (TYPE_MAX_VALUE (TREE_TYPE (lhs)));
2346 set_strlen_range (lhs, min, max);
2348 else
2350 si->first = 0;
2351 si->prev = 0;
2352 si->next = 0;
2355 return;
2358 if (idx)
2360 if (!bound)
2362 /* Only store the new length information for calls to strlen(),
2363 not for those to strnlen(). */
2364 strinfo *si = new_strinfo (src, idx, lhs, true);
2365 set_strinfo (idx, si);
2366 find_equal_ptrs (src, idx);
2369 /* For SRC that is an array of N elements, set LHS's range
2370 to [0, min (N, BOUND)]. A constant return value means
2371 the range would have consisted of a single value. In
2372 that case, fold the result into the returned constant. */
2373 if (tree ret = maybe_set_strlen_range (lhs, src, bound))
2374 if (TREE_CODE (ret) == INTEGER_CST)
2376 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2378 fprintf (dump_file, "Optimizing: ");
2379 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2381 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (ret)))
2382 ret = fold_convert_loc (loc, TREE_TYPE (lhs), ret);
2383 gimplify_and_update_call_from_tree (&m_gsi, ret);
2384 stmt = gsi_stmt (m_gsi);
2385 update_stmt (stmt);
2386 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2388 fprintf (dump_file, "into: ");
2389 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2393 if (strlen_to_stridx && !bound)
2394 strlen_to_stridx->put (lhs, stridx_strlenloc (idx, loc));
2398 /* Handle a strchr call. If strlen of the first argument is known, replace
2399 the strchr (x, 0) call with the endptr or x + strlen, otherwise remember
2400 that lhs of the call is endptr and strlen of the argument is endptr - x. */
2402 void
2403 strlen_pass::handle_builtin_strchr ()
2405 gimple *stmt = gsi_stmt (m_gsi);
2406 tree lhs = gimple_call_lhs (stmt);
2408 if (lhs == NULL_TREE)
2409 return;
2411 if (!integer_zerop (gimple_call_arg (stmt, 1)))
2412 return;
2414 tree src = gimple_call_arg (stmt, 0);
2416 /* Avoid folding if the first argument is not a nul-terminated array.
2417 Defer warning until later. */
2418 if (!check_nul_terminated_array (NULL_TREE, src))
2419 return;
2421 int idx = get_stridx (src, stmt);
2422 if (idx)
2424 strinfo *si = NULL;
2425 tree rhs;
2427 if (idx < 0)
2428 rhs = build_int_cst (size_type_node, ~idx);
2429 else
2431 rhs = NULL_TREE;
2432 si = get_strinfo (idx);
2433 if (si != NULL)
2434 rhs = get_string_length (si);
2436 if (rhs != NULL_TREE)
2438 location_t loc = gimple_location (stmt);
2440 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2442 fprintf (dump_file, "Optimizing: ");
2443 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2445 if (si != NULL && si->endptr != NULL_TREE)
2447 rhs = unshare_expr (si->endptr);
2448 if (!useless_type_conversion_p (TREE_TYPE (lhs),
2449 TREE_TYPE (rhs)))
2450 rhs = fold_convert_loc (loc, TREE_TYPE (lhs), rhs);
2452 else
2454 rhs = fold_convert_loc (loc, sizetype, unshare_expr (rhs));
2455 rhs = fold_build2_loc (loc, POINTER_PLUS_EXPR,
2456 TREE_TYPE (src), src, rhs);
2457 if (!useless_type_conversion_p (TREE_TYPE (lhs),
2458 TREE_TYPE (rhs)))
2459 rhs = fold_convert_loc (loc, TREE_TYPE (lhs), rhs);
2461 gimplify_and_update_call_from_tree (&m_gsi, rhs);
2462 stmt = gsi_stmt (m_gsi);
2463 update_stmt (stmt);
2464 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2466 fprintf (dump_file, "into: ");
2467 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2469 if (si != NULL
2470 && si->endptr == NULL_TREE
2471 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
2473 si = unshare_strinfo (si);
2474 si->endptr = lhs;
2476 zero_length_string (lhs, si);
2477 return;
2480 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
2481 return;
2482 if (TREE_CODE (src) != SSA_NAME || !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (src))
2484 if (idx == 0)
2485 idx = new_stridx (src);
2486 else if (get_strinfo (idx) != NULL)
2488 zero_length_string (lhs, NULL);
2489 return;
2491 if (idx)
2493 location_t loc = gimple_location (stmt);
2494 tree lhsu = fold_convert_loc (loc, size_type_node, lhs);
2495 tree srcu = fold_convert_loc (loc, size_type_node, src);
2496 tree length = fold_build2_loc (loc, MINUS_EXPR,
2497 size_type_node, lhsu, srcu);
2498 strinfo *si = new_strinfo (src, idx, length, true);
2499 si->endptr = lhs;
2500 set_strinfo (idx, si);
2501 find_equal_ptrs (src, idx);
2502 zero_length_string (lhs, si);
2505 else
2506 zero_length_string (lhs, NULL);
2509 /* Handle a strcpy-like ({st{r,p}cpy,__st{r,p}cpy_chk}) call.
2510 If strlen of the second argument is known, strlen of the first argument
2511 is the same after this call. Furthermore, attempt to convert it to
2512 memcpy. Uses RVALS to determine range information. */
2514 void
2515 strlen_pass::handle_builtin_strcpy (built_in_function bcode)
2517 int idx, didx;
2518 tree src, dst, srclen, len, lhs, type, fn, oldlen;
2519 bool success;
2520 gimple *stmt = gsi_stmt (m_gsi);
2521 strinfo *si, *dsi, *olddsi, *zsi;
2522 location_t loc;
2524 src = gimple_call_arg (stmt, 1);
2525 dst = gimple_call_arg (stmt, 0);
2526 lhs = gimple_call_lhs (stmt);
2527 idx = get_stridx (src, stmt);
2528 si = NULL;
2529 if (idx > 0)
2530 si = get_strinfo (idx);
2532 didx = get_stridx (dst, stmt);
2533 olddsi = NULL;
2534 oldlen = NULL_TREE;
2535 if (didx > 0)
2536 olddsi = get_strinfo (didx);
2537 else if (didx < 0)
2538 return;
2540 if (olddsi != NULL)
2541 adjust_last_stmt (olddsi, stmt, false);
2543 srclen = NULL_TREE;
2544 if (si != NULL)
2545 srclen = get_string_length (si);
2546 else if (idx < 0)
2547 srclen = build_int_cst (size_type_node, ~idx);
2549 maybe_warn_overflow (stmt, false, srclen, olddsi, true);
2551 if (olddsi != NULL)
2552 adjust_last_stmt (olddsi, stmt, false);
2554 loc = gimple_location (stmt);
2555 if (srclen == NULL_TREE)
2556 switch (bcode)
2558 case BUILT_IN_STRCPY:
2559 case BUILT_IN_STRCPY_CHK:
2560 if (lhs != NULL_TREE || !builtin_decl_implicit_p (BUILT_IN_STPCPY))
2561 return;
2562 break;
2563 case BUILT_IN_STPCPY:
2564 case BUILT_IN_STPCPY_CHK:
2565 if (lhs == NULL_TREE)
2566 return;
2567 else
2569 tree lhsuint = fold_convert_loc (loc, size_type_node, lhs);
2570 srclen = fold_convert_loc (loc, size_type_node, dst);
2571 srclen = fold_build2_loc (loc, MINUS_EXPR, size_type_node,
2572 lhsuint, srclen);
2574 break;
2575 default:
2576 gcc_unreachable ();
2579 if (didx == 0)
2581 didx = new_stridx (dst);
2582 if (didx == 0)
2583 return;
2585 if (olddsi != NULL)
2587 oldlen = olddsi->nonzero_chars;
2588 dsi = unshare_strinfo (olddsi);
2589 dsi->nonzero_chars = srclen;
2590 dsi->full_string_p = (srclen != NULL_TREE);
2591 /* Break the chain, so adjust_related_strinfo on later pointers in
2592 the chain won't adjust this one anymore. */
2593 dsi->next = 0;
2594 dsi->stmt = NULL;
2595 dsi->endptr = NULL_TREE;
2597 else
2599 dsi = new_strinfo (dst, didx, srclen, srclen != NULL_TREE);
2600 set_strinfo (didx, dsi);
2601 find_equal_ptrs (dst, didx);
2603 dsi->writable = true;
2604 dsi->dont_invalidate = true;
2606 if (dsi->nonzero_chars == NULL_TREE)
2608 strinfo *chainsi;
2610 /* If string length of src is unknown, use delayed length
2611 computation. If string length of dst will be needed, it
2612 can be computed by transforming this strcpy call into
2613 stpcpy and subtracting dst from the return value. */
2615 /* Look for earlier strings whose length could be determined if
2616 this strcpy is turned into an stpcpy. */
2618 if (dsi->prev != 0 && (chainsi = verify_related_strinfos (dsi)) != NULL)
2620 for (; chainsi && chainsi != dsi; chainsi = get_strinfo (chainsi->next))
2622 /* When setting a stmt for delayed length computation
2623 prevent all strinfos through dsi from being
2624 invalidated. */
2625 chainsi = unshare_strinfo (chainsi);
2626 chainsi->stmt = stmt;
2627 chainsi->nonzero_chars = NULL_TREE;
2628 chainsi->full_string_p = false;
2629 chainsi->endptr = NULL_TREE;
2630 chainsi->dont_invalidate = true;
2633 dsi->stmt = stmt;
2635 /* Try to detect overlap before returning. This catches cases
2636 like strcpy (d, d + n) where n is non-constant whose range
2637 is such that (n <= strlen (d) holds).
2639 OLDDSI->NONZERO_chars may have been reset by this point with
2640 oldlen holding it original value. */
2641 if (olddsi && oldlen)
2643 /* Add 1 for the terminating NUL. */
2644 tree type = TREE_TYPE (oldlen);
2645 oldlen = fold_build2 (PLUS_EXPR, type, oldlen,
2646 build_int_cst (type, 1));
2647 check_bounds_or_overlap (stmt, olddsi->ptr, src, oldlen, NULL_TREE);
2650 return;
2653 if (olddsi != NULL)
2655 tree adj = NULL_TREE;
2656 if (oldlen == NULL_TREE)
2658 else if (integer_zerop (oldlen))
2659 adj = srclen;
2660 else if (TREE_CODE (oldlen) == INTEGER_CST
2661 || TREE_CODE (srclen) == INTEGER_CST)
2662 adj = fold_build2_loc (loc, MINUS_EXPR,
2663 TREE_TYPE (srclen), srclen,
2664 fold_convert_loc (loc, TREE_TYPE (srclen),
2665 oldlen));
2666 if (adj != NULL_TREE)
2667 adjust_related_strinfos (loc, dsi, adj);
2668 else
2669 dsi->prev = 0;
2671 /* strcpy src may not overlap dst, so src doesn't need to be
2672 invalidated either. */
2673 if (si != NULL)
2674 si->dont_invalidate = true;
2676 fn = NULL_TREE;
2677 zsi = NULL;
2678 switch (bcode)
2680 case BUILT_IN_STRCPY:
2681 fn = builtin_decl_implicit (BUILT_IN_MEMCPY);
2682 if (lhs)
2683 ssa_ver_to_stridx[SSA_NAME_VERSION (lhs)] = didx;
2684 break;
2685 case BUILT_IN_STRCPY_CHK:
2686 fn = builtin_decl_explicit (BUILT_IN_MEMCPY_CHK);
2687 if (lhs)
2688 ssa_ver_to_stridx[SSA_NAME_VERSION (lhs)] = didx;
2689 break;
2690 case BUILT_IN_STPCPY:
2691 /* This would need adjustment of the lhs (subtract one),
2692 or detection that the trailing '\0' doesn't need to be
2693 written, if it will be immediately overwritten.
2694 fn = builtin_decl_explicit (BUILT_IN_MEMPCPY); */
2695 if (lhs)
2697 dsi->endptr = lhs;
2698 zsi = zero_length_string (lhs, dsi);
2700 break;
2701 case BUILT_IN_STPCPY_CHK:
2702 /* This would need adjustment of the lhs (subtract one),
2703 or detection that the trailing '\0' doesn't need to be
2704 written, if it will be immediately overwritten.
2705 fn = builtin_decl_explicit (BUILT_IN_MEMPCPY_CHK); */
2706 if (lhs)
2708 dsi->endptr = lhs;
2709 zsi = zero_length_string (lhs, dsi);
2711 break;
2712 default:
2713 gcc_unreachable ();
2715 if (zsi != NULL)
2716 zsi->dont_invalidate = true;
2718 if (fn)
2720 tree args = TYPE_ARG_TYPES (TREE_TYPE (fn));
2721 type = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (args)));
2723 else
2724 type = size_type_node;
2726 len = fold_convert_loc (loc, type, unshare_expr (srclen));
2727 len = fold_build2_loc (loc, PLUS_EXPR, type, len, build_int_cst (type, 1));
2729 /* Disable warning for the transformed statement? */
2730 opt_code no_warning_opt = no_warning;
2732 if (const strinfo *chksi = si ? olddsi ? olddsi : dsi : NULL)
2734 no_warning_opt = check_bounds_or_overlap (stmt, chksi->ptr, si->ptr,
2735 NULL_TREE, len);
2736 if (no_warning_opt)
2737 suppress_warning (stmt, no_warning_opt);
2740 if (fn == NULL_TREE)
2741 return;
2743 len = force_gimple_operand_gsi (&m_gsi, len, true, NULL_TREE, true,
2744 GSI_SAME_STMT);
2745 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2747 fprintf (dump_file, "Optimizing: ");
2748 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2750 if (gimple_call_num_args (stmt) == 2)
2751 success = update_gimple_call (&m_gsi, fn, 3, dst, src, len);
2752 else
2753 success = update_gimple_call (&m_gsi, fn, 4, dst, src, len,
2754 gimple_call_arg (stmt, 2));
2755 if (success)
2757 stmt = gsi_stmt (m_gsi);
2758 update_stmt (stmt);
2759 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2761 fprintf (dump_file, "into: ");
2762 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2764 /* Allow adjust_last_stmt to decrease this memcpy's size. */
2765 laststmt.stmt = stmt;
2766 laststmt.len = srclen;
2767 laststmt.stridx = dsi->idx;
2769 else if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2770 fprintf (dump_file, "not possible.\n");
2772 if (no_warning_opt)
2773 suppress_warning (stmt, no_warning_opt);
2776 /* Check the size argument to the built-in forms of stpncpy and strncpy
2777 for out-of-bounds offsets or overlapping access, and to see if the
2778 size argument is derived from a call to strlen() on the source argument,
2779 and if so, issue an appropriate warning. */
2781 void
2782 strlen_pass::handle_builtin_strncat (built_in_function)
2784 /* Same as stxncpy(). */
2785 handle_builtin_stxncpy_strncat (true);
2788 /* Return true if LEN depends on a call to strlen(SRC) in an interesting
2789 way. LEN can either be an integer expression, or a pointer (to char).
2790 When it is the latter (such as in recursive calls to self) it is
2791 assumed to be the argument in some call to strlen() whose relationship
2792 to SRC is being ascertained. */
2794 bool
2795 is_strlen_related_p (tree src, tree len)
2797 if (TREE_CODE (TREE_TYPE (len)) == POINTER_TYPE
2798 && operand_equal_p (src, len, 0))
2799 return true;
2801 if (TREE_CODE (len) != SSA_NAME)
2802 return false;
2804 if (TREE_CODE (src) == SSA_NAME)
2806 gimple *srcdef = SSA_NAME_DEF_STMT (src);
2807 if (is_gimple_assign (srcdef))
2809 /* Handle bitwise AND used in conversions from wider size_t
2810 to narrower unsigned types. */
2811 tree_code code = gimple_assign_rhs_code (srcdef);
2812 if (code == BIT_AND_EXPR
2813 || code == NOP_EXPR)
2814 return is_strlen_related_p (gimple_assign_rhs1 (srcdef), len);
2816 return false;
2819 if (gimple_call_builtin_p (srcdef, BUILT_IN_NORMAL))
2821 /* If SRC is the result of a call to an allocation function
2822 or strlen, use the function's argument instead. */
2823 tree func = gimple_call_fndecl (srcdef);
2824 built_in_function code = DECL_FUNCTION_CODE (func);
2825 if (code == BUILT_IN_ALLOCA
2826 || code == BUILT_IN_ALLOCA_WITH_ALIGN
2827 || code == BUILT_IN_MALLOC
2828 || code == BUILT_IN_STRLEN)
2829 return is_strlen_related_p (gimple_call_arg (srcdef, 0), len);
2831 /* FIXME: Handle other functions with attribute alloc_size. */
2832 return false;
2836 gimple *lendef = SSA_NAME_DEF_STMT (len);
2837 if (!lendef)
2838 return false;
2840 if (is_gimple_call (lendef))
2842 tree func = gimple_call_fndecl (lendef);
2843 if (!valid_builtin_call (lendef)
2844 || DECL_FUNCTION_CODE (func) != BUILT_IN_STRLEN)
2845 return false;
2847 tree arg = gimple_call_arg (lendef, 0);
2848 return is_strlen_related_p (src, arg);
2851 if (!is_gimple_assign (lendef))
2852 return false;
2854 tree_code code = gimple_assign_rhs_code (lendef);
2855 tree rhs1 = gimple_assign_rhs1 (lendef);
2856 tree rhstype = TREE_TYPE (rhs1);
2858 if ((TREE_CODE (rhstype) == POINTER_TYPE && code == POINTER_PLUS_EXPR)
2859 || (INTEGRAL_TYPE_P (rhstype)
2860 && (code == BIT_AND_EXPR
2861 || code == NOP_EXPR)))
2863 /* Pointer plus (an integer), and truncation are considered among
2864 the (potentially) related expressions to strlen. */
2865 return is_strlen_related_p (src, rhs1);
2868 if (tree rhs2 = gimple_assign_rhs2 (lendef))
2870 /* Integer subtraction is considered strlen-related when both
2871 arguments are integers and second one is strlen-related. */
2872 rhstype = TREE_TYPE (rhs2);
2873 if (INTEGRAL_TYPE_P (rhstype) && code == MINUS_EXPR)
2874 return is_strlen_related_p (src, rhs2);
2877 return false;
2880 /* Called by handle_builtin_stxncpy_strncat and by
2881 gimple_fold_builtin_strncpy in gimple-fold.cc.
2882 Check to see if the specified bound is a) equal to the size of
2883 the destination DST and if so, b) if it's immediately followed by
2884 DST[CNT - 1] = '\0'. If a) holds and b) does not, warn. Otherwise,
2885 do nothing. Return true if diagnostic has been issued.
2887 The purpose is to diagnose calls to strncpy and stpncpy that do
2888 not nul-terminate the copy while allowing for the idiom where
2889 such a call is immediately followed by setting the last element
2890 to nul, as in:
2891 char a[32];
2892 strncpy (a, s, sizeof a);
2893 a[sizeof a - 1] = '\0';
2896 bool
2897 maybe_diag_stxncpy_trunc (gimple_stmt_iterator gsi, tree src, tree cnt,
2898 pointer_query *ptr_qry /* = NULL */)
2900 gimple *stmt = gsi_stmt (gsi);
2901 if (warning_suppressed_p (stmt, OPT_Wstringop_truncation))
2902 return false;
2904 wide_int cntrange[2];
2905 value_range r;
2906 if (!get_range_query (cfun)->range_of_expr (r, cnt)
2907 || r.varying_p ()
2908 || r.undefined_p ())
2909 return false;
2911 tree min, max;
2912 value_range_kind kind = get_legacy_range (r, min, max);
2913 cntrange[0] = wi::to_wide (min);
2914 cntrange[1] = wi::to_wide (max);
2915 if (kind == VR_ANTI_RANGE)
2917 wide_int maxobjsize = wi::to_wide (TYPE_MAX_VALUE (ptrdiff_type_node));
2919 if (wi::ltu_p (cntrange[1], maxobjsize))
2921 cntrange[0] = cntrange[1] + 1;
2922 cntrange[1] = maxobjsize;
2924 else
2926 cntrange[1] = cntrange[0] - 1;
2927 cntrange[0] = wi::zero (TYPE_PRECISION (TREE_TYPE (cnt)));
2931 /* Negative value is the constant string length. If it's less than
2932 the lower bound there is no truncation. Avoid calling get_stridx()
2933 when ssa_ver_to_stridx is empty. That implies the caller isn't
2934 running under the control of this pass and ssa_ver_to_stridx hasn't
2935 been created yet. */
2936 int sidx = ssa_ver_to_stridx.length () ? get_stridx (src, stmt) : 0;
2937 if (sidx < 0 && wi::gtu_p (cntrange[0], ~sidx))
2938 return false;
2940 tree dst = gimple_call_arg (stmt, 0);
2941 tree dstdecl = dst;
2942 if (TREE_CODE (dstdecl) == ADDR_EXPR)
2943 dstdecl = TREE_OPERAND (dstdecl, 0);
2945 tree ref = NULL_TREE;
2947 if (!sidx)
2949 /* If the source is a non-string return early to avoid warning
2950 for possible truncation (if the truncation is certain SIDX
2951 is non-zero). */
2952 tree srcdecl = gimple_call_arg (stmt, 1);
2953 if (TREE_CODE (srcdecl) == ADDR_EXPR)
2954 srcdecl = TREE_OPERAND (srcdecl, 0);
2955 if (get_attr_nonstring_decl (srcdecl, &ref))
2956 return false;
2959 /* Likewise, if the destination refers to an array/pointer declared
2960 nonstring return early. */
2961 if (get_attr_nonstring_decl (dstdecl, &ref))
2962 return false;
2964 /* Look for dst[i] = '\0'; after the stxncpy() call and if found
2965 avoid the truncation warning. */
2966 gsi_next_nondebug (&gsi);
2967 gimple *next_stmt = gsi_stmt (gsi);
2968 if (!next_stmt)
2970 /* When there is no statement in the same basic block check
2971 the immediate successor block. */
2972 if (basic_block bb = gimple_bb (stmt))
2974 if (single_succ_p (bb))
2976 /* For simplicity, ignore blocks with multiple outgoing
2977 edges for now and only consider successor blocks along
2978 normal edges. */
2979 edge e = EDGE_SUCC (bb, 0);
2980 if (!(e->flags & EDGE_ABNORMAL))
2982 gsi = gsi_start_bb (e->dest);
2983 next_stmt = gsi_stmt (gsi);
2984 if (next_stmt && is_gimple_debug (next_stmt))
2986 gsi_next_nondebug (&gsi);
2987 next_stmt = gsi_stmt (gsi);
2994 if (next_stmt && is_gimple_assign (next_stmt))
2996 tree lhs = gimple_assign_lhs (next_stmt);
2997 tree_code code = TREE_CODE (lhs);
2998 if (code == ARRAY_REF || code == MEM_REF)
2999 lhs = TREE_OPERAND (lhs, 0);
3001 tree func = gimple_call_fndecl (stmt);
3002 if (DECL_FUNCTION_CODE (func) == BUILT_IN_STPNCPY)
3004 tree ret = gimple_call_lhs (stmt);
3005 if (ret && operand_equal_p (ret, lhs, 0))
3006 return false;
3009 /* Determine the base address and offset of the reference,
3010 ignoring the innermost array index. */
3011 if (TREE_CODE (ref) == ARRAY_REF)
3012 ref = TREE_OPERAND (ref, 0);
3014 poly_int64 dstoff;
3015 tree dstbase = get_addr_base_and_unit_offset (ref, &dstoff);
3017 poly_int64 lhsoff;
3018 tree lhsbase = get_addr_base_and_unit_offset (lhs, &lhsoff);
3019 if (lhsbase
3020 && dstbase
3021 && known_eq (dstoff, lhsoff)
3022 && operand_equal_p (dstbase, lhsbase, 0))
3023 return false;
3026 int prec = TYPE_PRECISION (TREE_TYPE (cnt));
3027 wide_int lenrange[2];
3028 if (strinfo *sisrc = sidx > 0 ? get_strinfo (sidx) : NULL)
3030 lenrange[0] = (sisrc->nonzero_chars
3031 && TREE_CODE (sisrc->nonzero_chars) == INTEGER_CST
3032 ? wi::to_wide (sisrc->nonzero_chars)
3033 : wi::zero (prec));
3034 lenrange[1] = lenrange[0];
3036 else if (sidx < 0)
3037 lenrange[0] = lenrange[1] = wi::shwi (~sidx, prec);
3038 else
3040 c_strlen_data lendata = { };
3041 /* Set MAXBOUND to an arbitrary non-null non-integer node as a request
3042 to have it set to the length of the longest string in a PHI. */
3043 lendata.maxbound = src;
3044 get_range_strlen (src, &lendata, /* eltsize = */1);
3045 if (TREE_CODE (lendata.minlen) == INTEGER_CST
3046 && TREE_CODE (lendata.maxbound) == INTEGER_CST)
3048 /* When LENDATA.MAXLEN is unknown, reset LENDATA.MINLEN
3049 which stores the length of the shortest known string. */
3050 if (integer_all_onesp (lendata.maxlen))
3051 lenrange[0] = wi::shwi (0, prec);
3052 else
3053 lenrange[0] = wi::to_wide (lendata.minlen, prec);
3054 lenrange[1] = wi::to_wide (lendata.maxbound, prec);
3056 else
3058 lenrange[0] = wi::shwi (0, prec);
3059 lenrange[1] = wi::shwi (-1, prec);
3063 location_t callloc = gimple_or_expr_nonartificial_location (stmt, dst);
3064 tree func = gimple_call_fndecl (stmt);
3066 if (lenrange[0] != 0 || !wi::neg_p (lenrange[1]))
3068 /* If the longest source string is shorter than the lower bound
3069 of the specified count the copy is definitely nul-terminated. */
3070 if (wi::ltu_p (lenrange[1], cntrange[0]))
3071 return false;
3073 if (wi::neg_p (lenrange[1]))
3075 /* The length of one of the strings is unknown but at least
3076 one has non-zero length and that length is stored in
3077 LENRANGE[1]. Swap the bounds to force a "may be truncated"
3078 warning below. */
3079 lenrange[1] = lenrange[0];
3080 lenrange[0] = wi::shwi (0, prec);
3083 /* Set to true for strncat whose bound is derived from the length
3084 of the destination (the expected usage pattern). */
3085 bool cat_dstlen_bounded = false;
3086 if (DECL_FUNCTION_CODE (func) == BUILT_IN_STRNCAT)
3087 cat_dstlen_bounded = is_strlen_related_p (dst, cnt);
3089 if (lenrange[0] == cntrange[1] && cntrange[0] == cntrange[1])
3090 return warning_n (callloc, OPT_Wstringop_truncation,
3091 cntrange[0].to_uhwi (),
3092 "%qD output truncated before terminating "
3093 "nul copying %E byte from a string of the "
3094 "same length",
3095 "%qD output truncated before terminating nul "
3096 "copying %E bytes from a string of the same "
3097 "length",
3098 func, cnt);
3099 else if (!cat_dstlen_bounded)
3101 if (wi::geu_p (lenrange[0], cntrange[1]))
3103 /* The shortest string is longer than the upper bound of
3104 the count so the truncation is certain. */
3105 if (cntrange[0] == cntrange[1])
3106 return warning_n (callloc, OPT_Wstringop_truncation,
3107 cntrange[0].to_uhwi (),
3108 "%qD output truncated copying %E byte "
3109 "from a string of length %wu",
3110 "%qD output truncated copying %E bytes "
3111 "from a string of length %wu",
3112 func, cnt, lenrange[0].to_uhwi ());
3114 return warning_at (callloc, OPT_Wstringop_truncation,
3115 "%qD output truncated copying between %wu "
3116 "and %wu bytes from a string of length %wu",
3117 func, cntrange[0].to_uhwi (),
3118 cntrange[1].to_uhwi (), lenrange[0].to_uhwi ());
3120 else if (wi::geu_p (lenrange[1], cntrange[1]))
3122 /* The longest string is longer than the upper bound of
3123 the count so the truncation is possible. */
3124 if (cntrange[0] == cntrange[1])
3125 return warning_n (callloc, OPT_Wstringop_truncation,
3126 cntrange[0].to_uhwi (),
3127 "%qD output may be truncated copying %E "
3128 "byte from a string of length %wu",
3129 "%qD output may be truncated copying %E "
3130 "bytes from a string of length %wu",
3131 func, cnt, lenrange[1].to_uhwi ());
3133 return warning_at (callloc, OPT_Wstringop_truncation,
3134 "%qD output may be truncated copying between "
3135 "%wu and %wu bytes from a string of length %wu",
3136 func, cntrange[0].to_uhwi (),
3137 cntrange[1].to_uhwi (), lenrange[1].to_uhwi ());
3141 if (!cat_dstlen_bounded
3142 && cntrange[0] != cntrange[1]
3143 && wi::leu_p (cntrange[0], lenrange[0])
3144 && wi::leu_p (cntrange[1], lenrange[0] + 1))
3146 /* If the source (including the terminating nul) is longer than
3147 the lower bound of the specified count but shorter than the
3148 upper bound the copy may (but need not) be truncated. */
3149 return warning_at (callloc, OPT_Wstringop_truncation,
3150 "%qD output may be truncated copying between "
3151 "%wu and %wu bytes from a string of length %wu",
3152 func, cntrange[0].to_uhwi (),
3153 cntrange[1].to_uhwi (), lenrange[0].to_uhwi ());
3157 access_ref aref;
3158 if (tree dstsize = compute_objsize (dst, stmt, 1, &aref, ptr_qry))
3160 /* The source length is unknown. Try to determine the destination
3161 size and see if it matches the specified bound. If not, bail.
3162 Otherwise go on to see if it should be diagnosed for possible
3163 truncation. */
3164 if (!dstsize)
3165 return false;
3167 if (wi::to_wide (dstsize) != cntrange[1])
3168 return false;
3170 /* Avoid warning for strncpy(a, b, N) calls where the following
3171 equalities hold:
3172 N == sizeof a && N == sizeof b */
3173 if (tree srcsize = compute_objsize (src, stmt, 1, &aref, ptr_qry))
3174 if (wi::to_wide (srcsize) == cntrange[1])
3175 return false;
3177 if (cntrange[0] == cntrange[1])
3178 return warning_at (callloc, OPT_Wstringop_truncation,
3179 "%qD specified bound %E equals destination size",
3180 func, cnt);
3183 return false;
3186 /* Check the arguments to the built-in forms of stpncpy, strncpy, and
3187 strncat, for out-of-bounds offsets or overlapping access, and to see
3188 if the size is derived from calling strlen() on the source argument,
3189 and if so, issue the appropriate warning.
3190 APPEND_P is true for strncat. */
3192 void
3193 strlen_pass::handle_builtin_stxncpy_strncat (bool append_p)
3195 if (!strlen_to_stridx)
3196 return;
3198 gimple *stmt = gsi_stmt (m_gsi);
3200 tree dst = gimple_call_arg (stmt, 0);
3201 tree src = gimple_call_arg (stmt, 1);
3202 tree len = gimple_call_arg (stmt, 2);
3203 /* An upper bound of the size of the destination. */
3204 tree dstsize = NULL_TREE;
3205 /* The length of the destination and source strings (plus 1 for those
3206 whose FULL_STRING_P is set, i.e., whose length is exact rather than
3207 a lower bound). */
3208 tree dstlenp1 = NULL_TREE, srclenp1 = NULL_TREE;;
3210 int didx = get_stridx (dst, stmt);
3211 if (strinfo *sidst = didx > 0 ? get_strinfo (didx) : NULL)
3213 /* Compute the size of the destination string including the nul
3214 if it is known to be nul-terminated. */
3215 if (sidst->nonzero_chars)
3217 if (sidst->full_string_p)
3219 /* String is known to be nul-terminated. */
3220 tree type = TREE_TYPE (sidst->nonzero_chars);
3221 dstlenp1 = fold_build2 (PLUS_EXPR, type, sidst->nonzero_chars,
3222 build_int_cst (type, 1));
3224 else
3225 dstlenp1 = sidst->nonzero_chars;
3227 else if (TREE_CODE (sidst->ptr) == SSA_NAME)
3229 gimple *def_stmt = SSA_NAME_DEF_STMT (sidst->ptr);
3230 dstsize = gimple_call_alloc_size (def_stmt);
3233 dst = sidst->ptr;
3236 int sidx = get_stridx (src, stmt);
3237 strinfo *sisrc = sidx > 0 ? get_strinfo (sidx) : NULL;
3238 if (sisrc)
3240 /* strncat() and strncpy() can modify the source string by writing
3241 over the terminating nul so SISRC->DONT_INVALIDATE must be left
3242 clear. */
3244 /* Compute the size of the source string including the terminating
3245 nul if its known to be nul-terminated. */
3246 if (sisrc->nonzero_chars)
3248 if (sisrc->full_string_p)
3250 tree type = TREE_TYPE (sisrc->nonzero_chars);
3251 srclenp1 = fold_build2 (PLUS_EXPR, type, sisrc->nonzero_chars,
3252 build_int_cst (type, 1));
3254 else
3255 srclenp1 = sisrc->nonzero_chars;
3258 src = sisrc->ptr;
3260 else
3261 srclenp1 = NULL_TREE;
3263 opt_code opt = check_bounds_or_overlap (stmt, dst, src, dstlenp1, srclenp1);
3264 if (opt != no_warning)
3266 suppress_warning (stmt, opt);
3267 return;
3270 /* If the length argument was computed from strlen(S) for some string
3271 S retrieve the strinfo index for the string (PSS->FIRST) along with
3272 the location of the strlen() call (PSS->SECOND). */
3273 stridx_strlenloc *pss = strlen_to_stridx->get (len);
3274 if (!pss || pss->first <= 0)
3276 if (maybe_diag_stxncpy_trunc (m_gsi, src, len))
3277 suppress_warning (stmt, OPT_Wstringop_truncation);
3279 return;
3282 /* Retrieve the strinfo data for the string S that LEN was computed
3283 from as some function F of strlen (S) (i.e., LEN need not be equal
3284 to strlen(S)). */
3285 strinfo *silen = get_strinfo (pss->first);
3287 location_t callloc = gimple_or_expr_nonartificial_location (stmt, dst);
3289 tree func = gimple_call_fndecl (stmt);
3291 bool warned = false;
3293 /* When -Wstringop-truncation is set, try to determine truncation
3294 before diagnosing possible overflow. Truncation is implied by
3295 the LEN argument being equal to strlen(SRC), regardless of
3296 whether its value is known. Otherwise, when appending, or
3297 when copying into a destination of known size, issue the more
3298 generic -Wstringop-overflow which triggers for LEN arguments
3299 that in any meaningful way depend on strlen(SRC). */
3300 if (!append_p
3301 && sisrc == silen
3302 && is_strlen_related_p (src, len)
3303 && warning_at (callloc, OPT_Wstringop_truncation,
3304 "%qD output truncated before terminating nul "
3305 "copying as many bytes from a string as its length",
3306 func))
3307 warned = true;
3308 else if ((append_p || !dstsize || len == dstlenp1)
3309 && silen && is_strlen_related_p (src, silen->ptr))
3311 /* Issue -Wstringop-overflow when appending or when writing into
3312 a destination of a known size. Otherwise, when copying into
3313 a destination of an unknown size, it's truncation. */
3314 opt_code opt = (append_p || dstsize
3315 ? OPT_Wstringop_overflow_ : OPT_Wstringop_truncation);
3316 warned = warning_at (callloc, opt,
3317 "%qD specified bound depends on the length "
3318 "of the source argument",
3319 func);
3321 if (warned)
3323 location_t strlenloc = pss->second;
3324 if (strlenloc != UNKNOWN_LOCATION && strlenloc != callloc)
3325 inform (strlenloc, "length computed here");
3329 /* Handle a memcpy-like ({mem{,p}cpy,__mem{,p}cpy_chk}) call.
3330 If strlen of the second argument is known and length of the third argument
3331 is that plus one, strlen of the first argument is the same after this
3332 call. Uses RVALS to determine range information. */
3334 void
3335 strlen_pass::handle_builtin_memcpy (built_in_function bcode)
3337 tree lhs, oldlen, newlen;
3338 gimple *stmt = gsi_stmt (m_gsi);
3339 strinfo *si, *dsi;
3341 tree len = gimple_call_arg (stmt, 2);
3342 tree src = gimple_call_arg (stmt, 1);
3343 tree dst = gimple_call_arg (stmt, 0);
3345 int didx = get_stridx (dst, stmt);
3346 strinfo *olddsi = NULL;
3347 if (didx > 0)
3348 olddsi = get_strinfo (didx);
3349 else if (didx < 0)
3350 return;
3352 if (olddsi != NULL
3353 && !integer_zerop (len))
3355 maybe_warn_overflow (stmt, false, len, olddsi, false, true);
3356 if (tree_fits_uhwi_p (len))
3357 adjust_last_stmt (olddsi, stmt, false);
3360 int idx = get_stridx (src, stmt);
3361 if (idx == 0)
3362 return;
3364 bool full_string_p;
3365 if (idx > 0)
3367 gimple *def_stmt;
3369 /* Handle memcpy (x, y, l) where l's relationship with strlen (y)
3370 is known. */
3371 si = get_strinfo (idx);
3372 if (si == NULL || si->nonzero_chars == NULL_TREE)
3373 return;
3374 if (TREE_CODE (len) == INTEGER_CST
3375 && TREE_CODE (si->nonzero_chars) == INTEGER_CST)
3377 if (tree_int_cst_le (len, si->nonzero_chars))
3379 /* Copying LEN nonzero characters, where LEN is constant. */
3380 newlen = len;
3381 full_string_p = false;
3383 else
3385 /* Copying the whole of the analyzed part of SI. */
3386 newlen = si->nonzero_chars;
3387 full_string_p = si->full_string_p;
3390 else
3392 if (!si->full_string_p)
3393 return;
3394 if (TREE_CODE (len) != SSA_NAME)
3395 return;
3396 def_stmt = SSA_NAME_DEF_STMT (len);
3397 if (!is_gimple_assign (def_stmt)
3398 || gimple_assign_rhs_code (def_stmt) != PLUS_EXPR
3399 || gimple_assign_rhs1 (def_stmt) != si->nonzero_chars
3400 || !integer_onep (gimple_assign_rhs2 (def_stmt)))
3401 return;
3402 /* Copying variable-length string SI (and no more). */
3403 newlen = si->nonzero_chars;
3404 full_string_p = true;
3407 else
3409 si = NULL;
3410 /* Handle memcpy (x, "abcd", 5) or
3411 memcpy (x, "abc\0uvw", 7). */
3412 if (!tree_fits_uhwi_p (len))
3413 return;
3415 unsigned HOST_WIDE_INT clen = tree_to_uhwi (len);
3416 unsigned HOST_WIDE_INT nonzero_chars = ~idx;
3417 newlen = build_int_cst (size_type_node, MIN (nonzero_chars, clen));
3418 full_string_p = clen > nonzero_chars;
3421 if (!full_string_p
3422 && olddsi
3423 && olddsi->nonzero_chars
3424 && TREE_CODE (olddsi->nonzero_chars) == INTEGER_CST
3425 && tree_int_cst_le (newlen, olddsi->nonzero_chars))
3427 /* The SRC substring being written strictly overlaps
3428 a subsequence of the existing string OLDDSI. */
3429 newlen = olddsi->nonzero_chars;
3430 full_string_p = olddsi->full_string_p;
3433 if (olddsi != NULL && TREE_CODE (len) == SSA_NAME)
3434 adjust_last_stmt (olddsi, stmt, false);
3436 if (didx == 0)
3438 didx = new_stridx (dst);
3439 if (didx == 0)
3440 return;
3442 oldlen = NULL_TREE;
3443 if (olddsi != NULL)
3445 dsi = unshare_strinfo (olddsi);
3446 oldlen = olddsi->nonzero_chars;
3447 dsi->nonzero_chars = newlen;
3448 dsi->full_string_p = full_string_p;
3449 /* Break the chain, so adjust_related_strinfo on later pointers in
3450 the chain won't adjust this one anymore. */
3451 dsi->next = 0;
3452 dsi->stmt = NULL;
3453 dsi->endptr = NULL_TREE;
3455 else
3457 dsi = new_strinfo (dst, didx, newlen, full_string_p);
3458 set_strinfo (didx, dsi);
3459 find_equal_ptrs (dst, didx);
3461 dsi->writable = true;
3462 dsi->dont_invalidate = true;
3463 if (olddsi != NULL)
3465 tree adj = NULL_TREE;
3466 location_t loc = gimple_location (stmt);
3467 if (oldlen == NULL_TREE)
3469 else if (integer_zerop (oldlen))
3470 adj = newlen;
3471 else if (TREE_CODE (oldlen) == INTEGER_CST
3472 || TREE_CODE (newlen) == INTEGER_CST)
3473 adj = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (newlen), newlen,
3474 fold_convert_loc (loc, TREE_TYPE (newlen),
3475 oldlen));
3476 if (adj != NULL_TREE)
3477 adjust_related_strinfos (loc, dsi, adj);
3478 else
3479 dsi->prev = 0;
3481 /* memcpy src may not overlap dst, so src doesn't need to be
3482 invalidated either. */
3483 if (si != NULL)
3484 si->dont_invalidate = true;
3486 if (full_string_p)
3488 lhs = gimple_call_lhs (stmt);
3489 switch (bcode)
3491 case BUILT_IN_MEMCPY:
3492 case BUILT_IN_MEMCPY_CHK:
3493 /* Allow adjust_last_stmt to decrease this memcpy's size. */
3494 laststmt.stmt = stmt;
3495 laststmt.len = dsi->nonzero_chars;
3496 laststmt.stridx = dsi->idx;
3497 if (lhs)
3498 ssa_ver_to_stridx[SSA_NAME_VERSION (lhs)] = didx;
3499 break;
3500 case BUILT_IN_MEMPCPY:
3501 case BUILT_IN_MEMPCPY_CHK:
3502 break;
3503 default:
3504 gcc_unreachable ();
3509 /* Handle a strcat-like ({strcat,__strcat_chk}) call.
3510 If strlen of the second argument is known, strlen of the first argument
3511 is increased by the length of the second argument. Furthermore, attempt
3512 to convert it to memcpy/strcpy if the length of the first argument
3513 is known. */
3515 void
3516 strlen_pass::handle_builtin_strcat (built_in_function bcode)
3518 int idx, didx;
3519 tree srclen, args, type, fn, objsz, endptr;
3520 bool success;
3521 gimple *stmt = gsi_stmt (m_gsi);
3522 strinfo *si, *dsi;
3523 location_t loc = gimple_location (stmt);
3525 tree src = gimple_call_arg (stmt, 1);
3526 tree dst = gimple_call_arg (stmt, 0);
3528 /* Bail if the source is the same as destination. It will be diagnosed
3529 elsewhere. */
3530 if (operand_equal_p (src, dst, 0))
3531 return;
3533 tree lhs = gimple_call_lhs (stmt);
3535 didx = get_stridx (dst, stmt);
3536 if (didx < 0)
3537 return;
3539 dsi = NULL;
3540 if (didx > 0)
3541 dsi = get_strinfo (didx);
3543 srclen = NULL_TREE;
3544 si = NULL;
3545 idx = get_stridx (src, stmt);
3546 if (idx < 0)
3547 srclen = build_int_cst (size_type_node, ~idx);
3548 else if (idx > 0)
3550 si = get_strinfo (idx);
3551 if (si != NULL)
3552 srclen = get_string_length (si);
3555 /* Disable warning for the transformed statement? */
3556 opt_code no_warning_opt = no_warning;
3558 if (dsi == NULL || get_string_length (dsi) == NULL_TREE)
3561 /* The concatenation always involves copying at least one byte
3562 (the terminating nul), even if the source string is empty.
3563 If the source is unknown assume it's one character long and
3564 used that as both sizes. */
3565 tree slen = srclen;
3566 if (slen)
3568 tree type = TREE_TYPE (slen);
3569 slen = fold_build2 (PLUS_EXPR, type, slen, build_int_cst (type, 1));
3572 tree sptr = si && si->ptr ? si->ptr : src;
3573 no_warning_opt = check_bounds_or_overlap (stmt, dst, sptr, NULL_TREE,
3574 slen);
3575 if (no_warning_opt)
3576 suppress_warning (stmt, no_warning_opt);
3579 /* strcat (p, q) can be transformed into
3580 tmp = p + strlen (p); endptr = stpcpy (tmp, q);
3581 with length endptr - p if we need to compute the length
3582 later on. Don't do this transformation if we don't need
3583 it. */
3584 if (builtin_decl_implicit_p (BUILT_IN_STPCPY) && lhs == NULL_TREE)
3586 if (didx == 0)
3588 didx = new_stridx (dst);
3589 if (didx == 0)
3590 return;
3592 if (dsi == NULL)
3594 dsi = new_strinfo (dst, didx, NULL_TREE, false);
3595 set_strinfo (didx, dsi);
3596 find_equal_ptrs (dst, didx);
3598 else
3600 dsi = unshare_strinfo (dsi);
3601 dsi->nonzero_chars = NULL_TREE;
3602 dsi->full_string_p = false;
3603 dsi->next = 0;
3604 dsi->endptr = NULL_TREE;
3606 dsi->writable = true;
3607 dsi->stmt = stmt;
3608 dsi->dont_invalidate = true;
3610 return;
3613 tree dstlen = dsi->nonzero_chars;
3614 endptr = dsi->endptr;
3616 dsi = unshare_strinfo (dsi);
3617 dsi->endptr = NULL_TREE;
3618 dsi->stmt = NULL;
3619 dsi->writable = true;
3621 if (srclen != NULL_TREE)
3623 dsi->nonzero_chars = fold_build2_loc (loc, PLUS_EXPR,
3624 TREE_TYPE (dsi->nonzero_chars),
3625 dsi->nonzero_chars, srclen);
3626 gcc_assert (dsi->full_string_p);
3627 adjust_related_strinfos (loc, dsi, srclen);
3628 dsi->dont_invalidate = true;
3630 else
3632 dsi->nonzero_chars = NULL;
3633 dsi->full_string_p = false;
3634 if (lhs == NULL_TREE && builtin_decl_implicit_p (BUILT_IN_STPCPY))
3635 dsi->dont_invalidate = true;
3638 if (si != NULL)
3639 /* strcat src may not overlap dst, so src doesn't need to be
3640 invalidated either. */
3641 si->dont_invalidate = true;
3643 /* For now. Could remove the lhs from the call and add
3644 lhs = dst; afterwards. */
3645 if (lhs)
3646 return;
3648 fn = NULL_TREE;
3649 objsz = NULL_TREE;
3650 switch (bcode)
3652 case BUILT_IN_STRCAT:
3653 if (srclen != NULL_TREE)
3654 fn = builtin_decl_implicit (BUILT_IN_MEMCPY);
3655 else
3656 fn = builtin_decl_implicit (BUILT_IN_STRCPY);
3657 break;
3658 case BUILT_IN_STRCAT_CHK:
3659 if (srclen != NULL_TREE)
3660 fn = builtin_decl_explicit (BUILT_IN_MEMCPY_CHK);
3661 else
3662 fn = builtin_decl_explicit (BUILT_IN_STRCPY_CHK);
3663 objsz = gimple_call_arg (stmt, 2);
3664 break;
3665 default:
3666 gcc_unreachable ();
3669 if (fn == NULL_TREE)
3670 return;
3672 if (dsi && dstlen)
3674 tree type = TREE_TYPE (dstlen);
3676 /* Compute the size of the source sequence, including the nul. */
3677 tree srcsize = srclen ? srclen : size_zero_node;
3678 tree one = build_int_cst (type, 1);
3679 srcsize = fold_build2 (PLUS_EXPR, type, srcsize, one);
3680 tree dstsize = fold_build2 (PLUS_EXPR, type, dstlen, one);
3681 tree sptr = si && si->ptr ? si->ptr : src;
3683 no_warning_opt = check_bounds_or_overlap (stmt, dst, sptr, dstsize,
3684 srcsize);
3685 if (no_warning_opt)
3686 suppress_warning (stmt, no_warning_opt);
3689 tree len = NULL_TREE;
3690 if (srclen != NULL_TREE)
3692 args = TYPE_ARG_TYPES (TREE_TYPE (fn));
3693 type = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (args)));
3695 len = fold_convert_loc (loc, type, unshare_expr (srclen));
3696 len = fold_build2_loc (loc, PLUS_EXPR, type, len,
3697 build_int_cst (type, 1));
3698 len = force_gimple_operand_gsi (&m_gsi, len, true, NULL_TREE, true,
3699 GSI_SAME_STMT);
3701 if (endptr)
3702 dst = fold_convert_loc (loc, TREE_TYPE (dst), unshare_expr (endptr));
3703 else
3704 dst = fold_build2_loc (loc, POINTER_PLUS_EXPR, TREE_TYPE (dst), dst,
3705 fold_convert_loc (loc, sizetype,
3706 unshare_expr (dstlen)));
3707 dst = force_gimple_operand_gsi (&m_gsi, dst, true, NULL_TREE, true,
3708 GSI_SAME_STMT);
3709 if (objsz)
3711 objsz = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (objsz), objsz,
3712 fold_convert_loc (loc, TREE_TYPE (objsz),
3713 unshare_expr (dstlen)));
3714 objsz = force_gimple_operand_gsi (&m_gsi, objsz, true, NULL_TREE, true,
3715 GSI_SAME_STMT);
3717 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
3719 fprintf (dump_file, "Optimizing: ");
3720 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
3722 if (srclen != NULL_TREE)
3723 success = update_gimple_call (&m_gsi, fn, 3 + (objsz != NULL_TREE),
3724 dst, src, len, objsz);
3725 else
3726 success = update_gimple_call (&m_gsi, fn, 2 + (objsz != NULL_TREE),
3727 dst, src, objsz);
3728 if (success)
3730 stmt = gsi_stmt (m_gsi);
3731 update_stmt (stmt);
3732 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
3734 fprintf (dump_file, "into: ");
3735 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
3737 /* If srclen == NULL, note that current string length can be
3738 computed by transforming this strcpy into stpcpy. */
3739 if (srclen == NULL_TREE && dsi->dont_invalidate)
3740 dsi->stmt = stmt;
3741 adjust_last_stmt (dsi, stmt, true);
3742 if (srclen != NULL_TREE)
3744 laststmt.stmt = stmt;
3745 laststmt.len = srclen;
3746 laststmt.stridx = dsi->idx;
3749 else if (dump_file && (dump_flags & TDF_DETAILS) != 0)
3750 fprintf (dump_file, "not possible.\n");
3752 if (no_warning_opt)
3753 suppress_warning (stmt, no_warning_opt);
3756 /* Handle a call to an allocation function like alloca, malloc or calloc,
3757 or an ordinary allocation function declared with attribute alloc_size. */
3759 void
3760 strlen_pass::handle_alloc_call (built_in_function bcode)
3762 gimple *stmt = gsi_stmt (m_gsi);
3763 tree lhs = gimple_call_lhs (stmt);
3764 if (lhs == NULL_TREE)
3765 return;
3767 gcc_assert (get_stridx (lhs, stmt) == 0);
3768 int idx = new_stridx (lhs);
3769 tree length = NULL_TREE;
3770 if (bcode == BUILT_IN_CALLOC)
3771 length = build_int_cst (size_type_node, 0);
3772 strinfo *si = new_strinfo (lhs, idx, length, length != NULL_TREE);
3773 if (bcode == BUILT_IN_CALLOC)
3775 /* Only set STMT for calloc and malloc. */
3776 si->stmt = stmt;
3777 /* Only set ENDPTR for calloc. */
3778 si->endptr = lhs;
3780 else if (bcode == BUILT_IN_MALLOC)
3781 si->stmt = stmt;
3783 /* Set ALLOC is set for all allocation functions. */
3784 si->alloc = stmt;
3785 set_strinfo (idx, si);
3786 si->writable = true;
3787 si->dont_invalidate = true;
3790 /* Handle a call to memset.
3791 After a call to calloc, memset(,0,) is unnecessary.
3792 memset(malloc(n),0,n) is calloc(n,1).
3793 return true when the call is transformed, false otherwise.
3794 When nonnull uses RVALS to determine range information. */
3796 bool
3797 strlen_pass::handle_builtin_memset (bool *zero_write)
3799 gimple *memset_stmt = gsi_stmt (m_gsi);
3800 tree ptr = gimple_call_arg (memset_stmt, 0);
3801 tree memset_val = gimple_call_arg (memset_stmt, 1);
3802 tree memset_size = gimple_call_arg (memset_stmt, 2);
3804 /* Set to the non-constant offset added to PTR. */
3805 wide_int offrng[2];
3806 int idx1 = get_stridx (ptr, memset_stmt, offrng, ptr_qry.rvals);
3807 if (idx1 == 0
3808 && TREE_CODE (memset_val) == INTEGER_CST
3809 && ((TREE_CODE (memset_size) == INTEGER_CST
3810 && !integer_zerop (memset_size))
3811 || TREE_CODE (memset_size) == SSA_NAME))
3813 unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << CHAR_TYPE_SIZE) - 1;
3814 bool full_string_p = (wi::to_wide (memset_val) & mask) == 0;
3816 /* We only handle symbolic lengths when writing non-zero values. */
3817 if (full_string_p && TREE_CODE (memset_size) != INTEGER_CST)
3818 return false;
3820 idx1 = new_stridx (ptr);
3821 if (idx1 == 0)
3822 return false;
3823 tree newlen;
3824 if (full_string_p)
3825 newlen = build_int_cst (size_type_node, 0);
3826 else if (TREE_CODE (memset_size) == INTEGER_CST)
3827 newlen = fold_convert (size_type_node, memset_size);
3828 else
3829 newlen = memset_size;
3831 strinfo *dsi = new_strinfo (ptr, idx1, newlen, full_string_p);
3832 set_strinfo (idx1, dsi);
3833 find_equal_ptrs (ptr, idx1);
3834 dsi->dont_invalidate = true;
3835 dsi->writable = true;
3836 return false;
3839 if (idx1 <= 0)
3840 return false;
3841 strinfo *si1 = get_strinfo (idx1);
3842 if (!si1)
3843 return false;
3844 gimple *alloc_stmt = si1->alloc;
3845 if (!alloc_stmt || !is_gimple_call (alloc_stmt))
3846 return false;
3847 tree callee1 = gimple_call_fndecl (alloc_stmt);
3848 if (!valid_builtin_call (alloc_stmt))
3849 return false;
3850 tree alloc_size = gimple_call_arg (alloc_stmt, 0);
3852 /* Check for overflow. */
3853 maybe_warn_overflow (memset_stmt, false, memset_size, NULL, false, true);
3855 /* Bail when there is no statement associated with the destination
3856 (the statement may be null even when SI1->ALLOC is not). */
3857 if (!si1->stmt)
3858 return false;
3860 /* Avoid optimizing if store is at a variable offset from the beginning
3861 of the allocated object. */
3862 if (offrng[0] != 0 || offrng[0] != offrng[1])
3863 return false;
3865 /* Bail when the call writes a non-zero value. */
3866 if (!integer_zerop (memset_val))
3867 return false;
3869 /* Let the caller know the memset call cleared the destination. */
3870 *zero_write = true;
3872 enum built_in_function code1 = DECL_FUNCTION_CODE (callee1);
3873 if (code1 == BUILT_IN_CALLOC)
3874 /* Not touching alloc_stmt */ ;
3875 else if (code1 == BUILT_IN_MALLOC
3876 && operand_equal_p (memset_size, alloc_size, 0))
3878 /* Replace the malloc + memset calls with calloc. */
3879 gimple_stmt_iterator gsi1 = gsi_for_stmt (si1->stmt);
3880 update_gimple_call (&gsi1, builtin_decl_implicit (BUILT_IN_CALLOC), 2,
3881 alloc_size, build_one_cst (size_type_node));
3882 si1->nonzero_chars = build_int_cst (size_type_node, 0);
3883 si1->full_string_p = true;
3884 si1->stmt = gsi_stmt (gsi1);
3886 else
3887 return false;
3888 tree lhs = gimple_call_lhs (memset_stmt);
3889 unlink_stmt_vdef (memset_stmt);
3890 if (lhs)
3892 gimple *assign = gimple_build_assign (lhs, ptr);
3893 gsi_replace (&m_gsi, assign, false);
3895 else
3897 gsi_remove (&m_gsi, true);
3898 release_defs (memset_stmt);
3901 return true;
3904 /* Return first such statement if RES is used in statements testing its
3905 equality to zero, and null otherwise. If EXCLUSIVE is true, return
3906 nonnull if and only RES is used in such expressions exclusively and
3907 in none other. */
3909 gimple *
3910 use_in_zero_equality (tree res, bool exclusive)
3912 gimple *first_use = NULL;
3914 use_operand_p use_p;
3915 imm_use_iterator iter;
3917 FOR_EACH_IMM_USE_FAST (use_p, iter, res)
3919 gimple *use_stmt = USE_STMT (use_p);
3921 if (is_gimple_debug (use_stmt))
3922 continue;
3924 if (gimple_code (use_stmt) == GIMPLE_ASSIGN)
3926 tree_code code = gimple_assign_rhs_code (use_stmt);
3927 if (code == COND_EXPR)
3929 tree cond_expr = gimple_assign_rhs1 (use_stmt);
3930 if ((TREE_CODE (cond_expr) != EQ_EXPR
3931 && (TREE_CODE (cond_expr) != NE_EXPR))
3932 || !integer_zerop (TREE_OPERAND (cond_expr, 1)))
3934 if (exclusive)
3935 return NULL;
3936 continue;
3939 else if (code == EQ_EXPR || code == NE_EXPR)
3941 if (!integer_zerop (gimple_assign_rhs2 (use_stmt)))
3943 if (exclusive)
3944 return NULL;
3945 continue;
3948 else if (exclusive)
3949 return NULL;
3950 else
3951 continue;
3953 else if (gimple_code (use_stmt) == GIMPLE_COND)
3955 tree_code code = gimple_cond_code (use_stmt);
3956 if ((code != EQ_EXPR && code != NE_EXPR)
3957 || !integer_zerop (gimple_cond_rhs (use_stmt)))
3959 if (exclusive)
3960 return NULL;
3961 continue;
3964 else if (exclusive)
3965 return NULL;
3966 else
3967 continue;
3969 if (!first_use)
3970 first_use = use_stmt;
3973 return first_use;
3976 /* Handle a call to memcmp. We try to handle small comparisons by
3977 converting them to load and compare, and replacing the call to memcmp
3978 with a __builtin_memcmp_eq call where possible.
3979 return true when call is transformed, return false otherwise. */
3981 bool
3982 strlen_pass::handle_builtin_memcmp ()
3984 gcall *stmt = as_a <gcall *> (gsi_stmt (m_gsi));
3985 tree res = gimple_call_lhs (stmt);
3987 if (!res || !use_in_zero_equality (res))
3988 return false;
3990 tree arg1 = gimple_call_arg (stmt, 0);
3991 tree arg2 = gimple_call_arg (stmt, 1);
3992 tree len = gimple_call_arg (stmt, 2);
3993 unsigned HOST_WIDE_INT leni;
3995 if (tree_fits_uhwi_p (len)
3996 && (leni = tree_to_uhwi (len)) <= GET_MODE_SIZE (word_mode)
3997 && pow2p_hwi (leni))
3999 leni *= CHAR_TYPE_SIZE;
4000 unsigned align1 = get_pointer_alignment (arg1);
4001 unsigned align2 = get_pointer_alignment (arg2);
4002 unsigned align = MIN (align1, align2);
4003 scalar_int_mode mode;
4004 if (int_mode_for_size (leni, 1).exists (&mode)
4005 && (align >= leni || !targetm.slow_unaligned_access (mode, align)))
4007 location_t loc = gimple_location (stmt);
4008 tree type, off;
4009 type = build_nonstandard_integer_type (leni, 1);
4010 gcc_assert (known_eq (GET_MODE_BITSIZE (TYPE_MODE (type)), leni));
4011 tree ptrtype = build_pointer_type_for_mode (char_type_node,
4012 ptr_mode, true);
4013 off = build_int_cst (ptrtype, 0);
4014 arg1 = build2_loc (loc, MEM_REF, type, arg1, off);
4015 arg2 = build2_loc (loc, MEM_REF, type, arg2, off);
4016 tree tem1 = fold_const_aggregate_ref (arg1);
4017 if (tem1)
4018 arg1 = tem1;
4019 tree tem2 = fold_const_aggregate_ref (arg2);
4020 if (tem2)
4021 arg2 = tem2;
4022 res = fold_convert_loc (loc, TREE_TYPE (res),
4023 fold_build2_loc (loc, NE_EXPR,
4024 boolean_type_node,
4025 arg1, arg2));
4026 gimplify_and_update_call_from_tree (&m_gsi, res);
4027 return true;
4031 gimple_call_set_fndecl (stmt, builtin_decl_explicit (BUILT_IN_MEMCMP_EQ));
4032 return true;
4035 /* Given strinfo IDX for ARG, sets LENRNG[] to the range of lengths
4036 of the string(s) referenced by ARG if it can be determined.
4037 If the length cannot be determined, sets *SIZE to the size of
4038 the array the string is stored in, if any. If no such array is
4039 known, sets *SIZE to -1. When the strings are nul-terminated sets
4040 *NULTERM to true, otherwise to false. When nonnull uses RVALS to
4041 determine range information. Returns true on success. */
4043 bool
4044 strlen_pass::get_len_or_size (gimple *stmt, tree arg, int idx,
4045 unsigned HOST_WIDE_INT lenrng[2],
4046 unsigned HOST_WIDE_INT *size, bool *nulterm)
4048 /* Invalidate. */
4049 *size = HOST_WIDE_INT_M1U;
4051 if (idx < 0)
4053 /* IDX is the inverted constant string length. */
4054 lenrng[0] = ~idx;
4055 lenrng[1] = lenrng[0];
4056 *nulterm = true;
4057 return true;
4060 /* Set so that both LEN and ~LEN are invalid lengths, i.e., maximum
4061 possible length + 1. */
4062 lenrng[0] = lenrng[1] = HOST_WIDE_INT_MAX;
4064 if (strinfo *si = idx ? get_strinfo (idx) : NULL)
4066 /* FIXME: Handle all this in_range_strlen_dynamic. */
4067 if (!si->nonzero_chars)
4069 else if (tree_fits_uhwi_p (si->nonzero_chars))
4071 lenrng[0] = tree_to_uhwi (si->nonzero_chars);
4072 *nulterm = si->full_string_p;
4073 /* Set the upper bound only if the string is known to be
4074 nul-terminated, otherwise leave it at maximum + 1. */
4075 if (*nulterm)
4076 lenrng[1] = lenrng[0];
4078 else if (TREE_CODE (si->nonzero_chars) == SSA_NAME)
4080 value_range r;
4081 if (get_range_query (cfun)->range_of_expr (r, si->nonzero_chars)
4082 && !r.undefined_p ()
4083 && !r.varying_p ())
4085 lenrng[0] = r.lower_bound ().to_uhwi ();
4086 lenrng[1] = r.upper_bound ().to_uhwi ();
4087 *nulterm = si->full_string_p;
4092 if (lenrng[0] != HOST_WIDE_INT_MAX)
4093 return true;
4095 /* Compute the minimum and maximum real or possible lengths. */
4096 c_strlen_data lendata = { };
4097 /* Set MAXBOUND to an arbitrary non-null non-integer node as a request
4098 to have it set to the length of the longest string in a PHI. */
4099 lendata.maxbound = arg;
4100 get_range_strlen_dynamic (arg, stmt, &lendata, ptr_qry);
4102 unsigned HOST_WIDE_INT maxbound = HOST_WIDE_INT_M1U;
4103 if (tree_fits_uhwi_p (lendata.maxbound)
4104 && !integer_all_onesp (lendata.maxbound))
4105 maxbound = tree_to_uhwi (lendata.maxbound);
4107 if (tree_fits_uhwi_p (lendata.minlen) && tree_fits_uhwi_p (lendata.maxlen))
4109 unsigned HOST_WIDE_INT minlen = tree_to_uhwi (lendata.minlen);
4110 unsigned HOST_WIDE_INT maxlen = tree_to_uhwi (lendata.maxlen);
4112 /* The longest string in this data model. */
4113 const unsigned HOST_WIDE_INT lenmax
4114 = tree_to_uhwi (max_object_size ()) - 2;
4116 if (maxbound == HOST_WIDE_INT_M1U)
4118 lenrng[0] = minlen;
4119 lenrng[1] = maxlen;
4120 *nulterm = minlen == maxlen;
4122 else if (maxlen < lenmax)
4124 *size = maxbound + 1;
4125 *nulterm = false;
4127 else
4128 return false;
4130 return true;
4133 if (maxbound != HOST_WIDE_INT_M1U
4134 && lendata.maxlen
4135 && !integer_all_onesp (lendata.maxlen))
4137 /* Set *SIZE to LENDATA.MAXBOUND which is a conservative estimate
4138 of the longest string based on the sizes of the arrays referenced
4139 by ARG. */
4140 *size = maxbound + 1;
4141 *nulterm = false;
4142 return true;
4145 return false;
4148 /* If IDX1 and IDX2 refer to strings A and B of unequal lengths, return
4149 the result of 0 == strncmp (A, B, BOUND) (which is the same as strcmp
4150 for a sufficiently large BOUND). If the result is based on the length
4151 of one string being greater than the longest string that would fit in
4152 the array pointer to by the argument, set *PLEN and *PSIZE to
4153 the corresponding length (or its complement when the string is known
4154 to be at least as long and need not be nul-terminated) and size.
4155 Otherwise return null. */
4157 tree
4158 strlen_pass::strxcmp_eqz_result (gimple *stmt, tree arg1, int idx1,
4159 tree arg2, int idx2,
4160 unsigned HOST_WIDE_INT bound,
4161 unsigned HOST_WIDE_INT len[2],
4162 unsigned HOST_WIDE_INT *psize)
4164 /* Determine the range the length of each string is in and whether it's
4165 known to be nul-terminated, or the size of the array it's stored in. */
4166 bool nul1, nul2;
4167 unsigned HOST_WIDE_INT siz1, siz2;
4168 unsigned HOST_WIDE_INT len1rng[2], len2rng[2];
4169 if (!get_len_or_size (stmt, arg1, idx1, len1rng, &siz1, &nul1)
4170 || !get_len_or_size (stmt, arg2, idx2, len2rng, &siz2, &nul2))
4171 return NULL_TREE;
4173 /* BOUND is set to HWI_M1U for strcmp and less to strncmp, and LENiRNG
4174 to HWI_MAX when invalid. Adjust the length of each string to consider
4175 to be no more than BOUND. */
4176 if (len1rng[0] < HOST_WIDE_INT_MAX && len1rng[0] > bound)
4177 len1rng[0] = bound;
4178 if (len1rng[1] < HOST_WIDE_INT_MAX && len1rng[1] > bound)
4179 len1rng[1] = bound;
4180 if (len2rng[0] < HOST_WIDE_INT_MAX && len2rng[0] > bound)
4181 len2rng[0] = bound;
4182 if (len2rng[1] < HOST_WIDE_INT_MAX && len2rng[1] > bound)
4183 len2rng[1] = bound;
4185 /* Two empty strings are equal. */
4186 if (len1rng[1] == 0 && len2rng[1] == 0)
4187 return integer_one_node;
4189 /* The strings are definitely unequal when the lower bound of the length
4190 of one of them is greater than the length of the longest string that
4191 would fit into the other array. */
4192 if (len1rng[0] == HOST_WIDE_INT_MAX
4193 && len2rng[0] != HOST_WIDE_INT_MAX
4194 && ((len2rng[0] < bound && len2rng[0] >= siz1)
4195 || len2rng[0] > siz1))
4197 *psize = siz1;
4198 len[0] = len1rng[0];
4199 /* Set LEN[0] to the lower bound of ARG1's length when it's
4200 nul-terminated or to the complement of its minimum length
4201 otherwise, */
4202 len[1] = nul2 ? len2rng[0] : ~len2rng[0];
4203 return integer_zero_node;
4206 if (len2rng[0] == HOST_WIDE_INT_MAX
4207 && len1rng[0] != HOST_WIDE_INT_MAX
4208 && ((len1rng[0] < bound && len1rng[0] >= siz2)
4209 || len1rng[0] > siz2))
4211 *psize = siz2;
4212 len[0] = nul1 ? len1rng[0] : ~len1rng[0];
4213 len[1] = len2rng[0];
4214 return integer_zero_node;
4217 /* The strings are also definitely unequal when their lengths are unequal
4218 and at least one is nul-terminated. */
4219 if (len1rng[0] != HOST_WIDE_INT_MAX
4220 && len2rng[0] != HOST_WIDE_INT_MAX
4221 && ((len1rng[1] < len2rng[0] && nul1)
4222 || (len2rng[1] < len1rng[0] && nul2)))
4224 if (bound <= len1rng[0] || bound <= len2rng[0])
4225 *psize = bound;
4226 else
4227 *psize = HOST_WIDE_INT_M1U;
4229 len[0] = len1rng[0];
4230 len[1] = len2rng[0];
4231 return integer_zero_node;
4234 /* The string lengths may be equal or unequal. Even when equal and
4235 both strings nul-terminated, without the string contents there's
4236 no way to determine whether they are equal. */
4237 return NULL_TREE;
4240 /* Diagnose pointless calls to strcmp or strncmp STMT with string
4241 arguments of lengths LEN or size SIZ and (for strncmp) BOUND,
4242 whose result is used in equality expressions that evaluate to
4243 a constant due to one argument being longer than the size of
4244 the other. */
4246 static void
4247 maybe_warn_pointless_strcmp (gimple *stmt, HOST_WIDE_INT bound,
4248 unsigned HOST_WIDE_INT len[2],
4249 unsigned HOST_WIDE_INT siz)
4251 tree lhs = gimple_call_lhs (stmt);
4252 gimple *use = use_in_zero_equality (lhs, /* exclusive = */ false);
4253 if (!use)
4254 return;
4256 bool at_least = false;
4258 /* Excessive LEN[i] indicates a lower bound. */
4259 if (len[0] > HOST_WIDE_INT_MAX)
4261 at_least = true;
4262 len[0] = ~len[0];
4265 if (len[1] > HOST_WIDE_INT_MAX)
4267 at_least = true;
4268 len[1] = ~len[1];
4271 unsigned HOST_WIDE_INT minlen = MIN (len[0], len[1]);
4273 /* FIXME: Include a note pointing to the declaration of the smaller
4274 array. */
4275 location_t stmt_loc = gimple_or_expr_nonartificial_location (stmt, lhs);
4277 tree callee = gimple_call_fndecl (stmt);
4278 bool warned = false;
4279 if (siz <= minlen && bound == -1)
4280 warned = warning_at (stmt_loc, OPT_Wstring_compare,
4281 (at_least
4282 ? G_("%qD of a string of length %wu or more and "
4283 "an array of size %wu evaluates to nonzero")
4284 : G_("%qD of a string of length %wu and an array "
4285 "of size %wu evaluates to nonzero")),
4286 callee, minlen, siz);
4287 else if (!at_least && siz <= HOST_WIDE_INT_MAX)
4289 if (len[0] != HOST_WIDE_INT_MAX && len[1] != HOST_WIDE_INT_MAX)
4290 warned = warning_at (stmt_loc, OPT_Wstring_compare,
4291 "%qD of strings of length %wu and %wu "
4292 "and bound of %wu evaluates to nonzero",
4293 callee, len[0], len[1], bound);
4294 else
4295 warned = warning_at (stmt_loc, OPT_Wstring_compare,
4296 "%qD of a string of length %wu, an array "
4297 "of size %wu and bound of %wu evaluates to "
4298 "nonzero",
4299 callee, minlen, siz, bound);
4302 if (!warned)
4303 return;
4305 location_t use_loc = gimple_location (use);
4306 if (LOCATION_LINE (stmt_loc) != LOCATION_LINE (use_loc))
4307 inform (use_loc, "in this expression");
4311 /* Optimize a call to strcmp or strncmp either by folding it to a constant
4312 when possible or by transforming the latter to the former. Warn about
4313 calls where the length of one argument is greater than the size of
4314 the array to which the other argument points if the latter's length
4315 is not known. Return true when the call has been transformed into
4316 another and false otherwise. */
4318 bool
4319 strlen_pass::handle_builtin_string_cmp ()
4321 gcall *stmt = as_a <gcall *> (gsi_stmt (m_gsi));
4322 tree lhs = gimple_call_lhs (stmt);
4324 if (!lhs)
4325 return false;
4327 tree arg1 = gimple_call_arg (stmt, 0);
4328 tree arg2 = gimple_call_arg (stmt, 1);
4329 int idx1 = get_stridx (arg1, stmt);
4330 int idx2 = get_stridx (arg2, stmt);
4332 /* For strncmp set to the value of the third argument if known. */
4333 HOST_WIDE_INT bound = -1;
4334 tree len = NULL_TREE;
4335 /* Extract the strncmp bound. */
4336 if (gimple_call_num_args (stmt) == 3)
4338 len = gimple_call_arg (stmt, 2);
4339 if (tree_fits_shwi_p (len))
4340 bound = tree_to_shwi (len);
4342 /* If the bound argument is NOT known, do nothing. */
4343 if (bound < 0)
4344 return false;
4347 /* Avoid folding if either argument is not a nul-terminated array.
4348 Defer warning until later. */
4349 if (!check_nul_terminated_array (NULL_TREE, arg1, len)
4350 || !check_nul_terminated_array (NULL_TREE, arg2, len))
4351 return false;
4354 /* Set to the length of one argument (or its complement if it's
4355 the lower bound of a range) and the size of the array storing
4356 the other if the result is based on the former being equal to
4357 or greater than the latter. */
4358 unsigned HOST_WIDE_INT len[2] = { HOST_WIDE_INT_MAX, HOST_WIDE_INT_MAX };
4359 unsigned HOST_WIDE_INT siz = HOST_WIDE_INT_M1U;
4361 /* Try to determine if the two strings are either definitely equal
4362 or definitely unequal and if so, either fold the result to zero
4363 (when equal) or set the range of the result to ~[0, 0] otherwise. */
4364 if (tree eqz = strxcmp_eqz_result (stmt, arg1, idx1, arg2, idx2, bound,
4365 len, &siz))
4367 if (integer_zerop (eqz))
4369 maybe_warn_pointless_strcmp (stmt, bound, len, siz);
4371 /* When the lengths of the first two string arguments are
4372 known to be unequal set the range of the result to non-zero.
4373 This allows the call to be eliminated if its result is only
4374 used in tests for equality to zero. */
4375 value_range nz;
4376 nz.set_nonzero (TREE_TYPE (lhs));
4377 set_range_info (lhs, nz);
4378 return false;
4380 /* When the two strings are definitely equal (such as when they
4381 are both empty) fold the call to the constant result. */
4382 replace_call_with_value (&m_gsi, integer_zero_node);
4383 return true;
4387 /* Return if nothing is known about the strings pointed to by ARG1
4388 and ARG2. */
4389 if (idx1 == 0 && idx2 == 0)
4390 return false;
4392 /* Determine either the length or the size of each of the strings,
4393 whichever is available. */
4394 HOST_WIDE_INT cstlen1 = -1, cstlen2 = -1;
4395 HOST_WIDE_INT arysiz1 = -1, arysiz2 = -1;
4398 unsigned HOST_WIDE_INT len1rng[2], len2rng[2];
4399 unsigned HOST_WIDE_INT arsz1, arsz2;
4400 bool nulterm[2];
4402 if (!get_len_or_size (stmt, arg1, idx1, len1rng, &arsz1, nulterm)
4403 || !get_len_or_size (stmt, arg2, idx2, len2rng, &arsz2, nulterm + 1))
4404 return false;
4406 if (len1rng[0] == len1rng[1] && len1rng[0] < HOST_WIDE_INT_MAX)
4407 cstlen1 = len1rng[0];
4408 else if (arsz1 < HOST_WIDE_INT_M1U)
4409 arysiz1 = arsz1;
4411 if (len2rng[0] == len2rng[1] && len2rng[0] < HOST_WIDE_INT_MAX)
4412 cstlen2 = len2rng[0];
4413 else if (arsz2 < HOST_WIDE_INT_M1U)
4414 arysiz2 = arsz2;
4417 /* Bail if neither the string length nor the size of the array
4418 it is stored in can be determined. */
4419 if ((cstlen1 < 0 && arysiz1 < 0)
4420 || (cstlen2 < 0 && arysiz2 < 0)
4421 || (cstlen1 < 0 && cstlen2 < 0))
4422 return false;
4424 if (cstlen1 >= 0)
4425 ++cstlen1;
4426 if (cstlen2 >= 0)
4427 ++cstlen2;
4429 /* The exact number of characters to compare. */
4430 HOST_WIDE_INT cmpsiz;
4431 if (cstlen1 >= 0 && cstlen2 >= 0)
4432 cmpsiz = MIN (cstlen1, cstlen2);
4433 else if (cstlen1 >= 0)
4434 cmpsiz = cstlen1;
4435 else
4436 cmpsiz = cstlen2;
4437 if (bound >= 0)
4438 cmpsiz = MIN (cmpsiz, bound);
4439 /* The size of the array in which the unknown string is stored. */
4440 HOST_WIDE_INT varsiz = arysiz1 < 0 ? arysiz2 : arysiz1;
4442 if ((varsiz < 0 || cmpsiz < varsiz) && use_in_zero_equality (lhs))
4444 /* If the known length is less than the size of the other array
4445 and the strcmp result is only used to test equality to zero,
4446 transform the call to the equivalent _eq call. */
4447 if (tree fn = builtin_decl_implicit (bound < 0 ? BUILT_IN_STRCMP_EQ
4448 : BUILT_IN_STRNCMP_EQ))
4450 tree n = build_int_cst (size_type_node, cmpsiz);
4451 update_gimple_call (&m_gsi, fn, 3, arg1, arg2, n);
4452 return true;
4456 return false;
4459 /* Handle a POINTER_PLUS_EXPR statement.
4460 For p = "abcd" + 2; compute associated length, or if
4461 p = q + off is pointing to a '\0' character of a string, call
4462 zero_length_string on it. */
4464 void
4465 strlen_pass::handle_pointer_plus ()
4467 gimple *stmt = gsi_stmt (m_gsi);
4468 tree lhs = gimple_assign_lhs (stmt), off;
4469 int idx = get_stridx (gimple_assign_rhs1 (stmt), stmt);
4470 strinfo *si, *zsi;
4472 if (idx == 0)
4473 return;
4475 if (idx < 0)
4477 tree off = gimple_assign_rhs2 (stmt);
4478 if (tree_fits_uhwi_p (off)
4479 && tree_to_uhwi (off) <= (unsigned HOST_WIDE_INT) ~idx)
4480 ssa_ver_to_stridx[SSA_NAME_VERSION (lhs)]
4481 = ~(~idx - (int) tree_to_uhwi (off));
4482 return;
4485 si = get_strinfo (idx);
4486 if (si == NULL || si->nonzero_chars == NULL_TREE)
4487 return;
4489 off = gimple_assign_rhs2 (stmt);
4490 zsi = NULL;
4491 if (si->full_string_p && operand_equal_p (si->nonzero_chars, off, 0))
4492 zsi = zero_length_string (lhs, si);
4493 else if (TREE_CODE (off) == SSA_NAME)
4495 gimple *def_stmt = SSA_NAME_DEF_STMT (off);
4496 if (gimple_assign_single_p (def_stmt)
4497 && si->full_string_p
4498 && operand_equal_p (si->nonzero_chars,
4499 gimple_assign_rhs1 (def_stmt), 0))
4500 zsi = zero_length_string (lhs, si);
4502 if (zsi != NULL
4503 && si->endptr != NULL_TREE
4504 && si->endptr != lhs
4505 && TREE_CODE (si->endptr) == SSA_NAME)
4507 enum tree_code rhs_code
4508 = useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (si->endptr))
4509 ? SSA_NAME : NOP_EXPR;
4510 gimple_assign_set_rhs_with_ops (&m_gsi, rhs_code, si->endptr);
4511 gcc_assert (gsi_stmt (m_gsi) == stmt);
4512 update_stmt (stmt);
4516 /* Set LENRANGE to the number of nonzero bytes for a store of TYPE and
4517 clear all flags. Return true on success and false on failure. */
4519 static bool
4520 nonzero_bytes_for_type (tree type, unsigned lenrange[3],
4521 bool *nulterm, bool *allnul, bool *allnonnul)
4523 /* Use the size of the type of the expression as the size of the store,
4524 and set the upper bound of the length range to that of the size.
4525 Nothing is known about the contents so clear all flags. */
4526 tree typesize = TYPE_SIZE_UNIT (type);
4527 if (!type)
4528 return false;
4530 if (!tree_fits_uhwi_p (typesize))
4531 return false;
4533 unsigned HOST_WIDE_INT sz = tree_to_uhwi (typesize);
4534 if (sz > UINT_MAX)
4535 return false;
4537 lenrange[2] = sz;
4538 lenrange[1] = lenrange[2] ? lenrange[2] - 1 : 0;
4539 lenrange[0] = 0;
4540 *nulterm = false;
4541 *allnul = false;
4542 *allnonnul = false;
4543 return true;
4546 /* Recursively determine the minimum and maximum number of leading nonzero
4547 bytes in the representation of EXP at memory state VUSE and set
4548 LENRANGE[0] and LENRANGE[1] to each.
4549 Sets LENRANGE[2] to the total size of the access (which may be less
4550 than LENRANGE[1] when what's being referenced by EXP is a pointer
4551 rather than an array).
4552 Sets *NULTERM if the representation contains a zero byte, sets *ALLNUL
4553 if all the bytes are zero, and *ALLNONNUL is all are nonzero.
4554 OFFSET and NBYTES are the offset into the representation and
4555 the size of the access to it determined from an ADDR_EXPR (i.e.,
4556 a pointer) or MEM_REF or zero for other expressions.
4557 Uses RVALS to determine range information.
4558 Avoids recursing deeper than the limits in SNLIM allow.
4559 Returns true on success and false otherwise. */
4561 bool
4562 strlen_pass::count_nonzero_bytes (tree exp, tree vuse, gimple *stmt,
4563 unsigned HOST_WIDE_INT offset,
4564 unsigned HOST_WIDE_INT nbytes,
4565 unsigned lenrange[3], bool *nulterm,
4566 bool *allnul, bool *allnonnul,
4567 ssa_name_limit_t &snlim)
4569 if (TREE_CODE (exp) == SSA_NAME)
4571 /* Handle non-zero single-character stores specially. */
4572 tree type = TREE_TYPE (exp);
4573 if (TREE_CODE (type) == INTEGER_TYPE
4574 && TYPE_MODE (type) == TYPE_MODE (char_type_node)
4575 && TYPE_PRECISION (type) == TYPE_PRECISION (char_type_node)
4576 && tree_expr_nonzero_p (exp))
4578 /* If the character EXP is known to be non-zero (even if its
4579 exact value is not known) recurse once to set the range
4580 for an arbitrary constant. */
4581 exp = build_int_cst (type, 1);
4582 return count_nonzero_bytes (exp, vuse, stmt,
4583 offset, 1, lenrange,
4584 nulterm, allnul, allnonnul, snlim);
4587 gimple *g = SSA_NAME_DEF_STMT (exp);
4588 if (gimple_assign_single_p (g))
4590 exp = gimple_assign_rhs1 (g);
4591 if (!DECL_P (exp)
4592 && TREE_CODE (exp) != CONSTRUCTOR
4593 && TREE_CODE (exp) != MEM_REF)
4594 return false;
4595 /* Handle DECLs, CONSTRUCTOR and MEM_REF below. */
4596 stmt = g;
4598 else if (gimple_code (g) == GIMPLE_PHI)
4600 /* Avoid processing an SSA_NAME that has already been visited
4601 or if an SSA_NAME limit has been reached. Indicate success
4602 if the former and failure if the latter. */
4603 if (int res = snlim.next_phi (exp))
4604 return res > 0;
4606 /* Determine the minimum and maximum from the PHI arguments. */
4607 unsigned int n = gimple_phi_num_args (g);
4608 for (unsigned i = 0; i != n; i++)
4610 tree def = gimple_phi_arg_def (g, i);
4611 if (!count_nonzero_bytes (def, vuse, g,
4612 offset, nbytes, lenrange, nulterm,
4613 allnul, allnonnul, snlim))
4614 return false;
4617 return true;
4621 if (TREE_CODE (exp) == CONSTRUCTOR)
4623 if (nbytes)
4624 /* If NBYTES has already been determined by an outer MEM_REF
4625 fail rather than overwriting it (this shouldn't happen). */
4626 return false;
4628 tree type = TREE_TYPE (exp);
4629 tree size = TYPE_SIZE_UNIT (type);
4630 if (!size || !tree_fits_uhwi_p (size))
4631 return false;
4633 unsigned HOST_WIDE_INT byte_size = tree_to_uhwi (size);
4634 if (byte_size < offset)
4635 return false;
4637 nbytes = byte_size - offset;
4640 if (TREE_CODE (exp) == MEM_REF)
4642 if (nbytes)
4643 return false;
4645 tree arg = TREE_OPERAND (exp, 0);
4646 tree off = TREE_OPERAND (exp, 1);
4648 if (TREE_CODE (off) != INTEGER_CST || !tree_fits_uhwi_p (off))
4649 return false;
4651 unsigned HOST_WIDE_INT wioff = tree_to_uhwi (off);
4652 if (INT_MAX < wioff)
4653 return false;
4655 offset += wioff;
4656 if (INT_MAX < offset)
4657 return false;
4659 /* The size of the MEM_REF access determines the number of bytes. */
4660 tree type = TREE_TYPE (exp);
4661 tree typesize = TYPE_SIZE_UNIT (type);
4662 if (!typesize || !tree_fits_uhwi_p (typesize))
4663 return false;
4664 nbytes = tree_to_uhwi (typesize);
4665 if (!nbytes)
4666 return false;
4668 /* Handle MEM_REF = SSA_NAME types of assignments. */
4669 return count_nonzero_bytes_addr (arg, vuse, stmt,
4670 offset, nbytes, lenrange, nulterm,
4671 allnul, allnonnul, snlim);
4674 if (VAR_P (exp) || TREE_CODE (exp) == CONST_DECL)
4676 /* If EXP can be folded into a constant use the result. Otherwise
4677 proceed to use EXP to determine a range of the result. */
4678 if (tree fold_exp = ctor_for_folding (exp))
4679 if (fold_exp != error_mark_node)
4680 exp = fold_exp;
4683 const char *prep = NULL;
4684 if (TREE_CODE (exp) == STRING_CST)
4686 unsigned nchars = TREE_STRING_LENGTH (exp);
4687 if (nchars < offset)
4688 return false;
4690 if (!nbytes)
4691 /* If NBYTES hasn't been determined earlier, either from ADDR_EXPR
4692 (i.e., it's the size of a pointer), or from MEM_REF (as the size
4693 of the access), set it here to the size of the string, including
4694 all internal and trailing nuls if the string has any. */
4695 nbytes = nchars - offset;
4696 else if (nchars - offset < nbytes)
4697 return false;
4699 prep = TREE_STRING_POINTER (exp) + offset;
4702 unsigned char buf[256];
4703 if (!prep)
4705 if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
4706 return false;
4707 /* If the pointer to representation hasn't been set above
4708 for STRING_CST point it at the buffer. */
4709 prep = reinterpret_cast <char *>(buf);
4710 /* Try to extract the representation of the constant object
4711 or expression starting from the offset. */
4712 unsigned repsize = native_encode_expr (exp, buf, sizeof buf, offset);
4713 if (repsize < nbytes)
4715 /* This should only happen when REPSIZE is zero because EXP
4716 doesn't denote an object with a known initializer, except
4717 perhaps when the reference reads past its end. */
4718 lenrange[0] = 0;
4719 prep = NULL;
4721 else if (!nbytes)
4722 nbytes = repsize;
4723 else if (nbytes < repsize)
4724 return false;
4727 if (!nbytes)
4728 return nonzero_bytes_for_type (TREE_TYPE (exp), lenrange,
4729 nulterm, allnul, allnonnul);
4731 /* Compute the number of leading nonzero bytes in the representation
4732 and update the minimum and maximum. */
4733 unsigned HOST_WIDE_INT n = prep ? strnlen (prep, nbytes) : nbytes;
4735 if (n < lenrange[0])
4736 lenrange[0] = n;
4737 if (lenrange[1] < n)
4738 lenrange[1] = n;
4740 /* Set the size of the representation. */
4741 if (lenrange[2] < nbytes)
4742 lenrange[2] = nbytes;
4744 /* Clear NULTERM if none of the bytes is zero. */
4745 if (n == nbytes)
4746 *nulterm = false;
4748 if (n)
4750 /* When the initial number of non-zero bytes N is non-zero, reset
4751 *ALLNUL; if N is less than that the size of the representation
4752 also clear *ALLNONNUL. */
4753 *allnul = false;
4754 if (n < nbytes)
4755 *allnonnul = false;
4757 else if (*allnul || *allnonnul)
4759 *allnonnul = false;
4761 if (*allnul)
4763 /* When either ALLNUL is set and N is zero, also determine
4764 whether all subsequent bytes after the first one (which
4765 is nul) are zero or nonzero and clear ALLNUL if not. */
4766 for (const char *p = prep; p != prep + nbytes; ++p)
4767 if (*p)
4769 *allnul = false;
4770 break;
4775 return true;
4778 /* Like count_nonzero_bytes, but instead of counting bytes in EXP, count
4779 bytes that are pointed to by EXP, which should be a pointer. */
4781 bool
4782 strlen_pass::count_nonzero_bytes_addr (tree exp, tree vuse, gimple *stmt,
4783 unsigned HOST_WIDE_INT offset,
4784 unsigned HOST_WIDE_INT nbytes,
4785 unsigned lenrange[3], bool *nulterm,
4786 bool *allnul, bool *allnonnul,
4787 ssa_name_limit_t &snlim)
4789 int idx = get_stridx (exp, stmt);
4790 if (idx > 0)
4792 /* get_strinfo reflects string lengths before the current statement,
4793 where the current statement is the outermost count_nonzero_bytes
4794 stmt. If there are any stores in between stmt and that
4795 current statement, the string length information might describe
4796 something significantly different. */
4797 if (gimple_vuse (stmt) != vuse)
4798 return false;
4800 strinfo *si = get_strinfo (idx);
4801 if (!si)
4802 return false;
4804 /* Handle both constant lengths as well non-constant lengths
4805 in some range. */
4806 unsigned HOST_WIDE_INT minlen, maxlen;
4807 if (tree_fits_shwi_p (si->nonzero_chars))
4808 minlen = maxlen = tree_to_shwi (si->nonzero_chars);
4809 else if (si->nonzero_chars
4810 && TREE_CODE (si->nonzero_chars) == SSA_NAME)
4812 value_range vr;
4813 if (!ptr_qry.rvals->range_of_expr (vr, si->nonzero_chars, stmt)
4814 || vr.undefined_p ()
4815 || vr.varying_p ())
4816 return false;
4818 minlen = vr.lower_bound ().to_uhwi ();
4819 maxlen = vr.upper_bound ().to_uhwi ();
4821 else
4822 return false;
4824 if (maxlen < offset)
4825 return false;
4827 minlen = minlen < offset ? 0 : minlen - offset;
4828 maxlen -= offset;
4829 if (maxlen + 1 < nbytes)
4830 return false;
4832 if (nbytes <= minlen)
4833 *nulterm = false;
4835 if (nbytes < minlen)
4837 minlen = nbytes;
4838 if (nbytes < maxlen)
4839 maxlen = nbytes;
4842 if (minlen < lenrange[0])
4843 lenrange[0] = minlen;
4844 if (lenrange[1] < maxlen)
4845 lenrange[1] = maxlen;
4847 if (lenrange[2] < nbytes)
4848 lenrange[2] = nbytes;
4850 /* Since only the length of the string are known and not its contents,
4851 clear ALLNUL and ALLNONNUL purely on the basis of the length. */
4852 *allnul = false;
4853 if (minlen < nbytes)
4854 *allnonnul = false;
4856 return true;
4859 if (TREE_CODE (exp) == ADDR_EXPR)
4860 return count_nonzero_bytes (TREE_OPERAND (exp, 0), vuse, stmt,
4861 offset, nbytes,
4862 lenrange, nulterm, allnul, allnonnul, snlim);
4864 if (TREE_CODE (exp) == SSA_NAME)
4866 gimple *g = SSA_NAME_DEF_STMT (exp);
4867 if (gimple_code (g) == GIMPLE_PHI)
4869 /* Avoid processing an SSA_NAME that has already been visited
4870 or if an SSA_NAME limit has been reached. Indicate success
4871 if the former and failure if the latter. */
4872 if (int res = snlim.next_phi (exp))
4873 return res > 0;
4875 /* Determine the minimum and maximum from the PHI arguments. */
4876 unsigned int n = gimple_phi_num_args (g);
4877 for (unsigned i = 0; i != n; i++)
4879 tree def = gimple_phi_arg_def (g, i);
4880 if (!count_nonzero_bytes_addr (def, vuse, g,
4881 offset, nbytes, lenrange,
4882 nulterm, allnul, allnonnul,
4883 snlim))
4884 return false;
4887 return true;
4891 /* Otherwise we don't know anything. */
4892 lenrange[0] = 0;
4893 if (lenrange[1] < nbytes)
4894 lenrange[1] = nbytes;
4895 if (lenrange[2] < nbytes)
4896 lenrange[2] = nbytes;
4897 *nulterm = false;
4898 *allnul = false;
4899 *allnonnul = false;
4900 return true;
4903 /* Same as above except with an implicit SSA_NAME limit. When EXPR_OR_TYPE
4904 is a type rather than an expression use its size to compute the range.
4905 RVALS is used to determine ranges of dynamically computed string lengths
4906 (the results of strlen). */
4908 bool
4909 strlen_pass::count_nonzero_bytes (tree expr_or_type, gimple *stmt,
4910 unsigned lenrange[3], bool *nulterm,
4911 bool *allnul, bool *allnonnul)
4913 if (TYPE_P (expr_or_type))
4914 return nonzero_bytes_for_type (expr_or_type, lenrange,
4915 nulterm, allnul, allnonnul);
4917 /* Set to optimistic values so the caller doesn't have to worry about
4918 initializing these and to what. On success, the function will clear
4919 these if it determines their values are different but being recursive
4920 it never sets either to true. On failure, their values are
4921 unspecified. */
4922 *nulterm = true;
4923 *allnul = true;
4924 *allnonnul = true;
4926 ssa_name_limit_t snlim;
4927 tree expr = expr_or_type;
4928 return count_nonzero_bytes (expr, gimple_vuse (stmt), stmt,
4929 0, 0, lenrange, nulterm, allnul, allnonnul,
4930 snlim);
4933 /* Handle a single or multibyte store other than by a built-in function,
4934 either via a single character assignment or by multi-byte assignment
4935 either via MEM_REF or via a type other than char (such as in
4936 '*(int*)a = 12345'). Return true to let the caller advance *GSI to
4937 the next statement in the basic block and false otherwise. */
4939 bool
4940 strlen_pass::handle_store (bool *zero_write)
4942 gimple *stmt = gsi_stmt (m_gsi);
4943 /* The LHS and RHS of the store. The RHS is null if STMT is a function
4944 call. STORETYPE is the type of the store (determined from either
4945 the RHS of the assignment statement or the LHS of a function call. */
4946 tree lhs, rhs, storetype;
4947 if (is_gimple_assign (stmt))
4949 lhs = gimple_assign_lhs (stmt);
4950 rhs = gimple_assign_rhs1 (stmt);
4951 storetype = TREE_TYPE (rhs);
4953 else if (is_gimple_call (stmt))
4955 lhs = gimple_call_lhs (stmt);
4956 rhs = NULL_TREE;
4957 storetype = TREE_TYPE (lhs);
4959 else
4960 return true;
4962 tree ssaname = NULL_TREE;
4963 strinfo *si = NULL;
4964 int idx = -1;
4966 range_query *const rvals = ptr_qry.rvals;
4968 /* The offset of the first byte in LHS modified by the store. */
4969 unsigned HOST_WIDE_INT offset = 0;
4971 if (TREE_CODE (lhs) == MEM_REF
4972 && TREE_CODE (TREE_OPERAND (lhs, 0)) == SSA_NAME)
4974 tree mem_offset = TREE_OPERAND (lhs, 1);
4975 if (tree_fits_uhwi_p (mem_offset))
4977 /* Get the strinfo for the base, and use it if it starts with at
4978 least OFFSET nonzero characters. This is trivially true if
4979 OFFSET is zero. */
4980 offset = tree_to_uhwi (mem_offset);
4981 idx = get_stridx (TREE_OPERAND (lhs, 0), stmt);
4982 if (idx > 0)
4983 si = get_strinfo (idx);
4984 if (offset == 0)
4985 ssaname = TREE_OPERAND (lhs, 0);
4986 else if (si == NULL
4987 || compare_nonzero_chars (si, stmt, offset, rvals) < 0)
4989 *zero_write = rhs ? initializer_zerop (rhs) : false;
4991 bool dummy;
4992 unsigned lenrange[] = { UINT_MAX, 0, 0 };
4993 if (count_nonzero_bytes (rhs ? rhs : storetype, stmt, lenrange,
4994 &dummy, &dummy, &dummy))
4995 maybe_warn_overflow (stmt, true, lenrange[2]);
4997 return true;
5001 else
5003 idx = get_addr_stridx (lhs, stmt, NULL_TREE, &offset, rvals);
5004 if (idx > 0)
5005 si = get_strinfo (idx);
5008 /* Minimum and maximum leading non-zero bytes and the size of the store. */
5009 unsigned lenrange[] = { UINT_MAX, 0, 0 };
5011 /* Set to the minimum length of the string being assigned if known. */
5012 unsigned HOST_WIDE_INT rhs_minlen;
5014 /* STORING_NONZERO_P is true iff not all stored characters are zero.
5015 STORING_ALL_NONZERO_P is true if all stored characters are zero.
5016 STORING_ALL_ZEROS_P is true iff all stored characters are zero.
5017 Both are false when it's impossible to determine which is true. */
5018 bool storing_nonzero_p;
5019 bool storing_all_nonzero_p;
5020 bool storing_all_zeros_p;
5021 /* FULL_STRING_P is set when the stored sequence of characters form
5022 a nul-terminated string. */
5023 bool full_string_p;
5025 const bool ranges_valid
5026 = count_nonzero_bytes (rhs ? rhs : storetype, stmt,
5027 lenrange, &full_string_p,
5028 &storing_all_zeros_p, &storing_all_nonzero_p);
5030 if (ranges_valid)
5032 rhs_minlen = lenrange[0];
5033 storing_nonzero_p = lenrange[1] > 0;
5034 *zero_write = storing_all_zeros_p;
5036 maybe_warn_overflow (stmt, true, lenrange[2]);
5038 else
5040 rhs_minlen = HOST_WIDE_INT_M1U;
5041 full_string_p = false;
5042 storing_nonzero_p = false;
5043 storing_all_zeros_p = false;
5044 storing_all_nonzero_p = false;
5047 if (si != NULL)
5049 /* The count_nonzero_bytes call above might have unshared si.
5050 Fetch it again from the vector. */
5051 si = get_strinfo (idx);
5052 /* The corresponding element is set to 1 if the first and last
5053 element, respectively, of the sequence of characters being
5054 written over the string described by SI ends before
5055 the terminating nul (if it has one), to zero if the nul is
5056 being overwritten but not beyond, or negative otherwise. */
5057 int store_before_nul[2];
5058 if (ranges_valid)
5060 /* The offset of the last stored byte. */
5061 unsigned HOST_WIDE_INT endoff = offset + lenrange[2] - 1;
5062 store_before_nul[0]
5063 = compare_nonzero_chars (si, stmt, offset, rvals);
5064 if (endoff == offset)
5065 store_before_nul[1] = store_before_nul[0];
5066 else
5067 store_before_nul[1]
5068 = compare_nonzero_chars (si, stmt, endoff, rvals);
5070 else
5072 store_before_nul[0]
5073 = compare_nonzero_chars (si, stmt, offset, rvals);
5074 store_before_nul[1] = store_before_nul[0];
5075 gcc_assert (offset == 0 || store_before_nul[0] >= 0);
5078 if (storing_all_zeros_p
5079 && store_before_nul[0] == 0
5080 && store_before_nul[1] == 0
5081 && si->full_string_p)
5083 /* When overwriting a '\0' with a '\0', the store can be removed
5084 if we know it has been stored in the current function. */
5085 if (!stmt_could_throw_p (cfun, stmt) && si->writable)
5087 unlink_stmt_vdef (stmt);
5088 release_defs (stmt);
5089 gsi_remove (&m_gsi, true);
5090 return false;
5092 else
5094 si->writable = true;
5095 gsi_next (&m_gsi);
5096 return false;
5100 if (store_before_nul[1] > 0
5101 && storing_nonzero_p
5102 && lenrange[0] == lenrange[1]
5103 && lenrange[0] == lenrange[2]
5104 && TREE_CODE (storetype) == INTEGER_TYPE)
5106 /* Handle a store of one or more non-nul characters that ends
5107 before the terminating nul of the destination and so does
5108 not affect its length
5109 If si->nonzero_chars > OFFSET, we aren't overwriting '\0',
5110 and if we aren't storing '\0', we know that the length of
5111 the string and any other zero terminated string in memory
5112 remains the same. In that case we move to the next gimple
5113 statement and return to signal the caller that it shouldn't
5114 invalidate anything.
5116 This is beneficial for cases like:
5118 char p[20];
5119 void foo (char *q)
5121 strcpy (p, "foobar");
5122 size_t len = strlen (p); // can be folded to 6
5123 size_t len2 = strlen (q); // has to be computed
5124 p[0] = 'X';
5125 size_t len3 = strlen (p); // can be folded to 6
5126 size_t len4 = strlen (q); // can be folded to len2
5127 bar (len, len2, len3, len4);
5128 } */
5129 gsi_next (&m_gsi);
5130 return false;
5133 if (storing_nonzero_p
5134 || storing_all_zeros_p
5135 || (full_string_p && lenrange[1] == 0)
5136 || (offset != 0 && store_before_nul[1] > 0))
5138 /* When STORING_NONZERO_P, we know that the string will start
5139 with at least OFFSET + 1 nonzero characters. If storing
5140 a single character, set si->NONZERO_CHARS to the result.
5141 If storing multiple characters, try to determine the number
5142 of leading non-zero characters and set si->NONZERO_CHARS to
5143 the result instead.
5145 When STORING_ALL_ZEROS_P, or the first byte written is zero,
5146 i.e. FULL_STRING_P && LENRANGE[1] == 0, we know that the
5147 string is now OFFSET characters long.
5149 Otherwise, we're storing an unknown value at offset OFFSET,
5150 so need to clip the nonzero_chars to OFFSET.
5151 Use the minimum length of the string (or individual character)
5152 being stored if it's known. Otherwise, STORING_NONZERO_P
5153 guarantees it's at least 1. */
5154 HOST_WIDE_INT len
5155 = storing_nonzero_p && ranges_valid ? lenrange[0] : 1;
5156 location_t loc = gimple_location (stmt);
5157 tree oldlen = si->nonzero_chars;
5158 if (store_before_nul[1] == 0 && si->full_string_p)
5159 /* We're overwriting the nul terminator with a nonzero or
5160 unknown character. If the previous stmt was a memcpy,
5161 its length may be decreased. */
5162 adjust_last_stmt (si, stmt, false);
5163 si = unshare_strinfo (si);
5164 if (storing_nonzero_p)
5166 gcc_assert (len >= 0);
5167 si->nonzero_chars = build_int_cst (size_type_node, offset + len);
5169 else
5170 si->nonzero_chars = build_int_cst (size_type_node, offset);
5172 /* Set FULL_STRING_P only if the length of the strings being
5173 written is the same, and clear it if the strings have
5174 different lengths. In the latter case the length stored
5175 in si->NONZERO_CHARS becomes the lower bound.
5176 FIXME: Handle the upper bound of the length if possible. */
5177 si->full_string_p = full_string_p && lenrange[0] == lenrange[1];
5179 if (storing_all_zeros_p
5180 && ssaname
5181 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (ssaname))
5182 si->endptr = ssaname;
5183 else
5184 si->endptr = NULL;
5185 si->next = 0;
5186 si->stmt = NULL;
5187 si->writable = true;
5188 si->dont_invalidate = true;
5189 if (oldlen)
5191 tree adj = fold_build2_loc (loc, MINUS_EXPR, size_type_node,
5192 si->nonzero_chars, oldlen);
5193 adjust_related_strinfos (loc, si, adj);
5195 else
5196 si->prev = 0;
5199 else if (idx == 0 && (storing_all_zeros_p || storing_nonzero_p))
5201 if (ssaname)
5202 idx = new_stridx (ssaname);
5203 else
5204 idx = new_addr_stridx (lhs);
5205 if (idx != 0)
5207 tree ptr = (ssaname ? ssaname : build_fold_addr_expr (lhs));
5209 HOST_WIDE_INT slen;
5210 if (storing_all_zeros_p)
5211 slen = 0;
5212 else if (storing_nonzero_p && ranges_valid)
5214 /* FIXME: Handle the upper bound of the length when
5215 LENRANGE[0] != LENRANGE[1]. */
5216 slen = lenrange[0];
5217 if (lenrange[0] != lenrange[1])
5218 /* Set the minimum length but ignore the maximum
5219 for now. */
5220 full_string_p = false;
5222 else
5223 slen = -1;
5225 tree len = (slen <= 0
5226 ? size_zero_node
5227 : build_int_cst (size_type_node, slen));
5228 si = new_strinfo (ptr, idx, len, slen >= 0 && full_string_p);
5229 set_strinfo (idx, si);
5230 if (storing_all_zeros_p
5231 && ssaname
5232 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (ssaname))
5233 si->endptr = ssaname;
5234 si->dont_invalidate = true;
5235 si->writable = true;
5238 else if (idx == 0
5239 && rhs_minlen < HOST_WIDE_INT_M1U
5240 && ssaname == NULL_TREE
5241 && TREE_CODE (TREE_TYPE (lhs)) == ARRAY_TYPE)
5243 HOST_WIDE_INT a = int_size_in_bytes (TREE_TYPE (lhs));
5244 if (a > 0 && (unsigned HOST_WIDE_INT) a > rhs_minlen)
5246 int idx = new_addr_stridx (lhs);
5247 if (idx != 0)
5249 si = new_strinfo (build_fold_addr_expr (lhs), idx,
5250 build_int_cst (size_type_node, rhs_minlen),
5251 full_string_p);
5252 set_strinfo (idx, si);
5253 si->dont_invalidate = true;
5258 if (si != NULL && offset == 0 && storing_all_zeros_p && lenrange[2] == 1)
5260 /* For single-byte stores only, allow adjust_last_stmt to remove
5261 the statement if the stored '\0' is immediately overwritten. */
5262 laststmt.stmt = stmt;
5263 laststmt.len = build_int_cst (size_type_node, 1);
5264 laststmt.stridx = si->idx;
5266 return true;
5269 /* Try to fold strstr (s, t) eq/ne s to strncmp (s, t, strlen (t)) eq/ne 0. */
5271 static void
5272 fold_strstr_to_strncmp (tree rhs1, tree rhs2, gimple *stmt)
5274 if (TREE_CODE (rhs1) != SSA_NAME
5275 || TREE_CODE (rhs2) != SSA_NAME)
5276 return;
5278 gimple *call_stmt = NULL;
5279 for (int pass = 0; pass < 2; pass++)
5281 gimple *g = SSA_NAME_DEF_STMT (rhs1);
5282 if (gimple_call_builtin_p (g, BUILT_IN_STRSTR)
5283 && has_single_use (rhs1)
5284 && gimple_call_arg (g, 0) == rhs2)
5286 call_stmt = g;
5287 break;
5289 std::swap (rhs1, rhs2);
5292 if (call_stmt)
5294 tree arg0 = gimple_call_arg (call_stmt, 0);
5296 if (arg0 == rhs2)
5298 tree arg1 = gimple_call_arg (call_stmt, 1);
5299 tree arg1_len = NULL_TREE;
5300 int idx = get_stridx (arg1, call_stmt);
5302 if (idx)
5304 if (idx < 0)
5305 arg1_len = build_int_cst (size_type_node, ~idx);
5306 else
5308 strinfo *si = get_strinfo (idx);
5309 if (si)
5310 arg1_len = get_string_length (si);
5314 if (arg1_len != NULL_TREE)
5316 gimple_stmt_iterator gsi = gsi_for_stmt (call_stmt);
5317 tree strncmp_decl = builtin_decl_explicit (BUILT_IN_STRNCMP);
5319 if (!is_gimple_val (arg1_len))
5321 tree arg1_len_tmp = make_ssa_name (TREE_TYPE (arg1_len));
5322 gassign *arg1_stmt = gimple_build_assign (arg1_len_tmp,
5323 arg1_len);
5324 gsi_insert_before (&gsi, arg1_stmt, GSI_SAME_STMT);
5325 arg1_len = arg1_len_tmp;
5328 gcall *strncmp_call = gimple_build_call (strncmp_decl, 3,
5329 arg0, arg1, arg1_len);
5330 tree strncmp_lhs = make_ssa_name (integer_type_node);
5331 gimple_set_vuse (strncmp_call, gimple_vuse (call_stmt));
5332 gimple_call_set_lhs (strncmp_call, strncmp_lhs);
5333 gsi_remove (&gsi, true);
5334 gsi_insert_before (&gsi, strncmp_call, GSI_SAME_STMT);
5335 tree zero = build_zero_cst (TREE_TYPE (strncmp_lhs));
5337 if (is_gimple_assign (stmt))
5339 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
5341 tree cond = gimple_assign_rhs1 (stmt);
5342 TREE_OPERAND (cond, 0) = strncmp_lhs;
5343 TREE_OPERAND (cond, 1) = zero;
5345 else
5347 gimple_assign_set_rhs1 (stmt, strncmp_lhs);
5348 gimple_assign_set_rhs2 (stmt, zero);
5351 else
5353 gcond *cond = as_a<gcond *> (stmt);
5354 gimple_cond_set_lhs (cond, strncmp_lhs);
5355 gimple_cond_set_rhs (cond, zero);
5357 update_stmt (stmt);
5363 /* Return true if TYPE corresponds to a narrow character type. */
5365 static bool
5366 is_char_type (tree type)
5368 return (TREE_CODE (type) == INTEGER_TYPE
5369 && TYPE_MODE (type) == TYPE_MODE (char_type_node)
5370 && TYPE_PRECISION (type) == TYPE_PRECISION (char_type_node));
5373 /* Check the built-in call at GSI for validity and optimize it.
5374 Uses RVALS to determine range information.
5375 Return true to let the caller advance *GSI to the next statement
5376 in the basic block and false otherwise. */
5378 bool
5379 strlen_pass::check_and_optimize_call (bool *zero_write)
5381 gimple *stmt = gsi_stmt (m_gsi);
5383 if (!gimple_call_builtin_p (stmt, BUILT_IN_NORMAL))
5385 tree fntype = gimple_call_fntype (stmt);
5386 if (!fntype)
5387 return true;
5389 if (lookup_attribute ("alloc_size", TYPE_ATTRIBUTES (fntype)))
5391 handle_alloc_call (BUILT_IN_NONE);
5392 return true;
5395 if (tree lhs = gimple_call_lhs (stmt))
5396 handle_assign (lhs, zero_write);
5398 /* Proceed to handle user-defined formatting functions. */
5401 /* When not optimizing we must be checking printf calls which
5402 we do even for user-defined functions when they are declared
5403 with attribute format. */
5404 if (!flag_optimize_strlen
5405 || !strlen_optimize
5406 || !valid_builtin_call (stmt))
5407 return !handle_printf_call (&m_gsi, ptr_qry);
5409 tree callee = gimple_call_fndecl (stmt);
5410 switch (DECL_FUNCTION_CODE (callee))
5412 case BUILT_IN_STRLEN:
5413 case BUILT_IN_STRNLEN:
5414 handle_builtin_strlen ();
5415 break;
5416 case BUILT_IN_STRCHR:
5417 handle_builtin_strchr ();
5418 break;
5419 case BUILT_IN_STRCPY:
5420 case BUILT_IN_STRCPY_CHK:
5421 case BUILT_IN_STPCPY:
5422 case BUILT_IN_STPCPY_CHK:
5423 handle_builtin_strcpy (DECL_FUNCTION_CODE (callee));
5424 break;
5426 case BUILT_IN_STRNCAT:
5427 case BUILT_IN_STRNCAT_CHK:
5428 handle_builtin_strncat (DECL_FUNCTION_CODE (callee));
5429 break;
5431 case BUILT_IN_STPNCPY:
5432 case BUILT_IN_STPNCPY_CHK:
5433 case BUILT_IN_STRNCPY:
5434 case BUILT_IN_STRNCPY_CHK:
5435 handle_builtin_stxncpy_strncat (false);
5436 break;
5438 case BUILT_IN_MEMCPY:
5439 case BUILT_IN_MEMCPY_CHK:
5440 case BUILT_IN_MEMPCPY:
5441 case BUILT_IN_MEMPCPY_CHK:
5442 handle_builtin_memcpy (DECL_FUNCTION_CODE (callee));
5443 break;
5444 case BUILT_IN_STRCAT:
5445 case BUILT_IN_STRCAT_CHK:
5446 handle_builtin_strcat (DECL_FUNCTION_CODE (callee));
5447 break;
5448 case BUILT_IN_ALLOCA:
5449 case BUILT_IN_ALLOCA_WITH_ALIGN:
5450 case BUILT_IN_MALLOC:
5451 case BUILT_IN_CALLOC:
5452 handle_alloc_call (DECL_FUNCTION_CODE (callee));
5453 break;
5454 case BUILT_IN_MEMSET:
5455 if (handle_builtin_memset (zero_write))
5456 return false;
5457 break;
5458 case BUILT_IN_MEMCMP:
5459 if (handle_builtin_memcmp ())
5460 return false;
5461 break;
5462 case BUILT_IN_STRCMP:
5463 case BUILT_IN_STRNCMP:
5464 if (handle_builtin_string_cmp ())
5465 return false;
5466 break;
5467 default:
5468 if (handle_printf_call (&m_gsi, ptr_qry))
5469 return false;
5470 break;
5473 return true;
5476 /* Handle an assignment statement at *GSI to a LHS of integral type.
5477 If GSI's basic block needs clean-up of EH, set *CLEANUP_EH to true. */
5479 void
5480 strlen_pass::handle_integral_assign (bool *cleanup_eh)
5482 gimple *stmt = gsi_stmt (m_gsi);
5483 tree lhs = gimple_assign_lhs (stmt);
5484 tree lhs_type = TREE_TYPE (lhs);
5486 enum tree_code code = gimple_assign_rhs_code (stmt);
5487 if (code == COND_EXPR)
5489 tree cond = gimple_assign_rhs1 (stmt);
5490 enum tree_code cond_code = TREE_CODE (cond);
5492 if (cond_code == EQ_EXPR || cond_code == NE_EXPR)
5493 fold_strstr_to_strncmp (TREE_OPERAND (cond, 0),
5494 TREE_OPERAND (cond, 1), stmt);
5496 else if (code == EQ_EXPR || code == NE_EXPR)
5497 fold_strstr_to_strncmp (gimple_assign_rhs1 (stmt),
5498 gimple_assign_rhs2 (stmt), stmt);
5499 else if (gimple_assign_load_p (stmt)
5500 && TREE_CODE (lhs_type) == INTEGER_TYPE
5501 && TYPE_MODE (lhs_type) == TYPE_MODE (char_type_node)
5502 && (TYPE_PRECISION (lhs_type)
5503 == TYPE_PRECISION (char_type_node))
5504 && !gimple_has_volatile_ops (stmt))
5506 tree off = integer_zero_node;
5507 unsigned HOST_WIDE_INT coff = 0;
5508 int idx = 0;
5509 tree rhs1 = gimple_assign_rhs1 (stmt);
5510 if (code == MEM_REF)
5512 idx = get_stridx (TREE_OPERAND (rhs1, 0), stmt);
5513 if (idx > 0)
5515 strinfo *si = get_strinfo (idx);
5516 if (si
5517 && si->nonzero_chars
5518 && TREE_CODE (si->nonzero_chars) == INTEGER_CST
5519 && (wi::to_widest (si->nonzero_chars)
5520 >= wi::to_widest (off)))
5521 off = TREE_OPERAND (rhs1, 1);
5522 else
5523 /* This case is not useful. See if get_addr_stridx
5524 returns something usable. */
5525 idx = 0;
5528 if (idx <= 0)
5529 idx = get_addr_stridx (rhs1, stmt, NULL_TREE, &coff);
5530 if (idx > 0)
5532 strinfo *si = get_strinfo (idx);
5533 if (si
5534 && si->nonzero_chars
5535 && TREE_CODE (si->nonzero_chars) == INTEGER_CST)
5537 widest_int w1 = wi::to_widest (si->nonzero_chars);
5538 widest_int w2 = wi::to_widest (off) + coff;
5539 if (w1 == w2
5540 && si->full_string_p)
5542 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
5544 fprintf (dump_file, "Optimizing: ");
5545 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
5548 /* Reading the final '\0' character. */
5549 tree zero = build_int_cst (lhs_type, 0);
5550 gimple_set_vuse (stmt, NULL_TREE);
5551 gimple_assign_set_rhs_from_tree (&m_gsi, zero);
5552 *cleanup_eh
5553 |= maybe_clean_or_replace_eh_stmt (stmt,
5554 gsi_stmt (m_gsi));
5555 stmt = gsi_stmt (m_gsi);
5556 update_stmt (stmt);
5558 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
5560 fprintf (dump_file, "into: ");
5561 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
5564 else if (w1 > w2)
5566 /* Reading a character before the final '\0'
5567 character. Just set the value range to ~[0, 0]
5568 if we don't have anything better. */
5569 value_range r;
5570 if (!get_range_query (cfun)->range_of_expr (r, lhs)
5571 || r.varying_p ())
5573 r.set_nonzero (lhs_type);
5574 set_range_info (lhs, r);
5580 else if (code == MEM_REF && TREE_CODE (lhs) == SSA_NAME)
5582 if (int idx = new_stridx (lhs))
5584 /* Record multi-byte assignments from MEM_REFs. */
5585 bool storing_all_nonzero_p;
5586 bool storing_all_zeros_p;
5587 bool full_string_p;
5588 unsigned lenrange[] = { UINT_MAX, 0, 0 };
5589 tree rhs = gimple_assign_rhs1 (stmt);
5590 const bool ranges_valid
5591 = count_nonzero_bytes (rhs, stmt,
5592 lenrange, &full_string_p,
5593 &storing_all_zeros_p,
5594 &storing_all_nonzero_p);
5595 if (ranges_valid)
5597 tree length = build_int_cst (sizetype, lenrange[0]);
5598 strinfo *si = new_strinfo (lhs, idx, length, full_string_p);
5599 set_strinfo (idx, si);
5600 si->writable = true;
5601 si->dont_invalidate = true;
5606 if (strlen_to_stridx)
5608 tree rhs1 = gimple_assign_rhs1 (stmt);
5609 if (stridx_strlenloc *ps = strlen_to_stridx->get (rhs1))
5610 strlen_to_stridx->put (lhs, stridx_strlenloc (*ps));
5614 /* Handle assignment statement at *GSI to LHS. Set *ZERO_WRITE if
5615 the assignment stores all zero bytes. */
5617 bool
5618 strlen_pass::handle_assign (tree lhs, bool *zero_write)
5620 tree type = TREE_TYPE (lhs);
5621 if (TREE_CODE (type) == ARRAY_TYPE)
5622 type = TREE_TYPE (type);
5624 bool is_char_store = is_char_type (type);
5625 if (!is_char_store && TREE_CODE (lhs) == MEM_REF)
5627 /* To consider stores into char objects via integer types other
5628 than char but not those to non-character objects, determine
5629 the type of the destination rather than just the type of
5630 the access. */
5631 for (int i = 0; i != 2; ++i)
5633 tree ref = TREE_OPERAND (lhs, i);
5634 type = TREE_TYPE (ref);
5635 if (TREE_CODE (type) == POINTER_TYPE)
5636 type = TREE_TYPE (type);
5637 if (TREE_CODE (type) == ARRAY_TYPE)
5638 type = TREE_TYPE (type);
5639 if (is_char_type (type))
5641 is_char_store = true;
5642 break;
5647 /* Handle a single or multibyte assignment. */
5648 if (is_char_store && !handle_store (zero_write))
5649 return false;
5651 return true;
5655 /* Attempt to check for validity of the performed access a single statement
5656 at *GSI using string length knowledge, and to optimize it.
5657 If the given basic block needs clean-up of EH, CLEANUP_EH is set to
5658 true. Return true to let the caller advance *GSI to the next statement
5659 in the basic block and false otherwise. */
5661 bool
5662 strlen_pass::check_and_optimize_stmt (bool *cleanup_eh)
5664 gimple *stmt = gsi_stmt (m_gsi);
5666 /* For statements that modify a string, set to true if the write
5667 is only zeros. */
5668 bool zero_write = false;
5670 if (is_gimple_call (stmt))
5672 if (!check_and_optimize_call (&zero_write))
5673 return false;
5675 else if (!flag_optimize_strlen || !strlen_optimize)
5676 return true;
5677 else if (is_gimple_assign (stmt) && !gimple_clobber_p (stmt))
5679 /* Handle non-clobbering assignment. */
5680 tree lhs = gimple_assign_lhs (stmt);
5681 tree lhs_type = TREE_TYPE (lhs);
5683 if (TREE_CODE (lhs) == SSA_NAME && POINTER_TYPE_P (lhs_type))
5685 if (gimple_assign_single_p (stmt)
5686 || (gimple_assign_cast_p (stmt)
5687 && POINTER_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt)))))
5689 int idx = get_stridx (gimple_assign_rhs1 (stmt), stmt);
5690 ssa_ver_to_stridx[SSA_NAME_VERSION (lhs)] = idx;
5692 else if (gimple_assign_rhs_code (stmt) == POINTER_PLUS_EXPR)
5693 handle_pointer_plus ();
5695 else if (TREE_CODE (lhs) == SSA_NAME && INTEGRAL_TYPE_P (lhs_type))
5696 /* Handle assignment to a character. */
5697 handle_integral_assign (cleanup_eh);
5698 else if (TREE_CODE (lhs) != SSA_NAME && !TREE_SIDE_EFFECTS (lhs))
5699 if (!handle_assign (lhs, &zero_write))
5700 return false;
5702 else if (gcond *cond = dyn_cast<gcond *> (stmt))
5704 enum tree_code code = gimple_cond_code (cond);
5705 if (code == EQ_EXPR || code == NE_EXPR)
5706 fold_strstr_to_strncmp (gimple_cond_lhs (stmt),
5707 gimple_cond_rhs (stmt), stmt);
5710 if (gimple_vdef (stmt))
5711 maybe_invalidate (stmt, zero_write);
5712 return true;
5715 /* Recursively call maybe_invalidate on stmts that might be executed
5716 in between dombb and current bb and that contain a vdef. Stop when
5717 *count stmts are inspected, or if the whole strinfo vector has
5718 been invalidated. */
5720 static void
5721 do_invalidate (basic_block dombb, gimple *phi, bitmap visited, int *count)
5723 unsigned int i, n = gimple_phi_num_args (phi);
5725 for (i = 0; i < n; i++)
5727 tree vuse = gimple_phi_arg_def (phi, i);
5728 gimple *stmt = SSA_NAME_DEF_STMT (vuse);
5729 basic_block bb = gimple_bb (stmt);
5730 if (bb == NULL
5731 || bb == dombb
5732 || !bitmap_set_bit (visited, bb->index)
5733 || !dominated_by_p (CDI_DOMINATORS, bb, dombb))
5734 continue;
5735 while (1)
5737 if (gimple_code (stmt) == GIMPLE_PHI)
5739 do_invalidate (dombb, stmt, visited, count);
5740 if (*count == 0)
5741 return;
5742 break;
5744 if (--*count == 0)
5745 return;
5746 if (!maybe_invalidate (stmt))
5748 *count = 0;
5749 return;
5751 vuse = gimple_vuse (stmt);
5752 stmt = SSA_NAME_DEF_STMT (vuse);
5753 if (gimple_bb (stmt) != bb)
5755 bb = gimple_bb (stmt);
5756 if (bb == NULL
5757 || bb == dombb
5758 || !bitmap_set_bit (visited, bb->index)
5759 || !dominated_by_p (CDI_DOMINATORS, bb, dombb))
5760 break;
5766 /* Release pointer_query cache. */
5768 strlen_pass::~strlen_pass ()
5770 ptr_qry.flush_cache ();
5773 /* Callback for walk_dominator_tree. Attempt to optimize various
5774 string ops by remembering string lengths pointed by pointer SSA_NAMEs. */
5776 edge
5777 strlen_pass::before_dom_children (basic_block bb)
5779 basic_block dombb = get_immediate_dominator (CDI_DOMINATORS, bb);
5781 if (dombb == NULL)
5782 stridx_to_strinfo = NULL;
5783 else
5785 stridx_to_strinfo = ((vec<strinfo *, va_heap, vl_embed> *) dombb->aux);
5786 if (stridx_to_strinfo)
5788 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
5789 gsi_next (&gsi))
5791 gphi *phi = gsi.phi ();
5792 if (virtual_operand_p (gimple_phi_result (phi)))
5794 bitmap visited = BITMAP_ALLOC (NULL);
5795 int count_vdef = 100;
5796 do_invalidate (dombb, phi, visited, &count_vdef);
5797 BITMAP_FREE (visited);
5798 if (count_vdef == 0)
5800 /* If there were too many vdefs in between immediate
5801 dominator and current bb, invalidate everything.
5802 If stridx_to_strinfo has been unshared, we need
5803 to free it, otherwise just set it to NULL. */
5804 if (!strinfo_shared ())
5806 unsigned int i;
5807 strinfo *si;
5809 for (i = 1;
5810 vec_safe_iterate (stridx_to_strinfo, i, &si);
5811 ++i)
5813 free_strinfo (si);
5814 (*stridx_to_strinfo)[i] = NULL;
5817 else
5818 stridx_to_strinfo = NULL;
5820 break;
5826 /* If all PHI arguments have the same string index, the PHI result
5827 has it as well. */
5828 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
5829 gsi_next (&gsi))
5831 gphi *phi = gsi.phi ();
5832 tree result = gimple_phi_result (phi);
5833 if (!virtual_operand_p (result) && POINTER_TYPE_P (TREE_TYPE (result)))
5835 int idx = get_stridx (gimple_phi_arg_def (phi, 0), phi);
5836 if (idx != 0)
5838 unsigned int i, n = gimple_phi_num_args (phi);
5839 for (i = 1; i < n; i++)
5840 if (idx != get_stridx (gimple_phi_arg_def (phi, i), phi))
5841 break;
5842 if (i == n)
5843 ssa_ver_to_stridx[SSA_NAME_VERSION (result)] = idx;
5848 bool cleanup_eh = false;
5850 /* Attempt to optimize individual statements. */
5851 for (m_gsi = gsi_start_bb (bb); !gsi_end_p (m_gsi); )
5853 /* Reset search depth performance counter. */
5854 ptr_qry.depth = 0;
5856 if (check_and_optimize_stmt (&cleanup_eh))
5857 gsi_next (&m_gsi);
5860 if (cleanup_eh && gimple_purge_dead_eh_edges (bb))
5861 m_cleanup_cfg = true;
5863 bb->aux = stridx_to_strinfo;
5864 if (vec_safe_length (stridx_to_strinfo) && !strinfo_shared ())
5865 (*stridx_to_strinfo)[0] = (strinfo *) bb;
5866 return NULL;
5869 /* Callback for walk_dominator_tree. Free strinfo vector if it is
5870 owned by the current bb, clear bb->aux. */
5872 void
5873 strlen_pass::after_dom_children (basic_block bb)
5875 if (bb->aux)
5877 stridx_to_strinfo = ((vec<strinfo *, va_heap, vl_embed> *) bb->aux);
5878 if (vec_safe_length (stridx_to_strinfo)
5879 && (*stridx_to_strinfo)[0] == (strinfo *) bb)
5881 unsigned int i;
5882 strinfo *si;
5884 for (i = 1; vec_safe_iterate (stridx_to_strinfo, i, &si); ++i)
5885 free_strinfo (si);
5886 vec_free (stridx_to_strinfo);
5888 bb->aux = NULL;
5892 namespace {
5894 static unsigned int
5895 printf_strlen_execute (function *fun, bool warn_only)
5897 strlen_optimize = !warn_only;
5899 calculate_dominance_info (CDI_DOMINATORS);
5900 loop_optimizer_init (LOOPS_NORMAL);
5901 scev_initialize ();
5903 gcc_assert (!strlen_to_stridx);
5904 if (warn_stringop_overflow || warn_stringop_truncation)
5905 strlen_to_stridx = new hash_map<tree, stridx_strlenloc> ();
5907 /* This has to happen after initializing the loop optimizer
5908 and initializing SCEV as they create new SSA_NAMEs. */
5909 ssa_ver_to_stridx.safe_grow_cleared (num_ssa_names, true);
5910 max_stridx = 1;
5912 /* String length optimization is implemented as a walk of the dominator
5913 tree and a forward walk of statements within each block. */
5914 strlen_pass walker (CDI_DOMINATORS);
5915 walker.walk (ENTRY_BLOCK_PTR_FOR_FN (fun));
5917 if (dump_file && (dump_flags & TDF_DETAILS))
5918 walker.ptr_qry.dump (dump_file, true);
5920 ssa_ver_to_stridx.release ();
5921 strinfo_pool.release ();
5922 if (decl_to_stridxlist_htab)
5924 obstack_free (&stridx_obstack, NULL);
5925 delete decl_to_stridxlist_htab;
5926 decl_to_stridxlist_htab = NULL;
5928 laststmt.stmt = NULL;
5929 laststmt.len = NULL_TREE;
5930 laststmt.stridx = 0;
5932 if (strlen_to_stridx)
5934 strlen_to_stridx->empty ();
5935 delete strlen_to_stridx;
5936 strlen_to_stridx = NULL;
5939 scev_finalize ();
5940 loop_optimizer_finalize ();
5942 return walker.m_cleanup_cfg ? TODO_cleanup_cfg : 0;
5945 /* This file defines two passes: one for warnings that runs only when
5946 optimization is disabled, and another that implements optimizations
5947 and also issues warnings. */
5949 const pass_data pass_data_warn_printf =
5951 GIMPLE_PASS, /* type */
5952 "warn-printf", /* name */
5953 OPTGROUP_NONE, /* optinfo_flags */
5954 TV_NONE, /* tv_id */
5955 /* Normally an optimization pass would require PROP_ssa but because
5956 this pass runs early, with no optimization, to do sprintf format
5957 checking, it only requires PROP_cfg. */
5958 PROP_cfg, /* properties_required */
5959 0, /* properties_provided */
5960 0, /* properties_destroyed */
5961 0, /* todo_flags_start */
5962 0, /* todo_flags_finish */
5965 class pass_warn_printf : public gimple_opt_pass
5967 public:
5968 pass_warn_printf (gcc::context *ctxt)
5969 : gimple_opt_pass (pass_data_warn_printf, ctxt)
5972 bool gate (function *) final override;
5973 unsigned int execute (function *fun) final override
5975 return printf_strlen_execute (fun, true);
5980 /* Return true to run the warning pass only when not optimizing and
5981 iff either -Wformat-overflow or -Wformat-truncation is specified. */
5983 bool
5984 pass_warn_printf::gate (function *)
5986 return !optimize && (warn_format_overflow > 0 || warn_format_trunc > 0);
5989 const pass_data pass_data_strlen =
5991 GIMPLE_PASS, /* type */
5992 "strlen", /* name */
5993 OPTGROUP_NONE, /* optinfo_flags */
5994 TV_TREE_STRLEN, /* tv_id */
5995 PROP_cfg | PROP_ssa, /* properties_required */
5996 0, /* properties_provided */
5997 0, /* properties_destroyed */
5998 0, /* todo_flags_start */
5999 0, /* todo_flags_finish */
6002 class pass_strlen : public gimple_opt_pass
6004 public:
6005 pass_strlen (gcc::context *ctxt)
6006 : gimple_opt_pass (pass_data_strlen, ctxt)
6009 opt_pass * clone () final override { return new pass_strlen (m_ctxt); }
6011 bool gate (function *) final override;
6012 unsigned int execute (function *fun) final override
6014 return printf_strlen_execute (fun, false);
6018 /* Return true to run the pass only when the sprintf and/or strlen
6019 optimizations are enabled and -Wformat-overflow or -Wformat-truncation
6020 are specified. */
6022 bool
6023 pass_strlen::gate (function *)
6025 return ((warn_format_overflow > 0
6026 || warn_format_trunc > 0
6027 || warn_restrict > 0
6028 || flag_optimize_strlen > 0
6029 || flag_printf_return_value)
6030 && optimize > 0);
6033 } // anon namespace
6035 gimple_opt_pass *
6036 make_pass_warn_printf (gcc::context *ctxt)
6038 return new pass_warn_printf (ctxt);
6041 gimple_opt_pass *
6042 make_pass_strlen (gcc::context *ctxt)
6044 return new pass_strlen (ctxt);