Small fix for -fdump-ada-spec
[official-gcc.git] / gcc / tree-ssa-strlen.cc
blob7508c1768a54fe1f928f67f44abda243bb339509
1 /* String length optimization
2 Copyright (C) 2011-2023 Free Software Foundation, Inc.
3 Contributed by Jakub Jelinek <jakub@redhat.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "alloc-pool.h"
29 #include "tree-pass.h"
30 #include "ssa.h"
31 #include "cgraph.h"
32 #include "gimple-pretty-print.h"
33 #include "gimple-ssa-warn-access.h"
34 #include "gimple-ssa-warn-restrict.h"
35 #include "fold-const.h"
36 #include "stor-layout.h"
37 #include "gimple-iterator.h"
38 #include "gimple-fold.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimplify-me.h"
42 #include "expr.h"
43 #include "tree-cfg.h"
44 #include "tree-dfa.h"
45 #include "domwalk.h"
46 #include "tree-ssa-alias.h"
47 #include "tree-ssa-propagate.h"
48 #include "tree-ssa-strlen.h"
49 #include "tree-hash-traits.h"
50 #include "builtins.h"
51 #include "pointer-query.h"
52 #include "target.h"
53 #include "diagnostic-core.h"
54 #include "diagnostic.h"
55 #include "intl.h"
56 #include "attribs.h"
57 #include "calls.h"
58 #include "cfgloop.h"
59 #include "tree-ssa-loop.h"
60 #include "tree-scalar-evolution.h"
61 #include "vr-values.h"
62 #include "gimple-range.h"
63 #include "tree-ssa.h"
65 /* A vector indexed by SSA_NAME_VERSION. 0 means unknown, positive value
66 is an index into strinfo vector, negative value stands for
67 string length of a string literal (~strlen). */
68 static vec<int> ssa_ver_to_stridx;
70 /* Number of currently active string indexes plus one. */
71 static int max_stridx;
73 /* Set to true to optimize, false when just checking. */
74 static bool strlen_optimize;
76 /* String information record. */
77 struct strinfo
79 /* Number of leading characters that are known to be nonzero. This is
80 also the length of the string if FULL_STRING_P.
82 The values in a list of related string pointers must be consistent;
83 that is, if strinfo B comes X bytes after strinfo A, it must be
84 the case that A->nonzero_chars == X + B->nonzero_chars. */
85 tree nonzero_chars;
86 /* Any of the corresponding pointers for querying alias oracle. */
87 tree ptr;
88 /* STMT is used for two things:
90 - To record the statement that should be used for delayed length
91 computations. We maintain the invariant that all related strinfos
92 have delayed lengths or none do.
94 - To record the malloc or calloc call that produced this result
95 to optimize away malloc/memset sequences. STMT is reset after
96 a calloc-allocated object has been stored a non-zero value into. */
97 gimple *stmt;
98 /* Set to the dynamic allocation statement for the object (alloca,
99 calloc, malloc, or VLA). Unlike STMT, once set for a strinfo
100 object, ALLOC doesn't change. */
101 gimple *alloc;
102 /* Pointer to '\0' if known, if NULL, it can be computed as
103 ptr + length. */
104 tree endptr;
105 /* Reference count. Any changes to strinfo entry possibly shared
106 with dominating basic blocks need unshare_strinfo first, except
107 for dont_invalidate which affects only the immediately next
108 maybe_invalidate. */
109 int refcount;
110 /* Copy of index. get_strinfo (si->idx) should return si; */
111 int idx;
112 /* These 3 fields are for chaining related string pointers together.
113 E.g. for
114 bl = strlen (b); dl = strlen (d); strcpy (a, b); c = a + bl;
115 strcpy (c, d); e = c + dl;
116 strinfo(a) -> strinfo(c) -> strinfo(e)
117 All have ->first field equal to strinfo(a)->idx and are doubly
118 chained through prev/next fields. The later strinfos are required
119 to point into the same string with zero or more bytes after
120 the previous pointer and all bytes in between the two pointers
121 must be non-zero. Functions like strcpy or memcpy are supposed
122 to adjust all previous strinfo lengths, but not following strinfo
123 lengths (those are uncertain, usually invalidated during
124 maybe_invalidate, except when the alias oracle knows better).
125 Functions like strcat on the other side adjust the whole
126 related strinfo chain.
127 They are updated lazily, so to use the chain the same first fields
128 and si->prev->next == si->idx needs to be verified. */
129 int first;
130 int next;
131 int prev;
132 /* A flag whether the string is known to be written in the current
133 function. */
134 bool writable;
135 /* A flag for the next maybe_invalidate that this strinfo shouldn't
136 be invalidated. Always cleared by maybe_invalidate. */
137 bool dont_invalidate;
138 /* True if the string is known to be nul-terminated after NONZERO_CHARS
139 characters. False is useful when detecting strings that are built
140 up via successive memcpys. */
141 bool full_string_p;
144 /* Pool for allocating strinfo_struct entries. */
145 static object_allocator<strinfo> strinfo_pool ("strinfo pool");
147 /* Vector mapping positive string indexes to strinfo, for the
148 current basic block. The first pointer in the vector is special,
149 it is either NULL, meaning the vector isn't shared, or it is
150 a basic block pointer to the owner basic_block if shared.
151 If some other bb wants to modify the vector, the vector needs
152 to be unshared first, and only the owner bb is supposed to free it. */
153 static vec<strinfo *, va_heap, vl_embed> *stridx_to_strinfo;
155 /* One OFFSET->IDX mapping. */
156 struct stridxlist
158 struct stridxlist *next;
159 HOST_WIDE_INT offset;
160 int idx;
163 /* Hash table entry, mapping a DECL to a chain of OFFSET->IDX mappings. */
164 struct decl_stridxlist_map
166 struct tree_map_base base;
167 struct stridxlist list;
170 /* Hash table for mapping decls to a chained list of offset -> idx
171 mappings. */
172 typedef hash_map<tree_decl_hash, stridxlist> decl_to_stridxlist_htab_t;
173 static decl_to_stridxlist_htab_t *decl_to_stridxlist_htab;
175 /* Hash table mapping strlen (or strnlen with constant bound and return
176 smaller than bound) calls to stridx instances describing
177 the calls' arguments. Non-null only when warn_stringop_truncation
178 is non-zero. */
179 typedef std::pair<int, location_t> stridx_strlenloc;
180 static hash_map<tree, stridx_strlenloc> *strlen_to_stridx;
182 /* Obstack for struct stridxlist and struct decl_stridxlist_map. */
183 static struct obstack stridx_obstack;
185 /* Last memcpy statement if it could be adjusted if the trailing
186 '\0' written is immediately overwritten, or
187 *x = '\0' store that could be removed if it is immediately overwritten. */
188 struct laststmt_struct
190 gimple *stmt;
191 tree len;
192 int stridx;
193 } laststmt;
195 static int get_stridx_plus_constant (strinfo *, unsigned HOST_WIDE_INT, tree);
196 static bool get_range_strlen_dynamic (tree, gimple *, c_strlen_data *,
197 bitmap, pointer_query *, unsigned *);
199 /* Sets MINMAX to either the constant value or the range VAL is in
200 and returns either the constant value or VAL on success or null
201 when the range couldn't be determined. Uses RVALS or CFUN for
202 range info, whichever is nonnull. */
204 tree
205 get_range (tree val, gimple *stmt, wide_int minmax[2],
206 range_query *rvals /* = NULL */)
208 if (!rvals)
210 if (!cfun)
211 /* When called from front ends for global initializers CFUN
212 may be null. */
213 return NULL_TREE;
215 rvals = get_range_query (cfun);
218 value_range vr;
219 if (!rvals->range_of_expr (vr, val, stmt))
220 return NULL_TREE;
222 value_range_kind rng = vr.kind ();
223 if (rng == VR_RANGE)
225 /* Only handle straight ranges. */
226 minmax[0] = wi::to_wide (vr.min ());
227 minmax[1] = wi::to_wide (vr.max ());
228 return val;
231 return NULL_TREE;
234 class strlen_pass : public dom_walker
236 public:
237 strlen_pass (cdi_direction direction)
238 : dom_walker (direction),
239 ptr_qry (&m_ranger),
240 m_cleanup_cfg (false)
244 ~strlen_pass ();
246 edge before_dom_children (basic_block) final override;
247 void after_dom_children (basic_block) final override;
249 bool check_and_optimize_stmt (bool *cleanup_eh);
250 bool check_and_optimize_call (bool *zero_write);
251 bool handle_assign (tree lhs, bool *zero_write);
252 bool handle_store (bool *zero_write);
253 void handle_pointer_plus ();
254 void handle_builtin_strlen ();
255 void handle_builtin_strchr ();
256 void handle_builtin_strcpy (built_in_function);
257 void handle_integral_assign (bool *cleanup_eh);
258 void handle_builtin_stxncpy_strncat (bool append_p);
259 void handle_builtin_memcpy (built_in_function bcode);
260 void handle_builtin_strcat (built_in_function bcode);
261 void handle_builtin_strncat (built_in_function);
262 bool handle_builtin_memset (bool *zero_write);
263 bool handle_builtin_memcmp ();
264 bool handle_builtin_string_cmp ();
265 void handle_alloc_call (built_in_function);
266 void maybe_warn_overflow (gimple *stmt, bool call_lhs, tree len,
267 strinfo *si = NULL, bool plus_one = false,
268 bool rawmem = false);
269 void maybe_warn_overflow (gimple *stmt, bool call_lhs,
270 unsigned HOST_WIDE_INT len,
271 strinfo *si = NULL,
272 bool plus_one = false, bool rawmem = false);
273 void adjust_last_stmt (strinfo *si, gimple *stmt, bool is_strcat);
274 tree strxcmp_eqz_result (gimple *stmt, tree arg1, int idx1,
275 tree arg2, int idx2,
276 unsigned HOST_WIDE_INT bound,
277 unsigned HOST_WIDE_INT len[2],
278 unsigned HOST_WIDE_INT *psize);
279 bool count_nonzero_bytes (tree expr_or_type,
280 gimple *stmt,
281 unsigned lenrange[3], bool *nulterm,
282 bool *allnul, bool *allnonnul);
283 bool count_nonzero_bytes (tree exp,
284 gimple *stmt,
285 unsigned HOST_WIDE_INT offset,
286 unsigned HOST_WIDE_INT nbytes,
287 unsigned lenrange[3], bool *nulterm,
288 bool *allnul, bool *allnonnul,
289 ssa_name_limit_t &snlim);
290 bool count_nonzero_bytes_addr (tree exp,
291 gimple *stmt,
292 unsigned HOST_WIDE_INT offset,
293 unsigned HOST_WIDE_INT nbytes,
294 unsigned lenrange[3], bool *nulterm,
295 bool *allnul, bool *allnonnul,
296 ssa_name_limit_t &snlim);
297 bool get_len_or_size (gimple *stmt, tree arg, int idx,
298 unsigned HOST_WIDE_INT lenrng[2],
299 unsigned HOST_WIDE_INT *size, bool *nulterm);
301 gimple_ranger m_ranger;
303 /* A pointer_query object to store information about pointers and
304 their targets in. */
305 pointer_query ptr_qry;
307 gimple_stmt_iterator m_gsi;
309 /* Flag that will trigger TODO_cleanup_cfg to be returned in strlen
310 execute function. */
311 bool m_cleanup_cfg;
314 /* Return:
316 * +1 if SI is known to start with more than OFF nonzero characters.
318 * 0 if SI is known to start with exactly OFF nonzero characters.
320 * -1 if SI either does not start with OFF nonzero characters
321 or the relationship between the number of leading nonzero
322 characters in SI and OFF is unknown. */
324 static int
325 compare_nonzero_chars (strinfo *si, unsigned HOST_WIDE_INT off)
327 if (si->nonzero_chars
328 && TREE_CODE (si->nonzero_chars) == INTEGER_CST)
329 return compare_tree_int (si->nonzero_chars, off);
330 else
331 return -1;
334 /* Same as above but suitable also for strings with non-constant lengths.
335 Uses RVALS to determine length range. */
337 static int
338 compare_nonzero_chars (strinfo *si, gimple *stmt,
339 unsigned HOST_WIDE_INT off,
340 range_query *rvals)
342 if (!si->nonzero_chars)
343 return -1;
345 if (TREE_CODE (si->nonzero_chars) == INTEGER_CST)
346 return compare_tree_int (si->nonzero_chars, off);
348 if (!rvals || TREE_CODE (si->nonzero_chars) != SSA_NAME)
349 return -1;
351 value_range vr;
352 if (!rvals->range_of_expr (vr, si->nonzero_chars, stmt))
353 return -1;
354 value_range_kind rng = vr.kind ();
355 if (rng != VR_RANGE)
356 return -1;
358 /* If the offset is less than the minimum length or if the bounds
359 of the length range are equal return the result of the comparison
360 same as in the constant case. Otherwise return a conservative
361 result. */
362 int cmpmin = compare_tree_int (vr.min (), off);
363 if (cmpmin > 0 || tree_int_cst_equal (vr.min (), vr.max ()))
364 return cmpmin;
366 return -1;
369 /* Return true if SI is known to be a zero-length string. */
371 static inline bool
372 zero_length_string_p (strinfo *si)
374 return si->full_string_p && integer_zerop (si->nonzero_chars);
377 /* Return strinfo vector entry IDX. */
379 static inline strinfo *
380 get_strinfo (int idx)
382 if (vec_safe_length (stridx_to_strinfo) <= (unsigned int) idx)
383 return NULL;
384 return (*stridx_to_strinfo)[idx];
387 /* Get the next strinfo in the chain after SI, or null if none. */
389 static inline strinfo *
390 get_next_strinfo (strinfo *si)
392 if (si->next == 0)
393 return NULL;
394 strinfo *nextsi = get_strinfo (si->next);
395 if (nextsi == NULL || nextsi->first != si->first || nextsi->prev != si->idx)
396 return NULL;
397 return nextsi;
400 /* Helper function for get_stridx. Return the strinfo index of the address
401 of EXP, which is available in PTR if nonnull. If OFFSET_OUT, it is
402 OK to return the index for some X <= &EXP and store &EXP - X in
403 *OFFSET_OUT. When RVALS is nonnull uses it to determine range
404 information. */
406 static int
407 get_addr_stridx (tree exp, gimple *stmt,
408 tree ptr, unsigned HOST_WIDE_INT *offset_out,
409 range_query *rvals = NULL)
411 HOST_WIDE_INT off;
412 struct stridxlist *list, *last = NULL;
413 tree base;
415 if (!decl_to_stridxlist_htab)
416 return 0;
418 poly_int64 poff;
419 base = get_addr_base_and_unit_offset (exp, &poff);
420 if (base == NULL || !DECL_P (base) || !poff.is_constant (&off))
421 return 0;
423 list = decl_to_stridxlist_htab->get (base);
424 if (list == NULL)
425 return 0;
429 if (list->offset == off)
431 if (offset_out)
432 *offset_out = 0;
433 return list->idx;
435 if (list->offset > off)
436 return 0;
437 last = list;
438 list = list->next;
440 while (list);
442 if ((offset_out || ptr) && last && last->idx > 0)
444 unsigned HOST_WIDE_INT rel_off
445 = (unsigned HOST_WIDE_INT) off - last->offset;
446 strinfo *si = get_strinfo (last->idx);
447 if (si && compare_nonzero_chars (si, stmt, rel_off, rvals) >= 0)
449 if (offset_out)
451 *offset_out = rel_off;
452 return last->idx;
454 else
455 return get_stridx_plus_constant (si, rel_off, ptr);
458 return 0;
461 /* Returns string index for EXP. When EXP is an SSA_NAME that refers
462 to a known strinfo with an offset and OFFRNG is non-null, sets
463 both elements of the OFFRNG array to the range of the offset and
464 returns the index of the known strinfo. In this case the result
465 must not be used in for functions that modify the string.
466 When nonnull, uses RVALS to determine range information. */
468 static int
469 get_stridx (tree exp, gimple *stmt,
470 wide_int offrng[2] = NULL, range_query *rvals = NULL)
472 if (offrng)
473 offrng[0] = offrng[1] = wi::zero (TYPE_PRECISION (ptrdiff_type_node));
475 if (TREE_CODE (exp) == SSA_NAME)
477 if (ssa_ver_to_stridx[SSA_NAME_VERSION (exp)])
478 return ssa_ver_to_stridx[SSA_NAME_VERSION (exp)];
480 tree e = exp;
481 int last_idx = 0;
482 HOST_WIDE_INT offset = 0;
483 /* Follow a chain of at most 5 assignments. */
484 for (int i = 0; i < 5; i++)
486 gimple *def_stmt = SSA_NAME_DEF_STMT (e);
487 if (!is_gimple_assign (def_stmt))
488 return last_idx;
490 tree_code rhs_code = gimple_assign_rhs_code (def_stmt);
491 tree ptr, off;
493 if (rhs_code == ADDR_EXPR)
495 /* Handle indices/offsets into VLAs which are implemented
496 as pointers to arrays. */
497 ptr = gimple_assign_rhs1 (def_stmt);
498 ptr = TREE_OPERAND (ptr, 0);
500 /* Handle also VLAs of types larger than char. */
501 if (tree eltsize = TYPE_SIZE_UNIT (TREE_TYPE (ptr)))
503 if (TREE_CODE (ptr) == ARRAY_REF)
505 off = TREE_OPERAND (ptr, 1);
506 ptr = TREE_OPERAND (ptr, 0);
507 if (!integer_onep (eltsize))
509 /* Scale the array index by the size of the element
510 type in the rare case that it's greater than
511 the typical 1 for char, making sure both operands
512 have the same type. */
513 eltsize = fold_convert (ssizetype, eltsize);
514 off = fold_convert (ssizetype, off);
515 off = fold_build2 (MULT_EXPR, ssizetype, off, eltsize);
518 else
519 off = integer_zero_node;
521 else
522 return 0;
524 if (TREE_CODE (ptr) != MEM_REF)
525 return 0;
527 /* Add the MEM_REF byte offset. */
528 tree mem_off = TREE_OPERAND (ptr, 1);
529 off = fold_build2 (PLUS_EXPR, TREE_TYPE (off), off, mem_off);
530 ptr = TREE_OPERAND (ptr, 0);
532 else if (rhs_code == POINTER_PLUS_EXPR)
534 ptr = gimple_assign_rhs1 (def_stmt);
535 off = gimple_assign_rhs2 (def_stmt);
537 else
538 return 0;
540 if (TREE_CODE (ptr) != SSA_NAME)
541 return 0;
543 if (!tree_fits_shwi_p (off))
545 if (int idx = ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)])
546 if (offrng)
548 /* Only when requested by setting OFFRNG to non-null,
549 return the index corresponding to the SSA_NAME.
550 Do this irrespective of the whether the offset
551 is known. */
552 if (get_range (off, def_stmt, offrng, rvals))
554 /* When the offset range is known, increment it
555 it by the constant offset computed in prior
556 iterations and store it in the OFFRNG array. */
557 offrng[0] += offset;
558 offrng[1] += offset;
560 else
562 /* When the offset range cannot be determined
563 store [0, SIZE_MAX] and let the caller decide
564 if the offset matters. */
565 offrng[1] = wi::to_wide (TYPE_MAX_VALUE (sizetype));
566 offrng[0] = wi::zero (offrng[1].get_precision ());
568 return idx;
570 return 0;
573 HOST_WIDE_INT this_off = tree_to_shwi (off);
574 if (offrng)
576 offrng[0] += wi::shwi (this_off, offrng->get_precision ());
577 offrng[1] += offrng[0];
580 if (this_off < 0)
581 return last_idx;
583 offset = (unsigned HOST_WIDE_INT) offset + this_off;
584 if (offset < 0)
585 return last_idx;
587 if (int idx = ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)])
589 strinfo *si = get_strinfo (idx);
590 if (si)
592 if (compare_nonzero_chars (si, offset) >= 0)
593 return get_stridx_plus_constant (si, offset, exp);
595 if (offrng)
596 last_idx = idx;
599 e = ptr;
602 return last_idx;
605 if (TREE_CODE (exp) == ADDR_EXPR)
607 int idx = get_addr_stridx (TREE_OPERAND (exp, 0), stmt, exp, NULL);
608 if (idx != 0)
609 return idx;
612 const char *p = c_getstr (exp);
613 if (p)
614 return ~(int) strlen (p);
616 return 0;
619 /* Return true if strinfo vector is shared with the immediate dominator. */
621 static inline bool
622 strinfo_shared (void)
624 return vec_safe_length (stridx_to_strinfo)
625 && (*stridx_to_strinfo)[0] != NULL;
628 /* Unshare strinfo vector that is shared with the immediate dominator. */
630 static void
631 unshare_strinfo_vec (void)
633 strinfo *si;
634 unsigned int i = 0;
636 gcc_assert (strinfo_shared ());
637 stridx_to_strinfo = vec_safe_copy (stridx_to_strinfo);
638 for (i = 1; vec_safe_iterate (stridx_to_strinfo, i, &si); ++i)
639 if (si != NULL)
640 si->refcount++;
641 (*stridx_to_strinfo)[0] = NULL;
644 /* Attempt to create a string index for exp, ADDR_EXPR's operand.
645 Return a pointer to the location where the string index can
646 be stored (if 0) or is stored, or NULL if this can't be tracked. */
648 static int *
649 addr_stridxptr (tree exp)
651 HOST_WIDE_INT off;
653 poly_int64 poff;
654 tree base = get_addr_base_and_unit_offset (exp, &poff);
655 if (base == NULL_TREE || !DECL_P (base) || !poff.is_constant (&off))
656 return NULL;
658 if (!decl_to_stridxlist_htab)
660 decl_to_stridxlist_htab
661 = new hash_map<tree_decl_hash, stridxlist> (64);
662 gcc_obstack_init (&stridx_obstack);
665 bool existed;
666 stridxlist *list = &decl_to_stridxlist_htab->get_or_insert (base, &existed);
667 if (existed)
669 int i;
670 stridxlist *before = NULL;
671 for (i = 0; i < 32; i++)
673 if (list->offset == off)
674 return &list->idx;
675 if (list->offset > off && before == NULL)
676 before = list;
677 if (list->next == NULL)
678 break;
679 list = list->next;
681 if (i == 32)
682 return NULL;
683 if (before)
685 list = before;
686 before = XOBNEW (&stridx_obstack, struct stridxlist);
687 *before = *list;
688 list->next = before;
689 list->offset = off;
690 list->idx = 0;
691 return &list->idx;
693 list->next = XOBNEW (&stridx_obstack, struct stridxlist);
694 list = list->next;
697 list->next = NULL;
698 list->offset = off;
699 list->idx = 0;
700 return &list->idx;
703 /* Create a new string index, or return 0 if reached limit. */
705 static int
706 new_stridx (tree exp)
708 int idx;
709 if (max_stridx >= param_max_tracked_strlens)
710 return 0;
711 if (TREE_CODE (exp) == SSA_NAME)
713 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
714 return 0;
715 idx = max_stridx++;
716 ssa_ver_to_stridx[SSA_NAME_VERSION (exp)] = idx;
717 return idx;
719 if (TREE_CODE (exp) == ADDR_EXPR)
721 int *pidx = addr_stridxptr (TREE_OPERAND (exp, 0));
722 if (pidx != NULL)
724 gcc_assert (*pidx == 0);
725 *pidx = max_stridx++;
726 return *pidx;
729 return 0;
732 /* Like new_stridx, but for ADDR_EXPR's operand instead. */
734 static int
735 new_addr_stridx (tree exp)
737 int *pidx;
738 if (max_stridx >= param_max_tracked_strlens)
739 return 0;
740 pidx = addr_stridxptr (exp);
741 if (pidx != NULL)
743 gcc_assert (*pidx == 0);
744 *pidx = max_stridx++;
745 return *pidx;
747 return 0;
750 /* Create a new strinfo. */
752 static strinfo *
753 new_strinfo (tree ptr, int idx, tree nonzero_chars, bool full_string_p)
755 strinfo *si = strinfo_pool.allocate ();
756 si->nonzero_chars = nonzero_chars;
757 STRIP_USELESS_TYPE_CONVERSION (ptr);
758 si->ptr = ptr;
759 si->stmt = NULL;
760 si->alloc = NULL;
761 si->endptr = NULL_TREE;
762 si->refcount = 1;
763 si->idx = idx;
764 si->first = 0;
765 si->prev = 0;
766 si->next = 0;
767 si->writable = false;
768 si->dont_invalidate = false;
769 si->full_string_p = full_string_p;
770 return si;
773 /* Decrease strinfo refcount and free it if not referenced anymore. */
775 static inline void
776 free_strinfo (strinfo *si)
778 if (si && --si->refcount == 0)
779 strinfo_pool.remove (si);
782 /* Set strinfo in the vector entry IDX to SI. */
784 static inline void
785 set_strinfo (int idx, strinfo *si)
787 if (vec_safe_length (stridx_to_strinfo) && (*stridx_to_strinfo)[0])
788 unshare_strinfo_vec ();
789 if (vec_safe_length (stridx_to_strinfo) <= (unsigned int) idx)
790 vec_safe_grow_cleared (stridx_to_strinfo, idx + 1, true);
791 (*stridx_to_strinfo)[idx] = si;
794 /* Return the first strinfo in the related strinfo chain
795 if all strinfos in between belong to the chain, otherwise NULL. */
797 static strinfo *
798 verify_related_strinfos (strinfo *origsi)
800 strinfo *si = origsi, *psi;
802 if (origsi->first == 0)
803 return NULL;
804 for (; si->prev; si = psi)
806 if (si->first != origsi->first)
807 return NULL;
808 psi = get_strinfo (si->prev);
809 if (psi == NULL)
810 return NULL;
811 if (psi->next != si->idx)
812 return NULL;
814 if (si->idx != si->first)
815 return NULL;
816 return si;
819 /* Set SI's endptr to ENDPTR and compute its length based on SI->ptr.
820 Use LOC for folding. */
822 static void
823 set_endptr_and_length (location_t loc, strinfo *si, tree endptr)
825 si->endptr = endptr;
826 si->stmt = NULL;
827 tree start_as_size = fold_convert_loc (loc, size_type_node, si->ptr);
828 tree end_as_size = fold_convert_loc (loc, size_type_node, endptr);
829 si->nonzero_chars = fold_build2_loc (loc, MINUS_EXPR, size_type_node,
830 end_as_size, start_as_size);
831 si->full_string_p = true;
834 /* Return the string length, or NULL if it can't be computed.
835 The length may but need not be constant. Instead, it might be
836 the result of a strlen() call. */
838 static tree
839 get_string_length (strinfo *si)
841 /* If the length has already been computed return it if it's exact
842 (i.e., the string is nul-terminated at NONZERO_CHARS), or return
843 null if it isn't. */
844 if (si->nonzero_chars)
845 return si->full_string_p ? si->nonzero_chars : NULL;
847 /* If the string is the result of one of the built-in calls below
848 attempt to compute the length from the call statement. */
849 if (si->stmt)
851 gimple *stmt = si->stmt, *lenstmt;
852 tree callee, lhs, fn, tem;
853 location_t loc;
854 gimple_stmt_iterator gsi;
856 gcc_assert (is_gimple_call (stmt));
857 callee = gimple_call_fndecl (stmt);
858 gcc_assert (callee && fndecl_built_in_p (callee, BUILT_IN_NORMAL));
859 lhs = gimple_call_lhs (stmt);
860 /* unshare_strinfo is intentionally not called here. The (delayed)
861 transformation of strcpy or strcat into stpcpy is done at the place
862 of the former strcpy/strcat call and so can affect all the strinfos
863 with the same stmt. If they were unshared before and transformation
864 has been already done, the handling of BUILT_IN_STPCPY{,_CHK} should
865 just compute the right length. */
866 switch (DECL_FUNCTION_CODE (callee))
868 case BUILT_IN_STRCAT:
869 case BUILT_IN_STRCAT_CHK:
870 gsi = gsi_for_stmt (stmt);
871 fn = builtin_decl_implicit (BUILT_IN_STRLEN);
872 gcc_assert (lhs == NULL_TREE);
873 tem = unshare_expr (gimple_call_arg (stmt, 0));
874 lenstmt = gimple_build_call (fn, 1, tem);
875 lhs = make_ssa_name (TREE_TYPE (TREE_TYPE (fn)), lenstmt);
876 gimple_call_set_lhs (lenstmt, lhs);
877 gimple_set_vuse (lenstmt, gimple_vuse (stmt));
878 gsi_insert_before (&gsi, lenstmt, GSI_SAME_STMT);
879 tem = gimple_call_arg (stmt, 0);
880 if (!ptrofftype_p (TREE_TYPE (lhs)))
882 lhs = convert_to_ptrofftype (lhs);
883 lhs = force_gimple_operand_gsi (&gsi, lhs, true, NULL_TREE,
884 true, GSI_SAME_STMT);
886 lenstmt = gimple_build_assign
887 (make_ssa_name (TREE_TYPE (gimple_call_arg (stmt, 0))),
888 POINTER_PLUS_EXPR,tem, lhs);
889 gsi_insert_before (&gsi, lenstmt, GSI_SAME_STMT);
890 gimple_call_set_arg (stmt, 0, gimple_assign_lhs (lenstmt));
891 lhs = NULL_TREE;
892 /* FALLTHRU */
893 case BUILT_IN_STRCPY:
894 case BUILT_IN_STRCPY_CHK:
895 gcc_assert (builtin_decl_implicit_p (BUILT_IN_STPCPY));
896 if (gimple_call_num_args (stmt) == 2)
897 fn = builtin_decl_implicit (BUILT_IN_STPCPY);
898 else
899 fn = builtin_decl_explicit (BUILT_IN_STPCPY_CHK);
900 gcc_assert (lhs == NULL_TREE);
901 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
903 fprintf (dump_file, "Optimizing: ");
904 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
906 gimple_call_set_fndecl (stmt, fn);
907 lhs = make_ssa_name (TREE_TYPE (TREE_TYPE (fn)), stmt);
908 gimple_call_set_lhs (stmt, lhs);
909 update_stmt (stmt);
910 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
912 fprintf (dump_file, "into: ");
913 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
915 /* FALLTHRU */
916 case BUILT_IN_STPCPY:
917 case BUILT_IN_STPCPY_CHK:
918 gcc_assert (lhs != NULL_TREE);
919 loc = gimple_location (stmt);
920 set_endptr_and_length (loc, si, lhs);
921 for (strinfo *chainsi = verify_related_strinfos (si);
922 chainsi != NULL;
923 chainsi = get_next_strinfo (chainsi))
924 if (chainsi->nonzero_chars == NULL)
925 set_endptr_and_length (loc, chainsi, lhs);
926 break;
927 case BUILT_IN_ALLOCA:
928 case BUILT_IN_ALLOCA_WITH_ALIGN:
929 case BUILT_IN_MALLOC:
930 break;
931 /* BUILT_IN_CALLOC always has si->nonzero_chars set. */
932 default:
933 gcc_unreachable ();
934 break;
938 return si->nonzero_chars;
941 /* Dump strlen data to FP for statement STMT. When non-null, RVALS
942 points to the valuation engine used to calculate ranges, and is
943 used to dump strlen range for non-constant results. */
945 DEBUG_FUNCTION void
946 dump_strlen_info (FILE *fp, gimple *stmt, range_query *rvals)
948 if (stmt)
950 fprintf (fp, "\nDumping strlen pass data after ");
951 print_gimple_expr (fp, stmt, TDF_LINENO);
952 fputc ('\n', fp);
954 else
955 fprintf (fp, "\nDumping strlen pass data\n");
957 fprintf (fp, "max_stridx = %i\n", max_stridx);
958 fprintf (fp, "ssa_ver_to_stridx has %u elements\n",
959 ssa_ver_to_stridx.length ());
960 fprintf (fp, "stridx_to_strinfo");
961 if (stridx_to_strinfo)
963 fprintf (fp, " has %u elements\n", stridx_to_strinfo->length ());
964 for (unsigned i = 0; i != stridx_to_strinfo->length (); ++i)
966 if (strinfo *si = (*stridx_to_strinfo)[i])
968 if (!si->idx)
969 continue;
970 fprintf (fp, " idx = %i", si->idx);
971 if (si->ptr)
973 fprintf (fp, ", ptr = ");
974 print_generic_expr (fp, si->ptr);
977 if (si->nonzero_chars)
979 fprintf (fp, ", nonzero_chars = ");
980 print_generic_expr (fp, si->nonzero_chars);
981 if (TREE_CODE (si->nonzero_chars) == SSA_NAME)
983 value_range_kind rng = VR_UNDEFINED;
984 wide_int min, max;
985 if (rvals)
987 value_range vr;
988 rvals->range_of_expr (vr, si->nonzero_chars,
989 si->stmt);
990 rng = vr.kind ();
991 if (range_int_cst_p (&vr))
993 min = wi::to_wide (vr.min ());
994 max = wi::to_wide (vr.max ());
996 else
997 rng = VR_UNDEFINED;
999 else
1001 value_range vr;
1002 get_range_query (cfun)
1003 ->range_of_expr (vr, si->nonzero_chars);
1004 rng = vr.kind ();
1005 if (!vr.undefined_p ())
1007 min = wi::to_wide (vr.min ());
1008 max = wi::to_wide (vr.max ());
1012 if (rng == VR_RANGE || rng == VR_ANTI_RANGE)
1014 fprintf (fp, " %s[%llu, %llu]",
1015 rng == VR_RANGE ? "" : "~",
1016 (long long) min.to_uhwi (),
1017 (long long) max.to_uhwi ());
1022 fprintf (fp, ", refcount = %i", si->refcount);
1023 if (si->stmt)
1025 fprintf (fp, ", stmt = ");
1026 print_gimple_expr (fp, si->stmt, 0);
1028 if (si->alloc)
1030 fprintf (fp, ", alloc = ");
1031 print_gimple_expr (fp, si->alloc, 0);
1033 if (si->writable)
1034 fprintf (fp, ", writable");
1035 if (si->dont_invalidate)
1036 fprintf (fp, ", dont_invalidate");
1037 if (si->full_string_p)
1038 fprintf (fp, ", full_string_p");
1039 if (strinfo *next = get_next_strinfo (si))
1041 fprintf (fp, ", {");
1043 fprintf (fp, "%i%s", next->idx, next->first ? ", " : "");
1044 while ((next = get_next_strinfo (next)));
1045 fprintf (fp, "}");
1047 fputs ("\n", fp);
1051 else
1052 fprintf (fp, " = null\n");
1054 fprintf (fp, "decl_to_stridxlist_htab");
1055 if (decl_to_stridxlist_htab)
1057 fputs ("\n", fp);
1058 typedef decl_to_stridxlist_htab_t::iterator iter_t;
1059 for (iter_t it = decl_to_stridxlist_htab->begin ();
1060 it != decl_to_stridxlist_htab->end (); ++it)
1062 tree decl = (*it).first;
1063 stridxlist *list = &(*it).second;
1064 fprintf (fp, " decl = ");
1065 print_generic_expr (fp, decl);
1066 if (list)
1068 fprintf (fp, ", offsets = {");
1069 for (; list; list = list->next)
1070 fprintf (fp, "%lli%s", (long long) list->offset,
1071 list->next ? ", " : "");
1072 fputs ("}", fp);
1074 fputs ("\n", fp);
1077 else
1078 fprintf (fp, " = null\n");
1080 if (laststmt.stmt)
1082 fprintf (fp, "laststmt = ");
1083 print_gimple_expr (fp, laststmt.stmt, 0);
1084 fprintf (fp, ", len = ");
1085 print_generic_expr (fp, laststmt.len);
1086 fprintf (fp, ", stridx = %i\n", laststmt.stridx);
1090 /* Helper of get_range_strlen_dynamic(). See below. */
1092 static bool
1093 get_range_strlen_phi (tree src, gphi *phi,
1094 c_strlen_data *pdata, bitmap visited,
1095 pointer_query *ptr_qry, unsigned *pssa_def_max)
1097 if (!bitmap_set_bit (visited, SSA_NAME_VERSION (src)))
1098 return true;
1100 if (*pssa_def_max == 0)
1101 return false;
1103 --*pssa_def_max;
1105 /* Iterate over the PHI arguments and determine the minimum and maximum
1106 length/size of each and incorporate them into the overall result. */
1107 for (unsigned i = 0; i != gimple_phi_num_args (phi); ++i)
1109 tree arg = gimple_phi_arg_def (phi, i);
1110 if (arg == gimple_phi_result (phi))
1111 continue;
1113 c_strlen_data argdata = { };
1114 if (!get_range_strlen_dynamic (arg, phi, &argdata, visited, ptr_qry,
1115 pssa_def_max))
1117 pdata->maxlen = build_all_ones_cst (size_type_node);
1118 continue;
1121 /* Set the DECL of an unterminated array this argument refers to
1122 if one hasn't been found yet. */
1123 if (!pdata->decl && argdata.decl)
1124 pdata->decl = argdata.decl;
1126 if (!argdata.minlen
1127 || (integer_zerop (argdata.minlen)
1128 && (!argdata.maxbound
1129 || integer_all_onesp (argdata.maxbound))
1130 && integer_all_onesp (argdata.maxlen)))
1132 /* Set the upper bound of the length to unbounded. */
1133 pdata->maxlen = build_all_ones_cst (size_type_node);
1134 continue;
1137 /* Adjust the minimum and maximum length determined so far and
1138 the upper bound on the array size. */
1139 if (TREE_CODE (argdata.minlen) == INTEGER_CST
1140 && (!pdata->minlen
1141 || tree_int_cst_lt (argdata.minlen, pdata->minlen)))
1142 pdata->minlen = argdata.minlen;
1144 if (TREE_CODE (argdata.maxlen) == INTEGER_CST
1145 && (!pdata->maxlen
1146 || (argdata.maxlen
1147 && tree_int_cst_lt (pdata->maxlen, argdata.maxlen))))
1148 pdata->maxlen = argdata.maxlen;
1150 if (!pdata->maxbound
1151 || TREE_CODE (pdata->maxbound) != INTEGER_CST
1152 || (argdata.maxbound
1153 && tree_int_cst_lt (pdata->maxbound, argdata.maxbound)
1154 && !integer_all_onesp (argdata.maxbound)))
1155 pdata->maxbound = argdata.maxbound;
1158 return true;
1161 /* Return the maximum possible length of the string PTR that's less
1162 than MAXLEN given the size of the object of subobject it points
1163 to at the given STMT. MAXLEN is the maximum length of the string
1164 determined so far. Return null when no such maximum can be
1165 determined. */
1167 static tree
1168 get_maxbound (tree ptr, gimple *stmt, offset_int maxlen,
1169 pointer_query *ptr_qry)
1171 access_ref aref;
1172 if (!ptr_qry->get_ref (ptr, stmt, &aref))
1173 return NULL_TREE;
1175 offset_int sizrem = aref.size_remaining ();
1176 if (sizrem <= 0)
1177 return NULL_TREE;
1179 if (sizrem < maxlen)
1180 maxlen = sizrem - 1;
1182 /* Try to determine the maximum from the subobject at the offset.
1183 This handles MEM [&some-struct, member-offset] that's often
1184 the result of folding COMPONENT_REF [some-struct, member]. */
1185 tree reftype = TREE_TYPE (aref.ref);
1186 if (!RECORD_OR_UNION_TYPE_P (reftype)
1187 || aref.offrng[0] != aref.offrng[1]
1188 || !wi::fits_shwi_p (aref.offrng[0]))
1189 return wide_int_to_tree (size_type_node, maxlen);
1191 HOST_WIDE_INT off = aref.offrng[0].to_shwi ();
1192 tree fld = field_at_offset (reftype, NULL_TREE, off);
1193 if (!fld || !DECL_SIZE_UNIT (fld))
1194 return wide_int_to_tree (size_type_node, maxlen);
1196 offset_int size = wi::to_offset (DECL_SIZE_UNIT (fld));
1197 if (maxlen < size)
1198 return wide_int_to_tree (size_type_node, maxlen);
1200 return wide_int_to_tree (size_type_node, size - 1);
1203 /* Attempt to determine the length of the string SRC. On success, store
1204 the length in *PDATA and return true. Otherwise, return false.
1205 VISITED is a bitmap of visited PHI nodes. RVALS points to the valuation
1206 engine used to calculate ranges. PSSA_DEF_MAX to an SSA_NAME
1207 assignment limit used to prevent runaway recursion. */
1209 static bool
1210 get_range_strlen_dynamic (tree src, gimple *stmt,
1211 c_strlen_data *pdata, bitmap visited,
1212 pointer_query *ptr_qry, unsigned *pssa_def_max)
1214 int idx = get_stridx (src, stmt);
1215 if (!idx)
1217 if (TREE_CODE (src) == SSA_NAME)
1219 gimple *def_stmt = SSA_NAME_DEF_STMT (src);
1220 if (gphi *phi = dyn_cast<gphi *>(def_stmt))
1221 return get_range_strlen_phi (src, phi, pdata, visited, ptr_qry,
1222 pssa_def_max);
1225 /* Return success regardless of the result and handle *PDATA
1226 in the caller. */
1227 get_range_strlen (src, pdata, 1);
1228 return true;
1231 if (idx < 0)
1233 /* SRC is a string of constant length. */
1234 pdata->minlen = build_int_cst (size_type_node, ~idx);
1235 pdata->maxlen = pdata->minlen;
1236 pdata->maxbound = pdata->maxlen;
1237 return true;
1240 if (strinfo *si = get_strinfo (idx))
1242 pdata->minlen = get_string_length (si);
1243 if (!pdata->minlen && si->nonzero_chars)
1245 if (TREE_CODE (si->nonzero_chars) == INTEGER_CST)
1246 pdata->minlen = si->nonzero_chars;
1247 else if (TREE_CODE (si->nonzero_chars) == SSA_NAME)
1249 value_range vr;
1250 ptr_qry->rvals->range_of_expr (vr, si->nonzero_chars, si->stmt);
1251 if (range_int_cst_p (&vr))
1253 pdata->minlen = vr.min ();
1254 pdata->maxlen = vr.max ();
1256 else
1257 pdata->minlen = build_zero_cst (size_type_node);
1259 else
1260 pdata->minlen = build_zero_cst (size_type_node);
1262 tree base = si->ptr;
1263 if (TREE_CODE (base) == ADDR_EXPR)
1264 base = TREE_OPERAND (base, 0);
1266 HOST_WIDE_INT off;
1267 poly_int64 poff;
1268 base = get_addr_base_and_unit_offset (base, &poff);
1269 if (base
1270 && DECL_P (base)
1271 && TREE_CODE (TREE_TYPE (base)) == ARRAY_TYPE
1272 && TYPE_SIZE_UNIT (TREE_TYPE (base))
1273 && poff.is_constant (&off))
1275 tree basetype = TREE_TYPE (base);
1276 tree size = TYPE_SIZE_UNIT (basetype);
1277 if (TREE_CODE (size) == INTEGER_CST)
1279 ++off; /* Increment for the terminating nul. */
1280 tree toffset = build_int_cst (size_type_node, off);
1281 pdata->maxlen = fold_build2 (MINUS_EXPR, size_type_node, size,
1282 toffset);
1283 pdata->maxbound = pdata->maxlen;
1285 else
1286 pdata->maxlen = build_all_ones_cst (size_type_node);
1288 else
1289 pdata->maxlen = build_all_ones_cst (size_type_node);
1291 else if (pdata->minlen && TREE_CODE (pdata->minlen) == SSA_NAME)
1293 value_range vr;
1294 ptr_qry->rvals->range_of_expr (vr, si->nonzero_chars, stmt);
1295 if (range_int_cst_p (&vr))
1297 pdata->minlen = vr.min ();
1298 pdata->maxlen = vr.max ();
1299 offset_int max = offset_int::from (vr.upper_bound (0), SIGNED);
1300 if (tree maxbound = get_maxbound (si->ptr, stmt, max, ptr_qry))
1301 pdata->maxbound = maxbound;
1302 else
1303 pdata->maxbound = pdata->maxlen;
1305 else
1307 pdata->minlen = build_zero_cst (size_type_node);
1308 pdata->maxlen = build_all_ones_cst (size_type_node);
1311 else if (pdata->minlen && TREE_CODE (pdata->minlen) == INTEGER_CST)
1313 pdata->maxlen = pdata->minlen;
1314 pdata->maxbound = pdata->minlen;
1316 else
1318 /* For PDATA->MINLEN that's a non-constant expression such
1319 as PLUS_EXPR whose value range is unknown, set the bounds
1320 to zero and SIZE_MAX. */
1321 pdata->minlen = build_zero_cst (size_type_node);
1322 pdata->maxlen = build_all_ones_cst (size_type_node);
1325 return true;
1328 return false;
1331 /* Analogous to get_range_strlen but for dynamically created strings,
1332 i.e., those created by calls to strcpy as opposed to just string
1333 constants.
1334 Try to obtain the range of the lengths of the string(s) referenced
1335 by SRC, or the size of the largest array SRC refers to if the range
1336 of lengths cannot be determined, and store all in *PDATA. RVALS
1337 points to the valuation engine used to calculate ranges. */
1339 void
1340 get_range_strlen_dynamic (tree src, gimple *stmt, c_strlen_data *pdata,
1341 pointer_query &ptr_qry)
1343 auto_bitmap visited;
1344 tree maxbound = pdata->maxbound;
1346 unsigned limit = param_ssa_name_def_chain_limit;
1347 if (!get_range_strlen_dynamic (src, stmt, pdata, visited, &ptr_qry, &limit))
1349 /* On failure extend the length range to an impossible maximum
1350 (a valid MAXLEN must be less than PTRDIFF_MAX - 1). Other
1351 members can stay unchanged regardless. */
1352 pdata->minlen = ssize_int (0);
1353 pdata->maxlen = build_all_ones_cst (size_type_node);
1355 else if (!pdata->minlen)
1356 pdata->minlen = ssize_int (0);
1358 /* If it's unchanged from it initial non-null value, set the conservative
1359 MAXBOUND to SIZE_MAX. Otherwise leave it null (if it is null). */
1360 if (maxbound && pdata->maxbound == maxbound)
1361 pdata->maxbound = build_all_ones_cst (size_type_node);
1364 /* Invalidate string length information for strings whose length might
1365 change due to stores in STMT, except those marked DONT_INVALIDATE.
1366 For string-modifying statements, ZERO_WRITE is set when the statement
1367 wrote only zeros.
1368 Returns true if any STRIDX_TO_STRINFO entries were considered
1369 for invalidation. */
1371 static bool
1372 maybe_invalidate (gimple *stmt, bool zero_write = false)
1374 if (dump_file && (dump_flags & TDF_DETAILS))
1376 fprintf (dump_file, "%s called for ", __func__);
1377 print_gimple_stmt (dump_file, stmt, TDF_LINENO);
1380 strinfo *si;
1381 bool nonempty = false;
1383 for (unsigned i = 1; vec_safe_iterate (stridx_to_strinfo, i, &si); ++i)
1385 if (si == NULL || !POINTER_TYPE_P (TREE_TYPE (si->ptr)))
1386 continue;
1388 nonempty = true;
1390 /* Unconditionally reset DONT_INVALIDATE. */
1391 bool dont_invalidate = si->dont_invalidate;
1392 si->dont_invalidate = false;
1394 if (dont_invalidate)
1395 continue;
1397 ao_ref r;
1398 tree size = si->nonzero_chars;
1399 ao_ref_init_from_ptr_and_size (&r, si->ptr, size);
1400 /* Include the terminating nul in the size of the string
1401 to consider when determining possible clobber. But do not
1402 add it to 'size' since we don't know whether it would
1403 actually fit the allocated area. */
1404 if (known_size_p (r.size))
1406 if (known_le (r.size, HOST_WIDE_INT_MAX - BITS_PER_UNIT))
1407 r.max_size += BITS_PER_UNIT;
1408 else
1409 r.max_size = -1;
1411 if (stmt_may_clobber_ref_p_1 (stmt, &r))
1413 if (dump_file && (dump_flags & TDF_DETAILS))
1415 fputs (" statement may clobber object ", dump_file);
1416 print_generic_expr (dump_file, si->ptr);
1417 if (size && tree_fits_uhwi_p (size))
1418 fprintf (dump_file, " " HOST_WIDE_INT_PRINT_UNSIGNED
1419 " bytes in size", tree_to_uhwi (size));
1420 fputc ('\n', dump_file);
1423 set_strinfo (i, NULL);
1424 free_strinfo (si);
1425 continue;
1428 if (size
1429 && !zero_write
1430 && si->stmt
1431 && is_gimple_call (si->stmt)
1432 && (DECL_FUNCTION_CODE (gimple_call_fndecl (si->stmt))
1433 == BUILT_IN_CALLOC))
1435 /* If the clobber test above considered the length of
1436 the string (including the nul), then for (potentially)
1437 non-zero writes that might modify storage allocated by
1438 calloc consider the whole object and if it might be
1439 clobbered by the statement reset the statement. */
1440 ao_ref_init_from_ptr_and_size (&r, si->ptr, NULL_TREE);
1441 if (stmt_may_clobber_ref_p_1 (stmt, &r))
1442 si->stmt = NULL;
1446 if (dump_file && (dump_flags & TDF_DETAILS))
1447 fprintf (dump_file, "%s returns %i\n", __func__, nonempty);
1449 return nonempty;
1452 /* Unshare strinfo record SI, if it has refcount > 1 or
1453 if stridx_to_strinfo vector is shared with some other
1454 bbs. */
1456 static strinfo *
1457 unshare_strinfo (strinfo *si)
1459 strinfo *nsi;
1461 if (si->refcount == 1 && !strinfo_shared ())
1462 return si;
1464 nsi = new_strinfo (si->ptr, si->idx, si->nonzero_chars, si->full_string_p);
1465 nsi->stmt = si->stmt;
1466 nsi->alloc = si->alloc;
1467 nsi->endptr = si->endptr;
1468 nsi->first = si->first;
1469 nsi->prev = si->prev;
1470 nsi->next = si->next;
1471 nsi->writable = si->writable;
1472 set_strinfo (si->idx, nsi);
1473 free_strinfo (si);
1474 return nsi;
1477 /* Attempt to create a new strinfo for BASESI + OFF, or find existing
1478 strinfo if there is any. Return it's idx, or 0 if no strinfo has
1479 been created. */
1481 static int
1482 get_stridx_plus_constant (strinfo *basesi, unsigned HOST_WIDE_INT off,
1483 tree ptr)
1485 if (TREE_CODE (ptr) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (ptr))
1486 return 0;
1488 if (compare_nonzero_chars (basesi, off) < 0
1489 || !tree_fits_uhwi_p (basesi->nonzero_chars))
1490 return 0;
1492 unsigned HOST_WIDE_INT nonzero_chars
1493 = tree_to_uhwi (basesi->nonzero_chars) - off;
1494 strinfo *si = basesi, *chainsi;
1495 if (si->first || si->prev || si->next)
1496 si = verify_related_strinfos (basesi);
1497 if (si == NULL
1498 || si->nonzero_chars == NULL_TREE
1499 || TREE_CODE (si->nonzero_chars) != INTEGER_CST)
1500 return 0;
1502 if (TREE_CODE (ptr) == SSA_NAME
1503 && ssa_ver_to_stridx.length () <= SSA_NAME_VERSION (ptr))
1504 ssa_ver_to_stridx.safe_grow_cleared (num_ssa_names, true);
1506 gcc_checking_assert (compare_tree_int (si->nonzero_chars, off) != -1);
1507 for (chainsi = si; chainsi->next; chainsi = si)
1509 si = get_next_strinfo (chainsi);
1510 if (si == NULL
1511 || si->nonzero_chars == NULL_TREE
1512 || TREE_CODE (si->nonzero_chars) != INTEGER_CST)
1513 break;
1514 int r = compare_tree_int (si->nonzero_chars, nonzero_chars);
1515 if (r != 1)
1517 if (r == 0)
1519 if (TREE_CODE (ptr) == SSA_NAME)
1520 ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)] = si->idx;
1521 else
1523 int *pidx = addr_stridxptr (TREE_OPERAND (ptr, 0));
1524 if (pidx != NULL && *pidx == 0)
1525 *pidx = si->idx;
1527 return si->idx;
1529 break;
1533 int idx = new_stridx (ptr);
1534 if (idx == 0)
1535 return 0;
1536 si = new_strinfo (ptr, idx, build_int_cst (size_type_node, nonzero_chars),
1537 basesi->full_string_p);
1538 set_strinfo (idx, si);
1539 if (strinfo *nextsi = get_strinfo (chainsi->next))
1541 nextsi = unshare_strinfo (nextsi);
1542 si->next = nextsi->idx;
1543 nextsi->prev = idx;
1545 chainsi = unshare_strinfo (chainsi);
1546 if (chainsi->first == 0)
1547 chainsi->first = chainsi->idx;
1548 chainsi->next = idx;
1549 if (chainsi->endptr == NULL_TREE && zero_length_string_p (si))
1550 chainsi->endptr = ptr;
1551 si->endptr = chainsi->endptr;
1552 si->prev = chainsi->idx;
1553 si->first = chainsi->first;
1554 si->writable = chainsi->writable;
1555 return si->idx;
1558 /* Note that PTR, a pointer SSA_NAME initialized in the current stmt, points
1559 to a zero-length string and if possible chain it to a related strinfo
1560 chain whose part is or might be CHAINSI. */
1562 static strinfo *
1563 zero_length_string (tree ptr, strinfo *chainsi)
1565 strinfo *si;
1566 int idx;
1567 if (ssa_ver_to_stridx.length () <= SSA_NAME_VERSION (ptr))
1568 ssa_ver_to_stridx.safe_grow_cleared (num_ssa_names, true);
1569 gcc_checking_assert (TREE_CODE (ptr) == SSA_NAME
1570 && ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)] == 0);
1572 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (ptr))
1573 return NULL;
1574 if (chainsi != NULL)
1576 si = verify_related_strinfos (chainsi);
1577 if (si)
1581 /* We shouldn't mix delayed and non-delayed lengths. */
1582 gcc_assert (si->full_string_p);
1583 if (si->endptr == NULL_TREE)
1585 si = unshare_strinfo (si);
1586 si->endptr = ptr;
1588 chainsi = si;
1589 si = get_next_strinfo (si);
1591 while (si != NULL);
1592 if (zero_length_string_p (chainsi))
1594 if (chainsi->next)
1596 chainsi = unshare_strinfo (chainsi);
1597 chainsi->next = 0;
1599 ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)] = chainsi->idx;
1600 return chainsi;
1603 else
1605 /* We shouldn't mix delayed and non-delayed lengths. */
1606 gcc_assert (chainsi->full_string_p);
1607 if (chainsi->first || chainsi->prev || chainsi->next)
1609 chainsi = unshare_strinfo (chainsi);
1610 chainsi->first = 0;
1611 chainsi->prev = 0;
1612 chainsi->next = 0;
1616 idx = new_stridx (ptr);
1617 if (idx == 0)
1618 return NULL;
1619 si = new_strinfo (ptr, idx, build_int_cst (size_type_node, 0), true);
1620 set_strinfo (idx, si);
1621 si->endptr = ptr;
1622 if (chainsi != NULL)
1624 chainsi = unshare_strinfo (chainsi);
1625 if (chainsi->first == 0)
1626 chainsi->first = chainsi->idx;
1627 chainsi->next = idx;
1628 if (chainsi->endptr == NULL_TREE)
1629 chainsi->endptr = ptr;
1630 si->prev = chainsi->idx;
1631 si->first = chainsi->first;
1632 si->writable = chainsi->writable;
1634 return si;
1637 /* For strinfo ORIGSI whose length has been just updated, adjust other
1638 related strinfos so that they match the new ORIGSI. This involves:
1640 - adding ADJ to the nonzero_chars fields
1641 - copying full_string_p from the new ORIGSI. */
1643 static void
1644 adjust_related_strinfos (location_t loc, strinfo *origsi, tree adj)
1646 strinfo *si = verify_related_strinfos (origsi);
1648 if (si == NULL)
1649 return;
1651 while (1)
1653 strinfo *nsi;
1655 if (si != origsi)
1657 tree tem;
1659 si = unshare_strinfo (si);
1660 /* We shouldn't see delayed lengths here; the caller must
1661 have calculated the old length in order to calculate
1662 the adjustment. */
1663 gcc_assert (si->nonzero_chars);
1664 tem = fold_convert_loc (loc, TREE_TYPE (si->nonzero_chars), adj);
1665 si->nonzero_chars = fold_build2_loc (loc, PLUS_EXPR,
1666 TREE_TYPE (si->nonzero_chars),
1667 si->nonzero_chars, tem);
1668 si->full_string_p = origsi->full_string_p;
1670 si->endptr = NULL_TREE;
1671 si->dont_invalidate = true;
1673 nsi = get_next_strinfo (si);
1674 if (nsi == NULL)
1675 return;
1676 si = nsi;
1680 /* Find if there are other SSA_NAME pointers equal to PTR
1681 for which we don't track their string lengths yet. If so, use
1682 IDX for them. */
1684 static void
1685 find_equal_ptrs (tree ptr, int idx)
1687 if (TREE_CODE (ptr) != SSA_NAME)
1688 return;
1689 while (1)
1691 gimple *stmt = SSA_NAME_DEF_STMT (ptr);
1692 if (!is_gimple_assign (stmt))
1693 return;
1694 ptr = gimple_assign_rhs1 (stmt);
1695 switch (gimple_assign_rhs_code (stmt))
1697 case SSA_NAME:
1698 break;
1699 CASE_CONVERT:
1700 if (!POINTER_TYPE_P (TREE_TYPE (ptr)))
1701 return;
1702 if (TREE_CODE (ptr) == SSA_NAME)
1703 break;
1704 if (TREE_CODE (ptr) != ADDR_EXPR)
1705 return;
1706 /* FALLTHRU */
1707 case ADDR_EXPR:
1709 int *pidx = addr_stridxptr (TREE_OPERAND (ptr, 0));
1710 if (pidx != NULL && *pidx == 0)
1711 *pidx = idx;
1712 return;
1714 default:
1715 return;
1718 /* We might find an endptr created in this pass. Grow the
1719 vector in that case. */
1720 if (ssa_ver_to_stridx.length () <= SSA_NAME_VERSION (ptr))
1721 ssa_ver_to_stridx.safe_grow_cleared (num_ssa_names, true);
1723 if (ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)] != 0)
1724 return;
1725 ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)] = idx;
1729 /* Return true if STMT is a call to a builtin function with the right
1730 arguments and attributes that should be considered for optimization
1731 by this pass. */
1733 static bool
1734 valid_builtin_call (gimple *stmt)
1736 if (!gimple_call_builtin_p (stmt, BUILT_IN_NORMAL))
1737 return false;
1739 tree callee = gimple_call_fndecl (stmt);
1740 switch (DECL_FUNCTION_CODE (callee))
1742 case BUILT_IN_MEMCMP:
1743 case BUILT_IN_MEMCMP_EQ:
1744 case BUILT_IN_STRCMP:
1745 case BUILT_IN_STRNCMP:
1746 case BUILT_IN_STRCHR:
1747 case BUILT_IN_STRLEN:
1748 case BUILT_IN_STRNLEN:
1749 /* The above functions should be pure. Punt if they aren't. */
1750 if (gimple_vdef (stmt) || gimple_vuse (stmt) == NULL_TREE)
1751 return false;
1752 break;
1754 case BUILT_IN_ALLOCA:
1755 case BUILT_IN_ALLOCA_WITH_ALIGN:
1756 case BUILT_IN_CALLOC:
1757 case BUILT_IN_MALLOC:
1758 case BUILT_IN_MEMCPY:
1759 case BUILT_IN_MEMCPY_CHK:
1760 case BUILT_IN_MEMPCPY:
1761 case BUILT_IN_MEMPCPY_CHK:
1762 case BUILT_IN_MEMSET:
1763 case BUILT_IN_STPCPY:
1764 case BUILT_IN_STPCPY_CHK:
1765 case BUILT_IN_STPNCPY:
1766 case BUILT_IN_STPNCPY_CHK:
1767 case BUILT_IN_STRCAT:
1768 case BUILT_IN_STRCAT_CHK:
1769 case BUILT_IN_STRCPY:
1770 case BUILT_IN_STRCPY_CHK:
1771 case BUILT_IN_STRNCAT:
1772 case BUILT_IN_STRNCAT_CHK:
1773 case BUILT_IN_STRNCPY:
1774 case BUILT_IN_STRNCPY_CHK:
1775 /* The above functions should be neither const nor pure. Punt if they
1776 aren't. */
1777 if (gimple_vdef (stmt) == NULL_TREE || gimple_vuse (stmt) == NULL_TREE)
1778 return false;
1779 break;
1781 default:
1782 break;
1785 return true;
1788 /* If the last .MEM setter statement before STMT is
1789 memcpy (x, y, strlen (y) + 1), the only .MEM use of it is STMT
1790 and STMT is known to overwrite x[strlen (x)], adjust the last memcpy to
1791 just memcpy (x, y, strlen (y)). SI must be the zero length
1792 strinfo. */
1794 void
1795 strlen_pass::adjust_last_stmt (strinfo *si, gimple *stmt, bool is_strcat)
1797 tree vuse, callee, len;
1798 struct laststmt_struct last = laststmt;
1799 strinfo *lastsi, *firstsi;
1800 unsigned len_arg_no = 2;
1802 laststmt.stmt = NULL;
1803 laststmt.len = NULL_TREE;
1804 laststmt.stridx = 0;
1806 if (last.stmt == NULL)
1807 return;
1809 vuse = gimple_vuse (stmt);
1810 if (vuse == NULL_TREE
1811 || SSA_NAME_DEF_STMT (vuse) != last.stmt
1812 || !has_single_use (vuse))
1813 return;
1815 gcc_assert (last.stridx > 0);
1816 lastsi = get_strinfo (last.stridx);
1817 if (lastsi == NULL)
1818 return;
1820 if (lastsi != si)
1822 if (lastsi->first == 0 || lastsi->first != si->first)
1823 return;
1825 firstsi = verify_related_strinfos (si);
1826 if (firstsi == NULL)
1827 return;
1828 while (firstsi != lastsi)
1830 firstsi = get_next_strinfo (firstsi);
1831 if (firstsi == NULL)
1832 return;
1836 if (!is_strcat && !zero_length_string_p (si))
1837 return;
1839 if (is_gimple_assign (last.stmt))
1841 gimple_stmt_iterator gsi;
1843 if (!integer_zerop (gimple_assign_rhs1 (last.stmt)))
1844 return;
1845 if (stmt_could_throw_p (cfun, last.stmt))
1846 return;
1847 gsi = gsi_for_stmt (last.stmt);
1848 unlink_stmt_vdef (last.stmt);
1849 release_defs (last.stmt);
1850 gsi_remove (&gsi, true);
1851 return;
1854 if (!valid_builtin_call (last.stmt))
1855 return;
1857 callee = gimple_call_fndecl (last.stmt);
1858 switch (DECL_FUNCTION_CODE (callee))
1860 case BUILT_IN_MEMCPY:
1861 case BUILT_IN_MEMCPY_CHK:
1862 break;
1863 default:
1864 return;
1867 len = gimple_call_arg (last.stmt, len_arg_no);
1868 if (tree_fits_uhwi_p (len))
1870 if (!tree_fits_uhwi_p (last.len)
1871 || integer_zerop (len)
1872 || tree_to_uhwi (len) != tree_to_uhwi (last.len) + 1)
1873 return;
1874 /* Don't adjust the length if it is divisible by 4, it is more efficient
1875 to store the extra '\0' in that case. */
1876 if ((tree_to_uhwi (len) & 3) == 0)
1877 return;
1879 /* Don't fold away an out of bounds access, as this defeats proper
1880 warnings. */
1881 tree dst = gimple_call_arg (last.stmt, 0);
1883 access_ref aref;
1884 tree size = compute_objsize (dst, stmt, 1, &aref, &ptr_qry);
1885 if (size && tree_int_cst_lt (size, len))
1886 return;
1888 else if (TREE_CODE (len) == SSA_NAME)
1890 gimple *def_stmt = SSA_NAME_DEF_STMT (len);
1891 if (!is_gimple_assign (def_stmt)
1892 || gimple_assign_rhs_code (def_stmt) != PLUS_EXPR
1893 || gimple_assign_rhs1 (def_stmt) != last.len
1894 || !integer_onep (gimple_assign_rhs2 (def_stmt)))
1895 return;
1897 else
1898 return;
1900 gimple_call_set_arg (last.stmt, len_arg_no, last.len);
1901 update_stmt (last.stmt);
1904 /* For an LHS that is an SSA_NAME that is the result of a strlen()
1905 call, or when BOUND is non-null, of a strnlen() call, set LHS
1906 range info to [0, min (MAX, BOUND)] when the range includes more
1907 than one value and return LHS. Otherwise, when the range
1908 [MIN, MAX] is such that MIN == MAX, return the tree representation
1909 of (MIN). The latter allows callers to fold suitable strnlen() calls
1910 to constants. */
1912 tree
1913 set_strlen_range (tree lhs, wide_int min, wide_int max,
1914 tree bound /* = NULL_TREE */)
1916 if (TREE_CODE (lhs) != SSA_NAME
1917 || !INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
1918 return NULL_TREE;
1920 if (bound)
1922 /* For strnlen, adjust MIN and MAX as necessary. If the bound
1923 is less than the size of the array set MAX to it. It it's
1924 greater than MAX and MAX is non-zero bump MAX down to account
1925 for the necessary terminating nul. Otherwise leave it alone. */
1926 if (TREE_CODE (bound) == INTEGER_CST)
1928 wide_int wibnd = wi::to_wide (bound);
1929 int cmp = wi::cmpu (wibnd, max);
1930 if (cmp < 0)
1931 max = wibnd;
1932 else if (cmp && wi::ne_p (max, min))
1933 --max;
1935 else if (TREE_CODE (bound) == SSA_NAME)
1937 value_range r;
1938 get_range_query (cfun)->range_of_expr (r, bound);
1939 if (!r.undefined_p ())
1941 /* For a bound in a known range, adjust the range determined
1942 above as necessary. For a bound in some anti-range or
1943 in an unknown range, use the range determined by callers. */
1944 if (wi::ltu_p (r.lower_bound (), min))
1945 min = r.lower_bound ();
1946 if (wi::ltu_p (r.upper_bound (), max))
1947 max = r.upper_bound ();
1952 if (min == max)
1953 return wide_int_to_tree (size_type_node, min);
1955 value_range vr (TREE_TYPE (lhs), min, max);
1956 set_range_info (lhs, vr);
1957 return lhs;
1960 /* For an LHS that is an SSA_NAME and for strlen() or strnlen() argument
1961 SRC, set LHS range info to [0, min (N, BOUND)] if SRC refers to
1962 a character array A[N] with unknown length bounded by N, and for
1963 strnlen(), by min (N, BOUND). */
1965 static tree
1966 maybe_set_strlen_range (tree lhs, tree src, tree bound)
1968 if (TREE_CODE (lhs) != SSA_NAME
1969 || !INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
1970 return NULL_TREE;
1972 if (TREE_CODE (src) == SSA_NAME)
1974 gimple *def = SSA_NAME_DEF_STMT (src);
1975 if (is_gimple_assign (def)
1976 && gimple_assign_rhs_code (def) == ADDR_EXPR)
1977 src = gimple_assign_rhs1 (def);
1980 /* The longest string is PTRDIFF_MAX - 1 bytes including the final
1981 NUL so that the difference between a pointer to just past it and
1982 one to its beginning is positive. */
1983 wide_int max = wi::to_wide (TYPE_MAX_VALUE (ptrdiff_type_node)) - 2;
1985 if (TREE_CODE (src) == ADDR_EXPR)
1987 /* The last array member of a struct can be bigger than its size
1988 suggests if it's treated as a poor-man's flexible array member. */
1989 src = TREE_OPERAND (src, 0);
1990 if (TREE_CODE (src) != MEM_REF
1991 && !array_ref_flexible_size_p (src))
1993 tree type = TREE_TYPE (src);
1994 tree size = TYPE_SIZE_UNIT (type);
1995 if (size
1996 && TREE_CODE (size) == INTEGER_CST
1997 && !integer_zerop (size))
1999 /* Even though such uses of strlen would be undefined,
2000 avoid relying on arrays of arrays in case some genius
2001 decides to call strlen on an unterminated array element
2002 that's followed by a terminated one. Likewise, avoid
2003 assuming that a struct array member is necessarily
2004 nul-terminated (the nul may be in the member that
2005 follows). In those cases, assume that the length
2006 of the string stored in such an array is bounded
2007 by the size of the enclosing object if one can be
2008 determined. */
2009 tree base = get_base_address (src);
2010 if (VAR_P (base))
2012 if (tree size = DECL_SIZE_UNIT (base))
2013 if (size
2014 && TREE_CODE (size) == INTEGER_CST
2015 && TREE_CODE (TREE_TYPE (base)) != POINTER_TYPE)
2016 max = wi::to_wide (size);
2020 /* For strlen() the upper bound above is equal to
2021 the longest string that can be stored in the array
2022 (i.e., it accounts for the terminating nul. For
2023 strnlen() bump up the maximum by one since the array
2024 need not be nul-terminated. */
2025 if (!bound && max != 0)
2026 --max;
2030 wide_int min = wi::zero (max.get_precision ());
2031 return set_strlen_range (lhs, min, max, bound);
2034 /* Diagnose buffer overflow by a STMT writing LEN + PLUS_ONE bytes,
2035 either into a region allocated for the object SI when non-null,
2036 or into an object designated by the LHS of STMT otherwise.
2037 For a call STMT, when CALL_LHS is set use its left hand side
2038 as the destination, otherwise use argument zero.
2039 When nonnull uses RVALS to determine range information.
2040 RAWMEM may be set by memcpy and other raw memory functions
2041 to allow accesses across subobject boundaries. */
2043 void
2044 strlen_pass::maybe_warn_overflow (gimple *stmt, bool call_lhs, tree len,
2045 strinfo *si, bool plus_one, bool rawmem)
2047 if (!len || warning_suppressed_p (stmt, OPT_Wstringop_overflow_))
2048 return;
2050 /* The DECL of the function performing the write if it is done
2051 by one. */
2052 tree writefn = NULL_TREE;
2053 /* The destination expression involved in the store or call STMT. */
2054 tree dest = NULL_TREE;
2056 if (is_gimple_assign (stmt))
2057 dest = gimple_assign_lhs (stmt);
2058 else if (is_gimple_call (stmt))
2060 if (call_lhs)
2061 dest = gimple_call_lhs (stmt);
2062 else
2064 gcc_assert (gimple_call_builtin_p (stmt, BUILT_IN_NORMAL));
2065 dest = gimple_call_arg (stmt, 0);
2068 if (!dest)
2069 return;
2070 writefn = gimple_call_fndecl (stmt);
2072 else
2073 return;
2075 if (warning_suppressed_p (dest, OPT_Wstringop_overflow_))
2076 return;
2078 const int ostype = rawmem ? 0 : 1;
2080 /* Use maximum precision to avoid overflow in the addition below.
2081 Make sure all operands have the same precision to keep wide_int
2082 from ICE'ing. */
2084 access_ref aref;
2085 /* The size of the destination region (which is smaller than
2086 the destination object for stores at a non-zero offset). */
2087 tree destsize = compute_objsize (dest, stmt, ostype, &aref, &ptr_qry);
2089 if (!destsize)
2091 aref.sizrng[0] = 0;
2092 aref.sizrng[1] = wi::to_offset (max_object_size ());
2095 /* Return early if the DESTSIZE size expression is the same as LEN
2096 and the offset into the destination is zero. This might happen
2097 in the case of a pair of malloc and memset calls to allocate
2098 an object and clear it as if by calloc. */
2099 if (destsize == len && !plus_one
2100 && aref.offrng[0] == 0 && aref.offrng[0] == aref.offrng[1])
2101 return;
2103 wide_int rng[2];
2104 if (!get_range (len, stmt, rng, ptr_qry.rvals))
2105 return;
2107 widest_int lenrng[2] =
2108 { widest_int::from (rng[0], SIGNED), widest_int::from (rng[1], SIGNED) };
2110 if (plus_one)
2112 lenrng[0] += 1;
2113 lenrng[1] += 1;
2116 /* The size of the remaining space in the destination computed
2117 as the size of the latter minus the offset into it. */
2118 widest_int spcrng[2];
2120 offset_int remrng[2];
2121 remrng[1] = aref.size_remaining (remrng);
2122 spcrng[0] = remrng[0] == -1 ? 0 : widest_int::from (remrng[0], UNSIGNED);
2123 spcrng[1] = widest_int::from (remrng[1], UNSIGNED);
2126 if (wi::leu_p (lenrng[0], spcrng[0])
2127 && wi::leu_p (lenrng[1], spcrng[1]))
2128 return;
2130 location_t loc = gimple_or_expr_nonartificial_location (stmt, dest);
2131 bool warned = false;
2132 if (wi::leu_p (lenrng[0], spcrng[1]))
2134 if (len != destsize
2135 && (!si || rawmem || !is_strlen_related_p (si->ptr, len)))
2136 return;
2138 warned = (writefn
2139 ? warning_at (loc, OPT_Wstringop_overflow_,
2140 "%qD writing one too many bytes into a region "
2141 "of a size that depends on %<strlen%>",
2142 writefn)
2143 : warning_at (loc, OPT_Wstringop_overflow_,
2144 "writing one too many bytes into a region "
2145 "of a size that depends on %<strlen%>"));
2147 else if (lenrng[0] == lenrng[1])
2149 if (spcrng[0] == spcrng[1])
2150 warned = (writefn
2151 ? warning_n (loc, OPT_Wstringop_overflow_,
2152 lenrng[0].to_uhwi (),
2153 "%qD writing %wu byte into a region "
2154 "of size %wu",
2155 "%qD writing %wu bytes into a region "
2156 "of size %wu",
2157 writefn, lenrng[0].to_uhwi (),
2158 spcrng[0].to_uhwi ())
2159 : warning_n (loc, OPT_Wstringop_overflow_,
2160 lenrng[0].to_uhwi (),
2161 "writing %wu byte into a region "
2162 "of size %wu",
2163 "writing %wu bytes into a region "
2164 "of size %wu",
2165 lenrng[0].to_uhwi (),
2166 spcrng[0].to_uhwi ()));
2167 else
2168 warned = (writefn
2169 ? warning_n (loc, OPT_Wstringop_overflow_,
2170 lenrng[0].to_uhwi (),
2171 "%qD writing %wu byte into a region "
2172 "of size between %wu and %wu",
2173 "%qD writing %wu bytes into a region "
2174 "of size between %wu and %wu",
2175 writefn, lenrng[0].to_uhwi (),
2176 spcrng[0].to_uhwi (), spcrng[1].to_uhwi ())
2177 : warning_n (loc, OPT_Wstringop_overflow_,
2178 lenrng[0].to_uhwi (),
2179 "writing %wu byte into a region "
2180 "of size between %wu and %wu",
2181 "writing %wu bytes into a region "
2182 "of size between %wu and %wu",
2183 lenrng[0].to_uhwi (),
2184 spcrng[0].to_uhwi (), spcrng[1].to_uhwi ()));
2186 else if (spcrng[0] == spcrng[1])
2187 warned = (writefn
2188 ? warning_at (loc, OPT_Wstringop_overflow_,
2189 "%qD writing between %wu and %wu bytes "
2190 "into a region of size %wu",
2191 writefn, lenrng[0].to_uhwi (),
2192 lenrng[1].to_uhwi (),
2193 spcrng[0].to_uhwi ())
2194 : warning_at (loc, OPT_Wstringop_overflow_,
2195 "writing between %wu and %wu bytes "
2196 "into a region of size %wu",
2197 lenrng[0].to_uhwi (),
2198 lenrng[1].to_uhwi (),
2199 spcrng[0].to_uhwi ()));
2200 else
2201 warned = (writefn
2202 ? warning_at (loc, OPT_Wstringop_overflow_,
2203 "%qD writing between %wu and %wu bytes "
2204 "into a region of size between %wu and %wu",
2205 writefn, lenrng[0].to_uhwi (),
2206 lenrng[1].to_uhwi (),
2207 spcrng[0].to_uhwi (), spcrng[1].to_uhwi ())
2208 : warning_at (loc, OPT_Wstringop_overflow_,
2209 "writing between %wu and %wu bytes "
2210 "into a region of size between %wu and %wu",
2211 lenrng[0].to_uhwi (),
2212 lenrng[1].to_uhwi (),
2213 spcrng[0].to_uhwi (), spcrng[1].to_uhwi ()));
2215 if (!warned)
2216 return;
2218 suppress_warning (stmt, OPT_Wstringop_overflow_);
2220 aref.inform_access (access_write_only);
2223 /* Convenience wrapper for the above. */
2225 void
2226 strlen_pass::maybe_warn_overflow (gimple *stmt, bool call_lhs,
2227 unsigned HOST_WIDE_INT len,
2228 strinfo *si, bool plus_one, bool rawmem)
2230 tree tlen = build_int_cst (size_type_node, len);
2231 maybe_warn_overflow (stmt, call_lhs, tlen, si, plus_one, rawmem);
2234 /* Handle a strlen call. If strlen of the argument is known, replace
2235 the strlen call with the known value, otherwise remember that strlen
2236 of the argument is stored in the lhs SSA_NAME. */
2238 void
2239 strlen_pass::handle_builtin_strlen ()
2241 gimple *stmt = gsi_stmt (m_gsi);
2242 tree lhs = gimple_call_lhs (stmt);
2244 if (lhs == NULL_TREE)
2245 return;
2247 location_t loc = gimple_location (stmt);
2248 tree callee = gimple_call_fndecl (stmt);
2249 tree src = gimple_call_arg (stmt, 0);
2250 tree bound = (DECL_FUNCTION_CODE (callee) == BUILT_IN_STRNLEN
2251 ? gimple_call_arg (stmt, 1) : NULL_TREE);
2252 int idx = get_stridx (src, stmt);
2253 if (idx || (bound && integer_zerop (bound)))
2255 strinfo *si = NULL;
2256 tree rhs;
2258 if (idx < 0)
2259 rhs = build_int_cst (TREE_TYPE (lhs), ~idx);
2260 else if (idx == 0)
2261 rhs = bound;
2262 else
2264 rhs = NULL_TREE;
2265 si = get_strinfo (idx);
2266 if (si != NULL)
2268 rhs = get_string_length (si);
2269 /* For strnlen, if bound is constant, even if si is not known
2270 to be zero terminated, if we know at least bound bytes are
2271 not zero, the return value will be bound. */
2272 if (rhs == NULL_TREE
2273 && bound != NULL_TREE
2274 && TREE_CODE (bound) == INTEGER_CST
2275 && si->nonzero_chars != NULL_TREE
2276 && TREE_CODE (si->nonzero_chars) == INTEGER_CST
2277 && tree_int_cst_le (bound, si->nonzero_chars))
2278 rhs = bound;
2281 if (rhs != NULL_TREE)
2283 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2285 fprintf (dump_file, "Optimizing: ");
2286 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2288 rhs = unshare_expr (rhs);
2289 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2290 rhs = fold_convert_loc (loc, TREE_TYPE (lhs), rhs);
2292 if (bound)
2293 rhs = fold_build2_loc (loc, MIN_EXPR, TREE_TYPE (rhs), rhs, bound);
2295 gimplify_and_update_call_from_tree (&m_gsi, rhs);
2296 stmt = gsi_stmt (m_gsi);
2297 update_stmt (stmt);
2298 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2300 fprintf (dump_file, "into: ");
2301 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2304 if (si != NULL
2305 /* Don't update anything for strnlen. */
2306 && bound == NULL_TREE
2307 && TREE_CODE (si->nonzero_chars) != SSA_NAME
2308 && TREE_CODE (si->nonzero_chars) != INTEGER_CST
2309 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
2311 si = unshare_strinfo (si);
2312 si->nonzero_chars = lhs;
2313 gcc_assert (si->full_string_p);
2316 if (strlen_to_stridx
2317 && (bound == NULL_TREE
2318 /* For strnlen record this only if the call is proven
2319 to return the same value as strlen would. */
2320 || (TREE_CODE (bound) == INTEGER_CST
2321 && TREE_CODE (rhs) == INTEGER_CST
2322 && tree_int_cst_lt (rhs, bound))))
2323 strlen_to_stridx->put (lhs, stridx_strlenloc (idx, loc));
2325 return;
2328 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
2329 return;
2331 if (idx == 0)
2332 idx = new_stridx (src);
2333 else
2335 strinfo *si = get_strinfo (idx);
2336 if (si != NULL)
2338 if (!si->full_string_p && !si->stmt)
2340 /* Until now we only had a lower bound on the string length.
2341 Install LHS as the actual length. */
2342 si = unshare_strinfo (si);
2343 tree old = si->nonzero_chars;
2344 si->nonzero_chars = lhs;
2345 si->full_string_p = true;
2346 if (old && TREE_CODE (old) == INTEGER_CST)
2348 old = fold_convert_loc (loc, TREE_TYPE (lhs), old);
2349 tree adj = fold_build2_loc (loc, MINUS_EXPR,
2350 TREE_TYPE (lhs), lhs, old);
2351 adjust_related_strinfos (loc, si, adj);
2352 /* Use the constant minimum length as the lower bound
2353 of the non-constant length. */
2354 wide_int min = wi::to_wide (old);
2355 wide_int max
2356 = wi::to_wide (TYPE_MAX_VALUE (ptrdiff_type_node)) - 2;
2357 set_strlen_range (lhs, min, max);
2359 else
2361 si->first = 0;
2362 si->prev = 0;
2363 si->next = 0;
2366 return;
2369 if (idx)
2371 if (!bound)
2373 /* Only store the new length information for calls to strlen(),
2374 not for those to strnlen(). */
2375 strinfo *si = new_strinfo (src, idx, lhs, true);
2376 set_strinfo (idx, si);
2377 find_equal_ptrs (src, idx);
2380 /* For SRC that is an array of N elements, set LHS's range
2381 to [0, min (N, BOUND)]. A constant return value means
2382 the range would have consisted of a single value. In
2383 that case, fold the result into the returned constant. */
2384 if (tree ret = maybe_set_strlen_range (lhs, src, bound))
2385 if (TREE_CODE (ret) == INTEGER_CST)
2387 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2389 fprintf (dump_file, "Optimizing: ");
2390 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2392 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (ret)))
2393 ret = fold_convert_loc (loc, TREE_TYPE (lhs), ret);
2394 gimplify_and_update_call_from_tree (&m_gsi, ret);
2395 stmt = gsi_stmt (m_gsi);
2396 update_stmt (stmt);
2397 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2399 fprintf (dump_file, "into: ");
2400 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2404 if (strlen_to_stridx && !bound)
2405 strlen_to_stridx->put (lhs, stridx_strlenloc (idx, loc));
2409 /* Handle a strchr call. If strlen of the first argument is known, replace
2410 the strchr (x, 0) call with the endptr or x + strlen, otherwise remember
2411 that lhs of the call is endptr and strlen of the argument is endptr - x. */
2413 void
2414 strlen_pass::handle_builtin_strchr ()
2416 gimple *stmt = gsi_stmt (m_gsi);
2417 tree lhs = gimple_call_lhs (stmt);
2419 if (lhs == NULL_TREE)
2420 return;
2422 if (!integer_zerop (gimple_call_arg (stmt, 1)))
2423 return;
2425 tree src = gimple_call_arg (stmt, 0);
2427 /* Avoid folding if the first argument is not a nul-terminated array.
2428 Defer warning until later. */
2429 if (!check_nul_terminated_array (NULL_TREE, src))
2430 return;
2432 int idx = get_stridx (src, stmt);
2433 if (idx)
2435 strinfo *si = NULL;
2436 tree rhs;
2438 if (idx < 0)
2439 rhs = build_int_cst (size_type_node, ~idx);
2440 else
2442 rhs = NULL_TREE;
2443 si = get_strinfo (idx);
2444 if (si != NULL)
2445 rhs = get_string_length (si);
2447 if (rhs != NULL_TREE)
2449 location_t loc = gimple_location (stmt);
2451 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2453 fprintf (dump_file, "Optimizing: ");
2454 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2456 if (si != NULL && si->endptr != NULL_TREE)
2458 rhs = unshare_expr (si->endptr);
2459 if (!useless_type_conversion_p (TREE_TYPE (lhs),
2460 TREE_TYPE (rhs)))
2461 rhs = fold_convert_loc (loc, TREE_TYPE (lhs), rhs);
2463 else
2465 rhs = fold_convert_loc (loc, sizetype, unshare_expr (rhs));
2466 rhs = fold_build2_loc (loc, POINTER_PLUS_EXPR,
2467 TREE_TYPE (src), src, rhs);
2468 if (!useless_type_conversion_p (TREE_TYPE (lhs),
2469 TREE_TYPE (rhs)))
2470 rhs = fold_convert_loc (loc, TREE_TYPE (lhs), rhs);
2472 gimplify_and_update_call_from_tree (&m_gsi, rhs);
2473 stmt = gsi_stmt (m_gsi);
2474 update_stmt (stmt);
2475 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2477 fprintf (dump_file, "into: ");
2478 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2480 if (si != NULL
2481 && si->endptr == NULL_TREE
2482 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
2484 si = unshare_strinfo (si);
2485 si->endptr = lhs;
2487 zero_length_string (lhs, si);
2488 return;
2491 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
2492 return;
2493 if (TREE_CODE (src) != SSA_NAME || !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (src))
2495 if (idx == 0)
2496 idx = new_stridx (src);
2497 else if (get_strinfo (idx) != NULL)
2499 zero_length_string (lhs, NULL);
2500 return;
2502 if (idx)
2504 location_t loc = gimple_location (stmt);
2505 tree lhsu = fold_convert_loc (loc, size_type_node, lhs);
2506 tree srcu = fold_convert_loc (loc, size_type_node, src);
2507 tree length = fold_build2_loc (loc, MINUS_EXPR,
2508 size_type_node, lhsu, srcu);
2509 strinfo *si = new_strinfo (src, idx, length, true);
2510 si->endptr = lhs;
2511 set_strinfo (idx, si);
2512 find_equal_ptrs (src, idx);
2513 zero_length_string (lhs, si);
2516 else
2517 zero_length_string (lhs, NULL);
2520 /* Handle a strcpy-like ({st{r,p}cpy,__st{r,p}cpy_chk}) call.
2521 If strlen of the second argument is known, strlen of the first argument
2522 is the same after this call. Furthermore, attempt to convert it to
2523 memcpy. Uses RVALS to determine range information. */
2525 void
2526 strlen_pass::handle_builtin_strcpy (built_in_function bcode)
2528 int idx, didx;
2529 tree src, dst, srclen, len, lhs, type, fn, oldlen;
2530 bool success;
2531 gimple *stmt = gsi_stmt (m_gsi);
2532 strinfo *si, *dsi, *olddsi, *zsi;
2533 location_t loc;
2535 src = gimple_call_arg (stmt, 1);
2536 dst = gimple_call_arg (stmt, 0);
2537 lhs = gimple_call_lhs (stmt);
2538 idx = get_stridx (src, stmt);
2539 si = NULL;
2540 if (idx > 0)
2541 si = get_strinfo (idx);
2543 didx = get_stridx (dst, stmt);
2544 olddsi = NULL;
2545 oldlen = NULL_TREE;
2546 if (didx > 0)
2547 olddsi = get_strinfo (didx);
2548 else if (didx < 0)
2549 return;
2551 if (olddsi != NULL)
2552 adjust_last_stmt (olddsi, stmt, false);
2554 srclen = NULL_TREE;
2555 if (si != NULL)
2556 srclen = get_string_length (si);
2557 else if (idx < 0)
2558 srclen = build_int_cst (size_type_node, ~idx);
2560 maybe_warn_overflow (stmt, false, srclen, olddsi, true);
2562 if (olddsi != NULL)
2563 adjust_last_stmt (olddsi, stmt, false);
2565 loc = gimple_location (stmt);
2566 if (srclen == NULL_TREE)
2567 switch (bcode)
2569 case BUILT_IN_STRCPY:
2570 case BUILT_IN_STRCPY_CHK:
2571 if (lhs != NULL_TREE || !builtin_decl_implicit_p (BUILT_IN_STPCPY))
2572 return;
2573 break;
2574 case BUILT_IN_STPCPY:
2575 case BUILT_IN_STPCPY_CHK:
2576 if (lhs == NULL_TREE)
2577 return;
2578 else
2580 tree lhsuint = fold_convert_loc (loc, size_type_node, lhs);
2581 srclen = fold_convert_loc (loc, size_type_node, dst);
2582 srclen = fold_build2_loc (loc, MINUS_EXPR, size_type_node,
2583 lhsuint, srclen);
2585 break;
2586 default:
2587 gcc_unreachable ();
2590 if (didx == 0)
2592 didx = new_stridx (dst);
2593 if (didx == 0)
2594 return;
2596 if (olddsi != NULL)
2598 oldlen = olddsi->nonzero_chars;
2599 dsi = unshare_strinfo (olddsi);
2600 dsi->nonzero_chars = srclen;
2601 dsi->full_string_p = (srclen != NULL_TREE);
2602 /* Break the chain, so adjust_related_strinfo on later pointers in
2603 the chain won't adjust this one anymore. */
2604 dsi->next = 0;
2605 dsi->stmt = NULL;
2606 dsi->endptr = NULL_TREE;
2608 else
2610 dsi = new_strinfo (dst, didx, srclen, srclen != NULL_TREE);
2611 set_strinfo (didx, dsi);
2612 find_equal_ptrs (dst, didx);
2614 dsi->writable = true;
2615 dsi->dont_invalidate = true;
2617 if (dsi->nonzero_chars == NULL_TREE)
2619 strinfo *chainsi;
2621 /* If string length of src is unknown, use delayed length
2622 computation. If string length of dst will be needed, it
2623 can be computed by transforming this strcpy call into
2624 stpcpy and subtracting dst from the return value. */
2626 /* Look for earlier strings whose length could be determined if
2627 this strcpy is turned into an stpcpy. */
2629 if (dsi->prev != 0 && (chainsi = verify_related_strinfos (dsi)) != NULL)
2631 for (; chainsi && chainsi != dsi; chainsi = get_strinfo (chainsi->next))
2633 /* When setting a stmt for delayed length computation
2634 prevent all strinfos through dsi from being
2635 invalidated. */
2636 chainsi = unshare_strinfo (chainsi);
2637 chainsi->stmt = stmt;
2638 chainsi->nonzero_chars = NULL_TREE;
2639 chainsi->full_string_p = false;
2640 chainsi->endptr = NULL_TREE;
2641 chainsi->dont_invalidate = true;
2644 dsi->stmt = stmt;
2646 /* Try to detect overlap before returning. This catches cases
2647 like strcpy (d, d + n) where n is non-constant whose range
2648 is such that (n <= strlen (d) holds).
2650 OLDDSI->NONZERO_chars may have been reset by this point with
2651 oldlen holding it original value. */
2652 if (olddsi && oldlen)
2654 /* Add 1 for the terminating NUL. */
2655 tree type = TREE_TYPE (oldlen);
2656 oldlen = fold_build2 (PLUS_EXPR, type, oldlen,
2657 build_int_cst (type, 1));
2658 check_bounds_or_overlap (stmt, olddsi->ptr, src, oldlen, NULL_TREE);
2661 return;
2664 if (olddsi != NULL)
2666 tree adj = NULL_TREE;
2667 if (oldlen == NULL_TREE)
2669 else if (integer_zerop (oldlen))
2670 adj = srclen;
2671 else if (TREE_CODE (oldlen) == INTEGER_CST
2672 || TREE_CODE (srclen) == INTEGER_CST)
2673 adj = fold_build2_loc (loc, MINUS_EXPR,
2674 TREE_TYPE (srclen), srclen,
2675 fold_convert_loc (loc, TREE_TYPE (srclen),
2676 oldlen));
2677 if (adj != NULL_TREE)
2678 adjust_related_strinfos (loc, dsi, adj);
2679 else
2680 dsi->prev = 0;
2682 /* strcpy src may not overlap dst, so src doesn't need to be
2683 invalidated either. */
2684 if (si != NULL)
2685 si->dont_invalidate = true;
2687 fn = NULL_TREE;
2688 zsi = NULL;
2689 switch (bcode)
2691 case BUILT_IN_STRCPY:
2692 fn = builtin_decl_implicit (BUILT_IN_MEMCPY);
2693 if (lhs)
2694 ssa_ver_to_stridx[SSA_NAME_VERSION (lhs)] = didx;
2695 break;
2696 case BUILT_IN_STRCPY_CHK:
2697 fn = builtin_decl_explicit (BUILT_IN_MEMCPY_CHK);
2698 if (lhs)
2699 ssa_ver_to_stridx[SSA_NAME_VERSION (lhs)] = didx;
2700 break;
2701 case BUILT_IN_STPCPY:
2702 /* This would need adjustment of the lhs (subtract one),
2703 or detection that the trailing '\0' doesn't need to be
2704 written, if it will be immediately overwritten.
2705 fn = builtin_decl_explicit (BUILT_IN_MEMPCPY); */
2706 if (lhs)
2708 dsi->endptr = lhs;
2709 zsi = zero_length_string (lhs, dsi);
2711 break;
2712 case BUILT_IN_STPCPY_CHK:
2713 /* This would need adjustment of the lhs (subtract one),
2714 or detection that the trailing '\0' doesn't need to be
2715 written, if it will be immediately overwritten.
2716 fn = builtin_decl_explicit (BUILT_IN_MEMPCPY_CHK); */
2717 if (lhs)
2719 dsi->endptr = lhs;
2720 zsi = zero_length_string (lhs, dsi);
2722 break;
2723 default:
2724 gcc_unreachable ();
2726 if (zsi != NULL)
2727 zsi->dont_invalidate = true;
2729 if (fn)
2731 tree args = TYPE_ARG_TYPES (TREE_TYPE (fn));
2732 type = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (args)));
2734 else
2735 type = size_type_node;
2737 len = fold_convert_loc (loc, type, unshare_expr (srclen));
2738 len = fold_build2_loc (loc, PLUS_EXPR, type, len, build_int_cst (type, 1));
2740 /* Disable warning for the transformed statement? */
2741 opt_code no_warning_opt = no_warning;
2743 if (const strinfo *chksi = si ? olddsi ? olddsi : dsi : NULL)
2745 no_warning_opt = check_bounds_or_overlap (stmt, chksi->ptr, si->ptr,
2746 NULL_TREE, len);
2747 if (no_warning_opt)
2748 suppress_warning (stmt, no_warning_opt);
2751 if (fn == NULL_TREE)
2752 return;
2754 len = force_gimple_operand_gsi (&m_gsi, len, true, NULL_TREE, true,
2755 GSI_SAME_STMT);
2756 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2758 fprintf (dump_file, "Optimizing: ");
2759 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2761 if (gimple_call_num_args (stmt) == 2)
2762 success = update_gimple_call (&m_gsi, fn, 3, dst, src, len);
2763 else
2764 success = update_gimple_call (&m_gsi, fn, 4, dst, src, len,
2765 gimple_call_arg (stmt, 2));
2766 if (success)
2768 stmt = gsi_stmt (m_gsi);
2769 update_stmt (stmt);
2770 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2772 fprintf (dump_file, "into: ");
2773 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2775 /* Allow adjust_last_stmt to decrease this memcpy's size. */
2776 laststmt.stmt = stmt;
2777 laststmt.len = srclen;
2778 laststmt.stridx = dsi->idx;
2780 else if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2781 fprintf (dump_file, "not possible.\n");
2783 if (no_warning_opt)
2784 suppress_warning (stmt, no_warning_opt);
2787 /* Check the size argument to the built-in forms of stpncpy and strncpy
2788 for out-of-bounds offsets or overlapping access, and to see if the
2789 size argument is derived from a call to strlen() on the source argument,
2790 and if so, issue an appropriate warning. */
2792 void
2793 strlen_pass::handle_builtin_strncat (built_in_function)
2795 /* Same as stxncpy(). */
2796 handle_builtin_stxncpy_strncat (true);
2799 /* Return true if LEN depends on a call to strlen(SRC) in an interesting
2800 way. LEN can either be an integer expression, or a pointer (to char).
2801 When it is the latter (such as in recursive calls to self) it is
2802 assumed to be the argument in some call to strlen() whose relationship
2803 to SRC is being ascertained. */
2805 bool
2806 is_strlen_related_p (tree src, tree len)
2808 if (TREE_CODE (TREE_TYPE (len)) == POINTER_TYPE
2809 && operand_equal_p (src, len, 0))
2810 return true;
2812 if (TREE_CODE (len) != SSA_NAME)
2813 return false;
2815 if (TREE_CODE (src) == SSA_NAME)
2817 gimple *srcdef = SSA_NAME_DEF_STMT (src);
2818 if (is_gimple_assign (srcdef))
2820 /* Handle bitwise AND used in conversions from wider size_t
2821 to narrower unsigned types. */
2822 tree_code code = gimple_assign_rhs_code (srcdef);
2823 if (code == BIT_AND_EXPR
2824 || code == NOP_EXPR)
2825 return is_strlen_related_p (gimple_assign_rhs1 (srcdef), len);
2827 return false;
2830 if (gimple_call_builtin_p (srcdef, BUILT_IN_NORMAL))
2832 /* If SRC is the result of a call to an allocation function
2833 or strlen, use the function's argument instead. */
2834 tree func = gimple_call_fndecl (srcdef);
2835 built_in_function code = DECL_FUNCTION_CODE (func);
2836 if (code == BUILT_IN_ALLOCA
2837 || code == BUILT_IN_ALLOCA_WITH_ALIGN
2838 || code == BUILT_IN_MALLOC
2839 || code == BUILT_IN_STRLEN)
2840 return is_strlen_related_p (gimple_call_arg (srcdef, 0), len);
2842 /* FIXME: Handle other functions with attribute alloc_size. */
2843 return false;
2847 gimple *lendef = SSA_NAME_DEF_STMT (len);
2848 if (!lendef)
2849 return false;
2851 if (is_gimple_call (lendef))
2853 tree func = gimple_call_fndecl (lendef);
2854 if (!valid_builtin_call (lendef)
2855 || DECL_FUNCTION_CODE (func) != BUILT_IN_STRLEN)
2856 return false;
2858 tree arg = gimple_call_arg (lendef, 0);
2859 return is_strlen_related_p (src, arg);
2862 if (!is_gimple_assign (lendef))
2863 return false;
2865 tree_code code = gimple_assign_rhs_code (lendef);
2866 tree rhs1 = gimple_assign_rhs1 (lendef);
2867 tree rhstype = TREE_TYPE (rhs1);
2869 if ((TREE_CODE (rhstype) == POINTER_TYPE && code == POINTER_PLUS_EXPR)
2870 || (INTEGRAL_TYPE_P (rhstype)
2871 && (code == BIT_AND_EXPR
2872 || code == NOP_EXPR)))
2874 /* Pointer plus (an integer), and truncation are considered among
2875 the (potentially) related expressions to strlen. */
2876 return is_strlen_related_p (src, rhs1);
2879 if (tree rhs2 = gimple_assign_rhs2 (lendef))
2881 /* Integer subtraction is considered strlen-related when both
2882 arguments are integers and second one is strlen-related. */
2883 rhstype = TREE_TYPE (rhs2);
2884 if (INTEGRAL_TYPE_P (rhstype) && code == MINUS_EXPR)
2885 return is_strlen_related_p (src, rhs2);
2888 return false;
2891 /* Called by handle_builtin_stxncpy_strncat and by
2892 gimple_fold_builtin_strncpy in gimple-fold.cc.
2893 Check to see if the specified bound is a) equal to the size of
2894 the destination DST and if so, b) if it's immediately followed by
2895 DST[CNT - 1] = '\0'. If a) holds and b) does not, warn. Otherwise,
2896 do nothing. Return true if diagnostic has been issued.
2898 The purpose is to diagnose calls to strncpy and stpncpy that do
2899 not nul-terminate the copy while allowing for the idiom where
2900 such a call is immediately followed by setting the last element
2901 to nul, as in:
2902 char a[32];
2903 strncpy (a, s, sizeof a);
2904 a[sizeof a - 1] = '\0';
2907 bool
2908 maybe_diag_stxncpy_trunc (gimple_stmt_iterator gsi, tree src, tree cnt,
2909 pointer_query *ptr_qry /* = NULL */)
2911 gimple *stmt = gsi_stmt (gsi);
2912 if (warning_suppressed_p (stmt, OPT_Wstringop_truncation))
2913 return false;
2915 wide_int cntrange[2];
2916 value_range r;
2917 if (!get_range_query (cfun)->range_of_expr (r, cnt)
2918 || r.varying_p ()
2919 || r.undefined_p ())
2920 return false;
2922 cntrange[0] = wi::to_wide (r.min ());
2923 cntrange[1] = wi::to_wide (r.max ());
2924 if (r.kind () == VR_ANTI_RANGE)
2926 wide_int maxobjsize = wi::to_wide (TYPE_MAX_VALUE (ptrdiff_type_node));
2928 if (wi::ltu_p (cntrange[1], maxobjsize))
2930 cntrange[0] = cntrange[1] + 1;
2931 cntrange[1] = maxobjsize;
2933 else
2935 cntrange[1] = cntrange[0] - 1;
2936 cntrange[0] = wi::zero (TYPE_PRECISION (TREE_TYPE (cnt)));
2940 /* Negative value is the constant string length. If it's less than
2941 the lower bound there is no truncation. Avoid calling get_stridx()
2942 when ssa_ver_to_stridx is empty. That implies the caller isn't
2943 running under the control of this pass and ssa_ver_to_stridx hasn't
2944 been created yet. */
2945 int sidx = ssa_ver_to_stridx.length () ? get_stridx (src, stmt) : 0;
2946 if (sidx < 0 && wi::gtu_p (cntrange[0], ~sidx))
2947 return false;
2949 tree dst = gimple_call_arg (stmt, 0);
2950 tree dstdecl = dst;
2951 if (TREE_CODE (dstdecl) == ADDR_EXPR)
2952 dstdecl = TREE_OPERAND (dstdecl, 0);
2954 tree ref = NULL_TREE;
2956 if (!sidx)
2958 /* If the source is a non-string return early to avoid warning
2959 for possible truncation (if the truncation is certain SIDX
2960 is non-zero). */
2961 tree srcdecl = gimple_call_arg (stmt, 1);
2962 if (TREE_CODE (srcdecl) == ADDR_EXPR)
2963 srcdecl = TREE_OPERAND (srcdecl, 0);
2964 if (get_attr_nonstring_decl (srcdecl, &ref))
2965 return false;
2968 /* Likewise, if the destination refers to an array/pointer declared
2969 nonstring return early. */
2970 if (get_attr_nonstring_decl (dstdecl, &ref))
2971 return false;
2973 /* Look for dst[i] = '\0'; after the stxncpy() call and if found
2974 avoid the truncation warning. */
2975 gsi_next_nondebug (&gsi);
2976 gimple *next_stmt = gsi_stmt (gsi);
2977 if (!next_stmt)
2979 /* When there is no statement in the same basic block check
2980 the immediate successor block. */
2981 if (basic_block bb = gimple_bb (stmt))
2983 if (single_succ_p (bb))
2985 /* For simplicity, ignore blocks with multiple outgoing
2986 edges for now and only consider successor blocks along
2987 normal edges. */
2988 edge e = EDGE_SUCC (bb, 0);
2989 if (!(e->flags & EDGE_ABNORMAL))
2991 gsi = gsi_start_bb (e->dest);
2992 next_stmt = gsi_stmt (gsi);
2993 if (next_stmt && is_gimple_debug (next_stmt))
2995 gsi_next_nondebug (&gsi);
2996 next_stmt = gsi_stmt (gsi);
3003 if (next_stmt && is_gimple_assign (next_stmt))
3005 tree lhs = gimple_assign_lhs (next_stmt);
3006 tree_code code = TREE_CODE (lhs);
3007 if (code == ARRAY_REF || code == MEM_REF)
3008 lhs = TREE_OPERAND (lhs, 0);
3010 tree func = gimple_call_fndecl (stmt);
3011 if (DECL_FUNCTION_CODE (func) == BUILT_IN_STPNCPY)
3013 tree ret = gimple_call_lhs (stmt);
3014 if (ret && operand_equal_p (ret, lhs, 0))
3015 return false;
3018 /* Determine the base address and offset of the reference,
3019 ignoring the innermost array index. */
3020 if (TREE_CODE (ref) == ARRAY_REF)
3021 ref = TREE_OPERAND (ref, 0);
3023 poly_int64 dstoff;
3024 tree dstbase = get_addr_base_and_unit_offset (ref, &dstoff);
3026 poly_int64 lhsoff;
3027 tree lhsbase = get_addr_base_and_unit_offset (lhs, &lhsoff);
3028 if (lhsbase
3029 && dstbase
3030 && known_eq (dstoff, lhsoff)
3031 && operand_equal_p (dstbase, lhsbase, 0))
3032 return false;
3035 int prec = TYPE_PRECISION (TREE_TYPE (cnt));
3036 wide_int lenrange[2];
3037 if (strinfo *sisrc = sidx > 0 ? get_strinfo (sidx) : NULL)
3039 lenrange[0] = (sisrc->nonzero_chars
3040 && TREE_CODE (sisrc->nonzero_chars) == INTEGER_CST
3041 ? wi::to_wide (sisrc->nonzero_chars)
3042 : wi::zero (prec));
3043 lenrange[1] = lenrange[0];
3045 else if (sidx < 0)
3046 lenrange[0] = lenrange[1] = wi::shwi (~sidx, prec);
3047 else
3049 c_strlen_data lendata = { };
3050 /* Set MAXBOUND to an arbitrary non-null non-integer node as a request
3051 to have it set to the length of the longest string in a PHI. */
3052 lendata.maxbound = src;
3053 get_range_strlen (src, &lendata, /* eltsize = */1);
3054 if (TREE_CODE (lendata.minlen) == INTEGER_CST
3055 && TREE_CODE (lendata.maxbound) == INTEGER_CST)
3057 /* When LENDATA.MAXLEN is unknown, reset LENDATA.MINLEN
3058 which stores the length of the shortest known string. */
3059 if (integer_all_onesp (lendata.maxlen))
3060 lenrange[0] = wi::shwi (0, prec);
3061 else
3062 lenrange[0] = wi::to_wide (lendata.minlen, prec);
3063 lenrange[1] = wi::to_wide (lendata.maxbound, prec);
3065 else
3067 lenrange[0] = wi::shwi (0, prec);
3068 lenrange[1] = wi::shwi (-1, prec);
3072 location_t callloc = gimple_or_expr_nonartificial_location (stmt, dst);
3073 tree func = gimple_call_fndecl (stmt);
3075 if (lenrange[0] != 0 || !wi::neg_p (lenrange[1]))
3077 /* If the longest source string is shorter than the lower bound
3078 of the specified count the copy is definitely nul-terminated. */
3079 if (wi::ltu_p (lenrange[1], cntrange[0]))
3080 return false;
3082 if (wi::neg_p (lenrange[1]))
3084 /* The length of one of the strings is unknown but at least
3085 one has non-zero length and that length is stored in
3086 LENRANGE[1]. Swap the bounds to force a "may be truncated"
3087 warning below. */
3088 lenrange[1] = lenrange[0];
3089 lenrange[0] = wi::shwi (0, prec);
3092 /* Set to true for strncat whose bound is derived from the length
3093 of the destination (the expected usage pattern). */
3094 bool cat_dstlen_bounded = false;
3095 if (DECL_FUNCTION_CODE (func) == BUILT_IN_STRNCAT)
3096 cat_dstlen_bounded = is_strlen_related_p (dst, cnt);
3098 if (lenrange[0] == cntrange[1] && cntrange[0] == cntrange[1])
3099 return warning_n (callloc, OPT_Wstringop_truncation,
3100 cntrange[0].to_uhwi (),
3101 "%qD output truncated before terminating "
3102 "nul copying %E byte from a string of the "
3103 "same length",
3104 "%qD output truncated before terminating nul "
3105 "copying %E bytes from a string of the same "
3106 "length",
3107 func, cnt);
3108 else if (!cat_dstlen_bounded)
3110 if (wi::geu_p (lenrange[0], cntrange[1]))
3112 /* The shortest string is longer than the upper bound of
3113 the count so the truncation is certain. */
3114 if (cntrange[0] == cntrange[1])
3115 return warning_n (callloc, OPT_Wstringop_truncation,
3116 cntrange[0].to_uhwi (),
3117 "%qD output truncated copying %E byte "
3118 "from a string of length %wu",
3119 "%qD output truncated copying %E bytes "
3120 "from a string of length %wu",
3121 func, cnt, lenrange[0].to_uhwi ());
3123 return warning_at (callloc, OPT_Wstringop_truncation,
3124 "%qD output truncated copying between %wu "
3125 "and %wu bytes from a string of length %wu",
3126 func, cntrange[0].to_uhwi (),
3127 cntrange[1].to_uhwi (), lenrange[0].to_uhwi ());
3129 else if (wi::geu_p (lenrange[1], cntrange[1]))
3131 /* The longest string is longer than the upper bound of
3132 the count so the truncation is possible. */
3133 if (cntrange[0] == cntrange[1])
3134 return warning_n (callloc, OPT_Wstringop_truncation,
3135 cntrange[0].to_uhwi (),
3136 "%qD output may be truncated copying %E "
3137 "byte from a string of length %wu",
3138 "%qD output may be truncated copying %E "
3139 "bytes from a string of length %wu",
3140 func, cnt, lenrange[1].to_uhwi ());
3142 return warning_at (callloc, OPT_Wstringop_truncation,
3143 "%qD output may be truncated copying between "
3144 "%wu and %wu bytes from a string of length %wu",
3145 func, cntrange[0].to_uhwi (),
3146 cntrange[1].to_uhwi (), lenrange[1].to_uhwi ());
3150 if (!cat_dstlen_bounded
3151 && cntrange[0] != cntrange[1]
3152 && wi::leu_p (cntrange[0], lenrange[0])
3153 && wi::leu_p (cntrange[1], lenrange[0] + 1))
3155 /* If the source (including the terminating nul) is longer than
3156 the lower bound of the specified count but shorter than the
3157 upper bound the copy may (but need not) be truncated. */
3158 return warning_at (callloc, OPT_Wstringop_truncation,
3159 "%qD output may be truncated copying between "
3160 "%wu and %wu bytes from a string of length %wu",
3161 func, cntrange[0].to_uhwi (),
3162 cntrange[1].to_uhwi (), lenrange[0].to_uhwi ());
3166 access_ref aref;
3167 if (tree dstsize = compute_objsize (dst, stmt, 1, &aref, ptr_qry))
3169 /* The source length is unknown. Try to determine the destination
3170 size and see if it matches the specified bound. If not, bail.
3171 Otherwise go on to see if it should be diagnosed for possible
3172 truncation. */
3173 if (!dstsize)
3174 return false;
3176 if (wi::to_wide (dstsize) != cntrange[1])
3177 return false;
3179 /* Avoid warning for strncpy(a, b, N) calls where the following
3180 equalities hold:
3181 N == sizeof a && N == sizeof b */
3182 if (tree srcsize = compute_objsize (src, stmt, 1, &aref, ptr_qry))
3183 if (wi::to_wide (srcsize) == cntrange[1])
3184 return false;
3186 if (cntrange[0] == cntrange[1])
3187 return warning_at (callloc, OPT_Wstringop_truncation,
3188 "%qD specified bound %E equals destination size",
3189 func, cnt);
3192 return false;
3195 /* Check the arguments to the built-in forms of stpncpy, strncpy, and
3196 strncat, for out-of-bounds offsets or overlapping access, and to see
3197 if the size is derived from calling strlen() on the source argument,
3198 and if so, issue the appropriate warning.
3199 APPEND_P is true for strncat. */
3201 void
3202 strlen_pass::handle_builtin_stxncpy_strncat (bool append_p)
3204 if (!strlen_to_stridx)
3205 return;
3207 gimple *stmt = gsi_stmt (m_gsi);
3209 tree dst = gimple_call_arg (stmt, 0);
3210 tree src = gimple_call_arg (stmt, 1);
3211 tree len = gimple_call_arg (stmt, 2);
3212 /* An upper bound of the size of the destination. */
3213 tree dstsize = NULL_TREE;
3214 /* The length of the destination and source strings (plus 1 for those
3215 whose FULL_STRING_P is set, i.e., whose length is exact rather than
3216 a lower bound). */
3217 tree dstlenp1 = NULL_TREE, srclenp1 = NULL_TREE;;
3219 int didx = get_stridx (dst, stmt);
3220 if (strinfo *sidst = didx > 0 ? get_strinfo (didx) : NULL)
3222 /* Compute the size of the destination string including the nul
3223 if it is known to be nul-terminated. */
3224 if (sidst->nonzero_chars)
3226 if (sidst->full_string_p)
3228 /* String is known to be nul-terminated. */
3229 tree type = TREE_TYPE (sidst->nonzero_chars);
3230 dstlenp1 = fold_build2 (PLUS_EXPR, type, sidst->nonzero_chars,
3231 build_int_cst (type, 1));
3233 else
3234 dstlenp1 = sidst->nonzero_chars;
3236 else if (TREE_CODE (sidst->ptr) == SSA_NAME)
3238 gimple *def_stmt = SSA_NAME_DEF_STMT (sidst->ptr);
3239 dstsize = gimple_call_alloc_size (def_stmt);
3242 dst = sidst->ptr;
3245 int sidx = get_stridx (src, stmt);
3246 strinfo *sisrc = sidx > 0 ? get_strinfo (sidx) : NULL;
3247 if (sisrc)
3249 /* strncat() and strncpy() can modify the source string by writing
3250 over the terminating nul so SISRC->DONT_INVALIDATE must be left
3251 clear. */
3253 /* Compute the size of the source string including the terminating
3254 nul if its known to be nul-terminated. */
3255 if (sisrc->nonzero_chars)
3257 if (sisrc->full_string_p)
3259 tree type = TREE_TYPE (sisrc->nonzero_chars);
3260 srclenp1 = fold_build2 (PLUS_EXPR, type, sisrc->nonzero_chars,
3261 build_int_cst (type, 1));
3263 else
3264 srclenp1 = sisrc->nonzero_chars;
3267 src = sisrc->ptr;
3269 else
3270 srclenp1 = NULL_TREE;
3272 opt_code opt = check_bounds_or_overlap (stmt, dst, src, dstlenp1, srclenp1);
3273 if (opt != no_warning)
3275 suppress_warning (stmt, opt);
3276 return;
3279 /* If the length argument was computed from strlen(S) for some string
3280 S retrieve the strinfo index for the string (PSS->FIRST) along with
3281 the location of the strlen() call (PSS->SECOND). */
3282 stridx_strlenloc *pss = strlen_to_stridx->get (len);
3283 if (!pss || pss->first <= 0)
3285 if (maybe_diag_stxncpy_trunc (m_gsi, src, len))
3286 suppress_warning (stmt, OPT_Wstringop_truncation);
3288 return;
3291 /* Retrieve the strinfo data for the string S that LEN was computed
3292 from as some function F of strlen (S) (i.e., LEN need not be equal
3293 to strlen(S)). */
3294 strinfo *silen = get_strinfo (pss->first);
3296 location_t callloc = gimple_or_expr_nonartificial_location (stmt, dst);
3298 tree func = gimple_call_fndecl (stmt);
3300 bool warned = false;
3302 /* When -Wstringop-truncation is set, try to determine truncation
3303 before diagnosing possible overflow. Truncation is implied by
3304 the LEN argument being equal to strlen(SRC), regardless of
3305 whether its value is known. Otherwise, when appending, or
3306 when copying into a destination of known size, issue the more
3307 generic -Wstringop-overflow which triggers for LEN arguments
3308 that in any meaningful way depend on strlen(SRC). */
3309 if (!append_p
3310 && sisrc == silen
3311 && is_strlen_related_p (src, len)
3312 && warning_at (callloc, OPT_Wstringop_truncation,
3313 "%qD output truncated before terminating nul "
3314 "copying as many bytes from a string as its length",
3315 func))
3316 warned = true;
3317 else if ((append_p || !dstsize || len == dstlenp1)
3318 && silen && is_strlen_related_p (src, silen->ptr))
3320 /* Issue -Wstringop-overflow when appending or when writing into
3321 a destination of a known size. Otherwise, when copying into
3322 a destination of an unknown size, it's truncation. */
3323 opt_code opt = (append_p || dstsize
3324 ? OPT_Wstringop_overflow_ : OPT_Wstringop_truncation);
3325 warned = warning_at (callloc, opt,
3326 "%qD specified bound depends on the length "
3327 "of the source argument",
3328 func);
3330 if (warned)
3332 location_t strlenloc = pss->second;
3333 if (strlenloc != UNKNOWN_LOCATION && strlenloc != callloc)
3334 inform (strlenloc, "length computed here");
3338 /* Handle a memcpy-like ({mem{,p}cpy,__mem{,p}cpy_chk}) call.
3339 If strlen of the second argument is known and length of the third argument
3340 is that plus one, strlen of the first argument is the same after this
3341 call. Uses RVALS to determine range information. */
3343 void
3344 strlen_pass::handle_builtin_memcpy (built_in_function bcode)
3346 tree lhs, oldlen, newlen;
3347 gimple *stmt = gsi_stmt (m_gsi);
3348 strinfo *si, *dsi;
3350 tree len = gimple_call_arg (stmt, 2);
3351 tree src = gimple_call_arg (stmt, 1);
3352 tree dst = gimple_call_arg (stmt, 0);
3354 int didx = get_stridx (dst, stmt);
3355 strinfo *olddsi = NULL;
3356 if (didx > 0)
3357 olddsi = get_strinfo (didx);
3358 else if (didx < 0)
3359 return;
3361 if (olddsi != NULL
3362 && !integer_zerop (len))
3364 maybe_warn_overflow (stmt, false, len, olddsi, false, true);
3365 adjust_last_stmt (olddsi, stmt, false);
3368 int idx = get_stridx (src, stmt);
3369 if (idx == 0)
3370 return;
3372 bool full_string_p;
3373 if (idx > 0)
3375 gimple *def_stmt;
3377 /* Handle memcpy (x, y, l) where l's relationship with strlen (y)
3378 is known. */
3379 si = get_strinfo (idx);
3380 if (si == NULL || si->nonzero_chars == NULL_TREE)
3381 return;
3382 if (TREE_CODE (len) == INTEGER_CST
3383 && TREE_CODE (si->nonzero_chars) == INTEGER_CST)
3385 if (tree_int_cst_le (len, si->nonzero_chars))
3387 /* Copying LEN nonzero characters, where LEN is constant. */
3388 newlen = len;
3389 full_string_p = false;
3391 else
3393 /* Copying the whole of the analyzed part of SI. */
3394 newlen = si->nonzero_chars;
3395 full_string_p = si->full_string_p;
3398 else
3400 if (!si->full_string_p)
3401 return;
3402 if (TREE_CODE (len) != SSA_NAME)
3403 return;
3404 def_stmt = SSA_NAME_DEF_STMT (len);
3405 if (!is_gimple_assign (def_stmt)
3406 || gimple_assign_rhs_code (def_stmt) != PLUS_EXPR
3407 || gimple_assign_rhs1 (def_stmt) != si->nonzero_chars
3408 || !integer_onep (gimple_assign_rhs2 (def_stmt)))
3409 return;
3410 /* Copying variable-length string SI (and no more). */
3411 newlen = si->nonzero_chars;
3412 full_string_p = true;
3415 else
3417 si = NULL;
3418 /* Handle memcpy (x, "abcd", 5) or
3419 memcpy (x, "abc\0uvw", 7). */
3420 if (!tree_fits_uhwi_p (len))
3421 return;
3423 unsigned HOST_WIDE_INT clen = tree_to_uhwi (len);
3424 unsigned HOST_WIDE_INT nonzero_chars = ~idx;
3425 newlen = build_int_cst (size_type_node, MIN (nonzero_chars, clen));
3426 full_string_p = clen > nonzero_chars;
3429 if (!full_string_p
3430 && olddsi
3431 && olddsi->nonzero_chars
3432 && TREE_CODE (olddsi->nonzero_chars) == INTEGER_CST
3433 && tree_int_cst_le (newlen, olddsi->nonzero_chars))
3435 /* The SRC substring being written strictly overlaps
3436 a subsequence of the existing string OLDDSI. */
3437 newlen = olddsi->nonzero_chars;
3438 full_string_p = olddsi->full_string_p;
3441 if (olddsi != NULL && TREE_CODE (len) == SSA_NAME)
3442 adjust_last_stmt (olddsi, stmt, false);
3444 if (didx == 0)
3446 didx = new_stridx (dst);
3447 if (didx == 0)
3448 return;
3450 oldlen = NULL_TREE;
3451 if (olddsi != NULL)
3453 dsi = unshare_strinfo (olddsi);
3454 oldlen = olddsi->nonzero_chars;
3455 dsi->nonzero_chars = newlen;
3456 dsi->full_string_p = full_string_p;
3457 /* Break the chain, so adjust_related_strinfo on later pointers in
3458 the chain won't adjust this one anymore. */
3459 dsi->next = 0;
3460 dsi->stmt = NULL;
3461 dsi->endptr = NULL_TREE;
3463 else
3465 dsi = new_strinfo (dst, didx, newlen, full_string_p);
3466 set_strinfo (didx, dsi);
3467 find_equal_ptrs (dst, didx);
3469 dsi->writable = true;
3470 dsi->dont_invalidate = true;
3471 if (olddsi != NULL)
3473 tree adj = NULL_TREE;
3474 location_t loc = gimple_location (stmt);
3475 if (oldlen == NULL_TREE)
3477 else if (integer_zerop (oldlen))
3478 adj = newlen;
3479 else if (TREE_CODE (oldlen) == INTEGER_CST
3480 || TREE_CODE (newlen) == INTEGER_CST)
3481 adj = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (newlen), newlen,
3482 fold_convert_loc (loc, TREE_TYPE (newlen),
3483 oldlen));
3484 if (adj != NULL_TREE)
3485 adjust_related_strinfos (loc, dsi, adj);
3486 else
3487 dsi->prev = 0;
3489 /* memcpy src may not overlap dst, so src doesn't need to be
3490 invalidated either. */
3491 if (si != NULL)
3492 si->dont_invalidate = true;
3494 if (full_string_p)
3496 lhs = gimple_call_lhs (stmt);
3497 switch (bcode)
3499 case BUILT_IN_MEMCPY:
3500 case BUILT_IN_MEMCPY_CHK:
3501 /* Allow adjust_last_stmt to decrease this memcpy's size. */
3502 laststmt.stmt = stmt;
3503 laststmt.len = dsi->nonzero_chars;
3504 laststmt.stridx = dsi->idx;
3505 if (lhs)
3506 ssa_ver_to_stridx[SSA_NAME_VERSION (lhs)] = didx;
3507 break;
3508 case BUILT_IN_MEMPCPY:
3509 case BUILT_IN_MEMPCPY_CHK:
3510 break;
3511 default:
3512 gcc_unreachable ();
3517 /* Handle a strcat-like ({strcat,__strcat_chk}) call.
3518 If strlen of the second argument is known, strlen of the first argument
3519 is increased by the length of the second argument. Furthermore, attempt
3520 to convert it to memcpy/strcpy if the length of the first argument
3521 is known. */
3523 void
3524 strlen_pass::handle_builtin_strcat (built_in_function bcode)
3526 int idx, didx;
3527 tree srclen, args, type, fn, objsz, endptr;
3528 bool success;
3529 gimple *stmt = gsi_stmt (m_gsi);
3530 strinfo *si, *dsi;
3531 location_t loc = gimple_location (stmt);
3533 tree src = gimple_call_arg (stmt, 1);
3534 tree dst = gimple_call_arg (stmt, 0);
3536 /* Bail if the source is the same as destination. It will be diagnosed
3537 elsewhere. */
3538 if (operand_equal_p (src, dst, 0))
3539 return;
3541 tree lhs = gimple_call_lhs (stmt);
3543 didx = get_stridx (dst, stmt);
3544 if (didx < 0)
3545 return;
3547 dsi = NULL;
3548 if (didx > 0)
3549 dsi = get_strinfo (didx);
3551 srclen = NULL_TREE;
3552 si = NULL;
3553 idx = get_stridx (src, stmt);
3554 if (idx < 0)
3555 srclen = build_int_cst (size_type_node, ~idx);
3556 else if (idx > 0)
3558 si = get_strinfo (idx);
3559 if (si != NULL)
3560 srclen = get_string_length (si);
3563 /* Disable warning for the transformed statement? */
3564 opt_code no_warning_opt = no_warning;
3566 if (dsi == NULL || get_string_length (dsi) == NULL_TREE)
3569 /* The concatenation always involves copying at least one byte
3570 (the terminating nul), even if the source string is empty.
3571 If the source is unknown assume it's one character long and
3572 used that as both sizes. */
3573 tree slen = srclen;
3574 if (slen)
3576 tree type = TREE_TYPE (slen);
3577 slen = fold_build2 (PLUS_EXPR, type, slen, build_int_cst (type, 1));
3580 tree sptr = si && si->ptr ? si->ptr : src;
3581 no_warning_opt = check_bounds_or_overlap (stmt, dst, sptr, NULL_TREE,
3582 slen);
3583 if (no_warning_opt)
3584 suppress_warning (stmt, no_warning_opt);
3587 /* strcat (p, q) can be transformed into
3588 tmp = p + strlen (p); endptr = stpcpy (tmp, q);
3589 with length endptr - p if we need to compute the length
3590 later on. Don't do this transformation if we don't need
3591 it. */
3592 if (builtin_decl_implicit_p (BUILT_IN_STPCPY) && lhs == NULL_TREE)
3594 if (didx == 0)
3596 didx = new_stridx (dst);
3597 if (didx == 0)
3598 return;
3600 if (dsi == NULL)
3602 dsi = new_strinfo (dst, didx, NULL_TREE, false);
3603 set_strinfo (didx, dsi);
3604 find_equal_ptrs (dst, didx);
3606 else
3608 dsi = unshare_strinfo (dsi);
3609 dsi->nonzero_chars = NULL_TREE;
3610 dsi->full_string_p = false;
3611 dsi->next = 0;
3612 dsi->endptr = NULL_TREE;
3614 dsi->writable = true;
3615 dsi->stmt = stmt;
3616 dsi->dont_invalidate = true;
3618 return;
3621 tree dstlen = dsi->nonzero_chars;
3622 endptr = dsi->endptr;
3624 dsi = unshare_strinfo (dsi);
3625 dsi->endptr = NULL_TREE;
3626 dsi->stmt = NULL;
3627 dsi->writable = true;
3629 if (srclen != NULL_TREE)
3631 dsi->nonzero_chars = fold_build2_loc (loc, PLUS_EXPR,
3632 TREE_TYPE (dsi->nonzero_chars),
3633 dsi->nonzero_chars, srclen);
3634 gcc_assert (dsi->full_string_p);
3635 adjust_related_strinfos (loc, dsi, srclen);
3636 dsi->dont_invalidate = true;
3638 else
3640 dsi->nonzero_chars = NULL;
3641 dsi->full_string_p = false;
3642 if (lhs == NULL_TREE && builtin_decl_implicit_p (BUILT_IN_STPCPY))
3643 dsi->dont_invalidate = true;
3646 if (si != NULL)
3647 /* strcat src may not overlap dst, so src doesn't need to be
3648 invalidated either. */
3649 si->dont_invalidate = true;
3651 /* For now. Could remove the lhs from the call and add
3652 lhs = dst; afterwards. */
3653 if (lhs)
3654 return;
3656 fn = NULL_TREE;
3657 objsz = NULL_TREE;
3658 switch (bcode)
3660 case BUILT_IN_STRCAT:
3661 if (srclen != NULL_TREE)
3662 fn = builtin_decl_implicit (BUILT_IN_MEMCPY);
3663 else
3664 fn = builtin_decl_implicit (BUILT_IN_STRCPY);
3665 break;
3666 case BUILT_IN_STRCAT_CHK:
3667 if (srclen != NULL_TREE)
3668 fn = builtin_decl_explicit (BUILT_IN_MEMCPY_CHK);
3669 else
3670 fn = builtin_decl_explicit (BUILT_IN_STRCPY_CHK);
3671 objsz = gimple_call_arg (stmt, 2);
3672 break;
3673 default:
3674 gcc_unreachable ();
3677 if (fn == NULL_TREE)
3678 return;
3680 if (dsi && dstlen)
3682 tree type = TREE_TYPE (dstlen);
3684 /* Compute the size of the source sequence, including the nul. */
3685 tree srcsize = srclen ? srclen : size_zero_node;
3686 tree one = build_int_cst (type, 1);
3687 srcsize = fold_build2 (PLUS_EXPR, type, srcsize, one);
3688 tree dstsize = fold_build2 (PLUS_EXPR, type, dstlen, one);
3689 tree sptr = si && si->ptr ? si->ptr : src;
3691 no_warning_opt = check_bounds_or_overlap (stmt, dst, sptr, dstsize,
3692 srcsize);
3693 if (no_warning_opt)
3694 suppress_warning (stmt, no_warning_opt);
3697 tree len = NULL_TREE;
3698 if (srclen != NULL_TREE)
3700 args = TYPE_ARG_TYPES (TREE_TYPE (fn));
3701 type = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (args)));
3703 len = fold_convert_loc (loc, type, unshare_expr (srclen));
3704 len = fold_build2_loc (loc, PLUS_EXPR, type, len,
3705 build_int_cst (type, 1));
3706 len = force_gimple_operand_gsi (&m_gsi, len, true, NULL_TREE, true,
3707 GSI_SAME_STMT);
3709 if (endptr)
3710 dst = fold_convert_loc (loc, TREE_TYPE (dst), unshare_expr (endptr));
3711 else
3712 dst = fold_build2_loc (loc, POINTER_PLUS_EXPR, TREE_TYPE (dst), dst,
3713 fold_convert_loc (loc, sizetype,
3714 unshare_expr (dstlen)));
3715 dst = force_gimple_operand_gsi (&m_gsi, dst, true, NULL_TREE, true,
3716 GSI_SAME_STMT);
3717 if (objsz)
3719 objsz = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (objsz), objsz,
3720 fold_convert_loc (loc, TREE_TYPE (objsz),
3721 unshare_expr (dstlen)));
3722 objsz = force_gimple_operand_gsi (&m_gsi, objsz, true, NULL_TREE, true,
3723 GSI_SAME_STMT);
3725 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
3727 fprintf (dump_file, "Optimizing: ");
3728 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
3730 if (srclen != NULL_TREE)
3731 success = update_gimple_call (&m_gsi, fn, 3 + (objsz != NULL_TREE),
3732 dst, src, len, objsz);
3733 else
3734 success = update_gimple_call (&m_gsi, fn, 2 + (objsz != NULL_TREE),
3735 dst, src, objsz);
3736 if (success)
3738 stmt = gsi_stmt (m_gsi);
3739 update_stmt (stmt);
3740 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
3742 fprintf (dump_file, "into: ");
3743 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
3745 /* If srclen == NULL, note that current string length can be
3746 computed by transforming this strcpy into stpcpy. */
3747 if (srclen == NULL_TREE && dsi->dont_invalidate)
3748 dsi->stmt = stmt;
3749 adjust_last_stmt (dsi, stmt, true);
3750 if (srclen != NULL_TREE)
3752 laststmt.stmt = stmt;
3753 laststmt.len = srclen;
3754 laststmt.stridx = dsi->idx;
3757 else if (dump_file && (dump_flags & TDF_DETAILS) != 0)
3758 fprintf (dump_file, "not possible.\n");
3760 if (no_warning_opt)
3761 suppress_warning (stmt, no_warning_opt);
3764 /* Handle a call to an allocation function like alloca, malloc or calloc,
3765 or an ordinary allocation function declared with attribute alloc_size. */
3767 void
3768 strlen_pass::handle_alloc_call (built_in_function bcode)
3770 gimple *stmt = gsi_stmt (m_gsi);
3771 tree lhs = gimple_call_lhs (stmt);
3772 if (lhs == NULL_TREE)
3773 return;
3775 gcc_assert (get_stridx (lhs, stmt) == 0);
3776 int idx = new_stridx (lhs);
3777 tree length = NULL_TREE;
3778 if (bcode == BUILT_IN_CALLOC)
3779 length = build_int_cst (size_type_node, 0);
3780 strinfo *si = new_strinfo (lhs, idx, length, length != NULL_TREE);
3781 if (bcode == BUILT_IN_CALLOC)
3783 /* Only set STMT for calloc and malloc. */
3784 si->stmt = stmt;
3785 /* Only set ENDPTR for calloc. */
3786 si->endptr = lhs;
3788 else if (bcode == BUILT_IN_MALLOC)
3789 si->stmt = stmt;
3791 /* Set ALLOC is set for all allocation functions. */
3792 si->alloc = stmt;
3793 set_strinfo (idx, si);
3794 si->writable = true;
3795 si->dont_invalidate = true;
3798 /* Handle a call to memset.
3799 After a call to calloc, memset(,0,) is unnecessary.
3800 memset(malloc(n),0,n) is calloc(n,1).
3801 return true when the call is transformed, false otherwise.
3802 When nonnull uses RVALS to determine range information. */
3804 bool
3805 strlen_pass::handle_builtin_memset (bool *zero_write)
3807 gimple *memset_stmt = gsi_stmt (m_gsi);
3808 tree ptr = gimple_call_arg (memset_stmt, 0);
3809 tree memset_val = gimple_call_arg (memset_stmt, 1);
3810 tree memset_size = gimple_call_arg (memset_stmt, 2);
3812 /* Set to the non-constant offset added to PTR. */
3813 wide_int offrng[2];
3814 int idx1 = get_stridx (ptr, memset_stmt, offrng, ptr_qry.rvals);
3815 if (idx1 == 0
3816 && TREE_CODE (memset_val) == INTEGER_CST
3817 && ((TREE_CODE (memset_size) == INTEGER_CST
3818 && !integer_zerop (memset_size))
3819 || TREE_CODE (memset_size) == SSA_NAME))
3821 unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << CHAR_TYPE_SIZE) - 1;
3822 bool full_string_p = (wi::to_wide (memset_val) & mask) == 0;
3824 /* We only handle symbolic lengths when writing non-zero values. */
3825 if (full_string_p && TREE_CODE (memset_size) != INTEGER_CST)
3826 return false;
3828 idx1 = new_stridx (ptr);
3829 if (idx1 == 0)
3830 return false;
3831 tree newlen;
3832 if (full_string_p)
3833 newlen = build_int_cst (size_type_node, 0);
3834 else if (TREE_CODE (memset_size) == INTEGER_CST)
3835 newlen = fold_convert (size_type_node, memset_size);
3836 else
3837 newlen = memset_size;
3839 strinfo *dsi = new_strinfo (ptr, idx1, newlen, full_string_p);
3840 set_strinfo (idx1, dsi);
3841 find_equal_ptrs (ptr, idx1);
3842 dsi->dont_invalidate = true;
3843 dsi->writable = true;
3844 return false;
3847 if (idx1 <= 0)
3848 return false;
3849 strinfo *si1 = get_strinfo (idx1);
3850 if (!si1)
3851 return false;
3852 gimple *alloc_stmt = si1->alloc;
3853 if (!alloc_stmt || !is_gimple_call (alloc_stmt))
3854 return false;
3855 tree callee1 = gimple_call_fndecl (alloc_stmt);
3856 if (!valid_builtin_call (alloc_stmt))
3857 return false;
3858 tree alloc_size = gimple_call_arg (alloc_stmt, 0);
3860 /* Check for overflow. */
3861 maybe_warn_overflow (memset_stmt, false, memset_size, NULL, false, true);
3863 /* Bail when there is no statement associated with the destination
3864 (the statement may be null even when SI1->ALLOC is not). */
3865 if (!si1->stmt)
3866 return false;
3868 /* Avoid optimizing if store is at a variable offset from the beginning
3869 of the allocated object. */
3870 if (offrng[0] != 0 || offrng[0] != offrng[1])
3871 return false;
3873 /* Bail when the call writes a non-zero value. */
3874 if (!integer_zerop (memset_val))
3875 return false;
3877 /* Let the caller know the memset call cleared the destination. */
3878 *zero_write = true;
3880 enum built_in_function code1 = DECL_FUNCTION_CODE (callee1);
3881 if (code1 == BUILT_IN_CALLOC)
3882 /* Not touching alloc_stmt */ ;
3883 else if (code1 == BUILT_IN_MALLOC
3884 && operand_equal_p (memset_size, alloc_size, 0))
3886 /* Replace the malloc + memset calls with calloc. */
3887 gimple_stmt_iterator gsi1 = gsi_for_stmt (si1->stmt);
3888 update_gimple_call (&gsi1, builtin_decl_implicit (BUILT_IN_CALLOC), 2,
3889 alloc_size, build_one_cst (size_type_node));
3890 si1->nonzero_chars = build_int_cst (size_type_node, 0);
3891 si1->full_string_p = true;
3892 si1->stmt = gsi_stmt (gsi1);
3894 else
3895 return false;
3896 tree lhs = gimple_call_lhs (memset_stmt);
3897 unlink_stmt_vdef (memset_stmt);
3898 if (lhs)
3900 gimple *assign = gimple_build_assign (lhs, ptr);
3901 gsi_replace (&m_gsi, assign, false);
3903 else
3905 gsi_remove (&m_gsi, true);
3906 release_defs (memset_stmt);
3909 return true;
3912 /* Return first such statement if RES is used in statements testing its
3913 equality to zero, and null otherwise. If EXCLUSIVE is true, return
3914 nonnull if and only RES is used in such expressions exclusively and
3915 in none other. */
3917 gimple *
3918 use_in_zero_equality (tree res, bool exclusive)
3920 gimple *first_use = NULL;
3922 use_operand_p use_p;
3923 imm_use_iterator iter;
3925 FOR_EACH_IMM_USE_FAST (use_p, iter, res)
3927 gimple *use_stmt = USE_STMT (use_p);
3929 if (is_gimple_debug (use_stmt))
3930 continue;
3932 if (gimple_code (use_stmt) == GIMPLE_ASSIGN)
3934 tree_code code = gimple_assign_rhs_code (use_stmt);
3935 if (code == COND_EXPR)
3937 tree cond_expr = gimple_assign_rhs1 (use_stmt);
3938 if ((TREE_CODE (cond_expr) != EQ_EXPR
3939 && (TREE_CODE (cond_expr) != NE_EXPR))
3940 || !integer_zerop (TREE_OPERAND (cond_expr, 1)))
3942 if (exclusive)
3943 return NULL;
3944 continue;
3947 else if (code == EQ_EXPR || code == NE_EXPR)
3949 if (!integer_zerop (gimple_assign_rhs2 (use_stmt)))
3951 if (exclusive)
3952 return NULL;
3953 continue;
3956 else if (exclusive)
3957 return NULL;
3958 else
3959 continue;
3961 else if (gimple_code (use_stmt) == GIMPLE_COND)
3963 tree_code code = gimple_cond_code (use_stmt);
3964 if ((code != EQ_EXPR && code != NE_EXPR)
3965 || !integer_zerop (gimple_cond_rhs (use_stmt)))
3967 if (exclusive)
3968 return NULL;
3969 continue;
3972 else if (exclusive)
3973 return NULL;
3974 else
3975 continue;
3977 if (!first_use)
3978 first_use = use_stmt;
3981 return first_use;
3984 /* Handle a call to memcmp. We try to handle small comparisons by
3985 converting them to load and compare, and replacing the call to memcmp
3986 with a __builtin_memcmp_eq call where possible.
3987 return true when call is transformed, return false otherwise. */
3989 bool
3990 strlen_pass::handle_builtin_memcmp ()
3992 gcall *stmt = as_a <gcall *> (gsi_stmt (m_gsi));
3993 tree res = gimple_call_lhs (stmt);
3995 if (!res || !use_in_zero_equality (res))
3996 return false;
3998 tree arg1 = gimple_call_arg (stmt, 0);
3999 tree arg2 = gimple_call_arg (stmt, 1);
4000 tree len = gimple_call_arg (stmt, 2);
4001 unsigned HOST_WIDE_INT leni;
4003 if (tree_fits_uhwi_p (len)
4004 && (leni = tree_to_uhwi (len)) <= GET_MODE_SIZE (word_mode)
4005 && pow2p_hwi (leni))
4007 leni *= CHAR_TYPE_SIZE;
4008 unsigned align1 = get_pointer_alignment (arg1);
4009 unsigned align2 = get_pointer_alignment (arg2);
4010 unsigned align = MIN (align1, align2);
4011 scalar_int_mode mode;
4012 if (int_mode_for_size (leni, 1).exists (&mode)
4013 && (align >= leni || !targetm.slow_unaligned_access (mode, align)))
4015 location_t loc = gimple_location (stmt);
4016 tree type, off;
4017 type = build_nonstandard_integer_type (leni, 1);
4018 gcc_assert (known_eq (GET_MODE_BITSIZE (TYPE_MODE (type)), leni));
4019 tree ptrtype = build_pointer_type_for_mode (char_type_node,
4020 ptr_mode, true);
4021 off = build_int_cst (ptrtype, 0);
4022 arg1 = build2_loc (loc, MEM_REF, type, arg1, off);
4023 arg2 = build2_loc (loc, MEM_REF, type, arg2, off);
4024 tree tem1 = fold_const_aggregate_ref (arg1);
4025 if (tem1)
4026 arg1 = tem1;
4027 tree tem2 = fold_const_aggregate_ref (arg2);
4028 if (tem2)
4029 arg2 = tem2;
4030 res = fold_convert_loc (loc, TREE_TYPE (res),
4031 fold_build2_loc (loc, NE_EXPR,
4032 boolean_type_node,
4033 arg1, arg2));
4034 gimplify_and_update_call_from_tree (&m_gsi, res);
4035 return true;
4039 gimple_call_set_fndecl (stmt, builtin_decl_explicit (BUILT_IN_MEMCMP_EQ));
4040 return true;
4043 /* Given strinfo IDX for ARG, sets LENRNG[] to the range of lengths
4044 of the string(s) referenced by ARG if it can be determined.
4045 If the length cannot be determined, sets *SIZE to the size of
4046 the array the string is stored in, if any. If no such array is
4047 known, sets *SIZE to -1. When the strings are nul-terminated sets
4048 *NULTERM to true, otherwise to false. When nonnull uses RVALS to
4049 determine range information. Returns true on success. */
4051 bool
4052 strlen_pass::get_len_or_size (gimple *stmt, tree arg, int idx,
4053 unsigned HOST_WIDE_INT lenrng[2],
4054 unsigned HOST_WIDE_INT *size, bool *nulterm)
4056 /* Invalidate. */
4057 *size = HOST_WIDE_INT_M1U;
4059 if (idx < 0)
4061 /* IDX is the inverted constant string length. */
4062 lenrng[0] = ~idx;
4063 lenrng[1] = lenrng[0];
4064 *nulterm = true;
4065 return true;
4068 /* Set so that both LEN and ~LEN are invalid lengths, i.e., maximum
4069 possible length + 1. */
4070 lenrng[0] = lenrng[1] = HOST_WIDE_INT_MAX;
4072 if (strinfo *si = idx ? get_strinfo (idx) : NULL)
4074 /* FIXME: Handle all this in_range_strlen_dynamic. */
4075 if (!si->nonzero_chars)
4077 else if (tree_fits_uhwi_p (si->nonzero_chars))
4079 lenrng[0] = tree_to_uhwi (si->nonzero_chars);
4080 *nulterm = si->full_string_p;
4081 /* Set the upper bound only if the string is known to be
4082 nul-terminated, otherwise leave it at maximum + 1. */
4083 if (*nulterm)
4084 lenrng[1] = lenrng[0];
4086 else if (TREE_CODE (si->nonzero_chars) == SSA_NAME)
4088 value_range r;
4089 get_range_query (cfun)->range_of_expr (r, si->nonzero_chars);
4090 if (r.kind () == VR_RANGE)
4092 lenrng[0] = r.lower_bound ().to_uhwi ();
4093 lenrng[1] = r.upper_bound ().to_uhwi ();
4094 *nulterm = si->full_string_p;
4099 if (lenrng[0] != HOST_WIDE_INT_MAX)
4100 return true;
4102 /* Compute the minimum and maximum real or possible lengths. */
4103 c_strlen_data lendata = { };
4104 /* Set MAXBOUND to an arbitrary non-null non-integer node as a request
4105 to have it set to the length of the longest string in a PHI. */
4106 lendata.maxbound = arg;
4107 get_range_strlen_dynamic (arg, stmt, &lendata, ptr_qry);
4109 unsigned HOST_WIDE_INT maxbound = HOST_WIDE_INT_M1U;
4110 if (tree_fits_uhwi_p (lendata.maxbound)
4111 && !integer_all_onesp (lendata.maxbound))
4112 maxbound = tree_to_uhwi (lendata.maxbound);
4114 if (tree_fits_uhwi_p (lendata.minlen) && tree_fits_uhwi_p (lendata.maxlen))
4116 unsigned HOST_WIDE_INT minlen = tree_to_uhwi (lendata.minlen);
4117 unsigned HOST_WIDE_INT maxlen = tree_to_uhwi (lendata.maxlen);
4119 /* The longest string in this data model. */
4120 const unsigned HOST_WIDE_INT lenmax
4121 = tree_to_uhwi (max_object_size ()) - 2;
4123 if (maxbound == HOST_WIDE_INT_M1U)
4125 lenrng[0] = minlen;
4126 lenrng[1] = maxlen;
4127 *nulterm = minlen == maxlen;
4129 else if (maxlen < lenmax)
4131 *size = maxbound + 1;
4132 *nulterm = false;
4134 else
4135 return false;
4137 return true;
4140 if (maxbound != HOST_WIDE_INT_M1U
4141 && lendata.maxlen
4142 && !integer_all_onesp (lendata.maxlen))
4144 /* Set *SIZE to LENDATA.MAXBOUND which is a conservative estimate
4145 of the longest string based on the sizes of the arrays referenced
4146 by ARG. */
4147 *size = maxbound + 1;
4148 *nulterm = false;
4149 return true;
4152 return false;
4155 /* If IDX1 and IDX2 refer to strings A and B of unequal lengths, return
4156 the result of 0 == strncmp (A, B, BOUND) (which is the same as strcmp
4157 for a sufficiently large BOUND). If the result is based on the length
4158 of one string being greater than the longest string that would fit in
4159 the array pointer to by the argument, set *PLEN and *PSIZE to
4160 the corresponding length (or its complement when the string is known
4161 to be at least as long and need not be nul-terminated) and size.
4162 Otherwise return null. */
4164 tree
4165 strlen_pass::strxcmp_eqz_result (gimple *stmt, tree arg1, int idx1,
4166 tree arg2, int idx2,
4167 unsigned HOST_WIDE_INT bound,
4168 unsigned HOST_WIDE_INT len[2],
4169 unsigned HOST_WIDE_INT *psize)
4171 /* Determine the range the length of each string is in and whether it's
4172 known to be nul-terminated, or the size of the array it's stored in. */
4173 bool nul1, nul2;
4174 unsigned HOST_WIDE_INT siz1, siz2;
4175 unsigned HOST_WIDE_INT len1rng[2], len2rng[2];
4176 if (!get_len_or_size (stmt, arg1, idx1, len1rng, &siz1, &nul1)
4177 || !get_len_or_size (stmt, arg2, idx2, len2rng, &siz2, &nul2))
4178 return NULL_TREE;
4180 /* BOUND is set to HWI_M1U for strcmp and less to strncmp, and LENiRNG
4181 to HWI_MAX when invalid. Adjust the length of each string to consider
4182 to be no more than BOUND. */
4183 if (len1rng[0] < HOST_WIDE_INT_MAX && len1rng[0] > bound)
4184 len1rng[0] = bound;
4185 if (len1rng[1] < HOST_WIDE_INT_MAX && len1rng[1] > bound)
4186 len1rng[1] = bound;
4187 if (len2rng[0] < HOST_WIDE_INT_MAX && len2rng[0] > bound)
4188 len2rng[0] = bound;
4189 if (len2rng[1] < HOST_WIDE_INT_MAX && len2rng[1] > bound)
4190 len2rng[1] = bound;
4192 /* Two empty strings are equal. */
4193 if (len1rng[1] == 0 && len2rng[1] == 0)
4194 return integer_one_node;
4196 /* The strings are definitely unequal when the lower bound of the length
4197 of one of them is greater than the length of the longest string that
4198 would fit into the other array. */
4199 if (len1rng[0] == HOST_WIDE_INT_MAX
4200 && len2rng[0] != HOST_WIDE_INT_MAX
4201 && ((len2rng[0] < bound && len2rng[0] >= siz1)
4202 || len2rng[0] > siz1))
4204 *psize = siz1;
4205 len[0] = len1rng[0];
4206 /* Set LEN[0] to the lower bound of ARG1's length when it's
4207 nul-terminated or to the complement of its minimum length
4208 otherwise, */
4209 len[1] = nul2 ? len2rng[0] : ~len2rng[0];
4210 return integer_zero_node;
4213 if (len2rng[0] == HOST_WIDE_INT_MAX
4214 && len1rng[0] != HOST_WIDE_INT_MAX
4215 && ((len1rng[0] < bound && len1rng[0] >= siz2)
4216 || len1rng[0] > siz2))
4218 *psize = siz2;
4219 len[0] = nul1 ? len1rng[0] : ~len1rng[0];
4220 len[1] = len2rng[0];
4221 return integer_zero_node;
4224 /* The strings are also definitely unequal when their lengths are unequal
4225 and at least one is nul-terminated. */
4226 if (len1rng[0] != HOST_WIDE_INT_MAX
4227 && len2rng[0] != HOST_WIDE_INT_MAX
4228 && ((len1rng[1] < len2rng[0] && nul1)
4229 || (len2rng[1] < len1rng[0] && nul2)))
4231 if (bound <= len1rng[0] || bound <= len2rng[0])
4232 *psize = bound;
4233 else
4234 *psize = HOST_WIDE_INT_M1U;
4236 len[0] = len1rng[0];
4237 len[1] = len2rng[0];
4238 return integer_zero_node;
4241 /* The string lengths may be equal or unequal. Even when equal and
4242 both strings nul-terminated, without the string contents there's
4243 no way to determine whether they are equal. */
4244 return NULL_TREE;
4247 /* Diagnose pointless calls to strcmp or strncmp STMT with string
4248 arguments of lengths LEN or size SIZ and (for strncmp) BOUND,
4249 whose result is used in equality expressions that evaluate to
4250 a constant due to one argument being longer than the size of
4251 the other. */
4253 static void
4254 maybe_warn_pointless_strcmp (gimple *stmt, HOST_WIDE_INT bound,
4255 unsigned HOST_WIDE_INT len[2],
4256 unsigned HOST_WIDE_INT siz)
4258 tree lhs = gimple_call_lhs (stmt);
4259 gimple *use = use_in_zero_equality (lhs, /* exclusive = */ false);
4260 if (!use)
4261 return;
4263 bool at_least = false;
4265 /* Excessive LEN[i] indicates a lower bound. */
4266 if (len[0] > HOST_WIDE_INT_MAX)
4268 at_least = true;
4269 len[0] = ~len[0];
4272 if (len[1] > HOST_WIDE_INT_MAX)
4274 at_least = true;
4275 len[1] = ~len[1];
4278 unsigned HOST_WIDE_INT minlen = MIN (len[0], len[1]);
4280 /* FIXME: Include a note pointing to the declaration of the smaller
4281 array. */
4282 location_t stmt_loc = gimple_or_expr_nonartificial_location (stmt, lhs);
4284 tree callee = gimple_call_fndecl (stmt);
4285 bool warned = false;
4286 if (siz <= minlen && bound == -1)
4287 warned = warning_at (stmt_loc, OPT_Wstring_compare,
4288 (at_least
4289 ? G_("%qD of a string of length %wu or more and "
4290 "an array of size %wu evaluates to nonzero")
4291 : G_("%qD of a string of length %wu and an array "
4292 "of size %wu evaluates to nonzero")),
4293 callee, minlen, siz);
4294 else if (!at_least && siz <= HOST_WIDE_INT_MAX)
4296 if (len[0] != HOST_WIDE_INT_MAX && len[1] != HOST_WIDE_INT_MAX)
4297 warned = warning_at (stmt_loc, OPT_Wstring_compare,
4298 "%qD of strings of length %wu and %wu "
4299 "and bound of %wu evaluates to nonzero",
4300 callee, len[0], len[1], bound);
4301 else
4302 warned = warning_at (stmt_loc, OPT_Wstring_compare,
4303 "%qD of a string of length %wu, an array "
4304 "of size %wu and bound of %wu evaluates to "
4305 "nonzero",
4306 callee, minlen, siz, bound);
4309 if (!warned)
4310 return;
4312 location_t use_loc = gimple_location (use);
4313 if (LOCATION_LINE (stmt_loc) != LOCATION_LINE (use_loc))
4314 inform (use_loc, "in this expression");
4318 /* Optimize a call to strcmp or strncmp either by folding it to a constant
4319 when possible or by transforming the latter to the former. Warn about
4320 calls where the length of one argument is greater than the size of
4321 the array to which the other argument points if the latter's length
4322 is not known. Return true when the call has been transformed into
4323 another and false otherwise. */
4325 bool
4326 strlen_pass::handle_builtin_string_cmp ()
4328 gcall *stmt = as_a <gcall *> (gsi_stmt (m_gsi));
4329 tree lhs = gimple_call_lhs (stmt);
4331 if (!lhs)
4332 return false;
4334 tree arg1 = gimple_call_arg (stmt, 0);
4335 tree arg2 = gimple_call_arg (stmt, 1);
4336 int idx1 = get_stridx (arg1, stmt);
4337 int idx2 = get_stridx (arg2, stmt);
4339 /* For strncmp set to the value of the third argument if known. */
4340 HOST_WIDE_INT bound = -1;
4341 tree len = NULL_TREE;
4342 /* Extract the strncmp bound. */
4343 if (gimple_call_num_args (stmt) == 3)
4345 len = gimple_call_arg (stmt, 2);
4346 if (tree_fits_shwi_p (len))
4347 bound = tree_to_shwi (len);
4349 /* If the bound argument is NOT known, do nothing. */
4350 if (bound < 0)
4351 return false;
4354 /* Avoid folding if either argument is not a nul-terminated array.
4355 Defer warning until later. */
4356 if (!check_nul_terminated_array (NULL_TREE, arg1, len)
4357 || !check_nul_terminated_array (NULL_TREE, arg2, len))
4358 return false;
4361 /* Set to the length of one argument (or its complement if it's
4362 the lower bound of a range) and the size of the array storing
4363 the other if the result is based on the former being equal to
4364 or greater than the latter. */
4365 unsigned HOST_WIDE_INT len[2] = { HOST_WIDE_INT_MAX, HOST_WIDE_INT_MAX };
4366 unsigned HOST_WIDE_INT siz = HOST_WIDE_INT_M1U;
4368 /* Try to determine if the two strings are either definitely equal
4369 or definitely unequal and if so, either fold the result to zero
4370 (when equal) or set the range of the result to ~[0, 0] otherwise. */
4371 if (tree eqz = strxcmp_eqz_result (stmt, arg1, idx1, arg2, idx2, bound,
4372 len, &siz))
4374 if (integer_zerop (eqz))
4376 maybe_warn_pointless_strcmp (stmt, bound, len, siz);
4378 /* When the lengths of the first two string arguments are
4379 known to be unequal set the range of the result to non-zero.
4380 This allows the call to be eliminated if its result is only
4381 used in tests for equality to zero. */
4382 value_range nz;
4383 nz.set_nonzero (TREE_TYPE (lhs));
4384 set_range_info (lhs, nz);
4385 return false;
4387 /* When the two strings are definitely equal (such as when they
4388 are both empty) fold the call to the constant result. */
4389 replace_call_with_value (&m_gsi, integer_zero_node);
4390 return true;
4394 /* Return if nothing is known about the strings pointed to by ARG1
4395 and ARG2. */
4396 if (idx1 == 0 && idx2 == 0)
4397 return false;
4399 /* Determine either the length or the size of each of the strings,
4400 whichever is available. */
4401 HOST_WIDE_INT cstlen1 = -1, cstlen2 = -1;
4402 HOST_WIDE_INT arysiz1 = -1, arysiz2 = -1;
4405 unsigned HOST_WIDE_INT len1rng[2], len2rng[2];
4406 unsigned HOST_WIDE_INT arsz1, arsz2;
4407 bool nulterm[2];
4409 if (!get_len_or_size (stmt, arg1, idx1, len1rng, &arsz1, nulterm)
4410 || !get_len_or_size (stmt, arg2, idx2, len2rng, &arsz2, nulterm + 1))
4411 return false;
4413 if (len1rng[0] == len1rng[1] && len1rng[0] < HOST_WIDE_INT_MAX)
4414 cstlen1 = len1rng[0];
4415 else if (arsz1 < HOST_WIDE_INT_M1U)
4416 arysiz1 = arsz1;
4418 if (len2rng[0] == len2rng[1] && len2rng[0] < HOST_WIDE_INT_MAX)
4419 cstlen2 = len2rng[0];
4420 else if (arsz2 < HOST_WIDE_INT_M1U)
4421 arysiz2 = arsz2;
4424 /* Bail if neither the string length nor the size of the array
4425 it is stored in can be determined. */
4426 if ((cstlen1 < 0 && arysiz1 < 0)
4427 || (cstlen2 < 0 && arysiz2 < 0)
4428 || (cstlen1 < 0 && cstlen2 < 0))
4429 return false;
4431 if (cstlen1 >= 0)
4432 ++cstlen1;
4433 if (cstlen2 >= 0)
4434 ++cstlen2;
4436 /* The exact number of characters to compare. */
4437 HOST_WIDE_INT cmpsiz;
4438 if (cstlen1 >= 0 && cstlen2 >= 0)
4439 cmpsiz = MIN (cstlen1, cstlen2);
4440 else if (cstlen1 >= 0)
4441 cmpsiz = cstlen1;
4442 else
4443 cmpsiz = cstlen2;
4444 if (bound >= 0)
4445 cmpsiz = MIN (cmpsiz, bound);
4446 /* The size of the array in which the unknown string is stored. */
4447 HOST_WIDE_INT varsiz = arysiz1 < 0 ? arysiz2 : arysiz1;
4449 if ((varsiz < 0 || cmpsiz < varsiz) && use_in_zero_equality (lhs))
4451 /* If the known length is less than the size of the other array
4452 and the strcmp result is only used to test equality to zero,
4453 transform the call to the equivalent _eq call. */
4454 if (tree fn = builtin_decl_implicit (bound < 0 ? BUILT_IN_STRCMP_EQ
4455 : BUILT_IN_STRNCMP_EQ))
4457 tree n = build_int_cst (size_type_node, cmpsiz);
4458 update_gimple_call (&m_gsi, fn, 3, arg1, arg2, n);
4459 return true;
4463 return false;
4466 /* Handle a POINTER_PLUS_EXPR statement.
4467 For p = "abcd" + 2; compute associated length, or if
4468 p = q + off is pointing to a '\0' character of a string, call
4469 zero_length_string on it. */
4471 void
4472 strlen_pass::handle_pointer_plus ()
4474 gimple *stmt = gsi_stmt (m_gsi);
4475 tree lhs = gimple_assign_lhs (stmt), off;
4476 int idx = get_stridx (gimple_assign_rhs1 (stmt), stmt);
4477 strinfo *si, *zsi;
4479 if (idx == 0)
4480 return;
4482 if (idx < 0)
4484 tree off = gimple_assign_rhs2 (stmt);
4485 if (tree_fits_uhwi_p (off)
4486 && tree_to_uhwi (off) <= (unsigned HOST_WIDE_INT) ~idx)
4487 ssa_ver_to_stridx[SSA_NAME_VERSION (lhs)]
4488 = ~(~idx - (int) tree_to_uhwi (off));
4489 return;
4492 si = get_strinfo (idx);
4493 if (si == NULL || si->nonzero_chars == NULL_TREE)
4494 return;
4496 off = gimple_assign_rhs2 (stmt);
4497 zsi = NULL;
4498 if (si->full_string_p && operand_equal_p (si->nonzero_chars, off, 0))
4499 zsi = zero_length_string (lhs, si);
4500 else if (TREE_CODE (off) == SSA_NAME)
4502 gimple *def_stmt = SSA_NAME_DEF_STMT (off);
4503 if (gimple_assign_single_p (def_stmt)
4504 && si->full_string_p
4505 && operand_equal_p (si->nonzero_chars,
4506 gimple_assign_rhs1 (def_stmt), 0))
4507 zsi = zero_length_string (lhs, si);
4509 if (zsi != NULL
4510 && si->endptr != NULL_TREE
4511 && si->endptr != lhs
4512 && TREE_CODE (si->endptr) == SSA_NAME)
4514 enum tree_code rhs_code
4515 = useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (si->endptr))
4516 ? SSA_NAME : NOP_EXPR;
4517 gimple_assign_set_rhs_with_ops (&m_gsi, rhs_code, si->endptr);
4518 gcc_assert (gsi_stmt (m_gsi) == stmt);
4519 update_stmt (stmt);
4523 /* Set LENRANGE to the number of nonzero bytes for a store of TYPE and
4524 clear all flags. Return true on success and false on failure. */
4526 static bool
4527 nonzero_bytes_for_type (tree type, unsigned lenrange[3],
4528 bool *nulterm, bool *allnul, bool *allnonnul)
4530 /* Use the size of the type of the expression as the size of the store,
4531 and set the upper bound of the length range to that of the size.
4532 Nothing is known about the contents so clear all flags. */
4533 tree typesize = TYPE_SIZE_UNIT (type);
4534 if (!type)
4535 return false;
4537 if (!tree_fits_uhwi_p (typesize))
4538 return false;
4540 unsigned HOST_WIDE_INT sz = tree_to_uhwi (typesize);
4541 if (sz > UINT_MAX)
4542 return false;
4544 lenrange[2] = sz;
4545 lenrange[1] = lenrange[2] ? lenrange[2] - 1 : 0;
4546 lenrange[0] = 0;
4547 *nulterm = false;
4548 *allnul = false;
4549 *allnonnul = false;
4550 return true;
4553 /* Recursively determine the minimum and maximum number of leading nonzero
4554 bytes in the representation of EXP and set LENRANGE[0] and LENRANGE[1]
4555 to each.
4556 Sets LENRANGE[2] to the total size of the access (which may be less
4557 than LENRANGE[1] when what's being referenced by EXP is a pointer
4558 rather than an array).
4559 Sets *NULTERM if the representation contains a zero byte, sets *ALLNUL
4560 if all the bytes are zero, and *ALLNONNUL is all are nonzero.
4561 OFFSET and NBYTES are the offset into the representation and
4562 the size of the access to it determined from an ADDR_EXPR (i.e.,
4563 a pointer) or MEM_REF or zero for other expressions.
4564 Uses RVALS to determine range information.
4565 Avoids recursing deeper than the limits in SNLIM allow.
4566 Returns true on success and false otherwise. */
4568 bool
4569 strlen_pass::count_nonzero_bytes (tree exp, gimple *stmt,
4570 unsigned HOST_WIDE_INT offset,
4571 unsigned HOST_WIDE_INT nbytes,
4572 unsigned lenrange[3], bool *nulterm,
4573 bool *allnul, bool *allnonnul,
4574 ssa_name_limit_t &snlim)
4576 if (TREE_CODE (exp) == SSA_NAME)
4578 /* Handle non-zero single-character stores specially. */
4579 tree type = TREE_TYPE (exp);
4580 if (TREE_CODE (type) == INTEGER_TYPE
4581 && TYPE_MODE (type) == TYPE_MODE (char_type_node)
4582 && TYPE_PRECISION (type) == TYPE_PRECISION (char_type_node)
4583 && tree_expr_nonzero_p (exp))
4585 /* If the character EXP is known to be non-zero (even if its
4586 exact value is not known) recurse once to set the range
4587 for an arbitrary constant. */
4588 exp = build_int_cst (type, 1);
4589 return count_nonzero_bytes (exp, stmt,
4590 offset, 1, lenrange,
4591 nulterm, allnul, allnonnul, snlim);
4594 gimple *stmt = SSA_NAME_DEF_STMT (exp);
4595 if (gimple_assign_single_p (stmt))
4597 exp = gimple_assign_rhs1 (stmt);
4598 if (!DECL_P (exp)
4599 && TREE_CODE (exp) != CONSTRUCTOR
4600 && TREE_CODE (exp) != MEM_REF)
4601 return false;
4602 /* Handle DECLs, CONSTRUCTOR and MEM_REF below. */
4604 else if (gimple_code (stmt) == GIMPLE_PHI)
4606 /* Avoid processing an SSA_NAME that has already been visited
4607 or if an SSA_NAME limit has been reached. Indicate success
4608 if the former and failure if the latter. */
4609 if (int res = snlim.next_phi (exp))
4610 return res > 0;
4612 /* Determine the minimum and maximum from the PHI arguments. */
4613 unsigned int n = gimple_phi_num_args (stmt);
4614 for (unsigned i = 0; i != n; i++)
4616 tree def = gimple_phi_arg_def (stmt, i);
4617 if (!count_nonzero_bytes (def, stmt,
4618 offset, nbytes, lenrange, nulterm,
4619 allnul, allnonnul, snlim))
4620 return false;
4623 return true;
4627 if (TREE_CODE (exp) == CONSTRUCTOR)
4629 if (nbytes)
4630 /* If NBYTES has already been determined by an outer MEM_REF
4631 fail rather than overwriting it (this shouldn't happen). */
4632 return false;
4634 tree type = TREE_TYPE (exp);
4635 tree size = TYPE_SIZE_UNIT (type);
4636 if (!size || !tree_fits_uhwi_p (size))
4637 return false;
4639 unsigned HOST_WIDE_INT byte_size = tree_to_uhwi (size);
4640 if (byte_size < offset)
4641 return false;
4643 nbytes = byte_size - offset;
4646 if (TREE_CODE (exp) == MEM_REF)
4648 if (nbytes)
4649 return false;
4651 tree arg = TREE_OPERAND (exp, 0);
4652 tree off = TREE_OPERAND (exp, 1);
4654 if (TREE_CODE (off) != INTEGER_CST || !tree_fits_uhwi_p (off))
4655 return false;
4657 unsigned HOST_WIDE_INT wioff = tree_to_uhwi (off);
4658 if (INT_MAX < wioff)
4659 return false;
4661 offset += wioff;
4662 if (INT_MAX < offset)
4663 return false;
4665 /* The size of the MEM_REF access determines the number of bytes. */
4666 tree type = TREE_TYPE (exp);
4667 tree typesize = TYPE_SIZE_UNIT (type);
4668 if (!typesize || !tree_fits_uhwi_p (typesize))
4669 return false;
4670 nbytes = tree_to_uhwi (typesize);
4671 if (!nbytes)
4672 return false;
4674 /* Handle MEM_REF = SSA_NAME types of assignments. */
4675 return count_nonzero_bytes_addr (arg, stmt,
4676 offset, nbytes, lenrange, nulterm,
4677 allnul, allnonnul, snlim);
4680 if (VAR_P (exp) || TREE_CODE (exp) == CONST_DECL)
4682 /* If EXP can be folded into a constant use the result. Otherwise
4683 proceed to use EXP to determine a range of the result. */
4684 if (tree fold_exp = ctor_for_folding (exp))
4685 if (fold_exp != error_mark_node)
4686 exp = fold_exp;
4689 const char *prep = NULL;
4690 if (TREE_CODE (exp) == STRING_CST)
4692 unsigned nchars = TREE_STRING_LENGTH (exp);
4693 if (nchars < offset)
4694 return false;
4696 if (!nbytes)
4697 /* If NBYTES hasn't been determined earlier, either from ADDR_EXPR
4698 (i.e., it's the size of a pointer), or from MEM_REF (as the size
4699 of the access), set it here to the size of the string, including
4700 all internal and trailing nuls if the string has any. */
4701 nbytes = nchars - offset;
4702 else if (nchars - offset < nbytes)
4703 return false;
4705 prep = TREE_STRING_POINTER (exp) + offset;
4708 unsigned char buf[256];
4709 if (!prep)
4711 if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
4712 return false;
4713 /* If the pointer to representation hasn't been set above
4714 for STRING_CST point it at the buffer. */
4715 prep = reinterpret_cast <char *>(buf);
4716 /* Try to extract the representation of the constant object
4717 or expression starting from the offset. */
4718 unsigned repsize = native_encode_expr (exp, buf, sizeof buf, offset);
4719 if (repsize < nbytes)
4721 /* This should only happen when REPSIZE is zero because EXP
4722 doesn't denote an object with a known initializer, except
4723 perhaps when the reference reads past its end. */
4724 lenrange[0] = 0;
4725 prep = NULL;
4727 else if (!nbytes)
4728 nbytes = repsize;
4729 else if (nbytes < repsize)
4730 return false;
4733 if (!nbytes)
4734 return nonzero_bytes_for_type (TREE_TYPE (exp), lenrange,
4735 nulterm, allnul, allnonnul);
4737 /* Compute the number of leading nonzero bytes in the representation
4738 and update the minimum and maximum. */
4739 unsigned HOST_WIDE_INT n = prep ? strnlen (prep, nbytes) : nbytes;
4741 if (n < lenrange[0])
4742 lenrange[0] = n;
4743 if (lenrange[1] < n)
4744 lenrange[1] = n;
4746 /* Set the size of the representation. */
4747 if (lenrange[2] < nbytes)
4748 lenrange[2] = nbytes;
4750 /* Clear NULTERM if none of the bytes is zero. */
4751 if (n == nbytes)
4752 *nulterm = false;
4754 if (n)
4756 /* When the initial number of non-zero bytes N is non-zero, reset
4757 *ALLNUL; if N is less than that the size of the representation
4758 also clear *ALLNONNUL. */
4759 *allnul = false;
4760 if (n < nbytes)
4761 *allnonnul = false;
4763 else if (*allnul || *allnonnul)
4765 *allnonnul = false;
4767 if (*allnul)
4769 /* When either ALLNUL is set and N is zero, also determine
4770 whether all subsequent bytes after the first one (which
4771 is nul) are zero or nonzero and clear ALLNUL if not. */
4772 for (const char *p = prep; p != prep + nbytes; ++p)
4773 if (*p)
4775 *allnul = false;
4776 break;
4781 return true;
4784 /* Like count_nonzero_bytes, but instead of counting bytes in EXP, count
4785 bytes that are pointed to by EXP, which should be a pointer. */
4787 bool
4788 strlen_pass::count_nonzero_bytes_addr (tree exp, gimple *stmt,
4789 unsigned HOST_WIDE_INT offset,
4790 unsigned HOST_WIDE_INT nbytes,
4791 unsigned lenrange[3], bool *nulterm,
4792 bool *allnul, bool *allnonnul,
4793 ssa_name_limit_t &snlim)
4795 int idx = get_stridx (exp, stmt);
4796 if (idx > 0)
4798 strinfo *si = get_strinfo (idx);
4799 if (!si)
4800 return false;
4802 /* Handle both constant lengths as well non-constant lengths
4803 in some range. */
4804 unsigned HOST_WIDE_INT minlen, maxlen;
4805 if (tree_fits_shwi_p (si->nonzero_chars))
4806 minlen = maxlen = tree_to_shwi (si->nonzero_chars);
4807 else if (si->nonzero_chars
4808 && TREE_CODE (si->nonzero_chars) == SSA_NAME)
4810 value_range vr;
4811 ptr_qry.rvals->range_of_expr (vr, si->nonzero_chars, stmt);
4812 if (vr.kind () != VR_RANGE)
4813 return false;
4815 minlen = tree_to_uhwi (vr.min ());
4816 maxlen = tree_to_uhwi (vr.max ());
4818 else
4819 return false;
4821 if (maxlen < offset)
4822 return false;
4824 minlen = minlen < offset ? 0 : minlen - offset;
4825 maxlen -= offset;
4826 if (maxlen + 1 < nbytes)
4827 return false;
4829 if (nbytes <= minlen)
4830 *nulterm = false;
4832 if (nbytes < minlen)
4834 minlen = nbytes;
4835 if (nbytes < maxlen)
4836 maxlen = nbytes;
4839 if (minlen < lenrange[0])
4840 lenrange[0] = minlen;
4841 if (lenrange[1] < maxlen)
4842 lenrange[1] = maxlen;
4844 if (lenrange[2] < nbytes)
4845 lenrange[2] = nbytes;
4847 /* Since only the length of the string are known and not its contents,
4848 clear ALLNUL and ALLNONNUL purely on the basis of the length. */
4849 *allnul = false;
4850 if (minlen < nbytes)
4851 *allnonnul = false;
4853 return true;
4856 if (TREE_CODE (exp) == ADDR_EXPR)
4857 return count_nonzero_bytes (TREE_OPERAND (exp, 0), stmt,
4858 offset, nbytes,
4859 lenrange, nulterm, allnul, allnonnul, snlim);
4861 if (TREE_CODE (exp) == SSA_NAME)
4863 gimple *stmt = SSA_NAME_DEF_STMT (exp);
4864 if (gimple_code (stmt) == GIMPLE_PHI)
4866 /* Avoid processing an SSA_NAME that has already been visited
4867 or if an SSA_NAME limit has been reached. Indicate success
4868 if the former and failure if the latter. */
4869 if (int res = snlim.next_phi (exp))
4870 return res > 0;
4872 /* Determine the minimum and maximum from the PHI arguments. */
4873 unsigned int n = gimple_phi_num_args (stmt);
4874 for (unsigned i = 0; i != n; i++)
4876 tree def = gimple_phi_arg_def (stmt, i);
4877 if (!count_nonzero_bytes_addr (def, stmt,
4878 offset, nbytes, lenrange,
4879 nulterm, allnul, allnonnul,
4880 snlim))
4881 return false;
4884 return true;
4888 /* Otherwise we don't know anything. */
4889 lenrange[0] = 0;
4890 if (lenrange[1] < nbytes)
4891 lenrange[1] = nbytes;
4892 if (lenrange[2] < nbytes)
4893 lenrange[2] = nbytes;
4894 *nulterm = false;
4895 *allnul = false;
4896 *allnonnul = false;
4897 return true;
4900 /* Same as above except with an implicit SSA_NAME limit. When EXPR_OR_TYPE
4901 is a type rather than an expression use its size to compute the range.
4902 RVALS is used to determine ranges of dynamically computed string lengths
4903 (the results of strlen). */
4905 bool
4906 strlen_pass::count_nonzero_bytes (tree expr_or_type, gimple *stmt,
4907 unsigned lenrange[3], bool *nulterm,
4908 bool *allnul, bool *allnonnul)
4910 if (TYPE_P (expr_or_type))
4911 return nonzero_bytes_for_type (expr_or_type, lenrange,
4912 nulterm, allnul, allnonnul);
4914 /* Set to optimistic values so the caller doesn't have to worry about
4915 initializing these and to what. On success, the function will clear
4916 these if it determines their values are different but being recursive
4917 it never sets either to true. On failure, their values are
4918 unspecified. */
4919 *nulterm = true;
4920 *allnul = true;
4921 *allnonnul = true;
4923 ssa_name_limit_t snlim;
4924 tree expr = expr_or_type;
4925 return count_nonzero_bytes (expr, stmt,
4926 0, 0, lenrange, nulterm, allnul, allnonnul,
4927 snlim);
4930 /* Handle a single or multibyte store other than by a built-in function,
4931 either via a single character assignment or by multi-byte assignment
4932 either via MEM_REF or via a type other than char (such as in
4933 '*(int*)a = 12345'). Return true to let the caller advance *GSI to
4934 the next statement in the basic block and false otherwise. */
4936 bool
4937 strlen_pass::handle_store (bool *zero_write)
4939 gimple *stmt = gsi_stmt (m_gsi);
4940 /* The LHS and RHS of the store. The RHS is null if STMT is a function
4941 call. STORETYPE is the type of the store (determined from either
4942 the RHS of the assignment statement or the LHS of a function call. */
4943 tree lhs, rhs, storetype;
4944 if (is_gimple_assign (stmt))
4946 lhs = gimple_assign_lhs (stmt);
4947 rhs = gimple_assign_rhs1 (stmt);
4948 storetype = TREE_TYPE (rhs);
4950 else if (is_gimple_call (stmt))
4952 lhs = gimple_call_lhs (stmt);
4953 rhs = NULL_TREE;
4954 storetype = TREE_TYPE (lhs);
4956 else
4957 return true;
4959 tree ssaname = NULL_TREE;
4960 strinfo *si = NULL;
4961 int idx = -1;
4963 range_query *const rvals = ptr_qry.rvals;
4965 /* The offset of the first byte in LHS modified by the store. */
4966 unsigned HOST_WIDE_INT offset = 0;
4968 if (TREE_CODE (lhs) == MEM_REF
4969 && TREE_CODE (TREE_OPERAND (lhs, 0)) == SSA_NAME)
4971 tree mem_offset = TREE_OPERAND (lhs, 1);
4972 if (tree_fits_uhwi_p (mem_offset))
4974 /* Get the strinfo for the base, and use it if it starts with at
4975 least OFFSET nonzero characters. This is trivially true if
4976 OFFSET is zero. */
4977 offset = tree_to_uhwi (mem_offset);
4978 idx = get_stridx (TREE_OPERAND (lhs, 0), stmt);
4979 if (idx > 0)
4980 si = get_strinfo (idx);
4981 if (offset == 0)
4982 ssaname = TREE_OPERAND (lhs, 0);
4983 else if (si == NULL
4984 || compare_nonzero_chars (si, stmt, offset, rvals) < 0)
4986 *zero_write = rhs ? initializer_zerop (rhs) : false;
4988 bool dummy;
4989 unsigned lenrange[] = { UINT_MAX, 0, 0 };
4990 if (count_nonzero_bytes (rhs ? rhs : storetype, stmt, lenrange,
4991 &dummy, &dummy, &dummy))
4992 maybe_warn_overflow (stmt, true, lenrange[2]);
4994 return true;
4998 else
5000 idx = get_addr_stridx (lhs, stmt, NULL_TREE, &offset, rvals);
5001 if (idx > 0)
5002 si = get_strinfo (idx);
5005 /* Minimum and maximum leading non-zero bytes and the size of the store. */
5006 unsigned lenrange[] = { UINT_MAX, 0, 0 };
5008 /* Set to the minimum length of the string being assigned if known. */
5009 unsigned HOST_WIDE_INT rhs_minlen;
5011 /* STORING_NONZERO_P is true iff not all stored characters are zero.
5012 STORING_ALL_NONZERO_P is true if all stored characters are zero.
5013 STORING_ALL_ZEROS_P is true iff all stored characters are zero.
5014 Both are false when it's impossible to determine which is true. */
5015 bool storing_nonzero_p;
5016 bool storing_all_nonzero_p;
5017 bool storing_all_zeros_p;
5018 /* FULL_STRING_P is set when the stored sequence of characters form
5019 a nul-terminated string. */
5020 bool full_string_p;
5022 const bool ranges_valid
5023 = count_nonzero_bytes (rhs ? rhs : storetype, stmt,
5024 lenrange, &full_string_p,
5025 &storing_all_zeros_p, &storing_all_nonzero_p);
5027 if (ranges_valid)
5029 rhs_minlen = lenrange[0];
5030 storing_nonzero_p = lenrange[1] > 0;
5031 *zero_write = storing_all_zeros_p;
5033 maybe_warn_overflow (stmt, true, lenrange[2]);
5035 else
5037 rhs_minlen = HOST_WIDE_INT_M1U;
5038 full_string_p = false;
5039 storing_nonzero_p = false;
5040 storing_all_zeros_p = false;
5041 storing_all_nonzero_p = false;
5044 if (si != NULL)
5046 /* The corresponding element is set to 1 if the first and last
5047 element, respectively, of the sequence of characters being
5048 written over the string described by SI ends before
5049 the terminating nul (if it has one), to zero if the nul is
5050 being overwritten but not beyond, or negative otherwise. */
5051 int store_before_nul[2];
5052 if (ranges_valid)
5054 /* The offset of the last stored byte. */
5055 unsigned HOST_WIDE_INT endoff = offset + lenrange[2] - 1;
5056 store_before_nul[0]
5057 = compare_nonzero_chars (si, stmt, offset, rvals);
5058 if (endoff == offset)
5059 store_before_nul[1] = store_before_nul[0];
5060 else
5061 store_before_nul[1]
5062 = compare_nonzero_chars (si, stmt, endoff, rvals);
5064 else
5066 store_before_nul[0]
5067 = compare_nonzero_chars (si, stmt, offset, rvals);
5068 store_before_nul[1] = store_before_nul[0];
5069 gcc_assert (offset == 0 || store_before_nul[0] >= 0);
5072 if (storing_all_zeros_p
5073 && store_before_nul[0] == 0
5074 && store_before_nul[1] == 0
5075 && si->full_string_p)
5077 /* When overwriting a '\0' with a '\0', the store can be removed
5078 if we know it has been stored in the current function. */
5079 if (!stmt_could_throw_p (cfun, stmt) && si->writable)
5081 unlink_stmt_vdef (stmt);
5082 release_defs (stmt);
5083 gsi_remove (&m_gsi, true);
5084 return false;
5086 else
5088 si->writable = true;
5089 gsi_next (&m_gsi);
5090 return false;
5094 if (store_before_nul[1] > 0
5095 && storing_nonzero_p
5096 && lenrange[0] == lenrange[1]
5097 && lenrange[0] == lenrange[2]
5098 && TREE_CODE (storetype) == INTEGER_TYPE)
5100 /* Handle a store of one or more non-nul characters that ends
5101 before the terminating nul of the destination and so does
5102 not affect its length
5103 If si->nonzero_chars > OFFSET, we aren't overwriting '\0',
5104 and if we aren't storing '\0', we know that the length of
5105 the string and any other zero terminated string in memory
5106 remains the same. In that case we move to the next gimple
5107 statement and return to signal the caller that it shouldn't
5108 invalidate anything.
5110 This is beneficial for cases like:
5112 char p[20];
5113 void foo (char *q)
5115 strcpy (p, "foobar");
5116 size_t len = strlen (p); // can be folded to 6
5117 size_t len2 = strlen (q); // has to be computed
5118 p[0] = 'X';
5119 size_t len3 = strlen (p); // can be folded to 6
5120 size_t len4 = strlen (q); // can be folded to len2
5121 bar (len, len2, len3, len4);
5122 } */
5123 gsi_next (&m_gsi);
5124 return false;
5127 if (storing_nonzero_p
5128 || storing_all_zeros_p
5129 || (full_string_p && lenrange[1] == 0)
5130 || (offset != 0 && store_before_nul[1] > 0))
5132 /* When STORING_NONZERO_P, we know that the string will start
5133 with at least OFFSET + 1 nonzero characters. If storing
5134 a single character, set si->NONZERO_CHARS to the result.
5135 If storing multiple characters, try to determine the number
5136 of leading non-zero characters and set si->NONZERO_CHARS to
5137 the result instead.
5139 When STORING_ALL_ZEROS_P, or the first byte written is zero,
5140 i.e. FULL_STRING_P && LENRANGE[1] == 0, we know that the
5141 string is now OFFSET characters long.
5143 Otherwise, we're storing an unknown value at offset OFFSET,
5144 so need to clip the nonzero_chars to OFFSET.
5145 Use the minimum length of the string (or individual character)
5146 being stored if it's known. Otherwise, STORING_NONZERO_P
5147 guarantees it's at least 1. */
5148 HOST_WIDE_INT len
5149 = storing_nonzero_p && ranges_valid ? lenrange[0] : 1;
5150 location_t loc = gimple_location (stmt);
5151 tree oldlen = si->nonzero_chars;
5152 if (store_before_nul[1] == 0 && si->full_string_p)
5153 /* We're overwriting the nul terminator with a nonzero or
5154 unknown character. If the previous stmt was a memcpy,
5155 its length may be decreased. */
5156 adjust_last_stmt (si, stmt, false);
5157 si = unshare_strinfo (si);
5158 if (storing_nonzero_p)
5160 gcc_assert (len >= 0);
5161 si->nonzero_chars = build_int_cst (size_type_node, offset + len);
5163 else
5164 si->nonzero_chars = build_int_cst (size_type_node, offset);
5166 /* Set FULL_STRING_P only if the length of the strings being
5167 written is the same, and clear it if the strings have
5168 different lengths. In the latter case the length stored
5169 in si->NONZERO_CHARS becomes the lower bound.
5170 FIXME: Handle the upper bound of the length if possible. */
5171 si->full_string_p = full_string_p && lenrange[0] == lenrange[1];
5173 if (storing_all_zeros_p
5174 && ssaname
5175 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (ssaname))
5176 si->endptr = ssaname;
5177 else
5178 si->endptr = NULL;
5179 si->next = 0;
5180 si->stmt = NULL;
5181 si->writable = true;
5182 si->dont_invalidate = true;
5183 if (oldlen)
5185 tree adj = fold_build2_loc (loc, MINUS_EXPR, size_type_node,
5186 si->nonzero_chars, oldlen);
5187 adjust_related_strinfos (loc, si, adj);
5189 else
5190 si->prev = 0;
5193 else if (idx == 0 && (storing_all_zeros_p || storing_nonzero_p))
5195 if (ssaname)
5196 idx = new_stridx (ssaname);
5197 else
5198 idx = new_addr_stridx (lhs);
5199 if (idx != 0)
5201 tree ptr = (ssaname ? ssaname : build_fold_addr_expr (lhs));
5203 HOST_WIDE_INT slen;
5204 if (storing_all_zeros_p)
5205 slen = 0;
5206 else if (storing_nonzero_p && ranges_valid)
5208 /* FIXME: Handle the upper bound of the length when
5209 LENRANGE[0] != LENRANGE[1]. */
5210 slen = lenrange[0];
5211 if (lenrange[0] != lenrange[1])
5212 /* Set the minimum length but ignore the maximum
5213 for now. */
5214 full_string_p = false;
5216 else
5217 slen = -1;
5219 tree len = (slen <= 0
5220 ? size_zero_node
5221 : build_int_cst (size_type_node, slen));
5222 si = new_strinfo (ptr, idx, len, slen >= 0 && full_string_p);
5223 set_strinfo (idx, si);
5224 if (storing_all_zeros_p
5225 && ssaname
5226 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (ssaname))
5227 si->endptr = ssaname;
5228 si->dont_invalidate = true;
5229 si->writable = true;
5232 else if (idx == 0
5233 && rhs_minlen < HOST_WIDE_INT_M1U
5234 && ssaname == NULL_TREE
5235 && TREE_CODE (TREE_TYPE (lhs)) == ARRAY_TYPE)
5237 HOST_WIDE_INT a = int_size_in_bytes (TREE_TYPE (lhs));
5238 if (a > 0 && (unsigned HOST_WIDE_INT) a > rhs_minlen)
5240 int idx = new_addr_stridx (lhs);
5241 if (idx != 0)
5243 si = new_strinfo (build_fold_addr_expr (lhs), idx,
5244 build_int_cst (size_type_node, rhs_minlen),
5245 full_string_p);
5246 set_strinfo (idx, si);
5247 si->dont_invalidate = true;
5252 if (si != NULL && offset == 0 && storing_all_zeros_p && lenrange[2] == 1)
5254 /* For single-byte stores only, allow adjust_last_stmt to remove
5255 the statement if the stored '\0' is immediately overwritten. */
5256 laststmt.stmt = stmt;
5257 laststmt.len = build_int_cst (size_type_node, 1);
5258 laststmt.stridx = si->idx;
5260 return true;
5263 /* Try to fold strstr (s, t) eq/ne s to strncmp (s, t, strlen (t)) eq/ne 0. */
5265 static void
5266 fold_strstr_to_strncmp (tree rhs1, tree rhs2, gimple *stmt)
5268 if (TREE_CODE (rhs1) != SSA_NAME
5269 || TREE_CODE (rhs2) != SSA_NAME)
5270 return;
5272 gimple *call_stmt = NULL;
5273 for (int pass = 0; pass < 2; pass++)
5275 gimple *g = SSA_NAME_DEF_STMT (rhs1);
5276 if (gimple_call_builtin_p (g, BUILT_IN_STRSTR)
5277 && has_single_use (rhs1)
5278 && gimple_call_arg (g, 0) == rhs2)
5280 call_stmt = g;
5281 break;
5283 std::swap (rhs1, rhs2);
5286 if (call_stmt)
5288 tree arg0 = gimple_call_arg (call_stmt, 0);
5290 if (arg0 == rhs2)
5292 tree arg1 = gimple_call_arg (call_stmt, 1);
5293 tree arg1_len = NULL_TREE;
5294 int idx = get_stridx (arg1, call_stmt);
5296 if (idx)
5298 if (idx < 0)
5299 arg1_len = build_int_cst (size_type_node, ~idx);
5300 else
5302 strinfo *si = get_strinfo (idx);
5303 if (si)
5304 arg1_len = get_string_length (si);
5308 if (arg1_len != NULL_TREE)
5310 gimple_stmt_iterator gsi = gsi_for_stmt (call_stmt);
5311 tree strncmp_decl = builtin_decl_explicit (BUILT_IN_STRNCMP);
5313 if (!is_gimple_val (arg1_len))
5315 tree arg1_len_tmp = make_ssa_name (TREE_TYPE (arg1_len));
5316 gassign *arg1_stmt = gimple_build_assign (arg1_len_tmp,
5317 arg1_len);
5318 gsi_insert_before (&gsi, arg1_stmt, GSI_SAME_STMT);
5319 arg1_len = arg1_len_tmp;
5322 gcall *strncmp_call = gimple_build_call (strncmp_decl, 3,
5323 arg0, arg1, arg1_len);
5324 tree strncmp_lhs = make_ssa_name (integer_type_node);
5325 gimple_set_vuse (strncmp_call, gimple_vuse (call_stmt));
5326 gimple_call_set_lhs (strncmp_call, strncmp_lhs);
5327 gsi_remove (&gsi, true);
5328 gsi_insert_before (&gsi, strncmp_call, GSI_SAME_STMT);
5329 tree zero = build_zero_cst (TREE_TYPE (strncmp_lhs));
5331 if (is_gimple_assign (stmt))
5333 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
5335 tree cond = gimple_assign_rhs1 (stmt);
5336 TREE_OPERAND (cond, 0) = strncmp_lhs;
5337 TREE_OPERAND (cond, 1) = zero;
5339 else
5341 gimple_assign_set_rhs1 (stmt, strncmp_lhs);
5342 gimple_assign_set_rhs2 (stmt, zero);
5345 else
5347 gcond *cond = as_a<gcond *> (stmt);
5348 gimple_cond_set_lhs (cond, strncmp_lhs);
5349 gimple_cond_set_rhs (cond, zero);
5351 update_stmt (stmt);
5357 /* Return true if TYPE corresponds to a narrow character type. */
5359 static bool
5360 is_char_type (tree type)
5362 return (TREE_CODE (type) == INTEGER_TYPE
5363 && TYPE_MODE (type) == TYPE_MODE (char_type_node)
5364 && TYPE_PRECISION (type) == TYPE_PRECISION (char_type_node));
5367 /* Check the built-in call at GSI for validity and optimize it.
5368 Uses RVALS to determine range information.
5369 Return true to let the caller advance *GSI to the next statement
5370 in the basic block and false otherwise. */
5372 bool
5373 strlen_pass::check_and_optimize_call (bool *zero_write)
5375 gimple *stmt = gsi_stmt (m_gsi);
5377 if (!gimple_call_builtin_p (stmt, BUILT_IN_NORMAL))
5379 tree fntype = gimple_call_fntype (stmt);
5380 if (!fntype)
5381 return true;
5383 if (lookup_attribute ("alloc_size", TYPE_ATTRIBUTES (fntype)))
5385 handle_alloc_call (BUILT_IN_NONE);
5386 return true;
5389 if (tree lhs = gimple_call_lhs (stmt))
5390 handle_assign (lhs, zero_write);
5392 /* Proceed to handle user-defined formatting functions. */
5395 /* When not optimizing we must be checking printf calls which
5396 we do even for user-defined functions when they are declared
5397 with attribute format. */
5398 if (!flag_optimize_strlen
5399 || !strlen_optimize
5400 || !valid_builtin_call (stmt))
5401 return !handle_printf_call (&m_gsi, ptr_qry);
5403 tree callee = gimple_call_fndecl (stmt);
5404 switch (DECL_FUNCTION_CODE (callee))
5406 case BUILT_IN_STRLEN:
5407 case BUILT_IN_STRNLEN:
5408 handle_builtin_strlen ();
5409 break;
5410 case BUILT_IN_STRCHR:
5411 handle_builtin_strchr ();
5412 break;
5413 case BUILT_IN_STRCPY:
5414 case BUILT_IN_STRCPY_CHK:
5415 case BUILT_IN_STPCPY:
5416 case BUILT_IN_STPCPY_CHK:
5417 handle_builtin_strcpy (DECL_FUNCTION_CODE (callee));
5418 break;
5420 case BUILT_IN_STRNCAT:
5421 case BUILT_IN_STRNCAT_CHK:
5422 handle_builtin_strncat (DECL_FUNCTION_CODE (callee));
5423 break;
5425 case BUILT_IN_STPNCPY:
5426 case BUILT_IN_STPNCPY_CHK:
5427 case BUILT_IN_STRNCPY:
5428 case BUILT_IN_STRNCPY_CHK:
5429 handle_builtin_stxncpy_strncat (false);
5430 break;
5432 case BUILT_IN_MEMCPY:
5433 case BUILT_IN_MEMCPY_CHK:
5434 case BUILT_IN_MEMPCPY:
5435 case BUILT_IN_MEMPCPY_CHK:
5436 handle_builtin_memcpy (DECL_FUNCTION_CODE (callee));
5437 break;
5438 case BUILT_IN_STRCAT:
5439 case BUILT_IN_STRCAT_CHK:
5440 handle_builtin_strcat (DECL_FUNCTION_CODE (callee));
5441 break;
5442 case BUILT_IN_ALLOCA:
5443 case BUILT_IN_ALLOCA_WITH_ALIGN:
5444 case BUILT_IN_MALLOC:
5445 case BUILT_IN_CALLOC:
5446 handle_alloc_call (DECL_FUNCTION_CODE (callee));
5447 break;
5448 case BUILT_IN_MEMSET:
5449 if (handle_builtin_memset (zero_write))
5450 return false;
5451 break;
5452 case BUILT_IN_MEMCMP:
5453 if (handle_builtin_memcmp ())
5454 return false;
5455 break;
5456 case BUILT_IN_STRCMP:
5457 case BUILT_IN_STRNCMP:
5458 if (handle_builtin_string_cmp ())
5459 return false;
5460 break;
5461 default:
5462 if (handle_printf_call (&m_gsi, ptr_qry))
5463 return false;
5464 break;
5467 return true;
5470 /* Handle an assignment statement at *GSI to a LHS of integral type.
5471 If GSI's basic block needs clean-up of EH, set *CLEANUP_EH to true. */
5473 void
5474 strlen_pass::handle_integral_assign (bool *cleanup_eh)
5476 gimple *stmt = gsi_stmt (m_gsi);
5477 tree lhs = gimple_assign_lhs (stmt);
5478 tree lhs_type = TREE_TYPE (lhs);
5480 enum tree_code code = gimple_assign_rhs_code (stmt);
5481 if (code == COND_EXPR)
5483 tree cond = gimple_assign_rhs1 (stmt);
5484 enum tree_code cond_code = TREE_CODE (cond);
5486 if (cond_code == EQ_EXPR || cond_code == NE_EXPR)
5487 fold_strstr_to_strncmp (TREE_OPERAND (cond, 0),
5488 TREE_OPERAND (cond, 1), stmt);
5490 else if (code == EQ_EXPR || code == NE_EXPR)
5491 fold_strstr_to_strncmp (gimple_assign_rhs1 (stmt),
5492 gimple_assign_rhs2 (stmt), stmt);
5493 else if (gimple_assign_load_p (stmt)
5494 && TREE_CODE (lhs_type) == INTEGER_TYPE
5495 && TYPE_MODE (lhs_type) == TYPE_MODE (char_type_node)
5496 && (TYPE_PRECISION (lhs_type)
5497 == TYPE_PRECISION (char_type_node))
5498 && !gimple_has_volatile_ops (stmt))
5500 tree off = integer_zero_node;
5501 unsigned HOST_WIDE_INT coff = 0;
5502 int idx = 0;
5503 tree rhs1 = gimple_assign_rhs1 (stmt);
5504 if (code == MEM_REF)
5506 idx = get_stridx (TREE_OPERAND (rhs1, 0), stmt);
5507 if (idx > 0)
5509 strinfo *si = get_strinfo (idx);
5510 if (si
5511 && si->nonzero_chars
5512 && TREE_CODE (si->nonzero_chars) == INTEGER_CST
5513 && (wi::to_widest (si->nonzero_chars)
5514 >= wi::to_widest (off)))
5515 off = TREE_OPERAND (rhs1, 1);
5516 else
5517 /* This case is not useful. See if get_addr_stridx
5518 returns something usable. */
5519 idx = 0;
5522 if (idx <= 0)
5523 idx = get_addr_stridx (rhs1, stmt, NULL_TREE, &coff);
5524 if (idx > 0)
5526 strinfo *si = get_strinfo (idx);
5527 if (si
5528 && si->nonzero_chars
5529 && TREE_CODE (si->nonzero_chars) == INTEGER_CST)
5531 widest_int w1 = wi::to_widest (si->nonzero_chars);
5532 widest_int w2 = wi::to_widest (off) + coff;
5533 if (w1 == w2
5534 && si->full_string_p)
5536 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
5538 fprintf (dump_file, "Optimizing: ");
5539 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
5542 /* Reading the final '\0' character. */
5543 tree zero = build_int_cst (lhs_type, 0);
5544 gimple_set_vuse (stmt, NULL_TREE);
5545 gimple_assign_set_rhs_from_tree (&m_gsi, zero);
5546 *cleanup_eh
5547 |= maybe_clean_or_replace_eh_stmt (stmt,
5548 gsi_stmt (m_gsi));
5549 stmt = gsi_stmt (m_gsi);
5550 update_stmt (stmt);
5552 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
5554 fprintf (dump_file, "into: ");
5555 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
5558 else if (w1 > w2)
5560 /* Reading a character before the final '\0'
5561 character. Just set the value range to ~[0, 0]
5562 if we don't have anything better. */
5563 value_range r;
5564 if (!get_range_query (cfun)->range_of_expr (r, lhs)
5565 || r.varying_p ())
5567 r.set_nonzero (lhs_type);
5568 set_range_info (lhs, r);
5574 else if (code == MEM_REF && TREE_CODE (lhs) == SSA_NAME)
5576 if (int idx = new_stridx (lhs))
5578 /* Record multi-byte assignments from MEM_REFs. */
5579 bool storing_all_nonzero_p;
5580 bool storing_all_zeros_p;
5581 bool full_string_p;
5582 unsigned lenrange[] = { UINT_MAX, 0, 0 };
5583 tree rhs = gimple_assign_rhs1 (stmt);
5584 const bool ranges_valid
5585 = count_nonzero_bytes (rhs, stmt,
5586 lenrange, &full_string_p,
5587 &storing_all_zeros_p,
5588 &storing_all_nonzero_p);
5589 if (ranges_valid)
5591 tree length = build_int_cst (sizetype, lenrange[0]);
5592 strinfo *si = new_strinfo (lhs, idx, length, full_string_p);
5593 set_strinfo (idx, si);
5594 si->writable = true;
5595 si->dont_invalidate = true;
5600 if (strlen_to_stridx)
5602 tree rhs1 = gimple_assign_rhs1 (stmt);
5603 if (stridx_strlenloc *ps = strlen_to_stridx->get (rhs1))
5604 strlen_to_stridx->put (lhs, stridx_strlenloc (*ps));
5608 /* Handle assignment statement at *GSI to LHS. Set *ZERO_WRITE if
5609 the assignment stores all zero bytes. */
5611 bool
5612 strlen_pass::handle_assign (tree lhs, bool *zero_write)
5614 tree type = TREE_TYPE (lhs);
5615 if (TREE_CODE (type) == ARRAY_TYPE)
5616 type = TREE_TYPE (type);
5618 bool is_char_store = is_char_type (type);
5619 if (!is_char_store && TREE_CODE (lhs) == MEM_REF)
5621 /* To consider stores into char objects via integer types other
5622 than char but not those to non-character objects, determine
5623 the type of the destination rather than just the type of
5624 the access. */
5625 for (int i = 0; i != 2; ++i)
5627 tree ref = TREE_OPERAND (lhs, i);
5628 type = TREE_TYPE (ref);
5629 if (TREE_CODE (type) == POINTER_TYPE)
5630 type = TREE_TYPE (type);
5631 if (TREE_CODE (type) == ARRAY_TYPE)
5632 type = TREE_TYPE (type);
5633 if (is_char_type (type))
5635 is_char_store = true;
5636 break;
5641 /* Handle a single or multibyte assignment. */
5642 if (is_char_store && !handle_store (zero_write))
5643 return false;
5645 return true;
5649 /* Attempt to check for validity of the performed access a single statement
5650 at *GSI using string length knowledge, and to optimize it.
5651 If the given basic block needs clean-up of EH, CLEANUP_EH is set to
5652 true. Return true to let the caller advance *GSI to the next statement
5653 in the basic block and false otherwise. */
5655 bool
5656 strlen_pass::check_and_optimize_stmt (bool *cleanup_eh)
5658 gimple *stmt = gsi_stmt (m_gsi);
5660 /* For statements that modify a string, set to true if the write
5661 is only zeros. */
5662 bool zero_write = false;
5664 if (is_gimple_call (stmt))
5666 if (!check_and_optimize_call (&zero_write))
5667 return false;
5669 else if (!flag_optimize_strlen || !strlen_optimize)
5670 return true;
5671 else if (is_gimple_assign (stmt) && !gimple_clobber_p (stmt))
5673 /* Handle non-clobbering assignment. */
5674 tree lhs = gimple_assign_lhs (stmt);
5675 tree lhs_type = TREE_TYPE (lhs);
5677 if (TREE_CODE (lhs) == SSA_NAME && POINTER_TYPE_P (lhs_type))
5679 if (gimple_assign_single_p (stmt)
5680 || (gimple_assign_cast_p (stmt)
5681 && POINTER_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt)))))
5683 int idx = get_stridx (gimple_assign_rhs1 (stmt), stmt);
5684 ssa_ver_to_stridx[SSA_NAME_VERSION (lhs)] = idx;
5686 else if (gimple_assign_rhs_code (stmt) == POINTER_PLUS_EXPR)
5687 handle_pointer_plus ();
5689 else if (TREE_CODE (lhs) == SSA_NAME && INTEGRAL_TYPE_P (lhs_type))
5690 /* Handle assignment to a character. */
5691 handle_integral_assign (cleanup_eh);
5692 else if (TREE_CODE (lhs) != SSA_NAME && !TREE_SIDE_EFFECTS (lhs))
5693 if (!handle_assign (lhs, &zero_write))
5694 return false;
5696 else if (gcond *cond = dyn_cast<gcond *> (stmt))
5698 enum tree_code code = gimple_cond_code (cond);
5699 if (code == EQ_EXPR || code == NE_EXPR)
5700 fold_strstr_to_strncmp (gimple_cond_lhs (stmt),
5701 gimple_cond_rhs (stmt), stmt);
5704 if (gimple_vdef (stmt))
5705 maybe_invalidate (stmt, zero_write);
5706 return true;
5709 /* Recursively call maybe_invalidate on stmts that might be executed
5710 in between dombb and current bb and that contain a vdef. Stop when
5711 *count stmts are inspected, or if the whole strinfo vector has
5712 been invalidated. */
5714 static void
5715 do_invalidate (basic_block dombb, gimple *phi, bitmap visited, int *count)
5717 unsigned int i, n = gimple_phi_num_args (phi);
5719 for (i = 0; i < n; i++)
5721 tree vuse = gimple_phi_arg_def (phi, i);
5722 gimple *stmt = SSA_NAME_DEF_STMT (vuse);
5723 basic_block bb = gimple_bb (stmt);
5724 if (bb == NULL
5725 || bb == dombb
5726 || !bitmap_set_bit (visited, bb->index)
5727 || !dominated_by_p (CDI_DOMINATORS, bb, dombb))
5728 continue;
5729 while (1)
5731 if (gimple_code (stmt) == GIMPLE_PHI)
5733 do_invalidate (dombb, stmt, visited, count);
5734 if (*count == 0)
5735 return;
5736 break;
5738 if (--*count == 0)
5739 return;
5740 if (!maybe_invalidate (stmt))
5742 *count = 0;
5743 return;
5745 vuse = gimple_vuse (stmt);
5746 stmt = SSA_NAME_DEF_STMT (vuse);
5747 if (gimple_bb (stmt) != bb)
5749 bb = gimple_bb (stmt);
5750 if (bb == NULL
5751 || bb == dombb
5752 || !bitmap_set_bit (visited, bb->index)
5753 || !dominated_by_p (CDI_DOMINATORS, bb, dombb))
5754 break;
5760 /* Release pointer_query cache. */
5762 strlen_pass::~strlen_pass ()
5764 ptr_qry.flush_cache ();
5767 /* Callback for walk_dominator_tree. Attempt to optimize various
5768 string ops by remembering string lengths pointed by pointer SSA_NAMEs. */
5770 edge
5771 strlen_pass::before_dom_children (basic_block bb)
5773 basic_block dombb = get_immediate_dominator (CDI_DOMINATORS, bb);
5775 if (dombb == NULL)
5776 stridx_to_strinfo = NULL;
5777 else
5779 stridx_to_strinfo = ((vec<strinfo *, va_heap, vl_embed> *) dombb->aux);
5780 if (stridx_to_strinfo)
5782 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
5783 gsi_next (&gsi))
5785 gphi *phi = gsi.phi ();
5786 if (virtual_operand_p (gimple_phi_result (phi)))
5788 bitmap visited = BITMAP_ALLOC (NULL);
5789 int count_vdef = 100;
5790 do_invalidate (dombb, phi, visited, &count_vdef);
5791 BITMAP_FREE (visited);
5792 if (count_vdef == 0)
5794 /* If there were too many vdefs in between immediate
5795 dominator and current bb, invalidate everything.
5796 If stridx_to_strinfo has been unshared, we need
5797 to free it, otherwise just set it to NULL. */
5798 if (!strinfo_shared ())
5800 unsigned int i;
5801 strinfo *si;
5803 for (i = 1;
5804 vec_safe_iterate (stridx_to_strinfo, i, &si);
5805 ++i)
5807 free_strinfo (si);
5808 (*stridx_to_strinfo)[i] = NULL;
5811 else
5812 stridx_to_strinfo = NULL;
5814 break;
5820 /* If all PHI arguments have the same string index, the PHI result
5821 has it as well. */
5822 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
5823 gsi_next (&gsi))
5825 gphi *phi = gsi.phi ();
5826 tree result = gimple_phi_result (phi);
5827 if (!virtual_operand_p (result) && POINTER_TYPE_P (TREE_TYPE (result)))
5829 int idx = get_stridx (gimple_phi_arg_def (phi, 0), phi);
5830 if (idx != 0)
5832 unsigned int i, n = gimple_phi_num_args (phi);
5833 for (i = 1; i < n; i++)
5834 if (idx != get_stridx (gimple_phi_arg_def (phi, i), phi))
5835 break;
5836 if (i == n)
5837 ssa_ver_to_stridx[SSA_NAME_VERSION (result)] = idx;
5842 bool cleanup_eh = false;
5844 /* Attempt to optimize individual statements. */
5845 for (m_gsi = gsi_start_bb (bb); !gsi_end_p (m_gsi); )
5847 /* Reset search depth performance counter. */
5848 ptr_qry.depth = 0;
5850 if (check_and_optimize_stmt (&cleanup_eh))
5851 gsi_next (&m_gsi);
5854 if (cleanup_eh && gimple_purge_dead_eh_edges (bb))
5855 m_cleanup_cfg = true;
5857 bb->aux = stridx_to_strinfo;
5858 if (vec_safe_length (stridx_to_strinfo) && !strinfo_shared ())
5859 (*stridx_to_strinfo)[0] = (strinfo *) bb;
5860 return NULL;
5863 /* Callback for walk_dominator_tree. Free strinfo vector if it is
5864 owned by the current bb, clear bb->aux. */
5866 void
5867 strlen_pass::after_dom_children (basic_block bb)
5869 if (bb->aux)
5871 stridx_to_strinfo = ((vec<strinfo *, va_heap, vl_embed> *) bb->aux);
5872 if (vec_safe_length (stridx_to_strinfo)
5873 && (*stridx_to_strinfo)[0] == (strinfo *) bb)
5875 unsigned int i;
5876 strinfo *si;
5878 for (i = 1; vec_safe_iterate (stridx_to_strinfo, i, &si); ++i)
5879 free_strinfo (si);
5880 vec_free (stridx_to_strinfo);
5882 bb->aux = NULL;
5886 namespace {
5888 static unsigned int
5889 printf_strlen_execute (function *fun, bool warn_only)
5891 strlen_optimize = !warn_only;
5893 calculate_dominance_info (CDI_DOMINATORS);
5894 loop_optimizer_init (LOOPS_NORMAL);
5895 scev_initialize ();
5897 gcc_assert (!strlen_to_stridx);
5898 if (warn_stringop_overflow || warn_stringop_truncation)
5899 strlen_to_stridx = new hash_map<tree, stridx_strlenloc> ();
5901 /* This has to happen after initializing the loop optimizer
5902 and initializing SCEV as they create new SSA_NAMEs. */
5903 ssa_ver_to_stridx.safe_grow_cleared (num_ssa_names, true);
5904 max_stridx = 1;
5906 /* String length optimization is implemented as a walk of the dominator
5907 tree and a forward walk of statements within each block. */
5908 strlen_pass walker (CDI_DOMINATORS);
5909 walker.walk (ENTRY_BLOCK_PTR_FOR_FN (fun));
5911 if (dump_file && (dump_flags & TDF_DETAILS))
5912 walker.ptr_qry.dump (dump_file, true);
5914 ssa_ver_to_stridx.release ();
5915 strinfo_pool.release ();
5916 if (decl_to_stridxlist_htab)
5918 obstack_free (&stridx_obstack, NULL);
5919 delete decl_to_stridxlist_htab;
5920 decl_to_stridxlist_htab = NULL;
5922 laststmt.stmt = NULL;
5923 laststmt.len = NULL_TREE;
5924 laststmt.stridx = 0;
5926 if (strlen_to_stridx)
5928 strlen_to_stridx->empty ();
5929 delete strlen_to_stridx;
5930 strlen_to_stridx = NULL;
5933 scev_finalize ();
5934 loop_optimizer_finalize ();
5936 return walker.m_cleanup_cfg ? TODO_cleanup_cfg : 0;
5939 /* This file defines two passes: one for warnings that runs only when
5940 optimization is disabled, and another that implements optimizations
5941 and also issues warnings. */
5943 const pass_data pass_data_warn_printf =
5945 GIMPLE_PASS, /* type */
5946 "warn-printf", /* name */
5947 OPTGROUP_NONE, /* optinfo_flags */
5948 TV_NONE, /* tv_id */
5949 /* Normally an optimization pass would require PROP_ssa but because
5950 this pass runs early, with no optimization, to do sprintf format
5951 checking, it only requires PROP_cfg. */
5952 PROP_cfg, /* properties_required */
5953 0, /* properties_provided */
5954 0, /* properties_destroyed */
5955 0, /* todo_flags_start */
5956 0, /* todo_flags_finish */
5959 class pass_warn_printf : public gimple_opt_pass
5961 public:
5962 pass_warn_printf (gcc::context *ctxt)
5963 : gimple_opt_pass (pass_data_warn_printf, ctxt)
5966 bool gate (function *) final override;
5967 unsigned int execute (function *fun) final override
5969 return printf_strlen_execute (fun, true);
5974 /* Return true to run the warning pass only when not optimizing and
5975 iff either -Wformat-overflow or -Wformat-truncation is specified. */
5977 bool
5978 pass_warn_printf::gate (function *)
5980 return !optimize && (warn_format_overflow > 0 || warn_format_trunc > 0);
5983 const pass_data pass_data_strlen =
5985 GIMPLE_PASS, /* type */
5986 "strlen", /* name */
5987 OPTGROUP_NONE, /* optinfo_flags */
5988 TV_TREE_STRLEN, /* tv_id */
5989 PROP_cfg | PROP_ssa, /* properties_required */
5990 0, /* properties_provided */
5991 0, /* properties_destroyed */
5992 0, /* todo_flags_start */
5993 0, /* todo_flags_finish */
5996 class pass_strlen : public gimple_opt_pass
5998 public:
5999 pass_strlen (gcc::context *ctxt)
6000 : gimple_opt_pass (pass_data_strlen, ctxt)
6003 opt_pass * clone () final override { return new pass_strlen (m_ctxt); }
6005 bool gate (function *) final override;
6006 unsigned int execute (function *fun) final override
6008 return printf_strlen_execute (fun, false);
6012 /* Return true to run the pass only when the sprintf and/or strlen
6013 optimizations are enabled and -Wformat-overflow or -Wformat-truncation
6014 are specified. */
6016 bool
6017 pass_strlen::gate (function *)
6019 return ((warn_format_overflow > 0
6020 || warn_format_trunc > 0
6021 || warn_restrict > 0
6022 || flag_optimize_strlen > 0
6023 || flag_printf_return_value)
6024 && optimize > 0);
6027 } // anon namespace
6029 gimple_opt_pass *
6030 make_pass_warn_printf (gcc::context *ctxt)
6032 return new pass_warn_printf (ctxt);
6035 gimple_opt_pass *
6036 make_pass_strlen (gcc::context *ctxt)
6038 return new pass_strlen (ctxt);