Skip -fwhole-program when merging LTO options.
[official-gcc.git] / gcc / tree-ssa-strlen.cc
blobabec225566d0bddca5757e90a95861a17b279bd1
1 /* String length optimization
2 Copyright (C) 2011-2022 Free Software Foundation, Inc.
3 Contributed by Jakub Jelinek <jakub@redhat.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "alloc-pool.h"
29 #include "tree-pass.h"
30 #include "ssa.h"
31 #include "cgraph.h"
32 #include "gimple-pretty-print.h"
33 #include "gimple-ssa-warn-access.h"
34 #include "gimple-ssa-warn-restrict.h"
35 #include "fold-const.h"
36 #include "stor-layout.h"
37 #include "gimple-iterator.h"
38 #include "gimple-fold.h"
39 #include "tree-eh.h"
40 #include "gimplify.h"
41 #include "gimplify-me.h"
42 #include "expr.h"
43 #include "tree-cfg.h"
44 #include "tree-dfa.h"
45 #include "domwalk.h"
46 #include "tree-ssa-alias.h"
47 #include "tree-ssa-propagate.h"
48 #include "tree-ssa-strlen.h"
49 #include "tree-hash-traits.h"
50 #include "builtins.h"
51 #include "pointer-query.h"
52 #include "target.h"
53 #include "diagnostic-core.h"
54 #include "diagnostic.h"
55 #include "intl.h"
56 #include "attribs.h"
57 #include "calls.h"
58 #include "cfgloop.h"
59 #include "tree-ssa-loop.h"
60 #include "tree-scalar-evolution.h"
61 #include "vr-values.h"
62 #include "gimple-range.h"
63 #include "tree-ssa.h"
65 /* A vector indexed by SSA_NAME_VERSION. 0 means unknown, positive value
66 is an index into strinfo vector, negative value stands for
67 string length of a string literal (~strlen). */
68 static vec<int> ssa_ver_to_stridx;
70 /* Number of currently active string indexes plus one. */
71 static int max_stridx;
73 /* Set to true to optimize, false when just checking. */
74 static bool strlen_optimize;
76 /* String information record. */
77 struct strinfo
79 /* Number of leading characters that are known to be nonzero. This is
80 also the length of the string if FULL_STRING_P.
82 The values in a list of related string pointers must be consistent;
83 that is, if strinfo B comes X bytes after strinfo A, it must be
84 the case that A->nonzero_chars == X + B->nonzero_chars. */
85 tree nonzero_chars;
86 /* Any of the corresponding pointers for querying alias oracle. */
87 tree ptr;
88 /* STMT is used for two things:
90 - To record the statement that should be used for delayed length
91 computations. We maintain the invariant that all related strinfos
92 have delayed lengths or none do.
94 - To record the malloc or calloc call that produced this result
95 to optimize away malloc/memset sequences. STMT is reset after
96 a calloc-allocated object has been stored a non-zero value into. */
97 gimple *stmt;
98 /* Set to the dynamic allocation statement for the object (alloca,
99 calloc, malloc, or VLA). Unlike STMT, once set for a strinfo
100 object, ALLOC doesn't change. */
101 gimple *alloc;
102 /* Pointer to '\0' if known, if NULL, it can be computed as
103 ptr + length. */
104 tree endptr;
105 /* Reference count. Any changes to strinfo entry possibly shared
106 with dominating basic blocks need unshare_strinfo first, except
107 for dont_invalidate which affects only the immediately next
108 maybe_invalidate. */
109 int refcount;
110 /* Copy of index. get_strinfo (si->idx) should return si; */
111 int idx;
112 /* These 3 fields are for chaining related string pointers together.
113 E.g. for
114 bl = strlen (b); dl = strlen (d); strcpy (a, b); c = a + bl;
115 strcpy (c, d); e = c + dl;
116 strinfo(a) -> strinfo(c) -> strinfo(e)
117 All have ->first field equal to strinfo(a)->idx and are doubly
118 chained through prev/next fields. The later strinfos are required
119 to point into the same string with zero or more bytes after
120 the previous pointer and all bytes in between the two pointers
121 must be non-zero. Functions like strcpy or memcpy are supposed
122 to adjust all previous strinfo lengths, but not following strinfo
123 lengths (those are uncertain, usually invalidated during
124 maybe_invalidate, except when the alias oracle knows better).
125 Functions like strcat on the other side adjust the whole
126 related strinfo chain.
127 They are updated lazily, so to use the chain the same first fields
128 and si->prev->next == si->idx needs to be verified. */
129 int first;
130 int next;
131 int prev;
132 /* A flag whether the string is known to be written in the current
133 function. */
134 bool writable;
135 /* A flag for the next maybe_invalidate that this strinfo shouldn't
136 be invalidated. Always cleared by maybe_invalidate. */
137 bool dont_invalidate;
138 /* True if the string is known to be nul-terminated after NONZERO_CHARS
139 characters. False is useful when detecting strings that are built
140 up via successive memcpys. */
141 bool full_string_p;
144 /* Pool for allocating strinfo_struct entries. */
145 static object_allocator<strinfo> strinfo_pool ("strinfo pool");
147 /* Vector mapping positive string indexes to strinfo, for the
148 current basic block. The first pointer in the vector is special,
149 it is either NULL, meaning the vector isn't shared, or it is
150 a basic block pointer to the owner basic_block if shared.
151 If some other bb wants to modify the vector, the vector needs
152 to be unshared first, and only the owner bb is supposed to free it. */
153 static vec<strinfo *, va_heap, vl_embed> *stridx_to_strinfo;
155 /* One OFFSET->IDX mapping. */
156 struct stridxlist
158 struct stridxlist *next;
159 HOST_WIDE_INT offset;
160 int idx;
163 /* Hash table entry, mapping a DECL to a chain of OFFSET->IDX mappings. */
164 struct decl_stridxlist_map
166 struct tree_map_base base;
167 struct stridxlist list;
170 /* Hash table for mapping decls to a chained list of offset -> idx
171 mappings. */
172 typedef hash_map<tree_decl_hash, stridxlist> decl_to_stridxlist_htab_t;
173 static decl_to_stridxlist_htab_t *decl_to_stridxlist_htab;
175 /* Hash table mapping strlen (or strnlen with constant bound and return
176 smaller than bound) calls to stridx instances describing
177 the calls' arguments. Non-null only when warn_stringop_truncation
178 is non-zero. */
179 typedef std::pair<int, location_t> stridx_strlenloc;
180 static hash_map<tree, stridx_strlenloc> *strlen_to_stridx;
182 /* Obstack for struct stridxlist and struct decl_stridxlist_map. */
183 static struct obstack stridx_obstack;
185 /* Last memcpy statement if it could be adjusted if the trailing
186 '\0' written is immediately overwritten, or
187 *x = '\0' store that could be removed if it is immediately overwritten. */
188 struct laststmt_struct
190 gimple *stmt;
191 tree len;
192 int stridx;
193 } laststmt;
195 static int get_stridx_plus_constant (strinfo *, unsigned HOST_WIDE_INT, tree);
196 static bool get_range_strlen_dynamic (tree, gimple *, c_strlen_data *,
197 bitmap, pointer_query *, unsigned *);
199 /* Sets MINMAX to either the constant value or the range VAL is in
200 and returns either the constant value or VAL on success or null
201 when the range couldn't be determined. Uses RVALS or CFUN for
202 range info, whichever is nonnull. */
204 tree
205 get_range (tree val, gimple *stmt, wide_int minmax[2],
206 range_query *rvals /* = NULL */)
208 if (!rvals)
210 if (!cfun)
211 /* When called from front ends for global initializers CFUN
212 may be null. */
213 return NULL_TREE;
215 rvals = get_range_query (cfun);
218 value_range vr;
219 if (!rvals->range_of_expr (vr, val, stmt))
220 return NULL_TREE;
222 value_range_kind rng = vr.kind ();
223 if (rng == VR_RANGE)
225 /* Only handle straight ranges. */
226 minmax[0] = wi::to_wide (vr.min ());
227 minmax[1] = wi::to_wide (vr.max ());
228 return val;
231 return NULL_TREE;
234 class strlen_pass : public dom_walker
236 public:
237 strlen_pass (cdi_direction direction)
238 : dom_walker (direction),
239 ptr_qry (&m_ranger),
240 m_cleanup_cfg (false)
244 ~strlen_pass ();
246 edge before_dom_children (basic_block) final override;
247 void after_dom_children (basic_block) final override;
249 bool check_and_optimize_stmt (bool *cleanup_eh);
250 bool check_and_optimize_call (bool *zero_write);
251 bool handle_assign (tree lhs, bool *zero_write);
252 bool handle_store (bool *zero_write);
253 void handle_pointer_plus ();
254 void handle_builtin_strlen ();
255 void handle_builtin_strchr ();
256 void handle_builtin_strcpy (built_in_function);
257 void handle_integral_assign (bool *cleanup_eh);
258 void handle_builtin_stxncpy_strncat (bool append_p);
259 void handle_builtin_memcpy (built_in_function bcode);
260 void handle_builtin_strcat (built_in_function bcode);
261 void handle_builtin_strncat (built_in_function);
262 bool handle_builtin_memset (bool *zero_write);
263 bool handle_builtin_memcmp ();
264 bool handle_builtin_string_cmp ();
265 void handle_alloc_call (built_in_function);
266 void maybe_warn_overflow (gimple *stmt, bool call_lhs, tree len,
267 strinfo *si = NULL, bool plus_one = false,
268 bool rawmem = false);
269 void maybe_warn_overflow (gimple *stmt, bool call_lhs,
270 unsigned HOST_WIDE_INT len,
271 strinfo *si = NULL,
272 bool plus_one = false, bool rawmem = false);
273 void adjust_last_stmt (strinfo *si, gimple *stmt, bool is_strcat);
274 tree strxcmp_eqz_result (gimple *stmt, tree arg1, int idx1,
275 tree arg2, int idx2,
276 unsigned HOST_WIDE_INT bound,
277 unsigned HOST_WIDE_INT len[2],
278 unsigned HOST_WIDE_INT *psize);
279 bool count_nonzero_bytes (tree expr_or_type,
280 gimple *stmt,
281 unsigned lenrange[3], bool *nulterm,
282 bool *allnul, bool *allnonnul);
283 bool count_nonzero_bytes (tree exp,
284 gimple *stmt,
285 unsigned HOST_WIDE_INT offset,
286 unsigned HOST_WIDE_INT nbytes,
287 unsigned lenrange[3], bool *nulterm,
288 bool *allnul, bool *allnonnul,
289 ssa_name_limit_t &snlim);
290 bool count_nonzero_bytes_addr (tree exp,
291 gimple *stmt,
292 unsigned HOST_WIDE_INT offset,
293 unsigned HOST_WIDE_INT nbytes,
294 unsigned lenrange[3], bool *nulterm,
295 bool *allnul, bool *allnonnul,
296 ssa_name_limit_t &snlim);
297 bool get_len_or_size (gimple *stmt, tree arg, int idx,
298 unsigned HOST_WIDE_INT lenrng[2],
299 unsigned HOST_WIDE_INT *size, bool *nulterm);
301 gimple_ranger m_ranger;
303 /* A pointer_query object to store information about pointers and
304 their targets in. */
305 pointer_query ptr_qry;
307 gimple_stmt_iterator m_gsi;
309 /* Flag that will trigger TODO_cleanup_cfg to be returned in strlen
310 execute function. */
311 bool m_cleanup_cfg;
314 /* Return:
316 * +1 if SI is known to start with more than OFF nonzero characters.
318 * 0 if SI is known to start with exactly OFF nonzero characters.
320 * -1 if SI either does not start with OFF nonzero characters
321 or the relationship between the number of leading nonzero
322 characters in SI and OFF is unknown. */
324 static int
325 compare_nonzero_chars (strinfo *si, unsigned HOST_WIDE_INT off)
327 if (si->nonzero_chars
328 && TREE_CODE (si->nonzero_chars) == INTEGER_CST)
329 return compare_tree_int (si->nonzero_chars, off);
330 else
331 return -1;
334 /* Same as above but suitable also for strings with non-constant lengths.
335 Uses RVALS to determine length range. */
337 static int
338 compare_nonzero_chars (strinfo *si, gimple *stmt,
339 unsigned HOST_WIDE_INT off,
340 range_query *rvals)
342 if (!si->nonzero_chars)
343 return -1;
345 if (TREE_CODE (si->nonzero_chars) == INTEGER_CST)
346 return compare_tree_int (si->nonzero_chars, off);
348 if (!rvals || TREE_CODE (si->nonzero_chars) != SSA_NAME)
349 return -1;
351 value_range vr;
352 if (!rvals->range_of_expr (vr, si->nonzero_chars, stmt))
353 return -1;
354 value_range_kind rng = vr.kind ();
355 if (rng != VR_RANGE)
356 return -1;
358 /* If the offset is less than the minimum length or if the bounds
359 of the length range are equal return the result of the comparison
360 same as in the constant case. Otherwise return a conservative
361 result. */
362 int cmpmin = compare_tree_int (vr.min (), off);
363 if (cmpmin > 0 || tree_int_cst_equal (vr.min (), vr.max ()))
364 return cmpmin;
366 return -1;
369 /* Return true if SI is known to be a zero-length string. */
371 static inline bool
372 zero_length_string_p (strinfo *si)
374 return si->full_string_p && integer_zerop (si->nonzero_chars);
377 /* Return strinfo vector entry IDX. */
379 static inline strinfo *
380 get_strinfo (int idx)
382 if (vec_safe_length (stridx_to_strinfo) <= (unsigned int) idx)
383 return NULL;
384 return (*stridx_to_strinfo)[idx];
387 /* Get the next strinfo in the chain after SI, or null if none. */
389 static inline strinfo *
390 get_next_strinfo (strinfo *si)
392 if (si->next == 0)
393 return NULL;
394 strinfo *nextsi = get_strinfo (si->next);
395 if (nextsi == NULL || nextsi->first != si->first || nextsi->prev != si->idx)
396 return NULL;
397 return nextsi;
400 /* Helper function for get_stridx. Return the strinfo index of the address
401 of EXP, which is available in PTR if nonnull. If OFFSET_OUT, it is
402 OK to return the index for some X <= &EXP and store &EXP - X in
403 *OFFSET_OUT. When RVALS is nonnull uses it to determine range
404 information. */
406 static int
407 get_addr_stridx (tree exp, gimple *stmt,
408 tree ptr, unsigned HOST_WIDE_INT *offset_out,
409 range_query *rvals = NULL)
411 HOST_WIDE_INT off;
412 struct stridxlist *list, *last = NULL;
413 tree base;
415 if (!decl_to_stridxlist_htab)
416 return 0;
418 poly_int64 poff;
419 base = get_addr_base_and_unit_offset (exp, &poff);
420 if (base == NULL || !DECL_P (base) || !poff.is_constant (&off))
421 return 0;
423 list = decl_to_stridxlist_htab->get (base);
424 if (list == NULL)
425 return 0;
429 if (list->offset == off)
431 if (offset_out)
432 *offset_out = 0;
433 return list->idx;
435 if (list->offset > off)
436 return 0;
437 last = list;
438 list = list->next;
440 while (list);
442 if ((offset_out || ptr) && last && last->idx > 0)
444 unsigned HOST_WIDE_INT rel_off
445 = (unsigned HOST_WIDE_INT) off - last->offset;
446 strinfo *si = get_strinfo (last->idx);
447 if (si && compare_nonzero_chars (si, stmt, rel_off, rvals) >= 0)
449 if (offset_out)
451 *offset_out = rel_off;
452 return last->idx;
454 else
455 return get_stridx_plus_constant (si, rel_off, ptr);
458 return 0;
461 /* Returns string index for EXP. When EXP is an SSA_NAME that refers
462 to a known strinfo with an offset and OFFRNG is non-null, sets
463 both elements of the OFFRNG array to the range of the offset and
464 returns the index of the known strinfo. In this case the result
465 must not be used in for functions that modify the string.
466 When nonnull, uses RVALS to determine range information. */
468 static int
469 get_stridx (tree exp, gimple *stmt,
470 wide_int offrng[2] = NULL, range_query *rvals = NULL)
472 if (offrng)
473 offrng[0] = offrng[1] = wi::zero (TYPE_PRECISION (ptrdiff_type_node));
475 if (TREE_CODE (exp) == SSA_NAME)
477 if (ssa_ver_to_stridx[SSA_NAME_VERSION (exp)])
478 return ssa_ver_to_stridx[SSA_NAME_VERSION (exp)];
480 tree e = exp;
481 int last_idx = 0;
482 HOST_WIDE_INT offset = 0;
483 /* Follow a chain of at most 5 assignments. */
484 for (int i = 0; i < 5; i++)
486 gimple *def_stmt = SSA_NAME_DEF_STMT (e);
487 if (!is_gimple_assign (def_stmt))
488 return last_idx;
490 tree_code rhs_code = gimple_assign_rhs_code (def_stmt);
491 tree ptr, off;
493 if (rhs_code == ADDR_EXPR)
495 /* Handle indices/offsets into VLAs which are implemented
496 as pointers to arrays. */
497 ptr = gimple_assign_rhs1 (def_stmt);
498 ptr = TREE_OPERAND (ptr, 0);
500 /* Handle also VLAs of types larger than char. */
501 if (tree eltsize = TYPE_SIZE_UNIT (TREE_TYPE (ptr)))
503 if (TREE_CODE (ptr) == ARRAY_REF)
505 off = TREE_OPERAND (ptr, 1);
506 ptr = TREE_OPERAND (ptr, 0);
507 if (!integer_onep (eltsize))
509 /* Scale the array index by the size of the element
510 type in the rare case that it's greater than
511 the typical 1 for char, making sure both operands
512 have the same type. */
513 eltsize = fold_convert (ssizetype, eltsize);
514 off = fold_convert (ssizetype, off);
515 off = fold_build2 (MULT_EXPR, ssizetype, off, eltsize);
518 else
519 off = integer_zero_node;
521 else
522 return 0;
524 if (TREE_CODE (ptr) != MEM_REF)
525 return 0;
527 /* Add the MEM_REF byte offset. */
528 tree mem_off = TREE_OPERAND (ptr, 1);
529 off = fold_build2 (PLUS_EXPR, TREE_TYPE (off), off, mem_off);
530 ptr = TREE_OPERAND (ptr, 0);
532 else if (rhs_code == POINTER_PLUS_EXPR)
534 ptr = gimple_assign_rhs1 (def_stmt);
535 off = gimple_assign_rhs2 (def_stmt);
537 else
538 return 0;
540 if (TREE_CODE (ptr) != SSA_NAME)
541 return 0;
543 if (!tree_fits_shwi_p (off))
545 if (int idx = ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)])
546 if (offrng)
548 /* Only when requested by setting OFFRNG to non-null,
549 return the index corresponding to the SSA_NAME.
550 Do this irrespective of the whether the offset
551 is known. */
552 if (get_range (off, def_stmt, offrng, rvals))
554 /* When the offset range is known, increment it
555 it by the constant offset computed in prior
556 iterations and store it in the OFFRNG array. */
557 offrng[0] += offset;
558 offrng[1] += offset;
560 else
562 /* When the offset range cannot be determined
563 store [0, SIZE_MAX] and let the caller decide
564 if the offset matters. */
565 offrng[1] = wi::to_wide (TYPE_MAX_VALUE (sizetype));
566 offrng[0] = wi::zero (offrng[1].get_precision ());
568 return idx;
570 return 0;
573 HOST_WIDE_INT this_off = tree_to_shwi (off);
574 if (offrng)
576 offrng[0] += wi::shwi (this_off, offrng->get_precision ());
577 offrng[1] += offrng[0];
580 if (this_off < 0)
581 return last_idx;
583 offset = (unsigned HOST_WIDE_INT) offset + this_off;
584 if (offset < 0)
585 return last_idx;
587 if (int idx = ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)])
589 strinfo *si = get_strinfo (idx);
590 if (si)
592 if (compare_nonzero_chars (si, offset) >= 0)
593 return get_stridx_plus_constant (si, offset, exp);
595 if (offrng)
596 last_idx = idx;
599 e = ptr;
602 return last_idx;
605 if (TREE_CODE (exp) == ADDR_EXPR)
607 int idx = get_addr_stridx (TREE_OPERAND (exp, 0), stmt, exp, NULL);
608 if (idx != 0)
609 return idx;
612 const char *p = c_getstr (exp);
613 if (p)
614 return ~(int) strlen (p);
616 return 0;
619 /* Return true if strinfo vector is shared with the immediate dominator. */
621 static inline bool
622 strinfo_shared (void)
624 return vec_safe_length (stridx_to_strinfo)
625 && (*stridx_to_strinfo)[0] != NULL;
628 /* Unshare strinfo vector that is shared with the immediate dominator. */
630 static void
631 unshare_strinfo_vec (void)
633 strinfo *si;
634 unsigned int i = 0;
636 gcc_assert (strinfo_shared ());
637 stridx_to_strinfo = vec_safe_copy (stridx_to_strinfo);
638 for (i = 1; vec_safe_iterate (stridx_to_strinfo, i, &si); ++i)
639 if (si != NULL)
640 si->refcount++;
641 (*stridx_to_strinfo)[0] = NULL;
644 /* Attempt to create a string index for exp, ADDR_EXPR's operand.
645 Return a pointer to the location where the string index can
646 be stored (if 0) or is stored, or NULL if this can't be tracked. */
648 static int *
649 addr_stridxptr (tree exp)
651 HOST_WIDE_INT off;
653 poly_int64 poff;
654 tree base = get_addr_base_and_unit_offset (exp, &poff);
655 if (base == NULL_TREE || !DECL_P (base) || !poff.is_constant (&off))
656 return NULL;
658 if (!decl_to_stridxlist_htab)
660 decl_to_stridxlist_htab
661 = new hash_map<tree_decl_hash, stridxlist> (64);
662 gcc_obstack_init (&stridx_obstack);
665 bool existed;
666 stridxlist *list = &decl_to_stridxlist_htab->get_or_insert (base, &existed);
667 if (existed)
669 int i;
670 stridxlist *before = NULL;
671 for (i = 0; i < 32; i++)
673 if (list->offset == off)
674 return &list->idx;
675 if (list->offset > off && before == NULL)
676 before = list;
677 if (list->next == NULL)
678 break;
679 list = list->next;
681 if (i == 32)
682 return NULL;
683 if (before)
685 list = before;
686 before = XOBNEW (&stridx_obstack, struct stridxlist);
687 *before = *list;
688 list->next = before;
689 list->offset = off;
690 list->idx = 0;
691 return &list->idx;
693 list->next = XOBNEW (&stridx_obstack, struct stridxlist);
694 list = list->next;
697 list->next = NULL;
698 list->offset = off;
699 list->idx = 0;
700 return &list->idx;
703 /* Create a new string index, or return 0 if reached limit. */
705 static int
706 new_stridx (tree exp)
708 int idx;
709 if (max_stridx >= param_max_tracked_strlens)
710 return 0;
711 if (TREE_CODE (exp) == SSA_NAME)
713 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
714 return 0;
715 idx = max_stridx++;
716 ssa_ver_to_stridx[SSA_NAME_VERSION (exp)] = idx;
717 return idx;
719 if (TREE_CODE (exp) == ADDR_EXPR)
721 int *pidx = addr_stridxptr (TREE_OPERAND (exp, 0));
722 if (pidx != NULL)
724 gcc_assert (*pidx == 0);
725 *pidx = max_stridx++;
726 return *pidx;
729 return 0;
732 /* Like new_stridx, but for ADDR_EXPR's operand instead. */
734 static int
735 new_addr_stridx (tree exp)
737 int *pidx;
738 if (max_stridx >= param_max_tracked_strlens)
739 return 0;
740 pidx = addr_stridxptr (exp);
741 if (pidx != NULL)
743 gcc_assert (*pidx == 0);
744 *pidx = max_stridx++;
745 return *pidx;
747 return 0;
750 /* Create a new strinfo. */
752 static strinfo *
753 new_strinfo (tree ptr, int idx, tree nonzero_chars, bool full_string_p)
755 strinfo *si = strinfo_pool.allocate ();
756 si->nonzero_chars = nonzero_chars;
757 STRIP_USELESS_TYPE_CONVERSION (ptr);
758 si->ptr = ptr;
759 si->stmt = NULL;
760 si->alloc = NULL;
761 si->endptr = NULL_TREE;
762 si->refcount = 1;
763 si->idx = idx;
764 si->first = 0;
765 si->prev = 0;
766 si->next = 0;
767 si->writable = false;
768 si->dont_invalidate = false;
769 si->full_string_p = full_string_p;
770 return si;
773 /* Decrease strinfo refcount and free it if not referenced anymore. */
775 static inline void
776 free_strinfo (strinfo *si)
778 if (si && --si->refcount == 0)
779 strinfo_pool.remove (si);
782 /* Set strinfo in the vector entry IDX to SI. */
784 static inline void
785 set_strinfo (int idx, strinfo *si)
787 if (vec_safe_length (stridx_to_strinfo) && (*stridx_to_strinfo)[0])
788 unshare_strinfo_vec ();
789 if (vec_safe_length (stridx_to_strinfo) <= (unsigned int) idx)
790 vec_safe_grow_cleared (stridx_to_strinfo, idx + 1, true);
791 (*stridx_to_strinfo)[idx] = si;
794 /* Return the first strinfo in the related strinfo chain
795 if all strinfos in between belong to the chain, otherwise NULL. */
797 static strinfo *
798 verify_related_strinfos (strinfo *origsi)
800 strinfo *si = origsi, *psi;
802 if (origsi->first == 0)
803 return NULL;
804 for (; si->prev; si = psi)
806 if (si->first != origsi->first)
807 return NULL;
808 psi = get_strinfo (si->prev);
809 if (psi == NULL)
810 return NULL;
811 if (psi->next != si->idx)
812 return NULL;
814 if (si->idx != si->first)
815 return NULL;
816 return si;
819 /* Set SI's endptr to ENDPTR and compute its length based on SI->ptr.
820 Use LOC for folding. */
822 static void
823 set_endptr_and_length (location_t loc, strinfo *si, tree endptr)
825 si->endptr = endptr;
826 si->stmt = NULL;
827 tree start_as_size = fold_convert_loc (loc, size_type_node, si->ptr);
828 tree end_as_size = fold_convert_loc (loc, size_type_node, endptr);
829 si->nonzero_chars = fold_build2_loc (loc, MINUS_EXPR, size_type_node,
830 end_as_size, start_as_size);
831 si->full_string_p = true;
834 /* Return the string length, or NULL if it can't be computed.
835 The length may but need not be constant. Instead, it might be
836 the result of a strlen() call. */
838 static tree
839 get_string_length (strinfo *si)
841 /* If the length has already been computed return it if it's exact
842 (i.e., the string is nul-terminated at NONZERO_CHARS), or return
843 null if it isn't. */
844 if (si->nonzero_chars)
845 return si->full_string_p ? si->nonzero_chars : NULL;
847 /* If the string is the result of one of the built-in calls below
848 attempt to compute the length from the call statement. */
849 if (si->stmt)
851 gimple *stmt = si->stmt, *lenstmt;
852 tree callee, lhs, fn, tem;
853 location_t loc;
854 gimple_stmt_iterator gsi;
856 gcc_assert (is_gimple_call (stmt));
857 callee = gimple_call_fndecl (stmt);
858 gcc_assert (callee && fndecl_built_in_p (callee, BUILT_IN_NORMAL));
859 lhs = gimple_call_lhs (stmt);
860 /* unshare_strinfo is intentionally not called here. The (delayed)
861 transformation of strcpy or strcat into stpcpy is done at the place
862 of the former strcpy/strcat call and so can affect all the strinfos
863 with the same stmt. If they were unshared before and transformation
864 has been already done, the handling of BUILT_IN_STPCPY{,_CHK} should
865 just compute the right length. */
866 switch (DECL_FUNCTION_CODE (callee))
868 case BUILT_IN_STRCAT:
869 case BUILT_IN_STRCAT_CHK:
870 gsi = gsi_for_stmt (stmt);
871 fn = builtin_decl_implicit (BUILT_IN_STRLEN);
872 gcc_assert (lhs == NULL_TREE);
873 tem = unshare_expr (gimple_call_arg (stmt, 0));
874 lenstmt = gimple_build_call (fn, 1, tem);
875 lhs = make_ssa_name (TREE_TYPE (TREE_TYPE (fn)), lenstmt);
876 gimple_call_set_lhs (lenstmt, lhs);
877 gimple_set_vuse (lenstmt, gimple_vuse (stmt));
878 gsi_insert_before (&gsi, lenstmt, GSI_SAME_STMT);
879 tem = gimple_call_arg (stmt, 0);
880 if (!ptrofftype_p (TREE_TYPE (lhs)))
882 lhs = convert_to_ptrofftype (lhs);
883 lhs = force_gimple_operand_gsi (&gsi, lhs, true, NULL_TREE,
884 true, GSI_SAME_STMT);
886 lenstmt = gimple_build_assign
887 (make_ssa_name (TREE_TYPE (gimple_call_arg (stmt, 0))),
888 POINTER_PLUS_EXPR,tem, lhs);
889 gsi_insert_before (&gsi, lenstmt, GSI_SAME_STMT);
890 gimple_call_set_arg (stmt, 0, gimple_assign_lhs (lenstmt));
891 lhs = NULL_TREE;
892 /* FALLTHRU */
893 case BUILT_IN_STRCPY:
894 case BUILT_IN_STRCPY_CHK:
895 gcc_assert (builtin_decl_implicit_p (BUILT_IN_STPCPY));
896 if (gimple_call_num_args (stmt) == 2)
897 fn = builtin_decl_implicit (BUILT_IN_STPCPY);
898 else
899 fn = builtin_decl_explicit (BUILT_IN_STPCPY_CHK);
900 gcc_assert (lhs == NULL_TREE);
901 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
903 fprintf (dump_file, "Optimizing: ");
904 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
906 gimple_call_set_fndecl (stmt, fn);
907 lhs = make_ssa_name (TREE_TYPE (TREE_TYPE (fn)), stmt);
908 gimple_call_set_lhs (stmt, lhs);
909 update_stmt (stmt);
910 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
912 fprintf (dump_file, "into: ");
913 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
915 /* FALLTHRU */
916 case BUILT_IN_STPCPY:
917 case BUILT_IN_STPCPY_CHK:
918 gcc_assert (lhs != NULL_TREE);
919 loc = gimple_location (stmt);
920 set_endptr_and_length (loc, si, lhs);
921 for (strinfo *chainsi = verify_related_strinfos (si);
922 chainsi != NULL;
923 chainsi = get_next_strinfo (chainsi))
924 if (chainsi->nonzero_chars == NULL)
925 set_endptr_and_length (loc, chainsi, lhs);
926 break;
927 case BUILT_IN_ALLOCA:
928 case BUILT_IN_ALLOCA_WITH_ALIGN:
929 case BUILT_IN_MALLOC:
930 break;
931 /* BUILT_IN_CALLOC always has si->nonzero_chars set. */
932 default:
933 gcc_unreachable ();
934 break;
938 return si->nonzero_chars;
941 /* Dump strlen data to FP for statement STMT. When non-null, RVALS
942 points to the valuation engine used to calculate ranges, and is
943 used to dump strlen range for non-constant results. */
945 DEBUG_FUNCTION void
946 dump_strlen_info (FILE *fp, gimple *stmt, range_query *rvals)
948 if (stmt)
950 fprintf (fp, "\nDumping strlen pass data after ");
951 print_gimple_expr (fp, stmt, TDF_LINENO);
952 fputc ('\n', fp);
954 else
955 fprintf (fp, "\nDumping strlen pass data\n");
957 fprintf (fp, "max_stridx = %i\n", max_stridx);
958 fprintf (fp, "ssa_ver_to_stridx has %u elements\n",
959 ssa_ver_to_stridx.length ());
960 fprintf (fp, "stridx_to_strinfo");
961 if (stridx_to_strinfo)
963 fprintf (fp, " has %u elements\n", stridx_to_strinfo->length ());
964 for (unsigned i = 0; i != stridx_to_strinfo->length (); ++i)
966 if (strinfo *si = (*stridx_to_strinfo)[i])
968 if (!si->idx)
969 continue;
970 fprintf (fp, " idx = %i", si->idx);
971 if (si->ptr)
973 fprintf (fp, ", ptr = ");
974 print_generic_expr (fp, si->ptr);
977 if (si->nonzero_chars)
979 fprintf (fp, ", nonzero_chars = ");
980 print_generic_expr (fp, si->nonzero_chars);
981 if (TREE_CODE (si->nonzero_chars) == SSA_NAME)
983 value_range_kind rng = VR_UNDEFINED;
984 wide_int min, max;
985 if (rvals)
987 value_range vr;
988 rvals->range_of_expr (vr, si->nonzero_chars,
989 si->stmt);
990 rng = vr.kind ();
991 if (range_int_cst_p (&vr))
993 min = wi::to_wide (vr.min ());
994 max = wi::to_wide (vr.max ());
996 else
997 rng = VR_UNDEFINED;
999 else
1001 value_range vr;
1002 get_range_query (cfun)
1003 ->range_of_expr (vr, si->nonzero_chars);
1004 rng = vr.kind ();
1005 if (!vr.undefined_p ())
1007 min = wi::to_wide (vr.min ());
1008 max = wi::to_wide (vr.max ());
1012 if (rng == VR_RANGE || rng == VR_ANTI_RANGE)
1014 fprintf (fp, " %s[%llu, %llu]",
1015 rng == VR_RANGE ? "" : "~",
1016 (long long) min.to_uhwi (),
1017 (long long) max.to_uhwi ());
1022 fprintf (fp, ", refcount = %i", si->refcount);
1023 if (si->stmt)
1025 fprintf (fp, ", stmt = ");
1026 print_gimple_expr (fp, si->stmt, 0);
1028 if (si->alloc)
1030 fprintf (fp, ", alloc = ");
1031 print_gimple_expr (fp, si->alloc, 0);
1033 if (si->writable)
1034 fprintf (fp, ", writable");
1035 if (si->dont_invalidate)
1036 fprintf (fp, ", dont_invalidate");
1037 if (si->full_string_p)
1038 fprintf (fp, ", full_string_p");
1039 if (strinfo *next = get_next_strinfo (si))
1041 fprintf (fp, ", {");
1043 fprintf (fp, "%i%s", next->idx, next->first ? ", " : "");
1044 while ((next = get_next_strinfo (next)));
1045 fprintf (fp, "}");
1047 fputs ("\n", fp);
1051 else
1052 fprintf (fp, " = null\n");
1054 fprintf (fp, "decl_to_stridxlist_htab");
1055 if (decl_to_stridxlist_htab)
1057 fputs ("\n", fp);
1058 typedef decl_to_stridxlist_htab_t::iterator iter_t;
1059 for (iter_t it = decl_to_stridxlist_htab->begin ();
1060 it != decl_to_stridxlist_htab->end (); ++it)
1062 tree decl = (*it).first;
1063 stridxlist *list = &(*it).second;
1064 fprintf (fp, " decl = ");
1065 print_generic_expr (fp, decl);
1066 if (list)
1068 fprintf (fp, ", offsets = {");
1069 for (; list; list = list->next)
1070 fprintf (fp, "%lli%s", (long long) list->offset,
1071 list->next ? ", " : "");
1072 fputs ("}", fp);
1074 fputs ("\n", fp);
1077 else
1078 fprintf (fp, " = null\n");
1080 if (laststmt.stmt)
1082 fprintf (fp, "laststmt = ");
1083 print_gimple_expr (fp, laststmt.stmt, 0);
1084 fprintf (fp, ", len = ");
1085 print_generic_expr (fp, laststmt.len);
1086 fprintf (fp, ", stridx = %i\n", laststmt.stridx);
1090 /* Helper of get_range_strlen_dynamic(). See below. */
1092 static bool
1093 get_range_strlen_phi (tree src, gphi *phi,
1094 c_strlen_data *pdata, bitmap visited,
1095 pointer_query *ptr_qry, unsigned *pssa_def_max)
1097 if (!bitmap_set_bit (visited, SSA_NAME_VERSION (src)))
1098 return true;
1100 if (*pssa_def_max == 0)
1101 return false;
1103 --*pssa_def_max;
1105 /* Iterate over the PHI arguments and determine the minimum and maximum
1106 length/size of each and incorporate them into the overall result. */
1107 for (unsigned i = 0; i != gimple_phi_num_args (phi); ++i)
1109 tree arg = gimple_phi_arg_def (phi, i);
1110 if (arg == gimple_phi_result (phi))
1111 continue;
1113 c_strlen_data argdata = { };
1114 if (!get_range_strlen_dynamic (arg, phi, &argdata, visited, ptr_qry,
1115 pssa_def_max))
1117 pdata->maxlen = build_all_ones_cst (size_type_node);
1118 continue;
1121 /* Set the DECL of an unterminated array this argument refers to
1122 if one hasn't been found yet. */
1123 if (!pdata->decl && argdata.decl)
1124 pdata->decl = argdata.decl;
1126 if (!argdata.minlen
1127 || (integer_zerop (argdata.minlen)
1128 && (!argdata.maxbound
1129 || integer_all_onesp (argdata.maxbound))
1130 && integer_all_onesp (argdata.maxlen)))
1132 /* Set the upper bound of the length to unbounded. */
1133 pdata->maxlen = build_all_ones_cst (size_type_node);
1134 continue;
1137 /* Adjust the minimum and maximum length determined so far and
1138 the upper bound on the array size. */
1139 if (!pdata->minlen
1140 || tree_int_cst_lt (argdata.minlen, pdata->minlen))
1141 pdata->minlen = argdata.minlen;
1143 if (!pdata->maxlen
1144 || (argdata.maxlen
1145 && TREE_CODE (argdata.maxlen) == INTEGER_CST
1146 && tree_int_cst_lt (pdata->maxlen, argdata.maxlen)))
1147 pdata->maxlen = argdata.maxlen;
1149 if (!pdata->maxbound
1150 || TREE_CODE (pdata->maxbound) != INTEGER_CST
1151 || (argdata.maxbound
1152 && tree_int_cst_lt (pdata->maxbound, argdata.maxbound)
1153 && !integer_all_onesp (argdata.maxbound)))
1154 pdata->maxbound = argdata.maxbound;
1157 return true;
1160 /* Return the maximum possible length of the string PTR that's less
1161 than MAXLEN given the size of the object of subobject it points
1162 to at the given STMT. MAXLEN is the maximum length of the string
1163 determined so far. Return null when no such maximum can be
1164 determined. */
1166 static tree
1167 get_maxbound (tree ptr, gimple *stmt, offset_int maxlen,
1168 pointer_query *ptr_qry)
1170 access_ref aref;
1171 if (!ptr_qry->get_ref (ptr, stmt, &aref))
1172 return NULL_TREE;
1174 offset_int sizrem = aref.size_remaining ();
1175 if (sizrem <= 0)
1176 return NULL_TREE;
1178 if (sizrem < maxlen)
1179 maxlen = sizrem - 1;
1181 /* Try to determine the maximum from the subobject at the offset.
1182 This handles MEM [&some-struct, member-offset] that's often
1183 the result of folding COMPONENT_REF [some-struct, member]. */
1184 tree reftype = TREE_TYPE (aref.ref);
1185 if (!RECORD_OR_UNION_TYPE_P (reftype)
1186 || aref.offrng[0] != aref.offrng[1]
1187 || !wi::fits_shwi_p (aref.offrng[0]))
1188 return wide_int_to_tree (size_type_node, maxlen);
1190 HOST_WIDE_INT off = aref.offrng[0].to_shwi ();
1191 tree fld = field_at_offset (reftype, NULL_TREE, off);
1192 if (!fld || !DECL_SIZE_UNIT (fld))
1193 return wide_int_to_tree (size_type_node, maxlen);
1195 offset_int size = wi::to_offset (DECL_SIZE_UNIT (fld));
1196 if (maxlen < size)
1197 return wide_int_to_tree (size_type_node, maxlen);
1199 return wide_int_to_tree (size_type_node, size - 1);
1202 /* Attempt to determine the length of the string SRC. On success, store
1203 the length in *PDATA and return true. Otherwise, return false.
1204 VISITED is a bitmap of visited PHI nodes. RVALS points to the valuation
1205 engine used to calculate ranges. PSSA_DEF_MAX to an SSA_NAME
1206 assignment limit used to prevent runaway recursion. */
1208 static bool
1209 get_range_strlen_dynamic (tree src, gimple *stmt,
1210 c_strlen_data *pdata, bitmap visited,
1211 pointer_query *ptr_qry, unsigned *pssa_def_max)
1213 int idx = get_stridx (src, stmt);
1214 if (!idx)
1216 if (TREE_CODE (src) == SSA_NAME)
1218 gimple *def_stmt = SSA_NAME_DEF_STMT (src);
1219 if (gphi *phi = dyn_cast<gphi *>(def_stmt))
1220 return get_range_strlen_phi (src, phi, pdata, visited, ptr_qry,
1221 pssa_def_max);
1224 /* Return success regardless of the result and handle *PDATA
1225 in the caller. */
1226 get_range_strlen (src, pdata, 1);
1227 return true;
1230 if (idx < 0)
1232 /* SRC is a string of constant length. */
1233 pdata->minlen = build_int_cst (size_type_node, ~idx);
1234 pdata->maxlen = pdata->minlen;
1235 pdata->maxbound = pdata->maxlen;
1236 return true;
1239 if (strinfo *si = get_strinfo (idx))
1241 pdata->minlen = get_string_length (si);
1242 if (!pdata->minlen && si->nonzero_chars)
1244 if (TREE_CODE (si->nonzero_chars) == INTEGER_CST)
1245 pdata->minlen = si->nonzero_chars;
1246 else if (TREE_CODE (si->nonzero_chars) == SSA_NAME)
1248 value_range vr;
1249 ptr_qry->rvals->range_of_expr (vr, si->nonzero_chars, si->stmt);
1250 if (range_int_cst_p (&vr))
1252 pdata->minlen = vr.min ();
1253 pdata->maxlen = vr.max ();
1255 else
1256 pdata->minlen = build_zero_cst (size_type_node);
1258 else
1259 pdata->minlen = build_zero_cst (size_type_node);
1261 tree base = si->ptr;
1262 if (TREE_CODE (base) == ADDR_EXPR)
1263 base = TREE_OPERAND (base, 0);
1265 HOST_WIDE_INT off;
1266 poly_int64 poff;
1267 base = get_addr_base_and_unit_offset (base, &poff);
1268 if (base
1269 && DECL_P (base)
1270 && TREE_CODE (TREE_TYPE (base)) == ARRAY_TYPE
1271 && TYPE_SIZE_UNIT (TREE_TYPE (base))
1272 && poff.is_constant (&off))
1274 tree basetype = TREE_TYPE (base);
1275 tree size = TYPE_SIZE_UNIT (basetype);
1276 if (TREE_CODE (size) == INTEGER_CST)
1278 ++off; /* Increment for the terminating nul. */
1279 tree toffset = build_int_cst (size_type_node, off);
1280 pdata->maxlen = fold_build2 (MINUS_EXPR, size_type_node, size,
1281 toffset);
1282 pdata->maxbound = pdata->maxlen;
1284 else
1285 pdata->maxlen = build_all_ones_cst (size_type_node);
1287 else
1288 pdata->maxlen = build_all_ones_cst (size_type_node);
1290 else if (pdata->minlen && TREE_CODE (pdata->minlen) == SSA_NAME)
1292 value_range vr;
1293 ptr_qry->rvals->range_of_expr (vr, si->nonzero_chars, stmt);
1294 if (range_int_cst_p (&vr))
1296 pdata->minlen = vr.min ();
1297 pdata->maxlen = vr.max ();
1298 offset_int max = offset_int::from (vr.upper_bound (0), SIGNED);
1299 if (tree maxbound = get_maxbound (si->ptr, stmt, max, ptr_qry))
1300 pdata->maxbound = maxbound;
1301 else
1302 pdata->maxbound = pdata->maxlen;
1304 else
1306 pdata->minlen = build_zero_cst (size_type_node);
1307 pdata->maxlen = build_all_ones_cst (size_type_node);
1310 else if (pdata->minlen && TREE_CODE (pdata->minlen) == INTEGER_CST)
1312 pdata->maxlen = pdata->minlen;
1313 pdata->maxbound = pdata->minlen;
1315 else
1317 /* For PDATA->MINLEN that's a non-constant expression such
1318 as PLUS_EXPR whose value range is unknown, set the bounds
1319 to zero and SIZE_MAX. */
1320 pdata->minlen = build_zero_cst (size_type_node);
1321 pdata->maxlen = build_all_ones_cst (size_type_node);
1324 return true;
1327 return false;
1330 /* Analogous to get_range_strlen but for dynamically created strings,
1331 i.e., those created by calls to strcpy as opposed to just string
1332 constants.
1333 Try to obtain the range of the lengths of the string(s) referenced
1334 by SRC, or the size of the largest array SRC refers to if the range
1335 of lengths cannot be determined, and store all in *PDATA. RVALS
1336 points to the valuation engine used to calculate ranges. */
1338 void
1339 get_range_strlen_dynamic (tree src, gimple *stmt, c_strlen_data *pdata,
1340 pointer_query &ptr_qry)
1342 auto_bitmap visited;
1343 tree maxbound = pdata->maxbound;
1345 unsigned limit = param_ssa_name_def_chain_limit;
1346 if (!get_range_strlen_dynamic (src, stmt, pdata, visited, &ptr_qry, &limit))
1348 /* On failure extend the length range to an impossible maximum
1349 (a valid MAXLEN must be less than PTRDIFF_MAX - 1). Other
1350 members can stay unchanged regardless. */
1351 pdata->minlen = ssize_int (0);
1352 pdata->maxlen = build_all_ones_cst (size_type_node);
1354 else if (!pdata->minlen)
1355 pdata->minlen = ssize_int (0);
1357 /* If it's unchanged from it initial non-null value, set the conservative
1358 MAXBOUND to SIZE_MAX. Otherwise leave it null (if it is null). */
1359 if (maxbound && pdata->maxbound == maxbound)
1360 pdata->maxbound = build_all_ones_cst (size_type_node);
1363 /* Invalidate string length information for strings whose length might
1364 change due to stores in STMT, except those marked DONT_INVALIDATE.
1365 For string-modifying statements, ZERO_WRITE is set when the statement
1366 wrote only zeros.
1367 Returns true if any STRIDX_TO_STRINFO entries were considered
1368 for invalidation. */
1370 static bool
1371 maybe_invalidate (gimple *stmt, bool zero_write = false)
1373 if (dump_file && (dump_flags & TDF_DETAILS))
1375 fprintf (dump_file, "%s called for ", __func__);
1376 print_gimple_stmt (dump_file, stmt, TDF_LINENO);
1379 strinfo *si;
1380 bool nonempty = false;
1382 for (unsigned i = 1; vec_safe_iterate (stridx_to_strinfo, i, &si); ++i)
1384 if (si == NULL || !POINTER_TYPE_P (TREE_TYPE (si->ptr)))
1385 continue;
1387 nonempty = true;
1389 /* Unconditionally reset DONT_INVALIDATE. */
1390 bool dont_invalidate = si->dont_invalidate;
1391 si->dont_invalidate = false;
1393 if (dont_invalidate)
1394 continue;
1396 ao_ref r;
1397 tree size = si->nonzero_chars;
1398 ao_ref_init_from_ptr_and_size (&r, si->ptr, size);
1399 /* Include the terminating nul in the size of the string
1400 to consider when determining possible clobber. But do not
1401 add it to 'size' since we don't know whether it would
1402 actually fit the allocated area. */
1403 if (known_size_p (r.size))
1405 if (known_le (r.size, HOST_WIDE_INT_MAX - BITS_PER_UNIT))
1406 r.max_size += BITS_PER_UNIT;
1407 else
1408 r.max_size = -1;
1410 if (stmt_may_clobber_ref_p_1 (stmt, &r))
1412 if (dump_file && (dump_flags & TDF_DETAILS))
1414 fputs (" statement may clobber object ", dump_file);
1415 print_generic_expr (dump_file, si->ptr);
1416 if (size && tree_fits_uhwi_p (size))
1417 fprintf (dump_file, " " HOST_WIDE_INT_PRINT_UNSIGNED
1418 " bytes in size", tree_to_uhwi (size));
1419 fputc ('\n', dump_file);
1422 set_strinfo (i, NULL);
1423 free_strinfo (si);
1424 continue;
1427 if (size
1428 && !zero_write
1429 && si->stmt
1430 && is_gimple_call (si->stmt)
1431 && (DECL_FUNCTION_CODE (gimple_call_fndecl (si->stmt))
1432 == BUILT_IN_CALLOC))
1434 /* If the clobber test above considered the length of
1435 the string (including the nul), then for (potentially)
1436 non-zero writes that might modify storage allocated by
1437 calloc consider the whole object and if it might be
1438 clobbered by the statement reset the statement. */
1439 ao_ref_init_from_ptr_and_size (&r, si->ptr, NULL_TREE);
1440 if (stmt_may_clobber_ref_p_1 (stmt, &r))
1441 si->stmt = NULL;
1445 if (dump_file && (dump_flags & TDF_DETAILS))
1446 fprintf (dump_file, "%s returns %i\n", __func__, nonempty);
1448 return nonempty;
1451 /* Unshare strinfo record SI, if it has refcount > 1 or
1452 if stridx_to_strinfo vector is shared with some other
1453 bbs. */
1455 static strinfo *
1456 unshare_strinfo (strinfo *si)
1458 strinfo *nsi;
1460 if (si->refcount == 1 && !strinfo_shared ())
1461 return si;
1463 nsi = new_strinfo (si->ptr, si->idx, si->nonzero_chars, si->full_string_p);
1464 nsi->stmt = si->stmt;
1465 nsi->alloc = si->alloc;
1466 nsi->endptr = si->endptr;
1467 nsi->first = si->first;
1468 nsi->prev = si->prev;
1469 nsi->next = si->next;
1470 nsi->writable = si->writable;
1471 set_strinfo (si->idx, nsi);
1472 free_strinfo (si);
1473 return nsi;
1476 /* Attempt to create a new strinfo for BASESI + OFF, or find existing
1477 strinfo if there is any. Return it's idx, or 0 if no strinfo has
1478 been created. */
1480 static int
1481 get_stridx_plus_constant (strinfo *basesi, unsigned HOST_WIDE_INT off,
1482 tree ptr)
1484 if (TREE_CODE (ptr) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (ptr))
1485 return 0;
1487 if (compare_nonzero_chars (basesi, off) < 0
1488 || !tree_fits_uhwi_p (basesi->nonzero_chars))
1489 return 0;
1491 unsigned HOST_WIDE_INT nonzero_chars
1492 = tree_to_uhwi (basesi->nonzero_chars) - off;
1493 strinfo *si = basesi, *chainsi;
1494 if (si->first || si->prev || si->next)
1495 si = verify_related_strinfos (basesi);
1496 if (si == NULL
1497 || si->nonzero_chars == NULL_TREE
1498 || TREE_CODE (si->nonzero_chars) != INTEGER_CST)
1499 return 0;
1501 if (TREE_CODE (ptr) == SSA_NAME
1502 && ssa_ver_to_stridx.length () <= SSA_NAME_VERSION (ptr))
1503 ssa_ver_to_stridx.safe_grow_cleared (num_ssa_names, true);
1505 gcc_checking_assert (compare_tree_int (si->nonzero_chars, off) != -1);
1506 for (chainsi = si; chainsi->next; chainsi = si)
1508 si = get_next_strinfo (chainsi);
1509 if (si == NULL
1510 || si->nonzero_chars == NULL_TREE
1511 || TREE_CODE (si->nonzero_chars) != INTEGER_CST)
1512 break;
1513 int r = compare_tree_int (si->nonzero_chars, nonzero_chars);
1514 if (r != 1)
1516 if (r == 0)
1518 if (TREE_CODE (ptr) == SSA_NAME)
1519 ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)] = si->idx;
1520 else
1522 int *pidx = addr_stridxptr (TREE_OPERAND (ptr, 0));
1523 if (pidx != NULL && *pidx == 0)
1524 *pidx = si->idx;
1526 return si->idx;
1528 break;
1532 int idx = new_stridx (ptr);
1533 if (idx == 0)
1534 return 0;
1535 si = new_strinfo (ptr, idx, build_int_cst (size_type_node, nonzero_chars),
1536 basesi->full_string_p);
1537 set_strinfo (idx, si);
1538 if (strinfo *nextsi = get_strinfo (chainsi->next))
1540 nextsi = unshare_strinfo (nextsi);
1541 si->next = nextsi->idx;
1542 nextsi->prev = idx;
1544 chainsi = unshare_strinfo (chainsi);
1545 if (chainsi->first == 0)
1546 chainsi->first = chainsi->idx;
1547 chainsi->next = idx;
1548 if (chainsi->endptr == NULL_TREE && zero_length_string_p (si))
1549 chainsi->endptr = ptr;
1550 si->endptr = chainsi->endptr;
1551 si->prev = chainsi->idx;
1552 si->first = chainsi->first;
1553 si->writable = chainsi->writable;
1554 return si->idx;
1557 /* Note that PTR, a pointer SSA_NAME initialized in the current stmt, points
1558 to a zero-length string and if possible chain it to a related strinfo
1559 chain whose part is or might be CHAINSI. */
1561 static strinfo *
1562 zero_length_string (tree ptr, strinfo *chainsi)
1564 strinfo *si;
1565 int idx;
1566 if (ssa_ver_to_stridx.length () <= SSA_NAME_VERSION (ptr))
1567 ssa_ver_to_stridx.safe_grow_cleared (num_ssa_names, true);
1568 gcc_checking_assert (TREE_CODE (ptr) == SSA_NAME
1569 && ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)] == 0);
1571 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (ptr))
1572 return NULL;
1573 if (chainsi != NULL)
1575 si = verify_related_strinfos (chainsi);
1576 if (si)
1580 /* We shouldn't mix delayed and non-delayed lengths. */
1581 gcc_assert (si->full_string_p);
1582 if (si->endptr == NULL_TREE)
1584 si = unshare_strinfo (si);
1585 si->endptr = ptr;
1587 chainsi = si;
1588 si = get_next_strinfo (si);
1590 while (si != NULL);
1591 if (zero_length_string_p (chainsi))
1593 if (chainsi->next)
1595 chainsi = unshare_strinfo (chainsi);
1596 chainsi->next = 0;
1598 ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)] = chainsi->idx;
1599 return chainsi;
1602 else
1604 /* We shouldn't mix delayed and non-delayed lengths. */
1605 gcc_assert (chainsi->full_string_p);
1606 if (chainsi->first || chainsi->prev || chainsi->next)
1608 chainsi = unshare_strinfo (chainsi);
1609 chainsi->first = 0;
1610 chainsi->prev = 0;
1611 chainsi->next = 0;
1615 idx = new_stridx (ptr);
1616 if (idx == 0)
1617 return NULL;
1618 si = new_strinfo (ptr, idx, build_int_cst (size_type_node, 0), true);
1619 set_strinfo (idx, si);
1620 si->endptr = ptr;
1621 if (chainsi != NULL)
1623 chainsi = unshare_strinfo (chainsi);
1624 if (chainsi->first == 0)
1625 chainsi->first = chainsi->idx;
1626 chainsi->next = idx;
1627 if (chainsi->endptr == NULL_TREE)
1628 chainsi->endptr = ptr;
1629 si->prev = chainsi->idx;
1630 si->first = chainsi->first;
1631 si->writable = chainsi->writable;
1633 return si;
1636 /* For strinfo ORIGSI whose length has been just updated, adjust other
1637 related strinfos so that they match the new ORIGSI. This involves:
1639 - adding ADJ to the nonzero_chars fields
1640 - copying full_string_p from the new ORIGSI. */
1642 static void
1643 adjust_related_strinfos (location_t loc, strinfo *origsi, tree adj)
1645 strinfo *si = verify_related_strinfos (origsi);
1647 if (si == NULL)
1648 return;
1650 while (1)
1652 strinfo *nsi;
1654 if (si != origsi)
1656 tree tem;
1658 si = unshare_strinfo (si);
1659 /* We shouldn't see delayed lengths here; the caller must
1660 have calculated the old length in order to calculate
1661 the adjustment. */
1662 gcc_assert (si->nonzero_chars);
1663 tem = fold_convert_loc (loc, TREE_TYPE (si->nonzero_chars), adj);
1664 si->nonzero_chars = fold_build2_loc (loc, PLUS_EXPR,
1665 TREE_TYPE (si->nonzero_chars),
1666 si->nonzero_chars, tem);
1667 si->full_string_p = origsi->full_string_p;
1669 si->endptr = NULL_TREE;
1670 si->dont_invalidate = true;
1672 nsi = get_next_strinfo (si);
1673 if (nsi == NULL)
1674 return;
1675 si = nsi;
1679 /* Find if there are other SSA_NAME pointers equal to PTR
1680 for which we don't track their string lengths yet. If so, use
1681 IDX for them. */
1683 static void
1684 find_equal_ptrs (tree ptr, int idx)
1686 if (TREE_CODE (ptr) != SSA_NAME)
1687 return;
1688 while (1)
1690 gimple *stmt = SSA_NAME_DEF_STMT (ptr);
1691 if (!is_gimple_assign (stmt))
1692 return;
1693 ptr = gimple_assign_rhs1 (stmt);
1694 switch (gimple_assign_rhs_code (stmt))
1696 case SSA_NAME:
1697 break;
1698 CASE_CONVERT:
1699 if (!POINTER_TYPE_P (TREE_TYPE (ptr)))
1700 return;
1701 if (TREE_CODE (ptr) == SSA_NAME)
1702 break;
1703 if (TREE_CODE (ptr) != ADDR_EXPR)
1704 return;
1705 /* FALLTHRU */
1706 case ADDR_EXPR:
1708 int *pidx = addr_stridxptr (TREE_OPERAND (ptr, 0));
1709 if (pidx != NULL && *pidx == 0)
1710 *pidx = idx;
1711 return;
1713 default:
1714 return;
1717 /* We might find an endptr created in this pass. Grow the
1718 vector in that case. */
1719 if (ssa_ver_to_stridx.length () <= SSA_NAME_VERSION (ptr))
1720 ssa_ver_to_stridx.safe_grow_cleared (num_ssa_names, true);
1722 if (ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)] != 0)
1723 return;
1724 ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)] = idx;
1728 /* Return true if STMT is a call to a builtin function with the right
1729 arguments and attributes that should be considered for optimization
1730 by this pass. */
1732 static bool
1733 valid_builtin_call (gimple *stmt)
1735 if (!gimple_call_builtin_p (stmt, BUILT_IN_NORMAL))
1736 return false;
1738 tree callee = gimple_call_fndecl (stmt);
1739 switch (DECL_FUNCTION_CODE (callee))
1741 case BUILT_IN_MEMCMP:
1742 case BUILT_IN_MEMCMP_EQ:
1743 case BUILT_IN_STRCMP:
1744 case BUILT_IN_STRNCMP:
1745 case BUILT_IN_STRCHR:
1746 case BUILT_IN_STRLEN:
1747 case BUILT_IN_STRNLEN:
1748 /* The above functions should be pure. Punt if they aren't. */
1749 if (gimple_vdef (stmt) || gimple_vuse (stmt) == NULL_TREE)
1750 return false;
1751 break;
1753 case BUILT_IN_ALLOCA:
1754 case BUILT_IN_ALLOCA_WITH_ALIGN:
1755 case BUILT_IN_CALLOC:
1756 case BUILT_IN_MALLOC:
1757 case BUILT_IN_MEMCPY:
1758 case BUILT_IN_MEMCPY_CHK:
1759 case BUILT_IN_MEMPCPY:
1760 case BUILT_IN_MEMPCPY_CHK:
1761 case BUILT_IN_MEMSET:
1762 case BUILT_IN_STPCPY:
1763 case BUILT_IN_STPCPY_CHK:
1764 case BUILT_IN_STPNCPY:
1765 case BUILT_IN_STPNCPY_CHK:
1766 case BUILT_IN_STRCAT:
1767 case BUILT_IN_STRCAT_CHK:
1768 case BUILT_IN_STRCPY:
1769 case BUILT_IN_STRCPY_CHK:
1770 case BUILT_IN_STRNCAT:
1771 case BUILT_IN_STRNCAT_CHK:
1772 case BUILT_IN_STRNCPY:
1773 case BUILT_IN_STRNCPY_CHK:
1774 /* The above functions should be neither const nor pure. Punt if they
1775 aren't. */
1776 if (gimple_vdef (stmt) == NULL_TREE || gimple_vuse (stmt) == NULL_TREE)
1777 return false;
1778 break;
1780 default:
1781 break;
1784 return true;
1787 /* If the last .MEM setter statement before STMT is
1788 memcpy (x, y, strlen (y) + 1), the only .MEM use of it is STMT
1789 and STMT is known to overwrite x[strlen (x)], adjust the last memcpy to
1790 just memcpy (x, y, strlen (y)). SI must be the zero length
1791 strinfo. */
1793 void
1794 strlen_pass::adjust_last_stmt (strinfo *si, gimple *stmt, bool is_strcat)
1796 tree vuse, callee, len;
1797 struct laststmt_struct last = laststmt;
1798 strinfo *lastsi, *firstsi;
1799 unsigned len_arg_no = 2;
1801 laststmt.stmt = NULL;
1802 laststmt.len = NULL_TREE;
1803 laststmt.stridx = 0;
1805 if (last.stmt == NULL)
1806 return;
1808 vuse = gimple_vuse (stmt);
1809 if (vuse == NULL_TREE
1810 || SSA_NAME_DEF_STMT (vuse) != last.stmt
1811 || !has_single_use (vuse))
1812 return;
1814 gcc_assert (last.stridx > 0);
1815 lastsi = get_strinfo (last.stridx);
1816 if (lastsi == NULL)
1817 return;
1819 if (lastsi != si)
1821 if (lastsi->first == 0 || lastsi->first != si->first)
1822 return;
1824 firstsi = verify_related_strinfos (si);
1825 if (firstsi == NULL)
1826 return;
1827 while (firstsi != lastsi)
1829 firstsi = get_next_strinfo (firstsi);
1830 if (firstsi == NULL)
1831 return;
1835 if (!is_strcat && !zero_length_string_p (si))
1836 return;
1838 if (is_gimple_assign (last.stmt))
1840 gimple_stmt_iterator gsi;
1842 if (!integer_zerop (gimple_assign_rhs1 (last.stmt)))
1843 return;
1844 if (stmt_could_throw_p (cfun, last.stmt))
1845 return;
1846 gsi = gsi_for_stmt (last.stmt);
1847 unlink_stmt_vdef (last.stmt);
1848 release_defs (last.stmt);
1849 gsi_remove (&gsi, true);
1850 return;
1853 if (!valid_builtin_call (last.stmt))
1854 return;
1856 callee = gimple_call_fndecl (last.stmt);
1857 switch (DECL_FUNCTION_CODE (callee))
1859 case BUILT_IN_MEMCPY:
1860 case BUILT_IN_MEMCPY_CHK:
1861 break;
1862 default:
1863 return;
1866 len = gimple_call_arg (last.stmt, len_arg_no);
1867 if (tree_fits_uhwi_p (len))
1869 if (!tree_fits_uhwi_p (last.len)
1870 || integer_zerop (len)
1871 || tree_to_uhwi (len) != tree_to_uhwi (last.len) + 1)
1872 return;
1873 /* Don't adjust the length if it is divisible by 4, it is more efficient
1874 to store the extra '\0' in that case. */
1875 if ((tree_to_uhwi (len) & 3) == 0)
1876 return;
1878 /* Don't fold away an out of bounds access, as this defeats proper
1879 warnings. */
1880 tree dst = gimple_call_arg (last.stmt, 0);
1882 access_ref aref;
1883 tree size = compute_objsize (dst, stmt, 1, &aref, &ptr_qry);
1884 if (size && tree_int_cst_lt (size, len))
1885 return;
1887 else if (TREE_CODE (len) == SSA_NAME)
1889 gimple *def_stmt = SSA_NAME_DEF_STMT (len);
1890 if (!is_gimple_assign (def_stmt)
1891 || gimple_assign_rhs_code (def_stmt) != PLUS_EXPR
1892 || gimple_assign_rhs1 (def_stmt) != last.len
1893 || !integer_onep (gimple_assign_rhs2 (def_stmt)))
1894 return;
1896 else
1897 return;
1899 gimple_call_set_arg (last.stmt, len_arg_no, last.len);
1900 update_stmt (last.stmt);
1903 /* For an LHS that is an SSA_NAME that is the result of a strlen()
1904 call, or when BOUND is non-null, of a strnlen() call, set LHS
1905 range info to [0, min (MAX, BOUND)] when the range includes more
1906 than one value and return LHS. Otherwise, when the range
1907 [MIN, MAX] is such that MIN == MAX, return the tree representation
1908 of (MIN). The latter allows callers to fold suitable strnlen() calls
1909 to constants. */
1911 tree
1912 set_strlen_range (tree lhs, wide_int min, wide_int max,
1913 tree bound /* = NULL_TREE */)
1915 if (TREE_CODE (lhs) != SSA_NAME
1916 || !INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
1917 return NULL_TREE;
1919 if (bound)
1921 /* For strnlen, adjust MIN and MAX as necessary. If the bound
1922 is less than the size of the array set MAX to it. It it's
1923 greater than MAX and MAX is non-zero bump MAX down to account
1924 for the necessary terminating nul. Otherwise leave it alone. */
1925 if (TREE_CODE (bound) == INTEGER_CST)
1927 wide_int wibnd = wi::to_wide (bound);
1928 int cmp = wi::cmpu (wibnd, max);
1929 if (cmp < 0)
1930 max = wibnd;
1931 else if (cmp && wi::ne_p (max, min))
1932 --max;
1934 else if (TREE_CODE (bound) == SSA_NAME)
1936 value_range r;
1937 get_range_query (cfun)->range_of_expr (r, bound);
1938 if (!r.undefined_p ())
1940 /* For a bound in a known range, adjust the range determined
1941 above as necessary. For a bound in some anti-range or
1942 in an unknown range, use the range determined by callers. */
1943 if (wi::ltu_p (r.lower_bound (), min))
1944 min = r.lower_bound ();
1945 if (wi::ltu_p (r.upper_bound (), max))
1946 max = r.upper_bound ();
1951 if (min == max)
1952 return wide_int_to_tree (size_type_node, min);
1954 value_range vr (TREE_TYPE (lhs), min, max);
1955 set_range_info (lhs, vr);
1956 return lhs;
1959 /* For an LHS that is an SSA_NAME and for strlen() or strnlen() argument
1960 SRC, set LHS range info to [0, min (N, BOUND)] if SRC refers to
1961 a character array A[N] with unknown length bounded by N, and for
1962 strnlen(), by min (N, BOUND). */
1964 static tree
1965 maybe_set_strlen_range (tree lhs, tree src, tree bound)
1967 if (TREE_CODE (lhs) != SSA_NAME
1968 || !INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
1969 return NULL_TREE;
1971 if (TREE_CODE (src) == SSA_NAME)
1973 gimple *def = SSA_NAME_DEF_STMT (src);
1974 if (is_gimple_assign (def)
1975 && gimple_assign_rhs_code (def) == ADDR_EXPR)
1976 src = gimple_assign_rhs1 (def);
1979 /* The longest string is PTRDIFF_MAX - 1 bytes including the final
1980 NUL so that the difference between a pointer to just past it and
1981 one to its beginning is positive. */
1982 wide_int max = wi::to_wide (TYPE_MAX_VALUE (ptrdiff_type_node)) - 2;
1984 if (TREE_CODE (src) == ADDR_EXPR)
1986 /* The last array member of a struct can be bigger than its size
1987 suggests if it's treated as a poor-man's flexible array member. */
1988 src = TREE_OPERAND (src, 0);
1989 if (TREE_CODE (src) != MEM_REF
1990 && !array_ref_flexible_size_p (src))
1992 tree type = TREE_TYPE (src);
1993 tree size = TYPE_SIZE_UNIT (type);
1994 if (size
1995 && TREE_CODE (size) == INTEGER_CST
1996 && !integer_zerop (size))
1998 /* Even though such uses of strlen would be undefined,
1999 avoid relying on arrays of arrays in case some genius
2000 decides to call strlen on an unterminated array element
2001 that's followed by a terminated one. Likewise, avoid
2002 assuming that a struct array member is necessarily
2003 nul-terminated (the nul may be in the member that
2004 follows). In those cases, assume that the length
2005 of the string stored in such an array is bounded
2006 by the size of the enclosing object if one can be
2007 determined. */
2008 tree base = get_base_address (src);
2009 if (VAR_P (base))
2011 if (tree size = DECL_SIZE_UNIT (base))
2012 if (size
2013 && TREE_CODE (size) == INTEGER_CST
2014 && TREE_CODE (TREE_TYPE (base)) != POINTER_TYPE)
2015 max = wi::to_wide (size);
2019 /* For strlen() the upper bound above is equal to
2020 the longest string that can be stored in the array
2021 (i.e., it accounts for the terminating nul. For
2022 strnlen() bump up the maximum by one since the array
2023 need not be nul-terminated. */
2024 if (!bound && max != 0)
2025 --max;
2029 wide_int min = wi::zero (max.get_precision ());
2030 return set_strlen_range (lhs, min, max, bound);
2033 /* Diagnose buffer overflow by a STMT writing LEN + PLUS_ONE bytes,
2034 either into a region allocated for the object SI when non-null,
2035 or into an object designated by the LHS of STMT otherwise.
2036 For a call STMT, when CALL_LHS is set use its left hand side
2037 as the destination, otherwise use argument zero.
2038 When nonnull uses RVALS to determine range information.
2039 RAWMEM may be set by memcpy and other raw memory functions
2040 to allow accesses across subobject boundaries. */
2042 void
2043 strlen_pass::maybe_warn_overflow (gimple *stmt, bool call_lhs, tree len,
2044 strinfo *si, bool plus_one, bool rawmem)
2046 if (!len || warning_suppressed_p (stmt, OPT_Wstringop_overflow_))
2047 return;
2049 /* The DECL of the function performing the write if it is done
2050 by one. */
2051 tree writefn = NULL_TREE;
2052 /* The destination expression involved in the store or call STMT. */
2053 tree dest = NULL_TREE;
2055 if (is_gimple_assign (stmt))
2056 dest = gimple_assign_lhs (stmt);
2057 else if (is_gimple_call (stmt))
2059 if (call_lhs)
2060 dest = gimple_call_lhs (stmt);
2061 else
2063 gcc_assert (gimple_call_builtin_p (stmt, BUILT_IN_NORMAL));
2064 dest = gimple_call_arg (stmt, 0);
2067 if (!dest)
2068 return;
2069 writefn = gimple_call_fndecl (stmt);
2071 else
2072 return;
2074 if (warning_suppressed_p (dest, OPT_Wstringop_overflow_))
2075 return;
2077 const int ostype = rawmem ? 0 : 1;
2079 /* Use maximum precision to avoid overflow in the addition below.
2080 Make sure all operands have the same precision to keep wide_int
2081 from ICE'ing. */
2083 access_ref aref;
2084 /* The size of the destination region (which is smaller than
2085 the destination object for stores at a non-zero offset). */
2086 tree destsize = compute_objsize (dest, stmt, ostype, &aref, &ptr_qry);
2088 if (!destsize)
2090 aref.sizrng[0] = 0;
2091 aref.sizrng[1] = wi::to_offset (max_object_size ());
2094 /* Return early if the DESTSIZE size expression is the same as LEN
2095 and the offset into the destination is zero. This might happen
2096 in the case of a pair of malloc and memset calls to allocate
2097 an object and clear it as if by calloc. */
2098 if (destsize == len && !plus_one
2099 && aref.offrng[0] == 0 && aref.offrng[0] == aref.offrng[1])
2100 return;
2102 wide_int rng[2];
2103 if (!get_range (len, stmt, rng, ptr_qry.rvals))
2104 return;
2106 widest_int lenrng[2] =
2107 { widest_int::from (rng[0], SIGNED), widest_int::from (rng[1], SIGNED) };
2109 if (plus_one)
2111 lenrng[0] += 1;
2112 lenrng[1] += 1;
2115 /* The size of the remaining space in the destination computed
2116 as the size of the latter minus the offset into it. */
2117 widest_int spcrng[2];
2119 offset_int remrng[2];
2120 remrng[1] = aref.size_remaining (remrng);
2121 spcrng[0] = remrng[0] == -1 ? 0 : widest_int::from (remrng[0], UNSIGNED);
2122 spcrng[1] = widest_int::from (remrng[1], UNSIGNED);
2125 if (wi::leu_p (lenrng[0], spcrng[0])
2126 && wi::leu_p (lenrng[1], spcrng[1]))
2127 return;
2129 location_t loc = gimple_or_expr_nonartificial_location (stmt, dest);
2130 bool warned = false;
2131 if (wi::leu_p (lenrng[0], spcrng[1]))
2133 if (len != destsize
2134 && (!si || rawmem || !is_strlen_related_p (si->ptr, len)))
2135 return;
2137 warned = (writefn
2138 ? warning_at (loc, OPT_Wstringop_overflow_,
2139 "%qD writing one too many bytes into a region "
2140 "of a size that depends on %<strlen%>",
2141 writefn)
2142 : warning_at (loc, OPT_Wstringop_overflow_,
2143 "writing one too many bytes into a region "
2144 "of a size that depends on %<strlen%>"));
2146 else if (lenrng[0] == lenrng[1])
2148 if (spcrng[0] == spcrng[1])
2149 warned = (writefn
2150 ? warning_n (loc, OPT_Wstringop_overflow_,
2151 lenrng[0].to_uhwi (),
2152 "%qD writing %wu byte into a region "
2153 "of size %wu",
2154 "%qD writing %wu bytes into a region "
2155 "of size %wu",
2156 writefn, lenrng[0].to_uhwi (),
2157 spcrng[0].to_uhwi ())
2158 : warning_n (loc, OPT_Wstringop_overflow_,
2159 lenrng[0].to_uhwi (),
2160 "writing %wu byte into a region "
2161 "of size %wu",
2162 "writing %wu bytes into a region "
2163 "of size %wu",
2164 lenrng[0].to_uhwi (),
2165 spcrng[0].to_uhwi ()));
2166 else
2167 warned = (writefn
2168 ? warning_n (loc, OPT_Wstringop_overflow_,
2169 lenrng[0].to_uhwi (),
2170 "%qD writing %wu byte into a region "
2171 "of size between %wu and %wu",
2172 "%qD writing %wu bytes into a region "
2173 "of size between %wu and %wu",
2174 writefn, lenrng[0].to_uhwi (),
2175 spcrng[0].to_uhwi (), spcrng[1].to_uhwi ())
2176 : warning_n (loc, OPT_Wstringop_overflow_,
2177 lenrng[0].to_uhwi (),
2178 "writing %wu byte into a region "
2179 "of size between %wu and %wu",
2180 "writing %wu bytes into a region "
2181 "of size between %wu and %wu",
2182 lenrng[0].to_uhwi (),
2183 spcrng[0].to_uhwi (), spcrng[1].to_uhwi ()));
2185 else if (spcrng[0] == spcrng[1])
2186 warned = (writefn
2187 ? warning_at (loc, OPT_Wstringop_overflow_,
2188 "%qD writing between %wu and %wu bytes "
2189 "into a region of size %wu",
2190 writefn, lenrng[0].to_uhwi (),
2191 lenrng[1].to_uhwi (),
2192 spcrng[0].to_uhwi ())
2193 : warning_at (loc, OPT_Wstringop_overflow_,
2194 "writing between %wu and %wu bytes "
2195 "into a region of size %wu",
2196 lenrng[0].to_uhwi (),
2197 lenrng[1].to_uhwi (),
2198 spcrng[0].to_uhwi ()));
2199 else
2200 warned = (writefn
2201 ? warning_at (loc, OPT_Wstringop_overflow_,
2202 "%qD writing between %wu and %wu bytes "
2203 "into a region of size between %wu and %wu",
2204 writefn, lenrng[0].to_uhwi (),
2205 lenrng[1].to_uhwi (),
2206 spcrng[0].to_uhwi (), spcrng[1].to_uhwi ())
2207 : warning_at (loc, OPT_Wstringop_overflow_,
2208 "writing between %wu and %wu bytes "
2209 "into a region of size between %wu and %wu",
2210 lenrng[0].to_uhwi (),
2211 lenrng[1].to_uhwi (),
2212 spcrng[0].to_uhwi (), spcrng[1].to_uhwi ()));
2214 if (!warned)
2215 return;
2217 suppress_warning (stmt, OPT_Wstringop_overflow_);
2219 aref.inform_access (access_write_only);
2222 /* Convenience wrapper for the above. */
2224 void
2225 strlen_pass::maybe_warn_overflow (gimple *stmt, bool call_lhs,
2226 unsigned HOST_WIDE_INT len,
2227 strinfo *si, bool plus_one, bool rawmem)
2229 tree tlen = build_int_cst (size_type_node, len);
2230 maybe_warn_overflow (stmt, call_lhs, tlen, si, plus_one, rawmem);
2233 /* Handle a strlen call. If strlen of the argument is known, replace
2234 the strlen call with the known value, otherwise remember that strlen
2235 of the argument is stored in the lhs SSA_NAME. */
2237 void
2238 strlen_pass::handle_builtin_strlen ()
2240 gimple *stmt = gsi_stmt (m_gsi);
2241 tree lhs = gimple_call_lhs (stmt);
2243 if (lhs == NULL_TREE)
2244 return;
2246 location_t loc = gimple_location (stmt);
2247 tree callee = gimple_call_fndecl (stmt);
2248 tree src = gimple_call_arg (stmt, 0);
2249 tree bound = (DECL_FUNCTION_CODE (callee) == BUILT_IN_STRNLEN
2250 ? gimple_call_arg (stmt, 1) : NULL_TREE);
2251 int idx = get_stridx (src, stmt);
2252 if (idx || (bound && integer_zerop (bound)))
2254 strinfo *si = NULL;
2255 tree rhs;
2257 if (idx < 0)
2258 rhs = build_int_cst (TREE_TYPE (lhs), ~idx);
2259 else if (idx == 0)
2260 rhs = bound;
2261 else
2263 rhs = NULL_TREE;
2264 si = get_strinfo (idx);
2265 if (si != NULL)
2267 rhs = get_string_length (si);
2268 /* For strnlen, if bound is constant, even if si is not known
2269 to be zero terminated, if we know at least bound bytes are
2270 not zero, the return value will be bound. */
2271 if (rhs == NULL_TREE
2272 && bound != NULL_TREE
2273 && TREE_CODE (bound) == INTEGER_CST
2274 && si->nonzero_chars != NULL_TREE
2275 && TREE_CODE (si->nonzero_chars) == INTEGER_CST
2276 && tree_int_cst_le (bound, si->nonzero_chars))
2277 rhs = bound;
2280 if (rhs != NULL_TREE)
2282 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2284 fprintf (dump_file, "Optimizing: ");
2285 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2287 rhs = unshare_expr (rhs);
2288 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2289 rhs = fold_convert_loc (loc, TREE_TYPE (lhs), rhs);
2291 if (bound)
2292 rhs = fold_build2_loc (loc, MIN_EXPR, TREE_TYPE (rhs), rhs, bound);
2294 gimplify_and_update_call_from_tree (&m_gsi, rhs);
2295 stmt = gsi_stmt (m_gsi);
2296 update_stmt (stmt);
2297 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2299 fprintf (dump_file, "into: ");
2300 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2303 if (si != NULL
2304 /* Don't update anything for strnlen. */
2305 && bound == NULL_TREE
2306 && TREE_CODE (si->nonzero_chars) != SSA_NAME
2307 && TREE_CODE (si->nonzero_chars) != INTEGER_CST
2308 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
2310 si = unshare_strinfo (si);
2311 si->nonzero_chars = lhs;
2312 gcc_assert (si->full_string_p);
2315 if (strlen_to_stridx
2316 && (bound == NULL_TREE
2317 /* For strnlen record this only if the call is proven
2318 to return the same value as strlen would. */
2319 || (TREE_CODE (bound) == INTEGER_CST
2320 && TREE_CODE (rhs) == INTEGER_CST
2321 && tree_int_cst_lt (rhs, bound))))
2322 strlen_to_stridx->put (lhs, stridx_strlenloc (idx, loc));
2324 return;
2327 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
2328 return;
2330 if (idx == 0)
2331 idx = new_stridx (src);
2332 else
2334 strinfo *si = get_strinfo (idx);
2335 if (si != NULL)
2337 if (!si->full_string_p && !si->stmt)
2339 /* Until now we only had a lower bound on the string length.
2340 Install LHS as the actual length. */
2341 si = unshare_strinfo (si);
2342 tree old = si->nonzero_chars;
2343 si->nonzero_chars = lhs;
2344 si->full_string_p = true;
2345 if (old && TREE_CODE (old) == INTEGER_CST)
2347 old = fold_convert_loc (loc, TREE_TYPE (lhs), old);
2348 tree adj = fold_build2_loc (loc, MINUS_EXPR,
2349 TREE_TYPE (lhs), lhs, old);
2350 adjust_related_strinfos (loc, si, adj);
2351 /* Use the constant minimum length as the lower bound
2352 of the non-constant length. */
2353 wide_int min = wi::to_wide (old);
2354 wide_int max
2355 = wi::to_wide (TYPE_MAX_VALUE (ptrdiff_type_node)) - 2;
2356 set_strlen_range (lhs, min, max);
2358 else
2360 si->first = 0;
2361 si->prev = 0;
2362 si->next = 0;
2365 return;
2368 if (idx)
2370 if (!bound)
2372 /* Only store the new length information for calls to strlen(),
2373 not for those to strnlen(). */
2374 strinfo *si = new_strinfo (src, idx, lhs, true);
2375 set_strinfo (idx, si);
2376 find_equal_ptrs (src, idx);
2379 /* For SRC that is an array of N elements, set LHS's range
2380 to [0, min (N, BOUND)]. A constant return value means
2381 the range would have consisted of a single value. In
2382 that case, fold the result into the returned constant. */
2383 if (tree ret = maybe_set_strlen_range (lhs, src, bound))
2384 if (TREE_CODE (ret) == INTEGER_CST)
2386 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2388 fprintf (dump_file, "Optimizing: ");
2389 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2391 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (ret)))
2392 ret = fold_convert_loc (loc, TREE_TYPE (lhs), ret);
2393 gimplify_and_update_call_from_tree (&m_gsi, ret);
2394 stmt = gsi_stmt (m_gsi);
2395 update_stmt (stmt);
2396 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2398 fprintf (dump_file, "into: ");
2399 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2403 if (strlen_to_stridx && !bound)
2404 strlen_to_stridx->put (lhs, stridx_strlenloc (idx, loc));
2408 /* Handle a strchr call. If strlen of the first argument is known, replace
2409 the strchr (x, 0) call with the endptr or x + strlen, otherwise remember
2410 that lhs of the call is endptr and strlen of the argument is endptr - x. */
2412 void
2413 strlen_pass::handle_builtin_strchr ()
2415 gimple *stmt = gsi_stmt (m_gsi);
2416 tree lhs = gimple_call_lhs (stmt);
2418 if (lhs == NULL_TREE)
2419 return;
2421 if (!integer_zerop (gimple_call_arg (stmt, 1)))
2422 return;
2424 tree src = gimple_call_arg (stmt, 0);
2426 /* Avoid folding if the first argument is not a nul-terminated array.
2427 Defer warning until later. */
2428 if (!check_nul_terminated_array (NULL_TREE, src))
2429 return;
2431 int idx = get_stridx (src, stmt);
2432 if (idx)
2434 strinfo *si = NULL;
2435 tree rhs;
2437 if (idx < 0)
2438 rhs = build_int_cst (size_type_node, ~idx);
2439 else
2441 rhs = NULL_TREE;
2442 si = get_strinfo (idx);
2443 if (si != NULL)
2444 rhs = get_string_length (si);
2446 if (rhs != NULL_TREE)
2448 location_t loc = gimple_location (stmt);
2450 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2452 fprintf (dump_file, "Optimizing: ");
2453 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2455 if (si != NULL && si->endptr != NULL_TREE)
2457 rhs = unshare_expr (si->endptr);
2458 if (!useless_type_conversion_p (TREE_TYPE (lhs),
2459 TREE_TYPE (rhs)))
2460 rhs = fold_convert_loc (loc, TREE_TYPE (lhs), rhs);
2462 else
2464 rhs = fold_convert_loc (loc, sizetype, unshare_expr (rhs));
2465 rhs = fold_build2_loc (loc, POINTER_PLUS_EXPR,
2466 TREE_TYPE (src), src, rhs);
2467 if (!useless_type_conversion_p (TREE_TYPE (lhs),
2468 TREE_TYPE (rhs)))
2469 rhs = fold_convert_loc (loc, TREE_TYPE (lhs), rhs);
2471 gimplify_and_update_call_from_tree (&m_gsi, rhs);
2472 stmt = gsi_stmt (m_gsi);
2473 update_stmt (stmt);
2474 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2476 fprintf (dump_file, "into: ");
2477 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2479 if (si != NULL
2480 && si->endptr == NULL_TREE
2481 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
2483 si = unshare_strinfo (si);
2484 si->endptr = lhs;
2486 zero_length_string (lhs, si);
2487 return;
2490 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
2491 return;
2492 if (TREE_CODE (src) != SSA_NAME || !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (src))
2494 if (idx == 0)
2495 idx = new_stridx (src);
2496 else if (get_strinfo (idx) != NULL)
2498 zero_length_string (lhs, NULL);
2499 return;
2501 if (idx)
2503 location_t loc = gimple_location (stmt);
2504 tree lhsu = fold_convert_loc (loc, size_type_node, lhs);
2505 tree srcu = fold_convert_loc (loc, size_type_node, src);
2506 tree length = fold_build2_loc (loc, MINUS_EXPR,
2507 size_type_node, lhsu, srcu);
2508 strinfo *si = new_strinfo (src, idx, length, true);
2509 si->endptr = lhs;
2510 set_strinfo (idx, si);
2511 find_equal_ptrs (src, idx);
2512 zero_length_string (lhs, si);
2515 else
2516 zero_length_string (lhs, NULL);
2519 /* Handle a strcpy-like ({st{r,p}cpy,__st{r,p}cpy_chk}) call.
2520 If strlen of the second argument is known, strlen of the first argument
2521 is the same after this call. Furthermore, attempt to convert it to
2522 memcpy. Uses RVALS to determine range information. */
2524 void
2525 strlen_pass::handle_builtin_strcpy (built_in_function bcode)
2527 int idx, didx;
2528 tree src, dst, srclen, len, lhs, type, fn, oldlen;
2529 bool success;
2530 gimple *stmt = gsi_stmt (m_gsi);
2531 strinfo *si, *dsi, *olddsi, *zsi;
2532 location_t loc;
2534 src = gimple_call_arg (stmt, 1);
2535 dst = gimple_call_arg (stmt, 0);
2536 lhs = gimple_call_lhs (stmt);
2537 idx = get_stridx (src, stmt);
2538 si = NULL;
2539 if (idx > 0)
2540 si = get_strinfo (idx);
2542 didx = get_stridx (dst, stmt);
2543 olddsi = NULL;
2544 oldlen = NULL_TREE;
2545 if (didx > 0)
2546 olddsi = get_strinfo (didx);
2547 else if (didx < 0)
2548 return;
2550 if (olddsi != NULL)
2551 adjust_last_stmt (olddsi, stmt, false);
2553 srclen = NULL_TREE;
2554 if (si != NULL)
2555 srclen = get_string_length (si);
2556 else if (idx < 0)
2557 srclen = build_int_cst (size_type_node, ~idx);
2559 maybe_warn_overflow (stmt, false, srclen, olddsi, true);
2561 if (olddsi != NULL)
2562 adjust_last_stmt (olddsi, stmt, false);
2564 loc = gimple_location (stmt);
2565 if (srclen == NULL_TREE)
2566 switch (bcode)
2568 case BUILT_IN_STRCPY:
2569 case BUILT_IN_STRCPY_CHK:
2570 if (lhs != NULL_TREE || !builtin_decl_implicit_p (BUILT_IN_STPCPY))
2571 return;
2572 break;
2573 case BUILT_IN_STPCPY:
2574 case BUILT_IN_STPCPY_CHK:
2575 if (lhs == NULL_TREE)
2576 return;
2577 else
2579 tree lhsuint = fold_convert_loc (loc, size_type_node, lhs);
2580 srclen = fold_convert_loc (loc, size_type_node, dst);
2581 srclen = fold_build2_loc (loc, MINUS_EXPR, size_type_node,
2582 lhsuint, srclen);
2584 break;
2585 default:
2586 gcc_unreachable ();
2589 if (didx == 0)
2591 didx = new_stridx (dst);
2592 if (didx == 0)
2593 return;
2595 if (olddsi != NULL)
2597 oldlen = olddsi->nonzero_chars;
2598 dsi = unshare_strinfo (olddsi);
2599 dsi->nonzero_chars = srclen;
2600 dsi->full_string_p = (srclen != NULL_TREE);
2601 /* Break the chain, so adjust_related_strinfo on later pointers in
2602 the chain won't adjust this one anymore. */
2603 dsi->next = 0;
2604 dsi->stmt = NULL;
2605 dsi->endptr = NULL_TREE;
2607 else
2609 dsi = new_strinfo (dst, didx, srclen, srclen != NULL_TREE);
2610 set_strinfo (didx, dsi);
2611 find_equal_ptrs (dst, didx);
2613 dsi->writable = true;
2614 dsi->dont_invalidate = true;
2616 if (dsi->nonzero_chars == NULL_TREE)
2618 strinfo *chainsi;
2620 /* If string length of src is unknown, use delayed length
2621 computation. If string length of dst will be needed, it
2622 can be computed by transforming this strcpy call into
2623 stpcpy and subtracting dst from the return value. */
2625 /* Look for earlier strings whose length could be determined if
2626 this strcpy is turned into an stpcpy. */
2628 if (dsi->prev != 0 && (chainsi = verify_related_strinfos (dsi)) != NULL)
2630 for (; chainsi && chainsi != dsi; chainsi = get_strinfo (chainsi->next))
2632 /* When setting a stmt for delayed length computation
2633 prevent all strinfos through dsi from being
2634 invalidated. */
2635 chainsi = unshare_strinfo (chainsi);
2636 chainsi->stmt = stmt;
2637 chainsi->nonzero_chars = NULL_TREE;
2638 chainsi->full_string_p = false;
2639 chainsi->endptr = NULL_TREE;
2640 chainsi->dont_invalidate = true;
2643 dsi->stmt = stmt;
2645 /* Try to detect overlap before returning. This catches cases
2646 like strcpy (d, d + n) where n is non-constant whose range
2647 is such that (n <= strlen (d) holds).
2649 OLDDSI->NONZERO_chars may have been reset by this point with
2650 oldlen holding it original value. */
2651 if (olddsi && oldlen)
2653 /* Add 1 for the terminating NUL. */
2654 tree type = TREE_TYPE (oldlen);
2655 oldlen = fold_build2 (PLUS_EXPR, type, oldlen,
2656 build_int_cst (type, 1));
2657 check_bounds_or_overlap (stmt, olddsi->ptr, src, oldlen, NULL_TREE);
2660 return;
2663 if (olddsi != NULL)
2665 tree adj = NULL_TREE;
2666 if (oldlen == NULL_TREE)
2668 else if (integer_zerop (oldlen))
2669 adj = srclen;
2670 else if (TREE_CODE (oldlen) == INTEGER_CST
2671 || TREE_CODE (srclen) == INTEGER_CST)
2672 adj = fold_build2_loc (loc, MINUS_EXPR,
2673 TREE_TYPE (srclen), srclen,
2674 fold_convert_loc (loc, TREE_TYPE (srclen),
2675 oldlen));
2676 if (adj != NULL_TREE)
2677 adjust_related_strinfos (loc, dsi, adj);
2678 else
2679 dsi->prev = 0;
2681 /* strcpy src may not overlap dst, so src doesn't need to be
2682 invalidated either. */
2683 if (si != NULL)
2684 si->dont_invalidate = true;
2686 fn = NULL_TREE;
2687 zsi = NULL;
2688 switch (bcode)
2690 case BUILT_IN_STRCPY:
2691 fn = builtin_decl_implicit (BUILT_IN_MEMCPY);
2692 if (lhs)
2693 ssa_ver_to_stridx[SSA_NAME_VERSION (lhs)] = didx;
2694 break;
2695 case BUILT_IN_STRCPY_CHK:
2696 fn = builtin_decl_explicit (BUILT_IN_MEMCPY_CHK);
2697 if (lhs)
2698 ssa_ver_to_stridx[SSA_NAME_VERSION (lhs)] = didx;
2699 break;
2700 case BUILT_IN_STPCPY:
2701 /* This would need adjustment of the lhs (subtract one),
2702 or detection that the trailing '\0' doesn't need to be
2703 written, if it will be immediately overwritten.
2704 fn = builtin_decl_explicit (BUILT_IN_MEMPCPY); */
2705 if (lhs)
2707 dsi->endptr = lhs;
2708 zsi = zero_length_string (lhs, dsi);
2710 break;
2711 case BUILT_IN_STPCPY_CHK:
2712 /* This would need adjustment of the lhs (subtract one),
2713 or detection that the trailing '\0' doesn't need to be
2714 written, if it will be immediately overwritten.
2715 fn = builtin_decl_explicit (BUILT_IN_MEMPCPY_CHK); */
2716 if (lhs)
2718 dsi->endptr = lhs;
2719 zsi = zero_length_string (lhs, dsi);
2721 break;
2722 default:
2723 gcc_unreachable ();
2725 if (zsi != NULL)
2726 zsi->dont_invalidate = true;
2728 if (fn)
2730 tree args = TYPE_ARG_TYPES (TREE_TYPE (fn));
2731 type = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (args)));
2733 else
2734 type = size_type_node;
2736 len = fold_convert_loc (loc, type, unshare_expr (srclen));
2737 len = fold_build2_loc (loc, PLUS_EXPR, type, len, build_int_cst (type, 1));
2739 /* Disable warning for the transformed statement? */
2740 opt_code no_warning_opt = no_warning;
2742 if (const strinfo *chksi = si ? olddsi ? olddsi : dsi : NULL)
2744 no_warning_opt = check_bounds_or_overlap (stmt, chksi->ptr, si->ptr,
2745 NULL_TREE, len);
2746 if (no_warning_opt)
2747 suppress_warning (stmt, no_warning_opt);
2750 if (fn == NULL_TREE)
2751 return;
2753 len = force_gimple_operand_gsi (&m_gsi, len, true, NULL_TREE, true,
2754 GSI_SAME_STMT);
2755 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2757 fprintf (dump_file, "Optimizing: ");
2758 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2760 if (gimple_call_num_args (stmt) == 2)
2761 success = update_gimple_call (&m_gsi, fn, 3, dst, src, len);
2762 else
2763 success = update_gimple_call (&m_gsi, fn, 4, dst, src, len,
2764 gimple_call_arg (stmt, 2));
2765 if (success)
2767 stmt = gsi_stmt (m_gsi);
2768 update_stmt (stmt);
2769 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2771 fprintf (dump_file, "into: ");
2772 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2774 /* Allow adjust_last_stmt to decrease this memcpy's size. */
2775 laststmt.stmt = stmt;
2776 laststmt.len = srclen;
2777 laststmt.stridx = dsi->idx;
2779 else if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2780 fprintf (dump_file, "not possible.\n");
2782 if (no_warning_opt)
2783 suppress_warning (stmt, no_warning_opt);
2786 /* Check the size argument to the built-in forms of stpncpy and strncpy
2787 for out-of-bounds offsets or overlapping access, and to see if the
2788 size argument is derived from a call to strlen() on the source argument,
2789 and if so, issue an appropriate warning. */
2791 void
2792 strlen_pass::handle_builtin_strncat (built_in_function)
2794 /* Same as stxncpy(). */
2795 handle_builtin_stxncpy_strncat (true);
2798 /* Return true if LEN depends on a call to strlen(SRC) in an interesting
2799 way. LEN can either be an integer expression, or a pointer (to char).
2800 When it is the latter (such as in recursive calls to self) it is
2801 assumed to be the argument in some call to strlen() whose relationship
2802 to SRC is being ascertained. */
2804 bool
2805 is_strlen_related_p (tree src, tree len)
2807 if (TREE_CODE (TREE_TYPE (len)) == POINTER_TYPE
2808 && operand_equal_p (src, len, 0))
2809 return true;
2811 if (TREE_CODE (len) != SSA_NAME)
2812 return false;
2814 if (TREE_CODE (src) == SSA_NAME)
2816 gimple *srcdef = SSA_NAME_DEF_STMT (src);
2817 if (is_gimple_assign (srcdef))
2819 /* Handle bitwise AND used in conversions from wider size_t
2820 to narrower unsigned types. */
2821 tree_code code = gimple_assign_rhs_code (srcdef);
2822 if (code == BIT_AND_EXPR
2823 || code == NOP_EXPR)
2824 return is_strlen_related_p (gimple_assign_rhs1 (srcdef), len);
2826 return false;
2829 if (gimple_call_builtin_p (srcdef, BUILT_IN_NORMAL))
2831 /* If SRC is the result of a call to an allocation function
2832 or strlen, use the function's argument instead. */
2833 tree func = gimple_call_fndecl (srcdef);
2834 built_in_function code = DECL_FUNCTION_CODE (func);
2835 if (code == BUILT_IN_ALLOCA
2836 || code == BUILT_IN_ALLOCA_WITH_ALIGN
2837 || code == BUILT_IN_MALLOC
2838 || code == BUILT_IN_STRLEN)
2839 return is_strlen_related_p (gimple_call_arg (srcdef, 0), len);
2841 /* FIXME: Handle other functions with attribute alloc_size. */
2842 return false;
2846 gimple *lendef = SSA_NAME_DEF_STMT (len);
2847 if (!lendef)
2848 return false;
2850 if (is_gimple_call (lendef))
2852 tree func = gimple_call_fndecl (lendef);
2853 if (!valid_builtin_call (lendef)
2854 || DECL_FUNCTION_CODE (func) != BUILT_IN_STRLEN)
2855 return false;
2857 tree arg = gimple_call_arg (lendef, 0);
2858 return is_strlen_related_p (src, arg);
2861 if (!is_gimple_assign (lendef))
2862 return false;
2864 tree_code code = gimple_assign_rhs_code (lendef);
2865 tree rhs1 = gimple_assign_rhs1 (lendef);
2866 tree rhstype = TREE_TYPE (rhs1);
2868 if ((TREE_CODE (rhstype) == POINTER_TYPE && code == POINTER_PLUS_EXPR)
2869 || (INTEGRAL_TYPE_P (rhstype)
2870 && (code == BIT_AND_EXPR
2871 || code == NOP_EXPR)))
2873 /* Pointer plus (an integer), and truncation are considered among
2874 the (potentially) related expressions to strlen. */
2875 return is_strlen_related_p (src, rhs1);
2878 if (tree rhs2 = gimple_assign_rhs2 (lendef))
2880 /* Integer subtraction is considered strlen-related when both
2881 arguments are integers and second one is strlen-related. */
2882 rhstype = TREE_TYPE (rhs2);
2883 if (INTEGRAL_TYPE_P (rhstype) && code == MINUS_EXPR)
2884 return is_strlen_related_p (src, rhs2);
2887 return false;
2890 /* Called by handle_builtin_stxncpy_strncat and by
2891 gimple_fold_builtin_strncpy in gimple-fold.cc.
2892 Check to see if the specified bound is a) equal to the size of
2893 the destination DST and if so, b) if it's immediately followed by
2894 DST[CNT - 1] = '\0'. If a) holds and b) does not, warn. Otherwise,
2895 do nothing. Return true if diagnostic has been issued.
2897 The purpose is to diagnose calls to strncpy and stpncpy that do
2898 not nul-terminate the copy while allowing for the idiom where
2899 such a call is immediately followed by setting the last element
2900 to nul, as in:
2901 char a[32];
2902 strncpy (a, s, sizeof a);
2903 a[sizeof a - 1] = '\0';
2906 bool
2907 maybe_diag_stxncpy_trunc (gimple_stmt_iterator gsi, tree src, tree cnt,
2908 pointer_query *ptr_qry /* = NULL */)
2910 gimple *stmt = gsi_stmt (gsi);
2911 if (warning_suppressed_p (stmt, OPT_Wstringop_truncation))
2912 return false;
2914 wide_int cntrange[2];
2915 value_range r;
2916 if (!get_range_query (cfun)->range_of_expr (r, cnt)
2917 || r.varying_p ()
2918 || r.undefined_p ())
2919 return false;
2921 cntrange[0] = wi::to_wide (r.min ());
2922 cntrange[1] = wi::to_wide (r.max ());
2923 if (r.kind () == VR_ANTI_RANGE)
2925 wide_int maxobjsize = wi::to_wide (TYPE_MAX_VALUE (ptrdiff_type_node));
2927 if (wi::ltu_p (cntrange[1], maxobjsize))
2929 cntrange[0] = cntrange[1] + 1;
2930 cntrange[1] = maxobjsize;
2932 else
2934 cntrange[1] = cntrange[0] - 1;
2935 cntrange[0] = wi::zero (TYPE_PRECISION (TREE_TYPE (cnt)));
2939 /* Negative value is the constant string length. If it's less than
2940 the lower bound there is no truncation. Avoid calling get_stridx()
2941 when ssa_ver_to_stridx is empty. That implies the caller isn't
2942 running under the control of this pass and ssa_ver_to_stridx hasn't
2943 been created yet. */
2944 int sidx = ssa_ver_to_stridx.length () ? get_stridx (src, stmt) : 0;
2945 if (sidx < 0 && wi::gtu_p (cntrange[0], ~sidx))
2946 return false;
2948 tree dst = gimple_call_arg (stmt, 0);
2949 tree dstdecl = dst;
2950 if (TREE_CODE (dstdecl) == ADDR_EXPR)
2951 dstdecl = TREE_OPERAND (dstdecl, 0);
2953 tree ref = NULL_TREE;
2955 if (!sidx)
2957 /* If the source is a non-string return early to avoid warning
2958 for possible truncation (if the truncation is certain SIDX
2959 is non-zero). */
2960 tree srcdecl = gimple_call_arg (stmt, 1);
2961 if (TREE_CODE (srcdecl) == ADDR_EXPR)
2962 srcdecl = TREE_OPERAND (srcdecl, 0);
2963 if (get_attr_nonstring_decl (srcdecl, &ref))
2964 return false;
2967 /* Likewise, if the destination refers to an array/pointer declared
2968 nonstring return early. */
2969 if (get_attr_nonstring_decl (dstdecl, &ref))
2970 return false;
2972 /* Look for dst[i] = '\0'; after the stxncpy() call and if found
2973 avoid the truncation warning. */
2974 gsi_next_nondebug (&gsi);
2975 gimple *next_stmt = gsi_stmt (gsi);
2976 if (!next_stmt)
2978 /* When there is no statement in the same basic block check
2979 the immediate successor block. */
2980 if (basic_block bb = gimple_bb (stmt))
2982 if (single_succ_p (bb))
2984 /* For simplicity, ignore blocks with multiple outgoing
2985 edges for now and only consider successor blocks along
2986 normal edges. */
2987 edge e = EDGE_SUCC (bb, 0);
2988 if (!(e->flags & EDGE_ABNORMAL))
2990 gsi = gsi_start_bb (e->dest);
2991 next_stmt = gsi_stmt (gsi);
2992 if (next_stmt && is_gimple_debug (next_stmt))
2994 gsi_next_nondebug (&gsi);
2995 next_stmt = gsi_stmt (gsi);
3002 if (next_stmt && is_gimple_assign (next_stmt))
3004 tree lhs = gimple_assign_lhs (next_stmt);
3005 tree_code code = TREE_CODE (lhs);
3006 if (code == ARRAY_REF || code == MEM_REF)
3007 lhs = TREE_OPERAND (lhs, 0);
3009 tree func = gimple_call_fndecl (stmt);
3010 if (DECL_FUNCTION_CODE (func) == BUILT_IN_STPNCPY)
3012 tree ret = gimple_call_lhs (stmt);
3013 if (ret && operand_equal_p (ret, lhs, 0))
3014 return false;
3017 /* Determine the base address and offset of the reference,
3018 ignoring the innermost array index. */
3019 if (TREE_CODE (ref) == ARRAY_REF)
3020 ref = TREE_OPERAND (ref, 0);
3022 poly_int64 dstoff;
3023 tree dstbase = get_addr_base_and_unit_offset (ref, &dstoff);
3025 poly_int64 lhsoff;
3026 tree lhsbase = get_addr_base_and_unit_offset (lhs, &lhsoff);
3027 if (lhsbase
3028 && dstbase
3029 && known_eq (dstoff, lhsoff)
3030 && operand_equal_p (dstbase, lhsbase, 0))
3031 return false;
3034 int prec = TYPE_PRECISION (TREE_TYPE (cnt));
3035 wide_int lenrange[2];
3036 if (strinfo *sisrc = sidx > 0 ? get_strinfo (sidx) : NULL)
3038 lenrange[0] = (sisrc->nonzero_chars
3039 && TREE_CODE (sisrc->nonzero_chars) == INTEGER_CST
3040 ? wi::to_wide (sisrc->nonzero_chars)
3041 : wi::zero (prec));
3042 lenrange[1] = lenrange[0];
3044 else if (sidx < 0)
3045 lenrange[0] = lenrange[1] = wi::shwi (~sidx, prec);
3046 else
3048 c_strlen_data lendata = { };
3049 /* Set MAXBOUND to an arbitrary non-null non-integer node as a request
3050 to have it set to the length of the longest string in a PHI. */
3051 lendata.maxbound = src;
3052 get_range_strlen (src, &lendata, /* eltsize = */1);
3053 if (TREE_CODE (lendata.minlen) == INTEGER_CST
3054 && TREE_CODE (lendata.maxbound) == INTEGER_CST)
3056 /* When LENDATA.MAXLEN is unknown, reset LENDATA.MINLEN
3057 which stores the length of the shortest known string. */
3058 if (integer_all_onesp (lendata.maxlen))
3059 lenrange[0] = wi::shwi (0, prec);
3060 else
3061 lenrange[0] = wi::to_wide (lendata.minlen, prec);
3062 lenrange[1] = wi::to_wide (lendata.maxbound, prec);
3064 else
3066 lenrange[0] = wi::shwi (0, prec);
3067 lenrange[1] = wi::shwi (-1, prec);
3071 location_t callloc = gimple_or_expr_nonartificial_location (stmt, dst);
3072 tree func = gimple_call_fndecl (stmt);
3074 if (lenrange[0] != 0 || !wi::neg_p (lenrange[1]))
3076 /* If the longest source string is shorter than the lower bound
3077 of the specified count the copy is definitely nul-terminated. */
3078 if (wi::ltu_p (lenrange[1], cntrange[0]))
3079 return false;
3081 if (wi::neg_p (lenrange[1]))
3083 /* The length of one of the strings is unknown but at least
3084 one has non-zero length and that length is stored in
3085 LENRANGE[1]. Swap the bounds to force a "may be truncated"
3086 warning below. */
3087 lenrange[1] = lenrange[0];
3088 lenrange[0] = wi::shwi (0, prec);
3091 /* Set to true for strncat whose bound is derived from the length
3092 of the destination (the expected usage pattern). */
3093 bool cat_dstlen_bounded = false;
3094 if (DECL_FUNCTION_CODE (func) == BUILT_IN_STRNCAT)
3095 cat_dstlen_bounded = is_strlen_related_p (dst, cnt);
3097 if (lenrange[0] == cntrange[1] && cntrange[0] == cntrange[1])
3098 return warning_n (callloc, OPT_Wstringop_truncation,
3099 cntrange[0].to_uhwi (),
3100 "%qD output truncated before terminating "
3101 "nul copying %E byte from a string of the "
3102 "same length",
3103 "%qD output truncated before terminating nul "
3104 "copying %E bytes from a string of the same "
3105 "length",
3106 func, cnt);
3107 else if (!cat_dstlen_bounded)
3109 if (wi::geu_p (lenrange[0], cntrange[1]))
3111 /* The shortest string is longer than the upper bound of
3112 the count so the truncation is certain. */
3113 if (cntrange[0] == cntrange[1])
3114 return warning_n (callloc, OPT_Wstringop_truncation,
3115 cntrange[0].to_uhwi (),
3116 "%qD output truncated copying %E byte "
3117 "from a string of length %wu",
3118 "%qD output truncated copying %E bytes "
3119 "from a string of length %wu",
3120 func, cnt, lenrange[0].to_uhwi ());
3122 return warning_at (callloc, OPT_Wstringop_truncation,
3123 "%qD output truncated copying between %wu "
3124 "and %wu bytes from a string of length %wu",
3125 func, cntrange[0].to_uhwi (),
3126 cntrange[1].to_uhwi (), lenrange[0].to_uhwi ());
3128 else if (wi::geu_p (lenrange[1], cntrange[1]))
3130 /* The longest string is longer than the upper bound of
3131 the count so the truncation is possible. */
3132 if (cntrange[0] == cntrange[1])
3133 return warning_n (callloc, OPT_Wstringop_truncation,
3134 cntrange[0].to_uhwi (),
3135 "%qD output may be truncated copying %E "
3136 "byte from a string of length %wu",
3137 "%qD output may be truncated copying %E "
3138 "bytes from a string of length %wu",
3139 func, cnt, lenrange[1].to_uhwi ());
3141 return warning_at (callloc, OPT_Wstringop_truncation,
3142 "%qD output may be truncated copying between "
3143 "%wu and %wu bytes from a string of length %wu",
3144 func, cntrange[0].to_uhwi (),
3145 cntrange[1].to_uhwi (), lenrange[1].to_uhwi ());
3149 if (!cat_dstlen_bounded
3150 && cntrange[0] != cntrange[1]
3151 && wi::leu_p (cntrange[0], lenrange[0])
3152 && wi::leu_p (cntrange[1], lenrange[0] + 1))
3154 /* If the source (including the terminating nul) is longer than
3155 the lower bound of the specified count but shorter than the
3156 upper bound the copy may (but need not) be truncated. */
3157 return warning_at (callloc, OPT_Wstringop_truncation,
3158 "%qD output may be truncated copying between "
3159 "%wu and %wu bytes from a string of length %wu",
3160 func, cntrange[0].to_uhwi (),
3161 cntrange[1].to_uhwi (), lenrange[0].to_uhwi ());
3165 access_ref aref;
3166 if (tree dstsize = compute_objsize (dst, stmt, 1, &aref, ptr_qry))
3168 /* The source length is unknown. Try to determine the destination
3169 size and see if it matches the specified bound. If not, bail.
3170 Otherwise go on to see if it should be diagnosed for possible
3171 truncation. */
3172 if (!dstsize)
3173 return false;
3175 if (wi::to_wide (dstsize) != cntrange[1])
3176 return false;
3178 /* Avoid warning for strncpy(a, b, N) calls where the following
3179 equalities hold:
3180 N == sizeof a && N == sizeof b */
3181 if (tree srcsize = compute_objsize (src, stmt, 1, &aref, ptr_qry))
3182 if (wi::to_wide (srcsize) == cntrange[1])
3183 return false;
3185 if (cntrange[0] == cntrange[1])
3186 return warning_at (callloc, OPT_Wstringop_truncation,
3187 "%qD specified bound %E equals destination size",
3188 func, cnt);
3191 return false;
3194 /* Check the arguments to the built-in forms of stpncpy, strncpy, and
3195 strncat, for out-of-bounds offsets or overlapping access, and to see
3196 if the size is derived from calling strlen() on the source argument,
3197 and if so, issue the appropriate warning.
3198 APPEND_P is true for strncat. */
3200 void
3201 strlen_pass::handle_builtin_stxncpy_strncat (bool append_p)
3203 if (!strlen_to_stridx)
3204 return;
3206 gimple *stmt = gsi_stmt (m_gsi);
3208 tree dst = gimple_call_arg (stmt, 0);
3209 tree src = gimple_call_arg (stmt, 1);
3210 tree len = gimple_call_arg (stmt, 2);
3211 /* An upper bound of the size of the destination. */
3212 tree dstsize = NULL_TREE;
3213 /* The length of the destination and source strings (plus 1 for those
3214 whose FULL_STRING_P is set, i.e., whose length is exact rather than
3215 a lower bound). */
3216 tree dstlenp1 = NULL_TREE, srclenp1 = NULL_TREE;;
3218 int didx = get_stridx (dst, stmt);
3219 if (strinfo *sidst = didx > 0 ? get_strinfo (didx) : NULL)
3221 /* Compute the size of the destination string including the nul
3222 if it is known to be nul-terminated. */
3223 if (sidst->nonzero_chars)
3225 if (sidst->full_string_p)
3227 /* String is known to be nul-terminated. */
3228 tree type = TREE_TYPE (sidst->nonzero_chars);
3229 dstlenp1 = fold_build2 (PLUS_EXPR, type, sidst->nonzero_chars,
3230 build_int_cst (type, 1));
3232 else
3233 dstlenp1 = sidst->nonzero_chars;
3235 else if (TREE_CODE (sidst->ptr) == SSA_NAME)
3237 gimple *def_stmt = SSA_NAME_DEF_STMT (sidst->ptr);
3238 dstsize = gimple_call_alloc_size (def_stmt);
3241 dst = sidst->ptr;
3244 int sidx = get_stridx (src, stmt);
3245 strinfo *sisrc = sidx > 0 ? get_strinfo (sidx) : NULL;
3246 if (sisrc)
3248 /* strncat() and strncpy() can modify the source string by writing
3249 over the terminating nul so SISRC->DONT_INVALIDATE must be left
3250 clear. */
3252 /* Compute the size of the source string including the terminating
3253 nul if its known to be nul-terminated. */
3254 if (sisrc->nonzero_chars)
3256 if (sisrc->full_string_p)
3258 tree type = TREE_TYPE (sisrc->nonzero_chars);
3259 srclenp1 = fold_build2 (PLUS_EXPR, type, sisrc->nonzero_chars,
3260 build_int_cst (type, 1));
3262 else
3263 srclenp1 = sisrc->nonzero_chars;
3266 src = sisrc->ptr;
3268 else
3269 srclenp1 = NULL_TREE;
3271 opt_code opt = check_bounds_or_overlap (stmt, dst, src, dstlenp1, srclenp1);
3272 if (opt != no_warning)
3274 suppress_warning (stmt, opt);
3275 return;
3278 /* If the length argument was computed from strlen(S) for some string
3279 S retrieve the strinfo index for the string (PSS->FIRST) along with
3280 the location of the strlen() call (PSS->SECOND). */
3281 stridx_strlenloc *pss = strlen_to_stridx->get (len);
3282 if (!pss || pss->first <= 0)
3284 if (maybe_diag_stxncpy_trunc (m_gsi, src, len))
3285 suppress_warning (stmt, OPT_Wstringop_truncation);
3287 return;
3290 /* Retrieve the strinfo data for the string S that LEN was computed
3291 from as some function F of strlen (S) (i.e., LEN need not be equal
3292 to strlen(S)). */
3293 strinfo *silen = get_strinfo (pss->first);
3295 location_t callloc = gimple_or_expr_nonartificial_location (stmt, dst);
3297 tree func = gimple_call_fndecl (stmt);
3299 bool warned = false;
3301 /* When -Wstringop-truncation is set, try to determine truncation
3302 before diagnosing possible overflow. Truncation is implied by
3303 the LEN argument being equal to strlen(SRC), regardless of
3304 whether its value is known. Otherwise, when appending, or
3305 when copying into a destination of known size, issue the more
3306 generic -Wstringop-overflow which triggers for LEN arguments
3307 that in any meaningful way depend on strlen(SRC). */
3308 if (!append_p
3309 && sisrc == silen
3310 && is_strlen_related_p (src, len)
3311 && warning_at (callloc, OPT_Wstringop_truncation,
3312 "%qD output truncated before terminating nul "
3313 "copying as many bytes from a string as its length",
3314 func))
3315 warned = true;
3316 else if ((append_p || !dstsize || len == dstlenp1)
3317 && silen && is_strlen_related_p (src, silen->ptr))
3319 /* Issue -Wstringop-overflow when appending or when writing into
3320 a destination of a known size. Otherwise, when copying into
3321 a destination of an unknown size, it's truncation. */
3322 opt_code opt = (append_p || dstsize
3323 ? OPT_Wstringop_overflow_ : OPT_Wstringop_truncation);
3324 warned = warning_at (callloc, opt,
3325 "%qD specified bound depends on the length "
3326 "of the source argument",
3327 func);
3329 if (warned)
3331 location_t strlenloc = pss->second;
3332 if (strlenloc != UNKNOWN_LOCATION && strlenloc != callloc)
3333 inform (strlenloc, "length computed here");
3337 /* Handle a memcpy-like ({mem{,p}cpy,__mem{,p}cpy_chk}) call.
3338 If strlen of the second argument is known and length of the third argument
3339 is that plus one, strlen of the first argument is the same after this
3340 call. Uses RVALS to determine range information. */
3342 void
3343 strlen_pass::handle_builtin_memcpy (built_in_function bcode)
3345 tree lhs, oldlen, newlen;
3346 gimple *stmt = gsi_stmt (m_gsi);
3347 strinfo *si, *dsi;
3349 tree len = gimple_call_arg (stmt, 2);
3350 tree src = gimple_call_arg (stmt, 1);
3351 tree dst = gimple_call_arg (stmt, 0);
3353 int didx = get_stridx (dst, stmt);
3354 strinfo *olddsi = NULL;
3355 if (didx > 0)
3356 olddsi = get_strinfo (didx);
3357 else if (didx < 0)
3358 return;
3360 if (olddsi != NULL
3361 && !integer_zerop (len))
3363 maybe_warn_overflow (stmt, false, len, olddsi, false, true);
3364 adjust_last_stmt (olddsi, stmt, false);
3367 int idx = get_stridx (src, stmt);
3368 if (idx == 0)
3369 return;
3371 bool full_string_p;
3372 if (idx > 0)
3374 gimple *def_stmt;
3376 /* Handle memcpy (x, y, l) where l's relationship with strlen (y)
3377 is known. */
3378 si = get_strinfo (idx);
3379 if (si == NULL || si->nonzero_chars == NULL_TREE)
3380 return;
3381 if (TREE_CODE (len) == INTEGER_CST
3382 && TREE_CODE (si->nonzero_chars) == INTEGER_CST)
3384 if (tree_int_cst_le (len, si->nonzero_chars))
3386 /* Copying LEN nonzero characters, where LEN is constant. */
3387 newlen = len;
3388 full_string_p = false;
3390 else
3392 /* Copying the whole of the analyzed part of SI. */
3393 newlen = si->nonzero_chars;
3394 full_string_p = si->full_string_p;
3397 else
3399 if (!si->full_string_p)
3400 return;
3401 if (TREE_CODE (len) != SSA_NAME)
3402 return;
3403 def_stmt = SSA_NAME_DEF_STMT (len);
3404 if (!is_gimple_assign (def_stmt)
3405 || gimple_assign_rhs_code (def_stmt) != PLUS_EXPR
3406 || gimple_assign_rhs1 (def_stmt) != si->nonzero_chars
3407 || !integer_onep (gimple_assign_rhs2 (def_stmt)))
3408 return;
3409 /* Copying variable-length string SI (and no more). */
3410 newlen = si->nonzero_chars;
3411 full_string_p = true;
3414 else
3416 si = NULL;
3417 /* Handle memcpy (x, "abcd", 5) or
3418 memcpy (x, "abc\0uvw", 7). */
3419 if (!tree_fits_uhwi_p (len))
3420 return;
3422 unsigned HOST_WIDE_INT clen = tree_to_uhwi (len);
3423 unsigned HOST_WIDE_INT nonzero_chars = ~idx;
3424 newlen = build_int_cst (size_type_node, MIN (nonzero_chars, clen));
3425 full_string_p = clen > nonzero_chars;
3428 if (!full_string_p
3429 && olddsi
3430 && olddsi->nonzero_chars
3431 && TREE_CODE (olddsi->nonzero_chars) == INTEGER_CST
3432 && tree_int_cst_le (newlen, olddsi->nonzero_chars))
3434 /* The SRC substring being written strictly overlaps
3435 a subsequence of the existing string OLDDSI. */
3436 newlen = olddsi->nonzero_chars;
3437 full_string_p = olddsi->full_string_p;
3440 if (olddsi != NULL && TREE_CODE (len) == SSA_NAME)
3441 adjust_last_stmt (olddsi, stmt, false);
3443 if (didx == 0)
3445 didx = new_stridx (dst);
3446 if (didx == 0)
3447 return;
3449 oldlen = NULL_TREE;
3450 if (olddsi != NULL)
3452 dsi = unshare_strinfo (olddsi);
3453 oldlen = olddsi->nonzero_chars;
3454 dsi->nonzero_chars = newlen;
3455 dsi->full_string_p = full_string_p;
3456 /* Break the chain, so adjust_related_strinfo on later pointers in
3457 the chain won't adjust this one anymore. */
3458 dsi->next = 0;
3459 dsi->stmt = NULL;
3460 dsi->endptr = NULL_TREE;
3462 else
3464 dsi = new_strinfo (dst, didx, newlen, full_string_p);
3465 set_strinfo (didx, dsi);
3466 find_equal_ptrs (dst, didx);
3468 dsi->writable = true;
3469 dsi->dont_invalidate = true;
3470 if (olddsi != NULL)
3472 tree adj = NULL_TREE;
3473 location_t loc = gimple_location (stmt);
3474 if (oldlen == NULL_TREE)
3476 else if (integer_zerop (oldlen))
3477 adj = newlen;
3478 else if (TREE_CODE (oldlen) == INTEGER_CST
3479 || TREE_CODE (newlen) == INTEGER_CST)
3480 adj = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (newlen), newlen,
3481 fold_convert_loc (loc, TREE_TYPE (newlen),
3482 oldlen));
3483 if (adj != NULL_TREE)
3484 adjust_related_strinfos (loc, dsi, adj);
3485 else
3486 dsi->prev = 0;
3488 /* memcpy src may not overlap dst, so src doesn't need to be
3489 invalidated either. */
3490 if (si != NULL)
3491 si->dont_invalidate = true;
3493 if (full_string_p)
3495 lhs = gimple_call_lhs (stmt);
3496 switch (bcode)
3498 case BUILT_IN_MEMCPY:
3499 case BUILT_IN_MEMCPY_CHK:
3500 /* Allow adjust_last_stmt to decrease this memcpy's size. */
3501 laststmt.stmt = stmt;
3502 laststmt.len = dsi->nonzero_chars;
3503 laststmt.stridx = dsi->idx;
3504 if (lhs)
3505 ssa_ver_to_stridx[SSA_NAME_VERSION (lhs)] = didx;
3506 break;
3507 case BUILT_IN_MEMPCPY:
3508 case BUILT_IN_MEMPCPY_CHK:
3509 break;
3510 default:
3511 gcc_unreachable ();
3516 /* Handle a strcat-like ({strcat,__strcat_chk}) call.
3517 If strlen of the second argument is known, strlen of the first argument
3518 is increased by the length of the second argument. Furthermore, attempt
3519 to convert it to memcpy/strcpy if the length of the first argument
3520 is known. */
3522 void
3523 strlen_pass::handle_builtin_strcat (built_in_function bcode)
3525 int idx, didx;
3526 tree srclen, args, type, fn, objsz, endptr;
3527 bool success;
3528 gimple *stmt = gsi_stmt (m_gsi);
3529 strinfo *si, *dsi;
3530 location_t loc = gimple_location (stmt);
3532 tree src = gimple_call_arg (stmt, 1);
3533 tree dst = gimple_call_arg (stmt, 0);
3535 /* Bail if the source is the same as destination. It will be diagnosed
3536 elsewhere. */
3537 if (operand_equal_p (src, dst, 0))
3538 return;
3540 tree lhs = gimple_call_lhs (stmt);
3542 didx = get_stridx (dst, stmt);
3543 if (didx < 0)
3544 return;
3546 dsi = NULL;
3547 if (didx > 0)
3548 dsi = get_strinfo (didx);
3550 srclen = NULL_TREE;
3551 si = NULL;
3552 idx = get_stridx (src, stmt);
3553 if (idx < 0)
3554 srclen = build_int_cst (size_type_node, ~idx);
3555 else if (idx > 0)
3557 si = get_strinfo (idx);
3558 if (si != NULL)
3559 srclen = get_string_length (si);
3562 /* Disable warning for the transformed statement? */
3563 opt_code no_warning_opt = no_warning;
3565 if (dsi == NULL || get_string_length (dsi) == NULL_TREE)
3568 /* The concatenation always involves copying at least one byte
3569 (the terminating nul), even if the source string is empty.
3570 If the source is unknown assume it's one character long and
3571 used that as both sizes. */
3572 tree slen = srclen;
3573 if (slen)
3575 tree type = TREE_TYPE (slen);
3576 slen = fold_build2 (PLUS_EXPR, type, slen, build_int_cst (type, 1));
3579 tree sptr = si && si->ptr ? si->ptr : src;
3580 no_warning_opt = check_bounds_or_overlap (stmt, dst, sptr, NULL_TREE,
3581 slen);
3582 if (no_warning_opt)
3583 suppress_warning (stmt, no_warning_opt);
3586 /* strcat (p, q) can be transformed into
3587 tmp = p + strlen (p); endptr = stpcpy (tmp, q);
3588 with length endptr - p if we need to compute the length
3589 later on. Don't do this transformation if we don't need
3590 it. */
3591 if (builtin_decl_implicit_p (BUILT_IN_STPCPY) && lhs == NULL_TREE)
3593 if (didx == 0)
3595 didx = new_stridx (dst);
3596 if (didx == 0)
3597 return;
3599 if (dsi == NULL)
3601 dsi = new_strinfo (dst, didx, NULL_TREE, false);
3602 set_strinfo (didx, dsi);
3603 find_equal_ptrs (dst, didx);
3605 else
3607 dsi = unshare_strinfo (dsi);
3608 dsi->nonzero_chars = NULL_TREE;
3609 dsi->full_string_p = false;
3610 dsi->next = 0;
3611 dsi->endptr = NULL_TREE;
3613 dsi->writable = true;
3614 dsi->stmt = stmt;
3615 dsi->dont_invalidate = true;
3617 return;
3620 tree dstlen = dsi->nonzero_chars;
3621 endptr = dsi->endptr;
3623 dsi = unshare_strinfo (dsi);
3624 dsi->endptr = NULL_TREE;
3625 dsi->stmt = NULL;
3626 dsi->writable = true;
3628 if (srclen != NULL_TREE)
3630 dsi->nonzero_chars = fold_build2_loc (loc, PLUS_EXPR,
3631 TREE_TYPE (dsi->nonzero_chars),
3632 dsi->nonzero_chars, srclen);
3633 gcc_assert (dsi->full_string_p);
3634 adjust_related_strinfos (loc, dsi, srclen);
3635 dsi->dont_invalidate = true;
3637 else
3639 dsi->nonzero_chars = NULL;
3640 dsi->full_string_p = false;
3641 if (lhs == NULL_TREE && builtin_decl_implicit_p (BUILT_IN_STPCPY))
3642 dsi->dont_invalidate = true;
3645 if (si != NULL)
3646 /* strcat src may not overlap dst, so src doesn't need to be
3647 invalidated either. */
3648 si->dont_invalidate = true;
3650 /* For now. Could remove the lhs from the call and add
3651 lhs = dst; afterwards. */
3652 if (lhs)
3653 return;
3655 fn = NULL_TREE;
3656 objsz = NULL_TREE;
3657 switch (bcode)
3659 case BUILT_IN_STRCAT:
3660 if (srclen != NULL_TREE)
3661 fn = builtin_decl_implicit (BUILT_IN_MEMCPY);
3662 else
3663 fn = builtin_decl_implicit (BUILT_IN_STRCPY);
3664 break;
3665 case BUILT_IN_STRCAT_CHK:
3666 if (srclen != NULL_TREE)
3667 fn = builtin_decl_explicit (BUILT_IN_MEMCPY_CHK);
3668 else
3669 fn = builtin_decl_explicit (BUILT_IN_STRCPY_CHK);
3670 objsz = gimple_call_arg (stmt, 2);
3671 break;
3672 default:
3673 gcc_unreachable ();
3676 if (fn == NULL_TREE)
3677 return;
3679 if (dsi && dstlen)
3681 tree type = TREE_TYPE (dstlen);
3683 /* Compute the size of the source sequence, including the nul. */
3684 tree srcsize = srclen ? srclen : size_zero_node;
3685 tree one = build_int_cst (type, 1);
3686 srcsize = fold_build2 (PLUS_EXPR, type, srcsize, one);
3687 tree dstsize = fold_build2 (PLUS_EXPR, type, dstlen, one);
3688 tree sptr = si && si->ptr ? si->ptr : src;
3690 no_warning_opt = check_bounds_or_overlap (stmt, dst, sptr, dstsize,
3691 srcsize);
3692 if (no_warning_opt)
3693 suppress_warning (stmt, no_warning_opt);
3696 tree len = NULL_TREE;
3697 if (srclen != NULL_TREE)
3699 args = TYPE_ARG_TYPES (TREE_TYPE (fn));
3700 type = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (args)));
3702 len = fold_convert_loc (loc, type, unshare_expr (srclen));
3703 len = fold_build2_loc (loc, PLUS_EXPR, type, len,
3704 build_int_cst (type, 1));
3705 len = force_gimple_operand_gsi (&m_gsi, len, true, NULL_TREE, true,
3706 GSI_SAME_STMT);
3708 if (endptr)
3709 dst = fold_convert_loc (loc, TREE_TYPE (dst), unshare_expr (endptr));
3710 else
3711 dst = fold_build2_loc (loc, POINTER_PLUS_EXPR, TREE_TYPE (dst), dst,
3712 fold_convert_loc (loc, sizetype,
3713 unshare_expr (dstlen)));
3714 dst = force_gimple_operand_gsi (&m_gsi, dst, true, NULL_TREE, true,
3715 GSI_SAME_STMT);
3716 if (objsz)
3718 objsz = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (objsz), objsz,
3719 fold_convert_loc (loc, TREE_TYPE (objsz),
3720 unshare_expr (dstlen)));
3721 objsz = force_gimple_operand_gsi (&m_gsi, objsz, true, NULL_TREE, true,
3722 GSI_SAME_STMT);
3724 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
3726 fprintf (dump_file, "Optimizing: ");
3727 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
3729 if (srclen != NULL_TREE)
3730 success = update_gimple_call (&m_gsi, fn, 3 + (objsz != NULL_TREE),
3731 dst, src, len, objsz);
3732 else
3733 success = update_gimple_call (&m_gsi, fn, 2 + (objsz != NULL_TREE),
3734 dst, src, objsz);
3735 if (success)
3737 stmt = gsi_stmt (m_gsi);
3738 update_stmt (stmt);
3739 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
3741 fprintf (dump_file, "into: ");
3742 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
3744 /* If srclen == NULL, note that current string length can be
3745 computed by transforming this strcpy into stpcpy. */
3746 if (srclen == NULL_TREE && dsi->dont_invalidate)
3747 dsi->stmt = stmt;
3748 adjust_last_stmt (dsi, stmt, true);
3749 if (srclen != NULL_TREE)
3751 laststmt.stmt = stmt;
3752 laststmt.len = srclen;
3753 laststmt.stridx = dsi->idx;
3756 else if (dump_file && (dump_flags & TDF_DETAILS) != 0)
3757 fprintf (dump_file, "not possible.\n");
3759 if (no_warning_opt)
3760 suppress_warning (stmt, no_warning_opt);
3763 /* Handle a call to an allocation function like alloca, malloc or calloc,
3764 or an ordinary allocation function declared with attribute alloc_size. */
3766 void
3767 strlen_pass::handle_alloc_call (built_in_function bcode)
3769 gimple *stmt = gsi_stmt (m_gsi);
3770 tree lhs = gimple_call_lhs (stmt);
3771 if (lhs == NULL_TREE)
3772 return;
3774 gcc_assert (get_stridx (lhs, stmt) == 0);
3775 int idx = new_stridx (lhs);
3776 tree length = NULL_TREE;
3777 if (bcode == BUILT_IN_CALLOC)
3778 length = build_int_cst (size_type_node, 0);
3779 strinfo *si = new_strinfo (lhs, idx, length, length != NULL_TREE);
3780 if (bcode == BUILT_IN_CALLOC)
3782 /* Only set STMT for calloc and malloc. */
3783 si->stmt = stmt;
3784 /* Only set ENDPTR for calloc. */
3785 si->endptr = lhs;
3787 else if (bcode == BUILT_IN_MALLOC)
3788 si->stmt = stmt;
3790 /* Set ALLOC is set for all allocation functions. */
3791 si->alloc = stmt;
3792 set_strinfo (idx, si);
3793 si->writable = true;
3794 si->dont_invalidate = true;
3797 /* Handle a call to memset.
3798 After a call to calloc, memset(,0,) is unnecessary.
3799 memset(malloc(n),0,n) is calloc(n,1).
3800 return true when the call is transformed, false otherwise.
3801 When nonnull uses RVALS to determine range information. */
3803 bool
3804 strlen_pass::handle_builtin_memset (bool *zero_write)
3806 gimple *memset_stmt = gsi_stmt (m_gsi);
3807 tree ptr = gimple_call_arg (memset_stmt, 0);
3808 tree memset_val = gimple_call_arg (memset_stmt, 1);
3809 tree memset_size = gimple_call_arg (memset_stmt, 2);
3811 /* Set to the non-constant offset added to PTR. */
3812 wide_int offrng[2];
3813 int idx1 = get_stridx (ptr, memset_stmt, offrng, ptr_qry.rvals);
3814 if (idx1 == 0
3815 && TREE_CODE (memset_val) == INTEGER_CST
3816 && ((TREE_CODE (memset_size) == INTEGER_CST
3817 && !integer_zerop (memset_size))
3818 || TREE_CODE (memset_size) == SSA_NAME))
3820 unsigned HOST_WIDE_INT mask = (HOST_WIDE_INT_1U << CHAR_TYPE_SIZE) - 1;
3821 bool full_string_p = (wi::to_wide (memset_val) & mask) == 0;
3823 /* We only handle symbolic lengths when writing non-zero values. */
3824 if (full_string_p && TREE_CODE (memset_size) != INTEGER_CST)
3825 return false;
3827 idx1 = new_stridx (ptr);
3828 if (idx1 == 0)
3829 return false;
3830 tree newlen;
3831 if (full_string_p)
3832 newlen = build_int_cst (size_type_node, 0);
3833 else if (TREE_CODE (memset_size) == INTEGER_CST)
3834 newlen = fold_convert (size_type_node, memset_size);
3835 else
3836 newlen = memset_size;
3838 strinfo *dsi = new_strinfo (ptr, idx1, newlen, full_string_p);
3839 set_strinfo (idx1, dsi);
3840 find_equal_ptrs (ptr, idx1);
3841 dsi->dont_invalidate = true;
3842 dsi->writable = true;
3843 return false;
3846 if (idx1 <= 0)
3847 return false;
3848 strinfo *si1 = get_strinfo (idx1);
3849 if (!si1)
3850 return false;
3851 gimple *alloc_stmt = si1->alloc;
3852 if (!alloc_stmt || !is_gimple_call (alloc_stmt))
3853 return false;
3854 tree callee1 = gimple_call_fndecl (alloc_stmt);
3855 if (!valid_builtin_call (alloc_stmt))
3856 return false;
3857 tree alloc_size = gimple_call_arg (alloc_stmt, 0);
3859 /* Check for overflow. */
3860 maybe_warn_overflow (memset_stmt, false, memset_size, NULL, false, true);
3862 /* Bail when there is no statement associated with the destination
3863 (the statement may be null even when SI1->ALLOC is not). */
3864 if (!si1->stmt)
3865 return false;
3867 /* Avoid optimizing if store is at a variable offset from the beginning
3868 of the allocated object. */
3869 if (offrng[0] != 0 || offrng[0] != offrng[1])
3870 return false;
3872 /* Bail when the call writes a non-zero value. */
3873 if (!integer_zerop (memset_val))
3874 return false;
3876 /* Let the caller know the memset call cleared the destination. */
3877 *zero_write = true;
3879 enum built_in_function code1 = DECL_FUNCTION_CODE (callee1);
3880 if (code1 == BUILT_IN_CALLOC)
3881 /* Not touching alloc_stmt */ ;
3882 else if (code1 == BUILT_IN_MALLOC
3883 && operand_equal_p (memset_size, alloc_size, 0))
3885 /* Replace the malloc + memset calls with calloc. */
3886 gimple_stmt_iterator gsi1 = gsi_for_stmt (si1->stmt);
3887 update_gimple_call (&gsi1, builtin_decl_implicit (BUILT_IN_CALLOC), 2,
3888 alloc_size, build_one_cst (size_type_node));
3889 si1->nonzero_chars = build_int_cst (size_type_node, 0);
3890 si1->full_string_p = true;
3891 si1->stmt = gsi_stmt (gsi1);
3893 else
3894 return false;
3895 tree lhs = gimple_call_lhs (memset_stmt);
3896 unlink_stmt_vdef (memset_stmt);
3897 if (lhs)
3899 gimple *assign = gimple_build_assign (lhs, ptr);
3900 gsi_replace (&m_gsi, assign, false);
3902 else
3904 gsi_remove (&m_gsi, true);
3905 release_defs (memset_stmt);
3908 return true;
3911 /* Return first such statement if RES is used in statements testing its
3912 equality to zero, and null otherwise. If EXCLUSIVE is true, return
3913 nonnull if and only RES is used in such expressions exclusively and
3914 in none other. */
3916 gimple *
3917 use_in_zero_equality (tree res, bool exclusive)
3919 gimple *first_use = NULL;
3921 use_operand_p use_p;
3922 imm_use_iterator iter;
3924 FOR_EACH_IMM_USE_FAST (use_p, iter, res)
3926 gimple *use_stmt = USE_STMT (use_p);
3928 if (is_gimple_debug (use_stmt))
3929 continue;
3931 if (gimple_code (use_stmt) == GIMPLE_ASSIGN)
3933 tree_code code = gimple_assign_rhs_code (use_stmt);
3934 if (code == COND_EXPR)
3936 tree cond_expr = gimple_assign_rhs1 (use_stmt);
3937 if ((TREE_CODE (cond_expr) != EQ_EXPR
3938 && (TREE_CODE (cond_expr) != NE_EXPR))
3939 || !integer_zerop (TREE_OPERAND (cond_expr, 1)))
3941 if (exclusive)
3942 return NULL;
3943 continue;
3946 else if (code == EQ_EXPR || code == NE_EXPR)
3948 if (!integer_zerop (gimple_assign_rhs2 (use_stmt)))
3950 if (exclusive)
3951 return NULL;
3952 continue;
3955 else if (exclusive)
3956 return NULL;
3957 else
3958 continue;
3960 else if (gimple_code (use_stmt) == GIMPLE_COND)
3962 tree_code code = gimple_cond_code (use_stmt);
3963 if ((code != EQ_EXPR && code != NE_EXPR)
3964 || !integer_zerop (gimple_cond_rhs (use_stmt)))
3966 if (exclusive)
3967 return NULL;
3968 continue;
3971 else if (exclusive)
3972 return NULL;
3973 else
3974 continue;
3976 if (!first_use)
3977 first_use = use_stmt;
3980 return first_use;
3983 /* Handle a call to memcmp. We try to handle small comparisons by
3984 converting them to load and compare, and replacing the call to memcmp
3985 with a __builtin_memcmp_eq call where possible.
3986 return true when call is transformed, return false otherwise. */
3988 bool
3989 strlen_pass::handle_builtin_memcmp ()
3991 gcall *stmt = as_a <gcall *> (gsi_stmt (m_gsi));
3992 tree res = gimple_call_lhs (stmt);
3994 if (!res || !use_in_zero_equality (res))
3995 return false;
3997 tree arg1 = gimple_call_arg (stmt, 0);
3998 tree arg2 = gimple_call_arg (stmt, 1);
3999 tree len = gimple_call_arg (stmt, 2);
4000 unsigned HOST_WIDE_INT leni;
4002 if (tree_fits_uhwi_p (len)
4003 && (leni = tree_to_uhwi (len)) <= GET_MODE_SIZE (word_mode)
4004 && pow2p_hwi (leni))
4006 leni *= CHAR_TYPE_SIZE;
4007 unsigned align1 = get_pointer_alignment (arg1);
4008 unsigned align2 = get_pointer_alignment (arg2);
4009 unsigned align = MIN (align1, align2);
4010 scalar_int_mode mode;
4011 if (int_mode_for_size (leni, 1).exists (&mode)
4012 && (align >= leni || !targetm.slow_unaligned_access (mode, align)))
4014 location_t loc = gimple_location (stmt);
4015 tree type, off;
4016 type = build_nonstandard_integer_type (leni, 1);
4017 gcc_assert (known_eq (GET_MODE_BITSIZE (TYPE_MODE (type)), leni));
4018 tree ptrtype = build_pointer_type_for_mode (char_type_node,
4019 ptr_mode, true);
4020 off = build_int_cst (ptrtype, 0);
4021 arg1 = build2_loc (loc, MEM_REF, type, arg1, off);
4022 arg2 = build2_loc (loc, MEM_REF, type, arg2, off);
4023 tree tem1 = fold_const_aggregate_ref (arg1);
4024 if (tem1)
4025 arg1 = tem1;
4026 tree tem2 = fold_const_aggregate_ref (arg2);
4027 if (tem2)
4028 arg2 = tem2;
4029 res = fold_convert_loc (loc, TREE_TYPE (res),
4030 fold_build2_loc (loc, NE_EXPR,
4031 boolean_type_node,
4032 arg1, arg2));
4033 gimplify_and_update_call_from_tree (&m_gsi, res);
4034 return true;
4038 gimple_call_set_fndecl (stmt, builtin_decl_explicit (BUILT_IN_MEMCMP_EQ));
4039 return true;
4042 /* Given strinfo IDX for ARG, sets LENRNG[] to the range of lengths
4043 of the string(s) referenced by ARG if it can be determined.
4044 If the length cannot be determined, sets *SIZE to the size of
4045 the array the string is stored in, if any. If no such array is
4046 known, sets *SIZE to -1. When the strings are nul-terminated sets
4047 *NULTERM to true, otherwise to false. When nonnull uses RVALS to
4048 determine range information. Returns true on success. */
4050 bool
4051 strlen_pass::get_len_or_size (gimple *stmt, tree arg, int idx,
4052 unsigned HOST_WIDE_INT lenrng[2],
4053 unsigned HOST_WIDE_INT *size, bool *nulterm)
4055 /* Invalidate. */
4056 *size = HOST_WIDE_INT_M1U;
4058 if (idx < 0)
4060 /* IDX is the inverted constant string length. */
4061 lenrng[0] = ~idx;
4062 lenrng[1] = lenrng[0];
4063 *nulterm = true;
4064 return true;
4067 /* Set so that both LEN and ~LEN are invalid lengths, i.e., maximum
4068 possible length + 1. */
4069 lenrng[0] = lenrng[1] = HOST_WIDE_INT_MAX;
4071 if (strinfo *si = idx ? get_strinfo (idx) : NULL)
4073 /* FIXME: Handle all this in_range_strlen_dynamic. */
4074 if (!si->nonzero_chars)
4076 else if (tree_fits_uhwi_p (si->nonzero_chars))
4078 lenrng[0] = tree_to_uhwi (si->nonzero_chars);
4079 *nulterm = si->full_string_p;
4080 /* Set the upper bound only if the string is known to be
4081 nul-terminated, otherwise leave it at maximum + 1. */
4082 if (*nulterm)
4083 lenrng[1] = lenrng[0];
4085 else if (TREE_CODE (si->nonzero_chars) == SSA_NAME)
4087 value_range r;
4088 get_range_query (cfun)->range_of_expr (r, si->nonzero_chars);
4089 if (r.kind () == VR_RANGE)
4091 lenrng[0] = r.lower_bound ().to_uhwi ();
4092 lenrng[1] = r.upper_bound ().to_uhwi ();
4093 *nulterm = si->full_string_p;
4098 if (lenrng[0] != HOST_WIDE_INT_MAX)
4099 return true;
4101 /* Compute the minimum and maximum real or possible lengths. */
4102 c_strlen_data lendata = { };
4103 /* Set MAXBOUND to an arbitrary non-null non-integer node as a request
4104 to have it set to the length of the longest string in a PHI. */
4105 lendata.maxbound = arg;
4106 get_range_strlen_dynamic (arg, stmt, &lendata, ptr_qry);
4108 unsigned HOST_WIDE_INT maxbound = HOST_WIDE_INT_M1U;
4109 if (tree_fits_uhwi_p (lendata.maxbound)
4110 && !integer_all_onesp (lendata.maxbound))
4111 maxbound = tree_to_uhwi (lendata.maxbound);
4113 if (tree_fits_uhwi_p (lendata.minlen) && tree_fits_uhwi_p (lendata.maxlen))
4115 unsigned HOST_WIDE_INT minlen = tree_to_uhwi (lendata.minlen);
4116 unsigned HOST_WIDE_INT maxlen = tree_to_uhwi (lendata.maxlen);
4118 /* The longest string in this data model. */
4119 const unsigned HOST_WIDE_INT lenmax
4120 = tree_to_uhwi (max_object_size ()) - 2;
4122 if (maxbound == HOST_WIDE_INT_M1U)
4124 lenrng[0] = minlen;
4125 lenrng[1] = maxlen;
4126 *nulterm = minlen == maxlen;
4128 else if (maxlen < lenmax)
4130 *size = maxbound + 1;
4131 *nulterm = false;
4133 else
4134 return false;
4136 return true;
4139 if (maxbound != HOST_WIDE_INT_M1U
4140 && lendata.maxlen
4141 && !integer_all_onesp (lendata.maxlen))
4143 /* Set *SIZE to LENDATA.MAXBOUND which is a conservative estimate
4144 of the longest string based on the sizes of the arrays referenced
4145 by ARG. */
4146 *size = maxbound + 1;
4147 *nulterm = false;
4148 return true;
4151 return false;
4154 /* If IDX1 and IDX2 refer to strings A and B of unequal lengths, return
4155 the result of 0 == strncmp (A, B, BOUND) (which is the same as strcmp
4156 for a sufficiently large BOUND). If the result is based on the length
4157 of one string being greater than the longest string that would fit in
4158 the array pointer to by the argument, set *PLEN and *PSIZE to
4159 the corresponding length (or its complement when the string is known
4160 to be at least as long and need not be nul-terminated) and size.
4161 Otherwise return null. */
4163 tree
4164 strlen_pass::strxcmp_eqz_result (gimple *stmt, tree arg1, int idx1,
4165 tree arg2, int idx2,
4166 unsigned HOST_WIDE_INT bound,
4167 unsigned HOST_WIDE_INT len[2],
4168 unsigned HOST_WIDE_INT *psize)
4170 /* Determine the range the length of each string is in and whether it's
4171 known to be nul-terminated, or the size of the array it's stored in. */
4172 bool nul1, nul2;
4173 unsigned HOST_WIDE_INT siz1, siz2;
4174 unsigned HOST_WIDE_INT len1rng[2], len2rng[2];
4175 if (!get_len_or_size (stmt, arg1, idx1, len1rng, &siz1, &nul1)
4176 || !get_len_or_size (stmt, arg2, idx2, len2rng, &siz2, &nul2))
4177 return NULL_TREE;
4179 /* BOUND is set to HWI_M1U for strcmp and less to strncmp, and LENiRNG
4180 to HWI_MAX when invalid. Adjust the length of each string to consider
4181 to be no more than BOUND. */
4182 if (len1rng[0] < HOST_WIDE_INT_MAX && len1rng[0] > bound)
4183 len1rng[0] = bound;
4184 if (len1rng[1] < HOST_WIDE_INT_MAX && len1rng[1] > bound)
4185 len1rng[1] = bound;
4186 if (len2rng[0] < HOST_WIDE_INT_MAX && len2rng[0] > bound)
4187 len2rng[0] = bound;
4188 if (len2rng[1] < HOST_WIDE_INT_MAX && len2rng[1] > bound)
4189 len2rng[1] = bound;
4191 /* Two empty strings are equal. */
4192 if (len1rng[1] == 0 && len2rng[1] == 0)
4193 return integer_one_node;
4195 /* The strings are definitely unequal when the lower bound of the length
4196 of one of them is greater than the length of the longest string that
4197 would fit into the other array. */
4198 if (len1rng[0] == HOST_WIDE_INT_MAX
4199 && len2rng[0] != HOST_WIDE_INT_MAX
4200 && ((len2rng[0] < bound && len2rng[0] >= siz1)
4201 || len2rng[0] > siz1))
4203 *psize = siz1;
4204 len[0] = len1rng[0];
4205 /* Set LEN[0] to the lower bound of ARG1's length when it's
4206 nul-terminated or to the complement of its minimum length
4207 otherwise, */
4208 len[1] = nul2 ? len2rng[0] : ~len2rng[0];
4209 return integer_zero_node;
4212 if (len2rng[0] == HOST_WIDE_INT_MAX
4213 && len1rng[0] != HOST_WIDE_INT_MAX
4214 && ((len1rng[0] < bound && len1rng[0] >= siz2)
4215 || len1rng[0] > siz2))
4217 *psize = siz2;
4218 len[0] = nul1 ? len1rng[0] : ~len1rng[0];
4219 len[1] = len2rng[0];
4220 return integer_zero_node;
4223 /* The strings are also definitely unequal when their lengths are unequal
4224 and at least one is nul-terminated. */
4225 if (len1rng[0] != HOST_WIDE_INT_MAX
4226 && len2rng[0] != HOST_WIDE_INT_MAX
4227 && ((len1rng[1] < len2rng[0] && nul1)
4228 || (len2rng[1] < len1rng[0] && nul2)))
4230 if (bound <= len1rng[0] || bound <= len2rng[0])
4231 *psize = bound;
4232 else
4233 *psize = HOST_WIDE_INT_M1U;
4235 len[0] = len1rng[0];
4236 len[1] = len2rng[0];
4237 return integer_zero_node;
4240 /* The string lengths may be equal or unequal. Even when equal and
4241 both strings nul-terminated, without the string contents there's
4242 no way to determine whether they are equal. */
4243 return NULL_TREE;
4246 /* Diagnose pointless calls to strcmp or strncmp STMT with string
4247 arguments of lengths LEN or size SIZ and (for strncmp) BOUND,
4248 whose result is used in equality expressions that evaluate to
4249 a constant due to one argument being longer than the size of
4250 the other. */
4252 static void
4253 maybe_warn_pointless_strcmp (gimple *stmt, HOST_WIDE_INT bound,
4254 unsigned HOST_WIDE_INT len[2],
4255 unsigned HOST_WIDE_INT siz)
4257 tree lhs = gimple_call_lhs (stmt);
4258 gimple *use = use_in_zero_equality (lhs, /* exclusive = */ false);
4259 if (!use)
4260 return;
4262 bool at_least = false;
4264 /* Excessive LEN[i] indicates a lower bound. */
4265 if (len[0] > HOST_WIDE_INT_MAX)
4267 at_least = true;
4268 len[0] = ~len[0];
4271 if (len[1] > HOST_WIDE_INT_MAX)
4273 at_least = true;
4274 len[1] = ~len[1];
4277 unsigned HOST_WIDE_INT minlen = MIN (len[0], len[1]);
4279 /* FIXME: Include a note pointing to the declaration of the smaller
4280 array. */
4281 location_t stmt_loc = gimple_or_expr_nonartificial_location (stmt, lhs);
4283 tree callee = gimple_call_fndecl (stmt);
4284 bool warned = false;
4285 if (siz <= minlen && bound == -1)
4286 warned = warning_at (stmt_loc, OPT_Wstring_compare,
4287 (at_least
4288 ? G_("%qD of a string of length %wu or more and "
4289 "an array of size %wu evaluates to nonzero")
4290 : G_("%qD of a string of length %wu and an array "
4291 "of size %wu evaluates to nonzero")),
4292 callee, minlen, siz);
4293 else if (!at_least && siz <= HOST_WIDE_INT_MAX)
4295 if (len[0] != HOST_WIDE_INT_MAX && len[1] != HOST_WIDE_INT_MAX)
4296 warned = warning_at (stmt_loc, OPT_Wstring_compare,
4297 "%qD of strings of length %wu and %wu "
4298 "and bound of %wu evaluates to nonzero",
4299 callee, len[0], len[1], bound);
4300 else
4301 warned = warning_at (stmt_loc, OPT_Wstring_compare,
4302 "%qD of a string of length %wu, an array "
4303 "of size %wu and bound of %wu evaluates to "
4304 "nonzero",
4305 callee, minlen, siz, bound);
4308 if (!warned)
4309 return;
4311 location_t use_loc = gimple_location (use);
4312 if (LOCATION_LINE (stmt_loc) != LOCATION_LINE (use_loc))
4313 inform (use_loc, "in this expression");
4317 /* Optimize a call to strcmp or strncmp either by folding it to a constant
4318 when possible or by transforming the latter to the former. Warn about
4319 calls where the length of one argument is greater than the size of
4320 the array to which the other argument points if the latter's length
4321 is not known. Return true when the call has been transformed into
4322 another and false otherwise. */
4324 bool
4325 strlen_pass::handle_builtin_string_cmp ()
4327 gcall *stmt = as_a <gcall *> (gsi_stmt (m_gsi));
4328 tree lhs = gimple_call_lhs (stmt);
4330 if (!lhs)
4331 return false;
4333 tree arg1 = gimple_call_arg (stmt, 0);
4334 tree arg2 = gimple_call_arg (stmt, 1);
4335 int idx1 = get_stridx (arg1, stmt);
4336 int idx2 = get_stridx (arg2, stmt);
4338 /* For strncmp set to the value of the third argument if known. */
4339 HOST_WIDE_INT bound = -1;
4340 tree len = NULL_TREE;
4341 /* Extract the strncmp bound. */
4342 if (gimple_call_num_args (stmt) == 3)
4344 len = gimple_call_arg (stmt, 2);
4345 if (tree_fits_shwi_p (len))
4346 bound = tree_to_shwi (len);
4348 /* If the bound argument is NOT known, do nothing. */
4349 if (bound < 0)
4350 return false;
4353 /* Avoid folding if either argument is not a nul-terminated array.
4354 Defer warning until later. */
4355 if (!check_nul_terminated_array (NULL_TREE, arg1, len)
4356 || !check_nul_terminated_array (NULL_TREE, arg2, len))
4357 return false;
4360 /* Set to the length of one argument (or its complement if it's
4361 the lower bound of a range) and the size of the array storing
4362 the other if the result is based on the former being equal to
4363 or greater than the latter. */
4364 unsigned HOST_WIDE_INT len[2] = { HOST_WIDE_INT_MAX, HOST_WIDE_INT_MAX };
4365 unsigned HOST_WIDE_INT siz = HOST_WIDE_INT_M1U;
4367 /* Try to determine if the two strings are either definitely equal
4368 or definitely unequal and if so, either fold the result to zero
4369 (when equal) or set the range of the result to ~[0, 0] otherwise. */
4370 if (tree eqz = strxcmp_eqz_result (stmt, arg1, idx1, arg2, idx2, bound,
4371 len, &siz))
4373 if (integer_zerop (eqz))
4375 maybe_warn_pointless_strcmp (stmt, bound, len, siz);
4377 /* When the lengths of the first two string arguments are
4378 known to be unequal set the range of the result to non-zero.
4379 This allows the call to be eliminated if its result is only
4380 used in tests for equality to zero. */
4381 value_range nz;
4382 nz.set_nonzero (TREE_TYPE (lhs));
4383 set_range_info (lhs, nz);
4384 return false;
4386 /* When the two strings are definitely equal (such as when they
4387 are both empty) fold the call to the constant result. */
4388 replace_call_with_value (&m_gsi, integer_zero_node);
4389 return true;
4393 /* Return if nothing is known about the strings pointed to by ARG1
4394 and ARG2. */
4395 if (idx1 == 0 && idx2 == 0)
4396 return false;
4398 /* Determine either the length or the size of each of the strings,
4399 whichever is available. */
4400 HOST_WIDE_INT cstlen1 = -1, cstlen2 = -1;
4401 HOST_WIDE_INT arysiz1 = -1, arysiz2 = -1;
4404 unsigned HOST_WIDE_INT len1rng[2], len2rng[2];
4405 unsigned HOST_WIDE_INT arsz1, arsz2;
4406 bool nulterm[2];
4408 if (!get_len_or_size (stmt, arg1, idx1, len1rng, &arsz1, nulterm)
4409 || !get_len_or_size (stmt, arg2, idx2, len2rng, &arsz2, nulterm + 1))
4410 return false;
4412 if (len1rng[0] == len1rng[1] && len1rng[0] < HOST_WIDE_INT_MAX)
4413 cstlen1 = len1rng[0];
4414 else if (arsz1 < HOST_WIDE_INT_M1U)
4415 arysiz1 = arsz1;
4417 if (len2rng[0] == len2rng[1] && len2rng[0] < HOST_WIDE_INT_MAX)
4418 cstlen2 = len2rng[0];
4419 else if (arsz2 < HOST_WIDE_INT_M1U)
4420 arysiz2 = arsz2;
4423 /* Bail if neither the string length nor the size of the array
4424 it is stored in can be determined. */
4425 if ((cstlen1 < 0 && arysiz1 < 0)
4426 || (cstlen2 < 0 && arysiz2 < 0)
4427 || (cstlen1 < 0 && cstlen2 < 0))
4428 return false;
4430 if (cstlen1 >= 0)
4431 ++cstlen1;
4432 if (cstlen2 >= 0)
4433 ++cstlen2;
4435 /* The exact number of characters to compare. */
4436 HOST_WIDE_INT cmpsiz;
4437 if (cstlen1 >= 0 && cstlen2 >= 0)
4438 cmpsiz = MIN (cstlen1, cstlen2);
4439 else if (cstlen1 >= 0)
4440 cmpsiz = cstlen1;
4441 else
4442 cmpsiz = cstlen2;
4443 if (bound >= 0)
4444 cmpsiz = MIN (cmpsiz, bound);
4445 /* The size of the array in which the unknown string is stored. */
4446 HOST_WIDE_INT varsiz = arysiz1 < 0 ? arysiz2 : arysiz1;
4448 if ((varsiz < 0 || cmpsiz < varsiz) && use_in_zero_equality (lhs))
4450 /* If the known length is less than the size of the other array
4451 and the strcmp result is only used to test equality to zero,
4452 transform the call to the equivalent _eq call. */
4453 if (tree fn = builtin_decl_implicit (bound < 0 ? BUILT_IN_STRCMP_EQ
4454 : BUILT_IN_STRNCMP_EQ))
4456 tree n = build_int_cst (size_type_node, cmpsiz);
4457 update_gimple_call (&m_gsi, fn, 3, arg1, arg2, n);
4458 return true;
4462 return false;
4465 /* Handle a POINTER_PLUS_EXPR statement.
4466 For p = "abcd" + 2; compute associated length, or if
4467 p = q + off is pointing to a '\0' character of a string, call
4468 zero_length_string on it. */
4470 void
4471 strlen_pass::handle_pointer_plus ()
4473 gimple *stmt = gsi_stmt (m_gsi);
4474 tree lhs = gimple_assign_lhs (stmt), off;
4475 int idx = get_stridx (gimple_assign_rhs1 (stmt), stmt);
4476 strinfo *si, *zsi;
4478 if (idx == 0)
4479 return;
4481 if (idx < 0)
4483 tree off = gimple_assign_rhs2 (stmt);
4484 if (tree_fits_uhwi_p (off)
4485 && tree_to_uhwi (off) <= (unsigned HOST_WIDE_INT) ~idx)
4486 ssa_ver_to_stridx[SSA_NAME_VERSION (lhs)]
4487 = ~(~idx - (int) tree_to_uhwi (off));
4488 return;
4491 si = get_strinfo (idx);
4492 if (si == NULL || si->nonzero_chars == NULL_TREE)
4493 return;
4495 off = gimple_assign_rhs2 (stmt);
4496 zsi = NULL;
4497 if (si->full_string_p && operand_equal_p (si->nonzero_chars, off, 0))
4498 zsi = zero_length_string (lhs, si);
4499 else if (TREE_CODE (off) == SSA_NAME)
4501 gimple *def_stmt = SSA_NAME_DEF_STMT (off);
4502 if (gimple_assign_single_p (def_stmt)
4503 && si->full_string_p
4504 && operand_equal_p (si->nonzero_chars,
4505 gimple_assign_rhs1 (def_stmt), 0))
4506 zsi = zero_length_string (lhs, si);
4508 if (zsi != NULL
4509 && si->endptr != NULL_TREE
4510 && si->endptr != lhs
4511 && TREE_CODE (si->endptr) == SSA_NAME)
4513 enum tree_code rhs_code
4514 = useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (si->endptr))
4515 ? SSA_NAME : NOP_EXPR;
4516 gimple_assign_set_rhs_with_ops (&m_gsi, rhs_code, si->endptr);
4517 gcc_assert (gsi_stmt (m_gsi) == stmt);
4518 update_stmt (stmt);
4522 /* Set LENRANGE to the number of nonzero bytes for a store of TYPE and
4523 clear all flags. Return true on success and false on failure. */
4525 static bool
4526 nonzero_bytes_for_type (tree type, unsigned lenrange[3],
4527 bool *nulterm, bool *allnul, bool *allnonnul)
4529 /* Use the size of the type of the expression as the size of the store,
4530 and set the upper bound of the length range to that of the size.
4531 Nothing is known about the contents so clear all flags. */
4532 tree typesize = TYPE_SIZE_UNIT (type);
4533 if (!type)
4534 return false;
4536 if (!tree_fits_uhwi_p (typesize))
4537 return false;
4539 unsigned HOST_WIDE_INT sz = tree_to_uhwi (typesize);
4540 if (sz > UINT_MAX)
4541 return false;
4543 lenrange[2] = sz;
4544 lenrange[1] = lenrange[2] ? lenrange[2] - 1 : 0;
4545 lenrange[0] = 0;
4546 *nulterm = false;
4547 *allnul = false;
4548 *allnonnul = false;
4549 return true;
4552 /* Recursively determine the minimum and maximum number of leading nonzero
4553 bytes in the representation of EXP and set LENRANGE[0] and LENRANGE[1]
4554 to each.
4555 Sets LENRANGE[2] to the total size of the access (which may be less
4556 than LENRANGE[1] when what's being referenced by EXP is a pointer
4557 rather than an array).
4558 Sets *NULTERM if the representation contains a zero byte, sets *ALLNUL
4559 if all the bytes are zero, and *ALLNONNUL is all are nonzero.
4560 OFFSET and NBYTES are the offset into the representation and
4561 the size of the access to it determined from an ADDR_EXPR (i.e.,
4562 a pointer) or MEM_REF or zero for other expressions.
4563 Uses RVALS to determine range information.
4564 Avoids recursing deeper than the limits in SNLIM allow.
4565 Returns true on success and false otherwise. */
4567 bool
4568 strlen_pass::count_nonzero_bytes (tree exp, gimple *stmt,
4569 unsigned HOST_WIDE_INT offset,
4570 unsigned HOST_WIDE_INT nbytes,
4571 unsigned lenrange[3], bool *nulterm,
4572 bool *allnul, bool *allnonnul,
4573 ssa_name_limit_t &snlim)
4575 if (TREE_CODE (exp) == SSA_NAME)
4577 /* Handle non-zero single-character stores specially. */
4578 tree type = TREE_TYPE (exp);
4579 if (TREE_CODE (type) == INTEGER_TYPE
4580 && TYPE_MODE (type) == TYPE_MODE (char_type_node)
4581 && TYPE_PRECISION (type) == TYPE_PRECISION (char_type_node)
4582 && tree_expr_nonzero_p (exp))
4584 /* If the character EXP is known to be non-zero (even if its
4585 exact value is not known) recurse once to set the range
4586 for an arbitrary constant. */
4587 exp = build_int_cst (type, 1);
4588 return count_nonzero_bytes (exp, stmt,
4589 offset, 1, lenrange,
4590 nulterm, allnul, allnonnul, snlim);
4593 gimple *stmt = SSA_NAME_DEF_STMT (exp);
4594 if (gimple_assign_single_p (stmt))
4596 exp = gimple_assign_rhs1 (stmt);
4597 if (!DECL_P (exp)
4598 && TREE_CODE (exp) != CONSTRUCTOR
4599 && TREE_CODE (exp) != MEM_REF)
4600 return false;
4601 /* Handle DECLs, CONSTRUCTOR and MEM_REF below. */
4603 else if (gimple_code (stmt) == GIMPLE_PHI)
4605 /* Avoid processing an SSA_NAME that has already been visited
4606 or if an SSA_NAME limit has been reached. Indicate success
4607 if the former and failure if the latter. */
4608 if (int res = snlim.next_phi (exp))
4609 return res > 0;
4611 /* Determine the minimum and maximum from the PHI arguments. */
4612 unsigned int n = gimple_phi_num_args (stmt);
4613 for (unsigned i = 0; i != n; i++)
4615 tree def = gimple_phi_arg_def (stmt, i);
4616 if (!count_nonzero_bytes (def, stmt,
4617 offset, nbytes, lenrange, nulterm,
4618 allnul, allnonnul, snlim))
4619 return false;
4622 return true;
4626 if (TREE_CODE (exp) == CONSTRUCTOR)
4628 if (nbytes)
4629 /* If NBYTES has already been determined by an outer MEM_REF
4630 fail rather than overwriting it (this shouldn't happen). */
4631 return false;
4633 tree type = TREE_TYPE (exp);
4634 tree size = TYPE_SIZE_UNIT (type);
4635 if (!size || !tree_fits_uhwi_p (size))
4636 return false;
4638 unsigned HOST_WIDE_INT byte_size = tree_to_uhwi (size);
4639 if (byte_size < offset)
4640 return false;
4642 nbytes = byte_size - offset;
4645 if (TREE_CODE (exp) == MEM_REF)
4647 if (nbytes)
4648 return false;
4650 tree arg = TREE_OPERAND (exp, 0);
4651 tree off = TREE_OPERAND (exp, 1);
4653 if (TREE_CODE (off) != INTEGER_CST || !tree_fits_uhwi_p (off))
4654 return false;
4656 unsigned HOST_WIDE_INT wioff = tree_to_uhwi (off);
4657 if (INT_MAX < wioff)
4658 return false;
4660 offset += wioff;
4661 if (INT_MAX < offset)
4662 return false;
4664 /* The size of the MEM_REF access determines the number of bytes. */
4665 tree type = TREE_TYPE (exp);
4666 tree typesize = TYPE_SIZE_UNIT (type);
4667 if (!typesize || !tree_fits_uhwi_p (typesize))
4668 return false;
4669 nbytes = tree_to_uhwi (typesize);
4670 if (!nbytes)
4671 return false;
4673 /* Handle MEM_REF = SSA_NAME types of assignments. */
4674 return count_nonzero_bytes_addr (arg, stmt,
4675 offset, nbytes, lenrange, nulterm,
4676 allnul, allnonnul, snlim);
4679 if (VAR_P (exp) || TREE_CODE (exp) == CONST_DECL)
4681 /* If EXP can be folded into a constant use the result. Otherwise
4682 proceed to use EXP to determine a range of the result. */
4683 if (tree fold_exp = ctor_for_folding (exp))
4684 if (fold_exp != error_mark_node)
4685 exp = fold_exp;
4688 const char *prep = NULL;
4689 if (TREE_CODE (exp) == STRING_CST)
4691 unsigned nchars = TREE_STRING_LENGTH (exp);
4692 if (nchars < offset)
4693 return false;
4695 if (!nbytes)
4696 /* If NBYTES hasn't been determined earlier, either from ADDR_EXPR
4697 (i.e., it's the size of a pointer), or from MEM_REF (as the size
4698 of the access), set it here to the size of the string, including
4699 all internal and trailing nuls if the string has any. */
4700 nbytes = nchars - offset;
4701 else if (nchars - offset < nbytes)
4702 return false;
4704 prep = TREE_STRING_POINTER (exp) + offset;
4707 unsigned char buf[256];
4708 if (!prep)
4710 if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
4711 return false;
4712 /* If the pointer to representation hasn't been set above
4713 for STRING_CST point it at the buffer. */
4714 prep = reinterpret_cast <char *>(buf);
4715 /* Try to extract the representation of the constant object
4716 or expression starting from the offset. */
4717 unsigned repsize = native_encode_expr (exp, buf, sizeof buf, offset);
4718 if (repsize < nbytes)
4720 /* This should only happen when REPSIZE is zero because EXP
4721 doesn't denote an object with a known initializer, except
4722 perhaps when the reference reads past its end. */
4723 lenrange[0] = 0;
4724 prep = NULL;
4726 else if (!nbytes)
4727 nbytes = repsize;
4728 else if (nbytes < repsize)
4729 return false;
4732 if (!nbytes)
4733 return nonzero_bytes_for_type (TREE_TYPE (exp), lenrange,
4734 nulterm, allnul, allnonnul);
4736 /* Compute the number of leading nonzero bytes in the representation
4737 and update the minimum and maximum. */
4738 unsigned HOST_WIDE_INT n = prep ? strnlen (prep, nbytes) : nbytes;
4740 if (n < lenrange[0])
4741 lenrange[0] = n;
4742 if (lenrange[1] < n)
4743 lenrange[1] = n;
4745 /* Set the size of the representation. */
4746 if (lenrange[2] < nbytes)
4747 lenrange[2] = nbytes;
4749 /* Clear NULTERM if none of the bytes is zero. */
4750 if (n == nbytes)
4751 *nulterm = false;
4753 if (n)
4755 /* When the initial number of non-zero bytes N is non-zero, reset
4756 *ALLNUL; if N is less than that the size of the representation
4757 also clear *ALLNONNUL. */
4758 *allnul = false;
4759 if (n < nbytes)
4760 *allnonnul = false;
4762 else if (*allnul || *allnonnul)
4764 *allnonnul = false;
4766 if (*allnul)
4768 /* When either ALLNUL is set and N is zero, also determine
4769 whether all subsequent bytes after the first one (which
4770 is nul) are zero or nonzero and clear ALLNUL if not. */
4771 for (const char *p = prep; p != prep + nbytes; ++p)
4772 if (*p)
4774 *allnul = false;
4775 break;
4780 return true;
4783 /* Like count_nonzero_bytes, but instead of counting bytes in EXP, count
4784 bytes that are pointed to by EXP, which should be a pointer. */
4786 bool
4787 strlen_pass::count_nonzero_bytes_addr (tree exp, gimple *stmt,
4788 unsigned HOST_WIDE_INT offset,
4789 unsigned HOST_WIDE_INT nbytes,
4790 unsigned lenrange[3], bool *nulterm,
4791 bool *allnul, bool *allnonnul,
4792 ssa_name_limit_t &snlim)
4794 int idx = get_stridx (exp, stmt);
4795 if (idx > 0)
4797 strinfo *si = get_strinfo (idx);
4798 if (!si)
4799 return false;
4801 /* Handle both constant lengths as well non-constant lengths
4802 in some range. */
4803 unsigned HOST_WIDE_INT minlen, maxlen;
4804 if (tree_fits_shwi_p (si->nonzero_chars))
4805 minlen = maxlen = tree_to_shwi (si->nonzero_chars);
4806 else if (si->nonzero_chars
4807 && TREE_CODE (si->nonzero_chars) == SSA_NAME)
4809 value_range vr;
4810 ptr_qry.rvals->range_of_expr (vr, si->nonzero_chars, stmt);
4811 if (vr.kind () != VR_RANGE)
4812 return false;
4814 minlen = tree_to_uhwi (vr.min ());
4815 maxlen = tree_to_uhwi (vr.max ());
4817 else
4818 return false;
4820 if (maxlen < offset)
4821 return false;
4823 minlen = minlen < offset ? 0 : minlen - offset;
4824 maxlen -= offset;
4825 if (maxlen + 1 < nbytes)
4826 return false;
4828 if (nbytes <= minlen)
4829 *nulterm = false;
4831 if (nbytes < minlen)
4833 minlen = nbytes;
4834 if (nbytes < maxlen)
4835 maxlen = nbytes;
4838 if (minlen < lenrange[0])
4839 lenrange[0] = minlen;
4840 if (lenrange[1] < maxlen)
4841 lenrange[1] = maxlen;
4843 if (lenrange[2] < nbytes)
4844 lenrange[2] = nbytes;
4846 /* Since only the length of the string are known and not its contents,
4847 clear ALLNUL and ALLNONNUL purely on the basis of the length. */
4848 *allnul = false;
4849 if (minlen < nbytes)
4850 *allnonnul = false;
4852 return true;
4855 if (TREE_CODE (exp) == ADDR_EXPR)
4856 return count_nonzero_bytes (TREE_OPERAND (exp, 0), stmt,
4857 offset, nbytes,
4858 lenrange, nulterm, allnul, allnonnul, snlim);
4860 if (TREE_CODE (exp) == SSA_NAME)
4862 gimple *stmt = SSA_NAME_DEF_STMT (exp);
4863 if (gimple_code (stmt) == GIMPLE_PHI)
4865 /* Avoid processing an SSA_NAME that has already been visited
4866 or if an SSA_NAME limit has been reached. Indicate success
4867 if the former and failure if the latter. */
4868 if (int res = snlim.next_phi (exp))
4869 return res > 0;
4871 /* Determine the minimum and maximum from the PHI arguments. */
4872 unsigned int n = gimple_phi_num_args (stmt);
4873 for (unsigned i = 0; i != n; i++)
4875 tree def = gimple_phi_arg_def (stmt, i);
4876 if (!count_nonzero_bytes_addr (def, stmt,
4877 offset, nbytes, lenrange,
4878 nulterm, allnul, allnonnul,
4879 snlim))
4880 return false;
4883 return true;
4887 /* Otherwise we don't know anything. */
4888 lenrange[0] = 0;
4889 if (lenrange[1] < nbytes)
4890 lenrange[1] = nbytes;
4891 if (lenrange[2] < nbytes)
4892 lenrange[2] = nbytes;
4893 *nulterm = false;
4894 *allnul = false;
4895 *allnonnul = false;
4896 return true;
4899 /* Same as above except with an implicit SSA_NAME limit. When EXPR_OR_TYPE
4900 is a type rather than an expression use its size to compute the range.
4901 RVALS is used to determine ranges of dynamically computed string lengths
4902 (the results of strlen). */
4904 bool
4905 strlen_pass::count_nonzero_bytes (tree expr_or_type, gimple *stmt,
4906 unsigned lenrange[3], bool *nulterm,
4907 bool *allnul, bool *allnonnul)
4909 if (TYPE_P (expr_or_type))
4910 return nonzero_bytes_for_type (expr_or_type, lenrange,
4911 nulterm, allnul, allnonnul);
4913 /* Set to optimistic values so the caller doesn't have to worry about
4914 initializing these and to what. On success, the function will clear
4915 these if it determines their values are different but being recursive
4916 it never sets either to true. On failure, their values are
4917 unspecified. */
4918 *nulterm = true;
4919 *allnul = true;
4920 *allnonnul = true;
4922 ssa_name_limit_t snlim;
4923 tree expr = expr_or_type;
4924 return count_nonzero_bytes (expr, stmt,
4925 0, 0, lenrange, nulterm, allnul, allnonnul,
4926 snlim);
4929 /* Handle a single or multibyte store other than by a built-in function,
4930 either via a single character assignment or by multi-byte assignment
4931 either via MEM_REF or via a type other than char (such as in
4932 '*(int*)a = 12345'). Return true to let the caller advance *GSI to
4933 the next statement in the basic block and false otherwise. */
4935 bool
4936 strlen_pass::handle_store (bool *zero_write)
4938 gimple *stmt = gsi_stmt (m_gsi);
4939 /* The LHS and RHS of the store. The RHS is null if STMT is a function
4940 call. STORETYPE is the type of the store (determined from either
4941 the RHS of the assignment statement or the LHS of a function call. */
4942 tree lhs, rhs, storetype;
4943 if (is_gimple_assign (stmt))
4945 lhs = gimple_assign_lhs (stmt);
4946 rhs = gimple_assign_rhs1 (stmt);
4947 storetype = TREE_TYPE (rhs);
4949 else if (is_gimple_call (stmt))
4951 lhs = gimple_call_lhs (stmt);
4952 rhs = NULL_TREE;
4953 storetype = TREE_TYPE (lhs);
4955 else
4956 return true;
4958 tree ssaname = NULL_TREE;
4959 strinfo *si = NULL;
4960 int idx = -1;
4962 range_query *const rvals = ptr_qry.rvals;
4964 /* The offset of the first byte in LHS modified by the store. */
4965 unsigned HOST_WIDE_INT offset = 0;
4967 if (TREE_CODE (lhs) == MEM_REF
4968 && TREE_CODE (TREE_OPERAND (lhs, 0)) == SSA_NAME)
4970 tree mem_offset = TREE_OPERAND (lhs, 1);
4971 if (tree_fits_uhwi_p (mem_offset))
4973 /* Get the strinfo for the base, and use it if it starts with at
4974 least OFFSET nonzero characters. This is trivially true if
4975 OFFSET is zero. */
4976 offset = tree_to_uhwi (mem_offset);
4977 idx = get_stridx (TREE_OPERAND (lhs, 0), stmt);
4978 if (idx > 0)
4979 si = get_strinfo (idx);
4980 if (offset == 0)
4981 ssaname = TREE_OPERAND (lhs, 0);
4982 else if (si == NULL
4983 || compare_nonzero_chars (si, stmt, offset, rvals) < 0)
4985 *zero_write = rhs ? initializer_zerop (rhs) : false;
4987 bool dummy;
4988 unsigned lenrange[] = { UINT_MAX, 0, 0 };
4989 if (count_nonzero_bytes (rhs ? rhs : storetype, stmt, lenrange,
4990 &dummy, &dummy, &dummy))
4991 maybe_warn_overflow (stmt, true, lenrange[2]);
4993 return true;
4997 else
4999 idx = get_addr_stridx (lhs, stmt, NULL_TREE, &offset, rvals);
5000 if (idx > 0)
5001 si = get_strinfo (idx);
5004 /* Minimum and maximum leading non-zero bytes and the size of the store. */
5005 unsigned lenrange[] = { UINT_MAX, 0, 0 };
5007 /* Set to the minimum length of the string being assigned if known. */
5008 unsigned HOST_WIDE_INT rhs_minlen;
5010 /* STORING_NONZERO_P is true iff not all stored characters are zero.
5011 STORING_ALL_NONZERO_P is true if all stored characters are zero.
5012 STORING_ALL_ZEROS_P is true iff all stored characters are zero.
5013 Both are false when it's impossible to determine which is true. */
5014 bool storing_nonzero_p;
5015 bool storing_all_nonzero_p;
5016 bool storing_all_zeros_p;
5017 /* FULL_STRING_P is set when the stored sequence of characters form
5018 a nul-terminated string. */
5019 bool full_string_p;
5021 const bool ranges_valid
5022 = count_nonzero_bytes (rhs ? rhs : storetype, stmt,
5023 lenrange, &full_string_p,
5024 &storing_all_zeros_p, &storing_all_nonzero_p);
5026 if (ranges_valid)
5028 rhs_minlen = lenrange[0];
5029 storing_nonzero_p = lenrange[1] > 0;
5030 *zero_write = storing_all_zeros_p;
5032 maybe_warn_overflow (stmt, true, lenrange[2]);
5034 else
5036 rhs_minlen = HOST_WIDE_INT_M1U;
5037 full_string_p = false;
5038 storing_nonzero_p = false;
5039 storing_all_zeros_p = false;
5040 storing_all_nonzero_p = false;
5043 if (si != NULL)
5045 /* The corresponding element is set to 1 if the first and last
5046 element, respectively, of the sequence of characters being
5047 written over the string described by SI ends before
5048 the terminating nul (if it has one), to zero if the nul is
5049 being overwritten but not beyond, or negative otherwise. */
5050 int store_before_nul[2];
5051 if (ranges_valid)
5053 /* The offset of the last stored byte. */
5054 unsigned HOST_WIDE_INT endoff = offset + lenrange[2] - 1;
5055 store_before_nul[0]
5056 = compare_nonzero_chars (si, stmt, offset, rvals);
5057 if (endoff == offset)
5058 store_before_nul[1] = store_before_nul[0];
5059 else
5060 store_before_nul[1]
5061 = compare_nonzero_chars (si, stmt, endoff, rvals);
5063 else
5065 store_before_nul[0]
5066 = compare_nonzero_chars (si, stmt, offset, rvals);
5067 store_before_nul[1] = store_before_nul[0];
5068 gcc_assert (offset == 0 || store_before_nul[0] >= 0);
5071 if (storing_all_zeros_p
5072 && store_before_nul[0] == 0
5073 && store_before_nul[1] == 0
5074 && si->full_string_p)
5076 /* When overwriting a '\0' with a '\0', the store can be removed
5077 if we know it has been stored in the current function. */
5078 if (!stmt_could_throw_p (cfun, stmt) && si->writable)
5080 unlink_stmt_vdef (stmt);
5081 release_defs (stmt);
5082 gsi_remove (&m_gsi, true);
5083 return false;
5085 else
5087 si->writable = true;
5088 gsi_next (&m_gsi);
5089 return false;
5093 if (store_before_nul[1] > 0
5094 && storing_nonzero_p
5095 && lenrange[0] == lenrange[1]
5096 && lenrange[0] == lenrange[2]
5097 && TREE_CODE (storetype) == INTEGER_TYPE)
5099 /* Handle a store of one or more non-nul characters that ends
5100 before the terminating nul of the destination and so does
5101 not affect its length
5102 If si->nonzero_chars > OFFSET, we aren't overwriting '\0',
5103 and if we aren't storing '\0', we know that the length of
5104 the string and any other zero terminated string in memory
5105 remains the same. In that case we move to the next gimple
5106 statement and return to signal the caller that it shouldn't
5107 invalidate anything.
5109 This is beneficial for cases like:
5111 char p[20];
5112 void foo (char *q)
5114 strcpy (p, "foobar");
5115 size_t len = strlen (p); // can be folded to 6
5116 size_t len2 = strlen (q); // has to be computed
5117 p[0] = 'X';
5118 size_t len3 = strlen (p); // can be folded to 6
5119 size_t len4 = strlen (q); // can be folded to len2
5120 bar (len, len2, len3, len4);
5121 } */
5122 gsi_next (&m_gsi);
5123 return false;
5126 if (storing_nonzero_p
5127 || storing_all_zeros_p
5128 || (full_string_p && lenrange[1] == 0)
5129 || (offset != 0 && store_before_nul[1] > 0))
5131 /* When STORING_NONZERO_P, we know that the string will start
5132 with at least OFFSET + 1 nonzero characters. If storing
5133 a single character, set si->NONZERO_CHARS to the result.
5134 If storing multiple characters, try to determine the number
5135 of leading non-zero characters and set si->NONZERO_CHARS to
5136 the result instead.
5138 When STORING_ALL_ZEROS_P, or the first byte written is zero,
5139 i.e. FULL_STRING_P && LENRANGE[1] == 0, we know that the
5140 string is now OFFSET characters long.
5142 Otherwise, we're storing an unknown value at offset OFFSET,
5143 so need to clip the nonzero_chars to OFFSET.
5144 Use the minimum length of the string (or individual character)
5145 being stored if it's known. Otherwise, STORING_NONZERO_P
5146 guarantees it's at least 1. */
5147 HOST_WIDE_INT len
5148 = storing_nonzero_p && ranges_valid ? lenrange[0] : 1;
5149 location_t loc = gimple_location (stmt);
5150 tree oldlen = si->nonzero_chars;
5151 if (store_before_nul[1] == 0 && si->full_string_p)
5152 /* We're overwriting the nul terminator with a nonzero or
5153 unknown character. If the previous stmt was a memcpy,
5154 its length may be decreased. */
5155 adjust_last_stmt (si, stmt, false);
5156 si = unshare_strinfo (si);
5157 if (storing_nonzero_p)
5159 gcc_assert (len >= 0);
5160 si->nonzero_chars = build_int_cst (size_type_node, offset + len);
5162 else
5163 si->nonzero_chars = build_int_cst (size_type_node, offset);
5165 /* Set FULL_STRING_P only if the length of the strings being
5166 written is the same, and clear it if the strings have
5167 different lengths. In the latter case the length stored
5168 in si->NONZERO_CHARS becomes the lower bound.
5169 FIXME: Handle the upper bound of the length if possible. */
5170 si->full_string_p = full_string_p && lenrange[0] == lenrange[1];
5172 if (storing_all_zeros_p
5173 && ssaname
5174 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (ssaname))
5175 si->endptr = ssaname;
5176 else
5177 si->endptr = NULL;
5178 si->next = 0;
5179 si->stmt = NULL;
5180 si->writable = true;
5181 si->dont_invalidate = true;
5182 if (oldlen)
5184 tree adj = fold_build2_loc (loc, MINUS_EXPR, size_type_node,
5185 si->nonzero_chars, oldlen);
5186 adjust_related_strinfos (loc, si, adj);
5188 else
5189 si->prev = 0;
5192 else if (idx == 0 && (storing_all_zeros_p || storing_nonzero_p))
5194 if (ssaname)
5195 idx = new_stridx (ssaname);
5196 else
5197 idx = new_addr_stridx (lhs);
5198 if (idx != 0)
5200 tree ptr = (ssaname ? ssaname : build_fold_addr_expr (lhs));
5202 HOST_WIDE_INT slen;
5203 if (storing_all_zeros_p)
5204 slen = 0;
5205 else if (storing_nonzero_p && ranges_valid)
5207 /* FIXME: Handle the upper bound of the length when
5208 LENRANGE[0] != LENRANGE[1]. */
5209 slen = lenrange[0];
5210 if (lenrange[0] != lenrange[1])
5211 /* Set the minimum length but ignore the maximum
5212 for now. */
5213 full_string_p = false;
5215 else
5216 slen = -1;
5218 tree len = (slen <= 0
5219 ? size_zero_node
5220 : build_int_cst (size_type_node, slen));
5221 si = new_strinfo (ptr, idx, len, slen >= 0 && full_string_p);
5222 set_strinfo (idx, si);
5223 if (storing_all_zeros_p
5224 && ssaname
5225 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (ssaname))
5226 si->endptr = ssaname;
5227 si->dont_invalidate = true;
5228 si->writable = true;
5231 else if (idx == 0
5232 && rhs_minlen < HOST_WIDE_INT_M1U
5233 && ssaname == NULL_TREE
5234 && TREE_CODE (TREE_TYPE (lhs)) == ARRAY_TYPE)
5236 HOST_WIDE_INT a = int_size_in_bytes (TREE_TYPE (lhs));
5237 if (a > 0 && (unsigned HOST_WIDE_INT) a > rhs_minlen)
5239 int idx = new_addr_stridx (lhs);
5240 if (idx != 0)
5242 si = new_strinfo (build_fold_addr_expr (lhs), idx,
5243 build_int_cst (size_type_node, rhs_minlen),
5244 full_string_p);
5245 set_strinfo (idx, si);
5246 si->dont_invalidate = true;
5251 if (si != NULL && offset == 0 && storing_all_zeros_p && lenrange[2] == 1)
5253 /* For single-byte stores only, allow adjust_last_stmt to remove
5254 the statement if the stored '\0' is immediately overwritten. */
5255 laststmt.stmt = stmt;
5256 laststmt.len = build_int_cst (size_type_node, 1);
5257 laststmt.stridx = si->idx;
5259 return true;
5262 /* Try to fold strstr (s, t) eq/ne s to strncmp (s, t, strlen (t)) eq/ne 0. */
5264 static void
5265 fold_strstr_to_strncmp (tree rhs1, tree rhs2, gimple *stmt)
5267 if (TREE_CODE (rhs1) != SSA_NAME
5268 || TREE_CODE (rhs2) != SSA_NAME)
5269 return;
5271 gimple *call_stmt = NULL;
5272 for (int pass = 0; pass < 2; pass++)
5274 gimple *g = SSA_NAME_DEF_STMT (rhs1);
5275 if (gimple_call_builtin_p (g, BUILT_IN_STRSTR)
5276 && has_single_use (rhs1)
5277 && gimple_call_arg (g, 0) == rhs2)
5279 call_stmt = g;
5280 break;
5282 std::swap (rhs1, rhs2);
5285 if (call_stmt)
5287 tree arg0 = gimple_call_arg (call_stmt, 0);
5289 if (arg0 == rhs2)
5291 tree arg1 = gimple_call_arg (call_stmt, 1);
5292 tree arg1_len = NULL_TREE;
5293 int idx = get_stridx (arg1, call_stmt);
5295 if (idx)
5297 if (idx < 0)
5298 arg1_len = build_int_cst (size_type_node, ~idx);
5299 else
5301 strinfo *si = get_strinfo (idx);
5302 if (si)
5303 arg1_len = get_string_length (si);
5307 if (arg1_len != NULL_TREE)
5309 gimple_stmt_iterator gsi = gsi_for_stmt (call_stmt);
5310 tree strncmp_decl = builtin_decl_explicit (BUILT_IN_STRNCMP);
5312 if (!is_gimple_val (arg1_len))
5314 tree arg1_len_tmp = make_ssa_name (TREE_TYPE (arg1_len));
5315 gassign *arg1_stmt = gimple_build_assign (arg1_len_tmp,
5316 arg1_len);
5317 gsi_insert_before (&gsi, arg1_stmt, GSI_SAME_STMT);
5318 arg1_len = arg1_len_tmp;
5321 gcall *strncmp_call = gimple_build_call (strncmp_decl, 3,
5322 arg0, arg1, arg1_len);
5323 tree strncmp_lhs = make_ssa_name (integer_type_node);
5324 gimple_set_vuse (strncmp_call, gimple_vuse (call_stmt));
5325 gimple_call_set_lhs (strncmp_call, strncmp_lhs);
5326 gsi_remove (&gsi, true);
5327 gsi_insert_before (&gsi, strncmp_call, GSI_SAME_STMT);
5328 tree zero = build_zero_cst (TREE_TYPE (strncmp_lhs));
5330 if (is_gimple_assign (stmt))
5332 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
5334 tree cond = gimple_assign_rhs1 (stmt);
5335 TREE_OPERAND (cond, 0) = strncmp_lhs;
5336 TREE_OPERAND (cond, 1) = zero;
5338 else
5340 gimple_assign_set_rhs1 (stmt, strncmp_lhs);
5341 gimple_assign_set_rhs2 (stmt, zero);
5344 else
5346 gcond *cond = as_a<gcond *> (stmt);
5347 gimple_cond_set_lhs (cond, strncmp_lhs);
5348 gimple_cond_set_rhs (cond, zero);
5350 update_stmt (stmt);
5356 /* Return true if TYPE corresponds to a narrow character type. */
5358 static bool
5359 is_char_type (tree type)
5361 return (TREE_CODE (type) == INTEGER_TYPE
5362 && TYPE_MODE (type) == TYPE_MODE (char_type_node)
5363 && TYPE_PRECISION (type) == TYPE_PRECISION (char_type_node));
5366 /* Check the built-in call at GSI for validity and optimize it.
5367 Uses RVALS to determine range information.
5368 Return true to let the caller advance *GSI to the next statement
5369 in the basic block and false otherwise. */
5371 bool
5372 strlen_pass::check_and_optimize_call (bool *zero_write)
5374 gimple *stmt = gsi_stmt (m_gsi);
5376 if (!gimple_call_builtin_p (stmt, BUILT_IN_NORMAL))
5378 tree fntype = gimple_call_fntype (stmt);
5379 if (!fntype)
5380 return true;
5382 if (lookup_attribute ("alloc_size", TYPE_ATTRIBUTES (fntype)))
5384 handle_alloc_call (BUILT_IN_NONE);
5385 return true;
5388 if (tree lhs = gimple_call_lhs (stmt))
5389 handle_assign (lhs, zero_write);
5391 /* Proceed to handle user-defined formatting functions. */
5394 /* When not optimizing we must be checking printf calls which
5395 we do even for user-defined functions when they are declared
5396 with attribute format. */
5397 if (!flag_optimize_strlen
5398 || !strlen_optimize
5399 || !valid_builtin_call (stmt))
5400 return !handle_printf_call (&m_gsi, ptr_qry);
5402 tree callee = gimple_call_fndecl (stmt);
5403 switch (DECL_FUNCTION_CODE (callee))
5405 case BUILT_IN_STRLEN:
5406 case BUILT_IN_STRNLEN:
5407 handle_builtin_strlen ();
5408 break;
5409 case BUILT_IN_STRCHR:
5410 handle_builtin_strchr ();
5411 break;
5412 case BUILT_IN_STRCPY:
5413 case BUILT_IN_STRCPY_CHK:
5414 case BUILT_IN_STPCPY:
5415 case BUILT_IN_STPCPY_CHK:
5416 handle_builtin_strcpy (DECL_FUNCTION_CODE (callee));
5417 break;
5419 case BUILT_IN_STRNCAT:
5420 case BUILT_IN_STRNCAT_CHK:
5421 handle_builtin_strncat (DECL_FUNCTION_CODE (callee));
5422 break;
5424 case BUILT_IN_STPNCPY:
5425 case BUILT_IN_STPNCPY_CHK:
5426 case BUILT_IN_STRNCPY:
5427 case BUILT_IN_STRNCPY_CHK:
5428 handle_builtin_stxncpy_strncat (false);
5429 break;
5431 case BUILT_IN_MEMCPY:
5432 case BUILT_IN_MEMCPY_CHK:
5433 case BUILT_IN_MEMPCPY:
5434 case BUILT_IN_MEMPCPY_CHK:
5435 handle_builtin_memcpy (DECL_FUNCTION_CODE (callee));
5436 break;
5437 case BUILT_IN_STRCAT:
5438 case BUILT_IN_STRCAT_CHK:
5439 handle_builtin_strcat (DECL_FUNCTION_CODE (callee));
5440 break;
5441 case BUILT_IN_ALLOCA:
5442 case BUILT_IN_ALLOCA_WITH_ALIGN:
5443 case BUILT_IN_MALLOC:
5444 case BUILT_IN_CALLOC:
5445 handle_alloc_call (DECL_FUNCTION_CODE (callee));
5446 break;
5447 case BUILT_IN_MEMSET:
5448 if (handle_builtin_memset (zero_write))
5449 return false;
5450 break;
5451 case BUILT_IN_MEMCMP:
5452 if (handle_builtin_memcmp ())
5453 return false;
5454 break;
5455 case BUILT_IN_STRCMP:
5456 case BUILT_IN_STRNCMP:
5457 if (handle_builtin_string_cmp ())
5458 return false;
5459 break;
5460 default:
5461 if (handle_printf_call (&m_gsi, ptr_qry))
5462 return false;
5463 break;
5466 return true;
5469 /* Handle an assignment statement at *GSI to a LHS of integral type.
5470 If GSI's basic block needs clean-up of EH, set *CLEANUP_EH to true. */
5472 void
5473 strlen_pass::handle_integral_assign (bool *cleanup_eh)
5475 gimple *stmt = gsi_stmt (m_gsi);
5476 tree lhs = gimple_assign_lhs (stmt);
5477 tree lhs_type = TREE_TYPE (lhs);
5479 enum tree_code code = gimple_assign_rhs_code (stmt);
5480 if (code == COND_EXPR)
5482 tree cond = gimple_assign_rhs1 (stmt);
5483 enum tree_code cond_code = TREE_CODE (cond);
5485 if (cond_code == EQ_EXPR || cond_code == NE_EXPR)
5486 fold_strstr_to_strncmp (TREE_OPERAND (cond, 0),
5487 TREE_OPERAND (cond, 1), stmt);
5489 else if (code == EQ_EXPR || code == NE_EXPR)
5490 fold_strstr_to_strncmp (gimple_assign_rhs1 (stmt),
5491 gimple_assign_rhs2 (stmt), stmt);
5492 else if (gimple_assign_load_p (stmt)
5493 && TREE_CODE (lhs_type) == INTEGER_TYPE
5494 && TYPE_MODE (lhs_type) == TYPE_MODE (char_type_node)
5495 && (TYPE_PRECISION (lhs_type)
5496 == TYPE_PRECISION (char_type_node))
5497 && !gimple_has_volatile_ops (stmt))
5499 tree off = integer_zero_node;
5500 unsigned HOST_WIDE_INT coff = 0;
5501 int idx = 0;
5502 tree rhs1 = gimple_assign_rhs1 (stmt);
5503 if (code == MEM_REF)
5505 idx = get_stridx (TREE_OPERAND (rhs1, 0), stmt);
5506 if (idx > 0)
5508 strinfo *si = get_strinfo (idx);
5509 if (si
5510 && si->nonzero_chars
5511 && TREE_CODE (si->nonzero_chars) == INTEGER_CST
5512 && (wi::to_widest (si->nonzero_chars)
5513 >= wi::to_widest (off)))
5514 off = TREE_OPERAND (rhs1, 1);
5515 else
5516 /* This case is not useful. See if get_addr_stridx
5517 returns something usable. */
5518 idx = 0;
5521 if (idx <= 0)
5522 idx = get_addr_stridx (rhs1, stmt, NULL_TREE, &coff);
5523 if (idx > 0)
5525 strinfo *si = get_strinfo (idx);
5526 if (si
5527 && si->nonzero_chars
5528 && TREE_CODE (si->nonzero_chars) == INTEGER_CST)
5530 widest_int w1 = wi::to_widest (si->nonzero_chars);
5531 widest_int w2 = wi::to_widest (off) + coff;
5532 if (w1 == w2
5533 && si->full_string_p)
5535 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
5537 fprintf (dump_file, "Optimizing: ");
5538 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
5541 /* Reading the final '\0' character. */
5542 tree zero = build_int_cst (lhs_type, 0);
5543 gimple_set_vuse (stmt, NULL_TREE);
5544 gimple_assign_set_rhs_from_tree (&m_gsi, zero);
5545 *cleanup_eh
5546 |= maybe_clean_or_replace_eh_stmt (stmt,
5547 gsi_stmt (m_gsi));
5548 stmt = gsi_stmt (m_gsi);
5549 update_stmt (stmt);
5551 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
5553 fprintf (dump_file, "into: ");
5554 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
5557 else if (w1 > w2)
5559 /* Reading a character before the final '\0'
5560 character. Just set the value range to ~[0, 0]
5561 if we don't have anything better. */
5562 value_range r;
5563 if (!get_range_query (cfun)->range_of_expr (r, lhs)
5564 || r.varying_p ())
5566 r.set_nonzero (lhs_type);
5567 set_range_info (lhs, r);
5573 else if (code == MEM_REF && TREE_CODE (lhs) == SSA_NAME)
5575 if (int idx = new_stridx (lhs))
5577 /* Record multi-byte assignments from MEM_REFs. */
5578 bool storing_all_nonzero_p;
5579 bool storing_all_zeros_p;
5580 bool full_string_p;
5581 unsigned lenrange[] = { UINT_MAX, 0, 0 };
5582 tree rhs = gimple_assign_rhs1 (stmt);
5583 const bool ranges_valid
5584 = count_nonzero_bytes (rhs, stmt,
5585 lenrange, &full_string_p,
5586 &storing_all_zeros_p,
5587 &storing_all_nonzero_p);
5588 if (ranges_valid)
5590 tree length = build_int_cst (sizetype, lenrange[0]);
5591 strinfo *si = new_strinfo (lhs, idx, length, full_string_p);
5592 set_strinfo (idx, si);
5593 si->writable = true;
5594 si->dont_invalidate = true;
5599 if (strlen_to_stridx)
5601 tree rhs1 = gimple_assign_rhs1 (stmt);
5602 if (stridx_strlenloc *ps = strlen_to_stridx->get (rhs1))
5603 strlen_to_stridx->put (lhs, stridx_strlenloc (*ps));
5607 /* Handle assignment statement at *GSI to LHS. Set *ZERO_WRITE if
5608 the assignment stores all zero bytes. */
5610 bool
5611 strlen_pass::handle_assign (tree lhs, bool *zero_write)
5613 tree type = TREE_TYPE (lhs);
5614 if (TREE_CODE (type) == ARRAY_TYPE)
5615 type = TREE_TYPE (type);
5617 bool is_char_store = is_char_type (type);
5618 if (!is_char_store && TREE_CODE (lhs) == MEM_REF)
5620 /* To consider stores into char objects via integer types other
5621 than char but not those to non-character objects, determine
5622 the type of the destination rather than just the type of
5623 the access. */
5624 for (int i = 0; i != 2; ++i)
5626 tree ref = TREE_OPERAND (lhs, i);
5627 type = TREE_TYPE (ref);
5628 if (TREE_CODE (type) == POINTER_TYPE)
5629 type = TREE_TYPE (type);
5630 if (TREE_CODE (type) == ARRAY_TYPE)
5631 type = TREE_TYPE (type);
5632 if (is_char_type (type))
5634 is_char_store = true;
5635 break;
5640 /* Handle a single or multibyte assignment. */
5641 if (is_char_store && !handle_store (zero_write))
5642 return false;
5644 return true;
5648 /* Attempt to check for validity of the performed access a single statement
5649 at *GSI using string length knowledge, and to optimize it.
5650 If the given basic block needs clean-up of EH, CLEANUP_EH is set to
5651 true. Return true to let the caller advance *GSI to the next statement
5652 in the basic block and false otherwise. */
5654 bool
5655 strlen_pass::check_and_optimize_stmt (bool *cleanup_eh)
5657 gimple *stmt = gsi_stmt (m_gsi);
5659 /* For statements that modify a string, set to true if the write
5660 is only zeros. */
5661 bool zero_write = false;
5663 if (is_gimple_call (stmt))
5665 if (!check_and_optimize_call (&zero_write))
5666 return false;
5668 else if (!flag_optimize_strlen || !strlen_optimize)
5669 return true;
5670 else if (is_gimple_assign (stmt) && !gimple_clobber_p (stmt))
5672 /* Handle non-clobbering assignment. */
5673 tree lhs = gimple_assign_lhs (stmt);
5674 tree lhs_type = TREE_TYPE (lhs);
5676 if (TREE_CODE (lhs) == SSA_NAME && POINTER_TYPE_P (lhs_type))
5678 if (gimple_assign_single_p (stmt)
5679 || (gimple_assign_cast_p (stmt)
5680 && POINTER_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt)))))
5682 int idx = get_stridx (gimple_assign_rhs1 (stmt), stmt);
5683 ssa_ver_to_stridx[SSA_NAME_VERSION (lhs)] = idx;
5685 else if (gimple_assign_rhs_code (stmt) == POINTER_PLUS_EXPR)
5686 handle_pointer_plus ();
5688 else if (TREE_CODE (lhs) == SSA_NAME && INTEGRAL_TYPE_P (lhs_type))
5689 /* Handle assignment to a character. */
5690 handle_integral_assign (cleanup_eh);
5691 else if (TREE_CODE (lhs) != SSA_NAME && !TREE_SIDE_EFFECTS (lhs))
5692 if (!handle_assign (lhs, &zero_write))
5693 return false;
5695 else if (gcond *cond = dyn_cast<gcond *> (stmt))
5697 enum tree_code code = gimple_cond_code (cond);
5698 if (code == EQ_EXPR || code == NE_EXPR)
5699 fold_strstr_to_strncmp (gimple_cond_lhs (stmt),
5700 gimple_cond_rhs (stmt), stmt);
5703 if (gimple_vdef (stmt))
5704 maybe_invalidate (stmt, zero_write);
5705 return true;
5708 /* Recursively call maybe_invalidate on stmts that might be executed
5709 in between dombb and current bb and that contain a vdef. Stop when
5710 *count stmts are inspected, or if the whole strinfo vector has
5711 been invalidated. */
5713 static void
5714 do_invalidate (basic_block dombb, gimple *phi, bitmap visited, int *count)
5716 unsigned int i, n = gimple_phi_num_args (phi);
5718 for (i = 0; i < n; i++)
5720 tree vuse = gimple_phi_arg_def (phi, i);
5721 gimple *stmt = SSA_NAME_DEF_STMT (vuse);
5722 basic_block bb = gimple_bb (stmt);
5723 if (bb == NULL
5724 || bb == dombb
5725 || !bitmap_set_bit (visited, bb->index)
5726 || !dominated_by_p (CDI_DOMINATORS, bb, dombb))
5727 continue;
5728 while (1)
5730 if (gimple_code (stmt) == GIMPLE_PHI)
5732 do_invalidate (dombb, stmt, visited, count);
5733 if (*count == 0)
5734 return;
5735 break;
5737 if (--*count == 0)
5738 return;
5739 if (!maybe_invalidate (stmt))
5741 *count = 0;
5742 return;
5744 vuse = gimple_vuse (stmt);
5745 stmt = SSA_NAME_DEF_STMT (vuse);
5746 if (gimple_bb (stmt) != bb)
5748 bb = gimple_bb (stmt);
5749 if (bb == NULL
5750 || bb == dombb
5751 || !bitmap_set_bit (visited, bb->index)
5752 || !dominated_by_p (CDI_DOMINATORS, bb, dombb))
5753 break;
5759 /* Release pointer_query cache. */
5761 strlen_pass::~strlen_pass ()
5763 ptr_qry.flush_cache ();
5766 /* Callback for walk_dominator_tree. Attempt to optimize various
5767 string ops by remembering string lengths pointed by pointer SSA_NAMEs. */
5769 edge
5770 strlen_pass::before_dom_children (basic_block bb)
5772 basic_block dombb = get_immediate_dominator (CDI_DOMINATORS, bb);
5774 if (dombb == NULL)
5775 stridx_to_strinfo = NULL;
5776 else
5778 stridx_to_strinfo = ((vec<strinfo *, va_heap, vl_embed> *) dombb->aux);
5779 if (stridx_to_strinfo)
5781 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
5782 gsi_next (&gsi))
5784 gphi *phi = gsi.phi ();
5785 if (virtual_operand_p (gimple_phi_result (phi)))
5787 bitmap visited = BITMAP_ALLOC (NULL);
5788 int count_vdef = 100;
5789 do_invalidate (dombb, phi, visited, &count_vdef);
5790 BITMAP_FREE (visited);
5791 if (count_vdef == 0)
5793 /* If there were too many vdefs in between immediate
5794 dominator and current bb, invalidate everything.
5795 If stridx_to_strinfo has been unshared, we need
5796 to free it, otherwise just set it to NULL. */
5797 if (!strinfo_shared ())
5799 unsigned int i;
5800 strinfo *si;
5802 for (i = 1;
5803 vec_safe_iterate (stridx_to_strinfo, i, &si);
5804 ++i)
5806 free_strinfo (si);
5807 (*stridx_to_strinfo)[i] = NULL;
5810 else
5811 stridx_to_strinfo = NULL;
5813 break;
5819 /* If all PHI arguments have the same string index, the PHI result
5820 has it as well. */
5821 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
5822 gsi_next (&gsi))
5824 gphi *phi = gsi.phi ();
5825 tree result = gimple_phi_result (phi);
5826 if (!virtual_operand_p (result) && POINTER_TYPE_P (TREE_TYPE (result)))
5828 int idx = get_stridx (gimple_phi_arg_def (phi, 0), phi);
5829 if (idx != 0)
5831 unsigned int i, n = gimple_phi_num_args (phi);
5832 for (i = 1; i < n; i++)
5833 if (idx != get_stridx (gimple_phi_arg_def (phi, i), phi))
5834 break;
5835 if (i == n)
5836 ssa_ver_to_stridx[SSA_NAME_VERSION (result)] = idx;
5841 bool cleanup_eh = false;
5843 /* Attempt to optimize individual statements. */
5844 for (m_gsi = gsi_start_bb (bb); !gsi_end_p (m_gsi); )
5846 /* Reset search depth performance counter. */
5847 ptr_qry.depth = 0;
5849 if (check_and_optimize_stmt (&cleanup_eh))
5850 gsi_next (&m_gsi);
5853 if (cleanup_eh && gimple_purge_dead_eh_edges (bb))
5854 m_cleanup_cfg = true;
5856 bb->aux = stridx_to_strinfo;
5857 if (vec_safe_length (stridx_to_strinfo) && !strinfo_shared ())
5858 (*stridx_to_strinfo)[0] = (strinfo *) bb;
5859 return NULL;
5862 /* Callback for walk_dominator_tree. Free strinfo vector if it is
5863 owned by the current bb, clear bb->aux. */
5865 void
5866 strlen_pass::after_dom_children (basic_block bb)
5868 if (bb->aux)
5870 stridx_to_strinfo = ((vec<strinfo *, va_heap, vl_embed> *) bb->aux);
5871 if (vec_safe_length (stridx_to_strinfo)
5872 && (*stridx_to_strinfo)[0] == (strinfo *) bb)
5874 unsigned int i;
5875 strinfo *si;
5877 for (i = 1; vec_safe_iterate (stridx_to_strinfo, i, &si); ++i)
5878 free_strinfo (si);
5879 vec_free (stridx_to_strinfo);
5881 bb->aux = NULL;
5885 namespace {
5887 static unsigned int
5888 printf_strlen_execute (function *fun, bool warn_only)
5890 strlen_optimize = !warn_only;
5892 calculate_dominance_info (CDI_DOMINATORS);
5893 loop_optimizer_init (LOOPS_NORMAL);
5894 scev_initialize ();
5896 gcc_assert (!strlen_to_stridx);
5897 if (warn_stringop_overflow || warn_stringop_truncation)
5898 strlen_to_stridx = new hash_map<tree, stridx_strlenloc> ();
5900 /* This has to happen after initializing the loop optimizer
5901 and initializing SCEV as they create new SSA_NAMEs. */
5902 ssa_ver_to_stridx.safe_grow_cleared (num_ssa_names, true);
5903 max_stridx = 1;
5905 /* String length optimization is implemented as a walk of the dominator
5906 tree and a forward walk of statements within each block. */
5907 strlen_pass walker (CDI_DOMINATORS);
5908 walker.walk (ENTRY_BLOCK_PTR_FOR_FN (fun));
5910 if (dump_file && (dump_flags & TDF_DETAILS))
5911 walker.ptr_qry.dump (dump_file, true);
5913 ssa_ver_to_stridx.release ();
5914 strinfo_pool.release ();
5915 if (decl_to_stridxlist_htab)
5917 obstack_free (&stridx_obstack, NULL);
5918 delete decl_to_stridxlist_htab;
5919 decl_to_stridxlist_htab = NULL;
5921 laststmt.stmt = NULL;
5922 laststmt.len = NULL_TREE;
5923 laststmt.stridx = 0;
5925 if (strlen_to_stridx)
5927 strlen_to_stridx->empty ();
5928 delete strlen_to_stridx;
5929 strlen_to_stridx = NULL;
5932 scev_finalize ();
5933 loop_optimizer_finalize ();
5935 return walker.m_cleanup_cfg ? TODO_cleanup_cfg : 0;
5938 /* This file defines two passes: one for warnings that runs only when
5939 optimization is disabled, and another that implements optimizations
5940 and also issues warnings. */
5942 const pass_data pass_data_warn_printf =
5944 GIMPLE_PASS, /* type */
5945 "warn-printf", /* name */
5946 OPTGROUP_NONE, /* optinfo_flags */
5947 TV_NONE, /* tv_id */
5948 /* Normally an optimization pass would require PROP_ssa but because
5949 this pass runs early, with no optimization, to do sprintf format
5950 checking, it only requires PROP_cfg. */
5951 PROP_cfg, /* properties_required */
5952 0, /* properties_provided */
5953 0, /* properties_destroyed */
5954 0, /* todo_flags_start */
5955 0, /* todo_flags_finish */
5958 class pass_warn_printf : public gimple_opt_pass
5960 public:
5961 pass_warn_printf (gcc::context *ctxt)
5962 : gimple_opt_pass (pass_data_warn_printf, ctxt)
5965 bool gate (function *) final override;
5966 unsigned int execute (function *fun) final override
5968 return printf_strlen_execute (fun, true);
5973 /* Return true to run the warning pass only when not optimizing and
5974 iff either -Wformat-overflow or -Wformat-truncation is specified. */
5976 bool
5977 pass_warn_printf::gate (function *)
5979 return !optimize && (warn_format_overflow > 0 || warn_format_trunc > 0);
5982 const pass_data pass_data_strlen =
5984 GIMPLE_PASS, /* type */
5985 "strlen", /* name */
5986 OPTGROUP_NONE, /* optinfo_flags */
5987 TV_TREE_STRLEN, /* tv_id */
5988 PROP_cfg | PROP_ssa, /* properties_required */
5989 0, /* properties_provided */
5990 0, /* properties_destroyed */
5991 0, /* todo_flags_start */
5992 0, /* todo_flags_finish */
5995 class pass_strlen : public gimple_opt_pass
5997 public:
5998 pass_strlen (gcc::context *ctxt)
5999 : gimple_opt_pass (pass_data_strlen, ctxt)
6002 opt_pass * clone () final override { return new pass_strlen (m_ctxt); }
6004 bool gate (function *) final override;
6005 unsigned int execute (function *fun) final override
6007 return printf_strlen_execute (fun, false);
6011 /* Return true to run the pass only when the sprintf and/or strlen
6012 optimizations are enabled and -Wformat-overflow or -Wformat-truncation
6013 are specified. */
6015 bool
6016 pass_strlen::gate (function *)
6018 return ((warn_format_overflow > 0
6019 || warn_format_trunc > 0
6020 || warn_restrict > 0
6021 || flag_optimize_strlen > 0
6022 || flag_printf_return_value)
6023 && optimize > 0);
6026 } // anon namespace
6028 gimple_opt_pass *
6029 make_pass_warn_printf (gcc::context *ctxt)
6031 return new pass_warn_printf (ctxt);
6034 gimple_opt_pass *
6035 make_pass_strlen (gcc::context *ctxt)
6037 return new pass_strlen (ctxt);