Daily bump.
[official-gcc.git] / gcc / tree-ssa-strlen.c
blob2de7cb1a6a0555a4b7782f7c0cfc9599963094ae
1 /* String length optimization
2 Copyright (C) 2011-2021 Free Software Foundation, Inc.
3 Contributed by Jakub Jelinek <jakub@redhat.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "alloc-pool.h"
29 #include "tree-pass.h"
30 #include "ssa.h"
31 #include "cgraph.h"
32 #include "gimple-pretty-print.h"
33 #include "gimple-ssa-warn-access.h"
34 #include "gimple-ssa-warn-restrict.h"
35 #include "fold-const.h"
36 #include "stor-layout.h"
37 #include "gimple-fold.h"
38 #include "tree-eh.h"
39 #include "gimplify.h"
40 #include "gimple-iterator.h"
41 #include "gimplify-me.h"
42 #include "expr.h"
43 #include "tree-cfg.h"
44 #include "tree-dfa.h"
45 #include "domwalk.h"
46 #include "tree-ssa-alias.h"
47 #include "tree-ssa-propagate.h"
48 #include "tree-ssa-strlen.h"
49 #include "tree-hash-traits.h"
50 #include "builtins.h"
51 #include "pointer-query.h"
52 #include "target.h"
53 #include "diagnostic-core.h"
54 #include "diagnostic.h"
55 #include "intl.h"
56 #include "attribs.h"
57 #include "calls.h"
58 #include "cfgloop.h"
59 #include "tree-ssa-loop.h"
60 #include "tree-scalar-evolution.h"
61 #include "vr-values.h"
62 #include "gimple-ssa-evrp-analyze.h"
63 #include "tree-ssa.h"
65 /* A vector indexed by SSA_NAME_VERSION. 0 means unknown, positive value
66 is an index into strinfo vector, negative value stands for
67 string length of a string literal (~strlen). */
68 static vec<int> ssa_ver_to_stridx;
70 /* Number of currently active string indexes plus one. */
71 static int max_stridx;
73 /* Set to true to optimize, false when just checking. */
74 static bool strlen_optimize;
76 /* String information record. */
77 struct strinfo
79 /* Number of leading characters that are known to be nonzero. This is
80 also the length of the string if FULL_STRING_P.
82 The values in a list of related string pointers must be consistent;
83 that is, if strinfo B comes X bytes after strinfo A, it must be
84 the case that A->nonzero_chars == X + B->nonzero_chars. */
85 tree nonzero_chars;
86 /* Any of the corresponding pointers for querying alias oracle. */
87 tree ptr;
88 /* STMT is used for two things:
90 - To record the statement that should be used for delayed length
91 computations. We maintain the invariant that all related strinfos
92 have delayed lengths or none do.
94 - To record the malloc or calloc call that produced this result
95 to optimize away malloc/memset sequences. STMT is reset after
96 a calloc-allocated object has been stored a non-zero value into. */
97 gimple *stmt;
98 /* Set to the dynamic allocation statement for the object (alloca,
99 calloc, malloc, or VLA). Unlike STMT, once set for a strinfo
100 object, ALLOC doesn't change. */
101 gimple *alloc;
102 /* Pointer to '\0' if known, if NULL, it can be computed as
103 ptr + length. */
104 tree endptr;
105 /* Reference count. Any changes to strinfo entry possibly shared
106 with dominating basic blocks need unshare_strinfo first, except
107 for dont_invalidate which affects only the immediately next
108 maybe_invalidate. */
109 int refcount;
110 /* Copy of index. get_strinfo (si->idx) should return si; */
111 int idx;
112 /* These 3 fields are for chaining related string pointers together.
113 E.g. for
114 bl = strlen (b); dl = strlen (d); strcpy (a, b); c = a + bl;
115 strcpy (c, d); e = c + dl;
116 strinfo(a) -> strinfo(c) -> strinfo(e)
117 All have ->first field equal to strinfo(a)->idx and are doubly
118 chained through prev/next fields. The later strinfos are required
119 to point into the same string with zero or more bytes after
120 the previous pointer and all bytes in between the two pointers
121 must be non-zero. Functions like strcpy or memcpy are supposed
122 to adjust all previous strinfo lengths, but not following strinfo
123 lengths (those are uncertain, usually invalidated during
124 maybe_invalidate, except when the alias oracle knows better).
125 Functions like strcat on the other side adjust the whole
126 related strinfo chain.
127 They are updated lazily, so to use the chain the same first fields
128 and si->prev->next == si->idx needs to be verified. */
129 int first;
130 int next;
131 int prev;
132 /* A flag whether the string is known to be written in the current
133 function. */
134 bool writable;
135 /* A flag for the next maybe_invalidate that this strinfo shouldn't
136 be invalidated. Always cleared by maybe_invalidate. */
137 bool dont_invalidate;
138 /* True if the string is known to be nul-terminated after NONZERO_CHARS
139 characters. False is useful when detecting strings that are built
140 up via successive memcpys. */
141 bool full_string_p;
144 /* Pool for allocating strinfo_struct entries. */
145 static object_allocator<strinfo> strinfo_pool ("strinfo pool");
147 /* Vector mapping positive string indexes to strinfo, for the
148 current basic block. The first pointer in the vector is special,
149 it is either NULL, meaning the vector isn't shared, or it is
150 a basic block pointer to the owner basic_block if shared.
151 If some other bb wants to modify the vector, the vector needs
152 to be unshared first, and only the owner bb is supposed to free it. */
153 static vec<strinfo *, va_heap, vl_embed> *stridx_to_strinfo;
155 /* One OFFSET->IDX mapping. */
156 struct stridxlist
158 struct stridxlist *next;
159 HOST_WIDE_INT offset;
160 int idx;
163 /* Hash table entry, mapping a DECL to a chain of OFFSET->IDX mappings. */
164 struct decl_stridxlist_map
166 struct tree_map_base base;
167 struct stridxlist list;
170 /* Hash table for mapping decls to a chained list of offset -> idx
171 mappings. */
172 typedef hash_map<tree_decl_hash, stridxlist> decl_to_stridxlist_htab_t;
173 static decl_to_stridxlist_htab_t *decl_to_stridxlist_htab;
175 /* Hash table mapping strlen (or strnlen with constant bound and return
176 smaller than bound) calls to stridx instances describing
177 the calls' arguments. Non-null only when warn_stringop_truncation
178 is non-zero. */
179 typedef std::pair<int, location_t> stridx_strlenloc;
180 static hash_map<tree, stridx_strlenloc> *strlen_to_stridx;
182 /* Obstack for struct stridxlist and struct decl_stridxlist_map. */
183 static struct obstack stridx_obstack;
185 /* Last memcpy statement if it could be adjusted if the trailing
186 '\0' written is immediately overwritten, or
187 *x = '\0' store that could be removed if it is immediately overwritten. */
188 struct laststmt_struct
190 gimple *stmt;
191 tree len;
192 int stridx;
193 } laststmt;
195 static int get_stridx_plus_constant (strinfo *, unsigned HOST_WIDE_INT, tree);
197 /* Sets MINMAX to either the constant value or the range VAL is in
198 and returns either the constant value or VAL on success or null
199 when the range couldn't be determined. Uses RVALS or CFUN for
200 range info, whichever is nonnull. */
202 tree
203 get_range (tree val, gimple *stmt, wide_int minmax[2],
204 range_query *rvals /* = NULL */)
206 if (!rvals)
208 if (!cfun)
209 /* When called from front ends for global initializers CFUN
210 may be null. */
211 return NULL_TREE;
213 rvals = get_range_query (cfun);
216 value_range vr;
217 if (!rvals->range_of_expr (vr, val, stmt))
218 return NULL_TREE;
220 value_range_kind rng = vr.kind ();
221 if (rng == VR_RANGE)
223 /* Only handle straight ranges. */
224 minmax[0] = wi::to_wide (vr.min ());
225 minmax[1] = wi::to_wide (vr.max ());
226 return val;
229 return NULL_TREE;
232 class strlen_pass : public dom_walker
234 public:
235 strlen_pass (cdi_direction direction)
236 : dom_walker (direction),
237 evrp (false),
238 ptr_qry (&evrp, &var_cache),
239 var_cache (),
240 m_cleanup_cfg (false)
244 ~strlen_pass ();
246 virtual edge before_dom_children (basic_block);
247 virtual void after_dom_children (basic_block);
249 bool check_and_optimize_stmt (bool *cleanup_eh);
250 bool check_and_optimize_call (bool *zero_write);
251 bool handle_assign (tree lhs, bool *zero_write);
252 bool handle_store (bool *zero_write);
253 void handle_pointer_plus ();
254 void handle_builtin_strlen ();
255 void handle_builtin_strchr ();
256 void handle_builtin_strcpy (built_in_function);
257 void handle_integral_assign (bool *cleanup_eh);
258 void handle_builtin_stxncpy_strncat (bool append_p);
259 void handle_builtin_memcpy (built_in_function bcode);
260 void handle_builtin_strcat (built_in_function bcode);
261 void handle_builtin_strncat (built_in_function);
262 bool handle_builtin_memset (bool *zero_write);
263 bool handle_builtin_memcmp ();
264 bool handle_builtin_string_cmp ();
265 void handle_alloc_call (built_in_function);
266 void maybe_warn_overflow (gimple *stmt, bool call_lhs, tree len,
267 strinfo *si = NULL, bool plus_one = false,
268 bool rawmem = false);
269 void maybe_warn_overflow (gimple *stmt, bool call_lhs,
270 unsigned HOST_WIDE_INT len,
271 strinfo *si = NULL,
272 bool plus_one = false, bool rawmem = false);
273 void adjust_last_stmt (strinfo *si, gimple *stmt, bool is_strcat);
274 tree strxcmp_eqz_result (gimple *stmt, tree arg1, int idx1,
275 tree arg2, int idx2,
276 unsigned HOST_WIDE_INT bound,
277 unsigned HOST_WIDE_INT len[2],
278 unsigned HOST_WIDE_INT *psize);
279 bool count_nonzero_bytes (tree expr_or_type,
280 unsigned lenrange[3], bool *nulterm,
281 bool *allnul, bool *allnonnul);
282 bool count_nonzero_bytes (tree exp,
283 unsigned HOST_WIDE_INT offset,
284 unsigned HOST_WIDE_INT nbytes,
285 unsigned lenrange[3], bool *nulterm,
286 bool *allnul, bool *allnonnul,
287 ssa_name_limit_t &snlim);
288 bool count_nonzero_bytes_addr (tree exp,
289 unsigned HOST_WIDE_INT offset,
290 unsigned HOST_WIDE_INT nbytes,
291 unsigned lenrange[3], bool *nulterm,
292 bool *allnul, bool *allnonnul,
293 ssa_name_limit_t &snlim);
294 bool get_len_or_size (gimple *stmt, tree arg, int idx,
295 unsigned HOST_WIDE_INT lenrng[2],
296 unsigned HOST_WIDE_INT *size, bool *nulterm);
298 /* EVRP analyzer used for printf argument range processing, and to
299 track strlen results across integer variable assignments. */
300 evrp_range_analyzer evrp;
302 /* A pointer_query object and its cache to store information about
303 pointers and their targets in. */
304 pointer_query ptr_qry;
305 pointer_query::cache_type var_cache;
307 gimple_stmt_iterator m_gsi;
309 /* Flag that will trigger TODO_cleanup_cfg to be returned in strlen
310 execute function. */
311 bool m_cleanup_cfg;
314 /* Return:
316 * +1 if SI is known to start with more than OFF nonzero characters.
318 * 0 if SI is known to start with exactly OFF nonzero characters.
320 * -1 if SI either does not start with OFF nonzero characters
321 or the relationship between the number of leading nonzero
322 characters in SI and OFF is unknown. */
324 static int
325 compare_nonzero_chars (strinfo *si, unsigned HOST_WIDE_INT off)
327 if (si->nonzero_chars
328 && TREE_CODE (si->nonzero_chars) == INTEGER_CST)
329 return compare_tree_int (si->nonzero_chars, off);
330 else
331 return -1;
334 /* Same as above but suitable also for strings with non-constant lengths.
335 Uses RVALS to determine length range. */
337 static int
338 compare_nonzero_chars (strinfo *si, unsigned HOST_WIDE_INT off,
339 range_query *rvals)
341 if (!si->nonzero_chars)
342 return -1;
344 if (TREE_CODE (si->nonzero_chars) == INTEGER_CST)
345 return compare_tree_int (si->nonzero_chars, off);
347 if (!rvals || TREE_CODE (si->nonzero_chars) != SSA_NAME)
348 return -1;
350 value_range vr;
351 if (!rvals->range_of_expr (vr, si->nonzero_chars, si->stmt))
352 return -1;
353 value_range_kind rng = vr.kind ();
354 if (rng != VR_RANGE)
355 return -1;
357 /* If the offset is less than the minimum length or if the bounds
358 of the length range are equal return the result of the comparison
359 same as in the constant case. Otherwise return a conservative
360 result. */
361 int cmpmin = compare_tree_int (vr.min (), off);
362 if (cmpmin > 0 || tree_int_cst_equal (vr.min (), vr.max ()))
363 return cmpmin;
365 return -1;
368 /* Return true if SI is known to be a zero-length string. */
370 static inline bool
371 zero_length_string_p (strinfo *si)
373 return si->full_string_p && integer_zerop (si->nonzero_chars);
376 /* Return strinfo vector entry IDX. */
378 static inline strinfo *
379 get_strinfo (int idx)
381 if (vec_safe_length (stridx_to_strinfo) <= (unsigned int) idx)
382 return NULL;
383 return (*stridx_to_strinfo)[idx];
386 /* Get the next strinfo in the chain after SI, or null if none. */
388 static inline strinfo *
389 get_next_strinfo (strinfo *si)
391 if (si->next == 0)
392 return NULL;
393 strinfo *nextsi = get_strinfo (si->next);
394 if (nextsi == NULL || nextsi->first != si->first || nextsi->prev != si->idx)
395 return NULL;
396 return nextsi;
399 /* Helper function for get_stridx. Return the strinfo index of the address
400 of EXP, which is available in PTR if nonnull. If OFFSET_OUT, it is
401 OK to return the index for some X <= &EXP and store &EXP - X in
402 *OFFSET_OUT. When RVALS is nonnull uses it to determine range
403 information. */
405 static int
406 get_addr_stridx (tree exp, tree ptr, unsigned HOST_WIDE_INT *offset_out,
407 range_query *rvals = NULL)
409 HOST_WIDE_INT off;
410 struct stridxlist *list, *last = NULL;
411 tree base;
413 if (!decl_to_stridxlist_htab)
414 return 0;
416 poly_int64 poff;
417 base = get_addr_base_and_unit_offset (exp, &poff);
418 if (base == NULL || !DECL_P (base) || !poff.is_constant (&off))
419 return 0;
421 list = decl_to_stridxlist_htab->get (base);
422 if (list == NULL)
423 return 0;
427 if (list->offset == off)
429 if (offset_out)
430 *offset_out = 0;
431 return list->idx;
433 if (list->offset > off)
434 return 0;
435 last = list;
436 list = list->next;
438 while (list);
440 if ((offset_out || ptr) && last && last->idx > 0)
442 unsigned HOST_WIDE_INT rel_off
443 = (unsigned HOST_WIDE_INT) off - last->offset;
444 strinfo *si = get_strinfo (last->idx);
445 if (si && compare_nonzero_chars (si, rel_off, rvals) >= 0)
447 if (offset_out)
449 *offset_out = rel_off;
450 return last->idx;
452 else
453 return get_stridx_plus_constant (si, rel_off, ptr);
456 return 0;
459 /* Returns string index for EXP. When EXP is an SSA_NAME that refers
460 to a known strinfo with an offset and OFFRNG is non-null, sets
461 both elements of the OFFRNG array to the range of the offset and
462 returns the index of the known strinfo. In this case the result
463 must not be used in for functions that modify the string.
464 When nonnull, uses RVALS to determine range information. */
466 static int
467 get_stridx (tree exp, wide_int offrng[2] = NULL, range_query *rvals = NULL)
469 if (offrng)
470 offrng[0] = offrng[1] = wi::zero (TYPE_PRECISION (ptrdiff_type_node));
472 if (TREE_CODE (exp) == SSA_NAME)
474 if (ssa_ver_to_stridx[SSA_NAME_VERSION (exp)])
475 return ssa_ver_to_stridx[SSA_NAME_VERSION (exp)];
477 tree e = exp;
478 int last_idx = 0;
479 HOST_WIDE_INT offset = 0;
480 /* Follow a chain of at most 5 assignments. */
481 for (int i = 0; i < 5; i++)
483 gimple *def_stmt = SSA_NAME_DEF_STMT (e);
484 if (!is_gimple_assign (def_stmt))
485 return last_idx;
487 tree_code rhs_code = gimple_assign_rhs_code (def_stmt);
488 tree ptr, off;
490 if (rhs_code == ADDR_EXPR)
492 /* Handle indices/offsets into VLAs which are implemented
493 as pointers to arrays. */
494 ptr = gimple_assign_rhs1 (def_stmt);
495 ptr = TREE_OPERAND (ptr, 0);
497 /* Handle also VLAs of types larger than char. */
498 if (tree eltsize = TYPE_SIZE_UNIT (TREE_TYPE (ptr)))
500 if (TREE_CODE (ptr) == ARRAY_REF)
502 off = TREE_OPERAND (ptr, 1);
503 ptr = TREE_OPERAND (ptr, 0);
504 if (!integer_onep (eltsize))
506 /* Scale the array index by the size of the element
507 type in the rare case that it's greater than
508 the typical 1 for char, making sure both operands
509 have the same type. */
510 eltsize = fold_convert (ssizetype, eltsize);
511 off = fold_convert (ssizetype, off);
512 off = fold_build2 (MULT_EXPR, ssizetype, off, eltsize);
515 else
516 off = integer_zero_node;
518 else
519 return 0;
521 if (TREE_CODE (ptr) != MEM_REF)
522 return 0;
524 /* Add the MEM_REF byte offset. */
525 tree mem_off = TREE_OPERAND (ptr, 1);
526 off = fold_build2 (PLUS_EXPR, TREE_TYPE (off), off, mem_off);
527 ptr = TREE_OPERAND (ptr, 0);
529 else if (rhs_code == POINTER_PLUS_EXPR)
531 ptr = gimple_assign_rhs1 (def_stmt);
532 off = gimple_assign_rhs2 (def_stmt);
534 else
535 return 0;
537 if (TREE_CODE (ptr) != SSA_NAME)
538 return 0;
540 if (!tree_fits_shwi_p (off))
542 if (int idx = ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)])
543 if (offrng)
545 /* Only when requested by setting OFFRNG to non-null,
546 return the index corresponding to the SSA_NAME.
547 Do this irrespective of the whether the offset
548 is known. */
549 if (get_range (off, def_stmt, offrng, rvals))
551 /* When the offset range is known, increment it
552 it by the constant offset computed in prior
553 iterations and store it in the OFFRNG array. */
554 offrng[0] += offset;
555 offrng[1] += offset;
557 else
559 /* When the offset range cannot be determined
560 store [0, SIZE_MAX] and let the caller decide
561 if the offset matters. */
562 offrng[1] = wi::to_wide (TYPE_MAX_VALUE (sizetype));
563 offrng[0] = wi::zero (offrng[1].get_precision ());
565 return idx;
567 return 0;
570 HOST_WIDE_INT this_off = tree_to_shwi (off);
571 if (offrng)
573 offrng[0] += wi::shwi (this_off, offrng->get_precision ());
574 offrng[1] += offrng[0];
577 if (this_off < 0)
578 return last_idx;
580 offset = (unsigned HOST_WIDE_INT) offset + this_off;
581 if (offset < 0)
582 return last_idx;
584 if (int idx = ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)])
586 strinfo *si = get_strinfo (idx);
587 if (si)
589 if (compare_nonzero_chars (si, offset) >= 0)
590 return get_stridx_plus_constant (si, offset, exp);
592 if (offrng)
593 last_idx = idx;
596 e = ptr;
599 return last_idx;
602 if (TREE_CODE (exp) == ADDR_EXPR)
604 int idx = get_addr_stridx (TREE_OPERAND (exp, 0), exp, NULL);
605 if (idx != 0)
606 return idx;
609 const char *p = c_getstr (exp);
610 if (p)
611 return ~(int) strlen (p);
613 return 0;
616 /* Return true if strinfo vector is shared with the immediate dominator. */
618 static inline bool
619 strinfo_shared (void)
621 return vec_safe_length (stridx_to_strinfo)
622 && (*stridx_to_strinfo)[0] != NULL;
625 /* Unshare strinfo vector that is shared with the immediate dominator. */
627 static void
628 unshare_strinfo_vec (void)
630 strinfo *si;
631 unsigned int i = 0;
633 gcc_assert (strinfo_shared ());
634 stridx_to_strinfo = vec_safe_copy (stridx_to_strinfo);
635 for (i = 1; vec_safe_iterate (stridx_to_strinfo, i, &si); ++i)
636 if (si != NULL)
637 si->refcount++;
638 (*stridx_to_strinfo)[0] = NULL;
641 /* Attempt to create a string index for exp, ADDR_EXPR's operand.
642 Return a pointer to the location where the string index can
643 be stored (if 0) or is stored, or NULL if this can't be tracked. */
645 static int *
646 addr_stridxptr (tree exp)
648 HOST_WIDE_INT off;
650 poly_int64 poff;
651 tree base = get_addr_base_and_unit_offset (exp, &poff);
652 if (base == NULL_TREE || !DECL_P (base) || !poff.is_constant (&off))
653 return NULL;
655 if (!decl_to_stridxlist_htab)
657 decl_to_stridxlist_htab
658 = new hash_map<tree_decl_hash, stridxlist> (64);
659 gcc_obstack_init (&stridx_obstack);
662 bool existed;
663 stridxlist *list = &decl_to_stridxlist_htab->get_or_insert (base, &existed);
664 if (existed)
666 int i;
667 stridxlist *before = NULL;
668 for (i = 0; i < 32; i++)
670 if (list->offset == off)
671 return &list->idx;
672 if (list->offset > off && before == NULL)
673 before = list;
674 if (list->next == NULL)
675 break;
676 list = list->next;
678 if (i == 32)
679 return NULL;
680 if (before)
682 list = before;
683 before = XOBNEW (&stridx_obstack, struct stridxlist);
684 *before = *list;
685 list->next = before;
686 list->offset = off;
687 list->idx = 0;
688 return &list->idx;
690 list->next = XOBNEW (&stridx_obstack, struct stridxlist);
691 list = list->next;
694 list->next = NULL;
695 list->offset = off;
696 list->idx = 0;
697 return &list->idx;
700 /* Create a new string index, or return 0 if reached limit. */
702 static int
703 new_stridx (tree exp)
705 int idx;
706 if (max_stridx >= param_max_tracked_strlens)
707 return 0;
708 if (TREE_CODE (exp) == SSA_NAME)
710 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp))
711 return 0;
712 idx = max_stridx++;
713 ssa_ver_to_stridx[SSA_NAME_VERSION (exp)] = idx;
714 return idx;
716 if (TREE_CODE (exp) == ADDR_EXPR)
718 int *pidx = addr_stridxptr (TREE_OPERAND (exp, 0));
719 if (pidx != NULL)
721 gcc_assert (*pidx == 0);
722 *pidx = max_stridx++;
723 return *pidx;
726 return 0;
729 /* Like new_stridx, but for ADDR_EXPR's operand instead. */
731 static int
732 new_addr_stridx (tree exp)
734 int *pidx;
735 if (max_stridx >= param_max_tracked_strlens)
736 return 0;
737 pidx = addr_stridxptr (exp);
738 if (pidx != NULL)
740 gcc_assert (*pidx == 0);
741 *pidx = max_stridx++;
742 return *pidx;
744 return 0;
747 /* Create a new strinfo. */
749 static strinfo *
750 new_strinfo (tree ptr, int idx, tree nonzero_chars, bool full_string_p)
752 strinfo *si = strinfo_pool.allocate ();
753 si->nonzero_chars = nonzero_chars;
754 STRIP_USELESS_TYPE_CONVERSION (ptr);
755 si->ptr = ptr;
756 si->stmt = NULL;
757 si->alloc = NULL;
758 si->endptr = NULL_TREE;
759 si->refcount = 1;
760 si->idx = idx;
761 si->first = 0;
762 si->prev = 0;
763 si->next = 0;
764 si->writable = false;
765 si->dont_invalidate = false;
766 si->full_string_p = full_string_p;
767 return si;
770 /* Decrease strinfo refcount and free it if not referenced anymore. */
772 static inline void
773 free_strinfo (strinfo *si)
775 if (si && --si->refcount == 0)
776 strinfo_pool.remove (si);
779 /* Set strinfo in the vector entry IDX to SI. */
781 static inline void
782 set_strinfo (int idx, strinfo *si)
784 if (vec_safe_length (stridx_to_strinfo) && (*stridx_to_strinfo)[0])
785 unshare_strinfo_vec ();
786 if (vec_safe_length (stridx_to_strinfo) <= (unsigned int) idx)
787 vec_safe_grow_cleared (stridx_to_strinfo, idx + 1, true);
788 (*stridx_to_strinfo)[idx] = si;
791 /* Return the first strinfo in the related strinfo chain
792 if all strinfos in between belong to the chain, otherwise NULL. */
794 static strinfo *
795 verify_related_strinfos (strinfo *origsi)
797 strinfo *si = origsi, *psi;
799 if (origsi->first == 0)
800 return NULL;
801 for (; si->prev; si = psi)
803 if (si->first != origsi->first)
804 return NULL;
805 psi = get_strinfo (si->prev);
806 if (psi == NULL)
807 return NULL;
808 if (psi->next != si->idx)
809 return NULL;
811 if (si->idx != si->first)
812 return NULL;
813 return si;
816 /* Set SI's endptr to ENDPTR and compute its length based on SI->ptr.
817 Use LOC for folding. */
819 static void
820 set_endptr_and_length (location_t loc, strinfo *si, tree endptr)
822 si->endptr = endptr;
823 si->stmt = NULL;
824 tree start_as_size = fold_convert_loc (loc, size_type_node, si->ptr);
825 tree end_as_size = fold_convert_loc (loc, size_type_node, endptr);
826 si->nonzero_chars = fold_build2_loc (loc, MINUS_EXPR, size_type_node,
827 end_as_size, start_as_size);
828 si->full_string_p = true;
831 /* Return the string length, or NULL if it can't be computed.
832 The length may but need not be constant. Instead, it might be
833 the result of a strlen() call. */
835 static tree
836 get_string_length (strinfo *si)
838 /* If the length has already been computed return it if it's exact
839 (i.e., the string is nul-terminated at NONZERO_CHARS), or return
840 null if it isn't. */
841 if (si->nonzero_chars)
842 return si->full_string_p ? si->nonzero_chars : NULL;
844 /* If the string is the result of one of the built-in calls below
845 attempt to compute the length from the call statement. */
846 if (si->stmt)
848 gimple *stmt = si->stmt, *lenstmt;
849 tree callee, lhs, fn, tem;
850 location_t loc;
851 gimple_stmt_iterator gsi;
853 gcc_assert (is_gimple_call (stmt));
854 callee = gimple_call_fndecl (stmt);
855 gcc_assert (callee && fndecl_built_in_p (callee, BUILT_IN_NORMAL));
856 lhs = gimple_call_lhs (stmt);
857 /* unshare_strinfo is intentionally not called here. The (delayed)
858 transformation of strcpy or strcat into stpcpy is done at the place
859 of the former strcpy/strcat call and so can affect all the strinfos
860 with the same stmt. If they were unshared before and transformation
861 has been already done, the handling of BUILT_IN_STPCPY{,_CHK} should
862 just compute the right length. */
863 switch (DECL_FUNCTION_CODE (callee))
865 case BUILT_IN_STRCAT:
866 case BUILT_IN_STRCAT_CHK:
867 gsi = gsi_for_stmt (stmt);
868 fn = builtin_decl_implicit (BUILT_IN_STRLEN);
869 gcc_assert (lhs == NULL_TREE);
870 tem = unshare_expr (gimple_call_arg (stmt, 0));
871 lenstmt = gimple_build_call (fn, 1, tem);
872 lhs = make_ssa_name (TREE_TYPE (TREE_TYPE (fn)), lenstmt);
873 gimple_call_set_lhs (lenstmt, lhs);
874 gimple_set_vuse (lenstmt, gimple_vuse (stmt));
875 gsi_insert_before (&gsi, lenstmt, GSI_SAME_STMT);
876 tem = gimple_call_arg (stmt, 0);
877 if (!ptrofftype_p (TREE_TYPE (lhs)))
879 lhs = convert_to_ptrofftype (lhs);
880 lhs = force_gimple_operand_gsi (&gsi, lhs, true, NULL_TREE,
881 true, GSI_SAME_STMT);
883 lenstmt = gimple_build_assign
884 (make_ssa_name (TREE_TYPE (gimple_call_arg (stmt, 0))),
885 POINTER_PLUS_EXPR,tem, lhs);
886 gsi_insert_before (&gsi, lenstmt, GSI_SAME_STMT);
887 gimple_call_set_arg (stmt, 0, gimple_assign_lhs (lenstmt));
888 lhs = NULL_TREE;
889 /* FALLTHRU */
890 case BUILT_IN_STRCPY:
891 case BUILT_IN_STRCPY_CHK:
892 gcc_assert (builtin_decl_implicit_p (BUILT_IN_STPCPY));
893 if (gimple_call_num_args (stmt) == 2)
894 fn = builtin_decl_implicit (BUILT_IN_STPCPY);
895 else
896 fn = builtin_decl_explicit (BUILT_IN_STPCPY_CHK);
897 gcc_assert (lhs == NULL_TREE);
898 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
900 fprintf (dump_file, "Optimizing: ");
901 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
903 gimple_call_set_fndecl (stmt, fn);
904 lhs = make_ssa_name (TREE_TYPE (TREE_TYPE (fn)), stmt);
905 gimple_call_set_lhs (stmt, lhs);
906 update_stmt (stmt);
907 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
909 fprintf (dump_file, "into: ");
910 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
912 /* FALLTHRU */
913 case BUILT_IN_STPCPY:
914 case BUILT_IN_STPCPY_CHK:
915 gcc_assert (lhs != NULL_TREE);
916 loc = gimple_location (stmt);
917 set_endptr_and_length (loc, si, lhs);
918 for (strinfo *chainsi = verify_related_strinfos (si);
919 chainsi != NULL;
920 chainsi = get_next_strinfo (chainsi))
921 if (chainsi->nonzero_chars == NULL)
922 set_endptr_and_length (loc, chainsi, lhs);
923 break;
924 case BUILT_IN_ALLOCA:
925 case BUILT_IN_ALLOCA_WITH_ALIGN:
926 case BUILT_IN_MALLOC:
927 break;
928 /* BUILT_IN_CALLOC always has si->nonzero_chars set. */
929 default:
930 gcc_unreachable ();
931 break;
935 return si->nonzero_chars;
938 /* Dump strlen data to FP for statement STMT. When non-null, RVALS
939 points to the valuation engine used to calculate ranges, and is
940 used to dump strlen range for non-constant results. */
942 DEBUG_FUNCTION void
943 dump_strlen_info (FILE *fp, gimple *stmt, range_query *rvals)
945 if (stmt)
947 fprintf (fp, "\nDumping strlen pass data after ");
948 print_gimple_expr (fp, stmt, TDF_LINENO);
949 fputc ('\n', fp);
951 else
952 fprintf (fp, "\nDumping strlen pass data\n");
954 fprintf (fp, "max_stridx = %i\n", max_stridx);
955 fprintf (fp, "ssa_ver_to_stridx has %u elements\n",
956 ssa_ver_to_stridx.length ());
957 fprintf (fp, "stridx_to_strinfo");
958 if (stridx_to_strinfo)
960 fprintf (fp, " has %u elements\n", stridx_to_strinfo->length ());
961 for (unsigned i = 0; i != stridx_to_strinfo->length (); ++i)
963 if (strinfo *si = (*stridx_to_strinfo)[i])
965 if (!si->idx)
966 continue;
967 fprintf (fp, " idx = %i", si->idx);
968 if (si->ptr)
970 fprintf (fp, ", ptr = ");
971 print_generic_expr (fp, si->ptr);
974 if (si->nonzero_chars)
976 fprintf (fp, ", nonzero_chars = ");
977 print_generic_expr (fp, si->nonzero_chars);
978 if (TREE_CODE (si->nonzero_chars) == SSA_NAME)
980 value_range_kind rng = VR_UNDEFINED;
981 wide_int min, max;
982 if (rvals)
984 value_range vr;
985 rvals->range_of_expr (vr, si->nonzero_chars,
986 si->stmt);
987 rng = vr.kind ();
988 if (range_int_cst_p (&vr))
990 min = wi::to_wide (vr.min ());
991 max = wi::to_wide (vr.max ());
993 else
994 rng = VR_UNDEFINED;
996 else
998 value_range vr;
999 get_range_query (cfun)
1000 ->range_of_expr (vr, si->nonzero_chars);
1001 rng = vr.kind ();
1002 if (!vr.undefined_p ())
1004 min = wi::to_wide (vr.min ());
1005 max = wi::to_wide (vr.max ());
1009 if (rng == VR_RANGE || rng == VR_ANTI_RANGE)
1011 fprintf (fp, " %s[%llu, %llu]",
1012 rng == VR_RANGE ? "" : "~",
1013 (long long) min.to_uhwi (),
1014 (long long) max.to_uhwi ());
1019 fprintf (fp, ", refcount = %i", si->refcount);
1020 if (si->stmt)
1022 fprintf (fp, ", stmt = ");
1023 print_gimple_expr (fp, si->stmt, 0);
1025 if (si->alloc)
1027 fprintf (fp, ", alloc = ");
1028 print_gimple_expr (fp, si->alloc, 0);
1030 if (si->writable)
1031 fprintf (fp, ", writable");
1032 if (si->dont_invalidate)
1033 fprintf (fp, ", dont_invalidate");
1034 if (si->full_string_p)
1035 fprintf (fp, ", full_string_p");
1036 if (strinfo *next = get_next_strinfo (si))
1038 fprintf (fp, ", {");
1040 fprintf (fp, "%i%s", next->idx, next->first ? ", " : "");
1041 while ((next = get_next_strinfo (next)));
1042 fprintf (fp, "}");
1044 fputs ("\n", fp);
1048 else
1049 fprintf (fp, " = null\n");
1051 fprintf (fp, "decl_to_stridxlist_htab");
1052 if (decl_to_stridxlist_htab)
1054 fputs ("\n", fp);
1055 typedef decl_to_stridxlist_htab_t::iterator iter_t;
1056 for (iter_t it = decl_to_stridxlist_htab->begin ();
1057 it != decl_to_stridxlist_htab->end (); ++it)
1059 tree decl = (*it).first;
1060 stridxlist *list = &(*it).second;
1061 fprintf (fp, " decl = ");
1062 print_generic_expr (fp, decl);
1063 if (list)
1065 fprintf (fp, ", offsets = {");
1066 for (; list; list = list->next)
1067 fprintf (fp, "%lli%s", (long long) list->offset,
1068 list->next ? ", " : "");
1069 fputs ("}", fp);
1071 fputs ("\n", fp);
1074 else
1075 fprintf (fp, " = null\n");
1077 if (laststmt.stmt)
1079 fprintf (fp, "laststmt = ");
1080 print_gimple_expr (fp, laststmt.stmt, 0);
1081 fprintf (fp, ", len = ");
1082 print_generic_expr (fp, laststmt.len);
1083 fprintf (fp, ", stridx = %i\n", laststmt.stridx);
1087 /* Attempt to determine the length of the string SRC. On success, store
1088 the length in *PDATA and return true. Otherwise, return false.
1089 VISITED is a bitmap of visited PHI nodes. RVALS points to the valuation
1090 engine used to calculate ranges. PSSA_DEF_MAX to an SSA_NAME
1091 assignment limit used to prevent runaway recursion. */
1093 static bool
1094 get_range_strlen_dynamic (tree src, gimple *stmt,
1095 c_strlen_data *pdata, bitmap *visited,
1096 range_query *rvals, unsigned *pssa_def_max)
1098 int idx = get_stridx (src);
1099 if (!idx)
1101 if (TREE_CODE (src) == SSA_NAME)
1103 gimple *def_stmt = SSA_NAME_DEF_STMT (src);
1104 if (gimple_code (def_stmt) == GIMPLE_PHI)
1106 if (!*visited)
1107 *visited = BITMAP_ALLOC (NULL);
1109 if (!bitmap_set_bit (*visited, SSA_NAME_VERSION (src)))
1110 return true;
1112 if (*pssa_def_max == 0)
1113 return false;
1115 --*pssa_def_max;
1117 /* Iterate over the PHI arguments and determine the minimum
1118 and maximum length/size of each and incorporate them into
1119 the overall result. */
1120 gphi *phi = as_a <gphi *> (def_stmt);
1121 for (unsigned i = 0; i != gimple_phi_num_args (phi); ++i)
1123 tree arg = gimple_phi_arg_def (phi, i);
1124 if (arg == gimple_phi_result (def_stmt))
1125 continue;
1127 c_strlen_data argdata = { };
1128 if (get_range_strlen_dynamic (arg, phi, &argdata, visited,
1129 rvals, pssa_def_max))
1131 /* Set the DECL of an unterminated array this argument
1132 refers to if one hasn't been found yet. */
1133 if (!pdata->decl && argdata.decl)
1134 pdata->decl = argdata.decl;
1136 if (!argdata.minlen
1137 || (integer_zerop (argdata.minlen)
1138 && (!argdata.maxbound
1139 || integer_all_onesp (argdata.maxbound))
1140 && integer_all_onesp (argdata.maxlen)))
1142 /* Set the upper bound of the length to unbounded. */
1143 pdata->maxlen = build_all_ones_cst (size_type_node);
1144 continue;
1147 /* Adjust the minimum and maximum length determined
1148 so far and the upper bound on the array size. */
1149 if (!pdata->minlen
1150 || tree_int_cst_lt (argdata.minlen, pdata->minlen))
1151 pdata->minlen = argdata.minlen;
1152 if (!pdata->maxlen
1153 || (argdata.maxlen
1154 && tree_int_cst_lt (pdata->maxlen, argdata.maxlen)))
1155 pdata->maxlen = argdata.maxlen;
1156 if (!pdata->maxbound
1157 || TREE_CODE (pdata->maxbound) != INTEGER_CST
1158 || (argdata.maxbound
1159 && tree_int_cst_lt (pdata->maxbound,
1160 argdata.maxbound)
1161 && !integer_all_onesp (argdata.maxbound)))
1162 pdata->maxbound = argdata.maxbound;
1164 else
1165 pdata->maxlen = build_all_ones_cst (size_type_node);
1168 return true;
1172 /* Return success regardless of the result and handle *PDATA
1173 in the caller. */
1174 get_range_strlen (src, pdata, 1);
1175 return true;
1178 if (idx < 0)
1180 /* SRC is a string of constant length. */
1181 pdata->minlen = build_int_cst (size_type_node, ~idx);
1182 pdata->maxlen = pdata->minlen;
1183 pdata->maxbound = pdata->maxlen;
1184 return true;
1187 if (strinfo *si = get_strinfo (idx))
1189 pdata->minlen = get_string_length (si);
1190 if (!pdata->minlen && si->nonzero_chars)
1192 if (TREE_CODE (si->nonzero_chars) == INTEGER_CST)
1193 pdata->minlen = si->nonzero_chars;
1194 else if (TREE_CODE (si->nonzero_chars) == SSA_NAME)
1196 value_range vr;
1197 rvals->range_of_expr (vr, si->nonzero_chars, si->stmt);
1198 if (range_int_cst_p (&vr))
1200 pdata->minlen = vr.min ();
1201 pdata->maxlen = vr.max ();
1203 else
1204 pdata->minlen = build_zero_cst (size_type_node);
1206 else
1207 pdata->minlen = build_zero_cst (size_type_node);
1209 tree base = si->ptr;
1210 if (TREE_CODE (base) == ADDR_EXPR)
1211 base = TREE_OPERAND (base, 0);
1213 HOST_WIDE_INT off;
1214 poly_int64 poff;
1215 base = get_addr_base_and_unit_offset (base, &poff);
1216 if (base
1217 && DECL_P (base)
1218 && TREE_CODE (TREE_TYPE (base)) == ARRAY_TYPE
1219 && TYPE_SIZE_UNIT (TREE_TYPE (base))
1220 && poff.is_constant (&off))
1222 tree basetype = TREE_TYPE (base);
1223 tree size = TYPE_SIZE_UNIT (basetype);
1224 if (TREE_CODE (size) == INTEGER_CST)
1226 ++off; /* Increment for the terminating nul. */
1227 tree toffset = build_int_cst (size_type_node, off);
1228 pdata->maxlen = fold_build2 (MINUS_EXPR, size_type_node, size,
1229 toffset);
1230 pdata->maxbound = pdata->maxlen;
1232 else
1233 pdata->maxlen = build_all_ones_cst (size_type_node);
1235 else
1236 pdata->maxlen = build_all_ones_cst (size_type_node);
1238 else if (pdata->minlen && TREE_CODE (pdata->minlen) == SSA_NAME)
1240 value_range vr;
1241 rvals->range_of_expr (vr, si->nonzero_chars, stmt);
1242 if (range_int_cst_p (&vr))
1244 pdata->minlen = vr.min ();
1245 pdata->maxlen = vr.max ();
1246 pdata->maxbound = pdata->maxlen;
1248 else
1250 pdata->minlen = build_zero_cst (size_type_node);
1251 pdata->maxlen = build_all_ones_cst (size_type_node);
1254 else if (pdata->minlen && TREE_CODE (pdata->minlen) == INTEGER_CST)
1256 pdata->maxlen = pdata->minlen;
1257 pdata->maxbound = pdata->minlen;
1259 else
1261 /* For PDATA->MINLEN that's a non-constant expression such
1262 as PLUS_EXPR whose value range is unknown, set the bounds
1263 to zero and SIZE_MAX. */
1264 pdata->minlen = build_zero_cst (size_type_node);
1265 pdata->maxlen = build_all_ones_cst (size_type_node);
1268 return true;
1271 return false;
1274 /* Analogous to get_range_strlen but for dynamically created strings,
1275 i.e., those created by calls to strcpy as opposed to just string
1276 constants.
1277 Try to obtain the range of the lengths of the string(s) referenced
1278 by SRC, or the size of the largest array SRC refers to if the range
1279 of lengths cannot be determined, and store all in *PDATA. RVALS
1280 points to the valuation engine used to calculate ranges. */
1282 void
1283 get_range_strlen_dynamic (tree src, gimple *stmt, c_strlen_data *pdata,
1284 range_query *rvals)
1286 bitmap visited = NULL;
1287 tree maxbound = pdata->maxbound;
1289 unsigned limit = param_ssa_name_def_chain_limit;
1290 if (!get_range_strlen_dynamic (src, stmt, pdata, &visited, rvals, &limit))
1292 /* On failure extend the length range to an impossible maximum
1293 (a valid MAXLEN must be less than PTRDIFF_MAX - 1). Other
1294 members can stay unchanged regardless. */
1295 pdata->minlen = ssize_int (0);
1296 pdata->maxlen = build_all_ones_cst (size_type_node);
1298 else if (!pdata->minlen)
1299 pdata->minlen = ssize_int (0);
1301 /* If it's unchanged from it initial non-null value, set the conservative
1302 MAXBOUND to SIZE_MAX. Otherwise leave it null (if it is null). */
1303 if (maxbound && pdata->maxbound == maxbound)
1304 pdata->maxbound = build_all_ones_cst (size_type_node);
1306 if (visited)
1307 BITMAP_FREE (visited);
1310 /* Invalidate string length information for strings whose length might
1311 change due to stores in STMT, except those marked DONT_INVALIDATE.
1312 For string-modifying statements, ZERO_WRITE is set when the statement
1313 wrote only zeros.
1314 Returns true if any STRIDX_TO_STRINFO entries were considered
1315 for invalidation. */
1317 static bool
1318 maybe_invalidate (gimple *stmt, bool zero_write = false)
1320 if (dump_file && (dump_flags & TDF_DETAILS))
1322 fprintf (dump_file, "%s called for ", __func__);
1323 print_gimple_stmt (dump_file, stmt, TDF_LINENO);
1326 strinfo *si;
1327 bool nonempty = false;
1329 for (unsigned i = 1; vec_safe_iterate (stridx_to_strinfo, i, &si); ++i)
1331 if (si == NULL || !POINTER_TYPE_P (TREE_TYPE (si->ptr)))
1332 continue;
1334 nonempty = true;
1336 /* Unconditionally reset DONT_INVALIDATE. */
1337 bool dont_invalidate = si->dont_invalidate;
1338 si->dont_invalidate = false;
1340 if (dont_invalidate)
1341 continue;
1343 ao_ref r;
1344 tree size = si->nonzero_chars;
1345 ao_ref_init_from_ptr_and_size (&r, si->ptr, size);
1346 /* Include the terminating nul in the size of the string
1347 to consider when determining possible clobber. But do not
1348 add it to 'size' since we don't know whether it would
1349 actually fit the allocated area. */
1350 if (known_size_p (r.size))
1352 if (known_le (r.size, HOST_WIDE_INT_MAX - BITS_PER_UNIT))
1353 r.max_size += BITS_PER_UNIT;
1354 else
1355 r.max_size = -1;
1357 if (stmt_may_clobber_ref_p_1 (stmt, &r))
1359 if (dump_file && (dump_flags & TDF_DETAILS))
1361 fputs (" statement may clobber object ", dump_file);
1362 print_generic_expr (dump_file, si->ptr);
1363 if (size && tree_fits_uhwi_p (size))
1364 fprintf (dump_file, " " HOST_WIDE_INT_PRINT_UNSIGNED
1365 " bytes in size", tree_to_uhwi (size));
1366 fputc ('\n', dump_file);
1369 set_strinfo (i, NULL);
1370 free_strinfo (si);
1371 continue;
1374 if (size
1375 && !zero_write
1376 && si->stmt
1377 && is_gimple_call (si->stmt)
1378 && (DECL_FUNCTION_CODE (gimple_call_fndecl (si->stmt))
1379 == BUILT_IN_CALLOC))
1381 /* If the clobber test above considered the length of
1382 the string (including the nul), then for (potentially)
1383 non-zero writes that might modify storage allocated by
1384 calloc consider the whole object and if it might be
1385 clobbered by the statement reset the statement. */
1386 ao_ref_init_from_ptr_and_size (&r, si->ptr, NULL_TREE);
1387 if (stmt_may_clobber_ref_p_1 (stmt, &r))
1388 si->stmt = NULL;
1392 if (dump_file && (dump_flags & TDF_DETAILS))
1393 fprintf (dump_file, "%s returns %i\n", __func__, nonempty);
1395 return nonempty;
1398 /* Unshare strinfo record SI, if it has refcount > 1 or
1399 if stridx_to_strinfo vector is shared with some other
1400 bbs. */
1402 static strinfo *
1403 unshare_strinfo (strinfo *si)
1405 strinfo *nsi;
1407 if (si->refcount == 1 && !strinfo_shared ())
1408 return si;
1410 nsi = new_strinfo (si->ptr, si->idx, si->nonzero_chars, si->full_string_p);
1411 nsi->stmt = si->stmt;
1412 nsi->alloc = si->alloc;
1413 nsi->endptr = si->endptr;
1414 nsi->first = si->first;
1415 nsi->prev = si->prev;
1416 nsi->next = si->next;
1417 nsi->writable = si->writable;
1418 set_strinfo (si->idx, nsi);
1419 free_strinfo (si);
1420 return nsi;
1423 /* Attempt to create a new strinfo for BASESI + OFF, or find existing
1424 strinfo if there is any. Return it's idx, or 0 if no strinfo has
1425 been created. */
1427 static int
1428 get_stridx_plus_constant (strinfo *basesi, unsigned HOST_WIDE_INT off,
1429 tree ptr)
1431 if (TREE_CODE (ptr) == SSA_NAME && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (ptr))
1432 return 0;
1434 if (compare_nonzero_chars (basesi, off) < 0
1435 || !tree_fits_uhwi_p (basesi->nonzero_chars))
1436 return 0;
1438 unsigned HOST_WIDE_INT nonzero_chars
1439 = tree_to_uhwi (basesi->nonzero_chars) - off;
1440 strinfo *si = basesi, *chainsi;
1441 if (si->first || si->prev || si->next)
1442 si = verify_related_strinfos (basesi);
1443 if (si == NULL
1444 || si->nonzero_chars == NULL_TREE
1445 || TREE_CODE (si->nonzero_chars) != INTEGER_CST)
1446 return 0;
1448 if (TREE_CODE (ptr) == SSA_NAME
1449 && ssa_ver_to_stridx.length () <= SSA_NAME_VERSION (ptr))
1450 ssa_ver_to_stridx.safe_grow_cleared (num_ssa_names, true);
1452 gcc_checking_assert (compare_tree_int (si->nonzero_chars, off) != -1);
1453 for (chainsi = si; chainsi->next; chainsi = si)
1455 si = get_next_strinfo (chainsi);
1456 if (si == NULL
1457 || si->nonzero_chars == NULL_TREE
1458 || TREE_CODE (si->nonzero_chars) != INTEGER_CST)
1459 break;
1460 int r = compare_tree_int (si->nonzero_chars, nonzero_chars);
1461 if (r != 1)
1463 if (r == 0)
1465 if (TREE_CODE (ptr) == SSA_NAME)
1466 ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)] = si->idx;
1467 else
1469 int *pidx = addr_stridxptr (TREE_OPERAND (ptr, 0));
1470 if (pidx != NULL && *pidx == 0)
1471 *pidx = si->idx;
1473 return si->idx;
1475 break;
1479 int idx = new_stridx (ptr);
1480 if (idx == 0)
1481 return 0;
1482 si = new_strinfo (ptr, idx, build_int_cst (size_type_node, nonzero_chars),
1483 basesi->full_string_p);
1484 set_strinfo (idx, si);
1485 if (strinfo *nextsi = get_strinfo (chainsi->next))
1487 nextsi = unshare_strinfo (nextsi);
1488 si->next = nextsi->idx;
1489 nextsi->prev = idx;
1491 chainsi = unshare_strinfo (chainsi);
1492 if (chainsi->first == 0)
1493 chainsi->first = chainsi->idx;
1494 chainsi->next = idx;
1495 if (chainsi->endptr == NULL_TREE && zero_length_string_p (si))
1496 chainsi->endptr = ptr;
1497 si->endptr = chainsi->endptr;
1498 si->prev = chainsi->idx;
1499 si->first = chainsi->first;
1500 si->writable = chainsi->writable;
1501 return si->idx;
1504 /* Note that PTR, a pointer SSA_NAME initialized in the current stmt, points
1505 to a zero-length string and if possible chain it to a related strinfo
1506 chain whose part is or might be CHAINSI. */
1508 static strinfo *
1509 zero_length_string (tree ptr, strinfo *chainsi)
1511 strinfo *si;
1512 int idx;
1513 if (ssa_ver_to_stridx.length () <= SSA_NAME_VERSION (ptr))
1514 ssa_ver_to_stridx.safe_grow_cleared (num_ssa_names, true);
1515 gcc_checking_assert (TREE_CODE (ptr) == SSA_NAME
1516 && ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)] == 0);
1518 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (ptr))
1519 return NULL;
1520 if (chainsi != NULL)
1522 si = verify_related_strinfos (chainsi);
1523 if (si)
1527 /* We shouldn't mix delayed and non-delayed lengths. */
1528 gcc_assert (si->full_string_p);
1529 if (si->endptr == NULL_TREE)
1531 si = unshare_strinfo (si);
1532 si->endptr = ptr;
1534 chainsi = si;
1535 si = get_next_strinfo (si);
1537 while (si != NULL);
1538 if (zero_length_string_p (chainsi))
1540 if (chainsi->next)
1542 chainsi = unshare_strinfo (chainsi);
1543 chainsi->next = 0;
1545 ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)] = chainsi->idx;
1546 return chainsi;
1549 else
1551 /* We shouldn't mix delayed and non-delayed lengths. */
1552 gcc_assert (chainsi->full_string_p);
1553 if (chainsi->first || chainsi->prev || chainsi->next)
1555 chainsi = unshare_strinfo (chainsi);
1556 chainsi->first = 0;
1557 chainsi->prev = 0;
1558 chainsi->next = 0;
1562 idx = new_stridx (ptr);
1563 if (idx == 0)
1564 return NULL;
1565 si = new_strinfo (ptr, idx, build_int_cst (size_type_node, 0), true);
1566 set_strinfo (idx, si);
1567 si->endptr = ptr;
1568 if (chainsi != NULL)
1570 chainsi = unshare_strinfo (chainsi);
1571 if (chainsi->first == 0)
1572 chainsi->first = chainsi->idx;
1573 chainsi->next = idx;
1574 if (chainsi->endptr == NULL_TREE)
1575 chainsi->endptr = ptr;
1576 si->prev = chainsi->idx;
1577 si->first = chainsi->first;
1578 si->writable = chainsi->writable;
1580 return si;
1583 /* For strinfo ORIGSI whose length has been just updated, adjust other
1584 related strinfos so that they match the new ORIGSI. This involves:
1586 - adding ADJ to the nonzero_chars fields
1587 - copying full_string_p from the new ORIGSI. */
1589 static void
1590 adjust_related_strinfos (location_t loc, strinfo *origsi, tree adj)
1592 strinfo *si = verify_related_strinfos (origsi);
1594 if (si == NULL)
1595 return;
1597 while (1)
1599 strinfo *nsi;
1601 if (si != origsi)
1603 tree tem;
1605 si = unshare_strinfo (si);
1606 /* We shouldn't see delayed lengths here; the caller must
1607 have calculated the old length in order to calculate
1608 the adjustment. */
1609 gcc_assert (si->nonzero_chars);
1610 tem = fold_convert_loc (loc, TREE_TYPE (si->nonzero_chars), adj);
1611 si->nonzero_chars = fold_build2_loc (loc, PLUS_EXPR,
1612 TREE_TYPE (si->nonzero_chars),
1613 si->nonzero_chars, tem);
1614 si->full_string_p = origsi->full_string_p;
1616 si->endptr = NULL_TREE;
1617 si->dont_invalidate = true;
1619 nsi = get_next_strinfo (si);
1620 if (nsi == NULL)
1621 return;
1622 si = nsi;
1626 /* Find if there are other SSA_NAME pointers equal to PTR
1627 for which we don't track their string lengths yet. If so, use
1628 IDX for them. */
1630 static void
1631 find_equal_ptrs (tree ptr, int idx)
1633 if (TREE_CODE (ptr) != SSA_NAME)
1634 return;
1635 while (1)
1637 gimple *stmt = SSA_NAME_DEF_STMT (ptr);
1638 if (!is_gimple_assign (stmt))
1639 return;
1640 ptr = gimple_assign_rhs1 (stmt);
1641 switch (gimple_assign_rhs_code (stmt))
1643 case SSA_NAME:
1644 break;
1645 CASE_CONVERT:
1646 if (!POINTER_TYPE_P (TREE_TYPE (ptr)))
1647 return;
1648 if (TREE_CODE (ptr) == SSA_NAME)
1649 break;
1650 if (TREE_CODE (ptr) != ADDR_EXPR)
1651 return;
1652 /* FALLTHRU */
1653 case ADDR_EXPR:
1655 int *pidx = addr_stridxptr (TREE_OPERAND (ptr, 0));
1656 if (pidx != NULL && *pidx == 0)
1657 *pidx = idx;
1658 return;
1660 default:
1661 return;
1664 /* We might find an endptr created in this pass. Grow the
1665 vector in that case. */
1666 if (ssa_ver_to_stridx.length () <= SSA_NAME_VERSION (ptr))
1667 ssa_ver_to_stridx.safe_grow_cleared (num_ssa_names, true);
1669 if (ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)] != 0)
1670 return;
1671 ssa_ver_to_stridx[SSA_NAME_VERSION (ptr)] = idx;
1675 /* Return true if STMT is a call to a builtin function with the right
1676 arguments and attributes that should be considered for optimization
1677 by this pass. */
1679 static bool
1680 valid_builtin_call (gimple *stmt)
1682 if (!gimple_call_builtin_p (stmt, BUILT_IN_NORMAL))
1683 return false;
1685 tree callee = gimple_call_fndecl (stmt);
1686 tree decl = builtin_decl_explicit (DECL_FUNCTION_CODE (callee));
1687 if (decl
1688 && decl != callee
1689 && !gimple_builtin_call_types_compatible_p (stmt, decl))
1690 return false;
1692 switch (DECL_FUNCTION_CODE (callee))
1694 case BUILT_IN_MEMCMP:
1695 case BUILT_IN_MEMCMP_EQ:
1696 case BUILT_IN_STRCMP:
1697 case BUILT_IN_STRNCMP:
1698 case BUILT_IN_STRCHR:
1699 case BUILT_IN_STRLEN:
1700 case BUILT_IN_STRNLEN:
1701 /* The above functions should be pure. Punt if they aren't. */
1702 if (gimple_vdef (stmt) || gimple_vuse (stmt) == NULL_TREE)
1703 return false;
1704 break;
1706 case BUILT_IN_ALLOCA:
1707 case BUILT_IN_ALLOCA_WITH_ALIGN:
1708 case BUILT_IN_CALLOC:
1709 case BUILT_IN_MALLOC:
1710 case BUILT_IN_MEMCPY:
1711 case BUILT_IN_MEMCPY_CHK:
1712 case BUILT_IN_MEMPCPY:
1713 case BUILT_IN_MEMPCPY_CHK:
1714 case BUILT_IN_MEMSET:
1715 case BUILT_IN_STPCPY:
1716 case BUILT_IN_STPCPY_CHK:
1717 case BUILT_IN_STPNCPY:
1718 case BUILT_IN_STPNCPY_CHK:
1719 case BUILT_IN_STRCAT:
1720 case BUILT_IN_STRCAT_CHK:
1721 case BUILT_IN_STRCPY:
1722 case BUILT_IN_STRCPY_CHK:
1723 case BUILT_IN_STRNCAT:
1724 case BUILT_IN_STRNCAT_CHK:
1725 case BUILT_IN_STRNCPY:
1726 case BUILT_IN_STRNCPY_CHK:
1727 /* The above functions should be neither const nor pure. Punt if they
1728 aren't. */
1729 if (gimple_vdef (stmt) == NULL_TREE || gimple_vuse (stmt) == NULL_TREE)
1730 return false;
1731 break;
1733 default:
1734 break;
1737 return true;
1740 /* If the last .MEM setter statement before STMT is
1741 memcpy (x, y, strlen (y) + 1), the only .MEM use of it is STMT
1742 and STMT is known to overwrite x[strlen (x)], adjust the last memcpy to
1743 just memcpy (x, y, strlen (y)). SI must be the zero length
1744 strinfo. */
1746 void
1747 strlen_pass::adjust_last_stmt (strinfo *si, gimple *stmt, bool is_strcat)
1749 tree vuse, callee, len;
1750 struct laststmt_struct last = laststmt;
1751 strinfo *lastsi, *firstsi;
1752 unsigned len_arg_no = 2;
1754 laststmt.stmt = NULL;
1755 laststmt.len = NULL_TREE;
1756 laststmt.stridx = 0;
1758 if (last.stmt == NULL)
1759 return;
1761 vuse = gimple_vuse (stmt);
1762 if (vuse == NULL_TREE
1763 || SSA_NAME_DEF_STMT (vuse) != last.stmt
1764 || !has_single_use (vuse))
1765 return;
1767 gcc_assert (last.stridx > 0);
1768 lastsi = get_strinfo (last.stridx);
1769 if (lastsi == NULL)
1770 return;
1772 if (lastsi != si)
1774 if (lastsi->first == 0 || lastsi->first != si->first)
1775 return;
1777 firstsi = verify_related_strinfos (si);
1778 if (firstsi == NULL)
1779 return;
1780 while (firstsi != lastsi)
1782 firstsi = get_next_strinfo (firstsi);
1783 if (firstsi == NULL)
1784 return;
1788 if (!is_strcat && !zero_length_string_p (si))
1789 return;
1791 if (is_gimple_assign (last.stmt))
1793 gimple_stmt_iterator gsi;
1795 if (!integer_zerop (gimple_assign_rhs1 (last.stmt)))
1796 return;
1797 if (stmt_could_throw_p (cfun, last.stmt))
1798 return;
1799 gsi = gsi_for_stmt (last.stmt);
1800 unlink_stmt_vdef (last.stmt);
1801 release_defs (last.stmt);
1802 gsi_remove (&gsi, true);
1803 return;
1806 if (!valid_builtin_call (last.stmt))
1807 return;
1809 callee = gimple_call_fndecl (last.stmt);
1810 switch (DECL_FUNCTION_CODE (callee))
1812 case BUILT_IN_MEMCPY:
1813 case BUILT_IN_MEMCPY_CHK:
1814 break;
1815 default:
1816 return;
1819 len = gimple_call_arg (last.stmt, len_arg_no);
1820 if (tree_fits_uhwi_p (len))
1822 if (!tree_fits_uhwi_p (last.len)
1823 || integer_zerop (len)
1824 || tree_to_uhwi (len) != tree_to_uhwi (last.len) + 1)
1825 return;
1826 /* Don't adjust the length if it is divisible by 4, it is more efficient
1827 to store the extra '\0' in that case. */
1828 if ((tree_to_uhwi (len) & 3) == 0)
1829 return;
1831 /* Don't fold away an out of bounds access, as this defeats proper
1832 warnings. */
1833 tree dst = gimple_call_arg (last.stmt, 0);
1835 access_ref aref;
1836 tree size = compute_objsize (dst, stmt, 1, &aref, &ptr_qry);
1837 if (size && tree_int_cst_lt (size, len))
1838 return;
1840 else if (TREE_CODE (len) == SSA_NAME)
1842 gimple *def_stmt = SSA_NAME_DEF_STMT (len);
1843 if (!is_gimple_assign (def_stmt)
1844 || gimple_assign_rhs_code (def_stmt) != PLUS_EXPR
1845 || gimple_assign_rhs1 (def_stmt) != last.len
1846 || !integer_onep (gimple_assign_rhs2 (def_stmt)))
1847 return;
1849 else
1850 return;
1852 gimple_call_set_arg (last.stmt, len_arg_no, last.len);
1853 update_stmt (last.stmt);
1856 /* For an LHS that is an SSA_NAME that is the result of a strlen()
1857 call, or when BOUND is non-null, of a strnlen() call, set LHS
1858 range info to [0, min (MAX, BOUND)] when the range includes more
1859 than one value and return LHS. Otherwise, when the range
1860 [MIN, MAX] is such that MIN == MAX, return the tree representation
1861 of (MIN). The latter allows callers to fold suitable strnlen() calls
1862 to constants. */
1864 tree
1865 set_strlen_range (tree lhs, wide_int min, wide_int max,
1866 tree bound /* = NULL_TREE */)
1868 if (TREE_CODE (lhs) != SSA_NAME
1869 || !INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
1870 return NULL_TREE;
1872 if (bound)
1874 /* For strnlen, adjust MIN and MAX as necessary. If the bound
1875 is less than the size of the array set MAX to it. It it's
1876 greater than MAX and MAX is non-zero bump MAX down to account
1877 for the necessary terminating nul. Otherwise leave it alone. */
1878 if (TREE_CODE (bound) == INTEGER_CST)
1880 wide_int wibnd = wi::to_wide (bound);
1881 int cmp = wi::cmpu (wibnd, max);
1882 if (cmp < 0)
1883 max = wibnd;
1884 else if (cmp && wi::ne_p (max, min))
1885 --max;
1887 else if (TREE_CODE (bound) == SSA_NAME)
1889 value_range r;
1890 get_range_query (cfun)->range_of_expr (r, bound);
1891 if (!r.undefined_p ())
1893 /* For a bound in a known range, adjust the range determined
1894 above as necessary. For a bound in some anti-range or
1895 in an unknown range, use the range determined by callers. */
1896 if (wi::ltu_p (r.lower_bound (), min))
1897 min = r.lower_bound ();
1898 if (wi::ltu_p (r.upper_bound (), max))
1899 max = r.upper_bound ();
1904 if (min == max)
1905 return wide_int_to_tree (size_type_node, min);
1907 set_range_info (lhs, VR_RANGE, min, max);
1908 return lhs;
1911 /* For an LHS that is an SSA_NAME and for strlen() or strnlen() argument
1912 SRC, set LHS range info to [0, min (N, BOUND)] if SRC refers to
1913 a character array A[N] with unknown length bounded by N, and for
1914 strnlen(), by min (N, BOUND). */
1916 static tree
1917 maybe_set_strlen_range (tree lhs, tree src, tree bound)
1919 if (TREE_CODE (lhs) != SSA_NAME
1920 || !INTEGRAL_TYPE_P (TREE_TYPE (lhs)))
1921 return NULL_TREE;
1923 if (TREE_CODE (src) == SSA_NAME)
1925 gimple *def = SSA_NAME_DEF_STMT (src);
1926 if (is_gimple_assign (def)
1927 && gimple_assign_rhs_code (def) == ADDR_EXPR)
1928 src = gimple_assign_rhs1 (def);
1931 /* The longest string is PTRDIFF_MAX - 1 bytes including the final
1932 NUL so that the difference between a pointer to just past it and
1933 one to its beginning is positive. */
1934 wide_int max = wi::to_wide (TYPE_MAX_VALUE (ptrdiff_type_node)) - 2;
1936 if (TREE_CODE (src) == ADDR_EXPR)
1938 /* The last array member of a struct can be bigger than its size
1939 suggests if it's treated as a poor-man's flexible array member. */
1940 src = TREE_OPERAND (src, 0);
1941 if (TREE_CODE (src) != MEM_REF
1942 && !array_at_struct_end_p (src))
1944 tree type = TREE_TYPE (src);
1945 tree size = TYPE_SIZE_UNIT (type);
1946 if (size
1947 && TREE_CODE (size) == INTEGER_CST
1948 && !integer_zerop (size))
1950 /* Even though such uses of strlen would be undefined,
1951 avoid relying on arrays of arrays in case some genius
1952 decides to call strlen on an unterminated array element
1953 that's followed by a terminated one. Likewise, avoid
1954 assuming that a struct array member is necessarily
1955 nul-terminated (the nul may be in the member that
1956 follows). In those cases, assume that the length
1957 of the string stored in such an array is bounded
1958 by the size of the enclosing object if one can be
1959 determined. */
1960 tree base = get_base_address (src);
1961 if (VAR_P (base))
1963 if (tree size = DECL_SIZE_UNIT (base))
1964 if (size
1965 && TREE_CODE (size) == INTEGER_CST
1966 && TREE_CODE (TREE_TYPE (base)) != POINTER_TYPE)
1967 max = wi::to_wide (size);
1971 /* For strlen() the upper bound above is equal to
1972 the longest string that can be stored in the array
1973 (i.e., it accounts for the terminating nul. For
1974 strnlen() bump up the maximum by one since the array
1975 need not be nul-terminated. */
1976 if (!bound && max != 0)
1977 --max;
1981 wide_int min = wi::zero (max.get_precision ());
1982 return set_strlen_range (lhs, min, max, bound);
1985 /* Diagnose buffer overflow by a STMT writing LEN + PLUS_ONE bytes,
1986 either into a region allocated for the object SI when non-null,
1987 or into an object designated by the LHS of STMT otherwise.
1988 For a call STMT, when CALL_LHS is set use its left hand side
1989 as the destination, otherwise use argument zero.
1990 When nonnull uses RVALS to determine range information.
1991 RAWMEM may be set by memcpy and other raw memory functions
1992 to allow accesses across subobject boundaries. */
1994 void
1995 strlen_pass::maybe_warn_overflow (gimple *stmt, bool call_lhs, tree len,
1996 strinfo *si, bool plus_one, bool rawmem)
1998 if (!len || warning_suppressed_p (stmt, OPT_Wstringop_overflow_))
1999 return;
2001 /* The DECL of the function performing the write if it is done
2002 by one. */
2003 tree writefn = NULL_TREE;
2004 /* The destination expression involved in the store or call STMT. */
2005 tree dest = NULL_TREE;
2007 if (is_gimple_assign (stmt))
2008 dest = gimple_assign_lhs (stmt);
2009 else if (is_gimple_call (stmt))
2011 if (call_lhs)
2012 dest = gimple_call_lhs (stmt);
2013 else
2015 gcc_assert (gimple_call_builtin_p (stmt, BUILT_IN_NORMAL));
2016 dest = gimple_call_arg (stmt, 0);
2019 if (!dest)
2020 return;
2021 writefn = gimple_call_fndecl (stmt);
2023 else
2024 return;
2026 if (warning_suppressed_p (dest, OPT_Wstringop_overflow_))
2027 return;
2029 const int ostype = rawmem ? 0 : 1;
2031 /* Use maximum precision to avoid overflow in the addition below.
2032 Make sure all operands have the same precision to keep wide_int
2033 from ICE'ing. */
2035 access_ref aref;
2036 /* The size of the destination region (which is smaller than
2037 the destination object for stores at a non-zero offset). */
2038 tree destsize = compute_objsize (dest, stmt, ostype, &aref, &ptr_qry);
2040 if (!destsize)
2042 aref.sizrng[0] = 0;
2043 aref.sizrng[1] = wi::to_offset (max_object_size ());
2046 /* Return early if the DESTSIZE size expression is the same as LEN
2047 and the offset into the destination is zero. This might happen
2048 in the case of a pair of malloc and memset calls to allocate
2049 an object and clear it as if by calloc. */
2050 if (destsize == len && !plus_one
2051 && aref.offrng[0] == 0 && aref.offrng[0] == aref.offrng[1])
2052 return;
2054 wide_int rng[2];
2055 if (!get_range (len, stmt, rng, ptr_qry.rvals))
2056 return;
2058 widest_int lenrng[2] =
2059 { widest_int::from (rng[0], SIGNED), widest_int::from (rng[1], SIGNED) };
2061 if (plus_one)
2063 lenrng[0] += 1;
2064 lenrng[1] += 1;
2067 /* The size of the remaining space in the destination computed
2068 as the size of the latter minus the offset into it. */
2069 widest_int spcrng[2];
2071 offset_int remrng[2];
2072 remrng[1] = aref.size_remaining (remrng);
2073 spcrng[0] = remrng[0] == -1 ? 0 : widest_int::from (remrng[0], UNSIGNED);
2074 spcrng[1] = widest_int::from (remrng[1], UNSIGNED);
2077 if (wi::leu_p (lenrng[0], spcrng[0])
2078 && wi::leu_p (lenrng[1], spcrng[1]))
2079 return;
2081 location_t loc = gimple_or_expr_nonartificial_location (stmt, dest);
2082 bool warned = false;
2083 if (wi::leu_p (lenrng[0], spcrng[1]))
2085 if (len != destsize
2086 && (!si || rawmem || !is_strlen_related_p (si->ptr, len)))
2087 return;
2089 warned = (writefn
2090 ? warning_at (loc, OPT_Wstringop_overflow_,
2091 "%qD writing one too many bytes into a region "
2092 "of a size that depends on %<strlen%>",
2093 writefn)
2094 : warning_at (loc, OPT_Wstringop_overflow_,
2095 "writing one too many bytes into a region "
2096 "of a size that depends on %<strlen%>"));
2098 else if (lenrng[0] == lenrng[1])
2100 if (spcrng[0] == spcrng[1])
2101 warned = (writefn
2102 ? warning_n (loc, OPT_Wstringop_overflow_,
2103 lenrng[0].to_uhwi (),
2104 "%qD writing %wu byte into a region "
2105 "of size %wu",
2106 "%qD writing %wu bytes into a region "
2107 "of size %wu",
2108 writefn, lenrng[0].to_uhwi (),
2109 spcrng[0].to_uhwi ())
2110 : warning_n (loc, OPT_Wstringop_overflow_,
2111 lenrng[0].to_uhwi (),
2112 "writing %wu byte into a region "
2113 "of size %wu",
2114 "writing %wu bytes into a region "
2115 "of size %wu",
2116 lenrng[0].to_uhwi (),
2117 spcrng[0].to_uhwi ()));
2118 else
2119 warned = (writefn
2120 ? warning_n (loc, OPT_Wstringop_overflow_,
2121 lenrng[0].to_uhwi (),
2122 "%qD writing %wu byte into a region "
2123 "of size between %wu and %wu",
2124 "%qD writing %wu bytes into a region "
2125 "of size between %wu and %wu",
2126 writefn, lenrng[0].to_uhwi (),
2127 spcrng[0].to_uhwi (), spcrng[1].to_uhwi ())
2128 : warning_n (loc, OPT_Wstringop_overflow_,
2129 lenrng[0].to_uhwi (),
2130 "writing %wu byte into a region "
2131 "of size between %wu and %wu",
2132 "writing %wu bytes into a region "
2133 "of size between %wu and %wu",
2134 lenrng[0].to_uhwi (),
2135 spcrng[0].to_uhwi (), spcrng[1].to_uhwi ()));
2137 else if (spcrng[0] == spcrng[1])
2138 warned = (writefn
2139 ? warning_at (loc, OPT_Wstringop_overflow_,
2140 "%qD writing between %wu and %wu bytes "
2141 "into a region of size %wu",
2142 writefn, lenrng[0].to_uhwi (),
2143 lenrng[1].to_uhwi (),
2144 spcrng[0].to_uhwi ())
2145 : warning_at (loc, OPT_Wstringop_overflow_,
2146 "writing between %wu and %wu bytes "
2147 "into a region of size %wu",
2148 lenrng[0].to_uhwi (),
2149 lenrng[1].to_uhwi (),
2150 spcrng[0].to_uhwi ()));
2151 else
2152 warned = (writefn
2153 ? warning_at (loc, OPT_Wstringop_overflow_,
2154 "%qD writing between %wu and %wu bytes "
2155 "into a region of size between %wu and %wu",
2156 writefn, lenrng[0].to_uhwi (),
2157 lenrng[1].to_uhwi (),
2158 spcrng[0].to_uhwi (), spcrng[1].to_uhwi ())
2159 : warning_at (loc, OPT_Wstringop_overflow_,
2160 "writing between %wu and %wu bytes "
2161 "into a region of size between %wu and %wu",
2162 lenrng[0].to_uhwi (),
2163 lenrng[1].to_uhwi (),
2164 spcrng[0].to_uhwi (), spcrng[1].to_uhwi ()));
2166 if (!warned)
2167 return;
2169 suppress_warning (stmt, OPT_Wstringop_overflow_);
2171 aref.inform_access (access_write_only);
2174 /* Convenience wrapper for the above. */
2176 void
2177 strlen_pass::maybe_warn_overflow (gimple *stmt, bool call_lhs,
2178 unsigned HOST_WIDE_INT len,
2179 strinfo *si, bool plus_one, bool rawmem)
2181 tree tlen = build_int_cst (size_type_node, len);
2182 maybe_warn_overflow (stmt, call_lhs, tlen, si, plus_one, rawmem);
2185 /* Handle a strlen call. If strlen of the argument is known, replace
2186 the strlen call with the known value, otherwise remember that strlen
2187 of the argument is stored in the lhs SSA_NAME. */
2189 void
2190 strlen_pass::handle_builtin_strlen ()
2192 gimple *stmt = gsi_stmt (m_gsi);
2193 tree lhs = gimple_call_lhs (stmt);
2195 if (lhs == NULL_TREE)
2196 return;
2198 location_t loc = gimple_location (stmt);
2199 tree callee = gimple_call_fndecl (stmt);
2200 tree src = gimple_call_arg (stmt, 0);
2201 tree bound = (DECL_FUNCTION_CODE (callee) == BUILT_IN_STRNLEN
2202 ? gimple_call_arg (stmt, 1) : NULL_TREE);
2203 int idx = get_stridx (src);
2204 if (idx || (bound && integer_zerop (bound)))
2206 strinfo *si = NULL;
2207 tree rhs;
2209 if (idx < 0)
2210 rhs = build_int_cst (TREE_TYPE (lhs), ~idx);
2211 else if (idx == 0)
2212 rhs = bound;
2213 else
2215 rhs = NULL_TREE;
2216 si = get_strinfo (idx);
2217 if (si != NULL)
2219 rhs = get_string_length (si);
2220 /* For strnlen, if bound is constant, even if si is not known
2221 to be zero terminated, if we know at least bound bytes are
2222 not zero, the return value will be bound. */
2223 if (rhs == NULL_TREE
2224 && bound != NULL_TREE
2225 && TREE_CODE (bound) == INTEGER_CST
2226 && si->nonzero_chars != NULL_TREE
2227 && TREE_CODE (si->nonzero_chars) == INTEGER_CST
2228 && tree_int_cst_le (bound, si->nonzero_chars))
2229 rhs = bound;
2232 if (rhs != NULL_TREE)
2234 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2236 fprintf (dump_file, "Optimizing: ");
2237 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2239 rhs = unshare_expr (rhs);
2240 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2241 rhs = fold_convert_loc (loc, TREE_TYPE (lhs), rhs);
2243 if (bound)
2244 rhs = fold_build2_loc (loc, MIN_EXPR, TREE_TYPE (rhs), rhs, bound);
2246 gimplify_and_update_call_from_tree (&m_gsi, rhs);
2247 stmt = gsi_stmt (m_gsi);
2248 update_stmt (stmt);
2249 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2251 fprintf (dump_file, "into: ");
2252 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2255 if (si != NULL
2256 /* Don't update anything for strnlen. */
2257 && bound == NULL_TREE
2258 && TREE_CODE (si->nonzero_chars) != SSA_NAME
2259 && TREE_CODE (si->nonzero_chars) != INTEGER_CST
2260 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
2262 si = unshare_strinfo (si);
2263 si->nonzero_chars = lhs;
2264 gcc_assert (si->full_string_p);
2267 if (strlen_to_stridx
2268 && (bound == NULL_TREE
2269 /* For strnlen record this only if the call is proven
2270 to return the same value as strlen would. */
2271 || (TREE_CODE (bound) == INTEGER_CST
2272 && TREE_CODE (rhs) == INTEGER_CST
2273 && tree_int_cst_lt (rhs, bound))))
2274 strlen_to_stridx->put (lhs, stridx_strlenloc (idx, loc));
2276 return;
2279 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
2280 return;
2282 if (idx == 0)
2283 idx = new_stridx (src);
2284 else
2286 strinfo *si = get_strinfo (idx);
2287 if (si != NULL)
2289 if (!si->full_string_p && !si->stmt)
2291 /* Until now we only had a lower bound on the string length.
2292 Install LHS as the actual length. */
2293 si = unshare_strinfo (si);
2294 tree old = si->nonzero_chars;
2295 si->nonzero_chars = lhs;
2296 si->full_string_p = true;
2297 if (old && TREE_CODE (old) == INTEGER_CST)
2299 old = fold_convert_loc (loc, TREE_TYPE (lhs), old);
2300 tree adj = fold_build2_loc (loc, MINUS_EXPR,
2301 TREE_TYPE (lhs), lhs, old);
2302 adjust_related_strinfos (loc, si, adj);
2303 /* Use the constant minimum length as the lower bound
2304 of the non-constant length. */
2305 wide_int min = wi::to_wide (old);
2306 wide_int max
2307 = wi::to_wide (TYPE_MAX_VALUE (ptrdiff_type_node)) - 2;
2308 set_strlen_range (lhs, min, max);
2310 else
2312 si->first = 0;
2313 si->prev = 0;
2314 si->next = 0;
2317 return;
2320 if (idx)
2322 if (!bound)
2324 /* Only store the new length information for calls to strlen(),
2325 not for those to strnlen(). */
2326 strinfo *si = new_strinfo (src, idx, lhs, true);
2327 set_strinfo (idx, si);
2328 find_equal_ptrs (src, idx);
2331 /* For SRC that is an array of N elements, set LHS's range
2332 to [0, min (N, BOUND)]. A constant return value means
2333 the range would have consisted of a single value. In
2334 that case, fold the result into the returned constant. */
2335 if (tree ret = maybe_set_strlen_range (lhs, src, bound))
2336 if (TREE_CODE (ret) == INTEGER_CST)
2338 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2340 fprintf (dump_file, "Optimizing: ");
2341 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2343 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (ret)))
2344 ret = fold_convert_loc (loc, TREE_TYPE (lhs), ret);
2345 gimplify_and_update_call_from_tree (&m_gsi, ret);
2346 stmt = gsi_stmt (m_gsi);
2347 update_stmt (stmt);
2348 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2350 fprintf (dump_file, "into: ");
2351 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2355 if (strlen_to_stridx && !bound)
2356 strlen_to_stridx->put (lhs, stridx_strlenloc (idx, loc));
2360 /* Handle a strchr call. If strlen of the first argument is known, replace
2361 the strchr (x, 0) call with the endptr or x + strlen, otherwise remember
2362 that lhs of the call is endptr and strlen of the argument is endptr - x. */
2364 void
2365 strlen_pass::handle_builtin_strchr ()
2367 gimple *stmt = gsi_stmt (m_gsi);
2368 tree lhs = gimple_call_lhs (stmt);
2370 if (lhs == NULL_TREE)
2371 return;
2373 if (!integer_zerop (gimple_call_arg (stmt, 1)))
2374 return;
2376 tree src = gimple_call_arg (stmt, 0);
2378 /* Avoid folding if the first argument is not a nul-terminated array.
2379 Defer warning until later. */
2380 if (!check_nul_terminated_array (NULL_TREE, src))
2381 return;
2383 int idx = get_stridx (src);
2384 if (idx)
2386 strinfo *si = NULL;
2387 tree rhs;
2389 if (idx < 0)
2390 rhs = build_int_cst (size_type_node, ~idx);
2391 else
2393 rhs = NULL_TREE;
2394 si = get_strinfo (idx);
2395 if (si != NULL)
2396 rhs = get_string_length (si);
2398 if (rhs != NULL_TREE)
2400 location_t loc = gimple_location (stmt);
2402 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2404 fprintf (dump_file, "Optimizing: ");
2405 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2407 if (si != NULL && si->endptr != NULL_TREE)
2409 rhs = unshare_expr (si->endptr);
2410 if (!useless_type_conversion_p (TREE_TYPE (lhs),
2411 TREE_TYPE (rhs)))
2412 rhs = fold_convert_loc (loc, TREE_TYPE (lhs), rhs);
2414 else
2416 rhs = fold_convert_loc (loc, sizetype, unshare_expr (rhs));
2417 rhs = fold_build2_loc (loc, POINTER_PLUS_EXPR,
2418 TREE_TYPE (src), src, rhs);
2419 if (!useless_type_conversion_p (TREE_TYPE (lhs),
2420 TREE_TYPE (rhs)))
2421 rhs = fold_convert_loc (loc, TREE_TYPE (lhs), rhs);
2423 gimplify_and_update_call_from_tree (&m_gsi, rhs);
2424 stmt = gsi_stmt (m_gsi);
2425 update_stmt (stmt);
2426 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2428 fprintf (dump_file, "into: ");
2429 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2431 if (si != NULL
2432 && si->endptr == NULL_TREE
2433 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
2435 si = unshare_strinfo (si);
2436 si->endptr = lhs;
2438 zero_length_string (lhs, si);
2439 return;
2442 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
2443 return;
2444 if (TREE_CODE (src) != SSA_NAME || !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (src))
2446 if (idx == 0)
2447 idx = new_stridx (src);
2448 else if (get_strinfo (idx) != NULL)
2450 zero_length_string (lhs, NULL);
2451 return;
2453 if (idx)
2455 location_t loc = gimple_location (stmt);
2456 tree lhsu = fold_convert_loc (loc, size_type_node, lhs);
2457 tree srcu = fold_convert_loc (loc, size_type_node, src);
2458 tree length = fold_build2_loc (loc, MINUS_EXPR,
2459 size_type_node, lhsu, srcu);
2460 strinfo *si = new_strinfo (src, idx, length, true);
2461 si->endptr = lhs;
2462 set_strinfo (idx, si);
2463 find_equal_ptrs (src, idx);
2464 zero_length_string (lhs, si);
2467 else
2468 zero_length_string (lhs, NULL);
2471 /* Handle a strcpy-like ({st{r,p}cpy,__st{r,p}cpy_chk}) call.
2472 If strlen of the second argument is known, strlen of the first argument
2473 is the same after this call. Furthermore, attempt to convert it to
2474 memcpy. Uses RVALS to determine range information. */
2476 void
2477 strlen_pass::handle_builtin_strcpy (built_in_function bcode)
2479 int idx, didx;
2480 tree src, dst, srclen, len, lhs, type, fn, oldlen;
2481 bool success;
2482 gimple *stmt = gsi_stmt (m_gsi);
2483 strinfo *si, *dsi, *olddsi, *zsi;
2484 location_t loc;
2486 src = gimple_call_arg (stmt, 1);
2487 dst = gimple_call_arg (stmt, 0);
2488 lhs = gimple_call_lhs (stmt);
2489 idx = get_stridx (src);
2490 si = NULL;
2491 if (idx > 0)
2492 si = get_strinfo (idx);
2494 didx = get_stridx (dst);
2495 olddsi = NULL;
2496 oldlen = NULL_TREE;
2497 if (didx > 0)
2498 olddsi = get_strinfo (didx);
2499 else if (didx < 0)
2500 return;
2502 if (olddsi != NULL)
2503 adjust_last_stmt (olddsi, stmt, false);
2505 srclen = NULL_TREE;
2506 if (si != NULL)
2507 srclen = get_string_length (si);
2508 else if (idx < 0)
2509 srclen = build_int_cst (size_type_node, ~idx);
2511 maybe_warn_overflow (stmt, false, srclen, olddsi, true);
2513 if (olddsi != NULL)
2514 adjust_last_stmt (olddsi, stmt, false);
2516 loc = gimple_location (stmt);
2517 if (srclen == NULL_TREE)
2518 switch (bcode)
2520 case BUILT_IN_STRCPY:
2521 case BUILT_IN_STRCPY_CHK:
2522 if (lhs != NULL_TREE || !builtin_decl_implicit_p (BUILT_IN_STPCPY))
2523 return;
2524 break;
2525 case BUILT_IN_STPCPY:
2526 case BUILT_IN_STPCPY_CHK:
2527 if (lhs == NULL_TREE)
2528 return;
2529 else
2531 tree lhsuint = fold_convert_loc (loc, size_type_node, lhs);
2532 srclen = fold_convert_loc (loc, size_type_node, dst);
2533 srclen = fold_build2_loc (loc, MINUS_EXPR, size_type_node,
2534 lhsuint, srclen);
2536 break;
2537 default:
2538 gcc_unreachable ();
2541 if (didx == 0)
2543 didx = new_stridx (dst);
2544 if (didx == 0)
2545 return;
2547 if (olddsi != NULL)
2549 oldlen = olddsi->nonzero_chars;
2550 dsi = unshare_strinfo (olddsi);
2551 dsi->nonzero_chars = srclen;
2552 dsi->full_string_p = (srclen != NULL_TREE);
2553 /* Break the chain, so adjust_related_strinfo on later pointers in
2554 the chain won't adjust this one anymore. */
2555 dsi->next = 0;
2556 dsi->stmt = NULL;
2557 dsi->endptr = NULL_TREE;
2559 else
2561 dsi = new_strinfo (dst, didx, srclen, srclen != NULL_TREE);
2562 set_strinfo (didx, dsi);
2563 find_equal_ptrs (dst, didx);
2565 dsi->writable = true;
2566 dsi->dont_invalidate = true;
2568 if (dsi->nonzero_chars == NULL_TREE)
2570 strinfo *chainsi;
2572 /* If string length of src is unknown, use delayed length
2573 computation. If string length of dst will be needed, it
2574 can be computed by transforming this strcpy call into
2575 stpcpy and subtracting dst from the return value. */
2577 /* Look for earlier strings whose length could be determined if
2578 this strcpy is turned into an stpcpy. */
2580 if (dsi->prev != 0 && (chainsi = verify_related_strinfos (dsi)) != NULL)
2582 for (; chainsi && chainsi != dsi; chainsi = get_strinfo (chainsi->next))
2584 /* When setting a stmt for delayed length computation
2585 prevent all strinfos through dsi from being
2586 invalidated. */
2587 chainsi = unshare_strinfo (chainsi);
2588 chainsi->stmt = stmt;
2589 chainsi->nonzero_chars = NULL_TREE;
2590 chainsi->full_string_p = false;
2591 chainsi->endptr = NULL_TREE;
2592 chainsi->dont_invalidate = true;
2595 dsi->stmt = stmt;
2597 /* Try to detect overlap before returning. This catches cases
2598 like strcpy (d, d + n) where n is non-constant whose range
2599 is such that (n <= strlen (d) holds).
2601 OLDDSI->NONZERO_chars may have been reset by this point with
2602 oldlen holding it original value. */
2603 if (olddsi && oldlen)
2605 /* Add 1 for the terminating NUL. */
2606 tree type = TREE_TYPE (oldlen);
2607 oldlen = fold_build2 (PLUS_EXPR, type, oldlen,
2608 build_int_cst (type, 1));
2609 check_bounds_or_overlap (stmt, olddsi->ptr, src, oldlen, NULL_TREE);
2612 return;
2615 if (olddsi != NULL)
2617 tree adj = NULL_TREE;
2618 if (oldlen == NULL_TREE)
2620 else if (integer_zerop (oldlen))
2621 adj = srclen;
2622 else if (TREE_CODE (oldlen) == INTEGER_CST
2623 || TREE_CODE (srclen) == INTEGER_CST)
2624 adj = fold_build2_loc (loc, MINUS_EXPR,
2625 TREE_TYPE (srclen), srclen,
2626 fold_convert_loc (loc, TREE_TYPE (srclen),
2627 oldlen));
2628 if (adj != NULL_TREE)
2629 adjust_related_strinfos (loc, dsi, adj);
2630 else
2631 dsi->prev = 0;
2633 /* strcpy src may not overlap dst, so src doesn't need to be
2634 invalidated either. */
2635 if (si != NULL)
2636 si->dont_invalidate = true;
2638 fn = NULL_TREE;
2639 zsi = NULL;
2640 switch (bcode)
2642 case BUILT_IN_STRCPY:
2643 fn = builtin_decl_implicit (BUILT_IN_MEMCPY);
2644 if (lhs)
2645 ssa_ver_to_stridx[SSA_NAME_VERSION (lhs)] = didx;
2646 break;
2647 case BUILT_IN_STRCPY_CHK:
2648 fn = builtin_decl_explicit (BUILT_IN_MEMCPY_CHK);
2649 if (lhs)
2650 ssa_ver_to_stridx[SSA_NAME_VERSION (lhs)] = didx;
2651 break;
2652 case BUILT_IN_STPCPY:
2653 /* This would need adjustment of the lhs (subtract one),
2654 or detection that the trailing '\0' doesn't need to be
2655 written, if it will be immediately overwritten.
2656 fn = builtin_decl_explicit (BUILT_IN_MEMPCPY); */
2657 if (lhs)
2659 dsi->endptr = lhs;
2660 zsi = zero_length_string (lhs, dsi);
2662 break;
2663 case BUILT_IN_STPCPY_CHK:
2664 /* This would need adjustment of the lhs (subtract one),
2665 or detection that the trailing '\0' doesn't need to be
2666 written, if it will be immediately overwritten.
2667 fn = builtin_decl_explicit (BUILT_IN_MEMPCPY_CHK); */
2668 if (lhs)
2670 dsi->endptr = lhs;
2671 zsi = zero_length_string (lhs, dsi);
2673 break;
2674 default:
2675 gcc_unreachable ();
2677 if (zsi != NULL)
2678 zsi->dont_invalidate = true;
2680 if (fn)
2682 tree args = TYPE_ARG_TYPES (TREE_TYPE (fn));
2683 type = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (args)));
2685 else
2686 type = size_type_node;
2688 len = fold_convert_loc (loc, type, unshare_expr (srclen));
2689 len = fold_build2_loc (loc, PLUS_EXPR, type, len, build_int_cst (type, 1));
2691 /* Disable warning for the transformed statement? */
2692 opt_code no_warning_opt = no_warning;
2694 if (const strinfo *chksi = si ? olddsi ? olddsi : dsi : NULL)
2696 no_warning_opt = check_bounds_or_overlap (stmt, chksi->ptr, si->ptr,
2697 NULL_TREE, len);
2698 if (no_warning_opt)
2699 suppress_warning (stmt, no_warning_opt);
2702 if (fn == NULL_TREE)
2703 return;
2705 len = force_gimple_operand_gsi (&m_gsi, len, true, NULL_TREE, true,
2706 GSI_SAME_STMT);
2707 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2709 fprintf (dump_file, "Optimizing: ");
2710 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2712 if (gimple_call_num_args (stmt) == 2)
2713 success = update_gimple_call (&m_gsi, fn, 3, dst, src, len);
2714 else
2715 success = update_gimple_call (&m_gsi, fn, 4, dst, src, len,
2716 gimple_call_arg (stmt, 2));
2717 if (success)
2719 stmt = gsi_stmt (m_gsi);
2720 update_stmt (stmt);
2721 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2723 fprintf (dump_file, "into: ");
2724 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
2726 /* Allow adjust_last_stmt to decrease this memcpy's size. */
2727 laststmt.stmt = stmt;
2728 laststmt.len = srclen;
2729 laststmt.stridx = dsi->idx;
2731 else if (dump_file && (dump_flags & TDF_DETAILS) != 0)
2732 fprintf (dump_file, "not possible.\n");
2734 if (no_warning_opt)
2735 suppress_warning (stmt, no_warning_opt);
2738 /* Check the size argument to the built-in forms of stpncpy and strncpy
2739 for out-of-bounds offsets or overlapping access, and to see if the
2740 size argument is derived from a call to strlen() on the source argument,
2741 and if so, issue an appropriate warning. */
2743 void
2744 strlen_pass::handle_builtin_strncat (built_in_function)
2746 /* Same as stxncpy(). */
2747 handle_builtin_stxncpy_strncat (true);
2750 /* Return true if LEN depends on a call to strlen(SRC) in an interesting
2751 way. LEN can either be an integer expression, or a pointer (to char).
2752 When it is the latter (such as in recursive calls to self) it is
2753 assumed to be the argument in some call to strlen() whose relationship
2754 to SRC is being ascertained. */
2756 bool
2757 is_strlen_related_p (tree src, tree len)
2759 if (TREE_CODE (TREE_TYPE (len)) == POINTER_TYPE
2760 && operand_equal_p (src, len, 0))
2761 return true;
2763 if (TREE_CODE (len) != SSA_NAME)
2764 return false;
2766 if (TREE_CODE (src) == SSA_NAME)
2768 gimple *srcdef = SSA_NAME_DEF_STMT (src);
2769 if (is_gimple_assign (srcdef))
2771 /* Handle bitwise AND used in conversions from wider size_t
2772 to narrower unsigned types. */
2773 tree_code code = gimple_assign_rhs_code (srcdef);
2774 if (code == BIT_AND_EXPR
2775 || code == NOP_EXPR)
2776 return is_strlen_related_p (gimple_assign_rhs1 (srcdef), len);
2778 return false;
2781 if (gimple_call_builtin_p (srcdef, BUILT_IN_NORMAL))
2783 /* If SRC is the result of a call to an allocation function
2784 or strlen, use the function's argument instead. */
2785 tree func = gimple_call_fndecl (srcdef);
2786 built_in_function code = DECL_FUNCTION_CODE (func);
2787 if (code == BUILT_IN_ALLOCA
2788 || code == BUILT_IN_ALLOCA_WITH_ALIGN
2789 || code == BUILT_IN_MALLOC
2790 || code == BUILT_IN_STRLEN)
2791 return is_strlen_related_p (gimple_call_arg (srcdef, 0), len);
2793 /* FIXME: Handle other functions with attribute alloc_size. */
2794 return false;
2798 gimple *lendef = SSA_NAME_DEF_STMT (len);
2799 if (!lendef)
2800 return false;
2802 if (is_gimple_call (lendef))
2804 tree func = gimple_call_fndecl (lendef);
2805 if (!valid_builtin_call (lendef)
2806 || DECL_FUNCTION_CODE (func) != BUILT_IN_STRLEN)
2807 return false;
2809 tree arg = gimple_call_arg (lendef, 0);
2810 return is_strlen_related_p (src, arg);
2813 if (!is_gimple_assign (lendef))
2814 return false;
2816 tree_code code = gimple_assign_rhs_code (lendef);
2817 tree rhs1 = gimple_assign_rhs1 (lendef);
2818 tree rhstype = TREE_TYPE (rhs1);
2820 if ((TREE_CODE (rhstype) == POINTER_TYPE && code == POINTER_PLUS_EXPR)
2821 || (INTEGRAL_TYPE_P (rhstype)
2822 && (code == BIT_AND_EXPR
2823 || code == NOP_EXPR)))
2825 /* Pointer plus (an integer), and truncation are considered among
2826 the (potentially) related expressions to strlen. */
2827 return is_strlen_related_p (src, rhs1);
2830 if (tree rhs2 = gimple_assign_rhs2 (lendef))
2832 /* Integer subtraction is considered strlen-related when both
2833 arguments are integers and second one is strlen-related. */
2834 rhstype = TREE_TYPE (rhs2);
2835 if (INTEGRAL_TYPE_P (rhstype) && code == MINUS_EXPR)
2836 return is_strlen_related_p (src, rhs2);
2839 return false;
2842 /* Called by handle_builtin_stxncpy_strncat and by
2843 gimple_fold_builtin_strncpy in gimple-fold.c.
2844 Check to see if the specified bound is a) equal to the size of
2845 the destination DST and if so, b) if it's immediately followed by
2846 DST[CNT - 1] = '\0'. If a) holds and b) does not, warn. Otherwise,
2847 do nothing. Return true if diagnostic has been issued.
2849 The purpose is to diagnose calls to strncpy and stpncpy that do
2850 not nul-terminate the copy while allowing for the idiom where
2851 such a call is immediately followed by setting the last element
2852 to nul, as in:
2853 char a[32];
2854 strncpy (a, s, sizeof a);
2855 a[sizeof a - 1] = '\0';
2858 bool
2859 maybe_diag_stxncpy_trunc (gimple_stmt_iterator gsi, tree src, tree cnt,
2860 pointer_query *ptr_qry /* = NULL */)
2862 gimple *stmt = gsi_stmt (gsi);
2863 if (warning_suppressed_p (stmt, OPT_Wstringop_truncation))
2864 return false;
2866 wide_int cntrange[2];
2867 value_range r;
2868 if (!get_range_query (cfun)->range_of_expr (r, cnt)
2869 || r.varying_p ()
2870 || r.undefined_p ())
2871 return false;
2873 cntrange[0] = wi::to_wide (r.min ());
2874 cntrange[1] = wi::to_wide (r.max ());
2875 if (r.kind () == VR_ANTI_RANGE)
2877 wide_int maxobjsize = wi::to_wide (TYPE_MAX_VALUE (ptrdiff_type_node));
2879 if (wi::ltu_p (cntrange[1], maxobjsize))
2881 cntrange[0] = cntrange[1] + 1;
2882 cntrange[1] = maxobjsize;
2884 else
2886 cntrange[1] = cntrange[0] - 1;
2887 cntrange[0] = wi::zero (TYPE_PRECISION (TREE_TYPE (cnt)));
2891 /* Negative value is the constant string length. If it's less than
2892 the lower bound there is no truncation. Avoid calling get_stridx()
2893 when ssa_ver_to_stridx is empty. That implies the caller isn't
2894 running under the control of this pass and ssa_ver_to_stridx hasn't
2895 been created yet. */
2896 int sidx = ssa_ver_to_stridx.length () ? get_stridx (src) : 0;
2897 if (sidx < 0 && wi::gtu_p (cntrange[0], ~sidx))
2898 return false;
2900 tree dst = gimple_call_arg (stmt, 0);
2901 tree dstdecl = dst;
2902 if (TREE_CODE (dstdecl) == ADDR_EXPR)
2903 dstdecl = TREE_OPERAND (dstdecl, 0);
2905 tree ref = NULL_TREE;
2907 if (!sidx)
2909 /* If the source is a non-string return early to avoid warning
2910 for possible truncation (if the truncation is certain SIDX
2911 is non-zero). */
2912 tree srcdecl = gimple_call_arg (stmt, 1);
2913 if (TREE_CODE (srcdecl) == ADDR_EXPR)
2914 srcdecl = TREE_OPERAND (srcdecl, 0);
2915 if (get_attr_nonstring_decl (srcdecl, &ref))
2916 return false;
2919 /* Likewise, if the destination refers to an array/pointer declared
2920 nonstring return early. */
2921 if (get_attr_nonstring_decl (dstdecl, &ref))
2922 return false;
2924 /* Look for dst[i] = '\0'; after the stxncpy() call and if found
2925 avoid the truncation warning. */
2926 gsi_next_nondebug (&gsi);
2927 gimple *next_stmt = gsi_stmt (gsi);
2928 if (!next_stmt)
2930 /* When there is no statement in the same basic block check
2931 the immediate successor block. */
2932 if (basic_block bb = gimple_bb (stmt))
2934 if (single_succ_p (bb))
2936 /* For simplicity, ignore blocks with multiple outgoing
2937 edges for now and only consider successor blocks along
2938 normal edges. */
2939 edge e = EDGE_SUCC (bb, 0);
2940 if (!(e->flags & EDGE_ABNORMAL))
2942 gsi = gsi_start_bb (e->dest);
2943 next_stmt = gsi_stmt (gsi);
2944 if (next_stmt && is_gimple_debug (next_stmt))
2946 gsi_next_nondebug (&gsi);
2947 next_stmt = gsi_stmt (gsi);
2954 if (next_stmt && is_gimple_assign (next_stmt))
2956 tree lhs = gimple_assign_lhs (next_stmt);
2957 tree_code code = TREE_CODE (lhs);
2958 if (code == ARRAY_REF || code == MEM_REF)
2959 lhs = TREE_OPERAND (lhs, 0);
2961 tree func = gimple_call_fndecl (stmt);
2962 if (DECL_FUNCTION_CODE (func) == BUILT_IN_STPNCPY)
2964 tree ret = gimple_call_lhs (stmt);
2965 if (ret && operand_equal_p (ret, lhs, 0))
2966 return false;
2969 /* Determine the base address and offset of the reference,
2970 ignoring the innermost array index. */
2971 if (TREE_CODE (ref) == ARRAY_REF)
2972 ref = TREE_OPERAND (ref, 0);
2974 poly_int64 dstoff;
2975 tree dstbase = get_addr_base_and_unit_offset (ref, &dstoff);
2977 poly_int64 lhsoff;
2978 tree lhsbase = get_addr_base_and_unit_offset (lhs, &lhsoff);
2979 if (lhsbase
2980 && dstbase
2981 && known_eq (dstoff, lhsoff)
2982 && operand_equal_p (dstbase, lhsbase, 0))
2983 return false;
2986 int prec = TYPE_PRECISION (TREE_TYPE (cnt));
2987 wide_int lenrange[2];
2988 if (strinfo *sisrc = sidx > 0 ? get_strinfo (sidx) : NULL)
2990 lenrange[0] = (sisrc->nonzero_chars
2991 && TREE_CODE (sisrc->nonzero_chars) == INTEGER_CST
2992 ? wi::to_wide (sisrc->nonzero_chars)
2993 : wi::zero (prec));
2994 lenrange[1] = lenrange[0];
2996 else if (sidx < 0)
2997 lenrange[0] = lenrange[1] = wi::shwi (~sidx, prec);
2998 else
3000 c_strlen_data lendata = { };
3001 /* Set MAXBOUND to an arbitrary non-null non-integer node as a request
3002 to have it set to the length of the longest string in a PHI. */
3003 lendata.maxbound = src;
3004 get_range_strlen (src, &lendata, /* eltsize = */1);
3005 if (TREE_CODE (lendata.minlen) == INTEGER_CST
3006 && TREE_CODE (lendata.maxbound) == INTEGER_CST)
3008 /* When LENDATA.MAXLEN is unknown, reset LENDATA.MINLEN
3009 which stores the length of the shortest known string. */
3010 if (integer_all_onesp (lendata.maxlen))
3011 lenrange[0] = wi::shwi (0, prec);
3012 else
3013 lenrange[0] = wi::to_wide (lendata.minlen, prec);
3014 lenrange[1] = wi::to_wide (lendata.maxbound, prec);
3016 else
3018 lenrange[0] = wi::shwi (0, prec);
3019 lenrange[1] = wi::shwi (-1, prec);
3023 location_t callloc = gimple_or_expr_nonartificial_location (stmt, dst);
3024 tree func = gimple_call_fndecl (stmt);
3026 if (lenrange[0] != 0 || !wi::neg_p (lenrange[1]))
3028 /* If the longest source string is shorter than the lower bound
3029 of the specified count the copy is definitely nul-terminated. */
3030 if (wi::ltu_p (lenrange[1], cntrange[0]))
3031 return false;
3033 if (wi::neg_p (lenrange[1]))
3035 /* The length of one of the strings is unknown but at least
3036 one has non-zero length and that length is stored in
3037 LENRANGE[1]. Swap the bounds to force a "may be truncated"
3038 warning below. */
3039 lenrange[1] = lenrange[0];
3040 lenrange[0] = wi::shwi (0, prec);
3043 /* Set to true for strncat whose bound is derived from the length
3044 of the destination (the expected usage pattern). */
3045 bool cat_dstlen_bounded = false;
3046 if (DECL_FUNCTION_CODE (func) == BUILT_IN_STRNCAT)
3047 cat_dstlen_bounded = is_strlen_related_p (dst, cnt);
3049 if (lenrange[0] == cntrange[1] && cntrange[0] == cntrange[1])
3050 return warning_n (callloc, OPT_Wstringop_truncation,
3051 cntrange[0].to_uhwi (),
3052 "%qD output truncated before terminating "
3053 "nul copying %E byte from a string of the "
3054 "same length",
3055 "%qD output truncated before terminating nul "
3056 "copying %E bytes from a string of the same "
3057 "length",
3058 func, cnt);
3059 else if (!cat_dstlen_bounded)
3061 if (wi::geu_p (lenrange[0], cntrange[1]))
3063 /* The shortest string is longer than the upper bound of
3064 the count so the truncation is certain. */
3065 if (cntrange[0] == cntrange[1])
3066 return warning_n (callloc, OPT_Wstringop_truncation,
3067 cntrange[0].to_uhwi (),
3068 "%qD output truncated copying %E byte "
3069 "from a string of length %wu",
3070 "%qD output truncated copying %E bytes "
3071 "from a string of length %wu",
3072 func, cnt, lenrange[0].to_uhwi ());
3074 return warning_at (callloc, OPT_Wstringop_truncation,
3075 "%qD output truncated copying between %wu "
3076 "and %wu bytes from a string of length %wu",
3077 func, cntrange[0].to_uhwi (),
3078 cntrange[1].to_uhwi (), lenrange[0].to_uhwi ());
3080 else if (wi::geu_p (lenrange[1], cntrange[1]))
3082 /* The longest string is longer than the upper bound of
3083 the count so the truncation is possible. */
3084 if (cntrange[0] == cntrange[1])
3085 return warning_n (callloc, OPT_Wstringop_truncation,
3086 cntrange[0].to_uhwi (),
3087 "%qD output may be truncated copying %E "
3088 "byte from a string of length %wu",
3089 "%qD output may be truncated copying %E "
3090 "bytes from a string of length %wu",
3091 func, cnt, lenrange[1].to_uhwi ());
3093 return warning_at (callloc, OPT_Wstringop_truncation,
3094 "%qD output may be truncated copying between "
3095 "%wu and %wu bytes from a string of length %wu",
3096 func, cntrange[0].to_uhwi (),
3097 cntrange[1].to_uhwi (), lenrange[1].to_uhwi ());
3101 if (!cat_dstlen_bounded
3102 && cntrange[0] != cntrange[1]
3103 && wi::leu_p (cntrange[0], lenrange[0])
3104 && wi::leu_p (cntrange[1], lenrange[0] + 1))
3106 /* If the source (including the terminating nul) is longer than
3107 the lower bound of the specified count but shorter than the
3108 upper bound the copy may (but need not) be truncated. */
3109 return warning_at (callloc, OPT_Wstringop_truncation,
3110 "%qD output may be truncated copying between "
3111 "%wu and %wu bytes from a string of length %wu",
3112 func, cntrange[0].to_uhwi (),
3113 cntrange[1].to_uhwi (), lenrange[0].to_uhwi ());
3117 access_ref aref;
3118 if (tree dstsize = compute_objsize (dst, stmt, 1, &aref, ptr_qry))
3120 /* The source length is unknown. Try to determine the destination
3121 size and see if it matches the specified bound. If not, bail.
3122 Otherwise go on to see if it should be diagnosed for possible
3123 truncation. */
3124 if (!dstsize)
3125 return false;
3127 if (wi::to_wide (dstsize) != cntrange[1])
3128 return false;
3130 /* Avoid warning for strncpy(a, b, N) calls where the following
3131 equalities hold:
3132 N == sizeof a && N == sizeof b */
3133 if (tree srcsize = compute_objsize (src, stmt, 1, &aref, ptr_qry))
3134 if (wi::to_wide (srcsize) == cntrange[1])
3135 return false;
3137 if (cntrange[0] == cntrange[1])
3138 return warning_at (callloc, OPT_Wstringop_truncation,
3139 "%qD specified bound %E equals destination size",
3140 func, cnt);
3143 return false;
3146 /* Check the arguments to the built-in forms of stpncpy, strncpy, and
3147 strncat, for out-of-bounds offsets or overlapping access, and to see
3148 if the size is derived from calling strlen() on the source argument,
3149 and if so, issue the appropriate warning.
3150 APPEND_P is true for strncat. */
3152 void
3153 strlen_pass::handle_builtin_stxncpy_strncat (bool append_p)
3155 if (!strlen_to_stridx)
3156 return;
3158 gimple *stmt = gsi_stmt (m_gsi);
3160 tree dst = gimple_call_arg (stmt, 0);
3161 tree src = gimple_call_arg (stmt, 1);
3162 tree len = gimple_call_arg (stmt, 2);
3163 /* An upper bound of the size of the destination. */
3164 tree dstsize = NULL_TREE;
3165 /* The length of the destination and source strings (plus 1 for those
3166 whose FULL_STRING_P is set, i.e., whose length is exact rather than
3167 a lower bound). */
3168 tree dstlenp1 = NULL_TREE, srclenp1 = NULL_TREE;;
3170 int didx = get_stridx (dst);
3171 if (strinfo *sidst = didx > 0 ? get_strinfo (didx) : NULL)
3173 /* Compute the size of the destination string including the nul
3174 if it is known to be nul-terminated. */
3175 if (sidst->nonzero_chars)
3177 if (sidst->full_string_p)
3179 /* String is known to be nul-terminated. */
3180 tree type = TREE_TYPE (sidst->nonzero_chars);
3181 dstlenp1 = fold_build2 (PLUS_EXPR, type, sidst->nonzero_chars,
3182 build_int_cst (type, 1));
3184 else
3185 dstlenp1 = sidst->nonzero_chars;
3187 else if (TREE_CODE (sidst->ptr) == SSA_NAME)
3189 gimple *def_stmt = SSA_NAME_DEF_STMT (sidst->ptr);
3190 dstsize = gimple_call_alloc_size (def_stmt);
3193 dst = sidst->ptr;
3196 int sidx = get_stridx (src);
3197 strinfo *sisrc = sidx > 0 ? get_strinfo (sidx) : NULL;
3198 if (sisrc)
3200 /* strncat() and strncpy() can modify the source string by writing
3201 over the terminating nul so SISRC->DONT_INVALIDATE must be left
3202 clear. */
3204 /* Compute the size of the source string including the terminating
3205 nul if its known to be nul-terminated. */
3206 if (sisrc->nonzero_chars)
3208 if (sisrc->full_string_p)
3210 tree type = TREE_TYPE (sisrc->nonzero_chars);
3211 srclenp1 = fold_build2 (PLUS_EXPR, type, sisrc->nonzero_chars,
3212 build_int_cst (type, 1));
3214 else
3215 srclenp1 = sisrc->nonzero_chars;
3218 src = sisrc->ptr;
3220 else
3221 srclenp1 = NULL_TREE;
3223 opt_code opt = check_bounds_or_overlap (stmt, dst, src, dstlenp1, srclenp1);
3224 if (opt != no_warning)
3226 suppress_warning (stmt, opt);
3227 return;
3230 /* If the length argument was computed from strlen(S) for some string
3231 S retrieve the strinfo index for the string (PSS->FIRST) along with
3232 the location of the strlen() call (PSS->SECOND). */
3233 stridx_strlenloc *pss = strlen_to_stridx->get (len);
3234 if (!pss || pss->first <= 0)
3236 if (maybe_diag_stxncpy_trunc (m_gsi, src, len))
3237 suppress_warning (stmt, OPT_Wstringop_truncation);
3239 return;
3242 /* Retrieve the strinfo data for the string S that LEN was computed
3243 from as some function F of strlen (S) (i.e., LEN need not be equal
3244 to strlen(S)). */
3245 strinfo *silen = get_strinfo (pss->first);
3247 location_t callloc = gimple_or_expr_nonartificial_location (stmt, dst);
3249 tree func = gimple_call_fndecl (stmt);
3251 bool warned = false;
3253 /* When -Wstringop-truncation is set, try to determine truncation
3254 before diagnosing possible overflow. Truncation is implied by
3255 the LEN argument being equal to strlen(SRC), regardless of
3256 whether its value is known. Otherwise, when appending, or
3257 when copying into a destination of known size, issue the more
3258 generic -Wstringop-overflow which triggers for LEN arguments
3259 that in any meaningful way depend on strlen(SRC). */
3260 if (!append_p
3261 && sisrc == silen
3262 && is_strlen_related_p (src, len)
3263 && warning_at (callloc, OPT_Wstringop_truncation,
3264 "%qD output truncated before terminating nul "
3265 "copying as many bytes from a string as its length",
3266 func))
3267 warned = true;
3268 else if ((append_p || !dstsize || len == dstlenp1)
3269 && silen && is_strlen_related_p (src, silen->ptr))
3271 /* Issue -Wstringop-overflow when appending or when writing into
3272 a destination of a known size. Otherwise, when copying into
3273 a destination of an unknown size, it's truncation. */
3274 opt_code opt = (append_p || dstsize
3275 ? OPT_Wstringop_overflow_ : OPT_Wstringop_truncation);
3276 warned = warning_at (callloc, opt,
3277 "%qD specified bound depends on the length "
3278 "of the source argument",
3279 func);
3281 if (warned)
3283 location_t strlenloc = pss->second;
3284 if (strlenloc != UNKNOWN_LOCATION && strlenloc != callloc)
3285 inform (strlenloc, "length computed here");
3289 /* Handle a memcpy-like ({mem{,p}cpy,__mem{,p}cpy_chk}) call.
3290 If strlen of the second argument is known and length of the third argument
3291 is that plus one, strlen of the first argument is the same after this
3292 call. Uses RVALS to determine range information. */
3294 void
3295 strlen_pass::handle_builtin_memcpy (built_in_function bcode)
3297 tree lhs, oldlen, newlen;
3298 gimple *stmt = gsi_stmt (m_gsi);
3299 strinfo *si, *dsi;
3301 tree len = gimple_call_arg (stmt, 2);
3302 tree src = gimple_call_arg (stmt, 1);
3303 tree dst = gimple_call_arg (stmt, 0);
3305 int didx = get_stridx (dst);
3306 strinfo *olddsi = NULL;
3307 if (didx > 0)
3308 olddsi = get_strinfo (didx);
3309 else if (didx < 0)
3310 return;
3312 if (olddsi != NULL
3313 && !integer_zerop (len))
3315 maybe_warn_overflow (stmt, false, len, olddsi, false, true);
3316 adjust_last_stmt (olddsi, stmt, false);
3319 int idx = get_stridx (src);
3320 if (idx == 0)
3321 return;
3323 bool full_string_p;
3324 if (idx > 0)
3326 gimple *def_stmt;
3328 /* Handle memcpy (x, y, l) where l's relationship with strlen (y)
3329 is known. */
3330 si = get_strinfo (idx);
3331 if (si == NULL || si->nonzero_chars == NULL_TREE)
3332 return;
3333 if (TREE_CODE (len) == INTEGER_CST
3334 && TREE_CODE (si->nonzero_chars) == INTEGER_CST)
3336 if (tree_int_cst_le (len, si->nonzero_chars))
3338 /* Copying LEN nonzero characters, where LEN is constant. */
3339 newlen = len;
3340 full_string_p = false;
3342 else
3344 /* Copying the whole of the analyzed part of SI. */
3345 newlen = si->nonzero_chars;
3346 full_string_p = si->full_string_p;
3349 else
3351 if (!si->full_string_p)
3352 return;
3353 if (TREE_CODE (len) != SSA_NAME)
3354 return;
3355 def_stmt = SSA_NAME_DEF_STMT (len);
3356 if (!is_gimple_assign (def_stmt)
3357 || gimple_assign_rhs_code (def_stmt) != PLUS_EXPR
3358 || gimple_assign_rhs1 (def_stmt) != si->nonzero_chars
3359 || !integer_onep (gimple_assign_rhs2 (def_stmt)))
3360 return;
3361 /* Copying variable-length string SI (and no more). */
3362 newlen = si->nonzero_chars;
3363 full_string_p = true;
3366 else
3368 si = NULL;
3369 /* Handle memcpy (x, "abcd", 5) or
3370 memcpy (x, "abc\0uvw", 7). */
3371 if (!tree_fits_uhwi_p (len))
3372 return;
3374 unsigned HOST_WIDE_INT clen = tree_to_uhwi (len);
3375 unsigned HOST_WIDE_INT nonzero_chars = ~idx;
3376 newlen = build_int_cst (size_type_node, MIN (nonzero_chars, clen));
3377 full_string_p = clen > nonzero_chars;
3380 if (!full_string_p
3381 && olddsi
3382 && olddsi->nonzero_chars
3383 && TREE_CODE (olddsi->nonzero_chars) == INTEGER_CST
3384 && tree_int_cst_le (newlen, olddsi->nonzero_chars))
3386 /* The SRC substring being written strictly overlaps
3387 a subsequence of the existing string OLDDSI. */
3388 newlen = olddsi->nonzero_chars;
3389 full_string_p = olddsi->full_string_p;
3392 if (olddsi != NULL && TREE_CODE (len) == SSA_NAME)
3393 adjust_last_stmt (olddsi, stmt, false);
3395 if (didx == 0)
3397 didx = new_stridx (dst);
3398 if (didx == 0)
3399 return;
3401 oldlen = NULL_TREE;
3402 if (olddsi != NULL)
3404 dsi = unshare_strinfo (olddsi);
3405 oldlen = olddsi->nonzero_chars;
3406 dsi->nonzero_chars = newlen;
3407 dsi->full_string_p = full_string_p;
3408 /* Break the chain, so adjust_related_strinfo on later pointers in
3409 the chain won't adjust this one anymore. */
3410 dsi->next = 0;
3411 dsi->stmt = NULL;
3412 dsi->endptr = NULL_TREE;
3414 else
3416 dsi = new_strinfo (dst, didx, newlen, full_string_p);
3417 set_strinfo (didx, dsi);
3418 find_equal_ptrs (dst, didx);
3420 dsi->writable = true;
3421 dsi->dont_invalidate = true;
3422 if (olddsi != NULL)
3424 tree adj = NULL_TREE;
3425 location_t loc = gimple_location (stmt);
3426 if (oldlen == NULL_TREE)
3428 else if (integer_zerop (oldlen))
3429 adj = newlen;
3430 else if (TREE_CODE (oldlen) == INTEGER_CST
3431 || TREE_CODE (newlen) == INTEGER_CST)
3432 adj = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (newlen), newlen,
3433 fold_convert_loc (loc, TREE_TYPE (newlen),
3434 oldlen));
3435 if (adj != NULL_TREE)
3436 adjust_related_strinfos (loc, dsi, adj);
3437 else
3438 dsi->prev = 0;
3440 /* memcpy src may not overlap dst, so src doesn't need to be
3441 invalidated either. */
3442 if (si != NULL)
3443 si->dont_invalidate = true;
3445 if (full_string_p)
3447 lhs = gimple_call_lhs (stmt);
3448 switch (bcode)
3450 case BUILT_IN_MEMCPY:
3451 case BUILT_IN_MEMCPY_CHK:
3452 /* Allow adjust_last_stmt to decrease this memcpy's size. */
3453 laststmt.stmt = stmt;
3454 laststmt.len = dsi->nonzero_chars;
3455 laststmt.stridx = dsi->idx;
3456 if (lhs)
3457 ssa_ver_to_stridx[SSA_NAME_VERSION (lhs)] = didx;
3458 break;
3459 case BUILT_IN_MEMPCPY:
3460 case BUILT_IN_MEMPCPY_CHK:
3461 break;
3462 default:
3463 gcc_unreachable ();
3468 /* Handle a strcat-like ({strcat,__strcat_chk}) call.
3469 If strlen of the second argument is known, strlen of the first argument
3470 is increased by the length of the second argument. Furthermore, attempt
3471 to convert it to memcpy/strcpy if the length of the first argument
3472 is known. */
3474 void
3475 strlen_pass::handle_builtin_strcat (built_in_function bcode)
3477 int idx, didx;
3478 tree srclen, args, type, fn, objsz, endptr;
3479 bool success;
3480 gimple *stmt = gsi_stmt (m_gsi);
3481 strinfo *si, *dsi;
3482 location_t loc = gimple_location (stmt);
3484 tree src = gimple_call_arg (stmt, 1);
3485 tree dst = gimple_call_arg (stmt, 0);
3487 /* Bail if the source is the same as destination. It will be diagnosed
3488 elsewhere. */
3489 if (operand_equal_p (src, dst, 0))
3490 return;
3492 tree lhs = gimple_call_lhs (stmt);
3494 didx = get_stridx (dst);
3495 if (didx < 0)
3496 return;
3498 dsi = NULL;
3499 if (didx > 0)
3500 dsi = get_strinfo (didx);
3502 srclen = NULL_TREE;
3503 si = NULL;
3504 idx = get_stridx (src);
3505 if (idx < 0)
3506 srclen = build_int_cst (size_type_node, ~idx);
3507 else if (idx > 0)
3509 si = get_strinfo (idx);
3510 if (si != NULL)
3511 srclen = get_string_length (si);
3514 /* Disable warning for the transformed statement? */
3515 opt_code no_warning_opt = no_warning;
3517 if (dsi == NULL || get_string_length (dsi) == NULL_TREE)
3520 /* The concatenation always involves copying at least one byte
3521 (the terminating nul), even if the source string is empty.
3522 If the source is unknown assume it's one character long and
3523 used that as both sizes. */
3524 tree slen = srclen;
3525 if (slen)
3527 tree type = TREE_TYPE (slen);
3528 slen = fold_build2 (PLUS_EXPR, type, slen, build_int_cst (type, 1));
3531 tree sptr = si && si->ptr ? si->ptr : src;
3532 no_warning_opt = check_bounds_or_overlap (stmt, dst, sptr, NULL_TREE,
3533 slen);
3534 if (no_warning_opt)
3535 suppress_warning (stmt, no_warning_opt);
3538 /* strcat (p, q) can be transformed into
3539 tmp = p + strlen (p); endptr = stpcpy (tmp, q);
3540 with length endptr - p if we need to compute the length
3541 later on. Don't do this transformation if we don't need
3542 it. */
3543 if (builtin_decl_implicit_p (BUILT_IN_STPCPY) && lhs == NULL_TREE)
3545 if (didx == 0)
3547 didx = new_stridx (dst);
3548 if (didx == 0)
3549 return;
3551 if (dsi == NULL)
3553 dsi = new_strinfo (dst, didx, NULL_TREE, false);
3554 set_strinfo (didx, dsi);
3555 find_equal_ptrs (dst, didx);
3557 else
3559 dsi = unshare_strinfo (dsi);
3560 dsi->nonzero_chars = NULL_TREE;
3561 dsi->full_string_p = false;
3562 dsi->next = 0;
3563 dsi->endptr = NULL_TREE;
3565 dsi->writable = true;
3566 dsi->stmt = stmt;
3567 dsi->dont_invalidate = true;
3569 return;
3572 tree dstlen = dsi->nonzero_chars;
3573 endptr = dsi->endptr;
3575 dsi = unshare_strinfo (dsi);
3576 dsi->endptr = NULL_TREE;
3577 dsi->stmt = NULL;
3578 dsi->writable = true;
3580 if (srclen != NULL_TREE)
3582 dsi->nonzero_chars = fold_build2_loc (loc, PLUS_EXPR,
3583 TREE_TYPE (dsi->nonzero_chars),
3584 dsi->nonzero_chars, srclen);
3585 gcc_assert (dsi->full_string_p);
3586 adjust_related_strinfos (loc, dsi, srclen);
3587 dsi->dont_invalidate = true;
3589 else
3591 dsi->nonzero_chars = NULL;
3592 dsi->full_string_p = false;
3593 if (lhs == NULL_TREE && builtin_decl_implicit_p (BUILT_IN_STPCPY))
3594 dsi->dont_invalidate = true;
3597 if (si != NULL)
3598 /* strcat src may not overlap dst, so src doesn't need to be
3599 invalidated either. */
3600 si->dont_invalidate = true;
3602 /* For now. Could remove the lhs from the call and add
3603 lhs = dst; afterwards. */
3604 if (lhs)
3605 return;
3607 fn = NULL_TREE;
3608 objsz = NULL_TREE;
3609 switch (bcode)
3611 case BUILT_IN_STRCAT:
3612 if (srclen != NULL_TREE)
3613 fn = builtin_decl_implicit (BUILT_IN_MEMCPY);
3614 else
3615 fn = builtin_decl_implicit (BUILT_IN_STRCPY);
3616 break;
3617 case BUILT_IN_STRCAT_CHK:
3618 if (srclen != NULL_TREE)
3619 fn = builtin_decl_explicit (BUILT_IN_MEMCPY_CHK);
3620 else
3621 fn = builtin_decl_explicit (BUILT_IN_STRCPY_CHK);
3622 objsz = gimple_call_arg (stmt, 2);
3623 break;
3624 default:
3625 gcc_unreachable ();
3628 if (fn == NULL_TREE)
3629 return;
3631 if (dsi && dstlen)
3633 tree type = TREE_TYPE (dstlen);
3635 /* Compute the size of the source sequence, including the nul. */
3636 tree srcsize = srclen ? srclen : size_zero_node;
3637 tree one = build_int_cst (type, 1);
3638 srcsize = fold_build2 (PLUS_EXPR, type, srcsize, one);
3639 tree dstsize = fold_build2 (PLUS_EXPR, type, dstlen, one);
3640 tree sptr = si && si->ptr ? si->ptr : src;
3642 no_warning_opt = check_bounds_or_overlap (stmt, dst, sptr, dstsize,
3643 srcsize);
3644 if (no_warning_opt)
3645 suppress_warning (stmt, no_warning_opt);
3648 tree len = NULL_TREE;
3649 if (srclen != NULL_TREE)
3651 args = TYPE_ARG_TYPES (TREE_TYPE (fn));
3652 type = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (args)));
3654 len = fold_convert_loc (loc, type, unshare_expr (srclen));
3655 len = fold_build2_loc (loc, PLUS_EXPR, type, len,
3656 build_int_cst (type, 1));
3657 len = force_gimple_operand_gsi (&m_gsi, len, true, NULL_TREE, true,
3658 GSI_SAME_STMT);
3660 if (endptr)
3661 dst = fold_convert_loc (loc, TREE_TYPE (dst), unshare_expr (endptr));
3662 else
3663 dst = fold_build2_loc (loc, POINTER_PLUS_EXPR, TREE_TYPE (dst), dst,
3664 fold_convert_loc (loc, sizetype,
3665 unshare_expr (dstlen)));
3666 dst = force_gimple_operand_gsi (&m_gsi, dst, true, NULL_TREE, true,
3667 GSI_SAME_STMT);
3668 if (objsz)
3670 objsz = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (objsz), objsz,
3671 fold_convert_loc (loc, TREE_TYPE (objsz),
3672 unshare_expr (dstlen)));
3673 objsz = force_gimple_operand_gsi (&m_gsi, objsz, true, NULL_TREE, true,
3674 GSI_SAME_STMT);
3676 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
3678 fprintf (dump_file, "Optimizing: ");
3679 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
3681 if (srclen != NULL_TREE)
3682 success = update_gimple_call (&m_gsi, fn, 3 + (objsz != NULL_TREE),
3683 dst, src, len, objsz);
3684 else
3685 success = update_gimple_call (&m_gsi, fn, 2 + (objsz != NULL_TREE),
3686 dst, src, objsz);
3687 if (success)
3689 stmt = gsi_stmt (m_gsi);
3690 update_stmt (stmt);
3691 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
3693 fprintf (dump_file, "into: ");
3694 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
3696 /* If srclen == NULL, note that current string length can be
3697 computed by transforming this strcpy into stpcpy. */
3698 if (srclen == NULL_TREE && dsi->dont_invalidate)
3699 dsi->stmt = stmt;
3700 adjust_last_stmt (dsi, stmt, true);
3701 if (srclen != NULL_TREE)
3703 laststmt.stmt = stmt;
3704 laststmt.len = srclen;
3705 laststmt.stridx = dsi->idx;
3708 else if (dump_file && (dump_flags & TDF_DETAILS) != 0)
3709 fprintf (dump_file, "not possible.\n");
3711 if (no_warning_opt)
3712 suppress_warning (stmt, no_warning_opt);
3715 /* Handle a call to an allocation function like alloca, malloc or calloc,
3716 or an ordinary allocation function declared with attribute alloc_size. */
3718 void
3719 strlen_pass::handle_alloc_call (built_in_function bcode)
3721 gimple *stmt = gsi_stmt (m_gsi);
3722 tree lhs = gimple_call_lhs (stmt);
3723 if (lhs == NULL_TREE)
3724 return;
3726 gcc_assert (get_stridx (lhs) == 0);
3727 int idx = new_stridx (lhs);
3728 tree length = NULL_TREE;
3729 if (bcode == BUILT_IN_CALLOC)
3730 length = build_int_cst (size_type_node, 0);
3731 strinfo *si = new_strinfo (lhs, idx, length, length != NULL_TREE);
3732 if (bcode == BUILT_IN_CALLOC)
3734 /* Only set STMT for calloc and malloc. */
3735 si->stmt = stmt;
3736 /* Only set ENDPTR for calloc. */
3737 si->endptr = lhs;
3739 else if (bcode == BUILT_IN_MALLOC)
3740 si->stmt = stmt;
3742 /* Set ALLOC is set for all allocation functions. */
3743 si->alloc = stmt;
3744 set_strinfo (idx, si);
3745 si->writable = true;
3746 si->dont_invalidate = true;
3749 /* Handle a call to memset.
3750 After a call to calloc, memset(,0,) is unnecessary.
3751 memset(malloc(n),0,n) is calloc(n,1).
3752 return true when the call is transformed, false otherwise.
3753 When nonnull uses RVALS to determine range information. */
3755 bool
3756 strlen_pass::handle_builtin_memset (bool *zero_write)
3758 gimple *memset_stmt = gsi_stmt (m_gsi);
3759 tree ptr = gimple_call_arg (memset_stmt, 0);
3760 /* Set to the non-constant offset added to PTR. */
3761 wide_int offrng[2];
3762 int idx1 = get_stridx (ptr, offrng, ptr_qry.rvals);
3763 if (idx1 <= 0)
3764 return false;
3765 strinfo *si1 = get_strinfo (idx1);
3766 if (!si1)
3767 return false;
3768 gimple *alloc_stmt = si1->alloc;
3769 if (!alloc_stmt || !is_gimple_call (alloc_stmt))
3770 return false;
3771 tree callee1 = gimple_call_fndecl (alloc_stmt);
3772 if (!valid_builtin_call (alloc_stmt))
3773 return false;
3774 tree alloc_size = gimple_call_arg (alloc_stmt, 0);
3775 tree memset_size = gimple_call_arg (memset_stmt, 2);
3777 /* Check for overflow. */
3778 maybe_warn_overflow (memset_stmt, false, memset_size, NULL, false, true);
3780 /* Bail when there is no statement associated with the destination
3781 (the statement may be null even when SI1->ALLOC is not). */
3782 if (!si1->stmt)
3783 return false;
3785 /* Avoid optimizing if store is at a variable offset from the beginning
3786 of the allocated object. */
3787 if (offrng[0] != 0 || offrng[0] != offrng[1])
3788 return false;
3790 /* Bail when the call writes a non-zero value. */
3791 if (!integer_zerop (gimple_call_arg (memset_stmt, 1)))
3792 return false;
3794 /* Let the caller know the memset call cleared the destination. */
3795 *zero_write = true;
3797 enum built_in_function code1 = DECL_FUNCTION_CODE (callee1);
3798 if (code1 == BUILT_IN_CALLOC)
3799 /* Not touching alloc_stmt */ ;
3800 else if (code1 == BUILT_IN_MALLOC
3801 && operand_equal_p (memset_size, alloc_size, 0))
3803 /* Replace the malloc + memset calls with calloc. */
3804 gimple_stmt_iterator gsi1 = gsi_for_stmt (si1->stmt);
3805 update_gimple_call (&gsi1, builtin_decl_implicit (BUILT_IN_CALLOC), 2,
3806 alloc_size, build_one_cst (size_type_node));
3807 si1->nonzero_chars = build_int_cst (size_type_node, 0);
3808 si1->full_string_p = true;
3809 si1->stmt = gsi_stmt (gsi1);
3811 else
3812 return false;
3813 tree lhs = gimple_call_lhs (memset_stmt);
3814 unlink_stmt_vdef (memset_stmt);
3815 if (lhs)
3817 gimple *assign = gimple_build_assign (lhs, ptr);
3818 gsi_replace (&m_gsi, assign, false);
3820 else
3822 gsi_remove (&m_gsi, true);
3823 release_defs (memset_stmt);
3826 return true;
3829 /* Return first such statement if RES is used in statements testing its
3830 equality to zero, and null otherwise. If EXCLUSIVE is true, return
3831 nonnull if and only RES is used in such expressions exclusively and
3832 in none other. */
3834 static gimple *
3835 use_in_zero_equality (tree res, bool exclusive = true)
3837 gimple *first_use = NULL;
3839 use_operand_p use_p;
3840 imm_use_iterator iter;
3842 FOR_EACH_IMM_USE_FAST (use_p, iter, res)
3844 gimple *use_stmt = USE_STMT (use_p);
3846 if (is_gimple_debug (use_stmt))
3847 continue;
3849 if (gimple_code (use_stmt) == GIMPLE_ASSIGN)
3851 tree_code code = gimple_assign_rhs_code (use_stmt);
3852 if (code == COND_EXPR)
3854 tree cond_expr = gimple_assign_rhs1 (use_stmt);
3855 if ((TREE_CODE (cond_expr) != EQ_EXPR
3856 && (TREE_CODE (cond_expr) != NE_EXPR))
3857 || !integer_zerop (TREE_OPERAND (cond_expr, 1)))
3859 if (exclusive)
3860 return NULL;
3861 continue;
3864 else if (code == EQ_EXPR || code == NE_EXPR)
3866 if (!integer_zerop (gimple_assign_rhs2 (use_stmt)))
3868 if (exclusive)
3869 return NULL;
3870 continue;
3873 else if (exclusive)
3874 return NULL;
3875 else
3876 continue;
3878 else if (gimple_code (use_stmt) == GIMPLE_COND)
3880 tree_code code = gimple_cond_code (use_stmt);
3881 if ((code != EQ_EXPR && code != NE_EXPR)
3882 || !integer_zerop (gimple_cond_rhs (use_stmt)))
3884 if (exclusive)
3885 return NULL;
3886 continue;
3889 else if (exclusive)
3890 return NULL;
3891 else
3892 continue;
3894 if (!first_use)
3895 first_use = use_stmt;
3898 return first_use;
3901 /* Handle a call to memcmp. We try to handle small comparisons by
3902 converting them to load and compare, and replacing the call to memcmp
3903 with a __builtin_memcmp_eq call where possible.
3904 return true when call is transformed, return false otherwise. */
3906 bool
3907 strlen_pass::handle_builtin_memcmp ()
3909 gcall *stmt = as_a <gcall *> (gsi_stmt (m_gsi));
3910 tree res = gimple_call_lhs (stmt);
3912 if (!res || !use_in_zero_equality (res))
3913 return false;
3915 tree arg1 = gimple_call_arg (stmt, 0);
3916 tree arg2 = gimple_call_arg (stmt, 1);
3917 tree len = gimple_call_arg (stmt, 2);
3918 unsigned HOST_WIDE_INT leni;
3920 if (tree_fits_uhwi_p (len)
3921 && (leni = tree_to_uhwi (len)) <= GET_MODE_SIZE (word_mode)
3922 && pow2p_hwi (leni))
3924 leni *= CHAR_TYPE_SIZE;
3925 unsigned align1 = get_pointer_alignment (arg1);
3926 unsigned align2 = get_pointer_alignment (arg2);
3927 unsigned align = MIN (align1, align2);
3928 scalar_int_mode mode;
3929 if (int_mode_for_size (leni, 1).exists (&mode)
3930 && (align >= leni || !targetm.slow_unaligned_access (mode, align)))
3932 location_t loc = gimple_location (stmt);
3933 tree type, off;
3934 type = build_nonstandard_integer_type (leni, 1);
3935 gcc_assert (known_eq (GET_MODE_BITSIZE (TYPE_MODE (type)), leni));
3936 tree ptrtype = build_pointer_type_for_mode (char_type_node,
3937 ptr_mode, true);
3938 off = build_int_cst (ptrtype, 0);
3939 arg1 = build2_loc (loc, MEM_REF, type, arg1, off);
3940 arg2 = build2_loc (loc, MEM_REF, type, arg2, off);
3941 tree tem1 = fold_const_aggregate_ref (arg1);
3942 if (tem1)
3943 arg1 = tem1;
3944 tree tem2 = fold_const_aggregate_ref (arg2);
3945 if (tem2)
3946 arg2 = tem2;
3947 res = fold_convert_loc (loc, TREE_TYPE (res),
3948 fold_build2_loc (loc, NE_EXPR,
3949 boolean_type_node,
3950 arg1, arg2));
3951 gimplify_and_update_call_from_tree (&m_gsi, res);
3952 return true;
3956 gimple_call_set_fndecl (stmt, builtin_decl_explicit (BUILT_IN_MEMCMP_EQ));
3957 return true;
3960 /* Given strinfo IDX for ARG, sets LENRNG[] to the range of lengths
3961 of the string(s) referenced by ARG if it can be determined.
3962 If the length cannot be determined, sets *SIZE to the size of
3963 the array the string is stored in, if any. If no such array is
3964 known, sets *SIZE to -1. When the strings are nul-terminated sets
3965 *NULTERM to true, otherwise to false. When nonnull uses RVALS to
3966 determine range information. Returns true on success. */
3968 bool
3969 strlen_pass::get_len_or_size (gimple *stmt, tree arg, int idx,
3970 unsigned HOST_WIDE_INT lenrng[2],
3971 unsigned HOST_WIDE_INT *size, bool *nulterm)
3973 /* Invalidate. */
3974 *size = HOST_WIDE_INT_M1U;
3976 if (idx < 0)
3978 /* IDX is the inverted constant string length. */
3979 lenrng[0] = ~idx;
3980 lenrng[1] = lenrng[0];
3981 *nulterm = true;
3982 return true;
3985 /* Set so that both LEN and ~LEN are invalid lengths, i.e., maximum
3986 possible length + 1. */
3987 lenrng[0] = lenrng[1] = HOST_WIDE_INT_MAX;
3989 if (strinfo *si = idx ? get_strinfo (idx) : NULL)
3991 /* FIXME: Handle all this in_range_strlen_dynamic. */
3992 if (!si->nonzero_chars)
3994 else if (tree_fits_uhwi_p (si->nonzero_chars))
3996 lenrng[0] = tree_to_uhwi (si->nonzero_chars);
3997 *nulterm = si->full_string_p;
3998 /* Set the upper bound only if the string is known to be
3999 nul-terminated, otherwise leave it at maximum + 1. */
4000 if (*nulterm)
4001 lenrng[1] = lenrng[0];
4003 else if (TREE_CODE (si->nonzero_chars) == SSA_NAME)
4005 value_range r;
4006 get_range_query (cfun)->range_of_expr (r, si->nonzero_chars);
4007 if (r.kind () == VR_RANGE)
4009 lenrng[0] = r.lower_bound ().to_uhwi ();
4010 lenrng[1] = r.upper_bound ().to_uhwi ();
4011 *nulterm = si->full_string_p;
4016 if (lenrng[0] != HOST_WIDE_INT_MAX)
4017 return true;
4019 /* Compute the minimum and maximum real or possible lengths. */
4020 c_strlen_data lendata = { };
4021 /* Set MAXBOUND to an arbitrary non-null non-integer node as a request
4022 to have it set to the length of the longest string in a PHI. */
4023 lendata.maxbound = arg;
4024 get_range_strlen_dynamic (arg, stmt, &lendata, ptr_qry.rvals);
4026 unsigned HOST_WIDE_INT maxbound = HOST_WIDE_INT_M1U;
4027 if (tree_fits_uhwi_p (lendata.maxbound)
4028 && !integer_all_onesp (lendata.maxbound))
4029 maxbound = tree_to_uhwi (lendata.maxbound);
4031 if (tree_fits_uhwi_p (lendata.minlen) && tree_fits_uhwi_p (lendata.maxlen))
4033 unsigned HOST_WIDE_INT minlen = tree_to_uhwi (lendata.minlen);
4034 unsigned HOST_WIDE_INT maxlen = tree_to_uhwi (lendata.maxlen);
4036 /* The longest string in this data model. */
4037 const unsigned HOST_WIDE_INT lenmax
4038 = tree_to_uhwi (max_object_size ()) - 2;
4040 if (maxbound == HOST_WIDE_INT_M1U)
4042 lenrng[0] = minlen;
4043 lenrng[1] = maxlen;
4044 *nulterm = minlen == maxlen;
4046 else if (maxlen < lenmax)
4048 *size = maxbound + 1;
4049 *nulterm = false;
4051 else
4052 return false;
4054 return true;
4057 if (maxbound != HOST_WIDE_INT_M1U
4058 && lendata.maxlen
4059 && !integer_all_onesp (lendata.maxlen))
4061 /* Set *SIZE to LENDATA.MAXBOUND which is a conservative estimate
4062 of the longest string based on the sizes of the arrays referenced
4063 by ARG. */
4064 *size = maxbound + 1;
4065 *nulterm = false;
4066 return true;
4069 return false;
4072 /* If IDX1 and IDX2 refer to strings A and B of unequal lengths, return
4073 the result of 0 == strncmp (A, B, BOUND) (which is the same as strcmp
4074 for a sufficiently large BOUND). If the result is based on the length
4075 of one string being greater than the longest string that would fit in
4076 the array pointer to by the argument, set *PLEN and *PSIZE to
4077 the corresponding length (or its complement when the string is known
4078 to be at least as long and need not be nul-terminated) and size.
4079 Otherwise return null. */
4081 tree
4082 strlen_pass::strxcmp_eqz_result (gimple *stmt, tree arg1, int idx1,
4083 tree arg2, int idx2,
4084 unsigned HOST_WIDE_INT bound,
4085 unsigned HOST_WIDE_INT len[2],
4086 unsigned HOST_WIDE_INT *psize)
4088 /* Determine the range the length of each string is in and whether it's
4089 known to be nul-terminated, or the size of the array it's stored in. */
4090 bool nul1, nul2;
4091 unsigned HOST_WIDE_INT siz1, siz2;
4092 unsigned HOST_WIDE_INT len1rng[2], len2rng[2];
4093 if (!get_len_or_size (stmt, arg1, idx1, len1rng, &siz1, &nul1)
4094 || !get_len_or_size (stmt, arg2, idx2, len2rng, &siz2, &nul2))
4095 return NULL_TREE;
4097 /* BOUND is set to HWI_M1U for strcmp and less to strncmp, and LENiRNG
4098 to HWI_MAX when invalid. Adjust the length of each string to consider
4099 to be no more than BOUND. */
4100 if (len1rng[0] < HOST_WIDE_INT_MAX && len1rng[0] > bound)
4101 len1rng[0] = bound;
4102 if (len1rng[1] < HOST_WIDE_INT_MAX && len1rng[1] > bound)
4103 len1rng[1] = bound;
4104 if (len2rng[0] < HOST_WIDE_INT_MAX && len2rng[0] > bound)
4105 len2rng[0] = bound;
4106 if (len2rng[1] < HOST_WIDE_INT_MAX && len2rng[1] > bound)
4107 len2rng[1] = bound;
4109 /* Two empty strings are equal. */
4110 if (len1rng[1] == 0 && len2rng[1] == 0)
4111 return integer_one_node;
4113 /* The strings are definitely unequal when the lower bound of the length
4114 of one of them is greater than the length of the longest string that
4115 would fit into the other array. */
4116 if (len1rng[0] == HOST_WIDE_INT_MAX
4117 && len2rng[0] != HOST_WIDE_INT_MAX
4118 && ((len2rng[0] < bound && len2rng[0] >= siz1)
4119 || len2rng[0] > siz1))
4121 *psize = siz1;
4122 len[0] = len1rng[0];
4123 /* Set LEN[0] to the lower bound of ARG1's length when it's
4124 nul-terminated or to the complement of its minimum length
4125 otherwise, */
4126 len[1] = nul2 ? len2rng[0] : ~len2rng[0];
4127 return integer_zero_node;
4130 if (len2rng[0] == HOST_WIDE_INT_MAX
4131 && len1rng[0] != HOST_WIDE_INT_MAX
4132 && ((len1rng[0] < bound && len1rng[0] >= siz2)
4133 || len1rng[0] > siz2))
4135 *psize = siz2;
4136 len[0] = nul1 ? len1rng[0] : ~len1rng[0];
4137 len[1] = len2rng[0];
4138 return integer_zero_node;
4141 /* The strings are also definitely unequal when their lengths are unequal
4142 and at least one is nul-terminated. */
4143 if (len1rng[0] != HOST_WIDE_INT_MAX
4144 && len2rng[0] != HOST_WIDE_INT_MAX
4145 && ((len1rng[1] < len2rng[0] && nul1)
4146 || (len2rng[1] < len1rng[0] && nul2)))
4148 if (bound <= len1rng[0] || bound <= len2rng[0])
4149 *psize = bound;
4150 else
4151 *psize = HOST_WIDE_INT_M1U;
4153 len[0] = len1rng[0];
4154 len[1] = len2rng[0];
4155 return integer_zero_node;
4158 /* The string lengths may be equal or unequal. Even when equal and
4159 both strings nul-terminated, without the string contents there's
4160 no way to determine whether they are equal. */
4161 return NULL_TREE;
4164 /* Diagnose pointless calls to strcmp or strncmp STMT with string
4165 arguments of lengths LEN or size SIZ and (for strncmp) BOUND,
4166 whose result is used in equality expressions that evaluate to
4167 a constant due to one argument being longer than the size of
4168 the other. */
4170 static void
4171 maybe_warn_pointless_strcmp (gimple *stmt, HOST_WIDE_INT bound,
4172 unsigned HOST_WIDE_INT len[2],
4173 unsigned HOST_WIDE_INT siz)
4175 tree lhs = gimple_call_lhs (stmt);
4176 gimple *use = use_in_zero_equality (lhs, /* exclusive = */ false);
4177 if (!use)
4178 return;
4180 bool at_least = false;
4182 /* Excessive LEN[i] indicates a lower bound. */
4183 if (len[0] > HOST_WIDE_INT_MAX)
4185 at_least = true;
4186 len[0] = ~len[0];
4189 if (len[1] > HOST_WIDE_INT_MAX)
4191 at_least = true;
4192 len[1] = ~len[1];
4195 unsigned HOST_WIDE_INT minlen = MIN (len[0], len[1]);
4197 /* FIXME: Include a note pointing to the declaration of the smaller
4198 array. */
4199 location_t stmt_loc = gimple_or_expr_nonartificial_location (stmt, lhs);
4201 tree callee = gimple_call_fndecl (stmt);
4202 bool warned = false;
4203 if (siz <= minlen && bound == -1)
4204 warned = warning_at (stmt_loc, OPT_Wstring_compare,
4205 (at_least
4206 ? G_("%qD of a string of length %wu or more and "
4207 "an array of size %wu evaluates to nonzero")
4208 : G_("%qD of a string of length %wu and an array "
4209 "of size %wu evaluates to nonzero")),
4210 callee, minlen, siz);
4211 else if (!at_least && siz <= HOST_WIDE_INT_MAX)
4213 if (len[0] != HOST_WIDE_INT_MAX && len[1] != HOST_WIDE_INT_MAX)
4214 warned = warning_at (stmt_loc, OPT_Wstring_compare,
4215 "%qD of strings of length %wu and %wu "
4216 "and bound of %wu evaluates to nonzero",
4217 callee, len[0], len[1], bound);
4218 else
4219 warned = warning_at (stmt_loc, OPT_Wstring_compare,
4220 "%qD of a string of length %wu, an array "
4221 "of size %wu and bound of %wu evaluates to "
4222 "nonzero",
4223 callee, minlen, siz, bound);
4226 if (!warned)
4227 return;
4229 location_t use_loc = gimple_location (use);
4230 if (LOCATION_LINE (stmt_loc) != LOCATION_LINE (use_loc))
4231 inform (use_loc, "in this expression");
4235 /* Optimize a call to strcmp or strncmp either by folding it to a constant
4236 when possible or by transforming the latter to the former. Warn about
4237 calls where the length of one argument is greater than the size of
4238 the array to which the other argument points if the latter's length
4239 is not known. Return true when the call has been transformed into
4240 another and false otherwise. */
4242 bool
4243 strlen_pass::handle_builtin_string_cmp ()
4245 gcall *stmt = as_a <gcall *> (gsi_stmt (m_gsi));
4246 tree lhs = gimple_call_lhs (stmt);
4248 if (!lhs)
4249 return false;
4251 tree arg1 = gimple_call_arg (stmt, 0);
4252 tree arg2 = gimple_call_arg (stmt, 1);
4253 int idx1 = get_stridx (arg1);
4254 int idx2 = get_stridx (arg2);
4256 /* For strncmp set to the value of the third argument if known. */
4257 HOST_WIDE_INT bound = -1;
4258 tree len = NULL_TREE;
4259 /* Extract the strncmp bound. */
4260 if (gimple_call_num_args (stmt) == 3)
4262 len = gimple_call_arg (stmt, 2);
4263 if (tree_fits_shwi_p (len))
4264 bound = tree_to_shwi (len);
4266 /* If the bound argument is NOT known, do nothing. */
4267 if (bound < 0)
4268 return false;
4271 /* Avoid folding if either argument is not a nul-terminated array.
4272 Defer warning until later. */
4273 if (!check_nul_terminated_array (NULL_TREE, arg1, len)
4274 || !check_nul_terminated_array (NULL_TREE, arg2, len))
4275 return false;
4278 /* Set to the length of one argument (or its complement if it's
4279 the lower bound of a range) and the size of the array storing
4280 the other if the result is based on the former being equal to
4281 or greater than the latter. */
4282 unsigned HOST_WIDE_INT len[2] = { HOST_WIDE_INT_MAX, HOST_WIDE_INT_MAX };
4283 unsigned HOST_WIDE_INT siz = HOST_WIDE_INT_M1U;
4285 /* Try to determine if the two strings are either definitely equal
4286 or definitely unequal and if so, either fold the result to zero
4287 (when equal) or set the range of the result to ~[0, 0] otherwise. */
4288 if (tree eqz = strxcmp_eqz_result (stmt, arg1, idx1, arg2, idx2, bound,
4289 len, &siz))
4291 if (integer_zerop (eqz))
4293 maybe_warn_pointless_strcmp (stmt, bound, len, siz);
4295 /* When the lengths of the first two string arguments are
4296 known to be unequal set the range of the result to non-zero.
4297 This allows the call to be eliminated if its result is only
4298 used in tests for equality to zero. */
4299 wide_int zero = wi::zero (TYPE_PRECISION (TREE_TYPE (lhs)));
4300 set_range_info (lhs, VR_ANTI_RANGE, zero, zero);
4301 return false;
4303 /* When the two strings are definitely equal (such as when they
4304 are both empty) fold the call to the constant result. */
4305 replace_call_with_value (&m_gsi, integer_zero_node);
4306 return true;
4310 /* Return if nothing is known about the strings pointed to by ARG1
4311 and ARG2. */
4312 if (idx1 == 0 && idx2 == 0)
4313 return false;
4315 /* Determine either the length or the size of each of the strings,
4316 whichever is available. */
4317 HOST_WIDE_INT cstlen1 = -1, cstlen2 = -1;
4318 HOST_WIDE_INT arysiz1 = -1, arysiz2 = -1;
4321 unsigned HOST_WIDE_INT len1rng[2], len2rng[2];
4322 unsigned HOST_WIDE_INT arsz1, arsz2;
4323 bool nulterm[2];
4325 if (!get_len_or_size (stmt, arg1, idx1, len1rng, &arsz1, nulterm)
4326 || !get_len_or_size (stmt, arg2, idx2, len2rng, &arsz2, nulterm + 1))
4327 return false;
4329 if (len1rng[0] == len1rng[1] && len1rng[0] < HOST_WIDE_INT_MAX)
4330 cstlen1 = len1rng[0];
4331 else if (arsz1 < HOST_WIDE_INT_M1U)
4332 arysiz1 = arsz1;
4334 if (len2rng[0] == len2rng[1] && len2rng[0] < HOST_WIDE_INT_MAX)
4335 cstlen2 = len2rng[0];
4336 else if (arsz2 < HOST_WIDE_INT_M1U)
4337 arysiz2 = arsz2;
4340 /* Bail if neither the string length nor the size of the array
4341 it is stored in can be determined. */
4342 if ((cstlen1 < 0 && arysiz1 < 0)
4343 || (cstlen2 < 0 && arysiz2 < 0)
4344 || (cstlen1 < 0 && cstlen2 < 0))
4345 return false;
4347 if (cstlen1 >= 0)
4348 ++cstlen1;
4349 if (cstlen2 >= 0)
4350 ++cstlen2;
4352 /* The exact number of characters to compare. */
4353 HOST_WIDE_INT cmpsiz;
4354 if (cstlen1 >= 0 && cstlen2 >= 0)
4355 cmpsiz = MIN (cstlen1, cstlen2);
4356 else if (cstlen1 >= 0)
4357 cmpsiz = cstlen1;
4358 else
4359 cmpsiz = cstlen2;
4360 if (bound >= 0)
4361 cmpsiz = MIN (cmpsiz, bound);
4362 /* The size of the array in which the unknown string is stored. */
4363 HOST_WIDE_INT varsiz = arysiz1 < 0 ? arysiz2 : arysiz1;
4365 if ((varsiz < 0 || cmpsiz < varsiz) && use_in_zero_equality (lhs))
4367 /* If the known length is less than the size of the other array
4368 and the strcmp result is only used to test equality to zero,
4369 transform the call to the equivalent _eq call. */
4370 if (tree fn = builtin_decl_implicit (bound < 0 ? BUILT_IN_STRCMP_EQ
4371 : BUILT_IN_STRNCMP_EQ))
4373 tree n = build_int_cst (size_type_node, cmpsiz);
4374 update_gimple_call (&m_gsi, fn, 3, arg1, arg2, n);
4375 return true;
4379 return false;
4382 /* Handle a POINTER_PLUS_EXPR statement.
4383 For p = "abcd" + 2; compute associated length, or if
4384 p = q + off is pointing to a '\0' character of a string, call
4385 zero_length_string on it. */
4387 void
4388 strlen_pass::handle_pointer_plus ()
4390 gimple *stmt = gsi_stmt (m_gsi);
4391 tree lhs = gimple_assign_lhs (stmt), off;
4392 int idx = get_stridx (gimple_assign_rhs1 (stmt));
4393 strinfo *si, *zsi;
4395 if (idx == 0)
4396 return;
4398 if (idx < 0)
4400 tree off = gimple_assign_rhs2 (stmt);
4401 if (tree_fits_uhwi_p (off)
4402 && tree_to_uhwi (off) <= (unsigned HOST_WIDE_INT) ~idx)
4403 ssa_ver_to_stridx[SSA_NAME_VERSION (lhs)]
4404 = ~(~idx - (int) tree_to_uhwi (off));
4405 return;
4408 si = get_strinfo (idx);
4409 if (si == NULL || si->nonzero_chars == NULL_TREE)
4410 return;
4412 off = gimple_assign_rhs2 (stmt);
4413 zsi = NULL;
4414 if (si->full_string_p && operand_equal_p (si->nonzero_chars, off, 0))
4415 zsi = zero_length_string (lhs, si);
4416 else if (TREE_CODE (off) == SSA_NAME)
4418 gimple *def_stmt = SSA_NAME_DEF_STMT (off);
4419 if (gimple_assign_single_p (def_stmt)
4420 && si->full_string_p
4421 && operand_equal_p (si->nonzero_chars,
4422 gimple_assign_rhs1 (def_stmt), 0))
4423 zsi = zero_length_string (lhs, si);
4425 if (zsi != NULL
4426 && si->endptr != NULL_TREE
4427 && si->endptr != lhs
4428 && TREE_CODE (si->endptr) == SSA_NAME)
4430 enum tree_code rhs_code
4431 = useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (si->endptr))
4432 ? SSA_NAME : NOP_EXPR;
4433 gimple_assign_set_rhs_with_ops (&m_gsi, rhs_code, si->endptr);
4434 gcc_assert (gsi_stmt (m_gsi) == stmt);
4435 update_stmt (stmt);
4439 /* Set LENRANGE to the number of nonzero bytes for a store of TYPE and
4440 clear all flags. Return true on success and false on failure. */
4442 static bool
4443 nonzero_bytes_for_type (tree type, unsigned lenrange[3],
4444 bool *nulterm, bool *allnul, bool *allnonnul)
4446 /* Use the size of the type of the expression as the size of the store,
4447 and set the upper bound of the length range to that of the size.
4448 Nothing is known about the contents so clear all flags. */
4449 tree typesize = TYPE_SIZE_UNIT (type);
4450 if (!type)
4451 return false;
4453 if (!tree_fits_uhwi_p (typesize))
4454 return false;
4456 unsigned HOST_WIDE_INT sz = tree_to_uhwi (typesize);
4457 if (sz > UINT_MAX)
4458 return false;
4460 lenrange[2] = sz;
4461 lenrange[1] = lenrange[2] ? lenrange[2] - 1 : 0;
4462 lenrange[0] = 0;
4463 *nulterm = false;
4464 *allnul = false;
4465 *allnonnul = false;
4466 return true;
4469 /* Recursively determine the minimum and maximum number of leading nonzero
4470 bytes in the representation of EXP and set LENRANGE[0] and LENRANGE[1]
4471 to each.
4472 Sets LENRANGE[2] to the total size of the access (which may be less
4473 than LENRANGE[1] when what's being referenced by EXP is a pointer
4474 rather than an array).
4475 Sets *NULTERM if the representation contains a zero byte, sets *ALLNUL
4476 if all the bytes are zero, and *ALLNONNUL is all are nonzero.
4477 OFFSET and NBYTES are the offset into the representation and
4478 the size of the access to it determined from an ADDR_EXPR (i.e.,
4479 a pointer) or MEM_REF or zero for other expressions.
4480 Uses RVALS to determine range information.
4481 Avoids recursing deeper than the limits in SNLIM allow.
4482 Returns true on success and false otherwise. */
4484 bool
4485 strlen_pass::count_nonzero_bytes (tree exp,
4486 unsigned HOST_WIDE_INT offset,
4487 unsigned HOST_WIDE_INT nbytes,
4488 unsigned lenrange[3], bool *nulterm,
4489 bool *allnul, bool *allnonnul,
4490 ssa_name_limit_t &snlim)
4492 if (TREE_CODE (exp) == SSA_NAME)
4494 /* Handle non-zero single-character stores specially. */
4495 tree type = TREE_TYPE (exp);
4496 if (TREE_CODE (type) == INTEGER_TYPE
4497 && TYPE_MODE (type) == TYPE_MODE (char_type_node)
4498 && TYPE_PRECISION (type) == TYPE_PRECISION (char_type_node)
4499 && tree_expr_nonzero_p (exp))
4501 /* If the character EXP is known to be non-zero (even if its
4502 exact value is not known) recurse once to set the range
4503 for an arbitrary constant. */
4504 exp = build_int_cst (type, 1);
4505 return count_nonzero_bytes (exp, offset, 1, lenrange,
4506 nulterm, allnul, allnonnul, snlim);
4509 gimple *stmt = SSA_NAME_DEF_STMT (exp);
4510 if (gimple_assign_single_p (stmt))
4512 exp = gimple_assign_rhs1 (stmt);
4513 if (!DECL_P (exp)
4514 && TREE_CODE (exp) != CONSTRUCTOR
4515 && TREE_CODE (exp) != MEM_REF)
4516 return false;
4517 /* Handle DECLs, CONSTRUCTOR and MEM_REF below. */
4519 else if (gimple_code (stmt) == GIMPLE_PHI)
4521 /* Avoid processing an SSA_NAME that has already been visited
4522 or if an SSA_NAME limit has been reached. Indicate success
4523 if the former and failure if the latter. */
4524 if (int res = snlim.next_phi (exp))
4525 return res > 0;
4527 /* Determine the minimum and maximum from the PHI arguments. */
4528 unsigned int n = gimple_phi_num_args (stmt);
4529 for (unsigned i = 0; i != n; i++)
4531 tree def = gimple_phi_arg_def (stmt, i);
4532 if (!count_nonzero_bytes (def, offset, nbytes, lenrange, nulterm,
4533 allnul, allnonnul, snlim))
4534 return false;
4537 return true;
4541 if (TREE_CODE (exp) == CONSTRUCTOR)
4543 if (nbytes)
4544 /* If NBYTES has already been determined by an outer MEM_REF
4545 fail rather than overwriting it (this shouldn't happen). */
4546 return false;
4548 tree type = TREE_TYPE (exp);
4549 tree size = TYPE_SIZE_UNIT (type);
4550 if (!size || !tree_fits_uhwi_p (size))
4551 return false;
4553 unsigned HOST_WIDE_INT byte_size = tree_to_uhwi (size);
4554 if (byte_size < offset)
4555 return false;
4557 nbytes = byte_size - offset;
4560 if (TREE_CODE (exp) == MEM_REF)
4562 if (nbytes)
4563 return false;
4565 tree arg = TREE_OPERAND (exp, 0);
4566 tree off = TREE_OPERAND (exp, 1);
4568 if (TREE_CODE (off) != INTEGER_CST || !tree_fits_uhwi_p (off))
4569 return false;
4571 unsigned HOST_WIDE_INT wioff = tree_to_uhwi (off);
4572 if (INT_MAX < wioff)
4573 return false;
4575 offset += wioff;
4576 if (INT_MAX < offset)
4577 return false;
4579 /* The size of the MEM_REF access determines the number of bytes. */
4580 tree type = TREE_TYPE (exp);
4581 tree typesize = TYPE_SIZE_UNIT (type);
4582 if (!typesize || !tree_fits_uhwi_p (typesize))
4583 return false;
4584 nbytes = tree_to_uhwi (typesize);
4585 if (!nbytes)
4586 return false;
4588 /* Handle MEM_REF = SSA_NAME types of assignments. */
4589 return count_nonzero_bytes_addr (arg, offset, nbytes, lenrange, nulterm,
4590 allnul, allnonnul, snlim);
4593 if (VAR_P (exp) || TREE_CODE (exp) == CONST_DECL)
4595 /* If EXP can be folded into a constant use the result. Otherwise
4596 proceed to use EXP to determine a range of the result. */
4597 if (tree fold_exp = ctor_for_folding (exp))
4598 if (fold_exp != error_mark_node)
4599 exp = fold_exp;
4602 const char *prep = NULL;
4603 if (TREE_CODE (exp) == STRING_CST)
4605 unsigned nchars = TREE_STRING_LENGTH (exp);
4606 if (nchars < offset)
4607 return false;
4609 if (!nbytes)
4610 /* If NBYTES hasn't been determined earlier, either from ADDR_EXPR
4611 (i.e., it's the size of a pointer), or from MEM_REF (as the size
4612 of the access), set it here to the size of the string, including
4613 all internal and trailing nuls if the string has any. */
4614 nbytes = nchars - offset;
4615 else if (nchars - offset < nbytes)
4616 return false;
4618 prep = TREE_STRING_POINTER (exp) + offset;
4621 unsigned char buf[256];
4622 if (!prep)
4624 if (CHAR_BIT != 8 || BITS_PER_UNIT != 8)
4625 return false;
4626 /* If the pointer to representation hasn't been set above
4627 for STRING_CST point it at the buffer. */
4628 prep = reinterpret_cast <char *>(buf);
4629 /* Try to extract the representation of the constant object
4630 or expression starting from the offset. */
4631 unsigned repsize = native_encode_expr (exp, buf, sizeof buf, offset);
4632 if (repsize < nbytes)
4634 /* This should only happen when REPSIZE is zero because EXP
4635 doesn't denote an object with a known initializer, except
4636 perhaps when the reference reads past its end. */
4637 lenrange[0] = 0;
4638 prep = NULL;
4640 else if (!nbytes)
4641 nbytes = repsize;
4642 else if (nbytes < repsize)
4643 return false;
4646 if (!nbytes)
4647 return nonzero_bytes_for_type (TREE_TYPE (exp), lenrange,
4648 nulterm, allnul, allnonnul);
4650 /* Compute the number of leading nonzero bytes in the representation
4651 and update the minimum and maximum. */
4652 unsigned n = prep ? strnlen (prep, nbytes) : nbytes;
4654 if (n < lenrange[0])
4655 lenrange[0] = n;
4656 if (lenrange[1] < n)
4657 lenrange[1] = n;
4659 /* Set the size of the representation. */
4660 if (lenrange[2] < nbytes)
4661 lenrange[2] = nbytes;
4663 /* Clear NULTERM if none of the bytes is zero. */
4664 if (n == nbytes)
4665 *nulterm = false;
4667 if (n)
4669 /* When the initial number of non-zero bytes N is non-zero, reset
4670 *ALLNUL; if N is less than that the size of the representation
4671 also clear *ALLNONNUL. */
4672 *allnul = false;
4673 if (n < nbytes)
4674 *allnonnul = false;
4676 else if (*allnul || *allnonnul)
4678 *allnonnul = false;
4680 if (*allnul)
4682 /* When either ALLNUL is set and N is zero, also determine
4683 whether all subsequent bytes after the first one (which
4684 is nul) are zero or nonzero and clear ALLNUL if not. */
4685 for (const char *p = prep; p != prep + nbytes; ++p)
4686 if (*p)
4688 *allnul = false;
4689 break;
4694 return true;
4697 /* Like count_nonzero_bytes, but instead of counting bytes in EXP, count
4698 bytes that are pointed to by EXP, which should be a pointer. */
4700 bool
4701 strlen_pass::count_nonzero_bytes_addr (tree exp,
4702 unsigned HOST_WIDE_INT offset,
4703 unsigned HOST_WIDE_INT nbytes,
4704 unsigned lenrange[3], bool *nulterm,
4705 bool *allnul, bool *allnonnul,
4706 ssa_name_limit_t &snlim)
4708 int idx = get_stridx (exp);
4709 if (idx > 0)
4711 strinfo *si = get_strinfo (idx);
4712 if (!si)
4713 return false;
4715 /* Handle both constant lengths as well non-constant lengths
4716 in some range. */
4717 unsigned HOST_WIDE_INT minlen, maxlen;
4718 if (tree_fits_shwi_p (si->nonzero_chars))
4719 minlen = maxlen = tree_to_shwi (si->nonzero_chars);
4720 else if (si->nonzero_chars
4721 && TREE_CODE (si->nonzero_chars) == SSA_NAME)
4723 value_range vr;
4724 ptr_qry.rvals->range_of_expr (vr, si->nonzero_chars, si->stmt);
4725 if (vr.kind () != VR_RANGE)
4726 return false;
4728 minlen = tree_to_uhwi (vr.min ());
4729 maxlen = tree_to_uhwi (vr.max ());
4731 else
4732 return false;
4734 if (maxlen < offset)
4735 return false;
4737 minlen = minlen < offset ? 0 : minlen - offset;
4738 maxlen -= offset;
4739 if (maxlen + 1 < nbytes)
4740 return false;
4742 if (nbytes <= minlen)
4743 *nulterm = false;
4745 if (nbytes < minlen)
4747 minlen = nbytes;
4748 if (nbytes < maxlen)
4749 maxlen = nbytes;
4752 if (minlen < lenrange[0])
4753 lenrange[0] = minlen;
4754 if (lenrange[1] < maxlen)
4755 lenrange[1] = maxlen;
4757 if (lenrange[2] < nbytes)
4758 lenrange[2] = nbytes;
4760 /* Since only the length of the string are known and not its contents,
4761 clear ALLNUL and ALLNONNUL purely on the basis of the length. */
4762 *allnul = false;
4763 if (minlen < nbytes)
4764 *allnonnul = false;
4766 return true;
4769 if (TREE_CODE (exp) == ADDR_EXPR)
4770 return count_nonzero_bytes (TREE_OPERAND (exp, 0), offset, nbytes,
4771 lenrange, nulterm, allnul, allnonnul, snlim);
4773 if (TREE_CODE (exp) == SSA_NAME)
4775 gimple *stmt = SSA_NAME_DEF_STMT (exp);
4776 if (gimple_code (stmt) == GIMPLE_PHI)
4778 /* Avoid processing an SSA_NAME that has already been visited
4779 or if an SSA_NAME limit has been reached. Indicate success
4780 if the former and failure if the latter. */
4781 if (int res = snlim.next_phi (exp))
4782 return res > 0;
4784 /* Determine the minimum and maximum from the PHI arguments. */
4785 unsigned int n = gimple_phi_num_args (stmt);
4786 for (unsigned i = 0; i != n; i++)
4788 tree def = gimple_phi_arg_def (stmt, i);
4789 if (!count_nonzero_bytes_addr (def, offset, nbytes, lenrange,
4790 nulterm, allnul, allnonnul,
4791 snlim))
4792 return false;
4795 return true;
4799 /* Otherwise we don't know anything. */
4800 lenrange[0] = 0;
4801 if (lenrange[1] < nbytes)
4802 lenrange[1] = nbytes;
4803 if (lenrange[2] < nbytes)
4804 lenrange[2] = nbytes;
4805 *nulterm = false;
4806 *allnul = false;
4807 *allnonnul = false;
4808 return true;
4811 /* Same as above except with an implicit SSA_NAME limit. When EXPR_OR_TYPE
4812 is a type rather than an expression use its size to compute the range.
4813 RVALS is used to determine ranges of dynamically computed string lengths
4814 (the results of strlen). */
4816 bool
4817 strlen_pass::count_nonzero_bytes (tree expr_or_type,
4818 unsigned lenrange[3], bool *nulterm,
4819 bool *allnul, bool *allnonnul)
4821 if (TYPE_P (expr_or_type))
4822 return nonzero_bytes_for_type (expr_or_type, lenrange,
4823 nulterm, allnul, allnonnul);
4825 /* Set to optimistic values so the caller doesn't have to worry about
4826 initializing these and to what. On success, the function will clear
4827 these if it determines their values are different but being recursive
4828 it never sets either to true. On failure, their values are
4829 unspecified. */
4830 *nulterm = true;
4831 *allnul = true;
4832 *allnonnul = true;
4834 ssa_name_limit_t snlim;
4835 tree expr = expr_or_type;
4836 return count_nonzero_bytes (expr, 0, 0, lenrange, nulterm, allnul, allnonnul,
4837 snlim);
4840 /* Handle a single or multibyte store other than by a built-in function,
4841 either via a single character assignment or by multi-byte assignment
4842 either via MEM_REF or via a type other than char (such as in
4843 '*(int*)a = 12345'). Return true to let the caller advance *GSI to
4844 the next statement in the basic block and false otherwise. */
4846 bool
4847 strlen_pass::handle_store (bool *zero_write)
4849 gimple *stmt = gsi_stmt (m_gsi);
4850 /* The LHS and RHS of the store. The RHS is null if STMT is a function
4851 call. STORETYPE is the type of the store (determined from either
4852 the RHS of the assignment statement or the LHS of a function call. */
4853 tree lhs, rhs, storetype;
4854 if (is_gimple_assign (stmt))
4856 lhs = gimple_assign_lhs (stmt);
4857 rhs = gimple_assign_rhs1 (stmt);
4858 storetype = TREE_TYPE (rhs);
4860 else if (is_gimple_call (stmt))
4862 lhs = gimple_call_lhs (stmt);
4863 rhs = NULL_TREE;
4864 storetype = TREE_TYPE (lhs);
4866 else
4867 return true;
4869 tree ssaname = NULL_TREE;
4870 strinfo *si = NULL;
4871 int idx = -1;
4873 range_query *const rvals = ptr_qry.rvals;
4875 /* The offset of the first byte in LHS modified by the store. */
4876 unsigned HOST_WIDE_INT offset = 0;
4878 if (TREE_CODE (lhs) == MEM_REF
4879 && TREE_CODE (TREE_OPERAND (lhs, 0)) == SSA_NAME)
4881 tree mem_offset = TREE_OPERAND (lhs, 1);
4882 if (tree_fits_uhwi_p (mem_offset))
4884 /* Get the strinfo for the base, and use it if it starts with at
4885 least OFFSET nonzero characters. This is trivially true if
4886 OFFSET is zero. */
4887 offset = tree_to_uhwi (mem_offset);
4888 idx = get_stridx (TREE_OPERAND (lhs, 0));
4889 if (idx > 0)
4890 si = get_strinfo (idx);
4891 if (offset == 0)
4892 ssaname = TREE_OPERAND (lhs, 0);
4893 else if (si == NULL || compare_nonzero_chars (si, offset, rvals) < 0)
4895 *zero_write = rhs ? initializer_zerop (rhs) : false;
4897 bool dummy;
4898 unsigned lenrange[] = { UINT_MAX, 0, 0 };
4899 if (count_nonzero_bytes (rhs ? rhs : storetype, lenrange,
4900 &dummy, &dummy, &dummy))
4901 maybe_warn_overflow (stmt, true, lenrange[2]);
4903 return true;
4907 else
4909 idx = get_addr_stridx (lhs, NULL_TREE, &offset, rvals);
4910 if (idx > 0)
4911 si = get_strinfo (idx);
4914 /* Minimum and maximum leading non-zero bytes and the size of the store. */
4915 unsigned lenrange[] = { UINT_MAX, 0, 0 };
4917 /* Set to the minimum length of the string being assigned if known. */
4918 unsigned HOST_WIDE_INT rhs_minlen;
4920 /* STORING_NONZERO_P is true iff not all stored characters are zero.
4921 STORING_ALL_NONZERO_P is true if all stored characters are zero.
4922 STORING_ALL_ZEROS_P is true iff all stored characters are zero.
4923 Both are false when it's impossible to determine which is true. */
4924 bool storing_nonzero_p;
4925 bool storing_all_nonzero_p;
4926 bool storing_all_zeros_p;
4927 /* FULL_STRING_P is set when the stored sequence of characters form
4928 a nul-terminated string. */
4929 bool full_string_p;
4931 const bool ranges_valid
4932 = count_nonzero_bytes (rhs ? rhs : storetype, lenrange, &full_string_p,
4933 &storing_all_zeros_p, &storing_all_nonzero_p);
4935 if (ranges_valid)
4937 rhs_minlen = lenrange[0];
4938 storing_nonzero_p = lenrange[1] > 0;
4939 *zero_write = storing_all_zeros_p;
4941 maybe_warn_overflow (stmt, true, lenrange[2]);
4943 else
4945 rhs_minlen = HOST_WIDE_INT_M1U;
4946 full_string_p = false;
4947 storing_nonzero_p = false;
4948 storing_all_zeros_p = false;
4949 storing_all_nonzero_p = false;
4952 if (si != NULL)
4954 /* The corresponding element is set to 1 if the first and last
4955 element, respectively, of the sequence of characters being
4956 written over the string described by SI ends before
4957 the terminating nul (if it has one), to zero if the nul is
4958 being overwritten but not beyond, or negative otherwise. */
4959 int store_before_nul[2];
4960 if (ranges_valid)
4962 /* The offset of the last stored byte. */
4963 unsigned HOST_WIDE_INT endoff = offset + lenrange[2] - 1;
4964 store_before_nul[0] = compare_nonzero_chars (si, offset, rvals);
4965 if (endoff == offset)
4966 store_before_nul[1] = store_before_nul[0];
4967 else
4968 store_before_nul[1] = compare_nonzero_chars (si, endoff, rvals);
4970 else
4972 store_before_nul[0] = compare_nonzero_chars (si, offset, rvals);
4973 store_before_nul[1] = store_before_nul[0];
4974 gcc_assert (offset == 0 || store_before_nul[0] >= 0);
4977 if (storing_all_zeros_p
4978 && store_before_nul[0] == 0
4979 && store_before_nul[1] == 0
4980 && si->full_string_p)
4982 /* When overwriting a '\0' with a '\0', the store can be removed
4983 if we know it has been stored in the current function. */
4984 if (!stmt_could_throw_p (cfun, stmt) && si->writable)
4986 unlink_stmt_vdef (stmt);
4987 release_defs (stmt);
4988 gsi_remove (&m_gsi, true);
4989 return false;
4991 else
4993 si->writable = true;
4994 gsi_next (&m_gsi);
4995 return false;
4999 if (store_before_nul[1] > 0
5000 && storing_nonzero_p
5001 && lenrange[0] == lenrange[1]
5002 && lenrange[0] == lenrange[2]
5003 && TREE_CODE (storetype) == INTEGER_TYPE)
5005 /* Handle a store of one or more non-nul characters that ends
5006 before the terminating nul of the destination and so does
5007 not affect its length
5008 If si->nonzero_chars > OFFSET, we aren't overwriting '\0',
5009 and if we aren't storing '\0', we know that the length of
5010 the string and any other zero terminated string in memory
5011 remains the same. In that case we move to the next gimple
5012 statement and return to signal the caller that it shouldn't
5013 invalidate anything.
5015 This is beneficial for cases like:
5017 char p[20];
5018 void foo (char *q)
5020 strcpy (p, "foobar");
5021 size_t len = strlen (p); // can be folded to 6
5022 size_t len2 = strlen (q); // has to be computed
5023 p[0] = 'X';
5024 size_t len3 = strlen (p); // can be folded to 6
5025 size_t len4 = strlen (q); // can be folded to len2
5026 bar (len, len2, len3, len4);
5027 } */
5028 gsi_next (&m_gsi);
5029 return false;
5032 if (storing_all_zeros_p
5033 || storing_nonzero_p
5034 || (offset != 0 && store_before_nul[1] > 0))
5036 /* When STORING_NONZERO_P, we know that the string will start
5037 with at least OFFSET + 1 nonzero characters. If storing
5038 a single character, set si->NONZERO_CHARS to the result.
5039 If storing multiple characters, try to determine the number
5040 of leading non-zero characters and set si->NONZERO_CHARS to
5041 the result instead.
5043 When STORING_ALL_ZEROS_P, we know that the string is now
5044 OFFSET characters long.
5046 Otherwise, we're storing an unknown value at offset OFFSET,
5047 so need to clip the nonzero_chars to OFFSET.
5048 Use the minimum length of the string (or individual character)
5049 being stored if it's known. Otherwise, STORING_NONZERO_P
5050 guarantees it's at least 1. */
5051 HOST_WIDE_INT len
5052 = storing_nonzero_p && ranges_valid ? lenrange[0] : 1;
5053 location_t loc = gimple_location (stmt);
5054 tree oldlen = si->nonzero_chars;
5055 if (store_before_nul[1] == 0 && si->full_string_p)
5056 /* We're overwriting the nul terminator with a nonzero or
5057 unknown character. If the previous stmt was a memcpy,
5058 its length may be decreased. */
5059 adjust_last_stmt (si, stmt, false);
5060 si = unshare_strinfo (si);
5061 if (storing_nonzero_p)
5063 gcc_assert (len >= 0);
5064 si->nonzero_chars = build_int_cst (size_type_node, offset + len);
5066 else
5067 si->nonzero_chars = build_int_cst (size_type_node, offset);
5069 /* Set FULL_STRING_P only if the length of the strings being
5070 written is the same, and clear it if the strings have
5071 different lengths. In the latter case the length stored
5072 in si->NONZERO_CHARS becomes the lower bound.
5073 FIXME: Handle the upper bound of the length if possible. */
5074 si->full_string_p = full_string_p && lenrange[0] == lenrange[1];
5076 if (storing_all_zeros_p
5077 && ssaname
5078 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (ssaname))
5079 si->endptr = ssaname;
5080 else
5081 si->endptr = NULL;
5082 si->next = 0;
5083 si->stmt = NULL;
5084 si->writable = true;
5085 si->dont_invalidate = true;
5086 if (oldlen)
5088 tree adj = fold_build2_loc (loc, MINUS_EXPR, size_type_node,
5089 si->nonzero_chars, oldlen);
5090 adjust_related_strinfos (loc, si, adj);
5092 else
5093 si->prev = 0;
5096 else if (idx == 0 && (storing_all_zeros_p || storing_nonzero_p))
5098 if (ssaname)
5099 idx = new_stridx (ssaname);
5100 else
5101 idx = new_addr_stridx (lhs);
5102 if (idx != 0)
5104 tree ptr = (ssaname ? ssaname : build_fold_addr_expr (lhs));
5106 HOST_WIDE_INT slen;
5107 if (storing_all_zeros_p)
5108 slen = 0;
5109 else if (storing_nonzero_p && ranges_valid)
5111 /* FIXME: Handle the upper bound of the length when
5112 LENRANGE[0] != LENRANGE[1]. */
5113 slen = lenrange[0];
5114 if (lenrange[0] != lenrange[1])
5115 /* Set the minimum length but ignore the maximum
5116 for now. */
5117 full_string_p = false;
5119 else
5120 slen = -1;
5122 tree len = (slen <= 0
5123 ? size_zero_node
5124 : build_int_cst (size_type_node, slen));
5125 si = new_strinfo (ptr, idx, len, slen >= 0 && full_string_p);
5126 set_strinfo (idx, si);
5127 if (storing_all_zeros_p
5128 && ssaname
5129 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (ssaname))
5130 si->endptr = ssaname;
5131 si->dont_invalidate = true;
5132 si->writable = true;
5135 else if (idx == 0
5136 && rhs_minlen < HOST_WIDE_INT_M1U
5137 && ssaname == NULL_TREE
5138 && TREE_CODE (TREE_TYPE (lhs)) == ARRAY_TYPE)
5140 HOST_WIDE_INT a = int_size_in_bytes (TREE_TYPE (lhs));
5141 if (a > 0 && (unsigned HOST_WIDE_INT) a > rhs_minlen)
5143 int idx = new_addr_stridx (lhs);
5144 if (idx != 0)
5146 si = new_strinfo (build_fold_addr_expr (lhs), idx,
5147 build_int_cst (size_type_node, rhs_minlen),
5148 full_string_p);
5149 set_strinfo (idx, si);
5150 si->dont_invalidate = true;
5155 if (si != NULL && offset == 0 && storing_all_zeros_p && lenrange[2] == 1)
5157 /* For single-byte stores only, allow adjust_last_stmt to remove
5158 the statement if the stored '\0' is immediately overwritten. */
5159 laststmt.stmt = stmt;
5160 laststmt.len = build_int_cst (size_type_node, 1);
5161 laststmt.stridx = si->idx;
5163 return true;
5166 /* Try to fold strstr (s, t) eq/ne s to strncmp (s, t, strlen (t)) eq/ne 0. */
5168 static void
5169 fold_strstr_to_strncmp (tree rhs1, tree rhs2, gimple *stmt)
5171 if (TREE_CODE (rhs1) != SSA_NAME
5172 || TREE_CODE (rhs2) != SSA_NAME)
5173 return;
5175 gimple *call_stmt = NULL;
5176 for (int pass = 0; pass < 2; pass++)
5178 gimple *g = SSA_NAME_DEF_STMT (rhs1);
5179 if (gimple_call_builtin_p (g, BUILT_IN_STRSTR)
5180 && has_single_use (rhs1)
5181 && gimple_call_arg (g, 0) == rhs2)
5183 call_stmt = g;
5184 break;
5186 std::swap (rhs1, rhs2);
5189 if (call_stmt)
5191 tree arg0 = gimple_call_arg (call_stmt, 0);
5193 if (arg0 == rhs2)
5195 tree arg1 = gimple_call_arg (call_stmt, 1);
5196 tree arg1_len = NULL_TREE;
5197 int idx = get_stridx (arg1);
5199 if (idx)
5201 if (idx < 0)
5202 arg1_len = build_int_cst (size_type_node, ~idx);
5203 else
5205 strinfo *si = get_strinfo (idx);
5206 if (si)
5207 arg1_len = get_string_length (si);
5211 if (arg1_len != NULL_TREE)
5213 gimple_stmt_iterator gsi = gsi_for_stmt (call_stmt);
5214 tree strncmp_decl = builtin_decl_explicit (BUILT_IN_STRNCMP);
5216 if (!is_gimple_val (arg1_len))
5218 tree arg1_len_tmp = make_ssa_name (TREE_TYPE (arg1_len));
5219 gassign *arg1_stmt = gimple_build_assign (arg1_len_tmp,
5220 arg1_len);
5221 gsi_insert_before (&gsi, arg1_stmt, GSI_SAME_STMT);
5222 arg1_len = arg1_len_tmp;
5225 gcall *strncmp_call = gimple_build_call (strncmp_decl, 3,
5226 arg0, arg1, arg1_len);
5227 tree strncmp_lhs = make_ssa_name (integer_type_node);
5228 gimple_set_vuse (strncmp_call, gimple_vuse (call_stmt));
5229 gimple_call_set_lhs (strncmp_call, strncmp_lhs);
5230 gsi_remove (&gsi, true);
5231 gsi_insert_before (&gsi, strncmp_call, GSI_SAME_STMT);
5232 tree zero = build_zero_cst (TREE_TYPE (strncmp_lhs));
5234 if (is_gimple_assign (stmt))
5236 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
5238 tree cond = gimple_assign_rhs1 (stmt);
5239 TREE_OPERAND (cond, 0) = strncmp_lhs;
5240 TREE_OPERAND (cond, 1) = zero;
5242 else
5244 gimple_assign_set_rhs1 (stmt, strncmp_lhs);
5245 gimple_assign_set_rhs2 (stmt, zero);
5248 else
5250 gcond *cond = as_a<gcond *> (stmt);
5251 gimple_cond_set_lhs (cond, strncmp_lhs);
5252 gimple_cond_set_rhs (cond, zero);
5254 update_stmt (stmt);
5260 /* Return true if TYPE corresponds to a narrow character type. */
5262 static bool
5263 is_char_type (tree type)
5265 return (TREE_CODE (type) == INTEGER_TYPE
5266 && TYPE_MODE (type) == TYPE_MODE (char_type_node)
5267 && TYPE_PRECISION (type) == TYPE_PRECISION (char_type_node));
5270 /* Check the built-in call at GSI for validity and optimize it.
5271 Uses RVALS to determine range information.
5272 Return true to let the caller advance *GSI to the next statement
5273 in the basic block and false otherwise. */
5275 bool
5276 strlen_pass::check_and_optimize_call (bool *zero_write)
5278 gimple *stmt = gsi_stmt (m_gsi);
5280 if (!gimple_call_builtin_p (stmt, BUILT_IN_NORMAL))
5282 tree fntype = gimple_call_fntype (stmt);
5283 if (!fntype)
5284 return true;
5286 if (lookup_attribute ("alloc_size", TYPE_ATTRIBUTES (fntype)))
5288 handle_alloc_call (BUILT_IN_NONE);
5289 return true;
5292 if (tree lhs = gimple_call_lhs (stmt))
5293 handle_assign (lhs, zero_write);
5295 /* Proceed to handle user-defined formatting functions. */
5298 /* When not optimizing we must be checking printf calls which
5299 we do even for user-defined functions when they are declared
5300 with attribute format. */
5301 if (!flag_optimize_strlen
5302 || !strlen_optimize
5303 || !valid_builtin_call (stmt))
5304 return !handle_printf_call (&m_gsi, ptr_qry);
5306 tree callee = gimple_call_fndecl (stmt);
5307 switch (DECL_FUNCTION_CODE (callee))
5309 case BUILT_IN_STRLEN:
5310 case BUILT_IN_STRNLEN:
5311 handle_builtin_strlen ();
5312 break;
5313 case BUILT_IN_STRCHR:
5314 handle_builtin_strchr ();
5315 break;
5316 case BUILT_IN_STRCPY:
5317 case BUILT_IN_STRCPY_CHK:
5318 case BUILT_IN_STPCPY:
5319 case BUILT_IN_STPCPY_CHK:
5320 handle_builtin_strcpy (DECL_FUNCTION_CODE (callee));
5321 break;
5323 case BUILT_IN_STRNCAT:
5324 case BUILT_IN_STRNCAT_CHK:
5325 handle_builtin_strncat (DECL_FUNCTION_CODE (callee));
5326 break;
5328 case BUILT_IN_STPNCPY:
5329 case BUILT_IN_STPNCPY_CHK:
5330 case BUILT_IN_STRNCPY:
5331 case BUILT_IN_STRNCPY_CHK:
5332 handle_builtin_stxncpy_strncat (false);
5333 break;
5335 case BUILT_IN_MEMCPY:
5336 case BUILT_IN_MEMCPY_CHK:
5337 case BUILT_IN_MEMPCPY:
5338 case BUILT_IN_MEMPCPY_CHK:
5339 handle_builtin_memcpy (DECL_FUNCTION_CODE (callee));
5340 break;
5341 case BUILT_IN_STRCAT:
5342 case BUILT_IN_STRCAT_CHK:
5343 handle_builtin_strcat (DECL_FUNCTION_CODE (callee));
5344 break;
5345 case BUILT_IN_ALLOCA:
5346 case BUILT_IN_ALLOCA_WITH_ALIGN:
5347 case BUILT_IN_MALLOC:
5348 case BUILT_IN_CALLOC:
5349 handle_alloc_call (DECL_FUNCTION_CODE (callee));
5350 break;
5351 case BUILT_IN_MEMSET:
5352 if (handle_builtin_memset (zero_write))
5353 return false;
5354 break;
5355 case BUILT_IN_MEMCMP:
5356 if (handle_builtin_memcmp ())
5357 return false;
5358 break;
5359 case BUILT_IN_STRCMP:
5360 case BUILT_IN_STRNCMP:
5361 if (handle_builtin_string_cmp ())
5362 return false;
5363 break;
5364 default:
5365 if (handle_printf_call (&m_gsi, ptr_qry))
5366 return false;
5367 break;
5370 return true;
5373 /* Handle an assignment statement at *GSI to a LHS of integral type.
5374 If GSI's basic block needs clean-up of EH, set *CLEANUP_EH to true. */
5376 void
5377 strlen_pass::handle_integral_assign (bool *cleanup_eh)
5379 gimple *stmt = gsi_stmt (m_gsi);
5380 tree lhs = gimple_assign_lhs (stmt);
5381 tree lhs_type = TREE_TYPE (lhs);
5383 enum tree_code code = gimple_assign_rhs_code (stmt);
5384 if (code == COND_EXPR)
5386 tree cond = gimple_assign_rhs1 (stmt);
5387 enum tree_code cond_code = TREE_CODE (cond);
5389 if (cond_code == EQ_EXPR || cond_code == NE_EXPR)
5390 fold_strstr_to_strncmp (TREE_OPERAND (cond, 0),
5391 TREE_OPERAND (cond, 1), stmt);
5393 else if (code == EQ_EXPR || code == NE_EXPR)
5394 fold_strstr_to_strncmp (gimple_assign_rhs1 (stmt),
5395 gimple_assign_rhs2 (stmt), stmt);
5396 else if (gimple_assign_load_p (stmt)
5397 && TREE_CODE (lhs_type) == INTEGER_TYPE
5398 && TYPE_MODE (lhs_type) == TYPE_MODE (char_type_node)
5399 && (TYPE_PRECISION (lhs_type)
5400 == TYPE_PRECISION (char_type_node))
5401 && !gimple_has_volatile_ops (stmt))
5403 tree off = integer_zero_node;
5404 unsigned HOST_WIDE_INT coff = 0;
5405 int idx = 0;
5406 tree rhs1 = gimple_assign_rhs1 (stmt);
5407 if (code == MEM_REF)
5409 idx = get_stridx (TREE_OPERAND (rhs1, 0));
5410 if (idx > 0)
5412 strinfo *si = get_strinfo (idx);
5413 if (si
5414 && si->nonzero_chars
5415 && TREE_CODE (si->nonzero_chars) == INTEGER_CST
5416 && (wi::to_widest (si->nonzero_chars)
5417 >= wi::to_widest (off)))
5418 off = TREE_OPERAND (rhs1, 1);
5419 else
5420 /* This case is not useful. See if get_addr_stridx
5421 returns something usable. */
5422 idx = 0;
5425 if (idx <= 0)
5426 idx = get_addr_stridx (rhs1, NULL_TREE, &coff);
5427 if (idx > 0)
5429 strinfo *si = get_strinfo (idx);
5430 if (si
5431 && si->nonzero_chars
5432 && TREE_CODE (si->nonzero_chars) == INTEGER_CST)
5434 widest_int w1 = wi::to_widest (si->nonzero_chars);
5435 widest_int w2 = wi::to_widest (off) + coff;
5436 if (w1 == w2
5437 && si->full_string_p)
5439 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
5441 fprintf (dump_file, "Optimizing: ");
5442 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
5445 /* Reading the final '\0' character. */
5446 tree zero = build_int_cst (lhs_type, 0);
5447 gimple_set_vuse (stmt, NULL_TREE);
5448 gimple_assign_set_rhs_from_tree (&m_gsi, zero);
5449 *cleanup_eh
5450 |= maybe_clean_or_replace_eh_stmt (stmt,
5451 gsi_stmt (m_gsi));
5452 stmt = gsi_stmt (m_gsi);
5453 update_stmt (stmt);
5455 if (dump_file && (dump_flags & TDF_DETAILS) != 0)
5457 fprintf (dump_file, "into: ");
5458 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
5461 else if (w1 > w2)
5463 /* Reading a character before the final '\0'
5464 character. Just set the value range to ~[0, 0]
5465 if we don't have anything better. */
5466 value_range r;
5467 if (!get_range_query (cfun)->range_of_expr (r, lhs)
5468 || r.varying_p ())
5470 r.set_nonzero (lhs_type);
5471 set_range_info (lhs, r);
5477 else if (code == MEM_REF && TREE_CODE (lhs) == SSA_NAME)
5479 if (int idx = new_stridx (lhs))
5481 /* Record multi-byte assignments from MEM_REFs. */
5482 bool storing_all_nonzero_p;
5483 bool storing_all_zeros_p;
5484 bool full_string_p;
5485 unsigned lenrange[] = { UINT_MAX, 0, 0 };
5486 tree rhs = gimple_assign_rhs1 (stmt);
5487 const bool ranges_valid
5488 = count_nonzero_bytes (rhs, lenrange, &full_string_p,
5489 &storing_all_zeros_p,
5490 &storing_all_nonzero_p);
5491 if (ranges_valid)
5493 tree length = build_int_cst (sizetype, lenrange[0]);
5494 strinfo *si = new_strinfo (lhs, idx, length, full_string_p);
5495 set_strinfo (idx, si);
5496 si->writable = true;
5497 si->dont_invalidate = true;
5502 if (strlen_to_stridx)
5504 tree rhs1 = gimple_assign_rhs1 (stmt);
5505 if (stridx_strlenloc *ps = strlen_to_stridx->get (rhs1))
5506 strlen_to_stridx->put (lhs, stridx_strlenloc (*ps));
5510 /* Handle assignment statement at *GSI to LHS. Set *ZERO_WRITE if
5511 the assignent stores all zero bytes.. */
5513 bool
5514 strlen_pass::handle_assign (tree lhs, bool *zero_write)
5516 tree type = TREE_TYPE (lhs);
5517 if (TREE_CODE (type) == ARRAY_TYPE)
5518 type = TREE_TYPE (type);
5520 bool is_char_store = is_char_type (type);
5521 if (!is_char_store && TREE_CODE (lhs) == MEM_REF)
5523 /* To consider stores into char objects via integer types other
5524 than char but not those to non-character objects, determine
5525 the type of the destination rather than just the type of
5526 the access. */
5527 for (int i = 0; i != 2; ++i)
5529 tree ref = TREE_OPERAND (lhs, i);
5530 type = TREE_TYPE (ref);
5531 if (TREE_CODE (type) == POINTER_TYPE)
5532 type = TREE_TYPE (type);
5533 if (TREE_CODE (type) == ARRAY_TYPE)
5534 type = TREE_TYPE (type);
5535 if (is_char_type (type))
5537 is_char_store = true;
5538 break;
5543 /* Handle a single or multibyte assignment. */
5544 if (is_char_store && !handle_store (zero_write))
5545 return false;
5547 return true;
5551 /* Attempt to check for validity of the performed access a single statement
5552 at *GSI using string length knowledge, and to optimize it.
5553 If the given basic block needs clean-up of EH, CLEANUP_EH is set to
5554 true. Return true to let the caller advance *GSI to the next statement
5555 in the basic block and false otherwise. */
5557 bool
5558 strlen_pass::check_and_optimize_stmt (bool *cleanup_eh)
5560 gimple *stmt = gsi_stmt (m_gsi);
5562 /* For statements that modify a string, set to true if the write
5563 is only zeros. */
5564 bool zero_write = false;
5566 if (is_gimple_call (stmt))
5568 if (!check_and_optimize_call (&zero_write))
5569 return false;
5571 else if (!flag_optimize_strlen || !strlen_optimize)
5572 return true;
5573 else if (is_gimple_assign (stmt) && !gimple_clobber_p (stmt))
5575 /* Handle non-clobbering assignment. */
5576 tree lhs = gimple_assign_lhs (stmt);
5577 tree lhs_type = TREE_TYPE (lhs);
5579 if (TREE_CODE (lhs) == SSA_NAME && POINTER_TYPE_P (lhs_type))
5581 if (gimple_assign_single_p (stmt)
5582 || (gimple_assign_cast_p (stmt)
5583 && POINTER_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt)))))
5585 int idx = get_stridx (gimple_assign_rhs1 (stmt));
5586 ssa_ver_to_stridx[SSA_NAME_VERSION (lhs)] = idx;
5588 else if (gimple_assign_rhs_code (stmt) == POINTER_PLUS_EXPR)
5589 handle_pointer_plus ();
5591 else if (TREE_CODE (lhs) == SSA_NAME && INTEGRAL_TYPE_P (lhs_type))
5592 /* Handle assignment to a character. */
5593 handle_integral_assign (cleanup_eh);
5594 else if (TREE_CODE (lhs) != SSA_NAME && !TREE_SIDE_EFFECTS (lhs))
5595 if (!handle_assign (lhs, &zero_write))
5596 return false;
5598 else if (gcond *cond = dyn_cast<gcond *> (stmt))
5600 enum tree_code code = gimple_cond_code (cond);
5601 if (code == EQ_EXPR || code == NE_EXPR)
5602 fold_strstr_to_strncmp (gimple_cond_lhs (stmt),
5603 gimple_cond_rhs (stmt), stmt);
5606 if (gimple_vdef (stmt))
5607 maybe_invalidate (stmt, zero_write);
5608 return true;
5611 /* Recursively call maybe_invalidate on stmts that might be executed
5612 in between dombb and current bb and that contain a vdef. Stop when
5613 *count stmts are inspected, or if the whole strinfo vector has
5614 been invalidated. */
5616 static void
5617 do_invalidate (basic_block dombb, gimple *phi, bitmap visited, int *count)
5619 unsigned int i, n = gimple_phi_num_args (phi);
5621 for (i = 0; i < n; i++)
5623 tree vuse = gimple_phi_arg_def (phi, i);
5624 gimple *stmt = SSA_NAME_DEF_STMT (vuse);
5625 basic_block bb = gimple_bb (stmt);
5626 if (bb == NULL
5627 || bb == dombb
5628 || !bitmap_set_bit (visited, bb->index)
5629 || !dominated_by_p (CDI_DOMINATORS, bb, dombb))
5630 continue;
5631 while (1)
5633 if (gimple_code (stmt) == GIMPLE_PHI)
5635 do_invalidate (dombb, stmt, visited, count);
5636 if (*count == 0)
5637 return;
5638 break;
5640 if (--*count == 0)
5641 return;
5642 if (!maybe_invalidate (stmt))
5644 *count = 0;
5645 return;
5647 vuse = gimple_vuse (stmt);
5648 stmt = SSA_NAME_DEF_STMT (vuse);
5649 if (gimple_bb (stmt) != bb)
5651 bb = gimple_bb (stmt);
5652 if (bb == NULL
5653 || bb == dombb
5654 || !bitmap_set_bit (visited, bb->index)
5655 || !dominated_by_p (CDI_DOMINATORS, bb, dombb))
5656 break;
5662 /* Release pointer_query cache. */
5664 strlen_pass::~strlen_pass ()
5666 ptr_qry.flush_cache ();
5669 /* Callback for walk_dominator_tree. Attempt to optimize various
5670 string ops by remembering string lengths pointed by pointer SSA_NAMEs. */
5672 edge
5673 strlen_pass::before_dom_children (basic_block bb)
5675 evrp.enter (bb);
5677 basic_block dombb = get_immediate_dominator (CDI_DOMINATORS, bb);
5679 if (dombb == NULL)
5680 stridx_to_strinfo = NULL;
5681 else
5683 stridx_to_strinfo = ((vec<strinfo *, va_heap, vl_embed> *) dombb->aux);
5684 if (stridx_to_strinfo)
5686 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
5687 gsi_next (&gsi))
5689 gphi *phi = gsi.phi ();
5690 if (virtual_operand_p (gimple_phi_result (phi)))
5692 bitmap visited = BITMAP_ALLOC (NULL);
5693 int count_vdef = 100;
5694 do_invalidate (dombb, phi, visited, &count_vdef);
5695 BITMAP_FREE (visited);
5696 if (count_vdef == 0)
5698 /* If there were too many vdefs in between immediate
5699 dominator and current bb, invalidate everything.
5700 If stridx_to_strinfo has been unshared, we need
5701 to free it, otherwise just set it to NULL. */
5702 if (!strinfo_shared ())
5704 unsigned int i;
5705 strinfo *si;
5707 for (i = 1;
5708 vec_safe_iterate (stridx_to_strinfo, i, &si);
5709 ++i)
5711 free_strinfo (si);
5712 (*stridx_to_strinfo)[i] = NULL;
5715 else
5716 stridx_to_strinfo = NULL;
5718 break;
5724 /* If all PHI arguments have the same string index, the PHI result
5725 has it as well. */
5726 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
5727 gsi_next (&gsi))
5729 gphi *phi = gsi.phi ();
5730 tree result = gimple_phi_result (phi);
5731 if (!virtual_operand_p (result) && POINTER_TYPE_P (TREE_TYPE (result)))
5733 int idx = get_stridx (gimple_phi_arg_def (phi, 0));
5734 if (idx != 0)
5736 unsigned int i, n = gimple_phi_num_args (phi);
5737 for (i = 1; i < n; i++)
5738 if (idx != get_stridx (gimple_phi_arg_def (phi, i)))
5739 break;
5740 if (i == n)
5741 ssa_ver_to_stridx[SSA_NAME_VERSION (result)] = idx;
5746 bool cleanup_eh = false;
5748 /* Attempt to optimize individual statements. */
5749 for (m_gsi = gsi_start_bb (bb); !gsi_end_p (m_gsi); )
5751 gimple *stmt = gsi_stmt (m_gsi);
5753 /* First record ranges generated by this statement so they
5754 can be used by printf argument processing. */
5755 evrp.record_ranges_from_stmt (stmt, false);
5757 /* Reset search depth preformance counter. */
5758 ptr_qry.depth = 0;
5760 if (check_and_optimize_stmt (&cleanup_eh))
5761 gsi_next (&m_gsi);
5764 if (cleanup_eh && gimple_purge_dead_eh_edges (bb))
5765 m_cleanup_cfg = true;
5767 bb->aux = stridx_to_strinfo;
5768 if (vec_safe_length (stridx_to_strinfo) && !strinfo_shared ())
5769 (*stridx_to_strinfo)[0] = (strinfo *) bb;
5770 return NULL;
5773 /* Callback for walk_dominator_tree. Free strinfo vector if it is
5774 owned by the current bb, clear bb->aux. */
5776 void
5777 strlen_pass::after_dom_children (basic_block bb)
5779 evrp.leave (bb);
5781 if (bb->aux)
5783 stridx_to_strinfo = ((vec<strinfo *, va_heap, vl_embed> *) bb->aux);
5784 if (vec_safe_length (stridx_to_strinfo)
5785 && (*stridx_to_strinfo)[0] == (strinfo *) bb)
5787 unsigned int i;
5788 strinfo *si;
5790 for (i = 1; vec_safe_iterate (stridx_to_strinfo, i, &si); ++i)
5791 free_strinfo (si);
5792 vec_free (stridx_to_strinfo);
5794 bb->aux = NULL;
5798 namespace {
5800 static unsigned int
5801 printf_strlen_execute (function *fun, bool warn_only)
5803 strlen_optimize = !warn_only;
5805 calculate_dominance_info (CDI_DOMINATORS);
5807 bool use_scev = optimize > 0 && flag_printf_return_value;
5808 if (use_scev)
5810 loop_optimizer_init (LOOPS_NORMAL);
5811 scev_initialize ();
5814 gcc_assert (!strlen_to_stridx);
5815 if (warn_stringop_overflow || warn_stringop_truncation)
5816 strlen_to_stridx = new hash_map<tree, stridx_strlenloc> ();
5818 /* This has to happen after initializing the loop optimizer
5819 and initializing SCEV as they create new SSA_NAMEs. */
5820 ssa_ver_to_stridx.safe_grow_cleared (num_ssa_names, true);
5821 max_stridx = 1;
5823 /* String length optimization is implemented as a walk of the dominator
5824 tree and a forward walk of statements within each block. */
5825 strlen_pass walker (CDI_DOMINATORS);
5826 walker.walk (ENTRY_BLOCK_PTR_FOR_FN (fun));
5828 if (dump_file && (dump_flags & TDF_DETAILS))
5829 walker.ptr_qry.dump (dump_file);
5831 ssa_ver_to_stridx.release ();
5832 strinfo_pool.release ();
5833 if (decl_to_stridxlist_htab)
5835 obstack_free (&stridx_obstack, NULL);
5836 delete decl_to_stridxlist_htab;
5837 decl_to_stridxlist_htab = NULL;
5839 laststmt.stmt = NULL;
5840 laststmt.len = NULL_TREE;
5841 laststmt.stridx = 0;
5843 if (strlen_to_stridx)
5845 strlen_to_stridx->empty ();
5846 delete strlen_to_stridx;
5847 strlen_to_stridx = NULL;
5850 if (use_scev)
5852 scev_finalize ();
5853 loop_optimizer_finalize ();
5856 return walker.m_cleanup_cfg ? TODO_cleanup_cfg : 0;
5859 /* This file defines two passes: one for warnings that runs only when
5860 optimization is disabled, and another that implements optimizations
5861 and also issues warnings. */
5863 const pass_data pass_data_warn_printf =
5865 GIMPLE_PASS, /* type */
5866 "warn-printf", /* name */
5867 OPTGROUP_NONE, /* optinfo_flags */
5868 TV_NONE, /* tv_id */
5869 /* Normally an optimization pass would require PROP_ssa but because
5870 this pass runs early, with no optimization, to do sprintf format
5871 checking, it only requires PROP_cfg. */
5872 PROP_cfg, /* properties_required */
5873 0, /* properties_provided */
5874 0, /* properties_destroyed */
5875 0, /* todo_flags_start */
5876 0, /* todo_flags_finish */
5879 class pass_warn_printf : public gimple_opt_pass
5881 public:
5882 pass_warn_printf (gcc::context *ctxt)
5883 : gimple_opt_pass (pass_data_warn_printf, ctxt)
5886 virtual bool gate (function *);
5887 virtual unsigned int execute (function *fun)
5889 return printf_strlen_execute (fun, true);
5894 /* Return true to run the warning pass only when not optimizing and
5895 iff either -Wformat-overflow or -Wformat-truncation is specified. */
5897 bool
5898 pass_warn_printf::gate (function *)
5900 return !optimize && (warn_format_overflow > 0 || warn_format_trunc > 0);
5903 const pass_data pass_data_strlen =
5905 GIMPLE_PASS, /* type */
5906 "strlen", /* name */
5907 OPTGROUP_NONE, /* optinfo_flags */
5908 TV_TREE_STRLEN, /* tv_id */
5909 PROP_cfg | PROP_ssa, /* properties_required */
5910 0, /* properties_provided */
5911 0, /* properties_destroyed */
5912 0, /* todo_flags_start */
5913 0, /* todo_flags_finish */
5916 class pass_strlen : public gimple_opt_pass
5918 public:
5919 pass_strlen (gcc::context *ctxt)
5920 : gimple_opt_pass (pass_data_strlen, ctxt)
5923 opt_pass * clone () { return new pass_strlen (m_ctxt); }
5925 virtual bool gate (function *);
5926 virtual unsigned int execute (function *fun)
5928 return printf_strlen_execute (fun, false);
5932 /* Return true to run the pass only when the sprintf and/or strlen
5933 optimizations are enabled and -Wformat-overflow or -Wformat-truncation
5934 are specified. */
5936 bool
5937 pass_strlen::gate (function *)
5939 return ((warn_format_overflow > 0
5940 || warn_format_trunc > 0
5941 || warn_restrict > 0
5942 || flag_optimize_strlen > 0
5943 || flag_printf_return_value)
5944 && optimize > 0);
5947 } // anon namespace
5949 gimple_opt_pass *
5950 make_pass_warn_printf (gcc::context *ctxt)
5952 return new pass_warn_printf (ctxt);
5955 gimple_opt_pass *
5956 make_pass_strlen (gcc::context *ctxt)
5958 return new pass_strlen (ctxt);