1 /* Dead store elimination
2 Copyright (C) 2004, 2005, 2006 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to
18 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
19 Boston, MA 02110-1301, USA. */
23 #include "coretypes.h"
29 #include "basic-block.h"
31 #include "diagnostic.h"
32 #include "tree-flow.h"
33 #include "tree-pass.h"
34 #include "tree-dump.h"
40 /* This file implements dead store elimination.
42 A dead store is a store into a memory location which will later be
43 overwritten by another store without any intervening loads. In this
44 case the earlier store can be deleted.
46 In our SSA + virtual operand world we use immediate uses of virtual
47 operands to detect dead stores. If a store's virtual definition
48 is used precisely once by a later store to the same location which
49 post dominates the first store, then the first store is dead.
51 The single use of the store's virtual definition ensures that
52 there are no intervening aliased loads and the requirement that
53 the second load post dominate the first ensures that if the earlier
54 store executes, then the later stores will execute before the function
57 It may help to think of this as first moving the earlier store to
58 the point immediately before the later store. Again, the single
59 use of the virtual definition and the post-dominance relationship
60 ensure that such movement would be safe. Clearly if there are
61 back to back stores, then the second is redundant.
63 Reviewing section 10.7.2 in Morgan's "Building an Optimizing Compiler"
64 may also help in understanding this code since it discusses the
65 relationship between dead store and redundant load elimination. In
66 fact, they are the same transformation applied to different views of
70 /* Given an aggregate, this records the parts of it which have been
72 struct aggregate_vardecl_d
77 /* Some aggregates are too big for us to handle or never get stored
78 to as a whole. If this field is TRUE, we don't care about this
82 /* Number of parts in the whole. */
85 /* A bitmap of parts of the aggregate that have been set. If part N
86 of an aggregate has been stored to, bit N should be on. */
90 struct dse_global_data
92 /* This is the global bitmap for store statements.
94 Each statement has a unique ID. When we encounter a store statement
95 that we want to record, set the bit corresponding to the statement's
96 unique ID in this bitmap. */
99 /* A hash table containing the parts of an aggregate which have been
101 htab_t aggregate_vardecl
;
104 /* We allocate a bitmap-per-block for stores which are encountered
105 during the scan of that block. This allows us to restore the
106 global bitmap of stores when we finish processing a block. */
107 struct dse_block_local_data
112 /* Basic blocks of the potentially dead store and the following
113 store, for memory_address_same. */
114 struct address_walk_data
116 basic_block store1_bb
, store2_bb
;
119 static bool gate_dse (void);
120 static unsigned int tree_ssa_dse (void);
121 static void dse_initialize_block_local_data (struct dom_walk_data
*,
124 static void dse_optimize_stmt (struct dom_walk_data
*,
126 block_stmt_iterator
);
127 static void dse_record_phis (struct dom_walk_data
*, basic_block
);
128 static void dse_finalize_block (struct dom_walk_data
*, basic_block
);
129 static void record_voperand_set (bitmap
, bitmap
*, unsigned int);
130 static void dse_record_partial_aggregate_store (tree
, struct dse_global_data
*);
132 static unsigned max_stmt_uid
; /* Maximal uid of a statement. Uids to phi
133 nodes are assigned using the versions of
134 ssa names they define. */
136 /* Returns uid of statement STMT. */
139 get_stmt_uid (tree stmt
)
141 if (TREE_CODE (stmt
) == PHI_NODE
)
142 return SSA_NAME_VERSION (PHI_RESULT (stmt
)) + max_stmt_uid
;
144 return stmt_ann (stmt
)->uid
;
147 /* Set bit UID in bitmaps GLOBAL and *LOCAL, creating *LOCAL as needed. */
150 record_voperand_set (bitmap global
, bitmap
*local
, unsigned int uid
)
152 /* Lazily allocate the bitmap. Note that we do not get a notification
153 when the block local data structures die, so we allocate the local
154 bitmap backed by the GC system. */
156 *local
= BITMAP_GGC_ALLOC ();
158 /* Set the bit in the local and global bitmaps. */
159 bitmap_set_bit (*local
, uid
);
160 bitmap_set_bit (global
, uid
);
163 /* Initialize block local data structures. */
166 dse_initialize_block_local_data (struct dom_walk_data
*walk_data
,
167 basic_block bb ATTRIBUTE_UNUSED
,
170 struct dse_block_local_data
*bd
171 = VEC_last (void_p
, walk_data
->block_data_stack
);
173 /* If we are given a recycled block local data structure, ensure any
174 bitmap associated with the block is cleared. */
178 bitmap_clear (bd
->stores
);
182 /* Helper function for memory_address_same via walk_tree. Returns
183 non-NULL if it finds an SSA_NAME which is part of the address,
184 such that the definition of the SSA_NAME post-dominates the store
185 we want to delete but not the store that we believe makes it
186 redundant. This indicates that the address may change between
190 memory_ssa_name_same (tree
*expr_p
, int *walk_subtrees ATTRIBUTE_UNUSED
,
193 struct address_walk_data
*walk_data
= data
;
198 if (TREE_CODE (expr
) != SSA_NAME
)
201 /* If we've found a default definition, then there's no problem. Both
202 stores will post-dominate it. And def_bb will be NULL. */
203 if (SSA_NAME_IS_DEFAULT_DEF (expr
))
206 def_stmt
= SSA_NAME_DEF_STMT (expr
);
207 def_bb
= bb_for_stmt (def_stmt
);
209 /* DEF_STMT must dominate both stores. So if it is in the same
210 basic block as one, it does not post-dominate that store. */
211 if (walk_data
->store1_bb
!= def_bb
212 && dominated_by_p (CDI_POST_DOMINATORS
, walk_data
->store1_bb
, def_bb
))
214 if (walk_data
->store2_bb
== def_bb
215 || !dominated_by_p (CDI_POST_DOMINATORS
, walk_data
->store2_bb
,
217 /* Return non-NULL to stop the walk. */
224 /* Return TRUE if the destination memory address in STORE1 and STORE2
225 might be modified after STORE1, before control reaches STORE2. */
228 memory_address_same (tree store1
, tree store2
)
230 struct address_walk_data walk_data
;
232 walk_data
.store1_bb
= bb_for_stmt (store1
);
233 walk_data
.store2_bb
= bb_for_stmt (store2
);
235 return (walk_tree (&GIMPLE_STMT_OPERAND (store1
, 0), memory_ssa_name_same
,
241 /* A helper of dse_optimize_stmt.
242 Given a GIMPLE_MODIFY_STMT in STMT, check that each VDEF has one
243 use, and that one use is another VDEF clobbering the first one.
245 Return TRUE if the above conditions are met, otherwise FALSE. */
248 dse_possible_dead_store_p (tree stmt
,
249 use_operand_p
*first_use_p
,
250 use_operand_p
*use_p
,
252 struct dse_global_data
*dse_gd
,
253 struct dse_block_local_data
*bd
)
259 tree defvar
= NULL_TREE
, temp
;
260 tree prev_defvar
= NULL_TREE
;
261 stmt_ann_t ann
= stmt_ann (stmt
);
263 /* We want to verify that each virtual definition in STMT has
264 precisely one use and that all the virtual definitions are
265 used by the same single statement. When complete, we
266 want USE_STMT to refer to the one statement which uses
267 all of the virtual definitions from STMT. */
269 FOR_EACH_SSA_VDEF_OPERAND (var1
, vv
, stmt
, op_iter
)
271 defvar
= DEF_FROM_PTR (var1
);
273 /* If this virtual def does not have precisely one use, then
274 we will not be able to eliminate STMT. */
275 if (!has_single_use (defvar
))
281 /* Get the one and only immediate use of DEFVAR. */
282 single_imm_use (defvar
, use_p
, &temp
);
283 gcc_assert (*use_p
!= NULL_USE_OPERAND_P
);
284 *first_use_p
= *use_p
;
286 /* If the immediate use of DEF_VAR is not the same as the
287 previously find immediate uses, then we will not be able
288 to eliminate STMT. */
289 if (*use_stmt
== NULL
)
292 prev_defvar
= defvar
;
294 else if (temp
!= *use_stmt
)
296 /* The immediate use and the previously found immediate use
297 must be the same, except... if they're uses of different
298 parts of the whole. */
299 if (TREE_CODE (defvar
) == SSA_NAME
300 && TREE_CODE (SSA_NAME_VAR (defvar
)) == STRUCT_FIELD_TAG
301 && TREE_CODE (prev_defvar
) == SSA_NAME
302 && TREE_CODE (SSA_NAME_VAR (prev_defvar
)) == STRUCT_FIELD_TAG
303 && (SFT_PARENT_VAR (SSA_NAME_VAR (defvar
))
304 == SFT_PARENT_VAR (SSA_NAME_VAR (prev_defvar
))))
316 record_voperand_set (dse_gd
->stores
, &bd
->stores
, ann
->uid
);
317 dse_record_partial_aggregate_store (stmt
, dse_gd
);
321 /* Skip through any PHI nodes we have already seen if the PHI
322 represents the only use of this store.
324 Note this does not handle the case where the store has
325 multiple VDEFs which all reach a set of PHI nodes in the same block. */
326 while (*use_p
!= NULL_USE_OPERAND_P
327 && TREE_CODE (*use_stmt
) == PHI_NODE
328 && bitmap_bit_p (dse_gd
->stores
, get_stmt_uid (*use_stmt
)))
330 /* A PHI node can both define and use the same SSA_NAME if
331 the PHI is at the top of a loop and the PHI_RESULT is
332 a loop invariant and copies have not been fully propagated.
334 The safe thing to do is exit assuming no optimization is
336 if (SSA_NAME_DEF_STMT (PHI_RESULT (*use_stmt
)) == *use_stmt
)
339 /* Skip past this PHI and loop again in case we had a PHI
341 single_imm_use (PHI_RESULT (*use_stmt
), use_p
, use_stmt
);
348 /* Given a DECL, return its AGGREGATE_VARDECL_D entry. If no entry is
349 found and INSERT is TRUE, add a new entry. */
351 static struct aggregate_vardecl_d
*
352 get_aggregate_vardecl (tree decl
, struct dse_global_data
*dse_gd
, bool insert
)
354 struct aggregate_vardecl_d av
, *av_p
;
358 slot
= htab_find_slot (dse_gd
->aggregate_vardecl
, &av
, insert
? INSERT
: NO_INSERT
);
361 /* Not found, and we don't want to insert. */
365 /* Create new entry. */
368 av_p
= XNEW (struct aggregate_vardecl_d
);
371 /* Record how many parts the whole has. */
372 if (TREE_CODE (TREE_TYPE (decl
)) == COMPLEX_TYPE
)
374 else if (TREE_CODE (TREE_TYPE (decl
)) == RECORD_TYPE
)
378 /* Count the number of fields. */
379 fields
= TYPE_FIELDS (TREE_TYPE (decl
));
384 fields
= TREE_CHAIN (fields
);
391 av_p
->parts_set
= sbitmap_alloc (HOST_BITS_PER_LONG
);
392 sbitmap_zero (av_p
->parts_set
);
396 av_p
= (struct aggregate_vardecl_d
*) *slot
;
402 /* If STMT is a partial store into an aggregate, record which part got set. */
405 dse_record_partial_aggregate_store (tree stmt
, struct dse_global_data
*dse_gd
)
409 struct aggregate_vardecl_d
*av_p
;
412 gcc_assert (TREE_CODE (stmt
) == GIMPLE_MODIFY_STMT
);
414 lhs
= GIMPLE_STMT_OPERAND (stmt
, 0);
415 code
= TREE_CODE (lhs
);
416 if (code
!= IMAGPART_EXPR
417 && code
!= REALPART_EXPR
418 && code
!= COMPONENT_REF
)
420 decl
= TREE_OPERAND (lhs
, 0);
421 /* Early bail on things like nested COMPONENT_REFs. */
422 if (TREE_CODE (decl
) != VAR_DECL
)
424 /* Early bail on unions. */
425 if (code
== COMPONENT_REF
426 && TREE_CODE (TREE_TYPE (TREE_OPERAND (lhs
, 0))) != RECORD_TYPE
)
429 av_p
= get_aggregate_vardecl (decl
, dse_gd
, /*insert=*/false);
430 /* Run away, this isn't an aggregate we care about. */
431 if (!av_p
|| av_p
->ignore
)
444 tree orig_field
, fields
;
445 tree record_type
= TREE_TYPE (TREE_OPERAND (lhs
, 0));
447 /* Get FIELD_DECL. */
448 orig_field
= TREE_OPERAND (lhs
, 1);
450 /* FIXME: Eeech, do this more efficiently. Perhaps
451 calculate bit/byte offsets. */
453 fields
= TYPE_FIELDS (record_type
);
457 if (fields
== orig_field
)
459 fields
= TREE_CHAIN (fields
);
461 gcc_assert (part
>= 0);
468 /* Record which part was set. */
469 SET_BIT (av_p
->parts_set
, part
);
473 /* Return TRUE if all parts in an AGGREGATE_VARDECL have been set. */
476 dse_whole_aggregate_clobbered_p (struct aggregate_vardecl_d
*av_p
)
479 sbitmap_iterator sbi
;
482 /* Count the number of partial stores (bits set). */
483 EXECUTE_IF_SET_IN_SBITMAP (av_p
->parts_set
, 0, i
, sbi
)
485 return ((unsigned) nbits_set
== av_p
->nparts
);
489 /* Return TRUE if STMT is a store into a whole aggregate whose parts we
490 have already seen and recorded. */
493 dse_partial_kill_p (tree stmt
, struct dse_global_data
*dse_gd
)
496 struct aggregate_vardecl_d
*av_p
;
498 /* Make sure this is a store into the whole. */
499 if (TREE_CODE (stmt
) == GIMPLE_MODIFY_STMT
)
503 decl
= GIMPLE_STMT_OPERAND (stmt
, 0);
504 code
= TREE_CODE (TREE_TYPE (decl
));
506 if (code
!= COMPLEX_TYPE
&& code
!= RECORD_TYPE
)
509 if (TREE_CODE (decl
) != VAR_DECL
)
515 av_p
= get_aggregate_vardecl (decl
, dse_gd
, /*insert=*/false);
516 gcc_assert (av_p
!= NULL
);
518 return dse_whole_aggregate_clobbered_p (av_p
);
522 /* Attempt to eliminate dead stores in the statement referenced by BSI.
524 A dead store is a store into a memory location which will later be
525 overwritten by another store without any intervening loads. In this
526 case the earlier store can be deleted.
528 In our SSA + virtual operand world we use immediate uses of virtual
529 operands to detect dead stores. If a store's virtual definition
530 is used precisely once by a later store to the same location which
531 post dominates the first store, then the first store is dead. */
534 dse_optimize_stmt (struct dom_walk_data
*walk_data
,
535 basic_block bb ATTRIBUTE_UNUSED
,
536 block_stmt_iterator bsi
)
538 struct dse_block_local_data
*bd
539 = VEC_last (void_p
, walk_data
->block_data_stack
);
540 struct dse_global_data
*dse_gd
= walk_data
->global_data
;
541 tree stmt
= bsi_stmt (bsi
);
542 stmt_ann_t ann
= stmt_ann (stmt
);
544 /* If this statement has no virtual defs, then there is nothing
546 if (ZERO_SSA_OPERANDS (stmt
, SSA_OP_VDEF
))
549 /* We know we have virtual definitions. If this is a GIMPLE_MODIFY_STMT
550 that's not also a function call, then record it into our table. */
551 if (get_call_expr_in (stmt
))
554 if (ann
->has_volatile_ops
)
557 if (TREE_CODE (stmt
) == GIMPLE_MODIFY_STMT
)
559 use_operand_p first_use_p
= NULL_USE_OPERAND_P
;
560 use_operand_p use_p
= NULL
;
563 if (!dse_possible_dead_store_p (stmt
, &first_use_p
, &use_p
, &use_stmt
,
567 /* If this is a partial store into an aggregate, record it. */
568 dse_record_partial_aggregate_store (stmt
, dse_gd
);
570 /* If we have precisely one immediate use at this point, then we may
571 have found redundant store. Make sure that the stores are to
572 the same memory location. This includes checking that any
573 SSA-form variables in the address will have the same values. */
574 if (use_p
!= NULL_USE_OPERAND_P
575 && bitmap_bit_p (dse_gd
->stores
, get_stmt_uid (use_stmt
))
576 && (operand_equal_p (GIMPLE_STMT_OPERAND (stmt
, 0),
577 GIMPLE_STMT_OPERAND (use_stmt
, 0), 0)
578 || dse_partial_kill_p (stmt
, dse_gd
))
579 && memory_address_same (stmt
, use_stmt
))
586 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
588 fprintf (dump_file
, " Deleted dead store '");
589 print_generic_expr (dump_file
, bsi_stmt (bsi
), dump_flags
);
590 fprintf (dump_file
, "'\n");
593 /* Then we need to fix the operand of the consuming stmt. */
594 stmt_lhs
= USE_FROM_PTR (first_use_p
);
595 FOR_EACH_SSA_VDEF_OPERAND (var1
, vv
, stmt
, op_iter
)
599 single_imm_use (DEF_FROM_PTR (var1
), &use_p
, &temp
);
600 gcc_assert (VUSE_VECT_NUM_ELEM (*vv
) == 1);
601 usevar
= VUSE_ELEMENT_VAR (*vv
, 0);
602 SET_USE (use_p
, usevar
);
604 /* Make sure we propagate the ABNORMAL bit setting. */
605 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (stmt_lhs
))
606 SSA_NAME_OCCURS_IN_ABNORMAL_PHI (usevar
) = 1;
609 /* Remove the dead store. */
610 bsi_remove (&bsi
, true);
612 /* And release any SSA_NAMEs set in this statement back to the
617 record_voperand_set (dse_gd
->stores
, &bd
->stores
, ann
->uid
);
621 /* Record that we have seen the PHIs at the start of BB which correspond
622 to virtual operands. */
624 dse_record_phis (struct dom_walk_data
*walk_data
, basic_block bb
)
626 struct dse_block_local_data
*bd
627 = VEC_last (void_p
, walk_data
->block_data_stack
);
628 struct dse_global_data
*dse_gd
= walk_data
->global_data
;
631 for (phi
= phi_nodes (bb
); phi
; phi
= PHI_CHAIN (phi
))
632 if (!is_gimple_reg (PHI_RESULT (phi
)))
633 record_voperand_set (dse_gd
->stores
,
639 dse_finalize_block (struct dom_walk_data
*walk_data
,
640 basic_block bb ATTRIBUTE_UNUSED
)
642 struct dse_block_local_data
*bd
643 = VEC_last (void_p
, walk_data
->block_data_stack
);
644 struct dse_global_data
*dse_gd
= walk_data
->global_data
;
645 bitmap stores
= dse_gd
->stores
;
649 /* Unwind the stores noted in this basic block. */
651 EXECUTE_IF_SET_IN_BITMAP (bd
->stores
, 0, i
, bi
)
653 bitmap_clear_bit (stores
, i
);
658 /* Hashing and equality functions for AGGREGATE_VARDECL. */
661 aggregate_vardecl_hash (const void *p
)
663 return htab_hash_pointer
664 ((const void *)((const struct aggregate_vardecl_d
*)p
)->decl
);
668 aggregate_vardecl_eq (const void *p1
, const void *p2
)
670 return ((const struct aggregate_vardecl_d
*)p1
)->decl
671 == ((const struct aggregate_vardecl_d
*)p2
)->decl
;
675 /* Free memory allocated by one entry in AGGREGATE_VARDECL. */
678 aggregate_vardecl_free (void *p
)
680 struct aggregate_vardecl_d
*entry
= (struct aggregate_vardecl_d
*) p
;
681 sbitmap_free (entry
->parts_set
);
686 /* Return true if STMT is a store into an entire aggregate. */
689 aggregate_whole_store_p (tree stmt
)
691 if (TREE_CODE (stmt
) == GIMPLE_MODIFY_STMT
)
693 tree lhs
= GIMPLE_STMT_OPERAND (stmt
, 0);
694 enum tree_code code
= TREE_CODE (TREE_TYPE (lhs
));
696 if (code
== COMPLEX_TYPE
|| code
== RECORD_TYPE
)
703 /* Main entry point. */
708 struct dom_walk_data walk_data
;
709 struct dse_global_data dse_gd
;
712 dse_gd
.aggregate_vardecl
=
713 htab_create (37, aggregate_vardecl_hash
,
714 aggregate_vardecl_eq
, aggregate_vardecl_free
);
719 block_stmt_iterator bsi
;
721 for (bsi
= bsi_start (bb
); !bsi_end_p (bsi
); bsi_next (&bsi
))
723 tree stmt
= bsi_stmt (bsi
);
725 /* Record aggregates which have been stored into as a whole. */
726 if (aggregate_whole_store_p (stmt
))
728 tree lhs
= GIMPLE_STMT_OPERAND (stmt
, 0);
729 if (TREE_CODE (lhs
) == VAR_DECL
)
731 struct aggregate_vardecl_d
*av_p
;
733 av_p
= get_aggregate_vardecl (lhs
, &dse_gd
, /*insert=*/true);
734 av_p
->ignore
= false;
736 /* Ignore aggregates with too many parts. */
737 if (av_p
->nparts
> HOST_BITS_PER_LONG
)
742 /* Create a UID for each statement in the function.
743 Ordering of the UIDs is not important for this pass. */
744 stmt_ann (stmt
)->uid
= max_stmt_uid
++;
748 /* We might consider making this a property of each pass so that it
749 can be [re]computed on an as-needed basis. Particularly since
750 this pass could be seen as an extension of DCE which needs post
752 calculate_dominance_info (CDI_POST_DOMINATORS
);
754 /* Dead store elimination is fundamentally a walk of the post-dominator
755 tree and a backwards walk of statements within each block. */
756 walk_data
.walk_stmts_backward
= true;
757 walk_data
.dom_direction
= CDI_POST_DOMINATORS
;
758 walk_data
.initialize_block_local_data
= dse_initialize_block_local_data
;
759 walk_data
.before_dom_children_before_stmts
= NULL
;
760 walk_data
.before_dom_children_walk_stmts
= dse_optimize_stmt
;
761 walk_data
.before_dom_children_after_stmts
= dse_record_phis
;
762 walk_data
.after_dom_children_before_stmts
= NULL
;
763 walk_data
.after_dom_children_walk_stmts
= NULL
;
764 walk_data
.after_dom_children_after_stmts
= dse_finalize_block
;
765 walk_data
.interesting_blocks
= NULL
;
767 walk_data
.block_local_data_size
= sizeof (struct dse_block_local_data
);
769 /* This is the main hash table for the dead store elimination pass. */
770 dse_gd
.stores
= BITMAP_ALLOC (NULL
);
772 walk_data
.global_data
= &dse_gd
;
774 /* Initialize the dominator walker. */
775 init_walk_dominator_tree (&walk_data
);
777 /* Recursively walk the dominator tree. */
778 walk_dominator_tree (&walk_data
, EXIT_BLOCK_PTR
);
780 /* Finalize the dominator walker. */
781 fini_walk_dominator_tree (&walk_data
);
783 /* Release unneeded data. */
784 BITMAP_FREE (dse_gd
.stores
);
785 htab_delete (dse_gd
.aggregate_vardecl
);
787 /* For now, just wipe the post-dominator information. */
788 free_dominance_info (CDI_POST_DOMINATORS
);
795 return flag_tree_dse
!= 0;
798 struct tree_opt_pass pass_dse
= {
801 tree_ssa_dse
, /* execute */
804 0, /* static_pass_number */
805 TV_TREE_DSE
, /* tv_id */
808 | PROP_alias
, /* properties_required */
809 0, /* properties_provided */
810 0, /* properties_destroyed */
811 0, /* todo_flags_start */
814 | TODO_verify_ssa
, /* todo_flags_finish */