1 /* Tail merging for gimple.
2 Copyright (C) 2011, 2012 Free Software Foundation, Inc.
3 Contributed by Tom de Vries (tom@codesourcery.com)
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
26 gimple representation of gcc/testsuite/gcc.dg/pr43864.c at
28 hprofStartupp (charD.1 * outputFileNameD.2600, charD.1 * ctxD.2601)
30 struct FILED.1638 * fpD.2605;
31 charD.1 fileNameD.2604[1000];
33 const charD.1 * restrict outputFileName.0D.3914;
36 # PRED: ENTRY [100.0%] (fallthru,exec)
37 # PT = nonlocal { D.3926 } (restr)
38 outputFileName.0D.3914_3
39 = (const charD.1 * restrict) outputFileNameD.2600_2(D);
40 # .MEMD.3923_13 = VDEF <.MEMD.3923_12(D)>
41 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
42 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
43 sprintfD.759 (&fileNameD.2604, outputFileName.0D.3914_3);
44 # .MEMD.3923_14 = VDEF <.MEMD.3923_13>
45 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
46 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
47 D.3915_4 = accessD.2606 (&fileNameD.2604, 1);
52 # SUCC: 3 [10.0%] (true,exec) 4 [90.0%] (false,exec)
55 # PRED: 2 [10.0%] (true,exec)
56 # .MEMD.3923_15 = VDEF <.MEMD.3923_14>
57 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
58 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
59 freeD.898 (ctxD.2601_5(D));
61 # SUCC: 7 [100.0%] (fallthru,exec)
64 # PRED: 2 [90.0%] (false,exec)
65 # .MEMD.3923_16 = VDEF <.MEMD.3923_14>
66 # PT = nonlocal escaped
67 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
68 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
69 fpD.2605_8 = fopenD.1805 (&fileNameD.2604[0], 0B);
74 # SUCC: 5 [1.9%] (true,exec) 6 [98.1%] (false,exec)
77 # PRED: 4 [1.9%] (true,exec)
78 # .MEMD.3923_17 = VDEF <.MEMD.3923_16>
79 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
80 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
81 freeD.898 (ctxD.2601_5(D));
83 # SUCC: 7 [100.0%] (fallthru,exec)
86 # PRED: 4 [98.1%] (false,exec)
87 # .MEMD.3923_18 = VDEF <.MEMD.3923_16>
88 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
89 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
90 fooD.2599 (outputFileNameD.2600_2(D), fpD.2605_8);
91 # SUCC: 7 [100.0%] (fallthru,exec)
94 # PRED: 3 [100.0%] (fallthru,exec) 5 [100.0%] (fallthru,exec)
95 6 [100.0%] (fallthru,exec)
98 # ctxD.2601_1 = PHI <0B(3), 0B(5), ctxD.2601_5(D)(6)>
99 # .MEMD.3923_11 = PHI <.MEMD.3923_15(3), .MEMD.3923_17(5),
101 # VUSE <.MEMD.3923_11>
103 # SUCC: EXIT [100.0%]
106 bb 3 and bb 5 can be merged. The blocks have different predecessors, but the
107 same successors, and the same operations.
112 A technique called tail merging (or cross jumping) can fix the example
113 above. For a block, we look for common code at the end (the tail) of the
114 predecessor blocks, and insert jumps from one block to the other.
115 The example is a special case for tail merging, in that 2 whole blocks
116 can be merged, rather than just the end parts of it.
117 We currently only focus on whole block merging, so in that sense
118 calling this pass tail merge is a bit of a misnomer.
120 We distinguish 2 kinds of situations in which blocks can be merged:
121 - same operations, same predecessors. The successor edges coming from one
122 block are redirected to come from the other block.
123 - same operations, same successors. The predecessor edges entering one block
124 are redirected to enter the other block. Note that this operation might
125 involve introducing phi operations.
127 For efficient implementation, we would like to value numbers the blocks, and
128 have a comparison operator that tells us whether the blocks are equal.
129 Besides being runtime efficient, block value numbering should also abstract
130 from irrelevant differences in order of operations, much like normal value
131 numbering abstracts from irrelevant order of operations.
133 For the first situation (same_operations, same predecessors), normal value
134 numbering fits well. We can calculate a block value number based on the
135 value numbers of the defs and vdefs.
137 For the second situation (same operations, same successors), this approach
138 doesn't work so well. We can illustrate this using the example. The calls
139 to free use different vdefs: MEMD.3923_16 and MEMD.3923_14, and these will
140 remain different in value numbering, since they represent different memory
141 states. So the resulting vdefs of the frees will be different in value
142 numbering, so the block value numbers will be different.
144 The reason why we call the blocks equal is not because they define the same
145 values, but because uses in the blocks use (possibly different) defs in the
146 same way. To be able to detect this efficiently, we need to do some kind of
147 reverse value numbering, meaning number the uses rather than the defs, and
148 calculate a block value number based on the value number of the uses.
149 Ideally, a block comparison operator will also indicate which phis are needed
152 For the moment, we don't do block value numbering, but we do insn-by-insn
153 matching, using scc value numbers to match operations with results, and
154 structural comparison otherwise, while ignoring vop mismatches.
159 1. The pass first determines all groups of blocks with the same successor
161 2. Within each group, it tries to determine clusters of equal basic blocks.
162 3. The clusters are applied.
163 4. The same successor groups are updated.
164 5. This process is repeated from 2 onwards, until no more changes.
170 - handles only 'same operations, same successors'.
171 It handles same predecessors as a special subcase though.
172 - does not implement the reverse value numbering and block value numbering.
173 - improve memory allocation: use garbage collected memory, obstacks,
174 allocpools where appropriate.
175 - no insertion of gimple_reg phis, We only introduce vop-phis.
176 - handle blocks with gimple_reg phi_nodes.
181 - ftree-tail-merge. On at -O2. We may have to enable it only at -Os. */
185 #include "coretypes.h"
189 #include "basic-block.h"
192 #include "function.h"
193 #include "tree-flow.h"
196 #include "tree-ssa-alias.h"
198 #include "tree-pretty-print.h"
200 #include "gimple-pretty-print.h"
201 #include "tree-ssa-sccvn.h"
202 #include "tree-dump.h"
204 /* Describes a group of bbs with the same successors. The successor bbs are
205 cached in succs, and the successor edge flags are cached in succ_flags.
206 If a bb has the EDGE_TRUE/VALSE_VALUE flags swapped compared to succ_flags,
207 it's marked in inverse.
208 Additionally, the hash value for the struct is cached in hashval, and
209 in_worklist indicates whether it's currently part of worklist. */
213 /* The bbs that have the same successor bbs. */
215 /* The successor bbs. */
217 /* Indicates whether the EDGE_TRUE/FALSE_VALUEs of succ_flags are swapped for
220 /* The edge flags for each of the successor bbs. */
221 VEC (int, heap
) *succ_flags
;
222 /* Indicates whether the struct is currently in the worklist. */
224 /* The hash value of the struct. */
227 typedef struct same_succ_def
*same_succ
;
228 typedef const struct same_succ_def
*const_same_succ
;
230 /* A group of bbs where 1 bb from bbs can replace the other bbs. */
232 struct bb_cluster_def
234 /* The bbs in the cluster. */
236 /* The preds of the bbs in the cluster. */
238 /* Index in all_clusters vector. */
240 /* The bb to replace the cluster with. */
243 typedef struct bb_cluster_def
*bb_cluster
;
244 typedef const struct bb_cluster_def
*const_bb_cluster
;
250 /* The number of non-debug statements in the bb. */
252 /* The same_succ that this bb is a member of. */
253 same_succ bb_same_succ
;
254 /* The cluster that this bb is a member of. */
256 /* The vop state at the exit of a bb. This is shortlived data, used to
257 communicate data between update_block_by and update_vuses. */
259 /* The bb that either contains or is dominated by the dependencies of the
264 /* Macros to access the fields of struct aux_bb_info. */
266 #define BB_SIZE(bb) (((struct aux_bb_info *)bb->aux)->size)
267 #define BB_SAME_SUCC(bb) (((struct aux_bb_info *)bb->aux)->bb_same_succ)
268 #define BB_CLUSTER(bb) (((struct aux_bb_info *)bb->aux)->cluster)
269 #define BB_VOP_AT_EXIT(bb) (((struct aux_bb_info *)bb->aux)->vop_at_exit)
270 #define BB_DEP_BB(bb) (((struct aux_bb_info *)bb->aux)->dep_bb)
272 /* VAL1 and VAL2 are either:
273 - uses in BB1 and BB2, or
274 - phi alternatives for BB1 and BB2.
275 Return true if the uses have the same gvn value. */
278 gvn_uses_equal (tree val1
, tree val2
)
280 gcc_checking_assert (val1
!= NULL_TREE
&& val2
!= NULL_TREE
);
285 if (vn_valueize (val1
) != vn_valueize (val2
))
288 return ((TREE_CODE (val1
) == SSA_NAME
|| CONSTANT_CLASS_P (val1
))
289 && (TREE_CODE (val2
) == SSA_NAME
|| CONSTANT_CLASS_P (val2
)));
292 /* Prints E to FILE. */
295 same_succ_print (FILE *file
, const same_succ e
)
298 bitmap_print (file
, e
->bbs
, "bbs:", "\n");
299 bitmap_print (file
, e
->succs
, "succs:", "\n");
300 bitmap_print (file
, e
->inverse
, "inverse:", "\n");
301 fprintf (file
, "flags:");
302 for (i
= 0; i
< VEC_length (int, e
->succ_flags
); ++i
)
303 fprintf (file
, " %x", VEC_index (int, e
->succ_flags
, i
));
304 fprintf (file
, "\n");
307 /* Prints same_succ VE to VFILE. */
310 same_succ_print_traverse (void **ve
, void *vfile
)
312 const same_succ e
= *((const same_succ
*)ve
);
313 FILE *file
= ((FILE*)vfile
);
314 same_succ_print (file
, e
);
318 /* Update BB_DEP_BB (USE_BB), given a use of VAL in USE_BB. */
321 update_dep_bb (basic_block use_bb
, tree val
)
326 if (TREE_CODE (val
) != SSA_NAME
)
329 /* Skip use of global def. */
330 if (SSA_NAME_IS_DEFAULT_DEF (val
))
333 /* Skip use of local def. */
334 dep_bb
= gimple_bb (SSA_NAME_DEF_STMT (val
));
335 if (dep_bb
== use_bb
)
338 if (BB_DEP_BB (use_bb
) == NULL
339 || dominated_by_p (CDI_DOMINATORS
, dep_bb
, BB_DEP_BB (use_bb
)))
340 BB_DEP_BB (use_bb
) = dep_bb
;
343 /* Update BB_DEP_BB, given the dependencies in STMT. */
346 stmt_update_dep_bb (gimple stmt
)
351 FOR_EACH_SSA_USE_OPERAND (use
, stmt
, iter
, SSA_OP_USE
)
352 update_dep_bb (gimple_bb (stmt
), USE_FROM_PTR (use
));
355 /* Returns whether VAL is used in the same bb as in which it is defined, or
356 in the phi of a successor bb. */
361 gimple stmt
, def_stmt
;
362 basic_block bb
, def_bb
;
363 imm_use_iterator iter
;
366 if (TREE_CODE (val
) != SSA_NAME
)
368 def_stmt
= SSA_NAME_DEF_STMT (val
);
369 def_bb
= gimple_bb (def_stmt
);
372 FOR_EACH_IMM_USE_STMT (stmt
, iter
, val
)
374 if (is_gimple_debug (stmt
))
376 bb
= gimple_bb (stmt
);
379 if (gimple_code (stmt
) == GIMPLE_PHI
380 && find_edge (def_bb
, bb
))
383 BREAK_FROM_IMM_USE_STMT (iter
);
388 /* Calculates hash value for same_succ VE. */
391 same_succ_hash (const void *ve
)
393 const_same_succ e
= (const_same_succ
)ve
;
394 hashval_t hashval
= bitmap_hash (e
->succs
);
397 unsigned int first
= bitmap_first_set_bit (e
->bbs
);
398 basic_block bb
= BASIC_BLOCK (first
);
400 gimple_stmt_iterator gsi
;
406 for (gsi
= gsi_start_nondebug_bb (bb
);
407 !gsi_end_p (gsi
); gsi_next_nondebug (&gsi
))
409 stmt
= gsi_stmt (gsi
);
410 stmt_update_dep_bb (stmt
);
411 if (is_gimple_assign (stmt
) && local_def (gimple_get_lhs (stmt
))
412 && !gimple_has_side_effects (stmt
))
416 hashval
= iterative_hash_hashval_t (gimple_code (stmt
), hashval
);
417 if (is_gimple_assign (stmt
))
418 hashval
= iterative_hash_hashval_t (gimple_assign_rhs_code (stmt
),
420 if (!is_gimple_call (stmt
))
422 if (gimple_call_internal_p (stmt
))
423 hashval
= iterative_hash_hashval_t
424 ((hashval_t
) gimple_call_internal_fn (stmt
), hashval
);
426 hashval
= iterative_hash_expr (gimple_call_fn (stmt
), hashval
);
427 for (i
= 0; i
< gimple_call_num_args (stmt
); i
++)
429 arg
= gimple_call_arg (stmt
, i
);
430 arg
= vn_valueize (arg
);
431 hashval
= iterative_hash_expr (arg
, hashval
);
435 hashval
= iterative_hash_hashval_t (size
, hashval
);
438 for (i
= 0; i
< VEC_length (int, e
->succ_flags
); ++i
)
440 flags
= VEC_index (int, e
->succ_flags
, i
);
441 flags
= flags
& ~(EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
);
442 hashval
= iterative_hash_hashval_t (flags
, hashval
);
445 EXECUTE_IF_SET_IN_BITMAP (e
->succs
, 0, s
, bs
)
447 int n
= find_edge (bb
, BASIC_BLOCK (s
))->dest_idx
;
448 for (gsi
= gsi_start_phis (BASIC_BLOCK (s
)); !gsi_end_p (gsi
);
451 gimple phi
= gsi_stmt (gsi
);
452 tree lhs
= gimple_phi_result (phi
);
453 tree val
= gimple_phi_arg_def (phi
, n
);
455 if (!is_gimple_reg (lhs
))
457 update_dep_bb (bb
, val
);
464 /* Returns true if E1 and E2 have 2 successors, and if the successor flags
465 are inverse for the EDGE_TRUE_VALUE and EDGE_FALSE_VALUE flags, and equal for
466 the other edge flags. */
469 inverse_flags (const_same_succ e1
, const_same_succ e2
)
471 int f1a
, f1b
, f2a
, f2b
;
472 int mask
= ~(EDGE_TRUE_VALUE
| EDGE_FALSE_VALUE
);
474 if (VEC_length (int, e1
->succ_flags
) != 2)
477 f1a
= VEC_index (int, e1
->succ_flags
, 0);
478 f1b
= VEC_index (int, e1
->succ_flags
, 1);
479 f2a
= VEC_index (int, e2
->succ_flags
, 0);
480 f2b
= VEC_index (int, e2
->succ_flags
, 1);
482 if (f1a
== f2a
&& f1b
== f2b
)
485 return (f1a
& mask
) == (f2a
& mask
) && (f1b
& mask
) == (f2b
& mask
);
488 /* Compares SAME_SUCCs VE1 and VE2. */
491 same_succ_equal (const void *ve1
, const void *ve2
)
493 const_same_succ e1
= (const_same_succ
)ve1
;
494 const_same_succ e2
= (const_same_succ
)ve2
;
495 unsigned int i
, first1
, first2
;
496 gimple_stmt_iterator gsi1
, gsi2
;
498 basic_block bb1
, bb2
;
500 if (e1
->hashval
!= e2
->hashval
)
503 if (VEC_length (int, e1
->succ_flags
) != VEC_length (int, e2
->succ_flags
))
506 if (!bitmap_equal_p (e1
->succs
, e2
->succs
))
509 if (!inverse_flags (e1
, e2
))
511 for (i
= 0; i
< VEC_length (int, e1
->succ_flags
); ++i
)
512 if (VEC_index (int, e1
->succ_flags
, i
)
513 != VEC_index (int, e1
->succ_flags
, i
))
517 first1
= bitmap_first_set_bit (e1
->bbs
);
518 first2
= bitmap_first_set_bit (e2
->bbs
);
520 bb1
= BASIC_BLOCK (first1
);
521 bb2
= BASIC_BLOCK (first2
);
523 if (BB_SIZE (bb1
) != BB_SIZE (bb2
))
526 gsi1
= gsi_start_nondebug_bb (bb1
);
527 gsi2
= gsi_start_nondebug_bb (bb2
);
528 while (!(gsi_end_p (gsi1
) || gsi_end_p (gsi2
)))
530 s1
= gsi_stmt (gsi1
);
531 s2
= gsi_stmt (gsi2
);
532 if (gimple_code (s1
) != gimple_code (s2
))
534 if (is_gimple_call (s1
) && !gimple_call_same_target_p (s1
, s2
))
536 gsi_next_nondebug (&gsi1
);
537 gsi_next_nondebug (&gsi2
);
543 /* Alloc and init a new SAME_SUCC. */
546 same_succ_alloc (void)
548 same_succ same
= XNEW (struct same_succ_def
);
550 same
->bbs
= BITMAP_ALLOC (NULL
);
551 same
->succs
= BITMAP_ALLOC (NULL
);
552 same
->inverse
= BITMAP_ALLOC (NULL
);
553 same
->succ_flags
= VEC_alloc (int, heap
, 10);
554 same
->in_worklist
= false;
559 /* Delete same_succ VE. */
562 same_succ_delete (void *ve
)
564 same_succ e
= (same_succ
)ve
;
566 BITMAP_FREE (e
->bbs
);
567 BITMAP_FREE (e
->succs
);
568 BITMAP_FREE (e
->inverse
);
569 VEC_free (int, heap
, e
->succ_flags
);
574 /* Reset same_succ SAME. */
577 same_succ_reset (same_succ same
)
579 bitmap_clear (same
->bbs
);
580 bitmap_clear (same
->succs
);
581 bitmap_clear (same
->inverse
);
582 VEC_truncate (int, same
->succ_flags
, 0);
585 /* Hash table with all same_succ entries. */
587 static htab_t same_succ_htab
;
589 /* Array that is used to store the edge flags for a successor. */
591 static int *same_succ_edge_flags
;
593 /* Bitmap that is used to mark bbs that are recently deleted. */
595 static bitmap deleted_bbs
;
597 /* Bitmap that is used to mark predecessors of bbs that are
600 static bitmap deleted_bb_preds
;
602 /* Prints same_succ_htab to stderr. */
604 extern void debug_same_succ (void);
606 debug_same_succ ( void)
608 htab_traverse (same_succ_htab
, same_succ_print_traverse
, stderr
);
611 DEF_VEC_P (same_succ
);
612 DEF_VEC_ALLOC_P (same_succ
, heap
);
614 /* Vector of bbs to process. */
616 static VEC (same_succ
, heap
) *worklist
;
618 /* Prints worklist to FILE. */
621 print_worklist (FILE *file
)
624 for (i
= 0; i
< VEC_length (same_succ
, worklist
); ++i
)
625 same_succ_print (file
, VEC_index (same_succ
, worklist
, i
));
628 /* Adds SAME to worklist. */
631 add_to_worklist (same_succ same
)
633 if (same
->in_worklist
)
636 if (bitmap_count_bits (same
->bbs
) < 2)
639 same
->in_worklist
= true;
640 VEC_safe_push (same_succ
, heap
, worklist
, same
);
643 /* Add BB to same_succ_htab. */
646 find_same_succ_bb (basic_block bb
, same_succ
*same_p
)
650 same_succ same
= *same_p
;
657 bitmap_set_bit (same
->bbs
, bb
->index
);
658 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
660 int index
= e
->dest
->index
;
661 bitmap_set_bit (same
->succs
, index
);
662 same_succ_edge_flags
[index
] = e
->flags
;
664 EXECUTE_IF_SET_IN_BITMAP (same
->succs
, 0, j
, bj
)
665 VEC_safe_push (int, heap
, same
->succ_flags
, same_succ_edge_flags
[j
]);
667 same
->hashval
= same_succ_hash (same
);
669 slot
= (same_succ
*) htab_find_slot_with_hash (same_succ_htab
, same
,
670 same
->hashval
, INSERT
);
674 BB_SAME_SUCC (bb
) = same
;
675 add_to_worklist (same
);
680 bitmap_set_bit ((*slot
)->bbs
, bb
->index
);
681 BB_SAME_SUCC (bb
) = *slot
;
682 add_to_worklist (*slot
);
683 if (inverse_flags (same
, *slot
))
684 bitmap_set_bit ((*slot
)->inverse
, bb
->index
);
685 same_succ_reset (same
);
689 /* Find bbs with same successors. */
692 find_same_succ (void)
694 same_succ same
= same_succ_alloc ();
699 find_same_succ_bb (bb
, &same
);
701 same
= same_succ_alloc ();
704 same_succ_delete (same
);
707 /* Initializes worklist administration. */
712 alloc_aux_for_blocks (sizeof (struct aux_bb_info
));
714 = htab_create (n_basic_blocks
, same_succ_hash
, same_succ_equal
,
716 same_succ_edge_flags
= XCNEWVEC (int, last_basic_block
);
717 deleted_bbs
= BITMAP_ALLOC (NULL
);
718 deleted_bb_preds
= BITMAP_ALLOC (NULL
);
719 worklist
= VEC_alloc (same_succ
, heap
, n_basic_blocks
);
722 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
724 fprintf (dump_file
, "initial worklist:\n");
725 print_worklist (dump_file
);
729 /* Deletes worklist administration. */
732 delete_worklist (void)
734 free_aux_for_blocks ();
735 htab_delete (same_succ_htab
);
736 same_succ_htab
= NULL
;
737 XDELETEVEC (same_succ_edge_flags
);
738 same_succ_edge_flags
= NULL
;
739 BITMAP_FREE (deleted_bbs
);
740 BITMAP_FREE (deleted_bb_preds
);
741 VEC_free (same_succ
, heap
, worklist
);
744 /* Mark BB as deleted, and mark its predecessors. */
747 mark_basic_block_deleted (basic_block bb
)
752 bitmap_set_bit (deleted_bbs
, bb
->index
);
754 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
755 bitmap_set_bit (deleted_bb_preds
, e
->src
->index
);
758 /* Removes BB from its corresponding same_succ. */
761 same_succ_flush_bb (basic_block bb
)
763 same_succ same
= BB_SAME_SUCC (bb
);
764 BB_SAME_SUCC (bb
) = NULL
;
765 if (bitmap_single_bit_set_p (same
->bbs
))
766 htab_remove_elt_with_hash (same_succ_htab
, same
, same
->hashval
);
768 bitmap_clear_bit (same
->bbs
, bb
->index
);
771 /* Removes all bbs in BBS from their corresponding same_succ. */
774 same_succ_flush_bbs (bitmap bbs
)
779 EXECUTE_IF_SET_IN_BITMAP (bbs
, 0, i
, bi
)
780 same_succ_flush_bb (BASIC_BLOCK (i
));
783 /* Release the last vdef in BB, either normal or phi result. */
786 release_last_vdef (basic_block bb
)
788 gimple_stmt_iterator i
;
790 for (i
= gsi_last_bb (bb
); !gsi_end_p (i
); gsi_prev_nondebug (&i
))
792 gimple stmt
= gsi_stmt (i
);
793 if (gimple_vdef (stmt
) == NULL_TREE
)
796 mark_virtual_operand_for_renaming (gimple_vdef (stmt
));
800 for (i
= gsi_start_phis (bb
); !gsi_end_p (i
); gsi_next (&i
))
802 gimple phi
= gsi_stmt (i
);
803 tree res
= gimple_phi_result (phi
);
805 if (is_gimple_reg (res
))
808 mark_virtual_phi_result_for_renaming (phi
);
814 /* For deleted_bb_preds, find bbs with same successors. */
817 update_worklist (void)
824 bitmap_and_compl_into (deleted_bb_preds
, deleted_bbs
);
825 bitmap_clear (deleted_bbs
);
827 bitmap_clear_bit (deleted_bb_preds
, ENTRY_BLOCK
);
828 same_succ_flush_bbs (deleted_bb_preds
);
830 same
= same_succ_alloc ();
831 EXECUTE_IF_SET_IN_BITMAP (deleted_bb_preds
, 0, i
, bi
)
833 bb
= BASIC_BLOCK (i
);
834 gcc_assert (bb
!= NULL
);
835 find_same_succ_bb (bb
, &same
);
837 same
= same_succ_alloc ();
839 same_succ_delete (same
);
840 bitmap_clear (deleted_bb_preds
);
843 /* Prints cluster C to FILE. */
846 print_cluster (FILE *file
, bb_cluster c
)
850 bitmap_print (file
, c
->bbs
, "bbs:", "\n");
851 bitmap_print (file
, c
->preds
, "preds:", "\n");
854 /* Prints cluster C to stderr. */
856 extern void debug_cluster (bb_cluster
);
858 debug_cluster (bb_cluster c
)
860 print_cluster (stderr
, c
);
863 /* Update C->rep_bb, given that BB is added to the cluster. */
866 update_rep_bb (bb_cluster c
, basic_block bb
)
869 if (c
->rep_bb
== NULL
)
875 /* Current needs no deps, keep it. */
876 if (BB_DEP_BB (c
->rep_bb
) == NULL
)
879 /* Bb needs no deps, change rep_bb. */
880 if (BB_DEP_BB (bb
) == NULL
)
886 /* Bb needs last deps earlier than current, change rep_bb. A potential
887 problem with this, is that the first deps might also be earlier, which
888 would mean we prefer longer lifetimes for the deps. To be able to check
889 for this, we would have to trace BB_FIRST_DEP_BB as well, besides
890 BB_DEP_BB, which is really BB_LAST_DEP_BB.
891 The benefit of choosing the bb with last deps earlier, is that it can
892 potentially be used as replacement for more bbs. */
893 if (dominated_by_p (CDI_DOMINATORS
, BB_DEP_BB (c
->rep_bb
), BB_DEP_BB (bb
)))
897 /* Add BB to cluster C. Sets BB in C->bbs, and preds of BB in C->preds. */
900 add_bb_to_cluster (bb_cluster c
, basic_block bb
)
905 bitmap_set_bit (c
->bbs
, bb
->index
);
907 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
908 bitmap_set_bit (c
->preds
, e
->src
->index
);
910 update_rep_bb (c
, bb
);
913 /* Allocate and init new cluster. */
919 c
= XCNEW (struct bb_cluster_def
);
920 c
->bbs
= BITMAP_ALLOC (NULL
);
921 c
->preds
= BITMAP_ALLOC (NULL
);
926 /* Delete clusters. */
929 delete_cluster (bb_cluster c
)
933 BITMAP_FREE (c
->bbs
);
934 BITMAP_FREE (c
->preds
);
938 DEF_VEC_P (bb_cluster
);
939 DEF_VEC_ALLOC_P (bb_cluster
, heap
);
941 /* Array that contains all clusters. */
943 static VEC (bb_cluster
, heap
) *all_clusters
;
945 /* Allocate all cluster vectors. */
948 alloc_cluster_vectors (void)
950 all_clusters
= VEC_alloc (bb_cluster
, heap
, n_basic_blocks
);
953 /* Reset all cluster vectors. */
956 reset_cluster_vectors (void)
960 for (i
= 0; i
< VEC_length (bb_cluster
, all_clusters
); ++i
)
961 delete_cluster (VEC_index (bb_cluster
, all_clusters
, i
));
962 VEC_truncate (bb_cluster
, all_clusters
, 0);
964 BB_CLUSTER (bb
) = NULL
;
967 /* Delete all cluster vectors. */
970 delete_cluster_vectors (void)
973 for (i
= 0; i
< VEC_length (bb_cluster
, all_clusters
); ++i
)
974 delete_cluster (VEC_index (bb_cluster
, all_clusters
, i
));
975 VEC_free (bb_cluster
, heap
, all_clusters
);
978 /* Merge cluster C2 into C1. */
981 merge_clusters (bb_cluster c1
, bb_cluster c2
)
983 bitmap_ior_into (c1
->bbs
, c2
->bbs
);
984 bitmap_ior_into (c1
->preds
, c2
->preds
);
987 /* Register equivalence of BB1 and BB2 (members of cluster C). Store c in
988 all_clusters, or merge c with existing cluster. */
991 set_cluster (basic_block bb1
, basic_block bb2
)
993 basic_block merge_bb
, other_bb
;
994 bb_cluster merge
, old
, c
;
996 if (BB_CLUSTER (bb1
) == NULL
&& BB_CLUSTER (bb2
) == NULL
)
999 add_bb_to_cluster (c
, bb1
);
1000 add_bb_to_cluster (c
, bb2
);
1001 BB_CLUSTER (bb1
) = c
;
1002 BB_CLUSTER (bb2
) = c
;
1003 c
->index
= VEC_length (bb_cluster
, all_clusters
);
1004 VEC_safe_push (bb_cluster
, heap
, all_clusters
, c
);
1006 else if (BB_CLUSTER (bb1
) == NULL
|| BB_CLUSTER (bb2
) == NULL
)
1008 merge_bb
= BB_CLUSTER (bb1
) == NULL
? bb2
: bb1
;
1009 other_bb
= BB_CLUSTER (bb1
) == NULL
? bb1
: bb2
;
1010 merge
= BB_CLUSTER (merge_bb
);
1011 add_bb_to_cluster (merge
, other_bb
);
1012 BB_CLUSTER (other_bb
) = merge
;
1014 else if (BB_CLUSTER (bb1
) != BB_CLUSTER (bb2
))
1019 old
= BB_CLUSTER (bb2
);
1020 merge
= BB_CLUSTER (bb1
);
1021 merge_clusters (merge
, old
);
1022 EXECUTE_IF_SET_IN_BITMAP (old
->bbs
, 0, i
, bi
)
1023 BB_CLUSTER (BASIC_BLOCK (i
)) = merge
;
1024 VEC_replace (bb_cluster
, all_clusters
, old
->index
, NULL
);
1025 update_rep_bb (merge
, old
->rep_bb
);
1026 delete_cluster (old
);
1032 /* Return true if gimple statements S1 and S2 are equal. Gimple_bb (s1) and
1033 gimple_bb (s2) are members of SAME_SUCC. */
1036 gimple_equal_p (same_succ same_succ
, gimple s1
, gimple s2
)
1040 basic_block bb1
= gimple_bb (s1
), bb2
= gimple_bb (s2
);
1042 bool equal
, inv_cond
;
1043 enum tree_code code1
, code2
;
1045 if (gimple_code (s1
) != gimple_code (s2
))
1048 switch (gimple_code (s1
))
1051 if (gimple_call_num_args (s1
) != gimple_call_num_args (s2
))
1053 if (!gimple_call_same_target_p (s1
, s2
))
1056 /* Eventually, we'll significantly complicate the CFG by adding
1057 back edges to properly model the effects of transaction restart.
1058 For the bulk of optimization this does not matter, but what we
1059 cannot recover from is tail merging blocks between two separate
1060 transactions. Avoid that by making commit not match. */
1061 if (gimple_call_builtin_p (s1
, BUILT_IN_TM_COMMIT
))
1065 for (i
= 0; i
< gimple_call_num_args (s1
); ++i
)
1067 t1
= gimple_call_arg (s1
, i
);
1068 t2
= gimple_call_arg (s2
, i
);
1069 if (operand_equal_p (t1
, t2
, 0))
1071 if (gvn_uses_equal (t1
, t2
))
1079 lhs1
= gimple_get_lhs (s1
);
1080 lhs2
= gimple_get_lhs (s2
);
1081 if (lhs1
== NULL_TREE
&& lhs2
== NULL_TREE
)
1083 if (lhs1
== NULL_TREE
|| lhs2
== NULL_TREE
)
1085 if (TREE_CODE (lhs1
) == SSA_NAME
&& TREE_CODE (lhs2
) == SSA_NAME
)
1086 return vn_valueize (lhs1
) == vn_valueize (lhs2
);
1087 return operand_equal_p (lhs1
, lhs2
, 0);
1090 lhs1
= gimple_get_lhs (s1
);
1091 lhs2
= gimple_get_lhs (s2
);
1092 return (TREE_CODE (lhs1
) == SSA_NAME
1093 && TREE_CODE (lhs2
) == SSA_NAME
1094 && vn_valueize (lhs1
) == vn_valueize (lhs2
));
1097 t1
= gimple_cond_lhs (s1
);
1098 t2
= gimple_cond_lhs (s2
);
1099 if (!operand_equal_p (t1
, t2
, 0)
1100 && !gvn_uses_equal (t1
, t2
))
1103 t1
= gimple_cond_rhs (s1
);
1104 t2
= gimple_cond_rhs (s2
);
1105 if (!operand_equal_p (t1
, t2
, 0)
1106 && !gvn_uses_equal (t1
, t2
))
1109 code1
= gimple_expr_code (s1
);
1110 code2
= gimple_expr_code (s2
);
1111 inv_cond
= (bitmap_bit_p (same_succ
->inverse
, bb1
->index
)
1112 != bitmap_bit_p (same_succ
->inverse
, bb2
->index
));
1116 = HONOR_NANS (TYPE_MODE (TREE_TYPE (gimple_cond_lhs (s1
))));
1117 code2
= invert_tree_comparison (code2
, honor_nans
);
1119 return code1
== code2
;
1126 /* Let GSI skip backwards over local defs. */
1129 gsi_advance_bw_nondebug_nonlocal (gimple_stmt_iterator
*gsi
)
1135 if (gsi_end_p (*gsi
))
1137 stmt
= gsi_stmt (*gsi
);
1138 if (!(is_gimple_assign (stmt
) && local_def (gimple_get_lhs (stmt
))
1139 && !gimple_has_side_effects (stmt
)))
1141 gsi_prev_nondebug (gsi
);
1145 /* Determines whether BB1 and BB2 (members of same_succ) are duplicates. If so,
1149 find_duplicate (same_succ same_succ
, basic_block bb1
, basic_block bb2
)
1151 gimple_stmt_iterator gsi1
= gsi_last_nondebug_bb (bb1
);
1152 gimple_stmt_iterator gsi2
= gsi_last_nondebug_bb (bb2
);
1154 gsi_advance_bw_nondebug_nonlocal (&gsi1
);
1155 gsi_advance_bw_nondebug_nonlocal (&gsi2
);
1157 while (!gsi_end_p (gsi1
) && !gsi_end_p (gsi2
))
1159 if (!gimple_equal_p (same_succ
, gsi_stmt (gsi1
), gsi_stmt (gsi2
)))
1162 gsi_prev_nondebug (&gsi1
);
1163 gsi_prev_nondebug (&gsi2
);
1164 gsi_advance_bw_nondebug_nonlocal (&gsi1
);
1165 gsi_advance_bw_nondebug_nonlocal (&gsi2
);
1168 if (!(gsi_end_p (gsi1
) && gsi_end_p (gsi2
)))
1172 fprintf (dump_file
, "find_duplicates: <bb %d> duplicate of <bb %d>\n",
1173 bb1
->index
, bb2
->index
);
1175 set_cluster (bb1
, bb2
);
1178 /* Returns whether for all phis in DEST the phi alternatives for E1 and
1182 same_phi_alternatives_1 (basic_block dest
, edge e1
, edge e2
)
1184 int n1
= e1
->dest_idx
, n2
= e2
->dest_idx
;
1185 gimple_stmt_iterator gsi
;
1187 for (gsi
= gsi_start_phis (dest
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1189 gimple phi
= gsi_stmt (gsi
);
1190 tree lhs
= gimple_phi_result (phi
);
1191 tree val1
= gimple_phi_arg_def (phi
, n1
);
1192 tree val2
= gimple_phi_arg_def (phi
, n2
);
1194 if (!is_gimple_reg (lhs
))
1197 if (operand_equal_for_phi_arg_p (val1
, val2
))
1199 if (gvn_uses_equal (val1
, val2
))
1208 /* Returns whether for all successors of BB1 and BB2 (members of SAME_SUCC), the
1209 phi alternatives for BB1 and BB2 are equal. */
1212 same_phi_alternatives (same_succ same_succ
, basic_block bb1
, basic_block bb2
)
1219 EXECUTE_IF_SET_IN_BITMAP (same_succ
->succs
, 0, s
, bs
)
1221 succ
= BASIC_BLOCK (s
);
1222 e1
= find_edge (bb1
, succ
);
1223 e2
= find_edge (bb2
, succ
);
1224 if (e1
->flags
& EDGE_COMPLEX
1225 || e2
->flags
& EDGE_COMPLEX
)
1228 /* For all phis in bb, the phi alternatives for e1 and e2 need to have
1230 if (!same_phi_alternatives_1 (succ
, e1
, e2
))
1237 /* Return true if BB has non-vop phis. */
1240 bb_has_non_vop_phi (basic_block bb
)
1242 gimple_seq phis
= phi_nodes (bb
);
1248 if (!gimple_seq_singleton_p (phis
))
1251 phi
= gimple_seq_first_stmt (phis
);
1252 return is_gimple_reg (gimple_phi_result (phi
));
1255 /* Returns true if redirecting the incoming edges of FROM to TO maintains the
1256 invariant that uses in FROM are dominates by their defs. */
1259 deps_ok_for_redirect_from_bb_to_bb (basic_block from
, basic_block to
)
1261 basic_block cd
, dep_bb
= BB_DEP_BB (to
);
1264 bitmap from_preds
= BITMAP_ALLOC (NULL
);
1269 FOR_EACH_EDGE (e
, ei
, from
->preds
)
1270 bitmap_set_bit (from_preds
, e
->src
->index
);
1271 cd
= nearest_common_dominator_for_set (CDI_DOMINATORS
, from_preds
);
1272 BITMAP_FREE (from_preds
);
1274 return dominated_by_p (CDI_DOMINATORS
, dep_bb
, cd
);
1277 /* Returns true if replacing BB1 (or its replacement bb) by BB2 (or its
1278 replacement bb) and vice versa maintains the invariant that uses in the
1279 replacement are dominates by their defs. */
1282 deps_ok_for_redirect (basic_block bb1
, basic_block bb2
)
1284 if (BB_CLUSTER (bb1
) != NULL
)
1285 bb1
= BB_CLUSTER (bb1
)->rep_bb
;
1287 if (BB_CLUSTER (bb2
) != NULL
)
1288 bb2
= BB_CLUSTER (bb2
)->rep_bb
;
1290 return (deps_ok_for_redirect_from_bb_to_bb (bb1
, bb2
)
1291 && deps_ok_for_redirect_from_bb_to_bb (bb2
, bb1
));
1294 /* Within SAME_SUCC->bbs, find clusters of bbs which can be merged. */
1297 find_clusters_1 (same_succ same_succ
)
1299 basic_block bb1
, bb2
;
1301 bitmap_iterator bi
, bj
;
1303 int max_comparisons
= PARAM_VALUE (PARAM_MAX_TAIL_MERGE_COMPARISONS
);
1305 EXECUTE_IF_SET_IN_BITMAP (same_succ
->bbs
, 0, i
, bi
)
1307 bb1
= BASIC_BLOCK (i
);
1309 /* TODO: handle blocks with phi-nodes. We'll have to find corresponding
1310 phi-nodes in bb1 and bb2, with the same alternatives for the same
1312 if (bb_has_non_vop_phi (bb1
))
1316 EXECUTE_IF_SET_IN_BITMAP (same_succ
->bbs
, i
+ 1, j
, bj
)
1318 bb2
= BASIC_BLOCK (j
);
1320 if (bb_has_non_vop_phi (bb2
))
1323 if (BB_CLUSTER (bb1
) != NULL
&& BB_CLUSTER (bb1
) == BB_CLUSTER (bb2
))
1326 /* Limit quadratic behaviour. */
1328 if (nr_comparisons
> max_comparisons
)
1331 /* This is a conservative dependency check. We could test more
1332 precise for allowed replacement direction. */
1333 if (!deps_ok_for_redirect (bb1
, bb2
))
1336 if (!(same_phi_alternatives (same_succ
, bb1
, bb2
)))
1339 find_duplicate (same_succ
, bb1
, bb2
);
1344 /* Find clusters of bbs which can be merged. */
1347 find_clusters (void)
1351 while (!VEC_empty (same_succ
, worklist
))
1353 same
= VEC_pop (same_succ
, worklist
);
1354 same
->in_worklist
= false;
1355 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1357 fprintf (dump_file
, "processing worklist entry\n");
1358 same_succ_print (dump_file
, same
);
1360 find_clusters_1 (same
);
1364 /* Returns the vop phi of BB, if any. */
1367 vop_phi (basic_block bb
)
1370 gimple_stmt_iterator gsi
;
1371 for (gsi
= gsi_start_phis (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1373 stmt
= gsi_stmt (gsi
);
1374 if (is_gimple_reg (gimple_phi_result (stmt
)))
1381 /* Redirect all edges from BB1 to BB2, removes BB1 and marks it as removed. */
1384 replace_block_by (basic_block bb1
, basic_block bb2
)
1390 bb2_phi
= vop_phi (bb2
);
1392 /* Mark the basic block as deleted. */
1393 mark_basic_block_deleted (bb1
);
1395 /* Redirect the incoming edges of bb1 to bb2. */
1396 for (i
= EDGE_COUNT (bb1
->preds
); i
> 0 ; --i
)
1398 pred_edge
= EDGE_PRED (bb1
, i
- 1);
1399 pred_edge
= redirect_edge_and_branch (pred_edge
, bb2
);
1400 gcc_assert (pred_edge
!= NULL
);
1402 if (bb2_phi
== NULL
)
1405 /* The phi might have run out of capacity when the redirect added an
1406 argument, which means it could have been replaced. Refresh it. */
1407 bb2_phi
= vop_phi (bb2
);
1409 add_phi_arg (bb2_phi
, SSA_NAME_VAR (gimple_phi_result (bb2_phi
)),
1410 pred_edge
, UNKNOWN_LOCATION
);
1413 bb2
->frequency
+= bb1
->frequency
;
1414 if (bb2
->frequency
> BB_FREQ_MAX
)
1415 bb2
->frequency
= BB_FREQ_MAX
;
1418 /* Do updates that use bb1, before deleting bb1. */
1419 release_last_vdef (bb1
);
1420 same_succ_flush_bb (bb1
);
1422 delete_basic_block (bb1
);
1425 /* Bbs for which update_debug_stmt need to be called. */
1427 static bitmap update_bbs
;
1429 /* For each cluster in all_clusters, merge all cluster->bbs. Returns
1430 number of bbs removed. */
1433 apply_clusters (void)
1435 basic_block bb1
, bb2
;
1439 int nr_bbs_removed
= 0;
1441 for (i
= 0; i
< VEC_length (bb_cluster
, all_clusters
); ++i
)
1443 c
= VEC_index (bb_cluster
, all_clusters
, i
);
1448 bitmap_set_bit (update_bbs
, bb2
->index
);
1450 bitmap_clear_bit (c
->bbs
, bb2
->index
);
1451 EXECUTE_IF_SET_IN_BITMAP (c
->bbs
, 0, j
, bj
)
1453 bb1
= BASIC_BLOCK (j
);
1454 bitmap_clear_bit (update_bbs
, bb1
->index
);
1456 replace_block_by (bb1
, bb2
);
1461 return nr_bbs_removed
;
1464 /* Resets debug statement STMT if it has uses that are not dominated by their
1468 update_debug_stmt (gimple stmt
)
1470 use_operand_p use_p
;
1472 basic_block bbdef
, bbuse
;
1476 if (!gimple_debug_bind_p (stmt
))
1479 bbuse
= gimple_bb (stmt
);
1480 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, oi
, SSA_OP_USE
)
1482 name
= USE_FROM_PTR (use_p
);
1483 gcc_assert (TREE_CODE (name
) == SSA_NAME
);
1485 def_stmt
= SSA_NAME_DEF_STMT (name
);
1486 gcc_assert (def_stmt
!= NULL
);
1488 bbdef
= gimple_bb (def_stmt
);
1489 if (bbdef
== NULL
|| bbuse
== bbdef
1490 || dominated_by_p (CDI_DOMINATORS
, bbuse
, bbdef
))
1493 gimple_debug_bind_reset_value (stmt
);
1498 /* Resets all debug statements that have uses that are not
1499 dominated by their defs. */
1502 update_debug_stmts (void)
1508 EXECUTE_IF_SET_IN_BITMAP (update_bbs
, 0, i
, bi
)
1511 gimple_stmt_iterator gsi
;
1513 bb
= BASIC_BLOCK (i
);
1514 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
1516 stmt
= gsi_stmt (gsi
);
1517 if (!is_gimple_debug (stmt
))
1519 update_debug_stmt (stmt
);
1524 /* Runs tail merge optimization. */
1527 tail_merge_optimize (unsigned int todo
)
1529 int nr_bbs_removed_total
= 0;
1531 bool loop_entered
= false;
1532 int iteration_nr
= 0;
1533 int max_iterations
= PARAM_VALUE (PARAM_MAX_TAIL_MERGE_ITERATIONS
);
1535 if (!flag_tree_tail_merge
|| max_iterations
== 0)
1538 timevar_push (TV_TREE_TAIL_MERGE
);
1540 calculate_dominance_info (CDI_DOMINATORS
);
1543 while (!VEC_empty (same_succ
, worklist
))
1547 loop_entered
= true;
1548 alloc_cluster_vectors ();
1549 update_bbs
= BITMAP_ALLOC (NULL
);
1552 reset_cluster_vectors ();
1555 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1556 fprintf (dump_file
, "worklist iteration #%d\n", iteration_nr
);
1559 gcc_assert (VEC_empty (same_succ
, worklist
));
1560 if (VEC_empty (bb_cluster
, all_clusters
))
1563 nr_bbs_removed
= apply_clusters ();
1564 nr_bbs_removed_total
+= nr_bbs_removed
;
1565 if (nr_bbs_removed
== 0)
1568 free_dominance_info (CDI_DOMINATORS
);
1570 if (iteration_nr
== max_iterations
)
1573 calculate_dominance_info (CDI_DOMINATORS
);
1577 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1578 fprintf (dump_file
, "htab collision / search: %f\n",
1579 htab_collisions (same_succ_htab
));
1581 if (nr_bbs_removed_total
> 0)
1583 if (MAY_HAVE_DEBUG_STMTS
)
1585 calculate_dominance_info (CDI_DOMINATORS
);
1586 update_debug_stmts ();
1589 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1591 fprintf (dump_file
, "Before TODOs.\n");
1592 dump_function_to_file (current_function_decl
, dump_file
, dump_flags
);
1595 todo
|= (TODO_verify_ssa
| TODO_verify_stmts
| TODO_verify_flow
1597 mark_sym_for_renaming (gimple_vop (cfun
));
1603 delete_cluster_vectors ();
1604 BITMAP_FREE (update_bbs
);
1607 timevar_pop (TV_TREE_TAIL_MERGE
);