PR tree-optimization/66718
[official-gcc.git] / gcc / tree-ssa-tail-merge.c
blob3c660f9f0bd02e1effd81a02976a3e82fdb5f2e7
1 /* Tail merging for gimple.
2 Copyright (C) 2011-2015 Free Software Foundation, Inc.
3 Contributed by Tom de Vries (tom@codesourcery.com)
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* Pass overview.
24 MOTIVATIONAL EXAMPLE
26 gimple representation of gcc/testsuite/gcc.dg/pr43864.c at
28 hprofStartupp (charD.1 * outputFileNameD.2600, charD.1 * ctxD.2601)
30 struct FILED.1638 * fpD.2605;
31 charD.1 fileNameD.2604[1000];
32 intD.0 D.3915;
33 const charD.1 * restrict outputFileName.0D.3914;
35 # BLOCK 2 freq:10000
36 # PRED: ENTRY [100.0%] (fallthru,exec)
37 # PT = nonlocal { D.3926 } (restr)
38 outputFileName.0D.3914_3
39 = (const charD.1 * restrict) outputFileNameD.2600_2(D);
40 # .MEMD.3923_13 = VDEF <.MEMD.3923_12(D)>
41 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
42 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
43 sprintfD.759 (&fileNameD.2604, outputFileName.0D.3914_3);
44 # .MEMD.3923_14 = VDEF <.MEMD.3923_13>
45 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
46 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
47 D.3915_4 = accessD.2606 (&fileNameD.2604, 1);
48 if (D.3915_4 == 0)
49 goto <bb 3>;
50 else
51 goto <bb 4>;
52 # SUCC: 3 [10.0%] (true,exec) 4 [90.0%] (false,exec)
54 # BLOCK 3 freq:1000
55 # PRED: 2 [10.0%] (true,exec)
56 # .MEMD.3923_15 = VDEF <.MEMD.3923_14>
57 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
58 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
59 freeD.898 (ctxD.2601_5(D));
60 goto <bb 7>;
61 # SUCC: 7 [100.0%] (fallthru,exec)
63 # BLOCK 4 freq:9000
64 # PRED: 2 [90.0%] (false,exec)
65 # .MEMD.3923_16 = VDEF <.MEMD.3923_14>
66 # PT = nonlocal escaped
67 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
68 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
69 fpD.2605_8 = fopenD.1805 (&fileNameD.2604[0], 0B);
70 if (fpD.2605_8 == 0B)
71 goto <bb 5>;
72 else
73 goto <bb 6>;
74 # SUCC: 5 [1.9%] (true,exec) 6 [98.1%] (false,exec)
76 # BLOCK 5 freq:173
77 # PRED: 4 [1.9%] (true,exec)
78 # .MEMD.3923_17 = VDEF <.MEMD.3923_16>
79 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
80 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
81 freeD.898 (ctxD.2601_5(D));
82 goto <bb 7>;
83 # SUCC: 7 [100.0%] (fallthru,exec)
85 # BLOCK 6 freq:8827
86 # PRED: 4 [98.1%] (false,exec)
87 # .MEMD.3923_18 = VDEF <.MEMD.3923_16>
88 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
89 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
90 fooD.2599 (outputFileNameD.2600_2(D), fpD.2605_8);
91 # SUCC: 7 [100.0%] (fallthru,exec)
93 # BLOCK 7 freq:10000
94 # PRED: 3 [100.0%] (fallthru,exec) 5 [100.0%] (fallthru,exec)
95 6 [100.0%] (fallthru,exec)
96 # PT = nonlocal null
98 # ctxD.2601_1 = PHI <0B(3), 0B(5), ctxD.2601_5(D)(6)>
99 # .MEMD.3923_11 = PHI <.MEMD.3923_15(3), .MEMD.3923_17(5),
100 .MEMD.3923_18(6)>
101 # VUSE <.MEMD.3923_11>
102 return ctxD.2601_1;
103 # SUCC: EXIT [100.0%]
106 bb 3 and bb 5 can be merged. The blocks have different predecessors, but the
107 same successors, and the same operations.
110 CONTEXT
112 A technique called tail merging (or cross jumping) can fix the example
113 above. For a block, we look for common code at the end (the tail) of the
114 predecessor blocks, and insert jumps from one block to the other.
115 The example is a special case for tail merging, in that 2 whole blocks
116 can be merged, rather than just the end parts of it.
117 We currently only focus on whole block merging, so in that sense
118 calling this pass tail merge is a bit of a misnomer.
120 We distinguish 2 kinds of situations in which blocks can be merged:
121 - same operations, same predecessors. The successor edges coming from one
122 block are redirected to come from the other block.
123 - same operations, same successors. The predecessor edges entering one block
124 are redirected to enter the other block. Note that this operation might
125 involve introducing phi operations.
127    For efficient implementation, we would like to value number the blocks, and
128 have a comparison operator that tells us whether the blocks are equal.
129 Besides being runtime efficient, block value numbering should also abstract
130 from irrelevant differences in order of operations, much like normal value
131 numbering abstracts from irrelevant order of operations.
133 For the first situation (same_operations, same predecessors), normal value
134 numbering fits well. We can calculate a block value number based on the
135 value numbers of the defs and vdefs.
137 For the second situation (same operations, same successors), this approach
138 doesn't work so well. We can illustrate this using the example. The calls
139 to free use different vdefs: MEMD.3923_16 and MEMD.3923_14, and these will
140 remain different in value numbering, since they represent different memory
141 states. So the resulting vdefs of the frees will be different in value
142 numbering, so the block value numbers will be different.
144 The reason why we call the blocks equal is not because they define the same
145 values, but because uses in the blocks use (possibly different) defs in the
146 same way. To be able to detect this efficiently, we need to do some kind of
147 reverse value numbering, meaning number the uses rather than the defs, and
148 calculate a block value number based on the value number of the uses.
149 Ideally, a block comparison operator will also indicate which phis are needed
150 to merge the blocks.
152 For the moment, we don't do block value numbering, but we do insn-by-insn
153 matching, using scc value numbers to match operations with results, and
154 structural comparison otherwise, while ignoring vop mismatches.
157 IMPLEMENTATION
159 1. The pass first determines all groups of blocks with the same successor
160 blocks.
161 2. Within each group, it tries to determine clusters of equal basic blocks.
162 3. The clusters are applied.
163 4. The same successor groups are updated.
164 5. This process is repeated from 2 onwards, until no more changes.
167 LIMITATIONS/TODO
169 - block only
170 - handles only 'same operations, same successors'.
171 It handles same predecessors as a special subcase though.
172 - does not implement the reverse value numbering and block value numbering.
173 - improve memory allocation: use garbage collected memory, obstacks,
174 allocpools where appropriate.
175    - no insertion of gimple_reg phis; we only introduce vop-phis.
176 - handle blocks with gimple_reg phi_nodes.
179 PASS PLACEMENT
180 This 'pass' is not a stand-alone gimple pass, but runs as part of
181 pass_pre, in order to share the value numbering.
184 SWITCHES
186 - ftree-tail-merge. On at -O2. We may have to enable it only at -Os. */
188 #include "config.h"
189 #include "system.h"
190 #include "coretypes.h"
191 #include "tm.h"
192 #include "alias.h"
193 #include "symtab.h"
194 #include "tree.h"
195 #include "fold-const.h"
196 #include "stor-layout.h"
197 #include "trans-mem.h"
198 #include "tm_p.h"
199 #include "predict.h"
200 #include "hard-reg-set.h"
201 #include "function.h"
202 #include "dominance.h"
203 #include "cfg.h"
204 #include "cfganal.h"
205 #include "cfgcleanup.h"
206 #include "basic-block.h"
207 #include "flags.h"
208 #include "tree-ssa-alias.h"
209 #include "internal-fn.h"
210 #include "tree-eh.h"
211 #include "gimple-expr.h"
212 #include "gimple.h"
213 #include "gimple-iterator.h"
214 #include "gimple-ssa.h"
215 #include "tree-cfg.h"
216 #include "tree-phinodes.h"
217 #include "ssa-iterators.h"
218 #include "tree-into-ssa.h"
219 #include "params.h"
220 #include "gimple-pretty-print.h"
221 #include "tree-ssa-sccvn.h"
222 #include "tree-dump.h"
223 #include "cfgloop.h"
224 #include "tree-pass.h"
225 #include "trans-mem.h"
227 /* Describes a group of bbs with the same successors. The successor bbs are
228 cached in succs, and the successor edge flags are cached in succ_flags.
229 If a bb has the EDGE_TRUE/FALSE_VALUE flags swapped compared to succ_flags,
230 it's marked in inverse.
231 Additionally, the hash value for the struct is cached in hashval, and
232 in_worklist indicates whether it's currently part of worklist. */
234 struct same_succ_def : pointer_hash <same_succ_def>
236 /* The bbs that have the same successor bbs. */
237 bitmap bbs;
238 /* The successor bbs. */
239 bitmap succs;
240 /* Indicates whether the EDGE_TRUE/FALSE_VALUEs of succ_flags are swapped for
241 bb. */
242 bitmap inverse;
243 /* The edge flags for each of the successor bbs. */
244 vec<int> succ_flags;
245 /* Indicates whether the struct is currently in the worklist. */
246 bool in_worklist;
247 /* The hash value of the struct. */
248 hashval_t hashval;
250 /* hash_table support. */
251 static inline hashval_t hash (const same_succ_def *);
252 static int equal (const same_succ_def *, const same_succ_def *);
253 static void remove (same_succ_def *);
255 typedef struct same_succ_def *same_succ;
256 typedef const struct same_succ_def *const_same_succ;
258 /* hash routine for hash_table support, returns hashval of E. */
260 inline hashval_t
261 same_succ_def::hash (const same_succ_def *e)
263 return e->hashval;
266 /* A group of bbs where 1 bb from bbs can replace the other bbs. */
268 struct bb_cluster_def
270 /* The bbs in the cluster. */
271 bitmap bbs;
272 /* The preds of the bbs in the cluster. */
273 bitmap preds;
274 /* Index in all_clusters vector. */
275 int index;
276 /* The bb to replace the cluster with. */
277 basic_block rep_bb;
279 typedef struct bb_cluster_def *bb_cluster;
280 typedef const struct bb_cluster_def *const_bb_cluster;
282 /* Per bb-info. */
284 struct aux_bb_info
286 /* The number of non-debug statements in the bb. */
287 int size;
288 /* The same_succ that this bb is a member of. */
289 same_succ bb_same_succ;
290 /* The cluster that this bb is a member of. */
291 bb_cluster cluster;
292 /* The vop state at the exit of a bb. This is shortlived data, used to
293 communicate data between update_block_by and update_vuses. */
294 tree vop_at_exit;
295 /* The bb that either contains or is dominated by the dependencies of the
296 bb. */
297 basic_block dep_bb;
300 /* Macros to access the fields of struct aux_bb_info. */
302 #define BB_SIZE(bb) (((struct aux_bb_info *)bb->aux)->size)
303 #define BB_SAME_SUCC(bb) (((struct aux_bb_info *)bb->aux)->bb_same_succ)
304 #define BB_CLUSTER(bb) (((struct aux_bb_info *)bb->aux)->cluster)
305 #define BB_VOP_AT_EXIT(bb) (((struct aux_bb_info *)bb->aux)->vop_at_exit)
306 #define BB_DEP_BB(bb) (((struct aux_bb_info *)bb->aux)->dep_bb)
308 /* Returns true if the only effect a statement STMT has, is to define locally
309 used SSA_NAMEs. */
311 static bool
312 stmt_local_def (gimple stmt)
314 basic_block bb, def_bb;
315 imm_use_iterator iter;
316 use_operand_p use_p;
317 tree val;
318 def_operand_p def_p;
320 if (gimple_vdef (stmt) != NULL_TREE
321 || gimple_has_side_effects (stmt)
322 || gimple_could_trap_p_1 (stmt, false, false)
323 || gimple_vuse (stmt) != NULL_TREE)
324 return false;
326 def_p = SINGLE_SSA_DEF_OPERAND (stmt, SSA_OP_DEF);
327 if (def_p == NULL)
328 return false;
330 val = DEF_FROM_PTR (def_p);
331 if (val == NULL_TREE || TREE_CODE (val) != SSA_NAME)
332 return false;
334 def_bb = gimple_bb (stmt);
336 FOR_EACH_IMM_USE_FAST (use_p, iter, val)
338 if (is_gimple_debug (USE_STMT (use_p)))
339 continue;
340 bb = gimple_bb (USE_STMT (use_p));
341 if (bb == def_bb)
342 continue;
344 if (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI
345 && EDGE_PRED (bb, PHI_ARG_INDEX_FROM_USE (use_p))->src == def_bb)
346 continue;
348 return false;
351 return true;
354 /* Let GSI skip forwards over local defs. */
356 static void
357 gsi_advance_fw_nondebug_nonlocal (gimple_stmt_iterator *gsi)
359 gimple stmt;
361 while (true)
363 if (gsi_end_p (*gsi))
364 return;
365 stmt = gsi_stmt (*gsi);
366 if (!stmt_local_def (stmt))
367 return;
368 gsi_next_nondebug (gsi);
372 /* VAL1 and VAL2 are either:
373 - uses in BB1 and BB2, or
374 - phi alternatives for BB1 and BB2.
375 Return true if the uses have the same gvn value. */
377 static bool
378 gvn_uses_equal (tree val1, tree val2)
380 gcc_checking_assert (val1 != NULL_TREE && val2 != NULL_TREE);
382 if (val1 == val2)
383 return true;
385 if (vn_valueize (val1) != vn_valueize (val2))
386 return false;
388 return ((TREE_CODE (val1) == SSA_NAME || CONSTANT_CLASS_P (val1))
389 && (TREE_CODE (val2) == SSA_NAME || CONSTANT_CLASS_P (val2)));
392 /* Prints E to FILE. */
394 static void
395 same_succ_print (FILE *file, const same_succ e)
397 unsigned int i;
398 bitmap_print (file, e->bbs, "bbs:", "\n");
399 bitmap_print (file, e->succs, "succs:", "\n");
400 bitmap_print (file, e->inverse, "inverse:", "\n");
401 fprintf (file, "flags:");
402 for (i = 0; i < e->succ_flags.length (); ++i)
403 fprintf (file, " %x", e->succ_flags[i]);
404 fprintf (file, "\n");
407 /* Prints same_succ VE to VFILE. */
409 inline int
410 ssa_same_succ_print_traverse (same_succ *pe, FILE *file)
412 const same_succ e = *pe;
413 same_succ_print (file, e);
414 return 1;
417 /* Update BB_DEP_BB (USE_BB), given a use of VAL in USE_BB. */
419 static void
420 update_dep_bb (basic_block use_bb, tree val)
422 basic_block dep_bb;
424 /* Not a dep. */
425 if (TREE_CODE (val) != SSA_NAME)
426 return;
428 /* Skip use of global def. */
429 if (SSA_NAME_IS_DEFAULT_DEF (val))
430 return;
432 /* Skip use of local def. */
433 dep_bb = gimple_bb (SSA_NAME_DEF_STMT (val));
434 if (dep_bb == use_bb)
435 return;
437 if (BB_DEP_BB (use_bb) == NULL
438 || dominated_by_p (CDI_DOMINATORS, dep_bb, BB_DEP_BB (use_bb)))
439 BB_DEP_BB (use_bb) = dep_bb;
442 /* Update BB_DEP_BB, given the dependencies in STMT. */
444 static void
445 stmt_update_dep_bb (gimple stmt)
447 ssa_op_iter iter;
448 use_operand_p use;
450 FOR_EACH_SSA_USE_OPERAND (use, stmt, iter, SSA_OP_USE)
451 update_dep_bb (gimple_bb (stmt), USE_FROM_PTR (use));
454 /* Calculates hash value for same_succ VE. */
456 static hashval_t
457 same_succ_hash (const_same_succ e)
459 inchash::hash hstate (bitmap_hash (e->succs));
460 int flags;
461 unsigned int i;
462 unsigned int first = bitmap_first_set_bit (e->bbs);
463 basic_block bb = BASIC_BLOCK_FOR_FN (cfun, first);
464 int size = 0;
465 gimple stmt;
466 tree arg;
467 unsigned int s;
468 bitmap_iterator bs;
470 for (gimple_stmt_iterator gsi = gsi_start_nondebug_bb (bb);
471 !gsi_end_p (gsi); gsi_next_nondebug (&gsi))
473 stmt = gsi_stmt (gsi);
474 stmt_update_dep_bb (stmt);
475 if (stmt_local_def (stmt))
476 continue;
477 size++;
479 hstate.add_int (gimple_code (stmt));
480 if (is_gimple_assign (stmt))
481 hstate.add_int (gimple_assign_rhs_code (stmt));
482 if (!is_gimple_call (stmt))
483 continue;
484 if (gimple_call_internal_p (stmt))
485 hstate.add_int (gimple_call_internal_fn (stmt));
486 else
488 inchash::add_expr (gimple_call_fn (stmt), hstate);
489 if (gimple_call_chain (stmt))
490 inchash::add_expr (gimple_call_chain (stmt), hstate);
492 for (i = 0; i < gimple_call_num_args (stmt); i++)
494 arg = gimple_call_arg (stmt, i);
495 arg = vn_valueize (arg);
496 inchash::add_expr (arg, hstate);
500 hstate.add_int (size);
501 BB_SIZE (bb) = size;
503 for (i = 0; i < e->succ_flags.length (); ++i)
505 flags = e->succ_flags[i];
506 flags = flags & ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
507 hstate.add_int (flags);
510 EXECUTE_IF_SET_IN_BITMAP (e->succs, 0, s, bs)
512 int n = find_edge (bb, BASIC_BLOCK_FOR_FN (cfun, s))->dest_idx;
513 for (gphi_iterator gsi = gsi_start_phis (BASIC_BLOCK_FOR_FN (cfun, s));
514 !gsi_end_p (gsi);
515 gsi_next (&gsi))
517 gphi *phi = gsi.phi ();
518 tree lhs = gimple_phi_result (phi);
519 tree val = gimple_phi_arg_def (phi, n);
521 if (virtual_operand_p (lhs))
522 continue;
523 update_dep_bb (bb, val);
527 return hstate.end ();
530 /* Returns true if E1 and E2 have 2 successors, and if the successor flags
531 are inverse for the EDGE_TRUE_VALUE and EDGE_FALSE_VALUE flags, and equal for
532 the other edge flags. */
534 static bool
535 inverse_flags (const_same_succ e1, const_same_succ e2)
537 int f1a, f1b, f2a, f2b;
538 int mask = ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
540 if (e1->succ_flags.length () != 2)
541 return false;
543 f1a = e1->succ_flags[0];
544 f1b = e1->succ_flags[1];
545 f2a = e2->succ_flags[0];
546 f2b = e2->succ_flags[1];
548 if (f1a == f2a && f1b == f2b)
549 return false;
551 return (f1a & mask) == (f2a & mask) && (f1b & mask) == (f2b & mask);
554 /* Compares SAME_SUCCs E1 and E2. */
557 same_succ_def::equal (const same_succ_def *e1, const same_succ_def *e2)
559 unsigned int i, first1, first2;
560 gimple_stmt_iterator gsi1, gsi2;
561 gimple s1, s2;
562 basic_block bb1, bb2;
564 if (e1->hashval != e2->hashval)
565 return 0;
567 if (e1->succ_flags.length () != e2->succ_flags.length ())
568 return 0;
570 if (!bitmap_equal_p (e1->succs, e2->succs))
571 return 0;
573 if (!inverse_flags (e1, e2))
575 for (i = 0; i < e1->succ_flags.length (); ++i)
576 if (e1->succ_flags[i] != e2->succ_flags[i])
577 return 0;
580 first1 = bitmap_first_set_bit (e1->bbs);
581 first2 = bitmap_first_set_bit (e2->bbs);
583 bb1 = BASIC_BLOCK_FOR_FN (cfun, first1);
584 bb2 = BASIC_BLOCK_FOR_FN (cfun, first2);
586 if (BB_SIZE (bb1) != BB_SIZE (bb2))
587 return 0;
589 gsi1 = gsi_start_nondebug_bb (bb1);
590 gsi2 = gsi_start_nondebug_bb (bb2);
591 gsi_advance_fw_nondebug_nonlocal (&gsi1);
592 gsi_advance_fw_nondebug_nonlocal (&gsi2);
593 while (!(gsi_end_p (gsi1) || gsi_end_p (gsi2)))
595 s1 = gsi_stmt (gsi1);
596 s2 = gsi_stmt (gsi2);
597 if (gimple_code (s1) != gimple_code (s2))
598 return 0;
599 if (is_gimple_call (s1) && !gimple_call_same_target_p (s1, s2))
600 return 0;
601 gsi_next_nondebug (&gsi1);
602 gsi_next_nondebug (&gsi2);
603 gsi_advance_fw_nondebug_nonlocal (&gsi1);
604 gsi_advance_fw_nondebug_nonlocal (&gsi2);
607 return 1;
610 /* Alloc and init a new SAME_SUCC. */
612 static same_succ
613 same_succ_alloc (void)
615 same_succ same = XNEW (struct same_succ_def);
617 same->bbs = BITMAP_ALLOC (NULL);
618 same->succs = BITMAP_ALLOC (NULL);
619 same->inverse = BITMAP_ALLOC (NULL);
620 same->succ_flags.create (10);
621 same->in_worklist = false;
623 return same;
626 /* Delete same_succ E. */
628 void
629 same_succ_def::remove (same_succ e)
631 BITMAP_FREE (e->bbs);
632 BITMAP_FREE (e->succs);
633 BITMAP_FREE (e->inverse);
634 e->succ_flags.release ();
636 XDELETE (e);
639 /* Reset same_succ SAME. */
641 static void
642 same_succ_reset (same_succ same)
644 bitmap_clear (same->bbs);
645 bitmap_clear (same->succs);
646 bitmap_clear (same->inverse);
647 same->succ_flags.truncate (0);
650 static hash_table<same_succ_def> *same_succ_htab;
652 /* Array that is used to store the edge flags for a successor. */
654 static int *same_succ_edge_flags;
656 /* Bitmap that is used to mark bbs that are recently deleted. */
658 static bitmap deleted_bbs;
660 /* Bitmap that is used to mark predecessors of bbs that are
661 deleted. */
663 static bitmap deleted_bb_preds;
665 /* Prints same_succ_htab to stderr. */
667 extern void debug_same_succ (void);
668 DEBUG_FUNCTION void
669 debug_same_succ ( void)
671 same_succ_htab->traverse <FILE *, ssa_same_succ_print_traverse> (stderr);
675 /* Vector of bbs to process. */
677 static vec<same_succ> worklist;
679 /* Prints worklist to FILE. */
681 static void
682 print_worklist (FILE *file)
684 unsigned int i;
685 for (i = 0; i < worklist.length (); ++i)
686 same_succ_print (file, worklist[i]);
689 /* Adds SAME to worklist. */
691 static void
692 add_to_worklist (same_succ same)
694 if (same->in_worklist)
695 return;
697 if (bitmap_count_bits (same->bbs) < 2)
698 return;
700 same->in_worklist = true;
701 worklist.safe_push (same);
704 /* Add BB to same_succ_htab. */
706 static void
707 find_same_succ_bb (basic_block bb, same_succ *same_p)
709 unsigned int j;
710 bitmap_iterator bj;
711 same_succ same = *same_p;
712 same_succ *slot;
713 edge_iterator ei;
714 edge e;
716 if (bb == NULL
717 /* Be conservative with loop structure. It's not evident that this test
718 is sufficient. Before tail-merge, we've just called
719 loop_optimizer_finalize, and LOOPS_MAY_HAVE_MULTIPLE_LATCHES is now
720 set, so there's no guarantee that the loop->latch value is still valid.
721 But we assume that, since we've forced LOOPS_HAVE_SIMPLE_LATCHES at the
722 start of pre, we've kept that property intact throughout pre, and are
723 keeping it throughout tail-merge using this test. */
724 || bb->loop_father->latch == bb)
725 return;
726 bitmap_set_bit (same->bbs, bb->index);
727 FOR_EACH_EDGE (e, ei, bb->succs)
729 int index = e->dest->index;
730 bitmap_set_bit (same->succs, index);
731 same_succ_edge_flags[index] = e->flags;
733 EXECUTE_IF_SET_IN_BITMAP (same->succs, 0, j, bj)
734 same->succ_flags.safe_push (same_succ_edge_flags[j]);
736 same->hashval = same_succ_hash (same);
738 slot = same_succ_htab->find_slot_with_hash (same, same->hashval, INSERT);
739 if (*slot == NULL)
741 *slot = same;
742 BB_SAME_SUCC (bb) = same;
743 add_to_worklist (same);
744 *same_p = NULL;
746 else
748 bitmap_set_bit ((*slot)->bbs, bb->index);
749 BB_SAME_SUCC (bb) = *slot;
750 add_to_worklist (*slot);
751 if (inverse_flags (same, *slot))
752 bitmap_set_bit ((*slot)->inverse, bb->index);
753 same_succ_reset (same);
757 /* Find bbs with same successors. */
759 static void
760 find_same_succ (void)
762 same_succ same = same_succ_alloc ();
763 basic_block bb;
765 FOR_EACH_BB_FN (bb, cfun)
767 find_same_succ_bb (bb, &same);
768 if (same == NULL)
769 same = same_succ_alloc ();
772 same_succ_def::remove (same);
775 /* Initializes worklist administration. */
777 static void
778 init_worklist (void)
780 alloc_aux_for_blocks (sizeof (struct aux_bb_info));
781 same_succ_htab = new hash_table<same_succ_def> (n_basic_blocks_for_fn (cfun));
782 same_succ_edge_flags = XCNEWVEC (int, last_basic_block_for_fn (cfun));
783 deleted_bbs = BITMAP_ALLOC (NULL);
784 deleted_bb_preds = BITMAP_ALLOC (NULL);
785 worklist.create (n_basic_blocks_for_fn (cfun));
786 find_same_succ ();
788 if (dump_file && (dump_flags & TDF_DETAILS))
790 fprintf (dump_file, "initial worklist:\n");
791 print_worklist (dump_file);
795 /* Deletes worklist administration. */
797 static void
798 delete_worklist (void)
800 free_aux_for_blocks ();
801 delete same_succ_htab;
802 same_succ_htab = NULL;
803 XDELETEVEC (same_succ_edge_flags);
804 same_succ_edge_flags = NULL;
805 BITMAP_FREE (deleted_bbs);
806 BITMAP_FREE (deleted_bb_preds);
807 worklist.release ();
810 /* Mark BB as deleted, and mark its predecessors. */
812 static void
813 mark_basic_block_deleted (basic_block bb)
815 edge e;
816 edge_iterator ei;
818 bitmap_set_bit (deleted_bbs, bb->index);
820 FOR_EACH_EDGE (e, ei, bb->preds)
821 bitmap_set_bit (deleted_bb_preds, e->src->index);
824 /* Removes BB from its corresponding same_succ. */
826 static void
827 same_succ_flush_bb (basic_block bb)
829 same_succ same = BB_SAME_SUCC (bb);
830 BB_SAME_SUCC (bb) = NULL;
831 if (bitmap_single_bit_set_p (same->bbs))
832 same_succ_htab->remove_elt_with_hash (same, same->hashval);
833 else
834 bitmap_clear_bit (same->bbs, bb->index);
837 /* Removes all bbs in BBS from their corresponding same_succ. */
839 static void
840 same_succ_flush_bbs (bitmap bbs)
842 unsigned int i;
843 bitmap_iterator bi;
845 EXECUTE_IF_SET_IN_BITMAP (bbs, 0, i, bi)
846 same_succ_flush_bb (BASIC_BLOCK_FOR_FN (cfun, i));
849 /* Release the last vdef in BB, either normal or phi result. */
851 static void
852 release_last_vdef (basic_block bb)
854 for (gimple_stmt_iterator i = gsi_last_bb (bb); !gsi_end_p (i);
855 gsi_prev_nondebug (&i))
857 gimple stmt = gsi_stmt (i);
858 if (gimple_vdef (stmt) == NULL_TREE)
859 continue;
861 mark_virtual_operand_for_renaming (gimple_vdef (stmt));
862 return;
865 for (gphi_iterator i = gsi_start_phis (bb); !gsi_end_p (i);
866 gsi_next (&i))
868 gphi *phi = i.phi ();
869 tree res = gimple_phi_result (phi);
871 if (!virtual_operand_p (res))
872 continue;
874 mark_virtual_phi_result_for_renaming (phi);
875 return;
879 /* For deleted_bb_preds, find bbs with same successors. */
881 static void
882 update_worklist (void)
884 unsigned int i;
885 bitmap_iterator bi;
886 basic_block bb;
887 same_succ same;
889 bitmap_and_compl_into (deleted_bb_preds, deleted_bbs);
890 bitmap_clear (deleted_bbs);
892 bitmap_clear_bit (deleted_bb_preds, ENTRY_BLOCK);
893 same_succ_flush_bbs (deleted_bb_preds);
895 same = same_succ_alloc ();
896 EXECUTE_IF_SET_IN_BITMAP (deleted_bb_preds, 0, i, bi)
898 bb = BASIC_BLOCK_FOR_FN (cfun, i);
899 gcc_assert (bb != NULL);
900 find_same_succ_bb (bb, &same);
901 if (same == NULL)
902 same = same_succ_alloc ();
904 same_succ_def::remove (same);
905 bitmap_clear (deleted_bb_preds);
908 /* Prints cluster C to FILE. */
910 static void
911 print_cluster (FILE *file, bb_cluster c)
913 if (c == NULL)
914 return;
915 bitmap_print (file, c->bbs, "bbs:", "\n");
916 bitmap_print (file, c->preds, "preds:", "\n");
919 /* Prints cluster C to stderr. */
921 extern void debug_cluster (bb_cluster);
922 DEBUG_FUNCTION void
923 debug_cluster (bb_cluster c)
925 print_cluster (stderr, c);
928 /* Update C->rep_bb, given that BB is added to the cluster. */
930 static void
931 update_rep_bb (bb_cluster c, basic_block bb)
933 /* Initial. */
934 if (c->rep_bb == NULL)
936 c->rep_bb = bb;
937 return;
940 /* Current needs no deps, keep it. */
941 if (BB_DEP_BB (c->rep_bb) == NULL)
942 return;
944 /* Bb needs no deps, change rep_bb. */
945 if (BB_DEP_BB (bb) == NULL)
947 c->rep_bb = bb;
948 return;
951 /* Bb needs last deps earlier than current, change rep_bb. A potential
952 problem with this, is that the first deps might also be earlier, which
953 would mean we prefer longer lifetimes for the deps. To be able to check
954 for this, we would have to trace BB_FIRST_DEP_BB as well, besides
955 BB_DEP_BB, which is really BB_LAST_DEP_BB.
956 The benefit of choosing the bb with last deps earlier, is that it can
957 potentially be used as replacement for more bbs. */
958 if (dominated_by_p (CDI_DOMINATORS, BB_DEP_BB (c->rep_bb), BB_DEP_BB (bb)))
959 c->rep_bb = bb;
962 /* Add BB to cluster C. Sets BB in C->bbs, and preds of BB in C->preds. */
964 static void
965 add_bb_to_cluster (bb_cluster c, basic_block bb)
967 edge e;
968 edge_iterator ei;
970 bitmap_set_bit (c->bbs, bb->index);
972 FOR_EACH_EDGE (e, ei, bb->preds)
973 bitmap_set_bit (c->preds, e->src->index);
975 update_rep_bb (c, bb);
978 /* Allocate and init new cluster. */
980 static bb_cluster
981 new_cluster (void)
983 bb_cluster c;
984 c = XCNEW (struct bb_cluster_def);
985 c->bbs = BITMAP_ALLOC (NULL);
986 c->preds = BITMAP_ALLOC (NULL);
987 c->rep_bb = NULL;
988 return c;
991 /* Delete clusters. */
993 static void
994 delete_cluster (bb_cluster c)
996 if (c == NULL)
997 return;
998 BITMAP_FREE (c->bbs);
999 BITMAP_FREE (c->preds);
1000 XDELETE (c);
1004 /* Array that contains all clusters. */
1006 static vec<bb_cluster> all_clusters;
1008 /* Allocate all cluster vectors. */
1010 static void
1011 alloc_cluster_vectors (void)
1013 all_clusters.create (n_basic_blocks_for_fn (cfun));
1016 /* Reset all cluster vectors. */
1018 static void
1019 reset_cluster_vectors (void)
1021 unsigned int i;
1022 basic_block bb;
1023 for (i = 0; i < all_clusters.length (); ++i)
1024 delete_cluster (all_clusters[i]);
1025 all_clusters.truncate (0);
1026 FOR_EACH_BB_FN (bb, cfun)
1027 BB_CLUSTER (bb) = NULL;
1030 /* Delete all cluster vectors. */
1032 static void
1033 delete_cluster_vectors (void)
1035 unsigned int i;
1036 for (i = 0; i < all_clusters.length (); ++i)
1037 delete_cluster (all_clusters[i]);
1038 all_clusters.release ();
1041 /* Merge cluster C2 into C1. */
1043 static void
1044 merge_clusters (bb_cluster c1, bb_cluster c2)
1046 bitmap_ior_into (c1->bbs, c2->bbs);
1047 bitmap_ior_into (c1->preds, c2->preds);
1050 /* Register equivalence of BB1 and BB2 (members of cluster C). Store c in
1051 all_clusters, or merge c with existing cluster. */
1053 static void
1054 set_cluster (basic_block bb1, basic_block bb2)
1056 basic_block merge_bb, other_bb;
1057 bb_cluster merge, old, c;
1059 if (BB_CLUSTER (bb1) == NULL && BB_CLUSTER (bb2) == NULL)
1061 c = new_cluster ();
1062 add_bb_to_cluster (c, bb1);
1063 add_bb_to_cluster (c, bb2);
1064 BB_CLUSTER (bb1) = c;
1065 BB_CLUSTER (bb2) = c;
1066 c->index = all_clusters.length ();
1067 all_clusters.safe_push (c);
1069 else if (BB_CLUSTER (bb1) == NULL || BB_CLUSTER (bb2) == NULL)
1071 merge_bb = BB_CLUSTER (bb1) == NULL ? bb2 : bb1;
1072 other_bb = BB_CLUSTER (bb1) == NULL ? bb1 : bb2;
1073 merge = BB_CLUSTER (merge_bb);
1074 add_bb_to_cluster (merge, other_bb);
1075 BB_CLUSTER (other_bb) = merge;
1077 else if (BB_CLUSTER (bb1) != BB_CLUSTER (bb2))
1079 unsigned int i;
1080 bitmap_iterator bi;
1082 old = BB_CLUSTER (bb2);
1083 merge = BB_CLUSTER (bb1);
1084 merge_clusters (merge, old);
1085 EXECUTE_IF_SET_IN_BITMAP (old->bbs, 0, i, bi)
1086 BB_CLUSTER (BASIC_BLOCK_FOR_FN (cfun, i)) = merge;
1087 all_clusters[old->index] = NULL;
1088 update_rep_bb (merge, old->rep_bb);
1089 delete_cluster (old);
1091 else
1092 gcc_unreachable ();
1095 /* Return true if gimple operands T1 and T2 have the same value. */
1097 static bool
1098 gimple_operand_equal_value_p (tree t1, tree t2)
1100 if (t1 == t2)
1101 return true;
1103 if (t1 == NULL_TREE
1104 || t2 == NULL_TREE)
1105 return false;
1107 if (operand_equal_p (t1, t2, 0))
1108 return true;
1110 return gvn_uses_equal (t1, t2);
/* Return true if gimple statements S1 and S2 are equal.  Gimple_bb (s1) and
   gimple_bb (s2) are members of SAME_SUCC.  */

static bool
gimple_equal_p (same_succ same_succ, gimple s1, gimple s2)
{
  unsigned int i;
  tree lhs1, lhs2;
  basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
  tree t1, t2;
  bool inv_cond;
  enum tree_code code1, code2;

  /* Statements of different kinds are never equal.  */
  if (gimple_code (s1) != gimple_code (s2))
    return false;

  switch (gimple_code (s1))
    {
    case GIMPLE_CALL:
      /* Calls must target the same function.  */
      if (!gimple_call_same_target_p (s1, s2))
	return false;

      /* The static chains (for nested functions) must have equal value.  */
      t1 = gimple_call_chain (s1);
      t2 = gimple_call_chain (s2);
      if (!gimple_operand_equal_value_p (t1, t2))
	return false;

      /* All arguments must match pairwise, by value.  */
      if (gimple_call_num_args (s1) != gimple_call_num_args (s2))
	return false;

      for (i = 0; i < gimple_call_num_args (s1); ++i)
	{
	  t1 = gimple_call_arg (s1, i);
	  t2 = gimple_call_arg (s2, i);
	  if (!gimple_operand_equal_value_p (t1, t2))
	    return false;
	}

      /* Finally compare the lhs: both absent is equal, one absent is not;
	 two SSA names are compared by their GVN value, anything else
	 structurally.  */
      lhs1 = gimple_get_lhs (s1);
      lhs2 = gimple_get_lhs (s2);
      if (lhs1 == NULL_TREE && lhs2 == NULL_TREE)
	return true;
      if (lhs1 == NULL_TREE || lhs2 == NULL_TREE)
	return false;
      if (TREE_CODE (lhs1) == SSA_NAME && TREE_CODE (lhs2) == SSA_NAME)
	return vn_valueize (lhs1) == vn_valueize (lhs2);
      return operand_equal_p (lhs1, lhs2, 0);

    case GIMPLE_ASSIGN:
      /* Two non-SSA lhs'es (memory stores) are equal if both lhs and rhs1
	 match; two SSA lhs'es are equal if the rhs1's are structurally
	 equal.  A mix of SSA and non-SSA lhs is never equal.  */
      lhs1 = gimple_get_lhs (s1);
      lhs2 = gimple_get_lhs (s2);
      if (TREE_CODE (lhs1) != SSA_NAME
	  && TREE_CODE (lhs2) != SSA_NAME)
	return (operand_equal_p (lhs1, lhs2, 0)
		&& gimple_operand_equal_value_p (gimple_assign_rhs1 (s1),
						 gimple_assign_rhs1 (s2)));
      else if (TREE_CODE (lhs1) == SSA_NAME
	       && TREE_CODE (lhs2) == SSA_NAME)
	return operand_equal_p (gimple_assign_rhs1 (s1),
				gimple_assign_rhs1 (s2), 0);
      return false;

    case GIMPLE_COND:
      /* Both operands of the condition must have equal value.  */
      t1 = gimple_cond_lhs (s1);
      t2 = gimple_cond_lhs (s2);
      if (!gimple_operand_equal_value_p (t1, t2))
	return false;

      t1 = gimple_cond_rhs (s1);
      t2 = gimple_cond_rhs (s2);
      if (!gimple_operand_equal_value_p (t1, t2))
	return false;

      /* If exactly one of the blocks reaches the common successor through
	 the inverted edge, compare against the inverted comparison code.
	 Inversion is invalid for unordered compares when NaNs are
	 possible, in which case invert_tree_comparison returns
	 ERROR_MARK and the codes won't match.  */
      code1 = gimple_expr_code (s1);
      code2 = gimple_expr_code (s2);
      inv_cond = (bitmap_bit_p (same_succ->inverse, bb1->index)
		  != bitmap_bit_p (same_succ->inverse, bb2->index));
      if (inv_cond)
	{
	  bool honor_nans = HONOR_NANS (t1);
	  code2 = invert_tree_comparison (code2, honor_nans);
	}
      return code1 == code2;

    default:
      return false;
    }
}
/* Let GSI skip backwards over local defs.  Return the earliest vuse in VUSE.
   Return true in VUSE_ESCAPED if the vuse influenced a SSA_OP_DEF of one of the
   processed statements.  Note that *VUSE_ESCAPED is only ever set, never
   cleared, so it accumulates across repeated calls.  */

static void
gsi_advance_bw_nondebug_nonlocal (gimple_stmt_iterator *gsi, tree *vuse,
				  bool *vuse_escaped)
{
  gimple stmt;
  tree lvuse;

  while (true)
    {
      /* Ran off the start of the block.  */
      if (gsi_end_p (*gsi))
	return;
      stmt = gsi_stmt (*gsi);

      /* Walking backwards, the last vuse recorded is the earliest one.  */
      lvuse = gimple_vuse (stmt);
      if (lvuse != NULL_TREE)
	{
	  *vuse = lvuse;
	  /* The stmt both uses memory and produces a def, so the vuse
	     influences that def.  */
	  if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_DEF))
	    *vuse_escaped = true;
	}

      /* Stop at the first stmt whose defs are used outside the block.  */
      if (!stmt_local_def (stmt))
	return;
      gsi_prev_nondebug (gsi);
    }
}
/* Determines whether BB1 and BB2 (members of same_succ) are duplicates.  If so,
   clusters them.  */

static void
find_duplicate (same_succ same_succ, basic_block bb1, basic_block bb2)
{
  gimple_stmt_iterator gsi1 = gsi_last_nondebug_bb (bb1);
  gimple_stmt_iterator gsi2 = gsi_last_nondebug_bb (bb2);
  tree vuse1 = NULL_TREE, vuse2 = NULL_TREE;
  bool vuse_escaped = false;

  /* Position both iterators on the last non-local-def statement, recording
     the earliest vuse encountered along the way.  */
  gsi_advance_bw_nondebug_nonlocal (&gsi1, &vuse1, &vuse_escaped);
  gsi_advance_bw_nondebug_nonlocal (&gsi2, &vuse2, &vuse_escaped);

  /* Walk both blocks backwards in lock-step; bail out at the first pair of
     statements that are not equal.  */
  while (!gsi_end_p (gsi1) && !gsi_end_p (gsi2))
    {
      gimple stmt1 = gsi_stmt (gsi1);
      gimple stmt2 = gsi_stmt (gsi2);

      /* Transaction-ending statements must not be merged.  What would be
	 better than doing this here is to blacklist the bb containing the
	 stmt, when encountering the stmt f.i. in same_succ_hash.  */
      if (is_tm_ending (stmt1)
	  || is_tm_ending (stmt2))
	return;

      if (!gimple_equal_p (same_succ, stmt1, stmt2))
	return;

      gsi_prev_nondebug (&gsi1);
      gsi_prev_nondebug (&gsi2);
      gsi_advance_bw_nondebug_nonlocal (&gsi1, &vuse1, &vuse_escaped);
      gsi_advance_bw_nondebug_nonlocal (&gsi2, &vuse2, &vuse_escaped);
    }

  /* Only a duplicate if both blocks were consumed completely.  */
  if (!(gsi_end_p (gsi1) && gsi_end_p (gsi2)))
    return;

  /* If the incoming vuses are not the same, and the vuse escaped into an
     SSA_OP_DEF, then merging the 2 blocks will change the value of the def,
     which potentially means the semantics of one of the blocks will be changed.
     TODO: make this check more precise.  */
  if (vuse_escaped && vuse1 != vuse2)
    return;

  if (dump_file)
    fprintf (dump_file, "find_duplicates: <bb %d> duplicate of <bb %d>\n",
	     bb1->index, bb2->index);

  set_cluster (bb1, bb2);
}
1285 /* Returns whether for all phis in DEST the phi alternatives for E1 and
1286 E2 are equal. */
1288 static bool
1289 same_phi_alternatives_1 (basic_block dest, edge e1, edge e2)
1291 int n1 = e1->dest_idx, n2 = e2->dest_idx;
1292 gphi_iterator gsi;
1294 for (gsi = gsi_start_phis (dest); !gsi_end_p (gsi); gsi_next (&gsi))
1296 gphi *phi = gsi.phi ();
1297 tree lhs = gimple_phi_result (phi);
1298 tree val1 = gimple_phi_arg_def (phi, n1);
1299 tree val2 = gimple_phi_arg_def (phi, n2);
1301 if (virtual_operand_p (lhs))
1302 continue;
1304 if (operand_equal_for_phi_arg_p (val1, val2))
1305 continue;
1306 if (gvn_uses_equal (val1, val2))
1307 continue;
1309 return false;
1312 return true;
1315 /* Returns whether for all successors of BB1 and BB2 (members of SAME_SUCC), the
1316 phi alternatives for BB1 and BB2 are equal. */
1318 static bool
1319 same_phi_alternatives (same_succ same_succ, basic_block bb1, basic_block bb2)
1321 unsigned int s;
1322 bitmap_iterator bs;
1323 edge e1, e2;
1324 basic_block succ;
1326 EXECUTE_IF_SET_IN_BITMAP (same_succ->succs, 0, s, bs)
1328 succ = BASIC_BLOCK_FOR_FN (cfun, s);
1329 e1 = find_edge (bb1, succ);
1330 e2 = find_edge (bb2, succ);
1331 if (e1->flags & EDGE_COMPLEX
1332 || e2->flags & EDGE_COMPLEX)
1333 return false;
1335 /* For all phis in bb, the phi alternatives for e1 and e2 need to have
1336 the same value. */
1337 if (!same_phi_alternatives_1 (succ, e1, e2))
1338 return false;
1341 return true;
1344 /* Return true if BB has non-vop phis. */
1346 static bool
1347 bb_has_non_vop_phi (basic_block bb)
1349 gimple_seq phis = phi_nodes (bb);
1350 gimple phi;
1352 if (phis == NULL)
1353 return false;
1355 if (!gimple_seq_singleton_p (phis))
1356 return true;
1358 phi = gimple_seq_first_stmt (phis);
1359 return !virtual_operand_p (gimple_phi_result (phi));
1362 /* Returns true if redirecting the incoming edges of FROM to TO maintains the
1363 invariant that uses in FROM are dominates by their defs. */
1365 static bool
1366 deps_ok_for_redirect_from_bb_to_bb (basic_block from, basic_block to)
1368 basic_block cd, dep_bb = BB_DEP_BB (to);
1369 edge_iterator ei;
1370 edge e;
1371 bitmap from_preds = BITMAP_ALLOC (NULL);
1373 if (dep_bb == NULL)
1374 return true;
1376 FOR_EACH_EDGE (e, ei, from->preds)
1377 bitmap_set_bit (from_preds, e->src->index);
1378 cd = nearest_common_dominator_for_set (CDI_DOMINATORS, from_preds);
1379 BITMAP_FREE (from_preds);
1381 return dominated_by_p (CDI_DOMINATORS, dep_bb, cd);
1384 /* Returns true if replacing BB1 (or its replacement bb) by BB2 (or its
1385 replacement bb) and vice versa maintains the invariant that uses in the
1386 replacement are dominates by their defs. */
1388 static bool
1389 deps_ok_for_redirect (basic_block bb1, basic_block bb2)
1391 if (BB_CLUSTER (bb1) != NULL)
1392 bb1 = BB_CLUSTER (bb1)->rep_bb;
1394 if (BB_CLUSTER (bb2) != NULL)
1395 bb2 = BB_CLUSTER (bb2)->rep_bb;
1397 return (deps_ok_for_redirect_from_bb_to_bb (bb1, bb2)
1398 && deps_ok_for_redirect_from_bb_to_bb (bb2, bb1));
/* Within SAME_SUCC->bbs, find clusters of bbs which can be merged.  */

static void
find_clusters_1 (same_succ same_succ)
{
  basic_block bb1, bb2;
  unsigned int i, j;
  bitmap_iterator bi, bj;
  int nr_comparisons;
  int max_comparisons = PARAM_VALUE (PARAM_MAX_TAIL_MERGE_COMPARISONS);

  EXECUTE_IF_SET_IN_BITMAP (same_succ->bbs, 0, i, bi)
    {
      bb1 = BASIC_BLOCK_FOR_FN (cfun, i);

      /* TODO: handle blocks with phi-nodes.  We'll have to find corresponding
	 phi-nodes in bb1 and bb2, with the same alternatives for the same
	 preds.  */
      if (bb_has_non_vop_phi (bb1))
	continue;

      nr_comparisons = 0;
      /* Only pair bb1 with bbs after it in the bitmap; earlier pairs were
	 already considered by previous outer iterations.  */
      EXECUTE_IF_SET_IN_BITMAP (same_succ->bbs, i + 1, j, bj)
	{
	  bb2 = BASIC_BLOCK_FOR_FN (cfun, j);

	  if (bb_has_non_vop_phi (bb2))
	    continue;

	  /* Already clustered together; nothing more to do.  */
	  if (BB_CLUSTER (bb1) != NULL && BB_CLUSTER (bb1) == BB_CLUSTER (bb2))
	    continue;

	  /* Limit quadratic behaviour.  */
	  nr_comparisons++;
	  if (nr_comparisons > max_comparisons)
	    break;

	  /* This is a conservative dependency check.  We could test more
	     precise for allowed replacement direction.  */
	  if (!deps_ok_for_redirect (bb1, bb2))
	    continue;

	  if (!(same_phi_alternatives (same_succ, bb1, bb2)))
	    continue;

	  find_duplicate (same_succ, bb1, bb2);
	}
    }
}
1451 /* Find clusters of bbs which can be merged. */
1453 static void
1454 find_clusters (void)
1456 same_succ same;
1458 while (!worklist.is_empty ())
1460 same = worklist.pop ();
1461 same->in_worklist = false;
1462 if (dump_file && (dump_flags & TDF_DETAILS))
1464 fprintf (dump_file, "processing worklist entry\n");
1465 same_succ_print (dump_file, same);
1467 find_clusters_1 (same);
1471 /* Returns the vop phi of BB, if any. */
1473 static gphi *
1474 vop_phi (basic_block bb)
1476 gphi *stmt;
1477 gphi_iterator gsi;
1478 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1480 stmt = gsi.phi ();
1481 if (! virtual_operand_p (gimple_phi_result (stmt)))
1482 continue;
1483 return stmt;
1485 return NULL;
/* Redirect all edges from BB1 to BB2, removes BB1 and marks it as removed.  */

static void
replace_block_by (basic_block bb1, basic_block bb2)
{
  edge pred_edge;
  edge e1, e2;
  edge_iterator ei;
  unsigned int i;
  gphi *bb2_phi;

  bb2_phi = vop_phi (bb2);

  /* Mark the basic block as deleted.  */
  mark_basic_block_deleted (bb1);

  /* Redirect the incoming edges of bb1 to bb2.  Iterate downwards, since
     redirecting an edge removes it from bb1's pred vector.  */
  for (i = EDGE_COUNT (bb1->preds); i > 0 ; --i)
    {
      pred_edge = EDGE_PRED (bb1, i - 1);
      pred_edge = redirect_edge_and_branch (pred_edge, bb2);
      gcc_assert (pred_edge != NULL);

      if (bb2_phi == NULL)
	continue;

      /* The phi might have run out of capacity when the redirect added an
	 argument, which means it could have been replaced.  Refresh it.  */
      bb2_phi = vop_phi (bb2);

      /* Extend the vop phi with an argument for the new incoming edge.  */
      add_phi_arg (bb2_phi, SSA_NAME_VAR (gimple_phi_result (bb2_phi)),
		   pred_edge, UNKNOWN_LOCATION);
    }

  /* Merge bb1's profile frequency and count into bb2, saturating the
     frequency at BB_FREQ_MAX.  */
  bb2->frequency += bb1->frequency;
  if (bb2->frequency > BB_FREQ_MAX)
    bb2->frequency = BB_FREQ_MAX;

  bb2->count += bb1->count;

  /* Merge the outgoing edge counts from bb1 onto bb2.  */
  gcov_type out_sum = 0;
  FOR_EACH_EDGE (e1, ei, bb1->succs)
    {
      e2 = find_edge (bb2, e1->dest);
      gcc_assert (e2);
      e2->count += e1->count;
      out_sum += e2->count;
    }
  /* Recompute the edge probabilities from the new merged edge count.
     Use the sum of the new merged edge counts computed above instead
     of bb2's merged count, in case there are profile count insanities
     making the bb count inconsistent with the edge weights.  */
  FOR_EACH_EDGE (e2, ei, bb2->succs)
    {
      e2->probability = GCOV_COMPUTE_SCALE (e2->count, out_sum);
    }

  /* Do updates that use bb1, before deleting bb1.  */
  release_last_vdef (bb1);
  same_succ_flush_bb (bb1);

  delete_basic_block (bb1);
}
/* Bbs for which update_debug_stmt needs to be called.  */

static bitmap update_bbs;
/* For each cluster in all_clusters, merge all cluster->bbs.  Returns
   number of bbs removed.  */

static int
apply_clusters (void)
{
  basic_block bb1, bb2;
  bb_cluster c;
  unsigned int i, j;
  bitmap_iterator bj;
  int nr_bbs_removed = 0;

  for (i = 0; i < all_clusters.length (); ++i)
    {
      c = all_clusters[i];
      if (c == NULL)
	continue;

      /* The representative bb survives the merge; remember it so its debug
	 stmts can be updated afterwards.  */
      bb2 = c->rep_bb;
      bitmap_set_bit (update_bbs, bb2->index);

      /* Drop the representative from the set, so the loop below only visits
	 the bbs that get replaced by it.  */
      bitmap_clear_bit (c->bbs, bb2->index);
      EXECUTE_IF_SET_IN_BITMAP (c->bbs, 0, j, bj)
	{
	  bb1 = BASIC_BLOCK_FOR_FN (cfun, j);
	  /* bb1 is deleted, so it needs no debug-stmt update.  */
	  bitmap_clear_bit (update_bbs, bb1->index);

	  replace_block_by (bb1, bb2);
	  nr_bbs_removed++;
	}
    }

  return nr_bbs_removed;
}
/* Resets debug statement STMT if it has uses that are not dominated by their
   defs.  */

static void
update_debug_stmt (gimple stmt)
{
  use_operand_p use_p;
  ssa_op_iter oi;
  basic_block bbuse;

  /* Only debug bind statements carry a resettable value.  */
  if (!gimple_debug_bind_p (stmt))
    return;

  bbuse = gimple_bb (stmt);
  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, oi, SSA_OP_USE)
    {
      tree name = USE_FROM_PTR (use_p);
      gimple def_stmt = SSA_NAME_DEF_STMT (name);
      basic_block bbdef = gimple_bb (def_stmt);
      /* The use is fine if the def has no bb (e.g. a default def), is in
	 the same bb, or its bb dominates the use's bb.  */
      if (bbdef == NULL || bbuse == bbdef
	  || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef))
	continue;

      /* One undominated use suffices: reset the bind value and stop.  */
      gimple_debug_bind_reset_value (stmt);
      update_stmt (stmt);
      break;
    }
}
1621 /* Resets all debug statements that have uses that are not
1622 dominated by their defs. */
1624 static void
1625 update_debug_stmts (void)
1627 basic_block bb;
1628 bitmap_iterator bi;
1629 unsigned int i;
1631 EXECUTE_IF_SET_IN_BITMAP (update_bbs, 0, i, bi)
1633 gimple stmt;
1634 gimple_stmt_iterator gsi;
1636 bb = BASIC_BLOCK_FOR_FN (cfun, i);
1637 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1639 stmt = gsi_stmt (gsi);
1640 if (!is_gimple_debug (stmt))
1641 continue;
1642 update_debug_stmt (stmt);
/* Runs tail merge optimization.  */

unsigned int
tail_merge_optimize (unsigned int todo)
{
  int nr_bbs_removed_total = 0;
  int nr_bbs_removed;
  bool loop_entered = false;
  int iteration_nr = 0;
  int max_iterations = PARAM_VALUE (PARAM_MAX_TAIL_MERGE_ITERATIONS);

  /* Nothing to do when the pass is disabled or allowed zero iterations.  */
  if (!flag_tree_tail_merge
      || max_iterations == 0)
    return 0;

  timevar_push (TV_TREE_TAIL_MERGE);

  if (!dom_info_available_p (CDI_DOMINATORS))
    {
      /* PRE can leave us with unreachable blocks, remove them now.  */
      delete_unreachable_blocks ();
      calculate_dominance_info (CDI_DOMINATORS);
    }
  init_worklist ();

  /* Iterate: each round finds clusters of duplicate bbs and merges them,
     which may expose new merging opportunities for the next round.  */
  while (!worklist.is_empty ())
    {
      if (!loop_entered)
	{
	  /* Allocate the per-run state lazily, on the first iteration.  */
	  loop_entered = true;
	  alloc_cluster_vectors ();
	  update_bbs = BITMAP_ALLOC (NULL);
	}
      else
	reset_cluster_vectors ();

      iteration_nr++;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "worklist iteration #%d\n", iteration_nr);

      find_clusters ();
      gcc_assert (worklist.is_empty ());
      if (all_clusters.is_empty ())
	break;

      nr_bbs_removed = apply_clusters ();
      nr_bbs_removed_total += nr_bbs_removed;
      if (nr_bbs_removed == 0)
	break;

      /* Merging changed the CFG; the dominance info is stale.  */
      free_dominance_info (CDI_DOMINATORS);

      if (iteration_nr == max_iterations)
	break;

      calculate_dominance_info (CDI_DOMINATORS);
      update_worklist ();
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "htab collision / search: %f\n",
	     same_succ_htab->collisions ());

  if (nr_bbs_removed_total > 0)
    {
      if (MAY_HAVE_DEBUG_STMTS)
	{
	  /* Debug binds may now refer to defs that no longer dominate
	     them; reset those.  */
	  calculate_dominance_info (CDI_DOMINATORS);
	  update_debug_stmts ();
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Before TODOs.\n");
	  dump_function_to_file (current_function_decl, dump_file, dump_flags);
	}

      mark_virtual_operands_for_renaming (cfun);
    }

  delete_worklist ();
  if (loop_entered)
    {
      delete_cluster_vectors ();
      BITMAP_FREE (update_bbs);
    }

  timevar_pop (TV_TREE_TAIL_MERGE);

  return todo;
}