/* Tail merging for gimple.
   Copyright (C) 2011-2018 Free Software Foundation, Inc.
   Contributed by Tom de Vries (tom@codesourcery.com)

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* Pass overview.


   MOTIVATIONAL EXAMPLE

   gimple representation of gcc/testsuite/gcc.dg/pr43864.c at the point
   this pass runs:
   hprofStartupp (charD.1 * outputFileNameD.2600, charD.1 * ctxD.2601)
   {
     struct FILED.1638 * fpD.2605;
     charD.1 fileNameD.2604[1000];
     intD.0 D.3915;
     const charD.1 * restrict outputFileName.0D.3914;

     # BLOCK 2 freq:10000
     # PRED: ENTRY [100.0%] (fallthru,exec)
     # PT = nonlocal { D.3926 } (restr)
     outputFileName.0D.3914_3
       = (const charD.1 * restrict) outputFileNameD.2600_2(D);
     # .MEMD.3923_13 = VDEF <.MEMD.3923_12(D)>
     # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
     # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
     sprintfD.759 (&fileNameD.2604, outputFileName.0D.3914_3);
     # .MEMD.3923_14 = VDEF <.MEMD.3923_13>
     # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
     # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
     D.3915_4 = accessD.2606 (&fileNameD.2604, 1);
     if (D.3915_4 == 0)
       goto <bb 3>;
     else
       goto <bb 4>;
     # SUCC: 3 [10.0%] (true,exec) 4 [90.0%] (false,exec)

     # BLOCK 3 freq:1000
     # PRED: 2 [10.0%] (true,exec)
     # .MEMD.3923_15 = VDEF <.MEMD.3923_14>
     # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
     # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
     freeD.898 (ctxD.2601_5(D));
     goto <bb 7>;
     # SUCC: 7 [100.0%] (fallthru,exec)

     # BLOCK 4 freq:9000
     # PRED: 2 [90.0%] (false,exec)
     # .MEMD.3923_16 = VDEF <.MEMD.3923_14>
     # PT = nonlocal escaped
     # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
     # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
     fpD.2605_8 = fopenD.1805 (&fileNameD.2604[0], 0B);
     if (fpD.2605_8 == 0B)
       goto <bb 5>;
     else
       goto <bb 6>;
     # SUCC: 5 [1.9%] (true,exec) 6 [98.1%] (false,exec)

     # BLOCK 5 freq:173
     # PRED: 4 [1.9%] (true,exec)
     # .MEMD.3923_17 = VDEF <.MEMD.3923_16>
     # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
     # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
     freeD.898 (ctxD.2601_5(D));
     goto <bb 7>;
     # SUCC: 7 [100.0%] (fallthru,exec)

     # BLOCK 6 freq:8827
     # PRED: 4 [98.1%] (false,exec)
     # .MEMD.3923_18 = VDEF <.MEMD.3923_16>
     # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
     # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
     fooD.2599 (outputFileNameD.2600_2(D), fpD.2605_8);
     # SUCC: 7 [100.0%] (fallthru,exec)

     # BLOCK 7 freq:10000
     # PRED: 3 [100.0%] (fallthru,exec) 5 [100.0%] (fallthru,exec)
             6 [100.0%] (fallthru,exec)
     # PT = nonlocal null

     # ctxD.2601_1 = PHI <0B(3), 0B(5), ctxD.2601_5(D)(6)>
     # .MEMD.3923_11 = PHI <.MEMD.3923_15(3), .MEMD.3923_17(5),
                            .MEMD.3923_18(6)>
     # VUSE <.MEMD.3923_11>
     return ctxD.2601_1;
     # SUCC: EXIT [100.0%]
   }

   bb 3 and bb 5 can be merged.  The blocks have different predecessors, but
   the same successors, and the same operations.
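
   For orientation, the testcase's C source has roughly this shape (a
   paraphrased sketch, not the verbatim file; identifiers abbreviated):

     char *
     hprofStartup (char *outputFileName, char *ctx)
     {
       char fileName[1000];
       FILE *fp;

       sprintf (fileName, outputFileName);
       if (access (fileName, 1) == 0)
         {
           free (ctx);                 // becomes bb 3
           return 0;
         }
       fp = fopen (fileName, 0);
       if (fp == 0)
         {
           free (ctx);                 // becomes bb 5, a duplicate of bb 3
           return 0;
         }
       foo (outputFileName, fp);
       return ctx;
     }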

   CONTEXT

   A technique called tail merging (or cross jumping) can fix the example
   above.  For a block, we look for common code at the end (the tail) of the
   predecessor blocks, and insert jumps from one block to the other.
   The example is a special case for tail merging, in that 2 whole blocks
   can be merged, rather than just their end parts.
   We currently only focus on whole block merging, so in that sense
   calling this pass tail merge is a bit of a misnomer.

   We distinguish 2 kinds of situations in which blocks can be merged:
   - same operations, same predecessors.  The successor edges coming from one
     block are redirected to come from the other block.
   - same operations, same successors.  The predecessor edges entering one
     block are redirected to enter the other block.  Note that this operation
     might involve introducing phi operations.

   For efficient implementation, we would like to value number the blocks,
   and have a comparison operator that tells us whether the blocks are equal.
   Besides being runtime efficient, block value numbering should also abstract
   from irrelevant differences in order of operations, much like normal value
   numbering abstracts from irrelevant order of operations.

   For the first situation (same operations, same predecessors), normal value
   numbering fits well.  We can calculate a block value number based on the
   value numbers of the defs and vdefs.

   For the second situation (same operations, same successors), this approach
   doesn't work so well.  We can illustrate this using the example.  The calls
   to free use different vdefs: MEMD.3923_16 and MEMD.3923_14, and these will
   remain different in value numbering, since they represent different memory
   states.  So the resulting vdefs of the frees will be different in value
   numbering, so the block value numbers will be different.

   The reason why we call the blocks equal is not because they define the same
   values, but because uses in the blocks use (possibly different) defs in the
   same way.  To be able to detect this efficiently, we need to do some kind
   of reverse value numbering, meaning number the uses rather than the defs,
   and calculate a block value number based on the value number of the uses.
   Ideally, a block comparison operator will also indicate which phis are
   needed to merge the blocks.
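
   In the example above: the calls in bb 3 and bb 5,
     # .MEMD.3923_15 = VDEF <.MEMD.3923_14>
     freeD.898 (ctxD.2601_5(D));
   and
     # .MEMD.3923_17 = VDEF <.MEMD.3923_16>
     freeD.898 (ctxD.2601_5(D));
   get different value numbers for their vdefs, but both use ctxD.2601_5 and
   their incoming vop in the same way; that is what makes the blocks
   duplicates.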

   For the moment, we don't do block value numbering, but we do insn-by-insn
   matching, using scc value numbers to match operations with results, and
   structural comparison otherwise, while ignoring vop mismatches.


   IMPLEMENTATION

   1. The pass first determines all groups of blocks with the same successor
      blocks.
   2. Within each group, it tries to determine clusters of equal basic blocks.
   3. The clusters are applied.
   4. The same successor groups are updated.
   5. This process is repeated from 2 onwards, until no more changes (see the
      sketch of the driver loop below).
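
   A rough sketch of the driver loop (mirroring tail_merge_optimize below;
   dominance-info maintenance and dump handling omitted):

     init_worklist ();                 // step 1: same-successor groups
     while (!worklist.is_empty ())
       {
         find_clusters ();             // step 2
         if (all_clusters.is_empty ()  // step 3
             || apply_clusters () == 0)
           break;
         update_worklist ();           // step 4; loop repeats from step 2
       }
     delete_worklist ();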

   LIMITATIONS/TODO

   - block only
   - handles only 'same operations, same successors'.
     It handles same predecessors as a special subcase though.
   - does not implement the reverse value numbering and block value numbering.
   - improve memory allocation: use garbage collected memory, obstacks,
     allocpools where appropriate.
   - no insertion of gimple_reg phis.  We only introduce vop-phis.
   - handle blocks with gimple_reg phi_nodes.


   PASS PLACEMENT
   This 'pass' is not a stand-alone gimple pass, but runs as part of
   pass_pre, in order to share the value numbering.


   SWITCHES

   - ftree-tail-merge.  On at -O2.  We may have to enable it only at -Os.  */
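
/* A usage sketch (not part of the pass itself): since this runs inside
   pass_pre, its work appears in the PRE dump, e.g.

       gcc -O2 -ftree-tail-merge -fdump-tree-pre-details foo.c

   dumps the worklist iterations and the merged blocks.  */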

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "fold-const.h"
#include "trans-mem.h"
#include "cfganal.h"
#include "cfgcleanup.h"
#include "gimple-iterator.h"
#include "tree-cfg.h"
#include "tree-into-ssa.h"
#include "params.h"
#include "tree-ssa-sccvn.h"
#include "cfgloop.h"
#include "tree-eh.h"
#include "tree-cfgcleanup.h"

const int ignore_edge_flags = EDGE_DFS_BACK | EDGE_EXECUTABLE;

/* Describes a group of bbs with the same successors.  The successor bbs are
   cached in succs, and the successor edge flags are cached in succ_flags.
   If a bb has the EDGE_TRUE/FALSE_VALUE flags swapped compared to succ_flags,
   it's marked in inverse.
   Additionally, the hash value for the struct is cached in hashval, and
   in_worklist indicates whether it's currently part of worklist.  */

struct same_succ : pointer_hash <same_succ>
{
  /* The bbs that have the same successor bbs.  */
  bitmap bbs;
  /* The successor bbs.  */
  bitmap succs;
  /* Indicates whether the EDGE_TRUE/FALSE_VALUEs of succ_flags are swapped
     for bb.  */
  bitmap inverse;
  /* The edge flags for each of the successor bbs.  */
  vec<int> succ_flags;
  /* Indicates whether the struct is currently in the worklist.  */
  bool in_worklist;
  /* The hash value of the struct.  */
  hashval_t hashval;

  /* hash_table support.  */
  static inline hashval_t hash (const same_succ *);
  static int equal (const same_succ *, const same_succ *);
  static void remove (same_succ *);
};

/* hash routine for hash_table support, returns hashval of E.  */

inline hashval_t
same_succ::hash (const same_succ *e)
{
  return e->hashval;
}

/* A group of bbs where 1 bb from bbs can replace the other bbs.  */

struct bb_cluster
{
  /* The bbs in the cluster.  */
  bitmap bbs;
  /* The preds of the bbs in the cluster.  */
  bitmap preds;
  /* Index in all_clusters vector.  */
  int index;
  /* The bb to replace the cluster with.  */
  basic_block rep_bb;
};

/* Per bb-info.  */

struct aux_bb_info
{
  /* The number of non-debug statements in the bb.  */
  int size;
  /* The same_succ that this bb is a member of.  */
  same_succ *bb_same_succ;
  /* The cluster that this bb is a member of.  */
  bb_cluster *cluster;
  /* The vop state at the exit of a bb.  This is shortlived data, used to
     communicate data between update_block_by and update_vuses.  */
  tree vop_at_exit;
  /* The bb that either contains or is dominated by the dependencies of the
     bb.  */
  basic_block dep_bb;
};

/* Macros to access the fields of struct aux_bb_info.  */

#define BB_SIZE(bb) (((struct aux_bb_info *)bb->aux)->size)
#define BB_SAME_SUCC(bb) (((struct aux_bb_info *)bb->aux)->bb_same_succ)
#define BB_CLUSTER(bb) (((struct aux_bb_info *)bb->aux)->cluster)
#define BB_VOP_AT_EXIT(bb) (((struct aux_bb_info *)bb->aux)->vop_at_exit)
#define BB_DEP_BB(bb) (((struct aux_bb_info *)bb->aux)->dep_bb)

/* Returns true if the only effect a statement STMT has is to define locally
   used SSA_NAMEs.  */

static bool
stmt_local_def (gimple *stmt)
{
  basic_block bb, def_bb;
  imm_use_iterator iter;
  use_operand_p use_p;
  tree val;
  def_operand_p def_p;

  if (gimple_vdef (stmt) != NULL_TREE
      || gimple_has_side_effects (stmt)
      || gimple_could_trap_p_1 (stmt, false, false)
      || gimple_vuse (stmt) != NULL_TREE
      /* Copied from tree-ssa-ifcombine.c:bb_no_side_effects_p():
	 const calls don't match any of the above, yet they could
	 still have some side-effects - they could contain
	 gimple_could_trap_p statements, like floating point
	 exceptions or integer division by zero.  See PR70586.
	 FIXME: perhaps gimple_has_side_effects or gimple_could_trap_p
	 should handle this.  */
      || is_gimple_call (stmt))
    return false;

  def_p = SINGLE_SSA_DEF_OPERAND (stmt, SSA_OP_DEF);
  if (def_p == NULL)
    return false;

  val = DEF_FROM_PTR (def_p);
  if (val == NULL_TREE || TREE_CODE (val) != SSA_NAME)
    return false;

  def_bb = gimple_bb (stmt);

  FOR_EACH_IMM_USE_FAST (use_p, iter, val)
    {
      if (is_gimple_debug (USE_STMT (use_p)))
	continue;
      bb = gimple_bb (USE_STMT (use_p));
      if (bb == def_bb)
	continue;

      if (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI
	  && EDGE_PRED (bb, PHI_ARG_INDEX_FROM_USE (use_p))->src == def_bb)
	continue;

      return false;
    }

  return true;
}

/* Let GSI skip forwards over local defs.  */

static void
gsi_advance_fw_nondebug_nonlocal (gimple_stmt_iterator *gsi)
{
  gimple *stmt;

  while (true)
    {
      if (gsi_end_p (*gsi))
	return;
      stmt = gsi_stmt (*gsi);
      if (!stmt_local_def (stmt))
	return;
      gsi_next_nondebug (gsi);
    }
}

/* VAL1 and VAL2 are either:
   - uses in BB1 and BB2, or
   - phi alternatives for BB1 and BB2.
   Return true if the uses have the same gvn value.  */

static bool
gvn_uses_equal (tree val1, tree val2)
{
  gcc_checking_assert (val1 != NULL_TREE && val2 != NULL_TREE);

  if (val1 == val2)
    return true;

  if (vn_valueize (val1) != vn_valueize (val2))
    return false;

  return ((TREE_CODE (val1) == SSA_NAME || CONSTANT_CLASS_P (val1))
	  && (TREE_CODE (val2) == SSA_NAME || CONSTANT_CLASS_P (val2)));
}

/* Prints E to FILE.  */

static void
same_succ_print (FILE *file, const same_succ *e)
{
  unsigned int i;
  bitmap_print (file, e->bbs, "bbs:", "\n");
  bitmap_print (file, e->succs, "succs:", "\n");
  bitmap_print (file, e->inverse, "inverse:", "\n");
  fprintf (file, "flags:");
  for (i = 0; i < e->succ_flags.length (); ++i)
    fprintf (file, " %x", e->succ_flags[i]);
  fprintf (file, "\n");
}

/* Prints same_succ *PE to FILE.  */

inline int
ssa_same_succ_print_traverse (same_succ **pe, FILE *file)
{
  const same_succ *e = *pe;
  same_succ_print (file, e);
  return 1;
}

/* Update BB_DEP_BB (USE_BB), given a use of VAL in USE_BB.  */

static void
update_dep_bb (basic_block use_bb, tree val)
{
  basic_block dep_bb;

  /* Not a dep.  */
  if (TREE_CODE (val) != SSA_NAME)
    return;

  /* Skip use of global def.  */
  if (SSA_NAME_IS_DEFAULT_DEF (val))
    return;

  /* Skip use of local def.  */
  dep_bb = gimple_bb (SSA_NAME_DEF_STMT (val));
  if (dep_bb == use_bb)
    return;

  if (BB_DEP_BB (use_bb) == NULL
      || dominated_by_p (CDI_DOMINATORS, dep_bb, BB_DEP_BB (use_bb)))
    BB_DEP_BB (use_bb) = dep_bb;
}

/* Update BB_DEP_BB, given the dependencies in STMT.  */

static void
stmt_update_dep_bb (gimple *stmt)
{
  ssa_op_iter iter;
  use_operand_p use;

  FOR_EACH_SSA_USE_OPERAND (use, stmt, iter, SSA_OP_USE)
    update_dep_bb (gimple_bb (stmt), USE_FROM_PTR (use));
}

/* Calculates hash value for same_succ E.  */

static hashval_t
same_succ_hash (const same_succ *e)
{
  inchash::hash hstate (bitmap_hash (e->succs));
  int flags;
  unsigned int i;
  unsigned int first = bitmap_first_set_bit (e->bbs);
  basic_block bb = BASIC_BLOCK_FOR_FN (cfun, first);
  int size = 0;
  gimple *stmt;
  tree arg;
  unsigned int s;
  bitmap_iterator bs;

  for (gimple_stmt_iterator gsi = gsi_start_nondebug_bb (bb);
       !gsi_end_p (gsi); gsi_next_nondebug (&gsi))
    {
      stmt = gsi_stmt (gsi);
      stmt_update_dep_bb (stmt);
      if (stmt_local_def (stmt))
	continue;
      size++;

      hstate.add_int (gimple_code (stmt));
      if (is_gimple_assign (stmt))
	hstate.add_int (gimple_assign_rhs_code (stmt));
      if (!is_gimple_call (stmt))
	continue;
      if (gimple_call_internal_p (stmt))
	hstate.add_int (gimple_call_internal_fn (stmt));
      else
	{
	  inchash::add_expr (gimple_call_fn (stmt), hstate);
	  if (gimple_call_chain (stmt))
	    inchash::add_expr (gimple_call_chain (stmt), hstate);
	}
      for (i = 0; i < gimple_call_num_args (stmt); i++)
	{
	  arg = gimple_call_arg (stmt, i);
	  arg = vn_valueize (arg);
	  inchash::add_expr (arg, hstate);
	}
    }

  hstate.add_int (size);
  BB_SIZE (bb) = size;

  hstate.add_int (bb->loop_father->num);
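
  /* Hash the successor edge flags with EDGE_TRUE_VALUE and EDGE_FALSE_VALUE
     masked out, so that a bb with an inverted condition can hash into the
     same group; inverse_flags and same_succ::equal handle the swapped
     case.  */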
  for (i = 0; i < e->succ_flags.length (); ++i)
    {
      flags = e->succ_flags[i];
      flags = flags & ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
      hstate.add_int (flags);
    }
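
  /* Record the dependencies induced by the phi arguments that BB passes to
     its successors; virtual phis are skipped.  Note that the arguments only
     update the dep_bb info and do not contribute to the hash itself.  */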
  EXECUTE_IF_SET_IN_BITMAP (e->succs, 0, s, bs)
    {
      int n = find_edge (bb, BASIC_BLOCK_FOR_FN (cfun, s))->dest_idx;
      for (gphi_iterator gsi = gsi_start_phis (BASIC_BLOCK_FOR_FN (cfun, s));
	   !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gphi *phi = gsi.phi ();
	  tree lhs = gimple_phi_result (phi);
	  tree val = gimple_phi_arg_def (phi, n);

	  if (virtual_operand_p (lhs))
	    continue;
	  update_dep_bb (bb, val);
	}
    }

  return hstate.end ();
}

/* Returns true if E1 and E2 have 2 successors, and if the successor flags
   are inverse for the EDGE_TRUE_VALUE and EDGE_FALSE_VALUE flags, and equal
   for the other edge flags.  */

static bool
inverse_flags (const same_succ *e1, const same_succ *e2)
{
  int f1a, f1b, f2a, f2b;
  int mask = ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);

  if (e1->succ_flags.length () != 2)
    return false;

  f1a = e1->succ_flags[0];
  f1b = e1->succ_flags[1];
  f2a = e2->succ_flags[0];
  f2b = e2->succ_flags[1];

  if (f1a == f2a && f1b == f2b)
    return false;

  return (f1a & mask) == (f2a & mask) && (f1b & mask) == (f2b & mask);
}

/* Compares SAME_SUCCs E1 and E2.  */

int
same_succ::equal (const same_succ *e1, const same_succ *e2)
{
  unsigned int i, first1, first2;
  gimple_stmt_iterator gsi1, gsi2;
  gimple *s1, *s2;
  basic_block bb1, bb2;

  if (e1 == e2)
    return 1;

  if (e1->hashval != e2->hashval)
    return 0;

  if (e1->succ_flags.length () != e2->succ_flags.length ())
    return 0;

  if (!bitmap_equal_p (e1->succs, e2->succs))
    return 0;

  if (!inverse_flags (e1, e2))
    {
      for (i = 0; i < e1->succ_flags.length (); ++i)
	if (e1->succ_flags[i] != e2->succ_flags[i])
	  return 0;
    }

  first1 = bitmap_first_set_bit (e1->bbs);
  first2 = bitmap_first_set_bit (e2->bbs);

  bb1 = BASIC_BLOCK_FOR_FN (cfun, first1);
  bb2 = BASIC_BLOCK_FOR_FN (cfun, first2);

  if (BB_SIZE (bb1) != BB_SIZE (bb2))
    return 0;

  if (bb1->loop_father != bb2->loop_father)
    return 0;

  gsi1 = gsi_start_nondebug_bb (bb1);
  gsi2 = gsi_start_nondebug_bb (bb2);
  gsi_advance_fw_nondebug_nonlocal (&gsi1);
  gsi_advance_fw_nondebug_nonlocal (&gsi2);
  while (!(gsi_end_p (gsi1) || gsi_end_p (gsi2)))
    {
      s1 = gsi_stmt (gsi1);
      s2 = gsi_stmt (gsi2);
      if (gimple_code (s1) != gimple_code (s2))
	return 0;
      if (is_gimple_call (s1) && !gimple_call_same_target_p (s1, s2))
	return 0;
      gsi_next_nondebug (&gsi1);
      gsi_next_nondebug (&gsi2);
      gsi_advance_fw_nondebug_nonlocal (&gsi1);
      gsi_advance_fw_nondebug_nonlocal (&gsi2);
    }

  return 1;
}

/* Alloc and init a new SAME_SUCC.  */

static same_succ *
same_succ_alloc (void)
{
  same_succ *same = XNEW (struct same_succ);

  same->bbs = BITMAP_ALLOC (NULL);
  same->succs = BITMAP_ALLOC (NULL);
  same->inverse = BITMAP_ALLOC (NULL);
  same->succ_flags.create (10);
  same->in_worklist = false;

  return same;
}

/* Delete same_succ E.  */

void
same_succ::remove (same_succ *e)
{
  BITMAP_FREE (e->bbs);
  BITMAP_FREE (e->succs);
  BITMAP_FREE (e->inverse);
  e->succ_flags.release ();

  XDELETE (e);
}

/* Reset same_succ SAME.  */

static void
same_succ_reset (same_succ *same)
{
  bitmap_clear (same->bbs);
  bitmap_clear (same->succs);
  bitmap_clear (same->inverse);
  same->succ_flags.truncate (0);
}

static hash_table<same_succ> *same_succ_htab;

/* Array that is used to store the edge flags for a successor.  */

static int *same_succ_edge_flags;

/* Bitmap that is used to mark bbs that are recently deleted.  */

static bitmap deleted_bbs;

/* Bitmap that is used to mark predecessors of bbs that are
   deleted.  */

static bitmap deleted_bb_preds;

/* Prints same_succ_htab to stderr.  */

extern void debug_same_succ (void);
DEBUG_FUNCTION void
debug_same_succ (void)
{
  same_succ_htab->traverse <FILE *, ssa_same_succ_print_traverse> (stderr);
}

/* Vector of bbs to process.  */

static vec<same_succ *> worklist;

/* Prints worklist to FILE.  */

static void
print_worklist (FILE *file)
{
  unsigned int i;
  for (i = 0; i < worklist.length (); ++i)
    same_succ_print (file, worklist[i]);
}

/* Adds SAME to worklist.  */

static void
add_to_worklist (same_succ *same)
{
  if (same->in_worklist)
    return;

  if (bitmap_count_bits (same->bbs) < 2)
    return;

  same->in_worklist = true;
  worklist.safe_push (same);
}

/* Add BB to same_succ_htab.  */

static void
find_same_succ_bb (basic_block bb, same_succ **same_p)
{
  unsigned int j;
  bitmap_iterator bj;
  same_succ *same = *same_p;
  same_succ **slot;
  edge_iterator ei;
  edge e;

  if (bb == NULL)
    return;
  bitmap_set_bit (same->bbs, bb->index);
  FOR_EACH_EDGE (e, ei, bb->succs)
    {
      int index = e->dest->index;
      bitmap_set_bit (same->succs, index);
      same_succ_edge_flags[index] = (e->flags & ~ignore_edge_flags);
    }
  EXECUTE_IF_SET_IN_BITMAP (same->succs, 0, j, bj)
    same->succ_flags.safe_push (same_succ_edge_flags[j]);

  same->hashval = same_succ_hash (same);

  slot = same_succ_htab->find_slot_with_hash (same, same->hashval, INSERT);
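  /* If no group with this signature exists yet, SAME becomes the group and
     the hash table takes ownership of it (*same_p is cleared so the caller
     allocates a fresh one); otherwise BB joins the existing group and SAME
     is reset for reuse.  */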
  if (*slot == NULL)
    {
      *slot = same;
      BB_SAME_SUCC (bb) = same;
      add_to_worklist (same);
      *same_p = NULL;
    }
  else
    {
      bitmap_set_bit ((*slot)->bbs, bb->index);
      BB_SAME_SUCC (bb) = *slot;
      add_to_worklist (*slot);
      if (inverse_flags (same, *slot))
	bitmap_set_bit ((*slot)->inverse, bb->index);
      same_succ_reset (same);
    }
}

/* Find bbs with same successors.  */

static void
find_same_succ (void)
{
  same_succ *same = same_succ_alloc ();
  basic_block bb;

  FOR_EACH_BB_FN (bb, cfun)
    {
      find_same_succ_bb (bb, &same);
      if (same == NULL)
	same = same_succ_alloc ();
    }

  same_succ::remove (same);
}

/* Initializes worklist administration.  */

static void
init_worklist (void)
{
  alloc_aux_for_blocks (sizeof (struct aux_bb_info));
  same_succ_htab = new hash_table<same_succ> (n_basic_blocks_for_fn (cfun));
  same_succ_edge_flags = XCNEWVEC (int, last_basic_block_for_fn (cfun));
  deleted_bbs = BITMAP_ALLOC (NULL);
  deleted_bb_preds = BITMAP_ALLOC (NULL);
  worklist.create (n_basic_blocks_for_fn (cfun));
  find_same_succ ();

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "initial worklist:\n");
      print_worklist (dump_file);
    }
}

/* Deletes worklist administration.  */

static void
delete_worklist (void)
{
  free_aux_for_blocks ();
  delete same_succ_htab;
  same_succ_htab = NULL;
  XDELETEVEC (same_succ_edge_flags);
  same_succ_edge_flags = NULL;
  BITMAP_FREE (deleted_bbs);
  BITMAP_FREE (deleted_bb_preds);
  worklist.release ();
}

/* Mark BB as deleted, and mark its predecessors.  */

static void
mark_basic_block_deleted (basic_block bb)
{
  edge e;
  edge_iterator ei;

  bitmap_set_bit (deleted_bbs, bb->index);

  FOR_EACH_EDGE (e, ei, bb->preds)
    bitmap_set_bit (deleted_bb_preds, e->src->index);
}

/* Removes BB from its corresponding same_succ.  */

static void
same_succ_flush_bb (basic_block bb)
{
  same_succ *same = BB_SAME_SUCC (bb);
  if (! same)
    return;

  BB_SAME_SUCC (bb) = NULL;
  if (bitmap_single_bit_set_p (same->bbs))
    same_succ_htab->remove_elt_with_hash (same, same->hashval);
  else
    bitmap_clear_bit (same->bbs, bb->index);
}

/* Removes all bbs in BBS from their corresponding same_succ.  */

static void
same_succ_flush_bbs (bitmap bbs)
{
  unsigned int i;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (bbs, 0, i, bi)
    same_succ_flush_bb (BASIC_BLOCK_FOR_FN (cfun, i));
}

/* Release the last vdef in BB, either normal or phi result.  */

static void
release_last_vdef (basic_block bb)
{
  for (gimple_stmt_iterator i = gsi_last_bb (bb); !gsi_end_p (i);
       gsi_prev_nondebug (&i))
    {
      gimple *stmt = gsi_stmt (i);
      if (gimple_vdef (stmt) == NULL_TREE)
	continue;

      mark_virtual_operand_for_renaming (gimple_vdef (stmt));
      return;
    }

  for (gphi_iterator i = gsi_start_phis (bb); !gsi_end_p (i);
       gsi_next (&i))
    {
      gphi *phi = i.phi ();
      tree res = gimple_phi_result (phi);

      if (!virtual_operand_p (res))
	continue;

      mark_virtual_phi_result_for_renaming (phi);
      return;
    }
}

/* For deleted_bb_preds, find bbs with same successors.  */

static void
update_worklist (void)
{
  unsigned int i;
  bitmap_iterator bi;
  basic_block bb;
  same_succ *same;

  bitmap_and_compl_into (deleted_bb_preds, deleted_bbs);
  bitmap_clear (deleted_bbs);

  bitmap_clear_bit (deleted_bb_preds, ENTRY_BLOCK);
  same_succ_flush_bbs (deleted_bb_preds);

  same = same_succ_alloc ();
  EXECUTE_IF_SET_IN_BITMAP (deleted_bb_preds, 0, i, bi)
    {
      bb = BASIC_BLOCK_FOR_FN (cfun, i);
      gcc_assert (bb != NULL);
      find_same_succ_bb (bb, &same);
      if (same == NULL)
	same = same_succ_alloc ();
    }
  same_succ::remove (same);
  bitmap_clear (deleted_bb_preds);
}

/* Prints cluster C to FILE.  */

static void
print_cluster (FILE *file, bb_cluster *c)
{
  if (c == NULL)
    return;
  bitmap_print (file, c->bbs, "bbs:", "\n");
  bitmap_print (file, c->preds, "preds:", "\n");
}

/* Prints cluster C to stderr.  */

extern void debug_cluster (bb_cluster *);
DEBUG_FUNCTION void
debug_cluster (bb_cluster *c)
{
  print_cluster (stderr, c);
}

/* Update C->rep_bb, given that BB is added to the cluster.  */

static void
update_rep_bb (bb_cluster *c, basic_block bb)
{
  /* Initial.  */
  if (c->rep_bb == NULL)
    {
      c->rep_bb = bb;
      return;
    }

  /* Current needs no deps, keep it.  */
  if (BB_DEP_BB (c->rep_bb) == NULL)
    return;

  /* Bb needs no deps, change rep_bb.  */
  if (BB_DEP_BB (bb) == NULL)
    {
      c->rep_bb = bb;
      return;
    }

  /* Bb needs last deps earlier than current, change rep_bb.  A potential
     problem with this is that the first deps might also be earlier, which
     would mean we prefer longer lifetimes for the deps.  To be able to check
     for this, we would have to trace BB_FIRST_DEP_BB as well, besides
     BB_DEP_BB, which is really BB_LAST_DEP_BB.
     The benefit of choosing the bb with last deps earlier is that it can
     potentially be used as replacement for more bbs.  */
  if (dominated_by_p (CDI_DOMINATORS, BB_DEP_BB (c->rep_bb), BB_DEP_BB (bb)))
    c->rep_bb = bb;
}

/* Add BB to cluster C.  Sets BB in C->bbs, and preds of BB in C->preds.  */

static void
add_bb_to_cluster (bb_cluster *c, basic_block bb)
{
  edge e;
  edge_iterator ei;

  bitmap_set_bit (c->bbs, bb->index);

  FOR_EACH_EDGE (e, ei, bb->preds)
    bitmap_set_bit (c->preds, e->src->index);

  update_rep_bb (c, bb);
}

/* Allocate and init new cluster.  */

static bb_cluster *
new_cluster (void)
{
  bb_cluster *c;
  c = XCNEW (bb_cluster);
  c->bbs = BITMAP_ALLOC (NULL);
  c->preds = BITMAP_ALLOC (NULL);
  c->rep_bb = NULL;
  return c;
}

/* Delete clusters.  */

static void
delete_cluster (bb_cluster *c)
{
  if (c == NULL)
    return;
  BITMAP_FREE (c->bbs);
  BITMAP_FREE (c->preds);
  XDELETE (c);
}

/* Array that contains all clusters.  */

static vec<bb_cluster *> all_clusters;

/* Allocate all cluster vectors.  */

static void
alloc_cluster_vectors (void)
{
  all_clusters.create (n_basic_blocks_for_fn (cfun));
}

/* Reset all cluster vectors.  */

static void
reset_cluster_vectors (void)
{
  unsigned int i;
  basic_block bb;
  for (i = 0; i < all_clusters.length (); ++i)
    delete_cluster (all_clusters[i]);
  all_clusters.truncate (0);
  FOR_EACH_BB_FN (bb, cfun)
    BB_CLUSTER (bb) = NULL;
}

/* Delete all cluster vectors.  */

static void
delete_cluster_vectors (void)
{
  unsigned int i;
  for (i = 0; i < all_clusters.length (); ++i)
    delete_cluster (all_clusters[i]);
  all_clusters.release ();
}

/* Merge cluster C2 into C1.  */

static void
merge_clusters (bb_cluster *c1, bb_cluster *c2)
{
  bitmap_ior_into (c1->bbs, c2->bbs);
  bitmap_ior_into (c1->preds, c2->preds);
}

/* Register equivalence of BB1 and BB2: put them in the same cluster, either
   by creating a new cluster and storing it in all_clusters, or by merging
   with an existing cluster.  */

static void
set_cluster (basic_block bb1, basic_block bb2)
{
  basic_block merge_bb, other_bb;
  bb_cluster *merge, *old, *c;

  if (BB_CLUSTER (bb1) == NULL && BB_CLUSTER (bb2) == NULL)
    {
      c = new_cluster ();
      add_bb_to_cluster (c, bb1);
      add_bb_to_cluster (c, bb2);
      BB_CLUSTER (bb1) = c;
      BB_CLUSTER (bb2) = c;
      c->index = all_clusters.length ();
      all_clusters.safe_push (c);
    }
  else if (BB_CLUSTER (bb1) == NULL || BB_CLUSTER (bb2) == NULL)
    {
      merge_bb = BB_CLUSTER (bb1) == NULL ? bb2 : bb1;
      other_bb = BB_CLUSTER (bb1) == NULL ? bb1 : bb2;
      merge = BB_CLUSTER (merge_bb);
      add_bb_to_cluster (merge, other_bb);
      BB_CLUSTER (other_bb) = merge;
    }
  else if (BB_CLUSTER (bb1) != BB_CLUSTER (bb2))
    {
      unsigned int i;
      bitmap_iterator bi;

      old = BB_CLUSTER (bb2);
      merge = BB_CLUSTER (bb1);
      merge_clusters (merge, old);
      EXECUTE_IF_SET_IN_BITMAP (old->bbs, 0, i, bi)
	BB_CLUSTER (BASIC_BLOCK_FOR_FN (cfun, i)) = merge;
      all_clusters[old->index] = NULL;
      update_rep_bb (merge, old->rep_bb);
      delete_cluster (old);
    }
  else
    gcc_unreachable ();
}

/* Return true if gimple operands T1 and T2 have the same value.  */

static bool
gimple_operand_equal_value_p (tree t1, tree t2)
{
  if (t1 == t2)
    return true;

  if (t1 == NULL_TREE
      || t2 == NULL_TREE)
    return false;

  if (operand_equal_p (t1, t2, OEP_MATCH_SIDE_EFFECTS))
    return true;

  return gvn_uses_equal (t1, t2);
}

/* Return true if gimple statements S1 and S2 are equal.  Gimple_bb (s1) and
   gimple_bb (s2) are members of SAME_SUCC.  */

static bool
gimple_equal_p (same_succ *same_succ, gimple *s1, gimple *s2)
{
  unsigned int i;
  tree lhs1, lhs2;
  basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
  tree t1, t2;
  bool inv_cond;
  enum tree_code code1, code2;

  if (gimple_code (s1) != gimple_code (s2))
    return false;

  switch (gimple_code (s1))
    {
    case GIMPLE_CALL:
      if (!gimple_call_same_target_p (s1, s2))
	return false;

      t1 = gimple_call_chain (s1);
      t2 = gimple_call_chain (s2);
      if (!gimple_operand_equal_value_p (t1, t2))
	return false;

      if (gimple_call_num_args (s1) != gimple_call_num_args (s2))
	return false;

      for (i = 0; i < gimple_call_num_args (s1); ++i)
	{
	  t1 = gimple_call_arg (s1, i);
	  t2 = gimple_call_arg (s2, i);
	  if (!gimple_operand_equal_value_p (t1, t2))
	    return false;
	}

      lhs1 = gimple_get_lhs (s1);
      lhs2 = gimple_get_lhs (s2);
      if (lhs1 == NULL_TREE && lhs2 == NULL_TREE)
	return true;
      if (lhs1 == NULL_TREE || lhs2 == NULL_TREE)
	return false;
      if (TREE_CODE (lhs1) == SSA_NAME && TREE_CODE (lhs2) == SSA_NAME)
	return vn_valueize (lhs1) == vn_valueize (lhs2);
      return operand_equal_p (lhs1, lhs2, 0);

    case GIMPLE_ASSIGN:
      lhs1 = gimple_get_lhs (s1);
      lhs2 = gimple_get_lhs (s2);
      if (TREE_CODE (lhs1) != SSA_NAME
	  && TREE_CODE (lhs2) != SSA_NAME)
	return (operand_equal_p (lhs1, lhs2, 0)
		&& gimple_operand_equal_value_p (gimple_assign_rhs1 (s1),
						 gimple_assign_rhs1 (s2)));
      else if (TREE_CODE (lhs1) == SSA_NAME
	       && TREE_CODE (lhs2) == SSA_NAME)
	return operand_equal_p (gimple_assign_rhs1 (s1),
				gimple_assign_rhs1 (s2), 0);
      return false;

    case GIMPLE_COND:
      t1 = gimple_cond_lhs (s1);
      t2 = gimple_cond_lhs (s2);
      if (!gimple_operand_equal_value_p (t1, t2))
	return false;

      t1 = gimple_cond_rhs (s1);
      t2 = gimple_cond_rhs (s2);
      if (!gimple_operand_equal_value_p (t1, t2))
	return false;

      code1 = gimple_expr_code (s1);
      code2 = gimple_expr_code (s2);
      inv_cond = (bitmap_bit_p (same_succ->inverse, bb1->index)
		  != bitmap_bit_p (same_succ->inverse, bb2->index));
      if (inv_cond)
	{
	  bool honor_nans = HONOR_NANS (t1);
	  code2 = invert_tree_comparison (code2, honor_nans);
	}
      return code1 == code2;

    default:
      return false;
    }
}

/* Let GSI skip backwards over local defs.  Return the earliest vuse in VUSE.
   Return true in VUSE_ESCAPED if the vuse influenced an SSA_OP_DEF of one of
   the processed statements.  */

static void
gsi_advance_bw_nondebug_nonlocal (gimple_stmt_iterator *gsi, tree *vuse,
				  bool *vuse_escaped)
{
  gimple *stmt;
  tree lvuse;

  while (true)
    {
      if (gsi_end_p (*gsi))
	return;
      stmt = gsi_stmt (*gsi);

      lvuse = gimple_vuse (stmt);
      if (lvuse != NULL_TREE)
	{
	  *vuse = lvuse;
	  if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_DEF))
	    *vuse_escaped = true;
	}

      if (!stmt_local_def (stmt))
	return;
      gsi_prev_nondebug (gsi);
    }
}

/* Return true if equal (in the sense of gimple_equal_p) statements STMT1 and
   STMT2 are allowed to be merged.  */

static bool
merge_stmts_p (gimple *stmt1, gimple *stmt2)
{
  /* A better approach would be to blacklist the bb containing the stmt when
     encountering the stmt, e.g. in same_succ_hash.  */
  if (is_tm_ending (stmt1))
    return false;

  /* Verify EH landing pads.  */
  if (lookup_stmt_eh_lp_fn (cfun, stmt1) != lookup_stmt_eh_lp_fn (cfun, stmt2))
    return false;

  if (is_gimple_call (stmt1)
      && gimple_call_internal_p (stmt1))
    switch (gimple_call_internal_fn (stmt1))
      {
      case IFN_UBSAN_NULL:
      case IFN_UBSAN_BOUNDS:
      case IFN_UBSAN_VPTR:
      case IFN_UBSAN_CHECK_ADD:
      case IFN_UBSAN_CHECK_SUB:
      case IFN_UBSAN_CHECK_MUL:
      case IFN_UBSAN_OBJECT_SIZE:
      case IFN_UBSAN_PTR:
      case IFN_ASAN_CHECK:
	/* For these internal functions, gimple_location is an implicit
	   parameter, which will be used explicitly after expansion.
	   Merging these statements may cause confusing line numbers in
	   sanitizer messages.  */
	return gimple_location (stmt1) == gimple_location (stmt2);
      default:
	break;
      }

  return true;
}

/* Determines whether BB1 and BB2 (members of same_succ) are duplicates.  If
   so, clusters them.  */

static void
find_duplicate (same_succ *same_succ, basic_block bb1, basic_block bb2)
{
  gimple_stmt_iterator gsi1 = gsi_last_nondebug_bb (bb1);
  gimple_stmt_iterator gsi2 = gsi_last_nondebug_bb (bb2);
  tree vuse1 = NULL_TREE, vuse2 = NULL_TREE;
  bool vuse_escaped = false;

  gsi_advance_bw_nondebug_nonlocal (&gsi1, &vuse1, &vuse_escaped);
  gsi_advance_bw_nondebug_nonlocal (&gsi2, &vuse2, &vuse_escaped);

  while (!gsi_end_p (gsi1) && !gsi_end_p (gsi2))
    {
      gimple *stmt1 = gsi_stmt (gsi1);
      gimple *stmt2 = gsi_stmt (gsi2);

      if (gimple_code (stmt1) == GIMPLE_LABEL
	  && gimple_code (stmt2) == GIMPLE_LABEL)
	break;

      if (!gimple_equal_p (same_succ, stmt1, stmt2))
	return;

      if (!merge_stmts_p (stmt1, stmt2))
	return;

      gsi_prev_nondebug (&gsi1);
      gsi_prev_nondebug (&gsi2);
      gsi_advance_bw_nondebug_nonlocal (&gsi1, &vuse1, &vuse_escaped);
      gsi_advance_bw_nondebug_nonlocal (&gsi2, &vuse2, &vuse_escaped);
    }
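
  /* Only (matching) labels may remain at the start of the blocks; user
     labels that are forced or nonlocal cannot be merged away, so they block
     the merge.  */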
  while (!gsi_end_p (gsi1) && gimple_code (gsi_stmt (gsi1)) == GIMPLE_LABEL)
    {
      tree label = gimple_label_label (as_a <glabel *> (gsi_stmt (gsi1)));
      if (DECL_NONLOCAL (label) || FORCED_LABEL (label))
	return;
      gsi_prev (&gsi1);
    }
  while (!gsi_end_p (gsi2) && gimple_code (gsi_stmt (gsi2)) == GIMPLE_LABEL)
    {
      tree label = gimple_label_label (as_a <glabel *> (gsi_stmt (gsi2)));
      if (DECL_NONLOCAL (label) || FORCED_LABEL (label))
	return;
      gsi_prev (&gsi2);
    }
  if (!(gsi_end_p (gsi1) && gsi_end_p (gsi2)))
    return;

  /* If the incoming vuses are not the same, and the vuse escaped into an
     SSA_OP_DEF, then merging the 2 blocks will change the value of the def,
     which potentially means the semantics of one of the blocks will be
     changed.
     TODO: make this check more precise.  */
  if (vuse_escaped && vuse1 != vuse2)
    return;

  if (dump_file)
    fprintf (dump_file, "find_duplicates: <bb %d> duplicate of <bb %d>\n",
	     bb1->index, bb2->index);

  set_cluster (bb1, bb2);
}

/* Returns whether for all phis in DEST the phi alternatives for E1 and
   E2 are equal.  */

static bool
same_phi_alternatives_1 (basic_block dest, edge e1, edge e2)
{
  int n1 = e1->dest_idx, n2 = e2->dest_idx;
  gphi_iterator gsi;

  for (gsi = gsi_start_phis (dest); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      tree lhs = gimple_phi_result (phi);
      tree val1 = gimple_phi_arg_def (phi, n1);
      tree val2 = gimple_phi_arg_def (phi, n2);

      if (virtual_operand_p (lhs))
	continue;

      if (operand_equal_for_phi_arg_p (val1, val2))
	continue;
      if (gvn_uses_equal (val1, val2))
	continue;

      return false;
    }

  return true;
}

/* Returns whether for all successors of BB1 and BB2 (members of SAME_SUCC),
   the phi alternatives for BB1 and BB2 are equal.  */

static bool
same_phi_alternatives (same_succ *same_succ, basic_block bb1, basic_block bb2)
{
  unsigned int s;
  bitmap_iterator bs;
  edge e1, e2;
  basic_block succ;

  EXECUTE_IF_SET_IN_BITMAP (same_succ->succs, 0, s, bs)
    {
      succ = BASIC_BLOCK_FOR_FN (cfun, s);
      e1 = find_edge (bb1, succ);
      e2 = find_edge (bb2, succ);
      if (e1->flags & EDGE_COMPLEX
	  || e2->flags & EDGE_COMPLEX)
	return false;

      /* For all phis in bb, the phi alternatives for e1 and e2 need to have
	 the same value.  */
      if (!same_phi_alternatives_1 (succ, e1, e2))
	return false;
    }

  return true;
}

/* Return true if BB has non-vop phis.  */

static bool
bb_has_non_vop_phi (basic_block bb)
{
  gimple_seq phis = phi_nodes (bb);
  gimple *phi;

  if (phis == NULL)
    return false;

  if (!gimple_seq_singleton_p (phis))
    return true;

  phi = gimple_seq_first_stmt (phis);
  return !virtual_operand_p (gimple_phi_result (phi));
}

/* Returns true if redirecting the incoming edges of FROM to TO maintains the
   invariant that uses in FROM are dominated by their defs.  */

static bool
deps_ok_for_redirect_from_bb_to_bb (basic_block from, basic_block to)
{
  basic_block cd, dep_bb = BB_DEP_BB (to);
  edge_iterator ei;
  edge e;

  if (dep_bb == NULL)
    return true;
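
  /* TO's dependencies must dominate the nearest common dominator of FROM's
     preds: only then is every path through a redirected edge guaranteed to
     have executed the defs that TO uses.  */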
  bitmap from_preds = BITMAP_ALLOC (NULL);
  FOR_EACH_EDGE (e, ei, from->preds)
    bitmap_set_bit (from_preds, e->src->index);
  cd = nearest_common_dominator_for_set (CDI_DOMINATORS, from_preds);
  BITMAP_FREE (from_preds);

  return dominated_by_p (CDI_DOMINATORS, cd, dep_bb);
}

/* Returns true if replacing BB1 (or its replacement bb) by BB2 (or its
   replacement bb) and vice versa maintains the invariant that uses in the
   replacement are dominated by their defs.  */

static bool
deps_ok_for_redirect (basic_block bb1, basic_block bb2)
{
  if (BB_CLUSTER (bb1) != NULL)
    bb1 = BB_CLUSTER (bb1)->rep_bb;

  if (BB_CLUSTER (bb2) != NULL)
    bb2 = BB_CLUSTER (bb2)->rep_bb;

  return (deps_ok_for_redirect_from_bb_to_bb (bb1, bb2)
	  && deps_ok_for_redirect_from_bb_to_bb (bb2, bb1));
}

/* Within SAME_SUCC->bbs, find clusters of bbs which can be merged.  */

static void
find_clusters_1 (same_succ *same_succ)
{
  basic_block bb1, bb2;
  unsigned int i, j;
  bitmap_iterator bi, bj;
  int nr_comparisons;
  int max_comparisons = PARAM_VALUE (PARAM_MAX_TAIL_MERGE_COMPARISONS);

  EXECUTE_IF_SET_IN_BITMAP (same_succ->bbs, 0, i, bi)
    {
      bb1 = BASIC_BLOCK_FOR_FN (cfun, i);

      /* TODO: handle blocks with phi-nodes.  We'll have to find corresponding
	 phi-nodes in bb1 and bb2, with the same alternatives for the same
	 preds.  */
      if (bb_has_non_vop_phi (bb1) || bb_has_eh_pred (bb1)
	  || bb_has_abnormal_pred (bb1))
	continue;

      nr_comparisons = 0;
      EXECUTE_IF_SET_IN_BITMAP (same_succ->bbs, i + 1, j, bj)
	{
	  bb2 = BASIC_BLOCK_FOR_FN (cfun, j);

	  if (bb_has_non_vop_phi (bb2) || bb_has_eh_pred (bb2)
	      || bb_has_abnormal_pred (bb2))
	    continue;

	  if (BB_CLUSTER (bb1) != NULL && BB_CLUSTER (bb1) == BB_CLUSTER (bb2))
	    continue;

	  /* Limit quadratic behavior.  */
	  nr_comparisons++;
	  if (nr_comparisons > max_comparisons)
	    break;

	  /* This is a conservative dependency check.  We could test more
	     precisely for the allowed replacement direction.  */
	  if (!deps_ok_for_redirect (bb1, bb2))
	    continue;

	  if (!(same_phi_alternatives (same_succ, bb1, bb2)))
	    continue;

	  find_duplicate (same_succ, bb1, bb2);
	}
    }
}

/* Find clusters of bbs which can be merged.  */

static void
find_clusters (void)
{
  same_succ *same;

  while (!worklist.is_empty ())
    {
      same = worklist.pop ();
      same->in_worklist = false;
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "processing worklist entry\n");
	  same_succ_print (dump_file, same);
	}
      find_clusters_1 (same);
    }
}

/* Returns the vop phi of BB, if any.  */

static gphi *
vop_phi (basic_block bb)
{
  gphi *stmt;
  gphi_iterator gsi;
  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      stmt = gsi.phi ();
      if (! virtual_operand_p (gimple_phi_result (stmt)))
	continue;
      return stmt;
    }
  return NULL;
}

/* Redirect all edges from BB1 to BB2, remove BB1, and mark it as deleted.  */

static void
replace_block_by (basic_block bb1, basic_block bb2)
{
  edge pred_edge;
  unsigned int i;
  gphi *bb2_phi;

  bb2_phi = vop_phi (bb2);

  /* Mark the basic block as deleted.  */
  mark_basic_block_deleted (bb1);

  /* Redirect the incoming edges of bb1 to bb2.  */
  for (i = EDGE_COUNT (bb1->preds); i > 0; --i)
    {
      pred_edge = EDGE_PRED (bb1, i - 1);
      pred_edge = redirect_edge_and_branch (pred_edge, bb2);
      gcc_assert (pred_edge != NULL);

      if (bb2_phi == NULL)
	continue;

      /* The phi might have run out of capacity when the redirect added an
	 argument, which means it could have been replaced.  Refresh it.  */
      bb2_phi = vop_phi (bb2);

      add_phi_arg (bb2_phi, SSA_NAME_VAR (gimple_phi_result (bb2_phi)),
		   pred_edge, UNKNOWN_LOCATION);
    }

  /* Merge the outgoing edge counts from bb1 onto bb2.  */
  edge e1, e2;
  edge_iterator ei;

  if (bb2->count.initialized_p ())
    FOR_EACH_EDGE (e1, ei, bb1->succs)
      {
	e2 = find_edge (bb2, e1->dest);
	gcc_assert (e2);

	/* If probabilities are the same, we are done.  If counts are nonzero
	   we can distribute accordingly.  In the remaining cases just average
	   the values and hope for the best.  */
	e2->probability = e1->probability.combine_with_count
	  (bb1->count, e2->probability, bb2->count);
      }
  bb2->count += bb1->count;

  /* Move over any user labels from bb1 after the bb2 labels.  */
  gimple_stmt_iterator gsi1 = gsi_start_bb (bb1);
  if (!gsi_end_p (gsi1) && gimple_code (gsi_stmt (gsi1)) == GIMPLE_LABEL)
    {
      gimple_stmt_iterator gsi2 = gsi_after_labels (bb2);
      while (!gsi_end_p (gsi1)
	     && gimple_code (gsi_stmt (gsi1)) == GIMPLE_LABEL)
	{
	  tree label = gimple_label_label (as_a <glabel *> (gsi_stmt (gsi1)));
	  gcc_assert (!DECL_NONLOCAL (label) && !FORCED_LABEL (label));
	  if (DECL_ARTIFICIAL (label))
	    gsi_next (&gsi1);
	  else
	    gsi_move_before (&gsi1, &gsi2);
	}
    }

  /* Clear range info from all stmts in BB2 -- this transformation
     could make them out of date.  */
  reset_flow_sensitive_info_in_bb (bb2);

  /* Do updates that use bb1, before deleting bb1.  */
  release_last_vdef (bb1);
  same_succ_flush_bb (bb1);

  delete_basic_block (bb1);
}

/* Bbs for which update_debug_stmt needs to be called.  */

static bitmap update_bbs;

/* For each cluster in all_clusters, merge all cluster->bbs.  Returns
   number of bbs removed.  */

static int
apply_clusters (void)
{
  basic_block bb1, bb2;
  bb_cluster *c;
  unsigned int i, j;
  bitmap_iterator bj;
  int nr_bbs_removed = 0;

  for (i = 0; i < all_clusters.length (); ++i)
    {
      c = all_clusters[i];
      if (c == NULL)
	continue;

      bb2 = c->rep_bb;
      bitmap_set_bit (update_bbs, bb2->index);
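
      /* Replace every other bb in the cluster by the representative; clear
	 rep_bb from the bitmap first so that it is not replaced by
	 itself.  */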
      bitmap_clear_bit (c->bbs, bb2->index);
      EXECUTE_IF_SET_IN_BITMAP (c->bbs, 0, j, bj)
	{
	  bb1 = BASIC_BLOCK_FOR_FN (cfun, j);
	  bitmap_clear_bit (update_bbs, bb1->index);

	  replace_block_by (bb1, bb2);
	  nr_bbs_removed++;
	}
    }

  return nr_bbs_removed;
}

/* Resets debug statement STMT if it has uses that are not dominated by their
   defs.  */

static void
update_debug_stmt (gimple *stmt)
{
  use_operand_p use_p;
  ssa_op_iter oi;
  basic_block bbuse;

  if (!gimple_debug_bind_p (stmt))
    return;

  bbuse = gimple_bb (stmt);
  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, oi, SSA_OP_USE)
    {
      tree name = USE_FROM_PTR (use_p);
      gimple *def_stmt = SSA_NAME_DEF_STMT (name);
      basic_block bbdef = gimple_bb (def_stmt);
      if (bbdef == NULL || bbuse == bbdef
	  || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef))
	continue;

      gimple_debug_bind_reset_value (stmt);
      update_stmt (stmt);
      break;
    }
}

/* Resets all debug statements that have uses that are not
   dominated by their defs.  */

static void
update_debug_stmts (void)
{
  basic_block bb;
  bitmap_iterator bi;
  unsigned int i;

  EXECUTE_IF_SET_IN_BITMAP (update_bbs, 0, i, bi)
    {
      gimple *stmt;
      gimple_stmt_iterator gsi;

      bb = BASIC_BLOCK_FOR_FN (cfun, i);
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  stmt = gsi_stmt (gsi);
	  if (!is_gimple_debug (stmt))
	    continue;
	  update_debug_stmt (stmt);
	}
    }
}

/* Runs tail merge optimization.  */

unsigned int
tail_merge_optimize (unsigned int todo)
{
  int nr_bbs_removed_total = 0;
  int nr_bbs_removed;
  bool loop_entered = false;
  int iteration_nr = 0;
  int max_iterations = PARAM_VALUE (PARAM_MAX_TAIL_MERGE_ITERATIONS);

  if (!flag_tree_tail_merge
      || max_iterations == 0)
    return 0;

  timevar_push (TV_TREE_TAIL_MERGE);

  /* We enter from PRE which has critical edges split.  Elimination
     does not process trivially dead code, so clean up the CFG if we
     are told to.  Then re-split critical edges.  */
  if (todo & TODO_cleanup_cfg)
    {
      cleanup_tree_cfg ();
      todo &= ~TODO_cleanup_cfg;
      split_critical_edges ();
    }

  if (!dom_info_available_p (CDI_DOMINATORS))
    {
      /* PRE can leave us with unreachable blocks, remove them now.  */
      delete_unreachable_blocks ();
      calculate_dominance_info (CDI_DOMINATORS);
    }
  init_worklist ();

  while (!worklist.is_empty ())
    {
      if (!loop_entered)
	{
	  loop_entered = true;
	  alloc_cluster_vectors ();
	  update_bbs = BITMAP_ALLOC (NULL);
	}
      else
	reset_cluster_vectors ();

      iteration_nr++;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "worklist iteration #%d\n", iteration_nr);

      find_clusters ();
      gcc_assert (worklist.is_empty ());
      if (all_clusters.is_empty ())
	break;

      nr_bbs_removed = apply_clusters ();
      nr_bbs_removed_total += nr_bbs_removed;
      if (nr_bbs_removed == 0)
	break;

      free_dominance_info (CDI_DOMINATORS);

      if (iteration_nr == max_iterations)
	break;

      calculate_dominance_info (CDI_DOMINATORS);
      update_worklist ();
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "htab collision / search: %f\n",
	     same_succ_htab->collisions ());

  if (nr_bbs_removed_total > 0)
    {
      if (MAY_HAVE_DEBUG_BIND_STMTS)
	{
	  calculate_dominance_info (CDI_DOMINATORS);
	  update_debug_stmts ();
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Before TODOs.\n");
	  dump_function_to_file (current_function_decl, dump_file, dump_flags);
	}

      mark_virtual_operands_for_renaming (cfun);
    }

  delete_worklist ();
  if (loop_entered)
    {
      delete_cluster_vectors ();
      BITMAP_FREE (update_bbs);
    }

  timevar_pop (TV_TREE_TAIL_MERGE);

  return todo;
}