/* Source: gcc/tree-ssa-tail-merge.c (GCC, blob f6c9878a0a3510d349eb4ebe4ddbb7db7f7fbd8c).
   Note: this copy was extracted from a web view; embedded line numbers and
   stripped brace/blank lines are extraction artifacts, not original code.  */
1 /* Tail merging for gimple.
2 Copyright (C) 2011-2017 Free Software Foundation, Inc.
3 Contributed by Tom de Vries (tom@codesourcery.com)
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* Pass overview.
24 MOTIVATIONAL EXAMPLE
26 gimple representation of gcc/testsuite/gcc.dg/pr43864.c at
28 hprofStartupp (charD.1 * outputFileNameD.2600, charD.1 * ctxD.2601)
30 struct FILED.1638 * fpD.2605;
31 charD.1 fileNameD.2604[1000];
32 intD.0 D.3915;
33 const charD.1 * restrict outputFileName.0D.3914;
35 # BLOCK 2 freq:10000
36 # PRED: ENTRY [100.0%] (fallthru,exec)
37 # PT = nonlocal { D.3926 } (restr)
38 outputFileName.0D.3914_3
39 = (const charD.1 * restrict) outputFileNameD.2600_2(D);
40 # .MEMD.3923_13 = VDEF <.MEMD.3923_12(D)>
41 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
42 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
43 sprintfD.759 (&fileNameD.2604, outputFileName.0D.3914_3);
44 # .MEMD.3923_14 = VDEF <.MEMD.3923_13>
45 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
46 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
47 D.3915_4 = accessD.2606 (&fileNameD.2604, 1);
48 if (D.3915_4 == 0)
49 goto <bb 3>;
50 else
51 goto <bb 4>;
52 # SUCC: 3 [10.0%] (true,exec) 4 [90.0%] (false,exec)
54 # BLOCK 3 freq:1000
55 # PRED: 2 [10.0%] (true,exec)
56 # .MEMD.3923_15 = VDEF <.MEMD.3923_14>
57 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
58 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
59 freeD.898 (ctxD.2601_5(D));
60 goto <bb 7>;
61 # SUCC: 7 [100.0%] (fallthru,exec)
63 # BLOCK 4 freq:9000
64 # PRED: 2 [90.0%] (false,exec)
65 # .MEMD.3923_16 = VDEF <.MEMD.3923_14>
66 # PT = nonlocal escaped
67 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
68 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
69 fpD.2605_8 = fopenD.1805 (&fileNameD.2604[0], 0B);
70 if (fpD.2605_8 == 0B)
71 goto <bb 5>;
72 else
73 goto <bb 6>;
74 # SUCC: 5 [1.9%] (true,exec) 6 [98.1%] (false,exec)
76 # BLOCK 5 freq:173
77 # PRED: 4 [1.9%] (true,exec)
78 # .MEMD.3923_17 = VDEF <.MEMD.3923_16>
79 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
80 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
81 freeD.898 (ctxD.2601_5(D));
82 goto <bb 7>;
83 # SUCC: 7 [100.0%] (fallthru,exec)
85 # BLOCK 6 freq:8827
86 # PRED: 4 [98.1%] (false,exec)
87 # .MEMD.3923_18 = VDEF <.MEMD.3923_16>
88 # USE = nonlocal null { fileNameD.2604 D.3926 } (restr)
89 # CLB = nonlocal null { fileNameD.2604 D.3926 } (restr)
90 fooD.2599 (outputFileNameD.2600_2(D), fpD.2605_8);
91 # SUCC: 7 [100.0%] (fallthru,exec)
93 # BLOCK 7 freq:10000
94 # PRED: 3 [100.0%] (fallthru,exec) 5 [100.0%] (fallthru,exec)
95 6 [100.0%] (fallthru,exec)
96 # PT = nonlocal null
98 # ctxD.2601_1 = PHI <0B(3), 0B(5), ctxD.2601_5(D)(6)>
99 # .MEMD.3923_11 = PHI <.MEMD.3923_15(3), .MEMD.3923_17(5),
100 .MEMD.3923_18(6)>
101 # VUSE <.MEMD.3923_11>
102 return ctxD.2601_1;
103 # SUCC: EXIT [100.0%]
106 bb 3 and bb 5 can be merged. The blocks have different predecessors, but the
107 same successors, and the same operations.
110 CONTEXT
112 A technique called tail merging (or cross jumping) can fix the example
113 above. For a block, we look for common code at the end (the tail) of the
114 predecessor blocks, and insert jumps from one block to the other.
115 The example is a special case for tail merging, in that 2 whole blocks
116 can be merged, rather than just the end parts of it.
117 We currently only focus on whole block merging, so in that sense
118 calling this pass tail merge is a bit of a misnomer.
120 We distinguish 2 kinds of situations in which blocks can be merged:
121 - same operations, same predecessors. The successor edges coming from one
122 block are redirected to come from the other block.
123 - same operations, same successors. The predecessor edges entering one block
124 are redirected to enter the other block. Note that this operation might
125 involve introducing phi operations.
127 For efficient implementation, we would like to value numbers the blocks, and
128 have a comparison operator that tells us whether the blocks are equal.
129 Besides being runtime efficient, block value numbering should also abstract
130 from irrelevant differences in order of operations, much like normal value
131 numbering abstracts from irrelevant order of operations.
133 For the first situation (same_operations, same predecessors), normal value
134 numbering fits well. We can calculate a block value number based on the
135 value numbers of the defs and vdefs.
137 For the second situation (same operations, same successors), this approach
138 doesn't work so well. We can illustrate this using the example. The calls
139 to free use different vdefs: MEMD.3923_16 and MEMD.3923_14, and these will
140 remain different in value numbering, since they represent different memory
141 states. So the resulting vdefs of the frees will be different in value
142 numbering, so the block value numbers will be different.
144 The reason why we call the blocks equal is not because they define the same
145 values, but because uses in the blocks use (possibly different) defs in the
146 same way. To be able to detect this efficiently, we need to do some kind of
147 reverse value numbering, meaning number the uses rather than the defs, and
148 calculate a block value number based on the value number of the uses.
149 Ideally, a block comparison operator will also indicate which phis are needed
150 to merge the blocks.
152 For the moment, we don't do block value numbering, but we do insn-by-insn
153 matching, using scc value numbers to match operations with results, and
154 structural comparison otherwise, while ignoring vop mismatches.
157 IMPLEMENTATION
159 1. The pass first determines all groups of blocks with the same successor
160 blocks.
161 2. Within each group, it tries to determine clusters of equal basic blocks.
162 3. The clusters are applied.
163 4. The same successor groups are updated.
164 5. This process is repeated from 2 onwards, until no more changes.
167 LIMITATIONS/TODO
169 - block only
170 - handles only 'same operations, same successors'.
171 It handles same predecessors as a special subcase though.
172 - does not implement the reverse value numbering and block value numbering.
173 - improve memory allocation: use garbage collected memory, obstacks,
174 allocpools where appropriate.
175 - no insertion of gimple_reg phis, We only introduce vop-phis.
176 - handle blocks with gimple_reg phi_nodes.
179 PASS PLACEMENT
180 This 'pass' is not a stand-alone gimple pass, but runs as part of
181 pass_pre, in order to share the value numbering.
184 SWITCHES
186 - ftree-tail-merge. On at -O2. We may have to enable it only at -Os. */
188 #include "config.h"
189 #include "system.h"
190 #include "coretypes.h"
191 #include "backend.h"
192 #include "tree.h"
193 #include "gimple.h"
194 #include "cfghooks.h"
195 #include "tree-pass.h"
196 #include "ssa.h"
197 #include "fold-const.h"
198 #include "trans-mem.h"
199 #include "cfganal.h"
200 #include "cfgcleanup.h"
201 #include "gimple-iterator.h"
202 #include "tree-cfg.h"
203 #include "tree-into-ssa.h"
204 #include "params.h"
205 #include "tree-ssa-sccvn.h"
206 #include "cfgloop.h"
207 #include "tree-eh.h"
208 #include "tree-cfgcleanup.h"
210 /* Describes a group of bbs with the same successors. The successor bbs are
211 cached in succs, and the successor edge flags are cached in succ_flags.
212 If a bb has the EDGE_TRUE/FALSE_VALUE flags swapped compared to succ_flags,
213 it's marked in inverse.
214 Additionally, the hash value for the struct is cached in hashval, and
215 in_worklist indicates whether it's currently part of worklist. */
217 struct same_succ : pointer_hash <same_succ>
219 /* The bbs that have the same successor bbs. */
220 bitmap bbs;
221 /* The successor bbs. */
222 bitmap succs;
223 /* Indicates whether the EDGE_TRUE/FALSE_VALUEs of succ_flags are swapped for
224 bb. */
225 bitmap inverse;
226 /* The edge flags for each of the successor bbs. */
227 vec<int> succ_flags;
228 /* Indicates whether the struct is currently in the worklist. */
229 bool in_worklist;
230 /* The hash value of the struct. */
231 hashval_t hashval;
233 /* hash_table support. */
234 static inline hashval_t hash (const same_succ *);
235 static int equal (const same_succ *, const same_succ *);
236 static void remove (same_succ *);
239 /* hash routine for hash_table support, returns hashval of E. */
241 inline hashval_t
242 same_succ::hash (const same_succ *e)
244 return e->hashval;
247 /* A group of bbs where 1 bb from bbs can replace the other bbs. */
249 struct bb_cluster
251 /* The bbs in the cluster. */
252 bitmap bbs;
253 /* The preds of the bbs in the cluster. */
254 bitmap preds;
255 /* Index in all_clusters vector. */
256 int index;
257 /* The bb to replace the cluster with. */
258 basic_block rep_bb;
261 /* Per bb-info. */
263 struct aux_bb_info
265 /* The number of non-debug statements in the bb. */
266 int size;
267 /* The same_succ that this bb is a member of. */
268 same_succ *bb_same_succ;
269 /* The cluster that this bb is a member of. */
270 bb_cluster *cluster;
271 /* The vop state at the exit of a bb. This is shortlived data, used to
272 communicate data between update_block_by and update_vuses. */
273 tree vop_at_exit;
274 /* The bb that either contains or is dominated by the dependencies of the
275 bb. */
276 basic_block dep_bb;
/* Macros to access the fields of struct aux_bb_info.  */

#define BB_SIZE(bb) (((struct aux_bb_info *)bb->aux)->size)
#define BB_SAME_SUCC(bb) (((struct aux_bb_info *)bb->aux)->bb_same_succ)
#define BB_CLUSTER(bb) (((struct aux_bb_info *)bb->aux)->cluster)
#define BB_VOP_AT_EXIT(bb) (((struct aux_bb_info *)bb->aux)->vop_at_exit)
#define BB_DEP_BB(bb) (((struct aux_bb_info *)bb->aux)->dep_bb)
287 /* Returns true if the only effect a statement STMT has, is to define locally
288 used SSA_NAMEs. */
290 static bool
291 stmt_local_def (gimple *stmt)
293 basic_block bb, def_bb;
294 imm_use_iterator iter;
295 use_operand_p use_p;
296 tree val;
297 def_operand_p def_p;
299 if (gimple_vdef (stmt) != NULL_TREE
300 || gimple_has_side_effects (stmt)
301 || gimple_could_trap_p_1 (stmt, false, false)
302 || gimple_vuse (stmt) != NULL_TREE)
303 return false;
305 def_p = SINGLE_SSA_DEF_OPERAND (stmt, SSA_OP_DEF);
306 if (def_p == NULL)
307 return false;
309 val = DEF_FROM_PTR (def_p);
310 if (val == NULL_TREE || TREE_CODE (val) != SSA_NAME)
311 return false;
313 def_bb = gimple_bb (stmt);
315 FOR_EACH_IMM_USE_FAST (use_p, iter, val)
317 if (is_gimple_debug (USE_STMT (use_p)))
318 continue;
319 bb = gimple_bb (USE_STMT (use_p));
320 if (bb == def_bb)
321 continue;
323 if (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI
324 && EDGE_PRED (bb, PHI_ARG_INDEX_FROM_USE (use_p))->src == def_bb)
325 continue;
327 return false;
330 return true;
333 /* Let GSI skip forwards over local defs. */
335 static void
336 gsi_advance_fw_nondebug_nonlocal (gimple_stmt_iterator *gsi)
338 gimple *stmt;
340 while (true)
342 if (gsi_end_p (*gsi))
343 return;
344 stmt = gsi_stmt (*gsi);
345 if (!stmt_local_def (stmt))
346 return;
347 gsi_next_nondebug (gsi);
351 /* VAL1 and VAL2 are either:
352 - uses in BB1 and BB2, or
353 - phi alternatives for BB1 and BB2.
354 Return true if the uses have the same gvn value. */
356 static bool
357 gvn_uses_equal (tree val1, tree val2)
359 gcc_checking_assert (val1 != NULL_TREE && val2 != NULL_TREE);
361 if (val1 == val2)
362 return true;
364 if (vn_valueize (val1) != vn_valueize (val2))
365 return false;
367 return ((TREE_CODE (val1) == SSA_NAME || CONSTANT_CLASS_P (val1))
368 && (TREE_CODE (val2) == SSA_NAME || CONSTANT_CLASS_P (val2)));
371 /* Prints E to FILE. */
373 static void
374 same_succ_print (FILE *file, const same_succ *e)
376 unsigned int i;
377 bitmap_print (file, e->bbs, "bbs:", "\n");
378 bitmap_print (file, e->succs, "succs:", "\n");
379 bitmap_print (file, e->inverse, "inverse:", "\n");
380 fprintf (file, "flags:");
381 for (i = 0; i < e->succ_flags.length (); ++i)
382 fprintf (file, " %x", e->succ_flags[i]);
383 fprintf (file, "\n");
386 /* Prints same_succ VE to VFILE. */
388 inline int
389 ssa_same_succ_print_traverse (same_succ **pe, FILE *file)
391 const same_succ *e = *pe;
392 same_succ_print (file, e);
393 return 1;
396 /* Update BB_DEP_BB (USE_BB), given a use of VAL in USE_BB. */
398 static void
399 update_dep_bb (basic_block use_bb, tree val)
401 basic_block dep_bb;
403 /* Not a dep. */
404 if (TREE_CODE (val) != SSA_NAME)
405 return;
407 /* Skip use of global def. */
408 if (SSA_NAME_IS_DEFAULT_DEF (val))
409 return;
411 /* Skip use of local def. */
412 dep_bb = gimple_bb (SSA_NAME_DEF_STMT (val));
413 if (dep_bb == use_bb)
414 return;
416 if (BB_DEP_BB (use_bb) == NULL
417 || dominated_by_p (CDI_DOMINATORS, dep_bb, BB_DEP_BB (use_bb)))
418 BB_DEP_BB (use_bb) = dep_bb;
421 /* Update BB_DEP_BB, given the dependencies in STMT. */
423 static void
424 stmt_update_dep_bb (gimple *stmt)
426 ssa_op_iter iter;
427 use_operand_p use;
429 FOR_EACH_SSA_USE_OPERAND (use, stmt, iter, SSA_OP_USE)
430 update_dep_bb (gimple_bb (stmt), USE_FROM_PTR (use));
433 /* Calculates hash value for same_succ VE. */
435 static hashval_t
436 same_succ_hash (const same_succ *e)
438 inchash::hash hstate (bitmap_hash (e->succs));
439 int flags;
440 unsigned int i;
441 unsigned int first = bitmap_first_set_bit (e->bbs);
442 basic_block bb = BASIC_BLOCK_FOR_FN (cfun, first);
443 int size = 0;
444 gimple *stmt;
445 tree arg;
446 unsigned int s;
447 bitmap_iterator bs;
449 for (gimple_stmt_iterator gsi = gsi_start_nondebug_bb (bb);
450 !gsi_end_p (gsi); gsi_next_nondebug (&gsi))
452 stmt = gsi_stmt (gsi);
453 stmt_update_dep_bb (stmt);
454 if (stmt_local_def (stmt))
455 continue;
456 size++;
458 hstate.add_int (gimple_code (stmt));
459 if (is_gimple_assign (stmt))
460 hstate.add_int (gimple_assign_rhs_code (stmt));
461 if (!is_gimple_call (stmt))
462 continue;
463 if (gimple_call_internal_p (stmt))
464 hstate.add_int (gimple_call_internal_fn (stmt));
465 else
467 inchash::add_expr (gimple_call_fn (stmt), hstate);
468 if (gimple_call_chain (stmt))
469 inchash::add_expr (gimple_call_chain (stmt), hstate);
471 for (i = 0; i < gimple_call_num_args (stmt); i++)
473 arg = gimple_call_arg (stmt, i);
474 arg = vn_valueize (arg);
475 inchash::add_expr (arg, hstate);
479 hstate.add_int (size);
480 BB_SIZE (bb) = size;
482 for (i = 0; i < e->succ_flags.length (); ++i)
484 flags = e->succ_flags[i];
485 flags = flags & ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
486 hstate.add_int (flags);
489 EXECUTE_IF_SET_IN_BITMAP (e->succs, 0, s, bs)
491 int n = find_edge (bb, BASIC_BLOCK_FOR_FN (cfun, s))->dest_idx;
492 for (gphi_iterator gsi = gsi_start_phis (BASIC_BLOCK_FOR_FN (cfun, s));
493 !gsi_end_p (gsi);
494 gsi_next (&gsi))
496 gphi *phi = gsi.phi ();
497 tree lhs = gimple_phi_result (phi);
498 tree val = gimple_phi_arg_def (phi, n);
500 if (virtual_operand_p (lhs))
501 continue;
502 update_dep_bb (bb, val);
506 return hstate.end ();
509 /* Returns true if E1 and E2 have 2 successors, and if the successor flags
510 are inverse for the EDGE_TRUE_VALUE and EDGE_FALSE_VALUE flags, and equal for
511 the other edge flags. */
513 static bool
514 inverse_flags (const same_succ *e1, const same_succ *e2)
516 int f1a, f1b, f2a, f2b;
517 int mask = ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
519 if (e1->succ_flags.length () != 2)
520 return false;
522 f1a = e1->succ_flags[0];
523 f1b = e1->succ_flags[1];
524 f2a = e2->succ_flags[0];
525 f2b = e2->succ_flags[1];
527 if (f1a == f2a && f1b == f2b)
528 return false;
530 return (f1a & mask) == (f2a & mask) && (f1b & mask) == (f2b & mask);
533 /* Compares SAME_SUCCs E1 and E2. */
536 same_succ::equal (const same_succ *e1, const same_succ *e2)
538 unsigned int i, first1, first2;
539 gimple_stmt_iterator gsi1, gsi2;
540 gimple *s1, *s2;
541 basic_block bb1, bb2;
543 if (e1 == e2)
544 return 1;
546 if (e1->hashval != e2->hashval)
547 return 0;
549 if (e1->succ_flags.length () != e2->succ_flags.length ())
550 return 0;
552 if (!bitmap_equal_p (e1->succs, e2->succs))
553 return 0;
555 if (!inverse_flags (e1, e2))
557 for (i = 0; i < e1->succ_flags.length (); ++i)
558 if (e1->succ_flags[i] != e2->succ_flags[i])
559 return 0;
562 first1 = bitmap_first_set_bit (e1->bbs);
563 first2 = bitmap_first_set_bit (e2->bbs);
565 bb1 = BASIC_BLOCK_FOR_FN (cfun, first1);
566 bb2 = BASIC_BLOCK_FOR_FN (cfun, first2);
568 if (BB_SIZE (bb1) != BB_SIZE (bb2))
569 return 0;
571 gsi1 = gsi_start_nondebug_bb (bb1);
572 gsi2 = gsi_start_nondebug_bb (bb2);
573 gsi_advance_fw_nondebug_nonlocal (&gsi1);
574 gsi_advance_fw_nondebug_nonlocal (&gsi2);
575 while (!(gsi_end_p (gsi1) || gsi_end_p (gsi2)))
577 s1 = gsi_stmt (gsi1);
578 s2 = gsi_stmt (gsi2);
579 if (gimple_code (s1) != gimple_code (s2))
580 return 0;
581 if (is_gimple_call (s1) && !gimple_call_same_target_p (s1, s2))
582 return 0;
583 gsi_next_nondebug (&gsi1);
584 gsi_next_nondebug (&gsi2);
585 gsi_advance_fw_nondebug_nonlocal (&gsi1);
586 gsi_advance_fw_nondebug_nonlocal (&gsi2);
589 return 1;
592 /* Alloc and init a new SAME_SUCC. */
594 static same_succ *
595 same_succ_alloc (void)
597 same_succ *same = XNEW (struct same_succ);
599 same->bbs = BITMAP_ALLOC (NULL);
600 same->succs = BITMAP_ALLOC (NULL);
601 same->inverse = BITMAP_ALLOC (NULL);
602 same->succ_flags.create (10);
603 same->in_worklist = false;
605 return same;
608 /* Delete same_succ E. */
610 void
611 same_succ::remove (same_succ *e)
613 BITMAP_FREE (e->bbs);
614 BITMAP_FREE (e->succs);
615 BITMAP_FREE (e->inverse);
616 e->succ_flags.release ();
618 XDELETE (e);
621 /* Reset same_succ SAME. */
623 static void
624 same_succ_reset (same_succ *same)
626 bitmap_clear (same->bbs);
627 bitmap_clear (same->succs);
628 bitmap_clear (same->inverse);
629 same->succ_flags.truncate (0);
632 static hash_table<same_succ> *same_succ_htab;
634 /* Array that is used to store the edge flags for a successor. */
636 static int *same_succ_edge_flags;
638 /* Bitmap that is used to mark bbs that are recently deleted. */
640 static bitmap deleted_bbs;
642 /* Bitmap that is used to mark predecessors of bbs that are
643 deleted. */
645 static bitmap deleted_bb_preds;
647 /* Prints same_succ_htab to stderr. */
649 extern void debug_same_succ (void);
650 DEBUG_FUNCTION void
651 debug_same_succ ( void)
653 same_succ_htab->traverse <FILE *, ssa_same_succ_print_traverse> (stderr);
657 /* Vector of bbs to process. */
659 static vec<same_succ *> worklist;
661 /* Prints worklist to FILE. */
663 static void
664 print_worklist (FILE *file)
666 unsigned int i;
667 for (i = 0; i < worklist.length (); ++i)
668 same_succ_print (file, worklist[i]);
671 /* Adds SAME to worklist. */
673 static void
674 add_to_worklist (same_succ *same)
676 if (same->in_worklist)
677 return;
679 if (bitmap_count_bits (same->bbs) < 2)
680 return;
682 same->in_worklist = true;
683 worklist.safe_push (same);
686 /* Add BB to same_succ_htab. */
688 static void
689 find_same_succ_bb (basic_block bb, same_succ **same_p)
691 unsigned int j;
692 bitmap_iterator bj;
693 same_succ *same = *same_p;
694 same_succ **slot;
695 edge_iterator ei;
696 edge e;
698 if (bb == NULL
699 /* Be conservative with loop structure. It's not evident that this test
700 is sufficient. Before tail-merge, we've just called
701 loop_optimizer_finalize, and LOOPS_MAY_HAVE_MULTIPLE_LATCHES is now
702 set, so there's no guarantee that the loop->latch value is still valid.
703 But we assume that, since we've forced LOOPS_HAVE_SIMPLE_LATCHES at the
704 start of pre, we've kept that property intact throughout pre, and are
705 keeping it throughout tail-merge using this test. */
706 || bb->loop_father->latch == bb)
707 return;
708 bitmap_set_bit (same->bbs, bb->index);
709 FOR_EACH_EDGE (e, ei, bb->succs)
711 int index = e->dest->index;
712 bitmap_set_bit (same->succs, index);
713 same_succ_edge_flags[index] = e->flags;
715 EXECUTE_IF_SET_IN_BITMAP (same->succs, 0, j, bj)
716 same->succ_flags.safe_push (same_succ_edge_flags[j]);
718 same->hashval = same_succ_hash (same);
720 slot = same_succ_htab->find_slot_with_hash (same, same->hashval, INSERT);
721 if (*slot == NULL)
723 *slot = same;
724 BB_SAME_SUCC (bb) = same;
725 add_to_worklist (same);
726 *same_p = NULL;
728 else
730 bitmap_set_bit ((*slot)->bbs, bb->index);
731 BB_SAME_SUCC (bb) = *slot;
732 add_to_worklist (*slot);
733 if (inverse_flags (same, *slot))
734 bitmap_set_bit ((*slot)->inverse, bb->index);
735 same_succ_reset (same);
739 /* Find bbs with same successors. */
741 static void
742 find_same_succ (void)
744 same_succ *same = same_succ_alloc ();
745 basic_block bb;
747 FOR_EACH_BB_FN (bb, cfun)
749 find_same_succ_bb (bb, &same);
750 if (same == NULL)
751 same = same_succ_alloc ();
754 same_succ::remove (same);
757 /* Initializes worklist administration. */
759 static void
760 init_worklist (void)
762 alloc_aux_for_blocks (sizeof (struct aux_bb_info));
763 same_succ_htab = new hash_table<same_succ> (n_basic_blocks_for_fn (cfun));
764 same_succ_edge_flags = XCNEWVEC (int, last_basic_block_for_fn (cfun));
765 deleted_bbs = BITMAP_ALLOC (NULL);
766 deleted_bb_preds = BITMAP_ALLOC (NULL);
767 worklist.create (n_basic_blocks_for_fn (cfun));
768 find_same_succ ();
770 if (dump_file && (dump_flags & TDF_DETAILS))
772 fprintf (dump_file, "initial worklist:\n");
773 print_worklist (dump_file);
777 /* Deletes worklist administration. */
779 static void
780 delete_worklist (void)
782 free_aux_for_blocks ();
783 delete same_succ_htab;
784 same_succ_htab = NULL;
785 XDELETEVEC (same_succ_edge_flags);
786 same_succ_edge_flags = NULL;
787 BITMAP_FREE (deleted_bbs);
788 BITMAP_FREE (deleted_bb_preds);
789 worklist.release ();
792 /* Mark BB as deleted, and mark its predecessors. */
794 static void
795 mark_basic_block_deleted (basic_block bb)
797 edge e;
798 edge_iterator ei;
800 bitmap_set_bit (deleted_bbs, bb->index);
802 FOR_EACH_EDGE (e, ei, bb->preds)
803 bitmap_set_bit (deleted_bb_preds, e->src->index);
806 /* Removes BB from its corresponding same_succ. */
808 static void
809 same_succ_flush_bb (basic_block bb)
811 same_succ *same = BB_SAME_SUCC (bb);
812 BB_SAME_SUCC (bb) = NULL;
813 if (bitmap_single_bit_set_p (same->bbs))
814 same_succ_htab->remove_elt_with_hash (same, same->hashval);
815 else
816 bitmap_clear_bit (same->bbs, bb->index);
819 /* Removes all bbs in BBS from their corresponding same_succ. */
821 static void
822 same_succ_flush_bbs (bitmap bbs)
824 unsigned int i;
825 bitmap_iterator bi;
827 EXECUTE_IF_SET_IN_BITMAP (bbs, 0, i, bi)
828 same_succ_flush_bb (BASIC_BLOCK_FOR_FN (cfun, i));
831 /* Release the last vdef in BB, either normal or phi result. */
833 static void
834 release_last_vdef (basic_block bb)
836 for (gimple_stmt_iterator i = gsi_last_bb (bb); !gsi_end_p (i);
837 gsi_prev_nondebug (&i))
839 gimple *stmt = gsi_stmt (i);
840 if (gimple_vdef (stmt) == NULL_TREE)
841 continue;
843 mark_virtual_operand_for_renaming (gimple_vdef (stmt));
844 return;
847 for (gphi_iterator i = gsi_start_phis (bb); !gsi_end_p (i);
848 gsi_next (&i))
850 gphi *phi = i.phi ();
851 tree res = gimple_phi_result (phi);
853 if (!virtual_operand_p (res))
854 continue;
856 mark_virtual_phi_result_for_renaming (phi);
857 return;
861 /* For deleted_bb_preds, find bbs with same successors. */
863 static void
864 update_worklist (void)
866 unsigned int i;
867 bitmap_iterator bi;
868 basic_block bb;
869 same_succ *same;
871 bitmap_and_compl_into (deleted_bb_preds, deleted_bbs);
872 bitmap_clear (deleted_bbs);
874 bitmap_clear_bit (deleted_bb_preds, ENTRY_BLOCK);
875 same_succ_flush_bbs (deleted_bb_preds);
877 same = same_succ_alloc ();
878 EXECUTE_IF_SET_IN_BITMAP (deleted_bb_preds, 0, i, bi)
880 bb = BASIC_BLOCK_FOR_FN (cfun, i);
881 gcc_assert (bb != NULL);
882 find_same_succ_bb (bb, &same);
883 if (same == NULL)
884 same = same_succ_alloc ();
886 same_succ::remove (same);
887 bitmap_clear (deleted_bb_preds);
890 /* Prints cluster C to FILE. */
892 static void
893 print_cluster (FILE *file, bb_cluster *c)
895 if (c == NULL)
896 return;
897 bitmap_print (file, c->bbs, "bbs:", "\n");
898 bitmap_print (file, c->preds, "preds:", "\n");
901 /* Prints cluster C to stderr. */
903 extern void debug_cluster (bb_cluster *);
904 DEBUG_FUNCTION void
905 debug_cluster (bb_cluster *c)
907 print_cluster (stderr, c);
910 /* Update C->rep_bb, given that BB is added to the cluster. */
912 static void
913 update_rep_bb (bb_cluster *c, basic_block bb)
915 /* Initial. */
916 if (c->rep_bb == NULL)
918 c->rep_bb = bb;
919 return;
922 /* Current needs no deps, keep it. */
923 if (BB_DEP_BB (c->rep_bb) == NULL)
924 return;
926 /* Bb needs no deps, change rep_bb. */
927 if (BB_DEP_BB (bb) == NULL)
929 c->rep_bb = bb;
930 return;
933 /* Bb needs last deps earlier than current, change rep_bb. A potential
934 problem with this, is that the first deps might also be earlier, which
935 would mean we prefer longer lifetimes for the deps. To be able to check
936 for this, we would have to trace BB_FIRST_DEP_BB as well, besides
937 BB_DEP_BB, which is really BB_LAST_DEP_BB.
938 The benefit of choosing the bb with last deps earlier, is that it can
939 potentially be used as replacement for more bbs. */
940 if (dominated_by_p (CDI_DOMINATORS, BB_DEP_BB (c->rep_bb), BB_DEP_BB (bb)))
941 c->rep_bb = bb;
944 /* Add BB to cluster C. Sets BB in C->bbs, and preds of BB in C->preds. */
946 static void
947 add_bb_to_cluster (bb_cluster *c, basic_block bb)
949 edge e;
950 edge_iterator ei;
952 bitmap_set_bit (c->bbs, bb->index);
954 FOR_EACH_EDGE (e, ei, bb->preds)
955 bitmap_set_bit (c->preds, e->src->index);
957 update_rep_bb (c, bb);
960 /* Allocate and init new cluster. */
962 static bb_cluster *
963 new_cluster (void)
965 bb_cluster *c;
966 c = XCNEW (bb_cluster);
967 c->bbs = BITMAP_ALLOC (NULL);
968 c->preds = BITMAP_ALLOC (NULL);
969 c->rep_bb = NULL;
970 return c;
973 /* Delete clusters. */
975 static void
976 delete_cluster (bb_cluster *c)
978 if (c == NULL)
979 return;
980 BITMAP_FREE (c->bbs);
981 BITMAP_FREE (c->preds);
982 XDELETE (c);
986 /* Array that contains all clusters. */
988 static vec<bb_cluster *> all_clusters;
990 /* Allocate all cluster vectors. */
992 static void
993 alloc_cluster_vectors (void)
995 all_clusters.create (n_basic_blocks_for_fn (cfun));
998 /* Reset all cluster vectors. */
1000 static void
1001 reset_cluster_vectors (void)
1003 unsigned int i;
1004 basic_block bb;
1005 for (i = 0; i < all_clusters.length (); ++i)
1006 delete_cluster (all_clusters[i]);
1007 all_clusters.truncate (0);
1008 FOR_EACH_BB_FN (bb, cfun)
1009 BB_CLUSTER (bb) = NULL;
1012 /* Delete all cluster vectors. */
1014 static void
1015 delete_cluster_vectors (void)
1017 unsigned int i;
1018 for (i = 0; i < all_clusters.length (); ++i)
1019 delete_cluster (all_clusters[i]);
1020 all_clusters.release ();
1023 /* Merge cluster C2 into C1. */
1025 static void
1026 merge_clusters (bb_cluster *c1, bb_cluster *c2)
1028 bitmap_ior_into (c1->bbs, c2->bbs);
1029 bitmap_ior_into (c1->preds, c2->preds);
1032 /* Register equivalence of BB1 and BB2 (members of cluster C). Store c in
1033 all_clusters, or merge c with existing cluster. */
1035 static void
1036 set_cluster (basic_block bb1, basic_block bb2)
1038 basic_block merge_bb, other_bb;
1039 bb_cluster *merge, *old, *c;
1041 if (BB_CLUSTER (bb1) == NULL && BB_CLUSTER (bb2) == NULL)
1043 c = new_cluster ();
1044 add_bb_to_cluster (c, bb1);
1045 add_bb_to_cluster (c, bb2);
1046 BB_CLUSTER (bb1) = c;
1047 BB_CLUSTER (bb2) = c;
1048 c->index = all_clusters.length ();
1049 all_clusters.safe_push (c);
1051 else if (BB_CLUSTER (bb1) == NULL || BB_CLUSTER (bb2) == NULL)
1053 merge_bb = BB_CLUSTER (bb1) == NULL ? bb2 : bb1;
1054 other_bb = BB_CLUSTER (bb1) == NULL ? bb1 : bb2;
1055 merge = BB_CLUSTER (merge_bb);
1056 add_bb_to_cluster (merge, other_bb);
1057 BB_CLUSTER (other_bb) = merge;
1059 else if (BB_CLUSTER (bb1) != BB_CLUSTER (bb2))
1061 unsigned int i;
1062 bitmap_iterator bi;
1064 old = BB_CLUSTER (bb2);
1065 merge = BB_CLUSTER (bb1);
1066 merge_clusters (merge, old);
1067 EXECUTE_IF_SET_IN_BITMAP (old->bbs, 0, i, bi)
1068 BB_CLUSTER (BASIC_BLOCK_FOR_FN (cfun, i)) = merge;
1069 all_clusters[old->index] = NULL;
1070 update_rep_bb (merge, old->rep_bb);
1071 delete_cluster (old);
1073 else
1074 gcc_unreachable ();
1077 /* Return true if gimple operands T1 and T2 have the same value. */
1079 static bool
1080 gimple_operand_equal_value_p (tree t1, tree t2)
1082 if (t1 == t2)
1083 return true;
1085 if (t1 == NULL_TREE
1086 || t2 == NULL_TREE)
1087 return false;
1089 if (operand_equal_p (t1, t2, OEP_MATCH_SIDE_EFFECTS))
1090 return true;
1092 return gvn_uses_equal (t1, t2);
1095 /* Return true if gimple statements S1 and S2 are equal. Gimple_bb (s1) and
1096 gimple_bb (s2) are members of SAME_SUCC. */
1098 static bool
1099 gimple_equal_p (same_succ *same_succ, gimple *s1, gimple *s2)
1101 unsigned int i;
1102 tree lhs1, lhs2;
1103 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
1104 tree t1, t2;
1105 bool inv_cond;
1106 enum tree_code code1, code2;
1108 if (gimple_code (s1) != gimple_code (s2))
1109 return false;
1111 switch (gimple_code (s1))
1113 case GIMPLE_CALL:
1114 if (!gimple_call_same_target_p (s1, s2))
1115 return false;
1117 t1 = gimple_call_chain (s1);
1118 t2 = gimple_call_chain (s2);
1119 if (!gimple_operand_equal_value_p (t1, t2))
1120 return false;
1122 if (gimple_call_num_args (s1) != gimple_call_num_args (s2))
1123 return false;
1125 for (i = 0; i < gimple_call_num_args (s1); ++i)
1127 t1 = gimple_call_arg (s1, i);
1128 t2 = gimple_call_arg (s2, i);
1129 if (!gimple_operand_equal_value_p (t1, t2))
1130 return false;
1133 lhs1 = gimple_get_lhs (s1);
1134 lhs2 = gimple_get_lhs (s2);
1135 if (lhs1 == NULL_TREE && lhs2 == NULL_TREE)
1136 return true;
1137 if (lhs1 == NULL_TREE || lhs2 == NULL_TREE)
1138 return false;
1139 if (TREE_CODE (lhs1) == SSA_NAME && TREE_CODE (lhs2) == SSA_NAME)
1140 return vn_valueize (lhs1) == vn_valueize (lhs2);
1141 return operand_equal_p (lhs1, lhs2, 0);
1143 case GIMPLE_ASSIGN:
1144 lhs1 = gimple_get_lhs (s1);
1145 lhs2 = gimple_get_lhs (s2);
1146 if (TREE_CODE (lhs1) != SSA_NAME
1147 && TREE_CODE (lhs2) != SSA_NAME)
1148 return (operand_equal_p (lhs1, lhs2, 0)
1149 && gimple_operand_equal_value_p (gimple_assign_rhs1 (s1),
1150 gimple_assign_rhs1 (s2)));
1151 else if (TREE_CODE (lhs1) == SSA_NAME
1152 && TREE_CODE (lhs2) == SSA_NAME)
1153 return operand_equal_p (gimple_assign_rhs1 (s1),
1154 gimple_assign_rhs1 (s2), 0);
1155 return false;
1157 case GIMPLE_COND:
1158 t1 = gimple_cond_lhs (s1);
1159 t2 = gimple_cond_lhs (s2);
1160 if (!gimple_operand_equal_value_p (t1, t2))
1161 return false;
1163 t1 = gimple_cond_rhs (s1);
1164 t2 = gimple_cond_rhs (s2);
1165 if (!gimple_operand_equal_value_p (t1, t2))
1166 return false;
1168 code1 = gimple_expr_code (s1);
1169 code2 = gimple_expr_code (s2);
1170 inv_cond = (bitmap_bit_p (same_succ->inverse, bb1->index)
1171 != bitmap_bit_p (same_succ->inverse, bb2->index));
1172 if (inv_cond)
1174 bool honor_nans = HONOR_NANS (t1);
1175 code2 = invert_tree_comparison (code2, honor_nans);
1177 return code1 == code2;
1179 default:
1180 return false;
1184 /* Let GSI skip backwards over local defs. Return the earliest vuse in VUSE.
1185 Return true in VUSE_ESCAPED if the vuse influenced a SSA_OP_DEF of one of the
1186 processed statements. */
1188 static void
1189 gsi_advance_bw_nondebug_nonlocal (gimple_stmt_iterator *gsi, tree *vuse,
1190 bool *vuse_escaped)
1192 gimple *stmt;
1193 tree lvuse;
1195 while (true)
1197 if (gsi_end_p (*gsi))
1198 return;
1199 stmt = gsi_stmt (*gsi);
1201 lvuse = gimple_vuse (stmt);
1202 if (lvuse != NULL_TREE)
1204 *vuse = lvuse;
1205 if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_DEF))
1206 *vuse_escaped = true;
1209 if (!stmt_local_def (stmt))
1210 return;
1211 gsi_prev_nondebug (gsi);
1215 /* Return true if equal (in the sense of gimple_equal_p) statements STMT1 and
1216 STMT2 are allowed to be merged. */
1218 static bool
1219 merge_stmts_p (gimple *stmt1, gimple *stmt2)
1221 /* What could be better than this here is to blacklist the bb
1222 containing the stmt, when encountering the stmt f.i. in
1223 same_succ_hash. */
1224 if (is_tm_ending (stmt1))
1225 return false;
1227 /* Verify EH landing pads. */
1228 if (lookup_stmt_eh_lp_fn (cfun, stmt1) != lookup_stmt_eh_lp_fn (cfun, stmt2))
1229 return false;
1231 if (is_gimple_call (stmt1)
1232 && gimple_call_internal_p (stmt1))
1233 switch (gimple_call_internal_fn (stmt1))
1235 case IFN_UBSAN_NULL:
1236 case IFN_UBSAN_BOUNDS:
1237 case IFN_UBSAN_VPTR:
1238 case IFN_UBSAN_CHECK_ADD:
1239 case IFN_UBSAN_CHECK_SUB:
1240 case IFN_UBSAN_CHECK_MUL:
1241 case IFN_UBSAN_OBJECT_SIZE:
1242 case IFN_ASAN_CHECK:
1243 /* For these internal functions, gimple_location is an implicit
1244 parameter, which will be used explicitly after expansion.
1245 Merging these statements may cause confusing line numbers in
1246 sanitizer messages. */
1247 return gimple_location (stmt1) == gimple_location (stmt2);
1248 default:
1249 break;
1252 return true;
1255 /* Determines whether BB1 and BB2 (members of same_succ) are duplicates. If so,
1256 clusters them. */
1258 static void
1259 find_duplicate (same_succ *same_succ, basic_block bb1, basic_block bb2)
1261 gimple_stmt_iterator gsi1 = gsi_last_nondebug_bb (bb1);
1262 gimple_stmt_iterator gsi2 = gsi_last_nondebug_bb (bb2);
1263 tree vuse1 = NULL_TREE, vuse2 = NULL_TREE;
1264 bool vuse_escaped = false;
/* Skip trailing local defs in each block, recording the earliest vuse seen
   and whether any vuse escaped into a real def.  */
1266 gsi_advance_bw_nondebug_nonlocal (&gsi1, &vuse1, &vuse_escaped);
1267 gsi_advance_bw_nondebug_nonlocal (&gsi2, &vuse2, &vuse_escaped);
/* Walk both blocks backwards in lock-step; every pair of statements must be
   equal and mergeable, otherwise the blocks are not duplicates.  */
1269 while (!gsi_end_p (gsi1) && !gsi_end_p (gsi2))
1271 gimple *stmt1 = gsi_stmt (gsi1);
1272 gimple *stmt2 = gsi_stmt (gsi2);
/* Stop the statement comparison once both walks reach the blocks' leading
   labels; labels are handled separately below.  */
1274 if (gimple_code (stmt1) == GIMPLE_LABEL
1275 && gimple_code (stmt2) == GIMPLE_LABEL)
1276 break;
1278 if (!gimple_equal_p (same_succ, stmt1, stmt2))
1279 return;
1281 if (!merge_stmts_p (stmt1, stmt2))
1282 return;
1284 gsi_prev_nondebug (&gsi1);
1285 gsi_prev_nondebug (&gsi2);
1286 gsi_advance_bw_nondebug_nonlocal (&gsi1, &vuse1, &vuse_escaped);
1287 gsi_advance_bw_nondebug_nonlocal (&gsi2, &vuse2, &vuse_escaped);
/* Whatever remains at the top of each block must be ordinary labels;
   nonlocal or forced labels cannot be merged away.  */
1290 while (!gsi_end_p (gsi1) && gimple_code (gsi_stmt (gsi1)) == GIMPLE_LABEL)
1292 tree label = gimple_label_label (as_a <glabel *> (gsi_stmt (gsi1)));
1293 if (DECL_NONLOCAL (label) || FORCED_LABEL (label))
1294 return;
1295 gsi_prev (&gsi1);
1297 while (!gsi_end_p (gsi2) && gimple_code (gsi_stmt (gsi2)) == GIMPLE_LABEL)
1299 tree label = gimple_label_label (as_a <glabel *> (gsi_stmt (gsi2)));
1300 if (DECL_NONLOCAL (label) || FORCED_LABEL (label))
1301 return;
1302 gsi_prev (&gsi2);
/* Both walks must have consumed their entire block; a leftover statement on
   either side means the blocks differ.  */
1304 if (!(gsi_end_p (gsi1) && gsi_end_p (gsi2)))
1305 return;
1307 /* If the incoming vuses are not the same, and the vuse escaped into an
1308 SSA_OP_DEF, then merging the 2 blocks will change the value of the def,
1309 which potentially means the semantics of one of the blocks will be changed.
1310 TODO: make this check more precise. */
1311 if (vuse_escaped && vuse1 != vuse2)
1312 return;
1314 if (dump_file)
1315 fprintf (dump_file, "find_duplicates: <bb %d> duplicate of <bb %d>\n",
1316 bb1->index, bb2->index);
/* Record bb1 and bb2 as members of the same merge cluster.  */
1318 set_cluster (bb1, bb2);
1321 /* Returns whether for all phis in DEST the phi alternatives for E1 and
1322 E2 are equal. */
1324 static bool
1325 same_phi_alternatives_1 (basic_block dest, edge e1, edge e2)
1327 int n1 = e1->dest_idx, n2 = e2->dest_idx;
1328 gphi_iterator gsi;
1330 for (gsi = gsi_start_phis (dest); !gsi_end_p (gsi); gsi_next (&gsi))
1332 gphi *phi = gsi.phi ();
1333 tree lhs = gimple_phi_result (phi);
1334 tree val1 = gimple_phi_arg_def (phi, n1);
1335 tree val2 = gimple_phi_arg_def (phi, n2);
1337 if (virtual_operand_p (lhs))
1338 continue;
1340 if (operand_equal_for_phi_arg_p (val1, val2))
1341 continue;
1342 if (gvn_uses_equal (val1, val2))
1343 continue;
1345 return false;
1348 return true;
1351 /* Returns whether for all successors of BB1 and BB2 (members of SAME_SUCC), the
1352 phi alternatives for BB1 and BB2 are equal. */
1354 static bool
1355 same_phi_alternatives (same_succ *same_succ, basic_block bb1, basic_block bb2)
1357 unsigned int s;
1358 bitmap_iterator bs;
1359 edge e1, e2;
1360 basic_block succ;
1362 EXECUTE_IF_SET_IN_BITMAP (same_succ->succs, 0, s, bs)
1364 succ = BASIC_BLOCK_FOR_FN (cfun, s);
1365 e1 = find_edge (bb1, succ);
1366 e2 = find_edge (bb2, succ);
1367 if (e1->flags & EDGE_COMPLEX
1368 || e2->flags & EDGE_COMPLEX)
1369 return false;
1371 /* For all phis in bb, the phi alternatives for e1 and e2 need to have
1372 the same value. */
1373 if (!same_phi_alternatives_1 (succ, e1, e2))
1374 return false;
1377 return true;
1380 /* Return true if BB has non-vop phis. */
1382 static bool
1383 bb_has_non_vop_phi (basic_block bb)
1385 gimple_seq phis = phi_nodes (bb);
1386 gimple *phi;
1388 if (phis == NULL)
1389 return false;
1391 if (!gimple_seq_singleton_p (phis))
1392 return true;
1394 phi = gimple_seq_first_stmt (phis);
1395 return !virtual_operand_p (gimple_phi_result (phi));
1398 /* Returns true if redirecting the incoming edges of FROM to TO maintains the
1399 invariant that uses in FROM are dominates by their defs. */
1401 static bool
1402 deps_ok_for_redirect_from_bb_to_bb (basic_block from, basic_block to)
1404 basic_block cd, dep_bb = BB_DEP_BB (to);
1405 edge_iterator ei;
1406 edge e;
1408 if (dep_bb == NULL)
1409 return true;
1411 bitmap from_preds = BITMAP_ALLOC (NULL);
1412 FOR_EACH_EDGE (e, ei, from->preds)
1413 bitmap_set_bit (from_preds, e->src->index);
1414 cd = nearest_common_dominator_for_set (CDI_DOMINATORS, from_preds);
1415 BITMAP_FREE (from_preds);
1417 return dominated_by_p (CDI_DOMINATORS, dep_bb, cd);
1420 /* Returns true if replacing BB1 (or its replacement bb) by BB2 (or its
1421 replacement bb) and vice versa maintains the invariant that uses in the
1422 replacement are dominates by their defs. */
1424 static bool
1425 deps_ok_for_redirect (basic_block bb1, basic_block bb2)
1427 if (BB_CLUSTER (bb1) != NULL)
1428 bb1 = BB_CLUSTER (bb1)->rep_bb;
1430 if (BB_CLUSTER (bb2) != NULL)
1431 bb2 = BB_CLUSTER (bb2)->rep_bb;
1433 return (deps_ok_for_redirect_from_bb_to_bb (bb1, bb2)
1434 && deps_ok_for_redirect_from_bb_to_bb (bb2, bb1));
1437 /* Within SAME_SUCC->bbs, find clusters of bbs which can be merged. */
1439 static void
1440 find_clusters_1 (same_succ *same_succ)
1442 basic_block bb1, bb2;
1443 unsigned int i, j;
1444 bitmap_iterator bi, bj;
1445 int nr_comparisons;
1446 int max_comparisons = PARAM_VALUE (PARAM_MAX_TAIL_MERGE_COMPARISONS);
/* Consider each member bb1 of the same_succ set as a merge candidate.  */
1448 EXECUTE_IF_SET_IN_BITMAP (same_succ->bbs, 0, i, bi)
1450 bb1 = BASIC_BLOCK_FOR_FN (cfun, i)
1452 /* TODO: handle blocks with phi-nodes. We'll have to find corresponding
1453 phi-nodes in bb1 and bb2, with the same alternatives for the same
1454 preds. */
1455 if (bb_has_non_vop_phi (bb1) || bb_has_eh_pred (bb1))
1456 continue;
/* Compare bb1 against each later member bb2 of the set (i + 1 avoids
   comparing a pair twice).  */
1458 nr_comparisons = 0;
1459 EXECUTE_IF_SET_IN_BITMAP (same_succ->bbs, i + 1, j, bj)
1461 bb2 = BASIC_BLOCK_FOR_FN (cfun, j);
1463 if (bb_has_non_vop_phi (bb2) || bb_has_eh_pred (bb2))
1464 continue;
/* Already known to be in the same cluster -- nothing to do.  */
1466 if (BB_CLUSTER (bb1) != NULL && BB_CLUSTER (bb1) == BB_CLUSTER (bb2))
1467 continue;
1469 /* Limit quadratic behavior. */
1470 nr_comparisons++;
1471 if (nr_comparisons > max_comparisons)
1472 break;
1474 /* This is a conservative dependency check. We could test more
1475 precise for allowed replacement direction. */
1476 if (!deps_ok_for_redirect (bb1, bb2))
1477 continue;
/* All shared successors must see equal phi alternatives from bb1 and
   bb2, otherwise merging would change phi semantics.  */
1479 if (!(same_phi_alternatives (same_succ, bb1, bb2)))
1480 continue;
/* All cheap checks passed; do the full statement-by-statement
   comparison and cluster the bbs if they match.  */
1482 find_duplicate (same_succ, bb1, bb2);
1487 /* Find clusters of bbs which can be merged. */
1489 static void
1490 find_clusters (void)
1492 same_succ *same;
1494 while (!worklist.is_empty ())
1496 same = worklist.pop ();
1497 same->in_worklist = false;
1498 if (dump_file && (dump_flags & TDF_DETAILS))
1500 fprintf (dump_file, "processing worklist entry\n");
1501 same_succ_print (dump_file, same);
1503 find_clusters_1 (same);
1507 /* Returns the vop phi of BB, if any. */
1509 static gphi *
1510 vop_phi (basic_block bb)
1512 gphi *stmt;
1513 gphi_iterator gsi;
1514 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1516 stmt = gsi.phi ();
1517 if (! virtual_operand_p (gimple_phi_result (stmt)))
1518 continue;
1519 return stmt;
1521 return NULL;
1524 /* Redirect all edges from BB1 to BB2, removes BB1 and marks it as removed. */
1526 static void
1527 replace_block_by (basic_block bb1, basic_block bb2)
1529 edge pred_edge;
1530 edge e1, e2;
1531 edge_iterator ei;
1532 unsigned int i;
1533 gphi *bb2_phi;
1535 bb2_phi = vop_phi (bb2);
1537 /* Mark the basic block as deleted. */
1538 mark_basic_block_deleted (bb1);
1540 /* Redirect the incoming edges of bb1 to bb2. */
/* Iterate downwards because each redirect removes the edge from
   bb1->preds.  */
1541 for (i = EDGE_COUNT (bb1->preds); i > 0 ; --i)
1543 pred_edge = EDGE_PRED (bb1, i - 1);
1544 pred_edge = redirect_edge_and_branch (pred_edge, bb2);
1545 gcc_assert (pred_edge != NULL);
1547 if (bb2_phi == NULL)
1548 continue;
1550 /* The phi might have run out of capacity when the redirect added an
1551 argument, which means it could have been replaced. Refresh it. */
1552 vop_phi (bb2);
/* Extend bb2's vop phi with an argument for the newly redirected edge.  */
1554 add_phi_arg (bb2_phi, SSA_NAME_VAR (gimple_phi_result (bb2_phi)),
1555 pred_edge, UNKNOWN_LOCATION);
/* Fold bb1's execution count into bb2's.  */
1558 bb2->count += bb1->count;
1560 /* Merge the outgoing edge counts from bb1 onto bb2. */
1561 profile_count out_sum = profile_count::zero ();
1562 int out_freq_sum = 0;
1564 /* Recompute the edge probabilities from the new merged edge count.
1565 Use the sum of the new merged edge counts computed above instead
1566 of bb2's merged count, in case there are profile count insanities
1567 making the bb count inconsistent with the edge weights. */
/* First pass: sum the initialized counts and frequencies of all outgoing
   edges of both blocks.  */
1568 FOR_EACH_EDGE (e1, ei, bb1->succs)
1570 if (e1->count.initialized_p ())
1571 out_sum += e1->count;
1572 out_freq_sum += EDGE_FREQUENCY (e1);
1574 FOR_EACH_EDGE (e1, ei, bb2->succs)
1576 if (e1->count.initialized_p ())
1577 out_sum += e1->count;
1578 out_freq_sum += EDGE_FREQUENCY (e1);
/* Second pass: merge each bb1 outgoing edge into the matching bb2 edge
   (same destination; guaranteed to exist since both bbs are in the same
   same_succ set) and recompute its probability.  */
1581 FOR_EACH_EDGE (e1, ei, bb1->succs)
1583 e2 = find_edge (bb2, e1->dest);
1584 gcc_assert (e2);
1585 e2->count += e1->count;
1586 if (out_sum > 0 && e2->count.initialized_p ())
1588 e2->probability = e2->count.probability_in (bb2->count);
1590 else if (bb1->frequency && bb2->frequency)
1591 e2->probability = e1->probability;
/* NOTE(review): this arm intentionally keeps e2->probability unchanged;
   its empty statement body appears to have been dropped from this copy
   of the file -- confirm against the upstream source.  */
1592 else if (bb2->frequency && !bb1->frequency)
1594 else if (out_freq_sum)
1595 e2->probability = GCOV_COMPUTE_SCALE (EDGE_FREQUENCY (e1)
1596 + EDGE_FREQUENCY (e2),
1597 out_freq_sum);
1598 out_sum += e2->count;
/* Merge frequencies, saturating at the representable maximum.  */
1600 bb2->frequency += bb1->frequency;
1601 if (bb2->frequency > BB_FREQ_MAX)
1602 bb2->frequency = BB_FREQ_MAX;
1604 /* Move over any user labels from bb1 after the bb2 labels. */
1605 gimple_stmt_iterator gsi1 = gsi_start_bb (bb1);
1606 if (!gsi_end_p (gsi1) && gimple_code (gsi_stmt (gsi1)) == GIMPLE_LABEL)
1608 gimple_stmt_iterator gsi2 = gsi_after_labels (bb2);
1609 while (!gsi_end_p (gsi1)
1610 && gimple_code (gsi_stmt (gsi1)) == GIMPLE_LABEL)
1612 tree label = gimple_label_label (as_a <glabel *> (gsi_stmt (gsi1)));
/* find_duplicate already rejected nonlocal/forced labels.  */
1613 gcc_assert (!DECL_NONLOCAL (label) && !FORCED_LABEL (label));
/* Artificial labels are dropped with bb1; user labels are preserved.  */
1614 if (DECL_ARTIFICIAL (label))
1615 gsi_next (&gsi1);
1616 else
1617 gsi_move_before (&gsi1, &gsi2);
1621 /* Clear range info from all stmts in BB2 -- this transformation
1622 could make them out of date. */
1623 reset_flow_sensitive_info_in_bb (bb2);
1625 /* Do updates that use bb1, before deleting bb1. */
1626 release_last_vdef (bb1);
1627 same_succ_flush_bb (bb1);
1629 delete_basic_block (bb1);
1632 /* Basic blocks for which update_debug_stmt needs to be called; maintained
   by apply_clusters (set for surviving representative bbs, cleared for
   removed bbs) and consumed by update_debug_stmts.  */
1634 static bitmap update_bbs;
1636 /* For each cluster in all_clusters, merge all cluster->bbs. Returns
1637 number of bbs removed. */
1639 static int
1640 apply_clusters (void)
1642 basic_block bb1, bb2;
1643 bb_cluster *c;
1644 unsigned int i, j;
1645 bitmap_iterator bj;
1646 int nr_bbs_removed = 0;
1648 for (i = 0; i < all_clusters.length (); ++i)
1650 c = all_clusters[i];
1651 if (c == NULL)
1652 continue;
1654 bb2 = c->rep_bb;
1655 bitmap_set_bit (update_bbs, bb2->index);
1657 bitmap_clear_bit (c->bbs, bb2->index);
1658 EXECUTE_IF_SET_IN_BITMAP (c->bbs, 0, j, bj)
1660 bb1 = BASIC_BLOCK_FOR_FN (cfun, j);
1661 bitmap_clear_bit (update_bbs, bb1->index);
1663 replace_block_by (bb1, bb2);
1664 nr_bbs_removed++;
1668 return nr_bbs_removed;
1671 /* Resets debug statement STMT if it has uses that are not dominated by their
1672 defs. */
1674 static void
1675 update_debug_stmt (gimple *stmt)
1677 use_operand_p use_p;
1678 ssa_op_iter oi;
1679 basic_block bbuse;
1681 if (!gimple_debug_bind_p (stmt))
1682 return;
1684 bbuse = gimple_bb (stmt);
1685 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, oi, SSA_OP_USE)
1687 tree name = USE_FROM_PTR (use_p);
1688 gimple *def_stmt = SSA_NAME_DEF_STMT (name);
1689 basic_block bbdef = gimple_bb (def_stmt);
1690 if (bbdef == NULL || bbuse == bbdef
1691 || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef))
1692 continue;
1694 gimple_debug_bind_reset_value (stmt);
1695 update_stmt (stmt);
1696 break;
1700 /* Resets all debug statements that have uses that are not
1701 dominated by their defs. */
1703 static void
1704 update_debug_stmts (void)
1706 basic_block bb;
1707 bitmap_iterator bi;
1708 unsigned int i;
1710 EXECUTE_IF_SET_IN_BITMAP (update_bbs, 0, i, bi)
1712 gimple *stmt;
1713 gimple_stmt_iterator gsi;
1715 bb = BASIC_BLOCK_FOR_FN (cfun, i);
1716 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1718 stmt = gsi_stmt (gsi);
1719 if (!is_gimple_debug (stmt))
1720 continue;
1721 update_debug_stmt (stmt);
1726 /* Runs tail merge optimization.  TODO is the incoming todo-flag set; the
   possibly adjusted set is returned for the pass manager.  */
1728 unsigned int
1729 tail_merge_optimize (unsigned int todo)
1731 int nr_bbs_removed_total = 0;
1732 int nr_bbs_removed;
1733 bool loop_entered = false;
1734 int iteration_nr = 0;
1735 int max_iterations = PARAM_VALUE (PARAM_MAX_TAIL_MERGE_ITERATIONS);
/* Bail out cheaply when the pass is disabled by flag or parameter.  */
1737 if (!flag_tree_tail_merge
1738 || max_iterations == 0)
1739 return 0;
1741 timevar_push (TV_TREE_TAIL_MERGE);
1743 /* We enter from PRE which has critical edges split. Elimination
1744 does not process trivially dead code so cleanup the CFG if we
1745 are told so. And re-split critical edges then. */
1746 if (todo & TODO_cleanup_cfg)
1748 cleanup_tree_cfg ();
1749 todo &= ~TODO_cleanup_cfg;
1750 split_critical_edges ();
1753 if (!dom_info_available_p (CDI_DOMINATORS))
1755 /* PRE can leave us with unreachable blocks, remove them now. */
1756 delete_unreachable_blocks ();
1757 calculate_dominance_info (CDI_DOMINATORS);
1759 init_worklist ();
/* Iterate: find clusters of duplicate bbs, apply them, and rebuild the
   worklist, until nothing changes or the iteration limit is hit.  */
1761 while (!worklist.is_empty ())
/* Allocate the per-run data lazily on the first iteration; on later
   iterations only reset it.  */
1763 if (!loop_entered)
1765 loop_entered = true;
1766 alloc_cluster_vectors ();
1767 update_bbs = BITMAP_ALLOC (NULL);
1769 else
1770 reset_cluster_vectors ();
1772 iteration_nr++;
1773 if (dump_file && (dump_flags & TDF_DETAILS))
1774 fprintf (dump_file, "worklist iteration #%d\n", iteration_nr);
1776 find_clusters ();
1777 gcc_assert (worklist.is_empty ());
1778 if (all_clusters.is_empty ())
1779 break;
1781 nr_bbs_removed = apply_clusters ();
1782 nr_bbs_removed_total += nr_bbs_removed;
1783 if (nr_bbs_removed == 0)
1784 break;
/* apply_clusters changed the CFG, so the dominance info is stale.  */
1786 free_dominance_info (CDI_DOMINATORS);
1788 if (iteration_nr == max_iterations)
1789 break;
1791 calculate_dominance_info (CDI_DOMINATORS);
1792 update_worklist ();
1795 if (dump_file && (dump_flags & TDF_DETAILS))
1796 fprintf (dump_file, "htab collision / search: %f\n",
1797 same_succ_htab->collisions ());
/* If any bbs were merged, fix up debug statements and force virtual
   operand renaming before handing back to the pass manager.  */
1799 if (nr_bbs_removed_total > 0)
1801 if (MAY_HAVE_DEBUG_STMTS)
1803 calculate_dominance_info (CDI_DOMINATORS);
1804 update_debug_stmts ();
1807 if (dump_file && (dump_flags & TDF_DETAILS))
1809 fprintf (dump_file, "Before TODOs.\n");
1810 dump_function_to_file (current_function_decl, dump_file, dump_flags);
1813 mark_virtual_operands_for_renaming (cfun);
/* Release the per-run data; the cluster vectors and update_bbs bitmap
   exist only if the main loop was entered at least once.  */
1816 delete_worklist ();
1817 if (loop_entered)
1819 delete_cluster_vectors ();
1820 BITMAP_FREE (update_bbs);
1823 timevar_pop (TV_TREE_TAIL_MERGE);
1825 return todo;