/* Copyright (C) 2005-2023 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "fold-const.h"
#include "gimple-iterator.h"
#include "tree-ssa-threadupdate.h"
#include "tree-ssa-loop.h"
#include "tree-pass.h"
#include "gimple-ssa.h"
#include "tree-phinodes.h"
#include "tree-inline.h"
#include "tree-vectorizer.h"
#include "value-range.h"
#include "gimple-range.h"
#include "tree-ssa-threadedge.h"
#include "gimple-range-path.h"
#include "tree-cfgcleanup.h"
#include "tree-pretty-print.h"
// Path registry for the backwards threader.  After all paths have been
// registered with register_path(), thread_through_all_blocks() is called
// to modify the CFG.

class back_threader_registry : public back_jt_path_registry
{
public:
  bool register_path (const vec<basic_block> &, edge taken);
};
// Class to abstract the profitability code for the backwards threader.

class back_threader_profitability
{
public:
  back_threader_profitability (bool speed_p, gimple *stmt);
  bool possibly_profitable_path_p (const vec<basic_block> &, tree, bool *);
  bool profitable_path_p (const vec<basic_block> &,
			  edge taken, bool *irreducible_loop);
private:
  const bool m_speed_p;
  int m_exit_jump_benefit;
  bool m_threaded_multiway_branch;
  // The following are computed by possibly_profitable_path_p.
  bool m_threaded_through_latch;
  bool m_multiway_branch_in_path;
  bool m_contains_hot_bb;
  int m_n_insns;
};
back_threader_profitability::back_threader_profitability (bool speed_p,
							   gimple *last)
  : m_speed_p (speed_p)
{
  m_threaded_multiway_branch = (gimple_code (last) == GIMPLE_SWITCH
				|| gimple_code (last) == GIMPLE_GOTO);
  // The forward threader has estimate_threading_killed_stmts, in
  // particular it estimates further DCE from eliminating the exit
  // control statement.
  m_exit_jump_benefit = estimate_num_insns (last, &eni_size_weights);
}
// Back threader flags.
#define BT_NONE 0
// Generate fast code at the expense of code size.
#define BT_SPEED 1
// Resolve unknown SSAs on entry to a threading path.  If set, use the
// ranger.  If not, assume all ranges on entry to a path are VARYING.
#define BT_RESOLVE 2
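
// The flag combinations used by the passes defined at the end of this
// file: the early threading pass constructs the threader with BT_NONE,
// the regular jump threading pass uses BT_SPEED, and the full variant
// uses BT_SPEED | BT_RESOLVE.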
class back_threader
{
public:
  back_threader (function *fun, unsigned flags, bool first);
  ~back_threader ();
  unsigned thread_blocks ();
private:
  void maybe_thread_block (basic_block bb);
  bool debug_counter ();
  edge maybe_register_path (back_threader_profitability &);
  void maybe_register_path_dump (edge taken_edge);
  void find_paths_to_names (basic_block bb, bitmap imports, unsigned,
			    back_threader_profitability &);
  edge find_taken_edge (const vec<basic_block> &path);
  edge find_taken_edge_cond (const vec<basic_block> &path, gcond *);
  edge find_taken_edge_switch (const vec<basic_block> &path, gswitch *);
  virtual void debug ();
  virtual void dump (FILE *out);

  back_threader_registry m_registry;

  // Current path being analyzed.
  auto_vec<basic_block> m_path;
  // Hash to mark visited BBs while analyzing a path.
  hash_set<basic_block> m_visited_bbs;
  // The set of SSA names, any of which could potentially change the
  // value of the final conditional in a path.
  auto_bitmap m_imports;
  // The last statement in the path.
  gimple *m_last_stmt;
  // This is a bit of a wart.  It's used to pass the LHS SSA name to
  // the profitability engine.
  tree m_name;
  // Marker to differentiate unreachable edges.
  static const edge UNREACHABLE_EDGE;
  // Set to TRUE if unknown SSA names along a path should be resolved
  // with the ranger.  Otherwise, unknown SSA names are assumed to be
  // VARYING.  Setting to true is more precise but slower.
  function *m_fun;
  // Ranger for the path solver.
  gimple_ranger *m_ranger;
  unsigned m_flags;
  // Set to TRUE for the first of each thread[12] pass or the first of
  // each threadfull[12] pass.  This is used to differentiate between
  // the different threading passes so we can set up debug counters.
  bool m_first;
};

// Used to differentiate unreachable edges, so we may stop the search
// in a given direction.
const edge back_threader::UNREACHABLE_EDGE = (edge) -1;
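
/* A back_threader object is instantiated once per pass invocation: the
   pass's execute() hook constructs it with the desired flags and calls
   thread_blocks (), which visits every block with more than one
   successor, searches backwards for threadable paths, and finally lets
   the registry rewrite the CFG.  See the pass definitions at the end
   of this file.  */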
back_threader::back_threader (function *fun, unsigned flags, bool first)
  : m_first (first)
{
  if (flags & BT_SPEED)
    loop_optimizer_init (LOOPS_HAVE_PREHEADERS | LOOPS_HAVE_SIMPLE_LATCHES);
  else
    loop_optimizer_init (AVOID_CFG_MODIFICATIONS);

  m_fun = fun;
  m_flags = flags;
  m_last_stmt = NULL;

  // The path solver needs EDGE_DFS_BACK in resolving mode.
  if (flags & BT_RESOLVE)
    mark_dfs_back_edges ();

  m_ranger = new gimple_ranger;
}

back_threader::~back_threader ()
{
  delete m_ranger;
  loop_optimizer_finalize ();
}
// A wrapper for the various debug counters for the threading passes.
// Returns TRUE if it's OK to register the current threading
// candidate.

bool
back_threader::debug_counter ()
{
  // The ethread pass is mostly harmless ;-).
  if ((m_flags & BT_SPEED) == 0)
    return true;

  if (m_flags & BT_RESOLVE)
    {
      if (m_first && !dbg_cnt (back_threadfull1))
	return false;

      if (!m_first && !dbg_cnt (back_threadfull2))
	return false;
    }
  else
    {
      if (m_first && !dbg_cnt (back_thread1))
	return false;

      if (!m_first && !dbg_cnt (back_thread2))
	return false;
    }
  return true;
}
static void
dump_path (FILE *dump_file, const vec<basic_block> &path)
{
  for (unsigned i = path.length (); i > 0; --i)
    {
      basic_block bb = path[i - 1];
      fprintf (dump_file, "%d", bb->index);
      if (i > 1)
	fprintf (dump_file, "->");
    }
}
// Dump details of an attempt to register a path.

void
back_threader::maybe_register_path_dump (edge taken)
{
  if (m_path.is_empty ())
    return;

  fprintf (dump_file, "path: ");
  dump_path (dump_file, m_path);
  fprintf (dump_file, "->");

  if (taken == UNREACHABLE_EDGE)
    fprintf (dump_file, "xx REJECTED (unreachable)\n");
  else if (taken)
    fprintf (dump_file, "%d SUCCESS\n", taken->dest->index);
  else
    fprintf (dump_file, "xx REJECTED\n");
}
// If an outgoing edge can be determined out of the current path,
// register it for jump threading and return the taken edge.
//
// Return NULL if it is unprofitable to thread this path, or the
// outgoing edge is unknown.  Return UNREACHABLE_EDGE if the path is
// unreachable.

edge
back_threader::maybe_register_path (back_threader_profitability &profit)
{
  edge taken_edge = find_taken_edge (m_path);

  if (taken_edge && taken_edge != UNREACHABLE_EDGE)
    {
      bool irreducible = false;
      if (profit.profitable_path_p (m_path, taken_edge, &irreducible)
	  && debug_counter ()
	  && m_registry.register_path (m_path, taken_edge))
	{
	  if (irreducible)
	    vect_free_loop_info_assumptions (m_path[0]->loop_father);
	}
      else
	taken_edge = NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    maybe_register_path_dump (taken_edge);

  return taken_edge;
}
// Return the known taken edge out of a path.  If the path can be
// determined to be unreachable, return UNREACHABLE_EDGE.  If no
// outgoing edge can be calculated, return NULL.

edge
back_threader::find_taken_edge (const vec<basic_block> &path)
{
  gcc_checking_assert (path.length () > 1);
  switch (gimple_code (m_last_stmt))
    {
    case GIMPLE_COND:
      return find_taken_edge_cond (path, as_a<gcond *> (m_last_stmt));

    case GIMPLE_SWITCH:
      return find_taken_edge_switch (path, as_a<gswitch *> (m_last_stmt));

    default:
      return NULL;
    }
}
// Same as find_taken_edge, but for paths ending in a switch.

edge
back_threader::find_taken_edge_switch (const vec<basic_block> &path,
				       gswitch *sw)
{
  tree name = gimple_switch_index (sw);
  int_range_max r;

  path_range_query solver (*m_ranger, path, m_imports, m_flags & BT_RESOLVE);
  solver.range_of_expr (r, name, sw);

  if (r.undefined_p ())
    return UNREACHABLE_EDGE;

  if (r.varying_p ())
    return NULL;

  tree label = find_case_label_range (sw, &r);
  if (!label)
    return NULL;

  return find_edge (gimple_bb (sw), label_to_block (cfun, CASE_LABEL (label)));
}
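
// In other words, if the path-sensitive range of the switch index is
// narrow enough that every value it can take maps to a single case
// label, the edge to that label's block is the known taken edge; an
// undefined range means the path itself cannot be executed.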
// Same as find_taken_edge, but for paths ending in a GIMPLE_COND.

edge
back_threader::find_taken_edge_cond (const vec<basic_block> &path,
				     gcond *cond)
{
  int_range_max r;

  path_range_query solver (*m_ranger, path, m_imports, m_flags & BT_RESOLVE);
  solver.range_of_stmt (r, cond);

  if (solver.unreachable_path_p ())
    return UNREACHABLE_EDGE;

  int_range<2> true_range = range_true ();
  int_range<2> false_range = range_false ();

  if (r == true_range || r == false_range)
    {
      edge e_true, e_false;
      basic_block bb = gimple_bb (cond);
      extract_true_false_edges_from_block (bb, &e_true, &e_false);
      return r == true_range ? e_true : e_false;
    }
  return NULL;
}
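
// For instance, on a path where the solver proves x_5 is always in
// [0, 3], a final conditional "if (x_5 > 10)" folds to false and the
// false edge is returned; if the condition is neither constant true
// nor constant false along the path, NULL signals that nothing is
// known.  (The SSA name and values above are only an illustration.)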
// Find jump threading paths to any of the SSA names in the
// INTERESTING bitmap, and register any such paths.
//
// BB is the current path being processed.
//
// OVERALL_PATHS is the search space up to this block.

void
back_threader::find_paths_to_names (basic_block bb, bitmap interesting,
				    unsigned overall_paths,
				    back_threader_profitability &profit)
{
  if (m_visited_bbs.add (bb))
    return;

  m_path.safe_push (bb);

  // Try to resolve the path without looking back.  Avoid resolving paths
  // we know are large but are not (yet) recognized as Finite State Machine.
  // ???  Ideally we'd explore the cheapest path to the loop backedge here,
  // avoiding the exponential greedy search and only start that from there.
  // Precomputing a path-size-to-immediate-dominator-of-successor for each
  // edge might help here.  Alternatively copying divergent control flow
  // on the way to the backedge could be worthwhile.
  bool large_non_fsm;
  if (m_path.length () > 1
      && (!profit.possibly_profitable_path_p (m_path, m_name, &large_non_fsm)
	  || (!large_non_fsm
	      && maybe_register_path (profit))))
    ;

  // The backwards thread copier cannot copy blocks that do not belong
  // to the same loop, so when the new source of the path entry no
  // longer belongs to it we don't need to search further.
  else if (m_path[0]->loop_father != bb->loop_father)
    ;

  // Continue looking for ways to extend the path but limit the
  // search space along a branch.
  else if ((overall_paths = overall_paths * EDGE_COUNT (bb->preds))
	   <= (unsigned) param_max_jump_thread_paths)
    {
      // For further greedy searching we want to remove interesting
      // names defined in BB but add ones on the PHI edges for the
      // respective edges and adding imports from those stmts.
      // We do this by starting with all names
      // not defined in BB as interesting, collecting a list of
      // interesting PHIs in BB on the fly.  Then we iterate over
      // predecessor edges, adding interesting PHI edge defs to
      // the set of interesting names to consider when processing it.
      auto_bitmap new_interesting;
      auto_vec<int, 16> new_imports;
      auto_vec<gphi *, 4> interesting_phis;
      bitmap_iterator bi;
      unsigned i;
      auto_vec<tree, 16> worklist;
      EXECUTE_IF_SET_IN_BITMAP (interesting, 0, i, bi)
	{
	  tree name = ssa_name (i);
	  gimple *def_stmt = SSA_NAME_DEF_STMT (name);
	  /* Imports remain interesting.  */
	  if (gimple_bb (def_stmt) != bb)
	    {
	      bitmap_set_bit (new_interesting, i);
	      continue;
	    }
	  worklist.quick_push (name);
	  while (!worklist.is_empty ())
	    {
	      tree name = worklist.pop ();
	      gimple *def_stmt = SSA_NAME_DEF_STMT (name);
	      /* Newly discovered imports are interesting.  */
	      if (gimple_bb (def_stmt) != bb)
		{
		  bitmap_set_bit (new_interesting, SSA_NAME_VERSION (name));
		  continue;
		}
	      /* Local PHIs participate in renaming below.  */
	      if (gphi *phi = dyn_cast<gphi *> (def_stmt))
		{
		  tree res = gimple_phi_result (phi);
		  if (!SSA_NAME_OCCURS_IN_ABNORMAL_PHI (res))
		    interesting_phis.safe_push (phi);
		}
	      /* For other local defs process their uses, amending
		 imports on the way.  */
	      else
		{
		  tree ssa[3];
		  unsigned lim = gimple_range_ssa_names (ssa, 3, def_stmt);
		  for (unsigned j = 0; j < lim; ++j)
		    {
		      tree rhs = ssa[j];
		      if (rhs
			  && bitmap_set_bit (m_imports,
					     SSA_NAME_VERSION (rhs)))
			{
			  new_imports.safe_push (SSA_NAME_VERSION (rhs));
			  worklist.safe_push (rhs);
			}
		    }
		}
	    }
	}
      if (!bitmap_empty_p (new_interesting)
	  || !interesting_phis.is_empty ())
	{
	  auto_vec<int, 4> unwind (interesting_phis.length ());
	  auto_vec<int, 4> imports_unwind (interesting_phis.length ());
	  edge_iterator iter;
	  edge e;
	  FOR_EACH_EDGE (e, iter, bb->preds)
	    {
	      if (e->flags & EDGE_ABNORMAL
		  // This is like path_crosses_loops in profitable_path_p but
		  // more restrictive to avoid peeling off loop iterations (see
		  // tree-ssa/pr14341.c for an example).
		  // ???  Note this restriction only applied when visiting an
		  // interesting PHI with the former resolve_phi.
		  || (!interesting_phis.is_empty ()
		      && m_path[0]->loop_father != e->src->loop_father))
		continue;
	      for (gphi *phi : interesting_phis)
		{
		  tree def = PHI_ARG_DEF_FROM_EDGE (phi, e);
		  if (TREE_CODE (def) == SSA_NAME)
		    {
		      int ver = SSA_NAME_VERSION (def);
		      if (bitmap_set_bit (new_interesting, ver))
			{
			  if (bitmap_set_bit (m_imports, ver))
			    imports_unwind.quick_push (ver);
			  unwind.quick_push (ver);
			}
		    }
		}
	      find_paths_to_names (e->src, new_interesting, overall_paths,
				   profit);
	      // Restore new_interesting.
	      for (int def : unwind)
		bitmap_clear_bit (new_interesting, def);
	      unwind.truncate (0);
	      // Restore m_imports.
	      for (int def : imports_unwind)
		bitmap_clear_bit (m_imports, def);
	      imports_unwind.truncate (0);
	    }
	}
      /* m_imports tracks all interesting names on the path, so when
	 backtracking we have to restore it.  */
      for (int j : new_imports)
	bitmap_clear_bit (m_imports, j);
    }
  else if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "  FAIL: Search space limit %d reached.\n",
	     param_max_jump_thread_paths);

  // Reset things to their original state.
  m_path.pop ();
  m_visited_bbs.remove (bb);
}
// Search backwards from BB looking for paths where the final
// conditional of a block may be threaded to a successor block.  Record
// such paths for jump threading.

void
back_threader::maybe_thread_block (basic_block bb)
{
  if (EDGE_COUNT (bb->succs) <= 1)
    return;

  gimple *stmt = *gsi_last_bb (bb);
  if (!stmt)
    return;

  enum gimple_code code = gimple_code (stmt);
  tree name;
  if (code == GIMPLE_SWITCH)
    name = gimple_switch_index (as_a <gswitch *> (stmt));
  else if (code == GIMPLE_COND)
    name = gimple_cond_lhs (stmt);
  else
    return;

  m_last_stmt = stmt;
  m_visited_bbs.empty ();
  m_path.truncate (0);
  m_name = name;

  // We compute imports of the path during discovery starting
  // just with names used in the conditional.
  bitmap_clear (m_imports);
  ssa_op_iter iter;
  FOR_EACH_SSA_TREE_OPERAND (name, stmt, iter, SSA_OP_USE)
    {
      if (!gimple_range_ssa_p (name))
	return;
      bitmap_set_bit (m_imports, SSA_NAME_VERSION (name));
    }

  // Interesting is the set of imports we have not yet seen
  // the definition of.  So while imports only grow, the
  // set of interesting defs dwindles and once empty we can
  // stop searching.
  auto_bitmap interesting;
  bitmap_copy (interesting, m_imports);
  back_threader_profitability profit (m_flags & BT_SPEED, stmt);
  find_paths_to_names (bb, interesting, 1, profit);
}
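
// Note the early return above: a conditional whose operands are not
// all range-friendly SSA names (gimple_range_ssa_p) is simply skipped,
// since the path solver would have nothing to reason about.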
DEBUG_FUNCTION void
debug (const vec<basic_block> &path)
{
  dump_path (stderr, path);
  fputc ('\n', stderr);
}

void
back_threader::dump (FILE *out)
{
  fprintf (out, "\nCandidates for pre-computation:\n");
  fprintf (out, "===================================\n");

  bitmap_iterator bi;
  unsigned i;

  EXECUTE_IF_SET_IN_BITMAP (m_imports, 0, i, bi)
    {
      tree name = ssa_name (i);
      print_generic_expr (out, name, TDF_NONE);
      fprintf (out, "\n");
    }
}

DEBUG_FUNCTION void
back_threader::debug ()
{
  dump (stderr);
}
/* Examine jump threading path PATH and return TRUE if it is possibly
   profitable to thread it, otherwise return FALSE.  If this function
   returns TRUE profitable_path_p might not be satisfied but when
   the path is extended it might be.  In particular indicate in
   *LARGE_NON_FSM whether the thread is too large for a non-FSM thread
   but would be OK if we extend the path to cover the loop backedge.

   NAME is the SSA_NAME of the variable we found to have a constant
   value on PATH.  If unknown, SSA_NAME is NULL.

   ???  It seems we should be able to loosen some of the restrictions in
   this function after loop optimizations have run.  */

bool
back_threader_profitability::possibly_profitable_path_p
				  (const vec<basic_block> &m_path, tree name,
				   bool *large_non_fsm)
{
  gcc_checking_assert (!m_path.is_empty ());

  /* We can have an empty path here (excluding the DEF block) when the
     statement that makes a conditional generate a compile-time
     constant result is in the same block as the conditional.

     That's not really a jump threading opportunity, but instead is
     simple cprop & simplification.  We could handle it here if we
     wanted by wiring up all the incoming edges.  If we run this
     early in IPA, that might be worth doing.  For now we just
     reject that case.  */
  if (m_path.length () <= 1)
    return false;

  gimple_stmt_iterator gsi;
  loop_p loop = m_path[0]->loop_father;

  // We recompute the following; when we rewrite possibly_profitable_path_p
  // to work incrementally on added BBs we have to unwind them on backtracking.
  m_n_insns = 0;
  m_threaded_through_latch = false;
  m_multiway_branch_in_path = false;
  m_contains_hot_bb = false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Checking profitability of path (backwards): ");
  /* Count the number of instructions on the path: as these instructions
     will have to be duplicated, we will not record the path if there
     are too many instructions on the path.  Also check that all the
     blocks in the path belong to a single loop.  */
  for (unsigned j = 0; j < m_path.length (); j++)
    {
      basic_block bb = m_path[j];

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, " bb:%i", bb->index);
      /* Remember, blocks in the path are stored in opposite order in
	 the PATH array.  The last entry in the array represents the
	 block with an outgoing edge that we will redirect to the jump
	 threading path.  Thus we don't care how many statements are
	 in that block because it will not be copied or whether or not
	 it ends in a multiway branch.  */
      if (j < m_path.length () - 1)
	{
	  int orig_n_insns = m_n_insns;
	  /* PHIs in the path will create degenerate PHIS in the
	     copied path which will then get propagated away, so
	     looking at just the duplicate path the PHIs would
	     seem unimportant.

	     But those PHIs, because they're assignments to objects
	     typically with lives that exist outside the thread path,
	     will tend to generate PHIs (or at least new PHI arguments)
	     at points where we leave the thread path and rejoin
	     the original blocks.  So we do want to account for them.

	     We ignore virtual PHIs.  We also ignore cases where BB
	     has a single incoming edge.  That's the most common
	     degenerate PHI we'll see here.  Finally we ignore PHIs
	     that are associated with the value we're tracking as
	     that object likely dies.  */
	  if (EDGE_COUNT (bb->succs) > 1 && EDGE_COUNT (bb->preds) > 1)
	    {
	      for (gphi_iterator gsip = gsi_start_phis (bb);
		   !gsi_end_p (gsip);
		   gsi_next (&gsip))
		{
		  gphi *phi = gsip.phi ();
		  tree dst = gimple_phi_result (phi);

		  /* Note that if both NAME and DST are anonymous
		     SSA_NAMEs, then we do not have enough information
		     to consider them associated.  */
		  if (dst != name
		      && name
		      && TREE_CODE (name) == SSA_NAME
		      && (SSA_NAME_VAR (dst) != SSA_NAME_VAR (name)
			  || !SSA_NAME_VAR (dst))
		      && !virtual_operand_p (dst))
		    ++m_n_insns;
		}
	    }

	  if (!m_contains_hot_bb && m_speed_p)
	    m_contains_hot_bb |= optimize_bb_for_speed_p (bb);
	  for (gsi = gsi_after_labels (bb);
	       !gsi_end_p (gsi);
	       gsi_next_nondebug (&gsi))
	    {
	      /* Do not allow OpenACC loop markers and __builtin_constant_p on
		 threading paths.  The latter is disallowed, because an
		 expression might be constant on two threading paths, and
		 become non-constant (i.e.: phi) when they merge.  */
	      gimple *stmt = gsi_stmt (gsi);
	      if (gimple_call_internal_p (stmt, IFN_UNIQUE)
		  || gimple_call_builtin_p (stmt, BUILT_IN_CONSTANT_P))
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fputc ('\n', dump_file);
		  return false;
		}
	      /* Do not count empty statements and labels.  */
	      if (gimple_code (stmt) != GIMPLE_NOP
		  && !is_gimple_debug (stmt))
		m_n_insns += estimate_num_insns (stmt, &eni_size_weights);
	    }
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " (%i insns)", m_n_insns - orig_n_insns);

	  /* We do not look at the block with the threaded branch
	     in this loop.  So if any block with a last statement that
	     is a GIMPLE_SWITCH or GIMPLE_GOTO is seen, then we have a
	     multiway branch on our path.

	     The block in PATH[0] is special, it's the block where we're
	     going to be able to eliminate its branch.  */
	  if (j > 0)
	    {
	      gimple *last = *gsi_last_bb (bb);
	      if (last
		  && (gimple_code (last) == GIMPLE_SWITCH
		      || gimple_code (last) == GIMPLE_GOTO))
		m_multiway_branch_in_path = true;
	    }
	}

      /* Note if we thread through the latch, we will want to include
	 the last entry in the array when determining if we thread
	 through the loop latch.  */
      if (loop->latch == bb)
	{
	  m_threaded_through_latch = true;
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " (latch)");
	}
    }
  /* We are going to remove the control statement at the end of the
     last block in the threading path.  So don't count it against our
     statement count.  */
  m_n_insns -= m_exit_jump_benefit;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\n  Control statement insns: %i\n"
	     "  Overall: %i insns\n",
	     m_exit_jump_benefit, m_n_insns);

  /* Threading is profitable if the path duplicated is hot but also
     in a case we separate cold path from hot path and permit optimization
     of the hot path later.  Be on the aggressive side here.  In some
     testcases, as in PR 78407 this leads to noticeable improvements.  */
  if (m_speed_p)
    {
      if (m_n_insns >= param_max_fsm_thread_path_insns)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "  FAIL: Jump-thread path not considered: "
		     "the number of instructions on the path "
		     "exceeds PARAM_MAX_FSM_THREAD_PATH_INSNS.\n");
	  return false;
	}
      edge entry = find_edge (m_path[m_path.length () - 1],
			      m_path[m_path.length () - 2]);
      if (probably_never_executed_edge_p (cfun, entry))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "  FAIL: Jump-thread path not considered: "
		     "path entry is probably never executed.\n");
	  return false;
	}
    }
  else if (m_n_insns > 1)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  FAIL: Jump-thread path not considered: "
		 "duplication of %i insns is needed and optimizing for size.\n",
		 m_n_insns);
      return false;
    }

  /* The generic copier used by the backthreader does not re-use an
     existing threading path to reduce code duplication.  So for that
     case, drastically reduce the number of statements we are allowed
     to copy.  We don't know yet whether we will thread through the latch
     so we have to be permissive and continue threading, but indicate
     to the caller the thread, if final, wouldn't be profitable.  */
  if ((!m_threaded_multiway_branch
       || !loop->latch
       || loop->latch->index == EXIT_BLOCK)
      && (m_n_insns * param_fsm_scale_path_stmts
	  >= param_max_jump_thread_duplication_stmts))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  FAIL: Did not thread around loop and would copy too "
		 "many statements.\n");
      return false;
    }
  *large_non_fsm = (!(m_threaded_through_latch && m_threaded_multiway_branch)
		    && (m_n_insns * param_fsm_scale_path_stmts
			>= param_max_jump_thread_duplication_stmts));

  if (dump_file && (dump_flags & TDF_DETAILS))
    fputc ('\n', dump_file);
  return true;
}
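
/* As an illustration of the size test above (the concrete numbers are
   hypothetical; the real limits come from the corresponding --param
   options): with param_fsm_scale_path_stmts == 2 and
   param_max_jump_thread_duplication_stmts == 15, a path that still
   needs 8 statements copied scores 16 >= 15 and is flagged as too
   large unless it ends up threading a multiway branch through the
   latch.  */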
/* Examine jump threading path PATH and return TRUE if it is profitable to
   thread it, otherwise return FALSE.

   The taken edge out of the path is TAKEN_EDGE.

   CREATES_IRREDUCIBLE_LOOP is set to TRUE if threading this path
   would create an irreducible loop.

   ???  It seems we should be able to loosen some of the restrictions in
   this function after loop optimizations have run.  */

bool
back_threader_profitability::profitable_path_p (const vec<basic_block> &m_path,
						edge taken_edge,
						bool *creates_irreducible_loop)
{
  // We can assume that possibly_profitable_path_p holds here.

  loop_p loop = m_path[0]->loop_father;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Checking profitability of path (backwards): ");

  /* If this path threaded through the loop latch back into the
     same loop and the destination does not dominate the loop
     latch, then this thread would create an irreducible loop.  */
  *creates_irreducible_loop = false;
  if (m_threaded_through_latch
      && loop == taken_edge->dest->loop_father
      && (determine_bb_domination_status (loop, taken_edge->dest)
	  == DOMST_NONDOMINATING))
    *creates_irreducible_loop = true;

  /* Threading is profitable if the path duplicated is hot but also
     in a case we separate cold path from hot path and permit optimization
     of the hot path later.  Be on the aggressive side here.  In some
     testcases, as in PR 78407 this leads to noticeable improvements.  */
  if (m_speed_p
      && (optimize_edge_for_speed_p (taken_edge) || m_contains_hot_bb))
    {
      if (probably_never_executed_edge_p (cfun, taken_edge))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "  FAIL: Jump-thread path not considered: "
		     "path leads to probably never executed edge.\n");
	  return false;
	}
    }
  else if (m_n_insns > 1)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  FAIL: Jump-thread path not considered: "
		 "duplication of %i insns is needed and optimizing for size.\n",
		 m_n_insns);
      return false;
    }

  /* We avoid creating irreducible inner loops unless we thread through
     a multiway branch, in which case we have deemed it worth losing
     other loop optimizations later.

     We also consider it worth creating an irreducible inner loop after
     loop optimizations if the number of copied statements is low.  */
  if (!m_threaded_multiway_branch
      && *creates_irreducible_loop
      && (!(cfun->curr_properties & PROP_loop_opts_done)
	  || (m_n_insns * param_fsm_scale_path_stmts
	      >= param_max_jump_thread_duplication_stmts)))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  FAIL: Would create irreducible loop early without "
		 "threading multiway branch.\n");
      /* We compute creates_irreducible_loop only late.  */
      return false;
    }

  /* The generic copier used by the backthreader does not re-use an
     existing threading path to reduce code duplication.  So for that
     case, drastically reduce the number of statements we are allowed
     to copy.  */
  if (!(m_threaded_through_latch && m_threaded_multiway_branch)
      && (m_n_insns * param_fsm_scale_path_stmts
	  >= param_max_jump_thread_duplication_stmts))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  FAIL: Did not thread around loop and would copy too "
		 "many statements.\n");
      return false;
    }

  /* When there is a multi-way branch on the path, then threading can
     explode the CFG due to duplicating the edges for that multi-way
     branch.  So like above, only allow a multi-way branch on the path
     if we actually thread a multi-way branch.  */
  if (!m_threaded_multiway_branch && m_multiway_branch_in_path)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  FAIL: Thread through multiway branch without threading "
		 "a multiway branch.\n");
      return false;
    }

  /* Threading through an empty latch would cause code to be added to
     the latch.  This could alter the loop form sufficiently to cause
     loop optimizations to fail.  Disable these threads until after
     loop optimizations have run.  */
  if ((m_threaded_through_latch || taken_edge->dest == loop->latch)
      && !(cfun->curr_properties & PROP_loop_opts_done)
      && empty_block_p (loop->latch))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  FAIL: Thread through latch before loop opts would create "
		 "non-empty latch\n");
      return false;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fputc ('\n', dump_file);
  return true;
}
/* The current path PATH is a vector of blocks forming a jump threading
   path in reverse order.  TAKEN_EDGE is the edge taken from path[0].

   Convert the current path into the form used by register_jump_thread
   and register it.

   Return TRUE if successful or FALSE otherwise.  */

bool
back_threader_registry::register_path (const vec<basic_block> &m_path,
				       edge taken_edge)
{
  vec<jump_thread_edge *> *jump_thread_path = allocate_thread_path ();

  // The generic copier ignores the edge type.  We can build the
  // thread edges with any type.
  for (unsigned int j = 0; j + 1 < m_path.length (); j++)
    {
      basic_block bb1 = m_path[m_path.length () - j - 1];
      basic_block bb2 = m_path[m_path.length () - j - 2];

      edge e = find_edge (bb1, bb2);
      gcc_assert (e);
      push_edge (jump_thread_path, e, EDGE_COPY_SRC_BLOCK);
    }

  push_edge (jump_thread_path, taken_edge, EDGE_NO_COPY_SRC_BLOCK);
  return register_jump_thread (jump_thread_path);
}
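
// For example, an m_path of {B4, B5, B7} (stored destination-first)
// with TAKEN_EDGE B4->B9 is registered as the edge sequence
// B7->B5, B5->B4 (both EDGE_COPY_SRC_BLOCK) followed by B4->B9
// (EDGE_NO_COPY_SRC_BLOCK).  Block numbers are illustrative only.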
// Thread all suitable paths in the current function.
//
// Return TODO_flags.

unsigned int
back_threader::thread_blocks ()
{
  basic_block bb;
  FOR_EACH_BB_FN (bb, m_fun)
    if (EDGE_COUNT (bb->succs) > 1)
      maybe_thread_block (bb);

  bool changed = m_registry.thread_through_all_blocks (true);

  if (m_flags & BT_SPEED)
    return changed ? TODO_cleanup_cfg : 0;

  return false;
}
namespace {

const pass_data pass_data_early_thread_jumps =
{
  GIMPLE_PASS, /* type */
  "ethread", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_SSA_THREAD_JUMPS, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  ( TODO_cleanup_cfg | TODO_update_ssa ), /* todo_flags_finish */
};

const pass_data pass_data_thread_jumps =
{
  GIMPLE_PASS, /* type */
  "thread", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_SSA_THREAD_JUMPS, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};

const pass_data pass_data_thread_jumps_full =
{
  GIMPLE_PASS, /* type */
  "threadfull", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_SSA_THREAD_JUMPS, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};
// Early jump threading pass optimizing for size.
class pass_early_thread_jumps : public gimple_opt_pass
{
public:
  pass_early_thread_jumps (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_early_thread_jumps, ctxt)
  {}

  opt_pass * clone () override
  {
    return new pass_early_thread_jumps (m_ctxt);
  }
  void set_pass_param (unsigned int, bool param) override
  {
    m_first = param;
  }
  bool gate (function *) override
  {
    return flag_thread_jumps;
  }
  unsigned int execute (function *fun) override
  {
    back_threader threader (fun, BT_NONE, m_first);
    return threader.thread_blocks ();
  }
private:
  bool m_first;
};
// Jump threading pass without resolving of unknown SSAs.
class pass_thread_jumps : public gimple_opt_pass
{
public:
  pass_thread_jumps (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_thread_jumps, ctxt)
  {}

  opt_pass * clone (void) override
  {
    return new pass_thread_jumps (m_ctxt);
  }
  void set_pass_param (unsigned int, bool param) override
  {
    m_first = param;
  }
  bool gate (function *) override
  {
    return flag_thread_jumps && flag_expensive_optimizations;
  }
  unsigned int execute (function *fun) override
  {
    back_threader threader (fun, BT_SPEED, m_first);
    return threader.thread_blocks ();
  }
private:
  bool m_first;
};
// Jump threading pass that fully resolves unknown SSAs.
class pass_thread_jumps_full : public gimple_opt_pass
{
public:
  pass_thread_jumps_full (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_thread_jumps_full, ctxt)
  {}

  opt_pass * clone (void) override
  {
    return new pass_thread_jumps_full (m_ctxt);
  }
  void set_pass_param (unsigned int, bool param) override
  {
    m_first = param;
  }
  bool gate (function *) override
  {
    return flag_thread_jumps && flag_expensive_optimizations;
  }
  unsigned int execute (function *fun) override
  {
    back_threader threader (fun, BT_SPEED | BT_RESOLVE, m_first);
    return threader.thread_blocks ();
  }
private:
  bool m_first;
};
} // anonymous namespace

gimple_opt_pass *
make_pass_thread_jumps (gcc::context *ctxt)
{
  return new pass_thread_jumps (ctxt);
}

gimple_opt_pass *
make_pass_thread_jumps_full (gcc::context *ctxt)
{
  return new pass_thread_jumps_full (ctxt);
}

gimple_opt_pass *
make_pass_early_thread_jumps (gcc::context *ctxt)
{
  return new pass_early_thread_jumps (ctxt);
}