/* SSA Jump Threading
   Copyright (C) 2005-2024 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "predict.h"
#include "tree.h"
#include "gimple.h"
#include "fold-const.h"
#include "cfgloop.h"
#include "gimple-iterator.h"
#include "tree-cfg.h"
#include "tree-ssa-threadupdate.h"
#include "tree-ssa-loop.h"
#include "cfganal.h"
#include "tree-pass.h"
#include "gimple-ssa.h"
#include "tree-phinodes.h"
#include "tree-inline.h"
#include "tree-vectorizer.h"
#include "value-range.h"
#include "gimple-range.h"
#include "tree-ssa-threadedge.h"
#include "gimple-range-path.h"
#include "ssa.h"
#include "tree-cfgcleanup.h"
#include "tree-pretty-print.h"
#include "cfghooks.h"
#include "dbgcnt.h"
// Path registry for the backwards threader.  After all paths have been
// registered with register_path(), thread_through_all_blocks() is called
// to modify the CFG.

class back_threader_registry : public back_jt_path_registry
{
public:
  bool register_path (const vec<basic_block> &, edge taken);
};
// Class to abstract the profitability code for the backwards threader.

class back_threader_profitability
{
public:
  back_threader_profitability (bool speed_p, gimple *stmt);
  bool possibly_profitable_path_p (const vec<basic_block> &, bool *);
  bool profitable_path_p (const vec<basic_block> &,
			  edge taken, bool *irreducible_loop);
private:
  const bool m_speed_p;
  int m_exit_jump_benefit;
  bool m_threaded_multiway_branch;
  // The following are computed by possibly_profitable_path_p.
  bool m_threaded_through_latch;
  bool m_multiway_branch_in_path;
  bool m_contains_hot_bb;
  int m_n_insns;
};
back_threader_profitability::back_threader_profitability (bool speed_p,
							   gimple *last)
  : m_speed_p (speed_p)
{
  m_threaded_multiway_branch = (gimple_code (last) == GIMPLE_SWITCH
				|| gimple_code (last) == GIMPLE_GOTO);
  // The forward threader has estimate_threading_killed_stmts; in
  // particular it estimates further DCE from eliminating the exit
  // control stmt.
  m_exit_jump_benefit = estimate_num_insns (last, &eni_size_weights);
}
// Back threader flags.
enum back_threader_flags
{
  BT_NONE = 0,
  // Generate fast code at the expense of code size.
  BT_SPEED = 1 << 0,
  // Resolve unknown SSAs on entry to a threading path.  If set, use the
  // ranger.  If not, assume all ranges on entry to a path are VARYING.
  BT_RESOLVE = 1 << 1
};
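// As registered at the bottom of this file: the early "ethread" pass
// uses BT_NONE, the "thread1"/"thread2" passes use BT_SPEED, and the
// "threadfull1"/"threadfull2" passes use BT_SPEED | BT_RESOLVE.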
class back_threader
{
public:
  back_threader (function *fun, unsigned flags, bool first);
  ~back_threader ();
  unsigned thread_blocks ();
private:
  void maybe_thread_block (basic_block bb);
  bool debug_counter ();
  edge maybe_register_path (back_threader_profitability &);
  void maybe_register_path_dump (edge taken_edge);
  void find_paths_to_names (basic_block bb, bitmap imports, unsigned,
			    back_threader_profitability &);
  edge find_taken_edge (const vec<basic_block> &path);
  edge find_taken_edge_cond (const vec<basic_block> &path, gcond *);
  edge find_taken_edge_switch (const vec<basic_block> &path, gswitch *);
  virtual void debug ();
  virtual void dump (FILE *out);

  back_threader_registry m_registry;

  // Current path being analyzed.
  auto_vec<basic_block> m_path;
  // Hash to mark visited BBs while analyzing a path.
  hash_set<basic_block> m_visited_bbs;
  // The set of SSA names, any of which could potentially change the
  // value of the final conditional in a path.
  auto_bitmap m_imports;
  // The last statement in the path.
  gimple *m_last_stmt;
  // Marker to differentiate unreachable edges.
  static const edge UNREACHABLE_EDGE;
  function *m_fun;
  // Ranger for the path solver.
  gimple_ranger *m_ranger;
  // Pass behavior flags (see back_threader_flags).  If BT_RESOLVE is
  // set, unknown SSA names along a path are resolved with the ranger;
  // otherwise they are assumed to be VARYING.  Resolving is more
  // precise but slower.
  unsigned m_flags;
  // Set to TRUE for the first of each thread[12] pass or the first of
  // each threadfull[12] pass.  This is used to differentiate between
  // the different threading passes so we can set up debug counters.
  bool m_first;
};
// Used to differentiate unreachable edges, so we may stop the search
// in a given direction.

const edge back_threader::UNREACHABLE_EDGE = (edge) -1;
back_threader::back_threader (function *fun, unsigned flags, bool first)
  : m_first (first)
{
  if (flags & BT_SPEED)
    loop_optimizer_init (LOOPS_HAVE_PREHEADERS | LOOPS_HAVE_SIMPLE_LATCHES);
  else
    loop_optimizer_init (AVOID_CFG_MODIFICATIONS);

  m_fun = fun;
  m_flags = flags;
  m_last_stmt = NULL;

  // The path solver needs EDGE_DFS_BACK in resolving mode.
  if (flags & BT_RESOLVE)
    mark_dfs_back_edges ();

  m_ranger = new gimple_ranger;
}
back_threader::~back_threader ()
{
  delete m_ranger;
  loop_optimizer_finalize ();
}
// A wrapper for the various debug counters for the threading passes.
// Returns TRUE if it's OK to register the current threading
// candidate.

bool
back_threader::debug_counter ()
{
  // The ethread pass is mostly harmless ;-).
  if ((m_flags & BT_SPEED) == 0)
    return true;

  if (m_flags & BT_RESOLVE)
    {
      if (m_first && !dbg_cnt (back_threadfull1))
	return false;

      if (!m_first && !dbg_cnt (back_threadfull2))
	return false;
    }
  else
    {
      if (m_first && !dbg_cnt (back_thread1))
	return false;

      if (!m_first && !dbg_cnt (back_thread2))
	return false;
    }
  return true;
}
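// For example, "-fdbg-cnt=back_thread1:10" lets only the first ten
// candidates of the first "thread" pass register, which helps bisect
// threading-related miscompiles.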
static void
dump_path (FILE *dump_file, const vec<basic_block> &path)
{
  for (unsigned i = path.length (); i > 0; --i)
    {
      basic_block bb = path[i - 1];
      fprintf (dump_file, "%d", bb->index);
      if (i > 1)
	fprintf (dump_file, "->");
    }
}
// Dump details of an attempt to register a path.

void
back_threader::maybe_register_path_dump (edge taken)
{
  if (m_path.is_empty ())
    return;

  fprintf (dump_file, "path: ");
  dump_path (dump_file, m_path);
  fprintf (dump_file, "->");

  if (taken == UNREACHABLE_EDGE)
    fprintf (dump_file, "xx REJECTED (unreachable)\n");
  else if (taken)
    fprintf (dump_file, "%d SUCCESS\n", taken->dest->index);
  else
    fprintf (dump_file, "xx REJECTED\n");
}
// If an outgoing edge can be determined out of the current path,
// register it for jump threading and return the taken edge.
//
// Return NULL if it is unprofitable to thread this path, or the
// outgoing edge is unknown.  Return UNREACHABLE_EDGE if the path is
// unreachable.

edge
back_threader::maybe_register_path (back_threader_profitability &profit)
{
  edge taken_edge = find_taken_edge (m_path);

  if (taken_edge && taken_edge != UNREACHABLE_EDGE)
    {
      bool irreducible = false;
      if (profit.profitable_path_p (m_path, taken_edge, &irreducible)
	  && debug_counter ()
	  && m_registry.register_path (m_path, taken_edge))
	{
	  if (irreducible)
	    vect_free_loop_info_assumptions (m_path[0]->loop_father);
	}
      else
	taken_edge = NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    maybe_register_path_dump (taken_edge);

  return taken_edge;
}
// Return the known taken edge out of a path.  If the path can be
// determined to be unreachable, return UNREACHABLE_EDGE.  If no
// outgoing edge can be calculated, return NULL.

edge
back_threader::find_taken_edge (const vec<basic_block> &path)
{
  gcc_checking_assert (path.length () > 1);
  switch (gimple_code (m_last_stmt))
    {
    case GIMPLE_COND:
      return find_taken_edge_cond (path, as_a<gcond *> (m_last_stmt));

    case GIMPLE_SWITCH:
      return find_taken_edge_switch (path, as_a<gswitch *> (m_last_stmt));

    default:
      return NULL;
    }
}
// Same as find_taken_edge, but for paths ending in a switch.

edge
back_threader::find_taken_edge_switch (const vec<basic_block> &path,
				       gswitch *sw)
{
  tree name = gimple_switch_index (sw);
  int_range_max r;

  path_range_query solver (*m_ranger, path, m_imports, m_flags & BT_RESOLVE);
  solver.range_of_expr (r, name, sw);

  if (r.undefined_p ())
    return UNREACHABLE_EDGE;

  if (r.varying_p ())
    return NULL;

  tree label = find_case_label_range (sw, &r);
  if (!label)
    return NULL;

  return find_edge (gimple_bb (sw), label_to_block (cfun, CASE_LABEL (label)));
}
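// For instance, if the blocks already on the path pin the switch index
// to the single value 3, find_case_label_range returns the case label
// covering 3, and the edge to that case's block is the taken edge.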
// Same as find_taken_edge, but for paths ending in a GIMPLE_COND.

edge
back_threader::find_taken_edge_cond (const vec<basic_block> &path,
				     gcond *cond)
{
  int_range_max r;

  path_range_query solver (*m_ranger, path, m_imports, m_flags & BT_RESOLVE);
  solver.range_of_stmt (r, cond);

  if (solver.unreachable_path_p ())
    return UNREACHABLE_EDGE;

  int_range<2> true_range = range_true ();
  int_range<2> false_range = range_false ();

  if (r == true_range || r == false_range)
    {
      edge e_true, e_false;
      basic_block bb = gimple_bb (cond);
      extract_true_false_edges_from_block (bb, &e_true, &e_false);
      return r == true_range ? e_true : e_false;
    }
  return NULL;
}
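// As an example, if a block on the path sets x_1 = 0 and the path ends
// in "if (x_1 > 10)", the path query folds the condition to
// false_range, and the false edge out of the conditional's block is
// returned.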
// Find jump threading paths to any of the SSA names in the
// INTERESTING bitmap, and register any such paths.
//
// BB is the current path being processed.
//
// OVERALL_PATHS is the search space up to this block.

void
back_threader::find_paths_to_names (basic_block bb, bitmap interesting,
				    unsigned overall_paths,
				    back_threader_profitability &profit)
{
  if (m_visited_bbs.add (bb))
    return;

  m_path.safe_push (bb);

  // Try to resolve the path without looking back.  Avoid resolving paths
  // we know are large but are not (yet) recognized as Finite State Machine.
  // ???  Ideally we'd explore the cheapest path to the loop backedge here,
  // avoiding the exponential greedy search and only start that from there.
  // Precomputing a path-size-to-immediate-dominator-of-successor for each
  // edge might help here.  Alternatively copying divergent control flow
  // on the way to the backedge could be worthwhile.
  bool large_non_fsm;
  if (m_path.length () > 1
      && (!profit.possibly_profitable_path_p (m_path, &large_non_fsm)
	  || (!large_non_fsm
	      && maybe_register_path (profit))))
    ;
  // The backwards thread copier cannot copy blocks that do not belong
  // to the same loop, so when the new source of the path entry no
  // longer belongs to it we don't need to search further.
  else if (m_path[0]->loop_father != bb->loop_father)
    ;

  // Continue looking for ways to extend the path but limit the
  // search space along a branch.
  else if ((overall_paths = overall_paths * EDGE_COUNT (bb->preds))
	   <= (unsigned) param_max_jump_thread_paths)
    {
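      // Note OVERALL_PATHS is multiplied by the predecessor count at
      // every level, so a chain of N two-predecessor blocks can spawn
      // up to 2**N candidate paths; param_max_jump_thread_paths bounds
      // that blowup.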
      // For further greedy searching we want to remove interesting
      // names defined in BB, but add the names appearing on the PHI
      // edges of the respective predecessors, amending the imports
      // with names used by those defining stmts.  We do this by
      // starting with all names not defined in BB as interesting and
      // collecting a list of interesting PHIs in BB on the fly.  Then
      // we iterate over predecessor edges, adding interesting PHI edge
      // defs to the set of interesting names to consider when
      // processing it.
      auto_bitmap new_interesting;
      auto_vec<int, 16> new_imports;
      auto_vec<gphi *, 4> interesting_phis;
      bitmap_iterator bi;
      unsigned i;
      auto_vec<tree, 16> worklist;
      EXECUTE_IF_SET_IN_BITMAP (interesting, 0, i, bi)
	{
	  tree name = ssa_name (i);
	  gimple *def_stmt = SSA_NAME_DEF_STMT (name);
	  /* Imports remain interesting.  */
	  if (gimple_bb (def_stmt) != bb)
	    {
	      bitmap_set_bit (new_interesting, i);
	      continue;
	    }
	  worklist.quick_push (name);
	  while (!worklist.is_empty ())
	    {
	      tree name = worklist.pop ();
	      gimple *def_stmt = SSA_NAME_DEF_STMT (name);
	      /* Newly discovered imports are interesting.  */
	      if (gimple_bb (def_stmt) != bb)
		{
		  bitmap_set_bit (new_interesting, SSA_NAME_VERSION (name));
		  continue;
		}
	      /* Local PHIs participate in renaming below.  */
	      if (gphi *phi = dyn_cast<gphi *> (def_stmt))
		{
		  tree res = gimple_phi_result (phi);
		  if (!SSA_NAME_OCCURS_IN_ABNORMAL_PHI (res))
		    interesting_phis.safe_push (phi);
		}
	      /* For other local defs process their uses, amending
		 imports on the way.  */
	      else
		{
		  tree ssa[3];
		  unsigned lim = gimple_range_ssa_names (ssa, 3, def_stmt);
		  for (unsigned j = 0; j < lim; ++j)
		    {
		      tree rhs = ssa[j];
		      if (rhs
			  && bitmap_set_bit (m_imports,
					     SSA_NAME_VERSION (rhs)))
			{
			  new_imports.safe_push (SSA_NAME_VERSION (rhs));
			  worklist.safe_push (rhs);
			}
		    }
		}
	    }
	}
      if (!bitmap_empty_p (new_interesting)
	  || !interesting_phis.is_empty ())
	{
	  auto_vec<int, 4> unwind (interesting_phis.length ());
	  auto_vec<int, 4> imports_unwind (interesting_phis.length ());
	  edge_iterator iter;
	  edge e;
	  FOR_EACH_EDGE (e, iter, bb->preds)
	    {
	      if (e->flags & EDGE_ABNORMAL
		  // This is like path_crosses_loops in profitable_path_p but
		  // more restrictive to avoid peeling off loop iterations (see
		  // tree-ssa/pr14341.c for an example).
		  // ???  Note this restriction only applied when visiting an
		  // interesting PHI with the former resolve_phi.
		  || (!interesting_phis.is_empty ()
		      && m_path[0]->loop_father != e->src->loop_father))
		continue;
	      for (gphi *phi : interesting_phis)
		{
		  tree def = PHI_ARG_DEF_FROM_EDGE (phi, e);
		  if (TREE_CODE (def) == SSA_NAME)
		    {
		      int ver = SSA_NAME_VERSION (def);
		      if (bitmap_set_bit (new_interesting, ver))
			{
			  if (bitmap_set_bit (m_imports, ver))
			    imports_unwind.quick_push (ver);
			  unwind.quick_push (ver);
			}
		    }
		}
	      find_paths_to_names (e->src, new_interesting, overall_paths,
				   profit);
	      // Restore new_interesting.
	      for (int def : unwind)
		bitmap_clear_bit (new_interesting, def);
	      unwind.truncate (0);
	      // Restore m_imports.
	      for (int def : imports_unwind)
		bitmap_clear_bit (m_imports, def);
	      imports_unwind.truncate (0);
	    }
	}
      /* m_imports tracks all interesting names on the path, so when
	 backtracking we have to restore it.  */
      for (int j : new_imports)
	bitmap_clear_bit (m_imports, j);
    }
  else if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "  FAIL: Search space limit %d reached.\n",
	     param_max_jump_thread_paths);

  // Reset things to their original state.
  m_path.pop ();
  m_visited_bbs.remove (bb);
}
// Search backwards from BB looking for paths where the final
// conditional may be threaded to a successor block.  Record such paths
// for jump threading.

void
back_threader::maybe_thread_block (basic_block bb)
{
  if (EDGE_COUNT (bb->succs) <= 1)
    return;

  gimple *stmt = *gsi_last_bb (bb);
  if (!stmt)
    return;

  enum gimple_code code = gimple_code (stmt);
  if (code != GIMPLE_SWITCH
      && code != GIMPLE_COND)
    return;

  m_last_stmt = stmt;
  m_visited_bbs.empty ();
  m_path.truncate (0);

  // We compute imports of the path during discovery, starting
  // just with names used in the conditional.
  bitmap_clear (m_imports);
  ssa_op_iter iter;
  tree name;
  FOR_EACH_SSA_TREE_OPERAND (name, stmt, iter, SSA_OP_USE)
    {
      if (!gimple_range_ssa_p (name))
	return;
      bitmap_set_bit (m_imports, SSA_NAME_VERSION (name));
    }
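  // For example, for a block ending in "if (a_5 > b_7)" the initial
  // import set is { a_5, b_7 }; path discovery below may grow it with
  // the names these depend on.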
  // "Interesting" is the set of imports we have not yet seen the
  // definition of.  So while imports only grow, the set of interesting
  // defs dwindles, and once it is empty we can stop searching.
  auto_bitmap interesting;
  bitmap_copy (interesting, m_imports);
  back_threader_profitability profit (m_flags & BT_SPEED, stmt);
  find_paths_to_names (bb, interesting, 1, profit);
}
DEBUG_FUNCTION void
debug (const vec<basic_block> &path)
{
  dump_path (stderr, path);
  fputc ('\n', stderr);
}
void
back_threader::dump (FILE *out)
{
  fprintf (out, "\nCandidates for pre-computation:\n");
  fprintf (out, "===================================\n");

  bitmap_iterator bi;
  unsigned i;

  EXECUTE_IF_SET_IN_BITMAP (m_imports, 0, i, bi)
    {
      tree name = ssa_name (i);
      print_generic_expr (out, name, TDF_NONE);
      fprintf (out, "\n");
    }
}

void
back_threader::debug ()
{
  dump (stderr);
}
/* Examine jump threading path PATH and return TRUE if it is possibly
   profitable to thread it, otherwise return FALSE.  If this function
   returns TRUE, profitable_path_p might not be satisfied yet, but it
   may be once the path is extended.  In particular, indicate in
   *LARGE_NON_FSM whether the thread is too large for a non-FSM thread
   but would be OK if we extend the path to cover the loop backedge.

   ???  It seems we should be able to loosen some of the restrictions in
   this function after loop optimizations have run.  */

bool
back_threader_profitability::possibly_profitable_path_p
				  (const vec<basic_block> &m_path,
				   bool *large_non_fsm)
{
  gcc_checking_assert (!m_path.is_empty ());
  /* We can get an empty path here (excluding the DEF block) when the
     statement that makes a conditional generate a compile-time
     constant result is in the same block as the conditional.

     That's not really a jump threading opportunity, but instead is
     simple cprop & simplification.  We could handle it here if we
     wanted by wiring up all the incoming edges.  If we run this
     early in IPA, that might be worth doing.  For now we just
     reject that case.  */
  if (m_path.length () <= 1)
    return false;

  gimple_stmt_iterator gsi;
  loop_p loop = m_path[0]->loop_father;

  // We recompute the following.  When we rewrite possibly_profitable_path_p
  // to work incrementally on added BBs, we will have to unwind these on
  // backtracking.
  m_n_insns = 0;
  m_threaded_through_latch = false;
  m_multiway_branch_in_path = false;
  m_contains_hot_bb = false;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Checking profitability of path (backwards): ");

  /* Count the number of instructions on the path: as these instructions
     will have to be duplicated, we will not record the path if there
     are too many instructions on the path.  Also check that all the
     blocks in the path belong to a single loop.  */
  for (unsigned j = 0; j < m_path.length (); j++)
    {
      basic_block bb = m_path[j];

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, " bb:%i", bb->index);
      /* Remember, blocks in the path are stored in opposite order in
	 the PATH array.  The last entry in the array represents the
	 block with an outgoing edge that we will redirect to the jump
	 threading path.  Thus we don't care how many statements are
	 in that block because it will not be copied, or whether or not
	 it ends in a multiway branch.  */
      if (j < m_path.length () - 1)
	{
	  int orig_n_insns = m_n_insns;
	  if (!m_contains_hot_bb && m_speed_p)
	    m_contains_hot_bb |= optimize_bb_for_speed_p (bb);
	  for (gsi = gsi_after_labels (bb);
	       !gsi_end_p (gsi);
	       gsi_next_nondebug (&gsi))
	    {
	      /* Do not allow OpenACC loop markers and __builtin_constant_p on
		 threading paths.  The latter is disallowed, because an
		 expression might be constant on two threading paths, and
		 become non-constant (i.e.: a PHI) when they merge.  */
	      gimple *stmt = gsi_stmt (gsi);
	      if (gimple_call_internal_p (stmt, IFN_UNIQUE)
		  || gimple_call_builtin_p (stmt, BUILT_IN_CONSTANT_P))
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fputc ('\n', dump_file);
		  return false;
		}
	      /* Do not count empty statements and labels.  */
	      if (gimple_code (stmt) != GIMPLE_NOP
		  && !is_gimple_debug (stmt))
		m_n_insns += estimate_num_insns (stmt, &eni_size_weights);
	    }
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " (%i insns)", m_n_insns - orig_n_insns);

	  /* We do not look at the block with the threaded branch
	     in this loop.  So if any block with a last statement that
	     is a GIMPLE_SWITCH or GIMPLE_GOTO is seen, then we have a
	     multiway branch on our path.

	     The block in PATH[0] is special, it's the block where we're
	     going to be able to eliminate its branch.  */
	  if (j > 0)
	    {
	      gimple *last = *gsi_last_bb (bb);
	      if (last
		  && (gimple_code (last) == GIMPLE_SWITCH
		      || gimple_code (last) == GIMPLE_GOTO))
		m_multiway_branch_in_path = true;
	    }
	}

      /* Note if we thread through the latch, we will want to include
	 the last entry in the array when determining if we thread
	 through the loop latch.  */
      if (loop->latch == bb)
	{
	  m_threaded_through_latch = true;
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " (latch)");
	}
    }
  /* We are going to remove the control statement at the end of the
     last block in the threading path, so don't count it against our
     statement count.  */
  m_n_insns -= m_exit_jump_benefit;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\n  Control statement insns: %i\n"
	     "  Overall: %i insns\n",
	     m_exit_jump_benefit, m_n_insns);
  /* Threading is profitable if the path duplicated is hot, but also
     in a case where we separate a cold path from a hot path and permit
     optimization of the hot path later.  Be on the aggressive side
     here.  In some testcases, as in PR 78407, this leads to noticeable
     improvements.  */
  if (m_speed_p)
    {
      if (m_n_insns >= param_max_fsm_thread_path_insns)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "  FAIL: Jump-thread path not considered: "
		     "the number of instructions on the path "
		     "exceeds PARAM_MAX_FSM_THREAD_PATH_INSNS.\n");
	  return false;
	}
      edge entry = find_edge (m_path[m_path.length () - 1],
			      m_path[m_path.length () - 2]);
      if (probably_never_executed_edge_p (cfun, entry))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "  FAIL: Jump-thread path not considered: "
		     "path entry is probably never executed.\n");
	  return false;
	}
    }
  else if (m_n_insns > 1)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  FAIL: Jump-thread path not considered: "
		 "duplication of %i insns is needed and optimizing for "
		 "size.\n",
		 m_n_insns);
      return false;
    }
  /* The generic copier used by the backthreader does not re-use an
     existing threading path to reduce code duplication.  So for that
     case, drastically reduce the number of statements we are allowed
     to copy.  We don't know yet whether we will thread through the latch,
     so we have to be permissive and continue threading, but indicate
     to the caller that the thread, if final, wouldn't be profitable.  */
  if ((!m_threaded_multiway_branch
       || !loop->latch
       || loop->latch->index == EXIT_BLOCK)
      && (m_n_insns * param_fsm_scale_path_stmts
	  >= param_max_jump_thread_duplication_stmts))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  FAIL: Did not thread around loop and would copy too "
		 "many statements.\n");
      return false;
    }
  *large_non_fsm = (!(m_threaded_through_latch && m_threaded_multiway_branch)
		    && (m_n_insns * param_fsm_scale_path_stmts
			>= param_max_jump_thread_duplication_stmts));
  if (dump_file && (dump_flags & TDF_DETAILS))
    fputc ('\n', dump_file);
  return true;
}
/* Examine jump threading path PATH and return TRUE if it is profitable to
   thread it, otherwise return FALSE.

   The taken edge out of the path is TAKEN_EDGE.

   CREATES_IRREDUCIBLE_LOOP is set to TRUE if threading this path
   would create an irreducible loop.

   ???  It seems we should be able to loosen some of the restrictions in
   this function after loop optimizations have run.  */

bool
back_threader_profitability::profitable_path_p (const vec<basic_block> &m_path,
						edge taken_edge,
						bool *creates_irreducible_loop)
{
  // We can assume that possibly_profitable_path_p holds here.

  loop_p loop = m_path[0]->loop_father;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Checking profitability of path (backwards): ");

  /* If this path threaded through the loop latch back into the
     same loop and the destination does not dominate the loop
     latch, then this thread would create an irreducible loop.  */
  *creates_irreducible_loop = false;
  if (m_threaded_through_latch
      && loop == taken_edge->dest->loop_father
      && (determine_bb_domination_status (loop, taken_edge->dest)
	  == DOMST_NONDOMINATING))
    *creates_irreducible_loop = true;
  /* Threading is profitable if the path duplicated is hot, but also
     in a case where we separate a cold path from a hot path and permit
     optimization of the hot path later.  Be on the aggressive side
     here.  In some testcases, as in PR 78407, this leads to noticeable
     improvements.  */
  if (m_speed_p
      && (optimize_edge_for_speed_p (taken_edge) || m_contains_hot_bb))
    {
      if (probably_never_executed_edge_p (cfun, taken_edge))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "  FAIL: Jump-thread path not considered: "
		     "path leads to probably never executed edge.\n");
	  return false;
	}
    }
  else if (m_n_insns > 1)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  FAIL: Jump-thread path not considered: "
		 "duplication of %i insns is needed and optimizing for "
		 "size.\n",
		 m_n_insns);
      return false;
    }
  /* We avoid creating irreducible inner loops unless we thread through
     a multiway branch, in which case we have deemed it worth losing
     other loop optimizations later.

     We also consider it worth creating an irreducible inner loop after
     loop optimizations if the number of copied statements is low.  */
  if (!m_threaded_multiway_branch
      && *creates_irreducible_loop
      && (!(cfun->curr_properties & PROP_loop_opts_done)
	  || (m_n_insns * param_fsm_scale_path_stmts
	      >= param_max_jump_thread_duplication_stmts)))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  FAIL: Would create irreducible loop early without "
		 "threading multiway branch.\n");
      /* We compute creates_irreducible_loop only late.  */
      return false;
    }
  /* The generic copier used by the backthreader does not re-use an
     existing threading path to reduce code duplication.  So for that
     case, drastically reduce the number of statements we are allowed
     to copy.  */
  if (!(m_threaded_through_latch && m_threaded_multiway_branch)
      && (m_n_insns * param_fsm_scale_path_stmts
	  >= param_max_jump_thread_duplication_stmts))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  FAIL: Did not thread around loop and would copy too "
		 "many statements.\n");
      return false;
    }
  /* When there is a multi-way branch on the path, then threading can
     explode the CFG due to duplicating the edges for that multi-way
     branch.  So like above, only allow a multi-way branch on the path
     if we actually thread a multi-way branch.  */
  if (!m_threaded_multiway_branch && m_multiway_branch_in_path)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  FAIL: Thread through multiway branch without threading "
		 "a multiway branch.\n");
      return false;
    }
  /* Threading through an empty latch would cause code to be added to
     the latch.  This could alter the loop form sufficiently to cause
     loop optimizations to fail.  Disable these threads until after
     loop optimizations have run.  */
  if ((m_threaded_through_latch || taken_edge->dest == loop->latch)
      && !(cfun->curr_properties & PROP_loop_opts_done)
      && empty_block_p (loop->latch))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  FAIL: Thread through latch before loop opts would create "
		 "non-empty latch\n");
      return false;
    }
  if (dump_file && (dump_flags & TDF_DETAILS))
    fputc ('\n', dump_file);
  return true;
}
/* The current path PATH is a vector of blocks forming a jump threading
   path in reverse order.  TAKEN_EDGE is the edge taken from path[0].

   Convert the current path into the form used by register_jump_thread
   and register it.

   Return TRUE if successful or FALSE otherwise.  */

bool
back_threader_registry::register_path (const vec<basic_block> &m_path,
				       edge taken_edge)
{
  vec<jump_thread_edge *> *jump_thread_path = allocate_thread_path ();

  // The generic copier ignores the edge type.  We can build the
  // thread edges with any type.
  for (unsigned int j = 0; j + 1 < m_path.length (); j++)
    {
      basic_block bb1 = m_path[m_path.length () - j - 1];
      basic_block bb2 = m_path[m_path.length () - j - 2];

      edge e = find_edge (bb1, bb2);
      gcc_assert (e);
      push_edge (jump_thread_path, e, EDGE_COPY_SRC_BLOCK);
    }

  push_edge (jump_thread_path, taken_edge, EDGE_NO_COPY_SRC_BLOCK);
  return register_jump_thread (jump_thread_path);
}
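// For example, with m_path = { bb5, bb3, bb2 } (reverse order) and
// TAKEN_EDGE leaving bb5, the registered thread is bb2->bb3 (copy),
// bb3->bb5 (copy), then TAKEN_EDGE itself (no copy).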
// Thread all suitable paths in the current function.
//
// Return TODO_flags.

unsigned int
back_threader::thread_blocks ()
{
  basic_block bb;
  FOR_EACH_BB_FN (bb, m_fun)
    if (EDGE_COUNT (bb->succs) > 1)
      maybe_thread_block (bb);

  bool changed = m_registry.thread_through_all_blocks (true);

  if (m_flags & BT_SPEED)
    return changed ? TODO_cleanup_cfg : 0;

  return false;
}
namespace {

const pass_data pass_data_early_thread_jumps =
{
  GIMPLE_PASS,
  "ethread",
  OPTGROUP_NONE,
  TV_TREE_SSA_THREAD_JUMPS,
  ( PROP_cfg | PROP_ssa ),
  0,
  0,
  0,
  ( TODO_cleanup_cfg | TODO_update_ssa ),
};

const pass_data pass_data_thread_jumps =
{
  GIMPLE_PASS,
  "thread",
  OPTGROUP_NONE,
  TV_TREE_SSA_THREAD_JUMPS,
  ( PROP_cfg | PROP_ssa ),
  0,
  0,
  0,
  TODO_update_ssa,
};

const pass_data pass_data_thread_jumps_full =
{
  GIMPLE_PASS,
  "threadfull",
  OPTGROUP_NONE,
  TV_TREE_SSA_THREAD_JUMPS,
  ( PROP_cfg | PROP_ssa ),
  0,
  0,
  0,
  TODO_update_ssa,
};
// Early jump threading pass optimizing for size.
class pass_early_thread_jumps : public gimple_opt_pass
{
public:
  pass_early_thread_jumps (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_early_thread_jumps, ctxt)
  {}

  opt_pass * clone () override
  {
    return new pass_early_thread_jumps (m_ctxt);
  }
  void set_pass_param (unsigned int, bool param) override
  {
    m_first = param;
  }
  bool gate (function *) override
  {
    return flag_thread_jumps;
  }
  unsigned int execute (function *fun) override
  {
    back_threader threader (fun, BT_NONE, m_first);
    return threader.thread_blocks ();
  }
private:
  bool m_first;
};
// Jump threading pass without resolving of unknown SSAs.
class pass_thread_jumps : public gimple_opt_pass
{
public:
  pass_thread_jumps (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_thread_jumps, ctxt)
  {}

  opt_pass * clone (void) override
  {
    return new pass_thread_jumps (m_ctxt);
  }
  void set_pass_param (unsigned int, bool param) override
  {
    m_first = param;
  }
  bool gate (function *) override
  {
    return flag_thread_jumps && flag_expensive_optimizations;
  }
  unsigned int execute (function *fun) override
  {
    back_threader threader (fun, BT_SPEED, m_first);
    return threader.thread_blocks ();
  }
private:
  bool m_first;
};
// Jump threading pass that fully resolves unknown SSAs.
class pass_thread_jumps_full : public gimple_opt_pass
{
public:
  pass_thread_jumps_full (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_thread_jumps_full, ctxt)
  {}

  opt_pass * clone (void) override
  {
    return new pass_thread_jumps_full (m_ctxt);
  }
  void set_pass_param (unsigned int, bool param) override
  {
    m_first = param;
  }
  bool gate (function *) override
  {
    return flag_thread_jumps && flag_expensive_optimizations;
  }
  unsigned int execute (function *fun) override
  {
    back_threader threader (fun, BT_SPEED | BT_RESOLVE, m_first);
    return threader.thread_blocks ();
  }
private:
  bool m_first;
};
} // namespace

gimple_opt_pass *
make_pass_thread_jumps (gcc::context *ctxt)
{
  return new pass_thread_jumps (ctxt);
}

gimple_opt_pass *
make_pass_thread_jumps_full (gcc::context *ctxt)
{
  return new pass_thread_jumps_full (ctxt);
}

gimple_opt_pass *
make_pass_early_thread_jumps (gcc::context *ctxt)
{
  return new pass_early_thread_jumps (ctxt);
}