/* SSA Jump Threading
   Copyright (C) 2005-2022 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "backend.h"
24 #include "predict.h"
25 #include "tree.h"
26 #include "gimple.h"
27 #include "fold-const.h"
28 #include "cfgloop.h"
29 #include "gimple-iterator.h"
30 #include "tree-cfg.h"
31 #include "tree-ssa-threadupdate.h"
32 #include "tree-ssa-loop.h"
33 #include "cfganal.h"
34 #include "tree-pass.h"
35 #include "gimple-ssa.h"
36 #include "tree-phinodes.h"
37 #include "tree-inline.h"
38 #include "tree-vectorizer.h"
39 #include "value-range.h"
40 #include "gimple-range.h"
41 #include "tree-ssa-threadedge.h"
42 #include "gimple-range-path.h"
43 #include "ssa.h"
44 #include "tree-cfgcleanup.h"
45 #include "tree-pretty-print.h"
46 #include "cfghooks.h"
47 #include "dbgcnt.h"

// Path registry for the backwards threader.  After all paths have been
// registered with register_path(), thread_through_all_blocks() is called
// to modify the CFG.

class back_threader_registry : public back_jt_path_registry
{
public:
  bool register_path (const vec<basic_block> &, edge taken);
};

// Class to abstract the profitability code for the backwards threader.

class back_threader_profitability
{
public:
  back_threader_profitability (bool speed_p, gimple *stmt);
  bool possibly_profitable_path_p (const vec<basic_block> &, tree, bool *);
  bool profitable_path_p (const vec<basic_block> &,
                          edge taken, bool *irreducible_loop);
private:
  const bool m_speed_p;
  int m_exit_jump_benefit;
  bool m_threaded_multiway_branch;
  // The following are computed by possibly_profitable_path_p.
  bool m_threaded_through_latch;
  bool m_multiway_branch_in_path;
  bool m_contains_hot_bb;
  int m_n_insns;
};

back_threader_profitability::back_threader_profitability (bool speed_p,
                                                          gimple *last)
  : m_speed_p (speed_p)
{
  m_threaded_multiway_branch = (gimple_code (last) == GIMPLE_SWITCH
                                || gimple_code (last) == GIMPLE_GOTO);
  // The forward threader has estimate_threading_killed_stmts, in
  // particular it estimates further DCE from eliminating the exit
  // control stmt.
  m_exit_jump_benefit = estimate_num_insns (last, &eni_size_weights);
}

// Back threader flags.
#define BT_NONE 0
// Generate fast code at the expense of code size.
#define BT_SPEED 1
// Resolve unknown SSAs on entry to a threading path.  If set, use the
// ranger.  If not, assume all ranges on entry to a path are VARYING.
#define BT_RESOLVE 2
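
// A note on how the passes below combine these flags (see their
// execute methods): "ethread" uses BT_NONE, the "thread[12]" passes
// use BT_SPEED, and the "threadfull[12]" passes use
// BT_SPEED | BT_RESOLVE.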

class back_threader
{
public:
  back_threader (function *fun, unsigned flags, bool first);
  ~back_threader ();
  unsigned thread_blocks ();
private:
  void maybe_thread_block (basic_block bb);
  bool debug_counter ();
  edge maybe_register_path (back_threader_profitability &);
  void maybe_register_path_dump (edge taken_edge);
  void find_paths_to_names (basic_block bb, bitmap imports, unsigned,
                            back_threader_profitability &);
  edge find_taken_edge (const vec<basic_block> &path);
  edge find_taken_edge_cond (const vec<basic_block> &path, gcond *);
  edge find_taken_edge_switch (const vec<basic_block> &path, gswitch *);
  virtual void debug ();
  virtual void dump (FILE *out);

  back_threader_registry m_registry;

  // Current path being analyzed.
  auto_vec<basic_block> m_path;
  // Hash to mark visited BBs while analyzing a path.
  hash_set<basic_block> m_visited_bbs;
  // The set of SSA names, any of which could potentially change the
  // value of the final conditional in a path.
  auto_bitmap m_imports;
  // The last statement in the path.
  gimple *m_last_stmt;
  // This is a bit of a wart.  It's used to pass the LHS SSA name to
  // the profitability engine.
  tree m_name;
  // Marker to differentiate unreachable edges.
  static const edge UNREACHABLE_EDGE;
  // The function being threaded.
  function *m_fun;
  // Ranger for the path solver.
  gimple_ranger *m_ranger;
  // Combination of the BT_* flags above.  If BT_RESOLVE is set,
  // unknown SSA names along a path are resolved with the ranger;
  // otherwise they are assumed to be VARYING.  Resolving is more
  // precise but slower.
  unsigned m_flags;
  // Set to TRUE for the first of each thread[12] pass or the first of
  // each threadfull[12] pass.  This is used to differentiate between
  // the different threading passes so we can set up debug counters.
  bool m_first;
};

// Used to differentiate unreachable edges, so we may stop the search
// in a given direction.

const edge back_threader::UNREACHABLE_EDGE = (edge) -1;
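// Note the marker is a sentinel only: it is not a dereferenceable edge
// and must only be compared against by pointer equality.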

back_threader::back_threader (function *fun, unsigned flags, bool first)
  : m_first (first)
{
  if (flags & BT_SPEED)
    loop_optimizer_init (LOOPS_HAVE_PREHEADERS | LOOPS_HAVE_SIMPLE_LATCHES);
  else
    loop_optimizer_init (AVOID_CFG_MODIFICATIONS);

  m_fun = fun;
  m_flags = flags;
  m_last_stmt = NULL;

  // The path solver needs EDGE_DFS_BACK in resolving mode.
  if (flags & BT_RESOLVE)
    mark_dfs_back_edges ();

  m_ranger = new gimple_ranger;
}

back_threader::~back_threader ()
{
  delete m_ranger;
  loop_optimizer_finalize ();
}

// A wrapper for the various debug counters for the threading passes.
// Returns TRUE if it's OK to register the current threading
// candidate.

bool
back_threader::debug_counter ()
{
  // The ethread pass is mostly harmless ;-).
  if ((m_flags & BT_SPEED) == 0)
    return true;

  if (m_flags & BT_RESOLVE)
    {
      if (m_first && !dbg_cnt (back_threadfull1))
        return false;

      if (!m_first && !dbg_cnt (back_threadfull2))
        return false;
    }
  else
    {
      if (m_first && !dbg_cnt (back_thread1))
        return false;

      if (!m_first && !dbg_cnt (back_thread2))
        return false;
    }
  return true;
}
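
// For example, a miscompile in the first speed pass can be bisected with
// -fdbg-cnt=back_thread1:N, which allows the first N candidates to be
// registered and rejects the rest (see dbg-cnt.def for the counters).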

static void
dump_path (FILE *dump_file, const vec<basic_block> &path)
{
  for (unsigned i = path.length (); i > 0; --i)
    {
      basic_block bb = path[i - 1];
      fprintf (dump_file, "%d", bb->index);
      if (i > 1)
        fprintf (dump_file, "->");
    }
}

// Dump details of an attempt to register a path.

void
back_threader::maybe_register_path_dump (edge taken)
{
  if (m_path.is_empty ())
    return;

  fprintf (dump_file, "path: ");
  dump_path (dump_file, m_path);
  fprintf (dump_file, "->");

  if (taken == UNREACHABLE_EDGE)
    fprintf (dump_file, "xx REJECTED (unreachable)\n");
  else if (taken)
    fprintf (dump_file, "%d SUCCESS\n", taken->dest->index);
  else
    fprintf (dump_file, "xx REJECTED\n");
}

// If an outgoing edge can be determined out of the current path,
// register it for jump threading and return the taken edge.
//
// Return NULL if it is unprofitable to thread this path, or the
// outgoing edge is unknown.  Return UNREACHABLE_EDGE if the path is
// unreachable.

edge
back_threader::maybe_register_path (back_threader_profitability &profit)
{
  edge taken_edge = find_taken_edge (m_path);

  if (taken_edge && taken_edge != UNREACHABLE_EDGE)
    {
      if (m_visited_bbs.contains (taken_edge->dest))
        {
          // Avoid circular paths by indicating there is nothing to
          // see in this direction.
          taken_edge = UNREACHABLE_EDGE;
        }
      else
        {
          bool irreducible = false;
          if (profit.profitable_path_p (m_path, taken_edge, &irreducible)
              && debug_counter ()
              && m_registry.register_path (m_path, taken_edge))
            {
              if (irreducible)
                vect_free_loop_info_assumptions (m_path[0]->loop_father);
            }
          else
            taken_edge = NULL;
        }
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    maybe_register_path_dump (taken_edge);

  return taken_edge;
}

// Return the known taken edge out of a path.  If the path can be
// determined to be unreachable, return UNREACHABLE_EDGE.  If no
// outgoing edge can be calculated, return NULL.

edge
back_threader::find_taken_edge (const vec<basic_block> &path)
{
  gcc_checking_assert (path.length () > 1);
  switch (gimple_code (m_last_stmt))
    {
    case GIMPLE_COND:
      return find_taken_edge_cond (path, as_a<gcond *> (m_last_stmt));

    case GIMPLE_SWITCH:
      return find_taken_edge_switch (path, as_a<gswitch *> (m_last_stmt));

    default:
      return NULL;
    }
}

// Same as find_taken_edge, but for paths ending in a switch.

edge
back_threader::find_taken_edge_switch (const vec<basic_block> &path,
                                       gswitch *sw)
{
  tree name = gimple_switch_index (sw);
  int_range_max r;

  path_range_query solver (*m_ranger, path, m_imports, m_flags & BT_RESOLVE);
  solver.range_of_expr (r, name, sw);

  if (r.undefined_p ())
    return UNREACHABLE_EDGE;

  if (r.varying_p ())
    return NULL;

  tree label = find_case_label_range (sw, &r);
  if (!label)
    return NULL;

  return find_edge (gimple_bb (sw), label_to_block (cfun, CASE_LABEL (label)));
}

// Same as find_taken_edge, but for paths ending in a GIMPLE_COND.

edge
back_threader::find_taken_edge_cond (const vec<basic_block> &path,
                                     gcond *cond)
{
  int_range_max r;

  path_range_query solver (*m_ranger, path, m_imports, m_flags & BT_RESOLVE);
  solver.range_of_stmt (r, cond);

  if (solver.unreachable_path_p ())
    return UNREACHABLE_EDGE;

  int_range<2> true_range (boolean_true_node, boolean_true_node);
  int_range<2> false_range (boolean_false_node, boolean_false_node);

  if (r == true_range || r == false_range)
    {
      edge e_true, e_false;
      basic_block bb = gimple_bb (cond);
      extract_true_false_edges_from_block (bb, &e_true, &e_false);
      return r == true_range ? e_true : e_false;
    }
  return NULL;
}
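
// As an illustration, if the path solver proves that a_1 (hypothetical
// name) has the singleton range [5, 5] and the final conditional is
// "if (a_1 > 3)", range_of_stmt computes the singleton true range above
// and the block's true edge is returned; a range that may contain both
// truth values yields NULL instead.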

// Find jump threading paths to any of the SSA names in the
// INTERESTING bitmap, and register any such paths.
//
// BB is the current block being processed.
//
// OVERALL_PATHS is the search space up to this block.

void
back_threader::find_paths_to_names (basic_block bb, bitmap interesting,
                                    unsigned overall_paths,
                                    back_threader_profitability &profit)
{
  if (m_visited_bbs.add (bb))
    return;

  m_path.safe_push (bb);

  // Try to resolve the path without looking back.  Avoid resolving paths
  // we know are large but are not (yet) recognized as a finite state
  // machine.
  // ???  Ideally we'd explore the cheapest path to the loop backedge here,
  // avoiding the exponential greedy search and only start that from there.
  // Precomputing a path-size-to-immediate-dominator-of-successor for each
  // edge might help here.  Alternatively copying divergent control flow
  // on the way to the backedge could be worthwhile.
  bool large_non_fsm;
  if (m_path.length () > 1
      && (!profit.possibly_profitable_path_p (m_path, m_name, &large_non_fsm)
          || (!large_non_fsm
              && maybe_register_path (profit))))
    ;

  // The backwards thread copier cannot copy blocks that do not belong
  // to the same loop, so when the new source of the path entry no
  // longer belongs to it we don't need to search further.
  else if (m_path[0]->loop_father != bb->loop_father)
    ;

  // Continue looking for ways to extend the path but limit the
  // search space along a branch.
  else if ((overall_paths = overall_paths * EDGE_COUNT (bb->preds))
           <= (unsigned) param_max_jump_thread_paths)
    {
      // For further greedy searching we want to remove interesting
      // names defined in BB but add ones on the PHI edges for the
      // respective edges and adding imports from those stmts.
      // We do this by starting with all names
      // not defined in BB as interesting, collecting a list of
      // interesting PHIs in BB on the fly.  Then we iterate over
      // predecessor edges, adding interesting PHI edge defs to
      // the set of interesting names to consider when processing it.
      auto_bitmap new_interesting;
      auto_vec<int, 16> new_imports;
      auto_vec<gphi *, 4> interesting_phis;
      bitmap_iterator bi;
      unsigned i;
      auto_vec<tree, 16> worklist;
      EXECUTE_IF_SET_IN_BITMAP (interesting, 0, i, bi)
        {
          tree name = ssa_name (i);
          gimple *def_stmt = SSA_NAME_DEF_STMT (name);
          /* Imports remain interesting.  */
          if (gimple_bb (def_stmt) != bb)
            {
              bitmap_set_bit (new_interesting, i);
              continue;
            }
          worklist.quick_push (name);
          while (!worklist.is_empty ())
            {
              tree name = worklist.pop ();
              gimple *def_stmt = SSA_NAME_DEF_STMT (name);
              /* Newly discovered imports are interesting.  */
              if (gimple_bb (def_stmt) != bb)
                {
                  bitmap_set_bit (new_interesting, SSA_NAME_VERSION (name));
                  continue;
                }
              /* Local PHIs participate in renaming below.  */
              if (gphi *phi = dyn_cast<gphi *> (def_stmt))
                {
                  tree res = gimple_phi_result (phi);
                  if (!SSA_NAME_OCCURS_IN_ABNORMAL_PHI (res))
                    interesting_phis.safe_push (phi);
                }
              /* For other local defs process their uses, amending
                 imports on the way.  */
              else
                {
                  tree ssa[3];
                  unsigned lim = gimple_range_ssa_names (ssa, 3, def_stmt);
                  for (unsigned j = 0; j < lim; ++j)
                    {
                      tree rhs = ssa[j];
                      if (rhs
                          && bitmap_set_bit (m_imports,
                                             SSA_NAME_VERSION (rhs)))
                        {
                          new_imports.safe_push (SSA_NAME_VERSION (rhs));
                          worklist.safe_push (rhs);
                        }
                    }
                }
            }
        }
      if (!bitmap_empty_p (new_interesting)
          || !interesting_phis.is_empty ())
        {
          auto_vec<int, 4> unwind (interesting_phis.length ());
          auto_vec<int, 4> imports_unwind (interesting_phis.length ());
          edge_iterator iter;
          edge e;
          FOR_EACH_EDGE (e, iter, bb->preds)
            {
              if (e->flags & EDGE_ABNORMAL
                  // This is like path_crosses_loops in profitable_path_p but
                  // more restrictive to avoid peeling off loop iterations (see
                  // tree-ssa/pr14341.c for an example).
                  // ???  Note this restriction only applied when visiting an
                  // interesting PHI with the former resolve_phi.
                  || (!interesting_phis.is_empty ()
                      && m_path[0]->loop_father != e->src->loop_father))
                continue;
              for (gphi *phi : interesting_phis)
                {
                  tree def = PHI_ARG_DEF_FROM_EDGE (phi, e);
                  if (TREE_CODE (def) == SSA_NAME)
                    {
                      int ver = SSA_NAME_VERSION (def);
                      if (bitmap_set_bit (new_interesting, ver))
                        {
                          if (bitmap_set_bit (m_imports, ver))
                            imports_unwind.quick_push (ver);
                          unwind.quick_push (ver);
                        }
                    }
                }
              find_paths_to_names (e->src, new_interesting, overall_paths,
                                   profit);
              // Restore new_interesting.
              for (int def : unwind)
                bitmap_clear_bit (new_interesting, def);
              unwind.truncate (0);
              // Restore m_imports.
              for (int def : imports_unwind)
                bitmap_clear_bit (m_imports, def);
              imports_unwind.truncate (0);
            }
        }
      /* m_imports tracks all interesting names on the path, so when
         backtracking we have to restore it.  */
      for (int j : new_imports)
        bitmap_clear_bit (m_imports, j);
    }
  else if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "  FAIL: Search space limit %d reached.\n",
             param_max_jump_thread_paths);

  // Reset things to their original state.
  m_path.pop ();
  m_visited_bbs.remove (bb);
}

// Search backwards from BB looking for paths where the final
// conditional may be threaded to a successor block.  Record such paths
// for jump threading.

void
back_threader::maybe_thread_block (basic_block bb)
{
  if (EDGE_COUNT (bb->succs) <= 1)
    return;

  gimple *stmt = last_stmt (bb);
  if (!stmt)
    return;

  enum gimple_code code = gimple_code (stmt);
  tree name;
  if (code == GIMPLE_SWITCH)
    name = gimple_switch_index (as_a <gswitch *> (stmt));
  else if (code == GIMPLE_COND)
    name = gimple_cond_lhs (stmt);
  else
    return;

  m_last_stmt = stmt;
  m_visited_bbs.empty ();
  m_path.truncate (0);
  m_name = name;

  // We compute imports of the path during discovery starting
  // just with names used in the conditional.
  bitmap_clear (m_imports);
  ssa_op_iter iter;
  FOR_EACH_SSA_TREE_OPERAND (name, stmt, iter, SSA_OP_USE)
    {
      if (!gimple_range_ssa_p (name))
        return;
      bitmap_set_bit (m_imports, SSA_NAME_VERSION (name));
    }
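
  // For instance, for a final conditional "if (a_5 > b_7)" (hypothetical
  // names), the imports start out as just { a_5, b_7 }.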

  // Interesting is the set of imports we have not yet seen the
  // definition of.  So while imports only grow, the set of
  // interesting defs dwindles and once empty we can stop searching.
  auto_bitmap interesting;
  bitmap_copy (interesting, m_imports);
  back_threader_profitability profit (m_flags & BT_SPEED, stmt);
  find_paths_to_names (bb, interesting, 1, profit);
}

DEBUG_FUNCTION void
debug (const vec <basic_block> &path)
{
  dump_path (stderr, path);
  fputc ('\n', stderr);
}

void
back_threader::dump (FILE *out)
{
  fprintf (out, "\nCandidates for pre-computation:\n");
  fprintf (out, "===================================\n");

  bitmap_iterator bi;
  unsigned i;

  EXECUTE_IF_SET_IN_BITMAP (m_imports, 0, i, bi)
    {
      tree name = ssa_name (i);
      print_generic_expr (out, name, TDF_NONE);
      fprintf (out, "\n");
    }
}

void
back_threader::debug ()
{
  dump (stderr);
}

/* Examine jump threading path PATH and return TRUE if it is possibly
   profitable to thread it, otherwise return FALSE.  If this function
   returns TRUE profitable_path_p might not be satisfied but when
   the path is extended it might be.  In particular indicate in
   *LARGE_NON_FSM whether the thread is too large for a non-FSM thread
   but would be OK if we extend the path to cover the loop backedge.

   NAME is the SSA_NAME of the variable we found to have a constant
   value on PATH.  If unknown, NAME is NULL.

   ???  It seems we should be able to loosen some of the restrictions in
   this function after loop optimizations have run.  */

bool
back_threader_profitability::possibly_profitable_path_p
  (const vec<basic_block> &m_path, tree name, bool *large_non_fsm)
{
  gcc_checking_assert (!m_path.is_empty ());

  /* We can get an empty path here (excluding the DEF block) when the
     statement that makes a conditional generate a compile-time
     constant result is in the same block as the conditional.

     That's not really a jump threading opportunity, but instead is
     simple cprop & simplification.  We could handle it here if we
     wanted by wiring up all the incoming edges.  If we run this
     early in IPA, that might be worth doing.  For now we just
     reject that case.  */
  if (m_path.length () <= 1)
    return false;

  gimple_stmt_iterator gsi;
  loop_p loop = m_path[0]->loop_father;

  // We recompute the following; when we rewrite possibly_profitable_path_p
  // to work incrementally on added BBs we have to unwind them on
  // backtracking.
  m_n_insns = 0;
  m_threaded_through_latch = false;
  m_multiway_branch_in_path = false;
  m_contains_hot_bb = false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Checking profitability of path (backwards): ");

  /* Count the number of instructions on the path: as these instructions
     will have to be duplicated, we will not record the path if there
     are too many instructions on the path.  Also check that all the
     blocks in the path belong to a single loop.  */
  for (unsigned j = 0; j < m_path.length (); j++)
    {
      basic_block bb = m_path[j];

      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, " bb:%i", bb->index);
      /* Remember, blocks in the path are stored in opposite order in
         the PATH array.  The last entry in the array represents the
         block with an outgoing edge that we will redirect to the jump
         threading path.  Thus we don't care how many statements are
         in that block because it will not be copied or whether or not
         it ends in a multiway branch.  */
      if (j < m_path.length () - 1)
        {
          int orig_n_insns = m_n_insns;
          /* PHIs in the path will create degenerate PHIS in the
             copied path which will then get propagated away, so
             looking at just the duplicate path the PHIs would
             seem unimportant.

             But those PHIs, because they're assignments to objects
             typically with lives that exist outside the thread path,
             will tend to generate PHIs (or at least new PHI arguments)
             at points where we leave the thread path and rejoin
             the original blocks.  So we do want to account for them.

             We ignore virtual PHIs.  We also ignore cases where BB
             has a single incoming edge.  That's the most common
             degenerate PHI we'll see here.  Finally we ignore PHIs
             that are associated with the value we're tracking as
             that object likely dies.  */
          if (EDGE_COUNT (bb->succs) > 1 && EDGE_COUNT (bb->preds) > 1)
            {
              for (gphi_iterator gsip = gsi_start_phis (bb);
                   !gsi_end_p (gsip);
                   gsi_next (&gsip))
                {
                  gphi *phi = gsip.phi ();
                  tree dst = gimple_phi_result (phi);

                  /* Note that if both NAME and DST are anonymous
                     SSA_NAMEs, then we do not have enough information
                     to consider them associated.  */
                  if (dst != name
                      && name
                      && TREE_CODE (name) == SSA_NAME
                      && (SSA_NAME_VAR (dst) != SSA_NAME_VAR (name)
                          || !SSA_NAME_VAR (dst))
                      && !virtual_operand_p (dst))
                    ++m_n_insns;
                }
            }

          if (!m_contains_hot_bb && m_speed_p)
            m_contains_hot_bb |= optimize_bb_for_speed_p (bb);
          for (gsi = gsi_after_labels (bb);
               !gsi_end_p (gsi);
               gsi_next_nondebug (&gsi))
            {
              /* Do not allow OpenACC loop markers and __builtin_constant_p on
                 threading paths.  The latter is disallowed, because an
                 expression might be constant on two threading paths, and
                 become non-constant (i.e.: phi) when they merge.  */
              gimple *stmt = gsi_stmt (gsi);
              if (gimple_call_internal_p (stmt, IFN_UNIQUE)
                  || gimple_call_builtin_p (stmt, BUILT_IN_CONSTANT_P))
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fputc ('\n', dump_file);
                  return false;
                }
              /* Do not count empty statements and labels.  */
              if (gimple_code (stmt) != GIMPLE_NOP
                  && !is_gimple_debug (stmt))
                m_n_insns += estimate_num_insns (stmt, &eni_size_weights);
            }
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, " (%i insns)", m_n_insns - orig_n_insns);

          /* We do not look at the block with the threaded branch
             in this loop.  So if any block with a last statement that
             is a GIMPLE_SWITCH or GIMPLE_GOTO is seen, then we have a
             multiway branch on our path.

             The block in PATH[0] is special, it's the block where we're
             going to be able to eliminate its branch.  */
          if (j > 0)
            {
              gimple *last = last_stmt (bb);
              if (last
                  && (gimple_code (last) == GIMPLE_SWITCH
                      || gimple_code (last) == GIMPLE_GOTO))
                m_multiway_branch_in_path = true;
            }
        }

      /* Note if we thread through the latch, we will want to include
         the last entry in the array when determining if we thread
         through the loop latch.  */
      if (loop->latch == bb)
        {
          m_threaded_through_latch = true;
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, " (latch)");
        }
    }

  /* We are going to remove the control statement at the end of the
     last block in the threading path.  So don't count it against our
     statement count.  */
  m_n_insns -= m_exit_jump_benefit;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\n  Control statement insns: %i\n"
             "  Overall: %i insns\n",
             m_exit_jump_benefit, m_n_insns);

  /* Threading is profitable if the path duplicated is hot but also
     in a case we separate cold path from hot path and permit optimization
     of the hot path later.  Be on the aggressive side here.  In some
     testcases, as in PR 78407 this leads to noticeable improvements.  */
  if (m_speed_p)
    {
      if (m_n_insns >= param_max_fsm_thread_path_insns)
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, "  FAIL: Jump-thread path not considered: "
                     "the number of instructions on the path "
                     "exceeds PARAM_MAX_FSM_THREAD_PATH_INSNS.\n");
          return false;
        }
      edge entry = find_edge (m_path[m_path.length () - 1],
                              m_path[m_path.length () - 2]);
      if (probably_never_executed_edge_p (cfun, entry))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, "  FAIL: Jump-thread path not considered: "
                     "path entry is probably never executed.\n");
          return false;
        }
    }
  else if (m_n_insns > 1)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "  FAIL: Jump-thread path not considered: "
                 "duplication of %i insns is needed and optimizing for "
                 "size.\n", m_n_insns);
      return false;
    }

  /* The generic copier used by the backthreader does not re-use an
     existing threading path to reduce code duplication.  So for that
     case, drastically reduce the number of statements we are allowed
     to copy.  We don't know yet whether we will thread through the latch
     so we have to be permissive and continue threading, but indicate
     to the caller the thread, if final, wouldn't be profitable.  */
  if ((!m_threaded_multiway_branch
       || !loop->latch
       || loop->latch->index == EXIT_BLOCK)
      && (m_n_insns * param_fsm_scale_path_stmts
          >= param_max_jump_thread_duplication_stmts))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "  FAIL: Did not thread around loop and would copy too "
                 "many statements.\n");
      return false;
    }
  *large_non_fsm = (!(m_threaded_through_latch && m_threaded_multiway_branch)
                    && (m_n_insns * param_fsm_scale_path_stmts
                        >= param_max_jump_thread_duplication_stmts));

  if (dump_file && (dump_flags & TDF_DETAILS))
    fputc ('\n', dump_file);
  return true;
}

/* Examine jump threading path PATH and return TRUE if it is profitable to
   thread it, otherwise return FALSE.

   The taken edge out of the path is TAKEN_EDGE.

   CREATES_IRREDUCIBLE_LOOP is set to TRUE if threading this path
   would create an irreducible loop.

   ???  It seems we should be able to loosen some of the restrictions in
   this function after loop optimizations have run.  */

bool
back_threader_profitability::profitable_path_p (const vec<basic_block> &m_path,
                                                edge taken_edge,
                                                bool *creates_irreducible_loop)
{
  // We can assume that possibly_profitable_path_p holds here.

  loop_p loop = m_path[0]->loop_father;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Checking profitability of path (backwards): ");

  /* If this path threaded through the loop latch back into the
     same loop and the destination does not dominate the loop
     latch, then this thread would create an irreducible loop.  */
  *creates_irreducible_loop = false;
  if (m_threaded_through_latch
      && loop == taken_edge->dest->loop_father
      && (determine_bb_domination_status (loop, taken_edge->dest)
          == DOMST_NONDOMINATING))
    *creates_irreducible_loop = true;

  /* Threading is profitable if the path duplicated is hot but also
     in a case we separate cold path from hot path and permit optimization
     of the hot path later.  Be on the aggressive side here.  In some
     testcases, as in PR 78407 this leads to noticeable improvements.  */
  if (m_speed_p
      && (optimize_edge_for_speed_p (taken_edge) || m_contains_hot_bb))
    {
      if (probably_never_executed_edge_p (cfun, taken_edge))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, "  FAIL: Jump-thread path not considered: "
                     "path leads to probably never executed edge.\n");
          return false;
        }
    }
  else if (m_n_insns > 1)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "  FAIL: Jump-thread path not considered: "
                 "duplication of %i insns is needed and optimizing for "
                 "size.\n", m_n_insns);
      return false;
    }

  /* We avoid creating irreducible inner loops unless we thread through
     a multiway branch, in which case we have deemed it worth losing
     other loop optimizations later.

     We also consider it worth creating an irreducible inner loop if
     the number of copied statements is low relative to the length of
     the path -- in that case there's little the traditional loop
     optimizer would have done anyway, so an irreducible loop is not
     so bad.  */
  if (!m_threaded_multiway_branch
      && *creates_irreducible_loop
      && (m_n_insns * (unsigned) param_fsm_scale_path_stmts
          > (m_path.length () *
             (unsigned) param_fsm_scale_path_blocks)))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "  FAIL: Would create irreducible loop without threading "
                 "multiway branch.\n");
      /* We compute creates_irreducible_loop only late.  */
      return false;
    }

  /* The generic copier used by the backthreader does not re-use an
     existing threading path to reduce code duplication.  So for that
     case, drastically reduce the number of statements we are allowed
     to copy.  */
  if (!(m_threaded_through_latch && m_threaded_multiway_branch)
      && (m_n_insns * param_fsm_scale_path_stmts
          >= param_max_jump_thread_duplication_stmts))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "  FAIL: Did not thread around loop and would copy too "
                 "many statements.\n");
      return false;
    }

  /* When there is a multi-way branch on the path, then threading can
     explode the CFG due to duplicating the edges for that multi-way
     branch.  So like above, only allow a multi-way branch on the path
     if we actually thread a multi-way branch.  */
  if (!m_threaded_multiway_branch && m_multiway_branch_in_path)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "  FAIL: Thread through multiway branch without threading "
                 "a multiway branch.\n");
      return false;
    }

  /* Threading through an empty latch would cause code to be added to
     the latch.  This could alter the loop form sufficiently to cause
     loop optimizations to fail.  Disable these threads until after
     loop optimizations have run.  */
  if ((m_threaded_through_latch || taken_edge->dest == loop->latch)
      && !(cfun->curr_properties & PROP_loop_opts_done)
      && empty_block_p (loop->latch))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "  FAIL: Thread through latch before loop opts would create "
                 "non-empty latch\n");
      return false;
    }
  if (dump_file && (dump_flags & TDF_DETAILS))
    fputc ('\n', dump_file);
  return true;
}

/* The current path PATH is a vector of blocks forming a jump threading
   path in reverse order.  TAKEN_EDGE is the edge taken from path[0].

   Convert the current path into the form used by register_jump_thread and
   register it.

   Return TRUE if successful or FALSE otherwise.  */

bool
back_threader_registry::register_path (const vec<basic_block> &m_path,
                                       edge taken_edge)
{
  vec<jump_thread_edge *> *jump_thread_path = allocate_thread_path ();

  // The generic copier ignores the edge type.  We can build the
  // thread edges with any type.
  for (unsigned int j = 0; j + 1 < m_path.length (); j++)
    {
      basic_block bb1 = m_path[m_path.length () - j - 1];
      basic_block bb2 = m_path[m_path.length () - j - 2];

      edge e = find_edge (bb1, bb2);
      gcc_assert (e);
      push_edge (jump_thread_path, e, EDGE_COPY_SRC_BLOCK);
    }

  push_edge (jump_thread_path, taken_edge, EDGE_NO_COPY_SRC_BLOCK);
  return register_jump_thread (jump_thread_path);
}
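
// For example, a recorded path { bb9, bb5, bb2 } (stored in reverse, so
// entered at bb2) becomes the edge sequence 2->5, 5->9 pushed as
// EDGE_COPY_SRC_BLOCK, followed by TAKEN_EDGE out of bb9 pushed as
// EDGE_NO_COPY_SRC_BLOCK.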

// Thread all suitable paths in the current function.
//
// Return TODO_flags.

unsigned int
back_threader::thread_blocks ()
{
  basic_block bb;
  FOR_EACH_BB_FN (bb, m_fun)
    if (EDGE_COUNT (bb->succs) > 1)
      maybe_thread_block (bb);

  bool changed = m_registry.thread_through_all_blocks (true);

  if (m_flags & BT_SPEED)
    return changed ? TODO_cleanup_cfg : 0;

  return 0;
}

namespace {

const pass_data pass_data_early_thread_jumps =
{
  GIMPLE_PASS,
  "ethread",
  OPTGROUP_NONE,
  TV_TREE_SSA_THREAD_JUMPS,
  ( PROP_cfg | PROP_ssa ),
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  ( TODO_cleanup_cfg | TODO_update_ssa ),
};

const pass_data pass_data_thread_jumps =
{
  GIMPLE_PASS,
  "thread",
  OPTGROUP_NONE,
  TV_TREE_SSA_THREAD_JUMPS,
  ( PROP_cfg | PROP_ssa ),
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa,
};

const pass_data pass_data_thread_jumps_full =
{
  GIMPLE_PASS,
  "threadfull",
  OPTGROUP_NONE,
  TV_TREE_SSA_THREAD_JUMPS,
  ( PROP_cfg | PROP_ssa ),
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa,
};

// Early jump threading pass optimizing for size.
class pass_early_thread_jumps : public gimple_opt_pass
{
public:
  pass_early_thread_jumps (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_early_thread_jumps, ctxt)
  {}

  opt_pass * clone () override
  {
    return new pass_early_thread_jumps (m_ctxt);
  }
  void set_pass_param (unsigned int, bool param) override
  {
    m_first = param;
  }
  bool gate (function *) override
  {
    return flag_thread_jumps;
  }
  unsigned int execute (function *fun) override
  {
    back_threader threader (fun, BT_NONE, m_first);
    return threader.thread_blocks ();
  }
private:
  bool m_first;
};

// Jump threading pass without resolving of unknown SSAs.
class pass_thread_jumps : public gimple_opt_pass
{
public:
  pass_thread_jumps (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_thread_jumps, ctxt)
  {}

  opt_pass * clone (void) override
  {
    return new pass_thread_jumps (m_ctxt);
  }
  void set_pass_param (unsigned int, bool param) override
  {
    m_first = param;
  }
  bool gate (function *) override
  {
    return flag_thread_jumps && flag_expensive_optimizations;
  }
  unsigned int execute (function *fun) override
  {
    back_threader threader (fun, BT_SPEED, m_first);
    return threader.thread_blocks ();
  }
private:
  bool m_first;
};

// Jump threading pass that fully resolves unknown SSAs.
class pass_thread_jumps_full : public gimple_opt_pass
{
public:
  pass_thread_jumps_full (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_thread_jumps_full, ctxt)
  {}

  opt_pass * clone (void) override
  {
    return new pass_thread_jumps_full (m_ctxt);
  }
  void set_pass_param (unsigned int, bool param) override
  {
    m_first = param;
  }
  bool gate (function *) override
  {
    return flag_thread_jumps && flag_expensive_optimizations;
  }
  unsigned int execute (function *fun) override
  {
    back_threader threader (fun, BT_SPEED | BT_RESOLVE, m_first);
    return threader.thread_blocks ();
  }
private:
  bool m_first;
};

} // namespace {

gimple_opt_pass *
make_pass_thread_jumps (gcc::context *ctxt)
{
  return new pass_thread_jumps (ctxt);
}

gimple_opt_pass *
make_pass_thread_jumps_full (gcc::context *ctxt)
{
  return new pass_thread_jumps_full (ctxt);
}

gimple_opt_pass *
make_pass_early_thread_jumps (gcc::context *ctxt)
{
  return new pass_early_thread_jumps (ctxt);
}