gcc/tree-ssa-threadbackward.cc

/* SSA Jump Threading
   Copyright (C) 2005-2024 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#define INCLUDE_MEMORY
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "predict.h"
#include "tree.h"
#include "gimple.h"
#include "fold-const.h"
#include "cfgloop.h"
#include "gimple-iterator.h"
#include "tree-cfg.h"
#include "tree-ssa-threadupdate.h"
#include "tree-ssa-loop.h"
#include "cfganal.h"
#include "tree-pass.h"
#include "gimple-ssa.h"
#include "tree-phinodes.h"
#include "tree-inline.h"
#include "tree-vectorizer.h"
#include "value-range.h"
#include "gimple-range.h"
#include "tree-ssa-threadedge.h"
#include "gimple-range-path.h"
#include "ssa.h"
#include "tree-cfgcleanup.h"
#include "tree-pretty-print.h"
#include "cfghooks.h"
#include "dbgcnt.h"

// Path registry for the backwards threader.  After all paths have been
// registered with register_path(), thread_through_all_blocks() is called
// to modify the CFG.

class back_threader_registry : public back_jt_path_registry
{
public:
  bool register_path (const vec<basic_block> &, edge taken);
};

// Class to abstract the profitability code for the backwards threader.

class back_threader_profitability
{
public:
  back_threader_profitability (bool speed_p, gimple *stmt);
  bool possibly_profitable_path_p (const vec<basic_block> &, bool *);
  bool profitable_path_p (const vec<basic_block> &,
			  edge taken, bool *irreducible_loop);
private:
  const bool m_speed_p;
  int m_exit_jump_benefit;
  bool m_threaded_multiway_branch;
  // The following are computed by possibly_profitable_path_p.
  bool m_threaded_through_latch;
  bool m_multiway_branch_in_path;
  bool m_contains_hot_bb;
  int m_n_insns;
};

back_threader_profitability::back_threader_profitability (bool speed_p,
							   gimple *last)
  : m_speed_p (speed_p)
{
  m_threaded_multiway_branch = (gimple_code (last) == GIMPLE_SWITCH
				|| gimple_code (last) == GIMPLE_GOTO);
  // The forward threader has estimate_threading_killed_stmts, in
  // particular it estimates further DCE from eliminating the exit
  // control stmt.
  m_exit_jump_benefit = estimate_num_insns (last, &eni_size_weights);
}

// Back threader flags.
#define BT_NONE 0
// Generate fast code at the expense of code size.
#define BT_SPEED 1
// Resolve unknown SSAs on entry to a threading path.  If set, use the
// ranger.  If not, assume all ranges on entry to a path are VARYING.
#define BT_RESOLVE 2

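// The backwards threader itself.  For each block ending in a
// conditional or switch, it searches backwards for paths along which
// the taken edge can be determined, and registers the profitable ones.
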
class back_threader
{
public:
  back_threader (function *fun, unsigned flags, bool first);
  ~back_threader ();
  unsigned thread_blocks ();
private:
  void maybe_thread_block (basic_block bb);
  bool debug_counter ();
  edge maybe_register_path (back_threader_profitability &);
  void maybe_register_path_dump (edge taken_edge);
  void find_paths_to_names (basic_block bb, bitmap imports, unsigned,
			    back_threader_profitability &);
  edge find_taken_edge (const vec<basic_block> &path);
  edge find_taken_edge_cond (const vec<basic_block> &path, gcond *);
  edge find_taken_edge_switch (const vec<basic_block> &path, gswitch *);
  virtual void debug ();
  virtual void dump (FILE *out);

  back_threader_registry m_registry;

  // Current path being analyzed.
  auto_vec<basic_block> m_path;
  // Hash to mark visited BBs while analyzing a path.
  hash_set<basic_block> m_visited_bbs;
  // The set of SSA names, any of which could potentially change the
  // value of the final conditional in a path.
  auto_bitmap m_imports;
  // The last statement in the path.
  gimple *m_last_stmt;
  // Marker to differentiate unreachable edges.
  static const edge UNREACHABLE_EDGE;
  // The function being threaded.
  function *m_fun;
  // Ranger for the path solver.
  gimple_ranger *m_ranger;
  // BT_* flags.  With BT_RESOLVE, unknown SSA names along a path are
  // resolved with the ranger; otherwise they are assumed to be
  // VARYING.  Resolving is more precise but slower.
  unsigned m_flags;
  // Set to TRUE for the first of each thread[12] pass or the first of
  // each threadfull[12] pass.  This is used to differentiate between
  // the different threading passes so we can set up debug counters.
  bool m_first;
};

// Used to differentiate unreachable edges, so we may stop the search
// in a given direction.
const edge back_threader::UNREACHABLE_EDGE = (edge) -1;

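// Construct a back threader for function FUN with the given BT_*
// FLAGS.  FIRST distinguishes the first invocation of each threading
// pass, for the debug counters.
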
back_threader::back_threader (function *fun, unsigned flags, bool first)
  : m_first (first)
{
  if (flags & BT_SPEED)
    loop_optimizer_init (LOOPS_HAVE_PREHEADERS | LOOPS_HAVE_SIMPLE_LATCHES);
  else
    loop_optimizer_init (AVOID_CFG_MODIFICATIONS);

  m_fun = fun;
  m_flags = flags;
  m_last_stmt = NULL;

  // The path solver needs EDGE_DFS_BACK in resolving mode.
  if (flags & BT_RESOLVE)
    mark_dfs_back_edges ();

  m_ranger = new gimple_ranger;
}

back_threader::~back_threader ()
{
  delete m_ranger;
  loop_optimizer_finalize ();
}

// A wrapper for the various debug counters for the threading passes.
// Returns TRUE if it's OK to register the current threading
// candidate.

bool
back_threader::debug_counter ()
{
  // The ethread pass is mostly harmless ;-).
  if ((m_flags & BT_SPEED) == 0)
    return true;

  if (m_flags & BT_RESOLVE)
    {
      if (m_first && !dbg_cnt (back_threadfull1))
	return false;

      if (!m_first && !dbg_cnt (back_threadfull2))
	return false;
    }
  else
    {
      if (m_first && !dbg_cnt (back_thread1))
	return false;

      if (!m_first && !dbg_cnt (back_thread2))
	return false;
    }
  return true;
}

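// Dump PATH to DUMP_FILE.  The path vector stores the final block
// first, so walk it backwards to print the blocks in threading order.
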
static void
dump_path (FILE *dump_file, const vec<basic_block> &path)
{
  for (unsigned i = path.length (); i > 0; --i)
    {
      basic_block bb = path[i - 1];
      fprintf (dump_file, "%d", bb->index);
      if (i > 1)
	fprintf (dump_file, "->");
    }
}

// Dump details of an attempt to register a path.

void
back_threader::maybe_register_path_dump (edge taken)
{
  if (m_path.is_empty ())
    return;

  fprintf (dump_file, "path: ");
  dump_path (dump_file, m_path);
  fprintf (dump_file, "->");

  if (taken == UNREACHABLE_EDGE)
    fprintf (dump_file, "xx REJECTED (unreachable)\n");
  else if (taken)
    fprintf (dump_file, "%d SUCCESS\n", taken->dest->index);
  else
    fprintf (dump_file, "xx REJECTED\n");
}

// If an outgoing edge can be determined out of the current path,
// register it for jump threading and return the taken edge.
//
// Return NULL if it is unprofitable to thread this path, or the
// outgoing edge is unknown.  Return UNREACHABLE_EDGE if the path is
// unreachable.

edge
back_threader::maybe_register_path (back_threader_profitability &profit)
{
  edge taken_edge = find_taken_edge (m_path);

  if (taken_edge && taken_edge != UNREACHABLE_EDGE)
    {
      bool irreducible = false;
      if (profit.profitable_path_p (m_path, taken_edge, &irreducible)
	  && debug_counter ()
	  && m_registry.register_path (m_path, taken_edge))
	{
	  if (irreducible)
	    vect_free_loop_info_assumptions (m_path[0]->loop_father);
	}
      else
	taken_edge = NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    maybe_register_path_dump (taken_edge);

  return taken_edge;
}

// Return the known taken edge out of a path.  If the path can be
// determined to be unreachable, return UNREACHABLE_EDGE.  If no
// outgoing edge can be calculated, return NULL.

edge
back_threader::find_taken_edge (const vec<basic_block> &path)
{
  gcc_checking_assert (path.length () > 1);
  switch (gimple_code (m_last_stmt))
    {
    case GIMPLE_COND:
      return find_taken_edge_cond (path, as_a<gcond *> (m_last_stmt));

    case GIMPLE_SWITCH:
      return find_taken_edge_switch (path, as_a<gswitch *> (m_last_stmt));

    default:
      return NULL;
    }
}

// Same as find_taken_edge, but for paths ending in a switch.

edge
back_threader::find_taken_edge_switch (const vec<basic_block> &path,
				       gswitch *sw)
{
  tree name = gimple_switch_index (sw);
  int_range_max r;

  path_range_query solver (*m_ranger, path, m_imports, m_flags & BT_RESOLVE);
  solver.range_of_expr (r, name, sw);

  if (r.undefined_p ())
    return UNREACHABLE_EDGE;

  if (r.varying_p ())
    return NULL;

  tree label = find_case_label_range (sw, &r);
  if (!label)
    return NULL;

  return find_edge (gimple_bb (sw), label_to_block (cfun, CASE_LABEL (label)));
}

// Same as find_taken_edge, but for paths ending in a GIMPLE_COND.

edge
back_threader::find_taken_edge_cond (const vec<basic_block> &path,
				     gcond *cond)
{
  int_range_max r;

  path_range_query solver (*m_ranger, path, m_imports, m_flags & BT_RESOLVE);
  solver.range_of_stmt (r, cond);

  if (solver.unreachable_path_p ())
    return UNREACHABLE_EDGE;

  int_range<2> true_range = range_true ();
  int_range<2> false_range = range_false ();

  if (r == true_range || r == false_range)
    {
      edge e_true, e_false;
      basic_block bb = gimple_bb (cond);
      extract_true_false_edges_from_block (bb, &e_true, &e_false);
      return r == true_range ? e_true : e_false;
    }
  return NULL;
}

// Find jump threading paths to any of the SSA names in the
// INTERESTING bitmap, and register any such paths.
//
// BB is the current block being processed.
//
// OVERALL_PATHS is the search space up to this block.

void
back_threader::find_paths_to_names (basic_block bb, bitmap interesting,
				    unsigned overall_paths,
				    back_threader_profitability &profit)
{
  if (m_visited_bbs.add (bb))
    return;

  m_path.safe_push (bb);

  // Try to resolve the path without looking back.  Avoid resolving paths
  // we know are large but are not (yet) recognized as a Finite State Machine.
  // ???  Ideally we'd explore the cheapest path to the loop backedge here,
  // avoiding the exponential greedy search and only start that from there.
  // Precomputing a path-size-to-immediate-dominator-of-successor for each
  // edge might help here.  Alternatively copying divergent control flow
  // on the way to the backedge could be worthwhile.
  bool large_non_fsm;
  if (m_path.length () > 1
      && (!profit.possibly_profitable_path_p (m_path, &large_non_fsm)
	  || (!large_non_fsm
	      && maybe_register_path (profit))))
    ;

  // The backwards thread copier cannot copy blocks that do not belong
  // to the same loop, so when the new source of the path entry no
  // longer belongs to it we don't need to search further.
  else if (m_path[0]->loop_father != bb->loop_father)
    ;

  // Continue looking for ways to extend the path but limit the
  // search space along a branch.
  else if ((overall_paths = overall_paths * EDGE_COUNT (bb->preds))
	   <= (unsigned) param_max_jump_thread_paths)
    {
      // For further greedy searching we want to remove interesting
      // names defined in BB but add the ones on the PHI edges for the
      // respective edges, adding imports from those stmts.
      // We do this by starting with all names
      // not defined in BB as interesting, collecting a list of
      // interesting PHIs in BB on the fly.  Then we iterate over
      // predecessor edges, adding interesting PHI edge defs to
      // the set of interesting names to consider when processing it.
      auto_bitmap new_interesting;
      auto_vec<int, 16> new_imports;
      auto_vec<gphi *, 4> interesting_phis;
      bitmap_iterator bi;
      unsigned i;
      auto_vec<tree, 16> worklist;
      EXECUTE_IF_SET_IN_BITMAP (interesting, 0, i, bi)
	{
	  tree name = ssa_name (i);
	  gimple *def_stmt = SSA_NAME_DEF_STMT (name);
	  /* Imports remain interesting.  */
	  if (gimple_bb (def_stmt) != bb)
	    {
	      bitmap_set_bit (new_interesting, i);
	      continue;
	    }
	  worklist.quick_push (name);
	  while (!worklist.is_empty ())
	    {
	      tree name = worklist.pop ();
	      gimple *def_stmt = SSA_NAME_DEF_STMT (name);
	      /* Newly discovered imports are interesting.  */
	      if (gimple_bb (def_stmt) != bb)
		{
		  bitmap_set_bit (new_interesting, SSA_NAME_VERSION (name));
		  continue;
		}
	      /* Local PHIs participate in renaming below.  */
	      if (gphi *phi = dyn_cast<gphi *> (def_stmt))
		{
		  tree res = gimple_phi_result (phi);
		  if (!SSA_NAME_OCCURS_IN_ABNORMAL_PHI (res))
		    interesting_phis.safe_push (phi);
		}
	      /* For other local defs process their uses, amending
		 imports on the way.  */
	      else
		{
		  tree ssa[3];
		  unsigned lim = gimple_range_ssa_names (ssa, 3, def_stmt);
		  for (unsigned j = 0; j < lim; ++j)
		    {
		      tree rhs = ssa[j];
		      if (rhs
			  && bitmap_set_bit (m_imports,
					     SSA_NAME_VERSION (rhs)))
			{
			  new_imports.safe_push (SSA_NAME_VERSION (rhs));
			  worklist.safe_push (rhs);
			}
		    }
		}
	    }
	}
      if (!bitmap_empty_p (new_interesting)
	  || !interesting_phis.is_empty ())
	{
	  auto_vec<int, 4> unwind (interesting_phis.length ());
	  auto_vec<int, 4> imports_unwind (interesting_phis.length ());
	  edge_iterator iter;
	  edge e;
	  FOR_EACH_EDGE (e, iter, bb->preds)
	    {
	      if (e->flags & EDGE_ABNORMAL
		  // This is like path_crosses_loops in profitable_path_p but
		  // more restrictive to avoid peeling off loop iterations (see
		  // tree-ssa/pr14341.c for an example).
		  // ???  Note this restriction only applied when visiting an
		  // interesting PHI with the former resolve_phi.
		  || (!interesting_phis.is_empty ()
		      && m_path[0]->loop_father != e->src->loop_father))
		continue;
	      for (gphi *phi : interesting_phis)
		{
		  tree def = PHI_ARG_DEF_FROM_EDGE (phi, e);
		  if (TREE_CODE (def) == SSA_NAME)
		    {
		      int ver = SSA_NAME_VERSION (def);
		      if (bitmap_set_bit (new_interesting, ver))
			{
			  if (bitmap_set_bit (m_imports, ver))
			    imports_unwind.quick_push (ver);
			  unwind.quick_push (ver);
			}
		    }
		}
	      find_paths_to_names (e->src, new_interesting, overall_paths,
				   profit);
	      // Restore new_interesting.
	      for (int def : unwind)
		bitmap_clear_bit (new_interesting, def);
	      unwind.truncate (0);
	      // Restore m_imports.
	      for (int def : imports_unwind)
		bitmap_clear_bit (m_imports, def);
	      imports_unwind.truncate (0);
	    }
	}
      /* m_imports tracks all interesting names on the path, so when
	 backtracking we have to restore it.  */
      for (int j : new_imports)
	bitmap_clear_bit (m_imports, j);
    }
  else if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "  FAIL: Search space limit %d reached.\n",
	     param_max_jump_thread_paths);

  // Reset things to their original state.
  m_path.pop ();
  m_visited_bbs.remove (bb);
}

// Search backwards from BB looking for paths where the final
// conditional may be threaded to a successor block.  Record such paths
// for jump threading.

void
back_threader::maybe_thread_block (basic_block bb)
{
  if (EDGE_COUNT (bb->succs) <= 1)
    return;

  gimple *stmt = *gsi_last_bb (bb);
  if (!stmt)
    return;

  enum gimple_code code = gimple_code (stmt);
  if (code != GIMPLE_SWITCH
      && code != GIMPLE_COND)
    return;

  m_last_stmt = stmt;
  m_visited_bbs.empty ();
  m_path.truncate (0);

  // We compute imports of the path during discovery starting
  // just with names used in the conditional.
  bitmap_clear (m_imports);
  ssa_op_iter iter;
  tree name;
  FOR_EACH_SSA_TREE_OPERAND (name, stmt, iter, SSA_OP_USE)
    {
      if (!gimple_range_ssa_p (name))
	return;
      bitmap_set_bit (m_imports, SSA_NAME_VERSION (name));
    }

  // Interesting is the set of imports we have not yet seen the
  // definition of.  So while imports only grow, the set of
  // interesting defs dwindles and once empty we can stop searching.
  auto_bitmap interesting;
  bitmap_copy (interesting, m_imports);
  back_threader_profitability profit (m_flags & BT_SPEED, stmt);
  find_paths_to_names (bb, interesting, 1, profit);
}

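// Dump PATH to stderr; a convenience wrapper meant to be called from
// the debugger.
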
DEBUG_FUNCTION void
debug (const vec <basic_block> &path)
{
  dump_path (stderr, path);
  fputc ('\n', stderr);
}

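// Dump the current set of imports (the candidates for pre-computation)
// to OUT.
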
void
back_threader::dump (FILE *out)
{
  fprintf (out, "\nCandidates for pre-computation:\n");
  fprintf (out, "===================================\n");

  bitmap_iterator bi;
  unsigned i;

  EXECUTE_IF_SET_IN_BITMAP (m_imports, 0, i, bi)
    {
      tree name = ssa_name (i);
      print_generic_expr (out, name, TDF_NONE);
      fprintf (out, "\n");
    }
}

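// Same as dump, but to stderr, for use from the debugger.
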
void
back_threader::debug ()
{
  dump (stderr);
}

/* Examine jump threading path PATH and return TRUE if it is possibly
   profitable to thread it, otherwise return FALSE.  If this function
   returns TRUE profitable_path_p might not be satisfied but when
   the path is extended it might be.  In particular indicate in
   *LARGE_NON_FSM whether the thread is too large for a non-FSM thread
   but would be OK if we extend the path to cover the loop backedge.

   ???  It seems we should be able to loosen some of the restrictions in
   this function after loop optimizations have run.  */

bool
back_threader_profitability::possibly_profitable_path_p
				  (const vec<basic_block> &m_path,
				   bool *large_non_fsm)
{
  gcc_checking_assert (!m_path.is_empty ());

  /* We can get an empty path here (excluding the DEF block) when the
     statement that makes a conditional generate a compile-time
     constant result is in the same block as the conditional.

     That's not really a jump threading opportunity, but instead is
     simple cprop & simplification.  We could handle it here if we
     wanted by wiring up all the incoming edges.  If we run this
     early in IPA, that might be worth doing.  For now we just
     reject that case.  */
  if (m_path.length () <= 1)
    return false;

  gimple_stmt_iterator gsi;
  loop_p loop = m_path[0]->loop_father;

  // We recompute the following; when we rewrite possibly_profitable_path_p
  // to work incrementally on added BBs we have to unwind them on backtracking.
  m_n_insns = 0;
  m_threaded_through_latch = false;
  m_multiway_branch_in_path = false;
  m_contains_hot_bb = false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Checking profitability of path (backwards): ");

  /* Count the number of instructions on the path: as these instructions
     will have to be duplicated, we will not record the path if there
     are too many instructions on the path.  Also check that all the
     blocks in the path belong to a single loop.  */
  for (unsigned j = 0; j < m_path.length (); j++)
    {
      basic_block bb = m_path[j];

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, " bb:%i", bb->index);
      /* Remember, blocks in the path are stored in opposite order in
	 the PATH array.  The last entry in the array represents the
	 block with an outgoing edge that we will redirect to the jump
	 threading path.  Thus we don't care how many statements are
	 in that block because it will not be copied or whether or not
	 it ends in a multiway branch.  */
      if (j < m_path.length () - 1)
	{
	  int orig_n_insns = m_n_insns;
	  if (!m_contains_hot_bb && m_speed_p)
	    m_contains_hot_bb |= optimize_bb_for_speed_p (bb);
	  for (gsi = gsi_after_labels (bb);
	       !gsi_end_p (gsi);
	       gsi_next_nondebug (&gsi))
	    {
	      /* Do not allow OpenACC loop markers and __builtin_constant_p on
		 threading paths.  The latter is disallowed, because an
		 expression might be constant on two threading paths, and
		 become non-constant (i.e.: phi) when they merge.  */
	      gimple *stmt = gsi_stmt (gsi);
	      if (gimple_call_internal_p (stmt, IFN_UNIQUE)
		  || gimple_call_builtin_p (stmt, BUILT_IN_CONSTANT_P))
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fputc ('\n', dump_file);
		  return false;
		}
	      /* Do not count empty statements and labels.  */
	      if (gimple_code (stmt) != GIMPLE_NOP
		  && !is_gimple_debug (stmt))
		m_n_insns += estimate_num_insns (stmt, &eni_size_weights);
	    }
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " (%i insns)", m_n_insns - orig_n_insns);

	  /* We do not look at the block with the threaded branch
	     in this loop.  So if any block with a last statement that
	     is a GIMPLE_SWITCH or GIMPLE_GOTO is seen, then we have a
	     multiway branch on our path.

	     The block in PATH[0] is special, it's the block where we're
	     going to be able to eliminate its branch.  */
	  if (j > 0)
	    {
	      gimple *last = *gsi_last_bb (bb);
	      if (last
		  && (gimple_code (last) == GIMPLE_SWITCH
		      || gimple_code (last) == GIMPLE_GOTO))
		m_multiway_branch_in_path = true;
	    }
	}

      /* Note if we thread through the latch, we will want to include
	 the last entry in the array when determining if we thread
	 through the loop latch.  */
      if (loop->latch == bb)
	{
	  m_threaded_through_latch = true;
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " (latch)");
	}
    }

  /* We are going to remove the control statement at the end of the
     last block in the threading path.  So don't count it against our
     statement count.  */
  m_n_insns -= m_exit_jump_benefit;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\n  Control statement insns: %i\n"
	     "  Overall: %i insns\n",
	     m_exit_jump_benefit, m_n_insns);

  /* Threading is profitable if the path duplicated is hot but also
     in a case we separate cold path from hot path and permit optimization
     of the hot path later.  Be on the aggressive side here.  In some
     testcases, as in PR 78407, this leads to noticeable improvements.  */
  if (m_speed_p)
    {
      if (m_n_insns >= param_max_fsm_thread_path_insns)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "  FAIL: Jump-thread path not considered: "
		     "the number of instructions on the path "
		     "exceeds PARAM_MAX_FSM_THREAD_PATH_INSNS.\n");
	  return false;
	}
      edge entry = find_edge (m_path[m_path.length () - 1],
			      m_path[m_path.length () - 2]);
      if (probably_never_executed_edge_p (cfun, entry))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "  FAIL: Jump-thread path not considered: "
		     "path entry is probably never executed.\n");
	  return false;
	}
    }
  else if (m_n_insns > 1)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  FAIL: Jump-thread path not considered: "
		 "duplication of %i insns is needed and optimizing for size.\n",
		 m_n_insns);
      return false;
    }

  /* The generic copier used by the backthreader does not re-use an
     existing threading path to reduce code duplication.  So for that
     case, drastically reduce the number of statements we are allowed
     to copy.  We don't know yet whether we will thread through the latch
     so we have to be permissive and continue threading, but indicate
     to the caller the thread, if final, wouldn't be profitable.  */
  if ((!m_threaded_multiway_branch
       || !loop->latch
       || loop->latch->index == EXIT_BLOCK)
      && (m_n_insns * param_fsm_scale_path_stmts
	  >= param_max_jump_thread_duplication_stmts))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  FAIL: Did not thread around loop and would copy too "
		 "many statements.\n");
      return false;
    }
  *large_non_fsm = (!(m_threaded_through_latch && m_threaded_multiway_branch)
		    && (m_n_insns * param_fsm_scale_path_stmts
			>= param_max_jump_thread_duplication_stmts));

  if (dump_file && (dump_flags & TDF_DETAILS))
    fputc ('\n', dump_file);
  return true;
}

/* Examine jump threading path PATH and return TRUE if it is profitable to
   thread it, otherwise return FALSE.

   The taken edge out of the path is TAKEN_EDGE.

   CREATES_IRREDUCIBLE_LOOP is set to TRUE if threading this path
   would create an irreducible loop.

   ???  It seems we should be able to loosen some of the restrictions in
   this function after loop optimizations have run.  */

bool
back_threader_profitability::profitable_path_p (const vec<basic_block> &m_path,
						edge taken_edge,
						bool *creates_irreducible_loop)
{
  // We can assume that possibly_profitable_path_p holds here.

  loop_p loop = m_path[0]->loop_father;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Checking profitability of path (backwards): ");

  /* If this path threaded through the loop latch back into the
     same loop and the destination does not dominate the loop
     latch, then this thread would create an irreducible loop.  */
  *creates_irreducible_loop = false;
  if (m_threaded_through_latch
      && loop == taken_edge->dest->loop_father
      && (determine_bb_domination_status (loop, taken_edge->dest)
	  == DOMST_NONDOMINATING))
    *creates_irreducible_loop = true;

  /* Threading is profitable if the path duplicated is hot but also
     in a case we separate cold path from hot path and permit optimization
     of the hot path later.  Be on the aggressive side here.  In some
     testcases, as in PR 78407, this leads to noticeable improvements.  */
  if (m_speed_p
      && (optimize_edge_for_speed_p (taken_edge) || m_contains_hot_bb))
    {
      if (probably_never_executed_edge_p (cfun, taken_edge))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "  FAIL: Jump-thread path not considered: "
		     "path leads to probably never executed edge.\n");
	  return false;
	}
    }
  else if (m_n_insns > 1)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  FAIL: Jump-thread path not considered: "
		 "duplication of %i insns is needed and optimizing for size.\n",
		 m_n_insns);
      return false;
    }

  /* We avoid creating irreducible inner loops unless we thread through
     a multiway branch, in which case we have deemed it worth losing
     other loop optimizations later.

     We also consider it worth creating an irreducible inner loop after
     loop optimizations if the number of copied statements is low.  */
  if (!m_threaded_multiway_branch
      && *creates_irreducible_loop
      && (!(cfun->curr_properties & PROP_loop_opts_done)
	  || (m_n_insns * param_fsm_scale_path_stmts
	      >= param_max_jump_thread_duplication_stmts)))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  FAIL: Would create irreducible loop early without "
		 "threading multiway branch.\n");
      /* We compute creates_irreducible_loop only late.  */
      return false;
    }

  /* The generic copier used by the backthreader does not re-use an
     existing threading path to reduce code duplication.  So for that
     case, drastically reduce the number of statements we are allowed
     to copy.  */
  if (!(m_threaded_through_latch && m_threaded_multiway_branch)
      && (m_n_insns * param_fsm_scale_path_stmts
	  >= param_max_jump_thread_duplication_stmts))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  FAIL: Did not thread around loop and would copy too "
		 "many statements.\n");
      return false;
    }

  /* When there is a multi-way branch on the path, then threading can
     explode the CFG due to duplicating the edges for that multi-way
     branch.  So like above, only allow a multi-way branch on the path
     if we actually thread a multi-way branch.  */
  if (!m_threaded_multiway_branch && m_multiway_branch_in_path)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  FAIL: Thread through multiway branch without threading "
		 "a multiway branch.\n");
      return false;
    }

  /* Threading through an empty latch would cause code to be added to
     the latch.  This could alter the loop form sufficiently to cause
     loop optimizations to fail.  Disable these threads until after
     loop optimizations have run.  */
  if ((m_threaded_through_latch || taken_edge->dest == loop->latch)
      && !(cfun->curr_properties & PROP_loop_opts_done)
      && empty_block_p (loop->latch))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  FAIL: Thread through latch before loop opts would create "
		 "non-empty latch\n");
      return false;
    }
  if (dump_file && (dump_flags & TDF_DETAILS))
    fputc ('\n', dump_file);
  return true;
}

/* The current path PATH is a vector of blocks forming a jump threading
   path in reverse order.  TAKEN_EDGE is the edge taken from path[0].

   Convert the current path into the form used by register_jump_thread
   and register it.

   Return TRUE if successful or FALSE otherwise.  */

bool
back_threader_registry::register_path (const vec<basic_block> &m_path,
				       edge taken_edge)
{
  vec<jump_thread_edge *> *jump_thread_path = allocate_thread_path ();

  // The generic copier ignores the edge type.  We can build the
  // thread edges with any type.
  for (unsigned int j = 0; j + 1 < m_path.length (); j++)
    {
      basic_block bb1 = m_path[m_path.length () - j - 1];
      basic_block bb2 = m_path[m_path.length () - j - 2];

      edge e = find_edge (bb1, bb2);
      gcc_assert (e);
      push_edge (jump_thread_path, e, EDGE_COPY_SRC_BLOCK);
    }

  push_edge (jump_thread_path, taken_edge, EDGE_NO_COPY_SRC_BLOCK);
  return register_jump_thread (jump_thread_path);
}

// Thread all suitable paths in the current function.
//
// Return TODO_flags.

unsigned int
back_threader::thread_blocks ()
{
  basic_block bb;
  FOR_EACH_BB_FN (bb, m_fun)
    if (EDGE_COUNT (bb->succs) > 1)
      maybe_thread_block (bb);

  bool changed = m_registry.thread_through_all_blocks (true);

  if (m_flags & BT_SPEED)
    return changed ? TODO_cleanup_cfg : 0;

  return 0;
}

namespace {

const pass_data pass_data_early_thread_jumps =
{
  GIMPLE_PASS,
  "ethread",
  OPTGROUP_NONE,
  TV_TREE_SSA_THREAD_JUMPS,
  ( PROP_cfg | PROP_ssa ),
  0,
  0,
  0,
  ( TODO_cleanup_cfg | TODO_update_ssa ),
};

const pass_data pass_data_thread_jumps =
{
  GIMPLE_PASS,
  "thread",
  OPTGROUP_NONE,
  TV_TREE_SSA_THREAD_JUMPS,
  ( PROP_cfg | PROP_ssa ),
  0,
  0,
  0,
  TODO_update_ssa,
};

const pass_data pass_data_thread_jumps_full =
{
  GIMPLE_PASS,
  "threadfull",
  OPTGROUP_NONE,
  TV_TREE_SSA_THREAD_JUMPS,
  ( PROP_cfg | PROP_ssa ),
  0,
  0,
  0,
  TODO_update_ssa,
};

// Early jump threading pass optimizing for size.
class pass_early_thread_jumps : public gimple_opt_pass
{
public:
  pass_early_thread_jumps (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_early_thread_jumps, ctxt)
  {}

  opt_pass * clone () override
  {
    return new pass_early_thread_jumps (m_ctxt);
  }
  void set_pass_param (unsigned int, bool param) override
  {
    m_first = param;
  }
  bool gate (function *) override
  {
    return flag_thread_jumps;
  }
  unsigned int execute (function *fun) override
  {
    back_threader threader (fun, BT_NONE, m_first);
    return threader.thread_blocks ();
  }
private:
  bool m_first;
};

// Jump threading pass without resolving of unknown SSAs.
class pass_thread_jumps : public gimple_opt_pass
{
public:
  pass_thread_jumps (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_thread_jumps, ctxt)
  {}
  opt_pass * clone (void) override
  {
    return new pass_thread_jumps (m_ctxt);
  }
  void set_pass_param (unsigned int, bool param) override
  {
    m_first = param;
  }
  bool gate (function *) override
  {
    return flag_thread_jumps && flag_expensive_optimizations;
  }
  unsigned int execute (function *fun) override
  {
    back_threader threader (fun, BT_SPEED, m_first);
    return threader.thread_blocks ();
  }
private:
  bool m_first;
};

// Jump threading pass that fully resolves unknown SSAs.
class pass_thread_jumps_full : public gimple_opt_pass
{
public:
  pass_thread_jumps_full (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_thread_jumps_full, ctxt)
  {}
  opt_pass * clone (void) override
  {
    return new pass_thread_jumps_full (m_ctxt);
  }
  void set_pass_param (unsigned int, bool param) override
  {
    m_first = param;
  }
  bool gate (function *) override
  {
    return flag_thread_jumps && flag_expensive_optimizations;
  }
  unsigned int execute (function *fun) override
  {
    back_threader threader (fun, BT_SPEED | BT_RESOLVE, m_first);
    return threader.thread_blocks ();
  }
private:
  bool m_first;
};

} // namespace {

gimple_opt_pass *
make_pass_thread_jumps (gcc::context *ctxt)
{
  return new pass_thread_jumps (ctxt);
}

gimple_opt_pass *
make_pass_thread_jumps_full (gcc::context *ctxt)
{
  return new pass_thread_jumps_full (ctxt);
}

gimple_opt_pass *
make_pass_early_thread_jumps (gcc::context *ctxt)
{
  return new pass_early_thread_jumps (ctxt);
}