c++: ttp TEMPLATE_DECL equivalence [PR112737]
[official-gcc.git] / gcc / tree-ssa-threadbackward.cc
blobea8d7b882d0823559880176e3691e9f2191c0695
1 /* SSA Jump Threading
2 Copyright (C) 2005-2024 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "backend.h"
24 #include "predict.h"
25 #include "tree.h"
26 #include "gimple.h"
27 #include "fold-const.h"
28 #include "cfgloop.h"
29 #include "gimple-iterator.h"
30 #include "tree-cfg.h"
31 #include "tree-ssa-threadupdate.h"
32 #include "tree-ssa-loop.h"
33 #include "cfganal.h"
34 #include "tree-pass.h"
35 #include "gimple-ssa.h"
36 #include "tree-phinodes.h"
37 #include "tree-inline.h"
38 #include "tree-vectorizer.h"
39 #include "value-range.h"
40 #include "gimple-range.h"
41 #include "tree-ssa-threadedge.h"
42 #include "gimple-range-path.h"
43 #include "ssa.h"
44 #include "tree-cfgcleanup.h"
45 #include "tree-pretty-print.h"
46 #include "cfghooks.h"
47 #include "dbgcnt.h"
49 // Path registry for the backwards threader. After all paths have been
50 // registered with register_path(), thread_through_all_blocks() is called
51 // to modify the CFG.
53 class back_threader_registry : public back_jt_path_registry
55 public:
56 bool register_path (const vec<basic_block> &, edge taken);
59 // Class to abstract the profitability code for the backwards threader.
61 class back_threader_profitability
63 public:
64 back_threader_profitability (bool speed_p, gimple *stmt);
65 bool possibly_profitable_path_p (const vec<basic_block> &, bool *);
66 bool profitable_path_p (const vec<basic_block> &,
67 edge taken, bool *irreducible_loop);
68 private:
69 const bool m_speed_p;
70 int m_exit_jump_benefit;
71 bool m_threaded_multiway_branch;
72 // The following are computed by possibly_profitable_path_p
73 bool m_threaded_through_latch;
74 bool m_multiway_branch_in_path;
75 bool m_contains_hot_bb;
76 int m_n_insns;
79 back_threader_profitability::back_threader_profitability (bool speed_p,
80 gimple *last)
81 : m_speed_p (speed_p)
83 m_threaded_multiway_branch = (gimple_code (last) == GIMPLE_SWITCH
84 || gimple_code (last) == GIMPLE_GOTO);
85 // The forward threader has estimate_threading_killed_stmts, in
86 // particular it estimates further DCE from eliminating the exit
87 // control stmt.
88 m_exit_jump_benefit = estimate_num_insns (last, &eni_size_weights);
// Back threader flags.
#define BT_NONE 0
// Generate fast code at the expense of code size.
#define BT_SPEED 1
// Resolve unknown SSAs on entry to a threading path.  If set, use the
// ranger.  If not, assume all ranges on entry to a path are VARYING.
#define BT_RESOLVE 2
99 class back_threader
101 public:
102 back_threader (function *fun, unsigned flags, bool first);
103 ~back_threader ();
104 unsigned thread_blocks ();
105 private:
106 void maybe_thread_block (basic_block bb);
107 bool debug_counter ();
108 edge maybe_register_path (back_threader_profitability &);
109 void maybe_register_path_dump (edge taken_edge);
110 void find_paths_to_names (basic_block bb, bitmap imports, unsigned,
111 back_threader_profitability &);
112 edge find_taken_edge (const vec<basic_block> &path);
113 edge find_taken_edge_cond (const vec<basic_block> &path, gcond *);
114 edge find_taken_edge_switch (const vec<basic_block> &path, gswitch *);
115 virtual void debug ();
116 virtual void dump (FILE *out);
118 back_threader_registry m_registry;
120 // Current path being analyzed.
121 auto_vec<basic_block> m_path;
122 // Hash to mark visited BBs while analyzing a path.
123 hash_set<basic_block> m_visited_bbs;
124 // The set of SSA names, any of which could potentially change the
125 // value of the final conditional in a path.
126 auto_bitmap m_imports;
127 // The last statement in the path.
128 gimple *m_last_stmt;
129 // Marker to differentiate unreachable edges.
130 static const edge UNREACHABLE_EDGE;
131 // Set to TRUE if unknown SSA names along a path should be resolved
132 // with the ranger. Otherwise, unknown SSA names are assumed to be
133 // VARYING. Setting to true is more precise but slower.
134 function *m_fun;
135 // Ranger for the path solver.
136 gimple_ranger *m_ranger;
137 unsigned m_flags;
138 // Set to TRUE for the first of each thread[12] pass or the first of
139 // each threadfull[12] pass. This is used to differentiate between
140 // the different threading passes so we can set up debug counters.
141 bool m_first;
144 // Used to differentiate unreachable edges, so we may stop the search
145 // in a the given direction.
146 const edge back_threader::UNREACHABLE_EDGE = (edge) -1;
148 back_threader::back_threader (function *fun, unsigned flags, bool first)
149 : m_first (first)
151 if (flags & BT_SPEED)
152 loop_optimizer_init (LOOPS_HAVE_PREHEADERS | LOOPS_HAVE_SIMPLE_LATCHES);
153 else
154 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
156 m_fun = fun;
157 m_flags = flags;
158 m_last_stmt = NULL;
160 // The path solver needs EDGE_DFS_BACK in resolving mode.
161 if (flags & BT_RESOLVE)
162 mark_dfs_back_edges ();
164 m_ranger = new gimple_ranger;
167 back_threader::~back_threader ()
169 delete m_ranger;
170 loop_optimizer_finalize ();
173 // A wrapper for the various debug counters for the threading passes.
174 // Returns TRUE if it's OK to register the current threading
175 // candidate.
177 bool
178 back_threader::debug_counter ()
180 // The ethread pass is mostly harmless ;-).
181 if ((m_flags & BT_SPEED) == 0)
182 return true;
184 if (m_flags & BT_RESOLVE)
186 if (m_first && !dbg_cnt (back_threadfull1))
187 return false;
189 if (!m_first && !dbg_cnt (back_threadfull2))
190 return false;
192 else
194 if (m_first && !dbg_cnt (back_thread1))
195 return false;
197 if (!m_first && !dbg_cnt (back_thread2))
198 return false;
200 return true;
203 static void
204 dump_path (FILE *dump_file, const vec<basic_block> &path)
206 for (unsigned i = path.length (); i > 0; --i)
208 basic_block bb = path[i - 1];
209 fprintf (dump_file, "%d", bb->index);
210 if (i > 1)
211 fprintf (dump_file, "->");
215 // Dump details of an attempt to register a path.
217 void
218 back_threader::maybe_register_path_dump (edge taken)
220 if (m_path.is_empty ())
221 return;
223 fprintf (dump_file, "path: ");
224 dump_path (dump_file, m_path);
225 fprintf (dump_file, "->");
227 if (taken == UNREACHABLE_EDGE)
228 fprintf (dump_file, "xx REJECTED (unreachable)\n");
229 else if (taken)
230 fprintf (dump_file, "%d SUCCESS\n", taken->dest->index);
231 else
232 fprintf (dump_file, "xx REJECTED\n");
235 // If an outgoing edge can be determined out of the current path,
236 // register it for jump threading and return the taken edge.
238 // Return NULL if it is unprofitable to thread this path, or the
239 // outgoing edge is unknown. Return UNREACHABLE_EDGE if the path is
240 // unreachable.
242 edge
243 back_threader::maybe_register_path (back_threader_profitability &profit)
245 edge taken_edge = find_taken_edge (m_path);
247 if (taken_edge && taken_edge != UNREACHABLE_EDGE)
249 bool irreducible = false;
250 if (profit.profitable_path_p (m_path, taken_edge, &irreducible)
251 && debug_counter ()
252 && m_registry.register_path (m_path, taken_edge))
254 if (irreducible)
255 vect_free_loop_info_assumptions (m_path[0]->loop_father);
257 else
258 taken_edge = NULL;
261 if (dump_file && (dump_flags & TDF_DETAILS))
262 maybe_register_path_dump (taken_edge);
264 return taken_edge;
267 // Return the known taken edge out of a path. If the path can be
268 // determined to be unreachable, return UNREACHABLE_EDGE. If no
269 // outgoing edge can be calculated, return NULL.
271 edge
272 back_threader::find_taken_edge (const vec<basic_block> &path)
274 gcc_checking_assert (path.length () > 1);
275 switch (gimple_code (m_last_stmt))
277 case GIMPLE_COND:
278 return find_taken_edge_cond (path, as_a<gcond *> (m_last_stmt));
280 case GIMPLE_SWITCH:
281 return find_taken_edge_switch (path, as_a<gswitch *> (m_last_stmt));
283 default:
284 return NULL;
288 // Same as find_taken_edge, but for paths ending in a switch.
290 edge
291 back_threader::find_taken_edge_switch (const vec<basic_block> &path,
292 gswitch *sw)
294 tree name = gimple_switch_index (sw);
295 int_range_max r;
297 path_range_query solver (*m_ranger, path, m_imports, m_flags & BT_RESOLVE);
298 solver.range_of_expr (r, name, sw);
300 if (r.undefined_p ())
301 return UNREACHABLE_EDGE;
303 if (r.varying_p ())
304 return NULL;
306 tree label = find_case_label_range (sw, &r);
307 if (!label)
308 return NULL;
310 return find_edge (gimple_bb (sw), label_to_block (cfun, CASE_LABEL (label)));
313 // Same as find_taken_edge, but for paths ending in a GIMPLE_COND.
315 edge
316 back_threader::find_taken_edge_cond (const vec<basic_block> &path,
317 gcond *cond)
319 int_range_max r;
321 path_range_query solver (*m_ranger, path, m_imports, m_flags & BT_RESOLVE);
322 solver.range_of_stmt (r, cond);
324 if (solver.unreachable_path_p ())
325 return UNREACHABLE_EDGE;
327 int_range<2> true_range = range_true ();
328 int_range<2> false_range = range_false ();
330 if (r == true_range || r == false_range)
332 edge e_true, e_false;
333 basic_block bb = gimple_bb (cond);
334 extract_true_false_edges_from_block (bb, &e_true, &e_false);
335 return r == true_range ? e_true : e_false;
337 return NULL;
340 // Find jump threading paths to any of the SSA names in the
341 // INTERESTING bitmap, and register any such paths.
343 // BB is the current path being processed.
345 // OVERALL_PATHS is the search space up to this block
347 void
348 back_threader::find_paths_to_names (basic_block bb, bitmap interesting,
349 unsigned overall_paths,
350 back_threader_profitability &profit)
352 if (m_visited_bbs.add (bb))
353 return;
355 m_path.safe_push (bb);
357 // Try to resolve the path without looking back. Avoid resolving paths
358 // we know are large but are not (yet) recognized as Finite State Machine.
359 // ??? Ideally we'd explore the cheapest path to the loop backedge here,
360 // avoiding the exponential greedy search and only start that from there.
361 // Precomputing a path-size-to-immediate-dominator-of-successor for each
362 // edge might help here. Alternatively copying divergent control flow
363 // on the way to the backedge could be worthwhile.
364 bool large_non_fsm;
365 if (m_path.length () > 1
366 && (!profit.possibly_profitable_path_p (m_path, &large_non_fsm)
367 || (!large_non_fsm
368 && maybe_register_path (profit))))
371 // The backwards thread copier cannot copy blocks that do not belong
372 // to the same loop, so when the new source of the path entry no
373 // longer belongs to it we don't need to search further.
374 else if (m_path[0]->loop_father != bb->loop_father)
377 // Continue looking for ways to extend the path but limit the
378 // search space along a branch
379 else if ((overall_paths = overall_paths * EDGE_COUNT (bb->preds))
380 <= (unsigned)param_max_jump_thread_paths)
382 // For further greedy searching we want to remove interesting
383 // names defined in BB but add ones on the PHI edges for the
384 // respective edges and adding imports from those stmts.
385 // We do this by starting with all names
386 // not defined in BB as interesting, collecting a list of
387 // interesting PHIs in BB on the fly. Then we iterate over
388 // predecessor edges, adding interesting PHI edge defs to
389 // the set of interesting names to consider when processing it.
390 auto_bitmap new_interesting;
391 auto_vec<int, 16> new_imports;
392 auto_vec<gphi *, 4> interesting_phis;
393 bitmap_iterator bi;
394 unsigned i;
395 auto_vec<tree, 16> worklist;
396 EXECUTE_IF_SET_IN_BITMAP (interesting, 0, i, bi)
398 tree name = ssa_name (i);
399 gimple *def_stmt = SSA_NAME_DEF_STMT (name);
400 /* Imports remain interesting. */
401 if (gimple_bb (def_stmt) != bb)
403 bitmap_set_bit (new_interesting, i);
404 continue;
406 worklist.quick_push (name);
407 while (!worklist.is_empty ())
409 tree name = worklist.pop ();
410 gimple *def_stmt = SSA_NAME_DEF_STMT (name);
411 /* Newly discovered imports are interesting. */
412 if (gimple_bb (def_stmt) != bb)
414 bitmap_set_bit (new_interesting, SSA_NAME_VERSION (name));
415 continue;
417 /* Local PHIs participate in renaming below. */
418 if (gphi *phi = dyn_cast<gphi *> (def_stmt))
420 tree res = gimple_phi_result (phi);
421 if (!SSA_NAME_OCCURS_IN_ABNORMAL_PHI (res))
422 interesting_phis.safe_push (phi);
424 /* For other local defs process their uses, amending
425 imports on the way. */
426 else
428 tree ssa[3];
429 unsigned lim = gimple_range_ssa_names (ssa, 3, def_stmt);
430 for (unsigned j = 0; j < lim; ++j)
432 tree rhs = ssa[j];
433 if (rhs
434 && bitmap_set_bit (m_imports,
435 SSA_NAME_VERSION (rhs)))
437 new_imports.safe_push (SSA_NAME_VERSION (rhs));
438 worklist.safe_push (rhs);
444 if (!bitmap_empty_p (new_interesting)
445 || !interesting_phis.is_empty ())
447 auto_vec<int, 4> unwind (interesting_phis.length ());
448 auto_vec<int, 4> imports_unwind (interesting_phis.length ());
449 edge_iterator iter;
450 edge e;
451 FOR_EACH_EDGE (e, iter, bb->preds)
453 if (e->flags & EDGE_ABNORMAL
454 // This is like path_crosses_loops in profitable_path_p but
455 // more restrictive to avoid peeling off loop iterations (see
456 // tree-ssa/pr14341.c for an example).
457 // ??? Note this restriction only applied when visiting an
458 // interesting PHI with the former resolve_phi.
459 || (!interesting_phis.is_empty ()
460 && m_path[0]->loop_father != e->src->loop_father))
461 continue;
462 for (gphi *phi : interesting_phis)
464 tree def = PHI_ARG_DEF_FROM_EDGE (phi, e);
465 if (TREE_CODE (def) == SSA_NAME)
467 int ver = SSA_NAME_VERSION (def);
468 if (bitmap_set_bit (new_interesting, ver))
470 if (bitmap_set_bit (m_imports, ver))
471 imports_unwind.quick_push (ver);
472 unwind.quick_push (ver);
476 find_paths_to_names (e->src, new_interesting, overall_paths,
477 profit);
478 // Restore new_interesting.
479 for (int def : unwind)
480 bitmap_clear_bit (new_interesting, def);
481 unwind.truncate (0);
482 // Restore and m_imports.
483 for (int def : imports_unwind)
484 bitmap_clear_bit (m_imports, def);
485 imports_unwind.truncate (0);
488 /* m_imports tracks all interesting names on the path, so when
489 backtracking we have to restore it. */
490 for (int j : new_imports)
491 bitmap_clear_bit (m_imports, j);
493 else if (dump_file && (dump_flags & TDF_DETAILS))
494 fprintf (dump_file, " FAIL: Search space limit %d reached.\n",
495 param_max_jump_thread_paths);
497 // Reset things to their original state.
498 m_path.pop ();
499 m_visited_bbs.remove (bb);
502 // Search backwards from BB looking for paths where the final
503 // conditional maybe threaded to a successor block. Record such paths
504 // for jump threading.
506 void
507 back_threader::maybe_thread_block (basic_block bb)
509 if (EDGE_COUNT (bb->succs) <= 1)
510 return;
512 gimple *stmt = *gsi_last_bb (bb);
513 if (!stmt)
514 return;
516 enum gimple_code code = gimple_code (stmt);
517 if (code != GIMPLE_SWITCH
518 && code != GIMPLE_COND)
519 return;
521 m_last_stmt = stmt;
522 m_visited_bbs.empty ();
523 m_path.truncate (0);
525 // We compute imports of the path during discovery starting
526 // just with names used in the conditional.
527 bitmap_clear (m_imports);
528 ssa_op_iter iter;
529 tree name;
530 FOR_EACH_SSA_TREE_OPERAND (name, stmt, iter, SSA_OP_USE)
532 if (!gimple_range_ssa_p (name))
533 return;
534 bitmap_set_bit (m_imports, SSA_NAME_VERSION (name));
537 // Interesting is the set of imports we still not have see
538 // the definition of. So while imports only grow, the
539 // set of interesting defs dwindles and once empty we can
540 // stop searching.
541 auto_bitmap interesting;
542 bitmap_copy (interesting, m_imports);
543 back_threader_profitability profit (m_flags & BT_SPEED, stmt);
544 find_paths_to_names (bb, interesting, 1, profit);
547 DEBUG_FUNCTION void
548 debug (const vec <basic_block> &path)
550 dump_path (stderr, path);
551 fputc ('\n', stderr);
554 void
555 back_threader::dump (FILE *out)
557 fprintf (out, "\nCandidates for pre-computation:\n");
558 fprintf (out, "===================================\n");
560 bitmap_iterator bi;
561 unsigned i;
563 EXECUTE_IF_SET_IN_BITMAP (m_imports, 0, i, bi)
565 tree name = ssa_name (i);
566 print_generic_expr (out, name, TDF_NONE);
567 fprintf (out, "\n");
571 void
572 back_threader::debug ()
574 dump (stderr);
577 /* Examine jump threading path PATH and return TRUE if it is possibly
578 profitable to thread it, otherwise return FALSE. If this function
579 returns TRUE profitable_path_p might not be satisfied but when
580 the path is extended it might be. In particular indicate in
581 *LARGE_NON_FSM whether the thread is too large for a non-FSM thread
582 but would be OK if we extend the path to cover the loop backedge.
584 ?? It seems we should be able to loosen some of the restrictions in
585 this function after loop optimizations have run. */
587 bool
588 back_threader_profitability::possibly_profitable_path_p
589 (const vec<basic_block> &m_path,
590 bool *large_non_fsm)
592 gcc_checking_assert (!m_path.is_empty ());
594 /* We can an empty path here (excluding the DEF block) when the
595 statement that makes a conditional generate a compile-time
596 constant result is in the same block as the conditional.
598 That's not really a jump threading opportunity, but instead is
599 simple cprop & simplification. We could handle it here if we
600 wanted by wiring up all the incoming edges. If we run this
601 early in IPA, that might be worth doing. For now we just
602 reject that case. */
603 if (m_path.length () <= 1)
604 return false;
606 gimple_stmt_iterator gsi;
607 loop_p loop = m_path[0]->loop_father;
609 // We recompute the following, when we rewrite possibly_profitable_path_p
610 // to work incrementally on added BBs we have to unwind them on backtracking
611 m_n_insns = 0;
612 m_threaded_through_latch = false;
613 m_multiway_branch_in_path = false;
614 m_contains_hot_bb = false;
616 if (dump_file && (dump_flags & TDF_DETAILS))
617 fprintf (dump_file, "Checking profitability of path (backwards): ");
619 /* Count the number of instructions on the path: as these instructions
620 will have to be duplicated, we will not record the path if there
621 are too many instructions on the path. Also check that all the
622 blocks in the path belong to a single loop. */
623 for (unsigned j = 0; j < m_path.length (); j++)
625 basic_block bb = m_path[j];
627 if (dump_file && (dump_flags & TDF_DETAILS))
628 fprintf (dump_file, " bb:%i", bb->index);
629 /* Remember, blocks in the path are stored in opposite order in
630 the PATH array. The last entry in the array represents the
631 block with an outgoing edge that we will redirect to the jump
632 threading path. Thus we don't care how many statements are
633 in that block because it will not be copied or whether or not
634 it ends in a multiway branch. */
635 if (j < m_path.length () - 1)
637 int orig_n_insns = m_n_insns;
638 if (!m_contains_hot_bb && m_speed_p)
639 m_contains_hot_bb |= optimize_bb_for_speed_p (bb);
640 for (gsi = gsi_after_labels (bb);
641 !gsi_end_p (gsi);
642 gsi_next_nondebug (&gsi))
644 /* Do not allow OpenACC loop markers and __builtin_constant_p on
645 threading paths. The latter is disallowed, because an
646 expression might be constant on two threading paths, and
647 become non-constant (i.e.: phi) when they merge. */
648 gimple *stmt = gsi_stmt (gsi);
649 if (gimple_call_internal_p (stmt, IFN_UNIQUE)
650 || gimple_call_builtin_p (stmt, BUILT_IN_CONSTANT_P))
652 if (dump_file && (dump_flags & TDF_DETAILS))
653 fputc ('\n', dump_file);
654 return false;
656 /* Do not count empty statements and labels. */
657 if (gimple_code (stmt) != GIMPLE_NOP
658 && !is_gimple_debug (stmt))
659 m_n_insns += estimate_num_insns (stmt, &eni_size_weights);
661 if (dump_file && (dump_flags & TDF_DETAILS))
662 fprintf (dump_file, " (%i insns)", m_n_insns-orig_n_insns);
664 /* We do not look at the block with the threaded branch
665 in this loop. So if any block with a last statement that
666 is a GIMPLE_SWITCH or GIMPLE_GOTO is seen, then we have a
667 multiway branch on our path.
669 The block in PATH[0] is special, it's the block were we're
670 going to be able to eliminate its branch. */
671 if (j > 0)
673 gimple *last = *gsi_last_bb (bb);
674 if (last
675 && (gimple_code (last) == GIMPLE_SWITCH
676 || gimple_code (last) == GIMPLE_GOTO))
677 m_multiway_branch_in_path = true;
681 /* Note if we thread through the latch, we will want to include
682 the last entry in the array when determining if we thread
683 through the loop latch. */
684 if (loop->latch == bb)
686 m_threaded_through_latch = true;
687 if (dump_file && (dump_flags & TDF_DETAILS))
688 fprintf (dump_file, " (latch)");
692 /* We are going to remove the control statement at the end of the
693 last block in the threading path. So don't count it against our
694 statement count. */
695 m_n_insns -= m_exit_jump_benefit;
697 if (dump_file && (dump_flags & TDF_DETAILS))
698 fprintf (dump_file, "\n Control statement insns: %i\n"
699 " Overall: %i insns\n",
700 m_exit_jump_benefit, m_n_insns);
702 /* Threading is profitable if the path duplicated is hot but also
703 in a case we separate cold path from hot path and permit optimization
704 of the hot path later. Be on the agressive side here. In some testcases,
705 as in PR 78407 this leads to noticeable improvements. */
706 if (m_speed_p)
708 if (m_n_insns >= param_max_fsm_thread_path_insns)
710 if (dump_file && (dump_flags & TDF_DETAILS))
711 fprintf (dump_file, " FAIL: Jump-thread path not considered: "
712 "the number of instructions on the path "
713 "exceeds PARAM_MAX_FSM_THREAD_PATH_INSNS.\n");
714 return false;
716 edge entry = find_edge (m_path[m_path.length () - 1],
717 m_path[m_path.length () - 2]);
718 if (probably_never_executed_edge_p (cfun, entry))
720 if (dump_file && (dump_flags & TDF_DETAILS))
721 fprintf (dump_file, " FAIL: Jump-thread path not considered: "
722 "path entry is probably never executed.\n");
723 return false;
726 else if (m_n_insns > 1)
728 if (dump_file && (dump_flags & TDF_DETAILS))
729 fprintf (dump_file, " FAIL: Jump-thread path not considered: "
730 "duplication of %i insns is needed and optimizing for size.\n",
731 m_n_insns);
732 return false;
735 /* The generic copier used by the backthreader does not re-use an
736 existing threading path to reduce code duplication. So for that
737 case, drastically reduce the number of statements we are allowed
738 to copy. We don't know yet whether we will thread through the latch
739 so we have to be permissive and continue threading, but indicate
740 to the caller the thread, if final, wouldn't be profitable. */
741 if ((!m_threaded_multiway_branch
742 || !loop->latch
743 || loop->latch->index == EXIT_BLOCK)
744 && (m_n_insns * param_fsm_scale_path_stmts
745 >= param_max_jump_thread_duplication_stmts))
747 if (dump_file && (dump_flags & TDF_DETAILS))
748 fprintf (dump_file,
749 " FAIL: Did not thread around loop and would copy too "
750 "many statements.\n");
751 return false;
753 *large_non_fsm = (!(m_threaded_through_latch && m_threaded_multiway_branch)
754 && (m_n_insns * param_fsm_scale_path_stmts
755 >= param_max_jump_thread_duplication_stmts));
757 if (dump_file && (dump_flags & TDF_DETAILS))
758 fputc ('\n', dump_file);
759 return true;
762 /* Examine jump threading path PATH and return TRUE if it is profitable to
763 thread it, otherwise return FALSE.
765 The taken edge out of the path is TAKEN_EDGE.
767 CREATES_IRREDUCIBLE_LOOP is set to TRUE if threading this path
768 would create an irreducible loop.
770 ?? It seems we should be able to loosen some of the restrictions in
771 this function after loop optimizations have run. */
773 bool
774 back_threader_profitability::profitable_path_p (const vec<basic_block> &m_path,
775 edge taken_edge,
776 bool *creates_irreducible_loop)
778 // We can assume that possibly_profitable_path_p holds here
780 loop_p loop = m_path[0]->loop_father;
782 if (dump_file && (dump_flags & TDF_DETAILS))
783 fprintf (dump_file, "Checking profitability of path (backwards): ");
785 /* If this path threaded through the loop latch back into the
786 same loop and the destination does not dominate the loop
787 latch, then this thread would create an irreducible loop. */
788 *creates_irreducible_loop = false;
789 if (m_threaded_through_latch
790 && loop == taken_edge->dest->loop_father
791 && (determine_bb_domination_status (loop, taken_edge->dest)
792 == DOMST_NONDOMINATING))
793 *creates_irreducible_loop = true;
795 /* Threading is profitable if the path duplicated is hot but also
796 in a case we separate cold path from hot path and permit optimization
797 of the hot path later. Be on the agressive side here. In some testcases,
798 as in PR 78407 this leads to noticeable improvements. */
799 if (m_speed_p
800 && (optimize_edge_for_speed_p (taken_edge) || m_contains_hot_bb))
802 if (probably_never_executed_edge_p (cfun, taken_edge))
804 if (dump_file && (dump_flags & TDF_DETAILS))
805 fprintf (dump_file, " FAIL: Jump-thread path not considered: "
806 "path leads to probably never executed edge.\n");
807 return false;
810 else if (m_n_insns > 1)
812 if (dump_file && (dump_flags & TDF_DETAILS))
813 fprintf (dump_file, " FAIL: Jump-thread path not considered: "
814 "duplication of %i insns is needed and optimizing for size.\n",
815 m_n_insns);
816 return false;
819 /* We avoid creating irreducible inner loops unless we thread through
820 a multiway branch, in which case we have deemed it worth losing
821 other loop optimizations later.
823 We also consider it worth creating an irreducible inner loop after
824 loop optimizations if the number of copied statement is low. */
825 if (!m_threaded_multiway_branch
826 && *creates_irreducible_loop
827 && (!(cfun->curr_properties & PROP_loop_opts_done)
828 || (m_n_insns * param_fsm_scale_path_stmts
829 >= param_max_jump_thread_duplication_stmts)))
831 if (dump_file && (dump_flags & TDF_DETAILS))
832 fprintf (dump_file,
833 " FAIL: Would create irreducible loop early without "
834 "threading multiway branch.\n");
835 /* We compute creates_irreducible_loop only late. */
836 return false;
839 /* The generic copier used by the backthreader does not re-use an
840 existing threading path to reduce code duplication. So for that
841 case, drastically reduce the number of statements we are allowed
842 to copy. */
843 if (!(m_threaded_through_latch && m_threaded_multiway_branch)
844 && (m_n_insns * param_fsm_scale_path_stmts
845 >= param_max_jump_thread_duplication_stmts))
847 if (dump_file && (dump_flags & TDF_DETAILS))
848 fprintf (dump_file,
849 " FAIL: Did not thread around loop and would copy too "
850 "many statements.\n");
851 return false;
854 /* When there is a multi-way branch on the path, then threading can
855 explode the CFG due to duplicating the edges for that multi-way
856 branch. So like above, only allow a multi-way branch on the path
857 if we actually thread a multi-way branch. */
858 if (!m_threaded_multiway_branch && m_multiway_branch_in_path)
860 if (dump_file && (dump_flags & TDF_DETAILS))
861 fprintf (dump_file,
862 " FAIL: Thread through multiway branch without threading "
863 "a multiway branch.\n");
864 return false;
867 /* Threading through an empty latch would cause code to be added to
868 the latch. This could alter the loop form sufficiently to cause
869 loop optimizations to fail. Disable these threads until after
870 loop optimizations have run. */
871 if ((m_threaded_through_latch || taken_edge->dest == loop->latch)
872 && !(cfun->curr_properties & PROP_loop_opts_done)
873 && empty_block_p (loop->latch))
875 if (dump_file && (dump_flags & TDF_DETAILS))
876 fprintf (dump_file,
877 " FAIL: Thread through latch before loop opts would create "
878 "non-empty latch\n");
879 return false;
881 if (dump_file && (dump_flags & TDF_DETAILS))
882 fputc ('\n', dump_file);
883 return true;
887 /* The current path PATH is a vector of blocks forming a jump threading
888 path in reverse order. TAKEN_EDGE is the edge taken from path[0].
890 Convert the current path into the form used by register_jump_thread and
891 register it.
893 Return TRUE if successful or FALSE otherwise. */
895 bool
896 back_threader_registry::register_path (const vec<basic_block> &m_path,
897 edge taken_edge)
899 vec<jump_thread_edge *> *jump_thread_path = allocate_thread_path ();
901 // The generic copier ignores the edge type. We can build the
902 // thread edges with any type.
903 for (unsigned int j = 0; j + 1 < m_path.length (); j++)
905 basic_block bb1 = m_path[m_path.length () - j - 1];
906 basic_block bb2 = m_path[m_path.length () - j - 2];
908 edge e = find_edge (bb1, bb2);
909 gcc_assert (e);
910 push_edge (jump_thread_path, e, EDGE_COPY_SRC_BLOCK);
913 push_edge (jump_thread_path, taken_edge, EDGE_NO_COPY_SRC_BLOCK);
914 return register_jump_thread (jump_thread_path);
917 // Thread all suitable paths in the current function.
919 // Return TODO_flags.
921 unsigned int
922 back_threader::thread_blocks ()
924 basic_block bb;
925 FOR_EACH_BB_FN (bb, m_fun)
926 if (EDGE_COUNT (bb->succs) > 1)
927 maybe_thread_block (bb);
929 bool changed = m_registry.thread_through_all_blocks (true);
931 if (m_flags & BT_SPEED)
932 return changed ? TODO_cleanup_cfg : 0;
934 return false;
937 namespace {
939 const pass_data pass_data_early_thread_jumps =
941 GIMPLE_PASS,
942 "ethread",
943 OPTGROUP_NONE,
944 TV_TREE_SSA_THREAD_JUMPS,
945 ( PROP_cfg | PROP_ssa ),
949 ( TODO_cleanup_cfg | TODO_update_ssa ),
952 const pass_data pass_data_thread_jumps =
954 GIMPLE_PASS,
955 "thread",
956 OPTGROUP_NONE,
957 TV_TREE_SSA_THREAD_JUMPS,
958 ( PROP_cfg | PROP_ssa ),
962 TODO_update_ssa,
965 const pass_data pass_data_thread_jumps_full =
967 GIMPLE_PASS,
968 "threadfull",
969 OPTGROUP_NONE,
970 TV_TREE_SSA_THREAD_JUMPS,
971 ( PROP_cfg | PROP_ssa ),
975 TODO_update_ssa,
978 // Early jump threading pass optimizing for size.
979 class pass_early_thread_jumps : public gimple_opt_pass
981 public:
982 pass_early_thread_jumps (gcc::context *ctxt)
983 : gimple_opt_pass (pass_data_early_thread_jumps, ctxt)
986 opt_pass * clone () override
988 return new pass_early_thread_jumps (m_ctxt);
990 void set_pass_param (unsigned int, bool param) override
992 m_first = param;
994 bool gate (function *) override
996 return flag_thread_jumps;
998 unsigned int execute (function *fun) override
1000 back_threader threader (fun, BT_NONE, m_first);
1001 return threader.thread_blocks ();
1003 private:
1004 bool m_first;
1007 // Jump threading pass without resolving of unknown SSAs.
1008 class pass_thread_jumps : public gimple_opt_pass
1010 public:
1011 pass_thread_jumps (gcc::context *ctxt)
1012 : gimple_opt_pass (pass_data_thread_jumps, ctxt)
1014 opt_pass * clone (void) override
1016 return new pass_thread_jumps (m_ctxt);
1018 void set_pass_param (unsigned int, bool param) override
1020 m_first = param;
1022 bool gate (function *) override
1024 return flag_thread_jumps && flag_expensive_optimizations;
1026 unsigned int execute (function *fun) override
1028 back_threader threader (fun, BT_SPEED, m_first);
1029 return threader.thread_blocks ();
1031 private:
1032 bool m_first;
1035 // Jump threading pass that fully resolves unknown SSAs.
1036 class pass_thread_jumps_full : public gimple_opt_pass
1038 public:
1039 pass_thread_jumps_full (gcc::context *ctxt)
1040 : gimple_opt_pass (pass_data_thread_jumps_full, ctxt)
1042 opt_pass * clone (void) override
1044 return new pass_thread_jumps_full (m_ctxt);
1046 void set_pass_param (unsigned int, bool param) override
1048 m_first = param;
1050 bool gate (function *) override
1052 return flag_thread_jumps && flag_expensive_optimizations;
1054 unsigned int execute (function *fun) override
1056 back_threader threader (fun, BT_SPEED | BT_RESOLVE, m_first);
1057 return threader.thread_blocks ();
1059 private:
1060 bool m_first;
1063 } // namespace {
1065 gimple_opt_pass *
1066 make_pass_thread_jumps (gcc::context *ctxt)
1068 return new pass_thread_jumps (ctxt);
1071 gimple_opt_pass *
1072 make_pass_thread_jumps_full (gcc::context *ctxt)
1074 return new pass_thread_jumps_full (ctxt);
1077 gimple_opt_pass *
1078 make_pass_early_thread_jumps (gcc::context *ctxt)
1080 return new pass_early_thread_jumps (ctxt);