/* SSA Jump Threading
   Copyright (C) 2005-2023 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "predict.h"
#include "tree.h"
#include "gimple.h"
#include "fold-const.h"
#include "cfgloop.h"
#include "gimple-iterator.h"
#include "tree-cfg.h"
#include "tree-ssa-threadupdate.h"
#include "tree-ssa-loop.h"
#include "cfganal.h"
#include "tree-pass.h"
#include "gimple-ssa.h"
#include "tree-phinodes.h"
#include "tree-inline.h"
#include "tree-vectorizer.h"
#include "value-range.h"
#include "gimple-range.h"
#include "tree-ssa-threadedge.h"
#include "gimple-range-path.h"
#include "ssa.h"
#include "tree-cfgcleanup.h"
#include "tree-pretty-print.h"
#include "cfghooks.h"
#include "dbgcnt.h"
// Path registry for the backwards threader.  After all paths have been
// registered with register_path(), thread_through_all_blocks() is called
// to modify the CFG.

class back_threader_registry : public back_jt_path_registry
{
public:
  bool register_path (const vec<basic_block> &, edge taken);
};
// Class to abstract the profitability code for the backwards threader.

class back_threader_profitability
{
public:
  back_threader_profitability (bool speed_p, gimple *stmt);
  bool possibly_profitable_path_p (const vec<basic_block> &, tree, bool *);
  bool profitable_path_p (const vec<basic_block> &,
                          edge taken, bool *irreducible_loop);
private:
  const bool m_speed_p;
  int m_exit_jump_benefit;
  bool m_threaded_multiway_branch;
  // The following are computed by possibly_profitable_path_p.
  bool m_threaded_through_latch;
  bool m_multiway_branch_in_path;
  bool m_contains_hot_bb;
  int m_n_insns;
};
back_threader_profitability::back_threader_profitability (bool speed_p,
                                                          gimple *last)
  : m_speed_p (speed_p)
{
  m_threaded_multiway_branch = (gimple_code (last) == GIMPLE_SWITCH
                                || gimple_code (last) == GIMPLE_GOTO);
  // The forward threader has estimate_threading_killed_stmts; in
  // particular it estimates further DCE from eliminating the exit
  // control stmt.
  m_exit_jump_benefit = estimate_num_insns (last, &eni_size_weights);
}
// Back threader flags.
#define BT_NONE 0
// Generate fast code at the expense of code size.
#define BT_SPEED 1
// Resolve unknown SSAs on entry to a threading path.  If set, use the
// ranger.  If not, assume all ranges on entry to a path are VARYING.
#define BT_RESOLVE 2
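// For reference, the passes defined at the end of this file use these
// combinations: ethread passes BT_NONE, thread[12] pass BT_SPEED, and
// threadfull[12] pass BT_SPEED | BT_RESOLVE.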
class back_threader
{
public:
  back_threader (function *fun, unsigned flags, bool first);
  ~back_threader ();
  unsigned thread_blocks ();
private:
  void maybe_thread_block (basic_block bb);
  bool debug_counter ();
  edge maybe_register_path (back_threader_profitability &);
  void maybe_register_path_dump (edge taken_edge);
  void find_paths_to_names (basic_block bb, bitmap imports, unsigned,
                            back_threader_profitability &);
  edge find_taken_edge (const vec<basic_block> &path);
  edge find_taken_edge_cond (const vec<basic_block> &path, gcond *);
  edge find_taken_edge_switch (const vec<basic_block> &path, gswitch *);
  virtual void debug ();
  virtual void dump (FILE *out);

  back_threader_registry m_registry;

  // Current path being analyzed.
  auto_vec<basic_block> m_path;
  // Hash to mark visited BBs while analyzing a path.
  hash_set<basic_block> m_visited_bbs;
  // The set of SSA names, any of which could potentially change the
  // value of the final conditional in a path.
  auto_bitmap m_imports;
  // The last statement in the path.
  gimple *m_last_stmt;
  // This is a bit of a wart.  It's used to pass the LHS SSA name to
  // the profitability engine.
  tree m_name;
  // Marker to differentiate unreachable edges.
  static const edge UNREACHABLE_EDGE;
  // The function we are threading in.
  function *m_fun;
  // Ranger for the path solver.
  gimple_ranger *m_ranger;
  // The BT_* flags in effect.  If BT_RESOLVE is set, unknown SSA names
  // along a path are resolved with the ranger; otherwise they are
  // assumed to be VARYING.  Resolving is more precise but slower.
  unsigned m_flags;
  // Set to TRUE for the first of each thread[12] pass or the first of
  // each threadfull[12] pass.  This is used to differentiate between
  // the different threading passes so we can set up debug counters.
  bool m_first;
};
// Used to differentiate unreachable edges, so we may stop the search
// in the given direction.
const edge back_threader::UNREACHABLE_EDGE = (edge) -1;
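// Note this is a sentinel value only: it is compared against, but it
// must never be dereferenced.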
back_threader::back_threader (function *fun, unsigned flags, bool first)
  : m_first (first)
{
  if (flags & BT_SPEED)
    loop_optimizer_init (LOOPS_HAVE_PREHEADERS | LOOPS_HAVE_SIMPLE_LATCHES);
  else
    loop_optimizer_init (AVOID_CFG_MODIFICATIONS);

  m_fun = fun;
  m_flags = flags;
  m_last_stmt = NULL;

  // The path solver needs EDGE_DFS_BACK in resolving mode.
  if (flags & BT_RESOLVE)
    mark_dfs_back_edges ();

  m_ranger = new gimple_ranger;
}

back_threader::~back_threader ()
{
  delete m_ranger;
  loop_optimizer_finalize ();
}
// A wrapper for the various debug counters for the threading passes.
// Returns TRUE if it's OK to register the current threading
// candidate.

bool
back_threader::debug_counter ()
{
  // The ethread pass is mostly harmless ;-).
  if ((m_flags & BT_SPEED) == 0)
    return true;

  if (m_flags & BT_RESOLVE)
    {
      if (m_first && !dbg_cnt (back_threadfull1))
        return false;

      if (!m_first && !dbg_cnt (back_threadfull2))
        return false;
    }
  else
    {
      if (m_first && !dbg_cnt (back_thread1))
        return false;

      if (!m_first && !dbg_cnt (back_thread2))
        return false;
    }
  return true;
}
static void
dump_path (FILE *dump_file, const vec<basic_block> &path)
{
  for (unsigned i = path.length (); i > 0; --i)
    {
      basic_block bb = path[i - 1];
      fprintf (dump_file, "%d", bb->index);
      if (i > 1)
        fprintf (dump_file, "->");
    }
}
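// Since the path vector stores blocks in reverse order (the block with
// the final conditional first), the loop above prints it back to front;
// a path entering at bb5 and ending at the conditional in bb2 comes out
// as "5->7->2", for example (block indices purely illustrative).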
// Dump details of an attempt to register a path.

void
back_threader::maybe_register_path_dump (edge taken)
{
  if (m_path.is_empty ())
    return;

  fprintf (dump_file, "path: ");
  dump_path (dump_file, m_path);
  fprintf (dump_file, "->");

  if (taken == UNREACHABLE_EDGE)
    fprintf (dump_file, "xx REJECTED (unreachable)\n");
  else if (taken)
    fprintf (dump_file, "%d SUCCESS\n", taken->dest->index);
  else
    fprintf (dump_file, "xx REJECTED\n");
}
// If an outgoing edge can be determined out of the current path,
// register it for jump threading and return the taken edge.
//
// Return NULL if it is unprofitable to thread this path, or the
// outgoing edge is unknown.  Return UNREACHABLE_EDGE if the path is
// unreachable.

edge
back_threader::maybe_register_path (back_threader_profitability &profit)
{
  edge taken_edge = find_taken_edge (m_path);

  if (taken_edge && taken_edge != UNREACHABLE_EDGE)
    {
      bool irreducible = false;
      if (profit.profitable_path_p (m_path, taken_edge, &irreducible)
          && debug_counter ()
          && m_registry.register_path (m_path, taken_edge))
        {
          if (irreducible)
            vect_free_loop_info_assumptions (m_path[0]->loop_father);
        }
      else
        taken_edge = NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    maybe_register_path_dump (taken_edge);

  return taken_edge;
}
// Return the known taken edge out of a path.  If the path can be
// determined to be unreachable, return UNREACHABLE_EDGE.  If no
// outgoing edge can be calculated, return NULL.

edge
back_threader::find_taken_edge (const vec<basic_block> &path)
{
  gcc_checking_assert (path.length () > 1);
  switch (gimple_code (m_last_stmt))
    {
    case GIMPLE_COND:
      return find_taken_edge_cond (path, as_a<gcond *> (m_last_stmt));

    case GIMPLE_SWITCH:
      return find_taken_edge_switch (path, as_a<gswitch *> (m_last_stmt));

    default:
      return NULL;
    }
}
// Same as find_taken_edge, but for paths ending in a switch.

edge
back_threader::find_taken_edge_switch (const vec<basic_block> &path,
                                       gswitch *sw)
{
  tree name = gimple_switch_index (sw);
  int_range_max r;

  path_range_query solver (*m_ranger, path, m_imports, m_flags & BT_RESOLVE);
  solver.range_of_expr (r, name, sw);

  if (r.undefined_p ())
    return UNREACHABLE_EDGE;

  if (r.varying_p ())
    return NULL;

  tree label = find_case_label_range (sw, &r);
  if (!label)
    return NULL;

  return find_edge (gimple_bb (sw), label_to_block (cfun, CASE_LABEL (label)));
}
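// For example, if the path query narrows the switch index to a
// singleton range like [3, 3], find_case_label_range yields the one
// case label covering that value, and the edge to the label's block
// is returned.  A range spanning multiple case labels yields no label,
// so the path is given up on.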
// Same as find_taken_edge, but for paths ending in a GIMPLE_COND.

edge
back_threader::find_taken_edge_cond (const vec<basic_block> &path,
                                     gcond *cond)
{
  int_range_max r;

  path_range_query solver (*m_ranger, path, m_imports, m_flags & BT_RESOLVE);
  solver.range_of_stmt (r, cond);

  if (solver.unreachable_path_p ())
    return UNREACHABLE_EDGE;

  int_range<2> true_range (boolean_true_node, boolean_true_node);
  int_range<2> false_range (boolean_false_node, boolean_false_node);

  if (r == true_range || r == false_range)
    {
      edge e_true, e_false;
      basic_block bb = gimple_bb (cond);
      extract_true_false_edges_from_block (bb, &e_true, &e_false);
      return r == true_range ? e_true : e_false;
    }
  return NULL;
}
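// For example, if every block on the path forces x_1 to zero, a final
// "if (x_1 != 0)" evaluates to [0, 0] along the path and the false
// edge out of its block is returned (the SSA name is illustrative).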
// Find jump threading paths to any of the SSA names in the
// INTERESTING bitmap, and register any such paths.
//
// BB is the current path being processed.
//
// OVERALL_PATHS is the search space up to this block.

void
back_threader::find_paths_to_names (basic_block bb, bitmap interesting,
                                    unsigned overall_paths,
                                    back_threader_profitability &profit)
{
  if (m_visited_bbs.add (bb))
    return;

  m_path.safe_push (bb);

  // Try to resolve the path without looking back.  Avoid resolving paths
  // we know are large but are not (yet) recognized as Finite State Machine.
  // ??? Ideally we'd explore the cheapest path to the loop backedge here,
  // avoiding the exponential greedy search and only start that from there.
  // Precomputing a path-size-to-immediate-dominator-of-successor for each
  // edge might help here.  Alternatively copying divergent control flow
  // on the way to the backedge could be worthwhile.
  bool large_non_fsm;
  if (m_path.length () > 1
      && (!profit.possibly_profitable_path_p (m_path, m_name, &large_non_fsm)
          || (!large_non_fsm
              && maybe_register_path (profit))))
    ;

  // The backwards thread copier cannot copy blocks that do not belong
  // to the same loop, so when the new source of the path entry no
  // longer belongs to it we don't need to search further.
  else if (m_path[0]->loop_father != bb->loop_father)
    ;

  // Continue looking for ways to extend the path but limit the
  // search space along a branch.
  else if ((overall_paths = overall_paths * EDGE_COUNT (bb->preds))
           <= (unsigned) param_max_jump_thread_paths)
    {
      // For further greedy searching we want to remove interesting
      // names defined in BB but add ones on the PHI edges for the
      // respective edges and add imports from those stmts.
      // We do this by starting with all names
      // not defined in BB as interesting, collecting a list of
      // interesting PHIs in BB on the fly.  Then we iterate over
      // predecessor edges, adding interesting PHI edge defs to
      // the set of interesting names to consider when processing it.
      auto_bitmap new_interesting;
      auto_vec<int, 16> new_imports;
      auto_vec<gphi *, 4> interesting_phis;
      bitmap_iterator bi;
      unsigned i;
      auto_vec<tree, 16> worklist;
      EXECUTE_IF_SET_IN_BITMAP (interesting, 0, i, bi)
        {
          tree name = ssa_name (i);
          gimple *def_stmt = SSA_NAME_DEF_STMT (name);
          /* Imports remain interesting.  */
          if (gimple_bb (def_stmt) != bb)
            {
              bitmap_set_bit (new_interesting, i);
              continue;
            }
          worklist.quick_push (name);
          while (!worklist.is_empty ())
            {
              tree name = worklist.pop ();
              gimple *def_stmt = SSA_NAME_DEF_STMT (name);
              /* Newly discovered imports are interesting.  */
              if (gimple_bb (def_stmt) != bb)
                {
                  bitmap_set_bit (new_interesting, SSA_NAME_VERSION (name));
                  continue;
                }
              /* Local PHIs participate in renaming below.  */
              if (gphi *phi = dyn_cast<gphi *> (def_stmt))
                {
                  tree res = gimple_phi_result (phi);
                  if (!SSA_NAME_OCCURS_IN_ABNORMAL_PHI (res))
                    interesting_phis.safe_push (phi);
                }
              /* For other local defs process their uses, amending
                 imports on the way.  */
              else
                {
                  tree ssa[3];
                  unsigned lim = gimple_range_ssa_names (ssa, 3, def_stmt);
                  for (unsigned j = 0; j < lim; ++j)
                    {
                      tree rhs = ssa[j];
                      if (rhs
                          && bitmap_set_bit (m_imports,
                                             SSA_NAME_VERSION (rhs)))
                        {
                          new_imports.safe_push (SSA_NAME_VERSION (rhs));
                          worklist.safe_push (rhs);
                        }
                    }
                }
            }
        }
      if (!bitmap_empty_p (new_interesting)
          || !interesting_phis.is_empty ())
        {
          auto_vec<int, 4> unwind (interesting_phis.length ());
          auto_vec<int, 4> imports_unwind (interesting_phis.length ());
          edge_iterator iter;
          edge e;
          FOR_EACH_EDGE (e, iter, bb->preds)
            {
              if (e->flags & EDGE_ABNORMAL
                  // This is like path_crosses_loops in profitable_path_p but
                  // more restrictive to avoid peeling off loop iterations (see
                  // tree-ssa/pr14341.c for an example).
                  // ??? Note this restriction only applied when visiting an
                  // interesting PHI with the former resolve_phi.
                  || (!interesting_phis.is_empty ()
                      && m_path[0]->loop_father != e->src->loop_father))
                continue;
              for (gphi *phi : interesting_phis)
                {
                  tree def = PHI_ARG_DEF_FROM_EDGE (phi, e);
                  if (TREE_CODE (def) == SSA_NAME)
                    {
                      int ver = SSA_NAME_VERSION (def);
                      if (bitmap_set_bit (new_interesting, ver))
                        {
                          if (bitmap_set_bit (m_imports, ver))
                            imports_unwind.quick_push (ver);
                          unwind.quick_push (ver);
                        }
                    }
                }
              find_paths_to_names (e->src, new_interesting, overall_paths,
                                   profit);
              // Restore new_interesting.
              for (int def : unwind)
                bitmap_clear_bit (new_interesting, def);
              unwind.truncate (0);
              // Restore m_imports.
              for (int def : imports_unwind)
                bitmap_clear_bit (m_imports, def);
              imports_unwind.truncate (0);
            }
        }
      /* m_imports tracks all interesting names on the path, so when
         backtracking we have to restore it.  */
      for (int j : new_imports)
        bitmap_clear_bit (m_imports, j);
    }
  else if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "  FAIL: Search space limit %d reached.\n",
             param_max_jump_thread_paths);

  // Reset things to their original state.
  m_path.pop ();
  m_visited_bbs.remove (bb);
}
// Search backwards from BB looking for paths where the final
// conditional may be threaded to a successor block.  Record such paths
// for jump threading.
void
back_threader::maybe_thread_block (basic_block bb)
{
  if (EDGE_COUNT (bb->succs) <= 1)
    return;

  gimple *stmt = last_stmt (bb);
  if (!stmt)
    return;

  enum gimple_code code = gimple_code (stmt);
  tree name;
  if (code == GIMPLE_SWITCH)
    name = gimple_switch_index (as_a <gswitch *> (stmt));
  else if (code == GIMPLE_COND)
    name = gimple_cond_lhs (stmt);
  else
    return;

  m_last_stmt = stmt;
  m_visited_bbs.empty ();
  m_path.truncate (0);
  m_name = name;

  // We compute imports of the path during discovery starting
  // just with names used in the conditional.
  bitmap_clear (m_imports);
  ssa_op_iter iter;
  FOR_EACH_SSA_TREE_OPERAND (name, stmt, iter, SSA_OP_USE)
    {
      if (!gimple_range_ssa_p (name))
        return;
      bitmap_set_bit (m_imports, SSA_NAME_VERSION (name));
    }
  // INTERESTING is the set of imports we have not yet seen the
  // definition of.  So while imports only grow, the set of
  // interesting defs dwindles, and once empty we can stop searching.
  auto_bitmap interesting;
  bitmap_copy (interesting, m_imports);
  back_threader_profitability profit (m_flags & BT_SPEED, stmt);
  find_paths_to_names (bb, interesting, 1, profit);
}
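// As an illustration (block numbers and SSA names are hypothetical),
// given
//
//   bb3:  x_1 = PHI <0 (bb1), 5 (bb2)>
//         if (x_1 == 0) goto bb4; else goto bb5;
//
// the search starting at bb3 extends the path into bb1 and bb2.  On
// the path bb1->bb3 the PHI resolves x_1 to 0, the conditional folds
// to true, and the path is registered with taken edge bb3->bb4.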
DEBUG_FUNCTION void
debug (const vec <basic_block> &path)
{
  dump_path (stderr, path);
  fputc ('\n', stderr);
}

void
back_threader::dump (FILE *out)
{
  fprintf (out, "\nCandidates for pre-computation:\n");
  fprintf (out, "===================================\n");

  bitmap_iterator bi;
  unsigned i;

  EXECUTE_IF_SET_IN_BITMAP (m_imports, 0, i, bi)
    {
      tree name = ssa_name (i);
      print_generic_expr (out, name, TDF_NONE);
      fprintf (out, "\n");
    }
}

void
back_threader::debug ()
{
  dump (stderr);
}
/* Examine jump threading path PATH and return TRUE if it is possibly
   profitable to thread it, otherwise return FALSE.  If this function
   returns TRUE profitable_path_p might not be satisfied, but when
   the path is extended it might be.  In particular indicate in
   *LARGE_NON_FSM whether the thread is too large for a non-FSM thread
   but would be OK if we extend the path to cover the loop backedge.

   NAME is the SSA_NAME of the variable we found to have a constant
   value on PATH.  If unknown, NAME is NULL.

   ??? It seems we should be able to loosen some of the restrictions in
   this function after loop optimizations have run.  */

bool
back_threader_profitability::possibly_profitable_path_p
                                  (const vec<basic_block> &m_path, tree name,
                                   bool *large_non_fsm)
{
  gcc_checking_assert (!m_path.is_empty ());

  /* We can have an empty path here (excluding the DEF block) when the
     statement that makes a conditional generate a compile-time
     constant result is in the same block as the conditional.

     That's not really a jump threading opportunity, but instead is
     simple cprop & simplification.  We could handle it here if we
     wanted by wiring up all the incoming edges.  If we run this
     early in IPA, that might be worth doing.  For now we just
     reject that case.  */
  if (m_path.length () <= 1)
    return false;
  gimple_stmt_iterator gsi;
  loop_p loop = m_path[0]->loop_father;

  // We recompute the following; when we rewrite possibly_profitable_path_p
  // to work incrementally on added BBs we will have to unwind them on
  // backtracking.
  m_n_insns = 0;
  m_threaded_through_latch = false;
  m_multiway_branch_in_path = false;
  m_contains_hot_bb = false;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Checking profitability of path (backwards): ");

  /* Count the number of instructions on the path: as these instructions
     will have to be duplicated, we will not record the path if there
     are too many instructions on the path.  Also check that all the
     blocks in the path belong to a single loop.  */
  for (unsigned j = 0; j < m_path.length (); j++)
    {
      basic_block bb = m_path[j];

      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, " bb:%i", bb->index);
      /* Remember, blocks in the path are stored in opposite order in
         the PATH array.  The last entry in the array represents the
         block with an outgoing edge that we will redirect to the jump
         threading path.  Thus we don't care how many statements are
         in that block (it will not be copied), nor whether or not it
         ends in a multiway branch.  */
      if (j < m_path.length () - 1)
        {
          int orig_n_insns = m_n_insns;
          /* PHIs in the path will create degenerate PHIS in the
             copied path which will then get propagated away, so
             looking at just the duplicate path the PHIs would
             seem unimportant.

             But those PHIs, because they're assignments to objects
             typically with lives that exist outside the thread path,
             will tend to generate PHIs (or at least new PHI arguments)
             at points where we leave the thread path and rejoin
             the original blocks.  So we do want to account for them.

             We ignore virtual PHIs.  We also ignore cases where BB
             has a single incoming edge.  That's the most common
             degenerate PHI we'll see here.  Finally we ignore PHIs
             that are associated with the value we're tracking as
             that object likely dies.  */
          if (EDGE_COUNT (bb->succs) > 1 && EDGE_COUNT (bb->preds) > 1)
            {
              for (gphi_iterator gsip = gsi_start_phis (bb);
                   !gsi_end_p (gsip);
                   gsi_next (&gsip))
                {
                  gphi *phi = gsip.phi ();
                  tree dst = gimple_phi_result (phi);

                  /* Note that if both NAME and DST are anonymous
                     SSA_NAMEs, then we do not have enough information
                     to consider them associated.  */
                  if (dst != name
                      && name
                      && TREE_CODE (name) == SSA_NAME
                      && (SSA_NAME_VAR (dst) != SSA_NAME_VAR (name)
                          || !SSA_NAME_VAR (dst))
                      && !virtual_operand_p (dst))
                    ++m_n_insns;
                }
            }

          if (!m_contains_hot_bb && m_speed_p)
            m_contains_hot_bb |= optimize_bb_for_speed_p (bb);
          for (gsi = gsi_after_labels (bb);
               !gsi_end_p (gsi);
               gsi_next_nondebug (&gsi))
            {
              /* Do not allow OpenACC loop markers and __builtin_constant_p on
                 threading paths.  The latter is disallowed, because an
                 expression might be constant on two threading paths, and
                 become non-constant (i.e.: phi) when they merge.  */
              gimple *stmt = gsi_stmt (gsi);
              if (gimple_call_internal_p (stmt, IFN_UNIQUE)
                  || gimple_call_builtin_p (stmt, BUILT_IN_CONSTANT_P))
                {
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    fputc ('\n', dump_file);
                  return false;
                }
              /* Do not count empty statements and labels.  */
              if (gimple_code (stmt) != GIMPLE_NOP
                  && !is_gimple_debug (stmt))
                m_n_insns += estimate_num_insns (stmt, &eni_size_weights);
            }
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, " (%i insns)", m_n_insns - orig_n_insns);
          /* We do not look at the block with the threaded branch
             in this loop.  So if any block with a last statement that
             is a GIMPLE_SWITCH or GIMPLE_GOTO is seen, then we have a
             multiway branch on our path.

             The block in PATH[0] is special, it's the block where we're
             going to be able to eliminate its branch.  */
          if (j > 0)
            {
              gimple *last = last_stmt (bb);
              if (last
                  && (gimple_code (last) == GIMPLE_SWITCH
                      || gimple_code (last) == GIMPLE_GOTO))
                m_multiway_branch_in_path = true;
            }
        }
      /* Note if we thread through the latch, we will want to include
         the last entry in the array when determining if we thread
         through the loop latch.  */
      if (loop->latch == bb)
        {
          m_threaded_through_latch = true;
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, " (latch)");
        }
    }
  /* We are going to remove the control statement at the end of the
     last block in the threading path.  So don't count it against our
     statement count.  */
  m_n_insns -= m_exit_jump_benefit;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\n  Control statement insns: %i\n"
             "  Overall: %i insns\n",
             m_exit_jump_benefit, m_n_insns);
  /* Threading is profitable if the path duplicated is hot, but also
     in a case we separate cold path from hot path and permit optimization
     of the hot path later.  Be on the aggressive side here.  In some
     testcases, as in PR 78407, this leads to noticeable improvements.  */
  if (m_speed_p)
    {
      if (m_n_insns >= param_max_fsm_thread_path_insns)
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, "  FAIL: Jump-thread path not considered: "
                     "the number of instructions on the path "
                     "exceeds PARAM_MAX_FSM_THREAD_PATH_INSNS.\n");
          return false;
        }
      edge entry = find_edge (m_path[m_path.length () - 1],
                              m_path[m_path.length () - 2]);
      if (probably_never_executed_edge_p (cfun, entry))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, "  FAIL: Jump-thread path not considered: "
                     "path entry is probably never executed.\n");
          return false;
        }
    }
  else if (m_n_insns > 1)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "  FAIL: Jump-thread path not considered: "
                 "duplication of %i insns is needed and optimizing for "
                 "size.\n",
                 m_n_insns);
      return false;
    }
  /* The generic copier used by the backthreader does not re-use an
     existing threading path to reduce code duplication.  So for that
     case, drastically reduce the number of statements we are allowed
     to copy.  We don't know yet whether we will thread through the latch
     so we have to be permissive and continue threading, but indicate
     to the caller that the thread, if final, wouldn't be profitable.  */
  if ((!m_threaded_multiway_branch
       || !loop->latch
       || loop->latch->index == EXIT_BLOCK)
      && (m_n_insns * param_fsm_scale_path_stmts
          >= param_max_jump_thread_duplication_stmts))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "  FAIL: Did not thread around loop and would copy too "
                 "many statements.\n");
      return false;
    }
  *large_non_fsm = (!(m_threaded_through_latch && m_threaded_multiway_branch)
                    && (m_n_insns * param_fsm_scale_path_stmts
                        >= param_max_jump_thread_duplication_stmts));

  if (dump_file && (dump_flags & TDF_DETAILS))
    fputc ('\n', dump_file);
  return true;
}
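// To make the *LARGE_NON_FSM threshold concrete: assuming the current
// default parameter values of 2 for fsm-scale-path-stmts and 15 for
// max-jump-thread-duplication-stmts, a path of 8 or more counted insns
// that neither threads through the latch nor eliminates a multiway
// branch is flagged as large.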
/* Examine jump threading path PATH and return TRUE if it is profitable to
   thread it, otherwise return FALSE.

   The taken edge out of the path is TAKEN_EDGE.

   CREATES_IRREDUCIBLE_LOOP is set to TRUE if threading this path
   would create an irreducible loop.

   ??? It seems we should be able to loosen some of the restrictions in
   this function after loop optimizations have run.  */

bool
back_threader_profitability::profitable_path_p (const vec<basic_block> &m_path,
                                                edge taken_edge,
                                                bool *creates_irreducible_loop)
{
  // We can assume that possibly_profitable_path_p holds here.

  loop_p loop = m_path[0]->loop_father;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Checking profitability of path (backwards): ");

  /* If this path threaded through the loop latch back into the
     same loop and the destination does not dominate the loop
     latch, then this thread would create an irreducible loop.  */
  *creates_irreducible_loop = false;
  if (m_threaded_through_latch
      && loop == taken_edge->dest->loop_father
      && (determine_bb_domination_status (loop, taken_edge->dest)
          == DOMST_NONDOMINATING))
    *creates_irreducible_loop = true;
  /* Threading is profitable if the path duplicated is hot, but also
     in a case we separate cold path from hot path and permit optimization
     of the hot path later.  Be on the aggressive side here.  In some
     testcases, as in PR 78407, this leads to noticeable improvements.  */
  if (m_speed_p
      && (optimize_edge_for_speed_p (taken_edge) || m_contains_hot_bb))
    {
      if (probably_never_executed_edge_p (cfun, taken_edge))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, "  FAIL: Jump-thread path not considered: "
                     "path leads to probably never executed edge.\n");
          return false;
        }
    }
  else if (m_n_insns > 1)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "  FAIL: Jump-thread path not considered: "
                 "duplication of %i insns is needed and optimizing for "
                 "size.\n",
                 m_n_insns);
      return false;
    }
  /* We avoid creating irreducible inner loops unless we thread through
     a multiway branch, in which case we have deemed it worth losing
     other loop optimizations later.

     We also consider it worth creating an irreducible inner loop after
     loop optimizations if the number of copied statements is low.  */
  if (!m_threaded_multiway_branch
      && *creates_irreducible_loop
      && (!(cfun->curr_properties & PROP_loop_opts_done)
          || (m_n_insns * param_fsm_scale_path_stmts
              >= param_max_jump_thread_duplication_stmts)))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "  FAIL: Would create irreducible loop early without "
                 "threading multiway branch.\n");
      /* We compute creates_irreducible_loop only late.  */
      return false;
    }

  /* The generic copier used by the backthreader does not re-use an
     existing threading path to reduce code duplication.  So for that
     case, drastically reduce the number of statements we are allowed
     to copy.  */
  if (!(m_threaded_through_latch && m_threaded_multiway_branch)
      && (m_n_insns * param_fsm_scale_path_stmts
          >= param_max_jump_thread_duplication_stmts))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "  FAIL: Did not thread around loop and would copy too "
                 "many statements.\n");
      return false;
    }

  /* When there is a multi-way branch on the path, then threading can
     explode the CFG due to duplicating the edges for that multi-way
     branch.  So like above, only allow a multi-way branch on the path
     if we actually thread a multi-way branch.  */
  if (!m_threaded_multiway_branch && m_multiway_branch_in_path)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "  FAIL: Thread through multiway branch without threading "
                 "a multiway branch.\n");
      return false;
    }

  /* Threading through an empty latch would cause code to be added to
     the latch.  This could alter the loop form sufficiently to cause
     loop optimizations to fail.  Disable these threads until after
     loop optimizations have run.  */
  if ((m_threaded_through_latch || taken_edge->dest == loop->latch)
      && !(cfun->curr_properties & PROP_loop_opts_done)
      && empty_block_p (loop->latch))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "  FAIL: Thread through latch before loop opts would create "
                 "non-empty latch\n");
      return false;
    }
  if (dump_file && (dump_flags & TDF_DETAILS))
    fputc ('\n', dump_file);
  return true;
}
/* The current path PATH is a vector of blocks forming a jump threading
   path in reverse order.  TAKEN_EDGE is the edge taken from path[0].

   Convert the current path into the form used by register_jump_thread and
   register it.

   Return TRUE if successful or FALSE otherwise.  */

bool
back_threader_registry::register_path (const vec<basic_block> &m_path,
                                       edge taken_edge)
{
  vec<jump_thread_edge *> *jump_thread_path = allocate_thread_path ();

  // The generic copier ignores the edge type.  We can build the
  // thread edges with any type.
  for (unsigned int j = 0; j + 1 < m_path.length (); j++)
    {
      basic_block bb1 = m_path[m_path.length () - j - 1];
      basic_block bb2 = m_path[m_path.length () - j - 2];

      edge e = find_edge (bb1, bb2);
      gcc_assert (e);
      push_edge (jump_thread_path, e, EDGE_COPY_SRC_BLOCK);
    }

  push_edge (jump_thread_path, taken_edge, EDGE_NO_COPY_SRC_BLOCK);
  return register_jump_thread (jump_thread_path);
}
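// For instance, the reverse-order path {bb7, bb5, bb3} (bb3 is the
// entry, bb7 holds the conditional; indices are hypothetical) becomes
// the edge list bb3->bb5, bb5->bb7 as EDGE_COPY_SRC_BLOCK, followed by
// TAKEN_EDGE as EDGE_NO_COPY_SRC_BLOCK.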
// Thread all suitable paths in the current function.
//
// Return TODO_flags.

unsigned int
back_threader::thread_blocks ()
{
  basic_block bb;
  FOR_EACH_BB_FN (bb, m_fun)
    if (EDGE_COUNT (bb->succs) > 1)
      maybe_thread_block (bb);

  bool changed = m_registry.thread_through_all_blocks (true);

  if (m_flags & BT_SPEED)
    return changed ? TODO_cleanup_cfg : 0;

  return 0;
}
namespace {

const pass_data pass_data_early_thread_jumps =
{
  GIMPLE_PASS,
  "ethread",
  OPTGROUP_NONE,
  TV_TREE_SSA_THREAD_JUMPS,
  ( PROP_cfg | PROP_ssa ),
  0,
  0,
  0,
  ( TODO_cleanup_cfg | TODO_update_ssa ),
};

const pass_data pass_data_thread_jumps =
{
  GIMPLE_PASS,
  "thread",
  OPTGROUP_NONE,
  TV_TREE_SSA_THREAD_JUMPS,
  ( PROP_cfg | PROP_ssa ),
  0,
  0,
  0,
  TODO_update_ssa,
};

const pass_data pass_data_thread_jumps_full =
{
  GIMPLE_PASS,
  "threadfull",
  OPTGROUP_NONE,
  TV_TREE_SSA_THREAD_JUMPS,
  ( PROP_cfg | PROP_ssa ),
  0,
  0,
  0,
  TODO_update_ssa,
};
// Early jump threading pass optimizing for size.
class pass_early_thread_jumps : public gimple_opt_pass
{
public:
  pass_early_thread_jumps (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_early_thread_jumps, ctxt)
  {}
  opt_pass * clone () override
  {
    return new pass_early_thread_jumps (m_ctxt);
  }
  void set_pass_param (unsigned int, bool param) override
  {
    m_first = param;
  }
  bool gate (function *) override
  {
    return flag_thread_jumps;
  }
  unsigned int execute (function *fun) override
  {
    back_threader threader (fun, BT_NONE, m_first);
    return threader.thread_blocks ();
  }
private:
  bool m_first;
};
// Jump threading pass without resolving of unknown SSAs.
class pass_thread_jumps : public gimple_opt_pass
{
public:
  pass_thread_jumps (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_thread_jumps, ctxt)
  {}
  opt_pass * clone (void) override
  {
    return new pass_thread_jumps (m_ctxt);
  }
  void set_pass_param (unsigned int, bool param) override
  {
    m_first = param;
  }
  bool gate (function *) override
  {
    return flag_thread_jumps && flag_expensive_optimizations;
  }
  unsigned int execute (function *fun) override
  {
    back_threader threader (fun, BT_SPEED, m_first);
    return threader.thread_blocks ();
  }
private:
  bool m_first;
};
// Jump threading pass that fully resolves unknown SSAs.
class pass_thread_jumps_full : public gimple_opt_pass
{
public:
  pass_thread_jumps_full (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_thread_jumps_full, ctxt)
  {}
  opt_pass * clone (void) override
  {
    return new pass_thread_jumps_full (m_ctxt);
  }
  void set_pass_param (unsigned int, bool param) override
  {
    m_first = param;
  }
  bool gate (function *) override
  {
    return flag_thread_jumps && flag_expensive_optimizations;
  }
  unsigned int execute (function *fun) override
  {
    back_threader threader (fun, BT_SPEED | BT_RESOLVE, m_first);
    return threader.thread_blocks ();
  }
private:
  bool m_first;
};

} // namespace {
gimple_opt_pass *
make_pass_thread_jumps (gcc::context *ctxt)
{
  return new pass_thread_jumps (ctxt);
}

gimple_opt_pass *
make_pass_thread_jumps_full (gcc::context *ctxt)
{
  return new pass_thread_jumps_full (ctxt);
}

gimple_opt_pass *
make_pass_early_thread_jumps (gcc::context *ctxt)
{
  return new pass_early_thread_jumps (ctxt);
}