1 /* Loop header copying on trees.
2 Copyright (C) 2004-2023 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
22 #include "coretypes.h"
27 #include "tree-pass.h"
28 #include "gimple-ssa.h"
29 #include "gimple-iterator.h"
31 #include "tree-into-ssa.h"
33 #include "tree-inline.h"
34 #include "tree-ssa-threadedge.h"
35 #include "tree-ssa-sccvn.h"
36 #include "tree-phinodes.h"
37 #include "ssa-iterators.h"
38 #include "value-range.h"
39 #include "gimple-range.h"
40 #include "gimple-range-path.h"
43 /* Duplicates headers of loops if they are small enough, so that the statements
44 in the loop body are always executed when the loop is entered. This
45 increases effectiveness of code motion optimizations, and reduces the need
46 for loop preconditioning. */
48 /* Given a path through edge E, whose last statement is COND, return
49 the range of the solved conditional in R. */
52 edge_range_query (irange
&r
, edge e
, gcond
*cond
, gimple_ranger
&ranger
)
54 auto_vec
<basic_block
> path (2);
55 path
.safe_push (e
->dest
);
56 path
.safe_push (e
->src
);
57 path_range_query
query (ranger
, path
);
58 if (!query
.range_of_stmt (r
, cond
))
59 r
.set_varying (boolean_type_node
);
62 /* Return true if the condition on the first iteration of the loop can
63 be statically determined. */
66 entry_loop_condition_is_static (class loop
*l
, gimple_ranger
*ranger
)
68 edge e
= loop_preheader_edge (l
);
69 gcond
*last
= safe_dyn_cast
<gcond
*> (*gsi_last_bb (e
->dest
));
75 extract_true_false_edges_from_block (e
->dest
, &true_e
, &false_e
);
77 /* If neither edge is the exit edge, this is not a case we'd like to special-case. */
79 if (!loop_exit_edge_p (l
, true_e
) && !loop_exit_edge_p (l
, false_e
))
82 int_range
<1> desired_static_range
;
83 if (loop_exit_edge_p (l
, true_e
))
84 desired_static_range
= range_false ();
86 desired_static_range
= range_true ();
89 edge_range_query (r
, e
, last
, *ranger
);
90 return r
== desired_static_range
;
93 /* Check whether we should duplicate HEADER of LOOP. At most *LIMIT
94 instructions should be duplicated, limit is decreased by the actual amount. */
98 should_duplicate_loop_header_p (basic_block header
, class loop
*loop
,
101 gimple_stmt_iterator bsi
;
103 gcc_assert (!header
->aux
);
105 gcc_assert (EDGE_COUNT (header
->succs
) > 0);
106 if (single_succ_p (header
))
108 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
110 " Not duplicating bb %i: it is single succ.\n",
115 if (flow_bb_inside_loop_p (loop
, EDGE_SUCC (header
, 0)->dest
)
116 && flow_bb_inside_loop_p (loop
, EDGE_SUCC (header
, 1)->dest
))
118 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
120 " Not duplicating bb %i: both successors are in loop.\n",
125 /* If this is not the original loop header, we want it to have just
126 one predecessor in order to match the && pattern. */
127 if (header
!= loop
->header
&& !single_pred_p (header
))
129 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
131 " Not duplicating bb %i: it has mutiple predecestors.\n",
136 gcond
*last
= safe_dyn_cast
<gcond
*> (*gsi_last_bb (header
));
139 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
141 " Not duplicating bb %i: it does not end by conditional.\n",
146 for (gphi_iterator psi
= gsi_start_phis (header
); !gsi_end_p (psi
);
149 gphi
*phi
= psi
.phi ();
150 tree res
= gimple_phi_result (phi
);
151 if (INTEGRAL_TYPE_P (TREE_TYPE (res
))
152 || POINTER_TYPE_P (TREE_TYPE (res
)))
153 gimple_set_uid (phi
, 1 /* IV */);
155 gimple_set_uid (phi
, 0);
158 /* Count number of instructions and punt on calls.
159 Populate stmts INV/IV flag to later apply heuristics to the
160 kind of conditions we want to copy. */
161 for (bsi
= gsi_start_bb (header
); !gsi_end_p (bsi
); gsi_next (&bsi
))
163 gimple
*last
= gsi_stmt (bsi
);
165 if (gimple_code (last
) == GIMPLE_LABEL
)
168 if (is_gimple_debug (last
))
171 if (gimple_code (last
) == GIMPLE_CALL
172 && (!gimple_inexpensive_call_p (as_a
<gcall
*> (last
))
173 /* IFN_LOOP_DIST_ALIAS means that inner loop is distributed
174 at current loop's header. Don't copy in this case. */
175 || gimple_call_internal_p (last
, IFN_LOOP_DIST_ALIAS
)))
177 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
179 " Not duplicating bb %i: it contains call.\n",
184 *limit
-= estimate_num_insns (last
, &eni_size_weights
);
187 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
189 " Not duplicating bb %i contains too many insns.\n",
194 /* Classify the stmt based on whether its computation is based
195 on an IV or whether it is invariant in the loop. */
196 gimple_set_uid (last
, 0);
197 if (!gimple_vuse (last
))
203 FOR_EACH_SSA_TREE_OPERAND (op
, last
, i
, SSA_OP_USE
)
204 if (!SSA_NAME_IS_DEFAULT_DEF (op
)
205 && flow_bb_inside_loop_p (loop
,
206 gimple_bb (SSA_NAME_DEF_STMT (op
))))
208 if (!(gimple_uid (SSA_NAME_DEF_STMT (op
)) & 2 /* INV */))
210 if (gimple_uid (SSA_NAME_DEF_STMT (op
)) & 1 /* IV */)
213 gimple_set_uid (last
, (iv
? 1 : 0) | (inv
? 2 : 0));
217 /* If the condition tests a non-IV loop variant we do not want to rotate
218 the loop further. Unless this is the original loop header. */
219 tree lhs
= gimple_cond_lhs (last
);
220 tree rhs
= gimple_cond_rhs (last
);
221 if (header
!= loop
->header
222 && ((TREE_CODE (lhs
) == SSA_NAME
223 && !SSA_NAME_IS_DEFAULT_DEF (lhs
)
224 && flow_bb_inside_loop_p (loop
, gimple_bb (SSA_NAME_DEF_STMT (lhs
)))
225 && gimple_uid (SSA_NAME_DEF_STMT (lhs
)) == 0)
226 || (TREE_CODE (rhs
) == SSA_NAME
227 && !SSA_NAME_IS_DEFAULT_DEF (rhs
)
228 && flow_bb_inside_loop_p (loop
,
229 gimple_bb (SSA_NAME_DEF_STMT (rhs
)))
230 && gimple_uid (SSA_NAME_DEF_STMT (rhs
)) == 0)))
232 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
234 " Not duplicating bb %i: condition based on non-IV loop"
235 " variant.\n", header
->index
);
242 /* Checks whether LOOP is a do-while style loop. */
245 do_while_loop_p (class loop
*loop
)
247 gimple
*stmt
= last_nondebug_stmt (loop
->latch
);
249 /* If the latch of the loop is not empty, it is not a do-while loop. */
251 && gimple_code (stmt
) != GIMPLE_LABEL
)
253 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
255 "Loop %i is not do-while loop: latch is not empty.\n",
260 /* If the latch does not have a single predecessor, it is not a do-while loop. */
262 if (!single_pred_p (loop
->latch
))
264 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
266 "Loop %i is not do-while loop: latch has multiple "
267 "predecessors.\n", loop
->num
);
271 /* If the latch predecessor doesn't exit the loop, it is not a do-while loop. */
273 if (!loop_exits_from_bb_p (loop
, single_pred (loop
->latch
)))
275 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
277 "Loop %i is not do-while loop: latch predecessor "
278 "does not exit loop.\n", loop
->num
);
282 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
283 fprintf (dump_file
, "Loop %i is do-while loop\n", loop
->num
);
290 /* Common superclass for both header-copying phases. */
291 class ch_base
: public gimple_opt_pass
294 ch_base (pass_data data
, gcc::context
*ctxt
)
295 : gimple_opt_pass (data
, ctxt
)
298 /* Copies headers of all loops in FUN for which process_loop_p is true. */
299 unsigned int copy_headers (function
*fun
);
301 /* Return true to copy headers of LOOP or false to skip. */
302 virtual bool process_loop_p (class loop
*loop
) = 0;
305 const pass_data pass_data_ch
=
307 GIMPLE_PASS
, /* type */
309 OPTGROUP_LOOP
, /* optinfo_flags */
310 TV_TREE_CH
, /* tv_id */
311 ( PROP_cfg
| PROP_ssa
), /* properties_required */
312 0, /* properties_provided */
313 0, /* properties_destroyed */
314 0, /* todo_flags_start */
315 0, /* todo_flags_finish */
318 class pass_ch
: public ch_base
321 pass_ch (gcc::context
*ctxt
)
322 : ch_base (pass_data_ch
, ctxt
)
325 /* opt_pass methods: */
326 bool gate (function
*) final override
{ return flag_tree_ch
!= 0; }
328 /* Initialize and finalize loop structures, copying headers in between. */
329 unsigned int execute (function
*) final override
;
331 opt_pass
* clone () final override
{ return new pass_ch (m_ctxt
); }
334 /* ch_base method: */
335 bool process_loop_p (class loop
*loop
) final override
;
338 const pass_data pass_data_ch_vect
=
340 GIMPLE_PASS
, /* type */
341 "ch_vect", /* name */
342 OPTGROUP_LOOP
, /* optinfo_flags */
343 TV_TREE_CH
, /* tv_id */
344 ( PROP_cfg
| PROP_ssa
), /* properties_required */
345 0, /* properties_provided */
346 0, /* properties_destroyed */
347 0, /* todo_flags_start */
348 0, /* todo_flags_finish */
351 /* This is a more aggressive version of the same pass, designed to run just
352 before if-conversion and vectorization, to put more loops into the form
353 required for those phases. */
354 class pass_ch_vect
: public ch_base
357 pass_ch_vect (gcc::context
*ctxt
)
358 : ch_base (pass_data_ch_vect
, ctxt
)
361 /* opt_pass methods: */
362 bool gate (function
*fun
) final override
364 return flag_tree_ch
!= 0
365 && (flag_tree_loop_vectorize
!= 0 || fun
->has_force_vectorize_loops
);
368 /* Just copy headers, no initialization/finalization of loop structures. */
369 unsigned int execute (function
*) final override
;
372 /* ch_base method: */
373 bool process_loop_p (class loop
*loop
) final override
;
374 }; // class pass_ch_vect
376 /* For all loops, copy the condition at the end of the loop body in front
377 of the loop. This is beneficial since it increases efficiency of
378 code motion optimizations. It also saves one jump on entry to the loop. */
381 ch_base::copy_headers (function
*fun
)
384 edge exit
, nonexit
, entry
;
385 basic_block
*bbs
, *copied_bbs
;
388 bool changed
= false;
390 if (number_of_loops (fun
) <= 1)
393 bbs
= XNEWVEC (basic_block
, n_basic_blocks_for_fn (fun
));
394 copied_bbs
= XNEWVEC (basic_block
, n_basic_blocks_for_fn (fun
));
395 bbs_size
= n_basic_blocks_for_fn (fun
);
397 auto_vec
<loop_p
> candidates
;
398 auto_vec
<std::pair
<edge
, loop_p
> > copied
;
399 auto_vec
<class loop
*> loops_to_unloop
;
400 auto_vec
<int> loops_to_unloop_nunroll
;
402 mark_dfs_back_edges ();
403 gimple_ranger
*ranger
= new gimple_ranger
;
404 for (auto loop
: loops_list (cfun
, 0))
406 int initial_limit
= param_max_loop_header_insns
;
407 int remaining_limit
= initial_limit
;
408 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
410 "Analyzing loop %i\n", loop
->num
);
412 header
= loop
->header
;
413 if (!get_max_loop_iterations_int (loop
))
415 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
416 fprintf (dump_file
, "Loop %d never loops.\n", loop
->num
);
417 loops_to_unloop
.safe_push (loop
);
418 loops_to_unloop_nunroll
.safe_push (0);
422 /* If the loop is already a do-while style one (either because it was
423 written as such, or because jump threading transformed it into one),
424 we might be in fact peeling the first iteration of the loop. This
425 in general is not a good idea. Also avoid touching infinite loops. */
426 if (!loop_has_exit_edges (loop
)
427 || !process_loop_p (loop
))
430 /* Avoid loop header copying when optimizing for size unless we can
431 determine that the loop condition is static in the first iteration. */
433 if (optimize_loop_for_size_p (loop
)
434 && !loop
->force_vectorize
435 && !entry_loop_condition_is_static (loop
, ranger
))
437 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
439 " Not duplicating bb %i: optimizing for size.\n",
444 if (should_duplicate_loop_header_p (header
, loop
, &remaining_limit
))
445 candidates
.safe_push (loop
);
447 /* Do not use ranger after we change the IL and have not updated SSA. */
450 for (auto loop
: candidates
)
452 int initial_limit
= param_max_loop_header_insns
;
453 int remaining_limit
= initial_limit
;
454 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
456 "Copying headers of loop %i\n", loop
->num
);
458 header
= loop
->header
;
460 /* Iterate the header copying up to limit; this takes care of the cases
461 like while (a && b) {...}, where we want to have both of the conditions
462 copied. TODO -- handle while (a || b) - like cases, by not requiring
463 the header to have just a single successor and copying up to postdominator. */
469 profile_count exit_count
= profile_count::zero ();
470 profile_count entry_count
= profile_count::zero ();
473 FOR_EACH_EDGE (e
, ei
, loop
->header
->preds
)
474 if (e
->src
!= loop
->latch
)
475 entry_count
+= e
->count ();
476 while (should_duplicate_loop_header_p (header
, loop
, &remaining_limit
))
478 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
479 fprintf (dump_file
, " Will duplicate bb %i\n", header
->index
);
481 /* Find a successor of header that is inside a loop; i.e. the new
482 header after the condition is copied. */
483 if (flow_bb_inside_loop_p (loop
, EDGE_SUCC (header
, 0)->dest
))
485 nonexit
= EDGE_SUCC (header
, 0);
486 exit
= EDGE_SUCC (header
, 1);
490 nonexit
= EDGE_SUCC (header
, 1);
491 exit
= EDGE_SUCC (header
, 0);
493 exit_count
+= exit
->count ();
495 bbs
[n_bbs
++] = header
;
496 gcc_assert (bbs_size
> n_bbs
);
497 header
= nonexit
->dest
;
503 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
505 "Duplicating header of the loop %d up to edge %d->%d,"
507 loop
->num
, exit
->src
->index
, exit
->dest
->index
,
508 initial_limit
- remaining_limit
);
510 /* Ensure that the header will have just the latch as a predecessor inside the loop. */
512 if (!single_pred_p (nonexit
->dest
))
514 header
= split_edge (nonexit
);
515 exit
= single_pred_edge (header
);
518 entry
= loop_preheader_edge (loop
);
520 propagate_threaded_block_debug_into (exit
->dest
, entry
->dest
);
521 if (!gimple_duplicate_sese_region (entry
, exit
, bbs
, n_bbs
, copied_bbs
,
524 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
525 fprintf (dump_file
, "Duplication failed.\n");
528 copied
.safe_push (std::make_pair (entry
, loop
));
530 /* If the loop has the form "for (i = j; i < j + 10; i++)" then
531 this copying can introduce a case where we rely on undefined
532 signed overflow to eliminate the preheader condition, because
533 we assume that "j < j + 10" is true. We don't want to warn
534 about that case for -Wstrict-overflow, because in general we
535 don't warn about overflow involving loops. Prevent the
536 warning by setting the no_warning flag in the condition. */
537 if (warn_strict_overflow
> 0)
541 for (i
= 0; i
< n_bbs
; ++i
)
543 gimple_stmt_iterator bsi
;
545 for (bsi
= gsi_start_bb (copied_bbs
[i
]);
549 gimple
*stmt
= gsi_stmt (bsi
);
550 if (gimple_code (stmt
) == GIMPLE_COND
)
552 tree lhs
= gimple_cond_lhs (stmt
);
553 if (gimple_cond_code (stmt
) != EQ_EXPR
554 && gimple_cond_code (stmt
) != NE_EXPR
555 && INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
556 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs
)))
557 suppress_warning (stmt
, OPT_Wstrict_overflow_
);
559 else if (is_gimple_assign (stmt
))
561 enum tree_code rhs_code
= gimple_assign_rhs_code (stmt
);
562 tree rhs1
= gimple_assign_rhs1 (stmt
);
563 if (TREE_CODE_CLASS (rhs_code
) == tcc_comparison
564 && rhs_code
!= EQ_EXPR
565 && rhs_code
!= NE_EXPR
566 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1
))
567 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (rhs1
)))
568 suppress_warning (stmt
, OPT_Wstrict_overflow_
);
574 /* Update header of the loop. */
575 loop
->header
= header
;
576 /* Find correct latch. We only duplicate chain of conditionals so
577 there should be precisely two edges to the new header. One entry
578 edge and one to latch. */
579 FOR_EACH_EDGE (e
, ei
, loop
->header
->preds
)
580 if (header
!= e
->src
)
582 loop
->latch
= e
->src
;
585 /* Ensure that the latch is simple. */
586 if (!single_succ_p (loop_latch_edge (loop
)->src
))
587 split_edge (loop_latch_edge (loop
));
589 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
591 if (do_while_loop_p (loop
))
592 fprintf (dump_file
, "Loop %d is now do-while loop.\n", loop
->num
);
594 fprintf (dump_file
, "Loop %d is still not do-while loop.\n",
596 fprintf (dump_file
, "Exit count: ");
597 exit_count
.dump (dump_file
);
598 fprintf (dump_file
, "\nEntry count: ");
599 entry_count
.dump (dump_file
);
600 fprintf (dump_file
, "\n");
603 /* We possibly decreased number of iterations by 1. */
604 auto_vec
<edge
> exits
= get_loop_exit_edges (loop
);
605 bool precise
= (nexits
== (int) exits
.length ());
606 /* Check that loop may not terminate in any other way than via
607 basic blocks we duplicated. */
610 basic_block
*bbs
= get_loop_body (loop
);
611 for (unsigned i
= 0; i
< loop
->num_nodes
&& precise
; ++i
)
613 basic_block bb
= bbs
[i
];
614 bool found_exit
= false;
615 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
616 if (!flow_bb_inside_loop_p (loop
, e
->dest
))
621 /* If BB has exit, it was duplicated. */
624 /* Give up on irreducible loops. */
625 if (bb
->flags
& BB_IRREDUCIBLE_LOOP
)
630 /* Check that inner loops are finite. */
631 for (class loop
*l
= bb
->loop_father
; l
!= loop
&& precise
;
638 /* Verify that there is no statement that may terminate
639 execution in a way not visible to CFG. */
640 for (gimple_stmt_iterator bsi
= gsi_start_bb (bb
);
641 !gsi_end_p (bsi
); gsi_next (&bsi
))
642 if (stmt_can_terminate_bb_p (gsi_stmt (bsi
)))
648 && get_max_loop_iterations_int (loop
) == 1)
650 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
651 fprintf (dump_file
, "Loop %d no longer loops.\n", loop
->num
);
652 loops_to_unloop
.safe_push (loop
);
653 loops_to_unloop_nunroll
.safe_push (0);
657 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
660 " decreased number of iterations of loop %d by 1.\n",
662 adjust_loop_info_after_peeling (loop
, 1, true);
664 else if (exit_count
>= entry_count
.apply_scale (9, 10))
666 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
668 "Peeled likely exits: likely decreased number "
669 "of iterations of loop %d by 1.\n", loop
->num
);
670 adjust_loop_info_after_peeling (loop
, 1, false);
672 else if (dump_file
&& (dump_flags
& TDF_DETAILS
))
674 "Not decreased number"
675 " of iterations of loop %d; likely exits remains.\n",
683 update_ssa (TODO_update_ssa
);
684 /* After updating SSA form perform CSE on the loop header
685 copies. This is esp. required for the pass before
686 vectorization since nothing cleans up copied exit tests
687 that can now be simplified. CSE from the entry of the
688 region we copied till all loop exit blocks but not
689 entering the loop itself. */
690 for (unsigned i
= 0; i
< copied
.length (); ++i
)
692 edge entry
= copied
[i
].first
;
693 loop_p loop
= copied
[i
].second
;
694 auto_vec
<edge
> exit_edges
= get_loop_exit_edges (loop
);
695 bitmap exit_bbs
= BITMAP_ALLOC (NULL
);
696 for (unsigned j
= 0; j
< exit_edges
.length (); ++j
)
697 bitmap_set_bit (exit_bbs
, exit_edges
[j
]->dest
->index
);
698 bitmap_set_bit (exit_bbs
, loop
->header
->index
);
699 do_rpo_vn (cfun
, entry
, exit_bbs
);
700 BITMAP_FREE (exit_bbs
);
703 if (!loops_to_unloop
.is_empty ())
705 bool irred_invalidated
;
706 unloop_loops (loops_to_unloop
, loops_to_unloop_nunroll
, NULL
, &irred_invalidated
);
712 return changed
? TODO_cleanup_cfg
: 0;
715 /* Initialize the loop structures we need, and finalize after. */
718 pass_ch::execute (function
*fun
)
720 loop_optimizer_init (LOOPS_HAVE_PREHEADERS
721 | LOOPS_HAVE_SIMPLE_LATCHES
722 | LOOPS_HAVE_RECORDED_EXITS
);
724 unsigned int res
= copy_headers (fun
);
726 loop_optimizer_finalize ();
730 /* Assume an earlier phase has already initialized all the loop structures that
731 we need here (and perhaps others too), and that these will be finalized by a later phase. */
735 pass_ch_vect::execute (function
*fun
)
737 return copy_headers (fun
);
740 /* Apply header copying according to a very simple test of do-while shape. */
743 pass_ch::process_loop_p (class loop
*loop
)
745 return !do_while_loop_p (loop
);
748 /* Apply header-copying to loops where we might enable vectorization. */
751 pass_ch_vect::process_loop_p (class loop
*loop
)
753 if (!flag_tree_loop_vectorize
&& !loop
->force_vectorize
)
756 if (loop
->dont_vectorize
)
759 /* The vectorizer won't handle anything with multiple exits, so skip. */
760 edge exit
= single_exit (loop
);
764 if (!do_while_loop_p (loop
))
773 make_pass_ch_vect (gcc::context
*ctxt
)
775 return new pass_ch_vect (ctxt
);
779 make_pass_ch (gcc::context
*ctxt
)
781 return new pass_ch (ctxt
);