1 /* Loop header copying on trees.
2 Copyright (C) 2004-2023 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
22 #include "coretypes.h"
27 #include "tree-pass.h"
28 #include "gimple-ssa.h"
29 #include "gimple-iterator.h"
31 #include "tree-into-ssa.h"
33 #include "tree-inline.h"
34 #include "tree-ssa-threadedge.h"
35 #include "tree-ssa-sccvn.h"
36 #include "tree-phinodes.h"
37 #include "ssa-iterators.h"
38 #include "value-range.h"
39 #include "gimple-range.h"
40 #include "gimple-range-path.h"
41 #include "gimple-pretty-print.h"
43 #include "tree-ssa-loop-manip.h"
45 /* Return path query instance for testing ranges of statements
46 in headers of LOOP contained in basic block BB.
47 Use RANGER instance. */
49 static path_range_query
*
50 get_range_query (class loop
*loop
,
52 gimple_ranger
&ranger
)
54 auto_vec
<basic_block
, 8> path
;
55 for (; bb
!= loop
->header
; bb
= single_pred_edge (bb
)->src
)
57 path
.safe_push (loop
->header
);
58 path
.safe_push (loop_preheader_edge (loop
)->src
);
59 return new path_range_query (ranger
, path
);
62 /* Return edge that is true in the first iteration of the loop
64 Formulate correct ranger query to RANGER. */
67 static_loop_exit (class loop
*l
, basic_block bb
, gimple_ranger
&ranger
,
68 path_range_query
*&query
)
70 gcond
*last
= safe_dyn_cast
<gcond
*> (*gsi_last_bb (bb
));
77 extract_true_false_edges_from_block (bb
, &true_e
, &false_e
);
79 /* If neither edge is the exit edge, this is not a case we'd like to
81 if (!loop_exit_edge_p (l
, true_e
) && !loop_exit_edge_p (l
, false_e
))
84 int_range
<1> desired_static_range
;
85 if (loop_exit_edge_p (l
, true_e
))
87 desired_static_range
= range_false ();
92 desired_static_range
= range_true ();
97 query
= get_range_query (l
, gimple_bb (last
), ranger
);
100 if (!query
->range_of_stmt (r
, last
))
102 return r
== desired_static_range
? ret_e
: NULL
;
105 /* Return true if STMT is static in LOOP. This means that its value
106 is constant in the first iteration.
107 Use RANGER and formulate query cached in QUERY. */
110 loop_static_stmt_p (class loop
*loop
,
111 gimple_ranger
&ranger
,
112 path_range_query
*&query
,
115 tree type
= gimple_range_type (stmt
);
116 if (!type
|| !Value_Range::supports_type_p (type
))
120 query
= get_range_query (loop
, gimple_bb (stmt
), ranger
);
122 Value_Range
r (gimple_range_type (stmt
));
123 if (!query
->range_of_stmt (r
, stmt
))
125 return r
.singleton_p ();
128 /* Return true if OP is invariant. */
131 loop_invariant_op_p (class loop
*loop
,
134 if (is_gimple_min_invariant (op
))
136 if (SSA_NAME_IS_DEFAULT_DEF (op
)
137 || !flow_bb_inside_loop_p (loop
, gimple_bb (SSA_NAME_DEF_STMT (op
))))
139 return gimple_uid (SSA_NAME_DEF_STMT (op
)) & 1;
142 /* Return true if OP was classified as static, i.e. its value
143 is constant in the first iteration of the loop. */
146 loop_static_op_p (class loop
*loop
, tree op
)
148 /* Always check for invariant first. */
149 gcc_checking_assert (!is_gimple_min_invariant (op
)
150 && !SSA_NAME_IS_DEFAULT_DEF (op
)
151 && flow_bb_inside_loop_p (loop
,
152 gimple_bb (SSA_NAME_DEF_STMT (op
))));
153 return gimple_uid (SSA_NAME_DEF_STMT (op
)) & 2;
157 /* Return true if OP combines outcome of static and
158 loop invariant conditional. */
161 loop_combined_static_and_iv_p (class loop
*loop
,
164 /* Always check for invariant first. */
165 gcc_checking_assert (!is_gimple_min_invariant (op
)
166 && !SSA_NAME_IS_DEFAULT_DEF (op
)
167 && flow_bb_inside_loop_p (loop
,
168 gimple_bb (SSA_NAME_DEF_STMT (op
))));
169 return gimple_uid (SSA_NAME_DEF_STMT (op
)) & 4;
172 /* Decision about possibility of copying a given header. */
176 /* We do not want to copy this header. */
178 /* We can copy it if it enables wins. */
180 /* We can "copy" it if it enables wins and doing
181 so will introduce no new code. */
182 ch_possible_zero_cost
,
183 /* We want to copy. */
185 /* We want to copy and we will eliminate loop exit. */
186 ch_win_invariant_exit
189 /* Check whether we should duplicate HEADER of LOOP. At most *LIMIT
190 instructions should be duplicated, limit is decreased by the actual
194 should_duplicate_loop_header_p (basic_block header
, class loop
*loop
,
195 gimple_ranger
*ranger
,
197 hash_set
<edge
> *invariant_exits
,
198 hash_set
<edge
> *static_exits
)
200 gimple_stmt_iterator bsi
;
202 gcc_assert (!header
->aux
);
204 gcc_assert (EDGE_COUNT (header
->succs
) > 0);
205 if (single_succ_p (header
))
207 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
209 " Not duplicating bb %i: it is single succ.\n",
211 return ch_impossible
;
214 if (flow_bb_inside_loop_p (loop
, EDGE_SUCC (header
, 0)->dest
)
215 && flow_bb_inside_loop_p (loop
, EDGE_SUCC (header
, 1)->dest
))
217 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
219 " Not duplicating bb %i: both successors are in loop.\n",
221 return ch_impossible
;
224 /* If this is not the original loop header, we want it to have just
225 one predecessor in order to match the && pattern. */
226 if (header
!= loop
->header
&& !single_pred_p (header
))
228 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
230 " Not duplicating bb %i: it has mutiple predecestors.\n",
232 return ch_impossible
;
235 gcond
*last
= safe_dyn_cast
<gcond
*> (*gsi_last_bb (header
));
238 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
240 " Not duplicating bb %i: it does not end by conditional.\n",
242 return ch_impossible
;
245 path_range_query
*query
= NULL
;
246 for (gphi_iterator psi
= gsi_start_phis (header
); !gsi_end_p (psi
);
248 if (!virtual_operand_p (gimple_phi_result (psi
.phi ())))
250 bool static_p
= loop_static_stmt_p (loop
, *ranger
,
252 gimple_set_uid (psi
.phi (), static_p
? 2 : 0);
254 bool code_size_cost
= false;
256 /* Count number of instructions and punt on calls.
257 Populate stmts INV flag to later apply heuristics to the
258 kind of conditions we want to copy. */
259 for (bsi
= gsi_start_bb (header
); !gsi_end_p (bsi
); gsi_next (&bsi
))
261 gimple
*last
= gsi_stmt (bsi
);
263 if (gimple_code (last
) == GIMPLE_LABEL
)
266 if (is_gimple_debug (last
))
269 if (gimple_code (last
) == GIMPLE_COND
)
272 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
274 fprintf (dump_file
, " Analyzing: ");
275 print_gimple_stmt (dump_file
, last
, 0, TDF_SLIM
);
278 if (gimple_code (last
) == GIMPLE_CALL
279 && (!gimple_inexpensive_call_p (as_a
<gcall
*> (last
))
280 /* IFN_LOOP_DIST_ALIAS means that inner loop is distributed
281 at current loop's header. Don't copy in this case. */
282 || gimple_call_internal_p (last
, IFN_LOOP_DIST_ALIAS
)))
284 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
286 " Not duplicating bb %i: it contains call.\n",
290 return ch_impossible
;
293 /* Classify the stmt is invariant in the loop. */
294 gimple_set_uid (last
, 0);
295 if (!gimple_vuse (last
)
296 && gimple_code (last
) != GIMPLE_ASM
297 && (gimple_code (last
) != GIMPLE_CALL
298 || gimple_call_flags (last
) & ECF_CONST
))
300 bool inv
= true, static_p
= false;
303 FOR_EACH_SSA_TREE_OPERAND (op
, last
, i
, SSA_OP_USE
)
304 if (!loop_invariant_op_p (loop
, op
))
306 /* Assume that code is reasonably optimized and invariant
307 constants are already identified. */
309 static_p
= loop_static_stmt_p (loop
, *ranger
, query
, last
);
310 gimple_set_uid (last
, (inv
? 1 : 0) | (static_p
? 2 : 0));
311 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
314 fprintf (dump_file
, " Stmt is loop invariant\n");
316 fprintf (dump_file
, " Stmt is static "
317 "(constant in the first iteration)\n");
319 /* Loop invariants will be optimized out in loop body after
320 duplication; do not account invariant computation in code
323 Similarly static computations will be optimized out in the
328 /* Match the following:
329 _1 = i_1 < 10 <- static condtion
330 _2 = n != 0 <- loop invariant condition
331 _3 = _1 & _2 <- combined static and iv statement. */
333 if (gimple_code (last
) == GIMPLE_ASSIGN
334 && ((code
= gimple_assign_rhs_code (last
)) == BIT_AND_EXPR
335 || code
== BIT_IOR_EXPR
|| code
== BIT_XOR_EXPR
))
337 tree op1
= gimple_assign_rhs1 (last
);
338 tree op2
= gimple_assign_rhs2 (last
);
340 if ((loop_invariant_op_p (loop
, op1
)
341 || loop_combined_static_and_iv_p (loop
, op1
)
342 || loop_static_op_p (loop
, op1
))
343 && (loop_invariant_op_p (loop
, op2
)
344 || loop_combined_static_and_iv_p (loop
, op2
)
345 || loop_static_op_p (loop
, op2
)))
347 /* Duplicating loop header with combned conditional will
348 remove this statement in each copy. But we account for
349 that later when seeing that condition.
351 Note that this may be overly optimistic for bit operations
352 where the static parameter may still result in non-trivial
354 gimple_set_uid (last
, 4);
355 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
357 " Stmt combines static and invariant op\n");
363 int insns
= estimate_num_insns (last
, &eni_size_weights
);
366 code_size_cost
= true;
367 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
369 " Acconting stmt as %i insns\n", insns
);
372 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
374 " Not duplicating bb %i contains too many insns.\n",
378 return ch_impossible
;
382 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
384 fprintf (dump_file
, " Analyzing: ");
385 print_gimple_stmt (dump_file
, last
, 0, TDF_SLIM
);
388 /* If the condition tests a non-IV loop variant we do not want to rotate
389 the loop further. Unless this is the original loop header. */
390 tree lhs
= gimple_cond_lhs (last
);
391 tree rhs
= gimple_cond_rhs (last
);
392 bool lhs_invariant
= loop_invariant_op_p (loop
, lhs
);
393 bool rhs_invariant
= loop_invariant_op_p (loop
, rhs
);
395 /* Combined conditional is a result of if combining:
397 _1 = i_1 < 10 <- static condtion
398 _2 = n != 0 <- loop invariant condition
399 _3 = _1 & _2 <- combined static and iv statement
400 if (_3 != 0) <- combined conditional
402 Combined conditionals will not be optimized out in either copy.
403 However duplicaed header simplifies to:
411 So effectively the resulting code sequence will be of same length as
414 Combined conditionals are never static or invariant, so save some work
416 if (lhs_invariant
!= rhs_invariant
418 || loop_combined_static_and_iv_p (loop
, lhs
))
420 || loop_combined_static_and_iv_p (loop
, rhs
)))
424 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
426 " Conditional combines static and invariant op.\n");
430 edge static_exit
= static_loop_exit (loop
, header
, *ranger
, query
);
434 if (static_exit
&& static_exits
)
436 static_exits
->add (static_exit
);
437 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
439 " Will eliminate peeled conditional in bb %d.\n",
440 static_exit
->src
->index
);
441 /* Still look for invariant exits; exit may be both. */
443 if (lhs_invariant
&& rhs_invariant
)
449 FOR_EACH_EDGE (e
, ei
, header
->succs
)
450 if (loop_exit_edge_p (loop
, e
))
452 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
454 " Will elliminate invariant exit %i->%i\n",
455 e
->src
->index
, e
->dest
->index
);
456 invariant_exits
->add (e
);
459 return ch_win_invariant_exit
;
462 /* If the static exit fully optimize out, it is win to "duplicate"
465 TODO: Even if duplication costs some size we may opt to do so in case
466 exit probability is significant enough (do partial peeling). */
468 return !code_size_cost
? ch_possible_zero_cost
: ch_possible
;
470 /* We was not able to prove that conditional will be eliminated. */
471 int insns
= estimate_num_insns (last
, &eni_size_weights
);
473 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
475 " Acconting stmt as %i insns\n", insns
);
478 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
480 " Not duplicating bb %i contains too many insns.\n",
482 return ch_impossible
;
488 /* Checks whether LOOP is a do-while style loop. */
491 do_while_loop_p (class loop
*loop
)
493 gimple
*stmt
= last_nondebug_stmt (loop
->latch
);
495 /* If the latch of the loop is not empty, it is not a do-while loop. */
497 && gimple_code (stmt
) != GIMPLE_LABEL
)
499 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
501 "Loop %i is not do-while loop: latch is not empty.\n",
506 /* If the latch does not have a single predecessor, it is not a
508 if (!single_pred_p (loop
->latch
))
510 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
512 "Loop %i is not do-while loop: latch has multiple "
513 "predecessors.\n", loop
->num
);
516 basic_block pred
= single_pred (loop
->latch
);
518 /* If the latch predecessor doesn't exit the loop, it is not a
520 if (!loop_exits_from_bb_p (loop
, pred
))
522 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
524 "Loop %i is not do-while loop: latch predecessor "
525 "does not exit loop.\n", loop
->num
);
528 gcond
*last
= safe_dyn_cast
<gcond
*> (*gsi_last_bb (pred
));
530 && (gimple_cond_lhs (last
) == boolean_false_node
531 || gimple_cond_lhs (last
) == boolean_true_node
)
532 && gimple_cond_rhs (last
) == boolean_false_node
)
534 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
536 "Loop %i is not do-while loop: latch predecessor "
537 "contains exit we optimized out.\n", loop
->num
);
541 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
542 fprintf (dump_file
, "Loop %i is do-while loop\n", loop
->num
);
547 /* Update profile after header copying of LOOP.
548 REGION is the original (in loop) sequence, REGION_COPY is the
549 duplicated header (now outside of loop). N_REGION is number of
551 ELIMINATED_EDGE is edge to be removed from duplicated sequence.
552 INVARIANT_EXITS are edges in the loop body to be eliminated
553 since they are loop invariants
555 So we expect the following:
557 // region_copy_start entry will be scaled to entry_count
558 if (cond1) <- this condition will become false
559 and we update probabilities
561 if (cond2) <- this condition is loop invariant
563 goto loop_header <- this will be redirected to loop.
569 if (cond1) <- we need to update probability here
571 if (cond2) <- and determine scaling factor here.
572 moreover cond2 is now always true
578 Adding support for more exits can be done similarly,
579 but only consumer so far is tree-ssa-loop-ch and it uses only this
580 to handle the common case of peeling headers which have
581 conditionals known to be always true upon entry. */
584 update_profile_after_ch (class loop
*loop
,
585 basic_block
*region
, basic_block
*region_copy
,
587 hash_set
<edge
> *invariant_exits
,
588 hash_set
<edge
> *static_exits
,
589 profile_count entry_count
)
591 for (unsigned int i
= 0; i
< n_region
; i
++)
593 edge exit_e
, exit_e_copy
, e
, e_copy
;
594 if (EDGE_COUNT (region
[i
]->succs
) == 1)
596 region_copy
[i
]->count
= entry_count
;
597 region
[i
]->count
-= entry_count
;
601 gcc_checking_assert (EDGE_COUNT (region
[i
]->succs
) == 2);
602 if (loop_exit_edge_p (loop
,
603 EDGE_SUCC (region
[i
], 0)))
605 exit_e
= EDGE_SUCC (region
[i
], 0);
606 exit_e_copy
= EDGE_SUCC (region_copy
[i
], 0);
607 e
= EDGE_SUCC (region
[i
], 1);
608 e_copy
= EDGE_SUCC (region_copy
[i
], 1);
612 exit_e
= EDGE_SUCC (region
[i
], 1);
613 exit_e_copy
= EDGE_SUCC (region_copy
[i
], 1);
614 e
= EDGE_SUCC (region
[i
], 0);
615 e_copy
= EDGE_SUCC (region_copy
[i
], 0);
617 gcc_assert (i
== n_region
- 1
618 || (e
->dest
== region
[i
+ 1]
619 && e_copy
->dest
== region_copy
[i
+ 1]));
620 region_copy
[i
]->count
= entry_count
;
621 profile_count exit_e_count
= exit_e
->count ();
622 bool was_static
= false;
623 if (static_exits
->contains (exit_e
))
625 /* Update profile and the conditional.
626 CFG update is done by caller. */
627 static_exits
->remove (exit_e
);
629 e_copy
->probability
= profile_probability::always ();
630 exit_e_copy
->probability
= profile_probability::never ();
632 = as_a
<gcond
*>(*gsi_last_bb (region_copy
[i
]));
633 if (e_copy
->flags
& EDGE_TRUE_VALUE
)
634 gimple_cond_make_true (cond_stmt
);
636 gimple_cond_make_false (cond_stmt
);
637 update_stmt (cond_stmt
);
638 /* Header copying is a special case of jump threading, so use
639 common code to update loop body exit condition. */
640 update_bb_profile_for_threading (region
[i
], entry_count
, e
);
643 region
[i
]->count
-= region_copy
[i
]->count
;
644 if (invariant_exits
->contains (exit_e
))
646 invariant_exits
->remove (exit_e
);
647 /* All exits will happen in exit_e_copy which is out of the
648 loop, so increase probability accordingly.
649 If the edge is eliminated_edge we already corrected
651 if (entry_count
.nonzero_p () && !was_static
)
652 set_edge_probability_and_rescale_others
653 (exit_e_copy
, exit_e_count
.probability_in
655 /* Eliminate in-loop conditional. */
656 e
->probability
= profile_probability::always ();
657 exit_e
->probability
= profile_probability::never ();
658 gcond
*cond_stmt
= as_a
<gcond
*>(*gsi_last_bb (region
[i
]));
659 if (e
->flags
& EDGE_TRUE_VALUE
)
660 gimple_cond_make_true (cond_stmt
);
662 gimple_cond_make_false (cond_stmt
);
663 update_stmt (cond_stmt
);
665 entry_count
= e_copy
->count ();
667 /* Be sure that we seen all invariant exit edges we are supposed to update.
668 We may have recorded some static exists we decided to not duplicate. */
669 gcc_checking_assert (invariant_exits
->is_empty ());
674 /* Common superclass for both header-copying phases. */
675 class ch_base
: public gimple_opt_pass
678 ch_base (pass_data data
, gcc::context
*ctxt
)
679 : gimple_opt_pass (data
, ctxt
)
682 /* Copies headers of all loops in FUN for which process_loop_p is true. */
683 unsigned int copy_headers (function
*fun
);
685 /* Return true to copy headers of LOOP or false to skip. */
686 virtual bool process_loop_p (class loop
*loop
) = 0;
689 const pass_data pass_data_ch
=
691 GIMPLE_PASS
, /* type */
693 OPTGROUP_LOOP
, /* optinfo_flags */
694 TV_TREE_CH
, /* tv_id */
695 ( PROP_cfg
| PROP_ssa
), /* properties_required */
696 0, /* properties_provided */
697 0, /* properties_destroyed */
698 0, /* todo_flags_start */
699 0, /* todo_flags_finish */
702 class pass_ch
: public ch_base
705 pass_ch (gcc::context
*ctxt
)
706 : ch_base (pass_data_ch
, ctxt
)
709 /* opt_pass methods: */
710 bool gate (function
*) final override
{ return flag_tree_ch
!= 0; }
712 /* Initialize and finalize loop structures, copying headers inbetween. */
713 unsigned int execute (function
*) final override
;
715 opt_pass
* clone () final override
{ return new pass_ch (m_ctxt
); }
718 /* ch_base method: */
719 bool process_loop_p (class loop
*loop
) final override
;
722 const pass_data pass_data_ch_vect
=
724 GIMPLE_PASS
, /* type */
725 "ch_vect", /* name */
726 OPTGROUP_LOOP
, /* optinfo_flags */
727 TV_TREE_CH
, /* tv_id */
728 ( PROP_cfg
| PROP_ssa
), /* properties_required */
729 0, /* properties_provided */
730 0, /* properties_destroyed */
731 0, /* todo_flags_start */
732 0, /* todo_flags_finish */
735 /* This is a more aggressive version of the same pass, designed to run just
736 before if-conversion and vectorization, to put more loops into the form
737 required for those phases. */
738 class pass_ch_vect
: public ch_base
741 pass_ch_vect (gcc::context
*ctxt
)
742 : ch_base (pass_data_ch_vect
, ctxt
)
745 /* opt_pass methods: */
746 bool gate (function
*fun
) final override
748 return flag_tree_ch
!= 0
749 && (flag_tree_loop_vectorize
!= 0 || fun
->has_force_vectorize_loops
);
752 /* Just copy headers, no initialization/finalization of loop structures. */
753 unsigned int execute (function
*) final override
;
756 /* ch_base method: */
757 bool process_loop_p (class loop
*loop
) final override
;
758 }; // class pass_ch_vect
760 /* For all loops, copy the condition at the end of the loop body in front
761 of the loop. This is beneficial since it increases efficiency of
762 code motion optimizations. It also saves one jump on entry to the loop. */
765 ch_base::copy_headers (function
*fun
)
767 basic_block
*bbs
, *copied_bbs
;
769 bool changed
= false;
771 if (number_of_loops (fun
) <= 1)
774 bbs
= XNEWVEC (basic_block
, n_basic_blocks_for_fn (fun
));
775 copied_bbs
= XNEWVEC (basic_block
, n_basic_blocks_for_fn (fun
));
776 bbs_size
= n_basic_blocks_for_fn (fun
);
781 unsigned int nheaders
;
782 hash_set
<edge
> *invariant_exits
, *static_exits
;
784 auto_vec
<struct candidate
> candidates
;
785 auto_vec
<std::pair
<edge
, loop_p
> > copied
;
786 auto_vec
<class loop
*> loops_to_unloop
;
787 auto_vec
<int> loops_to_unloop_nunroll
;
789 mark_dfs_back_edges ();
790 gimple_ranger
*ranger
= new gimple_ranger
;
791 for (auto loop
: loops_list (cfun
, 0))
793 int initial_limit
= optimize_loop_for_speed_p (loop
)
794 ? param_max_loop_header_insns
: 0;
795 int remaining_limit
= initial_limit
;
796 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
798 "Analyzing loop %i\n", loop
->num
);
800 basic_block header
= loop
->header
;
801 if (!get_max_loop_iterations_int (loop
))
803 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
804 fprintf (dump_file
, "Loop %d never loops.\n", loop
->num
);
805 scale_loop_profile (loop
, profile_probability::always (), 0);
806 loops_to_unloop
.safe_push (loop
);
807 loops_to_unloop_nunroll
.safe_push (0);
811 /* If the loop is already a do-while style one (either because it was
812 written as such, or because jump threading transformed it into one),
813 we might be in fact peeling the first iteration of the loop. This
814 in general is not a good idea. Also avoid touching infinite loops. */
815 if (!loop_has_exit_edges (loop
)
816 || !process_loop_p (loop
))
819 /* Iterate the header copying up to limit; this takes care of the cases
820 like while (a && b) {...}, where we want to have both of the conditions
821 copied. TODO -- handle while (a || b) - like cases, by not requiring
822 the header to have just a single successor and copying up to
825 int last_win_nheaders
= 0;
826 bool last_win_invariant_exit
= false;
828 auto_vec
<ch_decision
, 32> decision
;
829 hash_set
<edge
> *invariant_exits
= new hash_set
<edge
>;
830 hash_set
<edge
> *static_exits
= new hash_set
<edge
>;
831 while ((ret
= should_duplicate_loop_header_p (header
, loop
, ranger
,
838 decision
.safe_push (ret
);
841 last_win_nheaders
= nheaders
;
842 last_win_invariant_exit
= (ret
== ch_win_invariant_exit
);
843 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
844 fprintf (dump_file
, " Duplicating bb %i is a win\n",
848 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
849 fprintf (dump_file
, " May duplicate bb %i\n", header
->index
);
851 /* Find a successor of header that is inside a loop; i.e. the new
852 header after the condition is copied. */
853 if (flow_bb_inside_loop_p (loop
, EDGE_SUCC (header
, 0)->dest
))
854 header
= EDGE_SUCC (header
, 0)->dest
;
856 header
= EDGE_SUCC (header
, 1)->dest
;
859 /* Try to turn loop into do-while. This means ensuring that
860 last duplicated header will not have loop invariant exit. */
861 if (last_win_nheaders
&& last_win_invariant_exit
862 && nheaders
> last_win_nheaders
)
865 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
867 " Duplicating additional BB to obtain do-while loop\n");
869 else if (!last_win_nheaders
&& nheaders
&& !do_while_loop_p (loop
))
871 last_win_nheaders
= 1;
872 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
874 " Duplicating header BB to obtain do-while loop\n");
876 /* "Duplicate" all BBs with zero cost following last basic blocks we
878 while (last_win_nheaders
< (int)decision
.length ()
879 && decision
[last_win_nheaders
] == ch_possible_zero_cost
)
881 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
883 " Duplicating extra bb is a win; it has zero cost\n");
887 if (last_win_nheaders
)
888 candidates
.safe_push ({loop
, last_win_nheaders
,
889 invariant_exits
, static_exits
});
892 delete invariant_exits
;
896 /* Do not use ranger after we change the IL and not have updated SSA. */
899 for (auto candidate
: candidates
)
901 class loop
*loop
= candidate
.loop
;
902 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
904 "Copying headers of loop %i\n", loop
->num
);
906 basic_block header
= loop
->header
;
909 unsigned int n_bbs
= 0;
911 profile_count exit_count
= profile_count::zero ();
912 profile_count entry_count
= profile_count::zero ();
916 FOR_EACH_EDGE (e
, ei
, loop
->header
->preds
)
917 if (e
->src
!= loop
->latch
)
918 entry_count
+= e
->count ();
919 while (n_bbs
< candidate
.nheaders
)
921 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
922 fprintf (dump_file
, " Will duplicate bb %i\n", header
->index
);
923 /* Find a successor of header that is inside a loop; i.e. the new
924 header after the condition is copied. */
925 if (flow_bb_inside_loop_p (loop
, EDGE_SUCC (header
, 0)->dest
))
927 nonexit
= EDGE_SUCC (header
, 0);
928 exit
= EDGE_SUCC (header
, 1);
932 nonexit
= EDGE_SUCC (header
, 1);
933 exit
= EDGE_SUCC (header
, 0);
935 exit_count
+= exit
->count ();
937 bbs
[n_bbs
++] = header
;
938 gcc_assert (bbs_size
> n_bbs
);
939 header
= nonexit
->dest
;
942 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
944 "Duplicating header of the loop %d up to edge %d->%d\n",
945 loop
->num
, exit
->src
->index
, exit
->dest
->index
);
947 /* Ensure that the header will have just the latch as a predecessor
949 if (!single_pred_p (nonexit
->dest
))
951 header
= split_edge (nonexit
);
952 exit
= single_pred_edge (header
);
955 edge entry
= loop_preheader_edge (loop
);
957 propagate_threaded_block_debug_into (exit
->dest
, entry
->dest
);
958 if (!gimple_duplicate_seme_region (entry
, exit
, bbs
, n_bbs
, copied_bbs
,
961 delete candidate
.static_exits
;
962 delete candidate
.invariant_exits
;
963 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
964 fprintf (dump_file
, "Duplication failed.\n");
967 if (loop
->header
->count
.initialized_p ())
968 update_profile_after_ch (loop
, bbs
, copied_bbs
, n_bbs
,
969 candidate
.invariant_exits
,
970 candidate
.static_exits
,
972 delete candidate
.static_exits
;
973 delete candidate
.invariant_exits
;
974 copied
.safe_push (std::make_pair (entry
, loop
));
976 /* If the loop has the form "for (i = j; i < j + 10; i++)" then
977 this copying can introduce a case where we rely on undefined
978 signed overflow to eliminate the preheader condition, because
979 we assume that "j < j + 10" is true. We don't want to warn
980 about that case for -Wstrict-overflow, because in general we
981 don't warn about overflow involving loops. Prevent the
982 warning by setting the no_warning flag in the condition. */
983 if (warn_strict_overflow
> 0)
987 for (i
= 0; i
< n_bbs
; ++i
)
989 gimple_stmt_iterator bsi
;
991 for (bsi
= gsi_start_bb (copied_bbs
[i
]);
995 gimple
*stmt
= gsi_stmt (bsi
);
996 if (gimple_code (stmt
) == GIMPLE_COND
)
998 tree lhs
= gimple_cond_lhs (stmt
);
999 if (gimple_cond_code (stmt
) != EQ_EXPR
1000 && gimple_cond_code (stmt
) != NE_EXPR
1001 && INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
1002 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs
)))
1003 suppress_warning (stmt
, OPT_Wstrict_overflow_
);
1005 else if (is_gimple_assign (stmt
))
1007 enum tree_code rhs_code
= gimple_assign_rhs_code (stmt
);
1008 tree rhs1
= gimple_assign_rhs1 (stmt
);
1009 if (TREE_CODE_CLASS (rhs_code
) == tcc_comparison
1010 && rhs_code
!= EQ_EXPR
1011 && rhs_code
!= NE_EXPR
1012 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1
))
1013 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (rhs1
)))
1014 suppress_warning (stmt
, OPT_Wstrict_overflow_
);
1020 /* Update header of the loop. */
1021 loop
->header
= header
;
1022 /* Find correct latch. We only duplicate chain of conditionals so
1023 there should be precisely two edges to the new header. One entry
1024 edge and one to latch. */
1025 FOR_EACH_EDGE (e
, ei
, loop
->header
->preds
)
1026 if (header
!= e
->src
)
1028 loop
->latch
= e
->src
;
1031 /* Ensure that the latch is simple. */
1032 if (!single_succ_p (loop_latch_edge (loop
)->src
))
1033 split_edge (loop_latch_edge (loop
));
1035 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1037 if (do_while_loop_p (loop
))
1038 fprintf (dump_file
, "Loop %d is now do-while loop.\n", loop
->num
);
1040 fprintf (dump_file
, "Loop %d is still not do-while loop.\n",
1042 fprintf (dump_file
, "Exit count: ");
1043 exit_count
.dump (dump_file
);
1044 fprintf (dump_file
, "\nEntry count: ");
1045 entry_count
.dump (dump_file
);
1046 fprintf (dump_file
, "\n");
1049 /* We possibly decreased number of itrations by 1. */
1050 auto_vec
<edge
> exits
= get_loop_exit_edges (loop
);
1051 bool precise
= (nexits
== (int) exits
.length ());
1052 /* Check that loop may not terminate in other way than via
1053 basic blocks we duplicated. */
1056 basic_block
*bbs
= get_loop_body (loop
);
1057 for (unsigned i
= 0; i
< loop
->num_nodes
&& precise
; ++i
)
1059 basic_block bb
= bbs
[i
];
1060 bool found_exit
= false;
1061 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1062 if (!flow_bb_inside_loop_p (loop
, e
->dest
))
1067 /* If BB has exit, it was duplicated. */
1070 /* Give up on irreducible loops. */
1071 if (bb
->flags
& BB_IRREDUCIBLE_LOOP
)
1076 /* Check that inner loops are finite. */
1077 for (class loop
*l
= bb
->loop_father
; l
!= loop
&& precise
;
1084 /* Verify that there is no statement that may be terminate
1085 execution in a way not visible to CFG. */
1086 for (gimple_stmt_iterator bsi
= gsi_start_bb (bb
);
1087 !gsi_end_p (bsi
); gsi_next (&bsi
))
1088 if (stmt_can_terminate_bb_p (gsi_stmt (bsi
)))
1094 && get_max_loop_iterations_int (loop
) == 1)
1096 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1097 fprintf (dump_file
, "Loop %d no longer loops.\n", loop
->num
);
1098 scale_loop_profile (loop
, profile_probability::always (), 0);
1099 loops_to_unloop
.safe_push (loop
);
1100 loops_to_unloop_nunroll
.safe_push (0);
1104 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1107 " decreased number of iterations of loop %d by 1.\n",
1109 adjust_loop_info_after_peeling (loop
, 1, true);
1111 else if (exit_count
>= entry_count
.apply_scale (9, 10))
1113 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1115 "Peeled likely exits: likely decreased number "
1116 "of iterations of loop %d by 1.\n", loop
->num
);
1117 adjust_loop_info_after_peeling (loop
, 1, false);
1119 else if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1121 "Not decreased number"
1122 " of iterations of loop %d; likely exits remains.\n",
1130 update_ssa (TODO_update_ssa
);
1131 /* After updating SSA form perform CSE on the loop header
1132 copies. This is esp. required for the pass before
1133 vectorization since nothing cleans up copied exit tests
1134 that can now be simplified. CSE from the entry of the
1135 region we copied till all loop exit blocks but not
1136 entering the loop itself. */
1137 for (unsigned i
= 0; i
< copied
.length (); ++i
)
1139 edge entry
= copied
[i
].first
;
1140 loop_p loop
= copied
[i
].second
;
1141 auto_vec
<edge
> exit_edges
= get_loop_exit_edges (loop
);
1142 bitmap exit_bbs
= BITMAP_ALLOC (NULL
);
1143 for (unsigned j
= 0; j
< exit_edges
.length (); ++j
)
1144 bitmap_set_bit (exit_bbs
, exit_edges
[j
]->dest
->index
);
1145 bitmap_set_bit (exit_bbs
, loop
->header
->index
);
1146 do_rpo_vn (cfun
, entry
, exit_bbs
);
1147 BITMAP_FREE (exit_bbs
);
1150 if (!loops_to_unloop
.is_empty ())
1152 bool irred_invalidated
;
1153 auto_bitmap lc_invalidated
;
1154 auto_vec
<edge
> edges_to_remove
;
1155 unloop_loops (loops_to_unloop
, loops_to_unloop_nunroll
, edges_to_remove
,
1156 lc_invalidated
, &irred_invalidated
);
1157 if (loops_state_satisfies_p (fun
, LOOP_CLOSED_SSA
)
1158 && !bitmap_empty_p (lc_invalidated
))
1159 rewrite_into_loop_closed_ssa (NULL
, 0);
1165 return changed
? TODO_cleanup_cfg
: 0;
1168 /* Initialize the loop structures we need, and finalize after. */
1171 pass_ch::execute (function
*fun
)
1173 loop_optimizer_init (LOOPS_HAVE_PREHEADERS
1174 | LOOPS_HAVE_SIMPLE_LATCHES
1175 | LOOPS_HAVE_RECORDED_EXITS
);
1177 unsigned int res
= copy_headers (fun
);
1179 loop_optimizer_finalize ();
1183 /* Assume an earlier phase has already initialized all the loop structures that
1184 we need here (and perhaps others too), and that these will be finalized by
1188 pass_ch_vect::execute (function
*fun
)
1190 return copy_headers (fun
);
1193 /* Apply header copying according to a very simple test of do-while shape. */
1196 pass_ch::process_loop_p (class loop
*)
1201 /* Apply header-copying to loops where we might enable vectorization. */
1204 pass_ch_vect::process_loop_p (class loop
*loop
)
1206 if (!flag_tree_loop_vectorize
&& !loop
->force_vectorize
)
1209 if (loop
->dont_vectorize
)
1212 /* The vectorizer won't handle anything with multiple exits, so skip. */
1213 edge exit
= single_exit (loop
);
1217 if (!do_while_loop_p (loop
))
1226 make_pass_ch_vect (gcc::context
*ctxt
)
1228 return new pass_ch_vect (ctxt
);
1232 make_pass_ch (gcc::context
*ctxt
)
1234 return new pass_ch (ctxt
);