/* Loop header copying on trees.
   Copyright (C) 2004-2023 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "gimple-ssa.h"
#include "gimple-iterator.h"
#include "tree-cfg.h"
#include "tree-into-ssa.h"
#include "cfgloop.h"
#include "tree-inline.h"
#include "tree-ssa-threadedge.h"
#include "tree-ssa-sccvn.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "value-range.h"
#include "gimple-range.h"
#include "gimple-range-path.h"
#include "gimple-pretty-print.h"
#include "cfganal.h"
/* Return path query instance for testing ranges of statements
   in headers of LOOP contained in basic block BB.
   Use RANGER instance.  */
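/* For example, when BB is the third block of a copied header chain
   header -> h2 -> BB, the path built below is {BB, h2, header, preheader},
   i.e. the blocks visited in the first iteration, last block first.  */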
static path_range_query *
get_range_query (class loop *loop,
                 basic_block bb,
                 gimple_ranger &ranger)
{
  auto_vec<basic_block, 8> path;
  for (; bb != loop->header; bb = single_pred_edge (bb)->src)
    path.safe_push (bb);
  path.safe_push (loop->header);
  path.safe_push (loop_preheader_edge (loop)->src);
  return new path_range_query (ranger, path);
}
/* Return edge that is true in the first iteration of the loop
   and NULL otherwise.
   Formulate correct ranger query to RANGER.  */
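/* For instance, given

     int i = 0;
     while (i < 10)
       { ...; i++; }

   the header test is "0 < 10" in the first iteration, so the exit edge
   is statically not taken there and its peeled copy can be removed.  */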
static edge
static_loop_exit (class loop *l, basic_block bb, gimple_ranger &ranger,
                  path_range_query *&query)
{
  gcond *last = safe_dyn_cast <gcond *> (*gsi_last_bb (bb));
  edge ret_e;

  if (!last)
    return NULL;

  edge true_e, false_e;
  extract_true_false_edges_from_block (bb, &true_e, &false_e);

  /* If neither edge is the exit edge, this is not a case we'd like to
     handle.  */
  if (!loop_exit_edge_p (l, true_e) && !loop_exit_edge_p (l, false_e))
    return NULL;

  int_range<1> desired_static_range;
  if (loop_exit_edge_p (l, true_e))
    {
      desired_static_range = range_false ();
      ret_e = true_e;
    }
  else
    {
      desired_static_range = range_true ();
      ret_e = false_e;
    }

  if (!query)
    query = get_range_query (l, gimple_bb (last), ranger);

  int_range<2> r;
  if (!query->range_of_stmt (r, last))
    return NULL;
  return r == desired_static_range ? ret_e : NULL;
}
/* Return true if STMT is static in LOOP.  This means that its value
   is constant in the first iteration.
   Use RANGER and formulate query cached in QUERY.  */
static bool
loop_static_stmt_p (class loop *loop,
                    gimple_ranger &ranger,
                    path_range_query *&query,
                    gimple *stmt)
{
  tree type = gimple_range_type (stmt);
  if (!type || !Value_Range::supports_type_p (type))
    return false;

  if (!query)
    query = get_range_query (loop, gimple_bb (stmt), ranger);

  Value_Range r (gimple_range_type (stmt));
  if (!query->range_of_stmt (r, stmt))
    return false;
  return r.singleton_p ();
}
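/* The predicates below read a classification cached in statement uids:
   bit 0 set means the statement is loop invariant, bit 1 that it is
   static (constant in the first iteration) and bit 2 that it combines
   a static and an invariant conditional.  */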
/* Return true if OP is invariant.  */

static bool
loop_invariant_op_p (class loop *loop,
                     tree op)
{
  if (is_gimple_min_invariant (op))
    return true;
  if (SSA_NAME_IS_DEFAULT_DEF (op)
      || !flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (op))))
    return true;
  return gimple_uid (SSA_NAME_DEF_STMT (op)) & 1;
}
/* Return true if OP is static, i.e. its value is constant
   in the first iteration of the loop.  */

static bool
loop_static_op_p (class loop *loop, tree op)
{
  /* Always check for invariant first.  */
  gcc_checking_assert (!is_gimple_min_invariant (op)
                       && !SSA_NAME_IS_DEFAULT_DEF (op)
                       && flow_bb_inside_loop_p (loop,
                                                 gimple_bb (SSA_NAME_DEF_STMT (op))));
  return gimple_uid (SSA_NAME_DEF_STMT (op)) & 2;
}
/* Return true if OP combines outcome of static and
   loop invariant conditional.  */

static bool
loop_combined_static_and_iv_p (class loop *loop,
                               tree op)
{
  /* Always check for invariant first.  */
  gcc_checking_assert (!is_gimple_min_invariant (op)
                       && !SSA_NAME_IS_DEFAULT_DEF (op)
                       && flow_bb_inside_loop_p (loop,
                                                 gimple_bb (SSA_NAME_DEF_STMT (op))));
  return gimple_uid (SSA_NAME_DEF_STMT (op)) & 4;
}
/* Decision about possibility of copying a given header.  */

enum ch_decision
{
  /* We do not want to copy this header.  */
  ch_impossible,
  /* We can copy it if it enables wins.  */
  ch_possible,
  /* We can "copy" it if it enables wins and doing
     so will introduce no new code.  */
  ch_possible_zero_cost,
  /* We want to copy.  */
  ch_win,
  /* We want to copy and we will eliminate loop exit.  */
  ch_win_invariant_exit
};
/* Check whether we should duplicate HEADER of LOOP.  At most *LIMIT
   instructions should be duplicated, limit is decreased by the actual
   amount.  */

static ch_decision
should_duplicate_loop_header_p (basic_block header, class loop *loop,
                                gimple_ranger *ranger,
                                int *limit,
                                hash_set <edge> *invariant_exits,
                                hash_set <edge> *static_exits)
{
  gimple_stmt_iterator bsi;

  gcc_assert (!header->aux);

  gcc_assert (EDGE_COUNT (header->succs) > 0);
  if (single_succ_p (header))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "  Not duplicating bb %i: it is single succ.\n",
                 header->index);
      return ch_impossible;
    }
  if (flow_bb_inside_loop_p (loop, EDGE_SUCC (header, 0)->dest)
      && flow_bb_inside_loop_p (loop, EDGE_SUCC (header, 1)->dest))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "  Not duplicating bb %i: both successors are in loop.\n",
                 header->index);
      return ch_impossible;
    }
  /* If this is not the original loop header, we want it to have just
     one predecessor in order to match the && pattern.  */
  if (header != loop->header && !single_pred_p (header))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "  Not duplicating bb %i: it has multiple predecessors.\n",
                 header->index);
      return ch_impossible;
    }
  gcond *last = safe_dyn_cast <gcond *> (*gsi_last_bb (header));
  if (!last)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "  Not duplicating bb %i: it does not end with a conditional.\n",
                 header->index);
      return ch_impossible;
    }
  path_range_query *query = NULL;
  for (gphi_iterator psi = gsi_start_phis (header); !gsi_end_p (psi);
       gsi_next (&psi))
    if (!virtual_operand_p (gimple_phi_result (psi.phi ())))
      {
        bool static_p = loop_static_stmt_p (loop, *ranger,
                                            query, psi.phi ());
        gimple_set_uid (psi.phi (), static_p ? 2 : 0);
      }
  bool code_size_cost = false;
  /* Count number of instructions and punt on calls.
     Populate stmts INV flag to later apply heuristics to the
     kind of conditions we want to copy.  */
  for (bsi = gsi_start_bb (header); !gsi_end_p (bsi); gsi_next (&bsi))
    {
      gimple *last = gsi_stmt (bsi);

      if (gimple_code (last) == GIMPLE_LABEL)
        continue;

      if (is_gimple_debug (last))
        continue;

      if (gimple_code (last) == GIMPLE_COND)
        break;

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "  Analyzing: ");
          print_gimple_stmt (dump_file, last, 0, TDF_SLIM);
        }

      if (gimple_code (last) == GIMPLE_CALL
          && (!gimple_inexpensive_call_p (as_a <gcall *> (last))
              /* IFN_LOOP_DIST_ALIAS means that inner loop is distributed
                 at current loop's header.  Don't copy in this case.  */
              || gimple_call_internal_p (last, IFN_LOOP_DIST_ALIAS)))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file,
                     "  Not duplicating bb %i: it contains call.\n",
                     header->index);
          if (query)
            delete query;
          return ch_impossible;
        }
      /* Classify whether the stmt is invariant in the loop.  */
      gimple_set_uid (last, 0);
      if (!gimple_vuse (last)
          && gimple_code (last) != GIMPLE_ASM
          && (gimple_code (last) != GIMPLE_CALL
              || gimple_call_flags (last) & ECF_CONST))
        {
          bool inv = true, static_p = false;
          ssa_op_iter i;
          tree op;
          FOR_EACH_SSA_TREE_OPERAND (op, last, i, SSA_OP_USE)
            if (!loop_invariant_op_p (loop, op))
              inv = false;
          /* Assume that code is reasonably optimized and invariant
             constants are already identified.  */
          if (!inv)
            static_p = loop_static_stmt_p (loop, *ranger, query, last);
          gimple_set_uid (last, (inv ? 1 : 0) | (static_p ? 2 : 0));
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              if (inv)
                fprintf (dump_file, "  Stmt is loop invariant\n");
              if (static_p)
                fprintf (dump_file, "  Stmt is static "
                         "(constant in the first iteration)\n");
            }
          /* Loop invariants will be optimized out in loop body after
             duplication; do not account invariant computation in code
             size costs.

             Similarly static computations will be optimized out in the
             duplicated header.  */
          if (inv || static_p)
            continue;
        }
      /* Match the following:
         _1 = i_1 < 10  <- static condition
         _2 = n != 0    <- loop invariant condition
         _3 = _1 & _2   <- combined static and iv statement.  */
      enum tree_code code;
      if (gimple_code (last) == GIMPLE_ASSIGN
          && ((code = gimple_assign_rhs_code (last)) == BIT_AND_EXPR
              || code == BIT_IOR_EXPR || code == BIT_XOR_EXPR))
        {
          tree op1 = gimple_assign_rhs1 (last);
          tree op2 = gimple_assign_rhs2 (last);

          if ((loop_invariant_op_p (loop, op1)
               || loop_combined_static_and_iv_p (loop, op1)
               || loop_static_op_p (loop, op1))
              && (loop_invariant_op_p (loop, op2)
                  || loop_combined_static_and_iv_p (loop, op2)
                  || loop_static_op_p (loop, op2)))
            {
              /* Duplicating loop header with combined conditional will
                 remove this statement in each copy.  But we account for
                 that later when seeing that condition.

                 Note that this may be overly optimistic for bit operations
                 where the static parameter may still result in non-trivial
                 statement.  */
              gimple_set_uid (last, 4);
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file,
                         "  Stmt combines static and invariant op\n");
              continue;
            }
        }
      int insns = estimate_num_insns (last, &eni_size_weights);
      *limit -= insns;
      if (insns)
        code_size_cost = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "  Accounting stmt as %i insns\n", insns);
      if (*limit < 0)
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file,
                     "  Not duplicating bb %i: it contains too many insns.\n",
                     header->index);
          if (query)
            delete query;
          return ch_impossible;
        }
    }
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  Analyzing: ");
      print_gimple_stmt (dump_file, last, 0, TDF_SLIM);
    }

  /* If the condition tests a non-IV loop variant we do not want to rotate
     the loop further.  Unless this is the original loop header.  */
  tree lhs = gimple_cond_lhs (last);
  tree rhs = gimple_cond_rhs (last);
  bool lhs_invariant = loop_invariant_op_p (loop, lhs);
  bool rhs_invariant = loop_invariant_op_p (loop, rhs);
  /* Combined conditional is a result of if combining:

     _1 = i_1 < 10  <- static condition
     _2 = n != 0    <- loop invariant condition
     _3 = _1 & _2   <- combined static and iv statement
     if (_3 != 0)   <- combined conditional

     Combined conditionals will not be optimized out in either copy.
     However duplicated header simplifies to:

     if (n != 0)

     while the conditional in the loop body simplifies to:

     if (i_1 < 10)

     So effectively the resulting code sequence will be of same length as
     the original one.

     Combined conditionals are never static or invariant, so save some work
     here.  */
  if (lhs_invariant != rhs_invariant
      && (lhs_invariant
          || loop_combined_static_and_iv_p (loop, lhs))
      && (rhs_invariant
          || loop_combined_static_and_iv_p (loop, rhs)))
    {
      if (query)
        delete query;
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "  Conditional combines static and invariant op.\n");
      return ch_win;
    }
  edge static_exit = static_loop_exit (loop, header, *ranger, query);
  if (query)
    delete query;

  if (static_exit && static_exits)
    {
      static_exits->add (static_exit);
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "  Will eliminate peeled conditional in bb %d.\n",
                 static_exit->src->index);
      /* Still look for invariant exits; exit may be both.  */
    }
  if (lhs_invariant && rhs_invariant)
    {
      if (invariant_exits)
        {
          edge e;
          edge_iterator ei;
          FOR_EACH_EDGE (e, ei, header->succs)
            if (loop_exit_edge_p (loop, e))
              {
                if (dump_file && (dump_flags & TDF_DETAILS))
                  fprintf (dump_file,
                           "  Will eliminate invariant exit %i->%i\n",
                           e->src->index, e->dest->index);
                invariant_exits->add (e);
              }
        }
      return ch_win_invariant_exit;
    }
  /* If the static exit fully optimizes out, it is a win to "duplicate"
     it.

     TODO: Even if duplication costs some size we may opt to do so in case
     exit probability is significant enough (do partial peeling).  */
  if (static_exit)
    return !code_size_cost ? ch_possible_zero_cost : ch_possible;

  /* We were not able to prove that the conditional will be eliminated.  */
  int insns = estimate_num_insns (last, &eni_size_weights);
  *limit -= insns;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file,
             "  Accounting stmt as %i insns\n", insns);
  if (*limit < 0)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "  Not duplicating bb %i: it contains too many insns.\n",
                 header->index);
      return ch_impossible;
    }

  return ch_possible;
}
/* Checks whether LOOP is a do-while style loop.  */
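/* That is, a loop whose source form is roughly

     do
       body;
     while (cond);

   so the exit test is the last thing executed in every iteration:
   the latch is empty and its single predecessor exits the loop.  */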
static bool
do_while_loop_p (class loop *loop)
{
  gimple *stmt = last_nondebug_stmt (loop->latch);

  /* If the latch of the loop is not empty, it is not a do-while loop.  */
  if (stmt
      && gimple_code (stmt) != GIMPLE_LABEL)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "Loop %i is not do-while loop: latch is not empty.\n",
                 loop->num);
      return false;
    }

  /* If the latch does not have a single predecessor, it is not a
     do-while loop.  */
  if (!single_pred_p (loop->latch))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "Loop %i is not do-while loop: latch has multiple "
                 "predecessors.\n", loop->num);
      return false;
    }
  basic_block pred = single_pred (loop->latch);

  /* If the latch predecessor doesn't exit the loop, it is not a
     do-while loop.  */
  if (!loop_exits_from_bb_p (loop, pred))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "Loop %i is not do-while loop: latch predecessor "
                 "does not exit loop.\n", loop->num);
      return false;
    }
  gcond *last = safe_dyn_cast <gcond *> (*gsi_last_bb (pred));
  if (last
      && (gimple_cond_lhs (last) == boolean_false_node
          || gimple_cond_lhs (last) == boolean_true_node)
      && gimple_cond_rhs (last) == boolean_false_node)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "Loop %i is not do-while loop: latch predecessor "
                 "contains exit we optimized out.\n", loop->num);
      return false;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Loop %i is do-while loop\n", loop->num);

  return true;
}
/* Update profile after header copying of LOOP.
   REGION is the original (in loop) sequence, REGION_COPY is the
   duplicated header (now outside of loop).  N_REGION is number of
   bbs duplicated.
   ELIMINATED_EDGE is edge to be removed from duplicated sequence.
   INVARIANT_EXITS are edges in the loop body to be eliminated
   since they are loop invariants.

   So we expect the following:

   // region_copy_start entry will be scaled to entry_count
     if (cond1)       <- this condition will become false
                         and we update probabilities
       goto loop_exit;
     if (cond2)       <- this condition is loop invariant
                         and we update probabilities
       goto loop_exit;
     goto loop_header <- this will be redirected to loop.
   // region_copy_end
   loop:
     <body>
   // region start
     if (cond1)       <- we need to update probability here
       goto loop_exit;
     if (cond2)       <- and determine scaling factor here.
                         moreover cond2 is now always true
       goto loop_exit;
   // region end

   Adding support for more exits can be done similarly,
   but only consumer so far is tree-ssa-loop-ch and it uses only this
   to handle the common case of peeling headers which have
   conditionals known to be always true upon entry.  */
static void
update_profile_after_ch (class loop *loop,
                         basic_block *region, basic_block *region_copy,
                         unsigned n_region,
                         hash_set <edge> *invariant_exits,
                         hash_set <edge> *static_exits,
                         profile_count entry_count)
{
  for (unsigned int i = 0; i < n_region; i++)
    {
      edge exit_e, exit_e_copy, e, e_copy;
      if (EDGE_COUNT (region[i]->succs) == 1)
        {
          region_copy[i]->count = entry_count;
          region[i]->count -= entry_count;
          continue;
        }
      gcc_checking_assert (EDGE_COUNT (region[i]->succs) == 2);
      if (loop_exit_edge_p (loop,
                            EDGE_SUCC (region[i], 0)))
        {
          exit_e = EDGE_SUCC (region[i], 0);
          exit_e_copy = EDGE_SUCC (region_copy[i], 0);
          e = EDGE_SUCC (region[i], 1);
          e_copy = EDGE_SUCC (region_copy[i], 1);
        }
      else
        {
          exit_e = EDGE_SUCC (region[i], 1);
          exit_e_copy = EDGE_SUCC (region_copy[i], 1);
          e = EDGE_SUCC (region[i], 0);
          e_copy = EDGE_SUCC (region_copy[i], 0);
        }
      gcc_assert (i == n_region - 1
                  || (e->dest == region[i + 1]
                      && e_copy->dest == region_copy[i + 1]));
      region_copy[i]->count = entry_count;
      profile_count exit_e_count = exit_e->count ();
      bool was_static = false;
      if (static_exits->contains (exit_e))
        {
          /* Update profile and the conditional.
             CFG update is done by caller.  */
          static_exits->remove (exit_e);
          was_static = true;
          e_copy->probability = profile_probability::always ();
          exit_e_copy->probability = profile_probability::never ();
          gcond *cond_stmt
            = as_a <gcond *>(*gsi_last_bb (region_copy[i]));
          if (e_copy->flags & EDGE_TRUE_VALUE)
            gimple_cond_make_true (cond_stmt);
          else
            gimple_cond_make_false (cond_stmt);
          update_stmt (cond_stmt);
          /* Header copying is a special case of jump threading, so use
             common code to update loop body exit condition.  */
          update_bb_profile_for_threading (region[i], entry_count, e);
        }
      else
        region[i]->count -= region_copy[i]->count;
      if (invariant_exits->contains (exit_e))
        {
          invariant_exits->remove (exit_e);
          /* All exits will happen in exit_e_copy which is out of the
             loop, so increase probability accordingly.
             If the edge is eliminated_edge we already corrected
             profile above.  */
          if (entry_count.nonzero_p () && !was_static)
            set_edge_probability_and_rescale_others
                (exit_e_copy, exit_e_count.probability_in
                                                (entry_count));
          /* Eliminate in-loop conditional.  */
          e->probability = profile_probability::always ();
          exit_e->probability = profile_probability::never ();
          gcond *cond_stmt = as_a <gcond *>(*gsi_last_bb (region[i]));
          if (e->flags & EDGE_TRUE_VALUE)
            gimple_cond_make_true (cond_stmt);
          else
            gimple_cond_make_false (cond_stmt);
          update_stmt (cond_stmt);
        }
      entry_count = e_copy->count ();
    }
  /* Be sure that we have seen all invariant exit edges we are supposed to
     update.  We may have recorded some static exits we decided not to
     duplicate.  */
  gcc_checking_assert (invariant_exits->is_empty ());
}
namespace {

/* Common superclass for both header-copying phases.  */
class ch_base : public gimple_opt_pass
{
protected:
  ch_base (pass_data data, gcc::context *ctxt)
    : gimple_opt_pass (data, ctxt)
  {}

  /* Copies headers of all loops in FUN for which process_loop_p is true.  */
  unsigned int copy_headers (function *fun);

  /* Return true to copy headers of LOOP or false to skip.  */
  virtual bool process_loop_p (class loop *loop) = 0;
};
const pass_data pass_data_ch =
{
  GIMPLE_PASS, /* type */
  "ch", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_TREE_CH, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
class pass_ch : public ch_base
{
public:
  pass_ch (gcc::context *ctxt)
    : ch_base (pass_data_ch, ctxt)
  {}

  /* opt_pass methods: */
  bool gate (function *) final override { return flag_tree_ch != 0; }

  /* Initialize and finalize loop structures, copying headers in between.  */
  unsigned int execute (function *) final override;

  opt_pass * clone () final override { return new pass_ch (m_ctxt); }

protected:
  /* ch_base method: */
  bool process_loop_p (class loop *loop) final override;
}; // class pass_ch
const pass_data pass_data_ch_vect =
{
  GIMPLE_PASS, /* type */
  "ch_vect", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_TREE_CH, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
/* This is a more aggressive version of the same pass, designed to run just
   before if-conversion and vectorization, to put more loops into the form
   required for those phases.  */
class pass_ch_vect : public ch_base
{
public:
  pass_ch_vect (gcc::context *ctxt)
    : ch_base (pass_data_ch_vect, ctxt)
  {}

  /* opt_pass methods: */
  bool gate (function *fun) final override
  {
    return flag_tree_ch != 0
           && (flag_tree_loop_vectorize != 0 || fun->has_force_vectorize_loops);
  }

  /* Just copy headers, no initialization/finalization of loop structures.  */
  unsigned int execute (function *) final override;

protected:
  /* ch_base method: */
  bool process_loop_p (class loop *loop) final override;
}; // class pass_ch_vect
/* For all loops, copy the condition at the end of the loop body in front
   of the loop.  This is beneficial since it increases efficiency of
   code motion optimizations.  It also saves one jump on entry to the loop.  */
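/* Schematically, a loop such as

     while (a)
       body;

   is turned into

     if (a)
       do
         body;
       while (a);

   so that the copied test guards loop entry and every iteration ends
   with the exit test at the bottom of the body.  */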
unsigned int
ch_base::copy_headers (function *fun)
{
  basic_block *bbs, *copied_bbs;
  unsigned bbs_size;
  bool changed = false;

  if (number_of_loops (fun) <= 1)
    return 0;

  bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (fun));
  copied_bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (fun));
  bbs_size = n_basic_blocks_for_fn (fun);

  struct candidate
  {
    class loop *loop;
    unsigned int nheaders;
    hash_set <edge> *invariant_exits, *static_exits;
  };
  auto_vec<struct candidate> candidates;
  auto_vec<std::pair<edge, loop_p> > copied;
  auto_vec<class loop *> loops_to_unloop;
  auto_vec<int> loops_to_unloop_nunroll;

  mark_dfs_back_edges ();
  gimple_ranger *ranger = new gimple_ranger;
  for (auto loop : loops_list (cfun, 0))
    {
      int initial_limit = optimize_loop_for_speed_p (loop)
                          ? param_max_loop_header_insns : 0;
      int remaining_limit = initial_limit;
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "Analyzing loop %i\n", loop->num);

      basic_block header = loop->header;
      if (!get_max_loop_iterations_int (loop))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, "Loop %d never loops.\n", loop->num);
          scale_loop_profile (loop, profile_probability::always (), 0);
          loops_to_unloop.safe_push (loop);
          loops_to_unloop_nunroll.safe_push (0);
          continue;
        }

      /* If the loop is already a do-while style one (either because it was
         written as such, or because jump threading transformed it into one),
         we might be in fact peeling the first iteration of the loop.  This
         in general is not a good idea.  Also avoid touching infinite loops.  */
      if (!loop_has_exit_edges (loop)
          || !process_loop_p (loop))
        continue;
      /* Iterate the header copying up to limit; this takes care of the cases
         like while (a && b) {...}, where we want to have both of the conditions
         copied.  TODO -- handle while (a || b) - like cases, by not requiring
         the header to have just a single successor and copying up to
         postdominator.  */
      int nheaders = 0;
      int last_win_nheaders = 0;
      bool last_win_invariant_exit = false;
      ch_decision ret;
      auto_vec <ch_decision, 32> decision;
      hash_set <edge> *invariant_exits = new hash_set <edge>;
      hash_set <edge> *static_exits = new hash_set <edge>;
      while ((ret = should_duplicate_loop_header_p (header, loop, ranger,
                                                    &remaining_limit,
                                                    invariant_exits,
                                                    static_exits))
             != ch_impossible)
        {
          nheaders++;
          decision.safe_push (ret);
          if (ret >= ch_win)
            {
              last_win_nheaders = nheaders;
              last_win_invariant_exit = (ret == ch_win_invariant_exit);
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file, "  Duplicating bb %i is a win\n",
                         header->index);
            }
          else if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, "  May duplicate bb %i\n", header->index);

          /* Find a successor of header that is inside a loop; i.e. the new
             header after the condition is copied.  */
          if (flow_bb_inside_loop_p (loop, EDGE_SUCC (header, 0)->dest))
            header = EDGE_SUCC (header, 0)->dest;
          else
            header = EDGE_SUCC (header, 1)->dest;
        }

      /* Try to turn loop into do-while.  This means ensuring that
         last duplicated header will not have loop invariant exit.  */
      if (last_win_nheaders && last_win_invariant_exit
          && nheaders > last_win_nheaders)
        {
          last_win_nheaders++;
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file,
                     "  Duplicating additional BB to obtain do-while loop\n");
        }
      else if (!last_win_nheaders && nheaders && !do_while_loop_p (loop))
        {
          last_win_nheaders = 1;
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file,
                     "  Duplicating header BB to obtain do-while loop\n");
        }

      /* "Duplicate" all BBs with zero cost following last basic blocks we
         decided to duplicate.  */
      while (last_win_nheaders < (int) decision.length ()
             && decision[last_win_nheaders] == ch_possible_zero_cost)
        {
          last_win_nheaders++;
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file,
                     "  Duplicating extra bb is a win; it has zero cost\n");
        }

      if (last_win_nheaders)
        candidates.safe_push ({loop, last_win_nheaders,
                               invariant_exits, static_exits});
      else
        {
          delete invariant_exits;
          delete static_exits;
        }
    }
  /* Do not use ranger after we change the IL and have not updated SSA.  */
  delete ranger;
  for (auto candidate : candidates)
    {
      class loop *loop = candidate.loop;
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "Copying headers of loop %i\n", loop->num);

      basic_block header = loop->header;
      edge nonexit = NULL;
      edge exit = NULL;
      unsigned int n_bbs = 0;
      int nexits = 0;
      profile_count exit_count = profile_count::zero ();
      profile_count entry_count = profile_count::zero ();
      edge e;
      edge_iterator ei;
      FOR_EACH_EDGE (e, ei, loop->header->preds)
        if (e->src != loop->latch)
          entry_count += e->count ();
      while (n_bbs < candidate.nheaders)
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, "  Will duplicate bb %i\n", header->index);
          /* Find a successor of header that is inside a loop; i.e. the new
             header after the condition is copied.  */
          if (flow_bb_inside_loop_p (loop, EDGE_SUCC (header, 0)->dest))
            {
              nonexit = EDGE_SUCC (header, 0);
              exit = EDGE_SUCC (header, 1);
            }
          else
            {
              nonexit = EDGE_SUCC (header, 1);
              exit = EDGE_SUCC (header, 0);
            }
          exit_count += exit->count ();
          nexits++;
          bbs[n_bbs++] = header;
          gcc_assert (bbs_size > n_bbs);
          header = nonexit->dest;
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "Duplicating header of the loop %d up to edge %d->%d\n",
                 loop->num, exit->src->index, exit->dest->index);
      /* Ensure that the header will have just the latch as a predecessor
         inside the loop.  */
      if (!single_pred_p (nonexit->dest))
        {
          header = split_edge (nonexit);
          exit = single_pred_edge (header);
        }

      edge entry = loop_preheader_edge (loop);

      propagate_threaded_block_debug_into (exit->dest, entry->dest);
      if (!gimple_duplicate_seme_region (entry, exit, bbs, n_bbs, copied_bbs,
                                         true))
        {
          delete candidate.static_exits;
          delete candidate.invariant_exits;
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, "Duplication failed.\n");
          continue;
        }
      if (loop->header->count.initialized_p ())
        update_profile_after_ch (loop, bbs, copied_bbs, n_bbs,
                                 candidate.invariant_exits,
                                 candidate.static_exits,
                                 entry_count);
      delete candidate.static_exits;
      delete candidate.invariant_exits;
      copied.safe_push (std::make_pair (entry, loop));
      /* If the loop has the form "for (i = j; i < j + 10; i++)" then
         this copying can introduce a case where we rely on undefined
         signed overflow to eliminate the preheader condition, because
         we assume that "j < j + 10" is true.  We don't want to warn
         about that case for -Wstrict-overflow, because in general we
         don't warn about overflow involving loops.  Prevent the
         warning by setting the no_warning flag in the condition.  */
      if (warn_strict_overflow > 0)
        {
          unsigned int i;

          for (i = 0; i < n_bbs; ++i)
            {
              gimple_stmt_iterator bsi;

              for (bsi = gsi_start_bb (copied_bbs[i]);
                   !gsi_end_p (bsi);
                   gsi_next (&bsi))
                {
                  gimple *stmt = gsi_stmt (bsi);
                  if (gimple_code (stmt) == GIMPLE_COND)
                    {
                      tree lhs = gimple_cond_lhs (stmt);
                      if (gimple_cond_code (stmt) != EQ_EXPR
                          && gimple_cond_code (stmt) != NE_EXPR
                          && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
                          && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs)))
                        suppress_warning (stmt, OPT_Wstrict_overflow_);
                    }
                  else if (is_gimple_assign (stmt))
                    {
                      enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
                      tree rhs1 = gimple_assign_rhs1 (stmt);
                      if (TREE_CODE_CLASS (rhs_code) == tcc_comparison
                          && rhs_code != EQ_EXPR
                          && rhs_code != NE_EXPR
                          && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
                          && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (rhs1)))
                        suppress_warning (stmt, OPT_Wstrict_overflow_);
                    }
                }
            }
        }
      /* Update header of the loop.  */
      loop->header = header;
      /* Find correct latch.  We only duplicate chain of conditionals so
         there should be precisely two edges to the new header.  One entry
         edge and one to latch.  */
      FOR_EACH_EDGE (e, ei, loop->header->preds)
        if (header != e->src)
          {
            loop->latch = e->src;
            break;
          }

      /* Ensure that the latch is simple.  */
      if (!single_succ_p (loop_latch_edge (loop)->src))
        split_edge (loop_latch_edge (loop));
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          if (do_while_loop_p (loop))
            fprintf (dump_file, "Loop %d is now do-while loop.\n", loop->num);
          else
            fprintf (dump_file, "Loop %d is still not do-while loop.\n",
                     loop->num);
          fprintf (dump_file, "Exit count: ");
          exit_count.dump (dump_file);
          fprintf (dump_file, "\nEntry count: ");
          entry_count.dump (dump_file);
          fprintf (dump_file, "\n");
        }
      /* We possibly decreased number of iterations by 1.  */
      auto_vec<edge> exits = get_loop_exit_edges (loop);
      bool precise = (nexits == (int) exits.length ());
      /* Check that the loop may not terminate in another way than via
         basic blocks we duplicated.  */
      if (precise)
        {
          basic_block *bbs = get_loop_body (loop);
          for (unsigned i = 0; i < loop->num_nodes && precise; ++i)
            {
              basic_block bb = bbs[i];
              bool found_exit = false;
              FOR_EACH_EDGE (e, ei, bb->succs)
                if (!flow_bb_inside_loop_p (loop, e->dest))
                  {
                    found_exit = true;
                    break;
                  }
              /* If BB has exit, it was duplicated.  */
              if (found_exit)
                continue;
              /* Give up on irreducible loops.  */
              if (bb->flags & BB_IRREDUCIBLE_LOOP)
                {
                  precise = false;
                  break;
                }
              /* Check that inner loops are finite.  */
              for (class loop *l = bb->loop_father; l != loop && precise;
                   l = loop_outer (l))
                if (!l->finite_p)
                  precise = false;
              /* Verify that there is no statement that may terminate
                 execution in a way not visible to CFG.  */
              for (gimple_stmt_iterator bsi = gsi_start_bb (bb);
                   !gsi_end_p (bsi); gsi_next (&bsi))
                if (stmt_can_terminate_bb_p (gsi_stmt (bsi)))
                  precise = false;
            }
          free (bbs);
        }
      if (precise
          && get_max_loop_iterations_int (loop) == 1)
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file, "Loop %d no longer loops.\n", loop->num);
          scale_loop_profile (loop, profile_probability::always (), 0);
          loops_to_unloop.safe_push (loop);
          loops_to_unloop_nunroll.safe_push (0);
        }
      else if (precise)
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file,
                     "Peeled all exits:"
                     " decreased number of iterations of loop %d by 1.\n",
                     loop->num);
          adjust_loop_info_after_peeling (loop, 1, true);
        }
      else if (exit_count >= entry_count.apply_scale (9, 10))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file,
                     "Peeled likely exits: likely decreased number "
                     "of iterations of loop %d by 1.\n", loop->num);
          adjust_loop_info_after_peeling (loop, 1, false);
        }
      else if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file,
                 "Not decreased number"
                 " of iterations of loop %d; likely exits remain.\n",
                 loop->num);

      changed = true;
    }

  if (changed)
    update_ssa (TODO_update_ssa);
  /* After updating SSA form perform CSE on the loop header
     copies.  This is esp. required for the pass before
     vectorization since nothing cleans up copied exit tests
     that can now be simplified.  CSE from the entry of the
     region we copied till all loop exit blocks but not
     entering the loop itself.  */
  for (unsigned i = 0; i < copied.length (); ++i)
    {
      edge entry = copied[i].first;
      loop_p loop = copied[i].second;
      auto_vec<edge> exit_edges = get_loop_exit_edges (loop);
      bitmap exit_bbs = BITMAP_ALLOC (NULL);
      for (unsigned j = 0; j < exit_edges.length (); ++j)
        bitmap_set_bit (exit_bbs, exit_edges[j]->dest->index);
      bitmap_set_bit (exit_bbs, loop->header->index);
      do_rpo_vn (cfun, entry, exit_bbs);
      BITMAP_FREE (exit_bbs);
    }

  if (!loops_to_unloop.is_empty ())
    {
      bool irred_invalidated;
      unloop_loops (loops_to_unloop, loops_to_unloop_nunroll,
                    NULL, &irred_invalidated);
      changed = true;
    }
  free (bbs);
  free (copied_bbs);

  return changed ? TODO_cleanup_cfg : 0;
}
/* Initialize the loop structures we need, and finalize after.  */

unsigned int
pass_ch::execute (function *fun)
{
  loop_optimizer_init (LOOPS_HAVE_PREHEADERS
                       | LOOPS_HAVE_SIMPLE_LATCHES
                       | LOOPS_HAVE_RECORDED_EXITS);

  unsigned int res = copy_headers (fun);

  loop_optimizer_finalize ();
  return res;
}
/* Assume an earlier phase has already initialized all the loop structures that
   we need here (and perhaps others too), and that these will be finalized by
   later phases.  */

unsigned int
pass_ch_vect::execute (function *fun)
{
  return copy_headers (fun);
}
/* Apply header copying according to a very simple test of do-while shape.  */

bool
pass_ch::process_loop_p (class loop *)
{
  return true;
}
/* Apply header-copying to loops where we might enable vectorization.  */

bool
pass_ch_vect::process_loop_p (class loop *loop)
{
  if (!flag_tree_loop_vectorize && !loop->force_vectorize)
    return false;

  if (loop->dont_vectorize)
    return false;

  /* The vectorizer won't handle anything with multiple exits, so skip.  */
  edge exit = single_exit (loop);
  if (!exit)
    return false;

  if (!do_while_loop_p (loop))
    return true;

  return false;
}
} // anon namespace

gimple_opt_pass *
make_pass_ch_vect (gcc::context *ctxt)
{
  return new pass_ch_vect (ctxt);
}

gimple_opt_pass *
make_pass_ch (gcc::context *ctxt)
{
  return new pass_ch (ctxt);
}