hppa: Fix pr110279-1.c on hppa
[official-gcc.git] / gcc / tree-ssa-loop-ch.cc
blob dd9ee40a022307a01e098f3468bdd55e71026903
1 /* Loop header copying on trees.
2 Copyright (C) 2004-2023 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "backend.h"
24 #include "tree.h"
25 #include "gimple.h"
26 #include "cfghooks.h"
27 #include "tree-pass.h"
28 #include "gimple-ssa.h"
29 #include "gimple-iterator.h"
30 #include "tree-cfg.h"
31 #include "tree-into-ssa.h"
32 #include "cfgloop.h"
33 #include "tree-inline.h"
34 #include "tree-ssa-threadedge.h"
35 #include "tree-ssa-sccvn.h"
36 #include "tree-phinodes.h"
37 #include "ssa-iterators.h"
38 #include "value-range.h"
39 #include "gimple-range.h"
40 #include "gimple-range-path.h"
41 #include "gimple-pretty-print.h"
42 #include "cfganal.h"
43 #include "tree-ssa-loop-manip.h"
45 /* Return path query insteance for testing ranges of statements
46 in headers of LOOP contained in basic block BB.
47 Use RANGER instance. */
49 static path_range_query *
50 get_range_query (class loop *loop,
51 basic_block bb,
52 gimple_ranger &ranger)
54 auto_vec<basic_block, 8> path;
55 for (; bb != loop->header; bb = single_pred_edge (bb)->src)
56 path.safe_push (bb);
57 path.safe_push (loop->header);
58 path.safe_push (loop_preheader_edge (loop)->src);
59 return new path_range_query (ranger, path);
62 /* Return edge that is true in the first iteration of the loop
63 and NULL otherwise.
64 Formulate corrent ranger query to RANGER. */
66 static edge
67 static_loop_exit (class loop *l, basic_block bb, gimple_ranger &ranger,
68 path_range_query *&query)
70 gcond *last = safe_dyn_cast <gcond *> (*gsi_last_bb (bb));
71 edge ret_e;
73 if (!last)
74 return NULL;
76 edge true_e, false_e;
77 extract_true_false_edges_from_block (bb, &true_e, &false_e);
79 /* If neither edge is the exit edge, this is not a case we'd like to
80 special-case. */
81 if (!loop_exit_edge_p (l, true_e) && !loop_exit_edge_p (l, false_e))
82 return NULL;
84 int_range<1> desired_static_range;
85 if (loop_exit_edge_p (l, true_e))
87 desired_static_range = range_false ();
88 ret_e = true_e;
90 else
92 desired_static_range = range_true ();
93 ret_e = false_e;
96 if (!query)
97 query = get_range_query (l, gimple_bb (last), ranger);
99 int_range<2> r;
100 if (!query->range_of_stmt (r, last))
101 return NULL;
102 return r == desired_static_range ? ret_e : NULL;
105 /* Return true if STMT is static in LOOP. This means that its value
106 is constant in the first iteration.
107 Use RANGER and formulate query cached in QUERY. */
109 static bool
110 loop_static_stmt_p (class loop *loop,
111 gimple_ranger &ranger,
112 path_range_query *&query,
113 gimple *stmt)
115 tree type = gimple_range_type (stmt);
116 if (!type || !Value_Range::supports_type_p (type))
117 return false;
119 if (!query)
120 query = get_range_query (loop, gimple_bb (stmt), ranger);
122 Value_Range r (gimple_range_type (stmt));
123 if (!query->range_of_stmt (r, stmt))
124 return false;
125 return r.singleton_p ();
128 /* Return true if OP is invariant. */
130 static bool
131 loop_invariant_op_p (class loop *loop,
132 tree op)
134 if (is_gimple_min_invariant (op))
135 return true;
136 if (SSA_NAME_IS_DEFAULT_DEF (op)
137 || !flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (op))))
138 return true;
139 return gimple_uid (SSA_NAME_DEF_STMT (op)) & 1;
142 /* Return true if OP combines outcome of static and
143 loop invariant conditional. */
145 static bool
146 loop_static_op_p (class loop *loop, tree op)
148 /* Always check for invariant first. */
149 gcc_checking_assert (!is_gimple_min_invariant (op)
150 && !SSA_NAME_IS_DEFAULT_DEF (op)
151 && flow_bb_inside_loop_p (loop,
152 gimple_bb (SSA_NAME_DEF_STMT (op))));
153 return gimple_uid (SSA_NAME_DEF_STMT (op)) & 2;
157 /* Return true if OP combines outcome of static and
158 loop invariant conditional. */
160 static bool
161 loop_combined_static_and_iv_p (class loop *loop,
162 tree op)
164 /* Always check for invariant first. */
165 gcc_checking_assert (!is_gimple_min_invariant (op)
166 && !SSA_NAME_IS_DEFAULT_DEF (op)
167 && flow_bb_inside_loop_p (loop,
168 gimple_bb (SSA_NAME_DEF_STMT (op))));
169 return gimple_uid (SSA_NAME_DEF_STMT (op)) & 4;
/* Decision about possibility of copying a given header.  */

enum ch_decision
{
  /* We do not want to copy this header.  */
  ch_impossible,
  /* We can copy it if it enables wins.  */
  ch_possible,
  /* We can "copy" it if it enables wins and doing
     so will introduce no new code.  */
  ch_possible_zero_cost,
  /* We want to copy.  */
  ch_win,
  /* We want to copy and we will eliminate loop exit.  */
  ch_win_invariant_exit
};
189 /* Check whether we should duplicate HEADER of LOOP. At most *LIMIT
190 instructions should be duplicated, limit is decreased by the actual
191 amount. */
193 static ch_decision
194 should_duplicate_loop_header_p (basic_block header, class loop *loop,
195 gimple_ranger *ranger,
196 int *limit,
197 hash_set <edge> *invariant_exits,
198 hash_set <edge> *static_exits)
200 gimple_stmt_iterator bsi;
202 gcc_assert (!header->aux);
204 gcc_assert (EDGE_COUNT (header->succs) > 0);
205 if (single_succ_p (header))
207 if (dump_file && (dump_flags & TDF_DETAILS))
208 fprintf (dump_file,
209 " Not duplicating bb %i: it is single succ.\n",
210 header->index);
211 return ch_impossible;
214 if (flow_bb_inside_loop_p (loop, EDGE_SUCC (header, 0)->dest)
215 && flow_bb_inside_loop_p (loop, EDGE_SUCC (header, 1)->dest))
217 if (dump_file && (dump_flags & TDF_DETAILS))
218 fprintf (dump_file,
219 " Not duplicating bb %i: both successors are in loop.\n",
220 loop->num);
221 return ch_impossible;
224 /* If this is not the original loop header, we want it to have just
225 one predecessor in order to match the && pattern. */
226 if (header != loop->header && !single_pred_p (header))
228 if (dump_file && (dump_flags & TDF_DETAILS))
229 fprintf (dump_file,
230 " Not duplicating bb %i: it has mutiple predecestors.\n",
231 header->index);
232 return ch_impossible;
235 gcond *last = safe_dyn_cast <gcond *> (*gsi_last_bb (header));
236 if (!last)
238 if (dump_file && (dump_flags & TDF_DETAILS))
239 fprintf (dump_file,
240 " Not duplicating bb %i: it does not end by conditional.\n",
241 header->index);
242 return ch_impossible;
245 path_range_query *query = NULL;
246 for (gphi_iterator psi = gsi_start_phis (header); !gsi_end_p (psi);
247 gsi_next (&psi))
248 if (!virtual_operand_p (gimple_phi_result (psi.phi ())))
250 bool static_p = loop_static_stmt_p (loop, *ranger,
251 query, psi.phi ());
252 gimple_set_uid (psi.phi (), static_p ? 2 : 0);
254 bool code_size_cost = false;
256 /* Count number of instructions and punt on calls.
257 Populate stmts INV flag to later apply heuristics to the
258 kind of conditions we want to copy. */
259 for (bsi = gsi_start_bb (header); !gsi_end_p (bsi); gsi_next (&bsi))
261 gimple *last = gsi_stmt (bsi);
263 if (gimple_code (last) == GIMPLE_LABEL)
264 continue;
266 if (is_gimple_debug (last))
267 continue;
269 if (gimple_code (last) == GIMPLE_COND)
270 break;
272 if (dump_file && (dump_flags & TDF_DETAILS))
274 fprintf (dump_file, " Analyzing: ");
275 print_gimple_stmt (dump_file, last, 0, TDF_SLIM);
278 if (gimple_code (last) == GIMPLE_CALL
279 && (!gimple_inexpensive_call_p (as_a <gcall *> (last))
280 /* IFN_LOOP_DIST_ALIAS means that inner loop is distributed
281 at current loop's header. Don't copy in this case. */
282 || gimple_call_internal_p (last, IFN_LOOP_DIST_ALIAS)))
284 if (dump_file && (dump_flags & TDF_DETAILS))
285 fprintf (dump_file,
286 " Not duplicating bb %i: it contains call.\n",
287 header->index);
288 if (query)
289 delete query;
290 return ch_impossible;
293 /* Classify the stmt is invariant in the loop. */
294 gimple_set_uid (last, 0);
295 if (!gimple_vuse (last)
296 && gimple_code (last) != GIMPLE_ASM
297 && (gimple_code (last) != GIMPLE_CALL
298 || gimple_call_flags (last) & ECF_CONST))
300 bool inv = true, static_p = false;
301 ssa_op_iter i;
302 tree op;
303 FOR_EACH_SSA_TREE_OPERAND (op, last, i, SSA_OP_USE)
304 if (!loop_invariant_op_p (loop, op))
305 inv = false;
306 /* Assume that code is reasonably optimized and invariant
307 constants are already identified. */
308 if (!inv)
309 static_p = loop_static_stmt_p (loop, *ranger, query, last);
310 gimple_set_uid (last, (inv ? 1 : 0) | (static_p ? 2 : 0));
311 if (dump_file && (dump_flags & TDF_DETAILS))
313 if (inv)
314 fprintf (dump_file, " Stmt is loop invariant\n");
315 if (static_p)
316 fprintf (dump_file, " Stmt is static "
317 "(constant in the first iteration)\n");
319 /* Loop invariants will be optimized out in loop body after
320 duplication; do not account invariant computation in code
321 size costs.
323 Similarly static computations will be optimized out in the
324 duplicatd header. */
325 if (inv || static_p)
326 continue;
328 /* Match the following:
329 _1 = i_1 < 10 <- static condtion
330 _2 = n != 0 <- loop invariant condition
331 _3 = _1 & _2 <- combined static and iv statement. */
332 tree_code code;
333 if (gimple_code (last) == GIMPLE_ASSIGN
334 && ((code = gimple_assign_rhs_code (last)) == BIT_AND_EXPR
335 || code == BIT_IOR_EXPR || code == BIT_XOR_EXPR))
337 tree op1 = gimple_assign_rhs1 (last);
338 tree op2 = gimple_assign_rhs2 (last);
340 if ((loop_invariant_op_p (loop, op1)
341 || loop_combined_static_and_iv_p (loop, op1)
342 || loop_static_op_p (loop, op1))
343 && (loop_invariant_op_p (loop, op2)
344 || loop_combined_static_and_iv_p (loop, op2)
345 || loop_static_op_p (loop, op2)))
347 /* Duplicating loop header with combned conditional will
348 remove this statement in each copy. But we account for
349 that later when seeing that condition.
351 Note that this may be overly optimistic for bit operations
352 where the static parameter may still result in non-trivial
353 bit operation. */
354 gimple_set_uid (last, 4);
355 if (dump_file && (dump_flags & TDF_DETAILS))
356 fprintf (dump_file,
357 " Stmt combines static and invariant op\n");
358 continue;
363 int insns = estimate_num_insns (last, &eni_size_weights);
364 *limit -= insns;
365 if (insns)
366 code_size_cost = true;
367 if (dump_file && (dump_flags & TDF_DETAILS))
368 fprintf (dump_file,
369 " Acconting stmt as %i insns\n", insns);
370 if (*limit < 0)
372 if (dump_file && (dump_flags & TDF_DETAILS))
373 fprintf (dump_file,
374 " Not duplicating bb %i contains too many insns.\n",
375 header->index);
376 if (query)
377 delete query;
378 return ch_impossible;
382 if (dump_file && (dump_flags & TDF_DETAILS))
384 fprintf (dump_file, " Analyzing: ");
385 print_gimple_stmt (dump_file, last, 0, TDF_SLIM);
388 /* If the condition tests a non-IV loop variant we do not want to rotate
389 the loop further. Unless this is the original loop header. */
390 tree lhs = gimple_cond_lhs (last);
391 tree rhs = gimple_cond_rhs (last);
392 bool lhs_invariant = loop_invariant_op_p (loop, lhs);
393 bool rhs_invariant = loop_invariant_op_p (loop, rhs);
395 /* Combined conditional is a result of if combining:
397 _1 = i_1 < 10 <- static condtion
398 _2 = n != 0 <- loop invariant condition
399 _3 = _1 & _2 <- combined static and iv statement
400 if (_3 != 0) <- combined conditional
402 Combined conditionals will not be optimized out in either copy.
403 However duplicaed header simplifies to:
405 if (n < 10)
407 while loop body to
409 if (i_1 < 10)
411 So effectively the resulting code sequence will be of same length as
412 the original code.
414 Combined conditionals are never static or invariant, so save some work
415 below. */
416 if (lhs_invariant != rhs_invariant
417 && (lhs_invariant
418 || loop_combined_static_and_iv_p (loop, lhs))
419 && (rhs_invariant
420 || loop_combined_static_and_iv_p (loop, rhs)))
422 if (query)
423 delete query;
424 if (dump_file && (dump_flags & TDF_DETAILS))
425 fprintf (dump_file,
426 " Conditional combines static and invariant op.\n");
427 return ch_win;
430 edge static_exit = static_loop_exit (loop, header, *ranger, query);
431 if (query)
432 delete query;
434 if (static_exit && static_exits)
436 static_exits->add (static_exit);
437 if (dump_file && (dump_flags & TDF_DETAILS))
438 fprintf (dump_file,
439 " Will eliminate peeled conditional in bb %d.\n",
440 static_exit->src->index);
441 /* Still look for invariant exits; exit may be both. */
443 if (lhs_invariant && rhs_invariant)
445 if (invariant_exits)
447 edge e;
448 edge_iterator ei;
449 FOR_EACH_EDGE (e, ei, header->succs)
450 if (loop_exit_edge_p (loop, e))
452 if (dump_file && (dump_flags & TDF_DETAILS))
453 fprintf (dump_file,
454 " Will elliminate invariant exit %i->%i\n",
455 e->src->index, e->dest->index);
456 invariant_exits->add (e);
459 return ch_win_invariant_exit;
462 /* If the static exit fully optimize out, it is win to "duplicate"
465 TODO: Even if duplication costs some size we may opt to do so in case
466 exit probability is significant enough (do partial peeling). */
467 if (static_exit)
468 return !code_size_cost ? ch_possible_zero_cost : ch_possible;
470 /* We was not able to prove that conditional will be eliminated. */
471 int insns = estimate_num_insns (last, &eni_size_weights);
472 *limit -= insns;
473 if (dump_file && (dump_flags & TDF_DETAILS))
474 fprintf (dump_file,
475 " Acconting stmt as %i insns\n", insns);
476 if (*limit < 0)
478 if (dump_file && (dump_flags & TDF_DETAILS))
479 fprintf (dump_file,
480 " Not duplicating bb %i contains too many insns.\n",
481 header->index);
482 return ch_impossible;
485 return ch_possible;
488 /* Checks whether LOOP is a do-while style loop. */
490 static bool
491 do_while_loop_p (class loop *loop)
493 gimple *stmt = last_nondebug_stmt (loop->latch);
495 /* If the latch of the loop is not empty, it is not a do-while loop. */
496 if (stmt
497 && gimple_code (stmt) != GIMPLE_LABEL)
499 if (dump_file && (dump_flags & TDF_DETAILS))
500 fprintf (dump_file,
501 "Loop %i is not do-while loop: latch is not empty.\n",
502 loop->num);
503 return false;
506 /* If the latch does not have a single predecessor, it is not a
507 do-while loop. */
508 if (!single_pred_p (loop->latch))
510 if (dump_file && (dump_flags & TDF_DETAILS))
511 fprintf (dump_file,
512 "Loop %i is not do-while loop: latch has multiple "
513 "predecessors.\n", loop->num);
514 return false;
516 basic_block pred = single_pred (loop->latch);
518 /* If the latch predecessor doesn't exit the loop, it is not a
519 do-while loop. */
520 if (!loop_exits_from_bb_p (loop, pred))
522 if (dump_file && (dump_flags & TDF_DETAILS))
523 fprintf (dump_file,
524 "Loop %i is not do-while loop: latch predecessor "
525 "does not exit loop.\n", loop->num);
526 return false;
528 gcond *last = safe_dyn_cast <gcond *> (*gsi_last_bb (pred));
529 if (last
530 && (gimple_cond_lhs (last) == boolean_false_node
531 || gimple_cond_lhs (last) == boolean_true_node)
532 && gimple_cond_rhs (last) == boolean_false_node)
534 if (dump_file && (dump_flags & TDF_DETAILS))
535 fprintf (dump_file,
536 "Loop %i is not do-while loop: latch predecessor "
537 "contains exit we optimized out.\n", loop->num);
538 return false;
541 if (dump_file && (dump_flags & TDF_DETAILS))
542 fprintf (dump_file, "Loop %i is do-while loop\n", loop->num);
544 return true;
547 /* Update profile after header copying of LOOP.
548 REGION is the original (in loop) sequence, REGION_COPY is the
549 duplicated header (now outside of loop). N_REGION is number of
550 bbs duplicated.
551 ELIMINATED_EDGE is edge to be removed from duplicated sequence.
552 INVARIANT_EXITS are edges in the loop body to be elimianted
553 since they are loop invariants
555 So We expect the following:
557 // region_copy_start entry will be scaled to entry_count
558 if (cond1) <- this condition will become false
559 and we update probabilities
560 goto loop_exit;
561 if (cond2) <- this condition is loop invariant
562 goto loop_exit;
563 goto loop_header <- this will be redirected to loop.
564 // region_copy_end
565 loop:
566 <body>
567 // region start
568 loop_header:
569 if (cond1) <- we need to update probabbility here
570 goto loop_exit;
571 if (cond2) <- and determine scaling factor here.
572 moreover cond2 is now always true
573 goto loop_exit;
574 else
575 goto loop;
576 // region end
578 Adding support for more exits can be done similarly,
579 but only consumer so far is tree-ssa-loop-ch and it uses only this
580 to handle the common case of peeling headers which have
581 conditionals known to be always true upon entry. */
583 static void
584 update_profile_after_ch (class loop *loop,
585 basic_block *region, basic_block *region_copy,
586 unsigned n_region,
587 hash_set <edge> *invariant_exits,
588 hash_set <edge> *static_exits,
589 profile_count entry_count)
591 for (unsigned int i = 0; i < n_region; i++)
593 edge exit_e, exit_e_copy, e, e_copy;
594 if (EDGE_COUNT (region[i]->succs) == 1)
596 region_copy[i]->count = entry_count;
597 region[i]->count -= entry_count;
598 continue;
601 gcc_checking_assert (EDGE_COUNT (region[i]->succs) == 2);
602 if (loop_exit_edge_p (loop,
603 EDGE_SUCC (region[i], 0)))
605 exit_e = EDGE_SUCC (region[i], 0);
606 exit_e_copy = EDGE_SUCC (region_copy[i], 0);
607 e = EDGE_SUCC (region[i], 1);
608 e_copy = EDGE_SUCC (region_copy[i], 1);
610 else
612 exit_e = EDGE_SUCC (region[i], 1);
613 exit_e_copy = EDGE_SUCC (region_copy[i], 1);
614 e = EDGE_SUCC (region[i], 0);
615 e_copy = EDGE_SUCC (region_copy[i], 0);
617 gcc_assert (i == n_region - 1
618 || (e->dest == region[i + 1]
619 && e_copy->dest == region_copy[i + 1]));
620 region_copy[i]->count = entry_count;
621 profile_count exit_e_count = exit_e->count ();
622 bool was_static = false;
623 if (static_exits->contains (exit_e))
625 /* Update profile and the conditional.
626 CFG update is done by caller. */
627 static_exits->remove (exit_e);
628 was_static = true;
629 e_copy->probability = profile_probability::always ();
630 exit_e_copy->probability = profile_probability::never ();
631 gcond *cond_stmt
632 = as_a <gcond *>(*gsi_last_bb (region_copy[i]));
633 if (e_copy->flags & EDGE_TRUE_VALUE)
634 gimple_cond_make_true (cond_stmt);
635 else
636 gimple_cond_make_false (cond_stmt);
637 update_stmt (cond_stmt);
638 /* Header copying is a special case of jump threading, so use
639 common code to update loop body exit condition. */
640 update_bb_profile_for_threading (region[i], entry_count, e);
642 else
643 region[i]->count -= region_copy[i]->count;
644 if (invariant_exits->contains (exit_e))
646 invariant_exits->remove (exit_e);
647 /* All exits will happen in exit_e_copy which is out of the
648 loop, so increase probability accordingly.
649 If the edge is eliminated_edge we already corrected
650 profile above. */
651 if (entry_count.nonzero_p () && !was_static)
652 set_edge_probability_and_rescale_others
653 (exit_e_copy, exit_e_count.probability_in
654 (entry_count));
655 /* Eliminate in-loop conditional. */
656 e->probability = profile_probability::always ();
657 exit_e->probability = profile_probability::never ();
658 gcond *cond_stmt = as_a <gcond *>(*gsi_last_bb (region[i]));
659 if (e->flags & EDGE_TRUE_VALUE)
660 gimple_cond_make_true (cond_stmt);
661 else
662 gimple_cond_make_false (cond_stmt);
663 update_stmt (cond_stmt);
665 entry_count = e_copy->count ();
667 /* Be sure that we seen all invariant exit edges we are supposed to update.
668 We may have recorded some static exists we decided to not duplicate. */
669 gcc_checking_assert (invariant_exits->is_empty ());
672 namespace {
674 /* Common superclass for both header-copying phases. */
675 class ch_base : public gimple_opt_pass
677 protected:
678 ch_base (pass_data data, gcc::context *ctxt)
679 : gimple_opt_pass (data, ctxt)
682 /* Copies headers of all loops in FUN for which process_loop_p is true. */
683 unsigned int copy_headers (function *fun);
685 /* Return true to copy headers of LOOP or false to skip. */
686 virtual bool process_loop_p (class loop *loop) = 0;
689 const pass_data pass_data_ch =
691 GIMPLE_PASS, /* type */
692 "ch", /* name */
693 OPTGROUP_LOOP, /* optinfo_flags */
694 TV_TREE_CH, /* tv_id */
695 ( PROP_cfg | PROP_ssa ), /* properties_required */
696 0, /* properties_provided */
697 0, /* properties_destroyed */
698 0, /* todo_flags_start */
699 0, /* todo_flags_finish */
702 class pass_ch : public ch_base
704 public:
705 pass_ch (gcc::context *ctxt)
706 : ch_base (pass_data_ch, ctxt)
709 /* opt_pass methods: */
710 bool gate (function *) final override { return flag_tree_ch != 0; }
712 /* Initialize and finalize loop structures, copying headers inbetween. */
713 unsigned int execute (function *) final override;
715 opt_pass * clone () final override { return new pass_ch (m_ctxt); }
717 protected:
718 /* ch_base method: */
719 bool process_loop_p (class loop *loop) final override;
720 }; // class pass_ch
722 const pass_data pass_data_ch_vect =
724 GIMPLE_PASS, /* type */
725 "ch_vect", /* name */
726 OPTGROUP_LOOP, /* optinfo_flags */
727 TV_TREE_CH, /* tv_id */
728 ( PROP_cfg | PROP_ssa ), /* properties_required */
729 0, /* properties_provided */
730 0, /* properties_destroyed */
731 0, /* todo_flags_start */
732 0, /* todo_flags_finish */
735 /* This is a more aggressive version of the same pass, designed to run just
736 before if-conversion and vectorization, to put more loops into the form
737 required for those phases. */
738 class pass_ch_vect : public ch_base
740 public:
741 pass_ch_vect (gcc::context *ctxt)
742 : ch_base (pass_data_ch_vect, ctxt)
745 /* opt_pass methods: */
746 bool gate (function *fun) final override
748 return flag_tree_ch != 0
749 && (flag_tree_loop_vectorize != 0 || fun->has_force_vectorize_loops);
752 /* Just copy headers, no initialization/finalization of loop structures. */
753 unsigned int execute (function *) final override;
755 protected:
756 /* ch_base method: */
757 bool process_loop_p (class loop *loop) final override;
758 }; // class pass_ch_vect
760 /* For all loops, copy the condition at the end of the loop body in front
761 of the loop. This is beneficial since it increases efficiency of
762 code motion optimizations. It also saves one jump on entry to the loop. */
764 unsigned int
765 ch_base::copy_headers (function *fun)
767 basic_block *bbs, *copied_bbs;
768 unsigned bbs_size;
769 bool changed = false;
771 if (number_of_loops (fun) <= 1)
772 return 0;
774 bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (fun));
775 copied_bbs = XNEWVEC (basic_block, n_basic_blocks_for_fn (fun));
776 bbs_size = n_basic_blocks_for_fn (fun);
778 struct candidate
780 class loop *loop;
781 unsigned int nheaders;
782 hash_set <edge> *invariant_exits, *static_exits;
784 auto_vec<struct candidate> candidates;
785 auto_vec<std::pair<edge, loop_p> > copied;
786 auto_vec<class loop *> loops_to_unloop;
787 auto_vec<int> loops_to_unloop_nunroll;
789 mark_dfs_back_edges ();
790 gimple_ranger *ranger = new gimple_ranger;
791 for (auto loop : loops_list (cfun, 0))
793 int initial_limit = optimize_loop_for_speed_p (loop)
794 ? param_max_loop_header_insns : 0;
795 int remaining_limit = initial_limit;
796 if (dump_file && (dump_flags & TDF_DETAILS))
797 fprintf (dump_file,
798 "Analyzing loop %i\n", loop->num);
800 basic_block header = loop->header;
801 if (!get_max_loop_iterations_int (loop))
803 if (dump_file && (dump_flags & TDF_DETAILS))
804 fprintf (dump_file, "Loop %d never loops.\n", loop->num);
805 scale_loop_profile (loop, profile_probability::always (), 0);
806 loops_to_unloop.safe_push (loop);
807 loops_to_unloop_nunroll.safe_push (0);
808 continue;
811 /* If the loop is already a do-while style one (either because it was
812 written as such, or because jump threading transformed it into one),
813 we might be in fact peeling the first iteration of the loop. This
814 in general is not a good idea. Also avoid touching infinite loops. */
815 if (!loop_has_exit_edges (loop)
816 || !process_loop_p (loop))
817 continue;
819 /* Iterate the header copying up to limit; this takes care of the cases
820 like while (a && b) {...}, where we want to have both of the conditions
821 copied. TODO -- handle while (a || b) - like cases, by not requiring
822 the header to have just a single successor and copying up to
823 postdominator. */
824 int nheaders = 0;
825 int last_win_nheaders = 0;
826 bool last_win_invariant_exit = false;
827 ch_decision ret;
828 auto_vec <ch_decision, 32> decision;
829 hash_set <edge> *invariant_exits = new hash_set <edge>;
830 hash_set <edge> *static_exits = new hash_set <edge>;
831 while ((ret = should_duplicate_loop_header_p (header, loop, ranger,
832 &remaining_limit,
833 invariant_exits,
834 static_exits))
835 != ch_impossible)
837 nheaders++;
838 decision.safe_push (ret);
839 if (ret >= ch_win)
841 last_win_nheaders = nheaders;
842 last_win_invariant_exit = (ret == ch_win_invariant_exit);
843 if (dump_file && (dump_flags & TDF_DETAILS))
844 fprintf (dump_file, " Duplicating bb %i is a win\n",
845 header->index);
847 else
848 if (dump_file && (dump_flags & TDF_DETAILS))
849 fprintf (dump_file, " May duplicate bb %i\n", header->index);
851 /* Find a successor of header that is inside a loop; i.e. the new
852 header after the condition is copied. */
853 if (flow_bb_inside_loop_p (loop, EDGE_SUCC (header, 0)->dest))
854 header = EDGE_SUCC (header, 0)->dest;
855 else
856 header = EDGE_SUCC (header, 1)->dest;
859 /* Try to turn loop into do-while. This means ensuring that
860 last duplicated header will not have loop invariant exit. */
861 if (last_win_nheaders && last_win_invariant_exit
862 && nheaders > last_win_nheaders)
864 last_win_nheaders++;
865 if (dump_file && (dump_flags & TDF_DETAILS))
866 fprintf (dump_file,
867 " Duplicating additional BB to obtain do-while loop\n");
869 else if (!last_win_nheaders && nheaders && !do_while_loop_p (loop))
871 last_win_nheaders = 1;
872 if (dump_file && (dump_flags & TDF_DETAILS))
873 fprintf (dump_file,
874 " Duplicating header BB to obtain do-while loop\n");
876 /* "Duplicate" all BBs with zero cost following last basic blocks we
877 decided to copy. */
878 while (last_win_nheaders < (int)decision.length ()
879 && decision[last_win_nheaders] == ch_possible_zero_cost)
881 if (dump_file && (dump_flags & TDF_DETAILS))
882 fprintf (dump_file,
883 " Duplicating extra bb is a win; it has zero cost\n");
884 last_win_nheaders++;
887 if (last_win_nheaders)
888 candidates.safe_push ({loop, last_win_nheaders,
889 invariant_exits, static_exits});
890 else
892 delete invariant_exits;
893 delete static_exits;
896 /* Do not use ranger after we change the IL and not have updated SSA. */
897 delete ranger;
899 for (auto candidate : candidates)
901 class loop *loop = candidate.loop;
902 if (dump_file && (dump_flags & TDF_DETAILS))
903 fprintf (dump_file,
904 "Copying headers of loop %i\n", loop->num);
906 basic_block header = loop->header;
907 edge nonexit = NULL;
908 edge exit = NULL;
909 unsigned int n_bbs = 0;
910 int nexits = 0;
911 profile_count exit_count = profile_count::zero ();
912 profile_count entry_count = profile_count::zero ();
913 edge e;
914 edge_iterator ei;
916 FOR_EACH_EDGE (e, ei, loop->header->preds)
917 if (e->src != loop->latch)
918 entry_count += e->count ();
919 while (n_bbs < candidate.nheaders)
921 if (dump_file && (dump_flags & TDF_DETAILS))
922 fprintf (dump_file, " Will duplicate bb %i\n", header->index);
923 /* Find a successor of header that is inside a loop; i.e. the new
924 header after the condition is copied. */
925 if (flow_bb_inside_loop_p (loop, EDGE_SUCC (header, 0)->dest))
927 nonexit = EDGE_SUCC (header, 0);
928 exit = EDGE_SUCC (header, 1);
930 else
932 nonexit = EDGE_SUCC (header, 1);
933 exit = EDGE_SUCC (header, 0);
935 exit_count += exit->count ();
936 nexits++;
937 bbs[n_bbs++] = header;
938 gcc_assert (bbs_size > n_bbs);
939 header = nonexit->dest;
942 if (dump_file && (dump_flags & TDF_DETAILS))
943 fprintf (dump_file,
944 "Duplicating header of the loop %d up to edge %d->%d\n",
945 loop->num, exit->src->index, exit->dest->index);
947 /* Ensure that the header will have just the latch as a predecessor
948 inside the loop. */
949 if (!single_pred_p (nonexit->dest))
951 header = split_edge (nonexit);
952 exit = single_pred_edge (header);
955 edge entry = loop_preheader_edge (loop);
957 propagate_threaded_block_debug_into (exit->dest, entry->dest);
958 if (!gimple_duplicate_seme_region (entry, exit, bbs, n_bbs, copied_bbs,
959 true))
961 delete candidate.static_exits;
962 delete candidate.invariant_exits;
963 if (dump_file && (dump_flags & TDF_DETAILS))
964 fprintf (dump_file, "Duplication failed.\n");
965 continue;
967 if (loop->header->count.initialized_p ())
968 update_profile_after_ch (loop, bbs, copied_bbs, n_bbs,
969 candidate.invariant_exits,
970 candidate.static_exits,
971 entry_count);
972 delete candidate.static_exits;
973 delete candidate.invariant_exits;
974 copied.safe_push (std::make_pair (entry, loop));
976 /* If the loop has the form "for (i = j; i < j + 10; i++)" then
977 this copying can introduce a case where we rely on undefined
978 signed overflow to eliminate the preheader condition, because
979 we assume that "j < j + 10" is true. We don't want to warn
980 about that case for -Wstrict-overflow, because in general we
981 don't warn about overflow involving loops. Prevent the
982 warning by setting the no_warning flag in the condition. */
983 if (warn_strict_overflow > 0)
985 unsigned int i;
987 for (i = 0; i < n_bbs; ++i)
989 gimple_stmt_iterator bsi;
991 for (bsi = gsi_start_bb (copied_bbs[i]);
992 !gsi_end_p (bsi);
993 gsi_next (&bsi))
995 gimple *stmt = gsi_stmt (bsi);
996 if (gimple_code (stmt) == GIMPLE_COND)
998 tree lhs = gimple_cond_lhs (stmt);
999 if (gimple_cond_code (stmt) != EQ_EXPR
1000 && gimple_cond_code (stmt) != NE_EXPR
1001 && INTEGRAL_TYPE_P (TREE_TYPE (lhs))
1002 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (lhs)))
1003 suppress_warning (stmt, OPT_Wstrict_overflow_);
1005 else if (is_gimple_assign (stmt))
1007 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
1008 tree rhs1 = gimple_assign_rhs1 (stmt);
1009 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison
1010 && rhs_code != EQ_EXPR
1011 && rhs_code != NE_EXPR
1012 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
1013 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (rhs1)))
1014 suppress_warning (stmt, OPT_Wstrict_overflow_);
1020 /* Update header of the loop. */
1021 loop->header = header;
1022 /* Find correct latch. We only duplicate chain of conditionals so
1023 there should be precisely two edges to the new header. One entry
1024 edge and one to latch. */
1025 FOR_EACH_EDGE (e, ei, loop->header->preds)
1026 if (header != e->src)
1028 loop->latch = e->src;
1029 break;
1031 /* Ensure that the latch is simple. */
1032 if (!single_succ_p (loop_latch_edge (loop)->src))
1033 split_edge (loop_latch_edge (loop));
1035 if (dump_file && (dump_flags & TDF_DETAILS))
1037 if (do_while_loop_p (loop))
1038 fprintf (dump_file, "Loop %d is now do-while loop.\n", loop->num);
1039 else
1040 fprintf (dump_file, "Loop %d is still not do-while loop.\n",
1041 loop->num);
1042 fprintf (dump_file, "Exit count: ");
1043 exit_count.dump (dump_file);
1044 fprintf (dump_file, "\nEntry count: ");
1045 entry_count.dump (dump_file);
1046 fprintf (dump_file, "\n");
1049 /* We possibly decreased number of itrations by 1. */
1050 auto_vec<edge> exits = get_loop_exit_edges (loop);
1051 bool precise = (nexits == (int) exits.length ());
1052 /* Check that loop may not terminate in other way than via
1053 basic blocks we duplicated. */
1054 if (precise)
1056 basic_block *bbs = get_loop_body (loop);
1057 for (unsigned i = 0; i < loop->num_nodes && precise; ++i)
1059 basic_block bb = bbs[i];
1060 bool found_exit = false;
1061 FOR_EACH_EDGE (e, ei, bb->succs)
1062 if (!flow_bb_inside_loop_p (loop, e->dest))
1064 found_exit = true;
1065 break;
1067 /* If BB has exit, it was duplicated. */
1068 if (found_exit)
1069 continue;
1070 /* Give up on irreducible loops. */
1071 if (bb->flags & BB_IRREDUCIBLE_LOOP)
1073 precise = false;
1074 break;
1076 /* Check that inner loops are finite. */
1077 for (class loop *l = bb->loop_father; l != loop && precise;
1078 l = loop_outer (l))
1079 if (!l->finite_p)
1081 precise = false;
1082 break;
1084 	  /* Verify that there is no statement that may terminate
1085 	     execution in a way not visible to the CFG.  */
1086 for (gimple_stmt_iterator bsi = gsi_start_bb (bb);
1087 !gsi_end_p (bsi); gsi_next (&bsi))
1088 if (stmt_can_terminate_bb_p (gsi_stmt (bsi)))
1089 precise = false;
1091 free (bbs);
1093 if (precise
1094 && get_max_loop_iterations_int (loop) == 1)
1096 if (dump_file && (dump_flags & TDF_DETAILS))
1097 fprintf (dump_file, "Loop %d no longer loops.\n", loop->num);
1098 scale_loop_profile (loop, profile_probability::always (), 0);
1099 loops_to_unloop.safe_push (loop);
1100 loops_to_unloop_nunroll.safe_push (0);
1102 else if (precise)
1104 if (dump_file && (dump_flags & TDF_DETAILS))
1105 fprintf (dump_file,
1106 "Peeled all exits:"
1107 " decreased number of iterations of loop %d by 1.\n",
1108 loop->num);
1109 adjust_loop_info_after_peeling (loop, 1, true);
1111 else if (exit_count >= entry_count.apply_scale (9, 10))
1113 if (dump_file && (dump_flags & TDF_DETAILS))
1114 fprintf (dump_file,
1115 "Peeled likely exits: likely decreased number "
1116 "of iterations of loop %d by 1.\n", loop->num);
1117 adjust_loop_info_after_peeling (loop, 1, false);
1119 else if (dump_file && (dump_flags & TDF_DETAILS))
1120 fprintf (dump_file,
1121 "Not decreased number"
1122 " of iterations of loop %d; likely exits remains.\n",
1123 loop->num);
1125 changed = true;
1128 if (changed)
1130 update_ssa (TODO_update_ssa);
1131 /* After updating SSA form perform CSE on the loop header
1132 copies. This is esp. required for the pass before
1133 vectorization since nothing cleans up copied exit tests
1134 that can now be simplified. CSE from the entry of the
1135 region we copied till all loop exit blocks but not
1136 entering the loop itself. */
1137 for (unsigned i = 0; i < copied.length (); ++i)
1139 edge entry = copied[i].first;
1140 loop_p loop = copied[i].second;
1141 auto_vec<edge> exit_edges = get_loop_exit_edges (loop);
1142 bitmap exit_bbs = BITMAP_ALLOC (NULL);
1143 for (unsigned j = 0; j < exit_edges.length (); ++j)
1144 bitmap_set_bit (exit_bbs, exit_edges[j]->dest->index);
1145 bitmap_set_bit (exit_bbs, loop->header->index);
1146 do_rpo_vn (cfun, entry, exit_bbs);
1147 BITMAP_FREE (exit_bbs);
1150 if (!loops_to_unloop.is_empty ())
1152 bool irred_invalidated;
1153 auto_bitmap lc_invalidated;
1154 auto_vec<edge> edges_to_remove;
1155 unloop_loops (loops_to_unloop, loops_to_unloop_nunroll, edges_to_remove,
1156 lc_invalidated, &irred_invalidated);
1157 if (loops_state_satisfies_p (fun, LOOP_CLOSED_SSA)
1158 && !bitmap_empty_p (lc_invalidated))
1159 rewrite_into_loop_closed_ssa (NULL, 0);
1160 changed = true;
1162 free (bbs);
1163 free (copied_bbs);
1165 return changed ? TODO_cleanup_cfg : 0;
1168 /* Initialize the loop structures we need, and finalize after. */
1170 unsigned int
1171 pass_ch::execute (function *fun)
1173 loop_optimizer_init (LOOPS_HAVE_PREHEADERS
1174 | LOOPS_HAVE_SIMPLE_LATCHES
1175 | LOOPS_HAVE_RECORDED_EXITS);
1177 unsigned int res = copy_headers (fun);
1179 loop_optimizer_finalize ();
1180 return res;
1183 /* Assume an earlier phase has already initialized all the loop structures that
1184 we need here (and perhaps others too), and that these will be finalized by
1185 a later phase. */
1187 unsigned int
1188 pass_ch_vect::execute (function *fun)
1190 return copy_headers (fun);
1193 /* Apply header copying according to a very simple test of do-while shape. */
1195 bool
1196 pass_ch::process_loop_p (class loop *)
1198 return true;
1201 /* Apply header-copying to loops where we might enable vectorization. */
1203 bool
1204 pass_ch_vect::process_loop_p (class loop *loop)
1206 if (!flag_tree_loop_vectorize && !loop->force_vectorize)
1207 return false;
1209 if (loop->dont_vectorize)
1210 return false;
1212 /* The vectorizer won't handle anything with multiple exits, so skip. */
1213 edge exit = single_exit (loop);
1214 if (!exit)
1215 return false;
1217 if (!do_while_loop_p (loop))
1218 return true;
1220 return false;
1223 } // anon namespace
1225 gimple_opt_pass *
1226 make_pass_ch_vect (gcc::context *ctxt)
1228 return new pass_ch_vect (ctxt);
1231 gimple_opt_pass *
1232 make_pass_ch (gcc::context *ctxt)
1234 return new pass_ch (ctxt);