Re-factor inclusion of tree.h.
[official-gcc.git] / gcc / cfgloopmanip.c
blobcf9a7fc6ee9d1ef0edb4ab1481d7c4ed69a82709
1 /* Loop manipulation code for GNU compiler.
2 Copyright (C) 2002-2013 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "rtl.h"
25 #include "basic-block.h"
26 #include "cfgloop.h"
27 #include "tree.h"
28 #include "tree-ssa.h"
29 #include "dumpfile.h"
31 static void copy_loops_to (struct loop **, int,
32 struct loop *);
33 static void loop_redirect_edge (edge, basic_block);
34 static void remove_bbs (basic_block *, int);
35 static bool rpe_enum_p (const_basic_block, const void *);
36 static int find_path (edge, basic_block **);
37 static void fix_loop_placements (struct loop *, bool *);
38 static bool fix_bb_placement (basic_block);
39 static void fix_bb_placements (basic_block, bool *, bitmap);
41 /* Checks whether basic block BB is dominated by DATA. */
42 static bool
43 rpe_enum_p (const_basic_block bb, const void *data)
45 return dominated_by_p (CDI_DOMINATORS, bb, (const_basic_block) data);
48 /* Remove basic blocks BBS. NBBS is the number of the basic blocks. */
50 static void
51 remove_bbs (basic_block *bbs, int nbbs)
53 int i;
55 for (i = 0; i < nbbs; i++)
56 delete_basic_block (bbs[i]);
59 /* Find path -- i.e. the basic blocks dominated by edge E and put them
60 into array BBS, that will be allocated large enough to contain them.
61 E->dest must have exactly one predecessor for this to work (it is
62 easy to achieve and we do not put it here because we do not want to
63 alter anything by this function). The number of basic blocks in the
64 path is returned. */
65 static int
66 find_path (edge e, basic_block **bbs)
68 gcc_assert (EDGE_COUNT (e->dest->preds) <= 1);
70 /* Find bbs in the path. */
71 *bbs = XNEWVEC (basic_block, n_basic_blocks);
72 return dfs_enumerate_from (e->dest, 0, rpe_enum_p, *bbs,
73 n_basic_blocks, e->dest);
76 /* Fix placement of basic block BB inside loop hierarchy --
77 Let L be a loop to that BB belongs. Then every successor of BB must either
78 1) belong to some superloop of loop L, or
79 2) be a header of loop K such that K->outer is superloop of L
80 Returns true if we had to move BB into other loop to enforce this condition,
81 false if the placement of BB was already correct (provided that placements
82 of its successors are correct). */
83 static bool
84 fix_bb_placement (basic_block bb)
86 edge e;
87 edge_iterator ei;
88 struct loop *loop = current_loops->tree_root, *act;
90 FOR_EACH_EDGE (e, ei, bb->succs)
92 if (e->dest == EXIT_BLOCK_PTR)
93 continue;
95 act = e->dest->loop_father;
96 if (act->header == e->dest)
97 act = loop_outer (act);
99 if (flow_loop_nested_p (loop, act))
100 loop = act;
103 if (loop == bb->loop_father)
104 return false;
106 remove_bb_from_loops (bb);
107 add_bb_to_loop (bb, loop);
109 return true;
112 /* Fix placement of LOOP inside loop tree, i.e. find the innermost superloop
113 of LOOP to that leads at least one exit edge of LOOP, and set it
114 as the immediate superloop of LOOP. Return true if the immediate superloop
115 of LOOP changed.
117 IRRED_INVALIDATED is set to true if a change in the loop structures might
118 invalidate the information about irreducible regions. */
120 static bool
121 fix_loop_placement (struct loop *loop, bool *irred_invalidated)
123 unsigned i;
124 edge e;
125 vec<edge> exits = get_loop_exit_edges (loop);
126 struct loop *father = current_loops->tree_root, *act;
127 bool ret = false;
129 FOR_EACH_VEC_ELT (exits, i, e)
131 act = find_common_loop (loop, e->dest->loop_father);
132 if (flow_loop_nested_p (father, act))
133 father = act;
136 if (father != loop_outer (loop))
138 for (act = loop_outer (loop); act != father; act = loop_outer (act))
139 act->num_nodes -= loop->num_nodes;
140 flow_loop_tree_node_remove (loop);
141 flow_loop_tree_node_add (father, loop);
143 /* The exit edges of LOOP no longer exits its original immediate
144 superloops; remove them from the appropriate exit lists. */
145 FOR_EACH_VEC_ELT (exits, i, e)
147 /* We may need to recompute irreducible loops. */
148 if (e->flags & EDGE_IRREDUCIBLE_LOOP)
149 *irred_invalidated = true;
150 rescan_loop_exit (e, false, false);
153 ret = true;
156 exits.release ();
157 return ret;
160 /* Fix placements of basic blocks inside loop hierarchy stored in loops; i.e.
161 enforce condition condition stated in description of fix_bb_placement. We
162 start from basic block FROM that had some of its successors removed, so that
163 his placement no longer has to be correct, and iteratively fix placement of
164 its predecessors that may change if placement of FROM changed. Also fix
165 placement of subloops of FROM->loop_father, that might also be altered due
166 to this change; the condition for them is similar, except that instead of
167 successors we consider edges coming out of the loops.
169 If the changes may invalidate the information about irreducible regions,
170 IRRED_INVALIDATED is set to true.
172 If LOOP_CLOSED_SSA_INVLIDATED is non-zero then all basic blocks with
173 changed loop_father are collected there. */
175 static void
176 fix_bb_placements (basic_block from,
177 bool *irred_invalidated,
178 bitmap loop_closed_ssa_invalidated)
180 sbitmap in_queue;
181 basic_block *queue, *qtop, *qbeg, *qend;
182 struct loop *base_loop, *target_loop;
183 edge e;
185 /* We pass through blocks back-reachable from FROM, testing whether some
186 of their successors moved to outer loop. It may be necessary to
187 iterate several times, but it is finite, as we stop unless we move
188 the basic block up the loop structure. The whole story is a bit
189 more complicated due to presence of subloops, those are moved using
190 fix_loop_placement. */
192 base_loop = from->loop_father;
193 /* If we are already in the outermost loop, the basic blocks cannot be moved
194 outside of it. If FROM is the header of the base loop, it cannot be moved
195 outside of it, either. In both cases, we can end now. */
196 if (base_loop == current_loops->tree_root
197 || from == base_loop->header)
198 return;
200 in_queue = sbitmap_alloc (last_basic_block);
201 bitmap_clear (in_queue);
202 bitmap_set_bit (in_queue, from->index);
203 /* Prevent us from going out of the base_loop. */
204 bitmap_set_bit (in_queue, base_loop->header->index);
206 queue = XNEWVEC (basic_block, base_loop->num_nodes + 1);
207 qtop = queue + base_loop->num_nodes + 1;
208 qbeg = queue;
209 qend = queue + 1;
210 *qbeg = from;
212 while (qbeg != qend)
214 edge_iterator ei;
215 from = *qbeg;
216 qbeg++;
217 if (qbeg == qtop)
218 qbeg = queue;
219 bitmap_clear_bit (in_queue, from->index);
221 if (from->loop_father->header == from)
223 /* Subloop header, maybe move the loop upward. */
224 if (!fix_loop_placement (from->loop_father, irred_invalidated))
225 continue;
226 target_loop = loop_outer (from->loop_father);
227 if (loop_closed_ssa_invalidated)
229 basic_block *bbs = get_loop_body (from->loop_father);
230 for (unsigned i = 0; i < from->loop_father->num_nodes; ++i)
231 bitmap_set_bit (loop_closed_ssa_invalidated, bbs[i]->index);
232 free (bbs);
235 else
237 /* Ordinary basic block. */
238 if (!fix_bb_placement (from))
239 continue;
240 target_loop = from->loop_father;
241 if (loop_closed_ssa_invalidated)
242 bitmap_set_bit (loop_closed_ssa_invalidated, from->index);
245 FOR_EACH_EDGE (e, ei, from->succs)
247 if (e->flags & EDGE_IRREDUCIBLE_LOOP)
248 *irred_invalidated = true;
251 /* Something has changed, insert predecessors into queue. */
252 FOR_EACH_EDGE (e, ei, from->preds)
254 basic_block pred = e->src;
255 struct loop *nca;
257 if (e->flags & EDGE_IRREDUCIBLE_LOOP)
258 *irred_invalidated = true;
260 if (bitmap_bit_p (in_queue, pred->index))
261 continue;
263 /* If it is subloop, then it either was not moved, or
264 the path up the loop tree from base_loop do not contain
265 it. */
266 nca = find_common_loop (pred->loop_father, base_loop);
267 if (pred->loop_father != base_loop
268 && (nca == base_loop
269 || nca != pred->loop_father))
270 pred = pred->loop_father->header;
271 else if (!flow_loop_nested_p (target_loop, pred->loop_father))
273 /* If PRED is already higher in the loop hierarchy than the
274 TARGET_LOOP to that we moved FROM, the change of the position
275 of FROM does not affect the position of PRED, so there is no
276 point in processing it. */
277 continue;
280 if (bitmap_bit_p (in_queue, pred->index))
281 continue;
283 /* Schedule the basic block. */
284 *qend = pred;
285 qend++;
286 if (qend == qtop)
287 qend = queue;
288 bitmap_set_bit (in_queue, pred->index);
291 free (in_queue);
292 free (queue);
295 /* Removes path beginning at edge E, i.e. remove basic blocks dominated by E
296 and update loop structures and dominators. Return true if we were able
297 to remove the path, false otherwise (and nothing is affected then). */
298 bool
299 remove_path (edge e)
301 edge ae;
302 basic_block *rem_bbs, *bord_bbs, from, bb;
303 vec<basic_block> dom_bbs;
304 int i, nrem, n_bord_bbs;
305 sbitmap seen;
306 bool irred_invalidated = false;
307 edge_iterator ei;
308 struct loop *l, *f;
310 if (!can_remove_branch_p (e))
311 return false;
313 /* Keep track of whether we need to update information about irreducible
314 regions. This is the case if the removed area is a part of the
315 irreducible region, or if the set of basic blocks that belong to a loop
316 that is inside an irreducible region is changed, or if such a loop is
317 removed. */
318 if (e->flags & EDGE_IRREDUCIBLE_LOOP)
319 irred_invalidated = true;
321 /* We need to check whether basic blocks are dominated by the edge
322 e, but we only have basic block dominators. This is easy to
323 fix -- when e->dest has exactly one predecessor, this corresponds
324 to blocks dominated by e->dest, if not, split the edge. */
325 if (!single_pred_p (e->dest))
326 e = single_pred_edge (split_edge (e));
328 /* It may happen that by removing path we remove one or more loops
329 we belong to. In this case first unloop the loops, then proceed
330 normally. We may assume that e->dest is not a header of any loop,
331 as it now has exactly one predecessor. */
332 for (l = e->src->loop_father; loop_outer (l); l = f)
334 f = loop_outer (l);
335 if (dominated_by_p (CDI_DOMINATORS, l->latch, e->dest))
336 unloop (l, &irred_invalidated, NULL);
339 /* Identify the path. */
340 nrem = find_path (e, &rem_bbs);
342 n_bord_bbs = 0;
343 bord_bbs = XNEWVEC (basic_block, n_basic_blocks);
344 seen = sbitmap_alloc (last_basic_block);
345 bitmap_clear (seen);
347 /* Find "border" hexes -- i.e. those with predecessor in removed path. */
348 for (i = 0; i < nrem; i++)
349 bitmap_set_bit (seen, rem_bbs[i]->index);
350 if (!irred_invalidated)
351 FOR_EACH_EDGE (ae, ei, e->src->succs)
352 if (ae != e && ae->dest != EXIT_BLOCK_PTR && !bitmap_bit_p (seen, ae->dest->index)
353 && ae->flags & EDGE_IRREDUCIBLE_LOOP)
355 irred_invalidated = true;
356 break;
359 for (i = 0; i < nrem; i++)
361 bb = rem_bbs[i];
362 FOR_EACH_EDGE (ae, ei, rem_bbs[i]->succs)
363 if (ae->dest != EXIT_BLOCK_PTR && !bitmap_bit_p (seen, ae->dest->index))
365 bitmap_set_bit (seen, ae->dest->index);
366 bord_bbs[n_bord_bbs++] = ae->dest;
368 if (ae->flags & EDGE_IRREDUCIBLE_LOOP)
369 irred_invalidated = true;
373 /* Remove the path. */
374 from = e->src;
375 remove_branch (e);
376 dom_bbs.create (0);
378 /* Cancel loops contained in the path. */
379 for (i = 0; i < nrem; i++)
380 if (rem_bbs[i]->loop_father->header == rem_bbs[i])
381 cancel_loop_tree (rem_bbs[i]->loop_father);
383 remove_bbs (rem_bbs, nrem);
384 free (rem_bbs);
386 /* Find blocks whose dominators may be affected. */
387 bitmap_clear (seen);
388 for (i = 0; i < n_bord_bbs; i++)
390 basic_block ldom;
392 bb = get_immediate_dominator (CDI_DOMINATORS, bord_bbs[i]);
393 if (bitmap_bit_p (seen, bb->index))
394 continue;
395 bitmap_set_bit (seen, bb->index);
397 for (ldom = first_dom_son (CDI_DOMINATORS, bb);
398 ldom;
399 ldom = next_dom_son (CDI_DOMINATORS, ldom))
400 if (!dominated_by_p (CDI_DOMINATORS, from, ldom))
401 dom_bbs.safe_push (ldom);
404 free (seen);
406 /* Recount dominators. */
407 iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, true);
408 dom_bbs.release ();
409 free (bord_bbs);
411 /* Fix placements of basic blocks inside loops and the placement of
412 loops in the loop tree. */
413 fix_bb_placements (from, &irred_invalidated, NULL);
414 fix_loop_placements (from->loop_father, &irred_invalidated);
416 if (irred_invalidated
417 && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
418 mark_irreducible_loops ();
420 return true;
423 /* Creates place for a new LOOP in loops structure of FN. */
425 void
426 place_new_loop (struct function *fn, struct loop *loop)
428 loop->num = number_of_loops (fn);
429 vec_safe_push (loops_for_fn (fn)->larray, loop);
432 /* Given LOOP structure with filled header and latch, find the body of the
433 corresponding loop and add it to loops tree. Insert the LOOP as a son of
434 outer. */
436 void
437 add_loop (struct loop *loop, struct loop *outer)
439 basic_block *bbs;
440 int i, n;
441 struct loop *subloop;
442 edge e;
443 edge_iterator ei;
445 /* Add it to loop structure. */
446 place_new_loop (cfun, loop);
447 flow_loop_tree_node_add (outer, loop);
449 /* Find its nodes. */
450 bbs = XNEWVEC (basic_block, n_basic_blocks);
451 n = get_loop_body_with_size (loop, bbs, n_basic_blocks);
453 for (i = 0; i < n; i++)
455 if (bbs[i]->loop_father == outer)
457 remove_bb_from_loops (bbs[i]);
458 add_bb_to_loop (bbs[i], loop);
459 continue;
462 loop->num_nodes++;
464 /* If we find a direct subloop of OUTER, move it to LOOP. */
465 subloop = bbs[i]->loop_father;
466 if (loop_outer (subloop) == outer
467 && subloop->header == bbs[i])
469 flow_loop_tree_node_remove (subloop);
470 flow_loop_tree_node_add (loop, subloop);
474 /* Update the information about loop exit edges. */
475 for (i = 0; i < n; i++)
477 FOR_EACH_EDGE (e, ei, bbs[i]->succs)
479 rescan_loop_exit (e, false, false);
483 free (bbs);
486 /* Multiply all frequencies in LOOP by NUM/DEN. */
488 void
489 scale_loop_frequencies (struct loop *loop, int num, int den)
491 basic_block *bbs;
493 bbs = get_loop_body (loop);
494 scale_bbs_frequencies_int (bbs, loop->num_nodes, num, den);
495 free (bbs);
498 /* Multiply all frequencies in LOOP by SCALE/REG_BR_PROB_BASE.
499 If ITERATION_BOUND is non-zero, scale even further if loop is predicted
500 to iterate too many times. */
502 void
503 scale_loop_profile (struct loop *loop, int scale, gcov_type iteration_bound)
505 gcov_type iterations = expected_loop_iterations_unbounded (loop);
506 edge e;
507 edge_iterator ei;
509 if (dump_file && (dump_flags & TDF_DETAILS))
510 fprintf (dump_file, ";; Scaling loop %i with scale %f, "
511 "bounding iterations to %i from guessed %i\n",
512 loop->num, (double)scale / REG_BR_PROB_BASE,
513 (int)iteration_bound, (int)iterations);
515 /* See if loop is predicted to iterate too many times. */
516 if (iteration_bound && iterations > 0
517 && apply_probability (iterations, scale) > iteration_bound)
519 /* Fixing loop profile for different trip count is not trivial; the exit
520 probabilities has to be updated to match and frequencies propagated down
521 to the loop body.
523 We fully update only the simple case of loop with single exit that is
524 either from the latch or BB just before latch and leads from BB with
525 simple conditional jump. This is OK for use in vectorizer. */
526 e = single_exit (loop);
527 if (e)
529 edge other_e;
530 int freq_delta;
531 gcov_type count_delta;
533 FOR_EACH_EDGE (other_e, ei, e->src->succs)
534 if (!(other_e->flags & (EDGE_ABNORMAL | EDGE_FAKE))
535 && e != other_e)
536 break;
538 /* Probability of exit must be 1/iterations. */
539 freq_delta = EDGE_FREQUENCY (e);
540 e->probability = REG_BR_PROB_BASE / iteration_bound;
541 other_e->probability = inverse_probability (e->probability);
542 freq_delta -= EDGE_FREQUENCY (e);
544 /* Adjust counts accordingly. */
545 count_delta = e->count;
546 e->count = apply_probability (e->src->count, e->probability);
547 other_e->count = apply_probability (e->src->count, other_e->probability);
548 count_delta -= e->count;
550 /* If latch exists, change its frequency and count, since we changed
551 probability of exit. Theoretically we should update everything from
552 source of exit edge to latch, but for vectorizer this is enough. */
553 if (loop->latch
554 && loop->latch != e->src)
556 loop->latch->frequency += freq_delta;
557 if (loop->latch->frequency < 0)
558 loop->latch->frequency = 0;
559 loop->latch->count += count_delta;
560 if (loop->latch->count < 0)
561 loop->latch->count = 0;
565 /* Roughly speaking we want to reduce the loop body profile by the
566 the difference of loop iterations. We however can do better if
567 we look at the actual profile, if it is available. */
568 scale = RDIV (iteration_bound * scale, iterations);
569 if (loop->header->count)
571 gcov_type count_in = 0;
573 FOR_EACH_EDGE (e, ei, loop->header->preds)
574 if (e->src != loop->latch)
575 count_in += e->count;
577 if (count_in != 0)
578 scale = GCOV_COMPUTE_SCALE (count_in * iteration_bound,
579 loop->header->count);
581 else if (loop->header->frequency)
583 int freq_in = 0;
585 FOR_EACH_EDGE (e, ei, loop->header->preds)
586 if (e->src != loop->latch)
587 freq_in += EDGE_FREQUENCY (e);
589 if (freq_in != 0)
590 scale = GCOV_COMPUTE_SCALE (freq_in * iteration_bound,
591 loop->header->frequency);
593 if (!scale)
594 scale = 1;
597 if (scale == REG_BR_PROB_BASE)
598 return;
600 /* Scale the actual probabilities. */
601 scale_loop_frequencies (loop, scale, REG_BR_PROB_BASE);
602 if (dump_file && (dump_flags & TDF_DETAILS))
603 fprintf (dump_file, ";; guessed iterations are now %i\n",
604 (int)expected_loop_iterations_unbounded (loop));
607 /* Recompute dominance information for basic blocks outside LOOP. */
609 static void
610 update_dominators_in_loop (struct loop *loop)
612 vec<basic_block> dom_bbs = vNULL;
613 sbitmap seen;
614 basic_block *body;
615 unsigned i;
617 seen = sbitmap_alloc (last_basic_block);
618 bitmap_clear (seen);
619 body = get_loop_body (loop);
621 for (i = 0; i < loop->num_nodes; i++)
622 bitmap_set_bit (seen, body[i]->index);
624 for (i = 0; i < loop->num_nodes; i++)
626 basic_block ldom;
628 for (ldom = first_dom_son (CDI_DOMINATORS, body[i]);
629 ldom;
630 ldom = next_dom_son (CDI_DOMINATORS, ldom))
631 if (!bitmap_bit_p (seen, ldom->index))
633 bitmap_set_bit (seen, ldom->index);
634 dom_bbs.safe_push (ldom);
638 iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, false);
639 free (body);
640 free (seen);
641 dom_bbs.release ();
644 /* Creates an if region as shown above. CONDITION is used to create
645 the test for the if.
648 | ------------- -------------
649 | | pred_bb | | pred_bb |
650 | ------------- -------------
651 | | |
652 | | | ENTRY_EDGE
653 | | ENTRY_EDGE V
654 | | ====> -------------
655 | | | cond_bb |
656 | | | CONDITION |
657 | | -------------
658 | V / \
659 | ------------- e_false / \ e_true
660 | | succ_bb | V V
661 | ------------- ----------- -----------
662 | | false_bb | | true_bb |
663 | ----------- -----------
664 | \ /
665 | \ /
666 | V V
667 | -------------
668 | | join_bb |
669 | -------------
670 | | exit_edge (result)
672 | -----------
673 | | succ_bb |
674 | -----------
678 edge
679 create_empty_if_region_on_edge (edge entry_edge, tree condition)
682 basic_block cond_bb, true_bb, false_bb, join_bb;
683 edge e_true, e_false, exit_edge;
684 gimple cond_stmt;
685 tree simple_cond;
686 gimple_stmt_iterator gsi;
688 cond_bb = split_edge (entry_edge);
690 /* Insert condition in cond_bb. */
691 gsi = gsi_last_bb (cond_bb);
692 simple_cond =
693 force_gimple_operand_gsi (&gsi, condition, true, NULL,
694 false, GSI_NEW_STMT);
695 cond_stmt = gimple_build_cond_from_tree (simple_cond, NULL_TREE, NULL_TREE);
696 gsi = gsi_last_bb (cond_bb);
697 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
699 join_bb = split_edge (single_succ_edge (cond_bb));
701 e_true = single_succ_edge (cond_bb);
702 true_bb = split_edge (e_true);
704 e_false = make_edge (cond_bb, join_bb, 0);
705 false_bb = split_edge (e_false);
707 e_true->flags &= ~EDGE_FALLTHRU;
708 e_true->flags |= EDGE_TRUE_VALUE;
709 e_false->flags &= ~EDGE_FALLTHRU;
710 e_false->flags |= EDGE_FALSE_VALUE;
712 set_immediate_dominator (CDI_DOMINATORS, cond_bb, entry_edge->src);
713 set_immediate_dominator (CDI_DOMINATORS, true_bb, cond_bb);
714 set_immediate_dominator (CDI_DOMINATORS, false_bb, cond_bb);
715 set_immediate_dominator (CDI_DOMINATORS, join_bb, cond_bb);
717 exit_edge = single_succ_edge (join_bb);
719 if (single_pred_p (exit_edge->dest))
720 set_immediate_dominator (CDI_DOMINATORS, exit_edge->dest, join_bb);
722 return exit_edge;
725 /* create_empty_loop_on_edge
727 | - pred_bb - ------ pred_bb ------
728 | | | | iv0 = initial_value |
729 | -----|----- ---------|-----------
730 | | ______ | entry_edge
731 | | entry_edge / | |
732 | | ====> | -V---V- loop_header -------------
733 | V | | iv_before = phi (iv0, iv_after) |
734 | - succ_bb - | ---|-----------------------------
735 | | | | |
736 | ----------- | ---V--- loop_body ---------------
737 | | | iv_after = iv_before + stride |
738 | | | if (iv_before < upper_bound) |
739 | | ---|--------------\--------------
740 | | | \ exit_e
741 | | V \
742 | | - loop_latch - V- succ_bb -
743 | | | | | |
744 | | /------------- -----------
745 | \ ___ /
747 Creates an empty loop as shown above, the IV_BEFORE is the SSA_NAME
748 that is used before the increment of IV. IV_BEFORE should be used for
749 adding code to the body that uses the IV. OUTER is the outer loop in
750 which the new loop should be inserted.
752 Both INITIAL_VALUE and UPPER_BOUND expressions are gimplified and
753 inserted on the loop entry edge. This implies that this function
754 should be used only when the UPPER_BOUND expression is a loop
755 invariant. */
757 struct loop *
758 create_empty_loop_on_edge (edge entry_edge,
759 tree initial_value,
760 tree stride, tree upper_bound,
761 tree iv,
762 tree *iv_before,
763 tree *iv_after,
764 struct loop *outer)
766 basic_block loop_header, loop_latch, succ_bb, pred_bb;
767 struct loop *loop;
768 gimple_stmt_iterator gsi;
769 gimple_seq stmts;
770 gimple cond_expr;
771 tree exit_test;
772 edge exit_e;
773 int prob;
775 gcc_assert (entry_edge && initial_value && stride && upper_bound && iv);
777 /* Create header, latch and wire up the loop. */
778 pred_bb = entry_edge->src;
779 loop_header = split_edge (entry_edge);
780 loop_latch = split_edge (single_succ_edge (loop_header));
781 succ_bb = single_succ (loop_latch);
782 make_edge (loop_header, succ_bb, 0);
783 redirect_edge_succ_nodup (single_succ_edge (loop_latch), loop_header);
785 /* Set immediate dominator information. */
786 set_immediate_dominator (CDI_DOMINATORS, loop_header, pred_bb);
787 set_immediate_dominator (CDI_DOMINATORS, loop_latch, loop_header);
788 set_immediate_dominator (CDI_DOMINATORS, succ_bb, loop_header);
790 /* Initialize a loop structure and put it in a loop hierarchy. */
791 loop = alloc_loop ();
792 loop->header = loop_header;
793 loop->latch = loop_latch;
794 add_loop (loop, outer);
796 /* TODO: Fix frequencies and counts. */
797 prob = REG_BR_PROB_BASE / 2;
799 scale_loop_frequencies (loop, REG_BR_PROB_BASE - prob, REG_BR_PROB_BASE);
801 /* Update dominators. */
802 update_dominators_in_loop (loop);
804 /* Modify edge flags. */
805 exit_e = single_exit (loop);
806 exit_e->flags = EDGE_LOOP_EXIT | EDGE_FALSE_VALUE;
807 single_pred_edge (loop_latch)->flags = EDGE_TRUE_VALUE;
809 /* Construct IV code in loop. */
810 initial_value = force_gimple_operand (initial_value, &stmts, true, iv);
811 if (stmts)
813 gsi_insert_seq_on_edge (loop_preheader_edge (loop), stmts);
814 gsi_commit_edge_inserts ();
817 upper_bound = force_gimple_operand (upper_bound, &stmts, true, NULL);
818 if (stmts)
820 gsi_insert_seq_on_edge (loop_preheader_edge (loop), stmts);
821 gsi_commit_edge_inserts ();
824 gsi = gsi_last_bb (loop_header);
825 create_iv (initial_value, stride, iv, loop, &gsi, false,
826 iv_before, iv_after);
828 /* Insert loop exit condition. */
829 cond_expr = gimple_build_cond
830 (LT_EXPR, *iv_before, upper_bound, NULL_TREE, NULL_TREE);
832 exit_test = gimple_cond_lhs (cond_expr);
833 exit_test = force_gimple_operand_gsi (&gsi, exit_test, true, NULL,
834 false, GSI_NEW_STMT);
835 gimple_cond_set_lhs (cond_expr, exit_test);
836 gsi = gsi_last_bb (exit_e->src);
837 gsi_insert_after (&gsi, cond_expr, GSI_NEW_STMT);
839 split_block_after_labels (loop_header);
841 return loop;
844 /* Make area between HEADER_EDGE and LATCH_EDGE a loop by connecting
845 latch to header and update loop tree and dominators
846 accordingly. Everything between them plus LATCH_EDGE destination must
847 be dominated by HEADER_EDGE destination, and back-reachable from
848 LATCH_EDGE source. HEADER_EDGE is redirected to basic block SWITCH_BB,
849 FALSE_EDGE of SWITCH_BB to original destination of HEADER_EDGE and
850 TRUE_EDGE of SWITCH_BB to original destination of LATCH_EDGE.
851 Returns the newly created loop. Frequencies and counts in the new loop
852 are scaled by FALSE_SCALE and in the old one by TRUE_SCALE. */
854 struct loop *
855 loopify (edge latch_edge, edge header_edge,
856 basic_block switch_bb, edge true_edge, edge false_edge,
857 bool redirect_all_edges, unsigned true_scale, unsigned false_scale)
859 basic_block succ_bb = latch_edge->dest;
860 basic_block pred_bb = header_edge->src;
861 struct loop *loop = alloc_loop ();
862 struct loop *outer = loop_outer (succ_bb->loop_father);
863 int freq;
864 gcov_type cnt;
865 edge e;
866 edge_iterator ei;
868 loop->header = header_edge->dest;
869 loop->latch = latch_edge->src;
871 freq = EDGE_FREQUENCY (header_edge);
872 cnt = header_edge->count;
874 /* Redirect edges. */
875 loop_redirect_edge (latch_edge, loop->header);
876 loop_redirect_edge (true_edge, succ_bb);
878 /* During loop versioning, one of the switch_bb edge is already properly
879 set. Do not redirect it again unless redirect_all_edges is true. */
880 if (redirect_all_edges)
882 loop_redirect_edge (header_edge, switch_bb);
883 loop_redirect_edge (false_edge, loop->header);
885 /* Update dominators. */
886 set_immediate_dominator (CDI_DOMINATORS, switch_bb, pred_bb);
887 set_immediate_dominator (CDI_DOMINATORS, loop->header, switch_bb);
890 set_immediate_dominator (CDI_DOMINATORS, succ_bb, switch_bb);
892 /* Compute new loop. */
893 add_loop (loop, outer);
895 /* Add switch_bb to appropriate loop. */
896 if (switch_bb->loop_father)
897 remove_bb_from_loops (switch_bb);
898 add_bb_to_loop (switch_bb, outer);
900 /* Fix frequencies. */
901 if (redirect_all_edges)
903 switch_bb->frequency = freq;
904 switch_bb->count = cnt;
905 FOR_EACH_EDGE (e, ei, switch_bb->succs)
907 e->count = apply_probability (switch_bb->count, e->probability);
910 scale_loop_frequencies (loop, false_scale, REG_BR_PROB_BASE);
911 scale_loop_frequencies (succ_bb->loop_father, true_scale, REG_BR_PROB_BASE);
912 update_dominators_in_loop (loop);
914 return loop;
917 /* Remove the latch edge of a LOOP and update loops to indicate that
918 the LOOP was removed. After this function, original loop latch will
919 have no successor, which caller is expected to fix somehow.
921 If this may cause the information about irreducible regions to become
922 invalid, IRRED_INVALIDATED is set to true.
924 LOOP_CLOSED_SSA_INVALIDATED, if non-NULL, is a bitmap where we store
925 basic blocks that had non-trivial update on their loop_father.*/
927 void
928 unloop (struct loop *loop, bool *irred_invalidated,
929 bitmap loop_closed_ssa_invalidated)
931 basic_block *body;
932 struct loop *ploop;
933 unsigned i, n;
934 basic_block latch = loop->latch;
935 bool dummy = false;
937 if (loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP)
938 *irred_invalidated = true;
940 /* This is relatively straightforward. The dominators are unchanged, as
941 loop header dominates loop latch, so the only thing we have to care of
942 is the placement of loops and basic blocks inside the loop tree. We
943 move them all to the loop->outer, and then let fix_bb_placements do
944 its work. */
946 body = get_loop_body (loop);
947 n = loop->num_nodes;
948 for (i = 0; i < n; i++)
949 if (body[i]->loop_father == loop)
951 remove_bb_from_loops (body[i]);
952 add_bb_to_loop (body[i], loop_outer (loop));
954 free (body);
956 while (loop->inner)
958 ploop = loop->inner;
959 flow_loop_tree_node_remove (ploop);
960 flow_loop_tree_node_add (loop_outer (loop), ploop);
963 /* Remove the loop and free its data. */
964 delete_loop (loop);
966 remove_edge (single_succ_edge (latch));
968 /* We do not pass IRRED_INVALIDATED to fix_bb_placements here, as even if
969 there is an irreducible region inside the cancelled loop, the flags will
970 be still correct. */
971 fix_bb_placements (latch, &dummy, loop_closed_ssa_invalidated);
974 /* Fix placement of superloops of LOOP inside loop tree, i.e. ensure that
975 condition stated in description of fix_loop_placement holds for them.
976 It is used in case when we removed some edges coming out of LOOP, which
977 may cause the right placement of LOOP inside loop tree to change.
979 IRRED_INVALIDATED is set to true if a change in the loop structures might
980 invalidate the information about irreducible regions. */
982 static void
983 fix_loop_placements (struct loop *loop, bool *irred_invalidated)
985 struct loop *outer;
987 while (loop_outer (loop))
989 outer = loop_outer (loop);
990 if (!fix_loop_placement (loop, irred_invalidated))
991 break;
993 /* Changing the placement of a loop in the loop tree may alter the
994 validity of condition 2) of the description of fix_bb_placement
995 for its preheader, because the successor is the header and belongs
996 to the loop. So call fix_bb_placements to fix up the placement
997 of the preheader and (possibly) of its predecessors. */
998 fix_bb_placements (loop_preheader_edge (loop)->src,
999 irred_invalidated, NULL);
1000 loop = outer;
1004 /* Duplicate loop bounds and other information we store about
1005 the loop into its duplicate. */
1007 void
1008 copy_loop_info (struct loop *loop, struct loop *target)
1010 gcc_checking_assert (!target->any_upper_bound && !target->any_estimate);
1011 target->any_upper_bound = loop->any_upper_bound;
1012 target->nb_iterations_upper_bound = loop->nb_iterations_upper_bound;
1013 target->any_estimate = loop->any_estimate;
1014 target->nb_iterations_estimate = loop->nb_iterations_estimate;
1015 target->estimate_state = loop->estimate_state;
1018 /* Copies copy of LOOP as subloop of TARGET loop, placing newly
1019 created loop into loops structure. */
1020 struct loop *
1021 duplicate_loop (struct loop *loop, struct loop *target)
1023 struct loop *cloop;
1024 cloop = alloc_loop ();
1025 place_new_loop (cfun, cloop);
1027 copy_loop_info (loop, cloop);
1029 /* Mark the new loop as copy of LOOP. */
1030 set_loop_copy (loop, cloop);
1032 /* Add it to target. */
1033 flow_loop_tree_node_add (target, cloop);
1035 return cloop;
1038 /* Copies structure of subloops of LOOP into TARGET loop, placing
1039 newly created loops into loop tree. */
1040 void
1041 duplicate_subloops (struct loop *loop, struct loop *target)
1043 struct loop *aloop, *cloop;
1045 for (aloop = loop->inner; aloop; aloop = aloop->next)
1047 cloop = duplicate_loop (aloop, target);
1048 duplicate_subloops (aloop, cloop);
1052 /* Copies structure of subloops of N loops, stored in array COPIED_LOOPS,
1053 into TARGET loop, placing newly created loops into loop tree. */
1054 static void
1055 copy_loops_to (struct loop **copied_loops, int n, struct loop *target)
1057 struct loop *aloop;
1058 int i;
1060 for (i = 0; i < n; i++)
1062 aloop = duplicate_loop (copied_loops[i], target);
1063 duplicate_subloops (copied_loops[i], aloop);
1067 /* Redirects edge E to basic block DEST. */
1068 static void
1069 loop_redirect_edge (edge e, basic_block dest)
1071 if (e->dest == dest)
1072 return;
1074 redirect_edge_and_branch_force (e, dest);
1077 /* Check whether LOOP's body can be duplicated. */
1078 bool
1079 can_duplicate_loop_p (const struct loop *loop)
1081 int ret;
1082 basic_block *bbs = get_loop_body (loop);
1084 ret = can_copy_bbs_p (bbs, loop->num_nodes);
1085 free (bbs);
1087 return ret;
1090 /* Sets probability and count of edge E to zero. The probability and count
1091 is redistributed evenly to the remaining edges coming from E->src. */
1093 static void
1094 set_zero_probability (edge e)
1096 basic_block bb = e->src;
1097 edge_iterator ei;
1098 edge ae, last = NULL;
1099 unsigned n = EDGE_COUNT (bb->succs);
1100 gcov_type cnt = e->count, cnt1;
1101 unsigned prob = e->probability, prob1;
1103 gcc_assert (n > 1);
1104 cnt1 = cnt / (n - 1);
1105 prob1 = prob / (n - 1);
1107 FOR_EACH_EDGE (ae, ei, bb->succs)
1109 if (ae == e)
1110 continue;
1112 ae->probability += prob1;
1113 ae->count += cnt1;
1114 last = ae;
1117 /* Move the rest to one of the edges. */
1118 last->probability += prob % (n - 1);
1119 last->count += cnt % (n - 1);
1121 e->probability = 0;
1122 e->count = 0;
1125 /* Duplicates body of LOOP to given edge E NDUPL times. Takes care of updating
1126 loop structure and dominators. E's destination must be LOOP header for
1127 this to work, i.e. it must be entry or latch edge of this loop; these are
1128 unique, as the loops must have preheaders for this function to work
1129 correctly (in case E is latch, the function unrolls the loop, if E is entry
1130 edge, it peels the loop). Store edges created by copying ORIG edge from
1131 copies corresponding to set bits in WONT_EXIT bitmap (bit 0 corresponds to
1132 original LOOP body, the other copies are numbered in order given by control
1133 flow through them) into TO_REMOVE array. Returns false if duplication is
1134 impossible. */
1136 bool
1137 duplicate_loop_to_header_edge (struct loop *loop, edge e,
1138 unsigned int ndupl, sbitmap wont_exit,
1139 edge orig, vec<edge> *to_remove,
1140 int flags)
1142 struct loop *target, *aloop;
1143 struct loop **orig_loops;
1144 unsigned n_orig_loops;
1145 basic_block header = loop->header, latch = loop->latch;
1146 basic_block *new_bbs, *bbs, *first_active;
1147 basic_block new_bb, bb, first_active_latch = NULL;
1148 edge ae, latch_edge;
1149 edge spec_edges[2], new_spec_edges[2];
1150 #define SE_LATCH 0
1151 #define SE_ORIG 1
1152 unsigned i, j, n;
1153 int is_latch = (latch == e->src);
1154 int scale_act = 0, *scale_step = NULL, scale_main = 0;
1155 int scale_after_exit = 0;
1156 int p, freq_in, freq_le, freq_out_orig;
1157 int prob_pass_thru, prob_pass_wont_exit, prob_pass_main;
1158 int add_irreducible_flag;
1159 basic_block place_after;
1160 bitmap bbs_to_scale = NULL;
1161 bitmap_iterator bi;
1163 gcc_assert (e->dest == loop->header);
1164 gcc_assert (ndupl > 0);
1166 if (orig)
1168 /* Orig must be edge out of the loop. */
1169 gcc_assert (flow_bb_inside_loop_p (loop, orig->src));
1170 gcc_assert (!flow_bb_inside_loop_p (loop, orig->dest));
1173 n = loop->num_nodes;
1174 bbs = get_loop_body_in_dom_order (loop);
1175 gcc_assert (bbs[0] == loop->header);
1176 gcc_assert (bbs[n - 1] == loop->latch);
1178 /* Check whether duplication is possible. */
1179 if (!can_copy_bbs_p (bbs, loop->num_nodes))
1181 free (bbs);
1182 return false;
1184 new_bbs = XNEWVEC (basic_block, loop->num_nodes);
1186 /* In case we are doing loop peeling and the loop is in the middle of
1187 irreducible region, the peeled copies will be inside it too. */
1188 add_irreducible_flag = e->flags & EDGE_IRREDUCIBLE_LOOP;
1189 gcc_assert (!is_latch || !add_irreducible_flag);
1191 /* Find edge from latch. */
1192 latch_edge = loop_latch_edge (loop);
1194 if (flags & DLTHE_FLAG_UPDATE_FREQ)
1196 /* Calculate coefficients by that we have to scale frequencies
1197 of duplicated loop bodies. */
1198 freq_in = header->frequency;
1199 freq_le = EDGE_FREQUENCY (latch_edge);
1200 if (freq_in == 0)
1201 freq_in = 1;
1202 if (freq_in < freq_le)
1203 freq_in = freq_le;
1204 freq_out_orig = orig ? EDGE_FREQUENCY (orig) : freq_in - freq_le;
1205 if (freq_out_orig > freq_in - freq_le)
1206 freq_out_orig = freq_in - freq_le;
1207 prob_pass_thru = RDIV (REG_BR_PROB_BASE * freq_le, freq_in);
1208 prob_pass_wont_exit =
1209 RDIV (REG_BR_PROB_BASE * (freq_le + freq_out_orig), freq_in);
1211 if (orig
1212 && REG_BR_PROB_BASE - orig->probability != 0)
1214 /* The blocks that are dominated by a removed exit edge ORIG have
1215 frequencies scaled by this. */
1216 scale_after_exit
1217 = GCOV_COMPUTE_SCALE (REG_BR_PROB_BASE,
1218 REG_BR_PROB_BASE - orig->probability);
1219 bbs_to_scale = BITMAP_ALLOC (NULL);
1220 for (i = 0; i < n; i++)
1222 if (bbs[i] != orig->src
1223 && dominated_by_p (CDI_DOMINATORS, bbs[i], orig->src))
1224 bitmap_set_bit (bbs_to_scale, i);
1228 scale_step = XNEWVEC (int, ndupl);
1230 for (i = 1; i <= ndupl; i++)
1231 scale_step[i - 1] = bitmap_bit_p (wont_exit, i)
1232 ? prob_pass_wont_exit
1233 : prob_pass_thru;
1235 /* Complete peeling is special as the probability of exit in last
1236 copy becomes 1. */
1237 if (flags & DLTHE_FLAG_COMPLETTE_PEEL)
1239 int wanted_freq = EDGE_FREQUENCY (e);
1241 if (wanted_freq > freq_in)
1242 wanted_freq = freq_in;
1244 gcc_assert (!is_latch);
1245 /* First copy has frequency of incoming edge. Each subsequent
1246 frequency should be reduced by prob_pass_wont_exit. Caller
1247 should've managed the flags so all except for original loop
1248 has won't exist set. */
1249 scale_act = GCOV_COMPUTE_SCALE (wanted_freq, freq_in);
1250 /* Now simulate the duplication adjustments and compute header
1251 frequency of the last copy. */
1252 for (i = 0; i < ndupl; i++)
1253 wanted_freq = combine_probabilities (wanted_freq, scale_step[i]);
1254 scale_main = GCOV_COMPUTE_SCALE (wanted_freq, freq_in);
1256 else if (is_latch)
1258 prob_pass_main = bitmap_bit_p (wont_exit, 0)
1259 ? prob_pass_wont_exit
1260 : prob_pass_thru;
1261 p = prob_pass_main;
1262 scale_main = REG_BR_PROB_BASE;
1263 for (i = 0; i < ndupl; i++)
1265 scale_main += p;
1266 p = combine_probabilities (p, scale_step[i]);
1268 scale_main = GCOV_COMPUTE_SCALE (REG_BR_PROB_BASE, scale_main);
1269 scale_act = combine_probabilities (scale_main, prob_pass_main);
1271 else
1273 scale_main = REG_BR_PROB_BASE;
1274 for (i = 0; i < ndupl; i++)
1275 scale_main = combine_probabilities (scale_main, scale_step[i]);
1276 scale_act = REG_BR_PROB_BASE - prob_pass_thru;
1278 for (i = 0; i < ndupl; i++)
1279 gcc_assert (scale_step[i] >= 0 && scale_step[i] <= REG_BR_PROB_BASE);
1280 gcc_assert (scale_main >= 0 && scale_main <= REG_BR_PROB_BASE
1281 && scale_act >= 0 && scale_act <= REG_BR_PROB_BASE);
1284 /* Loop the new bbs will belong to. */
1285 target = e->src->loop_father;
1287 /* Original loops. */
1288 n_orig_loops = 0;
1289 for (aloop = loop->inner; aloop; aloop = aloop->next)
1290 n_orig_loops++;
1291 orig_loops = XNEWVEC (struct loop *, n_orig_loops);
1292 for (aloop = loop->inner, i = 0; aloop; aloop = aloop->next, i++)
1293 orig_loops[i] = aloop;
1295 set_loop_copy (loop, target);
1297 first_active = XNEWVEC (basic_block, n);
1298 if (is_latch)
1300 memcpy (first_active, bbs, n * sizeof (basic_block));
1301 first_active_latch = latch;
1304 spec_edges[SE_ORIG] = orig;
1305 spec_edges[SE_LATCH] = latch_edge;
1307 place_after = e->src;
1308 for (j = 0; j < ndupl; j++)
1310 /* Copy loops. */
1311 copy_loops_to (orig_loops, n_orig_loops, target);
1313 /* Copy bbs. */
1314 copy_bbs (bbs, n, new_bbs, spec_edges, 2, new_spec_edges, loop,
1315 place_after, true);
1316 place_after = new_spec_edges[SE_LATCH]->src;
1318 if (flags & DLTHE_RECORD_COPY_NUMBER)
1319 for (i = 0; i < n; i++)
1321 gcc_assert (!new_bbs[i]->aux);
1322 new_bbs[i]->aux = (void *)(size_t)(j + 1);
1325 /* Note whether the blocks and edges belong to an irreducible loop. */
1326 if (add_irreducible_flag)
1328 for (i = 0; i < n; i++)
1329 new_bbs[i]->flags |= BB_DUPLICATED;
1330 for (i = 0; i < n; i++)
1332 edge_iterator ei;
1333 new_bb = new_bbs[i];
1334 if (new_bb->loop_father == target)
1335 new_bb->flags |= BB_IRREDUCIBLE_LOOP;
1337 FOR_EACH_EDGE (ae, ei, new_bb->succs)
1338 if ((ae->dest->flags & BB_DUPLICATED)
1339 && (ae->src->loop_father == target
1340 || ae->dest->loop_father == target))
1341 ae->flags |= EDGE_IRREDUCIBLE_LOOP;
1343 for (i = 0; i < n; i++)
1344 new_bbs[i]->flags &= ~BB_DUPLICATED;
1347 /* Redirect the special edges. */
1348 if (is_latch)
1350 redirect_edge_and_branch_force (latch_edge, new_bbs[0]);
1351 redirect_edge_and_branch_force (new_spec_edges[SE_LATCH],
1352 loop->header);
1353 set_immediate_dominator (CDI_DOMINATORS, new_bbs[0], latch);
1354 latch = loop->latch = new_bbs[n - 1];
1355 e = latch_edge = new_spec_edges[SE_LATCH];
1357 else
1359 redirect_edge_and_branch_force (new_spec_edges[SE_LATCH],
1360 loop->header);
1361 redirect_edge_and_branch_force (e, new_bbs[0]);
1362 set_immediate_dominator (CDI_DOMINATORS, new_bbs[0], e->src);
1363 e = new_spec_edges[SE_LATCH];
1366 /* Record exit edge in this copy. */
1367 if (orig && bitmap_bit_p (wont_exit, j + 1))
1369 if (to_remove)
1370 to_remove->safe_push (new_spec_edges[SE_ORIG]);
1371 set_zero_probability (new_spec_edges[SE_ORIG]);
1373 /* Scale the frequencies of the blocks dominated by the exit. */
1374 if (bbs_to_scale)
1376 EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi)
1378 scale_bbs_frequencies_int (new_bbs + i, 1, scale_after_exit,
1379 REG_BR_PROB_BASE);
1384 /* Record the first copy in the control flow order if it is not
1385 the original loop (i.e. in case of peeling). */
1386 if (!first_active_latch)
1388 memcpy (first_active, new_bbs, n * sizeof (basic_block));
1389 first_active_latch = new_bbs[n - 1];
1392 /* Set counts and frequencies. */
1393 if (flags & DLTHE_FLAG_UPDATE_FREQ)
1395 scale_bbs_frequencies_int (new_bbs, n, scale_act, REG_BR_PROB_BASE);
1396 scale_act = combine_probabilities (scale_act, scale_step[j]);
1399 free (new_bbs);
1400 free (orig_loops);
1402 /* Record the exit edge in the original loop body, and update the frequencies. */
1403 if (orig && bitmap_bit_p (wont_exit, 0))
1405 if (to_remove)
1406 to_remove->safe_push (orig);
1407 set_zero_probability (orig);
1409 /* Scale the frequencies of the blocks dominated by the exit. */
1410 if (bbs_to_scale)
1412 EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi)
1414 scale_bbs_frequencies_int (bbs + i, 1, scale_after_exit,
1415 REG_BR_PROB_BASE);
1420 /* Update the original loop. */
1421 if (!is_latch)
1422 set_immediate_dominator (CDI_DOMINATORS, e->dest, e->src);
1423 if (flags & DLTHE_FLAG_UPDATE_FREQ)
1425 scale_bbs_frequencies_int (bbs, n, scale_main, REG_BR_PROB_BASE);
1426 free (scale_step);
1429 /* Update dominators of outer blocks if affected. */
1430 for (i = 0; i < n; i++)
1432 basic_block dominated, dom_bb;
1433 vec<basic_block> dom_bbs;
1434 unsigned j;
1436 bb = bbs[i];
1437 bb->aux = 0;
1439 dom_bbs = get_dominated_by (CDI_DOMINATORS, bb);
1440 FOR_EACH_VEC_ELT (dom_bbs, j, dominated)
1442 if (flow_bb_inside_loop_p (loop, dominated))
1443 continue;
1444 dom_bb = nearest_common_dominator (
1445 CDI_DOMINATORS, first_active[i], first_active_latch);
1446 set_immediate_dominator (CDI_DOMINATORS, dominated, dom_bb);
1448 dom_bbs.release ();
1450 free (first_active);
1452 free (bbs);
1453 BITMAP_FREE (bbs_to_scale);
1455 return true;
1458 /* A callback for make_forwarder block, to redirect all edges except for
1459 MFB_KJ_EDGE to the entry part. E is the edge for that we should decide
1460 whether to redirect it. */
1462 edge mfb_kj_edge;
1463 bool
1464 mfb_keep_just (edge e)
1466 return e != mfb_kj_edge;
1469 /* True when a candidate preheader BLOCK has predecessors from LOOP. */
1471 static bool
1472 has_preds_from_loop (basic_block block, struct loop *loop)
1474 edge e;
1475 edge_iterator ei;
1477 FOR_EACH_EDGE (e, ei, block->preds)
1478 if (e->src->loop_father == loop)
1479 return true;
1480 return false;
1483 /* Creates a pre-header for a LOOP. Returns newly created block. Unless
1484 CP_SIMPLE_PREHEADERS is set in FLAGS, we only force LOOP to have single
1485 entry; otherwise we also force preheader block to have only one successor.
1486 When CP_FALLTHRU_PREHEADERS is set in FLAGS, we force the preheader block
1487 to be a fallthru predecessor to the loop header and to have only
1488 predecessors from outside of the loop.
1489 The function also updates dominators. */
1491 basic_block
1492 create_preheader (struct loop *loop, int flags)
1494 edge e, fallthru;
1495 basic_block dummy;
1496 int nentry = 0;
1497 bool irred = false;
1498 bool latch_edge_was_fallthru;
1499 edge one_succ_pred = NULL, single_entry = NULL;
1500 edge_iterator ei;
1502 FOR_EACH_EDGE (e, ei, loop->header->preds)
1504 if (e->src == loop->latch)
1505 continue;
1506 irred |= (e->flags & EDGE_IRREDUCIBLE_LOOP) != 0;
1507 nentry++;
1508 single_entry = e;
1509 if (single_succ_p (e->src))
1510 one_succ_pred = e;
1512 gcc_assert (nentry);
1513 if (nentry == 1)
1515 bool need_forwarder_block = false;
1517 /* We do not allow entry block to be the loop preheader, since we
1518 cannot emit code there. */
1519 if (single_entry->src == ENTRY_BLOCK_PTR)
1520 need_forwarder_block = true;
1521 else
1523 /* If we want simple preheaders, also force the preheader to have
1524 just a single successor. */
1525 if ((flags & CP_SIMPLE_PREHEADERS)
1526 && !single_succ_p (single_entry->src))
1527 need_forwarder_block = true;
1528 /* If we want fallthru preheaders, also create forwarder block when
1529 preheader ends with a jump or has predecessors from loop. */
1530 else if ((flags & CP_FALLTHRU_PREHEADERS)
1531 && (JUMP_P (BB_END (single_entry->src))
1532 || has_preds_from_loop (single_entry->src, loop)))
1533 need_forwarder_block = true;
1535 if (! need_forwarder_block)
1536 return NULL;
1539 mfb_kj_edge = loop_latch_edge (loop);
1540 latch_edge_was_fallthru = (mfb_kj_edge->flags & EDGE_FALLTHRU) != 0;
1541 fallthru = make_forwarder_block (loop->header, mfb_keep_just, NULL);
1542 dummy = fallthru->src;
1543 loop->header = fallthru->dest;
1545 /* Try to be clever in placing the newly created preheader. The idea is to
1546 avoid breaking any "fallthruness" relationship between blocks.
1548 The preheader was created just before the header and all incoming edges
1549 to the header were redirected to the preheader, except the latch edge.
1550 So the only problematic case is when this latch edge was a fallthru
1551 edge: it is not anymore after the preheader creation so we have broken
1552 the fallthruness. We're therefore going to look for a better place. */
1553 if (latch_edge_was_fallthru)
1555 if (one_succ_pred)
1556 e = one_succ_pred;
1557 else
1558 e = EDGE_PRED (dummy, 0);
1560 move_block_after (dummy, e->src);
1563 if (irred)
1565 dummy->flags |= BB_IRREDUCIBLE_LOOP;
1566 single_succ_edge (dummy)->flags |= EDGE_IRREDUCIBLE_LOOP;
1569 if (dump_file)
1570 fprintf (dump_file, "Created preheader block for loop %i\n",
1571 loop->num);
1573 if (flags & CP_FALLTHRU_PREHEADERS)
1574 gcc_assert ((single_succ_edge (dummy)->flags & EDGE_FALLTHRU)
1575 && !JUMP_P (BB_END (dummy)));
1577 return dummy;
1580 /* Create preheaders for each loop; for meaning of FLAGS see create_preheader. */
1582 void
1583 create_preheaders (int flags)
1585 loop_iterator li;
1586 struct loop *loop;
1588 if (!current_loops)
1589 return;
1591 FOR_EACH_LOOP (li, loop, 0)
1592 create_preheader (loop, flags);
1593 loops_state_set (LOOPS_HAVE_PREHEADERS);
1596 /* Forces all loop latches to have only single successor. */
1598 void
1599 force_single_succ_latches (void)
1601 loop_iterator li;
1602 struct loop *loop;
1603 edge e;
1605 FOR_EACH_LOOP (li, loop, 0)
1607 if (loop->latch != loop->header && single_succ_p (loop->latch))
1608 continue;
1610 e = find_edge (loop->latch, loop->header);
1611 gcc_checking_assert (e != NULL);
1613 split_edge (e);
1615 loops_state_set (LOOPS_HAVE_SIMPLE_LATCHES);
1618 /* This function is called from loop_version. It splits the entry edge
1619 of the loop we want to version, adds the versioning condition, and
1620 adjust the edges to the two versions of the loop appropriately.
1621 e is an incoming edge. Returns the basic block containing the
1622 condition.
1624 --- edge e ---- > [second_head]
1626 Split it and insert new conditional expression and adjust edges.
1628 --- edge e ---> [cond expr] ---> [first_head]
1630 +---------> [second_head]
1632 THEN_PROB is the probability of then branch of the condition. */
1634 static basic_block
1635 lv_adjust_loop_entry_edge (basic_block first_head, basic_block second_head,
1636 edge e, void *cond_expr, unsigned then_prob)
1638 basic_block new_head = NULL;
1639 edge e1;
1641 gcc_assert (e->dest == second_head);
1643 /* Split edge 'e'. This will create a new basic block, where we can
1644 insert conditional expr. */
1645 new_head = split_edge (e);
1647 lv_add_condition_to_bb (first_head, second_head, new_head,
1648 cond_expr);
1650 /* Don't set EDGE_TRUE_VALUE in RTL mode, as it's invalid there. */
1651 e = single_succ_edge (new_head);
1652 e1 = make_edge (new_head, first_head,
1653 current_ir_type () == IR_GIMPLE ? EDGE_TRUE_VALUE : 0);
1654 e1->probability = then_prob;
1655 e->probability = REG_BR_PROB_BASE - then_prob;
1656 e1->count = apply_probability (e->count, e1->probability);
1657 e->count = apply_probability (e->count, e->probability);
1659 set_immediate_dominator (CDI_DOMINATORS, first_head, new_head);
1660 set_immediate_dominator (CDI_DOMINATORS, second_head, new_head);
1662 /* Adjust loop header phi nodes. */
1663 lv_adjust_loop_header_phi (first_head, second_head, new_head, e1);
1665 return new_head;
1668 /* Main entry point for Loop Versioning transformation.
1670 This transformation given a condition and a loop, creates
1671 -if (condition) { loop_copy1 } else { loop_copy2 },
1672 where loop_copy1 is the loop transformed in one way, and loop_copy2
1673 is the loop transformed in another way (or unchanged). 'condition'
1674 may be a run time test for things that were not resolved by static
1675 analysis (overlapping ranges (anti-aliasing), alignment, etc.).
1677 THEN_PROB is the probability of the then edge of the if. THEN_SCALE
1678 is the ratio by that the frequencies in the original loop should
1679 be scaled. ELSE_SCALE is the ratio by that the frequencies in the
1680 new loop should be scaled.
1682 If PLACE_AFTER is true, we place the new loop after LOOP in the
1683 instruction stream, otherwise it is placed before LOOP. */
1685 struct loop *
1686 loop_version (struct loop *loop,
1687 void *cond_expr, basic_block *condition_bb,
1688 unsigned then_prob, unsigned then_scale, unsigned else_scale,
1689 bool place_after)
1691 basic_block first_head, second_head;
1692 edge entry, latch_edge, true_edge, false_edge;
1693 int irred_flag;
1694 struct loop *nloop;
1695 basic_block cond_bb;
1697 /* Record entry and latch edges for the loop */
1698 entry = loop_preheader_edge (loop);
1699 irred_flag = entry->flags & EDGE_IRREDUCIBLE_LOOP;
1700 entry->flags &= ~EDGE_IRREDUCIBLE_LOOP;
1702 /* Note down head of loop as first_head. */
1703 first_head = entry->dest;
1705 /* Duplicate loop. */
1706 if (!cfg_hook_duplicate_loop_to_header_edge (loop, entry, 1,
1707 NULL, NULL, NULL, 0))
1709 entry->flags |= irred_flag;
1710 return NULL;
1713 /* After duplication entry edge now points to new loop head block.
1714 Note down new head as second_head. */
1715 second_head = entry->dest;
1717 /* Split loop entry edge and insert new block with cond expr. */
1718 cond_bb = lv_adjust_loop_entry_edge (first_head, second_head,
1719 entry, cond_expr, then_prob);
1720 if (condition_bb)
1721 *condition_bb = cond_bb;
1723 if (!cond_bb)
1725 entry->flags |= irred_flag;
1726 return NULL;
1729 latch_edge = single_succ_edge (get_bb_copy (loop->latch));
1731 extract_cond_bb_edges (cond_bb, &true_edge, &false_edge);
1732 nloop = loopify (latch_edge,
1733 single_pred_edge (get_bb_copy (loop->header)),
1734 cond_bb, true_edge, false_edge,
1735 false /* Do not redirect all edges. */,
1736 then_scale, else_scale);
1738 copy_loop_info (loop, nloop);
1740 /* loopify redirected latch_edge. Update its PENDING_STMTS. */
1741 lv_flush_pending_stmts (latch_edge);
1743 /* loopify redirected condition_bb's succ edge. Update its PENDING_STMTS. */
1744 extract_cond_bb_edges (cond_bb, &true_edge, &false_edge);
1745 lv_flush_pending_stmts (false_edge);
1746 /* Adjust irreducible flag. */
1747 if (irred_flag)
1749 cond_bb->flags |= BB_IRREDUCIBLE_LOOP;
1750 loop_preheader_edge (loop)->flags |= EDGE_IRREDUCIBLE_LOOP;
1751 loop_preheader_edge (nloop)->flags |= EDGE_IRREDUCIBLE_LOOP;
1752 single_pred_edge (cond_bb)->flags |= EDGE_IRREDUCIBLE_LOOP;
1755 if (place_after)
1757 basic_block *bbs = get_loop_body_in_dom_order (nloop), after;
1758 unsigned i;
1760 after = loop->latch;
1762 for (i = 0; i < nloop->num_nodes; i++)
1764 move_block_after (bbs[i], after);
1765 after = bbs[i];
1767 free (bbs);
1770 /* At this point condition_bb is loop preheader with two successors,
1771 first_head and second_head. Make sure that loop preheader has only
1772 one successor. */
1773 split_edge (loop_preheader_edge (loop));
1774 split_edge (loop_preheader_edge (nloop));
1776 return nloop;