2013-08-02 Paolo Carlini <paolo.carlini@oracle.com>
[official-gcc.git] / gcc / cfgloopmanip.c
blobf35e5aedc7902c6591f67ac1248029347c351324
1 /* Loop manipulation code for GNU compiler.
2 Copyright (C) 2002-2013 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 3, or (at your option) any later
9 version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
24 #include "rtl.h"
25 #include "basic-block.h"
26 #include "cfgloop.h"
27 #include "tree-flow.h"
28 #include "dumpfile.h"
30 static void copy_loops_to (struct loop **, int,
31 struct loop *);
32 static void loop_redirect_edge (edge, basic_block);
33 static void remove_bbs (basic_block *, int);
34 static bool rpe_enum_p (const_basic_block, const void *);
35 static int find_path (edge, basic_block **);
36 static void fix_loop_placements (struct loop *, bool *);
37 static bool fix_bb_placement (basic_block);
38 static void fix_bb_placements (basic_block, bool *, bitmap);
40 /* Checks whether basic block BB is dominated by DATA. */
41 static bool
42 rpe_enum_p (const_basic_block bb, const void *data)
44 return dominated_by_p (CDI_DOMINATORS, bb, (const_basic_block) data);
47 /* Remove basic blocks BBS. NBBS is the number of the basic blocks. */
49 static void
50 remove_bbs (basic_block *bbs, int nbbs)
52 int i;
54 for (i = 0; i < nbbs; i++)
55 delete_basic_block (bbs[i]);
58 /* Find path -- i.e. the basic blocks dominated by edge E and put them
59 into array BBS, that will be allocated large enough to contain them.
60 E->dest must have exactly one predecessor for this to work (it is
61 easy to achieve and we do not put it here because we do not want to
62 alter anything by this function). The number of basic blocks in the
63 path is returned. */
64 static int
65 find_path (edge e, basic_block **bbs)
67 gcc_assert (EDGE_COUNT (e->dest->preds) <= 1);
69 /* Find bbs in the path. */
70 *bbs = XNEWVEC (basic_block, n_basic_blocks);
71 return dfs_enumerate_from (e->dest, 0, rpe_enum_p, *bbs,
72 n_basic_blocks, e->dest);
75 /* Fix placement of basic block BB inside loop hierarchy --
76 Let L be a loop to that BB belongs. Then every successor of BB must either
77 1) belong to some superloop of loop L, or
78 2) be a header of loop K such that K->outer is superloop of L
79 Returns true if we had to move BB into other loop to enforce this condition,
80 false if the placement of BB was already correct (provided that placements
81 of its successors are correct). */
82 static bool
83 fix_bb_placement (basic_block bb)
85 edge e;
86 edge_iterator ei;
87 struct loop *loop = current_loops->tree_root, *act;
89 FOR_EACH_EDGE (e, ei, bb->succs)
91 if (e->dest == EXIT_BLOCK_PTR)
92 continue;
94 act = e->dest->loop_father;
95 if (act->header == e->dest)
96 act = loop_outer (act);
98 if (flow_loop_nested_p (loop, act))
99 loop = act;
102 if (loop == bb->loop_father)
103 return false;
105 remove_bb_from_loops (bb);
106 add_bb_to_loop (bb, loop);
108 return true;
111 /* Fix placement of LOOP inside loop tree, i.e. find the innermost superloop
112 of LOOP to that leads at least one exit edge of LOOP, and set it
113 as the immediate superloop of LOOP. Return true if the immediate superloop
114 of LOOP changed.
116 IRRED_INVALIDATED is set to true if a change in the loop structures might
117 invalidate the information about irreducible regions. */
119 static bool
120 fix_loop_placement (struct loop *loop, bool *irred_invalidated)
122 unsigned i;
123 edge e;
124 vec<edge> exits = get_loop_exit_edges (loop);
125 struct loop *father = current_loops->tree_root, *act;
126 bool ret = false;
128 FOR_EACH_VEC_ELT (exits, i, e)
130 act = find_common_loop (loop, e->dest->loop_father);
131 if (flow_loop_nested_p (father, act))
132 father = act;
135 if (father != loop_outer (loop))
137 for (act = loop_outer (loop); act != father; act = loop_outer (act))
138 act->num_nodes -= loop->num_nodes;
139 flow_loop_tree_node_remove (loop);
140 flow_loop_tree_node_add (father, loop);
142 /* The exit edges of LOOP no longer exits its original immediate
143 superloops; remove them from the appropriate exit lists. */
144 FOR_EACH_VEC_ELT (exits, i, e)
146 /* We may need to recompute irreducible loops. */
147 if (e->flags & EDGE_IRREDUCIBLE_LOOP)
148 *irred_invalidated = true;
149 rescan_loop_exit (e, false, false);
152 ret = true;
155 exits.release ();
156 return ret;
159 /* Fix placements of basic blocks inside loop hierarchy stored in loops; i.e.
160 enforce condition condition stated in description of fix_bb_placement. We
161 start from basic block FROM that had some of its successors removed, so that
162 his placement no longer has to be correct, and iteratively fix placement of
163 its predecessors that may change if placement of FROM changed. Also fix
164 placement of subloops of FROM->loop_father, that might also be altered due
165 to this change; the condition for them is similar, except that instead of
166 successors we consider edges coming out of the loops.
168 If the changes may invalidate the information about irreducible regions,
169 IRRED_INVALIDATED is set to true.
171 If LOOP_CLOSED_SSA_INVLIDATED is non-zero then all basic blocks with
172 changed loop_father are collected there. */
174 static void
175 fix_bb_placements (basic_block from,
176 bool *irred_invalidated,
177 bitmap loop_closed_ssa_invalidated)
179 sbitmap in_queue;
180 basic_block *queue, *qtop, *qbeg, *qend;
181 struct loop *base_loop, *target_loop;
182 edge e;
184 /* We pass through blocks back-reachable from FROM, testing whether some
185 of their successors moved to outer loop. It may be necessary to
186 iterate several times, but it is finite, as we stop unless we move
187 the basic block up the loop structure. The whole story is a bit
188 more complicated due to presence of subloops, those are moved using
189 fix_loop_placement. */
191 base_loop = from->loop_father;
192 /* If we are already in the outermost loop, the basic blocks cannot be moved
193 outside of it. If FROM is the header of the base loop, it cannot be moved
194 outside of it, either. In both cases, we can end now. */
195 if (base_loop == current_loops->tree_root
196 || from == base_loop->header)
197 return;
199 in_queue = sbitmap_alloc (last_basic_block);
200 bitmap_clear (in_queue);
201 bitmap_set_bit (in_queue, from->index);
202 /* Prevent us from going out of the base_loop. */
203 bitmap_set_bit (in_queue, base_loop->header->index);
205 queue = XNEWVEC (basic_block, base_loop->num_nodes + 1);
206 qtop = queue + base_loop->num_nodes + 1;
207 qbeg = queue;
208 qend = queue + 1;
209 *qbeg = from;
211 while (qbeg != qend)
213 edge_iterator ei;
214 from = *qbeg;
215 qbeg++;
216 if (qbeg == qtop)
217 qbeg = queue;
218 bitmap_clear_bit (in_queue, from->index);
220 if (from->loop_father->header == from)
222 /* Subloop header, maybe move the loop upward. */
223 if (!fix_loop_placement (from->loop_father, irred_invalidated))
224 continue;
225 target_loop = loop_outer (from->loop_father);
227 else
229 /* Ordinary basic block. */
230 if (!fix_bb_placement (from))
231 continue;
232 if (loop_closed_ssa_invalidated)
233 bitmap_set_bit (loop_closed_ssa_invalidated, from->index);
234 target_loop = from->loop_father;
237 FOR_EACH_EDGE (e, ei, from->succs)
239 if (e->flags & EDGE_IRREDUCIBLE_LOOP)
240 *irred_invalidated = true;
243 /* Something has changed, insert predecessors into queue. */
244 FOR_EACH_EDGE (e, ei, from->preds)
246 basic_block pred = e->src;
247 struct loop *nca;
249 if (e->flags & EDGE_IRREDUCIBLE_LOOP)
250 *irred_invalidated = true;
252 if (bitmap_bit_p (in_queue, pred->index))
253 continue;
255 /* If it is subloop, then it either was not moved, or
256 the path up the loop tree from base_loop do not contain
257 it. */
258 nca = find_common_loop (pred->loop_father, base_loop);
259 if (pred->loop_father != base_loop
260 && (nca == base_loop
261 || nca != pred->loop_father))
262 pred = pred->loop_father->header;
263 else if (!flow_loop_nested_p (target_loop, pred->loop_father))
265 /* If PRED is already higher in the loop hierarchy than the
266 TARGET_LOOP to that we moved FROM, the change of the position
267 of FROM does not affect the position of PRED, so there is no
268 point in processing it. */
269 continue;
272 if (bitmap_bit_p (in_queue, pred->index))
273 continue;
275 /* Schedule the basic block. */
276 *qend = pred;
277 qend++;
278 if (qend == qtop)
279 qend = queue;
280 bitmap_set_bit (in_queue, pred->index);
283 free (in_queue);
284 free (queue);
287 /* Removes path beginning at edge E, i.e. remove basic blocks dominated by E
288 and update loop structures and dominators. Return true if we were able
289 to remove the path, false otherwise (and nothing is affected then). */
290 bool
291 remove_path (edge e)
293 edge ae;
294 basic_block *rem_bbs, *bord_bbs, from, bb;
295 vec<basic_block> dom_bbs;
296 int i, nrem, n_bord_bbs;
297 sbitmap seen;
298 bool irred_invalidated = false;
299 edge_iterator ei;
300 struct loop *l, *f;
302 if (!can_remove_branch_p (e))
303 return false;
305 /* Keep track of whether we need to update information about irreducible
306 regions. This is the case if the removed area is a part of the
307 irreducible region, or if the set of basic blocks that belong to a loop
308 that is inside an irreducible region is changed, or if such a loop is
309 removed. */
310 if (e->flags & EDGE_IRREDUCIBLE_LOOP)
311 irred_invalidated = true;
313 /* We need to check whether basic blocks are dominated by the edge
314 e, but we only have basic block dominators. This is easy to
315 fix -- when e->dest has exactly one predecessor, this corresponds
316 to blocks dominated by e->dest, if not, split the edge. */
317 if (!single_pred_p (e->dest))
318 e = single_pred_edge (split_edge (e));
320 /* It may happen that by removing path we remove one or more loops
321 we belong to. In this case first unloop the loops, then proceed
322 normally. We may assume that e->dest is not a header of any loop,
323 as it now has exactly one predecessor. */
324 for (l = e->src->loop_father; loop_outer (l); l = f)
326 f = loop_outer (l);
327 if (dominated_by_p (CDI_DOMINATORS, l->latch, e->dest))
328 unloop (l, &irred_invalidated, NULL);
331 /* Identify the path. */
332 nrem = find_path (e, &rem_bbs);
334 n_bord_bbs = 0;
335 bord_bbs = XNEWVEC (basic_block, n_basic_blocks);
336 seen = sbitmap_alloc (last_basic_block);
337 bitmap_clear (seen);
339 /* Find "border" hexes -- i.e. those with predecessor in removed path. */
340 for (i = 0; i < nrem; i++)
341 bitmap_set_bit (seen, rem_bbs[i]->index);
342 if (!irred_invalidated)
343 FOR_EACH_EDGE (ae, ei, e->src->succs)
344 if (ae != e && ae->dest != EXIT_BLOCK_PTR && !bitmap_bit_p (seen, ae->dest->index)
345 && ae->flags & EDGE_IRREDUCIBLE_LOOP)
347 irred_invalidated = true;
348 break;
351 for (i = 0; i < nrem; i++)
353 bb = rem_bbs[i];
354 FOR_EACH_EDGE (ae, ei, rem_bbs[i]->succs)
355 if (ae->dest != EXIT_BLOCK_PTR && !bitmap_bit_p (seen, ae->dest->index))
357 bitmap_set_bit (seen, ae->dest->index);
358 bord_bbs[n_bord_bbs++] = ae->dest;
360 if (ae->flags & EDGE_IRREDUCIBLE_LOOP)
361 irred_invalidated = true;
365 /* Remove the path. */
366 from = e->src;
367 remove_branch (e);
368 dom_bbs.create (0);
370 /* Cancel loops contained in the path. */
371 for (i = 0; i < nrem; i++)
372 if (rem_bbs[i]->loop_father->header == rem_bbs[i])
373 cancel_loop_tree (rem_bbs[i]->loop_father);
375 remove_bbs (rem_bbs, nrem);
376 free (rem_bbs);
378 /* Find blocks whose dominators may be affected. */
379 bitmap_clear (seen);
380 for (i = 0; i < n_bord_bbs; i++)
382 basic_block ldom;
384 bb = get_immediate_dominator (CDI_DOMINATORS, bord_bbs[i]);
385 if (bitmap_bit_p (seen, bb->index))
386 continue;
387 bitmap_set_bit (seen, bb->index);
389 for (ldom = first_dom_son (CDI_DOMINATORS, bb);
390 ldom;
391 ldom = next_dom_son (CDI_DOMINATORS, ldom))
392 if (!dominated_by_p (CDI_DOMINATORS, from, ldom))
393 dom_bbs.safe_push (ldom);
396 free (seen);
398 /* Recount dominators. */
399 iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, true);
400 dom_bbs.release ();
401 free (bord_bbs);
403 /* Fix placements of basic blocks inside loops and the placement of
404 loops in the loop tree. */
405 fix_bb_placements (from, &irred_invalidated, NULL);
406 fix_loop_placements (from->loop_father, &irred_invalidated);
408 if (irred_invalidated
409 && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
410 mark_irreducible_loops ();
412 return true;
415 /* Creates place for a new LOOP in loops structure of FN. */
417 void
418 place_new_loop (struct function *fn, struct loop *loop)
420 loop->num = number_of_loops (fn);
421 vec_safe_push (loops_for_fn (fn)->larray, loop);
424 /* Given LOOP structure with filled header and latch, find the body of the
425 corresponding loop and add it to loops tree. Insert the LOOP as a son of
426 outer. */
428 void
429 add_loop (struct loop *loop, struct loop *outer)
431 basic_block *bbs;
432 int i, n;
433 struct loop *subloop;
434 edge e;
435 edge_iterator ei;
437 /* Add it to loop structure. */
438 place_new_loop (cfun, loop);
439 flow_loop_tree_node_add (outer, loop);
441 /* Find its nodes. */
442 bbs = XNEWVEC (basic_block, n_basic_blocks);
443 n = get_loop_body_with_size (loop, bbs, n_basic_blocks);
445 for (i = 0; i < n; i++)
447 if (bbs[i]->loop_father == outer)
449 remove_bb_from_loops (bbs[i]);
450 add_bb_to_loop (bbs[i], loop);
451 continue;
454 loop->num_nodes++;
456 /* If we find a direct subloop of OUTER, move it to LOOP. */
457 subloop = bbs[i]->loop_father;
458 if (loop_outer (subloop) == outer
459 && subloop->header == bbs[i])
461 flow_loop_tree_node_remove (subloop);
462 flow_loop_tree_node_add (loop, subloop);
466 /* Update the information about loop exit edges. */
467 for (i = 0; i < n; i++)
469 FOR_EACH_EDGE (e, ei, bbs[i]->succs)
471 rescan_loop_exit (e, false, false);
475 free (bbs);
478 /* Multiply all frequencies in LOOP by NUM/DEN. */
480 void
481 scale_loop_frequencies (struct loop *loop, int num, int den)
483 basic_block *bbs;
485 bbs = get_loop_body (loop);
486 scale_bbs_frequencies_int (bbs, loop->num_nodes, num, den);
487 free (bbs);
490 /* Multiply all frequencies in LOOP by SCALE/REG_BR_PROB_BASE.
491 If ITERATION_BOUND is non-zero, scale even further if loop is predicted
492 to iterate too many times. */
494 void
495 scale_loop_profile (struct loop *loop, int scale, gcov_type iteration_bound)
497 gcov_type iterations = expected_loop_iterations_unbounded (loop);
498 edge e;
499 edge_iterator ei;
501 if (dump_file && (dump_flags & TDF_DETAILS))
502 fprintf (dump_file, ";; Scaling loop %i with scale %f, "
503 "bounding iterations to %i from guessed %i\n",
504 loop->num, (double)scale / REG_BR_PROB_BASE,
505 (int)iteration_bound, (int)iterations);
507 /* See if loop is predicted to iterate too many times. */
508 if (iteration_bound && iterations > 0
509 && apply_probability (iterations, scale) > iteration_bound)
511 /* Fixing loop profile for different trip count is not trivial; the exit
512 probabilities has to be updated to match and frequencies propagated down
513 to the loop body.
515 We fully update only the simple case of loop with single exit that is
516 either from the latch or BB just before latch and leads from BB with
517 simple conditional jump. This is OK for use in vectorizer. */
518 e = single_exit (loop);
519 if (e)
521 edge other_e;
522 int freq_delta;
523 gcov_type count_delta;
525 FOR_EACH_EDGE (other_e, ei, e->src->succs)
526 if (!(other_e->flags & (EDGE_ABNORMAL | EDGE_FAKE))
527 && e != other_e)
528 break;
530 /* Probability of exit must be 1/iterations. */
531 freq_delta = EDGE_FREQUENCY (e);
532 e->probability = REG_BR_PROB_BASE / iteration_bound;
533 other_e->probability = inverse_probability (e->probability);
534 freq_delta -= EDGE_FREQUENCY (e);
536 /* Adjust counts accordingly. */
537 count_delta = e->count;
538 e->count = apply_probability (e->src->count, e->probability);
539 other_e->count = apply_probability (e->src->count, other_e->probability);
540 count_delta -= e->count;
542 /* If latch exists, change its frequency and count, since we changed
543 probability of exit. Theoretically we should update everything from
544 source of exit edge to latch, but for vectorizer this is enough. */
545 if (loop->latch
546 && loop->latch != e->src)
548 loop->latch->frequency += freq_delta;
549 if (loop->latch->frequency < 0)
550 loop->latch->frequency = 0;
551 loop->latch->count += count_delta;
552 if (loop->latch->count < 0)
553 loop->latch->count = 0;
557 /* Roughly speaking we want to reduce the loop body profile by the
558 the difference of loop iterations. We however can do better if
559 we look at the actual profile, if it is available. */
560 scale = RDIV (iteration_bound * scale, iterations);
561 if (loop->header->count)
563 gcov_type count_in = 0;
565 FOR_EACH_EDGE (e, ei, loop->header->preds)
566 if (e->src != loop->latch)
567 count_in += e->count;
569 if (count_in != 0)
570 scale = GCOV_COMPUTE_SCALE (count_in * iteration_bound,
571 loop->header->count);
573 else if (loop->header->frequency)
575 int freq_in = 0;
577 FOR_EACH_EDGE (e, ei, loop->header->preds)
578 if (e->src != loop->latch)
579 freq_in += EDGE_FREQUENCY (e);
581 if (freq_in != 0)
582 scale = GCOV_COMPUTE_SCALE (freq_in * iteration_bound,
583 loop->header->frequency);
585 if (!scale)
586 scale = 1;
589 if (scale == REG_BR_PROB_BASE)
590 return;
592 /* Scale the actual probabilities. */
593 scale_loop_frequencies (loop, scale, REG_BR_PROB_BASE);
594 if (dump_file && (dump_flags & TDF_DETAILS))
595 fprintf (dump_file, ";; guessed iterations are now %i\n",
596 (int)expected_loop_iterations_unbounded (loop));
599 /* Recompute dominance information for basic blocks outside LOOP. */
601 static void
602 update_dominators_in_loop (struct loop *loop)
604 vec<basic_block> dom_bbs = vNULL;
605 sbitmap seen;
606 basic_block *body;
607 unsigned i;
609 seen = sbitmap_alloc (last_basic_block);
610 bitmap_clear (seen);
611 body = get_loop_body (loop);
613 for (i = 0; i < loop->num_nodes; i++)
614 bitmap_set_bit (seen, body[i]->index);
616 for (i = 0; i < loop->num_nodes; i++)
618 basic_block ldom;
620 for (ldom = first_dom_son (CDI_DOMINATORS, body[i]);
621 ldom;
622 ldom = next_dom_son (CDI_DOMINATORS, ldom))
623 if (!bitmap_bit_p (seen, ldom->index))
625 bitmap_set_bit (seen, ldom->index);
626 dom_bbs.safe_push (ldom);
630 iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, false);
631 free (body);
632 free (seen);
633 dom_bbs.release ();
636 /* Creates an if region as shown above. CONDITION is used to create
637 the test for the if.
640 | ------------- -------------
641 | | pred_bb | | pred_bb |
642 | ------------- -------------
643 | | |
644 | | | ENTRY_EDGE
645 | | ENTRY_EDGE V
646 | | ====> -------------
647 | | | cond_bb |
648 | | | CONDITION |
649 | | -------------
650 | V / \
651 | ------------- e_false / \ e_true
652 | | succ_bb | V V
653 | ------------- ----------- -----------
654 | | false_bb | | true_bb |
655 | ----------- -----------
656 | \ /
657 | \ /
658 | V V
659 | -------------
660 | | join_bb |
661 | -------------
662 | | exit_edge (result)
664 | -----------
665 | | succ_bb |
666 | -----------
670 edge
671 create_empty_if_region_on_edge (edge entry_edge, tree condition)
674 basic_block cond_bb, true_bb, false_bb, join_bb;
675 edge e_true, e_false, exit_edge;
676 gimple cond_stmt;
677 tree simple_cond;
678 gimple_stmt_iterator gsi;
680 cond_bb = split_edge (entry_edge);
682 /* Insert condition in cond_bb. */
683 gsi = gsi_last_bb (cond_bb);
684 simple_cond =
685 force_gimple_operand_gsi (&gsi, condition, true, NULL,
686 false, GSI_NEW_STMT);
687 cond_stmt = gimple_build_cond_from_tree (simple_cond, NULL_TREE, NULL_TREE);
688 gsi = gsi_last_bb (cond_bb);
689 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
691 join_bb = split_edge (single_succ_edge (cond_bb));
693 e_true = single_succ_edge (cond_bb);
694 true_bb = split_edge (e_true);
696 e_false = make_edge (cond_bb, join_bb, 0);
697 false_bb = split_edge (e_false);
699 e_true->flags &= ~EDGE_FALLTHRU;
700 e_true->flags |= EDGE_TRUE_VALUE;
701 e_false->flags &= ~EDGE_FALLTHRU;
702 e_false->flags |= EDGE_FALSE_VALUE;
704 set_immediate_dominator (CDI_DOMINATORS, cond_bb, entry_edge->src);
705 set_immediate_dominator (CDI_DOMINATORS, true_bb, cond_bb);
706 set_immediate_dominator (CDI_DOMINATORS, false_bb, cond_bb);
707 set_immediate_dominator (CDI_DOMINATORS, join_bb, cond_bb);
709 exit_edge = single_succ_edge (join_bb);
711 if (single_pred_p (exit_edge->dest))
712 set_immediate_dominator (CDI_DOMINATORS, exit_edge->dest, join_bb);
714 return exit_edge;
717 /* create_empty_loop_on_edge
719 | - pred_bb - ------ pred_bb ------
720 | | | | iv0 = initial_value |
721 | -----|----- ---------|-----------
722 | | ______ | entry_edge
723 | | entry_edge / | |
724 | | ====> | -V---V- loop_header -------------
725 | V | | iv_before = phi (iv0, iv_after) |
726 | - succ_bb - | ---|-----------------------------
727 | | | | |
728 | ----------- | ---V--- loop_body ---------------
729 | | | iv_after = iv_before + stride |
730 | | | if (iv_before < upper_bound) |
731 | | ---|--------------\--------------
732 | | | \ exit_e
733 | | V \
734 | | - loop_latch - V- succ_bb -
735 | | | | | |
736 | | /------------- -----------
737 | \ ___ /
739 Creates an empty loop as shown above, the IV_BEFORE is the SSA_NAME
740 that is used before the increment of IV. IV_BEFORE should be used for
741 adding code to the body that uses the IV. OUTER is the outer loop in
742 which the new loop should be inserted.
744 Both INITIAL_VALUE and UPPER_BOUND expressions are gimplified and
745 inserted on the loop entry edge. This implies that this function
746 should be used only when the UPPER_BOUND expression is a loop
747 invariant. */
749 struct loop *
750 create_empty_loop_on_edge (edge entry_edge,
751 tree initial_value,
752 tree stride, tree upper_bound,
753 tree iv,
754 tree *iv_before,
755 tree *iv_after,
756 struct loop *outer)
758 basic_block loop_header, loop_latch, succ_bb, pred_bb;
759 struct loop *loop;
760 gimple_stmt_iterator gsi;
761 gimple_seq stmts;
762 gimple cond_expr;
763 tree exit_test;
764 edge exit_e;
765 int prob;
767 gcc_assert (entry_edge && initial_value && stride && upper_bound && iv);
769 /* Create header, latch and wire up the loop. */
770 pred_bb = entry_edge->src;
771 loop_header = split_edge (entry_edge);
772 loop_latch = split_edge (single_succ_edge (loop_header));
773 succ_bb = single_succ (loop_latch);
774 make_edge (loop_header, succ_bb, 0);
775 redirect_edge_succ_nodup (single_succ_edge (loop_latch), loop_header);
777 /* Set immediate dominator information. */
778 set_immediate_dominator (CDI_DOMINATORS, loop_header, pred_bb);
779 set_immediate_dominator (CDI_DOMINATORS, loop_latch, loop_header);
780 set_immediate_dominator (CDI_DOMINATORS, succ_bb, loop_header);
782 /* Initialize a loop structure and put it in a loop hierarchy. */
783 loop = alloc_loop ();
784 loop->header = loop_header;
785 loop->latch = loop_latch;
786 add_loop (loop, outer);
788 /* TODO: Fix frequencies and counts. */
789 prob = REG_BR_PROB_BASE / 2;
791 scale_loop_frequencies (loop, REG_BR_PROB_BASE - prob, REG_BR_PROB_BASE);
793 /* Update dominators. */
794 update_dominators_in_loop (loop);
796 /* Modify edge flags. */
797 exit_e = single_exit (loop);
798 exit_e->flags = EDGE_LOOP_EXIT | EDGE_FALSE_VALUE;
799 single_pred_edge (loop_latch)->flags = EDGE_TRUE_VALUE;
801 /* Construct IV code in loop. */
802 initial_value = force_gimple_operand (initial_value, &stmts, true, iv);
803 if (stmts)
805 gsi_insert_seq_on_edge (loop_preheader_edge (loop), stmts);
806 gsi_commit_edge_inserts ();
809 upper_bound = force_gimple_operand (upper_bound, &stmts, true, NULL);
810 if (stmts)
812 gsi_insert_seq_on_edge (loop_preheader_edge (loop), stmts);
813 gsi_commit_edge_inserts ();
816 gsi = gsi_last_bb (loop_header);
817 create_iv (initial_value, stride, iv, loop, &gsi, false,
818 iv_before, iv_after);
820 /* Insert loop exit condition. */
821 cond_expr = gimple_build_cond
822 (LT_EXPR, *iv_before, upper_bound, NULL_TREE, NULL_TREE);
824 exit_test = gimple_cond_lhs (cond_expr);
825 exit_test = force_gimple_operand_gsi (&gsi, exit_test, true, NULL,
826 false, GSI_NEW_STMT);
827 gimple_cond_set_lhs (cond_expr, exit_test);
828 gsi = gsi_last_bb (exit_e->src);
829 gsi_insert_after (&gsi, cond_expr, GSI_NEW_STMT);
831 split_block_after_labels (loop_header);
833 return loop;
836 /* Make area between HEADER_EDGE and LATCH_EDGE a loop by connecting
837 latch to header and update loop tree and dominators
838 accordingly. Everything between them plus LATCH_EDGE destination must
839 be dominated by HEADER_EDGE destination, and back-reachable from
840 LATCH_EDGE source. HEADER_EDGE is redirected to basic block SWITCH_BB,
841 FALSE_EDGE of SWITCH_BB to original destination of HEADER_EDGE and
842 TRUE_EDGE of SWITCH_BB to original destination of LATCH_EDGE.
843 Returns the newly created loop. Frequencies and counts in the new loop
844 are scaled by FALSE_SCALE and in the old one by TRUE_SCALE. */
846 struct loop *
847 loopify (edge latch_edge, edge header_edge,
848 basic_block switch_bb, edge true_edge, edge false_edge,
849 bool redirect_all_edges, unsigned true_scale, unsigned false_scale)
851 basic_block succ_bb = latch_edge->dest;
852 basic_block pred_bb = header_edge->src;
853 struct loop *loop = alloc_loop ();
854 struct loop *outer = loop_outer (succ_bb->loop_father);
855 int freq;
856 gcov_type cnt;
857 edge e;
858 edge_iterator ei;
860 loop->header = header_edge->dest;
861 loop->latch = latch_edge->src;
863 freq = EDGE_FREQUENCY (header_edge);
864 cnt = header_edge->count;
866 /* Redirect edges. */
867 loop_redirect_edge (latch_edge, loop->header);
868 loop_redirect_edge (true_edge, succ_bb);
870 /* During loop versioning, one of the switch_bb edge is already properly
871 set. Do not redirect it again unless redirect_all_edges is true. */
872 if (redirect_all_edges)
874 loop_redirect_edge (header_edge, switch_bb);
875 loop_redirect_edge (false_edge, loop->header);
877 /* Update dominators. */
878 set_immediate_dominator (CDI_DOMINATORS, switch_bb, pred_bb);
879 set_immediate_dominator (CDI_DOMINATORS, loop->header, switch_bb);
882 set_immediate_dominator (CDI_DOMINATORS, succ_bb, switch_bb);
884 /* Compute new loop. */
885 add_loop (loop, outer);
887 /* Add switch_bb to appropriate loop. */
888 if (switch_bb->loop_father)
889 remove_bb_from_loops (switch_bb);
890 add_bb_to_loop (switch_bb, outer);
892 /* Fix frequencies. */
893 if (redirect_all_edges)
895 switch_bb->frequency = freq;
896 switch_bb->count = cnt;
897 FOR_EACH_EDGE (e, ei, switch_bb->succs)
899 e->count = apply_probability (switch_bb->count, e->probability);
902 scale_loop_frequencies (loop, false_scale, REG_BR_PROB_BASE);
903 scale_loop_frequencies (succ_bb->loop_father, true_scale, REG_BR_PROB_BASE);
904 update_dominators_in_loop (loop);
906 return loop;
909 /* Remove the latch edge of a LOOP and update loops to indicate that
910 the LOOP was removed. After this function, original loop latch will
911 have no successor, which caller is expected to fix somehow.
913 If this may cause the information about irreducible regions to become
914 invalid, IRRED_INVALIDATED is set to true.
916 LOOP_CLOSED_SSA_INVALIDATED, if non-NULL, is a bitmap where we store
917 basic blocks that had non-trivial update on their loop_father.*/
919 void
920 unloop (struct loop *loop, bool *irred_invalidated,
921 bitmap loop_closed_ssa_invalidated)
923 basic_block *body;
924 struct loop *ploop;
925 unsigned i, n;
926 basic_block latch = loop->latch;
927 bool dummy = false;
929 if (loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP)
930 *irred_invalidated = true;
932 /* This is relatively straightforward. The dominators are unchanged, as
933 loop header dominates loop latch, so the only thing we have to care of
934 is the placement of loops and basic blocks inside the loop tree. We
935 move them all to the loop->outer, and then let fix_bb_placements do
936 its work. */
938 body = get_loop_body (loop);
939 n = loop->num_nodes;
940 for (i = 0; i < n; i++)
941 if (body[i]->loop_father == loop)
943 remove_bb_from_loops (body[i]);
944 add_bb_to_loop (body[i], loop_outer (loop));
946 free(body);
948 while (loop->inner)
950 ploop = loop->inner;
951 flow_loop_tree_node_remove (ploop);
952 flow_loop_tree_node_add (loop_outer (loop), ploop);
955 /* Remove the loop and free its data. */
956 delete_loop (loop);
958 remove_edge (single_succ_edge (latch));
960 /* We do not pass IRRED_INVALIDATED to fix_bb_placements here, as even if
961 there is an irreducible region inside the cancelled loop, the flags will
962 be still correct. */
963 fix_bb_placements (latch, &dummy, loop_closed_ssa_invalidated);
966 /* Fix placement of superloops of LOOP inside loop tree, i.e. ensure that
967 condition stated in description of fix_loop_placement holds for them.
968 It is used in case when we removed some edges coming out of LOOP, which
969 may cause the right placement of LOOP inside loop tree to change.
971 IRRED_INVALIDATED is set to true if a change in the loop structures might
972 invalidate the information about irreducible regions. */
974 static void
975 fix_loop_placements (struct loop *loop, bool *irred_invalidated)
977 struct loop *outer;
979 while (loop_outer (loop))
981 outer = loop_outer (loop);
982 if (!fix_loop_placement (loop, irred_invalidated))
983 break;
985 /* Changing the placement of a loop in the loop tree may alter the
986 validity of condition 2) of the description of fix_bb_placement
987 for its preheader, because the successor is the header and belongs
988 to the loop. So call fix_bb_placements to fix up the placement
989 of the preheader and (possibly) of its predecessors. */
990 fix_bb_placements (loop_preheader_edge (loop)->src,
991 irred_invalidated, NULL);
992 loop = outer;
996 /* Duplicate loop bounds and other information we store about
997 the loop into its duplicate. */
999 void
1000 copy_loop_info (struct loop *loop, struct loop *target)
1002 gcc_checking_assert (!target->any_upper_bound && !target->any_estimate);
1003 target->any_upper_bound = loop->any_upper_bound;
1004 target->nb_iterations_upper_bound = loop->nb_iterations_upper_bound;
1005 target->any_estimate = loop->any_estimate;
1006 target->nb_iterations_estimate = loop->nb_iterations_estimate;
1007 target->estimate_state = loop->estimate_state;
1010 /* Copies copy of LOOP as subloop of TARGET loop, placing newly
1011 created loop into loops structure. */
1012 struct loop *
1013 duplicate_loop (struct loop *loop, struct loop *target)
1015 struct loop *cloop;
1016 cloop = alloc_loop ();
1017 place_new_loop (cfun, cloop);
1019 copy_loop_info (loop, cloop);
1021 /* Mark the new loop as copy of LOOP. */
1022 set_loop_copy (loop, cloop);
1024 /* Add it to target. */
1025 flow_loop_tree_node_add (target, cloop);
1027 return cloop;
1030 /* Copies structure of subloops of LOOP into TARGET loop, placing
1031 newly created loops into loop tree. */
1032 void
1033 duplicate_subloops (struct loop *loop, struct loop *target)
1035 struct loop *aloop, *cloop;
1037 for (aloop = loop->inner; aloop; aloop = aloop->next)
1039 cloop = duplicate_loop (aloop, target);
1040 duplicate_subloops (aloop, cloop);
1044 /* Copies structure of subloops of N loops, stored in array COPIED_LOOPS,
1045 into TARGET loop, placing newly created loops into loop tree. */
1046 static void
1047 copy_loops_to (struct loop **copied_loops, int n, struct loop *target)
1049 struct loop *aloop;
1050 int i;
1052 for (i = 0; i < n; i++)
1054 aloop = duplicate_loop (copied_loops[i], target);
1055 duplicate_subloops (copied_loops[i], aloop);
1059 /* Redirects edge E to basic block DEST. */
1060 static void
1061 loop_redirect_edge (edge e, basic_block dest)
1063 if (e->dest == dest)
1064 return;
1066 redirect_edge_and_branch_force (e, dest);
1069 /* Check whether LOOP's body can be duplicated. */
1070 bool
1071 can_duplicate_loop_p (const struct loop *loop)
1073 int ret;
1074 basic_block *bbs = get_loop_body (loop);
1076 ret = can_copy_bbs_p (bbs, loop->num_nodes);
1077 free (bbs);
1079 return ret;
1082 /* Sets probability and count of edge E to zero. The probability and count
1083 is redistributed evenly to the remaining edges coming from E->src. */
1085 static void
1086 set_zero_probability (edge e)
1088 basic_block bb = e->src;
1089 edge_iterator ei;
1090 edge ae, last = NULL;
1091 unsigned n = EDGE_COUNT (bb->succs);
1092 gcov_type cnt = e->count, cnt1;
1093 unsigned prob = e->probability, prob1;
1095 gcc_assert (n > 1);
1096 cnt1 = cnt / (n - 1);
1097 prob1 = prob / (n - 1);
1099 FOR_EACH_EDGE (ae, ei, bb->succs)
1101 if (ae == e)
1102 continue;
1104 ae->probability += prob1;
1105 ae->count += cnt1;
1106 last = ae;
1109 /* Move the rest to one of the edges. */
1110 last->probability += prob % (n - 1);
1111 last->count += cnt % (n - 1);
1113 e->probability = 0;
1114 e->count = 0;
1117 /* Duplicates body of LOOP to given edge E NDUPL times. Takes care of updating
1118 loop structure and dominators. E's destination must be LOOP header for
1119 this to work, i.e. it must be entry or latch edge of this loop; these are
1120 unique, as the loops must have preheaders for this function to work
1121 correctly (in case E is latch, the function unrolls the loop, if E is entry
1122 edge, it peels the loop). Store edges created by copying ORIG edge from
1123 copies corresponding to set bits in WONT_EXIT bitmap (bit 0 corresponds to
1124 original LOOP body, the other copies are numbered in order given by control
1125 flow through them) into TO_REMOVE array. Returns false if duplication is
1126 impossible. */
1128 bool
1129 duplicate_loop_to_header_edge (struct loop *loop, edge e,
1130 unsigned int ndupl, sbitmap wont_exit,
1131 edge orig, vec<edge> *to_remove,
1132 int flags)
1134 struct loop *target, *aloop;
1135 struct loop **orig_loops;
1136 unsigned n_orig_loops;
1137 basic_block header = loop->header, latch = loop->latch;
1138 basic_block *new_bbs, *bbs, *first_active;
1139 basic_block new_bb, bb, first_active_latch = NULL;
1140 edge ae, latch_edge;
1141 edge spec_edges[2], new_spec_edges[2];
1142 #define SE_LATCH 0
1143 #define SE_ORIG 1
1144 unsigned i, j, n;
1145 int is_latch = (latch == e->src);
1146 int scale_act = 0, *scale_step = NULL, scale_main = 0;
1147 int scale_after_exit = 0;
1148 int p, freq_in, freq_le, freq_out_orig;
1149 int prob_pass_thru, prob_pass_wont_exit, prob_pass_main;
1150 int add_irreducible_flag;
1151 basic_block place_after;
1152 bitmap bbs_to_scale = NULL;
1153 bitmap_iterator bi;
1155 gcc_assert (e->dest == loop->header);
1156 gcc_assert (ndupl > 0);
1158 if (orig)
1160 /* Orig must be edge out of the loop. */
1161 gcc_assert (flow_bb_inside_loop_p (loop, orig->src));
1162 gcc_assert (!flow_bb_inside_loop_p (loop, orig->dest));
1165 n = loop->num_nodes;
1166 bbs = get_loop_body_in_dom_order (loop);
1167 gcc_assert (bbs[0] == loop->header);
1168 gcc_assert (bbs[n - 1] == loop->latch);
1170 /* Check whether duplication is possible. */
1171 if (!can_copy_bbs_p (bbs, loop->num_nodes))
1173 free (bbs);
1174 return false;
1176 new_bbs = XNEWVEC (basic_block, loop->num_nodes);
1178 /* In case we are doing loop peeling and the loop is in the middle of
1179 irreducible region, the peeled copies will be inside it too. */
1180 add_irreducible_flag = e->flags & EDGE_IRREDUCIBLE_LOOP;
1181 gcc_assert (!is_latch || !add_irreducible_flag);
1183 /* Find edge from latch. */
1184 latch_edge = loop_latch_edge (loop);
1186 if (flags & DLTHE_FLAG_UPDATE_FREQ)
1188 /* Calculate coefficients by that we have to scale frequencies
1189 of duplicated loop bodies. */
1190 freq_in = header->frequency;
1191 freq_le = EDGE_FREQUENCY (latch_edge);
1192 if (freq_in == 0)
1193 freq_in = 1;
1194 if (freq_in < freq_le)
1195 freq_in = freq_le;
1196 freq_out_orig = orig ? EDGE_FREQUENCY (orig) : freq_in - freq_le;
1197 if (freq_out_orig > freq_in - freq_le)
1198 freq_out_orig = freq_in - freq_le;
1199 prob_pass_thru = RDIV (REG_BR_PROB_BASE * freq_le, freq_in);
1200 prob_pass_wont_exit =
1201 RDIV (REG_BR_PROB_BASE * (freq_le + freq_out_orig), freq_in);
1203 if (orig
1204 && REG_BR_PROB_BASE - orig->probability != 0)
1206 /* The blocks that are dominated by a removed exit edge ORIG have
1207 frequencies scaled by this. */
1208 scale_after_exit
1209 = GCOV_COMPUTE_SCALE (REG_BR_PROB_BASE,
1210 REG_BR_PROB_BASE - orig->probability);
1211 bbs_to_scale = BITMAP_ALLOC (NULL);
1212 for (i = 0; i < n; i++)
1214 if (bbs[i] != orig->src
1215 && dominated_by_p (CDI_DOMINATORS, bbs[i], orig->src))
1216 bitmap_set_bit (bbs_to_scale, i);
1220 scale_step = XNEWVEC (int, ndupl);
1222 for (i = 1; i <= ndupl; i++)
1223 scale_step[i - 1] = bitmap_bit_p (wont_exit, i)
1224 ? prob_pass_wont_exit
1225 : prob_pass_thru;
1227 /* Complete peeling is special as the probability of exit in last
1228 copy becomes 1. */
1229 if (flags & DLTHE_FLAG_COMPLETTE_PEEL)
1231 int wanted_freq = EDGE_FREQUENCY (e);
1233 if (wanted_freq > freq_in)
1234 wanted_freq = freq_in;
1236 gcc_assert (!is_latch);
1237 /* First copy has frequency of incoming edge. Each subsequent
1238 frequency should be reduced by prob_pass_wont_exit. Caller
1239 should've managed the flags so all except for original loop
1240 has won't exist set. */
1241 scale_act = GCOV_COMPUTE_SCALE (wanted_freq, freq_in);
1242 /* Now simulate the duplication adjustments and compute header
1243 frequency of the last copy. */
1244 for (i = 0; i < ndupl; i++)
1245 wanted_freq = combine_probabilities (wanted_freq, scale_step[i]);
1246 scale_main = GCOV_COMPUTE_SCALE (wanted_freq, freq_in);
1248 else if (is_latch)
1250 prob_pass_main = bitmap_bit_p (wont_exit, 0)
1251 ? prob_pass_wont_exit
1252 : prob_pass_thru;
1253 p = prob_pass_main;
1254 scale_main = REG_BR_PROB_BASE;
1255 for (i = 0; i < ndupl; i++)
1257 scale_main += p;
1258 p = combine_probabilities (p, scale_step[i]);
1260 scale_main = GCOV_COMPUTE_SCALE (REG_BR_PROB_BASE, scale_main);
1261 scale_act = combine_probabilities (scale_main, prob_pass_main);
1263 else
1265 scale_main = REG_BR_PROB_BASE;
1266 for (i = 0; i < ndupl; i++)
1267 scale_main = combine_probabilities (scale_main, scale_step[i]);
1268 scale_act = REG_BR_PROB_BASE - prob_pass_thru;
1270 for (i = 0; i < ndupl; i++)
1271 gcc_assert (scale_step[i] >= 0 && scale_step[i] <= REG_BR_PROB_BASE);
1272 gcc_assert (scale_main >= 0 && scale_main <= REG_BR_PROB_BASE
1273 && scale_act >= 0 && scale_act <= REG_BR_PROB_BASE);
1276 /* Loop the new bbs will belong to. */
1277 target = e->src->loop_father;
1279 /* Original loops. */
1280 n_orig_loops = 0;
1281 for (aloop = loop->inner; aloop; aloop = aloop->next)
1282 n_orig_loops++;
1283 orig_loops = XNEWVEC (struct loop *, n_orig_loops);
1284 for (aloop = loop->inner, i = 0; aloop; aloop = aloop->next, i++)
1285 orig_loops[i] = aloop;
1287 set_loop_copy (loop, target);
1289 first_active = XNEWVEC (basic_block, n);
1290 if (is_latch)
1292 memcpy (first_active, bbs, n * sizeof (basic_block));
1293 first_active_latch = latch;
1296 spec_edges[SE_ORIG] = orig;
1297 spec_edges[SE_LATCH] = latch_edge;
1299 place_after = e->src;
1300 for (j = 0; j < ndupl; j++)
1302 /* Copy loops. */
1303 copy_loops_to (orig_loops, n_orig_loops, target);
1305 /* Copy bbs. */
1306 copy_bbs (bbs, n, new_bbs, spec_edges, 2, new_spec_edges, loop,
1307 place_after, true);
1308 place_after = new_spec_edges[SE_LATCH]->src;
1310 if (flags & DLTHE_RECORD_COPY_NUMBER)
1311 for (i = 0; i < n; i++)
1313 gcc_assert (!new_bbs[i]->aux);
1314 new_bbs[i]->aux = (void *)(size_t)(j + 1);
1317 /* Note whether the blocks and edges belong to an irreducible loop. */
1318 if (add_irreducible_flag)
1320 for (i = 0; i < n; i++)
1321 new_bbs[i]->flags |= BB_DUPLICATED;
1322 for (i = 0; i < n; i++)
1324 edge_iterator ei;
1325 new_bb = new_bbs[i];
1326 if (new_bb->loop_father == target)
1327 new_bb->flags |= BB_IRREDUCIBLE_LOOP;
1329 FOR_EACH_EDGE (ae, ei, new_bb->succs)
1330 if ((ae->dest->flags & BB_DUPLICATED)
1331 && (ae->src->loop_father == target
1332 || ae->dest->loop_father == target))
1333 ae->flags |= EDGE_IRREDUCIBLE_LOOP;
1335 for (i = 0; i < n; i++)
1336 new_bbs[i]->flags &= ~BB_DUPLICATED;
1339 /* Redirect the special edges. */
1340 if (is_latch)
1342 redirect_edge_and_branch_force (latch_edge, new_bbs[0]);
1343 redirect_edge_and_branch_force (new_spec_edges[SE_LATCH],
1344 loop->header);
1345 set_immediate_dominator (CDI_DOMINATORS, new_bbs[0], latch);
1346 latch = loop->latch = new_bbs[n - 1];
1347 e = latch_edge = new_spec_edges[SE_LATCH];
1349 else
1351 redirect_edge_and_branch_force (new_spec_edges[SE_LATCH],
1352 loop->header);
1353 redirect_edge_and_branch_force (e, new_bbs[0]);
1354 set_immediate_dominator (CDI_DOMINATORS, new_bbs[0], e->src);
1355 e = new_spec_edges[SE_LATCH];
1358 /* Record exit edge in this copy. */
1359 if (orig && bitmap_bit_p (wont_exit, j + 1))
1361 if (to_remove)
1362 to_remove->safe_push (new_spec_edges[SE_ORIG]);
1363 set_zero_probability (new_spec_edges[SE_ORIG]);
1365 /* Scale the frequencies of the blocks dominated by the exit. */
1366 if (bbs_to_scale)
1368 EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi)
1370 scale_bbs_frequencies_int (new_bbs + i, 1, scale_after_exit,
1371 REG_BR_PROB_BASE);
1376 /* Record the first copy in the control flow order if it is not
1377 the original loop (i.e. in case of peeling). */
1378 if (!first_active_latch)
1380 memcpy (first_active, new_bbs, n * sizeof (basic_block));
1381 first_active_latch = new_bbs[n - 1];
1384 /* Set counts and frequencies. */
1385 if (flags & DLTHE_FLAG_UPDATE_FREQ)
1387 scale_bbs_frequencies_int (new_bbs, n, scale_act, REG_BR_PROB_BASE);
1388 scale_act = combine_probabilities (scale_act, scale_step[j]);
1391 free (new_bbs);
1392 free (orig_loops);
1394 /* Record the exit edge in the original loop body, and update the frequencies. */
1395 if (orig && bitmap_bit_p (wont_exit, 0))
1397 if (to_remove)
1398 to_remove->safe_push (orig);
1399 set_zero_probability (orig);
1401 /* Scale the frequencies of the blocks dominated by the exit. */
1402 if (bbs_to_scale)
1404 EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi)
1406 scale_bbs_frequencies_int (bbs + i, 1, scale_after_exit,
1407 REG_BR_PROB_BASE);
1412 /* Update the original loop. */
1413 if (!is_latch)
1414 set_immediate_dominator (CDI_DOMINATORS, e->dest, e->src);
1415 if (flags & DLTHE_FLAG_UPDATE_FREQ)
1417 scale_bbs_frequencies_int (bbs, n, scale_main, REG_BR_PROB_BASE);
1418 free (scale_step);
1421 /* Update dominators of outer blocks if affected. */
1422 for (i = 0; i < n; i++)
1424 basic_block dominated, dom_bb;
1425 vec<basic_block> dom_bbs;
1426 unsigned j;
1428 bb = bbs[i];
1429 bb->aux = 0;
1431 dom_bbs = get_dominated_by (CDI_DOMINATORS, bb);
1432 FOR_EACH_VEC_ELT (dom_bbs, j, dominated)
1434 if (flow_bb_inside_loop_p (loop, dominated))
1435 continue;
1436 dom_bb = nearest_common_dominator (
1437 CDI_DOMINATORS, first_active[i], first_active_latch);
1438 set_immediate_dominator (CDI_DOMINATORS, dominated, dom_bb);
1440 dom_bbs.release ();
1442 free (first_active);
1444 free (bbs);
1445 BITMAP_FREE (bbs_to_scale);
1447 return true;
1450 /* A callback for make_forwarder block, to redirect all edges except for
1451 MFB_KJ_EDGE to the entry part. E is the edge for that we should decide
1452 whether to redirect it. */
1454 edge mfb_kj_edge;
1455 bool
1456 mfb_keep_just (edge e)
1458 return e != mfb_kj_edge;
1461 /* True when a candidate preheader BLOCK has predecessors from LOOP. */
1463 static bool
1464 has_preds_from_loop (basic_block block, struct loop *loop)
1466 edge e;
1467 edge_iterator ei;
1469 FOR_EACH_EDGE (e, ei, block->preds)
1470 if (e->src->loop_father == loop)
1471 return true;
1472 return false;
1475 /* Creates a pre-header for a LOOP. Returns newly created block. Unless
1476 CP_SIMPLE_PREHEADERS is set in FLAGS, we only force LOOP to have single
1477 entry; otherwise we also force preheader block to have only one successor.
1478 When CP_FALLTHRU_PREHEADERS is set in FLAGS, we force the preheader block
1479 to be a fallthru predecessor to the loop header and to have only
1480 predecessors from outside of the loop.
1481 The function also updates dominators. */
1483 basic_block
1484 create_preheader (struct loop *loop, int flags)
1486 edge e, fallthru;
1487 basic_block dummy;
1488 int nentry = 0;
1489 bool irred = false;
1490 bool latch_edge_was_fallthru;
1491 edge one_succ_pred = NULL, single_entry = NULL;
1492 edge_iterator ei;
1494 FOR_EACH_EDGE (e, ei, loop->header->preds)
1496 if (e->src == loop->latch)
1497 continue;
1498 irred |= (e->flags & EDGE_IRREDUCIBLE_LOOP) != 0;
1499 nentry++;
1500 single_entry = e;
1501 if (single_succ_p (e->src))
1502 one_succ_pred = e;
1504 gcc_assert (nentry);
1505 if (nentry == 1)
1507 bool need_forwarder_block = false;
1509 /* We do not allow entry block to be the loop preheader, since we
1510 cannot emit code there. */
1511 if (single_entry->src == ENTRY_BLOCK_PTR)
1512 need_forwarder_block = true;
1513 else
1515 /* If we want simple preheaders, also force the preheader to have
1516 just a single successor. */
1517 if ((flags & CP_SIMPLE_PREHEADERS)
1518 && !single_succ_p (single_entry->src))
1519 need_forwarder_block = true;
1520 /* If we want fallthru preheaders, also create forwarder block when
1521 preheader ends with a jump or has predecessors from loop. */
1522 else if ((flags & CP_FALLTHRU_PREHEADERS)
1523 && (JUMP_P (BB_END (single_entry->src))
1524 || has_preds_from_loop (single_entry->src, loop)))
1525 need_forwarder_block = true;
1527 if (! need_forwarder_block)
1528 return NULL;
1531 mfb_kj_edge = loop_latch_edge (loop);
1532 latch_edge_was_fallthru = (mfb_kj_edge->flags & EDGE_FALLTHRU) != 0;
1533 fallthru = make_forwarder_block (loop->header, mfb_keep_just, NULL);
1534 dummy = fallthru->src;
1535 loop->header = fallthru->dest;
1537 /* Try to be clever in placing the newly created preheader. The idea is to
1538 avoid breaking any "fallthruness" relationship between blocks.
1540 The preheader was created just before the header and all incoming edges
1541 to the header were redirected to the preheader, except the latch edge.
1542 So the only problematic case is when this latch edge was a fallthru
1543 edge: it is not anymore after the preheader creation so we have broken
1544 the fallthruness. We're therefore going to look for a better place. */
1545 if (latch_edge_was_fallthru)
1547 if (one_succ_pred)
1548 e = one_succ_pred;
1549 else
1550 e = EDGE_PRED (dummy, 0);
1552 move_block_after (dummy, e->src);
1555 if (irred)
1557 dummy->flags |= BB_IRREDUCIBLE_LOOP;
1558 single_succ_edge (dummy)->flags |= EDGE_IRREDUCIBLE_LOOP;
1561 if (dump_file)
1562 fprintf (dump_file, "Created preheader block for loop %i\n",
1563 loop->num);
1565 if (flags & CP_FALLTHRU_PREHEADERS)
1566 gcc_assert ((single_succ_edge (dummy)->flags & EDGE_FALLTHRU)
1567 && !JUMP_P (BB_END (dummy)));
1569 return dummy;
1572 /* Create preheaders for each loop; for meaning of FLAGS see create_preheader. */
1574 void
1575 create_preheaders (int flags)
1577 loop_iterator li;
1578 struct loop *loop;
1580 if (!current_loops)
1581 return;
1583 FOR_EACH_LOOP (li, loop, 0)
1584 create_preheader (loop, flags);
1585 loops_state_set (LOOPS_HAVE_PREHEADERS);
1588 /* Forces all loop latches to have only single successor. */
1590 void
1591 force_single_succ_latches (void)
1593 loop_iterator li;
1594 struct loop *loop;
1595 edge e;
1597 FOR_EACH_LOOP (li, loop, 0)
1599 if (loop->latch != loop->header && single_succ_p (loop->latch))
1600 continue;
1602 e = find_edge (loop->latch, loop->header);
1603 gcc_checking_assert (e != NULL);
1605 split_edge (e);
1607 loops_state_set (LOOPS_HAVE_SIMPLE_LATCHES);
1610 /* This function is called from loop_version. It splits the entry edge
1611 of the loop we want to version, adds the versioning condition, and
1612 adjust the edges to the two versions of the loop appropriately.
1613 e is an incoming edge. Returns the basic block containing the
1614 condition.
1616 --- edge e ---- > [second_head]
1618 Split it and insert new conditional expression and adjust edges.
1620 --- edge e ---> [cond expr] ---> [first_head]
1622 +---------> [second_head]
1624 THEN_PROB is the probability of then branch of the condition. */
1626 static basic_block
1627 lv_adjust_loop_entry_edge (basic_block first_head, basic_block second_head,
1628 edge e, void *cond_expr, unsigned then_prob)
1630 basic_block new_head = NULL;
1631 edge e1;
1633 gcc_assert (e->dest == second_head);
1635 /* Split edge 'e'. This will create a new basic block, where we can
1636 insert conditional expr. */
1637 new_head = split_edge (e);
1639 lv_add_condition_to_bb (first_head, second_head, new_head,
1640 cond_expr);
1642 /* Don't set EDGE_TRUE_VALUE in RTL mode, as it's invalid there. */
1643 e = single_succ_edge (new_head);
1644 e1 = make_edge (new_head, first_head,
1645 current_ir_type () == IR_GIMPLE ? EDGE_TRUE_VALUE : 0);
1646 e1->probability = then_prob;
1647 e->probability = REG_BR_PROB_BASE - then_prob;
1648 e1->count = apply_probability (e->count, e1->probability);
1649 e->count = apply_probability (e->count, e->probability);
1651 set_immediate_dominator (CDI_DOMINATORS, first_head, new_head);
1652 set_immediate_dominator (CDI_DOMINATORS, second_head, new_head);
1654 /* Adjust loop header phi nodes. */
1655 lv_adjust_loop_header_phi (first_head, second_head, new_head, e1);
1657 return new_head;
1660 /* Main entry point for Loop Versioning transformation.
1662 This transformation given a condition and a loop, creates
1663 -if (condition) { loop_copy1 } else { loop_copy2 },
1664 where loop_copy1 is the loop transformed in one way, and loop_copy2
1665 is the loop transformed in another way (or unchanged). 'condition'
1666 may be a run time test for things that were not resolved by static
1667 analysis (overlapping ranges (anti-aliasing), alignment, etc.).
1669 THEN_PROB is the probability of the then edge of the if. THEN_SCALE
1670 is the ratio by that the frequencies in the original loop should
1671 be scaled. ELSE_SCALE is the ratio by that the frequencies in the
1672 new loop should be scaled.
1674 If PLACE_AFTER is true, we place the new loop after LOOP in the
1675 instruction stream, otherwise it is placed before LOOP. */
1677 struct loop *
1678 loop_version (struct loop *loop,
1679 void *cond_expr, basic_block *condition_bb,
1680 unsigned then_prob, unsigned then_scale, unsigned else_scale,
1681 bool place_after)
1683 basic_block first_head, second_head;
1684 edge entry, latch_edge, true_edge, false_edge;
1685 int irred_flag;
1686 struct loop *nloop;
1687 basic_block cond_bb;
1689 /* Record entry and latch edges for the loop */
1690 entry = loop_preheader_edge (loop);
1691 irred_flag = entry->flags & EDGE_IRREDUCIBLE_LOOP;
1692 entry->flags &= ~EDGE_IRREDUCIBLE_LOOP;
1694 /* Note down head of loop as first_head. */
1695 first_head = entry->dest;
1697 /* Duplicate loop. */
1698 if (!cfg_hook_duplicate_loop_to_header_edge (loop, entry, 1,
1699 NULL, NULL, NULL, 0))
1701 entry->flags |= irred_flag;
1702 return NULL;
1705 /* After duplication entry edge now points to new loop head block.
1706 Note down new head as second_head. */
1707 second_head = entry->dest;
1709 /* Split loop entry edge and insert new block with cond expr. */
1710 cond_bb = lv_adjust_loop_entry_edge (first_head, second_head,
1711 entry, cond_expr, then_prob);
1712 if (condition_bb)
1713 *condition_bb = cond_bb;
1715 if (!cond_bb)
1717 entry->flags |= irred_flag;
1718 return NULL;
1721 latch_edge = single_succ_edge (get_bb_copy (loop->latch));
1723 extract_cond_bb_edges (cond_bb, &true_edge, &false_edge);
1724 nloop = loopify (latch_edge,
1725 single_pred_edge (get_bb_copy (loop->header)),
1726 cond_bb, true_edge, false_edge,
1727 false /* Do not redirect all edges. */,
1728 then_scale, else_scale);
1730 copy_loop_info (loop, nloop);
1732 /* loopify redirected latch_edge. Update its PENDING_STMTS. */
1733 lv_flush_pending_stmts (latch_edge);
1735 /* loopify redirected condition_bb's succ edge. Update its PENDING_STMTS. */
1736 extract_cond_bb_edges (cond_bb, &true_edge, &false_edge);
1737 lv_flush_pending_stmts (false_edge);
1738 /* Adjust irreducible flag. */
1739 if (irred_flag)
1741 cond_bb->flags |= BB_IRREDUCIBLE_LOOP;
1742 loop_preheader_edge (loop)->flags |= EDGE_IRREDUCIBLE_LOOP;
1743 loop_preheader_edge (nloop)->flags |= EDGE_IRREDUCIBLE_LOOP;
1744 single_pred_edge (cond_bb)->flags |= EDGE_IRREDUCIBLE_LOOP;
1747 if (place_after)
1749 basic_block *bbs = get_loop_body_in_dom_order (nloop), after;
1750 unsigned i;
1752 after = loop->latch;
1754 for (i = 0; i < nloop->num_nodes; i++)
1756 move_block_after (bbs[i], after);
1757 after = bbs[i];
1759 free (bbs);
1762 /* At this point condition_bb is loop preheader with two successors,
1763 first_head and second_head. Make sure that loop preheader has only
1764 one successor. */
1765 split_edge (loop_preheader_edge (loop));
1766 split_edge (loop_preheader_edge (nloop));
1768 return nloop;