2012-10-18 François Dumont <fdumont@gcc.gnu.org>
[official-gcc.git] / gcc / cfgloopmanip.c
blob97a90bbff1d400f03cd047528c38ebc89074ccc1
1 /* Loop manipulation code for GNU compiler.
2 Copyright (C) 2002, 2003, 2004, 2005, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "rtl.h"
26 #include "basic-block.h"
27 #include "cfgloop.h"
28 #include "tree-flow.h"
29 #include "dumpfile.h"
31 static void copy_loops_to (struct loop **, int,
32 struct loop *);
33 static void loop_redirect_edge (edge, basic_block);
34 static void remove_bbs (basic_block *, int);
35 static bool rpe_enum_p (const_basic_block, const void *);
36 static int find_path (edge, basic_block **);
37 static void fix_loop_placements (struct loop *, bool *);
38 static bool fix_bb_placement (basic_block);
39 static void fix_bb_placements (basic_block, bool *);
41 /* Checks whether basic block BB is dominated by DATA. */
42 static bool
43 rpe_enum_p (const_basic_block bb, const void *data)
45 return dominated_by_p (CDI_DOMINATORS, bb, (const_basic_block) data);
48 /* Remove basic blocks BBS. NBBS is the number of the basic blocks. */
50 static void
51 remove_bbs (basic_block *bbs, int nbbs)
53 int i;
55 for (i = 0; i < nbbs; i++)
56 delete_basic_block (bbs[i]);
59 /* Find path -- i.e. the basic blocks dominated by edge E and put them
60 into array BBS, that will be allocated large enough to contain them.
61 E->dest must have exactly one predecessor for this to work (it is
62 easy to achieve and we do not put it here because we do not want to
63 alter anything by this function). The number of basic blocks in the
64 path is returned. */
65 static int
66 find_path (edge e, basic_block **bbs)
68 gcc_assert (EDGE_COUNT (e->dest->preds) <= 1);
70 /* Find bbs in the path. */
71 *bbs = XNEWVEC (basic_block, n_basic_blocks);
72 return dfs_enumerate_from (e->dest, 0, rpe_enum_p, *bbs,
73 n_basic_blocks, e->dest);
76 /* Fix placement of basic block BB inside loop hierarchy --
77 Let L be a loop to that BB belongs. Then every successor of BB must either
78 1) belong to some superloop of loop L, or
79 2) be a header of loop K such that K->outer is superloop of L
80 Returns true if we had to move BB into other loop to enforce this condition,
81 false if the placement of BB was already correct (provided that placements
82 of its successors are correct). */
83 static bool
84 fix_bb_placement (basic_block bb)
86 edge e;
87 edge_iterator ei;
88 struct loop *loop = current_loops->tree_root, *act;
90 FOR_EACH_EDGE (e, ei, bb->succs)
92 if (e->dest == EXIT_BLOCK_PTR)
93 continue;
95 act = e->dest->loop_father;
96 if (act->header == e->dest)
97 act = loop_outer (act);
99 if (flow_loop_nested_p (loop, act))
100 loop = act;
103 if (loop == bb->loop_father)
104 return false;
106 remove_bb_from_loops (bb);
107 add_bb_to_loop (bb, loop);
109 return true;
112 /* Fix placement of LOOP inside loop tree, i.e. find the innermost superloop
113 of LOOP to that leads at least one exit edge of LOOP, and set it
114 as the immediate superloop of LOOP. Return true if the immediate superloop
115 of LOOP changed. */
117 static bool
118 fix_loop_placement (struct loop *loop)
120 unsigned i;
121 edge e;
122 VEC (edge, heap) *exits = get_loop_exit_edges (loop);
123 struct loop *father = current_loops->tree_root, *act;
124 bool ret = false;
126 FOR_EACH_VEC_ELT (edge, exits, i, e)
128 act = find_common_loop (loop, e->dest->loop_father);
129 if (flow_loop_nested_p (father, act))
130 father = act;
133 if (father != loop_outer (loop))
135 for (act = loop_outer (loop); act != father; act = loop_outer (act))
136 act->num_nodes -= loop->num_nodes;
137 flow_loop_tree_node_remove (loop);
138 flow_loop_tree_node_add (father, loop);
140 /* The exit edges of LOOP no longer exits its original immediate
141 superloops; remove them from the appropriate exit lists. */
142 FOR_EACH_VEC_ELT (edge, exits, i, e)
143 rescan_loop_exit (e, false, false);
145 ret = true;
148 VEC_free (edge, heap, exits);
149 return ret;
152 /* Fix placements of basic blocks inside loop hierarchy stored in loops; i.e.
153 enforce condition condition stated in description of fix_bb_placement. We
154 start from basic block FROM that had some of its successors removed, so that
155 his placement no longer has to be correct, and iteratively fix placement of
156 its predecessors that may change if placement of FROM changed. Also fix
157 placement of subloops of FROM->loop_father, that might also be altered due
158 to this change; the condition for them is similar, except that instead of
159 successors we consider edges coming out of the loops.
161 If the changes may invalidate the information about irreducible regions,
162 IRRED_INVALIDATED is set to true. */
164 static void
165 fix_bb_placements (basic_block from,
166 bool *irred_invalidated)
168 sbitmap in_queue;
169 basic_block *queue, *qtop, *qbeg, *qend;
170 struct loop *base_loop, *target_loop;
171 edge e;
173 /* We pass through blocks back-reachable from FROM, testing whether some
174 of their successors moved to outer loop. It may be necessary to
175 iterate several times, but it is finite, as we stop unless we move
176 the basic block up the loop structure. The whole story is a bit
177 more complicated due to presence of subloops, those are moved using
178 fix_loop_placement. */
180 base_loop = from->loop_father;
181 /* If we are already in the outermost loop, the basic blocks cannot be moved
182 outside of it. If FROM is the header of the base loop, it cannot be moved
183 outside of it, either. In both cases, we can end now. */
184 if (base_loop == current_loops->tree_root
185 || from == base_loop->header)
186 return;
188 in_queue = sbitmap_alloc (last_basic_block);
189 sbitmap_zero (in_queue);
190 SET_BIT (in_queue, from->index);
191 /* Prevent us from going out of the base_loop. */
192 SET_BIT (in_queue, base_loop->header->index);
194 queue = XNEWVEC (basic_block, base_loop->num_nodes + 1);
195 qtop = queue + base_loop->num_nodes + 1;
196 qbeg = queue;
197 qend = queue + 1;
198 *qbeg = from;
200 while (qbeg != qend)
202 edge_iterator ei;
203 from = *qbeg;
204 qbeg++;
205 if (qbeg == qtop)
206 qbeg = queue;
207 RESET_BIT (in_queue, from->index);
209 if (from->loop_father->header == from)
211 /* Subloop header, maybe move the loop upward. */
212 if (!fix_loop_placement (from->loop_father))
213 continue;
214 target_loop = loop_outer (from->loop_father);
216 else
218 /* Ordinary basic block. */
219 if (!fix_bb_placement (from))
220 continue;
221 target_loop = from->loop_father;
224 FOR_EACH_EDGE (e, ei, from->succs)
226 if (e->flags & EDGE_IRREDUCIBLE_LOOP)
227 *irred_invalidated = true;
230 /* Something has changed, insert predecessors into queue. */
231 FOR_EACH_EDGE (e, ei, from->preds)
233 basic_block pred = e->src;
234 struct loop *nca;
236 if (e->flags & EDGE_IRREDUCIBLE_LOOP)
237 *irred_invalidated = true;
239 if (TEST_BIT (in_queue, pred->index))
240 continue;
242 /* If it is subloop, then it either was not moved, or
243 the path up the loop tree from base_loop do not contain
244 it. */
245 nca = find_common_loop (pred->loop_father, base_loop);
246 if (pred->loop_father != base_loop
247 && (nca == base_loop
248 || nca != pred->loop_father))
249 pred = pred->loop_father->header;
250 else if (!flow_loop_nested_p (target_loop, pred->loop_father))
252 /* If PRED is already higher in the loop hierarchy than the
253 TARGET_LOOP to that we moved FROM, the change of the position
254 of FROM does not affect the position of PRED, so there is no
255 point in processing it. */
256 continue;
259 if (TEST_BIT (in_queue, pred->index))
260 continue;
262 /* Schedule the basic block. */
263 *qend = pred;
264 qend++;
265 if (qend == qtop)
266 qend = queue;
267 SET_BIT (in_queue, pred->index);
270 free (in_queue);
271 free (queue);
274 /* Removes path beginning at edge E, i.e. remove basic blocks dominated by E
275 and update loop structures and dominators. Return true if we were able
276 to remove the path, false otherwise (and nothing is affected then). */
277 bool
278 remove_path (edge e)
280 edge ae;
281 basic_block *rem_bbs, *bord_bbs, from, bb;
282 VEC (basic_block, heap) *dom_bbs;
283 int i, nrem, n_bord_bbs;
284 sbitmap seen;
285 bool irred_invalidated = false;
286 edge_iterator ei;
287 struct loop *l, *f;
289 if (!can_remove_branch_p (e))
290 return false;
292 /* Keep track of whether we need to update information about irreducible
293 regions. This is the case if the removed area is a part of the
294 irreducible region, or if the set of basic blocks that belong to a loop
295 that is inside an irreducible region is changed, or if such a loop is
296 removed. */
297 if (e->flags & EDGE_IRREDUCIBLE_LOOP)
298 irred_invalidated = true;
300 /* We need to check whether basic blocks are dominated by the edge
301 e, but we only have basic block dominators. This is easy to
302 fix -- when e->dest has exactly one predecessor, this corresponds
303 to blocks dominated by e->dest, if not, split the edge. */
304 if (!single_pred_p (e->dest))
305 e = single_pred_edge (split_edge (e));
307 /* It may happen that by removing path we remove one or more loops
308 we belong to. In this case first unloop the loops, then proceed
309 normally. We may assume that e->dest is not a header of any loop,
310 as it now has exactly one predecessor. */
311 for (l = e->src->loop_father; loop_outer (l); l = f)
313 f = loop_outer (l);
314 if (dominated_by_p (CDI_DOMINATORS, l->latch, e->dest))
315 unloop (l, &irred_invalidated);
318 /* Identify the path. */
319 nrem = find_path (e, &rem_bbs);
321 n_bord_bbs = 0;
322 bord_bbs = XNEWVEC (basic_block, n_basic_blocks);
323 seen = sbitmap_alloc (last_basic_block);
324 sbitmap_zero (seen);
326 /* Find "border" hexes -- i.e. those with predecessor in removed path. */
327 for (i = 0; i < nrem; i++)
328 SET_BIT (seen, rem_bbs[i]->index);
329 if (!irred_invalidated)
330 FOR_EACH_EDGE (ae, ei, e->src->succs)
331 if (ae != e && ae->dest != EXIT_BLOCK_PTR && !TEST_BIT (seen, ae->dest->index)
332 && ae->flags & EDGE_IRREDUCIBLE_LOOP)
333 irred_invalidated = true;
334 for (i = 0; i < nrem; i++)
336 bb = rem_bbs[i];
337 FOR_EACH_EDGE (ae, ei, rem_bbs[i]->succs)
338 if (ae->dest != EXIT_BLOCK_PTR && !TEST_BIT (seen, ae->dest->index))
340 SET_BIT (seen, ae->dest->index);
341 bord_bbs[n_bord_bbs++] = ae->dest;
343 if (ae->flags & EDGE_IRREDUCIBLE_LOOP)
344 irred_invalidated = true;
348 /* Remove the path. */
349 from = e->src;
350 remove_branch (e);
351 dom_bbs = NULL;
353 /* Cancel loops contained in the path. */
354 for (i = 0; i < nrem; i++)
355 if (rem_bbs[i]->loop_father->header == rem_bbs[i])
356 cancel_loop_tree (rem_bbs[i]->loop_father);
358 remove_bbs (rem_bbs, nrem);
359 free (rem_bbs);
361 /* Find blocks whose dominators may be affected. */
362 sbitmap_zero (seen);
363 for (i = 0; i < n_bord_bbs; i++)
365 basic_block ldom;
367 bb = get_immediate_dominator (CDI_DOMINATORS, bord_bbs[i]);
368 if (TEST_BIT (seen, bb->index))
369 continue;
370 SET_BIT (seen, bb->index);
372 for (ldom = first_dom_son (CDI_DOMINATORS, bb);
373 ldom;
374 ldom = next_dom_son (CDI_DOMINATORS, ldom))
375 if (!dominated_by_p (CDI_DOMINATORS, from, ldom))
376 VEC_safe_push (basic_block, heap, dom_bbs, ldom);
379 free (seen);
381 /* Recount dominators. */
382 iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, true);
383 VEC_free (basic_block, heap, dom_bbs);
384 free (bord_bbs);
386 /* Fix placements of basic blocks inside loops and the placement of
387 loops in the loop tree. */
388 fix_bb_placements (from, &irred_invalidated);
389 fix_loop_placements (from->loop_father, &irred_invalidated);
391 if (irred_invalidated
392 && loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
393 mark_irreducible_loops ();
395 return true;
398 /* Creates place for a new LOOP in loops structure. */
400 static void
401 place_new_loop (struct loop *loop)
403 loop->num = number_of_loops ();
404 VEC_safe_push (loop_p, gc, current_loops->larray, loop);
407 /* Given LOOP structure with filled header and latch, find the body of the
408 corresponding loop and add it to loops tree. Insert the LOOP as a son of
409 outer. */
411 void
412 add_loop (struct loop *loop, struct loop *outer)
414 basic_block *bbs;
415 int i, n;
416 struct loop *subloop;
417 edge e;
418 edge_iterator ei;
420 /* Add it to loop structure. */
421 place_new_loop (loop);
422 flow_loop_tree_node_add (outer, loop);
424 /* Find its nodes. */
425 bbs = XNEWVEC (basic_block, n_basic_blocks);
426 n = get_loop_body_with_size (loop, bbs, n_basic_blocks);
428 for (i = 0; i < n; i++)
430 if (bbs[i]->loop_father == outer)
432 remove_bb_from_loops (bbs[i]);
433 add_bb_to_loop (bbs[i], loop);
434 continue;
437 loop->num_nodes++;
439 /* If we find a direct subloop of OUTER, move it to LOOP. */
440 subloop = bbs[i]->loop_father;
441 if (loop_outer (subloop) == outer
442 && subloop->header == bbs[i])
444 flow_loop_tree_node_remove (subloop);
445 flow_loop_tree_node_add (loop, subloop);
449 /* Update the information about loop exit edges. */
450 for (i = 0; i < n; i++)
452 FOR_EACH_EDGE (e, ei, bbs[i]->succs)
454 rescan_loop_exit (e, false, false);
458 free (bbs);
461 /* Multiply all frequencies in LOOP by NUM/DEN. */
463 void
464 scale_loop_frequencies (struct loop *loop, int num, int den)
466 basic_block *bbs;
468 bbs = get_loop_body (loop);
469 scale_bbs_frequencies_int (bbs, loop->num_nodes, num, den);
470 free (bbs);
473 /* Multiply all frequencies in LOOP by SCALE/REG_BR_PROB_BASE.
474 If ITERATION_BOUND is non-zero, scale even further if loop is predicted
475 to iterate too many times. */
477 void
478 scale_loop_profile (struct loop *loop, int scale, int iteration_bound)
480 gcov_type iterations = expected_loop_iterations_unbounded (loop);
481 edge e;
482 edge_iterator ei;
484 if (dump_file && (dump_flags & TDF_DETAILS))
485 fprintf (dump_file, ";; Scaling loop %i with scale %f, "
486 "bounding iterations to %i from guessed %i\n",
487 loop->num, (double)scale / REG_BR_PROB_BASE,
488 iteration_bound, (int)iterations);
490 /* See if loop is predicted to iterate too many times. */
491 if (iteration_bound && iterations > 0
492 && RDIV (iterations * scale, REG_BR_PROB_BASE) > iteration_bound)
494 /* Fixing loop profile for different trip count is not trivial; the exit
495 probabilities has to be updated to match and frequencies propagated down
496 to the loop body.
498 We fully update only the simple case of loop with single exit that is
499 either from the latch or BB just before latch and leads from BB with
500 simple conditional jump. This is OK for use in vectorizer. */
501 e = single_exit (loop);
502 if (e)
504 edge other_e;
505 int freq_delta;
506 gcov_type count_delta;
508 FOR_EACH_EDGE (other_e, ei, e->src->succs)
509 if (!(other_e->flags & (EDGE_ABNORMAL | EDGE_FAKE))
510 && e != other_e)
511 break;
513 /* Probability of exit must be 1/iterations. */
514 freq_delta = EDGE_FREQUENCY (e);
515 e->probability = REG_BR_PROB_BASE / iteration_bound;
516 other_e->probability = inverse_probability (e->probability);
517 freq_delta -= EDGE_FREQUENCY (e);
519 /* Adjust counts accordingly. */
520 count_delta = e->count;
521 e->count = apply_probability (e->src->count, e->probability);
522 other_e->count = apply_probability (e->src->count, other_e->probability);
523 count_delta -= e->count;
525 /* If latch exists, change its frequency and count, since we changed
526 probability of exit. Theoretically we should update everything from
527 source of exit edge to latch, but for vectorizer this is enough. */
528 if (loop->latch
529 && loop->latch != e->src)
531 loop->latch->frequency += freq_delta;
532 if (loop->latch->frequency < 0)
533 loop->latch->frequency = 0;
534 loop->latch->count += count_delta;
535 if (loop->latch->count < 0)
536 loop->latch->count = 0;
540 /* Roughly speaking we want to reduce the loop body profile by the
541 the difference of loop iterations. We however can do better if
542 we look at the actual profile, if it is available. */
543 scale = RDIV (iteration_bound * scale, iterations);
544 if (loop->header->count)
546 gcov_type count_in = 0;
548 FOR_EACH_EDGE (e, ei, loop->header->preds)
549 if (e->src != loop->latch)
550 count_in += e->count;
552 if (count_in != 0)
553 scale = RDIV (count_in * iteration_bound * REG_BR_PROB_BASE, loop->header->count);
555 else if (loop->header->frequency)
557 int freq_in = 0;
559 FOR_EACH_EDGE (e, ei, loop->header->preds)
560 if (e->src != loop->latch)
561 freq_in += EDGE_FREQUENCY (e);
563 if (freq_in != 0)
564 scale = RDIV (freq_in * iteration_bound * REG_BR_PROB_BASE, loop->header->frequency);
566 if (!scale)
567 scale = 1;
570 if (scale == REG_BR_PROB_BASE)
571 return;
573 /* Scale the actual probabilities. */
574 scale_loop_frequencies (loop, scale, REG_BR_PROB_BASE);
575 if (dump_file && (dump_flags & TDF_DETAILS))
576 fprintf (dump_file, ";; guessed iterations are now %i\n",
577 (int)expected_loop_iterations_unbounded (loop));
580 /* Recompute dominance information for basic blocks outside LOOP. */
582 static void
583 update_dominators_in_loop (struct loop *loop)
585 VEC (basic_block, heap) *dom_bbs = NULL;
586 sbitmap seen;
587 basic_block *body;
588 unsigned i;
590 seen = sbitmap_alloc (last_basic_block);
591 sbitmap_zero (seen);
592 body = get_loop_body (loop);
594 for (i = 0; i < loop->num_nodes; i++)
595 SET_BIT (seen, body[i]->index);
597 for (i = 0; i < loop->num_nodes; i++)
599 basic_block ldom;
601 for (ldom = first_dom_son (CDI_DOMINATORS, body[i]);
602 ldom;
603 ldom = next_dom_son (CDI_DOMINATORS, ldom))
604 if (!TEST_BIT (seen, ldom->index))
606 SET_BIT (seen, ldom->index);
607 VEC_safe_push (basic_block, heap, dom_bbs, ldom);
611 iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, false);
612 free (body);
613 free (seen);
614 VEC_free (basic_block, heap, dom_bbs);
617 /* Creates an if region as shown above. CONDITION is used to create
618 the test for the if.
621 | ------------- -------------
622 | | pred_bb | | pred_bb |
623 | ------------- -------------
624 | | |
625 | | | ENTRY_EDGE
626 | | ENTRY_EDGE V
627 | | ====> -------------
628 | | | cond_bb |
629 | | | CONDITION |
630 | | -------------
631 | V / \
632 | ------------- e_false / \ e_true
633 | | succ_bb | V V
634 | ------------- ----------- -----------
635 | | false_bb | | true_bb |
636 | ----------- -----------
637 | \ /
638 | \ /
639 | V V
640 | -------------
641 | | join_bb |
642 | -------------
643 | | exit_edge (result)
645 | -----------
646 | | succ_bb |
647 | -----------
651 edge
652 create_empty_if_region_on_edge (edge entry_edge, tree condition)
655 basic_block cond_bb, true_bb, false_bb, join_bb;
656 edge e_true, e_false, exit_edge;
657 gimple cond_stmt;
658 tree simple_cond;
659 gimple_stmt_iterator gsi;
661 cond_bb = split_edge (entry_edge);
663 /* Insert condition in cond_bb. */
664 gsi = gsi_last_bb (cond_bb);
665 simple_cond =
666 force_gimple_operand_gsi (&gsi, condition, true, NULL,
667 false, GSI_NEW_STMT);
668 cond_stmt = gimple_build_cond_from_tree (simple_cond, NULL_TREE, NULL_TREE);
669 gsi = gsi_last_bb (cond_bb);
670 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
672 join_bb = split_edge (single_succ_edge (cond_bb));
674 e_true = single_succ_edge (cond_bb);
675 true_bb = split_edge (e_true);
677 e_false = make_edge (cond_bb, join_bb, 0);
678 false_bb = split_edge (e_false);
680 e_true->flags &= ~EDGE_FALLTHRU;
681 e_true->flags |= EDGE_TRUE_VALUE;
682 e_false->flags &= ~EDGE_FALLTHRU;
683 e_false->flags |= EDGE_FALSE_VALUE;
685 set_immediate_dominator (CDI_DOMINATORS, cond_bb, entry_edge->src);
686 set_immediate_dominator (CDI_DOMINATORS, true_bb, cond_bb);
687 set_immediate_dominator (CDI_DOMINATORS, false_bb, cond_bb);
688 set_immediate_dominator (CDI_DOMINATORS, join_bb, cond_bb);
690 exit_edge = single_succ_edge (join_bb);
692 if (single_pred_p (exit_edge->dest))
693 set_immediate_dominator (CDI_DOMINATORS, exit_edge->dest, join_bb);
695 return exit_edge;
698 /* create_empty_loop_on_edge
700 | - pred_bb - ------ pred_bb ------
701 | | | | iv0 = initial_value |
702 | -----|----- ---------|-----------
703 | | ______ | entry_edge
704 | | entry_edge / | |
705 | | ====> | -V---V- loop_header -------------
706 | V | | iv_before = phi (iv0, iv_after) |
707 | - succ_bb - | ---|-----------------------------
708 | | | | |
709 | ----------- | ---V--- loop_body ---------------
710 | | | iv_after = iv_before + stride |
711 | | | if (iv_before < upper_bound) |
712 | | ---|--------------\--------------
713 | | | \ exit_e
714 | | V \
715 | | - loop_latch - V- succ_bb -
716 | | | | | |
717 | | /------------- -----------
718 | \ ___ /
720 Creates an empty loop as shown above, the IV_BEFORE is the SSA_NAME
721 that is used before the increment of IV. IV_BEFORE should be used for
722 adding code to the body that uses the IV. OUTER is the outer loop in
723 which the new loop should be inserted.
725 Both INITIAL_VALUE and UPPER_BOUND expressions are gimplified and
726 inserted on the loop entry edge. This implies that this function
727 should be used only when the UPPER_BOUND expression is a loop
728 invariant. */
730 struct loop *
731 create_empty_loop_on_edge (edge entry_edge,
732 tree initial_value,
733 tree stride, tree upper_bound,
734 tree iv,
735 tree *iv_before,
736 tree *iv_after,
737 struct loop *outer)
739 basic_block loop_header, loop_latch, succ_bb, pred_bb;
740 struct loop *loop;
741 gimple_stmt_iterator gsi;
742 gimple_seq stmts;
743 gimple cond_expr;
744 tree exit_test;
745 edge exit_e;
746 int prob;
748 gcc_assert (entry_edge && initial_value && stride && upper_bound && iv);
750 /* Create header, latch and wire up the loop. */
751 pred_bb = entry_edge->src;
752 loop_header = split_edge (entry_edge);
753 loop_latch = split_edge (single_succ_edge (loop_header));
754 succ_bb = single_succ (loop_latch);
755 make_edge (loop_header, succ_bb, 0);
756 redirect_edge_succ_nodup (single_succ_edge (loop_latch), loop_header);
758 /* Set immediate dominator information. */
759 set_immediate_dominator (CDI_DOMINATORS, loop_header, pred_bb);
760 set_immediate_dominator (CDI_DOMINATORS, loop_latch, loop_header);
761 set_immediate_dominator (CDI_DOMINATORS, succ_bb, loop_header);
763 /* Initialize a loop structure and put it in a loop hierarchy. */
764 loop = alloc_loop ();
765 loop->header = loop_header;
766 loop->latch = loop_latch;
767 add_loop (loop, outer);
769 /* TODO: Fix frequencies and counts. */
770 prob = REG_BR_PROB_BASE / 2;
772 scale_loop_frequencies (loop, REG_BR_PROB_BASE - prob, REG_BR_PROB_BASE);
774 /* Update dominators. */
775 update_dominators_in_loop (loop);
777 /* Modify edge flags. */
778 exit_e = single_exit (loop);
779 exit_e->flags = EDGE_LOOP_EXIT | EDGE_FALSE_VALUE;
780 single_pred_edge (loop_latch)->flags = EDGE_TRUE_VALUE;
782 /* Construct IV code in loop. */
783 initial_value = force_gimple_operand (initial_value, &stmts, true, iv);
784 if (stmts)
786 gsi_insert_seq_on_edge (loop_preheader_edge (loop), stmts);
787 gsi_commit_edge_inserts ();
790 upper_bound = force_gimple_operand (upper_bound, &stmts, true, NULL);
791 if (stmts)
793 gsi_insert_seq_on_edge (loop_preheader_edge (loop), stmts);
794 gsi_commit_edge_inserts ();
797 gsi = gsi_last_bb (loop_header);
798 create_iv (initial_value, stride, iv, loop, &gsi, false,
799 iv_before, iv_after);
801 /* Insert loop exit condition. */
802 cond_expr = gimple_build_cond
803 (LT_EXPR, *iv_before, upper_bound, NULL_TREE, NULL_TREE);
805 exit_test = gimple_cond_lhs (cond_expr);
806 exit_test = force_gimple_operand_gsi (&gsi, exit_test, true, NULL,
807 false, GSI_NEW_STMT);
808 gimple_cond_set_lhs (cond_expr, exit_test);
809 gsi = gsi_last_bb (exit_e->src);
810 gsi_insert_after (&gsi, cond_expr, GSI_NEW_STMT);
812 split_block_after_labels (loop_header);
814 return loop;
817 /* Make area between HEADER_EDGE and LATCH_EDGE a loop by connecting
818 latch to header and update loop tree and dominators
819 accordingly. Everything between them plus LATCH_EDGE destination must
820 be dominated by HEADER_EDGE destination, and back-reachable from
821 LATCH_EDGE source. HEADER_EDGE is redirected to basic block SWITCH_BB,
822 FALSE_EDGE of SWITCH_BB to original destination of HEADER_EDGE and
823 TRUE_EDGE of SWITCH_BB to original destination of LATCH_EDGE.
824 Returns the newly created loop. Frequencies and counts in the new loop
825 are scaled by FALSE_SCALE and in the old one by TRUE_SCALE. */
827 struct loop *
828 loopify (edge latch_edge, edge header_edge,
829 basic_block switch_bb, edge true_edge, edge false_edge,
830 bool redirect_all_edges, unsigned true_scale, unsigned false_scale)
832 basic_block succ_bb = latch_edge->dest;
833 basic_block pred_bb = header_edge->src;
834 struct loop *loop = alloc_loop ();
835 struct loop *outer = loop_outer (succ_bb->loop_father);
836 int freq;
837 gcov_type cnt;
838 edge e;
839 edge_iterator ei;
841 loop->header = header_edge->dest;
842 loop->latch = latch_edge->src;
844 freq = EDGE_FREQUENCY (header_edge);
845 cnt = header_edge->count;
847 /* Redirect edges. */
848 loop_redirect_edge (latch_edge, loop->header);
849 loop_redirect_edge (true_edge, succ_bb);
851 /* During loop versioning, one of the switch_bb edge is already properly
852 set. Do not redirect it again unless redirect_all_edges is true. */
853 if (redirect_all_edges)
855 loop_redirect_edge (header_edge, switch_bb);
856 loop_redirect_edge (false_edge, loop->header);
858 /* Update dominators. */
859 set_immediate_dominator (CDI_DOMINATORS, switch_bb, pred_bb);
860 set_immediate_dominator (CDI_DOMINATORS, loop->header, switch_bb);
863 set_immediate_dominator (CDI_DOMINATORS, succ_bb, switch_bb);
865 /* Compute new loop. */
866 add_loop (loop, outer);
868 /* Add switch_bb to appropriate loop. */
869 if (switch_bb->loop_father)
870 remove_bb_from_loops (switch_bb);
871 add_bb_to_loop (switch_bb, outer);
873 /* Fix frequencies. */
874 if (redirect_all_edges)
876 switch_bb->frequency = freq;
877 switch_bb->count = cnt;
878 FOR_EACH_EDGE (e, ei, switch_bb->succs)
880 e->count = RDIV (switch_bb->count * e->probability, REG_BR_PROB_BASE);
883 scale_loop_frequencies (loop, false_scale, REG_BR_PROB_BASE);
884 scale_loop_frequencies (succ_bb->loop_father, true_scale, REG_BR_PROB_BASE);
885 update_dominators_in_loop (loop);
887 return loop;
890 /* Remove the latch edge of a LOOP and update loops to indicate that
891 the LOOP was removed. After this function, original loop latch will
892 have no successor, which caller is expected to fix somehow.
894 If this may cause the information about irreducible regions to become
895 invalid, IRRED_INVALIDATED is set to true. */
897 void
898 unloop (struct loop *loop, bool *irred_invalidated)
900 basic_block *body;
901 struct loop *ploop;
902 unsigned i, n;
903 basic_block latch = loop->latch;
904 bool dummy = false;
906 if (loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP)
907 *irred_invalidated = true;
909 /* This is relatively straightforward. The dominators are unchanged, as
910 loop header dominates loop latch, so the only thing we have to care of
911 is the placement of loops and basic blocks inside the loop tree. We
912 move them all to the loop->outer, and then let fix_bb_placements do
913 its work. */
915 body = get_loop_body (loop);
916 n = loop->num_nodes;
917 for (i = 0; i < n; i++)
918 if (body[i]->loop_father == loop)
920 remove_bb_from_loops (body[i]);
921 add_bb_to_loop (body[i], loop_outer (loop));
923 free(body);
925 while (loop->inner)
927 ploop = loop->inner;
928 flow_loop_tree_node_remove (ploop);
929 flow_loop_tree_node_add (loop_outer (loop), ploop);
932 /* Remove the loop and free its data. */
933 delete_loop (loop);
935 remove_edge (single_succ_edge (latch));
937 /* We do not pass IRRED_INVALIDATED to fix_bb_placements here, as even if
938 there is an irreducible region inside the cancelled loop, the flags will
939 be still correct. */
940 fix_bb_placements (latch, &dummy);
943 /* Fix placement of superloops of LOOP inside loop tree, i.e. ensure that
944 condition stated in description of fix_loop_placement holds for them.
945 It is used in case when we removed some edges coming out of LOOP, which
946 may cause the right placement of LOOP inside loop tree to change.
948 IRRED_INVALIDATED is set to true if a change in the loop structures might
949 invalidate the information about irreducible regions. */
951 static void
952 fix_loop_placements (struct loop *loop, bool *irred_invalidated)
954 struct loop *outer;
956 while (loop_outer (loop))
958 outer = loop_outer (loop);
959 if (!fix_loop_placement (loop))
960 break;
962 /* Changing the placement of a loop in the loop tree may alter the
963 validity of condition 2) of the description of fix_bb_placement
964 for its preheader, because the successor is the header and belongs
965 to the loop. So call fix_bb_placements to fix up the placement
966 of the preheader and (possibly) of its predecessors. */
967 fix_bb_placements (loop_preheader_edge (loop)->src,
968 irred_invalidated);
969 loop = outer;
973 /* Duplicate loop bounds and other information we store about
974 the loop into its duplicate. */
976 void
977 copy_loop_info (struct loop *loop, struct loop *target)
979 gcc_checking_assert (!target->any_upper_bound && !target->any_estimate);
980 target->any_upper_bound = loop->any_upper_bound;
981 target->nb_iterations_upper_bound = loop->nb_iterations_upper_bound;
982 target->any_estimate = loop->any_estimate;
983 target->nb_iterations_estimate = loop->nb_iterations_estimate;
984 target->estimate_state = loop->estimate_state;
987 /* Copies copy of LOOP as subloop of TARGET loop, placing newly
988 created loop into loops structure. */
989 struct loop *
990 duplicate_loop (struct loop *loop, struct loop *target)
992 struct loop *cloop;
993 cloop = alloc_loop ();
994 place_new_loop (cloop);
996 copy_loop_info (loop, cloop);
998 /* Mark the new loop as copy of LOOP. */
999 set_loop_copy (loop, cloop);
1001 /* Add it to target. */
1002 flow_loop_tree_node_add (target, cloop);
1004 return cloop;
1007 /* Copies structure of subloops of LOOP into TARGET loop, placing
1008 newly created loops into loop tree. */
1009 void
1010 duplicate_subloops (struct loop *loop, struct loop *target)
1012 struct loop *aloop, *cloop;
1014 for (aloop = loop->inner; aloop; aloop = aloop->next)
1016 cloop = duplicate_loop (aloop, target);
1017 duplicate_subloops (aloop, cloop);
1021 /* Copies structure of subloops of N loops, stored in array COPIED_LOOPS,
1022 into TARGET loop, placing newly created loops into loop tree. */
1023 static void
1024 copy_loops_to (struct loop **copied_loops, int n, struct loop *target)
1026 struct loop *aloop;
1027 int i;
1029 for (i = 0; i < n; i++)
1031 aloop = duplicate_loop (copied_loops[i], target);
1032 duplicate_subloops (copied_loops[i], aloop);
1036 /* Redirects edge E to basic block DEST. */
1037 static void
1038 loop_redirect_edge (edge e, basic_block dest)
1040 if (e->dest == dest)
1041 return;
1043 redirect_edge_and_branch_force (e, dest);
1046 /* Check whether LOOP's body can be duplicated. */
1047 bool
1048 can_duplicate_loop_p (const struct loop *loop)
1050 int ret;
1051 basic_block *bbs = get_loop_body (loop);
1053 ret = can_copy_bbs_p (bbs, loop->num_nodes);
1054 free (bbs);
1056 return ret;
1059 /* Sets probability and count of edge E to zero. The probability and count
1060 is redistributed evenly to the remaining edges coming from E->src. */
1062 static void
1063 set_zero_probability (edge e)
1065 basic_block bb = e->src;
1066 edge_iterator ei;
1067 edge ae, last = NULL;
1068 unsigned n = EDGE_COUNT (bb->succs);
1069 gcov_type cnt = e->count, cnt1;
1070 unsigned prob = e->probability, prob1;
1072 gcc_assert (n > 1);
1073 cnt1 = cnt / (n - 1);
1074 prob1 = prob / (n - 1);
1076 FOR_EACH_EDGE (ae, ei, bb->succs)
1078 if (ae == e)
1079 continue;
1081 ae->probability += prob1;
1082 ae->count += cnt1;
1083 last = ae;
1086 /* Move the rest to one of the edges. */
1087 last->probability += prob % (n - 1);
1088 last->count += cnt % (n - 1);
1090 e->probability = 0;
1091 e->count = 0;
1094 /* Duplicates body of LOOP to given edge E NDUPL times. Takes care of updating
1095 loop structure and dominators. E's destination must be LOOP header for
1096 this to work, i.e. it must be entry or latch edge of this loop; these are
1097 unique, as the loops must have preheaders for this function to work
1098 correctly (in case E is latch, the function unrolls the loop, if E is entry
1099 edge, it peels the loop). Store edges created by copying ORIG edge from
1100 copies corresponding to set bits in WONT_EXIT bitmap (bit 0 corresponds to
1101 original LOOP body, the other copies are numbered in order given by control
1102 flow through them) into TO_REMOVE array. Returns false if duplication is
1103 impossible. */
1105 bool
1106 duplicate_loop_to_header_edge (struct loop *loop, edge e,
1107 unsigned int ndupl, sbitmap wont_exit,
1108 edge orig, VEC (edge, heap) **to_remove,
1109 int flags)
1111 struct loop *target, *aloop;
1112 struct loop **orig_loops;
1113 unsigned n_orig_loops;
1114 basic_block header = loop->header, latch = loop->latch;
1115 basic_block *new_bbs, *bbs, *first_active;
1116 basic_block new_bb, bb, first_active_latch = NULL;
1117 edge ae, latch_edge;
1118 edge spec_edges[2], new_spec_edges[2];
1119 #define SE_LATCH 0
1120 #define SE_ORIG 1
1121 unsigned i, j, n;
1122 int is_latch = (latch == e->src);
1123 int scale_act = 0, *scale_step = NULL, scale_main = 0;
1124 int scale_after_exit = 0;
1125 int p, freq_in, freq_le, freq_out_orig;
1126 int prob_pass_thru, prob_pass_wont_exit, prob_pass_main;
1127 int add_irreducible_flag;
1128 basic_block place_after;
1129 bitmap bbs_to_scale = NULL;
1130 bitmap_iterator bi;
1132 gcc_assert (e->dest == loop->header);
1133 gcc_assert (ndupl > 0);
1135 if (orig)
1137 /* Orig must be edge out of the loop. */
1138 gcc_assert (flow_bb_inside_loop_p (loop, orig->src));
1139 gcc_assert (!flow_bb_inside_loop_p (loop, orig->dest));
1142 n = loop->num_nodes;
1143 bbs = get_loop_body_in_dom_order (loop);
1144 gcc_assert (bbs[0] == loop->header);
1145 gcc_assert (bbs[n - 1] == loop->latch);
1147 /* Check whether duplication is possible. */
1148 if (!can_copy_bbs_p (bbs, loop->num_nodes))
1150 free (bbs);
1151 return false;
1153 new_bbs = XNEWVEC (basic_block, loop->num_nodes);
1155 /* In case we are doing loop peeling and the loop is in the middle of
1156 irreducible region, the peeled copies will be inside it too. */
1157 add_irreducible_flag = e->flags & EDGE_IRREDUCIBLE_LOOP;
1158 gcc_assert (!is_latch || !add_irreducible_flag);
1160 /* Find edge from latch. */
1161 latch_edge = loop_latch_edge (loop);
1163 if (flags & DLTHE_FLAG_UPDATE_FREQ)
1165 /* Calculate coefficients by that we have to scale frequencies
1166 of duplicated loop bodies. */
1167 freq_in = header->frequency;
1168 freq_le = EDGE_FREQUENCY (latch_edge);
1169 if (freq_in == 0)
1170 freq_in = 1;
1171 if (freq_in < freq_le)
1172 freq_in = freq_le;
1173 freq_out_orig = orig ? EDGE_FREQUENCY (orig) : freq_in - freq_le;
1174 if (freq_out_orig > freq_in - freq_le)
1175 freq_out_orig = freq_in - freq_le;
1176 prob_pass_thru = RDIV (REG_BR_PROB_BASE * freq_le, freq_in);
1177 prob_pass_wont_exit =
1178 RDIV (REG_BR_PROB_BASE * (freq_le + freq_out_orig), freq_in);
1180 if (orig
1181 && REG_BR_PROB_BASE - orig->probability != 0)
1183 /* The blocks that are dominated by a removed exit edge ORIG have
1184 frequencies scaled by this. */
1185 scale_after_exit = RDIV (REG_BR_PROB_BASE * REG_BR_PROB_BASE,
1186 REG_BR_PROB_BASE - orig->probability);
1187 bbs_to_scale = BITMAP_ALLOC (NULL);
1188 for (i = 0; i < n; i++)
1190 if (bbs[i] != orig->src
1191 && dominated_by_p (CDI_DOMINATORS, bbs[i], orig->src))
1192 bitmap_set_bit (bbs_to_scale, i);
1196 scale_step = XNEWVEC (int, ndupl);
1198 for (i = 1; i <= ndupl; i++)
1199 scale_step[i - 1] = TEST_BIT (wont_exit, i)
1200 ? prob_pass_wont_exit
1201 : prob_pass_thru;
1203 /* Complete peeling is special as the probability of exit in last
1204 copy becomes 1. */
1205 if (flags & DLTHE_FLAG_COMPLETTE_PEEL)
1207 int wanted_freq = EDGE_FREQUENCY (e);
1209 if (wanted_freq > freq_in)
1210 wanted_freq = freq_in;
1212 gcc_assert (!is_latch);
1213 /* First copy has frequency of incoming edge. Each subsequent
1214 frequency should be reduced by prob_pass_wont_exit. Caller
1215 should've managed the flags so all except for original loop
1216 has won't exist set. */
1217 scale_act = RDIV (wanted_freq * REG_BR_PROB_BASE, freq_in);
1218 /* Now simulate the duplication adjustments and compute header
1219 frequency of the last copy. */
1220 for (i = 0; i < ndupl; i++)
1221 wanted_freq = RDIV (wanted_freq * scale_step[i], REG_BR_PROB_BASE);
1222 scale_main = RDIV (wanted_freq * REG_BR_PROB_BASE, freq_in);
1224 else if (is_latch)
1226 prob_pass_main = TEST_BIT (wont_exit, 0)
1227 ? prob_pass_wont_exit
1228 : prob_pass_thru;
1229 p = prob_pass_main;
1230 scale_main = REG_BR_PROB_BASE;
1231 for (i = 0; i < ndupl; i++)
1233 scale_main += p;
1234 p = RDIV (p * scale_step[i], REG_BR_PROB_BASE);
1236 scale_main = RDIV (REG_BR_PROB_BASE * REG_BR_PROB_BASE, scale_main);
1237 scale_act = RDIV (scale_main * prob_pass_main, REG_BR_PROB_BASE);
1239 else
1241 scale_main = REG_BR_PROB_BASE;
1242 for (i = 0; i < ndupl; i++)
1243 scale_main = RDIV (scale_main * scale_step[i], REG_BR_PROB_BASE);
1244 scale_act = REG_BR_PROB_BASE - prob_pass_thru;
1246 for (i = 0; i < ndupl; i++)
1247 gcc_assert (scale_step[i] >= 0 && scale_step[i] <= REG_BR_PROB_BASE);
1248 gcc_assert (scale_main >= 0 && scale_main <= REG_BR_PROB_BASE
1249 && scale_act >= 0 && scale_act <= REG_BR_PROB_BASE);
1252 /* Loop the new bbs will belong to. */
1253 target = e->src->loop_father;
1255 /* Original loops. */
1256 n_orig_loops = 0;
1257 for (aloop = loop->inner; aloop; aloop = aloop->next)
1258 n_orig_loops++;
1259 orig_loops = XNEWVEC (struct loop *, n_orig_loops);
1260 for (aloop = loop->inner, i = 0; aloop; aloop = aloop->next, i++)
1261 orig_loops[i] = aloop;
1263 set_loop_copy (loop, target);
1265 first_active = XNEWVEC (basic_block, n);
1266 if (is_latch)
1268 memcpy (first_active, bbs, n * sizeof (basic_block));
1269 first_active_latch = latch;
1272 spec_edges[SE_ORIG] = orig;
1273 spec_edges[SE_LATCH] = latch_edge;
1275 place_after = e->src;
1276 for (j = 0; j < ndupl; j++)
1278 /* Copy loops. */
1279 copy_loops_to (orig_loops, n_orig_loops, target);
1281 /* Copy bbs. */
1282 copy_bbs (bbs, n, new_bbs, spec_edges, 2, new_spec_edges, loop,
1283 place_after);
1284 place_after = new_spec_edges[SE_LATCH]->src;
1286 if (flags & DLTHE_RECORD_COPY_NUMBER)
1287 for (i = 0; i < n; i++)
1289 gcc_assert (!new_bbs[i]->aux);
1290 new_bbs[i]->aux = (void *)(size_t)(j + 1);
1293 /* Note whether the blocks and edges belong to an irreducible loop. */
1294 if (add_irreducible_flag)
1296 for (i = 0; i < n; i++)
1297 new_bbs[i]->flags |= BB_DUPLICATED;
1298 for (i = 0; i < n; i++)
1300 edge_iterator ei;
1301 new_bb = new_bbs[i];
1302 if (new_bb->loop_father == target)
1303 new_bb->flags |= BB_IRREDUCIBLE_LOOP;
1305 FOR_EACH_EDGE (ae, ei, new_bb->succs)
1306 if ((ae->dest->flags & BB_DUPLICATED)
1307 && (ae->src->loop_father == target
1308 || ae->dest->loop_father == target))
1309 ae->flags |= EDGE_IRREDUCIBLE_LOOP;
1311 for (i = 0; i < n; i++)
1312 new_bbs[i]->flags &= ~BB_DUPLICATED;
1315 /* Redirect the special edges. */
1316 if (is_latch)
1318 redirect_edge_and_branch_force (latch_edge, new_bbs[0]);
1319 redirect_edge_and_branch_force (new_spec_edges[SE_LATCH],
1320 loop->header);
1321 set_immediate_dominator (CDI_DOMINATORS, new_bbs[0], latch);
1322 latch = loop->latch = new_bbs[n - 1];
1323 e = latch_edge = new_spec_edges[SE_LATCH];
1325 else
1327 redirect_edge_and_branch_force (new_spec_edges[SE_LATCH],
1328 loop->header);
1329 redirect_edge_and_branch_force (e, new_bbs[0]);
1330 set_immediate_dominator (CDI_DOMINATORS, new_bbs[0], e->src);
1331 e = new_spec_edges[SE_LATCH];
1334 /* Record exit edge in this copy. */
1335 if (orig && TEST_BIT (wont_exit, j + 1))
1337 if (to_remove)
1338 VEC_safe_push (edge, heap, *to_remove, new_spec_edges[SE_ORIG]);
1339 set_zero_probability (new_spec_edges[SE_ORIG]);
1341 /* Scale the frequencies of the blocks dominated by the exit. */
1342 if (bbs_to_scale)
1344 EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi)
1346 scale_bbs_frequencies_int (new_bbs + i, 1, scale_after_exit,
1347 REG_BR_PROB_BASE);
1352 /* Record the first copy in the control flow order if it is not
1353 the original loop (i.e. in case of peeling). */
1354 if (!first_active_latch)
1356 memcpy (first_active, new_bbs, n * sizeof (basic_block));
1357 first_active_latch = new_bbs[n - 1];
1360 /* Set counts and frequencies. */
1361 if (flags & DLTHE_FLAG_UPDATE_FREQ)
1363 scale_bbs_frequencies_int (new_bbs, n, scale_act, REG_BR_PROB_BASE);
1364 scale_act = RDIV (scale_act * scale_step[j], REG_BR_PROB_BASE);
1367 free (new_bbs);
1368 free (orig_loops);
1370 /* Record the exit edge in the original loop body, and update the frequencies. */
1371 if (orig && TEST_BIT (wont_exit, 0))
1373 if (to_remove)
1374 VEC_safe_push (edge, heap, *to_remove, orig);
1375 set_zero_probability (orig);
1377 /* Scale the frequencies of the blocks dominated by the exit. */
1378 if (bbs_to_scale)
1380 EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale, 0, i, bi)
1382 scale_bbs_frequencies_int (bbs + i, 1, scale_after_exit,
1383 REG_BR_PROB_BASE);
1388 /* Update the original loop. */
1389 if (!is_latch)
1390 set_immediate_dominator (CDI_DOMINATORS, e->dest, e->src);
1391 if (flags & DLTHE_FLAG_UPDATE_FREQ)
1393 scale_bbs_frequencies_int (bbs, n, scale_main, REG_BR_PROB_BASE);
1394 free (scale_step);
1397 /* Update dominators of outer blocks if affected. */
1398 for (i = 0; i < n; i++)
1400 basic_block dominated, dom_bb;
1401 VEC (basic_block, heap) *dom_bbs;
1402 unsigned j;
1404 bb = bbs[i];
1405 bb->aux = 0;
1407 dom_bbs = get_dominated_by (CDI_DOMINATORS, bb);
1408 FOR_EACH_VEC_ELT (basic_block, dom_bbs, j, dominated)
1410 if (flow_bb_inside_loop_p (loop, dominated))
1411 continue;
1412 dom_bb = nearest_common_dominator (
1413 CDI_DOMINATORS, first_active[i], first_active_latch);
1414 set_immediate_dominator (CDI_DOMINATORS, dominated, dom_bb);
1416 VEC_free (basic_block, heap, dom_bbs);
1418 free (first_active);
1420 free (bbs);
1421 BITMAP_FREE (bbs_to_scale);
1423 return true;
1426 /* A callback for make_forwarder block, to redirect all edges except for
1427 MFB_KJ_EDGE to the entry part. E is the edge for that we should decide
1428 whether to redirect it. */
1430 edge mfb_kj_edge;
1431 bool
1432 mfb_keep_just (edge e)
1434 return e != mfb_kj_edge;
1437 /* True when a candidate preheader BLOCK has predecessors from LOOP. */
1439 static bool
1440 has_preds_from_loop (basic_block block, struct loop *loop)
1442 edge e;
1443 edge_iterator ei;
1445 FOR_EACH_EDGE (e, ei, block->preds)
1446 if (e->src->loop_father == loop)
1447 return true;
1448 return false;
1451 /* Creates a pre-header for a LOOP. Returns newly created block. Unless
1452 CP_SIMPLE_PREHEADERS is set in FLAGS, we only force LOOP to have single
1453 entry; otherwise we also force preheader block to have only one successor.
1454 When CP_FALLTHRU_PREHEADERS is set in FLAGS, we force the preheader block
1455 to be a fallthru predecessor to the loop header and to have only
1456 predecessors from outside of the loop.
1457 The function also updates dominators. */
1459 basic_block
1460 create_preheader (struct loop *loop, int flags)
1462 edge e, fallthru;
1463 basic_block dummy;
1464 int nentry = 0;
1465 bool irred = false;
1466 bool latch_edge_was_fallthru;
1467 edge one_succ_pred = NULL, single_entry = NULL;
1468 edge_iterator ei;
1470 FOR_EACH_EDGE (e, ei, loop->header->preds)
1472 if (e->src == loop->latch)
1473 continue;
1474 irred |= (e->flags & EDGE_IRREDUCIBLE_LOOP) != 0;
1475 nentry++;
1476 single_entry = e;
1477 if (single_succ_p (e->src))
1478 one_succ_pred = e;
1480 gcc_assert (nentry);
1481 if (nentry == 1)
1483 bool need_forwarder_block = false;
1485 /* We do not allow entry block to be the loop preheader, since we
1486 cannot emit code there. */
1487 if (single_entry->src == ENTRY_BLOCK_PTR)
1488 need_forwarder_block = true;
1489 else
1491 /* If we want simple preheaders, also force the preheader to have
1492 just a single successor. */
1493 if ((flags & CP_SIMPLE_PREHEADERS)
1494 && !single_succ_p (single_entry->src))
1495 need_forwarder_block = true;
1496 /* If we want fallthru preheaders, also create forwarder block when
1497 preheader ends with a jump or has predecessors from loop. */
1498 else if ((flags & CP_FALLTHRU_PREHEADERS)
1499 && (JUMP_P (BB_END (single_entry->src))
1500 || has_preds_from_loop (single_entry->src, loop)))
1501 need_forwarder_block = true;
1503 if (! need_forwarder_block)
1504 return NULL;
1507 mfb_kj_edge = loop_latch_edge (loop);
1508 latch_edge_was_fallthru = (mfb_kj_edge->flags & EDGE_FALLTHRU) != 0;
1509 fallthru = make_forwarder_block (loop->header, mfb_keep_just, NULL);
1510 dummy = fallthru->src;
1511 loop->header = fallthru->dest;
1513 /* Try to be clever in placing the newly created preheader. The idea is to
1514 avoid breaking any "fallthruness" relationship between blocks.
1516 The preheader was created just before the header and all incoming edges
1517 to the header were redirected to the preheader, except the latch edge.
1518 So the only problematic case is when this latch edge was a fallthru
1519 edge: it is not anymore after the preheader creation so we have broken
1520 the fallthruness. We're therefore going to look for a better place. */
1521 if (latch_edge_was_fallthru)
1523 if (one_succ_pred)
1524 e = one_succ_pred;
1525 else
1526 e = EDGE_PRED (dummy, 0);
1528 move_block_after (dummy, e->src);
1531 if (irred)
1533 dummy->flags |= BB_IRREDUCIBLE_LOOP;
1534 single_succ_edge (dummy)->flags |= EDGE_IRREDUCIBLE_LOOP;
1537 if (dump_file)
1538 fprintf (dump_file, "Created preheader block for loop %i\n",
1539 loop->num);
1541 if (flags & CP_FALLTHRU_PREHEADERS)
1542 gcc_assert ((single_succ_edge (dummy)->flags & EDGE_FALLTHRU)
1543 && !JUMP_P (BB_END (dummy)));
1545 return dummy;
1548 /* Create preheaders for each loop; for meaning of FLAGS see create_preheader. */
1550 void
1551 create_preheaders (int flags)
1553 loop_iterator li;
1554 struct loop *loop;
1556 if (!current_loops)
1557 return;
1559 FOR_EACH_LOOP (li, loop, 0)
1560 create_preheader (loop, flags);
1561 loops_state_set (LOOPS_HAVE_PREHEADERS);
1564 /* Forces all loop latches to have only single successor. */
1566 void
1567 force_single_succ_latches (void)
1569 loop_iterator li;
1570 struct loop *loop;
1571 edge e;
1573 FOR_EACH_LOOP (li, loop, 0)
1575 if (loop->latch != loop->header && single_succ_p (loop->latch))
1576 continue;
1578 e = find_edge (loop->latch, loop->header);
1580 split_edge (e);
1582 loops_state_set (LOOPS_HAVE_SIMPLE_LATCHES);
1585 /* This function is called from loop_version. It splits the entry edge
1586 of the loop we want to version, adds the versioning condition, and
1587 adjust the edges to the two versions of the loop appropriately.
1588 e is an incoming edge. Returns the basic block containing the
1589 condition.
1591 --- edge e ---- > [second_head]
1593 Split it and insert new conditional expression and adjust edges.
1595 --- edge e ---> [cond expr] ---> [first_head]
1597 +---------> [second_head]
1599 THEN_PROB is the probability of then branch of the condition. */
1601 static basic_block
1602 lv_adjust_loop_entry_edge (basic_block first_head, basic_block second_head,
1603 edge e, void *cond_expr, unsigned then_prob)
1605 basic_block new_head = NULL;
1606 edge e1;
1608 gcc_assert (e->dest == second_head);
1610 /* Split edge 'e'. This will create a new basic block, where we can
1611 insert conditional expr. */
1612 new_head = split_edge (e);
1614 lv_add_condition_to_bb (first_head, second_head, new_head,
1615 cond_expr);
1617 /* Don't set EDGE_TRUE_VALUE in RTL mode, as it's invalid there. */
1618 e = single_succ_edge (new_head);
1619 e1 = make_edge (new_head, first_head,
1620 current_ir_type () == IR_GIMPLE ? EDGE_TRUE_VALUE : 0);
1621 e1->probability = then_prob;
1622 e->probability = REG_BR_PROB_BASE - then_prob;
1623 e1->count = RDIV (e->count * e1->probability, REG_BR_PROB_BASE);
1624 e->count = RDIV (e->count * e->probability, REG_BR_PROB_BASE);
1626 set_immediate_dominator (CDI_DOMINATORS, first_head, new_head);
1627 set_immediate_dominator (CDI_DOMINATORS, second_head, new_head);
1629 /* Adjust loop header phi nodes. */
1630 lv_adjust_loop_header_phi (first_head, second_head, new_head, e1);
1632 return new_head;
1635 /* Main entry point for Loop Versioning transformation.
1637 This transformation given a condition and a loop, creates
1638 -if (condition) { loop_copy1 } else { loop_copy2 },
1639 where loop_copy1 is the loop transformed in one way, and loop_copy2
1640 is the loop transformed in another way (or unchanged). 'condition'
1641 may be a run time test for things that were not resolved by static
1642 analysis (overlapping ranges (anti-aliasing), alignment, etc.).
1644 THEN_PROB is the probability of the then edge of the if. THEN_SCALE
1645 is the ratio by that the frequencies in the original loop should
1646 be scaled. ELSE_SCALE is the ratio by that the frequencies in the
1647 new loop should be scaled.
1649 If PLACE_AFTER is true, we place the new loop after LOOP in the
1650 instruction stream, otherwise it is placed before LOOP. */
1652 struct loop *
1653 loop_version (struct loop *loop,
1654 void *cond_expr, basic_block *condition_bb,
1655 unsigned then_prob, unsigned then_scale, unsigned else_scale,
1656 bool place_after)
1658 basic_block first_head, second_head;
1659 edge entry, latch_edge, true_edge, false_edge;
1660 int irred_flag;
1661 struct loop *nloop;
1662 basic_block cond_bb;
1664 /* Record entry and latch edges for the loop */
1665 entry = loop_preheader_edge (loop);
1666 irred_flag = entry->flags & EDGE_IRREDUCIBLE_LOOP;
1667 entry->flags &= ~EDGE_IRREDUCIBLE_LOOP;
1669 /* Note down head of loop as first_head. */
1670 first_head = entry->dest;
1672 /* Duplicate loop. */
1673 if (!cfg_hook_duplicate_loop_to_header_edge (loop, entry, 1,
1674 NULL, NULL, NULL, 0))
1676 entry->flags |= irred_flag;
1677 return NULL;
1680 /* After duplication entry edge now points to new loop head block.
1681 Note down new head as second_head. */
1682 second_head = entry->dest;
1684 /* Split loop entry edge and insert new block with cond expr. */
1685 cond_bb = lv_adjust_loop_entry_edge (first_head, second_head,
1686 entry, cond_expr, then_prob);
1687 if (condition_bb)
1688 *condition_bb = cond_bb;
1690 if (!cond_bb)
1692 entry->flags |= irred_flag;
1693 return NULL;
1696 latch_edge = single_succ_edge (get_bb_copy (loop->latch));
1698 extract_cond_bb_edges (cond_bb, &true_edge, &false_edge);
1699 nloop = loopify (latch_edge,
1700 single_pred_edge (get_bb_copy (loop->header)),
1701 cond_bb, true_edge, false_edge,
1702 false /* Do not redirect all edges. */,
1703 then_scale, else_scale);
1705 copy_loop_info (loop, nloop);
1707 /* loopify redirected latch_edge. Update its PENDING_STMTS. */
1708 lv_flush_pending_stmts (latch_edge);
1710 /* loopify redirected condition_bb's succ edge. Update its PENDING_STMTS. */
1711 extract_cond_bb_edges (cond_bb, &true_edge, &false_edge);
1712 lv_flush_pending_stmts (false_edge);
1713 /* Adjust irreducible flag. */
1714 if (irred_flag)
1716 cond_bb->flags |= BB_IRREDUCIBLE_LOOP;
1717 loop_preheader_edge (loop)->flags |= EDGE_IRREDUCIBLE_LOOP;
1718 loop_preheader_edge (nloop)->flags |= EDGE_IRREDUCIBLE_LOOP;
1719 single_pred_edge (cond_bb)->flags |= EDGE_IRREDUCIBLE_LOOP;
1722 if (place_after)
1724 basic_block *bbs = get_loop_body_in_dom_order (nloop), after;
1725 unsigned i;
1727 after = loop->latch;
1729 for (i = 0; i < nloop->num_nodes; i++)
1731 move_block_after (bbs[i], after);
1732 after = bbs[i];
1734 free (bbs);
1737 /* At this point condition_bb is loop preheader with two successors,
1738 first_head and second_head. Make sure that loop preheader has only
1739 one successor. */
1740 split_edge (loop_preheader_edge (loop));
1741 split_edge (loop_preheader_edge (nloop));
1743 return nloop;
1746 /* The structure of loops might have changed. Some loops might get removed
1747 (and their headers and latches were set to NULL), loop exists might get
1748 removed (thus the loop nesting may be wrong), and some blocks and edges
1749 were changed (so the information about bb --> loop mapping does not have
1750 to be correct). But still for the remaining loops the header dominates
1751 the latch, and loops did not get new subloops (new loops might possibly
1752 get created, but we are not interested in them). Fix up the mess.
1754 If CHANGED_BBS is not NULL, basic blocks whose loop has changed are
1755 marked in it. */
1757 void
1758 fix_loop_structure (bitmap changed_bbs)
1760 basic_block bb;
1761 struct loop *loop, *ploop;
1762 loop_iterator li;
1763 bool record_exits = false;
1764 struct loop **superloop = XNEWVEC (struct loop *, number_of_loops ());
1766 /* We need exact and fast dominance info to be available. */
1767 gcc_assert (dom_info_state (CDI_DOMINATORS) == DOM_OK);
1769 /* Remove the old bb -> loop mapping. Remember the depth of the blocks in
1770 the loop hierarchy, so that we can recognize blocks whose loop nesting
1771 relationship has changed. */
1772 FOR_EACH_BB (bb)
1774 if (changed_bbs)
1775 bb->aux = (void *) (size_t) loop_depth (bb->loop_father);
1776 bb->loop_father = current_loops->tree_root;
1779 if (loops_state_satisfies_p (LOOPS_HAVE_RECORDED_EXITS))
1781 release_recorded_exits ();
1782 record_exits = true;
1785 /* Remove the dead loops from structures. We start from the innermost
1786 loops, so that when we remove the loops, we know that the loops inside
1787 are preserved, and do not waste time relinking loops that will be
1788 removed later. */
1789 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
1791 if (loop->header)
1792 continue;
1794 while (loop->inner)
1796 ploop = loop->inner;
1797 flow_loop_tree_node_remove (ploop);
1798 flow_loop_tree_node_add (loop_outer (loop), ploop);
1801 /* Remove the loop and free its data. */
1802 delete_loop (loop);
1805 /* Rescan the bodies of loops, starting from the outermost ones. We assume
1806 that no optimization interchanges the order of the loops, i.e., it cannot
1807 happen that L1 was superloop of L2 before and it is subloop of L2 now
1808 (without explicitly updating loop information). At the same time, we also
1809 determine the new loop structure. */
1810 current_loops->tree_root->num_nodes = n_basic_blocks;
1811 FOR_EACH_LOOP (li, loop, 0)
1813 superloop[loop->num] = loop->header->loop_father;
1814 loop->num_nodes = flow_loop_nodes_find (loop->header, loop);
1817 /* Now fix the loop nesting. */
1818 FOR_EACH_LOOP (li, loop, 0)
1820 ploop = superloop[loop->num];
1821 if (ploop != loop_outer (loop))
1823 flow_loop_tree_node_remove (loop);
1824 flow_loop_tree_node_add (ploop, loop);
1827 free (superloop);
1829 /* Mark the blocks whose loop has changed. */
1830 if (changed_bbs)
1832 FOR_EACH_BB (bb)
1834 if ((void *) (size_t) loop_depth (bb->loop_father) != bb->aux)
1835 bitmap_set_bit (changed_bbs, bb->index);
1837 bb->aux = NULL;
1841 if (loops_state_satisfies_p (LOOPS_HAVE_PREHEADERS))
1842 create_preheaders (CP_SIMPLE_PREHEADERS);
1844 if (loops_state_satisfies_p (LOOPS_HAVE_SIMPLE_LATCHES))
1845 force_single_succ_latches ();
1847 if (loops_state_satisfies_p (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS))
1848 mark_irreducible_loops ();
1850 if (record_exits)
1851 record_loop_exits ();
1853 loops_state_clear (LOOPS_NEED_FIXUP);
1855 #ifdef ENABLE_CHECKING
1856 verify_loop_structure ();
1857 #endif