1 /* Loop manipulation code for GNU compiler.
2 Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 2, or (at your option) any later
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
23 #include "coretypes.h"
26 #include "hard-reg-set.h"
28 #include "basic-block.h"
30 #include "cfglayout.h"
34 static void duplicate_subloops (struct loops
*, struct loop
*, struct loop
*);
35 static void copy_loops_to (struct loops
*, struct loop
**, int,
37 static void loop_redirect_edge (edge
, basic_block
);
38 static bool loop_delete_branch_edge (edge
, int);
39 static void remove_bbs (basic_block
*, int);
40 static bool rpe_enum_p (basic_block
, void *);
41 static int find_path (edge
, basic_block
**);
42 static bool alp_enum_p (basic_block
, void *);
43 static void fix_loop_placements (struct loops
*, struct loop
*, bool *);
44 static bool fix_bb_placement (struct loops
*, basic_block
);
45 static void fix_bb_placements (struct loops
*, basic_block
, bool *);
46 static void place_new_loop (struct loops
*, struct loop
*);
47 static void scale_loop_frequencies (struct loop
*, int, int);
48 static basic_block
create_preheader (struct loop
*, int);
49 static void unloop (struct loops
*, struct loop
*, bool *);
51 #define RDIV(X,Y) (((X) + (Y) / 2) / (Y))
53 /* Checks whether basic block BB is dominated by DATA. */
55 rpe_enum_p (basic_block bb
, void *data
)
57 return dominated_by_p (CDI_DOMINATORS
, bb
, data
);
60 /* Remove basic blocks BBS. NBBS is the number of the basic blocks. */
63 remove_bbs (basic_block
*bbs
, int nbbs
)
67 for (i
= 0; i
< nbbs
; i
++)
68 delete_basic_block (bbs
[i
]);
71 /* Find path -- i.e. the basic blocks dominated by edge E and put them
72 into array BBS, that will be allocated large enough to contain them.
73 E->dest must have exactly one predecessor for this to work (it is
74 easy to achieve and we do not put it here because we do not want to
75 alter anything by this function). The number of basic blocks in the
78 find_path (edge e
, basic_block
**bbs
)
80 gcc_assert (EDGE_COUNT (e
->dest
->preds
) <= 1);
82 /* Find bbs in the path. */
83 *bbs
= XCNEWVEC (basic_block
, n_basic_blocks
);
84 return dfs_enumerate_from (e
->dest
, 0, rpe_enum_p
, *bbs
,
85 n_basic_blocks
, e
->dest
);
88 /* Fix placement of basic block BB inside loop hierarchy stored in LOOPS --
89 Let L be a loop to that BB belongs. Then every successor of BB must either
90 1) belong to some superloop of loop L, or
91 2) be a header of loop K such that K->outer is superloop of L
92 Returns true if we had to move BB into other loop to enforce this condition,
93 false if the placement of BB was already correct (provided that placements
94 of its successors are correct). */
96 fix_bb_placement (struct loops
*loops
, basic_block bb
)
100 struct loop
*loop
= loops
->tree_root
, *act
;
102 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
104 if (e
->dest
== EXIT_BLOCK_PTR
)
107 act
= e
->dest
->loop_father
;
108 if (act
->header
== e
->dest
)
111 if (flow_loop_nested_p (loop
, act
))
115 if (loop
== bb
->loop_father
)
118 remove_bb_from_loops (bb
);
119 add_bb_to_loop (bb
, loop
);
124 /* Fix placements of basic blocks inside loop hierarchy stored in loops; i.e.
125 enforce condition condition stated in description of fix_bb_placement. We
126 start from basic block FROM that had some of its successors removed, so that
127 his placement no longer has to be correct, and iteratively fix placement of
128 its predecessors that may change if placement of FROM changed. Also fix
129 placement of subloops of FROM->loop_father, that might also be altered due
130 to this change; the condition for them is similar, except that instead of
131 successors we consider edges coming out of the loops.
133 If the changes may invalidate the information about irreducible regions,
134 IRRED_INVALIDATED is set to true. */
137 fix_bb_placements (struct loops
*loops
, basic_block from
,
138 bool *irred_invalidated
)
141 basic_block
*queue
, *qtop
, *qbeg
, *qend
;
142 struct loop
*base_loop
;
145 /* We pass through blocks back-reachable from FROM, testing whether some
146 of their successors moved to outer loop. It may be necessary to
147 iterate several times, but it is finite, as we stop unless we move
148 the basic block up the loop structure. The whole story is a bit
149 more complicated due to presence of subloops, those are moved using
150 fix_loop_placement. */
152 base_loop
= from
->loop_father
;
153 if (base_loop
== loops
->tree_root
)
156 in_queue
= sbitmap_alloc (last_basic_block
);
157 sbitmap_zero (in_queue
);
158 SET_BIT (in_queue
, from
->index
);
159 /* Prevent us from going out of the base_loop. */
160 SET_BIT (in_queue
, base_loop
->header
->index
);
162 queue
= XNEWVEC (basic_block
, base_loop
->num_nodes
+ 1);
163 qtop
= queue
+ base_loop
->num_nodes
+ 1;
175 RESET_BIT (in_queue
, from
->index
);
177 if (from
->loop_father
->header
== from
)
179 /* Subloop header, maybe move the loop upward. */
180 if (!fix_loop_placement (from
->loop_father
))
185 /* Ordinary basic block. */
186 if (!fix_bb_placement (loops
, from
))
190 FOR_EACH_EDGE (e
, ei
, from
->succs
)
192 if (e
->flags
& EDGE_IRREDUCIBLE_LOOP
)
193 *irred_invalidated
= true;
196 /* Something has changed, insert predecessors into queue. */
197 FOR_EACH_EDGE (e
, ei
, from
->preds
)
199 basic_block pred
= e
->src
;
202 if (e
->flags
& EDGE_IRREDUCIBLE_LOOP
)
203 *irred_invalidated
= true;
205 if (TEST_BIT (in_queue
, pred
->index
))
208 /* If it is subloop, then it either was not moved, or
209 the path up the loop tree from base_loop do not contain
211 nca
= find_common_loop (pred
->loop_father
, base_loop
);
212 if (pred
->loop_father
!= base_loop
214 || nca
!= pred
->loop_father
))
215 pred
= pred
->loop_father
->header
;
216 else if (!flow_loop_nested_p (from
->loop_father
, pred
->loop_father
))
218 /* No point in processing it. */
222 if (TEST_BIT (in_queue
, pred
->index
))
225 /* Schedule the basic block. */
230 SET_BIT (in_queue
, pred
->index
);
237 /* Removes path beginning at edge E, i.e. remove basic blocks dominated by E
238 and update loop structure stored in LOOPS and dominators. Return true if
239 we were able to remove the path, false otherwise (and nothing is affected
242 remove_path (struct loops
*loops
, edge e
)
245 basic_block
*rem_bbs
, *bord_bbs
, *dom_bbs
, from
, bb
;
246 int i
, nrem
, n_bord_bbs
, n_dom_bbs
, nreml
;
248 bool deleted
, irred_invalidated
= false;
249 struct loop
**deleted_loop
;
251 if (!loop_delete_branch_edge (e
, 0))
254 /* Keep track of whether we need to update information about irreducible
255 regions. This is the case if the removed area is a part of the
256 irreducible region, or if the set of basic blocks that belong to a loop
257 that is inside an irreducible region is changed, or if such a loop is
259 if (e
->flags
& EDGE_IRREDUCIBLE_LOOP
)
260 irred_invalidated
= true;
262 /* We need to check whether basic blocks are dominated by the edge
263 e, but we only have basic block dominators. This is easy to
264 fix -- when e->dest has exactly one predecessor, this corresponds
265 to blocks dominated by e->dest, if not, split the edge. */
266 if (!single_pred_p (e
->dest
))
267 e
= single_pred_edge (split_edge (e
));
269 /* It may happen that by removing path we remove one or more loops
270 we belong to. In this case first unloop the loops, then proceed
271 normally. We may assume that e->dest is not a header of any loop,
272 as it now has exactly one predecessor. */
273 while (e
->src
->loop_father
->outer
274 && dominated_by_p (CDI_DOMINATORS
,
275 e
->src
->loop_father
->latch
, e
->dest
))
276 unloop (loops
, e
->src
->loop_father
, &irred_invalidated
);
278 /* Identify the path. */
279 nrem
= find_path (e
, &rem_bbs
);
282 bord_bbs
= XCNEWVEC (basic_block
, n_basic_blocks
);
283 seen
= sbitmap_alloc (last_basic_block
);
286 /* Find "border" hexes -- i.e. those with predecessor in removed path. */
287 for (i
= 0; i
< nrem
; i
++)
288 SET_BIT (seen
, rem_bbs
[i
]->index
);
289 for (i
= 0; i
< nrem
; i
++)
293 FOR_EACH_EDGE (ae
, ei
, rem_bbs
[i
]->succs
)
294 if (ae
->dest
!= EXIT_BLOCK_PTR
&& !TEST_BIT (seen
, ae
->dest
->index
))
296 SET_BIT (seen
, ae
->dest
->index
);
297 bord_bbs
[n_bord_bbs
++] = ae
->dest
;
299 if (ae
->flags
& EDGE_IRREDUCIBLE_LOOP
)
300 irred_invalidated
= true;
304 /* Remove the path. */
306 deleted
= loop_delete_branch_edge (e
, 1);
307 gcc_assert (deleted
);
308 dom_bbs
= XCNEWVEC (basic_block
, n_basic_blocks
);
310 /* Cancel loops contained in the path. */
311 deleted_loop
= XNEWVEC (struct loop
*, nrem
);
313 for (i
= 0; i
< nrem
; i
++)
314 if (rem_bbs
[i
]->loop_father
->header
== rem_bbs
[i
])
315 deleted_loop
[nreml
++] = rem_bbs
[i
]->loop_father
;
317 remove_bbs (rem_bbs
, nrem
);
320 for (i
= 0; i
< nreml
; i
++)
321 cancel_loop_tree (loops
, deleted_loop
[i
]);
324 /* Find blocks whose dominators may be affected. */
327 for (i
= 0; i
< n_bord_bbs
; i
++)
331 bb
= get_immediate_dominator (CDI_DOMINATORS
, bord_bbs
[i
]);
332 if (TEST_BIT (seen
, bb
->index
))
334 SET_BIT (seen
, bb
->index
);
336 for (ldom
= first_dom_son (CDI_DOMINATORS
, bb
);
338 ldom
= next_dom_son (CDI_DOMINATORS
, ldom
))
339 if (!dominated_by_p (CDI_DOMINATORS
, from
, ldom
))
340 dom_bbs
[n_dom_bbs
++] = ldom
;
345 /* Recount dominators. */
346 iterate_fix_dominators (CDI_DOMINATORS
, dom_bbs
, n_dom_bbs
);
350 /* Fix placements of basic blocks inside loops and the placement of
351 loops in the loop tree. */
352 fix_bb_placements (loops
, from
, &irred_invalidated
);
353 fix_loop_placements (loops
, from
->loop_father
, &irred_invalidated
);
355 if (irred_invalidated
356 && (loops
->state
& LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS
) != 0)
357 mark_irreducible_loops (loops
);
362 /* Predicate for enumeration in add_loop. */
364 alp_enum_p (basic_block bb
, void *alp_header
)
366 return bb
!= (basic_block
) alp_header
;
369 /* Given LOOP structure with filled header and latch, find the body of the
370 corresponding loop and add it to LOOPS tree. Insert the LOOP as a son of
374 add_loop (struct loops
*loops
, struct loop
*loop
, struct loop
*outer
)
379 /* Add it to loop structure. */
380 place_new_loop (loops
, loop
);
381 flow_loop_tree_node_add (outer
, loop
);
383 /* Find its nodes. */
384 bbs
= XCNEWVEC (basic_block
, n_basic_blocks
);
385 n
= dfs_enumerate_from (loop
->latch
, 1, alp_enum_p
,
386 bbs
, n_basic_blocks
, loop
->header
);
388 for (i
= 0; i
< n
; i
++)
390 remove_bb_from_loops (bbs
[i
]);
391 add_bb_to_loop (bbs
[i
], loop
);
393 remove_bb_from_loops (loop
->header
);
394 add_bb_to_loop (loop
->header
, loop
);
399 /* Multiply all frequencies in LOOP by NUM/DEN. */
401 scale_loop_frequencies (struct loop
*loop
, int num
, int den
)
405 bbs
= get_loop_body (loop
);
406 scale_bbs_frequencies_int (bbs
, loop
->num_nodes
, num
, den
);
410 /* Make area between HEADER_EDGE and LATCH_EDGE a loop by connecting
411 latch to header and update loop tree stored in LOOPS and dominators
412 accordingly. Everything between them plus LATCH_EDGE destination must
413 be dominated by HEADER_EDGE destination, and back-reachable from
414 LATCH_EDGE source. HEADER_EDGE is redirected to basic block SWITCH_BB,
415 FALSE_EDGE of SWITCH_BB to original destination of HEADER_EDGE and
416 TRUE_EDGE of SWITCH_BB to original destination of LATCH_EDGE.
417 Returns newly created loop. */
420 loopify (struct loops
*loops
, edge latch_edge
, edge header_edge
,
421 basic_block switch_bb
, edge true_edge
, edge false_edge
,
422 bool redirect_all_edges
)
424 basic_block succ_bb
= latch_edge
->dest
;
425 basic_block pred_bb
= header_edge
->src
;
426 basic_block
*dom_bbs
, *body
;
427 unsigned n_dom_bbs
, i
;
429 struct loop
*loop
= XCNEW (struct loop
);
430 struct loop
*outer
= succ_bb
->loop_father
->outer
;
431 int freq
, prob
, tot_prob
;
436 loop
->header
= header_edge
->dest
;
437 loop
->latch
= latch_edge
->src
;
439 freq
= EDGE_FREQUENCY (header_edge
);
440 cnt
= header_edge
->count
;
441 prob
= EDGE_SUCC (switch_bb
, 0)->probability
;
442 tot_prob
= prob
+ EDGE_SUCC (switch_bb
, 1)->probability
;
446 /* Redirect edges. */
447 loop_redirect_edge (latch_edge
, loop
->header
);
448 loop_redirect_edge (true_edge
, succ_bb
);
450 /* During loop versioning, one of the switch_bb edge is already properly
451 set. Do not redirect it again unless redirect_all_edges is true. */
452 if (redirect_all_edges
)
454 loop_redirect_edge (header_edge
, switch_bb
);
455 loop_redirect_edge (false_edge
, loop
->header
);
457 /* Update dominators. */
458 set_immediate_dominator (CDI_DOMINATORS
, switch_bb
, pred_bb
);
459 set_immediate_dominator (CDI_DOMINATORS
, loop
->header
, switch_bb
);
462 set_immediate_dominator (CDI_DOMINATORS
, succ_bb
, switch_bb
);
464 /* Compute new loop. */
465 add_loop (loops
, loop
, outer
);
467 /* Add switch_bb to appropriate loop. */
468 if (switch_bb
->loop_father
)
469 remove_bb_from_loops (switch_bb
);
470 add_bb_to_loop (switch_bb
, outer
);
472 /* Fix frequencies. */
473 switch_bb
->frequency
= freq
;
474 switch_bb
->count
= cnt
;
475 FOR_EACH_EDGE (e
, ei
, switch_bb
->succs
)
476 e
->count
= (switch_bb
->count
* e
->probability
) / REG_BR_PROB_BASE
;
477 scale_loop_frequencies (loop
, prob
, tot_prob
);
478 scale_loop_frequencies (succ_bb
->loop_father
, tot_prob
- prob
, tot_prob
);
480 /* Update dominators of blocks outside of LOOP. */
481 dom_bbs
= XCNEWVEC (basic_block
, n_basic_blocks
);
483 seen
= sbitmap_alloc (last_basic_block
);
485 body
= get_loop_body (loop
);
487 for (i
= 0; i
< loop
->num_nodes
; i
++)
488 SET_BIT (seen
, body
[i
]->index
);
490 for (i
= 0; i
< loop
->num_nodes
; i
++)
494 for (ldom
= first_dom_son (CDI_DOMINATORS
, body
[i
]);
496 ldom
= next_dom_son (CDI_DOMINATORS
, ldom
))
497 if (!TEST_BIT (seen
, ldom
->index
))
499 SET_BIT (seen
, ldom
->index
);
500 dom_bbs
[n_dom_bbs
++] = ldom
;
504 iterate_fix_dominators (CDI_DOMINATORS
, dom_bbs
, n_dom_bbs
);
513 /* Remove the latch edge of a LOOP and update LOOPS tree to indicate that
514 the LOOP was removed. After this function, original loop latch will
515 have no successor, which caller is expected to fix somehow.
517 If this may cause the information about irreducible regions to become
518 invalid, IRRED_INVALIDATED is set to true. */
521 unloop (struct loops
*loops
, struct loop
*loop
, bool *irred_invalidated
)
526 basic_block latch
= loop
->latch
;
529 if (loop_preheader_edge (loop
)->flags
& EDGE_IRREDUCIBLE_LOOP
)
530 *irred_invalidated
= true;
532 /* This is relatively straightforward. The dominators are unchanged, as
533 loop header dominates loop latch, so the only thing we have to care of
534 is the placement of loops and basic blocks inside the loop tree. We
535 move them all to the loop->outer, and then let fix_bb_placements do
538 body
= get_loop_body (loop
);
540 for (i
= 0; i
< n
; i
++)
541 if (body
[i
]->loop_father
== loop
)
543 remove_bb_from_loops (body
[i
]);
544 add_bb_to_loop (body
[i
], loop
->outer
);
551 flow_loop_tree_node_remove (ploop
);
552 flow_loop_tree_node_add (loop
->outer
, ploop
);
555 /* Remove the loop and free its data. */
556 flow_loop_tree_node_remove (loop
);
557 loops
->parray
[loop
->num
] = NULL
;
558 flow_loop_free (loop
);
560 remove_edge (single_succ_edge (latch
));
562 /* We do not pass IRRED_INVALIDATED to fix_bb_placements here, as even if
563 there is an irreducible region inside the cancelled loop, the flags will
565 fix_bb_placements (loops
, latch
, &dummy
);
568 /* Fix placement of LOOP inside loop tree, i.e. find the innermost superloop
569 FATHER of LOOP such that all of the edges coming out of LOOP belong to
570 FATHER, and set it as outer loop of LOOP. Return true if placement of
574 fix_loop_placement (struct loop
*loop
)
580 struct loop
*father
= loop
->pred
[0], *act
;
582 body
= get_loop_body (loop
);
583 for (i
= 0; i
< loop
->num_nodes
; i
++)
584 FOR_EACH_EDGE (e
, ei
, body
[i
]->succs
)
585 if (!flow_bb_inside_loop_p (loop
, e
->dest
))
587 act
= find_common_loop (loop
, e
->dest
->loop_father
);
588 if (flow_loop_nested_p (father
, act
))
593 if (father
!= loop
->outer
)
595 for (act
= loop
->outer
; act
!= father
; act
= act
->outer
)
596 act
->num_nodes
-= loop
->num_nodes
;
597 flow_loop_tree_node_remove (loop
);
598 flow_loop_tree_node_add (father
, loop
);
604 /* Fix placement of superloops of LOOP inside loop tree, i.e. ensure that
605 condition stated in description of fix_loop_placement holds for them.
606 It is used in case when we removed some edges coming out of LOOP, which
607 may cause the right placement of LOOP inside loop tree to change.
609 IRRED_INVALIDATED is set to true if a change in the loop structures might
610 invalidate the information about irreducible regions. */
613 fix_loop_placements (struct loops
*loops
, struct loop
*loop
,
614 bool *irred_invalidated
)
621 if (!fix_loop_placement (loop
))
624 /* Changing the placement of a loop in the loop tree may alter the
625 validity of condition 2) of the description of fix_bb_placement
626 for its preheader, because the successor is the header and belongs
627 to the loop. So call fix_bb_placements to fix up the placement
628 of the preheader and (possibly) of its predecessors. */
629 fix_bb_placements (loops
, loop_preheader_edge (loop
)->src
,
635 /* Creates place for a new LOOP in LOOPS structure. */
637 place_new_loop (struct loops
*loops
, struct loop
*loop
)
640 xrealloc (loops
->parray
, (loops
->num
+ 1) * sizeof (struct loop
*));
641 loops
->parray
[loops
->num
] = loop
;
643 loop
->num
= loops
->num
++;
646 /* Copies copy of LOOP as subloop of TARGET loop, placing newly
647 created loop into LOOPS structure. */
649 duplicate_loop (struct loops
*loops
, struct loop
*loop
, struct loop
*target
)
652 cloop
= XCNEW (struct loop
);
653 place_new_loop (loops
, cloop
);
655 /* Mark the new loop as copy of LOOP. */
658 /* Add it to target. */
659 flow_loop_tree_node_add (target
, cloop
);
664 /* Copies structure of subloops of LOOP into TARGET loop, placing
665 newly created loops into loop tree stored in LOOPS. */
667 duplicate_subloops (struct loops
*loops
, struct loop
*loop
, struct loop
*target
)
669 struct loop
*aloop
, *cloop
;
671 for (aloop
= loop
->inner
; aloop
; aloop
= aloop
->next
)
673 cloop
= duplicate_loop (loops
, aloop
, target
);
674 duplicate_subloops (loops
, aloop
, cloop
);
678 /* Copies structure of subloops of N loops, stored in array COPIED_LOOPS,
679 into TARGET loop, placing newly created loops into loop tree LOOPS. */
681 copy_loops_to (struct loops
*loops
, struct loop
**copied_loops
, int n
, struct loop
*target
)
686 for (i
= 0; i
< n
; i
++)
688 aloop
= duplicate_loop (loops
, copied_loops
[i
], target
);
689 duplicate_subloops (loops
, copied_loops
[i
], aloop
);
693 /* Redirects edge E to basic block DEST. */
695 loop_redirect_edge (edge e
, basic_block dest
)
700 redirect_edge_and_branch_force (e
, dest
);
703 /* Deletes edge E from a branch if possible. Unless REALLY_DELETE is set,
704 just test whether it is possible to remove the edge. */
706 loop_delete_branch_edge (edge e
, int really_delete
)
708 basic_block src
= e
->src
;
713 gcc_assert (EDGE_COUNT (src
->succs
) > 1);
715 /* Cannot handle more than two exit edges. */
716 if (EDGE_COUNT (src
->succs
) > 2)
718 /* And it must be just a simple branch. */
719 if (!any_condjump_p (BB_END (src
)))
722 snd
= e
== EDGE_SUCC (src
, 0) ? EDGE_SUCC (src
, 1) : EDGE_SUCC (src
, 0);
724 if (newdest
== EXIT_BLOCK_PTR
)
727 /* Hopefully the above conditions should suffice. */
731 /* Redirecting behaves wrongly wrto this flag. */
732 irr
= snd
->flags
& EDGE_IRREDUCIBLE_LOOP
;
734 if (!redirect_edge_and_branch (e
, newdest
))
736 single_succ_edge (src
)->flags
&= ~EDGE_IRREDUCIBLE_LOOP
;
737 single_succ_edge (src
)->flags
|= irr
;
742 /* Check whether LOOP's body can be duplicated. */
744 can_duplicate_loop_p (struct loop
*loop
)
747 basic_block
*bbs
= get_loop_body (loop
);
749 ret
= can_copy_bbs_p (bbs
, loop
->num_nodes
);
755 /* The NBBS blocks in BBS will get duplicated and the copies will be placed
756 to LOOP. Update the single_exit information in superloops of LOOP. */
759 update_single_exits_after_duplication (basic_block
*bbs
, unsigned nbbs
,
764 for (i
= 0; i
< nbbs
; i
++)
765 bbs
[i
]->flags
|= BB_DUPLICATED
;
767 for (; loop
->outer
; loop
= loop
->outer
)
769 if (!single_exit (loop
))
772 if (single_exit (loop
)->src
->flags
& BB_DUPLICATED
)
773 set_single_exit (loop
, NULL
);
776 for (i
= 0; i
< nbbs
; i
++)
777 bbs
[i
]->flags
&= ~BB_DUPLICATED
;
780 /* Updates single exit information for the copy of LOOP. */
783 update_single_exit_for_duplicated_loop (struct loop
*loop
)
785 struct loop
*copy
= loop
->copy
;
786 basic_block src
, dest
;
787 edge exit
= single_exit (loop
);
792 src
= get_bb_copy (exit
->src
);
794 if (dest
->flags
& BB_DUPLICATED
)
795 dest
= get_bb_copy (dest
);
797 exit
= find_edge (src
, dest
);
798 gcc_assert (exit
!= NULL
);
799 set_single_exit (copy
, exit
);
802 /* Updates single exit information for copies of ORIG_LOOPS and their subloops.
803 N is the number of the loops in the ORIG_LOOPS array. */
806 update_single_exit_for_duplicated_loops (struct loop
*orig_loops
[], unsigned n
)
810 for (i
= 0; i
< n
; i
++)
811 update_single_exit_for_duplicated_loop (orig_loops
[i
]);
814 /* Duplicates body of LOOP to given edge E NDUPL times. Takes care of updating
815 LOOPS structure and dominators. E's destination must be LOOP header for
816 this to work, i.e. it must be entry or latch edge of this loop; these are
817 unique, as the loops must have preheaders for this function to work
818 correctly (in case E is latch, the function unrolls the loop, if E is entry
819 edge, it peels the loop). Store edges created by copying ORIG edge from
820 copies corresponding to set bits in WONT_EXIT bitmap (bit 0 corresponds to
821 original LOOP body, the other copies are numbered in order given by control
822 flow through them) into TO_REMOVE array. Returns false if duplication is
825 duplicate_loop_to_header_edge (struct loop
*loop
, edge e
, struct loops
*loops
,
826 unsigned int ndupl
, sbitmap wont_exit
,
827 edge orig
, edge
*to_remove
,
828 unsigned int *n_to_remove
, int flags
)
830 struct loop
*target
, *aloop
;
831 struct loop
**orig_loops
;
832 unsigned n_orig_loops
;
833 basic_block header
= loop
->header
, latch
= loop
->latch
;
834 basic_block
*new_bbs
, *bbs
, *first_active
;
835 basic_block new_bb
, bb
, first_active_latch
= NULL
;
837 edge spec_edges
[2], new_spec_edges
[2];
841 int is_latch
= (latch
== e
->src
);
842 int scale_act
= 0, *scale_step
= NULL
, scale_main
= 0;
843 int p
, freq_in
, freq_le
, freq_out_orig
;
844 int prob_pass_thru
, prob_pass_wont_exit
, prob_pass_main
;
845 int add_irreducible_flag
;
846 basic_block place_after
;
848 gcc_assert (e
->dest
== loop
->header
);
849 gcc_assert (ndupl
> 0);
853 /* Orig must be edge out of the loop. */
854 gcc_assert (flow_bb_inside_loop_p (loop
, orig
->src
));
855 gcc_assert (!flow_bb_inside_loop_p (loop
, orig
->dest
));
859 bbs
= get_loop_body_in_dom_order (loop
);
860 gcc_assert (bbs
[0] == loop
->header
);
861 gcc_assert (bbs
[n
- 1] == loop
->latch
);
863 /* Check whether duplication is possible. */
864 if (!can_copy_bbs_p (bbs
, loop
->num_nodes
))
869 new_bbs
= XNEWVEC (basic_block
, loop
->num_nodes
);
871 /* In case we are doing loop peeling and the loop is in the middle of
872 irreducible region, the peeled copies will be inside it too. */
873 add_irreducible_flag
= e
->flags
& EDGE_IRREDUCIBLE_LOOP
;
874 gcc_assert (!is_latch
|| !add_irreducible_flag
);
876 /* Find edge from latch. */
877 latch_edge
= loop_latch_edge (loop
);
879 if (flags
& DLTHE_FLAG_UPDATE_FREQ
)
881 /* Calculate coefficients by that we have to scale frequencies
882 of duplicated loop bodies. */
883 freq_in
= header
->frequency
;
884 freq_le
= EDGE_FREQUENCY (latch_edge
);
887 if (freq_in
< freq_le
)
889 freq_out_orig
= orig
? EDGE_FREQUENCY (orig
) : freq_in
- freq_le
;
890 if (freq_out_orig
> freq_in
- freq_le
)
891 freq_out_orig
= freq_in
- freq_le
;
892 prob_pass_thru
= RDIV (REG_BR_PROB_BASE
* freq_le
, freq_in
);
893 prob_pass_wont_exit
=
894 RDIV (REG_BR_PROB_BASE
* (freq_le
+ freq_out_orig
), freq_in
);
896 scale_step
= XNEWVEC (int, ndupl
);
898 for (i
= 1; i
<= ndupl
; i
++)
899 scale_step
[i
- 1] = TEST_BIT (wont_exit
, i
)
900 ? prob_pass_wont_exit
903 /* Complete peeling is special as the probability of exit in last
905 if (flags
& DLTHE_FLAG_COMPLETTE_PEEL
)
907 int wanted_freq
= EDGE_FREQUENCY (e
);
909 if (wanted_freq
> freq_in
)
910 wanted_freq
= freq_in
;
912 gcc_assert (!is_latch
);
913 /* First copy has frequency of incoming edge. Each subsequent
914 frequency should be reduced by prob_pass_wont_exit. Caller
915 should've managed the flags so all except for original loop
916 has won't exist set. */
917 scale_act
= RDIV (wanted_freq
* REG_BR_PROB_BASE
, freq_in
);
918 /* Now simulate the duplication adjustments and compute header
919 frequency of the last copy. */
920 for (i
= 0; i
< ndupl
; i
++)
921 wanted_freq
= RDIV (wanted_freq
* scale_step
[i
], REG_BR_PROB_BASE
);
922 scale_main
= RDIV (wanted_freq
* REG_BR_PROB_BASE
, freq_in
);
926 prob_pass_main
= TEST_BIT (wont_exit
, 0)
927 ? prob_pass_wont_exit
930 scale_main
= REG_BR_PROB_BASE
;
931 for (i
= 0; i
< ndupl
; i
++)
934 p
= RDIV (p
* scale_step
[i
], REG_BR_PROB_BASE
);
936 scale_main
= RDIV (REG_BR_PROB_BASE
* REG_BR_PROB_BASE
, scale_main
);
937 scale_act
= RDIV (scale_main
* prob_pass_main
, REG_BR_PROB_BASE
);
941 scale_main
= REG_BR_PROB_BASE
;
942 for (i
= 0; i
< ndupl
; i
++)
943 scale_main
= RDIV (scale_main
* scale_step
[i
], REG_BR_PROB_BASE
);
944 scale_act
= REG_BR_PROB_BASE
- prob_pass_thru
;
946 for (i
= 0; i
< ndupl
; i
++)
947 gcc_assert (scale_step
[i
] >= 0 && scale_step
[i
] <= REG_BR_PROB_BASE
);
948 gcc_assert (scale_main
>= 0 && scale_main
<= REG_BR_PROB_BASE
949 && scale_act
>= 0 && scale_act
<= REG_BR_PROB_BASE
);
952 /* Loop the new bbs will belong to. */
953 target
= e
->src
->loop_father
;
955 /* Original loops. */
957 for (aloop
= loop
->inner
; aloop
; aloop
= aloop
->next
)
959 orig_loops
= XCNEWVEC (struct loop
*, n_orig_loops
);
960 for (aloop
= loop
->inner
, i
= 0; aloop
; aloop
= aloop
->next
, i
++)
961 orig_loops
[i
] = aloop
;
965 first_active
= XNEWVEC (basic_block
, n
);
968 memcpy (first_active
, bbs
, n
* sizeof (basic_block
));
969 first_active_latch
= latch
;
972 /* Update the information about single exits. */
973 if (loops
->state
& LOOPS_HAVE_MARKED_SINGLE_EXITS
)
974 update_single_exits_after_duplication (bbs
, n
, target
);
976 /* Record exit edge in original loop body. */
977 if (orig
&& TEST_BIT (wont_exit
, 0))
978 to_remove
[(*n_to_remove
)++] = orig
;
980 spec_edges
[SE_ORIG
] = orig
;
981 spec_edges
[SE_LATCH
] = latch_edge
;
983 place_after
= e
->src
;
984 for (j
= 0; j
< ndupl
; j
++)
987 copy_loops_to (loops
, orig_loops
, n_orig_loops
, target
);
990 copy_bbs (bbs
, n
, new_bbs
, spec_edges
, 2, new_spec_edges
, loop
,
992 place_after
= new_spec_edges
[SE_LATCH
]->src
;
994 if (loops
->state
& LOOPS_HAVE_MARKED_SINGLE_EXITS
)
996 for (i
= 0; i
< n
; i
++)
997 bbs
[i
]->flags
|= BB_DUPLICATED
;
998 update_single_exit_for_duplicated_loops (orig_loops
, n_orig_loops
);
999 for (i
= 0; i
< n
; i
++)
1000 bbs
[i
]->flags
&= ~BB_DUPLICATED
;
1003 if (flags
& DLTHE_RECORD_COPY_NUMBER
)
1004 for (i
= 0; i
< n
; i
++)
1006 gcc_assert (!new_bbs
[i
]->aux
);
1007 new_bbs
[i
]->aux
= (void *)(size_t)(j
+ 1);
1010 /* Note whether the blocks and edges belong to an irreducible loop. */
1011 if (add_irreducible_flag
)
1013 for (i
= 0; i
< n
; i
++)
1014 new_bbs
[i
]->flags
|= BB_DUPLICATED
;
1015 for (i
= 0; i
< n
; i
++)
1018 new_bb
= new_bbs
[i
];
1019 if (new_bb
->loop_father
== target
)
1020 new_bb
->flags
|= BB_IRREDUCIBLE_LOOP
;
1022 FOR_EACH_EDGE (ae
, ei
, new_bb
->succs
)
1023 if ((ae
->dest
->flags
& BB_DUPLICATED
)
1024 && (ae
->src
->loop_father
== target
1025 || ae
->dest
->loop_father
== target
))
1026 ae
->flags
|= EDGE_IRREDUCIBLE_LOOP
;
1028 for (i
= 0; i
< n
; i
++)
1029 new_bbs
[i
]->flags
&= ~BB_DUPLICATED
;
1032 /* Redirect the special edges. */
1035 redirect_edge_and_branch_force (latch_edge
, new_bbs
[0]);
1036 redirect_edge_and_branch_force (new_spec_edges
[SE_LATCH
],
1038 set_immediate_dominator (CDI_DOMINATORS
, new_bbs
[0], latch
);
1039 latch
= loop
->latch
= new_bbs
[n
- 1];
1040 e
= latch_edge
= new_spec_edges
[SE_LATCH
];
1044 redirect_edge_and_branch_force (new_spec_edges
[SE_LATCH
],
1046 redirect_edge_and_branch_force (e
, new_bbs
[0]);
1047 set_immediate_dominator (CDI_DOMINATORS
, new_bbs
[0], e
->src
);
1048 e
= new_spec_edges
[SE_LATCH
];
1051 /* Record exit edge in this copy. */
1052 if (orig
&& TEST_BIT (wont_exit
, j
+ 1))
1053 to_remove
[(*n_to_remove
)++] = new_spec_edges
[SE_ORIG
];
1055 /* Record the first copy in the control flow order if it is not
1056 the original loop (i.e. in case of peeling). */
1057 if (!first_active_latch
)
1059 memcpy (first_active
, new_bbs
, n
* sizeof (basic_block
));
1060 first_active_latch
= new_bbs
[n
- 1];
1063 /* Set counts and frequencies. */
1064 if (flags
& DLTHE_FLAG_UPDATE_FREQ
)
1066 scale_bbs_frequencies_int (new_bbs
, n
, scale_act
, REG_BR_PROB_BASE
);
1067 scale_act
= RDIV (scale_act
* scale_step
[j
], REG_BR_PROB_BASE
);
1073 /* Update the original loop. */
1075 set_immediate_dominator (CDI_DOMINATORS
, e
->dest
, e
->src
);
1076 if (flags
& DLTHE_FLAG_UPDATE_FREQ
)
1078 scale_bbs_frequencies_int (bbs
, n
, scale_main
, REG_BR_PROB_BASE
);
1082 /* Update dominators of outer blocks if affected. */
1083 for (i
= 0; i
< n
; i
++)
1085 basic_block dominated
, dom_bb
, *dom_bbs
;
1091 n_dom_bbs
= get_dominated_by (CDI_DOMINATORS
, bb
, &dom_bbs
);
1092 for (j
= 0; j
< n_dom_bbs
; j
++)
1094 dominated
= dom_bbs
[j
];
1095 if (flow_bb_inside_loop_p (loop
, dominated
))
1097 dom_bb
= nearest_common_dominator (
1098 CDI_DOMINATORS
, first_active
[i
], first_active_latch
);
1099 set_immediate_dominator (CDI_DOMINATORS
, dominated
, dom_bb
);
1103 free (first_active
);
1110 /* A callback for make_forwarder block, to redirect all edges except for
1111 MFB_KJ_EDGE to the entry part. E is the edge for that we should decide
1112 whether to redirect it. */
1114 static edge mfb_kj_edge
;
1116 mfb_keep_just (edge e
)
1118 return e
!= mfb_kj_edge
;
1121 /* Creates a pre-header for a LOOP. Returns newly created block. Unless
1122 CP_SIMPLE_PREHEADERS is set in FLAGS, we only force LOOP to have single
1123 entry; otherwise we also force preheader block to have only one successor.
1124 The function also updates dominators. */
1127 create_preheader (struct loop
*loop
, int flags
)
1133 bool latch_edge_was_fallthru
;
1134 edge one_succ_pred
= 0;
1137 FOR_EACH_EDGE (e
, ei
, loop
->header
->preds
)
1139 if (e
->src
== loop
->latch
)
1141 irred
|= (e
->flags
& EDGE_IRREDUCIBLE_LOOP
) != 0;
1143 if (single_succ_p (e
->src
))
1146 gcc_assert (nentry
);
1149 /* Get an edge that is different from the one from loop->latch
1151 e
= EDGE_PRED (loop
->header
,
1152 EDGE_PRED (loop
->header
, 0)->src
== loop
->latch
);
1154 if (!(flags
& CP_SIMPLE_PREHEADERS
) || single_succ_p (e
->src
))
1158 mfb_kj_edge
= loop_latch_edge (loop
);
1159 latch_edge_was_fallthru
= (mfb_kj_edge
->flags
& EDGE_FALLTHRU
) != 0;
1160 fallthru
= make_forwarder_block (loop
->header
, mfb_keep_just
, NULL
);
1161 dummy
= fallthru
->src
;
1162 loop
->header
= fallthru
->dest
;
1164 /* Try to be clever in placing the newly created preheader. The idea is to
1165 avoid breaking any "fallthruness" relationship between blocks.
1167 The preheader was created just before the header and all incoming edges
1168 to the header were redirected to the preheader, except the latch edge.
1169 So the only problematic case is when this latch edge was a fallthru
1170 edge: it is not anymore after the preheader creation so we have broken
1171 the fallthruness. We're therefore going to look for a better place. */
1172 if (latch_edge_was_fallthru
)
1177 e
= EDGE_PRED (dummy
, 0);
1179 move_block_after (dummy
, e
->src
);
1184 dummy
->flags
|= BB_IRREDUCIBLE_LOOP
;
1185 single_succ_edge (dummy
)->flags
|= EDGE_IRREDUCIBLE_LOOP
;
1189 fprintf (dump_file
, "Created preheader block for loop %i\n",
1195 /* Create preheaders for each loop from loop tree stored in LOOPS; for meaning
1196 of FLAGS see create_preheader. */
1198 create_preheaders (struct loops
*loops
, int flags
)
1201 for (i
= 1; i
< loops
->num
; i
++)
1202 create_preheader (loops
->parray
[i
], flags
);
1203 loops
->state
|= LOOPS_HAVE_PREHEADERS
;
1206 /* Forces all loop latches of loops from loop tree LOOPS to have only single
1209 force_single_succ_latches (struct loops
*loops
)
1215 for (i
= 1; i
< loops
->num
; i
++)
1217 loop
= loops
->parray
[i
];
1218 if (loop
->latch
!= loop
->header
&& single_succ_p (loop
->latch
))
1221 e
= find_edge (loop
->latch
, loop
->header
);
1225 loops
->state
|= LOOPS_HAVE_SIMPLE_LATCHES
;
1228 /* This function is called from loop_version. It splits the entry edge
1229 of the loop we want to version, adds the versioning condition, and
1230 adjust the edges to the two versions of the loop appropriately.
1231 e is an incoming edge. Returns the basic block containing the
1234 --- edge e ---- > [second_head]
1236 Split it and insert new conditional expression and adjust edges.
1238 --- edge e ---> [cond expr] ---> [first_head]
1240 +---------> [second_head]
1244 lv_adjust_loop_entry_edge (basic_block first_head
,
1245 basic_block second_head
,
1249 basic_block new_head
= NULL
;
1252 gcc_assert (e
->dest
== second_head
);
1254 /* Split edge 'e'. This will create a new basic block, where we can
1255 insert conditional expr. */
1256 new_head
= split_edge (e
);
1259 lv_add_condition_to_bb (first_head
, second_head
, new_head
,
1262 /* Don't set EDGE_TRUE_VALUE in RTL mode, as it's invalid there. */
1263 e1
= make_edge (new_head
, first_head
,
1264 current_ir_type () == IR_GIMPLE
? EDGE_TRUE_VALUE
: 0);
1265 set_immediate_dominator (CDI_DOMINATORS
, first_head
, new_head
);
1266 set_immediate_dominator (CDI_DOMINATORS
, second_head
, new_head
);
1268 /* Adjust loop header phi nodes. */
1269 lv_adjust_loop_header_phi (first_head
, second_head
, new_head
, e1
);
1274 /* Main entry point for Loop Versioning transformation.
1276 This transformation given a condition and a loop, creates
1277 -if (condition) { loop_copy1 } else { loop_copy2 },
1278 where loop_copy1 is the loop transformed in one way, and loop_copy2
1279 is the loop transformed in another way (or unchanged). 'condition'
1280 may be a run time test for things that were not resolved by static
1281 analysis (overlapping ranges (anti-aliasing), alignment, etc.).
1283 If PLACE_AFTER is true, we place the new loop after LOOP in the
1284 instruction stream, otherwise it is placed before LOOP. */
1287 loop_version (struct loops
*loops
, struct loop
* loop
,
1288 void *cond_expr
, basic_block
*condition_bb
,
1291 basic_block first_head
, second_head
;
1292 edge entry
, latch_edge
, exit
, true_edge
, false_edge
;
1295 basic_block cond_bb
;
1297 /* CHECKME: Loop versioning does not handle nested loop at this point. */
1301 /* Record entry and latch edges for the loop */
1302 entry
= loop_preheader_edge (loop
);
1303 irred_flag
= entry
->flags
& EDGE_IRREDUCIBLE_LOOP
;
1304 entry
->flags
&= ~EDGE_IRREDUCIBLE_LOOP
;
1306 /* Note down head of loop as first_head. */
1307 first_head
= entry
->dest
;
1309 /* Duplicate loop. */
1310 if (!cfg_hook_duplicate_loop_to_header_edge (loop
, entry
, loops
, 1,
1311 NULL
, NULL
, NULL
, NULL
, 0))
1314 /* After duplication entry edge now points to new loop head block.
1315 Note down new head as second_head. */
1316 second_head
= entry
->dest
;
1318 /* Split loop entry edge and insert new block with cond expr. */
1319 cond_bb
= lv_adjust_loop_entry_edge (first_head
, second_head
,
1322 *condition_bb
= cond_bb
;
1326 entry
->flags
|= irred_flag
;
1330 latch_edge
= single_succ_edge (get_bb_copy (loop
->latch
));
1332 extract_cond_bb_edges (cond_bb
, &true_edge
, &false_edge
);
1333 nloop
= loopify (loops
,
1335 single_pred_edge (get_bb_copy (loop
->header
)),
1336 cond_bb
, true_edge
, false_edge
,
1337 false /* Do not redirect all edges. */);
1339 exit
= single_exit (loop
);
1341 set_single_exit (nloop
, find_edge (get_bb_copy (exit
->src
), exit
->dest
));
1343 /* loopify redirected latch_edge. Update its PENDING_STMTS. */
1344 lv_flush_pending_stmts (latch_edge
);
1346 /* loopify redirected condition_bb's succ edge. Update its PENDING_STMTS. */
1347 extract_cond_bb_edges (cond_bb
, &true_edge
, &false_edge
);
1348 lv_flush_pending_stmts (false_edge
);
1349 /* Adjust irreducible flag. */
1352 cond_bb
->flags
|= BB_IRREDUCIBLE_LOOP
;
1353 loop_preheader_edge (loop
)->flags
|= EDGE_IRREDUCIBLE_LOOP
;
1354 loop_preheader_edge (nloop
)->flags
|= EDGE_IRREDUCIBLE_LOOP
;
1355 single_pred_edge (cond_bb
)->flags
|= EDGE_IRREDUCIBLE_LOOP
;
1360 basic_block
*bbs
= get_loop_body_in_dom_order (nloop
), after
;
1363 after
= loop
->latch
;
1365 for (i
= 0; i
< nloop
->num_nodes
; i
++)
1367 move_block_after (bbs
[i
], after
);
1373 /* At this point condition_bb is loop predheader with two successors,
1374 first_head and second_head. Make sure that loop predheader has only
1376 split_edge (loop_preheader_edge (loop
));
1377 split_edge (loop_preheader_edge (nloop
));
1382 /* The structure of LOOPS might have changed. Some loops might get removed
1383 (and their headers and latches were set to NULL), loop exists might get
1384 removed (thus the loop nesting may be wrong), and some blocks and edges
1385 were changed (so the information about bb --> loop mapping does not have
1386 to be correct). But still for the remaining loops the header dominates
1387 the latch, and loops did not get new subloobs (new loops might possibly
1388 get created, but we are not interested in them). Fix up the mess.
1390 If CHANGED_BBS is not NULL, basic blocks whose loop has changed are
1394 fix_loop_structure (struct loops
*loops
, bitmap changed_bbs
)
1397 struct loop
*loop
, *ploop
;
1400 /* Remove the old bb -> loop mapping. */
1403 bb
->aux
= (void *) (size_t) bb
->loop_father
->depth
;
1404 bb
->loop_father
= loops
->tree_root
;
1407 /* Remove the dead loops from structures. */
1408 loops
->tree_root
->num_nodes
= n_basic_blocks
;
1409 for (i
= 1; i
< loops
->num
; i
++)
1411 loop
= loops
->parray
[i
];
1415 loop
->num_nodes
= 0;
1421 ploop
= loop
->inner
;
1422 flow_loop_tree_node_remove (ploop
);
1423 flow_loop_tree_node_add (loop
->outer
, ploop
);
1426 /* Remove the loop and free its data. */
1427 flow_loop_tree_node_remove (loop
);
1428 loops
->parray
[loop
->num
] = NULL
;
1429 flow_loop_free (loop
);
1432 /* Rescan the bodies of loops, starting from the outermost. */
1433 loop
= loops
->tree_root
;
1441 && loop
!= loops
->tree_root
)
1443 if (loop
== loops
->tree_root
)
1449 loop
->num_nodes
= flow_loop_nodes_find (loop
->header
, loop
);
1452 /* Now fix the loop nesting. */
1453 for (i
= 1; i
< loops
->num
; i
++)
1455 loop
= loops
->parray
[i
];
1459 bb
= loop_preheader_edge (loop
)->src
;
1460 if (bb
->loop_father
!= loop
->outer
)
1462 flow_loop_tree_node_remove (loop
);
1463 flow_loop_tree_node_add (bb
->loop_father
, loop
);
1467 /* Mark the blocks whose loop has changed. */
1471 && (void *) (size_t) bb
->loop_father
->depth
!= bb
->aux
)
1472 bitmap_set_bit (changed_bbs
, bb
->index
);
1477 if (loops
->state
& LOOPS_HAVE_MARKED_SINGLE_EXITS
)
1478 mark_single_exit_loops (loops
);
1479 if (loops
->state
& LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS
)
1480 mark_irreducible_loops (loops
);