1 /* Loop manipulation code for GNU compiler.
2 Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 2, or (at your option) any later
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
23 #include "coretypes.h"
26 #include "hard-reg-set.h"
28 #include "basic-block.h"
30 #include "cfglayout.h"
34 static void duplicate_subloops (struct loop
*, struct loop
*);
35 static void copy_loops_to (struct loop
**, int,
37 static void loop_redirect_edge (edge
, basic_block
);
38 static bool loop_delete_branch_edge (edge
, int);
39 static void remove_bbs (basic_block
*, int);
40 static bool rpe_enum_p (basic_block
, void *);
41 static int find_path (edge
, basic_block
**);
42 static bool alp_enum_p (basic_block
, void *);
43 static void fix_loop_placements (struct loop
*, bool *);
44 static bool fix_bb_placement (basic_block
);
45 static void fix_bb_placements (basic_block
, bool *);
46 static void place_new_loop (struct loop
*);
47 static basic_block
create_preheader (struct loop
*, int);
48 static void unloop (struct loop
*, bool *);
50 #define RDIV(X,Y) (((X) + (Y) / 2) / (Y))
52 /* Checks whether basic block BB is dominated by DATA. */
54 rpe_enum_p (basic_block bb
, void *data
)
56 return dominated_by_p (CDI_DOMINATORS
, bb
, data
);
59 /* Remove basic blocks BBS. NBBS is the number of the basic blocks. */
62 remove_bbs (basic_block
*bbs
, int nbbs
)
66 for (i
= 0; i
< nbbs
; i
++)
67 delete_basic_block (bbs
[i
]);
70 /* Find path -- i.e. the basic blocks dominated by edge E and put them
71 into array BBS, that will be allocated large enough to contain them.
72 E->dest must have exactly one predecessor for this to work (it is
73 easy to achieve and we do not put it here because we do not want to
74 alter anything by this function). The number of basic blocks in the
77 find_path (edge e
, basic_block
**bbs
)
79 gcc_assert (EDGE_COUNT (e
->dest
->preds
) <= 1);
81 /* Find bbs in the path. */
82 *bbs
= XCNEWVEC (basic_block
, n_basic_blocks
);
83 return dfs_enumerate_from (e
->dest
, 0, rpe_enum_p
, *bbs
,
84 n_basic_blocks
, e
->dest
);
87 /* Fix placement of basic block BB inside loop hierarchy --
88 Let L be a loop to that BB belongs. Then every successor of BB must either
89 1) belong to some superloop of loop L, or
90 2) be a header of loop K such that K->outer is superloop of L
91 Returns true if we had to move BB into other loop to enforce this condition,
92 false if the placement of BB was already correct (provided that placements
93 of its successors are correct). */
95 fix_bb_placement (basic_block bb
)
99 struct loop
*loop
= current_loops
->tree_root
, *act
;
101 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
103 if (e
->dest
== EXIT_BLOCK_PTR
)
106 act
= e
->dest
->loop_father
;
107 if (act
->header
== e
->dest
)
110 if (flow_loop_nested_p (loop
, act
))
114 if (loop
== bb
->loop_father
)
117 remove_bb_from_loops (bb
);
118 add_bb_to_loop (bb
, loop
);
123 /* Fix placement of LOOP inside loop tree, i.e. find the innermost superloop
124 of LOOP to that leads at least one exit edge of LOOP, and set it
125 as the immediate superloop of LOOP. Return true if the immediate superloop
129 fix_loop_placement (struct loop
*loop
)
133 VEC (edge
, heap
) *exits
= get_loop_exit_edges (loop
);
134 struct loop
*father
= current_loops
->tree_root
, *act
;
137 for (i
= 0; VEC_iterate (edge
, exits
, i
, e
); i
++)
139 act
= find_common_loop (loop
, e
->dest
->loop_father
);
140 if (flow_loop_nested_p (father
, act
))
144 if (father
!= loop
->outer
)
146 for (act
= loop
->outer
; act
!= father
; act
= act
->outer
)
147 act
->num_nodes
-= loop
->num_nodes
;
148 flow_loop_tree_node_remove (loop
);
149 flow_loop_tree_node_add (father
, loop
);
151 /* The exit edges of LOOP no longer exits its original immediate
152 superloops; remove them from the appropriate exit lists. */
153 for (i
= 0; VEC_iterate (edge
, exits
, i
, e
); i
++)
154 rescan_loop_exit (e
, false, false);
159 VEC_free (edge
, heap
, exits
);
163 /* Fix placements of basic blocks inside loop hierarchy stored in loops; i.e.
164 enforce condition condition stated in description of fix_bb_placement. We
165 start from basic block FROM that had some of its successors removed, so that
166 his placement no longer has to be correct, and iteratively fix placement of
167 its predecessors that may change if placement of FROM changed. Also fix
168 placement of subloops of FROM->loop_father, that might also be altered due
169 to this change; the condition for them is similar, except that instead of
170 successors we consider edges coming out of the loops.
172 If the changes may invalidate the information about irreducible regions,
173 IRRED_INVALIDATED is set to true. */
176 fix_bb_placements (basic_block from
,
177 bool *irred_invalidated
)
180 basic_block
*queue
, *qtop
, *qbeg
, *qend
;
181 struct loop
*base_loop
;
184 /* We pass through blocks back-reachable from FROM, testing whether some
185 of their successors moved to outer loop. It may be necessary to
186 iterate several times, but it is finite, as we stop unless we move
187 the basic block up the loop structure. The whole story is a bit
188 more complicated due to presence of subloops, those are moved using
189 fix_loop_placement. */
191 base_loop
= from
->loop_father
;
192 if (base_loop
== current_loops
->tree_root
)
195 in_queue
= sbitmap_alloc (last_basic_block
);
196 sbitmap_zero (in_queue
);
197 SET_BIT (in_queue
, from
->index
);
198 /* Prevent us from going out of the base_loop. */
199 SET_BIT (in_queue
, base_loop
->header
->index
);
201 queue
= XNEWVEC (basic_block
, base_loop
->num_nodes
+ 1);
202 qtop
= queue
+ base_loop
->num_nodes
+ 1;
214 RESET_BIT (in_queue
, from
->index
);
216 if (from
->loop_father
->header
== from
)
218 /* Subloop header, maybe move the loop upward. */
219 if (!fix_loop_placement (from
->loop_father
))
224 /* Ordinary basic block. */
225 if (!fix_bb_placement (from
))
229 FOR_EACH_EDGE (e
, ei
, from
->succs
)
231 if (e
->flags
& EDGE_IRREDUCIBLE_LOOP
)
232 *irred_invalidated
= true;
235 /* Something has changed, insert predecessors into queue. */
236 FOR_EACH_EDGE (e
, ei
, from
->preds
)
238 basic_block pred
= e
->src
;
241 if (e
->flags
& EDGE_IRREDUCIBLE_LOOP
)
242 *irred_invalidated
= true;
244 if (TEST_BIT (in_queue
, pred
->index
))
247 /* If it is subloop, then it either was not moved, or
248 the path up the loop tree from base_loop do not contain
250 nca
= find_common_loop (pred
->loop_father
, base_loop
);
251 if (pred
->loop_father
!= base_loop
253 || nca
!= pred
->loop_father
))
254 pred
= pred
->loop_father
->header
;
255 else if (!flow_loop_nested_p (from
->loop_father
, pred
->loop_father
))
257 /* No point in processing it. */
261 if (TEST_BIT (in_queue
, pred
->index
))
264 /* Schedule the basic block. */
269 SET_BIT (in_queue
, pred
->index
);
276 /* Removes path beginning at edge E, i.e. remove basic blocks dominated by E
277 and update loop structures and dominators. Return true if we were able
278 to remove the path, false otherwise (and nothing is affected then). */
283 basic_block
*rem_bbs
, *bord_bbs
, *dom_bbs
, from
, bb
;
284 int i
, nrem
, n_bord_bbs
, n_dom_bbs
, nreml
;
286 bool deleted
, irred_invalidated
= false;
287 struct loop
**deleted_loop
;
289 if (!loop_delete_branch_edge (e
, 0))
292 /* Keep track of whether we need to update information about irreducible
293 regions. This is the case if the removed area is a part of the
294 irreducible region, or if the set of basic blocks that belong to a loop
295 that is inside an irreducible region is changed, or if such a loop is
297 if (e
->flags
& EDGE_IRREDUCIBLE_LOOP
)
298 irred_invalidated
= true;
300 /* We need to check whether basic blocks are dominated by the edge
301 e, but we only have basic block dominators. This is easy to
302 fix -- when e->dest has exactly one predecessor, this corresponds
303 to blocks dominated by e->dest, if not, split the edge. */
304 if (!single_pred_p (e
->dest
))
305 e
= single_pred_edge (split_edge (e
));
307 /* It may happen that by removing path we remove one or more loops
308 we belong to. In this case first unloop the loops, then proceed
309 normally. We may assume that e->dest is not a header of any loop,
310 as it now has exactly one predecessor. */
311 while (e
->src
->loop_father
->outer
312 && dominated_by_p (CDI_DOMINATORS
,
313 e
->src
->loop_father
->latch
, e
->dest
))
314 unloop (e
->src
->loop_father
, &irred_invalidated
);
316 /* Identify the path. */
317 nrem
= find_path (e
, &rem_bbs
);
320 bord_bbs
= XCNEWVEC (basic_block
, n_basic_blocks
);
321 seen
= sbitmap_alloc (last_basic_block
);
324 /* Find "border" hexes -- i.e. those with predecessor in removed path. */
325 for (i
= 0; i
< nrem
; i
++)
326 SET_BIT (seen
, rem_bbs
[i
]->index
);
327 for (i
= 0; i
< nrem
; i
++)
331 FOR_EACH_EDGE (ae
, ei
, rem_bbs
[i
]->succs
)
332 if (ae
->dest
!= EXIT_BLOCK_PTR
&& !TEST_BIT (seen
, ae
->dest
->index
))
334 SET_BIT (seen
, ae
->dest
->index
);
335 bord_bbs
[n_bord_bbs
++] = ae
->dest
;
337 if (ae
->flags
& EDGE_IRREDUCIBLE_LOOP
)
338 irred_invalidated
= true;
342 /* Remove the path. */
344 deleted
= loop_delete_branch_edge (e
, 1);
345 gcc_assert (deleted
);
346 dom_bbs
= XCNEWVEC (basic_block
, n_basic_blocks
);
348 /* Cancel loops contained in the path. */
349 deleted_loop
= XNEWVEC (struct loop
*, nrem
);
351 for (i
= 0; i
< nrem
; i
++)
352 if (rem_bbs
[i
]->loop_father
->header
== rem_bbs
[i
])
353 deleted_loop
[nreml
++] = rem_bbs
[i
]->loop_father
;
355 remove_bbs (rem_bbs
, nrem
);
358 for (i
= 0; i
< nreml
; i
++)
359 cancel_loop_tree (deleted_loop
[i
]);
362 /* Find blocks whose dominators may be affected. */
365 for (i
= 0; i
< n_bord_bbs
; i
++)
369 bb
= get_immediate_dominator (CDI_DOMINATORS
, bord_bbs
[i
]);
370 if (TEST_BIT (seen
, bb
->index
))
372 SET_BIT (seen
, bb
->index
);
374 for (ldom
= first_dom_son (CDI_DOMINATORS
, bb
);
376 ldom
= next_dom_son (CDI_DOMINATORS
, ldom
))
377 if (!dominated_by_p (CDI_DOMINATORS
, from
, ldom
))
378 dom_bbs
[n_dom_bbs
++] = ldom
;
383 /* Recount dominators. */
384 iterate_fix_dominators (CDI_DOMINATORS
, dom_bbs
, n_dom_bbs
);
388 /* Fix placements of basic blocks inside loops and the placement of
389 loops in the loop tree. */
390 fix_bb_placements (from
, &irred_invalidated
);
391 fix_loop_placements (from
->loop_father
, &irred_invalidated
);
393 if (irred_invalidated
394 && (current_loops
->state
& LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS
) != 0)
395 mark_irreducible_loops ();
400 /* Predicate for enumeration in add_loop. */
402 alp_enum_p (basic_block bb
, void *alp_header
)
404 return bb
!= (basic_block
) alp_header
;
407 /* Given LOOP structure with filled header and latch, find the body of the
408 corresponding loop and add it to loops tree. Insert the LOOP as a son of
412 add_loop (struct loop
*loop
, struct loop
*outer
)
417 /* Add it to loop structure. */
418 place_new_loop (loop
);
419 flow_loop_tree_node_add (outer
, loop
);
421 /* Find its nodes. */
422 bbs
= XCNEWVEC (basic_block
, n_basic_blocks
);
423 n
= dfs_enumerate_from (loop
->latch
, 1, alp_enum_p
,
424 bbs
, n_basic_blocks
, loop
->header
);
426 for (i
= 0; i
< n
; i
++)
428 remove_bb_from_loops (bbs
[i
]);
429 add_bb_to_loop (bbs
[i
], loop
);
431 remove_bb_from_loops (loop
->header
);
432 add_bb_to_loop (loop
->header
, loop
);
437 /* Multiply all frequencies in LOOP by NUM/DEN. */
439 scale_loop_frequencies (struct loop
*loop
, int num
, int den
)
443 bbs
= get_loop_body (loop
);
444 scale_bbs_frequencies_int (bbs
, loop
->num_nodes
, num
, den
);
448 /* Make area between HEADER_EDGE and LATCH_EDGE a loop by connecting
449 latch to header and update loop tree and dominators
450 accordingly. Everything between them plus LATCH_EDGE destination must
451 be dominated by HEADER_EDGE destination, and back-reachable from
452 LATCH_EDGE source. HEADER_EDGE is redirected to basic block SWITCH_BB,
453 FALSE_EDGE of SWITCH_BB to original destination of HEADER_EDGE and
454 TRUE_EDGE of SWITCH_BB to original destination of LATCH_EDGE.
455 Returns the newly created loop. Frequencies and counts in the new loop
456 are scaled by FALSE_SCALE and in the old one by TRUE_SCALE. */
459 loopify (edge latch_edge
, edge header_edge
,
460 basic_block switch_bb
, edge true_edge
, edge false_edge
,
461 bool redirect_all_edges
, unsigned true_scale
, unsigned false_scale
)
463 basic_block succ_bb
= latch_edge
->dest
;
464 basic_block pred_bb
= header_edge
->src
;
465 basic_block
*dom_bbs
, *body
;
466 unsigned n_dom_bbs
, i
;
468 struct loop
*loop
= alloc_loop ();
469 struct loop
*outer
= succ_bb
->loop_father
->outer
;
475 loop
->header
= header_edge
->dest
;
476 loop
->latch
= latch_edge
->src
;
478 freq
= EDGE_FREQUENCY (header_edge
);
479 cnt
= header_edge
->count
;
481 /* Redirect edges. */
482 loop_redirect_edge (latch_edge
, loop
->header
);
483 loop_redirect_edge (true_edge
, succ_bb
);
485 /* During loop versioning, one of the switch_bb edge is already properly
486 set. Do not redirect it again unless redirect_all_edges is true. */
487 if (redirect_all_edges
)
489 loop_redirect_edge (header_edge
, switch_bb
);
490 loop_redirect_edge (false_edge
, loop
->header
);
492 /* Update dominators. */
493 set_immediate_dominator (CDI_DOMINATORS
, switch_bb
, pred_bb
);
494 set_immediate_dominator (CDI_DOMINATORS
, loop
->header
, switch_bb
);
497 set_immediate_dominator (CDI_DOMINATORS
, succ_bb
, switch_bb
);
499 /* Compute new loop. */
500 add_loop (loop
, outer
);
502 /* Add switch_bb to appropriate loop. */
503 if (switch_bb
->loop_father
)
504 remove_bb_from_loops (switch_bb
);
505 add_bb_to_loop (switch_bb
, outer
);
507 /* Fix frequencies. */
508 if (redirect_all_edges
)
510 switch_bb
->frequency
= freq
;
511 switch_bb
->count
= cnt
;
512 FOR_EACH_EDGE (e
, ei
, switch_bb
->succs
)
514 e
->count
= (switch_bb
->count
* e
->probability
) / REG_BR_PROB_BASE
;
517 scale_loop_frequencies (loop
, false_scale
, REG_BR_PROB_BASE
);
518 scale_loop_frequencies (succ_bb
->loop_father
, true_scale
, REG_BR_PROB_BASE
);
520 /* Update dominators of blocks outside of LOOP. */
521 dom_bbs
= XCNEWVEC (basic_block
, n_basic_blocks
);
523 seen
= sbitmap_alloc (last_basic_block
);
525 body
= get_loop_body (loop
);
527 for (i
= 0; i
< loop
->num_nodes
; i
++)
528 SET_BIT (seen
, body
[i
]->index
);
530 for (i
= 0; i
< loop
->num_nodes
; i
++)
534 for (ldom
= first_dom_son (CDI_DOMINATORS
, body
[i
]);
536 ldom
= next_dom_son (CDI_DOMINATORS
, ldom
))
537 if (!TEST_BIT (seen
, ldom
->index
))
539 SET_BIT (seen
, ldom
->index
);
540 dom_bbs
[n_dom_bbs
++] = ldom
;
544 iterate_fix_dominators (CDI_DOMINATORS
, dom_bbs
, n_dom_bbs
);
553 /* Remove the latch edge of a LOOP and update loops to indicate that
554 the LOOP was removed. After this function, original loop latch will
555 have no successor, which caller is expected to fix somehow.
557 If this may cause the information about irreducible regions to become
558 invalid, IRRED_INVALIDATED is set to true. */
561 unloop (struct loop
*loop
, bool *irred_invalidated
)
566 basic_block latch
= loop
->latch
;
569 if (loop_preheader_edge (loop
)->flags
& EDGE_IRREDUCIBLE_LOOP
)
570 *irred_invalidated
= true;
572 /* This is relatively straightforward. The dominators are unchanged, as
573 loop header dominates loop latch, so the only thing we have to care of
574 is the placement of loops and basic blocks inside the loop tree. We
575 move them all to the loop->outer, and then let fix_bb_placements do
578 body
= get_loop_body (loop
);
580 for (i
= 0; i
< n
; i
++)
581 if (body
[i
]->loop_father
== loop
)
583 remove_bb_from_loops (body
[i
]);
584 add_bb_to_loop (body
[i
], loop
->outer
);
591 flow_loop_tree_node_remove (ploop
);
592 flow_loop_tree_node_add (loop
->outer
, ploop
);
595 /* Remove the loop and free its data. */
598 remove_edge (single_succ_edge (latch
));
600 /* We do not pass IRRED_INVALIDATED to fix_bb_placements here, as even if
601 there is an irreducible region inside the cancelled loop, the flags will
603 fix_bb_placements (latch
, &dummy
);
606 /* Fix placement of superloops of LOOP inside loop tree, i.e. ensure that
607 condition stated in description of fix_loop_placement holds for them.
608 It is used in case when we removed some edges coming out of LOOP, which
609 may cause the right placement of LOOP inside loop tree to change.
611 IRRED_INVALIDATED is set to true if a change in the loop structures might
612 invalidate the information about irreducible regions. */
615 fix_loop_placements (struct loop
*loop
, bool *irred_invalidated
)
622 if (!fix_loop_placement (loop
))
625 /* Changing the placement of a loop in the loop tree may alter the
626 validity of condition 2) of the description of fix_bb_placement
627 for its preheader, because the successor is the header and belongs
628 to the loop. So call fix_bb_placements to fix up the placement
629 of the preheader and (possibly) of its predecessors. */
630 fix_bb_placements (loop_preheader_edge (loop
)->src
,
636 /* Creates place for a new LOOP in loops structure. */
638 place_new_loop (struct loop
*loop
)
640 loop
->num
= number_of_loops ();
641 VEC_safe_push (loop_p
, heap
, current_loops
->larray
, loop
);
644 /* Copies copy of LOOP as subloop of TARGET loop, placing newly
645 created loop into loops structure. */
647 duplicate_loop (struct loop
*loop
, struct loop
*target
)
650 cloop
= alloc_loop ();
651 place_new_loop (cloop
);
653 /* Mark the new loop as copy of LOOP. */
656 /* Add it to target. */
657 flow_loop_tree_node_add (target
, cloop
);
662 /* Copies structure of subloops of LOOP into TARGET loop, placing
663 newly created loops into loop tree. */
665 duplicate_subloops (struct loop
*loop
, struct loop
*target
)
667 struct loop
*aloop
, *cloop
;
669 for (aloop
= loop
->inner
; aloop
; aloop
= aloop
->next
)
671 cloop
= duplicate_loop (aloop
, target
);
672 duplicate_subloops (aloop
, cloop
);
676 /* Copies structure of subloops of N loops, stored in array COPIED_LOOPS,
677 into TARGET loop, placing newly created loops into loop tree. */
679 copy_loops_to (struct loop
**copied_loops
, int n
, struct loop
*target
)
684 for (i
= 0; i
< n
; i
++)
686 aloop
= duplicate_loop (copied_loops
[i
], target
);
687 duplicate_subloops (copied_loops
[i
], aloop
);
691 /* Redirects edge E to basic block DEST. */
693 loop_redirect_edge (edge e
, basic_block dest
)
698 redirect_edge_and_branch_force (e
, dest
);
701 /* Deletes edge E from a branch if possible. Unless REALLY_DELETE is set,
702 just test whether it is possible to remove the edge. */
704 loop_delete_branch_edge (edge e
, int really_delete
)
706 basic_block src
= e
->src
;
711 gcc_assert (EDGE_COUNT (src
->succs
) > 1);
713 /* Cannot handle more than two exit edges. */
714 if (EDGE_COUNT (src
->succs
) > 2)
716 /* And it must be just a simple branch. */
717 if (!any_condjump_p (BB_END (src
)))
720 snd
= e
== EDGE_SUCC (src
, 0) ? EDGE_SUCC (src
, 1) : EDGE_SUCC (src
, 0);
722 if (newdest
== EXIT_BLOCK_PTR
)
725 /* Hopefully the above conditions should suffice. */
729 /* Redirecting behaves wrongly wrto this flag. */
730 irr
= snd
->flags
& EDGE_IRREDUCIBLE_LOOP
;
732 if (!redirect_edge_and_branch (e
, newdest
))
734 single_succ_edge (src
)->flags
&= ~EDGE_IRREDUCIBLE_LOOP
;
735 single_succ_edge (src
)->flags
|= irr
;
740 /* Check whether LOOP's body can be duplicated. */
742 can_duplicate_loop_p (struct loop
*loop
)
745 basic_block
*bbs
= get_loop_body (loop
);
747 ret
= can_copy_bbs_p (bbs
, loop
->num_nodes
);
753 /* Sets probability and count of edge E to zero. The probability and count
754 is redistributed evenly to the remaining edges coming from E->src. */
757 set_zero_probability (edge e
)
759 basic_block bb
= e
->src
;
761 edge ae
, last
= NULL
;
762 unsigned n
= EDGE_COUNT (bb
->succs
);
763 gcov_type cnt
= e
->count
, cnt1
;
764 unsigned prob
= e
->probability
, prob1
;
767 cnt1
= cnt
/ (n
- 1);
768 prob1
= prob
/ (n
- 1);
770 FOR_EACH_EDGE (ae
, ei
, bb
->succs
)
775 ae
->probability
+= prob1
;
780 /* Move the rest to one of the edges. */
781 last
->probability
+= prob
% (n
- 1);
782 last
->count
+= cnt
% (n
- 1);
788 /* Duplicates body of LOOP to given edge E NDUPL times. Takes care of updating
789 loop structure and dominators. E's destination must be LOOP header for
790 this to work, i.e. it must be entry or latch edge of this loop; these are
791 unique, as the loops must have preheaders for this function to work
792 correctly (in case E is latch, the function unrolls the loop, if E is entry
793 edge, it peels the loop). Store edges created by copying ORIG edge from
794 copies corresponding to set bits in WONT_EXIT bitmap (bit 0 corresponds to
795 original LOOP body, the other copies are numbered in order given by control
796 flow through them) into TO_REMOVE array. Returns false if duplication is
800 duplicate_loop_to_header_edge (struct loop
*loop
, edge e
,
801 unsigned int ndupl
, sbitmap wont_exit
,
802 edge orig
, VEC (edge
, heap
) **to_remove
,
805 struct loop
*target
, *aloop
;
806 struct loop
**orig_loops
;
807 unsigned n_orig_loops
;
808 basic_block header
= loop
->header
, latch
= loop
->latch
;
809 basic_block
*new_bbs
, *bbs
, *first_active
;
810 basic_block new_bb
, bb
, first_active_latch
= NULL
;
812 edge spec_edges
[2], new_spec_edges
[2];
816 int is_latch
= (latch
== e
->src
);
817 int scale_act
= 0, *scale_step
= NULL
, scale_main
= 0;
818 int scale_after_exit
= 0;
819 int p
, freq_in
, freq_le
, freq_out_orig
;
820 int prob_pass_thru
, prob_pass_wont_exit
, prob_pass_main
;
821 int add_irreducible_flag
;
822 basic_block place_after
;
823 bitmap bbs_to_scale
= NULL
;
826 gcc_assert (e
->dest
== loop
->header
);
827 gcc_assert (ndupl
> 0);
831 /* Orig must be edge out of the loop. */
832 gcc_assert (flow_bb_inside_loop_p (loop
, orig
->src
));
833 gcc_assert (!flow_bb_inside_loop_p (loop
, orig
->dest
));
837 bbs
= get_loop_body_in_dom_order (loop
);
838 gcc_assert (bbs
[0] == loop
->header
);
839 gcc_assert (bbs
[n
- 1] == loop
->latch
);
841 /* Check whether duplication is possible. */
842 if (!can_copy_bbs_p (bbs
, loop
->num_nodes
))
847 new_bbs
= XNEWVEC (basic_block
, loop
->num_nodes
);
849 /* In case we are doing loop peeling and the loop is in the middle of
850 irreducible region, the peeled copies will be inside it too. */
851 add_irreducible_flag
= e
->flags
& EDGE_IRREDUCIBLE_LOOP
;
852 gcc_assert (!is_latch
|| !add_irreducible_flag
);
854 /* Find edge from latch. */
855 latch_edge
= loop_latch_edge (loop
);
857 if (flags
& DLTHE_FLAG_UPDATE_FREQ
)
859 /* Calculate coefficients by that we have to scale frequencies
860 of duplicated loop bodies. */
861 freq_in
= header
->frequency
;
862 freq_le
= EDGE_FREQUENCY (latch_edge
);
865 if (freq_in
< freq_le
)
867 freq_out_orig
= orig
? EDGE_FREQUENCY (orig
) : freq_in
- freq_le
;
868 if (freq_out_orig
> freq_in
- freq_le
)
869 freq_out_orig
= freq_in
- freq_le
;
870 prob_pass_thru
= RDIV (REG_BR_PROB_BASE
* freq_le
, freq_in
);
871 prob_pass_wont_exit
=
872 RDIV (REG_BR_PROB_BASE
* (freq_le
+ freq_out_orig
), freq_in
);
875 && REG_BR_PROB_BASE
- orig
->probability
!= 0)
877 /* The blocks that are dominated by a removed exit edge ORIG have
878 frequencies scaled by this. */
879 scale_after_exit
= RDIV (REG_BR_PROB_BASE
* REG_BR_PROB_BASE
,
880 REG_BR_PROB_BASE
- orig
->probability
);
881 bbs_to_scale
= BITMAP_ALLOC (NULL
);
882 for (i
= 0; i
< n
; i
++)
884 if (bbs
[i
] != orig
->src
885 && dominated_by_p (CDI_DOMINATORS
, bbs
[i
], orig
->src
))
886 bitmap_set_bit (bbs_to_scale
, i
);
890 scale_step
= XNEWVEC (int, ndupl
);
892 for (i
= 1; i
<= ndupl
; i
++)
893 scale_step
[i
- 1] = TEST_BIT (wont_exit
, i
)
894 ? prob_pass_wont_exit
897 /* Complete peeling is special as the probability of exit in last
899 if (flags
& DLTHE_FLAG_COMPLETTE_PEEL
)
901 int wanted_freq
= EDGE_FREQUENCY (e
);
903 if (wanted_freq
> freq_in
)
904 wanted_freq
= freq_in
;
906 gcc_assert (!is_latch
);
907 /* First copy has frequency of incoming edge. Each subsequent
908 frequency should be reduced by prob_pass_wont_exit. Caller
909 should've managed the flags so all except for original loop
910 has won't exist set. */
911 scale_act
= RDIV (wanted_freq
* REG_BR_PROB_BASE
, freq_in
);
912 /* Now simulate the duplication adjustments and compute header
913 frequency of the last copy. */
914 for (i
= 0; i
< ndupl
; i
++)
915 wanted_freq
= RDIV (wanted_freq
* scale_step
[i
], REG_BR_PROB_BASE
);
916 scale_main
= RDIV (wanted_freq
* REG_BR_PROB_BASE
, freq_in
);
920 prob_pass_main
= TEST_BIT (wont_exit
, 0)
921 ? prob_pass_wont_exit
924 scale_main
= REG_BR_PROB_BASE
;
925 for (i
= 0; i
< ndupl
; i
++)
928 p
= RDIV (p
* scale_step
[i
], REG_BR_PROB_BASE
);
930 scale_main
= RDIV (REG_BR_PROB_BASE
* REG_BR_PROB_BASE
, scale_main
);
931 scale_act
= RDIV (scale_main
* prob_pass_main
, REG_BR_PROB_BASE
);
935 scale_main
= REG_BR_PROB_BASE
;
936 for (i
= 0; i
< ndupl
; i
++)
937 scale_main
= RDIV (scale_main
* scale_step
[i
], REG_BR_PROB_BASE
);
938 scale_act
= REG_BR_PROB_BASE
- prob_pass_thru
;
940 for (i
= 0; i
< ndupl
; i
++)
941 gcc_assert (scale_step
[i
] >= 0 && scale_step
[i
] <= REG_BR_PROB_BASE
);
942 gcc_assert (scale_main
>= 0 && scale_main
<= REG_BR_PROB_BASE
943 && scale_act
>= 0 && scale_act
<= REG_BR_PROB_BASE
);
946 /* Loop the new bbs will belong to. */
947 target
= e
->src
->loop_father
;
949 /* Original loops. */
951 for (aloop
= loop
->inner
; aloop
; aloop
= aloop
->next
)
953 orig_loops
= XCNEWVEC (struct loop
*, n_orig_loops
);
954 for (aloop
= loop
->inner
, i
= 0; aloop
; aloop
= aloop
->next
, i
++)
955 orig_loops
[i
] = aloop
;
959 first_active
= XNEWVEC (basic_block
, n
);
962 memcpy (first_active
, bbs
, n
* sizeof (basic_block
));
963 first_active_latch
= latch
;
966 spec_edges
[SE_ORIG
] = orig
;
967 spec_edges
[SE_LATCH
] = latch_edge
;
969 place_after
= e
->src
;
970 for (j
= 0; j
< ndupl
; j
++)
973 copy_loops_to (orig_loops
, n_orig_loops
, target
);
976 copy_bbs (bbs
, n
, new_bbs
, spec_edges
, 2, new_spec_edges
, loop
,
978 place_after
= new_spec_edges
[SE_LATCH
]->src
;
980 if (flags
& DLTHE_RECORD_COPY_NUMBER
)
981 for (i
= 0; i
< n
; i
++)
983 gcc_assert (!new_bbs
[i
]->aux
);
984 new_bbs
[i
]->aux
= (void *)(size_t)(j
+ 1);
987 /* Note whether the blocks and edges belong to an irreducible loop. */
988 if (add_irreducible_flag
)
990 for (i
= 0; i
< n
; i
++)
991 new_bbs
[i
]->flags
|= BB_DUPLICATED
;
992 for (i
= 0; i
< n
; i
++)
996 if (new_bb
->loop_father
== target
)
997 new_bb
->flags
|= BB_IRREDUCIBLE_LOOP
;
999 FOR_EACH_EDGE (ae
, ei
, new_bb
->succs
)
1000 if ((ae
->dest
->flags
& BB_DUPLICATED
)
1001 && (ae
->src
->loop_father
== target
1002 || ae
->dest
->loop_father
== target
))
1003 ae
->flags
|= EDGE_IRREDUCIBLE_LOOP
;
1005 for (i
= 0; i
< n
; i
++)
1006 new_bbs
[i
]->flags
&= ~BB_DUPLICATED
;
1009 /* Redirect the special edges. */
1012 redirect_edge_and_branch_force (latch_edge
, new_bbs
[0]);
1013 redirect_edge_and_branch_force (new_spec_edges
[SE_LATCH
],
1015 set_immediate_dominator (CDI_DOMINATORS
, new_bbs
[0], latch
);
1016 latch
= loop
->latch
= new_bbs
[n
- 1];
1017 e
= latch_edge
= new_spec_edges
[SE_LATCH
];
1021 redirect_edge_and_branch_force (new_spec_edges
[SE_LATCH
],
1023 redirect_edge_and_branch_force (e
, new_bbs
[0]);
1024 set_immediate_dominator (CDI_DOMINATORS
, new_bbs
[0], e
->src
);
1025 e
= new_spec_edges
[SE_LATCH
];
1028 /* Record exit edge in this copy. */
1029 if (orig
&& TEST_BIT (wont_exit
, j
+ 1))
1032 VEC_safe_push (edge
, heap
, *to_remove
, new_spec_edges
[SE_ORIG
]);
1033 set_zero_probability (new_spec_edges
[SE_ORIG
]);
1035 /* Scale the frequencies of the blocks dominated by the exit. */
1038 EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale
, 0, i
, bi
)
1040 scale_bbs_frequencies_int (new_bbs
+ i
, 1, scale_after_exit
,
1046 /* Record the first copy in the control flow order if it is not
1047 the original loop (i.e. in case of peeling). */
1048 if (!first_active_latch
)
1050 memcpy (first_active
, new_bbs
, n
* sizeof (basic_block
));
1051 first_active_latch
= new_bbs
[n
- 1];
1054 /* Set counts and frequencies. */
1055 if (flags
& DLTHE_FLAG_UPDATE_FREQ
)
1057 scale_bbs_frequencies_int (new_bbs
, n
, scale_act
, REG_BR_PROB_BASE
);
1058 scale_act
= RDIV (scale_act
* scale_step
[j
], REG_BR_PROB_BASE
);
1064 /* Record the exit edge in the original loop body, and update the frequencies. */
1065 if (orig
&& TEST_BIT (wont_exit
, 0))
1068 VEC_safe_push (edge
, heap
, *to_remove
, orig
);
1069 set_zero_probability (orig
);
1071 /* Scale the frequencies of the blocks dominated by the exit. */
1074 EXECUTE_IF_SET_IN_BITMAP (bbs_to_scale
, 0, i
, bi
)
1076 scale_bbs_frequencies_int (bbs
+ i
, 1, scale_after_exit
,
1082 /* Update the original loop. */
1084 set_immediate_dominator (CDI_DOMINATORS
, e
->dest
, e
->src
);
1085 if (flags
& DLTHE_FLAG_UPDATE_FREQ
)
1087 scale_bbs_frequencies_int (bbs
, n
, scale_main
, REG_BR_PROB_BASE
);
1091 /* Update dominators of outer blocks if affected. */
1092 for (i
= 0; i
< n
; i
++)
1094 basic_block dominated
, dom_bb
, *dom_bbs
;
1100 n_dom_bbs
= get_dominated_by (CDI_DOMINATORS
, bb
, &dom_bbs
);
1101 for (j
= 0; j
< n_dom_bbs
; j
++)
1103 dominated
= dom_bbs
[j
];
1104 if (flow_bb_inside_loop_p (loop
, dominated
))
1106 dom_bb
= nearest_common_dominator (
1107 CDI_DOMINATORS
, first_active
[i
], first_active_latch
);
1108 set_immediate_dominator (CDI_DOMINATORS
, dominated
, dom_bb
);
1112 free (first_active
);
1115 BITMAP_FREE (bbs_to_scale
);
1120 /* A callback for make_forwarder block, to redirect all edges except for
1121 MFB_KJ_EDGE to the entry part. E is the edge for that we should decide
1122 whether to redirect it. */
1124 static edge mfb_kj_edge
;
1126 mfb_keep_just (edge e
)
1128 return e
!= mfb_kj_edge
;
1131 /* Creates a pre-header for a LOOP. Returns newly created block. Unless
1132 CP_SIMPLE_PREHEADERS is set in FLAGS, we only force LOOP to have single
1133 entry; otherwise we also force preheader block to have only one successor.
1134 The function also updates dominators. */
1137 create_preheader (struct loop
*loop
, int flags
)
1143 bool latch_edge_was_fallthru
;
1144 edge one_succ_pred
= 0;
1147 FOR_EACH_EDGE (e
, ei
, loop
->header
->preds
)
1149 if (e
->src
== loop
->latch
)
1151 irred
|= (e
->flags
& EDGE_IRREDUCIBLE_LOOP
) != 0;
1153 if (single_succ_p (e
->src
))
1156 gcc_assert (nentry
);
1159 /* Get an edge that is different from the one from loop->latch
1161 e
= EDGE_PRED (loop
->header
,
1162 EDGE_PRED (loop
->header
, 0)->src
== loop
->latch
);
1164 if (!(flags
& CP_SIMPLE_PREHEADERS
) || single_succ_p (e
->src
))
1168 mfb_kj_edge
= loop_latch_edge (loop
);
1169 latch_edge_was_fallthru
= (mfb_kj_edge
->flags
& EDGE_FALLTHRU
) != 0;
1170 fallthru
= make_forwarder_block (loop
->header
, mfb_keep_just
, NULL
);
1171 dummy
= fallthru
->src
;
1172 loop
->header
= fallthru
->dest
;
1174 /* Try to be clever in placing the newly created preheader. The idea is to
1175 avoid breaking any "fallthruness" relationship between blocks.
1177 The preheader was created just before the header and all incoming edges
1178 to the header were redirected to the preheader, except the latch edge.
1179 So the only problematic case is when this latch edge was a fallthru
1180 edge: it is not anymore after the preheader creation so we have broken
1181 the fallthruness. We're therefore going to look for a better place. */
1182 if (latch_edge_was_fallthru
)
1187 e
= EDGE_PRED (dummy
, 0);
1189 move_block_after (dummy
, e
->src
);
1194 dummy
->flags
|= BB_IRREDUCIBLE_LOOP
;
1195 single_succ_edge (dummy
)->flags
|= EDGE_IRREDUCIBLE_LOOP
;
1199 fprintf (dump_file
, "Created preheader block for loop %i\n",
1205 /* Create preheaders for each loop; for meaning of FLAGS see create_preheader. */
1208 create_preheaders (int flags
)
1213 FOR_EACH_LOOP (li
, loop
, 0)
1214 create_preheader (loop
, flags
);
1215 current_loops
->state
|= LOOPS_HAVE_PREHEADERS
;
1218 /* Forces all loop latches to have only single successor. */
1221 force_single_succ_latches (void)
1227 FOR_EACH_LOOP (li
, loop
, 0)
1229 if (loop
->latch
!= loop
->header
&& single_succ_p (loop
->latch
))
1232 e
= find_edge (loop
->latch
, loop
->header
);
1236 current_loops
->state
|= LOOPS_HAVE_SIMPLE_LATCHES
;
1239 /* This function is called from loop_version. It splits the entry edge
1240 of the loop we want to version, adds the versioning condition, and
1241 adjust the edges to the two versions of the loop appropriately.
1242 e is an incoming edge. Returns the basic block containing the
1245 --- edge e ---- > [second_head]
1247 Split it and insert new conditional expression and adjust edges.
1249 --- edge e ---> [cond expr] ---> [first_head]
1251 +---------> [second_head]
1253 THEN_PROB is the probability of then branch of the condition. */
1256 lv_adjust_loop_entry_edge (basic_block first_head
, basic_block second_head
,
1257 edge e
, void *cond_expr
, unsigned then_prob
)
1259 basic_block new_head
= NULL
;
1262 gcc_assert (e
->dest
== second_head
);
1264 /* Split edge 'e'. This will create a new basic block, where we can
1265 insert conditional expr. */
1266 new_head
= split_edge (e
);
1268 lv_add_condition_to_bb (first_head
, second_head
, new_head
,
1271 /* Don't set EDGE_TRUE_VALUE in RTL mode, as it's invalid there. */
1272 e
= single_succ_edge (new_head
);
1273 e1
= make_edge (new_head
, first_head
,
1274 current_ir_type () == IR_GIMPLE
? EDGE_TRUE_VALUE
: 0);
1275 e1
->probability
= then_prob
;
1276 e
->probability
= REG_BR_PROB_BASE
- then_prob
;
1277 e1
->count
= RDIV (e
->count
* e1
->probability
, REG_BR_PROB_BASE
);
1278 e
->count
= RDIV (e
->count
* e
->probability
, REG_BR_PROB_BASE
);
1280 set_immediate_dominator (CDI_DOMINATORS
, first_head
, new_head
);
1281 set_immediate_dominator (CDI_DOMINATORS
, second_head
, new_head
);
1283 /* Adjust loop header phi nodes. */
1284 lv_adjust_loop_header_phi (first_head
, second_head
, new_head
, e1
);
1289 /* Main entry point for Loop Versioning transformation.
1291 This transformation given a condition and a loop, creates
1292 -if (condition) { loop_copy1 } else { loop_copy2 },
1293 where loop_copy1 is the loop transformed in one way, and loop_copy2
1294 is the loop transformed in another way (or unchanged). 'condition'
1295 may be a run time test for things that were not resolved by static
1296 analysis (overlapping ranges (anti-aliasing), alignment, etc.).
1298 THEN_PROB is the probability of the then edge of the if. THEN_SCALE
1299 is the ratio by that the frequencies in the original loop should
1300 be scaled. ELSE_SCALE is the ratio by that the frequencies in the
1301 new loop should be scaled.
1303 If PLACE_AFTER is true, we place the new loop after LOOP in the
1304 instruction stream, otherwise it is placed before LOOP. */
1307 loop_version (struct loop
*loop
,
1308 void *cond_expr
, basic_block
*condition_bb
,
1309 unsigned then_prob
, unsigned then_scale
, unsigned else_scale
,
1312 basic_block first_head
, second_head
;
1313 edge entry
, latch_edge
, true_edge
, false_edge
;
1316 basic_block cond_bb
;
1318 /* CHECKME: Loop versioning does not handle nested loop at this point. */
1322 /* Record entry and latch edges for the loop */
1323 entry
= loop_preheader_edge (loop
);
1324 irred_flag
= entry
->flags
& EDGE_IRREDUCIBLE_LOOP
;
1325 entry
->flags
&= ~EDGE_IRREDUCIBLE_LOOP
;
1327 /* Note down head of loop as first_head. */
1328 first_head
= entry
->dest
;
1330 /* Duplicate loop. */
1331 if (!cfg_hook_duplicate_loop_to_header_edge (loop
, entry
, 1,
1332 NULL
, NULL
, NULL
, 0))
1335 /* After duplication entry edge now points to new loop head block.
1336 Note down new head as second_head. */
1337 second_head
= entry
->dest
;
1339 /* Split loop entry edge and insert new block with cond expr. */
1340 cond_bb
= lv_adjust_loop_entry_edge (first_head
, second_head
,
1341 entry
, cond_expr
, then_prob
);
1343 *condition_bb
= cond_bb
;
1347 entry
->flags
|= irred_flag
;
1351 latch_edge
= single_succ_edge (get_bb_copy (loop
->latch
));
1353 extract_cond_bb_edges (cond_bb
, &true_edge
, &false_edge
);
1354 nloop
= loopify (latch_edge
,
1355 single_pred_edge (get_bb_copy (loop
->header
)),
1356 cond_bb
, true_edge
, false_edge
,
1357 false /* Do not redirect all edges. */,
1358 then_scale
, else_scale
);
1360 /* loopify redirected latch_edge. Update its PENDING_STMTS. */
1361 lv_flush_pending_stmts (latch_edge
);
1363 /* loopify redirected condition_bb's succ edge. Update its PENDING_STMTS. */
1364 extract_cond_bb_edges (cond_bb
, &true_edge
, &false_edge
);
1365 lv_flush_pending_stmts (false_edge
);
1366 /* Adjust irreducible flag. */
1369 cond_bb
->flags
|= BB_IRREDUCIBLE_LOOP
;
1370 loop_preheader_edge (loop
)->flags
|= EDGE_IRREDUCIBLE_LOOP
;
1371 loop_preheader_edge (nloop
)->flags
|= EDGE_IRREDUCIBLE_LOOP
;
1372 single_pred_edge (cond_bb
)->flags
|= EDGE_IRREDUCIBLE_LOOP
;
1377 basic_block
*bbs
= get_loop_body_in_dom_order (nloop
), after
;
1380 after
= loop
->latch
;
1382 for (i
= 0; i
< nloop
->num_nodes
; i
++)
1384 move_block_after (bbs
[i
], after
);
1390 /* At this point condition_bb is loop predheader with two successors,
1391 first_head and second_head. Make sure that loop predheader has only
1393 split_edge (loop_preheader_edge (loop
));
1394 split_edge (loop_preheader_edge (nloop
));
1399 /* The structure of loops might have changed. Some loops might get removed
1400 (and their headers and latches were set to NULL), loop exists might get
1401 removed (thus the loop nesting may be wrong), and some blocks and edges
1402 were changed (so the information about bb --> loop mapping does not have
1403 to be correct). But still for the remaining loops the header dominates
1404 the latch, and loops did not get new subloobs (new loops might possibly
1405 get created, but we are not interested in them). Fix up the mess.
1407 If CHANGED_BBS is not NULL, basic blocks whose loop has changed are
1411 fix_loop_structure (bitmap changed_bbs
)
1414 struct loop
*loop
, *ploop
;
1417 /* Remove the old bb -> loop mapping. */
1420 bb
->aux
= (void *) (size_t) bb
->loop_father
->depth
;
1421 bb
->loop_father
= current_loops
->tree_root
;
1424 /* Remove the dead loops from structures. */
1425 current_loops
->tree_root
->num_nodes
= n_basic_blocks
;
1426 FOR_EACH_LOOP (li
, loop
, 0)
1428 loop
->num_nodes
= 0;
1434 ploop
= loop
->inner
;
1435 flow_loop_tree_node_remove (ploop
);
1436 flow_loop_tree_node_add (loop
->outer
, ploop
);
1439 /* Remove the loop and free its data. */
1443 /* Rescan the bodies of loops, starting from the outermost. */
1444 FOR_EACH_LOOP (li
, loop
, 0)
1446 loop
->num_nodes
= flow_loop_nodes_find (loop
->header
, loop
);
1449 /* Now fix the loop nesting. */
1450 FOR_EACH_LOOP (li
, loop
, 0)
1452 bb
= loop_preheader_edge (loop
)->src
;
1453 if (bb
->loop_father
!= loop
->outer
)
1455 flow_loop_tree_node_remove (loop
);
1456 flow_loop_tree_node_add (bb
->loop_father
, loop
);
1460 /* Mark the blocks whose loop has changed. */
1464 && (void *) (size_t) bb
->loop_father
->depth
!= bb
->aux
)
1465 bitmap_set_bit (changed_bbs
, bb
->index
);
1470 if (current_loops
->state
& LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS
)
1471 mark_irreducible_loops ();
1473 if (current_loops
->state
& LOOPS_HAVE_RECORDED_EXITS
)
1475 release_recorded_exits ();
1476 record_loop_exits ();