1 /* Code to analyze doloop loops in order for targets to perform late
2 optimizations converting doloops to other forms of hardware loops.
3 Copyright (C) 2011 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
28 #include "hard-reg-set.h"
30 #include "basic-block.h"
33 #include "cfglayout.h"
38 #include "hw-doloop.h"
40 #ifdef HAVE_doloop_end
42 /* Dump information collected in LOOPS. */
44 dump_hwloops (hwloop_info loops
)
48 for (loop
= loops
; loop
; loop
= loop
->next
)
54 fprintf (dump_file
, ";; loop %d: ", loop
->loop_no
);
56 fprintf (dump_file
, "(bad) ");
57 fprintf (dump_file
, "{head:%d, depth:%d, reg:%u}",
58 loop
->head
== NULL
? -1 : loop
->head
->index
,
59 loop
->depth
, REGNO (loop
->iter_reg
));
61 fprintf (dump_file
, " blocks: [ ");
62 for (ix
= 0; VEC_iterate (basic_block
, loop
->blocks
, ix
, b
); ix
++)
63 fprintf (dump_file
, "%d ", b
->index
);
64 fprintf (dump_file
, "] ");
66 fprintf (dump_file
, " inner loops: [ ");
67 for (ix
= 0; VEC_iterate (hwloop_info
, loop
->loops
, ix
, i
); ix
++)
68 fprintf (dump_file
, "%d ", i
->loop_no
);
69 fprintf (dump_file
, "]\n");
71 fprintf (dump_file
, "\n");
74 /* Return true if BB is part of LOOP. */
76 bb_in_loop_p (hwloop_info loop
, basic_block bb
)
78 return bitmap_bit_p (loop
->block_bitmap
, bb
->index
);
81 /* Scan the blocks of LOOP (and its inferiors), and record things such as
82 hard registers set, jumps out of and within the loop. */
84 scan_loop (hwloop_info loop
)
92 if (REGNO_REG_SET_P (df_get_live_in (loop
->successor
),
93 REGNO (loop
->iter_reg
)))
94 loop
->iter_reg_used_outside
= true;
96 for (ix
= 0; VEC_iterate (basic_block
, loop
->blocks
, ix
, bb
); ix
++)
102 if (bb
!= loop
->tail
)
103 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
105 if (bb_in_loop_p (loop
, e
->dest
))
107 if (!(e
->flags
& EDGE_FALLTHRU
))
108 loop
->jumps_within
= true;
112 loop
->jumps_outof
= true;
114 gcc_assert (!REGNO_REG_SET_P (df_get_live_in (e
->dest
),
115 REGNO (loop
->iter_reg
)));
119 for (insn
= BB_HEAD (bb
);
120 insn
!= NEXT_INSN (BB_END (bb
));
121 insn
= NEXT_INSN (insn
))
124 HARD_REG_SET set_this_insn
;
126 if (!NONDEBUG_INSN_P (insn
))
129 if (recog_memoized (insn
) < 0
130 && (GET_CODE (PATTERN (insn
)) == ASM_INPUT
131 || asm_noperands (PATTERN (insn
)) >= 0))
132 loop
->has_asm
= true;
134 CLEAR_HARD_REG_SET (set_this_insn
);
135 for (def_rec
= DF_INSN_DEFS (insn
); *def_rec
; def_rec
++)
137 rtx dreg
= DF_REF_REG (*def_rec
);
142 add_to_hard_reg_set (&set_this_insn
, GET_MODE (dreg
),
146 if (insn
== loop
->loop_end
)
147 CLEAR_HARD_REG_BIT (set_this_insn
, REGNO (loop
->iter_reg
));
148 else if (reg_mentioned_p (loop
->iter_reg
, PATTERN (insn
)))
149 loop
->iter_reg_used
= true;
150 IOR_HARD_REG_SET (loop
->regs_set_in_loop
, set_this_insn
);
155 /* Compute the incoming_dest and incoming_src members of LOOP by
156 identifying the edges that jump into the loop. If there is more
157 than one block that jumps into the loop, incoming_src will be set
158 to NULL; likewise, if there is more than one block in the loop that
159 is the destination of an incoming edge, incoming_dest will be NULL.
161 Return true if either of these two fields is nonnull, false
164 process_incoming_edges (hwloop_info loop
)
170 FOR_EACH_EDGE (e
, ei
, loop
->incoming
)
174 loop
->incoming_src
= e
->src
;
175 loop
->incoming_dest
= e
->dest
;
180 if (e
->dest
!= loop
->incoming_dest
)
181 loop
->incoming_dest
= NULL
;
182 if (e
->src
!= loop
->incoming_src
)
183 loop
->incoming_src
= NULL
;
186 if (loop
->incoming_src
== NULL
&& loop
->incoming_dest
== NULL
)
192 /* Try to identify a forwarder block that jump into LOOP, and add it to
193 the set of blocks in the loop, updating the vector of incoming blocks as
194 well. This transformation gives a second chance to loops we couldn't
195 otherwise handle by increasing the chance that we'll end up with one
197 Return true if we made a change, false otherwise. */
199 add_forwarder_blocks (hwloop_info loop
)
204 FOR_EACH_EDGE (e
, ei
, loop
->incoming
)
206 if (forwarder_block_p (e
->src
))
213 ";; Adding forwarder block %d to loop %d and retrying\n",
214 e
->src
->index
, loop
->loop_no
);
215 VEC_safe_push (basic_block
, heap
, loop
->blocks
, e
->src
);
216 bitmap_set_bit (loop
->block_bitmap
, e
->src
->index
);
217 FOR_EACH_EDGE (e2
, ei2
, e
->src
->preds
)
218 VEC_safe_push (edge
, gc
, loop
->incoming
, e2
);
219 VEC_unordered_remove (edge
, loop
->incoming
, ei
.index
);
226 /* Called from reorg_loops when a potential loop end is found. LOOP is
227 a newly set up structure describing the loop, it is this function's
228 responsibility to fill most of it. TAIL_BB and TAIL_INSN point to the
229 loop_end insn and its enclosing basic block. REG is the loop counter
231 For our purposes, a loop is defined by the set of blocks reachable from
232 the loop head in which the loop counter register is live. This matches
233 the expected use; targets that call into this code usually replace the
234 loop counter with a different special register. */
236 discover_loop (hwloop_info loop
, basic_block tail_bb
, rtx tail_insn
, rtx reg
)
241 VEC (basic_block
,heap
) *works
;
243 loop
->tail
= tail_bb
;
244 loop
->loop_end
= tail_insn
;
245 loop
->iter_reg
= reg
;
246 loop
->incoming
= VEC_alloc (edge
, gc
, 2);
247 loop
->start_label
= JUMP_LABEL (tail_insn
);
249 if (EDGE_COUNT (tail_bb
->succs
) != 2)
254 loop
->head
= BRANCH_EDGE (tail_bb
)->dest
;
255 loop
->successor
= FALLTHRU_EDGE (tail_bb
)->dest
;
257 works
= VEC_alloc (basic_block
, heap
, 20);
258 VEC_safe_push (basic_block
, heap
, works
, loop
->head
);
261 for (dwork
= 0; VEC_iterate (basic_block
, works
, dwork
, bb
); dwork
++)
265 if (bb
== EXIT_BLOCK_PTR
)
267 /* We've reached the exit block. The loop must be bad. */
270 ";; Loop is bad - reached exit block while scanning\n");
275 if (bitmap_bit_p (loop
->block_bitmap
, bb
->index
))
278 /* We've not seen this block before. Add it to the loop's
279 list and then add each successor to the work list. */
281 VEC_safe_push (basic_block
, heap
, loop
->blocks
, bb
);
282 bitmap_set_bit (loop
->block_bitmap
, bb
->index
);
288 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
290 basic_block succ
= EDGE_SUCC (bb
, ei
.index
)->dest
;
291 if (REGNO_REG_SET_P (df_get_live_in (succ
),
292 REGNO (loop
->iter_reg
)))
293 VEC_safe_push (basic_block
, heap
, works
, succ
);
301 /* Find the predecessor, and make sure nothing else jumps into this loop. */
304 FOR_EACH_VEC_ELT (basic_block
, loop
->blocks
, dwork
, bb
)
308 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
310 basic_block pred
= e
->src
;
312 if (!bb_in_loop_p (loop
, pred
))
315 fprintf (dump_file
, ";; Loop %d: incoming edge %d -> %d\n",
316 loop
->loop_no
, pred
->index
,
318 VEC_safe_push (edge
, gc
, loop
->incoming
, e
);
323 if (!process_incoming_edges (loop
))
327 ";; retrying loop %d with forwarder blocks\n",
329 if (!add_forwarder_blocks (loop
))
332 fprintf (dump_file
, ";; No forwarder blocks found\n");
335 else if (!process_incoming_edges (loop
))
339 ";; can't find suitable entry for loop %d\n",
345 VEC_free (basic_block
, heap
, works
);
348 /* Analyze the structure of the loops in the current function. Use
349 STACK for bitmap allocations. Returns all the valid candidates for
350 hardware loops found in this function. HOOKS is the argument
351 passed to reorg_loops, used here to find the iteration registers
352 from a loop_end pattern. */
354 discover_loops (bitmap_obstack
*stack
, struct hw_doloop_hooks
*hooks
)
356 hwloop_info loops
= NULL
;
361 /* Find all the possible loop tails. This means searching for every
362 loop_end instruction. For each one found, create a hwloop_info
363 structure and add the head block to the work list. */
366 rtx tail
= BB_END (bb
);
369 while (tail
&& GET_CODE (tail
) == NOTE
&& tail
!= BB_HEAD (bb
))
370 tail
= PREV_INSN (tail
);
372 if (tail
== NULL_RTX
)
377 reg
= hooks
->end_pattern_reg (tail
);
381 /* A possible loop end */
383 /* There's a degenerate case we can handle - an empty loop consisting
384 of only a back branch. Handle that by deleting the branch. */
385 insn
= JUMP_LABEL (tail
);
386 while (insn
&& !NONDEBUG_INSN_P (insn
))
387 insn
= NEXT_INSN (insn
);
390 basic_block succ
= FALLTHRU_EDGE (bb
)->dest
;
393 fprintf (dump_file
, ";; degenerate loop ending at\n");
394 print_rtl_single (dump_file
, tail
);
396 if (!REGNO_REG_SET_P (df_get_live_in (succ
), REGNO (reg
)))
399 fprintf (dump_file
, ";; deleting it\n");
400 delete_insn_and_edges (tail
);
405 loop
= XCNEW (struct hwloop_info_d
);
408 loop
->loop_no
= nloops
++;
409 loop
->blocks
= VEC_alloc (basic_block
, heap
, 20);
410 loop
->block_bitmap
= BITMAP_ALLOC (stack
);
414 fprintf (dump_file
, ";; potential loop %d ending at\n",
416 print_rtl_single (dump_file
, tail
);
419 discover_loop (loop
, bb
, tail
, reg
);
422 /* Compute loop nestings. Given two loops A and B, either the sets
423 of their blocks don't intersect at all, or one is the subset of
424 the other, or the blocks don't form a good nesting structure. */
425 for (loop
= loops
; loop
; loop
= loop
->next
)
432 for (other
= loops
; other
; other
= other
->next
)
437 if (!bitmap_intersect_p (other
->block_bitmap
, loop
->block_bitmap
))
439 if (!bitmap_intersect_compl_p (other
->block_bitmap
,
441 VEC_safe_push (hwloop_info
, heap
, loop
->loops
, other
);
442 else if (!bitmap_intersect_compl_p (loop
->block_bitmap
,
443 other
->block_bitmap
))
444 VEC_safe_push (hwloop_info
, heap
, other
->loops
, loop
);
449 ";; can't find suitable nesting for loops %d and %d\n",
450 loop
->loop_no
, other
->loop_no
);
451 loop
->bad
= other
->bad
= true;
457 dump_hwloops (loops
);
462 /* Free up the loop structures in LOOPS. */
464 free_loops (hwloop_info loops
)
468 hwloop_info loop
= loops
;
470 VEC_free (hwloop_info
, heap
, loop
->loops
);
471 VEC_free (basic_block
, heap
, loop
->blocks
);
472 BITMAP_FREE (loop
->block_bitmap
);
477 #define BB_AUX_INDEX(BB) ((intptr_t) (BB)->aux)
479 /* Initialize the aux fields to give ascending indices to basic blocks. */
481 set_bb_indices (void)
488 bb
->aux
= (void *) index
++;
491 /* The taken-branch edge from the loop end can actually go forward.
492 If the target's hardware loop support requires that the loop end be
493 after the loop start, try to reorder a loop's basic blocks when we
495 This is not very aggressive; it only moves at most one block. It
496 does not introduce new branches into loops; it may remove them, or
497 it may switch fallthru/jump edges. */
499 reorder_loops (hwloop_info loops
)
504 cfg_layout_initialize (0);
508 for (loop
= loops
; loop
; loop
= loop
->next
)
516 if (BB_AUX_INDEX (loop
->head
) <= BB_AUX_INDEX (loop
->tail
))
519 FOR_EACH_EDGE (e
, ei
, loop
->head
->succs
)
521 if (bitmap_bit_p (loop
->block_bitmap
, e
->dest
->index
)
522 && BB_AUX_INDEX (e
->dest
) < BB_AUX_INDEX (loop
->tail
))
524 basic_block start_bb
= e
->dest
;
525 basic_block start_prev_bb
= start_bb
->prev_bb
;
528 fprintf (dump_file
, ";; Moving block %d before block %d\n",
529 loop
->head
->index
, start_bb
->index
);
530 loop
->head
->prev_bb
->next_bb
= loop
->head
->next_bb
;
531 loop
->head
->next_bb
->prev_bb
= loop
->head
->prev_bb
;
533 loop
->head
->prev_bb
= start_prev_bb
;
534 loop
->head
->next_bb
= start_bb
;
535 start_prev_bb
->next_bb
= start_bb
->prev_bb
= loop
->head
;
546 if (bb
->next_bb
!= EXIT_BLOCK_PTR
)
547 bb
->aux
= bb
->next_bb
;
551 cfg_layout_finalize ();
552 clear_aux_for_blocks ();
556 /* Call the OPT function for LOOP and all of its sub-loops. This is
557 done in a depth-first search; innermost loops are visited first.
558 OPTIMIZE and FAIL are the functions passed to reorg_loops by the
559 target's reorg pass. */
561 optimize_loop (hwloop_info loop
, struct hw_doloop_hooks
*hooks
)
575 fprintf (dump_file
, ";; loop %d bad when found\n", loop
->loop_no
);
579 /* Every loop contains in its list of inner loops every loop nested inside
580 it, even if there are intermediate loops. This works because we're doing
581 a depth-first search here and never visit a loop more than once.
582 Recursion depth is effectively limited by the number of available
583 hardware registers. */
584 for (ix
= 0; VEC_iterate (hwloop_info
, loop
->loops
, ix
, inner
); ix
++)
586 optimize_loop (inner
, hooks
);
588 if (!inner
->bad
&& inner_depth
< inner
->depth
)
589 inner_depth
= inner
->depth
;
590 /* The set of registers may be changed while optimizing the inner
592 IOR_HARD_REG_SET (loop
->regs_set_in_loop
, inner
->regs_set_in_loop
);
595 loop
->depth
= inner_depth
+ 1;
597 if (hooks
->opt (loop
))
602 fprintf (dump_file
, ";; loop %d is bad\n", loop
->loop_no
);
608 /* This function can be used from a port's machine_dependent_reorg to
609 find and analyze loops that end in loop_end instructions. It uses
610 a set of function pointers in HOOKS to call back into the
611 target-specific functions to perform the actual machine-specific
614 Such transformations typically involve additional set-up
615 instructions before the loop, to define loop bounds or set up a
616 special loop counter register.
618 DO_REORDER should be set to true if we should try to use the
619 reorder_loops function to ensure the loop end occurs after the loop
620 start. This is for use by targets where the loop hardware requires
623 HOOKS is used to pass in target specific hooks; see
626 reorg_loops (bool do_reorder
, struct hw_doloop_hooks
*hooks
)
628 hwloop_info loops
= NULL
;
630 bitmap_obstack stack
;
632 df_live_add_problem ();
633 df_live_set_all_dirty ();
636 bitmap_obstack_initialize (&stack
);
639 fprintf (dump_file
, ";; Find loops, first pass\n\n");
641 loops
= discover_loops (&stack
, hooks
);
645 reorder_loops (loops
);
649 fprintf (dump_file
, ";; Find loops, second pass\n\n");
651 loops
= discover_loops (&stack
, hooks
);
654 for (loop
= loops
; loop
; loop
= loop
->next
)
657 /* Now apply the optimizations. */
658 for (loop
= loops
; loop
; loop
= loop
->next
)
659 optimize_loop (loop
, hooks
);
663 fprintf (dump_file
, ";; After hardware loops optimization:\n\n");
664 dump_hwloops (loops
);
670 print_rtl (dump_file
, get_insns ());