1 /* Code to analyze doloop loops in order for targets to perform late
2 optimizations converting doloops to other forms of hardware loops.
3 Copyright (C) 2011-2014 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
28 #include "hard-reg-set.h"
30 #include "basic-block.h"
36 #include "hw-doloop.h"
39 #ifdef HAVE_doloop_end
41 /* Dump information collected in LOOPS. */
43 dump_hwloops (hwloop_info loops
)
47 for (loop
= loops
; loop
; loop
= loop
->next
)
53 fprintf (dump_file
, ";; loop %d: ", loop
->loop_no
);
55 fprintf (dump_file
, "(bad) ");
56 fprintf (dump_file
, "{head:%d, depth:%d, reg:%u}",
57 loop
->head
== NULL
? -1 : loop
->head
->index
,
58 loop
->depth
, REGNO (loop
->iter_reg
));
60 fprintf (dump_file
, " blocks: [ ");
61 for (ix
= 0; loop
->blocks
.iterate (ix
, &b
); ix
++)
62 fprintf (dump_file
, "%d ", b
->index
);
63 fprintf (dump_file
, "] ");
65 fprintf (dump_file
, " inner loops: [ ");
66 for (ix
= 0; loop
->loops
.iterate (ix
, &i
); ix
++)
67 fprintf (dump_file
, "%d ", i
->loop_no
);
68 fprintf (dump_file
, "]\n");
70 fprintf (dump_file
, "\n");
73 /* Return true if BB is part of LOOP. */
75 bb_in_loop_p (hwloop_info loop
, basic_block bb
)
77 return bitmap_bit_p (loop
->block_bitmap
, bb
->index
);
80 /* Scan the blocks of LOOP (and its inferiors), and record things such as
81 hard registers set, jumps out of and within the loop. */
83 scan_loop (hwloop_info loop
)
91 if (REGNO_REG_SET_P (df_get_live_in (loop
->successor
),
92 REGNO (loop
->iter_reg
)))
93 loop
->iter_reg_used_outside
= true;
95 for (ix
= 0; loop
->blocks
.iterate (ix
, &bb
); ix
++)
101 if (bb
!= loop
->tail
)
102 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
104 if (bb_in_loop_p (loop
, e
->dest
))
106 if (!(e
->flags
& EDGE_FALLTHRU
))
107 loop
->jumps_within
= true;
111 loop
->jumps_outof
= true;
113 gcc_assert (!REGNO_REG_SET_P (df_get_live_in (e
->dest
),
114 REGNO (loop
->iter_reg
)));
118 for (insn
= BB_HEAD (bb
);
119 insn
!= NEXT_INSN (BB_END (bb
));
120 insn
= NEXT_INSN (insn
))
123 HARD_REG_SET set_this_insn
;
125 if (!NONDEBUG_INSN_P (insn
))
128 if (recog_memoized (insn
) < 0
129 && (GET_CODE (PATTERN (insn
)) == ASM_INPUT
130 || asm_noperands (PATTERN (insn
)) >= 0))
131 loop
->has_asm
= true;
133 CLEAR_HARD_REG_SET (set_this_insn
);
134 FOR_EACH_INSN_DEF (def
, insn
)
136 rtx dreg
= DF_REF_REG (def
);
141 add_to_hard_reg_set (&set_this_insn
, GET_MODE (dreg
),
145 if (insn
== loop
->loop_end
)
146 CLEAR_HARD_REG_BIT (set_this_insn
, REGNO (loop
->iter_reg
));
147 else if (reg_mentioned_p (loop
->iter_reg
, PATTERN (insn
)))
148 loop
->iter_reg_used
= true;
149 IOR_HARD_REG_SET (loop
->regs_set_in_loop
, set_this_insn
);
154 /* Compute the incoming_dest and incoming_src members of LOOP by
155 identifying the edges that jump into the loop. If there is more
156 than one block that jumps into the loop, incoming_src will be set
157 to NULL; likewise, if there is more than one block in the loop that
158 is the destination of an incoming edge, incoming_dest will be NULL.
160 Return true if either of these two fields is nonnull, false
163 process_incoming_edges (hwloop_info loop
)
169 FOR_EACH_EDGE (e
, ei
, loop
->incoming
)
173 loop
->incoming_src
= e
->src
;
174 loop
->incoming_dest
= e
->dest
;
179 if (e
->dest
!= loop
->incoming_dest
)
180 loop
->incoming_dest
= NULL
;
181 if (e
->src
!= loop
->incoming_src
)
182 loop
->incoming_src
= NULL
;
185 if (loop
->incoming_src
== NULL
&& loop
->incoming_dest
== NULL
)
191 /* Try to identify a forwarder block that jump into LOOP, and add it to
192 the set of blocks in the loop, updating the vector of incoming blocks as
193 well. This transformation gives a second chance to loops we couldn't
194 otherwise handle by increasing the chance that we'll end up with one
196 Return true if we made a change, false otherwise. */
198 add_forwarder_blocks (hwloop_info loop
)
203 FOR_EACH_EDGE (e
, ei
, loop
->incoming
)
205 if (forwarder_block_p (e
->src
))
212 ";; Adding forwarder block %d to loop %d and retrying\n",
213 e
->src
->index
, loop
->loop_no
);
214 loop
->blocks
.safe_push (e
->src
);
215 bitmap_set_bit (loop
->block_bitmap
, e
->src
->index
);
216 FOR_EACH_EDGE (e2
, ei2
, e
->src
->preds
)
217 vec_safe_push (loop
->incoming
, e2
);
218 loop
->incoming
->unordered_remove (ei
.index
);
225 /* Called from reorg_loops when a potential loop end is found. LOOP is
226 a newly set up structure describing the loop, it is this function's
227 responsibility to fill most of it. TAIL_BB and TAIL_INSN point to the
228 loop_end insn and its enclosing basic block. REG is the loop counter
230 For our purposes, a loop is defined by the set of blocks reachable from
231 the loop head in which the loop counter register is live. This matches
232 the expected use; targets that call into this code usually replace the
233 loop counter with a different special register. */
235 discover_loop (hwloop_info loop
, basic_block tail_bb
, rtx_insn
*tail_insn
, rtx reg
)
241 loop
->tail
= tail_bb
;
242 loop
->loop_end
= tail_insn
;
243 loop
->iter_reg
= reg
;
244 vec_alloc (loop
->incoming
, 2);
245 loop
->start_label
= as_a
<rtx_insn
*> (JUMP_LABEL (tail_insn
));
247 if (EDGE_COUNT (tail_bb
->succs
) != 2)
252 loop
->head
= BRANCH_EDGE (tail_bb
)->dest
;
253 loop
->successor
= FALLTHRU_EDGE (tail_bb
)->dest
;
255 auto_vec
<basic_block
, 20> works
;
256 works
.safe_push (loop
->head
);
259 for (dwork
= 0; works
.iterate (dwork
, &bb
); dwork
++)
263 if (bb
== EXIT_BLOCK_PTR_FOR_FN (cfun
))
265 /* We've reached the exit block. The loop must be bad. */
268 ";; Loop is bad - reached exit block while scanning\n");
273 if (bitmap_bit_p (loop
->block_bitmap
, bb
->index
))
276 /* We've not seen this block before. Add it to the loop's
277 list and then add each successor to the work list. */
279 loop
->blocks
.safe_push (bb
);
280 bitmap_set_bit (loop
->block_bitmap
, bb
->index
);
286 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
288 basic_block succ
= EDGE_SUCC (bb
, ei
.index
)->dest
;
289 if (REGNO_REG_SET_P (df_get_live_in (succ
),
290 REGNO (loop
->iter_reg
)))
291 works
.safe_push (succ
);
299 /* Find the predecessor, and make sure nothing else jumps into this loop. */
302 FOR_EACH_VEC_ELT (loop
->blocks
, dwork
, bb
)
306 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
308 basic_block pred
= e
->src
;
310 if (!bb_in_loop_p (loop
, pred
))
313 fprintf (dump_file
, ";; Loop %d: incoming edge %d -> %d\n",
314 loop
->loop_no
, pred
->index
,
316 vec_safe_push (loop
->incoming
, e
);
321 if (!process_incoming_edges (loop
))
325 ";; retrying loop %d with forwarder blocks\n",
327 if (!add_forwarder_blocks (loop
))
330 fprintf (dump_file
, ";; No forwarder blocks found\n");
333 else if (!process_incoming_edges (loop
))
337 ";; can't find suitable entry for loop %d\n",
344 /* Analyze the structure of the loops in the current function. Use
345 LOOP_STACK for bitmap allocations. Returns all the valid candidates for
346 hardware loops found in this function. HOOKS is the argument
347 passed to reorg_loops, used here to find the iteration registers
348 from a loop_end pattern. */
350 discover_loops (bitmap_obstack
*loop_stack
, struct hw_doloop_hooks
*hooks
)
352 hwloop_info loops
= NULL
;
357 /* Find all the possible loop tails. This means searching for every
358 loop_end instruction. For each one found, create a hwloop_info
359 structure and add the head block to the work list. */
360 FOR_EACH_BB_FN (bb
, cfun
)
362 rtx_insn
*tail
= BB_END (bb
);
366 while (tail
&& NOTE_P (tail
) && tail
!= BB_HEAD (bb
))
367 tail
= PREV_INSN (tail
);
369 if (tail
== NULL_RTX
)
374 reg
= hooks
->end_pattern_reg (tail
);
378 /* A possible loop end */
380 /* There's a degenerate case we can handle - an empty loop consisting
381 of only a back branch. Handle that by deleting the branch. */
382 insn
= JUMP_LABEL_AS_INSN (tail
);
383 while (insn
&& !NONDEBUG_INSN_P (insn
))
384 insn
= NEXT_INSN (insn
);
387 basic_block succ
= FALLTHRU_EDGE (bb
)->dest
;
390 fprintf (dump_file
, ";; degenerate loop ending at\n");
391 print_rtl_single (dump_file
, tail
);
393 if (!REGNO_REG_SET_P (df_get_live_in (succ
), REGNO (reg
)))
396 fprintf (dump_file
, ";; deleting it\n");
397 delete_insn_and_edges (tail
);
402 loop
= XCNEW (struct hwloop_info_d
);
405 loop
->loop_no
= nloops
++;
406 loop
->blocks
.create (20);
407 loop
->block_bitmap
= BITMAP_ALLOC (loop_stack
);
411 fprintf (dump_file
, ";; potential loop %d ending at\n",
413 print_rtl_single (dump_file
, tail
);
416 discover_loop (loop
, bb
, tail
, reg
);
419 /* Compute loop nestings. Given two loops A and B, either the sets
420 of their blocks don't intersect at all, or one is the subset of
421 the other, or the blocks don't form a good nesting structure. */
422 for (loop
= loops
; loop
; loop
= loop
->next
)
429 for (other
= loops
; other
; other
= other
->next
)
434 if (!bitmap_intersect_p (other
->block_bitmap
, loop
->block_bitmap
))
436 if (!bitmap_intersect_compl_p (other
->block_bitmap
,
438 loop
->loops
.safe_push (other
);
439 else if (!bitmap_intersect_compl_p (loop
->block_bitmap
,
440 other
->block_bitmap
))
441 other
->loops
.safe_push (loop
);
446 ";; can't find suitable nesting for loops %d and %d\n",
447 loop
->loop_no
, other
->loop_no
);
448 loop
->bad
= other
->bad
= true;
454 dump_hwloops (loops
);
459 /* Free up the loop structures in LOOPS. */
461 free_loops (hwloop_info loops
)
465 hwloop_info loop
= loops
;
467 loop
->loops
.release ();
468 loop
->blocks
.release ();
469 BITMAP_FREE (loop
->block_bitmap
);
474 #define BB_AUX_INDEX(BB) ((intptr_t) (BB)->aux)
476 /* Initialize the aux fields to give ascending indices to basic blocks. */
478 set_bb_indices (void)
484 FOR_EACH_BB_FN (bb
, cfun
)
485 bb
->aux
= (void *) index
++;
488 /* The taken-branch edge from the loop end can actually go forward.
489 If the target's hardware loop support requires that the loop end be
490 after the loop start, try to reorder a loop's basic blocks when we
492 This is not very aggressive; it only moves at most one block. It
493 does not introduce new branches into loops; it may remove them, or
494 it may switch fallthru/jump edges. */
496 reorder_loops (hwloop_info loops
)
501 cfg_layout_initialize (0);
505 for (loop
= loops
; loop
; loop
= loop
->next
)
513 if (BB_AUX_INDEX (loop
->head
) <= BB_AUX_INDEX (loop
->tail
))
516 FOR_EACH_EDGE (e
, ei
, loop
->head
->succs
)
518 if (bitmap_bit_p (loop
->block_bitmap
, e
->dest
->index
)
519 && BB_AUX_INDEX (e
->dest
) < BB_AUX_INDEX (loop
->tail
))
521 basic_block start_bb
= e
->dest
;
522 basic_block start_prev_bb
= start_bb
->prev_bb
;
525 fprintf (dump_file
, ";; Moving block %d before block %d\n",
526 loop
->head
->index
, start_bb
->index
);
527 loop
->head
->prev_bb
->next_bb
= loop
->head
->next_bb
;
528 loop
->head
->next_bb
->prev_bb
= loop
->head
->prev_bb
;
530 loop
->head
->prev_bb
= start_prev_bb
;
531 loop
->head
->next_bb
= start_bb
;
532 start_prev_bb
->next_bb
= start_bb
->prev_bb
= loop
->head
;
541 FOR_EACH_BB_FN (bb
, cfun
)
543 if (bb
->next_bb
!= EXIT_BLOCK_PTR_FOR_FN (cfun
))
544 bb
->aux
= bb
->next_bb
;
548 cfg_layout_finalize ();
549 clear_aux_for_blocks ();
553 /* Call the OPT function for LOOP and all of its sub-loops. This is
554 done in a depth-first search; innermost loops are visited first.
555 OPTIMIZE and FAIL are the functions passed to reorg_loops by the
556 target's reorg pass. */
558 optimize_loop (hwloop_info loop
, struct hw_doloop_hooks
*hooks
)
572 fprintf (dump_file
, ";; loop %d bad when found\n", loop
->loop_no
);
576 /* Every loop contains in its list of inner loops every loop nested inside
577 it, even if there are intermediate loops. This works because we're doing
578 a depth-first search here and never visit a loop more than once.
579 Recursion depth is effectively limited by the number of available
580 hardware registers. */
581 for (ix
= 0; loop
->loops
.iterate (ix
, &inner
); ix
++)
583 optimize_loop (inner
, hooks
);
585 if (!inner
->bad
&& inner_depth
< inner
->depth
)
586 inner_depth
= inner
->depth
;
587 /* The set of registers may be changed while optimizing the inner
589 IOR_HARD_REG_SET (loop
->regs_set_in_loop
, inner
->regs_set_in_loop
);
592 loop
->depth
= inner_depth
+ 1;
594 if (hooks
->opt (loop
))
599 fprintf (dump_file
, ";; loop %d is bad\n", loop
->loop_no
);
605 /* This function can be used from a port's machine_dependent_reorg to
606 find and analyze loops that end in loop_end instructions. It uses
607 a set of function pointers in HOOKS to call back into the
608 target-specific functions to perform the actual machine-specific
611 Such transformations typically involve additional set-up
612 instructions before the loop, to define loop bounds or set up a
613 special loop counter register.
615 DO_REORDER should be set to true if we should try to use the
616 reorder_loops function to ensure the loop end occurs after the loop
617 start. This is for use by targets where the loop hardware requires
620 HOOKS is used to pass in target specific hooks; see
623 reorg_loops (bool do_reorder
, struct hw_doloop_hooks
*hooks
)
625 hwloop_info loops
= NULL
;
627 bitmap_obstack loop_stack
;
629 df_live_add_problem ();
630 df_live_set_all_dirty ();
633 bitmap_obstack_initialize (&loop_stack
);
636 fprintf (dump_file
, ";; Find loops, first pass\n\n");
638 loops
= discover_loops (&loop_stack
, hooks
);
640 /* We can't enter cfglayout mode anymore if basic block partitioning
642 if (do_reorder
&& !flag_reorder_blocks_and_partition
)
644 reorder_loops (loops
);
648 fprintf (dump_file
, ";; Find loops, second pass\n\n");
650 loops
= discover_loops (&loop_stack
, hooks
);
653 for (loop
= loops
; loop
; loop
= loop
->next
)
656 /* Now apply the optimizations. */
657 for (loop
= loops
; loop
; loop
= loop
->next
)
658 optimize_loop (loop
, hooks
);
662 fprintf (dump_file
, ";; After hardware loops optimization:\n\n");
663 dump_hwloops (loops
);
667 bitmap_obstack_release (&loop_stack
);
670 print_rtl (dump_file
, get_insns ());