1 /* Code to analyze doloop loops in order for targets to perform late
2 optimizations converting doloops to other forms of hardware loops.
3 Copyright (C) 2011-2015 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
32 #include "hard-reg-set.h"
35 #include "statistics.h"
36 #include "double-int.h"
38 #include "fixed-value.h"
43 #include "insn-config.h"
54 #include "dominance.h"
57 #include "basic-block.h"
63 #include "hw-doloop.h"
66 #ifdef HAVE_doloop_end
68 /* Dump information collected in LOOPS. */
70 dump_hwloops (hwloop_info loops
)
74 for (loop
= loops
; loop
; loop
= loop
->next
)
80 fprintf (dump_file
, ";; loop %d: ", loop
->loop_no
);
82 fprintf (dump_file
, "(bad) ");
83 fprintf (dump_file
, "{head:%d, depth:%d, reg:%u}",
84 loop
->head
== NULL
? -1 : loop
->head
->index
,
85 loop
->depth
, REGNO (loop
->iter_reg
));
87 fprintf (dump_file
, " blocks: [ ");
88 for (ix
= 0; loop
->blocks
.iterate (ix
, &b
); ix
++)
89 fprintf (dump_file
, "%d ", b
->index
);
90 fprintf (dump_file
, "] ");
92 fprintf (dump_file
, " inner loops: [ ");
93 for (ix
= 0; loop
->loops
.iterate (ix
, &i
); ix
++)
94 fprintf (dump_file
, "%d ", i
->loop_no
);
95 fprintf (dump_file
, "]\n");
97 fprintf (dump_file
, "\n");
100 /* Return true if BB is part of LOOP. */
102 bb_in_loop_p (hwloop_info loop
, basic_block bb
)
104 return bitmap_bit_p (loop
->block_bitmap
, bb
->index
);
107 /* Scan the blocks of LOOP (and its inferiors), and record things such as
108 hard registers set, jumps out of and within the loop. */
110 scan_loop (hwloop_info loop
)
118 if (REGNO_REG_SET_P (df_get_live_in (loop
->successor
),
119 REGNO (loop
->iter_reg
)))
120 loop
->iter_reg_used_outside
= true;
122 for (ix
= 0; loop
->blocks
.iterate (ix
, &bb
); ix
++)
128 if (bb
!= loop
->tail
)
129 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
131 if (bb_in_loop_p (loop
, e
->dest
))
133 if (!(e
->flags
& EDGE_FALLTHRU
))
134 loop
->jumps_within
= true;
138 loop
->jumps_outof
= true;
140 gcc_assert (!REGNO_REG_SET_P (df_get_live_in (e
->dest
),
141 REGNO (loop
->iter_reg
)));
145 for (insn
= BB_HEAD (bb
);
146 insn
!= NEXT_INSN (BB_END (bb
));
147 insn
= NEXT_INSN (insn
))
150 HARD_REG_SET set_this_insn
;
152 if (!NONDEBUG_INSN_P (insn
))
155 if (recog_memoized (insn
) < 0
156 && (GET_CODE (PATTERN (insn
)) == ASM_INPUT
157 || asm_noperands (PATTERN (insn
)) >= 0))
158 loop
->has_asm
= true;
160 CLEAR_HARD_REG_SET (set_this_insn
);
161 FOR_EACH_INSN_DEF (def
, insn
)
163 rtx dreg
= DF_REF_REG (def
);
168 add_to_hard_reg_set (&set_this_insn
, GET_MODE (dreg
),
172 if (insn
== loop
->loop_end
)
173 CLEAR_HARD_REG_BIT (set_this_insn
, REGNO (loop
->iter_reg
));
174 else if (reg_mentioned_p (loop
->iter_reg
, PATTERN (insn
)))
175 loop
->iter_reg_used
= true;
176 IOR_HARD_REG_SET (loop
->regs_set_in_loop
, set_this_insn
);
181 /* Compute the incoming_dest and incoming_src members of LOOP by
182 identifying the edges that jump into the loop. If there is more
183 than one block that jumps into the loop, incoming_src will be set
184 to NULL; likewise, if there is more than one block in the loop that
185 is the destination of an incoming edge, incoming_dest will be NULL.
187 Return true if either of these two fields is nonnull, false
190 process_incoming_edges (hwloop_info loop
)
196 FOR_EACH_EDGE (e
, ei
, loop
->incoming
)
200 loop
->incoming_src
= e
->src
;
201 loop
->incoming_dest
= e
->dest
;
206 if (e
->dest
!= loop
->incoming_dest
)
207 loop
->incoming_dest
= NULL
;
208 if (e
->src
!= loop
->incoming_src
)
209 loop
->incoming_src
= NULL
;
212 if (loop
->incoming_src
== NULL
&& loop
->incoming_dest
== NULL
)
218 /* Try to identify a forwarder block that jump into LOOP, and add it to
219 the set of blocks in the loop, updating the vector of incoming blocks as
220 well. This transformation gives a second chance to loops we couldn't
221 otherwise handle by increasing the chance that we'll end up with one
223 Return true if we made a change, false otherwise. */
225 add_forwarder_blocks (hwloop_info loop
)
230 FOR_EACH_EDGE (e
, ei
, loop
->incoming
)
232 if (forwarder_block_p (e
->src
))
239 ";; Adding forwarder block %d to loop %d and retrying\n",
240 e
->src
->index
, loop
->loop_no
);
241 loop
->blocks
.safe_push (e
->src
);
242 bitmap_set_bit (loop
->block_bitmap
, e
->src
->index
);
243 FOR_EACH_EDGE (e2
, ei2
, e
->src
->preds
)
244 vec_safe_push (loop
->incoming
, e2
);
245 loop
->incoming
->unordered_remove (ei
.index
);
252 /* Called from reorg_loops when a potential loop end is found. LOOP is
253 a newly set up structure describing the loop, it is this function's
254 responsibility to fill most of it. TAIL_BB and TAIL_INSN point to the
255 loop_end insn and its enclosing basic block. REG is the loop counter
257 For our purposes, a loop is defined by the set of blocks reachable from
258 the loop head in which the loop counter register is live. This matches
259 the expected use; targets that call into this code usually replace the
260 loop counter with a different special register. */
262 discover_loop (hwloop_info loop
, basic_block tail_bb
, rtx_insn
*tail_insn
, rtx reg
)
268 loop
->tail
= tail_bb
;
269 loop
->loop_end
= tail_insn
;
270 loop
->iter_reg
= reg
;
271 vec_alloc (loop
->incoming
, 2);
272 loop
->start_label
= as_a
<rtx_insn
*> (JUMP_LABEL (tail_insn
));
274 if (EDGE_COUNT (tail_bb
->succs
) != 2)
279 loop
->head
= BRANCH_EDGE (tail_bb
)->dest
;
280 loop
->successor
= FALLTHRU_EDGE (tail_bb
)->dest
;
282 auto_vec
<basic_block
, 20> works
;
283 works
.safe_push (loop
->head
);
286 for (dwork
= 0; works
.iterate (dwork
, &bb
); dwork
++)
290 if (bb
== EXIT_BLOCK_PTR_FOR_FN (cfun
))
292 /* We've reached the exit block. The loop must be bad. */
295 ";; Loop is bad - reached exit block while scanning\n");
300 if (bitmap_bit_p (loop
->block_bitmap
, bb
->index
))
303 /* We've not seen this block before. Add it to the loop's
304 list and then add each successor to the work list. */
306 loop
->blocks
.safe_push (bb
);
307 bitmap_set_bit (loop
->block_bitmap
, bb
->index
);
313 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
315 basic_block succ
= EDGE_SUCC (bb
, ei
.index
)->dest
;
316 if (REGNO_REG_SET_P (df_get_live_in (succ
),
317 REGNO (loop
->iter_reg
)))
318 works
.safe_push (succ
);
326 /* Find the predecessor, and make sure nothing else jumps into this loop. */
329 FOR_EACH_VEC_ELT (loop
->blocks
, dwork
, bb
)
333 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
335 basic_block pred
= e
->src
;
337 if (!bb_in_loop_p (loop
, pred
))
340 fprintf (dump_file
, ";; Loop %d: incoming edge %d -> %d\n",
341 loop
->loop_no
, pred
->index
,
343 vec_safe_push (loop
->incoming
, e
);
348 if (!process_incoming_edges (loop
))
352 ";; retrying loop %d with forwarder blocks\n",
354 if (!add_forwarder_blocks (loop
))
357 fprintf (dump_file
, ";; No forwarder blocks found\n");
360 else if (!process_incoming_edges (loop
))
364 ";; can't find suitable entry for loop %d\n",
371 /* Analyze the structure of the loops in the current function. Use
372 LOOP_STACK for bitmap allocations. Returns all the valid candidates for
373 hardware loops found in this function. HOOKS is the argument
374 passed to reorg_loops, used here to find the iteration registers
375 from a loop_end pattern. */
377 discover_loops (bitmap_obstack
*loop_stack
, struct hw_doloop_hooks
*hooks
)
379 hwloop_info loops
= NULL
;
384 /* Find all the possible loop tails. This means searching for every
385 loop_end instruction. For each one found, create a hwloop_info
386 structure and add the head block to the work list. */
387 FOR_EACH_BB_FN (bb
, cfun
)
389 rtx_insn
*tail
= BB_END (bb
);
393 while (tail
&& NOTE_P (tail
) && tail
!= BB_HEAD (bb
))
394 tail
= PREV_INSN (tail
);
396 if (tail
== NULL_RTX
)
401 reg
= hooks
->end_pattern_reg (tail
);
405 /* A possible loop end */
407 /* There's a degenerate case we can handle - an empty loop consisting
408 of only a back branch. Handle that by deleting the branch. */
409 insn
= JUMP_LABEL_AS_INSN (tail
);
410 while (insn
&& !NONDEBUG_INSN_P (insn
))
411 insn
= NEXT_INSN (insn
);
414 basic_block succ
= FALLTHRU_EDGE (bb
)->dest
;
417 fprintf (dump_file
, ";; degenerate loop ending at\n");
418 print_rtl_single (dump_file
, tail
);
420 if (!REGNO_REG_SET_P (df_get_live_in (succ
), REGNO (reg
)))
423 fprintf (dump_file
, ";; deleting it\n");
424 delete_insn_and_edges (tail
);
429 loop
= XCNEW (struct hwloop_info_d
);
432 loop
->loop_no
= nloops
++;
433 loop
->blocks
.create (20);
434 loop
->block_bitmap
= BITMAP_ALLOC (loop_stack
);
438 fprintf (dump_file
, ";; potential loop %d ending at\n",
440 print_rtl_single (dump_file
, tail
);
443 discover_loop (loop
, bb
, tail
, reg
);
446 /* Compute loop nestings. Given two loops A and B, either the sets
447 of their blocks don't intersect at all, or one is the subset of
448 the other, or the blocks don't form a good nesting structure. */
449 for (loop
= loops
; loop
; loop
= loop
->next
)
456 for (other
= loops
; other
; other
= other
->next
)
461 if (!bitmap_intersect_p (other
->block_bitmap
, loop
->block_bitmap
))
463 if (!bitmap_intersect_compl_p (other
->block_bitmap
,
465 loop
->loops
.safe_push (other
);
466 else if (!bitmap_intersect_compl_p (loop
->block_bitmap
,
467 other
->block_bitmap
))
468 other
->loops
.safe_push (loop
);
473 ";; can't find suitable nesting for loops %d and %d\n",
474 loop
->loop_no
, other
->loop_no
);
475 loop
->bad
= other
->bad
= true;
481 dump_hwloops (loops
);
486 /* Free up the loop structures in LOOPS. */
488 free_loops (hwloop_info loops
)
492 hwloop_info loop
= loops
;
494 loop
->loops
.release ();
495 loop
->blocks
.release ();
496 BITMAP_FREE (loop
->block_bitmap
);
501 #define BB_AUX_INDEX(BB) ((intptr_t) (BB)->aux)
503 /* Initialize the aux fields to give ascending indices to basic blocks. */
505 set_bb_indices (void)
511 FOR_EACH_BB_FN (bb
, cfun
)
512 bb
->aux
= (void *) index
++;
515 /* The taken-branch edge from the loop end can actually go forward.
516 If the target's hardware loop support requires that the loop end be
517 after the loop start, try to reorder a loop's basic blocks when we
519 This is not very aggressive; it only moves at most one block. It
520 does not introduce new branches into loops; it may remove them, or
521 it may switch fallthru/jump edges. */
523 reorder_loops (hwloop_info loops
)
528 cfg_layout_initialize (0);
532 for (loop
= loops
; loop
; loop
= loop
->next
)
540 if (BB_AUX_INDEX (loop
->head
) <= BB_AUX_INDEX (loop
->tail
))
543 FOR_EACH_EDGE (e
, ei
, loop
->head
->succs
)
545 if (bitmap_bit_p (loop
->block_bitmap
, e
->dest
->index
)
546 && BB_AUX_INDEX (e
->dest
) < BB_AUX_INDEX (loop
->tail
))
548 basic_block start_bb
= e
->dest
;
549 basic_block start_prev_bb
= start_bb
->prev_bb
;
552 fprintf (dump_file
, ";; Moving block %d before block %d\n",
553 loop
->head
->index
, start_bb
->index
);
554 loop
->head
->prev_bb
->next_bb
= loop
->head
->next_bb
;
555 loop
->head
->next_bb
->prev_bb
= loop
->head
->prev_bb
;
557 loop
->head
->prev_bb
= start_prev_bb
;
558 loop
->head
->next_bb
= start_bb
;
559 start_prev_bb
->next_bb
= start_bb
->prev_bb
= loop
->head
;
568 FOR_EACH_BB_FN (bb
, cfun
)
570 if (bb
->next_bb
!= EXIT_BLOCK_PTR_FOR_FN (cfun
))
571 bb
->aux
= bb
->next_bb
;
575 cfg_layout_finalize ();
576 clear_aux_for_blocks ();
580 /* Call the OPT function for LOOP and all of its sub-loops. This is
581 done in a depth-first search; innermost loops are visited first.
582 OPTIMIZE and FAIL are the functions passed to reorg_loops by the
583 target's reorg pass. */
585 optimize_loop (hwloop_info loop
, struct hw_doloop_hooks
*hooks
)
599 fprintf (dump_file
, ";; loop %d bad when found\n", loop
->loop_no
);
603 /* Every loop contains in its list of inner loops every loop nested inside
604 it, even if there are intermediate loops. This works because we're doing
605 a depth-first search here and never visit a loop more than once.
606 Recursion depth is effectively limited by the number of available
607 hardware registers. */
608 for (ix
= 0; loop
->loops
.iterate (ix
, &inner
); ix
++)
610 optimize_loop (inner
, hooks
);
612 if (!inner
->bad
&& inner_depth
< inner
->depth
)
613 inner_depth
= inner
->depth
;
614 /* The set of registers may be changed while optimizing the inner
616 IOR_HARD_REG_SET (loop
->regs_set_in_loop
, inner
->regs_set_in_loop
);
619 loop
->depth
= inner_depth
+ 1;
621 if (hooks
->opt (loop
))
626 fprintf (dump_file
, ";; loop %d is bad\n", loop
->loop_no
);
632 /* This function can be used from a port's machine_dependent_reorg to
633 find and analyze loops that end in loop_end instructions. It uses
634 a set of function pointers in HOOKS to call back into the
635 target-specific functions to perform the actual machine-specific
638 Such transformations typically involve additional set-up
639 instructions before the loop, to define loop bounds or set up a
640 special loop counter register.
642 DO_REORDER should be set to true if we should try to use the
643 reorder_loops function to ensure the loop end occurs after the loop
644 start. This is for use by targets where the loop hardware requires
647 HOOKS is used to pass in target specific hooks; see
650 reorg_loops (bool do_reorder
, struct hw_doloop_hooks
*hooks
)
652 hwloop_info loops
= NULL
;
654 bitmap_obstack loop_stack
;
656 df_live_add_problem ();
657 df_live_set_all_dirty ();
660 bitmap_obstack_initialize (&loop_stack
);
663 fprintf (dump_file
, ";; Find loops, first pass\n\n");
665 loops
= discover_loops (&loop_stack
, hooks
);
667 /* We can't enter cfglayout mode anymore if basic block partitioning
669 if (do_reorder
&& !flag_reorder_blocks_and_partition
)
671 reorder_loops (loops
);
675 fprintf (dump_file
, ";; Find loops, second pass\n\n");
677 loops
= discover_loops (&loop_stack
, hooks
);
680 for (loop
= loops
; loop
; loop
= loop
->next
)
683 /* Now apply the optimizations. */
684 for (loop
= loops
; loop
; loop
= loop
->next
)
685 optimize_loop (loop
, hooks
);
689 fprintf (dump_file
, ";; After hardware loops optimization:\n\n");
690 dump_hwloops (loops
);
694 bitmap_obstack_release (&loop_stack
);
697 print_rtl (dump_file
, get_insns ());