1 /* Code to analyze doloop loops in order for targets to perform late
2 optimizations converting doloops to other forms of hardware loops.
3 Copyright (C) 2011-2015 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
28 #include "hard-reg-set.h"
33 #include "insn-config.h"
44 #include "dominance.h"
47 #include "basic-block.h"
53 #include "hw-doloop.h"
56 #ifdef HAVE_doloop_end
58 /* Dump information collected in LOOPS. */
60 dump_hwloops (hwloop_info loops
)
64 for (loop
= loops
; loop
; loop
= loop
->next
)
70 fprintf (dump_file
, ";; loop %d: ", loop
->loop_no
);
72 fprintf (dump_file
, "(bad) ");
73 fprintf (dump_file
, "{head:%d, depth:%d, reg:%u}",
74 loop
->head
== NULL
? -1 : loop
->head
->index
,
75 loop
->depth
, REGNO (loop
->iter_reg
));
77 fprintf (dump_file
, " blocks: [ ");
78 for (ix
= 0; loop
->blocks
.iterate (ix
, &b
); ix
++)
79 fprintf (dump_file
, "%d ", b
->index
);
80 fprintf (dump_file
, "] ");
82 fprintf (dump_file
, " inner loops: [ ");
83 for (ix
= 0; loop
->loops
.iterate (ix
, &i
); ix
++)
84 fprintf (dump_file
, "%d ", i
->loop_no
);
85 fprintf (dump_file
, "]\n");
87 fprintf (dump_file
, "\n");
90 /* Return true if BB is part of LOOP. */
92 bb_in_loop_p (hwloop_info loop
, basic_block bb
)
94 return bitmap_bit_p (loop
->block_bitmap
, bb
->index
);
97 /* Scan the blocks of LOOP (and its inferiors), and record things such as
98 hard registers set, jumps out of and within the loop. */
100 scan_loop (hwloop_info loop
)
108 if (REGNO_REG_SET_P (df_get_live_in (loop
->successor
),
109 REGNO (loop
->iter_reg
)))
110 loop
->iter_reg_used_outside
= true;
112 for (ix
= 0; loop
->blocks
.iterate (ix
, &bb
); ix
++)
118 if (bb
!= loop
->tail
)
119 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
121 if (bb_in_loop_p (loop
, e
->dest
))
123 if (!(e
->flags
& EDGE_FALLTHRU
))
124 loop
->jumps_within
= true;
128 loop
->jumps_outof
= true;
130 gcc_assert (!REGNO_REG_SET_P (df_get_live_in (e
->dest
),
131 REGNO (loop
->iter_reg
)));
135 for (insn
= BB_HEAD (bb
);
136 insn
!= NEXT_INSN (BB_END (bb
));
137 insn
= NEXT_INSN (insn
))
140 HARD_REG_SET set_this_insn
;
142 if (!NONDEBUG_INSN_P (insn
))
145 if (recog_memoized (insn
) < 0
146 && (GET_CODE (PATTERN (insn
)) == ASM_INPUT
147 || asm_noperands (PATTERN (insn
)) >= 0))
148 loop
->has_asm
= true;
150 CLEAR_HARD_REG_SET (set_this_insn
);
151 FOR_EACH_INSN_DEF (def
, insn
)
153 rtx dreg
= DF_REF_REG (def
);
158 add_to_hard_reg_set (&set_this_insn
, GET_MODE (dreg
),
162 if (insn
== loop
->loop_end
)
163 CLEAR_HARD_REG_BIT (set_this_insn
, REGNO (loop
->iter_reg
));
164 else if (reg_mentioned_p (loop
->iter_reg
, PATTERN (insn
)))
165 loop
->iter_reg_used
= true;
166 IOR_HARD_REG_SET (loop
->regs_set_in_loop
, set_this_insn
);
171 /* Compute the incoming_dest and incoming_src members of LOOP by
172 identifying the edges that jump into the loop. If there is more
173 than one block that jumps into the loop, incoming_src will be set
174 to NULL; likewise, if there is more than one block in the loop that
175 is the destination of an incoming edge, incoming_dest will be NULL.
177 Return true if either of these two fields is nonnull, false
180 process_incoming_edges (hwloop_info loop
)
186 FOR_EACH_EDGE (e
, ei
, loop
->incoming
)
190 loop
->incoming_src
= e
->src
;
191 loop
->incoming_dest
= e
->dest
;
196 if (e
->dest
!= loop
->incoming_dest
)
197 loop
->incoming_dest
= NULL
;
198 if (e
->src
!= loop
->incoming_src
)
199 loop
->incoming_src
= NULL
;
202 if (loop
->incoming_src
== NULL
&& loop
->incoming_dest
== NULL
)
208 /* Try to identify a forwarder block that jump into LOOP, and add it to
209 the set of blocks in the loop, updating the vector of incoming blocks as
210 well. This transformation gives a second chance to loops we couldn't
211 otherwise handle by increasing the chance that we'll end up with one
213 Return true if we made a change, false otherwise. */
215 add_forwarder_blocks (hwloop_info loop
)
220 FOR_EACH_EDGE (e
, ei
, loop
->incoming
)
222 if (forwarder_block_p (e
->src
))
229 ";; Adding forwarder block %d to loop %d and retrying\n",
230 e
->src
->index
, loop
->loop_no
);
231 loop
->blocks
.safe_push (e
->src
);
232 bitmap_set_bit (loop
->block_bitmap
, e
->src
->index
);
233 FOR_EACH_EDGE (e2
, ei2
, e
->src
->preds
)
234 vec_safe_push (loop
->incoming
, e2
);
235 loop
->incoming
->unordered_remove (ei
.index
);
242 /* Called from reorg_loops when a potential loop end is found. LOOP is
243 a newly set up structure describing the loop, it is this function's
244 responsibility to fill most of it. TAIL_BB and TAIL_INSN point to the
245 loop_end insn and its enclosing basic block. REG is the loop counter
247 For our purposes, a loop is defined by the set of blocks reachable from
248 the loop head in which the loop counter register is live. This matches
249 the expected use; targets that call into this code usually replace the
250 loop counter with a different special register. */
252 discover_loop (hwloop_info loop
, basic_block tail_bb
, rtx_insn
*tail_insn
, rtx reg
)
258 loop
->tail
= tail_bb
;
259 loop
->loop_end
= tail_insn
;
260 loop
->iter_reg
= reg
;
261 vec_alloc (loop
->incoming
, 2);
262 loop
->start_label
= as_a
<rtx_insn
*> (JUMP_LABEL (tail_insn
));
264 if (EDGE_COUNT (tail_bb
->succs
) != 2)
269 loop
->head
= BRANCH_EDGE (tail_bb
)->dest
;
270 loop
->successor
= FALLTHRU_EDGE (tail_bb
)->dest
;
272 auto_vec
<basic_block
, 20> works
;
273 works
.safe_push (loop
->head
);
276 for (dwork
= 0; works
.iterate (dwork
, &bb
); dwork
++)
280 if (bb
== EXIT_BLOCK_PTR_FOR_FN (cfun
))
282 /* We've reached the exit block. The loop must be bad. */
285 ";; Loop is bad - reached exit block while scanning\n");
290 if (bitmap_bit_p (loop
->block_bitmap
, bb
->index
))
293 /* We've not seen this block before. Add it to the loop's
294 list and then add each successor to the work list. */
296 loop
->blocks
.safe_push (bb
);
297 bitmap_set_bit (loop
->block_bitmap
, bb
->index
);
303 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
305 basic_block succ
= EDGE_SUCC (bb
, ei
.index
)->dest
;
306 if (REGNO_REG_SET_P (df_get_live_in (succ
),
307 REGNO (loop
->iter_reg
)))
308 works
.safe_push (succ
);
316 /* Find the predecessor, and make sure nothing else jumps into this loop. */
319 FOR_EACH_VEC_ELT (loop
->blocks
, dwork
, bb
)
323 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
325 basic_block pred
= e
->src
;
327 if (!bb_in_loop_p (loop
, pred
))
330 fprintf (dump_file
, ";; Loop %d: incoming edge %d -> %d\n",
331 loop
->loop_no
, pred
->index
,
333 vec_safe_push (loop
->incoming
, e
);
338 if (!process_incoming_edges (loop
))
342 ";; retrying loop %d with forwarder blocks\n",
344 if (!add_forwarder_blocks (loop
))
347 fprintf (dump_file
, ";; No forwarder blocks found\n");
350 else if (!process_incoming_edges (loop
))
354 ";; can't find suitable entry for loop %d\n",
361 /* Analyze the structure of the loops in the current function. Use
362 LOOP_STACK for bitmap allocations. Returns all the valid candidates for
363 hardware loops found in this function. HOOKS is the argument
364 passed to reorg_loops, used here to find the iteration registers
365 from a loop_end pattern. */
367 discover_loops (bitmap_obstack
*loop_stack
, struct hw_doloop_hooks
*hooks
)
369 hwloop_info loops
= NULL
;
374 /* Find all the possible loop tails. This means searching for every
375 loop_end instruction. For each one found, create a hwloop_info
376 structure and add the head block to the work list. */
377 FOR_EACH_BB_FN (bb
, cfun
)
379 rtx_insn
*tail
= BB_END (bb
);
383 while (tail
&& NOTE_P (tail
) && tail
!= BB_HEAD (bb
))
384 tail
= PREV_INSN (tail
);
386 if (tail
== NULL_RTX
)
391 reg
= hooks
->end_pattern_reg (tail
);
395 /* A possible loop end */
397 /* There's a degenerate case we can handle - an empty loop consisting
398 of only a back branch. Handle that by deleting the branch. */
399 insn
= JUMP_LABEL_AS_INSN (tail
);
400 while (insn
&& !NONDEBUG_INSN_P (insn
))
401 insn
= NEXT_INSN (insn
);
404 basic_block succ
= FALLTHRU_EDGE (bb
)->dest
;
407 fprintf (dump_file
, ";; degenerate loop ending at\n");
408 print_rtl_single (dump_file
, tail
);
410 if (!REGNO_REG_SET_P (df_get_live_in (succ
), REGNO (reg
)))
413 fprintf (dump_file
, ";; deleting it\n");
414 delete_insn_and_edges (tail
);
419 loop
= XCNEW (struct hwloop_info_d
);
422 loop
->loop_no
= nloops
++;
423 loop
->blocks
.create (20);
424 loop
->block_bitmap
= BITMAP_ALLOC (loop_stack
);
428 fprintf (dump_file
, ";; potential loop %d ending at\n",
430 print_rtl_single (dump_file
, tail
);
433 discover_loop (loop
, bb
, tail
, reg
);
436 /* Compute loop nestings. Given two loops A and B, either the sets
437 of their blocks don't intersect at all, or one is the subset of
438 the other, or the blocks don't form a good nesting structure. */
439 for (loop
= loops
; loop
; loop
= loop
->next
)
446 for (other
= loops
; other
; other
= other
->next
)
451 if (!bitmap_intersect_p (other
->block_bitmap
, loop
->block_bitmap
))
453 if (!bitmap_intersect_compl_p (other
->block_bitmap
,
455 loop
->loops
.safe_push (other
);
456 else if (!bitmap_intersect_compl_p (loop
->block_bitmap
,
457 other
->block_bitmap
))
458 other
->loops
.safe_push (loop
);
463 ";; can't find suitable nesting for loops %d and %d\n",
464 loop
->loop_no
, other
->loop_no
);
465 loop
->bad
= other
->bad
= true;
471 dump_hwloops (loops
);
476 /* Free up the loop structures in LOOPS. */
478 free_loops (hwloop_info loops
)
482 hwloop_info loop
= loops
;
484 loop
->loops
.release ();
485 loop
->blocks
.release ();
486 BITMAP_FREE (loop
->block_bitmap
);
491 #define BB_AUX_INDEX(BB) ((intptr_t) (BB)->aux)
493 /* Initialize the aux fields to give ascending indices to basic blocks. */
495 set_bb_indices (void)
501 FOR_EACH_BB_FN (bb
, cfun
)
502 bb
->aux
= (void *) index
++;
505 /* The taken-branch edge from the loop end can actually go forward.
506 If the target's hardware loop support requires that the loop end be
507 after the loop start, try to reorder a loop's basic blocks when we
509 This is not very aggressive; it only moves at most one block. It
510 does not introduce new branches into loops; it may remove them, or
511 it may switch fallthru/jump edges. */
513 reorder_loops (hwloop_info loops
)
518 cfg_layout_initialize (0);
522 for (loop
= loops
; loop
; loop
= loop
->next
)
530 if (BB_AUX_INDEX (loop
->head
) <= BB_AUX_INDEX (loop
->tail
))
533 FOR_EACH_EDGE (e
, ei
, loop
->head
->succs
)
535 if (bitmap_bit_p (loop
->block_bitmap
, e
->dest
->index
)
536 && BB_AUX_INDEX (e
->dest
) < BB_AUX_INDEX (loop
->tail
))
538 basic_block start_bb
= e
->dest
;
539 basic_block start_prev_bb
= start_bb
->prev_bb
;
542 fprintf (dump_file
, ";; Moving block %d before block %d\n",
543 loop
->head
->index
, start_bb
->index
);
544 loop
->head
->prev_bb
->next_bb
= loop
->head
->next_bb
;
545 loop
->head
->next_bb
->prev_bb
= loop
->head
->prev_bb
;
547 loop
->head
->prev_bb
= start_prev_bb
;
548 loop
->head
->next_bb
= start_bb
;
549 start_prev_bb
->next_bb
= start_bb
->prev_bb
= loop
->head
;
558 FOR_EACH_BB_FN (bb
, cfun
)
560 if (bb
->next_bb
!= EXIT_BLOCK_PTR_FOR_FN (cfun
))
561 bb
->aux
= bb
->next_bb
;
565 cfg_layout_finalize ();
566 clear_aux_for_blocks ();
570 /* Call the OPT function for LOOP and all of its sub-loops. This is
571 done in a depth-first search; innermost loops are visited first.
572 OPTIMIZE and FAIL are the functions passed to reorg_loops by the
573 target's reorg pass. */
575 optimize_loop (hwloop_info loop
, struct hw_doloop_hooks
*hooks
)
589 fprintf (dump_file
, ";; loop %d bad when found\n", loop
->loop_no
);
593 /* Every loop contains in its list of inner loops every loop nested inside
594 it, even if there are intermediate loops. This works because we're doing
595 a depth-first search here and never visit a loop more than once.
596 Recursion depth is effectively limited by the number of available
597 hardware registers. */
598 for (ix
= 0; loop
->loops
.iterate (ix
, &inner
); ix
++)
600 optimize_loop (inner
, hooks
);
602 if (!inner
->bad
&& inner_depth
< inner
->depth
)
603 inner_depth
= inner
->depth
;
604 /* The set of registers may be changed while optimizing the inner
606 IOR_HARD_REG_SET (loop
->regs_set_in_loop
, inner
->regs_set_in_loop
);
609 loop
->depth
= inner_depth
+ 1;
611 if (hooks
->opt (loop
))
616 fprintf (dump_file
, ";; loop %d is bad\n", loop
->loop_no
);
622 /* This function can be used from a port's machine_dependent_reorg to
623 find and analyze loops that end in loop_end instructions. It uses
624 a set of function pointers in HOOKS to call back into the
625 target-specific functions to perform the actual machine-specific
628 Such transformations typically involve additional set-up
629 instructions before the loop, to define loop bounds or set up a
630 special loop counter register.
632 DO_REORDER should be set to true if we should try to use the
633 reorder_loops function to ensure the loop end occurs after the loop
634 start. This is for use by targets where the loop hardware requires
637 HOOKS is used to pass in target specific hooks; see
640 reorg_loops (bool do_reorder
, struct hw_doloop_hooks
*hooks
)
642 hwloop_info loops
= NULL
;
644 bitmap_obstack loop_stack
;
646 df_live_add_problem ();
647 df_live_set_all_dirty ();
650 bitmap_obstack_initialize (&loop_stack
);
653 fprintf (dump_file
, ";; Find loops, first pass\n\n");
655 loops
= discover_loops (&loop_stack
, hooks
);
657 /* We can't enter cfglayout mode anymore if basic block partitioning
659 if (do_reorder
&& !flag_reorder_blocks_and_partition
)
661 reorder_loops (loops
);
665 fprintf (dump_file
, ";; Find loops, second pass\n\n");
667 loops
= discover_loops (&loop_stack
, hooks
);
670 for (loop
= loops
; loop
; loop
= loop
->next
)
673 /* Now apply the optimizations. */
674 for (loop
= loops
; loop
; loop
= loop
->next
)
675 optimize_loop (loop
, hooks
);
679 fprintf (dump_file
, ";; After hardware loops optimization:\n\n");
680 dump_hwloops (loops
);
684 bitmap_obstack_release (&loop_stack
);
687 print_rtl (dump_file
, get_insns ());