1 /* Code to analyze doloop loops in order for targets to perform late
2 optimizations converting doloops to other forms of hardware loops.
3 Copyright (C) 2011-2017 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
27 #include "insn-config.h"
33 #include "hw-doloop.h"
36 /* Dump information collected in LOOPS. */
38 dump_hwloops (hwloop_info loops
)
42 for (loop
= loops
; loop
; loop
= loop
->next
)
48 fprintf (dump_file
, ";; loop %d: ", loop
->loop_no
);
50 fprintf (dump_file
, "(bad) ");
51 fprintf (dump_file
, "{head:%d, depth:%d, reg:%u}",
52 loop
->head
== NULL
? -1 : loop
->head
->index
,
53 loop
->depth
, REGNO (loop
->iter_reg
));
55 fprintf (dump_file
, " blocks: [ ");
56 for (ix
= 0; loop
->blocks
.iterate (ix
, &b
); ix
++)
57 fprintf (dump_file
, "%d ", b
->index
);
58 fprintf (dump_file
, "] ");
60 fprintf (dump_file
, " inner loops: [ ");
61 for (ix
= 0; loop
->loops
.iterate (ix
, &i
); ix
++)
62 fprintf (dump_file
, "%d ", i
->loop_no
);
63 fprintf (dump_file
, "]\n");
65 fprintf (dump_file
, "\n");
68 /* Return true if BB is part of LOOP. */
70 bb_in_loop_p (hwloop_info loop
, basic_block bb
)
72 return bitmap_bit_p (loop
->block_bitmap
, bb
->index
);
75 /* Scan the blocks of LOOP (and its inferiors), and record things such as
76 hard registers set, jumps out of and within the loop. */
78 scan_loop (hwloop_info loop
)
86 if (REGNO_REG_SET_P (df_get_live_in (loop
->successor
),
87 REGNO (loop
->iter_reg
)))
88 loop
->iter_reg_used_outside
= true;
90 for (ix
= 0; loop
->blocks
.iterate (ix
, &bb
); ix
++)
97 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
99 if (bb_in_loop_p (loop
, e
->dest
))
101 if (!(e
->flags
& EDGE_FALLTHRU
))
102 loop
->jumps_within
= true;
106 loop
->jumps_outof
= true;
108 gcc_assert (!REGNO_REG_SET_P (df_get_live_in (e
->dest
),
109 REGNO (loop
->iter_reg
)));
113 for (insn
= BB_HEAD (bb
);
114 insn
!= NEXT_INSN (BB_END (bb
));
115 insn
= NEXT_INSN (insn
))
118 HARD_REG_SET set_this_insn
;
120 if (!NONDEBUG_INSN_P (insn
))
123 if (recog_memoized (insn
) < 0
124 && (GET_CODE (PATTERN (insn
)) == ASM_INPUT
125 || asm_noperands (PATTERN (insn
)) >= 0))
126 loop
->has_asm
= true;
128 CLEAR_HARD_REG_SET (set_this_insn
);
129 FOR_EACH_INSN_DEF (def
, insn
)
131 rtx dreg
= DF_REF_REG (def
);
136 add_to_hard_reg_set (&set_this_insn
, GET_MODE (dreg
),
140 if (insn
== loop
->loop_end
)
141 CLEAR_HARD_REG_BIT (set_this_insn
, REGNO (loop
->iter_reg
));
142 else if (reg_mentioned_p (loop
->iter_reg
, PATTERN (insn
)))
143 loop
->iter_reg_used
= true;
144 IOR_HARD_REG_SET (loop
->regs_set_in_loop
, set_this_insn
);
149 /* Compute the incoming_dest and incoming_src members of LOOP by
150 identifying the edges that jump into the loop. If there is more
151 than one block that jumps into the loop, incoming_src will be set
152 to NULL; likewise, if there is more than one block in the loop that
153 is the destination of an incoming edge, incoming_dest will be NULL.
155 Return true if either of these two fields is nonnull, false
158 process_incoming_edges (hwloop_info loop
)
164 FOR_EACH_EDGE (e
, ei
, loop
->incoming
)
168 loop
->incoming_src
= e
->src
;
169 loop
->incoming_dest
= e
->dest
;
174 if (e
->dest
!= loop
->incoming_dest
)
175 loop
->incoming_dest
= NULL
;
176 if (e
->src
!= loop
->incoming_src
)
177 loop
->incoming_src
= NULL
;
180 if (loop
->incoming_src
== NULL
&& loop
->incoming_dest
== NULL
)
186 /* Try to identify a forwarder block that jump into LOOP, and add it to
187 the set of blocks in the loop, updating the vector of incoming blocks as
188 well. This transformation gives a second chance to loops we couldn't
189 otherwise handle by increasing the chance that we'll end up with one
191 Return true if we made a change, false otherwise. */
193 add_forwarder_blocks (hwloop_info loop
)
198 FOR_EACH_EDGE (e
, ei
, loop
->incoming
)
200 if (forwarder_block_p (e
->src
))
207 ";; Adding forwarder block %d to loop %d and retrying\n",
208 e
->src
->index
, loop
->loop_no
);
209 loop
->blocks
.safe_push (e
->src
);
210 bitmap_set_bit (loop
->block_bitmap
, e
->src
->index
);
211 FOR_EACH_EDGE (e2
, ei2
, e
->src
->preds
)
212 vec_safe_push (loop
->incoming
, e2
);
213 loop
->incoming
->unordered_remove (ei
.index
);
220 /* Called from reorg_loops when a potential loop end is found. LOOP is
221 a newly set up structure describing the loop, it is this function's
222 responsibility to fill most of it. TAIL_BB and TAIL_INSN point to the
223 loop_end insn and its enclosing basic block. REG is the loop counter
225 For our purposes, a loop is defined by the set of blocks reachable from
226 the loop head in which the loop counter register is live. This matches
227 the expected use; targets that call into this code usually replace the
228 loop counter with a different special register. */
230 discover_loop (hwloop_info loop
, basic_block tail_bb
, rtx_insn
*tail_insn
, rtx reg
)
236 loop
->tail
= tail_bb
;
237 loop
->loop_end
= tail_insn
;
238 loop
->iter_reg
= reg
;
239 vec_alloc (loop
->incoming
, 2);
240 loop
->start_label
= as_a
<rtx_insn
*> (JUMP_LABEL (tail_insn
));
242 if (EDGE_COUNT (tail_bb
->succs
) != 2)
247 loop
->head
= BRANCH_EDGE (tail_bb
)->dest
;
248 loop
->successor
= FALLTHRU_EDGE (tail_bb
)->dest
;
250 auto_vec
<basic_block
, 20> works
;
251 works
.safe_push (loop
->head
);
254 for (dwork
= 0; works
.iterate (dwork
, &bb
); dwork
++)
258 if (bb
== EXIT_BLOCK_PTR_FOR_FN (cfun
))
260 /* We've reached the exit block. The loop must be bad. */
263 ";; Loop is bad - reached exit block while scanning\n");
268 if (bitmap_bit_p (loop
->block_bitmap
, bb
->index
))
271 /* We've not seen this block before. Add it to the loop's
272 list and then add each successor to the work list. */
274 loop
->blocks
.safe_push (bb
);
275 bitmap_set_bit (loop
->block_bitmap
, bb
->index
);
281 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
283 basic_block succ
= EDGE_SUCC (bb
, ei
.index
)->dest
;
284 if (REGNO_REG_SET_P (df_get_live_in (succ
),
285 REGNO (loop
->iter_reg
)))
286 works
.safe_push (succ
);
294 /* Find the predecessor, and make sure nothing else jumps into this loop. */
297 FOR_EACH_VEC_ELT (loop
->blocks
, dwork
, bb
)
301 FOR_EACH_EDGE (e
, ei
, bb
->preds
)
303 basic_block pred
= e
->src
;
305 if (!bb_in_loop_p (loop
, pred
))
308 fprintf (dump_file
, ";; Loop %d: incoming edge %d -> %d\n",
309 loop
->loop_no
, pred
->index
,
311 vec_safe_push (loop
->incoming
, e
);
316 if (!process_incoming_edges (loop
))
320 ";; retrying loop %d with forwarder blocks\n",
322 if (!add_forwarder_blocks (loop
))
325 fprintf (dump_file
, ";; No forwarder blocks found\n");
328 else if (!process_incoming_edges (loop
))
332 ";; can't find suitable entry for loop %d\n",
339 /* Analyze the structure of the loops in the current function. Use
340 LOOP_STACK for bitmap allocations. Returns all the valid candidates for
341 hardware loops found in this function. HOOKS is the argument
342 passed to reorg_loops, used here to find the iteration registers
343 from a loop_end pattern. */
345 discover_loops (bitmap_obstack
*loop_stack
, struct hw_doloop_hooks
*hooks
)
347 hwloop_info loops
= NULL
;
352 /* Find all the possible loop tails. This means searching for every
353 loop_end instruction. For each one found, create a hwloop_info
354 structure and add the head block to the work list. */
355 FOR_EACH_BB_FN (bb
, cfun
)
357 rtx_insn
*tail
= BB_END (bb
);
361 while (tail
&& NOTE_P (tail
) && tail
!= BB_HEAD (bb
))
362 tail
= PREV_INSN (tail
);
364 if (tail
== NULL_RTX
)
369 reg
= hooks
->end_pattern_reg (tail
);
373 /* A possible loop end */
375 /* There's a degenerate case we can handle - an empty loop consisting
376 of only a back branch. Handle that by deleting the branch. */
377 insn
= JUMP_LABEL_AS_INSN (tail
);
378 while (insn
&& !NONDEBUG_INSN_P (insn
))
379 insn
= NEXT_INSN (insn
);
382 basic_block succ
= FALLTHRU_EDGE (bb
)->dest
;
385 fprintf (dump_file
, ";; degenerate loop ending at\n");
386 print_rtl_single (dump_file
, tail
);
388 if (!REGNO_REG_SET_P (df_get_live_in (succ
), REGNO (reg
)))
391 fprintf (dump_file
, ";; deleting it\n");
392 delete_insn_and_edges (tail
);
397 loop
= XCNEW (struct hwloop_info_d
);
400 loop
->loop_no
= nloops
++;
401 loop
->blocks
.create (20);
402 loop
->block_bitmap
= BITMAP_ALLOC (loop_stack
);
406 fprintf (dump_file
, ";; potential loop %d ending at\n",
408 print_rtl_single (dump_file
, tail
);
411 discover_loop (loop
, bb
, tail
, reg
);
414 /* Compute loop nestings. Given two loops A and B, either the sets
415 of their blocks don't intersect at all, or one is the subset of
416 the other, or the blocks don't form a good nesting structure. */
417 for (loop
= loops
; loop
; loop
= loop
->next
)
424 for (other
= loops
; other
; other
= other
->next
)
429 if (!bitmap_intersect_p (other
->block_bitmap
, loop
->block_bitmap
))
431 if (!bitmap_intersect_compl_p (other
->block_bitmap
,
433 loop
->loops
.safe_push (other
);
434 else if (!bitmap_intersect_compl_p (loop
->block_bitmap
,
435 other
->block_bitmap
))
436 other
->loops
.safe_push (loop
);
441 ";; can't find suitable nesting for loops %d and %d\n",
442 loop
->loop_no
, other
->loop_no
);
443 loop
->bad
= other
->bad
= true;
449 dump_hwloops (loops
);
454 /* Free up the loop structures in LOOPS. */
456 free_loops (hwloop_info loops
)
460 hwloop_info loop
= loops
;
462 loop
->loops
.release ();
463 loop
->blocks
.release ();
464 BITMAP_FREE (loop
->block_bitmap
);
469 #define BB_AUX_INDEX(BB) ((intptr_t) (BB)->aux)
471 /* Initialize the aux fields to give ascending indices to basic blocks. */
473 set_bb_indices (void)
479 FOR_EACH_BB_FN (bb
, cfun
)
480 bb
->aux
= (void *) index
++;
483 /* The taken-branch edge from the loop end can actually go forward.
484 If the target's hardware loop support requires that the loop end be
485 after the loop start, try to reorder a loop's basic blocks when we
487 This is not very aggressive; it only moves at most one block. It
488 does not introduce new branches into loops; it may remove them, or
489 it may switch fallthru/jump edges. */
491 reorder_loops (hwloop_info loops
)
496 cfg_layout_initialize (0);
500 for (loop
= loops
; loop
; loop
= loop
->next
)
508 if (BB_AUX_INDEX (loop
->head
) <= BB_AUX_INDEX (loop
->tail
))
511 FOR_EACH_EDGE (e
, ei
, loop
->head
->succs
)
513 if (bitmap_bit_p (loop
->block_bitmap
, e
->dest
->index
)
514 && BB_AUX_INDEX (e
->dest
) < BB_AUX_INDEX (loop
->tail
))
516 basic_block start_bb
= e
->dest
;
517 basic_block start_prev_bb
= start_bb
->prev_bb
;
520 fprintf (dump_file
, ";; Moving block %d before block %d\n",
521 loop
->head
->index
, start_bb
->index
);
522 loop
->head
->prev_bb
->next_bb
= loop
->head
->next_bb
;
523 loop
->head
->next_bb
->prev_bb
= loop
->head
->prev_bb
;
525 loop
->head
->prev_bb
= start_prev_bb
;
526 loop
->head
->next_bb
= start_bb
;
527 start_prev_bb
->next_bb
= start_bb
->prev_bb
= loop
->head
;
536 FOR_EACH_BB_FN (bb
, cfun
)
538 if (bb
->next_bb
!= EXIT_BLOCK_PTR_FOR_FN (cfun
))
539 bb
->aux
= bb
->next_bb
;
543 cfg_layout_finalize ();
544 clear_aux_for_blocks ();
548 /* Call the OPT function for LOOP and all of its sub-loops. This is
549 done in a depth-first search; innermost loops are visited first.
550 OPTIMIZE and FAIL are the functions passed to reorg_loops by the
551 target's reorg pass. */
553 optimize_loop (hwloop_info loop
, struct hw_doloop_hooks
*hooks
)
567 fprintf (dump_file
, ";; loop %d bad when found\n", loop
->loop_no
);
571 /* Every loop contains in its list of inner loops every loop nested inside
572 it, even if there are intermediate loops. This works because we're doing
573 a depth-first search here and never visit a loop more than once.
574 Recursion depth is effectively limited by the number of available
575 hardware registers. */
576 for (ix
= 0; loop
->loops
.iterate (ix
, &inner
); ix
++)
578 optimize_loop (inner
, hooks
);
580 if (!inner
->bad
&& inner_depth
< inner
->depth
)
581 inner_depth
= inner
->depth
;
582 /* The set of registers may be changed while optimizing the inner
584 IOR_HARD_REG_SET (loop
->regs_set_in_loop
, inner
->regs_set_in_loop
);
587 loop
->depth
= inner_depth
+ 1;
589 if (hooks
->opt (loop
))
594 fprintf (dump_file
, ";; loop %d is bad\n", loop
->loop_no
);
600 /* This function can be used from a port's machine_dependent_reorg to
601 find and analyze loops that end in loop_end instructions. It uses
602 a set of function pointers in HOOKS to call back into the
603 target-specific functions to perform the actual machine-specific
606 Such transformations typically involve additional set-up
607 instructions before the loop, to define loop bounds or set up a
608 special loop counter register.
610 DO_REORDER should be set to true if we should try to use the
611 reorder_loops function to ensure the loop end occurs after the loop
612 start. This is for use by targets where the loop hardware requires
615 HOOKS is used to pass in target specific hooks; see
618 reorg_loops (bool do_reorder
, struct hw_doloop_hooks
*hooks
)
620 hwloop_info loops
= NULL
;
622 bitmap_obstack loop_stack
;
624 df_live_add_problem ();
625 df_live_set_all_dirty ();
628 bitmap_obstack_initialize (&loop_stack
);
631 fprintf (dump_file
, ";; Find loops, first pass\n\n");
633 loops
= discover_loops (&loop_stack
, hooks
);
635 /* We can't enter cfglayout mode anymore if basic block partitioning
637 if (do_reorder
&& !crtl
->has_bb_partition
)
639 reorder_loops (loops
);
643 fprintf (dump_file
, ";; Find loops, second pass\n\n");
645 loops
= discover_loops (&loop_stack
, hooks
);
648 for (loop
= loops
; loop
; loop
= loop
->next
)
651 /* Now apply the optimizations. */
652 for (loop
= loops
; loop
; loop
= loop
->next
)
653 optimize_loop (loop
, hooks
);
657 fprintf (dump_file
, ";; After hardware loops optimization:\n\n");
658 dump_hwloops (loops
);
662 bitmap_obstack_release (&loop_stack
);
665 print_rtl (dump_file
, get_insns ());