PR target/65604
[official-gcc.git] / gcc / hw-doloop.c
blobc73b1060da1041b25231902cb67074166155db00
1 /* Code to analyze doloop loops in order for targets to perform late
2 optimizations converting doloops to other forms of hardware loops.
3 Copyright (C) 2011-2016 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "rtl.h"
26 #include "df.h"
27 #include "insn-config.h"
28 #include "regs.h"
29 #include "emit-rtl.h"
30 #include "recog.h"
31 #include "cfgrtl.h"
32 #include "hw-doloop.h"
33 #include "dumpfile.h"
35 /* Dump information collected in LOOPS. */
36 static void
37 dump_hwloops (hwloop_info loops)
39 hwloop_info loop;
41 for (loop = loops; loop; loop = loop->next)
43 hwloop_info i;
44 basic_block b;
45 unsigned ix;
47 fprintf (dump_file, ";; loop %d: ", loop->loop_no);
48 if (loop->bad)
49 fprintf (dump_file, "(bad) ");
50 fprintf (dump_file, "{head:%d, depth:%d, reg:%u}",
51 loop->head == NULL ? -1 : loop->head->index,
52 loop->depth, REGNO (loop->iter_reg));
54 fprintf (dump_file, " blocks: [ ");
55 for (ix = 0; loop->blocks.iterate (ix, &b); ix++)
56 fprintf (dump_file, "%d ", b->index);
57 fprintf (dump_file, "] ");
59 fprintf (dump_file, " inner loops: [ ");
60 for (ix = 0; loop->loops.iterate (ix, &i); ix++)
61 fprintf (dump_file, "%d ", i->loop_no);
62 fprintf (dump_file, "]\n");
64 fprintf (dump_file, "\n");
67 /* Return true if BB is part of LOOP. */
68 static bool
69 bb_in_loop_p (hwloop_info loop, basic_block bb)
71 return bitmap_bit_p (loop->block_bitmap, bb->index);
74 /* Scan the blocks of LOOP (and its inferiors), and record things such as
75 hard registers set, jumps out of and within the loop. */
76 static void
77 scan_loop (hwloop_info loop)
79 unsigned ix;
80 basic_block bb;
82 if (loop->bad)
83 return;
85 if (REGNO_REG_SET_P (df_get_live_in (loop->successor),
86 REGNO (loop->iter_reg)))
87 loop->iter_reg_used_outside = true;
89 for (ix = 0; loop->blocks.iterate (ix, &bb); ix++)
91 rtx_insn *insn;
92 edge e;
93 edge_iterator ei;
95 if (bb != loop->tail)
96 FOR_EACH_EDGE (e, ei, bb->succs)
98 if (bb_in_loop_p (loop, e->dest))
100 if (!(e->flags & EDGE_FALLTHRU))
101 loop->jumps_within = true;
103 else
105 loop->jumps_outof = true;
106 if (!loop->bad)
107 gcc_assert (!REGNO_REG_SET_P (df_get_live_in (e->dest),
108 REGNO (loop->iter_reg)));
112 for (insn = BB_HEAD (bb);
113 insn != NEXT_INSN (BB_END (bb));
114 insn = NEXT_INSN (insn))
116 df_ref def;
117 HARD_REG_SET set_this_insn;
119 if (!NONDEBUG_INSN_P (insn))
120 continue;
122 if (recog_memoized (insn) < 0
123 && (GET_CODE (PATTERN (insn)) == ASM_INPUT
124 || asm_noperands (PATTERN (insn)) >= 0))
125 loop->has_asm = true;
127 CLEAR_HARD_REG_SET (set_this_insn);
128 FOR_EACH_INSN_DEF (def, insn)
130 rtx dreg = DF_REF_REG (def);
132 if (!REG_P (dreg))
133 continue;
135 add_to_hard_reg_set (&set_this_insn, GET_MODE (dreg),
136 REGNO (dreg));
139 if (insn == loop->loop_end)
140 CLEAR_HARD_REG_BIT (set_this_insn, REGNO (loop->iter_reg));
141 else if (reg_mentioned_p (loop->iter_reg, PATTERN (insn)))
142 loop->iter_reg_used = true;
143 IOR_HARD_REG_SET (loop->regs_set_in_loop, set_this_insn);
148 /* Compute the incoming_dest and incoming_src members of LOOP by
149 identifying the edges that jump into the loop. If there is more
150 than one block that jumps into the loop, incoming_src will be set
151 to NULL; likewise, if there is more than one block in the loop that
152 is the destination of an incoming edge, incoming_dest will be NULL.
154 Return true if either of these two fields is nonnull, false
155 otherwise. */
156 static bool
157 process_incoming_edges (hwloop_info loop)
159 edge e;
160 edge_iterator ei;
161 bool first = true;
163 FOR_EACH_EDGE (e, ei, loop->incoming)
165 if (first)
167 loop->incoming_src = e->src;
168 loop->incoming_dest = e->dest;
169 first = false;
171 else
173 if (e->dest != loop->incoming_dest)
174 loop->incoming_dest = NULL;
175 if (e->src != loop->incoming_src)
176 loop->incoming_src = NULL;
179 if (loop->incoming_src == NULL && loop->incoming_dest == NULL)
180 return false;
182 return true;
185 /* Try to identify a forwarder block that jump into LOOP, and add it to
186 the set of blocks in the loop, updating the vector of incoming blocks as
187 well. This transformation gives a second chance to loops we couldn't
188 otherwise handle by increasing the chance that we'll end up with one
189 incoming_src block.
190 Return true if we made a change, false otherwise. */
191 static bool
192 add_forwarder_blocks (hwloop_info loop)
194 edge e;
195 edge_iterator ei;
197 FOR_EACH_EDGE (e, ei, loop->incoming)
199 if (forwarder_block_p (e->src))
201 edge e2;
202 edge_iterator ei2;
204 if (dump_file)
205 fprintf (dump_file,
206 ";; Adding forwarder block %d to loop %d and retrying\n",
207 e->src->index, loop->loop_no);
208 loop->blocks.safe_push (e->src);
209 bitmap_set_bit (loop->block_bitmap, e->src->index);
210 FOR_EACH_EDGE (e2, ei2, e->src->preds)
211 vec_safe_push (loop->incoming, e2);
212 loop->incoming->unordered_remove (ei.index);
213 return true;
216 return false;
219 /* Called from reorg_loops when a potential loop end is found. LOOP is
220 a newly set up structure describing the loop, it is this function's
221 responsibility to fill most of it. TAIL_BB and TAIL_INSN point to the
222 loop_end insn and its enclosing basic block. REG is the loop counter
223 register.
224 For our purposes, a loop is defined by the set of blocks reachable from
225 the loop head in which the loop counter register is live. This matches
226 the expected use; targets that call into this code usually replace the
227 loop counter with a different special register. */
228 static void
229 discover_loop (hwloop_info loop, basic_block tail_bb, rtx_insn *tail_insn, rtx reg)
231 bool found_tail;
232 unsigned dwork = 0;
233 basic_block bb;
235 loop->tail = tail_bb;
236 loop->loop_end = tail_insn;
237 loop->iter_reg = reg;
238 vec_alloc (loop->incoming, 2);
239 loop->start_label = as_a <rtx_insn *> (JUMP_LABEL (tail_insn));
241 if (EDGE_COUNT (tail_bb->succs) != 2)
243 loop->bad = true;
244 return;
246 loop->head = BRANCH_EDGE (tail_bb)->dest;
247 loop->successor = FALLTHRU_EDGE (tail_bb)->dest;
249 auto_vec<basic_block, 20> works;
250 works.safe_push (loop->head);
252 found_tail = false;
253 for (dwork = 0; works.iterate (dwork, &bb); dwork++)
255 edge e;
256 edge_iterator ei;
257 if (bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
259 /* We've reached the exit block. The loop must be bad. */
260 if (dump_file)
261 fprintf (dump_file,
262 ";; Loop is bad - reached exit block while scanning\n");
263 loop->bad = true;
264 break;
267 if (bitmap_bit_p (loop->block_bitmap, bb->index))
268 continue;
270 /* We've not seen this block before. Add it to the loop's
271 list and then add each successor to the work list. */
273 loop->blocks.safe_push (bb);
274 bitmap_set_bit (loop->block_bitmap, bb->index);
276 if (bb == tail_bb)
277 found_tail = true;
278 else
280 FOR_EACH_EDGE (e, ei, bb->succs)
282 basic_block succ = EDGE_SUCC (bb, ei.index)->dest;
283 if (REGNO_REG_SET_P (df_get_live_in (succ),
284 REGNO (loop->iter_reg)))
285 works.safe_push (succ);
290 if (!found_tail)
291 loop->bad = true;
293 /* Find the predecessor, and make sure nothing else jumps into this loop. */
294 if (!loop->bad)
296 FOR_EACH_VEC_ELT (loop->blocks, dwork, bb)
298 edge e;
299 edge_iterator ei;
300 FOR_EACH_EDGE (e, ei, bb->preds)
302 basic_block pred = e->src;
304 if (!bb_in_loop_p (loop, pred))
306 if (dump_file)
307 fprintf (dump_file, ";; Loop %d: incoming edge %d -> %d\n",
308 loop->loop_no, pred->index,
309 e->dest->index);
310 vec_safe_push (loop->incoming, e);
315 if (!process_incoming_edges (loop))
317 if (dump_file)
318 fprintf (dump_file,
319 ";; retrying loop %d with forwarder blocks\n",
320 loop->loop_no);
321 if (!add_forwarder_blocks (loop))
323 if (dump_file)
324 fprintf (dump_file, ";; No forwarder blocks found\n");
325 loop->bad = true;
327 else if (!process_incoming_edges (loop))
329 if (dump_file)
330 fprintf (dump_file,
331 ";; can't find suitable entry for loop %d\n",
332 loop->loop_no);
338 /* Analyze the structure of the loops in the current function. Use
339 LOOP_STACK for bitmap allocations. Returns all the valid candidates for
340 hardware loops found in this function. HOOKS is the argument
341 passed to reorg_loops, used here to find the iteration registers
342 from a loop_end pattern. */
343 static hwloop_info
344 discover_loops (bitmap_obstack *loop_stack, struct hw_doloop_hooks *hooks)
346 hwloop_info loops = NULL;
347 hwloop_info loop;
348 basic_block bb;
349 int nloops = 0;
351 /* Find all the possible loop tails. This means searching for every
352 loop_end instruction. For each one found, create a hwloop_info
353 structure and add the head block to the work list. */
354 FOR_EACH_BB_FN (bb, cfun)
356 rtx_insn *tail = BB_END (bb);
357 rtx_insn *insn;
358 rtx reg;
360 while (tail && NOTE_P (tail) && tail != BB_HEAD (bb))
361 tail = PREV_INSN (tail);
363 if (tail == NULL_RTX)
364 continue;
366 if (!JUMP_P (tail))
367 continue;
368 reg = hooks->end_pattern_reg (tail);
369 if (reg == NULL_RTX)
370 continue;
372 /* A possible loop end */
374 /* There's a degenerate case we can handle - an empty loop consisting
375 of only a back branch. Handle that by deleting the branch. */
376 insn = JUMP_LABEL_AS_INSN (tail);
377 while (insn && !NONDEBUG_INSN_P (insn))
378 insn = NEXT_INSN (insn);
379 if (insn == tail)
381 basic_block succ = FALLTHRU_EDGE (bb)->dest;
382 if (dump_file)
384 fprintf (dump_file, ";; degenerate loop ending at\n");
385 print_rtl_single (dump_file, tail);
387 if (!REGNO_REG_SET_P (df_get_live_in (succ), REGNO (reg)))
389 if (dump_file)
390 fprintf (dump_file, ";; deleting it\n");
391 delete_insn_and_edges (tail);
392 continue;
396 loop = XCNEW (struct hwloop_info_d);
397 loop->next = loops;
398 loops = loop;
399 loop->loop_no = nloops++;
400 loop->blocks.create (20);
401 loop->block_bitmap = BITMAP_ALLOC (loop_stack);
403 if (dump_file)
405 fprintf (dump_file, ";; potential loop %d ending at\n",
406 loop->loop_no);
407 print_rtl_single (dump_file, tail);
410 discover_loop (loop, bb, tail, reg);
413 /* Compute loop nestings. Given two loops A and B, either the sets
414 of their blocks don't intersect at all, or one is the subset of
415 the other, or the blocks don't form a good nesting structure. */
416 for (loop = loops; loop; loop = loop->next)
418 hwloop_info other;
420 if (loop->bad)
421 continue;
423 for (other = loops; other; other = other->next)
425 if (other->bad)
426 continue;
428 if (!bitmap_intersect_p (other->block_bitmap, loop->block_bitmap))
429 continue;
430 if (!bitmap_intersect_compl_p (other->block_bitmap,
431 loop->block_bitmap))
432 loop->loops.safe_push (other);
433 else if (!bitmap_intersect_compl_p (loop->block_bitmap,
434 other->block_bitmap))
435 other->loops.safe_push (loop);
436 else
438 if (dump_file)
439 fprintf (dump_file,
440 ";; can't find suitable nesting for loops %d and %d\n",
441 loop->loop_no, other->loop_no);
442 loop->bad = other->bad = true;
447 if (dump_file)
448 dump_hwloops (loops);
450 return loops;
453 /* Free up the loop structures in LOOPS. */
454 static void
455 free_loops (hwloop_info loops)
457 while (loops)
459 hwloop_info loop = loops;
460 loops = loop->next;
461 loop->loops.release ();
462 loop->blocks.release ();
463 BITMAP_FREE (loop->block_bitmap);
464 XDELETE (loop);
468 #define BB_AUX_INDEX(BB) ((intptr_t) (BB)->aux)
470 /* Initialize the aux fields to give ascending indices to basic blocks. */
471 static void
472 set_bb_indices (void)
474 basic_block bb;
475 intptr_t index;
477 index = 0;
478 FOR_EACH_BB_FN (bb, cfun)
479 bb->aux = (void *) index++;
482 /* The taken-branch edge from the loop end can actually go forward.
483 If the target's hardware loop support requires that the loop end be
484 after the loop start, try to reorder a loop's basic blocks when we
485 find such a case.
486 This is not very aggressive; it only moves at most one block. It
487 does not introduce new branches into loops; it may remove them, or
488 it may switch fallthru/jump edges. */
489 static void
490 reorder_loops (hwloop_info loops)
492 basic_block bb;
493 hwloop_info loop;
495 cfg_layout_initialize (0);
497 set_bb_indices ();
499 for (loop = loops; loop; loop = loop->next)
501 edge e;
502 edge_iterator ei;
504 if (loop->bad)
505 continue;
507 if (BB_AUX_INDEX (loop->head) <= BB_AUX_INDEX (loop->tail))
508 continue;
510 FOR_EACH_EDGE (e, ei, loop->head->succs)
512 if (bitmap_bit_p (loop->block_bitmap, e->dest->index)
513 && BB_AUX_INDEX (e->dest) < BB_AUX_INDEX (loop->tail))
515 basic_block start_bb = e->dest;
516 basic_block start_prev_bb = start_bb->prev_bb;
518 if (dump_file)
519 fprintf (dump_file, ";; Moving block %d before block %d\n",
520 loop->head->index, start_bb->index);
521 loop->head->prev_bb->next_bb = loop->head->next_bb;
522 loop->head->next_bb->prev_bb = loop->head->prev_bb;
524 loop->head->prev_bb = start_prev_bb;
525 loop->head->next_bb = start_bb;
526 start_prev_bb->next_bb = start_bb->prev_bb = loop->head;
528 set_bb_indices ();
529 break;
532 loops = loops->next;
535 FOR_EACH_BB_FN (bb, cfun)
537 if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (cfun))
538 bb->aux = bb->next_bb;
539 else
540 bb->aux = NULL;
542 cfg_layout_finalize ();
543 clear_aux_for_blocks ();
544 df_analyze ();
547 /* Call the OPT function for LOOP and all of its sub-loops. This is
548 done in a depth-first search; innermost loops are visited first.
549 OPTIMIZE and FAIL are the functions passed to reorg_loops by the
550 target's reorg pass. */
551 static void
552 optimize_loop (hwloop_info loop, struct hw_doloop_hooks *hooks)
554 int ix;
555 hwloop_info inner;
556 int inner_depth = 0;
558 if (loop->visited)
559 return;
561 loop->visited = 1;
563 if (loop->bad)
565 if (dump_file)
566 fprintf (dump_file, ";; loop %d bad when found\n", loop->loop_no);
567 goto bad_loop;
570 /* Every loop contains in its list of inner loops every loop nested inside
571 it, even if there are intermediate loops. This works because we're doing
572 a depth-first search here and never visit a loop more than once.
573 Recursion depth is effectively limited by the number of available
574 hardware registers. */
575 for (ix = 0; loop->loops.iterate (ix, &inner); ix++)
577 optimize_loop (inner, hooks);
579 if (!inner->bad && inner_depth < inner->depth)
580 inner_depth = inner->depth;
581 /* The set of registers may be changed while optimizing the inner
582 loop. */
583 IOR_HARD_REG_SET (loop->regs_set_in_loop, inner->regs_set_in_loop);
586 loop->depth = inner_depth + 1;
588 if (hooks->opt (loop))
589 return;
591 bad_loop:
592 if (dump_file)
593 fprintf (dump_file, ";; loop %d is bad\n", loop->loop_no);
595 loop->bad = true;
596 hooks->fail (loop);
599 /* This function can be used from a port's machine_dependent_reorg to
600 find and analyze loops that end in loop_end instructions. It uses
601 a set of function pointers in HOOKS to call back into the
602 target-specific functions to perform the actual machine-specific
603 transformations.
605 Such transformations typically involve additional set-up
606 instructions before the loop, to define loop bounds or set up a
607 special loop counter register.
609 DO_REORDER should be set to true if we should try to use the
610 reorder_loops function to ensure the loop end occurs after the loop
611 start. This is for use by targets where the loop hardware requires
612 this condition.
614 HOOKS is used to pass in target specific hooks; see
615 hw-doloop.h. */
616 void
617 reorg_loops (bool do_reorder, struct hw_doloop_hooks *hooks)
619 hwloop_info loops = NULL;
620 hwloop_info loop;
621 bitmap_obstack loop_stack;
623 df_live_add_problem ();
624 df_live_set_all_dirty ();
625 df_analyze ();
627 bitmap_obstack_initialize (&loop_stack);
629 if (dump_file)
630 fprintf (dump_file, ";; Find loops, first pass\n\n");
632 loops = discover_loops (&loop_stack, hooks);
634 /* We can't enter cfglayout mode anymore if basic block partitioning
635 already happened. */
636 if (do_reorder && !flag_reorder_blocks_and_partition)
638 reorder_loops (loops);
639 free_loops (loops);
641 if (dump_file)
642 fprintf (dump_file, ";; Find loops, second pass\n\n");
644 loops = discover_loops (&loop_stack, hooks);
647 for (loop = loops; loop; loop = loop->next)
648 scan_loop (loop);
650 /* Now apply the optimizations. */
651 for (loop = loops; loop; loop = loop->next)
652 optimize_loop (loop, hooks);
654 if (dump_file)
656 fprintf (dump_file, ";; After hardware loops optimization:\n\n");
657 dump_hwloops (loops);
660 free_loops (loops);
661 bitmap_obstack_release (&loop_stack);
663 if (dump_file)
664 print_rtl (dump_file, get_insns ());