2015-06-11 Paul Thomas <pault@gcc.gnu.org>
[official-gcc.git] / gcc / hw-doloop.c
blob27f2220b08a9cebdf077c2c8b9e08ac35fede1d7
1 /* Code to analyze doloop loops in order for targets to perform late
2 optimizations converting doloops to other forms of hardware loops.
3 Copyright (C) 2011-2015 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "rtl.h"
26 #include "flags.h"
27 #include "symtab.h"
28 #include "hard-reg-set.h"
29 #include "input.h"
30 #include "function.h"
31 #include "alias.h"
32 #include "tree.h"
33 #include "insn-config.h"
34 #include "expmed.h"
35 #include "dojump.h"
36 #include "explow.h"
37 #include "calls.h"
38 #include "emit-rtl.h"
39 #include "varasm.h"
40 #include "stmt.h"
41 #include "expr.h"
42 #include "regs.h"
43 #include "predict.h"
44 #include "dominance.h"
45 #include "cfg.h"
46 #include "cfgrtl.h"
47 #include "basic-block.h"
48 #include "tm_p.h"
49 #include "df.h"
50 #include "cfgloop.h"
51 #include "recog.h"
52 #include "target.h"
53 #include "hw-doloop.h"
54 #include "dumpfile.h"
56 #ifdef HAVE_doloop_end
58 /* Dump information collected in LOOPS. */
59 static void
60 dump_hwloops (hwloop_info loops)
62 hwloop_info loop;
64 for (loop = loops; loop; loop = loop->next)
66 hwloop_info i;
67 basic_block b;
68 unsigned ix;
70 fprintf (dump_file, ";; loop %d: ", loop->loop_no);
71 if (loop->bad)
72 fprintf (dump_file, "(bad) ");
73 fprintf (dump_file, "{head:%d, depth:%d, reg:%u}",
74 loop->head == NULL ? -1 : loop->head->index,
75 loop->depth, REGNO (loop->iter_reg));
77 fprintf (dump_file, " blocks: [ ");
78 for (ix = 0; loop->blocks.iterate (ix, &b); ix++)
79 fprintf (dump_file, "%d ", b->index);
80 fprintf (dump_file, "] ");
82 fprintf (dump_file, " inner loops: [ ");
83 for (ix = 0; loop->loops.iterate (ix, &i); ix++)
84 fprintf (dump_file, "%d ", i->loop_no);
85 fprintf (dump_file, "]\n");
87 fprintf (dump_file, "\n");
90 /* Return true if BB is part of LOOP. */
91 static bool
92 bb_in_loop_p (hwloop_info loop, basic_block bb)
94 return bitmap_bit_p (loop->block_bitmap, bb->index);
97 /* Scan the blocks of LOOP (and its inferiors), and record things such as
98 hard registers set, jumps out of and within the loop. */
99 static void
100 scan_loop (hwloop_info loop)
102 unsigned ix;
103 basic_block bb;
105 if (loop->bad)
106 return;
108 if (REGNO_REG_SET_P (df_get_live_in (loop->successor),
109 REGNO (loop->iter_reg)))
110 loop->iter_reg_used_outside = true;
112 for (ix = 0; loop->blocks.iterate (ix, &bb); ix++)
114 rtx_insn *insn;
115 edge e;
116 edge_iterator ei;
118 if (bb != loop->tail)
119 FOR_EACH_EDGE (e, ei, bb->succs)
121 if (bb_in_loop_p (loop, e->dest))
123 if (!(e->flags & EDGE_FALLTHRU))
124 loop->jumps_within = true;
126 else
128 loop->jumps_outof = true;
129 if (!loop->bad)
130 gcc_assert (!REGNO_REG_SET_P (df_get_live_in (e->dest),
131 REGNO (loop->iter_reg)));
135 for (insn = BB_HEAD (bb);
136 insn != NEXT_INSN (BB_END (bb));
137 insn = NEXT_INSN (insn))
139 df_ref def;
140 HARD_REG_SET set_this_insn;
142 if (!NONDEBUG_INSN_P (insn))
143 continue;
145 if (recog_memoized (insn) < 0
146 && (GET_CODE (PATTERN (insn)) == ASM_INPUT
147 || asm_noperands (PATTERN (insn)) >= 0))
148 loop->has_asm = true;
150 CLEAR_HARD_REG_SET (set_this_insn);
151 FOR_EACH_INSN_DEF (def, insn)
153 rtx dreg = DF_REF_REG (def);
155 if (!REG_P (dreg))
156 continue;
158 add_to_hard_reg_set (&set_this_insn, GET_MODE (dreg),
159 REGNO (dreg));
162 if (insn == loop->loop_end)
163 CLEAR_HARD_REG_BIT (set_this_insn, REGNO (loop->iter_reg));
164 else if (reg_mentioned_p (loop->iter_reg, PATTERN (insn)))
165 loop->iter_reg_used = true;
166 IOR_HARD_REG_SET (loop->regs_set_in_loop, set_this_insn);
171 /* Compute the incoming_dest and incoming_src members of LOOP by
172 identifying the edges that jump into the loop. If there is more
173 than one block that jumps into the loop, incoming_src will be set
174 to NULL; likewise, if there is more than one block in the loop that
175 is the destination of an incoming edge, incoming_dest will be NULL.
177 Return true if either of these two fields is nonnull, false
178 otherwise. */
179 static bool
180 process_incoming_edges (hwloop_info loop)
182 edge e;
183 edge_iterator ei;
184 bool first = true;
186 FOR_EACH_EDGE (e, ei, loop->incoming)
188 if (first)
190 loop->incoming_src = e->src;
191 loop->incoming_dest = e->dest;
192 first = false;
194 else
196 if (e->dest != loop->incoming_dest)
197 loop->incoming_dest = NULL;
198 if (e->src != loop->incoming_src)
199 loop->incoming_src = NULL;
202 if (loop->incoming_src == NULL && loop->incoming_dest == NULL)
203 return false;
205 return true;
208 /* Try to identify a forwarder block that jump into LOOP, and add it to
209 the set of blocks in the loop, updating the vector of incoming blocks as
210 well. This transformation gives a second chance to loops we couldn't
211 otherwise handle by increasing the chance that we'll end up with one
212 incoming_src block.
213 Return true if we made a change, false otherwise. */
214 static bool
215 add_forwarder_blocks (hwloop_info loop)
217 edge e;
218 edge_iterator ei;
220 FOR_EACH_EDGE (e, ei, loop->incoming)
222 if (forwarder_block_p (e->src))
224 edge e2;
225 edge_iterator ei2;
227 if (dump_file)
228 fprintf (dump_file,
229 ";; Adding forwarder block %d to loop %d and retrying\n",
230 e->src->index, loop->loop_no);
231 loop->blocks.safe_push (e->src);
232 bitmap_set_bit (loop->block_bitmap, e->src->index);
233 FOR_EACH_EDGE (e2, ei2, e->src->preds)
234 vec_safe_push (loop->incoming, e2);
235 loop->incoming->unordered_remove (ei.index);
236 return true;
239 return false;
242 /* Called from reorg_loops when a potential loop end is found. LOOP is
243 a newly set up structure describing the loop, it is this function's
244 responsibility to fill most of it. TAIL_BB and TAIL_INSN point to the
245 loop_end insn and its enclosing basic block. REG is the loop counter
246 register.
247 For our purposes, a loop is defined by the set of blocks reachable from
248 the loop head in which the loop counter register is live. This matches
249 the expected use; targets that call into this code usually replace the
250 loop counter with a different special register. */
251 static void
252 discover_loop (hwloop_info loop, basic_block tail_bb, rtx_insn *tail_insn, rtx reg)
254 bool found_tail;
255 unsigned dwork = 0;
256 basic_block bb;
258 loop->tail = tail_bb;
259 loop->loop_end = tail_insn;
260 loop->iter_reg = reg;
261 vec_alloc (loop->incoming, 2);
262 loop->start_label = as_a <rtx_insn *> (JUMP_LABEL (tail_insn));
264 if (EDGE_COUNT (tail_bb->succs) != 2)
266 loop->bad = true;
267 return;
269 loop->head = BRANCH_EDGE (tail_bb)->dest;
270 loop->successor = FALLTHRU_EDGE (tail_bb)->dest;
272 auto_vec<basic_block, 20> works;
273 works.safe_push (loop->head);
275 found_tail = false;
276 for (dwork = 0; works.iterate (dwork, &bb); dwork++)
278 edge e;
279 edge_iterator ei;
280 if (bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
282 /* We've reached the exit block. The loop must be bad. */
283 if (dump_file)
284 fprintf (dump_file,
285 ";; Loop is bad - reached exit block while scanning\n");
286 loop->bad = true;
287 break;
290 if (bitmap_bit_p (loop->block_bitmap, bb->index))
291 continue;
293 /* We've not seen this block before. Add it to the loop's
294 list and then add each successor to the work list. */
296 loop->blocks.safe_push (bb);
297 bitmap_set_bit (loop->block_bitmap, bb->index);
299 if (bb == tail_bb)
300 found_tail = true;
301 else
303 FOR_EACH_EDGE (e, ei, bb->succs)
305 basic_block succ = EDGE_SUCC (bb, ei.index)->dest;
306 if (REGNO_REG_SET_P (df_get_live_in (succ),
307 REGNO (loop->iter_reg)))
308 works.safe_push (succ);
313 if (!found_tail)
314 loop->bad = true;
316 /* Find the predecessor, and make sure nothing else jumps into this loop. */
317 if (!loop->bad)
319 FOR_EACH_VEC_ELT (loop->blocks, dwork, bb)
321 edge e;
322 edge_iterator ei;
323 FOR_EACH_EDGE (e, ei, bb->preds)
325 basic_block pred = e->src;
327 if (!bb_in_loop_p (loop, pred))
329 if (dump_file)
330 fprintf (dump_file, ";; Loop %d: incoming edge %d -> %d\n",
331 loop->loop_no, pred->index,
332 e->dest->index);
333 vec_safe_push (loop->incoming, e);
338 if (!process_incoming_edges (loop))
340 if (dump_file)
341 fprintf (dump_file,
342 ";; retrying loop %d with forwarder blocks\n",
343 loop->loop_no);
344 if (!add_forwarder_blocks (loop))
346 if (dump_file)
347 fprintf (dump_file, ";; No forwarder blocks found\n");
348 loop->bad = true;
350 else if (!process_incoming_edges (loop))
352 if (dump_file)
353 fprintf (dump_file,
354 ";; can't find suitable entry for loop %d\n",
355 loop->loop_no);
361 /* Analyze the structure of the loops in the current function. Use
362 LOOP_STACK for bitmap allocations. Returns all the valid candidates for
363 hardware loops found in this function. HOOKS is the argument
364 passed to reorg_loops, used here to find the iteration registers
365 from a loop_end pattern. */
366 static hwloop_info
367 discover_loops (bitmap_obstack *loop_stack, struct hw_doloop_hooks *hooks)
369 hwloop_info loops = NULL;
370 hwloop_info loop;
371 basic_block bb;
372 int nloops = 0;
374 /* Find all the possible loop tails. This means searching for every
375 loop_end instruction. For each one found, create a hwloop_info
376 structure and add the head block to the work list. */
377 FOR_EACH_BB_FN (bb, cfun)
379 rtx_insn *tail = BB_END (bb);
380 rtx_insn *insn;
381 rtx reg;
383 while (tail && NOTE_P (tail) && tail != BB_HEAD (bb))
384 tail = PREV_INSN (tail);
386 if (tail == NULL_RTX)
387 continue;
389 if (!JUMP_P (tail))
390 continue;
391 reg = hooks->end_pattern_reg (tail);
392 if (reg == NULL_RTX)
393 continue;
395 /* A possible loop end */
397 /* There's a degenerate case we can handle - an empty loop consisting
398 of only a back branch. Handle that by deleting the branch. */
399 insn = JUMP_LABEL_AS_INSN (tail);
400 while (insn && !NONDEBUG_INSN_P (insn))
401 insn = NEXT_INSN (insn);
402 if (insn == tail)
404 basic_block succ = FALLTHRU_EDGE (bb)->dest;
405 if (dump_file)
407 fprintf (dump_file, ";; degenerate loop ending at\n");
408 print_rtl_single (dump_file, tail);
410 if (!REGNO_REG_SET_P (df_get_live_in (succ), REGNO (reg)))
412 if (dump_file)
413 fprintf (dump_file, ";; deleting it\n");
414 delete_insn_and_edges (tail);
415 continue;
419 loop = XCNEW (struct hwloop_info_d);
420 loop->next = loops;
421 loops = loop;
422 loop->loop_no = nloops++;
423 loop->blocks.create (20);
424 loop->block_bitmap = BITMAP_ALLOC (loop_stack);
426 if (dump_file)
428 fprintf (dump_file, ";; potential loop %d ending at\n",
429 loop->loop_no);
430 print_rtl_single (dump_file, tail);
433 discover_loop (loop, bb, tail, reg);
436 /* Compute loop nestings. Given two loops A and B, either the sets
437 of their blocks don't intersect at all, or one is the subset of
438 the other, or the blocks don't form a good nesting structure. */
439 for (loop = loops; loop; loop = loop->next)
441 hwloop_info other;
443 if (loop->bad)
444 continue;
446 for (other = loops; other; other = other->next)
448 if (other->bad)
449 continue;
451 if (!bitmap_intersect_p (other->block_bitmap, loop->block_bitmap))
452 continue;
453 if (!bitmap_intersect_compl_p (other->block_bitmap,
454 loop->block_bitmap))
455 loop->loops.safe_push (other);
456 else if (!bitmap_intersect_compl_p (loop->block_bitmap,
457 other->block_bitmap))
458 other->loops.safe_push (loop);
459 else
461 if (dump_file)
462 fprintf (dump_file,
463 ";; can't find suitable nesting for loops %d and %d\n",
464 loop->loop_no, other->loop_no);
465 loop->bad = other->bad = true;
470 if (dump_file)
471 dump_hwloops (loops);
473 return loops;
476 /* Free up the loop structures in LOOPS. */
477 static void
478 free_loops (hwloop_info loops)
480 while (loops)
482 hwloop_info loop = loops;
483 loops = loop->next;
484 loop->loops.release ();
485 loop->blocks.release ();
486 BITMAP_FREE (loop->block_bitmap);
487 XDELETE (loop);
491 #define BB_AUX_INDEX(BB) ((intptr_t) (BB)->aux)
493 /* Initialize the aux fields to give ascending indices to basic blocks. */
494 static void
495 set_bb_indices (void)
497 basic_block bb;
498 intptr_t index;
500 index = 0;
501 FOR_EACH_BB_FN (bb, cfun)
502 bb->aux = (void *) index++;
505 /* The taken-branch edge from the loop end can actually go forward.
506 If the target's hardware loop support requires that the loop end be
507 after the loop start, try to reorder a loop's basic blocks when we
508 find such a case.
509 This is not very aggressive; it only moves at most one block. It
510 does not introduce new branches into loops; it may remove them, or
511 it may switch fallthru/jump edges. */
512 static void
513 reorder_loops (hwloop_info loops)
515 basic_block bb;
516 hwloop_info loop;
518 cfg_layout_initialize (0);
520 set_bb_indices ();
522 for (loop = loops; loop; loop = loop->next)
524 edge e;
525 edge_iterator ei;
527 if (loop->bad)
528 continue;
530 if (BB_AUX_INDEX (loop->head) <= BB_AUX_INDEX (loop->tail))
531 continue;
533 FOR_EACH_EDGE (e, ei, loop->head->succs)
535 if (bitmap_bit_p (loop->block_bitmap, e->dest->index)
536 && BB_AUX_INDEX (e->dest) < BB_AUX_INDEX (loop->tail))
538 basic_block start_bb = e->dest;
539 basic_block start_prev_bb = start_bb->prev_bb;
541 if (dump_file)
542 fprintf (dump_file, ";; Moving block %d before block %d\n",
543 loop->head->index, start_bb->index);
544 loop->head->prev_bb->next_bb = loop->head->next_bb;
545 loop->head->next_bb->prev_bb = loop->head->prev_bb;
547 loop->head->prev_bb = start_prev_bb;
548 loop->head->next_bb = start_bb;
549 start_prev_bb->next_bb = start_bb->prev_bb = loop->head;
551 set_bb_indices ();
552 break;
555 loops = loops->next;
558 FOR_EACH_BB_FN (bb, cfun)
560 if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (cfun))
561 bb->aux = bb->next_bb;
562 else
563 bb->aux = NULL;
565 cfg_layout_finalize ();
566 clear_aux_for_blocks ();
567 df_analyze ();
570 /* Call the OPT function for LOOP and all of its sub-loops. This is
571 done in a depth-first search; innermost loops are visited first.
572 OPTIMIZE and FAIL are the functions passed to reorg_loops by the
573 target's reorg pass. */
574 static void
575 optimize_loop (hwloop_info loop, struct hw_doloop_hooks *hooks)
577 int ix;
578 hwloop_info inner;
579 int inner_depth = 0;
581 if (loop->visited)
582 return;
584 loop->visited = 1;
586 if (loop->bad)
588 if (dump_file)
589 fprintf (dump_file, ";; loop %d bad when found\n", loop->loop_no);
590 goto bad_loop;
593 /* Every loop contains in its list of inner loops every loop nested inside
594 it, even if there are intermediate loops. This works because we're doing
595 a depth-first search here and never visit a loop more than once.
596 Recursion depth is effectively limited by the number of available
597 hardware registers. */
598 for (ix = 0; loop->loops.iterate (ix, &inner); ix++)
600 optimize_loop (inner, hooks);
602 if (!inner->bad && inner_depth < inner->depth)
603 inner_depth = inner->depth;
604 /* The set of registers may be changed while optimizing the inner
605 loop. */
606 IOR_HARD_REG_SET (loop->regs_set_in_loop, inner->regs_set_in_loop);
609 loop->depth = inner_depth + 1;
611 if (hooks->opt (loop))
612 return;
614 bad_loop:
615 if (dump_file)
616 fprintf (dump_file, ";; loop %d is bad\n", loop->loop_no);
618 loop->bad = true;
619 hooks->fail (loop);
622 /* This function can be used from a port's machine_dependent_reorg to
623 find and analyze loops that end in loop_end instructions. It uses
624 a set of function pointers in HOOKS to call back into the
625 target-specific functions to perform the actual machine-specific
626 transformations.
628 Such transformations typically involve additional set-up
629 instructions before the loop, to define loop bounds or set up a
630 special loop counter register.
632 DO_REORDER should be set to true if we should try to use the
633 reorder_loops function to ensure the loop end occurs after the loop
634 start. This is for use by targets where the loop hardware requires
635 this condition.
637 HOOKS is used to pass in target specific hooks; see
638 hw-doloop.h. */
639 void
640 reorg_loops (bool do_reorder, struct hw_doloop_hooks *hooks)
642 hwloop_info loops = NULL;
643 hwloop_info loop;
644 bitmap_obstack loop_stack;
646 df_live_add_problem ();
647 df_live_set_all_dirty ();
648 df_analyze ();
650 bitmap_obstack_initialize (&loop_stack);
652 if (dump_file)
653 fprintf (dump_file, ";; Find loops, first pass\n\n");
655 loops = discover_loops (&loop_stack, hooks);
657 /* We can't enter cfglayout mode anymore if basic block partitioning
658 already happened. */
659 if (do_reorder && !flag_reorder_blocks_and_partition)
661 reorder_loops (loops);
662 free_loops (loops);
664 if (dump_file)
665 fprintf (dump_file, ";; Find loops, second pass\n\n");
667 loops = discover_loops (&loop_stack, hooks);
670 for (loop = loops; loop; loop = loop->next)
671 scan_loop (loop);
673 /* Now apply the optimizations. */
674 for (loop = loops; loop; loop = loop->next)
675 optimize_loop (loop, hooks);
677 if (dump_file)
679 fprintf (dump_file, ";; After hardware loops optimization:\n\n");
680 dump_hwloops (loops);
683 free_loops (loops);
684 bitmap_obstack_release (&loop_stack);
686 if (dump_file)
687 print_rtl (dump_file, get_insns ());
689 #endif