gcc/
[official-gcc.git] / gcc / hw-doloop.c
blobe00c3d75a8ccfdd65fc8bb72c5580e50c56f4b11
1 /* Code to analyze doloop loops in order for targets to perform late
2 optimizations converting doloops to other forms of hardware loops.
3 Copyright (C) 2011-2015 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "rtl.h"
26 #include "flags.h"
27 #include "symtab.h"
28 #include "hard-reg-set.h"
29 #include "function.h"
30 #include "alias.h"
31 #include "tree.h"
32 #include "insn-config.h"
33 #include "expmed.h"
34 #include "dojump.h"
35 #include "explow.h"
36 #include "calls.h"
37 #include "emit-rtl.h"
38 #include "varasm.h"
39 #include "stmt.h"
40 #include "expr.h"
41 #include "regs.h"
42 #include "predict.h"
43 #include "dominance.h"
44 #include "cfg.h"
45 #include "cfgrtl.h"
46 #include "basic-block.h"
47 #include "tm_p.h"
48 #include "df.h"
49 #include "cfgloop.h"
50 #include "recog.h"
51 #include "target.h"
52 #include "hw-doloop.h"
53 #include "dumpfile.h"
55 #ifdef HAVE_doloop_end
57 /* Dump information collected in LOOPS. */
58 static void
59 dump_hwloops (hwloop_info loops)
61 hwloop_info loop;
63 for (loop = loops; loop; loop = loop->next)
65 hwloop_info i;
66 basic_block b;
67 unsigned ix;
69 fprintf (dump_file, ";; loop %d: ", loop->loop_no);
70 if (loop->bad)
71 fprintf (dump_file, "(bad) ");
72 fprintf (dump_file, "{head:%d, depth:%d, reg:%u}",
73 loop->head == NULL ? -1 : loop->head->index,
74 loop->depth, REGNO (loop->iter_reg));
76 fprintf (dump_file, " blocks: [ ");
77 for (ix = 0; loop->blocks.iterate (ix, &b); ix++)
78 fprintf (dump_file, "%d ", b->index);
79 fprintf (dump_file, "] ");
81 fprintf (dump_file, " inner loops: [ ");
82 for (ix = 0; loop->loops.iterate (ix, &i); ix++)
83 fprintf (dump_file, "%d ", i->loop_no);
84 fprintf (dump_file, "]\n");
86 fprintf (dump_file, "\n");
89 /* Return true if BB is part of LOOP. */
90 static bool
91 bb_in_loop_p (hwloop_info loop, basic_block bb)
93 return bitmap_bit_p (loop->block_bitmap, bb->index);
96 /* Scan the blocks of LOOP (and its inferiors), and record things such as
97 hard registers set, jumps out of and within the loop. */
98 static void
99 scan_loop (hwloop_info loop)
101 unsigned ix;
102 basic_block bb;
104 if (loop->bad)
105 return;
107 if (REGNO_REG_SET_P (df_get_live_in (loop->successor),
108 REGNO (loop->iter_reg)))
109 loop->iter_reg_used_outside = true;
111 for (ix = 0; loop->blocks.iterate (ix, &bb); ix++)
113 rtx_insn *insn;
114 edge e;
115 edge_iterator ei;
117 if (bb != loop->tail)
118 FOR_EACH_EDGE (e, ei, bb->succs)
120 if (bb_in_loop_p (loop, e->dest))
122 if (!(e->flags & EDGE_FALLTHRU))
123 loop->jumps_within = true;
125 else
127 loop->jumps_outof = true;
128 if (!loop->bad)
129 gcc_assert (!REGNO_REG_SET_P (df_get_live_in (e->dest),
130 REGNO (loop->iter_reg)));
134 for (insn = BB_HEAD (bb);
135 insn != NEXT_INSN (BB_END (bb));
136 insn = NEXT_INSN (insn))
138 df_ref def;
139 HARD_REG_SET set_this_insn;
141 if (!NONDEBUG_INSN_P (insn))
142 continue;
144 if (recog_memoized (insn) < 0
145 && (GET_CODE (PATTERN (insn)) == ASM_INPUT
146 || asm_noperands (PATTERN (insn)) >= 0))
147 loop->has_asm = true;
149 CLEAR_HARD_REG_SET (set_this_insn);
150 FOR_EACH_INSN_DEF (def, insn)
152 rtx dreg = DF_REF_REG (def);
154 if (!REG_P (dreg))
155 continue;
157 add_to_hard_reg_set (&set_this_insn, GET_MODE (dreg),
158 REGNO (dreg));
161 if (insn == loop->loop_end)
162 CLEAR_HARD_REG_BIT (set_this_insn, REGNO (loop->iter_reg));
163 else if (reg_mentioned_p (loop->iter_reg, PATTERN (insn)))
164 loop->iter_reg_used = true;
165 IOR_HARD_REG_SET (loop->regs_set_in_loop, set_this_insn);
170 /* Compute the incoming_dest and incoming_src members of LOOP by
171 identifying the edges that jump into the loop. If there is more
172 than one block that jumps into the loop, incoming_src will be set
173 to NULL; likewise, if there is more than one block in the loop that
174 is the destination of an incoming edge, incoming_dest will be NULL.
176 Return true if either of these two fields is nonnull, false
177 otherwise. */
178 static bool
179 process_incoming_edges (hwloop_info loop)
181 edge e;
182 edge_iterator ei;
183 bool first = true;
185 FOR_EACH_EDGE (e, ei, loop->incoming)
187 if (first)
189 loop->incoming_src = e->src;
190 loop->incoming_dest = e->dest;
191 first = false;
193 else
195 if (e->dest != loop->incoming_dest)
196 loop->incoming_dest = NULL;
197 if (e->src != loop->incoming_src)
198 loop->incoming_src = NULL;
201 if (loop->incoming_src == NULL && loop->incoming_dest == NULL)
202 return false;
204 return true;
207 /* Try to identify a forwarder block that jump into LOOP, and add it to
208 the set of blocks in the loop, updating the vector of incoming blocks as
209 well. This transformation gives a second chance to loops we couldn't
210 otherwise handle by increasing the chance that we'll end up with one
211 incoming_src block.
212 Return true if we made a change, false otherwise. */
213 static bool
214 add_forwarder_blocks (hwloop_info loop)
216 edge e;
217 edge_iterator ei;
219 FOR_EACH_EDGE (e, ei, loop->incoming)
221 if (forwarder_block_p (e->src))
223 edge e2;
224 edge_iterator ei2;
226 if (dump_file)
227 fprintf (dump_file,
228 ";; Adding forwarder block %d to loop %d and retrying\n",
229 e->src->index, loop->loop_no);
230 loop->blocks.safe_push (e->src);
231 bitmap_set_bit (loop->block_bitmap, e->src->index);
232 FOR_EACH_EDGE (e2, ei2, e->src->preds)
233 vec_safe_push (loop->incoming, e2);
234 loop->incoming->unordered_remove (ei.index);
235 return true;
238 return false;
241 /* Called from reorg_loops when a potential loop end is found. LOOP is
242 a newly set up structure describing the loop, it is this function's
243 responsibility to fill most of it. TAIL_BB and TAIL_INSN point to the
244 loop_end insn and its enclosing basic block. REG is the loop counter
245 register.
246 For our purposes, a loop is defined by the set of blocks reachable from
247 the loop head in which the loop counter register is live. This matches
248 the expected use; targets that call into this code usually replace the
249 loop counter with a different special register. */
250 static void
251 discover_loop (hwloop_info loop, basic_block tail_bb, rtx_insn *tail_insn, rtx reg)
253 bool found_tail;
254 unsigned dwork = 0;
255 basic_block bb;
257 loop->tail = tail_bb;
258 loop->loop_end = tail_insn;
259 loop->iter_reg = reg;
260 vec_alloc (loop->incoming, 2);
261 loop->start_label = as_a <rtx_insn *> (JUMP_LABEL (tail_insn));
263 if (EDGE_COUNT (tail_bb->succs) != 2)
265 loop->bad = true;
266 return;
268 loop->head = BRANCH_EDGE (tail_bb)->dest;
269 loop->successor = FALLTHRU_EDGE (tail_bb)->dest;
271 auto_vec<basic_block, 20> works;
272 works.safe_push (loop->head);
274 found_tail = false;
275 for (dwork = 0; works.iterate (dwork, &bb); dwork++)
277 edge e;
278 edge_iterator ei;
279 if (bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
281 /* We've reached the exit block. The loop must be bad. */
282 if (dump_file)
283 fprintf (dump_file,
284 ";; Loop is bad - reached exit block while scanning\n");
285 loop->bad = true;
286 break;
289 if (bitmap_bit_p (loop->block_bitmap, bb->index))
290 continue;
292 /* We've not seen this block before. Add it to the loop's
293 list and then add each successor to the work list. */
295 loop->blocks.safe_push (bb);
296 bitmap_set_bit (loop->block_bitmap, bb->index);
298 if (bb == tail_bb)
299 found_tail = true;
300 else
302 FOR_EACH_EDGE (e, ei, bb->succs)
304 basic_block succ = EDGE_SUCC (bb, ei.index)->dest;
305 if (REGNO_REG_SET_P (df_get_live_in (succ),
306 REGNO (loop->iter_reg)))
307 works.safe_push (succ);
312 if (!found_tail)
313 loop->bad = true;
315 /* Find the predecessor, and make sure nothing else jumps into this loop. */
316 if (!loop->bad)
318 FOR_EACH_VEC_ELT (loop->blocks, dwork, bb)
320 edge e;
321 edge_iterator ei;
322 FOR_EACH_EDGE (e, ei, bb->preds)
324 basic_block pred = e->src;
326 if (!bb_in_loop_p (loop, pred))
328 if (dump_file)
329 fprintf (dump_file, ";; Loop %d: incoming edge %d -> %d\n",
330 loop->loop_no, pred->index,
331 e->dest->index);
332 vec_safe_push (loop->incoming, e);
337 if (!process_incoming_edges (loop))
339 if (dump_file)
340 fprintf (dump_file,
341 ";; retrying loop %d with forwarder blocks\n",
342 loop->loop_no);
343 if (!add_forwarder_blocks (loop))
345 if (dump_file)
346 fprintf (dump_file, ";; No forwarder blocks found\n");
347 loop->bad = true;
349 else if (!process_incoming_edges (loop))
351 if (dump_file)
352 fprintf (dump_file,
353 ";; can't find suitable entry for loop %d\n",
354 loop->loop_no);
360 /* Analyze the structure of the loops in the current function. Use
361 LOOP_STACK for bitmap allocations. Returns all the valid candidates for
362 hardware loops found in this function. HOOKS is the argument
363 passed to reorg_loops, used here to find the iteration registers
364 from a loop_end pattern. */
365 static hwloop_info
366 discover_loops (bitmap_obstack *loop_stack, struct hw_doloop_hooks *hooks)
368 hwloop_info loops = NULL;
369 hwloop_info loop;
370 basic_block bb;
371 int nloops = 0;
373 /* Find all the possible loop tails. This means searching for every
374 loop_end instruction. For each one found, create a hwloop_info
375 structure and add the head block to the work list. */
376 FOR_EACH_BB_FN (bb, cfun)
378 rtx_insn *tail = BB_END (bb);
379 rtx_insn *insn;
380 rtx reg;
382 while (tail && NOTE_P (tail) && tail != BB_HEAD (bb))
383 tail = PREV_INSN (tail);
385 if (tail == NULL_RTX)
386 continue;
388 if (!JUMP_P (tail))
389 continue;
390 reg = hooks->end_pattern_reg (tail);
391 if (reg == NULL_RTX)
392 continue;
394 /* A possible loop end */
396 /* There's a degenerate case we can handle - an empty loop consisting
397 of only a back branch. Handle that by deleting the branch. */
398 insn = JUMP_LABEL_AS_INSN (tail);
399 while (insn && !NONDEBUG_INSN_P (insn))
400 insn = NEXT_INSN (insn);
401 if (insn == tail)
403 basic_block succ = FALLTHRU_EDGE (bb)->dest;
404 if (dump_file)
406 fprintf (dump_file, ";; degenerate loop ending at\n");
407 print_rtl_single (dump_file, tail);
409 if (!REGNO_REG_SET_P (df_get_live_in (succ), REGNO (reg)))
411 if (dump_file)
412 fprintf (dump_file, ";; deleting it\n");
413 delete_insn_and_edges (tail);
414 continue;
418 loop = XCNEW (struct hwloop_info_d);
419 loop->next = loops;
420 loops = loop;
421 loop->loop_no = nloops++;
422 loop->blocks.create (20);
423 loop->block_bitmap = BITMAP_ALLOC (loop_stack);
425 if (dump_file)
427 fprintf (dump_file, ";; potential loop %d ending at\n",
428 loop->loop_no);
429 print_rtl_single (dump_file, tail);
432 discover_loop (loop, bb, tail, reg);
435 /* Compute loop nestings. Given two loops A and B, either the sets
436 of their blocks don't intersect at all, or one is the subset of
437 the other, or the blocks don't form a good nesting structure. */
438 for (loop = loops; loop; loop = loop->next)
440 hwloop_info other;
442 if (loop->bad)
443 continue;
445 for (other = loops; other; other = other->next)
447 if (other->bad)
448 continue;
450 if (!bitmap_intersect_p (other->block_bitmap, loop->block_bitmap))
451 continue;
452 if (!bitmap_intersect_compl_p (other->block_bitmap,
453 loop->block_bitmap))
454 loop->loops.safe_push (other);
455 else if (!bitmap_intersect_compl_p (loop->block_bitmap,
456 other->block_bitmap))
457 other->loops.safe_push (loop);
458 else
460 if (dump_file)
461 fprintf (dump_file,
462 ";; can't find suitable nesting for loops %d and %d\n",
463 loop->loop_no, other->loop_no);
464 loop->bad = other->bad = true;
469 if (dump_file)
470 dump_hwloops (loops);
472 return loops;
475 /* Free up the loop structures in LOOPS. */
476 static void
477 free_loops (hwloop_info loops)
479 while (loops)
481 hwloop_info loop = loops;
482 loops = loop->next;
483 loop->loops.release ();
484 loop->blocks.release ();
485 BITMAP_FREE (loop->block_bitmap);
486 XDELETE (loop);
490 #define BB_AUX_INDEX(BB) ((intptr_t) (BB)->aux)
492 /* Initialize the aux fields to give ascending indices to basic blocks. */
493 static void
494 set_bb_indices (void)
496 basic_block bb;
497 intptr_t index;
499 index = 0;
500 FOR_EACH_BB_FN (bb, cfun)
501 bb->aux = (void *) index++;
504 /* The taken-branch edge from the loop end can actually go forward.
505 If the target's hardware loop support requires that the loop end be
506 after the loop start, try to reorder a loop's basic blocks when we
507 find such a case.
508 This is not very aggressive; it only moves at most one block. It
509 does not introduce new branches into loops; it may remove them, or
510 it may switch fallthru/jump edges. */
511 static void
512 reorder_loops (hwloop_info loops)
514 basic_block bb;
515 hwloop_info loop;
517 cfg_layout_initialize (0);
519 set_bb_indices ();
521 for (loop = loops; loop; loop = loop->next)
523 edge e;
524 edge_iterator ei;
526 if (loop->bad)
527 continue;
529 if (BB_AUX_INDEX (loop->head) <= BB_AUX_INDEX (loop->tail))
530 continue;
532 FOR_EACH_EDGE (e, ei, loop->head->succs)
534 if (bitmap_bit_p (loop->block_bitmap, e->dest->index)
535 && BB_AUX_INDEX (e->dest) < BB_AUX_INDEX (loop->tail))
537 basic_block start_bb = e->dest;
538 basic_block start_prev_bb = start_bb->prev_bb;
540 if (dump_file)
541 fprintf (dump_file, ";; Moving block %d before block %d\n",
542 loop->head->index, start_bb->index);
543 loop->head->prev_bb->next_bb = loop->head->next_bb;
544 loop->head->next_bb->prev_bb = loop->head->prev_bb;
546 loop->head->prev_bb = start_prev_bb;
547 loop->head->next_bb = start_bb;
548 start_prev_bb->next_bb = start_bb->prev_bb = loop->head;
550 set_bb_indices ();
551 break;
554 loops = loops->next;
557 FOR_EACH_BB_FN (bb, cfun)
559 if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (cfun))
560 bb->aux = bb->next_bb;
561 else
562 bb->aux = NULL;
564 cfg_layout_finalize ();
565 clear_aux_for_blocks ();
566 df_analyze ();
569 /* Call the OPT function for LOOP and all of its sub-loops. This is
570 done in a depth-first search; innermost loops are visited first.
571 OPTIMIZE and FAIL are the functions passed to reorg_loops by the
572 target's reorg pass. */
573 static void
574 optimize_loop (hwloop_info loop, struct hw_doloop_hooks *hooks)
576 int ix;
577 hwloop_info inner;
578 int inner_depth = 0;
580 if (loop->visited)
581 return;
583 loop->visited = 1;
585 if (loop->bad)
587 if (dump_file)
588 fprintf (dump_file, ";; loop %d bad when found\n", loop->loop_no);
589 goto bad_loop;
592 /* Every loop contains in its list of inner loops every loop nested inside
593 it, even if there are intermediate loops. This works because we're doing
594 a depth-first search here and never visit a loop more than once.
595 Recursion depth is effectively limited by the number of available
596 hardware registers. */
597 for (ix = 0; loop->loops.iterate (ix, &inner); ix++)
599 optimize_loop (inner, hooks);
601 if (!inner->bad && inner_depth < inner->depth)
602 inner_depth = inner->depth;
603 /* The set of registers may be changed while optimizing the inner
604 loop. */
605 IOR_HARD_REG_SET (loop->regs_set_in_loop, inner->regs_set_in_loop);
608 loop->depth = inner_depth + 1;
610 if (hooks->opt (loop))
611 return;
613 bad_loop:
614 if (dump_file)
615 fprintf (dump_file, ";; loop %d is bad\n", loop->loop_no);
617 loop->bad = true;
618 hooks->fail (loop);
621 /* This function can be used from a port's machine_dependent_reorg to
622 find and analyze loops that end in loop_end instructions. It uses
623 a set of function pointers in HOOKS to call back into the
624 target-specific functions to perform the actual machine-specific
625 transformations.
627 Such transformations typically involve additional set-up
628 instructions before the loop, to define loop bounds or set up a
629 special loop counter register.
631 DO_REORDER should be set to true if we should try to use the
632 reorder_loops function to ensure the loop end occurs after the loop
633 start. This is for use by targets where the loop hardware requires
634 this condition.
636 HOOKS is used to pass in target specific hooks; see
637 hw-doloop.h. */
638 void
639 reorg_loops (bool do_reorder, struct hw_doloop_hooks *hooks)
641 hwloop_info loops = NULL;
642 hwloop_info loop;
643 bitmap_obstack loop_stack;
645 df_live_add_problem ();
646 df_live_set_all_dirty ();
647 df_analyze ();
649 bitmap_obstack_initialize (&loop_stack);
651 if (dump_file)
652 fprintf (dump_file, ";; Find loops, first pass\n\n");
654 loops = discover_loops (&loop_stack, hooks);
656 /* We can't enter cfglayout mode anymore if basic block partitioning
657 already happened. */
658 if (do_reorder && !flag_reorder_blocks_and_partition)
660 reorder_loops (loops);
661 free_loops (loops);
663 if (dump_file)
664 fprintf (dump_file, ";; Find loops, second pass\n\n");
666 loops = discover_loops (&loop_stack, hooks);
669 for (loop = loops; loop; loop = loop->next)
670 scan_loop (loop);
672 /* Now apply the optimizations. */
673 for (loop = loops; loop; loop = loop->next)
674 optimize_loop (loop, hooks);
676 if (dump_file)
678 fprintf (dump_file, ";; After hardware loops optimization:\n\n");
679 dump_hwloops (loops);
682 free_loops (loops);
683 bitmap_obstack_release (&loop_stack);
685 if (dump_file)
686 print_rtl (dump_file, get_insns ());
688 #endif