2015-09-25 Vladimir Makarov <vmakarov@redhat.com>
[official-gcc.git] / gcc / hw-doloop.c
blobeadbcd6d51a0e7bc8d6b9fee7c081f3936c3ac06
1 /* Code to analyze doloop loops in order for targets to perform late
2 optimizations converting doloops to other forms of hardware loops.
3 Copyright (C) 2011-2015 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "tree.h"
26 #include "rtl.h"
27 #include "df.h"
28 #include "flags.h"
29 #include "alias.h"
30 #include "insn-config.h"
31 #include "expmed.h"
32 #include "dojump.h"
33 #include "explow.h"
34 #include "calls.h"
35 #include "emit-rtl.h"
36 #include "varasm.h"
37 #include "stmt.h"
38 #include "expr.h"
39 #include "regs.h"
40 #include "cfgrtl.h"
41 #include "tm_p.h"
42 #include "cfgloop.h"
43 #include "recog.h"
44 #include "target.h"
45 #include "hw-doloop.h"
46 #include "dumpfile.h"
48 /* Dump information collected in LOOPS. */
49 static void
50 dump_hwloops (hwloop_info loops)
52 hwloop_info loop;
54 for (loop = loops; loop; loop = loop->next)
56 hwloop_info i;
57 basic_block b;
58 unsigned ix;
60 fprintf (dump_file, ";; loop %d: ", loop->loop_no);
61 if (loop->bad)
62 fprintf (dump_file, "(bad) ");
63 fprintf (dump_file, "{head:%d, depth:%d, reg:%u}",
64 loop->head == NULL ? -1 : loop->head->index,
65 loop->depth, REGNO (loop->iter_reg));
67 fprintf (dump_file, " blocks: [ ");
68 for (ix = 0; loop->blocks.iterate (ix, &b); ix++)
69 fprintf (dump_file, "%d ", b->index);
70 fprintf (dump_file, "] ");
72 fprintf (dump_file, " inner loops: [ ");
73 for (ix = 0; loop->loops.iterate (ix, &i); ix++)
74 fprintf (dump_file, "%d ", i->loop_no);
75 fprintf (dump_file, "]\n");
77 fprintf (dump_file, "\n");
80 /* Return true if BB is part of LOOP. */
81 static bool
82 bb_in_loop_p (hwloop_info loop, basic_block bb)
84 return bitmap_bit_p (loop->block_bitmap, bb->index);
87 /* Scan the blocks of LOOP (and its inferiors), and record things such as
88 hard registers set, jumps out of and within the loop. */
89 static void
90 scan_loop (hwloop_info loop)
92 unsigned ix;
93 basic_block bb;
95 if (loop->bad)
96 return;
98 if (REGNO_REG_SET_P (df_get_live_in (loop->successor),
99 REGNO (loop->iter_reg)))
100 loop->iter_reg_used_outside = true;
102 for (ix = 0; loop->blocks.iterate (ix, &bb); ix++)
104 rtx_insn *insn;
105 edge e;
106 edge_iterator ei;
108 if (bb != loop->tail)
109 FOR_EACH_EDGE (e, ei, bb->succs)
111 if (bb_in_loop_p (loop, e->dest))
113 if (!(e->flags & EDGE_FALLTHRU))
114 loop->jumps_within = true;
116 else
118 loop->jumps_outof = true;
119 if (!loop->bad)
120 gcc_assert (!REGNO_REG_SET_P (df_get_live_in (e->dest),
121 REGNO (loop->iter_reg)));
125 for (insn = BB_HEAD (bb);
126 insn != NEXT_INSN (BB_END (bb));
127 insn = NEXT_INSN (insn))
129 df_ref def;
130 HARD_REG_SET set_this_insn;
132 if (!NONDEBUG_INSN_P (insn))
133 continue;
135 if (recog_memoized (insn) < 0
136 && (GET_CODE (PATTERN (insn)) == ASM_INPUT
137 || asm_noperands (PATTERN (insn)) >= 0))
138 loop->has_asm = true;
140 CLEAR_HARD_REG_SET (set_this_insn);
141 FOR_EACH_INSN_DEF (def, insn)
143 rtx dreg = DF_REF_REG (def);
145 if (!REG_P (dreg))
146 continue;
148 add_to_hard_reg_set (&set_this_insn, GET_MODE (dreg),
149 REGNO (dreg));
152 if (insn == loop->loop_end)
153 CLEAR_HARD_REG_BIT (set_this_insn, REGNO (loop->iter_reg));
154 else if (reg_mentioned_p (loop->iter_reg, PATTERN (insn)))
155 loop->iter_reg_used = true;
156 IOR_HARD_REG_SET (loop->regs_set_in_loop, set_this_insn);
161 /* Compute the incoming_dest and incoming_src members of LOOP by
162 identifying the edges that jump into the loop. If there is more
163 than one block that jumps into the loop, incoming_src will be set
164 to NULL; likewise, if there is more than one block in the loop that
165 is the destination of an incoming edge, incoming_dest will be NULL.
167 Return true if either of these two fields is nonnull, false
168 otherwise. */
169 static bool
170 process_incoming_edges (hwloop_info loop)
172 edge e;
173 edge_iterator ei;
174 bool first = true;
176 FOR_EACH_EDGE (e, ei, loop->incoming)
178 if (first)
180 loop->incoming_src = e->src;
181 loop->incoming_dest = e->dest;
182 first = false;
184 else
186 if (e->dest != loop->incoming_dest)
187 loop->incoming_dest = NULL;
188 if (e->src != loop->incoming_src)
189 loop->incoming_src = NULL;
192 if (loop->incoming_src == NULL && loop->incoming_dest == NULL)
193 return false;
195 return true;
198 /* Try to identify a forwarder block that jump into LOOP, and add it to
199 the set of blocks in the loop, updating the vector of incoming blocks as
200 well. This transformation gives a second chance to loops we couldn't
201 otherwise handle by increasing the chance that we'll end up with one
202 incoming_src block.
203 Return true if we made a change, false otherwise. */
204 static bool
205 add_forwarder_blocks (hwloop_info loop)
207 edge e;
208 edge_iterator ei;
210 FOR_EACH_EDGE (e, ei, loop->incoming)
212 if (forwarder_block_p (e->src))
214 edge e2;
215 edge_iterator ei2;
217 if (dump_file)
218 fprintf (dump_file,
219 ";; Adding forwarder block %d to loop %d and retrying\n",
220 e->src->index, loop->loop_no);
221 loop->blocks.safe_push (e->src);
222 bitmap_set_bit (loop->block_bitmap, e->src->index);
223 FOR_EACH_EDGE (e2, ei2, e->src->preds)
224 vec_safe_push (loop->incoming, e2);
225 loop->incoming->unordered_remove (ei.index);
226 return true;
229 return false;
232 /* Called from reorg_loops when a potential loop end is found. LOOP is
233 a newly set up structure describing the loop, it is this function's
234 responsibility to fill most of it. TAIL_BB and TAIL_INSN point to the
235 loop_end insn and its enclosing basic block. REG is the loop counter
236 register.
237 For our purposes, a loop is defined by the set of blocks reachable from
238 the loop head in which the loop counter register is live. This matches
239 the expected use; targets that call into this code usually replace the
240 loop counter with a different special register. */
241 static void
242 discover_loop (hwloop_info loop, basic_block tail_bb, rtx_insn *tail_insn, rtx reg)
244 bool found_tail;
245 unsigned dwork = 0;
246 basic_block bb;
248 loop->tail = tail_bb;
249 loop->loop_end = tail_insn;
250 loop->iter_reg = reg;
251 vec_alloc (loop->incoming, 2);
252 loop->start_label = as_a <rtx_insn *> (JUMP_LABEL (tail_insn));
254 if (EDGE_COUNT (tail_bb->succs) != 2)
256 loop->bad = true;
257 return;
259 loop->head = BRANCH_EDGE (tail_bb)->dest;
260 loop->successor = FALLTHRU_EDGE (tail_bb)->dest;
262 auto_vec<basic_block, 20> works;
263 works.safe_push (loop->head);
265 found_tail = false;
266 for (dwork = 0; works.iterate (dwork, &bb); dwork++)
268 edge e;
269 edge_iterator ei;
270 if (bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
272 /* We've reached the exit block. The loop must be bad. */
273 if (dump_file)
274 fprintf (dump_file,
275 ";; Loop is bad - reached exit block while scanning\n");
276 loop->bad = true;
277 break;
280 if (bitmap_bit_p (loop->block_bitmap, bb->index))
281 continue;
283 /* We've not seen this block before. Add it to the loop's
284 list and then add each successor to the work list. */
286 loop->blocks.safe_push (bb);
287 bitmap_set_bit (loop->block_bitmap, bb->index);
289 if (bb == tail_bb)
290 found_tail = true;
291 else
293 FOR_EACH_EDGE (e, ei, bb->succs)
295 basic_block succ = EDGE_SUCC (bb, ei.index)->dest;
296 if (REGNO_REG_SET_P (df_get_live_in (succ),
297 REGNO (loop->iter_reg)))
298 works.safe_push (succ);
303 if (!found_tail)
304 loop->bad = true;
306 /* Find the predecessor, and make sure nothing else jumps into this loop. */
307 if (!loop->bad)
309 FOR_EACH_VEC_ELT (loop->blocks, dwork, bb)
311 edge e;
312 edge_iterator ei;
313 FOR_EACH_EDGE (e, ei, bb->preds)
315 basic_block pred = e->src;
317 if (!bb_in_loop_p (loop, pred))
319 if (dump_file)
320 fprintf (dump_file, ";; Loop %d: incoming edge %d -> %d\n",
321 loop->loop_no, pred->index,
322 e->dest->index);
323 vec_safe_push (loop->incoming, e);
328 if (!process_incoming_edges (loop))
330 if (dump_file)
331 fprintf (dump_file,
332 ";; retrying loop %d with forwarder blocks\n",
333 loop->loop_no);
334 if (!add_forwarder_blocks (loop))
336 if (dump_file)
337 fprintf (dump_file, ";; No forwarder blocks found\n");
338 loop->bad = true;
340 else if (!process_incoming_edges (loop))
342 if (dump_file)
343 fprintf (dump_file,
344 ";; can't find suitable entry for loop %d\n",
345 loop->loop_no);
351 /* Analyze the structure of the loops in the current function. Use
352 LOOP_STACK for bitmap allocations. Returns all the valid candidates for
353 hardware loops found in this function. HOOKS is the argument
354 passed to reorg_loops, used here to find the iteration registers
355 from a loop_end pattern. */
356 static hwloop_info
357 discover_loops (bitmap_obstack *loop_stack, struct hw_doloop_hooks *hooks)
359 hwloop_info loops = NULL;
360 hwloop_info loop;
361 basic_block bb;
362 int nloops = 0;
364 /* Find all the possible loop tails. This means searching for every
365 loop_end instruction. For each one found, create a hwloop_info
366 structure and add the head block to the work list. */
367 FOR_EACH_BB_FN (bb, cfun)
369 rtx_insn *tail = BB_END (bb);
370 rtx_insn *insn;
371 rtx reg;
373 while (tail && NOTE_P (tail) && tail != BB_HEAD (bb))
374 tail = PREV_INSN (tail);
376 if (tail == NULL_RTX)
377 continue;
379 if (!JUMP_P (tail))
380 continue;
381 reg = hooks->end_pattern_reg (tail);
382 if (reg == NULL_RTX)
383 continue;
385 /* A possible loop end */
387 /* There's a degenerate case we can handle - an empty loop consisting
388 of only a back branch. Handle that by deleting the branch. */
389 insn = JUMP_LABEL_AS_INSN (tail);
390 while (insn && !NONDEBUG_INSN_P (insn))
391 insn = NEXT_INSN (insn);
392 if (insn == tail)
394 basic_block succ = FALLTHRU_EDGE (bb)->dest;
395 if (dump_file)
397 fprintf (dump_file, ";; degenerate loop ending at\n");
398 print_rtl_single (dump_file, tail);
400 if (!REGNO_REG_SET_P (df_get_live_in (succ), REGNO (reg)))
402 if (dump_file)
403 fprintf (dump_file, ";; deleting it\n");
404 delete_insn_and_edges (tail);
405 continue;
409 loop = XCNEW (struct hwloop_info_d);
410 loop->next = loops;
411 loops = loop;
412 loop->loop_no = nloops++;
413 loop->blocks.create (20);
414 loop->block_bitmap = BITMAP_ALLOC (loop_stack);
416 if (dump_file)
418 fprintf (dump_file, ";; potential loop %d ending at\n",
419 loop->loop_no);
420 print_rtl_single (dump_file, tail);
423 discover_loop (loop, bb, tail, reg);
426 /* Compute loop nestings. Given two loops A and B, either the sets
427 of their blocks don't intersect at all, or one is the subset of
428 the other, or the blocks don't form a good nesting structure. */
429 for (loop = loops; loop; loop = loop->next)
431 hwloop_info other;
433 if (loop->bad)
434 continue;
436 for (other = loops; other; other = other->next)
438 if (other->bad)
439 continue;
441 if (!bitmap_intersect_p (other->block_bitmap, loop->block_bitmap))
442 continue;
443 if (!bitmap_intersect_compl_p (other->block_bitmap,
444 loop->block_bitmap))
445 loop->loops.safe_push (other);
446 else if (!bitmap_intersect_compl_p (loop->block_bitmap,
447 other->block_bitmap))
448 other->loops.safe_push (loop);
449 else
451 if (dump_file)
452 fprintf (dump_file,
453 ";; can't find suitable nesting for loops %d and %d\n",
454 loop->loop_no, other->loop_no);
455 loop->bad = other->bad = true;
460 if (dump_file)
461 dump_hwloops (loops);
463 return loops;
466 /* Free up the loop structures in LOOPS. */
467 static void
468 free_loops (hwloop_info loops)
470 while (loops)
472 hwloop_info loop = loops;
473 loops = loop->next;
474 loop->loops.release ();
475 loop->blocks.release ();
476 BITMAP_FREE (loop->block_bitmap);
477 XDELETE (loop);
481 #define BB_AUX_INDEX(BB) ((intptr_t) (BB)->aux)
483 /* Initialize the aux fields to give ascending indices to basic blocks. */
484 static void
485 set_bb_indices (void)
487 basic_block bb;
488 intptr_t index;
490 index = 0;
491 FOR_EACH_BB_FN (bb, cfun)
492 bb->aux = (void *) index++;
495 /* The taken-branch edge from the loop end can actually go forward.
496 If the target's hardware loop support requires that the loop end be
497 after the loop start, try to reorder a loop's basic blocks when we
498 find such a case.
499 This is not very aggressive; it only moves at most one block. It
500 does not introduce new branches into loops; it may remove them, or
501 it may switch fallthru/jump edges. */
502 static void
503 reorder_loops (hwloop_info loops)
505 basic_block bb;
506 hwloop_info loop;
508 cfg_layout_initialize (0);
510 set_bb_indices ();
512 for (loop = loops; loop; loop = loop->next)
514 edge e;
515 edge_iterator ei;
517 if (loop->bad)
518 continue;
520 if (BB_AUX_INDEX (loop->head) <= BB_AUX_INDEX (loop->tail))
521 continue;
523 FOR_EACH_EDGE (e, ei, loop->head->succs)
525 if (bitmap_bit_p (loop->block_bitmap, e->dest->index)
526 && BB_AUX_INDEX (e->dest) < BB_AUX_INDEX (loop->tail))
528 basic_block start_bb = e->dest;
529 basic_block start_prev_bb = start_bb->prev_bb;
531 if (dump_file)
532 fprintf (dump_file, ";; Moving block %d before block %d\n",
533 loop->head->index, start_bb->index);
534 loop->head->prev_bb->next_bb = loop->head->next_bb;
535 loop->head->next_bb->prev_bb = loop->head->prev_bb;
537 loop->head->prev_bb = start_prev_bb;
538 loop->head->next_bb = start_bb;
539 start_prev_bb->next_bb = start_bb->prev_bb = loop->head;
541 set_bb_indices ();
542 break;
545 loops = loops->next;
548 FOR_EACH_BB_FN (bb, cfun)
550 if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (cfun))
551 bb->aux = bb->next_bb;
552 else
553 bb->aux = NULL;
555 cfg_layout_finalize ();
556 clear_aux_for_blocks ();
557 df_analyze ();
560 /* Call the OPT function for LOOP and all of its sub-loops. This is
561 done in a depth-first search; innermost loops are visited first.
562 OPTIMIZE and FAIL are the functions passed to reorg_loops by the
563 target's reorg pass. */
564 static void
565 optimize_loop (hwloop_info loop, struct hw_doloop_hooks *hooks)
567 int ix;
568 hwloop_info inner;
569 int inner_depth = 0;
571 if (loop->visited)
572 return;
574 loop->visited = 1;
576 if (loop->bad)
578 if (dump_file)
579 fprintf (dump_file, ";; loop %d bad when found\n", loop->loop_no);
580 goto bad_loop;
583 /* Every loop contains in its list of inner loops every loop nested inside
584 it, even if there are intermediate loops. This works because we're doing
585 a depth-first search here and never visit a loop more than once.
586 Recursion depth is effectively limited by the number of available
587 hardware registers. */
588 for (ix = 0; loop->loops.iterate (ix, &inner); ix++)
590 optimize_loop (inner, hooks);
592 if (!inner->bad && inner_depth < inner->depth)
593 inner_depth = inner->depth;
594 /* The set of registers may be changed while optimizing the inner
595 loop. */
596 IOR_HARD_REG_SET (loop->regs_set_in_loop, inner->regs_set_in_loop);
599 loop->depth = inner_depth + 1;
601 if (hooks->opt (loop))
602 return;
604 bad_loop:
605 if (dump_file)
606 fprintf (dump_file, ";; loop %d is bad\n", loop->loop_no);
608 loop->bad = true;
609 hooks->fail (loop);
612 /* This function can be used from a port's machine_dependent_reorg to
613 find and analyze loops that end in loop_end instructions. It uses
614 a set of function pointers in HOOKS to call back into the
615 target-specific functions to perform the actual machine-specific
616 transformations.
618 Such transformations typically involve additional set-up
619 instructions before the loop, to define loop bounds or set up a
620 special loop counter register.
622 DO_REORDER should be set to true if we should try to use the
623 reorder_loops function to ensure the loop end occurs after the loop
624 start. This is for use by targets where the loop hardware requires
625 this condition.
627 HOOKS is used to pass in target specific hooks; see
628 hw-doloop.h. */
629 void
630 reorg_loops (bool do_reorder, struct hw_doloop_hooks *hooks)
632 hwloop_info loops = NULL;
633 hwloop_info loop;
634 bitmap_obstack loop_stack;
636 df_live_add_problem ();
637 df_live_set_all_dirty ();
638 df_analyze ();
640 bitmap_obstack_initialize (&loop_stack);
642 if (dump_file)
643 fprintf (dump_file, ";; Find loops, first pass\n\n");
645 loops = discover_loops (&loop_stack, hooks);
647 /* We can't enter cfglayout mode anymore if basic block partitioning
648 already happened. */
649 if (do_reorder && !flag_reorder_blocks_and_partition)
651 reorder_loops (loops);
652 free_loops (loops);
654 if (dump_file)
655 fprintf (dump_file, ";; Find loops, second pass\n\n");
657 loops = discover_loops (&loop_stack, hooks);
660 for (loop = loops; loop; loop = loop->next)
661 scan_loop (loop);
663 /* Now apply the optimizations. */
664 for (loop = loops; loop; loop = loop->next)
665 optimize_loop (loop, hooks);
667 if (dump_file)
669 fprintf (dump_file, ";; After hardware loops optimization:\n\n");
670 dump_hwloops (loops);
673 free_loops (loops);
674 bitmap_obstack_release (&loop_stack);
676 if (dump_file)
677 print_rtl (dump_file, get_insns ());