Merge branch 'master' into python
[official-gcc.git] / gcc / loop-doloop.c
blob4f0850b7702a5015ab71dc891ab7344199fa202b
1 /* Perform doloop optimizations
2 Copyright (C) 2004, 2005, 2006, 2007, 2008, 2010
3 Free Software Foundation, Inc.
4 Based on code by Michael P. Hayes (m.hayes@elec.canterbury.ac.nz)
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "flags.h"
28 #include "expr.h"
29 #include "hard-reg-set.h"
30 #include "basic-block.h"
31 #include "diagnostic-core.h"
32 #include "toplev.h"
33 #include "tm_p.h"
34 #include "cfgloop.h"
35 #include "output.h"
36 #include "params.h"
37 #include "target.h"
39 /* This module is used to modify loops with a determinable number of
40 iterations to use special low-overhead looping instructions.
42 It first validates whether the loop is well behaved and has a
43 determinable number of iterations (either at compile or run-time).
44 It then modifies the loop to use a low-overhead looping pattern as
45 follows:
47 1. A pseudo register is allocated as the loop iteration counter.
49 2. The number of loop iterations is calculated and is stored
50 in the loop counter.
52 3. At the end of the loop, the jump insn is replaced by the
53 doloop_end pattern. The compare must remain because it might be
54 used elsewhere. If the loop-variable or condition register are
55 used elsewhere, they will be eliminated by flow.
57 4. An optional doloop_begin pattern is inserted at the top of the
58 loop.
60 TODO The optimization should only performed when either the biv used for exit
61 condition is unused at all except for the exit test, or if we do not have to
62 change its value, since otherwise we have to add a new induction variable,
63 which usually will not pay up (unless the cost of the doloop pattern is
64 somehow extremely lower than the cost of compare & jump, or unless the bct
65 register cannot be used for anything else but doloop -- ??? detect these
66 cases). */
68 #ifdef HAVE_doloop_end
70 /* Return the loop termination condition for PATTERN or zero
71 if it is not a decrement and branch jump insn. */
73 rtx
74 doloop_condition_get (rtx doloop_pat)
76 rtx cmp;
77 rtx inc;
78 rtx reg;
79 rtx inc_src;
80 rtx condition;
81 rtx pattern;
83 /* The canonical doloop pattern we expect has one of the following
84 forms:
86 1) (parallel [(set (pc) (if_then_else (condition)
87 (label_ref (label))
88 (pc)))
89 (set (reg) (plus (reg) (const_int -1)))
90 (additional clobbers and uses)])
92 The branch must be the first entry of the parallel (also required
93 by jump.c), and the second entry of the parallel must be a set of
94 the loop counter register. Some targets (IA-64) wrap the set of
95 the loop counter in an if_then_else too.
97 2) (set (reg) (plus (reg) (const_int -1))
98 (set (pc) (if_then_else (reg != 0)
99 (label_ref (label))
100 (pc))). */
102 pattern = PATTERN (doloop_pat);
104 if (GET_CODE (pattern) != PARALLEL)
106 rtx cond;
107 rtx prev_insn = prev_nondebug_insn (doloop_pat);
109 /* We expect the decrement to immediately precede the branch. */
111 if (prev_insn == NULL_RTX || !INSN_P (prev_insn))
112 return 0;
114 cmp = pattern;
115 inc = PATTERN (PREV_INSN (doloop_pat));
116 /* We expect the condition to be of the form (reg != 0) */
117 cond = XEXP (SET_SRC (cmp), 0);
118 if (GET_CODE (cond) != NE || XEXP (cond, 1) != const0_rtx)
119 return 0;
122 else
124 cmp = XVECEXP (pattern, 0, 0);
125 inc = XVECEXP (pattern, 0, 1);
128 /* Check for (set (reg) (something)). */
129 if (GET_CODE (inc) != SET)
130 return 0;
131 reg = SET_DEST (inc);
132 if (! REG_P (reg))
133 return 0;
135 /* Check if something = (plus (reg) (const_int -1)).
136 On IA-64, this decrement is wrapped in an if_then_else. */
137 inc_src = SET_SRC (inc);
138 if (GET_CODE (inc_src) == IF_THEN_ELSE)
139 inc_src = XEXP (inc_src, 1);
140 if (GET_CODE (inc_src) != PLUS
141 || XEXP (inc_src, 0) != reg
142 || XEXP (inc_src, 1) != constm1_rtx)
143 return 0;
145 /* Check for (set (pc) (if_then_else (condition)
146 (label_ref (label))
147 (pc))). */
148 if (GET_CODE (cmp) != SET
149 || SET_DEST (cmp) != pc_rtx
150 || GET_CODE (SET_SRC (cmp)) != IF_THEN_ELSE
151 || GET_CODE (XEXP (SET_SRC (cmp), 1)) != LABEL_REF
152 || XEXP (SET_SRC (cmp), 2) != pc_rtx)
153 return 0;
155 /* Extract loop termination condition. */
156 condition = XEXP (SET_SRC (cmp), 0);
158 /* We expect a GE or NE comparison with 0 or 1. */
159 if ((GET_CODE (condition) != GE
160 && GET_CODE (condition) != NE)
161 || (XEXP (condition, 1) != const0_rtx
162 && XEXP (condition, 1) != const1_rtx))
163 return 0;
165 if ((XEXP (condition, 0) == reg)
166 || (GET_CODE (XEXP (condition, 0)) == PLUS
167 && XEXP (XEXP (condition, 0), 0) == reg))
169 if (GET_CODE (pattern) != PARALLEL)
170 /* The second form we expect:
172 (set (reg) (plus (reg) (const_int -1))
173 (set (pc) (if_then_else (reg != 0)
174 (label_ref (label))
175 (pc))).
177 is equivalent to the following:
179 (parallel [(set (pc) (if_then_else (reg != 1)
180 (label_ref (label))
181 (pc)))
182 (set (reg) (plus (reg) (const_int -1)))
183 (additional clobbers and uses)])
185 So we return that form instead.
187 condition = gen_rtx_fmt_ee (NE, VOIDmode, inc_src, const1_rtx);
189 return condition;
192 /* ??? If a machine uses a funny comparison, we could return a
193 canonicalized form here. */
195 return 0;
198 /* Return nonzero if the loop specified by LOOP is suitable for
199 the use of special low-overhead looping instructions. DESC
200 describes the number of iterations of the loop. */
202 static bool
203 doloop_valid_p (struct loop *loop, struct niter_desc *desc)
205 basic_block *body = get_loop_body (loop), bb;
206 rtx insn;
207 unsigned i;
208 bool result = true;
210 /* Check for loops that may not terminate under special conditions. */
211 if (!desc->simple_p
212 || desc->assumptions
213 || desc->infinite)
215 /* There are some cases that would require a special attention.
216 For example if the comparison is LEU and the comparison value
217 is UINT_MAX then the loop will not terminate. Similarly, if the
218 comparison code is GEU and the comparison value is 0, the
219 loop will not terminate.
221 If the absolute increment is not 1, the loop can be infinite
222 even with LTU/GTU, e.g. for (i = 3; i > 0; i -= 2)
224 ??? We could compute these conditions at run-time and have a
225 additional jump around the loop to ensure an infinite loop.
226 However, it is very unlikely that this is the intended
227 behavior of the loop and checking for these rare boundary
228 conditions would pessimize all other code.
230 If the loop is executed only a few times an extra check to
231 restart the loop could use up most of the benefits of using a
232 count register loop. Note however, that normally, this
233 restart branch would never execute, so it could be predicted
234 well by the CPU. We should generate the pessimistic code by
235 default, and have an option, e.g. -funsafe-loops that would
236 enable count-register loops in this case. */
237 if (dump_file)
238 fprintf (dump_file, "Doloop: Possible infinite iteration case.\n");
239 result = false;
240 goto cleanup;
243 for (i = 0; i < loop->num_nodes; i++)
245 bb = body[i];
247 for (insn = BB_HEAD (bb);
248 insn != NEXT_INSN (BB_END (bb));
249 insn = NEXT_INSN (insn))
251 /* Different targets have different necessities for low-overhead
252 looping. Call the back end for each instruction within the loop
253 to let it decide whether the insn prohibits a low-overhead loop.
254 It will then return the cause for it to emit to the dump file. */
255 const char * invalid = targetm.invalid_within_doloop (insn);
256 if (invalid)
258 if (dump_file)
259 fprintf (dump_file, "Doloop: %s\n", invalid);
260 result = false;
261 goto cleanup;
265 result = true;
267 cleanup:
268 free (body);
270 return result;
273 /* Adds test of COND jumping to DEST on edge *E and set *E to the new fallthru
274 edge. If the condition is always false, do not do anything. If it is always
275 true, redirect E to DEST and return false. In all other cases, true is
276 returned. */
278 static bool
279 add_test (rtx cond, edge *e, basic_block dest)
281 rtx seq, jump, label;
282 enum machine_mode mode;
283 rtx op0 = XEXP (cond, 0), op1 = XEXP (cond, 1);
284 enum rtx_code code = GET_CODE (cond);
285 basic_block bb;
287 mode = GET_MODE (XEXP (cond, 0));
288 if (mode == VOIDmode)
289 mode = GET_MODE (XEXP (cond, 1));
291 start_sequence ();
292 op0 = force_operand (op0, NULL_RTX);
293 op1 = force_operand (op1, NULL_RTX);
294 label = block_label (dest);
295 do_compare_rtx_and_jump (op0, op1, code, 0, mode, NULL_RTX,
296 NULL_RTX, label, -1);
298 jump = get_last_insn ();
299 if (!jump || !JUMP_P (jump))
301 /* The condition is always false and the jump was optimized out. */
302 end_sequence ();
303 return true;
306 seq = get_insns ();
307 end_sequence ();
309 /* There always is at least the jump insn in the sequence. */
310 gcc_assert (seq != NULL_RTX);
312 bb = split_edge_and_insert (*e, seq);
313 *e = single_succ_edge (bb);
315 if (any_uncondjump_p (jump))
317 /* The condition is always true. */
318 delete_insn (jump);
319 redirect_edge_and_branch_force (*e, dest);
320 return false;
323 JUMP_LABEL (jump) = label;
325 /* The jump is supposed to handle an unlikely special case. */
326 add_reg_note (jump, REG_BR_PROB, const0_rtx);
328 LABEL_NUSES (label)++;
330 make_edge (bb, dest, (*e)->flags & ~EDGE_FALLTHRU);
331 return true;
334 /* Modify the loop to use the low-overhead looping insn where LOOP
335 describes the loop, DESC describes the number of iterations of the
336 loop, and DOLOOP_INSN is the low-overhead looping insn to emit at the
337 end of the loop. CONDITION is the condition separated from the
338 DOLOOP_SEQ. COUNT is the number of iterations of the LOOP.
339 ZERO_EXTEND_P says to zero extend COUNT after the increment of it to
340 word_mode from FROM_MODE. */
342 static void
343 doloop_modify (struct loop *loop, struct niter_desc *desc,
344 rtx doloop_seq, rtx condition, rtx count,
345 bool zero_extend_p, enum machine_mode from_mode)
347 rtx counter_reg;
348 rtx tmp, noloop = NULL_RTX;
349 rtx sequence;
350 rtx jump_insn;
351 rtx jump_label;
352 int nonneg = 0;
353 bool increment_count;
354 basic_block loop_end = desc->out_edge->src;
355 enum machine_mode mode;
356 rtx true_prob_val;
358 jump_insn = BB_END (loop_end);
360 if (dump_file)
362 fprintf (dump_file, "Doloop: Inserting doloop pattern (");
363 if (desc->const_iter)
364 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter);
365 else
366 fputs ("runtime", dump_file);
367 fputs (" iterations).\n", dump_file);
370 /* Get the probability of the original branch. If it exists we would
371 need to update REG_BR_PROB of the new jump_insn. */
372 true_prob_val = find_reg_note (jump_insn, REG_BR_PROB, NULL_RTX);
374 /* Discard original jump to continue loop. The original compare
375 result may still be live, so it cannot be discarded explicitly. */
376 delete_insn (jump_insn);
378 counter_reg = XEXP (condition, 0);
379 if (GET_CODE (counter_reg) == PLUS)
380 counter_reg = XEXP (counter_reg, 0);
381 mode = GET_MODE (counter_reg);
383 increment_count = false;
384 switch (GET_CODE (condition))
386 case NE:
387 /* Currently only NE tests against zero and one are supported. */
388 noloop = XEXP (condition, 1);
389 if (noloop != const0_rtx)
391 gcc_assert (noloop == const1_rtx);
392 increment_count = true;
394 break;
396 case GE:
397 /* Currently only GE tests against zero are supported. */
398 gcc_assert (XEXP (condition, 1) == const0_rtx);
400 noloop = constm1_rtx;
402 /* The iteration count does not need incrementing for a GE test. */
403 increment_count = false;
405 /* Determine if the iteration counter will be non-negative.
406 Note that the maximum value loaded is iterations_max - 1. */
407 if (desc->niter_max
408 <= ((unsigned HOST_WIDEST_INT) 1
409 << (GET_MODE_BITSIZE (mode) - 1)))
410 nonneg = 1;
411 break;
413 /* Abort if an invalid doloop pattern has been generated. */
414 default:
415 gcc_unreachable ();
418 if (increment_count)
419 count = simplify_gen_binary (PLUS, from_mode, count, const1_rtx);
421 if (zero_extend_p)
422 count = simplify_gen_unary (ZERO_EXTEND, word_mode,
423 count, from_mode);
425 /* Insert initialization of the count register into the loop header. */
426 start_sequence ();
427 tmp = force_operand (count, counter_reg);
428 convert_move (counter_reg, tmp, 1);
429 sequence = get_insns ();
430 end_sequence ();
431 emit_insn_after (sequence, BB_END (loop_preheader_edge (loop)->src));
433 if (desc->noloop_assumptions)
435 rtx ass = copy_rtx (desc->noloop_assumptions);
436 basic_block preheader = loop_preheader_edge (loop)->src;
437 basic_block set_zero
438 = split_edge (loop_preheader_edge (loop));
439 basic_block new_preheader
440 = split_edge (loop_preheader_edge (loop));
441 edge te;
443 /* Expand the condition testing the assumptions and if it does not pass,
444 reset the count register to 0. */
445 redirect_edge_and_branch_force (single_succ_edge (preheader), new_preheader);
446 set_immediate_dominator (CDI_DOMINATORS, new_preheader, preheader);
448 set_zero->count = 0;
449 set_zero->frequency = 0;
451 te = single_succ_edge (preheader);
452 for (; ass; ass = XEXP (ass, 1))
453 if (!add_test (XEXP (ass, 0), &te, set_zero))
454 break;
456 if (ass)
458 /* We reached a condition that is always true. This is very hard to
459 reproduce (such a loop does not roll, and thus it would most
460 likely get optimized out by some of the preceding optimizations).
461 In fact, I do not have any testcase for it. However, it would
462 also be very hard to show that it is impossible, so we must
463 handle this case. */
464 set_zero->count = preheader->count;
465 set_zero->frequency = preheader->frequency;
468 if (EDGE_COUNT (set_zero->preds) == 0)
470 /* All the conditions were simplified to false, remove the
471 unreachable set_zero block. */
472 delete_basic_block (set_zero);
474 else
476 /* Reset the counter to zero in the set_zero block. */
477 start_sequence ();
478 convert_move (counter_reg, noloop, 0);
479 sequence = get_insns ();
480 end_sequence ();
481 emit_insn_after (sequence, BB_END (set_zero));
483 set_immediate_dominator (CDI_DOMINATORS, set_zero,
484 recompute_dominator (CDI_DOMINATORS,
485 set_zero));
488 set_immediate_dominator (CDI_DOMINATORS, new_preheader,
489 recompute_dominator (CDI_DOMINATORS,
490 new_preheader));
493 /* Some targets (eg, C4x) need to initialize special looping
494 registers. */
495 #ifdef HAVE_doloop_begin
497 rtx init;
498 unsigned level = get_loop_level (loop) + 1;
499 init = gen_doloop_begin (counter_reg,
500 desc->const_iter ? desc->niter_expr : const0_rtx,
501 GEN_INT (desc->niter_max),
502 GEN_INT (level));
503 if (init)
505 start_sequence ();
506 emit_insn (init);
507 sequence = get_insns ();
508 end_sequence ();
509 emit_insn_after (sequence, BB_END (loop_preheader_edge (loop)->src));
512 #endif
514 /* Insert the new low-overhead looping insn. */
515 emit_jump_insn_after (doloop_seq, BB_END (loop_end));
516 jump_insn = BB_END (loop_end);
517 jump_label = block_label (desc->in_edge->dest);
518 JUMP_LABEL (jump_insn) = jump_label;
519 LABEL_NUSES (jump_label)++;
521 /* Ensure the right fallthru edge is marked, for case we have reversed
522 the condition. */
523 desc->in_edge->flags &= ~EDGE_FALLTHRU;
524 desc->out_edge->flags |= EDGE_FALLTHRU;
526 /* Add a REG_NONNEG note if the actual or estimated maximum number
527 of iterations is non-negative. */
528 if (nonneg)
529 add_reg_note (jump_insn, REG_NONNEG, NULL_RTX);
531 /* Update the REG_BR_PROB note. */
532 if (true_prob_val)
534 /* Seems safer to use the branch probability. */
535 add_reg_note (jump_insn, REG_BR_PROB,
536 GEN_INT (desc->in_edge->probability));
540 /* Process loop described by LOOP validating that the loop is suitable for
541 conversion to use a low overhead looping instruction, replacing the jump
542 insn where suitable. Returns true if the loop was successfully
543 modified. */
545 static bool
546 doloop_optimize (struct loop *loop)
548 enum machine_mode mode;
549 rtx doloop_seq, doloop_pat, doloop_reg;
550 rtx iterations, count;
551 rtx iterations_max;
552 rtx start_label;
553 rtx condition;
554 unsigned level, est_niter;
555 int max_cost;
556 struct niter_desc *desc;
557 unsigned word_mode_size;
558 unsigned HOST_WIDE_INT word_mode_max;
559 bool zero_extend_p = false;
561 if (dump_file)
562 fprintf (dump_file, "Doloop: Processing loop %d.\n", loop->num);
564 iv_analysis_loop_init (loop);
566 /* Find the simple exit of a LOOP. */
567 desc = get_simple_loop_desc (loop);
569 /* Check that loop is a candidate for a low-overhead looping insn. */
570 if (!doloop_valid_p (loop, desc))
572 if (dump_file)
573 fprintf (dump_file,
574 "Doloop: The loop is not suitable.\n");
575 return false;
577 mode = desc->mode;
579 est_niter = 3;
580 if (desc->const_iter)
581 est_niter = desc->niter;
582 /* If the estimate on number of iterations is reliable (comes from profile
583 feedback), use it. Do not use it normally, since the expected number
584 of iterations of an unrolled loop is 2. */
585 if (loop->header->count)
586 est_niter = expected_loop_iterations (loop);
588 if (est_niter < 3)
590 if (dump_file)
591 fprintf (dump_file,
592 "Doloop: Too few iterations (%u) to be profitable.\n",
593 est_niter);
594 return false;
597 max_cost
598 = COSTS_N_INSNS (PARAM_VALUE (PARAM_MAX_ITERATIONS_COMPUTATION_COST));
599 if (rtx_cost (desc->niter_expr, SET, optimize_loop_for_speed_p (loop))
600 > max_cost)
602 if (dump_file)
603 fprintf (dump_file,
604 "Doloop: number of iterations too costly to compute.\n");
605 return false;
608 count = copy_rtx (desc->niter_expr);
609 iterations = desc->const_iter ? desc->niter_expr : const0_rtx;
610 iterations_max = GEN_INT (desc->niter_max);
611 level = get_loop_level (loop) + 1;
613 /* Generate looping insn. If the pattern FAILs then give up trying
614 to modify the loop since there is some aspect the back-end does
615 not like. */
616 start_label = block_label (desc->in_edge->dest);
617 doloop_reg = gen_reg_rtx (mode);
618 doloop_seq = gen_doloop_end (doloop_reg, iterations, iterations_max,
619 GEN_INT (level), start_label);
621 word_mode_size = GET_MODE_BITSIZE (word_mode);
622 word_mode_max
623 = ((unsigned HOST_WIDE_INT) 1 << (word_mode_size - 1) << 1) - 1;
624 if (! doloop_seq
625 && mode != word_mode
626 /* Before trying mode different from the one in that # of iterations is
627 computed, we must be sure that the number of iterations fits into
628 the new mode. */
629 && (word_mode_size >= GET_MODE_BITSIZE (mode)
630 || desc->niter_max <= word_mode_max))
632 if (word_mode_size > GET_MODE_BITSIZE (mode))
634 zero_extend_p = true;
635 iterations = simplify_gen_unary (ZERO_EXTEND, word_mode,
636 iterations, mode);
637 iterations_max = simplify_gen_unary (ZERO_EXTEND, word_mode,
638 iterations_max, mode);
640 else
642 count = lowpart_subreg (word_mode, count, mode);
643 iterations = lowpart_subreg (word_mode, iterations, mode);
644 iterations_max = lowpart_subreg (word_mode, iterations_max, mode);
646 PUT_MODE (doloop_reg, word_mode);
647 doloop_seq = gen_doloop_end (doloop_reg, iterations, iterations_max,
648 GEN_INT (level), start_label);
650 if (! doloop_seq)
652 if (dump_file)
653 fprintf (dump_file,
654 "Doloop: Target unwilling to use doloop pattern!\n");
655 return false;
658 /* If multiple instructions were created, the last must be the
659 jump instruction. Also, a raw define_insn may yield a plain
660 pattern. */
661 doloop_pat = doloop_seq;
662 if (INSN_P (doloop_pat))
664 while (NEXT_INSN (doloop_pat) != NULL_RTX)
665 doloop_pat = NEXT_INSN (doloop_pat);
666 if (!JUMP_P (doloop_pat))
667 doloop_pat = NULL_RTX;
670 if (! doloop_pat
671 || ! (condition = doloop_condition_get (doloop_pat)))
673 if (dump_file)
674 fprintf (dump_file, "Doloop: Unrecognizable doloop pattern!\n");
675 return false;
678 doloop_modify (loop, desc, doloop_seq, condition, count,
679 zero_extend_p, mode);
680 return true;
683 /* This is the main entry point. Process all loops using doloop_optimize. */
685 void
686 doloop_optimize_loops (void)
688 loop_iterator li;
689 struct loop *loop;
691 FOR_EACH_LOOP (li, loop, 0)
693 doloop_optimize (loop);
696 iv_analysis_done ();
698 #ifdef ENABLE_CHECKING
699 verify_dominators (CDI_DOMINATORS);
700 verify_loop_structure ();
701 #endif
703 #endif /* HAVE_doloop_end */