1 /* Perform doloop optimizations
2 Copyright (C) 2004, 2005, 2006, 2007, 2008, 2010
3 Free Software Foundation, Inc.
4 Based on code by Michael P. Hayes (m.hayes@elec.canterbury.ac.nz)
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
29 #include "hard-reg-set.h"
30 #include "basic-block.h"
31 #include "diagnostic-core.h"
39 /* This module is used to modify loops with a determinable number of
40 iterations to use special low-overhead looping instructions.
42 It first validates whether the loop is well behaved and has a
43 determinable number of iterations (either at compile or run-time).
44 It then modifies the loop to use a low-overhead looping pattern as
47 1. A pseudo register is allocated as the loop iteration counter.
49 2. The number of loop iterations is calculated and is stored
52 3. At the end of the loop, the jump insn is replaced by the
53 doloop_end pattern. The compare must remain because it might be
54 used elsewhere. If the loop-variable or condition register are
55 used elsewhere, they will be eliminated by flow.
57 4. An optional doloop_begin pattern is inserted at the top of the
60 TODO The optimization should only performed when either the biv used for exit
61 condition is unused at all except for the exit test, or if we do not have to
62 change its value, since otherwise we have to add a new induction variable,
63 which usually will not pay up (unless the cost of the doloop pattern is
64 somehow extremely lower than the cost of compare & jump, or unless the bct
65 register cannot be used for anything else but doloop -- ??? detect these
68 #ifdef HAVE_doloop_end
70 /* Return the loop termination condition for PATTERN or zero
71 if it is not a decrement and branch jump insn. */
74 doloop_condition_get (rtx doloop_pat
)
83 /* The canonical doloop pattern we expect has one of the following
86 1) (parallel [(set (pc) (if_then_else (condition)
89 (set (reg) (plus (reg) (const_int -1)))
90 (additional clobbers and uses)])
92 The branch must be the first entry of the parallel (also required
93 by jump.c), and the second entry of the parallel must be a set of
94 the loop counter register. Some targets (IA-64) wrap the set of
95 the loop counter in an if_then_else too.
97 2) (set (reg) (plus (reg) (const_int -1))
98 (set (pc) (if_then_else (reg != 0)
102 pattern
= PATTERN (doloop_pat
);
104 if (GET_CODE (pattern
) != PARALLEL
)
107 rtx prev_insn
= prev_nondebug_insn (doloop_pat
);
109 /* We expect the decrement to immediately precede the branch. */
111 if (prev_insn
== NULL_RTX
|| !INSN_P (prev_insn
))
115 inc
= PATTERN (PREV_INSN (doloop_pat
));
116 /* We expect the condition to be of the form (reg != 0) */
117 cond
= XEXP (SET_SRC (cmp
), 0);
118 if (GET_CODE (cond
) != NE
|| XEXP (cond
, 1) != const0_rtx
)
124 cmp
= XVECEXP (pattern
, 0, 0);
125 inc
= XVECEXP (pattern
, 0, 1);
128 /* Check for (set (reg) (something)). */
129 if (GET_CODE (inc
) != SET
)
131 reg
= SET_DEST (inc
);
135 /* Check if something = (plus (reg) (const_int -1)).
136 On IA-64, this decrement is wrapped in an if_then_else. */
137 inc_src
= SET_SRC (inc
);
138 if (GET_CODE (inc_src
) == IF_THEN_ELSE
)
139 inc_src
= XEXP (inc_src
, 1);
140 if (GET_CODE (inc_src
) != PLUS
141 || XEXP (inc_src
, 0) != reg
142 || XEXP (inc_src
, 1) != constm1_rtx
)
145 /* Check for (set (pc) (if_then_else (condition)
148 if (GET_CODE (cmp
) != SET
149 || SET_DEST (cmp
) != pc_rtx
150 || GET_CODE (SET_SRC (cmp
)) != IF_THEN_ELSE
151 || GET_CODE (XEXP (SET_SRC (cmp
), 1)) != LABEL_REF
152 || XEXP (SET_SRC (cmp
), 2) != pc_rtx
)
155 /* Extract loop termination condition. */
156 condition
= XEXP (SET_SRC (cmp
), 0);
158 /* We expect a GE or NE comparison with 0 or 1. */
159 if ((GET_CODE (condition
) != GE
160 && GET_CODE (condition
) != NE
)
161 || (XEXP (condition
, 1) != const0_rtx
162 && XEXP (condition
, 1) != const1_rtx
))
165 if ((XEXP (condition
, 0) == reg
)
166 || (GET_CODE (XEXP (condition
, 0)) == PLUS
167 && XEXP (XEXP (condition
, 0), 0) == reg
))
169 if (GET_CODE (pattern
) != PARALLEL
)
170 /* The second form we expect:
172 (set (reg) (plus (reg) (const_int -1))
173 (set (pc) (if_then_else (reg != 0)
177 is equivalent to the following:
179 (parallel [(set (pc) (if_then_else (reg != 1)
182 (set (reg) (plus (reg) (const_int -1)))
183 (additional clobbers and uses)])
185 So we return that form instead.
187 condition
= gen_rtx_fmt_ee (NE
, VOIDmode
, inc_src
, const1_rtx
);
192 /* ??? If a machine uses a funny comparison, we could return a
193 canonicalized form here. */
198 /* Return nonzero if the loop specified by LOOP is suitable for
199 the use of special low-overhead looping instructions. DESC
200 describes the number of iterations of the loop. */
203 doloop_valid_p (struct loop
*loop
, struct niter_desc
*desc
)
205 basic_block
*body
= get_loop_body (loop
), bb
;
210 /* Check for loops that may not terminate under special conditions. */
215 /* There are some cases that would require a special attention.
216 For example if the comparison is LEU and the comparison value
217 is UINT_MAX then the loop will not terminate. Similarly, if the
218 comparison code is GEU and the comparison value is 0, the
219 loop will not terminate.
221 If the absolute increment is not 1, the loop can be infinite
222 even with LTU/GTU, e.g. for (i = 3; i > 0; i -= 2)
224 ??? We could compute these conditions at run-time and have a
225 additional jump around the loop to ensure an infinite loop.
226 However, it is very unlikely that this is the intended
227 behavior of the loop and checking for these rare boundary
228 conditions would pessimize all other code.
230 If the loop is executed only a few times an extra check to
231 restart the loop could use up most of the benefits of using a
232 count register loop. Note however, that normally, this
233 restart branch would never execute, so it could be predicted
234 well by the CPU. We should generate the pessimistic code by
235 default, and have an option, e.g. -funsafe-loops that would
236 enable count-register loops in this case. */
238 fprintf (dump_file
, "Doloop: Possible infinite iteration case.\n");
243 for (i
= 0; i
< loop
->num_nodes
; i
++)
247 for (insn
= BB_HEAD (bb
);
248 insn
!= NEXT_INSN (BB_END (bb
));
249 insn
= NEXT_INSN (insn
))
251 /* Different targets have different necessities for low-overhead
252 looping. Call the back end for each instruction within the loop
253 to let it decide whether the insn prohibits a low-overhead loop.
254 It will then return the cause for it to emit to the dump file. */
255 const char * invalid
= targetm
.invalid_within_doloop (insn
);
259 fprintf (dump_file
, "Doloop: %s\n", invalid
);
273 /* Adds test of COND jumping to DEST on edge *E and set *E to the new fallthru
274 edge. If the condition is always false, do not do anything. If it is always
275 true, redirect E to DEST and return false. In all other cases, true is
279 add_test (rtx cond
, edge
*e
, basic_block dest
)
281 rtx seq
, jump
, label
;
282 enum machine_mode mode
;
283 rtx op0
= XEXP (cond
, 0), op1
= XEXP (cond
, 1);
284 enum rtx_code code
= GET_CODE (cond
);
287 mode
= GET_MODE (XEXP (cond
, 0));
288 if (mode
== VOIDmode
)
289 mode
= GET_MODE (XEXP (cond
, 1));
292 op0
= force_operand (op0
, NULL_RTX
);
293 op1
= force_operand (op1
, NULL_RTX
);
294 label
= block_label (dest
);
295 do_compare_rtx_and_jump (op0
, op1
, code
, 0, mode
, NULL_RTX
,
296 NULL_RTX
, label
, -1);
298 jump
= get_last_insn ();
299 if (!jump
|| !JUMP_P (jump
))
301 /* The condition is always false and the jump was optimized out. */
309 /* There always is at least the jump insn in the sequence. */
310 gcc_assert (seq
!= NULL_RTX
);
312 bb
= split_edge_and_insert (*e
, seq
);
313 *e
= single_succ_edge (bb
);
315 if (any_uncondjump_p (jump
))
317 /* The condition is always true. */
319 redirect_edge_and_branch_force (*e
, dest
);
323 JUMP_LABEL (jump
) = label
;
325 /* The jump is supposed to handle an unlikely special case. */
326 add_reg_note (jump
, REG_BR_PROB
, const0_rtx
);
328 LABEL_NUSES (label
)++;
330 make_edge (bb
, dest
, (*e
)->flags
& ~EDGE_FALLTHRU
);
334 /* Modify the loop to use the low-overhead looping insn where LOOP
335 describes the loop, DESC describes the number of iterations of the
336 loop, and DOLOOP_INSN is the low-overhead looping insn to emit at the
337 end of the loop. CONDITION is the condition separated from the
338 DOLOOP_SEQ. COUNT is the number of iterations of the LOOP.
339 ZERO_EXTEND_P says to zero extend COUNT after the increment of it to
340 word_mode from FROM_MODE. */
343 doloop_modify (struct loop
*loop
, struct niter_desc
*desc
,
344 rtx doloop_seq
, rtx condition
, rtx count
,
345 bool zero_extend_p
, enum machine_mode from_mode
)
348 rtx tmp
, noloop
= NULL_RTX
;
353 bool increment_count
;
354 basic_block loop_end
= desc
->out_edge
->src
;
355 enum machine_mode mode
;
358 jump_insn
= BB_END (loop_end
);
362 fprintf (dump_file
, "Doloop: Inserting doloop pattern (");
363 if (desc
->const_iter
)
364 fprintf (dump_file
, HOST_WIDEST_INT_PRINT_DEC
, desc
->niter
);
366 fputs ("runtime", dump_file
);
367 fputs (" iterations).\n", dump_file
);
370 /* Get the probability of the original branch. If it exists we would
371 need to update REG_BR_PROB of the new jump_insn. */
372 true_prob_val
= find_reg_note (jump_insn
, REG_BR_PROB
, NULL_RTX
);
374 /* Discard original jump to continue loop. The original compare
375 result may still be live, so it cannot be discarded explicitly. */
376 delete_insn (jump_insn
);
378 counter_reg
= XEXP (condition
, 0);
379 if (GET_CODE (counter_reg
) == PLUS
)
380 counter_reg
= XEXP (counter_reg
, 0);
381 mode
= GET_MODE (counter_reg
);
383 increment_count
= false;
384 switch (GET_CODE (condition
))
387 /* Currently only NE tests against zero and one are supported. */
388 noloop
= XEXP (condition
, 1);
389 if (noloop
!= const0_rtx
)
391 gcc_assert (noloop
== const1_rtx
);
392 increment_count
= true;
397 /* Currently only GE tests against zero are supported. */
398 gcc_assert (XEXP (condition
, 1) == const0_rtx
);
400 noloop
= constm1_rtx
;
402 /* The iteration count does not need incrementing for a GE test. */
403 increment_count
= false;
405 /* Determine if the iteration counter will be non-negative.
406 Note that the maximum value loaded is iterations_max - 1. */
408 <= ((unsigned HOST_WIDEST_INT
) 1
409 << (GET_MODE_BITSIZE (mode
) - 1)))
413 /* Abort if an invalid doloop pattern has been generated. */
419 count
= simplify_gen_binary (PLUS
, from_mode
, count
, const1_rtx
);
422 count
= simplify_gen_unary (ZERO_EXTEND
, word_mode
,
425 /* Insert initialization of the count register into the loop header. */
427 tmp
= force_operand (count
, counter_reg
);
428 convert_move (counter_reg
, tmp
, 1);
429 sequence
= get_insns ();
431 emit_insn_after (sequence
, BB_END (loop_preheader_edge (loop
)->src
));
433 if (desc
->noloop_assumptions
)
435 rtx ass
= copy_rtx (desc
->noloop_assumptions
);
436 basic_block preheader
= loop_preheader_edge (loop
)->src
;
438 = split_edge (loop_preheader_edge (loop
));
439 basic_block new_preheader
440 = split_edge (loop_preheader_edge (loop
));
443 /* Expand the condition testing the assumptions and if it does not pass,
444 reset the count register to 0. */
445 redirect_edge_and_branch_force (single_succ_edge (preheader
), new_preheader
);
446 set_immediate_dominator (CDI_DOMINATORS
, new_preheader
, preheader
);
449 set_zero
->frequency
= 0;
451 te
= single_succ_edge (preheader
);
452 for (; ass
; ass
= XEXP (ass
, 1))
453 if (!add_test (XEXP (ass
, 0), &te
, set_zero
))
458 /* We reached a condition that is always true. This is very hard to
459 reproduce (such a loop does not roll, and thus it would most
460 likely get optimized out by some of the preceding optimizations).
461 In fact, I do not have any testcase for it. However, it would
462 also be very hard to show that it is impossible, so we must
464 set_zero
->count
= preheader
->count
;
465 set_zero
->frequency
= preheader
->frequency
;
468 if (EDGE_COUNT (set_zero
->preds
) == 0)
470 /* All the conditions were simplified to false, remove the
471 unreachable set_zero block. */
472 delete_basic_block (set_zero
);
476 /* Reset the counter to zero in the set_zero block. */
478 convert_move (counter_reg
, noloop
, 0);
479 sequence
= get_insns ();
481 emit_insn_after (sequence
, BB_END (set_zero
));
483 set_immediate_dominator (CDI_DOMINATORS
, set_zero
,
484 recompute_dominator (CDI_DOMINATORS
,
488 set_immediate_dominator (CDI_DOMINATORS
, new_preheader
,
489 recompute_dominator (CDI_DOMINATORS
,
493 /* Some targets (eg, C4x) need to initialize special looping
495 #ifdef HAVE_doloop_begin
498 unsigned level
= get_loop_level (loop
) + 1;
499 init
= gen_doloop_begin (counter_reg
,
500 desc
->const_iter
? desc
->niter_expr
: const0_rtx
,
501 GEN_INT (desc
->niter_max
),
507 sequence
= get_insns ();
509 emit_insn_after (sequence
, BB_END (loop_preheader_edge (loop
)->src
));
514 /* Insert the new low-overhead looping insn. */
515 emit_jump_insn_after (doloop_seq
, BB_END (loop_end
));
516 jump_insn
= BB_END (loop_end
);
517 jump_label
= block_label (desc
->in_edge
->dest
);
518 JUMP_LABEL (jump_insn
) = jump_label
;
519 LABEL_NUSES (jump_label
)++;
521 /* Ensure the right fallthru edge is marked, for case we have reversed
523 desc
->in_edge
->flags
&= ~EDGE_FALLTHRU
;
524 desc
->out_edge
->flags
|= EDGE_FALLTHRU
;
526 /* Add a REG_NONNEG note if the actual or estimated maximum number
527 of iterations is non-negative. */
529 add_reg_note (jump_insn
, REG_NONNEG
, NULL_RTX
);
531 /* Update the REG_BR_PROB note. */
534 /* Seems safer to use the branch probability. */
535 add_reg_note (jump_insn
, REG_BR_PROB
,
536 GEN_INT (desc
->in_edge
->probability
));
540 /* Process loop described by LOOP validating that the loop is suitable for
541 conversion to use a low overhead looping instruction, replacing the jump
542 insn where suitable. Returns true if the loop was successfully
546 doloop_optimize (struct loop
*loop
)
548 enum machine_mode mode
;
549 rtx doloop_seq
, doloop_pat
, doloop_reg
;
550 rtx iterations
, count
;
554 unsigned level
, est_niter
;
556 struct niter_desc
*desc
;
557 unsigned word_mode_size
;
558 unsigned HOST_WIDE_INT word_mode_max
;
559 bool zero_extend_p
= false;
562 fprintf (dump_file
, "Doloop: Processing loop %d.\n", loop
->num
);
564 iv_analysis_loop_init (loop
);
566 /* Find the simple exit of a LOOP. */
567 desc
= get_simple_loop_desc (loop
);
569 /* Check that loop is a candidate for a low-overhead looping insn. */
570 if (!doloop_valid_p (loop
, desc
))
574 "Doloop: The loop is not suitable.\n");
580 if (desc
->const_iter
)
581 est_niter
= desc
->niter
;
582 /* If the estimate on number of iterations is reliable (comes from profile
583 feedback), use it. Do not use it normally, since the expected number
584 of iterations of an unrolled loop is 2. */
585 if (loop
->header
->count
)
586 est_niter
= expected_loop_iterations (loop
);
592 "Doloop: Too few iterations (%u) to be profitable.\n",
598 = COSTS_N_INSNS (PARAM_VALUE (PARAM_MAX_ITERATIONS_COMPUTATION_COST
));
599 if (rtx_cost (desc
->niter_expr
, SET
, optimize_loop_for_speed_p (loop
))
604 "Doloop: number of iterations too costly to compute.\n");
608 count
= copy_rtx (desc
->niter_expr
);
609 iterations
= desc
->const_iter
? desc
->niter_expr
: const0_rtx
;
610 iterations_max
= GEN_INT (desc
->niter_max
);
611 level
= get_loop_level (loop
) + 1;
613 /* Generate looping insn. If the pattern FAILs then give up trying
614 to modify the loop since there is some aspect the back-end does
616 start_label
= block_label (desc
->in_edge
->dest
);
617 doloop_reg
= gen_reg_rtx (mode
);
618 doloop_seq
= gen_doloop_end (doloop_reg
, iterations
, iterations_max
,
619 GEN_INT (level
), start_label
);
621 word_mode_size
= GET_MODE_BITSIZE (word_mode
);
623 = ((unsigned HOST_WIDE_INT
) 1 << (word_mode_size
- 1) << 1) - 1;
626 /* Before trying mode different from the one in that # of iterations is
627 computed, we must be sure that the number of iterations fits into
629 && (word_mode_size
>= GET_MODE_BITSIZE (mode
)
630 || desc
->niter_max
<= word_mode_max
))
632 if (word_mode_size
> GET_MODE_BITSIZE (mode
))
634 zero_extend_p
= true;
635 iterations
= simplify_gen_unary (ZERO_EXTEND
, word_mode
,
637 iterations_max
= simplify_gen_unary (ZERO_EXTEND
, word_mode
,
638 iterations_max
, mode
);
642 count
= lowpart_subreg (word_mode
, count
, mode
);
643 iterations
= lowpart_subreg (word_mode
, iterations
, mode
);
644 iterations_max
= lowpart_subreg (word_mode
, iterations_max
, mode
);
646 PUT_MODE (doloop_reg
, word_mode
);
647 doloop_seq
= gen_doloop_end (doloop_reg
, iterations
, iterations_max
,
648 GEN_INT (level
), start_label
);
654 "Doloop: Target unwilling to use doloop pattern!\n");
658 /* If multiple instructions were created, the last must be the
659 jump instruction. Also, a raw define_insn may yield a plain
661 doloop_pat
= doloop_seq
;
662 if (INSN_P (doloop_pat
))
664 while (NEXT_INSN (doloop_pat
) != NULL_RTX
)
665 doloop_pat
= NEXT_INSN (doloop_pat
);
666 if (!JUMP_P (doloop_pat
))
667 doloop_pat
= NULL_RTX
;
671 || ! (condition
= doloop_condition_get (doloop_pat
)))
674 fprintf (dump_file
, "Doloop: Unrecognizable doloop pattern!\n");
678 doloop_modify (loop
, desc
, doloop_seq
, condition
, count
,
679 zero_extend_p
, mode
);
683 /* This is the main entry point. Process all loops using doloop_optimize. */
686 doloop_optimize_loops (void)
691 FOR_EACH_LOOP (li
, loop
, 0)
693 doloop_optimize (loop
);
698 #ifdef ENABLE_CHECKING
699 verify_dominators (CDI_DOMINATORS
);
700 verify_loop_structure ();
703 #endif /* HAVE_doloop_end */