1 /* Perform doloop optimizations
2 Copyright (C) 2004, 2005 Free Software Foundation, Inc.
3 Based on code by Michael P. Hayes (m.hayes@elec.canterbury.ac.nz)
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
24 #include "coretypes.h"
29 #include "hard-reg-set.h"
30 #include "basic-block.h"
38 /* This module is used to modify loops with a determinable number of
39 iterations to use special low-overhead looping instructions.
41 It first validates whether the loop is well behaved and has a
42 determinable number of iterations (either at compile or run-time).
43 It then modifies the loop to use a low-overhead looping pattern as
46 1. A pseudo register is allocated as the loop iteration counter.
48 2. The number of loop iterations is calculated and is stored
51 3. At the end of the loop, the jump insn is replaced by the
52 doloop_end pattern. The compare must remain because it might be
53 used elsewhere. If the loop-variable or condition register are
54 used elsewhere, they will be eliminated by flow.
56 4. An optional doloop_begin pattern is inserted at the top of the
59 TODO The optimization should only performed when either the biv used for exit
60 condition is unused at all except for the exit test, or if we do not have to
61 change its value, since otherwise we have to add a new induction variable,
62 which usually will not pay up (unless the cost of the doloop pattern is
63 somehow extremely lower than the cost of compare & jump, or unless the bct
64 register cannot be used for anything else but doloop -- ??? detect these
67 #ifdef HAVE_doloop_end
69 /* Return the loop termination condition for PATTERN or zero
70 if it is not a decrement and branch jump insn. */
73 doloop_condition_get (rtx pattern
)
80 /* The canonical doloop pattern we expect is:
82 (parallel [(set (pc) (if_then_else (condition)
85 (set (reg) (plus (reg) (const_int -1)))
86 (additional clobbers and uses)])
88 Some machines (IA-64) make the decrement conditional on
89 the condition as well, so we don't bother verifying the
90 actual decrement. In summary, the branch must be the
91 first entry of the parallel (also required by jump.c),
92 and the second entry of the parallel must be a set of
93 the loop counter register. */
95 if (GET_CODE (pattern
) != PARALLEL
)
98 cmp
= XVECEXP (pattern
, 0, 0);
99 inc
= XVECEXP (pattern
, 0, 1);
101 /* Check for (set (reg) (something)). */
102 if (GET_CODE (inc
) != SET
|| ! REG_P (SET_DEST (inc
)))
105 /* Extract loop counter register. */
106 reg
= SET_DEST (inc
);
108 /* Check for (set (pc) (if_then_else (condition)
111 if (GET_CODE (cmp
) != SET
112 || SET_DEST (cmp
) != pc_rtx
113 || GET_CODE (SET_SRC (cmp
)) != IF_THEN_ELSE
114 || GET_CODE (XEXP (SET_SRC (cmp
), 1)) != LABEL_REF
115 || XEXP (SET_SRC (cmp
), 2) != pc_rtx
)
118 /* Extract loop termination condition. */
119 condition
= XEXP (SET_SRC (cmp
), 0);
121 if ((GET_CODE (condition
) != GE
&& GET_CODE (condition
) != NE
)
122 || GET_CODE (XEXP (condition
, 1)) != CONST_INT
)
125 if (XEXP (condition
, 0) == reg
)
128 if (GET_CODE (XEXP (condition
, 0)) == PLUS
129 && XEXP (XEXP (condition
, 0), 0) == reg
)
132 /* ??? If a machine uses a funny comparison, we could return a
133 canonicalized form here. */
138 /* Return nonzero if the loop specified by LOOP is suitable for
139 the use of special low-overhead looping instructions. DESC
140 describes the number of iterations of the loop. */
143 doloop_valid_p (struct loop
*loop
, struct niter_desc
*desc
)
145 basic_block
*body
= get_loop_body (loop
), bb
;
150 /* Check for loops that may not terminate under special conditions. */
155 /* There are some cases that would require a special attention.
156 For example if the comparison is LEU and the comparison value
157 is UINT_MAX then the loop will not terminate. Similarly, if the
158 comparison code is GEU and the comparison value is 0, the
159 loop will not terminate.
161 If the absolute increment is not 1, the loop can be infinite
162 even with LTU/GTU, e.g. for (i = 3; i > 0; i -= 2)
164 ??? We could compute these conditions at run-time and have a
165 additional jump around the loop to ensure an infinite loop.
166 However, it is very unlikely that this is the intended
167 behavior of the loop and checking for these rare boundary
168 conditions would pessimize all other code.
170 If the loop is executed only a few times an extra check to
171 restart the loop could use up most of the benefits of using a
172 count register loop. Note however, that normally, this
173 restart branch would never execute, so it could be predicted
174 well by the CPU. We should generate the pessimistic code by
175 default, and have an option, e.g. -funsafe-loops that would
176 enable count-register loops in this case. */
178 fprintf (dump_file
, "Doloop: Possible infinite iteration case.\n");
183 for (i
= 0; i
< loop
->num_nodes
; i
++)
187 for (insn
= BB_HEAD (bb
);
188 insn
!= NEXT_INSN (BB_END (bb
));
189 insn
= NEXT_INSN (insn
))
191 /* Different targets have different necessities for low-overhead
192 looping. Call the back end for each instruction within the loop
193 to let it decide whether the insn is valid. */
194 if (!targetm
.insn_valid_within_doloop (insn
))
209 /* Adds test of COND jumping to DEST to the end of BB. */
212 add_test (rtx cond
, basic_block bb
, basic_block dest
)
214 rtx seq
, jump
, label
;
215 enum machine_mode mode
;
216 rtx op0
= XEXP (cond
, 0), op1
= XEXP (cond
, 1);
217 enum rtx_code code
= GET_CODE (cond
);
219 mode
= GET_MODE (XEXP (cond
, 0));
220 if (mode
== VOIDmode
)
221 mode
= GET_MODE (XEXP (cond
, 1));
224 op0
= force_operand (op0
, NULL_RTX
);
225 op1
= force_operand (op1
, NULL_RTX
);
226 label
= block_label (dest
);
227 do_compare_rtx_and_jump (op0
, op1
, code
, 0, mode
, NULL_RTX
, NULL_RTX
, label
);
229 jump
= get_last_insn ();
230 JUMP_LABEL (jump
) = label
;
232 /* The jump is supposed to handle an unlikely special case. */
234 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
235 const0_rtx
, REG_NOTES (jump
));
237 LABEL_NUSES (label
)++;
241 emit_insn_after (seq
, BB_END (bb
));
244 /* Modify the loop to use the low-overhead looping insn where LOOP
245 describes the loop, DESC describes the number of iterations of the
246 loop, and DOLOOP_INSN is the low-overhead looping insn to emit at the
247 end of the loop. CONDITION is the condition separated from the
248 DOLOOP_SEQ. COUNT is the number of iterations of the LOOP. */
251 doloop_modify (struct loop
*loop
, struct niter_desc
*desc
,
252 rtx doloop_seq
, rtx condition
, rtx count
)
255 rtx tmp
, noloop
= NULL_RTX
;
260 bool increment_count
;
261 basic_block loop_end
= desc
->out_edge
->src
;
262 enum machine_mode mode
;
264 jump_insn
= BB_END (loop_end
);
268 fprintf (dump_file
, "Doloop: Inserting doloop pattern (");
269 if (desc
->const_iter
)
270 fprintf (dump_file
, HOST_WIDEST_INT_PRINT_DEC
, desc
->niter
);
272 fputs ("runtime", dump_file
);
273 fputs (" iterations).\n", dump_file
);
276 /* Discard original jump to continue loop. The original compare
277 result may still be live, so it cannot be discarded explicitly. */
278 delete_insn (jump_insn
);
280 counter_reg
= XEXP (condition
, 0);
281 if (GET_CODE (counter_reg
) == PLUS
)
282 counter_reg
= XEXP (counter_reg
, 0);
283 mode
= GET_MODE (counter_reg
);
285 increment_count
= false;
286 switch (GET_CODE (condition
))
289 /* Currently only NE tests against zero and one are supported. */
290 noloop
= XEXP (condition
, 1);
291 if (noloop
!= const0_rtx
)
293 gcc_assert (noloop
== const1_rtx
);
294 increment_count
= true;
299 /* Currently only GE tests against zero are supported. */
300 gcc_assert (XEXP (condition
, 1) == const0_rtx
);
302 noloop
= constm1_rtx
;
304 /* The iteration count does not need incrementing for a GE test. */
305 increment_count
= false;
307 /* Determine if the iteration counter will be non-negative.
308 Note that the maximum value loaded is iterations_max - 1. */
310 <= ((unsigned HOST_WIDEST_INT
) 1
311 << (GET_MODE_BITSIZE (mode
) - 1)))
315 /* Abort if an invalid doloop pattern has been generated. */
321 count
= simplify_gen_binary (PLUS
, mode
, count
, const1_rtx
);
323 /* Insert initialization of the count register into the loop header. */
325 tmp
= force_operand (count
, counter_reg
);
326 convert_move (counter_reg
, tmp
, 1);
327 sequence
= get_insns ();
329 emit_insn_after (sequence
, BB_END (loop_preheader_edge (loop
)->src
));
331 if (desc
->noloop_assumptions
)
333 rtx ass
= copy_rtx (desc
->noloop_assumptions
);
334 basic_block preheader
= loop_preheader_edge (loop
)->src
;
336 = loop_split_edge_with (loop_preheader_edge (loop
), NULL_RTX
);
337 basic_block new_preheader
338 = loop_split_edge_with (loop_preheader_edge (loop
), NULL_RTX
);
343 /* Expand the condition testing the assumptions and if it does not pass,
344 reset the count register to 0. */
345 add_test (XEXP (ass
, 0), preheader
, set_zero
);
346 single_succ_edge (preheader
)->flags
&= ~EDGE_FALLTHRU
;
347 cnt
= single_succ_edge (preheader
)->count
;
348 single_succ_edge (preheader
)->probability
= 0;
349 single_succ_edge (preheader
)->count
= 0;
350 irr
= single_succ_edge (preheader
)->flags
& EDGE_IRREDUCIBLE_LOOP
;
351 te
= make_edge (preheader
, new_preheader
, EDGE_FALLTHRU
| irr
);
352 te
->probability
= REG_BR_PROB_BASE
;
354 set_immediate_dominator (CDI_DOMINATORS
, new_preheader
, preheader
);
357 set_zero
->frequency
= 0;
359 for (ass
= XEXP (ass
, 1); ass
; ass
= XEXP (ass
, 1))
361 bb
= loop_split_edge_with (te
, NULL_RTX
);
362 te
= single_succ_edge (bb
);
363 add_test (XEXP (ass
, 0), bb
, set_zero
);
364 make_edge (bb
, set_zero
, irr
);
368 convert_move (counter_reg
, noloop
, 0);
369 sequence
= get_insns ();
371 emit_insn_after (sequence
, BB_END (set_zero
));
374 /* Some targets (eg, C4x) need to initialize special looping
376 #ifdef HAVE_doloop_begin
379 unsigned level
= get_loop_level (loop
) + 1;
380 init
= gen_doloop_begin (counter_reg
,
381 desc
->const_iter
? desc
->niter_expr
: const0_rtx
,
388 sequence
= get_insns ();
390 emit_insn_after (sequence
, BB_END (loop_preheader_edge (loop
)->src
));
395 /* Insert the new low-overhead looping insn. */
396 emit_jump_insn_after (doloop_seq
, BB_END (loop_end
));
397 jump_insn
= BB_END (loop_end
);
398 jump_label
= block_label (desc
->in_edge
->dest
);
399 JUMP_LABEL (jump_insn
) = jump_label
;
400 LABEL_NUSES (jump_label
)++;
402 /* Ensure the right fallthru edge is marked, for case we have reversed
404 desc
->in_edge
->flags
&= ~EDGE_FALLTHRU
;
405 desc
->out_edge
->flags
|= EDGE_FALLTHRU
;
407 /* Add a REG_NONNEG note if the actual or estimated maximum number
408 of iterations is non-negative. */
411 REG_NOTES (jump_insn
)
412 = gen_rtx_EXPR_LIST (REG_NONNEG
, NULL_RTX
, REG_NOTES (jump_insn
));
416 /* Process loop described by LOOP validating that the loop is suitable for
417 conversion to use a low overhead looping instruction, replacing the jump
418 insn where suitable. Returns true if the loop was successfully
422 doloop_optimize (struct loop
*loop
)
424 enum machine_mode mode
;
425 rtx doloop_seq
, doloop_pat
, doloop_reg
;
426 rtx iterations
, count
;
430 unsigned level
, est_niter
;
431 struct niter_desc
*desc
;
432 unsigned word_mode_size
;
433 unsigned HOST_WIDE_INT word_mode_max
;
436 fprintf (dump_file
, "Doloop: Processing loop %d.\n", loop
->num
);
438 iv_analysis_loop_init (loop
);
440 /* Find the simple exit of a LOOP. */
441 desc
= get_simple_loop_desc (loop
);
443 /* Check that loop is a candidate for a low-overhead looping insn. */
444 if (!doloop_valid_p (loop
, desc
))
448 "Doloop: The loop is not suitable.\n");
454 if (desc
->const_iter
)
455 est_niter
= desc
->niter
;
456 /* If the estimate on number of iterations is reliable (comes from profile
457 feedback), use it. Do not use it normally, since the expected number
458 of iterations of an unrolled loop is 2. */
459 if (loop
->header
->count
)
460 est_niter
= expected_loop_iterations (loop
);
466 "Doloop: Too few iterations (%u) to be profitable.\n",
471 count
= copy_rtx (desc
->niter_expr
);
472 iterations
= desc
->const_iter
? desc
->niter_expr
: const0_rtx
;
473 iterations_max
= GEN_INT (desc
->niter_max
);
474 level
= get_loop_level (loop
) + 1;
476 /* Generate looping insn. If the pattern FAILs then give up trying
477 to modify the loop since there is some aspect the back-end does
479 start_label
= block_label (desc
->in_edge
->dest
);
480 doloop_reg
= gen_reg_rtx (mode
);
481 doloop_seq
= gen_doloop_end (doloop_reg
, iterations
, iterations_max
,
482 GEN_INT (level
), start_label
);
484 word_mode_size
= GET_MODE_BITSIZE (word_mode
);
486 = ((unsigned HOST_WIDE_INT
) 1 << (word_mode_size
- 1) << 1) - 1;
489 /* Before trying mode different from the one in that # of iterations is
490 computed, we must be sure that the number of iterations fits into
492 && (word_mode_size
>= GET_MODE_BITSIZE (mode
)
493 || desc
->niter_max
<= word_mode_max
))
495 if (word_mode_size
> GET_MODE_BITSIZE (mode
))
497 count
= simplify_gen_unary (ZERO_EXTEND
, word_mode
,
499 iterations
= simplify_gen_unary (ZERO_EXTEND
, word_mode
,
501 iterations_max
= simplify_gen_unary (ZERO_EXTEND
, word_mode
,
502 iterations_max
, mode
);
506 count
= lowpart_subreg (word_mode
, count
, mode
);
507 iterations
= lowpart_subreg (word_mode
, iterations
, mode
);
508 iterations_max
= lowpart_subreg (word_mode
, iterations_max
, mode
);
510 PUT_MODE (doloop_reg
, word_mode
);
511 doloop_seq
= gen_doloop_end (doloop_reg
, iterations
, iterations_max
,
512 GEN_INT (level
), start_label
);
518 "Doloop: Target unwilling to use doloop pattern!\n");
522 /* If multiple instructions were created, the last must be the
523 jump instruction. Also, a raw define_insn may yield a plain
525 doloop_pat
= doloop_seq
;
526 if (INSN_P (doloop_pat
))
528 while (NEXT_INSN (doloop_pat
) != NULL_RTX
)
529 doloop_pat
= NEXT_INSN (doloop_pat
);
530 if (JUMP_P (doloop_pat
))
531 doloop_pat
= PATTERN (doloop_pat
);
533 doloop_pat
= NULL_RTX
;
537 || ! (condition
= doloop_condition_get (doloop_pat
)))
540 fprintf (dump_file
, "Doloop: Unrecognizable doloop pattern!\n");
544 doloop_modify (loop
, desc
, doloop_seq
, condition
, count
);
548 /* This is the main entry point. Process all LOOPS using doloop_optimize. */
551 doloop_optimize_loops (struct loops
*loops
)
556 for (i
= 1; i
< loops
->num
; i
++)
558 loop
= loops
->parray
[i
];
562 doloop_optimize (loop
);
567 #ifdef ENABLE_CHECKING
568 verify_dominators (CDI_DOMINATORS
);
569 verify_loop_structure (loops
);
572 #endif /* HAVE_doloop_end */