1 /* Perform doloop optimizations
2 Copyright (C) 2004 Free Software Foundation, Inc.
3 Based on code by Michael P. Hayes (m.hayes@elec.canterbury.ac.nz)
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
24 #include "coretypes.h"
29 #include "hard-reg-set.h"
30 #include "basic-block.h"
37 /* This module is used to modify loops with a determinable number of
38 iterations to use special low-overhead looping instructions.
40 It first validates whether the loop is well behaved and has a
41 determinable number of iterations (either at compile or run-time).
42 It then modifies the loop to use a low-overhead looping pattern as
45 1. A pseudo register is allocated as the loop iteration counter.
47 2. The number of loop iterations is calculated and is stored
50 3. At the end of the loop, the jump insn is replaced by the
51 doloop_end pattern. The compare must remain because it might be
52 used elsewhere. If the loop-variable or condition register are
53 used elsewhere, they will be eliminated by flow.
55 4. An optional doloop_begin pattern is inserted at the top of the
58 TODO The optimization should only performed when either the biv used for exit
59 condition is unused at all except for the exit test, or if we do not have to
60 change its value, since otherwise we have to add a new induction variable,
61 which usually will not pay up (unless the cost of the doloop pattern is
62 somehow extremely lower than the cost of compare & jump, or unless the bct
63 register cannot be used for anything else but doloop -- ??? detect these
66 #ifdef HAVE_doloop_end
68 /* Return the loop termination condition for PATTERN or zero
69 if it is not a decrement and branch jump insn. */
72 doloop_condition_get (rtx pattern
)
79 /* The canonical doloop pattern we expect is:
81 (parallel [(set (pc) (if_then_else (condition)
84 (set (reg) (plus (reg) (const_int -1)))
85 (additional clobbers and uses)])
87 Some machines (IA-64) make the decrement conditional on
88 the condition as well, so we don't bother verifying the
89 actual decrement. In summary, the branch must be the
90 first entry of the parallel (also required by jump.c),
91 and the second entry of the parallel must be a set of
92 the loop counter register. */
94 if (GET_CODE (pattern
) != PARALLEL
)
97 cmp
= XVECEXP (pattern
, 0, 0);
98 inc
= XVECEXP (pattern
, 0, 1);
100 /* Check for (set (reg) (something)). */
101 if (GET_CODE (inc
) != SET
|| ! REG_P (SET_DEST (inc
)))
104 /* Extract loop counter register. */
105 reg
= SET_DEST (inc
);
107 /* Check for (set (pc) (if_then_else (condition)
110 if (GET_CODE (cmp
) != SET
111 || SET_DEST (cmp
) != pc_rtx
112 || GET_CODE (SET_SRC (cmp
)) != IF_THEN_ELSE
113 || GET_CODE (XEXP (SET_SRC (cmp
), 1)) != LABEL_REF
114 || XEXP (SET_SRC (cmp
), 2) != pc_rtx
)
117 /* Extract loop termination condition. */
118 condition
= XEXP (SET_SRC (cmp
), 0);
120 if ((GET_CODE (condition
) != GE
&& GET_CODE (condition
) != NE
)
121 || GET_CODE (XEXP (condition
, 1)) != CONST_INT
)
124 if (XEXP (condition
, 0) == reg
)
127 if (GET_CODE (XEXP (condition
, 0)) == PLUS
128 && XEXP (XEXP (condition
, 0), 0) == reg
)
131 /* ??? If a machine uses a funny comparison, we could return a
132 canonicalised form here. */
137 /* Return nonzero if the loop specified by LOOP is suitable for
138 the use of special low-overhead looping instructions. DESC
139 describes the number of iterations of the loop. */
142 doloop_valid_p (struct loop
*loop
, struct niter_desc
*desc
)
144 basic_block
*body
= get_loop_body (loop
), bb
;
148 /* Check for loops that may not terminate under special conditions. */
153 /* There are some cases that would require a special attention.
154 For example if the comparison is LEU and the comparison value
155 is UINT_MAX then the loop will not terminate. Similarly, if the
156 comparison code is GEU and the comparison value is 0, the
157 loop will not terminate.
159 If the absolute increment is not 1, the loop can be infinite
160 even with LTU/GTU, e.g. for (i = 3; i > 0; i -= 2)
162 ??? We could compute these conditions at run-time and have a
163 additional jump around the loop to ensure an infinite loop.
164 However, it is very unlikely that this is the intended
165 behavior of the loop and checking for these rare boundary
166 conditions would pessimize all other code.
168 If the loop is executed only a few times an extra check to
169 restart the loop could use up most of the benefits of using a
170 count register loop. Note however, that normally, this
171 restart branch would never execute, so it could be predicted
172 well by the CPU. We should generate the pessimistic code by
173 default, and have an option, e.g. -funsafe-loops that would
174 enable count-register loops in this case. */
176 fprintf (dump_file
, "Doloop: Possible infinite iteration case.\n");
180 for (i
= 0; i
< loop
->num_nodes
; i
++)
184 for (insn
= BB_HEAD (bb
);
185 insn
!= NEXT_INSN (BB_END (bb
));
186 insn
= NEXT_INSN (insn
))
188 /* A called function may clobber any special registers required for
189 low-overhead looping. */
190 if (GET_CODE (insn
) == CALL_INSN
)
193 fprintf (dump_file
, "Doloop: Function call in loop.\n");
197 /* Some targets (eg, PPC) use the count register for branch on table
198 instructions. ??? This should be a target specific check. */
199 if (GET_CODE (insn
) == JUMP_INSN
200 && (GET_CODE (PATTERN (insn
)) == ADDR_DIFF_VEC
201 || GET_CODE (PATTERN (insn
)) == ADDR_VEC
))
204 fprintf (dump_file
, "Doloop: Computed branch in the loop.\n");
214 /* Adds test of COND jumping to DEST to the end of BB. */
217 add_test (rtx cond
, basic_block bb
, basic_block dest
)
219 rtx seq
, jump
, label
;
220 enum machine_mode mode
;
221 rtx op0
= XEXP (cond
, 0), op1
= XEXP (cond
, 1);
222 enum rtx_code code
= GET_CODE (cond
);
224 mode
= GET_MODE (XEXP (cond
, 0));
225 if (mode
== VOIDmode
)
226 mode
= GET_MODE (XEXP (cond
, 1));
229 op0
= force_operand (op0
, NULL_RTX
);
230 op1
= force_operand (op1
, NULL_RTX
);
231 label
= block_label (dest
);
232 do_compare_rtx_and_jump (op0
, op1
, code
, 0, mode
, NULL_RTX
, NULL_RTX
, label
);
234 jump
= get_last_insn ();
235 JUMP_LABEL (jump
) = label
;
237 /* The jump is supposed to handle an unlikely special case. */
239 = gen_rtx_EXPR_LIST (REG_BR_PROB
,
240 GEN_INT (0), REG_NOTES (jump
));
242 LABEL_NUSES (label
)++;
246 emit_insn_after (seq
, BB_END (bb
));
249 /* Modify the loop to use the low-overhead looping insn where LOOP
250 describes the loop, DESC describes the number of iterations of the
251 loop, and DOLOOP_INSN is the low-overhead looping insn to emit at the
252 end of the loop. CONDITION is the condition separated from the
256 doloop_modify (struct loop
*loop
, struct niter_desc
*desc
,
257 rtx doloop_seq
, rtx condition
)
260 rtx count
, tmp
, noloop
= NULL_RTX
;
265 bool increment_count
;
266 basic_block loop_end
= desc
->out_edge
->src
;
268 jump_insn
= BB_END (loop_end
);
272 fprintf (dump_file
, "Doloop: Inserting doloop pattern (");
273 if (desc
->const_iter
)
274 fprintf (dump_file
, HOST_WIDEST_INT_PRINT_DEC
, desc
->niter
);
276 fputs ("runtime", dump_file
);
277 fputs (" iterations).\n", dump_file
);
280 /* Discard original jump to continue loop. The original compare
281 result may still be live, so it cannot be discarded explicitly. */
282 delete_insn (jump_insn
);
284 counter_reg
= XEXP (condition
, 0);
285 if (GET_CODE (counter_reg
) == PLUS
)
286 counter_reg
= XEXP (counter_reg
, 0);
288 count
= desc
->niter_expr
;
289 increment_count
= false;
290 switch (GET_CODE (condition
))
293 /* Currently only NE tests against zero and one are supported. */
294 if (XEXP (condition
, 1) == const1_rtx
)
296 increment_count
= true;
299 else if (XEXP (condition
, 1) == const0_rtx
)
306 /* Currently only GE tests against zero are supported. */
307 if (XEXP (condition
, 1) != const0_rtx
)
310 noloop
= constm1_rtx
;
312 /* The iteration count does not need incrementing for a GE test. */
313 increment_count
= false;
315 /* Determine if the iteration counter will be non-negative.
316 Note that the maximum value loaded is iterations_max - 1. */
318 <= ((unsigned HOST_WIDEST_INT
) 1
319 << (GET_MODE_BITSIZE (GET_MODE (counter_reg
)) - 1)))
323 /* Abort if an invalid doloop pattern has been generated. */
329 count
= simplify_gen_binary (PLUS
, desc
->mode
, count
, const1_rtx
);
331 /* Insert initialization of the count register into the loop header. */
333 tmp
= force_operand (count
, counter_reg
);
334 convert_move (counter_reg
, tmp
, 1);
335 sequence
= get_insns ();
337 emit_insn_after (sequence
, BB_END (loop_preheader_edge (loop
)->src
));
339 if (desc
->noloop_assumptions
)
341 rtx ass
= desc
->noloop_assumptions
;
342 basic_block preheader
= loop_preheader_edge (loop
)->src
;
344 = loop_split_edge_with (loop_preheader_edge (loop
), NULL_RTX
);
345 basic_block new_preheader
346 = loop_split_edge_with (loop_preheader_edge (loop
), NULL_RTX
);
351 /* Expand the condition testing the assumptions and if it does not pass,
352 reset the count register to 0. */
353 add_test (XEXP (ass
, 0), preheader
, set_zero
);
354 preheader
->succ
->flags
&= ~EDGE_FALLTHRU
;
355 cnt
= preheader
->succ
->count
;
356 preheader
->succ
->probability
= 0;
357 preheader
->succ
->count
= 0;
358 irr
= preheader
->succ
->flags
& EDGE_IRREDUCIBLE_LOOP
;
359 te
= make_edge (preheader
, new_preheader
, EDGE_FALLTHRU
| irr
);
360 te
->probability
= REG_BR_PROB_BASE
;
362 set_immediate_dominator (CDI_DOMINATORS
, new_preheader
, preheader
);
365 set_zero
->frequency
= 0;
367 for (ass
= XEXP (ass
, 1); ass
; ass
= XEXP (ass
, 1))
369 bb
= loop_split_edge_with (te
, NULL_RTX
);
371 add_test (XEXP (ass
, 0), bb
, set_zero
);
372 make_edge (bb
, set_zero
, irr
);
376 convert_move (counter_reg
, noloop
, 0);
377 sequence
= get_insns ();
379 emit_insn_after (sequence
, BB_END (set_zero
));
382 /* Some targets (eg, C4x) need to initialize special looping
384 #ifdef HAVE_doloop_begin
387 unsigned level
= get_loop_level (loop
) + 1;
388 init
= gen_doloop_begin (counter_reg
,
389 desc
->const_iter
? desc
->niter_expr
: const0_rtx
,
396 sequence
= get_insns ();
398 emit_insn_after (sequence
, BB_END (loop_preheader_edge (loop
)->src
));
403 /* Insert the new low-overhead looping insn. */
404 emit_jump_insn_after (doloop_seq
, BB_END (loop_end
));
405 jump_insn
= BB_END (loop_end
);
406 jump_label
= block_label (desc
->in_edge
->dest
);
407 JUMP_LABEL (jump_insn
) = jump_label
;
408 LABEL_NUSES (jump_label
)++;
410 /* Ensure the right fallthru edge is marked, for case we have reversed
412 desc
->in_edge
->flags
&= ~EDGE_FALLTHRU
;
413 desc
->out_edge
->flags
|= EDGE_FALLTHRU
;
415 /* Add a REG_NONNEG note if the actual or estimated maximum number
416 of iterations is non-negative. */
419 REG_NOTES (jump_insn
)
420 = gen_rtx_EXPR_LIST (REG_NONNEG
, NULL_RTX
, REG_NOTES (jump_insn
));
424 /* Process loop described by LOOP validating that the loop is suitable for
425 conversion to use a low overhead looping instruction, replacing the jump
426 insn where suitable. Returns true if the loop was successfully
430 doloop_optimize (struct loop
*loop
)
432 enum machine_mode mode
;
433 rtx doloop_seq
, doloop_pat
, doloop_reg
;
438 unsigned level
, est_niter
;
439 struct niter_desc
*desc
;
442 fprintf (dump_file
, "Doloop: Processing loop %d.\n", loop
->num
);
444 iv_analysis_loop_init (loop
);
446 /* Find the simple exit of a LOOP. */
447 desc
= get_simple_loop_desc (loop
);
449 /* Check that loop is a candidate for a low-overhead looping insn. */
450 if (!doloop_valid_p (loop
, desc
))
454 "Doloop: The loop is not suitable.\n");
460 if (desc
->const_iter
)
461 est_niter
= desc
->niter
;
462 /* If the estimate on number of iterations is reliable (comes from profile
463 feedback), use it. Do not use it normally, since the expected number
464 of iterations of an unrolled loop is 2. */
465 if (loop
->header
->count
)
466 est_niter
= expected_loop_iterations (loop
);
472 "Doloop: Too few iterations (%u) to be profitable.\n",
477 iterations
= desc
->const_iter
? desc
->niter_expr
: const0_rtx
;
478 iterations_max
= GEN_INT (desc
->niter_max
);
479 level
= get_loop_level (loop
) + 1;
481 /* Generate looping insn. If the pattern FAILs then give up trying
482 to modify the loop since there is some aspect the back-end does
484 start_label
= block_label (desc
->in_edge
->dest
);
485 doloop_reg
= gen_reg_rtx (mode
);
486 doloop_seq
= gen_doloop_end (doloop_reg
, iterations
, iterations_max
,
487 GEN_INT (level
), start_label
);
488 if (! doloop_seq
&& mode
!= word_mode
)
490 PUT_MODE (doloop_reg
, word_mode
);
491 doloop_seq
= gen_doloop_end (doloop_reg
, iterations
, iterations_max
,
492 GEN_INT (level
), start_label
);
498 "Doloop: Target unwilling to use doloop pattern!\n");
502 /* If multiple instructions were created, the last must be the
503 jump instruction. Also, a raw define_insn may yield a plain
505 doloop_pat
= doloop_seq
;
506 if (INSN_P (doloop_pat
))
508 while (NEXT_INSN (doloop_pat
) != NULL_RTX
)
509 doloop_pat
= NEXT_INSN (doloop_pat
);
510 if (GET_CODE (doloop_pat
) == JUMP_INSN
)
511 doloop_pat
= PATTERN (doloop_pat
);
513 doloop_pat
= NULL_RTX
;
517 || ! (condition
= doloop_condition_get (doloop_pat
)))
520 fprintf (dump_file
, "Doloop: Unrecognizable doloop pattern!\n");
524 doloop_modify (loop
, desc
, doloop_seq
, condition
);
528 /* This is the main entry point. Process all LOOPS using doloop_optimize. */
531 doloop_optimize_loops (struct loops
*loops
)
536 for (i
= 1; i
< loops
->num
; i
++)
538 loop
= loops
->parray
[i
];
542 doloop_optimize (loop
);
547 #ifdef ENABLE_CHECKING
548 verify_dominators (CDI_DOMINATORS
);
549 verify_loop_structure (loops
);
552 #endif /* HAVE_doloop_end */