* opt-functions.awk (var_type): New function.
[official-gcc.git] / gcc / loop-doloop.c
blob4a2bb8774ee278baadc9c85d2d06e22c110370a5
1 /* Perform doloop optimizations
2 Copyright (C) 2004, 2005 Free Software Foundation, Inc.
3 Based on code by Michael P. Hayes (m.hayes@elec.canterbury.ac.nz)
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA. */
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "flags.h"
28 #include "expr.h"
29 #include "hard-reg-set.h"
30 #include "basic-block.h"
31 #include "toplev.h"
32 #include "tm_p.h"
33 #include "cfgloop.h"
34 #include "output.h"
35 #include "params.h"
36 #include "target.h"
38 /* This module is used to modify loops with a determinable number of
39 iterations to use special low-overhead looping instructions.
41 It first validates whether the loop is well behaved and has a
42 determinable number of iterations (either at compile or run-time).
43 It then modifies the loop to use a low-overhead looping pattern as
44 follows:
46 1. A pseudo register is allocated as the loop iteration counter.
48 2. The number of loop iterations is calculated and is stored
49 in the loop counter.
51 3. At the end of the loop, the jump insn is replaced by the
52 doloop_end pattern. The compare must remain because it might be
53 used elsewhere. If the loop-variable or condition register are
54 used elsewhere, they will be eliminated by flow.
56 4. An optional doloop_begin pattern is inserted at the top of the
57 loop.
59 TODO The optimization should only performed when either the biv used for exit
60 condition is unused at all except for the exit test, or if we do not have to
61 change its value, since otherwise we have to add a new induction variable,
62 which usually will not pay up (unless the cost of the doloop pattern is
63 somehow extremely lower than the cost of compare & jump, or unless the bct
64 register cannot be used for anything else but doloop -- ??? detect these
65 cases). */
67 #ifdef HAVE_doloop_end
69 /* Return the loop termination condition for PATTERN or zero
70 if it is not a decrement and branch jump insn. */
72 static rtx
73 doloop_condition_get (rtx pattern)
75 rtx cmp;
76 rtx inc;
77 rtx reg;
78 rtx condition;
80 /* The canonical doloop pattern we expect is:
82 (parallel [(set (pc) (if_then_else (condition)
83 (label_ref (label))
84 (pc)))
85 (set (reg) (plus (reg) (const_int -1)))
86 (additional clobbers and uses)])
88 Some machines (IA-64) make the decrement conditional on
89 the condition as well, so we don't bother verifying the
90 actual decrement. In summary, the branch must be the
91 first entry of the parallel (also required by jump.c),
92 and the second entry of the parallel must be a set of
93 the loop counter register. */
95 if (GET_CODE (pattern) != PARALLEL)
96 return 0;
98 cmp = XVECEXP (pattern, 0, 0);
99 inc = XVECEXP (pattern, 0, 1);
101 /* Check for (set (reg) (something)). */
102 if (GET_CODE (inc) != SET || ! REG_P (SET_DEST (inc)))
103 return 0;
105 /* Extract loop counter register. */
106 reg = SET_DEST (inc);
108 /* Check for (set (pc) (if_then_else (condition)
109 (label_ref (label))
110 (pc))). */
111 if (GET_CODE (cmp) != SET
112 || SET_DEST (cmp) != pc_rtx
113 || GET_CODE (SET_SRC (cmp)) != IF_THEN_ELSE
114 || GET_CODE (XEXP (SET_SRC (cmp), 1)) != LABEL_REF
115 || XEXP (SET_SRC (cmp), 2) != pc_rtx)
116 return 0;
118 /* Extract loop termination condition. */
119 condition = XEXP (SET_SRC (cmp), 0);
121 if ((GET_CODE (condition) != GE && GET_CODE (condition) != NE)
122 || GET_CODE (XEXP (condition, 1)) != CONST_INT)
123 return 0;
125 if (XEXP (condition, 0) == reg)
126 return condition;
128 if (GET_CODE (XEXP (condition, 0)) == PLUS
129 && XEXP (XEXP (condition, 0), 0) == reg)
130 return condition;
132 /* ??? If a machine uses a funny comparison, we could return a
133 canonicalized form here. */
135 return 0;
138 /* Return nonzero if the loop specified by LOOP is suitable for
139 the use of special low-overhead looping instructions. DESC
140 describes the number of iterations of the loop. */
142 static bool
143 doloop_valid_p (struct loop *loop, struct niter_desc *desc)
145 basic_block *body = get_loop_body (loop), bb;
146 rtx insn;
147 unsigned i;
148 bool result = true;
150 /* Check for loops that may not terminate under special conditions. */
151 if (!desc->simple_p
152 || desc->assumptions
153 || desc->infinite)
155 /* There are some cases that would require a special attention.
156 For example if the comparison is LEU and the comparison value
157 is UINT_MAX then the loop will not terminate. Similarly, if the
158 comparison code is GEU and the comparison value is 0, the
159 loop will not terminate.
161 If the absolute increment is not 1, the loop can be infinite
162 even with LTU/GTU, e.g. for (i = 3; i > 0; i -= 2)
164 ??? We could compute these conditions at run-time and have a
165 additional jump around the loop to ensure an infinite loop.
166 However, it is very unlikely that this is the intended
167 behavior of the loop and checking for these rare boundary
168 conditions would pessimize all other code.
170 If the loop is executed only a few times an extra check to
171 restart the loop could use up most of the benefits of using a
172 count register loop. Note however, that normally, this
173 restart branch would never execute, so it could be predicted
174 well by the CPU. We should generate the pessimistic code by
175 default, and have an option, e.g. -funsafe-loops that would
176 enable count-register loops in this case. */
177 if (dump_file)
178 fprintf (dump_file, "Doloop: Possible infinite iteration case.\n");
179 result = false;
180 goto cleanup;
183 for (i = 0; i < loop->num_nodes; i++)
185 bb = body[i];
187 for (insn = BB_HEAD (bb);
188 insn != NEXT_INSN (BB_END (bb));
189 insn = NEXT_INSN (insn))
191 /* Different targets have different necessities for low-overhead
192 looping. Call the back end for each instruction within the loop
193 to let it decide whether the insn is valid. */
194 if (!targetm.insn_valid_within_doloop (insn))
196 result = false;
197 goto cleanup;
201 result = true;
203 cleanup:
204 free (body);
206 return result;
209 /* Adds test of COND jumping to DEST to the end of BB. */
211 static void
212 add_test (rtx cond, basic_block bb, basic_block dest)
214 rtx seq, jump, label;
215 enum machine_mode mode;
216 rtx op0 = XEXP (cond, 0), op1 = XEXP (cond, 1);
217 enum rtx_code code = GET_CODE (cond);
219 mode = GET_MODE (XEXP (cond, 0));
220 if (mode == VOIDmode)
221 mode = GET_MODE (XEXP (cond, 1));
223 start_sequence ();
224 op0 = force_operand (op0, NULL_RTX);
225 op1 = force_operand (op1, NULL_RTX);
226 label = block_label (dest);
227 do_compare_rtx_and_jump (op0, op1, code, 0, mode, NULL_RTX, NULL_RTX, label);
229 jump = get_last_insn ();
230 JUMP_LABEL (jump) = label;
232 /* The jump is supposed to handle an unlikely special case. */
233 REG_NOTES (jump)
234 = gen_rtx_EXPR_LIST (REG_BR_PROB,
235 const0_rtx, REG_NOTES (jump));
237 LABEL_NUSES (label)++;
239 seq = get_insns ();
240 end_sequence ();
241 emit_insn_after (seq, BB_END (bb));
244 /* Modify the loop to use the low-overhead looping insn where LOOP
245 describes the loop, DESC describes the number of iterations of the
246 loop, and DOLOOP_INSN is the low-overhead looping insn to emit at the
247 end of the loop. CONDITION is the condition separated from the
248 DOLOOP_SEQ. COUNT is the number of iterations of the LOOP. */
250 static void
251 doloop_modify (struct loop *loop, struct niter_desc *desc,
252 rtx doloop_seq, rtx condition, rtx count)
254 rtx counter_reg;
255 rtx tmp, noloop = NULL_RTX;
256 rtx sequence;
257 rtx jump_insn;
258 rtx jump_label;
259 int nonneg = 0, irr;
260 bool increment_count;
261 basic_block loop_end = desc->out_edge->src;
262 enum machine_mode mode;
264 jump_insn = BB_END (loop_end);
266 if (dump_file)
268 fprintf (dump_file, "Doloop: Inserting doloop pattern (");
269 if (desc->const_iter)
270 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter);
271 else
272 fputs ("runtime", dump_file);
273 fputs (" iterations).\n", dump_file);
276 /* Discard original jump to continue loop. The original compare
277 result may still be live, so it cannot be discarded explicitly. */
278 delete_insn (jump_insn);
280 counter_reg = XEXP (condition, 0);
281 if (GET_CODE (counter_reg) == PLUS)
282 counter_reg = XEXP (counter_reg, 0);
283 mode = GET_MODE (counter_reg);
285 increment_count = false;
286 switch (GET_CODE (condition))
288 case NE:
289 /* Currently only NE tests against zero and one are supported. */
290 noloop = XEXP (condition, 1);
291 if (noloop != const0_rtx)
293 gcc_assert (noloop == const1_rtx);
294 increment_count = true;
296 break;
298 case GE:
299 /* Currently only GE tests against zero are supported. */
300 gcc_assert (XEXP (condition, 1) == const0_rtx);
302 noloop = constm1_rtx;
304 /* The iteration count does not need incrementing for a GE test. */
305 increment_count = false;
307 /* Determine if the iteration counter will be non-negative.
308 Note that the maximum value loaded is iterations_max - 1. */
309 if (desc->niter_max
310 <= ((unsigned HOST_WIDEST_INT) 1
311 << (GET_MODE_BITSIZE (mode) - 1)))
312 nonneg = 1;
313 break;
315 /* Abort if an invalid doloop pattern has been generated. */
316 default:
317 gcc_unreachable ();
320 if (increment_count)
321 count = simplify_gen_binary (PLUS, mode, count, const1_rtx);
323 /* Insert initialization of the count register into the loop header. */
324 start_sequence ();
325 tmp = force_operand (count, counter_reg);
326 convert_move (counter_reg, tmp, 1);
327 sequence = get_insns ();
328 end_sequence ();
329 emit_insn_after (sequence, BB_END (loop_preheader_edge (loop)->src));
331 if (desc->noloop_assumptions)
333 rtx ass = copy_rtx (desc->noloop_assumptions);
334 basic_block preheader = loop_preheader_edge (loop)->src;
335 basic_block set_zero
336 = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
337 basic_block new_preheader
338 = loop_split_edge_with (loop_preheader_edge (loop), NULL_RTX);
339 basic_block bb;
340 edge te;
341 gcov_type cnt;
343 /* Expand the condition testing the assumptions and if it does not pass,
344 reset the count register to 0. */
345 add_test (XEXP (ass, 0), preheader, set_zero);
346 single_succ_edge (preheader)->flags &= ~EDGE_FALLTHRU;
347 cnt = single_succ_edge (preheader)->count;
348 single_succ_edge (preheader)->probability = 0;
349 single_succ_edge (preheader)->count = 0;
350 irr = single_succ_edge (preheader)->flags & EDGE_IRREDUCIBLE_LOOP;
351 te = make_edge (preheader, new_preheader, EDGE_FALLTHRU | irr);
352 te->probability = REG_BR_PROB_BASE;
353 te->count = cnt;
354 set_immediate_dominator (CDI_DOMINATORS, new_preheader, preheader);
356 set_zero->count = 0;
357 set_zero->frequency = 0;
359 for (ass = XEXP (ass, 1); ass; ass = XEXP (ass, 1))
361 bb = loop_split_edge_with (te, NULL_RTX);
362 te = single_succ_edge (bb);
363 add_test (XEXP (ass, 0), bb, set_zero);
364 make_edge (bb, set_zero, irr);
367 start_sequence ();
368 convert_move (counter_reg, noloop, 0);
369 sequence = get_insns ();
370 end_sequence ();
371 emit_insn_after (sequence, BB_END (set_zero));
374 /* Some targets (eg, C4x) need to initialize special looping
375 registers. */
376 #ifdef HAVE_doloop_begin
378 rtx init;
379 unsigned level = get_loop_level (loop) + 1;
380 init = gen_doloop_begin (counter_reg,
381 desc->const_iter ? desc->niter_expr : const0_rtx,
382 desc->niter_max,
383 GEN_INT (level));
384 if (init)
386 start_sequence ();
387 emit_insn (init);
388 sequence = get_insns ();
389 end_sequence ();
390 emit_insn_after (sequence, BB_END (loop_preheader_edge (loop)->src));
393 #endif
395 /* Insert the new low-overhead looping insn. */
396 emit_jump_insn_after (doloop_seq, BB_END (loop_end));
397 jump_insn = BB_END (loop_end);
398 jump_label = block_label (desc->in_edge->dest);
399 JUMP_LABEL (jump_insn) = jump_label;
400 LABEL_NUSES (jump_label)++;
402 /* Ensure the right fallthru edge is marked, for case we have reversed
403 the condition. */
404 desc->in_edge->flags &= ~EDGE_FALLTHRU;
405 desc->out_edge->flags |= EDGE_FALLTHRU;
407 /* Add a REG_NONNEG note if the actual or estimated maximum number
408 of iterations is non-negative. */
409 if (nonneg)
411 REG_NOTES (jump_insn)
412 = gen_rtx_EXPR_LIST (REG_NONNEG, NULL_RTX, REG_NOTES (jump_insn));
416 /* Process loop described by LOOP validating that the loop is suitable for
417 conversion to use a low overhead looping instruction, replacing the jump
418 insn where suitable. Returns true if the loop was successfully
419 modified. */
421 static bool
422 doloop_optimize (struct loop *loop)
424 enum machine_mode mode;
425 rtx doloop_seq, doloop_pat, doloop_reg;
426 rtx iterations, count;
427 rtx iterations_max;
428 rtx start_label;
429 rtx condition;
430 unsigned level, est_niter;
431 struct niter_desc *desc;
432 unsigned word_mode_size;
433 unsigned HOST_WIDE_INT word_mode_max;
435 if (dump_file)
436 fprintf (dump_file, "Doloop: Processing loop %d.\n", loop->num);
438 iv_analysis_loop_init (loop);
440 /* Find the simple exit of a LOOP. */
441 desc = get_simple_loop_desc (loop);
443 /* Check that loop is a candidate for a low-overhead looping insn. */
444 if (!doloop_valid_p (loop, desc))
446 if (dump_file)
447 fprintf (dump_file,
448 "Doloop: The loop is not suitable.\n");
449 return false;
451 mode = desc->mode;
453 est_niter = 3;
454 if (desc->const_iter)
455 est_niter = desc->niter;
456 /* If the estimate on number of iterations is reliable (comes from profile
457 feedback), use it. Do not use it normally, since the expected number
458 of iterations of an unrolled loop is 2. */
459 if (loop->header->count)
460 est_niter = expected_loop_iterations (loop);
462 if (est_niter < 3)
464 if (dump_file)
465 fprintf (dump_file,
466 "Doloop: Too few iterations (%u) to be profitable.\n",
467 est_niter);
468 return false;
471 count = copy_rtx (desc->niter_expr);
472 iterations = desc->const_iter ? desc->niter_expr : const0_rtx;
473 iterations_max = GEN_INT (desc->niter_max);
474 level = get_loop_level (loop) + 1;
476 /* Generate looping insn. If the pattern FAILs then give up trying
477 to modify the loop since there is some aspect the back-end does
478 not like. */
479 start_label = block_label (desc->in_edge->dest);
480 doloop_reg = gen_reg_rtx (mode);
481 doloop_seq = gen_doloop_end (doloop_reg, iterations, iterations_max,
482 GEN_INT (level), start_label);
484 word_mode_size = GET_MODE_BITSIZE (word_mode);
485 word_mode_max
486 = ((unsigned HOST_WIDE_INT) 1 << (word_mode_size - 1) << 1) - 1;
487 if (! doloop_seq
488 && mode != word_mode
489 /* Before trying mode different from the one in that # of iterations is
490 computed, we must be sure that the number of iterations fits into
491 the new mode. */
492 && (word_mode_size >= GET_MODE_BITSIZE (mode)
493 || desc->niter_max <= word_mode_max))
495 if (word_mode_size > GET_MODE_BITSIZE (mode))
497 count = simplify_gen_unary (ZERO_EXTEND, word_mode,
498 count, mode);
499 iterations = simplify_gen_unary (ZERO_EXTEND, word_mode,
500 iterations, mode);
501 iterations_max = simplify_gen_unary (ZERO_EXTEND, word_mode,
502 iterations_max, mode);
504 else
506 count = lowpart_subreg (word_mode, count, mode);
507 iterations = lowpart_subreg (word_mode, iterations, mode);
508 iterations_max = lowpart_subreg (word_mode, iterations_max, mode);
510 PUT_MODE (doloop_reg, word_mode);
511 doloop_seq = gen_doloop_end (doloop_reg, iterations, iterations_max,
512 GEN_INT (level), start_label);
514 if (! doloop_seq)
516 if (dump_file)
517 fprintf (dump_file,
518 "Doloop: Target unwilling to use doloop pattern!\n");
519 return false;
522 /* If multiple instructions were created, the last must be the
523 jump instruction. Also, a raw define_insn may yield a plain
524 pattern. */
525 doloop_pat = doloop_seq;
526 if (INSN_P (doloop_pat))
528 while (NEXT_INSN (doloop_pat) != NULL_RTX)
529 doloop_pat = NEXT_INSN (doloop_pat);
530 if (JUMP_P (doloop_pat))
531 doloop_pat = PATTERN (doloop_pat);
532 else
533 doloop_pat = NULL_RTX;
536 if (! doloop_pat
537 || ! (condition = doloop_condition_get (doloop_pat)))
539 if (dump_file)
540 fprintf (dump_file, "Doloop: Unrecognizable doloop pattern!\n");
541 return false;
544 doloop_modify (loop, desc, doloop_seq, condition, count);
545 return true;
548 /* This is the main entry point. Process all LOOPS using doloop_optimize. */
550 void
551 doloop_optimize_loops (struct loops *loops)
553 unsigned i;
554 struct loop *loop;
556 for (i = 1; i < loops->num; i++)
558 loop = loops->parray[i];
559 if (!loop)
560 continue;
562 doloop_optimize (loop);
565 iv_analysis_done ();
567 #ifdef ENABLE_CHECKING
568 verify_dominators (CDI_DOMINATORS);
569 verify_loop_structure (loops);
570 #endif
572 #endif /* HAVE_doloop_end */