* config/xtensa/xtensa.h (ASM_OUTPUT_POOL_PROLOGUE): Emit a
[official-gcc.git] / gcc / unroll.c
blobcc3865641f1830c7be24b3e4a669fb212df96b9f
1 /* Try to unroll loops, and split induction variables.
2 Copyright (C) 1992, 1993, 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002
3 Free Software Foundation, Inc.
4 Contributed by James E. Wilson, Cygnus Support/UC Berkeley.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 2, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING. If not, write to the Free
20 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
21 02111-1307, USA. */
23 /* Try to unroll a loop, and split induction variables.
25 Loops for which the number of iterations can be calculated exactly are
26 handled specially. If the number of iterations times the insn_count is
27 less than MAX_UNROLLED_INSNS, then the loop is unrolled completely.
28 Otherwise, we try to unroll the loop a number of times modulo the number
29 of iterations, so that only one exit test will be needed. It is unrolled
30 a number of times approximately equal to MAX_UNROLLED_INSNS divided by
31 the insn count.
33 Otherwise, if the number of iterations can be calculated exactly at
34 run time, and the loop is always entered at the top, then we try to
35 precondition the loop. That is, at run time, calculate how many times
36 the loop will execute, and then execute the loop body a few times so
37 that the remaining iterations will be some multiple of 4 (or 2 if the
38 loop is large). Then fall through to a loop unrolled 4 (or 2) times,
39 with only one exit test needed at the end of the loop.
41 Otherwise, if the number of iterations can not be calculated exactly,
42 not even at run time, then we still unroll the loop a number of times
43 approximately equal to MAX_UNROLLED_INSNS divided by the insn count,
44 but there must be an exit test after each copy of the loop body.
46 For each induction variable, which is dead outside the loop (replaceable)
47 or for which we can easily calculate the final value, if we can easily
48 calculate its value at each place where it is set as a function of the
49 current loop unroll count and the variable's value at loop entry, then
50 the induction variable is split into `N' different variables, one for
51 each copy of the loop body. One variable is live across the backward
52 branch, and the others are all calculated as a function of this variable.
53 This helps eliminate data dependencies, and leads to further opportunities
54 for cse. */
56 /* Possible improvements follow: */
58 /* ??? Add an extra pass somewhere to determine whether unrolling will
59 give any benefit. E.g. after generating all unrolled insns, compute the
60 cost of all insns and compare against cost of insns in rolled loop.
62 - On traditional architectures, unrolling a non-constant bound loop
63 is a win if there is a giv whose only use is in memory addresses, the
64 memory addresses can be split, and hence giv increments can be
65 eliminated.
66 - It is also a win if the loop is executed many times, and preconditioning
67 can be performed for the loop.
68 Add code to check for these and similar cases. */
70 /* ??? Improve control of which loops get unrolled. Could use profiling
71 info to only unroll the most commonly executed loops. Perhaps have
 72 a user-specifiable option to control the amount of code expansion,
73 or the percent of loops to consider for unrolling. Etc. */
75 /* ??? Look at the register copies inside the loop to see if they form a
76 simple permutation. If so, iterate the permutation until it gets back to
77 the start state. This is how many times we should unroll the loop, for
78 best results, because then all register copies can be eliminated.
79 For example, the lisp nreverse function should be unrolled 3 times
80 while (this)
82 next = this->cdr;
83 this->cdr = prev;
84 prev = this;
85 this = next;
88 ??? The number of times to unroll the loop may also be based on data
89 references in the loop. For example, if we have a loop that references
90 x[i-1], x[i], and x[i+1], we should unroll it a multiple of 3 times. */
92 /* ??? Add some simple linear equation solving capability so that we can
93 determine the number of loop iterations for more complex loops.
94 For example, consider this loop from gdb
95 #define SWAP_TARGET_AND_HOST(buffer,len)
97 char tmp;
98 char *p = (char *) buffer;
99 char *q = ((char *) buffer) + len - 1;
100 int iterations = (len + 1) >> 1;
101 int i;
 102 for (p; p < q; p++, q--)
104 tmp = *q;
105 *q = *p;
106 *p = tmp;
109 Note that:
110 start value = p = &buffer + current_iteration
111 end value = q = &buffer + len - 1 - current_iteration
112 Given the loop exit test of "p < q", then there must be "q - p" iterations,
113 set equal to zero and solve for number of iterations:
114 q - p = len - 1 - 2*current_iteration = 0
115 current_iteration = (len - 1) / 2
116 Hence, there are (len - 1) / 2 (rounded up to the nearest integer)
117 iterations of this loop. */
119 /* ??? Currently, no labels are marked as loop invariant when doing loop
120 unrolling. This is because an insn inside the loop, that loads the address
121 of a label inside the loop into a register, could be moved outside the loop
122 by the invariant code motion pass if labels were invariant. If the loop
123 is subsequently unrolled, the code will be wrong because each unrolled
124 body of the loop will use the same address, whereas each actually needs a
125 different address. A case where this happens is when a loop containing
126 a switch statement is unrolled.
128 It would be better to let labels be considered invariant. When we
129 unroll loops here, check to see if any insns using a label local to the
130 loop were moved before the loop. If so, then correct the problem, by
131 moving the insn back into the loop, or perhaps replicate the insn before
132 the loop, one copy for each time the loop is unrolled. */
134 #include "config.h"
135 #include "system.h"
136 #include "rtl.h"
137 #include "tm_p.h"
138 #include "insn-config.h"
139 #include "integrate.h"
140 #include "regs.h"
141 #include "recog.h"
142 #include "flags.h"
143 #include "function.h"
144 #include "expr.h"
145 #include "loop.h"
146 #include "toplev.h"
147 #include "hard-reg-set.h"
148 #include "basic-block.h"
149 #include "predict.h"
150 #include "params.h"
/* The prime factors looked for when trying to unroll a loop by some
   number which is modulo the total number of iterations.  Just checking
   for these 4 prime factors will find at least one factor for 75% of
   all numbers theoretically.  Practically speaking, this will succeed
   almost all of the time since loops are generally a multiple of 2
   and/or 5.  */

#define NUM_FACTORS 4

/* Each entry pairs a candidate prime factor with a scratch counter that
   records how many times the factor divides the iteration count; the
   counters are reset and refilled each time a loop is analyzed.  */
static struct _factor { const int factor; int count; }
factors[NUM_FACTORS] = { {2, 0}, {3, 0}, {5, 0}, {7, 0}};
/* Describes the different types of loop unrolling performed.  */

enum unroll_types
{
  UNROLL_COMPLETELY,		/* Iteration count known; body replicated
				   once per iteration, exit test removed.  */
  UNROLL_MODULO,		/* Unrolled by a factor of the iteration
				   count, so one exit test suffices.  */
  UNROLL_NAIVE			/* Iteration count unknown; an exit test
				   is kept after each copy of the body.  */
};
173 /* Indexed by register number, if non-zero, then it contains a pointer
174 to a struct induction for a DEST_REG giv which has been combined with
175 one of more address givs. This is needed because whenever such a DEST_REG
176 giv is modified, we must modify the value of all split address givs
177 that were combined with this DEST_REG giv. */
179 static struct induction **addr_combined_regs;
181 /* Indexed by register number, if this is a splittable induction variable,
182 then this will hold the current value of the register, which depends on the
183 iteration number. */
185 static rtx *splittable_regs;
187 /* Indexed by register number, if this is a splittable induction variable,
188 then this will hold the number of instructions in the loop that modify
189 the induction variable. Used to ensure that only the last insn modifying
190 a split iv will update the original iv of the dest. */
192 static int *splittable_regs_updates;
194 /* Forward declarations. */
196 static void init_reg_map PARAMS ((struct inline_remap *, int));
197 static rtx calculate_giv_inc PARAMS ((rtx, rtx, unsigned int));
198 static rtx initial_reg_note_copy PARAMS ((rtx, struct inline_remap *));
199 static void final_reg_note_copy PARAMS ((rtx *, struct inline_remap *));
200 static void copy_loop_body PARAMS ((struct loop *, rtx, rtx,
201 struct inline_remap *, rtx, int,
202 enum unroll_types, rtx, rtx, rtx, rtx));
203 static int find_splittable_regs PARAMS ((const struct loop *,
204 enum unroll_types, int));
205 static int find_splittable_givs PARAMS ((const struct loop *,
206 struct iv_class *, enum unroll_types,
207 rtx, int));
208 static int reg_dead_after_loop PARAMS ((const struct loop *, rtx));
209 static rtx fold_rtx_mult_add PARAMS ((rtx, rtx, rtx, enum machine_mode));
210 static int verify_addresses PARAMS ((struct induction *, rtx, int));
211 static rtx remap_split_bivs PARAMS ((struct loop *, rtx));
212 static rtx find_common_reg_term PARAMS ((rtx, rtx));
213 static rtx subtract_reg_term PARAMS ((rtx, rtx));
214 static rtx loop_find_equiv_value PARAMS ((const struct loop *, rtx));
215 static rtx ujump_to_loop_cont PARAMS ((rtx, rtx));
217 /* Try to unroll one loop and split induction variables in the loop.
219 The loop is described by the arguments LOOP and INSN_COUNT.
220 STRENGTH_REDUCTION_P indicates whether information generated in the
221 strength reduction pass is available.
223 This function is intended to be called from within `strength_reduce'
224 in loop.c. */
226 void
227 unroll_loop (loop, insn_count, strength_reduce_p)
228 struct loop *loop;
229 int insn_count;
230 int strength_reduce_p;
232 struct loop_info *loop_info = LOOP_INFO (loop);
233 struct loop_ivs *ivs = LOOP_IVS (loop);
234 int i, j;
235 unsigned int r;
236 unsigned HOST_WIDE_INT temp;
237 int unroll_number = 1;
238 rtx copy_start, copy_end;
239 rtx insn, sequence, pattern, tem;
240 int max_labelno, max_insnno;
241 rtx insert_before;
242 struct inline_remap *map;
243 char *local_label = NULL;
244 char *local_regno;
245 unsigned int max_local_regnum;
246 unsigned int maxregnum;
247 rtx exit_label = 0;
248 rtx start_label;
249 struct iv_class *bl;
250 int splitting_not_safe = 0;
251 enum unroll_types unroll_type = UNROLL_NAIVE;
252 int loop_preconditioned = 0;
253 rtx safety_label;
254 /* This points to the last real insn in the loop, which should be either
255 a JUMP_INSN (for conditional jumps) or a BARRIER (for unconditional
256 jumps). */
257 rtx last_loop_insn;
258 rtx loop_start = loop->start;
259 rtx loop_end = loop->end;
261 /* Don't bother unrolling huge loops. Since the minimum factor is
262 two, loops greater than one half of MAX_UNROLLED_INSNS will never
263 be unrolled. */
264 if (insn_count > MAX_UNROLLED_INSNS / 2)
266 if (loop_dump_stream)
267 fprintf (loop_dump_stream, "Unrolling failure: Loop too big.\n");
268 return;
271 /* When emitting debugger info, we can't unroll loops with unequal numbers
272 of block_beg and block_end notes, because that would unbalance the block
273 structure of the function. This can happen as a result of the
274 "if (foo) bar; else break;" optimization in jump.c. */
275 /* ??? Gcc has a general policy that -g is never supposed to change the code
276 that the compiler emits, so we must disable this optimization always,
277 even if debug info is not being output. This is rare, so this should
278 not be a significant performance problem. */
280 if (1 /* write_symbols != NO_DEBUG */)
282 int block_begins = 0;
283 int block_ends = 0;
285 for (insn = loop_start; insn != loop_end; insn = NEXT_INSN (insn))
287 if (GET_CODE (insn) == NOTE)
289 if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_BLOCK_BEG)
290 block_begins++;
291 else if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_BLOCK_END)
292 block_ends++;
293 if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_EH_REGION_BEG
294 || NOTE_LINE_NUMBER (insn) == NOTE_INSN_EH_REGION_END)
296 /* Note, would be nice to add code to unroll EH
297 regions, but until that time, we punt (don't
298 unroll). For the proper way of doing it, see
299 expand_inline_function. */
301 if (loop_dump_stream)
302 fprintf (loop_dump_stream,
303 "Unrolling failure: cannot unroll EH regions.\n");
304 return;
309 if (block_begins != block_ends)
311 if (loop_dump_stream)
312 fprintf (loop_dump_stream,
313 "Unrolling failure: Unbalanced block notes.\n");
314 return;
318 /* Determine type of unroll to perform. Depends on the number of iterations
319 and the size of the loop. */
321 /* If there is no strength reduce info, then set
322 loop_info->n_iterations to zero. This can happen if
323 strength_reduce can't find any bivs in the loop. A value of zero
324 indicates that the number of iterations could not be calculated. */
326 if (! strength_reduce_p)
327 loop_info->n_iterations = 0;
329 if (loop_dump_stream && loop_info->n_iterations > 0)
331 fputs ("Loop unrolling: ", loop_dump_stream);
332 fprintf (loop_dump_stream, HOST_WIDE_INT_PRINT_DEC,
333 loop_info->n_iterations);
334 fputs (" iterations.\n", loop_dump_stream);
337 /* Find and save a pointer to the last nonnote insn in the loop. */
339 last_loop_insn = prev_nonnote_insn (loop_end);
341 /* Calculate how many times to unroll the loop. Indicate whether or
342 not the loop is being completely unrolled. */
344 if (loop_info->n_iterations == 1)
346 /* Handle the case where the loop begins with an unconditional
347 jump to the loop condition. Make sure to delete the jump
348 insn, otherwise the loop body will never execute. */
350 rtx ujump = ujump_to_loop_cont (loop->start, loop->cont);
351 if (ujump)
352 delete_related_insns (ujump);
354 /* If number of iterations is exactly 1, then eliminate the compare and
355 branch at the end of the loop since they will never be taken.
356 Then return, since no other action is needed here. */
358 /* If the last instruction is not a BARRIER or a JUMP_INSN, then
359 don't do anything. */
361 if (GET_CODE (last_loop_insn) == BARRIER)
363 /* Delete the jump insn. This will delete the barrier also. */
364 delete_related_insns (PREV_INSN (last_loop_insn));
366 else if (GET_CODE (last_loop_insn) == JUMP_INSN)
368 #ifdef HAVE_cc0
369 rtx prev = PREV_INSN (last_loop_insn);
370 #endif
371 delete_related_insns (last_loop_insn);
372 #ifdef HAVE_cc0
373 /* The immediately preceding insn may be a compare which must be
374 deleted. */
375 if (only_sets_cc0_p (prev))
376 delete_related_insns (prev);
377 #endif
380 /* Remove the loop notes since this is no longer a loop. */
381 if (loop->vtop)
382 delete_related_insns (loop->vtop);
383 if (loop->cont)
384 delete_related_insns (loop->cont);
385 if (loop_start)
386 delete_related_insns (loop_start);
387 if (loop_end)
388 delete_related_insns (loop_end);
390 return;
392 else if (loop_info->n_iterations > 0
393 /* Avoid overflow in the next expression. */
394 && loop_info->n_iterations < MAX_UNROLLED_INSNS
395 && loop_info->n_iterations * insn_count < MAX_UNROLLED_INSNS)
397 unroll_number = loop_info->n_iterations;
398 unroll_type = UNROLL_COMPLETELY;
400 else if (loop_info->n_iterations > 0)
402 /* Try to factor the number of iterations. Don't bother with the
403 general case, only using 2, 3, 5, and 7 will get 75% of all
404 numbers theoretically, and almost all in practice. */
406 for (i = 0; i < NUM_FACTORS; i++)
407 factors[i].count = 0;
409 temp = loop_info->n_iterations;
410 for (i = NUM_FACTORS - 1; i >= 0; i--)
411 while (temp % factors[i].factor == 0)
413 factors[i].count++;
414 temp = temp / factors[i].factor;
417 /* Start with the larger factors first so that we generally
418 get lots of unrolling. */
420 unroll_number = 1;
421 temp = insn_count;
422 for (i = 3; i >= 0; i--)
423 while (factors[i].count--)
425 if (temp * factors[i].factor < MAX_UNROLLED_INSNS)
427 unroll_number *= factors[i].factor;
428 temp *= factors[i].factor;
430 else
431 break;
434 /* If we couldn't find any factors, then unroll as in the normal
435 case. */
436 if (unroll_number == 1)
438 if (loop_dump_stream)
439 fprintf (loop_dump_stream, "Loop unrolling: No factors found.\n");
441 else
442 unroll_type = UNROLL_MODULO;
445 /* Default case, calculate number of times to unroll loop based on its
446 size. */
447 if (unroll_type == UNROLL_NAIVE)
449 if (8 * insn_count < MAX_UNROLLED_INSNS)
450 unroll_number = 8;
451 else if (4 * insn_count < MAX_UNROLLED_INSNS)
452 unroll_number = 4;
453 else
454 unroll_number = 2;
457 /* Now we know how many times to unroll the loop. */
459 if (loop_dump_stream)
460 fprintf (loop_dump_stream, "Unrolling loop %d times.\n", unroll_number);
462 if (unroll_type == UNROLL_COMPLETELY || unroll_type == UNROLL_MODULO)
464 /* Loops of these types can start with jump down to the exit condition
465 in rare circumstances.
467 Consider a pair of nested loops where the inner loop is part
468 of the exit code for the outer loop.
470 In this case jump.c will not duplicate the exit test for the outer
471 loop, so it will start with a jump to the exit code.
473 Then consider if the inner loop turns out to iterate once and
474 only once. We will end up deleting the jumps associated with
475 the inner loop. However, the loop notes are not removed from
476 the instruction stream.
478 And finally assume that we can compute the number of iterations
479 for the outer loop.
481 In this case unroll may want to unroll the outer loop even though
482 it starts with a jump to the outer loop's exit code.
484 We could try to optimize this case, but it hardly seems worth it.
485 Just return without unrolling the loop in such cases. */
487 insn = loop_start;
488 while (GET_CODE (insn) != CODE_LABEL && GET_CODE (insn) != JUMP_INSN)
489 insn = NEXT_INSN (insn);
490 if (GET_CODE (insn) == JUMP_INSN)
491 return;
494 if (unroll_type == UNROLL_COMPLETELY)
496 /* Completely unrolling the loop: Delete the compare and branch at
497 the end (the last two instructions). This delete must done at the
498 very end of loop unrolling, to avoid problems with calls to
499 back_branch_in_range_p, which is called by find_splittable_regs.
500 All increments of splittable bivs/givs are changed to load constant
501 instructions. */
503 copy_start = loop_start;
505 /* Set insert_before to the instruction immediately after the JUMP_INSN
506 (or BARRIER), so that any NOTEs between the JUMP_INSN and the end of
507 the loop will be correctly handled by copy_loop_body. */
508 insert_before = NEXT_INSN (last_loop_insn);
510 /* Set copy_end to the insn before the jump at the end of the loop. */
511 if (GET_CODE (last_loop_insn) == BARRIER)
512 copy_end = PREV_INSN (PREV_INSN (last_loop_insn));
513 else if (GET_CODE (last_loop_insn) == JUMP_INSN)
515 copy_end = PREV_INSN (last_loop_insn);
516 #ifdef HAVE_cc0
517 /* The instruction immediately before the JUMP_INSN may be a compare
518 instruction which we do not want to copy. */
519 if (sets_cc0_p (PREV_INSN (copy_end)))
520 copy_end = PREV_INSN (copy_end);
521 #endif
523 else
525 /* We currently can't unroll a loop if it doesn't end with a
526 JUMP_INSN. There would need to be a mechanism that recognizes
527 this case, and then inserts a jump after each loop body, which
528 jumps to after the last loop body. */
529 if (loop_dump_stream)
530 fprintf (loop_dump_stream,
531 "Unrolling failure: loop does not end with a JUMP_INSN.\n");
532 return;
535 else if (unroll_type == UNROLL_MODULO)
537 /* Partially unrolling the loop: The compare and branch at the end
538 (the last two instructions) must remain. Don't copy the compare
539 and branch instructions at the end of the loop. Insert the unrolled
540 code immediately before the compare/branch at the end so that the
541 code will fall through to them as before. */
543 copy_start = loop_start;
545 /* Set insert_before to the jump insn at the end of the loop.
546 Set copy_end to before the jump insn at the end of the loop. */
547 if (GET_CODE (last_loop_insn) == BARRIER)
549 insert_before = PREV_INSN (last_loop_insn);
550 copy_end = PREV_INSN (insert_before);
552 else if (GET_CODE (last_loop_insn) == JUMP_INSN)
554 insert_before = last_loop_insn;
555 #ifdef HAVE_cc0
556 /* The instruction immediately before the JUMP_INSN may be a compare
557 instruction which we do not want to copy or delete. */
558 if (sets_cc0_p (PREV_INSN (insert_before)))
559 insert_before = PREV_INSN (insert_before);
560 #endif
561 copy_end = PREV_INSN (insert_before);
563 else
565 /* We currently can't unroll a loop if it doesn't end with a
566 JUMP_INSN. There would need to be a mechanism that recognizes
567 this case, and then inserts a jump after each loop body, which
568 jumps to after the last loop body. */
569 if (loop_dump_stream)
570 fprintf (loop_dump_stream,
571 "Unrolling failure: loop does not end with a JUMP_INSN.\n");
572 return;
575 else
577 /* Normal case: Must copy the compare and branch instructions at the
578 end of the loop. */
580 if (GET_CODE (last_loop_insn) == BARRIER)
582 /* Loop ends with an unconditional jump and a barrier.
583 Handle this like above, don't copy jump and barrier.
584 This is not strictly necessary, but doing so prevents generating
585 unconditional jumps to an immediately following label.
587 This will be corrected below if the target of this jump is
588 not the start_label. */
590 insert_before = PREV_INSN (last_loop_insn);
591 copy_end = PREV_INSN (insert_before);
593 else if (GET_CODE (last_loop_insn) == JUMP_INSN)
595 /* Set insert_before to immediately after the JUMP_INSN, so that
596 NOTEs at the end of the loop will be correctly handled by
597 copy_loop_body. */
598 insert_before = NEXT_INSN (last_loop_insn);
599 copy_end = last_loop_insn;
601 else
603 /* We currently can't unroll a loop if it doesn't end with a
604 JUMP_INSN. There would need to be a mechanism that recognizes
605 this case, and then inserts a jump after each loop body, which
606 jumps to after the last loop body. */
607 if (loop_dump_stream)
608 fprintf (loop_dump_stream,
609 "Unrolling failure: loop does not end with a JUMP_INSN.\n");
610 return;
613 /* If copying exit test branches because they can not be eliminated,
614 then must convert the fall through case of the branch to a jump past
615 the end of the loop. Create a label to emit after the loop and save
616 it for later use. Do not use the label after the loop, if any, since
617 it might be used by insns outside the loop, or there might be insns
618 added before it later by final_[bg]iv_value which must be after
619 the real exit label. */
620 exit_label = gen_label_rtx ();
622 insn = loop_start;
623 while (GET_CODE (insn) != CODE_LABEL && GET_CODE (insn) != JUMP_INSN)
624 insn = NEXT_INSN (insn);
626 if (GET_CODE (insn) == JUMP_INSN)
628 /* The loop starts with a jump down to the exit condition test.
629 Start copying the loop after the barrier following this
630 jump insn. */
631 copy_start = NEXT_INSN (insn);
633 /* Splitting induction variables doesn't work when the loop is
634 entered via a jump to the bottom, because then we end up doing
635 a comparison against a new register for a split variable, but
636 we did not execute the set insn for the new register because
637 it was skipped over. */
638 splitting_not_safe = 1;
639 if (loop_dump_stream)
640 fprintf (loop_dump_stream,
641 "Splitting not safe, because loop not entered at top.\n");
643 else
644 copy_start = loop_start;
647 /* This should always be the first label in the loop. */
648 start_label = NEXT_INSN (copy_start);
649 /* There may be a line number note and/or a loop continue note here. */
650 while (GET_CODE (start_label) == NOTE)
651 start_label = NEXT_INSN (start_label);
652 if (GET_CODE (start_label) != CODE_LABEL)
654 /* This can happen as a result of jump threading. If the first insns in
655 the loop test the same condition as the loop's backward jump, or the
656 opposite condition, then the backward jump will be modified to point
657 to elsewhere, and the loop's start label is deleted.
659 This case currently can not be handled by the loop unrolling code. */
661 if (loop_dump_stream)
662 fprintf (loop_dump_stream,
663 "Unrolling failure: unknown insns between BEG note and loop label.\n");
664 return;
666 if (LABEL_NAME (start_label))
668 /* The jump optimization pass must have combined the original start label
669 with a named label for a goto. We can't unroll this case because
670 jumps which go to the named label must be handled differently than
671 jumps to the loop start, and it is impossible to differentiate them
672 in this case. */
673 if (loop_dump_stream)
674 fprintf (loop_dump_stream,
675 "Unrolling failure: loop start label is gone\n");
676 return;
679 if (unroll_type == UNROLL_NAIVE
680 && GET_CODE (last_loop_insn) == BARRIER
681 && GET_CODE (PREV_INSN (last_loop_insn)) == JUMP_INSN
682 && start_label != JUMP_LABEL (PREV_INSN (last_loop_insn)))
684 /* In this case, we must copy the jump and barrier, because they will
685 not be converted to jumps to an immediately following label. */
687 insert_before = NEXT_INSN (last_loop_insn);
688 copy_end = last_loop_insn;
691 if (unroll_type == UNROLL_NAIVE
692 && GET_CODE (last_loop_insn) == JUMP_INSN
693 && start_label != JUMP_LABEL (last_loop_insn))
695 /* ??? The loop ends with a conditional branch that does not branch back
696 to the loop start label. In this case, we must emit an unconditional
697 branch to the loop exit after emitting the final branch.
698 copy_loop_body does not have support for this currently, so we
699 give up. It doesn't seem worthwhile to unroll anyways since
700 unrolling would increase the number of branch instructions
701 executed. */
702 if (loop_dump_stream)
703 fprintf (loop_dump_stream,
704 "Unrolling failure: final conditional branch not to loop start\n");
705 return;
708 /* Allocate a translation table for the labels and insn numbers.
709 They will be filled in as we copy the insns in the loop. */
711 max_labelno = max_label_num ();
712 max_insnno = get_max_uid ();
714 /* Various paths through the unroll code may reach the "egress" label
715 without initializing fields within the map structure.
717 To be safe, we use xcalloc to zero the memory. */
718 map = (struct inline_remap *) xcalloc (1, sizeof (struct inline_remap));
720 /* Allocate the label map. */
722 if (max_labelno > 0)
724 map->label_map = (rtx *) xmalloc (max_labelno * sizeof (rtx));
726 local_label = (char *) xcalloc (max_labelno, sizeof (char));
729 /* Search the loop and mark all local labels, i.e. the ones which have to
730 be distinct labels when copied. For all labels which might be
731 non-local, set their label_map entries to point to themselves.
732 If they happen to be local their label_map entries will be overwritten
733 before the loop body is copied. The label_map entries for local labels
734 will be set to a different value each time the loop body is copied. */
736 for (insn = copy_start; insn != loop_end; insn = NEXT_INSN (insn))
738 rtx note;
740 if (GET_CODE (insn) == CODE_LABEL)
741 local_label[CODE_LABEL_NUMBER (insn)] = 1;
742 else if (GET_CODE (insn) == JUMP_INSN)
744 if (JUMP_LABEL (insn))
745 set_label_in_map (map,
746 CODE_LABEL_NUMBER (JUMP_LABEL (insn)),
747 JUMP_LABEL (insn));
748 else if (GET_CODE (PATTERN (insn)) == ADDR_VEC
749 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
751 rtx pat = PATTERN (insn);
752 int diff_vec_p = GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC;
753 int len = XVECLEN (pat, diff_vec_p);
754 rtx label;
756 for (i = 0; i < len; i++)
758 label = XEXP (XVECEXP (pat, diff_vec_p, i), 0);
759 set_label_in_map (map, CODE_LABEL_NUMBER (label), label);
763 if ((note = find_reg_note (insn, REG_LABEL, NULL_RTX)))
764 set_label_in_map (map, CODE_LABEL_NUMBER (XEXP (note, 0)),
765 XEXP (note, 0));
768 /* Allocate space for the insn map. */
770 map->insn_map = (rtx *) xmalloc (max_insnno * sizeof (rtx));
772 /* Set this to zero, to indicate that we are doing loop unrolling,
773 not function inlining. */
774 map->inline_target = 0;
776 /* The register and constant maps depend on the number of registers
777 present, so the final maps can't be created until after
778 find_splittable_regs is called. However, they are needed for
779 preconditioning, so we create temporary maps when preconditioning
780 is performed. */
782 /* The preconditioning code may allocate two new pseudo registers. */
783 maxregnum = max_reg_num ();
785 /* local_regno is only valid for regnos < max_local_regnum. */
786 max_local_regnum = maxregnum;
788 /* Allocate and zero out the splittable_regs and addr_combined_regs
789 arrays. These must be zeroed here because they will be used if
790 loop preconditioning is performed, and must be zero for that case.
792 It is safe to do this here, since the extra registers created by the
793 preconditioning code and find_splittable_regs will never be used
794 to access the splittable_regs[] and addr_combined_regs[] arrays. */
796 splittable_regs = (rtx *) xcalloc (maxregnum, sizeof (rtx));
797 splittable_regs_updates = (int *) xcalloc (maxregnum, sizeof (int));
798 addr_combined_regs
799 = (struct induction **) xcalloc (maxregnum, sizeof (struct induction *));
800 local_regno = (char *) xcalloc (maxregnum, sizeof (char));
802 /* Mark all local registers, i.e. the ones which are referenced only
803 inside the loop. */
804 if (INSN_UID (copy_end) < max_uid_for_loop)
806 int copy_start_luid = INSN_LUID (copy_start);
807 int copy_end_luid = INSN_LUID (copy_end);
809 /* If a register is used in the jump insn, we must not duplicate it
810 since it will also be used outside the loop. */
811 if (GET_CODE (copy_end) == JUMP_INSN)
812 copy_end_luid--;
814 /* If we have a target that uses cc0, then we also must not duplicate
815 the insn that sets cc0 before the jump insn, if one is present. */
816 #ifdef HAVE_cc0
817 if (GET_CODE (copy_end) == JUMP_INSN
818 && sets_cc0_p (PREV_INSN (copy_end)))
819 copy_end_luid--;
820 #endif
822 /* If copy_start points to the NOTE that starts the loop, then we must
823 use the next luid, because invariant pseudo-regs moved out of the loop
824 have their lifetimes modified to start here, but they are not safe
825 to duplicate. */
826 if (copy_start == loop_start)
827 copy_start_luid++;
829 /* If a pseudo's lifetime is entirely contained within this loop, then we
830 can use a different pseudo in each unrolled copy of the loop. This
831 results in better code. */
832 /* We must limit the generic test to max_reg_before_loop, because only
833 these pseudo registers have valid regno_first_uid info. */
834 for (r = FIRST_PSEUDO_REGISTER; r < max_reg_before_loop; ++r)
835 if (REGNO_FIRST_UID (r) > 0 && REGNO_FIRST_UID (r) <= max_uid_for_loop
836 && REGNO_FIRST_LUID (r) >= copy_start_luid
837 && REGNO_LAST_UID (r) > 0 && REGNO_LAST_UID (r) <= max_uid_for_loop
838 && REGNO_LAST_LUID (r) <= copy_end_luid)
840 /* However, we must also check for loop-carried dependencies.
841 If the value the pseudo has at the end of iteration X is
842 used by iteration X+1, then we can not use a different pseudo
843 for each unrolled copy of the loop. */
844 /* A pseudo is safe if regno_first_uid is a set, and this
845 set dominates all instructions from regno_first_uid to
846 regno_last_uid. */
847 /* ??? This check is simplistic. We would get better code if
848 this check was more sophisticated. */
849 if (set_dominates_use (r, REGNO_FIRST_UID (r), REGNO_LAST_UID (r),
850 copy_start, copy_end))
851 local_regno[r] = 1;
853 if (loop_dump_stream)
855 if (local_regno[r])
856 fprintf (loop_dump_stream, "Marked reg %d as local\n", r);
857 else
858 fprintf (loop_dump_stream, "Did not mark reg %d as local\n",
864 /* If this loop requires exit tests when unrolled, check to see if we
865 can precondition the loop so as to make the exit tests unnecessary.
866 Just like variable splitting, this is not safe if the loop is entered
867 via a jump to the bottom. Also, can not do this if no strength
868 reduce info, because precondition_loop_p uses this info. */
870 /* Must copy the loop body for preconditioning before the following
871 find_splittable_regs call since that will emit insns which need to
872 be after the preconditioned loop copies, but immediately before the
873 unrolled loop copies. */
875 /* Also, it is not safe to split induction variables for the preconditioned
876 copies of the loop body. If we split induction variables, then the code
877 assumes that each induction variable can be represented as a function
878 of its initial value and the loop iteration number. This is not true
879 in this case, because the last preconditioned copy of the loop body
880 could be any iteration from the first up to the `unroll_number-1'th,
881 depending on the initial value of the iteration variable. Therefore
882 we can not split induction variables here, because we can not calculate
883 their value. Hence, this code must occur before find_splittable_regs
884 is called. */
886 if (unroll_type == UNROLL_NAIVE && ! splitting_not_safe && strength_reduce_p)
888 rtx initial_value, final_value, increment;
889 enum machine_mode mode;
891 if (precondition_loop_p (loop,
892 &initial_value, &final_value, &increment,
893 &mode))
895 rtx diff;
896 rtx *labels;
897 int abs_inc, neg_inc;
898 enum rtx_code cc = loop_info->comparison_code;
899 int less_p = (cc == LE || cc == LEU || cc == LT || cc == LTU);
900 int unsigned_p = (cc == LEU || cc == GEU || cc == LTU || cc == GTU);
902 map->reg_map = (rtx *) xmalloc (maxregnum * sizeof (rtx));
904 VARRAY_CONST_EQUIV_INIT (map->const_equiv_varray, maxregnum,
905 "unroll_loop_precondition");
906 global_const_equiv_varray = map->const_equiv_varray;
908 init_reg_map (map, maxregnum);
910 /* Limit loop unrolling to 4, since this will make 7 copies of
911 the loop body. */
912 if (unroll_number > 4)
913 unroll_number = 4;
915 /* Save the absolute value of the increment, and also whether or
916 not it is negative. */
917 neg_inc = 0;
918 abs_inc = INTVAL (increment);
919 if (abs_inc < 0)
921 abs_inc = -abs_inc;
922 neg_inc = 1;
925 start_sequence ();
927 /* Final value may have form of (PLUS val1 const1_rtx). We need
928 to convert it into general operand, so compute the real value. */
930 if (GET_CODE (final_value) == PLUS)
932 final_value = expand_simple_binop (mode, PLUS,
933 copy_rtx (XEXP (final_value, 0)),
934 copy_rtx (XEXP (final_value, 1)),
935 NULL_RTX, 0, OPTAB_LIB_WIDEN);
937 if (!nonmemory_operand (final_value, VOIDmode))
938 final_value = force_reg (mode, copy_rtx (final_value));
940 /* Calculate the difference between the final and initial values.
941 Final value may be a (plus (reg x) (const_int 1)) rtx.
942 Let the following cse pass simplify this if initial value is
943 a constant.
945 We must copy the final and initial values here to avoid
946 improperly shared rtl.
948 We have to deal with for (i = 0; --i < 6;) type loops.
949 For such loops the real final value is the first time the
950 loop variable overflows, so the diff we calculate is the
951 distance from the overflow value. This is 0 or ~0 for
952 unsigned loops depending on the direction, or INT_MAX,
953 INT_MAX+1 for signed loops. We really do not need the
954 exact value, since we are only interested in the diff
955 modulo the increment, and the increment is a power of 2,
956 so we can pretend that the overflow value is 0/~0. */
958 if (cc == NE || less_p != neg_inc)
959 diff = expand_simple_binop (mode, MINUS, final_value,
960 copy_rtx (initial_value), NULL_RTX, 0,
961 OPTAB_LIB_WIDEN);
962 else
963 diff = expand_simple_unop (mode, neg_inc ? NOT : NEG,
964 copy_rtx (initial_value), NULL_RTX, 0);
966 /* Now calculate (diff % (unroll * abs (increment))) by using an
967 and instruction. */
968 diff = expand_simple_binop (GET_MODE (diff), AND, diff,
969 GEN_INT (unroll_number * abs_inc - 1),
970 NULL_RTX, 0, OPTAB_LIB_WIDEN);
972 /* Now emit a sequence of branches to jump to the proper precond
973 loop entry point. */
975 labels = (rtx *) xmalloc (sizeof (rtx) * unroll_number);
976 for (i = 0; i < unroll_number; i++)
977 labels[i] = gen_label_rtx ();
979 /* Check for the case where the initial value is greater than or
980 equal to the final value. In that case, we want to execute
981 exactly one loop iteration. The code below will fail for this
982 case. This check does not apply if the loop has a NE
983 comparison at the end. */
985 if (cc != NE)
987 rtx incremented_initval;
988 incremented_initval = expand_simple_binop (mode, PLUS,
989 initial_value,
990 increment,
991 NULL_RTX, 0,
992 OPTAB_LIB_WIDEN);
993 emit_cmp_and_jump_insns (incremented_initval, final_value,
994 less_p ? GE : LE, NULL_RTX,
995 mode, unsigned_p, labels[1]);
996 predict_insn_def (get_last_insn (), PRED_LOOP_CONDITION,
997 TAKEN);
998 JUMP_LABEL (get_last_insn ()) = labels[1];
999 LABEL_NUSES (labels[1])++;
1002 /* Assuming the unroll_number is 4, and the increment is 2, then
1003 for a negative increment: for a positive increment:
1004 diff = 0,1 precond 0 diff = 0,7 precond 0
1005 diff = 2,3 precond 3 diff = 1,2 precond 1
1006 diff = 4,5 precond 2 diff = 3,4 precond 2
1007 diff = 6,7 precond 1 diff = 5,6 precond 3 */
1009 /* We only need to emit (unroll_number - 1) branches here, the
1010 last case just falls through to the following code. */
1012 /* ??? This would give better code if we emitted a tree of branches
1013 instead of the current linear list of branches. */
1015 for (i = 0; i < unroll_number - 1; i++)
1017 int cmp_const;
1018 enum rtx_code cmp_code;
1020 /* For negative increments, must invert the constant compared
1021 against, except when comparing against zero. */
1022 if (i == 0)
1024 cmp_const = 0;
1025 cmp_code = EQ;
1027 else if (neg_inc)
1029 cmp_const = unroll_number - i;
1030 cmp_code = GE;
1032 else
1034 cmp_const = i;
1035 cmp_code = LE;
1038 emit_cmp_and_jump_insns (diff, GEN_INT (abs_inc * cmp_const),
1039 cmp_code, NULL_RTX, mode, 0, labels[i]);
1040 JUMP_LABEL (get_last_insn ()) = labels[i];
1041 LABEL_NUSES (labels[i])++;
1042 predict_insn (get_last_insn (), PRED_LOOP_PRECONDITIONING,
1043 REG_BR_PROB_BASE / (unroll_number - i));
1046 /* If the increment is greater than one, then we need another branch,
1047 to handle other cases equivalent to 0. */
1049 /* ??? This should be merged into the code above somehow to help
1050 simplify the code here, and reduce the number of branches emitted.
1051 For the negative increment case, the branch here could easily
1052 be merged with the `0' case branch above. For the positive
1053 increment case, it is not clear how this can be simplified. */
1055 if (abs_inc != 1)
1057 int cmp_const;
1058 enum rtx_code cmp_code;
1060 if (neg_inc)
1062 cmp_const = abs_inc - 1;
1063 cmp_code = LE;
1065 else
1067 cmp_const = abs_inc * (unroll_number - 1) + 1;
1068 cmp_code = GE;
1071 emit_cmp_and_jump_insns (diff, GEN_INT (cmp_const), cmp_code,
1072 NULL_RTX, mode, 0, labels[0]);
1073 JUMP_LABEL (get_last_insn ()) = labels[0];
1074 LABEL_NUSES (labels[0])++;
1077 sequence = gen_sequence ();
1078 end_sequence ();
1079 loop_insn_hoist (loop, sequence);
1081 /* Only the last copy of the loop body here needs the exit
1082 test, so set copy_end to exclude the compare/branch here,
1083 and then reset it inside the loop when get to the last
1084 copy. */
1086 if (GET_CODE (last_loop_insn) == BARRIER)
1087 copy_end = PREV_INSN (PREV_INSN (last_loop_insn));
1088 else if (GET_CODE (last_loop_insn) == JUMP_INSN)
1090 copy_end = PREV_INSN (last_loop_insn);
1091 #ifdef HAVE_cc0
1092 /* The immediately preceding insn may be a compare which
1093 we do not want to copy. */
1094 if (sets_cc0_p (PREV_INSN (copy_end)))
1095 copy_end = PREV_INSN (copy_end);
1096 #endif
1098 else
1099 abort ();
1101 for (i = 1; i < unroll_number; i++)
1103 emit_label_after (labels[unroll_number - i],
1104 PREV_INSN (loop_start));
1106 memset ((char *) map->insn_map, 0, max_insnno * sizeof (rtx));
1107 memset ((char *) &VARRAY_CONST_EQUIV (map->const_equiv_varray, 0),
1108 0, (VARRAY_SIZE (map->const_equiv_varray)
1109 * sizeof (struct const_equiv_data)));
1110 map->const_age = 0;
1112 for (j = 0; j < max_labelno; j++)
1113 if (local_label[j])
1114 set_label_in_map (map, j, gen_label_rtx ());
1116 for (r = FIRST_PSEUDO_REGISTER; r < max_local_regnum; r++)
1117 if (local_regno[r])
1119 map->reg_map[r]
1120 = gen_reg_rtx (GET_MODE (regno_reg_rtx[r]));
1121 record_base_value (REGNO (map->reg_map[r]),
1122 regno_reg_rtx[r], 0);
1124 /* The last copy needs the compare/branch insns at the end,
1125 so reset copy_end here if the loop ends with a conditional
1126 branch. */
1128 if (i == unroll_number - 1)
1130 if (GET_CODE (last_loop_insn) == BARRIER)
1131 copy_end = PREV_INSN (PREV_INSN (last_loop_insn));
1132 else
1133 copy_end = last_loop_insn;
1136 /* None of the copies are the `last_iteration', so just
1137 pass zero for that parameter. */
1138 copy_loop_body (loop, copy_start, copy_end, map, exit_label, 0,
1139 unroll_type, start_label, loop_end,
1140 loop_start, copy_end);
1142 emit_label_after (labels[0], PREV_INSN (loop_start));
1144 if (GET_CODE (last_loop_insn) == BARRIER)
1146 insert_before = PREV_INSN (last_loop_insn);
1147 copy_end = PREV_INSN (insert_before);
1149 else
1151 insert_before = last_loop_insn;
1152 #ifdef HAVE_cc0
1153 /* The instruction immediately before the JUMP_INSN may
1154 be a compare instruction which we do not want to copy
1155 or delete. */
1156 if (sets_cc0_p (PREV_INSN (insert_before)))
1157 insert_before = PREV_INSN (insert_before);
1158 #endif
1159 copy_end = PREV_INSN (insert_before);
1162 /* Set unroll type to MODULO now. */
1163 unroll_type = UNROLL_MODULO;
1164 loop_preconditioned = 1;
1166 /* Clean up. */
1167 free (labels);
1171 /* If reach here, and the loop type is UNROLL_NAIVE, then don't unroll
1172 the loop unless all loops are being unrolled. */
1173 if (unroll_type == UNROLL_NAIVE && ! flag_unroll_all_loops)
1175 if (loop_dump_stream)
1176 fprintf (loop_dump_stream,
1177 "Unrolling failure: Naive unrolling not being done.\n");
1178 goto egress;
1181 /* At this point, we are guaranteed to unroll the loop. */
1183 /* Keep track of the unroll factor for the loop. */
1184 loop_info->unroll_number = unroll_number;
1186 /* For each biv and giv, determine whether it can be safely split into
1187 a different variable for each unrolled copy of the loop body.
1188 We precalculate and save this info here, since computing it is
1189 expensive.
1191 Do this before deleting any instructions from the loop, so that
1192 back_branch_in_range_p will work correctly. */
1194 if (splitting_not_safe)
1195 temp = 0;
1196 else
1197 temp = find_splittable_regs (loop, unroll_type, unroll_number);
1199 /* find_splittable_regs may have created some new registers, so must
1200 reallocate the reg_map with the new larger size, and must realloc
1201 the constant maps also. */
1203 maxregnum = max_reg_num ();
1204 map->reg_map = (rtx *) xmalloc (maxregnum * sizeof (rtx));
1206 init_reg_map (map, maxregnum);
1208 if (map->const_equiv_varray == 0)
1209 VARRAY_CONST_EQUIV_INIT (map->const_equiv_varray,
1210 maxregnum + temp * unroll_number * 2,
1211 "unroll_loop");
1212 global_const_equiv_varray = map->const_equiv_varray;
1214 /* Search the list of bivs and givs to find ones which need to be remapped
1215 when split, and set their reg_map entry appropriately. */
1217 for (bl = ivs->list; bl; bl = bl->next)
1219 if (REGNO (bl->biv->src_reg) != bl->regno)
1220 map->reg_map[bl->regno] = bl->biv->src_reg;
1221 #if 0
1222 /* Currently, non-reduced/final-value givs are never split. */
1223 for (v = bl->giv; v; v = v->next_iv)
1224 if (REGNO (v->src_reg) != bl->regno)
1225 map->reg_map[REGNO (v->dest_reg)] = v->src_reg;
1226 #endif
1229 /* Use our current register alignment and pointer flags. */
1230 map->regno_pointer_align = cfun->emit->regno_pointer_align;
1231 map->x_regno_reg_rtx = cfun->emit->x_regno_reg_rtx;
1233 /* If the loop is being partially unrolled, and the iteration variables
1234 are being split, and are being renamed for the split, then must fix up
1235 the compare/jump instruction at the end of the loop to refer to the new
1236 registers. This compare isn't copied, so the registers used in it
1237 will never be replaced if it isn't done here. */
1239 if (unroll_type == UNROLL_MODULO)
1241 insn = NEXT_INSN (copy_end);
1242 if (GET_CODE (insn) == INSN || GET_CODE (insn) == JUMP_INSN)
1243 PATTERN (insn) = remap_split_bivs (loop, PATTERN (insn));
1246 /* For unroll_number times, make a copy of each instruction
1247 between copy_start and copy_end, and insert these new instructions
1248 before the end of the loop. */
1250 for (i = 0; i < unroll_number; i++)
1252 memset ((char *) map->insn_map, 0, max_insnno * sizeof (rtx));
1253 memset ((char *) &VARRAY_CONST_EQUIV (map->const_equiv_varray, 0), 0,
1254 VARRAY_SIZE (map->const_equiv_varray) * sizeof (struct const_equiv_data));
1255 map->const_age = 0;
1257 for (j = 0; j < max_labelno; j++)
1258 if (local_label[j])
1259 set_label_in_map (map, j, gen_label_rtx ());
1261 for (r = FIRST_PSEUDO_REGISTER; r < max_local_regnum; r++)
1262 if (local_regno[r])
1264 map->reg_map[r] = gen_reg_rtx (GET_MODE (regno_reg_rtx[r]));
1265 record_base_value (REGNO (map->reg_map[r]),
1266 regno_reg_rtx[r], 0);
1269 /* If loop starts with a branch to the test, then fix it so that
1270 it points to the test of the first unrolled copy of the loop. */
1271 if (i == 0 && loop_start != copy_start)
1273 insn = PREV_INSN (copy_start);
1274 pattern = PATTERN (insn);
1276 tem = get_label_from_map (map,
1277 CODE_LABEL_NUMBER
1278 (XEXP (SET_SRC (pattern), 0)));
1279 SET_SRC (pattern) = gen_rtx_LABEL_REF (VOIDmode, tem);
1281 /* Set the jump label so that it can be used by later loop unrolling
1282 passes. */
1283 JUMP_LABEL (insn) = tem;
1284 LABEL_NUSES (tem)++;
1287 copy_loop_body (loop, copy_start, copy_end, map, exit_label,
1288 i == unroll_number - 1, unroll_type, start_label,
1289 loop_end, insert_before, insert_before);
1292 /* Before deleting any insns, emit a CODE_LABEL immediately after the last
1293 insn to be deleted. This prevents any runaway delete_insn call from
1294 more insns that it should, as it always stops at a CODE_LABEL. */
1296 /* Delete the compare and branch at the end of the loop if completely
1297 unrolling the loop. Deleting the backward branch at the end also
1298 deletes the code label at the start of the loop. This is done at
1299 the very end to avoid problems with back_branch_in_range_p. */
1301 if (unroll_type == UNROLL_COMPLETELY)
1302 safety_label = emit_label_after (gen_label_rtx (), last_loop_insn);
1303 else
1304 safety_label = emit_label_after (gen_label_rtx (), copy_end);
1306 /* Delete all of the original loop instructions. Don't delete the
1307 LOOP_BEG note, or the first code label in the loop. */
1309 insn = NEXT_INSN (copy_start);
1310 while (insn != safety_label)
1312 /* ??? Don't delete named code labels. They will be deleted when the
1313 jump that references them is deleted. Otherwise, we end up deleting
1314 them twice, which causes them to completely disappear instead of turn
1315 into NOTE_INSN_DELETED_LABEL notes. This in turn causes aborts in
1316 dwarfout.c/dwarf2out.c. We could perhaps fix the dwarf*out.c files
1317 to handle deleted labels instead. Or perhaps fix DECL_RTL of the
1318 associated LABEL_DECL to point to one of the new label instances. */
1319 /* ??? Likewise, we can't delete a NOTE_INSN_DELETED_LABEL note. */
1320 if (insn != start_label
1321 && ! (GET_CODE (insn) == CODE_LABEL && LABEL_NAME (insn))
1322 && ! (GET_CODE (insn) == NOTE
1323 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL))
1324 insn = delete_related_insns (insn);
1325 else
1326 insn = NEXT_INSN (insn);
1329 /* Can now delete the 'safety' label emitted to protect us from runaway
1330 delete_related_insns calls. */
1331 if (INSN_DELETED_P (safety_label))
1332 abort ();
1333 delete_related_insns (safety_label);
1335 /* If exit_label exists, emit it after the loop. Doing the emit here
1336 forces it to have a higher INSN_UID than any insn in the unrolled loop.
1337 This is needed so that mostly_true_jump in reorg.c will treat jumps
1338 to this loop end label correctly, i.e. predict that they are usually
1339 not taken. */
1340 if (exit_label)
1341 emit_label_after (exit_label, loop_end);
1343 egress:
1344 if (unroll_type == UNROLL_COMPLETELY)
1346 /* Remove the loop notes since this is no longer a loop. */
1347 if (loop->vtop)
1348 delete_related_insns (loop->vtop);
1349 if (loop->cont)
1350 delete_related_insns (loop->cont);
1351 if (loop_start)
1352 delete_related_insns (loop_start);
1353 if (loop_end)
1354 delete_related_insns (loop_end);
1357 if (map->const_equiv_varray)
1358 VARRAY_FREE (map->const_equiv_varray);
1359 if (map->label_map)
1361 free (map->label_map);
1362 free (local_label);
1364 free (map->insn_map);
1365 free (splittable_regs);
1366 free (splittable_regs_updates);
1367 free (addr_combined_regs);
1368 free (local_regno);
1369 if (map->reg_map)
1370 free (map->reg_map);
1371 free (map);
1374 /* Return true if the loop can be safely, and profitably, preconditioned
1375 so that the unrolled copies of the loop body don't need exit tests.
1377 This only works if final_value, initial_value and increment can be
1378 determined, and if increment is a constant power of 2.
1379 If increment is not a power of 2, then the preconditioning modulo
1380 operation would require a real modulo instead of a boolean AND, and this
1381 is not considered `profitable'. */
1383 /* ??? If the loop is known to be executed very many times, or the machine
1384 has a very cheap divide instruction, then preconditioning is a win even
1385 when the increment is not a power of 2. Use RTX_COST to compute
1386 whether divide is cheap.
1387 ??? A divide by constant doesn't actually need a divide, look at
1388 expand_divmod. The reduced cost of this optimized modulo is not
1389 reflected in RTX_COST. */
1392 precondition_loop_p (loop, initial_value, final_value, increment, mode)
1393 const struct loop *loop;
1394 rtx *initial_value, *final_value, *increment;
1395 enum machine_mode *mode;
1397 rtx loop_start = loop->start;
1398 struct loop_info *loop_info = LOOP_INFO (loop);
1400 if (loop_info->n_iterations > 0)
1402 if (INTVAL (loop_info->increment) > 0)
1404 *initial_value = const0_rtx;
1405 *increment = const1_rtx;
1406 *final_value = GEN_INT (loop_info->n_iterations);
1408 else
1410 *initial_value = GEN_INT (loop_info->n_iterations);
1411 *increment = constm1_rtx;
1412 *final_value = const0_rtx;
1414 *mode = word_mode;
1416 if (loop_dump_stream)
1418 fputs ("Preconditioning: Success, number of iterations known, ",
1419 loop_dump_stream);
1420 fprintf (loop_dump_stream, HOST_WIDE_INT_PRINT_DEC,
1421 loop_info->n_iterations);
1422 fputs (".\n", loop_dump_stream);
1424 return 1;
1427 if (loop_info->iteration_var == 0)
1429 if (loop_dump_stream)
1430 fprintf (loop_dump_stream,
1431 "Preconditioning: Could not find iteration variable.\n");
1432 return 0;
1434 else if (loop_info->initial_value == 0)
1436 if (loop_dump_stream)
1437 fprintf (loop_dump_stream,
1438 "Preconditioning: Could not find initial value.\n");
1439 return 0;
1441 else if (loop_info->increment == 0)
1443 if (loop_dump_stream)
1444 fprintf (loop_dump_stream,
1445 "Preconditioning: Could not find increment value.\n");
1446 return 0;
1448 else if (GET_CODE (loop_info->increment) != CONST_INT)
1450 if (loop_dump_stream)
1451 fprintf (loop_dump_stream,
1452 "Preconditioning: Increment not a constant.\n");
1453 return 0;
1455 else if ((exact_log2 (INTVAL (loop_info->increment)) < 0)
1456 && (exact_log2 (-INTVAL (loop_info->increment)) < 0))
1458 if (loop_dump_stream)
1459 fprintf (loop_dump_stream,
1460 "Preconditioning: Increment not a constant power of 2.\n");
1461 return 0;
1464 /* Unsigned_compare and compare_dir can be ignored here, since they do
1465 not matter for preconditioning. */
1467 if (loop_info->final_value == 0)
1469 if (loop_dump_stream)
1470 fprintf (loop_dump_stream,
1471 "Preconditioning: EQ comparison loop.\n");
1472 return 0;
1475 /* Must ensure that final_value is invariant, so call
1476 loop_invariant_p to check. Before doing so, must check regno
1477 against max_reg_before_loop to make sure that the register is in
1478 the range covered by loop_invariant_p. If it isn't, then it is
1479 most likely a biv/giv which by definition are not invariant. */
1480 if ((GET_CODE (loop_info->final_value) == REG
1481 && REGNO (loop_info->final_value) >= max_reg_before_loop)
1482 || (GET_CODE (loop_info->final_value) == PLUS
1483 && REGNO (XEXP (loop_info->final_value, 0)) >= max_reg_before_loop)
1484 || ! loop_invariant_p (loop, loop_info->final_value))
1486 if (loop_dump_stream)
1487 fprintf (loop_dump_stream,
1488 "Preconditioning: Final value not invariant.\n");
1489 return 0;
1492 /* Fail for floating point values, since the caller of this function
1493 does not have code to deal with them. */
1494 if (GET_MODE_CLASS (GET_MODE (loop_info->final_value)) == MODE_FLOAT
1495 || GET_MODE_CLASS (GET_MODE (loop_info->initial_value)) == MODE_FLOAT)
1497 if (loop_dump_stream)
1498 fprintf (loop_dump_stream,
1499 "Preconditioning: Floating point final or initial value.\n");
1500 return 0;
1503 /* Fail if loop_info->iteration_var is not live before loop_start,
1504 since we need to test its value in the preconditioning code. */
1506 if (REGNO_FIRST_LUID (REGNO (loop_info->iteration_var))
1507 > INSN_LUID (loop_start))
1509 if (loop_dump_stream)
1510 fprintf (loop_dump_stream,
1511 "Preconditioning: Iteration var not live before loop start.\n");
1512 return 0;
1515 /* Note that loop_iterations biases the initial value for GIV iterators
1516 such as "while (i-- > 0)" so that we can calculate the number of
1517 iterations just like for BIV iterators.
1519 Also note that the absolute values of initial_value and
1520 final_value are unimportant as only their difference is used for
1521 calculating the number of loop iterations. */
1522 *initial_value = loop_info->initial_value;
1523 *increment = loop_info->increment;
1524 *final_value = loop_info->final_value;
1526 /* Decide what mode to do these calculations in. Choose the larger
1527 of final_value's mode and initial_value's mode, or a full-word if
1528 both are constants. */
1529 *mode = GET_MODE (*final_value);
1530 if (*mode == VOIDmode)
1532 *mode = GET_MODE (*initial_value);
1533 if (*mode == VOIDmode)
1534 *mode = word_mode;
1536 else if (*mode != GET_MODE (*initial_value)
1537 && (GET_MODE_SIZE (*mode)
1538 < GET_MODE_SIZE (GET_MODE (*initial_value))))
1539 *mode = GET_MODE (*initial_value);
1541 /* Success! */
1542 if (loop_dump_stream)
1543 fprintf (loop_dump_stream, "Preconditioning: Successful.\n");
1544 return 1;
1547 /* All pseudo-registers must be mapped to themselves. Two hard registers
1548 must be mapped, VIRTUAL_STACK_VARS_REGNUM and VIRTUAL_INCOMING_ARGS_
1549 REGNUM, to avoid function-inlining specific conversions of these
1550 registers. All other hard regs can not be mapped because they may be
1551 used with different
1552 modes. */
1554 static void
1555 init_reg_map (map, maxregnum)
1556 struct inline_remap *map;
1557 int maxregnum;
1559 int i;
1561 for (i = maxregnum - 1; i > LAST_VIRTUAL_REGISTER; i--)
1562 map->reg_map[i] = regno_reg_rtx[i];
1563 /* Just clear the rest of the entries. */
1564 for (i = LAST_VIRTUAL_REGISTER; i >= 0; i--)
1565 map->reg_map[i] = 0;
1567 map->reg_map[VIRTUAL_STACK_VARS_REGNUM]
1568 = regno_reg_rtx[VIRTUAL_STACK_VARS_REGNUM];
1569 map->reg_map[VIRTUAL_INCOMING_ARGS_REGNUM]
1570 = regno_reg_rtx[VIRTUAL_INCOMING_ARGS_REGNUM];
1573 /* Strength-reduction will often emit code for optimized biv/givs which
1574 calculates their value in a temporary register, and then copies the result
1575 to the iv. This procedure reconstructs the pattern computing the iv;
1576 verifying that all operands are of the proper form.
1578 PATTERN must be the result of single_set.
1579 The return value is the amount that the giv is incremented by. */
1581 static rtx
1582 calculate_giv_inc (pattern, src_insn, regno)
1583 rtx pattern, src_insn;
1584 unsigned int regno;
1586 rtx increment;
1587 rtx increment_total = 0;
1588 int tries = 0;
1590 retry:
1591 /* Verify that we have an increment insn here. First check for a plus
1592 as the set source. */
1593 if (GET_CODE (SET_SRC (pattern)) != PLUS)
1595 /* SR sometimes computes the new giv value in a temp, then copies it
1596 to the new_reg. */
1597 src_insn = PREV_INSN (src_insn);
1598 pattern = single_set (src_insn);
1599 if (GET_CODE (SET_SRC (pattern)) != PLUS)
1600 abort ();
1602 /* The last insn emitted is not needed, so delete it to avoid confusing
1603 the second cse pass. This insn sets the giv unnecessarily. */
1604 delete_related_insns (get_last_insn ());
1607 /* Verify that we have a constant as the second operand of the plus. */
1608 increment = XEXP (SET_SRC (pattern), 1);
1609 if (GET_CODE (increment) != CONST_INT)
1611 /* SR sometimes puts the constant in a register, especially if it is
1612 too big to be an add immed operand. */
1613 increment = find_last_value (increment, &src_insn, NULL_RTX, 0);
1615 /* SR may have used LO_SUM to compute the constant if it is too large
1616 for a load immed operand. In this case, the constant is in operand
1617 one of the LO_SUM rtx. */
1618 if (GET_CODE (increment) == LO_SUM)
1619 increment = XEXP (increment, 1);
1621 /* Some ports store large constants in memory and add a REG_EQUAL
1622 note to the store insn. */
1623 else if (GET_CODE (increment) == MEM)
1625 rtx note = find_reg_note (src_insn, REG_EQUAL, 0);
1626 if (note)
1627 increment = XEXP (note, 0);
1630 else if (GET_CODE (increment) == IOR
1631 || GET_CODE (increment) == ASHIFT
1632 || GET_CODE (increment) == PLUS)
1634 /* The rs6000 port loads some constants with IOR.
1635 The alpha port loads some constants with ASHIFT and PLUS. */
1636 rtx second_part = XEXP (increment, 1);
1637 enum rtx_code code = GET_CODE (increment);
1639 increment = find_last_value (XEXP (increment, 0),
1640 &src_insn, NULL_RTX, 0);
1641 /* Don't need the last insn anymore. */
1642 delete_related_insns (get_last_insn ());
1644 if (GET_CODE (second_part) != CONST_INT
1645 || GET_CODE (increment) != CONST_INT)
1646 abort ();
1648 if (code == IOR)
1649 increment = GEN_INT (INTVAL (increment) | INTVAL (second_part));
1650 else if (code == PLUS)
1651 increment = GEN_INT (INTVAL (increment) + INTVAL (second_part));
1652 else
1653 increment = GEN_INT (INTVAL (increment) << INTVAL (second_part));
1656 if (GET_CODE (increment) != CONST_INT)
1657 abort ();
1659 /* The insn loading the constant into a register is no longer needed,
1660 so delete it. */
1661 delete_related_insns (get_last_insn ());
1664 if (increment_total)
1665 increment_total = GEN_INT (INTVAL (increment_total) + INTVAL (increment));
1666 else
1667 increment_total = increment;
1669 /* Check that the source register is the same as the register we expected
1670 to see as the source. If not, something is seriously wrong. */
1671 if (GET_CODE (XEXP (SET_SRC (pattern), 0)) != REG
1672 || REGNO (XEXP (SET_SRC (pattern), 0)) != regno)
1674 /* Some machines (e.g. the romp), may emit two add instructions for
1675 certain constants, so lets try looking for another add immediately
1676 before this one if we have only seen one add insn so far. */
1678 if (tries == 0)
1680 tries++;
1682 src_insn = PREV_INSN (src_insn);
1683 pattern = single_set (src_insn);
1685 delete_related_insns (get_last_insn ());
1687 goto retry;
1690 abort ();
1693 return increment_total;
1696 /* Copy REG_NOTES, except for insn references, because not all insn_map
1697 entries are valid yet. We do need to copy registers now though, because
1698 the reg_map entries can change during copying. */
1700 static rtx
1701 initial_reg_note_copy (notes, map)
1702 rtx notes;
1703 struct inline_remap *map;
1705 rtx copy;
1707 if (notes == 0)
1708 return 0;
1710 copy = rtx_alloc (GET_CODE (notes));
1711 PUT_REG_NOTE_KIND (copy, REG_NOTE_KIND (notes));
1713 if (GET_CODE (notes) == EXPR_LIST)
1714 XEXP (copy, 0) = copy_rtx_and_substitute (XEXP (notes, 0), map, 0);
1715 else if (GET_CODE (notes) == INSN_LIST)
1716 /* Don't substitute for these yet. */
1717 XEXP (copy, 0) = copy_rtx (XEXP (notes, 0));
1718 else
1719 abort ();
1721 XEXP (copy, 1) = initial_reg_note_copy (XEXP (notes, 1), map);
1723 return copy;
1726 /* Fixup insn references in copied REG_NOTES. */
1728 static void
1729 final_reg_note_copy (notesp, map)
1730 rtx *notesp;
1731 struct inline_remap *map;
1733 while (*notesp)
1735 rtx note = *notesp;
1737 if (GET_CODE (note) == INSN_LIST)
1739 /* Sometimes, we have a REG_WAS_0 note that points to a
1740 deleted instruction. In that case, we can just delete the
1741 note. */
1742 if (REG_NOTE_KIND (note) == REG_WAS_0)
1744 *notesp = XEXP (note, 1);
1745 continue;
1747 else
1749 rtx insn = map->insn_map[INSN_UID (XEXP (note, 0))];
1751 /* If we failed to remap the note, something is awry.
1752 Allow REG_LABEL as it may reference label outside
1753 the unrolled loop. */
1754 if (!insn)
1756 if (REG_NOTE_KIND (note) != REG_LABEL)
1757 abort ();
1759 else
1760 XEXP (note, 0) = insn;
1764 notesp = &XEXP (note, 1);
1768 /* Copy each instruction in the loop, substituting from map as appropriate.
1769 This is very similar to a loop in expand_inline_function. */
1771 static void
1772 copy_loop_body (loop, copy_start, copy_end, map, exit_label, last_iteration,
1773 unroll_type, start_label, loop_end, insert_before,
1774 copy_notes_from)
1775 struct loop *loop;
1776 rtx copy_start, copy_end;
1777 struct inline_remap *map;
1778 rtx exit_label;
1779 int last_iteration;
1780 enum unroll_types unroll_type;
1781 rtx start_label, loop_end, insert_before, copy_notes_from;
1783 struct loop_ivs *ivs = LOOP_IVS (loop);
1784 rtx insn, pattern;
1785 rtx set, tem, copy = NULL_RTX;
1786 int dest_reg_was_split, i;
1787 #ifdef HAVE_cc0
1788 rtx cc0_insn = 0;
1789 #endif
1790 rtx final_label = 0;
1791 rtx giv_inc, giv_dest_reg, giv_src_reg;
1793 /* If this isn't the last iteration, then map any references to the
1794 start_label to final_label. Final label will then be emitted immediately
1795 after the end of this loop body if it was ever used.
1797 If this is the last iteration, then map references to the start_label
1798 to itself. */
1799 if (! last_iteration)
1801 final_label = gen_label_rtx ();
1802 set_label_in_map (map, CODE_LABEL_NUMBER (start_label), final_label);
1804 else
1805 set_label_in_map (map, CODE_LABEL_NUMBER (start_label), start_label);
1807 start_sequence ();
1809 /* Emit a NOTE_INSN_DELETED to force at least two insns onto the sequence.
1810 Else gen_sequence could return a raw pattern for a jump which we pass
1811 off to emit_insn_before (instead of emit_jump_insn_before) which causes
1812 a variety of losing behaviors later. */
1813 emit_note (0, NOTE_INSN_DELETED);
1815 insn = copy_start;
1818 insn = NEXT_INSN (insn);
1820 map->orig_asm_operands_vector = 0;
1822 switch (GET_CODE (insn))
1824 case INSN:
1825 pattern = PATTERN (insn);
1826 copy = 0;
1827 giv_inc = 0;
1829 /* Check to see if this is a giv that has been combined with
1830 some split address givs. (Combined in the sense that
1831 `combine_givs' in loop.c has put two givs in the same register.)
1832 In this case, we must search all givs based on the same biv to
1833 find the address givs. Then split the address givs.
1834 Do this before splitting the giv, since that may map the
1835 SET_DEST to a new register. */
1837 if ((set = single_set (insn))
1838 && GET_CODE (SET_DEST (set)) == REG
1839 && addr_combined_regs[REGNO (SET_DEST (set))])
1841 struct iv_class *bl;
1842 struct induction *v, *tv;
1843 unsigned int regno = REGNO (SET_DEST (set));
1845 v = addr_combined_regs[REGNO (SET_DEST (set))];
1846 bl = REG_IV_CLASS (ivs, REGNO (v->src_reg));
1848 /* Although the giv_inc amount is not needed here, we must call
1849 calculate_giv_inc here since it might try to delete the
1850 last insn emitted. If we wait until later to call it,
1851 we might accidentally delete insns generated immediately
1852 below by emit_unrolled_add. */
1854 giv_inc = calculate_giv_inc (set, insn, regno);
1856 /* Now find all address giv's that were combined with this
1857 giv 'v'. */
1858 for (tv = bl->giv; tv; tv = tv->next_iv)
1859 if (tv->giv_type == DEST_ADDR && tv->same == v)
1861 int this_giv_inc;
1863 /* If this DEST_ADDR giv was not split, then ignore it. */
1864 if (*tv->location != tv->dest_reg)
1865 continue;
1867 /* Scale this_giv_inc if the multiplicative factors of
1868 the two givs are different. */
1869 this_giv_inc = INTVAL (giv_inc);
1870 if (tv->mult_val != v->mult_val)
1871 this_giv_inc = (this_giv_inc / INTVAL (v->mult_val)
1872 * INTVAL (tv->mult_val));
1874 tv->dest_reg = plus_constant (tv->dest_reg, this_giv_inc);
1875 *tv->location = tv->dest_reg;
1877 if (last_iteration && unroll_type != UNROLL_COMPLETELY)
1879 /* Must emit an insn to increment the split address
1880 giv. Add in the const_adjust field in case there
1881 was a constant eliminated from the address. */
1882 rtx value, dest_reg;
1884 /* tv->dest_reg will be either a bare register,
1885 or else a register plus a constant. */
1886 if (GET_CODE (tv->dest_reg) == REG)
1887 dest_reg = tv->dest_reg;
1888 else
1889 dest_reg = XEXP (tv->dest_reg, 0);
1891 /* Check for shared address givs, and avoid
1892 incrementing the shared pseudo reg more than
1893 once. */
1894 if (! tv->same_insn && ! tv->shared)
1896 /* tv->dest_reg may actually be a (PLUS (REG)
1897 (CONST)) here, so we must call plus_constant
1898 to add the const_adjust amount before calling
1899 emit_unrolled_add below. */
1900 value = plus_constant (tv->dest_reg,
1901 tv->const_adjust);
1903 if (GET_CODE (value) == PLUS)
1905 /* The constant could be too large for an add
1906 immediate, so can't directly emit an insn
1907 here. */
1908 emit_unrolled_add (dest_reg, XEXP (value, 0),
1909 XEXP (value, 1));
1913 /* Reset the giv to be just the register again, in case
1914 it is used after the set we have just emitted.
1915 We must subtract the const_adjust factor added in
1916 above. */
1917 tv->dest_reg = plus_constant (dest_reg,
1918 -tv->const_adjust);
1919 *tv->location = tv->dest_reg;
1924 /* If this is a setting of a splittable variable, then determine
1925 how to split the variable, create a new set based on this split,
1926 and set up the reg_map so that later uses of the variable will
1927 use the new split variable. */
1929 dest_reg_was_split = 0;
1931 if ((set = single_set (insn))
1932 && GET_CODE (SET_DEST (set)) == REG
1933 && splittable_regs[REGNO (SET_DEST (set))])
1935 unsigned int regno = REGNO (SET_DEST (set));
1936 unsigned int src_regno;
1938 dest_reg_was_split = 1;
1940 giv_dest_reg = SET_DEST (set);
1941 giv_src_reg = giv_dest_reg;
1942 /* Compute the increment value for the giv, if it wasn't
1943 already computed above. */
1944 if (giv_inc == 0)
1945 giv_inc = calculate_giv_inc (set, insn, regno);
1947 src_regno = REGNO (giv_src_reg);
1949 if (unroll_type == UNROLL_COMPLETELY)
1951 /* Completely unrolling the loop. Set the induction
1952 variable to a known constant value. */
1954 /* The value in splittable_regs may be an invariant
1955 value, so we must use plus_constant here. */
1956 splittable_regs[regno]
1957 = plus_constant (splittable_regs[src_regno],
1958 INTVAL (giv_inc));
1960 if (GET_CODE (splittable_regs[regno]) == PLUS)
1962 giv_src_reg = XEXP (splittable_regs[regno], 0);
1963 giv_inc = XEXP (splittable_regs[regno], 1);
1965 else
1967 /* The splittable_regs value must be a REG or a
1968 CONST_INT, so put the entire value in the giv_src_reg
1969 variable. */
1970 giv_src_reg = splittable_regs[regno];
1971 giv_inc = const0_rtx;
1974 else
1976 /* Partially unrolling loop. Create a new pseudo
1977 register for the iteration variable, and set it to
1978 be a constant plus the original register. Except
1979 on the last iteration, when the result has to
1980 go back into the original iteration var register. */
1982 /* Handle bivs which must be mapped to a new register
1983 when split. This happens for bivs which need their
1984 final value set before loop entry. The new register
1985 for the biv was stored in the biv's first struct
1986 induction entry by find_splittable_regs. */
1988 if (regno < ivs->n_regs
1989 && REG_IV_TYPE (ivs, regno) == BASIC_INDUCT)
1991 giv_src_reg = REG_IV_CLASS (ivs, regno)->biv->src_reg;
1992 giv_dest_reg = giv_src_reg;
1995 #if 0
1996 /* If non-reduced/final-value givs were split, then
1997 this would have to remap those givs also. See
1998 find_splittable_regs. */
1999 #endif
2001 splittable_regs[regno]
2002 = simplify_gen_binary (PLUS, GET_MODE (giv_src_reg),
2003 giv_inc,
2004 splittable_regs[src_regno]);
2005 giv_inc = splittable_regs[regno];
2007 /* Now split the induction variable by changing the dest
2008 of this insn to a new register, and setting its
2009 reg_map entry to point to this new register.
2011 If this is the last iteration, and this is the last insn
2012 that will update the iv, then reuse the original dest,
2013 to ensure that the iv will have the proper value when
2014 the loop exits or repeats.
2016 Using splittable_regs_updates here like this is safe,
2017 because it can only be greater than one if all
2018 instructions modifying the iv are always executed in
2019 order. */
2021 if (! last_iteration
2022 || (splittable_regs_updates[regno]-- != 1))
2024 tem = gen_reg_rtx (GET_MODE (giv_src_reg));
2025 giv_dest_reg = tem;
2026 map->reg_map[regno] = tem;
2027 record_base_value (REGNO (tem),
2028 giv_inc == const0_rtx
2029 ? giv_src_reg
2030 : gen_rtx_PLUS (GET_MODE (giv_src_reg),
2031 giv_src_reg, giv_inc),
2034 else
2035 map->reg_map[regno] = giv_src_reg;
2038 /* The constant being added could be too large for an add
2039 immediate, so can't directly emit an insn here. */
2040 emit_unrolled_add (giv_dest_reg, giv_src_reg, giv_inc);
2041 copy = get_last_insn ();
2042 pattern = PATTERN (copy);
2044 else
2046 pattern = copy_rtx_and_substitute (pattern, map, 0);
2047 copy = emit_insn (pattern);
2049 REG_NOTES (copy) = initial_reg_note_copy (REG_NOTES (insn), map);
2051 #ifdef HAVE_cc0
2052 /* If this insn is setting CC0, it may need to look at
2053 the insn that uses CC0 to see what type of insn it is.
2054 In that case, the call to recog via validate_change will
2055 fail. So don't substitute constants here. Instead,
2056 do it when we emit the following insn.
2058 For example, see the pyr.md file. That machine has signed and
2059 unsigned compares. The compare patterns must check the
2060 following branch insn to see which what kind of compare to
2061 emit.
2063 If the previous insn set CC0, substitute constants on it as
2064 well. */
2065 if (sets_cc0_p (PATTERN (copy)) != 0)
2066 cc0_insn = copy;
2067 else
2069 if (cc0_insn)
2070 try_constants (cc0_insn, map);
2071 cc0_insn = 0;
2072 try_constants (copy, map);
2074 #else
2075 try_constants (copy, map);
2076 #endif
2078 /* Make split induction variable constants `permanent' since we
2079 know there are no backward branches across iteration variable
2080 settings which would invalidate this. */
2081 if (dest_reg_was_split)
2083 int regno = REGNO (SET_DEST (set));
2085 if ((size_t) regno < VARRAY_SIZE (map->const_equiv_varray)
2086 && (VARRAY_CONST_EQUIV (map->const_equiv_varray, regno).age
2087 == map->const_age))
2088 VARRAY_CONST_EQUIV (map->const_equiv_varray, regno).age = -1;
2090 break;
2092 case JUMP_INSN:
2093 pattern = copy_rtx_and_substitute (PATTERN (insn), map, 0);
2094 copy = emit_jump_insn (pattern);
2095 REG_NOTES (copy) = initial_reg_note_copy (REG_NOTES (insn), map);
2097 if (JUMP_LABEL (insn))
2099 JUMP_LABEL (copy) = get_label_from_map (map,
2100 CODE_LABEL_NUMBER
2101 (JUMP_LABEL (insn)));
2102 LABEL_NUSES (JUMP_LABEL (copy))++;
2104 if (JUMP_LABEL (insn) == start_label && insn == copy_end
2105 && ! last_iteration)
2108 /* This is a branch to the beginning of the loop; this is the
2109 last insn being copied; and this is not the last iteration.
2110 In this case, we want to change the original fall through
2111 case to be a branch past the end of the loop, and the
2112 original jump label case to fall_through. */
2114 if (!invert_jump (copy, exit_label, 0))
2116 rtx jmp;
2117 rtx lab = gen_label_rtx ();
2118 /* Can't do it by reversing the jump (probably because we
2119 couldn't reverse the conditions), so emit a new
2120 jump_insn after COPY, and redirect the jump around
2121 that. */
2122 jmp = emit_jump_insn_after (gen_jump (exit_label), copy);
2123 JUMP_LABEL (jmp) = exit_label;
2124 LABEL_NUSES (exit_label)++;
2125 jmp = emit_barrier_after (jmp);
2126 emit_label_after (lab, jmp);
2127 LABEL_NUSES (lab) = 0;
2128 if (!redirect_jump (copy, lab, 0))
2129 abort ();
2133 #ifdef HAVE_cc0
2134 if (cc0_insn)
2135 try_constants (cc0_insn, map);
2136 cc0_insn = 0;
2137 #endif
2138 try_constants (copy, map);
2140 /* Set the jump label of COPY correctly to avoid problems with
2141 later passes of unroll_loop, if INSN had jump label set. */
2142 if (JUMP_LABEL (insn))
2144 rtx label = 0;
2146 /* Can't use the label_map for every insn, since this may be
2147 the backward branch, and hence the label was not mapped. */
2148 if ((set = single_set (copy)))
2150 tem = SET_SRC (set);
2151 if (GET_CODE (tem) == LABEL_REF)
2152 label = XEXP (tem, 0);
2153 else if (GET_CODE (tem) == IF_THEN_ELSE)
2155 if (XEXP (tem, 1) != pc_rtx)
2156 label = XEXP (XEXP (tem, 1), 0);
2157 else
2158 label = XEXP (XEXP (tem, 2), 0);
2162 if (label && GET_CODE (label) == CODE_LABEL)
2163 JUMP_LABEL (copy) = label;
2164 else
2166 /* An unrecognizable jump insn, probably the entry jump
2167 for a switch statement. This label must have been mapped,
2168 so just use the label_map to get the new jump label. */
2169 JUMP_LABEL (copy)
2170 = get_label_from_map (map,
2171 CODE_LABEL_NUMBER (JUMP_LABEL (insn)));
2174 /* If this is a non-local jump, then must increase the label
2175 use count so that the label will not be deleted when the
2176 original jump is deleted. */
2177 LABEL_NUSES (JUMP_LABEL (copy))++;
2179 else if (GET_CODE (PATTERN (copy)) == ADDR_VEC
2180 || GET_CODE (PATTERN (copy)) == ADDR_DIFF_VEC)
2182 rtx pat = PATTERN (copy);
2183 int diff_vec_p = GET_CODE (pat) == ADDR_DIFF_VEC;
2184 int len = XVECLEN (pat, diff_vec_p);
2185 int i;
2187 for (i = 0; i < len; i++)
2188 LABEL_NUSES (XEXP (XVECEXP (pat, diff_vec_p, i), 0))++;
2191 /* If this used to be a conditional jump insn but whose branch
2192 direction is now known, we must do something special. */
2193 if (any_condjump_p (insn) && onlyjump_p (insn) && map->last_pc_value)
2195 #ifdef HAVE_cc0
2196 /* If the previous insn set cc0 for us, delete it. */
2197 if (only_sets_cc0_p (PREV_INSN (copy)))
2198 delete_related_insns (PREV_INSN (copy));
2199 #endif
2201 /* If this is now a no-op, delete it. */
2202 if (map->last_pc_value == pc_rtx)
2204 delete_insn (copy);
2205 copy = 0;
2207 else
2208 /* Otherwise, this is unconditional jump so we must put a
2209 BARRIER after it. We could do some dead code elimination
2210 here, but jump.c will do it just as well. */
2211 emit_barrier ();
2213 break;
2215 case CALL_INSN:
2216 pattern = copy_rtx_and_substitute (PATTERN (insn), map, 0);
2217 copy = emit_call_insn (pattern);
2218 REG_NOTES (copy) = initial_reg_note_copy (REG_NOTES (insn), map);
2219 SIBLING_CALL_P (copy) = SIBLING_CALL_P (insn);
2221 /* Because the USAGE information potentially contains objects other
2222 than hard registers, we need to copy it. */
2223 CALL_INSN_FUNCTION_USAGE (copy)
2224 = copy_rtx_and_substitute (CALL_INSN_FUNCTION_USAGE (insn),
2225 map, 0);
2227 #ifdef HAVE_cc0
2228 if (cc0_insn)
2229 try_constants (cc0_insn, map);
2230 cc0_insn = 0;
2231 #endif
2232 try_constants (copy, map);
2234 /* Be lazy and assume CALL_INSNs clobber all hard registers. */
2235 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
2236 VARRAY_CONST_EQUIV (map->const_equiv_varray, i).rtx = 0;
2237 break;
2239 case CODE_LABEL:
2240 /* If this is the loop start label, then we don't need to emit a
2241 copy of this label since no one will use it. */
2243 if (insn != start_label)
2245 copy = emit_label (get_label_from_map (map,
2246 CODE_LABEL_NUMBER (insn)));
2247 map->const_age++;
2249 break;
2251 case BARRIER:
2252 copy = emit_barrier ();
2253 break;
2255 case NOTE:
2256 /* VTOP and CONT notes are valid only before the loop exit test.
2257 If placed anywhere else, loop may generate bad code. */
2258 /* BASIC_BLOCK notes exist to stabilize basic block structures with
2259 the associated rtl. We do not want to share the structure in
2260 this new block. */
2262 if (NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED
2263 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL
2264 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_BASIC_BLOCK
2265 && ((NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_VTOP
2266 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_CONT)
2267 || (last_iteration && unroll_type != UNROLL_COMPLETELY)))
2268 copy = emit_note (NOTE_SOURCE_FILE (insn),
2269 NOTE_LINE_NUMBER (insn));
2270 else
2271 copy = 0;
2272 break;
2274 default:
2275 abort ();
2278 map->insn_map[INSN_UID (insn)] = copy;
2280 while (insn != copy_end);
2282 /* Now finish coping the REG_NOTES. */
2283 insn = copy_start;
2286 insn = NEXT_INSN (insn);
2287 if ((GET_CODE (insn) == INSN || GET_CODE (insn) == JUMP_INSN
2288 || GET_CODE (insn) == CALL_INSN)
2289 && map->insn_map[INSN_UID (insn)])
2290 final_reg_note_copy (&REG_NOTES (map->insn_map[INSN_UID (insn)]), map);
2292 while (insn != copy_end);
2294 /* There may be notes between copy_notes_from and loop_end. Emit a copy of
2295 each of these notes here, since there may be some important ones, such as
2296 NOTE_INSN_BLOCK_END notes, in this group. We don't do this on the last
2297 iteration, because the original notes won't be deleted.
2299 We can't use insert_before here, because when from preconditioning,
2300 insert_before points before the loop. We can't use copy_end, because
2301 there may be insns already inserted after it (which we don't want to
2302 copy) when not from preconditioning code. */
2304 if (! last_iteration)
2306 for (insn = copy_notes_from; insn != loop_end; insn = NEXT_INSN (insn))
2308 /* VTOP notes are valid only before the loop exit test.
2309 If placed anywhere else, loop may generate bad code.
2310 There is no need to test for NOTE_INSN_LOOP_CONT notes
2311 here, since COPY_NOTES_FROM will be at most one or two (for cc0)
2312 instructions before the last insn in the loop, and if the
2313 end test is that short, there will be a VTOP note between
2314 the CONT note and the test. */
2315 if (GET_CODE (insn) == NOTE
2316 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED
2317 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_BASIC_BLOCK
2318 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_VTOP)
2319 emit_note (NOTE_SOURCE_FILE (insn), NOTE_LINE_NUMBER (insn));
2323 if (final_label && LABEL_NUSES (final_label) > 0)
2324 emit_label (final_label);
2326 tem = gen_sequence ();
2327 end_sequence ();
2328 loop_insn_emit_before (loop, 0, insert_before, tem);
2331 /* Emit an insn, using the expand_binop to ensure that a valid insn is
2332 emitted. This will correctly handle the case where the increment value
2333 won't fit in the immediate field of a PLUS insns. */
2335 void
2336 emit_unrolled_add (dest_reg, src_reg, increment)
2337 rtx dest_reg, src_reg, increment;
2339 rtx result;
2341 result = expand_simple_binop (GET_MODE (dest_reg), PLUS, src_reg, increment,
2342 dest_reg, 0, OPTAB_LIB_WIDEN);
2344 if (dest_reg != result)
2345 emit_move_insn (dest_reg, result);
2348 /* Searches the insns between INSN and LOOP->END. Returns 1 if there
2349 is a backward branch in that range that branches to somewhere between
2350 LOOP->START and INSN. Returns 0 otherwise. */
2352 /* ??? This is quadratic algorithm. Could be rewritten to be linear.
2353 In practice, this is not a problem, because this function is seldom called,
2354 and uses a negligible amount of CPU time on average. */
2357 back_branch_in_range_p (loop, insn)
2358 const struct loop *loop;
2359 rtx insn;
2361 rtx p, q, target_insn;
2362 rtx loop_start = loop->start;
2363 rtx loop_end = loop->end;
2364 rtx orig_loop_end = loop->end;
2366 /* Stop before we get to the backward branch at the end of the loop. */
2367 loop_end = prev_nonnote_insn (loop_end);
2368 if (GET_CODE (loop_end) == BARRIER)
2369 loop_end = PREV_INSN (loop_end);
2371 /* Check in case insn has been deleted, search forward for first non
2372 deleted insn following it. */
2373 while (INSN_DELETED_P (insn))
2374 insn = NEXT_INSN (insn);
2376 /* Check for the case where insn is the last insn in the loop. Deal
2377 with the case where INSN was a deleted loop test insn, in which case
2378 it will now be the NOTE_LOOP_END. */
2379 if (insn == loop_end || insn == orig_loop_end)
2380 return 0;
2382 for (p = NEXT_INSN (insn); p != loop_end; p = NEXT_INSN (p))
2384 if (GET_CODE (p) == JUMP_INSN)
2386 target_insn = JUMP_LABEL (p);
2388 /* Search from loop_start to insn, to see if one of them is
2389 the target_insn. We can't use INSN_LUID comparisons here,
2390 since insn may not have an LUID entry. */
2391 for (q = loop_start; q != insn; q = NEXT_INSN (q))
2392 if (q == target_insn)
2393 return 1;
2397 return 0;
2400 /* Try to generate the simplest rtx for the expression
2401 (PLUS (MULT mult1 mult2) add1). This is used to calculate the initial
2402 value of giv's. */
2404 static rtx
2405 fold_rtx_mult_add (mult1, mult2, add1, mode)
2406 rtx mult1, mult2, add1;
2407 enum machine_mode mode;
2409 rtx temp, mult_res;
2410 rtx result;
2412 /* The modes must all be the same. This should always be true. For now,
2413 check to make sure. */
2414 if ((GET_MODE (mult1) != mode && GET_MODE (mult1) != VOIDmode)
2415 || (GET_MODE (mult2) != mode && GET_MODE (mult2) != VOIDmode)
2416 || (GET_MODE (add1) != mode && GET_MODE (add1) != VOIDmode))
2417 abort ();
2419 /* Ensure that if at least one of mult1/mult2 are constant, then mult2
2420 will be a constant. */
2421 if (GET_CODE (mult1) == CONST_INT)
2423 temp = mult2;
2424 mult2 = mult1;
2425 mult1 = temp;
2428 mult_res = simplify_binary_operation (MULT, mode, mult1, mult2);
2429 if (! mult_res)
2430 mult_res = gen_rtx_MULT (mode, mult1, mult2);
2432 /* Again, put the constant second. */
2433 if (GET_CODE (add1) == CONST_INT)
2435 temp = add1;
2436 add1 = mult_res;
2437 mult_res = temp;
2440 result = simplify_binary_operation (PLUS, mode, add1, mult_res);
2441 if (! result)
2442 result = gen_rtx_PLUS (mode, add1, mult_res);
2444 return result;
2447 /* Searches the list of induction struct's for the biv BL, to try to calculate
2448 the total increment value for one iteration of the loop as a constant.
2450 Returns the increment value as an rtx, simplified as much as possible,
2451 if it can be calculated. Otherwise, returns 0. */
2454 biv_total_increment (bl)
2455 const struct iv_class *bl;
2457 struct induction *v;
2458 rtx result;
2460 /* For increment, must check every instruction that sets it. Each
2461 instruction must be executed only once each time through the loop.
2462 To verify this, we check that the insn is always executed, and that
2463 there are no backward branches after the insn that branch to before it.
2464 Also, the insn must have a mult_val of one (to make sure it really is
2465 an increment). */
2467 result = const0_rtx;
2468 for (v = bl->biv; v; v = v->next_iv)
2470 if (v->always_computable && v->mult_val == const1_rtx
2471 && ! v->maybe_multiple)
2472 result = fold_rtx_mult_add (result, const1_rtx, v->add_val, v->mode);
2473 else
2474 return 0;
2477 return result;
/* For each biv and giv, determine whether it can be safely split into
   a different variable for each unrolled copy of the loop body.  If it
   is safe to split, then indicate that by saving some useful info
   in the splittable_regs array.

   If the loop is being completely unrolled, then splittable_regs will hold
   the current value of the induction variable while the loop is unrolled.
   It must be set to the initial value of the induction variable here.
   Otherwise, splittable_regs will hold the difference between the current
   value of the induction variable and the value the induction variable had
   at the top of the loop.  It must be set to the value 0 here.

   Returns the total number of instructions that set registers that are
   splittable.  */

/* ?? If the loop is only unrolled twice, then most of the restrictions to
   constant values are unnecessary, since we can easily calculate increment
   values in this case even if nothing is constant.  The increment value
   should not involve a multiply however.  */

/* ?? Even if the biv/giv increment values aren't constant, it may still
   be beneficial to split the variable if the loop is only unrolled a few
   times, since multiplies by small integers (1,2,3,4) are very cheap.  */

static int
find_splittable_regs (loop, unroll_type, unroll_number)
     const struct loop *loop;
     enum unroll_types unroll_type;
     int unroll_number;
{
  struct loop_ivs *ivs = LOOP_IVS (loop);
  struct iv_class *bl;
  struct induction *v;
  rtx increment, tem;
  rtx biv_final_value;
  int biv_splittable;
  int result = 0;

  for (bl = ivs->list; bl; bl = bl->next)
    {
      /* Biv_total_increment must return a constant value,
	 otherwise we can not calculate the split values.  */

      increment = biv_total_increment (bl);
      if (! increment || GET_CODE (increment) != CONST_INT)
	continue;

      /* The loop must be unrolled completely, or else have a known number
	 of iterations and only one exit, or else the biv must be dead
	 outside the loop, or else the final value must be known.  Otherwise,
	 it is unsafe to split the biv since it may not have the proper
	 value on loop exit.  */

      /* loop_number_exit_count is non-zero if the loop has an exit other than
	 a fall through at the end.  */

      biv_splittable = 1;
      biv_final_value = 0;
      if (unroll_type != UNROLL_COMPLETELY
	  && (loop->exit_count || unroll_type == UNROLL_NAIVE)
	  && (REGNO_LAST_LUID (bl->regno) >= INSN_LUID (loop->end)
	      || ! bl->init_insn
	      || INSN_UID (bl->init_insn) >= max_uid_for_loop
	      || (REGNO_FIRST_LUID (bl->regno)
		  < INSN_LUID (bl->init_insn))
	      || reg_mentioned_p (bl->biv->dest_reg, SET_SRC (bl->init_set)))
	  && ! (biv_final_value = final_biv_value (loop, bl)))
	biv_splittable = 0;

      /* If any of the insns setting the BIV don't do so with a simple
	 PLUS, we don't know how to split it.  */
      for (v = bl->biv; biv_splittable && v; v = v->next_iv)
	if ((tem = single_set (v->insn)) == 0
	    || GET_CODE (SET_DEST (tem)) != REG
	    || REGNO (SET_DEST (tem)) != bl->regno
	    || GET_CODE (SET_SRC (tem)) != PLUS)
	  biv_splittable = 0;

      /* If final value is non-zero, then must emit an instruction which sets
	 the value of the biv to the proper value.  This is done after
	 handling all of the givs, since some of them may need to use the
	 biv's value in their initialization code.  */

      /* This biv is splittable.  If completely unrolling the loop, save
	 the biv's initial value.  Otherwise, save the constant zero.  */

      if (biv_splittable == 1)
	{
	  if (unroll_type == UNROLL_COMPLETELY)
	    {
	      /* If the initial value of the biv is itself (i.e. it is too
		 complicated for strength_reduce to compute), or is a hard
		 register, or it isn't invariant, then we must create a new
		 pseudo reg to hold the initial value of the biv.  */

	      if (GET_CODE (bl->initial_value) == REG
		  && (REGNO (bl->initial_value) == bl->regno
		      || REGNO (bl->initial_value) < FIRST_PSEUDO_REGISTER
		      || ! loop_invariant_p (loop, bl->initial_value)))
		{
		  rtx tem = gen_reg_rtx (bl->biv->mode);

		  record_base_value (REGNO (tem), bl->biv->add_val, 0);
		  loop_insn_hoist (loop,
				   gen_move_insn (tem, bl->biv->src_reg));

		  if (loop_dump_stream)
		    fprintf (loop_dump_stream,
			     "Biv %d initial value remapped to %d.\n",
			     bl->regno, REGNO (tem));

		  splittable_regs[bl->regno] = tem;
		}
	      else
		splittable_regs[bl->regno] = bl->initial_value;
	    }
	  else
	    splittable_regs[bl->regno] = const0_rtx;

	  /* Save the number of instructions that modify the biv, so that
	     we can treat the last one specially.  */

	  splittable_regs_updates[bl->regno] = bl->biv_count;
	  result += bl->biv_count;

	  if (loop_dump_stream)
	    fprintf (loop_dump_stream,
		     "Biv %d safe to split.\n", bl->regno);
	}

      /* Check every giv that depends on this biv to see whether it is
	 splittable also.  Even if the biv isn't splittable, givs which
	 depend on it may be splittable if the biv is live outside the
	 loop, and the givs aren't.  */

      result += find_splittable_givs (loop, bl, unroll_type, increment,
				      unroll_number);

      /* If final value is non-zero, then must emit an instruction which sets
	 the value of the biv to the proper value.  This is done after
	 handling all of the givs, since some of them may need to use the
	 biv's value in their initialization code.  */
      if (biv_final_value)
	{
	  /* If the loop has multiple exits, emit the insns before the
	     loop to ensure that it will always be executed no matter
	     how the loop exits.  Otherwise emit the insn after the loop,
	     since this is slightly more efficient.  */
	  if (! loop->exit_count)
	    loop_insn_sink (loop, gen_move_insn (bl->biv->src_reg,
						 biv_final_value));
	  else
	    {
	      /* Create a new register to hold the value of the biv, and then
		 set the biv to its final value before the loop start.  The biv
		 is set to its final value before loop start to ensure that
		 this insn will always be executed, no matter how the loop
		 exits.  */
	      rtx tem = gen_reg_rtx (bl->biv->mode);
	      record_base_value (REGNO (tem), bl->biv->add_val, 0);

	      loop_insn_hoist (loop, gen_move_insn (tem, bl->biv->src_reg));
	      loop_insn_hoist (loop, gen_move_insn (bl->biv->src_reg,
						    biv_final_value));

	      if (loop_dump_stream)
		fprintf (loop_dump_stream, "Biv %d mapped to %d for split.\n",
			 REGNO (bl->biv->src_reg), REGNO (tem));

	      /* Set up the mapping from the original biv register to the new
		 register.  */
	      bl->biv->src_reg = tem;
	    }
	}
    }
  return result;
}
2658 /* Return 1 if the first and last unrolled copy of the address giv V is valid
2659 for the instruction that is using it. Do not make any changes to that
2660 instruction. */
2662 static int
2663 verify_addresses (v, giv_inc, unroll_number)
2664 struct induction *v;
2665 rtx giv_inc;
2666 int unroll_number;
2668 int ret = 1;
2669 rtx orig_addr = *v->location;
2670 rtx last_addr = plus_constant (v->dest_reg,
2671 INTVAL (giv_inc) * (unroll_number - 1));
2673 /* First check to see if either address would fail. Handle the fact
2674 that we have may have a match_dup. */
2675 if (! validate_replace_rtx (*v->location, v->dest_reg, v->insn)
2676 || ! validate_replace_rtx (*v->location, last_addr, v->insn))
2677 ret = 0;
2679 /* Now put things back the way they were before. This should always
2680 succeed. */
2681 if (! validate_replace_rtx (*v->location, orig_addr, v->insn))
2682 abort ();
2684 return ret;
2687 /* For every giv based on the biv BL, check to determine whether it is
2688 splittable. This is a subroutine to find_splittable_regs ().
2690 Return the number of instructions that set splittable registers. */
2692 static int
2693 find_splittable_givs (loop, bl, unroll_type, increment, unroll_number)
2694 const struct loop *loop;
2695 struct iv_class *bl;
2696 enum unroll_types unroll_type;
2697 rtx increment;
2698 int unroll_number;
2700 struct loop_ivs *ivs = LOOP_IVS (loop);
2701 struct induction *v, *v2;
2702 rtx final_value;
2703 rtx tem;
2704 int result = 0;
2706 /* Scan the list of givs, and set the same_insn field when there are
2707 multiple identical givs in the same insn. */
2708 for (v = bl->giv; v; v = v->next_iv)
2709 for (v2 = v->next_iv; v2; v2 = v2->next_iv)
2710 if (v->insn == v2->insn && rtx_equal_p (v->new_reg, v2->new_reg)
2711 && ! v2->same_insn)
2712 v2->same_insn = v;
2714 for (v = bl->giv; v; v = v->next_iv)
2716 rtx giv_inc, value;
2718 /* Only split the giv if it has already been reduced, or if the loop is
2719 being completely unrolled. */
2720 if (unroll_type != UNROLL_COMPLETELY && v->ignore)
2721 continue;
2723 /* The giv can be split if the insn that sets the giv is executed once
2724 and only once on every iteration of the loop. */
2725 /* An address giv can always be split. v->insn is just a use not a set,
2726 and hence it does not matter whether it is always executed. All that
2727 matters is that all the biv increments are always executed, and we
2728 won't reach here if they aren't. */
2729 if (v->giv_type != DEST_ADDR
2730 && (! v->always_computable
2731 || back_branch_in_range_p (loop, v->insn)))
2732 continue;
2734 /* The giv increment value must be a constant. */
2735 giv_inc = fold_rtx_mult_add (v->mult_val, increment, const0_rtx,
2736 v->mode);
2737 if (! giv_inc || GET_CODE (giv_inc) != CONST_INT)
2738 continue;
2740 /* The loop must be unrolled completely, or else have a known number of
2741 iterations and only one exit, or else the giv must be dead outside
2742 the loop, or else the final value of the giv must be known.
2743 Otherwise, it is not safe to split the giv since it may not have the
2744 proper value on loop exit. */
2746 /* The used outside loop test will fail for DEST_ADDR givs. They are
2747 never used outside the loop anyways, so it is always safe to split a
2748 DEST_ADDR giv. */
2750 final_value = 0;
2751 if (unroll_type != UNROLL_COMPLETELY
2752 && (loop->exit_count || unroll_type == UNROLL_NAIVE)
2753 && v->giv_type != DEST_ADDR
2754 /* The next part is true if the pseudo is used outside the loop.
2755 We assume that this is true for any pseudo created after loop
2756 starts, because we don't have a reg_n_info entry for them. */
2757 && (REGNO (v->dest_reg) >= max_reg_before_loop
2758 || (REGNO_FIRST_UID (REGNO (v->dest_reg)) != INSN_UID (v->insn)
2759 /* Check for the case where the pseudo is set by a shift/add
2760 sequence, in which case the first insn setting the pseudo
2761 is the first insn of the shift/add sequence. */
2762 && (! (tem = find_reg_note (v->insn, REG_RETVAL, NULL_RTX))
2763 || (REGNO_FIRST_UID (REGNO (v->dest_reg))
2764 != INSN_UID (XEXP (tem, 0)))))
2765 /* Line above always fails if INSN was moved by loop opt. */
2766 || (REGNO_LAST_LUID (REGNO (v->dest_reg))
2767 >= INSN_LUID (loop->end)))
2768 && ! (final_value = v->final_value))
2769 continue;
2771 #if 0
2772 /* Currently, non-reduced/final-value givs are never split. */
2773 /* Should emit insns after the loop if possible, as the biv final value
2774 code below does. */
2776 /* If the final value is non-zero, and the giv has not been reduced,
2777 then must emit an instruction to set the final value. */
2778 if (final_value && !v->new_reg)
2780 /* Create a new register to hold the value of the giv, and then set
2781 the giv to its final value before the loop start. The giv is set
2782 to its final value before loop start to ensure that this insn
2783 will always be executed, no matter how we exit. */
2784 tem = gen_reg_rtx (v->mode);
2785 loop_insn_hoist (loop, gen_move_insn (tem, v->dest_reg));
2786 loop_insn_hoist (loop, gen_move_insn (v->dest_reg, final_value));
2788 if (loop_dump_stream)
2789 fprintf (loop_dump_stream, "Giv %d mapped to %d for split.\n",
2790 REGNO (v->dest_reg), REGNO (tem));
2792 v->src_reg = tem;
2794 #endif
2796 /* This giv is splittable. If completely unrolling the loop, save the
2797 giv's initial value. Otherwise, save the constant zero for it. */
2799 if (unroll_type == UNROLL_COMPLETELY)
2801 /* It is not safe to use bl->initial_value here, because it may not
2802 be invariant. It is safe to use the initial value stored in
2803 the splittable_regs array if it is set. In rare cases, it won't
2804 be set, so then we do exactly the same thing as
2805 find_splittable_regs does to get a safe value. */
2806 rtx biv_initial_value;
2808 if (splittable_regs[bl->regno])
2809 biv_initial_value = splittable_regs[bl->regno];
2810 else if (GET_CODE (bl->initial_value) != REG
2811 || (REGNO (bl->initial_value) != bl->regno
2812 && REGNO (bl->initial_value) >= FIRST_PSEUDO_REGISTER))
2813 biv_initial_value = bl->initial_value;
2814 else
2816 rtx tem = gen_reg_rtx (bl->biv->mode);
2818 record_base_value (REGNO (tem), bl->biv->add_val, 0);
2819 loop_insn_hoist (loop, gen_move_insn (tem, bl->biv->src_reg));
2820 biv_initial_value = tem;
2822 biv_initial_value = extend_value_for_giv (v, biv_initial_value);
2823 value = fold_rtx_mult_add (v->mult_val, biv_initial_value,
2824 v->add_val, v->mode);
2826 else
2827 value = const0_rtx;
2829 if (v->new_reg)
2831 /* If a giv was combined with another giv, then we can only split
2832 this giv if the giv it was combined with was reduced. This
2833 is because the value of v->new_reg is meaningless in this
2834 case. */
2835 if (v->same && ! v->same->new_reg)
2837 if (loop_dump_stream)
2838 fprintf (loop_dump_stream,
2839 "giv combined with unreduced giv not split.\n");
2840 continue;
2842 /* If the giv is an address destination, it could be something other
2843 than a simple register, these have to be treated differently. */
2844 else if (v->giv_type == DEST_REG)
2846 /* If value is not a constant, register, or register plus
2847 constant, then compute its value into a register before
2848 loop start. This prevents invalid rtx sharing, and should
2849 generate better code. We can use bl->initial_value here
2850 instead of splittable_regs[bl->regno] because this code
2851 is going before the loop start. */
2852 if (unroll_type == UNROLL_COMPLETELY
2853 && GET_CODE (value) != CONST_INT
2854 && GET_CODE (value) != REG
2855 && (GET_CODE (value) != PLUS
2856 || GET_CODE (XEXP (value, 0)) != REG
2857 || GET_CODE (XEXP (value, 1)) != CONST_INT))
2859 rtx tem = gen_reg_rtx (v->mode);
2860 record_base_value (REGNO (tem), v->add_val, 0);
2861 loop_iv_add_mult_hoist (loop, bl->initial_value, v->mult_val,
2862 v->add_val, tem);
2863 value = tem;
2866 splittable_regs[REGNO (v->new_reg)] = value;
2868 else
2870 /* Splitting address givs is useful since it will often allow us
2871 to eliminate some increment insns for the base giv as
2872 unnecessary. */
2874 /* If the addr giv is combined with a dest_reg giv, then all
2875 references to that dest reg will be remapped, which is NOT
2876 what we want for split addr regs. We always create a new
2877 register for the split addr giv, just to be safe. */
2879 /* If we have multiple identical address givs within a
2880 single instruction, then use a single pseudo reg for
2881 both. This is necessary in case one is a match_dup
2882 of the other. */
2884 v->const_adjust = 0;
2886 if (v->same_insn)
2888 v->dest_reg = v->same_insn->dest_reg;
2889 if (loop_dump_stream)
2890 fprintf (loop_dump_stream,
2891 "Sharing address givs in insn %d\n",
2892 INSN_UID (v->insn));
2894 /* If multiple address GIVs have been combined with the
2895 same dest_reg GIV, do not create a new register for
2896 each. */
2897 else if (unroll_type != UNROLL_COMPLETELY
2898 && v->giv_type == DEST_ADDR
2899 && v->same && v->same->giv_type == DEST_ADDR
2900 && v->same->unrolled
2901 /* combine_givs_p may return true for some cases
2902 where the add and mult values are not equal.
2903 To share a register here, the values must be
2904 equal. */
2905 && rtx_equal_p (v->same->mult_val, v->mult_val)
2906 && rtx_equal_p (v->same->add_val, v->add_val)
2907 /* If the memory references have different modes,
2908 then the address may not be valid and we must
2909 not share registers. */
2910 && verify_addresses (v, giv_inc, unroll_number))
2912 v->dest_reg = v->same->dest_reg;
2913 v->shared = 1;
2915 else if (unroll_type != UNROLL_COMPLETELY)
2917 /* If not completely unrolling the loop, then create a new
2918 register to hold the split value of the DEST_ADDR giv.
2919 Emit insn to initialize its value before loop start. */
2921 rtx tem = gen_reg_rtx (v->mode);
2922 struct induction *same = v->same;
2923 rtx new_reg = v->new_reg;
2924 record_base_value (REGNO (tem), v->add_val, 0);
2926 /* If the address giv has a constant in its new_reg value,
2927 then this constant can be pulled out and put in value,
2928 instead of being part of the initialization code. */
2930 if (GET_CODE (new_reg) == PLUS
2931 && GET_CODE (XEXP (new_reg, 1)) == CONST_INT)
2933 v->dest_reg
2934 = plus_constant (tem, INTVAL (XEXP (new_reg, 1)));
2936 /* Only succeed if this will give valid addresses.
2937 Try to validate both the first and the last
2938 address resulting from loop unrolling, if
2939 one fails, then can't do const elim here. */
2940 if (verify_addresses (v, giv_inc, unroll_number))
2942 /* Save the negative of the eliminated const, so
2943 that we can calculate the dest_reg's increment
2944 value later. */
2945 v->const_adjust = -INTVAL (XEXP (new_reg, 1));
2947 new_reg = XEXP (new_reg, 0);
2948 if (loop_dump_stream)
2949 fprintf (loop_dump_stream,
2950 "Eliminating constant from giv %d\n",
2951 REGNO (tem));
2953 else
2954 v->dest_reg = tem;
2956 else
2957 v->dest_reg = tem;
2959 /* If the address hasn't been checked for validity yet, do so
2960 now, and fail completely if either the first or the last
2961 unrolled copy of the address is not a valid address
2962 for the instruction that uses it. */
2963 if (v->dest_reg == tem
2964 && ! verify_addresses (v, giv_inc, unroll_number))
2966 for (v2 = v->next_iv; v2; v2 = v2->next_iv)
2967 if (v2->same_insn == v)
2968 v2->same_insn = 0;
2970 if (loop_dump_stream)
2971 fprintf (loop_dump_stream,
2972 "Invalid address for giv at insn %d\n",
2973 INSN_UID (v->insn));
2974 continue;
2977 v->new_reg = new_reg;
2978 v->same = same;
2980 /* We set this after the address check, to guarantee that
2981 the register will be initialized. */
2982 v->unrolled = 1;
2984 /* To initialize the new register, just move the value of
2985 new_reg into it. This is not guaranteed to give a valid
2986 instruction on machines with complex addressing modes.
2987 If we can't recognize it, then delete it and emit insns
2988 to calculate the value from scratch. */
2989 loop_insn_hoist (loop, gen_rtx_SET (VOIDmode, tem,
2990 copy_rtx (v->new_reg)));
2991 if (recog_memoized (PREV_INSN (loop->start)) < 0)
2993 rtx sequence, ret;
2995 /* We can't use bl->initial_value to compute the initial
2996 value, because the loop may have been preconditioned.
2997 We must calculate it from NEW_REG. */
2998 delete_related_insns (PREV_INSN (loop->start));
3000 start_sequence ();
3001 ret = force_operand (v->new_reg, tem);
3002 if (ret != tem)
3003 emit_move_insn (tem, ret);
3004 sequence = gen_sequence ();
3005 end_sequence ();
3006 loop_insn_hoist (loop, sequence);
3008 if (loop_dump_stream)
3009 fprintf (loop_dump_stream,
3010 "Invalid init insn, rewritten.\n");
3013 else
3015 v->dest_reg = value;
3017 /* Check the resulting address for validity, and fail
3018 if the resulting address would be invalid. */
3019 if (! verify_addresses (v, giv_inc, unroll_number))
3021 for (v2 = v->next_iv; v2; v2 = v2->next_iv)
3022 if (v2->same_insn == v)
3023 v2->same_insn = 0;
3025 if (loop_dump_stream)
3026 fprintf (loop_dump_stream,
3027 "Invalid address for giv at insn %d\n",
3028 INSN_UID (v->insn));
3029 continue;
3033 /* Store the value of dest_reg into the insn. This sharing
3034 will not be a problem as this insn will always be copied
3035 later. */
3037 *v->location = v->dest_reg;
3039 /* If this address giv is combined with a dest reg giv, then
3040 save the base giv's induction pointer so that we will be
3041 able to handle this address giv properly. The base giv
3042 itself does not have to be splittable. */
3044 if (v->same && v->same->giv_type == DEST_REG)
3045 addr_combined_regs[REGNO (v->same->new_reg)] = v->same;
3047 if (GET_CODE (v->new_reg) == REG)
3049 /* This giv maybe hasn't been combined with any others.
3050 Make sure that it's giv is marked as splittable here. */
3052 splittable_regs[REGNO (v->new_reg)] = value;
3054 /* Make it appear to depend upon itself, so that the
3055 giv will be properly split in the main loop above. */
3056 if (! v->same)
3058 v->same = v;
3059 addr_combined_regs[REGNO (v->new_reg)] = v;
3063 if (loop_dump_stream)
3064 fprintf (loop_dump_stream, "DEST_ADDR giv being split.\n");
3067 else
3069 #if 0
3070 /* Currently, unreduced giv's can't be split. This is not too much
3071 of a problem since unreduced giv's are not live across loop
3072 iterations anyways. When unrolling a loop completely though,
3073 it makes sense to reduce&split givs when possible, as this will
3074 result in simpler instructions, and will not require that a reg
3075 be live across loop iterations. */
3077 splittable_regs[REGNO (v->dest_reg)] = value;
3078 fprintf (stderr, "Giv %d at insn %d not reduced\n",
3079 REGNO (v->dest_reg), INSN_UID (v->insn));
3080 #else
3081 continue;
3082 #endif
3085 /* Unreduced givs are only updated once by definition. Reduced givs
3086 are updated as many times as their biv is. Mark it so if this is
3087 a splittable register. Don't need to do anything for address givs
3088 where this may not be a register. */
3090 if (GET_CODE (v->new_reg) == REG)
3092 int count = 1;
3093 if (! v->ignore)
3094 count = REG_IV_CLASS (ivs, REGNO (v->src_reg))->biv_count;
3096 splittable_regs_updates[REGNO (v->new_reg)] = count;
3099 result++;
3101 if (loop_dump_stream)
3103 int regnum;
3105 if (GET_CODE (v->dest_reg) == CONST_INT)
3106 regnum = -1;
3107 else if (GET_CODE (v->dest_reg) != REG)
3108 regnum = REGNO (XEXP (v->dest_reg, 0));
3109 else
3110 regnum = REGNO (v->dest_reg);
3111 fprintf (loop_dump_stream, "Giv %d at insn %d safe to split.\n",
3112 regnum, INSN_UID (v->insn));
3116 return result;
3119 /* Try to prove that the register is dead after the loop exits. Trace every
3120 loop exit looking for an insn that will always be executed, which sets
3121 the register to some value, and appears before the first use of the register
3122 is found. If successful, then return 1, otherwise return 0. */
3124 /* ?? Could be made more intelligent in the handling of jumps, so that
3125 it can search past if statements and other similar structures. */
3127 static int
3128 reg_dead_after_loop (loop, reg)
3129 const struct loop *loop;
3130 rtx reg;
3132 rtx insn, label;
3133 enum rtx_code code;
3134 int jump_count = 0;
3135 int label_count = 0;
3137 /* In addition to checking all exits of this loop, we must also check
3138 all exits of inner nested loops that would exit this loop. We don't
3139 have any way to identify those, so we just give up if there are any
3140 such inner loop exits. */
3142 for (label = loop->exit_labels; label; label = LABEL_NEXTREF (label))
3143 label_count++;
3145 if (label_count != loop->exit_count)
3146 return 0;
3148 /* HACK: Must also search the loop fall through exit, create a label_ref
3149 here which points to the loop->end, and append the loop_number_exit_labels
3150 list to it. */
3151 label = gen_rtx_LABEL_REF (VOIDmode, loop->end);
3152 LABEL_NEXTREF (label) = loop->exit_labels;
3154 for (; label; label = LABEL_NEXTREF (label))
3156 /* Succeed if find an insn which sets the biv or if reach end of
3157 function. Fail if find an insn that uses the biv, or if come to
3158 a conditional jump. */
3160 insn = NEXT_INSN (XEXP (label, 0));
3161 while (insn)
3163 code = GET_CODE (insn);
3164 if (GET_RTX_CLASS (code) == 'i')
3166 rtx set;
3168 if (reg_referenced_p (reg, PATTERN (insn)))
3169 return 0;
3171 set = single_set (insn);
3172 if (set && rtx_equal_p (SET_DEST (set), reg))
3173 break;
3176 if (code == JUMP_INSN)
3178 if (GET_CODE (PATTERN (insn)) == RETURN)
3179 break;
3180 else if (!any_uncondjump_p (insn)
3181 /* Prevent infinite loop following infinite loops. */
3182 || jump_count++ > 20)
3183 return 0;
3184 else
3185 insn = JUMP_LABEL (insn);
3188 insn = NEXT_INSN (insn);
3192 /* Success, the register is dead on all loop exits. */
3193 return 1;
3196 /* Try to calculate the final value of the biv, the value it will have at
3197 the end of the loop. If we can do it, return that value. */
3200 final_biv_value (loop, bl)
3201 const struct loop *loop;
3202 struct iv_class *bl;
3204 unsigned HOST_WIDE_INT n_iterations = LOOP_INFO (loop)->n_iterations;
3205 rtx increment, tem;
3207 /* ??? This only works for MODE_INT biv's. Reject all others for now. */
3209 if (GET_MODE_CLASS (bl->biv->mode) != MODE_INT)
3210 return 0;
3212 /* The final value for reversed bivs must be calculated differently than
3213 for ordinary bivs. In this case, there is already an insn after the
3214 loop which sets this biv's final value (if necessary), and there are
3215 no other loop exits, so we can return any value. */
3216 if (bl->reversed)
3218 if (loop_dump_stream)
3219 fprintf (loop_dump_stream,
3220 "Final biv value for %d, reversed biv.\n", bl->regno);
3222 return const0_rtx;
3225 /* Try to calculate the final value as initial value + (number of iterations
3226 * increment). For this to work, increment must be invariant, the only
3227 exit from the loop must be the fall through at the bottom (otherwise
3228 it may not have its final value when the loop exits), and the initial
3229 value of the biv must be invariant. */
3231 if (n_iterations != 0
3232 && ! loop->exit_count
3233 && loop_invariant_p (loop, bl->initial_value))
3235 increment = biv_total_increment (bl);
3237 if (increment && loop_invariant_p (loop, increment))
3239 /* Can calculate the loop exit value, emit insns after loop
3240 end to calculate this value into a temporary register in
3241 case it is needed later. */
3243 tem = gen_reg_rtx (bl->biv->mode);
3244 record_base_value (REGNO (tem), bl->biv->add_val, 0);
3245 loop_iv_add_mult_sink (loop, increment, GEN_INT (n_iterations),
3246 bl->initial_value, tem);
3248 if (loop_dump_stream)
3249 fprintf (loop_dump_stream,
3250 "Final biv value for %d, calculated.\n", bl->regno);
3252 return tem;
3256 /* Check to see if the biv is dead at all loop exits. */
3257 if (reg_dead_after_loop (loop, bl->biv->src_reg))
3259 if (loop_dump_stream)
3260 fprintf (loop_dump_stream,
3261 "Final biv value for %d, biv dead after loop exit.\n",
3262 bl->regno);
3264 return const0_rtx;
3267 return 0;
3270 /* Try to calculate the final value of the giv, the value it will have at
3271 the end of the loop. If we can do it, return that value. */
3274 final_giv_value (loop, v)
3275 const struct loop *loop;
3276 struct induction *v;
3278 struct loop_ivs *ivs = LOOP_IVS (loop);
3279 struct iv_class *bl;
3280 rtx insn;
3281 rtx increment, tem;
3282 rtx seq;
3283 rtx loop_end = loop->end;
3284 unsigned HOST_WIDE_INT n_iterations = LOOP_INFO (loop)->n_iterations;
3286 bl = REG_IV_CLASS (ivs, REGNO (v->src_reg));
3288 /* The final value for givs which depend on reversed bivs must be calculated
3289 differently than for ordinary givs. In this case, there is already an
3290 insn after the loop which sets this giv's final value (if necessary),
3291 and there are no other loop exits, so we can return any value. */
3292 if (bl->reversed)
3294 if (loop_dump_stream)
3295 fprintf (loop_dump_stream,
3296 "Final giv value for %d, depends on reversed biv\n",
3297 REGNO (v->dest_reg));
3298 return const0_rtx;
3301 /* Try to calculate the final value as a function of the biv it depends
3302 upon. The only exit from the loop must be the fall through at the bottom
3303 (otherwise it may not have its final value when the loop exits). */
3305 /* ??? Can calculate the final giv value by subtracting off the
3306 extra biv increments times the giv's mult_val. The loop must have
3307 only one exit for this to work, but the loop iterations does not need
3308 to be known. */
3310 if (n_iterations != 0
3311 && ! loop->exit_count)
3313 /* ?? It is tempting to use the biv's value here since these insns will
3314 be put after the loop, and hence the biv will have its final value
3315 then. However, this fails if the biv is subsequently eliminated.
3316 Perhaps determine whether biv's are eliminable before trying to
3317 determine whether giv's are replaceable so that we can use the
3318 biv value here if it is not eliminable. */
3320 /* We are emitting code after the end of the loop, so we must make
3321 sure that bl->initial_value is still valid then. It will still
3322 be valid if it is invariant. */
3324 increment = biv_total_increment (bl);
3326 if (increment && loop_invariant_p (loop, increment)
3327 && loop_invariant_p (loop, bl->initial_value))
3329 /* Can calculate the loop exit value of its biv as
3330 (n_iterations * increment) + initial_value */
3332 /* The loop exit value of the giv is then
3333 (final_biv_value - extra increments) * mult_val + add_val.
3334 The extra increments are any increments to the biv which
3335 occur in the loop after the giv's value is calculated.
3336 We must search from the insn that sets the giv to the end
3337 of the loop to calculate this value. */
3339 /* Put the final biv value in tem. */
3340 tem = gen_reg_rtx (v->mode);
3341 record_base_value (REGNO (tem), bl->biv->add_val, 0);
3342 loop_iv_add_mult_sink (loop, extend_value_for_giv (v, increment),
3343 GEN_INT (n_iterations),
3344 extend_value_for_giv (v, bl->initial_value),
3345 tem);
3347 /* Subtract off extra increments as we find them. */
3348 for (insn = NEXT_INSN (v->insn); insn != loop_end;
3349 insn = NEXT_INSN (insn))
3351 struct induction *biv;
3353 for (biv = bl->biv; biv; biv = biv->next_iv)
3354 if (biv->insn == insn)
3356 start_sequence ();
3357 tem = expand_simple_binop (GET_MODE (tem), MINUS, tem,
3358 biv->add_val, NULL_RTX, 0,
3359 OPTAB_LIB_WIDEN);
3360 seq = gen_sequence ();
3361 end_sequence ();
3362 loop_insn_sink (loop, seq);
3366 /* Now calculate the giv's final value. */
3367 loop_iv_add_mult_sink (loop, tem, v->mult_val, v->add_val, tem);
3369 if (loop_dump_stream)
3370 fprintf (loop_dump_stream,
3371 "Final giv value for %d, calc from biv's value.\n",
3372 REGNO (v->dest_reg));
3374 return tem;
3378 /* Replaceable giv's should never reach here. */
3379 if (v->replaceable)
3380 abort ();
3382 /* Check to see if the biv is dead at all loop exits. */
3383 if (reg_dead_after_loop (loop, v->dest_reg))
3385 if (loop_dump_stream)
3386 fprintf (loop_dump_stream,
3387 "Final giv value for %d, giv dead after loop exit.\n",
3388 REGNO (v->dest_reg));
3390 return const0_rtx;
3393 return 0;
3396 /* Look back before LOOP->START for the insn that sets REG and return
3397 the equivalent constant if there is a REG_EQUAL note otherwise just
3398 the SET_SRC of REG. */
3400 static rtx
3401 loop_find_equiv_value (loop, reg)
3402 const struct loop *loop;
3403 rtx reg;
3405 rtx loop_start = loop->start;
3406 rtx insn, set;
3407 rtx ret;
3409 ret = reg;
3410 for (insn = PREV_INSN (loop_start); insn; insn = PREV_INSN (insn))
3412 if (GET_CODE (insn) == CODE_LABEL)
3413 break;
3415 else if (INSN_P (insn) && reg_set_p (reg, insn))
3417 /* We found the last insn before the loop that sets the register.
3418 If it sets the entire register, and has a REG_EQUAL note,
3419 then use the value of the REG_EQUAL note. */
3420 if ((set = single_set (insn))
3421 && (SET_DEST (set) == reg))
3423 rtx note = find_reg_note (insn, REG_EQUAL, NULL_RTX);
3425 /* Only use the REG_EQUAL note if it is a constant.
3426 Other things, divide in particular, will cause
3427 problems later if we use them. */
3428 if (note && GET_CODE (XEXP (note, 0)) != EXPR_LIST
3429 && CONSTANT_P (XEXP (note, 0)))
3430 ret = XEXP (note, 0);
3431 else
3432 ret = SET_SRC (set);
3434 /* We cannot do this if it changes between the
3435 assignment and loop start though. */
3436 if (modified_between_p (ret, insn, loop_start))
3437 ret = reg;
3439 break;
3442 return ret;
3445 /* Return a simplified rtx for the expression OP - REG.
3447 REG must appear in OP, and OP must be a register or the sum of a register
3448 and a second term.
3450 Thus, the return value must be const0_rtx or the second term.
3452 The caller is responsible for verifying that REG appears in OP and OP has
3453 the proper form. */
3455 static rtx
3456 subtract_reg_term (op, reg)
3457 rtx op, reg;
3459 if (op == reg)
3460 return const0_rtx;
3461 if (GET_CODE (op) == PLUS)
3463 if (XEXP (op, 0) == reg)
3464 return XEXP (op, 1);
3465 else if (XEXP (op, 1) == reg)
3466 return XEXP (op, 0);
3468 /* OP does not contain REG as a term. */
3469 abort ();
3472 /* Find and return register term common to both expressions OP0 and
3473 OP1 or NULL_RTX if no such term exists. Each expression must be a
3474 REG or a PLUS of a REG. */
3476 static rtx
3477 find_common_reg_term (op0, op1)
3478 rtx op0, op1;
3480 if ((GET_CODE (op0) == REG || GET_CODE (op0) == PLUS)
3481 && (GET_CODE (op1) == REG || GET_CODE (op1) == PLUS))
3483 rtx op00;
3484 rtx op01;
3485 rtx op10;
3486 rtx op11;
3488 if (GET_CODE (op0) == PLUS)
3489 op01 = XEXP (op0, 1), op00 = XEXP (op0, 0);
3490 else
3491 op01 = const0_rtx, op00 = op0;
3493 if (GET_CODE (op1) == PLUS)
3494 op11 = XEXP (op1, 1), op10 = XEXP (op1, 0);
3495 else
3496 op11 = const0_rtx, op10 = op1;
3498 /* Find and return common register term if present. */
3499 if (REG_P (op00) && (op00 == op10 || op00 == op11))
3500 return op00;
3501 else if (REG_P (op01) && (op01 == op10 || op01 == op11))
3502 return op01;
3505 /* No common register term found. */
3506 return NULL_RTX;
3509 /* Determine the loop iterator and calculate the number of loop
3510 iterations. Returns the exact number of loop iterations if it can
3511 be calculated, otherwise returns zero. */
3513 unsigned HOST_WIDE_INT
3514 loop_iterations (loop)
3515 struct loop *loop;
3517 struct loop_info *loop_info = LOOP_INFO (loop);
3518 struct loop_ivs *ivs = LOOP_IVS (loop);
3519 rtx comparison, comparison_value;
3520 rtx iteration_var, initial_value, increment, final_value;
3521 enum rtx_code comparison_code;
3522 HOST_WIDE_INT inc;
3523 unsigned HOST_WIDE_INT abs_inc;
3524 unsigned HOST_WIDE_INT abs_diff;
3525 int off_by_one;
3526 int increment_dir;
3527 int unsigned_p, compare_dir, final_larger;
3528 rtx last_loop_insn;
3529 rtx reg_term;
3530 struct iv_class *bl;
3532 loop_info->n_iterations = 0;
3533 loop_info->initial_value = 0;
3534 loop_info->initial_equiv_value = 0;
3535 loop_info->comparison_value = 0;
3536 loop_info->final_value = 0;
3537 loop_info->final_equiv_value = 0;
3538 loop_info->increment = 0;
3539 loop_info->iteration_var = 0;
3540 loop_info->unroll_number = 1;
3541 loop_info->iv = 0;
3543 /* We used to use prev_nonnote_insn here, but that fails because it might
3544 accidentally get the branch for a contained loop if the branch for this
3545 loop was deleted. We can only trust branches immediately before the
3546 loop_end. */
3547 last_loop_insn = PREV_INSN (loop->end);
3549 /* ??? We should probably try harder to find the jump insn
3550 at the end of the loop. The following code assumes that
3551 the last loop insn is a jump to the top of the loop. */
3552 if (GET_CODE (last_loop_insn) != JUMP_INSN)
3554 if (loop_dump_stream)
3555 fprintf (loop_dump_stream,
3556 "Loop iterations: No final conditional branch found.\n");
3557 return 0;
3560 /* If there is a more than a single jump to the top of the loop
3561 we cannot (easily) determine the iteration count. */
3562 if (LABEL_NUSES (JUMP_LABEL (last_loop_insn)) > 1)
3564 if (loop_dump_stream)
3565 fprintf (loop_dump_stream,
3566 "Loop iterations: Loop has multiple back edges.\n");
3567 return 0;
3570 /* If there are multiple conditionalized loop exit tests, they may jump
3571 back to differing CODE_LABELs. */
3572 if (loop->top && loop->cont)
3574 rtx temp = PREV_INSN (last_loop_insn);
3578 if (GET_CODE (temp) == JUMP_INSN)
3580 /* There are some kinds of jumps we can't deal with easily. */
3581 if (JUMP_LABEL (temp) == 0)
3583 if (loop_dump_stream)
3584 fprintf
3585 (loop_dump_stream,
3586 "Loop iterations: Jump insn has null JUMP_LABEL.\n");
3587 return 0;
3590 if (/* Previous unrolling may have generated new insns not
3591 covered by the uid_luid array. */
3592 INSN_UID (JUMP_LABEL (temp)) < max_uid_for_loop
3593 /* Check if we jump back into the loop body. */
3594 && INSN_LUID (JUMP_LABEL (temp)) > INSN_LUID (loop->top)
3595 && INSN_LUID (JUMP_LABEL (temp)) < INSN_LUID (loop->cont))
3597 if (loop_dump_stream)
3598 fprintf
3599 (loop_dump_stream,
3600 "Loop iterations: Loop has multiple back edges.\n");
3601 return 0;
3605 while ((temp = PREV_INSN (temp)) != loop->cont);
3608 /* Find the iteration variable. If the last insn is a conditional
3609 branch, and the insn before tests a register value, make that the
3610 iteration variable. */
3612 comparison = get_condition_for_loop (loop, last_loop_insn);
3613 if (comparison == 0)
3615 if (loop_dump_stream)
3616 fprintf (loop_dump_stream,
3617 "Loop iterations: No final comparison found.\n");
3618 return 0;
3621 /* ??? Get_condition may switch position of induction variable and
3622 invariant register when it canonicalizes the comparison. */
3624 comparison_code = GET_CODE (comparison);
3625 iteration_var = XEXP (comparison, 0);
3626 comparison_value = XEXP (comparison, 1);
3628 if (GET_CODE (iteration_var) != REG)
3630 if (loop_dump_stream)
3631 fprintf (loop_dump_stream,
3632 "Loop iterations: Comparison not against register.\n");
3633 return 0;
3636 /* The only new registers that are created before loop iterations
3637 are givs made from biv increments or registers created by
3638 load_mems. In the latter case, it is possible that try_copy_prop
3639 will propagate a new pseudo into the old iteration register but
3640 this will be marked by having the REG_USERVAR_P bit set. */
3642 if ((unsigned) REGNO (iteration_var) >= ivs->n_regs
3643 && ! REG_USERVAR_P (iteration_var))
3644 abort ();
3646 /* Determine the initial value of the iteration variable, and the amount
3647 that it is incremented each loop. Use the tables constructed by
3648 the strength reduction pass to calculate these values. */
3650 /* Clear the result values, in case no answer can be found. */
3651 initial_value = 0;
3652 increment = 0;
3654 /* The iteration variable can be either a giv or a biv. Check to see
3655 which it is, and compute the variable's initial value, and increment
3656 value if possible. */
3658 /* If this is a new register, can't handle it since we don't have any
3659 reg_iv_type entry for it. */
3660 if ((unsigned) REGNO (iteration_var) >= ivs->n_regs)
3662 if (loop_dump_stream)
3663 fprintf (loop_dump_stream,
3664 "Loop iterations: No reg_iv_type entry for iteration var.\n");
3665 return 0;
3668 /* Reject iteration variables larger than the host wide int size, since they
3669 could result in a number of iterations greater than the range of our
3670 `unsigned HOST_WIDE_INT' variable loop_info->n_iterations. */
3671 else if ((GET_MODE_BITSIZE (GET_MODE (iteration_var))
3672 > HOST_BITS_PER_WIDE_INT))
3674 if (loop_dump_stream)
3675 fprintf (loop_dump_stream,
3676 "Loop iterations: Iteration var rejected because mode too large.\n");
3677 return 0;
3679 else if (GET_MODE_CLASS (GET_MODE (iteration_var)) != MODE_INT)
3681 if (loop_dump_stream)
3682 fprintf (loop_dump_stream,
3683 "Loop iterations: Iteration var not an integer.\n");
3684 return 0;
3686 else if (REG_IV_TYPE (ivs, REGNO (iteration_var)) == BASIC_INDUCT)
3688 if (REGNO (iteration_var) >= ivs->n_regs)
3689 abort ();
3691 /* Grab initial value, only useful if it is a constant. */
3692 bl = REG_IV_CLASS (ivs, REGNO (iteration_var));
3693 initial_value = bl->initial_value;
3694 if (!bl->biv->always_executed || bl->biv->maybe_multiple)
3696 if (loop_dump_stream)
3697 fprintf (loop_dump_stream,
3698 "Loop iterations: Basic induction var not set once in each iteration.\n");
3699 return 0;
3702 increment = biv_total_increment (bl);
3704 else if (REG_IV_TYPE (ivs, REGNO (iteration_var)) == GENERAL_INDUCT)
3706 HOST_WIDE_INT offset = 0;
3707 struct induction *v = REG_IV_INFO (ivs, REGNO (iteration_var));
3708 rtx biv_initial_value;
3710 if (REGNO (v->src_reg) >= ivs->n_regs)
3711 abort ();
3713 if (!v->always_executed || v->maybe_multiple)
3715 if (loop_dump_stream)
3716 fprintf (loop_dump_stream,
3717 "Loop iterations: General induction var not set once in each iteration.\n");
3718 return 0;
3721 bl = REG_IV_CLASS (ivs, REGNO (v->src_reg));
3723 /* Increment value is mult_val times the increment value of the biv. */
3725 increment = biv_total_increment (bl);
3726 if (increment)
3728 struct induction *biv_inc;
3730 increment = fold_rtx_mult_add (v->mult_val,
3731 extend_value_for_giv (v, increment),
3732 const0_rtx, v->mode);
3733 /* The caller assumes that one full increment has occurred at the
3734 first loop test. But that's not true when the biv is incremented
3735 after the giv is set (which is the usual case), e.g.:
3736 i = 6; do {;} while (i++ < 9) .
3737 Therefore, we bias the initial value by subtracting the amount of
3738 the increment that occurs between the giv set and the giv test. */
3739 for (biv_inc = bl->biv; biv_inc; biv_inc = biv_inc->next_iv)
3741 if (loop_insn_first_p (v->insn, biv_inc->insn))
3743 if (REG_P (biv_inc->add_val))
3745 if (loop_dump_stream)
3746 fprintf (loop_dump_stream,
3747 "Loop iterations: Basic induction var add_val is REG %d.\n",
3748 REGNO (biv_inc->add_val));
3749 return 0;
3752 offset -= INTVAL (biv_inc->add_val);
3756 if (loop_dump_stream)
3757 fprintf (loop_dump_stream,
3758 "Loop iterations: Giv iterator, initial value bias %ld.\n",
3759 (long) offset);
3761 /* Initial value is mult_val times the biv's initial value plus
3762 add_val. Only useful if it is a constant. */
3763 biv_initial_value = extend_value_for_giv (v, bl->initial_value);
3764 initial_value
3765 = fold_rtx_mult_add (v->mult_val,
3766 plus_constant (biv_initial_value, offset),
3767 v->add_val, v->mode);
3769 else
3771 if (loop_dump_stream)
3772 fprintf (loop_dump_stream,
3773 "Loop iterations: Not basic or general induction var.\n");
3774 return 0;
3777 if (initial_value == 0)
3778 return 0;
3780 unsigned_p = 0;
3781 off_by_one = 0;
3782 switch (comparison_code)
3784 case LEU:
3785 unsigned_p = 1;
3786 case LE:
3787 compare_dir = 1;
3788 off_by_one = 1;
3789 break;
3790 case GEU:
3791 unsigned_p = 1;
3792 case GE:
3793 compare_dir = -1;
3794 off_by_one = -1;
3795 break;
3796 case EQ:
3797 /* Cannot determine loop iterations with this case. */
3798 compare_dir = 0;
3799 break;
3800 case LTU:
3801 unsigned_p = 1;
3802 case LT:
3803 compare_dir = 1;
3804 break;
3805 case GTU:
3806 unsigned_p = 1;
3807 case GT:
3808 compare_dir = -1;
3809 case NE:
3810 compare_dir = 0;
3811 break;
3812 default:
3813 abort ();
3816 /* If the comparison value is an invariant register, then try to find
3817 its value from the insns before the start of the loop. */
3819 final_value = comparison_value;
3820 if (GET_CODE (comparison_value) == REG
3821 && loop_invariant_p (loop, comparison_value))
3823 final_value = loop_find_equiv_value (loop, comparison_value);
3825 /* If we don't get an invariant final value, we are better
3826 off with the original register. */
3827 if (! loop_invariant_p (loop, final_value))
3828 final_value = comparison_value;
3831 /* Calculate the approximate final value of the induction variable
3832 (on the last successful iteration). The exact final value
3833 depends on the branch operator, and increment sign. It will be
3834 wrong if the iteration variable is not incremented by one each
3835 time through the loop and (comparison_value + off_by_one -
3836 initial_value) % increment != 0.
3837 ??? Note that the final_value may overflow and thus final_larger
3838 will be bogus. A potentially infinite loop will be classified
3839 as immediate, e.g. for (i = 0x7ffffff0; i <= 0x7fffffff; i++) */
3840 if (off_by_one)
3841 final_value = plus_constant (final_value, off_by_one);
3843 /* Save the calculated values describing this loop's bounds, in case
3844 precondition_loop_p will need them later. These values can not be
3845 recalculated inside precondition_loop_p because strength reduction
3846 optimizations may obscure the loop's structure.
3848 These values are only required by precondition_loop_p and insert_bct
3849 whenever the number of iterations cannot be computed at compile time.
3850 Only the difference between final_value and initial_value is
3851 important. Note that final_value is only approximate. */
3852 loop_info->initial_value = initial_value;
3853 loop_info->comparison_value = comparison_value;
3854 loop_info->final_value = plus_constant (comparison_value, off_by_one);
3855 loop_info->increment = increment;
3856 loop_info->iteration_var = iteration_var;
3857 loop_info->comparison_code = comparison_code;
3858 loop_info->iv = bl;
3860 /* Try to determine the iteration count for loops such
3861 as (for i = init; i < init + const; i++). When running the
3862 loop optimization twice, the first pass often converts simple
3863 loops into this form. */
3865 if (REG_P (initial_value))
3867 rtx reg1;
3868 rtx reg2;
3869 rtx const2;
3871 reg1 = initial_value;
3872 if (GET_CODE (final_value) == PLUS)
3873 reg2 = XEXP (final_value, 0), const2 = XEXP (final_value, 1);
3874 else
3875 reg2 = final_value, const2 = const0_rtx;
3877 /* Check for initial_value = reg1, final_value = reg2 + const2,
3878 where reg1 != reg2. */
3879 if (REG_P (reg2) && reg2 != reg1)
3881 rtx temp;
3883 /* Find what reg1 is equivalent to. Hopefully it will
3884 either be reg2 or reg2 plus a constant. */
3885 temp = loop_find_equiv_value (loop, reg1);
3887 if (find_common_reg_term (temp, reg2))
3888 initial_value = temp;
3889 else
3891 /* Find what reg2 is equivalent to. Hopefully it will
3892 either be reg1 or reg1 plus a constant. Let's ignore
3893 the latter case for now since it is not so common. */
3894 temp = loop_find_equiv_value (loop, reg2);
3896 if (temp == loop_info->iteration_var)
3897 temp = initial_value;
3898 if (temp == reg1)
3899 final_value = (const2 == const0_rtx)
3900 ? reg1 : gen_rtx_PLUS (GET_MODE (reg1), reg1, const2);
3903 else if (loop->vtop && GET_CODE (reg2) == CONST_INT)
3905 rtx temp;
3907 /* When running the loop optimizer twice, check_dbra_loop
3908 further obfuscates reversible loops of the form:
3909 for (i = init; i < init + const; i++). We often end up with
3910 final_value = 0, initial_value = temp, temp = temp2 - init,
3911 where temp2 = init + const. If the loop has a vtop we
3912 can replace initial_value with const. */
3914 temp = loop_find_equiv_value (loop, reg1);
3916 if (GET_CODE (temp) == MINUS && REG_P (XEXP (temp, 0)))
3918 rtx temp2 = loop_find_equiv_value (loop, XEXP (temp, 0));
3920 if (GET_CODE (temp2) == PLUS
3921 && XEXP (temp2, 0) == XEXP (temp, 1))
3922 initial_value = XEXP (temp2, 1);
3927 /* If have initial_value = reg + const1 and final_value = reg +
3928 const2, then replace initial_value with const1 and final_value
3929 with const2. This should be safe since we are protected by the
3930 initial comparison before entering the loop if we have a vtop.
3931 For example, a + b < a + c is not equivalent to b < c for all a
3932 when using modulo arithmetic.
3934 ??? Without a vtop we could still perform the optimization if we check
3935 the initial and final values carefully. */
3936 if (loop->vtop
3937 && (reg_term = find_common_reg_term (initial_value, final_value)))
3939 initial_value = subtract_reg_term (initial_value, reg_term);
3940 final_value = subtract_reg_term (final_value, reg_term);
3943 loop_info->initial_equiv_value = initial_value;
3944 loop_info->final_equiv_value = final_value;
3946 /* For EQ comparison loops, we don't have a valid final value.
3947 Check this now so that we won't leave an invalid value if we
3948 return early for any other reason. */
3949 if (comparison_code == EQ)
3950 loop_info->final_equiv_value = loop_info->final_value = 0;
3952 if (increment == 0)
3954 if (loop_dump_stream)
3955 fprintf (loop_dump_stream,
3956 "Loop iterations: Increment value can't be calculated.\n");
3957 return 0;
3960 if (GET_CODE (increment) != CONST_INT)
3962 /* If we have a REG, check to see if REG holds a constant value. */
3963 /* ??? Other RTL, such as (neg (reg)) is possible here, but it isn't
3964 clear if it is worthwhile to try to handle such RTL. */
3965 if (GET_CODE (increment) == REG || GET_CODE (increment) == SUBREG)
3966 increment = loop_find_equiv_value (loop, increment);
3968 if (GET_CODE (increment) != CONST_INT)
3970 if (loop_dump_stream)
3972 fprintf (loop_dump_stream,
3973 "Loop iterations: Increment value not constant ");
3974 print_simple_rtl (loop_dump_stream, increment);
3975 fprintf (loop_dump_stream, ".\n");
3977 return 0;
3979 loop_info->increment = increment;
3982 if (GET_CODE (initial_value) != CONST_INT)
3984 if (loop_dump_stream)
3986 fprintf (loop_dump_stream,
3987 "Loop iterations: Initial value not constant ");
3988 print_simple_rtl (loop_dump_stream, initial_value);
3989 fprintf (loop_dump_stream, ".\n");
3991 return 0;
3993 else if (comparison_code == EQ)
3995 if (loop_dump_stream)
3996 fprintf (loop_dump_stream, "Loop iterations: EQ comparison loop.\n");
3997 return 0;
3999 else if (GET_CODE (final_value) != CONST_INT)
4001 if (loop_dump_stream)
4003 fprintf (loop_dump_stream,
4004 "Loop iterations: Final value not constant ");
4005 print_simple_rtl (loop_dump_stream, final_value);
4006 fprintf (loop_dump_stream, ".\n");
4008 return 0;
4011 /* Final_larger is 1 if final larger, 0 if they are equal, otherwise -1. */
4012 if (unsigned_p)
4013 final_larger
4014 = ((unsigned HOST_WIDE_INT) INTVAL (final_value)
4015 > (unsigned HOST_WIDE_INT) INTVAL (initial_value))
4016 - ((unsigned HOST_WIDE_INT) INTVAL (final_value)
4017 < (unsigned HOST_WIDE_INT) INTVAL (initial_value));
4018 else
4019 final_larger = (INTVAL (final_value) > INTVAL (initial_value))
4020 - (INTVAL (final_value) < INTVAL (initial_value));
4022 if (INTVAL (increment) > 0)
4023 increment_dir = 1;
4024 else if (INTVAL (increment) == 0)
4025 increment_dir = 0;
4026 else
4027 increment_dir = -1;
4029 /* There are 27 different cases: compare_dir = -1, 0, 1;
4030 final_larger = -1, 0, 1; increment_dir = -1, 0, 1.
4031 There are 4 normal cases, 4 reverse cases (where the iteration variable
4032 will overflow before the loop exits), 4 infinite loop cases, and 15
4033 immediate exit (0 or 1 iteration depending on loop type) cases.
4034 Only try to optimize the normal cases. */
4036 /* (compare_dir/final_larger/increment_dir)
4037 Normal cases: (0/-1/-1), (0/1/1), (-1/-1/-1), (1/1/1)
4038 Reverse cases: (0/-1/1), (0/1/-1), (-1/-1/1), (1/1/-1)
4039 Infinite loops: (0/-1/0), (0/1/0), (-1/-1/0), (1/1/0)
4040 Immediate exit: (0/0/X), (-1/0/X), (-1/1/X), (1/0/X), (1/-1/X) */
4042 /* ?? If the meaning of reverse loops (where the iteration variable
4043 will overflow before the loop exits) is undefined, then could
4044 eliminate all of these special checks, and just always assume
4045 the loops are normal/immediate/infinite. Note that this means
4046 the sign of increment_dir does not have to be known. Also,
4047 since it does not really hurt if immediate exit loops or infinite loops
4048 are optimized, then that case could be ignored also, and hence all
4049 loops can be optimized.
4051 According to ANSI Spec, the reverse loop case result is undefined,
4052 because the action on overflow is undefined.
4054 See also the special test for NE loops below. */
4056 if (final_larger == increment_dir && final_larger != 0
4057 && (final_larger == compare_dir || compare_dir == 0))
4058 /* Normal case. */
4060 else
4062 if (loop_dump_stream)
4063 fprintf (loop_dump_stream, "Loop iterations: Not normal loop.\n");
4064 return 0;
4067 /* Calculate the number of iterations, final_value is only an approximation,
4068 so correct for that. Note that abs_diff and n_iterations are
4069 unsigned, because they can be as large as 2^n - 1. */
4071 inc = INTVAL (increment);
4072 if (inc > 0)
4074 abs_diff = INTVAL (final_value) - INTVAL (initial_value);
4075 abs_inc = inc;
4077 else if (inc < 0)
4079 abs_diff = INTVAL (initial_value) - INTVAL (final_value);
4080 abs_inc = -inc;
4082 else
4083 abort ();
4085 /* Given that iteration_var is going to iterate over its own mode,
4086 not HOST_WIDE_INT, disregard higher bits that might have come
4087 into the picture due to sign extension of initial and final
4088 values. */
4089 abs_diff &= ((unsigned HOST_WIDE_INT) 1
4090 << (GET_MODE_BITSIZE (GET_MODE (iteration_var)) - 1)
4091 << 1) - 1;
4093 /* For NE tests, make sure that the iteration variable won't miss
4094 the final value. If abs_diff mod abs_incr is not zero, then the
4095 iteration variable will overflow before the loop exits, and we
4096 can not calculate the number of iterations. */
4097 if (compare_dir == 0 && (abs_diff % abs_inc) != 0)
4098 return 0;
4100 /* Note that the number of iterations could be calculated using
4101 (abs_diff + abs_inc - 1) / abs_inc, provided care was taken to
4102 handle potential overflow of the summation. */
4103 loop_info->n_iterations = abs_diff / abs_inc + ((abs_diff % abs_inc) != 0);
4104 return loop_info->n_iterations;
4107 /* Replace uses of split bivs with their split pseudo register. This is
4108 for original instructions which remain after loop unrolling without
4109 copying. */
4111 static rtx
4112 remap_split_bivs (loop, x)
4113 struct loop *loop;
4114 rtx x;
4116 struct loop_ivs *ivs = LOOP_IVS (loop);
4117 enum rtx_code code;
4118 int i;
4119 const char *fmt;
4121 if (x == 0)
4122 return x;
4124 code = GET_CODE (x);
4125 switch (code)
4127 case SCRATCH:
4128 case PC:
4129 case CC0:
4130 case CONST_INT:
4131 case CONST_DOUBLE:
4132 case CONST:
4133 case SYMBOL_REF:
4134 case LABEL_REF:
4135 return x;
4137 case REG:
4138 #if 0
4139 /* If non-reduced/final-value givs were split, then this would also
4140 have to remap those givs also. */
4141 #endif
4142 if (REGNO (x) < ivs->n_regs
4143 && REG_IV_TYPE (ivs, REGNO (x)) == BASIC_INDUCT)
4144 return REG_IV_CLASS (ivs, REGNO (x))->biv->src_reg;
4145 break;
4147 default:
4148 break;
4151 fmt = GET_RTX_FORMAT (code);
4152 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
4154 if (fmt[i] == 'e')
4155 XEXP (x, i) = remap_split_bivs (loop, XEXP (x, i));
4156 else if (fmt[i] == 'E')
4158 int j;
4159 for (j = 0; j < XVECLEN (x, i); j++)
4160 XVECEXP (x, i, j) = remap_split_bivs (loop, XVECEXP (x, i, j));
4163 return x;
4166 /* If FIRST_UID is a set of REGNO, and FIRST_UID dominates LAST_UID (e.g.
4167 FIST_UID is always executed if LAST_UID is), then return 1. Otherwise
4168 return 0. COPY_START is where we can start looking for the insns
4169 FIRST_UID and LAST_UID. COPY_END is where we stop looking for these
4170 insns.
4172 If there is no JUMP_INSN between LOOP_START and FIRST_UID, then FIRST_UID
4173 must dominate LAST_UID.
4175 If there is a CODE_LABEL between FIRST_UID and LAST_UID, then FIRST_UID
4176 may not dominate LAST_UID.
4178 If there is no CODE_LABEL between FIRST_UID and LAST_UID, then FIRST_UID
4179 must dominate LAST_UID. */
4182 set_dominates_use (regno, first_uid, last_uid, copy_start, copy_end)
4183 int regno;
4184 int first_uid;
4185 int last_uid;
4186 rtx copy_start;
4187 rtx copy_end;
4189 int passed_jump = 0;
4190 rtx p = NEXT_INSN (copy_start);
4192 while (INSN_UID (p) != first_uid)
4194 if (GET_CODE (p) == JUMP_INSN)
4195 passed_jump = 1;
4196 /* Could not find FIRST_UID. */
4197 if (p == copy_end)
4198 return 0;
4199 p = NEXT_INSN (p);
4202 /* Verify that FIRST_UID is an insn that entirely sets REGNO. */
4203 if (! INSN_P (p) || ! dead_or_set_regno_p (p, regno))
4204 return 0;
4206 /* FIRST_UID is always executed. */
4207 if (passed_jump == 0)
4208 return 1;
4210 while (INSN_UID (p) != last_uid)
4212 /* If we see a CODE_LABEL between FIRST_UID and LAST_UID, then we
4213 can not be sure that FIRST_UID dominates LAST_UID. */
4214 if (GET_CODE (p) == CODE_LABEL)
4215 return 0;
4216 /* Could not find LAST_UID, but we reached the end of the loop, so
4217 it must be safe. */
4218 else if (p == copy_end)
4219 return 1;
4220 p = NEXT_INSN (p);
4223 /* FIRST_UID is always executed if LAST_UID is executed. */
4224 return 1;
4227 /* This routine is called when the number of iterations for the unrolled
4228 loop is one. The goal is to identify a loop that begins with an
4229 unconditional branch to the loop continuation note (or a label just after).
4230 In this case, the unconditional branch that starts the loop needs to be
4231 deleted so that we execute the single iteration. */
4233 static rtx
4234 ujump_to_loop_cont (loop_start, loop_cont)
4235 rtx loop_start;
4236 rtx loop_cont;
4238 rtx x, label, label_ref;
4240 /* See if loop start, or the next insn is an unconditional jump. */
4241 loop_start = next_nonnote_insn (loop_start);
4243 x = pc_set (loop_start);
4244 if (!x)
4245 return NULL_RTX;
4247 label_ref = SET_SRC (x);
4248 if (!label_ref)
4249 return NULL_RTX;
4251 /* Examine insn after loop continuation note. Return if not a label. */
4252 label = next_nonnote_insn (loop_cont);
4253 if (label == 0 || GET_CODE (label) != CODE_LABEL)
4254 return NULL_RTX;
4256 /* Return the loop start if the branch label matches the code label. */
4257 if (CODE_LABEL_NUMBER (label) == CODE_LABEL_NUMBER (XEXP (label_ref, 0)))
4258 return loop_start;
4259 else
4260 return NULL_RTX;