2002-05-02 David S. Miller <davem@redhat.com>
[official-gcc.git] / gcc / unroll.c
blob1047ebfb8f5776e86e58573fad3848ff949bd4c6
1 /* Try to unroll loops, and split induction variables.
2 Copyright (C) 1992, 1993, 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002
3 Free Software Foundation, Inc.
4 Contributed by James E. Wilson, Cygnus Support/UC Berkeley.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 2, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING. If not, write to the Free
20 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
21 02111-1307, USA. */
23 /* Try to unroll a loop, and split induction variables.
25 Loops for which the number of iterations can be calculated exactly are
26 handled specially. If the number of iterations times the insn_count is
27 less than MAX_UNROLLED_INSNS, then the loop is unrolled completely.
28 Otherwise, we try to unroll the loop a number of times modulo the number
29 of iterations, so that only one exit test will be needed. It is unrolled
30 a number of times approximately equal to MAX_UNROLLED_INSNS divided by
31 the insn count.
33 Otherwise, if the number of iterations can be calculated exactly at
34 run time, and the loop is always entered at the top, then we try to
35 precondition the loop. That is, at run time, calculate how many times
36 the loop will execute, and then execute the loop body a few times so
37 that the remaining iterations will be some multiple of 4 (or 2 if the
38 loop is large). Then fall through to a loop unrolled 4 (or 2) times,
39 with only one exit test needed at the end of the loop.
41 Otherwise, if the number of iterations can not be calculated exactly,
42 not even at run time, then we still unroll the loop a number of times
43 approximately equal to MAX_UNROLLED_INSNS divided by the insn count,
44 but there must be an exit test after each copy of the loop body.
46 For each induction variable, which is dead outside the loop (replaceable)
47 or for which we can easily calculate the final value, if we can easily
48 calculate its value at each place where it is set as a function of the
49 current loop unroll count and the variable's value at loop entry, then
50 the induction variable is split into `N' different variables, one for
51 each copy of the loop body. One variable is live across the backward
52 branch, and the others are all calculated as a function of this variable.
53 This helps eliminate data dependencies, and leads to further opportunities
54 for cse. */
56 /* Possible improvements follow: */
58 /* ??? Add an extra pass somewhere to determine whether unrolling will
59 give any benefit. E.g. after generating all unrolled insns, compute the
60 cost of all insns and compare against cost of insns in rolled loop.
62 - On traditional architectures, unrolling a non-constant bound loop
63 is a win if there is a giv whose only use is in memory addresses, the
64 memory addresses can be split, and hence giv increments can be
65 eliminated.
66 - It is also a win if the loop is executed many times, and preconditioning
67 can be performed for the loop.
68 Add code to check for these and similar cases. */
70 /* ??? Improve control of which loops get unrolled. Could use profiling
71 info to only unroll the most commonly executed loops. Perhaps have
72 a user specifyable option to control the amount of code expansion,
73 or the percent of loops to consider for unrolling. Etc. */
75 /* ??? Look at the register copies inside the loop to see if they form a
76 simple permutation. If so, iterate the permutation until it gets back to
77 the start state. This is how many times we should unroll the loop, for
78 best results, because then all register copies can be eliminated.
79 For example, the lisp nreverse function should be unrolled 3 times
80 while (this)
82 next = this->cdr;
83 this->cdr = prev;
84 prev = this;
85 this = next;
88 ??? The number of times to unroll the loop may also be based on data
89 references in the loop. For example, if we have a loop that references
90 x[i-1], x[i], and x[i+1], we should unroll it a multiple of 3 times. */
92 /* ??? Add some simple linear equation solving capability so that we can
93 determine the number of loop iterations for more complex loops.
94 For example, consider this loop from gdb
95 #define SWAP_TARGET_AND_HOST(buffer,len)
97 char tmp;
98 char *p = (char *) buffer;
99 char *q = ((char *) buffer) + len - 1;
100 int iterations = (len + 1) >> 1;
101 int i;
102 for (p; p < q; p++, q--;)
104 tmp = *q;
105 *q = *p;
106 *p = tmp;
109 Note that:
110 start value = p = &buffer + current_iteration
111 end value = q = &buffer + len - 1 - current_iteration
112 Given the loop exit test of "p < q", then there must be "q - p" iterations,
113 set equal to zero and solve for number of iterations:
114 q - p = len - 1 - 2*current_iteration = 0
115 current_iteration = (len - 1) / 2
116 Hence, there are (len - 1) / 2 (rounded up to the nearest integer)
117 iterations of this loop. */
119 /* ??? Currently, no labels are marked as loop invariant when doing loop
120 unrolling. This is because an insn inside the loop, that loads the address
121 of a label inside the loop into a register, could be moved outside the loop
122 by the invariant code motion pass if labels were invariant. If the loop
123 is subsequently unrolled, the code will be wrong because each unrolled
124 body of the loop will use the same address, whereas each actually needs a
125 different address. A case where this happens is when a loop containing
126 a switch statement is unrolled.
128 It would be better to let labels be considered invariant. When we
129 unroll loops here, check to see if any insns using a label local to the
130 loop were moved before the loop. If so, then correct the problem, by
131 moving the insn back into the loop, or perhaps replicate the insn before
132 the loop, one copy for each time the loop is unrolled. */
134 #include "config.h"
135 #include "system.h"
136 #include "rtl.h"
137 #include "tm_p.h"
138 #include "insn-config.h"
139 #include "integrate.h"
140 #include "regs.h"
141 #include "recog.h"
142 #include "flags.h"
143 #include "function.h"
144 #include "expr.h"
145 #include "loop.h"
146 #include "toplev.h"
147 #include "hard-reg-set.h"
148 #include "basic-block.h"
149 #include "predict.h"
150 #include "params.h"
152 /* The prime factors looked for when trying to unroll a loop by some
153 number which is modulo the total number of iterations. Just checking
154 for these 4 prime factors will find at least one factor for 75% of
155 all numbers theoretically. Practically speaking, this will succeed
156 almost all of the time since loops are generally a multiple of 2
157 and/or 5. */
159 #define NUM_FACTORS 4
161 static struct _factor { const int factor; int count; }
162 factors[NUM_FACTORS] = { {2, 0}, {3, 0}, {5, 0}, {7, 0}};
164 /* Describes the different types of loop unrolling performed. */
166 enum unroll_types
168 UNROLL_COMPLETELY,
169 UNROLL_MODULO,
170 UNROLL_NAIVE
173 /* Indexed by register number, if non-zero, then it contains a pointer
174 to a struct induction for a DEST_REG giv which has been combined with
175 one of more address givs. This is needed because whenever such a DEST_REG
176 giv is modified, we must modify the value of all split address givs
177 that were combined with this DEST_REG giv. */
179 static struct induction **addr_combined_regs;
181 /* Indexed by register number, if this is a splittable induction variable,
182 then this will hold the current value of the register, which depends on the
183 iteration number. */
185 static rtx *splittable_regs;
187 /* Indexed by register number, if this is a splittable induction variable,
188 then this will hold the number of instructions in the loop that modify
189 the induction variable. Used to ensure that only the last insn modifying
190 a split iv will update the original iv of the dest. */
192 static int *splittable_regs_updates;
194 /* Forward declarations. */
196 static void init_reg_map PARAMS ((struct inline_remap *, int));
197 static rtx calculate_giv_inc PARAMS ((rtx, rtx, unsigned int));
198 static rtx initial_reg_note_copy PARAMS ((rtx, struct inline_remap *));
199 static void final_reg_note_copy PARAMS ((rtx *, struct inline_remap *));
200 static void copy_loop_body PARAMS ((struct loop *, rtx, rtx,
201 struct inline_remap *, rtx, int,
202 enum unroll_types, rtx, rtx, rtx, rtx));
203 static int find_splittable_regs PARAMS ((const struct loop *,
204 enum unroll_types, int));
205 static int find_splittable_givs PARAMS ((const struct loop *,
206 struct iv_class *, enum unroll_types,
207 rtx, int));
208 static int reg_dead_after_loop PARAMS ((const struct loop *, rtx));
209 static rtx fold_rtx_mult_add PARAMS ((rtx, rtx, rtx, enum machine_mode));
210 static int verify_addresses PARAMS ((struct induction *, rtx, int));
211 static rtx remap_split_bivs PARAMS ((struct loop *, rtx));
212 static rtx find_common_reg_term PARAMS ((rtx, rtx));
213 static rtx subtract_reg_term PARAMS ((rtx, rtx));
214 static rtx loop_find_equiv_value PARAMS ((const struct loop *, rtx));
215 static rtx ujump_to_loop_cont PARAMS ((rtx, rtx));
217 /* Try to unroll one loop and split induction variables in the loop.
219 The loop is described by the arguments LOOP and INSN_COUNT.
220 STRENGTH_REDUCTION_P indicates whether information generated in the
221 strength reduction pass is available.
223 This function is intended to be called from within `strength_reduce'
224 in loop.c. */
226 void
227 unroll_loop (loop, insn_count, strength_reduce_p)
228 struct loop *loop;
229 int insn_count;
230 int strength_reduce_p;
232 struct loop_info *loop_info = LOOP_INFO (loop);
233 struct loop_ivs *ivs = LOOP_IVS (loop);
234 int i, j;
235 unsigned int r;
236 unsigned HOST_WIDE_INT temp;
237 int unroll_number = 1;
238 rtx copy_start, copy_end;
239 rtx insn, sequence, pattern, tem;
240 int max_labelno, max_insnno;
241 rtx insert_before;
242 struct inline_remap *map;
243 char *local_label = NULL;
244 char *local_regno;
245 unsigned int max_local_regnum;
246 unsigned int maxregnum;
247 rtx exit_label = 0;
248 rtx start_label;
249 struct iv_class *bl;
250 int splitting_not_safe = 0;
251 enum unroll_types unroll_type = UNROLL_NAIVE;
252 int loop_preconditioned = 0;
253 rtx safety_label;
254 /* This points to the last real insn in the loop, which should be either
255 a JUMP_INSN (for conditional jumps) or a BARRIER (for unconditional
256 jumps). */
257 rtx last_loop_insn;
258 rtx loop_start = loop->start;
259 rtx loop_end = loop->end;
261 /* Don't bother unrolling huge loops. Since the minimum factor is
262 two, loops greater than one half of MAX_UNROLLED_INSNS will never
263 be unrolled. */
264 if (insn_count > MAX_UNROLLED_INSNS / 2)
266 if (loop_dump_stream)
267 fprintf (loop_dump_stream, "Unrolling failure: Loop too big.\n");
268 return;
271 /* When emitting debugger info, we can't unroll loops with unequal numbers
272 of block_beg and block_end notes, because that would unbalance the block
273 structure of the function. This can happen as a result of the
274 "if (foo) bar; else break;" optimization in jump.c. */
275 /* ??? Gcc has a general policy that -g is never supposed to change the code
276 that the compiler emits, so we must disable this optimization always,
277 even if debug info is not being output. This is rare, so this should
278 not be a significant performance problem. */
280 if (1 /* write_symbols != NO_DEBUG */)
282 int block_begins = 0;
283 int block_ends = 0;
285 for (insn = loop_start; insn != loop_end; insn = NEXT_INSN (insn))
287 if (GET_CODE (insn) == NOTE)
289 if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_BLOCK_BEG)
290 block_begins++;
291 else if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_BLOCK_END)
292 block_ends++;
293 if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_EH_REGION_BEG
294 || NOTE_LINE_NUMBER (insn) == NOTE_INSN_EH_REGION_END)
296 /* Note, would be nice to add code to unroll EH
297 regions, but until that time, we punt (don't
298 unroll). For the proper way of doing it, see
299 expand_inline_function. */
301 if (loop_dump_stream)
302 fprintf (loop_dump_stream,
303 "Unrolling failure: cannot unroll EH regions.\n");
304 return;
309 if (block_begins != block_ends)
311 if (loop_dump_stream)
312 fprintf (loop_dump_stream,
313 "Unrolling failure: Unbalanced block notes.\n");
314 return;
318 /* Determine type of unroll to perform. Depends on the number of iterations
319 and the size of the loop. */
321 /* If there is no strength reduce info, then set
322 loop_info->n_iterations to zero. This can happen if
323 strength_reduce can't find any bivs in the loop. A value of zero
324 indicates that the number of iterations could not be calculated. */
326 if (! strength_reduce_p)
327 loop_info->n_iterations = 0;
329 if (loop_dump_stream && loop_info->n_iterations > 0)
331 fputs ("Loop unrolling: ", loop_dump_stream);
332 fprintf (loop_dump_stream, HOST_WIDE_INT_PRINT_DEC,
333 loop_info->n_iterations);
334 fputs (" iterations.\n", loop_dump_stream);
337 /* Find and save a pointer to the last nonnote insn in the loop. */
339 last_loop_insn = prev_nonnote_insn (loop_end);
341 /* Calculate how many times to unroll the loop. Indicate whether or
342 not the loop is being completely unrolled. */
344 if (loop_info->n_iterations == 1)
346 /* Handle the case where the loop begins with an unconditional
347 jump to the loop condition. Make sure to delete the jump
348 insn, otherwise the loop body will never execute. */
350 rtx ujump = ujump_to_loop_cont (loop->start, loop->cont);
351 if (ujump)
352 delete_related_insns (ujump);
354 /* If number of iterations is exactly 1, then eliminate the compare and
355 branch at the end of the loop since they will never be taken.
356 Then return, since no other action is needed here. */
358 /* If the last instruction is not a BARRIER or a JUMP_INSN, then
359 don't do anything. */
361 if (GET_CODE (last_loop_insn) == BARRIER)
363 /* Delete the jump insn. This will delete the barrier also. */
364 delete_related_insns (PREV_INSN (last_loop_insn));
366 else if (GET_CODE (last_loop_insn) == JUMP_INSN)
368 #ifdef HAVE_cc0
369 rtx prev = PREV_INSN (last_loop_insn);
370 #endif
371 delete_related_insns (last_loop_insn);
372 #ifdef HAVE_cc0
373 /* The immediately preceding insn may be a compare which must be
374 deleted. */
375 if (only_sets_cc0_p (prev))
376 delete_related_insns (prev);
377 #endif
380 /* Remove the loop notes since this is no longer a loop. */
381 if (loop->vtop)
382 delete_related_insns (loop->vtop);
383 if (loop->cont)
384 delete_related_insns (loop->cont);
385 if (loop_start)
386 delete_related_insns (loop_start);
387 if (loop_end)
388 delete_related_insns (loop_end);
390 return;
392 else if (loop_info->n_iterations > 0
393 /* Avoid overflow in the next expression. */
394 && loop_info->n_iterations < MAX_UNROLLED_INSNS
395 && loop_info->n_iterations * insn_count < MAX_UNROLLED_INSNS)
397 unroll_number = loop_info->n_iterations;
398 unroll_type = UNROLL_COMPLETELY;
400 else if (loop_info->n_iterations > 0)
402 /* Try to factor the number of iterations. Don't bother with the
403 general case, only using 2, 3, 5, and 7 will get 75% of all
404 numbers theoretically, and almost all in practice. */
406 for (i = 0; i < NUM_FACTORS; i++)
407 factors[i].count = 0;
409 temp = loop_info->n_iterations;
410 for (i = NUM_FACTORS - 1; i >= 0; i--)
411 while (temp % factors[i].factor == 0)
413 factors[i].count++;
414 temp = temp / factors[i].factor;
417 /* Start with the larger factors first so that we generally
418 get lots of unrolling. */
420 unroll_number = 1;
421 temp = insn_count;
422 for (i = 3; i >= 0; i--)
423 while (factors[i].count--)
425 if (temp * factors[i].factor < MAX_UNROLLED_INSNS)
427 unroll_number *= factors[i].factor;
428 temp *= factors[i].factor;
430 else
431 break;
434 /* If we couldn't find any factors, then unroll as in the normal
435 case. */
436 if (unroll_number == 1)
438 if (loop_dump_stream)
439 fprintf (loop_dump_stream, "Loop unrolling: No factors found.\n");
441 else
442 unroll_type = UNROLL_MODULO;
445 /* Default case, calculate number of times to unroll loop based on its
446 size. */
447 if (unroll_type == UNROLL_NAIVE)
449 if (8 * insn_count < MAX_UNROLLED_INSNS)
450 unroll_number = 8;
451 else if (4 * insn_count < MAX_UNROLLED_INSNS)
452 unroll_number = 4;
453 else
454 unroll_number = 2;
457 /* Now we know how many times to unroll the loop. */
459 if (loop_dump_stream)
460 fprintf (loop_dump_stream, "Unrolling loop %d times.\n", unroll_number);
462 if (unroll_type == UNROLL_COMPLETELY || unroll_type == UNROLL_MODULO)
464 /* Loops of these types can start with jump down to the exit condition
465 in rare circumstances.
467 Consider a pair of nested loops where the inner loop is part
468 of the exit code for the outer loop.
470 In this case jump.c will not duplicate the exit test for the outer
471 loop, so it will start with a jump to the exit code.
473 Then consider if the inner loop turns out to iterate once and
474 only once. We will end up deleting the jumps associated with
475 the inner loop. However, the loop notes are not removed from
476 the instruction stream.
478 And finally assume that we can compute the number of iterations
479 for the outer loop.
481 In this case unroll may want to unroll the outer loop even though
482 it starts with a jump to the outer loop's exit code.
484 We could try to optimize this case, but it hardly seems worth it.
485 Just return without unrolling the loop in such cases. */
487 insn = loop_start;
488 while (GET_CODE (insn) != CODE_LABEL && GET_CODE (insn) != JUMP_INSN)
489 insn = NEXT_INSN (insn);
490 if (GET_CODE (insn) == JUMP_INSN)
491 return;
494 if (unroll_type == UNROLL_COMPLETELY)
496 /* Completely unrolling the loop: Delete the compare and branch at
497 the end (the last two instructions). This delete must done at the
498 very end of loop unrolling, to avoid problems with calls to
499 back_branch_in_range_p, which is called by find_splittable_regs.
500 All increments of splittable bivs/givs are changed to load constant
501 instructions. */
503 copy_start = loop_start;
505 /* Set insert_before to the instruction immediately after the JUMP_INSN
506 (or BARRIER), so that any NOTEs between the JUMP_INSN and the end of
507 the loop will be correctly handled by copy_loop_body. */
508 insert_before = NEXT_INSN (last_loop_insn);
510 /* Set copy_end to the insn before the jump at the end of the loop. */
511 if (GET_CODE (last_loop_insn) == BARRIER)
512 copy_end = PREV_INSN (PREV_INSN (last_loop_insn));
513 else if (GET_CODE (last_loop_insn) == JUMP_INSN)
515 copy_end = PREV_INSN (last_loop_insn);
516 #ifdef HAVE_cc0
517 /* The instruction immediately before the JUMP_INSN may be a compare
518 instruction which we do not want to copy. */
519 if (sets_cc0_p (PREV_INSN (copy_end)))
520 copy_end = PREV_INSN (copy_end);
521 #endif
523 else
525 /* We currently can't unroll a loop if it doesn't end with a
526 JUMP_INSN. There would need to be a mechanism that recognizes
527 this case, and then inserts a jump after each loop body, which
528 jumps to after the last loop body. */
529 if (loop_dump_stream)
530 fprintf (loop_dump_stream,
531 "Unrolling failure: loop does not end with a JUMP_INSN.\n");
532 return;
535 else if (unroll_type == UNROLL_MODULO)
537 /* Partially unrolling the loop: The compare and branch at the end
538 (the last two instructions) must remain. Don't copy the compare
539 and branch instructions at the end of the loop. Insert the unrolled
540 code immediately before the compare/branch at the end so that the
541 code will fall through to them as before. */
543 copy_start = loop_start;
545 /* Set insert_before to the jump insn at the end of the loop.
546 Set copy_end to before the jump insn at the end of the loop. */
547 if (GET_CODE (last_loop_insn) == BARRIER)
549 insert_before = PREV_INSN (last_loop_insn);
550 copy_end = PREV_INSN (insert_before);
552 else if (GET_CODE (last_loop_insn) == JUMP_INSN)
554 insert_before = last_loop_insn;
555 #ifdef HAVE_cc0
556 /* The instruction immediately before the JUMP_INSN may be a compare
557 instruction which we do not want to copy or delete. */
558 if (sets_cc0_p (PREV_INSN (insert_before)))
559 insert_before = PREV_INSN (insert_before);
560 #endif
561 copy_end = PREV_INSN (insert_before);
563 else
565 /* We currently can't unroll a loop if it doesn't end with a
566 JUMP_INSN. There would need to be a mechanism that recognizes
567 this case, and then inserts a jump after each loop body, which
568 jumps to after the last loop body. */
569 if (loop_dump_stream)
570 fprintf (loop_dump_stream,
571 "Unrolling failure: loop does not end with a JUMP_INSN.\n");
572 return;
575 else
577 /* Normal case: Must copy the compare and branch instructions at the
578 end of the loop. */
580 if (GET_CODE (last_loop_insn) == BARRIER)
582 /* Loop ends with an unconditional jump and a barrier.
583 Handle this like above, don't copy jump and barrier.
584 This is not strictly necessary, but doing so prevents generating
585 unconditional jumps to an immediately following label.
587 This will be corrected below if the target of this jump is
588 not the start_label. */
590 insert_before = PREV_INSN (last_loop_insn);
591 copy_end = PREV_INSN (insert_before);
593 else if (GET_CODE (last_loop_insn) == JUMP_INSN)
595 /* Set insert_before to immediately after the JUMP_INSN, so that
596 NOTEs at the end of the loop will be correctly handled by
597 copy_loop_body. */
598 insert_before = NEXT_INSN (last_loop_insn);
599 copy_end = last_loop_insn;
601 else
603 /* We currently can't unroll a loop if it doesn't end with a
604 JUMP_INSN. There would need to be a mechanism that recognizes
605 this case, and then inserts a jump after each loop body, which
606 jumps to after the last loop body. */
607 if (loop_dump_stream)
608 fprintf (loop_dump_stream,
609 "Unrolling failure: loop does not end with a JUMP_INSN.\n");
610 return;
613 /* If copying exit test branches because they can not be eliminated,
614 then must convert the fall through case of the branch to a jump past
615 the end of the loop. Create a label to emit after the loop and save
616 it for later use. Do not use the label after the loop, if any, since
617 it might be used by insns outside the loop, or there might be insns
618 added before it later by final_[bg]iv_value which must be after
619 the real exit label. */
620 exit_label = gen_label_rtx ();
622 insn = loop_start;
623 while (GET_CODE (insn) != CODE_LABEL && GET_CODE (insn) != JUMP_INSN)
624 insn = NEXT_INSN (insn);
626 if (GET_CODE (insn) == JUMP_INSN)
628 /* The loop starts with a jump down to the exit condition test.
629 Start copying the loop after the barrier following this
630 jump insn. */
631 copy_start = NEXT_INSN (insn);
633 /* Splitting induction variables doesn't work when the loop is
634 entered via a jump to the bottom, because then we end up doing
635 a comparison against a new register for a split variable, but
636 we did not execute the set insn for the new register because
637 it was skipped over. */
638 splitting_not_safe = 1;
639 if (loop_dump_stream)
640 fprintf (loop_dump_stream,
641 "Splitting not safe, because loop not entered at top.\n");
643 else
644 copy_start = loop_start;
647 /* This should always be the first label in the loop. */
648 start_label = NEXT_INSN (copy_start);
649 /* There may be a line number note and/or a loop continue note here. */
650 while (GET_CODE (start_label) == NOTE)
651 start_label = NEXT_INSN (start_label);
652 if (GET_CODE (start_label) != CODE_LABEL)
654 /* This can happen as a result of jump threading. If the first insns in
655 the loop test the same condition as the loop's backward jump, or the
656 opposite condition, then the backward jump will be modified to point
657 to elsewhere, and the loop's start label is deleted.
659 This case currently can not be handled by the loop unrolling code. */
661 if (loop_dump_stream)
662 fprintf (loop_dump_stream,
663 "Unrolling failure: unknown insns between BEG note and loop label.\n");
664 return;
666 if (LABEL_NAME (start_label))
668 /* The jump optimization pass must have combined the original start label
669 with a named label for a goto. We can't unroll this case because
670 jumps which go to the named label must be handled differently than
671 jumps to the loop start, and it is impossible to differentiate them
672 in this case. */
673 if (loop_dump_stream)
674 fprintf (loop_dump_stream,
675 "Unrolling failure: loop start label is gone\n");
676 return;
679 if (unroll_type == UNROLL_NAIVE
680 && GET_CODE (last_loop_insn) == BARRIER
681 && GET_CODE (PREV_INSN (last_loop_insn)) == JUMP_INSN
682 && start_label != JUMP_LABEL (PREV_INSN (last_loop_insn)))
684 /* In this case, we must copy the jump and barrier, because they will
685 not be converted to jumps to an immediately following label. */
687 insert_before = NEXT_INSN (last_loop_insn);
688 copy_end = last_loop_insn;
691 if (unroll_type == UNROLL_NAIVE
692 && GET_CODE (last_loop_insn) == JUMP_INSN
693 && start_label != JUMP_LABEL (last_loop_insn))
695 /* ??? The loop ends with a conditional branch that does not branch back
696 to the loop start label. In this case, we must emit an unconditional
697 branch to the loop exit after emitting the final branch.
698 copy_loop_body does not have support for this currently, so we
699 give up. It doesn't seem worthwhile to unroll anyways since
700 unrolling would increase the number of branch instructions
701 executed. */
702 if (loop_dump_stream)
703 fprintf (loop_dump_stream,
704 "Unrolling failure: final conditional branch not to loop start\n");
705 return;
708 /* Allocate a translation table for the labels and insn numbers.
709 They will be filled in as we copy the insns in the loop. */
711 max_labelno = max_label_num ();
712 max_insnno = get_max_uid ();
714 /* Various paths through the unroll code may reach the "egress" label
715 without initializing fields within the map structure.
717 To be safe, we use xcalloc to zero the memory. */
718 map = (struct inline_remap *) xcalloc (1, sizeof (struct inline_remap));
720 /* Allocate the label map. */
722 if (max_labelno > 0)
724 map->label_map = (rtx *) xcalloc (max_labelno, sizeof (rtx));
725 local_label = (char *) xcalloc (max_labelno, sizeof (char));
728 /* Search the loop and mark all local labels, i.e. the ones which have to
729 be distinct labels when copied. For all labels which might be
730 non-local, set their label_map entries to point to themselves.
731 If they happen to be local their label_map entries will be overwritten
732 before the loop body is copied. The label_map entries for local labels
733 will be set to a different value each time the loop body is copied. */
735 for (insn = copy_start; insn != loop_end; insn = NEXT_INSN (insn))
737 rtx note;
739 if (GET_CODE (insn) == CODE_LABEL)
740 local_label[CODE_LABEL_NUMBER (insn)] = 1;
741 else if (GET_CODE (insn) == JUMP_INSN)
743 if (JUMP_LABEL (insn))
744 set_label_in_map (map,
745 CODE_LABEL_NUMBER (JUMP_LABEL (insn)),
746 JUMP_LABEL (insn));
747 else if (GET_CODE (PATTERN (insn)) == ADDR_VEC
748 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
750 rtx pat = PATTERN (insn);
751 int diff_vec_p = GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC;
752 int len = XVECLEN (pat, diff_vec_p);
753 rtx label;
755 for (i = 0; i < len; i++)
757 label = XEXP (XVECEXP (pat, diff_vec_p, i), 0);
758 set_label_in_map (map, CODE_LABEL_NUMBER (label), label);
762 if ((note = find_reg_note (insn, REG_LABEL, NULL_RTX)))
763 set_label_in_map (map, CODE_LABEL_NUMBER (XEXP (note, 0)),
764 XEXP (note, 0));
767 /* Allocate space for the insn map. */
769 map->insn_map = (rtx *) xmalloc (max_insnno * sizeof (rtx));
771 /* Set this to zero, to indicate that we are doing loop unrolling,
772 not function inlining. */
773 map->inline_target = 0;
775 /* The register and constant maps depend on the number of registers
776 present, so the final maps can't be created until after
777 find_splittable_regs is called. However, they are needed for
778 preconditioning, so we create temporary maps when preconditioning
779 is performed. */
781 /* The preconditioning code may allocate two new pseudo registers. */
782 maxregnum = max_reg_num ();
784 /* local_regno is only valid for regnos < max_local_regnum. */
785 max_local_regnum = maxregnum;
787 /* Allocate and zero out the splittable_regs and addr_combined_regs
788 arrays. These must be zeroed here because they will be used if
789 loop preconditioning is performed, and must be zero for that case.
791 It is safe to do this here, since the extra registers created by the
792 preconditioning code and find_splittable_regs will never be used
793 to access the splittable_regs[] and addr_combined_regs[] arrays. */
795 splittable_regs = (rtx *) xcalloc (maxregnum, sizeof (rtx));
796 splittable_regs_updates = (int *) xcalloc (maxregnum, sizeof (int));
797 addr_combined_regs
798 = (struct induction **) xcalloc (maxregnum, sizeof (struct induction *));
799 local_regno = (char *) xcalloc (maxregnum, sizeof (char));
801 /* Mark all local registers, i.e. the ones which are referenced only
802 inside the loop. */
803 if (INSN_UID (copy_end) < max_uid_for_loop)
805 int copy_start_luid = INSN_LUID (copy_start);
806 int copy_end_luid = INSN_LUID (copy_end);
808 /* If a register is used in the jump insn, we must not duplicate it
809 since it will also be used outside the loop. */
810 if (GET_CODE (copy_end) == JUMP_INSN)
811 copy_end_luid--;
813 /* If we have a target that uses cc0, then we also must not duplicate
814 the insn that sets cc0 before the jump insn, if one is present. */
815 #ifdef HAVE_cc0
816 if (GET_CODE (copy_end) == JUMP_INSN
817 && sets_cc0_p (PREV_INSN (copy_end)))
818 copy_end_luid--;
819 #endif
821 /* If copy_start points to the NOTE that starts the loop, then we must
822 use the next luid, because invariant pseudo-regs moved out of the loop
823 have their lifetimes modified to start here, but they are not safe
824 to duplicate. */
825 if (copy_start == loop_start)
826 copy_start_luid++;
828 /* If a pseudo's lifetime is entirely contained within this loop, then we
829 can use a different pseudo in each unrolled copy of the loop. This
830 results in better code. */
831 /* We must limit the generic test to max_reg_before_loop, because only
832 these pseudo registers have valid regno_first_uid info. */
833 for (r = FIRST_PSEUDO_REGISTER; r < max_reg_before_loop; ++r)
834 if (REGNO_FIRST_UID (r) > 0 && REGNO_FIRST_UID (r) <= max_uid_for_loop
835 && REGNO_FIRST_LUID (r) >= copy_start_luid
836 && REGNO_LAST_UID (r) > 0 && REGNO_LAST_UID (r) <= max_uid_for_loop
837 && REGNO_LAST_LUID (r) <= copy_end_luid)
839 /* However, we must also check for loop-carried dependencies.
840 If the value the pseudo has at the end of iteration X is
841 used by iteration X+1, then we can not use a different pseudo
842 for each unrolled copy of the loop. */
843 /* A pseudo is safe if regno_first_uid is a set, and this
844 set dominates all instructions from regno_first_uid to
845 regno_last_uid. */
846 /* ??? This check is simplistic. We would get better code if
847 this check was more sophisticated. */
848 if (set_dominates_use (r, REGNO_FIRST_UID (r), REGNO_LAST_UID (r),
849 copy_start, copy_end))
850 local_regno[r] = 1;
852 if (loop_dump_stream)
854 if (local_regno[r])
855 fprintf (loop_dump_stream, "Marked reg %d as local\n", r);
856 else
857 fprintf (loop_dump_stream, "Did not mark reg %d as local\n",
863 /* If this loop requires exit tests when unrolled, check to see if we
864 can precondition the loop so as to make the exit tests unnecessary.
865 Just like variable splitting, this is not safe if the loop is entered
866 via a jump to the bottom. Also, can not do this if no strength
867 reduce info, because precondition_loop_p uses this info. */
869 /* Must copy the loop body for preconditioning before the following
870 find_splittable_regs call since that will emit insns which need to
871 be after the preconditioned loop copies, but immediately before the
872 unrolled loop copies. */
874 /* Also, it is not safe to split induction variables for the preconditioned
875 copies of the loop body. If we split induction variables, then the code
876 assumes that each induction variable can be represented as a function
877 of its initial value and the loop iteration number. This is not true
878 in this case, because the last preconditioned copy of the loop body
879 could be any iteration from the first up to the `unroll_number-1'th,
880 depending on the initial value of the iteration variable. Therefore
881 we can not split induction variables here, because we can not calculate
882 their value. Hence, this code must occur before find_splittable_regs
883 is called. */
885 if (unroll_type == UNROLL_NAIVE && ! splitting_not_safe && strength_reduce_p)
887 rtx initial_value, final_value, increment;
888 enum machine_mode mode;
890 if (precondition_loop_p (loop,
891 &initial_value, &final_value, &increment,
892 &mode))
894 rtx diff;
895 rtx *labels;
896 int abs_inc, neg_inc;
897 enum rtx_code cc = loop_info->comparison_code;
898 int less_p = (cc == LE || cc == LEU || cc == LT || cc == LTU);
899 int unsigned_p = (cc == LEU || cc == GEU || cc == LTU || cc == GTU);
901 map->reg_map = (rtx *) xmalloc (maxregnum * sizeof (rtx));
903 VARRAY_CONST_EQUIV_INIT (map->const_equiv_varray, maxregnum,
904 "unroll_loop_precondition");
905 global_const_equiv_varray = map->const_equiv_varray;
907 init_reg_map (map, maxregnum);
909 /* Limit loop unrolling to 4, since this will make 7 copies of
910 the loop body. */
911 if (unroll_number > 4)
912 unroll_number = 4;
914 /* Save the absolute value of the increment, and also whether or
915 not it is negative. */
916 neg_inc = 0;
917 abs_inc = INTVAL (increment);
918 if (abs_inc < 0)
920 abs_inc = -abs_inc;
921 neg_inc = 1;
924 start_sequence ();
926 /* Final value may have form of (PLUS val1 const1_rtx). We need
927 to convert it into general operand, so compute the real value. */
929 if (GET_CODE (final_value) == PLUS)
931 final_value = expand_simple_binop (mode, PLUS,
932 copy_rtx (XEXP (final_value, 0)),
933 copy_rtx (XEXP (final_value, 1)),
934 NULL_RTX, 0, OPTAB_LIB_WIDEN);
936 if (!nonmemory_operand (final_value, VOIDmode))
937 final_value = force_reg (mode, copy_rtx (final_value));
939 /* Calculate the difference between the final and initial values.
940 Final value may be a (plus (reg x) (const_int 1)) rtx.
941 Let the following cse pass simplify this if initial value is
942 a constant.
944 We must copy the final and initial values here to avoid
945 improperly shared rtl.
947 We have to deal with for (i = 0; --i < 6;) type loops.
948 For such loops the real final value is the first time the
949 loop variable overflows, so the diff we calculate is the
950 distance from the overflow value. This is 0 or ~0 for
951 unsigned loops depending on the direction, or INT_MAX,
952 INT_MAX+1 for signed loops. We really do not need the
953 exact value, since we are only interested in the diff
954 modulo the increment, and the increment is a power of 2,
955 so we can pretend that the overflow value is 0/~0. */
957 if (cc == NE || less_p != neg_inc)
958 diff = expand_simple_binop (mode, MINUS, final_value,
959 copy_rtx (initial_value), NULL_RTX, 0,
960 OPTAB_LIB_WIDEN);
961 else
962 diff = expand_simple_unop (mode, neg_inc ? NOT : NEG,
963 copy_rtx (initial_value), NULL_RTX, 0);
965 /* Now calculate (diff % (unroll * abs (increment))) by using an
966 and instruction. */
967 diff = expand_simple_binop (GET_MODE (diff), AND, diff,
968 GEN_INT (unroll_number * abs_inc - 1),
969 NULL_RTX, 0, OPTAB_LIB_WIDEN);
971 /* Now emit a sequence of branches to jump to the proper precond
972 loop entry point. */
974 labels = (rtx *) xmalloc (sizeof (rtx) * unroll_number);
975 for (i = 0; i < unroll_number; i++)
976 labels[i] = gen_label_rtx ();
978 /* Check for the case where the initial value is greater than or
979 equal to the final value. In that case, we want to execute
980 exactly one loop iteration. The code below will fail for this
981 case. This check does not apply if the loop has a NE
982 comparison at the end. */
984 if (cc != NE)
986 rtx incremented_initval;
987 incremented_initval = expand_simple_binop (mode, PLUS,
988 initial_value,
989 increment,
990 NULL_RTX, 0,
991 OPTAB_LIB_WIDEN);
992 emit_cmp_and_jump_insns (incremented_initval, final_value,
993 less_p ? GE : LE, NULL_RTX,
994 mode, unsigned_p, labels[1]);
995 predict_insn_def (get_last_insn (), PRED_LOOP_CONDITION,
996 TAKEN);
997 JUMP_LABEL (get_last_insn ()) = labels[1];
998 LABEL_NUSES (labels[1])++;
1001 /* Assuming the unroll_number is 4, and the increment is 2, then
1002 for a negative increment: for a positive increment:
1003 diff = 0,1 precond 0 diff = 0,7 precond 0
1004 diff = 2,3 precond 3 diff = 1,2 precond 1
1005 diff = 4,5 precond 2 diff = 3,4 precond 2
1006 diff = 6,7 precond 1 diff = 5,6 precond 3 */
1008 /* We only need to emit (unroll_number - 1) branches here, the
1009 last case just falls through to the following code. */
1011 /* ??? This would give better code if we emitted a tree of branches
1012 instead of the current linear list of branches. */
1014 for (i = 0; i < unroll_number - 1; i++)
1016 int cmp_const;
1017 enum rtx_code cmp_code;
1019 /* For negative increments, must invert the constant compared
1020 against, except when comparing against zero. */
1021 if (i == 0)
1023 cmp_const = 0;
1024 cmp_code = EQ;
1026 else if (neg_inc)
1028 cmp_const = unroll_number - i;
1029 cmp_code = GE;
1031 else
1033 cmp_const = i;
1034 cmp_code = LE;
1037 emit_cmp_and_jump_insns (diff, GEN_INT (abs_inc * cmp_const),
1038 cmp_code, NULL_RTX, mode, 0, labels[i]);
1039 JUMP_LABEL (get_last_insn ()) = labels[i];
1040 LABEL_NUSES (labels[i])++;
1041 predict_insn (get_last_insn (), PRED_LOOP_PRECONDITIONING,
1042 REG_BR_PROB_BASE / (unroll_number - i));
1045 /* If the increment is greater than one, then we need another branch,
1046 to handle other cases equivalent to 0. */
1048 /* ??? This should be merged into the code above somehow to help
1049 simplify the code here, and reduce the number of branches emitted.
1050 For the negative increment case, the branch here could easily
1051 be merged with the `0' case branch above. For the positive
1052 increment case, it is not clear how this can be simplified. */
1054 if (abs_inc != 1)
1056 int cmp_const;
1057 enum rtx_code cmp_code;
1059 if (neg_inc)
1061 cmp_const = abs_inc - 1;
1062 cmp_code = LE;
1064 else
1066 cmp_const = abs_inc * (unroll_number - 1) + 1;
1067 cmp_code = GE;
1070 emit_cmp_and_jump_insns (diff, GEN_INT (cmp_const), cmp_code,
1071 NULL_RTX, mode, 0, labels[0]);
1072 JUMP_LABEL (get_last_insn ()) = labels[0];
1073 LABEL_NUSES (labels[0])++;
1076 sequence = gen_sequence ();
1077 end_sequence ();
1078 loop_insn_hoist (loop, sequence);
1080 /* Only the last copy of the loop body here needs the exit
1081 test, so set copy_end to exclude the compare/branch here,
1082 and then reset it inside the loop when get to the last
1083 copy. */
1085 if (GET_CODE (last_loop_insn) == BARRIER)
1086 copy_end = PREV_INSN (PREV_INSN (last_loop_insn));
1087 else if (GET_CODE (last_loop_insn) == JUMP_INSN)
1089 copy_end = PREV_INSN (last_loop_insn);
1090 #ifdef HAVE_cc0
1091 /* The immediately preceding insn may be a compare which
1092 we do not want to copy. */
1093 if (sets_cc0_p (PREV_INSN (copy_end)))
1094 copy_end = PREV_INSN (copy_end);
1095 #endif
1097 else
1098 abort ();
1100 for (i = 1; i < unroll_number; i++)
1102 emit_label_after (labels[unroll_number - i],
1103 PREV_INSN (loop_start));
1105 memset ((char *) map->insn_map, 0, max_insnno * sizeof (rtx));
1106 memset ((char *) &VARRAY_CONST_EQUIV (map->const_equiv_varray, 0),
1107 0, (VARRAY_SIZE (map->const_equiv_varray)
1108 * sizeof (struct const_equiv_data)));
1109 map->const_age = 0;
1111 for (j = 0; j < max_labelno; j++)
1112 if (local_label[j])
1113 set_label_in_map (map, j, gen_label_rtx ());
1115 for (r = FIRST_PSEUDO_REGISTER; r < max_local_regnum; r++)
1116 if (local_regno[r])
1118 map->reg_map[r]
1119 = gen_reg_rtx (GET_MODE (regno_reg_rtx[r]));
1120 record_base_value (REGNO (map->reg_map[r]),
1121 regno_reg_rtx[r], 0);
1123 /* The last copy needs the compare/branch insns at the end,
1124 so reset copy_end here if the loop ends with a conditional
1125 branch. */
1127 if (i == unroll_number - 1)
1129 if (GET_CODE (last_loop_insn) == BARRIER)
1130 copy_end = PREV_INSN (PREV_INSN (last_loop_insn));
1131 else
1132 copy_end = last_loop_insn;
1135 /* None of the copies are the `last_iteration', so just
1136 pass zero for that parameter. */
1137 copy_loop_body (loop, copy_start, copy_end, map, exit_label, 0,
1138 unroll_type, start_label, loop_end,
1139 loop_start, copy_end);
1141 emit_label_after (labels[0], PREV_INSN (loop_start));
1143 if (GET_CODE (last_loop_insn) == BARRIER)
1145 insert_before = PREV_INSN (last_loop_insn);
1146 copy_end = PREV_INSN (insert_before);
1148 else
1150 insert_before = last_loop_insn;
1151 #ifdef HAVE_cc0
1152 /* The instruction immediately before the JUMP_INSN may
1153 be a compare instruction which we do not want to copy
1154 or delete. */
1155 if (sets_cc0_p (PREV_INSN (insert_before)))
1156 insert_before = PREV_INSN (insert_before);
1157 #endif
1158 copy_end = PREV_INSN (insert_before);
1161 /* Set unroll type to MODULO now. */
1162 unroll_type = UNROLL_MODULO;
1163 loop_preconditioned = 1;
1165 /* Clean up. */
1166 free (labels);
1170 /* If reach here, and the loop type is UNROLL_NAIVE, then don't unroll
1171 the loop unless all loops are being unrolled. */
1172 if (unroll_type == UNROLL_NAIVE && ! flag_unroll_all_loops)
1174 if (loop_dump_stream)
1175 fprintf (loop_dump_stream,
1176 "Unrolling failure: Naive unrolling not being done.\n");
1177 goto egress;
1180 /* At this point, we are guaranteed to unroll the loop. */
1182 /* Keep track of the unroll factor for the loop. */
1183 loop_info->unroll_number = unroll_number;
1185 /* For each biv and giv, determine whether it can be safely split into
1186 a different variable for each unrolled copy of the loop body.
1187 We precalculate and save this info here, since computing it is
1188 expensive.
1190 Do this before deleting any instructions from the loop, so that
1191 back_branch_in_range_p will work correctly. */
1193 if (splitting_not_safe)
1194 temp = 0;
1195 else
1196 temp = find_splittable_regs (loop, unroll_type, unroll_number);
1198 /* find_splittable_regs may have created some new registers, so must
1199 reallocate the reg_map with the new larger size, and must realloc
1200 the constant maps also. */
1202 maxregnum = max_reg_num ();
1203 map->reg_map = (rtx *) xmalloc (maxregnum * sizeof (rtx));
1205 init_reg_map (map, maxregnum);
1207 if (map->const_equiv_varray == 0)
1208 VARRAY_CONST_EQUIV_INIT (map->const_equiv_varray,
1209 maxregnum + temp * unroll_number * 2,
1210 "unroll_loop");
1211 global_const_equiv_varray = map->const_equiv_varray;
1213 /* Search the list of bivs and givs to find ones which need to be remapped
1214 when split, and set their reg_map entry appropriately. */
1216 for (bl = ivs->list; bl; bl = bl->next)
1218 if (REGNO (bl->biv->src_reg) != bl->regno)
1219 map->reg_map[bl->regno] = bl->biv->src_reg;
1220 #if 0
1221 /* Currently, non-reduced/final-value givs are never split. */
1222 for (v = bl->giv; v; v = v->next_iv)
1223 if (REGNO (v->src_reg) != bl->regno)
1224 map->reg_map[REGNO (v->dest_reg)] = v->src_reg;
1225 #endif
1228 /* Use our current register alignment and pointer flags. */
1229 map->regno_pointer_align = cfun->emit->regno_pointer_align;
1230 map->x_regno_reg_rtx = cfun->emit->x_regno_reg_rtx;
1232 /* If the loop is being partially unrolled, and the iteration variables
1233 are being split, and are being renamed for the split, then must fix up
1234 the compare/jump instruction at the end of the loop to refer to the new
1235 registers. This compare isn't copied, so the registers used in it
1236 will never be replaced if it isn't done here. */
1238 if (unroll_type == UNROLL_MODULO)
1240 insn = NEXT_INSN (copy_end);
1241 if (GET_CODE (insn) == INSN || GET_CODE (insn) == JUMP_INSN)
1242 PATTERN (insn) = remap_split_bivs (loop, PATTERN (insn));
1245 /* For unroll_number times, make a copy of each instruction
1246 between copy_start and copy_end, and insert these new instructions
1247 before the end of the loop. */
1249 for (i = 0; i < unroll_number; i++)
1251 memset ((char *) map->insn_map, 0, max_insnno * sizeof (rtx));
1252 memset ((char *) &VARRAY_CONST_EQUIV (map->const_equiv_varray, 0), 0,
1253 VARRAY_SIZE (map->const_equiv_varray) * sizeof (struct const_equiv_data));
1254 map->const_age = 0;
1256 for (j = 0; j < max_labelno; j++)
1257 if (local_label[j])
1258 set_label_in_map (map, j, gen_label_rtx ());
1260 for (r = FIRST_PSEUDO_REGISTER; r < max_local_regnum; r++)
1261 if (local_regno[r])
1263 map->reg_map[r] = gen_reg_rtx (GET_MODE (regno_reg_rtx[r]));
1264 record_base_value (REGNO (map->reg_map[r]),
1265 regno_reg_rtx[r], 0);
1268 /* If loop starts with a branch to the test, then fix it so that
1269 it points to the test of the first unrolled copy of the loop. */
1270 if (i == 0 && loop_start != copy_start)
1272 insn = PREV_INSN (copy_start);
1273 pattern = PATTERN (insn);
1275 tem = get_label_from_map (map,
1276 CODE_LABEL_NUMBER
1277 (XEXP (SET_SRC (pattern), 0)));
1278 SET_SRC (pattern) = gen_rtx_LABEL_REF (VOIDmode, tem);
1280 /* Set the jump label so that it can be used by later loop unrolling
1281 passes. */
1282 JUMP_LABEL (insn) = tem;
1283 LABEL_NUSES (tem)++;
1286 copy_loop_body (loop, copy_start, copy_end, map, exit_label,
1287 i == unroll_number - 1, unroll_type, start_label,
1288 loop_end, insert_before, insert_before);
1291 /* Before deleting any insns, emit a CODE_LABEL immediately after the last
1292 insn to be deleted. This prevents any runaway delete_insn call from
1293 more insns that it should, as it always stops at a CODE_LABEL. */
1295 /* Delete the compare and branch at the end of the loop if completely
1296 unrolling the loop. Deleting the backward branch at the end also
1297 deletes the code label at the start of the loop. This is done at
1298 the very end to avoid problems with back_branch_in_range_p. */
1300 if (unroll_type == UNROLL_COMPLETELY)
1301 safety_label = emit_label_after (gen_label_rtx (), last_loop_insn);
1302 else
1303 safety_label = emit_label_after (gen_label_rtx (), copy_end);
1305 /* Delete all of the original loop instructions. Don't delete the
1306 LOOP_BEG note, or the first code label in the loop. */
1308 insn = NEXT_INSN (copy_start);
1309 while (insn != safety_label)
1311 /* ??? Don't delete named code labels. They will be deleted when the
1312 jump that references them is deleted. Otherwise, we end up deleting
1313 them twice, which causes them to completely disappear instead of turn
1314 into NOTE_INSN_DELETED_LABEL notes. This in turn causes aborts in
1315 dwarfout.c/dwarf2out.c. We could perhaps fix the dwarf*out.c files
1316 to handle deleted labels instead. Or perhaps fix DECL_RTL of the
1317 associated LABEL_DECL to point to one of the new label instances. */
1318 /* ??? Likewise, we can't delete a NOTE_INSN_DELETED_LABEL note. */
1319 if (insn != start_label
1320 && ! (GET_CODE (insn) == CODE_LABEL && LABEL_NAME (insn))
1321 && ! (GET_CODE (insn) == NOTE
1322 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL))
1323 insn = delete_related_insns (insn);
1324 else
1325 insn = NEXT_INSN (insn);
1328 /* Can now delete the 'safety' label emitted to protect us from runaway
1329 delete_related_insns calls. */
1330 if (INSN_DELETED_P (safety_label))
1331 abort ();
1332 delete_related_insns (safety_label);
1334 /* If exit_label exists, emit it after the loop. Doing the emit here
1335 forces it to have a higher INSN_UID than any insn in the unrolled loop.
1336 This is needed so that mostly_true_jump in reorg.c will treat jumps
1337 to this loop end label correctly, i.e. predict that they are usually
1338 not taken. */
1339 if (exit_label)
1340 emit_label_after (exit_label, loop_end);
1342 egress:
1343 if (unroll_type == UNROLL_COMPLETELY)
1345 /* Remove the loop notes since this is no longer a loop. */
1346 if (loop->vtop)
1347 delete_related_insns (loop->vtop);
1348 if (loop->cont)
1349 delete_related_insns (loop->cont);
1350 if (loop_start)
1351 delete_related_insns (loop_start);
1352 if (loop_end)
1353 delete_related_insns (loop_end);
1356 if (map->const_equiv_varray)
1357 VARRAY_FREE (map->const_equiv_varray);
1358 if (map->label_map)
1360 free (map->label_map);
1361 free (local_label);
1363 free (map->insn_map);
1364 free (splittable_regs);
1365 free (splittable_regs_updates);
1366 free (addr_combined_regs);
1367 free (local_regno);
1368 if (map->reg_map)
1369 free (map->reg_map);
1370 free (map);
1373 /* Return true if the loop can be safely, and profitably, preconditioned
1374 so that the unrolled copies of the loop body don't need exit tests.
1376 This only works if final_value, initial_value and increment can be
1377 determined, and if increment is a constant power of 2.
1378 If increment is not a power of 2, then the preconditioning modulo
1379 operation would require a real modulo instead of a boolean AND, and this
1380 is not considered `profitable'. */
1382 /* ??? If the loop is known to be executed very many times, or the machine
1383 has a very cheap divide instruction, then preconditioning is a win even
1384 when the increment is not a power of 2. Use RTX_COST to compute
1385 whether divide is cheap.
1386 ??? A divide by constant doesn't actually need a divide, look at
1387 expand_divmod. The reduced cost of this optimized modulo is not
1388 reflected in RTX_COST. */
1391 precondition_loop_p (loop, initial_value, final_value, increment, mode)
1392 const struct loop *loop;
1393 rtx *initial_value, *final_value, *increment;
1394 enum machine_mode *mode;
1396 rtx loop_start = loop->start;
1397 struct loop_info *loop_info = LOOP_INFO (loop);
1399 if (loop_info->n_iterations > 0)
1401 if (INTVAL (loop_info->increment) > 0)
1403 *initial_value = const0_rtx;
1404 *increment = const1_rtx;
1405 *final_value = GEN_INT (loop_info->n_iterations);
1407 else
1409 *initial_value = GEN_INT (loop_info->n_iterations);
1410 *increment = constm1_rtx;
1411 *final_value = const0_rtx;
1413 *mode = word_mode;
1415 if (loop_dump_stream)
1417 fputs ("Preconditioning: Success, number of iterations known, ",
1418 loop_dump_stream);
1419 fprintf (loop_dump_stream, HOST_WIDE_INT_PRINT_DEC,
1420 loop_info->n_iterations);
1421 fputs (".\n", loop_dump_stream);
1423 return 1;
1426 if (loop_info->iteration_var == 0)
1428 if (loop_dump_stream)
1429 fprintf (loop_dump_stream,
1430 "Preconditioning: Could not find iteration variable.\n");
1431 return 0;
1433 else if (loop_info->initial_value == 0)
1435 if (loop_dump_stream)
1436 fprintf (loop_dump_stream,
1437 "Preconditioning: Could not find initial value.\n");
1438 return 0;
1440 else if (loop_info->increment == 0)
1442 if (loop_dump_stream)
1443 fprintf (loop_dump_stream,
1444 "Preconditioning: Could not find increment value.\n");
1445 return 0;
1447 else if (GET_CODE (loop_info->increment) != CONST_INT)
1449 if (loop_dump_stream)
1450 fprintf (loop_dump_stream,
1451 "Preconditioning: Increment not a constant.\n");
1452 return 0;
1454 else if ((exact_log2 (INTVAL (loop_info->increment)) < 0)
1455 && (exact_log2 (-INTVAL (loop_info->increment)) < 0))
1457 if (loop_dump_stream)
1458 fprintf (loop_dump_stream,
1459 "Preconditioning: Increment not a constant power of 2.\n");
1460 return 0;
1463 /* Unsigned_compare and compare_dir can be ignored here, since they do
1464 not matter for preconditioning. */
1466 if (loop_info->final_value == 0)
1468 if (loop_dump_stream)
1469 fprintf (loop_dump_stream,
1470 "Preconditioning: EQ comparison loop.\n");
1471 return 0;
1474 /* Must ensure that final_value is invariant, so call
1475 loop_invariant_p to check. Before doing so, must check regno
1476 against max_reg_before_loop to make sure that the register is in
1477 the range covered by loop_invariant_p. If it isn't, then it is
1478 most likely a biv/giv which by definition are not invariant. */
1479 if ((GET_CODE (loop_info->final_value) == REG
1480 && REGNO (loop_info->final_value) >= max_reg_before_loop)
1481 || (GET_CODE (loop_info->final_value) == PLUS
1482 && REGNO (XEXP (loop_info->final_value, 0)) >= max_reg_before_loop)
1483 || ! loop_invariant_p (loop, loop_info->final_value))
1485 if (loop_dump_stream)
1486 fprintf (loop_dump_stream,
1487 "Preconditioning: Final value not invariant.\n");
1488 return 0;
1491 /* Fail for floating point values, since the caller of this function
1492 does not have code to deal with them. */
1493 if (GET_MODE_CLASS (GET_MODE (loop_info->final_value)) == MODE_FLOAT
1494 || GET_MODE_CLASS (GET_MODE (loop_info->initial_value)) == MODE_FLOAT)
1496 if (loop_dump_stream)
1497 fprintf (loop_dump_stream,
1498 "Preconditioning: Floating point final or initial value.\n");
1499 return 0;
1502 /* Fail if loop_info->iteration_var is not live before loop_start,
1503 since we need to test its value in the preconditioning code. */
1505 if (REGNO_FIRST_LUID (REGNO (loop_info->iteration_var))
1506 > INSN_LUID (loop_start))
1508 if (loop_dump_stream)
1509 fprintf (loop_dump_stream,
1510 "Preconditioning: Iteration var not live before loop start.\n");
1511 return 0;
1514 /* Note that loop_iterations biases the initial value for GIV iterators
1515 such as "while (i-- > 0)" so that we can calculate the number of
1516 iterations just like for BIV iterators.
1518 Also note that the absolute values of initial_value and
1519 final_value are unimportant as only their difference is used for
1520 calculating the number of loop iterations. */
1521 *initial_value = loop_info->initial_value;
1522 *increment = loop_info->increment;
1523 *final_value = loop_info->final_value;
1525 /* Decide what mode to do these calculations in. Choose the larger
1526 of final_value's mode and initial_value's mode, or a full-word if
1527 both are constants. */
1528 *mode = GET_MODE (*final_value);
1529 if (*mode == VOIDmode)
1531 *mode = GET_MODE (*initial_value);
1532 if (*mode == VOIDmode)
1533 *mode = word_mode;
1535 else if (*mode != GET_MODE (*initial_value)
1536 && (GET_MODE_SIZE (*mode)
1537 < GET_MODE_SIZE (GET_MODE (*initial_value))))
1538 *mode = GET_MODE (*initial_value);
1540 /* Success! */
1541 if (loop_dump_stream)
1542 fprintf (loop_dump_stream, "Preconditioning: Successful.\n");
1543 return 1;
1546 /* All pseudo-registers must be mapped to themselves. Two hard registers
1547 must be mapped, VIRTUAL_STACK_VARS_REGNUM and VIRTUAL_INCOMING_ARGS_
1548 REGNUM, to avoid function-inlining specific conversions of these
1549 registers. All other hard regs can not be mapped because they may be
1550 used with different
1551 modes. */
1553 static void
1554 init_reg_map (map, maxregnum)
1555 struct inline_remap *map;
1556 int maxregnum;
1558 int i;
1560 for (i = maxregnum - 1; i > LAST_VIRTUAL_REGISTER; i--)
1561 map->reg_map[i] = regno_reg_rtx[i];
1562 /* Just clear the rest of the entries. */
1563 for (i = LAST_VIRTUAL_REGISTER; i >= 0; i--)
1564 map->reg_map[i] = 0;
1566 map->reg_map[VIRTUAL_STACK_VARS_REGNUM]
1567 = regno_reg_rtx[VIRTUAL_STACK_VARS_REGNUM];
1568 map->reg_map[VIRTUAL_INCOMING_ARGS_REGNUM]
1569 = regno_reg_rtx[VIRTUAL_INCOMING_ARGS_REGNUM];
1572 /* Strength-reduction will often emit code for optimized biv/givs which
1573 calculates their value in a temporary register, and then copies the result
1574 to the iv. This procedure reconstructs the pattern computing the iv;
1575 verifying that all operands are of the proper form.
1577 PATTERN must be the result of single_set.
1578 The return value is the amount that the giv is incremented by. */
1580 static rtx
1581 calculate_giv_inc (pattern, src_insn, regno)
1582 rtx pattern, src_insn;
1583 unsigned int regno;
1585 rtx increment;
1586 rtx increment_total = 0;
1587 int tries = 0;
1589 retry:
1590 /* Verify that we have an increment insn here. First check for a plus
1591 as the set source. */
1592 if (GET_CODE (SET_SRC (pattern)) != PLUS)
1594 /* SR sometimes computes the new giv value in a temp, then copies it
1595 to the new_reg. */
1596 src_insn = PREV_INSN (src_insn);
1597 pattern = single_set (src_insn);
1598 if (GET_CODE (SET_SRC (pattern)) != PLUS)
1599 abort ();
1601 /* The last insn emitted is not needed, so delete it to avoid confusing
1602 the second cse pass. This insn sets the giv unnecessarily. */
1603 delete_related_insns (get_last_insn ());
1606 /* Verify that we have a constant as the second operand of the plus. */
1607 increment = XEXP (SET_SRC (pattern), 1);
1608 if (GET_CODE (increment) != CONST_INT)
1610 /* SR sometimes puts the constant in a register, especially if it is
1611 too big to be an add immed operand. */
1612 increment = find_last_value (increment, &src_insn, NULL_RTX, 0);
1614 /* SR may have used LO_SUM to compute the constant if it is too large
1615 for a load immed operand. In this case, the constant is in operand
1616 one of the LO_SUM rtx. */
1617 if (GET_CODE (increment) == LO_SUM)
1618 increment = XEXP (increment, 1);
1620 /* Some ports store large constants in memory and add a REG_EQUAL
1621 note to the store insn. */
1622 else if (GET_CODE (increment) == MEM)
1624 rtx note = find_reg_note (src_insn, REG_EQUAL, 0);
1625 if (note)
1626 increment = XEXP (note, 0);
1629 else if (GET_CODE (increment) == IOR
1630 || GET_CODE (increment) == ASHIFT
1631 || GET_CODE (increment) == PLUS)
1633 /* The rs6000 port loads some constants with IOR.
1634 The alpha port loads some constants with ASHIFT and PLUS. */
1635 rtx second_part = XEXP (increment, 1);
1636 enum rtx_code code = GET_CODE (increment);
1638 increment = find_last_value (XEXP (increment, 0),
1639 &src_insn, NULL_RTX, 0);
1640 /* Don't need the last insn anymore. */
1641 delete_related_insns (get_last_insn ());
1643 if (GET_CODE (second_part) != CONST_INT
1644 || GET_CODE (increment) != CONST_INT)
1645 abort ();
1647 if (code == IOR)
1648 increment = GEN_INT (INTVAL (increment) | INTVAL (second_part));
1649 else if (code == PLUS)
1650 increment = GEN_INT (INTVAL (increment) + INTVAL (second_part));
1651 else
1652 increment = GEN_INT (INTVAL (increment) << INTVAL (second_part));
1655 if (GET_CODE (increment) != CONST_INT)
1656 abort ();
1658 /* The insn loading the constant into a register is no longer needed,
1659 so delete it. */
1660 delete_related_insns (get_last_insn ());
1663 if (increment_total)
1664 increment_total = GEN_INT (INTVAL (increment_total) + INTVAL (increment));
1665 else
1666 increment_total = increment;
1668 /* Check that the source register is the same as the register we expected
1669 to see as the source. If not, something is seriously wrong. */
1670 if (GET_CODE (XEXP (SET_SRC (pattern), 0)) != REG
1671 || REGNO (XEXP (SET_SRC (pattern), 0)) != regno)
1673 /* Some machines (e.g. the romp), may emit two add instructions for
1674 certain constants, so lets try looking for another add immediately
1675 before this one if we have only seen one add insn so far. */
1677 if (tries == 0)
1679 tries++;
1681 src_insn = PREV_INSN (src_insn);
1682 pattern = single_set (src_insn);
1684 delete_related_insns (get_last_insn ());
1686 goto retry;
1689 abort ();
1692 return increment_total;
1695 /* Copy REG_NOTES, except for insn references, because not all insn_map
1696 entries are valid yet. We do need to copy registers now though, because
1697 the reg_map entries can change during copying. */
1699 static rtx
1700 initial_reg_note_copy (notes, map)
1701 rtx notes;
1702 struct inline_remap *map;
1704 rtx copy;
1706 if (notes == 0)
1707 return 0;
1709 copy = rtx_alloc (GET_CODE (notes));
1710 PUT_REG_NOTE_KIND (copy, REG_NOTE_KIND (notes));
1712 if (GET_CODE (notes) == EXPR_LIST)
1713 XEXP (copy, 0) = copy_rtx_and_substitute (XEXP (notes, 0), map, 0);
1714 else if (GET_CODE (notes) == INSN_LIST)
1715 /* Don't substitute for these yet. */
1716 XEXP (copy, 0) = copy_rtx (XEXP (notes, 0));
1717 else
1718 abort ();
1720 XEXP (copy, 1) = initial_reg_note_copy (XEXP (notes, 1), map);
1722 return copy;
1725 /* Fixup insn references in copied REG_NOTES. */
1727 static void
1728 final_reg_note_copy (notesp, map)
1729 rtx *notesp;
1730 struct inline_remap *map;
1732 while (*notesp)
1734 rtx note = *notesp;
1736 if (GET_CODE (note) == INSN_LIST)
1738 /* Sometimes, we have a REG_WAS_0 note that points to a
1739 deleted instruction. In that case, we can just delete the
1740 note. */
1741 if (REG_NOTE_KIND (note) == REG_WAS_0)
1743 *notesp = XEXP (note, 1);
1744 continue;
1746 else
1748 rtx insn = map->insn_map[INSN_UID (XEXP (note, 0))];
1750 /* If we failed to remap the note, something is awry.
1751 Allow REG_LABEL as it may reference label outside
1752 the unrolled loop. */
1753 if (!insn)
1755 if (REG_NOTE_KIND (note) != REG_LABEL)
1756 abort ();
1758 else
1759 XEXP (note, 0) = insn;
1763 notesp = &XEXP (note, 1);
1767 /* Copy each instruction in the loop, substituting from map as appropriate.
1768 This is very similar to a loop in expand_inline_function. */
1770 static void
1771 copy_loop_body (loop, copy_start, copy_end, map, exit_label, last_iteration,
1772 unroll_type, start_label, loop_end, insert_before,
1773 copy_notes_from)
1774 struct loop *loop;
1775 rtx copy_start, copy_end;
1776 struct inline_remap *map;
1777 rtx exit_label;
1778 int last_iteration;
1779 enum unroll_types unroll_type;
1780 rtx start_label, loop_end, insert_before, copy_notes_from;
1782 struct loop_ivs *ivs = LOOP_IVS (loop);
1783 rtx insn, pattern;
1784 rtx set, tem, copy = NULL_RTX;
1785 int dest_reg_was_split, i;
1786 #ifdef HAVE_cc0
1787 rtx cc0_insn = 0;
1788 #endif
1789 rtx final_label = 0;
1790 rtx giv_inc, giv_dest_reg, giv_src_reg;
1792 /* If this isn't the last iteration, then map any references to the
1793 start_label to final_label. Final label will then be emitted immediately
1794 after the end of this loop body if it was ever used.
1796 If this is the last iteration, then map references to the start_label
1797 to itself. */
1798 if (! last_iteration)
1800 final_label = gen_label_rtx ();
1801 set_label_in_map (map, CODE_LABEL_NUMBER (start_label), final_label);
1803 else
1804 set_label_in_map (map, CODE_LABEL_NUMBER (start_label), start_label);
1806 start_sequence ();
1808 /* Emit a NOTE_INSN_DELETED to force at least two insns onto the sequence.
1809 Else gen_sequence could return a raw pattern for a jump which we pass
1810 off to emit_insn_before (instead of emit_jump_insn_before) which causes
1811 a variety of losing behaviors later. */
1812 emit_note (0, NOTE_INSN_DELETED);
1814 insn = copy_start;
1817 insn = NEXT_INSN (insn);
1819 map->orig_asm_operands_vector = 0;
1821 switch (GET_CODE (insn))
1823 case INSN:
1824 pattern = PATTERN (insn);
1825 copy = 0;
1826 giv_inc = 0;
1828 /* Check to see if this is a giv that has been combined with
1829 some split address givs. (Combined in the sense that
1830 `combine_givs' in loop.c has put two givs in the same register.)
1831 In this case, we must search all givs based on the same biv to
1832 find the address givs. Then split the address givs.
1833 Do this before splitting the giv, since that may map the
1834 SET_DEST to a new register. */
1836 if ((set = single_set (insn))
1837 && GET_CODE (SET_DEST (set)) == REG
1838 && addr_combined_regs[REGNO (SET_DEST (set))])
1840 struct iv_class *bl;
1841 struct induction *v, *tv;
1842 unsigned int regno = REGNO (SET_DEST (set));
1844 v = addr_combined_regs[REGNO (SET_DEST (set))];
1845 bl = REG_IV_CLASS (ivs, REGNO (v->src_reg));
1847 /* Although the giv_inc amount is not needed here, we must call
1848 calculate_giv_inc here since it might try to delete the
1849 last insn emitted. If we wait until later to call it,
1850 we might accidentally delete insns generated immediately
1851 below by emit_unrolled_add. */
1853 giv_inc = calculate_giv_inc (set, insn, regno);
1855 /* Now find all address giv's that were combined with this
1856 giv 'v'. */
1857 for (tv = bl->giv; tv; tv = tv->next_iv)
1858 if (tv->giv_type == DEST_ADDR && tv->same == v)
1860 int this_giv_inc;
1862 /* If this DEST_ADDR giv was not split, then ignore it. */
1863 if (*tv->location != tv->dest_reg)
1864 continue;
1866 /* Scale this_giv_inc if the multiplicative factors of
1867 the two givs are different. */
1868 this_giv_inc = INTVAL (giv_inc);
1869 if (tv->mult_val != v->mult_val)
1870 this_giv_inc = (this_giv_inc / INTVAL (v->mult_val)
1871 * INTVAL (tv->mult_val));
1873 tv->dest_reg = plus_constant (tv->dest_reg, this_giv_inc);
1874 *tv->location = tv->dest_reg;
1876 if (last_iteration && unroll_type != UNROLL_COMPLETELY)
1878 /* Must emit an insn to increment the split address
1879 giv. Add in the const_adjust field in case there
1880 was a constant eliminated from the address. */
1881 rtx value, dest_reg;
1883 /* tv->dest_reg will be either a bare register,
1884 or else a register plus a constant. */
1885 if (GET_CODE (tv->dest_reg) == REG)
1886 dest_reg = tv->dest_reg;
1887 else
1888 dest_reg = XEXP (tv->dest_reg, 0);
1890 /* Check for shared address givs, and avoid
1891 incrementing the shared pseudo reg more than
1892 once. */
1893 if (! tv->same_insn && ! tv->shared)
1895 /* tv->dest_reg may actually be a (PLUS (REG)
1896 (CONST)) here, so we must call plus_constant
1897 to add the const_adjust amount before calling
1898 emit_unrolled_add below. */
1899 value = plus_constant (tv->dest_reg,
1900 tv->const_adjust);
1902 if (GET_CODE (value) == PLUS)
1904 /* The constant could be too large for an add
1905 immediate, so can't directly emit an insn
1906 here. */
1907 emit_unrolled_add (dest_reg, XEXP (value, 0),
1908 XEXP (value, 1));
1912 /* Reset the giv to be just the register again, in case
1913 it is used after the set we have just emitted.
1914 We must subtract the const_adjust factor added in
1915 above. */
1916 tv->dest_reg = plus_constant (dest_reg,
1917 -tv->const_adjust);
1918 *tv->location = tv->dest_reg;
1923 /* If this is a setting of a splittable variable, then determine
1924 how to split the variable, create a new set based on this split,
1925 and set up the reg_map so that later uses of the variable will
1926 use the new split variable. */
1928 dest_reg_was_split = 0;
1930 if ((set = single_set (insn))
1931 && GET_CODE (SET_DEST (set)) == REG
1932 && splittable_regs[REGNO (SET_DEST (set))])
1934 unsigned int regno = REGNO (SET_DEST (set));
1935 unsigned int src_regno;
1937 dest_reg_was_split = 1;
1939 giv_dest_reg = SET_DEST (set);
1940 giv_src_reg = giv_dest_reg;
1941 /* Compute the increment value for the giv, if it wasn't
1942 already computed above. */
1943 if (giv_inc == 0)
1944 giv_inc = calculate_giv_inc (set, insn, regno);
1946 src_regno = REGNO (giv_src_reg);
1948 if (unroll_type == UNROLL_COMPLETELY)
1950 /* Completely unrolling the loop. Set the induction
1951 variable to a known constant value. */
1953 /* The value in splittable_regs may be an invariant
1954 value, so we must use plus_constant here. */
1955 splittable_regs[regno]
1956 = plus_constant (splittable_regs[src_regno],
1957 INTVAL (giv_inc));
1959 if (GET_CODE (splittable_regs[regno]) == PLUS)
1961 giv_src_reg = XEXP (splittable_regs[regno], 0);
1962 giv_inc = XEXP (splittable_regs[regno], 1);
1964 else
1966 /* The splittable_regs value must be a REG or a
1967 CONST_INT, so put the entire value in the giv_src_reg
1968 variable. */
1969 giv_src_reg = splittable_regs[regno];
1970 giv_inc = const0_rtx;
1973 else
1975 /* Partially unrolling loop. Create a new pseudo
1976 register for the iteration variable, and set it to
1977 be a constant plus the original register. Except
1978 on the last iteration, when the result has to
1979 go back into the original iteration var register. */
1981 /* Handle bivs which must be mapped to a new register
1982 when split. This happens for bivs which need their
1983 final value set before loop entry. The new register
1984 for the biv was stored in the biv's first struct
1985 induction entry by find_splittable_regs. */
1987 if (regno < ivs->n_regs
1988 && REG_IV_TYPE (ivs, regno) == BASIC_INDUCT)
1990 giv_src_reg = REG_IV_CLASS (ivs, regno)->biv->src_reg;
1991 giv_dest_reg = giv_src_reg;
1994 #if 0
1995 /* If non-reduced/final-value givs were split, then
1996 this would have to remap those givs also. See
1997 find_splittable_regs. */
1998 #endif
2000 splittable_regs[regno]
2001 = simplify_gen_binary (PLUS, GET_MODE (giv_src_reg),
2002 giv_inc,
2003 splittable_regs[src_regno]);
2004 giv_inc = splittable_regs[regno];
2006 /* Now split the induction variable by changing the dest
2007 of this insn to a new register, and setting its
2008 reg_map entry to point to this new register.
2010 If this is the last iteration, and this is the last insn
2011 that will update the iv, then reuse the original dest,
2012 to ensure that the iv will have the proper value when
2013 the loop exits or repeats.
2015 Using splittable_regs_updates here like this is safe,
2016 because it can only be greater than one if all
2017 instructions modifying the iv are always executed in
2018 order. */
2020 if (! last_iteration
2021 || (splittable_regs_updates[regno]-- != 1))
2023 tem = gen_reg_rtx (GET_MODE (giv_src_reg));
2024 giv_dest_reg = tem;
2025 map->reg_map[regno] = tem;
2026 record_base_value (REGNO (tem),
2027 giv_inc == const0_rtx
2028 ? giv_src_reg
2029 : gen_rtx_PLUS (GET_MODE (giv_src_reg),
2030 giv_src_reg, giv_inc),
2033 else
2034 map->reg_map[regno] = giv_src_reg;
2037 /* The constant being added could be too large for an add
2038 immediate, so can't directly emit an insn here. */
2039 emit_unrolled_add (giv_dest_reg, giv_src_reg, giv_inc);
2040 copy = get_last_insn ();
2041 pattern = PATTERN (copy);
2043 else
2045 pattern = copy_rtx_and_substitute (pattern, map, 0);
2046 copy = emit_insn (pattern);
2048 REG_NOTES (copy) = initial_reg_note_copy (REG_NOTES (insn), map);
2050 #ifdef HAVE_cc0
2051 /* If this insn is setting CC0, it may need to look at
2052 the insn that uses CC0 to see what type of insn it is.
2053 In that case, the call to recog via validate_change will
2054 fail. So don't substitute constants here. Instead,
2055 do it when we emit the following insn.
2057 For example, see the pyr.md file. That machine has signed and
2058 unsigned compares. The compare patterns must check the
2059 following branch insn to see which what kind of compare to
2060 emit.
2062 If the previous insn set CC0, substitute constants on it as
2063 well. */
2064 if (sets_cc0_p (PATTERN (copy)) != 0)
2065 cc0_insn = copy;
2066 else
2068 if (cc0_insn)
2069 try_constants (cc0_insn, map);
2070 cc0_insn = 0;
2071 try_constants (copy, map);
2073 #else
2074 try_constants (copy, map);
2075 #endif
2077 /* Make split induction variable constants `permanent' since we
2078 know there are no backward branches across iteration variable
2079 settings which would invalidate this. */
2080 if (dest_reg_was_split)
2082 int regno = REGNO (SET_DEST (set));
2084 if ((size_t) regno < VARRAY_SIZE (map->const_equiv_varray)
2085 && (VARRAY_CONST_EQUIV (map->const_equiv_varray, regno).age
2086 == map->const_age))
2087 VARRAY_CONST_EQUIV (map->const_equiv_varray, regno).age = -1;
2089 break;
2091 case JUMP_INSN:
2092 pattern = copy_rtx_and_substitute (PATTERN (insn), map, 0);
2093 copy = emit_jump_insn (pattern);
2094 REG_NOTES (copy) = initial_reg_note_copy (REG_NOTES (insn), map);
2096 if (JUMP_LABEL (insn))
2098 JUMP_LABEL (copy) = get_label_from_map (map,
2099 CODE_LABEL_NUMBER
2100 (JUMP_LABEL (insn)));
2101 LABEL_NUSES (JUMP_LABEL (copy))++;
2103 if (JUMP_LABEL (insn) == start_label && insn == copy_end
2104 && ! last_iteration)
2107 /* This is a branch to the beginning of the loop; this is the
2108 last insn being copied; and this is not the last iteration.
2109 In this case, we want to change the original fall through
2110 case to be a branch past the end of the loop, and the
2111 original jump label case to fall_through. */
2113 if (!invert_jump (copy, exit_label, 0))
2115 rtx jmp;
2116 rtx lab = gen_label_rtx ();
2117 /* Can't do it by reversing the jump (probably because we
2118 couldn't reverse the conditions), so emit a new
2119 jump_insn after COPY, and redirect the jump around
2120 that. */
2121 jmp = emit_jump_insn_after (gen_jump (exit_label), copy);
2122 JUMP_LABEL (jmp) = exit_label;
2123 LABEL_NUSES (exit_label)++;
2124 jmp = emit_barrier_after (jmp);
2125 emit_label_after (lab, jmp);
2126 LABEL_NUSES (lab) = 0;
2127 if (!redirect_jump (copy, lab, 0))
2128 abort ();
2132 #ifdef HAVE_cc0
2133 if (cc0_insn)
2134 try_constants (cc0_insn, map);
2135 cc0_insn = 0;
2136 #endif
2137 try_constants (copy, map);
2139 /* Set the jump label of COPY correctly to avoid problems with
2140 later passes of unroll_loop, if INSN had jump label set. */
2141 if (JUMP_LABEL (insn))
2143 rtx label = 0;
2145 /* Can't use the label_map for every insn, since this may be
2146 the backward branch, and hence the label was not mapped. */
2147 if ((set = single_set (copy)))
2149 tem = SET_SRC (set);
2150 if (GET_CODE (tem) == LABEL_REF)
2151 label = XEXP (tem, 0);
2152 else if (GET_CODE (tem) == IF_THEN_ELSE)
2154 if (XEXP (tem, 1) != pc_rtx)
2155 label = XEXP (XEXP (tem, 1), 0);
2156 else
2157 label = XEXP (XEXP (tem, 2), 0);
2161 if (label && GET_CODE (label) == CODE_LABEL)
2162 JUMP_LABEL (copy) = label;
2163 else
2165 /* An unrecognizable jump insn, probably the entry jump
2166 for a switch statement. This label must have been mapped,
2167 so just use the label_map to get the new jump label. */
2168 JUMP_LABEL (copy)
2169 = get_label_from_map (map,
2170 CODE_LABEL_NUMBER (JUMP_LABEL (insn)));
2173 /* If this is a non-local jump, then must increase the label
2174 use count so that the label will not be deleted when the
2175 original jump is deleted. */
2176 LABEL_NUSES (JUMP_LABEL (copy))++;
2178 else if (GET_CODE (PATTERN (copy)) == ADDR_VEC
2179 || GET_CODE (PATTERN (copy)) == ADDR_DIFF_VEC)
2181 rtx pat = PATTERN (copy);
2182 int diff_vec_p = GET_CODE (pat) == ADDR_DIFF_VEC;
2183 int len = XVECLEN (pat, diff_vec_p);
2184 int i;
2186 for (i = 0; i < len; i++)
2187 LABEL_NUSES (XEXP (XVECEXP (pat, diff_vec_p, i), 0))++;
2190 /* If this used to be a conditional jump insn but whose branch
2191 direction is now known, we must do something special. */
2192 if (any_condjump_p (insn) && onlyjump_p (insn) && map->last_pc_value)
2194 #ifdef HAVE_cc0
2195 /* If the previous insn set cc0 for us, delete it. */
2196 if (only_sets_cc0_p (PREV_INSN (copy)))
2197 delete_related_insns (PREV_INSN (copy));
2198 #endif
2200 /* If this is now a no-op, delete it. */
2201 if (map->last_pc_value == pc_rtx)
2203 delete_insn (copy);
2204 copy = 0;
2206 else
2207 /* Otherwise, this is unconditional jump so we must put a
2208 BARRIER after it. We could do some dead code elimination
2209 here, but jump.c will do it just as well. */
2210 emit_barrier ();
2212 break;
2214 case CALL_INSN:
2215 pattern = copy_rtx_and_substitute (PATTERN (insn), map, 0);
2216 copy = emit_call_insn (pattern);
2217 REG_NOTES (copy) = initial_reg_note_copy (REG_NOTES (insn), map);
2218 SIBLING_CALL_P (copy) = SIBLING_CALL_P (insn);
2220 /* Because the USAGE information potentially contains objects other
2221 than hard registers, we need to copy it. */
2222 CALL_INSN_FUNCTION_USAGE (copy)
2223 = copy_rtx_and_substitute (CALL_INSN_FUNCTION_USAGE (insn),
2224 map, 0);
2226 #ifdef HAVE_cc0
2227 if (cc0_insn)
2228 try_constants (cc0_insn, map);
2229 cc0_insn = 0;
2230 #endif
2231 try_constants (copy, map);
2233 /* Be lazy and assume CALL_INSNs clobber all hard registers. */
2234 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
2235 VARRAY_CONST_EQUIV (map->const_equiv_varray, i).rtx = 0;
2236 break;
2238 case CODE_LABEL:
2239 /* If this is the loop start label, then we don't need to emit a
2240 copy of this label since no one will use it. */
2242 if (insn != start_label)
2244 copy = emit_label (get_label_from_map (map,
2245 CODE_LABEL_NUMBER (insn)));
2246 map->const_age++;
2248 break;
2250 case BARRIER:
2251 copy = emit_barrier ();
2252 break;
2254 case NOTE:
2255 /* VTOP and CONT notes are valid only before the loop exit test.
2256 If placed anywhere else, loop may generate bad code. */
2257 /* BASIC_BLOCK notes exist to stabilize basic block structures with
2258 the associated rtl. We do not want to share the structure in
2259 this new block. */
2261 if (NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED
2262 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL
2263 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_BASIC_BLOCK
2264 && ((NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_VTOP
2265 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_CONT)
2266 || (last_iteration && unroll_type != UNROLL_COMPLETELY)))
2267 copy = emit_note (NOTE_SOURCE_FILE (insn),
2268 NOTE_LINE_NUMBER (insn));
2269 else
2270 copy = 0;
2271 break;
2273 default:
2274 abort ();
2277 map->insn_map[INSN_UID (insn)] = copy;
2279 while (insn != copy_end);
2281 /* Now finish coping the REG_NOTES. */
2282 insn = copy_start;
2285 insn = NEXT_INSN (insn);
2286 if ((GET_CODE (insn) == INSN || GET_CODE (insn) == JUMP_INSN
2287 || GET_CODE (insn) == CALL_INSN)
2288 && map->insn_map[INSN_UID (insn)])
2289 final_reg_note_copy (&REG_NOTES (map->insn_map[INSN_UID (insn)]), map);
2291 while (insn != copy_end);
2293 /* There may be notes between copy_notes_from and loop_end. Emit a copy of
2294 each of these notes here, since there may be some important ones, such as
2295 NOTE_INSN_BLOCK_END notes, in this group. We don't do this on the last
2296 iteration, because the original notes won't be deleted.
2298 We can't use insert_before here, because when from preconditioning,
2299 insert_before points before the loop. We can't use copy_end, because
2300 there may be insns already inserted after it (which we don't want to
2301 copy) when not from preconditioning code. */
2303 if (! last_iteration)
2305 for (insn = copy_notes_from; insn != loop_end; insn = NEXT_INSN (insn))
2307 /* VTOP notes are valid only before the loop exit test.
2308 If placed anywhere else, loop may generate bad code.
2309 There is no need to test for NOTE_INSN_LOOP_CONT notes
2310 here, since COPY_NOTES_FROM will be at most one or two (for cc0)
2311 instructions before the last insn in the loop, and if the
2312 end test is that short, there will be a VTOP note between
2313 the CONT note and the test. */
2314 if (GET_CODE (insn) == NOTE
2315 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED
2316 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_BASIC_BLOCK
2317 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_VTOP)
2318 emit_note (NOTE_SOURCE_FILE (insn), NOTE_LINE_NUMBER (insn));
2322 if (final_label && LABEL_NUSES (final_label) > 0)
2323 emit_label (final_label);
2325 tem = gen_sequence ();
2326 end_sequence ();
2327 loop_insn_emit_before (loop, 0, insert_before, tem);
2330 /* Emit an insn, using the expand_binop to ensure that a valid insn is
2331 emitted. This will correctly handle the case where the increment value
2332 won't fit in the immediate field of a PLUS insns. */
2334 void
2335 emit_unrolled_add (dest_reg, src_reg, increment)
2336 rtx dest_reg, src_reg, increment;
2338 rtx result;
2340 result = expand_simple_binop (GET_MODE (dest_reg), PLUS, src_reg, increment,
2341 dest_reg, 0, OPTAB_LIB_WIDEN);
2343 if (dest_reg != result)
2344 emit_move_insn (dest_reg, result);
2347 /* Searches the insns between INSN and LOOP->END. Returns 1 if there
2348 is a backward branch in that range that branches to somewhere between
2349 LOOP->START and INSN. Returns 0 otherwise. */
2351 /* ??? This is quadratic algorithm. Could be rewritten to be linear.
2352 In practice, this is not a problem, because this function is seldom called,
2353 and uses a negligible amount of CPU time on average. */
2356 back_branch_in_range_p (loop, insn)
2357 const struct loop *loop;
2358 rtx insn;
2360 rtx p, q, target_insn;
2361 rtx loop_start = loop->start;
2362 rtx loop_end = loop->end;
2363 rtx orig_loop_end = loop->end;
2365 /* Stop before we get to the backward branch at the end of the loop. */
2366 loop_end = prev_nonnote_insn (loop_end);
2367 if (GET_CODE (loop_end) == BARRIER)
2368 loop_end = PREV_INSN (loop_end);
2370 /* Check in case insn has been deleted, search forward for first non
2371 deleted insn following it. */
2372 while (INSN_DELETED_P (insn))
2373 insn = NEXT_INSN (insn);
2375 /* Check for the case where insn is the last insn in the loop. Deal
2376 with the case where INSN was a deleted loop test insn, in which case
2377 it will now be the NOTE_LOOP_END. */
2378 if (insn == loop_end || insn == orig_loop_end)
2379 return 0;
2381 for (p = NEXT_INSN (insn); p != loop_end; p = NEXT_INSN (p))
2383 if (GET_CODE (p) == JUMP_INSN)
2385 target_insn = JUMP_LABEL (p);
2387 /* Search from loop_start to insn, to see if one of them is
2388 the target_insn. We can't use INSN_LUID comparisons here,
2389 since insn may not have an LUID entry. */
2390 for (q = loop_start; q != insn; q = NEXT_INSN (q))
2391 if (q == target_insn)
2392 return 1;
2396 return 0;
2399 /* Try to generate the simplest rtx for the expression
2400 (PLUS (MULT mult1 mult2) add1). This is used to calculate the initial
2401 value of giv's. */
2403 static rtx
2404 fold_rtx_mult_add (mult1, mult2, add1, mode)
2405 rtx mult1, mult2, add1;
2406 enum machine_mode mode;
2408 rtx temp, mult_res;
2409 rtx result;
2411 /* The modes must all be the same. This should always be true. For now,
2412 check to make sure. */
2413 if ((GET_MODE (mult1) != mode && GET_MODE (mult1) != VOIDmode)
2414 || (GET_MODE (mult2) != mode && GET_MODE (mult2) != VOIDmode)
2415 || (GET_MODE (add1) != mode && GET_MODE (add1) != VOIDmode))
2416 abort ();
2418 /* Ensure that if at least one of mult1/mult2 are constant, then mult2
2419 will be a constant. */
2420 if (GET_CODE (mult1) == CONST_INT)
2422 temp = mult2;
2423 mult2 = mult1;
2424 mult1 = temp;
2427 mult_res = simplify_binary_operation (MULT, mode, mult1, mult2);
2428 if (! mult_res)
2429 mult_res = gen_rtx_MULT (mode, mult1, mult2);
2431 /* Again, put the constant second. */
2432 if (GET_CODE (add1) == CONST_INT)
2434 temp = add1;
2435 add1 = mult_res;
2436 mult_res = temp;
2439 result = simplify_binary_operation (PLUS, mode, add1, mult_res);
2440 if (! result)
2441 result = gen_rtx_PLUS (mode, add1, mult_res);
2443 return result;
2446 /* Searches the list of induction struct's for the biv BL, to try to calculate
2447 the total increment value for one iteration of the loop as a constant.
2449 Returns the increment value as an rtx, simplified as much as possible,
2450 if it can be calculated. Otherwise, returns 0. */
2453 biv_total_increment (bl)
2454 const struct iv_class *bl;
2456 struct induction *v;
2457 rtx result;
2459 /* For increment, must check every instruction that sets it. Each
2460 instruction must be executed only once each time through the loop.
2461 To verify this, we check that the insn is always executed, and that
2462 there are no backward branches after the insn that branch to before it.
2463 Also, the insn must have a mult_val of one (to make sure it really is
2464 an increment). */
2466 result = const0_rtx;
2467 for (v = bl->biv; v; v = v->next_iv)
2469 if (v->always_computable && v->mult_val == const1_rtx
2470 && ! v->maybe_multiple)
2471 result = fold_rtx_mult_add (result, const1_rtx, v->add_val, v->mode);
2472 else
2473 return 0;
2476 return result;
2479 /* For each biv and giv, determine whether it can be safely split into
2480 a different variable for each unrolled copy of the loop body. If it
2481 is safe to split, then indicate that by saving some useful info
2482 in the splittable_regs array.
2484 If the loop is being completely unrolled, then splittable_regs will hold
2485 the current value of the induction variable while the loop is unrolled.
2486 It must be set to the initial value of the induction variable here.
2487 Otherwise, splittable_regs will hold the difference between the current
2488 value of the induction variable and the value the induction variable had
2489 at the top of the loop. It must be set to the value 0 here.
2491 Returns the total number of instructions that set registers that are
2492 splittable. */
2494 /* ?? If the loop is only unrolled twice, then most of the restrictions to
2495 constant values are unnecessary, since we can easily calculate increment
2496 values in this case even if nothing is constant. The increment value
2497 should not involve a multiply however. */
2499 /* ?? Even if the biv/giv increment values aren't constant, it may still
2500 be beneficial to split the variable if the loop is only unrolled a few
2501 times, since multiplies by small integers (1,2,3,4) are very cheap. */
2503 static int
2504 find_splittable_regs (loop, unroll_type, unroll_number)
2505 const struct loop *loop;
2506 enum unroll_types unroll_type;
2507 int unroll_number;
2509 struct loop_ivs *ivs = LOOP_IVS (loop);
2510 struct iv_class *bl;
2511 struct induction *v;
2512 rtx increment, tem;
2513 rtx biv_final_value;
2514 int biv_splittable;
2515 int result = 0;
2517 for (bl = ivs->list; bl; bl = bl->next)
2519 /* Biv_total_increment must return a constant value,
2520 otherwise we can not calculate the split values. */
2522 increment = biv_total_increment (bl);
2523 if (! increment || GET_CODE (increment) != CONST_INT)
2524 continue;
2526 /* The loop must be unrolled completely, or else have a known number
2527 of iterations and only one exit, or else the biv must be dead
2528 outside the loop, or else the final value must be known. Otherwise,
2529 it is unsafe to split the biv since it may not have the proper
2530 value on loop exit. */
2532 /* loop_number_exit_count is non-zero if the loop has an exit other than
2533 a fall through at the end. */
2535 biv_splittable = 1;
2536 biv_final_value = 0;
2537 if (unroll_type != UNROLL_COMPLETELY
2538 && (loop->exit_count || unroll_type == UNROLL_NAIVE)
2539 && (REGNO_LAST_LUID (bl->regno) >= INSN_LUID (loop->end)
2540 || ! bl->init_insn
2541 || INSN_UID (bl->init_insn) >= max_uid_for_loop
2542 || (REGNO_FIRST_LUID (bl->regno)
2543 < INSN_LUID (bl->init_insn))
2544 || reg_mentioned_p (bl->biv->dest_reg, SET_SRC (bl->init_set)))
2545 && ! (biv_final_value = final_biv_value (loop, bl)))
2546 biv_splittable = 0;
2548 /* If any of the insns setting the BIV don't do so with a simple
2549 PLUS, we don't know how to split it. */
2550 for (v = bl->biv; biv_splittable && v; v = v->next_iv)
2551 if ((tem = single_set (v->insn)) == 0
2552 || GET_CODE (SET_DEST (tem)) != REG
2553 || REGNO (SET_DEST (tem)) != bl->regno
2554 || GET_CODE (SET_SRC (tem)) != PLUS)
2555 biv_splittable = 0;
2557 /* If final value is non-zero, then must emit an instruction which sets
2558 the value of the biv to the proper value. This is done after
2559 handling all of the givs, since some of them may need to use the
2560 biv's value in their initialization code. */
2562 /* This biv is splittable. If completely unrolling the loop, save
2563 the biv's initial value. Otherwise, save the constant zero. */
2565 if (biv_splittable == 1)
2567 if (unroll_type == UNROLL_COMPLETELY)
2569 /* If the initial value of the biv is itself (i.e. it is too
2570 complicated for strength_reduce to compute), or is a hard
2571 register, or it isn't invariant, then we must create a new
2572 pseudo reg to hold the initial value of the biv. */
2574 if (GET_CODE (bl->initial_value) == REG
2575 && (REGNO (bl->initial_value) == bl->regno
2576 || REGNO (bl->initial_value) < FIRST_PSEUDO_REGISTER
2577 || ! loop_invariant_p (loop, bl->initial_value)))
2579 rtx tem = gen_reg_rtx (bl->biv->mode);
2581 record_base_value (REGNO (tem), bl->biv->add_val, 0);
2582 loop_insn_hoist (loop,
2583 gen_move_insn (tem, bl->biv->src_reg));
2585 if (loop_dump_stream)
2586 fprintf (loop_dump_stream,
2587 "Biv %d initial value remapped to %d.\n",
2588 bl->regno, REGNO (tem));
2590 splittable_regs[bl->regno] = tem;
2592 else
2593 splittable_regs[bl->regno] = bl->initial_value;
2595 else
2596 splittable_regs[bl->regno] = const0_rtx;
2598 /* Save the number of instructions that modify the biv, so that
2599 we can treat the last one specially. */
2601 splittable_regs_updates[bl->regno] = bl->biv_count;
2602 result += bl->biv_count;
2604 if (loop_dump_stream)
2605 fprintf (loop_dump_stream,
2606 "Biv %d safe to split.\n", bl->regno);
2609 /* Check every giv that depends on this biv to see whether it is
2610 splittable also. Even if the biv isn't splittable, givs which
2611 depend on it may be splittable if the biv is live outside the
2612 loop, and the givs aren't. */
2614 result += find_splittable_givs (loop, bl, unroll_type, increment,
2615 unroll_number);
2617 /* If final value is non-zero, then must emit an instruction which sets
2618 the value of the biv to the proper value. This is done after
2619 handling all of the givs, since some of them may need to use the
2620 biv's value in their initialization code. */
2621 if (biv_final_value)
2623 /* If the loop has multiple exits, emit the insns before the
2624 loop to ensure that it will always be executed no matter
2625 how the loop exits. Otherwise emit the insn after the loop,
2626 since this is slightly more efficient. */
2627 if (! loop->exit_count)
2628 loop_insn_sink (loop, gen_move_insn (bl->biv->src_reg,
2629 biv_final_value));
2630 else
2632 /* Create a new register to hold the value of the biv, and then
2633 set the biv to its final value before the loop start. The biv
2634 is set to its final value before loop start to ensure that
2635 this insn will always be executed, no matter how the loop
2636 exits. */
2637 rtx tem = gen_reg_rtx (bl->biv->mode);
2638 record_base_value (REGNO (tem), bl->biv->add_val, 0);
2640 loop_insn_hoist (loop, gen_move_insn (tem, bl->biv->src_reg));
2641 loop_insn_hoist (loop, gen_move_insn (bl->biv->src_reg,
2642 biv_final_value));
2644 if (loop_dump_stream)
2645 fprintf (loop_dump_stream, "Biv %d mapped to %d for split.\n",
2646 REGNO (bl->biv->src_reg), REGNO (tem));
2648 /* Set up the mapping from the original biv register to the new
2649 register. */
2650 bl->biv->src_reg = tem;
2654 return result;
2657 /* Return 1 if the first and last unrolled copy of the address giv V is valid
2658 for the instruction that is using it. Do not make any changes to that
2659 instruction. */
2661 static int
2662 verify_addresses (v, giv_inc, unroll_number)
2663 struct induction *v;
2664 rtx giv_inc;
2665 int unroll_number;
2667 int ret = 1;
2668 rtx orig_addr = *v->location;
2669 rtx last_addr = plus_constant (v->dest_reg,
2670 INTVAL (giv_inc) * (unroll_number - 1));
2672 /* First check to see if either address would fail. Handle the fact
2673 that we have may have a match_dup. */
2674 if (! validate_replace_rtx (*v->location, v->dest_reg, v->insn)
2675 || ! validate_replace_rtx (*v->location, last_addr, v->insn))
2676 ret = 0;
2678 /* Now put things back the way they were before. This should always
2679 succeed. */
2680 if (! validate_replace_rtx (*v->location, orig_addr, v->insn))
2681 abort ();
2683 return ret;
2686 /* For every giv based on the biv BL, check to determine whether it is
2687 splittable. This is a subroutine to find_splittable_regs ().
2689 Return the number of instructions that set splittable registers. */
2691 static int
2692 find_splittable_givs (loop, bl, unroll_type, increment, unroll_number)
2693 const struct loop *loop;
2694 struct iv_class *bl;
2695 enum unroll_types unroll_type;
2696 rtx increment;
2697 int unroll_number;
2699 struct loop_ivs *ivs = LOOP_IVS (loop);
2700 struct induction *v, *v2;
2701 rtx final_value;
2702 rtx tem;
2703 int result = 0;
2705 /* Scan the list of givs, and set the same_insn field when there are
2706 multiple identical givs in the same insn. */
2707 for (v = bl->giv; v; v = v->next_iv)
2708 for (v2 = v->next_iv; v2; v2 = v2->next_iv)
2709 if (v->insn == v2->insn && rtx_equal_p (v->new_reg, v2->new_reg)
2710 && ! v2->same_insn)
2711 v2->same_insn = v;
2713 for (v = bl->giv; v; v = v->next_iv)
2715 rtx giv_inc, value;
2717 /* Only split the giv if it has already been reduced, or if the loop is
2718 being completely unrolled. */
2719 if (unroll_type != UNROLL_COMPLETELY && v->ignore)
2720 continue;
2722 /* The giv can be split if the insn that sets the giv is executed once
2723 and only once on every iteration of the loop. */
2724 /* An address giv can always be split. v->insn is just a use not a set,
2725 and hence it does not matter whether it is always executed. All that
2726 matters is that all the biv increments are always executed, and we
2727 won't reach here if they aren't. */
2728 if (v->giv_type != DEST_ADDR
2729 && (! v->always_computable
2730 || back_branch_in_range_p (loop, v->insn)))
2731 continue;
2733 /* The giv increment value must be a constant. */
2734 giv_inc = fold_rtx_mult_add (v->mult_val, increment, const0_rtx,
2735 v->mode);
2736 if (! giv_inc || GET_CODE (giv_inc) != CONST_INT)
2737 continue;
2739 /* The loop must be unrolled completely, or else have a known number of
2740 iterations and only one exit, or else the giv must be dead outside
2741 the loop, or else the final value of the giv must be known.
2742 Otherwise, it is not safe to split the giv since it may not have the
2743 proper value on loop exit. */
2745 /* The used outside loop test will fail for DEST_ADDR givs. They are
2746 never used outside the loop anyways, so it is always safe to split a
2747 DEST_ADDR giv. */
2749 final_value = 0;
2750 if (unroll_type != UNROLL_COMPLETELY
2751 && (loop->exit_count || unroll_type == UNROLL_NAIVE)
2752 && v->giv_type != DEST_ADDR
2753 /* The next part is true if the pseudo is used outside the loop.
2754 We assume that this is true for any pseudo created after loop
2755 starts, because we don't have a reg_n_info entry for them. */
2756 && (REGNO (v->dest_reg) >= max_reg_before_loop
2757 || (REGNO_FIRST_UID (REGNO (v->dest_reg)) != INSN_UID (v->insn)
2758 /* Check for the case where the pseudo is set by a shift/add
2759 sequence, in which case the first insn setting the pseudo
2760 is the first insn of the shift/add sequence. */
2761 && (! (tem = find_reg_note (v->insn, REG_RETVAL, NULL_RTX))
2762 || (REGNO_FIRST_UID (REGNO (v->dest_reg))
2763 != INSN_UID (XEXP (tem, 0)))))
2764 /* Line above always fails if INSN was moved by loop opt. */
2765 || (REGNO_LAST_LUID (REGNO (v->dest_reg))
2766 >= INSN_LUID (loop->end)))
2767 && ! (final_value = v->final_value))
2768 continue;
2770 #if 0
2771 /* Currently, non-reduced/final-value givs are never split. */
2772 /* Should emit insns after the loop if possible, as the biv final value
2773 code below does. */
2775 /* If the final value is non-zero, and the giv has not been reduced,
2776 then must emit an instruction to set the final value. */
2777 if (final_value && !v->new_reg)
2779 /* Create a new register to hold the value of the giv, and then set
2780 the giv to its final value before the loop start. The giv is set
2781 to its final value before loop start to ensure that this insn
2782 will always be executed, no matter how we exit. */
2783 tem = gen_reg_rtx (v->mode);
2784 loop_insn_hoist (loop, gen_move_insn (tem, v->dest_reg));
2785 loop_insn_hoist (loop, gen_move_insn (v->dest_reg, final_value));
2787 if (loop_dump_stream)
2788 fprintf (loop_dump_stream, "Giv %d mapped to %d for split.\n",
2789 REGNO (v->dest_reg), REGNO (tem));
2791 v->src_reg = tem;
2793 #endif
2795 /* This giv is splittable. If completely unrolling the loop, save the
2796 giv's initial value. Otherwise, save the constant zero for it. */
2798 if (unroll_type == UNROLL_COMPLETELY)
2800 /* It is not safe to use bl->initial_value here, because it may not
2801 be invariant. It is safe to use the initial value stored in
2802 the splittable_regs array if it is set. In rare cases, it won't
2803 be set, so then we do exactly the same thing as
2804 find_splittable_regs does to get a safe value. */
2805 rtx biv_initial_value;
2807 if (splittable_regs[bl->regno])
2808 biv_initial_value = splittable_regs[bl->regno];
2809 else if (GET_CODE (bl->initial_value) != REG
2810 || (REGNO (bl->initial_value) != bl->regno
2811 && REGNO (bl->initial_value) >= FIRST_PSEUDO_REGISTER))
2812 biv_initial_value = bl->initial_value;
2813 else
2815 rtx tem = gen_reg_rtx (bl->biv->mode);
2817 record_base_value (REGNO (tem), bl->biv->add_val, 0);
2818 loop_insn_hoist (loop, gen_move_insn (tem, bl->biv->src_reg));
2819 biv_initial_value = tem;
2821 biv_initial_value = extend_value_for_giv (v, biv_initial_value);
2822 value = fold_rtx_mult_add (v->mult_val, biv_initial_value,
2823 v->add_val, v->mode);
2825 else
2826 value = const0_rtx;
2828 if (v->new_reg)
2830 /* If a giv was combined with another giv, then we can only split
2831 this giv if the giv it was combined with was reduced. This
2832 is because the value of v->new_reg is meaningless in this
2833 case. */
2834 if (v->same && ! v->same->new_reg)
2836 if (loop_dump_stream)
2837 fprintf (loop_dump_stream,
2838 "giv combined with unreduced giv not split.\n");
2839 continue;
2841 /* If the giv is an address destination, it could be something other
2842 than a simple register, these have to be treated differently. */
2843 else if (v->giv_type == DEST_REG)
2845 /* If value is not a constant, register, or register plus
2846 constant, then compute its value into a register before
2847 loop start. This prevents invalid rtx sharing, and should
2848 generate better code. We can use bl->initial_value here
2849 instead of splittable_regs[bl->regno] because this code
2850 is going before the loop start. */
2851 if (unroll_type == UNROLL_COMPLETELY
2852 && GET_CODE (value) != CONST_INT
2853 && GET_CODE (value) != REG
2854 && (GET_CODE (value) != PLUS
2855 || GET_CODE (XEXP (value, 0)) != REG
2856 || GET_CODE (XEXP (value, 1)) != CONST_INT))
2858 rtx tem = gen_reg_rtx (v->mode);
2859 record_base_value (REGNO (tem), v->add_val, 0);
2860 loop_iv_add_mult_hoist (loop, bl->initial_value, v->mult_val,
2861 v->add_val, tem);
2862 value = tem;
2865 splittable_regs[REGNO (v->new_reg)] = value;
2867 else
2869 /* Splitting address givs is useful since it will often allow us
2870 to eliminate some increment insns for the base giv as
2871 unnecessary. */
2873 /* If the addr giv is combined with a dest_reg giv, then all
2874 references to that dest reg will be remapped, which is NOT
2875 what we want for split addr regs. We always create a new
2876 register for the split addr giv, just to be safe. */
2878 /* If we have multiple identical address givs within a
2879 single instruction, then use a single pseudo reg for
2880 both. This is necessary in case one is a match_dup
2881 of the other. */
2883 v->const_adjust = 0;
2885 if (v->same_insn)
2887 v->dest_reg = v->same_insn->dest_reg;
2888 if (loop_dump_stream)
2889 fprintf (loop_dump_stream,
2890 "Sharing address givs in insn %d\n",
2891 INSN_UID (v->insn));
2893 /* If multiple address GIVs have been combined with the
2894 same dest_reg GIV, do not create a new register for
2895 each. */
2896 else if (unroll_type != UNROLL_COMPLETELY
2897 && v->giv_type == DEST_ADDR
2898 && v->same && v->same->giv_type == DEST_ADDR
2899 && v->same->unrolled
2900 /* combine_givs_p may return true for some cases
2901 where the add and mult values are not equal.
2902 To share a register here, the values must be
2903 equal. */
2904 && rtx_equal_p (v->same->mult_val, v->mult_val)
2905 && rtx_equal_p (v->same->add_val, v->add_val)
2906 /* If the memory references have different modes,
2907 then the address may not be valid and we must
2908 not share registers. */
2909 && verify_addresses (v, giv_inc, unroll_number))
2911 v->dest_reg = v->same->dest_reg;
2912 v->shared = 1;
2914 else if (unroll_type != UNROLL_COMPLETELY)
2916 /* If not completely unrolling the loop, then create a new
2917 register to hold the split value of the DEST_ADDR giv.
2918 Emit insn to initialize its value before loop start. */
2920 rtx tem = gen_reg_rtx (v->mode);
2921 struct induction *same = v->same;
2922 rtx new_reg = v->new_reg;
2923 record_base_value (REGNO (tem), v->add_val, 0);
2925 /* If the address giv has a constant in its new_reg value,
2926 then this constant can be pulled out and put in value,
2927 instead of being part of the initialization code. */
2929 if (GET_CODE (new_reg) == PLUS
2930 && GET_CODE (XEXP (new_reg, 1)) == CONST_INT)
2932 v->dest_reg
2933 = plus_constant (tem, INTVAL (XEXP (new_reg, 1)));
2935 /* Only succeed if this will give valid addresses.
2936 Try to validate both the first and the last
2937 address resulting from loop unrolling, if
2938 one fails, then can't do const elim here. */
2939 if (verify_addresses (v, giv_inc, unroll_number))
2941 /* Save the negative of the eliminated const, so
2942 that we can calculate the dest_reg's increment
2943 value later. */
2944 v->const_adjust = -INTVAL (XEXP (new_reg, 1));
2946 new_reg = XEXP (new_reg, 0);
2947 if (loop_dump_stream)
2948 fprintf (loop_dump_stream,
2949 "Eliminating constant from giv %d\n",
2950 REGNO (tem));
2952 else
2953 v->dest_reg = tem;
2955 else
2956 v->dest_reg = tem;
2958 /* If the address hasn't been checked for validity yet, do so
2959 now, and fail completely if either the first or the last
2960 unrolled copy of the address is not a valid address
2961 for the instruction that uses it. */
2962 if (v->dest_reg == tem
2963 && ! verify_addresses (v, giv_inc, unroll_number))
2965 for (v2 = v->next_iv; v2; v2 = v2->next_iv)
2966 if (v2->same_insn == v)
2967 v2->same_insn = 0;
2969 if (loop_dump_stream)
2970 fprintf (loop_dump_stream,
2971 "Invalid address for giv at insn %d\n",
2972 INSN_UID (v->insn));
2973 continue;
2976 v->new_reg = new_reg;
2977 v->same = same;
2979 /* We set this after the address check, to guarantee that
2980 the register will be initialized. */
2981 v->unrolled = 1;
2983 /* To initialize the new register, just move the value of
2984 new_reg into it. This is not guaranteed to give a valid
2985 instruction on machines with complex addressing modes.
2986 If we can't recognize it, then delete it and emit insns
2987 to calculate the value from scratch. */
2988 loop_insn_hoist (loop, gen_rtx_SET (VOIDmode, tem,
2989 copy_rtx (v->new_reg)));
2990 if (recog_memoized (PREV_INSN (loop->start)) < 0)
2992 rtx sequence, ret;
2994 /* We can't use bl->initial_value to compute the initial
2995 value, because the loop may have been preconditioned.
2996 We must calculate it from NEW_REG. */
2997 delete_related_insns (PREV_INSN (loop->start));
2999 start_sequence ();
3000 ret = force_operand (v->new_reg, tem);
3001 if (ret != tem)
3002 emit_move_insn (tem, ret);
3003 sequence = gen_sequence ();
3004 end_sequence ();
3005 loop_insn_hoist (loop, sequence);
3007 if (loop_dump_stream)
3008 fprintf (loop_dump_stream,
3009 "Invalid init insn, rewritten.\n");
3012 else
3014 v->dest_reg = value;
3016 /* Check the resulting address for validity, and fail
3017 if the resulting address would be invalid. */
3018 if (! verify_addresses (v, giv_inc, unroll_number))
3020 for (v2 = v->next_iv; v2; v2 = v2->next_iv)
3021 if (v2->same_insn == v)
3022 v2->same_insn = 0;
3024 if (loop_dump_stream)
3025 fprintf (loop_dump_stream,
3026 "Invalid address for giv at insn %d\n",
3027 INSN_UID (v->insn));
3028 continue;
3032 /* Store the value of dest_reg into the insn. This sharing
3033 will not be a problem as this insn will always be copied
3034 later. */
3036 *v->location = v->dest_reg;
3038 /* If this address giv is combined with a dest reg giv, then
3039 save the base giv's induction pointer so that we will be
3040 able to handle this address giv properly. The base giv
3041 itself does not have to be splittable. */
3043 if (v->same && v->same->giv_type == DEST_REG)
3044 addr_combined_regs[REGNO (v->same->new_reg)] = v->same;
3046 if (GET_CODE (v->new_reg) == REG)
3048 /* This giv maybe hasn't been combined with any others.
3049 Make sure that it's giv is marked as splittable here. */
3051 splittable_regs[REGNO (v->new_reg)] = value;
3053 /* Make it appear to depend upon itself, so that the
3054 giv will be properly split in the main loop above. */
3055 if (! v->same)
3057 v->same = v;
3058 addr_combined_regs[REGNO (v->new_reg)] = v;
3062 if (loop_dump_stream)
3063 fprintf (loop_dump_stream, "DEST_ADDR giv being split.\n");
3066 else
3068 #if 0
3069 /* Currently, unreduced giv's can't be split. This is not too much
3070 of a problem since unreduced giv's are not live across loop
3071 iterations anyways. When unrolling a loop completely though,
3072 it makes sense to reduce&split givs when possible, as this will
3073 result in simpler instructions, and will not require that a reg
3074 be live across loop iterations. */
3076 splittable_regs[REGNO (v->dest_reg)] = value;
3077 fprintf (stderr, "Giv %d at insn %d not reduced\n",
3078 REGNO (v->dest_reg), INSN_UID (v->insn));
3079 #else
3080 continue;
3081 #endif
3084 /* Unreduced givs are only updated once by definition. Reduced givs
3085 are updated as many times as their biv is. Mark it so if this is
3086 a splittable register. Don't need to do anything for address givs
3087 where this may not be a register. */
3089 if (GET_CODE (v->new_reg) == REG)
3091 int count = 1;
3092 if (! v->ignore)
3093 count = REG_IV_CLASS (ivs, REGNO (v->src_reg))->biv_count;
3095 splittable_regs_updates[REGNO (v->new_reg)] = count;
3098 result++;
3100 if (loop_dump_stream)
3102 int regnum;
3104 if (GET_CODE (v->dest_reg) == CONST_INT)
3105 regnum = -1;
3106 else if (GET_CODE (v->dest_reg) != REG)
3107 regnum = REGNO (XEXP (v->dest_reg, 0));
3108 else
3109 regnum = REGNO (v->dest_reg);
3110 fprintf (loop_dump_stream, "Giv %d at insn %d safe to split.\n",
3111 regnum, INSN_UID (v->insn));
3115 return result;
3118 /* Try to prove that the register is dead after the loop exits. Trace every
3119 loop exit looking for an insn that will always be executed, which sets
3120 the register to some value, and appears before the first use of the register
3121 is found. If successful, then return 1, otherwise return 0. */
3123 /* ?? Could be made more intelligent in the handling of jumps, so that
3124 it can search past if statements and other similar structures. */
3126 static int
3127 reg_dead_after_loop (loop, reg)
3128 const struct loop *loop;
3129 rtx reg;
3131 rtx insn, label;
3132 enum rtx_code code;
3133 int jump_count = 0;
3134 int label_count = 0;
3136 /* In addition to checking all exits of this loop, we must also check
3137 all exits of inner nested loops that would exit this loop. We don't
3138 have any way to identify those, so we just give up if there are any
3139 such inner loop exits. */
3141 for (label = loop->exit_labels; label; label = LABEL_NEXTREF (label))
3142 label_count++;
3144 if (label_count != loop->exit_count)
3145 return 0;
3147 /* HACK: Must also search the loop fall through exit, create a label_ref
3148 here which points to the loop->end, and append the loop_number_exit_labels
3149 list to it. */
3150 label = gen_rtx_LABEL_REF (VOIDmode, loop->end);
3151 LABEL_NEXTREF (label) = loop->exit_labels;
3153 for (; label; label = LABEL_NEXTREF (label))
3155 /* Succeed if find an insn which sets the biv or if reach end of
3156 function. Fail if find an insn that uses the biv, or if come to
3157 a conditional jump. */
3159 insn = NEXT_INSN (XEXP (label, 0));
3160 while (insn)
3162 code = GET_CODE (insn);
3163 if (GET_RTX_CLASS (code) == 'i')
3165 rtx set;
3167 if (reg_referenced_p (reg, PATTERN (insn)))
3168 return 0;
3170 set = single_set (insn);
3171 if (set && rtx_equal_p (SET_DEST (set), reg))
3172 break;
3175 if (code == JUMP_INSN)
3177 if (GET_CODE (PATTERN (insn)) == RETURN)
3178 break;
3179 else if (!any_uncondjump_p (insn)
3180 /* Prevent infinite loop following infinite loops. */
3181 || jump_count++ > 20)
3182 return 0;
3183 else
3184 insn = JUMP_LABEL (insn);
3187 insn = NEXT_INSN (insn);
3191 /* Success, the register is dead on all loop exits. */
3192 return 1;
3195 /* Try to calculate the final value of the biv, the value it will have at
3196 the end of the loop. If we can do it, return that value. */
3199 final_biv_value (loop, bl)
3200 const struct loop *loop;
3201 struct iv_class *bl;
3203 unsigned HOST_WIDE_INT n_iterations = LOOP_INFO (loop)->n_iterations;
3204 rtx increment, tem;
3206 /* ??? This only works for MODE_INT biv's. Reject all others for now. */
3208 if (GET_MODE_CLASS (bl->biv->mode) != MODE_INT)
3209 return 0;
3211 /* The final value for reversed bivs must be calculated differently than
3212 for ordinary bivs. In this case, there is already an insn after the
3213 loop which sets this biv's final value (if necessary), and there are
3214 no other loop exits, so we can return any value. */
3215 if (bl->reversed)
3217 if (loop_dump_stream)
3218 fprintf (loop_dump_stream,
3219 "Final biv value for %d, reversed biv.\n", bl->regno);
3221 return const0_rtx;
3224 /* Try to calculate the final value as initial value + (number of iterations
3225 * increment). For this to work, increment must be invariant, the only
3226 exit from the loop must be the fall through at the bottom (otherwise
3227 it may not have its final value when the loop exits), and the initial
3228 value of the biv must be invariant. */
3230 if (n_iterations != 0
3231 && ! loop->exit_count
3232 && loop_invariant_p (loop, bl->initial_value))
3234 increment = biv_total_increment (bl);
3236 if (increment && loop_invariant_p (loop, increment))
3238 /* Can calculate the loop exit value, emit insns after loop
3239 end to calculate this value into a temporary register in
3240 case it is needed later. */
3242 tem = gen_reg_rtx (bl->biv->mode);
3243 record_base_value (REGNO (tem), bl->biv->add_val, 0);
3244 loop_iv_add_mult_sink (loop, increment, GEN_INT (n_iterations),
3245 bl->initial_value, tem);
3247 if (loop_dump_stream)
3248 fprintf (loop_dump_stream,
3249 "Final biv value for %d, calculated.\n", bl->regno);
3251 return tem;
3255 /* Check to see if the biv is dead at all loop exits. */
3256 if (reg_dead_after_loop (loop, bl->biv->src_reg))
3258 if (loop_dump_stream)
3259 fprintf (loop_dump_stream,
3260 "Final biv value for %d, biv dead after loop exit.\n",
3261 bl->regno);
3263 return const0_rtx;
3266 return 0;
3269 /* Try to calculate the final value of the giv, the value it will have at
3270 the end of the loop. If we can do it, return that value. */
3273 final_giv_value (loop, v)
3274 const struct loop *loop;
3275 struct induction *v;
3277 struct loop_ivs *ivs = LOOP_IVS (loop);
3278 struct iv_class *bl;
3279 rtx insn;
3280 rtx increment, tem;
3281 rtx seq;
3282 rtx loop_end = loop->end;
3283 unsigned HOST_WIDE_INT n_iterations = LOOP_INFO (loop)->n_iterations;
3285 bl = REG_IV_CLASS (ivs, REGNO (v->src_reg));
3287 /* The final value for givs which depend on reversed bivs must be calculated
3288 differently than for ordinary givs. In this case, there is already an
3289 insn after the loop which sets this giv's final value (if necessary),
3290 and there are no other loop exits, so we can return any value. */
3291 if (bl->reversed)
3293 if (loop_dump_stream)
3294 fprintf (loop_dump_stream,
3295 "Final giv value for %d, depends on reversed biv\n",
3296 REGNO (v->dest_reg));
3297 return const0_rtx;
3300 /* Try to calculate the final value as a function of the biv it depends
3301 upon. The only exit from the loop must be the fall through at the bottom
3302 (otherwise it may not have its final value when the loop exits). */
3304 /* ??? Can calculate the final giv value by subtracting off the
3305 extra biv increments times the giv's mult_val. The loop must have
3306 only one exit for this to work, but the loop iterations does not need
3307 to be known. */
3309 if (n_iterations != 0
3310 && ! loop->exit_count)
3312 /* ?? It is tempting to use the biv's value here since these insns will
3313 be put after the loop, and hence the biv will have its final value
3314 then. However, this fails if the biv is subsequently eliminated.
3315 Perhaps determine whether biv's are eliminable before trying to
3316 determine whether giv's are replaceable so that we can use the
3317 biv value here if it is not eliminable. */
3319 /* We are emitting code after the end of the loop, so we must make
3320 sure that bl->initial_value is still valid then. It will still
3321 be valid if it is invariant. */
3323 increment = biv_total_increment (bl);
3325 if (increment && loop_invariant_p (loop, increment)
3326 && loop_invariant_p (loop, bl->initial_value))
3328 /* Can calculate the loop exit value of its biv as
3329 (n_iterations * increment) + initial_value */
3331 /* The loop exit value of the giv is then
3332 (final_biv_value - extra increments) * mult_val + add_val.
3333 The extra increments are any increments to the biv which
3334 occur in the loop after the giv's value is calculated.
3335 We must search from the insn that sets the giv to the end
3336 of the loop to calculate this value. */
3338 /* Put the final biv value in tem. */
3339 tem = gen_reg_rtx (v->mode);
3340 record_base_value (REGNO (tem), bl->biv->add_val, 0);
3341 loop_iv_add_mult_sink (loop, extend_value_for_giv (v, increment),
3342 GEN_INT (n_iterations),
3343 extend_value_for_giv (v, bl->initial_value),
3344 tem);
3346 /* Subtract off extra increments as we find them. */
3347 for (insn = NEXT_INSN (v->insn); insn != loop_end;
3348 insn = NEXT_INSN (insn))
3350 struct induction *biv;
3352 for (biv = bl->biv; biv; biv = biv->next_iv)
3353 if (biv->insn == insn)
3355 start_sequence ();
3356 tem = expand_simple_binop (GET_MODE (tem), MINUS, tem,
3357 biv->add_val, NULL_RTX, 0,
3358 OPTAB_LIB_WIDEN);
3359 seq = gen_sequence ();
3360 end_sequence ();
3361 loop_insn_sink (loop, seq);
3365 /* Now calculate the giv's final value. */
3366 loop_iv_add_mult_sink (loop, tem, v->mult_val, v->add_val, tem);
3368 if (loop_dump_stream)
3369 fprintf (loop_dump_stream,
3370 "Final giv value for %d, calc from biv's value.\n",
3371 REGNO (v->dest_reg));
3373 return tem;
3377 /* Replaceable giv's should never reach here. */
3378 if (v->replaceable)
3379 abort ();
3381 /* Check to see if the biv is dead at all loop exits. */
3382 if (reg_dead_after_loop (loop, v->dest_reg))
3384 if (loop_dump_stream)
3385 fprintf (loop_dump_stream,
3386 "Final giv value for %d, giv dead after loop exit.\n",
3387 REGNO (v->dest_reg));
3389 return const0_rtx;
3392 return 0;
3395 /* Look back before LOOP->START for the insn that sets REG and return
3396 the equivalent constant if there is a REG_EQUAL note otherwise just
3397 the SET_SRC of REG. */
3399 static rtx
3400 loop_find_equiv_value (loop, reg)
3401 const struct loop *loop;
3402 rtx reg;
3404 rtx loop_start = loop->start;
3405 rtx insn, set;
3406 rtx ret;
3408 ret = reg;
3409 for (insn = PREV_INSN (loop_start); insn; insn = PREV_INSN (insn))
3411 if (GET_CODE (insn) == CODE_LABEL)
3412 break;
3414 else if (INSN_P (insn) && reg_set_p (reg, insn))
3416 /* We found the last insn before the loop that sets the register.
3417 If it sets the entire register, and has a REG_EQUAL note,
3418 then use the value of the REG_EQUAL note. */
3419 if ((set = single_set (insn))
3420 && (SET_DEST (set) == reg))
3422 rtx note = find_reg_note (insn, REG_EQUAL, NULL_RTX);
3424 /* Only use the REG_EQUAL note if it is a constant.
3425 Other things, divide in particular, will cause
3426 problems later if we use them. */
3427 if (note && GET_CODE (XEXP (note, 0)) != EXPR_LIST
3428 && CONSTANT_P (XEXP (note, 0)))
3429 ret = XEXP (note, 0);
3430 else
3431 ret = SET_SRC (set);
3433 /* We cannot do this if it changes between the
3434 assignment and loop start though. */
3435 if (modified_between_p (ret, insn, loop_start))
3436 ret = reg;
3438 break;
3441 return ret;
3444 /* Return a simplified rtx for the expression OP - REG.
3446 REG must appear in OP, and OP must be a register or the sum of a register
3447 and a second term.
3449 Thus, the return value must be const0_rtx or the second term.
3451 The caller is responsible for verifying that REG appears in OP and OP has
3452 the proper form. */
3454 static rtx
3455 subtract_reg_term (op, reg)
3456 rtx op, reg;
3458 if (op == reg)
3459 return const0_rtx;
3460 if (GET_CODE (op) == PLUS)
3462 if (XEXP (op, 0) == reg)
3463 return XEXP (op, 1);
3464 else if (XEXP (op, 1) == reg)
3465 return XEXP (op, 0);
3467 /* OP does not contain REG as a term. */
3468 abort ();
3471 /* Find and return register term common to both expressions OP0 and
3472 OP1 or NULL_RTX if no such term exists. Each expression must be a
3473 REG or a PLUS of a REG. */
3475 static rtx
3476 find_common_reg_term (op0, op1)
3477 rtx op0, op1;
3479 if ((GET_CODE (op0) == REG || GET_CODE (op0) == PLUS)
3480 && (GET_CODE (op1) == REG || GET_CODE (op1) == PLUS))
3482 rtx op00;
3483 rtx op01;
3484 rtx op10;
3485 rtx op11;
3487 if (GET_CODE (op0) == PLUS)
3488 op01 = XEXP (op0, 1), op00 = XEXP (op0, 0);
3489 else
3490 op01 = const0_rtx, op00 = op0;
3492 if (GET_CODE (op1) == PLUS)
3493 op11 = XEXP (op1, 1), op10 = XEXP (op1, 0);
3494 else
3495 op11 = const0_rtx, op10 = op1;
3497 /* Find and return common register term if present. */
3498 if (REG_P (op00) && (op00 == op10 || op00 == op11))
3499 return op00;
3500 else if (REG_P (op01) && (op01 == op10 || op01 == op11))
3501 return op01;
3504 /* No common register term found. */
3505 return NULL_RTX;
3508 /* Determine the loop iterator and calculate the number of loop
3509 iterations. Returns the exact number of loop iterations if it can
3510 be calculated, otherwise returns zero. */
3512 unsigned HOST_WIDE_INT
3513 loop_iterations (loop)
3514 struct loop *loop;
3516 struct loop_info *loop_info = LOOP_INFO (loop);
3517 struct loop_ivs *ivs = LOOP_IVS (loop);
3518 rtx comparison, comparison_value;
3519 rtx iteration_var, initial_value, increment, final_value;
3520 enum rtx_code comparison_code;
3521 HOST_WIDE_INT inc;
3522 unsigned HOST_WIDE_INT abs_inc;
3523 unsigned HOST_WIDE_INT abs_diff;
3524 int off_by_one;
3525 int increment_dir;
3526 int unsigned_p, compare_dir, final_larger;
3527 rtx last_loop_insn;
3528 rtx reg_term;
3529 struct iv_class *bl;
3531 loop_info->n_iterations = 0;
3532 loop_info->initial_value = 0;
3533 loop_info->initial_equiv_value = 0;
3534 loop_info->comparison_value = 0;
3535 loop_info->final_value = 0;
3536 loop_info->final_equiv_value = 0;
3537 loop_info->increment = 0;
3538 loop_info->iteration_var = 0;
3539 loop_info->unroll_number = 1;
3540 loop_info->iv = 0;
3542 /* We used to use prev_nonnote_insn here, but that fails because it might
3543 accidentally get the branch for a contained loop if the branch for this
3544 loop was deleted. We can only trust branches immediately before the
3545 loop_end. */
3546 last_loop_insn = PREV_INSN (loop->end);
3548 /* ??? We should probably try harder to find the jump insn
3549 at the end of the loop. The following code assumes that
3550 the last loop insn is a jump to the top of the loop. */
3551 if (GET_CODE (last_loop_insn) != JUMP_INSN)
3553 if (loop_dump_stream)
3554 fprintf (loop_dump_stream,
3555 "Loop iterations: No final conditional branch found.\n");
3556 return 0;
3559 /* If there is a more than a single jump to the top of the loop
3560 we cannot (easily) determine the iteration count. */
3561 if (LABEL_NUSES (JUMP_LABEL (last_loop_insn)) > 1)
3563 if (loop_dump_stream)
3564 fprintf (loop_dump_stream,
3565 "Loop iterations: Loop has multiple back edges.\n");
3566 return 0;
3569 /* If there are multiple conditionalized loop exit tests, they may jump
3570 back to differing CODE_LABELs. */
3571 if (loop->top && loop->cont)
3573 rtx temp = PREV_INSN (last_loop_insn);
3577 if (GET_CODE (temp) == JUMP_INSN)
3579 /* There are some kinds of jumps we can't deal with easily. */
3580 if (JUMP_LABEL (temp) == 0)
3582 if (loop_dump_stream)
3583 fprintf
3584 (loop_dump_stream,
3585 "Loop iterations: Jump insn has null JUMP_LABEL.\n");
3586 return 0;
3589 if (/* Previous unrolling may have generated new insns not
3590 covered by the uid_luid array. */
3591 INSN_UID (JUMP_LABEL (temp)) < max_uid_for_loop
3592 /* Check if we jump back into the loop body. */
3593 && INSN_LUID (JUMP_LABEL (temp)) > INSN_LUID (loop->top)
3594 && INSN_LUID (JUMP_LABEL (temp)) < INSN_LUID (loop->cont))
3596 if (loop_dump_stream)
3597 fprintf
3598 (loop_dump_stream,
3599 "Loop iterations: Loop has multiple back edges.\n");
3600 return 0;
3604 while ((temp = PREV_INSN (temp)) != loop->cont);
3607 /* Find the iteration variable. If the last insn is a conditional
3608 branch, and the insn before tests a register value, make that the
3609 iteration variable. */
3611 comparison = get_condition_for_loop (loop, last_loop_insn);
3612 if (comparison == 0)
3614 if (loop_dump_stream)
3615 fprintf (loop_dump_stream,
3616 "Loop iterations: No final comparison found.\n");
3617 return 0;
3620 /* ??? Get_condition may switch position of induction variable and
3621 invariant register when it canonicalizes the comparison. */
3623 comparison_code = GET_CODE (comparison);
3624 iteration_var = XEXP (comparison, 0);
3625 comparison_value = XEXP (comparison, 1);
3627 if (GET_CODE (iteration_var) != REG)
3629 if (loop_dump_stream)
3630 fprintf (loop_dump_stream,
3631 "Loop iterations: Comparison not against register.\n");
3632 return 0;
3635 /* The only new registers that are created before loop iterations
3636 are givs made from biv increments or registers created by
3637 load_mems. In the latter case, it is possible that try_copy_prop
3638 will propagate a new pseudo into the old iteration register but
3639 this will be marked by having the REG_USERVAR_P bit set. */
3641 if ((unsigned) REGNO (iteration_var) >= ivs->n_regs
3642 && ! REG_USERVAR_P (iteration_var))
3643 abort ();
3645 /* Determine the initial value of the iteration variable, and the amount
3646 that it is incremented each loop. Use the tables constructed by
3647 the strength reduction pass to calculate these values. */
3649 /* Clear the result values, in case no answer can be found. */
3650 initial_value = 0;
3651 increment = 0;
3653 /* The iteration variable can be either a giv or a biv. Check to see
3654 which it is, and compute the variable's initial value, and increment
3655 value if possible. */
3657 /* If this is a new register, can't handle it since we don't have any
3658 reg_iv_type entry for it. */
3659 if ((unsigned) REGNO (iteration_var) >= ivs->n_regs)
3661 if (loop_dump_stream)
3662 fprintf (loop_dump_stream,
3663 "Loop iterations: No reg_iv_type entry for iteration var.\n");
3664 return 0;
3667 /* Reject iteration variables larger than the host wide int size, since they
3668 could result in a number of iterations greater than the range of our
3669 `unsigned HOST_WIDE_INT' variable loop_info->n_iterations. */
3670 else if ((GET_MODE_BITSIZE (GET_MODE (iteration_var))
3671 > HOST_BITS_PER_WIDE_INT))
3673 if (loop_dump_stream)
3674 fprintf (loop_dump_stream,
3675 "Loop iterations: Iteration var rejected because mode too large.\n");
3676 return 0;
3678 else if (GET_MODE_CLASS (GET_MODE (iteration_var)) != MODE_INT)
3680 if (loop_dump_stream)
3681 fprintf (loop_dump_stream,
3682 "Loop iterations: Iteration var not an integer.\n");
3683 return 0;
3685 else if (REG_IV_TYPE (ivs, REGNO (iteration_var)) == BASIC_INDUCT)
3687 if (REGNO (iteration_var) >= ivs->n_regs)
3688 abort ();
3690 /* Grab initial value, only useful if it is a constant. */
3691 bl = REG_IV_CLASS (ivs, REGNO (iteration_var));
3692 initial_value = bl->initial_value;
3693 if (!bl->biv->always_executed || bl->biv->maybe_multiple)
3695 if (loop_dump_stream)
3696 fprintf (loop_dump_stream,
3697 "Loop iterations: Basic induction var not set once in each iteration.\n");
3698 return 0;
3701 increment = biv_total_increment (bl);
3703 else if (REG_IV_TYPE (ivs, REGNO (iteration_var)) == GENERAL_INDUCT)
3705 HOST_WIDE_INT offset = 0;
3706 struct induction *v = REG_IV_INFO (ivs, REGNO (iteration_var));
3707 rtx biv_initial_value;
3709 if (REGNO (v->src_reg) >= ivs->n_regs)
3710 abort ();
3712 if (!v->always_executed || v->maybe_multiple)
3714 if (loop_dump_stream)
3715 fprintf (loop_dump_stream,
3716 "Loop iterations: General induction var not set once in each iteration.\n");
3717 return 0;
3720 bl = REG_IV_CLASS (ivs, REGNO (v->src_reg));
3722 /* Increment value is mult_val times the increment value of the biv. */
3724 increment = biv_total_increment (bl);
3725 if (increment)
3727 struct induction *biv_inc;
3729 increment = fold_rtx_mult_add (v->mult_val,
3730 extend_value_for_giv (v, increment),
3731 const0_rtx, v->mode);
3732 /* The caller assumes that one full increment has occurred at the
3733 first loop test. But that's not true when the biv is incremented
3734 after the giv is set (which is the usual case), e.g.:
3735 i = 6; do {;} while (i++ < 9) .
3736 Therefore, we bias the initial value by subtracting the amount of
3737 the increment that occurs between the giv set and the giv test. */
3738 for (biv_inc = bl->biv; biv_inc; biv_inc = biv_inc->next_iv)
3740 if (loop_insn_first_p (v->insn, biv_inc->insn))
3742 if (REG_P (biv_inc->add_val))
3744 if (loop_dump_stream)
3745 fprintf (loop_dump_stream,
3746 "Loop iterations: Basic induction var add_val is REG %d.\n",
3747 REGNO (biv_inc->add_val));
3748 return 0;
3751 offset -= INTVAL (biv_inc->add_val);
3755 if (loop_dump_stream)
3756 fprintf (loop_dump_stream,
3757 "Loop iterations: Giv iterator, initial value bias %ld.\n",
3758 (long) offset);
3760 /* Initial value is mult_val times the biv's initial value plus
3761 add_val. Only useful if it is a constant. */
3762 biv_initial_value = extend_value_for_giv (v, bl->initial_value);
3763 initial_value
3764 = fold_rtx_mult_add (v->mult_val,
3765 plus_constant (biv_initial_value, offset),
3766 v->add_val, v->mode);
3768 else
3770 if (loop_dump_stream)
3771 fprintf (loop_dump_stream,
3772 "Loop iterations: Not basic or general induction var.\n");
3773 return 0;
3776 if (initial_value == 0)
3777 return 0;
3779 unsigned_p = 0;
3780 off_by_one = 0;
3781 switch (comparison_code)
3783 case LEU:
3784 unsigned_p = 1;
3785 case LE:
3786 compare_dir = 1;
3787 off_by_one = 1;
3788 break;
3789 case GEU:
3790 unsigned_p = 1;
3791 case GE:
3792 compare_dir = -1;
3793 off_by_one = -1;
3794 break;
3795 case EQ:
3796 /* Cannot determine loop iterations with this case. */
3797 compare_dir = 0;
3798 break;
3799 case LTU:
3800 unsigned_p = 1;
3801 case LT:
3802 compare_dir = 1;
3803 break;
3804 case GTU:
3805 unsigned_p = 1;
3806 case GT:
3807 compare_dir = -1;
3808 case NE:
3809 compare_dir = 0;
3810 break;
3811 default:
3812 abort ();
3815 /* If the comparison value is an invariant register, then try to find
3816 its value from the insns before the start of the loop. */
3818 final_value = comparison_value;
3819 if (GET_CODE (comparison_value) == REG
3820 && loop_invariant_p (loop, comparison_value))
3822 final_value = loop_find_equiv_value (loop, comparison_value);
3824 /* If we don't get an invariant final value, we are better
3825 off with the original register. */
3826 if (! loop_invariant_p (loop, final_value))
3827 final_value = comparison_value;
3830 /* Calculate the approximate final value of the induction variable
3831 (on the last successful iteration). The exact final value
3832 depends on the branch operator, and increment sign. It will be
3833 wrong if the iteration variable is not incremented by one each
3834 time through the loop and (comparison_value + off_by_one -
3835 initial_value) % increment != 0.
3836 ??? Note that the final_value may overflow and thus final_larger
3837 will be bogus. A potentially infinite loop will be classified
3838 as immediate, e.g. for (i = 0x7ffffff0; i <= 0x7fffffff; i++) */
3839 if (off_by_one)
3840 final_value = plus_constant (final_value, off_by_one);
3842 /* Save the calculated values describing this loop's bounds, in case
3843 precondition_loop_p will need them later. These values can not be
3844 recalculated inside precondition_loop_p because strength reduction
3845 optimizations may obscure the loop's structure.
3847 These values are only required by precondition_loop_p and insert_bct
3848 whenever the number of iterations cannot be computed at compile time.
3849 Only the difference between final_value and initial_value is
3850 important. Note that final_value is only approximate. */
3851 loop_info->initial_value = initial_value;
3852 loop_info->comparison_value = comparison_value;
3853 loop_info->final_value = plus_constant (comparison_value, off_by_one);
3854 loop_info->increment = increment;
3855 loop_info->iteration_var = iteration_var;
3856 loop_info->comparison_code = comparison_code;
3857 loop_info->iv = bl;
3859 /* Try to determine the iteration count for loops such
3860 as (for i = init; i < init + const; i++). When running the
3861 loop optimization twice, the first pass often converts simple
3862 loops into this form. */
3864 if (REG_P (initial_value))
3866 rtx reg1;
3867 rtx reg2;
3868 rtx const2;
3870 reg1 = initial_value;
3871 if (GET_CODE (final_value) == PLUS)
3872 reg2 = XEXP (final_value, 0), const2 = XEXP (final_value, 1);
3873 else
3874 reg2 = final_value, const2 = const0_rtx;
3876 /* Check for initial_value = reg1, final_value = reg2 + const2,
3877 where reg1 != reg2. */
3878 if (REG_P (reg2) && reg2 != reg1)
3880 rtx temp;
3882 /* Find what reg1 is equivalent to. Hopefully it will
3883 either be reg2 or reg2 plus a constant. */
3884 temp = loop_find_equiv_value (loop, reg1);
3886 if (find_common_reg_term (temp, reg2))
3887 initial_value = temp;
3888 else
3890 /* Find what reg2 is equivalent to. Hopefully it will
3891 either be reg1 or reg1 plus a constant. Let's ignore
3892 the latter case for now since it is not so common. */
3893 temp = loop_find_equiv_value (loop, reg2);
3895 if (temp == loop_info->iteration_var)
3896 temp = initial_value;
3897 if (temp == reg1)
3898 final_value = (const2 == const0_rtx)
3899 ? reg1 : gen_rtx_PLUS (GET_MODE (reg1), reg1, const2);
3902 else if (loop->vtop && GET_CODE (reg2) == CONST_INT)
3904 rtx temp;
3906 /* When running the loop optimizer twice, check_dbra_loop
3907 further obfuscates reversible loops of the form:
3908 for (i = init; i < init + const; i++). We often end up with
3909 final_value = 0, initial_value = temp, temp = temp2 - init,
3910 where temp2 = init + const. If the loop has a vtop we
3911 can replace initial_value with const. */
3913 temp = loop_find_equiv_value (loop, reg1);
3915 if (GET_CODE (temp) == MINUS && REG_P (XEXP (temp, 0)))
3917 rtx temp2 = loop_find_equiv_value (loop, XEXP (temp, 0));
3919 if (GET_CODE (temp2) == PLUS
3920 && XEXP (temp2, 0) == XEXP (temp, 1))
3921 initial_value = XEXP (temp2, 1);
3926 /* If have initial_value = reg + const1 and final_value = reg +
3927 const2, then replace initial_value with const1 and final_value
3928 with const2. This should be safe since we are protected by the
3929 initial comparison before entering the loop if we have a vtop.
3930 For example, a + b < a + c is not equivalent to b < c for all a
3931 when using modulo arithmetic.
3933 ??? Without a vtop we could still perform the optimization if we check
3934 the initial and final values carefully. */
3935 if (loop->vtop
3936 && (reg_term = find_common_reg_term (initial_value, final_value)))
3938 initial_value = subtract_reg_term (initial_value, reg_term);
3939 final_value = subtract_reg_term (final_value, reg_term);
3942 loop_info->initial_equiv_value = initial_value;
3943 loop_info->final_equiv_value = final_value;
3945 /* For EQ comparison loops, we don't have a valid final value.
3946 Check this now so that we won't leave an invalid value if we
3947 return early for any other reason. */
3948 if (comparison_code == EQ)
3949 loop_info->final_equiv_value = loop_info->final_value = 0;
3951 if (increment == 0)
3953 if (loop_dump_stream)
3954 fprintf (loop_dump_stream,
3955 "Loop iterations: Increment value can't be calculated.\n");
3956 return 0;
3959 if (GET_CODE (increment) != CONST_INT)
3961 /* If we have a REG, check to see if REG holds a constant value. */
3962 /* ??? Other RTL, such as (neg (reg)) is possible here, but it isn't
3963 clear if it is worthwhile to try to handle such RTL. */
3964 if (GET_CODE (increment) == REG || GET_CODE (increment) == SUBREG)
3965 increment = loop_find_equiv_value (loop, increment);
3967 if (GET_CODE (increment) != CONST_INT)
3969 if (loop_dump_stream)
3971 fprintf (loop_dump_stream,
3972 "Loop iterations: Increment value not constant ");
3973 print_simple_rtl (loop_dump_stream, increment);
3974 fprintf (loop_dump_stream, ".\n");
3976 return 0;
3978 loop_info->increment = increment;
3981 if (GET_CODE (initial_value) != CONST_INT)
3983 if (loop_dump_stream)
3985 fprintf (loop_dump_stream,
3986 "Loop iterations: Initial value not constant ");
3987 print_simple_rtl (loop_dump_stream, initial_value);
3988 fprintf (loop_dump_stream, ".\n");
3990 return 0;
3992 else if (comparison_code == EQ)
3994 if (loop_dump_stream)
3995 fprintf (loop_dump_stream, "Loop iterations: EQ comparison loop.\n");
3996 return 0;
3998 else if (GET_CODE (final_value) != CONST_INT)
4000 if (loop_dump_stream)
4002 fprintf (loop_dump_stream,
4003 "Loop iterations: Final value not constant ");
4004 print_simple_rtl (loop_dump_stream, final_value);
4005 fprintf (loop_dump_stream, ".\n");
4007 return 0;
4010 /* Final_larger is 1 if final larger, 0 if they are equal, otherwise -1. */
4011 if (unsigned_p)
4012 final_larger
4013 = ((unsigned HOST_WIDE_INT) INTVAL (final_value)
4014 > (unsigned HOST_WIDE_INT) INTVAL (initial_value))
4015 - ((unsigned HOST_WIDE_INT) INTVAL (final_value)
4016 < (unsigned HOST_WIDE_INT) INTVAL (initial_value));
4017 else
4018 final_larger = (INTVAL (final_value) > INTVAL (initial_value))
4019 - (INTVAL (final_value) < INTVAL (initial_value));
4021 if (INTVAL (increment) > 0)
4022 increment_dir = 1;
4023 else if (INTVAL (increment) == 0)
4024 increment_dir = 0;
4025 else
4026 increment_dir = -1;
4028 /* There are 27 different cases: compare_dir = -1, 0, 1;
4029 final_larger = -1, 0, 1; increment_dir = -1, 0, 1.
4030 There are 4 normal cases, 4 reverse cases (where the iteration variable
4031 will overflow before the loop exits), 4 infinite loop cases, and 15
4032 immediate exit (0 or 1 iteration depending on loop type) cases.
4033 Only try to optimize the normal cases. */
4035 /* (compare_dir/final_larger/increment_dir)
4036 Normal cases: (0/-1/-1), (0/1/1), (-1/-1/-1), (1/1/1)
4037 Reverse cases: (0/-1/1), (0/1/-1), (-1/-1/1), (1/1/-1)
4038 Infinite loops: (0/-1/0), (0/1/0), (-1/-1/0), (1/1/0)
4039 Immediate exit: (0/0/X), (-1/0/X), (-1/1/X), (1/0/X), (1/-1/X) */
4041 /* ?? If the meaning of reverse loops (where the iteration variable
4042 will overflow before the loop exits) is undefined, then could
4043 eliminate all of these special checks, and just always assume
4044 the loops are normal/immediate/infinite. Note that this means
4045 the sign of increment_dir does not have to be known. Also,
4046 since it does not really hurt if immediate exit loops or infinite loops
4047 are optimized, then that case could be ignored also, and hence all
4048 loops can be optimized.
4050 According to ANSI Spec, the reverse loop case result is undefined,
4051 because the action on overflow is undefined.
4053 See also the special test for NE loops below. */
4055 if (final_larger == increment_dir && final_larger != 0
4056 && (final_larger == compare_dir || compare_dir == 0))
4057 /* Normal case. */
4059 else
4061 if (loop_dump_stream)
4062 fprintf (loop_dump_stream, "Loop iterations: Not normal loop.\n");
4063 return 0;
4066 /* Calculate the number of iterations, final_value is only an approximation,
4067 so correct for that. Note that abs_diff and n_iterations are
4068 unsigned, because they can be as large as 2^n - 1. */
4070 inc = INTVAL (increment);
4071 if (inc > 0)
4073 abs_diff = INTVAL (final_value) - INTVAL (initial_value);
4074 abs_inc = inc;
4076 else if (inc < 0)
4078 abs_diff = INTVAL (initial_value) - INTVAL (final_value);
4079 abs_inc = -inc;
4081 else
4082 abort ();
4084 /* Given that iteration_var is going to iterate over its own mode,
4085 not HOST_WIDE_INT, disregard higher bits that might have come
4086 into the picture due to sign extension of initial and final
4087 values. */
4088 abs_diff &= ((unsigned HOST_WIDE_INT) 1
4089 << (GET_MODE_BITSIZE (GET_MODE (iteration_var)) - 1)
4090 << 1) - 1;
4092 /* For NE tests, make sure that the iteration variable won't miss
4093 the final value. If abs_diff mod abs_incr is not zero, then the
4094 iteration variable will overflow before the loop exits, and we
4095 can not calculate the number of iterations. */
4096 if (compare_dir == 0 && (abs_diff % abs_inc) != 0)
4097 return 0;
4099 /* Note that the number of iterations could be calculated using
4100 (abs_diff + abs_inc - 1) / abs_inc, provided care was taken to
4101 handle potential overflow of the summation. */
4102 loop_info->n_iterations = abs_diff / abs_inc + ((abs_diff % abs_inc) != 0);
4103 return loop_info->n_iterations;
4106 /* Replace uses of split bivs with their split pseudo register. This is
4107 for original instructions which remain after loop unrolling without
4108 copying. */
4110 static rtx
4111 remap_split_bivs (loop, x)
4112 struct loop *loop;
4113 rtx x;
4115 struct loop_ivs *ivs = LOOP_IVS (loop);
4116 enum rtx_code code;
4117 int i;
4118 const char *fmt;
4120 if (x == 0)
4121 return x;
4123 code = GET_CODE (x);
4124 switch (code)
4126 case SCRATCH:
4127 case PC:
4128 case CC0:
4129 case CONST_INT:
4130 case CONST_DOUBLE:
4131 case CONST:
4132 case SYMBOL_REF:
4133 case LABEL_REF:
4134 return x;
4136 case REG:
4137 #if 0
4138 /* If non-reduced/final-value givs were split, then this would also
4139 have to remap those givs also. */
4140 #endif
4141 if (REGNO (x) < ivs->n_regs
4142 && REG_IV_TYPE (ivs, REGNO (x)) == BASIC_INDUCT)
4143 return REG_IV_CLASS (ivs, REGNO (x))->biv->src_reg;
4144 break;
4146 default:
4147 break;
4150 fmt = GET_RTX_FORMAT (code);
4151 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
4153 if (fmt[i] == 'e')
4154 XEXP (x, i) = remap_split_bivs (loop, XEXP (x, i));
4155 else if (fmt[i] == 'E')
4157 int j;
4158 for (j = 0; j < XVECLEN (x, i); j++)
4159 XVECEXP (x, i, j) = remap_split_bivs (loop, XVECEXP (x, i, j));
4162 return x;
4165 /* If FIRST_UID is a set of REGNO, and FIRST_UID dominates LAST_UID (e.g.
4166 FIST_UID is always executed if LAST_UID is), then return 1. Otherwise
4167 return 0. COPY_START is where we can start looking for the insns
4168 FIRST_UID and LAST_UID. COPY_END is where we stop looking for these
4169 insns.
4171 If there is no JUMP_INSN between LOOP_START and FIRST_UID, then FIRST_UID
4172 must dominate LAST_UID.
4174 If there is a CODE_LABEL between FIRST_UID and LAST_UID, then FIRST_UID
4175 may not dominate LAST_UID.
4177 If there is no CODE_LABEL between FIRST_UID and LAST_UID, then FIRST_UID
4178 must dominate LAST_UID. */
4181 set_dominates_use (regno, first_uid, last_uid, copy_start, copy_end)
4182 int regno;
4183 int first_uid;
4184 int last_uid;
4185 rtx copy_start;
4186 rtx copy_end;
4188 int passed_jump = 0;
4189 rtx p = NEXT_INSN (copy_start);
4191 while (INSN_UID (p) != first_uid)
4193 if (GET_CODE (p) == JUMP_INSN)
4194 passed_jump = 1;
4195 /* Could not find FIRST_UID. */
4196 if (p == copy_end)
4197 return 0;
4198 p = NEXT_INSN (p);
4201 /* Verify that FIRST_UID is an insn that entirely sets REGNO. */
4202 if (! INSN_P (p) || ! dead_or_set_regno_p (p, regno))
4203 return 0;
4205 /* FIRST_UID is always executed. */
4206 if (passed_jump == 0)
4207 return 1;
4209 while (INSN_UID (p) != last_uid)
4211 /* If we see a CODE_LABEL between FIRST_UID and LAST_UID, then we
4212 can not be sure that FIRST_UID dominates LAST_UID. */
4213 if (GET_CODE (p) == CODE_LABEL)
4214 return 0;
4215 /* Could not find LAST_UID, but we reached the end of the loop, so
4216 it must be safe. */
4217 else if (p == copy_end)
4218 return 1;
4219 p = NEXT_INSN (p);
4222 /* FIRST_UID is always executed if LAST_UID is executed. */
4223 return 1;
4226 /* This routine is called when the number of iterations for the unrolled
4227 loop is one. The goal is to identify a loop that begins with an
4228 unconditional branch to the loop continuation note (or a label just after).
4229 In this case, the unconditional branch that starts the loop needs to be
4230 deleted so that we execute the single iteration. */
4232 static rtx
4233 ujump_to_loop_cont (loop_start, loop_cont)
4234 rtx loop_start;
4235 rtx loop_cont;
4237 rtx x, label, label_ref;
4239 /* See if loop start, or the next insn is an unconditional jump. */
4240 loop_start = next_nonnote_insn (loop_start);
4242 x = pc_set (loop_start);
4243 if (!x)
4244 return NULL_RTX;
4246 label_ref = SET_SRC (x);
4247 if (!label_ref)
4248 return NULL_RTX;
4250 /* Examine insn after loop continuation note. Return if not a label. */
4251 label = next_nonnote_insn (loop_cont);
4252 if (label == 0 || GET_CODE (label) != CODE_LABEL)
4253 return NULL_RTX;
4255 /* Return the loop start if the branch label matches the code label. */
4256 if (CODE_LABEL_NUMBER (label) == CODE_LABEL_NUMBER (XEXP (label_ref, 0)))
4257 return loop_start;
4258 else
4259 return NULL_RTX;