/* Provenance (from the repository web view this text was extracted from):
   Merge from mainline (gomp-merge-2005-02-26).
   [official-gcc.git] / gcc / loop.c
   blob 2b765c821d870487a655ef038df4141a2c130788  */
1 /* Perform various loop optimizations, including strength reduction.
2 Copyright (C) 1987, 1988, 1989, 1991, 1992, 1993, 1994, 1995,
3 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005
4 Free Software Foundation, Inc.
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 2, or (at your option) any later
11 version.
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING. If not, write to the Free
20 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
21 02111-1307, USA. */
/* This is the loop optimization pass of the compiler.
   It finds invariant computations within loops and moves them
   to the beginning of the loop.  Then it identifies basic and
   general induction variables.

   Basic induction variables (BIVs) are pseudo registers which are set within
   a loop only by incrementing or decrementing their value.  General induction
   variables (GIVs) are pseudo registers with a value which is a linear function
   of a basic induction variable.  BIVs are recognized by `basic_induction_var';
   GIVs by `general_induction_var'.

   Once induction variables are identified, strength reduction is applied to the
   general induction variables, and induction variable elimination is applied to
   the basic induction variables.

   It also finds cases where
   a register is set within the loop by zero-extending a narrower value
   and changes these to zero the entire register once before the loop
   and merely copy the low part within the loop.

   Most of the complexity is in heuristics to decide when it is worth
   while to do these things.  */
46 #include "config.h"
47 #include "system.h"
48 #include "coretypes.h"
49 #include "tm.h"
50 #include "rtl.h"
51 #include "tm_p.h"
52 #include "function.h"
53 #include "expr.h"
54 #include "hard-reg-set.h"
55 #include "basic-block.h"
56 #include "insn-config.h"
57 #include "regs.h"
58 #include "recog.h"
59 #include "flags.h"
60 #include "real.h"
61 #include "cselib.h"
62 #include "except.h"
63 #include "toplev.h"
64 #include "predict.h"
65 #include "insn-flags.h"
66 #include "optabs.h"
67 #include "cfgloop.h"
68 #include "ggc.h"
/* Get the loop info pointer of a loop.  */
#define LOOP_INFO(LOOP) ((struct loop_info *) (LOOP)->aux)

/* Get a pointer to the loop movables structure.  */
#define LOOP_MOVABLES(LOOP) (&LOOP_INFO (LOOP)->movables)

/* Get a pointer to the loop registers structure.  */
#define LOOP_REGS(LOOP) (&LOOP_INFO (LOOP)->regs)

/* Get a pointer to the loop induction variables structure.  */
#define LOOP_IVS(LOOP) (&LOOP_INFO (LOOP)->ivs)

/* Get the luid of an insn.  Catch the error of trying to reference the LUID
   of an insn added during loop, since these don't have LUIDs.  */

#define INSN_LUID(INSN) \
  (INSN_UID (INSN) < max_uid_for_loop ? uid_luid[INSN_UID (INSN)] \
   : (abort (), -1))

/* Luid of the insn that REGNO_FIRST_UID reports for REGNO.  Unlike
   INSN_LUID this does not abort: a uid outside the luid table yields 0.  */
#define REGNO_FIRST_LUID(REGNO) \
  (REGNO_FIRST_UID (REGNO) < max_uid_for_loop \
   ? uid_luid[REGNO_FIRST_UID (REGNO)] \
   : 0)

/* Luid of the insn that REGNO_LAST_UID reports for REGNO.  A uid outside
   the luid table yields INT_MAX.  */
#define REGNO_LAST_LUID(REGNO) \
  (REGNO_LAST_UID (REGNO) < max_uid_for_loop \
   ? uid_luid[REGNO_LAST_UID (REGNO)] \
   : INT_MAX)
/* A "basic induction variable" or biv is a pseudo reg that is set
   (within this loop) only by incrementing or decrementing it.  */
/* A "general induction variable" or giv is a pseudo reg whose
   value is a linear function of a biv.  */

/* Bivs are recognized by `basic_induction_var';
   Givs by `general_induction_var'.  */

/* An enum for the two different types of givs, those that are used
   as memory addresses and those that are calculated into registers.  */
enum g_types
{
  DEST_ADDR,
  DEST_REG
};
115 /* A `struct induction' is created for every instruction that sets
116 an induction variable (either a biv or a giv). */
118 struct induction
120 rtx insn; /* The insn that sets a biv or giv */
121 rtx new_reg; /* New register, containing strength reduced
122 version of this giv. */
123 rtx src_reg; /* Biv from which this giv is computed.
124 (If this is a biv, then this is the biv.) */
125 enum g_types giv_type; /* Indicate whether DEST_ADDR or DEST_REG */
126 rtx dest_reg; /* Destination register for insn: this is the
127 register which was the biv or giv.
128 For a biv, this equals src_reg.
129 For a DEST_ADDR type giv, this is 0. */
130 rtx *location; /* Place in the insn where this giv occurs.
131 If GIV_TYPE is DEST_REG, this is 0. */
132 /* For a biv, this is the place where add_val
133 was found. */
134 enum machine_mode mode; /* The mode of this biv or giv */
135 rtx mem; /* For DEST_ADDR, the memory object. */
136 rtx mult_val; /* Multiplicative factor for src_reg. */
137 rtx add_val; /* Additive constant for that product. */
138 int benefit; /* Gain from eliminating this insn. */
139 rtx final_value; /* If the giv is used outside the loop, and its
140 final value could be calculated, it is put
141 here, and the giv is made replaceable. Set
142 the giv to this value before the loop. */
143 unsigned combined_with; /* The number of givs this giv has been
144 combined with. If nonzero, this giv
145 cannot combine with any other giv. */
146 unsigned replaceable : 1; /* 1 if we can substitute the strength-reduced
147 variable for the original variable.
148 0 means they must be kept separate and the
149 new one must be copied into the old pseudo
150 reg each time the old one is set. */
151 unsigned not_replaceable : 1; /* Used to prevent duplicating work. This is
152 1 if we know that the giv definitely can
153 not be made replaceable, in which case we
154 don't bother checking the variable again
155 even if further info is available.
156 Both this and the above can be zero. */
157 unsigned ignore : 1; /* 1 prohibits further processing of giv */
158 unsigned always_computable : 1;/* 1 if this value is computable every
159 iteration. */
160 unsigned always_executed : 1; /* 1 if this set occurs each iteration. */
161 unsigned maybe_multiple : 1; /* Only used for a biv and 1 if this biv
162 update may be done multiple times per
163 iteration. */
164 unsigned cant_derive : 1; /* For giv's, 1 if this giv cannot derive
165 another giv. This occurs in many cases
166 where a giv's lifetime spans an update to
167 a biv. */
168 unsigned maybe_dead : 1; /* 1 if this giv might be dead. In that case,
169 we won't use it to eliminate a biv, it
170 would probably lose. */
171 unsigned auto_inc_opt : 1; /* 1 if this giv had its increment output next
172 to it to try to form an auto-inc address. */
173 unsigned shared : 1;
174 unsigned no_const_addval : 1; /* 1 if add_val does not contain a const. */
175 int lifetime; /* Length of life of this giv */
176 rtx derive_adjustment; /* If nonzero, is an adjustment to be
177 subtracted from add_val when this giv
178 derives another. This occurs when the
179 giv spans a biv update by incrementation. */
180 rtx ext_dependent; /* If nonzero, is a sign or zero extension
181 if a biv on which this giv is dependent. */
182 struct induction *next_iv; /* For givs, links together all givs that are
183 based on the same biv. For bivs, links
184 together all biv entries that refer to the
185 same biv register. */
186 struct induction *same; /* For givs, if the giv has been combined with
187 another giv, this points to the base giv.
188 The base giv will have COMBINED_WITH nonzero.
189 For bivs, if the biv has the same LOCATION
190 than another biv, this points to the base
191 biv. */
192 struct induction *same_insn; /* If there are multiple identical givs in
193 the same insn, then all but one have this
194 field set, and they all point to the giv
195 that doesn't have this field set. */
196 rtx last_use; /* For a giv made from a biv increment, this is
197 a substitute for the lifetime information. */
201 /* A `struct iv_class' is created for each biv. */
203 struct iv_class
205 unsigned int regno; /* Pseudo reg which is the biv. */
206 int biv_count; /* Number of insns setting this reg. */
207 struct induction *biv; /* List of all insns that set this reg. */
208 int giv_count; /* Number of DEST_REG givs computed from this
209 biv. The resulting count is only used in
210 check_dbra_loop. */
211 struct induction *giv; /* List of all insns that compute a giv
212 from this reg. */
213 int total_benefit; /* Sum of BENEFITs of all those givs. */
214 rtx initial_value; /* Value of reg at loop start. */
215 rtx initial_test; /* Test performed on BIV before loop. */
216 rtx final_value; /* Value of reg at loop end, if known. */
217 struct iv_class *next; /* Links all class structures together. */
218 rtx init_insn; /* insn which initializes biv, 0 if none. */
219 rtx init_set; /* SET of INIT_INSN, if any. */
220 unsigned incremented : 1; /* 1 if somewhere incremented/decremented */
221 unsigned eliminable : 1; /* 1 if plausible candidate for
222 elimination. */
223 unsigned nonneg : 1; /* 1 if we added a REG_NONNEG note for
224 this. */
225 unsigned reversed : 1; /* 1 if we reversed the loop that this
226 biv controls. */
227 unsigned all_reduced : 1; /* 1 if all givs using this biv have
228 been reduced. */
/* Definitions used by the basic induction variable discovery code.  */
enum iv_mode
{
  UNKNOWN_INDUCT,
  BASIC_INDUCT,
  NOT_BASIC_INDUCT,
  GENERAL_INDUCT
};
242 /* A `struct iv' is created for every register. */
244 struct iv
246 enum iv_mode type;
247 union
249 struct iv_class *class;
250 struct induction *info;
251 } iv;
/* Accessors for entry N of the per-register table of IVS (a pointer to
   struct loop_ivs).  Arguments and the expansion are parenthesized so that
   an argument such as `&ivs' or `a + b' expands correctly; each expansion
   is still an lvalue and may be assigned to.  */
#define REG_IV_TYPE(ivs, n) ((ivs)->regs[(n)].type)
#define REG_IV_INFO(ivs, n) ((ivs)->regs[(n)].iv.info)
#define REG_IV_CLASS(ivs, n) ((ivs)->regs[(n)].iv.class)
/* Induction variable information for one loop.  */
struct loop_ivs
{
  /* Indexed by register number, contains pointer to `struct
     iv' if register is an induction variable.  */
  struct iv *regs;

  /* Size of regs array.  */
  unsigned int n_regs;

  /* The head of a list which links together (via the next field)
     every iv class for the current loop.  */
  struct iv_class *list;
};
275 typedef struct loop_mem_info
277 rtx mem; /* The MEM itself. */
278 rtx reg; /* Corresponding pseudo, if any. */
279 int optimize; /* Nonzero if we can optimize access to this MEM. */
280 } loop_mem_info;
284 struct loop_reg
286 /* Number of times the reg is set during the loop being scanned.
287 During code motion, a negative value indicates a reg that has
288 been made a candidate; in particular -2 means that it is an
289 candidate that we know is equal to a constant and -1 means that
290 it is a candidate not known equal to a constant. After code
291 motion, regs moved have 0 (which is accurate now) while the
292 failed candidates have the original number of times set.
294 Therefore, at all times, == 0 indicates an invariant register;
295 < 0 a conditionally invariant one. */
296 int set_in_loop;
298 /* Original value of set_in_loop; same except that this value
299 is not set negative for a reg whose sets have been made candidates
300 and not set to 0 for a reg that is moved. */
301 int n_times_set;
303 /* Contains the insn in which a register was used if it was used
304 exactly once; contains const0_rtx if it was used more than once. */
305 rtx single_usage;
307 /* Nonzero indicates that the register cannot be moved or strength
308 reduced. */
309 char may_not_optimize;
311 /* Nonzero means reg N has already been moved out of one loop.
312 This reduces the desire to move it out of another. */
313 char moved_once;
/* Table of struct loop_reg entries for one loop.  */
struct loop_regs
{
  int num;                      /* Number of regs used in table.  */
  int size;                     /* Size of table.  */
  struct loop_reg *array;       /* Register usage info. array.  */
  int multiple_uses;            /* Nonzero if a reg has multiple uses.  */
};
/* The chain of movables recorded for one loop.  */
struct loop_movables
{
  /* Head of movable chain.  */
  struct movable *head;
  /* Last movable in chain.  */
  struct movable *last;
};
336 /* Information pertaining to a loop. */
338 struct loop_info
340 /* Nonzero if there is a subroutine call in the current loop. */
341 int has_call;
342 /* Nonzero if there is a libcall in the current loop. */
343 int has_libcall;
344 /* Nonzero if there is a non constant call in the current loop. */
345 int has_nonconst_call;
346 /* Nonzero if there is a prefetch instruction in the current loop. */
347 int has_prefetch;
348 /* Nonzero if there is a volatile memory reference in the current
349 loop. */
350 int has_volatile;
351 /* Nonzero if there is a tablejump in the current loop. */
352 int has_tablejump;
353 /* Nonzero if there are ways to leave the loop other than falling
354 off the end. */
355 int has_multiple_exit_targets;
356 /* Nonzero if there is an indirect jump in the current function. */
357 int has_indirect_jump;
358 /* Register or constant initial loop value. */
359 rtx initial_value;
360 /* Register or constant value used for comparison test. */
361 rtx comparison_value;
362 /* Register or constant approximate final value. */
363 rtx final_value;
364 /* Register or constant initial loop value with term common to
365 final_value removed. */
366 rtx initial_equiv_value;
367 /* Register or constant final loop value with term common to
368 initial_value removed. */
369 rtx final_equiv_value;
370 /* Register corresponding to iteration variable. */
371 rtx iteration_var;
372 /* Constant loop increment. */
373 rtx increment;
374 enum rtx_code comparison_code;
375 /* Holds the number of loop iterations. It is zero if the number
376 could not be calculated. Must be unsigned since the number of
377 iterations can be as high as 2^wordsize - 1. For loops with a
378 wider iterator, this number will be zero if the number of loop
379 iterations is too large for an unsigned integer to hold. */
380 unsigned HOST_WIDE_INT n_iterations;
381 int used_count_register;
382 /* The loop iterator induction variable. */
383 struct iv_class *iv;
384 /* List of MEMs that are stored in this loop. */
385 rtx store_mems;
386 /* Array of MEMs that are used (read or written) in this loop, but
387 cannot be aliased by anything in this loop, except perhaps
388 themselves. In other words, if mems[i] is altered during
389 the loop, it is altered by an expression that is rtx_equal_p to
390 it. */
391 loop_mem_info *mems;
392 /* The index of the next available slot in MEMS. */
393 int mems_idx;
394 /* The number of elements allocated in MEMS. */
395 int mems_allocated;
396 /* Nonzero if we don't know what MEMs were changed in the current
397 loop. This happens if the loop contains a call (in which case
398 `has_call' will also be set) or if we store into more than
399 NUM_STORES MEMs. */
400 int unknown_address_altered;
401 /* The above doesn't count any readonly memory locations that are
402 stored. This does. */
403 int unknown_constant_address_altered;
404 /* Count of memory write instructions discovered in the loop. */
405 int num_mem_sets;
406 /* The insn where the first of these was found. */
407 rtx first_loop_store_insn;
408 /* The chain of movable insns in loop. */
409 struct loop_movables movables;
410 /* The registers used the in loop. */
411 struct loop_regs regs;
412 /* The induction variable information in loop. */
413 struct loop_ivs ivs;
414 /* Nonzero if call is in pre_header extended basic block. */
415 int pre_header_has_call;
/* Not really meaningful values, but at least something.  */
#ifndef SIMULTANEOUS_PREFETCHES
#define SIMULTANEOUS_PREFETCHES 3
#endif
#ifndef PREFETCH_BLOCK
#define PREFETCH_BLOCK 32
#endif
/* When no prefetch pattern is defined, supply stand-ins so code that
   refers to it still compiles; gen_prefetch aborts if it is ever
   evaluated.  */
#ifndef HAVE_prefetch
#define HAVE_prefetch 0
#define CODE_FOR_prefetch 0
#define gen_prefetch(a,b,c) (abort(), NULL_RTX)
#endif

/* Give up the prefetch optimizations once we exceed a given threshold.
   It is unlikely that we would be able to optimize something in a loop
   with so many detected prefetches.  */
#define MAX_PREFETCHES 100
/* The number of prefetch blocks that are beneficial to fetch at once before
   a loop with a known (and low) iteration count.  */
#define PREFETCH_BLOCKS_BEFORE_LOOP_MAX 6
/* For very tiny loops it is not worthwhile to prefetch even before the loop,
   since it is likely that the data are already in the cache.  */
#define PREFETCH_BLOCKS_BEFORE_LOOP_MIN 2

/* Parameterize some prefetch heuristics so they can be turned on and off
   easily for performance testing on new architectures.  These can be
   defined in target-dependent files.  */

/* Prefetch is worthwhile only when loads/stores are dense.  */
#ifndef PREFETCH_ONLY_DENSE_MEM
#define PREFETCH_ONLY_DENSE_MEM 1
#endif

/* Define what we mean by "dense" loads and stores; this value divided by 256
   is the minimum percentage of memory references that are worth
   prefetching.  */
#ifndef PREFETCH_DENSE_MEM
#define PREFETCH_DENSE_MEM 220
#endif

/* Do not prefetch for a loop whose iteration count is known to be low.  */
#ifndef PREFETCH_NO_LOW_LOOPCNT
#define PREFETCH_NO_LOW_LOOPCNT 1
#endif

/* Define what we mean by a "low" iteration count.  */
#ifndef PREFETCH_LOW_LOOPCNT
#define PREFETCH_LOW_LOOPCNT 32
#endif

/* Do not prefetch for a loop that contains a function call; such a loop is
   probably not an internal loop.  */
#ifndef PREFETCH_NO_CALL
#define PREFETCH_NO_CALL 1
#endif

/* Do not prefetch accesses with an extreme stride.  */
#ifndef PREFETCH_NO_EXTREME_STRIDE
#define PREFETCH_NO_EXTREME_STRIDE 1
#endif

/* Define what we mean by an "extreme" stride.  */
#ifndef PREFETCH_EXTREME_STRIDE
#define PREFETCH_EXTREME_STRIDE 4096
#endif

/* Define a limit to how far apart indices can be and still be merged
   into a single prefetch.  */
#ifndef PREFETCH_EXTREME_DIFFERENCE
#define PREFETCH_EXTREME_DIFFERENCE 4096
#endif

/* Issue prefetch instructions before the loop to fetch data to be used
   in the first few loop iterations.  */
#ifndef PREFETCH_BEFORE_LOOP
#define PREFETCH_BEFORE_LOOP 1
#endif

/* Do not handle reversed order prefetches (negative stride).  */
#ifndef PREFETCH_NO_REVERSE_ORDER
#define PREFETCH_NO_REVERSE_ORDER 1
#endif

/* Prefetch even if the GIV is in conditional code.  */
#ifndef PREFETCH_CONDITIONAL
#define PREFETCH_CONDITIONAL 1
#endif
/* Distance in luids between the first and last luid recorded for REGNO.
   LOOP is not used by the expansion.  */
#define LOOP_REG_LIFETIME(LOOP, REGNO) \
  ((REGNO_LAST_LUID (REGNO) - REGNO_FIRST_LUID (REGNO)))

/* Nonzero if the luid range recorded for REGNO extends past the end of
   LOOP or begins before its start.  */
#define LOOP_REG_GLOBAL_P(LOOP, REGNO) \
  ((REGNO_LAST_LUID (REGNO) > INSN_LUID ((LOOP)->end) \
    || REGNO_FIRST_LUID (REGNO) < INSN_LUID ((LOOP)->start)))

/* Number of registers covered by REGNO in the mode of SET_DEST: looked up
   in hard_regno_nregs when REGNO is below FIRST_PSEUDO_REGISTER, otherwise
   always 1.  */
#define LOOP_REGNO_NREGS(REGNO, SET_DEST) \
  ((REGNO) < FIRST_PSEUDO_REGISTER \
   ? (int) hard_regno_nregs[(REGNO)][GET_MODE (SET_DEST)] : 1)
/* Vector mapping INSN_UIDs to luids.
   The luids are like uids but increase monotonically always.
   We use them to see whether a jump comes from outside a given loop.  */

static int *uid_luid;

/* Indexed by INSN_UID, contains the ordinal giving the (innermost) loop
   number the insn is contained in.  */

static struct loop **uid_loop;

/* 1 + largest uid of any insn.  */

static int max_uid_for_loop;

/* Number of loops detected in current function.  Used as index to the
   next few tables.  */

static int max_loop_num;

/* Bound on pseudo register number before loop optimization.
   A pseudo has valid regscan info if its number is < max_reg_before_loop.  */
static unsigned int max_reg_before_loop;

/* The value to pass to the next call of reg_scan_update.  */
static int loop_max_reg;
544 /* During the analysis of a loop, a chain of `struct movable's
545 is made to record all the movable insns found.
546 Then the entire chain can be scanned to decide which to move. */
548 struct movable
550 rtx insn; /* A movable insn */
551 rtx set_src; /* The expression this reg is set from. */
552 rtx set_dest; /* The destination of this SET. */
553 rtx dependencies; /* When INSN is libcall, this is an EXPR_LIST
554 of any registers used within the LIBCALL. */
555 int consec; /* Number of consecutive following insns
556 that must be moved with this one. */
557 unsigned int regno; /* The register it sets */
558 short lifetime; /* lifetime of that register;
559 may be adjusted when matching movables
560 that load the same value are found. */
561 short savings; /* Number of insns we can move for this reg,
562 including other movables that force this
563 or match this one. */
564 ENUM_BITFIELD(machine_mode) savemode : 8; /* Nonzero means it is a mode for
565 a low part that we should avoid changing when
566 clearing the rest of the reg. */
567 unsigned int cond : 1; /* 1 if only conditionally movable */
568 unsigned int force : 1; /* 1 means MUST move this insn */
569 unsigned int global : 1; /* 1 means reg is live outside this loop */
570 /* If PARTIAL is 1, GLOBAL means something different:
571 that the reg is live outside the range from where it is set
572 to the following label. */
573 unsigned int done : 1; /* 1 inhibits further processing of this */
575 unsigned int partial : 1; /* 1 means this reg is used for zero-extending.
576 In particular, moving it does not make it
577 invariant. */
578 unsigned int move_insn : 1; /* 1 means that we call emit_move_insn to
579 load SRC, rather than copying INSN. */
580 unsigned int move_insn_first:1;/* Same as above, if this is necessary for the
581 first insn of a consecutive sets group. */
582 unsigned int is_equiv : 1; /* 1 means a REG_EQUIV is present on INSN. */
583 unsigned int insert_temp : 1; /* 1 means we copy to a new pseudo and replace
584 the original insn with a copy from that
585 pseudo, rather than deleting it. */
586 struct movable *match; /* First entry for same value */
587 struct movable *forces; /* An insn that must be moved if this is */
588 struct movable *next;
/* Stream for the trace of actions taken; loop_optimize stores its DUMPFILE
   argument here (0 if no trace should be output).  */
static FILE *loop_dump_stream;
594 /* Forward declarations. */
596 static void invalidate_loops_containing_label (rtx);
597 static void find_and_verify_loops (rtx, struct loops *);
598 static void mark_loop_jump (rtx, struct loop *);
599 static void prescan_loop (struct loop *);
600 static int reg_in_basic_block_p (rtx, rtx);
601 static int consec_sets_invariant_p (const struct loop *, rtx, int, rtx);
602 static int labels_in_range_p (rtx, int);
603 static void count_one_set (struct loop_regs *, rtx, rtx, rtx *);
604 static void note_addr_stored (rtx, rtx, void *);
605 static void note_set_pseudo_multiple_uses (rtx, rtx, void *);
606 static int loop_reg_used_before_p (const struct loop *, rtx, rtx);
607 static rtx find_regs_nested (rtx, rtx);
608 static void scan_loop (struct loop*, int);
609 #if 0
610 static void replace_call_address (rtx, rtx, rtx);
611 #endif
612 static rtx skip_consec_insns (rtx, int);
613 static int libcall_benefit (rtx);
614 static rtx libcall_other_reg (rtx, rtx);
615 static void record_excess_regs (rtx, rtx, rtx *);
616 static void ignore_some_movables (struct loop_movables *);
617 static void force_movables (struct loop_movables *);
618 static void combine_movables (struct loop_movables *, struct loop_regs *);
619 static int num_unmoved_movables (const struct loop *);
620 static int regs_match_p (rtx, rtx, struct loop_movables *);
621 static int rtx_equal_for_loop_p (rtx, rtx, struct loop_movables *,
622 struct loop_regs *);
623 static void add_label_notes (rtx, rtx);
624 static void move_movables (struct loop *loop, struct loop_movables *, int,
625 int);
626 static void loop_movables_add (struct loop_movables *, struct movable *);
627 static void loop_movables_free (struct loop_movables *);
628 static int count_nonfixed_reads (const struct loop *, rtx);
629 static void loop_bivs_find (struct loop *);
630 static void loop_bivs_init_find (struct loop *);
631 static void loop_bivs_check (struct loop *);
632 static void loop_givs_find (struct loop *);
633 static void loop_givs_check (struct loop *);
634 static int loop_biv_eliminable_p (struct loop *, struct iv_class *, int, int);
635 static int loop_giv_reduce_benefit (struct loop *, struct iv_class *,
636 struct induction *, rtx);
637 static void loop_givs_dead_check (struct loop *, struct iv_class *);
638 static void loop_givs_reduce (struct loop *, struct iv_class *);
639 static void loop_givs_rescan (struct loop *, struct iv_class *, rtx *);
640 static void loop_ivs_free (struct loop *);
641 static void strength_reduce (struct loop *, int);
642 static void find_single_use_in_loop (struct loop_regs *, rtx, rtx);
643 static int valid_initial_value_p (rtx, rtx, int, rtx);
644 static void find_mem_givs (const struct loop *, rtx, rtx, int, int);
645 static void record_biv (struct loop *, struct induction *, rtx, rtx, rtx,
646 rtx, rtx *, int, int);
647 static void check_final_value (const struct loop *, struct induction *);
648 static void loop_ivs_dump (const struct loop *, FILE *, int);
649 static void loop_iv_class_dump (const struct iv_class *, FILE *, int);
650 static void loop_biv_dump (const struct induction *, FILE *, int);
651 static void loop_giv_dump (const struct induction *, FILE *, int);
652 static void record_giv (const struct loop *, struct induction *, rtx, rtx,
653 rtx, rtx, rtx, rtx, int, enum g_types, int, int,
654 rtx *);
655 static void update_giv_derive (const struct loop *, rtx);
656 static HOST_WIDE_INT get_monotonic_increment (struct iv_class *);
657 static bool biased_biv_fits_mode_p (const struct loop *, struct iv_class *,
658 HOST_WIDE_INT, enum machine_mode,
659 unsigned HOST_WIDE_INT);
660 static bool biv_fits_mode_p (const struct loop *, struct iv_class *,
661 HOST_WIDE_INT, enum machine_mode, bool);
662 static bool extension_within_bounds_p (const struct loop *, struct iv_class *,
663 HOST_WIDE_INT, rtx);
664 static void check_ext_dependent_givs (const struct loop *, struct iv_class *);
665 static int basic_induction_var (const struct loop *, rtx, enum machine_mode,
666 rtx, rtx, rtx *, rtx *, rtx **);
667 static rtx simplify_giv_expr (const struct loop *, rtx, rtx *, int *);
668 static int general_induction_var (const struct loop *loop, rtx, rtx *, rtx *,
669 rtx *, rtx *, int, int *, enum machine_mode);
670 static int consec_sets_giv (const struct loop *, int, rtx, rtx, rtx, rtx *,
671 rtx *, rtx *, rtx *);
672 static int check_dbra_loop (struct loop *, int);
673 static rtx express_from_1 (rtx, rtx, rtx);
674 static rtx combine_givs_p (struct induction *, struct induction *);
675 static int cmp_combine_givs_stats (const void *, const void *);
676 static void combine_givs (struct loop_regs *, struct iv_class *);
677 static int product_cheap_p (rtx, rtx);
678 static int maybe_eliminate_biv (const struct loop *, struct iv_class *, int,
679 int, int);
680 static int maybe_eliminate_biv_1 (const struct loop *, rtx, rtx,
681 struct iv_class *, int, basic_block, rtx);
682 static int last_use_this_basic_block (rtx, rtx);
683 static void record_initial (rtx, rtx, void *);
684 static void update_reg_last_use (rtx, rtx);
685 static rtx next_insn_in_loop (const struct loop *, rtx);
686 static void loop_regs_scan (const struct loop *, int);
687 static int count_insns_in_loop (const struct loop *);
688 static int find_mem_in_note_1 (rtx *, void *);
689 static rtx find_mem_in_note (rtx);
690 static void load_mems (const struct loop *);
691 static int insert_loop_mem (rtx *, void *);
692 static int replace_loop_mem (rtx *, void *);
693 static void replace_loop_mems (rtx, rtx, rtx, int);
694 static int replace_loop_reg (rtx *, void *);
695 static void replace_loop_regs (rtx insn, rtx, rtx);
696 static void note_reg_stored (rtx, rtx, void *);
697 static void try_copy_prop (const struct loop *, rtx, unsigned int);
698 static void try_swap_copy_prop (const struct loop *, rtx, unsigned int);
699 static rtx check_insn_for_givs (struct loop *, rtx, int, int);
700 static rtx check_insn_for_bivs (struct loop *, rtx, int, int);
701 static rtx gen_add_mult (rtx, rtx, rtx, rtx);
702 static void loop_regs_update (const struct loop *, rtx);
703 static int iv_add_mult_cost (rtx, rtx, rtx, rtx);
704 static int loop_invariant_p (const struct loop *, rtx);
705 static rtx loop_insn_hoist (const struct loop *, rtx);
706 static void loop_iv_add_mult_emit_before (const struct loop *, rtx, rtx, rtx,
707 rtx, basic_block, rtx);
708 static rtx loop_insn_emit_before (const struct loop *, basic_block,
709 rtx, rtx);
710 static int loop_insn_first_p (rtx, rtx);
711 static rtx get_condition_for_loop (const struct loop *, rtx);
712 static void loop_iv_add_mult_sink (const struct loop *, rtx, rtx, rtx, rtx);
713 static void loop_iv_add_mult_hoist (const struct loop *, rtx, rtx, rtx, rtx);
714 static rtx extend_value_for_giv (struct induction *, rtx);
715 static rtx loop_insn_sink (const struct loop *, rtx);
717 static rtx loop_insn_emit_after (const struct loop *, basic_block, rtx, rtx);
718 static rtx loop_call_insn_emit_before (const struct loop *, basic_block,
719 rtx, rtx);
720 static rtx loop_call_insn_hoist (const struct loop *, rtx);
721 static rtx loop_insn_sink_or_swim (const struct loop *, rtx);
723 static void loop_dump_aux (const struct loop *, FILE *, int);
724 static void loop_delete_insns (rtx, rtx);
725 static HOST_WIDE_INT remove_constant_addition (rtx *);
726 static rtx gen_load_of_final_value (rtx, rtx);
727 void debug_ivs (const struct loop *);
728 void debug_iv_class (const struct iv_class *);
729 void debug_biv (const struct induction *);
730 void debug_giv (const struct induction *);
731 void debug_loop (const struct loop *);
732 void debug_loops (const struct loops *);
734 typedef struct loop_replace_args
736 rtx match;
737 rtx replacement;
738 rtx insn;
739 } loop_replace_args;
/* Nonzero iff INSN is between START and END, inclusive.
   An INSN whose uid is not below max_uid_for_loop is never in range; the
   uid test comes first so INSN_LUID is not evaluated for such an INSN.  */
#define INSN_IN_RANGE_P(INSN, START, END) \
  (INSN_UID (INSN) < max_uid_for_loop \
   && INSN_LUID (INSN) >= INSN_LUID (START) \
   && INSN_LUID (INSN) <= INSN_LUID (END))
747 /* Indirect_jump_in_function is computed once per function. */
748 static int indirect_jump_in_function;
749 static int indirect_jump_in_function_p (rtx);
751 static int compute_luids (rtx, rtx, int);
753 static int biv_elimination_giv_has_0_offset (struct induction *,
754 struct induction *, rtx);
756 /* Benefit penalty, if a giv is not replaceable, i.e. must emit an insn to
757 copy the value of the strength reduced giv to its original register. */
758 static int copy_cost;
760 /* Cost of using a register, to normalize the benefits of a giv. */
761 static int reg_address_cost;
763 void
764 init_loop (void)
766 rtx reg = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 1);
768 reg_address_cost = address_cost (reg, SImode);
770 copy_cost = COSTS_N_INSNS (1);
773 /* Compute the mapping from uids to luids.
774 LUIDs are numbers assigned to insns, like uids,
775 except that luids increase monotonically through the code.
776 Start at insn START and stop just before END. Assign LUIDs
777 starting with PREV_LUID + 1. Return the last assigned LUID + 1. */
778 static int
779 compute_luids (rtx start, rtx end, int prev_luid)
781 int i;
782 rtx insn;
784 for (insn = start, i = prev_luid; insn != end; insn = NEXT_INSN (insn))
786 if (INSN_UID (insn) >= max_uid_for_loop)
787 continue;
788 /* Don't assign luids to line-number NOTEs, so that the distance in
789 luids between two insns is not affected by -g. */
790 if (!NOTE_P (insn)
791 || NOTE_LINE_NUMBER (insn) <= 0)
792 uid_luid[INSN_UID (insn)] = ++i;
793 else
794 /* Give a line number note the same luid as preceding insn. */
795 uid_luid[INSN_UID (insn)] = i;
797 return i + 1;
800 /* Entry point of this file. Perform loop optimization
801 on the current function. F is the first insn of the function
802 and DUMPFILE is a stream for output of a trace of actions taken
803 (or 0 if none should be output). */
805 void
806 loop_optimize (rtx f, FILE *dumpfile, int flags)
808 rtx insn;
809 int i;
810 struct loops loops_data;
811 struct loops *loops = &loops_data;
812 struct loop_info *loops_info;
814 loop_dump_stream = dumpfile;
816 init_recog_no_volatile ();
818 max_reg_before_loop = max_reg_num ();
819 loop_max_reg = max_reg_before_loop;
821 regs_may_share = 0;
823 /* Count the number of loops. */
825 max_loop_num = 0;
826 for (insn = f; insn; insn = NEXT_INSN (insn))
828 if (NOTE_P (insn)
829 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
830 max_loop_num++;
833 /* Don't waste time if no loops. */
834 if (max_loop_num == 0)
835 return;
837 loops->num = max_loop_num;
839 /* Get size to use for tables indexed by uids.
840 Leave some space for labels allocated by find_and_verify_loops. */
841 max_uid_for_loop = get_max_uid () + 1 + max_loop_num * 32;
843 uid_luid = xcalloc (max_uid_for_loop, sizeof (int));
844 uid_loop = xcalloc (max_uid_for_loop, sizeof (struct loop *));
846 /* Allocate storage for array of loops. */
847 loops->array = xcalloc (loops->num, sizeof (struct loop));
849 /* Find and process each loop.
850 First, find them, and record them in order of their beginnings. */
851 find_and_verify_loops (f, loops);
853 /* Allocate and initialize auxiliary loop information. */
854 loops_info = xcalloc (loops->num, sizeof (struct loop_info));
855 for (i = 0; i < (int) loops->num; i++)
856 loops->array[i].aux = loops_info + i;
858 /* Now find all register lifetimes. This must be done after
859 find_and_verify_loops, because it might reorder the insns in the
860 function. */
861 reg_scan (f, max_reg_before_loop);
863 /* This must occur after reg_scan so that registers created by gcse
864 will have entries in the register tables.
866 We could have added a call to reg_scan after gcse_main in toplev.c,
867 but moving this call to init_alias_analysis is more efficient. */
868 init_alias_analysis ();
870 /* See if we went too far. Note that get_max_uid already returns
871 one more that the maximum uid of all insn. */
872 if (get_max_uid () > max_uid_for_loop)
873 abort ();
874 /* Now reset it to the actual size we need. See above. */
875 max_uid_for_loop = get_max_uid ();
877 /* find_and_verify_loops has already called compute_luids, but it
878 might have rearranged code afterwards, so we need to recompute
879 the luids now. */
880 compute_luids (f, NULL_RTX, 0);
882 /* Don't leave gaps in uid_luid for insns that have been
883 deleted. It is possible that the first or last insn
884 using some register has been deleted by cross-jumping.
885 Make sure that uid_luid for that former insn's uid
886 points to the general area where that insn used to be. */
887 for (i = 0; i < max_uid_for_loop; i++)
889 uid_luid[0] = uid_luid[i];
890 if (uid_luid[0] != 0)
891 break;
893 for (i = 0; i < max_uid_for_loop; i++)
894 if (uid_luid[i] == 0)
895 uid_luid[i] = uid_luid[i - 1];
897 /* Determine if the function has indirect jump. On some systems
898 this prevents low overhead loop instructions from being used. */
899 indirect_jump_in_function = indirect_jump_in_function_p (f);
901 /* Now scan the loops, last ones first, since this means inner ones are done
902 before outer ones. */
903 for (i = max_loop_num - 1; i >= 0; i--)
905 struct loop *loop = &loops->array[i];
907 if (! loop->invalid && loop->end)
909 scan_loop (loop, flags);
910 ggc_collect ();
914 end_alias_analysis ();
916 /* Clean up. */
917 for (i = 0; i < (int) loops->num; i++)
918 free (loops_info[i].mems);
920 free (uid_luid);
921 free (uid_loop);
922 free (loops_info);
923 free (loops->array);
926 /* Returns the next insn, in execution order, after INSN. START and
927 END are the NOTE_INSN_LOOP_BEG and NOTE_INSN_LOOP_END for the loop,
928 respectively. LOOP->TOP, if non-NULL, is the top of the loop in the
929 insn-stream; it is used with loops that are entered near the
930 bottom. */
932 static rtx
933 next_insn_in_loop (const struct loop *loop, rtx insn)
935 insn = NEXT_INSN (insn);
937 if (insn == loop->end)
939 if (loop->top)
940 /* Go to the top of the loop, and continue there. */
941 insn = loop->top;
942 else
943 /* We're done. */
944 insn = NULL_RTX;
947 if (insn == loop->scan_start)
948 /* We're done. */
949 insn = NULL_RTX;
951 return insn;
954 /* Find any register references hidden inside X and add them to
955 the dependency list DEPS. This is used to look inside CLOBBER (MEM
956 when checking whether a PARALLEL can be pulled out of a loop. */
958 static rtx
959 find_regs_nested (rtx deps, rtx x)
961 enum rtx_code code = GET_CODE (x);
962 if (code == REG)
963 deps = gen_rtx_EXPR_LIST (VOIDmode, x, deps);
964 else
966 const char *fmt = GET_RTX_FORMAT (code);
967 int i, j;
968 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
970 if (fmt[i] == 'e')
971 deps = find_regs_nested (deps, XEXP (x, i));
972 else if (fmt[i] == 'E')
973 for (j = 0; j < XVECLEN (x, i); j++)
974 deps = find_regs_nested (deps, XVECEXP (x, i, j));
977 return deps;
980 /* Optimize one loop described by LOOP. */
982 /* ??? Could also move memory writes out of loops if the destination address
983 is invariant, the source is invariant, the memory write is not volatile,
984 and if we can prove that no read inside the loop can read this address
985 before the write occurs. If there is a read of this address after the
986 write, then we can also mark the memory read as invariant. */
/* Outline of the work done below, in order:
   - pick LOOP->SCAN_START (and LOOP->TOP for loops entered by a jump to
     the exit test) and LOOP->SINK;
   - call prescan_loop and derive THRESHOLD from it;
   - give up early on loops that do not start scanning at a label;
   - call loop_regs_scan and count_insns_in_loop;
   - walk the loop insns, recording candidate insns as movables;
   - run ignore_some_movables, force_movables, combine_movables and,
     subject to the optimize_size test, move_movables;
   - run load_mems;
   - run strength_reduce when flag_strength_reduce is set;
   - free the movables and REGS->ARRAY.
   FLAGS is not examined here; it is only forwarded to strength_reduce.  */
988 static void
989 scan_loop (struct loop *loop, int flags)
991 struct loop_info *loop_info = LOOP_INFO (loop);
992 struct loop_regs *regs = LOOP_REGS (loop);
993 int i;
994 rtx loop_start = loop->start;
995 rtx loop_end = loop->end;
996 rtx p;
997 /* 1 if we are scanning insns that could be executed zero times. */
998 int maybe_never = 0;
999 /* 1 if we are scanning insns that might never be executed
1000 due to a subroutine call which might exit before they are reached. */
1001 int call_passed = 0;
1002 /* Number of insns in the loop. */
1003 int insn_count;
1004 int tem;
1005 rtx temp, update_start, update_end;
1006 /* The SET from an insn, if it is the only SET in the insn. */
1007 rtx set, set1;
1008 /* Chain describing insns movable in current loop. */
1009 struct loop_movables *movables = LOOP_MOVABLES (loop);
1010 /* Ratio of extra register life span we can justify
1011 for saving an instruction. More if loop doesn't call subroutines
1012 since in that case saving an insn makes more difference
1013 and more registers are available. */
1014 int threshold;
1015 int in_libcall;
1017 loop->top = 0;
1019 movables->head = 0;
1020 movables->last = 0;
1022 /* Determine whether this loop starts with a jump down to a test at
1023 the end. This will occur for a small number of loops with a test
1024 that is too complex to duplicate in front of the loop.
1026 We search for the first insn or label in the loop, skipping NOTEs.
1027 However, we must be careful not to skip past a NOTE_INSN_LOOP_BEG
1028 (because we might have a loop executed only once that contains a
1029 loop which starts with a jump to its exit test) or a NOTE_INSN_LOOP_END
1030 (in case we have a degenerate loop).
1032 Note that if we mistakenly think that a loop is entered at the top
1033 when, in fact, it is entered at the exit test, the only effect will be
1034 slightly poorer optimization. Making the opposite error can generate
1035 incorrect code. Since very few loops now start with a jump to the
1036 exit test, the code here to detect that case is very conservative. */
1038 for (p = NEXT_INSN (loop_start);
1039 p != loop_end
1040 && !LABEL_P (p) && ! INSN_P (p)
1041 && (!NOTE_P (p)
1042 || (NOTE_LINE_NUMBER (p) != NOTE_INSN_LOOP_BEG
1043 && NOTE_LINE_NUMBER (p) != NOTE_INSN_LOOP_END));
1044 p = NEXT_INSN (p))
1047 loop->scan_start = p;
1049 /* If loop end is the end of the current function, then emit a
1050 NOTE_INSN_DELETED after loop_end and set loop->sink to the dummy
1051 note insn. This is the position we use when sinking insns out of
1052 the loop. */
1053 if (NEXT_INSN (loop->end) != 0)
1054 loop->sink = NEXT_INSN (loop->end);
1055 else
1056 loop->sink = emit_note_after (NOTE_INSN_DELETED, loop->end);
1058 /* Set up variables describing this loop. */
1059 prescan_loop (loop);
/* The threshold is twice as large when the loop contains no call.  */
1060 threshold = (loop_info->has_call ? 1 : 2) * (1 + n_non_fixed_regs);
1062 /* If loop has a jump before the first label,
1063 the true entry is the target of that jump.
1064 Start scan from there.
1065 But record in LOOP->TOP the place where the end-test jumps
1066 back to so we can scan that after the end of the loop. */
1067 if (JUMP_P (p)
1068 /* Loop entry must be unconditional jump (and not a RETURN) */
1069 && any_uncondjump_p (p)
1070 && JUMP_LABEL (p) != 0
1071 /* Check to see whether the jump actually
1072 jumps out of the loop (meaning it's no loop).
1073 This case can happen for things like
1074 do {..} while (0). If this label was generated previously
1075 by loop, we can't tell anything about it and have to reject
1076 the loop. */
1077 && INSN_IN_RANGE_P (JUMP_LABEL (p), loop_start, loop_end))
1079 loop->top = next_label (loop->scan_start);
1080 loop->scan_start = JUMP_LABEL (p);
1083 /* If LOOP->SCAN_START was an insn created by loop, we don't know its luid
1084 as required by loop_reg_used_before_p. So skip such loops. (This
1085 test may never be true, but it's best to play it safe.)
1087 Also, skip loops where we do not start scanning at a label. This
1088 test also rejects loops starting with a JUMP_INSN that failed the
1089 test above. */
1091 if (INSN_UID (loop->scan_start) >= max_uid_for_loop
1092 || !LABEL_P (loop->scan_start))
1094 if (loop_dump_stream)
1095 fprintf (loop_dump_stream, "\nLoop from %d to %d is phony.\n\n",
1096 INSN_UID (loop_start), INSN_UID (loop_end));
1097 return;
1100 /* Allocate extra space for REGs that might be created by load_mems.
1101 We allocate a little extra slop as well, in the hopes that we
1102 won't have to reallocate the regs array. */
1103 loop_regs_scan (loop, loop_info->mems_idx + 16);
1104 insn_count = count_insns_in_loop (loop);
1106 if (loop_dump_stream)
1107 fprintf (loop_dump_stream, "\nLoop from %d to %d: %d real insns.\n",
1108 INSN_UID (loop_start), INSN_UID (loop_end), insn_count);
1110 /* Scan through the loop finding insns that are safe to move.
1111 Set REGS->ARRAY[I].SET_IN_LOOP negative for the reg I being set, so that
1112 this reg will be considered invariant for subsequent insns.
1113 We consider whether subsequent insns use the reg
1114 in deciding whether it is worth actually moving.
1116 MAYBE_NEVER is nonzero if we have passed a conditional jump insn
1117 and therefore it is possible that the insns we are scanning
1118 would never be executed. At such times, we must make sure
1119 that it is safe to execute the insn once instead of zero times.
1120 When MAYBE_NEVER is 0, all insns will be executed at least once
1121 so that is not a problem. */
/* IN_LIBCALL is a nesting counter: an insn carrying a REG_LIBCALL note
   increments it and an insn carrying a REG_RETVAL note decrements it.
   Sets are only considered as movables while it is zero.  */
1123 for (in_libcall = 0, p = next_insn_in_loop (loop, loop->scan_start);
1124 p != NULL_RTX;
1125 p = next_insn_in_loop (loop, p))
1127 if (in_libcall && INSN_P (p) && find_reg_note (p, REG_RETVAL, NULL_RTX))
1128 in_libcall--;
1129 if (NONJUMP_INSN_P (p))
1131 /* Do not scan past an optimization barrier. */
1132 if (GET_CODE (PATTERN (p)) == ASM_INPUT)
1133 break;
1134 temp = find_reg_note (p, REG_LIBCALL, NULL_RTX);
1135 if (temp)
1136 in_libcall++;
1137 if (! in_libcall
1138 && (set = single_set (p))
1139 && REG_P (SET_DEST (set))
1140 #ifdef PIC_OFFSET_TABLE_REG_CALL_CLOBBERED
1141 && SET_DEST (set) != pic_offset_table_rtx
1142 #endif
1143 && ! regs->array[REGNO (SET_DEST (set))].may_not_optimize)
1145 int tem1 = 0;
1146 int tem2 = 0;
1147 int move_insn = 0;
1148 int insert_temp = 0;
1149 rtx src = SET_SRC (set);
1150 rtx dependencies = 0;
1152 /* Figure out what to use as a source of this insn. If a
1153 REG_EQUIV note is given or if a REG_EQUAL note with a
1154 constant operand is specified, use it as the source and
1155 mark that we should move this insn by calling
1156 emit_move_insn rather than duplicating the insn.
1158 Otherwise, only use the REG_EQUAL contents if a REG_RETVAL
1159 note is present. */
1160 temp = find_reg_note (p, REG_EQUIV, NULL_RTX);
1161 if (temp)
1162 src = XEXP (temp, 0), move_insn = 1;
1163 else
1165 temp = find_reg_note (p, REG_EQUAL, NULL_RTX);
1166 if (temp && CONSTANT_P (XEXP (temp, 0)))
1167 src = XEXP (temp, 0), move_insn = 1;
1168 if (temp && find_reg_note (p, REG_RETVAL, NULL_RTX))
1170 src = XEXP (temp, 0);
1171 /* A libcall block can use regs that don't appear in
1172 the equivalent expression. To move the libcall,
1173 we must move those regs too. */
1174 dependencies = libcall_other_reg (p, src);
1178 /* For parallels, add any possible uses to the dependencies, as
1179 we can't move the insn without resolving them first.
1180 MEMs inside CLOBBERs may also reference registers; these
1181 count as implicit uses. */
1182 if (GET_CODE (PATTERN (p)) == PARALLEL)
1184 for (i = 0; i < XVECLEN (PATTERN (p), 0); i++)
1186 rtx x = XVECEXP (PATTERN (p), 0, i);
1187 if (GET_CODE (x) == USE)
1188 dependencies
1189 = gen_rtx_EXPR_LIST (VOIDmode, XEXP (x, 0),
1190 dependencies);
1191 else if (GET_CODE (x) == CLOBBER
1192 && MEM_P (XEXP (x, 0)))
1193 dependencies = find_regs_nested (dependencies,
1194 XEXP (XEXP (x, 0), 0));
1198 if (/* The register is used in basic blocks other
1199 than the one where it is set (meaning that
1200 something after this point in the loop might
1201 depend on its value before the set). */
1202 ! reg_in_basic_block_p (p, SET_DEST (set))
1203 /* And the set is not guaranteed to be executed once
1204 the loop starts, or the value before the set is
1205 needed before the set occurs...
1207 ??? Note we have quadratic behavior here, mitigated
1208 by the fact that the previous test will often fail for
1209 large loops. Rather than re-scanning the entire loop
1210 each time for register usage, we should build tables
1211 of the register usage and use them here instead. */
1212 && (maybe_never
1213 || loop_reg_used_before_p (loop, set, p)))
1214 /* It is unsafe to move the set. However, it may be OK to
1215 move the source into a new pseudo, and substitute a
1216 reg-to-reg copy for the original insn.
1218 This code used to consider it OK to move a set of a variable
1219 which was not created by the user and not used in an exit
1220 test.
1221 That behavior is incorrect and was removed. */
1222 insert_temp = 1;
1224 /* Don't try to optimize a MODE_CC set with a constant
1225 source. It probably will be combined with a conditional
1226 jump. */
1227 if (GET_MODE_CLASS (GET_MODE (SET_DEST (set))) == MODE_CC
1228 && CONSTANT_P (src))
1230 /* Don't try to optimize a register that was made
1231 by loop-optimization for an inner loop.
1232 We don't know its life-span, so we can't compute
1233 the benefit. */
1234 else if (REGNO (SET_DEST (set)) >= max_reg_before_loop)
1236 /* Don't move the source and add a reg-to-reg copy:
1237 - with -Os (this certainly increases size),
1238 - if the mode doesn't support copy operations (obviously),
1239 - if the source is already a reg (the motion will gain nothing),
1240 - if the source is a legitimate constant (likewise). */
1241 else if (insert_temp
1242 && (optimize_size
1243 || ! can_copy_p (GET_MODE (SET_SRC (set)))
1244 || REG_P (SET_SRC (set))
1245 || (CONSTANT_P (SET_SRC (set))
1246 && LEGITIMATE_CONSTANT_P (SET_SRC (set)))))
1248 else if ((tem = loop_invariant_p (loop, src))
1249 && (dependencies == 0
1250 || (tem2
1251 = loop_invariant_p (loop, dependencies)) != 0)
1252 && (regs->array[REGNO (SET_DEST (set))].set_in_loop == 1
1253 || (tem1
1254 = consec_sets_invariant_p
1255 (loop, SET_DEST (set),
1256 regs->array[REGNO (SET_DEST (set))].set_in_loop,
1257 p)))
1258 /* If the insn can cause a trap (such as divide by zero),
1259 can't move it unless it's guaranteed to be executed
1260 once loop is entered. Even a function call might
1261 prevent the trap insn from being reached
1262 (since it might exit!) */
1263 && ! ((maybe_never || call_passed)
1264 && may_trap_p (src)))
1266 struct movable *m;
1267 int regno = REGNO (SET_DEST (set));
1269 /* A potential lossage is where we have a case where two insns
1270 can be combined as long as they are both in the loop, but
1271 we move one of them outside the loop. For large loops,
1272 this can lose. The most common case of this is the address
1273 of a function being called.
1275 Therefore, if this register is marked as being used
1276 exactly once if we are in a loop with calls
1277 (a "large loop"), see if we can replace the usage of
1278 this register with the source of this SET. If we can,
1279 delete this insn.
1281 Don't do this if P has a REG_RETVAL note or if we have
1282 SMALL_REGISTER_CLASSES and SET_SRC is a hard register. */
1284 if (loop_info->has_call
1285 && regs->array[regno].single_usage != 0
1286 && regs->array[regno].single_usage != const0_rtx
1287 && REGNO_FIRST_UID (regno) == INSN_UID (p)
1288 && (REGNO_LAST_UID (regno)
1289 == INSN_UID (regs->array[regno].single_usage))
1290 && regs->array[regno].set_in_loop == 1
1291 && GET_CODE (SET_SRC (set)) != ASM_OPERANDS
1292 && ! side_effects_p (SET_SRC (set))
1293 && ! find_reg_note (p, REG_RETVAL, NULL_RTX)
1294 && (! SMALL_REGISTER_CLASSES
1295 || (! (REG_P (SET_SRC (set))
1296 && (REGNO (SET_SRC (set))
1297 < FIRST_PSEUDO_REGISTER))))
1298 && regno >= FIRST_PSEUDO_REGISTER
1299 /* This test is not redundant; SET_SRC (set) might be
1300 a call-clobbered register and the life of REGNO
1301 might span a call. */
1302 && ! modified_between_p (SET_SRC (set), p,
1303 regs->array[regno].single_usage)
1304 && no_labels_between_p (p,
1305 regs->array[regno].single_usage)
1306 && validate_replace_rtx (SET_DEST (set), SET_SRC (set),
1307 regs->array[regno].single_usage))
1309 /* Replace any usage in a REG_EQUAL note. Must copy
1310 the new source, so that we don't get rtx sharing
1311 between the SET_SOURCE and REG_NOTES of insn p. */
1312 REG_NOTES (regs->array[regno].single_usage)
1313 = (replace_rtx
1314 (REG_NOTES (regs->array[regno].single_usage),
1315 SET_DEST (set), copy_rtx (SET_SRC (set))));
1317 delete_insn (p);
1318 for (i = 0; i < LOOP_REGNO_NREGS (regno, SET_DEST (set));
1319 i++)
1320 regs->array[regno+i].set_in_loop = 0;
1321 continue;
/* Record this insn as a movable.  Every field of the new struct movable
   is assigned explicitly below.  */
1324 m = xmalloc (sizeof (struct movable));
1325 m->next = 0;
1326 m->insn = p;
1327 m->set_src = src;
1328 m->dependencies = dependencies;
1329 m->set_dest = SET_DEST (set);
1330 m->force = 0;
1331 m->consec
1332 = regs->array[REGNO (SET_DEST (set))].set_in_loop - 1;
1333 m->done = 0;
1334 m->forces = 0;
1335 m->partial = 0;
1336 m->move_insn = move_insn;
1337 m->move_insn_first = 0;
1338 m->insert_temp = insert_temp;
1339 m->is_equiv = (find_reg_note (p, REG_EQUIV, NULL_RTX) != 0);
1340 m->savemode = VOIDmode;
1341 m->regno = regno;
1342 /* Set M->cond if either loop_invariant_p
1343 or consec_sets_invariant_p returned 2
1344 (only conditionally invariant). */
1345 m->cond = ((tem | tem1 | tem2) > 1);
1346 m->global = LOOP_REG_GLOBAL_P (loop, regno);
1347 m->match = 0;
1348 m->lifetime = LOOP_REG_LIFETIME (loop, regno);
1349 m->savings = regs->array[regno].n_times_set;
1350 if (find_reg_note (p, REG_RETVAL, NULL_RTX))
1351 m->savings += libcall_benefit (p);
/* Mark the destination as tentatively moved: -2 when the move will be
   done with a fresh move insn, -1 otherwise.  */
1352 for (i = 0; i < LOOP_REGNO_NREGS (regno, SET_DEST (set)); i++)
1353 regs->array[regno+i].set_in_loop = move_insn ? -2 : -1;
1354 /* Add M to the end of the chain MOVABLES. */
1355 loop_movables_add (movables, m);
1357 if (m->consec > 0)
1359 /* It is possible for the first instruction to have a
1360 REG_EQUAL note but a non-invariant SET_SRC, so we must
1361 remember the status of the first instruction in case
1362 the last instruction doesn't have a REG_EQUAL note. */
1363 m->move_insn_first = m->move_insn;
1365 /* Skip this insn, not checking REG_LIBCALL notes. */
1366 p = next_nonnote_insn (p);
1367 /* Skip the consecutive insns, if there are any. */
1368 p = skip_consec_insns (p, m->consec);
1369 /* Back up to the last insn of the consecutive group. */
1370 p = prev_nonnote_insn (p);
1372 /* We must now reset m->move_insn, m->is_equiv, and
1373 possibly m->set_src to correspond to the effects of
1374 all the insns. */
1375 temp = find_reg_note (p, REG_EQUIV, NULL_RTX);
1376 if (temp)
1377 m->set_src = XEXP (temp, 0), m->move_insn = 1;
1378 else
1380 temp = find_reg_note (p, REG_EQUAL, NULL_RTX);
1381 if (temp && CONSTANT_P (XEXP (temp, 0)))
1382 m->set_src = XEXP (temp, 0), m->move_insn = 1;
1383 else
1384 m->move_insn = 0;
1387 m->is_equiv
1388 = (find_reg_note (p, REG_EQUIV, NULL_RTX) != 0);
1391 /* If this register is always set within a STRICT_LOW_PART
1392 or set to zero, then its high bytes are constant.
1393 So clear them outside the loop and within the loop
1394 just load the low bytes.
1395 We must check that the machine has an instruction to do so.
1396 Also, if the value loaded into the register
1397 depends on the same register, this cannot be done. */
1398 else if (SET_SRC (set) == const0_rtx
1399 && NONJUMP_INSN_P (NEXT_INSN (p))
1400 && (set1 = single_set (NEXT_INSN (p)))
1401 && GET_CODE (set1) == SET
1402 && (GET_CODE (SET_DEST (set1)) == STRICT_LOW_PART)
1403 && (GET_CODE (XEXP (SET_DEST (set1), 0)) == SUBREG)
1404 && (SUBREG_REG (XEXP (SET_DEST (set1), 0))
1405 == SET_DEST (set))
1406 && !reg_mentioned_p (SET_DEST (set), SET_SRC (set1)))
1408 int regno = REGNO (SET_DEST (set));
1409 if (regs->array[regno].set_in_loop == 2)
1411 struct movable *m;
/* NOTE(review): unlike the movable built in the invariant-source branch
   above, this "partial" movable never assigns m->set_src or m->is_equiv.
   If xmalloc does not zero the allocation those two fields are
   indeterminate -- confirm that no consumer reads them when m->partial
   is set.  */
1412 m = xmalloc (sizeof (struct movable));
1413 m->next = 0;
1414 m->insn = p;
1415 m->set_dest = SET_DEST (set);
1416 m->dependencies = 0;
1417 m->force = 0;
1418 m->consec = 0;
1419 m->done = 0;
1420 m->forces = 0;
1421 m->move_insn = 0;
1422 m->move_insn_first = 0;
1423 m->insert_temp = insert_temp;
1424 m->partial = 1;
1425 /* If the insn may not be executed on some cycles,
1426 we can't clear the whole reg; clear just high part.
1427 Not even if the reg is used only within this loop.
1428 Consider this:
1429 while (1)
1430 while (s != t) {
1431 if (foo ()) x = *s;
1432 use (x);
1434 Clearing x before the inner loop could clobber a value
1435 being saved from the last time around the outer loop.
1436 However, if the reg is not used outside this loop
1437 and all uses of the register are in the same
1438 basic block as the store, there is no problem.
1440 If this insn was made by loop, we don't know its
1441 INSN_LUID and hence must make a conservative
1442 assumption. */
1443 m->global = (INSN_UID (p) >= max_uid_for_loop
1444 || LOOP_REG_GLOBAL_P (loop, regno)
1445 || (labels_in_range_p
1446 (p, REGNO_FIRST_LUID (regno))));
1447 if (maybe_never && m->global)
1448 m->savemode = GET_MODE (SET_SRC (set1));
1449 else
1450 m->savemode = VOIDmode;
1451 m->regno = regno;
1452 m->cond = 0;
1453 m->match = 0;
1454 m->lifetime = LOOP_REG_LIFETIME (loop, regno);
1455 m->savings = 1;
1456 for (i = 0;
1457 i < LOOP_REGNO_NREGS (regno, SET_DEST (set));
1458 i++)
1459 regs->array[regno+i].set_in_loop = -1;
1460 /* Add M to the end of the chain MOVABLES. */
1461 loop_movables_add (movables, m);
1466 /* Past a call insn, we get to insns which might not be executed
1467 because the call might exit. This matters for insns that trap.
1468 Constant and pure call insns always return, so they don't count. */
1469 else if (CALL_P (p) && ! CONST_OR_PURE_CALL_P (p))
1470 call_passed = 1;
1471 /* Past a label or a jump, we get to insns for which we
1472 can't count on whether or how many times they will be
1473 executed during each iteration. Therefore, we can
1474 only move out sets of trivial variables
1475 (those not used after the loop). */
1476 /* Similar code appears twice in strength_reduce. */
1477 else if ((LABEL_P (p) || JUMP_P (p))
1478 /* If we enter the loop in the middle, and scan around to the
1479 beginning, don't set maybe_never for that. This must be an
1480 unconditional jump, otherwise the code at the top of the
1481 loop might never be executed. Unconditional jumps are
1482 followed by a barrier then the loop_end. */
1483 && ! (JUMP_P (p) && JUMP_LABEL (p) == loop->top
1484 && NEXT_INSN (NEXT_INSN (p)) == loop_end
1485 && any_uncondjump_p (p)))
1486 maybe_never = 1;
1489 /* If one movable subsumes another, ignore that other. */
1491 ignore_some_movables (movables);
1493 /* For each movable insn, see if the reg that it loads
1494 leads when it dies right into another conditionally movable insn.
1495 If so, record that the second insn "forces" the first one,
1496 since the second can be moved only if the first is. */
1498 force_movables (movables);
1500 /* See if there are multiple movable insns that load the same value.
1501 If there are, make all but the first point at the first one
1502 through the `match' field, and add the priorities of them
1503 all together as the priority of the first. */
1505 combine_movables (movables, regs);
1507 /* Now consider each movable insn to decide whether it is worth moving.
1508 Store 0 in regs->array[I].set_in_loop for each reg I that is moved.
1510 For machines with few registers this increases code size, so do not
1511 move movables when optimizing for code size on such machines.
1512 (The 18 below is the value for i386.) */
1514 if (!optimize_size
1515 || (reg_class_size[GENERAL_REGS] > 18 && !loop_info->has_call))
1517 move_movables (loop, movables, threshold, insn_count);
1519 /* Recalculate regs->array if move_movables has created new
1520 registers. */
1521 if (max_reg_num () > regs->num)
1523 loop_regs_scan (loop, 0);
1524 for (update_start = loop_start;
1525 PREV_INSN (update_start)
1526 && !LABEL_P (PREV_INSN (update_start));
1527 update_start = PREV_INSN (update_start))
1529 update_end = NEXT_INSN (loop_end);
1531 reg_scan_update (update_start, update_end, loop_max_reg);
1532 loop_max_reg = max_reg_num ();
1536 /* Now candidates that still are negative are those not moved.
1537 Change regs->array[I].set_in_loop to indicate that those are not actually
1538 invariant. */
1539 for (i = 0; i < regs->num; i++)
1540 if (regs->array[i].set_in_loop < 0)
1541 regs->array[i].set_in_loop = regs->array[i].n_times_set;
1543 /* Now that we've moved some things out of the loop, we might be able to
1544 hoist even more memory references. */
1545 load_mems (loop);
1547 /* Recalculate regs->array if load_mems has created new registers. */
1548 if (max_reg_num () > regs->num)
1549 loop_regs_scan (loop, 0);
/* Walk UPDATE_START backwards from the loop start until the insn before
   it is a label or there is no earlier insn; UPDATE_END is the insn after
   the loop end.  That range is then handed to reg_scan_update.  */
1551 for (update_start = loop_start;
1552 PREV_INSN (update_start)
1553 && !LABEL_P (PREV_INSN (update_start));
1554 update_start = PREV_INSN (update_start))
1556 update_end = NEXT_INSN (loop_end);
1558 reg_scan_update (update_start, update_end, loop_max_reg);
1559 loop_max_reg = max_reg_num ();
1561 if (flag_strength_reduce)
1563 if (update_end && LABEL_P (update_end))
1564 /* Ensure our label doesn't go away. */
1565 LABEL_NUSES (update_end)++;
1567 strength_reduce (loop, flags);
1569 reg_scan_update (update_start, update_end, loop_max_reg);
1570 loop_max_reg = max_reg_num ();
/* Drop the temporary reference taken above; delete the label if that was
   its last use.  */
1572 if (update_end && LABEL_P (update_end)
1573 && --LABEL_NUSES (update_end) == 0)
1574 delete_related_insns (update_end);
1578 /* The movable information is required for strength reduction. */
1579 loop_movables_free (movables);
1581 free (regs->array);
1582 regs->array = 0;
1583 regs->num = 0;
1586 /* Add elements to *OUTPUT to record all the pseudo-regs
1587 mentioned in IN_THIS but not mentioned in NOT_IN_THIS. */
1589 static void
1590 record_excess_regs (rtx in_this, rtx not_in_this, rtx *output)
1592 enum rtx_code code;
1593 const char *fmt;
1594 int i;
1596 code = GET_CODE (in_this);
1598 switch (code)
1600 case PC:
1601 case CC0:
1602 case CONST_INT:
1603 case CONST_DOUBLE:
1604 case CONST:
1605 case SYMBOL_REF:
1606 case LABEL_REF:
1607 return;
1609 case REG:
1610 if (REGNO (in_this) >= FIRST_PSEUDO_REGISTER
1611 && ! reg_mentioned_p (in_this, not_in_this))
1612 *output = gen_rtx_EXPR_LIST (VOIDmode, in_this, *output);
1613 return;
1615 default:
1616 break;
1619 fmt = GET_RTX_FORMAT (code);
1620 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
1622 int j;
1624 switch (fmt[i])
1626 case 'E':
1627 for (j = 0; j < XVECLEN (in_this, i); j++)
1628 record_excess_regs (XVECEXP (in_this, i, j), not_in_this, output);
1629 break;
1631 case 'e':
1632 record_excess_regs (XEXP (in_this, i), not_in_this, output);
1633 break;
1638 /* Check what regs are referred to in the libcall block ending with INSN,
1639 aside from those mentioned in the equivalent value.
1640 If there are none, return 0.
1641 If there are one or more, return an EXPR_LIST containing all of them. */
1643 static rtx
1644 libcall_other_reg (rtx insn, rtx equiv)
1646 rtx note = find_reg_note (insn, REG_RETVAL, NULL_RTX);
1647 rtx p = XEXP (note, 0);
1648 rtx output = 0;
1650 /* First, find all the regs used in the libcall block
1651 that are not mentioned as inputs to the result. */
1653 while (p != insn)
1655 if (INSN_P (p))
1656 record_excess_regs (PATTERN (p), equiv, &output);
1657 p = NEXT_INSN (p);
1660 return output;
1663 /* Return 1 if all uses of REG
1664 are between INSN and the end of the basic block. */
1666 static int
1667 reg_in_basic_block_p (rtx insn, rtx reg)
1669 int regno = REGNO (reg);
1670 rtx p;
1672 if (REGNO_FIRST_UID (regno) != INSN_UID (insn))
1673 return 0;
1675 /* Search this basic block for the already recorded last use of the reg. */
1676 for (p = insn; p; p = NEXT_INSN (p))
1678 switch (GET_CODE (p))
1680 case NOTE:
1681 break;
1683 case INSN:
1684 case CALL_INSN:
1685 /* Ordinary insn: if this is the last use, we win. */
1686 if (REGNO_LAST_UID (regno) == INSN_UID (p))
1687 return 1;
1688 break;
1690 case JUMP_INSN:
1691 /* Jump insn: if this is the last use, we win. */
1692 if (REGNO_LAST_UID (regno) == INSN_UID (p))
1693 return 1;
1694 /* Otherwise, it's the end of the basic block, so we lose. */
1695 return 0;
1697 case CODE_LABEL:
1698 case BARRIER:
1699 /* It's the end of the basic block, so we lose. */
1700 return 0;
1702 default:
1703 break;
1707 /* The "last use" that was recorded can't be found after the first
1708 use. This can happen when the last use was deleted while
1709 processing an inner loop, this inner loop was then completely
1710 unrolled, and the outer loop is always exited after the inner loop,
1711 so that everything after the first use becomes a single basic block. */
1712 return 1;
1715 /* Compute the benefit of eliminating the insns in the block whose
1716 last insn is LAST. This may be a group of insns used to compute a
1717 value directly or can contain a library call. */
1719 static int
1720 libcall_benefit (rtx last)
1722 rtx insn;
1723 int benefit = 0;
1725 for (insn = XEXP (find_reg_note (last, REG_RETVAL, NULL_RTX), 0);
1726 insn != last; insn = NEXT_INSN (insn))
1728 if (CALL_P (insn))
1729 benefit += 10; /* Assume at least this many insns in a library
1730 routine. */
1731 else if (NONJUMP_INSN_P (insn)
1732 && GET_CODE (PATTERN (insn)) != USE
1733 && GET_CODE (PATTERN (insn)) != CLOBBER)
1734 benefit++;
1737 return benefit;
1740 /* Skip COUNT insns from INSN, counting library calls as 1 insn. */
1742 static rtx
1743 skip_consec_insns (rtx insn, int count)
1745 for (; count > 0; count--)
1747 rtx temp;
1749 /* If first insn of libcall sequence, skip to end. */
1750 /* Do this at start of loop, since INSN is guaranteed to
1751 be an insn here. */
1752 if (!NOTE_P (insn)
1753 && (temp = find_reg_note (insn, REG_LIBCALL, NULL_RTX)))
1754 insn = XEXP (temp, 0);
1757 insn = NEXT_INSN (insn);
1758 while (NOTE_P (insn));
1761 return insn;
1764 /* Ignore any movable whose insn falls within a libcall
1765 which is part of another movable.
1766 We make use of the fact that the movable for the libcall value
1767 was made later and so appears later on the chain. */
1769 static void
1770 ignore_some_movables (struct loop_movables *movables)
1772 struct movable *m, *m1;
1774 for (m = movables->head; m; m = m->next)
1776 /* Is this a movable for the value of a libcall? */
1777 rtx note = find_reg_note (m->insn, REG_RETVAL, NULL_RTX);
1778 if (note)
1780 rtx insn;
1781 /* Check for earlier movables inside that range,
1782 and mark them invalid. We cannot use LUIDs here because
1783 insns created by loop.c for prior loops don't have LUIDs.
1784 Rather than reject all such insns from movables, we just
1785 explicitly check each insn in the libcall (since invariant
1786 libcalls aren't that common). */
1787 for (insn = XEXP (note, 0); insn != m->insn; insn = NEXT_INSN (insn))
1788 for (m1 = movables->head; m1 != m; m1 = m1->next)
1789 if (m1->insn == insn)
1790 m1->done = 1;
1795 /* For each movable insn, see if the reg that it loads
1796 leads when it dies right into another conditionally movable insn.
1797 If so, record that the second insn "forces" the first one,
1798 since the second can be moved only if the first is. */
1800 static void
1801 force_movables (struct loop_movables *movables)
1803 struct movable *m, *m1;
1805 for (m1 = movables->head; m1; m1 = m1->next)
1806 /* Omit this if moving just the (SET (REG) 0) of a zero-extend. */
1807 if (!m1->partial && !m1->done)
1809 int regno = m1->regno;
1810 for (m = m1->next; m; m = m->next)
1811 /* ??? Could this be a bug? What if CSE caused the
1812 register of M1 to be used after this insn?
1813 Since CSE does not update regno_last_uid,
1814 this insn M->insn might not be where it dies.
1815 But very likely this doesn't matter; what matters is
1816 that M's reg is computed from M1's reg. */
1817 if (INSN_UID (m->insn) == REGNO_LAST_UID (regno)
1818 && !m->done)
1819 break;
1820 if (m != 0 && m->set_src == m1->set_dest
1821 /* If m->consec, m->set_src isn't valid. */
1822 && m->consec == 0)
1823 m = 0;
1825 /* Increase the priority of the moving the first insn
1826 since it permits the second to be moved as well.
1827 Likewise for insns already forced by the first insn. */
1828 if (m != 0)
1830 struct movable *m2;
1832 m->forces = m1;
1833 for (m2 = m1; m2; m2 = m2->forces)
1835 m2->lifetime += m->lifetime;
1836 m2->savings += m->savings;
1842 /* Find invariant expressions that are equal and can be combined into
1843 one register.

   Two passes are made over MOVABLES.

   The first pass takes each movable M that is not yet matched, whose
   register is set exactly once, is a pseudo, and has neither
   insert_temp nor partial set.  Every later movable M1 that passes the
   same tests, is also not `global', and is found to load the same value
   is marked done, gets M1->match pointed at M, and has its lifetime and
   savings added to M's.

   The second pass walks the integer modes from narrowest to widest and
   combines the `partial' (zero-extension) movables that are not
   `global', extend from that mode to the same destination mode, and
   whose first/last LUID ranges do not overlap those already combined.

   MATCHED_REGS is a scratch array of REGS->num bytes; it is allocated
   here and freed before returning.  */
1845 static void
1846 combine_movables (struct loop_movables *movables, struct loop_regs *regs)
1848 struct movable *m;
1849 char *matched_regs = xmalloc (regs->num);
1850 enum machine_mode mode;
1852 /* Regs that are set more than once are not allowed to match
1853 or be matched. I'm no longer sure why not. */
1854 /* Only pseudo registers are allowed to match or be matched,
1855 since move_movables does not validate the change. */
1856 /* Perhaps testing m->consec_sets would be more appropriate here? */
1858 for (m = movables->head; m; m = m->next)
1859 if (m->match == 0 && regs->array[m->regno].n_times_set == 1
1860 && m->regno >= FIRST_PSEUDO_REGISTER
1861 && !m->insert_temp
1862 && !m->partial)
1864 struct movable *m1;
1865 int regno = m->regno;
/* MATCHED_REGS is reset for every candidate M: an entry is nonzero
   for M's own register and for each register already matched to M
   during this iteration.  */
1867 memset (matched_regs, 0, regs->num);
1868 matched_regs[regno] = 1;
1870 /* We want later insns to match the first one. Don't make the first
1871 one match any later ones. So start this loop at m->next. */
1872 for (m1 = m->next; m1; m1 = m1->next)
1873 if (m != m1 && m1->match == 0
1874 && !m1->insert_temp
1875 && regs->array[m1->regno].n_times_set == 1
1876 && m1->regno >= FIRST_PSEUDO_REGISTER
1877 /* A reg used outside the loop mustn't be eliminated. */
1878 && !m1->global
1879 /* A reg used for zero-extending mustn't be eliminated. */
1880 && !m1->partial
1881 && (matched_regs[m1->regno]
1884 /* Can combine regs with different modes loaded from the
1885 same constant only if the modes are the same or
1886 if both are integer modes with M wider or the same
1887 width as M1. The check for integer is redundant, but
1888 safe, since the only case of differing destination
1889 modes with equal sources is when both sources are
1890 VOIDmode, i.e., CONST_INT. */
1891 (GET_MODE (m->set_dest) == GET_MODE (m1->set_dest)
1892 || (GET_MODE_CLASS (GET_MODE (m->set_dest)) == MODE_INT
1893 && GET_MODE_CLASS (GET_MODE (m1->set_dest)) == MODE_INT
1894 && (GET_MODE_BITSIZE (GET_MODE (m->set_dest))
1895 >= GET_MODE_BITSIZE (GET_MODE (m1->set_dest)))))
1896 /* See if the source of M1 says it matches M. */
1897 && ((REG_P (m1->set_src)
1898 && matched_regs[REGNO (m1->set_src)])
1899 || rtx_equal_for_loop_p (m->set_src, m1->set_src,
1900 movables, regs))))
/* The dependencies must also agree, either by being the very same
   rtx or by comparing equal under rtx_equal_p.  */
1901 && ((m->dependencies == m1->dependencies)
1902 || rtx_equal_p (m->dependencies, m1->dependencies)))
/* M1 matches M: fold M1's cost figures into M and retire M1.  */
1904 m->lifetime += m1->lifetime;
1905 m->savings += m1->savings;
1906 m1->done = 1;
1907 m1->match = m;
1908 matched_regs[m1->regno] = 1;
1912 /* Now combine the regs used for zero-extension.
1913 This can be done for those not marked `global'
1914 provided their lives don't overlap. */
1916 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1917 mode = GET_MODE_WIDER_MODE (mode))
/* M0 is the first suitable movable found for this MODE; the others
   are combined into it.  */
1919 struct movable *m0 = 0;
1921 /* Combine all the registers for extension from mode MODE.
1922 Don't combine any that are used outside this loop. */
1923 for (m = movables->head; m; m = m->next)
1924 if (m->partial && ! m->global
1925 && mode == GET_MODE (SET_SRC (PATTERN (NEXT_INSN (m->insn)))))
1927 struct movable *m1;
1929 int first = REGNO_FIRST_LUID (m->regno);
1930 int last = REGNO_LAST_LUID (m->regno);
1932 if (m0 == 0)
1934 /* First one: don't check for overlap, just record it. */
1935 m0 = m;
1936 continue;
1939 /* Make sure they extend to the same mode.
1940 (Almost always true.) */
1941 if (GET_MODE (m->set_dest) != GET_MODE (m0->set_dest))
1942 continue;
1944 /* We already have one: check for overlap with those
1945 already combined together. */
1946 for (m1 = movables->head; m1 != m; m1 = m1->next)
1947 if (m1 == m0 || (m1->partial && m1->match == m0))
1948 if (! (REGNO_FIRST_LUID (m1->regno) > last
1949 || REGNO_LAST_LUID (m1->regno) < first))
1950 goto overlap;
1952 /* No overlap: we can combine this with the others. */
1953 m0->lifetime += m->lifetime;
1954 m0->savings += m->savings;
1955 m->done = 1;
1956 m->match = m0;
/* Jumped to when M's LUID range overlaps one already combined with M0;
   M is then left uncombined.  */
1958 overlap:
1963 /* Clean up. */
1964 free (matched_regs);
1967 /* Returns the number of movable instructions in LOOP that were not
1968 moved outside the loop. */
1970 static int
1971 num_unmoved_movables (const struct loop *loop)
1973 int num = 0;
1974 struct movable *m;
1976 for (m = LOOP_MOVABLES (loop)->head; m; m = m->next)
1977 if (!m->done)
1978 ++num;
1980 return num;
1984 /* Return 1 if regs X and Y will become the same if moved. */
1986 static int
1987 regs_match_p (rtx x, rtx y, struct loop_movables *movables)
1989 unsigned int xn = REGNO (x);
1990 unsigned int yn = REGNO (y);
1991 struct movable *mx, *my;
1993 for (mx = movables->head; mx; mx = mx->next)
1994 if (mx->regno == xn)
1995 break;
1997 for (my = movables->head; my; my = my->next)
1998 if (my->regno == yn)
1999 break;
2001 return (mx && my
2002 && ((mx->match == my->match && mx->match != 0)
2003 || mx->match == my
2004 || mx == my->match));
2007 /* Return 1 if X and Y are identical-looking rtx's.
2008 This is the Lisp function EQUAL for rtx arguments.
2010 If two registers are matching movables or a movable register and an
2011 equivalent constant, consider them equal. */
2013 static int
2014 rtx_equal_for_loop_p (rtx x, rtx y, struct loop_movables *movables,
2015 struct loop_regs *regs)
2017 int i;
2018 int j;
2019 struct movable *m;
2020 enum rtx_code code;
2021 const char *fmt;
2023 if (x == y)
2024 return 1;
2025 if (x == 0 || y == 0)
2026 return 0;
2028 code = GET_CODE (x);
2030 /* If we have a register and a constant, they may sometimes be
2031 equal. */
2032 if (REG_P (x) && regs->array[REGNO (x)].set_in_loop == -2
2033 && CONSTANT_P (y))
2035 for (m = movables->head; m; m = m->next)
2036 if (m->move_insn && m->regno == REGNO (x)
2037 && rtx_equal_p (m->set_src, y))
2038 return 1;
2040 else if (REG_P (y) && regs->array[REGNO (y)].set_in_loop == -2
2041 && CONSTANT_P (x))
2043 for (m = movables->head; m; m = m->next)
2044 if (m->move_insn && m->regno == REGNO (y)
2045 && rtx_equal_p (m->set_src, x))
2046 return 1;
2049 /* Otherwise, rtx's of different codes cannot be equal. */
2050 if (code != GET_CODE (y))
2051 return 0;
2053 /* (MULT:SI x y) and (MULT:HI x y) are NOT equivalent.
2054 (REG:SI x) and (REG:HI x) are NOT equivalent. */
2056 if (GET_MODE (x) != GET_MODE (y))
2057 return 0;
2059 /* These three types of rtx's can be compared nonrecursively. */
2060 if (code == REG)
2061 return (REGNO (x) == REGNO (y) || regs_match_p (x, y, movables));
2063 if (code == LABEL_REF)
2064 return XEXP (x, 0) == XEXP (y, 0);
2065 if (code == SYMBOL_REF)
2066 return XSTR (x, 0) == XSTR (y, 0);
2068 /* Compare the elements. If any pair of corresponding elements
2069 fail to match, return 0 for the whole things. */
2071 fmt = GET_RTX_FORMAT (code);
2072 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
2074 switch (fmt[i])
2076 case 'w':
2077 if (XWINT (x, i) != XWINT (y, i))
2078 return 0;
2079 break;
2081 case 'i':
2082 if (XINT (x, i) != XINT (y, i))
2083 return 0;
2084 break;
2086 case 'E':
2087 /* Two vectors must have the same length. */
2088 if (XVECLEN (x, i) != XVECLEN (y, i))
2089 return 0;
2091 /* And the corresponding elements must match. */
2092 for (j = 0; j < XVECLEN (x, i); j++)
2093 if (rtx_equal_for_loop_p (XVECEXP (x, i, j), XVECEXP (y, i, j),
2094 movables, regs) == 0)
2095 return 0;
2096 break;
2098 case 'e':
2099 if (rtx_equal_for_loop_p (XEXP (x, i), XEXP (y, i), movables, regs)
2100 == 0)
2101 return 0;
2102 break;
2104 case 's':
2105 if (strcmp (XSTR (x, i), XSTR (y, i)))
2106 return 0;
2107 break;
2109 case 'u':
2110 /* These are just backpointers, so they don't matter. */
2111 break;
2113 case '0':
2114 break;
2116 /* It is believed that rtx's at this level will never
2117 contain anything but integers and other rtx's,
2118 except for within LABEL_REFs and SYMBOL_REFs. */
2119 default:
2120 abort ();
2123 return 1;
2126 /* If X contains any LABEL_REF's, add REG_LABEL notes for them to all
2127 insns in INSNS which use the reference. LABEL_NUSES for CODE_LABEL
2128 references is incremented once for each added note. */
2130 static void
2131 add_label_notes (rtx x, rtx insns)
2133 enum rtx_code code = GET_CODE (x);
2134 int i, j;
2135 const char *fmt;
2136 rtx insn;
2138 if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
2140 /* This code used to ignore labels that referred to dispatch tables to
2141 avoid flow generating (slightly) worse code.
2143 We no longer ignore such label references (see LABEL_REF handling in
2144 mark_jump_label for additional information). */
2145 for (insn = insns; insn; insn = NEXT_INSN (insn))
2146 if (reg_mentioned_p (XEXP (x, 0), insn))
2148 REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL, XEXP (x, 0),
2149 REG_NOTES (insn));
2150 if (LABEL_P (XEXP (x, 0)))
2151 LABEL_NUSES (XEXP (x, 0))++;
2155 fmt = GET_RTX_FORMAT (code);
2156 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
2158 if (fmt[i] == 'e')
2159 add_label_notes (XEXP (x, i), insns);
2160 else if (fmt[i] == 'E')
2161 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
2162 add_label_notes (XVECEXP (x, i, j), insns);
2166 /* Scan MOVABLES, and move the insns that deserve to be moved.
2167 If two matching movables are combined, replace one reg with the
2168 other throughout.

   LOOP is the loop being processed; its start and end insns and its
   LOOP_REGS table are read here.  THRESHOLD and INSN_COUNT feed the
   desirability test: among other conditions, a movable is hoisted when
   THRESHOLD * savings * lifetime is at least INSN_COUNT (or twice
   INSN_COUNT for a register whose moved_once flag is already set).
   THRESHOLD is lowered by 3 each time a movable is hoisted through the
   move_insn path or the general path.

   Register replacements for combined movables are collected in the
   local REG_MAP and applied by a final walk that starts at the first
   hoisted insn (or at LOOP->start when nothing was recorded) and stops
   at LOOP->end.  REG_MAP and ALREADY_MOVED are allocated here and freed
   before returning.  */
2170 static void
2171 move_movables (struct loop *loop, struct loop_movables *movables,
2172 int threshold, int insn_count)
2174 struct loop_regs *regs = LOOP_REGS (loop);
2175 int nregs = regs->num;
/* First insn emitted before the loop by this function, or 0 if none
   has been recorded yet.  */
2176 rtx new_start = 0;
2177 struct movable *m;
2178 rtx p;
2179 rtx loop_start = loop->start;
2180 rtx loop_end = loop->end;
2181 /* Map of pseudo-register replacements to handle combining
2182 when we move several insns that load the same value
2183 into different pseudo-registers. */
2184 rtx *reg_map = xcalloc (nregs, sizeof (rtx));
/* already_moved[REGNO] is set once a movable loading REGNO has been
   hoisted; any other movable loading the same register must then be
   moved as well.  */
2185 char *already_moved = xcalloc (nregs, sizeof (char));
2187 for (m = movables->head; m; m = m->next)
2189 /* Describe this movable insn. */
2191 if (loop_dump_stream)
2193 fprintf (loop_dump_stream, "Insn %d: regno %d (life %d), ",
2194 INSN_UID (m->insn), m->regno, m->lifetime);
2195 if (m->consec > 0)
2196 fprintf (loop_dump_stream, "consec %d, ", m->consec);
2197 if (m->cond)
2198 fprintf (loop_dump_stream, "cond ");
2199 if (m->force)
2200 fprintf (loop_dump_stream, "force ");
2201 if (m->global)
2202 fprintf (loop_dump_stream, "global ");
2203 if (m->done)
2204 fprintf (loop_dump_stream, "done ");
2205 if (m->move_insn)
2206 fprintf (loop_dump_stream, "move-insn ");
2207 if (m->match)
2208 fprintf (loop_dump_stream, "matches %d ",
2209 INSN_UID (m->match->insn));
2210 if (m->forces)
2211 fprintf (loop_dump_stream, "forces %d ",
2212 INSN_UID (m->forces->insn));
2215 /* Ignore the insn if it's already done (it matched something else).
2216 Otherwise, see if it is now safe to move.  A conditional movable
   (m->cond) is safe only if its source, its dependencies (if any) and
   its consecutive sets (if any) are now reported as invariant, and a
   movable that forces another is safe only once that other is done. */
2218 if (!m->done
2219 && (! m->cond
2220 || (1 == loop_invariant_p (loop, m->set_src)
2221 && (m->dependencies == 0
2222 || 1 == loop_invariant_p (loop, m->dependencies))
2223 && (m->consec == 0
2224 || 1 == consec_sets_invariant_p (loop, m->set_dest,
2225 m->consec + 1,
2226 m->insn))))
2227 && (! m->forces || m->forces->done))
2229 int regno;
/* This P is a separate declaration from the function-level P, which
   it hides for the rest of this block.  */
2230 rtx p;
2231 int savings = m->savings;
2233 /* We have an insn that is safe to move.
2234 Compute its desirability. */
2236 p = m->insn;
2237 regno = m->regno;
2239 if (loop_dump_stream)
2240 fprintf (loop_dump_stream, "savings %d ", savings);
2242 if (regs->array[regno].moved_once && loop_dump_stream)
2243 fprintf (loop_dump_stream, "halved since already moved ");
2245 /* An insn MUST be moved if we already moved something else
2246 which is safe only if this one is moved too: that is,
2247 if already_moved[REGNO] is nonzero. */
2249 /* An insn is desirable to move if the new lifetime of the
2250 register is no more than THRESHOLD times the old lifetime.
2251 If it's not desirable, it means the loop is so big
2252 that moving won't speed things up much,
2253 and it is liable to make register usage worse. */
2255 /* It is also desirable to move if it can be moved at no
2256 extra cost because something else was already moved. */
2258 if (already_moved[regno]
2259 || (threshold * savings * m->lifetime) >=
2260 (regs->array[regno].moved_once ? insn_count * 2 : insn_count)
2261 || (m->forces && m->forces->done
2262 && regs->array[m->forces->regno].n_times_set == 1))
2264 int count;
2265 struct movable *m1;
2266 rtx first = NULL_RTX;
2267 rtx newreg = NULL_RTX;
/* With insert_temp set, the hoisted computation targets a fresh
   pseudo NEWREG, and a copy from NEWREG into the original destination
   is emitted inside the loop in place of the original insn.  */
2269 if (m->insert_temp)
2270 newreg = gen_reg_rtx (GET_MODE (m->set_dest));
2272 /* Now move the insns that set the reg. */
2274 if (m->partial && m->match)
2276 rtx newpat, i1;
2277 rtx r1, r2;
2278 /* Find the end of this chain of matching regs.
2279 Thus, we load each reg in the chain from that one reg.
2280 And that reg is loaded with 0 directly,
2281 since it has ->match == 0. */
2282 for (m1 = m; m1->match; m1 = m1->match);
2283 newpat = gen_move_insn (SET_DEST (PATTERN (m->insn)),
2284 SET_DEST (PATTERN (m1->insn)));
2285 i1 = loop_insn_hoist (loop, newpat);
2287 /* Mark the moved, invariant reg as being allowed to
2288 share a hard reg with the other matching invariant. */
2289 REG_NOTES (i1) = REG_NOTES (m->insn);
2290 r1 = SET_DEST (PATTERN (m->insn));
2291 r2 = SET_DEST (PATTERN (m1->insn));
2292 regs_may_share
2293 = gen_rtx_EXPR_LIST (VOIDmode, r1,
2294 gen_rtx_EXPR_LIST (VOIDmode, r2,
2295 regs_may_share));
2296 delete_insn (m->insn);
2298 if (new_start == 0)
2299 new_start = i1;
2301 if (loop_dump_stream)
2302 fprintf (loop_dump_stream, " moved to %d", INSN_UID (i1));
2304 /* If we are to re-generate the item being moved with a
2305 new move insn, first delete what we have and then emit
2306 the move insn before the loop. */
2307 else if (m->move_insn)
2309 rtx i1, temp, seq;
2311 for (count = m->consec; count >= 0; count--)
2313 /* If this is the first insn of a library call sequence,
2314 something is very wrong. */
2315 if (!NOTE_P (p)
2316 && (temp = find_reg_note (p, REG_LIBCALL, NULL_RTX)))
2317 abort ();
2319 /* If this is the last insn of a libcall sequence, then
2320 delete every insn in the sequence except the last.
2321 The last insn is handled in the normal manner. */
2322 if (!NOTE_P (p)
2323 && (temp = find_reg_note (p, REG_RETVAL, NULL_RTX)))
2325 temp = XEXP (temp, 0);
2326 while (temp != p)
2327 temp = delete_insn (temp);
2330 temp = p;
2331 p = delete_insn (p);
2333 /* simplify_giv_expr expects that it can walk the insns
2334 at m->insn forwards and see this old sequence we are
2335 tossing here. delete_insn does preserve the next
2336 pointers, but when we skip over a NOTE we must fix
2337 it up. Otherwise that code walks into the non-deleted
2338 insn stream. */
2339 while (p && NOTE_P (p))
2340 p = NEXT_INSN (temp) = NEXT_INSN (p);
2342 if (m->insert_temp)
2344 /* Replace the original insn with a move from
2345 our newly created temp. */
2346 start_sequence ();
2347 emit_move_insn (m->set_dest, newreg);
2348 seq = get_insns ();
2349 end_sequence ();
2350 emit_insn_before (seq, p);
/* Build the replacement load of the invariant value as a detached
   sequence, then hoist that sequence in front of the loop.  */
2354 start_sequence ();
2355 emit_move_insn (m->insert_temp ? newreg : m->set_dest,
2356 m->set_src);
2357 seq = get_insns ();
2358 end_sequence ();
2360 add_label_notes (m->set_src, seq);
2362 i1 = loop_insn_hoist (loop, seq);
2363 if (! find_reg_note (i1, REG_EQUAL, NULL_RTX))
2364 set_unique_reg_note (i1,
2365 m->is_equiv ? REG_EQUIV : REG_EQUAL,
2366 m->set_src);
2368 if (loop_dump_stream)
2369 fprintf (loop_dump_stream, " moved to %d", INSN_UID (i1));
2371 /* The more regs we move, the less we like moving them. */
2372 threshold -= 3;
2374 else
2376 for (count = m->consec; count >= 0; count--)
2378 rtx i1, temp;
2380 /* If first insn of libcall sequence, skip to end. */
2381 /* Do this at start of loop, since p is guaranteed to
2382 be an insn here. */
2383 if (!NOTE_P (p)
2384 && (temp = find_reg_note (p, REG_LIBCALL, NULL_RTX)))
2385 p = XEXP (temp, 0);
2387 /* If last insn of libcall sequence, move all
2388 insns except the last before the loop. The last
2389 insn is handled in the normal manner. */
2390 if (!NOTE_P (p)
2391 && (temp = find_reg_note (p, REG_RETVAL, NULL_RTX)))
2393 rtx fn_address = 0;
2394 rtx fn_reg = 0;
2395 rtx fn_address_insn = 0;
2397 first = 0;
2398 for (temp = XEXP (temp, 0); temp != p;
2399 temp = NEXT_INSN (temp))
2401 rtx body;
2402 rtx n;
2403 rtx next;
2405 if (NOTE_P (temp))
2406 continue;
2408 body = PATTERN (temp);
2410 /* Find the next insn after TEMP,
2411 not counting USE or NOTE insns. */
2412 for (next = NEXT_INSN (temp); next != p;
2413 next = NEXT_INSN (next))
2414 if (! (NONJUMP_INSN_P (next)
2415 && GET_CODE (PATTERN (next)) == USE)
2416 && !NOTE_P (next))
2417 break;
2419 /* If that is the call, this may be the insn
2420 that loads the function address.
2422 Extract the function address from the insn
2423 that loads it into a register.
2424 If this insn was cse'd, we get incorrect code.
2426 So emit a new move insn that copies the
2427 function address into the register that the
2428 call insn will use. flow.c will delete any
2429 redundant stores that we have created. */
2430 if (CALL_P (next)
2431 && GET_CODE (body) == SET
2432 && REG_P (SET_DEST (body))
2433 && (n = find_reg_note (temp, REG_EQUAL,
2434 NULL_RTX)))
2436 fn_reg = SET_SRC (body);
2437 if (!REG_P (fn_reg))
2438 fn_reg = SET_DEST (body);
2439 fn_address = XEXP (n, 0);
2440 fn_address_insn = temp;
2442 /* We have the call insn.
2443 If it uses the register we suspect it might,
2444 load it with the correct address directly. */
2445 if (CALL_P (temp)
2446 && fn_address != 0
2447 && reg_referenced_p (fn_reg, body))
2448 loop_insn_emit_after (loop, 0, fn_address_insn,
2449 gen_move_insn
2450 (fn_reg, fn_address));
2452 if (CALL_P (temp))
2454 i1 = loop_call_insn_hoist (loop, body);
2455 /* Because the USAGE information potentially
2456 contains objects other than hard registers
2457 we need to copy it. */
2458 if (CALL_INSN_FUNCTION_USAGE (temp))
2459 CALL_INSN_FUNCTION_USAGE (i1)
2460 = copy_rtx (CALL_INSN_FUNCTION_USAGE (temp));
2462 else
2463 i1 = loop_insn_hoist (loop, body);
2464 if (first == 0)
2465 first = i1;
/* Keep FN_ADDRESS_INSN pointing at the hoisted copy rather than at the
   original insn, which is deleted just below.  */
2466 if (temp == fn_address_insn)
2467 fn_address_insn = i1;
2468 REG_NOTES (i1) = REG_NOTES (temp);
2469 REG_NOTES (temp) = NULL;
2470 delete_insn (temp);
2472 if (new_start == 0)
2473 new_start = first;
2475 if (m->savemode != VOIDmode)
2477 /* P sets REG to zero; but we should clear only
2478 the bits that are not covered by the mode
2479 m->savemode. */
2480 rtx reg = m->set_dest;
2481 rtx sequence;
2482 rtx tem;
2484 start_sequence ();
2485 tem = expand_simple_binop
2486 (GET_MODE (reg), AND, reg,
2487 GEN_INT ((((HOST_WIDE_INT) 1
2488 << GET_MODE_BITSIZE (m->savemode)))
2489 - 1),
2490 reg, 1, OPTAB_LIB_WIDEN);
2491 if (tem == 0)
2492 abort ();
2493 if (tem != reg)
2494 emit_move_insn (reg, tem);
2495 sequence = get_insns ();
2496 end_sequence ();
2497 i1 = loop_insn_hoist (loop, sequence);
2499 else if (CALL_P (p))
2501 i1 = loop_call_insn_hoist (loop, PATTERN (p));
2502 /* Because the USAGE information potentially
2503 contains objects other than hard registers
2504 we need to copy it. */
2505 if (CALL_INSN_FUNCTION_USAGE (p))
2506 CALL_INSN_FUNCTION_USAGE (i1)
2507 = copy_rtx (CALL_INSN_FUNCTION_USAGE (p));
2509 else if (count == m->consec && m->move_insn_first)
2511 rtx seq;
2512 /* The SET_SRC might not be invariant, so we must
2513 use the REG_EQUAL note. */
2514 start_sequence ();
2515 emit_move_insn (m->insert_temp ? newreg : m->set_dest,
2516 m->set_src);
2517 seq = get_insns ();
2518 end_sequence ();
2520 add_label_notes (m->set_src, seq);
2522 i1 = loop_insn_hoist (loop, seq);
2523 if (! find_reg_note (i1, REG_EQUAL, NULL_RTX))
2524 set_unique_reg_note (i1, m->is_equiv ? REG_EQUIV
2525 : REG_EQUAL, m->set_src);
2527 else if (m->insert_temp)
/* Hoist a copy of P's pattern and rewrite m->regno to NEWREG in the
   hoisted insn.  REG_MAP2 is a throwaway map of REGNO (newreg) entries
   in which only the entry for m->regno is filled in.  */
2529 rtx *reg_map2 = xcalloc (REGNO (newreg),
2530 sizeof(rtx));
2531 reg_map2 [m->regno] = newreg;
2533 i1 = loop_insn_hoist (loop, copy_rtx (PATTERN (p)));
2534 replace_regs (i1, reg_map2, REGNO (newreg), 1);
2535 free (reg_map2);
2537 else
2538 i1 = loop_insn_hoist (loop, PATTERN (p));
/* If the hoisted insn has no notes of its own, hand P's notes over to
   it.  */
2540 if (REG_NOTES (i1) == 0)
2542 REG_NOTES (i1) = REG_NOTES (p);
2543 REG_NOTES (p) = NULL;
2545 /* If there is a REG_EQUAL note present whose value
2546 is not loop invariant, then delete it, since it
2547 may cause problems with later optimization passes.
2548 It is possible for cse to create such notes
2549 like this as a result of record_jump_cond. */
2551 if ((temp = find_reg_note (i1, REG_EQUAL, NULL_RTX))
2552 && ! loop_invariant_p (loop, XEXP (temp, 0)))
2553 remove_note (i1, temp);
2556 if (new_start == 0)
2557 new_start = i1;
2559 if (loop_dump_stream)
2560 fprintf (loop_dump_stream, " moved to %d",
2561 INSN_UID (i1));
2563 /* If library call, now fix the REG_NOTES that contain
2564 insn pointers, namely REG_LIBCALL on FIRST
2565 and REG_RETVAL on I1. */
2566 if ((temp = find_reg_note (i1, REG_RETVAL, NULL_RTX)))
2568 XEXP (temp, 0) = first;
2569 temp = find_reg_note (first, REG_LIBCALL, NULL_RTX);
2570 XEXP (temp, 0) = i1;
/* P is read again after delete_insn; per the comment below,
   delete_insn preserves the next pointers.  */
2573 temp = p;
2574 delete_insn (p);
2575 p = NEXT_INSN (p);
2577 /* simplify_giv_expr expects that it can walk the insns
2578 at m->insn forwards and see this old sequence we are
2579 tossing here. delete_insn does preserve the next
2580 pointers, but when we skip over a NOTE we must fix
2581 it up. Otherwise that code walks into the non-deleted
2582 insn stream. */
2583 while (p && NOTE_P (p))
2584 p = NEXT_INSN (temp) = NEXT_INSN (p);
2586 if (m->insert_temp)
2588 rtx seq;
2589 /* Replace the original insn with a move from
2590 our newly created temp. */
2591 start_sequence ();
2592 emit_move_insn (m->set_dest, newreg);
2593 seq = get_insns ();
2594 end_sequence ();
2595 emit_insn_before (seq, p);
2599 /* The more regs we move, the less we like moving them. */
2600 threshold -= 3;
2603 m->done = 1;
2605 if (!m->insert_temp)
2607 /* Any other movable that loads the same register
2608 MUST be moved. */
2609 already_moved[regno] = 1;
2611 /* This reg has been moved out of one loop. */
2612 regs->array[regno].moved_once = 1;
2614 /* The reg set here is now invariant. */
2615 if (! m->partial)
2617 int i;
2618 for (i = 0; i < LOOP_REGNO_NREGS (regno, m->set_dest); i++)
2619 regs->array[regno+i].set_in_loop = 0;
2622 /* Change the length-of-life info for the register
2623 to say it lives at least the full length of this loop.
2624 This will help guide optimizations in outer loops. */
2626 if (REGNO_FIRST_LUID (regno) > INSN_LUID (loop_start))
2627 /* This is the old insn before all the moved insns.
2628 We can't use the moved insn because it is out of range
2629 in uid_luid. Only the old insns have luids. */
2630 REGNO_FIRST_UID (regno) = INSN_UID (loop_start);
2631 if (REGNO_LAST_LUID (regno) < INSN_LUID (loop_end))
2632 REGNO_LAST_UID (regno) = INSN_UID (loop_end);
2635 /* Combine with this moved insn any other matching movables. */
2637 if (! m->partial)
2638 for (m1 = movables->head; m1; m1 = m1->next)
2639 if (m1->match == m)
2641 rtx temp;
2643 /* Schedule the reg loaded by M1
2644 for replacement so that it shares the reg of M.
2645 If the modes differ (only possible in restricted
2646 circumstances), make a SUBREG.
2648 Note this assumes that the target dependent files
2649 treat REG and SUBREG equally, including within
2650 GO_IF_LEGITIMATE_ADDRESS and in all the
2651 predicates since we never verify that replacing the
2652 original register with a SUBREG results in a
2653 recognizable insn. */
2654 if (GET_MODE (m->set_dest) == GET_MODE (m1->set_dest))
2655 reg_map[m1->regno] = m->set_dest;
2656 else
2657 reg_map[m1->regno]
2658 = gen_lowpart_common (GET_MODE (m1->set_dest),
2659 m->set_dest);
2661 /* Get rid of the matching insn
2662 and prevent further processing of it. */
2663 m1->done = 1;
2665 /* If library call, delete all insns. */
2666 if ((temp = find_reg_note (m1->insn, REG_RETVAL,
2667 NULL_RTX)))
2668 delete_insn_chain (XEXP (temp, 0), m1->insn);
2669 else
2670 delete_insn (m1->insn);
2672 /* Any other movable that loads the same register
2673 MUST be moved. */
2674 already_moved[m1->regno] = 1;
2676 /* The reg merged here is now invariant,
2677 if the reg it matches is invariant.
   NOTE(review): this `! m->partial' test appears redundant, since the
   enclosing loop over M1 already seems to run only when M is not
   partial; confirm against the unabridged source.  */
2678 if (! m->partial)
2680 int i;
/* NOTE(review): the count is computed from REGNO (M's register) while
   the entries cleared belong to M1->regno; confirm whether M1->regno
   was intended as the first argument.  */
2681 for (i = 0;
2682 i < LOOP_REGNO_NREGS (regno, m1->set_dest);
2683 i++)
2684 regs->array[m1->regno+i].set_in_loop = 0;
2688 else if (loop_dump_stream)
2689 fprintf (loop_dump_stream, "not desirable");
2691 else if (loop_dump_stream && !m->match)
2692 fprintf (loop_dump_stream, "not safe");
2694 if (loop_dump_stream)
2695 fprintf (loop_dump_stream, "\n");
/* If no hoisted insn was recorded, the substitution walk below starts
   at the loop start.  */
2698 if (new_start == 0)
2699 new_start = loop_start;
2701 /* Go through all the instructions in the loop, making
2702 all the register substitutions scheduled in REG_MAP.
   INSN_CODE is reset to -1 on every insn visited.  */
2703 for (p = new_start; p != loop_end; p = NEXT_INSN (p))
2704 if (INSN_P (p))
2706 replace_regs (PATTERN (p), reg_map, nregs, 0);
2707 replace_regs (REG_NOTES (p), reg_map, nregs, 0);
2708 INSN_CODE (p) = -1;
2711 /* Clean up. */
2712 free (reg_map);
2713 free (already_moved);
2717 static void
2718 loop_movables_add (struct loop_movables *movables, struct movable *m)
2720 if (movables->head == 0)
2721 movables->head = m;
2722 else
2723 movables->last->next = m;
2724 movables->last = m;
2728 static void
2729 loop_movables_free (struct loop_movables *movables)
2731 struct movable *m;
2732 struct movable *m_next;
2734 for (m = movables->head; m; m = m_next)
2736 m_next = m->next;
2737 free (m);
#if 0
/* Scan X and replace the address of any MEM in it with ADDR.
   REG is the address that MEM should have before the replacement; a
   MEM whose address is any other rtx makes the compiler abort.

   For a SET only the source operand is scanned, and for a CALL only
   operand 0.  A MEM's address is replaced without being scanned.  */

static void
replace_call_address (rtx x, rtx reg, rtx addr)
{
  enum rtx_code code;
  const char *fmt;
  int idx;

  if (x == 0)
    return;

  code = GET_CODE (x);

  /* Codes that cannot contain a MEM need no scanning.  */
  if (code == PC || code == CC0 || code == CONST_INT
      || code == CONST_DOUBLE || code == CONST || code == SYMBOL_REF
      || code == LABEL_REF || code == REG)
    return;

  /* Short cuts for two very common cases.  */
  if (code == SET)
    {
      replace_call_address (XEXP (x, 1), reg, addr);
      return;
    }

  if (code == CALL)
    {
      replace_call_address (XEXP (x, 0), reg, addr);
      return;
    }

  if (code == MEM)
    {
      /* If this MEM uses a reg other than the one we expected,
         something is wrong.  */
      if (XEXP (x, 0) != reg)
        abort ();
      XEXP (x, 0) = addr;
      return;
    }

  /* Anything else: scan every rtx operand and every element of every
     rtx vector operand, last operand first.  */
  fmt = GET_RTX_FORMAT (code);
  for (idx = GET_RTX_LENGTH (code) - 1; idx >= 0; idx--)
    switch (fmt[idx])
      {
      case 'e':
        replace_call_address (XEXP (x, idx), reg, addr);
        break;

      case 'E':
        {
          int elt;

          for (elt = 0; elt < XVECLEN (x, idx); elt++)
            replace_call_address (XVECEXP (x, idx, elt), reg, addr);
        }
        break;

      default:
        break;
      }
}
#endif
2804 /* Return the number of memory refs to addresses that vary
2805 in the rtx X. */
2807 static int
2808 count_nonfixed_reads (const struct loop *loop, rtx x)
2810 enum rtx_code code;
2811 int i;
2812 const char *fmt;
2813 int value;
2815 if (x == 0)
2816 return 0;
2818 code = GET_CODE (x);
2819 switch (code)
2821 case PC:
2822 case CC0:
2823 case CONST_INT:
2824 case CONST_DOUBLE:
2825 case CONST:
2826 case SYMBOL_REF:
2827 case LABEL_REF:
2828 case REG:
2829 return 0;
2831 case MEM:
2832 return ((loop_invariant_p (loop, XEXP (x, 0)) != 1)
2833 + count_nonfixed_reads (loop, XEXP (x, 0)));
2835 default:
2836 break;
2839 value = 0;
2840 fmt = GET_RTX_FORMAT (code);
2841 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
2843 if (fmt[i] == 'e')
2844 value += count_nonfixed_reads (loop, XEXP (x, i));
2845 if (fmt[i] == 'E')
2847 int j;
2848 for (j = 0; j < XVECLEN (x, i); j++)
2849 value += count_nonfixed_reads (loop, XVECEXP (x, i, j));
2852 return value;
2855 /* Scan a loop setting the elements `loops_enclosed',
2856 `has_call', `has_nonconst_call', `has_volatile', `has_tablejump',
2857 `unknown_address_altered', `unknown_constant_address_altered', and
2858 `num_mem_sets' in LOOP. Also, fill in the array `mems' and the
2859 list `store_mems' in LOOP. */
2861 static void
2862 prescan_loop (struct loop *loop)
2864 int level = 1;
2865 rtx insn;
2866 struct loop_info *loop_info = LOOP_INFO (loop);
2867 rtx start = loop->start;
2868 rtx end = loop->end;
2869 /* The label after END. Jumping here is just like falling off the
2870 end of the loop. We use next_nonnote_insn instead of next_label
2871 as a hedge against the (pathological) case where some actual insn
2872 might end up between the two. */
2873 rtx exit_target = next_nonnote_insn (end);
2875 loop_info->has_indirect_jump = indirect_jump_in_function;
2876 loop_info->pre_header_has_call = 0;
2877 loop_info->has_call = 0;
2878 loop_info->has_nonconst_call = 0;
2879 loop_info->has_prefetch = 0;
2880 loop_info->has_volatile = 0;
2881 loop_info->has_tablejump = 0;
2882 loop_info->has_multiple_exit_targets = 0;
2883 loop->level = 1;
2885 loop_info->unknown_address_altered = 0;
2886 loop_info->unknown_constant_address_altered = 0;
2887 loop_info->store_mems = NULL_RTX;
2888 loop_info->first_loop_store_insn = NULL_RTX;
2889 loop_info->mems_idx = 0;
2890 loop_info->num_mem_sets = 0;
2892 for (insn = start; insn && !LABEL_P (insn);
2893 insn = PREV_INSN (insn))
2895 if (CALL_P (insn))
2897 loop_info->pre_header_has_call = 1;
2898 break;
2902 for (insn = NEXT_INSN (start); insn != NEXT_INSN (end);
2903 insn = NEXT_INSN (insn))
2905 switch (GET_CODE (insn))
2907 case NOTE:
2908 if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
2910 ++level;
2911 /* Count number of loops contained in this one. */
2912 loop->level++;
2914 else if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_END)
2915 --level;
2916 break;
2918 case CALL_INSN:
2919 if (! CONST_OR_PURE_CALL_P (insn))
2921 loop_info->unknown_address_altered = 1;
2922 loop_info->has_nonconst_call = 1;
2924 else if (pure_call_p (insn))
2925 loop_info->has_nonconst_call = 1;
2926 loop_info->has_call = 1;
2927 if (can_throw_internal (insn))
2928 loop_info->has_multiple_exit_targets = 1;
2929 break;
2931 case JUMP_INSN:
2932 if (! loop_info->has_multiple_exit_targets)
2934 rtx set = pc_set (insn);
2936 if (set)
2938 rtx src = SET_SRC (set);
2939 rtx label1, label2;
2941 if (GET_CODE (src) == IF_THEN_ELSE)
2943 label1 = XEXP (src, 1);
2944 label2 = XEXP (src, 2);
2946 else
2948 label1 = src;
2949 label2 = NULL_RTX;
2954 if (label1 && label1 != pc_rtx)
2956 if (GET_CODE (label1) != LABEL_REF)
2958 /* Something tricky. */
2959 loop_info->has_multiple_exit_targets = 1;
2960 break;
2962 else if (XEXP (label1, 0) != exit_target
2963 && LABEL_OUTSIDE_LOOP_P (label1))
2965 /* A jump outside the current loop. */
2966 loop_info->has_multiple_exit_targets = 1;
2967 break;
2971 label1 = label2;
2972 label2 = NULL_RTX;
2974 while (label1);
2976 else
2978 /* A return, or something tricky. */
2979 loop_info->has_multiple_exit_targets = 1;
2982 /* Fall through. */
2984 case INSN:
2985 if (volatile_refs_p (PATTERN (insn)))
2986 loop_info->has_volatile = 1;
2988 if (JUMP_P (insn)
2989 && (GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
2990 || GET_CODE (PATTERN (insn)) == ADDR_VEC))
2991 loop_info->has_tablejump = 1;
2993 note_stores (PATTERN (insn), note_addr_stored, loop_info);
2994 if (! loop_info->first_loop_store_insn && loop_info->store_mems)
2995 loop_info->first_loop_store_insn = insn;
2997 if (flag_non_call_exceptions && can_throw_internal (insn))
2998 loop_info->has_multiple_exit_targets = 1;
2999 break;
3001 default:
3002 break;
3006 /* Now, rescan the loop, setting up the LOOP_MEMS array. */
3007 if (/* An exception thrown by a called function might land us
3008 anywhere. */
3009 ! loop_info->has_nonconst_call
3010 /* We don't want loads for MEMs moved to a location before the
3011 one at which their stack memory becomes allocated. (Note
3012 that this is not a problem for malloc, etc., since those
3013 require actual function calls. */
3014 && ! current_function_calls_alloca
3015 /* There are ways to leave the loop other than falling off the
3016 end. */
3017 && ! loop_info->has_multiple_exit_targets)
3018 for (insn = NEXT_INSN (start); insn != NEXT_INSN (end);
3019 insn = NEXT_INSN (insn))
3020 for_each_rtx (&insn, insert_loop_mem, loop_info);
3022 /* BLKmode MEMs are added to LOOP_STORE_MEM as necessary so
3023 that loop_invariant_p and load_mems can use true_dependence
3024 to determine what is really clobbered. */
3025 if (loop_info->unknown_address_altered)
3027 rtx mem = gen_rtx_MEM (BLKmode, const0_rtx);
3029 loop_info->store_mems
3030 = gen_rtx_EXPR_LIST (VOIDmode, mem, loop_info->store_mems);
3032 if (loop_info->unknown_constant_address_altered)
3034 rtx mem = gen_rtx_MEM (BLKmode, const0_rtx);
3035 MEM_READONLY_P (mem) = 1;
3036 loop_info->store_mems
3037 = gen_rtx_EXPR_LIST (VOIDmode, mem, loop_info->store_mems);
3041 /* Invalidate all loops containing LABEL. */
3043 static void
3044 invalidate_loops_containing_label (rtx label)
3046 struct loop *loop;
3047 for (loop = uid_loop[INSN_UID (label)]; loop; loop = loop->outer)
3048 loop->invalid = 1;
3051 /* Scan the insn chain starting at F looking for loops.  Record the
   start and end of each loop in LOOPS->array, and record in uid_loop the
   innermost loop containing each insn.  Also mark as invalid any loops
   that contain a setjmp call, contain a label whose address is taken, or
   are branched to from outside the loop.

   A second pass over the insns looks for a block ending in an
   unconditional branch out of the loop that is skipped by a conditional
   branch, and tries to move such a block out of the loop.  */
3055 static void
3056 find_and_verify_loops (rtx f, struct loops *loops)
3058 rtx insn;
3059 rtx label;
3060 int num_loops;
3061 struct loop *current_loop;
3062 struct loop *next_loop;
3063 struct loop *loop;
3065 num_loops = loops->num;
/* NOTE(review): the value just stored in NUM_LOOPS is overwritten with 0
   below before it is read.  */
3067 compute_luids (f, NULL_RTX, 0);
3069 /* If there are jumps to undefined labels,
3070 treat them as jumps out of any/all loops.
3071 This also avoids writing past end of tables when there are no loops. */
3072 uid_loop[0] = NULL;
3074 /* Find boundaries of loops, mark which loops are contained within
3075 loops, and invalidate loops that have setjmp. */
3077 num_loops = 0;
3078 current_loop = NULL;
3079 for (insn = f; insn; insn = NEXT_INSN (insn))
3081 if (NOTE_P (insn))
3082 switch (NOTE_LINE_NUMBER (insn))
3084 case NOTE_INSN_LOOP_BEG:
3085 next_loop = loops->array + num_loops;
3086 next_loop->num = num_loops;
3087 num_loops++;
3088 next_loop->start = insn;
3089 next_loop->outer = current_loop;
3090 current_loop = next_loop;
3091 break;
3093 case NOTE_INSN_LOOP_END:
3094 if (! current_loop)
3095 abort ();
3097 current_loop->end = insn;
3098 current_loop = current_loop->outer;
3099 break;
3101 default:
3102 break;
3105 if (CALL_P (insn)
3106 && find_reg_note (insn, REG_SETJMP, NULL))
3108 /* In this case, we must invalidate our current loop and any
3109 enclosing loop. */
3110 for (loop = current_loop; loop; loop = loop->outer)
3112 loop->invalid = 1;
3113 if (loop_dump_stream)
3114 fprintf (loop_dump_stream,
3115 "\nLoop at %d ignored due to setjmp.\n",
3116 INSN_UID (loop->start));
3120 /* Note that this will mark the NOTE_INSN_LOOP_END note as being in the
3121 enclosing loop, but this doesn't matter. */
3122 uid_loop[INSN_UID (insn)] = current_loop;
3125 /* Any loop containing a label used in an initializer must be invalidated,
3126 because it can be jumped into from anywhere. */
3127 for (label = forced_labels; label; label = XEXP (label, 1))
3128 invalidate_loops_containing_label (XEXP (label, 0));
3130 /* Any loop containing a label used for an exception handler must be
3131 invalidated, because it can be jumped into from anywhere. */
3132 for_each_eh_label (invalidate_loops_containing_label);
3134 /* Now scan all insn's in the function. If any JUMP_INSN branches into a
3135 loop that it is not contained within, that loop is marked invalid.
3136 If any INSN or CALL_INSN uses a label's address, then the loop containing
3137 that label is marked invalid, because it could be jumped into from
3138 anywhere.
3140 Also look for blocks of code ending in an unconditional branch that
3141 exits the loop. If such a block is surrounded by a conditional
3142 branch around the block, move the block elsewhere (see below) and
3143 invert the jump to point to the code block. This may eliminate a
3144 label in our loop and will simplify processing by both us and a
3145 possible second cse pass. */
3147 for (insn = f; insn; insn = NEXT_INSN (insn))
3148 if (INSN_P (insn))
3150 struct loop *this_loop = uid_loop[INSN_UID (insn)];
3152 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
3154 rtx note = find_reg_note (insn, REG_LABEL, NULL_RTX);
3155 if (note)
3156 invalidate_loops_containing_label (XEXP (note, 0));
3159 if (!JUMP_P (insn))
3160 continue;
3162 mark_loop_jump (PATTERN (insn), this_loop);
3164 /* See if this is an unconditional branch outside the loop. */
3165 if (this_loop
3166 && (GET_CODE (PATTERN (insn)) == RETURN
3167 || (any_uncondjump_p (insn)
3168 && onlyjump_p (insn)
3169 && (uid_loop[INSN_UID (JUMP_LABEL (insn))]
3170 != this_loop)))
3171 && get_max_uid () < max_uid_for_loop)
3173 rtx p;
3174 rtx our_next = next_real_insn (insn);
3175 rtx last_insn_to_move = NEXT_INSN (insn);
3176 struct loop *dest_loop;
3177 struct loop *outer_loop = NULL;
3179 /* Go backwards until we reach the start of the loop, a label,
3180 or a JUMP_INSN. */
3181 for (p = PREV_INSN (insn);
3182 !LABEL_P (p)
3183 && ! (NOTE_P (p)
3184 && NOTE_LINE_NUMBER (p) == NOTE_INSN_LOOP_BEG)
3185 && !JUMP_P (p);
3186 p = PREV_INSN (p))
3189 /* Check for the case where we have a jump to an inner nested
3190 loop, and do not perform the optimization in that case. */
3192 if (JUMP_LABEL (insn))
3194 dest_loop = uid_loop[INSN_UID (JUMP_LABEL (insn))];
3195 if (dest_loop)
3197 for (outer_loop = dest_loop; outer_loop;
3198 outer_loop = outer_loop->outer)
3199 if (outer_loop == this_loop)
3200 break;
3204 /* Make sure that the target of P is within the current loop. */
3206 if (JUMP_P (p) && JUMP_LABEL (p)
3207 && uid_loop[INSN_UID (JUMP_LABEL (p))] != this_loop)
3208 outer_loop = this_loop;
3210 /* If we stopped on a JUMP_INSN to the next insn after INSN,
3211 we have a block of code to try to move.
3213 We look backward and then forward from the target of INSN
3214 to find a BARRIER at the same loop depth as the target.
3215 If we find such a BARRIER, we make a new label for the start
3216 of the block, invert the jump in P and point it to that label,
3217 and move the block of code to the spot we found. */
3219 if (! outer_loop
3220 && JUMP_P (p)
3221 && JUMP_LABEL (p) != 0
3222 /* Just ignore jumps to labels that were never emitted.
3223 These always indicate compilation errors. */
3224 && INSN_UID (JUMP_LABEL (p)) != 0
3225 && any_condjump_p (p) && onlyjump_p (p)
3226 && next_real_insn (JUMP_LABEL (p)) == our_next
3227 /* If it's not safe to move the sequence, then we
3228 mustn't try. */
3229 && insns_safe_to_move_p (p, NEXT_INSN (insn),
3230 &last_insn_to_move))
3232 rtx target
3233 = JUMP_LABEL (insn) ? JUMP_LABEL (insn) : get_last_insn ();
3234 struct loop *target_loop = uid_loop[INSN_UID (target)];
3235 rtx loc, loc2;
3236 rtx tmp;
3238 /* Search for possible garbage past the conditional jumps
3239 and look for the last barrier. */
3240 for (tmp = last_insn_to_move;
3241 tmp && !LABEL_P (tmp); tmp = NEXT_INSN (tmp))
3242 if (BARRIER_P (tmp))
3243 last_insn_to_move = tmp;
3245 for (loc = target; loc; loc = PREV_INSN (loc))
3246 if (BARRIER_P (loc)
3247 /* Don't move things inside a tablejump. */
3248 && ((loc2 = next_nonnote_insn (loc)) == 0
3249 || !LABEL_P (loc2)
3250 || (loc2 = next_nonnote_insn (loc2)) == 0
3251 || !JUMP_P (loc2)
3252 || (GET_CODE (PATTERN (loc2)) != ADDR_VEC
3253 && GET_CODE (PATTERN (loc2)) != ADDR_DIFF_VEC))
3254 && uid_loop[INSN_UID (loc)] == target_loop)
3255 break;
3257 if (loc == 0)
3258 for (loc = target; loc; loc = NEXT_INSN (loc))
3259 if (BARRIER_P (loc)
3260 /* Don't move things inside a tablejump. */
3261 && ((loc2 = next_nonnote_insn (loc)) == 0
3262 || !LABEL_P (loc2)
3263 || (loc2 = next_nonnote_insn (loc2)) == 0
3264 || !JUMP_P (loc2)
3265 || (GET_CODE (PATTERN (loc2)) != ADDR_VEC
3266 && GET_CODE (PATTERN (loc2)) != ADDR_DIFF_VEC))
3267 && uid_loop[INSN_UID (loc)] == target_loop)
3268 break;
3270 if (loc)
3272 rtx cond_label = JUMP_LABEL (p);
3273 rtx new_label = get_label_after (p);
3275 /* Ensure our label doesn't go away. */
3276 LABEL_NUSES (cond_label)++;
3278 /* Verify that uid_loop is large enough and that
3279 we can invert P. */
3280 if (invert_jump (p, new_label, 1))
3282 rtx q, r;
3284 /* If no suitable BARRIER was found, create a suitable
3285 one before TARGET. Since TARGET is a fall through
3286 path, we'll need to insert a jump around our block
3287 and add a BARRIER before TARGET.
3289 This creates an extra unconditional jump outside
3290 the loop. However, the benefits of removing rarely
3291 executed instructions from inside the loop usually
3292 outweighs the cost of the extra unconditional jump
3293 outside the loop. */
/* NOTE(review): this test appears to be nested inside the `if (loc)'
   test above, in which case LOC cannot be zero here and the code it
   guards is unreachable -- confirm against the braced original.  */
3294 if (loc == 0)
3296 rtx temp;
3298 temp = gen_jump (JUMP_LABEL (insn));
3299 temp = emit_jump_insn_before (temp, target);
3300 JUMP_LABEL (temp) = JUMP_LABEL (insn);
3301 LABEL_NUSES (JUMP_LABEL (insn))++;
3302 loc = emit_barrier_before (target);
3305 /* Include the BARRIER after INSN and copy the
3306 block after LOC. */
3307 if (squeeze_notes (&new_label, &last_insn_to_move))
3308 abort ();
3309 reorder_insns (new_label, last_insn_to_move, loc);
3311 /* All those insns are now in TARGET_LOOP. */
3312 for (q = new_label;
3313 q != NEXT_INSN (last_insn_to_move);
3314 q = NEXT_INSN (q))
3315 uid_loop[INSN_UID (q)] = target_loop;
3317 /* The label jumped to by INSN is no longer a loop
3318 exit. Unless INSN does not have a label (e.g.,
3319 it is a RETURN insn), search loop->exit_labels
3320 to find its label_ref, and remove it. Also turn
3321 off LABEL_OUTSIDE_LOOP_P bit. */
3322 if (JUMP_LABEL (insn))
3324 for (q = 0, r = this_loop->exit_labels;
3326 q = r, r = LABEL_NEXTREF (r))
3327 if (XEXP (r, 0) == JUMP_LABEL (insn))
3329 LABEL_OUTSIDE_LOOP_P (r) = 0;
3330 if (q)
3331 LABEL_NEXTREF (q) = LABEL_NEXTREF (r);
3332 else
3333 this_loop->exit_labels = LABEL_NEXTREF (r);
3334 break;
3337 for (loop = this_loop; loop && loop != target_loop;
3338 loop = loop->outer)
3339 loop->exit_count--;
3341 /* If we didn't find it, then something is
3342 wrong. */
3343 if (! r)
3344 abort ();
3347 /* P is now a jump outside the loop, so it must be put
3348 in loop->exit_labels, and marked as such.
3349 The easiest way to do this is to just call
3350 mark_loop_jump again for P. */
3351 mark_loop_jump (PATTERN (p), this_loop);
3353 /* If INSN now jumps to the insn after it,
3354 delete INSN. */
3355 if (JUMP_LABEL (insn) != 0
3356 && (next_real_insn (JUMP_LABEL (insn))
3357 == next_real_insn (insn)))
3358 delete_related_insns (insn);
3361 /* Continue the loop after where the conditional
3362 branch used to jump, since the only branch insn
3363 in the block (if it still remains) is an inter-loop
3364 branch and hence needs no processing. */
3365 insn = NEXT_INSN (cond_label);
3367 if (--LABEL_NUSES (cond_label) == 0)
3368 delete_related_insns (cond_label);
3370 /* This loop will be continued with NEXT_INSN (insn). */
3371 insn = PREV_INSN (insn);
3378 /* If any label in X jumps to a loop different from LOOP_NUM and any of the
3379 loops it is contained in, mark the target loop invalid.
3381 For speed, we assume that X is part of a pattern of a JUMP_INSN. */
3383 static void
3384 mark_loop_jump (rtx x, struct loop *loop)
3386 struct loop *dest_loop;
3387 struct loop *outer_loop;
3388 int i;
3390 switch (GET_CODE (x))
3392 case PC:
3393 case USE:
3394 case CLOBBER:
3395 case REG:
3396 case MEM:
3397 case CONST_INT:
3398 case CONST_DOUBLE:
3399 case RETURN:
3400 return;
3402 case CONST:
3403 /* There could be a label reference in here. */
3404 mark_loop_jump (XEXP (x, 0), loop);
3405 return;
3407 case PLUS:
3408 case MINUS:
3409 case MULT:
3410 mark_loop_jump (XEXP (x, 0), loop);
3411 mark_loop_jump (XEXP (x, 1), loop);
3412 return;
3414 case LO_SUM:
3415 /* This may refer to a LABEL_REF or SYMBOL_REF. */
3416 mark_loop_jump (XEXP (x, 1), loop);
3417 return;
3419 case SIGN_EXTEND:
3420 case ZERO_EXTEND:
3421 mark_loop_jump (XEXP (x, 0), loop);
3422 return;
3424 case LABEL_REF:
3425 dest_loop = uid_loop[INSN_UID (XEXP (x, 0))];
3427 /* Link together all labels that branch outside the loop. This
3428 is used by final_[bg]iv_value and the loop unrolling code. Also
3429 mark this LABEL_REF so we know that this branch should predict
3430 false. */
3432 /* A check to make sure the label is not in an inner nested loop,
3433 since this does not count as a loop exit. */
3434 if (dest_loop)
3436 for (outer_loop = dest_loop; outer_loop;
3437 outer_loop = outer_loop->outer)
3438 if (outer_loop == loop)
3439 break;
3441 else
3442 outer_loop = NULL;
3444 if (loop && ! outer_loop)
3446 LABEL_OUTSIDE_LOOP_P (x) = 1;
3447 LABEL_NEXTREF (x) = loop->exit_labels;
3448 loop->exit_labels = x;
3450 for (outer_loop = loop;
3451 outer_loop && outer_loop != dest_loop;
3452 outer_loop = outer_loop->outer)
3453 outer_loop->exit_count++;
3456 /* If this is inside a loop, but not in the current loop or one enclosed
3457 by it, it invalidates at least one loop. */
3459 if (! dest_loop)
3460 return;
3462 /* We must invalidate every nested loop containing the target of this
3463 label, except those that also contain the jump insn. */
3465 for (; dest_loop; dest_loop = dest_loop->outer)
3467 /* Stop when we reach a loop that also contains the jump insn. */
3468 for (outer_loop = loop; outer_loop; outer_loop = outer_loop->outer)
3469 if (dest_loop == outer_loop)
3470 return;
3472 /* If we get here, we know we need to invalidate a loop. */
3473 if (loop_dump_stream && ! dest_loop->invalid)
3474 fprintf (loop_dump_stream,
3475 "\nLoop at %d ignored due to multiple entry points.\n",
3476 INSN_UID (dest_loop->start));
3478 dest_loop->invalid = 1;
3480 return;
3482 case SET:
3483 /* If this is not setting pc, ignore. */
3484 if (SET_DEST (x) == pc_rtx)
3485 mark_loop_jump (SET_SRC (x), loop);
3486 return;
3488 case IF_THEN_ELSE:
3489 mark_loop_jump (XEXP (x, 1), loop);
3490 mark_loop_jump (XEXP (x, 2), loop);
3491 return;
3493 case PARALLEL:
3494 case ADDR_VEC:
3495 for (i = 0; i < XVECLEN (x, 0); i++)
3496 mark_loop_jump (XVECEXP (x, 0, i), loop);
3497 return;
3499 case ADDR_DIFF_VEC:
3500 for (i = 0; i < XVECLEN (x, 1); i++)
3501 mark_loop_jump (XVECEXP (x, 1, i), loop);
3502 return;
3504 default:
3505 /* Strictly speaking this is not a jump into the loop, only a possible
3506 jump out of the loop. However, we have no way to link the destination
3507 of this jump onto the list of exit labels. To be safe we mark this
3508 loop and any containing loops as invalid. */
3509 if (loop)
3511 for (outer_loop = loop; outer_loop; outer_loop = outer_loop->outer)
3513 if (loop_dump_stream && ! outer_loop->invalid)
3514 fprintf (loop_dump_stream,
3515 "\nLoop at %d ignored due to unknown exit jump.\n",
3516 INSN_UID (outer_loop->start));
3517 outer_loop->invalid = 1;
3520 return;
3524 /* Return nonzero if there is a label in the range from
3525 insn INSN to and including the insn whose luid is END
3526 INSN must have an assigned luid (i.e., it must not have
3527 been previously created by loop.c). */
3529 static int
3530 labels_in_range_p (rtx insn, int end)
3532 while (insn && INSN_LUID (insn) <= end)
3534 if (LABEL_P (insn))
3535 return 1;
3536 insn = NEXT_INSN (insn);
3539 return 0;
3542 /* Record that a memory reference X is being set. */
3544 static void
3545 note_addr_stored (rtx x, rtx y ATTRIBUTE_UNUSED,
3546 void *data ATTRIBUTE_UNUSED)
3548 struct loop_info *loop_info = data;
3550 if (x == 0 || !MEM_P (x))
3551 return;
3553 /* Count number of memory writes.
3554 This affects heuristics in strength_reduce. */
3555 loop_info->num_mem_sets++;
3557 /* BLKmode MEM means all memory is clobbered. */
3558 if (GET_MODE (x) == BLKmode)
3560 if (MEM_READONLY_P (x))
3561 loop_info->unknown_constant_address_altered = 1;
3562 else
3563 loop_info->unknown_address_altered = 1;
3565 return;
3568 loop_info->store_mems = gen_rtx_EXPR_LIST (VOIDmode, x,
3569 loop_info->store_mems);
3572 /* X is a value modified by an INSN that references a biv inside a loop
3573 exit test (i.e., X is somehow related to the value of the biv). If X
3574 is a pseudo that is used more than once, then the biv is (effectively)
3575 used more than once. DATA is a pointer to a loop_regs structure. */
3577 static void
3578 note_set_pseudo_multiple_uses (rtx x, rtx y ATTRIBUTE_UNUSED, void *data)
3580 struct loop_regs *regs = (struct loop_regs *) data;
3582 if (x == 0)
3583 return;
3585 while (GET_CODE (x) == STRICT_LOW_PART
3586 || GET_CODE (x) == SIGN_EXTRACT
3587 || GET_CODE (x) == ZERO_EXTRACT
3588 || GET_CODE (x) == SUBREG)
3589 x = XEXP (x, 0);
3591 if (!REG_P (x) || REGNO (x) < FIRST_PSEUDO_REGISTER)
3592 return;
3594 /* If we do not have usage information, or if we know the register
3595 is used more than once, note that fact for check_dbra_loop. */
3596 if (REGNO (x) >= max_reg_before_loop
3597 || ! regs->array[REGNO (x)].single_usage
3598 || regs->array[REGNO (x)].single_usage == const0_rtx)
3599 regs->multiple_uses = 1;
3602 /* Return nonzero if the rtx X is invariant over the current loop.
3604 The value is 2 if we refer to something only conditionally invariant.
3606 A memory ref is invariant if it is not volatile and does not conflict
3607 with anything stored in `loop_info->store_mems'. */
3609 static int
3610 loop_invariant_p (const struct loop *loop, rtx x)
3612 struct loop_info *loop_info = LOOP_INFO (loop);
3613 struct loop_regs *regs = LOOP_REGS (loop);
3614 int i;
3615 enum rtx_code code;
3616 const char *fmt;
3617 int conditional = 0;
3618 rtx mem_list_entry;
3620 if (x == 0)
3621 return 1;
3622 code = GET_CODE (x);
3623 switch (code)
3625 case CONST_INT:
3626 case CONST_DOUBLE:
3627 case SYMBOL_REF:
3628 case CONST:
3629 return 1;
3631 case LABEL_REF:
3632 return 1;
3634 case PC:
3635 case CC0:
3636 case UNSPEC_VOLATILE:
3637 return 0;
3639 case REG:
3640 if ((x == frame_pointer_rtx || x == hard_frame_pointer_rtx
3641 || x == arg_pointer_rtx || x == pic_offset_table_rtx)
3642 && ! current_function_has_nonlocal_goto)
3643 return 1;
3645 if (LOOP_INFO (loop)->has_call
3646 && REGNO (x) < FIRST_PSEUDO_REGISTER && call_used_regs[REGNO (x)])
3647 return 0;
3649 /* Out-of-range regs can occur when we are called from unrolling.
3650 These registers created by the unroller are set in the loop,
3651 hence are never invariant.
3652 Other out-of-range regs can be generated by load_mems; those that
3653 are written to in the loop are not invariant, while those that are
3654 not written to are invariant. It would be easy for load_mems
3655 to set n_times_set correctly for these registers, however, there
3656 is no easy way to distinguish them from registers created by the
3657 unroller. */
3659 if (REGNO (x) >= (unsigned) regs->num)
3660 return 0;
3662 if (regs->array[REGNO (x)].set_in_loop < 0)
3663 return 2;
3665 return regs->array[REGNO (x)].set_in_loop == 0;
3667 case MEM:
3668 /* Volatile memory references must be rejected. Do this before
3669 checking for read-only items, so that volatile read-only items
3670 will be rejected also. */
3671 if (MEM_VOLATILE_P (x))
3672 return 0;
3674 /* See if there is any dependence between a store and this load. */
3675 mem_list_entry = loop_info->store_mems;
3676 while (mem_list_entry)
3678 if (true_dependence (XEXP (mem_list_entry, 0), VOIDmode,
3679 x, rtx_varies_p))
3680 return 0;
3682 mem_list_entry = XEXP (mem_list_entry, 1);
3685 /* It's not invalidated by a store in memory
3686 but we must still verify the address is invariant. */
3687 break;
3689 case ASM_OPERANDS:
3690 /* Don't mess with insns declared volatile. */
3691 if (MEM_VOLATILE_P (x))
3692 return 0;
3693 break;
3695 default:
3696 break;
3699 fmt = GET_RTX_FORMAT (code);
3700 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3702 if (fmt[i] == 'e')
3704 int tem = loop_invariant_p (loop, XEXP (x, i));
3705 if (tem == 0)
3706 return 0;
3707 if (tem == 2)
3708 conditional = 1;
3710 else if (fmt[i] == 'E')
3712 int j;
3713 for (j = 0; j < XVECLEN (x, i); j++)
3715 int tem = loop_invariant_p (loop, XVECEXP (x, i, j));
3716 if (tem == 0)
3717 return 0;
3718 if (tem == 2)
3719 conditional = 1;
3725 return 1 + conditional;
3728 /* Return nonzero if all the insns in the loop that set REG
3729 are INSN and the immediately following insns,
3730 and if each of those insns sets REG in an invariant way
3731 (not counting uses of REG in them).
3733 The value is 2 if some of these insns are only conditionally invariant.
3735 We assume that INSN itself is the first set of REG
3736 and that its source is invariant. */
3738 static int
3739 consec_sets_invariant_p (const struct loop *loop, rtx reg, int n_sets,
3740 rtx insn)
3742 struct loop_regs *regs = LOOP_REGS (loop);
3743 rtx p = insn;
3744 unsigned int regno = REGNO (reg);
3745 rtx temp;
3746 /* Number of sets we have to insist on finding after INSN. */
3747 int count = n_sets - 1;
3748 int old = regs->array[regno].set_in_loop;
3749 int value = 0;
3750 int this;
3752 /* If N_SETS hit the limit, we can't rely on its value. */
3753 if (n_sets == 127)
3754 return 0;
3756 regs->array[regno].set_in_loop = 0;
3758 while (count > 0)
3760 enum rtx_code code;
3761 rtx set;
3763 p = NEXT_INSN (p);
3764 code = GET_CODE (p);
3766 /* If library call, skip to end of it. */
3767 if (code == INSN && (temp = find_reg_note (p, REG_LIBCALL, NULL_RTX)))
3768 p = XEXP (temp, 0);
3770 this = 0;
3771 if (code == INSN
3772 && (set = single_set (p))
3773 && REG_P (SET_DEST (set))
3774 && REGNO (SET_DEST (set)) == regno)
3776 this = loop_invariant_p (loop, SET_SRC (set));
3777 if (this != 0)
3778 value |= this;
3779 else if ((temp = find_reg_note (p, REG_EQUAL, NULL_RTX)))
3781 /* If this is a libcall, then any invariant REG_EQUAL note is OK.
3782 If this is an ordinary insn, then only CONSTANT_P REG_EQUAL
3783 notes are OK. */
3784 this = (CONSTANT_P (XEXP (temp, 0))
3785 || (find_reg_note (p, REG_RETVAL, NULL_RTX)
3786 && loop_invariant_p (loop, XEXP (temp, 0))));
3787 if (this != 0)
3788 value |= this;
3791 if (this != 0)
3792 count--;
3793 else if (code != NOTE)
3795 regs->array[regno].set_in_loop = old;
3796 return 0;
3800 regs->array[regno].set_in_loop = old;
3801 /* If loop_invariant_p ever returned 2, we return 2. */
3802 return 1 + (value & 2);
3805 /* Look at all uses (not sets) of registers in X. For each, if it is
3806 the single use, set USAGE[REGNO] to INSN; if there was a previous use in
3807 a different insn, set USAGE[REGNO] to const0_rtx. */
3809 static void
3810 find_single_use_in_loop (struct loop_regs *regs, rtx insn, rtx x)
3812 enum rtx_code code = GET_CODE (x);
3813 const char *fmt = GET_RTX_FORMAT (code);
3814 int i, j;
3816 if (code == REG)
3817 regs->array[REGNO (x)].single_usage
3818 = (regs->array[REGNO (x)].single_usage != 0
3819 && regs->array[REGNO (x)].single_usage != insn)
3820 ? const0_rtx : insn;
3822 else if (code == SET)
3824 /* Don't count SET_DEST if it is a REG; otherwise count things
3825 in SET_DEST because if a register is partially modified, it won't
3826 show up as a potential movable so we don't care how USAGE is set
3827 for it. */
3828 if (!REG_P (SET_DEST (x)))
3829 find_single_use_in_loop (regs, insn, SET_DEST (x));
3830 find_single_use_in_loop (regs, insn, SET_SRC (x));
3832 else
3833 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3835 if (fmt[i] == 'e' && XEXP (x, i) != 0)
3836 find_single_use_in_loop (regs, insn, XEXP (x, i));
3837 else if (fmt[i] == 'E')
3838 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3839 find_single_use_in_loop (regs, insn, XVECEXP (x, i, j));
3843 /* Count and record any set in X which is contained in INSN. Update
3844 REGS->array[I].MAY_NOT_OPTIMIZE and LAST_SET for any register I set
3845 in X. */
3847 static void
3848 count_one_set (struct loop_regs *regs, rtx insn, rtx x, rtx *last_set)
3850 if (GET_CODE (x) == CLOBBER && REG_P (XEXP (x, 0)))
3851 /* Don't move a reg that has an explicit clobber.
3852 It's not worth the pain to try to do it correctly. */
3853 regs->array[REGNO (XEXP (x, 0))].may_not_optimize = 1;
3855 if (GET_CODE (x) == SET || GET_CODE (x) == CLOBBER)
3857 rtx dest = SET_DEST (x);
3858 while (GET_CODE (dest) == SUBREG
3859 || GET_CODE (dest) == ZERO_EXTRACT
3860 || GET_CODE (dest) == STRICT_LOW_PART)
3861 dest = XEXP (dest, 0);
3862 if (REG_P (dest))
3864 int i;
3865 int regno = REGNO (dest);
3866 for (i = 0; i < LOOP_REGNO_NREGS (regno, dest); i++)
3868 /* If this is the first setting of this reg
3869 in current basic block, and it was set before,
3870 it must be set in two basic blocks, so it cannot
3871 be moved out of the loop. */
3872 if (regs->array[regno].set_in_loop > 0
3873 && last_set[regno] == 0)
3874 regs->array[regno+i].may_not_optimize = 1;
3875 /* If this is not first setting in current basic block,
3876 see if reg was used in between previous one and this.
3877 If so, neither one can be moved. */
3878 if (last_set[regno] != 0
3879 && reg_used_between_p (dest, last_set[regno], insn))
3880 regs->array[regno+i].may_not_optimize = 1;
3881 if (regs->array[regno+i].set_in_loop < 127)
3882 ++regs->array[regno+i].set_in_loop;
3883 last_set[regno+i] = insn;
3889 /* Given a loop that is bounded by LOOP->START and LOOP->END and that
3890 is entered at LOOP->SCAN_START, return 1 if the register set in SET
3891 contained in insn INSN is used by any insn that precedes INSN in
3892 cyclic order starting from the loop entry point.
3894 We don't want to use INSN_LUID here because if we restrict INSN to those
3895 that have a valid INSN_LUID, it means we cannot move an invariant out
3896 from an inner loop past two loops. */
3898 static int
3899 loop_reg_used_before_p (const struct loop *loop, rtx set, rtx insn)
3901 rtx reg = SET_DEST (set);
3902 rtx p;
3904 /* Scan forward checking for register usage. If we hit INSN, we
3905 are done. Otherwise, if we hit LOOP->END, wrap around to LOOP->START. */
3906 for (p = loop->scan_start; p != insn; p = NEXT_INSN (p))
3908 if (INSN_P (p) && reg_overlap_mentioned_p (reg, PATTERN (p)))
3909 return 1;
3911 if (p == loop->end)
3912 p = loop->start;
3915 return 0;
3919 /* Information we collect about arrays that we might want to prefetch. */
3920 struct prefetch_info
3922 struct iv_class *class; /* Class this prefetch is based on. */
3923 struct induction *giv; /* GIV this prefetch is based on. */
3924 rtx base_address; /* Start prefetching from this address plus
3925 index. */
3926 HOST_WIDE_INT index;
3927 HOST_WIDE_INT stride; /* Prefetch stride in bytes in each
3928 iteration. */
3929 unsigned int bytes_accessed; /* Sum of sizes of all accesses to this
3930 prefetch area in one iteration. */
3931 unsigned int total_bytes; /* Total bytes loop will access in this block.
3932 This is set only for loops with known
3933 iteration counts and is 0xffffffff
3934 otherwise. */
3935 int prefetch_in_loop; /* Number of prefetch insns in loop. */
3936 int prefetch_before_loop; /* Number of prefetch insns before loop. */
3937 unsigned int write : 1; /* 1 for read/write prefetches. */
3940 /* Data used by check_store function. */
3941 struct check_store_data
3943 rtx mem_address;
3944 int mem_write;
3947 static void check_store (rtx, rtx, void *);
3948 static void emit_prefetch_instructions (struct loop *);
3949 static int rtx_equal_for_prefetch_p (rtx, rtx);
3951 /* Set mem_write when mem_address is found. Used as callback to
3952 note_stores. */
3953 static void
3954 check_store (rtx x, rtx pat ATTRIBUTE_UNUSED, void *data)
3956 struct check_store_data *d = (struct check_store_data *) data;
3958 if ((MEM_P (x)) && rtx_equal_p (d->mem_address, XEXP (x, 0)))
3959 d->mem_write = 1;
3962 /* Like rtx_equal_p, but attempts to swap commutative operands. This is
3963 important to get some addresses combined. Later more sophisticated
3964 transformations can be added when necessary.
3966 ??? Same trick with swapping operand is done at several other places.
3967 It can be nice to develop some common way to handle this. */
3969 static int
3970 rtx_equal_for_prefetch_p (rtx x, rtx y)
3972 int i;
3973 int j;
3974 enum rtx_code code = GET_CODE (x);
3975 const char *fmt;
3977 if (x == y)
3978 return 1;
3979 if (code != GET_CODE (y))
3980 return 0;
3982 if (COMMUTATIVE_ARITH_P (x))
3984 return ((rtx_equal_for_prefetch_p (XEXP (x, 0), XEXP (y, 0))
3985 && rtx_equal_for_prefetch_p (XEXP (x, 1), XEXP (y, 1)))
3986 || (rtx_equal_for_prefetch_p (XEXP (x, 0), XEXP (y, 1))
3987 && rtx_equal_for_prefetch_p (XEXP (x, 1), XEXP (y, 0))));
3990 /* Compare the elements. If any pair of corresponding elements fails to
3991 match, return 0 for the whole thing. */
3993 fmt = GET_RTX_FORMAT (code);
3994 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3996 switch (fmt[i])
3998 case 'w':
3999 if (XWINT (x, i) != XWINT (y, i))
4000 return 0;
4001 break;
4003 case 'i':
4004 if (XINT (x, i) != XINT (y, i))
4005 return 0;
4006 break;
4008 case 'E':
4009 /* Two vectors must have the same length. */
4010 if (XVECLEN (x, i) != XVECLEN (y, i))
4011 return 0;
4013 /* And the corresponding elements must match. */
4014 for (j = 0; j < XVECLEN (x, i); j++)
4015 if (rtx_equal_for_prefetch_p (XVECEXP (x, i, j),
4016 XVECEXP (y, i, j)) == 0)
4017 return 0;
4018 break;
4020 case 'e':
4021 if (rtx_equal_for_prefetch_p (XEXP (x, i), XEXP (y, i)) == 0)
4022 return 0;
4023 break;
4025 case 's':
4026 if (strcmp (XSTR (x, i), XSTR (y, i)))
4027 return 0;
4028 break;
4030 case 'u':
4031 /* These are just backpointers, so they don't matter. */
4032 break;
4034 case '0':
4035 break;
4037 /* It is believed that rtx's at this level will never
4038 contain anything but integers and other rtx's,
4039 except for within LABEL_REFs and SYMBOL_REFs. */
4040 default:
4041 abort ();
4044 return 1;
4047 /* Remove constant addition value from the expression X (when present)
4048 and return it. */
4050 static HOST_WIDE_INT
4051 remove_constant_addition (rtx *x)
4053 HOST_WIDE_INT addval = 0;
4054 rtx exp = *x;
4056 /* Avoid clobbering a shared CONST expression. */
4057 if (GET_CODE (exp) == CONST)
4059 if (GET_CODE (XEXP (exp, 0)) == PLUS
4060 && GET_CODE (XEXP (XEXP (exp, 0), 0)) == SYMBOL_REF
4061 && GET_CODE (XEXP (XEXP (exp, 0), 1)) == CONST_INT)
4063 *x = XEXP (XEXP (exp, 0), 0);
4064 return INTVAL (XEXP (XEXP (exp, 0), 1));
4066 return 0;
4069 if (GET_CODE (exp) == CONST_INT)
4071 addval = INTVAL (exp);
4072 *x = const0_rtx;
4075 /* For plus expression recurse on ourself. */
4076 else if (GET_CODE (exp) == PLUS)
4078 addval += remove_constant_addition (&XEXP (exp, 0));
4079 addval += remove_constant_addition (&XEXP (exp, 1));
4081 /* In case our parameter was constant, remove extra zero from the
4082 expression. */
4083 if (XEXP (exp, 0) == const0_rtx)
4084 *x = XEXP (exp, 1);
4085 else if (XEXP (exp, 1) == const0_rtx)
4086 *x = XEXP (exp, 0);
4089 return addval;
4092 /* Attempt to identify accesses to arrays that are most likely to cause cache
4093 misses, and emit prefetch instructions a few prefetch blocks forward.
4095 To detect the arrays we use the GIV information that was collected by the
4096 strength reduction pass.
4098 The prefetch instructions are generated after the GIV information is done
4099 and before the strength reduction process. The new GIVs are injected into
4100 the strength reduction tables, so the prefetch addresses are optimized as
4101 well.
4103 GIVs are split into base address, stride, and constant addition values.
4104 GIVs with the same address, stride and close addition values are combined
4105 into a single prefetch. Also writes to GIVs are detected, so that prefetch
4106 for write instructions can be used for the block we write to, on machines
4107 that support write prefetches.
4109 Several heuristics are used to determine when to prefetch. They are
4110 controlled by defined symbols that can be overridden for each target. */
4112 static void
4113 emit_prefetch_instructions (struct loop *loop)
4115 int num_prefetches = 0;
4116 int num_real_prefetches = 0;
4117 int num_real_write_prefetches = 0;
4118 int num_prefetches_before = 0;
4119 int num_write_prefetches_before = 0;
4120 int ahead = 0;
4121 int i;
4122 struct iv_class *bl;
4123 struct induction *iv;
4124 struct prefetch_info info[MAX_PREFETCHES];
4125 struct loop_ivs *ivs = LOOP_IVS (loop);
4127 if (!HAVE_prefetch || PREFETCH_BLOCK == 0)
4128 return;
4130 /* Consider only loops w/o calls. When a call is done, the loop is probably
4131 slow enough to read the memory. */
4132 if (PREFETCH_NO_CALL && LOOP_INFO (loop)->has_call)
4134 if (loop_dump_stream)
4135 fprintf (loop_dump_stream, "Prefetch: ignoring loop: has call.\n");
4137 return;
4140 /* Don't prefetch in loops known to have few iterations. */
4141 if (PREFETCH_NO_LOW_LOOPCNT
4142 && LOOP_INFO (loop)->n_iterations
4143 && LOOP_INFO (loop)->n_iterations <= PREFETCH_LOW_LOOPCNT)
4145 if (loop_dump_stream)
4146 fprintf (loop_dump_stream,
4147 "Prefetch: ignoring loop: not enough iterations.\n");
4148 return;
4151 /* Search all induction variables and pick those interesting for the prefetch
4152 machinery. */
4153 for (bl = ivs->list; bl; bl = bl->next)
4155 struct induction *biv = bl->biv, *biv1;
4156 int basestride = 0;
4158 biv1 = biv;
4160 /* Expect all BIVs to be executed in each iteration. This makes our
4161 analysis more conservative. */
4162 while (biv1)
4164 /* Discard non-constant additions that we can't handle well yet, and
4165 BIVs that are executed multiple times; such BIVs ought to be
4166 handled in the nested loop. We accept not_every_iteration BIVs,
4167 since these only result in larger strides and make our
4168 heuristics more conservative. */
4169 if (GET_CODE (biv->add_val) != CONST_INT)
4171 if (loop_dump_stream)
4173 fprintf (loop_dump_stream,
4174 "Prefetch: ignoring biv %d: non-constant addition at insn %d:",
4175 REGNO (biv->src_reg), INSN_UID (biv->insn));
4176 print_rtl (loop_dump_stream, biv->add_val);
4177 fprintf (loop_dump_stream, "\n");
4179 break;
4182 if (biv->maybe_multiple)
4184 if (loop_dump_stream)
4186 fprintf (loop_dump_stream,
4187 "Prefetch: ignoring biv %d: maybe_multiple at insn %i:",
4188 REGNO (biv->src_reg), INSN_UID (biv->insn));
4189 print_rtl (loop_dump_stream, biv->add_val);
4190 fprintf (loop_dump_stream, "\n");
4192 break;
4195 basestride += INTVAL (biv1->add_val);
4196 biv1 = biv1->next_iv;
4199 if (biv1 || !basestride)
4200 continue;
4202 for (iv = bl->giv; iv; iv = iv->next_iv)
4204 rtx address;
4205 rtx temp;
4206 HOST_WIDE_INT index = 0;
4207 int add = 1;
4208 HOST_WIDE_INT stride = 0;
4209 int stride_sign = 1;
4210 struct check_store_data d;
4211 const char *ignore_reason = NULL;
4212 int size = GET_MODE_SIZE (GET_MODE (iv));
4214 /* See whether an induction variable is interesting to us and if
4215 not, report the reason. */
4216 if (iv->giv_type != DEST_ADDR)
4217 ignore_reason = "giv is not a destination address";
4219 /* We are interested only in constant stride memory references
4220 in order to be able to compute density easily. */
4221 else if (GET_CODE (iv->mult_val) != CONST_INT)
4222 ignore_reason = "stride is not constant";
4224 else
4226 stride = INTVAL (iv->mult_val) * basestride;
4227 if (stride < 0)
4229 stride = -stride;
4230 stride_sign = -1;
4233 /* On some targets, reversed order prefetches are not
4234 worthwhile. */
4235 if (PREFETCH_NO_REVERSE_ORDER && stride_sign < 0)
4236 ignore_reason = "reversed order stride";
4238 /* Prefetch of accesses with an extreme stride might not be
4239 worthwhile, either. */
4240 else if (PREFETCH_NO_EXTREME_STRIDE
4241 && stride > PREFETCH_EXTREME_STRIDE)
4242 ignore_reason = "extreme stride";
4244 /* Ignore GIVs with varying add values; we can't predict the
4245 value for the next iteration. */
4246 else if (!loop_invariant_p (loop, iv->add_val))
4247 ignore_reason = "giv has varying add value";
4249 /* Ignore GIVs in the nested loops; they ought to have been
4250 handled already. */
4251 else if (iv->maybe_multiple)
4252 ignore_reason = "giv is in nested loop";
4255 if (ignore_reason != NULL)
4257 if (loop_dump_stream)
4258 fprintf (loop_dump_stream,
4259 "Prefetch: ignoring giv at %d: %s.\n",
4260 INSN_UID (iv->insn), ignore_reason);
4261 continue;
4264 /* Determine the pointer to the basic array we are examining. It is
4265 the sum of the BIV's initial value and the GIV's add_val. */
4266 address = copy_rtx (iv->add_val);
4267 temp = copy_rtx (bl->initial_value);
4269 address = simplify_gen_binary (PLUS, Pmode, temp, address);
4270 index = remove_constant_addition (&address);
4272 d.mem_write = 0;
4273 d.mem_address = *iv->location;
4275 /* When the GIV is not always executed, we might be better off by
4276 not dirtying the cache pages. */
4277 if (PREFETCH_CONDITIONAL || iv->always_executed)
4278 note_stores (PATTERN (iv->insn), check_store, &d);
4279 else
4281 if (loop_dump_stream)
4282 fprintf (loop_dump_stream, "Prefetch: Ignoring giv at %d: %s\n",
4283 INSN_UID (iv->insn), "in conditional code.");
4284 continue;
4287 /* Attempt to find another prefetch to the same array and see if we
4288 can merge this one. */
4289 for (i = 0; i < num_prefetches; i++)
4290 if (rtx_equal_for_prefetch_p (address, info[i].base_address)
4291 && stride == info[i].stride)
4293 /* In case both access same array (same location
4294 just with small difference in constant indexes), merge
4295 the prefetches. Just do the later and the earlier will
4296 get prefetched from previous iteration.
4297 The artificial threshold should not be too small,
4298 but also not bigger than small portion of memory usually
4299 traversed by single loop. */
4300 if (index >= info[i].index
4301 && index - info[i].index < PREFETCH_EXTREME_DIFFERENCE)
4303 info[i].write |= d.mem_write;
4304 info[i].bytes_accessed += size;
4305 info[i].index = index;
4306 info[i].giv = iv;
4307 info[i].class = bl;
4308 info[num_prefetches].base_address = address;
4309 add = 0;
4310 break;
4313 if (index < info[i].index
4314 && info[i].index - index < PREFETCH_EXTREME_DIFFERENCE)
4316 info[i].write |= d.mem_write;
4317 info[i].bytes_accessed += size;
4318 add = 0;
4319 break;
4323 /* Merging failed. */
4324 if (add)
4326 info[num_prefetches].giv = iv;
4327 info[num_prefetches].class = bl;
4328 info[num_prefetches].index = index;
4329 info[num_prefetches].stride = stride;
4330 info[num_prefetches].base_address = address;
4331 info[num_prefetches].write = d.mem_write;
4332 info[num_prefetches].bytes_accessed = size;
4333 num_prefetches++;
4334 if (num_prefetches >= MAX_PREFETCHES)
4336 if (loop_dump_stream)
4337 fprintf (loop_dump_stream,
4338 "Maximal number of prefetches exceeded.\n");
4339 return;
4345 for (i = 0; i < num_prefetches; i++)
4347 int density;
4349 /* Attempt to calculate the total number of bytes fetched by all
4350 iterations of the loop. Avoid overflow. */
4351 if (LOOP_INFO (loop)->n_iterations
4352 && ((unsigned HOST_WIDE_INT) (0xffffffff / info[i].stride)
4353 >= LOOP_INFO (loop)->n_iterations))
4354 info[i].total_bytes = info[i].stride * LOOP_INFO (loop)->n_iterations;
4355 else
4356 info[i].total_bytes = 0xffffffff;
4358 density = info[i].bytes_accessed * 100 / info[i].stride;
4360 /* Prefetch might be worthwhile only when the loads/stores are dense. */
4361 if (PREFETCH_ONLY_DENSE_MEM)
4362 if (density * 256 > PREFETCH_DENSE_MEM * 100
4363 && (info[i].total_bytes / PREFETCH_BLOCK
4364 >= PREFETCH_BLOCKS_BEFORE_LOOP_MIN))
4366 info[i].prefetch_before_loop = 1;
4367 info[i].prefetch_in_loop
4368 = (info[i].total_bytes / PREFETCH_BLOCK
4369 > PREFETCH_BLOCKS_BEFORE_LOOP_MAX);
4371 else
4373 info[i].prefetch_in_loop = 0, info[i].prefetch_before_loop = 0;
4374 if (loop_dump_stream)
4375 fprintf (loop_dump_stream,
4376 "Prefetch: ignoring giv at %d: %d%% density is too low.\n",
4377 INSN_UID (info[i].giv->insn), density);
4379 else
4380 info[i].prefetch_in_loop = 1, info[i].prefetch_before_loop = 1;
4382 /* Find how many prefetch instructions we'll use within the loop. */
4383 if (info[i].prefetch_in_loop != 0)
4385 info[i].prefetch_in_loop = ((info[i].stride + PREFETCH_BLOCK - 1)
4386 / PREFETCH_BLOCK);
4387 num_real_prefetches += info[i].prefetch_in_loop;
4388 if (info[i].write)
4389 num_real_write_prefetches += info[i].prefetch_in_loop;
4393 /* Determine how many iterations ahead to prefetch within the loop, based
4394 on how many prefetches we currently expect to do within the loop. */
4395 if (num_real_prefetches != 0)
4397 if ((ahead = SIMULTANEOUS_PREFETCHES / num_real_prefetches) == 0)
4399 if (loop_dump_stream)
4400 fprintf (loop_dump_stream,
4401 "Prefetch: ignoring prefetches within loop: ahead is zero; %d < %d\n",
4402 SIMULTANEOUS_PREFETCHES, num_real_prefetches);
4403 num_real_prefetches = 0, num_real_write_prefetches = 0;
4406 /* We'll also use AHEAD to determine how many prefetch instructions to
4407 emit before a loop, so don't leave it zero. */
4408 if (ahead == 0)
4409 ahead = PREFETCH_BLOCKS_BEFORE_LOOP_MAX;
4411 for (i = 0; i < num_prefetches; i++)
4413 /* Update if we've decided not to prefetch anything within the loop. */
4414 if (num_real_prefetches == 0)
4415 info[i].prefetch_in_loop = 0;
4417 /* Find how many prefetch instructions we'll use before the loop. */
4418 if (info[i].prefetch_before_loop != 0)
4420 int n = info[i].total_bytes / PREFETCH_BLOCK;
4421 if (n > ahead)
4422 n = ahead;
4423 info[i].prefetch_before_loop = n;
4424 num_prefetches_before += n;
4425 if (info[i].write)
4426 num_write_prefetches_before += n;
4429 if (loop_dump_stream)
4431 if (info[i].prefetch_in_loop == 0
4432 && info[i].prefetch_before_loop == 0)
4433 continue;
4434 fprintf (loop_dump_stream, "Prefetch insn: %d",
4435 INSN_UID (info[i].giv->insn));
4436 fprintf (loop_dump_stream,
4437 "; in loop: %d; before: %d; %s\n",
4438 info[i].prefetch_in_loop,
4439 info[i].prefetch_before_loop,
4440 info[i].write ? "read/write" : "read only");
4441 fprintf (loop_dump_stream,
4442 " density: %d%%; bytes_accessed: %u; total_bytes: %u\n",
4443 (int) (info[i].bytes_accessed * 100 / info[i].stride),
4444 info[i].bytes_accessed, info[i].total_bytes);
4445 fprintf (loop_dump_stream, " index: " HOST_WIDE_INT_PRINT_DEC
4446 "; stride: " HOST_WIDE_INT_PRINT_DEC "; address: ",
4447 info[i].index, info[i].stride);
4448 print_rtl (loop_dump_stream, info[i].base_address);
4449 fprintf (loop_dump_stream, "\n");
4453 if (num_real_prefetches + num_prefetches_before > 0)
4455 /* Record that this loop uses prefetch instructions. */
4456 LOOP_INFO (loop)->has_prefetch = 1;
4458 if (loop_dump_stream)
4460 fprintf (loop_dump_stream, "Real prefetches needed within loop: %d (write: %d)\n",
4461 num_real_prefetches, num_real_write_prefetches);
4462 fprintf (loop_dump_stream, "Real prefetches needed before loop: %d (write: %d)\n",
4463 num_prefetches_before, num_write_prefetches_before);
4467 for (i = 0; i < num_prefetches; i++)
4469 int y;
4471 for (y = 0; y < info[i].prefetch_in_loop; y++)
4473 rtx loc = copy_rtx (*info[i].giv->location);
4474 rtx insn;
4475 int bytes_ahead = PREFETCH_BLOCK * (ahead + y);
4476 rtx before_insn = info[i].giv->insn;
4477 rtx prev_insn = PREV_INSN (info[i].giv->insn);
4478 rtx seq;
4480 /* We can save some effort by offsetting the address on
4481 architectures with offsettable memory references. */
4482 if (offsettable_address_p (0, VOIDmode, loc))
4483 loc = plus_constant (loc, bytes_ahead);
4484 else
4486 rtx reg = gen_reg_rtx (Pmode);
4487 loop_iv_add_mult_emit_before (loop, loc, const1_rtx,
4488 GEN_INT (bytes_ahead), reg,
4489 0, before_insn);
4490 loc = reg;
4493 start_sequence ();
4494 /* Make sure the address operand is valid for prefetch. */
4495 if (! (*insn_data[(int)CODE_FOR_prefetch].operand[0].predicate)
4496 (loc, insn_data[(int)CODE_FOR_prefetch].operand[0].mode))
4497 loc = force_reg (Pmode, loc);
4498 emit_insn (gen_prefetch (loc, GEN_INT (info[i].write),
4499 GEN_INT (3)));
4500 seq = get_insns ();
4501 end_sequence ();
4502 emit_insn_before (seq, before_insn);
4504 /* Check all insns emitted and record the new GIV
4505 information. */
4506 insn = NEXT_INSN (prev_insn);
4507 while (insn != before_insn)
4509 insn = check_insn_for_givs (loop, insn,
4510 info[i].giv->always_executed,
4511 info[i].giv->maybe_multiple);
4512 insn = NEXT_INSN (insn);
4516 if (PREFETCH_BEFORE_LOOP)
4518 /* Emit insns before the loop to fetch the first cache lines or,
4519 if we're not prefetching within the loop, everything we expect
4520 to need. */
4521 for (y = 0; y < info[i].prefetch_before_loop; y++)
4523 rtx reg = gen_reg_rtx (Pmode);
4524 rtx loop_start = loop->start;
4525 rtx init_val = info[i].class->initial_value;
4526 rtx add_val = simplify_gen_binary (PLUS, Pmode,
4527 info[i].giv->add_val,
4528 GEN_INT (y * PREFETCH_BLOCK));
4530 /* Functions called by LOOP_IV_ADD_EMIT_BEFORE expect a
4531 non-constant INIT_VAL to have the same mode as REG, which
4532 in this case we know to be Pmode. */
4533 if (GET_MODE (init_val) != Pmode && !CONSTANT_P (init_val))
4535 rtx seq;
4537 start_sequence ();
4538 init_val = convert_to_mode (Pmode, init_val, 0);
4539 seq = get_insns ();
4540 end_sequence ();
4541 loop_insn_emit_before (loop, 0, loop_start, seq);
4543 loop_iv_add_mult_emit_before (loop, init_val,
4544 info[i].giv->mult_val,
4545 add_val, reg, 0, loop_start);
4546 emit_insn_before (gen_prefetch (reg, GEN_INT (info[i].write),
4547 GEN_INT (3)),
4548 loop_start);
4553 return;
/* Communication with routines called via `note_stores'.  The scanning
   code stores the insn it is currently examining here before it calls
   note_stores (see loop_bivs_init_find); presumably the callback reads it
   to learn which insn a store belongs to -- the callback is not shown
   nearby, so confirm there.  */

static rtx note_insn;

/* Dummy register to have nonzero DEST_REG for DEST_ADDR type givs.  */

static rtx addr_placeholder;
4564 /* ??? Unfinished optimizations, and possible future optimizations,
4565 for the strength reduction code. */
4567 /* ??? The interaction of biv elimination, and recognition of 'constant'
4568 bivs, may cause problems. */
4570 /* ??? Add heuristics so that DEST_ADDR strength reduction does not cause
4571 performance problems.
4573 Perhaps don't eliminate things that can be combined with an addressing
4574 mode. Find all givs that have the same biv, mult_val, and add_val;
4575 then for each giv, check to see if its only use dies in a following
4576 memory address. If so, generate a new memory address and check to see
4577 if it is valid. If it is valid, then store the modified memory address,
4578 otherwise, mark the giv as not done so that it will get its own iv. */
4580 /* ??? Could try to optimize branches when it is known that a biv is always
4581 positive. */
/* ??? When replacing a biv in a compare insn, we should replace it with the
   closest giv so that an optimized branch can still be recognized by the
   combiner, e.g. the VAX acb insn.  */
4587 /* ??? Many of the checks involving uid_luid could be simplified if regscan
4588 was rerun in loop_optimize whenever a register was added or moved.
4589 Also, some of the optimizations could be a little less conservative. */
4591 /* Searches the insns between INSN and LOOP->END. Returns 1 if there
4592 is a backward branch in that range that branches to somewhere between
4593 LOOP->START and INSN. Returns 0 otherwise. */
4595 /* ??? This is quadratic algorithm. Could be rewritten to be linear.
4596 In practice, this is not a problem, because this function is seldom called,
4597 and uses a negligible amount of CPU time on average. */
4599 static int
4600 back_branch_in_range_p (const struct loop *loop, rtx insn)
4602 rtx p, q, target_insn;
4603 rtx loop_start = loop->start;
4604 rtx loop_end = loop->end;
4605 rtx orig_loop_end = loop->end;
4607 /* Stop before we get to the backward branch at the end of the loop. */
4608 loop_end = prev_nonnote_insn (loop_end);
4609 if (BARRIER_P (loop_end))
4610 loop_end = PREV_INSN (loop_end);
4612 /* Check in case insn has been deleted, search forward for first non
4613 deleted insn following it. */
4614 while (INSN_DELETED_P (insn))
4615 insn = NEXT_INSN (insn);
4617 /* Check for the case where insn is the last insn in the loop. Deal
4618 with the case where INSN was a deleted loop test insn, in which case
4619 it will now be the NOTE_LOOP_END. */
4620 if (insn == loop_end || insn == orig_loop_end)
4621 return 0;
4623 for (p = NEXT_INSN (insn); p != loop_end; p = NEXT_INSN (p))
4625 if (JUMP_P (p))
4627 target_insn = JUMP_LABEL (p);
4629 /* Search from loop_start to insn, to see if one of them is
4630 the target_insn. We can't use INSN_LUID comparisons here,
4631 since insn may not have an LUID entry. */
4632 for (q = loop_start; q != insn; q = NEXT_INSN (q))
4633 if (q == target_insn)
4634 return 1;
4638 return 0;
4641 /* Scan the loop body and call FNCALL for each insn. In the addition to the
4642 LOOP and INSN parameters pass MAYBE_MULTIPLE and NOT_EVERY_ITERATION to the
4643 callback.
4645 NOT_EVERY_ITERATION is 1 if current insn is not known to be executed at
4646 least once for every loop iteration except for the last one.
4648 MAYBE_MULTIPLE is 1 if current insn may be executed more than once for every
4649 loop iteration.
4651 typedef rtx (*loop_insn_callback) (struct loop *, rtx, int, int);
4652 static void
4653 for_each_insn_in_loop (struct loop *loop, loop_insn_callback fncall)
4655 int not_every_iteration = 0;
4656 int maybe_multiple = 0;
4657 int past_loop_latch = 0;
4658 rtx p;
4660 /* If loop_scan_start points to the loop exit test, we have to be wary of
4661 subversive use of gotos inside expression statements. */
4662 if (prev_nonnote_insn (loop->scan_start) != prev_nonnote_insn (loop->start))
4663 maybe_multiple = back_branch_in_range_p (loop, loop->scan_start);
4665 /* Scan through loop and update NOT_EVERY_ITERATION and MAYBE_MULTIPLE. */
4666 for (p = next_insn_in_loop (loop, loop->scan_start);
4667 p != NULL_RTX;
4668 p = next_insn_in_loop (loop, p))
4670 p = fncall (loop, p, not_every_iteration, maybe_multiple);
4672 /* Past CODE_LABEL, we get to insns that may be executed multiple
4673 times. The only way we can be sure that they can't is if every
4674 jump insn between here and the end of the loop either
4675 returns, exits the loop, is a jump to a location that is still
4676 behind the label, or is a jump to the loop start. */
4678 if (LABEL_P (p))
4680 rtx insn = p;
4682 maybe_multiple = 0;
4684 while (1)
4686 insn = NEXT_INSN (insn);
4687 if (insn == loop->scan_start)
4688 break;
4689 if (insn == loop->end)
4691 if (loop->top != 0)
4692 insn = loop->top;
4693 else
4694 break;
4695 if (insn == loop->scan_start)
4696 break;
4699 if (JUMP_P (insn)
4700 && GET_CODE (PATTERN (insn)) != RETURN
4701 && (!any_condjump_p (insn)
4702 || (JUMP_LABEL (insn) != 0
4703 && JUMP_LABEL (insn) != loop->scan_start
4704 && !loop_insn_first_p (p, JUMP_LABEL (insn)))))
4706 maybe_multiple = 1;
4707 break;
4712 /* Past a jump, we get to insns for which we can't count
4713 on whether they will be executed during each iteration. */
4714 /* This code appears twice in strength_reduce. There is also similar
4715 code in scan_loop. */
4716 if (JUMP_P (p)
4717 /* If we enter the loop in the middle, and scan around to the
4718 beginning, don't set not_every_iteration for that.
4719 This can be any kind of jump, since we want to know if insns
4720 will be executed if the loop is executed. */
4721 && !(JUMP_LABEL (p) == loop->top
4722 && ((NEXT_INSN (NEXT_INSN (p)) == loop->end
4723 && any_uncondjump_p (p))
4724 || (NEXT_INSN (p) == loop->end && any_condjump_p (p)))))
4726 rtx label = 0;
4728 /* If this is a jump outside the loop, then it also doesn't
4729 matter. Check to see if the target of this branch is on the
4730 loop->exits_labels list. */
4732 for (label = loop->exit_labels; label; label = LABEL_NEXTREF (label))
4733 if (XEXP (label, 0) == JUMP_LABEL (p))
4734 break;
4736 if (!label)
4737 not_every_iteration = 1;
4740 /* Note if we pass a loop latch. If we do, then we can not clear
4741 NOT_EVERY_ITERATION below when we pass the last CODE_LABEL in
4742 a loop since a jump before the last CODE_LABEL may have started
4743 a new loop iteration.
4745 Note that LOOP_TOP is only set for rotated loops and we need
4746 this check for all loops, so compare against the CODE_LABEL
4747 which immediately follows LOOP_START. */
4748 if (JUMP_P (p)
4749 && JUMP_LABEL (p) == NEXT_INSN (loop->start))
4750 past_loop_latch = 1;
4752 /* Unlike in the code motion pass where MAYBE_NEVER indicates that
4753 an insn may never be executed, NOT_EVERY_ITERATION indicates whether
4754 or not an insn is known to be executed each iteration of the
4755 loop, whether or not any iterations are known to occur.
4757 Therefore, if we have just passed a label and have no more labels
4758 between here and the test insn of the loop, and we have not passed
4759 a jump to the top of the loop, then we know these insns will be
4760 executed each iteration. */
4762 if (not_every_iteration
4763 && !past_loop_latch
4764 && LABEL_P (p)
4765 && no_labels_between_p (p, loop->end))
4766 not_every_iteration = 0;
4770 static void
4771 loop_bivs_find (struct loop *loop)
4773 struct loop_regs *regs = LOOP_REGS (loop);
4774 struct loop_ivs *ivs = LOOP_IVS (loop);
4775 /* Temporary list pointers for traversing ivs->list. */
4776 struct iv_class *bl, **backbl;
4778 ivs->list = 0;
4780 for_each_insn_in_loop (loop, check_insn_for_bivs);
4782 /* Scan ivs->list to remove all regs that proved not to be bivs.
4783 Make a sanity check against regs->n_times_set. */
4784 for (backbl = &ivs->list, bl = *backbl; bl; bl = bl->next)
4786 if (REG_IV_TYPE (ivs, bl->regno) != BASIC_INDUCT
4787 /* Above happens if register modified by subreg, etc. */
4788 /* Make sure it is not recognized as a basic induction var: */
4789 || regs->array[bl->regno].n_times_set != bl->biv_count
4790 /* If never incremented, it is invariant that we decided not to
4791 move. So leave it alone. */
4792 || ! bl->incremented)
4794 if (loop_dump_stream)
4795 fprintf (loop_dump_stream, "Biv %d: discarded, %s\n",
4796 bl->regno,
4797 (REG_IV_TYPE (ivs, bl->regno) != BASIC_INDUCT
4798 ? "not induction variable"
4799 : (! bl->incremented ? "never incremented"
4800 : "count error")));
4802 REG_IV_TYPE (ivs, bl->regno) = NOT_BASIC_INDUCT;
4803 *backbl = bl->next;
4805 else
4807 backbl = &bl->next;
4809 if (loop_dump_stream)
4810 fprintf (loop_dump_stream, "Biv %d: verified\n", bl->regno);
4816 /* Determine how BIVS are initialized by looking through pre-header
4817 extended basic block. */
4818 static void
4819 loop_bivs_init_find (struct loop *loop)
4821 struct loop_ivs *ivs = LOOP_IVS (loop);
4822 /* Temporary list pointers for traversing ivs->list. */
4823 struct iv_class *bl;
4824 int call_seen;
4825 rtx p;
4827 /* Find initial value for each biv by searching backwards from loop_start,
4828 halting at first label. Also record any test condition. */
4830 call_seen = 0;
4831 for (p = loop->start; p && !LABEL_P (p); p = PREV_INSN (p))
4833 rtx test;
4835 note_insn = p;
4837 if (CALL_P (p))
4838 call_seen = 1;
4840 if (INSN_P (p))
4841 note_stores (PATTERN (p), record_initial, ivs);
4843 /* Record any test of a biv that branches around the loop if no store
4844 between it and the start of loop. We only care about tests with
4845 constants and registers and only certain of those. */
4846 if (JUMP_P (p)
4847 && JUMP_LABEL (p) != 0
4848 && next_real_insn (JUMP_LABEL (p)) == next_real_insn (loop->end)
4849 && (test = get_condition_for_loop (loop, p)) != 0
4850 && REG_P (XEXP (test, 0))
4851 && REGNO (XEXP (test, 0)) < max_reg_before_loop
4852 && (bl = REG_IV_CLASS (ivs, REGNO (XEXP (test, 0)))) != 0
4853 && valid_initial_value_p (XEXP (test, 1), p, call_seen, loop->start)
4854 && bl->init_insn == 0)
4856 /* If an NE test, we have an initial value! */
4857 if (GET_CODE (test) == NE)
4859 bl->init_insn = p;
4860 bl->init_set = gen_rtx_SET (VOIDmode,
4861 XEXP (test, 0), XEXP (test, 1));
4863 else
4864 bl->initial_test = test;
4870 /* Look at the each biv and see if we can say anything better about its
4871 initial value from any initializing insns set up above. (This is done
4872 in two passes to avoid missing SETs in a PARALLEL.) */
4873 static void
4874 loop_bivs_check (struct loop *loop)
4876 struct loop_ivs *ivs = LOOP_IVS (loop);
4877 /* Temporary list pointers for traversing ivs->list. */
4878 struct iv_class *bl;
4879 struct iv_class **backbl;
4881 for (backbl = &ivs->list; (bl = *backbl); backbl = &bl->next)
4883 rtx src;
4884 rtx note;
4886 if (! bl->init_insn)
4887 continue;
4889 /* IF INIT_INSN has a REG_EQUAL or REG_EQUIV note and the value
4890 is a constant, use the value of that. */
4891 if (((note = find_reg_note (bl->init_insn, REG_EQUAL, 0)) != NULL
4892 && CONSTANT_P (XEXP (note, 0)))
4893 || ((note = find_reg_note (bl->init_insn, REG_EQUIV, 0)) != NULL
4894 && CONSTANT_P (XEXP (note, 0))))
4895 src = XEXP (note, 0);
4896 else
4897 src = SET_SRC (bl->init_set);
4899 if (loop_dump_stream)
4900 fprintf (loop_dump_stream,
4901 "Biv %d: initialized at insn %d: initial value ",
4902 bl->regno, INSN_UID (bl->init_insn));
4904 if ((GET_MODE (src) == GET_MODE (regno_reg_rtx[bl->regno])
4905 || GET_MODE (src) == VOIDmode)
4906 && valid_initial_value_p (src, bl->init_insn,
4907 LOOP_INFO (loop)->pre_header_has_call,
4908 loop->start))
4910 bl->initial_value = src;
4912 if (loop_dump_stream)
4914 print_simple_rtl (loop_dump_stream, src);
4915 fputc ('\n', loop_dump_stream);
4918 /* If we can't make it a giv,
4919 let biv keep initial value of "itself". */
4920 else if (loop_dump_stream)
4921 fprintf (loop_dump_stream, "is complex\n");
4926 /* Search the loop for general induction variables. */
4928 static void
4929 loop_givs_find (struct loop* loop)
4931 for_each_insn_in_loop (loop, check_insn_for_givs);
4935 /* For each giv for which we still don't know whether or not it is
4936 replaceable, check to see if it is replaceable because its final value
4937 can be calculated. */
4939 static void
4940 loop_givs_check (struct loop *loop)
4942 struct loop_ivs *ivs = LOOP_IVS (loop);
4943 struct iv_class *bl;
4945 for (bl = ivs->list; bl; bl = bl->next)
4947 struct induction *v;
4949 for (v = bl->giv; v; v = v->next_iv)
4950 if (! v->replaceable && ! v->not_replaceable)
4951 check_final_value (loop, v);
4955 /* Try to generate the simplest rtx for the expression
4956 (PLUS (MULT mult1 mult2) add1). This is used to calculate the initial
4957 value of giv's. */
4959 static rtx
4960 fold_rtx_mult_add (rtx mult1, rtx mult2, rtx add1, enum machine_mode mode)
4962 rtx temp, mult_res;
4963 rtx result;
4965 /* The modes must all be the same. This should always be true. For now,
4966 check to make sure. */
4967 if ((GET_MODE (mult1) != mode && GET_MODE (mult1) != VOIDmode)
4968 || (GET_MODE (mult2) != mode && GET_MODE (mult2) != VOIDmode)
4969 || (GET_MODE (add1) != mode && GET_MODE (add1) != VOIDmode))
4970 abort ();
4972 /* Ensure that if at least one of mult1/mult2 are constant, then mult2
4973 will be a constant. */
4974 if (GET_CODE (mult1) == CONST_INT)
4976 temp = mult2;
4977 mult2 = mult1;
4978 mult1 = temp;
4981 mult_res = simplify_binary_operation (MULT, mode, mult1, mult2);
4982 if (! mult_res)
4983 mult_res = gen_rtx_MULT (mode, mult1, mult2);
4985 /* Again, put the constant second. */
4986 if (GET_CODE (add1) == CONST_INT)
4988 temp = add1;
4989 add1 = mult_res;
4990 mult_res = temp;
4993 result = simplify_binary_operation (PLUS, mode, add1, mult_res);
4994 if (! result)
4995 result = gen_rtx_PLUS (mode, add1, mult_res);
4997 return result;
5000 /* Searches the list of induction struct's for the biv BL, to try to calculate
5001 the total increment value for one iteration of the loop as a constant.
5003 Returns the increment value as an rtx, simplified as much as possible,
5004 if it can be calculated. Otherwise, returns 0. */
5006 static rtx
5007 biv_total_increment (const struct iv_class *bl)
5009 struct induction *v;
5010 rtx result;
5012 /* For increment, must check every instruction that sets it. Each
5013 instruction must be executed only once each time through the loop.
5014 To verify this, we check that the insn is always executed, and that
5015 there are no backward branches after the insn that branch to before it.
5016 Also, the insn must have a mult_val of one (to make sure it really is
5017 an increment). */
5019 result = const0_rtx;
5020 for (v = bl->biv; v; v = v->next_iv)
5022 if (v->always_computable && v->mult_val == const1_rtx
5023 && ! v->maybe_multiple
5024 && SCALAR_INT_MODE_P (v->mode))
5026 /* If we have already counted it, skip it. */
5027 if (v->same)
5028 continue;
5030 result = fold_rtx_mult_add (result, const1_rtx, v->add_val, v->mode);
5032 else
5033 return 0;
5036 return result;
5039 /* Try to prove that the register is dead after the loop exits. Trace every
5040 loop exit looking for an insn that will always be executed, which sets
5041 the register to some value, and appears before the first use of the register
5042 is found. If successful, then return 1, otherwise return 0. */
/* LOOP is the loop whose exits are traced and REG is the register being
   tested.  The fall-through exit at loop->end is traced in addition to
   every label on loop->exit_labels.  0 is returned as soon as a use of
   REG, or a jump that cannot be followed, is met on any of those paths.  */
5044 /* ?? Could be made more intelligent in the handling of jumps, so that
5045 it can search past if statements and other similar structures. */
5047 static int
5048 reg_dead_after_loop (const struct loop *loop, rtx reg)
5050 rtx insn, label;
/* JUMP_COUNT is never reset, so the limit tested below bounds the number
   of unconditional jumps followed over all exits together.  */
5051 int jump_count = 0;
5052 int label_count = 0;
5054 /* In addition to checking all exits of this loop, we must also check
5055 all exits of inner nested loops that would exit this loop. We don't
5056 have any way to identify those, so we just give up if there are any
5057 such inner loop exits. */
/* Count the entries on the exit-label list; a count that differs from
   loop->exit_count is treated as failure.  */
5059 for (label = loop->exit_labels; label; label = LABEL_NEXTREF (label))
5060 label_count++;
5062 if (label_count != loop->exit_count)
5063 return 0;
5065 /* HACK: Must also search the loop fall through exit, create a label_ref
5066 here which points to the loop->end, and append the loop_number_exit_labels
5067 list to it. */
5068 label = gen_rtx_LABEL_REF (VOIDmode, loop->end);
5069 LABEL_NEXTREF (label) = loop->exit_labels;
5071 for (; label; label = LABEL_NEXTREF (label))
5073 /* Succeed if we find an insn that sets REG, reach a RETURN, or reach the
5074 end of the insn chain. Fail if we find an insn that uses REG, or if we
5075 come to a jump that is not a simple unconditional jump. */
/* Start scanning at the insn that follows the exit target.  */
5077 insn = NEXT_INSN (XEXP (label, 0));
5078 while (insn)
5080 if (INSN_P (insn))
5082 rtx set, note;
/* A reference to REG in the insn pattern, or in the note returned by
   find_reg_equal_equiv_note, counts as a use.  */
5084 if (reg_referenced_p (reg, PATTERN (insn)))
5085 return 0;
5087 note = find_reg_equal_equiv_note (insn);
5088 if (note && reg_overlap_mentioned_p (reg, XEXP (note, 0)))
5089 return 0;
/* REG is overwritten here without having been used first, so this exit
   needs no further scanning.  */
5091 set = single_set (insn);
5092 if (set && rtx_equal_p (SET_DEST (set), reg))
5093 break;
5095 if (JUMP_P (insn))
5097 if (GET_CODE (PATTERN (insn)) == RETURN)
5098 break;
5099 else if (!any_uncondjump_p (insn)
5100 /* Prevent infinite loop following infinite loops. */
5101 || jump_count++ > 20)
5102 return 0;
5103 else
/* Continue the scan from the target of the unconditional jump.  */
5104 insn = JUMP_LABEL (insn);
5108 insn = NEXT_INSN (insn);
5112 /* Success, the register is dead on all loop exits. */
5113 return 1;
5116 /* Try to calculate the final value of the biv, the value it will have at
5117 the end of the loop. If we can do it, return that value. */
/* LOOP is the loop and BL the biv class being examined.  Three kinds of
   result are possible:
     0            no final value could be determined;
     const0_rtx   any value will do, because the biv is reversed or is
                  dead at every loop exit;
     a new pseudo computed as initial_value + n_iterations * increment
                  by insns requested through loop_iv_add_mult_sink.  */
5119 static rtx
5120 final_biv_value (const struct loop *loop, struct iv_class *bl)
5122 unsigned HOST_WIDE_INT n_iterations = LOOP_INFO (loop)->n_iterations;
5123 rtx increment, tem;
5125 /* ??? This only works for MODE_INT biv's. Reject all others for now. */
5127 if (GET_MODE_CLASS (bl->biv->mode) != MODE_INT)
5128 return 0;
5130 /* The final value for reversed bivs must be calculated differently than
5131 for ordinary bivs. In this case, there is already an insn after the
5132 loop which sets this biv's final value (if necessary), and there are
5133 no other loop exits, so we can return any value. */
5134 if (bl->reversed)
5136 if (loop_dump_stream)
5137 fprintf (loop_dump_stream,
5138 "Final biv value for %d, reversed biv.\n", bl->regno);
5140 return const0_rtx;
5143 /* Try to calculate the final value as initial value + (number of iterations
5144 * increment). For this to work, increment must be invariant, the only
5145 exit from the loop must be the fall through at the bottom (otherwise
5146 it may not have its final value when the loop exits), and the initial
5147 value of the biv must be invariant. */
/* A zero n_iterations means the iteration count is not known, and a zero
   loop->exit_count means there are no exit labels besides the fall
   through.  */
5149 if (n_iterations != 0
5150 && ! loop->exit_count
5151 && loop_invariant_p (loop, bl->initial_value))
/* biv_total_increment returns 0 when the total cannot be determined.  */
5153 increment = biv_total_increment (bl);
5155 if (increment && loop_invariant_p (loop, increment))
5157 /* Can calculate the loop exit value, emit insns after loop
5158 end to calculate this value into a temporary register in
5159 case it is needed later. */
5161 tem = gen_reg_rtx (bl->biv->mode);
5162 record_base_value (REGNO (tem), bl->biv->add_val, 0);
5163 loop_iv_add_mult_sink (loop, increment, GEN_INT (n_iterations),
5164 bl->initial_value, tem);
5166 if (loop_dump_stream)
5167 fprintf (loop_dump_stream,
5168 "Final biv value for %d, calculated.\n", bl->regno);
5170 return tem;
/* The value could not be computed; it is still acceptable if nothing
   reads the biv after the loop.  */
5174 /* Check to see if the biv is dead at all loop exits. */
5175 if (reg_dead_after_loop (loop, bl->biv->src_reg))
5177 if (loop_dump_stream)
5178 fprintf (loop_dump_stream,
5179 "Final biv value for %d, biv dead after loop exit.\n",
5180 bl->regno);
5182 return const0_rtx;
5185 return 0;
5188 /* Return nonzero if it is possible to eliminate the biv BL provided
5189 all givs are reduced. This is possible if either the reg is not
5190 used outside the loop, or we can compute what its final value will
5191 be. */
/* THRESHOLD and INSN_COUNT are passed unchanged to maybe_eliminate_biv,
   whose result is returned when the biv qualifies.  As a side effect,
   bl->final_value is assigned from final_biv_value whenever the
   "not used outside the loop" test fails.  */
5193 static int
5194 loop_biv_eliminable_p (struct loop *loop, struct iv_class *bl,
5195 int threshold, int insn_count)
5197 /* For architectures with a decrement_and_branch_until_zero insn,
5198 don't do this if we put a REG_NONNEG note on the endtest for this
5199 biv. */
5201 #ifdef HAVE_decrement_and_branch_until_zero
5202 if (bl->nonneg)
5204 if (loop_dump_stream)
5205 fprintf (loop_dump_stream,
5206 "Cannot eliminate nonneg biv %d.\n", bl->regno);
5207 return 0;
5209 #endif
5211 /* Check that the biv is not used outside the loop, or that it has a final value.
5212 Compare against bl->init_insn rather than loop->start. We aren't
5213 concerned with any uses of the biv between init_insn and
5214 loop->start since these won't be affected by the value of the biv
5215 elsewhere in the function, so long as init_insn doesn't use the
5216 biv itself. */
/* The first alternative requires that the last use come before loop->end,
   that the first use be no earlier than init_insn, and that the source of
   init_set not mention the biv.  The second alternative is evaluated only
   when the first fails, and stores its result in bl->final_value.  */
5218 if ((REGNO_LAST_LUID (bl->regno) < INSN_LUID (loop->end)
5219 && bl->init_insn
5220 && INSN_UID (bl->init_insn) < max_uid_for_loop
5221 && REGNO_FIRST_LUID (bl->regno) >= INSN_LUID (bl->init_insn)
5222 && ! reg_mentioned_p (bl->biv->dest_reg, SET_SRC (bl->init_set)))
5223 || (bl->final_value = final_biv_value (loop, bl)))
5224 return maybe_eliminate_biv (loop, bl, 0, threshold, insn_count);
/* Neither condition held: report the first and last use in the dump file
   and refuse.  */
5226 if (loop_dump_stream)
5228 fprintf (loop_dump_stream,
5229 "Cannot eliminate biv %d.\n",
5230 bl->regno);
5231 fprintf (loop_dump_stream,
5232 "First use: insn %d, last use: insn %d.\n",
5233 REGNO_FIRST_UID (bl->regno),
5234 REGNO_LAST_UID (bl->regno));
5236 return 0;
5240 /* Reduce each giv of BL that we have decided to reduce. */
/* Only givs that are not ignored and have not been combined into another
   giv (v->same == 0) are processed.  Each one gets a new register
   (v->new_reg), an insn updating that register for every distinct biv
   increment, and initialization code hoisted to the loop start.  */
5242 static void
5243 loop_givs_reduce (struct loop *loop, struct iv_class *bl)
5245 struct induction *v;
5247 for (v = bl->giv; v; v = v->next_iv)
5249 struct induction *tv;
5250 if (! v->ignore && v->same == 0)
/* AUTO_INC_OPT selects where the update of the reduced register goes
   (see the biv loop below): 0 puts it after each biv increment, 1 puts
   it right after V's insn, and -1 puts it right before V's insn.  */
5252 int auto_inc_opt = 0;
5254 /* If the code for derived givs immediately below has already
5255 allocated a new_reg, we must keep it. */
5256 if (! v->new_reg)
5257 v->new_reg = gen_reg_rtx (v->mode);
/* NOTE(review): in this copy the diagram comment below ("Don't do this
   for loops entered at the bottom ...") has no closing delimiter before
   the DEST_ADDR test, so the first lines of that test read as comment
   text.  Confirm against the upstream source.  */
5259 #ifdef AUTO_INC_DEC
5260 /* If the target has auto-increment addressing modes, and
5261 this is an address giv, then try to put the increment
5262 immediately after its use, so that flow can create an
5263 auto-increment addressing mode. */
5264 /* Don't do this for loops entered at the bottom, to avoid
5265 this invalid transformation:
5266 jmp L; -> jmp L;
5267 TOP: TOP:
5268 use giv use giv
5269 L: inc giv
5270 inc biv L:
5271 test biv test giv
5272 cbr TOP cbr TOP
5274 if (v->giv_type == DEST_ADDR && bl->biv_count == 1
5275 && bl->biv->always_executed && ! bl->biv->maybe_multiple
5276 /* We don't handle reversed biv's because bl->biv->insn
5277 does not have a valid INSN_LUID. */
5278 && ! bl->reversed
5279 && v->always_executed && ! v->maybe_multiple
5280 && INSN_UID (v->insn) < max_uid_for_loop
5281 && !loop->top)
5283 /* If other giv's have been combined with this one, then
5284 this will work only if all uses of the other giv's occur
5285 before this giv's insn. This is difficult to check.
5287 We simplify this by looking for the common case where
5288 there is one DEST_REG giv, and this giv's insn is the
5289 last use of the dest_reg of that DEST_REG giv. If the
5290 increment occurs after the address giv, then we can
5291 perform the optimization. (Otherwise, the increment
5292 would have to go before other_giv, and we would not be
5293 able to combine it with the address giv to get an
5294 auto-inc address.) */
5295 if (v->combined_with)
5297 struct induction *other_giv = 0;
/* Look for givs combined with V.  The scan stops early, leaving TV
   nonzero, when a second one is found.  */
5299 for (tv = bl->giv; tv; tv = tv->next_iv)
5300 if (tv->same == v)
5302 if (other_giv)
5303 break;
5304 else
5305 other_giv = tv;
/* TV is zero here only if the scan ran to completion, so this test
   accepts exactly one combined giv.  */
5307 if (! tv && other_giv
5308 && REGNO (other_giv->dest_reg) < max_reg_before_loop
5309 && (REGNO_LAST_UID (REGNO (other_giv->dest_reg))
5310 == INSN_UID (v->insn))
5311 && INSN_LUID (v->insn) < INSN_LUID (bl->biv->insn))
5312 auto_inc_opt = 1;
5314 /* Check for case where increment is before the address
5315 giv. Do this test in "loop order". */
5316 else if ((INSN_LUID (v->insn) > INSN_LUID (bl->biv->insn)
5317 && (INSN_LUID (v->insn) < INSN_LUID (loop->scan_start)
5318 || (INSN_LUID (bl->biv->insn)
5319 > INSN_LUID (loop->scan_start))))
5320 || (INSN_LUID (v->insn) < INSN_LUID (loop->scan_start)
5321 && (INSN_LUID (loop->scan_start)
5322 < INSN_LUID (bl->biv->insn))))
5323 auto_inc_opt = -1;
5324 else
5325 auto_inc_opt = 1;
5327 #ifdef HAVE_cc0
5329 rtx prev;
5331 /* We can't put an insn immediately after one setting
5332 cc0, or immediately before one using cc0. */
5333 if ((auto_inc_opt == 1 && sets_cc0_p (PATTERN (v->insn)))
5334 || (auto_inc_opt == -1
5335 && (prev = prev_nonnote_insn (v->insn)) != 0
5336 && INSN_P (prev)
5337 && sets_cc0_p (PATTERN (prev))))
5338 auto_inc_opt = 0;
5340 #endif
/* Only the fact that a placement was chosen is recorded on the giv; the
   direction stays in the local variable.  */
5342 if (auto_inc_opt)
5343 v->auto_inc_opt = 1;
5345 #endif
5347 /* For each place where the biv is incremented, add an insn
5348 to increment the new, reduced reg for the giv. */
5349 for (tv = bl->biv; tv; tv = tv->next_iv)
5351 rtx insert_before;
5353 /* Skip if location is the same as a previous one. */
5354 if (tv->same)
5355 continue;
/* Choose the insertion point according to AUTO_INC_OPT.  */
5356 if (! auto_inc_opt)
5357 insert_before = NEXT_INSN (tv->insn);
5358 else if (auto_inc_opt == 1)
5359 insert_before = NEXT_INSN (v->insn);
5360 else
5361 insert_before = v->insn;
/* For an increment, new_reg = add_val * mult_val + new_reg.  Otherwise
   the biv is being set, and new_reg is recomputed as
   add_val * mult_val + v->add_val.  */
5363 if (tv->mult_val == const1_rtx)
5364 loop_iv_add_mult_emit_before (loop, tv->add_val, v->mult_val,
5365 v->new_reg, v->new_reg,
5366 0, insert_before);
5367 else /* tv->mult_val == const0_rtx */
5368 /* A multiply is acceptable here
5369 since this is presumed to be seldom executed. */
5370 loop_iv_add_mult_emit_before (loop, tv->add_val, v->mult_val,
5371 v->add_val, v->new_reg,
5372 0, insert_before);
5375 /* Add code at loop start to initialize giv's reduced reg. */
5377 loop_iv_add_mult_hoist (loop,
5378 extend_value_for_giv (v, bl->initial_value),
5379 v->mult_val, v->add_val, v->new_reg);
5385 /* Check for givs whose first use is their definition and whose
5386 last use is the definition of another giv. If so, it is likely
5387 dead and should not be used to derive another giv nor to
5388 eliminate a biv. */
/* The only effect is to set v->maybe_dead on the givs of BL that match.
   LOOP is not used.  */
5390 static void
5391 loop_givs_dead_check (struct loop *loop ATTRIBUTE_UNUSED, struct iv_class *bl)
5393 struct induction *v;
5395 for (v = bl->giv; v; v = v->next_iv)
/* Skip givs that are ignored, or that were combined with an ignored
   giv.  */
5397 if (v->ignore
5398 || (v->same && v->same->ignore))
5399 continue;
/* Only DEST_REG givs whose register is first referenced by the giv's own
   insn are candidates.  */
5401 if (v->giv_type == DEST_REG
5402 && REGNO_FIRST_UID (REGNO (v->dest_reg)) == INSN_UID (v->insn))
5404 struct induction *v1;
/* V1 ranges over every giv of BL, V itself included.  */
5406 for (v1 = bl->giv; v1; v1 = v1->next_iv)
5407 if (REGNO_LAST_UID (REGNO (v->dest_reg)) == INSN_UID (v1->insn))
5408 v->maybe_dead = 1;
/* Install the reduced register of each giv of BL that is not ignored.
   A giv combined with an ignored giv becomes ignored itself.  For the
   others, v->new_reg is updated, marked as a pointer where that can be
   deduced, and then put to use in one of three ways: substituted for the
   address of a DEST_ADDR giv, recorded in REG_MAP for a replaceable giv,
   or copied into the original register by a new move insn.  Code that
   sets the giv's final value is requested where needed, and the result
   is reported in the dump file.  */
5414 static void
5415 loop_givs_rescan (struct loop *loop, struct iv_class *bl, rtx *reg_map)
5417 struct induction *v;
5419 for (v = bl->giv; v; v = v->next_iv)
5421 if (v->same && v->same->ignore)
5422 v->ignore = 1;
5424 if (v->ignore)
5425 continue;
5427 /* Update expression if this was combined, in case other giv was
5428 replaced. */
5429 if (v->same)
5430 v->new_reg = replace_rtx (v->new_reg,
5431 v->same->dest_reg, v->same->new_reg);
5433 /* See if this register is known to be a pointer to something. If
5434 so, see if we can find the alignment. First see if there is a
5435 destination register that is a pointer. If so, this shares the
5436 alignment too. Next see if we can deduce anything from the
5437 computational information. If not, and this is a DEST_ADDR
5438 giv, at least we know that it's a pointer, though we don't know
5439 the alignment. */
5440 if (REG_P (v->new_reg)
5441 && v->giv_type == DEST_REG
5442 && REG_POINTER (v->dest_reg))
5443 mark_reg_pointer (v->new_reg,
5444 REGNO_POINTER_ALIGN (REGNO (v->dest_reg)));
5445 else if (REG_P (v->new_reg)
5446 && REG_POINTER (v->src_reg))
5448 unsigned int align = REGNO_POINTER_ALIGN (REGNO (v->src_reg));
/* The source register's alignment is kept only when add_val is a constant
   multiple of it; otherwise 0 is recorded.  */
5450 if (align == 0
5451 || GET_CODE (v->add_val) != CONST_INT
5452 || INTVAL (v->add_val) % (align / BITS_PER_UNIT) != 0)
5453 align = 0;
5455 mark_reg_pointer (v->new_reg, align);
5457 else if (REG_P (v->new_reg)
5458 && REG_P (v->add_val)
5459 && REG_POINTER (v->add_val))
5461 unsigned int align = REGNO_POINTER_ALIGN (REGNO (v->add_val));
/* Here add_val is the pointer, and its alignment is kept only when
   mult_val is a constant multiple of it.  */
5463 if (align == 0 || GET_CODE (v->mult_val) != CONST_INT
5464 || INTVAL (v->mult_val) % (align / BITS_PER_UNIT) != 0)
5465 align = 0;
5467 mark_reg_pointer (v->new_reg, align);
5469 else if (REG_P (v->new_reg) && v->giv_type == DEST_ADDR)
5470 mark_reg_pointer (v->new_reg, 0);
5472 if (v->giv_type == DEST_ADDR)
5473 /* Store reduced reg as the address in the memref where we found
5474 this giv. */
5475 validate_change (v->insn, v->location, v->new_reg, 0);
5476 else if (v->replaceable)
/* Only the mapping is recorded here; no insn is changed at this point.  */
5478 reg_map[REGNO (v->dest_reg)] = v->new_reg;
5480 else
5482 rtx original_insn = v->insn;
5483 rtx note;
5485 /* Not replaceable; emit an insn to set the original giv reg from
5486 the reduced giv, same as above. */
/* From here on v->insn refers to the new move insn, so ORIGINAL_INSN
   keeps the insn that computed the giv.  */
5487 v->insn = loop_insn_emit_after (loop, 0, original_insn,
5488 gen_move_insn (v->dest_reg,
5489 v->new_reg));
5491 /* The original insn may have a REG_EQUAL note. This note is
5492 now incorrect and may result in invalid substitutions later.
5493 The original insn is dead, but may be part of a libcall
5494 sequence, which doesn't seem worth the bother of handling. */
5495 note = find_reg_note (original_insn, REG_EQUAL, NULL_RTX);
5496 if (note)
5497 remove_note (original_insn, note);
5500 /* When a loop is reversed, givs which depend on the reversed
5501 biv, and which are live outside the loop, must be set to their
5502 correct final value. This insn is only needed if the giv is
5503 not replaceable. The correct final value is the same as the
5504 value that the giv starts the reversed loop with. */
5505 if (bl->reversed && ! v->replaceable)
5506 loop_iv_add_mult_sink (loop,
5507 extend_value_for_giv (v, bl->initial_value),
5508 v->mult_val, v->add_val, v->dest_reg);
5509 else if (v->final_value)
5510 loop_insn_sink_or_swim (loop,
5511 gen_load_of_final_value (v->dest_reg,
5512 v->final_value));
5514 if (loop_dump_stream)
5516 fprintf (loop_dump_stream, "giv at %d reduced to ",
5517 INSN_UID (v->insn));
5518 print_simple_rtl (loop_dump_stream, v->new_reg);
5519 fprintf (loop_dump_stream, "\n");
/* Return the estimated benefit of strength-reducing giv V of class BL.
   The starting point is v->benefit, from which the cost of the insns the
   reduction would add is subtracted.  TEST_REG is a scratch register rtx
   used only for costing; its mode is overwritten with V's mode.  LOOP is
   not used.  */
5525 static int
5526 loop_giv_reduce_benefit (struct loop *loop ATTRIBUTE_UNUSED,
5527 struct iv_class *bl, struct induction *v,
5528 rtx test_reg)
5530 int add_cost;
5531 int benefit;
5533 benefit = v->benefit;
/* Cost of one update of the reduced register, computed with TEST_REG
   standing in for it.  */
5534 PUT_MODE (test_reg, v->mode);
5535 add_cost = iv_add_mult_cost (bl->biv->add_val, v->mult_val,
5536 test_reg, test_reg);
5538 /* Reduce benefit if not replaceable, since we will insert a
5539 move-insn to replace the insn that calculates this giv. Don't do
5540 this unless the giv is a user variable, since it will often be
5541 marked non-replaceable because of the duplication of the exit
5542 code outside the loop. In such a case, the copies we insert are
5543 dead and will be deleted. So they don't have a cost. Similar
5544 situations exist. */
5545 /* ??? The new final_[bg]iv_value code does a much better job of
5546 finding replaceable giv's, and hence this code may no longer be
5547 necessary. */
5548 if (! v->replaceable && ! bl->eliminable
5549 && REG_USERVAR_P (v->dest_reg))
5550 benefit -= copy_cost;
5552 /* Decrease the benefit to count the add-insns that we will insert
5553 to increment the reduced reg for the giv. ??? This can
5554 overestimate the run-time cost of the additional insns, e.g. if
5555 there are multiple basic blocks that increment the biv, but only
5556 one of these blocks is executed during each iteration. There is
5557 no good way to detect cases like this with the current structure
5558 of the loop optimizer. This code is more accurate for
5559 determining code size than run-time benefits. */
5560 benefit -= add_cost * bl->biv_count;
5562 /* Decide whether to strength-reduce this giv or to leave the code
5563 unchanged (recompute it from the biv each time it is used). This
5564 decision can be made independently for each giv. */
5566 #ifdef AUTO_INC_DEC
5567 /* Attempt to guess whether autoincrement will handle some of the
5568 new add insns; if so, increase BENEFIT (undo the subtraction of
5569 add_cost that was done above). */
5570 if (v->giv_type == DEST_ADDR
5571 /* Increasing the benefit is risky, since this is only a guess.
5572 Avoid increasing register pressure in cases where there would
5573 be no other benefit from reducing this giv. */
5574 && benefit > 0
5575 && GET_CODE (v->mult_val) == CONST_INT)
/* SIZE is the size of the mode of the memory reference.  All four cases
   below add back the same amount; they differ only in the addressing
   mode that must be available and in the sign mult_val must have to
   match SIZE.  */
5577 int size = GET_MODE_SIZE (GET_MODE (v->mem));
5579 if (HAVE_POST_INCREMENT
5580 && INTVAL (v->mult_val) == size)
5581 benefit += add_cost * bl->biv_count;
5582 else if (HAVE_PRE_INCREMENT
5583 && INTVAL (v->mult_val) == size)
5584 benefit += add_cost * bl->biv_count;
5585 else if (HAVE_POST_DECREMENT
5586 && -INTVAL (v->mult_val) == size)
5587 benefit += add_cost * bl->biv_count;
5588 else if (HAVE_PRE_DECREMENT
5589 && -INTVAL (v->mult_val) == size)
5590 benefit += add_cost * bl->biv_count;
5592 #endif
5594 return benefit;
5598 /* Free IV structures for LOOP. */
/* This frees the register table (ivs->regs), then every iv_class on
   ivs->list together with the induction structures on its biv and giv
   chains.  */
5600 static void
5601 loop_ivs_free (struct loop *loop)
5603 struct loop_ivs *ivs = LOOP_IVS (loop);
5604 struct iv_class *iv = ivs->list;
5606 free (ivs->regs);
5608 while (iv)
/* The successor is fetched before each node is freed, so that freed
   memory is never read.  */
5610 struct iv_class *next = iv->next;
5611 struct induction *induction;
5612 struct induction *next_induction;
5614 for (induction = iv->biv; induction; induction = next_induction)
5616 next_induction = induction->next_iv;
5617 free (induction);
5619 for (induction = iv->giv; induction; induction = next_induction)
5621 next_induction = induction->next_iv;
5622 free (induction);
5625 free (iv);
5626 iv = next;
5630 /* Look back before LOOP->START for the insn that sets REG and return
5631 the equivalent constant if there is a REG_EQUAL note otherwise just
5632 the SET_SRC of REG. */
/* REG itself is returned when no usable setter is found: a label is
   reached first, the setter is not a single set of exactly REG, or the
   value found is modified between the setter and the loop start.  */
5634 static rtx
5635 loop_find_equiv_value (const struct loop *loop, rtx reg)
5637 rtx loop_start = loop->start;
5638 rtx insn, set;
5639 rtx ret;
5641 ret = reg;
5642 for (insn = PREV_INSN (loop_start); insn; insn = PREV_INSN (insn))
/* The backward scan does not go past a label.  */
5644 if (LABEL_P (insn))
5645 break;
5647 else if (INSN_P (insn) && reg_set_p (reg, insn))
5649 /* We found the last insn before the loop that sets the register.
5650 If it sets the entire register, and has a REG_EQUAL note,
5651 then use the value of the REG_EQUAL note. */
/* SET_DEST is compared with REG by pointer identity.  */
5652 if ((set = single_set (insn))
5653 && (SET_DEST (set) == reg))
5655 rtx note = find_reg_note (insn, REG_EQUAL, NULL_RTX);
5657 /* Only use the REG_EQUAL note if it is a constant.
5658 Other things, divide in particular, will cause
5659 problems later if we use them. */
5660 if (note && GET_CODE (XEXP (note, 0)) != EXPR_LIST
5661 && CONSTANT_P (XEXP (note, 0)))
5662 ret = XEXP (note, 0);
5663 else
5664 ret = SET_SRC (set);
5666 /* We cannot do this if it changes between the
5667 assignment and loop start though. */
5668 if (modified_between_p (ret, insn, loop_start))
5669 ret = reg;
/* The scan ends at the first insn found that sets REG, whether or not a
   value could be taken from it.  */
5671 break;
5674 return ret;
5677 /* Find and return register term common to both expressions OP0 and
5678 OP1 or NULL_RTX if no such term exists. Each expression must be a
5679 REG or a PLUS of a REG. */
/* Terms are compared by pointer identity, not with rtx_equal_p.  If
   either expression is neither a REG nor a PLUS, NULL_RTX is returned.  */
5681 static rtx
5682 find_common_reg_term (rtx op0, rtx op1)
5684 if ((REG_P (op0) || GET_CODE (op0) == PLUS)
5685 && (REG_P (op1) || GET_CODE (op1) == PLUS))
5687 rtx op00;
5688 rtx op01;
5689 rtx op10;
5690 rtx op11;
/* Split each expression into two terms.  A bare REG is treated as
   REG plus const0_rtx.  */
5692 if (GET_CODE (op0) == PLUS)
5693 op01 = XEXP (op0, 1), op00 = XEXP (op0, 0);
5694 else
5695 op01 = const0_rtx, op00 = op0;
5697 if (GET_CODE (op1) == PLUS)
5698 op11 = XEXP (op1, 1), op10 = XEXP (op1, 0);
5699 else
5700 op11 = const0_rtx, op10 = op1;
5702 /* Find and return common register term if present. */
/* The first operand of OP0 is tried before the second.  */
5703 if (REG_P (op00) && (op00 == op10 || op00 == op11))
5704 return op00;
5705 else if (REG_P (op01) && (op01 == op10 || op01 == op11))
5706 return op01;
5709 /* No common register term found. */
5710 return NULL_RTX;
5713 /* Determine the loop iterator and calculate the number of loop
5714 iterations. Returns the exact number of loop iterations if it can
5715 be calculated, otherwise returns zero. */
5717 static unsigned HOST_WIDE_INT
5718 loop_iterations (struct loop *loop)
5720 struct loop_info *loop_info = LOOP_INFO (loop);
5721 struct loop_ivs *ivs = LOOP_IVS (loop);
5722 rtx comparison, comparison_value;
5723 rtx iteration_var, initial_value, increment, final_value;
5724 enum rtx_code comparison_code;
5725 HOST_WIDE_INT inc;
5726 unsigned HOST_WIDE_INT abs_inc;
5727 unsigned HOST_WIDE_INT abs_diff;
5728 int off_by_one;
5729 int increment_dir;
5730 int unsigned_p, compare_dir, final_larger;
5731 rtx last_loop_insn;
5732 struct iv_class *bl;
5734 loop_info->n_iterations = 0;
5735 loop_info->initial_value = 0;
5736 loop_info->initial_equiv_value = 0;
5737 loop_info->comparison_value = 0;
5738 loop_info->final_value = 0;
5739 loop_info->final_equiv_value = 0;
5740 loop_info->increment = 0;
5741 loop_info->iteration_var = 0;
5742 loop_info->iv = 0;
5744 /* We used to use prev_nonnote_insn here, but that fails because it might
5745 accidentally get the branch for a contained loop if the branch for this
5746 loop was deleted. We can only trust branches immediately before the
5747 loop_end. */
5748 last_loop_insn = PREV_INSN (loop->end);
5750 /* ??? We should probably try harder to find the jump insn
5751 at the end of the loop. The following code assumes that
5752 the last loop insn is a jump to the top of the loop. */
5753 if (!JUMP_P (last_loop_insn))
5755 if (loop_dump_stream)
5756 fprintf (loop_dump_stream,
5757 "Loop iterations: No final conditional branch found.\n");
5758 return 0;
5761 /* If there is more than a single jump to the top of the loop
5762 we cannot (easily) determine the iteration count. */
5763 if (LABEL_NUSES (JUMP_LABEL (last_loop_insn)) > 1)
5765 if (loop_dump_stream)
5766 fprintf (loop_dump_stream,
5767 "Loop iterations: Loop has multiple back edges.\n");
5768 return 0;
5771 /* Find the iteration variable. If the last insn is a conditional
5772 branch, and the insn before tests a register value, make that the
5773 iteration variable. */
5775 comparison = get_condition_for_loop (loop, last_loop_insn);
5776 if (comparison == 0)
5778 if (loop_dump_stream)
5779 fprintf (loop_dump_stream,
5780 "Loop iterations: No final comparison found.\n");
5781 return 0;
5784 /* ??? Get_condition may switch position of induction variable and
5785 invariant register when it canonicalizes the comparison. */
5787 comparison_code = GET_CODE (comparison);
5788 iteration_var = XEXP (comparison, 0);
5789 comparison_value = XEXP (comparison, 1);
5791 if (!REG_P (iteration_var))
5793 if (loop_dump_stream)
5794 fprintf (loop_dump_stream,
5795 "Loop iterations: Comparison not against register.\n");
5796 return 0;
5799 /* The only new registers that are created before loop iterations
5800 are givs made from biv increments or registers created by
5801 load_mems. In the latter case, it is possible that try_copy_prop
5802 will propagate a new pseudo into the old iteration register but
5803 this will be marked by having the REG_USERVAR_P bit set. */
5805 if ((unsigned) REGNO (iteration_var) >= ivs->n_regs
5806 && ! REG_USERVAR_P (iteration_var))
5807 abort ();
5809 /* Determine the initial value of the iteration variable, and the amount
5810 that it is incremented each loop. Use the tables constructed by
5811 the strength reduction pass to calculate these values. */
5813 /* Clear the result values, in case no answer can be found. */
5814 initial_value = 0;
5815 increment = 0;
5817 /* The iteration variable can be either a giv or a biv. Check to see
5818 which it is, and compute the variable's initial value, and increment
5819 value if possible. */
5821 /* If this is a new register, can't handle it since we don't have any
5822 reg_iv_type entry for it. */
5823 if ((unsigned) REGNO (iteration_var) >= ivs->n_regs)
5825 if (loop_dump_stream)
5826 fprintf (loop_dump_stream,
5827 "Loop iterations: No reg_iv_type entry for iteration var.\n");
5828 return 0;
5831 /* Reject iteration variables larger than the host wide int size, since they
5832 could result in a number of iterations greater than the range of our
5833 `unsigned HOST_WIDE_INT' variable loop_info->n_iterations. */
5834 else if ((GET_MODE_BITSIZE (GET_MODE (iteration_var))
5835 > HOST_BITS_PER_WIDE_INT))
5837 if (loop_dump_stream)
5838 fprintf (loop_dump_stream,
5839 "Loop iterations: Iteration var rejected because mode too large.\n");
5840 return 0;
5842 else if (GET_MODE_CLASS (GET_MODE (iteration_var)) != MODE_INT)
5844 if (loop_dump_stream)
5845 fprintf (loop_dump_stream,
5846 "Loop iterations: Iteration var not an integer.\n");
5847 return 0;
5850 /* Try swapping the comparison to identify a suitable iv. */
5851 if (REG_IV_TYPE (ivs, REGNO (iteration_var)) != BASIC_INDUCT
5852 && REG_IV_TYPE (ivs, REGNO (iteration_var)) != GENERAL_INDUCT
5853 && REG_P (comparison_value)
5854 && REGNO (comparison_value) < ivs->n_regs)
5856 rtx temp = comparison_value;
5857 comparison_code = swap_condition (comparison_code);
5858 comparison_value = iteration_var;
5859 iteration_var = temp;
5862 if (REG_IV_TYPE (ivs, REGNO (iteration_var)) == BASIC_INDUCT)
5864 if (REGNO (iteration_var) >= ivs->n_regs)
5865 abort ();
5867 /* Grab initial value, only useful if it is a constant. */
5868 bl = REG_IV_CLASS (ivs, REGNO (iteration_var));
5869 initial_value = bl->initial_value;
5870 if (!bl->biv->always_executed || bl->biv->maybe_multiple)
5872 if (loop_dump_stream)
5873 fprintf (loop_dump_stream,
5874 "Loop iterations: Basic induction var not set once in each iteration.\n");
5875 return 0;
5878 increment = biv_total_increment (bl);
5880 else if (REG_IV_TYPE (ivs, REGNO (iteration_var)) == GENERAL_INDUCT)
5882 HOST_WIDE_INT offset = 0;
5883 struct induction *v = REG_IV_INFO (ivs, REGNO (iteration_var));
5884 rtx biv_initial_value;
5886 if (REGNO (v->src_reg) >= ivs->n_regs)
5887 abort ();
5889 if (!v->always_executed || v->maybe_multiple)
5891 if (loop_dump_stream)
5892 fprintf (loop_dump_stream,
5893 "Loop iterations: General induction var not set once in each iteration.\n");
5894 return 0;
5897 bl = REG_IV_CLASS (ivs, REGNO (v->src_reg));
5899 /* Increment value is mult_val times the increment value of the biv. */
5901 increment = biv_total_increment (bl);
5902 if (increment)
5904 struct induction *biv_inc;
5906 increment = fold_rtx_mult_add (v->mult_val,
5907 extend_value_for_giv (v, increment),
5908 const0_rtx, v->mode);
5909 /* The caller assumes that one full increment has occurred at the
5910 first loop test. But that's not true when the biv is incremented
5911 after the giv is set (which is the usual case), e.g.:
5912 i = 6; do {;} while (i++ < 9) .
5913 Therefore, we bias the initial value by subtracting the amount of
5914 the increment that occurs between the giv set and the giv test. */
5915 for (biv_inc = bl->biv; biv_inc; biv_inc = biv_inc->next_iv)
5917 if (loop_insn_first_p (v->insn, biv_inc->insn))
5919 if (REG_P (biv_inc->add_val))
5921 if (loop_dump_stream)
5922 fprintf (loop_dump_stream,
5923 "Loop iterations: Basic induction var add_val is REG %d.\n",
5924 REGNO (biv_inc->add_val));
5925 return 0;
5928 /* If we have already counted it, skip it. */
5929 if (biv_inc->same)
5930 continue;
5932 offset -= INTVAL (biv_inc->add_val);
5936 if (loop_dump_stream)
5937 fprintf (loop_dump_stream,
5938 "Loop iterations: Giv iterator, initial value bias %ld.\n",
5939 (long) offset);
5941 /* Initial value is mult_val times the biv's initial value plus
5942 add_val. Only useful if it is a constant. */
5943 biv_initial_value = extend_value_for_giv (v, bl->initial_value);
5944 initial_value
5945 = fold_rtx_mult_add (v->mult_val,
5946 plus_constant (biv_initial_value, offset),
5947 v->add_val, v->mode);
5949 else
5951 if (loop_dump_stream)
5952 fprintf (loop_dump_stream,
5953 "Loop iterations: Not basic or general induction var.\n");
5954 return 0;
5957 if (initial_value == 0)
5958 return 0;
5960 unsigned_p = 0;
5961 off_by_one = 0;
5962 switch (comparison_code)
5964 case LEU:
5965 unsigned_p = 1;
5966 case LE:
5967 compare_dir = 1;
5968 off_by_one = 1;
5969 break;
5970 case GEU:
5971 unsigned_p = 1;
5972 case GE:
5973 compare_dir = -1;
5974 off_by_one = -1;
5975 break;
5976 case EQ:
5977 /* Cannot determine loop iterations with this case. */
5978 compare_dir = 0;
5979 break;
5980 case LTU:
5981 unsigned_p = 1;
5982 case LT:
5983 compare_dir = 1;
5984 break;
5985 case GTU:
5986 unsigned_p = 1;
5987 case GT:
5988 compare_dir = -1;
5989 break;
5990 case NE:
5991 compare_dir = 0;
5992 break;
5993 default:
5994 abort ();
5997 /* If the comparison value is an invariant register, then try to find
5998 its value from the insns before the start of the loop. */
6000 final_value = comparison_value;
6001 if (REG_P (comparison_value)
6002 && loop_invariant_p (loop, comparison_value))
6004 final_value = loop_find_equiv_value (loop, comparison_value);
6006 /* If we don't get an invariant final value, we are better
6007 off with the original register. */
6008 if (! loop_invariant_p (loop, final_value))
6009 final_value = comparison_value;
6012 /* Calculate the approximate final value of the induction variable
6013 (on the last successful iteration). The exact final value
6014 depends on the branch operator, and increment sign. It will be
6015 wrong if the iteration variable is not incremented by one each
6016 time through the loop and (comparison_value + off_by_one -
6017 initial_value) % increment != 0.
6018 ??? Note that the final_value may overflow and thus final_larger
6019 will be bogus. A potentially infinite loop will be classified
6020 as immediate, e.g. for (i = 0x7ffffff0; i <= 0x7fffffff; i++) */
6021 if (off_by_one)
6022 final_value = plus_constant (final_value, off_by_one);
6024 /* Save the calculated values describing this loop's bounds, in case
6025 precondition_loop_p will need them later. These values can not be
6026 recalculated inside precondition_loop_p because strength reduction
6027 optimizations may obscure the loop's structure.
6029 These values are only required by precondition_loop_p and insert_bct
6030 whenever the number of iterations cannot be computed at compile time.
6031 Only the difference between final_value and initial_value is
6032 important. Note that final_value is only approximate. */
6033 loop_info->initial_value = initial_value;
6034 loop_info->comparison_value = comparison_value;
6035 loop_info->final_value = plus_constant (comparison_value, off_by_one);
6036 loop_info->increment = increment;
6037 loop_info->iteration_var = iteration_var;
6038 loop_info->comparison_code = comparison_code;
6039 loop_info->iv = bl;
6041 /* Try to determine the iteration count for loops such
6042 as (for i = init; i < init + const; i++). When running the
6043 loop optimization twice, the first pass often converts simple
6044 loops into this form. */
6046 if (REG_P (initial_value))
6048 rtx reg1;
6049 rtx reg2;
6050 rtx const2;
6052 reg1 = initial_value;
6053 if (GET_CODE (final_value) == PLUS)
6054 reg2 = XEXP (final_value, 0), const2 = XEXP (final_value, 1);
6055 else
6056 reg2 = final_value, const2 = const0_rtx;
6058 /* Check for initial_value = reg1, final_value = reg2 + const2,
6059 where reg1 != reg2. */
6060 if (REG_P (reg2) && reg2 != reg1)
6062 rtx temp;
6064 /* Find what reg1 is equivalent to. Hopefully it will
6065 either be reg2 or reg2 plus a constant. */
6066 temp = loop_find_equiv_value (loop, reg1);
6068 if (find_common_reg_term (temp, reg2))
6069 initial_value = temp;
6070 else if (loop_invariant_p (loop, reg2))
6072 /* Find what reg2 is equivalent to. Hopefully it will
6073 either be reg1 or reg1 plus a constant. Let's ignore
6074 the latter case for now since it is not so common. */
6075 temp = loop_find_equiv_value (loop, reg2);
6077 if (temp == loop_info->iteration_var)
6078 temp = initial_value;
6079 if (temp == reg1)
6080 final_value = (const2 == const0_rtx)
6081 ? reg1 : gen_rtx_PLUS (GET_MODE (reg1), reg1, const2);
6086 loop_info->initial_equiv_value = initial_value;
6087 loop_info->final_equiv_value = final_value;
6089 /* For EQ comparison loops, we don't have a valid final value.
6090 Check this now so that we won't leave an invalid value if we
6091 return early for any other reason. */
6092 if (comparison_code == EQ)
6093 loop_info->final_equiv_value = loop_info->final_value = 0;
6095 if (increment == 0)
6097 if (loop_dump_stream)
6098 fprintf (loop_dump_stream,
6099 "Loop iterations: Increment value can't be calculated.\n");
6100 return 0;
6103 if (GET_CODE (increment) != CONST_INT)
6105 /* If we have a REG, check to see if REG holds a constant value. */
6106 /* ??? Other RTL, such as (neg (reg)) is possible here, but it isn't
6107 clear if it is worthwhile to try to handle such RTL. */
6108 if (REG_P (increment) || GET_CODE (increment) == SUBREG)
6109 increment = loop_find_equiv_value (loop, increment);
6111 if (GET_CODE (increment) != CONST_INT)
6113 if (loop_dump_stream)
6115 fprintf (loop_dump_stream,
6116 "Loop iterations: Increment value not constant ");
6117 print_simple_rtl (loop_dump_stream, increment);
6118 fprintf (loop_dump_stream, ".\n");
6120 return 0;
6122 loop_info->increment = increment;
6125 if (GET_CODE (initial_value) != CONST_INT)
6127 if (loop_dump_stream)
6129 fprintf (loop_dump_stream,
6130 "Loop iterations: Initial value not constant ");
6131 print_simple_rtl (loop_dump_stream, initial_value);
6132 fprintf (loop_dump_stream, ".\n");
6134 return 0;
6136 else if (GET_CODE (final_value) != CONST_INT)
6138 if (loop_dump_stream)
6140 fprintf (loop_dump_stream,
6141 "Loop iterations: Final value not constant ");
6142 print_simple_rtl (loop_dump_stream, final_value);
6143 fprintf (loop_dump_stream, ".\n");
6145 return 0;
6147 else if (comparison_code == EQ)
6149 rtx inc_once;
6151 if (loop_dump_stream)
6152 fprintf (loop_dump_stream, "Loop iterations: EQ comparison loop.\n");
6154 inc_once = gen_int_mode (INTVAL (initial_value) + INTVAL (increment),
6155 GET_MODE (iteration_var));
6157 if (inc_once == final_value)
6159 /* The iterator value once through the loop is equal to the
6160 comparison value. Either we have an infinite loop, or
6161 we'll loop twice. */
6162 if (increment == const0_rtx)
6163 return 0;
6164 loop_info->n_iterations = 2;
6166 else
6167 loop_info->n_iterations = 1;
6169 if (GET_CODE (loop_info->initial_value) == CONST_INT)
6170 loop_info->final_value
6171 = gen_int_mode ((INTVAL (loop_info->initial_value)
6172 + loop_info->n_iterations * INTVAL (increment)),
6173 GET_MODE (iteration_var));
6174 else
6175 loop_info->final_value
6176 = plus_constant (loop_info->initial_value,
6177 loop_info->n_iterations * INTVAL (increment));
6178 loop_info->final_equiv_value
6179 = gen_int_mode ((INTVAL (initial_value)
6180 + loop_info->n_iterations * INTVAL (increment)),
6181 GET_MODE (iteration_var));
6182 return loop_info->n_iterations;
6185 /* Final_larger is 1 if final larger, 0 if they are equal, otherwise -1. */
6186 if (unsigned_p)
6187 final_larger
6188 = ((unsigned HOST_WIDE_INT) INTVAL (final_value)
6189 > (unsigned HOST_WIDE_INT) INTVAL (initial_value))
6190 - ((unsigned HOST_WIDE_INT) INTVAL (final_value)
6191 < (unsigned HOST_WIDE_INT) INTVAL (initial_value));
6192 else
6193 final_larger = (INTVAL (final_value) > INTVAL (initial_value))
6194 - (INTVAL (final_value) < INTVAL (initial_value));
6196 if (INTVAL (increment) > 0)
6197 increment_dir = 1;
6198 else if (INTVAL (increment) == 0)
6199 increment_dir = 0;
6200 else
6201 increment_dir = -1;
6203 /* There are 27 different cases: compare_dir = -1, 0, 1;
6204 final_larger = -1, 0, 1; increment_dir = -1, 0, 1.
6205 There are 4 normal cases, 4 reverse cases (where the iteration variable
6206 will overflow before the loop exits), 4 infinite loop cases, and 15
6207 immediate exit (0 or 1 iteration depending on loop type) cases.
6208 Only try to optimize the normal cases. */
6210 /* (compare_dir/final_larger/increment_dir)
6211 Normal cases: (0/-1/-1), (0/1/1), (-1/-1/-1), (1/1/1)
6212 Reverse cases: (0/-1/1), (0/1/-1), (-1/-1/1), (1/1/-1)
6213 Infinite loops: (0/-1/0), (0/1/0), (-1/-1/0), (1/1/0)
6214 Immediate exit: (0/0/X), (-1/0/X), (-1/1/X), (1/0/X), (1/-1/X) */
6216 /* ?? If the meaning of reverse loops (where the iteration variable
6217 will overflow before the loop exits) is undefined, then could
6218 eliminate all of these special checks, and just always assume
6219 the loops are normal/immediate/infinite. Note that this means
6220 the sign of increment_dir does not have to be known. Also,
6221 since it does not really hurt if immediate exit loops or infinite loops
6222 are optimized, then that case could be ignored also, and hence all
6223 loops can be optimized.
6225 According to ANSI Spec, the reverse loop case result is undefined,
6226 because the action on overflow is undefined.
6228 See also the special test for NE loops below. */
6230 if (final_larger == increment_dir && final_larger != 0
6231 && (final_larger == compare_dir || compare_dir == 0))
6232 /* Normal case. */
6234 else
6236 if (loop_dump_stream)
6237 fprintf (loop_dump_stream, "Loop iterations: Not normal loop.\n");
6238 return 0;
6241 /* Calculate the number of iterations, final_value is only an approximation,
6242 so correct for that. Note that abs_diff and n_iterations are
6243 unsigned, because they can be as large as 2^n - 1. */
6245 inc = INTVAL (increment);
6246 if (inc > 0)
6248 abs_diff = INTVAL (final_value) - INTVAL (initial_value);
6249 abs_inc = inc;
6251 else if (inc < 0)
6253 abs_diff = INTVAL (initial_value) - INTVAL (final_value);
6254 abs_inc = -inc;
6256 else
6257 abort ();
6259 /* Given that iteration_var is going to iterate over its own mode,
6260 not HOST_WIDE_INT, disregard higher bits that might have come
6261 into the picture due to sign extension of initial and final
6262 values. */
6263 abs_diff &= ((unsigned HOST_WIDE_INT) 1
6264 << (GET_MODE_BITSIZE (GET_MODE (iteration_var)) - 1)
6265 << 1) - 1;
6267 /* For NE tests, make sure that the iteration variable won't miss
6268 the final value. If abs_diff mod abs_inc is not zero, then the
6269 iteration variable will overflow before the loop exits, and we
6270 cannot calculate the number of iterations. */
6271 if (compare_dir == 0 && (abs_diff % abs_inc) != 0)
6272 return 0;
6274 /* Note that the number of iterations could be calculated using
6275 (abs_diff + abs_inc - 1) / abs_inc, provided care was taken to
6276 handle potential overflow of the summation. */
6277 loop_info->n_iterations = abs_diff / abs_inc + ((abs_diff % abs_inc) != 0);
6278 return loop_info->n_iterations;
6281 /* Perform strength reduction and induction variable elimination.
6283 Pseudo registers created during this function will be beyond the
6284 last valid index in several tables including
6285 REGS->ARRAY[I].N_TIMES_SET and REGNO_LAST_UID. This does not cause a
6286 problem here, because the added registers cannot be givs outside of
6287 their loop, and hence will never be reconsidered. But scan_loop
6288 must check regnos to make sure they are in bounds. */
6290 static void
6291 strength_reduce (struct loop *loop, int flags)
6293 struct loop_info *loop_info = LOOP_INFO (loop);
6294 struct loop_regs *regs = LOOP_REGS (loop);
6295 struct loop_ivs *ivs = LOOP_IVS (loop);
6296 rtx p;
6297 /* Temporary list pointer for traversing ivs->list. */
6298 struct iv_class *bl;
6299 /* Ratio of extra register life span we can justify
6300 for saving an instruction. More if loop doesn't call subroutines
6301 since in that case saving an insn makes more difference
6302 and more registers are available. */
6303 /* ??? could set this to last value of threshold in move_movables */
6304 int threshold = (loop_info->has_call ? 1 : 2) * (3 + n_non_fixed_regs);
6305 /* Map of pseudo-register replacements. */
6306 rtx *reg_map = NULL;
6307 int reg_map_size;
6308 rtx test_reg = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 1);
6309 int insn_count = count_insns_in_loop (loop);
6311 addr_placeholder = gen_reg_rtx (Pmode);
6313 ivs->n_regs = max_reg_before_loop;
6314 ivs->regs = xcalloc (ivs->n_regs, sizeof (struct iv));
6316 /* Find all BIVs in loop. */
6317 loop_bivs_find (loop);
6319 /* Exit if there are no bivs. */
6320 if (! ivs->list)
6322 loop_ivs_free (loop);
6323 return;
6326 /* Determine how BIVS are initialized by looking through pre-header
6327 extended basic block. */
6328 loop_bivs_init_find (loop);
6330 /* Look at the each biv and see if we can say anything better about its
6331 initial value from any initializing insns set up above. */
6332 loop_bivs_check (loop);
6334 /* Search the loop for general induction variables. */
6335 loop_givs_find (loop);
6337 /* Try to calculate and save the number of loop iterations. This is
6338 set to zero if the actual number can not be calculated. This must
6339 be called after all giv's have been identified, since otherwise it may
6340 fail if the iteration variable is a giv. */
6341 loop_iterations (loop);
6343 #ifdef HAVE_prefetch
6344 if (flags & LOOP_PREFETCH)
6345 emit_prefetch_instructions (loop);
6346 #endif
6348 /* Now for each giv for which we still don't know whether or not it is
6349 replaceable, check to see if it is replaceable because its final value
6350 can be calculated. This must be done after loop_iterations is called,
6351 so that final_giv_value will work correctly. */
6352 loop_givs_check (loop);
6354 /* Try to prove that the loop counter variable (if any) is always
6355 nonnegative; if so, record that fact with a REG_NONNEG note
6356 so that "decrement and branch until zero" insn can be used. */
6357 check_dbra_loop (loop, insn_count);
6359 /* Create reg_map to hold substitutions for replaceable giv regs.
6360 Some givs might have been made from biv increments, so look at
6361 ivs->reg_iv_type for a suitable size. */
6362 reg_map_size = ivs->n_regs;
6363 reg_map = xcalloc (reg_map_size, sizeof (rtx));
6365 /* Examine each iv class for feasibility of strength reduction/induction
6366 variable elimination. */
6368 for (bl = ivs->list; bl; bl = bl->next)
6370 struct induction *v;
6371 int benefit;
6373 /* Test whether it will be possible to eliminate this biv
6374 provided all givs are reduced. */
6375 bl->eliminable = loop_biv_eliminable_p (loop, bl, threshold, insn_count);
6377 /* This will be true at the end, if all givs which depend on this
6378 biv have been strength reduced.
6379 We can't (currently) eliminate the biv unless this is so. */
6380 bl->all_reduced = 1;
6382 /* Check each extension dependent giv in this class to see if its
6383 root biv is safe from wrapping in the interior mode. */
6384 check_ext_dependent_givs (loop, bl);
6386 /* Combine all giv's for this iv_class. */
6387 combine_givs (regs, bl);
6389 for (v = bl->giv; v; v = v->next_iv)
6391 struct induction *tv;
6393 if (v->ignore || v->same)
6394 continue;
6396 benefit = loop_giv_reduce_benefit (loop, bl, v, test_reg);
6398 /* If an insn is not to be strength reduced, then set its ignore
6399 flag, and clear bl->all_reduced. */
6401 /* A giv that depends on a reversed biv must be reduced if it is
6402 used after the loop exit, otherwise, it would have the wrong
6403 value after the loop exit. To make it simple, just reduce all
6404 of such giv's whether or not we know they are used after the loop
6405 exit. */
6407 if (v->lifetime * threshold * benefit < insn_count
6408 && ! bl->reversed)
6410 if (loop_dump_stream)
6411 fprintf (loop_dump_stream,
6412 "giv of insn %d not worth while, %d vs %d.\n",
6413 INSN_UID (v->insn),
6414 v->lifetime * threshold * benefit, insn_count);
6415 v->ignore = 1;
6416 bl->all_reduced = 0;
6418 else
6420 /* Check that we can increment the reduced giv without a
6421 multiply insn. If not, reject it. */
6423 for (tv = bl->biv; tv; tv = tv->next_iv)
6424 if (tv->mult_val == const1_rtx
6425 && ! product_cheap_p (tv->add_val, v->mult_val))
6427 if (loop_dump_stream)
6428 fprintf (loop_dump_stream,
6429 "giv of insn %d: would need a multiply.\n",
6430 INSN_UID (v->insn));
6431 v->ignore = 1;
6432 bl->all_reduced = 0;
6433 break;
6438 /* Check for givs whose first use is their definition and whose
6439 last use is the definition of another giv. If so, it is likely
6440 dead and should not be used to derive another giv nor to
6441 eliminate a biv. */
6442 loop_givs_dead_check (loop, bl);
6444 /* Reduce each giv that we decided to reduce. */
6445 loop_givs_reduce (loop, bl);
6447 /* Rescan all givs. If a giv is the same as a giv not reduced, mark it
6448 as not reduced.
6450 For each giv register that can be reduced now: if replaceable,
6451 substitute reduced reg wherever the old giv occurs;
6452 else add new move insn "giv_reg = reduced_reg". */
6453 loop_givs_rescan (loop, bl, reg_map);
6455 /* All the givs based on the biv bl have been reduced if they
6456 merit it. */
6458 /* For each giv not marked as maybe dead that has been combined with a
6459 second giv, clear any "maybe dead" mark on that second giv.
6460 v->new_reg will either be or refer to the register of the giv it
6461 combined with.
6463 Doing this clearing avoids problems in biv elimination where
6464 a giv's new_reg is a complex value that can't be put in the
6465 insn but the giv combined with (with a reg as new_reg) is
6466 marked maybe_dead. Since the register will be used in either
6467 case, we'd prefer it be used from the simpler giv. */
6469 for (v = bl->giv; v; v = v->next_iv)
6470 if (! v->maybe_dead && v->same)
6471 v->same->maybe_dead = 0;
6473 /* Try to eliminate the biv, if it is a candidate.
6474 This won't work if ! bl->all_reduced,
6475 since the givs we planned to use might not have been reduced.
6477 We have to be careful that we didn't initially think we could
6478 eliminate this biv because of a giv that we now think may be
6479 dead and shouldn't be used as a biv replacement.
6481 Also, there is the possibility that we may have a giv that looks
6482 like it can be used to eliminate a biv, but the resulting insn
6483 isn't valid. This can happen, for example, on the 88k, where a
6484 JUMP_INSN can compare a register only with zero. Attempts to
6485 replace it with a compare with a constant will fail.
6487 Note that in cases where this call fails, we may have replaced some
6488 of the occurrences of the biv with a giv, but no harm was done in
6489 doing so in the rare cases where it can occur. */
6491 if (bl->all_reduced == 1 && bl->eliminable
6492 && maybe_eliminate_biv (loop, bl, 1, threshold, insn_count))
6494 /* ?? If we created a new test to bypass the loop entirely,
6495 or otherwise drop straight in, based on this test, then
6496 we might want to rewrite it also. This way some later
6497 pass has more hope of removing the initialization of this
6498 biv entirely. */
6500 /* If final_value != 0, then the biv may be used after loop end
6501 and we must emit an insn to set it just in case.
6503 Reversed bivs already have an insn after the loop setting their
6504 value, so we don't need another one. We can't calculate the
6505 proper final value for such a biv here anyways. */
6506 if (bl->final_value && ! bl->reversed)
6507 loop_insn_sink_or_swim (loop,
6508 gen_load_of_final_value (bl->biv->dest_reg,
6509 bl->final_value));
6511 if (loop_dump_stream)
6512 fprintf (loop_dump_stream, "Reg %d: biv eliminated\n",
6513 bl->regno);
6515 /* See above note wrt final_value. But since we couldn't eliminate
6516 the biv, we must set the value after the loop instead of before. */
6517 else if (bl->final_value && ! bl->reversed)
6518 loop_insn_sink (loop, gen_load_of_final_value (bl->biv->dest_reg,
6519 bl->final_value));
6522 /* Go through all the instructions in the loop, making all the
6523 register substitutions scheduled in REG_MAP. */
6525 for (p = loop->start; p != loop->end; p = NEXT_INSN (p))
6526 if (INSN_P (p))
6528 replace_regs (PATTERN (p), reg_map, reg_map_size, 0);
6529 replace_regs (REG_NOTES (p), reg_map, reg_map_size, 0);
6530 INSN_CODE (p) = -1;
6533 if (loop_dump_stream)
6534 fprintf (loop_dump_stream, "\n");
6536 loop_ivs_free (loop);
6537 if (reg_map)
6538 free (reg_map);
6541 /*Record all basic induction variables calculated in the insn. */
6542 static rtx
6543 check_insn_for_bivs (struct loop *loop, rtx p, int not_every_iteration,
6544 int maybe_multiple)
6546 struct loop_ivs *ivs = LOOP_IVS (loop);
6547 rtx set;
6548 rtx dest_reg;
6549 rtx inc_val;
6550 rtx mult_val;
6551 rtx *location;
6553 if (NONJUMP_INSN_P (p)
6554 && (set = single_set (p))
6555 && REG_P (SET_DEST (set)))
6557 dest_reg = SET_DEST (set);
6558 if (REGNO (dest_reg) < max_reg_before_loop
6559 && REGNO (dest_reg) >= FIRST_PSEUDO_REGISTER
6560 && REG_IV_TYPE (ivs, REGNO (dest_reg)) != NOT_BASIC_INDUCT)
6562 if (basic_induction_var (loop, SET_SRC (set),
6563 GET_MODE (SET_SRC (set)),
6564 dest_reg, p, &inc_val, &mult_val,
6565 &location))
6567 /* It is a possible basic induction variable.
6568 Create and initialize an induction structure for it. */
6570 struct induction *v = xmalloc (sizeof (struct induction));
6572 record_biv (loop, v, p, dest_reg, inc_val, mult_val, location,
6573 not_every_iteration, maybe_multiple);
6574 REG_IV_TYPE (ivs, REGNO (dest_reg)) = BASIC_INDUCT;
6576 else if (REGNO (dest_reg) < ivs->n_regs)
6577 REG_IV_TYPE (ivs, REGNO (dest_reg)) = NOT_BASIC_INDUCT;
6580 return p;
6583 /* Record all givs calculated in the insn.
6584 A register is a giv if: it is only set once, it is a function of a
6585 biv and a constant (or invariant), and it is not a biv. */
6586 static rtx
6587 check_insn_for_givs (struct loop *loop, rtx p, int not_every_iteration,
6588 int maybe_multiple)
6590 struct loop_regs *regs = LOOP_REGS (loop);
6592 rtx set;
6593 /* Look for a general induction variable in a register. */
6594 if (NONJUMP_INSN_P (p)
6595 && (set = single_set (p))
6596 && REG_P (SET_DEST (set))
6597 && ! regs->array[REGNO (SET_DEST (set))].may_not_optimize)
6599 rtx src_reg;
6600 rtx dest_reg;
6601 rtx add_val;
6602 rtx mult_val;
6603 rtx ext_val;
6604 int benefit;
6605 rtx regnote = 0;
6606 rtx last_consec_insn;
6608 dest_reg = SET_DEST (set);
6609 if (REGNO (dest_reg) < FIRST_PSEUDO_REGISTER)
6610 return p;
6612 if (/* SET_SRC is a giv. */
6613 (general_induction_var (loop, SET_SRC (set), &src_reg, &add_val,
6614 &mult_val, &ext_val, 0, &benefit, VOIDmode)
6615 /* Equivalent expression is a giv. */
6616 || ((regnote = find_reg_note (p, REG_EQUAL, NULL_RTX))
6617 && general_induction_var (loop, XEXP (regnote, 0), &src_reg,
6618 &add_val, &mult_val, &ext_val, 0,
6619 &benefit, VOIDmode)))
6620 /* Don't try to handle any regs made by loop optimization.
6621 We have nothing on them in regno_first_uid, etc. */
6622 && REGNO (dest_reg) < max_reg_before_loop
6623 /* Don't recognize a BASIC_INDUCT_VAR here. */
6624 && dest_reg != src_reg
6625 /* This must be the only place where the register is set. */
6626 && (regs->array[REGNO (dest_reg)].n_times_set == 1
6627 /* or all sets must be consecutive and make a giv. */
6628 || (benefit = consec_sets_giv (loop, benefit, p,
6629 src_reg, dest_reg,
6630 &add_val, &mult_val, &ext_val,
6631 &last_consec_insn))))
6633 struct induction *v = xmalloc (sizeof (struct induction));
6635 /* If this is a library call, increase benefit. */
6636 if (find_reg_note (p, REG_RETVAL, NULL_RTX))
6637 benefit += libcall_benefit (p);
6639 /* Skip the consecutive insns, if there are any. */
6640 if (regs->array[REGNO (dest_reg)].n_times_set != 1)
6641 p = last_consec_insn;
6643 record_giv (loop, v, p, src_reg, dest_reg, mult_val, add_val,
6644 ext_val, benefit, DEST_REG, not_every_iteration,
6645 maybe_multiple, (rtx*) 0);
6650 /* Look for givs which are memory addresses. */
6651 if (NONJUMP_INSN_P (p))
6652 find_mem_givs (loop, PATTERN (p), p, not_every_iteration,
6653 maybe_multiple);
6655 /* Update the status of whether giv can derive other givs. This can
6656 change when we pass a label or an insn that updates a biv. */
6657 if (INSN_P (p) || LABEL_P (p))
6658 update_giv_derive (loop, p);
6659 return p;
6662 /* Return 1 if X is a valid source for an initial value (or as value being
6663 compared against in an initial test).
6665 X must be either a register or constant and must not be clobbered between
6666 the current insn and the start of the loop.
6668 INSN is the insn containing X. */
6670 static int
6671 valid_initial_value_p (rtx x, rtx insn, int call_seen, rtx loop_start)
6673 if (CONSTANT_P (x))
6674 return 1;
6676 /* Only consider pseudos we know about initialized in insns whose luids
6677 we know. */
6678 if (!REG_P (x)
6679 || REGNO (x) >= max_reg_before_loop)
6680 return 0;
6682 /* Don't use call-clobbered registers across a call which clobbers it. On
6683 some machines, don't use any hard registers at all. */
6684 if (REGNO (x) < FIRST_PSEUDO_REGISTER
6685 && (SMALL_REGISTER_CLASSES
6686 || (call_used_regs[REGNO (x)] && call_seen)))
6687 return 0;
6689 /* Don't use registers that have been clobbered before the start of the
6690 loop. */
6691 if (reg_set_between_p (x, insn, loop_start))
6692 return 0;
6694 return 1;
6697 /* Scan X for memory refs and check each memory address
6698 as a possible giv. INSN is the insn whose pattern X comes from.
6699 NOT_EVERY_ITERATION is 1 if the insn might not be executed during
6700 every loop iteration. MAYBE_MULTIPLE is 1 if the insn might be executed
6701 more than once in each loop iteration. */
6703 static void
6704 find_mem_givs (const struct loop *loop, rtx x, rtx insn,
6705 int not_every_iteration, int maybe_multiple)
6707 int i, j;
6708 enum rtx_code code;
6709 const char *fmt;
6711 if (x == 0)
6712 return;
6714 code = GET_CODE (x);
6715 switch (code)
6717 case REG:
6718 case CONST_INT:
6719 case CONST:
6720 case CONST_DOUBLE:
6721 case SYMBOL_REF:
6722 case LABEL_REF:
6723 case PC:
6724 case CC0:
6725 case ADDR_VEC:
6726 case ADDR_DIFF_VEC:
6727 case USE:
6728 case CLOBBER:
6729 return;
6731 case MEM:
6733 rtx src_reg;
6734 rtx add_val;
6735 rtx mult_val;
6736 rtx ext_val;
6737 int benefit;
6739 /* This code used to disable creating GIVs with mult_val == 1 and
6740 add_val == 0. However, this leads to lost optimizations when
6741 it comes time to combine a set of related DEST_ADDR GIVs, since
6742 this one would not be seen. */
6744 if (general_induction_var (loop, XEXP (x, 0), &src_reg, &add_val,
6745 &mult_val, &ext_val, 1, &benefit,
6746 GET_MODE (x)))
6748 /* Found one; record it. */
6749 struct induction *v = xmalloc (sizeof (struct induction));
6751 record_giv (loop, v, insn, src_reg, addr_placeholder, mult_val,
6752 add_val, ext_val, benefit, DEST_ADDR,
6753 not_every_iteration, maybe_multiple, &XEXP (x, 0));
6755 v->mem = x;
6758 return;
6760 default:
6761 break;
6764 /* Recursively scan the subexpressions for other mem refs. */
6766 fmt = GET_RTX_FORMAT (code);
6767 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
6768 if (fmt[i] == 'e')
6769 find_mem_givs (loop, XEXP (x, i), insn, not_every_iteration,
6770 maybe_multiple);
6771 else if (fmt[i] == 'E')
6772 for (j = 0; j < XVECLEN (x, i); j++)
6773 find_mem_givs (loop, XVECEXP (x, i, j), insn, not_every_iteration,
6774 maybe_multiple);
6777 /* Fill in the data about one biv update.
6778 V is the `struct induction' in which we record the biv. (It is
6779 allocated by the caller, with alloca.)
6780 INSN is the insn that sets it.
6781 DEST_REG is the biv's reg.
6783 MULT_VAL is const1_rtx if the biv is being incremented here, in which case
6784 INC_VAL is the increment. Otherwise, MULT_VAL is const0_rtx and the biv is
6785 being set to INC_VAL.
6787 NOT_EVERY_ITERATION is nonzero if this biv update is not know to be
6788 executed every iteration; MAYBE_MULTIPLE is nonzero if this biv update
6789 can be executed more than once per iteration. If MAYBE_MULTIPLE
6790 and NOT_EVERY_ITERATION are both zero, we know that the biv update is
6791 executed exactly once per iteration. */
6793 static void
6794 record_biv (struct loop *loop, struct induction *v, rtx insn, rtx dest_reg,
6795 rtx inc_val, rtx mult_val, rtx *location,
6796 int not_every_iteration, int maybe_multiple)
6798 struct loop_ivs *ivs = LOOP_IVS (loop);
6799 struct iv_class *bl;
6801 v->insn = insn;
6802 v->src_reg = dest_reg;
6803 v->dest_reg = dest_reg;
6804 v->mult_val = mult_val;
6805 v->add_val = inc_val;
6806 v->ext_dependent = NULL_RTX;
6807 v->location = location;
6808 v->mode = GET_MODE (dest_reg);
6809 v->always_computable = ! not_every_iteration;
6810 v->always_executed = ! not_every_iteration;
6811 v->maybe_multiple = maybe_multiple;
6812 v->same = 0;
6814 /* Add this to the reg's iv_class, creating a class
6815 if this is the first incrementation of the reg. */
6817 bl = REG_IV_CLASS (ivs, REGNO (dest_reg));
6818 if (bl == 0)
6820 /* Create and initialize new iv_class. */
6822 bl = xmalloc (sizeof (struct iv_class));
6824 bl->regno = REGNO (dest_reg);
6825 bl->biv = 0;
6826 bl->giv = 0;
6827 bl->biv_count = 0;
6828 bl->giv_count = 0;
6830 /* Set initial value to the reg itself. */
6831 bl->initial_value = dest_reg;
6832 bl->final_value = 0;
6833 /* We haven't seen the initializing insn yet. */
6834 bl->init_insn = 0;
6835 bl->init_set = 0;
6836 bl->initial_test = 0;
6837 bl->incremented = 0;
6838 bl->eliminable = 0;
6839 bl->nonneg = 0;
6840 bl->reversed = 0;
6841 bl->total_benefit = 0;
6843 /* Add this class to ivs->list. */
6844 bl->next = ivs->list;
6845 ivs->list = bl;
6847 /* Put it in the array of biv register classes. */
6848 REG_IV_CLASS (ivs, REGNO (dest_reg)) = bl;
6850 else
6852 /* Check if location is the same as a previous one. */
6853 struct induction *induction;
6854 for (induction = bl->biv; induction; induction = induction->next_iv)
6855 if (location == induction->location)
6857 v->same = induction;
6858 break;
6862 /* Update IV_CLASS entry for this biv. */
6863 v->next_iv = bl->biv;
6864 bl->biv = v;
6865 bl->biv_count++;
6866 if (mult_val == const1_rtx)
6867 bl->incremented = 1;
6869 if (loop_dump_stream)
6870 loop_biv_dump (v, loop_dump_stream, 0);
6873 /* Fill in the data about one giv.
6874 V is the `struct induction' in which we record the giv. (It is
6875 allocated by the caller, with alloca.)
6876 INSN is the insn that sets it.
6877 BENEFIT estimates the savings from deleting this insn.
6878 TYPE is DEST_REG or DEST_ADDR; it says whether the giv is computed
6879 into a register or is used as a memory address.
6881 SRC_REG is the biv reg which the giv is computed from.
6882 DEST_REG is the giv's reg (if the giv is stored in a reg).
6883 MULT_VAL and ADD_VAL are the coefficients used to compute the giv.
6884 LOCATION points to the place where this giv's value appears in INSN. */
6886 static void
6887 record_giv (const struct loop *loop, struct induction *v, rtx insn,
6888 rtx src_reg, rtx dest_reg, rtx mult_val, rtx add_val,
6889 rtx ext_val, int benefit, enum g_types type,
6890 int not_every_iteration, int maybe_multiple, rtx *location)
6892 struct loop_ivs *ivs = LOOP_IVS (loop);
6893 struct induction *b;
6894 struct iv_class *bl;
6895 rtx set = single_set (insn);
6896 rtx temp;
6898 /* Attempt to prove constantness of the values. Don't let simplify_rtx
6899 undo the MULT canonicalization that we performed earlier. */
6900 temp = simplify_rtx (add_val);
6901 if (temp
6902 && ! (GET_CODE (add_val) == MULT
6903 && GET_CODE (temp) == ASHIFT))
6904 add_val = temp;
6906 v->insn = insn;
6907 v->src_reg = src_reg;
6908 v->giv_type = type;
6909 v->dest_reg = dest_reg;
6910 v->mult_val = mult_val;
6911 v->add_val = add_val;
6912 v->ext_dependent = ext_val;
6913 v->benefit = benefit;
6914 v->location = location;
6915 v->cant_derive = 0;
6916 v->combined_with = 0;
6917 v->maybe_multiple = maybe_multiple;
6918 v->maybe_dead = 0;
6919 v->derive_adjustment = 0;
6920 v->same = 0;
6921 v->ignore = 0;
6922 v->new_reg = 0;
6923 v->final_value = 0;
6924 v->same_insn = 0;
6925 v->auto_inc_opt = 0;
6926 v->shared = 0;
6928 /* The v->always_computable field is used in update_giv_derive, to
6929 determine whether a giv can be used to derive another giv. For a
6930 DEST_REG giv, INSN computes a new value for the giv, so its value
6931 isn't computable if INSN insn't executed every iteration.
6932 However, for a DEST_ADDR giv, INSN merely uses the value of the giv;
6933 it does not compute a new value. Hence the value is always computable
6934 regardless of whether INSN is executed each iteration. */
6936 if (type == DEST_ADDR)
6937 v->always_computable = 1;
6938 else
6939 v->always_computable = ! not_every_iteration;
6941 v->always_executed = ! not_every_iteration;
6943 if (type == DEST_ADDR)
6945 v->mode = GET_MODE (*location);
6946 v->lifetime = 1;
6948 else /* type == DEST_REG */
6950 v->mode = GET_MODE (SET_DEST (set));
6952 v->lifetime = LOOP_REG_LIFETIME (loop, REGNO (dest_reg));
6954 /* If the lifetime is zero, it means that this register is
6955 really a dead store. So mark this as a giv that can be
6956 ignored. This will not prevent the biv from being eliminated. */
6957 if (v->lifetime == 0)
6958 v->ignore = 1;
6960 REG_IV_TYPE (ivs, REGNO (dest_reg)) = GENERAL_INDUCT;
6961 REG_IV_INFO (ivs, REGNO (dest_reg)) = v;
6964 /* Add the giv to the class of givs computed from one biv. */
6966 bl = REG_IV_CLASS (ivs, REGNO (src_reg));
6967 if (bl)
6969 v->next_iv = bl->giv;
6970 bl->giv = v;
6971 /* Don't count DEST_ADDR. This is supposed to count the number of
6972 insns that calculate givs. */
6973 if (type == DEST_REG)
6974 bl->giv_count++;
6975 bl->total_benefit += benefit;
6977 else
6978 /* Fatal error, biv missing for this giv? */
6979 abort ();
6981 if (type == DEST_ADDR)
6983 v->replaceable = 1;
6984 v->not_replaceable = 0;
6986 else
6988 /* The giv can be replaced outright by the reduced register only if all
6989 of the following conditions are true:
6990 - the insn that sets the giv is always executed on any iteration
6991 on which the giv is used at all
6992 (there are two ways to deduce this:
6993 either the insn is executed on every iteration,
6994 or all uses follow that insn in the same basic block),
6995 - the giv is not used outside the loop
6996 - no assignments to the biv occur during the giv's lifetime. */
6998 if (REGNO_FIRST_UID (REGNO (dest_reg)) == INSN_UID (insn)
6999 /* Previous line always fails if INSN was moved by loop opt. */
7000 && REGNO_LAST_LUID (REGNO (dest_reg))
7001 < INSN_LUID (loop->end)
7002 && (! not_every_iteration
7003 || last_use_this_basic_block (dest_reg, insn)))
7005 /* Now check that there are no assignments to the biv within the
7006 giv's lifetime. This requires two separate checks. */
7008 /* Check each biv update, and fail if any are between the first
7009 and last use of the giv.
7011 If this loop contains an inner loop that was unrolled, then
7012 the insn modifying the biv may have been emitted by the loop
7013 unrolling code, and hence does not have a valid luid. Just
7014 mark the biv as not replaceable in this case. It is not very
7015 useful as a biv, because it is used in two different loops.
7016 It is very unlikely that we would be able to optimize the giv
7017 using this biv anyways. */
7019 v->replaceable = 1;
7020 v->not_replaceable = 0;
7021 for (b = bl->biv; b; b = b->next_iv)
7023 if (INSN_UID (b->insn) >= max_uid_for_loop
7024 || ((INSN_LUID (b->insn)
7025 >= REGNO_FIRST_LUID (REGNO (dest_reg)))
7026 && (INSN_LUID (b->insn)
7027 <= REGNO_LAST_LUID (REGNO (dest_reg)))))
7029 v->replaceable = 0;
7030 v->not_replaceable = 1;
7031 break;
7035 /* If there are any backwards branches that go from after the
7036 biv update to before it, then this giv is not replaceable. */
7037 if (v->replaceable)
7038 for (b = bl->biv; b; b = b->next_iv)
7039 if (back_branch_in_range_p (loop, b->insn))
7041 v->replaceable = 0;
7042 v->not_replaceable = 1;
7043 break;
7046 else
7048 /* May still be replaceable, we don't have enough info here to
7049 decide. */
7050 v->replaceable = 0;
7051 v->not_replaceable = 0;
7055 /* Record whether the add_val contains a const_int, for later use by
7056 combine_givs. */
7058 rtx tem = add_val;
7060 v->no_const_addval = 1;
7061 if (tem == const0_rtx)
7063 else if (CONSTANT_P (add_val))
7064 v->no_const_addval = 0;
7065 if (GET_CODE (tem) == PLUS)
7067 while (1)
7069 if (GET_CODE (XEXP (tem, 0)) == PLUS)
7070 tem = XEXP (tem, 0);
7071 else if (GET_CODE (XEXP (tem, 1)) == PLUS)
7072 tem = XEXP (tem, 1);
7073 else
7074 break;
7076 if (CONSTANT_P (XEXP (tem, 1)))
7077 v->no_const_addval = 0;
7081 if (loop_dump_stream)
7082 loop_giv_dump (v, loop_dump_stream, 0);
7085 /* Try to calculate the final value of the giv, the value it will have at
7086 the end of the loop. If we can do it, return that value. */
7088 static rtx
7089 final_giv_value (const struct loop *loop, struct induction *v)
7091 struct loop_ivs *ivs = LOOP_IVS (loop);
7092 struct iv_class *bl;
7093 rtx insn;
7094 rtx increment, tem;
7095 rtx seq;
7096 rtx loop_end = loop->end;
7097 unsigned HOST_WIDE_INT n_iterations = LOOP_INFO (loop)->n_iterations;
7099 bl = REG_IV_CLASS (ivs, REGNO (v->src_reg));
7101 /* The final value for givs which depend on reversed bivs must be calculated
7102 differently than for ordinary givs. In this case, there is already an
7103 insn after the loop which sets this giv's final value (if necessary),
7104 and there are no other loop exits, so we can return any value. */
7105 if (bl->reversed)
7107 if (loop_dump_stream)
7108 fprintf (loop_dump_stream,
7109 "Final giv value for %d, depends on reversed biv\n",
7110 REGNO (v->dest_reg));
7111 return const0_rtx;
7114 /* Try to calculate the final value as a function of the biv it depends
7115 upon. The only exit from the loop must be the fall through at the bottom
7116 and the insn that sets the giv must be executed on every iteration
7117 (otherwise the giv may not have its final value when the loop exits). */
7119 /* ??? Can calculate the final giv value by subtracting off the
7120 extra biv increments times the giv's mult_val. The loop must have
7121 only one exit for this to work, but the loop iterations does not need
7122 to be known. */
7124 if (n_iterations != 0
7125 && ! loop->exit_count
7126 && v->always_executed)
7128 /* ?? It is tempting to use the biv's value here since these insns will
7129 be put after the loop, and hence the biv will have its final value
7130 then. However, this fails if the biv is subsequently eliminated.
7131 Perhaps determine whether biv's are eliminable before trying to
7132 determine whether giv's are replaceable so that we can use the
7133 biv value here if it is not eliminable. */
7135 /* We are emitting code after the end of the loop, so we must make
7136 sure that bl->initial_value is still valid then. It will still
7137 be valid if it is invariant. */
7139 increment = biv_total_increment (bl);
7141 if (increment && loop_invariant_p (loop, increment)
7142 && loop_invariant_p (loop, bl->initial_value))
7144 /* Can calculate the loop exit value of its biv as
7145 (n_iterations * increment) + initial_value */
7147 /* The loop exit value of the giv is then
7148 (final_biv_value - extra increments) * mult_val + add_val.
7149 The extra increments are any increments to the biv which
7150 occur in the loop after the giv's value is calculated.
7151 We must search from the insn that sets the giv to the end
7152 of the loop to calculate this value. */
7154 /* Put the final biv value in tem. */
7155 tem = gen_reg_rtx (v->mode);
7156 record_base_value (REGNO (tem), bl->biv->add_val, 0);
7157 loop_iv_add_mult_sink (loop, extend_value_for_giv (v, increment),
7158 GEN_INT (n_iterations),
7159 extend_value_for_giv (v, bl->initial_value),
7160 tem);
7162 /* Subtract off extra increments as we find them. */
7163 for (insn = NEXT_INSN (v->insn); insn != loop_end;
7164 insn = NEXT_INSN (insn))
7166 struct induction *biv;
7168 for (biv = bl->biv; biv; biv = biv->next_iv)
7169 if (biv->insn == insn)
7171 start_sequence ();
7172 tem = expand_simple_binop (GET_MODE (tem), MINUS, tem,
7173 biv->add_val, NULL_RTX, 0,
7174 OPTAB_LIB_WIDEN);
7175 seq = get_insns ();
7176 end_sequence ();
7177 loop_insn_sink (loop, seq);
7181 /* Now calculate the giv's final value. */
7182 loop_iv_add_mult_sink (loop, tem, v->mult_val, v->add_val, tem);
7184 if (loop_dump_stream)
7185 fprintf (loop_dump_stream,
7186 "Final giv value for %d, calc from biv's value.\n",
7187 REGNO (v->dest_reg));
7189 return tem;
7193 /* Replaceable giv's should never reach here. */
7194 if (v->replaceable)
7195 abort ();
7197 /* Check to see if the biv is dead at all loop exits. */
7198 if (reg_dead_after_loop (loop, v->dest_reg))
7200 if (loop_dump_stream)
7201 fprintf (loop_dump_stream,
7202 "Final giv value for %d, giv dead after loop exit.\n",
7203 REGNO (v->dest_reg));
7205 return const0_rtx;
7208 return 0;
7211 /* All this does is determine whether a giv can be made replaceable because
7212 its final value can be calculated. This code can not be part of record_giv
7213 above, because final_giv_value requires that the number of loop iterations
7214 be known, and that can not be accurately calculated until after all givs
7215 have been identified. */
7217 static void
7218 check_final_value (const struct loop *loop, struct induction *v)
7220 rtx final_value = 0;
7222 /* DEST_ADDR givs will never reach here, because they are always marked
7223 replaceable above in record_giv. */
7225 /* The giv can be replaced outright by the reduced register only if all
7226 of the following conditions are true:
7227 - the insn that sets the giv is always executed on any iteration
7228 on which the giv is used at all
7229 (there are two ways to deduce this:
7230 either the insn is executed on every iteration,
7231 or all uses follow that insn in the same basic block),
7232 - its final value can be calculated (this condition is different
7233 than the one above in record_giv)
7234 - it's not used before the it's set
7235 - no assignments to the biv occur during the giv's lifetime. */
7237 #if 0
7238 /* This is only called now when replaceable is known to be false. */
7239 /* Clear replaceable, so that it won't confuse final_giv_value. */
7240 v->replaceable = 0;
7241 #endif
7243 if ((final_value = final_giv_value (loop, v))
7244 && (v->always_executed
7245 || last_use_this_basic_block (v->dest_reg, v->insn)))
7247 int biv_increment_seen = 0, before_giv_insn = 0;
7248 rtx p = v->insn;
7249 rtx last_giv_use;
7251 v->replaceable = 1;
7252 v->not_replaceable = 0;
7254 /* When trying to determine whether or not a biv increment occurs
7255 during the lifetime of the giv, we can ignore uses of the variable
7256 outside the loop because final_value is true. Hence we can not
7257 use regno_last_uid and regno_first_uid as above in record_giv. */
7259 /* Search the loop to determine whether any assignments to the
7260 biv occur during the giv's lifetime. Start with the insn
7261 that sets the giv, and search around the loop until we come
7262 back to that insn again.
7264 Also fail if there is a jump within the giv's lifetime that jumps
7265 to somewhere outside the lifetime but still within the loop. This
7266 catches spaghetti code where the execution order is not linear, and
7267 hence the above test fails. Here we assume that the giv lifetime
7268 does not extend from one iteration of the loop to the next, so as
7269 to make the test easier. Since the lifetime isn't known yet,
7270 this requires two loops. See also record_giv above. */
7272 last_giv_use = v->insn;
7274 while (1)
7276 p = NEXT_INSN (p);
7277 if (p == loop->end)
7279 before_giv_insn = 1;
7280 p = NEXT_INSN (loop->start);
7282 if (p == v->insn)
7283 break;
7285 if (INSN_P (p))
7287 /* It is possible for the BIV increment to use the GIV if we
7288 have a cycle. Thus we must be sure to check each insn for
7289 both BIV and GIV uses, and we must check for BIV uses
7290 first. */
7292 if (! biv_increment_seen
7293 && reg_set_p (v->src_reg, PATTERN (p)))
7294 biv_increment_seen = 1;
7296 if (reg_mentioned_p (v->dest_reg, PATTERN (p)))
7298 if (biv_increment_seen || before_giv_insn)
7300 v->replaceable = 0;
7301 v->not_replaceable = 1;
7302 break;
7304 last_giv_use = p;
7309 /* Now that the lifetime of the giv is known, check for branches
7310 from within the lifetime to outside the lifetime if it is still
7311 replaceable. */
7313 if (v->replaceable)
7315 p = v->insn;
7316 while (1)
7318 p = NEXT_INSN (p);
7319 if (p == loop->end)
7320 p = NEXT_INSN (loop->start);
7321 if (p == last_giv_use)
7322 break;
7324 if (JUMP_P (p) && JUMP_LABEL (p)
7325 && LABEL_NAME (JUMP_LABEL (p))
7326 && ((loop_insn_first_p (JUMP_LABEL (p), v->insn)
7327 && loop_insn_first_p (loop->start, JUMP_LABEL (p)))
7328 || (loop_insn_first_p (last_giv_use, JUMP_LABEL (p))
7329 && loop_insn_first_p (JUMP_LABEL (p), loop->end))))
7331 v->replaceable = 0;
7332 v->not_replaceable = 1;
7334 if (loop_dump_stream)
7335 fprintf (loop_dump_stream,
7336 "Found branch outside giv lifetime.\n");
7338 break;
7343 /* If it is replaceable, then save the final value. */
7344 if (v->replaceable)
7345 v->final_value = final_value;
7348 if (loop_dump_stream && v->replaceable)
7349 fprintf (loop_dump_stream, "Insn %d: giv reg %d final_value replaceable\n",
7350 INSN_UID (v->insn), REGNO (v->dest_reg));
7353 /* Update the status of whether a giv can derive other givs.
7355 We need to do something special if there is or may be an update to the biv
7356 between the time the giv is defined and the time it is used to derive
7357 another giv.
7359 In addition, a giv that is only conditionally set is not allowed to
7360 derive another giv once a label has been passed.
7362 The cases we look at are when a label or an update to a biv is passed. */
7364 static void
7365 update_giv_derive (const struct loop *loop, rtx p)
7367 struct loop_ivs *ivs = LOOP_IVS (loop);
7368 struct iv_class *bl;
7369 struct induction *biv, *giv;
7370 rtx tem;
7371 int dummy;
7373 /* Search all IV classes, then all bivs, and finally all givs.
7375 There are three cases we are concerned with. First we have the situation
7376 of a giv that is only updated conditionally. In that case, it may not
7377 derive any givs after a label is passed.
7379 The second case is when a biv update occurs, or may occur, after the
7380 definition of a giv. For certain biv updates (see below) that are
7381 known to occur between the giv definition and use, we can adjust the
7382 giv definition. For others, or when the biv update is conditional,
7383 we must prevent the giv from deriving any other givs. There are two
7384 sub-cases within this case.
7386 If this is a label, we are concerned with any biv update that is done
7387 conditionally, since it may be done after the giv is defined followed by
7388 a branch here (actually, we need to pass both a jump and a label, but
7389 this extra tracking doesn't seem worth it).
7391 If this is a jump, we are concerned about any biv update that may be
7392 executed multiple times. We are actually only concerned about
7393 backward jumps, but it is probably not worth performing the test
7394 on the jump again here.
7396 If this is a biv update, we must adjust the giv status to show that a
7397 subsequent biv update was performed. If this adjustment cannot be done,
7398 the giv cannot derive further givs. */
7400 for (bl = ivs->list; bl; bl = bl->next)
7401 for (biv = bl->biv; biv; biv = biv->next_iv)
7402 if (LABEL_P (p) || JUMP_P (p)
7403 || biv->insn == p)
7405 /* Skip if location is the same as a previous one. */
7406 if (biv->same)
7407 continue;
7409 for (giv = bl->giv; giv; giv = giv->next_iv)
7411 /* If cant_derive is already true, there is no point in
7412 checking all of these conditions again. */
7413 if (giv->cant_derive)
7414 continue;
7416 /* If this giv is conditionally set and we have passed a label,
7417 it cannot derive anything. */
7418 if (LABEL_P (p) && ! giv->always_computable)
7419 giv->cant_derive = 1;
7421 /* Skip givs that have mult_val == 0, since
7422 they are really invariants. Also skip those that are
7423 replaceable, since we know their lifetime doesn't contain
7424 any biv update. */
7425 else if (giv->mult_val == const0_rtx || giv->replaceable)
7426 continue;
7428 /* The only way we can allow this giv to derive another
7429 is if this is a biv increment and we can form the product
7430 of biv->add_val and giv->mult_val. In this case, we will
7431 be able to compute a compensation. */
7432 else if (biv->insn == p)
7434 rtx ext_val_dummy;
7436 tem = 0;
7437 if (biv->mult_val == const1_rtx)
7438 tem = simplify_giv_expr (loop,
7439 gen_rtx_MULT (giv->mode,
7440 biv->add_val,
7441 giv->mult_val),
7442 &ext_val_dummy, &dummy);
7444 if (tem && giv->derive_adjustment)
7445 tem = simplify_giv_expr
7446 (loop,
7447 gen_rtx_PLUS (giv->mode, tem, giv->derive_adjustment),
7448 &ext_val_dummy, &dummy);
7450 if (tem)
7451 giv->derive_adjustment = tem;
7452 else
7453 giv->cant_derive = 1;
7455 else if ((LABEL_P (p) && ! biv->always_computable)
7456 || (JUMP_P (p) && biv->maybe_multiple))
7457 giv->cant_derive = 1;
7462 /* Check whether an insn is an increment legitimate for a basic induction var.
7463 X is the source of insn P, or a part of it.
7464 MODE is the mode in which X should be interpreted.
7466 DEST_REG is the putative biv, also the destination of the insn.
7467 We accept patterns of these forms:
7468 REG = REG + INVARIANT (includes REG = REG - CONSTANT)
7469 REG = INVARIANT + REG
7471 If X is suitable, we return 1, set *MULT_VAL to CONST1_RTX,
7472 store the additive term into *INC_VAL, and store the place where
7473 we found the additive term into *LOCATION.
7475 If X is an assignment of an invariant into DEST_REG, we set
7476 *MULT_VAL to CONST0_RTX, and store the invariant into *INC_VAL.
7478 We also want to detect a BIV when it corresponds to a variable
7479 whose mode was promoted. In that case, an increment
7480 of the variable may be a PLUS that adds a SUBREG of that variable to
7481 an invariant and then sign- or zero-extends the result of the PLUS
7482 into the variable.
7484 Most GIVs in such cases will be in the promoted mode, since that is the
7485 probably the natural computation mode (and almost certainly the mode
7486 used for addresses) on the machine. So we view the pseudo-reg containing
7487 the variable as the BIV, as if it were simply incremented.
7489 Note that treating the entire pseudo as a BIV will result in making
7490 simple increments to any GIVs based on it. However, if the variable
7491 overflows in its declared mode but not its promoted mode, the result will
7492 be incorrect. This is acceptable if the variable is signed, since
7493 overflows in such cases are undefined, but not if it is unsigned, since
7494 those overflows are defined. So we only check for SIGN_EXTEND and
7495 not ZERO_EXTEND.
7497 If we cannot find a biv, we return 0. */
7499 static int
7500 basic_induction_var (const struct loop *loop, rtx x, enum machine_mode mode,
7501 rtx dest_reg, rtx p, rtx *inc_val, rtx *mult_val,
7502 rtx **location)
7504 enum rtx_code code;
7505 rtx *argp, arg;
7506 rtx insn, set = 0, last, inc;
7508 code = GET_CODE (x);
7509 *location = NULL;
7510 switch (code)
7512 case PLUS:
7513 if (rtx_equal_p (XEXP (x, 0), dest_reg)
7514 || (GET_CODE (XEXP (x, 0)) == SUBREG
7515 && SUBREG_PROMOTED_VAR_P (XEXP (x, 0))
7516 && SUBREG_REG (XEXP (x, 0)) == dest_reg))
7518 argp = &XEXP (x, 1);
7520 else if (rtx_equal_p (XEXP (x, 1), dest_reg)
7521 || (GET_CODE (XEXP (x, 1)) == SUBREG
7522 && SUBREG_PROMOTED_VAR_P (XEXP (x, 1))
7523 && SUBREG_REG (XEXP (x, 1)) == dest_reg))
7525 argp = &XEXP (x, 0);
7527 else
7528 return 0;
7530 arg = *argp;
7531 if (loop_invariant_p (loop, arg) != 1)
7532 return 0;
7534 /* convert_modes can emit new instructions, e.g. when arg is a loop
7535 invariant MEM and dest_reg has a different mode.
7536 These instructions would be emitted after the end of the function
7537 and then *inc_val would be an uninitialized pseudo.
7538 Detect this and bail in this case.
7539 Other alternatives to solve this can be introducing a convert_modes
7540 variant which is allowed to fail but not allowed to emit new
7541 instructions, emit these instructions before loop start and let
7542 it be garbage collected if *inc_val is never used or saving the
7543 *inc_val initialization sequence generated here and when *inc_val
7544 is going to be actually used, emit it at some suitable place. */
7545 last = get_last_insn ();
7546 inc = convert_modes (GET_MODE (dest_reg), GET_MODE (x), arg, 0);
7547 if (get_last_insn () != last)
7549 delete_insns_since (last);
7550 return 0;
7553 *inc_val = inc;
7554 *mult_val = const1_rtx;
7555 *location = argp;
7556 return 1;
7558 case SUBREG:
7559 /* If what's inside the SUBREG is a BIV, then the SUBREG. This will
7560 handle addition of promoted variables.
7561 ??? The comment at the start of this function is wrong: promoted
7562 variable increments don't look like it says they do. */
7563 return basic_induction_var (loop, SUBREG_REG (x),
7564 GET_MODE (SUBREG_REG (x)),
7565 dest_reg, p, inc_val, mult_val, location);
7567 case REG:
7568 /* If this register is assigned in a previous insn, look at its
7569 source, but don't go outside the loop or past a label. */
7571 /* If this sets a register to itself, we would repeat any previous
7572 biv increment if we applied this strategy blindly. */
7573 if (rtx_equal_p (dest_reg, x))
7574 return 0;
7576 insn = p;
7577 while (1)
7579 rtx dest;
7582 insn = PREV_INSN (insn);
7584 while (insn && NOTE_P (insn)
7585 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_BEG);
7587 if (!insn)
7588 break;
7589 set = single_set (insn);
7590 if (set == 0)
7591 break;
7592 dest = SET_DEST (set);
7593 if (dest == x
7594 || (GET_CODE (dest) == SUBREG
7595 && (GET_MODE_SIZE (GET_MODE (dest)) <= UNITS_PER_WORD)
7596 && (GET_MODE_CLASS (GET_MODE (dest)) == MODE_INT)
7597 && SUBREG_REG (dest) == x))
7598 return basic_induction_var (loop, SET_SRC (set),
7599 (GET_MODE (SET_SRC (set)) == VOIDmode
7600 ? GET_MODE (x)
7601 : GET_MODE (SET_SRC (set))),
7602 dest_reg, insn,
7603 inc_val, mult_val, location);
7605 while (GET_CODE (dest) == SUBREG
7606 || GET_CODE (dest) == ZERO_EXTRACT
7607 || GET_CODE (dest) == STRICT_LOW_PART)
7608 dest = XEXP (dest, 0);
7609 if (dest == x)
7610 break;
7612 /* Fall through. */
7614 /* Can accept constant setting of biv only when inside inner most loop.
7615 Otherwise, a biv of an inner loop may be incorrectly recognized
7616 as a biv of the outer loop,
7617 causing code to be moved INTO the inner loop. */
7618 case MEM:
7619 if (loop_invariant_p (loop, x) != 1)
7620 return 0;
7621 case CONST_INT:
7622 case SYMBOL_REF:
7623 case CONST:
7624 /* convert_modes aborts if we try to convert to or from CCmode, so just
7625 exclude that case. It is very unlikely that a condition code value
7626 would be a useful iterator anyways. convert_modes aborts if we try to
7627 convert a float mode to non-float or vice versa too. */
7628 if (loop->level == 1
7629 && GET_MODE_CLASS (mode) == GET_MODE_CLASS (GET_MODE (dest_reg))
7630 && GET_MODE_CLASS (mode) != MODE_CC)
7632 /* Possible bug here? Perhaps we don't know the mode of X. */
7633 last = get_last_insn ();
7634 inc = convert_modes (GET_MODE (dest_reg), mode, x, 0);
7635 if (get_last_insn () != last)
7637 delete_insns_since (last);
7638 return 0;
7641 *inc_val = inc;
7642 *mult_val = const0_rtx;
7643 return 1;
7645 else
7646 return 0;
7648 case SIGN_EXTEND:
7649 /* Ignore this BIV if signed arithmetic overflow is defined. */
7650 if (flag_wrapv)
7651 return 0;
7652 return basic_induction_var (loop, XEXP (x, 0), GET_MODE (XEXP (x, 0)),
7653 dest_reg, p, inc_val, mult_val, location);
7655 case ASHIFTRT:
7656 /* Similar, since this can be a sign extension. */
7657 for (insn = PREV_INSN (p);
7658 (insn && NOTE_P (insn)
7659 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_BEG);
7660 insn = PREV_INSN (insn))
7663 if (insn)
7664 set = single_set (insn);
7666 if (! rtx_equal_p (dest_reg, XEXP (x, 0))
7667 && set && SET_DEST (set) == XEXP (x, 0)
7668 && GET_CODE (XEXP (x, 1)) == CONST_INT
7669 && INTVAL (XEXP (x, 1)) >= 0
7670 && GET_CODE (SET_SRC (set)) == ASHIFT
7671 && XEXP (x, 1) == XEXP (SET_SRC (set), 1))
7672 return basic_induction_var (loop, XEXP (SET_SRC (set), 0),
7673 GET_MODE (XEXP (x, 0)),
7674 dest_reg, insn, inc_val, mult_val,
7675 location);
7676 return 0;
7678 default:
7679 return 0;
7683 /* A general induction variable (giv) is any quantity that is a linear
7684 function of a basic induction variable,
7685 i.e. giv = biv * mult_val + add_val.
7686 The coefficients can be any loop invariant quantity.
7687 A giv need not be computed directly from the biv;
7688 it can be computed by way of other givs. */
7690 /* Determine whether X computes a giv.
7691 If it does, return a nonzero value
7692 which is the benefit from eliminating the computation of X;
7693 set *SRC_REG to the register of the biv that it is computed from;
7694 set *ADD_VAL and *MULT_VAL to the coefficients,
7695 such that the value of X is biv * mult + add; */
7697 static int
7698 general_induction_var (const struct loop *loop, rtx x, rtx *src_reg,
7699 rtx *add_val, rtx *mult_val, rtx *ext_val,
7700 int is_addr, int *pbenefit,
7701 enum machine_mode addr_mode)
7703 struct loop_ivs *ivs = LOOP_IVS (loop);
7704 rtx orig_x = x;
7706 /* If this is an invariant, forget it, it isn't a giv. */
7707 if (loop_invariant_p (loop, x) == 1)
7708 return 0;
7710 *pbenefit = 0;
7711 *ext_val = NULL_RTX;
7712 x = simplify_giv_expr (loop, x, ext_val, pbenefit);
7713 if (x == 0)
7714 return 0;
7716 switch (GET_CODE (x))
7718 case USE:
7719 case CONST_INT:
7720 /* Since this is now an invariant and wasn't before, it must be a giv
7721 with MULT_VAL == 0. It doesn't matter which BIV we associate this
7722 with. */
7723 *src_reg = ivs->list->biv->dest_reg;
7724 *mult_val = const0_rtx;
7725 *add_val = x;
7726 break;
7728 case REG:
7729 /* This is equivalent to a BIV. */
7730 *src_reg = x;
7731 *mult_val = const1_rtx;
7732 *add_val = const0_rtx;
7733 break;
7735 case PLUS:
7736 /* Either (plus (biv) (invar)) or
7737 (plus (mult (biv) (invar_1)) (invar_2)). */
7738 if (GET_CODE (XEXP (x, 0)) == MULT)
7740 *src_reg = XEXP (XEXP (x, 0), 0);
7741 *mult_val = XEXP (XEXP (x, 0), 1);
7743 else
7745 *src_reg = XEXP (x, 0);
7746 *mult_val = const1_rtx;
7748 *add_val = XEXP (x, 1);
7749 break;
7751 case MULT:
7752 /* ADD_VAL is zero. */
7753 *src_reg = XEXP (x, 0);
7754 *mult_val = XEXP (x, 1);
7755 *add_val = const0_rtx;
7756 break;
7758 default:
7759 abort ();
7762 /* Remove any enclosing USE from ADD_VAL and MULT_VAL (there will be
7763 unless they are CONST_INT). */
7764 if (GET_CODE (*add_val) == USE)
7765 *add_val = XEXP (*add_val, 0);
7766 if (GET_CODE (*mult_val) == USE)
7767 *mult_val = XEXP (*mult_val, 0);
7769 if (is_addr)
7770 *pbenefit += address_cost (orig_x, addr_mode) - reg_address_cost;
7771 else
7772 *pbenefit += rtx_cost (orig_x, SET);
7774 /* Always return true if this is a giv so it will be detected as such,
7775 even if the benefit is zero or negative. This allows elimination
7776 of bivs that might otherwise not be eliminated. */
7777 return 1;
7780 /* Given an expression, X, try to form it as a linear function of a biv.
7781 We will canonicalize it to be of the form
7782 (plus (mult (BIV) (invar_1))
7783 (invar_2))
7784 with possible degeneracies.
7786 The invariant expressions must each be of a form that can be used as a
7787 machine operand. We surround them with a USE rtx (a hack, but localized
7788 and certainly unambiguous!) if not a CONST_INT for simplicity in this
7789 routine; it is the caller's responsibility to strip them.
7791 If no such canonicalization is possible (i.e., two biv's are used or an
7792 expression that is neither invariant nor a biv or giv), this routine
7793 returns 0.
7795 For a nonzero return, the result will have a code of CONST_INT, USE,
7796 REG (for a BIV), PLUS, or MULT. No other codes will occur.
7798 *BENEFIT will be incremented by the benefit of any sub-giv encountered. */
7800 static rtx sge_plus (enum machine_mode, rtx, rtx);
7801 static rtx sge_plus_constant (rtx, rtx);
7803 static rtx
7804 simplify_giv_expr (const struct loop *loop, rtx x, rtx *ext_val, int *benefit)
7806 struct loop_ivs *ivs = LOOP_IVS (loop);
7807 struct loop_regs *regs = LOOP_REGS (loop);
7808 enum machine_mode mode = GET_MODE (x);
7809 rtx arg0, arg1;
7810 rtx tem;
7812 /* If this is not an integer mode, or if we cannot do arithmetic in this
7813 mode, this can't be a giv. */
7814 if (mode != VOIDmode
7815 && (GET_MODE_CLASS (mode) != MODE_INT
7816 || GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT))
7817 return NULL_RTX;
7819 switch (GET_CODE (x))
7821 case PLUS:
7822 arg0 = simplify_giv_expr (loop, XEXP (x, 0), ext_val, benefit);
7823 arg1 = simplify_giv_expr (loop, XEXP (x, 1), ext_val, benefit);
7824 if (arg0 == 0 || arg1 == 0)
7825 return NULL_RTX;
7827 /* Put constant last, CONST_INT last if both constant. */
7828 if ((GET_CODE (arg0) == USE
7829 || GET_CODE (arg0) == CONST_INT)
7830 && ! ((GET_CODE (arg0) == USE
7831 && GET_CODE (arg1) == USE)
7832 || GET_CODE (arg1) == CONST_INT))
7833 tem = arg0, arg0 = arg1, arg1 = tem;
7835 /* Handle addition of zero, then addition of an invariant. */
7836 if (arg1 == const0_rtx)
7837 return arg0;
7838 else if (GET_CODE (arg1) == CONST_INT || GET_CODE (arg1) == USE)
7839 switch (GET_CODE (arg0))
7841 case CONST_INT:
7842 case USE:
7843 /* Adding two invariants must result in an invariant, so enclose
7844 addition operation inside a USE and return it. */
7845 if (GET_CODE (arg0) == USE)
7846 arg0 = XEXP (arg0, 0);
7847 if (GET_CODE (arg1) == USE)
7848 arg1 = XEXP (arg1, 0);
7850 if (GET_CODE (arg0) == CONST_INT)
7851 tem = arg0, arg0 = arg1, arg1 = tem;
7852 if (GET_CODE (arg1) == CONST_INT)
7853 tem = sge_plus_constant (arg0, arg1);
7854 else
7855 tem = sge_plus (mode, arg0, arg1);
7857 if (GET_CODE (tem) != CONST_INT)
7858 tem = gen_rtx_USE (mode, tem);
7859 return tem;
7861 case REG:
7862 case MULT:
7863 /* biv + invar or mult + invar. Return sum. */
7864 return gen_rtx_PLUS (mode, arg0, arg1);
7866 case PLUS:
7867 /* (a + invar_1) + invar_2. Associate. */
7868 return
7869 simplify_giv_expr (loop,
7870 gen_rtx_PLUS (mode,
7871 XEXP (arg0, 0),
7872 gen_rtx_PLUS (mode,
7873 XEXP (arg0, 1),
7874 arg1)),
7875 ext_val, benefit);
7877 default:
7878 abort ();
7881 /* Each argument must be either REG, PLUS, or MULT. Convert REG to
7882 MULT to reduce cases. */
7883 if (REG_P (arg0))
7884 arg0 = gen_rtx_MULT (mode, arg0, const1_rtx);
7885 if (REG_P (arg1))
7886 arg1 = gen_rtx_MULT (mode, arg1, const1_rtx);
7888 /* Now have PLUS + PLUS, PLUS + MULT, MULT + PLUS, or MULT + MULT.
7889 Put a MULT first, leaving PLUS + PLUS, MULT + PLUS, or MULT + MULT.
7890 Recurse to associate the second PLUS. */
7891 if (GET_CODE (arg1) == MULT)
7892 tem = arg0, arg0 = arg1, arg1 = tem;
7894 if (GET_CODE (arg1) == PLUS)
7895 return
7896 simplify_giv_expr (loop,
7897 gen_rtx_PLUS (mode,
7898 gen_rtx_PLUS (mode, arg0,
7899 XEXP (arg1, 0)),
7900 XEXP (arg1, 1)),
7901 ext_val, benefit);
7903 /* Now must have MULT + MULT. Distribute if same biv, else not giv. */
7904 if (GET_CODE (arg0) != MULT || GET_CODE (arg1) != MULT)
7905 return NULL_RTX;
7907 if (!rtx_equal_p (arg0, arg1))
7908 return NULL_RTX;
7910 return simplify_giv_expr (loop,
7911 gen_rtx_MULT (mode,
7912 XEXP (arg0, 0),
7913 gen_rtx_PLUS (mode,
7914 XEXP (arg0, 1),
7915 XEXP (arg1, 1))),
7916 ext_val, benefit);
7918 case MINUS:
7919 /* Handle "a - b" as "a + b * (-1)". */
7920 return simplify_giv_expr (loop,
7921 gen_rtx_PLUS (mode,
7922 XEXP (x, 0),
7923 gen_rtx_MULT (mode,
7924 XEXP (x, 1),
7925 constm1_rtx)),
7926 ext_val, benefit);
7928 case MULT:
7929 arg0 = simplify_giv_expr (loop, XEXP (x, 0), ext_val, benefit);
7930 arg1 = simplify_giv_expr (loop, XEXP (x, 1), ext_val, benefit);
7931 if (arg0 == 0 || arg1 == 0)
7932 return NULL_RTX;
7934 /* Put constant last, CONST_INT last if both constant. */
7935 if ((GET_CODE (arg0) == USE || GET_CODE (arg0) == CONST_INT)
7936 && GET_CODE (arg1) != CONST_INT)
7937 tem = arg0, arg0 = arg1, arg1 = tem;
7939 /* If second argument is not now constant, not giv. */
7940 if (GET_CODE (arg1) != USE && GET_CODE (arg1) != CONST_INT)
7941 return NULL_RTX;
7943 /* Handle multiply by 0 or 1. */
7944 if (arg1 == const0_rtx)
7945 return const0_rtx;
7947 else if (arg1 == const1_rtx)
7948 return arg0;
7950 switch (GET_CODE (arg0))
7952 case REG:
7953 /* biv * invar. Done. */
7954 return gen_rtx_MULT (mode, arg0, arg1);
7956 case CONST_INT:
7957 /* Product of two constants. */
7958 return GEN_INT (INTVAL (arg0) * INTVAL (arg1));
7960 case USE:
7961 /* invar * invar is a giv, but attempt to simplify it somehow. */
7962 if (GET_CODE (arg1) != CONST_INT)
7963 return NULL_RTX;
7965 arg0 = XEXP (arg0, 0);
7966 if (GET_CODE (arg0) == MULT)
7968 /* (invar_0 * invar_1) * invar_2. Associate. */
7969 return simplify_giv_expr (loop,
7970 gen_rtx_MULT (mode,
7971 XEXP (arg0, 0),
7972 gen_rtx_MULT (mode,
7973 XEXP (arg0,
7975 arg1)),
7976 ext_val, benefit);
7978 /* Propagate the MULT expressions to the innermost nodes. */
7979 else if (GET_CODE (arg0) == PLUS)
7981 /* (invar_0 + invar_1) * invar_2. Distribute. */
7982 return simplify_giv_expr (loop,
7983 gen_rtx_PLUS (mode,
7984 gen_rtx_MULT (mode,
7985 XEXP (arg0,
7987 arg1),
7988 gen_rtx_MULT (mode,
7989 XEXP (arg0,
7991 arg1)),
7992 ext_val, benefit);
7994 return gen_rtx_USE (mode, gen_rtx_MULT (mode, arg0, arg1));
7996 case MULT:
7997 /* (a * invar_1) * invar_2. Associate. */
7998 return simplify_giv_expr (loop,
7999 gen_rtx_MULT (mode,
8000 XEXP (arg0, 0),
8001 gen_rtx_MULT (mode,
8002 XEXP (arg0, 1),
8003 arg1)),
8004 ext_val, benefit);
8006 case PLUS:
8007 /* (a + invar_1) * invar_2. Distribute. */
8008 return simplify_giv_expr (loop,
8009 gen_rtx_PLUS (mode,
8010 gen_rtx_MULT (mode,
8011 XEXP (arg0, 0),
8012 arg1),
8013 gen_rtx_MULT (mode,
8014 XEXP (arg0, 1),
8015 arg1)),
8016 ext_val, benefit);
8018 default:
8019 abort ();
8022 case ASHIFT:
8023 /* Shift by constant is multiply by power of two. */
8024 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
8025 return 0;
8027 return
8028 simplify_giv_expr (loop,
8029 gen_rtx_MULT (mode,
8030 XEXP (x, 0),
8031 GEN_INT ((HOST_WIDE_INT) 1
8032 << INTVAL (XEXP (x, 1)))),
8033 ext_val, benefit);
8035 case NEG:
8036 /* "-a" is "a * (-1)" */
8037 return simplify_giv_expr (loop,
8038 gen_rtx_MULT (mode, XEXP (x, 0), constm1_rtx),
8039 ext_val, benefit);
8041 case NOT:
8042 /* "~a" is "-a - 1". Silly, but easy. */
8043 return simplify_giv_expr (loop,
8044 gen_rtx_MINUS (mode,
8045 gen_rtx_NEG (mode, XEXP (x, 0)),
8046 const1_rtx),
8047 ext_val, benefit);
8049 case USE:
8050 /* Already in proper form for invariant. */
8051 return x;
8053 case SIGN_EXTEND:
8054 case ZERO_EXTEND:
8055 case TRUNCATE:
8056 /* Conditionally recognize extensions of simple IVs. After we've
8057 computed loop traversal counts and verified the range of the
8058 source IV, we'll reevaluate this as a GIV. */
8059 if (*ext_val == NULL_RTX)
8061 arg0 = simplify_giv_expr (loop, XEXP (x, 0), ext_val, benefit);
8062 if (arg0 && *ext_val == NULL_RTX && REG_P (arg0))
8064 *ext_val = gen_rtx_fmt_e (GET_CODE (x), mode, arg0);
8065 return arg0;
8068 goto do_default;
8070 case REG:
8071 /* If this is a new register, we can't deal with it. */
8072 if (REGNO (x) >= max_reg_before_loop)
8073 return 0;
8075 /* Check for biv or giv. */
8076 switch (REG_IV_TYPE (ivs, REGNO (x)))
8078 case BASIC_INDUCT:
8079 return x;
8080 case GENERAL_INDUCT:
8082 struct induction *v = REG_IV_INFO (ivs, REGNO (x));
8084 /* Form expression from giv and add benefit. Ensure this giv
8085 can derive another and subtract any needed adjustment if so. */
8087 /* Increasing the benefit here is risky. The only case in which it
8088 is arguably correct is if this is the only use of V. In other
8089 cases, this will artificially inflate the benefit of the current
8090 giv, and lead to suboptimal code. Thus, it is disabled, since
8091 potentially not reducing an only marginally beneficial giv is
8092 less harmful than reducing many givs that are not really
8093 beneficial. */
8095 rtx single_use = regs->array[REGNO (x)].single_usage;
8096 if (single_use && single_use != const0_rtx)
8097 *benefit += v->benefit;
8100 if (v->cant_derive)
8101 return 0;
8103 tem = gen_rtx_PLUS (mode, gen_rtx_MULT (mode,
8104 v->src_reg, v->mult_val),
8105 v->add_val);
8107 if (v->derive_adjustment)
8108 tem = gen_rtx_MINUS (mode, tem, v->derive_adjustment);
8109 arg0 = simplify_giv_expr (loop, tem, ext_val, benefit);
8110 if (*ext_val)
8112 if (!v->ext_dependent)
8113 return arg0;
8115 else
8117 *ext_val = v->ext_dependent;
8118 return arg0;
8120 return 0;
8123 default:
8124 do_default:
8125 /* If it isn't an induction variable, and it is invariant, we
8126 may be able to simplify things further by looking through
8127 the bits we just moved outside the loop. */
8128 if (loop_invariant_p (loop, x) == 1)
8130 struct movable *m;
8131 struct loop_movables *movables = LOOP_MOVABLES (loop);
8133 for (m = movables->head; m; m = m->next)
8134 if (rtx_equal_p (x, m->set_dest))
8136 /* Ok, we found a match. Substitute and simplify. */
8138 /* If we match another movable, we must use that, as
8139 this one is going away. */
8140 if (m->match)
8141 return simplify_giv_expr (loop, m->match->set_dest,
8142 ext_val, benefit);
8144 /* If consec is nonzero, this is a member of a group of
8145 instructions that were moved together. We handle this
8146 case only to the point of seeking to the last insn and
8147 looking for a REG_EQUAL. Fail if we don't find one. */
8148 if (m->consec != 0)
8150 int i = m->consec;
8151 tem = m->insn;
8154 tem = NEXT_INSN (tem);
8156 while (--i > 0);
8158 tem = find_reg_note (tem, REG_EQUAL, NULL_RTX);
8159 if (tem)
8160 tem = XEXP (tem, 0);
8162 else
8164 tem = single_set (m->insn);
8165 if (tem)
8166 tem = SET_SRC (tem);
8169 if (tem)
8171 /* What we are most interested in is pointer
8172 arithmetic on invariants -- only take
8173 patterns we may be able to do something with. */
8174 if (GET_CODE (tem) == PLUS
8175 || GET_CODE (tem) == MULT
8176 || GET_CODE (tem) == ASHIFT
8177 || GET_CODE (tem) == CONST_INT
8178 || GET_CODE (tem) == SYMBOL_REF)
8180 tem = simplify_giv_expr (loop, tem, ext_val,
8181 benefit);
8182 if (tem)
8183 return tem;
8185 else if (GET_CODE (tem) == CONST
8186 && GET_CODE (XEXP (tem, 0)) == PLUS
8187 && GET_CODE (XEXP (XEXP (tem, 0), 0)) == SYMBOL_REF
8188 && GET_CODE (XEXP (XEXP (tem, 0), 1)) == CONST_INT)
8190 tem = simplify_giv_expr (loop, XEXP (tem, 0),
8191 ext_val, benefit);
8192 if (tem)
8193 return tem;
8196 break;
8199 break;
8202 /* Fall through to general case. */
8203 default:
8204 /* If invariant, return as USE (unless CONST_INT).
8205 Otherwise, not giv. */
8206 if (GET_CODE (x) == USE)
8207 x = XEXP (x, 0);
8209 if (loop_invariant_p (loop, x) == 1)
8211 if (GET_CODE (x) == CONST_INT)
8212 return x;
8213 if (GET_CODE (x) == CONST
8214 && GET_CODE (XEXP (x, 0)) == PLUS
8215 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
8216 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
8217 x = XEXP (x, 0);
8218 return gen_rtx_USE (mode, x);
8220 else
8221 return 0;
8225 /* This routine folds invariants such that there is only ever one
8226 CONST_INT in the summation. It is only used by simplify_giv_expr. */
8228 static rtx
8229 sge_plus_constant (rtx x, rtx c)
8231 if (GET_CODE (x) == CONST_INT)
8232 return GEN_INT (INTVAL (x) + INTVAL (c));
8233 else if (GET_CODE (x) != PLUS)
8234 return gen_rtx_PLUS (GET_MODE (x), x, c);
8235 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
8237 return gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
8238 GEN_INT (INTVAL (XEXP (x, 1)) + INTVAL (c)));
8240 else if (GET_CODE (XEXP (x, 0)) == PLUS
8241 || GET_CODE (XEXP (x, 1)) != PLUS)
8243 return gen_rtx_PLUS (GET_MODE (x),
8244 sge_plus_constant (XEXP (x, 0), c), XEXP (x, 1));
8246 else
8248 return gen_rtx_PLUS (GET_MODE (x),
8249 sge_plus_constant (XEXP (x, 1), c), XEXP (x, 0));
8253 static rtx
8254 sge_plus (enum machine_mode mode, rtx x, rtx y)
8256 while (GET_CODE (y) == PLUS)
8258 rtx a = XEXP (y, 0);
8259 if (GET_CODE (a) == CONST_INT)
8260 x = sge_plus_constant (x, a);
8261 else
8262 x = gen_rtx_PLUS (mode, x, a);
8263 y = XEXP (y, 1);
8265 if (GET_CODE (y) == CONST_INT)
8266 x = sge_plus_constant (x, y);
8267 else
8268 x = gen_rtx_PLUS (mode, x, y);
8269 return x;
8272 /* Help detect a giv that is calculated by several consecutive insns;
8273 for example,
8274 giv = biv * M
8275 giv = giv + A
8276 The caller has already identified the first insn P as having a giv as dest;
8277 we check that all other insns that set the same register follow
8278 immediately after P, that they alter nothing else,
8279 and that the result of the last is still a giv.
8281 The value is 0 if the reg set in P is not really a giv.
8282 Otherwise, the value is the amount gained by eliminating
8283 all the consecutive insns that compute the value.
8285 FIRST_BENEFIT is the amount gained by eliminating the first insn, P.
8286 SRC_REG is the reg of the biv; DEST_REG is the reg of the giv.
8288 The coefficients of the ultimate giv value are stored in
8289 *MULT_VAL and *ADD_VAL. */
8291 static int
8292 consec_sets_giv (const struct loop *loop, int first_benefit, rtx p,
8293 rtx src_reg, rtx dest_reg, rtx *add_val, rtx *mult_val,
8294 rtx *ext_val, rtx *last_consec_insn)
8296 struct loop_ivs *ivs = LOOP_IVS (loop);
8297 struct loop_regs *regs = LOOP_REGS (loop);
8298 int count;
8299 enum rtx_code code;
8300 int benefit;
8301 rtx temp;
8302 rtx set;
8304 /* Indicate that this is a giv so that we can update the value produced in
8305 each insn of the multi-insn sequence.
8307 This induction structure will be used only by the call to
8308 general_induction_var below, so we can allocate it on our stack.
8309 If this is a giv, our caller will replace the induct var entry with
8310 a new induction structure. */
8311 struct induction *v;
8313 if (REG_IV_TYPE (ivs, REGNO (dest_reg)) != UNKNOWN_INDUCT)
8314 return 0;
8316 v = alloca (sizeof (struct induction));
8317 v->src_reg = src_reg;
8318 v->mult_val = *mult_val;
8319 v->add_val = *add_val;
8320 v->benefit = first_benefit;
8321 v->cant_derive = 0;
8322 v->derive_adjustment = 0;
8323 v->ext_dependent = NULL_RTX;
8325 REG_IV_TYPE (ivs, REGNO (dest_reg)) = GENERAL_INDUCT;
8326 REG_IV_INFO (ivs, REGNO (dest_reg)) = v;
8328 count = regs->array[REGNO (dest_reg)].n_times_set - 1;
8330 while (count > 0)
8332 p = NEXT_INSN (p);
8333 code = GET_CODE (p);
8335 /* If libcall, skip to end of call sequence. */
8336 if (code == INSN && (temp = find_reg_note (p, REG_LIBCALL, NULL_RTX)))
8337 p = XEXP (temp, 0);
8339 if (code == INSN
8340 && (set = single_set (p))
8341 && REG_P (SET_DEST (set))
8342 && SET_DEST (set) == dest_reg
8343 && (general_induction_var (loop, SET_SRC (set), &src_reg,
8344 add_val, mult_val, ext_val, 0,
8345 &benefit, VOIDmode)
8346 /* Giv created by equivalent expression. */
8347 || ((temp = find_reg_note (p, REG_EQUAL, NULL_RTX))
8348 && general_induction_var (loop, XEXP (temp, 0), &src_reg,
8349 add_val, mult_val, ext_val, 0,
8350 &benefit, VOIDmode)))
8351 && src_reg == v->src_reg)
8353 if (find_reg_note (p, REG_RETVAL, NULL_RTX))
8354 benefit += libcall_benefit (p);
8356 count--;
8357 v->mult_val = *mult_val;
8358 v->add_val = *add_val;
8359 v->benefit += benefit;
8361 else if (code != NOTE)
8363 /* Allow insns that set something other than this giv to a
8364 constant. Such insns are needed on machines which cannot
8365 include long constants and should not disqualify a giv. */
8366 if (code == INSN
8367 && (set = single_set (p))
8368 && SET_DEST (set) != dest_reg
8369 && CONSTANT_P (SET_SRC (set)))
8370 continue;
8372 REG_IV_TYPE (ivs, REGNO (dest_reg)) = UNKNOWN_INDUCT;
8373 return 0;
8377 REG_IV_TYPE (ivs, REGNO (dest_reg)) = UNKNOWN_INDUCT;
8378 *last_consec_insn = p;
8379 return v->benefit;
8382 /* Return an rtx, if any, that expresses giv G2 as a function of the register
8383 represented by G1. If no such expression can be found, or it is clear that
8384 it cannot possibly be a valid address, 0 is returned.
8386 To perform the computation, we note that
8387 G1 = x * v + a and
8388 G2 = y * v + b
8389 where `v' is the biv.
8391 So G2 = (y/x) * G1 + (b - a*y/x).
8393 Note that MULT = y/x.
8395 Update: A and B are now allowed to be additive expressions such that
8396 B contains all variables in A. That is, computing B-A will not require
8397 subtracting variables. */
8399 static rtx
8400 express_from_1 (rtx a, rtx b, rtx mult)
8402 /* If MULT is zero, then A*MULT is zero, and our expression is B. */
8404 if (mult == const0_rtx)
8405 return b;
8407 /* If MULT is not 1, we cannot handle A with non-constants, since we
8408 would then be required to subtract multiples of the registers in A.
8409 This is theoretically possible, and may even apply to some Fortran
8410 constructs, but it is a lot of work and we do not attempt it here. */
8412 if (mult != const1_rtx && GET_CODE (a) != CONST_INT)
8413 return NULL_RTX;
8415 /* In general these structures are sorted top to bottom (down the PLUS
8416 chain), but not left to right across the PLUS. If B is a higher
8417 order giv than A, we can strip one level and recurse. If A is higher
8418 order, we'll eventually bail out, but won't know that until the end.
8419 If they are the same, we'll strip one level around this loop. */
8421 while (GET_CODE (a) == PLUS && GET_CODE (b) == PLUS)
8423 rtx ra, rb, oa, ob, tmp;
8425 ra = XEXP (a, 0), oa = XEXP (a, 1);
8426 if (GET_CODE (ra) == PLUS)
8427 tmp = ra, ra = oa, oa = tmp;
8429 rb = XEXP (b, 0), ob = XEXP (b, 1);
8430 if (GET_CODE (rb) == PLUS)
8431 tmp = rb, rb = ob, ob = tmp;
8433 if (rtx_equal_p (ra, rb))
8434 /* We matched: remove one reg completely. */
8435 a = oa, b = ob;
8436 else if (GET_CODE (ob) != PLUS && rtx_equal_p (ra, ob))
8437 /* An alternate match. */
8438 a = oa, b = rb;
8439 else if (GET_CODE (oa) != PLUS && rtx_equal_p (oa, rb))
8440 /* An alternate match. */
8441 a = ra, b = ob;
8442 else
8444 /* Indicates an extra register in B. Strip one level from B and
8445 recurse, hoping B was the higher order expression. */
8446 ob = express_from_1 (a, ob, mult);
8447 if (ob == NULL_RTX)
8448 return NULL_RTX;
8449 return gen_rtx_PLUS (GET_MODE (b), rb, ob);
8453 /* Here we are at the last level of A, go through the cases hoping to
8454 get rid of everything but a constant. */
8456 if (GET_CODE (a) == PLUS)
8458 rtx ra, oa;
8460 ra = XEXP (a, 0), oa = XEXP (a, 1);
8461 if (rtx_equal_p (oa, b))
8462 oa = ra;
8463 else if (!rtx_equal_p (ra, b))
8464 return NULL_RTX;
8466 if (GET_CODE (oa) != CONST_INT)
8467 return NULL_RTX;
8469 return GEN_INT (-INTVAL (oa) * INTVAL (mult));
8471 else if (GET_CODE (a) == CONST_INT)
8473 return plus_constant (b, -INTVAL (a) * INTVAL (mult));
8475 else if (CONSTANT_P (a))
8477 enum machine_mode mode_a = GET_MODE (a);
8478 enum machine_mode mode_b = GET_MODE (b);
8479 enum machine_mode mode = mode_b == VOIDmode ? mode_a : mode_b;
8480 return simplify_gen_binary (MINUS, mode, b, a);
8482 else if (GET_CODE (b) == PLUS)
8484 if (rtx_equal_p (a, XEXP (b, 0)))
8485 return XEXP (b, 1);
8486 else if (rtx_equal_p (a, XEXP (b, 1)))
8487 return XEXP (b, 0);
8488 else
8489 return NULL_RTX;
8491 else if (rtx_equal_p (a, b))
8492 return const0_rtx;
8494 return NULL_RTX;
8497 static rtx
8498 express_from (struct induction *g1, struct induction *g2)
8500 rtx mult, add;
8502 /* The value that G1 will be multiplied by must be a constant integer. Also,
8503 the only chance we have of getting a valid address is if b*c/a (see above
8504 for notation) is also an integer. */
8505 if (GET_CODE (g1->mult_val) == CONST_INT
8506 && GET_CODE (g2->mult_val) == CONST_INT)
8508 if (g1->mult_val == const0_rtx
8509 || (g1->mult_val == constm1_rtx
8510 && INTVAL (g2->mult_val)
8511 == (HOST_WIDE_INT) 1 << (HOST_BITS_PER_WIDE_INT - 1))
8512 || INTVAL (g2->mult_val) % INTVAL (g1->mult_val) != 0)
8513 return NULL_RTX;
8514 mult = GEN_INT (INTVAL (g2->mult_val) / INTVAL (g1->mult_val));
8516 else if (rtx_equal_p (g1->mult_val, g2->mult_val))
8517 mult = const1_rtx;
8518 else
8520 /* ??? Find out if the one is a multiple of the other? */
8521 return NULL_RTX;
8524 add = express_from_1 (g1->add_val, g2->add_val, mult);
8525 if (add == NULL_RTX)
8527 /* Failed. If we've got a multiplication factor between G1 and G2,
8528 scale G1's addend and try again. */
8529 if (INTVAL (mult) > 1)
8531 rtx g1_add_val = g1->add_val;
8532 if (GET_CODE (g1_add_val) == MULT
8533 && GET_CODE (XEXP (g1_add_val, 1)) == CONST_INT)
8535 HOST_WIDE_INT m;
8536 m = INTVAL (mult) * INTVAL (XEXP (g1_add_val, 1));
8537 g1_add_val = gen_rtx_MULT (GET_MODE (g1_add_val),
8538 XEXP (g1_add_val, 0), GEN_INT (m));
8540 else
8542 g1_add_val = gen_rtx_MULT (GET_MODE (g1_add_val), g1_add_val,
8543 mult);
8546 add = express_from_1 (g1_add_val, g2->add_val, const1_rtx);
8549 if (add == NULL_RTX)
8550 return NULL_RTX;
8552 /* Form simplified final result. */
8553 if (mult == const0_rtx)
8554 return add;
8555 else if (mult == const1_rtx)
8556 mult = g1->dest_reg;
8557 else
8558 mult = gen_rtx_MULT (g2->mode, g1->dest_reg, mult);
8560 if (add == const0_rtx)
8561 return mult;
8562 else
8564 if (GET_CODE (add) == PLUS
8565 && CONSTANT_P (XEXP (add, 1)))
8567 rtx tem = XEXP (add, 1);
8568 mult = gen_rtx_PLUS (g2->mode, mult, XEXP (add, 0));
8569 add = tem;
8572 return gen_rtx_PLUS (g2->mode, mult, add);
8576 /* Return an rtx, if any, that expresses giv G2 as a function of the register
8577 represented by G1. This indicates that G2 should be combined with G1 and
8578 that G2 can use (either directly or via an address expression) a register
8579 used to represent G1. */
8581 static rtx
8582 combine_givs_p (struct induction *g1, struct induction *g2)
8584 rtx comb, ret;
8586 /* With the introduction of ext dependent givs, we must care for modes.
8587 G2 must not use a wider mode than G1. */
8588 if (GET_MODE_SIZE (g1->mode) < GET_MODE_SIZE (g2->mode))
8589 return NULL_RTX;
8591 ret = comb = express_from (g1, g2);
8592 if (comb == NULL_RTX)
8593 return NULL_RTX;
8594 if (g1->mode != g2->mode)
8595 ret = gen_lowpart (g2->mode, comb);
8597 /* If these givs are identical, they can be combined. We use the results
8598 of express_from because the addends are not in a canonical form, so
8599 rtx_equal_p is a weaker test. */
8600 /* But don't combine a DEST_REG giv with a DEST_ADDR giv; we want the
8601 combination to be the other way round. */
8602 if (comb == g1->dest_reg
8603 && (g1->giv_type == DEST_REG || g2->giv_type == DEST_ADDR))
8605 return ret;
8608 /* If G2 can be expressed as a function of G1 and that function is valid
8609 as an address and no more expensive than using a register for G2,
8610 the expression of G2 in terms of G1 can be used. */
8611 if (ret != NULL_RTX
8612 && g2->giv_type == DEST_ADDR
8613 && memory_address_p (GET_MODE (g2->mem), ret))
8614 return ret;
8616 return NULL_RTX;
8619 /* See if BL is monotonic and has a constant per-iteration increment.
8620 Return the increment if so, otherwise return 0. */
8622 static HOST_WIDE_INT
8623 get_monotonic_increment (struct iv_class *bl)
8625 struct induction *v;
8626 rtx incr;
8628 /* Get the total increment and check that it is constant. */
8629 incr = biv_total_increment (bl);
8630 if (incr == 0 || GET_CODE (incr) != CONST_INT)
8631 return 0;
8633 for (v = bl->biv; v != 0; v = v->next_iv)
8635 if (GET_CODE (v->add_val) != CONST_INT)
8636 return 0;
8638 if (INTVAL (v->add_val) < 0 && INTVAL (incr) >= 0)
8639 return 0;
8641 if (INTVAL (v->add_val) > 0 && INTVAL (incr) <= 0)
8642 return 0;
8644 return INTVAL (incr);
8648 /* Subroutine of biv_fits_mode_p. Return true if biv BL, when biased by
8649 BIAS, will never exceed the unsigned range of MODE. LOOP is the loop
8650 to which the biv belongs and INCR is its per-iteration increment. */
8652 static bool
8653 biased_biv_fits_mode_p (const struct loop *loop, struct iv_class *bl,
8654 HOST_WIDE_INT incr, enum machine_mode mode,
8655 unsigned HOST_WIDE_INT bias)
8657 unsigned HOST_WIDE_INT initial, maximum, span, delta;
8659 /* We need to be able to manipulate MODE-size constants. */
8660 if (HOST_BITS_PER_WIDE_INT < GET_MODE_BITSIZE (mode))
8661 return false;
8663 /* The number of loop iterations must be constant. */
8664 if (LOOP_INFO (loop)->n_iterations == 0)
8665 return false;
8667 /* So must the biv's initial value. */
8668 if (bl->initial_value == 0 || GET_CODE (bl->initial_value) != CONST_INT)
8669 return false;
8671 initial = bias + INTVAL (bl->initial_value);
8672 maximum = GET_MODE_MASK (mode);
8674 /* Make sure that the initial value is within range. */
8675 if (initial > maximum)
8676 return false;
8678 /* Set up DELTA and SPAN such that the number of iterations * DELTA
8679 (calculated to arbitrary precision) must be <= SPAN. */
8680 if (incr < 0)
8682 delta = -incr;
8683 span = initial;
8685 else
8687 delta = incr;
8688 /* Handle the special case in which MAXIMUM is the largest
8689 unsigned HOST_WIDE_INT and INITIAL is 0. */
8690 if (maximum + 1 == initial)
8691 span = LOOP_INFO (loop)->n_iterations * delta;
8692 else
8693 span = maximum + 1 - initial;
8695 return (span / LOOP_INFO (loop)->n_iterations >= delta);
8699 /* Return true if biv BL will never exceed the bounds of MODE. LOOP is
8700 the loop to which BL belongs and INCR is its per-iteration increment.
8701 UNSIGNEDP is true if the biv should be treated as unsigned. */
8703 static bool
8704 biv_fits_mode_p (const struct loop *loop, struct iv_class *bl,
8705 HOST_WIDE_INT incr, enum machine_mode mode, bool unsignedp)
8707 struct loop_info *loop_info;
8708 unsigned HOST_WIDE_INT bias;
8710 /* A biv's value will always be limited to its natural mode.
8711 Larger modes will observe the same wrap-around. */
8712 if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (GET_MODE (bl->biv->src_reg)))
8713 mode = GET_MODE (bl->biv->src_reg);
8715 loop_info = LOOP_INFO (loop);
8717 bias = (unsignedp ? 0 : (GET_MODE_MASK (mode) >> 1) + 1);
8718 if (biased_biv_fits_mode_p (loop, bl, incr, mode, bias))
8719 return true;
8721 if (mode == GET_MODE (bl->biv->src_reg)
8722 && bl->biv->src_reg == loop_info->iteration_var
8723 && loop_info->comparison_value
8724 && loop_invariant_p (loop, loop_info->comparison_value))
8726 /* If the increment is +1, and the exit test is a <, the BIV
8727 cannot overflow. (For <=, we have the problematic case that
8728 the comparison value might be the maximum value of the range.) */
8729 if (incr == 1)
8731 if (loop_info->comparison_code == LT)
8732 return true;
8733 if (loop_info->comparison_code == LTU && unsignedp)
8734 return true;
8737 /* Likewise for increment -1 and exit test >. */
8738 if (incr == -1)
8740 if (loop_info->comparison_code == GT)
8741 return true;
8742 if (loop_info->comparison_code == GTU && unsignedp)
8743 return true;
8746 return false;
8750 /* Given that X is an extension or truncation of BL, return true
8751 if it is unaffected by overflow. LOOP is the loop to which
8752 BL belongs and INCR is its per-iteration increment. */
8754 static bool
8755 extension_within_bounds_p (const struct loop *loop, struct iv_class *bl,
8756 HOST_WIDE_INT incr, rtx x)
8758 enum machine_mode mode;
8759 bool signedp, unsignedp;
8761 switch (GET_CODE (x))
8763 case SIGN_EXTEND:
8764 case ZERO_EXTEND:
8765 mode = GET_MODE (XEXP (x, 0));
8766 signedp = (GET_CODE (x) == SIGN_EXTEND);
8767 unsignedp = (GET_CODE (x) == ZERO_EXTEND);
8768 break;
8770 case TRUNCATE:
8771 /* We don't know whether this value is being used as signed
8772 or unsigned, so check the conditions for both. */
8773 mode = GET_MODE (x);
8774 signedp = unsignedp = true;
8775 break;
8777 default:
8778 abort ();
8781 return ((!signedp || biv_fits_mode_p (loop, bl, incr, mode, false))
8782 && (!unsignedp || biv_fits_mode_p (loop, bl, incr, mode, true)));
8786 /* Check each extension dependent giv in this class to see if its
8787 root biv is safe from wrapping in the interior mode, which would
8788 make the giv illegal. */
8790 static void
8791 check_ext_dependent_givs (const struct loop *loop, struct iv_class *bl)
8793 struct induction *v;
8794 HOST_WIDE_INT incr;
8796 incr = get_monotonic_increment (bl);
8798 /* Invalidate givs that fail the tests. */
8799 for (v = bl->giv; v; v = v->next_iv)
8800 if (v->ext_dependent)
8802 if (incr != 0
8803 && extension_within_bounds_p (loop, bl, incr, v->ext_dependent))
8805 if (loop_dump_stream)
8806 fprintf (loop_dump_stream,
8807 "Verified ext dependent giv at %d of reg %d\n",
8808 INSN_UID (v->insn), bl->regno);
8810 else
8812 if (loop_dump_stream)
8813 fprintf (loop_dump_stream,
8814 "Failed ext dependent giv at %d\n",
8815 INSN_UID (v->insn));
8817 v->ignore = 1;
8818 bl->all_reduced = 0;
8823 /* Generate a version of VALUE in a mode appropriate for initializing V. */
8825 static rtx
8826 extend_value_for_giv (struct induction *v, rtx value)
8828 rtx ext_dep = v->ext_dependent;
8830 if (! ext_dep)
8831 return value;
8833 /* Recall that check_ext_dependent_givs verified that the known bounds
8834 of a biv did not overflow or wrap with respect to the extension for
8835 the giv. Therefore, constants need no additional adjustment. */
8836 if (CONSTANT_P (value) && GET_MODE (value) == VOIDmode)
8837 return value;
8839 /* Otherwise, we must adjust the value to compensate for the
8840 differing modes of the biv and the giv. */
8841 return gen_rtx_fmt_e (GET_CODE (ext_dep), GET_MODE (ext_dep), value);
/* Sort record used by combine_givs: GIV_NUMBER is the giv's index in
   the caller's array and TOTAL_BENEFIT the weight it is ranked by.  */

struct combine_givs_stats
{
  int giv_number;
  int total_benefit;
};

/* qsort comparison function for struct combine_givs_stats.  Orders
   entries by decreasing TOTAL_BENEFIT and, to keep the sort stable, by
   increasing GIV_NUMBER among equal benefits.  Returns a negative
   value, zero or a positive value.

   Explicit comparisons are used instead of subtracting the fields,
   since the difference of two ints can overflow.  */

static int
cmp_combine_givs_stats (const void *xp, const void *yp)
{
  const struct combine_givs_stats *const x = xp;
  const struct combine_givs_stats *const y = yp;

  if (x->total_benefit != y->total_benefit)
    return x->total_benefit < y->total_benefit ? 1 : -1;

  /* Stabilize the sort.  */
  return (x->giv_number > y->giv_number) - (x->giv_number < y->giv_number);
}
8865 /* Check all pairs of givs for iv_class BL and see if any can be combined with
8866 any other. If so, point SAME to the giv combined with and set NEW_REG to
8867 be an expression (in terms of the other giv's DEST_REG) equivalent to the
8868 giv. Also, update BENEFIT and related fields for cost/benefit analysis. */
8870 static void
8871 combine_givs (struct loop_regs *regs, struct iv_class *bl)
8873 /* Additional benefit to add for being combined multiple times. */
8874 const int extra_benefit = 3;
8876 struct induction *g1, *g2, **giv_array;
8877 int i, j, k, giv_count;
8878 struct combine_givs_stats *stats;
8879 rtx *can_combine;
8881 /* Count givs, because bl->giv_count is incorrect here. */
8882 giv_count = 0;
8883 for (g1 = bl->giv; g1; g1 = g1->next_iv)
8884 if (!g1->ignore)
8885 giv_count++;
8887 giv_array = alloca (giv_count * sizeof (struct induction *));
8888 i = 0;
8889 for (g1 = bl->giv; g1; g1 = g1->next_iv)
8890 if (!g1->ignore)
8891 giv_array[i++] = g1;
8893 stats = xcalloc (giv_count, sizeof (*stats));
8894 can_combine = xcalloc (giv_count, giv_count * sizeof (rtx));
8896 for (i = 0; i < giv_count; i++)
8898 int this_benefit;
8899 rtx single_use;
8901 g1 = giv_array[i];
8902 stats[i].giv_number = i;
8904 /* If a DEST_REG GIV is used only once, do not allow it to combine
8905 with anything, for in doing so we will gain nothing that cannot
8906 be had by simply letting the GIV with which we would have combined
8907 to be reduced on its own. The losage shows up in particular with
8908 DEST_ADDR targets on hosts with reg+reg addressing, though it can
8909 be seen elsewhere as well. */
8910 if (g1->giv_type == DEST_REG
8911 && (single_use = regs->array[REGNO (g1->dest_reg)].single_usage)
8912 && single_use != const0_rtx)
8913 continue;
8915 this_benefit = g1->benefit;
8916 /* Add an additional weight for zero addends. */
8917 if (g1->no_const_addval)
8918 this_benefit += 1;
8920 for (j = 0; j < giv_count; j++)
8922 rtx this_combine;
8924 g2 = giv_array[j];
8925 if (g1 != g2
8926 && (this_combine = combine_givs_p (g1, g2)) != NULL_RTX)
8928 can_combine[i * giv_count + j] = this_combine;
8929 this_benefit += g2->benefit + extra_benefit;
8932 stats[i].total_benefit = this_benefit;
8935 /* Iterate, combining until we can't. */
8936 restart:
8937 qsort (stats, giv_count, sizeof (*stats), cmp_combine_givs_stats);
8939 if (loop_dump_stream)
8941 fprintf (loop_dump_stream, "Sorted combine statistics:\n");
8942 for (k = 0; k < giv_count; k++)
8944 g1 = giv_array[stats[k].giv_number];
8945 if (!g1->combined_with && !g1->same)
8946 fprintf (loop_dump_stream, " {%d, %d}",
8947 INSN_UID (giv_array[stats[k].giv_number]->insn),
8948 stats[k].total_benefit);
8950 putc ('\n', loop_dump_stream);
8953 for (k = 0; k < giv_count; k++)
8955 int g1_add_benefit = 0;
8957 i = stats[k].giv_number;
8958 g1 = giv_array[i];
8960 /* If it has already been combined, skip. */
8961 if (g1->combined_with || g1->same)
8962 continue;
8964 for (j = 0; j < giv_count; j++)
8966 g2 = giv_array[j];
8967 if (g1 != g2 && can_combine[i * giv_count + j]
8968 /* If it has already been combined, skip. */
8969 && ! g2->same && ! g2->combined_with)
8971 int l;
8973 g2->new_reg = can_combine[i * giv_count + j];
8974 g2->same = g1;
8975 /* For destination, we now may replace by mem expression instead
8976 of register. This changes the costs considerably, so add the
8977 compensation. */
8978 if (g2->giv_type == DEST_ADDR)
8979 g2->benefit = (g2->benefit + reg_address_cost
8980 - address_cost (g2->new_reg,
8981 GET_MODE (g2->mem)));
8982 g1->combined_with++;
8983 g1->lifetime += g2->lifetime;
8985 g1_add_benefit += g2->benefit;
8987 /* ??? The new final_[bg]iv_value code does a much better job
8988 of finding replaceable giv's, and hence this code may no
8989 longer be necessary. */
8990 if (! g2->replaceable && REG_USERVAR_P (g2->dest_reg))
8991 g1_add_benefit -= copy_cost;
8993 /* To help optimize the next set of combinations, remove
8994 this giv from the benefits of other potential mates. */
8995 for (l = 0; l < giv_count; ++l)
8997 int m = stats[l].giv_number;
8998 if (can_combine[m * giv_count + j])
8999 stats[l].total_benefit -= g2->benefit + extra_benefit;
9002 if (loop_dump_stream)
9003 fprintf (loop_dump_stream,
9004 "giv at %d combined with giv at %d; new benefit %d + %d, lifetime %d\n",
9005 INSN_UID (g2->insn), INSN_UID (g1->insn),
9006 g1->benefit, g1_add_benefit, g1->lifetime);
9010 /* To help optimize the next set of combinations, remove
9011 this giv from the benefits of other potential mates. */
9012 if (g1->combined_with)
9014 for (j = 0; j < giv_count; ++j)
9016 int m = stats[j].giv_number;
9017 if (can_combine[m * giv_count + i])
9018 stats[j].total_benefit -= g1->benefit + extra_benefit;
9021 g1->benefit += g1_add_benefit;
9023 /* We've finished with this giv, and everything it touched.
9024 Restart the combination so that proper weights for the
9025 rest of the givs are properly taken into account. */
9026 /* ??? Ideally we would compact the arrays at this point, so
9027 as to not cover old ground. But sanely compacting
9028 can_combine is tricky. */
9029 goto restart;
9033 /* Clean up. */
9034 free (stats);
9035 free (can_combine);
9038 /* Generate sequence for REG = B * M + A. B is the initial value of
9039 the basic induction variable, M a multiplicative constant, A an
9040 additive constant and REG the destination register. */
9042 static rtx
9043 gen_add_mult (rtx b, rtx m, rtx a, rtx reg)
9045 rtx seq;
9046 rtx result;
9048 start_sequence ();
9049 /* Use unsigned arithmetic. */
9050 result = expand_mult_add (b, reg, m, a, GET_MODE (reg), 1);
9051 if (reg != result)
9052 emit_move_insn (reg, result);
9053 seq = get_insns ();
9054 end_sequence ();
9056 return seq;
9060 /* Update registers created in insn sequence SEQ. */
9062 static void
9063 loop_regs_update (const struct loop *loop ATTRIBUTE_UNUSED, rtx seq)
9065 rtx insn;
9067 /* Update register info for alias analysis. */
9069 insn = seq;
9070 while (insn != NULL_RTX)
9072 rtx set = single_set (insn);
9074 if (set && REG_P (SET_DEST (set)))
9075 record_base_value (REGNO (SET_DEST (set)), SET_SRC (set), 0);
9077 insn = NEXT_INSN (insn);
9082 /* EMIT code before BEFORE_BB/BEFORE_INSN to set REG = B * M + A. B
9083 is the initial value of the basic induction variable, M a
9084 multiplicative constant, A an additive constant and REG the
9085 destination register. */
9087 static void
9088 loop_iv_add_mult_emit_before (const struct loop *loop, rtx b, rtx m, rtx a,
9089 rtx reg, basic_block before_bb, rtx before_insn)
9091 rtx seq;
9093 if (! before_insn)
9095 loop_iv_add_mult_hoist (loop, b, m, a, reg);
9096 return;
9099 /* Use copy_rtx to prevent unexpected sharing of these rtx. */
9100 seq = gen_add_mult (copy_rtx (b), copy_rtx (m), copy_rtx (a), reg);
9102 /* Increase the lifetime of any invariants moved further in code. */
9103 update_reg_last_use (a, before_insn);
9104 update_reg_last_use (b, before_insn);
9105 update_reg_last_use (m, before_insn);
9107 /* It is possible that the expansion created lots of new registers.
9108 Iterate over the sequence we just created and record them all. We
9109 must do this before inserting the sequence. */
9110 loop_regs_update (loop, seq);
9112 loop_insn_emit_before (loop, before_bb, before_insn, seq);
9116 /* Emit insns after the loop (at the loop's sink point) to set
9117 REG = B * M + A. B is the initial value of the basic induction
9118 variable, M a multiplicative constant, A an additive constant and
9119 REG the destination register. */
9121 static void
9122 loop_iv_add_mult_sink (const struct loop *loop, rtx b, rtx m, rtx a, rtx reg)
9124 rtx seq;
9126 /* Use copy_rtx to prevent unexpected sharing of these rtx. */
9127 seq = gen_add_mult (copy_rtx (b), copy_rtx (m), copy_rtx (a), reg);
9129 /* Increase the lifetime of any invariants moved further in code.
9130 ???? Is this really necessary? */
9131 update_reg_last_use (a, loop->sink);
9132 update_reg_last_use (b, loop->sink);
9133 update_reg_last_use (m, loop->sink);
9135 /* It is possible that the expansion created lots of new registers.
9136 Iterate over the sequence we just created and record them all. We
9137 must do this before inserting the sequence. */
9138 loop_regs_update (loop, seq);
9140 loop_insn_sink (loop, seq);
9144 /* Emit insns in loop pre-header to set REG = B * M + A. B is the
9145 initial value of the basic induction variable, M a multiplicative
9146 constant, A an additive constant and REG the destination register. */
9148 static void
9149 loop_iv_add_mult_hoist (const struct loop *loop, rtx b, rtx m, rtx a, rtx reg)
9151 rtx seq;
9153 /* Use copy_rtx to prevent unexpected sharing of these rtx. */
9154 seq = gen_add_mult (copy_rtx (b), copy_rtx (m), copy_rtx (a), reg);
9156 /* It is possible that the expansion created lots of new registers.
9157 Iterate over the sequence we just created and record them all. We
9158 must do this before inserting the sequence. */
9159 loop_regs_update (loop, seq);
9161 loop_insn_hoist (loop, seq);
9166 /* Similar to gen_add_mult, but compute cost rather than generating
9167 sequence. */
9169 static int
9170 iv_add_mult_cost (rtx b, rtx m, rtx a, rtx reg)
9172 int cost = 0;
9173 rtx last, result;
9175 start_sequence ();
9176 result = expand_mult_add (b, reg, m, a, GET_MODE (reg), 1);
9177 if (reg != result)
9178 emit_move_insn (reg, result);
9179 last = get_last_insn ();
9180 while (last)
9182 rtx t = single_set (last);
9183 if (t)
9184 cost += rtx_cost (SET_SRC (t), SET);
9185 last = PREV_INSN (last);
9187 end_sequence ();
9188 return cost;
9191 /* Test whether A * B can be computed without
9192 an actual multiply insn. Value is 1 if so.
9194 ??? This function stinks because it generates a ton of wasted RTL
9195 ??? and as a result fragments GC memory to no end. There are other
9196 ??? places in the compiler which are invoked a lot and do the same
9197 ??? thing, generate wasted RTL just to see if something is possible. */
9199 static int
9200 product_cheap_p (rtx a, rtx b)
9202 rtx tmp;
9203 int win, n_insns;
9205 /* If only one is constant, make it B. */
9206 if (GET_CODE (a) == CONST_INT)
9207 tmp = a, a = b, b = tmp;
9209 /* If first constant, both constant, so don't need multiply. */
9210 if (GET_CODE (a) == CONST_INT)
9211 return 1;
9213 /* If second not constant, neither is constant, so would need multiply. */
9214 if (GET_CODE (b) != CONST_INT)
9215 return 0;
9217 /* One operand is constant, so might not need multiply insn. Generate the
9218 code for the multiply and see if a call or multiply, or long sequence
9219 of insns is generated. */
9221 start_sequence ();
9222 expand_mult (GET_MODE (a), a, b, NULL_RTX, 1);
9223 tmp = get_insns ();
9224 end_sequence ();
9226 win = 1;
9227 if (tmp == NULL_RTX)
9229 else if (INSN_P (tmp))
9231 n_insns = 0;
9232 while (tmp != NULL_RTX)
9234 rtx next = NEXT_INSN (tmp);
9236 if (++n_insns > 3
9237 || !NONJUMP_INSN_P (tmp)
9238 || (GET_CODE (PATTERN (tmp)) == SET
9239 && GET_CODE (SET_SRC (PATTERN (tmp))) == MULT)
9240 || (GET_CODE (PATTERN (tmp)) == PARALLEL
9241 && GET_CODE (XVECEXP (PATTERN (tmp), 0, 0)) == SET
9242 && GET_CODE (SET_SRC (XVECEXP (PATTERN (tmp), 0, 0))) == MULT))
9244 win = 0;
9245 break;
9248 tmp = next;
9251 else if (GET_CODE (tmp) == SET
9252 && GET_CODE (SET_SRC (tmp)) == MULT)
9253 win = 0;
9254 else if (GET_CODE (tmp) == PARALLEL
9255 && GET_CODE (XVECEXP (tmp, 0, 0)) == SET
9256 && GET_CODE (SET_SRC (XVECEXP (tmp, 0, 0))) == MULT)
9257 win = 0;
9259 return win;
9262 /* Check to see if loop can be terminated by a "decrement and branch until
9263 zero" instruction. If so, add a REG_NONNEG note to the branch insn if so.
9264 Also try reversing an increment loop to a decrement loop
9265 to see if the optimization can be performed.
9266 Value is nonzero if optimization was performed. */
9268 /* This is useful even if the architecture doesn't have such an insn,
9269 because it might change a loops which increments from 0 to n to a loop
9270 which decrements from n to 0. A loop that decrements to zero is usually
9271 faster than one that increments from zero. */
9273 /* ??? This could be rewritten to use some of the loop unrolling procedures,
9274 such as approx_final_value, biv_total_increment, loop_iterations, and
9275 final_[bg]iv_value. */
9277 static int
9278 check_dbra_loop (struct loop *loop, int insn_count)
9280 struct loop_info *loop_info = LOOP_INFO (loop);
9281 struct loop_regs *regs = LOOP_REGS (loop);
9282 struct loop_ivs *ivs = LOOP_IVS (loop);
9283 struct iv_class *bl;
9284 rtx reg;
9285 enum machine_mode mode;
9286 rtx jump_label;
9287 rtx final_value;
9288 rtx start_value;
9289 rtx new_add_val;
9290 rtx comparison;
9291 rtx before_comparison;
9292 rtx p;
9293 rtx jump;
9294 rtx first_compare;
9295 int compare_and_branch;
9296 rtx loop_start = loop->start;
9297 rtx loop_end = loop->end;
9299 /* If last insn is a conditional branch, and the insn before tests a
9300 register value, try to optimize it. Otherwise, we can't do anything. */
9302 jump = PREV_INSN (loop_end);
9303 comparison = get_condition_for_loop (loop, jump);
9304 if (comparison == 0)
9305 return 0;
9306 if (!onlyjump_p (jump))
9307 return 0;
9309 /* Try to compute whether the compare/branch at the loop end is one or
9310 two instructions. */
9311 get_condition (jump, &first_compare, false, true);
9312 if (first_compare == jump)
9313 compare_and_branch = 1;
9314 else if (first_compare == prev_nonnote_insn (jump))
9315 compare_and_branch = 2;
9316 else
9317 return 0;
9320 /* If more than one condition is present to control the loop, then
9321 do not proceed, as this function does not know how to rewrite
9322 loop tests with more than one condition.
9324 Look backwards from the first insn in the last comparison
9325 sequence and see if we've got another comparison sequence. */
9327 rtx jump1;
9328 if ((jump1 = prev_nonnote_insn (first_compare))
9329 && JUMP_P (jump1))
9330 return 0;
9333 /* Check all of the bivs to see if the compare uses one of them.
9334 Skip biv's set more than once because we can't guarantee that
9335 it will be zero on the last iteration. Also skip if the biv is
9336 used between its update and the test insn. */
9338 for (bl = ivs->list; bl; bl = bl->next)
9340 if (bl->biv_count == 1
9341 && ! bl->biv->maybe_multiple
9342 && bl->biv->dest_reg == XEXP (comparison, 0)
9343 && ! reg_used_between_p (regno_reg_rtx[bl->regno], bl->biv->insn,
9344 first_compare))
9345 break;
9348 /* Try swapping the comparison to identify a suitable biv. */
9349 if (!bl)
9350 for (bl = ivs->list; bl; bl = bl->next)
9351 if (bl->biv_count == 1
9352 && ! bl->biv->maybe_multiple
9353 && bl->biv->dest_reg == XEXP (comparison, 1)
9354 && ! reg_used_between_p (regno_reg_rtx[bl->regno], bl->biv->insn,
9355 first_compare))
9357 comparison = gen_rtx_fmt_ee (swap_condition (GET_CODE (comparison)),
9358 VOIDmode,
9359 XEXP (comparison, 1),
9360 XEXP (comparison, 0));
9361 break;
9364 if (! bl)
9365 return 0;
9367 /* Look for the case where the basic induction variable is always
9368 nonnegative, and equals zero on the last iteration.
9369 In this case, add a reg_note REG_NONNEG, which allows the
9370 m68k DBRA instruction to be used. */
9372 if (((GET_CODE (comparison) == GT && XEXP (comparison, 1) == constm1_rtx)
9373 || (GET_CODE (comparison) == NE && XEXP (comparison, 1) == const0_rtx))
9374 && GET_CODE (bl->biv->add_val) == CONST_INT
9375 && INTVAL (bl->biv->add_val) < 0)
9377 /* Initial value must be greater than 0,
9378 init_val % -dec_value == 0 to ensure that it equals zero on
9379 the last iteration */
9381 if (GET_CODE (bl->initial_value) == CONST_INT
9382 && INTVAL (bl->initial_value) > 0
9383 && (INTVAL (bl->initial_value)
9384 % (-INTVAL (bl->biv->add_val))) == 0)
9386 /* Register always nonnegative, add REG_NOTE to branch. */
9387 if (! find_reg_note (jump, REG_NONNEG, NULL_RTX))
9388 REG_NOTES (jump)
9389 = gen_rtx_EXPR_LIST (REG_NONNEG, bl->biv->dest_reg,
9390 REG_NOTES (jump));
9391 bl->nonneg = 1;
9393 return 1;
9396 /* If the decrement is 1 and the value was tested as >= 0 before
9397 the loop, then we can safely optimize. */
9398 for (p = loop_start; p; p = PREV_INSN (p))
9400 if (LABEL_P (p))
9401 break;
9402 if (!JUMP_P (p))
9403 continue;
9405 before_comparison = get_condition_for_loop (loop, p);
9406 if (before_comparison
9407 && XEXP (before_comparison, 0) == bl->biv->dest_reg
9408 && (GET_CODE (before_comparison) == LT
9409 || GET_CODE (before_comparison) == LTU)
9410 && XEXP (before_comparison, 1) == const0_rtx
9411 && ! reg_set_between_p (bl->biv->dest_reg, p, loop_start)
9412 && INTVAL (bl->biv->add_val) == -1)
9414 if (! find_reg_note (jump, REG_NONNEG, NULL_RTX))
9415 REG_NOTES (jump)
9416 = gen_rtx_EXPR_LIST (REG_NONNEG, bl->biv->dest_reg,
9417 REG_NOTES (jump));
9418 bl->nonneg = 1;
9420 return 1;
9424 else if (GET_CODE (bl->biv->add_val) == CONST_INT
9425 && INTVAL (bl->biv->add_val) > 0)
9427 /* Try to change inc to dec, so can apply above optimization. */
9428 /* Can do this if:
9429 all registers modified are induction variables or invariant,
9430 all memory references have non-overlapping addresses
9431 (obviously true if only one write)
9432 allow 2 insns for the compare/jump at the end of the loop. */
9433 /* Also, we must avoid any instructions which use both the reversed
9434 biv and another biv. Such instructions will fail if the loop is
9435 reversed. We meet this condition by requiring that either
9436 no_use_except_counting is true, or else that there is only
9437 one biv. */
9438 int num_nonfixed_reads = 0;
9439 /* 1 if the iteration var is used only to count iterations. */
9440 int no_use_except_counting = 0;
9441 /* 1 if the loop has no memory store, or it has a single memory store
9442 which is reversible. */
9443 int reversible_mem_store = 1;
9445 if (bl->giv_count == 0
9446 && !loop->exit_count
9447 && !loop_info->has_multiple_exit_targets)
9449 rtx bivreg = regno_reg_rtx[bl->regno];
9450 struct iv_class *blt;
9452 /* If there are no givs for this biv, and the only exit is the
9453 fall through at the end of the loop, then
9454 see if perhaps there are no uses except to count. */
9455 no_use_except_counting = 1;
9456 for (p = loop_start; p != loop_end; p = NEXT_INSN (p))
9457 if (INSN_P (p))
9459 rtx set = single_set (p);
9461 if (set && REG_P (SET_DEST (set))
9462 && REGNO (SET_DEST (set)) == bl->regno)
9463 /* An insn that sets the biv is okay. */
9465 else if (!reg_mentioned_p (bivreg, PATTERN (p)))
9466 /* An insn that doesn't mention the biv is okay. */
9468 else if (p == prev_nonnote_insn (prev_nonnote_insn (loop_end))
9469 || p == prev_nonnote_insn (loop_end))
9471 /* If either of these insns uses the biv and sets a pseudo
9472 that has more than one usage, then the biv has uses
9473 other than counting since it's used to derive a value
9474 that is used more than one time. */
9475 note_stores (PATTERN (p), note_set_pseudo_multiple_uses,
9476 regs);
9477 if (regs->multiple_uses)
9479 no_use_except_counting = 0;
9480 break;
9483 else
9485 no_use_except_counting = 0;
9486 break;
9490 /* A biv has uses besides counting if it is used to set
9491 another biv. */
9492 for (blt = ivs->list; blt; blt = blt->next)
9493 if (blt->init_set
9494 && reg_mentioned_p (bivreg, SET_SRC (blt->init_set)))
9496 no_use_except_counting = 0;
9497 break;
9501 if (no_use_except_counting)
9502 /* No need to worry about MEMs. */
9504 else if (loop_info->num_mem_sets <= 1)
9506 for (p = loop_start; p != loop_end; p = NEXT_INSN (p))
9507 if (INSN_P (p))
9508 num_nonfixed_reads += count_nonfixed_reads (loop, PATTERN (p));
9510 /* If the loop has a single store, and the destination address is
9511 invariant, then we can't reverse the loop, because this address
9512 might then have the wrong value at loop exit.
9513 This would work if the source was invariant also, however, in that
9514 case, the insn should have been moved out of the loop. */
9516 if (loop_info->num_mem_sets == 1)
9518 struct induction *v;
9520 /* If we could prove that each of the memory locations
9521 written to was different, then we could reverse the
9522 store -- but we don't presently have any way of
9523 knowing that. */
9524 reversible_mem_store = 0;
9526 /* If the store depends on a register that is set after the
9527 store, it depends on the initial value, and is thus not
9528 reversible. */
9529 for (v = bl->giv; reversible_mem_store && v; v = v->next_iv)
9531 if (v->giv_type == DEST_REG
9532 && reg_mentioned_p (v->dest_reg,
9533 PATTERN (loop_info->first_loop_store_insn))
9534 && loop_insn_first_p (loop_info->first_loop_store_insn,
9535 v->insn))
9536 reversible_mem_store = 0;
9540 else
9541 return 0;
9543 /* This code only acts for innermost loops. Also it simplifies
9544 the memory address check by only reversing loops with
9545 zero or one memory access.
9546 Two memory accesses could involve parts of the same array,
9547 and that can't be reversed.
9548 If the biv is used only for counting, than we don't need to worry
9549 about all these things. */
9551 if ((num_nonfixed_reads <= 1
9552 && ! loop_info->has_nonconst_call
9553 && ! loop_info->has_prefetch
9554 && ! loop_info->has_volatile
9555 && reversible_mem_store
9556 && (bl->giv_count + bl->biv_count + loop_info->num_mem_sets
9557 + num_unmoved_movables (loop) + compare_and_branch == insn_count)
9558 && (bl == ivs->list && bl->next == 0))
9559 || (no_use_except_counting && ! loop_info->has_prefetch))
9561 rtx tem;
9563 /* Loop can be reversed. */
9564 if (loop_dump_stream)
9565 fprintf (loop_dump_stream, "Can reverse loop\n");
9567 /* Now check other conditions:
9569 The increment must be a constant, as must the initial value,
9570 and the comparison code must be LT.
9572 This test can probably be improved since +/- 1 in the constant
9573 can be obtained by changing LT to LE and vice versa; this is
9574 confusing. */
9576 if (comparison
9577 /* for constants, LE gets turned into LT */
9578 && (GET_CODE (comparison) == LT
9579 || (GET_CODE (comparison) == LE
9580 && no_use_except_counting)
9581 || GET_CODE (comparison) == LTU))
9583 HOST_WIDE_INT add_val, add_adjust, comparison_val = 0;
9584 rtx initial_value, comparison_value;
9585 int nonneg = 0;
9586 enum rtx_code cmp_code;
9587 int comparison_const_width;
9588 unsigned HOST_WIDE_INT comparison_sign_mask;
9589 bool keep_first_compare;
9591 add_val = INTVAL (bl->biv->add_val);
9592 comparison_value = XEXP (comparison, 1);
9593 if (GET_MODE (comparison_value) == VOIDmode)
9594 comparison_const_width
9595 = GET_MODE_BITSIZE (GET_MODE (XEXP (comparison, 0)));
9596 else
9597 comparison_const_width
9598 = GET_MODE_BITSIZE (GET_MODE (comparison_value));
9599 if (comparison_const_width > HOST_BITS_PER_WIDE_INT)
9600 comparison_const_width = HOST_BITS_PER_WIDE_INT;
9601 comparison_sign_mask
9602 = (unsigned HOST_WIDE_INT) 1 << (comparison_const_width - 1);
9604 /* If the comparison value is not a loop invariant, then we
9605 can not reverse this loop.
9607 ??? If the insns which initialize the comparison value as
9608 a whole compute an invariant result, then we could move
9609 them out of the loop and proceed with loop reversal. */
9610 if (! loop_invariant_p (loop, comparison_value))
9611 return 0;
9613 if (GET_CODE (comparison_value) == CONST_INT)
9614 comparison_val = INTVAL (comparison_value);
9615 initial_value = bl->initial_value;
9617 /* Normalize the initial value if it is an integer and
9618 has no other use except as a counter. This will allow
9619 a few more loops to be reversed. */
9620 if (no_use_except_counting
9621 && GET_CODE (comparison_value) == CONST_INT
9622 && GET_CODE (initial_value) == CONST_INT)
9624 comparison_val = comparison_val - INTVAL (bl->initial_value);
9625 /* The code below requires comparison_val to be a multiple
9626 of add_val in order to do the loop reversal, so
9627 round up comparison_val to a multiple of add_val.
9628 Since comparison_value is constant, we know that the
9629 current comparison code is LT. */
9630 comparison_val = comparison_val + add_val - 1;
9631 comparison_val
9632 -= (unsigned HOST_WIDE_INT) comparison_val % add_val;
9633 /* We postpone overflow checks for COMPARISON_VAL here;
9634 even if there is an overflow, we might still be able to
9635 reverse the loop, if converting the loop exit test to
9636 NE is possible. */
9637 initial_value = const0_rtx;
9640 /* First check if we can do a vanilla loop reversal. */
9641 if (initial_value == const0_rtx
9642 && GET_CODE (comparison_value) == CONST_INT
9643 /* Now do postponed overflow checks on COMPARISON_VAL. */
9644 && ! (((comparison_val - add_val) ^ INTVAL (comparison_value))
9645 & comparison_sign_mask))
9647 /* Register will always be nonnegative, with value
9648 0 on last iteration */
9649 add_adjust = add_val;
9650 nonneg = 1;
9651 cmp_code = GE;
9653 else
9654 return 0;
9656 if (GET_CODE (comparison) == LE)
9657 add_adjust -= add_val;
9659 /* If the initial value is not zero, or if the comparison
9660 value is not an exact multiple of the increment, then we
9661 can not reverse this loop. */
9662 if (initial_value == const0_rtx
9663 && GET_CODE (comparison_value) == CONST_INT)
9665 if (((unsigned HOST_WIDE_INT) comparison_val % add_val) != 0)
9666 return 0;
9668 else
9670 if (! no_use_except_counting || add_val != 1)
9671 return 0;
9674 final_value = comparison_value;
9676 /* Reset these in case we normalized the initial value
9677 and comparison value above. */
9678 if (GET_CODE (comparison_value) == CONST_INT
9679 && GET_CODE (initial_value) == CONST_INT)
9681 comparison_value = GEN_INT (comparison_val);
9682 final_value
9683 = GEN_INT (comparison_val + INTVAL (bl->initial_value));
9685 bl->initial_value = initial_value;
9687 /* Save some info needed to produce the new insns. */
9688 reg = bl->biv->dest_reg;
9689 mode = GET_MODE (reg);
9690 jump_label = condjump_label (PREV_INSN (loop_end));
9691 new_add_val = GEN_INT (-INTVAL (bl->biv->add_val));
9693 /* Set start_value; if this is not a CONST_INT, we need
9694 to generate a SUB.
9695 Initialize biv to start_value before loop start.
9696 The old initializing insn will be deleted as a
9697 dead store by flow.c. */
9698 if (initial_value == const0_rtx
9699 && GET_CODE (comparison_value) == CONST_INT)
9701 start_value
9702 = gen_int_mode (comparison_val - add_adjust, mode);
9703 loop_insn_hoist (loop, gen_move_insn (reg, start_value));
9705 else if (GET_CODE (initial_value) == CONST_INT)
9707 rtx offset = GEN_INT (-INTVAL (initial_value) - add_adjust);
9708 rtx add_insn = gen_add3_insn (reg, comparison_value, offset);
9710 if (add_insn == 0)
9711 return 0;
9713 start_value
9714 = gen_rtx_PLUS (mode, comparison_value, offset);
9715 loop_insn_hoist (loop, add_insn);
9716 if (GET_CODE (comparison) == LE)
9717 final_value = gen_rtx_PLUS (mode, comparison_value,
9718 GEN_INT (add_val));
9720 else if (! add_adjust)
9722 rtx sub_insn = gen_sub3_insn (reg, comparison_value,
9723 initial_value);
9725 if (sub_insn == 0)
9726 return 0;
9727 start_value
9728 = gen_rtx_MINUS (mode, comparison_value, initial_value);
9729 loop_insn_hoist (loop, sub_insn);
9731 else
9732 /* We could handle the other cases too, but it'll be
9733 better to have a testcase first. */
9734 return 0;
9736 /* We may not have a single insn which can increment a reg, so
9737 create a sequence to hold all the insns from expand_inc. */
9738 start_sequence ();
9739 expand_inc (reg, new_add_val);
9740 tem = get_insns ();
9741 end_sequence ();
9743 p = loop_insn_emit_before (loop, 0, bl->biv->insn, tem);
9744 delete_insn (bl->biv->insn);
9746 /* Update biv info to reflect its new status. */
9747 bl->biv->insn = p;
9748 bl->initial_value = start_value;
9749 bl->biv->add_val = new_add_val;
9751 /* Update loop info. */
9752 loop_info->initial_value = reg;
9753 loop_info->initial_equiv_value = reg;
9754 loop_info->final_value = const0_rtx;
9755 loop_info->final_equiv_value = const0_rtx;
9756 loop_info->comparison_value = const0_rtx;
9757 loop_info->comparison_code = cmp_code;
9758 loop_info->increment = new_add_val;
9760 /* Inc LABEL_NUSES so that delete_insn will
9761 not delete the label. */
9762 LABEL_NUSES (XEXP (jump_label, 0))++;
9764 /* If we have a separate comparison insn that does more
9765 than just set cc0, the result of the comparison might
9766 be used outside the loop. */
9767 keep_first_compare = (compare_and_branch == 2
9768 #ifdef HAVE_CC0
9769 && sets_cc0_p (first_compare) <= 0
9770 #endif
9773 /* Emit an insn after the end of the loop to set the biv's
9774 proper exit value if it is used anywhere outside the loop. */
9775 if (keep_first_compare
9776 || (REGNO_LAST_UID (bl->regno) != INSN_UID (first_compare))
9777 || ! bl->init_insn
9778 || REGNO_FIRST_UID (bl->regno) != INSN_UID (bl->init_insn))
9779 loop_insn_sink (loop, gen_load_of_final_value (reg, final_value));
9781 if (keep_first_compare)
9782 loop_insn_sink (loop, PATTERN (first_compare));
9784 /* Delete compare/branch at end of loop. */
9785 delete_related_insns (PREV_INSN (loop_end));
9786 if (compare_and_branch == 2)
9787 delete_related_insns (first_compare);
9789 /* Add new compare/branch insn at end of loop. */
9790 start_sequence ();
9791 emit_cmp_and_jump_insns (reg, const0_rtx, cmp_code, NULL_RTX,
9792 mode, 0,
9793 XEXP (jump_label, 0));
9794 tem = get_insns ();
9795 end_sequence ();
9796 emit_jump_insn_before (tem, loop_end);
9798 for (tem = PREV_INSN (loop_end);
9799 tem && !JUMP_P (tem);
9800 tem = PREV_INSN (tem))
9803 if (tem)
9804 JUMP_LABEL (tem) = XEXP (jump_label, 0);
9806 if (nonneg)
9808 if (tem)
9810 /* Increment of LABEL_NUSES done above. */
9811 /* Register is now always nonnegative,
9812 so add REG_NONNEG note to the branch. */
9813 REG_NOTES (tem) = gen_rtx_EXPR_LIST (REG_NONNEG, reg,
9814 REG_NOTES (tem));
9816 bl->nonneg = 1;
9819 /* No insn may reference both the reversed and another biv or it
9820 will fail (see comment near the top of the loop reversal
9821 code).
9822 Earlier on, we have verified that the biv has no use except
9823 counting, or it is the only biv in this function.
9824 However, the code that computes no_use_except_counting does
9825 not verify reg notes. It's possible to have an insn that
9826 references another biv, and has a REG_EQUAL note with an
9827 expression based on the reversed biv. To avoid this case,
9828 remove all REG_EQUAL notes based on the reversed biv
9829 here. */
9830 for (p = loop_start; p != loop_end; p = NEXT_INSN (p))
9831 if (INSN_P (p))
9833 rtx *pnote;
9834 rtx set = single_set (p);
9835 /* If this is a set of a GIV based on the reversed biv, any
9836 REG_EQUAL notes should still be correct. */
9837 if (! set
9838 || !REG_P (SET_DEST (set))
9839 || (size_t) REGNO (SET_DEST (set)) >= ivs->n_regs
9840 || REG_IV_TYPE (ivs, REGNO (SET_DEST (set))) != GENERAL_INDUCT
9841 || REG_IV_INFO (ivs, REGNO (SET_DEST (set)))->src_reg != bl->biv->src_reg)
9842 for (pnote = &REG_NOTES (p); *pnote;)
9844 if (REG_NOTE_KIND (*pnote) == REG_EQUAL
9845 && reg_mentioned_p (regno_reg_rtx[bl->regno],
9846 XEXP (*pnote, 0)))
9847 *pnote = XEXP (*pnote, 1);
9848 else
9849 pnote = &XEXP (*pnote, 1);
9853 /* Mark that this biv has been reversed. Each giv which depends
9854 on this biv, and which is also live past the end of the loop
9855 will have to be fixed up. */
9857 bl->reversed = 1;
9859 if (loop_dump_stream)
9861 fprintf (loop_dump_stream, "Reversed loop");
9862 if (bl->nonneg)
9863 fprintf (loop_dump_stream, " and added reg_nonneg\n");
9864 else
9865 fprintf (loop_dump_stream, "\n");
9868 return 1;
9873 return 0;
9876 /* Verify whether the biv BL appears to be eliminable,
9877 based on the insns in the loop that refer to it.
9879 If ELIMINATE_P is nonzero, actually do the elimination.
9881 THRESHOLD and INSN_COUNT are from loop_optimize and are used to
9882 determine whether invariant insns should be placed inside or at the
9883 start of the loop. */
9885 static int
9886 maybe_eliminate_biv (const struct loop *loop, struct iv_class *bl,
9887 int eliminate_p, int threshold, int insn_count)
9889 struct loop_ivs *ivs = LOOP_IVS (loop);
9890 rtx reg = bl->biv->dest_reg;
9891 rtx p;
9893 /* Scan all insns in the loop, stopping if we find one that uses the
9894 biv in a way that we cannot eliminate. */
9896 for (p = loop->start; p != loop->end; p = NEXT_INSN (p))
9898 enum rtx_code code = GET_CODE (p);
9899 basic_block where_bb = 0;
9900 rtx where_insn = threshold >= insn_count ? 0 : p;
9901 rtx note;
9903 /* If this is a libcall that sets a giv, skip ahead to its end. */
9904 if (INSN_P (p))
9906 note = find_reg_note (p, REG_LIBCALL, NULL_RTX);
9908 if (note)
9910 rtx last = XEXP (note, 0);
9911 rtx set = single_set (last);
9913 if (set && REG_P (SET_DEST (set)))
9915 unsigned int regno = REGNO (SET_DEST (set));
9917 if (regno < ivs->n_regs
9918 && REG_IV_TYPE (ivs, regno) == GENERAL_INDUCT
9919 && REG_IV_INFO (ivs, regno)->src_reg == bl->biv->src_reg)
9920 p = last;
9925 /* Closely examine the insn if the biv is mentioned. */
9926 if ((code == INSN || code == JUMP_INSN || code == CALL_INSN)
9927 && reg_mentioned_p (reg, PATTERN (p))
9928 && ! maybe_eliminate_biv_1 (loop, PATTERN (p), p, bl,
9929 eliminate_p, where_bb, where_insn))
9931 if (loop_dump_stream)
9932 fprintf (loop_dump_stream,
9933 "Cannot eliminate biv %d: biv used in insn %d.\n",
9934 bl->regno, INSN_UID (p));
9935 break;
9938 /* If we are eliminating, kill REG_EQUAL notes mentioning the biv. */
9939 if (eliminate_p
9940 && (note = find_reg_note (p, REG_EQUAL, NULL_RTX)) != NULL_RTX
9941 && reg_mentioned_p (reg, XEXP (note, 0)))
9942 remove_note (p, note);
9945 if (p == loop->end)
9947 if (loop_dump_stream)
9948 fprintf (loop_dump_stream, "biv %d %s eliminated.\n",
9949 bl->regno, eliminate_p ? "was" : "can be");
9950 return 1;
9953 return 0;
9956 /* INSN and REFERENCE are instructions in the same insn chain.
9957 Return nonzero if INSN is first. */
9959 static int
9960 loop_insn_first_p (rtx insn, rtx reference)
9962 rtx p, q;
9964 for (p = insn, q = reference;;)
9966 /* Start with test for not first so that INSN == REFERENCE yields not
9967 first. */
9968 if (q == insn || ! p)
9969 return 0;
9970 if (p == reference || ! q)
9971 return 1;
9973 /* Either of P or Q might be a NOTE. Notes have the same LUID as the
9974 previous insn, hence the <= comparison below does not work if
9975 P is a note. */
9976 if (INSN_UID (p) < max_uid_for_loop
9977 && INSN_UID (q) < max_uid_for_loop
9978 && !NOTE_P (p))
9979 return INSN_LUID (p) <= INSN_LUID (q);
9981 if (INSN_UID (p) >= max_uid_for_loop
9982 || NOTE_P (p))
9983 p = NEXT_INSN (p);
9984 if (INSN_UID (q) >= max_uid_for_loop)
9985 q = NEXT_INSN (q);
9989 /* We are trying to eliminate BIV in INSN using GIV. Return nonzero if
9990 the offset that we have to take into account due to auto-increment /
9991 div derivation is zero. */
9992 static int
9993 biv_elimination_giv_has_0_offset (struct induction *biv,
9994 struct induction *giv, rtx insn)
9996 /* If the giv V had the auto-inc address optimization applied
9997 to it, and INSN occurs between the giv insn and the biv
9998 insn, then we'd have to adjust the value used here.
9999 This is rare, so we don't bother to make this possible. */
10000 if (giv->auto_inc_opt
10001 && ((loop_insn_first_p (giv->insn, insn)
10002 && loop_insn_first_p (insn, biv->insn))
10003 || (loop_insn_first_p (biv->insn, insn)
10004 && loop_insn_first_p (insn, giv->insn))))
10005 return 0;
10007 return 1;
10010 /* If BL appears in X (part of the pattern of INSN), see if we can
10011 eliminate its use. If so, return 1. If not, return 0.
10013 If BIV does not appear in X, return 1.
10015 If ELIMINATE_P is nonzero, actually do the elimination.
10016 WHERE_INSN/WHERE_BB indicate where extra insns should be added.
10017 Depending on how many items have been moved out of the loop, it
10018 will either be before INSN (when WHERE_INSN is nonzero) or at the
10019 start of the loop (when WHERE_INSN is zero). */
10021 static int
10022 maybe_eliminate_biv_1 (const struct loop *loop, rtx x, rtx insn,
10023 struct iv_class *bl, int eliminate_p,
10024 basic_block where_bb, rtx where_insn)
10026 enum rtx_code code = GET_CODE (x);
10027 rtx reg = bl->biv->dest_reg;
10028 enum machine_mode mode = GET_MODE (reg);
10029 struct induction *v;
10030 rtx arg, tem;
10031 #ifdef HAVE_cc0
10032 rtx new;
10033 #endif
10034 int arg_operand;
10035 const char *fmt;
10036 int i, j;
10038 switch (code)
10040 case REG:
10041 /* If we haven't already been able to do something with this BIV,
10042 we can't eliminate it. */
10043 if (x == reg)
10044 return 0;
10045 return 1;
10047 case SET:
10048 /* If this sets the BIV, it is not a problem. */
10049 if (SET_DEST (x) == reg)
10050 return 1;
10052 /* If this is an insn that defines a giv, it is also ok because
10053 it will go away when the giv is reduced. */
10054 for (v = bl->giv; v; v = v->next_iv)
10055 if (v->giv_type == DEST_REG && SET_DEST (x) == v->dest_reg)
10056 return 1;
10058 #ifdef HAVE_cc0
10059 if (SET_DEST (x) == cc0_rtx && SET_SRC (x) == reg)
10061 /* Can replace with any giv that was reduced and
10062 that has (MULT_VAL != 0) and (ADD_VAL == 0).
10063 Require a constant for MULT_VAL, so we know it's nonzero.
10064 ??? We disable this optimization to avoid potential
10065 overflows. */
10067 for (v = bl->giv; v; v = v->next_iv)
10068 if (GET_CODE (v->mult_val) == CONST_INT && v->mult_val != const0_rtx
10069 && v->add_val == const0_rtx
10070 && ! v->ignore && ! v->maybe_dead && v->always_computable
10071 && v->mode == mode
10072 && 0)
10074 if (! biv_elimination_giv_has_0_offset (bl->biv, v, insn))
10075 continue;
10077 if (! eliminate_p)
10078 return 1;
10080 /* If the giv has the opposite direction of change,
10081 then reverse the comparison. */
10082 if (INTVAL (v->mult_val) < 0)
10083 new = gen_rtx_COMPARE (GET_MODE (v->new_reg),
10084 const0_rtx, v->new_reg);
10085 else
10086 new = v->new_reg;
10088 /* We can probably test that giv's reduced reg. */
10089 if (validate_change (insn, &SET_SRC (x), new, 0))
10090 return 1;
10093 /* Look for a giv with (MULT_VAL != 0) and (ADD_VAL != 0);
10094 replace test insn with a compare insn (cmp REDUCED_GIV ADD_VAL).
10095 Require a constant for MULT_VAL, so we know it's nonzero.
10096 ??? Do this only if ADD_VAL is a pointer to avoid a potential
10097 overflow problem. */
10099 for (v = bl->giv; v; v = v->next_iv)
10100 if (GET_CODE (v->mult_val) == CONST_INT
10101 && v->mult_val != const0_rtx
10102 && ! v->ignore && ! v->maybe_dead && v->always_computable
10103 && v->mode == mode
10104 && (GET_CODE (v->add_val) == SYMBOL_REF
10105 || GET_CODE (v->add_val) == LABEL_REF
10106 || GET_CODE (v->add_val) == CONST
10107 || (REG_P (v->add_val)
10108 && REG_POINTER (v->add_val))))
10110 if (! biv_elimination_giv_has_0_offset (bl->biv, v, insn))
10111 continue;
10113 if (! eliminate_p)
10114 return 1;
10116 /* If the giv has the opposite direction of change,
10117 then reverse the comparison. */
10118 if (INTVAL (v->mult_val) < 0)
10119 new = gen_rtx_COMPARE (VOIDmode, copy_rtx (v->add_val),
10120 v->new_reg);
10121 else
10122 new = gen_rtx_COMPARE (VOIDmode, v->new_reg,
10123 copy_rtx (v->add_val));
10125 /* Replace biv with the giv's reduced register. */
10126 update_reg_last_use (v->add_val, insn);
10127 if (validate_change (insn, &SET_SRC (PATTERN (insn)), new, 0))
10128 return 1;
10130 /* Insn doesn't support that constant or invariant. Copy it
10131 into a register (it will be a loop invariant.) */
10132 tem = gen_reg_rtx (GET_MODE (v->new_reg));
10134 loop_insn_emit_before (loop, 0, where_insn,
10135 gen_move_insn (tem,
10136 copy_rtx (v->add_val)));
10138 /* Substitute the new register for its invariant value in
10139 the compare expression. */
10140 XEXP (new, (INTVAL (v->mult_val) < 0) ? 0 : 1) = tem;
10141 if (validate_change (insn, &SET_SRC (PATTERN (insn)), new, 0))
10142 return 1;
10145 #endif
10146 break;
10148 case COMPARE:
10149 case EQ: case NE:
10150 case GT: case GE: case GTU: case GEU:
10151 case LT: case LE: case LTU: case LEU:
10152 /* See if either argument is the biv. */
10153 if (XEXP (x, 0) == reg)
10154 arg = XEXP (x, 1), arg_operand = 1;
10155 else if (XEXP (x, 1) == reg)
10156 arg = XEXP (x, 0), arg_operand = 0;
10157 else
10158 break;
10160 if (CONSTANT_P (arg))
10162 /* First try to replace with any giv that has constant positive
10163 mult_val and constant add_val. We might be able to support
10164 negative mult_val, but it seems complex to do it in general. */
10166 for (v = bl->giv; v; v = v->next_iv)
10167 if (GET_CODE (v->mult_val) == CONST_INT
10168 && INTVAL (v->mult_val) > 0
10169 && (GET_CODE (v->add_val) == SYMBOL_REF
10170 || GET_CODE (v->add_val) == LABEL_REF
10171 || GET_CODE (v->add_val) == CONST
10172 || (REG_P (v->add_val)
10173 && REG_POINTER (v->add_val)))
10174 && ! v->ignore && ! v->maybe_dead && v->always_computable
10175 && v->mode == mode)
10177 if (! biv_elimination_giv_has_0_offset (bl->biv, v, insn))
10178 continue;
10180 /* Don't eliminate if the linear combination that makes up
10181 the giv overflows when it is applied to ARG. */
10182 if (GET_CODE (arg) == CONST_INT)
10184 rtx add_val;
10186 if (GET_CODE (v->add_val) == CONST_INT)
10187 add_val = v->add_val;
10188 else
10189 add_val = const0_rtx;
10191 if (const_mult_add_overflow_p (arg, v->mult_val,
10192 add_val, mode, 1))
10193 continue;
10196 if (! eliminate_p)
10197 return 1;
10199 /* Replace biv with the giv's reduced reg. */
10200 validate_change (insn, &XEXP (x, 1 - arg_operand), v->new_reg, 1);
10202 /* If all constants are actually constant integers and
10203 the derived constant can be directly placed in the COMPARE,
10204 do so. */
10205 if (GET_CODE (arg) == CONST_INT
10206 && GET_CODE (v->add_val) == CONST_INT)
10208 tem = expand_mult_add (arg, NULL_RTX, v->mult_val,
10209 v->add_val, mode, 1);
10211 else
10213 /* Otherwise, load it into a register. */
10214 tem = gen_reg_rtx (mode);
10215 loop_iv_add_mult_emit_before (loop, arg,
10216 v->mult_val, v->add_val,
10217 tem, where_bb, where_insn);
10220 validate_change (insn, &XEXP (x, arg_operand), tem, 1);
10222 if (apply_change_group ())
10223 return 1;
10226 /* Look for giv with positive constant mult_val and nonconst add_val.
10227 Insert insns to calculate new compare value.
10228 ??? Turn this off due to possible overflow. */
10230 for (v = bl->giv; v; v = v->next_iv)
10231 if (GET_CODE (v->mult_val) == CONST_INT
10232 && INTVAL (v->mult_val) > 0
10233 && ! v->ignore && ! v->maybe_dead && v->always_computable
10234 && v->mode == mode
10235 && 0)
10237 rtx tem;
10239 if (! biv_elimination_giv_has_0_offset (bl->biv, v, insn))
10240 continue;
10242 if (! eliminate_p)
10243 return 1;
10245 tem = gen_reg_rtx (mode);
10247 /* Replace biv with giv's reduced register. */
10248 validate_change (insn, &XEXP (x, 1 - arg_operand),
10249 v->new_reg, 1);
10251 /* Compute value to compare against. */
10252 loop_iv_add_mult_emit_before (loop, arg,
10253 v->mult_val, v->add_val,
10254 tem, where_bb, where_insn);
10255 /* Use it in this insn. */
10256 validate_change (insn, &XEXP (x, arg_operand), tem, 1);
10257 if (apply_change_group ())
10258 return 1;
10261 else if (REG_P (arg) || MEM_P (arg))
10263 if (loop_invariant_p (loop, arg) == 1)
10265 /* Look for giv with constant positive mult_val and nonconst
10266 add_val. Insert insns to compute new compare value.
10267 ??? Turn this off due to possible overflow. */
10269 for (v = bl->giv; v; v = v->next_iv)
10270 if (GET_CODE (v->mult_val) == CONST_INT && INTVAL (v->mult_val) > 0
10271 && ! v->ignore && ! v->maybe_dead && v->always_computable
10272 && v->mode == mode
10273 && 0)
10275 rtx tem;
10277 if (! biv_elimination_giv_has_0_offset (bl->biv, v, insn))
10278 continue;
10280 if (! eliminate_p)
10281 return 1;
10283 tem = gen_reg_rtx (mode);
10285 /* Replace biv with giv's reduced register. */
10286 validate_change (insn, &XEXP (x, 1 - arg_operand),
10287 v->new_reg, 1);
10289 /* Compute value to compare against. */
10290 loop_iv_add_mult_emit_before (loop, arg,
10291 v->mult_val, v->add_val,
10292 tem, where_bb, where_insn);
10293 validate_change (insn, &XEXP (x, arg_operand), tem, 1);
10294 if (apply_change_group ())
10295 return 1;
10299 /* This code has problems. Basically, you can't know when
10300 seeing if we will eliminate BL, whether a particular giv
10301 of ARG will be reduced. If it isn't going to be reduced,
10302 we can't eliminate BL. We can try forcing it to be reduced,
10303 but that can generate poor code.
10305 The problem is that the benefit of reducing TV, below should
10306 be increased if BL can actually be eliminated, but this means
10307 we might have to do a topological sort of the order in which
10308 we try to process biv. It doesn't seem worthwhile to do
10309 this sort of thing now. */
10311 #if 0
10312 /* Otherwise the reg compared with had better be a biv. */
10313 if (!REG_P (arg)
10314 || REG_IV_TYPE (ivs, REGNO (arg)) != BASIC_INDUCT)
10315 return 0;
10317 /* Look for a pair of givs, one for each biv,
10318 with identical coefficients. */
10319 for (v = bl->giv; v; v = v->next_iv)
10321 struct induction *tv;
10323 if (v->ignore || v->maybe_dead || v->mode != mode)
10324 continue;
10326 for (tv = REG_IV_CLASS (ivs, REGNO (arg))->giv; tv;
10327 tv = tv->next_iv)
10328 if (! tv->ignore && ! tv->maybe_dead
10329 && rtx_equal_p (tv->mult_val, v->mult_val)
10330 && rtx_equal_p (tv->add_val, v->add_val)
10331 && tv->mode == mode)
10333 if (! biv_elimination_giv_has_0_offset (bl->biv, v, insn))
10334 continue;
10336 if (! eliminate_p)
10337 return 1;
10339 /* Replace biv with its giv's reduced reg. */
10340 XEXP (x, 1 - arg_operand) = v->new_reg;
10341 /* Replace other operand with the other giv's
10342 reduced reg. */
10343 XEXP (x, arg_operand) = tv->new_reg;
10344 return 1;
10347 #endif
10350 /* If we get here, the biv can't be eliminated. */
10351 return 0;
10353 case MEM:
10354 /* If this address is a DEST_ADDR giv, it doesn't matter if the
10355 biv is used in it, since it will be replaced. */
10356 for (v = bl->giv; v; v = v->next_iv)
10357 if (v->giv_type == DEST_ADDR && v->location == &XEXP (x, 0))
10358 return 1;
10359 break;
10361 default:
10362 break;
10365 /* See if any subexpression fails elimination. */
10366 fmt = GET_RTX_FORMAT (code);
10367 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
10369 switch (fmt[i])
10371 case 'e':
10372 if (! maybe_eliminate_biv_1 (loop, XEXP (x, i), insn, bl,
10373 eliminate_p, where_bb, where_insn))
10374 return 0;
10375 break;
10377 case 'E':
10378 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10379 if (! maybe_eliminate_biv_1 (loop, XVECEXP (x, i, j), insn, bl,
10380 eliminate_p, where_bb, where_insn))
10381 return 0;
10382 break;
10386 return 1;
10389 /* Return nonzero if the last use of REG
10390 is in an insn following INSN in the same basic block. */
10392 static int
10393 last_use_this_basic_block (rtx reg, rtx insn)
10395 rtx n;
10396 for (n = insn;
10397 n && !LABEL_P (n) && !JUMP_P (n);
10398 n = NEXT_INSN (n))
10400 if (REGNO_LAST_UID (REGNO (reg)) == INSN_UID (n))
10401 return 1;
10403 return 0;
10406 /* Called via `note_stores' to record the initial value of a biv. Here we
10407 just record the location of the set and process it later. */
10409 static void
10410 record_initial (rtx dest, rtx set, void *data ATTRIBUTE_UNUSED)
10412 struct loop_ivs *ivs = (struct loop_ivs *) data;
10413 struct iv_class *bl;
10415 if (!REG_P (dest)
10416 || REGNO (dest) >= ivs->n_regs
10417 || REG_IV_TYPE (ivs, REGNO (dest)) != BASIC_INDUCT)
10418 return;
10420 bl = REG_IV_CLASS (ivs, REGNO (dest));
10422 /* If this is the first set found, record it. */
10423 if (bl->init_insn == 0)
10425 bl->init_insn = note_insn;
10426 bl->init_set = set;
10430 /* If any of the registers in X are "old" and currently have a last use earlier
10431 than INSN, update them to have a last use of INSN. Their actual last use
10432 will be the previous insn but it will not have a valid uid_luid so we can't
10433 use it. X must be a source expression only. */
10435 static void
10436 update_reg_last_use (rtx x, rtx insn)
10438 /* Check for the case where INSN does not have a valid luid. In this case,
10439 there is no need to modify the regno_last_uid, as this can only happen
10440 when code is inserted after the loop_end to set a pseudo's final value,
10441 and hence this insn will never be the last use of x.
10442 ???? This comment is not correct. See for example loop_givs_reduce.
10443 This may insert an insn before another new insn. */
10444 if (REG_P (x) && REGNO (x) < max_reg_before_loop
10445 && INSN_UID (insn) < max_uid_for_loop
10446 && REGNO_LAST_LUID (REGNO (x)) < INSN_LUID (insn))
10448 REGNO_LAST_UID (REGNO (x)) = INSN_UID (insn);
10450 else
10452 int i, j;
10453 const char *fmt = GET_RTX_FORMAT (GET_CODE (x));
10454 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
10456 if (fmt[i] == 'e')
10457 update_reg_last_use (XEXP (x, i), insn);
10458 else if (fmt[i] == 'E')
10459 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10460 update_reg_last_use (XVECEXP (x, i, j), insn);
10465 /* Similar to rtlanal.c:get_condition, except that we also put an
10466 invariant last unless both operands are invariants. */
10468 static rtx
10469 get_condition_for_loop (const struct loop *loop, rtx x)
10471 rtx comparison = get_condition (x, (rtx*) 0, false, true);
10473 if (comparison == 0
10474 || ! loop_invariant_p (loop, XEXP (comparison, 0))
10475 || loop_invariant_p (loop, XEXP (comparison, 1)))
10476 return comparison;
10478 return gen_rtx_fmt_ee (swap_condition (GET_CODE (comparison)), VOIDmode,
10479 XEXP (comparison, 1), XEXP (comparison, 0));
10482 /* Scan the function and determine whether it has indirect (computed) jumps.
10484 This is taken mostly from flow.c; similar code exists elsewhere
10485 in the compiler. It may be useful to put this into rtlanal.c. */
10486 static int
10487 indirect_jump_in_function_p (rtx start)
10489 rtx insn;
10491 for (insn = start; insn; insn = NEXT_INSN (insn))
10492 if (computed_jump_p (insn))
10493 return 1;
10495 return 0;
10498 /* Add MEM to the LOOP_MEMS array, if appropriate. See the
10499 documentation for LOOP_MEMS for the definition of `appropriate'.
10500 This function is called from prescan_loop via for_each_rtx. */
10502 static int
10503 insert_loop_mem (rtx *mem, void *data ATTRIBUTE_UNUSED)
10505 struct loop_info *loop_info = data;
10506 int i;
10507 rtx m = *mem;
10509 if (m == NULL_RTX)
10510 return 0;
10512 switch (GET_CODE (m))
10514 case MEM:
10515 break;
10517 case CLOBBER:
10518 /* We're not interested in MEMs that are only clobbered. */
10519 return -1;
10521 case CONST_DOUBLE:
10522 /* We're not interested in the MEM associated with a
10523 CONST_DOUBLE, so there's no need to traverse into this. */
10524 return -1;
10526 case EXPR_LIST:
10527 /* We're not interested in any MEMs that only appear in notes. */
10528 return -1;
10530 default:
10531 /* This is not a MEM. */
10532 return 0;
10535 /* See if we've already seen this MEM. */
10536 for (i = 0; i < loop_info->mems_idx; ++i)
10537 if (rtx_equal_p (m, loop_info->mems[i].mem))
10539 if (MEM_VOLATILE_P (m) && !MEM_VOLATILE_P (loop_info->mems[i].mem))
10540 loop_info->mems[i].mem = m;
10541 if (GET_MODE (m) != GET_MODE (loop_info->mems[i].mem))
10542 /* The modes of the two memory accesses are different. If
10543 this happens, something tricky is going on, and we just
10544 don't optimize accesses to this MEM. */
10545 loop_info->mems[i].optimize = 0;
10547 return 0;
10550 /* Resize the array, if necessary. */
10551 if (loop_info->mems_idx == loop_info->mems_allocated)
10553 if (loop_info->mems_allocated != 0)
10554 loop_info->mems_allocated *= 2;
10555 else
10556 loop_info->mems_allocated = 32;
10558 loop_info->mems = xrealloc (loop_info->mems,
10559 loop_info->mems_allocated * sizeof (loop_mem_info));
10562 /* Actually insert the MEM. */
10563 loop_info->mems[loop_info->mems_idx].mem = m;
10564 /* We can't hoist this MEM out of the loop if it's a BLKmode MEM
10565 because we can't put it in a register. We still store it in the
10566 table, though, so that if we see the same address later, but in a
10567 non-BLK mode, we'll not think we can optimize it at that point. */
10568 loop_info->mems[loop_info->mems_idx].optimize = (GET_MODE (m) != BLKmode);
10569 loop_info->mems[loop_info->mems_idx].reg = NULL_RTX;
10570 ++loop_info->mems_idx;
10572 return 0;
10576 /* Allocate REGS->ARRAY or reallocate it if it is too small.
10578 Increment REGS->ARRAY[I].SET_IN_LOOP at the index I of each
10579 register that is modified by an insn between FROM and TO. If the
10580 value of an element of REGS->array[I].SET_IN_LOOP becomes 127 or
10581 more, stop incrementing it, to avoid overflow.
10583 Store in REGS->ARRAY[I].SINGLE_USAGE the single insn in which
10584 register I is used, if it is only used once. Otherwise, it is set
10585 to 0 (for no uses) or const0_rtx for more than one use. This
10586 parameter may be zero, in which case this processing is not done.
10588 Set REGS->ARRAY[I].MAY_NOT_OPTIMIZE nonzero if we should not
10589 optimize register I. */
10591 static void
10592 loop_regs_scan (const struct loop *loop, int extra_size)
10594 struct loop_regs *regs = LOOP_REGS (loop);
10595 int old_nregs;
10596 /* last_set[n] is nonzero iff reg n has been set in the current
10597 basic block. In that case, it is the insn that last set reg n. */
10598 rtx *last_set;
10599 rtx insn;
10600 int i;
10602 old_nregs = regs->num;
10603 regs->num = max_reg_num ();
10605 /* Grow the regs array if not allocated or too small. */
10606 if (regs->num >= regs->size)
10608 regs->size = regs->num + extra_size;
10610 regs->array = xrealloc (regs->array, regs->size * sizeof (*regs->array));
10612 /* Zero the new elements. */
10613 memset (regs->array + old_nregs, 0,
10614 (regs->size - old_nregs) * sizeof (*regs->array));
10617 /* Clear previously scanned fields but do not clear n_times_set. */
10618 for (i = 0; i < old_nregs; i++)
10620 regs->array[i].set_in_loop = 0;
10621 regs->array[i].may_not_optimize = 0;
10622 regs->array[i].single_usage = NULL_RTX;
10625 last_set = xcalloc (regs->num, sizeof (rtx));
10627 /* Scan the loop, recording register usage. */
10628 for (insn = loop->top ? loop->top : loop->start; insn != loop->end;
10629 insn = NEXT_INSN (insn))
10631 if (INSN_P (insn))
10633 /* Record registers that have exactly one use. */
10634 find_single_use_in_loop (regs, insn, PATTERN (insn));
10636 /* Include uses in REG_EQUAL notes. */
10637 if (REG_NOTES (insn))
10638 find_single_use_in_loop (regs, insn, REG_NOTES (insn));
10640 if (GET_CODE (PATTERN (insn)) == SET
10641 || GET_CODE (PATTERN (insn)) == CLOBBER)
10642 count_one_set (regs, insn, PATTERN (insn), last_set);
10643 else if (GET_CODE (PATTERN (insn)) == PARALLEL)
10645 int i;
10646 for (i = XVECLEN (PATTERN (insn), 0) - 1; i >= 0; i--)
10647 count_one_set (regs, insn, XVECEXP (PATTERN (insn), 0, i),
10648 last_set);
10652 if (LABEL_P (insn) || JUMP_P (insn))
10653 memset (last_set, 0, regs->num * sizeof (rtx));
10655 /* Invalidate all registers used for function argument passing.
10656 We check rtx_varies_p for the same reason as below, to allow
10657 optimizing PIC calculations. */
10658 if (CALL_P (insn))
10660 rtx link;
10661 for (link = CALL_INSN_FUNCTION_USAGE (insn);
10662 link;
10663 link = XEXP (link, 1))
10665 rtx op, reg;
10667 if (GET_CODE (op = XEXP (link, 0)) == USE
10668 && REG_P (reg = XEXP (op, 0))
10669 && rtx_varies_p (reg, 1))
10670 regs->array[REGNO (reg)].may_not_optimize = 1;
10675 /* Invalidate all hard registers clobbered by calls. With one exception:
10676 a call-clobbered PIC register is still function-invariant for our
10677 purposes, since we can hoist any PIC calculations out of the loop.
10678 Thus the call to rtx_varies_p. */
10679 if (LOOP_INFO (loop)->has_call)
10680 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
10681 if (TEST_HARD_REG_BIT (regs_invalidated_by_call, i)
10682 && rtx_varies_p (regno_reg_rtx[i], 1))
10684 regs->array[i].may_not_optimize = 1;
10685 regs->array[i].set_in_loop = 1;
10688 #ifdef AVOID_CCMODE_COPIES
10689 /* Don't try to move insns which set CC registers if we should not
10690 create CCmode register copies. */
10691 for (i = regs->num - 1; i >= FIRST_PSEUDO_REGISTER; i--)
10692 if (GET_MODE_CLASS (GET_MODE (regno_reg_rtx[i])) == MODE_CC)
10693 regs->array[i].may_not_optimize = 1;
10694 #endif
10696 /* Set regs->array[I].n_times_set for the new registers. */
10697 for (i = old_nregs; i < regs->num; i++)
10698 regs->array[i].n_times_set = regs->array[i].set_in_loop;
10700 free (last_set);
10703 /* Returns the number of real INSNs in the LOOP. */
10705 static int
10706 count_insns_in_loop (const struct loop *loop)
10708 int count = 0;
10709 rtx insn;
10711 for (insn = loop->top ? loop->top : loop->start; insn != loop->end;
10712 insn = NEXT_INSN (insn))
10713 if (INSN_P (insn))
10714 ++count;
10716 return count;
10719 /* Move MEMs into registers for the duration of the loop. */
10721 static void
10722 load_mems (const struct loop *loop)
10724 struct loop_info *loop_info = LOOP_INFO (loop);
10725 struct loop_regs *regs = LOOP_REGS (loop);
10726 int maybe_never = 0;
10727 int i;
10728 rtx p, prev_ebb_head;
10729 rtx label = NULL_RTX;
10730 rtx end_label;
10731 /* Nonzero if the next instruction may never be executed. */
10732 int next_maybe_never = 0;
10733 unsigned int last_max_reg = max_reg_num ();
10735 if (loop_info->mems_idx == 0)
10736 return;
10738 /* We cannot use next_label here because it skips over normal insns. */
10739 end_label = next_nonnote_insn (loop->end);
10740 if (end_label && !LABEL_P (end_label))
10741 end_label = NULL_RTX;
10743 /* Check to see if it's possible that some instructions in the loop are
10744 never executed. Also check if there is a goto out of the loop other
10745 than right after the end of the loop. */
10746 for (p = next_insn_in_loop (loop, loop->scan_start);
10747 p != NULL_RTX;
10748 p = next_insn_in_loop (loop, p))
10750 if (LABEL_P (p))
10751 maybe_never = 1;
10752 else if (JUMP_P (p)
10753 /* If we enter the loop in the middle, and scan
10754 around to the beginning, don't set maybe_never
10755 for that. This must be an unconditional jump,
10756 otherwise the code at the top of the loop might
10757 never be executed. Unconditional jumps are
10758 followed a by barrier then loop end. */
10759 && ! (JUMP_P (p)
10760 && JUMP_LABEL (p) == loop->top
10761 && NEXT_INSN (NEXT_INSN (p)) == loop->end
10762 && any_uncondjump_p (p)))
10764 /* If this is a jump outside of the loop but not right
10765 after the end of the loop, we would have to emit new fixup
10766 sequences for each such label. */
10767 if (/* If we can't tell where control might go when this
10768 JUMP_INSN is executed, we must be conservative. */
10769 !JUMP_LABEL (p)
10770 || (JUMP_LABEL (p) != end_label
10771 && (INSN_UID (JUMP_LABEL (p)) >= max_uid_for_loop
10772 || INSN_LUID (JUMP_LABEL (p)) < INSN_LUID (loop->start)
10773 || INSN_LUID (JUMP_LABEL (p)) > INSN_LUID (loop->end))))
10774 return;
10776 if (!any_condjump_p (p))
10777 /* Something complicated. */
10778 maybe_never = 1;
10779 else
10780 /* If there are any more instructions in the loop, they
10781 might not be reached. */
10782 next_maybe_never = 1;
10784 else if (next_maybe_never)
10785 maybe_never = 1;
10788 /* Find start of the extended basic block that enters the loop. */
10789 for (p = loop->start;
10790 PREV_INSN (p) && !LABEL_P (p);
10791 p = PREV_INSN (p))
10793 prev_ebb_head = p;
10795 cselib_init (true);
10797 /* Build table of mems that get set to constant values before the
10798 loop. */
10799 for (; p != loop->start; p = NEXT_INSN (p))
10800 cselib_process_insn (p);
10802 /* Actually move the MEMs. */
10803 for (i = 0; i < loop_info->mems_idx; ++i)
10805 regset_head load_copies;
10806 regset_head store_copies;
10807 int written = 0;
10808 rtx reg;
10809 rtx mem = loop_info->mems[i].mem;
10810 rtx mem_list_entry;
10812 if (MEM_VOLATILE_P (mem)
10813 || loop_invariant_p (loop, XEXP (mem, 0)) != 1)
10814 /* There's no telling whether or not MEM is modified. */
10815 loop_info->mems[i].optimize = 0;
10817 /* Go through the MEMs written to in the loop to see if this
10818 one is aliased by one of them. */
10819 mem_list_entry = loop_info->store_mems;
10820 while (mem_list_entry)
10822 if (rtx_equal_p (mem, XEXP (mem_list_entry, 0)))
10823 written = 1;
10824 else if (true_dependence (XEXP (mem_list_entry, 0), VOIDmode,
10825 mem, rtx_varies_p))
10827 /* MEM is indeed aliased by this store. */
10828 loop_info->mems[i].optimize = 0;
10829 break;
10831 mem_list_entry = XEXP (mem_list_entry, 1);
10834 if (flag_float_store && written
10835 && GET_MODE_CLASS (GET_MODE (mem)) == MODE_FLOAT)
10836 loop_info->mems[i].optimize = 0;
10838 /* If this MEM is written to, we must be sure that there
10839 are no reads from another MEM that aliases this one. */
10840 if (loop_info->mems[i].optimize && written)
10842 int j;
10844 for (j = 0; j < loop_info->mems_idx; ++j)
10846 if (j == i)
10847 continue;
10848 else if (true_dependence (mem,
10849 VOIDmode,
10850 loop_info->mems[j].mem,
10851 rtx_varies_p))
10853 /* It's not safe to hoist loop_info->mems[i] out of
10854 the loop because writes to it might not be
10855 seen by reads from loop_info->mems[j]. */
10856 loop_info->mems[i].optimize = 0;
10857 break;
10862 if (maybe_never && may_trap_p (mem))
10863 /* We can't access the MEM outside the loop; it might
10864 cause a trap that wouldn't have happened otherwise. */
10865 loop_info->mems[i].optimize = 0;
10867 if (!loop_info->mems[i].optimize)
10868 /* We thought we were going to lift this MEM out of the
10869 loop, but later discovered that we could not. */
10870 continue;
10872 INIT_REG_SET (&load_copies);
10873 INIT_REG_SET (&store_copies);
10875 /* Allocate a pseudo for this MEM. We set REG_USERVAR_P in
10876 order to keep scan_loop from moving stores to this MEM
10877 out of the loop just because this REG is neither a
10878 user-variable nor used in the loop test. */
10879 reg = gen_reg_rtx (GET_MODE (mem));
10880 REG_USERVAR_P (reg) = 1;
10881 loop_info->mems[i].reg = reg;
10883 /* Now, replace all references to the MEM with the
10884 corresponding pseudos. */
10885 maybe_never = 0;
10886 for (p = next_insn_in_loop (loop, loop->scan_start);
10887 p != NULL_RTX;
10888 p = next_insn_in_loop (loop, p))
10890 if (INSN_P (p))
10892 rtx set;
10894 set = single_set (p);
10896 /* See if this copies the mem into a register that isn't
10897 modified afterwards. We'll try to do copy propagation
10898 a little further on. */
10899 if (set
10900 /* @@@ This test is _way_ too conservative. */
10901 && ! maybe_never
10902 && REG_P (SET_DEST (set))
10903 && REGNO (SET_DEST (set)) >= FIRST_PSEUDO_REGISTER
10904 && REGNO (SET_DEST (set)) < last_max_reg
10905 && regs->array[REGNO (SET_DEST (set))].n_times_set == 1
10906 && rtx_equal_p (SET_SRC (set), mem))
10907 SET_REGNO_REG_SET (&load_copies, REGNO (SET_DEST (set)));
10909 /* See if this copies the mem from a register that isn't
10910 modified afterwards. We'll try to remove the
10911 redundant copy later on by doing a little register
10912 renaming and copy propagation. This will help
10913 to untangle things for the BIV detection code. */
10914 if (set
10915 && ! maybe_never
10916 && REG_P (SET_SRC (set))
10917 && REGNO (SET_SRC (set)) >= FIRST_PSEUDO_REGISTER
10918 && REGNO (SET_SRC (set)) < last_max_reg
10919 && regs->array[REGNO (SET_SRC (set))].n_times_set == 1
10920 && rtx_equal_p (SET_DEST (set), mem))
10921 SET_REGNO_REG_SET (&store_copies, REGNO (SET_SRC (set)));
10923 /* If this is a call which uses / clobbers this memory
10924 location, we must not change the interface here. */
10925 if (CALL_P (p)
10926 && reg_mentioned_p (loop_info->mems[i].mem,
10927 CALL_INSN_FUNCTION_USAGE (p)))
10929 cancel_changes (0);
10930 loop_info->mems[i].optimize = 0;
10931 break;
10933 else
10934 /* Replace the memory reference with the shadow register. */
10935 replace_loop_mems (p, loop_info->mems[i].mem,
10936 loop_info->mems[i].reg, written);
10939 if (LABEL_P (p)
10940 || JUMP_P (p))
10941 maybe_never = 1;
10944 if (! loop_info->mems[i].optimize)
10945 ; /* We found we couldn't do the replacement, so do nothing. */
10946 else if (! apply_change_group ())
10947 /* We couldn't replace all occurrences of the MEM. */
10948 loop_info->mems[i].optimize = 0;
10949 else
10951 /* Load the memory immediately before LOOP->START, which is
10952 the NOTE_LOOP_BEG. */
10953 cselib_val *e = cselib_lookup (mem, VOIDmode, 0);
10954 rtx set;
10955 rtx best = mem;
10956 unsigned j;
10957 struct elt_loc_list *const_equiv = 0;
10958 reg_set_iterator rsi;
10960 if (e)
10962 struct elt_loc_list *equiv;
10963 struct elt_loc_list *best_equiv = 0;
10964 for (equiv = e->locs; equiv; equiv = equiv->next)
10966 if (CONSTANT_P (equiv->loc))
10967 const_equiv = equiv;
10968 else if (REG_P (equiv->loc)
10969 /* Extending hard register lifetimes causes crash
10970 on SRC targets. Doing so on non-SRC is
10971 probably also not good idea, since we most
10972 probably have pseudoregister equivalence as
10973 well. */
10974 && REGNO (equiv->loc) >= FIRST_PSEUDO_REGISTER)
10975 best_equiv = equiv;
10977 /* Use the constant equivalence if that is cheap enough. */
10978 if (! best_equiv)
10979 best_equiv = const_equiv;
10980 else if (const_equiv
10981 && (rtx_cost (const_equiv->loc, SET)
10982 <= rtx_cost (best_equiv->loc, SET)))
10984 best_equiv = const_equiv;
10985 const_equiv = 0;
10988 /* If best_equiv is nonzero, we know that MEM is set to a
10989 constant or register before the loop. We will use this
10990 knowledge to initialize the shadow register with that
10991 constant or reg rather than by loading from MEM. */
10992 if (best_equiv)
10993 best = copy_rtx (best_equiv->loc);
10996 set = gen_move_insn (reg, best);
10997 set = loop_insn_hoist (loop, set);
10998 if (REG_P (best))
11000 for (p = prev_ebb_head; p != loop->start; p = NEXT_INSN (p))
11001 if (REGNO_LAST_UID (REGNO (best)) == INSN_UID (p))
11003 REGNO_LAST_UID (REGNO (best)) = INSN_UID (set);
11004 break;
11008 if (const_equiv)
11009 set_unique_reg_note (set, REG_EQUAL, copy_rtx (const_equiv->loc));
11011 if (written)
11013 if (label == NULL_RTX)
11015 label = gen_label_rtx ();
11016 emit_label_after (label, loop->end);
11019 /* Store the memory immediately after END, which is
11020 the NOTE_LOOP_END. */
11021 set = gen_move_insn (copy_rtx (mem), reg);
11022 loop_insn_emit_after (loop, 0, label, set);
11025 if (loop_dump_stream)
11027 fprintf (loop_dump_stream, "Hoisted regno %d %s from ",
11028 REGNO (reg), (written ? "r/w" : "r/o"));
11029 print_rtl (loop_dump_stream, mem);
11030 fputc ('\n', loop_dump_stream);
11033 /* Attempt a bit of copy propagation. This helps untangle the
11034 data flow, and enables {basic,general}_induction_var to find
11035 more bivs/givs. */
11036 EXECUTE_IF_SET_IN_REG_SET
11037 (&load_copies, FIRST_PSEUDO_REGISTER, j, rsi)
11039 try_copy_prop (loop, reg, j);
11041 CLEAR_REG_SET (&load_copies);
11043 EXECUTE_IF_SET_IN_REG_SET
11044 (&store_copies, FIRST_PSEUDO_REGISTER, j, rsi)
11046 try_swap_copy_prop (loop, reg, j);
11048 CLEAR_REG_SET (&store_copies);
11052 /* Now, we need to replace all references to the previous exit
11053 label with the new one. */
11054 if (label != NULL_RTX && end_label != NULL_RTX)
11055 for (p = loop->start; p != loop->end; p = NEXT_INSN (p))
11056 if (JUMP_P (p) && JUMP_LABEL (p) == end_label)
11057 redirect_jump (p, label, false);
11059 cselib_finish ();
11062 /* For communication between note_reg_stored and its caller. */
11063 struct note_reg_stored_arg
11065 int set_seen;
11066 rtx reg;
11069 /* Called via note_stores, record in SET_SEEN whether X, which is written,
11070 is equal to ARG. */
11071 static void
11072 note_reg_stored (rtx x, rtx setter ATTRIBUTE_UNUSED, void *arg)
11074 struct note_reg_stored_arg *t = (struct note_reg_stored_arg *) arg;
11075 if (t->reg == x)
11076 t->set_seen = 1;
11079 /* Try to replace every occurrence of pseudo REGNO with REPLACEMENT.
11080 There must be exactly one insn that sets this pseudo; it will be
11081 deleted if all replacements succeed and we can prove that the register
11082 is not used after the loop. */
11084 static void
11085 try_copy_prop (const struct loop *loop, rtx replacement, unsigned int regno)
11087 /* This is the reg that we are copying from. */
11088 rtx reg_rtx = regno_reg_rtx[regno];
11089 rtx init_insn = 0;
11090 rtx insn;
11091 /* These help keep track of whether we replaced all uses of the reg. */
11092 int replaced_last = 0;
11093 int store_is_first = 0;
11095 for (insn = next_insn_in_loop (loop, loop->scan_start);
11096 insn != NULL_RTX;
11097 insn = next_insn_in_loop (loop, insn))
11099 rtx set;
11101 /* Only substitute within one extended basic block from the initializing
11102 insn. */
11103 if (LABEL_P (insn) && init_insn)
11104 break;
11106 if (! INSN_P (insn))
11107 continue;
11109 /* Is this the initializing insn? */
11110 set = single_set (insn);
11111 if (set
11112 && REG_P (SET_DEST (set))
11113 && REGNO (SET_DEST (set)) == regno)
11115 if (init_insn)
11116 abort ();
11118 init_insn = insn;
11119 if (REGNO_FIRST_UID (regno) == INSN_UID (insn))
11120 store_is_first = 1;
11123 /* Only substitute after seeing the initializing insn. */
11124 if (init_insn && insn != init_insn)
11126 struct note_reg_stored_arg arg;
11128 replace_loop_regs (insn, reg_rtx, replacement);
11129 if (REGNO_LAST_UID (regno) == INSN_UID (insn))
11130 replaced_last = 1;
11132 /* Stop replacing when REPLACEMENT is modified. */
11133 arg.reg = replacement;
11134 arg.set_seen = 0;
11135 note_stores (PATTERN (insn), note_reg_stored, &arg);
11136 if (arg.set_seen)
11138 rtx note = find_reg_note (insn, REG_EQUAL, NULL);
11140 /* It is possible that we've turned previously valid REG_EQUAL to
11141 invalid, as we change the REGNO to REPLACEMENT and unlike REGNO,
11142 REPLACEMENT is modified, we get different meaning. */
11143 if (note && reg_mentioned_p (replacement, XEXP (note, 0)))
11144 remove_note (insn, note);
11145 break;
11149 if (! init_insn)
11150 abort ();
11151 if (apply_change_group ())
11153 if (loop_dump_stream)
11154 fprintf (loop_dump_stream, " Replaced reg %d", regno);
11155 if (store_is_first && replaced_last)
11157 rtx first;
11158 rtx retval_note;
11160 /* Assume we're just deleting INIT_INSN. */
11161 first = init_insn;
11162 /* Look for REG_RETVAL note. If we're deleting the end of
11163 the libcall sequence, the whole sequence can go. */
11164 retval_note = find_reg_note (init_insn, REG_RETVAL, NULL_RTX);
11165 /* If we found a REG_RETVAL note, find the first instruction
11166 in the sequence. */
11167 if (retval_note)
11168 first = XEXP (retval_note, 0);
11170 /* Delete the instructions. */
11171 loop_delete_insns (first, init_insn);
11173 if (loop_dump_stream)
11174 fprintf (loop_dump_stream, ".\n");
11178 /* Replace all the instructions from FIRST up to and including LAST
11179 with NOTE_INSN_DELETED notes. */
11181 static void
11182 loop_delete_insns (rtx first, rtx last)
11184 while (1)
11186 if (loop_dump_stream)
11187 fprintf (loop_dump_stream, ", deleting init_insn (%d)",
11188 INSN_UID (first));
11189 delete_insn (first);
11191 /* If this was the LAST instructions we're supposed to delete,
11192 we're done. */
11193 if (first == last)
11194 break;
11196 first = NEXT_INSN (first);
11200 /* Try to replace occurrences of pseudo REGNO with REPLACEMENT within
11201 loop LOOP if the order of the sets of these registers can be
11202 swapped. There must be exactly one insn within the loop that sets
11203 this pseudo followed immediately by a move insn that sets
11204 REPLACEMENT with REGNO. */
11205 static void
11206 try_swap_copy_prop (const struct loop *loop, rtx replacement,
11207 unsigned int regno)
11209 rtx insn;
11210 rtx set = NULL_RTX;
11211 unsigned int new_regno;
11213 new_regno = REGNO (replacement);
11215 for (insn = next_insn_in_loop (loop, loop->scan_start);
11216 insn != NULL_RTX;
11217 insn = next_insn_in_loop (loop, insn))
11219 /* Search for the insn that copies REGNO to NEW_REGNO? */
11220 if (INSN_P (insn)
11221 && (set = single_set (insn))
11222 && REG_P (SET_DEST (set))
11223 && REGNO (SET_DEST (set)) == new_regno
11224 && REG_P (SET_SRC (set))
11225 && REGNO (SET_SRC (set)) == regno)
11226 break;
11229 if (insn != NULL_RTX)
11231 rtx prev_insn;
11232 rtx prev_set;
11234 /* Some DEF-USE info would come in handy here to make this
11235 function more general. For now, just check the previous insn
11236 which is the most likely candidate for setting REGNO. */
11238 prev_insn = PREV_INSN (insn);
11240 if (INSN_P (insn)
11241 && (prev_set = single_set (prev_insn))
11242 && REG_P (SET_DEST (prev_set))
11243 && REGNO (SET_DEST (prev_set)) == regno)
11245 /* We have:
11246 (set (reg regno) (expr))
11247 (set (reg new_regno) (reg regno))
11249 so try converting this to:
11250 (set (reg new_regno) (expr))
11251 (set (reg regno) (reg new_regno))
11253 The former construct is often generated when a global
11254 variable used for an induction variable is shadowed by a
11255 register (NEW_REGNO). The latter construct improves the
11256 chances of GIV replacement and BIV elimination. */
11258 validate_change (prev_insn, &SET_DEST (prev_set),
11259 replacement, 1);
11260 validate_change (insn, &SET_DEST (set),
11261 SET_SRC (set), 1);
11262 validate_change (insn, &SET_SRC (set),
11263 replacement, 1);
11265 if (apply_change_group ())
11267 if (loop_dump_stream)
11268 fprintf (loop_dump_stream,
11269 " Swapped set of reg %d at %d with reg %d at %d.\n",
11270 regno, INSN_UID (insn),
11271 new_regno, INSN_UID (prev_insn));
11273 /* Update first use of REGNO. */
11274 if (REGNO_FIRST_UID (regno) == INSN_UID (prev_insn))
11275 REGNO_FIRST_UID (regno) = INSN_UID (insn);
11277 /* Now perform copy propagation to hopefully
11278 remove all uses of REGNO within the loop. */
11279 try_copy_prop (loop, replacement, regno);
11285 /* Worker function for find_mem_in_note, called via for_each_rtx. */
11287 static int
11288 find_mem_in_note_1 (rtx *x, void *data)
11290 if (*x != NULL_RTX && MEM_P (*x))
11292 rtx *res = (rtx *) data;
11293 *res = *x;
11294 return 1;
11296 return 0;
11299 /* Returns the first MEM found in NOTE by depth-first search. */
11301 static rtx
11302 find_mem_in_note (rtx note)
11304 if (note && for_each_rtx (&note, find_mem_in_note_1, &note))
11305 return note;
11306 return NULL_RTX;
11309 /* Replace MEM with its associated pseudo register. This function is
11310 called from load_mems via for_each_rtx. DATA is actually a pointer
11311 to a structure describing the instruction currently being scanned
11312 and the MEM we are currently replacing. */
11314 static int
11315 replace_loop_mem (rtx *mem, void *data)
11317 loop_replace_args *args = (loop_replace_args *) data;
11318 rtx m = *mem;
11320 if (m == NULL_RTX)
11321 return 0;
11323 switch (GET_CODE (m))
11325 case MEM:
11326 break;
11328 case CONST_DOUBLE:
11329 /* We're not interested in the MEM associated with a
11330 CONST_DOUBLE, so there's no need to traverse into one. */
11331 return -1;
11333 default:
11334 /* This is not a MEM. */
11335 return 0;
11338 if (!rtx_equal_p (args->match, m))
11339 /* This is not the MEM we are currently replacing. */
11340 return 0;
11342 /* Actually replace the MEM. */
11343 validate_change (args->insn, mem, args->replacement, 1);
11345 return 0;
11348 static void
11349 replace_loop_mems (rtx insn, rtx mem, rtx reg, int written)
11351 loop_replace_args args;
11353 args.insn = insn;
11354 args.match = mem;
11355 args.replacement = reg;
11357 for_each_rtx (&insn, replace_loop_mem, &args);
11359 /* If we hoist a mem write out of the loop, then REG_EQUAL
11360 notes referring to the mem are no longer valid. */
11361 if (written)
11363 rtx note, sub;
11364 rtx *link;
11366 for (link = &REG_NOTES (insn); (note = *link); link = &XEXP (note, 1))
11368 if (REG_NOTE_KIND (note) == REG_EQUAL
11369 && (sub = find_mem_in_note (note))
11370 && true_dependence (mem, VOIDmode, sub, rtx_varies_p))
11372 /* Remove the note. */
11373 validate_change (NULL_RTX, link, XEXP (note, 1), 1);
11374 break;
11380 /* Replace one register with another. Called through for_each_rtx; PX points
11381 to the rtx being scanned. DATA is actually a pointer to
11382 a structure of arguments. */
11384 static int
11385 replace_loop_reg (rtx *px, void *data)
11387 rtx x = *px;
11388 loop_replace_args *args = (loop_replace_args *) data;
11390 if (x == NULL_RTX)
11391 return 0;
11393 if (x == args->match)
11394 validate_change (args->insn, px, args->replacement, 1);
11396 return 0;
11399 static void
11400 replace_loop_regs (rtx insn, rtx reg, rtx replacement)
11402 loop_replace_args args;
11404 args.insn = insn;
11405 args.match = reg;
11406 args.replacement = replacement;
11408 for_each_rtx (&insn, replace_loop_reg, &args);
11411 /* Emit insn for PATTERN after WHERE_INSN in basic block WHERE_BB
11412 (ignored in the interim). */
11414 static rtx
11415 loop_insn_emit_after (const struct loop *loop ATTRIBUTE_UNUSED,
11416 basic_block where_bb ATTRIBUTE_UNUSED, rtx where_insn,
11417 rtx pattern)
11419 return emit_insn_after (pattern, where_insn);
11423 /* If WHERE_INSN is nonzero emit insn for PATTERN before WHERE_INSN
11424 in basic block WHERE_BB (ignored in the interim) within the loop
11425 otherwise hoist PATTERN into the loop pre-header. */
11427 static rtx
11428 loop_insn_emit_before (const struct loop *loop,
11429 basic_block where_bb ATTRIBUTE_UNUSED,
11430 rtx where_insn, rtx pattern)
11432 if (! where_insn)
11433 return loop_insn_hoist (loop, pattern);
11434 return emit_insn_before (pattern, where_insn);
11438 /* Emit call insn for PATTERN before WHERE_INSN in basic block
11439 WHERE_BB (ignored in the interim) within the loop. */
11441 static rtx
11442 loop_call_insn_emit_before (const struct loop *loop ATTRIBUTE_UNUSED,
11443 basic_block where_bb ATTRIBUTE_UNUSED,
11444 rtx where_insn, rtx pattern)
11446 return emit_call_insn_before (pattern, where_insn);
11450 /* Hoist insn for PATTERN into the loop pre-header. */
11452 static rtx
11453 loop_insn_hoist (const struct loop *loop, rtx pattern)
11455 return loop_insn_emit_before (loop, 0, loop->start, pattern);
11459 /* Hoist call insn for PATTERN into the loop pre-header. */
11461 static rtx
11462 loop_call_insn_hoist (const struct loop *loop, rtx pattern)
11464 return loop_call_insn_emit_before (loop, 0, loop->start, pattern);
11468 /* Sink insn for PATTERN after the loop end. */
11470 static rtx
11471 loop_insn_sink (const struct loop *loop, rtx pattern)
11473 return loop_insn_emit_before (loop, 0, loop->sink, pattern);
11476 /* bl->final_value can be either general_operand or PLUS of general_operand
11477 and constant. Emit sequence of instructions to load it into REG. */
11478 static rtx
11479 gen_load_of_final_value (rtx reg, rtx final_value)
11481 rtx seq;
11482 start_sequence ();
11483 final_value = force_operand (final_value, reg);
11484 if (final_value != reg)
11485 emit_move_insn (reg, final_value);
11486 seq = get_insns ();
11487 end_sequence ();
11488 return seq;
11491 /* If the loop has multiple exits, emit insn for PATTERN before the
11492 loop to ensure that it will always be executed no matter how the
11493 loop exits. Otherwise, emit the insn for PATTERN after the loop,
11494 since this is slightly more efficient. */
11496 static rtx
11497 loop_insn_sink_or_swim (const struct loop *loop, rtx pattern)
11499 if (loop->exit_count)
11500 return loop_insn_hoist (loop, pattern);
11501 else
11502 return loop_insn_sink (loop, pattern);
11505 static void
11506 loop_ivs_dump (const struct loop *loop, FILE *file, int verbose)
11508 struct iv_class *bl;
11509 int iv_num = 0;
11511 if (! loop || ! file)
11512 return;
11514 for (bl = LOOP_IVS (loop)->list; bl; bl = bl->next)
11515 iv_num++;
11517 fprintf (file, "Loop %d: %d IV classes\n", loop->num, iv_num);
11519 for (bl = LOOP_IVS (loop)->list; bl; bl = bl->next)
11521 loop_iv_class_dump (bl, file, verbose);
11522 fputc ('\n', file);
11527 static void
11528 loop_iv_class_dump (const struct iv_class *bl, FILE *file,
11529 int verbose ATTRIBUTE_UNUSED)
11531 struct induction *v;
11532 rtx incr;
11533 int i;
11535 if (! bl || ! file)
11536 return;
11538 fprintf (file, "IV class for reg %d, benefit %d\n",
11539 bl->regno, bl->total_benefit);
11541 fprintf (file, " Init insn %d", INSN_UID (bl->init_insn));
11542 if (bl->initial_value)
11544 fprintf (file, ", init val: ");
11545 print_simple_rtl (file, bl->initial_value);
11547 if (bl->initial_test)
11549 fprintf (file, ", init test: ");
11550 print_simple_rtl (file, bl->initial_test);
11552 fputc ('\n', file);
11554 if (bl->final_value)
11556 fprintf (file, " Final val: ");
11557 print_simple_rtl (file, bl->final_value);
11558 fputc ('\n', file);
11561 if ((incr = biv_total_increment (bl)))
11563 fprintf (file, " Total increment: ");
11564 print_simple_rtl (file, incr);
11565 fputc ('\n', file);
11568 /* List the increments. */
11569 for (i = 0, v = bl->biv; v; v = v->next_iv, i++)
11571 fprintf (file, " Inc%d: insn %d, incr: ", i, INSN_UID (v->insn));
11572 print_simple_rtl (file, v->add_val);
11573 fputc ('\n', file);
11576 /* List the givs. */
11577 for (i = 0, v = bl->giv; v; v = v->next_iv, i++)
11579 fprintf (file, " Giv%d: insn %d, benefit %d, ",
11580 i, INSN_UID (v->insn), v->benefit);
11581 if (v->giv_type == DEST_ADDR)
11582 print_simple_rtl (file, v->mem);
11583 else
11584 print_simple_rtl (file, single_set (v->insn));
11585 fputc ('\n', file);
11590 static void
11591 loop_biv_dump (const struct induction *v, FILE *file, int verbose)
11593 if (! v || ! file)
11594 return;
11596 fprintf (file,
11597 "Biv %d: insn %d",
11598 REGNO (v->dest_reg), INSN_UID (v->insn));
11599 fprintf (file, " const ");
11600 print_simple_rtl (file, v->add_val);
11602 if (verbose && v->final_value)
11604 fputc ('\n', file);
11605 fprintf (file, " final ");
11606 print_simple_rtl (file, v->final_value);
11609 fputc ('\n', file);
11613 static void
11614 loop_giv_dump (const struct induction *v, FILE *file, int verbose)
11616 if (! v || ! file)
11617 return;
11619 if (v->giv_type == DEST_REG)
11620 fprintf (file, "Giv %d: insn %d",
11621 REGNO (v->dest_reg), INSN_UID (v->insn));
11622 else
11623 fprintf (file, "Dest address: insn %d",
11624 INSN_UID (v->insn));
11626 fprintf (file, " src reg %d benefit %d",
11627 REGNO (v->src_reg), v->benefit);
11628 fprintf (file, " lifetime %d",
11629 v->lifetime);
11631 if (v->replaceable)
11632 fprintf (file, " replaceable");
11634 if (v->no_const_addval)
11635 fprintf (file, " ncav");
11637 if (v->ext_dependent)
11639 switch (GET_CODE (v->ext_dependent))
11641 case SIGN_EXTEND:
11642 fprintf (file, " ext se");
11643 break;
11644 case ZERO_EXTEND:
11645 fprintf (file, " ext ze");
11646 break;
11647 case TRUNCATE:
11648 fprintf (file, " ext tr");
11649 break;
11650 default:
11651 abort ();
11655 fputc ('\n', file);
11656 fprintf (file, " mult ");
11657 print_simple_rtl (file, v->mult_val);
11659 fputc ('\n', file);
11660 fprintf (file, " add ");
11661 print_simple_rtl (file, v->add_val);
11663 if (verbose && v->final_value)
11665 fputc ('\n', file);
11666 fprintf (file, " final ");
11667 print_simple_rtl (file, v->final_value);
11670 fputc ('\n', file);
/* Dump the IV classes of LOOP to stderr, verbosely.  */

void
debug_ivs (const struct loop *loop)
{
  FILE *out = stderr;

  loop_ivs_dump (loop, out, 1);
}
/* Dump the IV class BL to stderr, verbosely.  */

void
debug_iv_class (const struct iv_class *bl)
{
  FILE *out = stderr;

  loop_iv_class_dump (bl, out, 1);
}
/* Dump the biv V to stderr, verbosely.  */

void
debug_biv (const struct induction *v)
{
  FILE *out = stderr;

  loop_biv_dump (v, out, 1);
}
/* Dump the giv V to stderr, verbosely.  */

void
debug_giv (const struct induction *v)
{
  FILE *out = stderr;

  loop_giv_dump (v, out, 1);
}
/* BLOCK_NUM of INSN, or -1 when INSN is null or BLOCK_FOR_INSN (INSN)
   is null.  INSN is evaluated more than once.  */
#define LOOP_BLOCK_NUM_1(INSN) \
  (((INSN) && BLOCK_FOR_INSN (INSN)) ? BLOCK_NUM (INSN) : -1)

/* The notes do not have an assigned block, so for a note look at the
   next non-note insn instead.  Yields -1 for a null INSN.  */
#define LOOP_BLOCK_NUM(INSN) \
  (!(INSN) ? -1 \
   : (NOTE_P (INSN) \
      ? LOOP_BLOCK_NUM_1 (next_nonnote_insn (INSN)) \
      : LOOP_BLOCK_NUM_1 (INSN)))

/* INSN_UID of INSN, or -1 when INSN is null.  */
#define LOOP_INSN_UID(INSN) (!(INSN) ? -1 : INSN_UID (INSN))
11714 static void
11715 loop_dump_aux (const struct loop *loop, FILE *file,
11716 int verbose ATTRIBUTE_UNUSED)
11718 rtx label;
11720 if (! loop || ! file || !BB_HEAD (loop->first))
11721 return;
11723 /* Print diagnostics to compare our concept of a loop with
11724 what the loop notes say. */
11725 if (! PREV_INSN (BB_HEAD (loop->first))
11726 || !NOTE_P (PREV_INSN (BB_HEAD (loop->first)))
11727 || NOTE_LINE_NUMBER (PREV_INSN (BB_HEAD (loop->first)))
11728 != NOTE_INSN_LOOP_BEG)
11729 fprintf (file, ";; No NOTE_INSN_LOOP_BEG at %d\n",
11730 INSN_UID (PREV_INSN (BB_HEAD (loop->first))));
11731 if (! NEXT_INSN (BB_END (loop->last))
11732 || !NOTE_P (NEXT_INSN (BB_END (loop->last)))
11733 || NOTE_LINE_NUMBER (NEXT_INSN (BB_END (loop->last)))
11734 != NOTE_INSN_LOOP_END)
11735 fprintf (file, ";; No NOTE_INSN_LOOP_END at %d\n",
11736 INSN_UID (NEXT_INSN (BB_END (loop->last))));
11738 if (loop->start)
11740 fprintf (file,
11741 ";; start %d (%d), end %d (%d)\n",
11742 LOOP_BLOCK_NUM (loop->start),
11743 LOOP_INSN_UID (loop->start),
11744 LOOP_BLOCK_NUM (loop->end),
11745 LOOP_INSN_UID (loop->end));
11746 fprintf (file, ";; top %d (%d), scan start %d (%d)\n",
11747 LOOP_BLOCK_NUM (loop->top),
11748 LOOP_INSN_UID (loop->top),
11749 LOOP_BLOCK_NUM (loop->scan_start),
11750 LOOP_INSN_UID (loop->scan_start));
11751 fprintf (file, ";; exit_count %d", loop->exit_count);
11752 if (loop->exit_count)
11754 fputs (", labels:", file);
11755 for (label = loop->exit_labels; label; label = LABEL_NEXTREF (label))
11757 fprintf (file, " %d ",
11758 LOOP_INSN_UID (XEXP (label, 0)));
11761 fputs ("\n", file);
11765 /* Call this function from the debugger to dump LOOP. */
11767 void
11768 debug_loop (const struct loop *loop)
11770 flow_loop_dump (loop, stderr, loop_dump_aux, 1);
11773 /* Call this function from the debugger to dump LOOPS. */
11775 void
11776 debug_loops (const struct loops *loops)
11778 flow_loops_dump (loops, stderr, loop_dump_aux, 1);